eohi3-updates (#3)

updating eohi folder w/ third eohi exp. Reviewed-on: #3 Co-authored-by: Irina Levit <irina.levit.rn@gmail.com> Co-committed-by: Irina Levit <irina.levit.rn@gmail.com>
2026-01-26 16:30:09 -05:00 · 2026-01-26 16:30:09 -05:00 · ba54687da2
commit ba54687da2
parent 5e7ad6be15
38 changed files with 4967 additions and 0 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -0,0 +1,55 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "type": "R-Debugger",
+            "name": "Launch R-Workspace",
+            "request": "launch",
+            "debugMode": "workspace",
+            "workingDirectory": "${workspaceFolder}",
+            "splitOverwrittenOutput": true
+        },
+        {
+            "type": "R-Debugger",
+            "name": "Debug R-File",
+            "request": "launch",
+            "debugMode": "file",
+            "workingDirectory": "${workspaceFolder}",
+            "file": "${file}",
+            "splitOverwrittenOutput": true,
+            "stopOnEntry": false
+        },
+        {
+            "type": "R-Debugger",
+            "name": "Debug R-Function",
+            "request": "launch",
+            "debugMode": "function",
+            "workingDirectory": "${workspaceFolder}",
+            "file": "${file}",
+            "mainFunction": "main",
+            "allowGlobalDebugging": false,
+            "splitOverwrittenOutput": true
+        },
+        {
+            "type": "R-Debugger",
+            "name": "Debug R-Package",
+            "request": "launch",
+            "debugMode": "workspace",
+            "workingDirectory": "${workspaceFolder}",
+            "includePackageScopes": true,
+            "loadPackages": [
+                "."
+            ],
+            "splitOverwrittenOutput": true
+        },
+        {
+            "type": "R-Debugger",
+            "request": "attach",
+            "name": "Attach to R process",
+            "splitOverwrittenOutput": true
+        }
+    ]
+}
--- a/eohi3/dataREVIEW-JAN05/eohi3_filter2.csv
+++ b/eohi3/dataREVIEW-JAN05/eohi3_filter2.csv
--- a/eohi3/dataREVIEW-JAN05/eohi3_raw.csv
+++ b/eohi3/dataREVIEW-JAN05/eohi3_raw.csv
--- a/eohi3/dataREVIEW-JAN05/eohi3_raw2.csv
+++ b/eohi3/dataREVIEW-JAN05/eohi3_raw2.csv
--- a/eohi3/dataREVIEW-JAN05/eohi3_unprocessed.csv
+++ b/eohi3/dataREVIEW-JAN05/eohi3_unprocessed.csv
--- a/eohi3/dataREVIEW-JAN05/response
+++ b/eohi3/dataREVIEW-JAN05/response
@ -0,0 +1,68 @@
+ResponseId,RATIONALE
+R_12EXYt8gHauPaCb,duration
+R_142iZtlDp1Vam14,duration
+R_16eRiaoFPG5CpE4,duration
+R_1aK2JWzCFkpefUg,duration
+R_1FEuEk6VzuwxZby,duration
+R_1IsHUv4sb6oOphv,duration
+R_1J2cryciskOYjOV,duration
+R_1JFsZ1GXM7jDWmh,duration
+R_1JlV9H7AJKtNZ8g,duration
+R_1kgjhkT4sJwhfuV,duration
+R_1MAMwGkBHTTSyAh,duration
+R_1O6dV9hTlqpsYjP,duration
+R_1qatgZwcLPGctnd,age mismatch
+R_1QE5KaKNkt66Cer,duration
+R_1QsYazd3eOH62js,duration
+R_1vwOg7l0kSLHGRX,duration
+R_1YJ2G01dpxYqKAm,duration
+R_1YoddNWqybPbaNN,feedback in french
+R_1ZOjQ97Ph1VtRwp,duration
+R_347ABt6LFPUeVZS,duration
+R_34Ain6V2NbEDeQm,duration
+R_38J0VDB8JE8Dd0o,duration
+R_3DptQmS26X0Z8Wu,IP duplicate
+R_3Foc2aYGpXFrbnX,age mismatch + duration
+R_3HLz0FyaULkIPKu,IP duplicate
+R_3jUhefm4hAEQ6PC,duration
+R_3n8b0ndM4habNjB,age mismatch
+R_3nTLzs9jMwDHbFy,duration
+R_3rGudTtAd2oVze3,duration
+R_3t6giyCy5IwZgom,duration
+R_3WwXkl4IatPYDZ0,age mismatch
+R_5ByssDsdjMcQgUV,duration
+R_5cNBH4nxBlH8OSB,duration
+R_5FkttTgBeMePzhk,sex mismatch
+R_5FyLW7dHpyFojo5,duration
+R_5M3urkuYhhSG06E,duration
+R_5MRp7eFKMm59t14,feedback in french
+R_5n6H7xuYTQgvFEf,duration
+R_5rrbHXjKol6Zl9U,duration
+R_5youAGSa5hLGkuZ,age mismatch + duration
+R_5z5DYfTnai5Pj3j,duration
+R_64nOi2TWI4XCYkt,duration
+R_6BcdSiP0Nibxx1D,duration
+R_6C4v9kRnGm9Iqyj,IP duplicate
+R_6CpjN5tJoj8dYuB,duration
+R_6cwKXrr8R99m5ez,duration
+R_6F4ld4gRlKjsb06,age mismatch + duration
+R_6GqjTqXrehkbG0x,duration
+R_6HCtgHyy16nNMQ4,age mismatch
+R_6hQN1DUFkxGpDGD,IP duplicate
+R_6JKscJDUeAt7k1y,age mismatch
+R_6lKqtees5Z1hj2L,duration
+R_6m1NYZLedxbAxui,duration
+R_6pM4ierZhbT1FEb,duration
+R_6rQCiwlJHKrWWKB,duration
+R_7AwVrmL8AM0KLKx,duration
+R_7bH15XzvHpDCZO1,duration
+R_7Cl7KFkEiuYwdZn,duration
+R_7EfALTPED13tduG,duration
+R_7flJBV9qf88XSM5,duration
+R_7H0dTzsyEC1Pzyh,duration
+R_7HM0FXjrAoTeGqt,duration
+R_7HRMvwMPw3OBE7g,duration
+R_7o7FORJHlgWAahS,age mismatch
+R_7sTsQ9AI42QQgSV,duration
+R_7VJCRyovK5KAddn,duration
+R_7w4ggvRoPBkyTle,duration
--- a/eohi3/dataREVIEW-JAN21/datap
+++ b/eohi3/dataREVIEW-JAN21/datap
@ -0,0 +1,189 @@
+library(dplyr)
+
+# NOTE(review): hard-coded absolute path - the script only runs on a machine
+# that has this directory; all relative file names below resolve against it.
+setwd("/home/ladmin/Documents/DND/EOHI/eohi3/dataREVIEW-JAN21")
+
+# Read the raw export.
+# - check.names = FALSE preserves the original column names.
+# - na.strings = "NA" converts only the literal string "NA" to NA, so empty
+#   cells in character columns stay as empty strings.
+df <- read.csv("eohi3_raw.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = "NA")
+
+# RATIONALE is expected to exist in the CSV. If it is absent, create it as an
+# empty character column: coercing a missing column (NULL) would yield a
+# zero-length vector that cannot be assigned back into the data frame.
+if (!"RATIONALE" %in% names(df)) {
+  df$RATIONALE <- rep("", nrow(df))
+}
+
+# Force RATIONALE to character and replace NA with "" so later string
+# comparisons and assignments never see NA.
+if (!is.character(df$RATIONALE)) {
+  df$RATIONALE <- as.character(df$RATIONALE)
+}
+df$RATIONALE[is.na(df$RATIONALE)] <- ""
+
+# Check whether an age falls inside an age-range label.
+#
+# Args:
+#   age_num:       a single age; numeric, or text that parses as a number.
+#   age_range_str: a single range label such as "46 - 52" or "25 - 31".
+#
+# Returns:
+#   TRUE when min <= age <= max (bounds inclusive), FALSE when the age is
+#   outside the range, and NULL when the check cannot be performed (missing
+#   or empty input, label not of the form "<min> - <max>", or a value that
+#   does not parse as a number).
+check_age_range <- function(age_num, age_range_str) {
+  # NULL / zero-length input must be rejected before is.na(): is.na(NULL) is
+  # logical(0), which is not a valid operand for `||`.
+  if (length(age_num) == 0 || length(age_range_str) == 0) {
+    return(NULL)
+  }
+  if (is.na(age_num) || is.na(age_range_str) || trimws(age_range_str) == "") {
+    return(NULL)
+  }
+
+  # Coerce the age so a value read as text is compared numerically rather
+  # than as a string; an unparseable value means the check is skipped.
+  age_value <- suppressWarnings(as.numeric(age_num))
+  if (is.na(age_value)) {
+    return(NULL)
+  }
+
+  # Split the label on the dash, tolerating any whitespace around it
+  range_parts <- strsplit(trimws(age_range_str), "\\s*-\\s*")[[1]]
+  if (length(range_parts) != 2) {
+    return(NULL)
+  }
+
+  min_age <- suppressWarnings(as.numeric(trimws(range_parts[1])))
+  max_age <- suppressWarnings(as.numeric(trimws(range_parts[2])))
+  if (is.na(min_age) || is.na(max_age)) {
+    return(NULL)
+  }
+
+  age_value >= min_age && age_value <= max_age
+}
+
+# Test whether values are empty.
+#
+# A value counts as empty when it is NA or, for character input, when it is
+# an empty or whitespace-only string. Empty cells are kept as empty strings
+# elsewhere in this script, so both forms have to be recognised.
+#
+# Args:
+#   x: NULL, a single value, or a vector.
+#
+# Returns:
+#   TRUE when x is NULL; otherwise a logical vector with one element per
+#   element of x (logical(0) for zero-length input).
+is_empty <- function(x) {
+  if (is.null(x)) {
+    return(TRUE)
+  }
+  missing_value <- unname(is.na(x))
+  if (!is.character(x)) {
+    return(missing_value)
+  }
+  # `!missing_value &` keeps the NA comparison results out of the final answer
+  blank_text <- !missing_value & unname(trimws(x) == "")
+  missing_value | blank_text
+}
+
+# 1. Check sex match
+# A row is a mismatch only when demo_sex and taq_sex are both non-empty after
+# trimming and differ when compared case-insensitively. Computed on whole
+# columns instead of row by row.
+stopifnot(all(c("demo_sex", "taq_sex") %in% names(df)))
+
+demo_sex_clean <- trimws(df$demo_sex)
+demo_sex_clean[is.na(demo_sex_clean)] <- ""
+taq_sex_clean <- trimws(df$taq_sex)
+taq_sex_clean[is.na(taq_sex_clean)] <- ""
+
+sex_mismatch <- demo_sex_clean != "" &
+  taq_sex_clean != "" &
+  tolower(demo_sex_clean) != tolower(taq_sex_clean)
+
+# 2. Check age range match
+# A row is flagged only when demo_age is present, taq_age is non-empty, and
+# check_age_range() returns FALSE. A NULL result (range could not be checked)
+# is not treated as a mismatch.
+age_mismatch <- vapply(
+  seq_len(nrow(df)),
+  function(row) {
+    # Nothing to compare when either side is missing
+    if (is.na(df$demo_age[row]) || is_empty(df$taq_age[row])) {
+      return(FALSE)
+    }
+    isFALSE(check_age_range(df$demo_age[row], df$taq_age[row]))
+  },
+  logical(1)
+)
+
+# 3. Check citizenship (taq_cit_1 or taq_cit_2)
+# Flag rows where neither citizenship column holds a value.
+no_cit <- is_empty(df$taq_cit_1) & is_empty(df$taq_cit_2)
+
+# 4. Check IP address duplicates
+# Flag every row whose (trimmed, non-empty) IP address occurs more than once.
+# Skipped entirely when the export has no IPAddress column.
+ip_duplicate <- rep(FALSE, nrow(df))
+if ("IPAddress" %in% colnames(df)) {
+  # Normalise: trim whitespace and map NA to the empty string
+  ip_addresses <- trimws(df$IPAddress)
+  ip_addresses[is.na(ip_addresses)] <- ""
+
+  # Tabulate the addresses and keep those seen more than once, ignoring ""
+  ip_counts <- table(ip_addresses)
+  duplicate_ips <- names(ip_counts)[ip_counts > 1 & names(ip_counts) != ""]
+
+  ip_duplicate[ip_addresses %in% duplicate_ips] <- TRUE
+}
+
+# Build RATIONALE column - only populate when there are issues
+# Each flagged check contributes its label; labels for the same row are
+# joined with "; " in the fixed order: sex, age, citizenship, IP.
+issue_flags <- list(
+  "sex mismatch" = sex_mismatch,
+  "age mismatch" = age_mismatch,
+  "no cit" = no_cit,
+  "IP duplicate" = ip_duplicate
+)
+
+# Start from empty strings so rows without issues stay empty
+rationale_parts <- rep("", nrow(df))
+for (label in names(issue_flags)) {
+  flagged <- issue_flags[[label]]
+  already_noted <- rationale_parts != ""
+  append_rows <- flagged & already_noted
+  first_rows <- flagged & !already_noted
+  # Append to an existing note, or start a new one
+  rationale_parts[append_rows] <- paste(rationale_parts[append_rows], label, sep = "; ")
+  rationale_parts[first_rows] <- label
+}
+
+# Update RATIONALE column: rows with at least one detected issue are
+# overwritten with the generated text; all other rows keep whatever value
+# (empty or not) they already had.
+has_issue <- rationale_parts != ""
+df$RATIONALE[has_issue] <- rationale_parts[has_issue]
+
+# Summary - emitted twice so it shows up in both places:
+# message() writes to stderr (debug console), cat() writes to stdout (terminal).
+issue_counts <- c(
+  "Sex mismatches" = sum(sex_mismatch),
+  "Age mismatches" = sum(age_mismatch),
+  "No citizenship" = sum(no_cit),
+  "IP duplicates" = sum(ip_duplicate),
+  "Total rows with issues" = sum(rationale_parts != "")
+)
+
+message("Validation Summary:")
+for (label in names(issue_counts)) {
+  message(label, ": ", issue_counts[[label]])
+}
+
+cat("Validation Summary:\n", file = stdout())
+for (label in names(issue_counts)) {
+  cat(paste0(label, ":"), issue_counts[[label]], "\n", file = stdout())
+}
+flush(stdout())
+
+# Write the updated data
+# Character columns get their NA values replaced by "" first, so empty cells
+# are written back as empty strings; any remaining NA (non-character columns)
+# is written as an empty field via na = "".
+for (col_name in names(df)) {
+  column <- df[[col_name]]
+  if (!is.character(column)) {
+    next
+  }
+  column[is.na(column)] <- ""
+  df[[col_name]] <- column
+}
+write.csv(df, "eohi3_raw2.csv", row.names = FALSE, na = "", quote = TRUE)
--- a/eohi3/dataREVIEW-JAN21/datap
+++ b/eohi3/dataREVIEW-JAN21/datap
@ -0,0 +1,39 @@
+library(dplyr)
+
+# NOTE(review): hard-coded absolute path - the script only runs on a machine
+# that has this directory.
+setwd("/home/ladmin/Documents/DND/EOHI/eohi3/dataREVIEW-JAN21")
+
+# Read the data (with check.names=FALSE to preserve original column names)
+# Only the literal string "NA" is converted to NA; empty cells in character
+# columns stay as empty strings.
+df <- read.csv("eohi3_raw.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = "NA")
+
+# Populate the citizenship column from taq_cit_1 and taq_cit_2:
+# - both columns have a value      -> "Both"
+# - exactly one column has a value -> that value
+# - neither column has a value     -> citizenship is left as it was
+
+# Ensure citizenship column exists, initialize with empty strings if needed.
+# rep() keeps this valid for a file with zero data rows.
+if (!"citizenship" %in% names(df)) {
+  df$citizenship <- rep("", nrow(df))
+}
+
+# Convert NA to empty string for taq_cit columns to ensure consistent handling
+df$taq_cit_1[is.na(df$taq_cit_1)] <- ""
+df$taq_cit_2[is.na(df$taq_cit_2)] <- ""
+
+# Work on trimmed copies so a whitespace-only cell counts as empty instead of
+# as a citizenship value, and so stray padding does not split one citizenship
+# into several distinct labels.
+cit1 <- trimws(df$taq_cit_1)
+cit2 <- trimws(df$taq_cit_2)
+has_cit1 <- cit1 != ""
+has_cit2 <- cit2 != ""
+
+both_have_values <- has_cit1 & has_cit2
+only_cit1 <- has_cit1 & !has_cit2
+only_cit2 <- has_cit2 & !has_cit1
+
+# Assign values
+df$citizenship[both_have_values] <- "Both"
+df$citizenship[only_cit1] <- cit1[only_cit1]
+df$citizenship[only_cit2] <- cit2[only_cit2]
+
+# NOTE(review): this overwrites the input file in place.
+write.csv(df, "eohi3_raw.csv", row.names = FALSE, na = "", quote = TRUE)
--- a/eohi3/dataREVIEW-JAN21/datap
+++ b/eohi3/dataREVIEW-JAN21/datap
@ -0,0 +1,130 @@
+library(dplyr)
+
+setwd("/home/ladmin/Documents/DND/EOHI/eohi3/dataREVIEW-JAN21")
+
+# Load the data, keeping the original column names (check.names = FALSE).
+# Only the literal string "NA" is treated as NA; empty cells in character
+# columns remain empty strings.
+df <- read.csv("eohi3_raw.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = "NA")
+
+# Drop columns whose name is empty or NA (dplyr requires every column to be named)
+unnamed_cols <- which(is.na(names(df)) | names(df) == "")
+if (length(unnamed_cols) > 0) {
+  df <- df[, -unnamed_cols, drop = FALSE]
+}
+
+# Report options: when save_to_doc is TRUE the distributions are also written
+# to the text file named by doc_filename
+save_to_doc <- TRUE
+doc_filename <- "eohi3_quotas.txt"
+
+# =============================================================================
+# SINGLE VARIABLE DISTRIBUTIONS
+# =============================================================================
+# One frequency table per variable, most frequent value first; each table is
+# printed as soon as it is computed.
+
+dist_age <- count(df, taq_age, sort = TRUE)
+print(dist_age)
+
+dist_sex <- count(df, taq_sex, sort = TRUE)
+print(dist_sex)
+
+dist_citizenship <- count(df, citizenship, sort = TRUE)
+print(dist_citizenship)
+
+dist_group <- count(df, group, sort = TRUE)
+print(dist_group)
+
+dist_temporalDO <- count(df, temporalDO, sort = TRUE)
+print(dist_temporalDO)
+
+dist_perspective <- count(df, perspective, sort = TRUE)
+print(dist_perspective)
+
+# =============================================================================
+# NESTED DISTRIBUTIONS
+# =============================================================================
+# Two-way frequency tables, ordered by the outer variable and then the inner
+# one; each table is printed as soon as it is computed.
+
+dist_age_citizenship <- arrange(count(df, citizenship, taq_age), citizenship, taq_age)
+print(dist_age_citizenship)
+
+dist_sex_citizenship <- arrange(count(df, citizenship, taq_sex), citizenship, taq_sex)
+print(dist_sex_citizenship)
+
+dist_age_temporalDO <- arrange(count(df, temporalDO, taq_age), temporalDO, taq_age)
+print(dist_age_temporalDO)
+
+dist_age_perspective <- arrange(count(df, perspective, taq_age), perspective, taq_age)
+print(dist_age_perspective)
+
+dist_sex_temporalDO <- arrange(count(df, temporalDO, taq_sex), temporalDO, taq_sex)
+print(dist_sex_temporalDO)
+
+dist_sex_perspective <- arrange(count(df, perspective, taq_sex), perspective, taq_sex)
+print(dist_sex_perspective)
+
+# =============================================================================
+# OPTIONAL: SAVE ALL DISTRIBUTIONS TO DOCUMENT
+# =============================================================================
+# When save_to_doc is TRUE, write every distribution computed above to
+# doc_filename as a plain-text report. Output is diverted with sink(); the
+# diversion is undone in a `finally` clause so that an error while printing
+# cannot leave console output redirected into the report file.
+
+if (save_to_doc) {
+  single_sections <- list(
+    "Distribution of taq_age:" = dist_age,
+    "Distribution of taq_sex:" = dist_sex,
+    "Distribution of citizenship:" = dist_citizenship,
+    "Distribution of group:" = dist_group,
+    "Distribution of temporalDO:" = dist_temporalDO,
+    "Distribution of perspective:" = dist_perspective
+  )
+  nested_sections <- list(
+    "Age within Citizenship:" = dist_age_citizenship,
+    "Sex within Citizenship:" = dist_sex_citizenship,
+    "Age within temporalDO:" = dist_age_temporalDO,
+    "Age within perspective:" = dist_age_perspective,
+    "Sex within temporalDO:" = dist_sex_temporalDO,
+    "Sex within perspective:" = dist_sex_perspective
+  )
+
+  # "w" matches the mode sink() uses when it is given a file name
+  report_con <- file(doc_filename, open = "w")
+  sink(report_con)
+  tryCatch(
+    {
+      cat("DISTRIBUTION REPORT\n")
+      cat("==================\n\n")
+
+      cat("SINGLE VARIABLE DISTRIBUTIONS\n")
+      cat("------------------------------\n\n")
+      for (title in names(single_sections)) {
+        cat(title, "\n", sep = "")
+        print(single_sections[[title]])
+        cat("\n\n")
+      }
+
+      cat("NESTED DISTRIBUTIONS\n")
+      cat("---------------------\n\n")
+      last_title <- names(nested_sections)[length(nested_sections)]
+      for (title in names(nested_sections)) {
+        cat(title, "\n", sep = "")
+        print(nested_sections[[title]])
+        # The final table is followed by a single newline, the others by two
+        cat(if (identical(title, last_title)) "\n" else "\n\n")
+      }
+    },
+    finally = {
+      sink()
+      close(report_con)
+    }
+  )
+
+  cat("Distributions saved to:", doc_filename, "\n")
+}
--- a/eohi3/dataREVIEW-JAN21/eohi3_quotas.txt
+++ b/eohi3/dataREVIEW-JAN21/eohi3_quotas.txt
@ -0,0 +1,177 @@
+DISTRIBUTION REPORT
+
+==================
+
+
+SINGLE VARIABLE DISTRIBUTIONS
+
+------------------------------
+
+
+Distribution of taq_age:
+
+  taq_age  n
+1 18 - 24 73
+2 53 - 59 67
+3 60 - 66 67
+4 67 - 73 65
+5 39 - 45 64
+6 46 - 52 63
+7 25 - 31 62
+8 32 - 38 61
+
+
+
+Distribution of taq_sex:
+
+            taq_sex   n
+1            Female 260
+2              Male 257
+3 Prefer not to say   5
+
+
+
+Distribution of citizenship:
+
+  citizenship   n
+1    American 262
+2    Canadian 258
+3        Both   2
+
+
+
+Distribution of group:
+
+  group   n
+1 01FPV 177
+2 03VFP 174
+3 02PVF 171
+
+
+
+Distribution of temporalDO:
+
+  temporalDO   n
+1       past 262
+2     future 260
+
+
+
+Distribution of perspective:
+
+  perspective   n
+1       other 261
+2        self 261
+
+
+
+NESTED DISTRIBUTIONS
+
+---------------------
+
+
+Age within Citizenship:
+
+   citizenship taq_age  n
+1     American 18 - 24 38
+2     American 25 - 31 30
+3     American 32 - 38 29
+4     American 39 - 45 33
+5     American 46 - 52 31
+6     American 53 - 59 34
+7     American 60 - 66 34
+8     American 67 - 73 33
+9         Both 32 - 38  1
+10        Both 46 - 52  1
+11    Canadian 18 - 24 35
+12    Canadian 25 - 31 32
+13    Canadian 32 - 38 31
+14    Canadian 39 - 45 31
+15    Canadian 46 - 52 31
+16    Canadian 53 - 59 33
+17    Canadian 60 - 66 33
+18    Canadian 67 - 73 32
+
+
+
+Sex within Citizenship:
+
+  citizenship           taq_sex   n
+1    American            Female 130
+2    American              Male 129
+3    American Prefer not to say   3
+4        Both            Female   1
+5        Both              Male   1
+6    Canadian            Female 129
+7    Canadian              Male 127
+8    Canadian Prefer not to say   2
+
+
+
+Age within temporalDO:
+
+   temporalDO taq_age  n
+1      future 18 - 24 38
+2      future 25 - 31 31
+3      future 32 - 38 29
+4      future 39 - 45 34
+5      future 46 - 52 35
+6      future 53 - 59 36
+7      future 60 - 66 29
+8      future 67 - 73 28
+9        past 18 - 24 35
+10       past 25 - 31 31
+11       past 32 - 38 32
+12       past 39 - 45 30
+13       past 46 - 52 28
+14       past 53 - 59 31
+15       past 60 - 66 38
+16       past 67 - 73 37
+
+
+
+Age within perspective:
+
+   perspective taq_age  n
+1        other 18 - 24 41
+2        other 25 - 31 36
+3        other 32 - 38 28
+4        other 39 - 45 32
+5        other 46 - 52 28
+6        other 53 - 59 33
+7        other 60 - 66 30
+8        other 67 - 73 33
+9         self 18 - 24 32
+10        self 25 - 31 26
+11        self 32 - 38 33
+12        self 39 - 45 32
+13        self 46 - 52 35
+14        self 53 - 59 34
+15        self 60 - 66 37
+16        self 67 - 73 32
+
+
+
+Sex within temporalDO:
+
+  temporalDO           taq_sex   n
+1     future            Female 130
+2     future              Male 129
+3     future Prefer not to say   1
+4       past            Female 130
+5       past              Male 128
+6       past Prefer not to say   4
+
+
+
+Sex within perspective:
+
+  perspective           taq_sex   n
+1       other            Female 130
+2       other              Male 128
+3       other Prefer not to say   3
+4        self            Female 130
+5        self              Male 129
+6        self Prefer not to say   2
+
+
--- a/eohi3/dataREVIEW-JAN21/eohi3_raw.csv
+++ b/eohi3/dataREVIEW-JAN21/eohi3_raw.csv
--- a/eohi3/dataREVIEW-JAN21/eohi3_raw2.csv
+++ b/eohi3/dataREVIEW-JAN21/eohi3_raw2.csv
--- a/eohi3/dataREVIEW-JAN21/eohi3_unprocessed_final.csv
+++ b/eohi3/dataREVIEW-JAN21/eohi3_unprocessed_final.csv
--- a/eohi3/eohi3.csv
+++ b/eohi3/eohi3.csv
--- a/eohi3/test-DEC29/eohi3-test.csv
+++ b/eohi3/test-DEC29/eohi3-test.csv
--- a/review/Diener_lifeScale.pdf
+++ b/review/Diener_lifeScale.pdf
--- a/review/brietzke_ehi2.pdf
+++ b/review/brietzke_ehi2.pdf
--- a/review/carmen_ehi2.pdf
+++ b/review/carmen_ehi2.pdf
--- a/review/fleming_pro-retro.pdf
+++ b/review/fleming_pro-retro.pdf
--- a/review/guo_ehi2.pdf
+++ b/review/guo_ehi2.pdf
--- a/review/gutral_ehi2.pdf
+++ b/review/gutral_ehi2.pdf
--- a/review/haas_ehi1.pdf
+++ b/review/haas_ehi1.pdf
--- a/review/haddock_futResearch.pdf
+++ b/review/haddock_futResearch.pdf
--- a/review/harris_ehi1.pdf
+++ b/review/harris_ehi1.pdf
--- a/review/hershfield_FUTforecast.pdf
+++ b/review/hershfield_FUTforecast.pdf
--- a/review/lechner_valueScale.pdf
+++ b/review/lechner_valueScale.pdf
--- a/review/pdf_to_txt.py
+++ b/review/pdf_to_txt.py
@ -0,0 +1,128 @@
+#!/home/ladmin/miniconda3/envs/nlp/bin/python
+"""
+PDF to Text Converter
+Converts PDF files to plain text files.
+
+Usage:
+    python pdf_to_txt.py <input.pdf>                    # Creates input.txt
+    python pdf_to_txt.py <input.pdf> <output.txt>       # Custom output name
+    python pdf_to_txt.py --all                           # Convert all PDFs in current directory
+
+Requirements:
+    pip install pypdf
+"""
+
+import sys
+import os
+from pathlib import Path
+
+try:
+    from pypdf import PdfReader
+except ImportError:
+    print("Error: pypdf library not found.")
+    print("Please install it with: pip install pypdf")
+    sys.exit(1)
+
+
+def pdf_to_text(pdf_path, output_path=None):
+    """
+    Convert a PDF file to a text file.
+
+    The text of every page that yields any text is written to the output
+    file, each page preceded by a "--- Page N ---" marker. Pages with no
+    extractable text are skipped.
+
+    Args:
+        pdf_path: Path to the PDF file
+        output_path: Path to the output text file (optional). Defaults to
+            the PDF path with its suffix replaced by ".txt".
+
+    Returns:
+        True if successful, False otherwise (missing input, output path
+        identical to the input, or any error while reading or writing).
+    """
+    # Convert before entering the try block so the error handler below can
+    # always rely on pdf_path being a Path.
+    pdf_path = Path(pdf_path)
+
+    try:
+        if not pdf_path.exists():
+            print(f"Error: File not found: {pdf_path}")
+            return False
+
+        # Determine output path
+        if output_path is None:
+            output_path = pdf_path.with_suffix('.txt')
+        else:
+            output_path = Path(output_path)
+
+        # Refuse to overwrite the source file with its own extracted text
+        if output_path.resolve() == pdf_path.resolve():
+            print(f"Error: Output path is the same as the input file: {pdf_path}")
+            return False
+
+        print(f"Converting: {pdf_path.name}")
+
+        # Read the PDF
+        reader = PdfReader(str(pdf_path))
+
+        # Extract text from all pages (page numbers start at 1)
+        text_content = []
+        for i, page in enumerate(reader.pages, 1):
+            text = page.extract_text()
+            if text:
+                text_content.append(f"--- Page {i} ---\n{text}\n")
+
+        # Write to text file
+        full_text = "\n".join(text_content)
+        output_path.write_text(full_text, encoding='utf-8')
+
+        print(f"✓ Created: {output_path.name} ({len(reader.pages)} pages, {len(full_text):,} characters)")
+        return True
+
+    except Exception as e:
+        # Best-effort conversion: report the failure and let the caller decide
+        print(f"✗ Error processing {pdf_path.name}: {str(e)}")
+        return False
+
+
+def convert_all_pdfs():
+    """
+    Convert all PDF files in the current directory to text files.
+
+    Files are matched by a case-insensitive ".pdf" suffix and processed in
+    sorted order so repeated runs produce the same output sequence.
+
+    Returns:
+        The number of files that failed to convert (0 when every file was
+        converted or when no PDF files were found).
+    """
+    current_dir = Path.cwd()
+    pdf_files = sorted(
+        entry for entry in current_dir.iterdir()
+        if entry.is_file() and entry.suffix.lower() == ".pdf"
+    )
+
+    if not pdf_files:
+        print("No PDF files found in the current directory.")
+        return 0
+
+    print(f"Found {len(pdf_files)} PDF file(s) to convert.\n")
+
+    successful = 0
+    failed = 0
+
+    for pdf_file in pdf_files:
+        if pdf_to_text(pdf_file):
+            successful += 1
+        else:
+            failed += 1
+
+    print(f"\n{'='*60}")
+    print(f"Conversion complete: {successful} successful, {failed} failed")
+    return failed
+
+
+def main():
+    """
+    Command-line entry point.
+
+    Dispatches on sys.argv: "--all" converts every PDF in the current
+    directory, one argument converts a single PDF, two arguments convert a
+    single PDF to the given output path. Prints the usage text and exits
+    with status 1 when called with no arguments or too many, and exits with
+    status 1 when a conversion reports failure.
+    """
+    if len(sys.argv) < 2:
+        print(__doc__)
+        sys.exit(1)
+
+    # Convert all PDFs in directory
+    if sys.argv[1] == "--all":
+        # Any truthy return value is treated as a failure indicator; a None
+        # return leaves the exit status at 0.
+        succeeded = not convert_all_pdfs()
+
+    # Convert single PDF
+    elif len(sys.argv) == 2:
+        succeeded = pdf_to_text(sys.argv[1])
+
+    # Convert single PDF with custom output name
+    elif len(sys.argv) == 3:
+        succeeded = pdf_to_text(sys.argv[1], sys.argv[2])
+
+    else:
+        print("Error: Too many arguments")
+        print(__doc__)
+        sys.exit(1)
+
+    # Surface conversion failures to the calling shell
+    if not succeeded:
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
+
--- a/review/quoidbach.sm.pdf
+++ b/review/quoidbach.sm.pdf
--- a/review/quoidbach_ehi1.pdf
+++ b/review/quoidbach_ehi1.pdf
--- a/review/reiff_ehi2.pdf
+++ b/review/reiff_ehi2.pdf
--- a/review/rutt_ehi2.pdf
+++ b/review/rutt_ehi2.pdf
--- a/review/sachi_ehi1.pdf
+++ b/review/sachi_ehi1.pdf
--- a/review/siedlecka_pro-retro.pdf
+++ b/review/siedlecka_pro-retro.pdf
--- a/review/vanRyzin_ehi1.pdf
+++ b/review/vanRyzin_ehi1.pdf
--- a/review/verner_FUTforecast.pdf
+++ b/review/verner_FUTforecast.pdf
--- a/review/wilson_FUTforecast.pdf
+++ b/review/wilson_FUTforecast.pdf
--- a/review/yue_ehi2.pdf
+++ b/review/yue_ehi2.pdf
--- a/manuscript/EOHI
+++ b/manuscript/EOHI