eohi/eohi2/datap 16 - ehi vars standardized .r
2025-12-23 15:47:09 -05:00

100 lines
3.9 KiB
R

# Script setup: configure number printing, load packages, and read the input data.
# A large scipen penalty makes R prefer fixed over scientific notation when printing.
options(scipen = 999)
# NOTE(review): no dplyr function appears to be called in the visible script;
# confirm whether this dependency is still needed.
library(dplyr)
# NOTE(review): hard-coded, machine-specific absolute path. The relative file
# names used for reading and writing below resolve against this directory.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Load the data set into `df` from the working directory.
df <- read.csv("eohi2.csv")
# Print the mean and then the standard deviation (rounded to 5 decimals, NAs
# ignored) of each non-standardized variable so they can be checked by hand.
raw_vars <- c("ehiDGEN_5_mean", "ehiDGEN_10_mean", "ehi5_global_mean", "ehi10_global_mean")
for (raw_var in raw_vars) {
  raw_values <- df[[raw_var]]
  print(round(mean(raw_values, na.rm = TRUE), 5))
  print(round(sd(raw_values, na.rm = TRUE), 5))
}
# Standardize each raw variable as a z-score: (value - mean) / sd, with NAs
# ignored when estimating the mean and standard deviation.
# The mean_* and sd_* values are kept in the workspace because the row-level
# spot checks later in the script recompute the z-scores from them.

# DGEN, 5
raw_DGEN_5 <- df$ehiDGEN_5_mean
mean_DGEN_5 <- mean(raw_DGEN_5, na.rm = TRUE)
sd_DGEN_5 <- sd(raw_DGEN_5, na.rm = TRUE)
df$stdDGEN_5 <- (raw_DGEN_5 - mean_DGEN_5) / sd_DGEN_5

# DGEN, 10
raw_DGEN_10 <- df$ehiDGEN_10_mean
mean_DGEN_10 <- mean(raw_DGEN_10, na.rm = TRUE)
sd_DGEN_10 <- sd(raw_DGEN_10, na.rm = TRUE)
df$stdDGEN_10 <- (raw_DGEN_10 - mean_DGEN_10) / sd_DGEN_10

# DS (global mean), 5
raw_DS_5 <- df$ehi5_global_mean
mean_DS_5 <- mean(raw_DS_5, na.rm = TRUE)
sd_DS_5 <- sd(raw_DS_5, na.rm = TRUE)
df$stdDS_5 <- (raw_DS_5 - mean_DS_5) / sd_DS_5

# DS (global mean), 10
raw_DS_10 <- df$ehi10_global_mean
mean_DS_10 <- mean(raw_DS_10, na.rm = TRUE)
sd_DS_10 <- sd(raw_DS_10, na.rm = TRUE)
df$stdDS_10 <- (raw_DS_10 - mean_DS_10) / sd_DS_10
# Sanity check: print the mean and then the standard deviation of every
# standardized column (these should come out as 0 and 1 after rounding).
for (std_var in c("stdDGEN_5", "stdDGEN_10", "stdDS_5", "stdDS_10")) {
  std_values <- df[[std_var]]
  print(round(mean(std_values, na.rm = TRUE), 5))
  print(round(sd(std_values, na.rm = TRUE), 5))
}
# Row-wise composite score: average the four standardized variables within
# each row, skipping values that are NA.
std_cols <- c("stdDGEN_5", "stdDGEN_10", "stdDS_5", "stdDS_10")
df$stdEHI_mean <- rowMeans(df[std_cols], na.rm = TRUE)
#### check random 10 rows
# Pick up to 10 random row indices whose calculations are verified below.
set.seed(123) # For reproducible random selection
# Cap the sample size at the number of rows: sample() without replacement
# raises an error when asked for more draws than there are rows. With 10 or
# more rows the selection is the same as sample(nrow(df), 10).
random_rows <- sample(nrow(df), min(10, nrow(df)))
# Spot check for the DGEN variables: for every sampled row, print the raw
# value, the stored standardized value, and the standardized value recomputed
# from the saved mean and SD, for both the 5 and 10 versions.
cat("Checking 10 random rows:\n")
cat("Row | ehiDGEN_5_mean | stdDGEN_5 | Calculation | ehiDGEN_10_mean | stdDGEN_10 | Calculation\n")
cat("----|----------------|-----------|-------------|-----------------|------------|------------\n")
dgen_5_raw <- df$ehiDGEN_5_mean[random_rows]
dgen_10_raw <- df$ehiDGEN_10_mean[random_rows]
# sprintf() is vectorized, so a single call formats one line per sampled row.
dgen_lines <- sprintf(
  "%3d | %13.5f | %9.5f | %11.5f | %15.5f | %10.5f | %11.5f\n",
  random_rows,
  dgen_5_raw,
  df$stdDGEN_5[random_rows],
  (dgen_5_raw - mean_DGEN_5) / sd_DGEN_5,
  dgen_10_raw,
  df$stdDGEN_10[random_rows],
  (dgen_10_raw - mean_DGEN_10) / sd_DGEN_10
)
cat(dgen_lines, sep = "")
# Spot check for the global-mean (DS) variables: for every sampled row, print
# the raw value, the stored standardized value, and the standardized value
# recomputed from the saved mean and SD, for both the 5 and 10 versions.
cat("\nRow | ehi5_global_mean | stdDS_5 | Calculation | ehi10_global_mean | stdDS_10 | Calculation\n")
cat("----|------------------|---------|-------------|-------------------|----------|------------\n")
ds_5_raw <- df$ehi5_global_mean[random_rows]
ds_10_raw <- df$ehi10_global_mean[random_rows]
# sprintf() is vectorized, so a single call formats one line per sampled row.
ds_lines <- sprintf(
  "%3d | %16.5f | %8.5f | %11.5f | %17.5f | %9.5f | %11.5f\n",
  random_rows,
  ds_5_raw,
  df$stdDS_5[random_rows],
  (ds_5_raw - mean_DS_5) / sd_DS_5,
  ds_10_raw,
  df$stdDS_10[random_rows],
  (ds_10_raw - mean_DS_10) / sd_DS_10
)
cat(ds_lines, sep = "")
# Show the final stdEHI_mean for the sampled rows next to an independent
# recomputation, so the two columns can be compared by eye.
cat("\nRow | stdEHI_mean | Manual calc\n")
cat("----|-------------|------------\n")
for (i in random_rows) {
  # Average this row's four standardized values directly, dropping NAs the
  # same way the stored stdEHI_mean does.
  manual_mean <- mean(
    c(df$stdDGEN_5[i], df$stdDGEN_10[i], df$stdDS_5[i], df$stdDS_10[i]),
    na.rm = TRUE
  )
  cat(sprintf("%3d | %11.5f | %11.5f\n", i, df$stdEHI_mean[i], manual_mean))
}
# Save the data frame, without row names, to eohi2.csv in the working
# directory. This is the same file name the script reads at the start, so the
# input file is overwritten.
output_file <- "eohi2.csv"
write.csv(df, file = output_file, row.names = FALSE)