# File listing metadata: 100 lines, 3.9 KiB, R
# ---- Session setup ----
# A large scipen penalty makes R print numbers in fixed rather than
# scientific notation.
options(scipen = 999)

library(dplyr)

# Relative file names used by this script resolve against this folder.
setwd(dir = "C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load the dataset that the standardized scores are added to.
df <- read.csv(file = "eohi2.csv")

# Print the mean and then the standard deviation (rounded to 5 decimals,
# missing values ignored) of each non-standardized score, in column order,
# so the figures can be checked by hand.
raw_scores <- list(
  df$ehiDGEN_5_mean,
  df$ehiDGEN_10_mean,
  df$ehi5_global_mean,
  df$ehi10_global_mean
)
for (scores in raw_scores) {
  print(round(mean(scores, na.rm = TRUE), 5))
  print(round(sd(scores, na.rm = TRUE), 5))
}

# Standardize each raw score as a z-score: subtract the sample mean and divide
# by the sample standard deviation, both computed with missing values ignored.
# The mean_* / sd_* values are kept as top-level variables because the
# row-level checks further down the script recompute z-scores from them.

# ehiDGEN_5_mean -> stdDGEN_5
mean_DGEN_5 <- mean(df$ehiDGEN_5_mean, na.rm = TRUE)
sd_DGEN_5 <- sd(df$ehiDGEN_5_mean, na.rm = TRUE)
df$stdDGEN_5 <- (df$ehiDGEN_5_mean - mean_DGEN_5) / sd_DGEN_5

# ehiDGEN_10_mean -> stdDGEN_10
mean_DGEN_10 <- mean(df$ehiDGEN_10_mean, na.rm = TRUE)
sd_DGEN_10 <- sd(df$ehiDGEN_10_mean, na.rm = TRUE)
df$stdDGEN_10 <- (df$ehiDGEN_10_mean - mean_DGEN_10) / sd_DGEN_10

# ehi5_global_mean -> stdDS_5
mean_DS_5 <- mean(df$ehi5_global_mean, na.rm = TRUE)
sd_DS_5 <- sd(df$ehi5_global_mean, na.rm = TRUE)
df$stdDS_5 <- (df$ehi5_global_mean - mean_DS_5) / sd_DS_5

# ehi10_global_mean -> stdDS_10
mean_DS_10 <- mean(df$ehi10_global_mean, na.rm = TRUE)
sd_DS_10 <- sd(df$ehi10_global_mean, na.rm = TRUE)
df$stdDS_10 <- (df$ehi10_global_mean - mean_DS_10) / sd_DS_10

# Sanity check on the standardized columns: print the mean and then the
# standard deviation of each one (rounded to 5 decimals, missing values
# ignored). A correctly standardized column has mean 0 and SD 1.
std_scores <- list(
  df$stdDGEN_5,
  df$stdDGEN_10,
  df$stdDS_5,
  df$stdDS_10
)
for (z_scores in std_scores) {
  print(round(mean(z_scores, na.rm = TRUE), 5))
  print(round(sd(z_scores, na.rm = TRUE), 5))
}

# Composite score: the row-wise average of the four standardized columns.
# With na.rm = TRUE each row is averaged over whichever of the four values
# are present.
df$stdEHI_mean <- rowMeans(
  df[c("stdDGEN_5", "stdDGEN_10", "stdDS_5", "stdDS_10")],
  na.rm = TRUE
)

# ---- Spot check on 10 random rows ----
# Fix the RNG seed so the same row indices are drawn on every run, then pick
# 10 distinct row numbers of df to verify the calculations on.
set.seed(seed = 123)
random_rows <- sample(x = nrow(df), size = 10)

# Table 1 of the spot check: for every sampled row, print the raw DGEN scores,
# the stored z-scores, and a z-score recomputed from the raw value. The stored
# and recomputed columns should agree.
cat("Checking 10 random rows:\n")
cat("Row | ehiDGEN_5_mean | stdDGEN_5 | Calculation | ehiDGEN_10_mean | stdDGEN_10 | Calculation\n")
cat("----|----------------|-----------|-------------|-----------------|------------|------------\n")

dgen_raw_5 <- df$ehiDGEN_5_mean[random_rows]
dgen_raw_10 <- df$ehiDGEN_10_mean[random_rows]

# sprintf() is vectorized over its arguments, so a single call formats one
# output line per sampled row, in sampling order.
dgen_lines <- sprintf(
  "%3d | %13.5f | %9.5f | %11.5f | %15.5f | %10.5f | %11.5f\n",
  random_rows,
  dgen_raw_5,
  df$stdDGEN_5[random_rows],
  (dgen_raw_5 - mean_DGEN_5) / sd_DGEN_5,
  dgen_raw_10,
  df$stdDGEN_10[random_rows],
  (dgen_raw_10 - mean_DGEN_10) / sd_DGEN_10
)
cat(dgen_lines, sep = "")

# Table 2 of the spot check: the same comparison for the global-mean (DS)
# scores. Stored z-scores are shown next to values recomputed from the raw
# columns for every sampled row.
cat("\nRow | ehi5_global_mean | stdDS_5 | Calculation | ehi10_global_mean | stdDS_10 | Calculation\n")
cat("----|------------------|---------|-------------|-------------------|----------|------------\n")

ds_raw_5 <- df$ehi5_global_mean[random_rows]
ds_raw_10 <- df$ehi10_global_mean[random_rows]

# One vectorized sprintf() call yields one formatted line per sampled row.
ds_lines <- sprintf(
  "%3d | %16.5f | %8.5f | %11.5f | %17.5f | %9.5f | %11.5f\n",
  random_rows,
  ds_raw_5,
  df$stdDS_5[random_rows],
  (ds_raw_5 - mean_DS_5) / sd_DS_5,
  ds_raw_10,
  df$stdDS_10[random_rows],
  (ds_raw_10 - mean_DS_10) / sd_DS_10
)
cat(ds_lines, sep = "")

# Table 3 of the spot check: show the stored composite (stdEHI_mean) next to
# an independent recomputation for each sampled row. The two printed columns
# should match.
cat("\nRow | stdEHI_mean | Manual calc\n")
cat("----|-------------|------------\n")
for (i in random_rows) {
  # Recompute the composite from this row's four z-scores, using the same
  # na.rm = TRUE rule that was applied when stdEHI_mean was built.
  # Fix: manual_mean used to be assigned a fixed sum of pasted-in constants
  # while the mean() result was computed and thrown away, so the
  # "Manual calc" column showed the same unrelated number on every row.
  manual_mean <- mean(
    c(df$stdDGEN_5[i], df$stdDGEN_10[i], df$stdDS_5[i], df$stdDS_10[i]),
    na.rm = TRUE
  )
  cat(sprintf("%3d | %11.5f | %11.5f\n", i, df$stdEHI_mean[i], manual_mean))
}

# Save the data frame, including the newly added columns, to eohi2.csv in the
# working directory, without a row-name column.
# NOTE: this is the same file name that was read at the top of the script, so
# the input file is overwritten in place.
write.csv(x = df, file = "eohi2.csv", row.names = FALSE)