# Standardize the four EHI summary scores in eohi2.csv (z-scores), average
# them row-wise into stdEHI_mean, print sanity checks, and write the result
# back to eohi2.csv.

options(scipen = 999)

library(dplyr)

# NOTE(review): machine-specific absolute path; the script only runs where
# this directory exists. Consider a project-relative path instead.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

df <- read.csv("eohi2.csv")

# Means and standard deviations used for standardization ----
# Computed once and reused for both the printed check and the z-scores.
mean_DGEN_5 <- mean(df$ehiDGEN_5_mean, na.rm = TRUE)
sd_DGEN_5 <- sd(df$ehiDGEN_5_mean, na.rm = TRUE)

mean_DGEN_10 <- mean(df$ehiDGEN_10_mean, na.rm = TRUE)
sd_DGEN_10 <- sd(df$ehiDGEN_10_mean, na.rm = TRUE)

mean_DS_5 <- mean(df$ehi5_global_mean, na.rm = TRUE)
sd_DS_5 <- sd(df$ehi5_global_mean, na.rm = TRUE)

mean_DS_10 <- mean(df$ehi10_global_mean, na.rm = TRUE)
sd_DS_10 <- sd(df$ehi10_global_mean, na.rm = TRUE)

# Display means and standard deviations of non-standardized variables
# for manual checking
print(round(mean_DGEN_5, 5))
print(round(sd_DGEN_5, 5))

print(round(mean_DGEN_10, 5))
print(round(sd_DGEN_10, 5))

print(round(mean_DS_5, 5))
print(round(sd_DS_5, 5))

print(round(mean_DS_10, 5))
print(round(sd_DS_10, 5))

# Create standardized variables ----
df$stdDGEN_5 <- (df$ehiDGEN_5_mean - mean_DGEN_5) / sd_DGEN_5
df$stdDGEN_10 <- (df$ehiDGEN_10_mean - mean_DGEN_10) / sd_DGEN_10
df$stdDS_5 <- (df$ehi5_global_mean - mean_DS_5) / sd_DS_5
df$stdDS_10 <- (df$ehi10_global_mean - mean_DS_10) / sd_DS_10

# Check that variables have been standardized (expect mean 0 and sd 1)
print(round(mean(df$stdDGEN_5, na.rm = TRUE), 5))
print(round(sd(df$stdDGEN_5, na.rm = TRUE), 5))

print(round(mean(df$stdDGEN_10, na.rm = TRUE), 5))
print(round(sd(df$stdDGEN_10, na.rm = TRUE), 5))

print(round(mean(df$stdDS_5, na.rm = TRUE), 5))
print(round(sd(df$stdDS_5, na.rm = TRUE), 5))

print(round(mean(df$stdDS_10, na.rm = TRUE), 5))
print(round(sd(df$stdDS_10, na.rm = TRUE), 5))

# Calculate mean of standardized variables ----
# With na.rm = TRUE a row is averaged over whichever of the four scores are
# present; a row where all four are missing yields NaN.
df$stdEHI_mean <- rowMeans(
  df[, c("stdDGEN_5", "stdDGEN_10", "stdDS_5", "stdDS_10")],
  na.rm = TRUE
)

# Check 10 random rows to verify calculations ----
set.seed(123) # For reproducible random selection
# Cap the sample size at the number of rows so small files do not error;
# with 10 or more rows this draws the same rows as sample(nrow(df), 10).
random_rows <- sample.int(nrow(df), min(10L, nrow(df)))

cat("Checking 10 random rows:\n")
cat("Row | ehiDGEN_5_mean | stdDGEN_5 | Calculation | ehiDGEN_10_mean | stdDGEN_10 | Calculation\n")
cat("----|----------------|-----------|-------------|-----------------|------------|------------\n")

for (i in random_rows) {
  orig_5 <- df$ehiDGEN_5_mean[i]
  std_5 <- df$stdDGEN_5[i]
  calc_5 <- (orig_5 - mean_DGEN_5) / sd_DGEN_5

  orig_10 <- df$ehiDGEN_10_mean[i]
  std_10 <- df$stdDGEN_10[i]
  calc_10 <- (orig_10 - mean_DGEN_10) / sd_DGEN_10

  cat(sprintf(
    "%3d | %13.5f | %9.5f | %11.5f | %15.5f | %10.5f | %11.5f\n",
    i, orig_5, std_5, calc_5, orig_10, std_10, calc_10
  ))
}

cat("\nRow | ehi5_global_mean | stdDS_5 | Calculation | ehi10_global_mean | stdDS_10 | Calculation\n")
cat("----|------------------|---------|-------------|-------------------|----------|------------\n")

for (i in random_rows) {
  orig_5 <- df$ehi5_global_mean[i]
  std_5 <- df$stdDS_5[i]
  calc_5 <- (orig_5 - mean_DS_5) / sd_DS_5

  orig_10 <- df$ehi10_global_mean[i]
  std_10 <- df$stdDS_10[i]
  calc_10 <- (orig_10 - mean_DS_10) / sd_DS_10

  cat(sprintf(
    "%3d | %16.5f | %8.5f | %11.5f | %17.5f | %9.5f | %11.5f\n",
    i, orig_5, std_5, calc_5, orig_10, std_10, calc_10
  ))
}

# Show the final stdEHI_mean for these rows next to an independent
# recomputation from the four standardized columns.
cat("\nRow | stdEHI_mean | Manual calc\n")
cat("----|-------------|------------\n")

for (i in random_rows) {
  # Recompute the row mean directly; stray numeric literals that had been
  # pasted between `<-` and mean() were removed so this is a real check.
  manual_mean <- mean(
    c(df$stdDGEN_5[i], df$stdDGEN_10[i], df$stdDS_5[i], df$stdDS_10[i]),
    na.rm = TRUE
  )
  cat(sprintf("%3d | %11.5f | %11.5f\n", i, df$stdEHI_mean[i], manual_mean))
}

# Write to CSV ----
# NOTE(review): this overwrites the input file in place, adding the std*
# columns to it; keep a backup of the raw data if that is not intended.
write.csv(df, "eohi2.csv", row.names = FALSE)