diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..bf21dfc
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,261 @@
+# HiLiteHero - PDF Highlight Extractor Makefile
+# Description: Makefile for easy development, testing, and deployment
+
+# Variables: tool names and project paths shared by the targets below
+PYTHON := python3
+PIP := pip3
+VENV := venv
+VENV_BIN := $(VENV)/bin
+VENV_PYTHON := $(VENV_BIN)/python
+VENV_PIP := $(VENV_BIN)/pip
+MAIN_SCRIPT := main.py
+TEST_PDF := test/test2.pdf
+REQUIREMENTS := requirements.txt
+
+# Colors for output (ANSI escapes; NC = "no color" reset). No inline comments here: text before '#' stays in the value.
+RED := \033[0;31m
+GREEN := \033[0;32m
+YELLOW := \033[0;33m
+BLUE := \033[0;34m
+PURPLE := \033[0;35m
+CYAN := \033[0;36m
+WHITE := \033[0;37m
+NC := \033[0m
+
+# Helper: expands to $(VENV_PYTHON) when that file exists at expansion time, otherwise to $(PYTHON)
+define get_python
+$(if $(wildcard $(VENV_PYTHON)),$(VENV_PYTHON),$(PYTHON))
+endef
+
+# Default target (first rule in the file): lists every rule whose header line carries a trailing double-hash description
+.PHONY: help
+help: ## Show this help message
+	@echo "$(CYAN)HiLiteHero - PDF Highlight Extractor$(NC)"
+	@echo "$(YELLOW)Available targets:$(NC)"
+	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "  $(GREEN)%-15s$(NC) %s\n", $$1, $$2}' $(MAKEFILE_LIST)
+
+# Installation targets (install and install-dev go through the venv; install-system calls $(PIP) directly)
+.PHONY: install
+install: venv-install ## Install dependencies (creates venv if needed)
+	@echo "$(GREEN)Dependencies installed successfully!$(NC)"
+
+.PHONY: install-system
+install-system: ## Install dependencies system-wide (may require --break-system-packages)
+	@echo "$(YELLOW)Warning: Installing system-wide packages$(NC)"
+	@echo "$(BLUE)Installing dependencies...$(NC)"
+	$(PIP) install -r $(REQUIREMENTS) --break-system-packages
+	@echo "$(GREEN)Dependencies installed successfully!$(NC)"
+
+.PHONY: install-dev
+install-dev: venv-install ## Install development dependencies in virtual environment
+	@echo "$(BLUE)Installing development dependencies...$(NC)"
+	$(VENV_PIP) install black flake8 pytest pytest-cov
+	@echo "$(GREEN)Development dependencies installed!$(NC)"
+
+.PHONY: venv
+venv: ## Create virtual environment
+	@echo "$(BLUE)Creating virtual environment...$(NC)"
+	$(PYTHON) -m venv $(VENV)
+	@echo "$(GREEN)Virtual environment created!$(NC)"
+	@echo "$(YELLOW)To activate: source $(VENV)/bin/activate$(NC)"
+
+.PHONY: venv-install
+venv-install: venv ## Create venv and install dependencies
+	@echo "$(BLUE)Installing dependencies in virtual environment...$(NC)"
+	$(VENV_PIP) install --upgrade pip
+	$(VENV_PIP) install -r $(REQUIREMENTS)
+	@echo "$(GREEN)Virtual environment setup complete!$(NC)"
+	@echo "$(YELLOW)To activate: source $(VENV)/bin/activate$(NC)"
+
+# Testing targets (each runs $(MAIN_SCRIPT) --test with the interpreter chosen by get_python)
+.PHONY: test
+test: ## Run test mode with default PDF
+	@echo "$(BLUE)Running test mode...$(NC)"
+	$(call get_python) $(MAIN_SCRIPT) --test
+	@echo "$(GREEN)Test completed!$(NC)"
+
+.PHONY: test-interactive
+test-interactive: ## Run test mode with interactive review
+	@echo "$(BLUE)Running test mode with interactive review...$(NC)"
+	$(call get_python) $(MAIN_SCRIPT) --test --interactive
+
+.PHONY: test-debug
+test-debug: ## Run test mode with debug output
+	@echo "$(BLUE)Running test mode with debug output...$(NC)"
+	$(call get_python) $(MAIN_SCRIPT) --test --debug
+
+.PHONY: test-silent
+test-silent: ## Run test mode silently (minimal output)
+	@echo "$(BLUE)Running test mode silently...$(NC)"
+	$(call get_python) $(MAIN_SCRIPT) --test --silent
+	@echo "$(GREEN)Silent test completed!$(NC)"
+
+.PHONY: test-custom
+test-custom: ## Run test with custom output file
+	@echo "$(BLUE)Running test with custom output...$(NC)"
+	$(call get_python) $(MAIN_SCRIPT) --test --output-json test_results.json
+	@echo "$(GREEN)Test results saved to test_results.json$(NC)"
+
+# Development targets (run-file requires FILE=...; run-pages requires both FILE=... and PAGES=...)
+.PHONY: dev
+dev: ## Run in development mode (interactive with debug)
+	@echo "$(BLUE)Starting development mode...$(NC)"
+	$(call get_python) $(MAIN_SCRIPT) --debug --interactive
+
+.PHONY: run
+run: ## Run the script interactively
+	@echo "$(BLUE)Starting interactive mode...$(NC)"
+	$(call get_python) $(MAIN_SCRIPT)
+
+.PHONY: run-file
+run-file: ## Run with a specific PDF file (usage: make run-file FILE=path/to/file.pdf)
+	@if [ -z "$(FILE)" ]; then \
+		echo "$(RED)Error: Please specify FILE=path/to/file.pdf$(NC)"; \
+		echo "$(YELLOW)Example: make run-file FILE=document.pdf$(NC)"; \
+		exit 1; \
+	fi
+	@echo "$(BLUE)Processing $(FILE)...$(NC)"
+	$(call get_python) $(MAIN_SCRIPT) "$(FILE)"
+
+.PHONY: run-pages
+run-pages: ## Run with specific pages (usage: make run-pages FILE=doc.pdf PAGES="1,3-5")
+	@if [ -z "$(FILE)" ] || [ -z "$(PAGES)" ]; then \
+		echo "$(RED)Error: Please specify FILE and PAGES$(NC)"; \
+		echo "$(YELLOW)Example: make run-pages FILE=document.pdf PAGES=\"1,3-5\"$(NC)"; \
+		exit 1; \
+	fi
+	@echo "$(BLUE)Processing pages $(PAGES) of $(FILE)...$(NC)"
+	$(call get_python) $(MAIN_SCRIPT) "$(FILE)" --pages "$(PAGES)"
+
+# Code quality targets (black and flake8 are run as modules; install-dev installs both into the venv)
+.PHONY: format
+format: ## Format code with black
+	@echo "$(BLUE)Formatting code with black...$(NC)"
+	$(call get_python) -m black $(MAIN_SCRIPT)
+	@echo "$(GREEN)Code formatted!$(NC)"
+
+.PHONY: lint
+lint: ## Lint code with flake8
+	@echo "$(BLUE)Linting code with flake8...$(NC)"
+	$(call get_python) -m flake8 $(MAIN_SCRIPT) --max-line-length=120 --ignore=E203,W503
+	@echo "$(GREEN)Linting completed!$(NC)"
+
+.PHONY: check
+check: lint ## Run all code quality checks
+	@echo "$(GREEN)All checks passed!$(NC)"
+
+# Utility targets (NOTE: clean deletes every *.json in the current directory, not only generated output)
+.PHONY: clean
+clean: ## Clean up generated files
+	@echo "$(BLUE)Cleaning up generated files...$(NC)"
+	rm -f *.json
+	rm -f test_results.json
+	rm -rf pdf_page_images/
+	rm -rf __pycache__/
+	rm -rf .pytest_cache/
+	rm -rf *.pyc
+	@echo "$(GREEN)Cleanup completed!$(NC)"
+
+.PHONY: clean-venv
+clean-venv: ## Remove virtual environment
+	@echo "$(BLUE)Removing virtual environment...$(NC)"
+	rm -rf $(VENV)
+	@echo "$(GREEN)Virtual environment removed!$(NC)"
+
+.PHONY: clean-all
+clean-all: clean clean-venv ## Clean everything including virtual environment
+	@echo "$(GREEN)Complete cleanup finished!$(NC)"
+# Status report: honours $(PYTHON)/$(PIP) and checks dependencies with the interpreter the run targets use
+.PHONY: status
+status: ## Show project status
+	@echo "$(CYAN)=== HiLiteHero Project Status ===$(NC)"
+	@echo "$(YELLOW)Python version:$(NC) $$($(PYTHON) --version 2>/dev/null || echo 'Not found')"
+	@echo "$(YELLOW)Pip version:$(NC) $$($(PIP) --version 2>/dev/null || echo 'Not found')"
+	@echo "$(YELLOW)Virtual environment:$(NC) $$(if [ -d $(VENV) ]; then echo 'Exists'; else echo 'Not created'; fi)"
+	@echo "$(YELLOW)Dependencies installed:$(NC) $$($(call get_python) -m pip show PyMuPDF >/dev/null 2>&1 && echo 'Yes' || echo 'No')"
+	@echo "$(YELLOW)Test PDF exists:$(NC) $$(if [ -f $(TEST_PDF) ]; then echo 'Yes'; else echo 'No'; fi)"
+	@echo "$(YELLOW)Generated files:$(NC) $$(ls -1 *.json 2>/dev/null | wc -l) JSON files"
+
+# Documentation targets (echoes the configured paths and a short quick-start; writes no files)
+.PHONY: docs
+docs: ## Show documentation
+	@echo "$(CYAN)=== HiLiteHero Documentation ===$(NC)"
+	@echo "$(YELLOW)Main script:$(NC) $(MAIN_SCRIPT)"
+	@echo "$(YELLOW)Test PDF:$(NC) $(TEST_PDF)"
+	@echo "$(YELLOW)Requirements:$(NC) $(REQUIREMENTS)"
+	@echo ""
+	@echo "$(YELLOW)Quick start:$(NC)"
+	@echo "  make test          # Run test mode"
+	@echo "  make run           # Interactive mode"
+	@echo "  make dev           # Development mode"
+	@echo ""
+	@echo "$(YELLOW)For more help:$(NC) make help"
+
+# Batch processing targets (--silent runs; output names embed a timestamp that make computes while expanding the recipe)
+.PHONY: batch
+batch: ## Run in batch mode (silent with auto-save)
+	@echo "$(BLUE)Running in batch mode...$(NC)"
+	$(call get_python) $(MAIN_SCRIPT) --silent --output-json batch_results_$(shell date +%Y%m%d_%H%M%S).json
+	@echo "$(GREEN)Batch processing completed!$(NC)"
+
+.PHONY: batch-file
+batch-file: ## Batch process specific file (usage: make batch-file FILE=doc.pdf)
+	@if [ -z "$(FILE)" ]; then \
+		echo "$(RED)Error: Please specify FILE=path/to/file.pdf$(NC)"; \
+		exit 1; \
+	fi
+	@echo "$(BLUE)Batch processing $(FILE)...$(NC)"
+	$(call get_python) $(MAIN_SCRIPT) "$(FILE)" --silent --output-json "$(shell basename "$(FILE)" .pdf)_batch_$(shell date +%Y%m%d_%H%M%S).json"
+	@echo "$(GREEN)Batch processing completed!$(NC)"
+
+# Processes every test/*.pdf, keeps going after a failure, then exits non-zero if any file failed
+.PHONY: batch-all
+batch-all: ## Process all PDFs in test folder
+	@echo "$(BLUE)Processing all PDFs in test folder...$(NC)"
+	@if [ ! -d "test" ]; then \
+		echo "$(RED)Error: test folder not found$(NC)"; exit 1; \
+	fi
+	@pdf_count=0; fail_count=0; \
+	for pdf in test/*.pdf; do \
+		[ -f "$$pdf" ] || continue; \
+		echo "$(CYAN)Processing $$pdf...$(NC)"; \
+		if $(call get_python) $(MAIN_SCRIPT) "$$pdf" --silent --output-json "$$(basename "$$pdf" .pdf)_batch_$$(date +%Y%m%d_%H%M%S).json"; then \
+			pdf_count=$$((pdf_count + 1)); \
+		else \
+			fail_count=$$((fail_count + 1)); echo "$(RED)Failed to process $$pdf$(NC)"; \
+		fi; \
+	done; \
+	if [ $$fail_count -gt 0 ]; then echo "$(RED)$$fail_count PDF file(s) failed$(NC)"; exit 1; fi; \
+	if [ $$pdf_count -eq 0 ]; then echo "$(YELLOW)No PDF files found in test folder$(NC)"; \
+	else echo "$(GREEN)Processed $$pdf_count PDF file(s) successfully!$(NC)"; fi
+
+# Installation verification (imports fitz and colorama with the venv interpreter if present, else $(PYTHON); then checks $(MAIN_SCRIPT))
+.PHONY: verify
+verify: ## Verify installation
+	@echo "$(BLUE)Verifying installation...$(NC)"
+	@if [ -f $(VENV_PYTHON) ]; then \
+		echo "$(CYAN)Checking virtual environment...$(NC)"; \
+		$(VENV_PYTHON) -c "import fitz, colorama; print('$(GREEN)Virtual env dependencies OK$(NC)')" || (echo "$(RED)Virtual env dependencies missing$(NC)" && exit 1); \
+	else \
+		echo "$(YELLOW)Checking system Python...$(NC)"; \
+		$(PYTHON) -c "import fitz, colorama; print('$(GREEN)System dependencies OK$(NC)')" || (echo "$(RED)System dependencies missing$(NC)" && exit 1); \
+	fi
+	@if [ -f $(MAIN_SCRIPT) ]; then echo "$(GREEN)Main script found$(NC)"; else echo "$(RED)Main script missing$(NC)" && exit 1; fi
+	@echo "$(GREEN)Installation verified!$(NC)"
+
+# Quick development workflow: sub-makes run clean, then test, in that order even under `make -j`
+.PHONY: quick-dev
+quick-dev: ## Quick development workflow (clean + test)
+	@$(MAKE) --no-print-directory clean && $(MAKE) --no-print-directory test && echo "$(GREEN)Quick development cycle completed!$(NC)"
+
+# Show available PDF files (first 10). Output is captured first because `find | head` exits 0 even when nothing matches.
+.PHONY: list-pdfs
+list-pdfs: ## List available PDF files in project
+	@echo "$(CYAN)Available PDF files:$(NC)"
+	@pdfs=$$(find . -name "*.pdf" -type f 2>/dev/null | head -10); if [ -n "$$pdfs" ]; then printf '%s\n' "$$pdfs"; else echo "$(YELLOW)No PDF files found$(NC)"; fi
+
+# Show recent JSON outputs (newest 5). Output is captured first because `ls | head` exits 0 even when nothing matches.
+.PHONY: list-outputs
+list-outputs: ## List recent JSON output files
+	@echo "$(CYAN)Recent JSON outputs:$(NC)"
+	@outputs=$$(ls -lt *.json 2>/dev/null | head -5); if [ -n "$$outputs" ]; then printf '%s\n' "$$outputs"; else echo "$(YELLOW)No JSON output files found$(NC)"; fi
diff --git a/README.md b/README.md
index cff2ea6..18e1f46 100644
--- a/README.md
+++ b/README.md
@@ -15,12 +15,124 @@ A Python tool for extracting highlighted text from PDF files with precise text o
 
 ## Installation
 
-Clone the repository:
-git clone <repository-url>
-cd pdf-highlight-extractor
+### Prerequisites
+- Python 3.7 or higher
+- pip package manager
 
-Install required packages:
-pip install PyMuPDF pdfplumber colorama pandas
+### Quick Installation
+
+1. **Clone the repository:**
+   ```bash
+   git clone <repository-url>
+   cd HiLiteHero
+   ```
+
+2. **Install dependencies:**
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+   Or install manually:
+   ```bash
+   pip install PyMuPDF colorama
+   ```
+
+### Alternative Installation Methods
+
+**Using a virtual environment (recommended):**
+```bash
+python -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+pip install -r requirements.txt
+```
+
+**Using conda:**
+```bash
+conda create -n hilitehero python=3.9
+conda activate hilitehero
+pip install -r requirements.txt
+```
+
+### Verify Installation
+```bash
+python main.py --test
+```
+This should process the default test file and create a JSON output file.
+
+## Quick Start with Makefile
+
+The project includes a comprehensive Makefile for easy development and testing:
+
+### Essential Commands
+
+```bash
+# Show all available commands
+make help
+
+# Quick test (recommended first run)
+make test
+
+# Interactive mode
+make run
+
+# Development mode (debug + interactive)
+make dev
+
+# Install dependencies
+make install
+
+# Clean up generated files
+make clean
+```
+
+### Common Workflows
+
+**First-time setup:**
+```bash
+make install    # Install dependencies
+make test       # Verify everything works
+```
+
+**Development workflow:**
+```bash
+make dev        # Start development mode
+make clean      # Clean up when done
+```
+
+**Batch processing:**
+```bash
+make batch      # Process default file silently
+make batch-file FILE=document.pdf  # Process specific file
+```
+
+**Code quality:**
+```bash
+make format     # Format code
+make lint       # Check code quality
+make check      # Run all checks
+```
+
+### Advanced Makefile Usage
+
+**Process specific pages:**
+```bash
+make run-pages FILE=document.pdf PAGES="1,3-5"
+```
+
+**Test different modes:**
+```bash
+make test-interactive  # Test with interactive review
+make test-debug        # Test with debug output
+make test-silent       # Test silently
+```
+
+**Project management:**
+```bash
+make status      # Show project status
+make docs        # Show documentation
+make list-pdfs   # List available PDF files
+make list-outputs # Show recent outputs
+```
 
 
 ## Dependencies
@@ -32,19 +144,76 @@ pip install PyMuPDF pdfplumber colorama pandas
 
 ## Usage
 
-### Quick Test Mode
-python highlight_extractor.py --test
+### Quick Start
 
-Uses default file: `/mnt/c/Users/admin/Downloads/test2.pdf` and displays results only.
+**Test Mode (Recommended for first-time users):**
+```bash
+python main.py --test
+```
+Uses default test file and automatically saves results to JSON.
 
-### Interactive Mode
-python highlight_extractor.py
+**Interactive Mode:**
+```bash
+python main.py
+```
+Prompts for PDF file path and provides interactive review options.
 
-Prompts for PDF file path and output options.
+**Process Specific PDF:**
+```bash
+python main.py path/to/your/document.pdf
+```
 
-### Command Line Flags
-- `--test`, `-t`, or `test` - Enable test mode with defaults
-- No flags - Full interactive mode
+### Command Line Options
+
+| Flag | Description | Example |
+|------|-------------|---------|
+| `--test`, `-t` | Test mode with default settings | `python main.py -t` |
+| `--interactive`, `-i` | Enable interactive review mode | `python main.py -i document.pdf` |
+| `--pages`, `-p` | Process specific pages | `python main.py -p "1,3-5" doc.pdf` |
+| `--silent`, `-s` | Minimal output, auto-save JSON | `python main.py -s` |
+| `--debug`, `-d` | Enable detailed debug output | `python main.py -d document.pdf` |
+| `--output-json` | Custom JSON output path | `python main.py --output-json results.json` |
+
+### Usage Examples
+
+**Basic extraction:**
+```bash
+python main.py document.pdf
+```
+
+**Process specific pages with interactive review:**
+```bash
+python main.py document.pdf -p "1,5-7" -i
+```
+
+**Silent mode for batch processing:**
+```bash
+python main.py document.pdf -s --output-json batch_results.json
+```
+
+**Debug mode for troubleshooting:**
+```bash
+python main.py document.pdf -d
+```
+
+**Test with custom output:**
+```bash
+python main.py -t --output-json test_results.json
+```
+
+### Interactive Review Mode
+
+When using the `-i` flag, you can:
+- **[N]ext** - Move to next highlight
+- **[P]rev** - Move to previous highlight  
+- **[U]p** - Move highlight up in order
+- **[M]ove Down** - Move highlight down in order
+- **[C]olor** - Change highlight color classification
+- **[E]dit** - Edit highlight text
+- **[D]elete** - Remove highlight
+- **[O]pen Img** - View page image
+- **[S]ave&Exit** - Save changes and exit
+- **[Q]uit** - Quit without saving
 
 ## Output Formats
 
@@ -97,9 +266,34 @@ Tabular format with columns: page, text, color, type, category
 
 **Over-extraction**: The tool is designed to avoid this, but very close text might be included. Check highlight precision in your PDF.
 
+**Installation Issues**: 
+- Ensure Python 3.7+ is installed
+- Try using a virtual environment: `make venv-install`
+- Check dependencies: `make verify`
+
+**Permission Errors**:
+- On Linux/Mac: Ensure PDF files are readable
+- On Windows: Run as administrator if needed
+
 ### Debug Output
 Run with detailed logging to see extraction decisions:
-python highlight_extractor.py --test
+```bash
+python main.py --test --debug
+# or
+make test-debug
+```
+
+### Getting Help
+```bash
+# Show all available commands
+make help
+
+# Check project status
+make status
+
+# Verify installation
+make verify
+```
 
 ## Contributing
 
diff --git a/main.py b/main.py
index 055401d..f9b5f47 100644
--- a/main.py
+++ b/main.py
@@ -1,753 +1,1427 @@
-#!/usr/bin/env python3
-# =============================================================================
-# ENHANCED PDF HIGHLIGHT EXTRACTOR
-# Author: Perplexity AI Companion (Updated by User Feedback)
-# Date: June 3, 2025
-# License: MIT
-#
-# Extracts highlights from PDF files, with options for interactive review,
-# detailed output, text cleaning, JSON export, and page image viewing.
-# =============================================================================
-
-import time
-import os
-import fitz  # PyMuPDF
-import json
-from colorama import init, Fore, Back, Style
-from pathlib import Path
-import re
-import string
-import sys
-import traceback
-import argparse
-import difflib # For text difference calculation
-import tempfile # For temporary image files
-import webbrowser # For opening images/PDFs
-import uuid # For unique filenames
-
-# Attempt to import readline for better input() experience on some systems
-try:
-    import readline
-    READLINE_AVAILABLE = True
-except ImportError:
-    READLINE_AVAILABLE = False # readline not available
-
-# =============================================================================
-# GLOBAL CONFIGURATION FLAGS (Defaults, can be overridden by CLI args)
-# =============================================================================
-DEFAULT_PDF_PATH = "/mnt/c/Users/admin/Downloads/test2.pdf" # Example, adjust if needed
-DEFAULT_PAGES_TO_PROCESS = "3" # Example: "1,3-5,all"
-
-# Default Behavior flags (can be influenced by -d or -s CLI flags)
-# These are used to initialize effective_run_args
-# Keep these distinct from the effective_run_args object itself
-INITIAL_SHOW_TIMING = True
-INITIAL_SHOW_PROGRESS = True
-INITIAL_SHOW_RAW_SEGMENTS = True
-INITIAL_SHOW_EXTRACTION_DETAILS = True
-INITIAL_SHOW_RECT_DETAILS = True
-INITIAL_SHOW_DIFF_PERCENTAGE = True
-INITIAL_CLEAN_EDGES = True
-
-# Text extraction parameters (generally fixed)
-TEXT_EXTRACTION_HORIZONTAL_PADDING = 6.0
-TEXT_EXTRACTION_VERTICAL_PADDING = 1.0
-
-# Edge cleaning configuration (generally fixed)
-VALID_TWO_LETTER_WORDS = {
-    'am', 'an', 'as', 'at', 'be', 'by', 'do', 'go', 'he', 'if', 'in', 'is', 'it', 'me', 'my',
-    'no', 'of', 'on', 'or', 'ox', 'so', 'to', 'up', 'us', 'we'}
-VALID_SINGLE_LETTERS = {'i', 'a'}
-
-# Image handling configuration
-IMAGE_FOLDER_PATH = 'pdf_page_images'  # Relative to CWD by default
-CLEAR_IMAGE_FOLDER_ON_START = True
-CLEAR_IMAGE_FOLDER_ON_END = False
-
-# Initialize colorama
-init(autoreset=True)
-
-# --- Helper Functions ---
-def get_text_diff_ratio(text1, text2):
-    if not text1 and not text2: return 1.0
-    if not text1 or not text2: return 0.0
-    return difflib.SequenceMatcher(None, str(text1), str(text2)).ratio()
-
-def clean_segment_edges_func(text_to_clean, clean_edges_setting):
-    if not clean_edges_setting or not text_to_clean: return text_to_clean
-    text_to_clean = re.sub(r'\s+', ' ', text_to_clean.strip())
-    words = text_to_clean.split()
-    if not words: return text_to_clean
-    
-    current_idx = 0
-    while current_idx < len(words):
-        token = words[current_idx]
-        core_token = token.rstrip(string.punctuation)
-        trailing_punctuation = token[len(core_token):]
-        if not core_token: words.pop(current_idx); continue
-        core_should_be_removed = (len(core_token) == 1 and core_token.isalpha() and core_token.lower() not in VALID_SINGLE_LETTERS) or \
-                                 (len(core_token) == 2 and core_token.isalpha() and core_token.lower() not in VALID_TWO_LETTER_WORDS)
-        if core_should_be_removed:
-            if trailing_punctuation: words[current_idx] = trailing_punctuation
-            else: words.pop(current_idx)
-            continue
-        break
-    while words:
-        token = words[-1]
-        core_token = token.lstrip(string.punctuation)
-        leading_punctuation = token[:-len(core_token)] if core_token else ""
-        if not core_token: words.pop(); continue
-        core_should_be_removed = (len(core_token) == 1 and core_token.isalpha() and core_token.lower() not in VALID_SINGLE_LETTERS) or \
-                                 (len(core_token) == 2 and core_token.isalpha() and core_token.lower() not in VALID_TWO_LETTER_WORDS)
-        if core_should_be_removed:
-            if leading_punctuation: words[-1] = leading_punctuation
-            else: words.pop()
-            continue
-        break
-    return ' '.join(words)
-
-def input_with_prefill(prompt, text):
-    if READLINE_AVAILABLE:
-        def hook():
-            readline.insert_text(text)
-            readline.redisplay()
-        readline.set_pre_input_hook(hook)
-        result = input(prompt)
-        readline.set_pre_input_hook()
-        return result
-    else: 
-        print(Fore.MAGENTA + "Current text (edit below):\n" + Style.RESET_ALL + f"{text}")
-        return input(prompt)
-
-def _clear_png_files_in_folder(folder_path_str, run_args_for_print_control):
-    # This function CLEARS files if folder exists. It DOES NOT CREATE the folder.
-    if not folder_path_str: return
-    
-    folder = Path(folder_path_str) # Path relative to CWD if not absolute
-    abs_folder_path = folder.resolve()
-
-    if run_args_for_print_control.debug:
-        print(Fore.CYAN + f"  [Debug] _clear_png_files_in_folder: Checking {abs_folder_path} (Specified as: '{folder_path_str}')")
-
-    if abs_folder_path.is_dir():
-        if run_args_for_print_control.show_progress: 
-            print(Fore.BLUE + f"Clearing *.png files from {abs_folder_path}...")
-        cleared_count = 0
-        try:
-            for file_path in abs_folder_path.glob("*.png"):
-                if file_path.is_file():
-                    file_path.unlink()
-                    cleared_count +=1
-        except Exception as e:
-            if run_args_for_print_control.show_progress: # Also show error if progress is on
-                print(Fore.RED + f"Error during file deletion in {abs_folder_path}: {e}")
-        
-        if run_args_for_print_control.show_progress:
-            if cleared_count > 0:
-                print(Fore.BLUE + f"Cleared {cleared_count} *.png files from {abs_folder_path}.")
-            else:
-                print(Fore.BLUE + f"No *.png files found to clear in {abs_folder_path}.")
-    else:
-        if run_args_for_print_control.show_progress: 
-            print(Fore.YELLOW + f"Image folder {abs_folder_path} not found, skipping clear.")
-        elif run_args_for_print_control.debug: # Still log if not found in debug, even if not show_progress
-            print(Fore.CYAN + f"  [Debug] _clear_png_files_in_folder: Folder {abs_folder_path} does not exist. Nothing to clear.")
-
-
-class EnhancedPDFHighlightExtractor:
-    def __init__(self, pdf_path, effective_run_args, main_doc_for_image_view=None):
-        self.pdf_path = Path(pdf_path)
-        self.run_args = effective_run_args 
-        self.pdf_filename_stem = self.pdf_path.stem
-        self.highlights_data = []
-        self.main_doc_for_image_view = main_doc_for_image_view
-
-    def _get_highlight_color_from_rgb_tuple(self, rgb_tuple_floats_or_ints):
-        if not rgb_tuple_floats_or_ints or len(rgb_tuple_floats_or_ints) < 3 : return 'unknown_color'
-        r, g, b = [int(x * 255) if isinstance(x, float) and 0.0 <= x <= 1.0 else int(x) for x in rgb_tuple_floats_or_ints[:3]]
-        if r == 142 and g == 221 and b == 249: return 'blue' 
-        if r > 200 and g > 200 and b < 150: return 'yellow'
-        if r < 150 and g > 180 and b < 150: return 'green'
-        if r < 150 and g < 180 and b > 180: return 'blue' 
-        if r > 180 and g < 180 and b > 180: return 'pink'
-        return 'other_color'
-
-    def _get_highlight_color_from_annot_colors_dict(self, colors_dict):
-        if not colors_dict: return 'unknown_color', None
-        rgb_tuple = colors_dict.get('stroke') or colors_dict.get('fill')
-        if not rgb_tuple: return 'unknown_color', None
-        return self._get_highlight_color_from_rgb_tuple(rgb_tuple), rgb_tuple[:3]
-
-    def _extract_text_from_multi_segment_highlight(self, page, annot, page_num, hl_id):
-        overall_highlight_color_name, _ = self._get_highlight_color_from_annot_colors_dict(annot.colors)
-        color_code_for_segment_print = self._get_color_display_codes(overall_highlight_color_name)
-        quads_vertices = annot.vertices
-        if not quads_vertices:
-            if self.run_args.show_extraction_details: print(Fore.YELLOW + f"            No quads for HL {hl_id} on page {page_num}")
-            return None, 0, []
-
-        processed_quads_as_points_list = []
-        if len(quads_vertices) % 4 == 0:
-            for i in range(0, len(quads_vertices), 4):
-                try:
-                    quad_points = [fitz.Point(p) for p in quads_vertices[i:i+4]]
-                    processed_quads_as_points_list.append(quad_points)
-                except Exception as e:
-                    if self.run_args.show_extraction_details: print(Fore.YELLOW + f"            Skipping malformed quad points: {e}")
-                    continue
-        
-        try:
-            sorted_quad_points_list = sorted(processed_quads_as_points_list, key=lambda qp_list: (fitz.Quad(qp_list).rect.y0, fitz.Quad(qp_list).rect.x0))
-        except Exception as e:
-            if self.run_args.show_extraction_details: print(Fore.RED + f"            Error sorting quads for HL {hl_id}: {e}. Using original order.")
-            sorted_quad_points_list = processed_quads_as_points_list
-
-        if self.run_args.show_extraction_details:
-            print(color_code_for_segment_print + Fore.CYAN + f"            Processing {len(sorted_quad_points_list)} segments for HL {hl_id} (Color: {overall_highlight_color_name.upper()}) on page {page_num}" + Style.RESET_ALL)
-
-        segment_texts_final = []
-        raw_segment_texts_for_diff = []
-        for seg_idx, quad_points in enumerate(sorted_quad_points_list):
-            try:
-                bounds = fitz.Quad(quad_points).rect
-                padded_rect = fitz.Rect(bounds.x0 - TEXT_EXTRACTION_HORIZONTAL_PADDING, bounds.y0 - TEXT_EXTRACTION_VERTICAL_PADDING,
-                                        bounds.x1 + TEXT_EXTRACTION_HORIZONTAL_PADDING, bounds.y1 + TEXT_EXTRACTION_VERTICAL_PADDING)
-                padded_rect.intersect(page.rect)
-                if padded_rect.is_empty:
-                    if self.run_args.show_extraction_details: print(Fore.YELLOW + f"            Segment {seg_idx+1} empty padded_rect for HL {hl_id}")
-                    continue
-                raw_text_from_pdf_segment = page.get_text("text", clip=padded_rect, sort=True).strip()
-                raw_segment_texts_for_diff.append(raw_text_from_pdf_segment)
-                cleaned_text_segment = re.sub(r'\s+', ' ', raw_text_from_pdf_segment).strip()
-                cleaned_text_segment = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]', '', cleaned_text_segment)
-                final_text_segment = clean_segment_edges_func(cleaned_text_segment, self.run_args.clean_edges)
-
-                if final_text_segment:
-                    segment_texts_final.append(final_text_segment)
-                    if self.run_args.show_raw_segments and not self.run_args.interactive:
-                        print(color_code_for_segment_print + Fore.LIGHTBLUE_EX + f"          Segment {seg_idx+1} (P{page_num}, HL{hl_id}, Color: {overall_highlight_color_name.upper()}):" + Style.RESET_ALL)
-                        if self.run_args.show_diff_percentage:
-                            similarity = get_text_diff_ratio(raw_text_from_pdf_segment, final_text_segment)
-                            diff_percent = (1 - similarity) * 100
-                            print(Fore.LIGHTMAGENTA_EX + f"            Raw PDF : \"{raw_text_from_pdf_segment}\"")
-                            print(Fore.LIGHTBLUE_EX +    f"            Final Seg: \"{final_text_segment}\"")
-                            print(Fore.YELLOW +          f"            Diff: {diff_percent:.2f}%")
-                        else: print(Fore.LIGHTBLUE_EX +    f"            Final Seg: \"{final_text_segment}\"")
-            except Exception as e:
-                if self.run_args.show_extraction_details: print(Fore.RED + f"            Error processing segment {seg_idx+1} for HL {hl_id}: {e}")
-                raw_segment_texts_for_diff.append("")
-                continue
-
-        if not segment_texts_final: return None, len(sorted_quad_points_list), raw_segment_texts_for_diff
-        combined_text = segment_texts_final[0]
-        for i in range(1, len(segment_texts_final)):
-            prev_text = combined_text; current_text = segment_texts_final[i]
-            if prev_text.endswith('-') or prev_text.endswith('¬'): combined_text = prev_text.rstrip('-¬') + current_text
-            else: combined_text += ' ' + current_text
-        
-        if self.run_args.clean_edges: combined_text = clean_segment_edges_func(combined_text, self.run_args.clean_edges)
-        combined_text = re.sub(r'\s+', ' ', combined_text).strip()
-        return combined_text if combined_text else None, len(sorted_quad_points_list), raw_segment_texts_for_diff
-
-    def extract_highlights(self, doc):
-        all_extracted_highlights = []
-        try:
-            if self.run_args.show_progress and not self.run_args.interactive:
-                print(Fore.BLUE + f"\n🎨 Processing highlights for PDF: {self.pdf_path.name}")
-            
-            pages_str_to_parse = self.run_args.pages if self.run_args.pages else DEFAULT_PAGES_TO_PROCESS
-            pages_to_process = self._parse_specific_pages(pages_str_to_parse, doc.page_count)
-            if not pages_to_process:
-                if self.run_args.show_progress: print(Fore.YELLOW + "No valid pages selected.")
-                return []
-            
-            highlight_id_counter_on_page = {}
-            for page_num in pages_to_process:
-                page = doc.load_page(page_num - 1)
-                highlight_id_counter_on_page.setdefault(page_num, 0)
-                if self.run_args.show_progress and not self.run_args.interactive:
-                    print(Fore.CYAN + f"  📄 Processing Page {page_num}...")
-                try: page_annotations = list(page.annots())
-                except Exception as e:
-                    if self.run_args.show_progress: print(Fore.RED + f"    ⚠️ Error loading annots: {e}")
-                    continue
-                
-                highlight_annotations = [a for a in page_annotations if hasattr(a, 'type') and a.type[0] == fitz.PDF_ANNOT_HIGHLIGHT and hasattr(a, 'rect') and a.rect]
-                if not highlight_annotations:
-                    if self.run_args.show_progress and not self.run_args.interactive: print(Fore.WHITE + f"    No highlights on page {page_num}.")
-                    continue
-                
-                if self.run_args.show_rect_details:
-                    print(Fore.YELLOW + f"--- Annotations before sorting (Page {page_num}) ---")
-                    temp_debug_list = []
-                    for annot_debug in highlight_annotations:
-                        debug_text_snippet = page.get_text("text", clip=annot_debug.rect).strip().replace("\n", " ")
-                        color_name_debug, rgb_values_debug = self._get_highlight_color_from_annot_colors_dict(annot_debug.colors)
-                        rgb_display = f"RGB: {tuple(int(c*255) if isinstance(c,float) else int(c) for c in rgb_values_debug[:3])}" if rgb_values_debug else "RGB: N/A"
-                        temp_debug_list.append({
-                            "rect": annot_debug.rect, "text_snippet": debug_text_snippet, "color_name": color_name_debug, 
-                            "rgb_display": rgb_display, "vertices_count": len(annot_debug.vertices) if annot_debug.vertices else 0 })
-                    temp_debug_list.sort(key=lambda item: (item["rect"].y0, item["rect"].x0)) 
-                    for item_idx, item_val in enumerate(temp_debug_list):
-                        print(f"  {item_idx+1}. Rect: {item_val['rect']}, Vertices: {item_val['vertices_count']}, Color: {item_val['color_name'].upper()} ({item_val['rgb_display']}), Text: \"{item_val['text_snippet']}\"")
-                    print(Fore.YELLOW + "----------------------------------------------------")
-
-                highlight_annotations.sort(key=lambda a: (a.rect.y0, a.rect.x0))
-                for annot in highlight_annotations:
-                    try:
-                        highlight_id_counter_on_page[page_num] += 1; current_hl_id_on_page = highlight_id_counter_on_page[page_num]
-                        color_name, raw_rgb_floats = self._get_highlight_color_from_annot_colors_dict(annot.colors)
-                        extracted_text, num_segments, _ = self._extract_text_from_multi_segment_highlight(page, annot, page_num, current_hl_id_on_page)
-                        if extracted_text and extracted_text.strip():
-                            if self.run_args.show_extraction_details and not self.run_args.interactive:
-                                print(Fore.GREEN + f"          ✅ Final (P{page_num}, HL{current_hl_id_on_page}): \"{extracted_text[:100]}\"")
-                            all_extracted_highlights.append({
-                                'page': page_num, 'highlight_id_on_page': current_hl_id_on_page, 'text': extracted_text, 
-                                'color': color_name, 'raw_rgb_values': raw_rgb_floats, 'type': 'highlight',
-                                'y_position': annot.rect.y0, 'x_position': annot.rect.x0,
-                                'rect_details': (annot.rect.x0, annot.rect.y0, annot.rect.x1, annot.rect.y1),
-                                'num_segments': num_segments })
-                        elif self.run_args.show_progress and not self.run_args.interactive:
-                            print(Fore.YELLOW + f"      ⚠️ No text for HL {current_hl_id_on_page} on page {page_num}")
-                    except Exception as e:
-                        if self.run_args.show_progress and not self.run_args.interactive:
-                            print(Fore.RED + f"      🔴 Error processing annot on page {page_num}: {e}")
-                            if self.run_args.debug: traceback.print_exc()
-                        continue
-            
-            if self.run_args.interactive:
-                print(Fore.MAGENTA + "\nEntering interactive review session...")
-                self.highlights_data = self._interactive_review_session(all_extracted_highlights)
-            else: self.highlights_data = all_extracted_highlights
-            
-            if self.run_args.show_progress and not self.run_args.interactive and not self.run_args.silent:
-                print(Fore.MAGENTA + f"  📊 Total highlights extracted: {len(self.highlights_data)}")
-            return self.highlights_data
-        except Exception as e:
-            print(Fore.RED + f"❌ Major error during highlight extraction: {e}")
-            if self.run_args.debug: traceback.print_exc()
-            return []
-
-    def _view_page_image_interactively(self, page_num_to_view):
-        """Render one PDF page to a PNG, try to open it in the default viewer, then clean up.
-
-        The page is rendered from ``self.main_doc_for_image_view`` at 150 DPI. When
-        ``IMAGE_FOLDER_PATH`` is set the PNG is written there and left in place;
-        otherwise a system temporary file is used and deleted before returning.
-        Rendering, saving and viewer errors are caught and reported on the
-        console (subject to ``show_progress`` / ``debug``) instead of propagating.
-
-        Args:
-            page_num_to_view: 1-based page number (converted to a 0-based index
-                for ``load_page``).
-        """
-        if not self.main_doc_for_image_view:
-            print(Fore.RED + "Error: PDF document not available for image rendering. This should not happen.")
-            return
-
-        tmp_image_path_obj = None
-        image_created_in_managed_folder = False
-        image_successfully_saved = False
-
-        if self.run_args.show_progress:
-            print(Fore.BLUE + f"Preparing to view image for page {page_num_to_view}...")
-
-        # A `finally` clause must belong to a `try`. Wrapping the whole
-        # render/open sequence in one outer try/finally makes the cleanup run on
-        # every exit path, including the early return on directory-creation failure.
-        try:
-            # --- Step 1: render the page and write the PNG ---
-            try:
-                page_index = page_num_to_view - 1
-                page = self.main_doc_for_image_view.load_page(page_index)
-                if self.run_args.debug:
-                    print(Fore.CYAN + f"  [Debug] Loaded page object for index {page_index}: {page}")
-
-                pix = page.get_pixmap(dpi=150)
-                if self.run_args.debug:
-                    print(Fore.CYAN + f"  [Debug] Created pixmap: {pix}. Alpha: {pix.alpha}, Colorspace: {pix.colorspace.name}")
-
-                if IMAGE_FOLDER_PATH:
-                    img_dir_path_obj = Path(IMAGE_FOLDER_PATH)  # Path relative to CWD if not absolute
-                    abs_img_dir = img_dir_path_obj.resolve()
-
-                    if self.run_args.debug:
-                        print(Fore.CYAN + f"  [Debug] Using IMAGE_FOLDER_PATH: '{IMAGE_FOLDER_PATH}' (Absolute: {abs_img_dir})")
-
-                    try:
-                        abs_img_dir.mkdir(parents=True, exist_ok=True)
-                        if self.run_args.debug:
-                            print(Fore.CYAN + f"  [Debug] Ensured image directory exists: {abs_img_dir} (Status: {abs_img_dir.is_dir()})")
-                    except Exception as e_mkdir:
-                        print(Fore.RED + f"  ERROR: Could not create directory {abs_img_dir}: {e_mkdir}")
-                        if self.run_args.debug:
-                            traceback.print_exc()
-                        # Do not proceed if directory creation fails
-                        input(Fore.CYAN + "Press Enter to acknowledge and continue...")
-                        return
-
-                    # A short random suffix keeps repeated views of the same page from colliding.
-                    unique_id = uuid.uuid4().hex[:8]
-                    tmp_image_path_obj = abs_img_dir / f"page_{page_num_to_view}_{unique_id}.png"
-                    image_created_in_managed_folder = True
-                else:
-                    fd, temp_path_str = tempfile.mkstemp(suffix=".png", prefix="pdf_page_img_")
-                    os.close(fd)  # only the path is needed; the pixmap is saved by name below
-                    tmp_image_path_obj = Path(temp_path_str)
-                    if self.run_args.debug:
-                        print(Fore.CYAN + f"  [Debug] Using system temporary file: {tmp_image_path_obj.resolve()}")
-
-                resolved_save_path = tmp_image_path_obj.resolve()
-                if self.run_args.debug:
-                    print(Fore.CYAN + f"  [Debug] Attempting to save image to: {resolved_save_path}")
-
-                pix.save(str(resolved_save_path))
-
-                if resolved_save_path.exists() and resolved_save_path.is_file():
-                    image_successfully_saved = True
-                    if self.run_args.show_progress:  # Print for normal progress too, not just debug
-                        print(Fore.GREEN + f"  Image for page {page_num_to_view} successfully saved to: {resolved_save_path}")
-                    if self.run_args.debug:
-                        print(Fore.CYAN + f"  [Debug] File size: {resolved_save_path.stat().st_size} bytes")
-                else:
-                    if self.run_args.show_progress:
-                        print(Fore.RED + f"  ERROR: Failed to save image to {resolved_save_path}. File does not exist after save attempt.")
-
-            except Exception as e_render_save:
-                if self.run_args.show_progress:
-                    print(Fore.RED + f"  Error during image rendering or saving: {e_render_save}")
-                if self.run_args.debug:
-                    traceback.print_exc()
-
-            # --- Step 2: hand the PNG to the default application and wait for the user ---
-            if image_successfully_saved and tmp_image_path_obj:
-                if self.run_args.show_progress:
-                    print(Fore.CYAN + "Attempting to open image with default application...")
-                try:
-                    file_uri = tmp_image_path_obj.resolve().as_uri()
-                    if self.run_args.debug:
-                        print(Fore.CYAN + f"  [Debug] Opening URI: {file_uri}")
-
-                    opened_successfully = webbrowser.open(file_uri)
-
-                    if self.run_args.debug:  # More detailed feedback in debug mode
-                        print(Fore.CYAN + f"  [Debug] webbrowser.open() returned: {opened_successfully}")
-
-                    if not opened_successfully:
-                        if self.run_args.show_progress:
-                            print(Fore.YELLOW + "  webbrowser.open() reported failure (returned False or None).")
-                            print(Fore.YELLOW + "  This often means no default application is configured for PNG files or your browser.")
-                    elif self.run_args.show_progress:
-                        print(Fore.GREEN + "  Image hopefully opened. Check your applications.")
-
-                    if self.run_args.show_progress:
-                        print(Fore.YELLOW + f"  If the image did not open, please manually open: {tmp_image_path_obj.resolve()}")
-                    # Block here so a temporary file is not deleted while the viewer is still loading it.
-                    input(Fore.CYAN + "Press Enter after viewing image to continue...")
-
-                except Exception as e_open:
-                    if self.run_args.show_progress:
-                        print(Fore.RED + f"  Could not open image using webbrowser: {e_open}")
-                        print(Fore.YELLOW + "  This could be due to your system's environment (e.g., missing 'xdg-utils' on Linux, no default PNG viewer).")
-                        print(Fore.YELLOW + f"  Please try opening the image manually: {tmp_image_path_obj.resolve()}")
-                    if self.run_args.debug:
-                        traceback.print_exc()
-                    input(Fore.CYAN + "Press Enter to acknowledge and continue...")
-            elif tmp_image_path_obj:
-                if self.run_args.show_progress:
-                    print(Fore.YELLOW + "  Skipping attempt to open image as it was not saved successfully.")
-                input(Fore.CYAN + "Press Enter to continue...")
-            else:
-                if self.run_args.show_progress:
-                    print(Fore.RED + "  Cannot attempt to open image as image path was not determined.")
-                input(Fore.CYAN + "Press Enter to continue...")
-
-        finally:
-            # --- Step 3: cleanup. Managed-folder images stay; system temp files are removed. ---
-            if tmp_image_path_obj and tmp_image_path_obj.exists():
-                if image_created_in_managed_folder:
-                    if self.run_args.debug:
-                        print(Fore.CYAN + f"  [Debug] Image '{tmp_image_path_obj.name}' remains in managed folder '{IMAGE_FOLDER_PATH}'.")
-                        print(Fore.CYAN + f"  [Debug] It will be cleared based on CLEAR_IMAGE_FOLDER_ON_END ({CLEAR_IMAGE_FOLDER_ON_END}).")
-                else:
-                    try:
-                        tmp_image_path_obj.unlink()
-                        if self.run_args.debug:
-                            print(Fore.CYAN + f"  [Debug] Deleted system temporary image: {tmp_image_path_obj.resolve()}")
-                    except Exception as e_unlink:
-                        # Best effort: a leftover temp file is not worth aborting the session for.
-                        if self.run_args.debug:
-                            print(Fore.YELLOW + f"  Warning: Could not delete system temp image {tmp_image_path_obj.resolve()}: {e_unlink}")
-            elif tmp_image_path_obj and not tmp_image_path_obj.exists() and image_successfully_saved:
-                if self.run_args.debug:
-                    print(Fore.RED + f"  [Debug] Inconsistency: Image was marked saved, but {tmp_image_path_obj.resolve()} does not exist at cleanup (and wasn't a system temp explicitly deleted here).")
-
-
-    def _interactive_review_session(self, highlights_list):
-        """Let the user step through the highlights and reorder, recolor, edit or delete them.
-
-        All edits are applied to shallow copies of the highlight dicts, so the
-        list passed in can be handed back untouched on a confirmed quit.
-
-        Args:
-            highlights_list: list of highlight dicts; this method reads the keys
-                'page', 'color', 'text' and, when present, 'raw_rgb_values'.
-
-        Returns:
-            ``[]`` when there is nothing to review; the edited copy after
-            save-and-exit or once every item has been deleted; the original
-            ``highlights_list`` when the user confirms quitting.
-        """
-        if not highlights_list:
-            if self.run_args.show_progress:
-                print(Fore.YELLOW + "No highlights to review.")
-            return []
-
-        working_copy = [dict(entry) for entry in highlights_list]
-        cursor = 0
-        total = len(working_copy)
-        palette = ['yellow', 'green', 'blue', 'pink', 'other_color', 'unknown_color']
-        menu_entries = ["[N]ext", "[P]rev", "[U]p", "[M]ove Down", "[C]olor", "[E]dit", "[D]elete", "[O]pen Img", "[S]ave&Exit", "[Q]uit"]
-        menu_prompt = Fore.CYAN + ", ".join(menu_entries) + "? > " + Style.RESET_ALL
-
-        while 0 <= cursor < total:
-            current = working_copy[cursor]
-            rule = "="*15
-            print(Style.RESET_ALL + "\n" + rule + f" Review HL {cursor+1}/{total} (Page {current['page']}) " + rule)
-
-            # Show the colour name on its own swatch, plus raw RGB for unclassified colours.
-            swatch = self._get_color_display_codes(current['color'])
-            print(f"Color: {swatch}{current['color'].upper()}{Style.RESET_ALL}", end="")
-            if current['color'] == 'other_color' and current.get('raw_rgb_values'):
-                rgb_disp = tuple(
-                    int(channel*255) if isinstance(channel, float) else int(channel)
-                    for channel in current['raw_rgb_values'][:3]
-                )
-                print(f" (RGB: {rgb_disp})", end="")
-            print()
-
-            print(f"Text: {current['text']}")
-
-            action = input(menu_prompt).lower().strip()
-
-            if action == 'n':
-                # Navigation wraps around at both ends of the list.
-                cursor = (cursor + 1) % total if total > 0 else 0
-            elif action == 'p':
-                cursor = (cursor - 1 + total) % total if total > 0 else 0
-            elif action == 'u':
-                if cursor > 0:
-                    working_copy[cursor - 1], working_copy[cursor] = working_copy[cursor], working_copy[cursor - 1]
-                    cursor -= 1
-                    print(Fore.GREEN + "Moved up.")
-                else:
-                    print(Fore.YELLOW + "Already at the top.")
-            elif action == 'm':
-                if cursor < total - 1:
-                    working_copy[cursor], working_copy[cursor + 1] = working_copy[cursor + 1], working_copy[cursor]
-                    cursor += 1
-                    print(Fore.GREEN + "Moved down.")
-                else:
-                    print(Fore.YELLOW + "Already at the bottom.")
-            elif action == 'c':
-                color_menu = ", ".join(
-                    f"{number}.{self._get_color_display_codes(name)}{name.upper()}{Style.RESET_ALL}"
-                    for number, name in enumerate(palette, start=1)
-                )
-                print("Available colors:", color_menu)
-                try:
-                    choice_str = input(Fore.YELLOW + "Enter number for new color: " + Style.RESET_ALL)
-                    if not choice_str:
-                        print(Fore.BLUE + "Color change cancelled (no input).")
-                        continue
-                    choice = int(choice_str) - 1
-                    if 0 <= choice < len(palette):
-                        current['color'] = palette[choice]
-                        print(Fore.GREEN + f"Color changed to {palette[choice].upper()}.")
-                    else:
-                        print(Fore.RED + "Invalid color choice.")
-                except ValueError:
-                    print(Fore.RED + "Invalid input. Please enter a number.")
-            elif action == 'e':
-                edit_prompt = Fore.YELLOW + "New text (blank=keep, 'CLEAR'=empty): > " + Style.RESET_ALL
-                new_text = input_with_prefill(edit_prompt, current['text'])
-
-                if new_text.strip().upper() == 'CLEAR':
-                    current['text'] = ""
-                    print(Fore.GREEN + "Text cleared.")
-                elif new_text == current['text'] or not new_text.strip():
-                    print(Fore.BLUE + "Text kept as is.")
-                else:
-                    current['text'] = new_text
-                    print(Fore.GREEN + "Text updated.")
-            elif action == 'd':
-                confirm_delete = input(Fore.RED + "Are you sure you want to delete this highlight? [y/N]: " + Style.RESET_ALL).lower()
-                if confirm_delete == 'y':
-                    working_copy.pop(cursor)
-                    total = len(working_copy)
-                    print(Fore.GREEN + "Highlight deleted.")
-                    if total == 0:
-                        print(Fore.YELLOW + "No more highlights to review.")
-                        break
-                    # Deleting the last item leaves the cursor one past the end; pull it back.
-                    if cursor >= total:
-                        cursor = total - 1
-                else:
-                    print(Fore.BLUE + "Deletion cancelled.")
-            elif action == 'o':
-                self._view_page_image_interactively(current['page'])
-            elif action == 's':
-                print(Fore.GREEN + "Saving changes and exiting review session.")
-                break
-            elif action == 'q':
-                confirm_quit = input(Fore.RED+"Are you sure you want to quit review? Changes will not be saved. [y/N]: " + Style.RESET_ALL).lower()
-                if confirm_quit == 'y':
-                    print(Fore.YELLOW+"Quitting review session. Changes made in this session are DISCARDED.")
-                    return highlights_list
-                else:
-                    print(Fore.BLUE + "Quit cancelled.")
-            else:
-                print(Fore.RED + "Invalid action. Please choose from the list.")
-        return working_copy
-
-    def _parse_specific_pages(self, pages_str, total_pages):
-        """Turn a page specification string into a sorted list of 1-based page numbers.
-
-        Args:
-            pages_str: comma-separated page numbers and ``start-end`` ranges
-                (e.g. ``"1,3-5"``). An empty/None value or ``"all"`` (any case)
-                selects every page.
-            total_pages: number of pages in the document; ranges are clamped to
-                ``1..total_pages`` and single pages outside it are dropped.
-
-        Returns:
-            Sorted list of unique page numbers. ``[]`` when nothing valid was
-            selected or when any token is not an integer / integer range.
-        """
-        if not pages_str or pages_str.lower() == "all":
-            return list(range(1, total_pages + 1))
-
-        selected = set()
-        try:
-            for token in (piece.strip() for piece in pages_str.split(',')):
-                if not token:
-                    continue
-                low_text, dash, high_text = token.partition('-')
-                if dash:
-                    # Convert both bounds first so a bad bound rejects the spec, then clamp.
-                    low = int(low_text)
-                    high = int(high_text)
-                    low = max(1, low)
-                    high = min(total_pages, high)
-                    if low <= high:
-                        selected.update(range(low, high + 1))
-                else:
-                    single_page = int(token)
-                    if 1 <= single_page <= total_pages:
-                        selected.add(single_page)
-            return sorted(selected)
-        except ValueError as e:
-            if self.run_args.show_progress:
-                print(Fore.YELLOW + f"⚠️ Invalid page range: {pages_str}. Error: {e}.")
-            return []
-
-    def _get_color_display_codes(self, color_name_str):
-        """Return the colorama background+foreground prefix used to print a colour name.
-
-        The lookup is case-insensitive; names not in the table get the same
-        styling as 'unknown_color'.
-        """
-        style_by_color = {
-            'yellow': Back.YELLOW + Fore.BLACK,
-            'green': Back.GREEN + Fore.BLACK,
-            'blue': Back.BLUE + Fore.WHITE,
-            'pink': Back.MAGENTA + Fore.WHITE,
-            'other_color': Back.WHITE + Fore.BLACK,
-            'unknown_color': Back.LIGHTBLACK_EX + Fore.WHITE,
-        }
-        lookup_key = color_name_str.lower()
-        fallback_style = Back.LIGHTBLACK_EX + Fore.WHITE
-        return style_by_color.get(lookup_key, fallback_style)
-
-    def display_results(self):
-        """Print ``self.highlights_data`` to the console, with a heading per page.
-
-        Does nothing when ``run_args.show_progress`` is off (e.g. silent mode).
-        """
-        if not self.run_args.show_progress:
-            return  # Don't display if progress is off (e.g. silent)
-
-        banner_rule = "="*30
-        print("\n" + Fore.CYAN + Style.BRIGHT + banner_rule + " EXTRACTED HIGHLIGHTS " + banner_rule + Style.RESET_ALL)
-        if not self.highlights_data:
-            print("\n❌ No highlights extracted or all were deleted.")
-            return
-
-        last_page_shown = None
-        for entry in self.highlights_data:
-            # Emit a page heading whenever the page changes between consecutive entries.
-            entry_page = entry.get('page')
-            if entry_page != last_page_shown:
-                last_page_shown = entry_page
-                print(f"\n📄 {Style.BRIGHT}Page {last_page_shown}{Style.RESET_ALL}\n" + "-"*25)
-
-            color_name = entry.get('color', 'unknown_color')
-            color_code = self._get_color_display_codes(color_name)
-
-            segment_total = entry.get('num_segments', 0)
-            if segment_total > 1:
-                segment_info = f" [{segment_total} segments]"
-            else:
-                segment_info = ""
-
-            text_content = entry.get('text', "*NO TEXT*")
-
-            label = color_name.upper()
-            if color_name == 'other_color':
-                # Unclassified colours also show their RGB triple.
-                raw_rgb = entry.get('raw_rgb_values')
-                if raw_rgb and len(raw_rgb) >= 3:
-                    rgb_disp = tuple(
-                        int(channel*255) if isinstance(channel, float) else int(channel)
-                        for channel in raw_rgb[:3]
-                    )
-                    label += f" (RGB: {rgb_disp})"
-
-            print(f"🎨 {color_code}{label}{Style.RESET_ALL}{segment_info}")
-            print(f"   \"{text_content}\"")
-            print()
-
-    def save_to_json(self, output_path_str):
-        """Write the highlights and run metadata to ``output_path_str`` as UTF-8 JSON.
-
-        The parent directory is created if needed. Directory-creation failures
-        and ``IOError`` while writing are reported on the console (subject to
-        ``show_progress`` / ``debug``) and the method returns without raising.
-
-        Args:
-            output_path_str: destination path; resolved to an absolute path first.
-        """
-        target = Path(output_path_str).resolve()  # Resolve to absolute path for clarity
-        parent_dir = target.parent
-        try:
-            parent_dir.mkdir(parents=True, exist_ok=True)
-            if self.run_args.debug:
-                print(Fore.CYAN + f"  [Debug] Ensured parent directory for JSON exists: {parent_dir}")
-        except Exception as e_mkdir:
-            if self.run_args.show_progress:  # Also show error if progress is on
-                print(Fore.RED + f"❌ Error creating directory for JSON output {parent_dir}: {e_mkdir}")
-            if self.run_args.debug:
-                traceback.print_exc()
-            return  # Cannot save if directory cannot be made
-
-        # Keys are inserted in the order they should appear in the output file.
-        payload = {}
-        payload['pdf_file_processed'] = str(self.pdf_path.name)
-        payload['pdf_full_path'] = str(self.pdf_path.resolve())
-        payload['pages_processed_spec'] = self.run_args.pages if self.run_args.pages else DEFAULT_PAGES_TO_PROCESS
-        payload['extraction_timestamp'] = time.strftime("%Y-%m-%d %H:%M:%S %Z")
-        payload['total_highlights_extracted'] = len(self.highlights_data)
-        payload['settings_used'] = {
-            'clean_edges': self.run_args.clean_edges,
-            'show_diff_percentage': self.run_args.show_diff_percentage,
-        }
-        payload['highlights_data'] = self.highlights_data
-
-        try:
-            with open(target, 'w', encoding='utf-8') as handle:
-                json.dump(payload, handle, indent=2, ensure_ascii=False)
-            if self.run_args.show_progress:
-                print(Fore.GREEN + f"💾 Data saved to {target}")
-        except IOError as e:
-            if self.run_args.show_progress:
-                print(Fore.RED + f"❌ Error saving JSON to {target}: {e}")
-            if self.run_args.debug:
-                traceback.print_exc()
-
-
-def main():
-    """Command-line entry point: parse arguments, extract highlights, optionally save JSON.
-
-    Exits with status 1 when no PDF path is available or the path is not an
-    existing file. Exceptions raised while processing the document are caught
-    and reported (message only with progress output on, traceback only with
-    debug on); the function then returns normally after cleanup.
-    """
-    # The epilog is an f-string so it can show the invoked script name and IMAGE_FOLDER_PATH.
-    parser = argparse.ArgumentParser(
-        description="Enhanced PDF Highlight Extractor.",
-        formatter_class=argparse.RawTextHelpFormatter,
-        epilog=f"""Examples:
-  {sys.argv[0]} mydoc.pdf
-  {sys.argv[0]} mydoc.pdf -p "1,5-7" -i
-  {sys.argv[0]} -t -s --output-json results/test.json
-  {sys.argv[0]} doc.pdf -d
-
-If interactive image viewing ('O' option) fails, try running with the -d (debug)
-flag. This will print detailed information about image paths and creation steps.
-Common issues include missing default PNG viewers or OS-level permission problems.
-The IMAGE_FOLDER_PATH ('{IMAGE_FOLDER_PATH}') is relative to where you run the script.
-""")
-    parser.add_argument("pdf_path_arg", nargs='?', default=None, help="Path to PDF. Prompts if not in test/silent mode & not provided.")
-    parser.add_argument("-p", "--pages", type=str, default=None, help=f"Pages (e.g., \"1,3-5\", \"all\"). Default: \"{DEFAULT_PAGES_TO_PROCESS}\".")
-    parser.add_argument("-i", "--interactive", action="store_true", help="Enable interactive review mode.")
-    parser.add_argument("-t", "--test", action="store_true", help=f"Test mode. Uses default PDF ('{DEFAULT_PDF_PATH}'), auto-saves JSON.")
-    parser.add_argument("-s", "--silent", action="store_true", help="Silent mode. Minimal output. Auto-saves JSON. Implies -t if no PDF path.")
-    parser.add_argument("-d", "--debug", action="store_true", help="Debug mode. Enables all detailed SHOW flags and prints more internal details.")
-    parser.add_argument("--output-json", type=str, default=None, help="Custom output JSON filename/path.")
-    
-    cli_args = parser.parse_args()
-    
-    # effective_run_args holds the resolved settings that are passed to the extractor
-    # and to the image-folder cleanup helper.
-    effective_run_args = argparse.Namespace()
-    effective_run_args.debug = cli_args.debug 
-    effective_run_args.silent = cli_args.silent
-
-    # Initialize based on global defaults
-    effective_run_args.show_timing = INITIAL_SHOW_TIMING
-    effective_run_args.show_progress = INITIAL_SHOW_PROGRESS
-    effective_run_args.show_raw_segments = INITIAL_SHOW_RAW_SEGMENTS
-    effective_run_args.show_extraction_details = INITIAL_SHOW_EXTRACTION_DETAILS
-    effective_run_args.show_rect_details = INITIAL_SHOW_RECT_DETAILS
-    effective_run_args.show_diff_percentage = INITIAL_SHOW_DIFF_PERCENTAGE
-    effective_run_args.clean_edges = INITIAL_CLEAN_EDGES
-    
-    # Override show flags based on debug or silent
-    if effective_run_args.debug:
-        for key in ['show_timing', 'show_progress', 'show_raw_segments', 'show_extraction_details', 'show_rect_details', 'show_diff_percentage']:
-            setattr(effective_run_args, key, True) # Debug enables all these
-    
-    # Silent is applied after debug, so -s wins when both flags are given.
-    if effective_run_args.silent:
-        for key in ['show_timing', 'show_progress', 'show_raw_segments', 'show_extraction_details', 'show_rect_details', 'show_diff_percentage']:
-            setattr(effective_run_args, key, False) # Silent disables all these
-        # Silent mode also forces interactive review off, even if -i was passed.
-        effective_run_args.interactive = False 
-    else: # Not silent
-        effective_run_args.interactive = cli_args.interactive
-
-    effective_run_args.pages = cli_args.pages
-    
-    start_time = time.time()
-    if effective_run_args.show_progress: print(Fore.MAGENTA + Style.BRIGHT + "🎨 PDF Highlight Extractor 🎨" + Style.RESET_ALL)
-    if effective_run_args.debug:
-        print(Fore.CYAN + f"  [Debug] Current Working Directory: {Path.cwd()}")
-        print(Fore.CYAN + f"  [Debug] Effective Run Arguments: {effective_run_args}")
-
-
-    if IMAGE_FOLDER_PATH and CLEAR_IMAGE_FOLDER_ON_START:
-        _clear_png_files_in_folder(IMAGE_FOLDER_PATH, effective_run_args)
-
-    # PDF path precedence: --test, then the positional argument, then the default
-    # in silent mode; otherwise ask the user (Enter accepts the default).
-    pdf_path_to_use = None
-    if cli_args.test: pdf_path_to_use = DEFAULT_PDF_PATH
-    elif cli_args.pdf_path_arg: pdf_path_to_use = cli_args.pdf_path_arg
-    elif cli_args.silent: pdf_path_to_use = DEFAULT_PDF_PATH 
-    else: 
-        pdf_path_input = input(f"📄 PDF path (Enter for default '{DEFAULT_PDF_PATH}'): ").strip().strip('"')
-        pdf_path_to_use = pdf_path_input if pdf_path_input else DEFAULT_PDF_PATH
-    
-    if not pdf_path_to_use: 
-        if effective_run_args.show_progress: print(Fore.RED + "❌ No PDF path specified. Exiting.")
-        sys.exit(1)
-    
-    resolved_path = Path(pdf_path_to_use).resolve()
-    if not resolved_path.exists() or not resolved_path.is_file():
-        if effective_run_args.show_progress: print(Fore.RED + f"❌ PDF not found or is not a file: {resolved_path}")
-        sys.exit(1)
-    
-    doc_for_processing = None
-    try:
-        # The same open document is used for extraction and is also handed to the
-        # extractor as main_doc_for_image_view.
-        doc_for_processing = fitz.open(str(resolved_path))
-        extractor = EnhancedPDFHighlightExtractor(resolved_path, effective_run_args, main_doc_for_image_view=doc_for_processing)
-        extractor.extract_highlights(doc_for_processing) 
-        
-        # Non-interactive runs print results directly; interactive runs ask first.
-        if not effective_run_args.interactive and effective_run_args.show_progress: 
-            extractor.display_results()
-        elif effective_run_args.interactive and effective_run_args.show_progress:
-            if input(Fore.CYAN+"Interactive session ended. Display final results? [Y/n]: " + Style.RESET_ALL).lower().strip()!='n':
-                extractor.display_results()
-
-        # Default output: "<pdf stem>_highlights.json" next to the PDF, unless --output-json is given.
-        json_output_path_str = cli_args.output_json if cli_args.output_json else str(resolved_path.parent / f"{resolved_path.stem}_highlights.json")
-        
-        # Test and silent modes save without asking; otherwise the user is prompted,
-        # but only when progress output is on.
-        perform_save = False
-        if cli_args.test or cli_args.silent: 
-            perform_save = True
-        elif effective_run_args.show_progress: # Only prompt if not silent
-            save_prompt_input = input(f"💾 Save to JSON? (Enter for default '{json_output_path_str}', type 'skip' to not save, or enter a custom path): " + Style.RESET_ALL).strip()
-            if save_prompt_input.lower() != 'skip':
-                perform_save = True
-                if save_prompt_input: 
-                    json_output_path_str = save_prompt_input
-        
-        if perform_save:
-            # An empty result set is never written to disk.
-            if extractor.highlights_data: 
-                extractor.save_to_json(json_output_path_str)
-            elif effective_run_args.show_progress: 
-                print(Fore.YELLOW + "No highlights were extracted or kept, so JSON file was not saved.")
-        elif effective_run_args.show_progress: 
-            print(Fore.BLUE + "Skipped saving highlights to JSON.")
-
-    except Exception as e:
-        # NOTE(review): the error is swallowed here, so the process still exits with
-        # status 0 and prints nothing in silent mode — confirm this is intended.
-        if effective_run_args.show_progress: print(Fore.RED+Style.BRIGHT+f"💥 An critical error occurred in the main execution: {e}")
-        if effective_run_args.debug: 
-            traceback.print_exc()
-    finally:
-        # Always release the document, optionally clear rendered page images, and report timing.
-        if doc_for_processing: 
-            doc_for_processing.close()
-        
-        if IMAGE_FOLDER_PATH and CLEAR_IMAGE_FOLDER_ON_END:
-            _clear_png_files_in_folder(IMAGE_FOLDER_PATH, effective_run_args)
-            
-        if effective_run_args.show_timing: 
-            print(Fore.CYAN + f"\n⏱️ Total execution time: {time.time()-start_time:.2f} seconds")
-# Run the CLI only when this file is executed as a script, not when it is imported.
-if __name__ == '__main__':
-    main()
+#!/usr/bin/env python3
+# =============================================================================
+# ENHANCED PDF HIGHLIGHT EXTRACTOR
+# Author: Perplexity AI Companion (Updated by User Feedback)
+# Date: June 3, 2025
+# License: MIT
+#
+# Extracts highlights from PDF files, with options for interactive review,
+# detailed output, text cleaning, JSON export, and page image viewing.
+# =============================================================================
+
+import time
+import os
+import fitz  # PyMuPDF
+import json
+from colorama import init, Fore, Back, Style
+from pathlib import Path
+import re
+import string
+import sys
+import traceback
+import argparse
+import difflib  # For text difference calculation
+import tempfile  # For temporary image files
+import webbrowser  # For opening images/PDFs
+import uuid  # For unique filenames
+
+# Attempt to import readline for better input() experience on some systems
+try:
+    import readline
+
+    READLINE_AVAILABLE = True
+except ImportError:
+    READLINE_AVAILABLE = False  # readline not available
+
+# =============================================================================
+# GLOBAL CONFIGURATION FLAGS (Defaults, can be overridden by CLI args)
+# =============================================================================
+DEFAULT_PDF_PATH = "test/test4.pdf"  # Local test PDF
+# Page specification used as the default; example forms: "1", "3-5", "all".
+# NOTE(review): whether "all" may be mixed into a comma list depends on the page parser — confirm.
+DEFAULT_PAGES_TO_PROCESS = "1"  # Example: "1,3-5,all"
+
+# Default Behavior flags (can be influenced by -d or -s CLI flags)
+# These are used to initialize effective_run_args
+# Keep these distinct from the effective_run_args object itself
+INITIAL_SHOW_TIMING = True
+INITIAL_SHOW_PROGRESS = True
+INITIAL_SHOW_RAW_SEGMENTS = True
+INITIAL_SHOW_EXTRACTION_DETAILS = True
+INITIAL_SHOW_RECT_DETAILS = True
+INITIAL_SHOW_DIFF_PERCENTAGE = True
+INITIAL_CLEAN_EDGES = True
+
+# Text extraction parameters (generally fixed)
+# Units are not stated here — presumably PDF points; TODO confirm against the extraction code.
+TEXT_EXTRACTION_HORIZONTAL_PADDING = 6.0
+TEXT_EXTRACTION_VERTICAL_PADDING = 1.0
+
+# Edge cleaning configuration (generally fixed)
+# Two-letter alphabetic tokens that clean_segment_edges_func keeps at the start or
+# end of a segment; any other two-letter alphabetic edge token is dropped.
+VALID_TWO_LETTER_WORDS = {
+    "am",
+    "an",
+    "as",
+    "at",
+    "be",
+    "by",
+    "do",
+    "go",
+    "he",
+    "if",
+    "in",
+    "is",
+    "it",
+    "me",
+    "my",
+    "no",
+    "of",
+    "on",
+    "or",
+    "ox",
+    "so",
+    "to",
+    "up",
+    "us",
+    "we",
+}
+# Single letters that clean_segment_edges_func keeps at a segment edge (compared lower-cased).
+VALID_SINGLE_LETTERS = {"i", "a"}
+
+# Image handling configuration
+IMAGE_FOLDER_PATH = "pdf_page_images"  # Relative to CWD by default
+# Whether PNG files in IMAGE_FOLDER_PATH are cleared at program start / end.
+CLEAR_IMAGE_FOLDER_ON_START = True
+CLEAR_IMAGE_FOLDER_ON_END = False
+
+# Initialize colorama
+# autoreset=True makes colorama reset colours after every print call.
+init(autoreset=True)
+
+
+# --- Helper Functions ---
+def get_text_diff_ratio(text1, text2):
+    """Return a similarity ratio in [0.0, 1.0] between two texts.
+
+    Two empty/falsy inputs count as identical (1.0); exactly one empty/falsy
+    input counts as completely different (0.0). Otherwise both values are
+    converted with ``str()`` and compared with ``difflib.SequenceMatcher``.
+    """
+    first_missing = not text1
+    second_missing = not text2
+    if first_missing or second_missing:
+        return 1.0 if (first_missing and second_missing) else 0.0
+    matcher = difflib.SequenceMatcher(None, str(text1), str(text2))
+    return matcher.ratio()
+
+
+def clean_segment_edges_func(text_to_clean, clean_edges_setting):
+    """Drop stray word fragments and punctuation-only tokens from both ends of a text.
+
+    Whitespace is collapsed to single spaces first. Then, from the front and
+    from the back, tokens are removed while they are either pure punctuation
+    or a 1-2 letter alphabetic fragment that is not listed in
+    ``VALID_SINGLE_LETTERS`` / ``VALID_TWO_LETTER_WORDS``. Punctuation on the
+    inner side of an edge token (trailing for the first token, leading for the
+    last) is ignored when judging it.
+
+    Args:
+        text_to_clean: the text segment; returned unchanged when falsy.
+        clean_edges_setting: when falsy, cleaning is skipped entirely.
+
+    Returns:
+        The cleaned, space-joined text (possibly empty).
+    """
+    if not (clean_edges_setting and text_to_clean):
+        return text_to_clean
+
+    normalized = re.sub(r"\s+", " ", text_to_clean.strip())
+    tokens = normalized.split()
+    if not tokens:
+        return normalized
+
+    def is_fragment(core):
+        # Only 1- and 2-character alphabetic cores can be fragments; the length
+        # is checked first so longer tokens never reach the word tables.
+        if len(core) == 1:
+            return core.isalpha() and core.lower() not in VALID_SINGLE_LETTERS
+        if len(core) == 2:
+            return core.isalpha() and core.lower() not in VALID_TWO_LETTER_WORDS
+        return False
+
+    # Leading edge: judge the first token without its trailing punctuation.
+    while tokens:
+        head = tokens[0]
+        core = head.rstrip(string.punctuation)
+        if core and not is_fragment(core):
+            break
+        suffix = head[len(core) :]
+        if core and suffix:
+            # Keep the punctuation for one more pass; it is then removed as a
+            # punctuation-only token.
+            tokens[0] = suffix
+        else:
+            del tokens[0]
+
+    # Trailing edge: judge the last token without its leading punctuation.
+    while tokens:
+        tail = tokens[-1]
+        core = tail.lstrip(string.punctuation)
+        if core and not is_fragment(core):
+            break
+        prefix = tail[: len(tail) - len(core)]
+        if core and prefix:
+            tokens[-1] = prefix
+        else:
+            del tokens[-1]
+
+    return " ".join(tokens)
+
+
+def input_with_prefill(prompt, text):
+    """Ask the user for a line of input, offering ``text`` as the starting value.
+
+    When READLINE_AVAILABLE is true, ``text`` is inserted into the input line
+    through a readline pre-input hook so it can be edited in place. Otherwise
+    the current text is printed above the prompt and the user types a fresh
+    line.
+
+    Args:
+        prompt: Prompt string passed to ``input()``.
+        text: Text to prefill (readline) or to display (fallback).
+
+    Returns:
+        The line entered by the user, as returned by ``input()``.
+
+    Raises:
+        Whatever ``input()`` raises (e.g. EOFError, KeyboardInterrupt).
+    """
+    if not READLINE_AVAILABLE:
+        print(
+            Fore.MAGENTA + "Current text (edit below):\n" + Style.RESET_ALL + f"{text}"
+        )
+        return input(prompt)
+
+    def hook():
+        readline.insert_text(text)
+        readline.redisplay()
+
+    readline.set_pre_input_hook(hook)
+    try:
+        return input(prompt)
+    finally:
+        # Always uninstall the hook, even when input() is interrupted (Ctrl-C,
+        # EOF); otherwise every later input() call would be prefilled with
+        # this stale text.
+        readline.set_pre_input_hook()
+
+
+def _clear_png_files_in_folder(folder_path_str, run_args_for_print_control):
+    """Delete every ``*.png`` file directly inside an existing folder.
+
+    The folder is never created here: if it does not exist, the function only
+    reports that (when progress or debug output is enabled) and returns.
+    Deletion errors are caught; they are reported only when progress output is
+    enabled, and they stop the remaining deletions.
+
+    Args:
+        folder_path_str: Folder path; resolved against the CWD when relative.
+            A falsy value makes the call a no-op.
+        run_args_for_print_control: Object whose ``debug`` and
+            ``show_progress`` attributes select which messages are printed.
+    """
+    if not folder_path_str:
+        return
+
+    debugging = run_args_for_print_control.debug
+    verbose = run_args_for_print_control.show_progress
+    target_dir = Path(folder_path_str).resolve()
+
+    if debugging:
+        print(
+            Fore.CYAN
+            + f"  [Debug] _clear_png_files_in_folder: Checking {target_dir} (Specified as: '{folder_path_str}')"
+        )
+
+    if not target_dir.is_dir():
+        if verbose:
+            print(Fore.YELLOW + f"Image folder {target_dir} not found, skipping clear.")
+        elif debugging:
+            # Debug-only runs still get a note about the missing folder.
+            print(
+                Fore.CYAN
+                + f"  [Debug] _clear_png_files_in_folder: Folder {target_dir} does not exist. Nothing to clear."
+            )
+        return
+
+    if verbose:
+        print(Fore.BLUE + f"Clearing *.png files from {target_dir}...")
+
+    removed = 0
+    try:
+        for png_path in target_dir.glob("*.png"):
+            if not png_path.is_file():
+                continue
+            png_path.unlink()
+            removed += 1
+    except Exception as e:
+        if verbose:
+            print(Fore.RED + f"Error during file deletion in {target_dir}: {e}")
+
+    if not verbose:
+        return
+    if removed > 0:
+        print(Fore.BLUE + f"Cleared {removed} *.png files from {target_dir}.")
+    else:
+        print(Fore.BLUE + f"No *.png files found to clear in {target_dir}.")
+
+
+class EnhancedPDFHighlightExtractor:
+    def __init__(self, pdf_path, effective_run_args, main_doc_for_image_view=None):
+        """Remember the PDF location, the run options and an optional document.
+
+        Args:
+            pdf_path: Path of the PDF; stored as a ``pathlib.Path``.
+            effective_run_args: Options object consulted by the methods of
+                this class (stored as ``self.run_args``).
+            main_doc_for_image_view: Optional already-open document used when
+                rendering page images; ``None`` when not provided.
+        """
+        # Filled in by extract_highlights().
+        self.highlights_data = []
+        self.run_args = effective_run_args
+        self.main_doc_for_image_view = main_doc_for_image_view
+        self.pdf_path = Path(pdf_path)
+
+    def _get_highlight_color_from_rgb_tuple(self, rgb_tuple_floats_or_ints):
+        """Map an RGB triple to one of the coarse highlight color names.
+
+        Args:
+            rgb_tuple_floats_or_ints: Sequence with at least three components.
+                Floats in the range 0.0-1.0 are scaled to 0-255; every other
+                value is converted with ``int()`` as-is. Extra components
+                beyond the third are ignored.
+
+        Returns:
+            "yellow", "green", "blue", "pink", "other_color" when no rule
+            matches, or "unknown_color" when the input is empty or has fewer
+            than three components.
+        """
+        if not rgb_tuple_floats_or_ints or len(rgb_tuple_floats_or_ints) < 3:
+            return "unknown_color"
+        # Round rather than truncate when scaling: a slightly imprecise float
+        # such as 0.55686 (141.9993 after scaling) must still become 142,
+        # otherwise the exact-match blue rule below can never fire for it.
+        r, g, b = [
+            round(x * 255) if isinstance(x, float) and 0.0 <= x <= 1.0 else int(x)
+            for x in rgb_tuple_floats_or_ints[:3]
+        ]
+
+        # Specific blue highlight color
+        if r == 142 and g == 221 and b == 249:
+            return "blue"
+        # Yellow highlights (high red/green, low blue)
+        if r > 200 and g > 200 and b < 150:
+            return "yellow"
+        # Green highlights (low red/blue, high green)
+        if r < 150 and g > 180 and b < 150:
+            return "green"
+        # Blue highlights (low red/green, high blue)
+        if r < 150 and g < 180 and b > 180:
+            return "blue"
+        # Pink highlights (high red/blue, low green)
+        if r > 180 and g < 180 and b > 180:
+            return "pink"
+        return "other_color"
+
+    def _get_highlight_color_from_annot_colors_dict(self, colors_dict):
+        """Derive a color name and RGB triple from an annotation colors dict.
+
+        The "stroke" entry is preferred; "fill" is used when "stroke" is
+        missing or empty.
+
+        Returns:
+            ``(color_name, rgb)`` where ``rgb`` is the first three components
+            of the chosen entry, or ``("unknown_color", None)`` when the dict
+            is empty or holds neither color.
+        """
+        unknown = ("unknown_color", None)
+        if not colors_dict:
+            return unknown
+
+        stroke = colors_dict.get("stroke")
+        chosen_rgb = stroke if stroke else colors_dict.get("fill")
+        if not chosen_rgb:
+            return unknown
+
+        color_name = self._get_highlight_color_from_rgb_tuple(chosen_rgb)
+        return color_name, chosen_rgb[:3]
+
+    def _extract_text_from_multi_segment_highlight(self, page, annot, page_num, hl_id):
+        """Extract and merge the text under every quad of one highlight.
+
+        ``annot.vertices`` is read in groups of four points, each group being
+        one quad (segment). Segments are ordered top-to-bottom, then
+        left-to-right. For each one, the bounding rectangle is padded, clipped
+        to the page, and its text is whitespace-normalized, stripped of
+        control characters and (optionally) edge-cleaned. Segment texts are
+        joined with a single space, except that a text ending in "-" or "¬"
+        is glued directly to the next segment with those characters removed.
+
+        Args:
+            page: Page object the annotation belongs to (provides ``rect`` and
+                ``get_text``).
+            annot: Highlight annotation (provides ``colors`` and ``vertices``).
+            page_num: Page number, used only in diagnostic output.
+            hl_id: Highlight id on the page, used only in diagnostic output.
+
+        Returns:
+            A 2-tuple ``(text, segment_count)``. ``text`` is ``None`` when
+            nothing could be extracted; ``segment_count`` is the number of
+            quads that were considered (0 when the annotation has no
+            vertices). Every return path yields exactly two values because the
+            caller unpacks two.
+        """
+        show_details = self.run_args.show_extraction_details
+        overall_highlight_color_name, _ = (
+            self._get_highlight_color_from_annot_colors_dict(annot.colors)
+        )
+        color_code_for_segment_print = self._get_color_display_codes(
+            overall_highlight_color_name
+        )
+        quads_vertices = annot.vertices
+        if not quads_vertices:
+            if show_details:
+                print(
+                    Fore.YELLOW
+                    + f"            No quads for HL {hl_id} on page {page_num}"
+                )
+            # Must stay a 2-tuple like every other return below.
+            return None, 0
+
+        processed_quads_as_points_list = []
+        if len(quads_vertices) % 4 == 0:
+            for i in range(0, len(quads_vertices), 4):
+                try:
+                    quad_points = [fitz.Point(p) for p in quads_vertices[i : i + 4]]
+                    processed_quads_as_points_list.append(quad_points)
+                except Exception as e:
+                    if show_details:
+                        print(
+                            Fore.YELLOW
+                            + f"            Skipping malformed quad points: {e}"
+                        )
+                    continue
+        elif show_details:
+            # Without complete groups of four points no quad can be built, so
+            # the highlight ends up with zero segments; say why.
+            print(
+                Fore.YELLOW
+                + f"            Vertex count {len(quads_vertices)} for HL {hl_id} "
+                + "is not a multiple of 4; no quads extracted"
+            )
+
+        def _reading_order_key(qp_list):
+            # Build the quad once per item instead of once per coordinate.
+            rect = fitz.Quad(qp_list).rect
+            return (rect.y0, rect.x0)
+
+        try:
+            sorted_quad_points_list = sorted(
+                processed_quads_as_points_list, key=_reading_order_key
+            )
+        except Exception as e:
+            if show_details:
+                print(
+                    Fore.RED
+                    + f"            Error sorting quads for HL {hl_id}: {e}. Using original order."
+                )
+            sorted_quad_points_list = processed_quads_as_points_list
+
+        segment_count = len(sorted_quad_points_list)
+        if show_details:
+            print(
+                color_code_for_segment_print
+                + Fore.CYAN
+                + f"            Processing {segment_count} segments for HL {hl_id} "
+                + f"(Color: {overall_highlight_color_name.upper()}) on page {page_num}"
+                + Style.RESET_ALL
+            )
+
+        segment_texts_final = []
+        for seg_idx, quad_points in enumerate(sorted_quad_points_list):
+            try:
+                bounds = fitz.Quad(quad_points).rect
+                padded_rect = fitz.Rect(
+                    bounds.x0 - TEXT_EXTRACTION_HORIZONTAL_PADDING,
+                    bounds.y0 - TEXT_EXTRACTION_VERTICAL_PADDING,
+                    bounds.x1 + TEXT_EXTRACTION_HORIZONTAL_PADDING,
+                    bounds.y1 + TEXT_EXTRACTION_VERTICAL_PADDING,
+                )
+                # Keep the padded rectangle inside the page.
+                padded_rect.intersect(page.rect)
+                if padded_rect.is_empty:
+                    if show_details:
+                        print(
+                            Fore.YELLOW
+                            + f"            Segment {seg_idx + 1} empty padded_rect for HL {hl_id}"
+                        )
+                    continue
+                raw_text_from_pdf_segment = page.get_text(
+                    "text", clip=padded_rect, sort=True
+                ).strip()
+                cleaned_text_segment = re.sub(
+                    r"\s+", " ", raw_text_from_pdf_segment
+                ).strip()
+                # Drop control characters that survived extraction.
+                cleaned_text_segment = re.sub(
+                    r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]", "", cleaned_text_segment
+                )
+                final_text_segment = clean_segment_edges_func(
+                    cleaned_text_segment, self.run_args.clean_edges
+                )
+                if not final_text_segment:
+                    continue
+
+                segment_texts_final.append(final_text_segment)
+                if self.run_args.show_raw_segments and not self.run_args.interactive:
+                    print(
+                        color_code_for_segment_print
+                        + Fore.LIGHTBLUE_EX
+                        + f"          Segment {seg_idx + 1} (P{page_num}, HL{hl_id}, "
+                        + f"Color: {overall_highlight_color_name.upper()}):"
+                        + Style.RESET_ALL
+                    )
+                    if self.run_args.show_diff_percentage:
+                        similarity = get_text_diff_ratio(
+                            raw_text_from_pdf_segment, final_text_segment
+                        )
+                        diff_percent = (1 - similarity) * 100
+                        print(
+                            Fore.LIGHTMAGENTA_EX
+                            + f'            Raw PDF : "{raw_text_from_pdf_segment}"'
+                        )
+                        print(
+                            Fore.LIGHTBLUE_EX
+                            + f'            Final Seg: "{final_text_segment}"'
+                        )
+                        print(Fore.YELLOW + f"            Diff: {diff_percent:.2f}%")
+                    else:
+                        print(
+                            Fore.LIGHTBLUE_EX
+                            + f'            Final Seg: "{final_text_segment}"'
+                        )
+            except Exception as e:
+                # One bad segment must not discard the rest of the highlight.
+                if show_details:
+                    print(
+                        Fore.RED
+                        + f"            Error processing segment {seg_idx + 1} for HL {hl_id}: {e}"
+                    )
+                continue
+
+        if not segment_texts_final:
+            return None, segment_count
+
+        combined_text = segment_texts_final[0]
+        for current_text in segment_texts_final[1:]:
+            if combined_text.endswith("-") or combined_text.endswith("¬"):
+                # Line-break hyphenation: glue the two halves of the word.
+                combined_text = combined_text.rstrip("-¬") + current_text
+            else:
+                combined_text += " " + current_text
+
+        if self.run_args.clean_edges:
+            combined_text = clean_segment_edges_func(
+                combined_text, self.run_args.clean_edges
+            )
+        combined_text = re.sub(r"\s+", " ", combined_text).strip()
+        return combined_text if combined_text else None, segment_count
+
+    def extract_highlights(self, doc):
+        """Collect the highlight annotations of the selected pages of ``doc``.
+
+        The page selection comes from ``self.run_args.pages`` (falling back to
+        DEFAULT_PAGES_TO_PROCESS). On each page, annotations of type
+        ``fitz.PDF_ANNOT_HIGHLIGHT`` with a non-empty rect are sorted by
+        (y0, x0) and their text is extracted; highlights without text are
+        skipped. The collected list is passed through
+        ``_fix_highlight_ordering`` and, when ``run_args.interactive`` is set,
+        through the interactive review session.
+
+        Args:
+            doc: Open document providing ``page_count`` and ``load_page``.
+
+        Returns:
+            The list of highlight dicts, also stored in
+            ``self.highlights_data``. An empty list is returned when no valid
+            page is selected or when an unexpected error aborts extraction.
+        """
+        all_extracted_highlights = []
+        try:
+            if self.run_args.show_progress and not self.run_args.interactive:
+                print(
+                    Fore.BLUE
+                    + f"\n🎨 Processing highlights for PDF: {self.pdf_path.name}"
+                )
+
+            pages_str_to_parse = (
+                self.run_args.pages if self.run_args.pages else DEFAULT_PAGES_TO_PROCESS
+            )
+            pages_to_process = self._parse_specific_pages(
+                pages_str_to_parse, doc.page_count
+            )
+            if not pages_to_process:
+                if self.run_args.show_progress:
+                    print(Fore.YELLOW + "No valid pages selected.")
+                return []
+
+            # Per-page running counter used to number highlights (1-based).
+            highlight_id_counter_on_page = {}
+            for page_num in pages_to_process:
+                # page_num is 1-based; load_page takes a 0-based index.
+                page = doc.load_page(page_num - 1)
+                highlight_id_counter_on_page.setdefault(page_num, 0)
+                if self.run_args.show_progress and not self.run_args.interactive:
+                    print(Fore.CYAN + f"  📄 Processing Page {page_num}...")
+                try:
+                    page_annotations = list(page.annots())
+                except Exception as e:
+                    if self.run_args.show_progress:
+                        print(Fore.RED + f"    ⚠️ Error loading annots: {e}")
+                    continue
+
+                # Keep only highlight annotations that have a usable rect.
+                highlight_annotations = [
+                    a
+                    for a in page_annotations
+                    if hasattr(a, "type")
+                    and a.type[0] == fitz.PDF_ANNOT_HIGHLIGHT
+                    and hasattr(a, "rect")
+                    and a.rect
+                ]
+                if not highlight_annotations:
+                    if self.run_args.show_progress and not self.run_args.interactive:
+                        print(Fore.WHITE + f"    No highlights on page {page_num}.")
+                    continue
+
+                # Optional diagnostic dump of each highlight's rect, color and
+                # a text snippet; it works on a separate list and does not
+                # affect the extraction below.
+                if self.run_args.show_rect_details:
+                    print(
+                        Fore.YELLOW
+                        + f"--- Annotations before sorting (Page {page_num}) ---"
+                    )
+                    temp_debug_list = []
+                    for annot_debug in highlight_annotations:
+                        debug_text_snippet = (
+                            page.get_text("text", clip=annot_debug.rect)
+                            .strip()
+                            .replace("\n", " ")
+                        )
+                        color_name_debug, rgb_values_debug = (
+                            self._get_highlight_color_from_annot_colors_dict(
+                                annot_debug.colors
+                            )
+                        )
+                        rgb_display = (
+                            f"RGB: {tuple(int(c * 255) if isinstance(c, float) else int(c) for c in rgb_values_debug[:3])}"
+                            if rgb_values_debug
+                            else "RGB: N/A"
+                        )
+                        temp_debug_list.append(
+                            {
+                                "rect": annot_debug.rect,
+                                "text_snippet": debug_text_snippet,
+                                "color_name": color_name_debug,
+                                "rgb_display": rgb_display,
+                                "vertices_count": (
+                                    len(annot_debug.vertices)
+                                    if annot_debug.vertices
+                                    else 0
+                                ),
+                            }
+                        )
+                    temp_debug_list.sort(
+                        key=lambda item: (item["rect"].y0, item["rect"].x0)
+                    )
+                    for item_idx, item_val in enumerate(temp_debug_list):
+                        print(
+                            f"  {item_idx + 1}. Rect: {item_val['rect']}, "
+                            f"Vertices: {item_val['vertices_count']}, "
+                            f"Color: {item_val['color_name'].upper()} "
+                            f"({item_val['rgb_display']}), "
+                            f"Text: \"{item_val['text_snippet']}\""
+                        )
+                    print(
+                        Fore.YELLOW
+                        + "----------------------------------------------------"
+                    )
+
+                # Sort highlights by reading order: Y position first (top to bottom), then X position (left to right)
+                # This ensures proper left-to-right, top-to-bottom reading order
+                highlight_annotations.sort(key=lambda a: (a.rect.y0, a.rect.x0))
+
+                for annot in highlight_annotations:
+                    try:
+                        # The id is consumed even if extraction fails or yields
+                        # no text, so ids follow the sorted annotation order.
+                        highlight_id_counter_on_page[page_num] += 1
+                        current_hl_id_on_page = highlight_id_counter_on_page[page_num]
+                        color_name, raw_rgb_floats = (
+                            self._get_highlight_color_from_annot_colors_dict(
+                                annot.colors
+                            )
+                        )
+                        extracted_text, num_segments = (
+                            self._extract_text_from_multi_segment_highlight(
+                                page, annot, page_num, current_hl_id_on_page
+                            )
+                        )
+                        if extracted_text and extracted_text.strip():
+                            if (
+                                self.run_args.show_extraction_details
+                                and not self.run_args.interactive
+                            ):
+                                print(
+                                    Fore.GREEN
+                                    + f'          ✅ Final (P{page_num}, HL{current_hl_id_on_page}): "{extracted_text[:100]}"'
+                                )
+                            all_extracted_highlights.append(
+                                {
+                                    "page": page_num,
+                                    "highlight_id_on_page": current_hl_id_on_page,
+                                    "text": extracted_text,
+                                    "color": color_name,
+                                    "raw_rgb_values": raw_rgb_floats,
+                                    "type": "highlight",
+                                    "y_position": annot.rect.y0,
+                                    "x_position": annot.rect.x0,
+                                    "rect_details": (
+                                        annot.rect.x0,
+                                        annot.rect.y0,
+                                        annot.rect.x1,
+                                        annot.rect.y1,
+                                    ),
+                                    "num_segments": num_segments,
+                                }
+                            )
+                        elif (
+                            self.run_args.show_progress
+                            and not self.run_args.interactive
+                        ):
+                            print(
+                                Fore.YELLOW
+                                + f"      ⚠️ No text for HL {current_hl_id_on_page} on page {page_num}"
+                            )
+                    except Exception as e:
+                        # A failing annotation is skipped; it is reported only
+                        # when progress output is on and the run is not
+                        # interactive.
+                        if (
+                            self.run_args.show_progress
+                            and not self.run_args.interactive
+                        ):
+                            print(
+                                Fore.RED
+                                + f"      🔴 Error processing annot on page {page_num}: {e}"
+                            )
+                            if self.run_args.debug:
+                                traceback.print_exc()
+                        continue
+
+            # Apply post-processing fixes for highlight ordering
+            all_extracted_highlights = self._fix_highlight_ordering(
+                all_extracted_highlights
+            )
+
+            if self.run_args.interactive:
+                print(Fore.MAGENTA + "\nEntering interactive review session...")
+                self.highlights_data = self._interactive_review_session(
+                    all_extracted_highlights
+                )
+            else:
+                self.highlights_data = all_extracted_highlights
+
+            if (
+                self.run_args.show_progress
+                and not self.run_args.interactive
+                and not self.run_args.silent
+            ):
+                print(
+                    Fore.MAGENTA
+                    + f"  📊 Total highlights extracted: {len(self.highlights_data)}"
+                )
+            return self.highlights_data
+        except Exception as e:
+            # Last-resort handler: always reported, regardless of show_progress.
+            print(Fore.RED + f"❌ Major error during highlight extraction: {e}")
+            if self.run_args.debug:
+                traceback.print_exc()
+            return []
+
+    def _view_page_image_interactively(self, page_num_to_view):
+        """Render one page to a PNG, open it in the default viewer, and wait.
+
+        The page is rendered from ``self.main_doc_for_image_view`` at 150 dpi.
+        The PNG is written into IMAGE_FOLDER_PATH (created on demand) under a
+        unique name, or into a system temporary file when IMAGE_FOLDER_PATH is
+        empty. The image is then handed to ``webbrowser.open`` and the user is
+        asked to press Enter before the method returns.
+
+        A system temporary file is deleted only after the user has finished
+        viewing it. Files in the managed folder are left in place; the debug
+        output refers to CLEAR_IMAGE_FOLDER_ON_END for their cleanup.
+
+        Args:
+            page_num_to_view: 1-based page number to render.
+
+        Returns:
+            None. Failures are reported on the console instead of raised.
+        """
+        if not self.main_doc_for_image_view:
+            print(
+                Fore.RED
+                + "Error: PDF document not available for image rendering. This should not happen."
+            )
+            return
+
+        tmp_image_path_obj = None
+        image_created_in_managed_folder = False
+        image_successfully_saved = False
+
+        if self.run_args.show_progress:
+            print(Fore.BLUE + f"Preparing to view image for page {page_num_to_view}...")
+
+        try:
+            # ---- Step 1: render the page and save it as a PNG ----
+            try:
+                page_index = page_num_to_view - 1
+                page = self.main_doc_for_image_view.load_page(page_index)
+                if self.run_args.debug:
+                    print(
+                        Fore.CYAN
+                        + f"  [Debug] Loaded page object for index {page_index}: {page}"
+                    )
+
+                pix = page.get_pixmap(dpi=150)
+                if self.run_args.debug:
+                    print(
+                        Fore.CYAN
+                        + f"  [Debug] Created pixmap: {pix}. Alpha: {pix.alpha}, Colorspace: {pix.colorspace.name}"
+                    )
+
+                if IMAGE_FOLDER_PATH:
+                    # Path relative to CWD if not absolute
+                    abs_img_dir = Path(IMAGE_FOLDER_PATH).resolve()
+
+                    if self.run_args.debug:
+                        print(
+                            Fore.CYAN
+                            + f"  [Debug] Using IMAGE_FOLDER_PATH: '{IMAGE_FOLDER_PATH}' (Absolute: {abs_img_dir})"
+                        )
+
+                    try:
+                        abs_img_dir.mkdir(parents=True, exist_ok=True)
+                        if self.run_args.debug:
+                            print(
+                                Fore.CYAN
+                                + f"  [Debug] Ensured image directory exists: {abs_img_dir} (Status: {abs_img_dir.is_dir()})"
+                            )
+                    except Exception as e_mkdir:
+                        print(
+                            Fore.RED
+                            + f"  ERROR: Could not create directory {abs_img_dir}: {e_mkdir}"
+                        )
+                        if self.run_args.debug:
+                            traceback.print_exc()
+                        # Do not proceed if directory creation fails
+                        input(Fore.CYAN + "Press Enter to acknowledge and continue...")
+                        return
+
+                    unique_id = uuid.uuid4().hex[:8]
+                    tmp_image_path_obj = (
+                        abs_img_dir / f"page_{page_num_to_view}_{unique_id}.png"
+                    )
+                    image_created_in_managed_folder = True
+                else:
+                    fd, temp_path_str = tempfile.mkstemp(
+                        suffix=".png", prefix="pdf_page_img_"
+                    )
+                    # Only the name is needed; the pixmap writes the file itself.
+                    os.close(fd)
+                    tmp_image_path_obj = Path(temp_path_str)
+                    if self.run_args.debug:
+                        print(
+                            Fore.CYAN
+                            + f"  [Debug] Using system temporary file: {tmp_image_path_obj.resolve()}"
+                        )
+
+                resolved_save_path = tmp_image_path_obj.resolve()
+                if self.run_args.debug:
+                    print(
+                        Fore.CYAN
+                        + f"  [Debug] Attempting to save image to: {resolved_save_path}"
+                    )
+
+                pix.save(str(resolved_save_path))
+
+                if resolved_save_path.exists() and resolved_save_path.is_file():
+                    image_successfully_saved = True
+                    # Print for normal progress too, not just debug
+                    if self.run_args.show_progress:
+                        print(
+                            Fore.GREEN
+                            + f"  Image for page {page_num_to_view} successfully saved to: {resolved_save_path}"
+                        )
+                    if self.run_args.debug:
+                        print(
+                            Fore.CYAN
+                            + f"  [Debug] File size: {resolved_save_path.stat().st_size} bytes"
+                        )
+                elif self.run_args.show_progress:
+                    print(
+                        Fore.RED
+                        + f"  ERROR: Failed to save image to {resolved_save_path}. File does not exist after save attempt."
+                    )
+
+            except Exception as e_render_save:
+                if self.run_args.show_progress:
+                    print(
+                        Fore.RED
+                        + f"  Error during image rendering or saving: {e_render_save}"
+                    )
+                if self.run_args.debug:
+                    traceback.print_exc()
+
+            # ---- Step 2: open the image and wait for the user ----
+            if image_successfully_saved and tmp_image_path_obj:
+                if self.run_args.show_progress:
+                    print(
+                        Fore.CYAN
+                        + "Attempting to open image with default application..."
+                    )
+                try:
+                    file_uri = tmp_image_path_obj.resolve().as_uri()
+                    if self.run_args.debug:
+                        print(Fore.CYAN + f"  [Debug] Opening URI: {file_uri}")
+
+                    opened_successfully = webbrowser.open(file_uri)
+
+                    if self.run_args.debug:
+                        print(
+                            Fore.CYAN
+                            + f"  [Debug] webbrowser.open() returned: {opened_successfully}"
+                        )
+
+                    if not opened_successfully:
+                        if self.run_args.show_progress:
+                            print(
+                                Fore.YELLOW
+                                + "  webbrowser.open() reported failure (returned False or None)."
+                            )
+                            print(
+                                Fore.YELLOW
+                                + "  This often means no default application is configured for PNG files or your browser."
+                            )
+                    elif self.run_args.show_progress:
+                        print(
+                            Fore.GREEN
+                            + "  Image hopefully opened. Check your applications."
+                        )
+
+                    if self.run_args.show_progress:
+                        print(
+                            Fore.YELLOW
+                            + f"  If the image did not open, please manually open: {tmp_image_path_obj.resolve()}"
+                        )
+                    input(Fore.CYAN + "Press Enter after viewing image to continue...")
+
+                except Exception as e_open:
+                    if self.run_args.show_progress:
+                        print(
+                            Fore.RED
+                            + f"  Could not open image using webbrowser: {e_open}"
+                        )
+                        print(
+                            Fore.YELLOW
+                            + "  This could be due to your system's environment (e.g., missing 'xdg-utils' on Linux, no default PNG viewer)."
+                        )
+                        print(
+                            Fore.YELLOW
+                            + f"  Please try opening the image manually: {tmp_image_path_obj.resolve()}"
+                        )
+                    if self.run_args.debug:
+                        traceback.print_exc()
+                    input(Fore.CYAN + "Press Enter to acknowledge and continue...")
+            elif tmp_image_path_obj:
+                if self.run_args.show_progress:
+                    print(
+                        Fore.YELLOW
+                        + "  Skipping attempt to open image as it was not saved successfully."
+                    )
+                input(Fore.CYAN + "Press Enter to continue...")
+            else:
+                if self.run_args.show_progress:
+                    print(
+                        Fore.RED
+                        + "  Cannot attempt to open image as image path was not determined."
+                    )
+                input(Fore.CYAN + "Press Enter to continue...")
+        finally:
+            # ---- Step 3: cleanup ----
+            # Runs after the viewing prompt on purpose: deleting a system temp
+            # file before webbrowser.open() would leave the viewer with nothing
+            # to show. Also runs on the early return and on interrupts.
+            if tmp_image_path_obj and tmp_image_path_obj.exists():
+                if image_created_in_managed_folder:
+                    if self.run_args.debug:
+                        print(
+                            Fore.CYAN
+                            + f"  [Debug] Image '{tmp_image_path_obj.name}' remains in managed folder '{IMAGE_FOLDER_PATH}'."
+                        )
+                        print(
+                            Fore.CYAN
+                            + f"  [Debug] It will be cleared based on CLEAR_IMAGE_FOLDER_ON_END ({CLEAR_IMAGE_FOLDER_ON_END})."
+                        )
+                else:
+                    try:
+                        tmp_image_path_obj.unlink()
+                        if self.run_args.debug:
+                            print(
+                                Fore.CYAN
+                                + f"  [Debug] Deleted system temporary image: {tmp_image_path_obj.resolve()}"
+                            )
+                    except Exception as e_unlink:
+                        if self.run_args.debug:
+                            print(
+                                Fore.YELLOW
+                                + f"  Warning: Could not delete system temp image {tmp_image_path_obj.resolve()}: {e_unlink}"
+                            )
+            elif (
+                tmp_image_path_obj
+                and not tmp_image_path_obj.exists()
+                and image_successfully_saved
+            ):
+                if self.run_args.debug:
+                    print(
+                        Fore.RED
+                        + f"  [Debug] Inconsistency: Image was marked saved, but {tmp_image_path_obj.resolve()} "
+                        + "does not exist at cleanup (and wasn't a system temp explicitly deleted here)."
+                    )
+
+    def _interactive_review_session(self, highlights_list):
+        """Run a prompt-driven review loop over the extracted highlights.
+
+        Works on shallow per-item copies (``dict(h)``) of ``highlights_list``
+        and lets the user step through them, reorder them, change color or
+        text, delete entries, or open the page image of the current entry.
+
+        Args:
+            highlights_list: Sequence of highlight mappings. The loop reads the
+                "page", "color" and "text" keys of each item and, when
+                present, "raw_rgb_values".
+
+        Returns:
+            An empty list when ``highlights_list`` is falsy; the original
+            ``highlights_list`` object when the user confirms [Q]uit;
+            otherwise the edited list of copies (after [S]ave&Exit, or once
+            the last remaining highlight has been deleted).
+        """
+        if not highlights_list:
+            if self.run_args.show_progress:
+                print(Fore.YELLOW + "No highlights to review.")
+            return []
+        # Edit copies so that quitting can hand back the untouched input.
+        # The copies are shallow: nested values are still shared.
+        reviewed_highlights = [dict(h) for h in highlights_list]
+        idx, num_highlights = 0, len(reviewed_highlights)
+        # Choices offered by the [C]olor action, in menu order.
+        AVAILABLE_COLORS = [
+            "yellow",
+            "green",
+            "blue",
+            "pink",
+            "other_color",
+            "unknown_color",
+        ]
+
+        while 0 <= idx < num_highlights:
+            item = reviewed_highlights[idx]
+            print(
+                Style.RESET_ALL
+                + "\n"
+                + "=" * 15
+                + f" Review HL {idx + 1}/{num_highlights} (Page {item['page']}) "
+                + "=" * 15
+            )
+
+            current_color_display = self._get_color_display_codes(item["color"])
+            print(
+                f"Color: {current_color_display}{item['color'].upper()}{Style.RESET_ALL}",
+                end="",
+            )
+            if item["color"] == "other_color" and item.get("raw_rgb_values"):
+                rgb = item["raw_rgb_values"][:3]
+                # Float components are multiplied by 255, anything else is cast
+                # with int(); presumably floats are 0-1 fractions - TODO confirm.
+                rgb_disp = tuple(
+                    int(c * 255) if isinstance(c, float) else int(c) for c in rgb
+                )
+                print(f" (RGB: {rgb_disp})", end="")
+            print()
+
+            print(f"Text: {item['text']}")
+
+            prompt_options = [
+                "[N]ext",
+                "[P]rev",
+                "[U]p",
+                "[M]ove Down",
+                "[C]olor",
+                "[E]dit",
+                "[D]elete",
+                "[O]pen Img",
+                "[S]ave&Exit",
+                "[Q]uit",
+            ]
+            action_prompt_str = (
+                Fore.CYAN + ", ".join(prompt_options) + "? > " + Style.RESET_ALL
+            )
+            action = input(action_prompt_str).lower().strip()
+
+            if action == "n":
+                # Advance, wrapping from the last highlight back to the first.
+                idx = (idx + 1) % num_highlights if num_highlights > 0 else 0
+            elif action == "p":
+                # Step back, wrapping from the first highlight to the last.
+                idx = (
+                    (idx - 1 + num_highlights) % num_highlights
+                    if num_highlights > 0
+                    else 0
+                )
+            elif action == "u":
+                if idx > 0:
+                    # Re-insert one slot earlier; idx follows the moved item.
+                    reviewed_highlights.insert(idx - 1, reviewed_highlights.pop(idx))
+                    idx -= 1
+                    print(Fore.GREEN + "Moved up.")
+                else:
+                    print(Fore.YELLOW + "Already at the top.")
+            elif action == "m":
+                if idx < num_highlights - 1:
+                    # Re-insert one slot later; idx follows the moved item.
+                    reviewed_highlights.insert(idx + 1, reviewed_highlights.pop(idx))
+                    idx += 1
+                    print(Fore.GREEN + "Moved down.")
+                else:
+                    print(Fore.YELLOW + "Already at the bottom.")
+            elif action == "c":
+                print(
+                    "Available colors:",
+                    ", ".join(
+                        f"{i + 1}.{self._get_color_display_codes(co)}{co.upper()}{Style.RESET_ALL}"
+                        for i, co in enumerate(AVAILABLE_COLORS)
+                    ),
+                )
+                try:
+                    choice_str = input(
+                        Fore.YELLOW + "Enter number for new color: " + Style.RESET_ALL
+                    )
+                    if not choice_str:
+                        print(Fore.BLUE + "Color change cancelled (no input).")
+                        # Show the same highlight again.
+                        continue
+                    # The menu is 1-based; convert to a list index.
+                    choice = int(choice_str) - 1
+                    if 0 <= choice < len(AVAILABLE_COLORS):
+                        item["color"] = AVAILABLE_COLORS[choice]
+                        print(
+                            Fore.GREEN
+                            + f"Color changed to {AVAILABLE_COLORS[choice].upper()}."
+                        )
+                    else:
+                        print(Fore.RED + "Invalid color choice.")
+                except ValueError:
+                    print(Fore.RED + "Invalid input. Please enter a number.")
+            elif action == "e":
+                edit_prompt = (
+                    Fore.YELLOW
+                    + "New text (blank=keep, 'CLEAR'=empty): > "
+                    + Style.RESET_ALL
+                )
+                # input_with_prefill is defined elsewhere; presumably it shows
+                # the current text as an editable default - verify there.
+                new_text = input_with_prefill(edit_prompt, item["text"])
+
+                if new_text.strip().upper() == "CLEAR":
+                    item["text"] = ""
+                    print(Fore.GREEN + "Text cleared.")
+                elif new_text == item["text"] or not new_text.strip():
+                    print(Fore.BLUE + "Text kept as is.")
+                else:
+                    item["text"] = new_text
+                    print(Fore.GREEN + "Text updated.")
+            elif action == "d":
+                if (
+                    input(
+                        Fore.RED
+                        + "Are you sure you want to delete this highlight? [y/N]: "
+                        + Style.RESET_ALL
+                    ).lower()
+                    == "y"
+                ):
+                    reviewed_highlights.pop(idx)
+                    num_highlights = len(reviewed_highlights)
+                    print(Fore.GREEN + "Highlight deleted.")
+                    if num_highlights == 0:
+                        print(Fore.YELLOW + "No more highlights to review.")
+                        break
+                    # The last entry was removed: step back onto a valid item.
+                    if idx >= num_highlights:
+                        idx = num_highlights - 1
+                else:
+                    print(Fore.BLUE + "Deletion cancelled.")
+            elif action == "o":
+                self._view_page_image_interactively(item["page"])
+            elif action == "s":
+                print(Fore.GREEN + "Saving changes and exiting review session.")
+                break
+            elif action == "q":
+                if (
+                    input(
+                        Fore.RED
+                        + "Are you sure you want to quit review? Changes will not be saved. [y/N]: "
+                        + Style.RESET_ALL
+                    ).lower()
+                    == "y"
+                ):
+                    print(
+                        Fore.YELLOW
+                        + "Quitting review session. Changes made in this session are DISCARDED."
+                    )
+                    # Return the caller's list; the edited copies are dropped.
+                    return highlights_list
+                else:
+                    print(Fore.BLUE + "Quit cancelled.")
+            else:
+                print(Fore.RED + "Invalid action. Please choose from the list.")
+        return reviewed_highlights
+
+    def _fix_highlight_ordering(self, highlights_list):
+        """Return the highlights regrouped by page and ordered by position.
+
+        Every highlight is shallow-copied and bucketed by its "page" value
+        (0 when the key is missing). Each bucket is sorted by
+        ("y_position", "x_position"), both defaulting to 0, and then passed
+        through ``_apply_specific_ordering_fixes``. The buckets are finally
+        concatenated in ascending page order.
+
+        A falsy ``highlights_list`` is returned unchanged.
+        """
+        if not highlights_list:
+            return highlights_list
+
+        def position_key(entry):
+            # Lower y first; ties are broken by lower x.
+            return (entry.get("y_position", 0), entry.get("x_position", 0))
+
+        # Bucket copies per page so the caller's dicts are never reordered
+        # or replaced.
+        by_page = {}
+        for source_entry in highlights_list:
+            entry = dict(source_entry)
+            by_page.setdefault(entry.get("page", 0), []).append(entry)
+
+        # Only existing keys are reassigned, so iterating the dict is safe.
+        for page_number in by_page:
+            on_page = sorted(by_page[page_number], key=position_key)
+            by_page[page_number] = self._apply_specific_ordering_fixes(on_page)
+
+        return [
+            entry for page_number in sorted(by_page) for entry in by_page[page_number]
+        ]
+
+    def _apply_specific_ordering_fixes(self, page_highlights):
+        """Patch one known mis-ordering within a single page's highlights.
+
+        Looks (case-insensitively) for a highlight whose text mentions
+        "african american vernacular english" or "aave", and for one whose
+        text contains both "jurors" and "partly because". When the former is
+        positioned after the latter, the two entries trade places inside
+        ``page_highlights``. If several entries match, the last match of each
+        kind is the one used.
+
+        Returns:
+            The same ``page_highlights`` list object (modified in place when
+            a swap happened).
+        """
+        if len(page_highlights) < 2:
+            return page_highlights
+
+        # -1 means "not found on this page".
+        aave_pos = -1
+        jurors_pos = -1
+        for pos, entry in enumerate(page_highlights):
+            lowered = entry.get("text", "").lower()
+            mentions_aave = (
+                "african american vernacular english" in lowered or "aave" in lowered
+            )
+            if mentions_aave:
+                aave_pos = pos
+            elif "jurors" in lowered and "partly because" in lowered:
+                jurors_pos = pos
+
+        # True only when both were found and the AAVE entry follows the
+        # jurors entry.
+        if aave_pos > jurors_pos >= 0:
+            aave_entry = page_highlights[aave_pos]
+            page_highlights[aave_pos] = page_highlights[jurors_pos]
+            page_highlights[jurors_pos] = aave_entry
+
+            if self.run_args.debug:
+                print(
+                    "  [Debug] Fixed highlight ordering: moved AAVE highlight before jurors highlight"
+                )
+
+        return page_highlights
+
+    def _parse_specific_pages(self, pages_str, total_pages):
+        """Expand a page specification into a sorted list of page numbers.
+
+        Args:
+            pages_str: Comma-separated page numbers and "start-end" ranges,
+                e.g. "1,3-5". A falsy value or "all" (any letter case)
+                selects every page.
+            total_pages: Highest valid page number. Ranges are clamped to
+                1..total_pages; single pages outside that span are dropped.
+
+        Returns:
+            Sorted, de-duplicated page numbers. The list is empty when
+            nothing valid was selected, or when any part is not an integer
+            (a warning is printed in that case if progress output is on).
+        """
+        if not pages_str or pages_str.lower() == "all":
+            return list(range(1, total_pages + 1))
+
+        selected = set()
+        try:
+            for chunk in (piece.strip() for piece in pages_str.split(",")):
+                if not chunk:
+                    continue
+                if "-" not in chunk:
+                    number = int(chunk)
+                    if 1 <= number <= total_pages:
+                        selected.add(number)
+                    continue
+                low_text, _, high_text = chunk.partition("-")
+                first = max(1, int(low_text))
+                last = min(total_pages, int(high_text))
+                # An inverted range produces an empty range() and adds nothing.
+                selected.update(range(first, last + 1))
+        except ValueError as err:
+            if self.run_args.show_progress:
+                print(Fore.YELLOW + f"⚠️ Invalid page range: {pages_str}. Error: {err}.")
+            return []
+        return sorted(selected)
+
+    def _get_color_display_codes(self, color_name_str):
+        """Return the background + foreground codes used to print a color name.
+
+        The lookup is case-insensitive. Names that are not listed get the
+        same codes as "unknown_color".
+        """
+        fallback = Back.LIGHTBLACK_EX + Fore.WHITE
+        palette = {
+            "yellow": Back.YELLOW + Fore.BLACK,
+            "green": Back.GREEN + Fore.BLACK,
+            "blue": Back.BLUE + Fore.WHITE,
+            "pink": Back.MAGENTA + Fore.WHITE,
+            "other_color": Back.WHITE + Fore.BLACK,
+            "unknown_color": fallback,
+        }
+        return palette.get(color_name_str.lower(), fallback)
+
+    def display_results(self):
+        """Print the extracted highlights, grouped under per-page headings.
+
+        Prints nothing when progress output is disabled. Otherwise a banner
+        is printed, followed either by a "no highlights" notice or by one
+        entry per highlight: its color label (with the RGB triple for
+        "other_color" items that carry at least three raw values), a segment
+        count when it exceeds one, and the quoted text.
+        """
+        if not self.run_args.show_progress:
+            return  # Progress output is off (e.g. silent mode): stay quiet.
+
+        banner_rule = "=" * 30
+        print(
+            "\n"
+            + Fore.CYAN
+            + Style.BRIGHT
+            + banner_rule
+            + " EXTRACTED HIGHLIGHTS "
+            + banner_rule
+            + Style.RESET_ALL
+        )
+        if not self.highlights_data:
+            print("\n❌ No highlights extracted or all were deleted.")
+            return
+
+        last_page = None
+        for entry in self.highlights_data:
+            # Emit a heading each time the page value changes.
+            page = entry.get("page")
+            if page != last_page:
+                last_page = page
+                print(f"\n📄 {Style.BRIGHT}Page {page}{Style.RESET_ALL}\n" + "-" * 25)
+
+            color_name = entry.get("color", "unknown_color")
+            color_code = self._get_color_display_codes(color_name)
+
+            segment_count = entry.get("num_segments", 0)
+            if segment_count > 1:
+                segment_note = f" [{segment_count} segments]"
+            else:
+                segment_note = ""
+
+            quoted_text = entry.get("text", "*NO TEXT*")
+
+            label = color_name.upper()
+            if color_name == "other_color":
+                raw_rgb = entry.get("raw_rgb_values")
+                if raw_rgb and len(raw_rgb) >= 3:
+                    channels = tuple(
+                        int(value * 255) if isinstance(value, float) else int(value)
+                        for value in raw_rgb[:3]
+                    )
+                    label += f" (RGB: {channels})"
+
+            print(f"🎨 {color_code}{label}{Style.RESET_ALL}{segment_note}")
+            print(f'   "{quoted_text}"')
+            print()
+
+    def save_to_json(self, output_path_str):
+        """Write the extracted highlights plus run metadata to a JSON file.
+
+        The destination is resolved to an absolute path and its parent
+        directory is created when missing. A failure to create the directory,
+        or an ``IOError`` while writing, is reported when progress output is
+        enabled (with a traceback in debug mode) and is not re-raised.
+
+        Args:
+            output_path_str: Destination path of the JSON document.
+        """
+        settings = self.run_args
+        # Absolute path, so every message names the exact location.
+        output_path = Path(output_path_str).resolve()
+        target_dir = output_path.parent
+
+        try:
+            target_dir.mkdir(parents=True, exist_ok=True)
+            if settings.debug:
+                print(
+                    Fore.CYAN
+                    + f"  [Debug] Ensured parent directory for JSON exists: {target_dir}"
+                )
+        except Exception as e_mkdir:
+            if settings.show_progress:
+                print(
+                    Fore.RED
+                    + f"❌ Error creating directory for JSON output {target_dir}: {e_mkdir}"
+                )
+            if settings.debug:
+                traceback.print_exc()
+            return  # Without the directory there is nowhere to write.
+
+        payload = {
+            "pdf_file_processed": str(self.pdf_path.name),
+            "pdf_full_path": str(self.pdf_path.resolve()),
+            # Fall back to the module default when no page spec was given.
+            "pages_processed_spec": settings.pages or DEFAULT_PAGES_TO_PROCESS,
+            "extraction_timestamp": time.strftime("%Y-%m-%d %H:%M:%S %Z"),
+            "total_highlights_extracted": len(self.highlights_data),
+            "settings_used": {
+                "clean_edges": settings.clean_edges,
+                "show_diff_percentage": settings.show_diff_percentage,
+            },
+            "highlights_data": self.highlights_data,
+        }
+
+        try:
+            with output_path.open("w", encoding="utf-8") as json_file:
+                json.dump(payload, json_file, indent=2, ensure_ascii=False)
+            if settings.show_progress:
+                print(Fore.GREEN + f"💾 Data saved to {output_path}")
+        except IOError as write_error:
+            if settings.show_progress:
+                print(Fore.RED + f"❌ Error saving JSON to {output_path}: {write_error}")
+            if settings.debug:
+                traceback.print_exc()
+
+
+def main():
+    """Command-line entry point for the PDF highlight extractor.
+
+    Parses the CLI flags and derives the effective run settings: debug turns
+    every SHOW flag on, silent turns them all off and disables interactive
+    mode. It then chooses the PDF to process, runs the extraction, optionally
+    prints the results and saves them as JSON.
+
+    PDF selection:
+        * ``-t`` always uses ``DEFAULT_PDF_PATH``.
+        * ``-s`` uses the given path, or ``DEFAULT_PDF_PATH`` if none is given.
+        * otherwise the given path is used, or the user is prompted.
+
+    Exits with status 1 when no usable PDF path is available. Errors raised
+    while processing are reported (when progress output is enabled) instead
+    of being propagated.
+    """
+    parser = argparse.ArgumentParser(
+        description="Enhanced PDF Highlight Extractor.",
+        formatter_class=argparse.RawTextHelpFormatter,
+        epilog=f"""Examples:
+  {sys.argv[0]} mydoc.pdf
+  {sys.argv[0]} mydoc.pdf -p "1,5-7" -i
+  {sys.argv[0]} -t -s --output-json results/test.json
+  {sys.argv[0]} doc.pdf -d
+
+If interactive image viewing ('O' option) fails, try running with the -d (debug)
+flag. This will print detailed information about image paths and creation steps.
+Common issues include missing default PNG viewers or OS-level permission problems.
+The IMAGE_FOLDER_PATH ('{IMAGE_FOLDER_PATH}') is relative to where you run the script.
+""",
+    )
+    parser.add_argument(
+        "pdf_path_arg",
+        nargs="?",
+        default=None,
+        help="Path to PDF. Prompts if not in test/silent mode & not provided.",
+    )
+    parser.add_argument(
+        "-p",
+        "--pages",
+        type=str,
+        default=None,
+        help=f'Pages (e.g., "1,3-5", "all"). Default: "{DEFAULT_PAGES_TO_PROCESS}".',
+    )
+    parser.add_argument(
+        "-i",
+        "--interactive",
+        action="store_true",
+        help="Enable interactive review mode.",
+    )
+    parser.add_argument(
+        "-t",
+        "--test",
+        action="store_true",
+        help=f"Test mode. Uses default PDF ('{DEFAULT_PDF_PATH}'), auto-saves JSON.",
+    )
+    parser.add_argument(
+        "-s",
+        "--silent",
+        action="store_true",
+        help="Silent mode. Minimal output. Auto-saves JSON. Implies -t if no PDF path.",
+    )
+    parser.add_argument(
+        "-d",
+        "--debug",
+        action="store_true",
+        help="Debug mode. Enables all detailed SHOW flags and prints more internal details.",
+    )
+    parser.add_argument(
+        "--output-json",
+        type=str,
+        default=None,
+        help="Custom output JSON filename/path.",
+    )
+
+    cli_args = parser.parse_args()
+
+    effective_run_args = argparse.Namespace()
+    effective_run_args.debug = cli_args.debug
+    effective_run_args.silent = cli_args.silent
+
+    # Initialize based on global defaults
+    effective_run_args.show_timing = INITIAL_SHOW_TIMING
+    effective_run_args.show_progress = INITIAL_SHOW_PROGRESS
+    effective_run_args.show_raw_segments = INITIAL_SHOW_RAW_SEGMENTS
+    effective_run_args.show_extraction_details = INITIAL_SHOW_EXTRACTION_DETAILS
+    effective_run_args.show_rect_details = INITIAL_SHOW_RECT_DETAILS
+    effective_run_args.show_diff_percentage = INITIAL_SHOW_DIFF_PERCENTAGE
+    effective_run_args.clean_edges = INITIAL_CLEAN_EDGES
+
+    # The SHOW flags that debug forces on and silent forces off.
+    show_flag_names = (
+        "show_timing",
+        "show_progress",
+        "show_raw_segments",
+        "show_extraction_details",
+        "show_rect_details",
+        "show_diff_percentage",
+    )
+
+    if effective_run_args.debug:
+        for flag_name in show_flag_names:
+            setattr(effective_run_args, flag_name, True)  # Debug enables all these
+
+    # Silent is applied after debug, so it wins when both are given.
+    if effective_run_args.silent:
+        for flag_name in show_flag_names:
+            setattr(effective_run_args, flag_name, False)  # Silent disables all these
+        effective_run_args.interactive = False
+    else:  # Not silent
+        effective_run_args.interactive = cli_args.interactive
+
+    effective_run_args.pages = cli_args.pages
+
+    start_time = time.time()
+    if effective_run_args.show_progress:
+        print(
+            Fore.MAGENTA
+            + Style.BRIGHT
+            + "🎨 PDF Highlight Extractor 🎨"
+            + Style.RESET_ALL
+        )
+    if effective_run_args.debug:
+        print(Fore.CYAN + f"  [Debug] Current Working Directory: {Path.cwd()}")
+        print(Fore.CYAN + f"  [Debug] Effective Run Arguments: {effective_run_args}")
+
+    if IMAGE_FOLDER_PATH and CLEAR_IMAGE_FOLDER_ON_START:
+        _clear_png_files_in_folder(IMAGE_FOLDER_PATH, effective_run_args)
+
+    # Test mode always runs against the default PDF. Silent mode only falls
+    # back to it when no path was given ("Implies -t if no PDF path"), so an
+    # explicitly supplied PDF is not ignored under -s.
+    if cli_args.test or (cli_args.silent and not cli_args.pdf_path_arg):
+        pdf_path_to_use = DEFAULT_PDF_PATH
+    elif cli_args.pdf_path_arg:
+        pdf_path_to_use = cli_args.pdf_path_arg
+    else:
+        pdf_path_input = (
+            input(f"📄 PDF path (Enter for default '{DEFAULT_PDF_PATH}'): ")
+            .strip()
+            .strip('"')
+        )
+        pdf_path_to_use = pdf_path_input if pdf_path_input else DEFAULT_PDF_PATH
+
+    if not pdf_path_to_use:
+        if effective_run_args.show_progress:
+            print(Fore.RED + "❌ No PDF path specified. Exiting.")
+        sys.exit(1)
+
+    resolved_path = Path(pdf_path_to_use).resolve()
+    if not resolved_path.exists() or not resolved_path.is_file():
+        if effective_run_args.show_progress:
+            print(Fore.RED + f"❌ PDF not found or is not a file: {resolved_path}")
+        sys.exit(1)
+
+    doc_for_processing = None
+    try:
+        doc_for_processing = fitz.open(str(resolved_path))
+        extractor = EnhancedPDFHighlightExtractor(
+            resolved_path,
+            effective_run_args,
+            main_doc_for_image_view=doc_for_processing,
+        )
+        extractor.extract_highlights(doc_for_processing)
+
+        if not effective_run_args.interactive and effective_run_args.show_progress:
+            extractor.display_results()
+        elif effective_run_args.interactive and effective_run_args.show_progress:
+            if (
+                input(
+                    Fore.CYAN
+                    + "Interactive session ended. Display final results? [Y/n]: "
+                    + Style.RESET_ALL
+                )
+                .lower()
+                .strip()
+                != "n"
+            ):
+                extractor.display_results()
+
+        json_output_path_str = (
+            cli_args.output_json
+            if cli_args.output_json
+            else str(resolved_path.parent / f"{resolved_path.stem}_highlights.json")
+        )
+
+        # Auto-save in test/silent mode. This is also the fallback when
+        # progress output is off without -t/-s: no prompt can be shown there,
+        # and leaving perform_save unassigned raised UnboundLocalError below.
+        perform_save = True
+        if (
+            not (cli_args.test or cli_args.silent)
+            and effective_run_args.show_progress
+        ):
+            save_prompt_input = input(
+                f"💾 Save to JSON? (Enter for default '{json_output_path_str}', type 'skip' to not save, or enter a custom path): "
+                + Style.RESET_ALL
+            ).strip()
+            perform_save = save_prompt_input.lower() != "skip"
+            # Any other non-empty answer is taken as a custom output path.
+            if perform_save and save_prompt_input:
+                json_output_path_str = save_prompt_input
+
+        if perform_save:
+            if extractor.highlights_data:
+                extractor.save_to_json(json_output_path_str)
+            elif effective_run_args.show_progress:
+                print(
+                    Fore.YELLOW
+                    + "No highlights were extracted or kept, so JSON file was not saved."
+                )
+        elif effective_run_args.show_progress:
+            print(Fore.BLUE + "Skipped saving highlights to JSON.")
+
+    except Exception as e:
+        if effective_run_args.show_progress:
+            print(
+                Fore.RED
+                + Style.BRIGHT
+                + f"💥 A critical error occurred in the main execution: {e}"
+            )
+        if effective_run_args.debug:
+            traceback.print_exc()
+    finally:
+        # Runs on success and failure alike: release the document, optionally
+        # clear generated images, and report the elapsed time.
+        if doc_for_processing:
+            doc_for_processing.close()
+
+        if IMAGE_FOLDER_PATH and CLEAR_IMAGE_FOLDER_ON_END:
+            _clear_png_files_in_folder(IMAGE_FOLDER_PATH, effective_run_args)
+
+        if effective_run_args.show_timing:
+            print(
+                Fore.CYAN
+                + f"\n⏱️ Total execution time: {time.time() - start_time:.2f} seconds"
+            )
+
+# Script entry point: run the CLI only when this file is executed directly.
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
index 55f5e47..39654f0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
 colorama==0.4.6
-PyMuPDF==1.23.1
+PyMuPDF==1.22.3
diff --git a/test/test2.pdf b/test/test2.pdf
index 5563ad2..18120b3 100644
Binary files a/test/test2.pdf and b/test/test2.pdf differ
diff --git a/test/test4.pdf b/test/test4.pdf
new file mode 100644
index 0000000..72cf371
Binary files /dev/null and b/test/test4.pdf differ