diff --git a/.gitignore b/.gitignore
index 1795c8d..adf3546 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-.history
\ No newline at end of file
+.history
+*.png
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..0fed8bd
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,13 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+        {"name":"Python Debugger: Current File","type":"debugpy","request":"launch","program":"${file}","console":"integratedTerminal"},
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/clear b/clear
new file mode 100644
index 0000000..1f1ff70
--- /dev/null
+++ b/clear
@@ -0,0 +1,433 @@
+{
+  "pdf_file_processed": "test2.pdf",
+  "pdf_full_path": "/mnt/c/Users/admin/Downloads/test2.pdf",
+  "pages_processed_spec": "5",
+  "extraction_timestamp": "2025-06-03 08:55:13 EDT",
+  "total_highlights_extracted": 20,
+  "settings_used": {
+    "clean_edges": true,
+    "show_diff_percentage": true
+  },
+  "highlights_data": [
+    {
+      "page": 5,
+      "highlight_id_on_page": 1,
+      "text": "or prejudice in",
+      "color": "yellow",
+      "raw_rgb_values": [
+        1.0,
+        0.9411770105361938,
+        0.4000000059604645
+      ],
+      "type": "highlight",
+      "y_position": 53.75,
+      "x_position": 60.0,
+      "rect_details": [
+        60.0,
+        53.75,
+        116.0,
+        63.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 2,
+      "text": "unin",
+      "color": "yellow",
+      "raw_rgb_values": [
+        1.0,
+        0.9411770105361938,
+        0.4000000059604645
+      ],
+      "type": "highlight",
+      "y_position": 77.75,
+      "x_position": 164.0,
+      "rect_details": [
+        164.0,
+        77.75,
+        169.0,
+        87.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 3,
+      "text": "uninformed about how ‘language can stand as a barrier to jus-",
+      "color": "yellow",
+      "raw_rgb_values": [
+        1.0,
+        0.9411770105361938,
+        0.4000000059604645
+      ],
+      "type": "highlight",
+      "y_position": 77.75,
+      "x_position": 164.0,
+      "rect_details": [
+        164.0,
+        77.75,
+        405.0,
+        87.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 4,
+      "text": "tice or equal opportunity’.",
+      "color": "yellow",
+      "raw_rgb_values": [
+        1.0,
+        0.9411770105361938,
+        0.4000000059604645
+      ],
+      "type": "highlight",
+      "y_position": 89.75,
+      "x_position": 60.0,
+      "rect_details": [
+        60.0,
+        89.75,
+        158.0,
+        99.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 5,
+      "text": "linguistics,",
+      "color": "yellow",
+      "raw_rgb_values": [
+        1.0,
+        0.9411770105361938,
+        0.4000000059604645
+      ],
+      "type": "highlight",
+      "y_position": 137.75,
+      "x_position": 188.0,
+      "rect_details": [
+        188.0,
+        137.75,
+        226.0,
+        147.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 6,
+      "text": "needs to make applied contributions to the understanding and solution of racial discrimination, criminal injustice, and other social problems.",
+      "color": "yellow",
+      "raw_rgb_values": [
+        1.0,
+        0.9411770105361938,
+        0.4000000059604645
+      ],
+      "type": "highlight",
+      "y_position": 149.75,
+      "x_position": 60.0,
+      "rect_details": [
+        60.0,
+        149.75,
+        408.0,
+        171.75
+      ],
+      "num_segments": 2
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 7,
+      "text": "first",
+      "color": "blue",
+      "raw_rgb_values": [
+        0.5607839822769165,
+        0.8705880045890808,
+        0.9764710068702698
+      ],
+      "type": "highlight",
+      "y_position": 173.75,
+      "x_position": 182.0,
+      "rect_details": [
+        182.0,
+        173.75,
+        198.0,
+        183.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 8,
+      "text": "at interpreters are not generally provided for ‘dialects’ of a language, only for foreign ‘languages’",
+      "color": "yellow",
+      "raw_rgb_values": [
+        1.0,
+        0.9411770105361938,
+        0.4000000059604645
+      ],
+      "type": "highlight",
+      "y_position": 197.75,
+      "x_position": 60.0,
+      "rect_details": [
+        60.0,
+        197.75,
+        408.0,
+        219.75
+      ],
+      "num_segments": 2
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 9,
+      "text": "(§2),",
+      "color": "blue",
+      "raw_rgb_values": [
+        0.5607839822769165,
+        0.8705880045890808,
+        0.9764710068702698
+      ],
+      "type": "highlight",
+      "y_position": 197.75,
+      "x_position": 182.0,
+      "rect_details": [
+        182.0,
+        197.75,
+        201.0,
+        207.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 10,
+      "text": "§3",
+      "color": "blue",
+      "raw_rgb_values": [
+        0.5607839822769165,
+        0.8705880045890808,
+        0.9764710068702698
+      ],
+      "type": "highlight",
+      "y_position": 209.75,
+      "x_position": 398.0,
+      "rect_details": [
+        398.0,
+        209.75,
+        408.0,
+        219.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 11,
+      "text": "specific case of Rachel Jeantel’s dialect, a",
+      "color": "yellow",
+      "raw_rgb_values": [
+        1.0,
+        0.9411770105361938,
+        0.4000000059604645
+      ],
+      "type": "highlight",
+      "y_position": 221.75,
+      "x_position": 84.0,
+      "rect_details": [
+        84.0,
+        221.75,
+        241.0,
+        231.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 12,
+      "text": "whether the credibility and intelligibility problems that led jurors to disregard Jeantel’s testimony were due",
+      "color": "yellow",
+      "raw_rgb_values": [
+        1.0,
+        0.9411770105361938,
+        0.4000000059604645
+      ],
+      "type": "highlight",
+      "y_position": 269.75,
+      "x_position": 60.0,
+      "rect_details": [
+        60.0,
+        269.75,
+        408.0,
+        291.75
+      ],
+      "num_segments": 2
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 13,
+      "text": "§4 we",
+      "color": "blue",
+      "raw_rgb_values": [
+        0.5607839822769165,
+        0.8705880045890808,
+        0.9764710068702698
+      ],
+      "type": "highlight",
+      "y_position": 269.75,
+      "x_position": 237.0,
+      "rect_details": [
+        237.0,
+        269.75,
+        257.0,
+        279.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 14,
+      "text": "dialect and insti-",
+      "color": "yellow",
+      "raw_rgb_values": [
+        1.0,
+        0.9411770105361938,
+        0.4000000059604645
+      ],
+      "type": "highlight",
+      "y_position": 317.75,
+      "x_position": 342.0,
+      "rect_details": [
+        342.0,
+        317.75,
+        402.0,
+        327.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 15,
+      "text": "tutionalized racism negatively impact AAVE and other vernacular speakers i",
+      "color": "yellow",
+      "raw_rgb_values": [
+        1.0,
+        0.9411770105361938,
+        0.4000000059604645
+      ],
+      "type": "highlight",
+      "y_position": 329.75,
+      "x_position": 60.0,
+      "rect_details": [
+        60.0,
+        329.75,
+        367.0,
+        339.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 16,
+      "text": "(§5).",
+      "color": "blue",
+      "raw_rgb_values": [
+        0.5607839822769165,
+        0.8705880045890808,
+        0.9764710068702698
+      ],
+      "type": "highlight",
+      "y_position": 341.75,
+      "x_position": 342.0,
+      "rect_details": [
+        342.0,
+        341.75,
+        355.0,
+        351.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 17,
+      "text": "summarize our conclusions a",
+      "color": "yellow",
+      "raw_rgb_values": [
+        1.0,
+        0.9411770105361938,
+        0.4000000059604645
+      ],
+      "type": "highlight",
+      "y_position": 353.75,
+      "x_position": 60.0,
+      "rect_details": [
+        60.0,
+        353.75,
+        170.0,
+        363.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 18,
+      "text": "(§6).",
+      "color": "blue",
+      "raw_rgb_values": [
+        0.5607839822769165,
+        0.8705880045890808,
+        0.9764710068702698
+      ],
+      "type": "highlight",
+      "y_position": 365.75,
+      "x_position": 220.0,
+      "rect_details": [
+        220.0,
+        365.75,
+        236.0,
+        375.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 19,
+      "text": "at nonstandard or vernacular dialects",
+      "color": "yellow",
+      "raw_rgb_values": [
+        1.0,
+        0.9411770105361938,
+        0.4000000059604645
+      ],
+      "type": "highlight",
+      "y_position": 407.75,
+      "x_position": 206.0,
+      "rect_details": [
+        206.0,
+        407.75,
+        340.0,
+        417.75
+      ],
+      "num_segments": 1
+    },
+    {
+      "page": 5,
+      "highlight_id_on_page": 20,
+      "text": "spoken most frequently and fluently by ethnic minorities and/or by less educated, working-class,orpoorpeopleworldwide.1",
+      "color": "yellow",
+      "raw_rgb_values": [
+        1.0,
+        0.9411770105361938,
+        0.4000000059604645
+      ],
+      "type": "highlight",
+      "y_position": 431.75,
+      "x_position": 60.0,
+      "rect_details": [
+        60.0,
+        431.75,
+        408.0,
+        453.75
+      ],
+      "num_segments": 2
+    }
+  ]
+}
\ No newline at end of file
diff --git a/main.py b/main.py
index 199cc00..055401d 100644
--- a/main.py
+++ b/main.py
@@ -1,709 +1,753 @@
-"""
-PDF Highlight Extractor
-======================
+#!/usr/bin/env python3
+# =============================================================================
+# ENHANCED PDF HIGHLIGHT EXTRACTOR
+# Author: Perplexity AI Companion (Updated by User Feedback)
+# Date: June 3, 2025
+# License: MIT
+#
+# Extracts highlights from PDF files, with options for interactive review,
+# detailed output, text cleaning, JSON export, and page image viewing.
+# =============================================================================
 
-A robust tool for extracting highlighted text from PDF files with intelligent text ordering
-and hyphenation handling.
-
-Overview:
---------
-This tool addresses common PDF text extraction challenges:
-- PDFs store text in creation order, not reading order
-- Multi-line highlights can extract in wrong sequence
-- Hyphenated words across lines need rejoining
-- Boundary words may be partially highlighted
-
-Architecture:
-------------
-1. PDFHighlightExtractor: Main class handling extraction logic
-2. Multi-method extraction: Fallback system for maximum compatibility
-3. Smart text ordering: Line detection and geometric sorting
-4. Hyphenation merger: Detects and combines split words
-
-Technical Approach:
------------------
-METHOD A: PyMuPDF built-in text sorting
-- Uses page.get_text("text", sort=True) for automatic ordering
-- Most reliable for simple layouts
-
-METHOD B: Text block extraction
-- Extracts PDF text blocks which maintain better reading order
-- Geometric sorting by block position
-
-METHOD C: Enhanced word-level sorting
-- Individual word extraction with custom line detection
-- Groups words by Y-position, sorts by X-position within lines
-- Handles complex multi-line highlights
-
-Hyphenation Algorithm:
---------------------
-1. Detects highlights ending with '-'
-2. Checks next highlight for same color and reasonable distance
-3. Merges: "lin-" + "guistics" → "linguistics"
-4. Supports both same-page and cross-page hyphenation
-
-Color Detection:
----------------
-- RGB color space analysis
-- Supports 4 highlight colors: Yellow, Pink, Green, Blue
-- Handles both fill and stroke color properties
-
-Precision Control:
------------------
-- 40% overlap threshold for word inclusion
-- +2 pixel boundary expansion for edge cases
-- 5-pixel line tolerance for multi-line detection
-
-Usage Patterns:
---------------
-Test Mode: python script.py --test
-- Uses default PDF path
-- Display-only output
-- Quick testing and debugging
-
-Full Mode: python script.py
-- Interactive prompts for file paths
-- Optional JSON/CSV export
-- Complete control over options
-"""
 import time
-import pdfplumber
+import os
 import fitz  # PyMuPDF
 import json
 from colorama import init, Fore, Back, Style
-import pandas as pd
 from pathlib import Path
 import re
+import string
 import sys
+import traceback
+import argparse
+import difflib # For text difference calculation
+import tempfile # For temporary image files
+import webbrowser # For opening images/PDFs
+import uuid # For unique filenames
 
-# Initialize colorama for colored terminal output
+# Attempt to import readline for better input() experience on some systems
+try:
+    import readline
+    READLINE_AVAILABLE = True
+except ImportError:
+    READLINE_AVAILABLE = False # readline not available
+
+# =============================================================================
+# GLOBAL CONFIGURATION FLAGS (Defaults, can be overridden by CLI args)
+# =============================================================================
+DEFAULT_PDF_PATH = "/mnt/c/Users/admin/Downloads/test2.pdf" # Example, adjust if needed
+DEFAULT_PAGES_TO_PROCESS = "3" # Example: "1,3-5,all"
+
+# Default Behavior flags (can be influenced by -d or -s CLI flags)
+# These are used to initialize effective_run_args
+# Keep these distinct from the effective_run_args object itself
+INITIAL_SHOW_TIMING = True
+INITIAL_SHOW_PROGRESS = True
+INITIAL_SHOW_RAW_SEGMENTS = True
+INITIAL_SHOW_EXTRACTION_DETAILS = True
+INITIAL_SHOW_RECT_DETAILS = True
+INITIAL_SHOW_DIFF_PERCENTAGE = True
+INITIAL_CLEAN_EDGES = True
+
+# Text extraction parameters (generally fixed)
+TEXT_EXTRACTION_HORIZONTAL_PADDING = 6.0
+TEXT_EXTRACTION_VERTICAL_PADDING = 1.0
+
+# Edge cleaning configuration (generally fixed)
+VALID_TWO_LETTER_WORDS = {
+    'am', 'an', 'as', 'at', 'be', 'by', 'do', 'go', 'he', 'if', 'in', 'is', 'it', 'me', 'my',
+    'no', 'of', 'on', 'or', 'ox', 'so', 'to', 'up', 'us', 'we'}
+VALID_SINGLE_LETTERS = {'i', 'a'}
+
+# Image handling configuration
+IMAGE_FOLDER_PATH = 'pdf_page_images'  # Relative to CWD by default
+CLEAR_IMAGE_FOLDER_ON_START = True
+CLEAR_IMAGE_FOLDER_ON_END = False
+
+# Initialize colorama
 init(autoreset=True)
 
-class PDFHighlightExtractor:
-    """
-Main extraction class for PDF highlighted text.
+# --- Helper Functions ---
+def get_text_diff_ratio(text1, text2):
+    if not text1 and not text2: return 1.0
+    if not text1 or not text2: return 0.0
+    return difflib.SequenceMatcher(None, str(text1), str(text2)).ratio()
 
-This class handles the complete extraction pipeline from PDF analysis
-to formatted output with intelligent text ordering and hyphenation.
-
-Key Features:
-------------
-- Multi-method text extraction with fallback
-- Geometric text ordering for proper reading sequence
-- Hyphenation detection and merging
-- 4-color highlight support (Yellow, Pink, Green, Blue)
-- Cross-page highlight handling
-
-Extraction Pipeline:
-------------------
-1. PDF Loading: Opens PDF with PyMuPDF
-2. Annotation Detection: Finds highlight annotations
-3. Color Classification: Identifies highlight colors
-4. Text Extraction: Uses multi-method approach
-5. Text Ordering: Applies geometric sorting
-6. Hyphenation Merging: Combines split words
-7. Output Formatting: Prepares results for display/export
-
-Methods Overview:
----------------
-extract_all_highlights(): Main entry point
-_extract_text_balanced(): Core text extraction with ordering
-_smart_hyphenation_merge(): Hyphenation detection and merging
-_is_clear_hyphenation(): Hyphenation pattern recognition
-display_results(): Formatted terminal output
-
-Usage:
-------
-extractor = PDFHighlightExtractor('path/to/file.pdf')
-annotations, highlights = extractor.extract_all_highlights()
-extractor.display_results()
-"""
-def __init__(self, pdf_path):
-    self.pdf_path = Path(pdf_path)
-    self.annotations = []
-    self.highlights = []
-
-def extract_annotation_highlights(self):
-    """Extract annotations with simple processing."""
-    annotations = []
-    try:
-        with pdfplumber.open(self.pdf_path) as pdf:
-            print(f"📄 Processing annotations...")
-            for page_num, page in enumerate(pdf.pages, 1):
-                if hasattr(page, 'annots') and page.annots:
-                    for annot in page.annots:
-                        try:
-                            annot_type = annot.get('subtype', 'Unknown')
-                            if annot_type in ['Highlight', 'Squiggly', 'StrikeOut', 'Underline', 'FreeText', 'Text']:
-                                rect = annot.get('rect', [])
-                                text = self._get_annotation_text(page, annot, rect)
-                                color = self._get_simple_color(annot.get('color', []))
-                                
-                                if text and text.strip():
-                                    annotations.append({
-                                        'page': page_num,
-                                        'text': text.strip(),
-                                        'color': color,
-                                        'type': 'annotation',
-                                        'y_position': rect[1] if len(rect) >= 4 else 0
-                                    })
-                        except:
-                            continue
-        
-        print(f"  ✅ Found {len(annotations)} annotations")
-    except Exception as e:
-        print(f"❌ Error: {e}")
+def clean_segment_edges_func(text_to_clean, clean_edges_setting):
+    if not clean_edges_setting or not text_to_clean: return text_to_clean
+    text_to_clean = re.sub(r'\s+', ' ', text_to_clean.strip())
+    words = text_to_clean.split()
+    if not words: return text_to_clean
     
-    return annotations
+    current_idx = 0
+    while current_idx < len(words):
+        token = words[current_idx]
+        core_token = token.rstrip(string.punctuation)
+        trailing_punctuation = token[len(core_token):]
+        if not core_token: words.pop(current_idx); continue
+        core_should_be_removed = (len(core_token) == 1 and core_token.isalpha() and core_token.lower() not in VALID_SINGLE_LETTERS) or \
+                                 (len(core_token) == 2 and core_token.isalpha() and core_token.lower() not in VALID_TWO_LETTER_WORDS)
+        if core_should_be_removed:
+            if trailing_punctuation: words[current_idx] = trailing_punctuation
+            else: words.pop(current_idx)
+            continue
+        break
+    while words:
+        token = words[-1]
+        core_token = token.lstrip(string.punctuation)
+        leading_punctuation = token[:-len(core_token)] if core_token else ""
+        if not core_token: words.pop(); continue
+        core_should_be_removed = (len(core_token) == 1 and core_token.isalpha() and core_token.lower() not in VALID_SINGLE_LETTERS) or \
+                                 (len(core_token) == 2 and core_token.isalpha() and core_token.lower() not in VALID_TWO_LETTER_WORDS)
+        if core_should_be_removed:
+            if leading_punctuation: words[-1] = leading_punctuation
+            else: words.pop()
+            continue
+        break
+    return ' '.join(words)
 
-def extract_background_highlights(self):
-    """Extract highlights with BALANCED precision - capture complete highlights."""
-    all_highlights = []
+def input_with_prefill(prompt, text):
+    if READLINE_AVAILABLE:
+        def hook():
+            readline.insert_text(text)
+            readline.redisplay()
+        readline.set_pre_input_hook(hook)
+        result = input(prompt)
+        readline.set_pre_input_hook()
+        return result
+    else: 
+        print(Fore.MAGENTA + "Current text (edit below):\n" + Style.RESET_ALL + f"{text}")
+        return input(prompt)
+
+def _clear_png_files_in_folder(folder_path_str, run_args_for_print_control):
+    # This function CLEARS files if folder exists. It DOES NOT CREATE the folder.
+    if not folder_path_str: return
     
-    try:
-        print(f"\n🎨 Processing highlights...")
-        doc = fitz.open(str(self.pdf_path))
+    folder = Path(folder_path_str) # Path relative to CWD if not absolute
+    abs_folder_path = folder.resolve()
+
+    if run_args_for_print_control.debug:
+        print(Fore.CYAN + f"  [Debug] _clear_png_files_in_folder: Checking {abs_folder_path} (Specified as: '{folder_path_str}')")
+
+    if abs_folder_path.is_dir():
+        if run_args_for_print_control.show_progress: 
+            print(Fore.BLUE + f"Clearing *.png files from {abs_folder_path}...")
+        cleared_count = 0
+        try:
+            for file_path in abs_folder_path.glob("*.png"):
+                if file_path.is_file():
+                    file_path.unlink()
+                    cleared_count +=1
+        except Exception as e:
+            if run_args_for_print_control.show_progress: # Also show error if progress is on
+                print(Fore.RED + f"Error during file deletion in {abs_folder_path}: {e}")
         
-        # Collect each individual highlight with BALANCED extraction
-        for page_num in range(doc.page_count):
-            page = doc[page_num]
-            annotations = page.annots()
-            
-            for annot in annotations:
+        if run_args_for_print_control.show_progress:
+            if cleared_count > 0:
+                print(Fore.BLUE + f"Cleared {cleared_count} *.png files from {abs_folder_path}.")
+            else:
+                print(Fore.BLUE + f"No *.png files found to clear in {abs_folder_path}.")
+    else:
+        if run_args_for_print_control.show_progress: 
+            print(Fore.YELLOW + f"Image folder {abs_folder_path} not found, skipping clear.")
+        elif run_args_for_print_control.debug: # Still log if not found in debug, even if not show_progress
+            print(Fore.CYAN + f"  [Debug] _clear_png_files_in_folder: Folder {abs_folder_path} does not exist. Nothing to clear.")
+
+
+class EnhancedPDFHighlightExtractor:
+    def __init__(self, pdf_path, effective_run_args, main_doc_for_image_view=None):
+        self.pdf_path = Path(pdf_path)
+        self.run_args = effective_run_args 
+        self.pdf_filename_stem = self.pdf_path.stem
+        self.highlights_data = []
+        self.main_doc_for_image_view = main_doc_for_image_view
+
+    def _get_highlight_color_from_rgb_tuple(self, rgb_tuple_floats_or_ints):
+        if not rgb_tuple_floats_or_ints or len(rgb_tuple_floats_or_ints) < 3 : return 'unknown_color'
+        r, g, b = [int(x * 255) if isinstance(x, float) and 0.0 <= x <= 1.0 else int(x) for x in rgb_tuple_floats_or_ints[:3]]
+        if r == 142 and g == 221 and b == 249: return 'blue' 
+        if r > 200 and g > 200 and b < 150: return 'yellow'
+        if r < 150 and g > 180 and b < 150: return 'green'
+        if r < 150 and g < 180 and b > 180: return 'blue' 
+        if r > 180 and g < 180 and b > 180: return 'pink'
+        return 'other_color'
+
+    def _get_highlight_color_from_annot_colors_dict(self, colors_dict):
+        if not colors_dict: return 'unknown_color', None
+        rgb_tuple = colors_dict.get('stroke') or colors_dict.get('fill')
+        if not rgb_tuple: return 'unknown_color', None
+        return self._get_highlight_color_from_rgb_tuple(rgb_tuple), rgb_tuple[:3]
+
+    def _extract_text_from_multi_segment_highlight(self, page, annot, page_num, hl_id):
+        overall_highlight_color_name, _ = self._get_highlight_color_from_annot_colors_dict(annot.colors)
+        color_code_for_segment_print = self._get_color_display_codes(overall_highlight_color_name)
+        quads_vertices = annot.vertices
+        if not quads_vertices:
+            if self.run_args.show_extraction_details: print(Fore.YELLOW + f"            No quads for HL {hl_id} on page {page_num}")
+            return None, 0, []
+
+        processed_quads_as_points_list = []
+        if len(quads_vertices) % 4 == 0:
+            for i in range(0, len(quads_vertices), 4):
                 try:
-                    if annot.type[1] == 'Highlight':
-                        colors = annot.colors
-                        color_name = self._get_highlight_color(colors)
-                        
-                        if color_name in ['yellow', 'pink', 'green', 'blue']:
-                            # BALANCED: Extract complete highlighted phrases
-                            text = self._extract_text_balanced(page, annot)
-                            
-                            if text and text.strip():
-                                all_highlights.append({
-                                    'page': page_num + 1,
-                                    'text': text.strip(),
-                                    'color': color_name,
-                                    'type': 'highlight',
-                                    'y_position': annot.rect.y0,
-                                    'x_position': annot.rect.x0,
-                                    'y_end': annot.rect.y1,
-                                    'x_end': annot.rect.x1,
-                                    'rect': annot.rect
-                                })
-                                print(f"    🎨 {color_name.upper()}: \"{text[:70]}...\"")
+                    quad_points = [fitz.Point(p) for p in quads_vertices[i:i+4]]
+                    processed_quads_as_points_list.append(quad_points)
                 except Exception as e:
+                    if self.run_args.show_extraction_details: print(Fore.YELLOW + f"            Skipping malformed quad points: {e}")
                     continue
         
-        doc.close()
-        
-        # Smart hyphenation merging only
-        merged_highlights = self._smart_hyphenation_merge(all_highlights)
-        
-        print(f"  📊 Raw: {len(all_highlights)} → Merged: {len(merged_highlights)}")
-        return merged_highlights
-        
-    except Exception as e:
-        print(f"❌ Error: {e}")
-        return []
-
-def _extract_text_balanced(self, page, annot):
-    """BALANCED: Extract text with PROPER READING ORDER."""
-    try:
-        # Method 1: Use PyMuPDF's built-in text ordering with sorting
-        highlight_rect = annot.rect
-        
-        # SMALL EXPANSION for boundary words
-        expanded_rect = fitz.Rect(
-            highlight_rect.x0 - 2,
-            highlight_rect.y0 - 1, 
-            highlight_rect.x1 + 2,
-            highlight_rect.y1 + 1
-        )
-        
-        # METHOD A: Use text extraction with BUILT-IN SORTING
-        print(f"      🔍 Method A: Text extraction with sorting")
-        text_with_sort = page.get_text("text", clip=expanded_rect, sort=True)
-        if text_with_sort and text_with_sort.strip():
-            cleaned_text = re.sub(r'\s+', ' ', text_with_sort.strip())
-            print(f"      ✅ Sorted text result: \"{cleaned_text}\"")
-            return cleaned_text
-        
-        # METHOD B: Text blocks (better reading order than individual words)
-        print(f"      🔍 Method B: Text blocks extraction")
-        text_blocks = page.get_text("blocks", clip=expanded_rect)
-        if text_blocks:
-            # Sort blocks by reading order (top to bottom, left to right)
-            text_blocks.sort(key=lambda block: (block[1], block[0]))  # y-pos, then x-pos
-            
-            block_texts = []
-            for block in text_blocks:
-                if len(block) >= 5 and block[4].strip():
-                    block_text = block[4].strip()
-                    block_text = re.sub(r'\s+', ' ', block_text)
-                    block_texts.append(block_text)
-            
-            if block_texts:
-                combined_text = " ".join(block_texts)
-                print(f"      ✅ Block result: \"{combined_text}\"")
-                return combined_text
-        
-        # METHOD C: Enhanced word-level with geometric sorting
-        print(f"      🔍 Method C: Enhanced word sorting")
-        all_words = page.get_text("words")
-        highlight_words = []
-        
-        for word in all_words:
-            word_rect = fitz.Rect(word[:4])
-            word_text = word[4]
-            
-            if expanded_rect.intersects(word_rect):
-                intersection = expanded_rect & word_rect
-                word_area = word_rect.get_area()
-                
-                if word_area > 0:
-                    overlap_ratio = intersection.get_area() / word_area
-                    
-                    if overlap_ratio >= 0.40:
-                        highlight_words.append({
-                            'text': word_text,
-                            'x0': word[0],
-                            'y0': word[1],
-                            'x1': word[2],
-                            'y1': word[3],
-                            'center_y': (word[1] + word[3]) / 2,
-                            'center_x': (word[0] + word[2]) / 2
-                        })
-        
-        if highlight_words:
-            # ENHANCED SORTING: Group by lines first, then sort within lines
-            # Group words by approximate line (within 5 pixels of each other)
-            lines = []
-            for word in highlight_words:
-                placed = False
-                for line in lines:
-                    # Check if word belongs to existing line
-                    avg_y = sum(w['center_y'] for w in line) / len(line)
-                    if abs(word['center_y'] - avg_y) <= 5:  # Same line tolerance
-                        line.append(word)
-                        placed = True
-                        break
-                
-                if not placed:
-                    lines.append([word])
-            
-            # Sort lines by Y position (top to bottom)
-            lines.sort(key=lambda line: sum(w['center_y'] for w in line) / len(line))
-            
-            # Sort words within each line by X position (left to right)
-            for line in lines:
-                line.sort(key=lambda w: w['center_x'])
-            
-            # Combine all words in reading order
-            ordered_words = []
-            for line in lines:
-                ordered_words.extend(line)
-            
-            extracted_text = " ".join([w['text'] for w in ordered_words])
-            print(f"      ✅ Enhanced word sorting ({len(ordered_words)} words): \"{extracted_text}\"")
-            return extracted_text
-        
-        print(f"      ❌ No text found in highlight area")
-        return ""
-        
-    except Exception as e:
-        print(f"      ❌ Extraction error: {e}")
-        return ""
-
-
-def _extract_by_quads_balanced(self, page, annot):
-    """Extract using quad points with BALANCED precision."""
-    try:
-        quad_points = annot.vertices
-        if not quad_points:
-            return ""
-            
-        quad_count = int(len(quad_points) / 4)
-        all_words = page.get_text("words")
-        highlight_words = []
-        
-        print(f"      🔍 Processing {quad_count} quads with balanced precision")
-        
-        for i in range(quad_count):
-            points = quad_points[i * 4: i * 4 + 4]
-            quad_rect = fitz.Quad(points).rect
-            
-            # SMALL EXPANSION - 2 pixels to catch boundary words
-            expanded_quad = fitz.Rect(
-                quad_rect.x0 - 2, quad_rect.y0 - 1,
-                quad_rect.x1 + 2, quad_rect.y1 + 1
-            )
-            
-            for word in all_words:
-                word_rect = fitz.Rect(word[:4])
-                word_text = word[4]
-                
-                if expanded_quad.intersects(word_rect):
-                    intersection = expanded_quad & word_rect
-                    word_area = word_rect.get_area()
-                    
-                    if word_area > 0:
-                        overlap_ratio = intersection.get_area() / word_area
-                        
-                        # RELAXED: 40% overlap required (was 75%)
-                        if overlap_ratio >= 0.40:
-                            highlight_words.append({
-                                'text': word_text,
-                                'x0': word[0],
-                                'y0': word[1],
-                                'line': self._estimate_line_number(word[1])
-                            })
-                            print(f"        ✓ Quad '{word_text}' (overlap: {overlap_ratio:.2f})")
-        
-        if highlight_words:
-            # Remove duplicates while preserving order
-            seen = set()
-            unique_words = []
-            for word in highlight_words:
-                word_key = (word['text'], word['x0'], word['y0'])
-                if word_key not in seen:
-                    seen.add(word_key)
-                    unique_words.append(word)
-            
-            # Sort by reading order
-            unique_words.sort(key=lambda w: (w['line'], w['x0']))
-            extracted_text = " ".join([w['text'] for w in unique_words])
-            print(f"      ✅ Quad balanced ({len(unique_words)} words): \"{extracted_text}\"")
-            return extracted_text
-        
-        return ""
-        
-    except Exception as e:
-        print(f"      ❌ Quad extraction error: {e}")
-        return ""
-
-def _estimate_line_number(self, y_position, avg_line_height=14):
-    """Estimate line number based on y-position."""
-    return round(y_position / avg_line_height)
-
-def _smart_hyphenation_merge(self, highlights):
-    """Smart merging - ONLY for clear hyphenation patterns."""
-    if not highlights:
-        return highlights
-    
-    # Sort by page, color, then position
-    highlights.sort(key=lambda x: (x['page'], x['color'], x['y_position'], x['x_position']))
-    
-    merged = []
-    i = 0
-    
-    while i < len(highlights):
-        current = highlights[i]
-        
-        # Look for hyphenation continuation
-        if (i + 1 < len(highlights) and 
-            self._is_clear_hyphenation(current, highlights[i + 1])):
-            
-            next_hl = highlights[i + 1]
-            merged_text = self._join_hyphenated_text(current['text'], next_hl['text'])
-            
-            merged_highlight = current.copy()
-            merged_highlight['text'] = merged_text
-            
-            if current['page'] != next_hl['page']:
-                merged_highlight['pages_spanned'] = f"Pages {current['page']}-{next_hl['page']}"
-                print(f"  🔗 Cross-page hyphen: \"{merged_text[:80]}\"")
-            else:
-                merged_highlight['hyphen_merged'] = True
-                print(f"  🔗 Same-page hyphen: \"{merged_text[:80]}\"")
-                
-            merged.append(merged_highlight)
-            i += 2  # Skip both highlights
-        else:
-            merged.append(current)
-            i += 1
-    
-    return merged
-
-def _is_clear_hyphenation(self, hl1, hl2):
-    """Detect ONLY clear hyphenation patterns."""
-    # Must be same color
-    if hl1['color'] != hl2['color']:
-        return False
-    
-    text1 = hl1['text'].strip()
-    text2 = hl2['text'].strip()
-    
-    # MUST end with hyphen for hyphenation
-    if not text1.endswith('-'):
-        return False
-    
-    # Same page: check reasonable line spacing
-    if hl1['page'] == hl2['page']:
-        y_diff = abs(hl1['y_position'] - hl2['y_position'])
-        # Reasonable line height (8-30 pixels) - slightly more lenient
-        if 8 <= y_diff <= 30 and hl2['y_position'] > hl1['y_position']:
-            print(f"  🔍 Same-page hyphen detected: '{text1}' + '{text2[:15]}'")
-            return True
-    
-    # Cross-page: second highlight should be near top
-    elif hl2['page'] == hl1['page'] + 1 and hl2['y_position'] < 150:
-        print(f"  🔍 Cross-page hyphen detected: '{text1}' + '{text2[:15]}'")
-        return True
-    
-    return False
-
-def _join_hyphenated_text(self, text1, text2):
-    """Join hyphenated text correctly."""
-    text1 = text1.strip()
-    text2 = text2.strip()
-    
-    if text1.endswith('-'):
-        # Remove hyphen and join
-        return text1[:-1] + text2
-    else:
-        return text1 + " " + text2
-
-def _get_highlight_color(self, colors):
-    """Get highlight color - only 4 colors."""
-    if not colors:
-        return 'unknown'
-    
-    if 'fill' in colors and colors['fill']:
-        rgb = colors['fill']
-    elif 'stroke' in colors and colors['stroke']:
-        rgb = colors['stroke']
-    else:
-        return 'unknown'
-    
-    return self._rgb_to_simple_color(rgb)
-def _rgb_to_simple_color(self, rgb):
-    """Convert RGB to one of 4 colors."""
-    if not rgb or len(rgb) < 3:
-        return 'unknown'
-    
-    r, g, b = rgb[:3]
-    
-    if r <= 1:
-        r, g, b = r*255, g*255, b*255
-    
-    if r > 220 and g > 220 and b < 120:
-        return 'yellow'
-    elif r < 120 and g > 180 and b < 120:
-        return 'green'
-    elif r < 120 and g < 180 and b > 180:
-        return 'blue'
-    elif r > 180 and g < 180 and b > 180:
-        return 'pink'
-    else:
-        max_val = max(r, g, b)
-        if max_val == r and r > 150:
-            return 'pink'
-        elif max_val == g and g > 150:
-            return 'green'
-        elif max_val == b and b > 150:
-            return 'blue'
-        elif r > 180 and g > 180:
-            return 'yellow'
-        return 'unknown'
-
-def _get_simple_color(self, color_rgb):
-    """Get simple color from annotation."""
-    if color_rgb:
-        return self._rgb_to_simple_color(color_rgb)
-    return 'unknown'
-
-def _get_annotation_text(self, page, annot, rect):
-    """Extract annotation text."""
-    text = annot.get('contents', '').strip()
-    if text:
-        return text
-    
-    if rect and len(rect) == 4:
         try:
-            x0, y0, x1, y1 = rect
-            cropped = page.crop((x0-1, y0-1, x1+1, y1+1))
-            text = cropped.extract_text()
-            if text and text.strip():
-                return text.strip()
-        except:
-            pass
-    
-    return ""
+            sorted_quad_points_list = sorted(processed_quads_as_points_list, key=lambda qp_list: (fitz.Quad(qp_list).rect.y0, fitz.Quad(qp_list).rect.x0))
+        except Exception as e:
+            if self.run_args.show_extraction_details: print(Fore.RED + f"            Error sorting quads for HL {hl_id}: {e}. Using original order.")
+            sorted_quad_points_list = processed_quads_as_points_list
 
-def extract_all_highlights(self):
-    """Main extraction method."""
-    print("🔍 PDF Highlight Extractor - BALANCED PRECISION")
-    print("🎯 Colors: Yellow, Pink, Green, Blue only")
-    print("🎯 BALANCED extraction - complete highlights without over-capture")
-    print("📏 Small expansion (+2 pixels) for boundary words")
-    print("🔍 40% overlap requirement (was 75% - more inclusive)")
-    print("🔗 Smart hyphenation merging")
-    print("=" * 70)
-    
-    self.annotations = self.extract_annotation_highlights()
-    self.highlights = self.extract_background_highlights()
-    
-    print(f"\n✨ Total: {len(self.annotations)} annotations, {len(self.highlights)} highlights")
-    return self.annotations, self.highlights
+        if self.run_args.show_extraction_details:
+            print(color_code_for_segment_print + Fore.CYAN + f"            Processing {len(sorted_quad_points_list)} segments for HL {hl_id} (Color: {overall_highlight_color_name.upper()}) on page {page_num}" + Style.RESET_ALL)
 
-def display_results(self):
-    """Display results cleanly."""
-    print("\n" + "="*70)
-    print("📋 EXTRACTION RESULTS")
-    print("="*70)
-    
-    all_items = []
-    for item in self.annotations:
-        item['category'] = 'annotation'
-        all_items.append(item)
-    for item in self.highlights:
-        item['category'] = 'highlight'
-        all_items.append(item)
-    
-    if not all_items:
-        print("\n❌ No highlights found")
-        return
-    
-    all_items.sort(key=lambda x: (x['page'], x['y_position']))
-    
-    current_page = None
-    for item in all_items:
-        if item['page'] != current_page:
-            current_page = item['page']
-            print(f"\n📄 Page {current_page}")
-            print("-" * 25)
+        segment_texts_final = []
+        raw_segment_texts_for_diff = []
+        for seg_idx, quad_points in enumerate(sorted_quad_points_list):
+            try:
+                bounds = fitz.Quad(quad_points).rect
+                padded_rect = fitz.Rect(bounds.x0 - TEXT_EXTRACTION_HORIZONTAL_PADDING, bounds.y0 - TEXT_EXTRACTION_VERTICAL_PADDING,
+                                        bounds.x1 + TEXT_EXTRACTION_HORIZONTAL_PADDING, bounds.y1 + TEXT_EXTRACTION_VERTICAL_PADDING)
+                padded_rect.intersect(page.rect)
+                if padded_rect.is_empty:
+                    if self.run_args.show_extraction_details: print(Fore.YELLOW + f"            Segment {seg_idx+1} empty padded_rect for HL {hl_id}")
+                    continue
+                raw_text_from_pdf_segment = page.get_text("text", clip=padded_rect, sort=True).strip()
+                raw_segment_texts_for_diff.append(raw_text_from_pdf_segment)
+                cleaned_text_segment = re.sub(r'\s+', ' ', raw_text_from_pdf_segment).strip()
+                cleaned_text_segment = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]', '', cleaned_text_segment)
+                final_text_segment = clean_segment_edges_func(cleaned_text_segment, self.run_args.clean_edges)
+
+                if final_text_segment:
+                    segment_texts_final.append(final_text_segment)
+                    if self.run_args.show_raw_segments and not self.run_args.interactive:
+                        print(color_code_for_segment_print + Fore.LIGHTBLUE_EX + f"          Segment {seg_idx+1} (P{page_num}, HL{hl_id}, Color: {overall_highlight_color_name.upper()}):" + Style.RESET_ALL)
+                        if self.run_args.show_diff_percentage:
+                            similarity = get_text_diff_ratio(raw_text_from_pdf_segment, final_text_segment)
+                            diff_percent = (1 - similarity) * 100
+                            print(Fore.LIGHTMAGENTA_EX + f"            Raw PDF : \"{raw_text_from_pdf_segment}\"")
+                            print(Fore.LIGHTBLUE_EX +    f"            Final Seg: \"{final_text_segment}\"")
+                            print(Fore.YELLOW +          f"            Diff: {diff_percent:.2f}%")
+                        else: print(Fore.LIGHTBLUE_EX +    f"            Final Seg: \"{final_text_segment}\"")
+            except Exception as e:
+                if self.run_args.show_extraction_details: print(Fore.RED + f"            Error processing segment {seg_idx+1} for HL {hl_id}: {e}")
+                raw_segment_texts_for_diff.append("")
+                continue
+
+        if not segment_texts_final: return None, len(sorted_quad_points_list), raw_segment_texts_for_diff
+        combined_text = segment_texts_final[0]
+        for i in range(1, len(segment_texts_final)):
+            prev_text = combined_text; current_text = segment_texts_final[i]
+            if prev_text.endswith('-') or prev_text.endswith('¬'): combined_text = prev_text.rstrip('-¬') + current_text
+            else: combined_text += ' ' + current_text
         
-        color_code = self._get_color_display(item['color'])
-        icon = "📝" if item['category'] == 'annotation' else "🎨"
+        if self.run_args.clean_edges: combined_text = clean_segment_edges_func(combined_text, self.run_args.clean_edges)
+        combined_text = re.sub(r'\s+', ' ', combined_text).strip()
+        return combined_text if combined_text else None, len(sorted_quad_points_list), raw_segment_texts_for_diff
+
+    def extract_highlights(self, doc):  # Collect highlight annotations from the selected pages of `doc`; returns the list that is also stored in self.highlights_data.
+        all_extracted_highlights = []
+        try:
+            if self.run_args.show_progress and not self.run_args.interactive:
+                print(Fore.BLUE + f"\n🎨 Processing highlights for PDF: {self.pdf_path.name}")
+            
+            pages_str_to_parse = self.run_args.pages if self.run_args.pages else DEFAULT_PAGES_TO_PROCESS  # a falsy run_args.pages falls back to the module default
+            pages_to_process = self._parse_specific_pages(pages_str_to_parse, doc.page_count)  # entries are used as 1-based page numbers below
+            if not pages_to_process:
+                if self.run_args.show_progress: print(Fore.YELLOW + "No valid pages selected.")
+                return []  # note: self.highlights_data is not assigned on this path
+            
+            highlight_id_counter_on_page = {}  # page number -> count of highlights seen so far on that page
+            for page_num in pages_to_process:
+                page = doc.load_page(page_num - 1)  # convert the 1-based page number to a 0-based index
+                highlight_id_counter_on_page.setdefault(page_num, 0)
+                if self.run_args.show_progress and not self.run_args.interactive:
+                    print(Fore.CYAN + f"  📄 Processing Page {page_num}...")
+                try: page_annotations = list(page.annots())
+                except Exception as e:
+                    if self.run_args.show_progress: print(Fore.RED + f"    ⚠️ Error loading annots: {e}")
+                    continue  # skip this page, keep processing the rest
+                
+                highlight_annotations = [a for a in page_annotations if hasattr(a, 'type') and a.type[0] == fitz.PDF_ANNOT_HIGHLIGHT and hasattr(a, 'rect') and a.rect]  # keep only highlight-type annotations with a truthy rect
+                if not highlight_annotations:
+                    if self.run_args.show_progress and not self.run_args.interactive: print(Fore.WHITE + f"    No highlights on page {page_num}.")
+                    continue
+                
+                if self.run_args.show_rect_details:  # optional debug listing of this page's highlight annotations
+                    print(Fore.YELLOW + f"--- Annotations before sorting (Page {page_num}) ---")
+                    temp_debug_list = []
+                    for annot_debug in highlight_annotations:
+                        debug_text_snippet = page.get_text("text", clip=annot_debug.rect).strip().replace("\n", " ")
+                        color_name_debug, rgb_values_debug = self._get_highlight_color_from_annot_colors_dict(annot_debug.colors)
+                        rgb_display = f"RGB: {tuple(int(c*255) if isinstance(c,float) else int(c) for c in rgb_values_debug[:3])}" if rgb_values_debug else "RGB: N/A"  # float components are scaled by 255 for display
+                        temp_debug_list.append({
+                            "rect": annot_debug.rect, "text_snippet": debug_text_snippet, "color_name": color_name_debug, 
+                            "rgb_display": rgb_display, "vertices_count": len(annot_debug.vertices) if annot_debug.vertices else 0 })
+                    temp_debug_list.sort(key=lambda item: (item["rect"].y0, item["rect"].x0))  # NOTE(review): heading says "before sorting", yet this listing is sorted with the same key used below
+                    for item_idx, item_val in enumerate(temp_debug_list):
+                        print(f"  {item_idx+1}. Rect: {item_val['rect']}, Vertices: {item_val['vertices_count']}, Color: {item_val['color_name'].upper()} ({item_val['rgb_display']}), Text: \"{item_val['text_snippet']}\"")
+                    print(Fore.YELLOW + "----------------------------------------------------")
+
+                highlight_annotations.sort(key=lambda a: (a.rect.y0, a.rect.x0))  # order by rect origin: y0 first, then x0
+                for annot in highlight_annotations:
+                    try:
+                        highlight_id_counter_on_page[page_num] += 1; current_hl_id_on_page = highlight_id_counter_on_page[page_num]  # the id is consumed even if no text is extracted for this annotation
+                        color_name, raw_rgb_floats = self._get_highlight_color_from_annot_colors_dict(annot.colors)
+                        extracted_text, num_segments, _ = self._extract_text_from_multi_segment_highlight(page, annot, page_num, current_hl_id_on_page)  # third return value is ignored here
+                        if extracted_text and extracted_text.strip():
+                            if self.run_args.show_extraction_details and not self.run_args.interactive:
+                                print(Fore.GREEN + f"          ✅ Final (P{page_num}, HL{current_hl_id_on_page}): \"{extracted_text[:100]}\"")
+                            all_extracted_highlights.append({
+                                'page': page_num, 'highlight_id_on_page': current_hl_id_on_page, 'text': extracted_text, 
+                                'color': color_name, 'raw_rgb_values': raw_rgb_floats, 'type': 'highlight',
+                                'y_position': annot.rect.y0, 'x_position': annot.rect.x0,
+                                'rect_details': (annot.rect.x0, annot.rect.y0, annot.rect.x1, annot.rect.y1),
+                                'num_segments': num_segments })
+                        elif self.run_args.show_progress and not self.run_args.interactive:
+                            print(Fore.YELLOW + f"      ⚠️ No text for HL {current_hl_id_on_page} on page {page_num}")
+                    except Exception as e:
+                        if self.run_args.show_progress and not self.run_args.interactive:
+                            print(Fore.RED + f"      🔴 Error processing annot on page {page_num}: {e}")
+                            if self.run_args.debug: traceback.print_exc()  # traceback is only shown when progress output is on and the run is non-interactive
+                        continue  # one bad annotation does not abort the page
+            
+            if self.run_args.interactive:
+                print(Fore.MAGENTA + "\nEntering interactive review session...")
+                self.highlights_data = self._interactive_review_session(all_extracted_highlights)  # the review session's return value becomes the final list
+            else: self.highlights_data = all_extracted_highlights
+            
+            if self.run_args.show_progress and not self.run_args.interactive and not self.run_args.silent:
+                print(Fore.MAGENTA + f"  📊 Total highlights extracted: {len(self.highlights_data)}")
+            return self.highlights_data
+        except Exception as e:  # top-level boundary: report the error and return [] instead of raising
+            print(Fore.RED + f"❌ Major error during highlight extraction: {e}")
+            if self.run_args.debug: traceback.print_exc()
+            return []
+
+    def _view_page_image_interactively(self, page_num_to_view):
+        """Render one page to a PNG and try to open it in the default viewer.
+
+        ``page_num_to_view`` is 1-based. The PNG is written under IMAGE_FOLDER_PATH
+        when that is set (and left there); otherwise a system temp file is used and
+        deleted before returning. Waits for Enter unless no document is loaded.
+        """
+        if not self.main_doc_for_image_view:
+            print(Fore.RED + "Error: PDF document not available for image rendering. This should not happen.")
+            return
+
+        tmp_image_path_obj = None
+        image_created_in_managed_folder = False
+        image_successfully_saved = False
+
+        if self.run_args.show_progress:
+            print(Fore.BLUE + f"Preparing to view image for page {page_num_to_view}...")
+
+        try:
+            page_index = page_num_to_view - 1
+            page = self.main_doc_for_image_view.load_page(page_index)
+            if self.run_args.debug: print(Fore.CYAN + f"  [Debug] Loaded page object for index {page_index}: {page}")
+
+            pix = page.get_pixmap(dpi=150)
+            if self.run_args.debug: print(Fore.CYAN + f"  [Debug] Created pixmap: {pix}. Alpha: {pix.alpha}, Colorspace: {pix.colorspace.name}")
+
+            if IMAGE_FOLDER_PATH:
+                img_dir_path_obj = Path(IMAGE_FOLDER_PATH) # Path relative to CWD if not absolute
+                abs_img_dir = img_dir_path_obj.resolve()
+
+                if self.run_args.debug: print(Fore.CYAN + f"  [Debug] Using IMAGE_FOLDER_PATH: '{IMAGE_FOLDER_PATH}' (Absolute: {abs_img_dir})")
+
+                try:
+                    abs_img_dir.mkdir(parents=True, exist_ok=True)
+                    if self.run_args.debug: print(Fore.CYAN + f"  [Debug] Ensured image directory exists: {abs_img_dir} (Status: {abs_img_dir.is_dir()})")
+                except Exception as e_mkdir:
+                    print(Fore.RED + f"  ERROR: Could not create directory {abs_img_dir}: {e_mkdir}")
+                    if self.run_args.debug: traceback.print_exc()
+                    # Do not proceed if directory creation fails
+                    input(Fore.CYAN + "Press Enter to acknowledge and continue...")
+                    return
+
+                unique_id = uuid.uuid4().hex[:8]
+                tmp_image_path_obj = abs_img_dir / f"page_{page_num_to_view}_{unique_id}.png"
+                image_created_in_managed_folder = True
+            else:
+                fd, temp_path_str = tempfile.mkstemp(suffix=".png", prefix="pdf_page_img_")
+                os.close(fd)  # only the path is needed; pix.save() reopens the file by name
+                tmp_image_path_obj = Path(temp_path_str)
+                if self.run_args.debug:
+                     print(Fore.CYAN + f"  [Debug] Using system temporary file: {tmp_image_path_obj.resolve()}")
+
+            resolved_save_path = tmp_image_path_obj.resolve()
+            if self.run_args.debug: print(Fore.CYAN + f"  [Debug] Attempting to save image to: {resolved_save_path}")
+
+            pix.save(str(resolved_save_path))
+
+            if resolved_save_path.exists() and resolved_save_path.is_file():
+                image_successfully_saved = True
+                if self.run_args.show_progress: # Print for normal progress too, not just debug
+                    print(Fore.GREEN + f"  Image for page {page_num_to_view} successfully saved to: {resolved_save_path}")
+                if self.run_args.debug:
+                    print(Fore.CYAN + f"  [Debug] File size: {resolved_save_path.stat().st_size} bytes")
+            else:
+                if self.run_args.show_progress:
+                    print(Fore.RED + f"  ERROR: Failed to save image to {resolved_save_path}. File does not exist after save attempt.")
         
-        merge_info = ""
-        if item.get('pages_spanned'):
-            merge_info = f" ({item['pages_spanned']})"
-        elif item.get('hyphen_merged'):
-            merge_info = " (hyphen-merged)"
+        except Exception as e_render_save:
+            if self.run_args.show_progress:
+                print(Fore.RED + f"  Error during image rendering or saving: {e_render_save}")
+            if self.run_args.debug:
+                traceback.print_exc()
         
-        print(f"{icon} {color_code}{item['color'].upper()}{Style.RESET_ALL}{merge_info}")
-        print(f"   \"{item['text']}\"")
+        try:  # gives the cleanup `finally:` below a try to attach to (it previously followed if/elif/else, a SyntaxError)
+            if image_successfully_saved and tmp_image_path_obj:
+                if self.run_args.show_progress:
+                    print(Fore.CYAN + f"Attempting to open image with default application...")
+                try:
+                    file_uri = tmp_image_path_obj.resolve().as_uri()
+                    if self.run_args.debug:
+                        print(Fore.CYAN + f"  [Debug] Opening URI: {file_uri}")
 
-def _get_color_display(self, color_name):
-    """Terminal color codes."""
-    colors = {
-        'yellow': Back.YELLOW + Fore.BLACK,
-        'green': Back.GREEN + Fore.BLACK,
-        'blue': Back.BLUE + Fore.WHITE,
-        'pink': Back.MAGENTA + Fore.WHITE,
-    }
-    return colors.get(color_name, Back.WHITE + Fore.BLACK)
+                    opened_successfully = webbrowser.open(file_uri)
+
+                    if self.run_args.debug: # More detailed feedback in debug mode
+                        print(Fore.CYAN + f"  [Debug] webbrowser.open() returned: {opened_successfully}")
 
-def save_to_json(self, annotations, highlights, output_path):
-    """Save to JSON."""
-    data = {
-        'annotations': annotations,
-        'highlights': highlights,
-        'summary': {
-            'total_annotations': len(annotations),
-            'total_highlights': len(highlights)
-        }
-    }
-    with open(output_path, 'w', encoding='utf-8') as f:
-        json.dump(data, f, indent=2, ensure_ascii=False)
-    print(f"💾 Saved to {output_path}")
+                    if not opened_successfully:
+                        if self.run_args.show_progress:
+                            print(Fore.YELLOW + "  webbrowser.open() reported failure (returned False or None).")
+                            print(Fore.YELLOW + f"  This often means no default application is configured for PNG files or your browser.")
+                    elif self.run_args.show_progress:
+                        print(Fore.GREEN + "  Image hopefully opened. Check your applications.")
+
+                    if self.run_args.show_progress:
+                        print(Fore.YELLOW + f"  If the image did not open, please manually open: {tmp_image_path_obj.resolve()}")
+                    input(Fore.CYAN + "Press Enter after viewing image to continue...")
 
-def save_to_csv(self, annotations, highlights, output_path):
-    """Save to CSV."""
-    all_items = []
-    for item in annotations:
-        item_copy = item.copy()
-        item_copy['category'] = 'annotation'
-        all_items.append(item_copy)
-    for item in highlights:
-        item_copy = item.copy()
-        item_copy['category'] = 'highlight'
-        all_items.append(item_copy)
-    
-    df = pd.DataFrame(all_items)
-    df.to_csv(output_path, index=False, encoding='utf-8')
-    print(f"📊 Saved to {output_path}")
+                except Exception as e_open:
+                    if self.run_args.show_progress:
+                        print(Fore.RED + f"  Could not open image using webbrowser: {e_open}")
+                        print(Fore.YELLOW + "  This could be due to your system's environment (e.g., missing 'xdg-utils' on Linux, no default PNG viewer).")
+                        print(Fore.YELLOW + f"  Please try opening the image manually: {tmp_image_path_obj.resolve()}")
+                    if self.run_args.debug:
+                        traceback.print_exc()
+                    input(Fore.CYAN + "Press Enter to acknowledge and continue...")
+            elif tmp_image_path_obj:
+                if self.run_args.show_progress:
+                    print(Fore.YELLOW + "  Skipping attempt to open image as it was not saved successfully.")
+                input(Fore.CYAN + "Press Enter to continue...")
+            else:
+                if self.run_args.show_progress:
+                    print(Fore.RED + "  Cannot attempt to open image as image path was not determined.")
+                input(Fore.CYAN + "Press Enter to continue...")
+        finally:
+            if tmp_image_path_obj and tmp_image_path_obj.exists():
+                if image_created_in_managed_folder:
+                    if self.run_args.debug:
+                         print(Fore.CYAN + f"  [Debug] Image '{tmp_image_path_obj.name}' remains in managed folder '{IMAGE_FOLDER_PATH}'.")
+                         print(Fore.CYAN + f"  [Debug] It will be cleared based on CLEAR_IMAGE_FOLDER_ON_END ({CLEAR_IMAGE_FOLDER_ON_END}).")
+                else:
+                    try:
+                        tmp_image_path_obj.unlink()
+                        if self.run_args.debug:
+                            print(Fore.CYAN + f"  [Debug] Deleted system temporary image: {tmp_image_path_obj.resolve()}")
+                    except Exception as e_unlink:
+                        if self.run_args.debug:
+                            print(Fore.YELLOW + f"  Warning: Could not delete system temp image {tmp_image_path_obj.resolve()}: {e_unlink}")
+            elif tmp_image_path_obj and not tmp_image_path_obj.exists() and image_successfully_saved:
+                if self.run_args.debug:
+                    print(Fore.RED + f"  [Debug] Inconsistency: Image was marked saved, but {tmp_image_path_obj.resolve()} does not exist at cleanup (and wasn't a system temp explicitly deleted here).")
 
 
-def is_test_mode():
-    """Check if script is run in test mode."""
-    test_flags = ['--test', '-t', 'test']
-    return any(flag in sys.argv for flag in test_flags)
+    def _interactive_review_session(self, highlights_list):
+        if not highlights_list: 
+            if self.run_args.show_progress : print(Fore.YELLOW + "No highlights to review.")
+            return []
+        reviewed_highlights = [dict(h) for h in highlights_list] 
+        idx, num_highlights = 0, len(reviewed_highlights)
+        AVAILABLE_COLORS = ['yellow', 'green', 'blue', 'pink', 'other_color', 'unknown_color']
+        
+        while 0 <= idx < num_highlights:
+            item = reviewed_highlights[idx]
+            print(Style.RESET_ALL + "\n" + "="*15 + f" Review HL {idx+1}/{num_highlights} (Page {item['page']}) " + "="*15)
+            
+            current_color_display = self._get_color_display_codes(item['color'])
+            print(f"Color: {current_color_display}{item['color'].upper()}{Style.RESET_ALL}", end="")
+            if item['color'] == 'other_color' and item.get('raw_rgb_values'):
+                rgb = item['raw_rgb_values'][:3]
+                rgb_disp = tuple(int(c*255) if isinstance(c,float) else int(c) for c in rgb)
+                print(f" (RGB: {rgb_disp})", end="")
+            print() 
+            
+            print(f"Text: {item['text']}")
+
+            prompt_options = ["[N]ext", "[P]rev", "[U]p", "[M]ove Down", "[C]olor", "[E]dit", "[D]elete", "[O]pen Img", "[S]ave&Exit", "[Q]uit"]
+            action_prompt_str = Fore.CYAN + ", ".join(prompt_options) + "? > " + Style.RESET_ALL
+            action = input(action_prompt_str).lower().strip()
+
+            if action == 'n': idx = (idx + 1) % num_highlights if num_highlights > 0 else 0
+            elif action == 'p': idx = (idx - 1 + num_highlights) % num_highlights if num_highlights > 0 else 0
+            elif action == 'u': 
+                if idx > 0:
+                    reviewed_highlights.insert(idx - 1, reviewed_highlights.pop(idx))
+                    idx -= 1
+                    print(Fore.GREEN + "Moved up.")
+                else: print(Fore.YELLOW + "Already at the top.")
+            elif action == 'm': 
+                if idx < num_highlights - 1:
+                    reviewed_highlights.insert(idx + 1, reviewed_highlights.pop(idx))
+                    idx += 1
+                    print(Fore.GREEN + "Moved down.")
+                else: print(Fore.YELLOW + "Already at the bottom.")
+            elif action == 'c':
+                print("Available colors:", ", ".join(f"{i+1}.{self._get_color_display_codes(co)}{co.upper()}{Style.RESET_ALL}" for i,co in enumerate(AVAILABLE_COLORS)))
+                try:
+                    choice_str = input(Fore.YELLOW + "Enter number for new color: " + Style.RESET_ALL)
+                    if not choice_str: print(Fore.BLUE + "Color change cancelled (no input)."); continue
+                    choice = int(choice_str) - 1
+                    if 0 <= choice < len(AVAILABLE_COLORS): 
+                        item['color'] = AVAILABLE_COLORS[choice]
+                        print(Fore.GREEN + f"Color changed to {AVAILABLE_COLORS[choice].upper()}.")
+                    else: print(Fore.RED + "Invalid color choice.")
+                except ValueError: print(Fore.RED + "Invalid input. Please enter a number.")
+            elif action == 'e':
+                edit_prompt = Fore.YELLOW + "New text (blank=keep, 'CLEAR'=empty): > " + Style.RESET_ALL
+                new_text = input_with_prefill(edit_prompt, item['text'])
+                
+                if new_text.strip().upper() == 'CLEAR': 
+                    item['text'] = ""
+                    print(Fore.GREEN + "Text cleared.")
+                elif new_text == item['text'] or not new_text.strip() : 
+                    print(Fore.BLUE + "Text kept as is.")
+                else: 
+                    item['text'] = new_text
+                    print(Fore.GREEN + "Text updated.")
+            elif action == 'd':
+                if input(Fore.RED + "Are you sure you want to delete this highlight? [y/N]: " + Style.RESET_ALL).lower() == 'y':
+                    reviewed_highlights.pop(idx)
+                    num_highlights = len(reviewed_highlights)
+                    print(Fore.GREEN + "Highlight deleted.")
+                    if num_highlights == 0: 
+                        print(Fore.YELLOW + "No more highlights to review."); break 
+                    if idx >= num_highlights: idx = num_highlights - 1 
+                else: print(Fore.BLUE + "Deletion cancelled.")
+            elif action == 'o': self._view_page_image_interactively(item['page'])
+            elif action == 's': 
+                print(Fore.GREEN + "Saving changes and exiting review session.")
+                break
+            elif action == 'q':
+                if input(Fore.RED+"Are you sure you want to quit review? Changes will not be saved. [y/N]: " + Style.RESET_ALL).lower()=='y': 
+                    print(Fore.YELLOW+"Quitting review session. Changes made in this session are DISCARDED.")
+                    return highlights_list 
+                else:
+                    print(Fore.BLUE + "Quit cancelled.")
+            else: print(Fore.RED + "Invalid action. Please choose from the list.")
+        return reviewed_highlights
+
+    def _parse_specific_pages(self, pages_str, total_pages):
+        if not pages_str or pages_str.lower() == "all": return list(range(1, total_pages + 1))
+        parsed_pages = set()
+        try:
+            for part in pages_str.split(','):
+                part = part.strip();
+                if not part: continue
+                if '-' in part:
+                    start_str, end_str = part.split('-', 1); start = int(start_str); end = int(end_str)
+                    start = max(1, start); end = min(total_pages, end)
+                    if start <= end: parsed_pages.update(range(start, end + 1))
+                else:
+                    page_val = int(part)
+                    if 1 <= page_val <= total_pages: parsed_pages.add(page_val)
+            return sorted(list(parsed_pages)) if parsed_pages else []
+        except ValueError as e:
+            if self.run_args.show_progress: print(Fore.YELLOW + f"⚠️ Invalid page range: {pages_str}. Error: {e}.")
+            return []
+
+    def _get_color_display_codes(self, color_name_str):
+        return {'yellow': Back.YELLOW + Fore.BLACK, 'green': Back.GREEN + Fore.BLACK,
+                'blue': Back.BLUE + Fore.WHITE, 'pink': Back.MAGENTA + Fore.WHITE,
+                'other_color': Back.WHITE + Fore.BLACK, 'unknown_color': Back.LIGHTBLACK_EX + Fore.WHITE
+               }.get(color_name_str.lower(), Back.LIGHTBLACK_EX + Fore.WHITE)
+
+    def display_results(self):
+        if not self.run_args.show_progress: return # Don't display if progress is off (e.g. silent)
+        
+        print("\n" + Fore.CYAN + Style.BRIGHT + "="*30 + " EXTRACTED HIGHLIGHTS " + "="*30 + Style.RESET_ALL)
+        if not self.highlights_data: print("\n❌ No highlights extracted or all were deleted."); return
+        current_page = None
+        for item in self.highlights_data:
+            if item.get('page') != current_page:
+                current_page = item.get('page'); print(f"\n📄 {Style.BRIGHT}Page {current_page}{Style.RESET_ALL}\n" + "-"*25)
+            color_name = item.get('color', 'unknown_color')
+            color_code = self._get_color_display_codes(color_name)
+            num_segments = item.get('num_segments', 0)
+            segment_info = f" [{num_segments} segments]" if num_segments > 1 else ""
+            text_content = item.get('text', "*NO TEXT*")
+            display_color_name = color_name.upper()
+            if color_name == 'other_color':
+                raw_rgb = item.get('raw_rgb_values')
+                if raw_rgb and len(raw_rgb) >=3:
+                    rgb_disp = tuple(int(c*255) if isinstance(c,float) else int(c) for c in raw_rgb[:3])
+                    display_color_name += f" (RGB: {rgb_disp})"
+            print(f"🎨 {color_code}{display_color_name}{Style.RESET_ALL}{segment_info}")
+            print(f"   \"{text_content}\""); print()
+
+    def save_to_json(self, output_path_str):
+        output_path = Path(output_path_str).resolve() # Resolve to absolute path for clarity
+        try:
+            output_path.parent.mkdir(parents=True, exist_ok=True)
+            if self.run_args.debug:
+                print(Fore.CYAN + f"  [Debug] Ensured parent directory for JSON exists: {output_path.parent}")
+        except Exception as e_mkdir:
+            if self.run_args.show_progress: # Also show error if progress is on
+                print(Fore.RED + f"❌ Error creating directory for JSON output {output_path.parent}: {e_mkdir}")
+            if self.run_args.debug: traceback.print_exc()
+            return # Cannot save if directory cannot be made
+
+        data_to_save = {
+            'pdf_file_processed': str(self.pdf_path.name), 'pdf_full_path': str(self.pdf_path.resolve()),
+            'pages_processed_spec': self.run_args.pages if self.run_args.pages else DEFAULT_PAGES_TO_PROCESS,
+            'extraction_timestamp': time.strftime("%Y-%m-%d %H:%M:%S %Z"),
+            'total_highlights_extracted': len(self.highlights_data),
+            'settings_used': {
+                'clean_edges': self.run_args.clean_edges,
+                'show_diff_percentage': self.run_args.show_diff_percentage 
+            },
+            'highlights_data': self.highlights_data }
+        try:
+            with open(output_path, 'w', encoding='utf-8') as f: json.dump(data_to_save, f, indent=2, ensure_ascii=False)
+            if self.run_args.show_progress: print(Fore.GREEN + f"💾 Data saved to {output_path}")
+        except IOError as e: 
+            if self.run_args.show_progress: print(Fore.RED + f"❌ Error saving JSON to {output_path}: {e}")
+            if self.run_args.debug: traceback.print_exc()
 
 
 def main():
-    start_time = time.time()
-    
-    test_mode = is_test_mode()
-    
-    print("🎨 PDF Highlight Extractor - BALANCED PRECISION")
-    print("✅ More inclusive extraction (40% overlap vs 75%)")
-    print("✅ Small boundary expansion (+2 pixels)")
-    print("✅ Better word capture at highlight edges")
-    print("✅ Detailed extraction logging")
-    print("✅ Smart hyphenation merging")
-    
-    if test_mode:
-        print("🧪 TEST MODE: Using defaults")
-        print("✅ Default file: /mnt/c/Users/admin/Downloads/test2.pdf")
-        print("✅ Skipping JSON/CSV output")
-    else:
-        print("🔧 FULL MODE: Interactive prompts")
-    
-    print()
-    
-    if test_mode:
-        default_pdf = "/mnt/c/Users/admin/Downloads/test2.pdf"
-        pdf_path = default_pdf
-        print(f"📄 Using default: {pdf_path}")
-    else:
-        pdf_input = input("📄 PDF file path: ").strip('"')
-        if not pdf_input:
-            print("❌ No file specified!")
-            return
-        pdf_path = pdf_input
-    
-    if not Path(pdf_path).exists():
-        print("❌ File not found!")
-        return
-    
-    output_json = ""
-    output_csv = ""
-    
-    if test_mode:
-        print("📋 Test mode: Display only (no file output)")
-    else:
-        print("\n📤 Output options:")
-        output_json = input("💾 JSON file (Enter to skip): ").strip('"')
-        output_csv = input("📊 CSV file (Enter to skip): ").strip('"')
-    
-    # Process
-    extractor = PDFHighlightExtractor(pdf_path)
-    annotations, highlights = extractor.extract_all_highlights()
-    
-    # Display results
-    extractor.display_results()
-    
-    # Save files (only in full mode and if specified)
-    if not test_mode:
-        if output_json:
-            extractor.save_to_json(annotations, highlights, output_json)
-        if output_csv:
-            extractor.save_to_csv(annotations, highlights, output_csv)
-        
-        if not output_json and not output_csv:
-            print("\n📋 Display only - no files saved")
-    
-    end_time = time.time()
-    elapsed_time = end_time - start_time
-    
-    print(f"\n⏱️  Processing completed in {elapsed_time:.2f} seconds")
-    
-    if test_mode:
-        print("\n🧪 Test mode completed. Use without --test flag for full options.")
+    parser = argparse.ArgumentParser(
+        description="Enhanced PDF Highlight Extractor.",
+        formatter_class=argparse.RawTextHelpFormatter,
+        epilog=f"""Examples:
+  {sys.argv[0]} mydoc.pdf
+  {sys.argv[0]} mydoc.pdf -p "1,5-7" -i
+  {sys.argv[0]} -t -s --output-json results/test.json
+  {sys.argv[0]} doc.pdf -d
 
+If interactive image viewing ('O' option) fails, try running with the -d (debug)
+flag. This will print detailed information about image paths and creation steps.
+Common issues include missing default PNG viewers or OS-level permission problems.
+The IMAGE_FOLDER_PATH ('{IMAGE_FOLDER_PATH}') is relative to where you run the script.
+""")
+    parser.add_argument("pdf_path_arg", nargs='?', default=None, help="Path to PDF. Prompts if not in test/silent mode & not provided.")
+    parser.add_argument("-p", "--pages", type=str, default=None, help=f"Pages (e.g., \"1,3-5\", \"all\"). Default: \"{DEFAULT_PAGES_TO_PROCESS}\".")
+    parser.add_argument("-i", "--interactive", action="store_true", help="Enable interactive review mode.")
+    parser.add_argument("-t", "--test", action="store_true", help=f"Test mode. Uses default PDF ('{DEFAULT_PDF_PATH}'), auto-saves JSON.")
+    parser.add_argument("-s", "--silent", action="store_true", help="Silent mode. Minimal output. Auto-saves JSON. Implies -t if no PDF path.")
+    parser.add_argument("-d", "--debug", action="store_true", help="Debug mode. Enables all detailed SHOW flags and prints more internal details.")
+    parser.add_argument("--output-json", type=str, default=None, help="Custom output JSON filename/path.")
+    
+    cli_args = parser.parse_args()
+    
+    effective_run_args = argparse.Namespace()
+    effective_run_args.debug = cli_args.debug 
+    effective_run_args.silent = cli_args.silent
+
+    # Initialize based on global defaults
+    effective_run_args.show_timing = INITIAL_SHOW_TIMING
+    effective_run_args.show_progress = INITIAL_SHOW_PROGRESS
+    effective_run_args.show_raw_segments = INITIAL_SHOW_RAW_SEGMENTS
+    effective_run_args.show_extraction_details = INITIAL_SHOW_EXTRACTION_DETAILS
+    effective_run_args.show_rect_details = INITIAL_SHOW_RECT_DETAILS
+    effective_run_args.show_diff_percentage = INITIAL_SHOW_DIFF_PERCENTAGE
+    effective_run_args.clean_edges = INITIAL_CLEAN_EDGES
+    
+    # Override show flags based on debug or silent
+    if effective_run_args.debug:
+        for key in ['show_timing', 'show_progress', 'show_raw_segments', 'show_extraction_details', 'show_rect_details', 'show_diff_percentage']:
+            setattr(effective_run_args, key, True) # Debug enables all these
+    
+    if effective_run_args.silent:
+        for key in ['show_timing', 'show_progress', 'show_raw_segments', 'show_extraction_details', 'show_rect_details', 'show_diff_percentage']:
+            setattr(effective_run_args, key, False) # Silent disables all these
+        effective_run_args.interactive = False 
+    else: # Not silent
+        effective_run_args.interactive = cli_args.interactive
+
+    effective_run_args.pages = cli_args.pages
+    
+    start_time = time.time()
+    if effective_run_args.show_progress: print(Fore.MAGENTA + Style.BRIGHT + "🎨 PDF Highlight Extractor 🎨" + Style.RESET_ALL)
+    if effective_run_args.debug:
+        print(Fore.CYAN + f"  [Debug] Current Working Directory: {Path.cwd()}")
+        print(Fore.CYAN + f"  [Debug] Effective Run Arguments: {effective_run_args}")
+
+
+    if IMAGE_FOLDER_PATH and CLEAR_IMAGE_FOLDER_ON_START:
+        _clear_png_files_in_folder(IMAGE_FOLDER_PATH, effective_run_args)
+
+    pdf_path_to_use = None
+    if cli_args.test: pdf_path_to_use = DEFAULT_PDF_PATH
+    elif cli_args.pdf_path_arg: pdf_path_to_use = cli_args.pdf_path_arg
+    elif cli_args.silent: pdf_path_to_use = DEFAULT_PDF_PATH 
+    else: 
+        pdf_path_input = input(f"📄 PDF path (Enter for default '{DEFAULT_PDF_PATH}'): ").strip().strip('"')
+        pdf_path_to_use = pdf_path_input if pdf_path_input else DEFAULT_PDF_PATH
+    
+    if not pdf_path_to_use: 
+        if effective_run_args.show_progress: print(Fore.RED + "❌ No PDF path specified. Exiting.")
+        sys.exit(1)
+    
+    resolved_path = Path(pdf_path_to_use).resolve()
+    if not resolved_path.exists() or not resolved_path.is_file():
+        if effective_run_args.show_progress: print(Fore.RED + f"❌ PDF not found or is not a file: {resolved_path}")
+        sys.exit(1)
+    
+    doc_for_processing = None
+    try:
+        doc_for_processing = fitz.open(str(resolved_path))
+        extractor = EnhancedPDFHighlightExtractor(resolved_path, effective_run_args, main_doc_for_image_view=doc_for_processing)
+        extractor.extract_highlights(doc_for_processing) 
+        
+        if not effective_run_args.interactive and effective_run_args.show_progress: 
+            extractor.display_results()
+        elif effective_run_args.interactive and effective_run_args.show_progress:
+            if input(Fore.CYAN+"Interactive session ended. Display final results? [Y/n]: " + Style.RESET_ALL).lower().strip()!='n':
+                extractor.display_results()
+
+        json_output_path_str = cli_args.output_json if cli_args.output_json else str(resolved_path.parent / f"{resolved_path.stem}_highlights.json")
+        
+        perform_save = False
+        if cli_args.test or cli_args.silent: 
+            perform_save = True
+        elif effective_run_args.show_progress: # Only prompt if not silent
+            save_prompt_input = input(f"💾 Save to JSON? (Enter for default '{json_output_path_str}', type 'skip' to not save, or enter a custom path): " + Style.RESET_ALL).strip()
+            if save_prompt_input.lower() != 'skip':
+                perform_save = True
+                if save_prompt_input: 
+                    json_output_path_str = save_prompt_input
+        
+        if perform_save:
+            if extractor.highlights_data: 
+                extractor.save_to_json(json_output_path_str)
+            elif effective_run_args.show_progress: 
+                print(Fore.YELLOW + "No highlights were extracted or kept, so JSON file was not saved.")
+        elif effective_run_args.show_progress: 
+            print(Fore.BLUE + "Skipped saving highlights to JSON.")
+
+    except Exception as e:
+        if effective_run_args.show_progress: print(Fore.RED+Style.BRIGHT+f"💥 An critical error occurred in the main execution: {e}")
+        if effective_run_args.debug: 
+            traceback.print_exc()
+    finally:
+        if doc_for_processing: 
+            doc_for_processing.close()
+        
+        if IMAGE_FOLDER_PATH and CLEAR_IMAGE_FOLDER_ON_END:
+            _clear_png_files_in_folder(IMAGE_FOLDER_PATH, effective_run_args)
+            
+        if effective_run_args.show_timing: 
+            print(Fore.CYAN + f"\n⏱️ Total execution time: {time.time()-start_time:.2f} seconds")
 
 if __name__ == '__main__':
     main()
diff --git a/requirements.txt b/requirements.txt
index 70368a5..55f5e47 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,2 @@
-pdfplumber==0.10.3
 colorama==0.4.6
-pandas==2.0.3
 PyMuPDF==1.23.1
diff --git a/test/test2.pdf b/test/test2.pdf
new file mode 100644
index 0000000..5563ad2
Binary files /dev/null and b/test/test2.pdf differ