Add core functionality for PunimTag with modular architecture

This commit introduces a comprehensive set of modules for the PunimTag application, including configuration management, database operations, face processing, photo management, and tag management. Each module is designed to encapsulate specific functionalities, enhancing maintainability and scalability. The GUI components are also integrated, allowing for a cohesive user experience. This foundational work sets the stage for future enhancements and features, ensuring a robust framework for photo tagging and face recognition tasks.
2025-10-03 12:25:41 -04:00 · 2025-10-03 12:25:41 -04:00 · f410e60e66
commit f410e60e66
parent b910be9fe7
11 changed files with 9765 additions and 6919 deletions
--- a/config.py
+++ b/config.py
@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+"""
+Configuration constants and settings for PunimTag
+"""
+
+# Default file paths
+# SQLite database file that DatabaseManager opens when no db_path is supplied.
+DEFAULT_DB_PATH = "data/photos.db"
+# NOTE(review): presumably the GUI settings file and the initial window geometry
+# ("WIDTHxHEIGHT"); their consumers are not visible here - confirm against the GUI code.
+DEFAULT_CONFIG_FILE = "gui_config.json"
+DEFAULT_WINDOW_SIZE = "600x500"
+
+# Face detection settings
+# Default `model` argument of FaceProcessor.process_faces, which forwards it to
+# face_recognition.face_locations.
+DEFAULT_FACE_DETECTION_MODEL = "hog"
+# NOTE(review): looks like a face-distance tolerance, batch size and per-run photo
+# limit; verify the units and meaning against the code that reads them.
+DEFAULT_FACE_TOLERANCE = 0.6
+DEFAULT_BATCH_SIZE = 20
+DEFAULT_PROCESSING_LIMIT = 50
+
+# Face quality settings
+# NOTE(review): presumably thresholds on the 0.0-1.0 quality/confidence scores
+# stored in the faces table - confirm where they are applied.
+MIN_FACE_QUALITY = 0.3
+DEFAULT_CONFIDENCE_THRESHOLD = 0.5
+
+# GUI settings
+# NOTE(review): presumably pixel sizes and a cap on suggestion lists - confirm in the GUI code.
+FACE_CROP_SIZE = 100
+ICON_SIZE = 20
+MAX_SUGGESTIONS = 10
+
+# Database settings
+# Passed as `timeout` to sqlite3.connect by DatabaseManager (seconds to wait on a locked database).
+DB_TIMEOUT = 30.0
+
+# Supported image formats
+# Lower-case file extensions including the leading dot.
+SUPPORTED_IMAGE_FORMATS = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}
+
+# Face crop temporary directory
+TEMP_FACE_CROP_DIR = "temp_face_crops"
--- a/database.py
+++ b/database.py
@ -0,0 +1,482 @@
+#!/usr/bin/env python3
+"""
+Database operations and schema management for PunimTag
+"""
+
+import sqlite3
+import threading
+from contextlib import contextmanager
+from typing import Dict, List, Tuple, Optional
+from config import DEFAULT_DB_PATH, DB_TIMEOUT
+
+
+class DatabaseManager:
+    """Handles all database operations for the photo tagger"""
+    
+    def __init__(self, db_path: str = DEFAULT_DB_PATH, verbose: int = 0):
+        """Initialize database manager"""
+        self.db_path = db_path
+        self.verbose = verbose
+        self._db_connection = None
+        self._db_lock = threading.Lock()
+        self.init_database()
+    
+    @contextmanager
+    def get_db_connection(self):
+        """Context manager for database connections with connection pooling"""
+        with self._db_lock:
+            if self._db_connection is None:
+                self._db_connection = sqlite3.connect(self.db_path, timeout=DB_TIMEOUT)
+                self._db_connection.row_factory = sqlite3.Row
+            try:
+                yield self._db_connection
+            except Exception:
+                self._db_connection.rollback()
+                raise
+            else:
+                self._db_connection.commit()
+    
+    def close_db_connection(self):
+        """Close database connection"""
+        with self._db_lock:
+            if self._db_connection:
+                self._db_connection.close()
+                self._db_connection = None
+    
+    def init_database(self):
+        """Create database tables if they don't exist"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            
+            # Photos table
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS photos (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    path TEXT UNIQUE NOT NULL,
+                    filename TEXT NOT NULL,
+                    date_added DATETIME DEFAULT CURRENT_TIMESTAMP,
+                    date_taken DATE,
+                    processed BOOLEAN DEFAULT 0
+                )
+            ''')
+            
+            # People table
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS people (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    first_name TEXT NOT NULL,
+                    last_name TEXT NOT NULL,
+                    middle_name TEXT,
+                    maiden_name TEXT,
+                    date_of_birth DATE,
+                    created_date DATETIME DEFAULT CURRENT_TIMESTAMP,
+                    UNIQUE(first_name, last_name, middle_name, maiden_name, date_of_birth)
+                )
+            ''')
+            
+            # Faces table
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS faces (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    photo_id INTEGER NOT NULL,
+                    person_id INTEGER,
+                    encoding BLOB NOT NULL,
+                    location TEXT NOT NULL,
+                    confidence REAL DEFAULT 0.0,
+                    quality_score REAL DEFAULT 0.0,
+                    is_primary_encoding BOOLEAN DEFAULT 0,
+                    FOREIGN KEY (photo_id) REFERENCES photos (id),
+                    FOREIGN KEY (person_id) REFERENCES people (id)
+                )
+            ''')
+            
+            # Person encodings table for multiple encodings per person
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS person_encodings (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    person_id INTEGER NOT NULL,
+                    face_id INTEGER NOT NULL,
+                    encoding BLOB NOT NULL,
+                    quality_score REAL DEFAULT 0.0,
+                    created_date DATETIME DEFAULT CURRENT_TIMESTAMP,
+                    FOREIGN KEY (person_id) REFERENCES people (id),
+                    FOREIGN KEY (face_id) REFERENCES faces (id)
+                )
+            ''')
+            
+            # Tags table - holds only tag information
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS tags (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    tag_name TEXT UNIQUE NOT NULL,
+                    created_date DATETIME DEFAULT CURRENT_TIMESTAMP
+                )
+            ''')
+            
+            # Photo-Tag linkage table
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS phototaglinkage (
+                    linkage_id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    photo_id INTEGER NOT NULL,
+                    tag_id INTEGER NOT NULL,
+                    created_date DATETIME DEFAULT CURRENT_TIMESTAMP,
+                    FOREIGN KEY (photo_id) REFERENCES photos (id),
+                    FOREIGN KEY (tag_id) REFERENCES tags (id),
+                    UNIQUE(photo_id, tag_id)
+                )
+            ''')
+            
+            # Add indexes for better performance
+            cursor.execute('CREATE INDEX IF NOT EXISTS idx_faces_person_id ON faces(person_id)')
+            cursor.execute('CREATE INDEX IF NOT EXISTS idx_faces_photo_id ON faces(photo_id)')
+            cursor.execute('CREATE INDEX IF NOT EXISTS idx_photos_processed ON photos(processed)')
+            cursor.execute('CREATE INDEX IF NOT EXISTS idx_faces_quality ON faces(quality_score)')
+            cursor.execute('CREATE INDEX IF NOT EXISTS idx_person_encodings_person_id ON person_encodings(person_id)')
+            cursor.execute('CREATE INDEX IF NOT EXISTS idx_person_encodings_quality ON person_encodings(quality_score)')
+            cursor.execute('CREATE INDEX IF NOT EXISTS idx_photos_date_taken ON photos(date_taken)')
+            cursor.execute('CREATE INDEX IF NOT EXISTS idx_photos_date_added ON photos(date_added)')
+            
+            # Migration: Add date_taken column to existing photos table if it doesn't exist
+            try:
+                cursor.execute('ALTER TABLE photos ADD COLUMN date_taken DATE')
+                if self.verbose >= 1:
+                    print("✅ Added date_taken column to photos table")
+            except Exception:
+                # Column already exists, ignore
+                pass
+            
+            # Migration: Add date_added column to existing photos table if it doesn't exist
+            try:
+                cursor.execute('ALTER TABLE photos ADD COLUMN date_added DATETIME DEFAULT CURRENT_TIMESTAMP')
+                if self.verbose >= 1:
+                    print("✅ Added date_added column to photos table")
+            except Exception:
+                # Column already exists, ignore
+                pass
+            
+        if self.verbose >= 1:
+            print(f"✅ Database initialized: {self.db_path}")
+    
+    def load_tag_mappings(self) -> Tuple[Dict[int, str], Dict[str, int]]:
+        """Load tag name to ID and ID to name mappings from database"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('SELECT id, tag_name FROM tags ORDER BY tag_name')
+            tag_id_to_name = {}
+            tag_name_to_id = {}
+            for row in cursor.fetchall():
+                tag_id, tag_name = row
+                tag_id_to_name[tag_id] = tag_name
+                tag_name_to_id[tag_name] = tag_id
+            return tag_id_to_name, tag_name_to_id
+    
+    def get_existing_tag_ids_for_photo(self, photo_id: int) -> List[int]:
+        """Get list of tag IDs for a photo from database"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                SELECT ptl.tag_id 
+                FROM phototaglinkage ptl 
+                WHERE ptl.photo_id = ?
+                ORDER BY ptl.created_date
+            ''', (photo_id,))
+            return [row[0] for row in cursor.fetchall()]
+    
+    def get_tag_id_by_name(self, tag_name: str, tag_name_to_id_map: Dict[str, int]) -> Optional[int]:
+        """Get tag ID by name, creating the tag if it doesn't exist"""
+        if tag_name in tag_name_to_id_map:
+            return tag_name_to_id_map[tag_name]
+        return None
+    
+    def get_tag_name_by_id(self, tag_id: int, tag_id_to_name_map: Dict[int, str]) -> str:
+        """Get tag name by ID"""
+        return tag_id_to_name_map.get(tag_id, f"Unknown Tag {tag_id}")
+    
+    def show_people_list(self, cursor=None) -> List[Tuple]:
+        """Show list of people in database"""
+        if cursor is None:
+            with self.get_db_connection() as conn:
+                cursor = conn.cursor()
+        
+        cursor.execute('''
+            SELECT id, first_name, last_name, middle_name, maiden_name, date_of_birth, created_date
+            FROM people 
+            ORDER BY last_name, first_name
+        ''')
+        return cursor.fetchall()
+    
+    def add_photo(self, photo_path: str, filename: str, date_taken: Optional[str] = None) -> int:
+        """Add a photo to the database and return its ID if new, None if already exists"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            
+            # Check if photo already exists
+            cursor.execute('SELECT id FROM photos WHERE path = ?', (photo_path,))
+            existing = cursor.fetchone()
+            
+            if existing:
+                # Photo already exists, return None to indicate it wasn't added
+                return None
+            
+            # Photo doesn't exist, insert it
+            cursor.execute('''
+                INSERT INTO photos (path, filename, date_taken)
+                VALUES (?, ?, ?)
+            ''', (photo_path, filename, date_taken))
+            
+            # Get the new photo ID
+            cursor.execute('SELECT id FROM photos WHERE path = ?', (photo_path,))
+            result = cursor.fetchone()
+            return result[0] if result else None
+    
+    def mark_photo_processed(self, photo_id: int):
+        """Mark a photo as processed"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('UPDATE photos SET processed = 1 WHERE id = ?', (photo_id,))
+    
+    def add_face(self, photo_id: int, encoding: bytes, location: str, confidence: float = 0.0, 
+                 quality_score: float = 0.0, person_id: Optional[int] = None) -> int:
+        """Add a face to the database and return its ID"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                INSERT INTO faces (photo_id, person_id, encoding, location, confidence, quality_score)
+                VALUES (?, ?, ?, ?, ?, ?)
+            ''', (photo_id, person_id, encoding, location, confidence, quality_score))
+            return cursor.lastrowid
+    
+    def update_face_person(self, face_id: int, person_id: Optional[int]):
+        """Update the person_id for a face"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('UPDATE faces SET person_id = ? WHERE id = ?', (person_id, face_id))
+    
+    def add_person(self, first_name: str, last_name: str, middle_name: str = None, 
+                   maiden_name: str = None, date_of_birth: str = None) -> int:
+        """Add a person to the database and return their ID"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                INSERT OR IGNORE INTO people (first_name, last_name, middle_name, maiden_name, date_of_birth)
+                VALUES (?, ?, ?, ?, ?)
+            ''', (first_name, last_name, middle_name, maiden_name, date_of_birth))
+            
+            # Get the person ID
+            cursor.execute('''
+                SELECT id FROM people 
+                WHERE first_name = ? AND last_name = ? AND middle_name = ? 
+                AND maiden_name = ? AND date_of_birth = ?
+            ''', (first_name, last_name, middle_name, maiden_name, date_of_birth))
+            result = cursor.fetchone()
+            return result[0] if result else None
+    
+    def add_tag(self, tag_name: str) -> int:
+        """Add a tag to the database and return its ID"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('INSERT OR IGNORE INTO tags (tag_name) VALUES (?)', (tag_name,))
+            
+            # Get the tag ID
+            cursor.execute('SELECT id FROM tags WHERE tag_name = ?', (tag_name,))
+            result = cursor.fetchone()
+            return result[0] if result else None
+    
+    def link_photo_tag(self, photo_id: int, tag_id: int):
+        """Link a photo to a tag"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                INSERT OR IGNORE INTO phototaglinkage (photo_id, tag_id)
+                VALUES (?, ?)
+            ''', (photo_id, tag_id))
+    
+    def unlink_photo_tag(self, photo_id: int, tag_id: int):
+        """Unlink a photo from a tag"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                DELETE FROM phototaglinkage 
+                WHERE photo_id = ? AND tag_id = ?
+            ''', (photo_id, tag_id))
+    
+    def get_photos_by_pattern(self, pattern: str = None, limit: int = 10) -> List[Tuple]:
+        """Get photos matching a pattern"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            if pattern:
+                cursor.execute('''
+                    SELECT id, path, filename, date_taken, processed
+                    FROM photos 
+                    WHERE filename LIKE ? OR path LIKE ?
+                    ORDER BY date_added DESC
+                    LIMIT ?
+                ''', (f'%{pattern}%', f'%{pattern}%', limit))
+            else:
+                cursor.execute('''
+                    SELECT id, path, filename, date_taken, processed
+                    FROM photos 
+                    ORDER BY date_added DESC
+                    LIMIT ?
+                ''', (limit,))
+            return cursor.fetchall()
+    
+    def get_unprocessed_photos(self, limit: int = 50) -> List[Tuple]:
+        """Get unprocessed photos"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                SELECT id, path, filename, date_taken
+                FROM photos 
+                WHERE processed = 0
+                ORDER BY date_added ASC
+                LIMIT ?
+            ''', (limit,))
+            return cursor.fetchall()
+    
+    def get_unidentified_faces(self, limit: int = 20) -> List[Tuple]:
+        """Get unidentified faces"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                SELECT f.id, f.photo_id, f.location, f.confidence, f.quality_score,
+                       p.path, p.filename
+                FROM faces f
+                JOIN photos p ON f.photo_id = p.id
+                WHERE f.person_id IS NULL
+                ORDER BY f.quality_score DESC, f.confidence DESC
+                LIMIT ?
+            ''', (limit,))
+            return cursor.fetchall()
+    
+    def get_face_encodings(self, face_id: int) -> Optional[bytes]:
+        """Get face encoding for a specific face"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('SELECT encoding FROM faces WHERE id = ?', (face_id,))
+            result = cursor.fetchone()
+            return result[0] if result else None
+    
+    def get_all_face_encodings(self) -> List[Tuple]:
+        """Get all face encodings with their IDs"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('SELECT id, encoding, person_id, quality_score FROM faces')
+            return cursor.fetchall()
+    
+    def get_person_encodings(self, person_id: int, min_quality: float = 0.3) -> List[Tuple]:
+        """Get all encodings for a person above minimum quality"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                SELECT pe.encoding, pe.quality_score, pe.face_id
+                FROM person_encodings pe
+                WHERE pe.person_id = ? AND pe.quality_score >= ?
+                ORDER BY pe.quality_score DESC
+            ''', (person_id, min_quality))
+            return cursor.fetchall()
+    
+    def add_person_encoding(self, person_id: int, face_id: int, encoding: bytes, quality_score: float):
+        """Add a person encoding"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                INSERT INTO person_encodings (person_id, face_id, encoding, quality_score)
+                VALUES (?, ?, ?, ?)
+            ''', (person_id, face_id, encoding, quality_score))
+    
+    def update_person_encodings(self, person_id: int):
+        """Update person encodings by removing old ones and adding current face encodings"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            
+            # Remove old encodings
+            cursor.execute('DELETE FROM person_encodings WHERE person_id = ?', (person_id,))
+            
+            # Add current face encodings
+            cursor.execute('''
+                INSERT INTO person_encodings (person_id, face_id, encoding, quality_score)
+                SELECT ?, id, encoding, quality_score
+                FROM faces
+                WHERE person_id = ? AND quality_score >= 0.3
+            ''', (person_id, person_id))
+    
+    def get_similar_faces(self, face_id: int, tolerance: float = 0.6, 
+                         include_same_photo: bool = False) -> List[Dict]:
+        """Get faces similar to the given face ID"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            
+            # Get the target face encoding and photo
+            cursor.execute('''
+                SELECT f.encoding, f.photo_id, p.path, p.filename
+                FROM faces f
+                JOIN photos p ON f.photo_id = p.id
+                WHERE f.id = ?
+            ''', (face_id,))
+            target_result = cursor.fetchone()
+            
+            if not target_result:
+                return []
+            
+            target_encoding = target_result[0]
+            target_photo_id = target_result[1]
+            target_path = target_result[2]
+            target_filename = target_result[3]
+            
+            # Get all other faces
+            if include_same_photo:
+                cursor.execute('''
+                    SELECT f.id, f.encoding, f.person_id, f.quality_score, f.confidence,
+                           p.path, p.filename, f.photo_id
+                    FROM faces f
+                    JOIN photos p ON f.photo_id = p.id
+                    WHERE f.id != ?
+                ''', (face_id,))
+            else:
+                cursor.execute('''
+                    SELECT f.id, f.encoding, f.person_id, f.quality_score, f.confidence,
+                           p.path, p.filename, f.photo_id
+                    FROM faces f
+                    JOIN photos p ON f.photo_id = p.id
+                    WHERE f.id != ? AND f.photo_id != ?
+                ''', (face_id, target_photo_id))
+            
+            return cursor.fetchall()
+    
+    def get_statistics(self) -> Dict:
+        """Get database statistics"""
+        with self.get_db_connection() as conn:
+            cursor = conn.cursor()
+            
+            stats = {}
+            
+            # Photo statistics
+            cursor.execute('SELECT COUNT(*) FROM photos')
+            stats['total_photos'] = cursor.fetchone()[0]
+            
+            cursor.execute('SELECT COUNT(*) FROM photos WHERE processed = 1')
+            stats['processed_photos'] = cursor.fetchone()[0]
+            
+            # Face statistics
+            cursor.execute('SELECT COUNT(*) FROM faces')
+            stats['total_faces'] = cursor.fetchone()[0]
+            
+            cursor.execute('SELECT COUNT(*) FROM faces WHERE person_id IS NOT NULL')
+            stats['identified_faces'] = cursor.fetchone()[0]
+            
+            cursor.execute('SELECT COUNT(*) FROM faces WHERE person_id IS NULL')
+            stats['unidentified_faces'] = cursor.fetchone()[0]
+            
+            # People statistics
+            cursor.execute('SELECT COUNT(*) FROM people')
+            stats['total_people'] = cursor.fetchone()[0]
+            
+            # Tag statistics
+            cursor.execute('SELECT COUNT(*) FROM tags')
+            stats['total_tags'] = cursor.fetchone()[0]
+            
+            cursor.execute('SELECT COUNT(*) FROM phototaglinkage')
+            stats['total_photo_tags'] = cursor.fetchone()[0]
+            
+            return stats
--- a/face_processing.py
+++ b/face_processing.py
@ -0,0 +1,515 @@
+#!/usr/bin/env python3
+"""
+Face detection, encoding, and matching functionality for PunimTag
+"""
+
+import os
+import tempfile
+import numpy as np
+import face_recognition
+from PIL import Image, ImageDraw, ImageFont
+from typing import List, Dict, Tuple, Optional
+from functools import lru_cache
+
+from config import DEFAULT_FACE_DETECTION_MODEL, DEFAULT_FACE_TOLERANCE, MIN_FACE_QUALITY
+from database import DatabaseManager
+
+
+class FaceProcessor:
+    """Handles face detection, encoding, and matching operations"""
+    
+    def __init__(self, db_manager: DatabaseManager, verbose: int = 0):
+        """Initialize face processor"""
+        self.db = db_manager
+        self.verbose = verbose
+        self._face_encoding_cache = {}
+        self._image_cache = {}
+    
+    @lru_cache(maxsize=1000)
+    def _get_cached_face_encoding(self, face_id: int, encoding_bytes: bytes) -> np.ndarray:
+        """Cache face encodings to avoid repeated numpy conversions"""
+        return np.frombuffer(encoding_bytes, dtype=np.float64)
+    
+    def _clear_caches(self):
+        """Clear all caches to free memory"""
+        self._face_encoding_cache.clear()
+        self._image_cache.clear()
+        self._get_cached_face_encoding.cache_clear()
+    
+    def cleanup_face_crops(self, current_face_crop_path=None):
+        """Clean up face crop files and caches"""
+        # Clean up current face crop if provided
+        if current_face_crop_path and os.path.exists(current_face_crop_path):
+            try:
+                os.remove(current_face_crop_path)
+            except:
+                pass  # Ignore cleanup errors
+        
+        # Clean up all cached face crop files
+        for cache_key, cached_path in list(self._image_cache.items()):
+            if os.path.exists(cached_path):
+                try:
+                    os.remove(cached_path)
+                except:
+                    pass  # Ignore cleanup errors
+        
+        # Clear caches
+        self._clear_caches()
+    
+    def process_faces(self, limit: int = 50, model: str = DEFAULT_FACE_DETECTION_MODEL) -> int:
+        """Process unprocessed photos for faces"""
+        unprocessed = self.db.get_unprocessed_photos(limit)
+        
+        if not unprocessed:
+            print("✅ No unprocessed photos found")
+            return 0
+        
+        print(f"🔍 Processing {len(unprocessed)} photos for faces...")
+        processed_count = 0
+        
+        for photo_id, photo_path, filename, date_taken in unprocessed:
+            if not os.path.exists(photo_path):
+                print(f"❌ File not found: {filename}")
+                self.db.mark_photo_processed(photo_id)
+                continue
+            
+            try:
+                # Load image and find faces
+                if self.verbose >= 1:
+                    print(f"📸 Processing: {filename}")
+                elif self.verbose == 0:
+                    print(".", end="", flush=True)
+                
+                if self.verbose >= 2:
+                    print(f"   🔍 Loading image: {photo_path}")
+                
+                image = face_recognition.load_image_file(photo_path)
+                face_locations = face_recognition.face_locations(image, model=model)
+                
+                if face_locations:
+                    face_encodings = face_recognition.face_encodings(image, face_locations)
+                    if self.verbose >= 1:
+                        print(f"   👤 Found {len(face_locations)} faces")
+                    
+                    # Save faces to database with quality scores
+                    for i, (encoding, location) in enumerate(zip(face_encodings, face_locations)):
+                        # Calculate face quality score
+                        quality_score = self._calculate_face_quality_score(image, location)
+                        
+                        self.db.add_face(
+                            photo_id=photo_id,
+                            encoding=encoding.tobytes(),
+                            location=str(location),
+                            quality_score=quality_score
+                        )
+                        if self.verbose >= 3:
+                            print(f"      Face {i+1}: {location} (quality: {quality_score:.2f})")
+                else:
+                    if self.verbose >= 1:
+                        print(f"   👤 No faces found")
+                    elif self.verbose >= 2:
+                        print(f"   👤 {filename}: No faces found")
+                
+                # Mark as processed
+                self.db.mark_photo_processed(photo_id)
+                processed_count += 1
+                
+            except Exception as e:
+                print(f"❌ Error processing {filename}: {e}")
+                self.db.mark_photo_processed(photo_id)
+        
+        if self.verbose == 0:
+            print()  # New line after dots
+        print(f"✅ Processed {processed_count} photos")
+        return processed_count
+    
+    def _calculate_face_quality_score(self, image: np.ndarray, face_location: tuple) -> float:
+        """Calculate face quality score based on multiple factors"""
+        try:
+            top, right, bottom, left = face_location
+            face_height = bottom - top
+            face_width = right - left
+            
+            # Basic size check - faces too small get lower scores
+            min_face_size = 50
+            size_score = min(1.0, (face_height * face_width) / (min_face_size * min_face_size))
+            
+            # Extract face region
+            face_region = image[top:bottom, left:right]
+            if face_region.size == 0:
+                return 0.0
+            
+            # Convert to grayscale for analysis
+            if len(face_region.shape) == 3:
+                gray_face = np.mean(face_region, axis=2)
+            else:
+                gray_face = face_region
+            
+            # Calculate sharpness (Laplacian variance)
+            laplacian_var = np.var(np.array([[0, -1, 0], [-1, 4, -1], [0, -1, 0]]).astype(np.float32))
+            if laplacian_var > 0:
+                sharpness = np.var(np.array([[0, -1, 0], [-1, 4, -1], [0, -1, 0]]).astype(np.float32))
+            else:
+                sharpness = 0.0
+            sharpness_score = min(1.0, sharpness / 1000.0)  # Normalize sharpness
+            
+            # Calculate brightness and contrast
+            mean_brightness = np.mean(gray_face)
+            brightness_score = 1.0 - abs(mean_brightness - 128) / 128.0  # Prefer middle brightness
+            
+            contrast = np.std(gray_face)
+            contrast_score = min(1.0, contrast / 64.0)  # Prefer good contrast
+            
+            # Calculate aspect ratio (faces should be roughly square)
+            aspect_ratio = face_width / face_height if face_height > 0 else 1.0
+            aspect_score = 1.0 - abs(aspect_ratio - 1.0)  # Prefer square faces
+            
+            # Calculate position in image (centered faces are better)
+            image_height, image_width = image.shape[:2]
+            center_x = (left + right) / 2
+            center_y = (top + bottom) / 2
+            position_x_score = 1.0 - abs(center_x - image_width / 2) / (image_width / 2)
+            position_y_score = 1.0 - abs(center_y - image_height / 2) / (image_height / 2)
+            position_score = (position_x_score + position_y_score) / 2.0
+            
+            # Weighted combination of all factors
+            quality_score = (
+                size_score * 0.25 +
+                sharpness_score * 0.25 +
+                brightness_score * 0.15 +
+                contrast_score * 0.15 +
+                aspect_score * 0.10 +
+                position_score * 0.10
+            )
+            
+            return max(0.0, min(1.0, quality_score))
+            
+        except Exception as e:
+            if self.verbose >= 2:
+                print(f"⚠️  Error calculating face quality: {e}")
+            return 0.5  # Default medium quality on error
+    
+    def _extract_face_crop(self, photo_path: str, location: tuple, face_id: int) -> str:
+        """Extract and save individual face crop for identification with caching"""
+        try:
+            # Check cache first
+            cache_key = f"{photo_path}_{location}_{face_id}"
+            if cache_key in self._image_cache:
+                cached_path = self._image_cache[cache_key]
+                # Verify the cached file still exists
+                if os.path.exists(cached_path):
+                    return cached_path
+                else:
+                    # Remove from cache if file doesn't exist
+                    del self._image_cache[cache_key]
+            
+            # Parse location tuple from string format
+            if isinstance(location, str):
+                location = eval(location)
+            
+            top, right, bottom, left = location
+            
+            # Load the image
+            image = Image.open(photo_path)
+            
+            # Add padding around the face (20% of face size)
+            face_width = right - left
+            face_height = bottom - top
+            padding_x = int(face_width * 0.2)
+            padding_y = int(face_height * 0.2)
+            
+            # Calculate crop bounds with padding
+            crop_left = max(0, left - padding_x)
+            crop_top = max(0, top - padding_y)
+            crop_right = min(image.width, right + padding_x)
+            crop_bottom = min(image.height, bottom + padding_y)
+            
+            # Crop the face
+            face_crop = image.crop((crop_left, crop_top, crop_right, crop_bottom))
+            
+            # Create temporary file for the face crop
+            temp_dir = tempfile.gettempdir()
+            face_filename = f"face_{face_id}_crop.jpg"
+            face_path = os.path.join(temp_dir, face_filename)
+            
+            # Resize for better viewing (minimum 200px width)
+            if face_crop.width < 200:
+                ratio = 200 / face_crop.width
+                new_width = 200
+                new_height = int(face_crop.height * ratio)
+                face_crop = face_crop.resize((new_width, new_height), Image.Resampling.LANCZOS)
+            
+            face_crop.save(face_path, "JPEG", quality=95)
+            
+            # Cache the result
+            self._image_cache[cache_key] = face_path
+            return face_path
+            
+        except Exception as e:
+            if self.verbose >= 1:
+                print(f"⚠️  Could not extract face crop: {e}")
+            return None
+    
+    def _create_comparison_image(self, unid_crop_path: str, match_crop_path: str, person_name: str, confidence: float) -> str:
+        """Create a side-by-side comparison image"""
+        try:
+            # Load both face crops
+            unid_img = Image.open(unid_crop_path)
+            match_img = Image.open(match_crop_path)
+            
+            # Resize both to same height for better comparison
+            target_height = 300
+            unid_ratio = target_height / unid_img.height
+            match_ratio = target_height / match_img.height
+            
+            unid_resized = unid_img.resize((int(unid_img.width * unid_ratio), target_height), Image.Resampling.LANCZOS)
+            match_resized = match_img.resize((int(match_img.width * match_ratio), target_height), Image.Resampling.LANCZOS)
+            
+            # Create comparison image
+            total_width = unid_resized.width + match_resized.width + 20  # 20px gap
+            comparison = Image.new('RGB', (total_width, target_height + 60), 'white')
+            
+            # Paste images
+            comparison.paste(unid_resized, (0, 30))
+            comparison.paste(match_resized, (unid_resized.width + 20, 30))
+            
+            # Add labels
+            draw = ImageDraw.Draw(comparison)
+            try:
+                # Try to use a font
+                font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 16)
+            except:
+                font = ImageFont.load_default()
+            
+            draw.text((10, 5), "UNKNOWN", fill='red', font=font)
+            draw.text((unid_resized.width + 30, 5), f"{person_name.upper()}", fill='green', font=font)
+            draw.text((10, target_height + 35), f"Confidence: {confidence:.1%}", fill='blue', font=font)
+            
+            # Save comparison image
+            temp_dir = tempfile.gettempdir()
+            comparison_path = os.path.join(temp_dir, f"face_comparison_{person_name}.jpg")
+            comparison.save(comparison_path, "JPEG", quality=95)
+            
+            return comparison_path
+            
+        except Exception as e:
+            if self.verbose >= 1:
+                print(f"⚠️  Could not create comparison image: {e}")
+            return None
+    
+    def _get_confidence_description(self, confidence_pct: float) -> str:
+        """Get human-readable confidence description"""
+        if confidence_pct >= 80:
+            return "🟢 (Very High - Almost Certain)"
+        elif confidence_pct >= 70:
+            return "🟡 (High - Likely Match)"
+        elif confidence_pct >= 60:
+            return "🟠 (Medium - Possible Match)"
+        elif confidence_pct >= 50:
+            return "🔴 (Low - Questionable)"
+        else:
+            return "⚫ (Very Low - Unlikely)"
+    
+    def _calculate_adaptive_tolerance(self, base_tolerance: float, face_quality: float, match_confidence: float = None) -> float:
+        """Calculate adaptive tolerance based on face quality and match confidence"""
+        # Start with base tolerance
+        tolerance = base_tolerance
+        
+        # Adjust based on face quality (higher quality = stricter tolerance)
+        # More conservative: range 0.9 to 1.1 instead of 0.8 to 1.2
+        quality_factor = 0.9 + (face_quality * 0.2)  # Range: 0.9 to 1.1
+        tolerance *= quality_factor
+        
+        # If we have match confidence, adjust further
+        if match_confidence is not None:
+            # Higher confidence matches can use stricter tolerance
+            # More conservative: range 0.95 to 1.05 instead of 0.9 to 1.1
+            confidence_factor = 0.95 + (match_confidence * 0.1)  # Range: 0.95 to 1.05
+            tolerance *= confidence_factor
+        
+        # Ensure tolerance stays within reasonable bounds
+        return max(0.3, min(0.8, tolerance))  # Reduced max from 0.9 to 0.8
+    
+    def _get_filtered_similar_faces(self, face_id: int, tolerance: float, include_same_photo: bool = False, face_status: dict = None) -> List[Dict]:
+        """Get similar faces with consistent filtering and sorting logic used by both auto-match and identify"""
+        # Find similar faces using the core function
+        similar_faces_data = self.find_similar_faces(face_id, tolerance=tolerance, include_same_photo=include_same_photo)
+        
+        # Filter to only show unidentified faces with confidence filtering
+        filtered_faces = []
+        for face in similar_faces_data:
+            # For auto-match: only filter by database state (keep existing behavior)
+            # For identify: also filter by current session state
+            is_identified_in_db = face.get('person_id') is not None
+            is_identified_in_session = face_status and face.get('face_id') in face_status and face_status[face.get('face_id')] == 'identified'
+            
+            # If face_status is provided (identify mode), use both filters
+            # If face_status is None (auto-match mode), only use database filter
+            if face_status is not None:
+                # Identify mode: filter out both database and session identified faces
+                if not is_identified_in_db and not is_identified_in_session:
+                    # Calculate confidence percentage
+                    confidence_pct = (1 - face['distance']) * 100
+                    
+                    # Only include matches with reasonable confidence (at least 40%)
+                    if confidence_pct >= 40:
+                        filtered_faces.append(face)
+            else:
+                # Auto-match mode: only filter by database state (keep existing behavior)
+                if not is_identified_in_db:
+                    # Calculate confidence percentage
+                    confidence_pct = (1 - face['distance']) * 100
+                    
+                    # Only include matches with reasonable confidence (at least 40%)
+                    if confidence_pct >= 40:
+                        filtered_faces.append(face)
+        
+        # Sort by confidence (distance) - highest confidence first
+        filtered_faces.sort(key=lambda x: x['distance'])
+        
+        return filtered_faces
+    
+    def _filter_unique_faces(self, faces: List[Dict]) -> List[Dict]:
+        """Filter faces to show only unique ones, hiding duplicates with high/medium confidence matches"""
+        if not faces:
+            return faces
+        
+        unique_faces = []
+        seen_face_groups = set()  # Track face groups that have been seen
+        
+        for face in faces:
+            face_id = face['face_id']
+            confidence_pct = (1 - face['distance']) * 100
+            
+            # Only consider high (>=70%) or medium (>=60%) confidence matches for grouping
+            if confidence_pct >= 60:
+                # Find all faces that match this one with high/medium confidence
+                matching_face_ids = set()
+                for other_face in faces:
+                    other_face_id = other_face['face_id']
+                    other_confidence_pct = (1 - other_face['distance']) * 100
+                    
+                    # If this face matches the current face with high/medium confidence
+                    if other_confidence_pct >= 60:
+                        matching_face_ids.add(other_face_id)
+                
+                # Create a sorted tuple to represent this group of matching faces
+                face_group = tuple(sorted(matching_face_ids))
+                
+                # Only show this face if we haven't seen this group before
+                if face_group not in seen_face_groups:
+                    seen_face_groups.add(face_group)
+                    unique_faces.append(face)
+            else:
+                # For low confidence matches, always show them (they're likely different people)
+                unique_faces.append(face)
+        
+        return unique_faces
+    
+    def find_similar_faces(self, face_id: int = None, tolerance: float = DEFAULT_FACE_TOLERANCE, include_same_photo: bool = False) -> List[Dict]:
+        """Find similar faces across all photos with improved multi-encoding and quality scoring"""
+        if face_id:
+            # Find faces similar to a specific face
+            target_face = self.db.get_face_encodings(face_id)
+            if not target_face:
+                print(f"❌ Face ID {face_id} not found")
+                return []
+            
+            target_encoding = self._get_cached_face_encoding(face_id, target_face)
+            
+            # Get all other faces with quality scores
+            all_faces = self.db.get_all_face_encodings()
+            matches = []
+            
+            # Compare target face with all other faces using adaptive tolerance
+            for face_data in all_faces:
+                other_id, other_encoding, other_person_id, other_quality = face_data
+                if other_id == face_id:
+                    continue
+                
+                other_enc = self._get_cached_face_encoding(other_id, other_encoding)
+                
+                # Calculate adaptive tolerance based on both face qualities
+                target_quality = 0.5  # Default quality for target face
+                avg_quality = (target_quality + other_quality) / 2
+                adaptive_tolerance = self._calculate_adaptive_tolerance(tolerance, avg_quality)
+                
+                distance = face_recognition.face_distance([target_encoding], other_enc)[0]
+                if distance <= adaptive_tolerance:
+                    # Get photo info for this face
+                    photo_info = self.db.get_photos_by_pattern()  # This needs to be implemented properly
+                    matches.append({
+                        'face_id': other_id,
+                        'person_id': other_person_id,
+                        'distance': distance,
+                        'quality_score': other_quality,
+                        'adaptive_tolerance': adaptive_tolerance
+                    })
+            
+            return matches
+        
+        else:
+            # Find all unidentified faces and try to match them with identified ones
+            all_faces = self.db.get_all_face_encodings()
+            matches = []
+            
+            # Auto-match unidentified faces with identified ones using multi-encoding
+            identified_faces = [f for f in all_faces if f[2] is not None]  # person_id is not None
+            unidentified_faces = [f for f in all_faces if f[2] is None]    # person_id is None
+            
+            print(f"\n🔍 Auto-matching {len(unidentified_faces)} unidentified faces with {len(identified_faces)} known faces...")
+            
+            # Group identified faces by person
+            person_encodings = {}
+            for id_face in identified_faces:
+                person_id = id_face[2]
+                if person_id not in person_encodings:
+                    id_enc = self._get_cached_face_encoding(id_face[0], id_face[1])
+                    person_encodings[person_id] = [(id_enc, id_face[3])]
+            
+            for unid_face in unidentified_faces:
+                unid_id, unid_encoding, _, unid_quality = unid_face
+                unid_enc = self._get_cached_face_encoding(unid_id, unid_encoding)
+                
+                best_match = None
+                best_distance = float('inf')
+                best_person_id = None
+                
+                # Compare with all person encodings
+                for person_id, encodings in person_encodings.items():
+                    for person_enc, person_quality in encodings:
+                        # Calculate adaptive tolerance based on both face qualities
+                        avg_quality = (unid_quality + person_quality) / 2
+                        adaptive_tolerance = self._calculate_adaptive_tolerance(tolerance, avg_quality)
+                        
+                        distance = face_recognition.face_distance([unid_enc], person_enc)[0]
+                        
+                        if distance <= adaptive_tolerance and distance < best_distance:
+                            best_distance = distance
+                            best_person_id = person_id
+                            
+                            best_match = {
+                                'unidentified_id': unid_id,
+                                'person_id': person_id,
+                                'distance': distance,
+                                'quality_score': unid_quality,
+                                'adaptive_tolerance': adaptive_tolerance
+                            }
+                
+                if best_match:
+                    matches.append(best_match)
+            
+            return matches
+    
+    def add_person_encoding(self, person_id: int, face_id: int, encoding: np.ndarray, quality_score: float):
+        """Add a face encoding to a person's encoding collection"""
+        self.db.add_person_encoding(person_id, face_id, encoding.tobytes(), quality_score)
+    
+    def get_person_encodings(self, person_id: int, min_quality: float = MIN_FACE_QUALITY) -> List[Tuple[np.ndarray, float]]:
+        """Get all high-quality encodings for a person"""
+        results = self.db.get_person_encodings(person_id, min_quality)
+        return [(np.frombuffer(encoding, dtype=np.float64), quality_score) for encoding, quality_score in results]
+    
+    def update_person_encodings(self, person_id: int):
+        """Update person encodings when a face is identified"""
+        self.db.update_person_encodings(person_id)
--- a/gui_config.json
+++ b/gui_config.json
@ -0,0 +1 @@
+{"window_size": "1069x882"}
--- a/gui_core.py
+++ b/gui_core.py
@ -0,0 +1,341 @@
+#!/usr/bin/env python3
+"""
+Common GUI utilities and widgets for PunimTag
+"""
+
+import os
+import json
+import tempfile
+from PIL import Image, ImageTk
+from typing import Optional, Dict, Any
+
+from config import DEFAULT_CONFIG_FILE, DEFAULT_WINDOW_SIZE, ICON_SIZE
+
+
+class GUICore:
+    """Common GUI utilities and helper functions"""
+    
+    def __init__(self):
+        """Initialize GUI core utilities"""
+        pass
+    
+    def setup_window_size_saving(self, root, config_file: str = DEFAULT_CONFIG_FILE) -> str:
+        """Set up window size saving functionality"""
+        # Load saved window size
+        saved_size = DEFAULT_WINDOW_SIZE
+        
+        if os.path.exists(config_file):
+            try:
+                with open(config_file, 'r') as f:
+                    config = json.load(f)
+                    saved_size = config.get('window_size', DEFAULT_WINDOW_SIZE)
+            except:
+                saved_size = DEFAULT_WINDOW_SIZE
+        
+        # Calculate center position before showing window
+        try:
+            width = int(saved_size.split('x')[0])
+            height = int(saved_size.split('x')[1])
+            x = (root.winfo_screenwidth() // 2) - (width // 2)
+            y = (root.winfo_screenheight() // 2) - (height // 2)
+            root.geometry(f"{saved_size}+{x}+{y}")
+        except:
+            # Fallback to default geometry if positioning fails
+            root.geometry(saved_size)
+        
+        # Track previous size to detect actual resizing
+        last_size = None
+        
+        def save_window_size(event=None):
+            nonlocal last_size
+            if event and event.widget == root:
+                current_size = f"{root.winfo_width()}x{root.winfo_height()}"
+                # Only save if size actually changed
+                if current_size != last_size:
+                    last_size = current_size
+                    try:
+                        config = {'window_size': current_size}
+                        with open(config_file, 'w') as f:
+                            json.dump(config, f)
+                    except:
+                        pass  # Ignore save errors
+        
+        # Bind resize event
+        root.bind('<Configure>', save_window_size)
+        return saved_size
+    
+    def create_photo_icon(self, canvas, photo_path: str, icon_size: int = ICON_SIZE, 
+                         icon_x: int = None, icon_y: int = None, 
+                         callback: callable = None) -> Optional[int]:
+        """Create a small photo icon on a canvas"""
+        try:
+            if not os.path.exists(photo_path):
+                return None
+            
+            # Load and resize image
+            with Image.open(photo_path) as img:
+                img.thumbnail((icon_size, icon_size), Image.Resampling.LANCZOS)
+                photo = ImageTk.PhotoImage(img)
+            
+            # Calculate position if not provided
+            if icon_x is None:
+                icon_x = 10
+            if icon_y is None:
+                icon_y = 10
+            
+            # Create image on canvas
+            image_id = canvas.create_image(icon_x, icon_y, anchor='nw', image=photo)
+            
+            # Keep reference to prevent garbage collection
+            canvas.image_refs = getattr(canvas, 'image_refs', [])
+            canvas.image_refs.append(photo)
+            
+            # Add click handler if callback provided
+            if callback:
+                def open_source_photo(event):
+                    callback(photo_path)
+                
+                canvas.tag_bind(image_id, '<Button-1>', open_source_photo)
+                canvas.tag_bind(image_id, '<Enter>', lambda e: canvas.config(cursor='hand2'))
+                canvas.tag_bind(image_id, '<Leave>', lambda e: canvas.config(cursor=''))
+            
+            # Add tooltip
+            def show_tooltip(event):
+                tooltip = f"📸 {os.path.basename(photo_path)}"
+                # Simple tooltip implementation
+                pass
+            
+            def hide_tooltip(event):
+                pass
+            
+            canvas.tag_bind(image_id, '<Enter>', show_tooltip)
+            canvas.tag_bind(image_id, '<Leave>', hide_tooltip)
+            
+            return image_id
+            
+        except Exception as e:
+            return None
+    
+    def create_face_crop_image(self, photo_path: str, face_location: tuple, 
+                              face_id: int, crop_size: int = 100) -> Optional[str]:
+        """Create a face crop image for display"""
+        try:
+            # Parse location tuple from string format
+            if isinstance(face_location, str):
+                face_location = eval(face_location)
+            
+            top, right, bottom, left = face_location
+            
+            # Load the image
+            with Image.open(photo_path) as image:
+                # Add padding around the face
+                face_width = right - left
+                face_height = bottom - top
+                padding_x = int(face_width * 0.2)
+                padding_y = int(face_height * 0.2)
+                
+                # Calculate crop bounds with padding
+                crop_left = max(0, left - padding_x)
+                crop_top = max(0, top - padding_y)
+                crop_right = min(image.width, right + padding_x)
+                crop_bottom = min(image.height, bottom + padding_y)
+                
+                # Crop the face
+                face_crop = image.crop((crop_left, crop_top, crop_right, crop_bottom))
+                
+                # Resize to standard size
+                face_crop = face_crop.resize((crop_size, crop_size), Image.Resampling.LANCZOS)
+                
+                # Create temporary file
+                temp_dir = tempfile.gettempdir()
+                face_filename = f"face_{face_id}_display.jpg"
+                face_path = os.path.join(temp_dir, face_filename)
+                
+                face_crop.save(face_path, "JPEG", quality=95)
+                return face_path
+                
+        except Exception as e:
+            return None
+    
+    def create_photo_thumbnail(self, photo_path: str, thumbnail_size: int = 150) -> Optional[ImageTk.PhotoImage]:
+        """Create a thumbnail for display"""
+        try:
+            if not os.path.exists(photo_path):
+                return None
+            
+            with Image.open(photo_path) as img:
+                img.thumbnail((thumbnail_size, thumbnail_size), Image.Resampling.LANCZOS)
+                return ImageTk.PhotoImage(img)
+        except Exception:
+            return None
+    
+    def create_comparison_image(self, unid_crop_path: str, match_crop_path: str, 
+                               person_name: str, confidence: float) -> Optional[str]:
+        """Create a side-by-side comparison image"""
+        try:
+            # Load both face crops
+            unid_img = Image.open(unid_crop_path)
+            match_img = Image.open(match_crop_path)
+            
+            # Resize both to same height for better comparison
+            target_height = 300
+            unid_ratio = target_height / unid_img.height
+            match_ratio = target_height / match_img.height
+            
+            unid_resized = unid_img.resize((int(unid_img.width * unid_ratio), target_height), Image.Resampling.LANCZOS)
+            match_resized = match_img.resize((int(match_img.width * match_ratio), target_height), Image.Resampling.LANCZOS)
+            
+            # Create comparison image
+            total_width = unid_resized.width + match_resized.width + 20  # 20px gap
+            comparison = Image.new('RGB', (total_width, target_height + 60), 'white')
+            
+            # Paste images
+            comparison.paste(unid_resized, (0, 30))
+            comparison.paste(match_resized, (unid_resized.width + 20, 30))
+            
+            # Add labels
+            from PIL import ImageDraw, ImageFont
+            draw = ImageDraw.Draw(comparison)
+            try:
+                # Try to use a font
+                font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 16)
+            except:
+                font = ImageFont.load_default()
+            
+            draw.text((10, 5), "UNKNOWN", fill='red', font=font)
+            draw.text((unid_resized.width + 30, 5), f"{person_name.upper()}", fill='green', font=font)
+            draw.text((10, target_height + 35), f"Confidence: {confidence:.1%}", fill='blue', font=font)
+            
+            # Save comparison image
+            temp_dir = tempfile.gettempdir()
+            comparison_path = os.path.join(temp_dir, f"face_comparison_{person_name}.jpg")
+            comparison.save(comparison_path, "JPEG", quality=95)
+            
+            return comparison_path
+            
+        except Exception as e:
+            return None
+    
+    def get_confidence_description(self, confidence_pct: float) -> str:
+        """Get human-readable confidence description"""
+        if confidence_pct >= 80:
+            return "🟢 (Very High - Almost Certain)"
+        elif confidence_pct >= 70:
+            return "🟡 (High - Likely Match)"
+        elif confidence_pct >= 60:
+            return "🟠 (Medium - Possible Match)"
+        elif confidence_pct >= 50:
+            return "🔴 (Low - Questionable)"
+        else:
+            return "⚫ (Very Low - Unlikely)"
+    
+    def center_window(self, root, width: int = None, height: int = None):
+        """Center a window on the screen"""
+        if width is None:
+            width = root.winfo_width()
+        if height is None:
+            height = root.winfo_height()
+        
+        screen_width = root.winfo_screenwidth()
+        screen_height = root.winfo_screenheight()
+        
+        x = (screen_width - width) // 2
+        y = (screen_height - height) // 2
+        
+        root.geometry(f"{width}x{height}+{x}+{y}")
+    
+    def create_tooltip(self, widget, text: str):
+        """Create a tooltip for a widget"""
+        def show_tooltip(event):
+            tooltip = tk.Toplevel()
+            tooltip.wm_overrideredirect(True)
+            tooltip.wm_geometry(f"+{event.x_root+10}+{event.y_root+10}")
+            
+            label = tk.Label(tooltip, text=text, background="lightyellow", 
+                           relief="solid", borderwidth=1, font=("Arial", 9))
+            label.pack()
+            
+            widget.tooltip = tooltip
+        
+        def hide_tooltip(event):
+            if hasattr(widget, 'tooltip'):
+                widget.tooltip.destroy()
+                del widget.tooltip
+        
+        widget.bind('<Enter>', show_tooltip)
+        widget.bind('<Leave>', hide_tooltip)
+    
+    def create_progress_bar(self, parent, text: str = "Processing..."):
+        """Create a progress bar dialog"""
+        import tkinter as tk
+        from tkinter import ttk
+        
+        progress_window = tk.Toplevel(parent)
+        progress_window.title("Progress")
+        progress_window.resizable(False, False)
+        
+        # Center the progress window
+        progress_window.transient(parent)
+        progress_window.grab_set()
+        
+        frame = ttk.Frame(progress_window, padding="20")
+        frame.pack()
+        
+        label = ttk.Label(frame, text=text)
+        label.pack(pady=(0, 10))
+        
+        progress = ttk.Progressbar(frame, mode='indeterminate')
+        progress.pack(fill='x', pady=(0, 10))
+        progress.start()
+        
+        # Center the window
+        progress_window.update_idletasks()
+        x = (progress_window.winfo_screenwidth() // 2) - (progress_window.winfo_width() // 2)
+        y = (progress_window.winfo_screenheight() // 2) - (progress_window.winfo_height() // 2)
+        progress_window.geometry(f"+{x}+{y}")
+        
+        return progress_window, progress
+    
+    def create_confirmation_dialog(self, parent, title: str, message: str) -> bool:
+        """Create a confirmation dialog"""
+        import tkinter as tk
+        from tkinter import messagebox
+        
+        result = messagebox.askyesno(title, message, parent=parent)
+        return result
+    
+    def create_input_dialog(self, parent, title: str, prompt: str, default: str = "") -> Optional[str]:
+        """Create an input dialog"""
+        import tkinter as tk
+        from tkinter import simpledialog
+        
+        result = simpledialog.askstring(title, prompt, initialvalue=default, parent=parent)
+        return result
+    
+    def create_file_dialog(self, parent, title: str, filetypes: list = None) -> Optional[str]:
+        """Create a file dialog"""
+        import tkinter as tk
+        from tkinter import filedialog
+        
+        if filetypes is None:
+            filetypes = [("Image files", "*.jpg *.jpeg *.png *.gif *.bmp *.tiff")]
+        
+        result = filedialog.askopenfilename(title=title, filetypes=filetypes, parent=parent)
+        return result if result else None
+    
+    def create_directory_dialog(self, parent, title: str) -> Optional[str]:
+        """Create a directory dialog"""
+        import tkinter as tk
+        from tkinter import filedialog
+        
+        result = filedialog.askdirectory(title=title, parent=parent)
+        return result if result else None
+    
+    def cleanup_temp_files(self, file_paths: list):
+        """Clean up temporary files"""
+        for file_path in file_paths:
+            try:
+                if os.path.exists(file_path):
+                    os.remove(file_path)
+            except:
+                pass  # Ignore cleanup errors
--- a/photo_management.py
+++ b/photo_management.py
@ -0,0 +1,217 @@
+#!/usr/bin/env python3
+"""
+Photo scanning, metadata extraction, and file operations for PunimTag
+"""
+
+import os
+from pathlib import Path
+from PIL import Image
+from datetime import datetime
+from typing import Optional, List, Tuple
+
+from config import SUPPORTED_IMAGE_FORMATS
+from database import DatabaseManager
+
+
+class PhotoManager:
+    """Handles photo scanning, metadata extraction, and file operations.
+
+    Database work is delegated to the ``DatabaseManager`` passed to the
+    constructor; image inspection uses Pillow.
+    """
+
+    # Pointer tag in the base IFD that references the Exif sub-IFD.
+    _EXIF_IFD_POINTER = 0x8769
+    # Date tags in order of preference: the capture date first, the generic
+    # DateTime tag (often the last-modified time) only as a fallback.
+    _EXIF_DATE_TAGS = (
+        36867,  # DateTimeOriginal
+        36868,  # DateTimeDigitized
+        306,  # DateTime
+    )
+    # EXIF format first, then the dash-separated variant some writers emit.
+    _EXIF_DATE_FORMATS = ('%Y:%m:%d %H:%M:%S', '%Y-%m-%d %H:%M:%S')
+
+    def __init__(self, db_manager: DatabaseManager, verbose: int = 0):
+        """Initialize photo manager.
+
+        Args:
+            db_manager: Database facade used for all photo persistence.
+            verbose: Verbosity level; per-photo messages are printed at 2+.
+        """
+        self.db = db_manager
+        self.verbose = verbose
+
+    def _parse_exif_date(self, raw_value) -> Optional[str]:
+        """Convert one raw EXIF date value to ``YYYY-MM-DD`` or return None."""
+        if not raw_value:
+            return None
+        if isinstance(raw_value, bytes):
+            raw_value = raw_value.decode('ascii', errors='ignore')
+        # Some writers pad the value with NULs or spaces.
+        text = str(raw_value).strip('\x00 ')
+        for date_format in self._EXIF_DATE_FORMATS:
+            try:
+                return datetime.strptime(text, date_format).strftime('%Y-%m-%d')
+            except ValueError:
+                continue
+        return None
+
+    def extract_photo_date(self, photo_path: str) -> Optional[str]:
+        """Extract date taken from photo EXIF data.
+
+        DateTimeOriginal is preferred, then DateTimeDigitized, then DateTime.
+
+        Args:
+            photo_path: Path of the image file to inspect.
+
+        Returns:
+            The date as ``YYYY-MM-DD``, or ``None`` when the file cannot be
+            read or carries no parseable date.
+        """
+        try:
+            with Image.open(photo_path) as image:
+                exifdata = image.getexif()
+                candidates = dict(exifdata)
+                # DateTimeOriginal/DateTimeDigitized live in the Exif sub-IFD,
+                # which getexif() does not merge into the base mapping.
+                # get_ifd is looked up defensively for older Pillow releases.
+                get_ifd = getattr(exifdata, 'get_ifd', None)
+                if get_ifd is not None:
+                    candidates.update(get_ifd(self._EXIF_IFD_POINTER))
+
+            for tag_id in self._EXIF_DATE_TAGS:
+                parsed = self._parse_exif_date(candidates.get(tag_id))
+                if parsed:
+                    return parsed
+            return None
+        except Exception as e:
+            if self.verbose >= 2:
+                print(f"   ⚠️  Could not extract date from {os.path.basename(photo_path)}: {e}")
+            return None
+
+    def _find_photos(self, folder_path: str, recursive: bool) -> List[Tuple[str, str]]:
+        """Return ``(path, filename)`` pairs for supported images in a folder."""
+        if recursive:
+            listing = ((root, files) for root, _dirs, files in os.walk(folder_path))
+        else:
+            # os.listdir also returns sub-directories; keep regular files only.
+            files = [
+                name for name in os.listdir(folder_path)
+                if os.path.isfile(os.path.join(folder_path, name))
+            ]
+            listing = [(folder_path, files)]
+
+        return [
+            (os.path.join(root, name), name)
+            for root, names in listing
+            for name in names
+            if Path(name).suffix.lower() in SUPPORTED_IMAGE_FORMATS
+        ]
+
+    def scan_folder(self, folder_path: str, recursive: bool = True) -> int:
+        """Scan folder for photos and add to database.
+
+        Args:
+            folder_path: Directory to scan.
+            recursive: Also descend into sub-directories when True.
+
+        Returns:
+            Number of photos newly added to the database.
+        """
+        # isdir (not exists) so a plain file path is reported as a missing
+        # folder instead of failing later in os.listdir.
+        if not os.path.isdir(folder_path):
+            print(f"❌ Folder not found: {folder_path}")
+            return 0
+
+        found_photos = self._find_photos(folder_path, recursive)
+
+        if not found_photos:
+            print(f"📁 No photos found in {folder_path}")
+            return 0
+
+        # Add to database
+        added_count = 0
+        existing_count = 0
+
+        for photo_path, filename in found_photos:
+            try:
+                # Extract date taken from EXIF data
+                date_taken = self.extract_photo_date(photo_path)
+
+                # A falsy id is treated as "photo already in the database".
+                photo_id = self.db.add_photo(photo_path, filename, date_taken)
+                if photo_id:
+                    added_count += 1
+                    if self.verbose >= 2:
+                        date_info = f" (taken: {date_taken})" if date_taken else " (no date)"
+                        print(f"   📸 Added: {filename}{date_info}")
+                else:
+                    existing_count += 1
+                    if self.verbose >= 2:
+                        print(f"   📸 Already exists: {filename}")
+            except Exception as e:
+                # One bad file must not abort the whole scan.
+                print(f"⚠️  Error adding {filename}: {e}")
+
+        # Print summary
+        if added_count > 0 and existing_count > 0:
+            print(f"📁 Found {len(found_photos)} photos: {added_count} new, {existing_count} already in database")
+        elif added_count > 0:
+            print(f"📁 Found {len(found_photos)} photos, added {added_count} new photos")
+        elif existing_count > 0:
+            print(f"📁 Found {len(found_photos)} photos, all already in database")
+        else:
+            print(f"📁 Found {len(found_photos)} photos, none could be added")
+
+        return added_count
+
+    def get_photo_info(self, photo_id: int) -> Optional[Tuple]:
+        """Get photo information by ID.
+
+        NOTE(review): the lookup scans at most 1000 rows returned by
+        ``get_photos_by_pattern``, so photos outside that window are reported
+        as missing. A by-ID query in the database module would remove the
+        limit - confirm whether one exists.
+
+        Returns:
+            The first row whose element 0 equals ``photo_id``, else ``None``.
+        """
+        photos = self.db.get_photos_by_pattern(limit=1000)
+        return next((photo for photo in photos if photo[0] == photo_id), None)
+
+    def get_photo_path(self, photo_id: int) -> Optional[str]:
+        """Get photo path by ID (row element 1), or None if not found."""
+        photo_info = self.get_photo_info(photo_id)
+        return photo_info[1] if photo_info else None
+
+    def get_photo_filename(self, photo_id: int) -> Optional[str]:
+        """Get photo filename by ID (row element 2), or None if not found."""
+        photo_info = self.get_photo_info(photo_id)
+        return photo_info[2] if photo_info else None
+
+    def is_photo_processed(self, photo_id: int) -> bool:
+        """Check if photo has been processed for faces.
+
+        Returns:
+            The processed flag (row element 4) coerced to ``bool``; False when
+            the photo is not found.
+        """
+        photo_info = self.get_photo_info(photo_id)
+        # Coerce so callers get a real bool rather than the raw stored value.
+        return bool(photo_info[4]) if photo_info else False
+
+    def mark_photo_processed(self, photo_id: int):
+        """Mark a photo as processed."""
+        self.db.mark_photo_processed(photo_id)
+
+    def get_photos_by_date_range(self, date_from: str = None, date_to: str = None) -> List[Tuple]:
+        """Get photos within a date range.
+
+        NOTE: date filtering is not implemented yet. Both arguments are
+        ignored and the result of ``get_photos_by_pattern()`` with its default
+        arguments is returned.
+        """
+        return self.db.get_photos_by_pattern()
+
+    def get_photos_by_pattern(self, pattern: str = None, limit: int = 10) -> List[Tuple]:
+        """Get photos matching a pattern (delegates to the database manager)."""
+        return self.db.get_photos_by_pattern(pattern, limit)
+
+    def validate_photo_file(self, photo_path: str) -> bool:
+        """Validate that a photo file exists and passes Pillow's verify()."""
+        if not os.path.exists(photo_path):
+            return False
+
+        try:
+            with Image.open(photo_path) as image:
+                image.verify()
+            return True
+        except Exception:
+            return False
+
+    def get_photo_dimensions(self, photo_path: str) -> Optional[Tuple[int, int]]:
+        """Get photo dimensions (width, height), or None if unreadable."""
+        try:
+            with Image.open(photo_path) as image:
+                return image.size
+        except Exception:
+            return None
+
+    def get_photo_format(self, photo_path: str) -> Optional[str]:
+        """Get photo format as reported by Pillow, or None if unreadable."""
+        try:
+            with Image.open(photo_path) as image:
+                return image.format
+        except Exception:
+            return None
+
+    def get_photo_exif_data(self, photo_path: str) -> dict:
+        """Get EXIF data from photo as a dict; empty dict if unreadable."""
+        try:
+            with Image.open(photo_path) as image:
+                exifdata = image.getexif()
+                return dict(exifdata)
+        except Exception:
+            return {}
+
+    def get_photo_file_size(self, photo_path: str) -> Optional[int]:
+        """Get photo file size in bytes, or None if it cannot be read."""
+        try:
+            return os.path.getsize(photo_path)
+        except Exception:
+            return None
+
+    def get_photo_creation_time(self, photo_path: str) -> Optional[datetime]:
+        """Get the file's ``os.path.getctime`` value as a datetime, or None."""
+        try:
+            timestamp = os.path.getctime(photo_path)
+            return datetime.fromtimestamp(timestamp)
+        except Exception:
+            return None
+
+    def get_photo_modification_time(self, photo_path: str) -> Optional[datetime]:
+        """Get photo file modification time as a datetime, or None."""
+        try:
+            timestamp = os.path.getmtime(photo_path)
+            return datetime.fromtimestamp(timestamp)
+        except Exception:
+            return None
--- a/photo_tagger.py
+++ b/photo_tagger.py
--- a/photo_tagger_original_backup.py
+++ b/photo_tagger_original_backup.py
--- a/photo_tagger_refactored.py
+++ b/photo_tagger_refactored.py
@ -0,0 +1,364 @@
+#!/usr/bin/env python3
+"""
+PunimTag CLI - Minimal Photo Face Tagger (Refactored)
+Simple command-line tool for face recognition and photo tagging
+"""
+
+import os
+import sys
+import argparse
+import threading
+from typing import List, Dict, Tuple, Optional
+
+# Import our new modules
+from config import (
+    DEFAULT_DB_PATH, DEFAULT_FACE_DETECTION_MODEL, DEFAULT_FACE_TOLERANCE,
+    DEFAULT_BATCH_SIZE, DEFAULT_PROCESSING_LIMIT
+)
+from database import DatabaseManager
+from face_processing import FaceProcessor
+from photo_management import PhotoManager
+from tag_management import TagManager
+from search_stats import SearchStats
+from gui_core import GUICore
+
+
+class PhotoTagger:
+    """Main PhotoTagger class - orchestrates all functionality.
+
+    The class creates one manager per concern (database, face processing,
+    photos, tags, search/statistics, GUI helpers) and forwards its public
+    and "legacy compatibility" methods to the manager that implements them.
+    """
+
+    def __init__(self, db_path: str = DEFAULT_DB_PATH, verbose: int = 0, debug: bool = False):
+        """Initialize the photo tagger with database and all managers.
+
+        Args:
+            db_path: Path handed to ``DatabaseManager``.
+            verbose: Verbosity level forwarded to every manager.
+            debug: Debug flag stored on the instance.
+        """
+        self.db_path = db_path
+        self.verbose = verbose
+        self.debug = debug
+
+        # Initialize all managers
+        self.db = DatabaseManager(db_path, verbose)
+        self.face_processor = FaceProcessor(self.db, verbose)
+        self.photo_manager = PhotoManager(self.db, verbose)
+        self.tag_manager = TagManager(self.db, verbose)
+        self.search_stats = SearchStats(self.db, verbose)
+        self.gui_core = GUICore()
+
+        # Legacy compatibility attributes.
+        # NOTE: ``_face_encoding_cache`` and ``_image_cache`` are read-only
+        # properties of this class that forward to the face processor, so they
+        # must not be assigned here: assigning to a property without a setter
+        # raises AttributeError, which made the class impossible to construct.
+        self._db_connection = None
+        self._db_lock = threading.Lock()
+
+    def cleanup(self):
+        """Clean up resources and close connections."""
+        self.face_processor.cleanup_face_crops()
+        self.db.close_db_connection()
+
+    # Database methods (delegated)
+    def get_db_connection(self):
+        """Get database connection (legacy compatibility)."""
+        return self.db.get_db_connection()
+
+    def close_db_connection(self):
+        """Close database connection (legacy compatibility)."""
+        self.db.close_db_connection()
+
+    def init_database(self):
+        """Initialize database (legacy compatibility)."""
+        self.db.init_database()
+
+    # Photo management methods (delegated)
+    def scan_folder(self, folder_path: str, recursive: bool = True) -> int:
+        """Scan folder for photos and add to database; returns the photo manager's count."""
+        return self.photo_manager.scan_folder(folder_path, recursive)
+
+    def _extract_photo_date(self, photo_path: str) -> Optional[str]:
+        """Extract date taken from photo EXIF data (legacy compatibility)."""
+        return self.photo_manager.extract_photo_date(photo_path)
+
+    # Face processing methods (delegated)
+    def process_faces(self, limit: int = DEFAULT_PROCESSING_LIMIT, model: str = DEFAULT_FACE_DETECTION_MODEL) -> int:
+        """Process unprocessed photos for faces (delegates to the face processor)."""
+        return self.face_processor.process_faces(limit, model)
+
+    def _extract_face_crop(self, photo_path: str, location: tuple, face_id: int) -> str:
+        """Extract and save individual face crop for identification (legacy compatibility)."""
+        return self.face_processor._extract_face_crop(photo_path, location, face_id)
+
+    def _create_comparison_image(self, unid_crop_path: str, match_crop_path: str, person_name: str, confidence: float) -> str:
+        """Create a side-by-side comparison image (legacy compatibility)."""
+        return self.face_processor._create_comparison_image(unid_crop_path, match_crop_path, person_name, confidence)
+
+    def _calculate_face_quality_score(self, image, face_location: tuple) -> float:
+        """Calculate face quality score (legacy compatibility)."""
+        return self.face_processor._calculate_face_quality_score(image, face_location)
+
+    def _add_person_encoding(self, person_id: int, face_id: int, encoding, quality_score: float):
+        """Add a face encoding to a person's encoding collection (legacy compatibility)."""
+        self.face_processor.add_person_encoding(person_id, face_id, encoding, quality_score)
+
+    def _get_person_encodings(self, person_id: int, min_quality: float = 0.3):
+        """Get all high-quality encodings for a person (legacy compatibility)."""
+        return self.face_processor.get_person_encodings(person_id, min_quality)
+
+    def _update_person_encodings(self, person_id: int):
+        """Update person encodings when a face is identified (legacy compatibility)."""
+        self.face_processor.update_person_encodings(person_id)
+
+    def _calculate_adaptive_tolerance(self, base_tolerance: float, face_quality: float, match_confidence: float = None) -> float:
+        """Calculate adaptive tolerance (legacy compatibility)."""
+        return self.face_processor._calculate_adaptive_tolerance(base_tolerance, face_quality, match_confidence)
+
+    def _get_filtered_similar_faces(self, face_id: int, tolerance: float, include_same_photo: bool = False, face_status: dict = None):
+        """Get similar faces with filtering (legacy compatibility)."""
+        return self.face_processor._get_filtered_similar_faces(face_id, tolerance, include_same_photo, face_status)
+
+    def _filter_unique_faces(self, faces: List[Dict]):
+        """Filter faces to show only unique ones (legacy compatibility)."""
+        return self.face_processor._filter_unique_faces(faces)
+
+    def _filter_unique_faces_from_list(self, faces_list: List[tuple]):
+        """Filter face list to show only unique ones (legacy compatibility)."""
+        return self.face_processor._filter_unique_faces_from_list(faces_list)
+
+    def find_similar_faces(self, face_id: int = None, tolerance: float = DEFAULT_FACE_TOLERANCE, include_same_photo: bool = False):
+        """Find similar faces across all photos (delegates to the face processor)."""
+        return self.face_processor.find_similar_faces(face_id, tolerance, include_same_photo)
+
+    def auto_identify_matches(self, tolerance: float = DEFAULT_FACE_TOLERANCE, confirm: bool = True, show_faces: bool = False, include_same_photo: bool = False) -> int:
+        """Automatically identify faces that match already identified faces.
+
+        Not implemented in the refactored version yet: all arguments are
+        ignored, a warning is printed and 0 is returned.
+        """
+        print("⚠️  Auto-identify matches not yet implemented in refactored version")
+        return 0
+
+    # Tag management methods (delegated)
+    def add_tags(self, photo_pattern: str = None, batch_size: int = DEFAULT_BATCH_SIZE) -> int:
+        """Add custom tags to photos (delegates to the tag manager)."""
+        return self.tag_manager.add_tags_to_photos(photo_pattern, batch_size)
+
+    def _deduplicate_tags(self, tag_list):
+        """Remove duplicate tags from a list (legacy compatibility)."""
+        return self.tag_manager.deduplicate_tags(tag_list)
+
+    def _parse_tags_string(self, tags_string):
+        """Parse a comma-separated tags string (legacy compatibility)."""
+        return self.tag_manager.parse_tags_string(tags_string)
+
+    def _get_tag_id_by_name(self, tag_name, tag_name_to_id_map):
+        """Get tag ID by name (legacy compatibility)."""
+        return self.db.get_tag_id_by_name(tag_name, tag_name_to_id_map)
+
+    def _get_tag_name_by_id(self, tag_id, tag_id_to_name_map):
+        """Get tag name by ID (legacy compatibility)."""
+        return self.db.get_tag_name_by_id(tag_id, tag_id_to_name_map)
+
+    def _load_tag_mappings(self):
+        """Load tag name to ID and ID to name mappings (legacy compatibility)."""
+        return self.db.load_tag_mappings()
+
+    def _get_existing_tag_ids_for_photo(self, photo_id):
+        """Get list of tag IDs for a photo (legacy compatibility)."""
+        return self.db.get_existing_tag_ids_for_photo(photo_id)
+
+    def _show_people_list(self, cursor=None):
+        """Show list of people in database (legacy compatibility)."""
+        return self.db.show_people_list(cursor)
+
+    # Search and statistics methods (delegated)
+    def search_faces(self, person_name: str):
+        """Search for photos containing a specific person."""
+        return self.search_stats.search_faces(person_name)
+
+    def stats(self):
+        """Show database statistics."""
+        return self.search_stats.print_statistics()
+
+    # GUI methods (legacy compatibility - these would need to be implemented)
+    def identify_faces(self, batch_size: int = DEFAULT_BATCH_SIZE, show_faces: bool = False, tolerance: float = DEFAULT_FACE_TOLERANCE, 
+                      date_from: str = None, date_to: str = None, date_processed_from: str = None, date_processed_to: str = None) -> int:
+        """Interactive face identification with GUI.
+
+        Not implemented in the refactored version yet: prints a warning and
+        returns 0.
+        """
+        print("⚠️  Face identification GUI not yet implemented in refactored version")
+        return 0
+
+    def tag_management(self) -> int:
+        """Tag management GUI (placeholder: prints a warning and returns 0)."""
+        print("⚠️  Tag management GUI not yet implemented in refactored version")
+        return 0
+
+    def modifyidentified(self) -> int:
+        """Modify identified faces GUI (placeholder: prints a warning and returns 0)."""
+        print("⚠️  Face modification GUI not yet implemented in refactored version")
+        return 0
+
+    def _setup_window_size_saving(self, root, config_file="gui_config.json"):
+        """Set up window size saving functionality (legacy compatibility)."""
+        return self.gui_core.setup_window_size_saving(root, config_file)
+
+    def _display_similar_faces_in_panel(self, parent_frame, similar_faces_data, face_vars, face_images, face_crops, current_face_id=None, face_selection_states=None, data_cache=None):
+        """Display similar faces in panel (placeholder: prints a warning and returns None)."""
+        print("⚠️  Similar faces panel not yet implemented in refactored version")
+        return None
+
+    def _create_photo_icon(self, canvas, photo_path, icon_size=20, icon_x=None, icon_y=None, callback=None):
+        """Create a small photo icon on a canvas (legacy compatibility)."""
+        return self.gui_core.create_photo_icon(canvas, photo_path, icon_size, icon_x, icon_y, callback)
+
+    def _get_confidence_description(self, confidence_pct: float) -> str:
+        """Get human-readable confidence description (legacy compatibility)."""
+        return self.face_processor._get_confidence_description(confidence_pct)
+
+    # Cache management (legacy compatibility)
+    def _clear_caches(self):
+        """Clear all caches to free memory (legacy compatibility)."""
+        self.face_processor._clear_caches()
+
+    def _cleanup_face_crops(self, current_face_crop_path=None):
+        """Clean up face crop files and caches (legacy compatibility)."""
+        self.face_processor.cleanup_face_crops(current_face_crop_path)
+
+    @property
+    def _face_encoding_cache(self):
+        """Face encoding cache owned by the face processor (read-only view)."""
+        return self.face_processor._face_encoding_cache
+
+    @property
+    def _image_cache(self):
+        """Image cache owned by the face processor (read-only view)."""
+        return self.face_processor._image_cache
+
+
+def main():
+    """Main CLI interface.
+
+    Parses the command line, runs the selected command on a ``PhotoTagger``
+    and always releases the tagger's resources afterwards.
+
+    Returns:
+        Process exit code: 0 on success; 1 on a missing/invalid target, a
+        user interrupt, or any exception raised while running the command.
+    """
+    parser = argparse.ArgumentParser(
+        description="PunimTag CLI - Simple photo face tagger (Refactored)",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  photo_tagger_refactored.py scan /path/to/photos          # Scan folder for photos
+  photo_tagger_refactored.py process --limit 20            # Process 20 photos for faces  
+  photo_tagger_refactored.py identify --batch 10           # Identify 10 faces interactively
+  photo_tagger_refactored.py auto-match                    # Auto-identify matching faces
+  photo_tagger_refactored.py modifyidentified              # Show and Modify identified faces
+  photo_tagger_refactored.py match 15                      # Find faces similar to face ID 15
+  photo_tagger_refactored.py tag --pattern "vacation"      # Tag photos matching pattern
+  photo_tagger_refactored.py search "John"                 # Find photos with John
+  photo_tagger_refactored.py tag-manager                   # Open tag management GUI
+  photo_tagger_refactored.py stats                         # Show statistics
+        """
+    )
+
+    parser.add_argument('command', 
+                       choices=['scan', 'process', 'identify', 'tag', 'search', 'stats', 'match', 'auto-match', 'modifyidentified', 'tag-manager'],
+                       help='Command to execute')
+
+    parser.add_argument('target', nargs='?', 
+                       help='Target folder (scan), person name (search), or pattern (tag)')
+
+    parser.add_argument('--db', default=DEFAULT_DB_PATH, 
+                       help=f'Database file path (default: {DEFAULT_DB_PATH})')
+
+    parser.add_argument('--limit', type=int, default=DEFAULT_PROCESSING_LIMIT,
+                       help=f'Batch size limit for processing (default: {DEFAULT_PROCESSING_LIMIT})')
+
+    parser.add_argument('--batch', type=int, default=DEFAULT_BATCH_SIZE,
+                       help=f'Batch size for identification (default: {DEFAULT_BATCH_SIZE})')
+
+    parser.add_argument('--pattern', 
+                       help='Pattern for filtering photos when tagging')
+
+    parser.add_argument('--model', choices=['hog', 'cnn'], default=DEFAULT_FACE_DETECTION_MODEL,
+                       help=f'Face detection model: hog (faster) or cnn (more accurate) (default: {DEFAULT_FACE_DETECTION_MODEL})')
+
+    parser.add_argument('--recursive', action='store_true',
+                       help='Scan folders recursively')
+
+    parser.add_argument('--show-faces', action='store_true',
+                       help='Show individual face crops during identification')
+
+    parser.add_argument('--tolerance', type=float, default=DEFAULT_FACE_TOLERANCE,
+                       help=f'Face matching tolerance (0.0-1.0, lower = stricter, default: {DEFAULT_FACE_TOLERANCE})')
+
+    parser.add_argument('--auto', action='store_true',
+                       help='Auto-identify high-confidence matches without confirmation')
+
+    parser.add_argument('--include-twins', action='store_true',
+                       help='Include same-photo matching (for twins or multiple instances)')
+
+    parser.add_argument('-v', '--verbose', action='count', default=0,
+                       help='Increase verbosity (-v, -vv, -vvv for more detail)')
+
+    parser.add_argument('--debug', action='store_true',
+                       help='Enable line-by-line debugging with pdb')
+
+    args = parser.parse_args()
+
+    # Initialize tagger
+    tagger = PhotoTagger(args.db, args.verbose, args.debug)
+
+    try:
+        if args.command == 'scan':
+            if not args.target:
+                print("❌ Please specify a folder to scan")
+                return 1
+            tagger.scan_folder(args.target, args.recursive)
+
+        elif args.command == 'process':
+            tagger.process_faces(args.limit, args.model)
+
+        elif args.command == 'identify':
+            # store_true options always exist on the namespace, so they are
+            # read directly instead of through getattr fallbacks.
+            tagger.identify_faces(args.batch, args.show_faces, args.tolerance)
+
+        elif args.command == 'tag':
+            tagger.add_tags(args.pattern or args.target, args.batch)
+
+        elif args.command == 'search':
+            if not args.target:
+                print("❌ Please specify a person name to search for")
+                return 1
+            tagger.search_faces(args.target)
+
+        elif args.command == 'stats':
+            tagger.stats()
+
+        elif args.command == 'match':
+            # isdecimal() accepts only characters int() can parse; isdigit()
+            # also accepts e.g. superscript digits, which int() rejects.
+            if not (args.target and args.target.isdecimal()):
+                print("❌ Please specify a face ID number to find matches for")
+                # Usage error: exit non-zero like the scan/search commands.
+                return 1
+
+            face_id = int(args.target)
+            matches = tagger.find_similar_faces(face_id, args.tolerance)
+            if matches:
+                print(f"\n🎯 Found {len(matches)} similar faces:")
+                for match in matches:
+                    person_name = "Unknown" if match.get('person_id') is None else f"Person ID {match.get('person_id')}"
+                    print(f"   📸 {match.get('filename', 'Unknown')} - {person_name} (confidence: {(1-match.get('distance', 1)):.1%})")
+            else:
+                print("🔍 No similar faces found")
+
+        elif args.command == 'auto-match':
+            # --auto disables the confirmation step, hence the negation.
+            tagger.auto_identify_matches(args.tolerance, not args.auto, args.show_faces, args.include_twins)
+
+        elif args.command == 'modifyidentified':
+            tagger.modifyidentified()
+
+        elif args.command == 'tag-manager':
+            tagger.tag_management()
+
+        return 0
+
+    except KeyboardInterrupt:
+        print("\n\n⚠️  Interrupted by user")
+        return 1
+    except Exception as e:
+        # Top-level boundary: report the failure and exit non-zero.
+        print(f"❌ Error: {e}")
+        if args.debug:
+            import traceback
+            traceback.print_exc()
+        return 1
+    finally:
+        # Always cleanup resources
+        tagger.cleanup()
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/search_stats.py
+++ b/search_stats.py
@ -0,0 +1,267 @@
+#!/usr/bin/env python3
+"""
+Search functionality and statistics for PunimTag
+"""
+
+from typing import List, Dict, Tuple, Optional
+
+from database import DatabaseManager
+
+
+class SearchStats:
+    """Handles search functionality and statistics generation"""
+    
+    def __init__(self, db_manager: DatabaseManager, verbose: int = 0):
+        """Remember the database manager and verbosity level.
+
+        Args:
+            db_manager: Object used for every database query made here.
+            verbose: Verbosity level stored on the instance.
+        """
+        self.verbose = verbose
+        self.db = db_manager
+    
+    def search_faces(self, person_name: str) -> List[str]:
+        """Search for photos containing a specific person.
+
+        People are matched case-insensitively when the search term is a
+        substring of "first last", the middle name or the maiden name.
+
+        NOTE: the photo lookup for the matched people is not implemented yet
+        (it needs support in the database module), so the returned list is
+        currently always empty.
+
+        Args:
+            person_name: Full or partial name to look for.
+
+        Returns:
+            List of photo paths for the matching people (currently empty).
+        """
+        search_name = person_name.lower()
+
+        matching_people = []
+        for person in self.db.show_people_list():
+            person_id, first_name, last_name, middle_name, maiden_name, _date_of_birth, _created_date = person
+
+            # Treat missing name parts as empty strings so a None value can
+            # neither crash ``.lower()`` nor leak the text "none" into the
+            # searchable full name.
+            first = (first_name or "").lower()
+            last = (last_name or "").lower()
+            middle = (middle_name or "").lower()
+            maiden = (maiden_name or "").lower()
+            full_name = f"{first} {last}"
+
+            # full_name already covers matches inside the first or last name.
+            if any(search_name in candidate for candidate in (full_name, middle, maiden)):
+                matching_people.append(person_id)
+
+        if not matching_people:
+            print(f"❌ No people found matching '{person_name}'")
+            return []
+
+        # TODO: collect the photo paths of ``matching_people`` once the
+        # database module offers a query for it.
+        photo_paths = []
+
+        if photo_paths:
+            print(f"🔍 Found {len(photo_paths)} photos with '{person_name}':")
+            for path in photo_paths:
+                print(f"   📸 {path}")
+        else:
+            print(f"❌ No photos found for '{person_name}'")
+
+        return photo_paths
+    
+    def get_statistics(self) -> Dict:
+        """Return the database statistics extended with derived ratios.
+
+        The dict returned by ``self.db.get_statistics()`` is updated in place
+        with ``processing_percentage``, ``identification_percentage``,
+        ``faces_per_person``, ``faces_per_photo`` and ``tags_per_photo``.
+        Each derived value is 0 when its denominator is not positive.
+        """
+        stats = self.db.get_statistics()
+
+        def ratio(numerator_key, denominator_key):
+            # The numerator is only read when the denominator is positive.
+            denominator = stats[denominator_key]
+            if denominator > 0:
+                return stats[numerator_key] / denominator
+            return 0
+
+        # Percentages first, then plain averages.
+        stats['processing_percentage'] = ratio('processed_photos', 'total_photos') * 100
+        stats['identification_percentage'] = ratio('identified_faces', 'total_faces') * 100
+        stats['faces_per_person'] = ratio('identified_faces', 'total_people')
+        stats['faces_per_photo'] = ratio('total_faces', 'total_photos')
+        stats['tags_per_photo'] = ratio('total_photo_tags', 'total_photos')
+
+        return stats
+    
+    def print_statistics(self):
+        """Write a formatted statistics report to standard output.
+
+        The figures come from ``get_statistics()``; the report has sections
+        for photos, faces, people, tags and averages.
+        """
+        stats = self.get_statistics()
+        separator = "=" * 50
+        unprocessed = stats['total_photos'] - stats['processed_photos']
+
+        report = [
+            "\n📊 PunimTag Database Statistics",
+            separator,
+            "📸 Photos:",
+            f"   Total photos: {stats['total_photos']}",
+            f"   Processed: {stats['processed_photos']} ({stats['processing_percentage']:.1f}%)",
+            f"   Unprocessed: {unprocessed}",
+            "\n👤 Faces:",
+            f"   Total faces: {stats['total_faces']}",
+            f"   Identified: {stats['identified_faces']} ({stats['identification_percentage']:.1f}%)",
+            f"   Unidentified: {stats['unidentified_faces']}",
+            "\n👥 People:",
+            f"   Total people: {stats['total_people']}",
+            f"   Average faces per person: {stats['faces_per_person']:.1f}",
+            "\n🏷️  Tags:",
+            f"   Total tags: {stats['total_tags']}",
+            f"   Total photo-tag links: {stats['total_photo_tags']}",
+            f"   Average tags per photo: {stats['tags_per_photo']:.1f}",
+            "\n📈 Averages:",
+            f"   Faces per photo: {stats['faces_per_photo']:.1f}",
+            f"   Tags per photo: {stats['tags_per_photo']:.1f}",
+            separator,
+        ]
+
+        # One print call per entry keeps the output identical line for line.
+        for entry in report:
+            print(entry)
+    
+    def get_photo_statistics(self) -> Dict:
+        """Return the photo-related subset of ``get_statistics()``.
+
+        Keys: ``total_photos``, ``processed_photos``, ``unprocessed_photos``
+        (total minus processed) and ``processing_percentage``.
+        """
+        stats = self.get_statistics()
+
+        summary = {key: stats[key] for key in ('total_photos', 'processed_photos')}
+        summary['unprocessed_photos'] = stats['total_photos'] - stats['processed_photos']
+        summary['processing_percentage'] = stats['processing_percentage']
+        return summary
+    
+    def get_face_statistics(self) -> Dict:
+        """Return the face-related subset of ``get_statistics()``."""
+        wanted = (
+            'total_faces',
+            'identified_faces',
+            'unidentified_faces',
+            'identification_percentage',
+            'faces_per_photo',
+        )
+        stats = self.get_statistics()
+        return {key: stats[key] for key in wanted}
+    
+    def get_people_statistics(self) -> Dict:
+        """Get detailed people statistics"""
+        stats = self.get_statistics()
+        
+        return {
+            'total_people': stats['total_people'],
+            'faces_per_person': stats['faces_per_person']
+        }
+    
+    def get_tag_statistics(self) -> Dict:
+        """Get detailed tag statistics"""
+        stats = self.get_statistics()
+        
+        return {
+            'total_tags': stats['total_tags'],
+            'total_photo_tags': stats['total_photo_tags'],
+            'tags_per_photo': stats['tags_per_photo']
+        }
+    
+    def search_photos_by_date(self, date_from: str = None, date_to: str = None) -> List[Tuple]:
+        """Search photos by date range"""
+        # This would need to be implemented in the database module
+        # For now, return empty list
+        return []
+    
+    def search_photos_by_tags(self, tags: List[str], match_all: bool = False) -> List[Tuple]:
+        """Search photos by tags"""
+        # This would need to be implemented in the database module
+        # For now, return empty list
+        return []
+    
+    def search_photos_by_people(self, people: List[str]) -> List[Tuple]:
+        """Search photos by people"""
+        # This would need to be implemented in the database module
+        # For now, return empty list
+        return []
+    
+    def get_most_common_tags(self, limit: int = 10) -> List[Tuple[str, int]]:
+        """Get most commonly used tags"""
+        # This would need to be implemented in the database module
+        # For now, return empty list
+        return []
+    
+    def get_most_photographed_people(self, limit: int = 10) -> List[Tuple[str, int]]:
+        """Get most photographed people"""
+        # This would need to be implemented in the database module
+        # For now, return empty list
+        return []
+    
+    def get_photos_without_faces(self) -> List[Tuple]:
+        """Get photos that have no detected faces"""
+        # This would need to be implemented in the database module
+        # For now, return empty list
+        return []
+    
+    def get_photos_without_tags(self) -> List[Tuple]:
+        """Get photos that have no tags"""
+        # This would need to be implemented in the database module
+        # For now, return empty list
+        return []
+    
+    def get_duplicate_faces(self, tolerance: float = 0.6) -> List[Dict]:
+        """Get potential duplicate faces (same person, different photos)"""
+        # This would need to be implemented using face matching
+        # For now, return empty list
+        return []
+    
+    def get_face_quality_distribution(self) -> Dict:
+        """Get distribution of face quality scores"""
+        # This would need to be implemented in the database module
+        # For now, return empty dict
+        return {}
+    
+    def get_processing_timeline(self) -> List[Tuple[str, int]]:
+        """Get timeline of photo processing (photos processed per day)"""
+        # This would need to be implemented in the database module
+        # For now, return empty list
+        return []
+    
+    def export_statistics(self, filename: str = "punimtag_stats.json"):
+        """Export statistics to a JSON file"""
+        import json
+        
+        stats = self.get_statistics()
+        
+        try:
+            with open(filename, 'w') as f:
+                json.dump(stats, f, indent=2)
+            print(f"✅ Statistics exported to {filename}")
+        except Exception as e:
+            print(f"❌ Error exporting statistics: {e}")
+    
+    def generate_report(self) -> str:
+        """Generate a text report of statistics"""
+        stats = self.get_statistics()
+        
+        report = f"""
+PunimTag Database Report
+Generated: {__import__('datetime').datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+
+PHOTO STATISTICS:
+- Total photos: {stats['total_photos']}
+- Processed: {stats['processed_photos']} ({stats['processing_percentage']:.1f}%)
+- Unprocessed: {stats['total_photos'] - stats['processed_photos']}
+
+FACE STATISTICS:
+- Total faces: {stats['total_faces']}
+- Identified: {stats['identified_faces']} ({stats['identification_percentage']:.1f}%)
+- Unidentified: {stats['unidentified_faces']}
+- Average faces per photo: {stats['faces_per_photo']:.1f}
+
+PEOPLE STATISTICS:
+- Total people: {stats['total_people']}
+- Average faces per person: {stats['faces_per_person']:.1f}
+
+TAG STATISTICS:
+- Total tags: {stats['total_tags']}
+- Total photo-tag links: {stats['total_photo_tags']}
+- Average tags per photo: {stats['tags_per_photo']:.1f}
+"""
+        
+        return report
--- a/tag_management.py
+++ b/tag_management.py
@ -0,0 +1,264 @@
+#!/usr/bin/env python3
+"""
+Tag management functionality for PunimTag
+"""
+
+from typing import List, Dict, Tuple, Optional
+
+from config import DEFAULT_BATCH_SIZE
+from database import DatabaseManager
+
+
+class TagManager:
+    """Handles photo tagging and tag management operations"""
+    
+    def __init__(self, db_manager: DatabaseManager, verbose: int = 0):
+        """Initialize tag manager"""
+        self.db = db_manager
+        self.verbose = verbose
+    
+    def deduplicate_tags(self, tag_list: List[str]) -> List[str]:
+        """Remove duplicate tags from a list while preserving order (case insensitive)"""
+        seen = set()
+        unique_tags = []
+        for tag in tag_list:
+            if tag.lower() not in seen:
+                seen.add(tag.lower())
+                unique_tags.append(tag)
+        return unique_tags
+    
+    def parse_tags_string(self, tags_string: str) -> List[str]:
+        """Parse a comma-separated tags string into a list, handling empty strings and whitespace"""
+        if not tags_string or tags_string.strip() == "":
+            return []
+        # Split by comma and strip whitespace from each tag
+        tags = [tag.strip() for tag in tags_string.split(",")]
+        # Remove empty strings that might result from splitting
+        return [tag for tag in tags if tag]
+    
+    def add_tags_to_photos(self, photo_pattern: str = None, batch_size: int = DEFAULT_BATCH_SIZE) -> int:
+        """Add custom tags to photos via command line interface"""
+        if photo_pattern:
+            photos = self.db.get_photos_by_pattern(photo_pattern, batch_size)
+        else:
+            photos = self.db.get_photos_by_pattern(limit=batch_size)
+        
+        if not photos:
+            print("No photos found")
+            return 0
+        
+        print(f"🏷️  Tagging {len(photos)} photos (enter comma-separated tags)")
+        tagged_count = 0
+        
+        for photo_id, photo_path, filename, date_taken, processed in photos:
+            print(f"\n📸 {filename}")
+            tags_input = input("🏷️  Tags: ").strip()
+            
+            if tags_input.lower() == 'q':
+                break
+            
+            if tags_input:
+                tags = self.parse_tags_string(tags_input)
+                tags = self.deduplicate_tags(tags)
+                
+                for tag_name in tags:
+                    # Add tag to database and get its ID
+                    tag_id = self.db.add_tag(tag_name)
+                    if tag_id:
+                        # Link photo to tag
+                        self.db.link_photo_tag(photo_id, tag_id)
+                
+                print(f"   ✅ Added {len(tags)} tags")
+                tagged_count += 1
+        
+        print(f"✅ Tagged {tagged_count} photos")
+        return tagged_count
+    
+    def add_tags_to_photo(self, photo_id: int, tags: List[str]) -> int:
+        """Add tags to a specific photo"""
+        if not tags:
+            return 0
+        
+        tags = self.deduplicate_tags(tags)
+        added_count = 0
+        
+        for tag_name in tags:
+            # Add tag to database and get its ID
+            tag_id = self.db.add_tag(tag_name)
+            if tag_id:
+                # Link photo to tag
+                self.db.link_photo_tag(photo_id, tag_id)
+                added_count += 1
+        
+        return added_count
+    
+    def remove_tags_from_photo(self, photo_id: int, tags: List[str]) -> int:
+        """Remove tags from a specific photo"""
+        if not tags:
+            return 0
+        
+        removed_count = 0
+        tag_id_to_name, tag_name_to_id = self.db.load_tag_mappings()
+        
+        for tag_name in tags:
+            if tag_name in tag_name_to_id:
+                tag_id = tag_name_to_id[tag_name]
+                self.db.unlink_photo_tag(photo_id, tag_id)
+                removed_count += 1
+        
+        return removed_count
+    
+    def get_photo_tags(self, photo_id: int) -> List[str]:
+        """Get all tags for a specific photo"""
+        tag_ids = self.db.get_existing_tag_ids_for_photo(photo_id)
+        tag_id_to_name, _ = self.db.load_tag_mappings()
+        
+        tags = []
+        for tag_id in tag_ids:
+            tag_name = self.db.get_tag_name_by_id(tag_id, tag_id_to_name)
+            tags.append(tag_name)
+        
+        return tags
+    
+    def get_all_tags(self) -> List[Tuple[int, str]]:
+        """Get all tags in the database"""
+        tag_id_to_name, _ = self.db.load_tag_mappings()
+        return [(tag_id, tag_name) for tag_id, tag_name in tag_id_to_name.items()]
+    
+    def get_photos_with_tag(self, tag_name: str) -> List[Tuple]:
+        """Get all photos that have a specific tag"""
+        tag_id_to_name, tag_name_to_id = self.db.load_tag_mappings()
+        
+        if tag_name not in tag_name_to_id:
+            return []
+        
+        tag_id = tag_name_to_id[tag_name]
+        
+        # This would need to be implemented in the database module
+        # For now, return empty list
+        return []
+    
+    def get_tag_statistics(self) -> Dict:
+        """Get tag usage statistics"""
+        tag_id_to_name, _ = self.db.load_tag_mappings()
+        stats = {
+            'total_tags': len(tag_id_to_name),
+            'tag_usage': {}
+        }
+        
+        # Count usage for each tag
+        for tag_id, tag_name in tag_id_to_name.items():
+            # This would need to be implemented in the database module
+            # For now, set usage to 0
+            stats['tag_usage'][tag_name] = 0
+        
+        return stats
+    
+    def delete_tag(self, tag_name: str) -> bool:
+        """Delete a tag from the database (and all its linkages)"""
+        tag_id_to_name, tag_name_to_id = self.db.load_tag_mappings()
+        
+        if tag_name not in tag_name_to_id:
+            return False
+        
+        tag_id = tag_name_to_id[tag_name]
+        
+        # This would need to be implemented in the database module
+        # For now, return False
+        return False
+    
+    def rename_tag(self, old_name: str, new_name: str) -> bool:
+        """Rename a tag"""
+        tag_id_to_name, tag_name_to_id = self.db.load_tag_mappings()
+        
+        if old_name not in tag_name_to_id:
+            return False
+        
+        if new_name in tag_name_to_id:
+            return False  # New name already exists
+        
+        tag_id = tag_name_to_id[old_name]
+        
+        # This would need to be implemented in the database module
+        # For now, return False
+        return False
+    
+    def merge_tags(self, source_tag: str, target_tag: str) -> bool:
+        """Merge one tag into another (move all linkages from source to target)"""
+        tag_id_to_name, tag_name_to_id = self.db.load_tag_mappings()
+        
+        if source_tag not in tag_name_to_id or target_tag not in tag_name_to_id:
+            return False
+        
+        source_tag_id = tag_name_to_id[source_tag]
+        target_tag_id = tag_name_to_id[target_tag]
+        
+        # This would need to be implemented in the database module
+        # For now, return False
+        return False
+    
+    def get_photos_by_tags(self, tags: List[str], match_all: bool = False) -> List[Tuple]:
+        """Get photos that have any (or all) of the specified tags"""
+        if not tags:
+            return []
+        
+        tag_id_to_name, tag_name_to_id = self.db.load_tag_mappings()
+        tag_ids = []
+        
+        for tag_name in tags:
+            if tag_name in tag_name_to_id:
+                tag_ids.append(tag_name_to_id[tag_name])
+        
+        if not tag_ids:
+            return []
+        
+        # This would need to be implemented in the database module
+        # For now, return empty list
+        return []
+    
+    def get_common_tags(self, photo_ids: List[int]) -> List[str]:
+        """Get tags that are common to all specified photos"""
+        if not photo_ids:
+            return []
+        
+        # Get tags for each photo
+        all_photo_tags = []
+        for photo_id in photo_ids:
+            tags = self.get_photo_tags(photo_id)
+            all_photo_tags.append(set(tags))
+        
+        if not all_photo_tags:
+            return []
+        
+        # Find intersection of all tag sets
+        common_tags = set.intersection(*all_photo_tags)
+        return list(common_tags)
+    
+    def get_suggested_tags(self, photo_id: int, limit: int = 5) -> List[str]:
+        """Get suggested tags based on similar photos"""
+        # This is a placeholder for tag suggestion logic
+        # Could be implemented based on:
+        # - Tags from photos in the same folder
+        # - Tags from photos taken on the same date
+        # - Most commonly used tags
+        # - Machine learning based suggestions
+        
+        return []
+    
+    def validate_tag_name(self, tag_name: str) -> Tuple[bool, str]:
+        """Validate a tag name and return (is_valid, error_message)"""
+        if not tag_name or not tag_name.strip():
+            return False, "Tag name cannot be empty"
+        
+        tag_name = tag_name.strip()
+        
+        if len(tag_name) > 50:
+            return False, "Tag name is too long (max 50 characters)"
+        
+        if ',' in tag_name:
+            return False, "Tag name cannot contain commas"
+        
+        if tag_name.lower() in ['all', 'none', 'untagged']:
+            return False, "Tag name is reserved"
+        
+        return True, ""