"""add processed column to photos

Revision ID: add_processed_to_photos_20251103
Revises: 4d53a59b0e41
Create Date: 2025-11-03
"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'add_processed_to_photos_20251103'
down_revision = '4d53a59b0e41'
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add the NOT NULL ``photos.processed`` flag and index it."""
    # The temporary server default fills existing rows with FALSE so the
    # NOT NULL column can be added in a single ADD COLUMN statement.
    op.add_column(
        'photos',
        sa.Column('processed', sa.Boolean(), nullable=False, server_default=sa.false()),
    )

    # Drop the server default once the rows are backfilled. SQLite has no
    # ALTER COLUMN, so a plain op.alter_column() fails there; batch mode
    # recreates the table on SQLite and issues a normal ALTER elsewhere.
    # This must be a separate step from add_column so the backfill has
    # already happened when the default disappears.
    with op.batch_alter_table('photos') as batch_op:
        batch_op.alter_column(
            'processed',
            existing_type=sa.Boolean(),
            existing_nullable=False,
            server_default=None,
        )

    op.create_index('ix_photos_processed', 'photos', ['processed'], unique=False)


def downgrade() -> None:
    """Remove the ``photos.processed`` index and column."""
    # Batch mode keeps DROP COLUMN working on SQLite versions that lack it.
    with op.batch_alter_table('photos') as batch_op:
        batch_op.drop_index('ix_photos_processed')
        batch_op.drop_column('processed')
#!/usr/bin/env python3
"""Recreate all tables from models (fresh start)."""

import os
import sys

# Add project root to path
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, PROJECT_ROOT)

from src.web.db.models import Base
from src.web.db.session import engine, get_database_url


def recreate_tables():
    """Create every model table and stamp Alembic at the latest revision.

    ``create_all`` only creates tables that do not exist yet; tables that
    are already present are left untouched.

    Raises:
        FileNotFoundError: If ``alembic.ini`` is not in the project root.
    """
    db_url = get_database_url()
    print(f"Connecting to database: {db_url}")

    # Create all tables from models
    print("\nCreating all tables from models...")
    Base.metadata.create_all(bind=engine)

    print("✅ All tables created successfully!")

    # Stamp Alembic to latest migration
    print("\nMarking database as up-to-date with migrations...")
    from alembic.config import Config
    from alembic import command
    from alembic.script import ScriptDirectory

    # Resolve the config against the project root rather than the current
    # working directory, so the script works from any directory (the same
    # reason the project root is pushed onto sys.path above).
    alembic_ini = os.path.join(PROJECT_ROOT, "alembic.ini")
    if not os.path.isfile(alembic_ini):
        raise FileNotFoundError(f"Alembic config not found: {alembic_ini}")
    alembic_cfg = Config(alembic_ini)

    # A relative script_location is also interpreted relative to the
    # working directory; anchor it to the project root. Values containing
    # ":" use Alembic's "package:path" form and are left alone.
    script_location = alembic_cfg.get_main_option("script_location")
    if (
        script_location
        and not os.path.isabs(script_location)
        and ":" not in script_location
    ):
        alembic_cfg.set_main_option(
            "script_location", os.path.join(PROJECT_ROOT, script_location)
        )

    script = ScriptDirectory.from_config(alembic_cfg)

    # Get the latest revision
    head = script.get_current_head()
    print(f"Stamping database to revision: {head}")
    command.stamp(alembic_cfg, head)

    print("✅ Database is now fresh and ready to use!")


if __name__ == "__main__":
    try:
        recreate_tables()
    except Exception as e:
        print(f"❌ Error recreating tables: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
def show_table_structure(table_name: str, inspector):
    """Print the columns, indexes and foreign keys of one database table.

    Args:
        table_name: Name of the table to describe.
        inspector: Object exposing ``get_columns``, ``get_pk_constraint``,
            ``get_indexes`` and ``get_foreign_keys`` (a SQLAlchemy inspector).
    """
    banner = '=' * 80
    print(f"\n{banner}")
    print(f"Table: {table_name}")
    print(f"{banner}")

    # Primary-key membership comes from the PK constraint. The column dicts
    # are not guaranteed to carry a 'primary_key' entry, so relying on it
    # alone reports "No" for every column on some backends.
    pk_constraint = inspector.get_pk_constraint(table_name) or {}
    pk_columns = set(pk_constraint.get('constrained_columns') or ())

    # Get columns
    print("\nColumns:")
    print(f"{'Name':<30} {'Type':<25} {'Nullable':<10} {'Primary Key':<12} {'Default'}")
    print("-" * 100)

    for col in inspector.get_columns(table_name):
        col_type = str(col['type'])
        nullable = "Yes" if col['nullable'] else "No"
        is_pk = col['name'] in pk_columns or bool(col.get('primary_key'))
        primary_key = "Yes" if is_pk else "No"
        raw_default = col.get('default')
        default = str(raw_default)[:30] if raw_default else ''
        row = f"{col['name']:<30} {col_type:<25} {nullable:<10} {primary_key:<12} {default}"
        print(row.rstrip())

    # Get indexes
    indexes = inspector.get_indexes(table_name)
    if indexes:
        print("\nIndexes:")
        for idx in indexes:
            unique = "UNIQUE" if idx.get('unique', False) else ""
            # Expression-based index members are reported as None.
            columns_str = ", ".join(
                name if name is not None else "<expression>"
                for name in idx['column_names']
            )
            print(f"  {idx['name']}: {columns_str} {unique}".rstrip())

    # Get foreign keys
    foreign_keys = inspector.get_foreign_keys(table_name)
    if foreign_keys:
        print("\nForeign Keys:")
        for fk in foreign_keys:
            constrained_cols = ", ".join(fk['constrained_columns'])
            referred_table = fk['referred_table']
            referred_cols = ", ".join(fk['referred_columns'])
            print(f"  {constrained_cols} -> {referred_table}({referred_cols})")
def _show_model_table(table):
    """Print columns, indexes and foreign keys of one model-defined table."""
    print(f"\n{'='*80}")
    print(f"Table: {table.name}")
    print(f"{'='*80}")
    print("\nColumns:")
    for col in table.columns:
        nullable = "Yes" if col.nullable else "No"
        primary_key = "Yes" if col.primary_key else "No"
        default = str(col.default) if col.default else ''
        print(f"  {col.name:<30} {col.type!s:<25} Nullable: {nullable:<10} PK: {primary_key:<12} Default: {default}")

    # Show indexes (a set on the table, so sort for stable output)
    indexes = sorted(table.indexes, key=lambda idx: str(idx.name))
    if indexes:
        print("\nIndexes:")
        for idx in indexes:
            unique = "UNIQUE" if idx.unique else ""
            cols = ", ".join(c.name for c in idx.columns)
            print(f"  {idx.name}: {cols} {unique}")

    # Show foreign keys
    fks = sorted(table.foreign_keys, key=lambda fk: fk.parent.name)
    if fks:
        print("\nForeign Keys:")
        for fk in fks:
            print(f"  {fk.parent.name} -> {fk.column.table.name}({fk.column.name})")


def show_all_tables():
    """Show all tables and their structures.

    Lists the tables found in the database and prints each one's structure.
    When the database has no tables, prints the structures declared on
    ``Base.metadata`` instead.
    """
    db_url = get_database_url()
    print(f"Database: {db_url}")
    print(f"\n{'='*80}")

    # Create inspector
    inspector = inspect(engine)

    # Get all table names
    table_names = sorted(inspector.get_table_names())

    if not table_names:
        print("No tables found in database.")
        print("\nTables should be created on web app startup.")
        print("\nHere are the table structures from models:")

        # Walk the metadata rather than importing model classes by name:
        # a hard-coded class/table list breaks with ImportError as soon as
        # a model is renamed, and silently misses newly added tables.
        for table in Base.metadata.sorted_tables:
            _show_model_table(table)
        return

    print(f"\nFound {len(table_names)} table(s):")
    for table_name in table_names:
        print(f"  - {table_name}")

    # Show structure for each table
    for table_name in table_names:
        show_table_structure(table_name, inspector)
import Base, engine +from src.web.db.session import database_url # Global worker process (will be set in lifespan) _worker_process: subprocess.Popen | None = None @@ -86,6 +88,17 @@ def stop_worker() -> None: @asynccontextmanager async def lifespan(app: FastAPI): """Lifespan context manager for startup and shutdown events.""" + # Ensure database exists and tables are created on first run + try: + if database_url.startswith("sqlite"): + db_path = database_url.replace("sqlite:///", "") + db_file = Path(db_path) + db_file.parent.mkdir(parents=True, exist_ok=True) + Base.metadata.create_all(bind=engine) + print("✅ Database initialized") + except Exception as exc: + print(f"❌ Database initialization failed: {exc}") + raise # Startup start_worker() yield diff --git a/src/web/db/models.py b/src/web/db/models.py index 618fdde..27d0648 100644 --- a/src/web/db/models.py +++ b/src/web/db/models.py @@ -1,147 +1,170 @@ -"""SQLAlchemy models for PunimTag Web.""" +"""SQLAlchemy models for PunimTag Web - matching desktop schema exactly.""" from __future__ import annotations -from datetime import datetime +from datetime import datetime, date from typing import TYPE_CHECKING from sqlalchemy import ( Boolean, Column, + Date, DateTime, ForeignKey, Index, Integer, LargeBinary, - String, + Numeric, Text, UniqueConstraint, + CheckConstraint, ) from sqlalchemy.orm import declarative_base, relationship if TYPE_CHECKING: - from datetime import date + pass Base = declarative_base() class Photo(Base): - """Photo model.""" + """Photo model - matches desktop schema exactly.""" __tablename__ = "photos" - id = Column(Integer, primary_key=True, index=True) - path = Column(String(2048), unique=True, nullable=False, index=True) - filename = Column(String(512), nullable=False) - checksum = Column(String(64), unique=True, nullable=True, index=True) + id = Column(Integer, primary_key=True, autoincrement=True, index=True) + path = Column(Text, unique=True, nullable=False, index=True) + filename = 
Column(Text, nullable=False) date_added = Column(DateTime, default=datetime.utcnow, nullable=False) - date_taken = Column(DateTime, nullable=True, index=True) - width = Column(Integer, nullable=True) - height = Column(Integer, nullable=True) - mime_type = Column(String(128), nullable=True) + date_taken = Column(Date, nullable=True, index=True) + processed = Column(Boolean, default=False, nullable=False, index=True) faces = relationship("Face", back_populates="photo", cascade="all, delete-orphan") photo_tags = relationship( - "PhotoTag", back_populates="photo", cascade="all, delete-orphan" + "PhotoTagLinkage", back_populates="photo", cascade="all, delete-orphan" + ) + + __table_args__ = ( + Index("idx_photos_processed", "processed"), + Index("idx_photos_date_taken", "date_taken"), + Index("idx_photos_date_added", "date_added"), ) class Person(Base): - """Person model.""" + """Person model - matches desktop schema exactly.""" __tablename__ = "people" - id = Column(Integer, primary_key=True, index=True) - display_name = Column(String(256), nullable=False, index=True) - given_name = Column(String(128), nullable=True) - family_name = Column(String(128), nullable=True) - notes = Column(Text, nullable=True) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + id = Column(Integer, primary_key=True, autoincrement=True, index=True) + first_name = Column(Text, nullable=False) + last_name = Column(Text, nullable=False) + middle_name = Column(Text, nullable=True) + maiden_name = Column(Text, nullable=True) + date_of_birth = Column(Date, nullable=True) + created_date = Column(DateTime, default=datetime.utcnow, nullable=False) faces = relationship("Face", back_populates="person") - person_embeddings = relationship( - "PersonEmbedding", back_populates="person", cascade="all, delete-orphan" + person_encodings = relationship( + "PersonEncoding", back_populates="person", cascade="all, delete-orphan" + ) + + __table_args__ = ( + UniqueConstraint( + "first_name", 
"last_name", "middle_name", "maiden_name", "date_of_birth", + name="uq_people_names_dob" + ), ) class Face(Base): - """Face detection model.""" + """Face detection model - matches desktop schema exactly.""" __tablename__ = "faces" - id = Column(Integer, primary_key=True, index=True) + id = Column(Integer, primary_key=True, autoincrement=True, index=True) photo_id = Column(Integer, ForeignKey("photos.id"), nullable=False, index=True) person_id = Column(Integer, ForeignKey("people.id"), nullable=True, index=True) - bbox_x = Column(Integer, nullable=False) - bbox_y = Column(Integer, nullable=False) - bbox_w = Column(Integer, nullable=False) - bbox_h = Column(Integer, nullable=False) - embedding = Column(LargeBinary, nullable=False) - confidence = Column(Integer, nullable=True) - quality = Column(Integer, nullable=True, index=True) - model = Column(String(64), nullable=True) - detector = Column(String(64), nullable=True) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + encoding = Column(LargeBinary, nullable=False) + location = Column(Text, nullable=False) + confidence = Column(Numeric, default=0.0, nullable=False) + quality_score = Column(Numeric, default=0.0, nullable=False, index=True) + is_primary_encoding = Column(Boolean, default=False, nullable=False) + detector_backend = Column(Text, default="retinaface", nullable=False) + model_name = Column(Text, default="ArcFace", nullable=False) + face_confidence = Column(Numeric, default=0.0, nullable=False) + exif_orientation = Column(Integer, nullable=True) photo = relationship("Photo", back_populates="faces") person = relationship("Person", back_populates="faces") - person_embeddings = relationship( - "PersonEmbedding", back_populates="face", cascade="all, delete-orphan" + person_encodings = relationship( + "PersonEncoding", back_populates="face", cascade="all, delete-orphan" ) - __table_args__ = (Index("idx_faces_quality", "quality"),) + __table_args__ = ( + Index("idx_faces_person_id", 
"person_id"), + Index("idx_faces_photo_id", "photo_id"), + Index("idx_faces_quality", "quality_score"), + ) -class PersonEmbedding(Base): - """Person embedding reference model.""" +class PersonEncoding(Base): + """Person encoding model - matches desktop schema exactly (was person_encodings).""" - __tablename__ = "person_embeddings" + __tablename__ = "person_encodings" - id = Column(Integer, primary_key=True, index=True) + id = Column(Integer, primary_key=True, autoincrement=True, index=True) person_id = Column(Integer, ForeignKey("people.id"), nullable=False, index=True) face_id = Column(Integer, ForeignKey("faces.id"), nullable=False, index=True) - embedding = Column(LargeBinary, nullable=False) - quality = Column(Integer, nullable=True, index=True) - model = Column(String(64), nullable=True) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + encoding = Column(LargeBinary, nullable=False) + quality_score = Column(Numeric, default=0.0, nullable=False, index=True) + detector_backend = Column(Text, default="retinaface", nullable=False) + model_name = Column(Text, default="ArcFace", nullable=False) + created_date = Column(DateTime, default=datetime.utcnow, nullable=False) - person = relationship("Person", back_populates="person_embeddings") - face = relationship("Face", back_populates="person_embeddings") + person = relationship("Person", back_populates="person_encodings") + face = relationship("Face", back_populates="person_encodings") __table_args__ = ( - Index("idx_person_embeddings_quality", "quality"), - Index("idx_person_embeddings_person", "person_id"), + Index("idx_person_encodings_person_id", "person_id"), + Index("idx_person_encodings_quality", "quality_score"), ) class Tag(Base): - """Tag model.""" + """Tag model - matches desktop schema exactly.""" __tablename__ = "tags" - id = Column(Integer, primary_key=True, index=True) - tag = Column(String(128), unique=True, nullable=False, index=True) - created_at = Column(DateTime, 
default=datetime.utcnow, nullable=False) + id = Column(Integer, primary_key=True, autoincrement=True, index=True) + tag_name = Column(Text, unique=True, nullable=False, index=True) + created_date = Column(DateTime, default=datetime.utcnow, nullable=False) photo_tags = relationship( - "PhotoTag", back_populates="tag", cascade="all, delete-orphan" + "PhotoTagLinkage", back_populates="tag", cascade="all, delete-orphan" ) -class PhotoTag(Base): - """Photo-Tag linkage model.""" +class PhotoTagLinkage(Base): + """Photo-Tag linkage model - matches desktop schema exactly (was phototaglinkage).""" - __tablename__ = "photo_tags" + __tablename__ = "phototaglinkage" - photo_id = Column(Integer, ForeignKey("photos.id"), primary_key=True) - tag_id = Column(Integer, ForeignKey("tags.id"), primary_key=True) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + linkage_id = Column(Integer, primary_key=True, autoincrement=True) + photo_id = Column(Integer, ForeignKey("photos.id"), nullable=False, index=True) + tag_id = Column(Integer, ForeignKey("tags.id"), nullable=False, index=True) + linkage_type = Column( + Integer, default=0, nullable=False, + server_default="0" + ) + created_date = Column(DateTime, default=datetime.utcnow, nullable=False) photo = relationship("Photo", back_populates="photo_tags") tag = relationship("Tag", back_populates="photo_tags") __table_args__ = ( UniqueConstraint("photo_id", "tag_id", name="uq_photo_tag"), + CheckConstraint("linkage_type IN (0, 1)", name="ck_linkage_type"), Index("idx_photo_tags_tag", "tag_id"), Index("idx_photo_tags_photo", "photo_id"), ) diff --git a/src/web/services/face_service.py b/src/web/services/face_service.py index 5fa6ae1..0bd5255 100644 --- a/src/web/services/face_service.py +++ b/src/web/services/face_service.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json import os import tempfile import time @@ -10,6 +11,7 @@ from typing import Callable, Optional, Tuple import numpy as np from PIL 
# 3x3 Laplacian kernel. The "sharpness" term below is the variance of this
# kernel itself (a constant), not of a convolved image; the source comments
# state this mirrors the desktop implementation on purpose, so it is kept.
_LAPLACIAN_KERNEL = np.array(
    [[0, -1, 0], [-1, 4, -1], [0, -1, 0]], dtype=np.float32
)


def _pre_warm_deepface(
    detector_backend: str,
    model_name: str,
    update_progress: Optional[Callable[[int, int, str, int, int], None]] = None,
) -> None:
    """Pre-warm DeepFace models by making a small initialization call.

    Runs one ``DeepFace.represent`` call on a blank image so that model
    loading happens here rather than on the first real photo. Any failure
    is logged and swallowed; nothing is raised.

    Args:
        detector_backend: DeepFace detector backend
        model_name: DeepFace model name
        update_progress: Optional progress callback
    """
    if not DEEPFACE_AVAILABLE:
        return

    if update_progress:
        update_progress(0, 0, "Loading DeepFace models (this may take a moment)...", 0, 0)

    print(f"[DeepFace] Pre-warming models: detector={detector_backend}, model={model_name}")
    # Monotonic clock: the elapsed time cannot be skewed by wall-clock jumps.
    start_time = time.perf_counter()

    try:
        # Blank 100x100 RGB image: enough to trigger model loading without
        # touching a real photo. No face will be found, which is fine.
        dummy_array = np.zeros((100, 100, 3), dtype=np.uint8)

        DeepFace.represent(
            img_path=dummy_array,
            model_name=model_name,
            detector_backend=detector_backend,
            enforce_detection=False,  # Don't fail if no faces
            align=DEEPFACE_ALIGN_FACES,
        )

        elapsed = time.perf_counter() - start_time
        print(f"[DeepFace] Models loaded in {elapsed:.2f}s")
        if update_progress:
            update_progress(0, 0, f"DeepFace models ready ({elapsed:.1f}s)", 0, 0)
    except Exception as e:
        # Best effort: if pre-warming fails, the caller carries on anyway.
        elapsed = time.perf_counter() - start_time
        print(f"[DeepFace] Pre-warming completed (with warnings) in {elapsed:.2f}s: {e}")


def calculate_face_quality_score(
    image_np: np.ndarray,
    face_location: dict,
    image_width: int,
    image_height: int,
) -> int:
    """Calculate face quality score (0-100).

    Combines size, sharpness, brightness, contrast, aspect-ratio and
    position terms with fixed weights.

    Args:
        image_np: Image as numpy array
        face_location: Face location dict with x, y, w, h
        image_width: Image width (unused; dimensions come from image_np)
        image_height: Image height (unused; dimensions come from image_np)

    Returns:
        Quality score from 0-100. Returns 0 when the face box does not
        overlap the image, and 50 when the calculation raises.
    """
    try:
        # DeepFace format: {x, y, w, h}
        left = face_location.get('x', 0)
        top = face_location.get('y', 0)
        face_width = face_location.get('w', 0)
        face_height = face_location.get('h', 0)
        right = left + face_width
        bottom = top + face_height

        # Basic size check - faces too small get lower scores
        min_face_size = 50
        size_score = min(1.0, (face_height * face_width) / (min_face_size * min_face_size))

        # Clamp the crop to the image. Without this a negative x or y is
        # treated as an index from the far edge, which yields an empty or
        # unrelated region for boxes that start slightly outside the image.
        img_height, img_width = image_np.shape[:2]
        crop_top = min(max(int(top), 0), img_height)
        crop_bottom = min(max(int(bottom), 0), img_height)
        crop_left = min(max(int(left), 0), img_width)
        crop_right = min(max(int(right), 0), img_width)

        face_region = image_np[crop_top:crop_bottom, crop_left:crop_right]
        if face_region.size == 0:
            return 0

        # Convert to grayscale for analysis
        if len(face_region.shape) == 3:
            gray_face = np.mean(face_region, axis=2)
        else:
            gray_face = face_region

        # Constant sharpness term (see _LAPLACIAN_KERNEL).
        sharpness = np.var(_LAPLACIAN_KERNEL)
        sharpness_score = min(1.0, sharpness / 1000.0)

        # Prefer middle brightness and good contrast
        mean_brightness = np.mean(gray_face)
        brightness_score = 1.0 - abs(mean_brightness - 128) / 128.0
        contrast_score = min(1.0, np.std(gray_face) / 64.0)

        # Prefer roughly square faces
        aspect_ratio = face_width / face_height if face_height > 0 else 1.0
        aspect_score = 1.0 - abs(aspect_ratio - 1.0)

        # Prefer faces near the image center (uses the unclamped box)
        center_x = (left + right) / 2
        center_y = (top + bottom) / 2
        position_x_score = 1.0 - abs(center_x - img_width / 2) / (img_width / 2)
        position_y_score = 1.0 - abs(center_y - img_height / 2) / (img_height / 2)
        position_score = (position_x_score + position_y_score) / 2.0

        # Weighted combination of all factors
        quality_score = (
            size_score * 0.25
            + sharpness_score * 0.25
            + brightness_score * 0.15
            + contrast_score * 0.15
            + aspect_score * 0.10
            + position_score * 0.10
        )

        quality_score = max(0.0, min(1.0, quality_score))
        return int(quality_score * 100)

    except Exception as e:
        print(f"[FaceService] ⚠️  Error calculating face quality score: {e}")
        # Return a default quality score on error
        return 50


def is_valid_face_detection(
    confidence: float,
    face_location: dict,
    image_width: int,
    image_height: int,
) -> bool:
    """Check if face detection meets minimum criteria.

    Args:
        confidence: Face detection confidence score (0-1 range)
        face_location: Face location dict with x, y, w, h
        image_width: Image width
        image_height: Image height

    Returns:
        True if face is valid, False otherwise
    """
    is_valid, _ = is_valid_face_detection_with_reason(
        confidence, face_location, image_width, image_height
    )
    return is_valid


def is_valid_face_detection_with_reason(
    confidence: float,
    face_location: dict,
    image_width: int,
    image_height: int,
) -> Tuple[bool, str]:
    """Check if face detection meets minimum criteria and return reason if invalid.

    Args:
        confidence: Face detection confidence score (0-1 range)
        face_location: Face location dict with x, y, w, h
        image_width: Image width (unused but kept for compatibility)
        image_height: Image height (unused but kept for compatibility)

    Returns:
        Tuple of (is_valid: bool, reason: str).
        reason is an empty string if valid; otherwise it has the form
        "<category>: <details>" so callers can group failures by the text
        before the colon. If the check itself raises, the face is accepted.
    """
    # Thresholds local to this check.
    min_aspect, max_aspect = 0.4, 2.5
    small_face_area = 6400          # below 80x80 pixels
    small_face_min_confidence = 0.6
    edge_margin = 10                # pixels from the top/left edge
    edge_min_confidence = 0.65

    try:
        face_confidence = confidence

        if face_confidence < MIN_FACE_CONFIDENCE:
            return False, (
                f"confidence too low: got {face_confidence:.3f}, "
                f"need >= {MIN_FACE_CONFIDENCE}"
            )

        width = face_location.get('w', 0)
        height = face_location.get('h', 0)

        # Too small faces are likely false positives
        if width < MIN_FACE_SIZE or height < MIN_FACE_SIZE:
            return False, (
                f"size too small: got {width}x{height}, "
                f"need >= {MIN_FACE_SIZE}x{MIN_FACE_SIZE}"
            )

        # Too large faces might be full-image false positives
        if width > MAX_FACE_SIZE or height > MAX_FACE_SIZE:
            return False, (
                f"size too large: got {width}x{height}, "
                f"need <= {MAX_FACE_SIZE}x{MAX_FACE_SIZE}"
            )

        # Faces should be roughly square (not too wide/tall)
        aspect_ratio = width / height if height > 0 else 1.0
        if aspect_ratio < min_aspect or aspect_ratio > max_aspect:
            return False, (
                f"aspect ratio out of range: got {aspect_ratio:.2f}, need 0.4-2.5"
            )

        # Small faces need higher confidence to be accepted
        face_area = width * height
        if face_area < small_face_area and face_confidence < small_face_min_confidence:
            return False, (
                f"small face needs higher confidence: area={face_area}, "
                f"confidence={face_confidence:.3f}, need >= 0.6"
            )

        # Faces touching the top/left edge need higher confidence
        x = face_location.get('x', 0)
        y = face_location.get('y', 0)
        near_edge = x < edge_margin or y < edge_margin
        if near_edge and face_confidence < edge_min_confidence:
            return False, (
                f"edge face needs higher confidence: x={x}, y={y}, "
                f"confidence={face_confidence:.3f}, need >= 0.65"
            )

        return True, ""

    except Exception as e:
        # Accept on error rather than dropping the detection
        print(f"[FaceService] ⚠️  Error validating face detection: {e}")
        return True, ""
== photo.id, - Face.detector == detector_backend, - Face.model == model_name, - ).count() - - if existing_faces > 0: - # Already processed with this configuration - return existing_faces, existing_faces + # Skip if already processed (desktop parity) + try: + if getattr(photo, 'processed', False): + return 0, 0 + except Exception: + pass try: # Get EXIF orientation @@ -201,6 +349,13 @@ def process_photo_faces( pass if not results: + # Mark photo as processed even if no faces found (desktop parity) + try: + photo.processed = True + db.add(photo) + db.commit() + except Exception: + db.rollback() return 0, 0 # Load image for quality calculation @@ -208,50 +363,121 @@ def process_photo_faces( image_np = np.array(image) image_width, image_height = image.size + # Count total faces from DeepFace faces_detected = len(results) faces_stored = 0 + validation_failures = {} # Track failures by reason type - for result in results: - facial_area = result.get('facial_area', {}) - face_confidence = result.get('face_confidence', 0.0) - embedding = np.array(result['embedding']) + print(f"[FaceService] Processing {faces_detected} faces from DeepFace for {photo.filename} " + f"(image size: {image_width}x{image_height})") + + for idx, result in enumerate(results): + # Debug: Print full result to see what DeepFace returns + if idx == 0: + print(f"[FaceService] Debug - DeepFace result keys: {result.keys()}") + print(f"[FaceService] Debug - Sample result structure: {list(result.keys())}") - # Convert to location format + facial_area = result.get('facial_area', {}) + # Try multiple possible confidence fields (matching desktop version) + # Desktop uses: result.get('face_confidence', 0.0) + face_confidence = result.get('face_confidence', 0.0) + + # If confidence is 0.0, DeepFace might not provide it for this detector + # Some detectors don't return confidence - in that case, use a default + # Default to 0.5 (medium confidence) if missing, so faces aren't automatically rejected + if 
face_confidence == 0.0: + # Try alternative fields + face_confidence = result.get('confidence', + result.get('detection_confidence', + facial_area.get('confidence', 0.5))) # Default to 0.5 if completely missing + + # Debug first face to see what DeepFace returns + if idx == 0: + print(f"[FaceService] Debug - DeepFace result keys: {list(result.keys())}") + if facial_area: + print(f"[FaceService] Debug - facial_area keys: {list(facial_area.keys())}") + print(f"[FaceService] Debug - facial_area content: {facial_area}") + print(f"[FaceService] Debug - face_confidence value: {face_confidence}") + print(f"[FaceService] Debug - result['face_confidence'] exists: {'face_confidence' in result}") + + encoding = np.array(result['embedding']) + + # Convert to location format (JSON string like desktop version) location = { 'x': facial_area.get('x', 0), 'y': facial_area.get('y', 0), 'w': facial_area.get('w', 0), 'h': facial_area.get('h', 0), } + location_str = json.dumps(location) - # Validate face detection - if not is_valid_face_detection(face_confidence, location, image_width, image_height): - continue - - # Calculate quality score - quality_score = calculate_face_quality_score( - image_np, location, image_width, image_height + # Validate face detection with detailed error reporting + # Match desktop version: pass confidence as-is, validation function handles normalization + is_valid, reason = is_valid_face_detection_with_reason( + face_confidence, location, image_width, image_height ) - # Store face in database + if not is_valid: + # Extract failure type from reason (first word before colon) + failure_type = reason.split(':')[0].strip() if ':' in reason else reason + validation_failures[failure_type] = validation_failures.get(failure_type, 0) + 1 + print(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename} failed validation: " + f"{reason} - confidence={face_confidence:.3f}, " + f"location={location}, size={location['w']}x{location['h']}") + continue + + 
print(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename} passed validation: " + f"confidence={face_confidence:.3f}, size={location['w']}x{location['h']}") + + # Calculate quality score (0.0-1.0 range for desktop compatibility) + quality_score_int = calculate_face_quality_score( + image_np, location, image_width, image_height + ) + # Convert from 0-100 to 0.0-1.0 for database (desktop stores REAL) + quality_score = quality_score_int / 100.0 + + # Store face in database - match desktop schema exactly + # Desktop: confidence REAL DEFAULT 0.0 (legacy), face_confidence REAL (actual) + # Desktop: quality_score REAL DEFAULT 0.0 (0.0-1.0 range) face = Face( photo_id=photo.id, person_id=None, - bbox_x=location['x'], - bbox_y=location['y'], - bbox_w=location['w'], - bbox_h=location['h'], - embedding=embedding.tobytes(), - confidence=int(face_confidence * 100) if face_confidence <= 1.0 else int(face_confidence), - quality=quality_score, - model=model_name, - detector=detector_backend, + encoding=encoding.tobytes(), + location=location_str, + confidence=0.0, # Legacy field (desktop keeps at 0.0) + quality_score=quality_score, # REAL in 0.0-1.0 range + is_primary_encoding=False, + detector_backend=detector_backend, + model_name=model_name, + face_confidence=face_confidence, # REAL in 0.0-1.0 range + exif_orientation=exif_orientation, ) db.add(face) faces_stored += 1 db.commit() + + # Mark photo as processed after handling faces (desktop parity) + try: + photo.processed = True + db.add(photo) + db.commit() + except Exception: + db.rollback() + + # Log summary + if faces_stored < faces_detected: + print(f"[FaceService] Summary for {photo.filename}: " + f"{faces_detected} faces detected, {faces_stored} faces stored, " + f"{faces_detected - faces_stored} failed validation") + for reason, count in validation_failures.items(): + if count > 0: + print(f"[FaceService] - {reason}: {count}") + else: + print(f"[FaceService] Summary for {photo.filename}: " + 
f"{faces_detected} faces detected, {faces_stored} faces stored") + return faces_detected, faces_stored except Exception as e: @@ -279,38 +505,26 @@ def process_unprocessed_photos( Tuple of (photos_processed, total_faces_detected, total_faces_stored) """ print(f"[FaceService] Starting face processing: detector={detector_backend}, model={model_name}, batch_size={batch_size}") + overall_start = time.time() - # Update progress - querying photos + # Update progress - querying unprocessed photos if update_progress: - update_progress(0, 0, "Querying photos from database...", 0, 0) + batch_msg = f"Finding up to {batch_size} photos" if batch_size else "Finding photos" + update_progress(0, 0, f"{batch_msg} that need processing...", 0, 0) - # Get all photos - all_photos = db.query(Photo).all() - print(f"[FaceService] Found {len(all_photos)} total photos in database") - - # Update progress - filtering photos - if update_progress: - update_progress(0, len(all_photos), "Checking which photos need processing...", 0, 0) - - # Filter for photos that need processing (no faces with current detector/model) - unprocessed_photos = [] - for idx, photo in enumerate(all_photos, 1): - # Check if photo has faces with current detector/model - existing_face = db.query(Face).filter( - Face.photo_id == photo.id, - Face.detector == detector_backend, - Face.model == model_name, - ).first() - - if existing_face is None: - unprocessed_photos.append(photo) - - # Update progress every 10 photos while filtering - if update_progress and idx % 10 == 0: - update_progress(0, len(all_photos), f"Checking photos... 
({idx}/{len(all_photos)})", 0, 0) + # Desktop parity: find photos that are not yet processed + query_start = time.time() + unprocessed_query = db.query(Photo).filter(getattr(Photo, 'processed') == False) # noqa: E712 + # Apply batch size limit BEFORE executing query to avoid loading unnecessary photos + # When batch_size is set, only that many photos are fetched from the database if batch_size: - unprocessed_photos = unprocessed_photos[:batch_size] + unprocessed_query = unprocessed_query.limit(batch_size) + + # Execute query - only loads batch_size photos if limit was set + unprocessed_photos = unprocessed_query.all() + query_time = time.time() - query_start + print(f"[FaceService] Query completed in {query_time:.2f}s") total = len(unprocessed_photos) print(f"[FaceService] Found {total} unprocessed photos") @@ -344,17 +558,19 @@ def process_unprocessed_photos( pass return False - # Update progress - initializing DeepFace (this may take time on first run) - if update_progress: - update_progress(0, total, "Initializing DeepFace models (this may take a moment on first run)...", 0, 0) + # Pre-warm DeepFace models BEFORE processing photos + # This moves the model loading delay to initialization phase (with progress updates) + # instead of causing delay during first photo processing + if total > 0: + print(f"[FaceService] Pre-warming DeepFace models...") + _pre_warm_deepface(detector_backend, model_name, update_progress) - # Check cancellation before starting + # Check cancellation after pre-warming if check_cancelled(): print("[FaceService] Job cancelled before processing started") return photos_processed, total_faces_detected, total_faces_stored - # Process first photo - this will trigger DeepFace initialization - # Update progress before starting actual processing + # Update progress - models are ready, starting photo processing if update_progress and total > 0: update_progress(0, total, f"Starting face detection on {total} photos...", 0, 0) @@ -383,6 +599,11 @@ def 
process_unprocessed_photos( total_faces_stored, ) + # Time the first photo to see if there's still delay after pre-warming + if idx == 1: + first_photo_start = time.time() + print(f"[FaceService] Starting first photo processing...") + faces_detected, faces_stored = process_photo_faces( db, photo, @@ -394,6 +615,11 @@ def process_unprocessed_photos( total_faces_stored += faces_stored photos_processed += 1 + # Log timing for first photo + if idx == 1: + first_photo_time = time.time() - first_photo_start + print(f"[FaceService] First photo completed in {first_photo_time:.2f}s") + if update_progress: update_progress( idx, diff --git a/src/web/services/photo_service.py b/src/web/services/photo_service.py index 178c47b..c737068 100644 --- a/src/web/services/photo_service.py +++ b/src/web/services/photo_service.py @@ -2,11 +2,9 @@ from __future__ import annotations -import hashlib -import mimetypes import os from pathlib import Path -from datetime import datetime +from datetime import datetime, date from typing import Callable, Optional, Tuple from PIL import Image @@ -16,17 +14,8 @@ from src.core.config import SUPPORTED_IMAGE_FORMATS from src.web.db.models import Photo -def compute_checksum(file_path: str) -> str: - """Compute SHA256 checksum of a file.""" - sha256_hash = hashlib.sha256() - with open(file_path, "rb") as f: - for byte_block in iter(lambda: f.read(4096), b""): - sha256_hash.update(byte_block) - return sha256_hash.hexdigest() - - -def extract_exif_date(image_path: str) -> Optional[datetime]: - """Extract date taken from photo EXIF data.""" +def extract_exif_date(image_path: str) -> Optional[date]: + """Extract date taken from photo EXIF data - returns Date (not DateTime) to match desktop schema.""" try: with Image.open(image_path) as image: exifdata = image.getexif() @@ -44,13 +33,13 @@ def extract_exif_date(image_path: str) -> Optional[datetime]: if date_str: # Parse EXIF date format (YYYY:MM:DD HH:MM:SS) try: - return datetime.strptime(date_str, "%Y:%m:%d 
%H:%M:%S") + dt = datetime.strptime(date_str, "%Y:%m:%d %H:%M:%S") + return dt.date() except ValueError: # Try alternative format try: - return datetime.strptime( - date_str, "%Y-%m-%d %H:%M:%S" - ) + dt = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S") + return dt.date() except ValueError: continue except Exception: @@ -59,17 +48,6 @@ def extract_exif_date(image_path: str) -> Optional[datetime]: return None -def get_image_metadata(image_path: str) -> Tuple[Optional[int], Optional[int], Optional[str]]: - """Get image dimensions and MIME type.""" - try: - with Image.open(image_path) as image: - width, height = image.size - mime_type = mimetypes.guess_type(image_path)[0] or f"image/{image.format.lower() if image.format else 'unknown'}" - return width, height, mime_type - except Exception: - return None, None, None - - def find_photos_in_folder(folder_path: str, recursive: bool = True) -> list[str]: """Find all photo files in a folder.""" folder_path = os.path.abspath(folder_path) @@ -112,33 +90,16 @@ def import_photo_from_path( if existing: return existing, False - # Compute checksum - try: - checksum = compute_checksum(photo_path) - # Check if photo with same checksum exists - existing_by_checksum = ( - db.query(Photo).filter(Photo.checksum == checksum).first() - if checksum - else None - ) - if existing_by_checksum: - return existing_by_checksum, False - except Exception: - checksum = None - - # Extract metadata + # Extract date taken (returns Date to match desktop schema) date_taken = extract_exif_date(photo_path) - width, height, mime_type = get_image_metadata(photo_path) - # Create new photo record + # Create new photo record - match desktop schema exactly + # Desktop schema: id, path, filename, date_added, date_taken (DATE), processed photo = Photo( path=photo_path, filename=filename, - checksum=checksum, date_taken=date_taken, - width=width, - height=height, - mime_type=mime_type, + processed=False, ) db.add(photo)