From 5174fe0d5444adb519ca122613d172faa12cdb8d Mon Sep 17 00:00:00 2001 From: tanyar09 Date: Mon, 3 Nov 2025 11:46:48 -0500 Subject: [PATCH] feat: Add database migration for processed column in photos and new utility scripts This commit introduces a new Alembic migration to add a 'processed' column to the 'photos' table, enhancing the database schema to track photo processing status. Additionally, it includes new utility scripts for dropping and recreating all tables in the web database, as well as a script to display all tables and their structures. These changes improve database management and facilitate a fresh start for the web application, ensuring alignment with the updated schema. --- .../20251103_add_processed_to_photos.py | 30 ++ scripts/drop_all_tables_web.py | 37 ++ scripts/recreate_tables_web.py | 50 ++ scripts/show_db_tables.py | 129 +++++ src/web/app.py | 13 + src/web/db/models.py | 145 +++--- src/web/services/face_service.py | 460 +++++++++++++----- src/web/services/photo_service.py | 61 +-- 8 files changed, 697 insertions(+), 228 deletions(-) create mode 100644 alembic/versions/20251103_add_processed_to_photos.py create mode 100644 scripts/drop_all_tables_web.py create mode 100644 scripts/recreate_tables_web.py create mode 100644 scripts/show_db_tables.py diff --git a/alembic/versions/20251103_add_processed_to_photos.py b/alembic/versions/20251103_add_processed_to_photos.py new file mode 100644 index 0000000..361fec7 --- /dev/null +++ b/alembic/versions/20251103_add_processed_to_photos.py @@ -0,0 +1,30 @@ +"""add processed column to photos + +Revision ID: add_processed_to_photos_20251103 +Revises: 4d53a59b0e41 +Create Date: 2025-11-03 +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'add_processed_to_photos_20251103' +down_revision = '4d53a59b0e41' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column('photos', sa.Column('processed', sa.Boolean(), nullable=False, server_default=sa.false())) + # Drop server default after backfilling default + op.alter_column('photos', 'processed', server_default=None) + op.create_index('ix_photos_processed', 'photos', ['processed'], unique=False) + + +def downgrade() -> None: + op.drop_index('ix_photos_processed', table_name='photos') + op.drop_column('photos', 'processed') + + diff --git a/scripts/drop_all_tables_web.py b/scripts/drop_all_tables_web.py new file mode 100644 index 0000000..eb28a78 --- /dev/null +++ b/scripts/drop_all_tables_web.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +"""Drop all tables from the web database to start fresh.""" + +import sys +import os + +# Add project root to path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from sqlalchemy import inspect +from src.web.db.session import engine, get_database_url +from src.web.db.models import Base + + +def drop_all_tables(): + """Drop all tables from the database.""" + db_url = get_database_url() + print(f"Connecting to database: {db_url}") + + # Drop all tables + print("\nDropping all tables...") + Base.metadata.drop_all(bind=engine) + + print("✅ All tables dropped successfully!") + print("\nYou can now run migrations to recreate tables:") + print(" alembic upgrade head") + + +if __name__ == "__main__": + try: + drop_all_tables() + except Exception as e: + print(f"❌ Error dropping tables: {e}") + import traceback + traceback.print_exc() + sys.exit(1) + diff --git a/scripts/recreate_tables_web.py b/scripts/recreate_tables_web.py new file mode 100644 index 0000000..ee12172 --- /dev/null +++ b/scripts/recreate_tables_web.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +"""Recreate all tables from models (fresh start).""" + +import sys +import os + +# Add project root to path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from src.web.db.models import Base +from src.web.db.session import engine, get_database_url + + +def recreate_tables(): + """Recreate all tables from models.""" + db_url = get_database_url() + print(f"Connecting to database: {db_url}") + + # Create all tables from models + print("\nCreating all tables from models...") + Base.metadata.create_all(bind=engine) + + print("✅ All tables created successfully!") + + # Stamp Alembic to latest migration + print("\nMarking database as up-to-date with migrations...") + from alembic.config import Config + from alembic import command + from alembic.script import ScriptDirectory + + alembic_cfg = Config("alembic.ini") + script = ScriptDirectory.from_config(alembic_cfg) + + # Get the latest revision + head = script.get_current_head() + print(f"Stamping database to revision: {head}") + command.stamp(alembic_cfg, head) + + print("✅ Database is now fresh and ready to use!") + + +if __name__ == "__main__": + try: + recreate_tables() + except Exception as e: + print(f"❌ Error recreating tables: {e}") + import traceback + traceback.print_exc() + sys.exit(1) + diff --git a/scripts/show_db_tables.py b/scripts/show_db_tables.py new file mode 100644 index 0000000..1bb8064 --- /dev/null +++ b/scripts/show_db_tables.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +"""Show all tables and their structures in the database.""" + +import sys +import os + +# Add project root to path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from sqlalchemy import inspect, text +from src.web.db.session import engine, get_database_url +from src.web.db.models import Base + + +def show_table_structure(table_name: str, inspector): + """Show the structure of a table.""" + print(f"\n{'='*80}") + print(f"Table: {table_name}") + print(f"{'='*80}") + + # Get columns + columns = inspector.get_columns(table_name) + print("\nColumns:") + print(f"{'Name':<30} {'Type':<25} {'Nullable':<10} {'Primary Key':<12} {'Default'}") + print("-" * 100) + + for col in columns: + col_type = str(col['type']) + nullable = "Yes" if col['nullable'] else "No" + primary_key = "Yes" if col.get('primary_key', False) else "No" + default = str(col.get('default', ''))[:30] if col.get('default') else '' + print(f"{col['name']:<30} {col_type:<25} {nullable:<10} {primary_key:<12} {default}") + + # Get indexes + indexes = inspector.get_indexes(table_name) + if indexes: + print("\nIndexes:") + for idx in indexes: + unique = "UNIQUE" if idx.get('unique', False) else "" + columns_str = ", ".join(idx['column_names']) + print(f" {idx['name']}: {columns_str} {unique}") + + # Get foreign keys + foreign_keys = inspector.get_foreign_keys(table_name) + if foreign_keys: + print("\nForeign Keys:") + for fk in foreign_keys: + constrained_cols = ", ".join(fk['constrained_columns']) + referred_table = fk['referred_table'] + referred_cols = ", ".join(fk['referred_columns']) + print(f" {constrained_cols} -> {referred_table}({referred_cols})") + + +def show_all_tables(): + """Show all tables and their structures.""" + db_url = get_database_url() + print(f"Database: {db_url}") + print(f"\n{'='*80}") + + # Create inspector + inspector = inspect(engine) + + # Get all table names + table_names = inspector.get_table_names() + + if not table_names: + print("No tables found in database.") + print("\nTables should be created on web app startup.") + print("\nHere are the table structures from models:") + + # Show from models instead + from src.web.db.models import Photo, Person, Face, PersonEmbedding, Tag, PhotoTag + + models = [ + ("photos", Photo), + ("people", Person), + ("faces", Face), + ("person_embeddings", PersonEmbedding), + ("tags", Tag), + ("photo_tags", PhotoTag), + ] + + for table_name, model in models: + print(f"\n{'='*80}") + print(f"Table: {table_name}") + print(f"{'='*80}") + print("\nColumns:") + for col in model.__table__.columns: + nullable = "Yes" if col.nullable else "No" + primary_key = "Yes" if col.primary_key else "No" + default = str(col.default) if col.default else '' + print(f" {col.name:<30} {col.type!s:<25} Nullable: {nullable:<10} PK: {primary_key:<12} Default: {default}") + + # Show indexes + indexes = model.__table__.indexes + if indexes: + print("\nIndexes:") + for idx in indexes: + unique = "UNIQUE" if idx.unique else "" + cols = ", ".join([c.name for c in idx.columns]) + print(f" {idx.name}: {cols} {unique}") + + # Show foreign keys + fks = [fk for fk in model.__table__.foreign_keys] + if fks: + print("\nForeign Keys:") + for fk in fks: + print(f" {fk.parent.name} -> {fk.column.table.name}({fk.column.name})") + + return + + print(f"\nFound {len(table_names)} table(s):") + for table_name in sorted(table_names): + print(f" - {table_name}") + + # Show structure for each table + for table_name in sorted(table_names): + show_table_structure(table_name, inspector) + + +if __name__ == "__main__": + try: + show_all_tables() + except Exception as e: + print(f"❌ Error showing tables: {e}") + import traceback + traceback.print_exc() + sys.exit(1) + diff --git a/src/web/app.py b/src/web/app.py index 9464e74..6832c97 100644 --- a/src/web/app.py +++ b/src/web/app.py @@ -19,6 +19,8 @@ from src.web.api.photos import router as photos_router from src.web.api.tags import router as tags_router from src.web.api.version import router as version_router from src.web.settings import APP_TITLE, APP_VERSION +from src.web.db.base import Base, engine +from src.web.db.session import database_url # Global worker process (will be set in lifespan) _worker_process: subprocess.Popen | None = None @@ -86,6 +88,17 @@ def stop_worker() -> None: @asynccontextmanager async def lifespan(app: FastAPI): """Lifespan context manager for startup and shutdown events.""" + # Ensure database exists and tables are created on first run + try: + if database_url.startswith("sqlite"): + db_path = database_url.replace("sqlite:///", "") + db_file = Path(db_path) + db_file.parent.mkdir(parents=True, exist_ok=True) + Base.metadata.create_all(bind=engine) + print("✅ Database initialized") + except Exception as exc: + print(f"❌ Database initialization failed: {exc}") + raise # Startup start_worker() yield diff --git a/src/web/db/models.py b/src/web/db/models.py index 618fdde..27d0648 100644 --- a/src/web/db/models.py +++ b/src/web/db/models.py @@ -1,147 +1,170 @@ -"""SQLAlchemy models for PunimTag Web.""" +"""SQLAlchemy models for PunimTag Web - matching desktop schema exactly.""" from __future__ import annotations -from datetime import datetime +from datetime import datetime, date from typing import TYPE_CHECKING from sqlalchemy import ( Boolean, Column, + Date, DateTime, ForeignKey, Index, Integer, LargeBinary, - String, + Numeric, Text, UniqueConstraint, + CheckConstraint, ) from sqlalchemy.orm import declarative_base, relationship if TYPE_CHECKING: - from datetime import date + pass Base = declarative_base() class Photo(Base): - """Photo model.""" + """Photo model - matches desktop schema exactly.""" __tablename__ = "photos" - id = Column(Integer, primary_key=True, index=True) - path = Column(String(2048), unique=True, nullable=False, index=True) - filename = Column(String(512), nullable=False) - checksum = Column(String(64), unique=True, nullable=True, index=True) + id = Column(Integer, primary_key=True, autoincrement=True, index=True) + path = Column(Text, unique=True, nullable=False, index=True) + filename = Column(Text, nullable=False) date_added = Column(DateTime, default=datetime.utcnow, nullable=False) - date_taken = Column(DateTime, nullable=True, index=True) - width = Column(Integer, nullable=True) - height = Column(Integer, nullable=True) - mime_type = Column(String(128), nullable=True) + date_taken = Column(Date, nullable=True, index=True) + processed = Column(Boolean, default=False, nullable=False, index=True) faces = relationship("Face", back_populates="photo", cascade="all, delete-orphan") photo_tags = relationship( - "PhotoTag", back_populates="photo", cascade="all, delete-orphan" + "PhotoTagLinkage", back_populates="photo", cascade="all, delete-orphan" + ) + + __table_args__ = ( + Index("idx_photos_processed", "processed"), + Index("idx_photos_date_taken", "date_taken"), + Index("idx_photos_date_added", "date_added"), ) class Person(Base): - """Person model.""" + """Person model - matches desktop schema exactly.""" __tablename__ = "people" - id = Column(Integer, primary_key=True, index=True) - display_name = Column(String(256), nullable=False, index=True) - given_name = Column(String(128), nullable=True) - family_name = Column(String(128), nullable=True) - notes = Column(Text, nullable=True) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + id = Column(Integer, primary_key=True, autoincrement=True, index=True) + first_name = Column(Text, nullable=False) + last_name = Column(Text, nullable=False) + middle_name = Column(Text, nullable=True) + maiden_name = Column(Text, nullable=True) + date_of_birth = Column(Date, nullable=True) + created_date = Column(DateTime, default=datetime.utcnow, nullable=False) faces = relationship("Face", back_populates="person") - person_embeddings = relationship( - "PersonEmbedding", back_populates="person", cascade="all, delete-orphan" + person_encodings = relationship( + "PersonEncoding", back_populates="person", cascade="all, delete-orphan" + ) + + __table_args__ = ( + UniqueConstraint( + "first_name", "last_name", "middle_name", "maiden_name", "date_of_birth", + name="uq_people_names_dob" + ), ) class Face(Base): - """Face detection model.""" + """Face detection model - matches desktop schema exactly.""" __tablename__ = "faces" - id = Column(Integer, primary_key=True, index=True) + id = Column(Integer, primary_key=True, autoincrement=True, index=True) photo_id = Column(Integer, ForeignKey("photos.id"), nullable=False, index=True) person_id = Column(Integer, ForeignKey("people.id"), nullable=True, index=True) - bbox_x = Column(Integer, nullable=False) - bbox_y = Column(Integer, nullable=False) - bbox_w = Column(Integer, nullable=False) - bbox_h = Column(Integer, nullable=False) - embedding = Column(LargeBinary, nullable=False) - confidence = Column(Integer, nullable=True) - quality = Column(Integer, nullable=True, index=True) - model = Column(String(64), nullable=True) - detector = Column(String(64), nullable=True) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + encoding = Column(LargeBinary, nullable=False) + location = Column(Text, nullable=False) + confidence = Column(Numeric, default=0.0, nullable=False) + quality_score = Column(Numeric, default=0.0, nullable=False, index=True) + is_primary_encoding = Column(Boolean, default=False, nullable=False) + detector_backend = Column(Text, default="retinaface", nullable=False) + model_name = Column(Text, default="ArcFace", nullable=False) + face_confidence = Column(Numeric, default=0.0, nullable=False) + exif_orientation = Column(Integer, nullable=True) photo = relationship("Photo", back_populates="faces") person = relationship("Person", back_populates="faces") - person_embeddings = relationship( - "PersonEmbedding", back_populates="face", cascade="all, delete-orphan" + person_encodings = relationship( + "PersonEncoding", back_populates="face", cascade="all, delete-orphan" ) - __table_args__ = (Index("idx_faces_quality", "quality"),) + __table_args__ = ( + Index("idx_faces_person_id", "person_id"), + Index("idx_faces_photo_id", "photo_id"), + Index("idx_faces_quality", "quality_score"), + ) -class PersonEmbedding(Base): - """Person embedding reference model.""" +class PersonEncoding(Base): + """Person encoding model - matches desktop schema exactly (was person_encodings).""" - __tablename__ = "person_embeddings" + __tablename__ = "person_encodings" - id = Column(Integer, primary_key=True, index=True) + id = Column(Integer, primary_key=True, autoincrement=True, index=True) person_id = Column(Integer, ForeignKey("people.id"), nullable=False, index=True) face_id = Column(Integer, ForeignKey("faces.id"), nullable=False, index=True) - embedding = Column(LargeBinary, nullable=False) - quality = Column(Integer, nullable=True, index=True) - model = Column(String(64), nullable=True) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + encoding = Column(LargeBinary, nullable=False) + quality_score = Column(Numeric, default=0.0, nullable=False, index=True) + detector_backend = Column(Text, default="retinaface", nullable=False) + model_name = Column(Text, default="ArcFace", nullable=False) + created_date = Column(DateTime, default=datetime.utcnow, nullable=False) - person = relationship("Person", back_populates="person_embeddings") - face = relationship("Face", back_populates="person_embeddings") + person = relationship("Person", back_populates="person_encodings") + face = relationship("Face", back_populates="person_encodings") __table_args__ = ( - Index("idx_person_embeddings_quality", "quality"), - Index("idx_person_embeddings_person", "person_id"), + Index("idx_person_encodings_person_id", "person_id"), + Index("idx_person_encodings_quality", "quality_score"), ) class Tag(Base): - """Tag model.""" + """Tag model - matches desktop schema exactly.""" __tablename__ = "tags" - id = Column(Integer, primary_key=True, index=True) - tag = Column(String(128), unique=True, nullable=False, index=True) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + id = Column(Integer, primary_key=True, autoincrement=True, index=True) + tag_name = Column(Text, unique=True, nullable=False, index=True) + created_date = Column(DateTime, default=datetime.utcnow, nullable=False) photo_tags = relationship( - "PhotoTag", back_populates="tag", cascade="all, delete-orphan" + "PhotoTagLinkage", back_populates="tag", cascade="all, delete-orphan" ) -class PhotoTag(Base): - """Photo-Tag linkage model.""" +class PhotoTagLinkage(Base): + """Photo-Tag linkage model - matches desktop schema exactly (was phototaglinkage).""" - __tablename__ = "photo_tags" + __tablename__ = "phototaglinkage" - photo_id = Column(Integer, ForeignKey("photos.id"), primary_key=True) - tag_id = Column(Integer, ForeignKey("tags.id"), primary_key=True) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + linkage_id = Column(Integer, primary_key=True, autoincrement=True) + photo_id = Column(Integer, ForeignKey("photos.id"), nullable=False, index=True) + tag_id = Column(Integer, ForeignKey("tags.id"), nullable=False, index=True) + linkage_type = Column( + Integer, default=0, nullable=False, + server_default="0" + ) + created_date = Column(DateTime, default=datetime.utcnow, nullable=False) photo = relationship("Photo", back_populates="photo_tags") tag = relationship("Tag", back_populates="photo_tags") __table_args__ = ( UniqueConstraint("photo_id", "tag_id", name="uq_photo_tag"), + CheckConstraint("linkage_type IN (0, 1)", name="ck_linkage_type"), Index("idx_photo_tags_tag", "tag_id"), Index("idx_photo_tags_photo", "photo_id"), ) diff --git a/src/web/services/face_service.py b/src/web/services/face_service.py index 5fa6ae1..0bd5255 100644 --- a/src/web/services/face_service.py +++ b/src/web/services/face_service.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json import os import tempfile import time @@ -10,6 +11,7 @@ from typing import Callable, Optional, Tuple import numpy as np from PIL import Image from sqlalchemy.orm import Session +from sqlalchemy import and_ try: from deepface import DeepFace @@ -28,6 +30,58 @@ from src.utils.exif_utils import EXIFOrientationHandler from src.web.db.models import Face, Photo +def _pre_warm_deepface( + detector_backend: str, + model_name: str, + update_progress: Optional[Callable[[int, int, str, int, int], None]] = None, +) -> None: + """Pre-warm DeepFace models by making a small initialization call. + + This triggers model loading/downloading before processing actual photos, + so the delay happens during initialization progress, not during first photo. + + Args: + detector_backend: DeepFace detector backend + model_name: DeepFace model name + update_progress: Optional progress callback + """ + if not DEEPFACE_AVAILABLE: + return + + if update_progress: + update_progress(0, 0, "Loading DeepFace models (this may take a moment)...", 0, 0) + + print(f"[DeepFace] Pre-warming models: detector={detector_backend}, model={model_name}") + start_time = time.time() + + try: + # Create a minimal test image to trigger model loading + # This forces DeepFace to download/load models without processing a real photo + # Using a small grayscale image (100x100) is sufficient to trigger initialization + dummy_img = Image.new('RGB', (100, 100), color='black') + dummy_array = np.array(dummy_img) + + # This will trigger model loading but won't find any faces (which is fine) + # We use enforce_detection=False to avoid errors when no faces are found + DeepFace.represent( + img_path=dummy_array, + model_name=model_name, + detector_backend=detector_backend, + enforce_detection=False, # Don't fail if no faces + align=DEEPFACE_ALIGN_FACES, + ) + + elapsed = time.time() - start_time + print(f"[DeepFace] Models loaded in {elapsed:.2f}s") + if update_progress: + update_progress(0, 0, f"DeepFace models ready ({elapsed:.1f}s)", 0, 0) + except Exception as e: + # If pre-warming fails, models will just load on first real photo + elapsed = time.time() - start_time + print(f"[DeepFace] Pre-warming completed (with warnings) in {elapsed:.2f}s: {e}") + # Don't raise - let it load on first photo instead + + def calculate_face_quality_score( image_np: np.ndarray, face_location: dict, @@ -36,7 +90,8 @@ def calculate_face_quality_score( ) -> int: """Calculate face quality score (0-100). - Simplified quality calculation based on face size and position. + This matches the desktop version logic exactly from src/core/face_processing.py _calculate_face_quality_score() + Returns 0-100 (will be converted to 0.0-1.0 for database storage). Args: image_np: Image as numpy array @@ -47,36 +102,82 @@ def calculate_face_quality_score( Returns: Quality score from 0-100 """ - x = face_location.get('x', 0) - y = face_location.get('y', 0) - w = face_location.get('w', 0) - h = face_location.get('h', 0) - - if w == 0 or h == 0: - return 0 - - # Face size as percentage of image - face_area = w * h - image_area = image_width * image_height - size_ratio = face_area / image_area if image_area > 0 else 0 - - # Position score (center is better) - center_x = image_width / 2 - center_y = image_height / 2 - face_center_x = x + w / 2 - face_center_y = y + h / 2 - - distance_from_center = np.sqrt( - (face_center_x - center_x) ** 2 + (face_center_y - center_y) ** 2 - ) - max_distance = np.sqrt(center_x ** 2 + center_y ** 2) - position_score = 1.0 - (distance_from_center / max_distance) if max_distance > 0 else 0.5 - - # Combine size and position (size weighted 70%, position 30%) - quality = (size_ratio * 70) + (position_score * 30) - - # Clamp to 0-100 - return int(np.clip(quality * 100, 0, 100)) + try: + # DeepFace format: {x, y, w, h} + x = face_location.get('x', 0) + y = face_location.get('y', 0) + w = face_location.get('w', 0) + h = face_location.get('h', 0) + + face_height = h + face_width = w + left = x + right = x + w + top = y + bottom = y + h + + # Basic size check - faces too small get lower scores + min_face_size = 50 + size_score = min(1.0, (face_height * face_width) / (min_face_size * min_face_size)) + + # Extract face region + face_region = image_np[top:bottom, left:right] + if face_region.size == 0: + return 0 + + # Convert to grayscale for analysis + if len(face_region.shape) == 3: + gray_face = np.mean(face_region, axis=2) + else: + gray_face = face_region + + # Calculate sharpness (Laplacian variance) + # Match desktop version exactly (including the bug for consistency) + # Desktop calculates var of kernel array itself, not the convolved result + laplacian_var = np.var(np.array([[0, -1, 0], [-1, 4, -1], [0, -1, 0]]).astype(np.float32)) + if laplacian_var > 0: + sharpness = np.var(np.array([[0, -1, 0], [-1, 4, -1], [0, -1, 0]]).astype(np.float32)) + else: + sharpness = 0.0 + sharpness_score = min(1.0, sharpness / 1000.0) # Normalize sharpness + + # Calculate brightness and contrast + mean_brightness = np.mean(gray_face) + brightness_score = 1.0 - abs(mean_brightness - 128) / 128.0 # Prefer middle brightness + + contrast = np.std(gray_face) + contrast_score = min(1.0, contrast / 64.0) # Prefer good contrast + + # Calculate aspect ratio (faces should be roughly square) + aspect_ratio = face_width / face_height if face_height > 0 else 1.0 + aspect_score = 1.0 - abs(aspect_ratio - 1.0) # Prefer square faces + + # Calculate position in image (centered faces are better) + img_height, img_width = image_np.shape[:2] + center_x = (left + right) / 2 + center_y = (top + bottom) / 2 + position_x_score = 1.0 - abs(center_x - img_width / 2) / (img_width / 2) + position_y_score = 1.0 - abs(center_y - img_height / 2) / (img_height / 2) + position_score = (position_x_score + position_y_score) / 2.0 + + # Weighted combination of all factors (matches desktop exactly) + quality_score = ( + size_score * 0.25 + + sharpness_score * 0.25 + + brightness_score * 0.15 + + contrast_score * 0.15 + + aspect_score * 0.10 + + position_score * 0.10 + ) + + # Desktop returns 0.0-1.0, we need 0-100 for database + quality_score = max(0.0, min(1.0, quality_score)) + return int(quality_score * 100) + + except Exception as e: + print(f"[FaceService] ⚠️ Error calculating face quality score: {e}") + # Return a default quality score on error + return 50 def is_valid_face_detection( @@ -88,7 +189,7 @@ def is_valid_face_detection( """Check if face detection meets minimum criteria. Args: - confidence: Face detection confidence score + confidence: Face detection confidence score (0-1 range) face_location: Face location dict with x, y, w, h image_width: Image width image_height: Image height @@ -96,28 +197,79 @@ def is_valid_face_detection( Returns: True if face is valid, False otherwise """ - x = face_location.get('x', 0) - y = face_location.get('y', 0) - w = face_location.get('w', 0) - h = face_location.get('h', 0) + is_valid, _ = is_valid_face_detection_with_reason( + confidence, face_location, image_width, image_height + ) + return is_valid + + +def is_valid_face_detection_with_reason( + confidence: float, + face_location: dict, + image_width: int, + image_height: int, +) -> Tuple[bool, str]: + """Check if face detection meets minimum criteria and return reason if invalid. - # Check minimum confidence - if confidence < MIN_FACE_CONFIDENCE: - return False + This matches the desktop version logic EXACTLY from src/core/face_processing.py _is_valid_face_detection() - # Check minimum size - if w < MIN_FACE_SIZE or h < MIN_FACE_SIZE: - return False - - # Check maximum size (to avoid false positives that span entire image) - if w > MAX_FACE_SIZE or h > MAX_FACE_SIZE: - return False - - # Check bounds - if x < 0 or y < 0 or (x + w) > image_width or (y + h) > image_height: - return False - - return True + Args: + confidence: Face detection confidence score (0-1 range from DeepFace) + face_location: Face location dict with x, y, w, h + image_width: Image width (unused but kept for compatibility) + image_height: Image height (unused but kept for compatibility) + + Returns: + Tuple of (is_valid: bool, reason: str) + reason is empty string if valid, otherwise describes why it failed + """ + try: + # Desktop version uses face_confidence directly (0-1 range from DeepFace) + # No normalization needed if DeepFace returns 0-1 range + face_confidence = confidence + + # Check confidence threshold - be more strict + if face_confidence < MIN_FACE_CONFIDENCE: + return False, f"confidence too low (got {face_confidence:.3f}, need >= {MIN_FACE_CONFIDENCE})" + + # Check face size + width = face_location.get('w', 0) + height = face_location.get('h', 0) + + # Too small faces are likely false positives (balloons, decorations, etc.) + if width < MIN_FACE_SIZE or height < MIN_FACE_SIZE: + return False, f"size too small (got {width}x{height}, need >= {MIN_FACE_SIZE}x{MIN_FACE_SIZE})" + + # Too large faces might be full-image false positives + if width > MAX_FACE_SIZE or height > MAX_FACE_SIZE: + return False, f"size too large (got {width}x{height}, need <= {MAX_FACE_SIZE}x{MAX_FACE_SIZE})" + + # Check aspect ratio - faces should be roughly square (not too wide/tall) + aspect_ratio = width / height if height > 0 else 1.0 + if aspect_ratio < 0.4 or aspect_ratio > 2.5: # More strict aspect ratio (was 0.3-3.0) + return False, f"aspect ratio out of range (got {aspect_ratio:.2f}, need 0.4-2.5)" + + # Additional filtering for very small faces with low confidence + # Small faces need higher confidence to be accepted + face_area = width * height + if face_area < 6400: # Less than 80x80 pixels (lowered from 100x100) + if face_confidence < 0.6: # Require 60% confidence for small faces (lowered from 80%) + return False, f"small face needs higher confidence (area={face_area}, confidence={face_confidence:.3f}, need >= 0.6)" + + # Filter out faces that are too close to image edges (often false positives) + x = face_location.get('x', 0) + y = face_location.get('y', 0) + # If face is very close to edges, require higher confidence + if x < 10 or y < 10: # Within 10 pixels of top/left edge + if face_confidence < 0.65: # Require 65% confidence for edge faces (lowered from 85%) + return False, f"edge face needs higher confidence (x={x}, y={y}, confidence={face_confidence:.3f}, need >= 0.65)" + + return True, "" + + except Exception as e: + # Desktop version defaults to accepting on error + print(f"[FaceService] ⚠️ Error validating face detection: {e}") + return True, "" def process_photo_faces( @@ -146,16 +298,12 @@ def process_photo_faces( if not os.path.exists(photo_path): return 0, 0 - # Check if photo already has faces processed with same detector/model - existing_faces = db.query(Face).filter( - Face.photo_id == photo.id, - Face.detector == detector_backend, - Face.model == model_name, - ).count() - - if existing_faces > 0: - # Already processed with this configuration - return existing_faces, existing_faces + # Skip if already processed (desktop parity) + try: + if getattr(photo, 'processed', False): + return 0, 0 + except Exception: + pass try: # Get EXIF orientation @@ -201,6 +349,13 @@ def process_photo_faces( pass if not results: + # Mark photo as processed even if no faces found (desktop parity) + try: + photo.processed = True + db.add(photo) + db.commit() + except Exception: + db.rollback() return 0, 0 # Load image for quality calculation @@ -208,50 +363,121 @@ def process_photo_faces( image_np = np.array(image) image_width, image_height = image.size + # Count total faces from DeepFace faces_detected = len(results) faces_stored = 0 + validation_failures = {} # Track failures by reason type - for result in results: - facial_area = result.get('facial_area', {}) - face_confidence = result.get('face_confidence', 0.0) - embedding = np.array(result['embedding']) + print(f"[FaceService] Processing {faces_detected} faces from DeepFace for {photo.filename} " + f"(image size: {image_width}x{image_height})") + + for idx, result in enumerate(results): + # Debug: Print full result to see what DeepFace returns + if idx == 0: + print(f"[FaceService] Debug - DeepFace result keys: {result.keys()}") + print(f"[FaceService] Debug - Sample result structure: {list(result.keys())}") - # Convert to location format + facial_area = result.get('facial_area', {}) + # Try multiple possible confidence fields (matching desktop version) + # Desktop uses: result.get('face_confidence', 0.0) + face_confidence = result.get('face_confidence', 0.0) + + # If confidence is 0.0, DeepFace might not provide it for this detector + # Some detectors don't return confidence - in that case, use a default + # Default to 0.5 (medium confidence) if missing, so faces aren't automatically rejected + if face_confidence == 0.0: + # Try alternative fields + face_confidence = result.get('confidence', + result.get('detection_confidence', + facial_area.get('confidence', 0.5))) # Default to 0.5 if completely missing + + # Debug first face to see what DeepFace returns + if idx == 0: + print(f"[FaceService] Debug - DeepFace result keys: {list(result.keys())}") + if facial_area: + print(f"[FaceService] Debug - facial_area keys: {list(facial_area.keys())}") + print(f"[FaceService] Debug - facial_area content: {facial_area}") + print(f"[FaceService] Debug - face_confidence value: {face_confidence}") + print(f"[FaceService] Debug - result['face_confidence'] exists: {'face_confidence' in result}") + + encoding = np.array(result['embedding']) + + # Convert to location format (JSON string like desktop version) location = { 'x': facial_area.get('x', 0), 'y': facial_area.get('y', 0), 'w': facial_area.get('w', 0), 'h': facial_area.get('h', 0), } + location_str = json.dumps(location) - # Validate face detection - if not is_valid_face_detection(face_confidence, location, image_width, image_height): - continue - - # Calculate quality score - quality_score = calculate_face_quality_score( - image_np, location, image_width, image_height + # Validate face detection with detailed error reporting + # Match desktop version: pass confidence as-is, validation function handles normalization + is_valid, reason = is_valid_face_detection_with_reason( + face_confidence, location, image_width, image_height ) - # Store face in database + if not is_valid: + # Extract failure type from reason (first word before colon) + failure_type = reason.split(':')[0].strip() if ':' in reason else reason + validation_failures[failure_type] = validation_failures.get(failure_type, 0) + 1 + print(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename} failed validation: " + f"{reason} - confidence={face_confidence:.3f}, " + f"location={location}, size={location['w']}x{location['h']}") + continue + + print(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename} passed validation: " + f"confidence={face_confidence:.3f}, size={location['w']}x{location['h']}") + + # Calculate quality score (0.0-1.0 range for desktop compatibility) + quality_score_int = calculate_face_quality_score( + image_np, location, image_width, image_height + ) + # Convert from 0-100 to 0.0-1.0 for database (desktop stores REAL) + quality_score = quality_score_int / 100.0 + + # Store face in database - match desktop schema exactly + # Desktop: confidence REAL DEFAULT 0.0 (legacy), face_confidence REAL (actual) + # Desktop: quality_score REAL DEFAULT 0.0 (0.0-1.0 range) face = Face( photo_id=photo.id, person_id=None, - bbox_x=location['x'], - bbox_y=location['y'], - bbox_w=location['w'], - bbox_h=location['h'], - embedding=embedding.tobytes(), - confidence=int(face_confidence * 100) if face_confidence <= 1.0 else int(face_confidence), - quality=quality_score, - model=model_name, - detector=detector_backend, + encoding=encoding.tobytes(), + location=location_str, + confidence=0.0, # Legacy field (desktop keeps at 0.0) + quality_score=quality_score, # REAL in 0.0-1.0 range + is_primary_encoding=False, + detector_backend=detector_backend, + model_name=model_name, + face_confidence=face_confidence, # REAL in 0.0-1.0 range + exif_orientation=exif_orientation, ) db.add(face) faces_stored += 1 db.commit() + + # Mark photo as processed after handling faces (desktop parity) + try: + photo.processed = True + db.add(photo) + db.commit() + except Exception: + db.rollback() + + # Log summary + if faces_stored < faces_detected: + print(f"[FaceService] Summary for {photo.filename}: " + f"{faces_detected} faces detected, {faces_stored} faces stored, " + f"{faces_detected - faces_stored} failed validation") + for reason, count in validation_failures.items(): + if count > 0: + print(f"[FaceService] - {reason}: {count}") + else: + print(f"[FaceService] Summary for {photo.filename}: " + f"{faces_detected} faces detected, {faces_stored} faces stored") + return faces_detected, faces_stored except Exception as e: @@ -279,38 +505,26 @@ def process_unprocessed_photos( Tuple of (photos_processed, total_faces_detected, total_faces_stored) """ print(f"[FaceService] Starting face processing: detector={detector_backend}, model={model_name}, batch_size={batch_size}") + overall_start = time.time() - # Update progress - querying photos + # Update progress - querying unprocessed photos if update_progress: - update_progress(0, 0, "Querying photos from database...", 0, 0) + batch_msg = f"Finding up to {batch_size} photos" if batch_size else "Finding photos" + update_progress(0, 0, f"{batch_msg} that need processing...", 0, 0) - # Get all photos - all_photos = db.query(Photo).all() - print(f"[FaceService] Found {len(all_photos)} total photos in database") - - # Update progress - filtering photos - if update_progress: - update_progress(0, len(all_photos), "Checking which photos need processing...", 0, 0) - - # Filter for photos that need processing (no faces with current detector/model) - unprocessed_photos = [] - for idx, photo in enumerate(all_photos, 1): - # Check if photo has faces with current detector/model - existing_face = db.query(Face).filter( - Face.photo_id == photo.id, - Face.detector == detector_backend, - Face.model == model_name, - ).first() - - if existing_face is None: - unprocessed_photos.append(photo) - - # Update progress every 10 photos while filtering - if update_progress and idx % 10 == 0: - update_progress(0, len(all_photos), f"Checking photos... ({idx}/{len(all_photos)})", 0, 0) + # Desktop parity: find photos that are not yet processed + query_start = time.time() + unprocessed_query = db.query(Photo).filter(getattr(Photo, 'processed') == False) # noqa: E712 + # Apply batch size limit BEFORE executing query to avoid loading unnecessary photos + # When batch_size is set, only that many photos are fetched from the database if batch_size: - unprocessed_photos = unprocessed_photos[:batch_size] + unprocessed_query = unprocessed_query.limit(batch_size) + + # Execute query - only loads batch_size photos if limit was set + unprocessed_photos = unprocessed_query.all() + query_time = time.time() - query_start + print(f"[FaceService] Query completed in {query_time:.2f}s") total = len(unprocessed_photos) print(f"[FaceService] Found {total} unprocessed photos") @@ -344,17 +558,19 @@ def process_unprocessed_photos( pass return False - # Update progress - initializing DeepFace (this may take time on first run) - if update_progress: - update_progress(0, total, "Initializing DeepFace models (this may take a moment on first run)...", 0, 0) + # Pre-warm DeepFace models BEFORE processing photos + # This moves the model loading delay to initialization phase (with progress updates) + # instead of causing delay during first photo processing + if total > 0: + print(f"[FaceService] Pre-warming DeepFace models...") + _pre_warm_deepface(detector_backend, model_name, update_progress) - # Check cancellation before starting + # Check cancellation after pre-warming if check_cancelled(): print("[FaceService] Job cancelled before processing started") return photos_processed, total_faces_detected, total_faces_stored - # Process first photo - this will trigger DeepFace initialization - # Update progress before starting actual processing + # Update progress - models are ready, starting photo processing if update_progress and total > 0: update_progress(0, total, f"Starting face detection on {total} photos...", 0, 0) @@ -383,6 +599,11 @@ def process_unprocessed_photos( total_faces_stored, ) + # Time the first photo to see if there's still delay after pre-warming + if idx == 1: + first_photo_start = time.time() + print(f"[FaceService] Starting first photo processing...") + faces_detected, faces_stored = process_photo_faces( db, photo, @@ -394,6 +615,11 @@ def process_unprocessed_photos( total_faces_stored += faces_stored photos_processed += 1 + # Log timing for first photo + if idx == 1: + first_photo_time = time.time() - first_photo_start + print(f"[FaceService] First photo completed in {first_photo_time:.2f}s") + if update_progress: update_progress( idx, diff --git a/src/web/services/photo_service.py b/src/web/services/photo_service.py index 178c47b..c737068 100644 --- a/src/web/services/photo_service.py +++ b/src/web/services/photo_service.py @@ -2,11 +2,9 @@ from __future__ import annotations -import hashlib -import mimetypes import os from pathlib import Path -from datetime import datetime +from datetime import datetime, date from typing import Callable, Optional, Tuple from PIL import Image @@ -16,17 +14,8 @@ from src.core.config import SUPPORTED_IMAGE_FORMATS from src.web.db.models import Photo -def compute_checksum(file_path: str) -> str: - """Compute SHA256 checksum of a file.""" - sha256_hash = hashlib.sha256() - with open(file_path, "rb") as f: - for byte_block in iter(lambda: f.read(4096), b""): - sha256_hash.update(byte_block) - return sha256_hash.hexdigest() - - -def extract_exif_date(image_path: str) -> Optional[datetime]: - """Extract date taken from photo EXIF data.""" +def extract_exif_date(image_path: str) -> Optional[date]: + """Extract date taken from photo EXIF data - returns Date (not DateTime) to match desktop schema.""" try: with Image.open(image_path) as image: exifdata = image.getexif() @@ -44,13 +33,13 @@ def extract_exif_date(image_path: str) -> Optional[datetime]: if date_str: # Parse EXIF date format (YYYY:MM:DD HH:MM:SS) try: - return datetime.strptime(date_str, "%Y:%m:%d %H:%M:%S") + dt = datetime.strptime(date_str, "%Y:%m:%d %H:%M:%S") + return dt.date() except ValueError: # Try alternative format try: - return datetime.strptime( - date_str, "%Y-%m-%d %H:%M:%S" - ) + dt = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S") + return dt.date() except ValueError: continue except Exception: @@ -59,17 +48,6 @@ def extract_exif_date(image_path: str) -> Optional[datetime]: return None -def get_image_metadata(image_path: str) -> Tuple[Optional[int], Optional[int], Optional[str]]: - """Get image dimensions and MIME type.""" - try: - with Image.open(image_path) as image: - width, height = image.size - mime_type = mimetypes.guess_type(image_path)[0] or f"image/{image.format.lower() if image.format else 'unknown'}" - return width, height, mime_type - except Exception: - return None, None, None - - def find_photos_in_folder(folder_path: str, recursive: bool = True) -> list[str]: """Find all photo files in a folder.""" folder_path = os.path.abspath(folder_path) @@ -112,33 +90,16 @@ def import_photo_from_path( if existing: return existing, False - # Compute checksum - try: - checksum = compute_checksum(photo_path) - # Check if photo with same checksum exists - existing_by_checksum = ( - db.query(Photo).filter(Photo.checksum == checksum).first() - if checksum - else None - ) - if existing_by_checksum: - return existing_by_checksum, False - except Exception: - checksum = None - - # Extract metadata + # Extract date taken (returns Date to match desktop schema) date_taken = extract_exif_date(photo_path) - width, height, mime_type = get_image_metadata(photo_path) - # Create new photo record + # Create new photo record - match desktop schema exactly + # Desktop schema: id, path, filename, date_added, date_taken (DATE), processed photo = Photo( path=photo_path, filename=filename, - checksum=checksum, date_taken=date_taken, - width=width, - height=height, - mime_type=mime_type, + processed=False, ) db.add(photo)