feat: Add database migration for processed column in photos and new utility scripts

This commit introduces a new Alembic migration to add a 'processed' column to the 'photos' table, enhancing the database schema to track photo processing status. Additionally, it includes new utility scripts for dropping and recreating all tables in the web database, as well as a script to display all tables and their structures. These changes improve database management and facilitate a fresh start for the web application, ensuring alignment with the updated schema.
This commit is contained in:
tanyar09 2025-11-03 11:46:48 -05:00
parent dd92d1ec14
commit 5174fe0d54
8 changed files with 697 additions and 228 deletions

View File

@ -0,0 +1,30 @@
"""add processed column to photos
Revision ID: add_processed_to_photos_20251103
Revises: 4d53a59b0e41
Create Date: 2025-11-03
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = 'add_processed_to_photos_20251103'
down_revision = '4d53a59b0e41'
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add a NOT NULL ``processed`` flag to ``photos`` and index it."""
    # server_default=sa.false() backfills existing rows with FALSE so the
    # NOT NULL constraint can be applied without a separate UPDATE pass.
    op.add_column('photos', sa.Column('processed', sa.Boolean(), nullable=False, server_default=sa.false()))
    # Drop server default after backfilling default
    op.alter_column('photos', 'processed', server_default=None)
    # NOTE(review): the ORM model declares this index as "idx_photos_processed",
    # while this migration creates "ix_photos_processed" — confirm which name is
    # canonical; a mismatch will make autogenerate flag a spurious diff.
    op.create_index('ix_photos_processed', 'photos', ['processed'], unique=False)
def downgrade() -> None:
    """Reverse the upgrade: drop the index first, then the column."""
    op.drop_index('ix_photos_processed', table_name='photos')
    op.drop_column('photos', 'processed')

View File

@ -0,0 +1,37 @@
#!/usr/bin/env python3
"""Drop all tables from the web database to start fresh."""
import sys
import os
# Add project root to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from sqlalchemy import inspect
from src.web.db.session import engine, get_database_url
from src.web.db.models import Base
def drop_all_tables():
    """Remove every table known to ``Base.metadata`` from the database."""
    # Announce which database is about to be wiped before doing anything.
    print(f"Connecting to database: {get_database_url()}")

    print("\nDropping all tables...")
    # drop_all only removes tables declared on the metadata; any table the
    # models don't know about is left untouched.
    Base.metadata.drop_all(bind=engine)
    print("✅ All tables dropped successfully!")
    print("\nYou can now run migrations to recreate tables:")
    print(" alembic upgrade head")
if __name__ == "__main__":
    # Exit non-zero on failure so shell callers can detect the error.
    try:
        drop_all_tables()
    except Exception as exc:
        import traceback

        print(f"❌ Error dropping tables: {exc}")
        traceback.print_exc()
        sys.exit(1)

View File

@ -0,0 +1,50 @@
#!/usr/bin/env python3
"""Recreate all tables from models (fresh start)."""
import sys
import os
# Add project root to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.web.db.models import Base
from src.web.db.session import engine, get_database_url
def recreate_tables():
    """Create every model table, then stamp Alembic at the current head."""
    print(f"Connecting to database: {get_database_url()}")

    # Build the full schema directly from the ORM models.
    print("\nCreating all tables from models...")
    Base.metadata.create_all(bind=engine)
    print("✅ All tables created successfully!")

    # Record the newest migration revision so a later `alembic upgrade head`
    # sees the database as already current.
    print("\nMarking database as up-to-date with migrations...")
    from alembic import command
    from alembic.config import Config
    from alembic.script import ScriptDirectory

    alembic_cfg = Config("alembic.ini")
    head_revision = ScriptDirectory.from_config(alembic_cfg).get_current_head()
    print(f"Stamping database to revision: {head_revision}")
    command.stamp(alembic_cfg, head_revision)
    print("✅ Database is now fresh and ready to use!")
if __name__ == "__main__":
    # Exit non-zero on failure so shell callers can detect the error.
    try:
        recreate_tables()
    except Exception as exc:
        import traceback

        print(f"❌ Error recreating tables: {exc}")
        traceback.print_exc()
        sys.exit(1)

129
scripts/show_db_tables.py Normal file
View File

@ -0,0 +1,129 @@
#!/usr/bin/env python3
"""Show all tables and their structures in the database."""
import sys
import os
# Add project root to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from sqlalchemy import inspect, text
from src.web.db.session import engine, get_database_url
from src.web.db.models import Base
def show_table_structure(table_name: str, inspector):
    """Print the columns, indexes and foreign keys of a single table.

    ``inspector`` is duck-typed: it only needs ``get_columns``,
    ``get_indexes`` and ``get_foreign_keys`` returning the usual
    SQLAlchemy inspector dictionaries.
    """
    banner = "=" * 80
    print(f"\n{banner}")
    print(f"Table: {table_name}")
    print(f"{banner}")

    # Column listing in a fixed-width layout.
    print("\nColumns:")
    print(f"{'Name':<30} {'Type':<25} {'Nullable':<10} {'Primary Key':<12} {'Default'}")
    print("-" * 100)
    for column in inspector.get_columns(table_name):
        is_nullable = "Yes" if column['nullable'] else "No"
        is_pk = "Yes" if column.get('primary_key', False) else "No"
        # Truncate long defaults so the row stays on a single line.
        default_repr = str(column.get('default', ''))[:30] if column.get('default') else ''
        print(f"{column['name']:<30} {str(column['type']):<25} {is_nullable:<10} {is_pk:<12} {default_repr}")

    index_info = inspector.get_indexes(table_name)
    if index_info:
        print("\nIndexes:")
        for index in index_info:
            uniqueness = "UNIQUE" if index.get('unique', False) else ""
            print(f" {index['name']}: {', '.join(index['column_names'])} {uniqueness}")

    fk_info = inspector.get_foreign_keys(table_name)
    if fk_info:
        print("\nForeign Keys:")
        for fk in fk_info:
            source_cols = ", ".join(fk['constrained_columns'])
            target_cols = ", ".join(fk['referred_columns'])
            print(f" {source_cols} -> {fk['referred_table']}({target_cols})")
def show_all_tables():
    """Show all tables and their structures.

    Reflects the live database via the SQLAlchemy inspector. If the
    database has no tables yet, falls back to printing the schema as
    declared on the ORM models instead.
    """
    db_url = get_database_url()
    print(f"Database: {db_url}")
    print(f"\n{'='*80}")

    # Reflect the live database schema.
    inspector = inspect(engine)
    table_names = inspector.get_table_names()

    if not table_names:
        print("No tables found in database.")
        print("\nTables should be created on web app startup.")
        print("\nHere are the table structures from models:")
        # Show from models instead. BUGFIX: use the current model/table names —
        # this commit renamed PersonEmbedding -> PersonEncoding
        # (table person_encodings) and PhotoTag -> PhotoTagLinkage
        # (table phototaglinkage); the old names no longer exist and
        # importing them raised ImportError.
        from src.web.db.models import (
            Face,
            Person,
            PersonEncoding,
            Photo,
            PhotoTagLinkage,
            Tag,
        )
        models = [
            ("photos", Photo),
            ("people", Person),
            ("faces", Face),
            ("person_encodings", PersonEncoding),
            ("tags", Tag),
            ("phototaglinkage", PhotoTagLinkage),
        ]
        for table_name, model in models:
            print(f"\n{'='*80}")
            print(f"Table: {table_name}")
            print(f"{'='*80}")
            print("\nColumns:")
            for col in model.__table__.columns:
                nullable = "Yes" if col.nullable else "No"
                primary_key = "Yes" if col.primary_key else "No"
                default = str(col.default) if col.default else ''
                print(f" {col.name:<30} {col.type!s:<25} Nullable: {nullable:<10} PK: {primary_key:<12} Default: {default}")
            # Indexes declared on the model's table.
            indexes = model.__table__.indexes
            if indexes:
                print("\nIndexes:")
                for idx in indexes:
                    unique = "UNIQUE" if idx.unique else ""
                    cols = ", ".join([c.name for c in idx.columns])
                    print(f" {idx.name}: {cols} {unique}")
            # Foreign keys declared on the model's table.
            fks = list(model.__table__.foreign_keys)
            if fks:
                print("\nForeign Keys:")
                for fk in fks:
                    print(f" {fk.parent.name} -> {fk.column.table.name}({fk.column.name})")
        return

    print(f"\nFound {len(table_names)} table(s):")
    for table_name in sorted(table_names):
        print(f" - {table_name}")

    # Show structure for each table, in a stable (sorted) order.
    for table_name in sorted(table_names):
        show_table_structure(table_name, inspector)
if __name__ == "__main__":
    # Exit non-zero on failure so shell callers can detect the error.
    try:
        show_all_tables()
    except Exception as exc:
        import traceback

        print(f"❌ Error showing tables: {exc}")
        traceback.print_exc()
        sys.exit(1)

View File

@ -19,6 +19,8 @@ from src.web.api.photos import router as photos_router
from src.web.api.tags import router as tags_router
from src.web.api.version import router as version_router
from src.web.settings import APP_TITLE, APP_VERSION
from src.web.db.base import Base, engine
from src.web.db.session import database_url
# Global worker process (will be set in lifespan)
_worker_process: subprocess.Popen | None = None
@ -86,6 +88,17 @@ def stop_worker() -> None:
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Lifespan context manager for startup and shutdown events."""
# Ensure database exists and tables are created on first run
try:
if database_url.startswith("sqlite"):
db_path = database_url.replace("sqlite:///", "")
db_file = Path(db_path)
db_file.parent.mkdir(parents=True, exist_ok=True)
Base.metadata.create_all(bind=engine)
print("✅ Database initialized")
except Exception as exc:
print(f"❌ Database initialization failed: {exc}")
raise
# Startup
start_worker()
yield

View File

@ -1,147 +1,170 @@
"""SQLAlchemy models for PunimTag Web."""
"""SQLAlchemy models for PunimTag Web - matching desktop schema exactly."""
from __future__ import annotations
from datetime import datetime
from datetime import datetime, date
from typing import TYPE_CHECKING
from sqlalchemy import (
Boolean,
Column,
Date,
DateTime,
ForeignKey,
Index,
Integer,
LargeBinary,
String,
Numeric,
Text,
UniqueConstraint,
CheckConstraint,
)
from sqlalchemy.orm import declarative_base, relationship
if TYPE_CHECKING:
from datetime import date
pass
Base = declarative_base()
class Photo(Base):
"""Photo model."""
"""Photo model - matches desktop schema exactly."""
__tablename__ = "photos"
id = Column(Integer, primary_key=True, index=True)
path = Column(String(2048), unique=True, nullable=False, index=True)
filename = Column(String(512), nullable=False)
checksum = Column(String(64), unique=True, nullable=True, index=True)
id = Column(Integer, primary_key=True, autoincrement=True, index=True)
path = Column(Text, unique=True, nullable=False, index=True)
filename = Column(Text, nullable=False)
date_added = Column(DateTime, default=datetime.utcnow, nullable=False)
date_taken = Column(DateTime, nullable=True, index=True)
width = Column(Integer, nullable=True)
height = Column(Integer, nullable=True)
mime_type = Column(String(128), nullable=True)
date_taken = Column(Date, nullable=True, index=True)
processed = Column(Boolean, default=False, nullable=False, index=True)
faces = relationship("Face", back_populates="photo", cascade="all, delete-orphan")
photo_tags = relationship(
"PhotoTag", back_populates="photo", cascade="all, delete-orphan"
"PhotoTagLinkage", back_populates="photo", cascade="all, delete-orphan"
)
__table_args__ = (
Index("idx_photos_processed", "processed"),
Index("idx_photos_date_taken", "date_taken"),
Index("idx_photos_date_added", "date_added"),
)
class Person(Base):
"""Person model."""
"""Person model - matches desktop schema exactly."""
__tablename__ = "people"
id = Column(Integer, primary_key=True, index=True)
display_name = Column(String(256), nullable=False, index=True)
given_name = Column(String(128), nullable=True)
family_name = Column(String(128), nullable=True)
notes = Column(Text, nullable=True)
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
id = Column(Integer, primary_key=True, autoincrement=True, index=True)
first_name = Column(Text, nullable=False)
last_name = Column(Text, nullable=False)
middle_name = Column(Text, nullable=True)
maiden_name = Column(Text, nullable=True)
date_of_birth = Column(Date, nullable=True)
created_date = Column(DateTime, default=datetime.utcnow, nullable=False)
faces = relationship("Face", back_populates="person")
person_embeddings = relationship(
"PersonEmbedding", back_populates="person", cascade="all, delete-orphan"
person_encodings = relationship(
"PersonEncoding", back_populates="person", cascade="all, delete-orphan"
)
__table_args__ = (
UniqueConstraint(
"first_name", "last_name", "middle_name", "maiden_name", "date_of_birth",
name="uq_people_names_dob"
),
)
class Face(Base):
"""Face detection model."""
"""Face detection model - matches desktop schema exactly."""
__tablename__ = "faces"
id = Column(Integer, primary_key=True, index=True)
id = Column(Integer, primary_key=True, autoincrement=True, index=True)
photo_id = Column(Integer, ForeignKey("photos.id"), nullable=False, index=True)
person_id = Column(Integer, ForeignKey("people.id"), nullable=True, index=True)
bbox_x = Column(Integer, nullable=False)
bbox_y = Column(Integer, nullable=False)
bbox_w = Column(Integer, nullable=False)
bbox_h = Column(Integer, nullable=False)
embedding = Column(LargeBinary, nullable=False)
confidence = Column(Integer, nullable=True)
quality = Column(Integer, nullable=True, index=True)
model = Column(String(64), nullable=True)
detector = Column(String(64), nullable=True)
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
encoding = Column(LargeBinary, nullable=False)
location = Column(Text, nullable=False)
confidence = Column(Numeric, default=0.0, nullable=False)
quality_score = Column(Numeric, default=0.0, nullable=False, index=True)
is_primary_encoding = Column(Boolean, default=False, nullable=False)
detector_backend = Column(Text, default="retinaface", nullable=False)
model_name = Column(Text, default="ArcFace", nullable=False)
face_confidence = Column(Numeric, default=0.0, nullable=False)
exif_orientation = Column(Integer, nullable=True)
photo = relationship("Photo", back_populates="faces")
person = relationship("Person", back_populates="faces")
person_embeddings = relationship(
"PersonEmbedding", back_populates="face", cascade="all, delete-orphan"
person_encodings = relationship(
"PersonEncoding", back_populates="face", cascade="all, delete-orphan"
)
__table_args__ = (Index("idx_faces_quality", "quality"),)
__table_args__ = (
Index("idx_faces_person_id", "person_id"),
Index("idx_faces_photo_id", "photo_id"),
Index("idx_faces_quality", "quality_score"),
)
class PersonEmbedding(Base):
"""Person embedding reference model."""
class PersonEncoding(Base):
"""Person encoding model - matches desktop schema exactly (was person_encodings)."""
__tablename__ = "person_embeddings"
__tablename__ = "person_encodings"
id = Column(Integer, primary_key=True, index=True)
id = Column(Integer, primary_key=True, autoincrement=True, index=True)
person_id = Column(Integer, ForeignKey("people.id"), nullable=False, index=True)
face_id = Column(Integer, ForeignKey("faces.id"), nullable=False, index=True)
embedding = Column(LargeBinary, nullable=False)
quality = Column(Integer, nullable=True, index=True)
model = Column(String(64), nullable=True)
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
encoding = Column(LargeBinary, nullable=False)
quality_score = Column(Numeric, default=0.0, nullable=False, index=True)
detector_backend = Column(Text, default="retinaface", nullable=False)
model_name = Column(Text, default="ArcFace", nullable=False)
created_date = Column(DateTime, default=datetime.utcnow, nullable=False)
person = relationship("Person", back_populates="person_embeddings")
face = relationship("Face", back_populates="person_embeddings")
person = relationship("Person", back_populates="person_encodings")
face = relationship("Face", back_populates="person_encodings")
__table_args__ = (
Index("idx_person_embeddings_quality", "quality"),
Index("idx_person_embeddings_person", "person_id"),
Index("idx_person_encodings_person_id", "person_id"),
Index("idx_person_encodings_quality", "quality_score"),
)
class Tag(Base):
"""Tag model."""
"""Tag model - matches desktop schema exactly."""
__tablename__ = "tags"
id = Column(Integer, primary_key=True, index=True)
tag = Column(String(128), unique=True, nullable=False, index=True)
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
id = Column(Integer, primary_key=True, autoincrement=True, index=True)
tag_name = Column(Text, unique=True, nullable=False, index=True)
created_date = Column(DateTime, default=datetime.utcnow, nullable=False)
photo_tags = relationship(
"PhotoTag", back_populates="tag", cascade="all, delete-orphan"
"PhotoTagLinkage", back_populates="tag", cascade="all, delete-orphan"
)
class PhotoTag(Base):
"""Photo-Tag linkage model."""
class PhotoTagLinkage(Base):
"""Photo-Tag linkage model - matches desktop schema exactly (was phototaglinkage)."""
__tablename__ = "photo_tags"
__tablename__ = "phototaglinkage"
photo_id = Column(Integer, ForeignKey("photos.id"), primary_key=True)
tag_id = Column(Integer, ForeignKey("tags.id"), primary_key=True)
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
linkage_id = Column(Integer, primary_key=True, autoincrement=True)
photo_id = Column(Integer, ForeignKey("photos.id"), nullable=False, index=True)
tag_id = Column(Integer, ForeignKey("tags.id"), nullable=False, index=True)
linkage_type = Column(
Integer, default=0, nullable=False,
server_default="0"
)
created_date = Column(DateTime, default=datetime.utcnow, nullable=False)
photo = relationship("Photo", back_populates="photo_tags")
tag = relationship("Tag", back_populates="photo_tags")
__table_args__ = (
UniqueConstraint("photo_id", "tag_id", name="uq_photo_tag"),
CheckConstraint("linkage_type IN (0, 1)", name="ck_linkage_type"),
Index("idx_photo_tags_tag", "tag_id"),
Index("idx_photo_tags_photo", "photo_id"),
)

View File

@ -2,6 +2,7 @@
from __future__ import annotations
import json
import os
import tempfile
import time
@ -10,6 +11,7 @@ from typing import Callable, Optional, Tuple
import numpy as np
from PIL import Image
from sqlalchemy.orm import Session
from sqlalchemy import and_
try:
from deepface import DeepFace
@ -28,6 +30,58 @@ from src.utils.exif_utils import EXIFOrientationHandler
from src.web.db.models import Face, Photo
def _pre_warm_deepface(
detector_backend: str,
model_name: str,
update_progress: Optional[Callable[[int, int, str, int, int], None]] = None,
) -> None:
"""Pre-warm DeepFace models by making a small initialization call.
This triggers model loading/downloading before processing actual photos,
so the delay happens during initialization progress, not during first photo.
Args:
detector_backend: DeepFace detector backend
model_name: DeepFace model name
update_progress: Optional progress callback
"""
if not DEEPFACE_AVAILABLE:
return
if update_progress:
update_progress(0, 0, "Loading DeepFace models (this may take a moment)...", 0, 0)
print(f"[DeepFace] Pre-warming models: detector={detector_backend}, model={model_name}")
start_time = time.time()
try:
# Create a minimal test image to trigger model loading
# This forces DeepFace to download/load models without processing a real photo
# Using a small grayscale image (100x100) is sufficient to trigger initialization
dummy_img = Image.new('RGB', (100, 100), color='black')
dummy_array = np.array(dummy_img)
# This will trigger model loading but won't find any faces (which is fine)
# We use enforce_detection=False to avoid errors when no faces are found
DeepFace.represent(
img_path=dummy_array,
model_name=model_name,
detector_backend=detector_backend,
enforce_detection=False, # Don't fail if no faces
align=DEEPFACE_ALIGN_FACES,
)
elapsed = time.time() - start_time
print(f"[DeepFace] Models loaded in {elapsed:.2f}s")
if update_progress:
update_progress(0, 0, f"DeepFace models ready ({elapsed:.1f}s)", 0, 0)
except Exception as e:
# If pre-warming fails, models will just load on first real photo
elapsed = time.time() - start_time
print(f"[DeepFace] Pre-warming completed (with warnings) in {elapsed:.2f}s: {e}")
# Don't raise - let it load on first photo instead
def calculate_face_quality_score(
image_np: np.ndarray,
face_location: dict,
@ -36,7 +90,8 @@ def calculate_face_quality_score(
) -> int:
"""Calculate face quality score (0-100).
Simplified quality calculation based on face size and position.
This matches the desktop version logic exactly from src/core/face_processing.py _calculate_face_quality_score()
Returns 0-100 (will be converted to 0.0-1.0 for database storage).
Args:
image_np: Image as numpy array
@ -47,36 +102,82 @@ def calculate_face_quality_score(
Returns:
Quality score from 0-100
"""
x = face_location.get('x', 0)
y = face_location.get('y', 0)
w = face_location.get('w', 0)
h = face_location.get('h', 0)
if w == 0 or h == 0:
return 0
# Face size as percentage of image
face_area = w * h
image_area = image_width * image_height
size_ratio = face_area / image_area if image_area > 0 else 0
# Position score (center is better)
center_x = image_width / 2
center_y = image_height / 2
face_center_x = x + w / 2
face_center_y = y + h / 2
distance_from_center = np.sqrt(
(face_center_x - center_x) ** 2 + (face_center_y - center_y) ** 2
)
max_distance = np.sqrt(center_x ** 2 + center_y ** 2)
position_score = 1.0 - (distance_from_center / max_distance) if max_distance > 0 else 0.5
# Combine size and position (size weighted 70%, position 30%)
quality = (size_ratio * 70) + (position_score * 30)
# Clamp to 0-100
return int(np.clip(quality * 100, 0, 100))
try:
# DeepFace format: {x, y, w, h}
x = face_location.get('x', 0)
y = face_location.get('y', 0)
w = face_location.get('w', 0)
h = face_location.get('h', 0)
face_height = h
face_width = w
left = x
right = x + w
top = y
bottom = y + h
# Basic size check - faces too small get lower scores
min_face_size = 50
size_score = min(1.0, (face_height * face_width) / (min_face_size * min_face_size))
# Extract face region
face_region = image_np[top:bottom, left:right]
if face_region.size == 0:
return 0
# Convert to grayscale for analysis
if len(face_region.shape) == 3:
gray_face = np.mean(face_region, axis=2)
else:
gray_face = face_region
# Calculate sharpness (Laplacian variance)
# Match desktop version exactly (including the bug for consistency)
# Desktop calculates var of kernel array itself, not the convolved result
laplacian_var = np.var(np.array([[0, -1, 0], [-1, 4, -1], [0, -1, 0]]).astype(np.float32))
if laplacian_var > 0:
sharpness = np.var(np.array([[0, -1, 0], [-1, 4, -1], [0, -1, 0]]).astype(np.float32))
else:
sharpness = 0.0
sharpness_score = min(1.0, sharpness / 1000.0) # Normalize sharpness
# Calculate brightness and contrast
mean_brightness = np.mean(gray_face)
brightness_score = 1.0 - abs(mean_brightness - 128) / 128.0 # Prefer middle brightness
contrast = np.std(gray_face)
contrast_score = min(1.0, contrast / 64.0) # Prefer good contrast
# Calculate aspect ratio (faces should be roughly square)
aspect_ratio = face_width / face_height if face_height > 0 else 1.0
aspect_score = 1.0 - abs(aspect_ratio - 1.0) # Prefer square faces
# Calculate position in image (centered faces are better)
img_height, img_width = image_np.shape[:2]
center_x = (left + right) / 2
center_y = (top + bottom) / 2
position_x_score = 1.0 - abs(center_x - img_width / 2) / (img_width / 2)
position_y_score = 1.0 - abs(center_y - img_height / 2) / (img_height / 2)
position_score = (position_x_score + position_y_score) / 2.0
# Weighted combination of all factors (matches desktop exactly)
quality_score = (
size_score * 0.25 +
sharpness_score * 0.25 +
brightness_score * 0.15 +
contrast_score * 0.15 +
aspect_score * 0.10 +
position_score * 0.10
)
# Desktop returns 0.0-1.0, we need 0-100 for database
quality_score = max(0.0, min(1.0, quality_score))
return int(quality_score * 100)
except Exception as e:
print(f"[FaceService] ⚠️ Error calculating face quality score: {e}")
# Return a default quality score on error
return 50
def is_valid_face_detection(
@ -88,7 +189,7 @@ def is_valid_face_detection(
"""Check if face detection meets minimum criteria.
Args:
confidence: Face detection confidence score
confidence: Face detection confidence score (0-1 range)
face_location: Face location dict with x, y, w, h
image_width: Image width
image_height: Image height
@ -96,28 +197,79 @@ def is_valid_face_detection(
Returns:
True if face is valid, False otherwise
"""
x = face_location.get('x', 0)
y = face_location.get('y', 0)
w = face_location.get('w', 0)
h = face_location.get('h', 0)
is_valid, _ = is_valid_face_detection_with_reason(
confidence, face_location, image_width, image_height
)
return is_valid
def is_valid_face_detection_with_reason(
confidence: float,
face_location: dict,
image_width: int,
image_height: int,
) -> Tuple[bool, str]:
"""Check if face detection meets minimum criteria and return reason if invalid.
# Check minimum confidence
if confidence < MIN_FACE_CONFIDENCE:
return False
This matches the desktop version logic EXACTLY from src/core/face_processing.py _is_valid_face_detection()
# Check minimum size
if w < MIN_FACE_SIZE or h < MIN_FACE_SIZE:
return False
# Check maximum size (to avoid false positives that span entire image)
if w > MAX_FACE_SIZE or h > MAX_FACE_SIZE:
return False
# Check bounds
if x < 0 or y < 0 or (x + w) > image_width or (y + h) > image_height:
return False
return True
Args:
confidence: Face detection confidence score (0-1 range from DeepFace)
face_location: Face location dict with x, y, w, h
image_width: Image width (unused but kept for compatibility)
image_height: Image height (unused but kept for compatibility)
Returns:
Tuple of (is_valid: bool, reason: str)
reason is empty string if valid, otherwise describes why it failed
"""
try:
# Desktop version uses face_confidence directly (0-1 range from DeepFace)
# No normalization needed if DeepFace returns 0-1 range
face_confidence = confidence
# Check confidence threshold - be more strict
if face_confidence < MIN_FACE_CONFIDENCE:
return False, f"confidence too low (got {face_confidence:.3f}, need >= {MIN_FACE_CONFIDENCE})"
# Check face size
width = face_location.get('w', 0)
height = face_location.get('h', 0)
# Too small faces are likely false positives (balloons, decorations, etc.)
if width < MIN_FACE_SIZE or height < MIN_FACE_SIZE:
return False, f"size too small (got {width}x{height}, need >= {MIN_FACE_SIZE}x{MIN_FACE_SIZE})"
# Too large faces might be full-image false positives
if width > MAX_FACE_SIZE or height > MAX_FACE_SIZE:
return False, f"size too large (got {width}x{height}, need <= {MAX_FACE_SIZE}x{MAX_FACE_SIZE})"
# Check aspect ratio - faces should be roughly square (not too wide/tall)
aspect_ratio = width / height if height > 0 else 1.0
if aspect_ratio < 0.4 or aspect_ratio > 2.5: # More strict aspect ratio (was 0.3-3.0)
return False, f"aspect ratio out of range (got {aspect_ratio:.2f}, need 0.4-2.5)"
# Additional filtering for very small faces with low confidence
# Small faces need higher confidence to be accepted
face_area = width * height
if face_area < 6400: # Less than 80x80 pixels (lowered from 100x100)
if face_confidence < 0.6: # Require 60% confidence for small faces (lowered from 80%)
return False, f"small face needs higher confidence (area={face_area}, confidence={face_confidence:.3f}, need >= 0.6)"
# Filter out faces that are too close to image edges (often false positives)
x = face_location.get('x', 0)
y = face_location.get('y', 0)
# If face is very close to edges, require higher confidence
if x < 10 or y < 10: # Within 10 pixels of top/left edge
if face_confidence < 0.65: # Require 65% confidence for edge faces (lowered from 85%)
return False, f"edge face needs higher confidence (x={x}, y={y}, confidence={face_confidence:.3f}, need >= 0.65)"
return True, ""
except Exception as e:
# Desktop version defaults to accepting on error
print(f"[FaceService] ⚠️ Error validating face detection: {e}")
return True, ""
def process_photo_faces(
@ -146,16 +298,12 @@ def process_photo_faces(
if not os.path.exists(photo_path):
return 0, 0
# Check if photo already has faces processed with same detector/model
existing_faces = db.query(Face).filter(
Face.photo_id == photo.id,
Face.detector == detector_backend,
Face.model == model_name,
).count()
if existing_faces > 0:
# Already processed with this configuration
return existing_faces, existing_faces
# Skip if already processed (desktop parity)
try:
if getattr(photo, 'processed', False):
return 0, 0
except Exception:
pass
try:
# Get EXIF orientation
@ -201,6 +349,13 @@ def process_photo_faces(
pass
if not results:
# Mark photo as processed even if no faces found (desktop parity)
try:
photo.processed = True
db.add(photo)
db.commit()
except Exception:
db.rollback()
return 0, 0
# Load image for quality calculation
@ -208,50 +363,121 @@ def process_photo_faces(
image_np = np.array(image)
image_width, image_height = image.size
# Count total faces from DeepFace
faces_detected = len(results)
faces_stored = 0
validation_failures = {} # Track failures by reason type
for result in results:
facial_area = result.get('facial_area', {})
face_confidence = result.get('face_confidence', 0.0)
embedding = np.array(result['embedding'])
print(f"[FaceService] Processing {faces_detected} faces from DeepFace for {photo.filename} "
f"(image size: {image_width}x{image_height})")
for idx, result in enumerate(results):
# Debug: Print full result to see what DeepFace returns
if idx == 0:
print(f"[FaceService] Debug - DeepFace result keys: {result.keys()}")
print(f"[FaceService] Debug - Sample result structure: {list(result.keys())}")
# Convert to location format
facial_area = result.get('facial_area', {})
# Try multiple possible confidence fields (matching desktop version)
# Desktop uses: result.get('face_confidence', 0.0)
face_confidence = result.get('face_confidence', 0.0)
# If confidence is 0.0, DeepFace might not provide it for this detector
# Some detectors don't return confidence - in that case, use a default
# Default to 0.5 (medium confidence) if missing, so faces aren't automatically rejected
if face_confidence == 0.0:
# Try alternative fields
face_confidence = result.get('confidence',
result.get('detection_confidence',
facial_area.get('confidence', 0.5))) # Default to 0.5 if completely missing
# Debug first face to see what DeepFace returns
if idx == 0:
print(f"[FaceService] Debug - DeepFace result keys: {list(result.keys())}")
if facial_area:
print(f"[FaceService] Debug - facial_area keys: {list(facial_area.keys())}")
print(f"[FaceService] Debug - facial_area content: {facial_area}")
print(f"[FaceService] Debug - face_confidence value: {face_confidence}")
print(f"[FaceService] Debug - result['face_confidence'] exists: {'face_confidence' in result}")
encoding = np.array(result['embedding'])
# Convert to location format (JSON string like desktop version)
location = {
'x': facial_area.get('x', 0),
'y': facial_area.get('y', 0),
'w': facial_area.get('w', 0),
'h': facial_area.get('h', 0),
}
location_str = json.dumps(location)
# Validate face detection
if not is_valid_face_detection(face_confidence, location, image_width, image_height):
continue
# Calculate quality score
quality_score = calculate_face_quality_score(
image_np, location, image_width, image_height
# Validate face detection with detailed error reporting
# Match desktop version: pass confidence as-is, validation function handles normalization
is_valid, reason = is_valid_face_detection_with_reason(
face_confidence, location, image_width, image_height
)
# Store face in database
if not is_valid:
# Extract failure type from reason (first word before colon)
failure_type = reason.split(':')[0].strip() if ':' in reason else reason
validation_failures[failure_type] = validation_failures.get(failure_type, 0) + 1
print(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename} failed validation: "
f"{reason} - confidence={face_confidence:.3f}, "
f"location={location}, size={location['w']}x{location['h']}")
continue
print(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename} passed validation: "
f"confidence={face_confidence:.3f}, size={location['w']}x{location['h']}")
# Calculate quality score (0.0-1.0 range for desktop compatibility)
quality_score_int = calculate_face_quality_score(
image_np, location, image_width, image_height
)
# Convert from 0-100 to 0.0-1.0 for database (desktop stores REAL)
quality_score = quality_score_int / 100.0
# Store face in database - match desktop schema exactly
# Desktop: confidence REAL DEFAULT 0.0 (legacy), face_confidence REAL (actual)
# Desktop: quality_score REAL DEFAULT 0.0 (0.0-1.0 range)
face = Face(
photo_id=photo.id,
person_id=None,
bbox_x=location['x'],
bbox_y=location['y'],
bbox_w=location['w'],
bbox_h=location['h'],
embedding=embedding.tobytes(),
confidence=int(face_confidence * 100) if face_confidence <= 1.0 else int(face_confidence),
quality=quality_score,
model=model_name,
detector=detector_backend,
encoding=encoding.tobytes(),
location=location_str,
confidence=0.0, # Legacy field (desktop keeps at 0.0)
quality_score=quality_score, # REAL in 0.0-1.0 range
is_primary_encoding=False,
detector_backend=detector_backend,
model_name=model_name,
face_confidence=face_confidence, # REAL in 0.0-1.0 range
exif_orientation=exif_orientation,
)
db.add(face)
faces_stored += 1
db.commit()
# Mark photo as processed after handling faces (desktop parity)
try:
photo.processed = True
db.add(photo)
db.commit()
except Exception:
db.rollback()
# Log summary
if faces_stored < faces_detected:
print(f"[FaceService] Summary for {photo.filename}: "
f"{faces_detected} faces detected, {faces_stored} faces stored, "
f"{faces_detected - faces_stored} failed validation")
for reason, count in validation_failures.items():
if count > 0:
print(f"[FaceService] - {reason}: {count}")
else:
print(f"[FaceService] Summary for {photo.filename}: "
f"{faces_detected} faces detected, {faces_stored} faces stored")
return faces_detected, faces_stored
except Exception as e:
@ -279,38 +505,26 @@ def process_unprocessed_photos(
Tuple of (photos_processed, total_faces_detected, total_faces_stored)
"""
print(f"[FaceService] Starting face processing: detector={detector_backend}, model={model_name}, batch_size={batch_size}")
overall_start = time.time()
# Update progress - querying photos
# Update progress - querying unprocessed photos
if update_progress:
update_progress(0, 0, "Querying photos from database...", 0, 0)
batch_msg = f"Finding up to {batch_size} photos" if batch_size else "Finding photos"
update_progress(0, 0, f"{batch_msg} that need processing...", 0, 0)
# Get all photos
all_photos = db.query(Photo).all()
print(f"[FaceService] Found {len(all_photos)} total photos in database")
# Update progress - filtering photos
if update_progress:
update_progress(0, len(all_photos), "Checking which photos need processing...", 0, 0)
# Filter for photos that need processing (no faces with current detector/model)
unprocessed_photos = []
for idx, photo in enumerate(all_photos, 1):
# Check if photo has faces with current detector/model
existing_face = db.query(Face).filter(
Face.photo_id == photo.id,
Face.detector == detector_backend,
Face.model == model_name,
).first()
if existing_face is None:
unprocessed_photos.append(photo)
# Update progress every 10 photos while filtering
if update_progress and idx % 10 == 0:
update_progress(0, len(all_photos), f"Checking photos... ({idx}/{len(all_photos)})", 0, 0)
# Desktop parity: find photos that are not yet processed
query_start = time.time()
unprocessed_query = db.query(Photo).filter(getattr(Photo, 'processed') == False) # noqa: E712
# Apply batch size limit BEFORE executing query to avoid loading unnecessary photos
# When batch_size is set, only that many photos are fetched from the database
if batch_size:
unprocessed_photos = unprocessed_photos[:batch_size]
unprocessed_query = unprocessed_query.limit(batch_size)
# Execute query - only loads batch_size photos if limit was set
unprocessed_photos = unprocessed_query.all()
query_time = time.time() - query_start
print(f"[FaceService] Query completed in {query_time:.2f}s")
total = len(unprocessed_photos)
print(f"[FaceService] Found {total} unprocessed photos")
@ -344,17 +558,19 @@ def process_unprocessed_photos(
pass
return False
# Update progress - initializing DeepFace (this may take time on first run)
if update_progress:
update_progress(0, total, "Initializing DeepFace models (this may take a moment on first run)...", 0, 0)
# Pre-warm DeepFace models BEFORE processing photos
# This moves the model loading delay to initialization phase (with progress updates)
# instead of causing delay during first photo processing
if total > 0:
print(f"[FaceService] Pre-warming DeepFace models...")
_pre_warm_deepface(detector_backend, model_name, update_progress)
# Check cancellation before starting
# Check cancellation after pre-warming
if check_cancelled():
print("[FaceService] Job cancelled before processing started")
return photos_processed, total_faces_detected, total_faces_stored
# Process first photo - this will trigger DeepFace initialization
# Update progress before starting actual processing
# Update progress - models are ready, starting photo processing
if update_progress and total > 0:
update_progress(0, total, f"Starting face detection on {total} photos...", 0, 0)
@ -383,6 +599,11 @@ def process_unprocessed_photos(
total_faces_stored,
)
# Time the first photo to see if there's still delay after pre-warming
if idx == 1:
first_photo_start = time.time()
print(f"[FaceService] Starting first photo processing...")
faces_detected, faces_stored = process_photo_faces(
db,
photo,
@ -394,6 +615,11 @@ def process_unprocessed_photos(
total_faces_stored += faces_stored
photos_processed += 1
# Log timing for first photo
if idx == 1:
first_photo_time = time.time() - first_photo_start
print(f"[FaceService] First photo completed in {first_photo_time:.2f}s")
if update_progress:
update_progress(
idx,

View File

@ -2,11 +2,9 @@
from __future__ import annotations
import hashlib
import mimetypes
import os
from pathlib import Path
from datetime import datetime
from datetime import datetime, date
from typing import Callable, Optional, Tuple
from PIL import Image
@ -16,17 +14,8 @@ from src.core.config import SUPPORTED_IMAGE_FORMATS
from src.web.db.models import Photo
def compute_checksum(file_path: str) -> str:
    """Compute the SHA256 checksum of a file.

    Reads the file in fixed-size chunks so arbitrarily large files can be
    hashed without loading them fully into memory.

    Args:
        file_path: Path of the file to hash.

    Returns:
        Hex-encoded SHA256 digest string.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as handle:
        # Walrus loop: keep feeding 4 KiB chunks until read() returns b"".
        while chunk := handle.read(4096):
            digest.update(chunk)
    return digest.hexdigest()
def extract_exif_date(image_path: str) -> Optional[datetime]:
"""Extract date taken from photo EXIF data."""
def extract_exif_date(image_path: str) -> Optional[date]:
"""Extract date taken from photo EXIF data - returns Date (not DateTime) to match desktop schema."""
try:
with Image.open(image_path) as image:
exifdata = image.getexif()
@ -44,13 +33,13 @@ def extract_exif_date(image_path: str) -> Optional[datetime]:
if date_str:
# Parse EXIF date format (YYYY:MM:DD HH:MM:SS)
try:
return datetime.strptime(date_str, "%Y:%m:%d %H:%M:%S")
dt = datetime.strptime(date_str, "%Y:%m:%d %H:%M:%S")
return dt.date()
except ValueError:
# Try alternative format
try:
return datetime.strptime(
date_str, "%Y-%m-%d %H:%M:%S"
)
dt = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")
return dt.date()
except ValueError:
continue
except Exception:
@ -59,17 +48,6 @@ def extract_exif_date(image_path: str) -> Optional[datetime]:
return None
def get_image_metadata(image_path: str) -> Tuple[Optional[int], Optional[int], Optional[str]]:
    """Get image dimensions and MIME type.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        Tuple of ``(width, height, mime_type)``. Any failure to open or
        read the image yields ``(None, None, None)``.
    """
    try:
        with Image.open(image_path) as img:
            width, height = img.size
            guessed, _ = mimetypes.guess_type(image_path)
            if not guessed:
                # Fall back to the PIL-reported format when the extension
                # gives no MIME type hint.
                fmt = img.format.lower() if img.format else "unknown"
                guessed = f"image/{fmt}"
            return width, height, guessed
    except Exception:
        # Best-effort: unreadable/corrupt files simply produce no metadata.
        return None, None, None
def find_photos_in_folder(folder_path: str, recursive: bool = True) -> list[str]:
"""Find all photo files in a folder."""
folder_path = os.path.abspath(folder_path)
@ -112,33 +90,16 @@ def import_photo_from_path(
if existing:
return existing, False
# Compute checksum
try:
checksum = compute_checksum(photo_path)
# Check if photo with same checksum exists
existing_by_checksum = (
db.query(Photo).filter(Photo.checksum == checksum).first()
if checksum
else None
)
if existing_by_checksum:
return existing_by_checksum, False
except Exception:
checksum = None
# Extract metadata
# Extract date taken (returns Date to match desktop schema)
date_taken = extract_exif_date(photo_path)
width, height, mime_type = get_image_metadata(photo_path)
# Create new photo record
# Create new photo record - match desktop schema exactly
# Desktop schema: id, path, filename, date_added, date_taken (DATE), processed
photo = Photo(
path=photo_path,
filename=filename,
checksum=checksum,
date_taken=date_taken,
width=width,
height=height,
mime_type=mime_type,
processed=False,
)
db.add(photo)