feat: Add DeepFace model weights download functionality to installation script

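This commit introduces a new function in the `install.sh` script to download DeepFace model weights, enhancing the setup process for users. The function checks for the presence of DeepFace and attempts to download the ArcFace model weights, providing fallback options and user-friendly messages for manual download if automatic attempts fail. This improvement streamlines the initial configuration for facial recognition capabilities in the application. The commit also touches the frontend and backend: the photo upload request no longer sets the multipart `Content-Type` header by hand, the pending-photo review reads its upload directory from `UPLOAD_DIR`/`PENDING_PHOTOS_DIR` and searches more locations for a file, `Photo.file_hash` becomes nullable, the `.env` lookup path is corrected, a missing auth database yields an HTTP 503 instead of a `ValueError`, and DeepFace processing suppresses stderr and tolerates broken-pipe errors when logging.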
This commit is contained in:
Tanya 2026-01-02 14:16:08 -05:00
parent 32be5c7f23
commit e624d203d5
7 changed files with 308 additions and 72 deletions

View File

@ -54,14 +54,10 @@ export const photosApi = {
formData.append('files', file)
})
// Don't set Content-Type header manually - let the browser set it with boundary
const { data } = await apiClient.post<UploadResponse>(
'/api/v1/photos/import/upload',
formData,
{
headers: {
'Content-Type': 'multipart/form-data',
},
}
formData
)
return data
},

View File

@ -275,7 +275,8 @@ def review_pending_photos(
now = datetime.utcnow()
# Base directories
upload_base_dir = Path("/mnt/db-server-uploads")
# Try to get upload directory from environment, fallback to hardcoded path
upload_base_dir = Path(os.getenv("UPLOAD_DIR") or os.getenv("PENDING_PHOTOS_DIR") or "/mnt/db-server-uploads")
main_storage_dir = Path(PHOTO_STORAGE_DIR)
main_storage_dir.mkdir(parents=True, exist_ok=True)
@ -306,18 +307,41 @@ def review_pending_photos(
# Try to find the file - handle both absolute and relative paths
if os.path.isabs(db_file_path):
# Use absolute path directly
source_path = Path(db_file_path)
else:
# Try relative to upload base directory
source_path = upload_base_dir / db_file_path
# If file doesn't exist, try with filename
# If file doesn't exist, try alternative locations
if not source_path.exists():
# Try with just the filename in upload_base_dir
source_path = upload_base_dir / row.filename
if not source_path.exists() and row.original_filename:
# Try with original filename
source_path = upload_base_dir / row.original_filename
# If still not found, try looking in user subdirectories
if not source_path.exists() and upload_base_dir.exists():
# Check if file_path contains user ID subdirectory
# file_path format might be: {userId}/{filename} or full path
try:
for user_id_dir in upload_base_dir.iterdir():
if user_id_dir.is_dir():
potential_path = user_id_dir / row.filename
if potential_path.exists():
source_path = potential_path
break
if row.original_filename:
potential_path = user_id_dir / row.original_filename
if potential_path.exists():
source_path = potential_path
break
except (PermissionError, OSError) as e:
# Can't read directory, skip this search
pass
if not source_path.exists():
errors.append(f"Photo file not found for pending photo {decision.id}: {source_path}")
errors.append(f"Photo file not found for pending photo {decision.id}. Tried: {db_file_path}, {upload_base_dir / row.filename}, {upload_base_dir / row.original_filename if row.original_filename else 'N/A'}")
continue
# Calculate file hash and check for duplicates BEFORE moving file
@ -328,9 +352,18 @@ def review_pending_photos(
continue
# Check if photo with same hash already exists in main database
existing_photo = main_db.execute(text("""
SELECT id, path FROM photos WHERE file_hash = :file_hash
"""), {"file_hash": file_hash}).fetchone()
# Handle case where file_hash column might not exist or be NULL for old photos
try:
existing_photo = main_db.execute(text("""
SELECT id, path FROM photos WHERE file_hash = :file_hash AND file_hash IS NOT NULL
"""), {"file_hash": file_hash}).fetchone()
except Exception as e:
# If file_hash column doesn't exist, skip duplicate check
# This can happen if database schema is outdated
if "no such column" in str(e).lower() or "file_hash" in str(e).lower():
existing_photo = None
else:
raise
if existing_photo:
# Photo already exists - mark as duplicate and skip import

View File

@ -40,7 +40,7 @@ class Photo(Base):
date_added = Column(DateTime, default=datetime.utcnow, nullable=False)
date_taken = Column(Date, nullable=True, index=True)
processed = Column(Boolean, default=False, nullable=False, index=True)
file_hash = Column(Text, nullable=False, index=True)
file_hash = Column(Text, nullable=True, index=True) # Nullable to support existing photos without hashes
media_type = Column(Text, default="image", nullable=False, index=True) # "image" or "video"
faces = relationship("Face", back_populates="photo", cascade="all, delete-orphan")

View File

@ -8,7 +8,8 @@ from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
# Load environment variables from .env file if it exists
env_path = Path(__file__).parent.parent.parent.parent / ".env"
# Path: backend/db/session.py -> backend/db -> backend -> punimtag/ -> .env
env_path = Path(__file__).parent.parent.parent / ".env"
load_dotenv(dotenv_path=env_path)
@ -87,8 +88,16 @@ try:
**auth_pool_kwargs
)
AuthSessionLocal = sessionmaker(bind=auth_engine, autoflush=False, autocommit=False, future=True)
except ValueError:
except ValueError as e:
# DATABASE_URL_AUTH not set - auth database not available
print(f"[DB Session] ⚠️ Auth database not configured: {e}")
auth_engine = None
AuthSessionLocal = None
except Exception as e:
# Other errors (connection failures, etc.) - log but don't crash
import os
print(f"[DB Session] ⚠️ Failed to initialize auth database: {e}")
print(f"[DB Session] URL was: {os.getenv('DATABASE_URL_AUTH', 'not set')}")
auth_engine = None
AuthSessionLocal = None
@ -96,7 +105,11 @@ except ValueError:
def get_auth_db() -> Generator:
"""Yield a DB session for auth database request lifecycle."""
if AuthSessionLocal is None:
raise ValueError("Auth database not configured. Set DATABASE_URL_AUTH environment variable.")
from fastapi import HTTPException, status
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail="Auth database not configured. Please set DATABASE_URL_AUTH environment variable in the backend configuration."
)
db = AuthSessionLocal()
try:
yield db

View File

@ -66,15 +66,27 @@ def _pre_warm_deepface(
dummy_img = Image.new('RGB', (100, 100), color='black')
dummy_array = np.array(dummy_img)
# This will trigger model loading but won't find any faces (which is fine)
# We use enforce_detection=False to avoid errors when no faces are found
DeepFace.represent(
img_path=dummy_array,
model_name=model_name,
detector_backend=detector_backend,
enforce_detection=False, # Don't fail if no faces
align=DEEPFACE_ALIGN_FACES,
)
# Suppress stderr to prevent broken pipe errors from gdown
import sys
import contextlib
from io import StringIO
# Suppress stdout/stderr during model loading
with contextlib.redirect_stdout(StringIO()), open(os.devnull, 'w') as devnull:
old_stderr = sys.stderr
sys.stderr = devnull
try:
# This will trigger model loading but won't find any faces (which is fine)
# We use enforce_detection=False to avoid errors when no faces are found
DeepFace.represent(
img_path=dummy_array,
model_name=model_name,
detector_backend=detector_backend,
enforce_detection=False, # Don't fail if no faces
align=DEEPFACE_ALIGN_FACES,
)
finally:
sys.stderr = old_stderr
elapsed = time.time() - start_time
print(f"[DeepFace] Models loaded in {elapsed:.2f}s")
@ -83,7 +95,10 @@ def _pre_warm_deepface(
except Exception as e:
# If pre-warming fails, models will just load on first real photo
elapsed = time.time() - start_time
print(f"[DeepFace] Pre-warming completed (with warnings) in {elapsed:.2f}s: {e}")
try:
print(f"[DeepFace] Pre-warming completed (with warnings) in {elapsed:.2f}s: {e}")
except (BrokenPipeError, OSError):
pass
# Don't raise - let it load on first photo instead
@ -302,26 +317,58 @@ def process_photo_faces(
if not DEEPFACE_AVAILABLE:
raise RuntimeError("DeepFace not available")
photo_path = photo.path
if not os.path.exists(photo_path):
return 0, 0
# Suppress stderr globally for this function to prevent broken pipe errors
# This must happen BEFORE any DeepFace/RetinaFace initialization
import sys
from io import StringIO
import contextlib
# Save original stderr and redirect to devnull for the entire function
# This prevents gdown (used by RetinaFace/DeepFace) from causing broken pipe errors
# Note: os is imported at module level, so it's available here
original_stderr = sys.stderr
devnull_path = os.devnull # Capture os.devnull value to avoid closure issues
devnull_fd = open(devnull_path, 'w')
sys.stderr = devnull_fd
def _print_with_stderr(*args, **kwargs):
"""Helper to temporarily restore stderr for print statements"""
sys.stderr = original_stderr
try:
print(*args, **kwargs)
finally:
sys.stderr = devnull_fd
# Skip videos (videos are not processed for face detection)
try:
media_type = getattr(photo, 'media_type', 'image')
if media_type == 'video':
photo_path = photo.path
if not os.path.exists(photo_path):
sys.stderr = original_stderr
if devnull_fd:
devnull_fd.close()
return 0, 0
except Exception:
pass
# Skip if already processed (desktop parity)
try:
if getattr(photo, 'processed', False):
return 0, 0
except Exception:
pass
try:
# Skip videos (videos are not processed for face detection)
try:
media_type = getattr(photo, 'media_type', 'image')
if media_type == 'video':
sys.stderr = original_stderr
if devnull_fd:
devnull_fd.close()
return 0, 0
except Exception:
pass
# Skip if already processed (desktop parity)
try:
if getattr(photo, 'processed', False):
sys.stderr = original_stderr
if devnull_fd:
devnull_fd.close()
return 0, 0
except Exception:
pass
# Get EXIF orientation
exif_orientation = EXIFOrientationHandler.get_exif_orientation(photo_path)
@ -349,25 +396,40 @@ def process_photo_faces(
try:
pose_faces = pose_detector.detect_pose_faces(face_detection_path)
if pose_faces:
print(f"[FaceService] Pose detection for {photo.filename}: found {len(pose_faces)} faces with pose data")
_print_with_stderr(f"[FaceService] Pose detection for {photo.filename}: found {len(pose_faces)} faces with pose data")
except Exception as e:
print(f"[FaceService] ⚠️ Pose detection failed for {photo.filename}: {e}, using defaults")
_print_with_stderr(f"[FaceService] ⚠️ Pose detection failed for {photo.filename}: {e}, using defaults")
pose_faces = []
elif RETINAFACE_AVAILABLE:
# Fallback: create detector if not provided (backward compatibility)
# Note: stderr is already suppressed above, so PoseDetector initialization won't cause broken pipe
try:
pose_detector_local = PoseDetector()
pose_faces = pose_detector_local.detect_pose_faces(face_detection_path)
if pose_faces:
print(f"[FaceService] Pose detection for {photo.filename}: found {len(pose_faces)} faces with pose data")
_print_with_stderr(f"[FaceService] Pose detection for {photo.filename}: found {len(pose_faces)} faces with pose data")
except Exception as e:
print(f"[FaceService] ⚠️ Pose detection failed for {photo.filename}: {e}, using defaults")
_print_with_stderr(f"[FaceService] ⚠️ Pose detection failed for {photo.filename}: {e}, using defaults")
pose_faces = []
# Step 2: Use DeepFace for encoding generation
# Note: stderr is already suppressed, so DeepFace won't cause broken pipe errors
_print_with_stderr(f"[DeepFace] Processing {photo.filename} with {detector_backend}/{model_name}...")
# Check if model weights exist before calling DeepFace
# This prevents automatic download attempts that can cause broken pipe errors
weights_path = os.path.expanduser(f"~/.deepface/weights/arcface_weights.h5")
if model_name == "ArcFace" and not os.path.exists(weights_path):
sys.stderr = original_stderr
raise Exception(
f"ArcFace model weights not found at {weights_path}. "
f"Please download manually: "
f"wget https://github.com/serengil/deepface_models/releases/download/v1.0/arcface_weights.h5 "
f"-O {weights_path}"
)
# DeepFace call - stderr is already suppressed
try:
# Step 2: Use DeepFace for encoding generation
# Note: First call may take time to download/initialize models
print(f"[DeepFace] Processing {photo.filename} with {detector_backend}/{model_name}...")
results = DeepFace.represent(
img_path=face_detection_path,
model_name=model_name,
@ -375,9 +437,20 @@ def process_photo_faces(
enforce_detection=DEEPFACE_ENFORCE_DETECTION,
align=DEEPFACE_ALIGN_FACES,
)
print(f"[DeepFace] Completed {photo.filename}")
_print_with_stderr(f"[DeepFace] Completed {photo.filename}")
except Exception as e:
print(f"[DeepFace] Error processing {photo.filename}: {e}")
error_msg = str(e)
try:
_print_with_stderr(f"[DeepFace] Error processing {photo.filename}: {error_msg}")
except (BrokenPipeError, OSError):
pass # Ignore broken pipe when printing
# If it's a model download error, provide helpful message
if "downloading" in error_msg.lower() or "arcface_weights" in error_msg.lower():
raise Exception(
f"Failed to load DeepFace model '{model_name}'. "
f"Please download the model weights manually. Error: {error_msg}"
)
raise
finally:
# Clean up temporary file if created
@ -407,7 +480,7 @@ def process_photo_faces(
faces_stored = 0
validation_failures = {} # Track failures by reason type
print(f"[FaceService] Processing {faces_detected} faces from DeepFace for {photo.filename} "
_print_with_stderr(f"[FaceService] Processing {faces_detected} faces from DeepFace for {photo.filename} "
f"(image size: {image_width}x{image_height})")
# Track which pose_faces have been used to prevent duplicate matches
@ -416,8 +489,8 @@ def process_photo_faces(
for idx, result in enumerate(results):
# Debug: Print full result to see what DeepFace returns
if idx == 0:
print(f"[FaceService] Debug - DeepFace result keys: {result.keys()}")
print(f"[FaceService] Debug - Sample result structure: {list(result.keys())}")
_print_with_stderr(f"[FaceService] Debug - DeepFace result keys: {result.keys()}")
_print_with_stderr(f"[FaceService] Debug - Sample result structure: {list(result.keys())}")
facial_area = result.get('facial_area', {})
# Try multiple possible confidence fields (matching desktop version)
@ -435,12 +508,12 @@ def process_photo_faces(
# Debug first face to see what DeepFace returns
if idx == 0:
print(f"[FaceService] Debug - DeepFace result keys: {list(result.keys())}")
_print_with_stderr(f"[FaceService] Debug - DeepFace result keys: {list(result.keys())}")
if facial_area:
print(f"[FaceService] Debug - facial_area keys: {list(facial_area.keys())}")
print(f"[FaceService] Debug - facial_area content: {facial_area}")
print(f"[FaceService] Debug - face_confidence value: {face_confidence}")
print(f"[FaceService] Debug - result['face_confidence'] exists: {'face_confidence' in result}")
_print_with_stderr(f"[FaceService] Debug - facial_area keys: {list(facial_area.keys())}")
_print_with_stderr(f"[FaceService] Debug - facial_area content: {facial_area}")
_print_with_stderr(f"[FaceService] Debug - face_confidence value: {face_confidence}")
_print_with_stderr(f"[FaceService] Debug - result['face_confidence'] exists: {'face_confidence' in result}")
encoding = np.array(result['embedding'])
@ -463,12 +536,12 @@ def process_photo_faces(
# Extract failure type from reason (first word before colon)
failure_type = reason.split(':')[0].strip() if ':' in reason else reason
validation_failures[failure_type] = validation_failures.get(failure_type, 0) + 1
print(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename} failed validation: "
_print_with_stderr(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename} failed validation: "
f"{reason} - confidence={face_confidence:.3f}, "
f"location={location}, size={location['w']}x{location['h']}")
continue
print(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename} passed validation: "
_print_with_stderr(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename} passed validation: "
f"confidence={face_confidence:.3f}, size={location['w']}x{location['h']}")
# Calculate quality score (0.0-1.0 range for desktop compatibility)
@ -618,11 +691,11 @@ def process_photo_faces(
if face_width is not None:
profile_status = "PROFILE" if face_width < 25.0 else "FRONTAL"
yaw_str = f"{yaw_angle:.2f}°" if yaw_angle is not None else "None"
print(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename}: "
_print_with_stderr(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename}: "
f"face_width={face_width:.2f}px, pose_mode={pose_mode} ({profile_status}), yaw={yaw_str}")
else:
yaw_str = f"{yaw_angle:.2f}°" if yaw_angle is not None else "None"
print(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename}: "
_print_with_stderr(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename}: "
f"face_width=None, pose_mode={pose_mode}, yaw={yaw_str}")
# Store face in database - match desktop schema exactly
@ -662,14 +735,14 @@ def process_photo_faces(
# Log summary
if faces_stored < faces_detected:
print(f"[FaceService] Summary for {photo.filename}: "
_print_with_stderr(f"[FaceService] Summary for {photo.filename}: "
f"{faces_detected} faces detected, {faces_stored} faces stored, "
f"{faces_detected - faces_stored} failed validation")
for reason, count in validation_failures.items():
if count > 0:
print(f"[FaceService] - {reason}: {count}")
_print_with_stderr(f"[FaceService] - {reason}: {count}")
else:
print(f"[FaceService] Summary for {photo.filename}: "
_print_with_stderr(f"[FaceService] Summary for {photo.filename}: "
f"{faces_detected} faces detected, {faces_stored} faces stored")
return faces_detected, faces_stored
@ -677,6 +750,14 @@ def process_photo_faces(
except Exception as e:
db.rollback()
raise Exception(f"Error processing faces in {photo.filename}: {str(e)}")
finally:
# Always restore stderr
sys.stderr = original_stderr
if devnull_fd:
try:
devnull_fd.close()
except Exception:
pass
def _calculate_iou(box1: Dict, box2: Dict) -> float:
@ -1170,9 +1251,21 @@ def process_unprocessed_photos(
raise
except Exception as e:
# Log error but continue processing other photos
print(f"[FaceService] Error processing photo {photo.filename}: {e}")
import traceback
traceback.print_exc()
try:
print(f"[FaceService] Error processing photo {photo.filename}: {e}")
except (BrokenPipeError, OSError):
pass # Ignore broken pipe errors
# Try to print traceback, but don't fail if stdout is closed
try:
import traceback
traceback.print_exc()
except (BrokenPipeError, OSError):
# If printing fails, at least log the error type
try:
print(f"[FaceService] Error type: {type(e).__name__}: {str(e)}")
except (BrokenPipeError, OSError):
pass
if update_progress:
try:
update_progress(

View File

@ -235,8 +235,20 @@ def process_faces_task(
except Exception as e:
# Log error and update job metadata
error_msg = f"Task failed: {str(e)}"
print(f"[Task] ❌ {error_msg}")
traceback.print_exc()
try:
print(f"[Task] ❌ {error_msg}")
except (BrokenPipeError, OSError):
pass # Ignore broken pipe errors when printing
# Try to print traceback, but don't fail if stdout is closed
try:
traceback.print_exc()
except (BrokenPipeError, OSError):
# If printing fails, at least log the error type
try:
print(f"[Task] Error type: {type(e).__name__}: {str(e)}")
except (BrokenPipeError, OSError):
pass
if job:
try:

View File

@ -209,6 +209,91 @@ install_python_dependencies() {
echo -e "${GREEN} ✅ Python dependencies installed${NC}"
}
# Download DeepFace model weights (optional, but recommended).
#
# Activates the project virtualenv, then makes sure the ArcFace weights file
# exists at ~/.deepface/weights/arcface_weights.h5:
#   1. skip if DeepFace is not importable or a non-empty weights file exists;
#   2. try wget into a temporary ".part" file and move it into place only on
#      success, so a failed download never leaves a truncated weights file
#      behind (wget -O creates/truncates its target even when it fails);
#   3. fall back to letting DeepFace fetch the weights itself from Python;
#   4. otherwise print manual download instructions.
#
# Uses the colour variables (BLUE, GREEN, YELLOW, NC) and the command_exists
# helper from the surrounding script. Always returns 0: the download is
# optional and must not abort the installation.
download_deepface_models() {
    echo -e "${BLUE}🤖 Downloading DeepFace model weights (optional)...${NC}"

    source venv/bin/activate

    # Check if deepface is installed
    if ! python3 -c "import deepface" 2>/dev/null; then
        echo -e "${YELLOW} ⚠️ DeepFace not installed, skipping model download${NC}"
        return 0
    fi

    local weights_dir="$HOME/.deepface/weights"
    local weights_file="$weights_dir/arcface_weights.h5"
    local partial_file="${weights_file}.part"
    local weights_url="https://github.com/serengil/deepface_models/releases/download/v1.0/arcface_weights.h5"

    # Create weights directory
    mkdir -p "$weights_dir"

    # Check if ArcFace weights already exist. -s (not -f) so that an empty
    # file left behind by an earlier failed download is not mistaken for
    # valid weights.
    if [ -s "$weights_file" ]; then
        echo -e "${GREEN} ✅ ArcFace model weights already exist${NC}"
        return 0
    fi

    echo -e "${BLUE} Downloading ArcFace model weights (~131MB, this may take a few minutes)...${NC}"

    # Try to download using wget (more reliable than Python download in worker)
    if command_exists wget; then
        if wget -q --show-progress "$weights_url" -O "$partial_file" \
            && [ -s "$partial_file" ] \
            && mv -f "$partial_file" "$weights_file"; then
            echo -e "${GREEN} ✅ ArcFace model weights downloaded successfully${NC}"
            return 0
        fi
        # Remove whatever the failed attempt left behind.
        rm -f "$partial_file"
        echo -e "${YELLOW} ⚠️ wget failed, trying Python download...${NC}"
    else
        echo -e "${YELLOW} ⚠️ wget not found, trying Python download...${NC}"
    fi

    # Fallback: try with Python (may fail in some environments)
    if python3 << 'PYTHON'
import sys

try:
    from deepface import DeepFace
    import numpy as np
    from PIL import Image

    # Create a dummy image to trigger model download
    dummy_img = Image.new('RGB', (100, 100), color='black')
    dummy_array = np.array(dummy_img)

    # This will trigger model download
    try:
        DeepFace.represent(
            img_path=dummy_array,
            model_name="ArcFace",
            detector_backend="retinaface",
            enforce_detection=False,
            align=True,
        )
        print("✅ Model weights downloaded successfully")
        sys.exit(0)
    except Exception as e:
        if "arcface_weights" in str(e).lower():
            print(f"⚠️ Download failed: {e}")
            print(" You can download manually later or the worker will try again")
            sys.exit(1)
        # Other errors are OK (like no faces found)
        print("✅ Model weights are available")
        sys.exit(0)
except ImportError:
    print("⚠️ DeepFace not available")
    sys.exit(0)
except Exception as e:
    print(f"⚠️ Error: {e}")
    sys.exit(1)
PYTHON
    then
        echo -e "${GREEN} ✅ DeepFace models ready${NC}"
    else
        echo -e "${YELLOW} ⚠️ Automatic download failed. Models will be downloaded on first use.${NC}"
        echo -e "${YELLOW} ⚠️ To download manually, run:${NC}"
        echo -e "${BLUE} mkdir -p ~/.deepface/weights${NC}"
        echo -e "${BLUE} wget https://github.com/serengil/deepface_models/releases/download/v1.0/arcface_weights.h5 -O ~/.deepface/weights/arcface_weights.h5${NC}"
    fi
}
# Install frontend dependencies
install_frontend_dependencies() {
echo -e "${BLUE}📦 Installing frontend dependencies...${NC}"
@ -352,6 +437,10 @@ main() {
install_python_dependencies
echo ""
# Download DeepFace model weights (optional)
download_deepface_models
echo ""
# Install frontend dependencies
install_frontend_dependencies
echo ""