From e624d203d5f03db18be562c534f40fd1f1240ab3 Mon Sep 17 00:00:00 2001 From: Tanya Date: Fri, 2 Jan 2026 14:16:08 -0500 Subject: [PATCH] feat: Add DeepFace model weights download functionality to installation script This commit introduces a new function in the `install.sh` script to download DeepFace model weights, enhancing the setup process for users. The function checks for the presence of DeepFace and attempts to download the ArcFace model weights, providing fallback options and user-friendly messages for manual download if automatic attempts fail. This improvement streamlines the initial configuration for facial recognition capabilities in the application. --- admin-frontend/src/api/photos.ts | 8 +- backend/api/pending_photos.py | 45 ++++++- backend/db/models.py | 2 +- backend/db/session.py | 19 ++- backend/services/face_service.py | 201 ++++++++++++++++++++++--------- backend/services/tasks.py | 16 ++- install.sh | 89 ++++++++++++++ 7 files changed, 308 insertions(+), 72 deletions(-) diff --git a/admin-frontend/src/api/photos.ts b/admin-frontend/src/api/photos.ts index 7e5a15d..b8b7a6b 100644 --- a/admin-frontend/src/api/photos.ts +++ b/admin-frontend/src/api/photos.ts @@ -54,14 +54,10 @@ export const photosApi = { formData.append('files', file) }) + // Don't set Content-Type header manually - let the browser set it with boundary const { data } = await apiClient.post( '/api/v1/photos/import/upload', - formData, - { - headers: { - 'Content-Type': 'multipart/form-data', - }, - } + formData ) return data }, diff --git a/backend/api/pending_photos.py b/backend/api/pending_photos.py index 3c6d6f6..57238fe 100644 --- a/backend/api/pending_photos.py +++ b/backend/api/pending_photos.py @@ -275,7 +275,8 @@ def review_pending_photos( now = datetime.utcnow() # Base directories - upload_base_dir = Path("/mnt/db-server-uploads") + # Try to get upload directory from environment, fallback to hardcoded path + upload_base_dir = Path(os.getenv("UPLOAD_DIR") or os.getenv("PENDING_PHOTOS_DIR") or "/mnt/db-server-uploads") main_storage_dir = Path(PHOTO_STORAGE_DIR) main_storage_dir.mkdir(parents=True, exist_ok=True) @@ -306,18 +307,41 @@ def review_pending_photos( # Try to find the file - handle both absolute and relative paths if os.path.isabs(db_file_path): + # Use absolute path directly source_path = Path(db_file_path) else: + # Try relative to upload base directory source_path = upload_base_dir / db_file_path - # If file doesn't exist, try with filename + # If file doesn't exist, try alternative locations if not source_path.exists(): + # Try with just the filename in upload_base_dir source_path = upload_base_dir / row.filename if not source_path.exists() and row.original_filename: + # Try with original filename source_path = upload_base_dir / row.original_filename + # If still not found, try looking in user subdirectories + if not source_path.exists() and upload_base_dir.exists(): + # Check if file_path contains user ID subdirectory + # file_path format might be: {userId}/{filename} or full path + try: + for user_id_dir in upload_base_dir.iterdir(): + if user_id_dir.is_dir(): + potential_path = user_id_dir / row.filename + if potential_path.exists(): + source_path = potential_path + break + if row.original_filename: + potential_path = user_id_dir / row.original_filename + if potential_path.exists(): + source_path = potential_path + break + except (PermissionError, OSError) as e: + # Can't read directory, skip this search + pass if not source_path.exists(): - errors.append(f"Photo file not found for pending photo {decision.id}: {source_path}") + errors.append(f"Photo file not found for pending photo {decision.id}. Tried: {db_file_path}, {upload_base_dir / row.filename}, {upload_base_dir / row.original_filename if row.original_filename else 'N/A'}") continue # Calculate file hash and check for duplicates BEFORE moving file @@ -328,9 +352,18 @@ def review_pending_photos( continue # Check if photo with same hash already exists in main database - existing_photo = main_db.execute(text(""" - SELECT id, path FROM photos WHERE file_hash = :file_hash - """), {"file_hash": file_hash}).fetchone() + # Handle case where file_hash column might not exist or be NULL for old photos + try: + existing_photo = main_db.execute(text(""" + SELECT id, path FROM photos WHERE file_hash = :file_hash AND file_hash IS NOT NULL + """), {"file_hash": file_hash}).fetchone() + except Exception as e: + # If file_hash column doesn't exist, skip duplicate check + # This can happen if database schema is outdated + if "no such column" in str(e).lower() or "file_hash" in str(e).lower(): + existing_photo = None + else: + raise if existing_photo: # Photo already exists - mark as duplicate and skip import diff --git a/backend/db/models.py b/backend/db/models.py index a534f30..9376e00 100644 --- a/backend/db/models.py +++ b/backend/db/models.py @@ -40,7 +40,7 @@ class Photo(Base): date_added = Column(DateTime, default=datetime.utcnow, nullable=False) date_taken = Column(Date, nullable=True, index=True) processed = Column(Boolean, default=False, nullable=False, index=True) - file_hash = Column(Text, nullable=False, index=True) + file_hash = Column(Text, nullable=True, index=True) # Nullable to support existing photos without hashes media_type = Column(Text, default="image", nullable=False, index=True) # "image" or "video" faces = relationship("Face", back_populates="photo", cascade="all, delete-orphan") diff --git a/backend/db/session.py b/backend/db/session.py index c73522a..957b921 100644 --- a/backend/db/session.py +++ b/backend/db/session.py @@ -8,7 +8,8 @@ from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker # Load environment variables from .env file if it exists -env_path = Path(__file__).parent.parent.parent.parent / ".env" +# Path: backend/db/session.py -> backend/db -> backend -> punimtag/ -> .env +env_path = Path(__file__).parent.parent.parent / ".env" load_dotenv(dotenv_path=env_path) @@ -87,8 +88,16 @@ try: **auth_pool_kwargs ) AuthSessionLocal = sessionmaker(bind=auth_engine, autoflush=False, autocommit=False, future=True) -except ValueError: +except ValueError as e: # DATABASE_URL_AUTH not set - auth database not available + print(f"[DB Session] ⚠️ Auth database not configured: {e}") + auth_engine = None + AuthSessionLocal = None +except Exception as e: + # Other errors (connection failures, etc.) - log but don't crash + import os + print(f"[DB Session] ⚠️ Failed to initialize auth database: {e}") + print(f"[DB Session] URL was: {os.getenv('DATABASE_URL_AUTH', 'not set')}") auth_engine = None AuthSessionLocal = None @@ -96,7 +105,11 @@ except ValueError: def get_auth_db() -> Generator: """Yield a DB session for auth database request lifecycle.""" if AuthSessionLocal is None: - raise ValueError("Auth database not configured. Set DATABASE_URL_AUTH environment variable.") + from fastapi import HTTPException, status + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Auth database not configured. Please set DATABASE_URL_AUTH environment variable in the backend configuration." + ) db = AuthSessionLocal() try: yield db diff --git a/backend/services/face_service.py b/backend/services/face_service.py index df5eb84..e53cef0 100644 --- a/backend/services/face_service.py +++ b/backend/services/face_service.py @@ -66,15 +66,27 @@ def _pre_warm_deepface( dummy_img = Image.new('RGB', (100, 100), color='black') dummy_array = np.array(dummy_img) - # This will trigger model loading but won't find any faces (which is fine) - # We use enforce_detection=False to avoid errors when no faces are found - DeepFace.represent( - img_path=dummy_array, - model_name=model_name, - detector_backend=detector_backend, - enforce_detection=False, # Don't fail if no faces - align=DEEPFACE_ALIGN_FACES, - ) + # Suppress stderr to prevent broken pipe errors from gdown + import sys + import contextlib + from io import StringIO + + # Suppress stdout/stderr during model loading + with contextlib.redirect_stdout(StringIO()), open(os.devnull, 'w') as devnull: + old_stderr = sys.stderr + sys.stderr = devnull + try: + # This will trigger model loading but won't find any faces (which is fine) + # We use enforce_detection=False to avoid errors when no faces are found + DeepFace.represent( + img_path=dummy_array, + model_name=model_name, + detector_backend=detector_backend, + enforce_detection=False, # Don't fail if no faces + align=DEEPFACE_ALIGN_FACES, + ) + finally: + sys.stderr = old_stderr elapsed = time.time() - start_time print(f"[DeepFace] Models loaded in {elapsed:.2f}s") @@ -83,7 +95,10 @@ def _pre_warm_deepface( except Exception as e: # If pre-warming fails, models will just load on first real photo elapsed = time.time() - start_time - print(f"[DeepFace] Pre-warming completed (with warnings) in {elapsed:.2f}s: {e}") + try: + print(f"[DeepFace] Pre-warming completed (with warnings) in {elapsed:.2f}s: {e}") + except (BrokenPipeError, OSError): + pass # Don't raise - let it load on first photo instead @@ -302,26 +317,58 @@ def process_photo_faces( if not DEEPFACE_AVAILABLE: raise RuntimeError("DeepFace not available") - photo_path = photo.path - if not os.path.exists(photo_path): - return 0, 0 + # Suppress stderr globally for this function to prevent broken pipe errors + # This must happen BEFORE any DeepFace/RetinaFace initialization + import sys + from io import StringIO + import contextlib + + # Save original stderr and redirect to devnull for the entire function + # This prevents gdown (used by RetinaFace/DeepFace) from causing broken pipe errors + # Note: os is imported at module level, so it's available here + original_stderr = sys.stderr + devnull_path = os.devnull # Capture os.devnull value to avoid closure issues + devnull_fd = open(devnull_path, 'w') + sys.stderr = devnull_fd + + def _print_with_stderr(*args, **kwargs): + """Helper to temporarily restore stderr for print statements""" + sys.stderr = original_stderr + try: + print(*args, **kwargs) + finally: + sys.stderr = devnull_fd - # Skip videos (videos are not processed for face detection) try: - media_type = getattr(photo, 'media_type', 'image') - if media_type == 'video': + + photo_path = photo.path + if not os.path.exists(photo_path): + sys.stderr = original_stderr + if devnull_fd: + devnull_fd.close() return 0, 0 - except Exception: - pass - - # Skip if already processed (desktop parity) - try: - if getattr(photo, 'processed', False): - return 0, 0 - except Exception: - pass - - try: + + # Skip videos (videos are not processed for face detection) + try: + media_type = getattr(photo, 'media_type', 'image') + if media_type == 'video': + sys.stderr = original_stderr + if devnull_fd: + devnull_fd.close() + return 0, 0 + except Exception: + pass + + # Skip if already processed (desktop parity) + try: + if getattr(photo, 'processed', False): + sys.stderr = original_stderr + if devnull_fd: + devnull_fd.close() + return 0, 0 + except Exception: + pass + # Get EXIF orientation exif_orientation = EXIFOrientationHandler.get_exif_orientation(photo_path) @@ -349,25 +396,40 @@ def process_photo_faces( try: pose_faces = pose_detector.detect_pose_faces(face_detection_path) if pose_faces: - print(f"[FaceService] Pose detection for {photo.filename}: found {len(pose_faces)} faces with pose data") + _print_with_stderr(f"[FaceService] Pose detection for {photo.filename}: found {len(pose_faces)} faces with pose data") except Exception as e: - print(f"[FaceService] ⚠️ Pose detection failed for {photo.filename}: {e}, using defaults") + _print_with_stderr(f"[FaceService] ⚠️ Pose detection failed for {photo.filename}: {e}, using defaults") pose_faces = [] elif RETINAFACE_AVAILABLE: # Fallback: create detector if not provided (backward compatibility) + # Note: stderr is already suppressed above, so PoseDetector initialization won't cause broken pipe try: pose_detector_local = PoseDetector() pose_faces = pose_detector_local.detect_pose_faces(face_detection_path) if pose_faces: - print(f"[FaceService] Pose detection for {photo.filename}: found {len(pose_faces)} faces with pose data") + _print_with_stderr(f"[FaceService] Pose detection for {photo.filename}: found {len(pose_faces)} faces with pose data") except Exception as e: - print(f"[FaceService] ⚠️ Pose detection failed for {photo.filename}: {e}, using defaults") + _print_with_stderr(f"[FaceService] ⚠️ Pose detection failed for {photo.filename}: {e}, using defaults") pose_faces = [] + # Step 2: Use DeepFace for encoding generation + # Note: stderr is already suppressed, so DeepFace won't cause broken pipe errors + _print_with_stderr(f"[DeepFace] Processing {photo.filename} with {detector_backend}/{model_name}...") + + # Check if model weights exist before calling DeepFace + # This prevents automatic download attempts that can cause broken pipe errors + weights_path = os.path.expanduser(f"~/.deepface/weights/arcface_weights.h5") + if model_name == "ArcFace" and not os.path.exists(weights_path): + sys.stderr = original_stderr + raise Exception( + f"ArcFace model weights not found at {weights_path}. " + f"Please download manually: " + f"wget https://github.com/serengil/deepface_models/releases/download/v1.0/arcface_weights.h5 " + f"-O {weights_path}" + ) + + # DeepFace call - stderr is already suppressed try: - # Step 2: Use DeepFace for encoding generation - # Note: First call may take time to download/initialize models - print(f"[DeepFace] Processing {photo.filename} with {detector_backend}/{model_name}...") results = DeepFace.represent( img_path=face_detection_path, model_name=model_name, @@ -375,9 +437,20 @@ def process_photo_faces( enforce_detection=DEEPFACE_ENFORCE_DETECTION, align=DEEPFACE_ALIGN_FACES, ) - print(f"[DeepFace] Completed {photo.filename}") + _print_with_stderr(f"[DeepFace] Completed {photo.filename}") except Exception as e: - print(f"[DeepFace] Error processing {photo.filename}: {e}") + error_msg = str(e) + try: + _print_with_stderr(f"[DeepFace] Error processing {photo.filename}: {error_msg}") + except (BrokenPipeError, OSError): + pass # Ignore broken pipe when printing + + # If it's a model download error, provide helpful message + if "downloading" in error_msg.lower() or "arcface_weights" in error_msg.lower(): + raise Exception( + f"Failed to load DeepFace model '{model_name}'. " + f"Please download the model weights manually. Error: {error_msg}" + ) raise finally: # Clean up temporary file if created @@ -407,7 +480,7 @@ def process_photo_faces( faces_stored = 0 validation_failures = {} # Track failures by reason type - print(f"[FaceService] Processing {faces_detected} faces from DeepFace for {photo.filename} " + _print_with_stderr(f"[FaceService] Processing {faces_detected} faces from DeepFace for {photo.filename} " f"(image size: {image_width}x{image_height})") # Track which pose_faces have been used to prevent duplicate matches @@ -416,8 +489,8 @@ def process_photo_faces( for idx, result in enumerate(results): # Debug: Print full result to see what DeepFace returns if idx == 0: - print(f"[FaceService] Debug - DeepFace result keys: {result.keys()}") - print(f"[FaceService] Debug - Sample result structure: {list(result.keys())}") + _print_with_stderr(f"[FaceService] Debug - DeepFace result keys: {result.keys()}") + _print_with_stderr(f"[FaceService] Debug - Sample result structure: {list(result.keys())}") facial_area = result.get('facial_area', {}) # Try multiple possible confidence fields (matching desktop version) @@ -435,12 +508,12 @@ def process_photo_faces( # Debug first face to see what DeepFace returns if idx == 0: - print(f"[FaceService] Debug - DeepFace result keys: {list(result.keys())}") + _print_with_stderr(f"[FaceService] Debug - DeepFace result keys: {list(result.keys())}") if facial_area: - print(f"[FaceService] Debug - facial_area keys: {list(facial_area.keys())}") - print(f"[FaceService] Debug - facial_area content: {facial_area}") - print(f"[FaceService] Debug - face_confidence value: {face_confidence}") - print(f"[FaceService] Debug - result['face_confidence'] exists: {'face_confidence' in result}") + _print_with_stderr(f"[FaceService] Debug - facial_area keys: {list(facial_area.keys())}") + _print_with_stderr(f"[FaceService] Debug - facial_area content: {facial_area}") + _print_with_stderr(f"[FaceService] Debug - face_confidence value: {face_confidence}") + _print_with_stderr(f"[FaceService] Debug - result['face_confidence'] exists: {'face_confidence' in result}") encoding = np.array(result['embedding']) @@ -463,12 +536,12 @@ def process_photo_faces( # Extract failure type from reason (first word before colon) failure_type = reason.split(':')[0].strip() if ':' in reason else reason validation_failures[failure_type] = validation_failures.get(failure_type, 0) + 1 - print(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename} failed validation: " + _print_with_stderr(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename} failed validation: " f"{reason} - confidence={face_confidence:.3f}, " f"location={location}, size={location['w']}x{location['h']}") continue - print(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename} passed validation: " + _print_with_stderr(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename} passed validation: " f"confidence={face_confidence:.3f}, size={location['w']}x{location['h']}") # Calculate quality score (0.0-1.0 range for desktop compatibility) @@ -618,11 +691,11 @@ def process_photo_faces( if face_width is not None: profile_status = "PROFILE" if face_width < 25.0 else "FRONTAL" yaw_str = f"{yaw_angle:.2f}°" if yaw_angle is not None else "None" - print(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename}: " + _print_with_stderr(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename}: " f"face_width={face_width:.2f}px, pose_mode={pose_mode} ({profile_status}), yaw={yaw_str}") else: yaw_str = f"{yaw_angle:.2f}°" if yaw_angle is not None else "None" - print(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename}: " + _print_with_stderr(f"[FaceService] Face {idx+1}/{faces_detected} in {photo.filename}: " f"face_width=None, pose_mode={pose_mode}, yaw={yaw_str}") # Store face in database - match desktop schema exactly @@ -662,14 +735,14 @@ def process_photo_faces( # Log summary if faces_stored < faces_detected: - print(f"[FaceService] Summary for {photo.filename}: " + _print_with_stderr(f"[FaceService] Summary for {photo.filename}: " f"{faces_detected} faces detected, {faces_stored} faces stored, " f"{faces_detected - faces_stored} failed validation") for reason, count in validation_failures.items(): if count > 0: - print(f"[FaceService] - {reason}: {count}") + _print_with_stderr(f"[FaceService] - {reason}: {count}") else: - print(f"[FaceService] Summary for {photo.filename}: " + _print_with_stderr(f"[FaceService] Summary for {photo.filename}: " f"{faces_detected} faces detected, {faces_stored} faces stored") return faces_detected, faces_stored @@ -677,6 +750,14 @@ def process_photo_faces( except Exception as e: db.rollback() raise Exception(f"Error processing faces in {photo.filename}: {str(e)}") + finally: + # Always restore stderr + sys.stderr = original_stderr + if devnull_fd: + try: + devnull_fd.close() + except Exception: + pass def _calculate_iou(box1: Dict, box2: Dict) -> float: @@ -1170,9 +1251,21 @@ def process_unprocessed_photos( raise except Exception as e: # Log error but continue processing other photos - print(f"[FaceService] Error processing photo {photo.filename}: {e}") - import traceback - traceback.print_exc() + try: + print(f"[FaceService] Error processing photo {photo.filename}: {e}") + except (BrokenPipeError, OSError): + pass # Ignore broken pipe errors + + # Try to print traceback, but don't fail if stdout is closed + try: + import traceback + traceback.print_exc() + except (BrokenPipeError, OSError): + # If printing fails, at least log the error type + try: + print(f"[FaceService] Error type: {type(e).__name__}: {str(e)}") + except (BrokenPipeError, OSError): + pass if update_progress: try: update_progress( diff --git a/backend/services/tasks.py b/backend/services/tasks.py index 961b1c4..1064ba3 100644 --- a/backend/services/tasks.py +++ b/backend/services/tasks.py @@ -235,8 +235,20 @@ def process_faces_task( except Exception as e: # Log error and update job metadata error_msg = f"Task failed: {str(e)}" - print(f"[Task] ❌ {error_msg}") - traceback.print_exc() + try: + print(f"[Task] ❌ {error_msg}") + except (BrokenPipeError, OSError): + pass # Ignore broken pipe errors when printing + + # Try to print traceback, but don't fail if stdout is closed + try: + traceback.print_exc() + except (BrokenPipeError, OSError): + # If printing fails, at least log the error type + try: + print(f"[Task] Error type: {type(e).__name__}: {str(e)}") + except (BrokenPipeError, OSError): + pass if job: try: diff --git a/install.sh b/install.sh index caae714..47e573d 100755 --- a/install.sh +++ b/install.sh @@ -209,6 +209,91 @@ install_python_dependencies() { echo -e "${GREEN} ✅ Python dependencies installed${NC}" } +# Download DeepFace model weights (optional, but recommended) +download_deepface_models() { + echo -e "${BLUE}🤖 Downloading DeepFace model weights (optional)...${NC}" + + source venv/bin/activate + + # Check if deepface is installed + if ! python3 -c "import deepface" 2>/dev/null; then + echo -e "${YELLOW} ⚠️ DeepFace not installed, skipping model download${NC}" + return 0 + fi + + # Create weights directory + mkdir -p ~/.deepface/weights + + # Check if ArcFace weights already exist + if [ -f ~/.deepface/weights/arcface_weights.h5 ]; then + echo -e "${GREEN} ✅ ArcFace model weights already exist${NC}" + return 0 + fi + + echo -e "${BLUE} Downloading ArcFace model weights (~131MB, this may take a few minutes)...${NC}" + + # Try to download using wget (more reliable than Python download in worker) + if command_exists wget; then + if wget -q --show-progress \ + https://github.com/serengil/deepface_models/releases/download/v1.0/arcface_weights.h5 \ + -O ~/.deepface/weights/arcface_weights.h5; then + echo -e "${GREEN} ✅ ArcFace model weights downloaded successfully${NC}" + return 0 + fi + fi + + # Fallback: try with Python (may fail in some environments) + echo -e "${YELLOW} ⚠️ wget failed, trying Python download...${NC}" + if python3 << 'PYTHON' +import os +import sys +from pathlib import Path + +try: + from deepface import DeepFace + import numpy as np + from PIL import Image + + # Create a dummy image to trigger model download + dummy_img = Image.new('RGB', (100, 100), color='black') + dummy_array = np.array(dummy_img) + + # This will trigger model download + try: + DeepFace.represent( + img_path=dummy_array, + model_name="ArcFace", + detector_backend="retinaface", + enforce_detection=False, + align=True, + ) + print("✅ Model weights downloaded successfully") + sys.exit(0) + except Exception as e: + if "arcface_weights" in str(e).lower(): + print(f"⚠️ Download failed: {e}") + print(" You can download manually later or the worker will try again") + sys.exit(1) + # Other errors are OK (like no faces found) + print("✅ Model weights are available") + sys.exit(0) +except ImportError: + print("⚠️ DeepFace not available") + sys.exit(0) +except Exception as e: + print(f"⚠️ Error: {e}") + sys.exit(1) +PYTHON + then + echo -e "${GREEN} ✅ DeepFace models ready${NC}" + else + echo -e "${YELLOW} ⚠️ Automatic download failed. Models will be downloaded on first use.${NC}" + echo -e "${YELLOW} ⚠️ To download manually, run:${NC}" + echo -e "${BLUE} mkdir -p ~/.deepface/weights${NC}" + echo -e "${BLUE} wget https://github.com/serengil/deepface_models/releases/download/v1.0/arcface_weights.h5 -O ~/.deepface/weights/arcface_weights.h5${NC}" + fi +} + # Install frontend dependencies install_frontend_dependencies() { echo -e "${BLUE}📦 Installing frontend dependencies...${NC}" @@ -352,6 +437,10 @@ main() { install_python_dependencies echo "" + # Download DeepFace model weights (optional) + download_deepface_models + echo "" + # Install frontend dependencies install_frontend_dependencies echo ""