From 7a981b069a307b3924bf75dca4e87dc9b50b0110 Mon Sep 17 00:00:00 2001 From: tanyar09 Date: Wed, 4 Feb 2026 19:30:05 +0000 Subject: [PATCH] feat: Enhance logging and error handling for job streaming and photo uploads - Added new logging scripts for quick access to service logs and troubleshooting. - Updated job streaming API to support authentication via query parameters for EventSource. - Improved photo upload process to capture and validate EXIF dates and original modification times. - Enhanced error handling for file uploads and EXIF extraction failures. - Introduced new configuration options in ecosystem.config.js to prevent infinite crash loops. --- QUICK_LOG_REFERENCE.md | 115 ++++++++++++++++++ admin-frontend/package-lock.json | 7 ++ admin-frontend/package.json | 1 + admin-frontend/src/api/jobs.ts | 5 +- admin-frontend/src/api/photos.ts | 58 ++++++++- admin-frontend/src/components/Layout.tsx | 80 ++++++++++--- admin-frontend/src/pages/Dashboard.tsx | 30 ----- admin-frontend/src/pages/Scan.tsx | 68 +++++++++-- backend/api/auth.py | 22 ++++ backend/api/jobs.py | 26 ++++- backend/api/photos.py | 143 ++++++++++++++++++++++- backend/app.py | 24 +++- backend/services/photo_service.py | 99 +++++++++++++--- docs/DEPLOY_FROM_SCRATCH.md | 15 ++- ecosystem.config.js.example | 10 ++ scripts/README.md | 34 ++++++ 16 files changed, 648 insertions(+), 89 deletions(-) create mode 100644 QUICK_LOG_REFERENCE.md diff --git a/QUICK_LOG_REFERENCE.md b/QUICK_LOG_REFERENCE.md new file mode 100644 index 0000000..f49368e --- /dev/null +++ b/QUICK_LOG_REFERENCE.md @@ -0,0 +1,115 @@ +# Quick Log Reference + +When something fails, use these commands to quickly check logs. + +## 🚀 Quick Commands + +### Check All Services for Errors +```bash +./scripts/check-logs.sh +``` +Shows PM2 status and recent errors from all services. + +### Follow Errors in Real-Time +```bash +./scripts/tail-errors.sh +``` +Watches all error logs live (press Ctrl+C to exit). 
+ +### View Recent Errors (Last 10 minutes) +```bash +./scripts/view-recent-errors.sh +``` + +### View Errors from Last 30 minutes +```bash +./scripts/view-recent-errors.sh 30 +``` + +## 📋 PM2 Commands + +```bash +# View all logs +pm2 logs + +# View specific service logs +pm2 logs punimtag-api +pm2 logs punimtag-worker +pm2 logs punimtag-admin +pm2 logs punimtag-viewer + +# View only errors +pm2 logs --err + +# Monitor services +pm2 monit + +# Check service status +pm2 status +pm2 list +``` + +## 📁 Log File Locations + +All logs are in: `/home/appuser/.pm2/logs/` + +- **API**: `punimtag-api-error.log`, `punimtag-api-out.log` +- **Worker**: `punimtag-worker-error.log`, `punimtag-worker-out.log` +- **Admin**: `punimtag-admin-error.log`, `punimtag-admin-out.log` +- **Viewer**: `punimtag-viewer-error.log`, `punimtag-viewer-out.log` + +## 🔧 Direct Log Access + +```bash +# View last 50 lines of API errors +tail -n 50 /home/appuser/.pm2/logs/punimtag-api-error.log + +# Follow worker errors +tail -f /home/appuser/.pm2/logs/punimtag-worker-error.log + +# Search for specific errors +grep -i "error\|exception\|traceback" /home/appuser/.pm2/logs/punimtag-*-error.log +``` + +## 🔄 Log Rotation Setup + +Run once to prevent log bloat: +```bash +./scripts/setup-log-rotation.sh +``` + +This configures: +- Max log size: 50MB (auto-rotates) +- Retain: 7 rotated files +- Compress: Yes +- Daily rotation: Yes (midnight) + +## 💡 Troubleshooting Tips + +1. **Service keeps crashing?** + ```bash + ./scripts/check-logs.sh + pm2 logs punimtag-worker --err --lines 100 + ``` + +2. **API not responding?** + ```bash + pm2 logs punimtag-api --err + pm2 status + ``` + +3. **Large log files?** + ```bash + # Check log sizes + du -h /home/appuser/.pm2/logs/* + + # Setup rotation if not done + ./scripts/setup-log-rotation.sh + ``` + +4. **Need to clear old logs?** + ```bash + # PM2 can manage this with rotation, but if needed: + pm2 flush # Clear all logs (be careful!) 
+ ``` + diff --git a/admin-frontend/package-lock.json b/admin-frontend/package-lock.json index d05aa7a..d8c6ab0 100644 --- a/admin-frontend/package-lock.json +++ b/admin-frontend/package-lock.json @@ -10,6 +10,7 @@ "dependencies": { "@tanstack/react-query": "^5.8.4", "axios": "^1.6.2", + "exifr": "^7.1.3", "react": "^18.2.0", "react-dom": "^18.2.0", "react-router-dom": "^6.20.0" @@ -3048,6 +3049,12 @@ "node": ">=0.10.0" } }, + "node_modules/exifr": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/exifr/-/exifr-7.1.3.tgz", + "integrity": "sha512-g/aje2noHivrRSLbAUtBPWFbxKdKhgj/xr1vATDdUXPOFYJlQ62Ft0oy+72V6XLIpDJfHs6gXLbBLAolqOXYRw==", + "license": "MIT" + }, "node_modules/fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", diff --git a/admin-frontend/package.json b/admin-frontend/package.json index 576514c..867b5dd 100644 --- a/admin-frontend/package.json +++ b/admin-frontend/package.json @@ -12,6 +12,7 @@ "dependencies": { "@tanstack/react-query": "^5.8.4", "axios": "^1.6.2", + "exifr": "^7.1.3", "react": "^18.2.0", "react-dom": "^18.2.0", "react-router-dom": "^6.20.0" diff --git a/admin-frontend/src/api/jobs.ts b/admin-frontend/src/api/jobs.ts index 51bb8c5..9d351d2 100644 --- a/admin-frontend/src/api/jobs.ts +++ b/admin-frontend/src/api/jobs.ts @@ -28,11 +28,14 @@ export const jobsApi = { streamJobProgress: (jobId: string): EventSource => { // EventSource needs absolute URL - use VITE_API_URL or construct from current origin + // EventSource cannot send custom headers, so we pass token as query parameter const envApiUrl = import.meta.env.VITE_API_URL const baseURL = envApiUrl && envApiUrl.trim() !== '' ? envApiUrl : window.location.origin // Use current origin when empty - works with proxy and HTTPS - return new EventSource(`${baseURL}/api/v1/jobs/stream/${jobId}`) + const token = localStorage.getItem('access_token') + const tokenParam = token ? 
`?token=${encodeURIComponent(token)}` : '' + return new EventSource(`${baseURL}/api/v1/jobs/stream/${jobId}${tokenParam}`) }, cancelJob: async (jobId: string): Promise<{ message: string; status: string }> => { diff --git a/admin-frontend/src/api/photos.ts b/admin-frontend/src/api/photos.ts index 2a16423..1589da3 100644 --- a/admin-frontend/src/api/photos.ts +++ b/admin-frontend/src/api/photos.ts @@ -50,9 +50,60 @@ export const photosApi = { uploadPhotos: async (files: File[]): Promise => { const formData = new FormData() - files.forEach((file) => { + + // Extract EXIF date AND original file modification date from each file BEFORE upload + // This preserves the original photo date even if EXIF gets corrupted during upload + // We capture both so we can use modification date as fallback if EXIF is invalid + const exifr = await import('exifr') + + // First, append all files and capture modification dates (synchronous operations) + for (const file of files) { formData.append('files', file) + + // ALWAYS capture the original file's modification date before upload + // This is the modification date from the user's system, not the server + if (file.lastModified) { + formData.append(`file_original_mtime_${file.name}`, file.lastModified.toString()) + } + } + + // Extract EXIF data in parallel for all files (performance optimization) + const exifPromises = files.map(async (file) => { + try { + const exif = await exifr.parse(file, { + pick: ['DateTimeOriginal', 'DateTimeDigitized', 'DateTime'], + translateKeys: false, + translateValues: false, + }) + + return { + filename: file.name, + exif, + } + } catch (err) { + // EXIF extraction failed, but we still have file.lastModified captured above + console.debug(`EXIF extraction failed for ${file.name}, will use modification date:`, err) + return { + filename: file.name, + exif: null, + } + } }) + + // Wait for all EXIF extractions to complete in parallel + const exifResults = await Promise.all(exifPromises) + + // Add EXIF dates 
to form data + for (const result of exifResults) { + if (result.exif?.DateTimeOriginal) { + // Send the EXIF date as metadata + formData.append(`file_exif_date_${result.filename}`, result.exif.DateTimeOriginal) + } else if (result.exif?.DateTime) { + formData.append(`file_exif_date_${result.filename}`, result.exif.DateTime) + } else if (result.exif?.DateTimeDigitized) { + formData.append(`file_exif_date_${result.filename}`, result.exif.DateTimeDigitized) + } + } // The interceptor will automatically remove Content-Type for FormData // Axios will set multipart/form-data with boundary automatically @@ -72,11 +123,14 @@ export const photosApi = { streamJobProgress: (jobId: string): EventSource => { // EventSource needs absolute URL - use VITE_API_URL or construct from current origin + // EventSource cannot send custom headers, so we pass token as query parameter const envApiUrl = import.meta.env.VITE_API_URL const baseURL = envApiUrl && envApiUrl.trim() !== '' ? envApiUrl : window.location.origin // Use current origin when empty - works with proxy and HTTPS - return new EventSource(`${baseURL}/api/v1/jobs/stream/${jobId}`) + const token = localStorage.getItem('access_token') + const tokenParam = token ? 
`?token=${encodeURIComponent(token)}` : '' + return new EventSource(`${baseURL}/api/v1/jobs/stream/${jobId}${tokenParam}`) }, searchPhotos: async (params: { diff --git a/admin-frontend/src/components/Layout.tsx b/admin-frontend/src/components/Layout.tsx index 80259c6..2eb2781 100644 --- a/admin-frontend/src/components/Layout.tsx +++ b/admin-frontend/src/components/Layout.tsx @@ -5,6 +5,12 @@ import { useInactivityTimeout } from '../hooks/useInactivityTimeout' const INACTIVITY_TIMEOUT_MS = 30 * 60 * 1000 +// Check if running on iOS +const isIOS = (): boolean => { + return /iPad|iPhone|iPod/.test(navigator.userAgent) || + (navigator.platform === 'MacIntel' && navigator.maxTouchPoints > 1) +} + type NavItem = { path: string label: string @@ -16,6 +22,8 @@ export default function Layout() { const location = useLocation() const { username, logout, isAuthenticated, hasPermission } = useAuth() const [maintenanceExpanded, setMaintenanceExpanded] = useState(true) + const [sidebarOpen, setSidebarOpen] = useState(false) + const isIOSDevice = isIOS() const handleInactivityLogout = useCallback(() => { logout() @@ -60,6 +68,12 @@ export default function Layout() { { + // Close sidebar on iOS when navigating + if (isIOSDevice) { + setSidebarOpen(false) + } + }} className={`flex items-center gap-3 px-3 py-2 rounded-lg text-sm font-medium transition-colors ${ isActive ? 'bg-blue-50 text-blue-700' : 'text-gray-700 hover:bg-gray-50' } ${extraClasses}`} @@ -103,24 +117,40 @@ export default function Layout() {
{/* Left sidebar - fixed position with logo */} -
- - PunimTag { - // Fallback if logo.png doesn't exist, try logo.svg - const target = e.target as HTMLImageElement - if (target.src.endsWith('logo.png')) { - target.src = '/logo.svg' - } - }} - /> - +
+ {isIOSDevice ? ( + + ) : ( + + PunimTag { + // Fallback if logo.png doesn't exist, try logo.svg + const target = e.target as HTMLImageElement + if (target.src.endsWith('logo.png')) { + target.src = '/logo.svg' + } + }} + /> + + )}
{/* Header content - aligned with main content */} -
+

{getPageTitle()}

@@ -140,8 +170,22 @@ export default function Layout() {
+ {/* Overlay for mobile when sidebar is open */} + {isIOSDevice && sidebarOpen && ( +
setSidebarOpen(false)} + /> + )} + {/* Left sidebar - fixed position */} -
+
{/* Main content - with left margin to account for fixed sidebar */} -
+
diff --git a/admin-frontend/src/pages/Dashboard.tsx b/admin-frontend/src/pages/Dashboard.tsx index 02a5144..95e99ad 100644 --- a/admin-frontend/src/pages/Dashboard.tsx +++ b/admin-frontend/src/pages/Dashboard.tsx @@ -261,36 +261,6 @@ export default function Dashboard() { )}
- - {/* CTA Section */} -
-
-

- Ready to Get Started? -

-

- Begin organizing your photo collection today. Use the navigation menu - to explore all the powerful features PunimTag has to offer. -

-
-
- đŸ—‚ī¸ Scan Photos -
-
- âš™ī¸ Process Faces -
-
- 👤 Identify People -
-
- 🤖 Auto-Match -
-
- 🔍 Search Photos -
-
-
-
) } diff --git a/admin-frontend/src/pages/Scan.tsx b/admin-frontend/src/pages/Scan.tsx index 7f5dd37..77e9ad6 100644 --- a/admin-frontend/src/pages/Scan.tsx +++ b/admin-frontend/src/pages/Scan.tsx @@ -31,6 +31,12 @@ const isWebkitDirectorySupported = (): boolean => { return 'webkitdirectory' in input } +// Check if running on iOS +const isIOS = (): boolean => { + return /iPad|iPhone|iPod/.test(navigator.userAgent) || + (navigator.platform === 'MacIntel' && navigator.maxTouchPoints > 1) +} + // Recursively read all files from a directory handle async function readDirectoryRecursive( dirHandle: FileSystemDirectoryHandle, @@ -126,8 +132,14 @@ export default function Scan() { if (fileArray.length > 0) { const firstFile = fileArray[0] // Extract folder path from file path (webkitdirectory includes full path) - const folderPath = firstFile.webkitRelativePath.split('/').slice(0, -1).join('/') - setFolderPath(folderPath || 'Selected folder') + // On iOS, webkitRelativePath may not be available, so use a generic label + if (firstFile.webkitRelativePath) { + const folderPath = firstFile.webkitRelativePath.split('/').slice(0, -1).join('/') + setFolderPath(folderPath || 'Selected folder') + } else { + // iOS Photos selection - no folder path available + setFolderPath(`Selected ${fileArray.length} file${fileArray.length > 1 ? 
's' : ''} from Photos`) + } } // Store files for later upload @@ -151,8 +163,8 @@ export default function Scan() { // Upload files to backend in batches to show progress setLocalUploadProgress({ current: 0, total: selectedFiles.length, filename: '' }) - // Upload files in batches to show progress - const batchSize = 10 + // Upload files in batches to show progress (increased from 10 to 25 for better performance) + const batchSize = 25 let uploaded = 0 let totalAdded = 0 let totalExisting = 0 @@ -212,19 +224,24 @@ export default function Scan() { if (files.length === 0) { setError('No supported image or video files found in the selected folder.') setSelectedFiles([]) + setIsImporting(false) return } - // Store files for later upload + // For File System Access API, files are File objects with lastModified + // Store files with their metadata for later upload setSelectedFiles(files) + setIsImporting(false) } catch (err: any) { if (err.name === 'AbortError') { // User cancelled the folder picker setError(null) setSelectedFiles([]) + setIsImporting(false) } else { setError(err.message || 'Failed to select folder') setSelectedFiles([]) + setIsImporting(false) } } } else if (isWebkitDirectorySupported()) { @@ -321,9 +338,22 @@ export default function Scan() { eventSource.onerror = (err) => { console.error('SSE error:', err) + // Check if connection failed (readyState 0 = CONNECTING, 2 = CLOSED) + if (eventSource.readyState === EventSource.CLOSED) { + setError('Connection to server lost. The job may still be running. 
Please refresh the page to check status.') + setIsImporting(false) + } else if (eventSource.readyState === EventSource.CONNECTING) { + // Still connecting, don't show error yet + console.log('SSE still connecting...') + } eventSource.close() eventSourceRef.current = null } + + // Handle connection open + eventSource.onopen = () => { + console.log('SSE connection opened for job:', jobId) + } } const fetchJobResult = async (jobId: string) => { @@ -426,17 +456,25 @@ export default function Scan() { handleLocalFolderSelect(e.target.files)} /> @@ -467,8 +505,16 @@ export default function Scan() {

{scanMode === 'local' ? ( <> - Click "Select Folder" to choose a folder from your local computer. The browser will read the files and upload them to the server. - {!isFileSystemAccessSupported() && !isWebkitDirectorySupported() && ( + {isIOS() ? ( + <> + Click "Select Folder" to choose photos and videos from your Photos app. You can select multiple files at once. + + ) : ( + <> + Click "Select Folder" to choose a folder from your local computer. The browser will read the files and upload them to the server. + + )} + {!isFileSystemAccessSupported() && !isWebkitDirectorySupported() && !isIOS() && ( âš ī¸ Folder selection is not supported in your browser. Please use Chrome, Edge, Safari, or Firefox. diff --git a/backend/api/auth.py b/backend/api/auth.py index efad6ad..fe4cdf8 100644 --- a/backend/api/auth.py +++ b/backend/api/auth.py @@ -100,6 +100,28 @@ def create_refresh_token(data: dict) -> str: return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM) +def get_current_user_from_token(token: str) -> dict: + """Get current user from JWT token string (for query parameter auth). + + Used for endpoints that need authentication but can't use headers + (e.g., EventSource/SSE endpoints). 
+ """ + try: + payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM]) + username: str = payload.get("sub") + if username is None: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid authentication credentials", + ) + return {"username": username} + except JWTError: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid authentication credentials", + ) + + def get_current_user( credentials: Annotated[HTTPAuthorizationCredentials, Depends(get_bearer_token)] ) -> dict: diff --git a/backend/api/jobs.py b/backend/api/jobs.py index 1ee0b2a..bf2351b 100644 --- a/backend/api/jobs.py +++ b/backend/api/jobs.py @@ -4,15 +4,17 @@ from __future__ import annotations from datetime import datetime -from fastapi import APIRouter, HTTPException, status +from fastapi import APIRouter, HTTPException, Query, status from fastapi.responses import StreamingResponse from rq import Queue from rq.job import Job from redis import Redis import json import time +from typing import Optional from backend.schemas.jobs import JobResponse, JobStatus +from backend.api.auth import get_current_user_from_token router = APIRouter(prefix="/jobs", tags=["jobs"]) @@ -89,8 +91,26 @@ def get_job(job_id: str) -> JobResponse: @router.get("/stream/{job_id}") -def stream_job_progress(job_id: str): - """Stream job progress via Server-Sent Events (SSE).""" +def stream_job_progress( + job_id: str, + token: Optional[str] = Query(None, description="JWT token for authentication"), +): + """Stream job progress via Server-Sent Events (SSE). + + Note: EventSource cannot send custom headers, so authentication + is done via query parameter 'token'. + """ + # Authenticate user via token query parameter (required for EventSource) + if not token: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Authentication required. 
Provide 'token' query parameter.",
+        )
+
+    try:
+        get_current_user_from_token(token)
+    except HTTPException as e:
+        raise e
 
     def event_generator():
         """Generate SSE events for job progress."""
diff --git a/backend/api/photos.py b/backend/api/photos.py
index 578c77e..9a3cfb3 100644
--- a/backend/api/photos.py
+++ b/backend/api/photos.py
@@ -5,7 +5,7 @@ from __future__ import annotations
 from datetime import date, datetime
 from typing import List, Optional
 
-from fastapi import APIRouter, Depends, File, HTTPException, Query, Request, UploadFile, status
+from fastapi import APIRouter, Depends, File, HTTPException, Query, Request, UploadFile, status
 from fastapi.responses import JSONResponse, FileResponse, Response
 from typing import Annotated
 from rq import Queue
@@ -339,6 +339,7 @@ def search_photos(
 @router.post("/import", response_model=PhotoImportResponse)
 def import_photos(
     request: PhotoImportRequest,
+    current_user: Annotated[dict, Depends(get_current_user)],
 ) -> PhotoImportResponse:
     """Import photos from a folder path.
 
@@ -381,7 +382,7 @@ def import_photos(
 @router.post("/import/upload")
 async def upload_photos(
-    files: list[UploadFile] = File(...),
+    request: Request,
     db: Session = Depends(get_db),
 ) -> dict:
     """Upload photo files directly. 
@@ -393,6 +394,7 @@ async def upload_photos(
     import os
     import shutil
     from pathlib import Path
+    from datetime import datetime, date
 
     from backend.settings import PHOTO_STORAGE_DIR
 
@@ -404,6 +406,49 @@ async def upload_photos(
     existing_count = 0
     errors = []
 
+    # Read form data first to get both files and metadata
+    form_data = await request.form()
+
+    import logging
+    logger = logging.getLogger(__name__)
+
+    # Extract file metadata (EXIF dates and original modification timestamps) from form data
+    # These are captured from the ORIGINAL file BEFORE upload, so they preserve the real dates
+    file_original_mtime = {}
+    file_exif_dates = {}
+    files = []
+
+    # Extract files first using getlist (handles multiple files with same key)
+    files = form_data.getlist('files')
+
+    # Extract metadata from form data
+    for key, value in form_data.items():
+        if key.startswith('file_exif_date_'):
+            # Extract EXIF date from browser (format: file_exif_date_<filename>)
+            filename = key.replace('file_exif_date_', '')
+            file_exif_dates[filename] = str(value)
+        elif key.startswith('file_original_mtime_'):
+            # Extract original file modification time from browser (format: file_original_mtime_<filename>)
+            # This is the modification date from the ORIGINAL file before upload
+            filename = key.replace('file_original_mtime_', '')
+            try:
+                file_original_mtime[filename] = int(value)
+            except (ValueError, TypeError) as e:
+                logger.debug(f"Could not parse original mtime for {filename}: {e}")
+
+    # If no files found in form_data, try to get them from request directly
+    if not files:
+        # Fallback: try to get files from request.files() if available
+        try:
+            if hasattr(request, '_form'):
+                form = await request.form()
+                files = form.getlist('files')
+        except Exception:
+            pass
+
+    if not files:
+        raise HTTPException(status_code=400, detail="No files provided")
+
     for file in files:
         try:
             # Generate unique filename to avoid conflicts
@@ -418,8 +463,63 @@ async def upload_photos(
             with open(stored_path, "wb") as f:
                 f.write(content)
 
+            # Extract 
date metadata from browser BEFORE upload + # Priority: 1) Browser EXIF date, 2) Original file modification date (from before upload) + # This ensures we use the ORIGINAL file's metadata, not the server's copy + browser_exif_date = None + file_last_modified = None + + # First try: Use EXIF date extracted in browser (from original file) + if file.filename in file_exif_dates: + exif_date_str = file_exif_dates[file.filename] + logger.info(f"[UPLOAD] Found browser EXIF date for {file.filename}: {exif_date_str}") + try: + # Parse EXIF date string (format: "YYYY:MM:DD HH:MM:SS" or ISO format) + from dateutil import parser + exif_datetime = parser.parse(exif_date_str) + browser_exif_date = exif_datetime.date() + # Validate the date + if browser_exif_date > date.today() or browser_exif_date < date(1900, 1, 1): + logger.warning(f"[UPLOAD] Browser EXIF date {browser_exif_date} is invalid for {file.filename}, trying original mtime") + browser_exif_date = None + else: + logger.info(f"[UPLOAD] Parsed browser EXIF date: {browser_exif_date} for {file.filename}") + except Exception as e: + logger.warning(f"[UPLOAD] Could not parse browser EXIF date '{exif_date_str}' for {file.filename}: {e}, trying original mtime") + browser_exif_date = None + else: + logger.debug(f"[UPLOAD] No browser EXIF date found for {file.filename}") + + # Second try: Use original file modification time (captured BEFORE upload) + if file.filename in file_original_mtime: + timestamp_ms = file_original_mtime[file.filename] + logger.info(f"[UPLOAD] Found original mtime for {file.filename}: {timestamp_ms}") + try: + file_last_modified = datetime.fromtimestamp(timestamp_ms / 1000.0).date() + # Validate the date + if file_last_modified > date.today() or file_last_modified < date(1900, 1, 1): + logger.warning(f"[UPLOAD] Original file mtime {file_last_modified} is invalid for {file.filename}") + file_last_modified = None + else: + logger.info(f"[UPLOAD] Parsed original mtime: {file_last_modified} for 
{file.filename}") + except (ValueError, OSError) as e: + logger.warning(f"[UPLOAD] Could not parse original mtime timestamp {timestamp_ms} for {file.filename}: {e}") + file_last_modified = None + else: + logger.debug(f"[UPLOAD] No original mtime found for {file.filename}") + + logger.info(f"[UPLOAD] Calling import_photo_from_path for {file.filename} with browser_exif_date={browser_exif_date}, file_last_modified={file_last_modified}") # Import photo from stored location - photo, is_new = import_photo_from_path(db, str(stored_path)) + # Pass browser-extracted EXIF date and file modification time separately + # Priority: browser_exif_date > server EXIF extraction > file_last_modified + photo, is_new = import_photo_from_path( + db, + str(stored_path), + is_uploaded_file=True, + file_last_modified=file_last_modified, + browser_exif_date=browser_exif_date + ) + if is_new: added_count += 1 else: @@ -982,8 +1082,18 @@ def bulk_delete_photos( current_admin: Annotated[dict, Depends(get_current_admin_user)], db: Session = Depends(get_db), ) -> BulkDeletePhotosResponse: - """Delete multiple photos and all related data (faces, encodings, tags, favorites).""" + """Delete multiple photos and all related data (faces, encodings, tags, favorites). + + If a photo's file is in the uploads folder, it will also be deleted from the filesystem + to prevent duplicate uploads. 
+ """ + import os + import logging + from pathlib import Path from backend.db.models import Photo, PhotoTagLinkage + from backend.settings import PHOTO_STORAGE_DIR + + logger = logging.getLogger(__name__) photo_ids = list(dict.fromkeys(request.photo_ids)) if not photo_ids: @@ -992,13 +1102,36 @@ def bulk_delete_photos( detail="photo_ids list cannot be empty", ) + # Get the uploads folder path for comparison + uploads_dir = Path(PHOTO_STORAGE_DIR).resolve() + try: photos = db.query(Photo).filter(Photo.id.in_(photo_ids)).all() found_ids = {photo.id for photo in photos} missing_ids = sorted(set(photo_ids) - found_ids) deleted_count = 0 + files_deleted_count = 0 for photo in photos: + # Only delete file from filesystem if it's directly in the uploads folder + # Do NOT delete files from other folders (main photo storage, etc.) + photo_path = Path(photo.path).resolve() + # Strict check: only delete if parent directory is exactly the uploads folder + if photo_path.parent == uploads_dir: + try: + if photo_path.exists(): + os.remove(photo_path) + files_deleted_count += 1 + logger.warning(f"DELETED file from uploads folder: {photo_path} (Photo ID: {photo.id})") + else: + logger.warning(f"Photo file not found (already deleted?): {photo_path} (Photo ID: {photo.id})") + except OSError as e: + logger.error(f"Failed to delete file {photo_path} (Photo ID: {photo.id}): {e}") + # Continue with database deletion even if file deletion fails + else: + # File is not in uploads folder - do not delete from filesystem + logger.info(f"Photo {photo.id} is not in uploads folder (path: {photo_path.parent}, uploads: {uploads_dir}), skipping file deletion") + # Remove tag linkages explicitly (in addition to cascade) to keep counts accurate db.query(PhotoTagLinkage).filter( PhotoTagLinkage.photo_id == photo.id @@ -1019,6 +1152,8 @@ def bulk_delete_photos( admin_username = current_admin.get("username", "unknown") message_parts = [f"Deleted {deleted_count} photo(s)"] + if files_deleted_count > 0: + 
message_parts.append(f"{files_deleted_count} file(s) removed from uploads folder") if missing_ids: message_parts.append(f"{len(missing_ids)} photo(s) not found") message_parts.append(f"Request by admin: {admin_username}") diff --git a/backend/app.py b/backend/app.py index 8642817..1d340ab 100644 --- a/backend/app.py +++ b/backend/app.py @@ -56,9 +56,18 @@ def start_worker() -> None: project_root = Path(__file__).parent.parent # Use explicit Python path to avoid Cursor interception - # Check if sys.executable is Cursor, if so use /usr/bin/python3 + # Prefer virtual environment Python if available, otherwise use system Python python_executable = sys.executable - if "cursor" in python_executable.lower() or not python_executable.startswith("/usr"): + # If running in Cursor or not in venv, try to find venv Python + if "cursor" in python_executable.lower(): + # Try to use venv Python from project root + venv_python = project_root / "venv" / "bin" / "python3" + if venv_python.exists(): + python_executable = str(venv_python) + else: + python_executable = "/usr/bin/python3" + # Ensure we're using a valid Python executable + if not Path(python_executable).exists(): python_executable = "/usr/bin/python3" # Ensure PYTHONPATH is set correctly and pass DATABASE_URL_AUTH explicitly @@ -678,8 +687,15 @@ async def lifespan(app: FastAPI): try: ensure_auth_user_is_active_column() # Import and call worker's setup function to create all auth tables - from backend.worker import setup_auth_database_tables - setup_auth_database_tables() + # Note: This import may fail if dotenv is not installed in API environment + # (worker.py imports dotenv at top level, but API doesn't need it) + try: + from backend.worker import setup_auth_database_tables + setup_auth_database_tables() + except ImportError as import_err: + # dotenv not available in API environment - that's okay, worker will handle setup + print(f"â„šī¸ Could not import worker setup function: {import_err}") + print(" Worker process 
will handle auth database setup") except Exception as auth_exc: # Auth database might not exist yet - that's okay, frontend will handle it print(f"â„šī¸ Auth database not available: {auth_exc}") diff --git a/backend/services/photo_service.py b/backend/services/photo_service.py index 6639563..124b7c3 100644 --- a/backend/services/photo_service.py +++ b/backend/services/photo_service.py @@ -86,8 +86,8 @@ def extract_exif_date(image_path: str) -> Optional[date]: for tag_name in date_tag_names: if tag_name in tags: - date_str = str(tags[tag_name]) - if date_str: + date_str = str(tags[tag_name]).strip() + if date_str and date_str != "0000:00:00 00:00:00" and not date_str.startswith("0000:"): try: # exifread returns dates in format "YYYY:MM:DD HH:MM:SS" dt = datetime.strptime(date_str, "%Y:%m:%d %H:%M:%S") @@ -105,10 +105,21 @@ def extract_exif_date(image_path: str) -> Optional[date]: return extracted_date except ValueError: continue + elif date_str: + logger.debug(f"Skipping invalid date string '{date_str}' from {tag_name} in {image_path}") except ImportError: logger.debug("exifread library not available, falling back to PIL") except Exception as e: - logger.debug(f"exifread failed for {image_path}: {e}, trying PIL") + logger.warning(f"exifread failed for {image_path}: {e}, trying PIL", exc_info=True) + # Log what tags exifread could see (if any) + try: + import exifread + with open(image_path, 'rb') as test_f: + test_tags = exifread.process_file(test_f, details=False) + if test_tags: + logger.warning(f"exifread found {len(test_tags)} tags but couldn't parse dates. 
Sample tags: {list(test_tags.keys())[:5]}") + except Exception: + pass # Fallback to PIL methods try: @@ -137,6 +148,17 @@ def extract_exif_date(image_path: str) -> Optional[date]: if not exifdata: logger.warning(f"No EXIF data found in {image_path} - will fall back to file modification time") + # Try to open the file with exifread to see if it has EXIF at all + try: + import exifread + with open(image_path, 'rb') as test_f: + test_tags = exifread.process_file(test_f, details=False) + if test_tags: + logger.warning(f"File {image_path} has EXIF tags via exifread but PIL couldn't read them: {list(test_tags.keys())[:10]}") + else: + logger.warning(f"File {image_path} has no EXIF data at all") + except Exception: + pass return None # Debug: Log all available EXIF tags (only in debug mode to avoid spam) @@ -341,6 +363,18 @@ def extract_exif_date(image_path: str) -> Optional[date]: import logging logger = logging.getLogger(__name__) logger.warning(f"Failed to extract EXIF date from {image_path}: {e}", exc_info=True) + # Try a diagnostic check with exifread to see what's available + try: + import exifread + with open(image_path, 'rb') as diag_f: + diag_tags = exifread.process_file(diag_f, details=False) + if diag_tags: + date_tags_found = [k for k in diag_tags.keys() if 'date' in k.lower() or 'time' in k.lower()] + logger.warning(f"Diagnostic: File {image_path} has {len(diag_tags)} EXIF tags. Date-related tags: {date_tags_found[:10]}") + else: + logger.warning(f"Diagnostic: File {image_path} has no EXIF tags at all") + except Exception as diag_e: + logger.debug(f"Diagnostic check failed: {diag_e}") return None @@ -450,7 +484,7 @@ def extract_video_date(video_path: str) -> Optional[date]: return None -def extract_photo_date(image_path: str) -> Optional[date]: +def extract_photo_date(image_path: str, is_uploaded_file: bool = False) -> Optional[date]: """Extract date taken from photo with fallback to file modification time, then creation time. 
Tries in order: @@ -458,6 +492,11 @@ def extract_photo_date(image_path: str) -> Optional[date]: 2. File modification time (as fallback if EXIF fails) 3. File creation time (as final fallback if modification time doesn't exist) + Args: + image_path: Path to the image file + is_uploaded_file: If True, be more lenient about file modification times + (uploaded files have recent modification times but may have valid EXIF) + Returns: Date object or None if no date can be determined """ @@ -483,14 +522,19 @@ def extract_photo_date(image_path: str) -> Optional[date]: today = date.today() # Reject future dates and dates that are too recent (likely copy dates) # If modification time is within the last 7 days, it's probably a copy date, not the original photo date + # BUT: for uploaded files, we should be more lenient since EXIF might have failed for other reasons days_ago = (today - mtime_date).days if mtime_date <= today and mtime_date >= date(1900, 1, 1): - if days_ago <= 7: + if days_ago <= 7 and not is_uploaded_file: # Modification time is too recent - likely a copy date, skip it + # (unless it's an uploaded file where we should trust EXIF extraction failure) logger.debug(f"File modification time {mtime_date} is too recent (likely copy date) for {image_path}, trying creation time") else: - # Modification time is old enough to be a real photo date - logger.info(f"Using file modification time {mtime_date} for {image_path}") + # Modification time is old enough to be a real photo date, OR it's an uploaded file + if is_uploaded_file: + logger.info(f"Using file modification time {mtime_date} for uploaded file {image_path} (EXIF extraction failed)") + else: + logger.info(f"Using file modification time {mtime_date} for {image_path}") return mtime_date else: logger.debug(f"File modification time {mtime_date} is invalid for {image_path}, trying creation time") @@ -514,15 +558,20 @@ def extract_photo_date(image_path: str) -> Optional[date]: ctime_date = 
datetime.fromtimestamp(ctime).date() today = date.today() # Validate date before returning (reject future dates and recent copy dates) + # BUT: for uploaded files, be more lenient since EXIF might have failed for other reasons days_ago = (today - ctime_date).days if ctime_date <= today and ctime_date >= date(1900, 1, 1): - if days_ago <= 7: + if days_ago <= 7 and not is_uploaded_file: # Creation time is too recent - likely a copy date, reject it + # (unless it's an uploaded file where we should trust EXIF extraction failure) logger.warning(f"File creation time {ctime_date} is too recent (likely copy date) for {image_path}, cannot determine photo date") return None else: - # Creation time is old enough to be a real photo date - logger.info(f"Using file creation/change time {ctime_date} for {image_path}") + # Creation time is old enough to be a real photo date, OR it's an uploaded file + if is_uploaded_file: + logger.info(f"Using file creation/change time {ctime_date} for uploaded file {image_path} (EXIF extraction failed)") + else: + logger.info(f"Using file creation/change time {ctime_date} for {image_path}") return ctime_date else: logger.warning(f"File creation time {ctime_date} is invalid for {image_path}") @@ -567,7 +616,7 @@ def find_photos_in_folder(folder_path: str, recursive: bool = True) -> list[str] def import_photo_from_path( - db: Session, photo_path: str, update_progress: Optional[Callable[[int, int, str], None]] = None + db: Session, photo_path: str, update_progress: Optional[Callable[[int, int, str], None]] = None, is_uploaded_file: bool = False, file_last_modified: Optional[date] = None, browser_exif_date: Optional[date] = None ) -> Tuple[Optional[Photo], bool]: """Import a single photo or video from file path into database. 
@@ -602,7 +651,7 @@ def import_photo_from_path( if media_type == "video": date_taken = extract_video_date(photo_path) else: - date_taken = extract_photo_date(photo_path) + date_taken = extract_photo_date(photo_path, is_uploaded_file=is_uploaded_file) # Validate date_taken before setting date_taken = validate_date_taken(date_taken) if date_taken: @@ -624,7 +673,7 @@ def import_photo_from_path( if media_type == "video": date_taken = extract_video_date(photo_path) else: - date_taken = extract_photo_date(photo_path) + date_taken = extract_photo_date(photo_path, is_uploaded_file=is_uploaded_file) # Validate date_taken before setting date_taken = validate_date_taken(date_taken) if date_taken: @@ -633,15 +682,35 @@ def import_photo_from_path( db.refresh(existing_by_path) return existing_by_path, False - # Extract date taken with fallback to file modification time + # Extract date taken with priority: browser EXIF > server EXIF > browser file modification time > server file modification time + import logging + logger = logging.getLogger(__name__) + if media_type == "video": date_taken = extract_video_date(photo_path) else: - date_taken = extract_photo_date(photo_path) + # Priority 1: Use browser-extracted EXIF date (most reliable - extracted from original file before upload) + if browser_exif_date: + logger.info(f"[DATE_EXTRACTION] Using browser-extracted EXIF date {browser_exif_date} for {photo_path}") + date_taken = browser_exif_date + # Priority 2: Use browser-captured file modification time (from original file before upload) + # This MUST come before server-side extraction to avoid using the server file's modification time (which is today) + elif file_last_modified: + logger.info(f"[DATE_EXTRACTION] Using file's original modification date {file_last_modified} from browser metadata for {photo_path}") + date_taken = file_last_modified + else: + logger.debug(f"[DATE_EXTRACTION] No browser metadata for {photo_path}, trying server EXIF extraction") + # Priority 3: Try to 
extract EXIF from the uploaded file on server + date_taken = extract_photo_date(photo_path, is_uploaded_file=is_uploaded_file) + + if not date_taken: + logger.warning(f"[DATE_EXTRACTION] No date found for {photo_path} - browser_exif_date={browser_exif_date}, file_last_modified={file_last_modified}") # Validate date_taken - ensure it's a valid date object or None # This prevents corrupted date data from being saved + logger.debug(f"[DATE_EXTRACTION] Before validation: date_taken={date_taken} for {photo_path}") date_taken = validate_date_taken(date_taken) + logger.info(f"[DATE_EXTRACTION] After validation: date_taken={date_taken} for {photo_path}") # For videos, mark as processed immediately (we don't process videos for faces) # For images, start as unprocessed diff --git a/docs/DEPLOY_FROM_SCRATCH.md b/docs/DEPLOY_FROM_SCRATCH.md index 7a578a2..5a3eae0 100644 --- a/docs/DEPLOY_FROM_SCRATCH.md +++ b/docs/DEPLOY_FROM_SCRATCH.md @@ -382,7 +382,20 @@ Update your Caddyfile on the proxy server: your-admin-domain.com { import security-headers - # CRITICAL: Route API requests to backend FIRST (before static files) + # CRITICAL: Route SSE streaming endpoints FIRST with no buffering + # This is required for Server-Sent Events (EventSource) to work properly + handle /api/v1/jobs/stream/* { + reverse_proxy http://YOUR_BACKEND_IP:8000 { + header_up Host {host} + header_up X-Real-IP {remote} + header_up X-Forwarded-For {remote_host} + header_up X-Forwarded-Proto {scheme} + # Disable buffering for SSE streams + flush_interval -1 + } + } + + # CRITICAL: Route API requests to backend (before static files) handle /api/* { reverse_proxy http://YOUR_BACKEND_IP:8000 { header_up Host {host} diff --git a/ecosystem.config.js.example b/ecosystem.config.js.example index bb7da44..90a0eef 100644 --- a/ecosystem.config.js.example +++ b/ecosystem.config.js.example @@ -17,6 +17,11 @@ module.exports = { autorestart: true, watch: false, max_memory_restart: '1G', + // Prevent infinite crash loops - 
stop after 10 failed restarts + max_restarts: 10, + min_uptime: '10s', // Require 10 seconds of uptime before considering stable + restart_delay: 4000, // Wait 4 seconds between restarts + kill_timeout: 5000, // Allow 5 seconds for graceful shutdown }, { name: 'punimtag-worker', @@ -35,6 +40,11 @@ module.exports = { autorestart: true, watch: false, max_memory_restart: '1G', + // Prevent infinite crash loops - stop after 10 failed restarts + max_restarts: 10, + min_uptime: '10s', // Require 10 seconds of uptime before considering stable + restart_delay: 4000, // Wait 4 seconds between restarts + kill_timeout: 5000, // Allow 5 seconds for graceful shutdown }, { name: 'punimtag-admin', diff --git a/scripts/README.md b/scripts/README.md index 6414fca..3c41676 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -49,6 +49,40 @@ Database-related shell scripts remain in `scripts/`: - `grant_auth_db_delete_permission.sh` - Grant delete permissions - `setup_postgresql.sh` - Set up PostgreSQL +## Log Management Scripts + +Quick access to service logs for troubleshooting: +- `check-logs.sh` - Check recent errors from all services +- `tail-errors.sh` - Follow error logs in real-time +- `view-recent-errors.sh` - View errors from the last N minutes (default: 10) +- `setup-log-rotation.sh` - Configure PM2 log rotation to prevent log bloat + +### Quick Usage + +```bash +# Check all service logs for recent errors +./scripts/check-logs.sh + +# Follow error logs in real-time (Ctrl+C to exit) +./scripts/tail-errors.sh + +# View errors from last 10 minutes +./scripts/view-recent-errors.sh + +# View errors from last 30 minutes +./scripts/view-recent-errors.sh 30 + +# Setup log rotation (run once) +./scripts/setup-log-rotation.sh +``` + +**Log Locations:** +- All logs: `/home/appuser/.pm2/logs/` +- API errors: `/home/appuser/.pm2/logs/punimtag-api-error.log` +- Worker errors: `/home/appuser/.pm2/logs/punimtag-worker-error.log` +- Admin errors: 
`/home/appuser/.pm2/logs/punimtag-admin-error.log` +- Viewer errors: `/home/appuser/.pm2/logs/punimtag-viewer-error.log` + ## Usage Most scripts can be run directly: