feat: Optimize photo retrieval with tags and face counts using efficient queries

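This commit optimizes the `get_photos_with_tags` function by replacing its per-photo database queries with JOINs and aggregations. The previous implementation issued 4N+1 queries for N photos (one query for the photo list, plus four per photo for the face count, unidentified face count, tags, and people); the new implementation issues 3 queries in total, regardless of the number of photos. The function still returns the same list of photo dicts, including each photo's associated tags and identified individuals, so callers are unaffected; the only handling change is that `date_taken` and `date_added` values that are already strings are now passed through as-is instead of being formatted with `.isoformat()`. The function's docstring has been updated to reflect these changes.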
2025-12-05 12:46:35 -05:00 · 2025-12-05 12:46:35 -05:00 · 30f8a36e57
commit 30f8a36e57
parent 7973dfadd2
1 changed files with 132 additions and 71 deletions
--- a/src/web/services/tag_service.py
+++ b/src/web/services/tag_service.py
@@ -217,95 +217,156 @@ def delete_tags(db: Session, tag_ids: List[int]) -> int:


 def get_photos_with_tags(db: Session) -> List[dict]:
-    """Get all photos with tags and face counts, matching desktop query exactly.
+    """Get all photos with tags and face counts, optimized with JOINs and aggregations.
    
-    Desktop query:
-    SELECT p.id, p.filename, p.path, p.processed, p.date_taken, p.date_added,
-           (SELECT COUNT(*) FROM faces f WHERE f.photo_id = p.id) as face_count,
-           (SELECT GROUP_CONCAT(DISTINCT t.tag_name) 
-            FROM phototaglinkage ptl 
-            JOIN tags t ON t.id = ptl.tag_id 
-            WHERE ptl.photo_id = p.id) as tags
-    FROM photos p
-    ORDER BY p.date_taken DESC, p.filename
+    This function uses efficient JOINs and aggregations instead of N+1 queries,
+    reducing database queries from 4N+1 to just 3 queries total.
    
    Returns:
        List of dicts with photo info, face_count, and tags (comma-separated string)
    """
-    from sqlalchemy import func
+    from sqlalchemy import func, case, distinct
    
-    # Get all photos with face counts and tags
-    photos = (
-        db.query(Photo)
+    # Query 1: Get all photos with face counts using LEFT JOIN and GROUP BY
+    # This gets face_count and unidentified_face_count in one query
+    photos_with_counts = (
+        db.query(
+            Photo.id,
+            Photo.filename,
+            Photo.path,
+            Photo.processed,
+            Photo.date_taken,
+            Photo.date_added,
+            Photo.media_type,
+            # Face count (all faces)
+            func.count(distinct(Face.id)).label('face_count'),
+            # Unidentified face count (faces with person_id IS NULL)
+            func.sum(
+                case((Face.person_id.is_(None), 1), else_=0)
+            ).label('unidentified_face_count'),
+        )
+        .outerjoin(Face, Photo.id == Face.photo_id)
+        .group_by(
+            Photo.id,
+            Photo.filename,
+            Photo.path,
+            Photo.processed,
+            Photo.date_taken,
+            Photo.date_added,
+            Photo.media_type,
+        )
        .order_by(Photo.date_taken.desc().nullslast(), Photo.filename)
        .all()
    )
    
-    result = []
-    for photo in photos:
-        # Get face count (all faces)
-        face_count = (
-            db.query(func.count(Face.id))
-            .filter(Face.photo_id == photo.id)
-            .scalar() or 0
+    # Create a map of photo_id -> photo data
+    photo_map = {row.id: row for row in photos_with_counts}
+    photo_ids = list(photo_map.keys())
+    
+    # If no photos, return empty list
+    if not photo_ids:
+        return []
+    
+    # Query 2: Get all tags for all photos in one query
+    # Fetch all tag linkages and aggregate in Python (more reliable across databases)
+    tags_data = (
+        db.query(
+            PhotoTagLinkage.photo_id,
+            Tag.tag_name,
        )
+        .join(Tag, PhotoTagLinkage.tag_id == Tag.id)
+        .filter(PhotoTagLinkage.photo_id.in_(photo_ids))
+        .order_by(PhotoTagLinkage.photo_id, Tag.tag_name)
+        .all()
+    )
+    
+    # Group tags by photo_id and join with comma
+    tags_map = {}
+    for row in tags_data:
+        if row.photo_id not in tags_map:
+            tags_map[row.photo_id] = []
+        tags_map[row.photo_id].append(row.tag_name)
+    
+    # Convert lists to comma-separated strings
+    tags_map = {photo_id: ", ".join(tags) for photo_id, tags in tags_map.items()}
+    
+    # Query 3: Get all people for all photos in one query
+    # Get distinct people per photo, then format names in Python
+    people_data = (
+        db.query(
+            Face.photo_id,
+            Person.id,
+            Person.first_name,
+            Person.middle_name,
+            Person.last_name,
+            Person.maiden_name,
+        )
+        .join(Person, Face.person_id == Person.id)
+        .filter(Face.photo_id.in_(photo_ids))
+        .filter(Face.person_id.isnot(None))
+        .distinct()
+        .order_by(Face.photo_id, Person.last_name, Person.first_name)
+        .all()
+    )
+    
+    # Group people by photo_id and format names
+    people_map = {}
+    for row in people_data:
+        if row.photo_id not in people_map:
+            people_map[row.photo_id] = []
        
-        # Get unidentified face count (only faces with person_id IS NULL)
-        unidentified_face_count = (
-            db.query(func.count(Face.id))
-            .filter(Face.photo_id == photo.id)
-            .filter(Face.person_id.is_(None))
-            .scalar() or 0
-        )
+        # Format person name
+        name_parts = []
+        if row.first_name:
+            name_parts.append(row.first_name)
+        if row.middle_name:
+            name_parts.append(row.middle_name)
+        if row.last_name:
+            name_parts.append(row.last_name)
+        if row.maiden_name:
+            name_parts.append(f"({row.maiden_name})")
+        full_name = " ".join(name_parts) if name_parts else "Unknown"
+        people_map[row.photo_id].append(full_name)
+    
+    # Build result list
+    result_list = []
+    for photo_id, photo_row in photo_map.items():
+        # Format date_taken
+        date_taken = None
+        if photo_row.date_taken:
+            if isinstance(photo_row.date_taken, str):
+                date_taken = photo_row.date_taken
+            else:
+                date_taken = photo_row.date_taken.isoformat()
        
-        # Get tags as comma-separated string (matching desktop GROUP_CONCAT)
-        tags_query = (
-            db.query(Tag.tag_name)
-            .join(PhotoTagLinkage, Tag.id == PhotoTagLinkage.tag_id)
-            .filter(PhotoTagLinkage.photo_id == photo.id)
-            .order_by(Tag.tag_name)
-            .all()
-        )
-        tags = ", ".join([t[0] for t in tags_query]) if tags_query else ""
+        # Format date_added
+        date_added = None
+        if photo_row.date_added:
+            if isinstance(photo_row.date_added, str):
+                date_added = photo_row.date_added
+            else:
+                date_added = photo_row.date_added.isoformat()
        
-        # Get people names as comma-separated string (unique people identified in photo)
-        people_query = (
-            db.query(Person)
-            .join(Face, Person.id == Face.person_id)
-            .filter(Face.photo_id == photo.id)
-            .filter(Face.person_id.isnot(None))
-            .order_by(Person.last_name, Person.first_name)
-            .distinct()
-            .all()
-        )
-        people_names = []
-        for person in people_query:
-            name_parts = []
-            if person.first_name:
-                name_parts.append(person.first_name)
-            if person.middle_name:
-                name_parts.append(person.middle_name)
-            if person.last_name:
-                name_parts.append(person.last_name)
-            if person.maiden_name:
-                name_parts.append(f"({person.maiden_name})")
-            full_name = " ".join(name_parts) if name_parts else "Unknown"
-            people_names.append(full_name)
+        # Get tags for this photo
+        tags = tags_map.get(photo_id, "")
+        
+        # Get people names for this photo
+        people_names = people_map.get(photo_id, [])
        people_names_str = ", ".join(people_names) if people_names else ""
        
-        result.append({
-            'id': photo.id,
-            'filename': photo.filename,
-            'path': photo.path,
-            'processed': photo.processed,
-            'date_taken': photo.date_taken.isoformat() if photo.date_taken else None,
-            'date_added': photo.date_added.isoformat() if photo.date_added else None,
-            'face_count': face_count,
-            'unidentified_face_count': unidentified_face_count,
+        result_list.append({
+            'id': photo_row.id,
+            'filename': photo_row.filename,
+            'path': photo_row.path,
+            'processed': photo_row.processed,
+            'date_taken': date_taken,
+            'date_added': date_added,
+            'face_count': photo_row.face_count or 0,
+            'unidentified_face_count': int(photo_row.unidentified_face_count or 0),
            'tags': tags,
            'people_names': people_names_str,
-            'media_type': photo.media_type or 'image',
+            'media_type': photo_row.media_type or 'image',
        })
    
-    return result
+    return result_list