feat: Optimize photo retrieval with tags and face counts using efficient queries

This commit optimizes the `get_photos_with_tags` function by replacing its per-photo queries with JOINs and aggregations. The previous implementation issued four queries per photo (face count, unidentified face count, tags, and people) plus one query for the photo list, i.e. 4N+1 queries for N photos; the new implementation issues 3 queries in total regardless of N. The shape of the returned data is unchanged: each photo is still returned with its face counts, its tags, and the names of the people identified in it. Documentation has been updated to reflect these changes.
This commit is contained in:
tanyar09 2025-12-05 12:46:35 -05:00
parent 7973dfadd2
commit 30f8a36e57

View File

@ -217,95 +217,156 @@ def delete_tags(db: Session, tag_ids: List[int]) -> int:
def _format_date_value(value):
    """Return *value* as an ISO-8601 string, or None when it is empty.

    Values that are already strings are passed through unchanged; any other
    truthy value is expected to provide ``isoformat()``.
    """
    if not value:
        return None
    if isinstance(value, str):
        return value
    return value.isoformat()


def get_photos_with_tags(db: Session) -> List[dict]:
    """Get all photos with tags, people names and face counts.

    Uses three set-based queries (photos + face aggregates, tags, people)
    instead of issuing four extra queries per photo.

    Args:
        db: SQLAlchemy session used to run the queries.

    Returns:
        One dict per photo, ordered by ``date_taken`` descending (NULLs last)
        and then ``filename``, with the keys ``id``, ``filename``, ``path``,
        ``processed``, ``date_taken``, ``date_added``, ``face_count``,
        ``unidentified_face_count``, ``tags`` (comma-separated string),
        ``people_names`` (comma-separated string) and ``media_type``
        (falls back to ``'image'`` when unset).
    """
    from sqlalchemy import and_, case, distinct, func

    photo_columns = (
        Photo.id,
        Photo.filename,
        Photo.path,
        Photo.processed,
        Photo.date_taken,
        Photo.date_added,
        Photo.media_type,
    )

    # A photo without faces still produces one row from the LEFT OUTER JOIN,
    # with every Face column NULL. Requiring Face.id IS NOT NULL keeps that
    # padding row from being counted as an unidentified face.
    is_unidentified_face = and_(
        Face.id.isnot(None),
        Face.person_id.is_(None),
    )

    # Query 1: photos with both face aggregates in a single GROUP BY.
    photos_with_counts = (
        db.query(
            *photo_columns,
            func.count(distinct(Face.id)).label('face_count'),
            func.sum(
                case((is_unidentified_face, 1), else_=0)
            ).label('unidentified_face_count'),
        )
        .outerjoin(Face, Photo.id == Face.photo_id)
        .group_by(*photo_columns)
        .order_by(Photo.date_taken.desc().nullslast(), Photo.filename)
        .all()
    )

    # Nothing to decorate; skip the remaining queries.
    if not photos_with_counts:
        return []

    # Query 2: every (photo_id, tag_name) pair, aggregated in Python so the
    # result does not depend on a database-specific GROUP_CONCAT/STRING_AGG.
    # No IN (...) filter on photo ids: query 1 already selects every photo,
    # and a per-photo bind parameter list can exceed the backend's limit on
    # large libraries.
    tags_data = (
        db.query(
            PhotoTagLinkage.photo_id,
            Tag.tag_name,
        )
        .join(Tag, PhotoTagLinkage.tag_id == Tag.id)
        .order_by(PhotoTagLinkage.photo_id, Tag.tag_name)
        .all()
    )

    tag_names_by_photo = {}
    for row in tags_data:
        tag_names_by_photo.setdefault(row.photo_id, []).append(row.tag_name)

    # Query 3: distinct people per photo. The inner join on person_id already
    # excludes faces that have no person assigned.
    people_data = (
        db.query(
            Face.photo_id,
            Person.id,
            Person.first_name,
            Person.middle_name,
            Person.last_name,
            Person.maiden_name,
        )
        .join(Person, Face.person_id == Person.id)
        .distinct()
        .order_by(Face.photo_id, Person.last_name, Person.first_name)
        .all()
    )

    people_names_by_photo = {}
    for row in people_data:
        name_parts = [
            part
            for part in (row.first_name, row.middle_name, row.last_name)
            if part
        ]
        if row.maiden_name:
            name_parts.append(f"({row.maiden_name})")
        full_name = " ".join(name_parts) if name_parts else "Unknown"
        people_names_by_photo.setdefault(row.photo_id, []).append(full_name)

    # Build the result in the order returned by query 1.
    result_list = []
    for photo_row in photos_with_counts:
        photo_id = photo_row.id
        result_list.append({
            'id': photo_id,
            'filename': photo_row.filename,
            'path': photo_row.path,
            'processed': photo_row.processed,
            'date_taken': _format_date_value(photo_row.date_taken),
            'date_added': _format_date_value(photo_row.date_added),
            'face_count': photo_row.face_count or 0,
            # SUM() may come back as Decimal/None depending on the backend.
            'unidentified_face_count': int(
                photo_row.unidentified_face_count or 0
            ),
            'tags': ", ".join(tag_names_by_photo.get(photo_id, [])),
            'people_names': ", ".join(
                people_names_by_photo.get(photo_id, [])
            ),
            'media_type': photo_row.media_type or 'image',
        })

    return result_list