diff --git a/frontend/src/api/faces.ts b/frontend/src/api/faces.ts index 9864270..665df58 100644 --- a/frontend/src/api/faces.ts +++ b/frontend/src/api/faces.ts @@ -45,6 +45,22 @@ export interface SimilarFacesResponse { items: SimilarFaceItem[] } +export interface FaceSimilarityPair { + face_id_1: number + face_id_2: number + similarity: number // 0-1 range + confidence_pct: number // 0-100 range +} + +export interface BatchSimilarityRequest { + face_ids: number[] + min_confidence?: number // 0-100, default 60 +} + +export interface BatchSimilarityResponse { + pairs: FaceSimilarityPair[] +} + export interface IdentifyFaceRequest { person_id?: number first_name?: string @@ -146,6 +162,10 @@ export const facesApi = { const response = await apiClient.get(`/api/v1/faces/${faceId}/similar`) return response.data }, + batchSimilarity: async (request: BatchSimilarityRequest): Promise => { + const response = await apiClient.post('/api/v1/faces/batch-similarity', request) + return response.data + }, identify: async (faceId: number, payload: IdentifyFaceRequest): Promise => { const response = await apiClient.post(`/api/v1/faces/${faceId}/identify`, payload) return response.data diff --git a/frontend/src/pages/Identify.tsx b/frontend/src/pages/Identify.tsx index 748df1f..7256e4a 100644 --- a/frontend/src/pages/Identify.tsx +++ b/frontend/src/pages/Identify.tsx @@ -49,6 +49,8 @@ export default function Identify() { // Track previous face ID to save data on navigation const prevFaceIdRef = useRef(undefined) + // Track if initial load has happened + const initialLoadRef = useRef(false) const canIdentify = useMemo(() => { return Boolean((personId && currentFace) || (firstName && lastName && dob && currentFace)) @@ -92,37 +94,94 @@ export default function Identify() { // Create a map of face IDs to face objects for quick lookup const faceMap = new Map(faces.map(f => [f.id, f])) - // Build similarity graph: for each face, find all similar faces (≥60% confidence) in current list + // 
Build similarity graph: use batch endpoint to get all similarities at once const similarityMap = new Map>() - for (let i = 0; i < faces.length; i++) { - const face = faces[i] - const similarSet = new Set() + // Initialize similarity map for all faces + for (const face of faces) { + similarityMap.set(face.id, new Set()) + } + + // Update progress - loading all faces once + setLoadingProgress({ + current: 0, + total: faces.length, + message: 'Loading all faces from database...' + }) + + try { + // Get all face IDs + const faceIds = faces.map(f => f.id) - // Update progress + // Update progress - calculating similarities setLoadingProgress({ - current: i + 1, + current: 0, total: faces.length, - message: `Checking face ${i + 1} of ${faces.length}...` + message: `Calculating similarities for ${faces.length} faces (this may take a while)...` }) - try { - const similarRes = await facesApi.getSimilar(face.id) - for (const similar of similarRes.items) { - // Only include similar faces that are in the current list - if (!faceMap.has(similar.id)) continue - - // Convert similarity back to percentage (similarity is in [0,1]) - const confidencePct = Math.round(similar.similarity * 100) - if (confidencePct >= 60) { - similarSet.add(similar.id) - } - } - } catch (error) { - // Silently skip faces with errors - } + // Call batch similarity endpoint - loads all faces once from DB + // Note: This is where the heavy computation happens (comparing N faces to M faces) + // The progress bar will show 0% during this time as we can't track backend progress + const batchRes = await facesApi.batchSimilarity({ + face_ids: faceIds, + min_confidence: 60.0 + }) - similarityMap.set(face.id, similarSet) + // Update progress - calculation complete, now processing results + const totalPairs = batchRes.pairs.length + setLoadingProgress({ + current: 0, + total: totalPairs, + message: `Similarity calculation complete! 
Processing ${totalPairs} results...` + }) + + // Build similarity map from batch results + // Note: results include similarities to all faces in DB, but we only care about + // similarities between faces in the current list + let processedPairs = 0 + for (const pair of batchRes.pairs) { + // Only include pairs where both faces are in the current list + if (!faceMap.has(pair.face_id_1) || !faceMap.has(pair.face_id_2)) { + processedPairs++ + // Update progress every 100 pairs or at the end + if (processedPairs % 100 === 0 || processedPairs === totalPairs) { + setLoadingProgress({ + current: processedPairs, + total: totalPairs, + message: `Processing similarity results... (${processedPairs} / ${totalPairs})` + }) + // Allow UI to update + await new Promise(resolve => setTimeout(resolve, 0)) + } + continue + } + + // Add bidirectional relationships + const set1 = similarityMap.get(pair.face_id_1) || new Set() + set1.add(pair.face_id_2) + similarityMap.set(pair.face_id_1, set1) + + const set2 = similarityMap.get(pair.face_id_2) || new Set() + set2.add(pair.face_id_1) + similarityMap.set(pair.face_id_2, set2) + + processedPairs++ + // Update progress every 100 pairs or at the end + if (processedPairs % 100 === 0 || processedPairs === totalPairs) { + setLoadingProgress({ + current: processedPairs, + total: totalPairs, + message: `Processing similarity results... 
(${processedPairs} / ${totalPairs})` + }) + // Allow UI to update + await new Promise(resolve => setTimeout(resolve, 0)) + } + } + } catch (error) { + // Silently skip on error - return original faces + console.error('Error calculating batch similarities:', error) + return faces } // Find connected components (groups of similar faces) @@ -194,11 +253,23 @@ export default function Identify() { } } + // Initial load on mount useEffect(() => { - loadFaces() - loadPeople() + if (!initialLoadRef.current) { + initialLoadRef.current = true + loadFaces() + loadPeople() + } // eslint-disable-next-line react-hooks/exhaustive-deps - }, [pageSize, minQuality, sortBy, sortDir, dateFrom, dateTo, uniqueFacesOnly]) + }, []) + + // Reload when uniqueFacesOnly changes (immediate reload) + useEffect(() => { + if (initialLoadRef.current) { + loadFaces() + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [uniqueFacesOnly]) useEffect(() => { if (currentFace) { @@ -373,18 +444,40 @@ export default function Identify() { {loadingProgress.total > 0 && ( {loadingProgress.current} / {loadingProgress.total} + {loadingProgress.total > 0 && ( + + ({Math.round((loadingProgress.current / loadingProgress.total) * 100)}%) + + )} )}
-
0 - ? `${(loadingProgress.current / loadingProgress.total) * 100}%` - : '100%' - }} - /> + {loadingProgress.total > 0 ? ( +
+ ) : ( +
+
+ +
+ )}
)} @@ -466,6 +559,15 @@ export default function Identify() { Hide duplicates with ≥60% match confidence

+
+ +
# --- src/web/api/faces.py -------------------------------------------------

import logging

# Module-level logger: created once, not per request.
logger = logging.getLogger(__name__)


@router.post("/batch-similarity", response_model=BatchSimilarityResponse)
def get_batch_similarities(
    request: BatchSimilarityRequest,
    db: Session = Depends(get_db),
) -> BatchSimilarityResponse:
    """Calculate similarities between all pairs of faces in the provided list.

    Loads all faces once from the database and compares every requested face
    against them. Much more efficient than calling /similar for each face
    individually.

    Args:
        request: face IDs to compare plus the minimum confidence filter.
        db: injected SQLAlchemy session.

    Returns:
        BatchSimilarityResponse with one FaceSimilarityPair per qualifying pair.
    """
    # Lazy %-style args: the message is only formatted if INFO is enabled.
    logger.info("API: batch_similarity called for %d faces", len(request.face_ids))

    # Heavy lifting lives in the service layer.
    pairs = calculate_batch_similarities(
        db,
        request.face_ids,
        min_confidence=request.min_confidence,
    )

    items = [
        FaceSimilarityPair(
            face_id_1=face_id_1,
            face_id_2=face_id_2,
            similarity=similarity,
            confidence_pct=confidence_pct,
        )
        for face_id_1, face_id_2, similarity, confidence_pct in pairs
    ]

    logger.info("API: batch_similarity returning %d pairs", len(items))
    return BatchSimilarityResponse(pairs=items)


# --- src/web/schemas/faces.py ---------------------------------------------


class BatchSimilarityRequest(BaseModel):
    """Request to get similarities between multiple faces."""

    model_config = ConfigDict(protected_namespaces=())

    # The N faces to compare against the whole database.
    face_ids: list[int] = Field(..., description="List of face IDs to calculate similarities for")
    min_confidence: float = Field(60.0, ge=0.0, le=100.0, description="Minimum confidence percentage (0-100)")


class FaceSimilarityPair(BaseModel):
    """A pair of similar faces with their similarity score."""

    model_config = ConfigDict(protected_namespaces=())

    face_id_1: int
    face_id_2: int
    similarity: float  # cosine-derived similarity, 0-1 range
    confidence_pct: float  # calibrated confidence, 0-100 range


class BatchSimilarityResponse(BaseModel):
    """Response containing similarities between face pairs."""

    model_config = ConfigDict(protected_namespaces=())

    pairs: list[FaceSimilarityPair] = Field(..., description="List of similar face pairs")
def calculate_batch_similarities(
    db: Session,
    face_ids: list[int],
    min_confidence: float = 60.0,
) -> list[tuple[int, int, float, float]]:
    """Calculate similarities between N requested faces and all M faces in the DB.

    Strategy:
      1. Load every face (with its encoding) from the database exactly once.
      2. L2-normalise each encoding a single time.
      3. One matrix multiplication (N, D) @ (D, M) yields every cosine
         similarity at once; only the filtering remains as a Python loop.

    Args:
        db: Database session.
        face_ids: Face IDs to calculate similarities for (the N side).
        min_confidence: Minimum confidence percentage (0-100) for a pair.

    Returns:
        (face_id_1, face_id_2, similarity, confidence_pct) tuples where
        face_id_1 comes from the request list, face_id_2 from the whole DB,
        similarity is in [0, 1] and confidence_pct in [0, 100].
    """
    from src.core.config import DEFAULT_FACE_TOLERANCE

    if not face_ids:
        return []

    # Load all M faces once. Photo blobs stay lazy; only encodings/qualities
    # are touched below.
    all_faces: list[Face] = db.query(Face).all()
    all_face_map = {f.id: f for f in all_faces}

    # Keep only requested IDs that actually exist in the database.
    request_ids = [fid for fid in face_ids if fid in all_face_map]
    if not request_ids:
        return []

    # Decode encodings and cache quality scores once. Faces with a missing
    # encoding are skipped instead of letting np.frombuffer crash on None.
    encodings: dict[int, np.ndarray] = {}
    qualities: dict[int, float] = {}
    for face in all_faces:
        if not face.encoding:
            continue  # robustness: nothing to compare against
        encodings[face.id] = np.frombuffer(face.encoding, dtype=np.float64)
        qualities[face.id] = (
            float(face.quality_score) if face.quality_score is not None else 0.5
        )

    # Pre-normalise every encoding exactly once (zero vectors are kept as-is;
    # their dot products are 0, i.e. maximum distance after conversion).
    normalized: dict[int, np.ndarray] = {}
    for fid, enc in encodings.items():
        norm = np.linalg.norm(enc)
        normalized[fid] = enc / (norm + 1e-8) if norm > 0 else enc

    # Requested faces whose encodings survived decoding. No separate
    # request_* dicts: the all-faces caches already hold everything needed.
    request_ids = [fid for fid in request_ids if fid in normalized]
    all_ids = list(normalized.keys())
    if not request_ids or not all_ids:
        return []

    # (N, D) @ (D, M) -> (N, M) cosine similarities in one vectorised shot.
    # NOTE(review): assumes all stored encodings share one dimensionality
    # (ArcFace 512-d) — a ragged mix would break the matrix build; confirm
    # upstream ingestion guarantees this.
    request_matrix = np.array([normalized[fid] for fid in request_ids])
    all_matrix = np.array([normalized[fid] for fid in all_ids])
    similarity_matrix = np.clip(request_matrix @ all_matrix.T, -1.0, 1.0)
    distance_matrix = 1.0 - similarity_matrix  # range [0, 2], 0 = identical

    pairs: list[tuple[int, int, float, float]] = []
    tolerance = DEFAULT_FACE_TOLERANCE

    for i, face_id_1 in enumerate(request_ids):
        quality_1 = qualities[face_id_1]
        row = distance_matrix[i]
        for j, face_id_2 in enumerate(all_ids):
            if face_id_1 == face_id_2:
                continue  # never pair a face with itself

            distance = float(row[j])

            # Adaptive tolerance: lower-quality faces get a looser threshold.
            avg_quality = (quality_1 + qualities[face_id_2]) / 2
            if distance > calculate_adaptive_tolerance(tolerance, avg_quality):
                continue

            confidence_pct = calibrate_confidence(distance, tolerance)
            if confidence_pct < min_confidence:
                continue

            # Map distance [0, 2] onto similarity [0, 1], clamped for safety.
            similarity = max(0.0, min(1.0, 1.0 - distance / 2.0))
            pairs.append((face_id_1, face_id_2, similarity, confidence_pct))

    return pairs