diff --git a/frontend/src/api/faces.ts b/frontend/src/api/faces.ts
index 9864270..665df58 100644
--- a/frontend/src/api/faces.ts
+++ b/frontend/src/api/faces.ts
@@ -45,6 +45,22 @@ export interface SimilarFacesResponse {
items: SimilarFaceItem[]
}
+export interface FaceSimilarityPair {
+ face_id_1: number
+ face_id_2: number
+ similarity: number // 0-1 range
+ confidence_pct: number // 0-100 range
+}
+
+export interface BatchSimilarityRequest {
+ face_ids: number[]
+ min_confidence?: number // 0-100, default 60
+}
+
+export interface BatchSimilarityResponse {
+ pairs: FaceSimilarityPair[]
+}
+
export interface IdentifyFaceRequest {
person_id?: number
first_name?: string
@@ -146,6 +162,10 @@ export const facesApi = {
const response = await apiClient.get(`/api/v1/faces/${faceId}/similar`)
return response.data
},
+  batchSimilarity: async (request: BatchSimilarityRequest): Promise<BatchSimilarityResponse> => {
+    const response = await apiClient.post<BatchSimilarityResponse>('/api/v1/faces/batch-similarity', request)
+    return response.data
+  },
identify: async (faceId: number, payload: IdentifyFaceRequest): Promise => {
const response = await apiClient.post(`/api/v1/faces/${faceId}/identify`, payload)
return response.data
diff --git a/frontend/src/pages/Identify.tsx b/frontend/src/pages/Identify.tsx
index 748df1f..7256e4a 100644
--- a/frontend/src/pages/Identify.tsx
+++ b/frontend/src/pages/Identify.tsx
@@ -49,6 +49,8 @@ export default function Identify() {
// Track previous face ID to save data on navigation
const prevFaceIdRef = useRef(undefined)
+ // Track if initial load has happened
+ const initialLoadRef = useRef(false)
const canIdentify = useMemo(() => {
return Boolean((personId && currentFace) || (firstName && lastName && dob && currentFace))
@@ -92,37 +94,94 @@ export default function Identify() {
// Create a map of face IDs to face objects for quick lookup
const faceMap = new Map(faces.map(f => [f.id, f]))
- // Build similarity graph: for each face, find all similar faces (≥60% confidence) in current list
+ // Build similarity graph: use batch endpoint to get all similarities at once
const similarityMap = new Map>()
- for (let i = 0; i < faces.length; i++) {
- const face = faces[i]
- const similarSet = new Set()
+ // Initialize similarity map for all faces
+ for (const face of faces) {
+ similarityMap.set(face.id, new Set())
+ }
+
+ // Update progress - loading all faces once
+ setLoadingProgress({
+ current: 0,
+ total: faces.length,
+ message: 'Loading all faces from database...'
+ })
+
+ try {
+ // Get all face IDs
+ const faceIds = faces.map(f => f.id)
- // Update progress
+ // Update progress - calculating similarities
setLoadingProgress({
- current: i + 1,
+ current: 0,
total: faces.length,
- message: `Checking face ${i + 1} of ${faces.length}...`
+ message: `Calculating similarities for ${faces.length} faces (this may take a while)...`
})
- try {
- const similarRes = await facesApi.getSimilar(face.id)
- for (const similar of similarRes.items) {
- // Only include similar faces that are in the current list
- if (!faceMap.has(similar.id)) continue
-
- // Convert similarity back to percentage (similarity is in [0,1])
- const confidencePct = Math.round(similar.similarity * 100)
- if (confidencePct >= 60) {
- similarSet.add(similar.id)
- }
- }
- } catch (error) {
- // Silently skip faces with errors
- }
+ // Call batch similarity endpoint - loads all faces once from DB
+ // Note: This is where the heavy computation happens (comparing N faces to M faces)
+ // The progress bar will show 0% during this time as we can't track backend progress
+ const batchRes = await facesApi.batchSimilarity({
+ face_ids: faceIds,
+ min_confidence: 60.0
+ })
- similarityMap.set(face.id, similarSet)
+ // Update progress - calculation complete, now processing results
+ const totalPairs = batchRes.pairs.length
+ setLoadingProgress({
+ current: 0,
+ total: totalPairs,
+ message: `Similarity calculation complete! Processing ${totalPairs} results...`
+ })
+
+ // Build similarity map from batch results
+ // Note: results include similarities to all faces in DB, but we only care about
+ // similarities between faces in the current list
+ let processedPairs = 0
+ for (const pair of batchRes.pairs) {
+ // Only include pairs where both faces are in the current list
+ if (!faceMap.has(pair.face_id_1) || !faceMap.has(pair.face_id_2)) {
+ processedPairs++
+ // Update progress every 100 pairs or at the end
+ if (processedPairs % 100 === 0 || processedPairs === totalPairs) {
+ setLoadingProgress({
+ current: processedPairs,
+ total: totalPairs,
+ message: `Processing similarity results... (${processedPairs} / ${totalPairs})`
+ })
+ // Allow UI to update
+ await new Promise(resolve => setTimeout(resolve, 0))
+ }
+ continue
+ }
+
+ // Add bidirectional relationships
+ const set1 = similarityMap.get(pair.face_id_1) || new Set()
+ set1.add(pair.face_id_2)
+ similarityMap.set(pair.face_id_1, set1)
+
+ const set2 = similarityMap.get(pair.face_id_2) || new Set()
+ set2.add(pair.face_id_1)
+ similarityMap.set(pair.face_id_2, set2)
+
+ processedPairs++
+ // Update progress every 100 pairs or at the end
+ if (processedPairs % 100 === 0 || processedPairs === totalPairs) {
+ setLoadingProgress({
+ current: processedPairs,
+ total: totalPairs,
+ message: `Processing similarity results... (${processedPairs} / ${totalPairs})`
+ })
+ // Allow UI to update
+ await new Promise(resolve => setTimeout(resolve, 0))
+ }
+ }
+ } catch (error) {
+ // Silently skip on error - return original faces
+ console.error('Error calculating batch similarities:', error)
+ return faces
}
// Find connected components (groups of similar faces)
@@ -194,11 +253,23 @@ export default function Identify() {
}
}
+ // Initial load on mount
useEffect(() => {
- loadFaces()
- loadPeople()
+ if (!initialLoadRef.current) {
+ initialLoadRef.current = true
+ loadFaces()
+ loadPeople()
+ }
// eslint-disable-next-line react-hooks/exhaustive-deps
- }, [pageSize, minQuality, sortBy, sortDir, dateFrom, dateTo, uniqueFacesOnly])
+ }, [])
+
+ // Reload when uniqueFacesOnly changes (immediate reload)
+ useEffect(() => {
+ if (initialLoadRef.current) {
+ loadFaces()
+ }
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [uniqueFacesOnly])
useEffect(() => {
if (currentFace) {
@@ -373,18 +444,40 @@ export default function Identify() {
{loadingProgress.total > 0 && (
{loadingProgress.current} / {loadingProgress.total}
+ {loadingProgress.total > 0 && (
+
+ ({Math.round((loadingProgress.current / loadingProgress.total) * 100)}%)
+
+ )}
)}
-
0
- ? `${(loadingProgress.current / loadingProgress.total) * 100}%`
- : '100%'
- }}
- />
+ {loadingProgress.total > 0 ? (
+
+ ) : (
+
+ )}
)}
@@ -466,6 +559,15 @@ export default function Identify() {
Hide duplicates with ≥60% match confidence
+
+
+
)}
diff --git a/src/web/api/faces.py b/src/web/api/faces.py
index 709a47f..d527acf 100644
--- a/src/web/api/faces.py
+++ b/src/web/api/faces.py
@@ -17,6 +17,9 @@ from src.web.schemas.faces import (
FaceItem,
SimilarFacesResponse,
SimilarFaceItem,
+ BatchSimilarityRequest,
+ BatchSimilarityResponse,
+ FaceSimilarityPair,
IdentifyFaceRequest,
IdentifyFaceResponse,
FaceUnmatchResponse,
@@ -33,6 +36,7 @@ from src.web.db.models import Face, Person, PersonEncoding
from src.web.services.face_service import (
list_unidentified_faces,
find_similar_faces,
+ calculate_batch_similarities,
find_auto_match_matches,
accept_auto_match_matches,
)
@@ -168,6 +172,42 @@ def get_similar_faces(face_id: int, db: Session = Depends(get_db)) -> SimilarFac
return SimilarFacesResponse(base_face_id=face_id, items=items)
+@router.post("/batch-similarity", response_model=BatchSimilarityResponse)
+def get_batch_similarities(
+    request: BatchSimilarityRequest,
+    db: Session = Depends(get_db),
+) -> BatchSimilarityResponse:
+    """Calculate similarities between all pairs of faces in the provided list.
+
+    Loads all faces once from database and calculates similarities between all pairs.
+    Much more efficient than calling /similar for each face individually.
+    """
+    import logging
+    logger = logging.getLogger(__name__)
+    logger.info("API: batch_similarity called for %d faces", len(request.face_ids))
+
+    # Delegate the heavy pairwise computation to the service layer.
+    pairs = calculate_batch_similarities(
+        db,
+        request.face_ids,
+        min_confidence=request.min_confidence,
+    )
+
+    # Convert (id, id, similarity, confidence) tuples into response models.
+    items = [
+        FaceSimilarityPair(
+            face_id_1=face_id_1,
+            face_id_2=face_id_2,
+            similarity=similarity,
+            confidence_pct=confidence_pct,
+        )
+        for face_id_1, face_id_2, similarity, confidence_pct in pairs
+    ]
+
+    logger.info("API: batch_similarity returning %d pairs", len(items))
+    return BatchSimilarityResponse(pairs=items)
+
+
@router.post("/{face_id}/identify", response_model=IdentifyFaceResponse)
def identify_face(
face_id: int,
diff --git a/src/web/schemas/faces.py b/src/web/schemas/faces.py
index f68db8b..059ab5c 100644
--- a/src/web/schemas/faces.py
+++ b/src/web/schemas/faces.py
@@ -99,6 +99,34 @@ class SimilarFacesResponse(BaseModel):
items: list[SimilarFaceItem]
+class BatchSimilarityRequest(BaseModel):
+ """Request to get similarities between multiple faces."""
+
+ model_config = ConfigDict(protected_namespaces=())
+
+ face_ids: list[int] = Field(..., description="List of face IDs to calculate similarities for")
+ min_confidence: float = Field(60.0, ge=0.0, le=100.0, description="Minimum confidence percentage (0-100)")
+
+
+class FaceSimilarityPair(BaseModel):
+ """A pair of similar faces with their similarity score."""
+
+ model_config = ConfigDict(protected_namespaces=())
+
+ face_id_1: int
+ face_id_2: int
+ similarity: float # 0-1 range
+ confidence_pct: float # 0-100 range
+
+
+class BatchSimilarityResponse(BaseModel):
+ """Response containing similarities between face pairs."""
+
+ model_config = ConfigDict(protected_namespaces=())
+
+ pairs: list[FaceSimilarityPair] = Field(..., description="List of similar face pairs")
+
+
class IdentifyFaceRequest(BaseModel):
"""Identify a face by selecting existing or creating new person."""
diff --git a/src/web/services/face_service.py b/src/web/services/face_service.py
index aa6572c..22b0391 100644
--- a/src/web/services/face_service.py
+++ b/src/web/services/face_service.py
@@ -1256,13 +1256,27 @@ def calculate_cosine_distance(encoding1: np.ndarray, encoding2: np.ndarray) -> f
Desktop: _calculate_cosine_similarity returns distance (0 = identical, 2 = opposite)
This matches the desktop implementation exactly.
+
+ Optimized: Avoids unnecessary array creation if inputs are already numpy arrays.
"""
try:
- # Ensure encodings are numpy arrays
- enc1 = np.array(encoding1).flatten()
- enc2 = np.array(encoding2).flatten()
+ # Optimize: Check if already numpy arrays to avoid unnecessary conversions
+ # Since we pre-load encodings as 1D arrays from np.frombuffer(), we can use them directly
+ if isinstance(encoding1, np.ndarray):
+ # Already numpy array - use directly if 1D, otherwise flatten (creates view, not copy)
+ enc1 = encoding1 if encoding1.ndim == 1 else encoding1.flatten()
+ else:
+ # Convert to numpy array only if needed (rare case)
+ enc1 = np.asarray(encoding1, dtype=np.float64).flatten()
- # Check if encodings have the same length
+ if isinstance(encoding2, np.ndarray):
+ # Already numpy array - use directly if 1D, otherwise flatten (creates view, not copy)
+ enc2 = encoding2 if encoding2.ndim == 1 else encoding2.flatten()
+ else:
+ # Convert to numpy array only if needed (rare case)
+ enc2 = np.asarray(encoding2, dtype=np.float64).flatten()
+
+ # Check if encodings have the same length (all ArcFace encodings are 512-dim, but check for safety)
if len(enc1) != len(enc2):
return 2.0 # Maximum distance on mismatch
@@ -1498,6 +1512,107 @@
     return matches[:limit]
+def calculate_batch_similarities(
+    db: Session,
+    face_ids: list[int],
+    min_confidence: float = 60.0,
+) -> list[tuple[int, int, float, float]]:
+    """Calculate similarities between the requested faces and all faces in the DB.
+
+    Loads every stored encoding once, pre-normalizes it, then computes all
+    cosine similarities in a single (N, M) matrix multiplication instead of
+    issuing one query/loop per requested face.
+
+    Args:
+        db: Database session.
+        face_ids: Face IDs to calculate similarities for (the N request faces).
+        min_confidence: Minimum calibrated confidence percentage (0-100)
+            required for a pair to be included.
+
+    Returns:
+        List of (face_id_1, face_id_2, similarity, confidence_pct) tuples where
+        face_id_1 comes from the request list and face_id_2 ranges over all DB
+        faces; similarity is in [0, 1] and confidence_pct is in [0, 100].
+        Pairs where both faces were requested may appear in both orderings.
+    """
+    from src.core.config import DEFAULT_FACE_TOLERANCE
+
+    if not face_ids:
+        return []
+
+    # Load all faces once; skip rows without a stored encoding so that
+    # np.frombuffer() below cannot fail on NULL data.
+    all_faces: list[Face] = [
+        f for f in db.query(Face).all() if f.encoding is not None
+    ]
+    if not all_faces:
+        return []
+
+    all_face_map = {f.id: f for f in all_faces}
+    request_faces: list[Face] = [
+        all_face_map[fid] for fid in face_ids if fid in all_face_map
+    ]
+    if not request_faces:
+        return []
+
+    # Decode and L2-normalize every encoding exactly once so that cosine
+    # similarity below reduces to a plain dot product.
+    # NOTE(review): assumes encodings were serialized as float64 bytes -
+    # confirm against the code that writes Face.encoding.
+    def _unit(enc: np.ndarray) -> np.ndarray:
+        norm = np.linalg.norm(enc)
+        # Zero-norm encodings are left as-is (degenerate edge case).
+        return enc / (norm + 1e-8) if norm > 0 else enc
+
+    all_face_ids = [f.id for f in all_faces]
+    all_qualities = {
+        f.id: float(f.quality_score) if f.quality_score is not None else 0.5
+        for f in all_faces
+    }
+    normalized = {
+        f.id: _unit(np.frombuffer(f.encoding, dtype=np.float64))
+        for f in all_faces
+    }
+
+    request_face_ids = [f.id for f in request_faces]
+
+    # (N, D) @ (D, M) -> (N, M): all cosine similarities in one shot.
+    request_matrix = np.array([normalized[fid] for fid in request_face_ids])
+    all_matrix = np.array([normalized[fid] for fid in all_face_ids])
+    similarity_matrix = np.clip(request_matrix @ all_matrix.T, -1.0, 1.0)
+
+    # Cosine distance in [0, 2]: 0 = identical, 2 = opposite.
+    distance_matrix = 1.0 - similarity_matrix
+
+    pairs: list[tuple[int, int, float, float]] = []
+    tolerance = DEFAULT_FACE_TOLERANCE
+
+    for i, face_id_1 in enumerate(request_face_ids):
+        quality_1 = all_qualities[face_id_1]
+
+        for j, face_id_2 in enumerate(all_face_ids):
+            if face_id_1 == face_id_2:
+                continue  # never pair a face with itself
+
+            distance = float(distance_matrix[i, j])
+            quality_2 = all_qualities[face_id_2]
+
+            # Lower-quality faces get a looser (adaptive) tolerance.
+            avg_quality = (quality_1 + quality_2) / 2
+            if distance > calculate_adaptive_tolerance(tolerance, avg_quality):
+                continue
+
+            confidence_pct = calibrate_confidence(distance, tolerance)
+            if confidence_pct < min_confidence:
+                continue
+
+            # Map distance [0, 2] onto similarity [0, 1] (higher = closer).
+            similarity = max(0.0, min(1.0, 1.0 - distance / 2.0))
+            pairs.append((face_id_1, face_id_2, similarity, confidence_pct))
+
+    return pairs
+
+
def find_auto_match_matches(
db: Session,
tolerance: float = 0.6,