diff --git a/admin-frontend/src/api/faces.ts b/admin-frontend/src/api/faces.ts index e173cd1..608f3ac 100644 --- a/admin-frontend/src/api/faces.ts +++ b/admin-frontend/src/api/faces.ts @@ -251,6 +251,7 @@ export const facesApi = { }, getAutoMatchPeople: async (params?: { filter_frontal_only?: boolean + tolerance?: number }): Promise => { const response = await apiClient.get('/api/v1/faces/auto-match/people', { params, diff --git a/admin-frontend/src/pages/AutoMatch.tsx b/admin-frontend/src/pages/AutoMatch.tsx index 6ecde7b..13e55f0 100644 --- a/admin-frontend/src/pages/AutoMatch.tsx +++ b/admin-frontend/src/pages/AutoMatch.tsx @@ -7,7 +7,7 @@ import peopleApi, { Person } from '../api/people' import { apiClient } from '../api/client' import { useDeveloperMode } from '../context/DeveloperModeContext' -const DEFAULT_TOLERANCE = 0.6 +const DEFAULT_TOLERANCE = 0.5 export default function AutoMatch() { const { isDeveloperMode } = useDeveloperMode() @@ -16,8 +16,8 @@ export default function AutoMatch() { const [isActive, setIsActive] = useState(false) const [people, setPeople] = useState([]) const [filteredPeople, setFilteredPeople] = useState([]) - // Store matches separately, keyed by person_id - const [matchesCache, setMatchesCache] = useState>({}) + // Store matches separately, keyed by person_id_tolerance (composite key) + const [matchesCache, setMatchesCache] = useState>({}) const [currentIndex, setCurrentIndex] = useState(0) const [searchQuery, setSearchQuery] = useState('') const [allPeople, setAllPeople] = useState([]) @@ -44,6 +44,8 @@ export default function AutoMatch() { const [stateRestored, setStateRestored] = useState(false) // Track if initial restoration is complete (prevents reload effects from firing during restoration) const restorationCompleteRef = useRef(false) + // Track current tolerance in a ref to avoid stale closures + const toleranceRef = useRef(tolerance) const currentPerson = useMemo(() => { const activePeople = filteredPeople.length > 0 ? 
filteredPeople : people @@ -52,30 +54,49 @@ export default function AutoMatch() { const currentMatches = useMemo(() => { if (!currentPerson) return [] - return matchesCache[currentPerson.person_id] || [] - }, [currentPerson, matchesCache]) + // Use ref tolerance to ensure we always get the current tolerance value + const currentTolerance = toleranceRef.current + const cacheKey = `${currentPerson.person_id}_${currentTolerance}` + return matchesCache[cacheKey] || [] + }, [currentPerson, matchesCache, tolerance]) // Keep tolerance in deps to trigger recalculation when it changes // Check if any matches are selected const hasSelectedMatches = useMemo(() => { - return currentMatches.some(match => selectedFaces[match.id] === true) + return currentMatches.some((match: AutoMatchFaceItem) => selectedFaces[match.id] === true) }, [currentMatches, selectedFaces]) + // Update tolerance ref whenever tolerance changes + useEffect(() => { + toleranceRef.current = tolerance + }, [tolerance]) + // Load matches for a specific person (lazy loading) - const loadPersonMatches = async (personId: number) => { - // Skip if already cached - if (matchesCache[personId]) { - return + const loadPersonMatches = async (personId: number, currentTolerance?: number) => { + // Use provided tolerance, or ref tolerance (always current), or state tolerance as fallback + const toleranceToUse = currentTolerance !== undefined ? 
currentTolerance : toleranceRef.current + // Create cache key that includes tolerance to avoid stale matches + const cacheKey = `${personId}_${toleranceToUse}` + + // Double-check: if tolerance changed, don't use cached value + if (toleranceToUse !== toleranceRef.current) { + // Tolerance changed since this was called, don't use cache + // Will fall through to load fresh matches + } else { + // Skip if already cached for this tolerance + if (matchesCache[cacheKey]) { + return + } } try { const response = await facesApi.getAutoMatchPersonMatches(personId, { - tolerance, + tolerance: toleranceToUse, filter_frontal_only: false }) setMatchesCache(prev => ({ ...prev, - [personId]: response.matches + [cacheKey]: response.matches })) // Update total_matches in people list @@ -106,9 +127,10 @@ export default function AutoMatch() { } catch (error) { console.error('Failed to load matches for person:', error) // Set empty matches on error, and remove person from list + // Use composite cache key setMatchesCache(prev => ({ ...prev, - [personId]: [] + [cacheKey]: [] })) // Remove person if matches failed to load (assume no matches) setPeople(prev => prev.filter(p => p.person_id !== personId)) @@ -118,7 +140,10 @@ export default function AutoMatch() { // Shared function for auto-load and refresh (loads people list only - fast) const loadAutoMatch = async (clearState: boolean = false) => { - if (tolerance < 0 || tolerance > 1) { + // Use ref to get current tolerance (avoids stale closure) + const currentTolerance = toleranceRef.current + + if (currentTolerance < 0 || currentTolerance > 1) { return } @@ -128,12 +153,30 @@ export default function AutoMatch() { // Clear saved state if explicitly requested (Refresh button) if (clearState) { sessionStorage.removeItem(STATE_KEY) - setMatchesCache({}) // Clear matches cache + // Clear ALL cache entries + setMatchesCache({}) + } else { + // Also clear any cache entries that don't match current tolerance (even if not explicitly clearing) 
+ setMatchesCache(prev => { + const cleaned: Record = {} + // Only keep cache entries that match current tolerance + Object.keys(prev).forEach(key => { + const parts = key.split('_') + if (parts.length >= 2) { + const cachedTolerance = parseFloat(parts[parts.length - 1]) + if (!isNaN(cachedTolerance) && cachedTolerance === currentTolerance) { + cleaned[key] = prev[key] + } + } + }) + return cleaned + }) } // Load people list only (fast - no match calculations) const response = await facesApi.getAutoMatchPeople({ - filter_frontal_only: false + filter_frontal_only: false, + tolerance: currentTolerance }) if (response.people.length === 0) { @@ -154,9 +197,9 @@ export default function AutoMatch() { setOriginalSelectedFaces({}) setIsActive(true) - // Load matches for first person immediately + // Load matches for first person immediately with current tolerance if (response.people.length > 0) { - await loadPersonMatches(response.people[0].person_id) + await loadPersonMatches(response.people[0].person_id, currentTolerance) } } catch (error) { console.error('Auto-match failed:', error) @@ -261,7 +304,7 @@ export default function AutoMatch() { const matchesCacheRef = useRef(matchesCache) const isActiveRef = useRef(isActive) const hasNoResultsRef = useRef(hasNoResults) - const toleranceRef = useRef(tolerance) + // Note: toleranceRef is already declared above, don't redeclare // Update refs whenever state changes useEffect(() => { @@ -355,7 +398,15 @@ export default function AutoMatch() { if (initialLoadRef.current && restorationCompleteRef.current) { // Clear matches cache when tolerance changes (matches depend on tolerance) setMatchesCache({}) - loadAutoMatch() + // Clear people list to force fresh load with new tolerance + setPeople([]) + setFilteredPeople([]) + setSelectedFaces({}) + setOriginalSelectedFaces({}) + setCurrentIndex(0) + setIsActive(false) + // Reload with new tolerance + loadAutoMatch(true) // Pass true to clear sessionStorage as well } // 
eslint-disable-next-line react-hooks/exhaustive-deps }, [tolerance]) @@ -457,7 +508,7 @@ export default function AutoMatch() { const selectAll = () => { const newSelected: Record = {} - currentMatches.forEach(match => { + currentMatches.forEach((match: AutoMatchFaceItem) => { newSelected[match.id] = true }) setSelectedFaces(newSelected) @@ -465,7 +516,7 @@ export default function AutoMatch() { const clearAll = () => { const newSelected: Record = {} - currentMatches.forEach(match => { + currentMatches.forEach((match: AutoMatchFaceItem) => { newSelected[match.id] = false }) setSelectedFaces(newSelected) @@ -477,14 +528,14 @@ export default function AutoMatch() { setSaving(true) try { const faceIds = currentMatches - .filter(match => selectedFaces[match.id] === true) - .map(match => match.id) + .filter((match: AutoMatchFaceItem) => selectedFaces[match.id] === true) + .map((match: AutoMatchFaceItem) => match.id) await peopleApi.acceptMatches(currentPerson.person_id, faceIds) // Update original selected faces to current state const newOriginal: Record = {} - currentMatches.forEach(match => { + currentMatches.forEach((match: AutoMatchFaceItem) => { newOriginal[match.id] = selectedFaces[match.id] || false }) setOriginalSelectedFaces(prev => ({ ...prev, ...newOriginal })) @@ -498,33 +549,45 @@ export default function AutoMatch() { } } - // Load matches when current person changes (lazy loading) + // Load matches when current person changes OR tolerance changes (lazy loading) useEffect(() => { if (currentPerson && restorationCompleteRef.current) { - loadPersonMatches(currentPerson.person_id) + // Always use ref tolerance (always current) to avoid stale matches + const currentTolerance = toleranceRef.current + + // Force reload when tolerance changes - clear cache for this person first + const cacheKey = `${currentPerson.person_id}_${currentTolerance}` + if (!matchesCache[cacheKey]) { + // Only load if not already cached for current tolerance + 
loadPersonMatches(currentPerson.person_id, currentTolerance) + } // Preload matches for next person in background const activePeople = filteredPeople.length > 0 ? filteredPeople : people if (currentIndex + 1 < activePeople.length) { const nextPerson = activePeople[currentIndex + 1] - loadPersonMatches(nextPerson.person_id) + const nextCacheKey = `${nextPerson.person_id}_${currentTolerance}` + if (!matchesCache[nextCacheKey]) { + loadPersonMatches(nextPerson.person_id, currentTolerance) + } } } // eslint-disable-next-line react-hooks/exhaustive-deps - }, [currentPerson?.person_id, currentIndex]) + }, [currentPerson?.person_id, currentIndex, tolerance]) // Restore selected faces when navigating to a different person useEffect(() => { if (currentPerson) { - const matches = matchesCache[currentPerson.person_id] || [] + const cacheKey = `${currentPerson.person_id}_${tolerance}` + const matches = matchesCache[cacheKey] || [] const restored: Record = {} - matches.forEach(match => { + matches.forEach((match: AutoMatchFaceItem) => { restored[match.id] = originalSelectedFaces[match.id] || false }) setSelectedFaces(restored) } // eslint-disable-next-line react-hooks/exhaustive-deps - }, [currentIndex, filteredPeople.length, people.length, currentPerson?.person_id, matchesCache]) + }, [currentIndex, filteredPeople.length, people.length, currentPerson?.person_id, matchesCache, tolerance]) const goBack = () => { if (currentIndex > 0) { diff --git a/backend/api/faces.py b/backend/api/faces.py index 64f9f4f..8bafeda 100644 --- a/backend/api/faces.py +++ b/backend/api/faces.py @@ -211,7 +211,8 @@ def get_similar_faces( raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Face {face_id} not found") logger.info(f"API: Calling find_similar_faces for face_id={face_id}, include_excluded={include_excluded}") - results = find_similar_faces(db, face_id, include_excluded=include_excluded) + # Use 0.6 tolerance for Identify People (more lenient for manual review) + results = 
find_similar_faces(db, face_id, tolerance=0.6, include_excluded=include_excluded) logger.info(f"API: find_similar_faces returned {len(results)} results") items = [ @@ -246,10 +247,12 @@ def get_batch_similarities( logger.info(f"API: batch_similarity called for {len(request.face_ids)} faces") # Calculate similarities between all pairs + # Use 0.6 tolerance for Identify People (more lenient for manual review) pairs = calculate_batch_similarities( db, request.face_ids, min_confidence=request.min_confidence, + tolerance=0.6, ) # Convert to response format @@ -747,7 +750,7 @@ def auto_match_faces( @router.get("/auto-match/people", response_model=AutoMatchPeopleResponse) def get_auto_match_people( filter_frontal_only: bool = Query(False, description="Only include frontal/tilted reference faces"), - tolerance: float = Query(0.6, ge=0.0, le=1.0, description="Tolerance threshold"), + tolerance: float = Query(0.5, ge=0.0, le=1.0, description="Tolerance threshold"), db: Session = Depends(get_db), ) -> AutoMatchPeopleResponse: """Get list of people for auto-match (without matches) - fast initial load. 
@@ -810,7 +813,7 @@ def get_auto_match_people( @router.get("/auto-match/people/{person_id}/matches", response_model=AutoMatchPersonMatchesResponse) def get_auto_match_person_matches( person_id: int, - tolerance: float = Query(0.6, ge=0.0, le=1.0, description="Tolerance threshold"), + tolerance: float = Query(0.5, ge=0.0, le=1.0, description="Tolerance threshold"), filter_frontal_only: bool = Query(False, description="Only return frontal/tilted faces"), db: Session = Depends(get_db), ) -> AutoMatchPersonMatchesResponse: diff --git a/backend/config.py b/backend/config.py index 0d6c576..2cf11e5 100644 --- a/backend/config.py +++ b/backend/config.py @@ -22,8 +22,13 @@ MIN_FACE_SIZE = 40 MAX_FACE_SIZE = 1500 # Matching tolerance and calibration options -DEFAULT_FACE_TOLERANCE = 0.6 +DEFAULT_FACE_TOLERANCE = 0.5 # Lowered from 0.6 for stricter matching USE_CALIBRATED_CONFIDENCE = True CONFIDENCE_CALIBRATION_METHOD = "empirical" # "empirical", "linear", or "sigmoid" +# Auto-match face size filtering +# Minimum face size as percentage of image area (0.5% = 0.005) +# Faces smaller than this are excluded from auto-match to avoid generic encodings +MIN_AUTO_MATCH_FACE_SIZE_RATIO = 0.005 # 0.5% of image area + diff --git a/backend/schemas/faces.py b/backend/schemas/faces.py index cc3f3d4..768ccba 100644 --- a/backend/schemas/faces.py +++ b/backend/schemas/faces.py @@ -212,7 +212,7 @@ class AutoMatchRequest(BaseModel): model_config = ConfigDict(protected_namespaces=()) - tolerance: float = Field(0.6, ge=0.0, le=1.0, description="Tolerance threshold (lower = stricter matching)") + tolerance: float = Field(0.5, ge=0.0, le=1.0, description="Tolerance threshold (lower = stricter matching)") auto_accept: bool = Field(False, description="Enable automatic acceptance of matching faces") auto_accept_threshold: float = Field(70.0, ge=0.0, le=100.0, description="Similarity threshold for auto-acceptance (0-100%)") diff --git a/backend/services/face_service.py 
def load_face_encoding(encoding_bytes: bytes) -> np.ndarray:
    """Decode a stored face encoding, auto-detecting its dtype from the byte length.

    ArcFace encodings have 512 dimensions, so only two payload sizes are valid:
    - float32: 512 * 4 bytes = 2048 bytes (old format)
    - float64: 512 * 8 bytes = 4096 bytes (new format)

    Args:
        encoding_bytes: Raw encoding bytes from database.

    Returns:
        1-D float64 numpy array with 512 values. A float32 payload is widened
        into a new float64 array; a float64 payload is returned as a view over
        ``encoding_bytes`` (read-only when backed by immutable ``bytes``), so
        callers that need to mutate the result should ``.copy()`` it.

    Raises:
        ValueError: If the payload is not exactly 2048 or 4096 bytes long.
    """
    dimensions = 512
    float32_size = dimensions * np.dtype(np.float32).itemsize  # 2048
    float64_size = dimensions * np.dtype(np.float64).itemsize  # 4096
    encoding_size = len(encoding_bytes)

    if encoding_size == float32_size:
        # Old format: widen to float64 so every caller sees a single dtype.
        return np.frombuffer(encoding_bytes, dtype=np.float32).astype(np.float64)

    if encoding_size == float64_size:
        return np.frombuffer(encoding_bytes, dtype=np.float64)

    # Any other length cannot hold 512 float32/float64 values, so there is
    # nothing to "try": fail with one clear, consistent message instead of
    # probing with np.frombuffer and swallowing its errors.
    raise ValueError(
        f"Unexpected encoding size: {encoding_size} bytes (expected 2048 or 4096)"
    )
@@ -1734,7 +1779,10 @@ def calculate_adaptive_tolerance(base_tolerance: float, face_quality: float) -> tolerance *= quality_factor # Ensure tolerance stays within reasonable bounds for DeepFace - return max(0.2, min(0.6, tolerance)) + # Allow tolerance down to 0.0 (user can set very strict matching) + # Allow tolerance up to 1.0 (matching API validation range) + # The quality factor can increase tolerance up to 1.1x, so cap at 1.0 to stay within API limits + return max(0.0, min(1.0, tolerance)) def calibrate_confidence(distance: float, tolerance: float = None) -> float: @@ -1768,27 +1816,34 @@ def calibrate_confidence(distance: float, tolerance: float = None) -> float: else: # "empirical" - default method (matching desktop exactly) # Empirical calibration parameters for DeepFace ArcFace model # These are derived from analysis of distance distributions for matching/non-matching pairs + # Moderate calibration: stricter than original but not too strict + + # For very close distances (< 0.12): very high confidence + if distance <= 0.12: + # Very close matches: exponential decay from 100% + confidence = 100 * np.exp(-distance * 2.8) + return min(100, max(92, confidence)) # For distances well below threshold: high confidence - if distance <= tolerance * 0.5: - # Very close matches: exponential decay from 100% - confidence = 100 * np.exp(-distance * 2.5) - return min(100, max(95, confidence)) + elif distance <= tolerance * 0.5: + # Close matches: exponential decay + confidence = 100 * np.exp(-distance * 2.6) + return min(92, max(82, confidence)) # For distances near threshold: moderate confidence elif distance <= tolerance: # Near-threshold matches: sigmoid-like curve # Maps distance to probability based on empirical data normalized_distance = (distance - tolerance * 0.5) / (tolerance * 0.5) - confidence = 95 - (normalized_distance * 40) # 95% to 55% range - return max(55, min(95, confidence)) + confidence = 82 - (normalized_distance * 32) # 82% to 50% range + return 
def _calculate_face_size_ratio(face: Face, photo: Photo) -> float:
    """Calculate face size as a ratio of the image area.

    The face area comes from the ``w`` and ``h`` entries of ``face.location``
    (a JSON string or an already-parsed mapping); the image area comes from the
    dimensions of the file at ``photo.path``.

    Args:
        face: Face model with location
        photo: Photo model (needed for path to load image dimensions)

    Returns:
        ``face_area / image_area``, or 0.0 if it cannot be calculated (zero
        face area, missing file, zero image area, or any error while parsing
        the location or opening the image).
    """
    try:
        import json
        from PIL import Image

        # Location may be stored as a JSON string or as a parsed mapping.
        location = json.loads(face.location) if isinstance(face.location, str) else face.location
        face_area = location.get('w', 0) * location.get('h', 0)
        if face_area == 0:
            return 0.0

        photo_path = Path(photo.path)
        if not photo_path.exists():
            return 0.0

        # This runs once per candidate face during matching, so close the file
        # promptly instead of waiting for garbage collection to release it.
        with Image.open(photo_path) as img:
            img_width, img_height = img.size

        image_area = img_width * img_height
        if image_area == 0:
            return 0.0

        return face_area / image_area
    except Exception:
        # Deliberate best-effort: callers treat 0.0 as "size unknown / too small".
        return 0.0
@@ -1836,9 +1931,11 @@ def find_similar_faces( db: Session, face_id: int, limit: int = 20000, # Very high default limit - effectively unlimited - tolerance: float = 0.6, # DEFAULT_FACE_TOLERANCE from desktop + tolerance: float = 0.5, # DEFAULT_FACE_TOLERANCE filter_frontal_only: bool = False, # New: Only return frontal or tilted faces (not profile) include_excluded: bool = False, # Include excluded faces in results + filter_small_faces: bool = False, # Filter out small faces (for auto-match) + min_face_size_ratio: float = 0.005, # Minimum face size ratio (0.5% of image) ) -> List[Tuple[Face, float, float]]: # Returns (face, distance, confidence_pct) """Find similar faces matching desktop logic exactly. @@ -1867,29 +1964,31 @@ def find_similar_faces( if not base: return [] - # Load base encoding - desktop uses float64, ArcFace has 512 dimensions - # Stored as float64: 512 * 8 bytes = 4096 bytes - base_enc = np.frombuffer(base.encoding, dtype=np.float64) + # Load base encoding - auto-detect dtype (supports both float32 and float64) + base_enc = load_face_encoding(base.encoding) base_enc = base_enc.copy() # Make a copy to avoid buffer issues - # Desktop uses 0.5 as default quality for target face (hardcoded, matching desktop exactly) - # Desktop: target_quality = 0.5 # Default quality for target face - base_quality = 0.5 + # Use actual quality score of the reference face, defaulting to 0.5 if not set + # This ensures adaptive tolerance is calculated correctly based on the actual face quality + base_quality = float(base.quality_score) if base.quality_score is not None else 0.5 # Desktop: get ALL faces from database (matching get_all_face_encodings) # Desktop find_similar_faces gets ALL faces, doesn't filter by photo_id - # Get all faces except itself, with photo loaded + # However, for auto-match, we should exclude faces from the same photo to avoid + # duplicate detections of the same face (same encoding stored multiple times) + # Get all faces except itself and faces 
from the same photo, with photo loaded all_faces: List[Face] = ( db.query(Face) .options(joinedload(Face.photo)) .filter(Face.id != face_id) + .filter(Face.photo_id != base.photo_id) # Exclude faces from same photo .all() ) matches: List[Tuple[Face, float, float]] = [] for f in all_faces: - # Load other encoding - desktop uses float64, ArcFace has 512 dimensions - other_enc = np.frombuffer(f.encoding, dtype=np.float64) + # Load other encoding - auto-detect dtype (supports both float32 and float64) + other_enc = load_face_encoding(f.encoding) other_enc = other_enc.copy() # Make a copy to avoid buffer issues other_quality = float(f.quality_score) if f.quality_score is not None else 0.5 @@ -1902,18 +2001,24 @@ def find_similar_faces( distance = calculate_cosine_distance(base_enc, other_enc) # Filter by distance <= adaptive_tolerance (matching desktop find_similar_faces) + # DEBUG: Log tolerance usage for troubleshooting if distance <= adaptive_tolerance: # Get photo info (desktop does this in find_similar_faces) if f.photo: # Calculate calibrated confidence (matching desktop _get_filtered_similar_faces) - confidence_pct = calibrate_confidence(distance, DEFAULT_FACE_TOLERANCE) + # Use the actual tolerance parameter, not the default + confidence_pct = calibrate_confidence(distance, tolerance) # Desktop _get_filtered_similar_faces filters by: # 1. person_id is None (unidentified) - # 2. confidence >= 40% + # 2. 
confidence >= 50% (increased from 40% to reduce false matches) is_unidentified = f.person_id is None - if is_unidentified and confidence_pct >= 40: + # DEBUG: Log all faces that pass distance check + if is_unidentified: + print(f"DEBUG: Face {f.id} - distance={distance:.4f}, adaptive_tolerance={adaptive_tolerance:.4f}, base_tolerance={tolerance:.4f}, confidence={confidence_pct:.2f}%, passed_distance={distance <= adaptive_tolerance}, passed_confidence={confidence_pct >= 50}") + + if is_unidentified and confidence_pct >= 50: # Filter by excluded status if not including excluded faces if not include_excluded and getattr(f, "excluded", False): continue @@ -1922,6 +2027,13 @@ def find_similar_faces( if filter_frontal_only and not _is_acceptable_pose_for_auto_match(f.pose_mode): continue + # Filter by face size if requested (for auto-match) + if filter_small_faces: + if f.photo: + face_size_ratio = _calculate_face_size_ratio(f, f.photo) + if face_size_ratio < min_face_size_ratio: + continue # Skip small faces + # Return calibrated confidence percentage (matching desktop) # Desktop displays confidence_pct directly from _get_calibrated_confidence matches.append((f, distance, confidence_pct)) @@ -1937,6 +2049,7 @@ def calculate_batch_similarities( db: Session, face_ids: list[int], min_confidence: float = 60.0, + tolerance: float = 0.6, # Use 0.6 for Identify People (more lenient for manual review) ) -> list[tuple[int, int, float, float]]: """Calculate similarities between N faces and all M faces in database. 
@@ -1986,7 +2099,7 @@ def calculate_batch_similarities( for face in all_faces: # Pre-load encoding as numpy array - all_encodings[face.id] = np.frombuffer(face.encoding, dtype=np.float64) + all_encodings[face.id] = load_face_encoding(face.encoding) # Pre-cache quality score all_qualities[face.id] = float(face.quality_score) if face.quality_score is not None else 0.5 @@ -2082,7 +2195,7 @@ def calculate_batch_similarities( def find_auto_match_matches( db: Session, - tolerance: float = 0.6, + tolerance: float = 0.5, filter_frontal_only: bool = False, ) -> List[Tuple[int, int, Face, List[Tuple[Face, float, float]]]]: """Find auto-match matches for all identified people, matching desktop logic exactly. @@ -2176,15 +2289,26 @@ def find_auto_match_matches( for person_id, reference_face, person_name in person_faces_list: reference_face_id = reference_face.id + # Check if reference face is too small (exclude from auto-match) + reference_photo = db.query(Photo).filter(Photo.id == reference_face.photo_id).first() + if reference_photo: + ref_size_ratio = _calculate_face_size_ratio(reference_face, reference_photo) + if ref_size_ratio < MIN_AUTO_MATCH_FACE_SIZE_RATIO: + # Skip this person - reference face is too small + continue + # Use find_similar_faces which matches desktop _get_filtered_similar_faces logic # Desktop: similar_faces = self.face_processor._get_filtered_similar_faces( # reference_face_id, tolerance, include_same_photo=False, face_status=None) - # This filters by: person_id is None (unidentified), confidence >= 40%, sorts by distance + # This filters by: person_id is None (unidentified), confidence >= 50% (increased from 40%), sorts by distance # Auto-match always excludes excluded faces + # filter_small_faces=True to exclude small match faces similar_faces = find_similar_faces( db, reference_face_id, tolerance=tolerance, filter_frontal_only=filter_frontal_only, - include_excluded=False # Auto-match always excludes excluded faces + include_excluded=False, # 
Auto-match always excludes excluded faces + filter_small_faces=True, # Exclude small faces from auto-match + min_face_size_ratio=MIN_AUTO_MATCH_FACE_SIZE_RATIO ) if similar_faces: @@ -2196,7 +2320,7 @@ def find_auto_match_matches( def get_auto_match_people_list( db: Session, filter_frontal_only: bool = False, - tolerance: float = 0.6, + tolerance: float = 0.5, ) -> List[Tuple[int, Face, str, int]]: """Get list of people for auto-match (without matches) - fast initial load. @@ -2300,7 +2424,7 @@ def get_auto_match_people_list( def get_auto_match_person_matches( db: Session, person_id: int, - tolerance: float = 0.6, + tolerance: float = 0.5, filter_frontal_only: bool = False, ) -> List[Tuple[Face, float, float]]: """Get matches for a specific person - for lazy loading. @@ -2329,11 +2453,14 @@ def get_auto_match_person_matches( # Find similar faces using existing function # Auto-match always excludes excluded faces + # DEBUG: Log tolerance being used + print(f"DEBUG get_auto_match_person_matches: person_id={person_id}, tolerance={tolerance}, reference_face_id={reference_face.id}") similar_faces = find_similar_faces( db, reference_face.id, tolerance=tolerance, filter_frontal_only=filter_frontal_only, include_excluded=False # Auto-match always excludes excluded faces ) + print(f"DEBUG get_auto_match_person_matches: Found {len(similar_faces)} matches for person {person_id} with tolerance {tolerance}") return similar_faces