punimtag/test_face_recognition.py
tanyar09 e49b567afa Remove deprecated files and refactor codebase for improved maintainability
This commit deletes the `photo_tagger_refactored.py`, `run.sh`, and test files (`test_basic.py`, `test_deepface_gui.py`, `test_face_recognition.py`) that are no longer in use. The removal of these files streamlines the project structure and eliminates legacy code, paving the way for future enhancements and a cleaner codebase. The README has been updated to reflect these changes, ensuring clarity on the current state of the project.
2025-10-15 12:44:02 -04:00

524 lines
22 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Face Recognition Comparison Test Script
Compares face_recognition vs deepface on a folder of photos.
Tests accuracy and performance without modifying existing database.
Usage:
python test_face_recognition.py /path/to/photos [--save-crops] [--save-matrices] [--verbose]
Example:
python test_face_recognition.py demo_photos/ --save-crops --verbose
"""
import os
import sys
import time
import argparse
import tempfile
from pathlib import Path
from typing import List, Dict, Tuple, Optional
import numpy as np
import pandas as pd
from PIL import Image
# Face recognition libraries
import face_recognition
from deepface import DeepFace
# Supported image formats: lowercase file extensions (leading dot included).
# Candidate files are matched by comparing their lowercased suffix against
# this set, so entries must stay lowercase.
SUPPORTED_FORMATS = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}
class FaceRecognitionTester:
    """Run face_recognition and deepface over the same photos and compare them.

    For every image both libraries detect faces and compute one encoding per
    face. The encodings of each library are then compared pairwise (distance
    matrix), the closest faces are listed per query face, and a plain-text
    report summarising counts, timings and high-confidence matches is built.

    Attributes:
        verbose: When True, INFO messages are printed; ERROR messages are
            always printed.
        results: Per-library accumulators with the keys ``faces`` (face
            dicts), ``times`` (seconds per image) and ``encodings``.
    """

    def __init__(self, verbose: bool = False):
        self.verbose = verbose
        self.results = self._empty_results()

    @staticmethod
    def _empty_results() -> Dict:
        """Return fresh, empty per-library accumulators."""
        return {
            'face_recognition': {'faces': [], 'times': [], 'encodings': []},
            'deepface': {'faces': [], 'times': [], 'encodings': []},
        }

    def log(self, message: str, level: str = "INFO"):
        """Print a timestamped message.

        Messages are suppressed unless ``self.verbose`` is set or ``level``
        is ``"ERROR"``.
        """
        if self.verbose or level == "ERROR":
            timestamp = time.strftime("%H:%M:%S")
            print(f"[{timestamp}] {level}: {message}")

    def get_image_files(self, folder_path: str) -> List[str]:
        """Return the sorted paths of all supported images below a folder.

        The folder is searched recursively; a file qualifies when its
        lowercased suffix is in ``SUPPORTED_FORMATS``.

        Raises:
            FileNotFoundError: If ``folder_path`` does not exist.
        """
        folder = Path(folder_path)
        if not folder.exists():
            raise FileNotFoundError(f"Folder not found: {folder_path}")
        image_files = [
            str(candidate)
            for candidate in folder.rglob("*")
            if candidate.is_file() and candidate.suffix.lower() in SUPPORTED_FORMATS
        ]
        self.log(f"Found {len(image_files)} image files")
        return sorted(image_files)

    def process_with_face_recognition(self, image_path: str) -> Dict:
        """Detect and encode the faces of one image with face_recognition.

        Returns:
            A dict with ``faces`` (list of face dicts holding ``image_path``,
            ``face_id``, ``location``, ``bbox`` and ``encoding``),
            ``encodings`` (the encodings in the same order) and
            ``processing_time`` in seconds. On any error the failure is
            logged and empty lists are returned so one bad image does not
            abort the whole comparison.
        """
        start_time = time.time()
        try:
            image = face_recognition.load_image_file(image_path)
            # Detect faces using CNN model (more accurate than HOG)
            face_locations = face_recognition.face_locations(image, model="cnn")
            if not face_locations:
                return {'faces': [], 'encodings': [], 'processing_time': time.time() - start_time}
            face_encodings = face_recognition.face_encodings(image, face_locations)

            faces = []
            encodings = []
            stem = Path(image_path).stem
            for i, (location, encoding) in enumerate(zip(face_locations, face_encodings)):
                # face_recognition reports boxes as (top, right, bottom, left).
                top, right, bottom, left = location
                faces.append({
                    'image_path': image_path,
                    'face_id': f"fr_{stem}_{i}",
                    'location': location,
                    'bbox': {'top': top, 'right': right, 'bottom': bottom, 'left': left},
                    'encoding': encoding,
                })
                encodings.append(encoding)

            processing_time = time.time() - start_time
            self.log(f"face_recognition: Found {len(faces)} faces in {processing_time:.2f}s")
            return {
                'faces': faces,
                'encodings': encodings,
                'processing_time': processing_time,
            }
        except Exception as e:  # deliberate best effort: log and keep going
            self.log(f"face_recognition error on {image_path}: {e}", "ERROR")
            return {'faces': [], 'encodings': [], 'processing_time': time.time() - start_time}

    def process_with_deepface(self, image_path: str) -> Dict:
        """Detect and encode the faces of one image with deepface (ArcFace).

        Returns:
            The same structure as ``process_with_face_recognition``. ``bbox``
            is the x/y/w/h region reported by deepface and ``location`` is
            that region converted to (top, right, bottom, left). On any error
            the failure is logged and empty lists are returned.
        """
        start_time = time.time()
        try:
            results = DeepFace.represent(
                img_path=image_path,
                model_name='ArcFace',  # Best accuracy model
                detector_backend='retinaface',  # Best detection
                enforce_detection=False,  # Don't fail if no faces
                align=True  # Face alignment for better accuracy
            )
            if not results:
                return {'faces': [], 'encodings': [], 'processing_time': time.time() - start_time}

            faces = []
            encodings = []
            stem = Path(image_path).stem
            for i, result in enumerate(results):
                # With enforce_detection=False deepface returns a whole-image
                # placeholder with face_confidence == 0 when nothing was
                # detected; counting it would inflate the face total. Results
                # without the key (older deepface releases) are kept.
                if result.get('face_confidence', 1) == 0:
                    continue
                # DeepFace.represent reports the box under 'facial_area';
                # 'region' is kept as a fallback for other result layouts.
                region = result.get('facial_area') or result.get('region') or {}
                x = region.get('x', 0)
                y = region.get('y', 0)
                w = region.get('w', 0)
                h = region.get('h', 0)
                embedding = np.array(result['embedding'])
                faces.append({
                    'image_path': image_path,
                    'face_id': f"df_{stem}_{i}",
                    'location': (y, x + w, y + h, x),
                    'bbox': region,
                    'encoding': embedding,
                })
                encodings.append(embedding)

            processing_time = time.time() - start_time
            self.log(f"deepface: Found {len(faces)} faces in {processing_time:.2f}s")
            return {
                'faces': faces,
                'encodings': encodings,
                'processing_time': processing_time,
            }
        except Exception as e:  # deliberate best effort: log and keep going
            self.log(f"deepface error on {image_path}: {e}", "ERROR")
            return {'faces': [], 'encodings': [], 'processing_time': time.time() - start_time}

    def calculate_similarity_matrix(self, encodings: List[np.ndarray], method: str) -> np.ndarray:
        """Build the pairwise distance matrix of a list of face encodings.

        Despite the name the entries are distances (lower = more similar):
        ``face_recognition.face_distance`` for ``method == 'face_recognition'``
        and cosine distance (1 - cosine similarity) for anything else. The
        diagonal is always 0.0.

        Returns:
            An ``(n, n)`` array, or an empty 1-D array when ``encodings`` is
            empty.
        """
        n_faces = len(encodings)
        if n_faces == 0:
            return np.array([])

        stacked = np.asarray(encodings, dtype=float)
        if method == 'face_recognition':
            # One library call per row instead of one per pair.
            matrix = np.zeros((n_faces, n_faces))
            for i in range(n_faces):
                matrix[i, :] = face_recognition.face_distance(stacked, stacked[i])
        else:  # deepface
            norms = np.linalg.norm(stacked, axis=1, keepdims=True)
            # A zero-length embedding has no direction; dividing by 1 keeps
            # it at zero so its cosine similarity is 0 instead of NaN.
            unit = stacked / np.where(norms == 0, 1.0, norms)
            # Clip away rounding noise so distances stay within [0, 2].
            cosine_sim = np.clip(unit @ unit.T, -1.0, 1.0)
            matrix = 1.0 - cosine_sim

        np.fill_diagonal(matrix, 0.0)  # Same face
        return matrix

    def find_top_matches(self, similarity_matrix: np.ndarray, faces: List[Dict],
                         method: str, top_k: int = 5) -> List[Dict]:
        """List the closest other faces for every face.

        Args:
            similarity_matrix: Distance matrix from
                ``calculate_similarity_matrix`` (lower = more similar).
            faces: Face dicts in the same order as the matrix rows.
            method: Library name; kept for interface compatibility, both
                libraries use the same distance-to-confidence conversion.
            top_k: Maximum number of matches per face.

        Returns:
            One dict per face with ``query_face`` and ``matches``; each match
            holds ``face_id``, ``image_path``, ``distance`` and a display
            ``confidence`` in percent (``max(0, (1 - distance) * 100)``).
            Faces without a matrix row are skipped.
        """
        top_matches = []
        limit = max(top_k, 0)
        for i, face in enumerate(faces):
            if i >= similarity_matrix.shape[0]:
                continue
            distances = similarity_matrix[i, :]
            # Exclude the query face by index rather than by sort position:
            # on a distance tie (e.g. duplicate photos) the query is not
            # guaranteed to be first in the ordering.
            order = np.argsort(distances, kind='stable')
            neighbours = [int(idx) for idx in order if idx != i and idx < len(faces)]

            matches = []
            for idx in neighbours[:limit]:
                other_face = faces[idx]
                distance = float(distances[idx])
                matches.append({
                    'face_id': other_face['face_id'],
                    'image_path': other_face['image_path'],
                    'distance': distance,
                    # Convert to confidence percentage for display
                    'confidence': max(0, (1 - distance) * 100),
                })
            top_matches.append({
                'query_face': face,
                'matches': matches,
            })
        return top_matches

    def save_face_crops(self, faces: List[Dict], output_dir: str, method: str):
        """Save a padded JPEG crop of every face for manual inspection.

        Crops are written to ``<output_dir>/face_crops/<method>/<face_id>.jpg``.
        A failure on one face is logged and does not stop the others.
        """
        crops_dir = Path(output_dir) / "face_crops" / method
        crops_dir.mkdir(parents=True, exist_ok=True)
        padding = 20
        for face in faces:
            try:
                # The context manager releases the file handle per image.
                with Image.open(face['image_path']) as image:
                    if method == 'face_recognition':
                        top, right, bottom, left = face['location']
                    else:  # deepface
                        bbox = face['bbox']
                        left = bbox.get('x', 0)
                        top = bbox.get('y', 0)
                        right = left + bbox.get('w', 0)
                        bottom = top + bbox.get('h', 0)
                    # Add padding, clamped to the image bounds
                    left = max(0, left - padding)
                    top = max(0, top - padding)
                    right = min(image.width, right + padding)
                    bottom = min(image.height, bottom + padding)
                    face_crop = image.crop((left, top, right, bottom))
                    # JPEG cannot store alpha/palette modes (RGBA, P, ...).
                    if face_crop.mode not in ('L', 'RGB', 'CMYK'):
                        face_crop = face_crop.convert('RGB')
                    crop_path = crops_dir / f"{face['face_id']}.jpg"
                    face_crop.save(crop_path, "JPEG", quality=95)
            except Exception as e:  # deliberate best effort: log and keep going
                self.log(f"Error saving crop for {face['face_id']}: {e}", "ERROR")

    def save_similarity_matrices(self, fr_matrix: np.ndarray, df_matrix: np.ndarray,
                                 fr_faces: List[Dict], df_faces: List[Dict], output_dir: str):
        """Write both distance matrices as CSV files labelled by face id.

        Files go to ``<output_dir>/similarity_matrices``; an empty matrix is
        skipped.
        """
        matrices_dir = Path(output_dir) / "similarity_matrices"
        matrices_dir.mkdir(parents=True, exist_ok=True)
        outputs = (
            (fr_matrix, fr_faces, "face_recognition_similarity.csv"),
            (df_matrix, df_faces, "deepface_similarity.csv"),
        )
        for matrix, matrix_faces, filename in outputs:
            if matrix.size > 0:
                labels = [f['face_id'] for f in matrix_faces]
                frame = pd.DataFrame(matrix, index=labels, columns=labels)
                frame.to_csv(matrices_dir / filename)

    @staticmethod
    def _high_confidence_matches(matches: List[Dict], threshold: float = 70.0) -> List[Dict]:
        """Flatten all query/match pairs whose confidence reaches the threshold."""
        return [
            {
                'query': match_data['query_face']['face_id'],
                'match': match['face_id'],
                'confidence': match['confidence'],
                'query_image': match_data['query_face']['image_path'],
                'match_image': match['image_path'],
            }
            for match_data in matches
            for match in match_data['matches']
            if match['confidence'] >= threshold
        ]

    @staticmethod
    def _append_top_matches(report_lines: List[str], heading: str, matches: List[Dict]) -> None:
        """Append the first 3 query faces with their top 3 matches each."""
        report_lines.append(heading)
        for match_data in matches[:3]:  # Show first 3 faces
            query_face = match_data['query_face']
            report_lines.append(f" Query: {query_face['face_id']} ({Path(query_face['image_path']).name})")
            for match in match_data['matches'][:3]:  # Top 3 matches
                report_lines.append(f"{match['face_id']}: {match['confidence']:.1f}% ({Path(match['image_path']).name})")
            report_lines.append("")

    def generate_report(self, fr_results: Dict, df_results: Dict,
                        fr_matches: List[Dict], df_matches: List[Dict],
                        output_dir: Optional[str] = None) -> str:
        """Build the plain-text comparison report.

        Args:
            fr_results: face_recognition accumulators (``faces``, ``times``).
            df_results: deepface accumulators (``faces``, ``times``).
            fr_matches: Output of ``find_top_matches`` for face_recognition.
            df_matches: Output of ``find_top_matches`` for deepface.
            output_dir: When given, the report is also written to
                ``<output_dir>/comparison_report.txt`` (UTF-8).

        Returns:
            The report text.
        """
        report_lines = ["=" * 60, "FACE RECOGNITION COMPARISON REPORT", "=" * 60, ""]

        # Summary statistics
        fr_total_faces = len(fr_results['faces'])
        df_total_faces = len(df_results['faces'])
        fr_total_time = sum(fr_results['times'])
        df_total_time = sum(df_results['times'])
        report_lines.append("SUMMARY STATISTICS:")
        report_lines.append(f" face_recognition: {fr_total_faces} faces in {fr_total_time:.2f}s")
        report_lines.append(f" deepface: {df_total_faces} faces in {df_total_time:.2f}s")
        if fr_total_time > 0:
            report_lines.append(f" Speed ratio: {df_total_time/fr_total_time:.1f}x slower (deepface)")
        else:
            # No measurable baseline: a ratio would divide by zero.
            report_lines.append(" Speed ratio: n/a (no face_recognition processing time recorded)")
        report_lines.append("")

        # High confidence matches analysis
        fr_high_conf = self._high_confidence_matches(fr_matches)
        df_high_conf = self._high_confidence_matches(df_matches)
        report_lines.append("HIGH CONFIDENCE MATCHES (≥70%):")
        report_lines.append(f" face_recognition: {len(fr_high_conf)} matches")
        report_lines.append(f" deepface: {len(df_high_conf)} matches")
        report_lines.append("")

        # Show top matches for manual inspection
        report_lines.append("TOP MATCHES FOR MANUAL INSPECTION:")
        report_lines.append("")
        self._append_top_matches(report_lines, "face_recognition top matches:", fr_matches)
        self._append_top_matches(report_lines, "deepface top matches:", df_matches)

        # Recommendations
        report_lines.append("RECOMMENDATIONS:")
        if len(fr_high_conf) > len(df_high_conf) * 1.5:
            report_lines.append(" ⚠️ face_recognition shows significantly more high-confidence matches")
            report_lines.append(" This may indicate more false positives")
        if df_total_time > fr_total_time * 3:
            report_lines.append(" ⚠️ deepface is significantly slower")
            report_lines.append(" Consider GPU acceleration or faster models")
        if df_total_faces > fr_total_faces:
            report_lines.append(" ✅ deepface detected more faces")
            report_lines.append(" Better face detection in difficult conditions")
        report_lines.append("")
        report_lines.append("=" * 60)
        report_text = "\n".join(report_lines)

        # Save report if output directory specified
        if output_dir:
            report_dir = Path(output_dir)
            report_dir.mkdir(parents=True, exist_ok=True)
            report_path = report_dir / "comparison_report.txt"
            # The report contains non-ASCII symbols, so the platform default
            # encoding is not safe here.
            with open(report_path, 'w', encoding='utf-8') as f:
                f.write(report_text)
            self.log(f"Report saved to: {report_path}")
        return report_text

    def _collect(self, library: str, processor, image_files: List[str]) -> None:
        """Run one library's processor over all images and accumulate results."""
        bucket = self.results[library]
        for image_path in image_files:
            outcome = processor(image_path)
            bucket['faces'].extend(outcome['faces'])
            bucket['times'].append(outcome['processing_time'])
            bucket['encodings'].extend(outcome['encodings'])

    def run_test(self, folder_path: str, save_crops: bool = False,
                 save_matrices: bool = False) -> Dict:
        """Run the complete comparison on a folder and print the report.

        Args:
            folder_path: Folder that is searched recursively for images.
            save_crops: Also save face crops for manual inspection.
            save_matrices: Also save the distance matrices as CSV files.

        Returns:
            Per library, the detected ``faces``, their ``matches`` and the
            distance ``matrix``.

        Raises:
            FileNotFoundError: If the folder does not exist.
            ValueError: If the folder contains no supported image.
        """
        self.log(f"Starting face recognition test on: {folder_path}")
        image_files = self.get_image_files(folder_path)
        if not image_files:
            raise ValueError("No image files found in the specified folder")

        # Start from clean accumulators so a second run on the same tester
        # does not mix in the faces of the previous run.
        self.results = self._empty_results()

        # Outputs go next to the photo folder, not inside it.
        output_dir = None
        if save_crops or save_matrices:
            output_dir = Path(folder_path).parent / "test_results"
            output_dir.mkdir(exist_ok=True)

        # Process images with both methods
        self.log("Processing images with face_recognition...")
        self._collect('face_recognition', self.process_with_face_recognition, image_files)
        self.log("Processing images with deepface...")
        self._collect('deepface', self.process_with_deepface, image_files)
        fr_results = self.results['face_recognition']
        df_results = self.results['deepface']

        self.log("Calculating similarity matrices...")
        fr_matrix = self.calculate_similarity_matrix(fr_results['encodings'], 'face_recognition')
        df_matrix = self.calculate_similarity_matrix(df_results['encodings'], 'deepface')

        fr_matches = self.find_top_matches(fr_matrix, fr_results['faces'], 'face_recognition')
        df_matches = self.find_top_matches(df_matrix, df_results['faces'], 'deepface')

        # Save outputs if requested
        if save_crops and output_dir:
            self.log("Saving face crops...")
            self.save_face_crops(fr_results['faces'], str(output_dir), 'face_recognition')
            self.save_face_crops(df_results['faces'], str(output_dir), 'deepface')
        if save_matrices and output_dir:
            self.log("Saving similarity matrices...")
            self.save_similarity_matrices(
                fr_matrix, df_matrix,
                fr_results['faces'],
                df_results['faces'],
                str(output_dir)
            )

        # Generate and display report
        report = self.generate_report(
            fr_results, df_results,
            fr_matches, df_matches, str(output_dir) if output_dir else None
        )
        print(report)
        return {
            'face_recognition': {
                'faces': fr_results['faces'],
                'matches': fr_matches,
                'matrix': fr_matrix
            },
            'deepface': {
                'faces': df_results['faces'],
                'matches': df_matches,
                'matrix': df_matrix
            }
        }
def main():
    """Main CLI entry point.

    Parses the command line, validates that the given path is a directory,
    runs the comparison and reports the outcome. Exits with status 1 when the
    folder is missing or the test fails; diagnostics go to stderr.
    """
    parser = argparse.ArgumentParser(
        description="Compare face_recognition vs deepface on a folder of photos",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "\n"
            "Examples:\n"
            "python test_face_recognition.py demo_photos/\n"
            "python test_face_recognition.py demo_photos/ --save-crops --verbose\n"
            "python test_face_recognition.py demo_photos/ --save-matrices --save-crops\n"
        ),
    )
    parser.add_argument('folder', help='Path to folder containing photos to test')
    parser.add_argument('--save-crops', action='store_true',
                        help='Save face crops for manual inspection')
    parser.add_argument('--save-matrices', action='store_true',
                        help='Save similarity matrices as CSV files')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Enable verbose logging')
    args = parser.parse_args()

    # Validate folder path: it must be a directory, a regular file is not a
    # photo folder even though it exists.
    if not os.path.isdir(args.folder):
        print(f"Error: Folder not found: {args.folder}", file=sys.stderr)
        sys.exit(1)

    # NOTE: no dependency check here. face_recognition and deepface are
    # imported at module level, so a missing package already fails at import
    # time, before this function can run.

    # Run test
    try:
        tester = FaceRecognitionTester(verbose=args.verbose)
        tester.run_test(
            args.folder,
            save_crops=args.save_crops,
            save_matrices=args.save_matrices
        )
        print("\n✅ Test completed successfully!")
        if args.save_crops or args.save_matrices:
            print(f"📁 Results saved to: {Path(args.folder).parent / 'test_results'}")
    except Exception as e:  # top-level boundary: report and exit non-zero
        print(f"❌ Test failed: {e}", file=sys.stderr)
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()