This commit deletes the `photo_tagger_refactored.py`, `run.sh`, and test files (`test_basic.py`, `test_deepface_gui.py`, `test_face_recognition.py`) that are no longer in use. The removal of these files streamlines the project structure and eliminates legacy code, paving the way for future enhancements and a cleaner codebase. The README has been updated to reflect these changes, ensuring clarity on the current state of the project.
524 lines
22 KiB
Python
Executable File
524 lines
22 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Face Recognition Comparison Test Script
|
|
|
|
Compares face_recognition vs deepface on a folder of photos.
|
|
Tests accuracy and performance without modifying existing database.
|
|
|
|
Usage:
|
|
python test_face_recognition.py /path/to/photos [--save-crops] [--save-matrices] [--verbose]
|
|
|
|
Example:
|
|
python test_face_recognition.py demo_photos/ --save-crops --verbose
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import argparse
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import List, Dict, Tuple, Optional
|
|
import numpy as np
|
|
import pandas as pd
|
|
from PIL import Image
|
|
|
|
# Face recognition libraries
|
|
import face_recognition
|
|
from deepface import DeepFace
|
|
|
|
# File extensions (with leading dot) accepted as images; suffixes are
# lower-cased before being checked against this set.
SUPPORTED_FORMATS = {
    '.jpg',
    '.jpeg',
    '.png',
    '.bmp',
    '.tiff',
    '.tif',
}
|
|
|
|
|
|
class FaceRecognitionTester:
    """Run face_recognition and deepface over the same photos and compare them.

    Results accumulate in ``self.results`` under the keys
    ``'face_recognition'`` and ``'deepface'``.  Each entry holds the detected
    ``'faces'``, the per-image processing ``'times'`` (seconds) and the face
    ``'encodings'``.
    """

    def __init__(self, verbose: bool = False):
        """Create a tester.

        Args:
            verbose: When true, INFO messages are printed in addition to errors.
        """
        self.verbose = verbose
        self.results = self._empty_results()

    @staticmethod
    def _empty_results() -> Dict:
        """Return a fresh, empty per-library results structure."""
        return {
            'face_recognition': {'faces': [], 'times': [], 'encodings': []},
            'deepface': {'faces': [], 'times': [], 'encodings': []},
        }

    def log(self, message: str, level: str = "INFO"):
        """Print a timestamped message.

        Messages are shown only in verbose mode, except ``"ERROR"`` messages,
        which are always shown.
        """
        if self.verbose or level == "ERROR":
            timestamp = time.strftime("%H:%M:%S")
            print(f"[{timestamp}] {level}: {message}")

    def get_image_files(self, folder_path: str) -> List[str]:
        """Return the sorted paths of all supported images under *folder_path*.

        The folder is searched recursively; a file qualifies when its
        lower-cased suffix is in ``SUPPORTED_FORMATS``.

        Raises:
            FileNotFoundError: If *folder_path* does not exist.
        """
        folder = Path(folder_path)
        if not folder.exists():
            raise FileNotFoundError(f"Folder not found: {folder_path}")

        image_files = [
            str(file_path)
            for file_path in folder.rglob("*")
            if file_path.is_file() and file_path.suffix.lower() in SUPPORTED_FORMATS
        ]

        self.log(f"Found {len(image_files)} image files")
        return sorted(image_files)

    def process_with_face_recognition(self, image_path: str) -> Dict:
        """Detect and encode the faces of one image with face_recognition.

        Returns:
            A dict with ``'faces'`` (one dict per face holding ``image_path``,
            ``face_id``, ``location``, ``bbox`` and ``encoding``),
            ``'encodings'`` (the encodings in the same order) and
            ``'processing_time'`` in seconds.  Any exception is logged as an
            error and yields an empty result instead of propagating.
        """
        start_time = time.time()

        try:
            image = face_recognition.load_image_file(image_path)

            # Detect faces using CNN model (more accurate than HOG)
            face_locations = face_recognition.face_locations(image, model="cnn")
            if not face_locations:
                return {'faces': [], 'encodings': [], 'processing_time': time.time() - start_time}

            face_encodings = face_recognition.face_encodings(image, face_locations)

            faces = []
            encodings = []
            for i, (location, encoding) in enumerate(zip(face_locations, face_encodings)):
                top, right, bottom, left = location
                faces.append({
                    'image_path': image_path,
                    'face_id': f"fr_{Path(image_path).stem}_{i}",
                    'location': location,
                    'bbox': {'top': top, 'right': right, 'bottom': bottom, 'left': left},
                    'encoding': encoding
                })
                encodings.append(encoding)

            processing_time = time.time() - start_time
            self.log(f"face_recognition: Found {len(faces)} faces in {processing_time:.2f}s")

            return {
                'faces': faces,
                'encodings': encodings,
                'processing_time': processing_time
            }

        except Exception as e:
            self.log(f"face_recognition error on {image_path}: {e}", "ERROR")
            return {'faces': [], 'encodings': [], 'processing_time': time.time() - start_time}

    def process_with_deepface(self, image_path: str) -> Dict:
        """Detect and encode the faces of one image with deepface (ArcFace).

        Returns:
            A dict shaped like the one from ``process_with_face_recognition``.
            ``location`` is ``(top, right, bottom, left)`` derived from the
            deepface region, and ``bbox`` is the raw region dict.  Any
            exception is logged as an error and yields an empty result.
        """
        start_time = time.time()

        try:
            results = DeepFace.represent(
                img_path=image_path,
                model_name='ArcFace',  # Best accuracy model
                detector_backend='retinaface',  # Best detection
                enforce_detection=False,  # Don't fail if no faces
                align=True  # Face alignment for better accuracy
            )

            if not results:
                return {'faces': [], 'encodings': [], 'processing_time': time.time() - start_time}

            faces = []
            encodings = []
            for i, result in enumerate(results):
                # With enforce_detection=False deepface reports a whole-image
                # placeholder with confidence 0 when it finds no face; counting
                # it would inflate the detection totals.  Results without a
                # 'face_confidence' key are kept unchanged.
                # NOTE(review): relies on deepface's 'face_confidence' field -
                # confirm against the installed deepface version.
                face_confidence = result.get('face_confidence')
                if face_confidence is not None and face_confidence <= 0:
                    continue

                region = result.get('region', {})
                x = region.get('x', 0)
                y = region.get('y', 0)
                embedding = np.array(result['embedding'])

                faces.append({
                    'image_path': image_path,
                    'face_id': f"df_{Path(image_path).stem}_{i}",
                    'location': (y, x + region.get('w', 0), y + region.get('h', 0), x),
                    'bbox': region,
                    'encoding': embedding
                })
                encodings.append(embedding)

            processing_time = time.time() - start_time
            self.log(f"deepface: Found {len(faces)} faces in {processing_time:.2f}s")

            return {
                'faces': faces,
                'encodings': encodings,
                'processing_time': processing_time
            }

        except Exception as e:
            self.log(f"deepface error on {image_path}: {e}", "ERROR")
            return {'faces': [], 'encodings': [], 'processing_time': time.time() - start_time}

    def calculate_similarity_matrix(self, encodings: List[np.ndarray], method: str) -> np.ndarray:
        """Return the pairwise distance matrix between all face encodings.

        Despite the name, entries are distances (lower means more similar) and
        the diagonal is 0.

        Args:
            encodings: Face encodings, all produced by the same library.
            method: ``'face_recognition'`` uses ``face_recognition.face_distance``;
                any other value uses cosine distance.

        Returns:
            An ``(n, n)`` array, or an empty 1-D array when there are no
            encodings.
        """
        n_faces = len(encodings)
        if n_faces == 0:
            return np.array([])

        stacked = np.asarray(encodings, dtype=float)

        if method == 'face_recognition':
            # One call per row instead of one call per pair.
            similarity_matrix = np.zeros((n_faces, n_faces))
            for i, encoding in enumerate(stacked):
                similarity_matrix[i, :] = face_recognition.face_distance(stacked, encoding)
        else:  # deepface
            norms = np.linalg.norm(stacked, axis=1, keepdims=True)
            # A zero-length embedding would divide by zero and yield NaN;
            # leaving it unscaled gives it cosine distance 1 to every face.
            norms[norms == 0] = 1.0
            unit = stacked / norms
            # Clip away floating-point overshoot outside the valid [0, 2] range.
            similarity_matrix = np.clip(1.0 - unit @ unit.T, 0.0, 2.0)

        np.fill_diagonal(similarity_matrix, 0.0)
        return similarity_matrix

    def find_top_matches(self, similarity_matrix: np.ndarray, faces: List[Dict],
                         method: str, top_k: int = 5) -> List[Dict]:
        """Find the closest other faces for every face.

        Args:
            similarity_matrix: Distance matrix from ``calculate_similarity_matrix``.
            faces: Face dicts in the same order as the matrix rows.
            method: Library name; both libraries currently rank by ascending
                distance and use the same confidence formula.
            top_k: Maximum number of matches per face.

        Returns:
            One dict per face with ``'query_face'`` and ``'matches'``; each
            match has ``face_id``, ``image_path``, ``distance`` and
            ``confidence`` (``(1 - distance) * 100``, floored at 0).
        """
        top_matches = []

        for i, face in enumerate(faces):
            if i >= similarity_matrix.shape[0]:
                continue

            distances = similarity_matrix[i, :]

            # Lower distance = more similar.  The query face is removed by
            # index: it is not guaranteed to sort first, because a duplicate
            # face also has distance 0.
            ranked = np.argsort(distances, kind='stable')
            candidates = [idx for idx in ranked if idx != i][:top_k]

            matches = []
            for idx in candidates:
                if idx >= len(faces):
                    continue
                other_face = faces[idx]
                distance = float(distances[idx])
                # Display-only percentage.
                # NOTE(review): the two libraries' distance scales are not
                # calibrated against each other - confirm before comparing.
                confidence = max(0.0, (1 - distance) * 100)
                matches.append({
                    'face_id': other_face['face_id'],
                    'image_path': other_face['image_path'],
                    'distance': distance,
                    'confidence': confidence
                })

            top_matches.append({
                'query_face': face,
                'matches': matches
            })

        return top_matches

    def save_face_crops(self, faces: List[Dict], output_dir: str, method: str):
        """Save a padded JPEG crop of every face under ``face_crops/<method>``.

        Failures for individual faces are logged as errors and skipped.
        """
        crops_dir = Path(output_dir) / "face_crops" / method
        crops_dir.mkdir(parents=True, exist_ok=True)

        padding = 20

        for face in faces:
            try:
                if method == 'face_recognition':
                    top, right, bottom, left = face['location']
                else:  # deepface
                    bbox = face['bbox']
                    left = bbox.get('x', 0)
                    top = bbox.get('y', 0)
                    right = left + bbox.get('w', 0)
                    bottom = top + bbox.get('h', 0)

                # The context manager closes the file handle after each face.
                with Image.open(face['image_path']) as image:
                    # Pad the box, keeping it inside the image bounds.
                    left = max(0, left - padding)
                    top = max(0, top - padding)
                    right = min(image.width, right + padding)
                    bottom = min(image.height, bottom + padding)

                    face_crop = image.crop((left, top, right, bottom))
                    # JPEG cannot store alpha or palette modes (e.g. PNG input).
                    if face_crop.mode not in ('RGB', 'L', 'CMYK'):
                        face_crop = face_crop.convert('RGB')

                    crop_path = crops_dir / f"{face['face_id']}.jpg"
                    face_crop.save(crop_path, "JPEG", quality=95)

            except Exception as e:
                self.log(f"Error saving crop for {face['face_id']}: {e}", "ERROR")

    def save_similarity_matrices(self, fr_matrix: np.ndarray, df_matrix: np.ndarray,
                                 fr_faces: List[Dict], df_faces: List[Dict], output_dir: str):
        """Write each non-empty distance matrix to CSV, labelled by face id.

        Files are written to ``<output_dir>/similarity_matrices``.
        """
        matrices_dir = Path(output_dir) / "similarity_matrices"
        matrices_dir.mkdir(parents=True, exist_ok=True)

        if fr_matrix.size > 0:
            fr_ids = [f['face_id'] for f in fr_faces]
            fr_df = pd.DataFrame(fr_matrix, index=fr_ids, columns=fr_ids)
            fr_df.to_csv(matrices_dir / "face_recognition_similarity.csv")

        if df_matrix.size > 0:
            df_ids = [f['face_id'] for f in df_faces]
            df_df = pd.DataFrame(df_matrix, index=df_ids, columns=df_ids)
            df_df.to_csv(matrices_dir / "deepface_similarity.csv")

    @staticmethod
    def _high_confidence_matches(matches: List[Dict], threshold: float = 70.0) -> List[Dict]:
        """Flatten *matches* into the pairs whose confidence is >= *threshold*."""
        return [
            {
                'query': match_data['query_face']['face_id'],
                'match': match['face_id'],
                'confidence': match['confidence'],
                'query_image': match_data['query_face']['image_path'],
                'match_image': match['image_path']
            }
            for match_data in matches
            for match in match_data['matches']
            if match['confidence'] >= threshold
        ]

    @staticmethod
    def _top_match_lines(title: str, matches: List[Dict]) -> List[str]:
        """Format the first 3 query faces and their top 3 matches as report lines."""
        lines = [title]
        for match_data in matches[:3]:  # Show first 3 faces
            query_face = match_data['query_face']
            lines.append(f" Query: {query_face['face_id']} ({Path(query_face['image_path']).name})")
            for match in match_data['matches'][:3]:  # Top 3 matches
                lines.append(f" → {match['face_id']}: {match['confidence']:.1f}% ({Path(match['image_path']).name})")
            lines.append("")
        return lines

    def generate_report(self, fr_results: Dict, df_results: Dict,
                        fr_matches: List[Dict], df_matches: List[Dict],
                        output_dir: Optional[str] = None) -> str:
        """Build the plain-text comparison report.

        Args:
            fr_results: face_recognition results (``'faces'`` and ``'times'`` are read).
            df_results: deepface results (``'faces'`` and ``'times'`` are read).
            fr_matches: Output of ``find_top_matches`` for face_recognition.
            df_matches: Output of ``find_top_matches`` for deepface.
            output_dir: If given, the report is also written to
                ``comparison_report.txt`` in this directory.

        Returns:
            The report text.
        """
        report_lines = []
        report_lines.append("=" * 60)
        report_lines.append("FACE RECOGNITION COMPARISON REPORT")
        report_lines.append("=" * 60)
        report_lines.append("")

        # Summary statistics
        fr_total_faces = len(fr_results['faces'])
        df_total_faces = len(df_results['faces'])
        fr_total_time = sum(fr_results['times'])
        df_total_time = sum(df_results['times'])

        report_lines.append("SUMMARY STATISTICS:")
        report_lines.append(f" face_recognition: {fr_total_faces} faces in {fr_total_time:.2f}s")
        report_lines.append(f" deepface: {df_total_faces} faces in {df_total_time:.2f}s")
        if fr_total_time > 0:
            speed_ratio = df_total_time / fr_total_time
            report_lines.append(f" Speed ratio: {speed_ratio:.1f}x slower (deepface)")
        else:
            # No measurable face_recognition time: the ratio is undefined.
            report_lines.append(" Speed ratio: n/a (no face_recognition processing time recorded)")
        report_lines.append("")

        # High confidence matches analysis
        fr_high_conf = self._high_confidence_matches(fr_matches)
        df_high_conf = self._high_confidence_matches(df_matches)

        report_lines.append("HIGH CONFIDENCE MATCHES (≥70%):")
        report_lines.append(f" face_recognition: {len(fr_high_conf)} matches")
        report_lines.append(f" deepface: {len(df_high_conf)} matches")
        report_lines.append("")

        # Show top matches for manual inspection
        report_lines.append("TOP MATCHES FOR MANUAL INSPECTION:")
        report_lines.append("")
        report_lines.extend(self._top_match_lines("face_recognition top matches:", fr_matches))
        report_lines.extend(self._top_match_lines("deepface top matches:", df_matches))

        # Recommendations
        report_lines.append("RECOMMENDATIONS:")
        if len(fr_high_conf) > len(df_high_conf) * 1.5:
            report_lines.append(" ⚠️ face_recognition shows significantly more high-confidence matches")
            report_lines.append(" This may indicate more false positives")
        if df_total_time > fr_total_time * 3:
            report_lines.append(" ⚠️ deepface is significantly slower")
            report_lines.append(" Consider GPU acceleration or faster models")
        if df_total_faces > fr_total_faces:
            report_lines.append(" ✅ deepface detected more faces")
            report_lines.append(" Better face detection in difficult conditions")

        report_lines.append("")
        report_lines.append("=" * 60)

        report_text = "\n".join(report_lines)

        # Save report if output directory specified
        if output_dir:
            report_path = Path(output_dir) / "comparison_report.txt"
            # The report contains non-ASCII symbols, so do not depend on the
            # platform default encoding.
            with open(report_path, 'w', encoding='utf-8') as f:
                f.write(report_text)
            self.log(f"Report saved to: {report_path}")

        return report_text

    def run_test(self, folder_path: str, save_crops: bool = False,
                 save_matrices: bool = False) -> Dict:
        """Run the complete comparison on every image under *folder_path*.

        Args:
            folder_path: Folder that is searched recursively for images.
            save_crops: Save face crops to a ``test_results`` folder next to
                *folder_path*.
            save_matrices: Save the distance matrices as CSV to that folder.

        Returns:
            A dict keyed by library name, each holding ``'faces'``,
            ``'matches'`` and ``'matrix'``.

        Raises:
            FileNotFoundError: If *folder_path* does not exist.
            ValueError: If no supported image is found.
        """
        self.log(f"Starting face recognition test on: {folder_path}")

        image_files = self.get_image_files(folder_path)
        if not image_files:
            raise ValueError("No image files found in the specified folder")

        # Start from a clean slate so a second run on the same tester does not
        # mix in faces from the previous run.
        self.results = self._empty_results()
        fr_store = self.results['face_recognition']
        df_store = self.results['deepface']

        # Create output directory if needed
        output_dir = None
        if save_crops or save_matrices:
            output_dir = Path(folder_path).parent / "test_results"
            output_dir.mkdir(parents=True, exist_ok=True)

        # Process images with both methods
        self.log("Processing images with face_recognition...")
        for image_path in image_files:
            result = self.process_with_face_recognition(image_path)
            fr_store['faces'].extend(result['faces'])
            fr_store['times'].append(result['processing_time'])
            fr_store['encodings'].extend(result['encodings'])

        self.log("Processing images with deepface...")
        for image_path in image_files:
            result = self.process_with_deepface(image_path)
            df_store['faces'].extend(result['faces'])
            df_store['times'].append(result['processing_time'])
            df_store['encodings'].extend(result['encodings'])

        # Calculate similarity matrices
        self.log("Calculating similarity matrices...")
        fr_matrix = self.calculate_similarity_matrix(fr_store['encodings'], 'face_recognition')
        df_matrix = self.calculate_similarity_matrix(df_store['encodings'], 'deepface')

        # Find top matches
        fr_matches = self.find_top_matches(fr_matrix, fr_store['faces'], 'face_recognition')
        df_matches = self.find_top_matches(df_matrix, df_store['faces'], 'deepface')

        # Save outputs if requested
        if save_crops and output_dir:
            self.log("Saving face crops...")
            self.save_face_crops(fr_store['faces'], str(output_dir), 'face_recognition')
            self.save_face_crops(df_store['faces'], str(output_dir), 'deepface')

        if save_matrices and output_dir:
            self.log("Saving similarity matrices...")
            self.save_similarity_matrices(
                fr_matrix, df_matrix,
                fr_store['faces'],
                df_store['faces'],
                str(output_dir)
            )

        # Generate and display report
        report = self.generate_report(
            fr_store, df_store,
            fr_matches, df_matches, str(output_dir) if output_dir else None
        )

        print(report)

        return {
            'face_recognition': {
                'faces': fr_store['faces'],
                'matches': fr_matches,
                'matrix': fr_matrix
            },
            'deepface': {
                'faces': df_store['faces'],
                'matches': df_matches,
                'matrix': df_matrix
            }
        }
|
|
|
|
|
|
def main():
    """Command-line entry point: parse arguments, run the comparison, report status.

    Exits with status 1 when the folder is missing or not a directory, when a
    required dependency cannot be imported, or when the test run raises.
    """
    parser = argparse.ArgumentParser(
        description="Compare face_recognition vs deepface on a folder of photos",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python test_face_recognition.py demo_photos/
python test_face_recognition.py demo_photos/ --save-crops --verbose
python test_face_recognition.py demo_photos/ --save-matrices --save-crops
"""
    )

    parser.add_argument('folder', help='Path to folder containing photos to test')
    parser.add_argument('--save-crops', action='store_true',
                        help='Save face crops for manual inspection')
    parser.add_argument('--save-matrices', action='store_true',
                        help='Save similarity matrices as CSV files')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Enable verbose logging')

    args = parser.parse_args()

    # Validate folder path: it must exist and be a directory, otherwise the
    # run would only fail later with a less helpful "no image files" error.
    if not os.path.isdir(args.folder):
        print(f"Error: Folder not found: {args.folder}", file=sys.stderr)
        sys.exit(1)

    # Check dependencies.  The same packages are imported at module level, so
    # this only reports a missing package if those imports are made lazy.
    try:
        import face_recognition
        from deepface import DeepFace
    except ImportError as e:
        print(f"Error: Missing required dependency: {e}", file=sys.stderr)
        print("Please install with: pip install face_recognition deepface", file=sys.stderr)
        sys.exit(1)

    # Run test
    try:
        tester = FaceRecognitionTester(verbose=args.verbose)
        tester.run_test(
            args.folder,
            save_crops=args.save_crops,
            save_matrices=args.save_matrices
        )

        print("\n✅ Test completed successfully!")
        if args.save_crops or args.save_matrices:
            print(f"📁 Results saved to: {Path(args.folder).parent / 'test_results'}")

    except Exception as e:
        print(f"❌ Test failed: {e}", file=sys.stderr)
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
|