# punimtag/test_face_recognition.py

#!/usr/bin/env python3
"""
Face Recognition Comparison Test Script

Compares face_recognition vs deepface on a folder of photos.
Tests accuracy and performance without modifying existing database.

Usage:
    python test_face_recognition.py /path/to/photos [--save-crops] [--save-matrices] [--verbose]

Example:
    python test_face_recognition.py demo_photos/ --save-crops --verbose
"""

import os
import sys
import time
import argparse
import tempfile
from pathlib import Path
from typing import List, Dict, Tuple, Optional
import numpy as np
import pandas as pd
from PIL import Image

# Face recognition libraries
import face_recognition
from deepface import DeepFace

# Supported image formats: lower-case file extensions (leading dot included).
# get_image_files compares these against Path.suffix.lower(), so matching is
# case-insensitive.
SUPPORTED_FORMATS = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}


class FaceRecognitionTester:
    """Test and compare face recognition libraries"""

    def __init__(self, verbose: bool = False):
        """Create a tester with one empty result bucket per library.

        Args:
            verbose: When true, ``log`` also prints non-ERROR messages.
        """
        self.verbose = verbose
        # Each library gets its own dict with three independent, empty lists.
        self.results = {
            library: {'faces': [], 'times': [], 'encodings': []}
            for library in ('face_recognition', 'deepface')
        }

    def log(self, message: str, level: str = "INFO"):
        """Print log message with timestamp"""
        if self.verbose or level == "ERROR":
            timestamp = time.strftime("%H:%M:%S")
            print(f"[{timestamp}] {level}: {message}")

    def get_image_files(self, folder_path: str) -> List[str]:
        """Get all supported image files from folder"""
        folder = Path(folder_path)
        if not folder.exists():
            raise FileNotFoundError(f"Folder not found: {folder_path}")

        image_files = []
        for file_path in folder.rglob("*"):
            if file_path.is_file() and file_path.suffix.lower() in SUPPORTED_FORMATS:
                image_files.append(str(file_path))

        self.log(f"Found {len(image_files)} image files")
        return sorted(image_files)

    def process_with_face_recognition(self, image_path: str) -> Dict:
        """Detect and encode the faces in one image with ``face_recognition``.

        Args:
            image_path: Path of the image to process.

        Returns:
            Dict with 'faces' (one record per face: image_path, face_id,
            location, bbox, encoding), 'encodings' (the encodings in the same
            order) and 'processing_time' (elapsed seconds). Any exception is
            logged at ERROR level and reported as an empty result instead of
            being raised.
        """
        started = time.time()

        try:
            pixels = face_recognition.load_image_file(image_path)

            # Detection uses the library's "cnn" model.
            boxes = face_recognition.face_locations(pixels, model="cnn")
            if not boxes:
                return {'faces': [], 'encodings': [], 'processing_time': time.time() - started}

            vectors = face_recognition.face_encodings(pixels, boxes)

            faces = []
            encodings = []
            for index, (box, vector) in enumerate(zip(boxes, vectors)):
                # Boxes are unpacked in (top, right, bottom, left) order.
                top, right, bottom, left = box
                faces.append({
                    'image_path': image_path,
                    'face_id': f"fr_{Path(image_path).stem}_{index}",
                    'location': box,
                    'bbox': {'top': top, 'right': right, 'bottom': bottom, 'left': left},
                    'encoding': vector
                })
                encodings.append(vector)

            elapsed = time.time() - started
            self.log(f"face_recognition: Found {len(faces)} faces in {elapsed:.2f}s")

            return {
                'faces': faces,
                'encodings': encodings,
                'processing_time': elapsed
            }

        except Exception as exc:
            self.log(f"face_recognition error on {image_path}: {exc}", "ERROR")
            return {'faces': [], 'encodings': [], 'processing_time': time.time() - started}

    def process_with_deepface(self, image_path: str) -> Dict:
        """Detect and encode the faces in one image with DeepFace.

        Uses the ArcFace model with the retinaface detector, alignment on and
        ``enforce_detection=False``.

        Args:
            image_path: Path of the image to process.

        Returns:
            Dict with 'faces' (one record per face: image_path, face_id,
            location as (top, right, bottom, left), bbox, encoding),
            'encodings' (the same embeddings, in order) and 'processing_time'
            (elapsed seconds). Any exception is logged at ERROR level and
            reported as an empty result instead of being raised.
        """
        start_time = time.time()

        try:
            results = DeepFace.represent(
                img_path=image_path,
                model_name='ArcFace',
                detector_backend='retinaface',
                enforce_detection=False,  # Don't fail if no faces
                align=True
            )

            faces = []
            encodings = []

            for result in results or []:
                # With enforce_detection=False DeepFace falls back to embedding
                # the whole image and reports face_confidence 0 when the
                # detector finds nothing; that is not a face and must not be
                # counted. Results without the key (older releases) are kept.
                # NOTE(review): confirm against the installed DeepFace version.
                if result.get('face_confidence') == 0:
                    continue

                # represent() reports the box under 'facial_area'; 'region'
                # (the key this code originally read) is kept as a fallback.
                region = result.get('facial_area') or result.get('region') or {}
                x, y = region.get('x', 0), region.get('y', 0)
                w, h = region.get('w', 0), region.get('h', 0)

                # Convert once and share the array between both outputs.
                embedding = np.array(result['embedding'])
                faces.append({
                    'image_path': image_path,
                    # Index counts kept faces only, so ids stay contiguous.
                    'face_id': f"df_{Path(image_path).stem}_{len(faces)}",
                    'location': (y, x + w, y + h, x),
                    'bbox': region,
                    'encoding': embedding
                })
                encodings.append(embedding)

            if not faces:
                return {'faces': [], 'encodings': [], 'processing_time': time.time() - start_time}

            processing_time = time.time() - start_time
            self.log(f"deepface: Found {len(faces)} faces in {processing_time:.2f}s")

            return {
                'faces': faces,
                'encodings': encodings,
                'processing_time': processing_time
            }

        except Exception as e:
            self.log(f"deepface error on {image_path}: {e}", "ERROR")
            return {'faces': [], 'encodings': [], 'processing_time': time.time() - start_time}

    def calculate_similarity_matrix(self, encodings: List[np.ndarray], method: str) -> np.ndarray:
        """Calculate similarity matrix between all face encodings"""
        n_faces = len(encodings)
        if n_faces == 0:
            return np.array([])

        similarity_matrix = np.zeros((n_faces, n_faces))

        for i in range(n_faces):
            for j in range(n_faces):
                if i == j:
                    similarity_matrix[i, j] = 0.0  # Same face
                else:
                    if method == 'face_recognition':
                        # Use face_recognition distance (lower = more similar)
                        distance = face_recognition.face_distance([encodings[i]], encodings[j])[0]
                        similarity_matrix[i, j] = distance
                    else:  # deepface
                        # Use cosine distance for ArcFace embeddings
                        enc1_norm = encodings[i] / np.linalg.norm(encodings[i])
                        enc2_norm = encodings[j] / np.linalg.norm(encodings[j])
                        cosine_sim = np.dot(enc1_norm, enc2_norm)
                        cosine_distance = 1 - cosine_sim
                        similarity_matrix[i, j] = cosine_distance

        return similarity_matrix

    def find_top_matches(self, similarity_matrix: np.ndarray, faces: List[Dict],
                        method: str, top_k: int = 5) -> List[Dict]:
        """Find top matches for each face"""
        top_matches = []

        for i, face in enumerate(faces):
            if i >= similarity_matrix.shape[0]:
                continue

            # Get distances to all other faces
            distances = similarity_matrix[i, :]

            # Find top matches (excluding self)
            if method == 'face_recognition':
                # Lower distance = more similar
                sorted_indices = np.argsort(distances)
            else:  # deepface
                # Lower cosine distance = more similar
                sorted_indices = np.argsort(distances)

            matches = []
            for idx in sorted_indices[1:top_k+1]:  # Skip self (index 0)
                if idx < len(faces):
                    other_face = faces[idx]
                    distance = distances[idx]

                    # Convert to confidence percentage for display
                    if method == 'face_recognition':
                        confidence = max(0, (1 - distance) * 100)
                    else:  # deepface
                        confidence = max(0, (1 - distance) * 100)

                    matches.append({
                        'face_id': other_face['face_id'],
                        'image_path': other_face['image_path'],
                        'distance': distance,
                        'confidence': confidence
                    })

            top_matches.append({
                'query_face': face,
                'matches': matches
            })

        return top_matches

    def save_face_crops(self, faces: List[Dict], output_dir: str, method: str):
        """Save face crops for manual inspection"""
        crops_dir = Path(output_dir) / "face_crops" / method
        crops_dir.mkdir(parents=True, exist_ok=True)

        for face in faces:
            try:
                # Load original image
                image = Image.open(face['image_path'])

                # Extract face region
                if method == 'face_recognition':
                    top, right, bottom, left = face['location']
                else:  # deepface
                    bbox = face['bbox']
                    left = bbox.get('x', 0)
                    top = bbox.get('y', 0)
                    right = left + bbox.get('w', 0)
                    bottom = top + bbox.get('h', 0)

                # Add padding
                padding = 20
                left = max(0, left - padding)
                top = max(0, top - padding)
                right = min(image.width, right + padding)
                bottom = min(image.height, bottom + padding)

                # Crop and save
                face_crop = image.crop((left, top, right, bottom))
                crop_path = crops_dir / f"{face['face_id']}.jpg"
                face_crop.save(crop_path, "JPEG", quality=95)

            except Exception as e:
                self.log(f"Error saving crop for {face['face_id']}: {e}", "ERROR")

    def save_similarity_matrices(self, fr_matrix: np.ndarray, df_matrix: np.ndarray,
                               fr_faces: List[Dict], df_faces: List[Dict], output_dir: str):
        """Save similarity matrices as CSV files"""
        matrices_dir = Path(output_dir) / "similarity_matrices"
        matrices_dir.mkdir(parents=True, exist_ok=True)

        # Save face_recognition matrix
        if fr_matrix.size > 0:
            fr_df = pd.DataFrame(fr_matrix,
                               index=[f['face_id'] for f in fr_faces],
                               columns=[f['face_id'] for f in fr_faces])
            fr_df.to_csv(matrices_dir / "face_recognition_similarity.csv")

        # Save deepface matrix
        if df_matrix.size > 0:
            df_df = pd.DataFrame(df_matrix,
                               index=[f['face_id'] for f in df_faces],
                               columns=[f['face_id'] for f in df_faces])
            df_df.to_csv(matrices_dir / "deepface_similarity.csv")

    def generate_report(self, fr_results: Dict, df_results: Dict,
                       fr_matches: List[Dict], df_matches: List[Dict],
                       output_dir: Optional[str] = None) -> str:
        """Generate comparison report"""
        report_lines = []
        report_lines.append("=" * 60)
        report_lines.append("FACE RECOGNITION COMPARISON REPORT")
        report_lines.append("=" * 60)
        report_lines.append("")

        # Summary statistics
        fr_total_faces = len(fr_results['faces'])
        df_total_faces = len(df_results['faces'])
        fr_total_time = sum(fr_results['times'])
        df_total_time = sum(df_results['times'])

        report_lines.append("SUMMARY STATISTICS:")
        report_lines.append(f"  face_recognition: {fr_total_faces} faces in {fr_total_time:.2f}s")
        report_lines.append(f"  deepface:         {df_total_faces} faces in {df_total_time:.2f}s")
        report_lines.append(f"  Speed ratio:      {df_total_time/fr_total_time:.1f}x slower (deepface)")
        report_lines.append("")

        # High confidence matches analysis
        def analyze_high_confidence_matches(matches: List[Dict], method: str, threshold: float = 70.0):
            high_conf_matches = []
            for match_data in matches:
                for match in match_data['matches']:
                    if match['confidence'] >= threshold:
                        high_conf_matches.append({
                            'query': match_data['query_face']['face_id'],
                            'match': match['face_id'],
                            'confidence': match['confidence'],
                            'query_image': match_data['query_face']['image_path'],
                            'match_image': match['image_path']
                        })
            return high_conf_matches

        fr_high_conf = analyze_high_confidence_matches(fr_matches, 'face_recognition')
        df_high_conf = analyze_high_confidence_matches(df_matches, 'deepface')

        report_lines.append("HIGH CONFIDENCE MATCHES (≥70%):")
        report_lines.append(f"  face_recognition: {len(fr_high_conf)} matches")
        report_lines.append(f"  deepface:         {len(df_high_conf)} matches")
        report_lines.append("")

        # Show top matches for manual inspection
        report_lines.append("TOP MATCHES FOR MANUAL INSPECTION:")
        report_lines.append("")

        # face_recognition top matches
        report_lines.append("face_recognition top matches:")
        for i, match_data in enumerate(fr_matches[:3]):  # Show first 3 faces
            query_face = match_data['query_face']
            report_lines.append(f"  Query: {query_face['face_id']} ({Path(query_face['image_path']).name})")
            for match in match_data['matches'][:3]:  # Top 3 matches
                report_lines.append(f"    → {match['face_id']}: {match['confidence']:.1f}% ({Path(match['image_path']).name})")
            report_lines.append("")

        # deepface top matches
        report_lines.append("deepface top matches:")
        for i, match_data in enumerate(df_matches[:3]):  # Show first 3 faces
            query_face = match_data['query_face']
            report_lines.append(f"  Query: {query_face['face_id']} ({Path(query_face['image_path']).name})")
            for match in match_data['matches'][:3]:  # Top 3 matches
                report_lines.append(f"    → {match['face_id']}: {match['confidence']:.1f}% ({Path(match['image_path']).name})")
            report_lines.append("")

        # Recommendations
        report_lines.append("RECOMMENDATIONS:")
        if len(fr_high_conf) > len(df_high_conf) * 1.5:
            report_lines.append("  ⚠️  face_recognition shows significantly more high-confidence matches")
            report_lines.append("     This may indicate more false positives")
        if df_total_time > fr_total_time * 3:
            report_lines.append("  ⚠️  deepface is significantly slower")
            report_lines.append("     Consider GPU acceleration or faster models")
        if df_total_faces > fr_total_faces:
            report_lines.append("  ✅ deepface detected more faces")
            report_lines.append("     Better face detection in difficult conditions")

        report_lines.append("")
        report_lines.append("=" * 60)

        report_text = "\n".join(report_lines)

        # Save report if output directory specified
        if output_dir:
            report_path = Path(output_dir) / "comparison_report.txt"
            with open(report_path, 'w') as f:
                f.write(report_text)
            self.log(f"Report saved to: {report_path}")

        return report_text

    def run_test(self, folder_path: str, save_crops: bool = False,
                save_matrices: bool = False) -> Dict:
        """Run the complete face recognition comparison test"""
        self.log(f"Starting face recognition test on: {folder_path}")

        # Get image files
        image_files = self.get_image_files(folder_path)
        if not image_files:
            raise ValueError("No image files found in the specified folder")

        # Create output directory if needed
        output_dir = None
        if save_crops or save_matrices:
            output_dir = Path(folder_path).parent / "test_results"
            output_dir.mkdir(exist_ok=True)

        # Process images with both methods
        self.log("Processing images with face_recognition...")
        for image_path in image_files:
            result = self.process_with_face_recognition(image_path)
            self.results['face_recognition']['faces'].extend(result['faces'])
            self.results['face_recognition']['times'].append(result['processing_time'])
            self.results['face_recognition']['encodings'].extend(result['encodings'])

        self.log("Processing images with deepface...")
        for image_path in image_files:
            result = self.process_with_deepface(image_path)
            self.results['deepface']['faces'].extend(result['faces'])
            self.results['deepface']['times'].append(result['processing_time'])
            self.results['deepface']['encodings'].extend(result['encodings'])

        # Calculate similarity matrices
        self.log("Calculating similarity matrices...")
        fr_matrix = self.calculate_similarity_matrix(
            self.results['face_recognition']['encodings'], 'face_recognition'
        )
        df_matrix = self.calculate_similarity_matrix(
            self.results['deepface']['encodings'], 'deepface'
        )

        # Find top matches
        fr_matches = self.find_top_matches(
            fr_matrix, self.results['face_recognition']['faces'], 'face_recognition'
        )
        df_matches = self.find_top_matches(
            df_matrix, self.results['deepface']['faces'], 'deepface'
        )

        # Save outputs if requested
        if save_crops and output_dir:
            self.log("Saving face crops...")
            self.save_face_crops(self.results['face_recognition']['faces'], str(output_dir), 'face_recognition')
            self.save_face_crops(self.results['deepface']['faces'], str(output_dir), 'deepface')

        if save_matrices and output_dir:
            self.log("Saving similarity matrices...")
            self.save_similarity_matrices(
                fr_matrix, df_matrix,
                self.results['face_recognition']['faces'],
                self.results['deepface']['faces'],
                str(output_dir)
            )

        # Generate and display report
        report = self.generate_report(
            self.results['face_recognition'], self.results['deepface'],
            fr_matches, df_matches, str(output_dir) if output_dir else None
        )

        print(report)

        return {
            'face_recognition': {
                'faces': self.results['face_recognition']['faces'],
                'matches': fr_matches,
                'matrix': fr_matrix
            },
            'deepface': {
                'faces': self.results['deepface']['faces'],
                'matches': df_matches,
                'matrix': df_matrix
            }
        }


def main():
    """Main CLI entry point.

    Parses the command line, validates the photo folder, runs the comparison
    and exits with status 1 on any validation or runtime failure.
    """
    parser = argparse.ArgumentParser(
        description="Compare face_recognition vs deepface on a folder of photos",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python test_face_recognition.py demo_photos/
  python test_face_recognition.py demo_photos/ --save-crops --verbose
  python test_face_recognition.py demo_photos/ --save-matrices --save-crops
        """
    )

    parser.add_argument('folder', help='Path to folder containing photos to test')
    parser.add_argument('--save-crops', action='store_true',
                       help='Save face crops for manual inspection')
    parser.add_argument('--save-matrices', action='store_true',
                       help='Save similarity matrices as CSV files')
    parser.add_argument('--verbose', '-v', action='store_true',
                       help='Enable verbose logging')

    args = parser.parse_args()

    # Validate folder path
    if not os.path.exists(args.folder):
        print(f"Error: Folder not found: {args.folder}")
        sys.exit(1)
    # An existing regular file would otherwise pass validation and only fail
    # later with a misleading "no image files" error.
    if not os.path.isdir(args.folder):
        print(f"Error: Not a folder: {args.folder}")
        sys.exit(1)

    # Check dependencies
    # NOTE: this file also imports both packages at module level, so a
    # missing package already fails at import time; this check only takes
    # effect if those imports are ever made lazy.
    try:
        import face_recognition
        from deepface import DeepFace
    except ImportError as e:
        print(f"Error: Missing required dependency: {e}")
        print("Please install with: pip install face_recognition deepface")
        sys.exit(1)

    # Run test
    try:
        tester = FaceRecognitionTester(verbose=args.verbose)
        tester.run_test(
            args.folder,
            save_crops=args.save_crops,
            save_matrices=args.save_matrices
        )

        print("\n✅ Test completed successfully!")
        if args.save_crops or args.save_matrices:
            print(f"📁 Results saved to: {Path(args.folder).parent / 'test_results'}")

    except Exception as e:
        print(f"❌ Test failed: {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()