# punimtag/search_stats.py

#!/usr/bin/env python3
"""
Search functionality and statistics for PunimTag
"""

from typing import List, Dict, Tuple, Optional

from database import DatabaseManager


class SearchStats:
    """Search helpers and derived statistics on top of a database manager.

    All data access goes through the manager object passed to the
    constructor. This class builds the search queries, post-processes the
    rows, and derives ratios from the raw counters the manager reports.

    Several methods are placeholders that always return an empty result;
    each one says so in its docstring.
    """

    def __init__(self, db_manager: DatabaseManager, verbose: int = 0):
        """Store the database manager and the verbosity level.

        Args:
            db_manager: Object providing ``show_people_list()``,
                ``get_db_connection()``, ``load_tag_mappings()`` and
                ``get_statistics()``.
            verbose: When greater than 0, query errors are printed instead
                of being dropped silently.
        """
        self.db = db_manager
        self.verbose = verbose

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _report_error(self, context: str, error: Exception) -> None:
        """Print ``"<context>: <error>"`` when verbose output is enabled."""
        if self.verbose > 0:
            print(f"{context}: {error}")

    @staticmethod
    def _ratio(numerator, denominator, scale=1):
        """Return ``numerator / denominator * scale``, or 0 if denominator <= 0."""
        if denominator > 0:
            return (numerator / denominator) * scale
        return 0

    @staticmethod
    def _person_matches(person, search_name: str) -> bool:
        """Tell whether the lowercase ``search_name`` occurs in any name field.

        ``person`` is a row from ``show_people_list()``; only its first five
        columns (id, first, last, middle, maiden name) are read, so extra
        trailing columns are tolerated.
        """
        first_name, last_name, middle_name, maiden_name = person[1:5]
        # "first last" lets a query such as "john smi" span both fields.
        full_name = f"{first_name or ''} {last_name or ''}".strip()
        candidates = (full_name, first_name, last_name, middle_name, maiden_name)
        return any(
            candidate and search_name in candidate.lower()
            for candidate in candidates
        )

    # ------------------------------------------------------------------
    # Searches
    # ------------------------------------------------------------------

    def search_faces(self, person_name: str) -> List[Tuple[str, str]]:
        """Find photos containing a person whose name matches ``person_name``.

        Matching is a case-insensitive substring test against the first,
        last, middle and maiden names and against "first last".

        Args:
            person_name: Search term; ``None`` or blank yields no results.

        Returns:
            A list of ``(person_full_name, photo_path)`` tuples ordered by
            last name, first name and path. Returns an empty list when
            nothing matches or when the query fails (the failure is printed
            if ``verbose > 0``).
        """
        search_name = (person_name or "").strip().lower()
        if not search_name:
            # Nothing to search for: skip the people lookup entirely.
            return []

        matching_people = [
            person[0]
            for person in self.db.show_people_list()
            if self._person_matches(person, search_name)
        ]
        if not matching_people:
            return []

        # One bound parameter per person id; only "?" marks are interpolated.
        placeholders = ",".join("?" * len(matching_people))
        query = f"""
            SELECT DISTINCT p.path, pe.first_name, pe.last_name
            FROM faces f
            JOIN photos p ON p.id = f.photo_id
            JOIN people pe ON pe.id = f.person_id
            WHERE f.person_id IN ({placeholders})
            ORDER BY pe.last_name, pe.first_name, p.path
        """

        results: List[Tuple[str, str]] = []
        try:
            with self.db.get_db_connection() as conn:
                cursor = conn.cursor()
                cursor.execute(query, tuple(matching_people))
                for row in cursor.fetchall():
                    if not row or not row[0]:
                        continue
                    first = (row[1] or "").strip()
                    last = (row[2] or "").strip()
                    full_name = f"{first} {last}".strip() or "Unknown"
                    results.append((full_name, row[0]))
        except Exception as e:
            # Best effort: a schema mismatch or connection problem yields
            # whatever was collected so far, but is reported when verbose.
            self._report_error("Error searching faces by name", e)

        return results

    def search_photos_by_date(
        self, date_from: Optional[str] = None, date_to: Optional[str] = None
    ) -> List[Tuple]:
        """Search photos by date range.

        Placeholder: not implemented yet, always returns an empty list.
        """
        return []

    def search_photos_by_tags(self, tags: List[str], match_all: bool = False) -> List[Tuple]:
        """Search photos by tag names (case-insensitive).

        Args:
            tags: Tag names to search for. ``None`` or blank entries are
                ignored; repeated names count once.
            match_all: If True, a photo must carry every requested tag, so
                an unknown tag name makes the result empty. If False, a
                photo carrying any known requested tag is returned and
                unknown names are ignored.

        Returns:
            A list of ``(photo_path, tag_names)`` tuples ordered by path,
            where ``tag_names`` is a ``", "``-joined string of the photo's
            tags that were part of the search. Returns an empty list when
            nothing matches or when the query fails (the failure is printed
            if ``verbose > 0``).
        """
        if not tags:
            return []

        # The name -> id mapping is looked up with lowercase keys.
        # NOTE(review): assumes load_tag_mappings() lowercases its keys.
        _, tag_name_to_id = self.db.load_tag_mappings()

        tag_ids = []
        for tag_name in tags:
            normalized = (tag_name or "").lower().strip()
            if not normalized:
                continue
            if normalized not in tag_name_to_id:
                if match_all:
                    # A tag that does not exist cannot be on any photo.
                    return []
                continue
            tag_id = tag_name_to_id[normalized]
            # De-duplicate: the ALL query compares against len(tag_ids), so
            # a repeated id would make the count unreachable.
            if tag_id not in tag_ids:
                tag_ids.append(tag_id)

        if not tag_ids:
            return []

        placeholders = ",".join("?" * len(tag_ids))
        params = tuple(tag_ids)
        if match_all:
            select_clause = "SELECT"
            having_clause = "HAVING COUNT(DISTINCT ptl.tag_id) = ?"
            params += (len(tag_ids),)
        else:
            select_clause = "SELECT DISTINCT"
            having_clause = ""

        query = f'''
            {select_clause} p.path, GROUP_CONCAT(t.tag_name, ', ') as tag_names
            FROM photos p
            JOIN phototaglinkage ptl ON p.id = ptl.photo_id
            JOIN tags t ON ptl.tag_id = t.id
            WHERE ptl.tag_id IN ({placeholders})
            GROUP BY p.id, p.path
            {having_clause}
            ORDER BY p.path
        '''

        results = []
        try:
            with self.db.get_db_connection() as conn:
                cursor = conn.cursor()
                cursor.execute(query, params)
                for row in cursor.fetchall():
                    if row and row[0]:
                        results.append((row[0], row[1] or ""))
        except Exception as e:
            self._report_error("Error searching photos by tags", e)

        return results

    def search_photos_by_people(self, people: List[str]) -> List[Tuple]:
        """Search photos by people.

        Placeholder: not implemented yet, always returns an empty list.
        """
        return []

    # ------------------------------------------------------------------
    # Statistics
    # ------------------------------------------------------------------

    def get_statistics(self) -> Dict:
        """Return the database counters plus derived ratios.

        The counters come from ``self.db.get_statistics()``. The added keys
        are ``processing_percentage``, ``identification_percentage``,
        ``faces_per_person``, ``faces_per_photo`` and ``tags_per_photo``;
        each is 0 when its denominator is not positive.

        Returns:
            A new dict; the mapping returned by the database manager is not
            modified.
        """
        # Copy so the derived keys never leak into the manager's own dict.
        stats = dict(self.db.get_statistics())

        total_photos = stats['total_photos']
        total_faces = stats['total_faces']
        identified_faces = stats['identified_faces']

        stats['processing_percentage'] = self._ratio(
            stats['processed_photos'], total_photos, 100
        )
        stats['identification_percentage'] = self._ratio(
            identified_faces, total_faces, 100
        )
        stats['faces_per_person'] = self._ratio(
            identified_faces, stats['total_people']
        )
        stats['faces_per_photo'] = self._ratio(total_faces, total_photos)
        # 'total_photo_tags' is only read when there is at least one photo.
        stats['tags_per_photo'] = (
            self._ratio(stats['total_photo_tags'], total_photos)
            if total_photos > 0
            else 0
        )

        return stats

    def print_statistics(self):
        """Print the statistics from ``get_statistics()`` to stdout."""
        stats = self.get_statistics()

        print("\n📊 PunimTag Database Statistics")
        print("=" * 50)

        print("📸 Photos:")
        print(f"   Total photos: {stats['total_photos']}")
        print(f"   Processed: {stats['processed_photos']} ({stats['processing_percentage']:.1f}%)")
        print(f"   Unprocessed: {stats['total_photos'] - stats['processed_photos']}")

        print("\n👤 Faces:")
        print(f"   Total faces: {stats['total_faces']}")
        print(f"   Identified: {stats['identified_faces']} ({stats['identification_percentage']:.1f}%)")
        print(f"   Unidentified: {stats['unidentified_faces']}")

        print("\n👥 People:")
        print(f"   Total people: {stats['total_people']}")
        print(f"   Average faces per person: {stats['faces_per_person']:.1f}")

        print("\n🏷️  Tags:")
        print(f"   Total tags: {stats['total_tags']}")
        print(f"   Total photo-tag links: {stats['total_photo_tags']}")
        print(f"   Average tags per photo: {stats['tags_per_photo']:.1f}")

        print("\n📈 Averages:")
        print(f"   Faces per photo: {stats['faces_per_photo']:.1f}")
        print(f"   Tags per photo: {stats['tags_per_photo']:.1f}")

        print("=" * 50)

    def get_photo_statistics(self) -> Dict:
        """Return the photo-related subset of ``get_statistics()``.

        Keys: ``total_photos``, ``processed_photos``, ``unprocessed_photos``
        (total minus processed) and ``processing_percentage``.
        """
        stats = self.get_statistics()

        return {
            'total_photos': stats['total_photos'],
            'processed_photos': stats['processed_photos'],
            'unprocessed_photos': stats['total_photos'] - stats['processed_photos'],
            'processing_percentage': stats['processing_percentage'],
        }

    def get_face_statistics(self) -> Dict:
        """Return the face-related subset of ``get_statistics()``."""
        stats = self.get_statistics()
        keys = (
            'total_faces',
            'identified_faces',
            'unidentified_faces',
            'identification_percentage',
            'faces_per_photo',
        )
        return {key: stats[key] for key in keys}

    def get_people_statistics(self) -> Dict:
        """Return the people-related subset of ``get_statistics()``."""
        stats = self.get_statistics()
        return {key: stats[key] for key in ('total_people', 'faces_per_person')}

    def get_tag_statistics(self) -> Dict:
        """Return the tag-related subset of ``get_statistics()``."""
        stats = self.get_statistics()
        keys = ('total_tags', 'total_photo_tags', 'tags_per_photo')
        return {key: stats[key] for key in keys}

    def get_most_common_tags(self, limit: int = 10) -> List[Tuple[str, int]]:
        """Get most commonly used tags.

        Placeholder: not implemented yet, always returns an empty list.
        """
        return []

    def get_most_photographed_people(self, limit: int = 10) -> List[Tuple[str, int]]:
        """Get most photographed people.

        Placeholder: not implemented yet, always returns an empty list.
        """
        return []

    def get_photos_without_faces(self) -> List[Tuple]:
        """Get photos that have no rows in the ``faces`` table.

        Returns:
            A list of ``(photo_path, filename)`` tuples ordered by filename.
            Returns an empty list when the query fails (the failure is
            printed if ``verbose > 0``).
        """
        results = []
        try:
            with self.db.get_db_connection() as conn:
                cursor = conn.cursor()
                # Anti-join: the LEFT JOIN leaves f.photo_id NULL exactly
                # for photos that have no matching face row.
                cursor.execute('''
                    SELECT p.path, p.filename
                    FROM photos p
                    LEFT JOIN faces f ON p.id = f.photo_id
                    WHERE f.photo_id IS NULL
                    ORDER BY p.filename
                ''')
                for row in cursor.fetchall():
                    if row and row[0]:
                        results.append((row[0], row[1]))
        except Exception as e:
            self._report_error("Error searching photos without faces", e)

        return results

    def get_photos_without_tags(self) -> List[Tuple]:
        """Get photos that have no tags.

        Placeholder: not implemented yet, always returns an empty list.
        """
        return []

    def get_duplicate_faces(self, tolerance: float = 0.6) -> List[Dict]:
        """Get potential duplicate faces (same person, different photos).

        Placeholder: not implemented yet, always returns an empty list.
        """
        return []

    def get_face_quality_distribution(self) -> Dict:
        """Get distribution of face quality scores.

        Placeholder: not implemented yet, always returns an empty dict.
        """
        return {}

    def get_processing_timeline(self) -> List[Tuple[str, int]]:
        """Get timeline of photo processing (photos processed per day).

        Placeholder: not implemented yet, always returns an empty list.
        """
        return []

    # ------------------------------------------------------------------
    # Export / reporting
    # ------------------------------------------------------------------

    def export_statistics(self, filename: str = "punimtag_stats.json"):
        """Write ``get_statistics()`` to ``filename`` as indented JSON.

        Success or failure is reported on stdout; file and serialization
        errors are caught rather than raised.
        """
        import json

        stats = self.get_statistics()

        try:
            # Explicit encoding keeps the output independent of the locale.
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(stats, f, indent=2)
        except (OSError, TypeError, ValueError) as e:
            print(f"❌ Error exporting statistics: {e}")
        else:
            print(f"✅ Statistics exported to {filename}")

    def generate_report(self) -> str:
        """Build a plain-text report of the current statistics.

        Returns:
            A multi-line string that starts and ends with a newline and is
            stamped with the current local time.
        """
        from datetime import datetime

        stats = self.get_statistics()
        generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        report = f"""
PunimTag Database Report
Generated: {generated_at}

PHOTO STATISTICS:
- Total photos: {stats['total_photos']}
- Processed: {stats['processed_photos']} ({stats['processing_percentage']:.1f}%)
- Unprocessed: {stats['total_photos'] - stats['processed_photos']}

FACE STATISTICS:
- Total faces: {stats['total_faces']}
- Identified: {stats['identified_faces']} ({stats['identification_percentage']:.1f}%)
- Unidentified: {stats['unidentified_faces']}
- Average faces per photo: {stats['faces_per_photo']:.1f}

PEOPLE STATISTICS:
- Total people: {stats['total_people']}
- Average faces per person: {stats['faces_per_person']:.1f}

TAG STATISTICS:
- Total tags: {stats['total_tags']}
- Total photo-tag links: {stats['total_photo_tags']}
- Average tags per photo: {stats['tags_per_photo']:.1f}
"""

        return report