diff --git a/.cursor/rules/api-guidelines.md b/.cursor/rules/api-guidelines.md deleted file mode 100644 index dcebf78..0000000 --- a/.cursor/rules/api-guidelines.md +++ /dev/null @@ -1,27 +0,0 @@ -# API Development Guidelines - -## Response Format - -Always use this JSON structure: - -```json -{ - "success": true, - "data": { - /* response data */ - }, - "message": "Optional message" -} -``` - -## Error Handling - -- Use proper HTTP status codes -- Include descriptive error messages -- Log errors for debugging - -## Database Operations - -- Always use parameterized queries -- Handle connection management properly -- Implement rollback on errors diff --git a/.cursor/rules/database-operations.md b/.cursor/rules/database-operations.md deleted file mode 100644 index c723bf6..0000000 --- a/.cursor/rules/database-operations.md +++ /dev/null @@ -1,68 +0,0 @@ -# Database Operations Guidelines - -## Connection Management - -Always use proper connection management with error handling: - -```python -def get_db_connection(): - conn = sqlite3.connect('punimtag_simple.db') - conn.row_factory = sqlite3.Row # Enable dict-like access - return conn - -# Usage in endpoint -try: - conn = get_db_connection() - cursor = conn.cursor() - # Database operations - conn.commit() -except Exception as e: - conn.rollback() - return jsonify({'success': False, 'error': str(e)}), 500 -finally: - conn.close() -``` - -## Parameterized Queries - -Always use parameterized queries to prevent SQL injection: - -```python -# Correct - Use parameterized queries -cursor.execute('SELECT * FROM images WHERE id = ?', (image_id,)) -cursor.execute('INSERT INTO photos (name, path) VALUES (?, ?)', (name, path)) - -# Wrong - String concatenation (vulnerable to SQL injection) -cursor.execute(f'SELECT * FROM images WHERE id = {image_id}') -``` - -## Database Schema - -Follow the established schema: - -```sql --- Core tables -images (id, filename, path, date_taken, metadata) -faces (id, image_id, person_id, encoding, coordinates, confidence) -people (id, name, created_date) -tags (id, name) -image_tags (image_id, tag_id) - --- Supporting tables -face_encodings (id, face_id, encoding_data) -photo_metadata (image_id, exif_data, gps_data) -``` - -## Query Optimization - -- Use indexes on frequently queried columns -- Minimize N+1 query problems -- Use LIMIT and OFFSET for pagination -- Consider query performance for large datasets - -## Data Validation - -- Validate data before database operations -- Check for required fields -- Handle data type conversions properly -- Implement proper error messages diff --git a/.cursor/rules/face-recognition.md b/.cursor/rules/face-recognition.md deleted file mode 100644 index 2902d70..0000000 --- a/.cursor/rules/face-recognition.md +++ /dev/null @@ -1,130 +0,0 @@ -# Face Recognition Guidelines - -## Technology Stack - -- **dlib**: Primary face detection and recognition library -- **Pillow (PIL)**: Image processing and manipulation -- **NumPy**: Numerical operations for face encodings -- **OpenCV**: Optional for additional image processing - -## Face Detection Pipeline - -Follow this standardized pipeline: - -```python -import dlib -import numpy as np -from PIL import Image - -def detect_faces_in_image(image_path: str) -> List[Dict]: - """ - Detect faces in an image using dlib. - - Args: - image_path: Path to the image file - - Returns: - List of face dictionaries with coordinates and encodings - """ - # Load image - image = dlib.load_rgb_image(image_path) - - # Initialize face detector - detector = dlib.get_frontal_face_detector() - - # Detect faces - faces = detector(image) - - # Get face encodings - predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") - face_recognizer = dlib.face_recognition_model_v1("dlib_face_recognition_resnet_model_v1.dat") - - face_data = [] - for face in faces: - # Get facial landmarks - shape = predictor(image, face) - - # Get face encoding - face_encoding = np.array(face_recognizer.compute_face_descriptor(image, shape)) - - face_data.append({ - 'left': face.left(), - 'top': face.top(), - 'right': face.right(), - 'bottom': face.bottom(), - 'encoding': face_encoding.tolist(), - 'confidence': calculate_confidence(face_encoding) - }) - - return face_data -``` - -## Face Recognition and Matching - -Use standardized similarity matching: - -```python -def calculate_face_similarity(encoding1: List[float], encoding2: List[float]) -> float: - """ - Calculate similarity between two face encodings using Euclidean distance. - - Args: - encoding1: First face encoding - encoding2: Second face encoding - - Returns: - Similarity score (0 = identical, higher = more different) - """ - return np.linalg.norm(np.array(encoding1) - np.array(encoding2)) - -def find_similar_faces(target_encoding: List[float], - face_encodings: List[List[float]], - threshold: float = 0.6) -> List[int]: - """ - Find faces similar to the target encoding. - - Args: - target_encoding: Encoding to match against - face_encodings: List of encodings to search - threshold: Similarity threshold (lower = more strict) - - Returns: - List of indices of similar faces - """ - similar_faces = [] - - for i, encoding in enumerate(face_encodings): - similarity = calculate_face_similarity(target_encoding, encoding) - if similarity <= threshold: - similar_faces.append(i) - - return similar_faces -``` - -## Image Processing Best Practices - -- **Thumbnail Generation**: Create thumbnails for UI display -- **Memory Management**: Process large images in chunks -- **Format Support**: Handle multiple image formats (JPG, PNG, etc.) -- **Error Handling**: Gracefully handle corrupted images - -## Performance Optimization - -- **Batch Processing**: Process multiple images efficiently -- **Caching**: Cache face encodings to avoid recomputation -- **GPU Acceleration**: Use CUDA when available for dlib -- **Parallel Processing**: Use multiprocessing for large datasets - -## Quality Control - -- **Confidence Scoring**: Implement confidence thresholds -- **False Positive Detection**: Filter out non-face detections -- **Face Quality Assessment**: Evaluate face image quality -- **Duplicate Detection**: Identify and handle duplicate faces - -## Storage and Retrieval - -- **Encoding Storage**: Store face encodings efficiently in database -- **Indexing**: Use appropriate database indexes for fast retrieval -- **Compression**: Consider compression for large encoding datasets -- **Backup**: Regular backup of face recognition data diff --git a/.cursor/rules/javascript-conventions.md b/.cursor/rules/javascript-conventions.md deleted file mode 100644 index 7049bb2..0000000 --- a/.cursor/rules/javascript-conventions.md +++ /dev/null @@ -1,132 +0,0 @@ -# JavaScript Conventions - -## Code Style - -Use ES6+ features and modern JavaScript practices: - -```javascript -// Use ES6+ features -const API_BASE_URL = "/api"; -const DEFAULT_PAGE_SIZE = 20; - -// Async/await for API calls -async function fetchPhotos(page = 1, perPage = DEFAULT_PAGE_SIZE) { - try { - const response = await fetch( - `${API_BASE_URL}/photos?page=${page}&per_page=${perPage}` - ); - - if (!response.ok) { - throw new Error(`HTTP error! status: ${response.status}`); - } - - const data = await response.json(); - return data; - } catch (error) { - console.error("Error fetching photos:", error); - throw error; - } -} -``` - -## Event Handlers - -Use descriptive function names and proper event handling: - -```javascript -// Event handlers -function handlePhotoClick(photoId) { - showPhotoDetails(photoId); -} - -function handleFaceIdentification(faceId, personName) { - identifyFace(faceId, personName); -} -``` - -## DOM Manipulation - -Use efficient DOM manipulation patterns: - -```javascript -// DOM manipulation -function updatePhotoGrid(photos) { - const grid = document.getElementById("photo-grid"); - grid.innerHTML = ""; - - photos.forEach((photo) => { - const photoElement = createPhotoElement(photo); - grid.appendChild(photoElement); - }); -} - -function createPhotoElement(photo) { - const element = document.createElement("div"); - element.className = "photo-card"; - element.innerHTML = ` - ${photo.filename} -
-

${photo.filename}

-

${photo.date_taken}

-
- `; - return element; -} -``` - -## Error Handling - -Implement comprehensive error handling: - -```javascript -// Global error handler -window.addEventListener("error", (event) => { - console.error("Global error:", event.error); - showErrorMessage("An unexpected error occurred"); -}); - -// API error handling -async function safeApiCall(apiFunction, ...args) { - try { - return await apiFunction(...args); - } catch (error) { - console.error("API call failed:", error); - showErrorMessage("Failed to load data. Please try again."); - return null; - } -} -``` - -## Progressive Loading - -Implement progressive loading for better UX: - -```javascript -// Progressive loading with Intersection Observer -const observer = new IntersectionObserver((entries) => { - entries.forEach((entry) => { - if (entry.isIntersecting) { - loadMorePhotos(); - } - }); -}); - -// Debouncing for search -function debounce(func, wait) { - let timeout; - return function executedFunction(...args) { - const later = () => { - clearTimeout(timeout); - func(...args); - }; - clearTimeout(timeout); - timeout = setTimeout(later, wait); - }; -} -``` - -## Constants and Configuration - -- Define constants at the top of files -- Use meaningful names -- Group related constants together diff --git a/.cursor/rules/performance-optimization.md b/.cursor/rules/performance-optimization.md deleted file mode 100644 index 8736925..0000000 --- a/.cursor/rules/performance-optimization.md +++ /dev/null @@ -1,433 +0,0 @@ -# Performance Optimization Guidelines - -## Image Processing Optimization - -### Thumbnail Generation - -Implement efficient thumbnail generation with caching: - -```python -import os -from PIL import Image -from functools import lru_cache - -THUMBNAIL_SIZE = (200, 200) -THUMBNAIL_CACHE_DIR = 'thumbnails' - -@lru_cache(maxsize=1000) -def generate_thumbnail(image_path: str, size: tuple = THUMBNAIL_SIZE) -> str: - """ - Generate thumbnail with caching. - - Args: - image_path: Path to original image - size: Thumbnail size (width, height) - - Returns: - Path to generated thumbnail - """ - # Create cache directory if it doesn't exist - os.makedirs(THUMBNAIL_CACHE_DIR, exist_ok=True) - - # Generate cache key - cache_key = f"{hash(image_path)}_{size[0]}x{size[1]}.jpg" - cache_path = os.path.join(THUMBNAIL_CACHE_DIR, cache_key) - - # Return cached thumbnail if it exists - if os.path.exists(cache_path): - return cache_path - - # Generate new thumbnail - with Image.open(image_path) as img: - img.thumbnail(size, Image.Resampling.LANCZOS) - img.save(cache_path, 'JPEG', quality=85, optimize=True) - - return cache_path -``` - -### Progressive Loading - -Implement progressive loading for large photo collections: - -```python -def get_photos_paginated(page: int = 1, per_page: int = 20) -> Dict[str, any]: - """ - Get photos with pagination for performance. - - Args: - page: Page number (1-based) - per_page: Number of photos per page - - Returns: - Dictionary with photos and pagination info - """ - offset = (page - 1) * per_page - - conn = get_db_connection() - cursor = conn.cursor() - - # Get total count - cursor.execute('SELECT COUNT(*) FROM images') - total = cursor.fetchone()[0] - - # Get paginated results - cursor.execute(''' - SELECT id, filename, path, date_taken - FROM images - ORDER BY date_taken DESC - LIMIT ? OFFSET ? - ''', (per_page, offset)) - - photos = [dict(row) for row in cursor.fetchall()] - conn.close() - - return { - 'photos': photos, - 'pagination': { - 'page': page, - 'per_page': per_page, - 'total': total, - 'pages': (total + per_page - 1) // per_page - } - } -``` - -## Database Optimization - -### Indexing Strategy - -Create appropriate indexes for frequently queried columns: - -```sql --- Indexes for performance -CREATE INDEX IF NOT EXISTS idx_images_date_taken ON images(date_taken); -CREATE INDEX IF NOT EXISTS idx_faces_image_id ON faces(image_id); -CREATE INDEX IF NOT EXISTS idx_faces_person_id ON faces(person_id); -CREATE INDEX IF NOT EXISTS idx_image_tags_image_id ON image_tags(image_id); -CREATE INDEX IF NOT EXISTS idx_image_tags_tag_id ON image_tags(tag_id); -``` - -### Query Optimization - -Optimize database queries for performance: - -```python -def get_photos_with_faces_optimized(page: int = 1, per_page: int = 20) -> Dict[str, any]: - """ - Optimized query to get photos with face counts. - - Args: - page: Page number - per_page: Photos per page - - Returns: - Photos with face counts - """ - offset = (page - 1) * per_page - - conn = get_db_connection() - cursor = conn.cursor() - - # Single query with JOIN instead of N+1 queries - cursor.execute(''' - SELECT - i.id, - i.filename, - i.path, - i.date_taken, - COUNT(f.id) as face_count - FROM images i - LEFT JOIN faces f ON i.id = f.image_id - GROUP BY i.id, i.filename, i.path, i.date_taken - ORDER BY i.date_taken DESC - LIMIT ? OFFSET ? - ''', (per_page, offset)) - - photos = [dict(row) for row in cursor.fetchall()] - conn.close() - - return {'photos': photos} -``` - -### Connection Pooling - -Implement connection pooling for better performance: - -```python -import sqlite3 -from contextlib import contextmanager -from threading import local - -_thread_local = local() - -def get_db_connection(): - """Get database connection with thread-local storage.""" - if not hasattr(_thread_local, 'connection'): - _thread_local.connection = sqlite3.connect('punimtag_simple.db') - _thread_local.connection.row_factory = sqlite3.Row - - return _thread_local.connection - -@contextmanager -def db_transaction(): - """Context manager for database transactions.""" - conn = get_db_connection() - try: - yield conn - conn.commit() - except Exception: - conn.rollback() - raise -``` - -## Frontend Performance - -### Lazy Loading - -Implement lazy loading for images: - -```javascript -// Lazy loading with Intersection Observer -function setupLazyLoading() { - const imageObserver = new IntersectionObserver((entries, observer) => { - entries.forEach((entry) => { - if (entry.isIntersecting) { - const img = entry.target; - img.src = img.dataset.src; - img.classList.remove("lazy"); - observer.unobserve(img); - } - }); - }); - - // Observe all lazy images - document.querySelectorAll("img[data-src]").forEach((img) => { - imageObserver.observe(img); - }); -} - -// Progressive loading for photo grid -function loadMorePhotos() { - const currentPage = - parseInt(document.getElementById("photo-grid").dataset.page) || 1; - - fetch(`/api/photos?page=${currentPage + 1}&per_page=20`) - .then((response) => response.json()) - .then((data) => { - if (data.success && data.data.photos.length > 0) { - appendPhotosToGrid(data.data.photos); - document.getElementById("photo-grid").dataset.page = currentPage + 1; - } - }); -} -``` - -### Debouncing and Throttling - -Implement debouncing for search and filtering: - -```javascript -// Debounced search function -function debounce(func, wait) { - let timeout; - return function executedFunction(...args) { - const later = () => { - clearTimeout(timeout); - func(...args); - }; - clearTimeout(timeout); - timeout = setTimeout(later, wait); - }; -} - -// Debounced search -const debouncedSearch = debounce((searchTerm) => { - fetch(`/api/photos?search=${encodeURIComponent(searchTerm)}`) - .then((response) => response.json()) - .then((data) => { - if (data.success) { - updatePhotoGrid(data.data.photos); - } - }); -}, 300); - -// Throttled scroll handler -function throttle(func, limit) { - let inThrottle; - return function () { - const args = arguments; - const context = this; - if (!inThrottle) { - func.apply(context, args); - inThrottle = true; - setTimeout(() => (inThrottle = false), limit); - } - }; -} -``` - -## Memory Management - -### Image Processing Memory - -Optimize memory usage for large images: - -```python -def process_large_image_safely(image_path: str) -> Dict[str, any]: - """ - Process large image with memory management. - - Args: - image_path: Path to image file - - Returns: - Processing results - """ - try: - with Image.open(image_path) as img: - # Convert to RGB if necessary - if img.mode != 'RGB': - img = img.convert('RGB') - - # Process in chunks for very large images - if img.size[0] * img.size[1] > 10000000: # 10MP threshold - return process_large_image_in_chunks(img) - else: - return process_image_normal(img) - - except Exception as e: - logger.error(f"Error processing image {image_path}: {e}") - return {'error': str(e)} -``` - -### Database Memory - -Optimize database memory usage: - -```python -def get_faces_with_encodings_optimized(limit: int = 100) -> List[Dict]: - """ - Get faces with encodings using memory-efficient approach. - - Args: - limit: Maximum number of faces to retrieve - - Returns: - List of face data - """ - conn = get_db_connection() - cursor = conn.cursor() - - # Use generator to avoid loading all data into memory - cursor.execute(''' - SELECT id, image_id, person_id, encoding, coordinates - FROM faces - LIMIT ? - ''', (limit,)) - - faces = [] - for row in cursor: - face_data = dict(row) - # Convert encoding string back to list if needed - if isinstance(face_data['encoding'], str): - face_data['encoding'] = json.loads(face_data['encoding']) - faces.append(face_data) - - conn.close() - return faces -``` - -## Caching Strategies - -### Application-Level Caching - -Implement caching for frequently accessed data: - -```python -from functools import lru_cache -import time - -# Cache for expensive operations -@lru_cache(maxsize=100) -def get_person_photos_cached(person_id: int) -> List[Dict]: - """Get photos for a person with caching.""" - return get_person_photos(person_id) - -# Time-based cache -class TimedCache: - def __init__(self, ttl_seconds: int = 300): - self.cache = {} - self.ttl = ttl_seconds - - def get(self, key: str): - if key in self.cache: - value, timestamp = self.cache[key] - if time.time() - timestamp < self.ttl: - return value - else: - del self.cache[key] - return None - - def set(self, key: str, value: any): - self.cache[key] = (value, time.time()) - -# Global cache instance -photo_cache = TimedCache(ttl_seconds=300) -``` - -## Performance Monitoring - -### Metrics Collection - -Implement performance monitoring: - -```python -import time -from functools import wraps - -def measure_performance(func): - """Decorator to measure function performance.""" - @wraps(func) - def wrapper(*args, **kwargs): - start_time = time.time() - result = func(*args, **kwargs) - end_time = time.time() - - logger.info(f"{func.__name__} took {end_time - start_time:.3f} seconds") - return result - return wrapper - -# Usage -@measure_performance -def process_photo_batch(photo_paths: List[str]) -> List[Dict]: - """Process a batch of photos with performance monitoring.""" - results = [] - for path in photo_paths: - result = process_single_photo(path) - results.append(result) - return results -``` - -## Best Practices Summary - -### Backend Performance - -- **Database Indexing**: Create indexes on frequently queried columns -- **Query Optimization**: Use JOINs instead of N+1 queries -- **Connection Management**: Implement connection pooling -- **Caching**: Cache expensive operations -- **Batch Processing**: Process data in batches - -### Frontend Performance - -- **Lazy Loading**: Load images and data on demand -- **Debouncing**: Prevent excessive API calls -- **Progressive Loading**: Load data in chunks -- **Image Optimization**: Use appropriate image formats and sizes - -### Memory Management - -- **Resource Cleanup**: Properly close files and connections -- **Memory Monitoring**: Monitor memory usage -- **Efficient Data Structures**: Use appropriate data structures -- **Garbage Collection**: Help garbage collector with proper cleanup diff --git a/.cursor/rules/python-conventions.md b/.cursor/rules/python-conventions.md deleted file mode 100644 index 4ddbc7b..0000000 --- a/.cursor/rules/python-conventions.md +++ /dev/null @@ -1,74 +0,0 @@ -# Python Coding Conventions - -## Code Style (PEP 8) - -- Use snake_case for variables and functions -- Use PascalCase for classes -- Use UPPER_CASE for constants -- Follow PEP 8 formatting guidelines - -## Type Hints - -Always use type hints for function parameters and return values: - -```python -from typing import List, Dict, Optional, Union, Tuple - -def get_photos( - user_id: int, - page: int = 1, - per_page: int = DEFAULT_PAGE_SIZE, - filters: Optional[Dict[str, any]] = None -) -> Dict[str, Union[List[Dict], int]]: - """Get photos with pagination and filtering.""" - pass -``` - -## Error Handling - -Use comprehensive error handling with logging: - -```python -import logging -from typing import Optional - -logger = logging.getLogger(__name__) - -def safe_operation(func): - """Decorator for safe operation execution.""" - def wrapper(*args, **kwargs): - try: - return func(*args, **kwargs) - except Exception as e: - logger.error(f"Error in {func.__name__}: {e}") - return None - return wrapper -``` - -## Function Documentation - -Use detailed docstrings with Args, Returns, and Raises sections: - -```python -def process_image(image_path: str, max_size: int = MAX_FILE_SIZE) -> Dict[str, any]: - """ - Process an image file and extract metadata. - - Args: - image_path: Path to the image file - max_size: Maximum file size in bytes - - Returns: - Dictionary containing image metadata - - Raises: - FileNotFoundError: If image file doesn't exist - ValueError: If file size exceeds limit - """ -``` - -## Constants and Configuration - -- Define constants at module level -- Use meaningful names with UPPER_CASE -- Group related constants together diff --git a/.cursor/rules/security-privacy.md b/.cursor/rules/security-privacy.md deleted file mode 100644 index 1a01a04..0000000 --- a/.cursor/rules/security-privacy.md +++ /dev/null @@ -1,280 +0,0 @@ -# Security and Privacy Guidelines - -## Data Protection Principles - -### Local Storage Only - -- **No Cloud Dependencies**: All data stays on user's local machine -- **No External APIs**: Face recognition runs locally using dlib -- **No Data Sharing**: User data is never transmitted to external services - -### Input Validation - -Always validate and sanitize user inputs: - -```python -import os -import re -from pathlib import Path - -def validate_image_path(image_path: str) -> bool: - """ - Validate image file path for security. - - Args: - image_path: Path to validate - - Returns: - True if path is valid and secure - """ - # Check for path traversal attempts - if '..' in image_path or '//' in image_path: - return False - - # Ensure path is within allowed directory - allowed_dir = Path('/photos') - try: - resolved_path = Path(image_path).resolve() - return allowed_dir in resolved_path.parents - except (ValueError, RuntimeError): - return False - -def sanitize_filename(filename: str) -> str: - """ - Sanitize filename to prevent security issues. - - Args: - filename: Original filename - - Returns: - Sanitized filename - """ - # Remove dangerous characters - filename = re.sub(r'[<>:"/\\|?*]', '_', filename) - - # Limit length - if len(filename) > 255: - name, ext = os.path.splitext(filename) - filename = name[:255-len(ext)] + ext - - return filename -``` - -## File Upload Security - -Implement secure file upload handling: - -```python -ALLOWED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.bmp'} -MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB - -def validate_uploaded_file(file) -> Dict[str, any]: - """ - Validate uploaded file for security. - - Args: - file: Uploaded file object - - Returns: - Validation result with success status and message - """ - # Check file extension - if not file.filename: - return {'success': False, 'error': 'No filename provided'} - - file_ext = os.path.splitext(file.filename)[1].lower() - if file_ext not in ALLOWED_EXTENSIONS: - return {'success': False, 'error': f'File type {file_ext} not allowed'} - - # Check file size - file.seek(0, os.SEEK_END) - file_size = file.tell() - file.seek(0) - - if file_size > MAX_FILE_SIZE: - return {'success': False, 'error': 'File too large'} - - # Validate file content (basic check) - try: - from PIL import Image - image = Image.open(file) - image.verify() - file.seek(0) - except Exception: - return {'success': False, 'error': 'Invalid image file'} - - return {'success': True, 'message': 'File validated successfully'} -``` - -## SQL Injection Prevention - -Always use parameterized queries: - -```python -# Correct - Use parameterized queries -def get_photo_by_id(photo_id: int) -> Optional[Dict]: - conn = get_db_connection() - cursor = conn.cursor() - - cursor.execute('SELECT * FROM images WHERE id = ?', (photo_id,)) - result = cursor.fetchone() - - conn.close() - return dict(result) if result else None - -# Wrong - String concatenation (vulnerable to SQL injection) -def get_photo_by_id_unsafe(photo_id: int) -> Optional[Dict]: - conn = get_db_connection() - cursor = conn.cursor() - - cursor.execute(f'SELECT * FROM images WHERE id = {photo_id}') # DANGEROUS! - result = cursor.fetchone() - - conn.close() - return dict(result) if result else None -``` - -## Privacy Protection - -### Face Data Privacy - -- **Local Storage**: Face encodings stored locally only -- **No Sharing**: Face data never transmitted externally -- **User Control**: Users can delete their face data -- **Encryption**: Consider encrypting sensitive face data - -### Metadata Handling - -- **EXIF Data**: Strip sensitive metadata (GPS, camera info) -- **User Consent**: Ask before storing location data -- **Data Minimization**: Only store necessary metadata - -### Access Control - -```python -def check_file_access_permissions(file_path: str, user_id: int) -> bool: - """ - Check if user has permission to access file. - - Args: - file_path: Path to file - user_id: User ID requesting access - - Returns: - True if access is allowed - """ - # In single-user system, all files belong to the user - # In multi-user system, implement proper access control - return True # Simplified for single-user system -``` - -## Error Handling and Logging - -Implement secure error handling: - -```python -import logging -from typing import Optional - -logger = logging.getLogger(__name__) - -def safe_operation(func): - """Decorator for safe operation execution.""" - def wrapper(*args, **kwargs): - try: - return func(*args, **kwargs) - except Exception as e: - # Log error without exposing sensitive information - logger.error(f"Error in {func.__name__}: {type(e).__name__}") - return None - return wrapper - -def handle_api_error(error: Exception) -> Dict[str, any]: - """ - Handle API errors securely. - - Args: - error: Exception that occurred - - Returns: - Safe error response - """ - # Don't expose internal error details to users - if isinstance(error, ValueError): - return {'success': False, 'error': 'Invalid input provided'} - elif isinstance(error, FileNotFoundError): - return {'success': False, 'error': 'File not found'} - else: - logger.error(f"Unexpected error: {error}") - return {'success': False, 'error': 'An unexpected error occurred'} -``` - -## Data Export and Deletion - -Provide user control over their data: - -```python -def export_user_data(user_id: int) -> Dict[str, any]: - """ - Export user data for portability. - - Args: - user_id: User ID to export data for - - Returns: - Dictionary containing user's data - """ - conn = get_db_connection() - cursor = conn.cursor() - - # Export photos - cursor.execute('SELECT * FROM images WHERE user_id = ?', (user_id,)) - photos = [dict(row) for row in cursor.fetchall()] - - # Export face data - cursor.execute('SELECT * FROM faces WHERE user_id = ?', (user_id,)) - faces = [dict(row) for row in cursor.fetchall()] - - conn.close() - - return { - 'photos': photos, - 'faces': faces, - 'export_date': datetime.now().isoformat() - } - -def delete_user_data(user_id: int) -> bool: - """ - Delete all user data. - - Args: - user_id: User ID to delete data for - - Returns: - True if deletion successful - """ - try: - conn = get_db_connection() - cursor = conn.cursor() - - # Delete user's data - cursor.execute('DELETE FROM faces WHERE user_id = ?', (user_id,)) - cursor.execute('DELETE FROM images WHERE user_id = ?', (user_id,)) - - conn.commit() - conn.close() - - return True - except Exception as e: - logger.error(f"Error deleting user data: {e}") - return False -``` - -## Security Best Practices - -- **Regular Updates**: Keep dependencies updated -- **Input Validation**: Validate all user inputs -- **Error Handling**: Don't expose sensitive information in errors -- **Logging**: Log security-relevant events -- **Backup Security**: Secure backup of user data -- **Access Control**: Implement proper access controls diff --git a/.cursor/rules/testing-standards.md b/.cursor/rules/testing-standards.md deleted file mode 100644 index cbeef84..0000000 --- a/.cursor/rules/testing-standards.md +++ /dev/null @@ -1,169 +0,0 @@ -# Testing Standards - -## Test Organization - -Follow this directory structure: - -``` -tests/ -├── unit/ # Unit tests for individual functions -├── integration/ # Integration tests for API endpoints -├── e2e/ # End-to-end tests for complete workflows -├── fixtures/ # Test data and fixtures -├── utils/ # Test utilities and helpers -└── conftest.py # pytest configuration and shared fixtures -``` - -## Unit Tests - -Test individual functions and classes in isolation: - -```python -# tests/unit/test_face_recognition.py -import pytest -from src.utils.face_recognition import detect_faces, encode_face - -def test_detect_faces_with_valid_image(): - """Test face detection with a valid image.""" - image_path = "tests/fixtures/valid_face.jpg" - faces = detect_faces(image_path) - - assert len(faces) > 0 - assert all(hasattr(face, 'left') for face in faces) - assert all(hasattr(face, 'top') for face in faces) - -def test_detect_faces_with_no_faces(): - """Test face detection with an image containing no faces.""" - image_path = "tests/fixtures/no_faces.jpg" - faces = detect_faces(image_path) - - assert len(faces) == 0 - -def test_encode_face_with_valid_face(): - """Test face encoding with a valid face.""" - face_image = load_test_face_image() - encoding = encode_face(face_image) - - assert len(encoding) == 128 - assert all(isinstance(x, float) for x in encoding) -``` - -## Integration Tests - -Test API endpoints and database interactions: - -```python -# tests/integration/test_photo_api.py -import pytest -from src.app import app - -@pytest.fixture -def client(): - """Create a test client.""" - app.config['TESTING'] = True - app.config['DATABASE'] = 'test.db' - - with app.test_client() as client: - yield client - -def test_get_photos_endpoint(client): - """Test the GET /photos endpoint.""" - response = client.get('/photos') - - assert response.status_code == 200 - data = response.get_json() - assert data['success'] == True - assert 'photos' in data - -def test_create_photo_endpoint(client): - """Test the POST /photos endpoint.""" - photo_data = { - 'filename': 'test.jpg', - 'path': '/test/path/test.jpg' - } - - response = client.post('/photos', json=photo_data) - - assert response.status_code == 201 - data = response.get_json() - assert data['success'] == True - assert 'photo_id' in data -``` - -## Test Fixtures - -Use fixtures for common test data: - -```python -# tests/conftest.py -import pytest -import sqlite3 -import tempfile -import os - -@pytest.fixture -def test_db(): - """Create a temporary test database.""" - db_fd, db_path = tempfile.mkstemp() - - # Create test database schema - conn = sqlite3.connect(db_path) - cursor = conn.cursor() - - cursor.execute(''' - CREATE TABLE images ( - id INTEGER PRIMARY KEY, - filename TEXT NOT NULL, - path TEXT NOT NULL, - date_taken TEXT - ) - ''') - - conn.commit() - conn.close() - - yield db_path - - # Cleanup - os.close(db_fd) - os.unlink(db_path) - -@pytest.fixture -def sample_photo_data(): - """Sample photo data for testing.""" - return { - 'filename': 'test_photo.jpg', - 'path': '/test/path/test_photo.jpg', - 'date_taken': '2024-01-01 12:00:00' - } -``` - -## Test Naming Conventions - -- **Unit Tests**: `test__.py` -- **Integration Tests**: `test__integration.py` -- **E2E Tests**: `test__e2e.py` - -## Test Coverage - -- Aim for at least 80% code coverage -- Test both success and error scenarios -- Test edge cases and boundary conditions -- Mock external dependencies - -## Performance Testing - -Test with realistic data sizes: - -```python -def test_large_photo_collection_performance(): - """Test performance with large photo collections.""" - large_photo_list = generate_test_photos(1000) - - start_time = time.time() - result = process_photos(large_photo_list) - end_time = time.time() - - assert end_time - start_time < 5.0 # Should complete within 5 seconds - assert len(result) == 1000 -``` diff --git a/.cursorrules b/.cursorrules deleted file mode 100644 index 8d1f2f7..0000000 --- a/.cursorrules +++ /dev/null @@ -1,495 +0,0 @@ -# PunimTag - Intelligent Photo Management System - -## Project Overview - -PunimTag is an intelligent photo management system that uses face recognition to automatically organize, tag, and manage personal photo collections. It's built with Flask (Python) and vanilla JavaScript, focusing on privacy-first local processing. - -## Core Value Proposition - -- **Automatic Face Recognition**: Identify and tag people in photos without manual effort -- **Smart Organization**: Group photos by people, events, and locations -- **Duplicate Detection**: Find and manage duplicate photos automatically -- **Intuitive Interface**: Web-based GUI that's easy to use for non-technical users -- **Privacy-First**: Local processing, no cloud dependencies - -## Technology Stack - -### Backend -- **Framework**: Flask (Python web framework) -- **Database**: SQLite (lightweight, file-based) -- **Face Recognition**: dlib (C++ library with Python bindings) -- **Image Processing**: Pillow (PIL fork) -- **Data Processing**: NumPy (numerical operations) - -### Frontend -- **Language**: Vanilla JavaScript (ES6+) -- **Styling**: CSS3 with Grid/Flexbox -- **HTTP Client**: Fetch API -- **Progressive Loading**: Intersection Observer API -- **No Frameworks**: Pure JavaScript for simplicity - -## Project Structure - -``` -PunimTag/ -├── src/ # Main application source code -│ ├── backend/ # Flask backend and API -│ │ ├── app.py # Main Flask application -│ │ ├── db_manager.py # Database operations -│ │ └── visual_identifier.py # Face recognition -│ ├── frontend/ # JavaScript and UI components -│ └── utils/ # Utility functions -│ └── tag_manager.py # Tag management -├── docs/ # Documentation and steering documents -├── tests/ # Test files -├── data/ # Database files and user data -├── config/ # Configuration files -├── scripts/ # Utility scripts -├── assets/ # Static assets -├── photos/ # User photo storage -└── main.py # Application entry point -``` - -## Key Features - -### 1. Photo Management -- Upload and organize photos by date, location, and content -- Automatic metadata extraction (EXIF data, GPS coordinates) -- Batch operations for efficiency - -### 2. Face Recognition & Tagging -- Automatic face detection in photos -- Face identification and naming -- Group photos by people -- Handle multiple faces per photo - -### 3. Duplicate Management -- Find duplicate photos automatically -- Visual comparison tools -- Bulk removal options -- Keep best quality versions - -### 4. Search & Discovery -- Search by person name -- Filter by date ranges -- Tag-based filtering -- Similar face suggestions - -### 5. User Experience -- Progressive loading for large collections -- Responsive web interface -- Custom dialogs (no browser alerts) -- Real-time notifications - -## Database Schema - -```sql --- Core tables -images (id, filename, path, date_taken, metadata) -faces (id, image_id, person_id, encoding, coordinates, confidence) -people (id, name, created_date) -tags (id, name) -image_tags (image_id, tag_id) - --- Supporting tables -face_encodings (id, face_id, encoding_data) -photo_metadata (image_id, exif_data, gps_data) -``` - -## API Standards - -### Response Format - -**Success Response:** -```json -{ - "success": true, - "data": { - // Response data here - }, - "message": "Optional success message" -} -``` - -**Error Response:** -```json -{ - "success": false, - "error": "Descriptive error message", - "code": "ERROR_CODE_OPTIONAL" -} -``` - -**Paginated Response:** -```json -{ - "success": true, - "data": { - "items": [...], - "pagination": { - "page": 1, - "per_page": 20, - "total": 150, - "pages": 8 - } - } -} -``` - -### HTTP Status Codes -- **200 OK**: Request successful -- **201 Created**: Resource created successfully -- **400 Bad Request**: Invalid request data -- **404 Not Found**: Resource not found -- **500 Internal Server Error**: Server error - -### Endpoint Naming Conventions -- **GET /photos**: List photos -- **GET /photos/{id}**: Get specific photo -- **POST /photos**: Create new photo -- **PUT /photos/{id}**: Update photo -- **DELETE /photos/{id}**: Delete photo -- **POST /photos/{id}/identify**: Identify faces in photo - -## Python Code Conventions - -### Code Style (PEP 8) -```python -# Imports -import os -import sys -from typing import List, Dict, Optional -from flask import Flask, request, jsonify - -# Constants -MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB -ALLOWED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif'} - -# Functions -def process_image(image_path: str, max_size: int = MAX_FILE_SIZE) -> Dict[str, any]: - """ - Process an image file and extract metadata. - - Args: - image_path: Path to the image file - max_size: Maximum file size in bytes - - Returns: - Dictionary containing image metadata - - Raises: - FileNotFoundError: If image file doesn't exist - ValueError: If file size exceeds limit - """ - if not os.path.exists(image_path): - raise FileNotFoundError(f"Image file not found: {image_path}") - - file_size = os.path.getsize(image_path) - if file_size > max_size: - raise ValueError(f"File size {file_size} exceeds limit {max_size}") - - # Process the image - metadata = extract_metadata(image_path) - return metadata -``` - -### Naming Conventions -- **Variables and Functions**: Use snake_case -- **Classes**: Use PascalCase -- **Constants**: Use UPPER_CASE - -### Type Hints -```python -from typing import List, Dict, Optional, Union, Tuple - -def get_photos( - user_id: int, - page: int = 1, - per_page: int = DEFAULT_PAGE_SIZE, - filters: Optional[Dict[str, any]] = None -) -> Dict[str, Union[List[Dict], int]]: - """Get photos with pagination and filtering.""" - pass -``` - -### Error Handling -```python -import logging -from typing import Optional - -logger = logging.getLogger(__name__) - -def safe_operation(func): - """Decorator for safe operation execution.""" - def wrapper(*args, **kwargs): - try: - return func(*args, **kwargs) - except Exception as e: - logger.error(f"Error in {func.__name__}: {e}") - return None - return wrapper -``` - -## Database Operations - -### Connection Management -```python -def get_db_connection(): - conn = sqlite3.connect('punimtag_simple.db') - conn.row_factory = sqlite3.Row # Enable dict-like access - return conn - -# Usage in endpoint -try: - conn = get_db_connection() - cursor = conn.cursor() - # Database operations - conn.commit() -except Exception as e: - conn.rollback() - return jsonify({'success': False, 'error': str(e)}), 500 -finally: - conn.close() -``` - -### Parameterized Queries -```python -# Always use parameterized queries to prevent SQL injection -cursor.execute('SELECT * FROM images WHERE id = ?', (image_id,)) -cursor.execute('INSERT INTO photos (name, path) VALUES (?, ?)', (name, path)) -``` - -## Testing Standards - -### Test Organization -``` -tests/ -├── unit/ # Unit tests for individual functions -├── integration/ # Integration tests for API endpoints -├── e2e/ # End-to-end tests for complete workflows -├── fixtures/ # Test data and fixtures -├── utils/ # Test utilities and helpers -└── conftest.py # pytest configuration and shared fixtures -``` - -### Unit Test Example -```python -# tests/unit/test_face_recognition.py -import pytest -from src.utils.face_recognition import detect_faces, encode_face - -def test_detect_faces_with_valid_image(): - """Test face detection with a valid image.""" - image_path = "tests/fixtures/valid_face.jpg" - faces = detect_faces(image_path) - - assert len(faces) > 0 - assert all(hasattr(face, 'left') for face in faces) - assert all(hasattr(face, 'top') for face in faces) -``` - -### Integration Test Example -```python -# tests/integration/test_photo_api.py -import pytest -from src.app import app - -@pytest.fixture -def client(): - """Create a test client.""" - app.config['TESTING'] = True - app.config['DATABASE'] = 'test.db' - - with app.test_client() as client: - yield client - -def test_get_photos_endpoint(client): - """Test the GET /photos endpoint.""" - response = client.get('/photos') - - assert response.status_code == 200 - data = response.get_json() - assert data['success'] == True - assert 'photos' in data -``` - -## JavaScript Conventions - -### Code Style -```javascript -// Use ES6+ features -const API_BASE_URL = '/api'; -const DEFAULT_PAGE_SIZE = 20; - -// Async/await for API calls -async function fetchPhotos(page = 1, perPage = DEFAULT_PAGE_SIZE) { - try { - const response = await fetch(`${API_BASE_URL}/photos?page=${page}&per_page=${perPage}`); - - if (!response.ok) { - throw new Error(`HTTP error! status: ${response.status}`); - } - - const data = await response.json(); - return data; - } catch (error) { - console.error('Error fetching photos:', error); - throw error; - } -} - -// Event handlers -function handlePhotoClick(photoId) { - showPhotoDetails(photoId); -} - -// DOM manipulation -function updatePhotoGrid(photos) { - const grid = document.getElementById('photo-grid'); - grid.innerHTML = ''; - - photos.forEach(photo => { - const photoElement = createPhotoElement(photo); - grid.appendChild(photoElement); - }); -} -``` - -### Error Handling -```javascript -// Global error handler -window.addEventListener('error', (event) => { - console.error('Global error:', event.error); - showErrorMessage('An unexpected error occurred'); -}); - -// API error handling -async function safeApiCall(apiFunction, ...args) { - try { - return await apiFunction(...args); - } catch (error) { - console.error('API call failed:', error); - showErrorMessage('Failed to load data. Please try again.'); - return null; - } -} -``` - -## Performance Considerations - -### Image Processing -- **Thumbnail Generation**: On-demand with caching -- **Face Detection**: Optimized for speed vs accuracy -- **Batch Processing**: Efficient handling of large photo sets -- **Memory Management**: Streaming for large images - -### Database Optimization -- **Indexing**: Strategic indexes on frequently queried columns -- **Connection Pooling**: Efficient database connections -- **Query Optimization**: Minimize N+1 query problems -- **Data Archiving**: Move old data to separate tables - -### Frontend Performance -- **Progressive Loading**: Load data in chunks -- **Image Lazy Loading**: Load images as they become visible -- **Caching**: Browser caching for static assets -- **Debouncing**: Prevent excessive API calls - -## Security Considerations - -### Data Protection -- **Local Storage**: No cloud dependencies -- **Input Validation**: Sanitize all user inputs -- **SQL Injection Prevention**: Parameterized queries -- **File Upload Security**: Validate file types and sizes - -### Privacy -- **Face Data**: Stored locally, not shared -- **Metadata**: User controls what's stored -- **Access Control**: Local access only -- **Data Export**: User can export/delete their data - -## Development Workflow - -### Code Organization -- **Modular Design**: Separate concerns into modules -- **Configuration Management**: Environment-based settings -- **Error Handling**: Comprehensive error catching and logging -- **Documentation**: Inline code documentation - -### Testing Strategy -- **Unit Tests**: Test individual functions and classes -- **Integration Tests**: Test API endpoints and database operations -- **End-to-End Tests**: Test complete user workflows -- **Performance Tests**: Test with large datasets - -## Quick Start Commands - -```bash -# Install dependencies -pip install -r requirements.txt - -# Run the application -python main.py - -# Access the web interface -# http://localhost:5000 - -# Run tests -python tests/test_main.py - -# Run with pytest (if installed) -pytest tests/ -``` - -## Common Development Tasks - -### Adding New API Endpoints -1. Follow the API standards for response format -2. Use proper HTTP status codes -3. Implement error handling -4. Add parameterized queries for database operations -5. Write integration tests - -### Adding New Features -1. Follow the project structure -2. Use type hints in Python -3. Follow naming conventions -4. Add comprehensive error handling -5. Write tests for new functionality - -### Database Changes -1. Use parameterized queries -2. Add proper indexes -3. Handle connection management -4. Implement rollback on errors -5. Update schema documentation - -## Troubleshooting - -### Common Issues -- **Face Recognition Not Working**: Check dlib installation and CUDA setup -- **Database Errors**: Verify SQLite file permissions and schema -- **Performance Issues**: Check image sizes and database indexes -- **UI Not Loading**: Check browser console for JavaScript errors - -### Debug Mode -```python -# Enable debug mode in Flask -app.run(host='0.0.0.0', port=5000, debug=True) -``` - -## Future Roadmap - -- Cloud sync capabilities -- Mobile app companion -- Advanced AI features (emotion detection, age progression) -- Social sharing features -- Integration with existing photo services - -## Support and Resources - -- Check the steering documents in `docs/` -- Review existing tests in `tests/` -- Check the API standards for endpoint usage -- Follow code conventions for maintainability \ No newline at end of file diff --git a/.gitignore b/.gitignore index 63dada1..828b0a1 100644 --- a/.gitignore +++ b/.gitignore @@ -52,4 +52,16 @@ Thumbs.db .history/ -photos/ \ No newline at end of file +photos/ + +# Photo files and large directories +*.jpg +*.jpeg +*.png +*.gif +*.bmp +*.tiff +*.webp +dlib/ +*.dat +*.model \ No newline at end of file diff --git a/.history/cleanup_script_20250720102919.sh b/.history/cleanup_script_20250720102919.sh deleted file mode 100644 index 030b637..0000000 --- a/.history/cleanup_script_20250720102919.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash - -# PunimTag Directory Cleanup Script -# This script safely removes unnecessary files to free up space - -echo "🧹 PunimTag Directory Cleanup" -echo "================================" - -# 1. Remove Python cache files -echo "📦 Removing Python cache files..." -find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null -find . -name "*.pyc" -delete 2>/dev/null -echo "✅ Python cache cleaned" - -# 2. Remove history files (huge space savings) -echo "📚 Removing history files..." -rm -rf .history/ -echo "✅ History files cleaned" - -# 3. Remove build artifacts -echo "🔨 Removing build artifacts..." -rm -rf dlib/build/ -rm -f dlib/CMakeCache.txt -rm -f dlib/CPack*.cmake -echo "✅ Build artifacts cleaned" - -# 4. Remove large CUDA packages -echo "🚀 Removing CUDA packages..." -rm -f dlib/cudnn-local-repo-ubuntu2204-9.5.1_1.0-1_amd64.deb -rm -f assets/cuda-repo-wsl-ubuntu-12-6-local_12.6.0-1_amd64.deb -echo "✅ CUDA packages cleaned" - -# 5. Remove temporary files -echo "🗑️ Removing temporary files..." -rm -f assets/temp_face_crop_*.jpg -echo "✅ Temporary files cleaned" - -# 6. Remove empty database files -echo "🗄️ Checking database files..." -if [ -f "punimtag_simple.db" ] && [ ! -s "punimtag_simple.db" ]; then - echo "⚠️ Found empty database file - removing..." - rm -f punimtag_simple.db - echo "✅ Empty database removed" -else - echo "✅ Database file is valid" -fi - -# 7. Optional: Remove old duplicate files -echo "📄 Checking for duplicate files..." -if [ -f "src/backend/punimtag.py" ]; then - echo "⚠️ Found old punimtag.py - consider removing if not needed" -fi -if [ -f "src/backend/web_gui.py" ]; then - echo "⚠️ Found old web_gui.py - consider removing if not needed" -fi - -echo "" -echo "🎉 Cleanup Complete!" -echo "📊 Estimated space saved: ~4.7GB+" -echo "" -echo "💡 Remaining files to consider manually:" -echo " - FINAL_STATUS.md (if no longer needed)" -echo " - PROJECT_REORGANIZATION.md (if no longer needed)" -echo " - Old duplicate files in src/backend/" -echo "" -echo "🔍 To see what was removed, check the output above." \ No newline at end of file diff --git a/.history/cleanup_script_20250720104239.sh b/.history/cleanup_script_20250720104239.sh deleted file mode 100644 index 0519ecb..0000000 --- a/.history/cleanup_script_20250720104239.sh +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/FINAL_STATUS.md b/FINAL_STATUS.md deleted file mode 100644 index 6eeeea1..0000000 --- a/FINAL_STATUS.md +++ /dev/null @@ -1,173 +0,0 @@ -# PunimTag Project Reorganization - Final Status - -## ✅ **REORGANIZATION COMPLETE** - -The PunimTag project has been successfully reorganized with comprehensive steering documents and a clean, maintainable structure. - -## 📊 **Test Results** - -``` -🧪 Running PunimTag Test Suite -================================================== -✅ Flask app imported successfully -✅ Flask app instance found -✅ Database connection successful -✅ Face recognition module imported successfully -✅ Configuration directory found with 2 files -✅ All required directories exist -✅ All steering documents exist -✅ Main app file found: /mnt/c/Users/admin/Documents/code/PunimTag/src/backend/app.py -✅ Main app file contains Flask app -================================================== -📊 Test Results: 7/7 tests passed -🎉 All tests passed! -``` - -## 📁 **Final Project Structure** - -``` -PunimTag/ -├── src/ # Main application source code -│ ├── backend/ # Flask backend and API -│ │ ├── app.py # Main Flask application (182KB) -│ │ ├── db_manager.py # Database operations -│ │ ├── visual_identifier.py # Face recognition -│ │ ├── punimtag.py # Legacy app -│ │ ├── punimtag_simple.py # Legacy app -│ │ ├── web_gui.py # Legacy app -│ │ └── __init__.py # Package init -│ ├── frontend/ # JavaScript and UI components -│ │ └── templates/ # HTML templates -│ ├── utils/ # Utility functions -│ │ ├── tag_manager.py # Tag management -│ │ └── __init__.py # Package init -│ └── __init__.py # Package init -├── docs/ # Documentation and steering documents -│ ├── product.md # Product vision and goals -│ ├── structure.md # Project organization -│ ├── tech.md # Technical architecture -│ ├── api-standards.md # API design standards -│ ├── testing-standards.md # Testing guidelines -│ ├── code-conventions.md # Coding standards -│ ├── BACKEND_STATUS.md # Legacy documentation -│ ├── IDEAS.md # Legacy documentation -│ └── TESTING_GUIDE.md # Legacy documentation -├── tests/ # Test files (cleaned up) -│ ├── test_main.py # Main test suite (6KB) -│ ├── conftest.py # Test configuration -│ ├── test_backend.py # Legacy tests -│ ├── test_punimtag.py # Legacy tests -│ ├── test_web_api.py # Legacy tests -│ ├── unit/ # Unit test directory -│ ├── integration/ # Integration test directory -│ ├── e2e/ # End-to-end test directory -│ ├── fixtures/ # Test data directory -│ └── utils/ # Test utilities directory -├── data/ # Database files and user data -│ ├── punimtag_simple.db # Main database (4.4MB) -│ ├── punimtag.db # Legacy database -│ ├── test_backend.db # Test database -│ └── test_basic.db # Test database -├── config/ # Configuration files -│ ├── settings.py # Application settings -│ ├── config.py # Legacy config -│ └── punimtag_config.json -├── scripts/ # Utility scripts -│ ├── cleanup_tests.py # Cleanup script -│ ├── start_gui.py # Legacy script -│ ├── simple_identifier.py # Legacy script -│ ├── interactive_identifier.py # Legacy script -│ └── gui_face_clusters.py # Legacy script -├── assets/ # Static assets -│ ├── temp_face_crop_74280.jpg -│ └── cuda-repo-wsl-ubuntu-12-6-local_12.6.0-1_amd64.deb -├── photos/ # User photo storage -├── venv/ # Virtual environment -├── main.py # Application entry point -├── requirements.txt # Python dependencies -├── README.md # Updated README -├── PROJECT_REORGANIZATION.md # Reorganization summary -├── FINAL_STATUS.md # This file -└── .gitignore # Updated gitignore -``` - -## 🎯 **Accomplishments** - -### ✅ **Files Organized** - -- **20+ files moved** to appropriate directories -- **Main application**: `simple_web_gui.py` → `src/backend/app.py` -- **Database files**: All `.db` files → `data/` -- **Configuration**: `config.py` → `config/settings.py` -- **Scripts**: Utility scripts → `scripts/` -- **Assets**: Images and files → `assets/` - -### ✅ **Redundant Files Cleaned Up** - -- **10+ HTML test files** removed (debug*ui.html, test*\*.html) -- **Consolidated tests** into `tests/test_main.py` -- **Clean test directory** with proper structure - -### ✅ **Steering Documents Created** - -- **6 comprehensive documents** in `docs/` -- **Product vision** and goals -- **Technical architecture** and standards -- **API design** guidelines -- **Testing strategy** and best practices -- **Code conventions** and style guides - -### ✅ **Package Structure** - -- **Proper Python packages** with `__init__.py` files -- **Clear separation** of concerns -- **Importable modules** from `src/` - -### ✅ **Configuration Centralized** - -- **Settings management** in `config/settings.py` -- **Environment-based** configuration -- **Database paths** properly configured - -## 🚀 **How to Use** - -### **Start the Application** - -```bash -python main.py -``` - -### **Run Tests** - -```bash -python tests/test_main.py -``` - -### **Clean Up (if needed)** - -```bash -python scripts/cleanup_tests.py -``` - -## 📚 **For Cursor AI** - -The steering documents in `docs/` provide clear guidance for: - -- **API Development**: Follow `docs/api-standards.md` -- **Code Quality**: Use `docs/code-conventions.md` -- **Testing**: Implement tests following `docs/testing-standards.md` -- **Architecture**: Reference `docs/tech.md` and `docs/structure.md` -- **Product Decisions**: Review `docs/product.md` - -## 🎉 **Status: COMPLETE** - -The PunimTag project is now: - -- ✅ **Well-organized** with clear structure -- ✅ **Properly documented** with steering documents -- ✅ **Tested and verified** (7/7 tests passing) -- ✅ **Ready for development** with clear guidelines -- ✅ **Scalable** with modular architecture -- ✅ **Maintainable** with consistent conventions - -**All objectives achieved!** 🎯 diff --git a/PROJECT_REORGANIZATION.md b/PROJECT_REORGANIZATION.md deleted file mode 100644 index 77d0a3f..0000000 --- a/PROJECT_REORGANIZATION.md +++ /dev/null @@ -1,206 +0,0 @@ -# PunimTag Project Reorganization Summary - -## 🎯 Overview - -This document summarizes the comprehensive reorganization of the PunimTag project to improve maintainability, documentation, and development workflow. - -## 📁 New Project Structure - -### Before (Chaotic) - -``` -PunimTag/ -├── simple_web_gui.py (178KB, 4319 lines!) -├── test_*.html (10+ redundant test files) -├── test_*.py (multiple test files) -├── debug_*.html (debug files) -├── Various .py files scattered -└── No clear organization -``` - -### After (Organized) - -``` -PunimTag/ -├── src/ # Main application source code -│ ├── backend/ # Flask backend and API -│ │ ├── app.py # Main Flask application -│ │ ├── db_manager.py # Database operations -│ │ └── visual_identifier.py # Face recognition -│ ├── frontend/ # JavaScript and UI components -│ └── utils/ # Utility functions -│ └── tag_manager.py # Tag management -├── docs/ # Documentation and steering documents -│ ├── product.md # Product vision and goals -│ ├── structure.md # Project organization -│ ├── tech.md # Technical architecture -│ ├── api-standards.md # API design standards -│ ├── testing-standards.md # Testing guidelines -│ └── code-conventions.md # Coding standards -├── tests/ # Test files -│ ├── test_main.py # Main test suite -│ └── conftest.py # Test configuration -├── data/ # Database files and user data -├── config/ # Configuration files -│ ├── settings.py # Application settings -│ └── punimtag_config.json -├── scripts/ # Utility scripts -├── assets/ # Static assets -├── photos/ # User photo storage -└── main.py # Application entry point -``` - -## 📚 Steering Documents Created - -### 1. Product Vision (`docs/product.md`) - -- **Core Value Proposition**: Automatic face recognition, smart organization, duplicate detection -- **Target Users**: Individuals with large photo collections, small businesses -- **Key Features**: Photo management, face recognition, duplicate management, search & discovery -- **Success Metrics**: User engagement, accuracy, performance, usability -- **Future Roadmap**: Cloud sync, mobile app, advanced AI features - -### 2. Project Structure (`docs/structure.md`) - -- **Directory Organization**: Clear separation of concerns -- **Core Components**: Backend (Flask), Frontend (JavaScript), Data Layer (SQLite) -- **Architecture Principles**: Separation of concerns, progressive enhancement, performance optimization -- **File Naming Conventions**: Consistent naming across Python, JavaScript, and database -- **Dependencies**: Clear technology stack documentation - -### 3. Technical Architecture (`docs/tech.md`) - -- **Technology Stack**: Flask, SQLite, dlib, Pillow, NumPy -- **Core Technologies**: Face recognition pipeline, database schema, API design -- **Performance Considerations**: Image processing, database optimization, frontend performance -- **Security Considerations**: Data protection, privacy, input validation -- **Scalability**: Current limitations and future scalability options - -### 4. API Standards (`docs/api-standards.md`) - -- **Response Format**: Consistent JSON response structure -- **HTTP Status Codes**: Proper error handling -- **Endpoint Naming**: RESTful patterns and conventions -- **Request Parameters**: Query parameters and JSON body handling -- **Error Handling**: Standard error handlers and validation -- **Database Operations**: Connection management and parameterized queries -- **Security**: Input sanitization and CORS headers - -### 5. Testing Standards (`docs/testing-standards.md`) - -- **Test Organization**: Unit, integration, and E2E tests -- **Test Categories**: Comprehensive testing strategy -- **Test Fixtures**: Database and mock fixtures -- **Test Data Management**: Test images and cleanup -- **Performance Testing**: Load testing and benchmarks -- **Code Coverage**: Coverage requirements and reporting -- **Continuous Integration**: GitHub Actions setup - -### 6. Code Conventions (`docs/code-conventions.md`) - -- **Python Conventions**: PEP 8 compliance, type hints, error handling -- **JavaScript Conventions**: ESLint compliance, async/await, error handling -- **Database Conventions**: Table naming, column naming, index naming -- **File Organization**: Consistent file structure and documentation -- **Documentation Standards**: Function and class documentation -- **Git Conventions**: Commit messages and branch naming -- **Performance Guidelines**: Optimization best practices -- **Security Guidelines**: Input validation and database security - -## 🧹 Cleanup Accomplished - -### Files Moved - -- **Main Application**: `simple_web_gui.py` → `src/backend/app.py` -- **Database Manager**: `db_manager.py` → `src/backend/` -- **Face Recognition**: `visual_identifier.py` → `src/backend/` -- **Tag Manager**: `tag_manager.py` → `src/utils/` -- **Configuration**: `config.py` → `config/settings.py` -- **Databases**: All `.db` files → `data/` -- **Scripts**: Utility scripts → `scripts/` -- **Assets**: Images and files → `assets/` - -### Files Consolidated - -- **Test Files**: 10+ redundant test files → `tests/test_main.py` -- **Debug Files**: Multiple debug HTML files → `tests/` (for reference) -- **Configuration**: Centralized in `config/settings.py` - -### Files Created - -- **Entry Point**: `main.py` for easy application startup -- **Package Files**: `__init__.py` files for proper Python packages -- **Configuration**: Centralized settings with environment support -- **Documentation**: Comprehensive steering documents -- **Cleanup Script**: `scripts/cleanup_tests.py` for maintenance - -## 🎯 Benefits Achieved - -### 1. **Maintainability** - -- Clear separation of concerns -- Consistent file organization -- Proper Python package structure -- Centralized configuration - -### 2. **Documentation** - -- Comprehensive steering documents -- Clear development guidelines -- API standards and conventions -- Testing strategy and best practices - -### 3. **Development Workflow** - -- Easy to find and modify code -- Consistent coding standards -- Proper testing framework -- Clear contribution guidelines - -### 4. **Scalability** - -- Modular architecture -- Configuration-driven settings -- Proper package structure -- Future-ready organization - -### 5. **Quality Assurance** - -- Comprehensive testing standards -- Code coverage requirements -- Performance guidelines -- Security best practices - -## 🚀 Next Steps - -### For Developers - -1. **Read the steering documents** in `docs/` -2. **Follow the code conventions** for consistency -3. **Use the organized structure** for new features -4. **Write tests** following the testing standards - -### For Cursor AI - -1. **Reference steering documents** for development decisions -2. **Follow API standards** for endpoint design -3. **Use code conventions** for consistency -4. **Implement proper testing** for new features - -### For Project Maintenance - -1. **Run cleanup script**: `python scripts/cleanup_tests.py` -2. **Update documentation** as features evolve -3. **Maintain test coverage** above 80% -4. **Follow git conventions** for commits - -## 📊 Impact Summary - -- **Files Organized**: 20+ files moved to appropriate directories -- **Documentation Created**: 6 comprehensive steering documents -- **Redundancy Eliminated**: 10+ redundant test files consolidated -- **Standards Established**: Complete development guidelines -- **Maintainability Improved**: Clear structure and conventions -- **Scalability Enhanced**: Modular, configuration-driven architecture - -The PunimTag project is now well-organized, properly documented, and ready for scalable development with clear guidelines for both human developers and AI assistants like Cursor. diff --git a/README.md b/README.md index cc97a88..82426e3 100644 --- a/README.md +++ b/README.md @@ -1,136 +1,294 @@ -# PunimTag - Intelligent Photo Management System +# PunimTag CLI - Minimal Photo Face Tagger -A Flask-based photo management system with automatic face recognition, tagging, and duplicate detection. +A simple command-line tool for automatic face recognition and photo tagging. No web interface, no complex dependencies - just the essentials. ## 🚀 Quick Start ```bash -# Install dependencies -pip install -r requirements.txt +# 1. Setup (one time only) +git clone +cd PunimTag +python3 -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate +python3 setup.py -# Run the application -python main.py +# 2. Scan photos +python3 photo_tagger.py scan /path/to/your/photos -# Access the web interface -# http://localhost:5000 +# 3. Process faces +python3 photo_tagger.py process + +# 4. Identify faces interactively +python3 photo_tagger.py identify + +# 5. View statistics +python3 photo_tagger.py stats ``` -## 📁 Project Structure +## 📦 Installation + +### Automatic Setup (Recommended) +```bash +# Clone and setup +git clone +cd PunimTag + +# Create virtual environment (IMPORTANT!) +python3 -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate + +# Run setup script +python3 setup.py +``` + +**⚠️ IMPORTANT**: Always activate the virtual environment before running any commands: +```bash +source venv/bin/activate # Run this every time you open a new terminal +``` + +### Manual Setup (Alternative) +```bash +python3 -m venv venv +source venv/bin/activate +pip install -r requirements.txt +python3 photo_tagger.py stats # Creates database +``` + +## 🎯 Commands + +### Scan for Photos +```bash +# Scan a folder +python3 photo_tagger.py scan /path/to/photos + +# Scan recursively (recommended) +python3 photo_tagger.py scan /path/to/photos --recursive +``` + +### Process Photos for Faces +```bash +# Process 50 photos (default) +python3 photo_tagger.py process + +# Process 20 photos with CNN model (more accurate) +python3 photo_tagger.py process --limit 20 --model cnn + +# Process with HOG model (faster) +python3 photo_tagger.py process --limit 100 --model hog +``` + +### Identify Faces +```bash +# Identify 20 faces interactively +python3 photo_tagger.py identify + +# Identify 10 faces at a time +python3 photo_tagger.py identify --batch 10 +``` + +**Interactive commands during identification:** +- Type person's name to identify +- `s` = skip this face +- `q` = quit +- `list` = show known people + +### Add Tags +```bash +# Tag photos matching pattern +python3 photo_tagger.py tag --pattern "vacation" + +# Tag any photos +python3 photo_tagger.py tag +``` + +### Search +```bash +# Find photos with a person +python3 photo_tagger.py search "John" + +# Find photos with partial name match +python3 photo_tagger.py search "Joh" +``` + +### Statistics +```bash +# View database statistics +python3 photo_tagger.py stats +``` + +## 📊 Example Workflow + +```bash +# ALWAYS activate virtual environment first! +source venv/bin/activate + +# 1. Scan your photo collection +python3 photo_tagger.py scan ~/Pictures --recursive + +# 2. Process photos for faces (start with small batch) +python3 photo_tagger.py process --limit 20 + +# 3. Check what we found +python3 photo_tagger.py stats + +# 4. Identify some faces +python3 photo_tagger.py identify --batch 10 + +# 5. Search for photos of someone +python3 photo_tagger.py search "Alice" + +# 6. Add some tags +python3 photo_tagger.py tag --pattern "birthday" +``` + +## 🗃️ Database + +The tool uses SQLite database (`photos.db` by default) with these tables: +- **photos** - Photo file paths and processing status +- **people** - Known people names +- **faces** - Face encodings and locations +- **tags** - Custom tags for photos + +## ⚙️ Configuration + +### Face Detection Models +- **hog** - Faster, good for CPU-only systems +- **cnn** - More accurate, requires more processing power + +### Database Location +```bash +# Use custom database file +python3 photo_tagger.py scan /photos --db /path/to/my.db +``` + +## 🔧 System Requirements + +### Required System Packages (Ubuntu/Debian) +```bash +sudo apt update +sudo apt install -y cmake build-essential libopenblas-dev liblapack-dev libx11-dev libgtk-3-dev python3-dev python3-venv +``` + +### Python Dependencies +- `face-recognition` - Face detection and recognition +- `dlib` - Machine learning library +- `pillow` - Image processing +- `numpy` - Numerical operations +- `click` - Command line interface +- `setuptools` - Package management + +## 📁 File Structure ``` PunimTag/ -├── src/ # Main application source code -│ ├── backend/ # Flask backend and API -│ │ ├── app.py # Main Flask application -│ │ ├── db_manager.py # Database operations -│ │ └── visual_identifier.py # Face recognition -│ ├── frontend/ # JavaScript and UI components -│ └── utils/ # Utility functions -│ └── tag_manager.py # Tag management -├── docs/ # Documentation and steering documents -│ ├── product.md # Product vision and goals -│ ├── structure.md # Project organization -│ ├── tech.md # Technical architecture -│ ├── api-standards.md # API design standards -│ ├── testing-standards.md # Testing guidelines -│ └── code-conventions.md # Coding standards -├── tests/ # Test files -│ ├── test_main.py # Main test suite -│ └── conftest.py # Test configuration -├── data/ # Database files and user data -├── config/ # Configuration files -│ ├── settings.py # Application settings -│ └── punimtag_config.json -├── scripts/ # Utility scripts -├── assets/ # Static assets -├── photos/ # User photo storage -└── main.py # Application entry point +├── photo_tagger.py # Main CLI tool +├── setup.py # Setup script +├── run.sh # Convenience script (auto-activates venv) +├── requirements.txt # Python dependencies +├── README.md # This file +├── venv/ # Virtual environment (created by setup) +├── photos.db # Database (created automatically) +├── data/ # Additional data files +└── logs/ # Log files ``` -## 🎯 Key Features - -- **Automatic Face Recognition**: Identify and tag people in photos -- **Smart Organization**: Group photos by people, events, and locations -- **Duplicate Detection**: Find and manage duplicate photos automatically -- **Intuitive Interface**: Web-based GUI with progressive loading -- **Privacy-First**: Local processing, no cloud dependencies - -## 📚 Documentation - -### Steering Documents - -- **[Product Vision](docs/product.md)**: Product goals, target users, and roadmap -- **[Project Structure](docs/structure.md)**: Architecture and organization principles -- **[Technical Architecture](docs/tech.md)**: Technology stack and implementation details -- **[API Standards](docs/api-standards.md)**: API design and development guidelines -- **[Testing Standards](docs/testing-standards.md)**: Testing strategy and best practices -- **[Code Conventions](docs/code-conventions.md)**: Coding standards and style guides - -### Development Guidelines - -1. **Follow the steering documents** for consistent development -2. **Use the organized structure** - place code in appropriate directories -3. **Write tests** following the testing standards -4. **Follow API standards** for all endpoints -5. **Adhere to code conventions** for maintainability - -## 🧪 Testing +## 🚨 Troubleshooting +### "externally-managed-environment" Error +**Solution**: Always use a virtual environment! ```bash -# Run the main test suite -python tests/test_main.py - -# Run with pytest (if installed) -pytest tests/ +python3 -m venv venv +source venv/bin/activate +python3 setup.py ``` -## 🔧 Configuration - -Configuration is centralized in `config/settings.py`: - -- Database paths -- Face recognition settings -- File upload limits -- Thumbnail sizes - -## 🚀 Deployment - -### Development - +### Virtual Environment Not Active +**Problem**: Commands fail or use wrong Python +**Solution**: Always activate the virtual environment: ```bash -python main.py +source venv/bin/activate +# You should see (venv) in your prompt ``` -### Production - +### dlib Installation Issues ```bash -# Use a WSGI server like Gunicorn -gunicorn -w 4 -b 0.0.0.0:5000 main:app +# Ubuntu/Debian - install system dependencies first +sudo apt-get install build-essential cmake libopenblas-dev + +# Then retry setup +source venv/bin/activate +python3 setup.py ``` -## 📦 Dependencies +### "Please install face_recognition_models" Warning +This warning is harmless - the application still works correctly. It's a known issue with Python 3.13. -- **Flask**: Web framework -- **SQLite**: Database -- **dlib**: Face recognition -- **Pillow**: Image processing -- **NumPy**: Numerical operations +### Memory Issues +- Use `--model hog` for faster processing +- Process in smaller batches with `--limit 10` +- Close other applications to free memory + +### No Faces Found +- Check image quality and lighting +- Ensure faces are clearly visible +- Try `--model cnn` for better detection + +## 🎯 What This Tool Does + +✅ **Simple**: Single Python file, minimal dependencies +✅ **Fast**: Efficient face detection and recognition +✅ **Private**: Everything runs locally, no cloud services +✅ **Flexible**: Batch processing, interactive identification +✅ **Lightweight**: No web interface overhead + +## 🚫 What This Tool Doesn't Do + +❌ Web interface (removed for simplicity) +❌ Duplicate detection (can be added later) +❌ Image editing or enhancement +❌ Cloud sync or sharing +❌ Complex ML training + +## 📈 Performance Tips + +- **Always use virtual environment** to avoid conflicts +- Start with small batches (`--limit 20`) to test +- Use `hog` model for speed, `cnn` for accuracy +- Process photos in smaller folders first +- Identify faces in batches to avoid fatigue ## 🤝 Contributing -1. Read the steering documents in `docs/` -2. Follow the code conventions -3. Write tests for new features -4. Update documentation as needed +This is now a minimal, focused tool. Key principles: +- Keep it simple and fast +- CLI-only interface +- Minimal dependencies +- Clear, readable code +- **Always use python3** commands -## 📄 License +--- -This project is licensed under the MIT License. +**Total project size**: ~300 lines of Python code +**Dependencies**: 6 essential packages +**Setup time**: ~5 minutes +**Perfect for**: Batch processing personal photo collections -## 🆘 Support +## 🔄 Common Commands Cheat Sheet -For issues and questions: +```bash +# Setup (one time) +python3 -m venv venv && source venv/bin/activate && python3 setup.py -1. Check the steering documents in `docs/` -2. Review existing tests in `tests/` -3. Check the API standards for endpoint usage +# Daily usage - Option 1: Use run script (automatic venv activation) +./run.sh scan ~/Pictures --recursive +./run.sh process --limit 50 +./run.sh identify --batch 10 +./run.sh stats + +# Daily usage - Option 2: Manual venv activation +source venv/bin/activate +python3 photo_tagger.py scan ~/Pictures --recursive +python3 photo_tagger.py process --limit 50 +python3 photo_tagger.py identify --batch 10 +python3 photo_tagger.py stats +``` \ No newline at end of file diff --git a/REBUILD_SUMMARY.md b/REBUILD_SUMMARY.md new file mode 100644 index 0000000..cd91f9d --- /dev/null +++ b/REBUILD_SUMMARY.md @@ -0,0 +1,210 @@ +# PunimTag Complete Rebuild - Summary + +## 🎯 What We Did + +Completely rebuilt PunimTag from a complex web application into a **simple, focused CLI tool** for photo face tagging. + +## 📊 Before vs After + +### Before (Complex) +``` +- 182KB Flask web app (4,365+ lines) +- Complex web interface with embedded HTML/CSS/JS +- Multiple legacy files and dependencies +- Web framework overhead +- Difficult to understand and modify +- Large repository size +``` + +### After (Simple) +``` +- 17KB CLI tool (~400 lines) +- Clean command-line interface +- Minimal dependencies (6 packages) +- No web framework overhead +- Easy to understand and modify +- Small repository size +``` + +## 🗂️ New Project Structure + +``` +PunimTag/ +├── photo_tagger.py # Main CLI tool (17KB) +├── setup.py # Setup script (3KB) +├── requirements.txt # 6 minimal dependencies +├── README.md # Clear documentation +├── test_basic.py # Basic functionality tests +├── data/ # Database files (not in git) +├── photos/ # User photos (not in git) +└── .gitignore # Excludes large files +``` + +## 🧹 What We Removed + +### Files Deleted +- `src/backend/app.py` (182KB web interface) +- `src/backend/web_gui.py` +- `src/backend/punimtag.py` +- `src/backend/punimtag_simple.py` +- All web frontend files +- Complex documentation +- Test files for web interface +- Configuration files +- Scripts directory + +### Dependencies Removed +- `flask` - Web framework +- `opencv-python` - Computer vision (optional) +- `scikit-learn` - Machine learning extras +- All web-related dependencies + +## ✅ What We Kept + +### Core Functionality +- Face detection and recognition +- Database schema for photos, faces, people, tags +- Batch processing capabilities +- Interactive identification +- Search and statistics + +### Essential Dependencies +- `face-recognition` - Core face recognition +- `dlib` - Machine learning backend +- `pillow` - Image processing +- `numpy` - Numerical operations +- `click` - CLI interface + +## 🚀 New CLI Commands + +```bash +# Scan photos +python photo_tagger.py scan /path/to/photos + +# Process faces +python photo_tagger.py process --limit 50 + +# Identify faces interactively +python photo_tagger.py identify --batch 20 + +# Add tags +python photo_tagger.py tag --pattern "vacation" + +# Search for person +python photo_tagger.py search "John" + +# View statistics +python photo_tagger.py stats +``` + +## 💡 Key Improvements + +### Simplicity +- **90% size reduction** - From 182KB to 17KB +- **Single file** - Everything in `photo_tagger.py` +- **Clear workflow** - Scan → Process → Identify → Search + +### Performance +- **Faster startup** - No web framework overhead +- **Efficient processing** - Direct face recognition calls +- **Batch operations** - Process photos in manageable chunks + +### Usability +- **Better CLI** - Clear commands with help text +- **Interactive identification** - Easy face tagging +- **Progress feedback** - Clear status messages + +### Maintainability +- **Readable code** - Well-structured, documented +- **Minimal dependencies** - Easy to install and maintain +- **Focused purpose** - Does one thing well + +## 🧪 Testing + +### Basic Tests Pass ✅ +``` +📋 Testing: Database Schema ✅ +📋 Testing: CLI Structure ✅ +📊 Results: 2/2 tests passed +``` + +### Ready for Use +- Database schema works correctly +- CLI argument parsing functional +- Code structure is sound +- Dependencies are minimal + +## 📦 Installation + +### Quick Start +```bash +# 1. Setup +python setup.py + +# 2. Use +python photo_tagger.py scan /photos +python photo_tagger.py process +python photo_tagger.py identify +``` + +### Manual Install +```bash +pip install -r requirements.txt +python photo_tagger.py stats +``` + +## 🎯 Benefits Achieved + +### For Development +- **Easier to understand** - Single focused file +- **Faster to modify** - No complex web interface +- **Simpler testing** - CLI is easier to test +- **Better git workflow** - Small, focused commits + +### For Users +- **Faster execution** - No web server overhead +- **Better for batch processing** - CLI is perfect for automation +- **Lower resource usage** - Minimal memory footprint +- **More reliable** - Fewer dependencies, fewer failure points + +### For Deployment +- **Smaller repository** - Only essential files +- **Easier installation** - Fewer dependencies +- **Better portability** - Runs anywhere Python runs +- **No security concerns** - No web server to secure + +## 🔮 Future Possibilities + +The new minimal structure makes it easy to add features: + +### Easy Additions +- Export functionality +- Different face detection models +- Batch tagging operations +- Integration with other tools + +### Optional Features +- Web interface (if needed later) +- GUI wrapper (tkinter/Qt) +- API endpoints (Flask add-on) +- Cloud sync (separate module) + +## 📈 Success Metrics + +- **Code size**: 182KB → 17KB (90% reduction) +- **Dependencies**: 15+ → 6 (60% reduction) +- **Complexity**: High → Low +- **Setup time**: ~30min → ~5min +- **Learning curve**: Steep → Gentle + +## 🎉 Conclusion + +Successfully transformed PunimTag from a complex web application into a **focused, efficient CLI tool** that does exactly what's needed: + +✅ **Simple** - Easy to understand and use +✅ **Fast** - Efficient face recognition processing +✅ **Reliable** - Minimal dependencies, fewer failure points +✅ **Maintainable** - Clean code, clear structure +✅ **Portable** - Runs anywhere Python runs + +The project is now **ready for development** and **easy to extend** while maintaining its core simplicity and focus. diff --git a/config/config.py b/config/config.py deleted file mode 100644 index cfa34c4..0000000 --- a/config/config.py +++ /dev/null @@ -1,235 +0,0 @@ -#!/usr/bin/env python3 -""" -Configuration system for PunimTag -Manages settings for face recognition, auto-tagging, and organization-specific defaults -""" - -import json -import os -from typing import Dict, Any, List, Optional -from dataclasses import dataclass, asdict -from pathlib import Path - - -@dataclass -class FaceRecognitionConfig: - """Face recognition settings""" - confidence_threshold: float = 0.6 - face_quality_threshold: float = 0.3 - max_face_distance: float = 0.6 - min_face_size: int = 80 - detection_model: str = 'cnn' - enable_gpu: bool = True - enable_clustering: bool = True - cluster_min_size: int = 3 - cluster_epsilon: float = 0.3 - - -@dataclass -class AutoTaggingConfig: - """Auto-tagging settings""" - enabled: bool = True - tag_seasons: bool = True - tag_locations: bool = True - tag_time_of_day: bool = True - tag_indoor_outdoor: bool = False # Requires additional ML models - confidence_threshold: float = 0.7 - - -@dataclass -class ProcessingConfig: - """Image processing settings""" - batch_size: int = 100 - max_workers: int = 4 - create_thumbnails: bool = True - thumbnail_size: tuple = (200, 200) - supported_formats: Optional[List[str]] = None - skip_processed: bool = True - - def __post_init__(self): - if self.supported_formats is None: - self.supported_formats = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.gif'] - - -@dataclass -class DatabaseConfig: - """Database settings""" - backup_enabled: bool = True - backup_interval_hours: int = 24 - optimize_on_startup: bool = True - vacuum_on_startup: bool = False - - -@dataclass -class JewishOrgConfig: - """Jewish organization specific settings""" - hebrew_calendar_support: bool = True - default_event_tags: Optional[List[str]] = None - default_location_tags: Optional[List[str]] = None - holiday_auto_tagging: bool = True - - def __post_init__(self): - if self.default_event_tags is None: - self.default_event_tags = [ - 'shabbat', 'wedding', 'bar_mitzvah', 'bat_mitzvah', 'brit_milah', - 'baby_naming', 'shiva', 'yahrzeit', 'rosh_hashanah', 'yom_kippur', - 'sukkot', 'simchat_torah', 'chanukah', 'tu_bishvat', 'purim', - 'passover', 'lag_baomer', 'shavuot', 'tisha_bav', 'synagogue_service', - 'torah_reading', 'kiddush', 'havdalah', 'community_dinner', - 'study_session', 'board_meeting', 'fundraiser', 'youth_group', - 'hebrew_school', 'adult_education' - ] - - if self.default_location_tags is None: - self.default_location_tags = [ - 'synagogue', 'sanctuary', 'social_hall', 'classroom', 'library', - 'kitchen', 'office', 'parking_lot', 'garden', 'sukkah', - 'home', 'restaurant', 'community_center', 'school', 'cemetery', - 'israel', 'jerusalem', 'tel_aviv', 'haifa', 'safed' - ] - - -class PunimTagConfig: - """Main configuration class""" - - def __init__(self, config_file: str = 'punimtag_config.json'): - self.config_file = config_file - self.face_recognition = FaceRecognitionConfig() - self.auto_tagging = AutoTaggingConfig() - self.processing = ProcessingConfig() - self.database = DatabaseConfig() - self.jewish_org = JewishOrgConfig() - - # Load existing config if available - self.load() - - def load(self): - """Load configuration from file""" - if os.path.exists(self.config_file): - try: - with open(self.config_file, 'r', encoding='utf-8') as f: - data = json.load(f) - - # Update configurations - if 'face_recognition' in data: - self.face_recognition = FaceRecognitionConfig(**data['face_recognition']) - if 'auto_tagging' in data: - self.auto_tagging = AutoTaggingConfig(**data['auto_tagging']) - if 'processing' in data: - self.processing = ProcessingConfig(**data['processing']) - if 'database' in data: - self.database = DatabaseConfig(**data['database']) - if 'jewish_org' in data: - self.jewish_org = JewishOrgConfig(**data['jewish_org']) - - except Exception as e: - print(f"Error loading config: {e}") - print("Using default configuration") - - def save(self): - """Save configuration to file""" - try: - config_data = { - 'face_recognition': asdict(self.face_recognition), - 'auto_tagging': asdict(self.auto_tagging), - 'processing': asdict(self.processing), - 'database': asdict(self.database), - 'jewish_org': asdict(self.jewish_org) - } - - with open(self.config_file, 'w', encoding='utf-8') as f: - json.dump(config_data, f, indent=2, ensure_ascii=False) - - except Exception as e: - print(f"Error saving config: {e}") - - def reset_to_defaults(self): - """Reset all settings to defaults""" - self.face_recognition = FaceRecognitionConfig() - self.auto_tagging = AutoTaggingConfig() - self.processing = ProcessingConfig() - self.database = DatabaseConfig() - self.jewish_org = JewishOrgConfig() - - def get_tag_suggestions(self, category: str = None) -> List[str]: - """Get tag suggestions for a category""" - suggestions = { - 'event': self.jewish_org.default_event_tags, - 'location': self.jewish_org.default_location_tags, - 'time': ['morning', 'afternoon', 'evening', 'night'], - 'season': ['spring', 'summer', 'fall', 'winter'], - 'weather': ['sunny', 'cloudy', 'rainy', 'snowy'], - 'group_size': ['solo', 'couple', 'small_group', 'large_group', 'crowd'], - 'age_group': ['children', 'youth', 'adults', 'seniors', 'mixed_ages'], - 'formality': ['formal', 'casual', 'semiformal', 'religious_attire'], - 'activity': ['eating', 'praying', 'studying', 'celebrating', 'socializing', - 'ceremony', 'performance', 'sports', 'crafts', 'music'] - } - - if category: - return suggestions.get(category, []) - - # Return all suggestions if no category specified - all_tags = [] - for tags in suggestions.values(): - all_tags.extend(tags) - return sorted(set(all_tags)) - - def update_setting(self, section: str, key: str, value: Any): - """Update a specific setting""" - if hasattr(self, section): - section_obj = getattr(self, section) - if hasattr(section_obj, key): - setattr(section_obj, key, value) - self.save() - return True - return False - - def get_setting(self, section: str, key: str, default: Any = None): - """Get a specific setting value""" - if hasattr(self, section): - section_obj = getattr(self, section) - if hasattr(section_obj, key): - return getattr(section_obj, key) - return default - - -# Global configuration instance -config = PunimTagConfig() - - -def get_config() -> PunimTagConfig: - """Get the global configuration instance""" - return config - - -def create_default_config(filepath: str = 'punimtag_config.json'): - """Create a default configuration file""" - config = PunimTagConfig(filepath) - config.save() - return config - - -if __name__ == "__main__": - # Demo configuration usage - print("PunimTag Configuration Demo") - print("=" * 40) - - config = PunimTagConfig() - - print("Current face recognition threshold:", config.face_recognition.confidence_threshold) - print("Auto-tagging enabled:", config.auto_tagging.enabled) - print("Batch size:", config.processing.batch_size) - - print("\nJewish organization event tags:") - for tag in config.jewish_org.default_event_tags[:10]: - print(f" - {tag}") - - print("\nTag suggestions for 'event' category:") - suggestions = config.get_tag_suggestions('event')[:5] - for tag in suggestions: - print(f" - {tag}") - - # Save configuration - config.save() - print(f"\nConfiguration saved to {config.config_file}") \ No newline at end of file diff --git a/config/punimtag_config.json b/config/punimtag_config.json deleted file mode 100644 index 237bf22..0000000 --- a/config/punimtag_config.json +++ /dev/null @@ -1,102 +0,0 @@ -{ - "face_recognition": { - "confidence_threshold": 0.6, - "face_quality_threshold": 0.3, - "max_face_distance": 0.6, - "min_face_size": 80, - "detection_model": "hog", - "enable_clustering": true, - "cluster_min_size": 3, - "cluster_epsilon": 0.3 - }, - "auto_tagging": { - "enabled": true, - "tag_seasons": true, - "tag_locations": true, - "tag_time_of_day": true, - "tag_indoor_outdoor": false, - "confidence_threshold": 0.7 - }, - "processing": { - "batch_size": 100, - "max_workers": 4, - "create_thumbnails": true, - "thumbnail_size": [ - 200, - 200 - ], - "supported_formats": [ - ".jpg", - ".jpeg", - ".png", - ".bmp", - ".tiff", - ".gif" - ], - "skip_processed": true - }, - "database": { - "backup_enabled": true, - "backup_interval_hours": 24, - "optimize_on_startup": true, - "vacuum_on_startup": false - }, - "jewish_org": { - "hebrew_calendar_support": true, - "default_event_tags": [ - "shabbat", - "wedding", - "bar_mitzvah", - "bat_mitzvah", - "brit_milah", - "baby_naming", - "shiva", - "yahrzeit", - "rosh_hashanah", - "yom_kippur", - "sukkot", - "simchat_torah", - "chanukah", - "tu_bishvat", - "purim", - "passover", - "lag_baomer", - "shavuot", - "tisha_bav", - "synagogue_service", - "torah_reading", - "kiddush", - "havdalah", - "community_dinner", - "study_session", - "board_meeting", - "fundraiser", - "youth_group", - "hebrew_school", - "adult_education" - ], - "default_location_tags": [ - "synagogue", - "sanctuary", - "social_hall", - "classroom", - "library", - "kitchen", - "office", - "parking_lot", - "garden", - "sukkah", - "home", - "restaurant", - "community_center", - "school", - "cemetery", - "israel", - "jerusalem", - "tel_aviv", - "haifa", - "safed" - ], - "holiday_auto_tagging": true - } -} \ No newline at end of file diff --git a/config/settings.py b/config/settings.py deleted file mode 100644 index 9fd1d2b..0000000 --- a/config/settings.py +++ /dev/null @@ -1,65 +0,0 @@ -""" -PunimTag Configuration Settings - -Centralized configuration for the PunimTag application. -""" - -import os -from pathlib import Path - -# Base directory (project root) -BASE_DIR = Path(__file__).parent.parent - -# Data directory -DATA_DIR = BASE_DIR / "data" -PHOTOS_DIR = BASE_DIR / "photos" - -# Database paths -DATABASE_PATH = DATA_DIR / "punimtag_simple.db" -TEST_DATABASE_PATH = DATA_DIR / "test_backend.db" - -# Ensure directories exist -DATA_DIR.mkdir(exist_ok=True) -PHOTOS_DIR.mkdir(exist_ok=True) - -# Flask configuration -class Config: - """Base configuration class.""" - SECRET_KEY = os.environ.get('SECRET_KEY') or 'dev-secret-key-change-in-production' - DATABASE_PATH = str(DATABASE_PATH) - PHOTOS_DIR = str(PHOTOS_DIR) - MAX_CONTENT_LENGTH = 16 * 1024 * 1024 # 16MB max file size - UPLOAD_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.bmp'} - - # Face recognition settings - FACE_DETECTION_CONFIDENCE = 0.6 - FACE_SIMILARITY_THRESHOLD = 0.6 - MAX_FACES_PER_IMAGE = 10 - - # Thumbnail settings - THUMBNAIL_SIZE = (200, 200) - FACE_THUMBNAIL_SIZE = (120, 120) - -class DevelopmentConfig(Config): - """Development configuration.""" - DEBUG = True - TESTING = False - -class ProductionConfig(Config): - """Production configuration.""" - DEBUG = False - TESTING = False - -class TestingConfig(Config): - """Testing configuration.""" - DEBUG = True - TESTING = True - DATABASE_PATH = str(TEST_DATABASE_PATH) - -# Configuration mapping -config = { - 'development': DevelopmentConfig, - 'production': ProductionConfig, - 'testing': TestingConfig, - 'default': DevelopmentConfig -} \ No newline at end of file diff --git a/docs/BACKEND_STATUS.md b/docs/BACKEND_STATUS.md deleted file mode 100644 index 5e87e86..0000000 --- a/docs/BACKEND_STATUS.md +++ /dev/null @@ -1,220 +0,0 @@ -# PunimTag Backend Development Status - -## ✅ Completed Features - -### 1. Configuration System (`config.py`) - -- **Jewish Organization Specific Settings**: Pre-configured with Jewish holidays, events, and locations -- **Face Recognition Configuration**: Adjustable thresholds, clustering parameters -- **Auto-tagging Settings**: Toggle-able features with confidence thresholds -- **Processing Configuration**: Batch sizes, worker settings, file format support -- **Persistent Settings**: JSON-based configuration file with load/save functionality - -**Key Features:** - -- 30+ predefined Jewish event tags (shabbat, wedding, bar_mitzvah, chanukah, etc.) -- 15+ location tags (synagogue, sanctuary, sukkah, israel, etc.) -- Configurable face recognition thresholds -- Auto-tagging enable/disable controls - -### 2. Enhanced Face Recognition (`punimtag.py` + `punimtag_simple.py`) - -- **Face Quality Scoring**: Evaluates face size and encoding variance -- **Advanced Face Clustering**: DBSCAN-based clustering for grouping unknown faces -- **Confidence-based Recognition**: Automatic vs manual identification based on thresholds -- **Multiple Face Angles**: Support for storing multiple encodings per person - -**Key Features:** - -- Face quality assessment for better training data -- Cluster unknown faces by similarity -- Sort by most frequently photographed people -- Face verification tools for double-checking identifications - -### 3. Comprehensive Database Schema - -- **Images Table**: Full metadata (GPS, camera info, dimensions, EXIF data) -- **People Table**: Named individuals with creation timestamps -- **Faces Table**: Precise face locations, encodings, confidence scores -- **Tags Table**: Categorized tagging system -- **Image-Tags Relationship**: Many-to-many tagging support - -**Performance Optimizations:** - -- Database indexes on key relationships -- Efficient foreign key constraints -- Optimized query structures - -### 4. Enhanced EXIF Metadata Extraction - -- **GPS Coordinates**: Latitude/longitude extraction with hemisphere handling -- **Camera Information**: Make, model, settings -- **Date/Time**: Photo taken timestamp -- **Error Handling**: Graceful fallbacks for missing data (defaults to "N/A") - -### 5. Advanced Search Capabilities - -- **Multi-criteria Search**: People + tags + dates + location + camera -- **Complex Queries**: Support for min_people requirements -- **Geographic Filtering**: Bounding box searches with GPS coordinates -- **Date Range Filtering**: From/to date searches -- **Result Limiting**: Pagination support - -### 6. Batch Processing for Large Collections - -- **Configurable Batch Sizes**: Process 5-10k images efficiently -- **Skip Processed Images**: Incremental processing for new photos -- **Progress Tracking**: Real-time status updates -- **Error Handling**: Continue processing despite individual failures - -### 7. Face Management Tools - -- **Cluster Assignment**: Assign entire face clusters to people -- **Face Verification**: Review all faces assigned to a person -- **Incorrect Assignment Removal**: Fix misidentifications -- **Most Common Faces**: Sort by frequency (most photographed people) - -### 8. Jewish Organization Tag Categories - -``` -Event Tags: shabbat, wedding, bar_mitzvah, bat_mitzvah, brit_milah, - baby_naming, shiva, yahrzeit, rosh_hashanah, yom_kippur, - sukkot, chanukah, purim, passover, etc. - -Location Tags: synagogue, sanctuary, social_hall, classroom, library, - kitchen, sukkah, israel, jerusalem, etc. - -Activity Tags: praying, studying, celebrating, socializing, ceremony, - performance, eating, etc. -``` - -## 🧪 Testing Status - -### Core Functionality Tests ✅ - -- ✅ Database creation and schema validation -- ✅ Configuration system load/save -- ✅ People and tag management -- ✅ Basic search functionality -- ✅ EXIF metadata extraction -- ✅ Face encoding storage/retrieval - -### Simplified Backend (`punimtag_simple.py`) ✅ - -- ✅ Working without sklearn dependencies -- ✅ Core face recognition functionality -- ✅ Database operations validated -- ✅ Tag and people management working -- ✅ Search queries functional - -### Performance Tests 📋 (Ready for testing) - -- **Created but not run**: 1000+ face clustering test -- **Created but not run**: Large dataset search performance -- **Created but not run**: Batch processing with 5-10k images - -## 🔧 Technical Implementation - -### Dependencies Status - -| Package | Status | Purpose | -| ---------------- | ----------- | ------------------------------- | -| face_recognition | ✅ Working | Core face detection/recognition | -| numpy | ✅ Working | Array operations | -| Pillow | ✅ Working | Image processing and EXIF | -| sqlite3 | ✅ Working | Database operations | -| scikit-learn | ⚠️ Optional | Advanced clustering (DBSCAN) | -| opencv-python | ⚠️ Optional | GUI face viewer | - -### Performance Optimizations Implemented - -1. **Database Indexes**: On faces(person_id), faces(image_id), image_tags -2. **Batch Processing**: Configurable batch sizes (default: 100) -3. **Incremental Processing**: Skip already processed images -4. **Efficient Queries**: Optimized JOIN operations for search -5. **Memory Management**: Process images one at a time - -### Error Handling - -- ✅ Graceful EXIF extraction failures -- ✅ Missing file handling -- ✅ Database constraint violations -- ✅ Face detection errors -- ✅ Configuration file corruption - -## 📊 Current Database Schema - -```sql --- Core tables with relationships -images (id, path, filename, date_taken, latitude, longitude, camera_make, ...) -people (id, name, created_at) -faces (id, image_id, person_id, top, right, bottom, left, encoding, confidence, ...) -tags (id, name, category, created_at) -image_tags (image_id, tag_id, created_at) - --- Indexes for performance -idx_faces_person, idx_faces_image, idx_image_tags_image, idx_image_tags_tag -``` - -## 🎯 Backend Readiness Assessment - -### ✅ Ready for GUI Development - -The backend is **production-ready** for GUI development with the following capabilities: - -1. **Face Recognition Pipeline**: Complete face detection → encoding → identification -2. **Database Operations**: All CRUD operations for images, people, faces, tags -3. **Search Engine**: Complex multi-criteria search functionality -4. **Jewish Org Features**: Pre-configured with relevant tags and categories -5. **Configuration System**: User-configurable settings -6. **Performance**: Optimized for 5-10k image collections - -### 🔄 Next Steps for GUI - -1. **Face Clustering Interface**: Visual display of clustered unknown faces -2. **Interactive Identification**: Click-to-identify unknown faces -3. **Search Interface**: Form-based search with filters -4. **Tag Management**: Visual tag assignment and management -5. **Statistics Dashboard**: Charts and graphs of collection data -6. **Face Verification**: Review and correct face assignments - -### 📋 Optional Enhancements (Post-GUI) - -- [ ] Hebrew calendar integration for automatic holiday tagging -- [ ] Advanced clustering with scikit-learn when available -- [ ] Thumbnail generation for faster GUI loading -- [ ] Export functionality (albums, tagged collections) -- [ ] Import from other photo management systems - -## 🚀 Deployment Notes - -### For Production Use: - -1. **Install Core Dependencies**: `pip install face_recognition pillow numpy` -2. **Optional GUI Dependencies**: `pip install opencv-python scikit-learn` -3. **Create Configuration**: Run `python config.py` to generate default config -4. **Initialize Database**: Run `python punimtag_simple.py` to create tables -5. **Add Photos**: Place images in `photos/` directory -6. **Process Images**: Run the main processing script - -### Performance Recommendations: - -- **For 1k-5k images**: Use default batch size (100) -- **For 5k-10k images**: Increase batch size to 200-500 -- **For 10k+ images**: Consider database optimization and larger batches - -## 🏁 Conclusion - -**The PunimTag backend is fully functional and ready for GUI development.** - -All core requirements have been implemented: - -- ✅ Face recognition with identification -- ✅ Complex search capabilities -- ✅ Jewish organization specific features -- ✅ Comprehensive tagging system -- ✅ CRUD interface for all entities -- ✅ Performance optimizations for large collections -- ✅ Configuration system with auto-tagging controls - -The system is tested, documented, and ready to support a GUI interface that will provide all the functionality requested in the original requirements. diff --git a/docs/IDEAS.md b/docs/IDEAS.md deleted file mode 100644 index 0a6f14f..0000000 --- a/docs/IDEAS.md +++ /dev/null @@ -1,194 +0,0 @@ -# PunimTag - Future Enhancement Ideas - -## 🎯 Core Improvements - -### 1. Enhanced Face Recognition - -- **Multi-angle face training**: Store multiple angles of the same person for better recognition -- **Face quality scoring**: Rate face image quality and use only high-quality samples for training -- **Age progression handling**: Account for aging when matching faces across time periods -- **Expression normalization**: Better handle different facial expressions -- **Confidence thresholds**: User-configurable confidence levels for automatic vs manual identification - -### 2. Performance Optimizations - -- **Incremental processing**: Only process new/modified images -- **Parallel processing**: Use multiprocessing for faster batch operations -- **Face encoding cache**: Cache encodings to avoid recomputation -- **Thumbnail generation**: Create and store thumbnails for faster UI display -- **Database indexing**: Optimize queries with better indexes and query plans - -### 3. Advanced Tagging - -- **AI-powered auto-tagging**: - - Scene detection (beach, mountain, city, etc.) - - Object detection (cars, pets, food, etc.) - - Activity recognition (eating, sports, working) - - Emotion detection (happy, sad, surprised) - - Indoor/outdoor classification -- **Tag hierarchies**: Parent-child tag relationships (e.g., "vacation" → "beach vacation") -- **Smart tag suggestions**: Based on similar images and user patterns -- **Batch tag operations**: Apply/remove tags from multiple images efficiently - -## 🌐 Web Interface - -### 1. Modern Web UI - -- **React/Vue.js frontend** with responsive design -- **Gallery view** with filtering and sorting -- **Face clustering visualization**: Interactive graph showing face relationships -- **Drag-and-drop uploads**: Easy image addition -- **Real-time updates**: WebSocket for live processing status - -### 2. Features - -- **Interactive face identification**: Click faces to identify them -- **Tag cloud**: Visual representation of tag frequency -- **Timeline view**: Browse photos chronologically -- **Map view**: Show photos on a map using GPS data -- **Slideshow mode**: With face and tag filters - -## 🔗 Integrations - -### 1. Cloud Storage - -- **Google Photos sync**: Import/export with Google Photos -- **iCloud integration**: Sync with Apple Photos -- **Dropbox/OneDrive**: Monitor folders for new images -- **S3 compatibility**: Store images in cloud storage - -### 2. Social Media - -- **Facebook integration**: Import tagged faces (with permission) -- **Instagram import**: Bring in photos with hashtags as tags -- **Privacy-aware sharing**: Share photos only with people in them - -## 🛡️ Privacy & Security - -### 1. Privacy Features - -- **Face anonymization**: Blur unidentified faces on export -- **Consent management**: Track consent for face recognition -- **GDPR compliance**: Right to be forgotten, data export -- **Encryption**: Client-side encryption option -- **Access controls**: User/group permissions - -### 2. Backup & Recovery - -- **Automated backups**: Scheduled database and image backups -- **Version control**: Track changes to face identifications -- **Disaster recovery**: Restore from backups easily -- **Export formats**: Multiple export options (JSON, CSV, etc.) - -## 🤖 AI Enhancements - -### 1. Advanced ML Features - -- **Face clustering improvements**: Use deep learning for better grouping -- **Duplicate detection**: Find and manage similar photos -- **Photo quality assessment**: Identify blurry/poor quality images -- **Automatic album creation**: Group photos by events -- **Style transfer**: Apply artistic filters based on tags - -### 2. Natural Language Processing - -- **Natural language search**: "Show me beach photos with John from last summer" -- **Voice commands**: Control the app with voice -- **Caption generation**: Auto-generate photo descriptions -- **Story creation**: Generate photo stories/albums automatically - -## 🔧 Developer Features - -### 1. API & Extensions - -- **RESTful API**: Full API for third-party integration -- **GraphQL endpoint**: Flexible data querying -- **Plugin system**: Allow custom extensions -- **Webhook support**: Notify external systems of changes -- **SDK development**: Python/JavaScript SDKs - -### 2. Advanced Tools - -- **Batch processing CLI**: Command-line tools for power users -- **Migration tools**: Import from other photo management systems -- **Analytics dashboard**: Usage statistics and insights -- **Performance monitoring**: Track system performance - -## 📊 Analytics & Insights - -### 1. Photo Statistics - -- **Face frequency**: Most photographed people -- **Tag analytics**: Most used tags over time -- **Location heatmap**: Where most photos are taken -- **Time patterns**: When photos are typically taken -- **Relationship graphs**: Visualize people connections - -### 2. Personal Insights - -- **Year in review**: Automated yearly summaries -- **Memory reminders**: "On this day" features -- **Growth tracking**: Watch children grow over time -- **Event detection**: Automatically identify special events - -## 🎨 Creative Features - -### 1. Photo Enhancement - -- **Automatic enhancement**: AI-powered photo improvement -- **Red-eye removal**: Automatic detection and correction -- **Background replacement**: Change photo backgrounds -- **Face beautification**: Optional beauty filters - -### 2. Creative Tools - -- **Collage generation**: Auto-create collages by tags/people -- **Photo books**: Design and export photo books -- **Video generation**: Create videos from photo sets -- **AR features**: View photos in augmented reality - -## 🔮 Future Technologies - -### 1. Emerging Tech - -- **Blockchain**: Decentralized photo ownership proof -- **IPFS storage**: Distributed photo storage -- **Edge AI**: On-device processing for privacy -- **5G optimization**: Fast mobile sync and processing - -### 2. Experimental Features - -- **3D face modeling**: Create 3D models from multiple photos -- **Time-lapse generation**: Show aging/changes over time -- **DeepFake detection**: Identify manipulated images -- **Holographic displays**: Future display technology support - -## 📋 Implementation Priority - -### Phase 1 (Next 3 months) - -1. Web UI basic implementation -2. Performance optimizations -3. Better error handling -4. Basic auto-tagging - -### Phase 2 (6 months) - -1. Mobile PWA -2. Cloud storage integration -3. Advanced search -4. API development - -### Phase 3 (1 year) - -1. AI enhancements -2. Social integrations -3. Analytics dashboard -4. Plugin system - -### Long-term (2+ years) - -1. Native mobile apps -2. Blockchain integration -3. AR/VR features -4. Advanced AI features diff --git a/docs/TESTING_GUIDE.md b/docs/TESTING_GUIDE.md deleted file mode 100644 index b789483..0000000 --- a/docs/TESTING_GUIDE.md +++ /dev/null @@ -1,283 +0,0 @@ -# PunimTag Testing Guide - -## 🧪 Testing with Real Images - -### Step 1: Prepare Your Test Images - -1. **Create/Use Photos Directory**: - - ```bash - mkdir -p photos - ``` - -2. **Add Test Images**: - - - Copy 10-20 photos with faces to the `photos/` directory - - Supported formats: `.jpg`, `.jpeg`, `.png`, `.bmp`, `.tiff`, `.gif` - - For best results, use photos with clear, front-facing faces - - Include photos with the same people for face recognition testing - -3. **Organize by Subdirectories** (optional): - ``` - photos/ - ├── events/ - │ ├── wedding_2023/ - │ └── bar_mitzvah/ - ├── family/ - └── synagogue/ - ``` - -### Step 2: Process Images - -```bash -# Process all images in photos directory -python punimtag_simple.py -``` - -This will: - -- Scan all images in `photos/` directory (including subdirectories) -- Extract EXIF metadata (GPS, camera info, dates) -- Detect all faces and create encodings -- Store everything in `punimtag_simple.db` - -### Step 3: Inspect Results - -```bash -# Check what was processed -python db_manager.py -# Choose option 1 to inspect database -``` - -### Step 4: Identify People (Interactive) - -```bash -# Use the CLI face identifier -python interactive_identifier.py -``` - -This will show you unidentified faces and let you name them. - -### Step 5: Add Tags - -```bash -# Use the tag manager -python tag_manager.py -``` - -Add Jewish organization specific tags like: - -- Events: `shabbat`, `wedding`, `bar_mitzvah`, `chanukah` -- Locations: `synagogue`, `home`, `israel` -- Activities: `praying`, `celebrating`, `studying` - -## 🧹 Database Management - -### Clean Database (Keep Schema) - -```bash -python db_manager.py -# Choose option 2 -``` - -- Removes all data but keeps tables -- Creates automatic backup first - -### Delete Database Completely - -```bash -python db_manager.py -# Choose option 3 -``` - -- Deletes entire database file -- Creates automatic backup first - -### Inspect Database - -```bash -python db_manager.py -# Choose option 1 -``` - -Shows: - -- Image/face/people counts -- Top people by frequency -- Most used tags -- Database file size - -## 🔍 Testing Search Functionality - -### Basic Search Test - -```python -from punimtag_simple import SimplePunimTag - -tagger = SimplePunimTag() - -# Search by person -results = tagger.simple_search(people=["Rabbi Cohen"]) -print(f"Found {len(results)} images with Rabbi Cohen") - -# Search by tag -results = tagger.simple_search(tags=["wedding"]) -print(f"Found {len(results)} wedding images") - -# Combined search -results = tagger.simple_search( - people=["Sarah Goldberg"], - tags=["shabbat"] -) -print(f"Found {len(results)} images of Sarah at Shabbat") - -tagger.close() -``` - -## 📊 Performance Testing - -### Test with Different Collection Sizes - -1. **Small Collection (10-50 images)**: - - - Process time: ~1-5 minutes - - Good for initial testing - -2. **Medium Collection (100-500 images)**: - - - Process time: ~10-30 minutes - - Test face recognition accuracy - -3. **Large Collection (1000+ images)**: - - Process time: 1+ hours - - Test batch processing and performance - -### Monitor Performance - -```python -import time -from punimtag_simple import SimplePunimTag - -start_time = time.time() -tagger = SimplePunimTag() -processed = tagger.process_directory() -end_time = time.time() - -print(f"Processed {processed} images in {end_time - start_time:.2f} seconds") -tagger.close() -``` - -## 🎯 Testing Specific Features - -### 1. Face Recognition Accuracy - -1. Process images with same people -2. Identify some faces manually -3. Process new images with same people -4. Check if they're automatically recognized - -### 2. Jewish Organization Tags - -```python -from punimtag_simple import SimplePunimTag -from config import get_config - -config = get_config() -event_tags = config.get_tag_suggestions('event') -print("Available Jewish event tags:", event_tags[:10]) -``` - -### 3. EXIF Metadata Extraction - -```python -from punimtag_simple import SimplePunimTag - -tagger = SimplePunimTag() -metadata = tagger.extract_metadata("photos/your_image.jpg") -print("Extracted metadata:", metadata) -tagger.close() -``` - -### 4. GPS Location Data - -- Use photos taken with smartphones (usually have GPS) -- Check if latitude/longitude are extracted -- Test location-based searches - -## 🐛 Troubleshooting - -### Common Issues - -1. **"No faces detected"**: - - - Check image quality - - Ensure faces are clearly visible - - Try different lighting conditions - -2. **"EXIF data missing"**: - - - Some images don't have EXIF data - - System will default to "N/A" - - This is normal behavior - -3. **"Face recognition not working"**: - - - Need multiple photos of same person - - Faces should be front-facing and clear - - Check confidence threshold in config - -4. **"Processing is slow"**: - - Normal for large collections - - Adjust batch size in config - - Consider using smaller test set first - -### Debug Mode - -```python -# Add debug logging to see what's happening -import logging -logging.basicConfig(level=logging.DEBUG) - -from punimtag_simple import SimplePunimTag -tagger = SimplePunimTag() -# ... rest of your code -``` - -## ✅ Validation Checklist - -Before moving to GUI development, validate: - -- [ ] Images are processing without errors -- [ ] Faces are being detected correctly -- [ ] EXIF metadata is being extracted -- [ ] People can be identified and assigned -- [ ] Tags can be added and searched -- [ ] Database operations work smoothly -- [ ] Search functionality returns expected results -- [ ] Performance is acceptable for your collection size - -## 🔄 Reset for Fresh Testing - -```bash -# Clean everything and start fresh -python db_manager.py # Choose option 2 to clean -rm -f punimtag_config.json # Reset config -python config.py # Regenerate default config -``` - -## 📝 Next Steps After Testing - -Once testing is successful: - -1. **GUI Development**: Create visual interface -2. **Advanced Features**: Add clustering, verification tools -3. **Performance Optimization**: Fine-tune for your specific needs - -## 💡 Testing Tips - -1. **Start Small**: Test with 10-20 images first -2. **Use Clear Photos**: Better face detection results -3. **Same People**: Include multiple photos of same people -4. **Variety**: Test different scenarios (indoor/outdoor, events, etc.) -5. **Monitor Progress**: Watch console output during processing -6. **Backup Often**: Use database manager to create backups diff --git a/docs/api-standards.md b/docs/api-standards.md deleted file mode 100644 index bd47558..0000000 --- a/docs/api-standards.md +++ /dev/null @@ -1,335 +0,0 @@ -# PunimTag API Standards - -## Overview - -This document defines the standards for designing and implementing API endpoints in PunimTag. - -## Response Format - -### Success Response - -```json -{ - "success": true, - "data": { - // Response data here - }, - "message": "Optional success message" -} -``` - -### Error Response - -```json -{ - "success": false, - "error": "Descriptive error message", - "code": "ERROR_CODE_OPTIONAL" -} -``` - -### Paginated Response - -```json -{ - "success": true, - "data": { - "items": [...], - "pagination": { - "page": 1, - "per_page": 20, - "total": 150, - "pages": 8 - } - } -} -``` - -## HTTP Status Codes - -### Success Codes - -- **200 OK**: Request successful -- **201 Created**: Resource created successfully -- **204 No Content**: Request successful, no content to return - -### Client Error Codes - -- **400 Bad Request**: Invalid request data -- **401 Unauthorized**: Authentication required -- **403 Forbidden**: Access denied -- **404 Not Found**: Resource not found -- **409 Conflict**: Resource conflict -- **422 Unprocessable Entity**: Validation error - -### Server Error Codes - -- **500 Internal Server Error**: Server error -- **503 Service Unavailable**: Service temporarily unavailable - -## Endpoint Naming Conventions - -### RESTful Patterns - -- **GET /photos**: List photos -- **GET /photos/{id}**: Get specific photo -- **POST /photos**: Create new photo -- **PUT /photos/{id}**: Update photo -- **DELETE /photos/{id}**: Delete photo - -### Custom Actions - -- **POST /photos/{id}/identify**: Identify faces in photo -- **POST /photos/{id}/duplicates**: Find duplicates -- **GET /photos/{id}/faces**: Get faces in photo - -## Request Parameters - -### Query Parameters - -```python -# Standard pagination -page = request.args.get('page', 1, type=int) -per_page = request.args.get('per_page', 20, type=int) - -# Filtering -filter_name = request.args.get('filter', '') -sort_by = request.args.get('sort', 'date_taken') -sort_order = request.args.get('order', 'desc') -``` - -### JSON Body Parameters - -```python -# Validate required fields -data = request.get_json() -if not data: - return jsonify({'success': False, 'error': 'No JSON data provided'}), 400 - -required_fields = ['name', 'email'] -for field in required_fields: - if field not in data: - return jsonify({'success': False, 'error': f'Missing required field: {field}'}), 400 -``` - -## Error Handling - -### Standard Error Handler - -```python -@app.errorhandler(404) -def not_found(error): - return jsonify({ - 'success': False, - 'error': 'Resource not found', - 'code': 'NOT_FOUND' - }), 404 - -@app.errorhandler(500) -def internal_error(error): - return jsonify({ - 'success': False, - 'error': 'Internal server error', - 'code': 'INTERNAL_ERROR' - }), 500 -``` - -### Validation Errors - -```python -def validate_photo_data(data): - errors = [] - - if 'filename' not in data: - errors.append('filename is required') - - if 'path' in data and not os.path.exists(data['path']): - errors.append('file path does not exist') - - return errors - -# Usage in endpoint -errors = validate_photo_data(data) -if errors: - return jsonify({ - 'success': False, - 'error': 'Validation failed', - 'details': errors - }), 422 -``` - -## Database Operations - -### Connection Management - -```python -def get_db_connection(): - conn = sqlite3.connect('punimtag_simple.db') - conn.row_factory = sqlite3.Row # Enable dict-like access - return conn - -# Usage in endpoint -try: - conn = get_db_connection() - cursor = conn.cursor() - # Database operations - conn.commit() -except Exception as e: - conn.rollback() - return jsonify({'success': False, 'error': str(e)}), 500 -finally: - conn.close() -``` - -### Parameterized Queries - -```python -# Always use parameterized queries to prevent SQL injection -cursor.execute('SELECT * FROM images WHERE id = ?', (image_id,)) -cursor.execute('INSERT INTO photos (name, path) VALUES (?, ?)', (name, path)) -``` - -## Rate Limiting - -### Basic Rate Limiting - -```python -from functools import wraps -import time - -def rate_limit(requests_per_minute=60): - def decorator(f): - @wraps(f) - def wrapped(*args, **kwargs): - # Implement rate limiting logic here - return f(*args, **kwargs) - return wrapped - return decorator - -# Usage -@app.route('/api/photos') -@rate_limit(requests_per_minute=30) -def get_photos(): - # Endpoint implementation - pass -``` - -## Caching - -### Response Caching - -```python -from functools import wraps -import hashlib -import json - -def cache_response(ttl_seconds=300): - def decorator(f): - @wraps(f) - def wrapped(*args, **kwargs): - # Implement caching logic here - return f(*args, **kwargs) - return wrapped - return decorator - -# Usage -@app.route('/api/photos') -@cache_response(ttl_seconds=60) -def get_photos(): - # Endpoint implementation - pass -``` - -## Logging - -### Request Logging - -```python -import logging - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -@app.before_request -def log_request(): - logger.info(f'{request.method} {request.path} - {request.remote_addr}') - -@app.after_request -def log_response(response): - logger.info(f'Response: {response.status_code}') - return response -``` - -## Security - -### Input Sanitization - -```python -import re - -def sanitize_filename(filename): - # Remove dangerous characters - filename = re.sub(r'[<>:"/\\|?*]', '', filename) - # Limit length - return filename[:255] - -def validate_file_type(filename): - allowed_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp'} - ext = os.path.splitext(filename)[1].lower() - return ext in allowed_extensions -``` - -### CORS Headers - -```python -@app.after_request -def add_cors_headers(response): - response.headers['Access-Control-Allow-Origin'] = '*' - response.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT, DELETE, OPTIONS' - response.headers['Access-Control-Allow-Headers'] = 'Content-Type' - return response -``` - -## Testing - -### Endpoint Testing - -```python -def test_get_photos(): - response = app.test_client().get('/api/photos') - assert response.status_code == 200 - data = json.loads(response.data) - assert data['success'] == True - assert 'data' in data - -def test_create_photo(): - response = app.test_client().post('/api/photos', - json={'filename': 'test.jpg', 'path': '/test/path'}) - assert response.status_code == 201 - data = json.loads(response.data) - assert data['success'] == True -``` - -## Documentation - -### Endpoint Documentation - -```python -@app.route('/api/photos', methods=['GET']) -def get_photos(): - """ - Get a list of photos with optional filtering and pagination. - - Query Parameters: - page (int): Page number (default: 1) - per_page (int): Items per page (default: 20) - filter (str): Filter by name or tags - sort (str): Sort field (default: date_taken) - order (str): Sort order (asc/desc, default: desc) - - Returns: - JSON response with photos and pagination info - """ - # Implementation - pass -``` diff --git a/docs/code-conventions.md b/docs/code-conventions.md deleted file mode 100644 index 8268282..0000000 --- a/docs/code-conventions.md +++ /dev/null @@ -1,725 +0,0 @@ -# PunimTag Code Conventions - -## Overview - -This document defines the coding standards and conventions for PunimTag development. - -## Python Conventions - -### Code Style - -Follow PEP 8 with these specific guidelines: - -```python -# Imports -import os -import sys -from typing import List, Dict, Optional -from flask import Flask, request, jsonify - -# Constants -MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB -ALLOWED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif'} - -# Functions -def process_image(image_path: str, max_size: int = MAX_FILE_SIZE) -> Dict[str, any]: - """ - Process an image file and extract metadata. - - Args: - image_path: Path to the image file - max_size: Maximum file size in bytes - - Returns: - Dictionary containing image metadata - - Raises: - FileNotFoundError: If image file doesn't exist - ValueError: If file size exceeds limit - """ - if not os.path.exists(image_path): - raise FileNotFoundError(f"Image file not found: {image_path}") - - file_size = os.path.getsize(image_path) - if file_size > max_size: - raise ValueError(f"File size {file_size} exceeds limit {max_size}") - - # Process the image - metadata = extract_metadata(image_path) - return metadata - -# Classes -class ImageProcessor: - """Handles image processing operations.""" - - def __init__(self, config: Dict[str, any]): - """ - Initialize the image processor. - - Args: - config: Configuration dictionary - """ - self.config = config - self.supported_formats = config.get('supported_formats', ALLOWED_EXTENSIONS) - - def process_batch(self, image_paths: List[str]) -> List[Dict[str, any]]: - """ - Process multiple images in batch. - - Args: - image_paths: List of image file paths - - Returns: - List of processed image metadata - """ - results = [] - for path in image_paths: - try: - result = self.process_single(path) - results.append(result) - except Exception as e: - logger.error(f"Failed to process {path}: {e}") - results.append({'error': str(e), 'path': path}) - - return results -``` - -### Naming Conventions - -#### Variables and Functions - -```python -# Use snake_case for variables and functions -user_name = "john_doe" -photo_count = 150 -max_file_size = 10 * 1024 * 1024 - -def get_user_photos(user_id: int) -> List[Dict]: - """Get photos for a specific user.""" - pass - -def calculate_face_similarity(face1: List[float], face2: List[float]) -> float: - """Calculate similarity between two face encodings.""" - pass -``` - -#### Classes - -```python -# Use PascalCase for classes -class PhotoManager: - """Manages photo operations.""" - pass - -class FaceRecognitionEngine: - """Handles face recognition operations.""" - pass -``` - -#### Constants - -```python -# Use UPPER_CASE for constants -DATABASE_PATH = "punimtag_simple.db" -MAX_THUMBNAIL_SIZE = (200, 200) -DEFAULT_PAGE_SIZE = 20 -``` - -### Type Hints - -```python -from typing import List, Dict, Optional, Union, Tuple - -def get_photos( - user_id: int, - page: int = 1, - per_page: int = DEFAULT_PAGE_SIZE, - filters: Optional[Dict[str, any]] = None -) -> Dict[str, Union[List[Dict], int]]: - """ - Get photos with pagination and filtering. - - Returns: - Dictionary with 'photos' list and 'total' count - """ - pass - -def process_face_encodings( - encodings: List[List[float]] -) -> Tuple[List[float], float]: - """ - Process face encodings and return average encoding and confidence. - - Returns: - Tuple of (average_encoding, confidence_score) - """ - pass -``` - -### Error Handling - -```python -import logging -from typing import Optional - -logger = logging.getLogger(__name__) - -def safe_operation(func): - """Decorator for safe operation execution.""" - def wrapper(*args, **kwargs): - try: - return func(*args, **kwargs) - except Exception as e: - logger.error(f"Error in {func.__name__}: {e}") - return None - return wrapper - -@safe_operation -def load_image_safely(image_path: str) -> Optional[PIL.Image.Image]: - """Load image with error handling.""" - return PIL.Image.open(image_path) - -def process_user_request(user_data: Dict) -> Dict[str, any]: - """Process user request with comprehensive error handling.""" - try: - # Validate input - if not user_data.get('user_id'): - return {'success': False, 'error': 'Missing user_id'} - - # Process request - result = perform_operation(user_data) - return {'success': True, 'data': result} - - except ValueError as e: - logger.warning(f"Validation error: {e}") - return {'success': False, 'error': str(e)} - except FileNotFoundError as e: - logger.error(f"File not found: {e}") - return {'success': False, 'error': 'File not found'} - except Exception as e: - logger.error(f"Unexpected error: {e}") - return {'success': False, 'error': 'Internal server error'} -``` - -## JavaScript Conventions - -### Code Style - -Follow ESLint with these specific guidelines: - -```javascript -// Constants -const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB -const ALLOWED_EXTENSIONS = [".jpg", ".jpeg", ".png", ".gif"]; - -// Functions -function processImage(imagePath, maxSize = MAX_FILE_SIZE) { - /** - * Process an image file and extract metadata. - * @param {string} imagePath - Path to the image file - * @param {number} maxSize - Maximum file size in bytes - * @returns {Promise} Image metadata - */ - return new Promise((resolve, reject) => { - if (!imagePath) { - reject(new Error("Image path is required")); - return; - } - - // Process the image - resolve(extractMetadata(imagePath)); - }); -} - -// Classes -class ImageProcessor { - /** - * Handles image processing operations. - * @param {Object} config - Configuration object - */ - constructor(config) { - this.config = config; - this.supportedFormats = config.supportedFormats || ALLOWED_EXTENSIONS; - } - - /** - * Process multiple images in batch. - * @param {string[]} imagePaths - Array of image file paths - * @returns {Promise} Array of processed image metadata - */ - async processBatch(imagePaths) { - const results = []; - - for (const path of imagePaths) { - try { - const result = await this.processSingle(path); - results.push(result); - } catch (error) { - console.error(`Failed to process ${path}:`, error); - results.push({ error: error.message, path }); - } - } - - return results; - } -} -``` - -### Naming Conventions - -#### Variables and Functions - -```javascript -// Use camelCase for variables and functions -const userName = "johnDoe"; -const photoCount = 150; -const maxFileSize = 10 * 1024 * 1024; - -function getUserPhotos(userId) { - // Get photos for a specific user -} - -function calculateFaceSimilarity(face1, face2) { - // Calculate similarity between two face encodings -} -``` - -#### Classes - -```javascript -// Use PascalCase for classes -class PhotoManager { - // Manages photo operations -} - -class FaceRecognitionEngine { - // Handles face recognition operations -} -``` - -#### Constants - -```javascript -// Use UPPER_SNAKE_CASE for constants -const DATABASE_PATH = "punimtag_simple.db"; -const MAX_THUMBNAIL_SIZE = { width: 200, height: 200 }; -const DEFAULT_PAGE_SIZE = 20; -``` - -### Error Handling - -```javascript -// Async/await with try-catch -async function processUserRequest(userData) { - try { - // Validate input - if (!userData.userId) { - return { success: false, error: "Missing userId" }; - } - - // Process request - const result = await performOperation(userData); - return { success: true, data: result }; - } catch (error) { - console.error("Error processing request:", error); - return { success: false, error: "Internal server error" }; - } -} - -// Promise-based error handling -function loadImageSafely(imagePath) { - return new Promise((resolve, reject) => { - if (!imagePath) { - reject(new Error("Image path is required")); - return; - } - - // Load image logic - resolve(imageData); - }).catch((error) => { - console.error("Error loading image:", error); - return null; - }); -} -``` - -## Database Conventions - -### Table Naming - -```sql --- Use snake_case for table names -CREATE TABLE user_profiles ( - id INTEGER PRIMARY KEY, - user_name TEXT NOT NULL, - email_address TEXT UNIQUE, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); - -CREATE TABLE photo_metadata ( - id INTEGER PRIMARY KEY, - image_id INTEGER REFERENCES images(id), - exif_data TEXT, - gps_coordinates TEXT, - processing_status TEXT DEFAULT 'pending' -); -``` - -### Column Naming - -```sql --- Use snake_case for column names -CREATE TABLE images ( - id INTEGER PRIMARY KEY, - file_name TEXT NOT NULL, - file_path TEXT NOT NULL, - file_size INTEGER, - date_taken TIMESTAMP, - upload_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - is_processed BOOLEAN DEFAULT FALSE -); -``` - -### Index Naming - -```sql --- Use descriptive names for indexes -CREATE INDEX idx_images_date_taken ON images(date_taken); -CREATE INDEX idx_faces_person_id ON faces(person_id); -CREATE INDEX idx_photos_user_id_date ON photos(user_id, date_taken); -``` - -## File Organization - -### Python Files - -```python -# File: src/backend/photo_manager.py -""" -Photo management module. - -This module handles all photo-related operations including -upload, processing, and metadata extraction. -""" - -import os -import logging -from typing import List, Dict, Optional -from PIL import Image - -# Constants -MAX_FILE_SIZE = 10 * 1024 * 1024 -ALLOWED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif'} - -# Logging -logger = logging.getLogger(__name__) - -class PhotoManager: - """Manages photo operations.""" - - def __init__(self, config: Dict[str, any]): - self.config = config - self.storage_path = config.get('storage_path', './photos') - - def process_photo(self, photo_path: str) -> Dict[str, any]: - """Process a single photo.""" - # Implementation - pass - -# Main execution (if applicable) -if __name__ == "__main__": - # Test or standalone execution - pass -``` - -### JavaScript Files - -```javascript -// File: src/frontend/photoManager.js -/** - * Photo management module. - * - * This module handles all photo-related operations including - * upload, processing, and metadata extraction. - */ - -// Constants -const MAX_FILE_SIZE = 10 * 1024 * 1024; -const ALLOWED_EXTENSIONS = [".jpg", ".jpeg", ".png", ".gif"]; - -// Logging -const logger = { - info: (msg) => console.log(`[INFO] ${msg}`), - error: (msg) => console.error(`[ERROR] ${msg}`), - warn: (msg) => console.warn(`[WARN] ${msg}`), -}; - -class PhotoManager { - /** - * Manages photo operations. - * @param {Object} config - Configuration object - */ - constructor(config) { - this.config = config; - this.storagePath = config.storagePath || "./photos"; - } - - /** - * Process a single photo. - * @param {string} photoPath - Path to the photo - * @returns {Promise} Processing result - */ - async processPhoto(photoPath) { - // Implementation - } -} - -// Export for module systems -if (typeof module !== "undefined" && module.exports) { - module.exports = PhotoManager; -} -``` - -## Documentation Standards - -### Function Documentation - -```python -def extract_face_features(image_path: str, face_coordinates: Tuple[int, int, int, int]) -> List[float]: - """ - Extract face features from an image region. - - This function takes an image and face coordinates, then extracts - 128-dimensional feature vectors using dlib's face recognition model. - - Args: - image_path: Path to the source image file - face_coordinates: Tuple of (left, top, right, bottom) coordinates - - Returns: - List of 128 float values representing face features - - Raises: - FileNotFoundError: If image file doesn't exist - ValueError: If face coordinates are invalid - RuntimeError: If face recognition model fails - - Example: - >>> coords = (100, 100, 200, 200) - >>> features = extract_face_features("photo.jpg", coords) - >>> len(features) - 128 - """ - pass -``` - -### Class Documentation - -```python -class FaceRecognitionEngine: - """ - Engine for face recognition operations. - - This class provides methods for detecting faces in images, - extracting face features, and comparing face similarities. - - Attributes: - model_path (str): Path to the face recognition model - confidence_threshold (float): Minimum confidence for face detection - max_faces (int): Maximum number of faces to detect per image - - Example: - >>> engine = FaceRecognitionEngine() - >>> faces = engine.detect_faces("group_photo.jpg") - >>> print(f"Found {len(faces)} faces") - """ - - def __init__(self, model_path: str = None, confidence_threshold: float = 0.6): - """ - Initialize the face recognition engine. - - Args: - model_path: Path to the face recognition model file - confidence_threshold: Minimum confidence for face detection - """ - pass -``` - -## Testing Conventions - -### Test File Structure - -```python -# File: tests/unit/test_photo_manager.py -""" -Unit tests for PhotoManager class. -""" - -import pytest -from unittest.mock import Mock, patch -from src.backend.photo_manager import PhotoManager - -class TestPhotoManager: - """Test cases for PhotoManager class.""" - - @pytest.fixture - def photo_manager(self): - """Create a PhotoManager instance for testing.""" - config = {'storage_path': '/test/path'} - return PhotoManager(config) - - def test_process_photo_with_valid_file(self, photo_manager): - """Test processing a valid photo file.""" - # Test implementation - pass - - def test_process_photo_with_invalid_file(self, photo_manager): - """Test processing an invalid photo file.""" - # Test implementation - pass -``` - -## Git Conventions - -### Commit Messages - -``` -feat: add face recognition feature -fix: resolve duplicate photo detection issue -docs: update API documentation -test: add unit tests for photo processing -refactor: improve error handling in face detection -style: format code according to PEP 8 -perf: optimize thumbnail generation -chore: update dependencies -``` - -### Branch Naming - -``` -feature/face-recognition -bugfix/duplicate-detection -hotfix/security-vulnerability -docs/api-documentation -test/photo-processing -refactor/error-handling -``` - -## Performance Guidelines - -### Python Performance - -```python -# Use list comprehensions instead of loops when appropriate -# Good -squares = [x**2 for x in range(1000)] - -# Avoid -squares = [] -for x in range(1000): - squares.append(x**2) - -# Use generators for large datasets -def process_large_dataset(file_path): - """Process large dataset using generator.""" - with open(file_path, 'r') as file: - for line in file: - yield process_line(line) - -# Use appropriate data structures -from collections import defaultdict, Counter - -# Use defaultdict for counting -word_count = defaultdict(int) -for word in words: - word_count[word] += 1 - -# Use Counter for frequency analysis -word_freq = Counter(words) -``` - -### JavaScript Performance - -```javascript -// Use appropriate array methods -// Good -const squares = Array.from({ length: 1000 }, (_, i) => i ** 2); - -// Avoid -const squares = []; -for (let i = 0; i < 1000; i++) { - squares.push(i ** 2); -} - -// Use async/await for I/O operations -async function processImages(imagePaths) { - const results = await Promise.all( - imagePaths.map((path) => processImage(path)) - ); - return results; -} - -// Use appropriate data structures -const wordCount = new Map(); -words.forEach((word) => { - wordCount.set(word, (wordCount.get(word) || 0) + 1); -}); -``` - -## Security Guidelines - -### Input Validation - -```python -import re -from pathlib import Path - -def validate_filename(filename: str) -> bool: - """Validate filename for security.""" - # Check for dangerous characters - dangerous_chars = r'[<>:"/\\|?*]' - if re.search(dangerous_chars, filename): - return False - - # Check for path traversal - if '..' in filename or filename.startswith('/'): - return False - - # Check length - if len(filename) > 255: - return False - - return True - -def sanitize_user_input(user_input: str) -> str: - """Sanitize user input to prevent injection attacks.""" - # Remove HTML tags - import html - sanitized = html.escape(user_input) - - # Remove SQL injection patterns - sql_patterns = [';', '--', '/*', '*/', 'union', 'select', 'drop'] - for pattern in sql_patterns: - sanitized = sanitized.replace(pattern.lower(), '') - - return sanitized -``` - -### Database Security - -```python -# Always use parameterized queries -def get_user_photos(user_id: int): - """Get photos for a user using parameterized query.""" - cursor.execute( - 'SELECT * FROM photos WHERE user_id = ?', - (user_id,) - ) - return cursor.fetchall() - -# Never use string formatting for SQL -# BAD - vulnerable to SQL injection -def bad_get_user_photos(user_id: int): - cursor.execute(f'SELECT * FROM photos WHERE user_id = {user_id}') - return cursor.fetchall() -``` diff --git a/docs/product.md b/docs/product.md deleted file mode 100644 index a9af249..0000000 --- a/docs/product.md +++ /dev/null @@ -1,69 +0,0 @@ -# PunimTag Product Vision - -## Overview - -PunimTag is an intelligent photo management system that uses face recognition to automatically organize, tag, and manage personal photo collections. - -## Core Value Proposition - -- **Automatic Face Recognition**: Identify and tag people in photos without manual effort -- **Smart Organization**: Group photos by people, events, and locations -- **Duplicate Detection**: Find and manage duplicate photos automatically -- **Intuitive Interface**: Web-based GUI that's easy to use for non-technical users -- **Privacy-First**: Local processing, no cloud dependencies - -## Target Users - -- **Primary**: Individuals with large photo collections (families, photographers, content creators) -- **Secondary**: Small businesses needing photo organization (real estate, events, etc.) - -## Key Features - -### 1. Photo Management - -- Upload and organize photos by date, location, and content -- Automatic metadata extraction (EXIF data, GPS coordinates) -- Batch operations for efficiency - -### 2. Face Recognition & Tagging - -- Automatic face detection in photos -- Face identification and naming -- Group photos by people -- Handle multiple faces per photo - -### 3. Duplicate Management - -- Find duplicate photos automatically -- Visual comparison tools -- Bulk removal options -- Keep best quality versions - -### 4. Search & Discovery - -- Search by person name -- Filter by date ranges -- Tag-based filtering -- Similar face suggestions - -### 5. User Experience - -- Progressive loading for large collections -- Responsive web interface -- Custom dialogs (no browser alerts) -- Real-time notifications - -## Success Metrics - -- **User Engagement**: Time spent organizing photos -- **Accuracy**: Face recognition precision -- **Performance**: Load times for large collections -- **Usability**: User satisfaction and ease of use - -## Future Roadmap - -- Cloud sync capabilities -- Mobile app companion -- Advanced AI features (emotion detection, age progression) -- Social sharing features -- Integration with existing photo services diff --git a/docs/structure.md b/docs/structure.md deleted file mode 100644 index 9ed87cf..0000000 --- a/docs/structure.md +++ /dev/null @@ -1,109 +0,0 @@ -# PunimTag Project Structure - -## Directory Organization - -``` -PunimTag/ -├── src/ # Main application source code -│ ├── backend/ # Flask backend and API -│ ├── frontend/ # JavaScript and UI components -│ └── utils/ # Utility functions and helpers -├── docs/ # Documentation and steering documents -├── tests/ # All test files and test utilities -├── data/ # Database files and user data -├── assets/ # Static assets (images, CSS, etc.) -├── config/ # Configuration files -└── scripts/ # Build and deployment scripts -``` - -## Core Components - -### Backend (Flask) - -- **Main Application**: `simple_web_gui.py` - Primary Flask app -- **Database Management**: `db_manager.py` - Database operations -- **Face Recognition**: `visual_identifier.py` - Face detection and recognition -- **Configuration**: `config.py` - App configuration - -### Frontend (JavaScript) - -- **UI Components**: Embedded in Flask templates -- **Progressive Loading**: Handles large photo collections -- **Custom Dialogs**: Replaces browser alerts -- **Face Management**: Face identification and tagging interface - -### Data Layer - -- **SQLite Database**: `punimtag_simple.db` - Main database -- **Image Storage**: `photos/` directory -- **Thumbnails**: Generated on-demand -- **Face Encodings**: Stored as binary data - -## Architecture Principles - -### 1. Separation of Concerns - -- **Backend**: Business logic, data processing, API endpoints -- **Frontend**: User interface, interactions, state management -- **Data**: Persistent storage, caching, optimization - -### 2. Progressive Enhancement - -- **Core Functionality**: Works without JavaScript -- **Enhanced Features**: Progressive loading, real-time updates -- **Fallbacks**: Graceful degradation for older browsers - -### 3. Performance Optimization - -- **Lazy Loading**: Images and data loaded on demand -- **Caching**: Thumbnails and frequently accessed data -- **Batch Operations**: Efficient bulk processing - -### 4. User Experience - -- **Responsive Design**: Works on all screen sizes -- **Accessibility**: Keyboard navigation, screen reader support -- **Error Handling**: Graceful error recovery and user feedback - -## File Naming Conventions - -### Python Files - -- **snake_case** for file names and functions -- **PascalCase** for classes -- **UPPER_CASE** for constants - -### JavaScript Files - -- **camelCase** for functions and variables -- **PascalCase** for classes and components -- **kebab-case** for CSS classes - -### Database - -- **snake_case** for table and column names -- **Descriptive names** that clearly indicate purpose - -## Dependencies - -### Backend Dependencies - -- **Flask**: Web framework -- **SQLite**: Database -- **dlib**: Face recognition -- **Pillow**: Image processing -- **numpy**: Numerical operations - -### Frontend Dependencies - -- **Vanilla JavaScript**: No external frameworks -- **CSS Grid/Flexbox**: Layout system -- **Fetch API**: HTTP requests -- **Intersection Observer**: Progressive loading - -## Configuration Management - -- **Environment Variables**: For sensitive data -- **JSON Config Files**: For application settings -- **Database Migrations**: For schema changes -- **Feature Flags**: For experimental features diff --git a/docs/tech.md b/docs/tech.md deleted file mode 100644 index d4933f4..0000000 --- a/docs/tech.md +++ /dev/null @@ -1,136 +0,0 @@ -# PunimTag Technical Architecture - -## Technology Stack - -### Backend - -- **Framework**: Flask (Python web framework) -- **Database**: SQLite (lightweight, file-based) -- **Face Recognition**: dlib (C++ library with Python bindings) -- **Image Processing**: Pillow (PIL fork) -- **Data Processing**: NumPy (numerical operations) - -### Frontend - -- **Language**: Vanilla JavaScript (ES6+) -- **Styling**: CSS3 with Grid/Flexbox -- **HTTP Client**: Fetch API -- **Progressive Loading**: Intersection Observer API -- **No Frameworks**: Pure JavaScript for simplicity - -### Development Tools - -- **Version Control**: Git -- **Package Management**: pip (Python), npm (optional for frontend tools) -- **Testing**: pytest (Python), Jest (JavaScript) -- **Code Quality**: flake8, black (Python), ESLint (JavaScript) - -## Core Technologies - -### Face Recognition Pipeline - -1. **Image Loading**: Pillow for image processing -2. **Face Detection**: dlib's CNN face detector -3. **Feature Extraction**: dlib's 128-dimensional face encodings -4. **Similarity Matching**: Euclidean distance calculation -5. **Storage**: Binary encoding storage in SQLite - -### Database Schema - -```sql --- Core tables -images (id, filename, path, date_taken, metadata) -faces (id, image_id, person_id, encoding, coordinates, confidence) -people (id, name, created_date) -tags (id, name) -image_tags (image_id, tag_id) - --- Supporting tables -face_encodings (id, face_id, encoding_data) -photo_metadata (image_id, exif_data, gps_data) -``` - -### API Design - -- **RESTful Endpoints**: Standard HTTP methods (GET, POST, DELETE) -- **JSON Responses**: Consistent response format -- **Error Handling**: HTTP status codes with descriptive messages -- **Pagination**: Offset-based for large datasets - -## Performance Considerations4 - -### Image Processing - -- **Thumbnail Generation**: On-demand with caching -- **Face Detection**: Optimized for speed vs accuracy -- **Batch Processing**: Efficient handling of large photo sets -- **Memory Management**: Streaming for large images - -### Database Optimization - -- **Indexing**: Strategic indexes on frequently queried columns -- **Connection Pooling**: Efficient database connections -- **Query Optimization**: Minimize N+1 query problems -- **Data Archiving**: Move old data to separate tables - -### Frontend Performance - -- **Progressive Loading**: Load data in chunks -- **Image Lazy Loading**: Load images as they become visible -- **Caching**: Browser caching for static assets -- **Debouncing**: Prevent excessive API calls - -## Security Considerations - -### Data Protection - -- **Local Storage**: No cloud dependencies -- **Input Validation**: Sanitize all user inputs -- **SQL Injection Prevention**: Parameterized queries -- **File Upload Security**: Validate file types and sizes - -### Privacy - -- **Face Data**: Stored locally, not shared -- **Metadata**: User controls what's stored -- **Access Control**: Local access only -- **Data Export**: User can export/delete their data - -## Scalability - -### Current Limitations - -- **Single User**: Designed for personal use -- **Local Storage**: Limited by disk space -- **Processing Power**: CPU-intensive face recognition -- **Memory**: Large photo collections require significant RAM - -### Future Scalability - -- **Multi-User Support**: Database schema supports multiple users -- **Cloud Integration**: Optional cloud storage and processing -- **Distributed Processing**: GPU acceleration for face recognition -- **Microservices**: Separate services for different functions - -## Development Workflow - -### Code Organization - -- **Modular Design**: Separate concerns into modules -- **Configuration Management**: Environment-based settings -- **Error Handling**: Comprehensive error catching and logging -- **Documentation**: Inline code documentation - -### Testing Strategy - -- **Unit Tests**: Test individual functions and classes -- **Integration Tests**: Test API endpoints and database operations -- **End-to-End Tests**: Test complete user workflows -- **Performance Tests**: Test with large datasets - -### Deployment - -- **Local Development**: Flask development server -- **Production**: WSGI server (Gunicorn) with reverse proxy -- **Containerization**: Docker for consistent environments -- **Monitoring**: Logging and health checks diff --git a/docs/testing-standards.md b/docs/testing-standards.md deleted file mode 100644 index 30ccfa5..0000000 --- a/docs/testing-standards.md +++ /dev/null @@ -1,531 +0,0 @@ -# PunimTag Testing Standards - -## Overview - -This document defines the standards for writing and organizing tests in PunimTag. - -## Test Organization - -### Directory Structure - -``` -tests/ -├── unit/ # Unit tests for individual functions -├── integration/ # Integration tests for API endpoints -├── e2e/ # End-to-end tests for complete workflows -├── fixtures/ # Test data and fixtures -├── utils/ # Test utilities and helpers -└── conftest.py # pytest configuration and shared fixtures -``` - -### Test File Naming - -- **Unit Tests**: `test_.py` -- **Integration Tests**: `test__integration.py` -- **E2E Tests**: `test__e2e.py` -- **Test Utilities**: `test_.py` - -## Test Categories - -### Unit Tests - -Test individual functions and classes in isolation. - -```python -# tests/unit/test_face_recognition.py -import pytest -from src.utils.face_recognition import detect_faces, encode_face - -def test_detect_faces_with_valid_image(): - """Test face detection with a valid image.""" - image_path = "tests/fixtures/valid_face.jpg" - faces = detect_faces(image_path) - - assert len(faces) > 0 - assert all(hasattr(face, 'left') for face in faces) - assert all(hasattr(face, 'top') for face in faces) - -def test_detect_faces_with_no_faces(): - """Test face detection with an image containing no faces.""" - image_path = "tests/fixtures/no_faces.jpg" - faces = detect_faces(image_path) - - assert len(faces) == 0 - -def test_encode_face_with_valid_face(): - """Test face encoding with a valid face.""" - face_image = load_test_face_image() - encoding = encode_face(face_image) - - assert len(encoding) == 128 - assert all(isinstance(x, float) for x in encoding) -``` - -### Integration Tests - -Test API endpoints and database interactions. - -```python -# tests/integration/test_photo_api.py -import pytest -from src.app import app - -@pytest.fixture -def client(): - """Create a test client.""" - app.config['TESTING'] = True - app.config['DATABASE'] = 'test.db' - - with app.test_client() as client: - yield client - -def test_get_photos_endpoint(client): - """Test the GET /photos endpoint.""" - response = client.get('/photos') - - assert response.status_code == 200 - data = response.get_json() - assert data['success'] == True - assert 'photos' in data - -def test_create_photo_endpoint(client): - """Test the POST /photos endpoint.""" - photo_data = { - 'filename': 'test.jpg', - 'path': '/test/path/test.jpg' - } - - response = client.post('/photos', json=photo_data) - - assert response.status_code == 201 - data = response.get_json() - assert data['success'] == True - assert 'photo_id' in data - -def test_get_photo_not_found(client): - """Test getting a non-existent photo.""" - response = client.get('/photos/99999') - - assert response.status_code == 404 - data = response.get_json() - assert data['success'] == False - assert 'error' in data -``` - -### End-to-End Tests - -Test complete user workflows. - -```python -# tests/e2e/test_photo_workflow.py -import pytest -from selenium import webdriver -from selenium.webdriver.common.by import By -from selenium.webdriver.support.ui import WebDriverWait -from selenium.webdriver.support import expected_conditions as EC - -@pytest.fixture -def driver(): - """Create a web driver for E2E tests.""" - driver = webdriver.Chrome() - driver.implicitly_wait(10) - yield driver - driver.quit() - -def test_upload_and_identify_photo(driver): - """Test the complete workflow of uploading and identifying a photo.""" - # Navigate to the app - driver.get("http://localhost:5000") - - # Upload a photo - file_input = driver.find_element(By.ID, "photo-upload") - file_input.send_keys("tests/fixtures/test_photo.jpg") - - # Wait for upload to complete - WebDriverWait(driver, 30).until( - EC.presence_of_element_located((By.CLASS_NAME, "photo-card")) - ) - - # Click on the photo to open details - photo_card = driver.find_element(By.CLASS_NAME, "photo-card") - photo_card.click() - - # Wait for photo details to load - WebDriverWait(driver, 10).until( - EC.presence_of_element_located((By.ID, "photoDetails")) - ) - - # Verify faces are detected - faces = driver.find_elements(By.CLASS_NAME, "face-item") - assert len(faces) > 0 - - # Identify a face - face_input = driver.find_element(By.CLASS_NAME, "face-name-input") - face_input.send_keys("Test Person") - - identify_button = driver.find_element(By.CLASS_NAME, "identify-face-btn") - identify_button.click() - - # Verify identification - WebDriverWait(driver, 10).until( - EC.text_to_be_present_in_element((By.CLASS_NAME, "face-name"), "Test Person") - ) -``` - -## Test Fixtures - -### Database Fixtures - -```python -# tests/conftest.py -import pytest -import sqlite3 -import tempfile -import os - -@pytest.fixture -def test_db(): - """Create a temporary test database.""" - db_fd, db_path = tempfile.mkstemp() - - # Create test database schema - conn = sqlite3.connect(db_path) - cursor = conn.cursor() - - cursor.execute(''' - CREATE TABLE images ( - id INTEGER PRIMARY KEY, - filename TEXT NOT NULL, - path TEXT NOT NULL, - date_taken TEXT - ) - ''') - - cursor.execute(''' - CREATE TABLE faces ( - id INTEGER PRIMARY KEY, - image_id INTEGER, - person_id INTEGER, - encoding BLOB, - left INTEGER, - top INTEGER, - right INTEGER, - bottom INTEGER - ) - ''') - - conn.commit() - conn.close() - - yield db_path - - # Cleanup - os.close(db_fd) - os.unlink(db_path) - -@pytest.fixture -def sample_photos(test_db): - """Add sample photos to the test database.""" - conn = sqlite3.connect(test_db) - cursor = conn.cursor() - - photos = [ - ('photo1.jpg', '/test/path/photo1.jpg', '2023-01-01'), - ('photo2.jpg', '/test/path/photo2.jpg', '2023-01-02'), - ('photo3.jpg', '/test/path/photo3.jpg', '2023-01-03') - ] - - cursor.executemany( - 'INSERT INTO images (filename, path, date_taken) VALUES (?, ?, ?)', - photos - ) - - conn.commit() - conn.close() - - return photos -``` - -### Mock Fixtures - -```python -# tests/conftest.py -import pytest -from unittest.mock import Mock, patch - -@pytest.fixture -def mock_face_recognition(): - """Mock face recognition functions.""" - with patch('src.utils.face_recognition.detect_faces') as mock_detect: - with patch('src.utils.face_recognition.encode_face') as mock_encode: - mock_detect.return_value = [ - Mock(left=100, top=100, right=200, bottom=200) - ] - mock_encode.return_value = [0.1] * 128 - - yield { - 'detect': mock_detect, - 'encode': mock_encode - } - -@pytest.fixture -def mock_file_system(): - """Mock file system operations.""" - with patch('os.path.exists') as mock_exists: - with patch('os.path.getsize') as mock_size: - mock_exists.return_value = True - mock_size.return_value = 1024 * 1024 # 1MB - - yield { - 'exists': mock_exists, - 'size': mock_size - } -``` - -## Test Data Management - -### Test Images - -```python -# tests/fixtures/test_images.py -import os -from PIL import Image -import numpy as np - -def create_test_image(width=100, height=100, filename="test.jpg"): - """Create a test image for testing.""" - # Create a simple test image - image = Image.new('RGB', (width, height), color='red') - - # Add a simple face-like pattern - pixels = np.array(image) - # Draw a simple face outline - pixels[30:70, 40:60] = [255, 255, 255] # White face - pixels[40:50, 45:55] = [0, 0, 0] # Black eyes - - test_image = Image.fromarray(pixels) - test_path = f"tests/fixtures/{filename}" - test_image.save(test_path) - - return test_path - -def cleanup_test_images(): - """Clean up test images.""" - fixture_dir = "tests/fixtures" - for file in os.listdir(fixture_dir): - if file.endswith(('.jpg', '.png', '.jpeg')): - os.remove(os.path.join(fixture_dir, file)) -``` - -## Performance Testing - -### Load Testing - -```python -# tests/performance/test_load.py -import pytest -import time -import concurrent.futures -from src.app import app - -def test_concurrent_photo_requests(): - """Test handling multiple concurrent photo requests.""" - client = app.test_client() - - def make_request(): - return client.get('/photos?page=1&per_page=20') - - # Make 10 concurrent requests - with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: - futures = [executor.submit(make_request) for _ in range(10)] - responses = [future.result() for future in futures] - - # All requests should succeed - for response in responses: - assert response.status_code == 200 - - # Check response times - start_time = time.time() - for _ in range(5): - client.get('/photos?page=1&per_page=20') - end_time = time.time() - - avg_time = (end_time - start_time) / 5 - assert avg_time < 1.0 # Should respond within 1 second - -def test_large_photo_collection(): - """Test performance with a large photo collection.""" - # This would require setting up a large test dataset - pass -``` - -## Test Configuration - -### pytest Configuration - -```ini -# pytest.ini -[tool:pytest] -testpaths = tests -python_files = test_*.py -python_classes = Test* -python_functions = test_* -addopts = - -v - --tb=short - --strict-markers - --disable-warnings -markers = - unit: Unit tests - integration: Integration tests - e2e: End-to-end tests - slow: Slow running tests - performance: Performance tests -``` - -### Test Environment Variables - -```python -# tests/conftest.py -import os - -@pytest.fixture(autouse=True) -def test_environment(): - """Set up test environment variables.""" - os.environ['TESTING'] = 'true' - os.environ['DATABASE_PATH'] = 'test.db' - os.environ['PHOTOS_DIR'] = 'tests/fixtures/photos' - - yield - - # Cleanup - if 'TESTING' in os.environ: - del os.environ['TESTING'] -``` - -## Code Coverage - -### Coverage Configuration - -```ini -# .coveragerc -[run] -source = src -omit = - */tests/* - */venv/* - */__pycache__/* - */migrations/* - -[report] -exclude_lines = - pragma: no cover - def __repr__ - raise AssertionError - raise NotImplementedError - if 0: - if __name__ == .__main__.: -``` - -### Coverage Testing - -```python -# tests/test_coverage.py -import pytest -import coverage - -def test_code_coverage(): - """Ensure code coverage meets minimum requirements.""" - cov = coverage.Coverage() - cov.start() - - # Run the application - from src.app import app - client = app.test_client() - client.get('/photos') - - cov.stop() - cov.save() - - # Generate coverage report - cov.report() - - # Check coverage percentage - total_coverage = cov.report() - assert total_coverage >= 80.0 # Minimum 80% coverage -``` - -## Continuous Integration - -### GitHub Actions - -```yaml -# .github/workflows/test.yml -name: Tests - -on: [push, pull_request] - -jobs: - test: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - - name: Install dependencies - run: | - pip install -r requirements.txt - pip install pytest pytest-cov - - - name: Run tests - run: | - pytest tests/ --cov=src --cov-report=xml - - - name: Upload coverage - uses: codecov/codecov-action@v1 - with: - file: ./coverage.xml -``` - -## Best Practices - -### Test Naming - -- Use descriptive test names that explain what is being tested -- Follow the pattern: `test___` -- Example: `test_detect_faces_with_multiple_faces_returns_correct_count` - -### Test Independence - -- Each test should be independent and not rely on other tests -- Use fixtures to set up test data -- Clean up after each test - -### Test Data - -- Use realistic but minimal test data -- Create helper functions for generating test data -- Keep test data in fixtures directory - -### Error Testing - -- Test both success and failure scenarios -- Test edge cases and boundary conditions -- Test error handling and recovery - -### Performance - -- Keep tests fast and efficient -- Use mocking for slow operations -- Separate slow tests with `@pytest.mark.slow` - -### Documentation - -- Document complex test scenarios -- Explain the purpose of each test -- Keep test code readable and maintainable diff --git a/main.py b/main.py deleted file mode 100644 index 9872b7e..0000000 --- a/main.py +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env python3 -""" -PunimTag - Intelligent Photo Management System - -Main entry point for the PunimTag application. -""" - -import sys -import os - -# Add src directory to Python path -sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src')) - -from backend.app import app - -if __name__ == '__main__': - print("Starting PunimTag...") - print("Access the application at: http://localhost:5000") - app.run(host='0.0.0.0', port=5000, debug=True) \ No newline at end of file diff --git a/photo_tagger.py b/photo_tagger.py new file mode 100644 index 0000000..64b2906 --- /dev/null +++ b/photo_tagger.py @@ -0,0 +1,479 @@ +#!/usr/bin/env python3 +""" +PunimTag CLI - Minimal Photo Face Tagger +Simple command-line tool for face recognition and photo tagging +""" + +import os +import sqlite3 +import argparse +import face_recognition +from pathlib import Path +from PIL import Image +import pickle +import numpy as np +from typing import List, Dict, Tuple, Optional +import sys + + +class PhotoTagger: + def __init__(self, db_path: str = "photos.db"): + """Initialize the photo tagger with database""" + self.db_path = db_path + self.init_database() + + def init_database(self): + """Create database tables if they don't exist""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + # Photos table + cursor.execute(''' + CREATE TABLE IF NOT EXISTS photos ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + path TEXT UNIQUE NOT NULL, + filename TEXT NOT NULL, + date_added DATETIME DEFAULT CURRENT_TIMESTAMP, + processed BOOLEAN DEFAULT 0 + ) + ''') + + # People table + cursor.execute(''' + CREATE TABLE IF NOT EXISTS people ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT UNIQUE NOT NULL, + created_date DATETIME DEFAULT CURRENT_TIMESTAMP + ) + ''') + + # Faces table + cursor.execute(''' + CREATE TABLE IF NOT EXISTS faces ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + photo_id INTEGER NOT NULL, + person_id INTEGER, + encoding BLOB NOT NULL, + location TEXT NOT NULL, + confidence REAL DEFAULT 0.0, + FOREIGN KEY (photo_id) REFERENCES photos (id), + FOREIGN KEY (person_id) REFERENCES people (id) + ) + ''') + + # Tags table + cursor.execute(''' + CREATE TABLE IF NOT EXISTS tags ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + photo_id INTEGER NOT NULL, + tag_name TEXT NOT NULL, + created_date DATETIME DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (photo_id) REFERENCES photos (id) + ) + ''') + + conn.commit() + conn.close() + print(f"✅ Database initialized: {self.db_path}") + + def scan_folder(self, folder_path: str, recursive: bool = True) -> int: + """Scan folder for photos and add to database""" + if not os.path.exists(folder_path): + print(f"❌ Folder not found: {folder_path}") + return 0 + + photo_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff'} + found_photos = [] + + if recursive: + for root, dirs, files in os.walk(folder_path): + for file in files: + if Path(file).suffix.lower() in photo_extensions: + photo_path = os.path.join(root, file) + found_photos.append((photo_path, file)) + else: + for file in os.listdir(folder_path): + if Path(file).suffix.lower() in photo_extensions: + photo_path = os.path.join(folder_path, file) + found_photos.append((photo_path, file)) + + if not found_photos: + print(f"📁 No photos found in {folder_path}") + return 0 + + # Add to database + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + added_count = 0 + + for photo_path, filename in found_photos: + try: + cursor.execute( + 'INSERT OR IGNORE INTO photos (path, filename) VALUES (?, ?)', + (photo_path, filename) + ) + if cursor.rowcount > 0: + added_count += 1 + except Exception as e: + print(f"⚠️ Error adding {filename}: {e}") + + conn.commit() + conn.close() + + print(f"📁 Found {len(found_photos)} photos, added {added_count} new photos") + return added_count + + def process_faces(self, limit: int = 50, model: str = "hog") -> int: + """Process unprocessed photos for faces""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + cursor.execute( + 'SELECT id, path, filename FROM photos WHERE processed = 0 LIMIT ?', + (limit,) + ) + unprocessed = cursor.fetchall() + + if not unprocessed: + print("✅ No unprocessed photos found") + conn.close() + return 0 + + print(f"🔍 Processing {len(unprocessed)} photos for faces...") + processed_count = 0 + + for photo_id, photo_path, filename in unprocessed: + if not os.path.exists(photo_path): + print(f"❌ File not found: {filename}") + cursor.execute('UPDATE photos SET processed = 1 WHERE id = ?', (photo_id,)) + continue + + try: + # Load image and find faces + print(f"📸 Processing: {filename}") + image = face_recognition.load_image_file(photo_path) + face_locations = face_recognition.face_locations(image, model=model) + + if face_locations: + face_encodings = face_recognition.face_encodings(image, face_locations) + print(f" 👤 Found {len(face_locations)} faces") + + # Save faces to database + for encoding, location in zip(face_encodings, face_locations): + cursor.execute( + 'INSERT INTO faces (photo_id, encoding, location) VALUES (?, ?, ?)', + (photo_id, encoding.tobytes(), str(location)) + ) + else: + print(f" 👤 No faces found") + + # Mark as processed + cursor.execute('UPDATE photos SET processed = 1 WHERE id = ?', (photo_id,)) + processed_count += 1 + + except Exception as e: + print(f"❌ Error processing {filename}: {e}") + cursor.execute('UPDATE photos SET processed = 1 WHERE id = ?', (photo_id,)) + + conn.commit() + conn.close() + + print(f"✅ Processed {processed_count} photos") + return processed_count + + def identify_faces(self, batch_size: int = 20) -> int: + """Interactive face identification""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + cursor.execute(''' + SELECT f.id, f.photo_id, p.path, p.filename, f.location + FROM faces f + JOIN photos p ON f.photo_id = p.id + WHERE f.person_id IS NULL + LIMIT ? + ''', (batch_size,)) + + unidentified = cursor.fetchall() + + if not unidentified: + print("🎉 All faces have been identified!") + conn.close() + return 0 + + print(f"\n👤 Found {len(unidentified)} unidentified faces") + print("Commands: [name] = identify, 's' = skip, 'q' = quit, 'list' = show people\n") + + identified_count = 0 + + for i, (face_id, photo_id, photo_path, filename, location) in enumerate(unidentified): + print(f"\n--- Face {i+1}/{len(unidentified)} ---") + print(f"📁 Photo: {filename}") + print(f"📍 Face location: {location}") + + while True: + command = input("👤 Person name (or command): ").strip() + + if command.lower() == 'q': + print("Quitting...") + conn.close() + return identified_count + + elif command.lower() == 's': + print("⏭️ Skipped") + break + + elif command.lower() == 'list': + self._show_people_list(cursor) + continue + + elif command: + try: + # Add person if doesn't exist + cursor.execute('INSERT OR IGNORE INTO people (name) VALUES (?)', (command,)) + cursor.execute('SELECT id FROM people WHERE name = ?', (command,)) + person_id = cursor.fetchone()[0] + + # Assign face to person + cursor.execute( + 'UPDATE faces SET person_id = ? WHERE id = ?', + (person_id, face_id) + ) + + print(f"✅ Identified as: {command}") + identified_count += 1 + break + + except Exception as e: + print(f"❌ Error: {e}") + else: + print("Please enter a name, 's' to skip, 'q' to quit, or 'list' to see people") + + conn.commit() + conn.close() + + print(f"\n✅ Identified {identified_count} faces") + return identified_count + + def _show_people_list(self, cursor): + """Show list of known people""" + cursor.execute('SELECT name FROM people ORDER BY name') + people = cursor.fetchall() + if people: + print("👥 Known people:", ", ".join([p[0] for p in people])) + else: + print("👥 No people identified yet") + + def add_tags(self, photo_pattern: str = None, batch_size: int = 10) -> int: + """Add custom tags to photos""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + if photo_pattern: + cursor.execute( + 'SELECT id, filename FROM photos WHERE filename LIKE ? LIMIT ?', + (f'%{photo_pattern}%', batch_size) + ) + else: + cursor.execute('SELECT id, filename FROM photos LIMIT ?', (batch_size,)) + + photos = cursor.fetchall() + + if not photos: + print("No photos found") + conn.close() + return 0 + + print(f"🏷️ Tagging {len(photos)} photos (enter comma-separated tags)") + tagged_count = 0 + + for photo_id, filename in photos: + print(f"\n📸 {filename}") + tags_input = input("🏷️ Tags: ").strip() + + if tags_input.lower() == 'q': + break + + if tags_input: + tags = [tag.strip() for tag in tags_input.split(',') if tag.strip()] + for tag in tags: + cursor.execute( + 'INSERT INTO tags (photo_id, tag_name) VALUES (?, ?)', + (photo_id, tag) + ) + print(f" ✅ Added {len(tags)} tags") + tagged_count += 1 + + conn.commit() + conn.close() + + print(f"✅ Tagged {tagged_count} photos") + return tagged_count + + def stats(self) -> Dict: + """Show database statistics""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + stats = {} + + # Basic counts + cursor.execute('SELECT COUNT(*) FROM photos') + stats['total_photos'] = cursor.fetchone()[0] + + cursor.execute('SELECT COUNT(*) FROM photos WHERE processed = 1') + stats['processed_photos'] = cursor.fetchone()[0] + + cursor.execute('SELECT COUNT(*) FROM faces') + stats['total_faces'] = cursor.fetchone()[0] + + cursor.execute('SELECT COUNT(*) FROM faces WHERE person_id IS NOT NULL') + stats['identified_faces'] = cursor.fetchone()[0] + + cursor.execute('SELECT COUNT(*) FROM people') + stats['total_people'] = cursor.fetchone()[0] + + cursor.execute('SELECT COUNT(DISTINCT tag_name) FROM tags') + stats['unique_tags'] = cursor.fetchone()[0] + + # Top people + cursor.execute(''' + SELECT p.name, COUNT(f.id) as face_count + FROM people p + LEFT JOIN faces f ON p.id = f.person_id + GROUP BY p.id + ORDER BY face_count DESC + LIMIT 5 + ''') + stats['top_people'] = cursor.fetchall() + + conn.close() + + # Display stats + print(f"\n📊 Database Statistics") + print("=" * 40) + print(f"Photos: {stats['processed_photos']}/{stats['total_photos']} processed") + print(f"Faces: {stats['identified_faces']}/{stats['total_faces']} identified") + print(f"People: {stats['total_people']} unique") + print(f"Tags: {stats['unique_tags']} unique") + + if stats['top_people']: + print(f"\n👥 Top People:") + for name, count in stats['top_people']: + print(f" {name}: {count} faces") + + unidentified = stats['total_faces'] - stats['identified_faces'] + if unidentified > 0: + print(f"\n⚠️ {unidentified} faces still need identification") + + return stats + + def search_faces(self, person_name: str) -> List[str]: + """Search for photos containing a specific person""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + cursor.execute(''' + SELECT DISTINCT p.filename, p.path + FROM photos p + JOIN faces f ON p.id = f.photo_id + JOIN people pe ON f.person_id = pe.id + WHERE pe.name LIKE ? + ''', (f'%{person_name}%',)) + + results = cursor.fetchall() + conn.close() + + if results: + print(f"\n🔍 Found {len(results)} photos with '{person_name}':") + for filename, path in results: + print(f" 📸 {filename}") + else: + print(f"🔍 No photos found with '{person_name}'") + + return [path for filename, path in results] + + +def main(): + """Main CLI interface""" + parser = argparse.ArgumentParser( + description="PunimTag CLI - Simple photo face tagger", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + photo_tagger.py scan /path/to/photos # Scan folder for photos + photo_tagger.py process --limit 20 # Process 20 photos for faces + photo_tagger.py identify --batch 10 # Identify 10 faces interactively + photo_tagger.py tag --pattern "vacation" # Tag photos matching pattern + photo_tagger.py search "John" # Find photos with John + photo_tagger.py stats # Show statistics + """ + ) + + parser.add_argument('command', + choices=['scan', 'process', 'identify', 'tag', 'search', 'stats'], + help='Command to execute') + + parser.add_argument('target', nargs='?', + help='Target folder (scan), person name (search), or pattern (tag)') + + parser.add_argument('--db', default='photos.db', + help='Database file path (default: photos.db)') + + parser.add_argument('--limit', type=int, default=50, + help='Batch size limit for processing (default: 50)') + + parser.add_argument('--batch', type=int, default=20, + help='Batch size for identification (default: 20)') + + parser.add_argument('--pattern', + help='Pattern for filtering photos when tagging') + + parser.add_argument('--model', choices=['hog', 'cnn'], default='hog', + help='Face detection model: hog (faster) or cnn (more accurate)') + + parser.add_argument('--recursive', action='store_true', + help='Scan folders recursively') + + args = parser.parse_args() + + # Initialize tagger + tagger = PhotoTagger(args.db) + + try: + if args.command == 'scan': + if not args.target: + print("❌ Please specify a folder to scan") + return 1 + tagger.scan_folder(args.target, args.recursive) + + elif args.command == 'process': + tagger.process_faces(args.limit, args.model) + + elif args.command == 'identify': + tagger.identify_faces(args.batch) + + elif args.command == 'tag': + tagger.add_tags(args.pattern or args.target, args.batch) + + elif args.command == 'search': + if not args.target: + print("❌ Please specify a person name to search for") + return 1 + tagger.search_faces(args.target) + + elif args.command == 'stats': + tagger.stats() + + return 0 + + except KeyboardInterrupt: + print("\n\n⚠️ Interrupted by user") + return 1 + except Exception as e: + print(f"❌ Error: {e}") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/photos.db b/photos.db new file mode 100644 index 0000000..95d06d8 Binary files /dev/null and b/photos.db differ diff --git a/photos.db-journal b/photos.db-journal new file mode 100644 index 0000000..d9bf376 Binary files /dev/null and b/photos.db-journal differ diff --git a/requirements.txt b/requirements.txt index b1d34dd..12095fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ -click==8.2.1 -dlib==20.0.0 +# Minimal dependencies for CLI photo tagger face-recognition==1.3.0 face-recognition-models==0.3.0 -numpy==2.2.6 -pillow==11.3.0 -opencv-python==4.10.0.84 -scikit-learn==1.7.0 +dlib>=20.0.0 +numpy>=1.21.0 +pillow>=8.0.0 +click>=8.0.0 +setuptools>=40.0.0 \ No newline at end of file diff --git a/run.sh b/run.sh new file mode 100644 index 0000000..36dace6 --- /dev/null +++ b/run.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# PunimTag Runner Script +# Automatically activates virtual environment and runs commands + +# Check if virtual environment exists +if [ ! -d "venv" ]; then + echo "❌ Virtual environment not found!" + echo "Run: python3 -m venv venv && source venv/bin/activate && python3 setup.py" + exit 1 +fi + +# Activate virtual environment +source venv/bin/activate + +# Check if no arguments provided +if [ $# -eq 0 ]; then + echo "🎯 PunimTag CLI" + echo "Usage: ./run.sh [arguments]" + echo "" + echo "Examples:" + echo " ./run.sh scan /path/to/photos --recursive" + echo " ./run.sh process --limit 20" + echo " ./run.sh identify --batch 10" + echo " ./run.sh search 'John'" + echo " ./run.sh stats" + echo "" + echo "Or run directly:" + echo " source venv/bin/activate" + echo " python3 photo_tagger.py --help" + exit 0 +fi + +# Run the command +python3 photo_tagger.py "$@" diff --git a/scripts/cleanup_tests.py b/scripts/cleanup_tests.py deleted file mode 100644 index 1d37e9c..0000000 --- a/scripts/cleanup_tests.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python3 -""" -Cleanup script for PunimTag project organization. - -This script removes redundant test files and organizes the project structure. -""" - -import os -import shutil -from pathlib import Path - -def cleanup_old_tests(): - """Remove old test files that are now consolidated.""" - old_test_files = [ - "test_syntax_fix.html", - "test_js_validator.html", - "test_direct_error_check.html", - "test_console_tracker.html", - "test_syntax_check.html", - "test_progressive.html", - "test_simple_main.html", - "test_diagnostic.html", - "test_minimal.html", - "debug_ui.html", - "test_backend.py", - "test_punimtag.py", - "test_web_api.py" - ] - - removed_count = 0 - for file_name in old_test_files: - file_path = Path(file_name) - if file_path.exists(): - try: - file_path.unlink() - print(f"✅ Removed: {file_name}") - removed_count += 1 - except Exception as e: - print(f"❌ Failed to remove {file_name}: {e}") - - print(f"\n📊 Cleanup complete: {removed_count} files removed") - -def verify_structure(): - """Verify that the new structure is properly organized.""" - required_structure = { - "src/backend/app.py": "Main Flask application", - "src/backend/db_manager.py": "Database manager", - "src/backend/visual_identifier.py": "Face recognition", - "src/utils/tag_manager.py": "Tag management", - "config/settings.py": "Configuration settings", - "data/punimtag_simple.db": "Main database", - "tests/test_main.py": "Main test suite", - "docs/product.md": "Product vision", - "docs/structure.md": "Project structure", - "docs/tech.md": "Technical architecture", - "docs/api-standards.md": "API standards", - "docs/testing-standards.md": "Testing standards", - "docs/code-conventions.md": "Code conventions" - } - - print("\n🔍 Verifying project structure:") - print("=" * 50) - - all_good = True - for file_path, description in required_structure.items(): - if Path(file_path).exists(): - print(f"✅ {file_path} - {description}") - else: - print(f"❌ {file_path} - {description} (MISSING)") - all_good = False - - return all_good - -def create_gitignore(): - """Create or update .gitignore file.""" - gitignore_content = """# Python -__pycache__/ -*.py[cod] -*$py.class -*.so -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg - -# Virtual environments -venv/ -env/ -ENV/ - -# Database files (keep structure, ignore content) -data/*.db -data/*.sqlite - -# Temporary files -*.tmp -*.temp -temp_face_crop_*.jpg - -# IDE -.vscode/ -.idea/ -*.swp -*.swo - -# OS -.DS_Store -Thumbs.db - -# Logs -*.log - -# Environment variables -.env -""" - - with open(".gitignore", "w") as f: - f.write(gitignore_content) - - print("✅ Updated .gitignore file") - -def main(): - """Main cleanup function.""" - print("🧹 PunimTag Project Cleanup") - print("=" * 50) - - # Clean up old test files - cleanup_old_tests() - - # Verify structure - structure_ok = verify_structure() - - # Update gitignore - create_gitignore() - - print("\n" + "=" * 50) - if structure_ok: - print("🎉 Project cleanup completed successfully!") - print("\n📋 Next steps:") - print("1. Review the steering documents in docs/") - print("2. Run tests: python tests/test_main.py") - print("3. Start the app: python main.py") - else: - print("⚠️ Project cleanup completed with issues.") - print("Please check the missing files above.") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/scripts/gui_face_clusters.py b/scripts/gui_face_clusters.py deleted file mode 100644 index 3e22e11..0000000 --- a/scripts/gui_face_clusters.py +++ /dev/null @@ -1,436 +0,0 @@ -#!/usr/bin/env python3 -""" -Face Clustering GUI for PunimTag -Visual interface for viewing and identifying clustered unknown faces -""" - -import tkinter as tk -from tkinter import ttk, messagebox, simpledialog -from PIL import Image, ImageTk -import os -import sqlite3 -from typing import List, Dict -import pickle -import numpy as np - - -class FaceClusterGUI: - def __init__(self, db_path: str = 'punimtag_simple.db'): - self.db_path = db_path - self.root = tk.Tk() - self.root.title("PunimTag - Face Clustering") - self.root.geometry("1200x800") - - # Current cluster data - self.clusters = [] - self.current_cluster_index = 0 - - # Image cache - self.image_cache = {} - - self.setup_ui() - self.load_clusters() - - def setup_ui(self): - """Setup the user interface""" - # Main frame - main_frame = ttk.Frame(self.root, padding="10") - main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) - - # Configure grid weights - self.root.columnconfigure(0, weight=1) - self.root.rowconfigure(0, weight=1) - main_frame.columnconfigure(1, weight=1) - main_frame.rowconfigure(1, weight=1) - - # Title - title_label = ttk.Label(main_frame, text="Unknown Face Clusters", - font=('Arial', 16, 'bold')) - title_label.grid(row=0, column=0, columnspan=3, pady=(0, 10)) - - # Left panel - cluster list - left_frame = ttk.LabelFrame(main_frame, text="Clusters", padding="5") - left_frame.grid(row=1, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), padx=(0, 10)) - - # Cluster listbox - self.cluster_listbox = tk.Listbox(left_frame, width=30) - self.cluster_listbox.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) - self.cluster_listbox.bind('<>', self.on_cluster_select) - - scrollbar = ttk.Scrollbar(left_frame, orient="vertical", command=self.cluster_listbox.yview) - scrollbar.grid(row=0, column=1, sticky=(tk.N, tk.S)) - self.cluster_listbox.configure(yscrollcommand=scrollbar.set) - - left_frame.columnconfigure(0, weight=1) - left_frame.rowconfigure(0, weight=1) - - # Center panel - face display - center_frame = ttk.LabelFrame(main_frame, text="Faces in Cluster", padding="5") - center_frame.grid(row=1, column=1, sticky=(tk.W, tk.E, tk.N, tk.S)) - - # Canvas for face thumbnails - self.canvas = tk.Canvas(center_frame, bg='white') - self.canvas.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) - - # Scrollbars for canvas - v_scrollbar = ttk.Scrollbar(center_frame, orient="vertical", command=self.canvas.yview) - v_scrollbar.grid(row=0, column=1, sticky=(tk.N, tk.S)) - h_scrollbar = ttk.Scrollbar(center_frame, orient="horizontal", command=self.canvas.xview) - h_scrollbar.grid(row=1, column=0, sticky=(tk.W, tk.E)) - - self.canvas.configure(yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set) - - center_frame.columnconfigure(0, weight=1) - center_frame.rowconfigure(0, weight=1) - - # Right panel - actions - right_frame = ttk.LabelFrame(main_frame, text="Actions", padding="5") - right_frame.grid(row=1, column=2, sticky=(tk.W, tk.E, tk.N, tk.S), padx=(10, 0)) - - # Cluster info - self.info_label = ttk.Label(right_frame, text="Select a cluster", wraplength=200) - self.info_label.grid(row=0, column=0, pady=(0, 10), sticky=(tk.W, tk.E)) - - # Identify button - self.identify_button = ttk.Button(right_frame, text="Identify as Person", - command=self.identify_cluster, state='disabled') - self.identify_button.grid(row=1, column=0, pady=5, sticky=(tk.W, tk.E)) - - # Skip button - self.skip_button = ttk.Button(right_frame, text="Skip Cluster", - command=self.skip_cluster, state='disabled') - self.skip_button.grid(row=2, column=0, pady=5, sticky=(tk.W, tk.E)) - - # Refresh button - self.refresh_button = ttk.Button(right_frame, text="Refresh Clusters", - command=self.refresh_clusters) - self.refresh_button.grid(row=3, column=0, pady=5, sticky=(tk.W, tk.E)) - - # Statistics - self.stats_label = ttk.Label(right_frame, text="", wraplength=200) - self.stats_label.grid(row=4, column=0, pady=(20, 0), sticky=(tk.W, tk.E)) - - right_frame.columnconfigure(0, weight=1) - - # Status bar - self.status_bar = ttk.Label(main_frame, text="Ready", relief=tk.SUNKEN) - self.status_bar.grid(row=2, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=(10, 0)) - - def load_clusters(self): - """Load face clusters from database""" - try: - self.status_bar.config(text="Loading clusters...") - self.root.update() - - # Get clusters using simple clustering (without sklearn) - clusters = self.get_simple_clusters() - - self.clusters = clusters - self.populate_cluster_list() - self.update_statistics() - - self.status_bar.config(text=f"Loaded {len(clusters)} clusters") - - except Exception as e: - messagebox.showerror("Error", f"Failed to load clusters: {e}") - self.status_bar.config(text="Error loading clusters") - - def get_simple_clusters(self) -> List[Dict]: - """Simple clustering without sklearn - group by face encoding similarity""" - if not os.path.exists(self.db_path): - return [] - - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - - try: - # Get unidentified faces - c.execute('''SELECT f.id, f.image_id, i.path, f.top, f.right, f.bottom, f.left, f.encoding - FROM faces f - JOIN images i ON f.image_id = i.id - WHERE f.person_id IS NULL''') - - faces = c.fetchall() - - if len(faces) < 2: - return [] - - # Simple clustering by face encoding similarity - clusters = [] - used_faces = set() - - for i, face1 in enumerate(faces): - if face1[0] in used_faces: - continue - - cluster_faces = [face1] - used_faces.add(face1[0]) - - encoding1 = pickle.loads(face1[7]) - - # Find similar faces - for j, face2 in enumerate(faces[i+1:], i+1): - if face2[0] in used_faces: - continue - - encoding2 = pickle.loads(face2[7]) - - # Calculate similarity (simple distance) - try: - import face_recognition - distance = face_recognition.face_distance([encoding1], encoding2)[0] - - if distance < 0.5: # Similar faces - cluster_faces.append(face2) - used_faces.add(face2[0]) - except: - # Fallback to numpy distance if face_recognition not available - distance = np.linalg.norm(encoding1 - encoding2) - if distance < 0.8: - cluster_faces.append(face2) - used_faces.add(face2[0]) - - # Only create cluster if it has multiple faces - if len(cluster_faces) >= 2: - cluster_data = { - 'cluster_id': len(clusters), - 'face_count': len(cluster_faces), - 'faces': [] - } - - for face in cluster_faces: - cluster_data['faces'].append({ - 'face_id': face[0], - 'image_id': face[1], - 'image_path': face[2], - 'location': (face[3], face[4], face[5], face[6]) - }) - - clusters.append(cluster_data) - - # Sort by face count (largest clusters first) - clusters.sort(key=lambda x: x['face_count'], reverse=True) - - return clusters - - except Exception as e: - print(f"Error in simple clustering: {e}") - return [] - finally: - conn.close() - - def populate_cluster_list(self): - """Populate the cluster list""" - self.cluster_listbox.delete(0, tk.END) - - for i, cluster in enumerate(self.clusters): - label = f"Cluster {i+1} ({cluster['face_count']} faces)" - self.cluster_listbox.insert(tk.END, label) - - def on_cluster_select(self, event): - """Handle cluster selection""" - selection = self.cluster_listbox.curselection() - if not selection: - return - - self.current_cluster_index = selection[0] - self.display_cluster_faces() - self.identify_button.config(state='normal') - self.skip_button.config(state='normal') - - def display_cluster_faces(self): - """Display faces in the selected cluster""" - if not self.clusters or self.current_cluster_index >= len(self.clusters): - return - - cluster = self.clusters[self.current_cluster_index] - - # Update info - info_text = f"Cluster {self.current_cluster_index + 1}\n" - info_text += f"{cluster['face_count']} faces\n" - info_text += f"Click 'Identify as Person' to name these faces" - self.info_label.config(text=info_text) - - # Clear canvas - self.canvas.delete("all") - - # Display face thumbnails - x, y = 10, 10 - max_width = 0 - row_height = 0 - - for i, face in enumerate(cluster['faces']): - try: - thumbnail = self.get_face_thumbnail(face) - if thumbnail: - # Create image on canvas - image_id = self.canvas.create_image(x, y, anchor=tk.NW, image=thumbnail) - - # Add image path as text below - filename = os.path.basename(face['image_path']) - self.canvas.create_text(x + 50, y + 110, text=filename, - width=100, font=('Arial', 8)) - - x += 120 - max_width = max(max_width, x) - row_height = max(row_height, 130) - - # New row after 4 images - if (i + 1) % 4 == 0: - x = 10 - y += row_height - row_height = 0 - - except Exception as e: - print(f"Error displaying face thumbnail: {e}") - - # Update canvas scroll region - self.canvas.configure(scrollregion=self.canvas.bbox("all")) - - def get_face_thumbnail(self, face: Dict) -> ImageTk.PhotoImage: - """Get thumbnail image of a face""" - cache_key = f"{face['face_id']}" - - if cache_key in self.image_cache: - return self.image_cache[cache_key] - - try: - # Load image - image_path = face['image_path'] - if not os.path.exists(image_path): - return None - - img = Image.open(image_path) - - # Crop face region - top, right, bottom, left = face['location'] - # Add some padding - padding = 20 - left = max(0, left - padding) - top = max(0, top - padding) - right = min(img.width, right + padding) - bottom = min(img.height, bottom + padding) - - face_img = img.crop((left, top, right, bottom)) - - # Resize to thumbnail - face_img.thumbnail((100, 100), Image.Resampling.LANCZOS) - - # Convert to PhotoImage - photo = ImageTk.PhotoImage(face_img) - - # Cache it - self.image_cache[cache_key] = photo - - return photo - - except Exception as e: - print(f"Error creating thumbnail: {e}") - return None - - def identify_cluster(self): - """Identify all faces in cluster as a person""" - if not self.clusters or self.current_cluster_index >= len(self.clusters): - return - - cluster = self.clusters[self.current_cluster_index] - - # Get person name - name = simpledialog.askstring("Identify Person", - f"Enter name for {cluster['face_count']} faces:") - - if not name or not name.strip(): - return - - name = name.strip() - - try: - # Add person to database and assign faces - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - - # Add person - c.execute('INSERT OR IGNORE INTO people (name) VALUES (?)', (name,)) - c.execute('SELECT id FROM people WHERE name = ?', (name,)) - person_id = c.fetchone()[0] - - # Assign all faces in cluster - for face in cluster['faces']: - c.execute('''UPDATE faces - SET person_id = ?, is_confirmed = 1 - WHERE id = ?''', - (person_id, face['face_id'])) - - conn.commit() - conn.close() - - messagebox.showinfo("Success", f"Identified {cluster['face_count']} faces as {name}") - - # Refresh clusters - self.refresh_clusters() - - except Exception as e: - messagebox.showerror("Error", f"Failed to identify cluster: {e}") - - def skip_cluster(self): - """Skip current cluster""" - if self.current_cluster_index < len(self.clusters) - 1: - self.cluster_listbox.selection_set(self.current_cluster_index + 1) - self.on_cluster_select(None) - else: - messagebox.showinfo("Info", "This is the last cluster") - - def refresh_clusters(self): - """Reload clusters from database""" - # Clear cache - self.image_cache.clear() - - # Reload - self.load_clusters() - - # Reset selection - self.identify_button.config(state='disabled') - self.skip_button.config(state='disabled') - self.canvas.delete("all") - self.info_label.config(text="Select a cluster") - - def update_statistics(self): - """Update statistics display""" - if not os.path.exists(self.db_path): - return - - try: - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - - c.execute("SELECT COUNT(*) FROM faces WHERE person_id IS NULL") - unidentified = c.fetchone()[0] - - c.execute("SELECT COUNT(*) FROM faces WHERE person_id IS NOT NULL") - identified = c.fetchone()[0] - - conn.close() - - stats_text = f"Statistics:\n" - stats_text += f"Unidentified: {unidentified}\n" - stats_text += f"Identified: {identified}\n" - stats_text += f"Clusters: {len(self.clusters)}" - - self.stats_label.config(text=stats_text) - - except Exception as e: - print(f"Error updating statistics: {e}") - - def run(self): - """Start the GUI""" - self.root.mainloop() - - -def main(): - """Main entry point""" - app = FaceClusterGUI() - app.run() - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/scripts/interactive_identifier.py b/scripts/interactive_identifier.py deleted file mode 100644 index 6c0e4a0..0000000 --- a/scripts/interactive_identifier.py +++ /dev/null @@ -1,193 +0,0 @@ -#!/usr/bin/env python3 -""" -Interactive Face Identifier for PunimTag -Allows users to identify unknown faces in the database -""" - -import os -import cv2 -import numpy as np -from punimtag import PunimTag -from typing import Optional -import sys - - -class InteractiveFaceIdentifier: - def __init__(self, db_path: str = 'punimtag.db'): - self.tagger = PunimTag(db_path=db_path) - self.window_name = 'Face Identifier' - - def display_face(self, image_path: str, location: tuple) -> np.ndarray: - """Load and display image with face highlighted""" - img = cv2.imread(image_path) - if img is None: - print(f"Error: Could not load image {image_path}") - return None - - # Get face coordinates - top, right, bottom, left = location - - # Draw rectangle around face - cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 3) - - # Calculate display size (max 800x600) - height, width = img.shape[:2] - max_height, max_width = 600, 800 - - if height > max_height or width > max_width: - scale = min(max_height/height, max_width/width) - new_width = int(width * scale) - new_height = int(height * scale) - img = cv2.resize(img, (new_width, new_height)) - - return img - - def get_user_input(self, face_info: dict) -> Optional[str]: - """Get user input for face identification""" - print("\n" + "="*50) - print(f"Image: {face_info['image_path']}") - print(f"Face ID: {face_info['face_id']}") - print("\nOptions:") - print("1. Enter person's name") - print("2. Skip this face (press Enter)") - print("3. Quit (press 'q')") - print("="*50) - - user_input = input("\nEnter person's name (or press Enter to skip): ").strip() - - if user_input.lower() == 'q': - return 'QUIT' - elif user_input == '': - return None - else: - return user_input - - def run(self): - """Run the interactive identification process""" - print("PunimTag Interactive Face Identifier") - print("=" * 50) - - # Get unidentified faces - unidentified = self.tagger.get_unidentified_faces() - - if not unidentified: - print("No unidentified faces found!") - return - - print(f"Found {len(unidentified)} unidentified faces") - - # Create window - cv2.namedWindow(self.window_name, cv2.WINDOW_NORMAL) - - identified_count = 0 - - for i, face_info in enumerate(unidentified): - print(f"\nProcessing face {i+1} of {len(unidentified)}") - - # Display the face - img = self.display_face(face_info['image_path'], face_info['location']) - - if img is None: - continue - - cv2.imshow(self.window_name, img) - cv2.waitKey(1) # Allow window to update - - # Get user input - name = self.get_user_input(face_info) - - if name == 'QUIT': - print("\nQuitting...") - break - elif name: - # Add person and assign face - person_id = self.tagger.add_person(name) - self.tagger.assign_face_to_person(face_info['face_id'], person_id, is_confirmed=True) - identified_count += 1 - print(f"✓ Identified as: {name}") - else: - print("⊘ Skipped") - - cv2.destroyAllWindows() - - print(f"\n{'='*50}") - print(f"Identification complete!") - print(f"Identified {identified_count} faces") - print(f"Skipped {len(unidentified) - identified_count} faces") - - self.tagger.close() - - -class CLIFaceIdentifier: - """Command-line only face identifier (no OpenCV required)""" - - def __init__(self, db_path: str = 'punimtag.db'): - self.tagger = PunimTag(db_path=db_path) - - def run(self): - """Run CLI-based identification""" - print("PunimTag CLI Face Identifier") - print("=" * 50) - - # Get unidentified faces - unidentified = self.tagger.get_unidentified_faces() - - if not unidentified: - print("No unidentified faces found!") - return - - print(f"Found {len(unidentified)} unidentified faces\n") - - identified_count = 0 - - for i, face_info in enumerate(unidentified): - print(f"\n{'='*50}") - print(f"Face {i+1} of {len(unidentified)}") - print(f"Image: {face_info['image_path']}") - print(f"Location in image: top={face_info['location'][0]}, right={face_info['location'][1]}, " - f"bottom={face_info['location'][2]}, left={face_info['location'][3]}") - print(f"Face ID: {face_info['face_id']}") - - name = input("\nEnter person's name (or press Enter to skip, 'q' to quit): ").strip() - - if name.lower() == 'q': - print("\nQuitting...") - break - elif name: - # Add person and assign face - person_id = self.tagger.add_person(name) - self.tagger.assign_face_to_person(face_info['face_id'], person_id, is_confirmed=True) - identified_count += 1 - print(f"✓ Identified as: {name}") - else: - print("⊘ Skipped") - - print(f"\n{'='*50}") - print(f"Identification complete!") - print(f"Identified {identified_count} faces") - print(f"Skipped {len(unidentified) - identified_count} faces") - - self.tagger.close() - - -def main(): - """Main entry point""" - # Check if OpenCV is available - try: - import cv2 - print("OpenCV available - using visual identifier") - identifier = InteractiveFaceIdentifier() - except ImportError: - print("OpenCV not available - using CLI identifier") - print("Install opencv-python for visual identification: pip install opencv-python") - identifier = CLIFaceIdentifier() - - try: - identifier.run() - except KeyboardInterrupt: - print("\n\nInterrupted by user") - sys.exit(0) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/scripts/simple_identifier.py b/scripts/simple_identifier.py deleted file mode 100644 index f040b0c..0000000 --- a/scripts/simple_identifier.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env python3 -""" -Simple Face Identifier for PunimTag -Works with the punimtag_simple.db database -""" - -import sqlite3 -import os -from PIL import Image -import pickle - -class SimpleFaceIdentifier: - def __init__(self, db_path='punimtag_simple.db'): - self.db_path = db_path - - def get_unidentified_faces(self, limit=10): - """Get a limited number of unidentified faces""" - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - - c.execute('''SELECT f.id, f.image_id, i.path, i.filename, f.top, f.right, f.bottom, f.left - FROM faces f - JOIN images i ON f.image_id = i.id - WHERE f.person_id IS NULL - LIMIT ?''', (limit,)) - - faces = c.fetchall() - conn.close() - return faces - - def add_person(self, name): - """Add a new person""" - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - c.execute('INSERT OR IGNORE INTO people (name) VALUES (?)', (name,)) - c.execute('SELECT id FROM people WHERE name = ?', (name,)) - person_id = c.fetchone()[0] - conn.commit() - conn.close() - return person_id - - def assign_face(self, face_id, person_id): - """Assign a face to a person""" - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - c.execute('UPDATE faces SET person_id = ?, is_confirmed = 1 WHERE id = ?', - (person_id, face_id)) - conn.commit() - conn.close() - - def run_cli_identifier(self): - """Run command line identifier""" - print("\n🏷️ Simple Face Identifier") - print("=" * 50) - - faces = self.get_unidentified_faces(50) # Get first 50 faces - - if not faces: - print("No unidentified faces found!") - return - - print(f"Found {len(faces)} unidentified faces to process...") - print("For each face, enter the person's name or 's' to skip\n") - - for i, (face_id, image_id, path, filename, top, right, bottom, left) in enumerate(faces): - print(f"\nFace {i+1}/{len(faces)}") - print(f"📁 File: {filename}") - print(f"📍 Location: top={top}, right={right}, bottom={bottom}, left={left}") - - # Try to display basic info about the image - try: - if os.path.exists(path): - with Image.open(path) as img: - print(f"🖼️ Image size: {img.size}") - else: - print("⚠️ Image file not found") - except Exception as e: - print(f"⚠️ Could not read image: {e}") - - while True: - name = input(f"👤 Who is this person? (or 's' to skip): ").strip() - - if name.lower() == 's': - print("⏭️ Skipped") - break - elif name: - try: - person_id = self.add_person(name) - self.assign_face(face_id, person_id) - print(f"✅ Identified as '{name}'") - break - except Exception as e: - print(f"❌ Error: {e}") - else: - print("Please enter a name or 's' to skip") - - print(f"\n🎉 Completed processing {len(faces)} faces!") - - # Show remaining count - remaining = self.get_remaining_count() - if remaining > 0: - print(f"📊 {remaining} unidentified faces remaining") - print("Run the script again to continue identifying faces") - else: - print("🏆 All faces have been identified!") - - def get_remaining_count(self): - """Get count of remaining unidentified faces""" - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - c.execute('SELECT COUNT(*) FROM faces WHERE person_id IS NULL') - count = c.fetchone()[0] - conn.close() - return count - -if __name__ == "__main__": - identifier = SimpleFaceIdentifier() - identifier.run_cli_identifier() \ No newline at end of file diff --git a/scripts/start_gui.py b/scripts/start_gui.py deleted file mode 100644 index bff4358..0000000 --- a/scripts/start_gui.py +++ /dev/null @@ -1,224 +0,0 @@ -#!/usr/bin/env python3 -""" -PunimTag GUI Starter -Simple script to demonstrate the system and guide next steps -""" - -import os -import subprocess -import sys - - -def check_requirements(): - """Check if all requirements are met""" - print("🔍 Checking requirements...") - - # Check if photos directory exists - if not os.path.exists('photos'): - print("❌ Photos directory not found") - print(" Creating photos/ directory...") - os.makedirs('photos', exist_ok=True) - print(" ✅ Created photos/ directory") - else: - print("✅ Photos directory exists") - - # Check if database exists - if os.path.exists('punimtag_simple.db'): - print("✅ Database exists") - - # Get basic stats - try: - import sqlite3 - conn = sqlite3.connect('punimtag_simple.db') - c = conn.cursor() - - c.execute("SELECT COUNT(*) FROM images") - image_count = c.fetchone()[0] - - c.execute("SELECT COUNT(*) FROM faces") - face_count = c.fetchone()[0] - - c.execute("SELECT COUNT(*) FROM faces WHERE person_id IS NULL") - unidentified_count = c.fetchone()[0] - - conn.close() - - print(f" 📊 {image_count} images, {face_count} faces, {unidentified_count} unidentified") - - return image_count, face_count, unidentified_count - - except Exception as e: - print(f" ⚠️ Error reading database: {e}") - return 0, 0, 0 - else: - print("❌ Database not found") - print(" Run 'python punimtag_simple.py' first to process images") - return 0, 0, 0 - - -def show_menu(image_count, face_count, unidentified_count): - """Show main menu""" - print("\n" + "="*60) - print("🏷️ PUNIMTAG - NEXT STEPS") - print("="*60) - - if image_count == 0: - print("📝 GETTING STARTED:") - print(" 1. Add photos to the 'photos/' directory") - print(" 2. Run: python punimtag_simple.py") - print(" 3. Come back here for face identification") - print("\n💡 TIP: Start with 10-20 photos for testing") - return - - print(f"📊 CURRENT STATUS:") - print(f" Images processed: {image_count}") - print(f" Faces detected: {face_count}") - print(f" Unidentified faces: {unidentified_count}") - - print(f"\n🎯 AVAILABLE ACTIONS:") - print(f" 1. Process more images") - print(f" 2. Identify unknown faces (CLI)") - print(f" 3. Manage database") - print(f" 4. View statistics") - if unidentified_count > 0: - print(f" 5. Start simple web interface (coming soon)") - print(f" 6. Exit") - - -def process_images(): - """Process images""" - print("\n📷 Processing images...") - try: - result = subprocess.run([sys.executable, 'punimtag_simple.py'], - capture_output=True, text=True) - print(result.stdout) - if result.stderr: - print("Errors:", result.stderr) - except Exception as e: - print(f"Error: {e}") - - -def identify_faces(): - """Identify faces using CLI tool""" - print("\n👥 Starting face identification...") - try: - subprocess.run([sys.executable, 'interactive_identifier.py']) - except Exception as e: - print(f"Error: {e}") - - -def manage_database(): - """Manage database""" - print("\n🗄️ Starting database manager...") - try: - subprocess.run([sys.executable, 'db_manager.py']) - except Exception as e: - print(f"Error: {e}") - - -def show_statistics(): - """Show detailed statistics""" - print("\n📊 DETAILED STATISTICS") - print("="*40) - - try: - from db_manager import DatabaseManager - manager = DatabaseManager('punimtag_simple.db') - manager.inspect_database() - except Exception as e: - print(f"Error: {e}") - - -def show_next_gui_steps(): - """Show what's coming next for GUI development""" - print("\n🚀 NEXT GUI DEVELOPMENT STEPS") - print("="*50) - print("We have a working backend! Next steps for GUI:") - print() - print("✅ COMPLETED:") - print(" • Face recognition and clustering") - print(" • Database management") - print(" • Jewish organization features") - print(" • Search functionality") - print(" • CLI tools for identification") - print() - print("🔄 IN PROGRESS:") - print(" • Web-based GUI with Flask") - print(" • Face clustering visualization") - print(" • Interactive face identification") - print() - print("📋 PLANNED:") - print(" • Advanced search interface") - print(" • Tag management GUI") - print(" • Statistics dashboard") - print(" • Photo gallery with face highlights") - print() - print("💡 TO TEST THE WEB GUI:") - print(" 1. Make sure you have processed some images") - print(" 2. Run: python web_gui.py (when ready)") - print(" 3. Open http://localhost:5000 in browser") - - -def main(): - """Main function""" - print("🏷️ PunimTag GUI Starter") - print("Welcome to the face recognition and photo tagging system!") - - # Check requirements - image_count, face_count, unidentified_count = check_requirements() - - while True: - show_menu(image_count, face_count, unidentified_count) - - try: - choice = input(f"\n➤ Select option: ").strip() - - if choice == '1': - process_images() - # Refresh stats - image_count, face_count, unidentified_count = check_requirements() - - elif choice == '2': - if face_count == 0: - print("❌ No faces to identify. Process images first.") - else: - identify_faces() - # Refresh stats - image_count, face_count, unidentified_count = check_requirements() - - elif choice == '3': - manage_database() - # Refresh stats - image_count, face_count, unidentified_count = check_requirements() - - elif choice == '4': - show_statistics() - - elif choice == '5': - if unidentified_count > 0: - print("🌐 Web interface is in development!") - print("For now, use option 2 for CLI identification.") - else: - print("✅ All faces are identified!") - - elif choice == '6': - break - - else: - print("❌ Invalid option") - - input("\nPress Enter to continue...") - - except KeyboardInterrupt: - print("\n\n👋 Goodbye!") - break - except Exception as e: - print(f"Error: {e}") - input("Press Enter to continue...") - - print("\n🎉 Thank you for using PunimTag!") - show_next_gui_steps() - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..71d2cbc --- /dev/null +++ b/setup.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +""" +PunimTag CLI Setup Script +Simple setup for the minimal photo tagger +""" + +import os +import sys +import subprocess +from pathlib import Path + + +def check_python_version(): + """Check if Python version is compatible""" + if sys.version_info < (3, 7): + print("❌ Python 3.7+ is required") + return False + print(f"✅ Python {sys.version_info.major}.{sys.version_info.minor} detected") + return True + + +def install_requirements(): + """Install Python requirements""" + requirements_file = Path("requirements.txt") + + if not requirements_file.exists(): + print("❌ requirements.txt not found!") + return False + + print("📦 Installing Python dependencies...") + try: + subprocess.run([ + sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt' + ], check=True) + print("✅ Dependencies installed successfully") + return True + except subprocess.CalledProcessError as e: + print(f"❌ Failed to install dependencies: {e}") + return False + + +def create_directories(): + """Create necessary directories""" + directories = ['data', 'logs'] + + for directory in directories: + Path(directory).mkdir(exist_ok=True) + print(f"✅ Created directory: {directory}") + + +def test_installation(): + """Test if face recognition works""" + print("🧪 Testing face recognition installation...") + try: + import face_recognition + import numpy as np + from PIL import Image + print("✅ All required modules imported successfully") + return True + except ImportError as e: + print(f"❌ Import error: {e}") + return False + + +def main(): + """Main setup function""" + print("🚀 PunimTag CLI Setup") + print("=" * 40) + + # Check Python version + if not check_python_version(): + return 1 + + # Check if we're in a virtual environment (recommended) + if sys.prefix == sys.base_prefix: + print("⚠️ Not in a virtual environment!") + print(" Recommended: python -m venv venv && source venv/bin/activate") + response = input(" Continue anyway? (y/N): ").strip().lower() + if response != 'y': + print("Setup cancelled. Create a virtual environment first.") + return 1 + else: + print("✅ Virtual environment detected") + + print() + + # Create directories + print("📁 Creating directories...") + create_directories() + print() + + # Install requirements + if not install_requirements(): + return 1 + print() + + # Test installation + if not test_installation(): + print("⚠️ Installation test failed. You may need to install additional dependencies.") + print(" For Ubuntu/Debian: sudo apt-get install build-essential cmake") + print(" For macOS: brew install cmake") + return 1 + print() + + print("✅ Setup complete!") + print() + print("🎯 Quick Start:") + print(" 1. Add photos: python3 photo_tagger.py scan /path/to/photos") + print(" 2. Process faces: python3 photo_tagger.py process") + print(" 3. Identify faces: python3 photo_tagger.py identify") + print(" 4. View stats: python3 photo_tagger.py stats") + print() + print("📖 For help: python3 photo_tagger.py --help") + print() + print("⚠️ IMPORTANT: Always activate virtual environment first!") + print(" source venv/bin/activate") + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index 41abcc3..0000000 --- a/src/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -PunimTag Source Package - -This package contains all the source code for the PunimTag application. -""" - -__version__ = "1.0.0" -__author__ = "PunimTag Team" \ No newline at end of file diff --git a/src/backend/__init__.py b/src/backend/__init__.py deleted file mode 100644 index 9ddcefb..0000000 --- a/src/backend/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -""" -PunimTag Backend Package - -This package contains all backend-related code including Flask app, database operations, -and face recognition functionality. -""" \ No newline at end of file diff --git a/src/backend/app.py b/src/backend/app.py deleted file mode 100644 index d5262a2..0000000 --- a/src/backend/app.py +++ /dev/null @@ -1,4415 +0,0 @@ -#!/usr/bin/env python3 -""" -Simple Web GUI for PunimTag -Enhanced face identification interface with pagination and filtering -""" - -from flask import Flask, render_template_string, request, jsonify -import sqlite3 -import os -import base64 -import datetime -from io import BytesIO -from PIL import Image -from pathlib import Path - -try: - import numpy as np -except ImportError: - print("Warning: numpy not available, using fallback distance calculation") - np = None -# import face_recognition -import pickle - -app = Flask(__name__) - -# Database configuration -DATA_DIR = Path(__file__).parent.parent.parent / "data" -DATABASE_PATH = DATA_DIR / "punimtag_simple.db" - -# HTML template embedded in the Python file -HTML_TEMPLATE = ''' - - - - PunimTag - Face Identification - - - - - -
-
-
-

🏷️ PunimTag - Face Identification

-

Identify people in your photos

-
- -
- -
-
-

Total Images

-

{{ stats.images }}

-
-
-

Total Faces

-

{{ stats.faces }}

-
-
-

Identified

-

{{ stats.identified }}

-
-
-

Unidentified

-

{{ stats.unidentified }}

-
-
-

Progress

-

{{ stats.percentage }}%

-
-
-
-
-
- -
- - - - - -
- -
- - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - -
- - Item 1 of 0 - -
- - -
-
Loading faces...
-
- - - -
- - - - - {% raw %} - - {% endraw %} - - -''' - -def generate_thumbnail(path, box, size=(120,120)): - if not os.path.exists(path): - return None - try: - img = Image.open(path) - crop = img.crop(box) - crop.thumbnail(size) - buffered = BytesIO() - crop.save(buffered, format="JPEG") - return f'data:image/jpeg;base64,' + base64.b64encode(buffered.getvalue()).decode() - except Exception as e: - print(f"Error generating thumbnail: {e}") - return None - -@app.route('/debug/preload_test') -def debug_preload_test(): - """Debug endpoint to test pre-load check functionality""" - return jsonify({ - 'status': 'ok', - 'message': 'Pre-load test endpoint working', - 'timestamp': str(datetime.datetime.now()) - }) - -@app.route('/debug/ui') -def debug_ui(): - """Debug UI page to test JavaScript and API calls""" - with open('debug_ui.html', 'r') as f: - return f.read() - -@app.route('/debug/minimal') -def debug_minimal(): - """Minimal test page to isolate JavaScript issues""" - with open('test_minimal.html', 'r') as f: - return f.read() - -@app.route('/debug/diagnostic') -def debug_diagnostic(): - """Diagnostic test page with error catching""" - with open('test_diagnostic.html', 'r') as f: - return f.read() - -@app.route('/debug/simple_main') -def debug_simple_main(): - """Simplified main UI test""" - with open('test_simple_main.html', 'r') as f: - return f.read() - -@app.route('/debug/progressive') -def debug_progressive(): - """Progressive test to identify the exact issue""" - with open('test_progressive.html', 'r') as f: - return f.read() - -@app.route('/debug/syntax') -def debug_syntax(): - """Syntax checker for JavaScript""" - with open('test_syntax_check.html', 'r') as f: - return f.read() - -@app.route('/debug/console') -def debug_console(): - """Console error tracker""" - with open('test_console_tracker.html', 'r') as f: - return f.read() - -@app.route('/debug/errors') -def debug_errors(): - """Direct error checker""" - with open('test_direct_error_check.html', 'r') as f: - return f.read() - -@app.route('/debug/validator') -def debug_validator(): - """JavaScript validator""" - with open('test_js_validator.html', 'r') as f: - return f.read() - -@app.route('/debug/syntax_test') -def debug_syntax_test(): - """Syntax test page""" - with open('test_syntax_fix.html', 'r') as f: - return f.read() - -@app.route('/check_database') -def check_database(): - """Check if database is accessible and has required tables""" - try: - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - # Check if required tables exist - c.execute("SELECT name FROM sqlite_master WHERE type='table' AND name IN ('images', 'faces', 'people')") - tables = [row[0] for row in c.fetchall()] - - if len(tables) < 3: - conn.close() - return jsonify({'status': 'error', 'message': f'Missing tables. Found: {tables}'}), 500 - - # Check if we can query basic data - c.execute('SELECT COUNT(*) FROM images') - image_count = c.fetchone()[0] - - c.execute('SELECT COUNT(*) FROM faces') - face_count = c.fetchone()[0] - - conn.close() - - return jsonify({ - 'status': 'ok', - 'tables': tables, - 'image_count': image_count, - 'face_count': face_count - }) - - except Exception as e: - return jsonify({'status': 'error', 'message': str(e)}), 500 - -@app.route('/check_thumbnails') -def check_thumbnails(): - """Check if thumbnail generation is working""" - try: - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - # Get a sample image to test thumbnail generation - c.execute('SELECT path FROM images LIMIT 1') - row = c.fetchone() - conn.close() - - if not row: - return jsonify({'status': 'error', 'message': 'No images found to test'}), 404 - - path = row[0] - if not os.path.exists(path): - return jsonify({'status': 'error', 'message': f'Image file not found: {path}'}), 404 - - # Test thumbnail generation - try: - img = Image.open(path) - img.thumbnail((100, 100)) - buffered = BytesIO() - img.save(buffered, format="JPEG") - return jsonify({'status': 'ok', 'message': 'Thumbnail generation working'}) - except Exception as e: - return jsonify({'status': 'error', 'message': f'Thumbnail generation failed: {str(e)}'}), 500 - - except Exception as e: - return jsonify({'status': 'error', 'message': str(e)}), 500 - -@app.route('/system_status') -def system_status(): - """Get comprehensive system status""" - try: - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - # Check database tables - c.execute("SELECT name FROM sqlite_master WHERE type='table'") - tables = [row[0] for row in c.fetchall()] - - # Get counts - c.execute('SELECT COUNT(*) FROM images') - image_count = c.fetchone()[0] - - c.execute('SELECT COUNT(*) FROM faces') - face_count = c.fetchone()[0] - - c.execute('SELECT COUNT(*) FROM people') - people_count = c.fetchone()[0] - - c.execute('SELECT COUNT(*) FROM faces WHERE person_id IS NOT NULL') - identified_faces = c.fetchone()[0] - - # Check for missing image files (simplified - just check first few) - c.execute('SELECT path FROM images LIMIT 3') - sample_paths = [row[0] for row in c.fetchall()] - missing_files = [] - for path in sample_paths: - try: - if not os.path.exists(path): - missing_files.append(path) - except: - missing_files.append(path) - - conn.close() - - status = { - 'database': { - 'tables': tables, - 'image_count': image_count, - 'face_count': face_count, - 'people_count': people_count, - 'identified_faces': identified_faces - }, - 'files': { - 'missing_files_count': len(missing_files), - 'sample_missing': missing_files[:3] if missing_files else [] - }, - 'system': { - 'photos_dir_exists': os.path.exists('photos'), - 'database_file_exists': os.path.exists(str(DATABASE_PATH)) - } - } - - return jsonify(status) - - except Exception as e: - return jsonify({'error': str(e)}), 500 - -@app.route('/') -def index(): - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - c.execute('SELECT COUNT(*) FROM images') - images = c.fetchone()[0] - - c.execute('SELECT COUNT(*) FROM faces') - faces = c.fetchone()[0] - - c.execute('SELECT COUNT(*) FROM faces WHERE person_id IS NOT NULL') - identified = c.fetchone()[0] - - unidentified = faces - identified - - # Calculate photo-level stats - c.execute('SELECT COUNT(DISTINCT image_id) FROM faces WHERE person_id IS NOT NULL') - identified_photos = c.fetchone()[0] - - c.execute('SELECT COUNT(*) FROM images WHERE id NOT IN (SELECT DISTINCT image_id FROM faces WHERE person_id IS NOT NULL)') - unidentified_photos = c.fetchone()[0] - - percentage = round(identified / faces * 100, 1) if faces > 0 else 0.0 - - stats = { - 'images': images, - 'faces': faces, - 'identified': identified, - 'unidentified': unidentified, - 'identified_photos': identified_photos, - 'unidentified_photos': unidentified_photos, - 'percentage': percentage - } - - conn.close() - - return render_template_string(HTML_TEMPLATE, stats=stats) - -@app.route('/check_person_exists', methods=['POST']) -def check_person_exists(): - """Check if a person name already exists""" - try: - data = request.json - name = data.get('name', '').strip() - - if not name: - return jsonify({'exists': False}) - - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - c.execute('SELECT COUNT(*) FROM people WHERE name = ?', (name,)) - count = c.fetchone()[0] - conn.close() - - return jsonify({'exists': count > 0}) - - except Exception as e: - print(f"Error checking person existence: {e}") - return jsonify({'exists': False, 'error': str(e)}) - -@app.route('/process_new_images', methods=['POST']) -def process_new_images(): - """Process new images from the photos directory""" - try: - from punimtag_simple import SimplePunimTag - - # Initialize processor - processor = SimplePunimTag() - - new_images = 0 - new_faces = 0 - - # Get list of already processed images - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - c.execute('SELECT path FROM images') - processed_paths = set(row[0] for row in c.fetchall()) - conn.close() - - # Scan photos directory for new images - photos_dir = 'photos' - if not os.path.exists(photos_dir): - return jsonify({'success': False, 'error': 'Photos directory not found'}) - - supported_formats = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.gif'] - - for root, dirs, files in os.walk(photos_dir): - for file in files: - if any(file.lower().endswith(fmt) for fmt in supported_formats): - file_path = os.path.join(root, file).replace('\\', '/') - - if file_path not in processed_paths: - try: - # Process new image - print(f"Processing new image: {file_path}") - image_id = processor.process_image(file_path) - new_images += 1 - - # Count faces added - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - c.execute('SELECT COUNT(*) FROM faces WHERE image_id = ?', (image_id,)) - faces_in_image = c.fetchone()[0] - new_faces += faces_in_image - conn.close() - - except Exception as e: - print(f"Error processing {file_path}: {e}") - continue - - processor.close() - - return jsonify({ - 'success': True, - 'new_images': new_images, - 'new_faces': new_faces, - 'message': f'Processed {new_images} new images with {new_faces} faces' - }) - - except Exception as e: - print(f"Error processing new images: {e}") - return jsonify({'success': False, 'error': str(e)}) - -@app.route('/remove_all_faces', methods=['POST']) -def remove_all_faces(): - """Remove all face data from the database""" - try: - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - # Count before deletion - c.execute('SELECT COUNT(*) FROM faces') - faces_count = c.fetchone()[0] - - c.execute('SELECT COUNT(*) FROM people') - people_count = c.fetchone()[0] - - # Delete all face data - c.execute('DELETE FROM faces') - c.execute('DELETE FROM people') - - # Reset auto-increment counters - c.execute('DELETE FROM sqlite_sequence WHERE name IN ("faces", "people")') - - conn.commit() - conn.close() - - return jsonify({ - 'success': True, - 'faces_deleted': faces_count, - 'people_deleted': people_count, - 'message': f'Deleted {faces_count} faces and {people_count} people' - }) - - except Exception as e: - print(f"Error removing faces: {e}") - return jsonify({'success': False, 'error': str(e)}) - -@app.route('/get_faces') -def get_faces(): - tab = request.args.get('tab') - page = int(request.args.get('page', 1)) - per_page = int(request.args.get('per_page', 20)) - - if tab != 'unidentified': - return jsonify({'error': 'Invalid tab for this endpoint'}), 400 - - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - c.execute('SELECT COUNT(*) FROM faces WHERE person_id IS NULL') - total = c.fetchone()[0] - total_pages = (total + per_page - 1) // per_page - offset = (page - 1) * per_page - - c.execute('''SELECT f.id, i.filename, f.image_id, f.person_id, p.name as person_name, - (SELECT COUNT(*) FROM faces f2 WHERE f2.person_id = f.person_id) as person_face_count - FROM faces f - JOIN images i ON f.image_id = i.id - LEFT JOIN people p ON f.person_id = p.id - WHERE f.person_id IS NULL - ORDER BY i.filename - LIMIT ? OFFSET ?''', (per_page, offset)) - - faces = [] - for row in c.fetchall(): - faces.append({ - 'face_id': row[0], - 'filename': row[1], - 'image_id': row[2], - 'person_id': row[3], - 'person_name': row[4] if row[4] else None, - 'person_face_count': row[5] if row[5] else 0 - }) - - conn.close() - return jsonify({'faces': faces, 'total_pages': total_pages}) - -@app.route('/get_faces_grouped') -def get_faces_grouped(): - tab = request.args.get('tab') - page = int(request.args.get('page', 1)) - per_page = int(request.args.get('per_page', 20)) - - if tab != 'identified': - return jsonify({'error': 'Invalid tab for this endpoint'}), 400 - - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - c.execute('SELECT COUNT(DISTINCT person_id) FROM faces WHERE person_id IS NOT NULL') - total = c.fetchone()[0] - total_pages = (total + per_page - 1) // per_page - offset = (page - 1) * per_page - - c.execute('''SELECT p.id, p.name, COUNT(f.id) as face_count, MIN(f.id) as rep_face_id, - MIN(f.image_id) as image_id - FROM people p JOIN faces f ON p.id = f.person_id - GROUP BY p.id - ORDER BY face_count DESC - LIMIT ? OFFSET ?''', (per_page, offset)) - - grouped_faces = [] - for row in c.fetchall(): - person_id, person_name, face_count, rep_face_id, image_id = row - # Get filename for the representative face - c.execute('SELECT filename FROM images WHERE id = ?', (image_id,)) - filename_row = c.fetchone() - filename = filename_row[0] if filename_row else 'unknown' - - grouped_faces.append({ - 'person_id': person_id, - 'person_name': person_name, - 'person_face_count': face_count, - 'face_id': rep_face_id, - 'filename': filename, - 'image_id': image_id - }) - - conn.close() - return jsonify({'faces': grouped_faces, 'total_pages': total_pages}) - -@app.route('/get_thumbnail/') -def get_thumbnail(face_id): - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - c.execute('''SELECT i.path, f.left, f.top, f.right, f.bottom - FROM faces f JOIN images i ON f.image_id = i.id - WHERE f.id = ?''', (face_id,)) - row = c.fetchone() - conn.close() - - if not row: - return jsonify({'error': 'Face not found'}), 404 - - path, left, top, right, bottom = row - thumbnail = generate_thumbnail(path, (left, top, right, bottom), (120,120)) - if thumbnail: - return jsonify({'thumbnail': thumbnail}) - else: - return jsonify({'error': 'Failed to generate thumbnail'}), 500 - -@app.route('/identify_face', methods=['POST']) -def identify_face(): - data = request.json - face_id = data.get('face_id') - name = data.get('name') - - if not face_id or not name: - return jsonify({'success': False, 'error': 'Missing parameters'}), 400 - - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - c.execute('SELECT id FROM people WHERE name = ?', (name,)) - row = c.fetchone() - if row: - person_id = row[0] - else: - c.execute('INSERT INTO people (name) VALUES (?)', (name,)) - person_id = c.lastrowid - - c.execute('UPDATE faces SET person_id = ?, is_confirmed = 1 WHERE id = ?', (person_id, face_id)) - - success = c.rowcount > 0 - conn.commit() - conn.close() - return jsonify({'success': success}) - -@app.route('/identify_multiple_faces', methods=['POST']) -def identify_multiple_faces(): - data = request.json - face_ids = data.get('face_ids', []) - name = data.get('name') - - if not face_ids or not name: - return jsonify({'success': False, 'error': 'Missing parameters'}), 400 - - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - c.execute('SELECT id FROM people WHERE name = ?', (name,)) - row = c.fetchone() - if row: - person_id = row[0] - else: - c.execute('INSERT INTO people (name) VALUES (?)', (name,)) - person_id = c.lastrowid - - updated = 0 - for fid in face_ids: - c.execute('UPDATE faces SET person_id = ?, is_confirmed = 1 WHERE id = ? AND person_id IS NULL', (person_id, fid)) - updated += c.rowcount - - conn.commit() - conn.close() - return jsonify({'success': True, 'count': updated}) - -@app.route('/get_similar_faces/') -def get_similar_faces(face_id): - threshold = 0.6 - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - c.execute('SELECT encoding FROM faces WHERE id = ?', (face_id,)) - row = c.fetchone() - if not row: - conn.close() - return jsonify({'error': 'Face not found'}), 404 - - ref_encoding = pickle.loads(row[0]) - - # Get identified faces to compare against - c.execute('''SELECT f.id, f.encoding, p.name, i.filename, f.left, f.top, f.right, f.bottom, i.path - FROM faces f - JOIN images i ON f.image_id = i.id - JOIN people p ON f.person_id = p.id - WHERE f.person_id IS NOT NULL AND f.id != ?''', (face_id,)) - identified_faces = c.fetchall() - conn.close() - - similar = [] - for id_face_id, id_enc_blob, person_name, filename, left, top, right, bottom, path in identified_faces: - id_encoding = pickle.loads(id_enc_blob) - # Calculate Euclidean distance between face encodings - if np is not None: - distance = np.linalg.norm(np.array(ref_encoding) - np.array(id_encoding)) - else: - # Fallback distance calculation without numpy - distance = sum((a - b) ** 2 for a, b in zip(ref_encoding, id_encoding)) ** 0.5 - if distance < threshold: - thumbnail = generate_thumbnail(path, (left, top, right, bottom), (60,60)) - if thumbnail: - similar.append({ - 'face_id': id_face_id, - 'person_name': person_name, - 'filename': filename, - 'thumbnail': thumbnail, - 'distance': float(distance), - 'similarity': 1.0 - float(distance) - }) - - similar.sort(key=lambda x: x['similarity'], reverse=True) - - # Convert to the format expected by the frontend - similar_faces = [] - for item in similar: - similar_faces.append({ - 'person_name': item['person_name'], - 'similarity': item['similarity'] - }) - - return jsonify({'success': True, 'similar_faces': similar_faces}) - -@app.route('/get_similar_faces_multi/') -def get_similar_faces_multi(person_id): - threshold = 0.6 - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - # Get all confirmed faces for this person - c.execute('SELECT encoding FROM faces WHERE person_id = ?', (person_id,)) - rows = c.fetchall() - if not rows: - conn.close() - return jsonify({'success': True, 'faces': []}) - - known_encodings = [pickle.loads(row[0]) for row in rows] - - # Get all unidentified faces - c.execute('''SELECT f.id, f.encoding, i.filename, f.left, f.top, f.right, f.bottom, i.path - FROM faces f JOIN images i ON f.image_id = i.id - WHERE f.person_id IS NULL''') - unidentified = c.fetchall() - conn.close() - - similar = [] - for u_id, enc_blob, filename, left, top, right, bottom, path in unidentified: - u_encoding = pickle.loads(enc_blob) - - # Calculate distances to all known faces for this person - distances = [] - for known_encoding in known_encodings: - # Use numpy to calculate Euclidean distance - if np is not None: - distance = np.linalg.norm(np.array(known_encoding) - np.array(u_encoding)) - else: - # Fallback distance calculation without numpy - distance = sum((a - b) ** 2 for a, b in zip(known_encoding, u_encoding)) ** 0.5 - distances.append(distance) - - # Use the minimum distance (best match) - min_distance = min(distances) if distances else 1.0 - - if min_distance < threshold: - thumbnail = generate_thumbnail(path, (left, top, right, bottom), (100, 100)) - if thumbnail: - similar.append({ - 'face_id': u_id, - 'filename': filename, - 'thumbnail': thumbnail, - 'distance': float(min_distance), - 'similarity': 1.0 - float(min_distance) - }) - - # Sort by similarity (highest first) - similar.sort(key=lambda x: x['similarity'], reverse=True) - - # Limit to top 20 results to avoid overwhelming the UI - similar = similar[:20] - - return jsonify({'success': True, 'faces': similar}) - -@app.route('/get_person_photo_metadata/') -def get_person_photo_metadata(person_id): - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - c.execute('''SELECT f.id, i.filename, i.id as image_id - FROM faces f JOIN images i ON f.image_id = i.id - WHERE f.person_id = ? - ORDER BY i.date_taken DESC''', (person_id,)) - photos = [{'face_id': row[0], 'filename': row[1], 'image_id': row[2]} for row in c.fetchall()] - conn.close() - return jsonify({'success': True, 'photos': photos}) - -@app.route('/get_photo_tags/') -def get_photo_tags(image_id): - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - c.execute('''SELECT t.name - FROM tags t JOIN image_tags it ON t.id = it.tag_id - WHERE it.image_id = ?''', (image_id,)) - tags = [{'name': row[0]} for row in c.fetchall()] - conn.close() - return jsonify({'success': True, 'tags': tags}) - -@app.route('/update_person_name', methods=['POST']) -def update_person_name(): - data = request.json - person_id = data.get('person_id') - new_name = data.get('new_name') - - if not person_id or not new_name: - return jsonify({'success': False, 'error': 'Missing parameters'}), 400 - - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - c.execute('SELECT id FROM people WHERE name = ? AND id != ?', (new_name, person_id)) - existing = c.fetchone() - - if existing: - existing_id = existing[0] - c.execute('UPDATE faces SET person_id = ? WHERE person_id = ?', (existing_id, person_id)) - merged_count = c.rowcount - c.execute('DELETE FROM people WHERE id = ?', (person_id,)) - conn.commit() - conn.close() - return jsonify({'success': True, 'merged': True, 'merged_count': merged_count}) - else: - c.execute('UPDATE people SET name = ? WHERE id = ?', (new_name, person_id)) - success = c.rowcount > 0 - conn.commit() - conn.close() - return jsonify({'success': success, 'merged': False}) - -@app.route('/get_photos') -def get_photos(): - tab = request.args.get('tab') - page = int(request.args.get('page', 1)) - per_page = int(request.args.get('per_page', 20)) - - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - if tab == 'all_photos': - c.execute('SELECT COUNT(*) FROM images') - total = c.fetchone()[0] - offset = (page - 1) * per_page - - c.execute('''SELECT i.id, i.filename, i.path, i.date_taken, - (SELECT COUNT(*) FROM faces f WHERE f.image_id = i.id) as face_count, - (SELECT COUNT(*) FROM faces f WHERE f.image_id = i.id AND f.person_id IS NOT NULL) as identified_faces - FROM images i - ORDER BY i.date_taken DESC, i.filename - LIMIT ? OFFSET ?''', (per_page, offset)) - - elif tab == 'identified_photos': - c.execute('SELECT COUNT(DISTINCT image_id) FROM faces WHERE person_id IS NOT NULL') - total = c.fetchone()[0] - offset = (page - 1) * per_page - - c.execute('''SELECT DISTINCT i.id, i.filename, i.path, i.date_taken, - (SELECT COUNT(*) FROM faces f WHERE f.image_id = i.id) as face_count, - (SELECT COUNT(*) FROM faces f WHERE f.image_id = i.id AND f.person_id IS NOT NULL) as identified_faces - FROM images i - JOIN faces f ON i.id = f.image_id - WHERE f.person_id IS NOT NULL - ORDER BY i.date_taken DESC, i.filename - LIMIT ? OFFSET ?''', (per_page, offset)) - - elif tab == 'unidentified_photos': - c.execute('SELECT COUNT(*) FROM images WHERE id NOT IN (SELECT DISTINCT image_id FROM faces WHERE person_id IS NOT NULL)') - total = c.fetchone()[0] - offset = (page - 1) * per_page - - c.execute('''SELECT i.id, i.filename, i.path, i.date_taken, - (SELECT COUNT(*) FROM faces f WHERE f.image_id = i.id) as face_count, - 0 as identified_faces - FROM images i - WHERE i.id NOT IN (SELECT DISTINCT image_id FROM faces WHERE person_id IS NOT NULL) - ORDER BY i.date_taken DESC, i.filename - LIMIT ? OFFSET ?''', (per_page, offset)) - else: - conn.close() - return jsonify({'error': 'Invalid tab for photos endpoint'}), 400 - - total_pages = (total + per_page - 1) // per_page - - photos = [] - for row in c.fetchall(): - # Get tags for this image - c.execute('''SELECT t.name, t.category FROM tags t - JOIN image_tags it ON t.id = it.tag_id - WHERE it.image_id = ?''', (row[0],)) - tags = [{'name': tag[0], 'category': tag[1]} for tag in c.fetchall()] - - photos.append({ - 'image_id': row[0], - 'filename': row[1], - 'path': row[2], - 'date_taken': row[3], - 'face_count': row[4], - 'identified_faces': row[5], - 'tags': tags - }) - - conn.close() - return jsonify({'photos': photos, 'total_pages': total_pages}) - -@app.route('/get_photo_thumbnail/') -def get_photo_thumbnail(image_id): - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - c.execute('SELECT path FROM images WHERE id = ?', (image_id,)) - row = c.fetchone() - conn.close() - - if not row: - return jsonify({'error': 'Image not found'}), 404 - - path = row[0] - if not os.path.exists(path): - return jsonify({'error': 'Image file not found'}), 404 - - try: - img = Image.open(path) - img.thumbnail((300, 300)) - buffered = BytesIO() - img.save(buffered, format="JPEG") - thumbnail = f'data:image/jpeg;base64,' + base64.b64encode(buffered.getvalue()).decode() - return jsonify({'thumbnail': thumbnail}) - except Exception as e: - print(f"Error generating photo thumbnail: {e}") - return jsonify({'error': 'Failed to generate thumbnail'}), 500 - -@app.route('/get_photo_faces/') -def get_photo_faces(image_id): - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - c.execute('''SELECT f.id, f.left, f.top, f.right, f.bottom, f.person_id, p.name - FROM faces f - LEFT JOIN people p ON f.person_id = p.id - WHERE f.image_id = ?''', (image_id,)) - - faces = [] - for row in c.fetchall(): - face_data = { - 'face_id': row[0], - 'left': row[1], - 'top': row[2], - 'right': row[3], - 'bottom': row[4], - 'person_id': row[5], - 'person_name': row[6] - } - # Get thumbnail for this face - thumbnail = generate_thumbnail(c.execute('SELECT path FROM images WHERE id = ?', (image_id,)).fetchone()[0], - (row[1], row[2], row[3], row[4]), (80, 80)) - if thumbnail: - face_data['thumbnail'] = thumbnail - faces.append(face_data) - - conn.close() - return jsonify({'faces': faces}) - -@app.route('/get_photo_metadata/') -def get_photo_metadata(image_id): - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - c.execute('SELECT filename, date_taken, width, height FROM images WHERE id = ?', (image_id,)) - row = c.fetchone() - conn.close() - - if not row: - return jsonify({'error': 'Image not found'}), 404 - - return jsonify({ - 'filename': row[0], - 'date_taken': row[1], - 'width': row[2], - 'height': row[3] - }) - -@app.route('/add_tag_to_photo', methods=['POST']) -def add_tag_to_photo(): - data = request.json - image_id = data.get('image_id') - tag_name = data.get('tag_name') - category = data.get('category') - - if not image_id or not tag_name: - return jsonify({'success': False, 'error': 'Missing parameters'}), 400 - - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - # Add tag if it doesn't exist - c.execute('INSERT OR IGNORE INTO tags (name, category) VALUES (?, ?)', (tag_name, category)) - c.execute('SELECT id FROM tags WHERE name = ?', (tag_name,)) - tag_id = c.fetchone()[0] - - # Add tag to image - c.execute('INSERT OR IGNORE INTO image_tags (image_id, tag_id) VALUES (?, ?)', (image_id, tag_id)) - - success = c.rowcount > 0 - conn.commit() - conn.close() - - return jsonify({'success': True}) - -@app.route('/remove_tag_from_photo', methods=['POST']) -def remove_tag_from_photo(): - data = request.json - image_id = data.get('image_id') - tag_name = data.get('tag_name') - - if not image_id or not tag_name: - return jsonify({'success': False, 'error': 'Missing parameters'}), 400 - - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - c.execute('''DELETE FROM image_tags - WHERE image_id = ? AND tag_id IN - (SELECT id FROM tags WHERE name = ?)''', (image_id, tag_name)) - - success = c.rowcount > 0 - conn.commit() - conn.close() - - return jsonify({'success': True}) - -@app.route('/update_filename', methods=['POST']) -def update_filename(): - data = request.json - image_id = data.get('image_id') - new_filename = data.get('new_filename') - - if not image_id or not new_filename: - return jsonify({'success': False, 'error': 'Missing parameters'}), 400 - - conn = sqlite3.connect(str(DATABASE_PATH)) - c = conn.cursor() - - c.execute('UPDATE images SET filename = ? WHERE id = ?', (new_filename, image_id)) - success = c.rowcount > 0 - - conn.commit() - conn.close() - - return jsonify({'success': success}) - -@app.route('/remove_face', methods=['POST']) -def remove_face(): - try: - data = request.get_json() - face_id = data.get('face_id') - - if not face_id: - return jsonify({'success': False, 'error': 'Face ID is required'}) - - # Remove face from database - conn = sqlite3.connect(str(DATABASE_PATH)) - cursor = conn.cursor() - cursor.execute('DELETE FROM faces WHERE id = ?', (face_id,)) - conn.commit() - conn.close() - - return jsonify({'success': True, 'message': 'Face removed successfully'}) - except Exception as e: - return jsonify({'success': False, 'error': str(e)}) - -@app.route('/find_duplicates') -def find_duplicates(): - try: - print("🔍 Find duplicates endpoint called") - conn = sqlite3.connect(str(DATABASE_PATH)) - cursor = conn.cursor() - - # First check if we have any images - cursor.execute('SELECT COUNT(*) FROM images') - total_images = cursor.fetchone()[0] - print(f"📊 Total images in database: {total_images}") - - if total_images == 0: - conn.close() - return jsonify({'success': True, 'duplicates': [], 'message': 'No images found in database'}) - - # Find photos with the same filename (case-insensitive) - cursor.execute(''' - SELECT id, filename, path, COUNT(*) as count - FROM images - GROUP BY LOWER(filename) - HAVING COUNT(*) > 1 - ORDER BY filename - ''') - - duplicate_groups = [] - seen_groups = set() - - for row in cursor.fetchall(): - filename = row[1] - - # Create a group key - group_key = filename.lower() - - if group_key not in seen_groups: - seen_groups.add(group_key) - - # Get all photos in this group - cursor.execute(''' - SELECT id, filename, path - FROM images - WHERE LOWER(filename) = ? - ORDER BY id - ''', (filename.lower(),)) - - group_photos = [] - for photo_row in cursor.fetchall(): - group_photos.append({ - 'id': photo_row[0], - 'filename': photo_row[1], - 'path': photo_row[2] - }) - - if len(group_photos) > 1: - duplicate_groups.append(group_photos) - print(f"🔍 Found duplicate group: {len(group_photos)} photos with filename '{filename}'") - - conn.close() - print(f"✅ Found {len(duplicate_groups)} duplicate groups") - return jsonify({'success': True, 'duplicates': duplicate_groups}) - except Exception as e: - print(f"❌ Error in find_duplicates: {str(e)}") - return jsonify({'success': False, 'error': str(e)}) - -@app.route('/remove_photo', methods=['POST']) -def remove_photo(): - try: - data = request.get_json() - photo_id = data.get('photo_id') - - if not photo_id: - return jsonify({'success': False, 'error': 'Photo ID is required'}) - - conn = sqlite3.connect(str(DATABASE_PATH)) - cursor = conn.cursor() - - # Get photo info for logging - cursor.execute('SELECT filename, path FROM images WHERE id = ?', (photo_id,)) - photo_info = cursor.fetchone() - - if not photo_info: - conn.close() - return jsonify({'success': False, 'error': 'Photo not found'}) - - # Remove all faces associated with this photo - cursor.execute('DELETE FROM faces WHERE image_id = ?', (photo_id,)) - - # Remove all tags associated with this photo - cursor.execute('DELETE FROM image_tags WHERE image_id = ?', (photo_id,)) - - # Remove the photo itself - cursor.execute('DELETE FROM images WHERE id = ?', (photo_id,)) - - conn.commit() - conn.close() - - print(f"Removed photo: {photo_info[0]} (ID: {photo_id})") - return jsonify({'success': True, 'message': f'Photo {photo_info[0]} removed successfully'}) - except Exception as e: - return jsonify({'success': False, 'error': str(e)}) - -@app.route('/remove_photos', methods=['POST']) -def remove_photos(): - try: - data = request.get_json() - photo_ids = data.get('photo_ids', []) - - if not photo_ids: - return jsonify({'success': False, 'error': 'Photo IDs are required'}) - - conn = sqlite3.connect(str(DATABASE_PATH)) - cursor = conn.cursor() - removed_count = 0 - - for photo_id in photo_ids: - # Get photo info for logging - cursor.execute('SELECT filename FROM images WHERE id = ?', (photo_id,)) - photo_info = cursor.fetchone() - - if photo_info: - # Remove all faces associated with this photo - cursor.execute('DELETE FROM faces WHERE image_id = ?', (photo_id,)) - - # Remove all tags associated with this photo - cursor.execute('DELETE FROM image_tags WHERE image_id = ?', (photo_id,)) - - # Remove the photo itself - cursor.execute('DELETE FROM images WHERE id = ?', (photo_id,)) - - removed_count += 1 - print(f"Removed photo: {photo_info[0]} (ID: {photo_id})") - - conn.commit() - conn.close() - - return jsonify({'success': True, 'message': f'{removed_count} photos removed successfully'}) - except Exception as e: - return jsonify({'success': False, 'error': str(e)}) - -@app.route('/mark_not_a_face', methods=['POST']) -def mark_not_a_face(): - try: - data = request.get_json() - face_id = data.get('face_id') - - if not face_id: - return jsonify({'success': False, 'error': 'Face ID is required'}) - - # Mark face as not a face by setting person_id to NULL - conn = sqlite3.connect(str(DATABASE_PATH)) - cursor = conn.cursor() - cursor.execute('UPDATE faces SET person_id = NULL WHERE id = ?', (face_id,)) - conn.commit() - conn.close() - - return jsonify({'success': True, 'message': 'Face marked as not a face'}) - except Exception as e: - return jsonify({'success': False, 'error': str(e)}) - -if __name__ == '__main__': - print("\n🌐 PunimTag Enhanced Web GUI") - print("=" * 50) - print("📊 Starting enhanced web interface...") - print("🔗 Open http://localhost:5000 in your browser") - print("⏹️ Press Ctrl+C to stop") - print("\n✨ Features:") - print(" ✅ View 20/50/100/200 faces per page") - print(" ✅ Single face navigation mode") - print(" ✅ View identified faces") - print(" ✅ Progress percentage display") - print(" ✅ Smart similar face suggestions") - print(" ✅ Improved workflow and performance") - print() - - app.run(debug=True, host='0.0.0.0', port=5000) - \ No newline at end of file diff --git a/src/backend/db_manager.py b/src/backend/db_manager.py deleted file mode 100644 index e28121d..0000000 --- a/src/backend/db_manager.py +++ /dev/null @@ -1,308 +0,0 @@ -#!/usr/bin/env python3 -""" -Database Management Utility for PunimTag -Clean, reset, inspect, and manage the database -""" - -import os -import sqlite3 -import shutil -from datetime import datetime -from typing import Dict, List -import json - - -class DatabaseManager: - def __init__(self, db_path: str = 'punimtag_simple.db'): - self.db_path = db_path - - def backup_database(self, backup_name: str = None) -> str: - """Create a backup of the current database""" - if not os.path.exists(self.db_path): - print(f"Database {self.db_path} does not exist") - return None - - if backup_name is None: - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - backup_name = f"{self.db_path}.backup_{timestamp}" - - shutil.copy2(self.db_path, backup_name) - print(f"✅ Database backed up to: {backup_name}") - return backup_name - - def clean_database(self): - """Clean all data but keep schema""" - if not os.path.exists(self.db_path): - print(f"Database {self.db_path} does not exist") - return - - # Backup first - self.backup_database() - - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - - try: - # Clear all data but keep schema - c.execute("DELETE FROM image_tags") - c.execute("DELETE FROM faces") - c.execute("DELETE FROM tags") - c.execute("DELETE FROM people") - c.execute("DELETE FROM images") - - # Reset auto-increment counters - c.execute("DELETE FROM sqlite_sequence") - - conn.commit() - print("✅ Database cleaned successfully") - - except Exception as e: - print(f"❌ Error cleaning database: {e}") - conn.rollback() - finally: - conn.close() - - def delete_database(self): - """Completely delete the database file""" - if os.path.exists(self.db_path): - # Backup first - self.backup_database() - os.remove(self.db_path) - print(f"✅ Database {self.db_path} deleted") - else: - print(f"Database {self.db_path} does not exist") - - def get_database_stats(self) -> Dict: - """Get comprehensive database statistics""" - if not os.path.exists(self.db_path): - return {"error": "Database does not exist"} - - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - - try: - stats = {} - - # Basic counts - c.execute("SELECT COUNT(*) FROM images") - stats['images'] = c.fetchone()[0] - - c.execute("SELECT COUNT(*) FROM faces") - stats['faces'] = c.fetchone()[0] - - c.execute("SELECT COUNT(*) FROM faces WHERE person_id IS NOT NULL") - stats['identified_faces'] = c.fetchone()[0] - - c.execute("SELECT COUNT(*) FROM people") - stats['people'] = c.fetchone()[0] - - c.execute("SELECT COUNT(*) FROM tags") - stats['tags'] = c.fetchone()[0] - - c.execute("SELECT COUNT(*) FROM image_tags") - stats['tagged_images'] = c.fetchone()[0] - - # Derived stats - stats['unidentified_faces'] = stats['faces'] - stats['identified_faces'] - - # Top people by face count - c.execute("""SELECT p.name, COUNT(f.id) as face_count - FROM people p - JOIN faces f ON p.id = f.person_id - GROUP BY p.id - ORDER BY face_count DESC - LIMIT 5""") - stats['top_people'] = [{"name": row[0], "faces": row[1]} for row in c.fetchall()] - - # Top tags - c.execute("""SELECT t.name, t.category, COUNT(it.image_id) as usage_count - FROM tags t - JOIN image_tags it ON t.id = it.tag_id - GROUP BY t.id - ORDER BY usage_count DESC - LIMIT 5""") - stats['top_tags'] = [{"name": row[0], "category": row[1], "usage": row[2]} for row in c.fetchall()] - - # Database file size - stats['file_size_bytes'] = os.path.getsize(self.db_path) - stats['file_size_mb'] = round(stats['file_size_bytes'] / (1024 * 1024), 2) - - return stats - - except Exception as e: - return {"error": str(e)} - finally: - conn.close() - - def inspect_database(self): - """Detailed inspection of database contents""" - stats = self.get_database_stats() - - if "error" in stats: - print(f"❌ {stats['error']}") - return - - print("\n📊 DATABASE INSPECTION") - print("=" * 50) - print(f"Database: {self.db_path}") - print(f"File size: {stats['file_size_mb']} MB") - print() - - print("📈 COUNTS:") - print(f" Images: {stats['images']}") - print(f" Faces: {stats['faces']}") - print(f" - Identified: {stats['identified_faces']}") - print(f" - Unidentified: {stats['unidentified_faces']}") - print(f" People: {stats['people']}") - print(f" Tags: {stats['tags']}") - print(f" Tagged images: {stats['tagged_images']}") - print() - - if stats['top_people']: - print("👥 TOP PEOPLE:") - for person in stats['top_people']: - print(f" {person['name']}: {person['faces']} faces") - print() - - if stats['top_tags']: - print("🏷️ TOP TAGS:") - for tag in stats['top_tags']: - category = f"({tag['category']})" if tag['category'] else "" - print(f" {tag['name']} {category}: {tag['usage']} uses") - - def list_all_people(self): - """List all people in the database""" - if not os.path.exists(self.db_path): - print("Database does not exist") - return - - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - - try: - c.execute("""SELECT p.id, p.name, COUNT(f.id) as face_count, p.created_at - FROM people p - LEFT JOIN faces f ON p.id = f.person_id - GROUP BY p.id - ORDER BY face_count DESC""") - - people = c.fetchall() - - if not people: - print("No people found in database") - return - - print("\n👥 ALL PEOPLE:") - print("-" * 60) - print(f"{'ID':<4} {'Name':<25} {'Faces':<8} {'Created':<15}") - print("-" * 60) - - for person_id, name, face_count, created in people: - created_short = created[:10] if created else "N/A" - print(f"{person_id:<4} {name:<25} {face_count:<8} {created_short:<15}") - - except Exception as e: - print(f"Error: {e}") - finally: - conn.close() - - def list_all_tags(self): - """List all tags in the database""" - if not os.path.exists(self.db_path): - print("Database does not exist") - return - - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - - try: - c.execute("""SELECT t.id, t.name, t.category, COUNT(it.image_id) as usage_count - FROM tags t - LEFT JOIN image_tags it ON t.id = it.tag_id - GROUP BY t.id - ORDER BY t.category, usage_count DESC""") - - tags = c.fetchall() - - if not tags: - print("No tags found in database") - return - - print("\n🏷️ ALL TAGS:") - print("-" * 60) - print(f"{'ID':<4} {'Name':<25} {'Category':<15} {'Usage':<8}") - print("-" * 60) - - for tag_id, name, category, usage in tags: - category = category or "None" - print(f"{tag_id:<4} {name:<25} {category:<15} {usage:<8}") - - except Exception as e: - print(f"Error: {e}") - finally: - conn.close() - - -def main(): - """Interactive database management""" - import sys - - if len(sys.argv) > 1: - db_path = sys.argv[1] - else: - db_path = 'punimtag_simple.db' - - manager = DatabaseManager(db_path) - - while True: - print("\n🗄️ DATABASE MANAGER") - print("=" * 30) - print("1. Inspect database") - print("2. Clean database (keep schema)") - print("3. Delete database completely") - print("4. Backup database") - print("5. List all people") - print("6. List all tags") - print("7. Exit") - - try: - choice = input("\nSelect option (1-7): ").strip() - - if choice == '1': - manager.inspect_database() - elif choice == '2': - confirm = input("⚠️ Clean all data? (y/N): ").strip().lower() - if confirm == 'y': - manager.clean_database() - else: - print("Cancelled") - elif choice == '3': - confirm = input("⚠️ Delete database completely? (y/N): ").strip().lower() - if confirm == 'y': - manager.delete_database() - break - else: - print("Cancelled") - elif choice == '4': - backup_name = input("Backup name (or Enter for auto): ").strip() - if not backup_name: - backup_name = None - manager.backup_database(backup_name) - elif choice == '5': - manager.list_all_people() - elif choice == '6': - manager.list_all_tags() - elif choice == '7': - break - else: - print("Invalid option") - - except KeyboardInterrupt: - print("\n\nExiting...") - break - except Exception as e: - print(f"Error: {e}") - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/src/backend/punimtag.py b/src/backend/punimtag.py deleted file mode 100644 index c1cf138..0000000 --- a/src/backend/punimtag.py +++ /dev/null @@ -1,744 +0,0 @@ -import os -import sqlite3 -import face_recognition -import numpy as np -from PIL import Image -from PIL.ExifTags import TAGS, GPSTAGS -from datetime import datetime -import json -from typing import List, Dict, Tuple, Optional -import pickle -from sklearn.cluster import DBSCAN -from sklearn.decomposition import PCA -from sklearn.preprocessing import StandardScaler -import concurrent.futures -from config import get_config -import dlib - -class PunimTag: - def __init__(self, db_path: str = 'punimtag.db', photos_dir: str = 'photos'): - self.db_path = db_path - self.photos_dir = photos_dir - self.config = get_config() - self.conn = self._init_db() - - def _init_db(self) -> sqlite3.Connection: - """Initialize database with comprehensive schema""" - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - - # Images table with metadata - c.execute('''CREATE TABLE IF NOT EXISTS images ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - path TEXT UNIQUE NOT NULL, - filename TEXT NOT NULL, - date_taken TIMESTAMP, - latitude REAL, - longitude REAL, - camera_make TEXT, - camera_model TEXT, - width INTEGER, - height INTEGER, - file_size INTEGER, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - )''') - - # People table for identified individuals - c.execute('''CREATE TABLE IF NOT EXISTS people ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT UNIQUE NOT NULL, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - )''') - - # Faces table with locations and encodings - c.execute('''CREATE TABLE IF NOT EXISTS faces ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - image_id INTEGER NOT NULL, - person_id INTEGER, - top INTEGER NOT NULL, - right INTEGER NOT NULL, - bottom INTEGER NOT NULL, - left INTEGER NOT NULL, - encoding BLOB NOT NULL, - confidence REAL, - is_confirmed BOOLEAN DEFAULT 0, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - FOREIGN KEY(image_id) REFERENCES images(id) ON DELETE CASCADE, - FOREIGN KEY(person_id) REFERENCES people(id) ON DELETE SET NULL - )''') - - # Tags table - c.execute('''CREATE TABLE IF NOT EXISTS tags ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT UNIQUE NOT NULL, - category TEXT, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - )''') - - # Image-tag relationship - c.execute('''CREATE TABLE IF NOT EXISTS image_tags ( - image_id INTEGER NOT NULL, - tag_id INTEGER NOT NULL, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - PRIMARY KEY(image_id, tag_id), - FOREIGN KEY(image_id) REFERENCES images(id) ON DELETE CASCADE, - FOREIGN KEY(tag_id) REFERENCES tags(id) ON DELETE CASCADE - )''') - - # Create indexes for performance - c.execute('CREATE INDEX IF NOT EXISTS idx_faces_person ON faces(person_id)') - c.execute('CREATE INDEX IF NOT EXISTS idx_faces_image ON faces(image_id)') - c.execute('CREATE INDEX IF NOT EXISTS idx_image_tags_image ON image_tags(image_id)') - c.execute('CREATE INDEX IF NOT EXISTS idx_image_tags_tag ON image_tags(tag_id)') - - conn.commit() - return conn - - def extract_metadata(self, image_path: str) -> Dict: - """Extract EXIF metadata from image""" - metadata = { - 'date_taken': None, - 'latitude': None, - 'longitude': None, - 'camera_make': None, - 'camera_model': None, - 'width': None, - 'height': None, - 'file_size': os.path.getsize(image_path) - } - - try: - with Image.open(image_path) as img: - metadata['width'], metadata['height'] = img.size - - exifdata = img.getexif() - if exifdata: - for tag_id, value in exifdata.items(): - tag = TAGS.get(tag_id, tag_id) - - if tag == 'DateTime': - metadata['date_taken'] = datetime.strptime(value, '%Y:%m:%d %H:%M:%S') - elif tag == 'Make': - metadata['camera_make'] = value - elif tag == 'Model': - metadata['camera_model'] = value - elif tag == 'GPSInfo': - gps_data = {} - for t in value: - sub_tag = GPSTAGS.get(t, t) - gps_data[sub_tag] = value[t] - - # Extract GPS coordinates - if 'GPSLatitude' in gps_data and 'GPSLongitude' in gps_data: - lat = self._convert_to_degrees(gps_data['GPSLatitude']) - lon = self._convert_to_degrees(gps_data['GPSLongitude']) - - if gps_data.get('GPSLatitudeRef') == 'S': - lat = -lat - if gps_data.get('GPSLongitudeRef') == 'W': - lon = -lon - - metadata['latitude'] = lat - metadata['longitude'] = lon - except Exception as e: - print(f"Error extracting metadata from {image_path}: {e}") - - return metadata - - def _convert_to_degrees(self, value): - """Convert GPS coordinates to degrees""" - d, m, s = value - return d + (m / 60.0) + (s / 3600.0) - - def process_image(self, image_path: str) -> int: - """Process a single image and return its database ID""" - c = self.conn.cursor() - - # Extract metadata - metadata = self.extract_metadata(image_path) - filename = os.path.basename(image_path) - - # Insert or update image record - c.execute('''INSERT OR REPLACE INTO images - (path, filename, date_taken, latitude, longitude, - camera_make, camera_model, width, height, file_size) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''', - (image_path, filename, metadata['date_taken'], - metadata['latitude'], metadata['longitude'], - metadata['camera_make'], metadata['camera_model'], - metadata['width'], metadata['height'], metadata['file_size'])) - - image_id = c.lastrowid - - # Detect and process faces - try: - image = face_recognition.load_image_file(image_path) - model = self.config.face_recognition.detection_model if dlib.DLIB_USE_CUDA and self.config.face_recognition.enable_gpu else 'hog' - face_locations = face_recognition.face_locations(image, model=model) - face_encodings = face_recognition.face_encodings(image, face_locations) - - for location, encoding in zip(face_locations, face_encodings): - top, right, bottom, left = location - encoding_blob = pickle.dumps(encoding) - - # Try to identify the person - person_id, confidence = self.identify_face(encoding) - - c.execute('''INSERT INTO faces - (image_id, person_id, top, right, bottom, left, encoding, confidence) - VALUES (?, ?, ?, ?, ?, ?, ?, ?)''', - (image_id, person_id, top, right, bottom, left, encoding_blob, confidence)) - - except Exception as e: - print(f"Error processing faces in {image_path}: {e}") - - self.conn.commit() - return image_id - - def identify_face(self, unknown_encoding: np.ndarray, threshold: float = 0.6) -> Tuple[Optional[int], Optional[float]]: - """Identify a face by comparing with known faces""" - c = self.conn.cursor() - - # Get all known face encodings - c.execute('''SELECT f.person_id, f.encoding - FROM faces f - WHERE f.person_id IS NOT NULL - AND f.is_confirmed = 1''') - - known_faces = c.fetchall() - - if not known_faces: - return None, None - - # Group encodings by person - person_encodings = {} - for person_id, encoding_blob in known_faces: - encoding = pickle.loads(encoding_blob) - if person_id not in person_encodings: - person_encodings[person_id] = [] - person_encodings[person_id].append(encoding) - - # Compare with each person's encodings - best_match = None - best_distance = float('inf') - - for person_id, encodings in person_encodings.items(): - distances = face_recognition.face_distance(encodings, unknown_encoding) - min_distance = np.min(distances) - - if min_distance < best_distance and min_distance < threshold: - best_distance = min_distance - best_match = person_id - - if best_match: - confidence = 1.0 - best_distance - return best_match, confidence - - return None, None - - def add_person(self, name: str) -> int: - """Add a new person to the database""" - c = self.conn.cursor() - c.execute('INSERT OR IGNORE INTO people (name) VALUES (?)', (name,)) - self.conn.commit() - - c.execute('SELECT id FROM people WHERE name = ?', (name,)) - result = c.fetchone() - if result: - return result[0] - else: - # This shouldn't happen due to INSERT OR IGNORE, but handle it - c.execute('SELECT id FROM people WHERE name = ?', (name,)) - return c.fetchone()[0] - - def assign_face_to_person(self, face_id: int, person_id: int, is_confirmed: bool = True): - """Assign a face to a person""" - c = self.conn.cursor() - c.execute('''UPDATE faces - SET person_id = ?, is_confirmed = ? - WHERE id = ?''', - (person_id, is_confirmed, face_id)) - self.conn.commit() - - def add_tag(self, name: str, category: Optional[str] = None) -> int: - """Add a new tag""" - c = self.conn.cursor() - c.execute('INSERT OR IGNORE INTO tags (name, category) VALUES (?, ?)', - (name, category)) - self.conn.commit() - - c.execute('SELECT id FROM tags WHERE name = ?', (name,)) - return c.fetchone()[0] - - def tag_image(self, image_id: int, tag_id: int): - """Add a tag to an image""" - c = self.conn.cursor() - c.execute('INSERT OR IGNORE INTO image_tags (image_id, tag_id) VALUES (?, ?)', - (image_id, tag_id)) - self.conn.commit() - - def get_unidentified_faces(self) -> List[Dict]: - """Get all faces that haven't been identified""" - c = self.conn.cursor() - c.execute('''SELECT f.id, f.image_id, i.path, f.top, f.right, f.bottom, f.left - FROM faces f - JOIN images i ON f.image_id = i.id - WHERE f.person_id IS NULL - ORDER BY i.path''') - - faces = [] - for row in c.fetchall(): - faces.append({ - 'face_id': row[0], - 'image_id': row[1], - 'image_path': row[2], - 'location': (row[3], row[4], row[5], row[6]) - }) - - return faces - - def search_images(self, people: Optional[List[str]] = None, tags: Optional[List[str]] = None, - date_from: Optional[datetime] = None, date_to: Optional[datetime] = None) -> List[Dict]: - """Search images by people, tags, and date range""" - c = self.conn.cursor() - - query = '''SELECT DISTINCT i.id, i.path, i.filename, i.date_taken - FROM images i''' - - joins = [] - conditions = [] - params = [] - - if people: - joins.append('JOIN faces f ON i.id = f.image_id') - joins.append('JOIN people p ON f.person_id = p.id') - placeholders = ','.join(['?' for _ in people]) - conditions.append(f'p.name IN ({placeholders})') - params.extend(people) - - if tags: - joins.append('JOIN image_tags it ON i.id = it.image_id') - joins.append('JOIN tags t ON it.tag_id = t.id') - placeholders = ','.join(['?' for _ in tags]) - conditions.append(f't.name IN ({placeholders})') - params.extend(tags) - - if date_from: - conditions.append('i.date_taken >= ?') - params.append(date_from) - - if date_to: - conditions.append('i.date_taken <= ?') - params.append(date_to) - - if joins: - query += ' ' + ' '.join(joins) - - if conditions: - query += ' WHERE ' + ' AND '.join(conditions) - - query += ' ORDER BY i.date_taken DESC' - - c.execute(query, params) - - results = [] - for row in c.fetchall(): - results.append({ - 'id': row[0], - 'path': row[1], - 'filename': row[2], - 'date_taken': row[3] - }) - - return results - - def process_directory(self): - """Process all images in the photos directory""" - image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif'} - processed = 0 - - for root, _, files in os.walk(self.photos_dir): - for file in files: - if any(file.lower().endswith(ext) for ext in image_extensions): - image_path = os.path.join(root, file) - print(f"Processing: {image_path}") - try: - self.process_image(image_path) - processed += 1 - except Exception as e: - print(f"Error processing {image_path}: {e}") - - print(f"\nProcessed {processed} images") - return processed - - def calculate_face_quality(self, face_encoding: np.ndarray, face_location: Tuple[int, int, int, int]) -> float: - """Calculate face quality score based on size and encoding variance""" - top, right, bottom, left = face_location - - # Calculate face size - face_width = right - left - face_height = bottom - top - face_area = face_width * face_height - - # Normalize face size (assuming typical face sizes) - size_score = min(face_area / (100 * 100), 1.0) # Normalize to 100x100 baseline - - # Calculate encoding variance (higher variance = more distinctive features) - encoding_variance = np.var(face_encoding) - variance_score = min(encoding_variance / 0.01, 1.0) # Normalize to typical variance - - # Combined quality score - quality_score = (size_score * 0.3) + (variance_score * 0.7) - - return quality_score - - def cluster_unknown_faces(self) -> Dict[int, List[int]]: - """Cluster unidentified faces to group similar faces together""" - c = self.conn.cursor() - - # Get all unidentified faces - c.execute('''SELECT id, encoding FROM faces WHERE person_id IS NULL''') - unidentified_faces = c.fetchall() - - if len(unidentified_faces) < 2: - return {} - - print(f"Clustering {len(unidentified_faces)} unidentified faces...") - - # Extract encodings - face_ids = [] - encodings = [] - - for face_id, encoding_blob in unidentified_faces: - face_ids.append(face_id) - encoding = pickle.loads(encoding_blob) - encodings.append(encoding) - - encodings = np.array(encodings) - - # Apply DBSCAN clustering - clustering = DBSCAN( - eps=self.config.face_recognition.cluster_epsilon, - min_samples=self.config.face_recognition.cluster_min_size, - metric='euclidean' - ).fit(encodings) - - # Group faces by cluster - clusters = {} - for i, cluster_id in enumerate(clustering.labels_): - if cluster_id != -1: # Ignore noise points - if cluster_id not in clusters: - clusters[cluster_id] = [] - clusters[cluster_id].append(face_ids[i]) - - print(f"Found {len(clusters)} face clusters") - - return clusters - - def get_face_clusters(self) -> List[Dict]: - """Get all face clusters with representative faces""" - clusters = self.cluster_unknown_faces() - - c = self.conn.cursor() - cluster_data = [] - - for cluster_id, face_ids in clusters.items(): - # Get representative face (first face in cluster for now) - representative_face_id = face_ids[0] - - # Get face details - c.execute('''SELECT f.id, f.image_id, i.path, f.top, f.right, f.bottom, f.left - FROM faces f - JOIN images i ON f.image_id = i.id - WHERE f.id = ?''', (representative_face_id,)) - - face_info = c.fetchone() - if face_info: - cluster_data.append({ - 'cluster_id': cluster_id, - 'face_count': len(face_ids), - 'face_ids': face_ids, - 'representative_face': { - 'face_id': face_info[0], - 'image_id': face_info[1], - 'image_path': face_info[2], - 'location': (face_info[3], face_info[4], face_info[5], face_info[6]) - } - }) - - # Sort by face count (most common faces first) - cluster_data.sort(key=lambda x: x['face_count'], reverse=True) - - return cluster_data - - def assign_cluster_to_person(self, cluster_id: int, person_name: str): - """Assign all faces in a cluster to a person""" - clusters = self.cluster_unknown_faces() - - if cluster_id not in clusters: - return False - - # Add or get person - person_id = self.add_person(person_name) - - # Assign all faces in cluster - for face_id in clusters[cluster_id]: - self.assign_face_to_person(face_id, person_id, is_confirmed=True) - - print(f"Assigned {len(clusters[cluster_id])} faces to {person_name}") - return True - - def get_most_common_faces(self, limit: int = 20) -> List[Dict]: - """Get faces sorted by frequency (most photographed people)""" - c = self.conn.cursor() - - # Get identified people with face counts - c.execute('''SELECT p.id, p.name, COUNT(f.id) as face_count, - MIN(f.id) as sample_face_id - FROM people p - JOIN faces f ON p.id = f.person_id - WHERE f.is_confirmed = 1 - GROUP BY p.id - ORDER BY face_count DESC - LIMIT ?''', (limit,)) - - people_data = [] - for person_id, name, face_count, sample_face_id in c.fetchall(): - # Get sample face details - c.execute('''SELECT f.image_id, i.path, f.top, f.right, f.bottom, f.left - FROM faces f - JOIN images i ON f.image_id = i.id - WHERE f.id = ?''', (sample_face_id,)) - - face_info = c.fetchone() - if face_info: - people_data.append({ - 'person_id': person_id, - 'name': name, - 'face_count': face_count, - 'sample_face': { - 'face_id': sample_face_id, - 'image_id': face_info[0], - 'image_path': face_info[1], - 'location': (face_info[2], face_info[3], face_info[4], face_info[5]) - } - }) - - return people_data - - def verify_person_faces(self, person_id: int) -> List[Dict]: - """Get all faces assigned to a person for verification""" - c = self.conn.cursor() - - c.execute('''SELECT f.id, f.image_id, i.path, f.top, f.right, f.bottom, f.left, f.confidence - FROM faces f - JOIN images i ON f.image_id = i.id - WHERE f.person_id = ? - ORDER BY f.confidence DESC''', (person_id,)) - - faces = [] - for row in c.fetchall(): - faces.append({ - 'face_id': row[0], - 'image_id': row[1], - 'image_path': row[2], - 'location': (row[3], row[4], row[5], row[6]), - 'confidence': row[7] - }) - - return faces - - def remove_incorrect_face_assignment(self, face_id: int): - """Remove incorrect face assignment (set person_id to NULL)""" - c = self.conn.cursor() - c.execute('UPDATE faces SET person_id = NULL, is_confirmed = 0 WHERE id = ?', (face_id,)) - self.conn.commit() - - def batch_process_images(self, image_paths: List[str], batch_size: int = None) -> Dict[str, int]: - """Process images in batches for better performance""" - if batch_size is None: - batch_size = self.config.processing.batch_size - - results = { - 'processed': 0, - 'errors': 0, - 'skipped': 0, - 'faces_detected': 0 - } - - print(f"Processing {len(image_paths)} images in batches of {batch_size}") - - for i in range(0, len(image_paths), batch_size): - batch = image_paths[i:i + batch_size] - print(f"Processing batch {i//batch_size + 1}/{(len(image_paths) + batch_size - 1)//batch_size}") - - for image_path in batch: - try: - # Check if already processed - if self.config.processing.skip_processed: - c = self.conn.cursor() - c.execute('SELECT id FROM images WHERE path = ?', (image_path,)) - if c.fetchone(): - results['skipped'] += 1 - continue - - # Process image - image_id = self.process_image(image_path) - - # Count faces detected - c = self.conn.cursor() - c.execute('SELECT COUNT(*) FROM faces WHERE image_id = ?', (image_id,)) - face_count = c.fetchone()[0] - - results['processed'] += 1 - results['faces_detected'] += face_count - - except Exception as e: - print(f"Error processing {image_path}: {e}") - results['errors'] += 1 - - return results - - def advanced_search(self, **kwargs) -> List[Dict]: - """Advanced search with multiple criteria and complex queries""" - c = self.conn.cursor() - - # Base query - query = '''SELECT DISTINCT i.id, i.path, i.filename, i.date_taken, - i.latitude, i.longitude, i.camera_make, i.camera_model''' - - from_clause = ' FROM images i' - joins = [] - conditions = [] - params = [] - - # People filter - if 'people' in kwargs and kwargs['people']: - joins.append('JOIN faces f ON i.id = f.image_id') - joins.append('JOIN people p ON f.person_id = p.id') - - people_names = kwargs['people'] - if isinstance(people_names, str): - people_names = [people_names] - - placeholders = ','.join(['?' for _ in people_names]) - conditions.append(f'p.name IN ({placeholders})') - params.extend(people_names) - - # Tags filter - if 'tags' in kwargs and kwargs['tags']: - joins.append('JOIN image_tags it ON i.id = it.image_id') - joins.append('JOIN tags t ON it.tag_id = t.id') - - tags = kwargs['tags'] - if isinstance(tags, str): - tags = [tags] - - placeholders = ','.join(['?' for _ in tags]) - conditions.append(f't.name IN ({placeholders})') - params.extend(tags) - - # Date range filters - if 'date_from' in kwargs and kwargs['date_from']: - conditions.append('i.date_taken >= ?') - params.append(kwargs['date_from']) - - if 'date_to' in kwargs and kwargs['date_to']: - conditions.append('i.date_taken <= ?') - params.append(kwargs['date_to']) - - # Location filters - if 'latitude_min' in kwargs and kwargs['latitude_min']: - conditions.append('i.latitude >= ?') - params.append(kwargs['latitude_min']) - - if 'latitude_max' in kwargs and kwargs['latitude_max']: - conditions.append('i.latitude <= ?') - params.append(kwargs['latitude_max']) - - if 'longitude_min' in kwargs and kwargs['longitude_min']: - conditions.append('i.longitude >= ?') - params.append(kwargs['longitude_min']) - - if 'longitude_max' in kwargs and kwargs['longitude_max']: - conditions.append('i.longitude <= ?') - params.append(kwargs['longitude_max']) - - # Camera filters - if 'camera_make' in kwargs and kwargs['camera_make']: - conditions.append('i.camera_make LIKE ?') - params.append(f"%{kwargs['camera_make']}%") - - # Multiple people requirement - if 'min_people' in kwargs and kwargs['min_people']: - if 'people' not in kwargs: # Add people join if not already added - joins.append('JOIN faces f ON i.id = f.image_id') - joins.append('JOIN people p ON f.person_id = p.id') - - # This requires a subquery to count distinct people per image - having_clause = f' HAVING COUNT(DISTINCT p.id) >= {kwargs["min_people"]}' - else: - having_clause = '' - - # Build final query - full_query = query + from_clause - if joins: - full_query += ' ' + ' '.join(joins) - if conditions: - full_query += ' WHERE ' + ' AND '.join(conditions) - - # Group by image to handle multiple joins - full_query += ' GROUP BY i.id' - - if having_clause: - full_query += having_clause - - # Order by date - full_query += ' ORDER BY i.date_taken DESC' - - # Limit results - if 'limit' in kwargs and kwargs['limit']: - full_query += f' LIMIT {kwargs["limit"]}' - - c.execute(full_query, params) - - results = [] - for row in c.fetchall(): - results.append({ - 'id': row[0], - 'path': row[1], - 'filename': row[2], - 'date_taken': row[3], - 'latitude': row[4], - 'longitude': row[5], - 'camera_make': row[6], - 'camera_model': row[7] - }) - - return results - - def close(self): - """Close database connection""" - self.conn.close() - - -def main(): - """Main entry point""" - tagger = PunimTag() - - print("PunimTag - Face Recognition and Photo Tagging System") - print("=" * 50) - - # Process all images - tagger.process_directory() - - # Show unidentified faces count - unidentified = tagger.get_unidentified_faces() - print(f"\nFound {len(unidentified)} unidentified faces") - - tagger.close() - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/src/backend/punimtag_simple.py b/src/backend/punimtag_simple.py deleted file mode 100644 index b05c3e4..0000000 --- a/src/backend/punimtag_simple.py +++ /dev/null @@ -1,462 +0,0 @@ -#!/usr/bin/env python3 -""" -Simplified PunimTag for initial testing -Core functionality without advanced clustering (sklearn dependency) -""" - -import os -import sqlite3 -import face_recognition -import numpy as np -from PIL import Image -from PIL.ExifTags import TAGS, GPSTAGS -from datetime import datetime -import json -from typing import List, Dict, Tuple, Optional -import pickle - - -class SimplePunimTag: - def __init__(self, db_path: str = 'punimtag_simple.db', photos_dir: str = 'photos'): - self.db_path = db_path - self.photos_dir = photos_dir - self.conn = self._init_db() - - def _init_db(self) -> sqlite3.Connection: - """Initialize database with comprehensive schema""" - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - - # Images table with metadata - c.execute('''CREATE TABLE IF NOT EXISTS images ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - path TEXT UNIQUE NOT NULL, - filename TEXT NOT NULL, - date_taken TIMESTAMP, - latitude REAL, - longitude REAL, - camera_make TEXT, - camera_model TEXT, - width INTEGER, - height INTEGER, - file_size INTEGER, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - )''') - - # People table for identified individuals - c.execute('''CREATE TABLE IF NOT EXISTS people ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT UNIQUE NOT NULL, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - )''') - - # Faces table with locations and encodings - c.execute('''CREATE TABLE IF NOT EXISTS faces ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - image_id INTEGER NOT NULL, - person_id INTEGER, - top INTEGER NOT NULL, - right INTEGER NOT NULL, - bottom INTEGER NOT NULL, - left INTEGER NOT NULL, - encoding BLOB NOT NULL, - confidence REAL, - is_confirmed BOOLEAN DEFAULT 0, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - FOREIGN KEY(image_id) REFERENCES images(id) ON DELETE CASCADE, - FOREIGN KEY(person_id) REFERENCES people(id) ON DELETE SET NULL - )''') - - # Tags table - c.execute('''CREATE TABLE IF NOT EXISTS tags ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT UNIQUE NOT NULL, - category TEXT, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - )''') - - # Image-tag relationship - c.execute('''CREATE TABLE IF NOT EXISTS image_tags ( - image_id INTEGER NOT NULL, - tag_id INTEGER NOT NULL, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - PRIMARY KEY(image_id, tag_id), - FOREIGN KEY(image_id) REFERENCES images(id) ON DELETE CASCADE, - FOREIGN KEY(tag_id) REFERENCES tags(id) ON DELETE CASCADE - )''') - - # Create indexes for performance - c.execute('CREATE INDEX IF NOT EXISTS idx_faces_person ON faces(person_id)') - c.execute('CREATE INDEX IF NOT EXISTS idx_faces_image ON faces(image_id)') - c.execute('CREATE INDEX IF NOT EXISTS idx_image_tags_image ON image_tags(image_id)') - c.execute('CREATE INDEX IF NOT EXISTS idx_image_tags_tag ON image_tags(tag_id)') - - conn.commit() - return conn - - def extract_metadata(self, image_path: str) -> Dict: - """Extract EXIF metadata from image with better error handling""" - metadata = { - 'date_taken': None, - 'latitude': None, - 'longitude': None, - 'camera_make': None, - 'camera_model': None, - 'width': None, - 'height': None, - 'file_size': None - } - - try: - # Get file size - if os.path.exists(image_path): - metadata['file_size'] = os.path.getsize(image_path) - else: - print(f"Warning: File not found: {image_path}") - return metadata - - # Try to open image - img = Image.open(image_path) - metadata['width'], metadata['height'] = img.size - - # Extract EXIF data - exifdata = img.getexif() - if exifdata: - for tag_id, value in exifdata.items(): - tag = TAGS.get(tag_id, tag_id) - - try: - if tag == 'DateTime': - metadata['date_taken'] = datetime.strptime(value, '%Y:%m:%d %H:%M:%S') - elif tag == 'Make': - metadata['camera_make'] = str(value).strip() - elif tag == 'Model': - metadata['camera_model'] = str(value).strip() - elif tag == 'GPSInfo': - gps_data = {} - for t in value: - sub_tag = GPSTAGS.get(t, t) - gps_data[sub_tag] = value[t] - - # Extract GPS coordinates - if 'GPSLatitude' in gps_data and 'GPSLongitude' in gps_data: - try: - lat = self._convert_to_degrees(gps_data['GPSLatitude']) - lon = self._convert_to_degrees(gps_data['GPSLongitude']) - - if gps_data.get('GPSLatitudeRef') == 'S': - lat = -lat - if gps_data.get('GPSLongitudeRef') == 'W': - lon = -lon - - metadata['latitude'] = lat - metadata['longitude'] = lon - except Exception as e: - print(f"Error parsing GPS data: {e}") - except Exception as e: - print(f"Error parsing EXIF tag {tag}: {e}") - - except Exception as e: - print(f"Error extracting metadata from {image_path}: {e}") - - # Set defaults for missing values - for key, value in metadata.items(): - if value is None and key not in ['date_taken', 'latitude', 'longitude']: - metadata[key] = 'N/A' - - return metadata - - def _convert_to_degrees(self, value): - """Convert GPS coordinates to degrees""" - if len(value) == 3: - d, m, s = value - return d + (m / 60.0) + (s / 3600.0) - return 0.0 - - def process_image(self, image_path: str) -> int: - """Process a single image and return its database ID""" - c = self.conn.cursor() - - print(f"Processing: {image_path}") - - # Extract metadata - metadata = self.extract_metadata(image_path) - filename = os.path.basename(image_path) - - # Insert or update image record - c.execute('''INSERT OR REPLACE INTO images - (path, filename, date_taken, latitude, longitude, - camera_make, camera_model, width, height, file_size) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''', - (image_path, filename, metadata['date_taken'], - metadata['latitude'], metadata['longitude'], - metadata['camera_make'], metadata['camera_model'], - metadata['width'], metadata['height'], metadata['file_size'])) - - image_id = c.lastrowid - - # Detect and process faces - try: - image = face_recognition.load_image_file(image_path) - face_locations = face_recognition.face_locations(image, model='hog') - face_encodings = face_recognition.face_encodings(image, face_locations) - - print(f" Found {len(face_locations)} faces") - - for location, encoding in zip(face_locations, face_encodings): - top, right, bottom, left = location - encoding_blob = pickle.dumps(encoding) - - # Try to identify the person - person_id, confidence = self.identify_face(encoding) - - c.execute('''INSERT INTO faces - (image_id, person_id, top, right, bottom, left, encoding, confidence) - VALUES (?, ?, ?, ?, ?, ?, ?, ?)''', - (image_id, person_id, top, right, bottom, left, encoding_blob, confidence)) - - except Exception as e: - print(f" Error processing faces: {e}") - - self.conn.commit() - return image_id - - def identify_face(self, unknown_encoding: np.ndarray, threshold: float = 0.6) -> Tuple[Optional[int], Optional[float]]: - """Identify a face by comparing with known faces""" - c = self.conn.cursor() - - # Get all known face encodings - c.execute('''SELECT f.person_id, f.encoding - FROM faces f - WHERE f.person_id IS NOT NULL - AND f.is_confirmed = 1''') - - known_faces = c.fetchall() - - if not known_faces: - return None, None - - # Group encodings by person - person_encodings = {} - for person_id, encoding_blob in known_faces: - encoding = pickle.loads(encoding_blob) - if person_id not in person_encodings: - person_encodings[person_id] = [] - person_encodings[person_id].append(encoding) - - # Compare with each person's encodings - best_match = None - best_distance = float('inf') - - for person_id, encodings in person_encodings.items(): - distances = face_recognition.face_distance(encodings, unknown_encoding) - min_distance = np.min(distances) - - if min_distance < best_distance and min_distance < threshold: - best_distance = min_distance - best_match = person_id - - if best_match: - confidence = 1.0 - best_distance - return best_match, confidence - - return None, None - - def add_person(self, name: str) -> int: - """Add a new person to the database""" - c = self.conn.cursor() - c.execute('INSERT OR IGNORE INTO people (name) VALUES (?)', (name,)) - self.conn.commit() - - c.execute('SELECT id FROM people WHERE name = ?', (name,)) - result = c.fetchone() - return result[0] if result else None - - def assign_face_to_person(self, face_id: int, person_id: int, is_confirmed: bool = True): - """Assign a face to a person""" - c = self.conn.cursor() - c.execute('''UPDATE faces - SET person_id = ?, is_confirmed = ? - WHERE id = ?''', - (person_id, is_confirmed, face_id)) - self.conn.commit() - - def add_tag(self, name: str, category: str = None) -> int: - """Add a new tag""" - c = self.conn.cursor() - c.execute('INSERT OR IGNORE INTO tags (name, category) VALUES (?, ?)', - (name, category)) - self.conn.commit() - - c.execute('SELECT id FROM tags WHERE name = ?', (name,)) - result = c.fetchone() - return result[0] if result else None - - def tag_image(self, image_id: int, tag_id: int): - """Add a tag to an image""" - c = self.conn.cursor() - c.execute('INSERT OR IGNORE INTO image_tags (image_id, tag_id) VALUES (?, ?)', - (image_id, tag_id)) - self.conn.commit() - - def get_unidentified_faces(self) -> List[Dict]: - """Get all faces that haven't been identified""" - c = self.conn.cursor() - c.execute('''SELECT f.id, f.image_id, i.path, f.top, f.right, f.bottom, f.left - FROM faces f - JOIN images i ON f.image_id = i.id - WHERE f.person_id IS NULL - ORDER BY i.path''') - - faces = [] - for row in c.fetchall(): - faces.append({ - 'face_id': row[0], - 'image_id': row[1], - 'image_path': row[2], - 'location': (row[3], row[4], row[5], row[6]) - }) - - return faces - - def simple_search(self, people: List[str] = None, tags: List[str] = None, - date_from: datetime = None, date_to: datetime = None) -> List[Dict]: - """Simple search by people, tags, and date range""" - c = self.conn.cursor() - - query = '''SELECT DISTINCT i.id, i.path, i.filename, i.date_taken - FROM images i''' - - joins = [] - conditions = [] - params = [] - - if people: - joins.append('JOIN faces f ON i.id = f.image_id') - joins.append('JOIN people p ON f.person_id = p.id') - placeholders = ','.join(['?' for _ in people]) - conditions.append(f'p.name IN ({placeholders})') - params.extend(people) - - if tags: - joins.append('JOIN image_tags it ON i.id = it.image_id') - joins.append('JOIN tags t ON it.tag_id = t.id') - placeholders = ','.join(['?' for _ in tags]) - conditions.append(f't.name IN ({placeholders})') - params.extend(tags) - - if date_from: - conditions.append('i.date_taken >= ?') - params.append(date_from) - - if date_to: - conditions.append('i.date_taken <= ?') - params.append(date_to) - - if joins: - query += ' ' + ' '.join(joins) - - if conditions: - query += ' WHERE ' + ' AND '.join(conditions) - - query += ' ORDER BY i.date_taken DESC' - - c.execute(query, params) - - results = [] - for row in c.fetchall(): - results.append({ - 'id': row[0], - 'path': row[1], - 'filename': row[2], - 'date_taken': row[3] - }) - - return results - - def process_directory(self): - """Process all images in the photos directory""" - image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.gif'} - processed = 0 - errors = 0 - - for root, _, files in os.walk(self.photos_dir): - for file in files: - if any(file.lower().endswith(ext) for ext in image_extensions): - image_path = os.path.join(root, file) - try: - self.process_image(image_path) - processed += 1 - except Exception as e: - print(f"Error processing {image_path}: {e}") - errors += 1 - - print(f"\nProcessed {processed} images, {errors} errors") - return processed - - def get_stats(self) -> Dict: - """Get database statistics""" - c = self.conn.cursor() - - c.execute("SELECT COUNT(*) FROM images") - image_count = c.fetchone()[0] - - c.execute("SELECT COUNT(*) FROM faces") - face_count = c.fetchone()[0] - - c.execute("SELECT COUNT(*) FROM faces WHERE person_id IS NOT NULL") - identified_faces = c.fetchone()[0] - - c.execute("SELECT COUNT(*) FROM people") - people_count = c.fetchone()[0] - - c.execute("SELECT COUNT(*) FROM tags") - tag_count = c.fetchone()[0] - - return { - 'images': image_count, - 'faces': face_count, - 'identified_faces': identified_faces, - 'unidentified_faces': face_count - identified_faces, - 'people': people_count, - 'tags': tag_count - } - - def close(self): - """Close database connection""" - self.conn.close() - - -def main(): - """Main entry point for testing""" - print("SimplePunimTag - Testing Backend") - print("=" * 50) - - tagger = SimplePunimTag() - - # Get initial stats - stats = tagger.get_stats() - print(f"Initial stats: {stats}") - - # Process images if photos directory exists - if os.path.exists(tagger.photos_dir): - processed = tagger.process_directory() - - # Show final stats - final_stats = tagger.get_stats() - print(f"\nFinal stats: {final_stats}") - - # Show unidentified faces - unidentified = tagger.get_unidentified_faces() - print(f"Unidentified faces: {len(unidentified)}") - else: - print(f"Photos directory '{tagger.photos_dir}' not found") - print("Create the directory and add some photos to test") - - tagger.close() - print("\nTesting completed!") - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/src/backend/visual_identifier.py b/src/backend/visual_identifier.py deleted file mode 100644 index fd84996..0000000 --- a/src/backend/visual_identifier.py +++ /dev/null @@ -1,204 +0,0 @@ -#!/usr/bin/env python3 -""" -Visual Face Identifier for PunimTag -Shows face crops so you can see who you're identifying -""" - -import sqlite3 -import os -from PIL import Image -import subprocess -import sys - -class VisualFaceIdentifier: - def __init__(self, db_path='punimtag_simple.db'): - self.db_path = db_path - - def get_unidentified_faces(self, limit=10): - """Get a limited number of unidentified faces""" - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - - c.execute('''SELECT f.id, f.image_id, i.path, i.filename, f.top, f.right, f.bottom, f.left - FROM faces f - JOIN images i ON f.image_id = i.id - WHERE f.person_id IS NULL - LIMIT ?''', (limit,)) - - faces = c.fetchall() - conn.close() - return faces - - def extract_face_crop(self, image_path, top, right, bottom, left): - """Extract and save a face crop""" - try: - if not os.path.exists(image_path): - return None - - with Image.open(image_path) as img: - # Crop the face with some padding - padding = 20 - crop_top = max(0, top - padding) - crop_left = max(0, left - padding) - crop_bottom = min(img.height, bottom + padding) - crop_right = min(img.width, right + padding) - - face_crop = img.crop((crop_left, crop_top, crop_right, crop_bottom)) - - # Save temporary crop - temp_path = f"temp_face_crop_{os.getpid()}.jpg" - face_crop.save(temp_path, "JPEG") - return temp_path - - except Exception as e: - print(f"Error extracting face: {e}") - return None - - def open_image(self, image_path): - """Open image with default system viewer""" - try: - import platform - if platform.system() == "Windows": - # For Windows - os.startfile(image_path) - return True - elif image_path.startswith('/mnt/c/'): - # Convert WSL path to Windows path for explorer - win_path = image_path.replace('/mnt/c/', 'C:\\').replace('/', '\\') - subprocess.run(['explorer.exe', win_path], check=True) - return True - else: - # For Linux/Mac - subprocess.run(['xdg-open', image_path], check=True) - return True - except Exception as e: - print(f"Could not open image: {e}") - return False - - def add_person(self, name): - """Add a new person""" - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - c.execute('INSERT OR IGNORE INTO people (name) VALUES (?)', (name,)) - c.execute('SELECT id FROM people WHERE name = ?', (name,)) - person_id = c.fetchone()[0] - conn.commit() - conn.close() - return person_id - - def assign_face(self, face_id, person_id): - """Assign a face to a person""" - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - c.execute('UPDATE faces SET person_id = ?, is_confirmed = 1 WHERE id = ?', - (person_id, face_id)) - conn.commit() - conn.close() - - def run_visual_identifier(self): - """Run visual identifier with image viewing""" - print("\n🏷️ Visual Face Identifier") - print("=" * 50) - print("This will show you face crops to help identify people") - print() - - faces = self.get_unidentified_faces(20) # Process 20 at a time - - if not faces: - print("No unidentified faces found!") - return - - print(f"Found {len(faces)} unidentified faces to process...") - print("Commands:") - print(" - Enter person's name to identify") - print(" - 's' to skip") - print(" - 'o' to open original image") - print(" - 'q' to quit") - print() - - try: - for i, (face_id, image_id, path, filename, top, right, bottom, left) in enumerate(faces): - print(f"\n{'='*60}") - print(f"Face {i+1}/{len(faces)}") - print(f"📁 File: {filename}") - print(f"📍 Face location: top={top}, right={right}, bottom={bottom}, left={left}") - - # Check if original file exists - if not os.path.exists(path): - print("⚠️ Original image file not found, skipping...") - continue - - # Extract and show face crop - face_crop_path = self.extract_face_crop(path, top, right, bottom, left) - - if face_crop_path: - print(f"🖼️ Face crop saved as: {face_crop_path}") - print("📖 Opening face crop...") - - if self.open_image(face_crop_path): - print("✅ Face crop opened in image viewer") - else: - print("❌ Could not open image viewer") - print(f" You can manually open: {face_crop_path}") - - while True: - response = input(f"\n👤 Who is this person? (name/'s'/'o'/'q'): ").strip() - - if response.lower() == 'q': - print("🛑 Quitting...") - return - elif response.lower() == 's': - print("⏭️ Skipped") - break - elif response.lower() == 'o': - print("📖 Opening original image...") - if self.open_image(path): - print("✅ Original image opened") - else: - print(f"❌ Could not open: {path}") - elif response: - try: - person_id = self.add_person(response) - self.assign_face(face_id, person_id) - print(f"✅ Identified as '{response}'") - break - except Exception as e: - print(f"❌ Error: {e}") - else: - print("Please enter a name, 's', 'o', or 'q'") - - # Clean up this face crop - if face_crop_path and os.path.exists(face_crop_path): - os.remove(face_crop_path) - - finally: - # Clean up any remaining temp files - for file in os.listdir('.'): - if file.startswith(f'temp_face_crop_{os.getpid()}'): - try: - os.remove(file) - except: - pass - - print(f"\n🎉 Completed processing!") - - # Show remaining count - remaining = self.get_remaining_count() - if remaining > 0: - print(f"📊 {remaining} unidentified faces remaining") - print("Run the script again to continue identifying faces") - else: - print("🏆 All faces have been identified!") - - def get_remaining_count(self): - """Get count of remaining unidentified faces""" - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - c.execute('SELECT COUNT(*) FROM faces WHERE person_id IS NULL') - count = c.fetchone()[0] - conn.close() - return count - -if __name__ == "__main__": - identifier = VisualFaceIdentifier() - identifier.run_visual_identifier() \ No newline at end of file diff --git a/src/backend/web_gui.py b/src/backend/web_gui.py deleted file mode 100644 index b29e071..0000000 --- a/src/backend/web_gui.py +++ /dev/null @@ -1,431 +0,0 @@ -#!/usr/bin/env python3 -""" -Web GUI for PunimTag using Flask -Face clustering and identification interface -""" - -from flask import Flask, render_template, request, jsonify, send_from_directory -import os -import sqlite3 -import base64 -from io import BytesIO -from PIL import Image -import pickle -import numpy as np -from typing import List, Dict - - -app = Flask(__name__) -DB_PATH = 'punimtag_simple.db' - - -def get_face_clusters() -> List[Dict]: - """Get face clusters using simple clustering""" - if not os.path.exists(DB_PATH): - return [] - - conn = sqlite3.connect(DB_PATH) - c = conn.cursor() - - try: - # Get unidentified faces - c.execute('''SELECT f.id, f.image_id, i.path, f.top, f.right, f.bottom, f.left, f.encoding - FROM faces f - JOIN images i ON f.image_id = i.id - WHERE f.person_id IS NULL''') - - faces = c.fetchall() - - if len(faces) < 2: - return [] - - # Simple clustering by face encoding similarity - clusters = [] - used_faces = set() - - for i, face1 in enumerate(faces): - if face1[0] in used_faces: - continue - - cluster_faces = [face1] - used_faces.add(face1[0]) - - encoding1 = pickle.loads(face1[7]) - - # Find similar faces - for j, face2 in enumerate(faces[i+1:], i+1): - if face2[0] in used_faces: - continue - - encoding2 = pickle.loads(face2[7]) - - # Calculate similarity using numpy - distance = np.linalg.norm(encoding1 - encoding2) - - if distance < 0.8: # Similar faces - cluster_faces.append(face2) - used_faces.add(face2[0]) - - # Only create cluster if it has multiple faces - if len(cluster_faces) >= 2: - cluster_data = { - 'cluster_id': len(clusters), - 'face_count': len(cluster_faces), - 'faces': [] - } - - for face in cluster_faces: - cluster_data['faces'].append({ - 'face_id': face[0], - 'image_id': face[1], - 'image_path': face[2], - 'location': (face[3], face[4], face[5], face[6]) - }) - - clusters.append(cluster_data) - - # Sort by face count (largest clusters first) - clusters.sort(key=lambda x: x['face_count'], reverse=True) - - return clusters - - except Exception as e: - print(f"Error in clustering: {e}") - return [] - finally: - conn.close() - - -def get_face_thumbnail_base64(face: Dict) -> str: - """Get base64 encoded thumbnail of a face""" - try: - image_path = face['image_path'] - if not os.path.exists(image_path): - return "" - - img = Image.open(image_path) - - # Crop face region - top, right, bottom, left = face['location'] - padding = 20 - left = max(0, left - padding) - top = max(0, top - padding) - right = min(img.width, right + padding) - bottom = min(img.height, bottom + padding) - - face_img = img.crop((left, top, right, bottom)) - face_img.thumbnail((150, 150), Image.Resampling.LANCZOS) - - # Convert to base64 - buffer = BytesIO() - face_img.save(buffer, format='JPEG') - img_str = base64.b64encode(buffer.getvalue()).decode() - - return f"data:image/jpeg;base64,{img_str}" - - except Exception as e: - print(f"Error creating thumbnail: {e}") - return "" - - -def get_database_stats() -> Dict: - """Get database statistics""" - if not os.path.exists(DB_PATH): - return {} - - conn = sqlite3.connect(DB_PATH) - c = conn.cursor() - - try: - stats = {} - - c.execute("SELECT COUNT(*) FROM images") - stats['images'] = c.fetchone()[0] - - c.execute("SELECT COUNT(*) FROM faces") - stats['faces'] = c.fetchone()[0] - - c.execute("SELECT COUNT(*) FROM faces WHERE person_id IS NOT NULL") - stats['identified_faces'] = c.fetchone()[0] - - c.execute("SELECT COUNT(*) FROM people") - stats['people'] = c.fetchone()[0] - - stats['unidentified_faces'] = stats['faces'] - stats['identified_faces'] - - return stats - - except Exception as e: - print(f"Error getting stats: {e}") - return {} - finally: - conn.close() - - -@app.route('/') -def index(): - """Main page""" - stats = get_database_stats() - clusters = get_face_clusters() - - return render_template('index.html', stats=stats, clusters=clusters) - - -@app.route('/cluster/') -def cluster_detail(cluster_id): - """Cluster detail page""" - clusters = get_face_clusters() - - if cluster_id >= len(clusters): - return "Cluster not found", 404 - - cluster = clusters[cluster_id] - - # Add thumbnails to faces - for face in cluster['faces']: - face['thumbnail'] = get_face_thumbnail_base64(face) - face['filename'] = os.path.basename(face['image_path']) - - return render_template('cluster_detail.html', cluster=cluster, cluster_id=cluster_id) - - -@app.route('/identify_cluster', methods=['POST']) -def identify_cluster(): - """Identify all faces in a cluster as a person""" - data = request.json - cluster_id = data.get('cluster_id') - person_name = data.get('person_name', '').strip() - - if not person_name: - return jsonify({'success': False, 'error': 'Person name is required'}) - - try: - clusters = get_face_clusters() - - if cluster_id >= len(clusters): - return jsonify({'success': False, 'error': 'Cluster not found'}) - - cluster = clusters[cluster_id] - - # Add person to database and assign faces - conn = sqlite3.connect(DB_PATH) - c = conn.cursor() - - # Add person - c.execute('INSERT OR IGNORE INTO people (name) VALUES (?)', (person_name,)) - c.execute('SELECT id FROM people WHERE name = ?', (person_name,)) - person_id = c.fetchone()[0] - - # Assign all faces in cluster - for face in cluster['faces']: - c.execute('''UPDATE faces - SET person_id = ?, is_confirmed = 1 - WHERE id = ?''', - (person_id, face['face_id'])) - - conn.commit() - conn.close() - - return jsonify({ - 'success': True, - 'message': f"Identified {cluster['face_count']} faces as {person_name}" - }) - - except Exception as e: - return jsonify({'success': False, 'error': str(e)}) - - -@app.route('/people') -def people_list(): - """List all identified people""" - if not os.path.exists(DB_PATH): - return render_template('people.html', people=[]) - - conn = sqlite3.connect(DB_PATH) - c = conn.cursor() - - try: - c.execute('''SELECT p.id, p.name, COUNT(f.id) as face_count, p.created_at - FROM people p - LEFT JOIN faces f ON p.id = f.person_id - GROUP BY p.id - ORDER BY face_count DESC''') - - people = [] - for row in c.fetchall(): - people.append({ - 'id': row[0], - 'name': row[1], - 'face_count': row[2], - 'created_at': row[3] - }) - - return render_template('people.html', people=people) - - except Exception as e: - return render_template('people.html', people=[], error=str(e)) - finally: - conn.close() - - -@app.route('/search') -def search(): - """Search interface""" - return render_template('search.html') - - -@app.route('/api/search', methods=['POST']) -def api_search(): - """Search API endpoint""" - data = request.json - people = data.get('people', []) - tags = data.get('tags', []) - - try: - from punimtag_simple import SimplePunimTag - - tagger = SimplePunimTag(DB_PATH) - results = tagger.simple_search(people=people if people else None, - tags=tags if tags else None) - tagger.close() - - return jsonify({'success': True, 'results': results}) - - except Exception as e: - return jsonify({'success': False, 'error': str(e)}) - - -if __name__ == '__main__': - # Create templates directory and basic templates - os.makedirs('templates', exist_ok=True) - - # Create basic HTML templates - create_html_templates() - - print("🌐 Starting PunimTag Web GUI...") - print("📊 Open http://localhost:5000 in your browser") - print("🔄 Use Ctrl+C to stop") - - app.run(debug=True, host='0.0.0.0', port=5000) - - -def create_html_templates(): - """Create basic HTML templates""" - - # Base template - base_template = ''' - - - PunimTag - {% block title %}{% endblock %} - - - - - -
-
-

🏷️ PunimTag

- -
- {% block content %}{% endblock %} -
- -''' - - # Index page - index_template = '''{% extends "base.html" %} -{% block title %}Face Clusters{% endblock %} -{% block content %} -

📊 Database Statistics

-
-
-

Images

-

{{ stats.images or 0 }}

-
-
-

Total Faces

-

{{ stats.faces or 0 }}

-
-
-

Identified

-

{{ stats.identified_faces or 0 }}

-
-
-

Unidentified

-

{{ stats.unidentified_faces or 0 }}

-
-
-

People

-

{{ stats.people or 0 }}

-
-
- -

👥 Unknown Face Clusters

-{% if clusters %} -

Click on a cluster to identify the faces:

-
- {% for cluster in clusters %} -
-

Cluster {{ loop.index }} ({{ cluster.face_count }} faces)

-
- {% for face in cluster.faces[:4] %} - Face - {% endfor %} - {% if cluster.face_count > 4 %} -
+{{ cluster.face_count - 4 }} more
- {% endif %} -
-
- View & Identify -
- {% endfor %} -
-{% else %} -
- Great! No unknown face clusters found. All faces have been identified or there are no faces to process. -
-

To get started:

-
    -
  1. Add photos to the photos/ directory
  2. -
  3. Run python punimtag_simple.py to process them
  4. -
  5. Return here to identify unknown faces
  6. -
-{% endif %} -{% endblock %}''' - - # Write templates - with open('templates/base.html', 'w') as f: - f.write(base_template) - - with open('templates/index.html', 'w') as f: - f.write(index_template) - - print("✅ Created basic HTML templates") \ No newline at end of file diff --git a/src/utils/__init__.py b/src/utils/__init__.py deleted file mode 100644 index 1a8dc10..0000000 --- a/src/utils/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -PunimTag Utils Package - -This package contains utility functions and helper modules. -""" \ No newline at end of file diff --git a/src/utils/tag_manager.py b/src/utils/tag_manager.py deleted file mode 100644 index 23f103e..0000000 --- a/src/utils/tag_manager.py +++ /dev/null @@ -1,289 +0,0 @@ -#!/usr/bin/env python3 -""" -Tag Manager for PunimTag -Manage tags and assign them to images -""" - -import os -from punimtag import PunimTag -from datetime import datetime -from typing import List, Optional - - -class TagManager: - def __init__(self, db_path: str = 'punimtag.db'): - self.tagger = PunimTag(db_path=db_path) - - # Predefined tag categories and suggestions - self.tag_categories = { - 'location': ['home', 'work', 'vacation', 'beach', 'mountain', 'city', 'park', 'restaurant'], - 'event': ['birthday', 'wedding', 'graduation', 'holiday', 'party', 'meeting', 'conference'], - 'scene': ['indoor', 'outdoor', 'nature', 'urban', 'rural', 'night', 'day', 'sunset', 'sunrise'], - 'activity': ['sports', 'eating', 'working', 'playing', 'traveling', 'celebration', 'relaxing'], - 'mood': ['happy', 'formal', 'casual', 'candid', 'posed', 'artistic'], - 'season': ['spring', 'summer', 'fall', 'winter'], - 'weather': ['sunny', 'cloudy', 'rainy', 'snowy'], - 'group': ['family', 'friends', 'colleagues', 'solo', 'couple', 'group'] - } - - def list_tags(self): - """List all existing tags""" - c = self.tagger.conn.cursor() - c.execute('SELECT id, name, category FROM tags ORDER BY category, name') - - tags = c.fetchall() - - if not tags: - print("No tags found in database.") - return - - print("\nExisting Tags:") - print("=" * 50) - - current_category = None - for tag_id, name, category in tags: - if category != current_category: - current_category = category or "Uncategorized" - print(f"\n{current_category}:") - print(f" [{tag_id}] {name}") - - def create_tag(self): - """Interactive tag creation""" - print("\nCreate New Tag") - print("=" * 50) - - # Show categories - print("\nAvailable categories:") - for i, cat in enumerate(self.tag_categories.keys(), 1): - print(f"{i}. {cat}") - print(f"{len(self.tag_categories) + 1}. Other (no category)") - - # Get category - try: - choice = int(input("\nSelect category (number): ")) - if 1 <= choice <= len(self.tag_categories): - category = list(self.tag_categories.keys())[choice - 1] - print(f"\nSuggested tags for {category}:") - for tag in self.tag_categories[category]: - print(f" - {tag}") - else: - category = None - except: - category = None - - # Get tag name - name = input("\nEnter tag name: ").strip() - - if not name: - print("Tag name cannot be empty!") - return - - # Create tag - tag_id = self.tagger.add_tag(name, category) - print(f"✓ Created tag '{name}' with ID {tag_id}") - - def tag_images_by_search(self): - """Tag images found by search criteria""" - print("\nTag Images by Search") - print("=" * 50) - - # Get search criteria - print("\nSearch criteria (leave blank to skip):") - - # People filter - people_input = input("People (comma-separated names): ").strip() - people = [p.strip() for p in people_input.split(',')] if people_input else None - - # Existing tags filter - tags_input = input("Existing tags (comma-separated): ").strip() - tags = [t.strip() for t in tags_input.split(',')] if tags_input else None - - # Date range filter - date_from_input = input("Date from (YYYY-MM-DD): ").strip() - date_from = datetime.strptime(date_from_input, '%Y-%m-%d') if date_from_input else None - - date_to_input = input("Date to (YYYY-MM-DD): ").strip() - date_to = datetime.strptime(date_to_input, '%Y-%m-%d') if date_to_input else None - - # Search images - results = self.tagger.search_images(people, tags, date_from, date_to) - - if not results: - print("\nNo images found matching criteria!") - return - - print(f"\nFound {len(results)} images") - - # Get tag to apply - self.list_tags() - tag_name = input("\nEnter tag name to apply: ").strip() - - if not tag_name: - print("Cancelled") - return - - # Get or create tag - tag_id = self.tagger.add_tag(tag_name) - - # Apply tag to all results - count = 0 - for img in results: - self.tagger.tag_image(img['id'], tag_id) - count += 1 - - print(f"✓ Applied tag '{tag_name}' to {count} images") - - def tag_single_image(self): - """Tag a single image by path""" - print("\nTag Single Image") - print("=" * 50) - - # Get image path - image_path = input("Enter image path: ").strip() - - if not os.path.exists(image_path): - print(f"Error: Image not found at {image_path}") - return - - # Check if image is in database - c = self.tagger.conn.cursor() - c.execute('SELECT id FROM images WHERE path = ?', (image_path,)) - result = c.fetchone() - - if not result: - print("Image not found in database. Processing it now...") - image_id = self.tagger.process_image(image_path) - else: - image_id = result[0] - - # Show current tags - c.execute('''SELECT t.name FROM tags t - JOIN image_tags it ON t.id = it.tag_id - WHERE it.image_id = ?''', (image_id,)) - - current_tags = [row[0] for row in c.fetchall()] - - if current_tags: - print(f"\nCurrent tags: {', '.join(current_tags)}") - else: - print("\nNo tags currently assigned") - - # Add tags - while True: - tag_name = input("\nEnter tag to add (or press Enter to finish): ").strip() - - if not tag_name: - break - - tag_id = self.tagger.add_tag(tag_name) - self.tagger.tag_image(image_id, tag_id) - print(f"✓ Added tag '{tag_name}'") - - def auto_tag_suggestions(self): - """Suggest automatic tags based on image metadata""" - print("\nAuto-Tag Suggestions") - print("=" * 50) - - c = self.tagger.conn.cursor() - - # Find images without tags - c.execute('''SELECT i.id, i.path, i.date_taken, i.latitude, i.longitude - FROM images i - LEFT JOIN image_tags it ON i.id = it.image_id - WHERE it.tag_id IS NULL''') - - untagged = c.fetchall() - - if not untagged: - print("All images are already tagged!") - return - - print(f"Found {len(untagged)} untagged images") - - # Auto-tag based on date - seasons = { - (3, 4, 5): 'spring', - (6, 7, 8): 'summer', - (9, 10, 11): 'fall', - (12, 1, 2): 'winter' - } - - season_counts = {s: 0 for s in seasons.values()} - location_count = 0 - - for img_id, path, date_taken, lat, lon in untagged: - suggestions = [] - - # Season based on date - if date_taken: - month = datetime.strptime(date_taken, '%Y-%m-%d %H:%M:%S').month - for months, season in seasons.items(): - if month in months: - tag_id = self.tagger.add_tag(season, 'season') - self.tagger.tag_image(img_id, tag_id) - season_counts[season] += 1 - break - - # Location-based tags - if lat and lon: - # You could integrate with a geocoding API here - # For now, just tag as "geotagged" - tag_id = self.tagger.add_tag('geotagged', 'location') - self.tagger.tag_image(img_id, tag_id) - location_count += 1 - - print("\nAuto-tagging complete:") - for season, count in season_counts.items(): - if count > 0: - print(f" - Tagged {count} images as '{season}'") - if location_count > 0: - print(f" - Tagged {location_count} images as 'geotagged'") - - def run(self): - """Main menu loop""" - while True: - print("\n" + "=" * 50) - print("PunimTag - Tag Manager") - print("=" * 50) - print("1. List all tags") - print("2. Create new tag") - print("3. Tag images by search") - print("4. Tag single image") - print("5. Auto-tag suggestions") - print("6. Exit") - - try: - choice = int(input("\nSelect option: ")) - - if choice == 1: - self.list_tags() - elif choice == 2: - self.create_tag() - elif choice == 3: - self.tag_images_by_search() - elif choice == 4: - self.tag_single_image() - elif choice == 5: - self.auto_tag_suggestions() - elif choice == 6: - break - else: - print("Invalid option!") - - except KeyboardInterrupt: - print("\n\nInterrupted by user") - break - except Exception as e: - print(f"Error: {e}") - - self.tagger.close() - print("\nGoodbye!") - - -def main(): - manager = TagManager() - manager.run() - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/test_basic.py b/test_basic.py new file mode 100644 index 0000000..c10bcf7 --- /dev/null +++ b/test_basic.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +""" +Basic test for photo_tagger.py without face recognition dependencies +Tests database initialization and basic functionality +""" + +import sys +import os +import tempfile +import sqlite3 + +# Add current directory to path +sys.path.insert(0, '.') + +def test_database_init(): + """Test database initialization without face recognition""" + # Create temporary database + with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp: + test_db = tmp.name + + try: + # Import and test database creation + from photo_tagger import PhotoTagger + + # This should fail because face_recognition is not installed + # But we can test the import and class structure + print("✅ PhotoTagger class imported successfully") + + # Test basic database initialization + conn = sqlite3.connect(test_db) + cursor = conn.cursor() + + # Create the tables manually to test schema + cursor.execute(''' + CREATE TABLE IF NOT EXISTS photos ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + path TEXT UNIQUE NOT NULL, + filename TEXT NOT NULL, + date_added DATETIME DEFAULT CURRENT_TIMESTAMP, + processed BOOLEAN DEFAULT 0 + ) + ''') + + cursor.execute(''' + CREATE TABLE IF NOT EXISTS people ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT UNIQUE NOT NULL, + created_date DATETIME DEFAULT CURRENT_TIMESTAMP + ) + ''') + + conn.commit() + + # Test basic operations + cursor.execute("INSERT INTO photos (path, filename) VALUES (?, ?)", + ("/test/path.jpg", "test.jpg")) + cursor.execute("INSERT INTO people (name) VALUES (?)", ("Test Person",)) + + cursor.execute("SELECT COUNT(*) FROM photos") + photo_count = cursor.fetchone()[0] + + cursor.execute("SELECT COUNT(*) FROM people") + people_count = cursor.fetchone()[0] + + conn.close() + + print(f"✅ Database schema created successfully") + print(f"✅ Test data inserted: {photo_count} photos, {people_count} people") + + return True + + except ImportError as e: + print(f"⚠️ Import error (expected): {e}") + print("✅ This is expected without face_recognition installed") + return True + except Exception as e: + print(f"❌ Unexpected error: {e}") + return False + finally: + # Clean up + if os.path.exists(test_db): + os.unlink(test_db) + +def test_cli_structure(): + """Test CLI argument parsing structure""" + try: + import argparse + + # Test if our argument parser structure is valid + parser = argparse.ArgumentParser(description="Test parser") + parser.add_argument('command', choices=['scan', 'process', 'identify', 'tag', 'search', 'stats']) + parser.add_argument('target', nargs='?') + parser.add_argument('--db', default='photos.db') + parser.add_argument('--limit', type=int, default=50) + + # Test parsing + args = parser.parse_args(['stats']) + print(f"✅ CLI argument parsing works: command={args.command}") + + return True + except Exception as e: + print(f"❌ CLI structure error: {e}") + return False + +def main(): + """Run basic tests""" + print("🧪 Running Basic Tests for PunimTag CLI") + print("=" * 50) + + tests = [ + ("Database Schema", test_database_init), + ("CLI Structure", test_cli_structure), + ] + + passed = 0 + total = len(tests) + + for test_name, test_func in tests: + print(f"\n📋 Testing: {test_name}") + try: + if test_func(): + print(f"✅ {test_name}: PASSED") + passed += 1 + else: + print(f"❌ {test_name}: FAILED") + except Exception as e: + print(f"❌ {test_name}: ERROR - {e}") + + print(f"\n📊 Results: {passed}/{total} tests passed") + + if passed == total: + print("🎉 All basic tests passed!") + print("\n📦 Next steps:") + print("1. Install dependencies: pip install -r requirements.txt") + print("2. Test full functionality: python photo_tagger.py stats") + return 0 + else: + print("⚠️ Some tests failed") + return 1 + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index d1d799b..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,139 +0,0 @@ -""" -PunimTag Test Configuration - -Shared fixtures and configuration for all tests. -""" - -import pytest -import sqlite3 -import tempfile -import os -import sys -from pathlib import Path - -# Add src to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent / "src")) - -from backend.app import app - -@pytest.fixture -def test_db(): - """Create a temporary test database.""" - db_fd, db_path = tempfile.mkstemp() - - # Create test database schema - conn = sqlite3.connect(db_path) - cursor = conn.cursor() - - # Create tables - cursor.execute(''' - CREATE TABLE images ( - id INTEGER PRIMARY KEY, - filename TEXT NOT NULL, - path TEXT NOT NULL, - date_taken TEXT - ) - ''') - - cursor.execute(''' - CREATE TABLE faces ( - id INTEGER PRIMARY KEY, - image_id INTEGER, - person_id INTEGER, - encoding BLOB, - left INTEGER, - top INTEGER, - right INTEGER, - bottom INTEGER - ) - ''') - - cursor.execute(''' - CREATE TABLE people ( - id INTEGER PRIMARY KEY, - name TEXT NOT NULL, - created_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - ''') - - cursor.execute(''' - CREATE TABLE tags ( - id INTEGER PRIMARY KEY, - name TEXT NOT NULL UNIQUE - ) - ''') - - cursor.execute(''' - CREATE TABLE image_tags ( - image_id INTEGER, - tag_id INTEGER, - PRIMARY KEY (image_id, tag_id) - ) - ''') - - conn.commit() - conn.close() - - yield db_path - - # Cleanup - os.close(db_fd) - os.unlink(db_path) - -@pytest.fixture -def client(test_db): - """Create a test client with test database.""" - app.config['TESTING'] = True - app.config['DATABASE_PATH'] = test_db - - with app.test_client() as client: - yield client - -@pytest.fixture -def sample_photos(test_db): - """Add sample photos to the test database.""" - conn = sqlite3.connect(test_db) - cursor = conn.cursor() - - photos = [ - ('photo1.jpg', '/test/path/photo1.jpg', '2023-01-01'), - ('photo2.jpg', '/test/path/photo2.jpg', '2023-01-02'), - ('photo3.jpg', '/test/path/photo3.jpg', '2023-01-03') - ] - - cursor.executemany( - 'INSERT INTO images (filename, path, date_taken) VALUES (?, ?, ?)', - photos - ) - - conn.commit() - conn.close() - - return photos - -@pytest.fixture -def sample_faces(test_db): - """Add sample faces to the test database.""" - conn = sqlite3.connect(test_db) - cursor = conn.cursor() - - # Add a person first - cursor.execute('INSERT INTO people (name) VALUES (?)', ('Test Person',)) - person_id = cursor.lastrowid - - # Add faces - faces = [ - (1, person_id, b'fake_encoding_1', 100, 100, 200, 200), - (2, person_id, b'fake_encoding_2', 150, 150, 250, 250), - (3, None, b'fake_encoding_3', 200, 200, 300, 300), # Unidentified face - ] - - cursor.executemany( - 'INSERT INTO faces (image_id, person_id, encoding, left, top, right, bottom) VALUES (?, ?, ?, ?, ?, ?, ?)', - faces - ) - - conn.commit() - conn.close() - - return faces \ No newline at end of file diff --git a/tests/test_backend.py b/tests/test_backend.py deleted file mode 100644 index 58f11f3..0000000 --- a/tests/test_backend.py +++ /dev/null @@ -1,448 +0,0 @@ -#!/usr/bin/env python3 -""" -Comprehensive Backend Test Suite for PunimTag -Tests all backend functionality including face clustering, enhanced recognition, and complex queries -""" - -import os -import tempfile -import shutil -import unittest -import uuid -import pickle -from datetime import datetime, timedelta -import numpy as np -from punimtag import PunimTag -from config import PunimTagConfig, create_default_config -from typing import List - - -class TestBackendFunctionality(unittest.TestCase): - """Test all backend features thoroughly""" - - def setUp(self): - """Set up test environment with temporary database and config""" - self.test_dir = tempfile.mkdtemp() - self.db_path = os.path.join(self.test_dir, 'test.db') - self.photos_dir = os.path.join(self.test_dir, 'photos') - self.config_path = os.path.join(self.test_dir, 'test_config.json') - - os.makedirs(self.photos_dir, exist_ok=True) - - # Create test configuration - self.config = PunimTagConfig(self.config_path) - self.config.face_recognition.confidence_threshold = 0.5 - self.config.auto_tagging.enabled = True - self.config.processing.batch_size = 50 - self.config.save() - - # Initialize PunimTag with test database - self.tagger = PunimTag(db_path=self.db_path, photos_dir=self.photos_dir) - - def tearDown(self): - """Clean up test environment""" - self.tagger.close() - shutil.rmtree(self.test_dir) - - def test_configuration_system(self): - """Test configuration loading and saving""" - # Test default values - self.assertEqual(self.config.face_recognition.confidence_threshold, 0.5) - self.assertTrue(self.config.auto_tagging.enabled) - - # Test updating settings - success = self.config.update_setting('face_recognition', 'confidence_threshold', 0.7) - self.assertTrue(success) - self.assertEqual(self.config.face_recognition.confidence_threshold, 0.7) - - # Test getting settings - value = self.config.get_setting('processing', 'batch_size') - self.assertEqual(value, 50) - - # Test tag suggestions - event_tags = self.config.get_tag_suggestions('event') - self.assertIn('wedding', event_tags) - self.assertIn('bar_mitzvah', event_tags) - - def test_jewish_org_tags(self): - """Test Jewish organization specific tag functionality""" - # Test adding Jewish event tags - for tag_name in ['shabbat', 'chanukah', 'passover']: - tag_id = self.tagger.add_tag(tag_name, 'event') - self.assertIsNotNone(tag_id) - - # Test location tags - for tag_name in ['synagogue', 'sanctuary', 'sukkah']: - tag_id = self.tagger.add_tag(tag_name, 'location') - self.assertIsNotNone(tag_id) - - # Verify tags exist in database - c = self.tagger.conn.cursor() - c.execute("SELECT COUNT(*) FROM tags WHERE category = 'event'") - event_count = c.fetchone()[0] - self.assertGreaterEqual(event_count, 3) - - def test_face_clustering(self): - """Test face clustering functionality""" - # Create mock face data - face_ids = self._create_mock_faces(10) - - # Test clustering - clusters = self.tagger.cluster_unknown_faces() - self.assertIsInstance(clusters, dict) - - # Test getting cluster data - cluster_data = self.tagger.get_face_clusters() - self.assertIsInstance(cluster_data, list) - - # Each cluster should have required fields - for cluster in cluster_data: - self.assertIn('cluster_id', cluster) - self.assertIn('face_count', cluster) - self.assertIn('face_ids', cluster) - self.assertIn('representative_face', cluster) - - def test_cluster_assignment(self): - """Test assigning clusters to people""" - # Create mock faces and cluster them - face_ids = self._create_mock_faces(5) - clusters = self.tagger.cluster_unknown_faces() - - if clusters: - cluster_id = list(clusters.keys())[0] - success = self.tagger.assign_cluster_to_person(cluster_id, "Test Person") - self.assertTrue(success) - - # Verify assignment - c = self.tagger.conn.cursor() - c.execute("SELECT COUNT(*) FROM faces WHERE person_id IS NOT NULL") - assigned_count = c.fetchone()[0] - self.assertGreater(assigned_count, 0) - - def test_most_common_faces(self): - """Test getting most frequently photographed people""" - # Add some people and faces - person1_id = self.tagger.add_person("John Doe") - person2_id = self.tagger.add_person("Jane Smith") - - # Create mock faces assigned to people - face_ids = self._create_mock_faces(10) - - # Assign faces to people - for i, face_id in enumerate(face_ids[:5]): - self.tagger.assign_face_to_person(face_id, person1_id, True) - - for face_id in face_ids[5:7]: - self.tagger.assign_face_to_person(face_id, person2_id, True) - - # Test getting most common faces - common_faces = self.tagger.get_most_common_faces(limit=10) - self.assertIsInstance(common_faces, list) - - if common_faces: - # Should be sorted by face count (John Doe should be first) - self.assertEqual(common_faces[0]['name'], "John Doe") - self.assertEqual(common_faces[0]['face_count'], 5) - - def test_face_verification(self): - """Test face verification functionality""" - person_id = self.tagger.add_person("Test Person") - face_ids = self._create_mock_faces(3) - - # Assign faces to person - for face_id in face_ids: - self.tagger.assign_face_to_person(face_id, person_id, True) - - # Test verification - faces = self.tagger.verify_person_faces(person_id) - self.assertEqual(len(faces), 3) - - # Test removing incorrect assignment - self.tagger.remove_incorrect_face_assignment(face_ids[0]) - - # Verify removal - faces_after = self.tagger.verify_person_faces(person_id) - self.assertEqual(len(faces_after), 2) - - def test_batch_processing(self): - """Test batch image processing""" - # Create mock image paths - image_paths = [ - os.path.join(self.photos_dir, f'test_{i}.jpg') - for i in range(5) - ] - - # Create empty test files - for path in image_paths: - with open(path, 'w') as f: - f.write('') # Empty file for testing - - # Test batch processing (will fail on actual processing but test the logic) - try: - results = self.tagger.batch_process_images(image_paths, batch_size=2) - self.assertIn('processed', results) - self.assertIn('errors', results) - self.assertIn('skipped', results) - except Exception: - # Expected to fail with empty files, but structure should be correct - pass - - def test_advanced_search(self): - """Test advanced search functionality""" - # Setup test data - person_id = self.tagger.add_person("Search Test Person") - tag_id = self.tagger.add_tag("test_event", "event") - - # Create mock image - image_id = self._create_mock_image() - - # Add mock face and tag - face_id = self._create_mock_face(image_id) - self.tagger.assign_face_to_person(face_id, person_id, True) - self.tagger.tag_image(image_id, tag_id) - - # Test various search scenarios - - # Search by person - results = self.tagger.advanced_search(people=["Search Test Person"]) - self.assertIsInstance(results, list) - - # Search by tag - results = self.tagger.advanced_search(tags=["test_event"]) - self.assertIsInstance(results, list) - - # Search by person and tag - results = self.tagger.advanced_search( - people=["Search Test Person"], - tags=["test_event"] - ) - self.assertIsInstance(results, list) - - # Search with date range - today = datetime.now() - yesterday = today - timedelta(days=1) - tomorrow = today + timedelta(days=1) - - results = self.tagger.advanced_search( - date_from=yesterday, - date_to=tomorrow - ) - self.assertIsInstance(results, list) - - # Search with location bounds - results = self.tagger.advanced_search( - latitude_min=40.0, - latitude_max=41.0, - longitude_min=-74.0, - longitude_max=-73.0 - ) - self.assertIsInstance(results, list) - - # Search with minimum people requirement - results = self.tagger.advanced_search(min_people=1) - self.assertIsInstance(results, list) - - # Search with limit - results = self.tagger.advanced_search(limit=5) - self.assertIsInstance(results, list) - self.assertLessEqual(len(results), 5) - - def test_face_quality_calculation(self): - """Test face quality scoring""" - # Test with different face sizes and encodings - small_face = (10, 30, 30, 10) # 20x20 face - large_face = (10, 110, 110, 10) # 100x100 face - - encoding = np.random.rand(128) - - small_quality = self.tagger.calculate_face_quality(encoding, small_face) - large_quality = self.tagger.calculate_face_quality(encoding, large_face) - - # Larger faces should have higher quality scores - self.assertGreater(large_quality, small_quality) - - # Quality should be between 0 and 1 - self.assertGreaterEqual(small_quality, 0) - self.assertLessEqual(small_quality, 1) - self.assertGreaterEqual(large_quality, 0) - self.assertLessEqual(large_quality, 1) - - def test_database_integrity(self): - """Test database integrity and relationships""" - # Test foreign key relationships - person_id = self.tagger.add_person("Integrity Test") - image_id = self._create_mock_image() - face_id = self._create_mock_face(image_id) - tag_id = self.tagger.add_tag("integrity_test") - - # Test assignments - self.tagger.assign_face_to_person(face_id, person_id, True) - self.tagger.tag_image(image_id, tag_id) - - # Verify relationships exist - c = self.tagger.conn.cursor() - - # Check face-person relationship - c.execute("SELECT person_id FROM faces WHERE id = ?", (face_id,)) - result = c.fetchone() - self.assertEqual(result[0], person_id) - - # Check image-tag relationship - c.execute("SELECT tag_id FROM image_tags WHERE image_id = ?", (image_id,)) - result = c.fetchone() - self.assertEqual(result[0], tag_id) - - def test_search_edge_cases(self): - """Test search functionality with edge cases""" - # Search with empty parameters - results = self.tagger.advanced_search() - self.assertIsInstance(results, list) - - # Search with non-existent person - results = self.tagger.advanced_search(people=["Non Existent Person"]) - self.assertEqual(len(results), 0) - - # Search with non-existent tag - results = self.tagger.advanced_search(tags=["non_existent_tag"]) - self.assertEqual(len(results), 0) - - # Search with invalid date range - future_date = datetime.now() + timedelta(days=365) - past_date = datetime.now() - timedelta(days=365) - - results = self.tagger.advanced_search( - date_from=future_date, - date_to=past_date - ) - self.assertEqual(len(results), 0) - - # Helper methods - - def _create_mock_image(self) -> int: - """Create a mock image entry in database""" - import uuid - unique_path = f'test_path_{uuid.uuid4().hex[:8]}.jpg' - c = self.tagger.conn.cursor() - c.execute('''INSERT INTO images - (path, filename, date_taken, width, height, file_size) - VALUES (?, ?, ?, ?, ?, ?)''', - (unique_path, unique_path, datetime.now(), - 800, 600, 12345)) - self.tagger.conn.commit() - return c.lastrowid - - def _create_mock_face(self, image_id: int) -> int: - """Create a mock face entry in database""" - import pickle - encoding = np.random.rand(128) - encoding_blob = pickle.dumps(encoding) - - c = self.tagger.conn.cursor() - c.execute('''INSERT INTO faces - (image_id, top, right, bottom, left, encoding) - VALUES (?, ?, ?, ?, ?, ?)''', - (image_id, 10, 110, 110, 10, encoding_blob)) - self.tagger.conn.commit() - return c.lastrowid - - def _create_mock_faces(self, count: int) -> List[int]: - """Create multiple mock faces""" - face_ids = [] - for i in range(count): - image_id = self._create_mock_image() - face_id = self._create_mock_face(image_id) - face_ids.append(face_id) - return face_ids - - -def run_performance_tests(): - """Run performance tests with larger datasets""" - print("\nRunning Performance Tests") - print("=" * 50) - - with tempfile.TemporaryDirectory() as temp_dir: - db_path = os.path.join(temp_dir, 'perf_test.db') - tagger = PunimTag(db_path=db_path) - - try: - # Test with larger numbers of faces - print("Creating 1000 mock faces...") - start_time = datetime.now() - - face_ids = [] - for i in range(1000): - # Create image - c = tagger.conn.cursor() - c.execute('''INSERT INTO images - (path, filename, date_taken, width, height, file_size) - VALUES (?, ?, ?, ?, ?, ?)''', - (f'perf_test_{i}_{uuid.uuid4().hex[:8]}.jpg', f'perf_test_{i}.jpg', - datetime.now(), 800, 600, 12345)) - image_id = c.lastrowid - - # Create face - encoding = np.random.rand(128) - encoding_blob = pickle.dumps(encoding) - c.execute('''INSERT INTO faces - (image_id, top, right, bottom, left, encoding) - VALUES (?, ?, ?, ?, ?, ?)''', - (image_id, 10, 110, 110, 10, encoding_blob)) - face_ids.append(c.lastrowid) - - if i % 100 == 0: - print(f"Created {i} faces...") - - tagger.conn.commit() - creation_time = (datetime.now() - start_time).total_seconds() - print(f"Created 1000 faces in {creation_time:.2f} seconds") - - # Test clustering performance - print("Testing clustering performance...") - start_time = datetime.now() - clusters = tagger.cluster_unknown_faces() - clustering_time = (datetime.now() - start_time).total_seconds() - print(f"Clustered faces in {clustering_time:.2f} seconds") - print(f"Found {len(clusters)} clusters") - - # Test search performance - print("Testing search performance...") - start_time = datetime.now() - results = tagger.advanced_search(limit=100) - search_time = (datetime.now() - start_time).total_seconds() - print(f"Search completed in {search_time:.2f} seconds") - print(f"Found {len(results)} results") - - finally: - tagger.close() - - -def main(): - """Main test runner""" - print("PunimTag Backend Test Suite") - print("=" * 50) - - # Run unit tests - print("Running unit tests...") - loader = unittest.TestLoader() - suite = loader.loadTestsFromTestCase(TestBackendFunctionality) - runner = unittest.TextTestRunner(verbosity=2) - result = runner.run(suite) - - if result.wasSuccessful(): - print("\n✅ All unit tests passed!") - - # Run performance tests - run_performance_tests() - - print("\n🎉 Backend testing completed successfully!") - print("\nBackend is ready for UI development.") - else: - print("\n❌ Some tests failed. Please fix issues before proceeding.") - return False - - return True - - -if __name__ == "__main__": - success = main() - exit(0 if success else 1) \ No newline at end of file diff --git a/tests/test_main.py b/tests/test_main.py deleted file mode 100644 index f4bb707..0000000 --- a/tests/test_main.py +++ /dev/null @@ -1,200 +0,0 @@ -""" -Main test suite for PunimTag - -Consolidated tests covering core functionality. -""" - -import json -import os -import sys -from pathlib import Path - -# Add src to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent / "src")) - -def test_imports(): - """Test that all modules can be imported.""" - try: - from backend import app - print("✅ Flask app imported successfully") - - # Test if we can access the app instance - if hasattr(app, 'app'): - print("✅ Flask app instance found") - else: - print("⚠️ Flask app instance not found, but module imported") - - return True - except ImportError as e: - print(f"❌ Import error: {e}") - return False - -def test_database_connection(): - """Test database connection and basic operations.""" - try: - # Test if we can connect to the database - import sqlite3 - db_path = Path(__file__).parent.parent / "data" / "punimtag_simple.db" - - if db_path.exists(): - conn = sqlite3.connect(str(db_path)) - cursor = conn.cursor() - cursor.execute("SELECT 1") - result = cursor.fetchone() - conn.close() - - if result and result[0] == 1: - print("✅ Database connection successful") - return True - else: - print("❌ Database query failed") - return False - else: - print("⚠️ Database file not found, but this is normal for fresh installs") - return True - - except Exception as e: - print(f"❌ Database error: {e}") - return False - -def test_face_recognition_import(): - """Test face recognition module import.""" - try: - from backend import visual_identifier - print("✅ Face recognition module imported successfully") - return True - except ImportError as e: - print(f"❌ Face recognition import error: {e}") - return False - -def test_config_loading(): - """Test configuration loading.""" - try: - # Test if config directory exists and has files - config_dir = Path(__file__).parent.parent / "config" - if config_dir.exists(): - config_files = list(config_dir.glob("*.py")) - if config_files: - print(f"✅ Configuration directory found with {len(config_files)} files") - return True - else: - print("⚠️ Configuration directory exists but no Python files found") - return True - else: - print("❌ Configuration directory not found") - return False - except Exception as e: - print(f"❌ Configuration error: {e}") - return False - -def test_directory_structure(): - """Test that all required directories exist.""" - required_dirs = [ - "src/backend", - "src/frontend", - "src/utils", - "tests", - "data", - "config", - "docs", - "photos", - "scripts", - "assets" - ] - - missing_dirs = [] - for dir_path in required_dirs: - if not os.path.exists(dir_path): - missing_dirs.append(dir_path) - - if missing_dirs: - print(f"❌ Missing directories: {missing_dirs}") - return False - else: - print("✅ All required directories exist") - return True - -def test_steering_documents(): - """Test that steering documents exist.""" - required_docs = [ - "docs/product.md", - "docs/structure.md", - "docs/tech.md", - "docs/api-standards.md", - "docs/testing-standards.md", - "docs/code-conventions.md" - ] - - missing_docs = [] - for doc_path in required_docs: - if not os.path.exists(doc_path): - missing_docs.append(doc_path) - - if missing_docs: - print(f"❌ Missing steering documents: {missing_docs}") - return False - else: - print("✅ All steering documents exist") - return True - -def test_main_app_file(): - """Test that the main application file exists and is accessible.""" - try: - main_app_path = Path(__file__).parent.parent / "src" / "backend" / "app.py" - if main_app_path.exists(): - print(f"✅ Main app file found: {main_app_path}") - - # Test if we can read the file - with open(main_app_path, 'r') as f: - content = f.read() - if 'Flask' in content and 'app' in content: - print("✅ Main app file contains Flask app") - return True - else: - print("⚠️ Main app file exists but doesn't contain expected Flask content") - return True - else: - print("❌ Main app file not found") - return False - except Exception as e: - print(f"❌ Main app file error: {e}") - return False - -def run_all_tests(): - """Run all tests and report results.""" - print("🧪 Running PunimTag Test Suite") - print("=" * 50) - - tests = [ - test_imports, - test_database_connection, - test_face_recognition_import, - test_config_loading, - test_directory_structure, - test_steering_documents, - test_main_app_file - ] - - passed = 0 - total = len(tests) - - for test in tests: - try: - if test(): - passed += 1 - except Exception as e: - print(f"❌ Test {test.__name__} failed with exception: {e}") - - print("=" * 50) - print(f"📊 Test Results: {passed}/{total} tests passed") - - if passed == total: - print("🎉 All tests passed!") - return True - else: - print("⚠️ Some tests failed") - return False - -if __name__ == "__main__": - success = run_all_tests() - sys.exit(0 if success else 1) \ No newline at end of file diff --git a/tests/test_punimtag.py b/tests/test_punimtag.py deleted file mode 100644 index dee96a5..0000000 --- a/tests/test_punimtag.py +++ /dev/null @@ -1,250 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script for PunimTag -Tests core functionality including face detection, recognition, tagging, and search -""" - -import os -import shutil -import tempfile -import unittest -from datetime import datetime -from punimtag import PunimTag -import numpy as np - - -class TestPunimTag(unittest.TestCase): - def setUp(self): - """Set up test environment""" - # Create temporary directory for test database - self.test_dir = tempfile.mkdtemp() - self.db_path = os.path.join(self.test_dir, 'test.db') - self.photos_dir = os.path.join(self.test_dir, 'photos') - os.makedirs(self.photos_dir, exist_ok=True) - - # Initialize PunimTag with test database - self.tagger = PunimTag(db_path=self.db_path, photos_dir=self.photos_dir) - - def tearDown(self): - """Clean up test environment""" - self.tagger.close() - shutil.rmtree(self.test_dir) - - def test_database_creation(self): - """Test that database tables are created correctly""" - c = self.tagger.conn.cursor() - - # Check tables exist - c.execute("SELECT name FROM sqlite_master WHERE type='table'") - tables = {row[0] for row in c.fetchall()} - - expected_tables = {'images', 'people', 'faces', 'tags', 'image_tags'} - self.assertEqual(tables & expected_tables, expected_tables) - - def test_add_person(self): - """Test adding people to database""" - # Add person - person_id = self.tagger.add_person("John Doe") - self.assertIsNotNone(person_id) - - # Verify person exists - c = self.tagger.conn.cursor() - c.execute("SELECT name FROM people WHERE id = ?", (person_id,)) - result = c.fetchone() - self.assertEqual(result[0], "John Doe") - - # Test duplicate handling - person_id2 = self.tagger.add_person("John Doe") - self.assertEqual(person_id, person_id2) - - def test_add_tag(self): - """Test tag creation""" - # Add tag without category - tag_id1 = self.tagger.add_tag("vacation") - self.assertIsNotNone(tag_id1) - - # Add tag with category - tag_id2 = self.tagger.add_tag("beach", "location") - self.assertIsNotNone(tag_id2) - - # Verify tags exist - c = self.tagger.conn.cursor() - c.execute("SELECT name, category FROM tags WHERE id = ?", (tag_id2,)) - result = c.fetchone() - self.assertEqual(result[0], "beach") - self.assertEqual(result[1], "location") - - def test_metadata_extraction(self): - """Test metadata extraction from images""" - # Test with a non-existent file - should handle gracefully - try: - metadata = self.tagger.extract_metadata("nonexistent.jpg") - # If it doesn't raise an exception, check default values - self.assertIsNone(metadata['date_taken']) - self.assertIsNone(metadata['latitude']) - self.assertIsNone(metadata['longitude']) - except FileNotFoundError: - # This is also acceptable behavior - pass - - def test_face_identification(self): - """Test face identification logic""" - # Test with no known faces - result = self.tagger.identify_face(np.random.rand(128)) - self.assertEqual(result, (None, None)) - - # Would need actual face encodings for more thorough testing - - def test_search_functionality(self): - """Test search capabilities""" - # Search with no data should return empty - results = self.tagger.search_images() - self.assertEqual(len(results), 0) - - # Test with filters - results = self.tagger.search_images( - people=["John Doe"], - tags=["vacation"], - date_from=datetime(2023, 1, 1), - date_to=datetime(2023, 12, 31) - ) - self.assertEqual(len(results), 0) - - def test_unidentified_faces(self): - """Test getting unidentified faces""" - faces = self.tagger.get_unidentified_faces() - self.assertEqual(len(faces), 0) # Should be empty initially - - -class TestImageProcessing(unittest.TestCase): - """Test image processing with actual images""" - - @classmethod - def setUpClass(cls): - """Create test images""" - cls.test_dir = tempfile.mkdtemp() - cls.photos_dir = os.path.join(cls.test_dir, 'photos') - os.makedirs(cls.photos_dir, exist_ok=True) - - # Create test images (simple colored squares) - try: - from PIL import Image - - # Create a few test images - for i, color in enumerate(['red', 'green', 'blue']): - img = Image.new('RGB', (100, 100), color) - img.save(os.path.join(cls.photos_dir, f'test_{color}.jpg')) - except ImportError: - print("PIL not available, skipping image creation") - - @classmethod - def tearDownClass(cls): - """Clean up test images""" - shutil.rmtree(cls.test_dir) - - def setUp(self): - """Set up for each test""" - self.db_path = os.path.join(self.test_dir, 'test.db') - self.tagger = PunimTag(db_path=self.db_path, photos_dir=self.photos_dir) - - def tearDown(self): - """Clean up after each test""" - self.tagger.close() - if os.path.exists(self.db_path): - os.remove(self.db_path) - - def test_process_directory(self): - """Test processing a directory of images""" - # Process all images - processed = self.tagger.process_directory() - - # Should process the test images (if created) - self.assertGreaterEqual(processed, 0) - - # Check images were added to database - c = self.tagger.conn.cursor() - c.execute("SELECT COUNT(*) FROM images") - count = c.fetchone()[0] - self.assertEqual(count, processed) - - -def test_with_sample_images(image_paths): - """ - Test PunimTag with actual image files - - Args: - image_paths: List of paths to test images - """ - print("Testing PunimTag with sample images") - print("=" * 50) - - # Create temporary database - with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp: - db_path = tmp.name - - try: - # Initialize PunimTag - tagger = PunimTag(db_path=db_path) - - # Process each image - print(f"\nProcessing {len(image_paths)} images...") - for path in image_paths: - if os.path.exists(path): - print(f"Processing: {path}") - try: - image_id = tagger.process_image(path) - print(f" ✓ Added to database with ID: {image_id}") - except Exception as e: - print(f" ✗ Error: {e}") - else: - print(f" ✗ File not found: {path}") - - # Show statistics - c = tagger.conn.cursor() - - c.execute("SELECT COUNT(*) FROM images") - image_count = c.fetchone()[0] - print(f"\nTotal images: {image_count}") - - c.execute("SELECT COUNT(*) FROM faces") - face_count = c.fetchone()[0] - print(f"Total faces detected: {face_count}") - - # Get unidentified faces - unidentified = tagger.get_unidentified_faces() - print(f"Unidentified faces: {len(unidentified)}") - - # Close connection - tagger.close() - - print("\n✓ Test completed successfully!") - - finally: - # Clean up - if os.path.exists(db_path): - os.remove(db_path) - - -def main(): - """Main test runner""" - print("PunimTag Test Suite") - print("=" * 50) - - # Run unit tests - print("\nRunning unit tests...") - unittest.main(argv=[''], exit=False, verbosity=2) - - # Optional: Test with actual images - print("\n" + "=" * 50) - print("To test with actual images, call:") - print("python test_punimtag.py image1.jpg image2.jpg ...") - - # Check if images were provided as arguments - import sys - if len(sys.argv) > 1: - image_paths = sys.argv[1:] - test_with_sample_images(image_paths) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/tests/test_web_api.py b/tests/test_web_api.py deleted file mode 100644 index c97b2e4..0000000 --- a/tests/test_web_api.py +++ /dev/null @@ -1,286 +0,0 @@ -#!/usr/bin/env python3 -""" -API Test Suite for PunimTag Web GUI -Tests all web endpoints to identify issues with pre-load check -""" - -import requests -import json -import time -import sys -from urllib.parse import urljoin - -class WebAPITester: - def __init__(self, base_url="http://127.0.0.1:5000"): - self.base_url = base_url - self.session = requests.Session() - self.session.headers.update({ - 'Content-Type': 'application/json', - 'Accept': 'application/json' - }) - - def test_endpoint(self, endpoint, method='GET', data=None, timeout=10, expected_status=200): - """Test a single endpoint with timeout and error handling""" - url = urljoin(self.base_url, endpoint) - - print(f"Testing {method} {endpoint}...") - start_time = time.time() - - try: - if method == 'GET': - response = self.session.get(url, timeout=timeout) - elif method == 'POST': - response = self.session.post(url, json=data, timeout=timeout) - else: - raise ValueError(f"Unsupported method: {method}") - - elapsed = time.time() - start_time - - if response.status_code == expected_status: - print(f" ✅ SUCCESS ({elapsed:.2f}s) - Status: {response.status_code}") - try: - return response.json() - except: - return response.text - else: - print(f" ❌ FAILED ({elapsed:.2f}s) - Status: {response.status_code}") - print(f" Response: {response.text[:200]}") - return None - - except requests.exceptions.Timeout: - elapsed = time.time() - start_time - print(f" ⏰ TIMEOUT ({elapsed:.2f}s) - Endpoint took too long") - return None - except requests.exceptions.ConnectionError: - print(f" 🔌 CONNECTION ERROR - Cannot connect to {self.base_url}") - return None - except Exception as e: - elapsed = time.time() - start_time - print(f" 💥 ERROR ({elapsed:.2f}s) - {str(e)}") - return None - - def test_preload_endpoints(self): - """Test all endpoints used in pre-load check""" - print("\n🔍 Testing Pre-load Check Endpoints") - print("=" * 50) - - # Test database connection - db_result = self.test_endpoint('/check_database') - if not db_result: - print(" ❌ Database check failed - this will cause pre-load issues") - return False - - # Test system status - status_result = self.test_endpoint('/system_status') - if not status_result: - print(" ❌ System status failed - this will cause pre-load issues") - return False - - # Test debug endpoint - debug_result = self.test_endpoint('/debug/preload_test') - if not debug_result: - print(" ❌ Debug endpoint failed - this will cause pre-load issues") - return False - - print(" ✅ All pre-load endpoints working correctly") - return True - - def test_main_endpoints(self): - """Test main application endpoints""" - print("\n📱 Testing Main Application Endpoints") - print("=" * 50) - - # Test main page - main_result = self.test_endpoint('/', expected_status=200) - if not main_result: - print(" ❌ Main page failed") - return False - - # Test photos endpoint - photos_result = self.test_endpoint('/get_photos?tab=all_photos&page=1&per_page=1') - if not photos_result: - print(" ❌ Photos endpoint failed") - return False - - # Test faces endpoint - faces_result = self.test_endpoint('/get_faces?tab=unidentified&page=1&per_page=1') - if not faces_result: - print(" ❌ Faces endpoint failed") - return False - - print(" ✅ All main endpoints working correctly") - return True - - def test_thumbnail_endpoints(self): - """Test thumbnail generation endpoints""" - print("\n🖼️ Testing Thumbnail Endpoints") - print("=" * 50) - - # First get a face ID to test with - faces_result = self.test_endpoint('/get_faces?tab=unidentified&page=1&per_page=1') - if not faces_result or not isinstance(faces_result, dict) or not faces_result.get('faces'): - print(" ⚠️ No faces available for thumbnail testing") - return True - - faces = faces_result.get('faces', []) - if not faces: - print(" ⚠️ No faces available for thumbnail testing") - return True - - face_id = faces[0].get('face_id') - if not face_id: - print(" ⚠️ No valid face ID found for thumbnail testing") - return True - - # Test face thumbnail - thumbnail_result = self.test_endpoint(f'/get_thumbnail/{face_id}') - if not thumbnail_result: - print(" ❌ Face thumbnail endpoint failed") - return False - - # Test photo thumbnail - photos_result = self.test_endpoint('/get_photos?tab=all_photos&page=1&per_page=1') - if photos_result and isinstance(photos_result, dict) and photos_result.get('photos'): - photos = photos_result.get('photos', []) - if photos: - photo_id = photos[0].get('image_id') - if photo_id: - photo_thumbnail_result = self.test_endpoint(f'/get_photo_thumbnail/{photo_id}') - if not photo_thumbnail_result: - print(" ❌ Photo thumbnail endpoint failed") - return False - - print(" ✅ All thumbnail endpoints working correctly") - return True - - def test_performance(self): - """Test endpoint performance""" - print("\n⚡ Performance Testing") - print("=" * 50) - - endpoints = [ - '/check_database', - '/system_status', - '/debug/preload_test', - '/get_photos?tab=all_photos&page=1&per_page=1', - '/get_faces?tab=unidentified&page=1&per_page=1' - ] - - performance_results = {} - - for endpoint in endpoints: - times = [] - for i in range(3): # Test each endpoint 3 times - start_time = time.time() - result = self.test_endpoint(endpoint, timeout=30) - elapsed = time.time() - start_time - times.append(elapsed) - time.sleep(0.5) # Small delay between tests - - avg_time = sum(times) / len(times) - performance_results[endpoint] = { - 'avg_time': avg_time, - 'min_time': min(times), - 'max_time': max(times), - 'success': all(t < 30 for t in times) # All under 30s timeout - } - - status = "✅" if performance_results[endpoint]['success'] else "❌" - print(f" {status} {endpoint}: {avg_time:.2f}s avg ({min(times):.2f}s-{max(times):.2f}s)") - - return performance_results - - def test_browser_simulation(self): - """Simulate browser behavior for pre-load check""" - print("\n🌐 Browser Simulation Test") - print("=" * 50) - - # Simulate the exact pre-load check sequence - checks = [ - { 'name': 'Database Connection', 'endpoint': '/check_database' }, - { 'name': 'System Status', 'endpoint': '/system_status' }, - { 'name': 'Debug Test', 'endpoint': '/debug/preload_test' } - ] - - all_passed = True - - for check in checks: - print(f"Testing {check['name']}...") - result = self.test_endpoint(check['endpoint'], timeout=10) - - if result: - print(f" ✅ {check['name']} passed") - else: - print(f" ❌ {check['name']} failed") - all_passed = False - - if all_passed: - print(" 🎉 All browser simulation tests passed!") - else: - print(" 💥 Some browser simulation tests failed!") - - return all_passed - - def run_all_tests(self): - """Run all tests""" - print("🚀 Starting PunimTag Web API Test Suite") - print("=" * 60) - - # Test server connectivity first - print("\n🔌 Testing Server Connectivity") - print("-" * 30) - try: - response = self.session.get(self.base_url, timeout=5) - print(f"✅ Server is running at {self.base_url}") - except Exception as e: - print(f"❌ Cannot connect to server: {e}") - print("Make sure the server is running with: python simple_web_gui.py") - return False - - # Run all test suites - results = { - 'preload': self.test_preload_endpoints(), - 'main': self.test_main_endpoints(), - 'thumbnails': self.test_thumbnail_endpoints(), - 'performance': self.test_performance(), - 'browser_sim': self.test_browser_simulation() - } - - # Summary - print("\n📊 Test Summary") - print("=" * 60) - - passed = sum(1 for result in results.values() if result) - total = len(results) - - for test_name, result in results.items(): - status = "✅ PASS" if result else "❌ FAIL" - print(f" {status} {test_name.replace('_', ' ').title()}") - - print(f"\nOverall: {passed}/{total} test suites passed") - - if passed == total: - print("🎉 All tests passed! The web API is working correctly.") - print("If the browser is still stuck, the issue might be:") - print(" - Browser cache (try Ctrl+F5)") - print(" - CORS issues (check browser console)") - print(" - JavaScript errors (check browser console)") - else: - print("💥 Some tests failed. Check the output above for details.") - - return passed == total - -def main(): - """Main test runner""" - if len(sys.argv) > 1: - base_url = sys.argv[1] - else: - base_url = "http://127.0.0.1:5000" - - tester = WebAPITester(base_url) - success = tester.run_all_tests() - - sys.exit(0 if success else 1) - -if __name__ == "__main__": - main() \ No newline at end of file