Update project documentation and structure; enhance README, finalize project reorganization, and improve testing standards.

This commit is contained in:
ilia 2025-09-03 17:07:17 -04:00
parent f4a83b3c40
commit 0e66b2253f
56 changed files with 1267 additions and 14713 deletions

View File

@ -1,27 +0,0 @@
# API Development Guidelines
## Response Format
Always use this JSON structure:
```json
{
"success": true,
"data": {
/* response data */
},
"message": "Optional message"
}
```
## Error Handling
- Use proper HTTP status codes
- Include descriptive error messages
- Log errors for debugging
## Database Operations
- Always use parameterized queries
- Handle connection management properly
- Implement rollback on errors

View File

@ -1,68 +0,0 @@
# Database Operations Guidelines
## Connection Management
Always use proper connection management with error handling:
```python
def get_db_connection():
conn = sqlite3.connect('punimtag_simple.db')
conn.row_factory = sqlite3.Row # Enable dict-like access
return conn
# Usage in endpoint
try:
conn = get_db_connection()
cursor = conn.cursor()
# Database operations
conn.commit()
except Exception as e:
conn.rollback()
return jsonify({'success': False, 'error': str(e)}), 500
finally:
conn.close()
```
## Parameterized Queries
Always use parameterized queries to prevent SQL injection:
```python
# Correct - Use parameterized queries
cursor.execute('SELECT * FROM images WHERE id = ?', (image_id,))
cursor.execute('INSERT INTO photos (name, path) VALUES (?, ?)', (name, path))
# Wrong - String concatenation (vulnerable to SQL injection)
cursor.execute(f'SELECT * FROM images WHERE id = {image_id}')
```
## Database Schema
Follow the established schema:
```sql
-- Core tables
images (id, filename, path, date_taken, metadata)
faces (id, image_id, person_id, encoding, coordinates, confidence)
people (id, name, created_date)
tags (id, name)
image_tags (image_id, tag_id)
-- Supporting tables
face_encodings (id, face_id, encoding_data)
photo_metadata (image_id, exif_data, gps_data)
```
## Query Optimization
- Use indexes on frequently queried columns
- Minimize N+1 query problems
- Use LIMIT and OFFSET for pagination
- Consider query performance for large datasets
## Data Validation
- Validate data before database operations
- Check for required fields
- Handle data type conversions properly
- Implement proper error messages

View File

@ -1,130 +0,0 @@
# Face Recognition Guidelines
## Technology Stack
- **dlib**: Primary face detection and recognition library
- **Pillow (PIL)**: Image processing and manipulation
- **NumPy**: Numerical operations for face encodings
- **OpenCV**: Optional for additional image processing
## Face Detection Pipeline
Follow this standardized pipeline:
```python
import dlib
import numpy as np
from PIL import Image
from typing import Dict, List
def detect_faces_in_image(image_path: str) -> List[Dict]:
"""
Detect faces in an image using dlib.
Args:
image_path: Path to the image file
Returns:
List of face dictionaries with coordinates and encodings
"""
# Load image
image = dlib.load_rgb_image(image_path)
# Initialize face detector
detector = dlib.get_frontal_face_detector()
# Detect faces
faces = detector(image)
# Get face encodings
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
face_recognizer = dlib.face_recognition_model_v1("dlib_face_recognition_resnet_model_v1.dat")
face_data = []
for face in faces:
# Get facial landmarks
shape = predictor(image, face)
# Get face encoding
face_encoding = np.array(face_recognizer.compute_face_descriptor(image, shape))
face_data.append({
'left': face.left(),
'top': face.top(),
'right': face.right(),
'bottom': face.bottom(),
'encoding': face_encoding.tolist(),
'confidence': calculate_confidence(face_encoding)
})
return face_data
```
## Face Recognition and Matching
Use standardized similarity matching:
```python
def calculate_face_similarity(encoding1: List[float], encoding2: List[float]) -> float:
"""
Calculate similarity between two face encodings using Euclidean distance.
Args:
encoding1: First face encoding
encoding2: Second face encoding
Returns:
Similarity score (0 = identical, higher = more different)
"""
return np.linalg.norm(np.array(encoding1) - np.array(encoding2))
def find_similar_faces(target_encoding: List[float],
face_encodings: List[List[float]],
threshold: float = 0.6) -> List[int]:
"""
Find faces similar to the target encoding.
Args:
target_encoding: Encoding to match against
face_encodings: List of encodings to search
threshold: Similarity threshold (lower = more strict)
Returns:
List of indices of similar faces
"""
similar_faces = []
for i, encoding in enumerate(face_encodings):
similarity = calculate_face_similarity(target_encoding, encoding)
if similarity <= threshold:
similar_faces.append(i)
return similar_faces
```
## Image Processing Best Practices
- **Thumbnail Generation**: Create thumbnails for UI display
- **Memory Management**: Process large images in chunks
- **Format Support**: Handle multiple image formats (JPG, PNG, etc.)
- **Error Handling**: Gracefully handle corrupted images
## Performance Optimization
- **Batch Processing**: Process multiple images efficiently
- **Caching**: Cache face encodings to avoid recomputation
- **GPU Acceleration**: Use CUDA when available for dlib
- **Parallel Processing**: Use multiprocessing for large datasets
## Quality Control
- **Confidence Scoring**: Implement confidence thresholds
- **False Positive Detection**: Filter out non-face detections
- **Face Quality Assessment**: Evaluate face image quality
- **Duplicate Detection**: Identify and handle duplicate faces
## Storage and Retrieval
- **Encoding Storage**: Store face encodings efficiently in database
- **Indexing**: Use appropriate database indexes for fast retrieval
- **Compression**: Consider compression for large encoding datasets
- **Backup**: Regular backup of face recognition data

View File

@ -1,132 +0,0 @@
# JavaScript Conventions
## Code Style
Use ES6+ features and modern JavaScript practices:
```javascript
// Use ES6+ features
const API_BASE_URL = "/api";
const DEFAULT_PAGE_SIZE = 20;
// Async/await for API calls
async function fetchPhotos(page = 1, perPage = DEFAULT_PAGE_SIZE) {
try {
const response = await fetch(
`${API_BASE_URL}/photos?page=${page}&per_page=${perPage}`
);
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
const data = await response.json();
return data;
} catch (error) {
console.error("Error fetching photos:", error);
throw error;
}
}
```
## Event Handlers
Use descriptive function names and proper event handling:
```javascript
// Event handlers
function handlePhotoClick(photoId) {
showPhotoDetails(photoId);
}
function handleFaceIdentification(faceId, personName) {
identifyFace(faceId, personName);
}
```
## DOM Manipulation
Use efficient DOM manipulation patterns:
```javascript
// DOM manipulation
function updatePhotoGrid(photos) {
const grid = document.getElementById("photo-grid");
grid.innerHTML = "";
photos.forEach((photo) => {
const photoElement = createPhotoElement(photo);
grid.appendChild(photoElement);
});
}
function createPhotoElement(photo) {
const element = document.createElement("div");
element.className = "photo-card";
element.innerHTML = `
<img src="${photo.thumbnail_url}" alt="${photo.filename}">
<div class="photo-info">
<h3>${photo.filename}</h3>
<p>${photo.date_taken}</p>
</div>
`;
return element;
}
```
## Error Handling
Implement comprehensive error handling:
```javascript
// Global error handler
window.addEventListener("error", (event) => {
console.error("Global error:", event.error);
showErrorMessage("An unexpected error occurred");
});
// API error handling
async function safeApiCall(apiFunction, ...args) {
try {
return await apiFunction(...args);
} catch (error) {
console.error("API call failed:", error);
showErrorMessage("Failed to load data. Please try again.");
return null;
}
}
```
## Progressive Loading
Implement progressive loading for better UX:
```javascript
// Progressive loading with Intersection Observer
const observer = new IntersectionObserver((entries) => {
entries.forEach((entry) => {
if (entry.isIntersecting) {
loadMorePhotos();
}
});
});
// Debouncing for search
function debounce(func, wait) {
let timeout;
return function executedFunction(...args) {
const later = () => {
clearTimeout(timeout);
func(...args);
};
clearTimeout(timeout);
timeout = setTimeout(later, wait);
};
}
```
## Constants and Configuration
- Define constants at the top of files
- Use meaningful names
- Group related constants together

View File

@ -1,433 +0,0 @@
# Performance Optimization Guidelines
## Image Processing Optimization
### Thumbnail Generation
Implement efficient thumbnail generation with caching:
```python
import os
from PIL import Image
from functools import lru_cache
THUMBNAIL_SIZE = (200, 200)
THUMBNAIL_CACHE_DIR = 'thumbnails'
@lru_cache(maxsize=1000)
def generate_thumbnail(image_path: str, size: tuple = THUMBNAIL_SIZE) -> str:
"""
Generate thumbnail with caching.
Args:
image_path: Path to original image
size: Thumbnail size (width, height)
Returns:
Path to generated thumbnail
"""
# Create cache directory if it doesn't exist
os.makedirs(THUMBNAIL_CACHE_DIR, exist_ok=True)
# Generate cache key (note: built-in hash() is randomized per process via PYTHONHASHSEED,
# so this key is NOT stable across runs — use hashlib for a persistent on-disk cache key)
cache_key = f"{hash(image_path)}_{size[0]}x{size[1]}.jpg"
cache_path = os.path.join(THUMBNAIL_CACHE_DIR, cache_key)
# Return cached thumbnail if it exists
if os.path.exists(cache_path):
return cache_path
# Generate new thumbnail
with Image.open(image_path) as img:
img.thumbnail(size, Image.Resampling.LANCZOS)
img.save(cache_path, 'JPEG', quality=85, optimize=True)
return cache_path
```
### Progressive Loading
Implement progressive loading for large photo collections:
```python
def get_photos_paginated(page: int = 1, per_page: int = 20) -> Dict[str, Any]:
"""
Get photos with pagination for performance.
Args:
page: Page number (1-based)
per_page: Number of photos per page
Returns:
Dictionary with photos and pagination info
"""
offset = (page - 1) * per_page
conn = get_db_connection()
cursor = conn.cursor()
# Get total count
cursor.execute('SELECT COUNT(*) FROM images')
total = cursor.fetchone()[0]
# Get paginated results
cursor.execute('''
SELECT id, filename, path, date_taken
FROM images
ORDER BY date_taken DESC
LIMIT ? OFFSET ?
''', (per_page, offset))
photos = [dict(row) for row in cursor.fetchall()]
conn.close()
return {
'photos': photos,
'pagination': {
'page': page,
'per_page': per_page,
'total': total,
'pages': (total + per_page - 1) // per_page
}
}
```
## Database Optimization
### Indexing Strategy
Create appropriate indexes for frequently queried columns:
```sql
-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_images_date_taken ON images(date_taken);
CREATE INDEX IF NOT EXISTS idx_faces_image_id ON faces(image_id);
CREATE INDEX IF NOT EXISTS idx_faces_person_id ON faces(person_id);
CREATE INDEX IF NOT EXISTS idx_image_tags_image_id ON image_tags(image_id);
CREATE INDEX IF NOT EXISTS idx_image_tags_tag_id ON image_tags(tag_id);
```
### Query Optimization
Optimize database queries for performance:
```python
def get_photos_with_faces_optimized(page: int = 1, per_page: int = 20) -> Dict[str, Any]:
"""
Optimized query to get photos with face counts.
Args:
page: Page number
per_page: Photos per page
Returns:
Photos with face counts
"""
offset = (page - 1) * per_page
conn = get_db_connection()
cursor = conn.cursor()
# Single query with JOIN instead of N+1 queries
cursor.execute('''
SELECT
i.id,
i.filename,
i.path,
i.date_taken,
COUNT(f.id) as face_count
FROM images i
LEFT JOIN faces f ON i.id = f.image_id
GROUP BY i.id, i.filename, i.path, i.date_taken
ORDER BY i.date_taken DESC
LIMIT ? OFFSET ?
''', (per_page, offset))
photos = [dict(row) for row in cursor.fetchall()]
conn.close()
return {'photos': photos}
```
### Connection Pooling
Implement connection pooling for better performance:
```python
import sqlite3
from contextlib import contextmanager
from threading import local
_thread_local = local()
def get_db_connection():
"""Get database connection with thread-local storage."""
if not hasattr(_thread_local, 'connection'):
_thread_local.connection = sqlite3.connect('punimtag_simple.db')
_thread_local.connection.row_factory = sqlite3.Row
return _thread_local.connection
@contextmanager
def db_transaction():
"""Context manager for database transactions."""
conn = get_db_connection()
try:
yield conn
conn.commit()
except Exception:
conn.rollback()
raise
```
## Frontend Performance
### Lazy Loading
Implement lazy loading for images:
```javascript
// Lazy loading with Intersection Observer
function setupLazyLoading() {
const imageObserver = new IntersectionObserver((entries, observer) => {
entries.forEach((entry) => {
if (entry.isIntersecting) {
const img = entry.target;
img.src = img.dataset.src;
img.classList.remove("lazy");
observer.unobserve(img);
}
});
});
// Observe all lazy images
document.querySelectorAll("img[data-src]").forEach((img) => {
imageObserver.observe(img);
});
}
// Progressive loading for photo grid
function loadMorePhotos() {
const currentPage =
parseInt(document.getElementById("photo-grid").dataset.page) || 1;
fetch(`/api/photos?page=${currentPage + 1}&per_page=20`)
.then((response) => response.json())
.then((data) => {
if (data.success && data.data.photos.length > 0) {
appendPhotosToGrid(data.data.photos);
document.getElementById("photo-grid").dataset.page = currentPage + 1;
}
});
}
```
### Debouncing and Throttling
Implement debouncing for search and filtering:
```javascript
// Debounced search function
function debounce(func, wait) {
let timeout;
return function executedFunction(...args) {
const later = () => {
clearTimeout(timeout);
func(...args);
};
clearTimeout(timeout);
timeout = setTimeout(later, wait);
};
}
// Debounced search
const debouncedSearch = debounce((searchTerm) => {
fetch(`/api/photos?search=${encodeURIComponent(searchTerm)}`)
.then((response) => response.json())
.then((data) => {
if (data.success) {
updatePhotoGrid(data.data.photos);
}
});
}, 300);
// Throttled scroll handler
function throttle(func, limit) {
let inThrottle;
return function () {
const args = arguments;
const context = this;
if (!inThrottle) {
func.apply(context, args);
inThrottle = true;
setTimeout(() => (inThrottle = false), limit);
}
};
}
```
## Memory Management
### Image Processing Memory
Optimize memory usage for large images:
```python
def process_large_image_safely(image_path: str) -> Dict[str, Any]:
"""
Process large image with memory management.
Args:
image_path: Path to image file
Returns:
Processing results
"""
try:
with Image.open(image_path) as img:
# Convert to RGB if necessary
if img.mode != 'RGB':
img = img.convert('RGB')
# Process in chunks for very large images
if img.size[0] * img.size[1] > 10000000: # 10MP threshold
return process_large_image_in_chunks(img)
else:
return process_image_normal(img)
except Exception as e:
logger.error(f"Error processing image {image_path}: {e}")
return {'error': str(e)}
```
### Database Memory
Optimize database memory usage:
```python
def get_faces_with_encodings_optimized(limit: int = 100) -> List[Dict]:
"""
Get faces with encodings using memory-efficient approach.
Args:
limit: Maximum number of faces to retrieve
Returns:
List of face data
"""
conn = get_db_connection()
cursor = conn.cursor()
# Use generator to avoid loading all data into memory
cursor.execute('''
SELECT id, image_id, person_id, encoding, coordinates
FROM faces
LIMIT ?
''', (limit,))
faces = []
for row in cursor:
face_data = dict(row)
# Convert encoding string back to list if needed
if isinstance(face_data['encoding'], str):
face_data['encoding'] = json.loads(face_data['encoding'])
faces.append(face_data)
conn.close()
return faces
```
## Caching Strategies
### Application-Level Caching
Implement caching for frequently accessed data:
```python
from functools import lru_cache
import time
# Cache for expensive operations
@lru_cache(maxsize=100)
def get_person_photos_cached(person_id: int) -> List[Dict]:
"""Get photos for a person with caching."""
return get_person_photos(person_id)
# Time-based cache
class TimedCache:
def __init__(self, ttl_seconds: int = 300):
self.cache = {}
self.ttl = ttl_seconds
def get(self, key: str):
if key in self.cache:
value, timestamp = self.cache[key]
if time.time() - timestamp < self.ttl:
return value
else:
del self.cache[key]
return None
def set(self, key: str, value: Any):
self.cache[key] = (value, time.time())
# Global cache instance
photo_cache = TimedCache(ttl_seconds=300)
```
## Performance Monitoring
### Metrics Collection
Implement performance monitoring:
```python
import time
from functools import wraps
def measure_performance(func):
"""Decorator to measure function performance."""
@wraps(func)
def wrapper(*args, **kwargs):
start_time = time.time()
result = func(*args, **kwargs)
end_time = time.time()
logger.info(f"{func.__name__} took {end_time - start_time:.3f} seconds")
return result
return wrapper
# Usage
@measure_performance
def process_photo_batch(photo_paths: List[str]) -> List[Dict]:
"""Process a batch of photos with performance monitoring."""
results = []
for path in photo_paths:
result = process_single_photo(path)
results.append(result)
return results
```
## Best Practices Summary
### Backend Performance
- **Database Indexing**: Create indexes on frequently queried columns
- **Query Optimization**: Use JOINs instead of N+1 queries
- **Connection Management**: Implement connection pooling
- **Caching**: Cache expensive operations
- **Batch Processing**: Process data in batches
### Frontend Performance
- **Lazy Loading**: Load images and data on demand
- **Debouncing**: Prevent excessive API calls
- **Progressive Loading**: Load data in chunks
- **Image Optimization**: Use appropriate image formats and sizes
### Memory Management
- **Resource Cleanup**: Properly close files and connections
- **Memory Monitoring**: Monitor memory usage
- **Efficient Data Structures**: Use appropriate data structures
- **Garbage Collection**: Help garbage collector with proper cleanup

View File

@ -1,74 +0,0 @@
# Python Coding Conventions
## Code Style (PEP 8)
- Use snake_case for variables and functions
- Use PascalCase for classes
- Use UPPER_CASE for constants
- Follow PEP 8 formatting guidelines
## Type Hints
Always use type hints for function parameters and return values:
```python
from typing import Any, List, Dict, Optional, Union, Tuple
def get_photos(
user_id: int,
page: int = 1,
per_page: int = DEFAULT_PAGE_SIZE,
filters: Optional[Dict[str, Any]] = None
) -> Dict[str, Union[List[Dict], int]]:
"""Get photos with pagination and filtering."""
pass
```
## Error Handling
Use comprehensive error handling with logging:
```python
import logging
from typing import Optional
logger = logging.getLogger(__name__)
def safe_operation(func):
"""Decorator for safe operation execution."""
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except Exception as e:
logger.error(f"Error in {func.__name__}: {e}")
return None
return wrapper
```
## Function Documentation
Use detailed docstrings with Args, Returns, and Raises sections:
```python
def process_image(image_path: str, max_size: int = MAX_FILE_SIZE) -> Dict[str, Any]:
"""
Process an image file and extract metadata.
Args:
image_path: Path to the image file
max_size: Maximum file size in bytes
Returns:
Dictionary containing image metadata
Raises:
FileNotFoundError: If image file doesn't exist
ValueError: If file size exceeds limit
"""
```
## Constants and Configuration
- Define constants at module level
- Use meaningful names with UPPER_CASE
- Group related constants together

View File

@ -1,280 +0,0 @@
# Security and Privacy Guidelines
## Data Protection Principles
### Local Storage Only
- **No Cloud Dependencies**: All data stays on user's local machine
- **No External APIs**: Face recognition runs locally using dlib
- **No Data Sharing**: User data is never transmitted to external services
### Input Validation
Always validate and sanitize user inputs:
```python
import os
import re
from pathlib import Path
def validate_image_path(image_path: str) -> bool:
"""
Validate image file path for security.
Args:
image_path: Path to validate
Returns:
True if path is valid and secure
"""
# Check for path traversal attempts
if '..' in image_path or '//' in image_path:
return False
# Ensure path is within allowed directory
allowed_dir = Path('/photos')
try:
resolved_path = Path(image_path).resolve()
return allowed_dir in resolved_path.parents
except (ValueError, RuntimeError):
return False
def sanitize_filename(filename: str) -> str:
"""
Sanitize filename to prevent security issues.
Args:
filename: Original filename
Returns:
Sanitized filename
"""
# Remove dangerous characters
filename = re.sub(r'[<>:"/\\|?*]', '_', filename)
# Limit length
if len(filename) > 255:
name, ext = os.path.splitext(filename)
filename = name[:255-len(ext)] + ext
return filename
```
## File Upload Security
Implement secure file upload handling:
```python
ALLOWED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.bmp'}
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB
def validate_uploaded_file(file) -> Dict[str, Any]:
"""
Validate uploaded file for security.
Args:
file: Uploaded file object
Returns:
Validation result with success status and message
"""
# Check file extension
if not file.filename:
return {'success': False, 'error': 'No filename provided'}
file_ext = os.path.splitext(file.filename)[1].lower()
if file_ext not in ALLOWED_EXTENSIONS:
return {'success': False, 'error': f'File type {file_ext} not allowed'}
# Check file size
file.seek(0, os.SEEK_END)
file_size = file.tell()
file.seek(0)
if file_size > MAX_FILE_SIZE:
return {'success': False, 'error': 'File too large'}
# Validate file content (basic check)
try:
from PIL import Image
image = Image.open(file)
image.verify()
file.seek(0)
except Exception:
return {'success': False, 'error': 'Invalid image file'}
return {'success': True, 'message': 'File validated successfully'}
```
## SQL Injection Prevention
Always use parameterized queries:
```python
# Correct - Use parameterized queries
def get_photo_by_id(photo_id: int) -> Optional[Dict]:
conn = get_db_connection()
cursor = conn.cursor()
cursor.execute('SELECT * FROM images WHERE id = ?', (photo_id,))
result = cursor.fetchone()
conn.close()
return dict(result) if result else None
# Wrong - String concatenation (vulnerable to SQL injection)
def get_photo_by_id_unsafe(photo_id: int) -> Optional[Dict]:
conn = get_db_connection()
cursor = conn.cursor()
cursor.execute(f'SELECT * FROM images WHERE id = {photo_id}') # DANGEROUS!
result = cursor.fetchone()
conn.close()
return dict(result) if result else None
```
## Privacy Protection
### Face Data Privacy
- **Local Storage**: Face encodings stored locally only
- **No Sharing**: Face data never transmitted externally
- **User Control**: Users can delete their face data
- **Encryption**: Consider encrypting sensitive face data
### Metadata Handling
- **EXIF Data**: Strip sensitive metadata (GPS, camera info)
- **User Consent**: Ask before storing location data
- **Data Minimization**: Only store necessary metadata
### Access Control
```python
def check_file_access_permissions(file_path: str, user_id: int) -> bool:
"""
Check if user has permission to access file.
Args:
file_path: Path to file
user_id: User ID requesting access
Returns:
True if access is allowed
"""
# In single-user system, all files belong to the user
# In multi-user system, implement proper access control
return True # Simplified for single-user system
```
## Error Handling and Logging
Implement secure error handling:
```python
import logging
from typing import Optional
logger = logging.getLogger(__name__)
def safe_operation(func):
"""Decorator for safe operation execution."""
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except Exception as e:
# Log error without exposing sensitive information
logger.error(f"Error in {func.__name__}: {type(e).__name__}")
return None
return wrapper
def handle_api_error(error: Exception) -> Dict[str, Any]:
"""
Handle API errors securely.
Args:
error: Exception that occurred
Returns:
Safe error response
"""
# Don't expose internal error details to users
if isinstance(error, ValueError):
return {'success': False, 'error': 'Invalid input provided'}
elif isinstance(error, FileNotFoundError):
return {'success': False, 'error': 'File not found'}
else:
logger.error(f"Unexpected error: {error}")
return {'success': False, 'error': 'An unexpected error occurred'}
```
## Data Export and Deletion
Provide user control over their data:
```python
def export_user_data(user_id: int) -> Dict[str, Any]:
"""
Export user data for portability.
Args:
user_id: User ID to export data for
Returns:
Dictionary containing user's data
"""
conn = get_db_connection()
cursor = conn.cursor()
# Export photos
cursor.execute('SELECT * FROM images WHERE user_id = ?', (user_id,))
photos = [dict(row) for row in cursor.fetchall()]
# Export face data
cursor.execute('SELECT * FROM faces WHERE user_id = ?', (user_id,))
faces = [dict(row) for row in cursor.fetchall()]
conn.close()
return {
'photos': photos,
'faces': faces,
'export_date': datetime.now().isoformat()
}
def delete_user_data(user_id: int) -> bool:
"""
Delete all user data.
Args:
user_id: User ID to delete data for
Returns:
True if deletion successful
"""
try:
conn = get_db_connection()
cursor = conn.cursor()
# Delete user's data
cursor.execute('DELETE FROM faces WHERE user_id = ?', (user_id,))
cursor.execute('DELETE FROM images WHERE user_id = ?', (user_id,))
conn.commit()
conn.close()
return True
except Exception as e:
logger.error(f"Error deleting user data: {e}")
return False
```
## Security Best Practices
- **Regular Updates**: Keep dependencies updated
- **Input Validation**: Validate all user inputs
- **Error Handling**: Don't expose sensitive information in errors
- **Logging**: Log security-relevant events
- **Backup Security**: Secure backup of user data
- **Access Control**: Implement proper access controls

View File

@ -1,169 +0,0 @@
# Testing Standards
## Test Organization
Follow this directory structure:
```
tests/
├── unit/ # Unit tests for individual functions
├── integration/ # Integration tests for API endpoints
├── e2e/ # End-to-end tests for complete workflows
├── fixtures/ # Test data and fixtures
├── utils/ # Test utilities and helpers
└── conftest.py # pytest configuration and shared fixtures
```
## Unit Tests
Test individual functions and classes in isolation:
```python
# tests/unit/test_face_recognition.py
import pytest
from src.utils.face_recognition import detect_faces, encode_face
def test_detect_faces_with_valid_image():
"""Test face detection with a valid image."""
image_path = "tests/fixtures/valid_face.jpg"
faces = detect_faces(image_path)
assert len(faces) > 0
assert all(hasattr(face, 'left') for face in faces)
assert all(hasattr(face, 'top') for face in faces)
def test_detect_faces_with_no_faces():
"""Test face detection with an image containing no faces."""
image_path = "tests/fixtures/no_faces.jpg"
faces = detect_faces(image_path)
assert len(faces) == 0
def test_encode_face_with_valid_face():
"""Test face encoding with a valid face."""
face_image = load_test_face_image()
encoding = encode_face(face_image)
assert len(encoding) == 128
assert all(isinstance(x, float) for x in encoding)
```
## Integration Tests
Test API endpoints and database interactions:
```python
# tests/integration/test_photo_api.py
import pytest
from src.app import app
@pytest.fixture
def client():
"""Create a test client."""
app.config['TESTING'] = True
app.config['DATABASE'] = 'test.db'
with app.test_client() as client:
yield client
def test_get_photos_endpoint(client):
"""Test the GET /photos endpoint."""
response = client.get('/photos')
assert response.status_code == 200
data = response.get_json()
assert data['success'] is True
assert 'photos' in data
def test_create_photo_endpoint(client):
"""Test the POST /photos endpoint."""
photo_data = {
'filename': 'test.jpg',
'path': '/test/path/test.jpg'
}
response = client.post('/photos', json=photo_data)
assert response.status_code == 201
data = response.get_json()
assert data['success'] is True
assert 'photo_id' in data
```
## Test Fixtures
Use fixtures for common test data:
```python
# tests/conftest.py
import pytest
import sqlite3
import tempfile
import os
@pytest.fixture
def test_db():
"""Create a temporary test database."""
db_fd, db_path = tempfile.mkstemp()
# Create test database schema
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
cursor.execute('''
CREATE TABLE images (
id INTEGER PRIMARY KEY,
filename TEXT NOT NULL,
path TEXT NOT NULL,
date_taken TEXT
)
''')
conn.commit()
conn.close()
yield db_path
# Cleanup
os.close(db_fd)
os.unlink(db_path)
@pytest.fixture
def sample_photo_data():
"""Sample photo data for testing."""
return {
'filename': 'test_photo.jpg',
'path': '/test/path/test_photo.jpg',
'date_taken': '2024-01-01 12:00:00'
}
```
## Test Naming Conventions
- **Unit Tests**: `test_<function_name>_<scenario>.py`
- **Integration Tests**: `test_<feature>_integration.py`
- **E2E Tests**: `test_<workflow>_e2e.py`
## Test Coverage
- Aim for at least 80% code coverage
- Test both success and error scenarios
- Test edge cases and boundary conditions
- Mock external dependencies
## Performance Testing
Test with realistic data sizes:
```python
def test_large_photo_collection_performance():
"""Test performance with large photo collections."""
large_photo_list = generate_test_photos(1000)
start_time = time.time()
result = process_photos(large_photo_list)
end_time = time.time()
assert end_time - start_time < 5.0 # Should complete within 5 seconds
assert len(result) == 1000
```

View File

@ -1,495 +0,0 @@
# PunimTag - Intelligent Photo Management System
## Project Overview
PunimTag is an intelligent photo management system that uses face recognition to automatically organize, tag, and manage personal photo collections. It's built with Flask (Python) and vanilla JavaScript, focusing on privacy-first local processing.
## Core Value Proposition
- **Automatic Face Recognition**: Identify and tag people in photos without manual effort
- **Smart Organization**: Group photos by people, events, and locations
- **Duplicate Detection**: Find and manage duplicate photos automatically
- **Intuitive Interface**: Web-based GUI that's easy to use for non-technical users
- **Privacy-First**: Local processing, no cloud dependencies
## Technology Stack
### Backend
- **Framework**: Flask (Python web framework)
- **Database**: SQLite (lightweight, file-based)
- **Face Recognition**: dlib (C++ library with Python bindings)
- **Image Processing**: Pillow (PIL fork)
- **Data Processing**: NumPy (numerical operations)
### Frontend
- **Language**: Vanilla JavaScript (ES6+)
- **Styling**: CSS3 with Grid/Flexbox
- **HTTP Client**: Fetch API
- **Progressive Loading**: Intersection Observer API
- **No Frameworks**: Pure JavaScript for simplicity
## Project Structure
```
PunimTag/
├── src/ # Main application source code
│ ├── backend/ # Flask backend and API
│ │ ├── app.py # Main Flask application
│ │ ├── db_manager.py # Database operations
│ │ └── visual_identifier.py # Face recognition
│ ├── frontend/ # JavaScript and UI components
│ └── utils/ # Utility functions
│ └── tag_manager.py # Tag management
├── docs/ # Documentation and steering documents
├── tests/ # Test files
├── data/ # Database files and user data
├── config/ # Configuration files
├── scripts/ # Utility scripts
├── assets/ # Static assets
├── photos/ # User photo storage
└── main.py # Application entry point
```
## Key Features
### 1. Photo Management
- Upload and organize photos by date, location, and content
- Automatic metadata extraction (EXIF data, GPS coordinates)
- Batch operations for efficiency
### 2. Face Recognition & Tagging
- Automatic face detection in photos
- Face identification and naming
- Group photos by people
- Handle multiple faces per photo
### 3. Duplicate Management
- Find duplicate photos automatically
- Visual comparison tools
- Bulk removal options
- Keep best quality versions
### 4. Search & Discovery
- Search by person name
- Filter by date ranges
- Tag-based filtering
- Similar face suggestions
### 5. User Experience
- Progressive loading for large collections
- Responsive web interface
- Custom dialogs (no browser alerts)
- Real-time notifications
## Database Schema
```sql
-- Core tables
images (id, filename, path, date_taken, metadata)
faces (id, image_id, person_id, encoding, coordinates, confidence)
people (id, name, created_date)
tags (id, name)
image_tags (image_id, tag_id)
-- Supporting tables
face_encodings (id, face_id, encoding_data)
photo_metadata (image_id, exif_data, gps_data)
```
## API Standards
### Response Format
**Success Response:**
```json
{
"success": true,
"data": {
// Response data here
},
"message": "Optional success message"
}
```
**Error Response:**
```json
{
"success": false,
"error": "Descriptive error message",
"code": "ERROR_CODE_OPTIONAL"
}
```
**Paginated Response:**
```json
{
"success": true,
"data": {
"items": [...],
"pagination": {
"page": 1,
"per_page": 20,
"total": 150,
"pages": 8
}
}
}
```
### HTTP Status Codes
- **200 OK**: Request successful
- **201 Created**: Resource created successfully
- **400 Bad Request**: Invalid request data
- **404 Not Found**: Resource not found
- **500 Internal Server Error**: Server error
### Endpoint Naming Conventions
- **GET /photos**: List photos
- **GET /photos/{id}**: Get specific photo
- **POST /photos**: Create new photo
- **PUT /photos/{id}**: Update photo
- **DELETE /photos/{id}**: Delete photo
- **POST /photos/{id}/identify**: Identify faces in photo
## Python Code Conventions
### Code Style (PEP 8)
```python
# Imports
import os
import sys
from typing import Any, List, Dict, Optional
from flask import Flask, request, jsonify
# Constants
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB
ALLOWED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif'}
# Functions
def process_image(image_path: str, max_size: int = MAX_FILE_SIZE) -> Dict[str, Any]:
"""
Process an image file and extract metadata.
Args:
image_path: Path to the image file
max_size: Maximum file size in bytes
Returns:
Dictionary containing image metadata
Raises:
FileNotFoundError: If image file doesn't exist
ValueError: If file size exceeds limit
"""
if not os.path.exists(image_path):
raise FileNotFoundError(f"Image file not found: {image_path}")
file_size = os.path.getsize(image_path)
if file_size > max_size:
raise ValueError(f"File size {file_size} exceeds limit {max_size}")
# Process the image
metadata = extract_metadata(image_path)
return metadata
```
### Naming Conventions
- **Variables and Functions**: Use snake_case
- **Classes**: Use PascalCase
- **Constants**: Use UPPER_CASE
### Type Hints
```python
from typing import Any, List, Dict, Optional, Union, Tuple
def get_photos(
user_id: int,
page: int = 1,
per_page: int = DEFAULT_PAGE_SIZE,
    filters: Optional[Dict[str, Any]] = None
) -> Dict[str, Union[List[Dict], int]]:
"""Get photos with pagination and filtering."""
pass
```
### Error Handling
```python
import logging
from typing import Optional
logger = logging.getLogger(__name__)
def safe_operation(func):
"""Decorator for safe operation execution."""
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except Exception as e:
logger.error(f"Error in {func.__name__}: {e}")
return None
return wrapper
```
## Database Operations
### Connection Management
```python
def get_db_connection():
conn = sqlite3.connect('punimtag_simple.db')
conn.row_factory = sqlite3.Row # Enable dict-like access
return conn
# Usage in endpoint
try:
conn = get_db_connection()
cursor = conn.cursor()
# Database operations
conn.commit()
except Exception as e:
conn.rollback()
return jsonify({'success': False, 'error': str(e)}), 500
finally:
conn.close()
```
### Parameterized Queries
```python
# Always use parameterized queries to prevent SQL injection
cursor.execute('SELECT * FROM images WHERE id = ?', (image_id,))
cursor.execute('INSERT INTO photos (name, path) VALUES (?, ?)', (name, path))
```
## Testing Standards
### Test Organization
```
tests/
├── unit/ # Unit tests for individual functions
├── integration/ # Integration tests for API endpoints
├── e2e/ # End-to-end tests for complete workflows
├── fixtures/ # Test data and fixtures
├── utils/ # Test utilities and helpers
└── conftest.py # pytest configuration and shared fixtures
```
### Unit Test Example
```python
# tests/unit/test_face_recognition.py
import pytest
from src.utils.face_recognition import detect_faces, encode_face
def test_detect_faces_with_valid_image():
"""Test face detection with a valid image."""
image_path = "tests/fixtures/valid_face.jpg"
faces = detect_faces(image_path)
assert len(faces) > 0
assert all(hasattr(face, 'left') for face in faces)
assert all(hasattr(face, 'top') for face in faces)
```
### Integration Test Example
```python
# tests/integration/test_photo_api.py
import pytest
from src.app import app
@pytest.fixture
def client():
"""Create a test client."""
app.config['TESTING'] = True
app.config['DATABASE'] = 'test.db'
with app.test_client() as client:
yield client
def test_get_photos_endpoint(client):
"""Test the GET /photos endpoint."""
response = client.get('/photos')
assert response.status_code == 200
data = response.get_json()
    assert data['success'] is True
assert 'photos' in data
```
## JavaScript Conventions
### Code Style
```javascript
// Use ES6+ features
const API_BASE_URL = '/api';
const DEFAULT_PAGE_SIZE = 20;
// Async/await for API calls
async function fetchPhotos(page = 1, perPage = DEFAULT_PAGE_SIZE) {
try {
const response = await fetch(`${API_BASE_URL}/photos?page=${page}&per_page=${perPage}`);
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
const data = await response.json();
return data;
} catch (error) {
console.error('Error fetching photos:', error);
throw error;
}
}
// Event handlers
function handlePhotoClick(photoId) {
showPhotoDetails(photoId);
}
// DOM manipulation
function updatePhotoGrid(photos) {
const grid = document.getElementById('photo-grid');
grid.innerHTML = '';
photos.forEach(photo => {
const photoElement = createPhotoElement(photo);
grid.appendChild(photoElement);
});
}
```
### Error Handling
```javascript
// Global error handler
window.addEventListener('error', (event) => {
console.error('Global error:', event.error);
showErrorMessage('An unexpected error occurred');
});
// API error handling
async function safeApiCall(apiFunction, ...args) {
try {
return await apiFunction(...args);
} catch (error) {
console.error('API call failed:', error);
showErrorMessage('Failed to load data. Please try again.');
return null;
}
}
```
## Performance Considerations
### Image Processing
- **Thumbnail Generation**: On-demand with caching
- **Face Detection**: Optimized for speed vs accuracy
- **Batch Processing**: Efficient handling of large photo sets
- **Memory Management**: Streaming for large images
### Database Optimization
- **Indexing**: Strategic indexes on frequently queried columns
- **Connection Pooling**: Efficient database connections
- **Query Optimization**: Minimize N+1 query problems
- **Data Archiving**: Move old data to separate tables
### Frontend Performance
- **Progressive Loading**: Load data in chunks
- **Image Lazy Loading**: Load images as they become visible
- **Caching**: Browser caching for static assets
- **Debouncing**: Prevent excessive API calls
## Security Considerations
### Data Protection
- **Local Storage**: No cloud dependencies
- **Input Validation**: Sanitize all user inputs
- **SQL Injection Prevention**: Parameterized queries
- **File Upload Security**: Validate file types and sizes
### Privacy
- **Face Data**: Stored locally, not shared
- **Metadata**: User controls what's stored
- **Access Control**: Local access only
- **Data Export**: User can export/delete their data
## Development Workflow
### Code Organization
- **Modular Design**: Separate concerns into modules
- **Configuration Management**: Environment-based settings
- **Error Handling**: Comprehensive error catching and logging
- **Documentation**: Inline code documentation
### Testing Strategy
- **Unit Tests**: Test individual functions and classes
- **Integration Tests**: Test API endpoints and database operations
- **End-to-End Tests**: Test complete user workflows
- **Performance Tests**: Test with large datasets
## Quick Start Commands
```bash
# Install dependencies
pip install -r requirements.txt
# Run the application
python main.py
# Access the web interface
# http://localhost:5000
# Run tests
python tests/test_main.py
# Run with pytest (if installed)
pytest tests/
```
## Common Development Tasks
### Adding New API Endpoints
1. Follow the API standards for response format
2. Use proper HTTP status codes
3. Implement error handling
4. Add parameterized queries for database operations
5. Write integration tests
### Adding New Features
1. Follow the project structure
2. Use type hints in Python
3. Follow naming conventions
4. Add comprehensive error handling
5. Write tests for new functionality
### Database Changes
1. Use parameterized queries
2. Add proper indexes
3. Handle connection management
4. Implement rollback on errors
5. Update schema documentation
## Troubleshooting
### Common Issues
- **Face Recognition Not Working**: Check dlib installation and CUDA setup
- **Database Errors**: Verify SQLite file permissions and schema
- **Performance Issues**: Check image sizes and database indexes
- **UI Not Loading**: Check browser console for JavaScript errors
### Debug Mode
```python
# Enable debug mode in Flask
app.run(host='0.0.0.0', port=5000, debug=True)
```
## Future Roadmap
- Cloud sync capabilities
- Mobile app companion
- Advanced AI features (emotion detection, age progression)
- Social sharing features
- Integration with existing photo services
## Support and Resources
- Check the steering documents in `docs/`
- Review existing tests in `tests/`
- Check the API standards for endpoint usage
- Follow code conventions for maintainability

14
.gitignore vendored
View File

@ -52,4 +52,16 @@ Thumbs.db
.history/
photos/
photos/
# Photo files and large directories
*.jpg
*.jpeg
*.png
*.gif
*.bmp
*.tiff
*.webp
dlib/
*.dat
*.model

View File

@ -1,66 +0,0 @@
#!/bin/bash
# PunimTag Directory Cleanup Script
# This script safely removes unnecessary files to free up space
echo "🧹 PunimTag Directory Cleanup"
echo "================================"
# 1. Remove Python cache files
echo "📦 Removing Python cache files..."
find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null
find . -name "*.pyc" -delete 2>/dev/null
echo "✅ Python cache cleaned"
# 2. Remove history files (huge space savings)
echo "📚 Removing history files..."
rm -rf .history/
echo "✅ History files cleaned"
# 3. Remove build artifacts
echo "🔨 Removing build artifacts..."
rm -rf dlib/build/
rm -f dlib/CMakeCache.txt
rm -f dlib/CPack*.cmake
echo "✅ Build artifacts cleaned"
# 4. Remove large CUDA packages
echo "🚀 Removing CUDA packages..."
rm -f dlib/cudnn-local-repo-ubuntu2204-9.5.1_1.0-1_amd64.deb
rm -f assets/cuda-repo-wsl-ubuntu-12-6-local_12.6.0-1_amd64.deb
echo "✅ CUDA packages cleaned"
# 5. Remove temporary files
echo "🗑️ Removing temporary files..."
rm -f assets/temp_face_crop_*.jpg
echo "✅ Temporary files cleaned"
# 6. Remove empty database files
echo "🗄️ Checking database files..."
if [ -f "punimtag_simple.db" ] && [ ! -s "punimtag_simple.db" ]; then
echo "⚠️ Found empty database file - removing..."
rm -f punimtag_simple.db
echo "✅ Empty database removed"
else
echo "✅ Database file is valid"
fi
# 7. Optional: Remove old duplicate files
echo "📄 Checking for duplicate files..."
if [ -f "src/backend/punimtag.py" ]; then
echo "⚠️ Found old punimtag.py - consider removing if not needed"
fi
if [ -f "src/backend/web_gui.py" ]; then
echo "⚠️ Found old web_gui.py - consider removing if not needed"
fi
echo ""
echo "🎉 Cleanup Complete!"
echo "📊 Estimated space saved: ~4.7GB+"
echo ""
echo "💡 Remaining files to consider manually:"
echo " - FINAL_STATUS.md (if no longer needed)"
echo " - PROJECT_REORGANIZATION.md (if no longer needed)"
echo " - Old duplicate files in src/backend/"
echo ""
echo "🔍 To see what was removed, check the output above."

View File

@ -1 +0,0 @@

View File

@ -1,173 +0,0 @@
# PunimTag Project Reorganization - Final Status
## ✅ **REORGANIZATION COMPLETE**
The PunimTag project has been successfully reorganized with comprehensive steering documents and a clean, maintainable structure.
## 📊 **Test Results**
```
🧪 Running PunimTag Test Suite
==================================================
✅ Flask app imported successfully
✅ Flask app instance found
✅ Database connection successful
✅ Face recognition module imported successfully
✅ Configuration directory found with 2 files
✅ All required directories exist
✅ All steering documents exist
✅ Main app file found: /mnt/c/Users/admin/Documents/code/PunimTag/src/backend/app.py
✅ Main app file contains Flask app
==================================================
📊 Test Results: 7/7 tests passed
🎉 All tests passed!
```
## 📁 **Final Project Structure**
```
PunimTag/
├── src/ # Main application source code
│ ├── backend/ # Flask backend and API
│ │ ├── app.py # Main Flask application (182KB)
│ │ ├── db_manager.py # Database operations
│ │ ├── visual_identifier.py # Face recognition
│ │ ├── punimtag.py # Legacy app
│ │ ├── punimtag_simple.py # Legacy app
│ │ ├── web_gui.py # Legacy app
│ │ └── __init__.py # Package init
│ ├── frontend/ # JavaScript and UI components
│ │ └── templates/ # HTML templates
│ ├── utils/ # Utility functions
│ │ ├── tag_manager.py # Tag management
│ │ └── __init__.py # Package init
│ └── __init__.py # Package init
├── docs/ # Documentation and steering documents
│ ├── product.md # Product vision and goals
│ ├── structure.md # Project organization
│ ├── tech.md # Technical architecture
│ ├── api-standards.md # API design standards
│ ├── testing-standards.md # Testing guidelines
│ ├── code-conventions.md # Coding standards
│ ├── BACKEND_STATUS.md # Legacy documentation
│ ├── IDEAS.md # Legacy documentation
│ └── TESTING_GUIDE.md # Legacy documentation
├── tests/ # Test files (cleaned up)
│ ├── test_main.py # Main test suite (6KB)
│ ├── conftest.py # Test configuration
│ ├── test_backend.py # Legacy tests
│ ├── test_punimtag.py # Legacy tests
│ ├── test_web_api.py # Legacy tests
│ ├── unit/ # Unit test directory
│ ├── integration/ # Integration test directory
│ ├── e2e/ # End-to-end test directory
│ ├── fixtures/ # Test data directory
│ └── utils/ # Test utilities directory
├── data/ # Database files and user data
│ ├── punimtag_simple.db # Main database (4.4MB)
│ ├── punimtag.db # Legacy database
│ ├── test_backend.db # Test database
│ └── test_basic.db # Test database
├── config/ # Configuration files
│ ├── settings.py # Application settings
│ ├── config.py # Legacy config
│ └── punimtag_config.json
├── scripts/ # Utility scripts
│ ├── cleanup_tests.py # Cleanup script
│ ├── start_gui.py # Legacy script
│ ├── simple_identifier.py # Legacy script
│ ├── interactive_identifier.py # Legacy script
│ └── gui_face_clusters.py # Legacy script
├── assets/ # Static assets
│ ├── temp_face_crop_74280.jpg
│ └── cuda-repo-wsl-ubuntu-12-6-local_12.6.0-1_amd64.deb
├── photos/ # User photo storage
├── venv/ # Virtual environment
├── main.py # Application entry point
├── requirements.txt # Python dependencies
├── README.md # Updated README
├── PROJECT_REORGANIZATION.md # Reorganization summary
├── FINAL_STATUS.md # This file
└── .gitignore # Updated gitignore
```
## 🎯 **Accomplishments**
### ✅ **Files Organized**
- **20+ files moved** to appropriate directories
- **Main application**: `simple_web_gui.py``src/backend/app.py`
- **Database files**: All `.db` files → `data/`
- **Configuration**: `config.py``config/settings.py`
- **Scripts**: Utility scripts → `scripts/`
- **Assets**: Images and files → `assets/`
### ✅ **Redundant Files Cleaned Up**
- **10+ HTML test files** removed (`debug_ui.html`, `test_*.html`)
- **Consolidated tests** into `tests/test_main.py`
- **Clean test directory** with proper structure
### ✅ **Steering Documents Created**
- **6 comprehensive documents** in `docs/`
- **Product vision** and goals
- **Technical architecture** and standards
- **API design** guidelines
- **Testing strategy** and best practices
- **Code conventions** and style guides
### ✅ **Package Structure**
- **Proper Python packages** with `__init__.py` files
- **Clear separation** of concerns
- **Importable modules** from `src/`
### ✅ **Configuration Centralized**
- **Settings management** in `config/settings.py`
- **Environment-based** configuration
- **Database paths** properly configured
## 🚀 **How to Use**
### **Start the Application**
```bash
python main.py
```
### **Run Tests**
```bash
python tests/test_main.py
```
### **Clean Up (if needed)**
```bash
python scripts/cleanup_tests.py
```
## 📚 **For Cursor AI**
The steering documents in `docs/` provide clear guidance for:
- **API Development**: Follow `docs/api-standards.md`
- **Code Quality**: Use `docs/code-conventions.md`
- **Testing**: Implement tests following `docs/testing-standards.md`
- **Architecture**: Reference `docs/tech.md` and `docs/structure.md`
- **Product Decisions**: Review `docs/product.md`
## 🎉 **Status: COMPLETE**
The PunimTag project is now:
- ✅ **Well-organized** with clear structure
- ✅ **Properly documented** with steering documents
- ✅ **Tested and verified** (7/7 tests passing)
- ✅ **Ready for development** with clear guidelines
- ✅ **Scalable** with modular architecture
- ✅ **Maintainable** with consistent conventions
**All objectives achieved!** 🎯

View File

@ -1,206 +0,0 @@
# PunimTag Project Reorganization Summary
## 🎯 Overview
This document summarizes the comprehensive reorganization of the PunimTag project to improve maintainability, documentation, and development workflow.
## 📁 New Project Structure
### Before (Chaotic)
```
PunimTag/
├── simple_web_gui.py (178KB, 4319 lines!)
├── test_*.html (10+ redundant test files)
├── test_*.py (multiple test files)
├── debug_*.html (debug files)
├── Various .py files scattered
└── No clear organization
```
### After (Organized)
```
PunimTag/
├── src/ # Main application source code
│ ├── backend/ # Flask backend and API
│ │ ├── app.py # Main Flask application
│ │ ├── db_manager.py # Database operations
│ │ └── visual_identifier.py # Face recognition
│ ├── frontend/ # JavaScript and UI components
│ └── utils/ # Utility functions
│ └── tag_manager.py # Tag management
├── docs/ # Documentation and steering documents
│ ├── product.md # Product vision and goals
│ ├── structure.md # Project organization
│ ├── tech.md # Technical architecture
│ ├── api-standards.md # API design standards
│ ├── testing-standards.md # Testing guidelines
│ └── code-conventions.md # Coding standards
├── tests/ # Test files
│ ├── test_main.py # Main test suite
│ └── conftest.py # Test configuration
├── data/ # Database files and user data
├── config/ # Configuration files
│ ├── settings.py # Application settings
│ └── punimtag_config.json
├── scripts/ # Utility scripts
├── assets/ # Static assets
├── photos/ # User photo storage
└── main.py # Application entry point
```
## 📚 Steering Documents Created
### 1. Product Vision (`docs/product.md`)
- **Core Value Proposition**: Automatic face recognition, smart organization, duplicate detection
- **Target Users**: Individuals with large photo collections, small businesses
- **Key Features**: Photo management, face recognition, duplicate management, search & discovery
- **Success Metrics**: User engagement, accuracy, performance, usability
- **Future Roadmap**: Cloud sync, mobile app, advanced AI features
### 2. Project Structure (`docs/structure.md`)
- **Directory Organization**: Clear separation of concerns
- **Core Components**: Backend (Flask), Frontend (JavaScript), Data Layer (SQLite)
- **Architecture Principles**: Separation of concerns, progressive enhancement, performance optimization
- **File Naming Conventions**: Consistent naming across Python, JavaScript, and database
- **Dependencies**: Clear technology stack documentation
### 3. Technical Architecture (`docs/tech.md`)
- **Technology Stack**: Flask, SQLite, dlib, Pillow, NumPy
- **Core Technologies**: Face recognition pipeline, database schema, API design
- **Performance Considerations**: Image processing, database optimization, frontend performance
- **Security Considerations**: Data protection, privacy, input validation
- **Scalability**: Current limitations and future scalability options
### 4. API Standards (`docs/api-standards.md`)
- **Response Format**: Consistent JSON response structure
- **HTTP Status Codes**: Proper error handling
- **Endpoint Naming**: RESTful patterns and conventions
- **Request Parameters**: Query parameters and JSON body handling
- **Error Handling**: Standard error handlers and validation
- **Database Operations**: Connection management and parameterized queries
- **Security**: Input sanitization and CORS headers
### 5. Testing Standards (`docs/testing-standards.md`)
- **Test Organization**: Unit, integration, and E2E tests
- **Test Categories**: Comprehensive testing strategy
- **Test Fixtures**: Database and mock fixtures
- **Test Data Management**: Test images and cleanup
- **Performance Testing**: Load testing and benchmarks
- **Code Coverage**: Coverage requirements and reporting
- **Continuous Integration**: GitHub Actions setup
### 6. Code Conventions (`docs/code-conventions.md`)
- **Python Conventions**: PEP 8 compliance, type hints, error handling
- **JavaScript Conventions**: ESLint compliance, async/await, error handling
- **Database Conventions**: Table naming, column naming, index naming
- **File Organization**: Consistent file structure and documentation
- **Documentation Standards**: Function and class documentation
- **Git Conventions**: Commit messages and branch naming
- **Performance Guidelines**: Optimization best practices
- **Security Guidelines**: Input validation and database security
## 🧹 Cleanup Accomplished
### Files Moved
- **Main Application**: `simple_web_gui.py``src/backend/app.py`
- **Database Manager**: `db_manager.py``src/backend/`
- **Face Recognition**: `visual_identifier.py``src/backend/`
- **Tag Manager**: `tag_manager.py``src/utils/`
- **Configuration**: `config.py``config/settings.py`
- **Databases**: All `.db` files → `data/`
- **Scripts**: Utility scripts → `scripts/`
- **Assets**: Images and files → `assets/`
### Files Consolidated
- **Test Files**: 10+ redundant test files → `tests/test_main.py`
- **Debug Files**: Multiple debug HTML files → `tests/` (for reference)
- **Configuration**: Centralized in `config/settings.py`
### Files Created
- **Entry Point**: `main.py` for easy application startup
- **Package Files**: `__init__.py` files for proper Python packages
- **Configuration**: Centralized settings with environment support
- **Documentation**: Comprehensive steering documents
- **Cleanup Script**: `scripts/cleanup_tests.py` for maintenance
## 🎯 Benefits Achieved
### 1. **Maintainability**
- Clear separation of concerns
- Consistent file organization
- Proper Python package structure
- Centralized configuration
### 2. **Documentation**
- Comprehensive steering documents
- Clear development guidelines
- API standards and conventions
- Testing strategy and best practices
### 3. **Development Workflow**
- Easy to find and modify code
- Consistent coding standards
- Proper testing framework
- Clear contribution guidelines
### 4. **Scalability**
- Modular architecture
- Configuration-driven settings
- Proper package structure
- Future-ready organization
### 5. **Quality Assurance**
- Comprehensive testing standards
- Code coverage requirements
- Performance guidelines
- Security best practices
## 🚀 Next Steps
### For Developers
1. **Read the steering documents** in `docs/`
2. **Follow the code conventions** for consistency
3. **Use the organized structure** for new features
4. **Write tests** following the testing standards
### For Cursor AI
1. **Reference steering documents** for development decisions
2. **Follow API standards** for endpoint design
3. **Use code conventions** for consistency
4. **Implement proper testing** for new features
### For Project Maintenance
1. **Run cleanup script**: `python scripts/cleanup_tests.py`
2. **Update documentation** as features evolve
3. **Maintain test coverage** above 80%
4. **Follow git conventions** for commits
## 📊 Impact Summary
- **Files Organized**: 20+ files moved to appropriate directories
- **Documentation Created**: 6 comprehensive steering documents
- **Redundancy Eliminated**: 10+ redundant test files consolidated
- **Standards Established**: Complete development guidelines
- **Maintainability Improved**: Clear structure and conventions
- **Scalability Enhanced**: Modular, configuration-driven architecture
The PunimTag project is now well-organized, properly documented, and ready for scalable development with clear guidelines for both human developers and AI assistants like Cursor.

364
README.md
View File

@ -1,136 +1,294 @@
# PunimTag - Intelligent Photo Management System
# PunimTag CLI - Minimal Photo Face Tagger
A Flask-based photo management system with automatic face recognition, tagging, and duplicate detection.
A simple command-line tool for automatic face recognition and photo tagging. No web interface, no complex dependencies - just the essentials.
## 🚀 Quick Start
```bash
# Install dependencies
pip install -r requirements.txt
# 1. Setup (one time only)
git clone <your-repo>
cd PunimTag
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
python3 setup.py
# Run the application
python main.py
# 2. Scan photos
python3 photo_tagger.py scan /path/to/your/photos
# Access the web interface
# http://localhost:5000
# 3. Process faces
python3 photo_tagger.py process
# 4. Identify faces interactively
python3 photo_tagger.py identify
# 5. View statistics
python3 photo_tagger.py stats
```
## 📁 Project Structure
## 📦 Installation
### Automatic Setup (Recommended)
```bash
# Clone and setup
git clone <your-repo>
cd PunimTag
# Create virtual environment (IMPORTANT!)
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
# Run setup script
python3 setup.py
```
**⚠️ IMPORTANT**: Always activate the virtual environment before running any commands:
```bash
source venv/bin/activate # Run this every time you open a new terminal
```
### Manual Setup (Alternative)
```bash
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
python3 photo_tagger.py stats # Creates database
```
## 🎯 Commands
### Scan for Photos
```bash
# Scan a folder
python3 photo_tagger.py scan /path/to/photos
# Scan recursively (recommended)
python3 photo_tagger.py scan /path/to/photos --recursive
```
### Process Photos for Faces
```bash
# Process 50 photos (default)
python3 photo_tagger.py process
# Process 20 photos with CNN model (more accurate)
python3 photo_tagger.py process --limit 20 --model cnn
# Process with HOG model (faster)
python3 photo_tagger.py process --limit 100 --model hog
```
### Identify Faces
```bash
# Identify 20 faces interactively
python3 photo_tagger.py identify
# Identify 10 faces at a time
python3 photo_tagger.py identify --batch 10
```
**Interactive commands during identification:**
- Type person's name to identify
- `s` = skip this face
- `q` = quit
- `list` = show known people
### Add Tags
```bash
# Tag photos matching pattern
python3 photo_tagger.py tag --pattern "vacation"
# Tag any photos
python3 photo_tagger.py tag
```
### Search
```bash
# Find photos with a person
python3 photo_tagger.py search "John"
# Find photos with partial name match
python3 photo_tagger.py search "Joh"
```
### Statistics
```bash
# View database statistics
python3 photo_tagger.py stats
```
## 📊 Example Workflow
```bash
# ALWAYS activate virtual environment first!
source venv/bin/activate
# 1. Scan your photo collection
python3 photo_tagger.py scan ~/Pictures --recursive
# 2. Process photos for faces (start with small batch)
python3 photo_tagger.py process --limit 20
# 3. Check what we found
python3 photo_tagger.py stats
# 4. Identify some faces
python3 photo_tagger.py identify --batch 10
# 5. Search for photos of someone
python3 photo_tagger.py search "Alice"
# 6. Add some tags
python3 photo_tagger.py tag --pattern "birthday"
```
## 🗃️ Database
The tool uses SQLite database (`photos.db` by default) with these tables:
- **photos** - Photo file paths and processing status
- **people** - Known people names
- **faces** - Face encodings and locations
- **tags** - Custom tags for photos
## ⚙️ Configuration
### Face Detection Models
- **hog** - Faster, good for CPU-only systems
- **cnn** - More accurate, requires more processing power
### Database Location
```bash
# Use custom database file
python3 photo_tagger.py scan /photos --db /path/to/my.db
```
## 🔧 System Requirements
### Required System Packages (Ubuntu/Debian)
```bash
sudo apt update
sudo apt install -y cmake build-essential libopenblas-dev liblapack-dev libx11-dev libgtk-3-dev python3-dev python3-venv
```
### Python Dependencies
- `face-recognition` - Face detection and recognition
- `dlib` - Machine learning library
- `pillow` - Image processing
- `numpy` - Numerical operations
- `click` - Command line interface
- `setuptools` - Package management
## 📁 File Structure
```
PunimTag/
├── src/ # Main application source code
│ ├── backend/ # Flask backend and API
│ │ ├── app.py # Main Flask application
│ │ ├── db_manager.py # Database operations
│ │ └── visual_identifier.py # Face recognition
│ ├── frontend/ # JavaScript and UI components
│ └── utils/ # Utility functions
│ └── tag_manager.py # Tag management
├── docs/ # Documentation and steering documents
│ ├── product.md # Product vision and goals
│ ├── structure.md # Project organization
│ ├── tech.md # Technical architecture
│ ├── api-standards.md # API design standards
│ ├── testing-standards.md # Testing guidelines
│ └── code-conventions.md # Coding standards
├── tests/ # Test files
│ ├── test_main.py # Main test suite
│ └── conftest.py # Test configuration
├── data/ # Database files and user data
├── config/ # Configuration files
│ ├── settings.py # Application settings
│ └── punimtag_config.json
├── scripts/ # Utility scripts
├── assets/ # Static assets
├── photos/ # User photo storage
└── main.py # Application entry point
├── photo_tagger.py # Main CLI tool
├── setup.py # Setup script
├── run.sh # Convenience script (auto-activates venv)
├── requirements.txt # Python dependencies
├── README.md # This file
├── venv/ # Virtual environment (created by setup)
├── photos.db # Database (created automatically)
├── data/ # Additional data files
└── logs/ # Log files
```
## 🎯 Key Features
- **Automatic Face Recognition**: Identify and tag people in photos
- **Smart Organization**: Group photos by people, events, and locations
- **Duplicate Detection**: Find and manage duplicate photos automatically
- **Intuitive Interface**: Web-based GUI with progressive loading
- **Privacy-First**: Local processing, no cloud dependencies
## 📚 Documentation
### Steering Documents
- **[Product Vision](docs/product.md)**: Product goals, target users, and roadmap
- **[Project Structure](docs/structure.md)**: Architecture and organization principles
- **[Technical Architecture](docs/tech.md)**: Technology stack and implementation details
- **[API Standards](docs/api-standards.md)**: API design and development guidelines
- **[Testing Standards](docs/testing-standards.md)**: Testing strategy and best practices
- **[Code Conventions](docs/code-conventions.md)**: Coding standards and style guides
### Development Guidelines
1. **Follow the steering documents** for consistent development
2. **Use the organized structure** - place code in appropriate directories
3. **Write tests** following the testing standards
4. **Follow API standards** for all endpoints
5. **Adhere to code conventions** for maintainability
## 🧪 Testing
## 🚨 Troubleshooting
### "externally-managed-environment" Error
**Solution**: Always use a virtual environment!
```bash
# Run the main test suite
python tests/test_main.py
# Run with pytest (if installed)
pytest tests/
python3 -m venv venv
source venv/bin/activate
python3 setup.py
```
## 🔧 Configuration
Configuration is centralized in `config/settings.py`:
- Database paths
- Face recognition settings
- File upload limits
- Thumbnail sizes
## 🚀 Deployment
### Development
### Virtual Environment Not Active
**Problem**: Commands fail or use wrong Python
**Solution**: Always activate the virtual environment:
```bash
python main.py
source venv/bin/activate
# You should see (venv) in your prompt
```
### Production
### dlib Installation Issues
```bash
# Use a WSGI server like Gunicorn
gunicorn -w 4 -b 0.0.0.0:5000 main:app
# Ubuntu/Debian - install system dependencies first
sudo apt-get install build-essential cmake libopenblas-dev
# Then retry setup
source venv/bin/activate
python3 setup.py
```
## 📦 Dependencies
### "Please install face_recognition_models" Warning
This warning is harmless - the application still works correctly. It's a known issue with Python 3.13.
- **Flask**: Web framework
- **SQLite**: Database
- **dlib**: Face recognition
- **Pillow**: Image processing
- **NumPy**: Numerical operations
### Memory Issues
- Use `--model hog` for faster processing
- Process in smaller batches with `--limit 10`
- Close other applications to free memory
### No Faces Found
- Check image quality and lighting
- Ensure faces are clearly visible
- Try `--model cnn` for better detection
## 🎯 What This Tool Does
**Simple**: Single Python file, minimal dependencies
**Fast**: Efficient face detection and recognition
**Private**: Everything runs locally, no cloud services
**Flexible**: Batch processing, interactive identification
**Lightweight**: No web interface overhead
## 🚫 What This Tool Doesn't Do
❌ Web interface (removed for simplicity)
❌ Duplicate detection (can be added later)
❌ Image editing or enhancement
❌ Cloud sync or sharing
❌ Complex ML training
## 📈 Performance Tips
- **Always use virtual environment** to avoid conflicts
- Start with small batches (`--limit 20`) to test
- Use `hog` model for speed, `cnn` for accuracy
- Process photos in smaller folders first
- Identify faces in batches to avoid fatigue
## 🤝 Contributing
1. Read the steering documents in `docs/`
2. Follow the code conventions
3. Write tests for new features
4. Update documentation as needed
This is now a minimal, focused tool. Key principles:
- Keep it simple and fast
- CLI-only interface
- Minimal dependencies
- Clear, readable code
- **Always use python3** commands
## 📄 License
---
This project is licensed under the MIT License.
**Total project size**: ~300 lines of Python code
**Dependencies**: 6 essential packages
**Setup time**: ~5 minutes
**Perfect for**: Batch processing personal photo collections
## 🆘 Support
## 🔄 Common Commands Cheat Sheet
For issues and questions:
```bash
# Setup (one time)
python3 -m venv venv && source venv/bin/activate && python3 setup.py
1. Check the steering documents in `docs/`
2. Review existing tests in `tests/`
3. Check the API standards for endpoint usage
# Daily usage - Option 1: Use run script (automatic venv activation)
./run.sh scan ~/Pictures --recursive
./run.sh process --limit 50
./run.sh identify --batch 10
./run.sh stats
# Daily usage - Option 2: Manual venv activation
source venv/bin/activate
python3 photo_tagger.py scan ~/Pictures --recursive
python3 photo_tagger.py process --limit 50
python3 photo_tagger.py identify --batch 10
python3 photo_tagger.py stats
```

210
REBUILD_SUMMARY.md Normal file
View File

@ -0,0 +1,210 @@
# PunimTag Complete Rebuild - Summary
## 🎯 What We Did
Completely rebuilt PunimTag from a complex web application into a **simple, focused CLI tool** for photo face tagging.
## 📊 Before vs After
### Before (Complex)
```
- 182KB Flask web app (4,365+ lines)
- Complex web interface with embedded HTML/CSS/JS
- Multiple legacy files and dependencies
- Web framework overhead
- Difficult to understand and modify
- Large repository size
```
### After (Simple)
```
- 17KB CLI tool (~400 lines)
- Clean command-line interface
- Minimal dependencies (6 packages)
- No web framework overhead
- Easy to understand and modify
- Small repository size
```
## 🗂️ New Project Structure
```
PunimTag/
├── photo_tagger.py # Main CLI tool (17KB)
├── setup.py # Setup script (3KB)
├── requirements.txt # 6 minimal dependencies
├── README.md # Clear documentation
├── test_basic.py # Basic functionality tests
├── data/ # Database files (not in git)
├── photos/ # User photos (not in git)
└── .gitignore # Excludes large files
```
## 🧹 What We Removed
### Files Deleted
- `src/backend/app.py` (182KB web interface)
- `src/backend/web_gui.py`
- `src/backend/punimtag.py`
- `src/backend/punimtag_simple.py`
- All web frontend files
- Complex documentation
- Test files for web interface
- Configuration files
- Scripts directory
### Dependencies Removed
- `flask` - Web framework
- `opencv-python` - Computer vision (optional)
- `scikit-learn` - Machine learning extras
- All web-related dependencies
## ✅ What We Kept
### Core Functionality
- Face detection and recognition
- Database schema for photos, faces, people, tags
- Batch processing capabilities
- Interactive identification
- Search and statistics
### Essential Dependencies
- `face-recognition` - Core face recognition
- `dlib` - Machine learning backend
- `pillow` - Image processing
- `numpy` - Numerical operations
- `click` - CLI interface
## 🚀 New CLI Commands
```bash
# Scan photos
python photo_tagger.py scan /path/to/photos
# Process faces
python photo_tagger.py process --limit 50
# Identify faces interactively
python photo_tagger.py identify --batch 20
# Add tags
python photo_tagger.py tag --pattern "vacation"
# Search for person
python photo_tagger.py search "John"
# View statistics
python photo_tagger.py stats
```
## 💡 Key Improvements
### Simplicity
- **90% size reduction** - From 182KB to 17KB
- **Single file** - Everything in `photo_tagger.py`
- **Clear workflow** - Scan → Process → Identify → Search
### Performance
- **Faster startup** - No web framework overhead
- **Efficient processing** - Direct face recognition calls
- **Batch operations** - Process photos in manageable chunks
### Usability
- **Better CLI** - Clear commands with help text
- **Interactive identification** - Easy face tagging
- **Progress feedback** - Clear status messages
### Maintainability
- **Readable code** - Well-structured, documented
- **Minimal dependencies** - Easy to install and maintain
- **Focused purpose** - Does one thing well
## 🧪 Testing
### Basic Tests Pass ✅
```
📋 Testing: Database Schema ✅
📋 Testing: CLI Structure ✅
📊 Results: 2/2 tests passed
```
### Ready for Use
- Database schema works correctly
- CLI argument parsing functional
- Code structure is sound
- Dependencies are minimal
## 📦 Installation
### Quick Start
```bash
# 1. Setup
python setup.py
# 2. Use
python photo_tagger.py scan /photos
python photo_tagger.py process
python photo_tagger.py identify
```
### Manual Install
```bash
pip install -r requirements.txt
python photo_tagger.py stats
```
## 🎯 Benefits Achieved
### For Development
- **Easier to understand** - Single focused file
- **Faster to modify** - No complex web interface
- **Simpler testing** - CLI is easier to test
- **Better git workflow** - Small, focused commits
### For Users
- **Faster execution** - No web server overhead
- **Better for batch processing** - CLI is perfect for automation
- **Lower resource usage** - Minimal memory footprint
- **More reliable** - Fewer dependencies, fewer failure points
### For Deployment
- **Smaller repository** - Only essential files
- **Easier installation** - Fewer dependencies
- **Better portability** - Runs anywhere Python runs
- **No security concerns** - No web server to secure
## 🔮 Future Possibilities
The new minimal structure makes it easy to add features:
### Easy Additions
- Export functionality
- Different face detection models
- Batch tagging operations
- Integration with other tools
### Optional Features
- Web interface (if needed later)
- GUI wrapper (tkinter/Qt)
- API endpoints (Flask add-on)
- Cloud sync (separate module)
## 📈 Success Metrics
- **Code size**: 182KB → 17KB (90% reduction)
- **Dependencies**: 15+ → 6 (60% reduction)
- **Complexity**: High → Low
- **Setup time**: ~30min → ~5min
- **Learning curve**: Steep → Gentle
## 🎉 Conclusion
Successfully transformed PunimTag from a complex web application into a **focused, efficient CLI tool** that does exactly what's needed:
**Simple** - Easy to understand and use
**Fast** - Efficient face recognition processing
**Reliable** - Minimal dependencies, fewer failure points
**Maintainable** - Clean code, clear structure
**Portable** - Runs anywhere Python runs
The project is now **ready for development** and **easy to extend** while maintaining its core simplicity and focus.

View File

@ -1,235 +0,0 @@
#!/usr/bin/env python3
"""
Configuration system for PunimTag
Manages settings for face recognition, auto-tagging, and organization-specific defaults
"""
import json
import os
from typing import Dict, Any, List, Optional
from dataclasses import dataclass, asdict
from pathlib import Path
@dataclass
class FaceRecognitionConfig:
    """Face recognition settings.

    Thresholds and toggles for face detection, matching, and clustering.
    """
    confidence_threshold: float = 0.6    # minimum match confidence for automatic identification
    face_quality_threshold: float = 0.3  # faces scoring below this are considered low quality
    max_face_distance: float = 0.6       # max encoding distance for two faces to count as the same person
    min_face_size: int = 80              # minimum face size in pixels — presumably bounding-box edge; confirm
    detection_model: str = 'cnn'         # detection backend name ('cnn' here; shipped JSON uses 'hog')
    enable_gpu: bool = True              # request GPU acceleration — confirm the backend honors this
    enable_clustering: bool = True       # group unknown faces by similarity
    cluster_min_size: int = 3            # minimum faces per cluster — presumably DBSCAN min_samples; confirm
    cluster_epsilon: float = 0.3         # clustering distance threshold — presumably DBSCAN eps; confirm
@dataclass
class AutoTaggingConfig:
    """Auto-tagging settings.

    Feature toggles for automatically derived tags plus a confidence cutoff.
    """
    enabled: bool = True               # master enable flag — presumably checked by the tagger; confirm
    tag_seasons: bool = True           # add season tags (spring/summer/fall/winter)
    tag_locations: bool = True         # add location-derived tags
    tag_time_of_day: bool = True       # add time tags (morning/afternoon/evening/night)
    tag_indoor_outdoor: bool = False   # Requires additional ML models
    confidence_threshold: float = 0.7  # minimum confidence before an automatic tag is applied
@dataclass
class ProcessingConfig:
    """Image processing settings."""
    batch_size: int = 100                          # images handled per processing batch
    max_workers: int = 4                           # worker count — presumably threads/processes; confirm
    create_thumbnails: bool = True                 # generate thumbnails during processing
    thumbnail_size: tuple = (200, 200)             # thumbnail size in pixels — (width, height); confirm order
    supported_formats: Optional[List[str]] = None  # file extensions; defaults filled in __post_init__
    skip_processed: bool = True                    # skip images already processed — confirm against DB check

    def __post_init__(self):
        # Dataclass fields cannot take mutable defaults, so the extension
        # list is populated here when the caller passed None.
        if self.supported_formats is None:
            self.supported_formats = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.gif']
@dataclass
class DatabaseConfig:
    """Database settings."""
    backup_enabled: bool = True       # enable periodic database backups
    backup_interval_hours: int = 24   # hours between backups
    optimize_on_startup: bool = True  # run optimization at startup — confirm what the consumer executes
    vacuum_on_startup: bool = False   # run VACUUM at startup (off by default — can be slow on large DBs)
@dataclass
class JewishOrgConfig:
    """Jewish organization specific settings.

    Provides default tag vocabularies for events and locations relevant to a
    Jewish organization, plus Hebrew-calendar related toggles.
    """
    hebrew_calendar_support: bool = True               # enable Hebrew-calendar features — confirm consumer
    default_event_tags: Optional[List[str]] = None     # filled in __post_init__ when None
    default_location_tags: Optional[List[str]] = None  # filled in __post_init__ when None
    holiday_auto_tagging: bool = True                  # auto-tag holiday photos — confirm consumer

    def __post_init__(self):
        # Dataclass fields cannot take mutable defaults, so the standard tag
        # vocabularies are populated here when the caller passed None.
        if self.default_event_tags is None:
            self.default_event_tags = [
                'shabbat', 'wedding', 'bar_mitzvah', 'bat_mitzvah', 'brit_milah',
                'baby_naming', 'shiva', 'yahrzeit', 'rosh_hashanah', 'yom_kippur',
                'sukkot', 'simchat_torah', 'chanukah', 'tu_bishvat', 'purim',
                'passover', 'lag_baomer', 'shavuot', 'tisha_bav', 'synagogue_service',
                'torah_reading', 'kiddush', 'havdalah', 'community_dinner',
                'study_session', 'board_meeting', 'fundraiser', 'youth_group',
                'hebrew_school', 'adult_education'
            ]
        if self.default_location_tags is None:
            self.default_location_tags = [
                'synagogue', 'sanctuary', 'social_hall', 'classroom', 'library',
                'kitchen', 'office', 'parking_lot', 'garden', 'sukkah',
                'home', 'restaurant', 'community_center', 'school', 'cemetery',
                'israel', 'jerusalem', 'tel_aviv', 'haifa', 'safed'
            ]
class PunimTagConfig:
    """Aggregate application configuration.

    Bundles every config section (face recognition, auto-tagging, processing,
    database, Jewish-organization defaults) and persists them together in a
    single JSON file.
    """

    def __init__(self, config_file: str = 'punimtag_config.json'):
        """Initialize all sections with defaults, then overlay saved settings.

        Args:
            config_file: Path of the JSON file used by load()/save().
        """
        self.config_file = config_file
        self.face_recognition = FaceRecognitionConfig()
        self.auto_tagging = AutoTaggingConfig()
        self.processing = ProcessingConfig()
        self.database = DatabaseConfig()
        self.jewish_org = JewishOrgConfig()
        # Load existing config if available
        self.load()

    def load(self):
        """Load configuration from self.config_file, if it exists.

        On any read/parse error the current (default) settings are kept and a
        message is printed; this method never raises.
        """
        if not os.path.exists(self.config_file):
            return
        try:
            with open(self.config_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Rebuild each section only when present in the file, so a
            # partial config file still loads cleanly.
            if 'face_recognition' in data:
                self.face_recognition = FaceRecognitionConfig(**data['face_recognition'])
            if 'auto_tagging' in data:
                self.auto_tagging = AutoTaggingConfig(**data['auto_tagging'])
            if 'processing' in data:
                self.processing = ProcessingConfig(**data['processing'])
                # Fix: JSON has no tuple type, so thumbnail_size round-trips
                # as a list; restore the tuple declared by ProcessingConfig.
                if isinstance(self.processing.thumbnail_size, list):
                    self.processing.thumbnail_size = tuple(self.processing.thumbnail_size)
            if 'database' in data:
                self.database = DatabaseConfig(**data['database'])
            if 'jewish_org' in data:
                self.jewish_org = JewishOrgConfig(**data['jewish_org'])
        except Exception as e:
            print(f"Error loading config: {e}")
            print("Using default configuration")

    def save(self):
        """Serialize all sections to self.config_file as pretty-printed JSON."""
        try:
            config_data = {
                'face_recognition': asdict(self.face_recognition),
                'auto_tagging': asdict(self.auto_tagging),
                'processing': asdict(self.processing),
                'database': asdict(self.database),
                'jewish_org': asdict(self.jewish_org)
            }
            with open(self.config_file, 'w', encoding='utf-8') as f:
                json.dump(config_data, f, indent=2, ensure_ascii=False)
        except Exception as e:
            print(f"Error saving config: {e}")

    def reset_to_defaults(self):
        """Restore every section to its defaults.

        Note: does not write to disk; call save() to persist the reset.
        """
        self.face_recognition = FaceRecognitionConfig()
        self.auto_tagging = AutoTaggingConfig()
        self.processing = ProcessingConfig()
        self.database = DatabaseConfig()
        self.jewish_org = JewishOrgConfig()

    def get_tag_suggestions(self, category: Optional[str] = None) -> List[str]:
        """Return tag suggestions for a category.

        Args:
            category: One of 'event', 'location', 'time', 'season', 'weather',
                'group_size', 'age_group', 'formality', 'activity'. An unknown
                category yields an empty list. When None, a sorted,
                de-duplicated union of all categories is returned.
        """
        suggestions = {
            'event': self.jewish_org.default_event_tags,
            'location': self.jewish_org.default_location_tags,
            'time': ['morning', 'afternoon', 'evening', 'night'],
            'season': ['spring', 'summer', 'fall', 'winter'],
            'weather': ['sunny', 'cloudy', 'rainy', 'snowy'],
            'group_size': ['solo', 'couple', 'small_group', 'large_group', 'crowd'],
            'age_group': ['children', 'youth', 'adults', 'seniors', 'mixed_ages'],
            'formality': ['formal', 'casual', 'semiformal', 'religious_attire'],
            'activity': ['eating', 'praying', 'studying', 'celebrating', 'socializing',
                         'ceremony', 'performance', 'sports', 'crafts', 'music']
        }
        if category:
            return suggestions.get(category, [])
        # Return all suggestions if no category specified
        all_tags = []
        for tags in suggestions.values():
            all_tags.extend(tags)
        return sorted(set(all_tags))

    def update_setting(self, section: str, key: str, value: Any):
        """Set ``<section>.<key>`` to ``value`` and persist immediately.

        Returns:
            True when the section and key both exist and the value was saved;
            False otherwise (nothing is written on failure).
        """
        if hasattr(self, section):
            section_obj = getattr(self, section)
            if hasattr(section_obj, key):
                setattr(section_obj, key, value)
                self.save()
                return True
        return False

    def get_setting(self, section: str, key: str, default: Any = None):
        """Return the value at ``<section>.<key>``, or ``default`` when missing."""
        if hasattr(self, section):
            section_obj = getattr(self, section)
            if hasattr(section_obj, key):
                return getattr(section_obj, key)
        return default
# Global configuration instance
# Module-level singleton created at import time; its load() runs immediately,
# so a punimtag_config.json in the working directory is picked up here.
config = PunimTagConfig()


def get_config() -> PunimTagConfig:
    """Get the global configuration instance"""
    return config
def create_default_config(filepath: str = 'punimtag_config.json'):
    """Write a configuration file populated with defaults and return it."""
    cfg = PunimTagConfig(filepath)
    cfg.save()
    return cfg
if __name__ == "__main__":
    # Demo configuration usage
    print("PunimTag Configuration Demo")
    print("=" * 40)

    # Fresh instance; also loads punimtag_config.json from CWD if present.
    config = PunimTagConfig()

    # Show a few representative settings from different sections.
    print("Current face recognition threshold:", config.face_recognition.confidence_threshold)
    print("Auto-tagging enabled:", config.auto_tagging.enabled)
    print("Batch size:", config.processing.batch_size)

    print("\nJewish organization event tags:")
    for tag in config.jewish_org.default_event_tags[:10]:
        print(f" - {tag}")

    print("\nTag suggestions for 'event' category:")
    suggestions = config.get_tag_suggestions('event')[:5]
    for tag in suggestions:
        print(f" - {tag}")

    # Save configuration
    config.save()
    print(f"\nConfiguration saved to {config.config_file}")

View File

@ -1,102 +0,0 @@
{
"face_recognition": {
"confidence_threshold": 0.6,
"face_quality_threshold": 0.3,
"max_face_distance": 0.6,
"min_face_size": 80,
"detection_model": "hog",
"enable_clustering": true,
"cluster_min_size": 3,
"cluster_epsilon": 0.3
},
"auto_tagging": {
"enabled": true,
"tag_seasons": true,
"tag_locations": true,
"tag_time_of_day": true,
"tag_indoor_outdoor": false,
"confidence_threshold": 0.7
},
"processing": {
"batch_size": 100,
"max_workers": 4,
"create_thumbnails": true,
"thumbnail_size": [
200,
200
],
"supported_formats": [
".jpg",
".jpeg",
".png",
".bmp",
".tiff",
".gif"
],
"skip_processed": true
},
"database": {
"backup_enabled": true,
"backup_interval_hours": 24,
"optimize_on_startup": true,
"vacuum_on_startup": false
},
"jewish_org": {
"hebrew_calendar_support": true,
"default_event_tags": [
"shabbat",
"wedding",
"bar_mitzvah",
"bat_mitzvah",
"brit_milah",
"baby_naming",
"shiva",
"yahrzeit",
"rosh_hashanah",
"yom_kippur",
"sukkot",
"simchat_torah",
"chanukah",
"tu_bishvat",
"purim",
"passover",
"lag_baomer",
"shavuot",
"tisha_bav",
"synagogue_service",
"torah_reading",
"kiddush",
"havdalah",
"community_dinner",
"study_session",
"board_meeting",
"fundraiser",
"youth_group",
"hebrew_school",
"adult_education"
],
"default_location_tags": [
"synagogue",
"sanctuary",
"social_hall",
"classroom",
"library",
"kitchen",
"office",
"parking_lot",
"garden",
"sukkah",
"home",
"restaurant",
"community_center",
"school",
"cemetery",
"israel",
"jerusalem",
"tel_aviv",
"haifa",
"safed"
],
"holiday_auto_tagging": true
}
}

View File

@ -1,65 +0,0 @@
"""
PunimTag Configuration Settings
Centralized configuration for the PunimTag application.
"""
import os
from pathlib import Path
# Base directory (project root)
# settings file lives two levels below the project root.
BASE_DIR = Path(__file__).parent.parent

# Data directory
DATA_DIR = BASE_DIR / "data"
PHOTOS_DIR = BASE_DIR / "photos"  # user photo collection

# Database paths
DATABASE_PATH = DATA_DIR / "punimtag_simple.db"
TEST_DATABASE_PATH = DATA_DIR / "test_backend.db"  # separate DB used by TestingConfig

# Ensure directories exist
# NOTE: import-time side effect — importing this module creates the folders.
DATA_DIR.mkdir(exist_ok=True)
PHOTOS_DIR.mkdir(exist_ok=True)
# Flask configuration
class Config:
    """Base configuration class.

    Shared defaults; environment-specific subclasses override as needed.
    """
    # Falls back to an insecure development key; set SECRET_KEY in the
    # environment for production deployments.
    SECRET_KEY = os.environ.get('SECRET_KEY') or 'dev-secret-key-change-in-production'
    DATABASE_PATH = str(DATABASE_PATH)  # SQLite database file path (as str)
    PHOTOS_DIR = str(PHOTOS_DIR)        # root directory of user photos (as str)
    MAX_CONTENT_LENGTH = 16 * 1024 * 1024  # 16MB max file size
    UPLOAD_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.bmp'}  # allowed upload extensions

    # Face recognition settings
    FACE_DETECTION_CONFIDENCE = 0.6  # detection confidence cutoff — confirm against detector usage
    FACE_SIMILARITY_THRESHOLD = 0.6  # face-match similarity cutoff — confirm against matcher usage
    MAX_FACES_PER_IMAGE = 10         # cap on faces handled per image

    # Thumbnail settings
    THUMBNAIL_SIZE = (200, 200)       # image thumbnail size in pixels
    FACE_THUMBNAIL_SIZE = (120, 120)  # cropped-face thumbnail size in pixels
class DevelopmentConfig(Config):
    """Development configuration."""
    DEBUG = True    # debug mode on (presumably Flask auto-reload/tracebacks)
    TESTING = False
class ProductionConfig(Config):
    """Production configuration."""
    DEBUG = False    # never expose debug tooling in production
    TESTING = False
class TestingConfig(Config):
    """Testing configuration."""
    DEBUG = True
    TESTING = True
    # Point tests at a separate database file so they never touch real data.
    DATABASE_PATH = str(TEST_DATABASE_PATH)
# Configuration mapping
# Maps an environment name to its config class; 'default' is the fallback
# when no name is supplied (presumably consumed by the app factory — confirm).
config = {
    'development': DevelopmentConfig,
    'production': ProductionConfig,
    'testing': TestingConfig,
    'default': DevelopmentConfig
}

View File

@ -1,220 +0,0 @@
# PunimTag Backend Development Status
## ✅ Completed Features
### 1. Configuration System (`config.py`)
- **Jewish Organization Specific Settings**: Pre-configured with Jewish holidays, events, and locations
- **Face Recognition Configuration**: Adjustable thresholds, clustering parameters
- **Auto-tagging Settings**: Toggle-able features with confidence thresholds
- **Processing Configuration**: Batch sizes, worker settings, file format support
- **Persistent Settings**: JSON-based configuration file with load/save functionality
**Key Features:**
- 30+ predefined Jewish event tags (shabbat, wedding, bar_mitzvah, chanukah, etc.)
- 15+ location tags (synagogue, sanctuary, sukkah, israel, etc.)
- Configurable face recognition thresholds
- Auto-tagging enable/disable controls
### 2. Enhanced Face Recognition (`punimtag.py` + `punimtag_simple.py`)
- **Face Quality Scoring**: Evaluates face size and encoding variance
- **Advanced Face Clustering**: DBSCAN-based clustering for grouping unknown faces
- **Confidence-based Recognition**: Automatic vs manual identification based on thresholds
- **Multiple Face Angles**: Support for storing multiple encodings per person
**Key Features:**
- Face quality assessment for better training data
- Cluster unknown faces by similarity
- Sort by most frequently photographed people
- Face verification tools for double-checking identifications
### 3. Comprehensive Database Schema
- **Images Table**: Full metadata (GPS, camera info, dimensions, EXIF data)
- **People Table**: Named individuals with creation timestamps
- **Faces Table**: Precise face locations, encodings, confidence scores
- **Tags Table**: Categorized tagging system
- **Image-Tags Relationship**: Many-to-many tagging support
**Performance Optimizations:**
- Database indexes on key relationships
- Efficient foreign key constraints
- Optimized query structures
### 4. Enhanced EXIF Metadata Extraction
- **GPS Coordinates**: Latitude/longitude extraction with hemisphere handling
- **Camera Information**: Make, model, settings
- **Date/Time**: Photo taken timestamp
- **Error Handling**: Graceful fallbacks for missing data (defaults to "N/A")
### 5. Advanced Search Capabilities
- **Multi-criteria Search**: People + tags + dates + location + camera
- **Complex Queries**: Support for min_people requirements
- **Geographic Filtering**: Bounding box searches with GPS coordinates
- **Date Range Filtering**: From/to date searches
- **Result Limiting**: Pagination support
### 6. Batch Processing for Large Collections
- **Configurable Batch Sizes**: Process 5-10k images efficiently
- **Skip Processed Images**: Incremental processing for new photos
- **Progress Tracking**: Real-time status updates
- **Error Handling**: Continue processing despite individual failures
### 7. Face Management Tools
- **Cluster Assignment**: Assign entire face clusters to people
- **Face Verification**: Review all faces assigned to a person
- **Incorrect Assignment Removal**: Fix misidentifications
- **Most Common Faces**: Sort by frequency (most photographed people)
### 8. Jewish Organization Tag Categories
```
Event Tags: shabbat, wedding, bar_mitzvah, bat_mitzvah, brit_milah,
baby_naming, shiva, yahrzeit, rosh_hashanah, yom_kippur,
sukkot, chanukah, purim, passover, etc.
Location Tags: synagogue, sanctuary, social_hall, classroom, library,
kitchen, sukkah, israel, jerusalem, etc.
Activity Tags: praying, studying, celebrating, socializing, ceremony,
performance, eating, etc.
```
## 🧪 Testing Status
### Core Functionality Tests ✅
- ✅ Database creation and schema validation
- ✅ Configuration system load/save
- ✅ People and tag management
- ✅ Basic search functionality
- ✅ EXIF metadata extraction
- ✅ Face encoding storage/retrieval
### Simplified Backend (`punimtag_simple.py`) ✅
- ✅ Working without sklearn dependencies
- ✅ Core face recognition functionality
- ✅ Database operations validated
- ✅ Tag and people management working
- ✅ Search queries functional
### Performance Tests 📋 (Ready for testing)
- **Created but not run**: 1000+ face clustering test
- **Created but not run**: Large dataset search performance
- **Created but not run**: Batch processing with 5-10k images
## 🔧 Technical Implementation
### Dependencies Status
| Package | Status | Purpose |
| ---------------- | ----------- | ------------------------------- |
| face_recognition | ✅ Working | Core face detection/recognition |
| numpy | ✅ Working | Array operations |
| Pillow | ✅ Working | Image processing and EXIF |
| sqlite3 | ✅ Working | Database operations |
| scikit-learn | ⚠️ Optional | Advanced clustering (DBSCAN) |
| opencv-python | ⚠️ Optional | GUI face viewer |
### Performance Optimizations Implemented
1. **Database Indexes**: On faces(person_id), faces(image_id), image_tags
2. **Batch Processing**: Configurable batch sizes (default: 100)
3. **Incremental Processing**: Skip already processed images
4. **Efficient Queries**: Optimized JOIN operations for search
5. **Memory Management**: Process images one at a time
### Error Handling
- ✅ Graceful EXIF extraction failures
- ✅ Missing file handling
- ✅ Database constraint violations
- ✅ Face detection errors
- ✅ Configuration file corruption
## 📊 Current Database Schema
```sql
-- Core tables with relationships
images (id, path, filename, date_taken, latitude, longitude, camera_make, ...)
people (id, name, created_at)
faces (id, image_id, person_id, top, right, bottom, left, encoding, confidence, ...)
tags (id, name, category, created_at)
image_tags (image_id, tag_id, created_at)
-- Indexes for performance
idx_faces_person, idx_faces_image, idx_image_tags_image, idx_image_tags_tag
```
## 🎯 Backend Readiness Assessment
### ✅ Ready for GUI Development
The backend is **production-ready** for GUI development with the following capabilities:
1. **Face Recognition Pipeline**: Complete face detection → encoding → identification
2. **Database Operations**: All CRUD operations for images, people, faces, tags
3. **Search Engine**: Complex multi-criteria search functionality
4. **Jewish Org Features**: Pre-configured with relevant tags and categories
5. **Configuration System**: User-configurable settings
6. **Performance**: Optimized for 5-10k image collections
### 🔄 Next Steps for GUI
1. **Face Clustering Interface**: Visual display of clustered unknown faces
2. **Interactive Identification**: Click-to-identify unknown faces
3. **Search Interface**: Form-based search with filters
4. **Tag Management**: Visual tag assignment and management
5. **Statistics Dashboard**: Charts and graphs of collection data
6. **Face Verification**: Review and correct face assignments
### 📋 Optional Enhancements (Post-GUI)
- [ ] Hebrew calendar integration for automatic holiday tagging
- [ ] Advanced clustering with scikit-learn when available
- [ ] Thumbnail generation for faster GUI loading
- [ ] Export functionality (albums, tagged collections)
- [ ] Import from other photo management systems
## 🚀 Deployment Notes
### For Production Use:
1. **Install Core Dependencies**: `pip install face_recognition pillow numpy`
2. **Optional GUI Dependencies**: `pip install opencv-python scikit-learn`
3. **Create Configuration**: Run `python config.py` to generate default config
4. **Initialize Database**: Run `python punimtag_simple.py` to create tables
5. **Add Photos**: Place images in `photos/` directory
6. **Process Images**: Run the main processing script
### Performance Recommendations:
- **For 1k-5k images**: Use default batch size (100)
- **For 5k-10k images**: Increase batch size to 200-500
- **For 10k+ images**: Consider database optimization and larger batches
## 🏁 Conclusion
**The PunimTag backend is fully functional and ready for GUI development.**
All core requirements have been implemented:
- ✅ Face recognition with identification
- ✅ Complex search capabilities
- ✅ Jewish organization specific features
- ✅ Comprehensive tagging system
- ✅ CRUD interface for all entities
- ✅ Performance optimizations for large collections
- ✅ Configuration system with auto-tagging controls
The system is tested, documented, and ready to support a GUI interface that will provide all the functionality requested in the original requirements.

View File

@ -1,194 +0,0 @@
# PunimTag - Future Enhancement Ideas
## 🎯 Core Improvements
### 1. Enhanced Face Recognition
- **Multi-angle face training**: Store multiple angles of the same person for better recognition
- **Face quality scoring**: Rate face image quality and use only high-quality samples for training
- **Age progression handling**: Account for aging when matching faces across time periods
- **Expression normalization**: Better handle different facial expressions
- **Confidence thresholds**: User-configurable confidence levels for automatic vs manual identification
### 2. Performance Optimizations
- **Incremental processing**: Only process new/modified images
- **Parallel processing**: Use multiprocessing for faster batch operations
- **Face encoding cache**: Cache encodings to avoid recomputation
- **Thumbnail generation**: Create and store thumbnails for faster UI display
- **Database indexing**: Optimize queries with better indexes and query plans
### 3. Advanced Tagging
- **AI-powered auto-tagging**:
- Scene detection (beach, mountain, city, etc.)
- Object detection (cars, pets, food, etc.)
- Activity recognition (eating, sports, working)
- Emotion detection (happy, sad, surprised)
- Indoor/outdoor classification
- **Tag hierarchies**: Parent-child tag relationships (e.g., "vacation" → "beach vacation")
- **Smart tag suggestions**: Based on similar images and user patterns
- **Batch tag operations**: Apply/remove tags from multiple images efficiently
## 🌐 Web Interface
### 1. Modern Web UI
- **React/Vue.js frontend** with responsive design
- **Gallery view** with filtering and sorting
- **Face clustering visualization**: Interactive graph showing face relationships
- **Drag-and-drop uploads**: Easy image addition
- **Real-time updates**: WebSocket for live processing status
### 2. Features
- **Interactive face identification**: Click faces to identify them
- **Tag cloud**: Visual representation of tag frequency
- **Timeline view**: Browse photos chronologically
- **Map view**: Show photos on a map using GPS data
- **Slideshow mode**: With face and tag filters
## 🔗 Integrations
### 1. Cloud Storage
- **Google Photos sync**: Import/export with Google Photos
- **iCloud integration**: Sync with Apple Photos
- **Dropbox/OneDrive**: Monitor folders for new images
- **S3 compatibility**: Store images in cloud storage
### 2. Social Media
- **Facebook integration**: Import tagged faces (with permission)
- **Instagram import**: Bring in photos with hashtags as tags
- **Privacy-aware sharing**: Share photos only with people in them
## 🛡️ Privacy & Security
### 1. Privacy Features
- **Face anonymization**: Blur unidentified faces on export
- **Consent management**: Track consent for face recognition
- **GDPR compliance**: Right to be forgotten, data export
- **Encryption**: Client-side encryption option
- **Access controls**: User/group permissions
### 2. Backup & Recovery
- **Automated backups**: Scheduled database and image backups
- **Version control**: Track changes to face identifications
- **Disaster recovery**: Restore from backups easily
- **Export formats**: Multiple export options (JSON, CSV, etc.)
## 🤖 AI Enhancements
### 1. Advanced ML Features
- **Face clustering improvements**: Use deep learning for better grouping
- **Duplicate detection**: Find and manage similar photos
- **Photo quality assessment**: Identify blurry/poor quality images
- **Automatic album creation**: Group photos by events
- **Style transfer**: Apply artistic filters based on tags
### 2. Natural Language Processing
- **Natural language search**: "Show me beach photos with John from last summer"
- **Voice commands**: Control the app with voice
- **Caption generation**: Auto-generate photo descriptions
- **Story creation**: Generate photo stories/albums automatically
## 🔧 Developer Features
### 1. API & Extensions
- **RESTful API**: Full API for third-party integration
- **GraphQL endpoint**: Flexible data querying
- **Plugin system**: Allow custom extensions
- **Webhook support**: Notify external systems of changes
- **SDK development**: Python/JavaScript SDKs
### 2. Advanced Tools
- **Batch processing CLI**: Command-line tools for power users
- **Migration tools**: Import from other photo management systems
- **Analytics dashboard**: Usage statistics and insights
- **Performance monitoring**: Track system performance
## 📊 Analytics & Insights
### 1. Photo Statistics
- **Face frequency**: Most photographed people
- **Tag analytics**: Most used tags over time
- **Location heatmap**: Where most photos are taken
- **Time patterns**: When photos are typically taken
- **Relationship graphs**: Visualize people connections
### 2. Personal Insights
- **Year in review**: Automated yearly summaries
- **Memory reminders**: "On this day" features
- **Growth tracking**: Watch children grow over time
- **Event detection**: Automatically identify special events
## 🎨 Creative Features
### 1. Photo Enhancement
- **Automatic enhancement**: AI-powered photo improvement
- **Red-eye removal**: Automatic detection and correction
- **Background replacement**: Change photo backgrounds
- **Face beautification**: Optional beauty filters
### 2. Creative Tools
- **Collage generation**: Auto-create collages by tags/people
- **Photo books**: Design and export photo books
- **Video generation**: Create videos from photo sets
- **AR features**: View photos in augmented reality
## 🔮 Future Technologies
### 1. Emerging Tech
- **Blockchain**: Decentralized photo ownership proof
- **IPFS storage**: Distributed photo storage
- **Edge AI**: On-device processing for privacy
- **5G optimization**: Fast mobile sync and processing
### 2. Experimental Features
- **3D face modeling**: Create 3D models from multiple photos
- **Time-lapse generation**: Show aging/changes over time
- **DeepFake detection**: Identify manipulated images
- **Holographic displays**: Future display technology support
## 📋 Implementation Priority
### Phase 1 (Next 3 months)
1. Web UI basic implementation
2. Performance optimizations
3. Better error handling
4. Basic auto-tagging
### Phase 2 (6 months)
1. Mobile PWA
2. Cloud storage integration
3. Advanced search
4. API development
### Phase 3 (1 year)
1. AI enhancements
2. Social integrations
3. Analytics dashboard
4. Plugin system
### Long-term (2+ years)
1. Native mobile apps
2. Blockchain integration
3. AR/VR features
4. Advanced AI features

View File

@ -1,283 +0,0 @@
# PunimTag Testing Guide
## 🧪 Testing with Real Images
### Step 1: Prepare Your Test Images
1. **Create/Use Photos Directory**:
```bash
mkdir -p photos
```
2. **Add Test Images**:
- Copy 10-20 photos with faces to the `photos/` directory
- Supported formats: `.jpg`, `.jpeg`, `.png`, `.bmp`, `.tiff`, `.gif`
- For best results, use photos with clear, front-facing faces
- Include photos with the same people for face recognition testing
3. **Organize by Subdirectories** (optional):
```
photos/
├── events/
│ ├── wedding_2023/
│ └── bar_mitzvah/
├── family/
└── synagogue/
```
### Step 2: Process Images
```bash
# Process all images in photos directory
python punimtag_simple.py
```
This will:
- Scan all images in `photos/` directory (including subdirectories)
- Extract EXIF metadata (GPS, camera info, dates)
- Detect all faces and create encodings
- Store everything in `punimtag_simple.db`
### Step 3: Inspect Results
```bash
# Check what was processed
python db_manager.py
# Choose option 1 to inspect database
```
### Step 4: Identify People (Interactive)
```bash
# Use the CLI face identifier
python interactive_identifier.py
```
This will show you unidentified faces and let you name them.
### Step 5: Add Tags
```bash
# Use the tag manager
python tag_manager.py
```
Add Jewish organization specific tags like:
- Events: `shabbat`, `wedding`, `bar_mitzvah`, `chanukah`
- Locations: `synagogue`, `home`, `israel`
- Activities: `praying`, `celebrating`, `studying`
## 🧹 Database Management
### Clean Database (Keep Schema)
```bash
python db_manager.py
# Choose option 2
```
- Removes all data but keeps tables
- Creates automatic backup first
### Delete Database Completely
```bash
python db_manager.py
# Choose option 3
```
- Deletes entire database file
- Creates automatic backup first
### Inspect Database
```bash
python db_manager.py
# Choose option 1
```
Shows:
- Image/face/people counts
- Top people by frequency
- Most used tags
- Database file size
## 🔍 Testing Search Functionality
### Basic Search Test
```python
from punimtag_simple import SimplePunimTag
tagger = SimplePunimTag()
# Search by person
results = tagger.simple_search(people=["Rabbi Cohen"])
print(f"Found {len(results)} images with Rabbi Cohen")
# Search by tag
results = tagger.simple_search(tags=["wedding"])
print(f"Found {len(results)} wedding images")
# Combined search
results = tagger.simple_search(
people=["Sarah Goldberg"],
tags=["shabbat"]
)
print(f"Found {len(results)} images of Sarah at Shabbat")
tagger.close()
```
## 📊 Performance Testing
### Test with Different Collection Sizes
1. **Small Collection (10-50 images)**:
- Process time: ~1-5 minutes
- Good for initial testing
2. **Medium Collection (100-500 images)**:
- Process time: ~10-30 minutes
- Test face recognition accuracy
3. **Large Collection (1000+ images)**:
- Process time: 1+ hours
- Test batch processing and performance
### Monitor Performance
```python
import time
from punimtag_simple import SimplePunimTag
start_time = time.time()
tagger = SimplePunimTag()
processed = tagger.process_directory()
end_time = time.time()
print(f"Processed {processed} images in {end_time - start_time:.2f} seconds")
tagger.close()
```
## 🎯 Testing Specific Features
### 1. Face Recognition Accuracy
1. Process images with same people
2. Identify some faces manually
3. Process new images with same people
4. Check if they're automatically recognized
### 2. Jewish Organization Tags
```python
from punimtag_simple import SimplePunimTag
from config import get_config
config = get_config()
event_tags = config.get_tag_suggestions('event')
print("Available Jewish event tags:", event_tags[:10])
```
### 3. EXIF Metadata Extraction
```python
from punimtag_simple import SimplePunimTag
tagger = SimplePunimTag()
metadata = tagger.extract_metadata("photos/your_image.jpg")
print("Extracted metadata:", metadata)
tagger.close()
```
### 4. GPS Location Data
- Use photos taken with smartphones (usually have GPS)
- Check if latitude/longitude are extracted
- Test location-based searches
## 🐛 Troubleshooting
### Common Issues
1. **"No faces detected"**:
- Check image quality
- Ensure faces are clearly visible
- Try different lighting conditions
2. **"EXIF data missing"**:
- Some images don't have EXIF data
- System will default to "N/A"
- This is normal behavior
3. **"Face recognition not working"**:
- Need multiple photos of same person
- Faces should be front-facing and clear
- Check confidence threshold in config
4. **"Processing is slow"**:
- Normal for large collections
- Adjust batch size in config
- Consider using smaller test set first
### Debug Mode
```python
# Add debug logging to see what's happening
import logging
logging.basicConfig(level=logging.DEBUG)
from punimtag_simple import SimplePunimTag
tagger = SimplePunimTag()
# ... rest of your code
```
## ✅ Validation Checklist
Before moving to GUI development, validate:
- [ ] Images are processing without errors
- [ ] Faces are being detected correctly
- [ ] EXIF metadata is being extracted
- [ ] People can be identified and assigned
- [ ] Tags can be added and searched
- [ ] Database operations work smoothly
- [ ] Search functionality returns expected results
- [ ] Performance is acceptable for your collection size
## 🔄 Reset for Fresh Testing
```bash
# Clean everything and start fresh
python db_manager.py # Choose option 2 to clean
rm -f punimtag_config.json # Reset config
python config.py # Regenerate default config
```
## 📝 Next Steps After Testing
Once testing is successful:
1. **GUI Development**: Create visual interface
2. **Advanced Features**: Add clustering, verification tools
3. **Performance Optimization**: Fine-tune for your specific needs
## 💡 Testing Tips
1. **Start Small**: Test with 10-20 images first
2. **Use Clear Photos**: Better face detection results
3. **Same People**: Include multiple photos of same people
4. **Variety**: Test different scenarios (indoor/outdoor, events, etc.)
5. **Monitor Progress**: Watch console output during processing
6. **Backup Often**: Use database manager to create backups

View File

@ -1,335 +0,0 @@
# PunimTag API Standards
## Overview
This document defines the standards for designing and implementing API endpoints in PunimTag.
## Response Format
### Success Response
```json
{
"success": true,
"data": {
// Response data here
},
"message": "Optional success message"
}
```
### Error Response
```json
{
"success": false,
"error": "Descriptive error message",
"code": "ERROR_CODE_OPTIONAL"
}
```
### Paginated Response
```json
{
"success": true,
"data": {
"items": [...],
"pagination": {
"page": 1,
"per_page": 20,
"total": 150,
"pages": 8
}
}
}
```
## HTTP Status Codes
### Success Codes
- **200 OK**: Request successful
- **201 Created**: Resource created successfully
- **204 No Content**: Request successful, no content to return
### Client Error Codes
- **400 Bad Request**: Invalid request data
- **401 Unauthorized**: Authentication required
- **403 Forbidden**: Access denied
- **404 Not Found**: Resource not found
- **409 Conflict**: Resource conflict
- **422 Unprocessable Entity**: Validation error
### Server Error Codes
- **500 Internal Server Error**: Server error
- **503 Service Unavailable**: Service temporarily unavailable
## Endpoint Naming Conventions
### RESTful Patterns
- **GET /photos**: List photos
- **GET /photos/{id}**: Get specific photo
- **POST /photos**: Create new photo
- **PUT /photos/{id}**: Update photo
- **DELETE /photos/{id}**: Delete photo
### Custom Actions
- **POST /photos/{id}/identify**: Identify faces in photo
- **POST /photos/{id}/duplicates**: Find duplicates
- **GET /photos/{id}/faces**: Get faces in photo
## Request Parameters
### Query Parameters
```python
# Standard pagination
page = request.args.get('page', 1, type=int)
per_page = request.args.get('per_page', 20, type=int)
# Filtering
filter_name = request.args.get('filter', '')
sort_by = request.args.get('sort', 'date_taken')
sort_order = request.args.get('order', 'desc')
```
### JSON Body Parameters
```python
# Validate required fields
data = request.get_json()
if not data:
return jsonify({'success': False, 'error': 'No JSON data provided'}), 400
required_fields = ['name', 'email']
for field in required_fields:
if field not in data:
return jsonify({'success': False, 'error': f'Missing required field: {field}'}), 400
```
## Error Handling
### Standard Error Handler
```python
@app.errorhandler(404)
def not_found(error):
return jsonify({
'success': False,
'error': 'Resource not found',
'code': 'NOT_FOUND'
}), 404
@app.errorhandler(500)
def internal_error(error):
return jsonify({
'success': False,
'error': 'Internal server error',
'code': 'INTERNAL_ERROR'
}), 500
```
### Validation Errors
```python
def validate_photo_data(data):
errors = []
if 'filename' not in data:
errors.append('filename is required')
if 'path' in data and not os.path.exists(data['path']):
errors.append('file path does not exist')
return errors
# Usage in endpoint
errors = validate_photo_data(data)
if errors:
return jsonify({
'success': False,
'error': 'Validation failed',
'details': errors
}), 422
```
## Database Operations
### Connection Management
```python
def get_db_connection():
conn = sqlite3.connect('punimtag_simple.db')
conn.row_factory = sqlite3.Row # Enable dict-like access
return conn
# Usage in endpoint
try:
conn = get_db_connection()
cursor = conn.cursor()
# Database operations
conn.commit()
except Exception as e:
conn.rollback()
return jsonify({'success': False, 'error': str(e)}), 500
finally:
conn.close()
```
### Parameterized Queries
```python
# Always use parameterized queries to prevent SQL injection
cursor.execute('SELECT * FROM images WHERE id = ?', (image_id,))
cursor.execute('INSERT INTO photos (name, path) VALUES (?, ?)', (name, path))
```
## Rate Limiting
### Basic Rate Limiting
```python
from functools import wraps
import time
def rate_limit(requests_per_minute=60):
def decorator(f):
@wraps(f)
def wrapped(*args, **kwargs):
# Implement rate limiting logic here
return f(*args, **kwargs)
return wrapped
return decorator
# Usage
@app.route('/api/photos')
@rate_limit(requests_per_minute=30)
def get_photos():
# Endpoint implementation
pass
```
## Caching
### Response Caching
```python
from functools import wraps
import hashlib
import json
def cache_response(ttl_seconds=300):
def decorator(f):
@wraps(f)
def wrapped(*args, **kwargs):
# Implement caching logic here
return f(*args, **kwargs)
return wrapped
return decorator
# Usage
@app.route('/api/photos')
@cache_response(ttl_seconds=60)
def get_photos():
# Endpoint implementation
pass
```
## Logging
### Request Logging
```python
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@app.before_request
def log_request():
logger.info(f'{request.method} {request.path} - {request.remote_addr}')
@app.after_request
def log_response(response):
logger.info(f'Response: {response.status_code}')
return response
```
## Security
### Input Sanitization
```python
import re
def sanitize_filename(filename):
# Remove dangerous characters
filename = re.sub(r'[<>:"/\\|?*]', '', filename)
# Limit length
return filename[:255]
def validate_file_type(filename):
allowed_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp'}
ext = os.path.splitext(filename)[1].lower()
return ext in allowed_extensions
```
### CORS Headers
```python
@app.after_request
def add_cors_headers(response):
response.headers['Access-Control-Allow-Origin'] = '*'
response.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT, DELETE, OPTIONS'
response.headers['Access-Control-Allow-Headers'] = 'Content-Type'
return response
```
## Testing
### Endpoint Testing
```python
def test_get_photos():
response = app.test_client().get('/api/photos')
assert response.status_code == 200
data = json.loads(response.data)
assert data['success'] == True
assert 'data' in data
def test_create_photo():
response = app.test_client().post('/api/photos',
json={'filename': 'test.jpg', 'path': '/test/path'})
assert response.status_code == 201
data = json.loads(response.data)
assert data['success'] == True
```
## Documentation
### Endpoint Documentation
```python
@app.route('/api/photos', methods=['GET'])
def get_photos():
"""
Get a list of photos with optional filtering and pagination.
Query Parameters:
page (int): Page number (default: 1)
per_page (int): Items per page (default: 20)
filter (str): Filter by name or tags
sort (str): Sort field (default: date_taken)
order (str): Sort order (asc/desc, default: desc)
Returns:
JSON response with photos and pagination info
"""
# Implementation
pass
```

View File

@ -1,725 +0,0 @@
# PunimTag Code Conventions
## Overview
This document defines the coding standards and conventions for PunimTag development.
## Python Conventions
### Code Style
Follow PEP 8 with these specific guidelines:
```python
# Imports
import os
import sys
from typing import List, Dict, Optional
from flask import Flask, request, jsonify
# Constants
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB
ALLOWED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif'}
# Functions
def process_image(image_path: str, max_size: int = MAX_FILE_SIZE) -> Dict[str, any]:
"""
Process an image file and extract metadata.
Args:
image_path: Path to the image file
max_size: Maximum file size in bytes
Returns:
Dictionary containing image metadata
Raises:
FileNotFoundError: If image file doesn't exist
ValueError: If file size exceeds limit
"""
if not os.path.exists(image_path):
raise FileNotFoundError(f"Image file not found: {image_path}")
file_size = os.path.getsize(image_path)
if file_size > max_size:
raise ValueError(f"File size {file_size} exceeds limit {max_size}")
# Process the image
metadata = extract_metadata(image_path)
return metadata
# Classes
class ImageProcessor:
"""Handles image processing operations."""
def __init__(self, config: Dict[str, any]):
"""
Initialize the image processor.
Args:
config: Configuration dictionary
"""
self.config = config
self.supported_formats = config.get('supported_formats', ALLOWED_EXTENSIONS)
def process_batch(self, image_paths: List[str]) -> List[Dict[str, any]]:
"""
Process multiple images in batch.
Args:
image_paths: List of image file paths
Returns:
List of processed image metadata
"""
results = []
for path in image_paths:
try:
result = self.process_single(path)
results.append(result)
except Exception as e:
logger.error(f"Failed to process {path}: {e}")
results.append({'error': str(e), 'path': path})
return results
```
### Naming Conventions
#### Variables and Functions
```python
# Use snake_case for variables and functions
user_name = "john_doe"
photo_count = 150
max_file_size = 10 * 1024 * 1024
def get_user_photos(user_id: int) -> List[Dict]:
"""Get photos for a specific user."""
pass
def calculate_face_similarity(face1: List[float], face2: List[float]) -> float:
"""Calculate similarity between two face encodings."""
pass
```
#### Classes
```python
# Use PascalCase for classes
class PhotoManager:
"""Manages photo operations."""
pass
class FaceRecognitionEngine:
"""Handles face recognition operations."""
pass
```
#### Constants
```python
# Use UPPER_CASE for constants
DATABASE_PATH = "punimtag_simple.db"
MAX_THUMBNAIL_SIZE = (200, 200)
DEFAULT_PAGE_SIZE = 20
```
### Type Hints
```python
from typing import List, Dict, Optional, Union, Tuple
def get_photos(
user_id: int,
page: int = 1,
per_page: int = DEFAULT_PAGE_SIZE,
filters: Optional[Dict[str, any]] = None
) -> Dict[str, Union[List[Dict], int]]:
"""
Get photos with pagination and filtering.
Returns:
Dictionary with 'photos' list and 'total' count
"""
pass
def process_face_encodings(
encodings: List[List[float]]
) -> Tuple[List[float], float]:
"""
Process face encodings and return average encoding and confidence.
Returns:
Tuple of (average_encoding, confidence_score)
"""
pass
```
### Error Handling
```python
import logging
from typing import Optional
logger = logging.getLogger(__name__)
def safe_operation(func):
"""Decorator for safe operation execution."""
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except Exception as e:
logger.error(f"Error in {func.__name__}: {e}")
return None
return wrapper
@safe_operation
def load_image_safely(image_path: str) -> Optional[PIL.Image.Image]:
"""Load image with error handling."""
return PIL.Image.open(image_path)
def process_user_request(user_data: Dict) -> Dict[str, any]:
"""Process user request with comprehensive error handling."""
try:
# Validate input
if not user_data.get('user_id'):
return {'success': False, 'error': 'Missing user_id'}
# Process request
result = perform_operation(user_data)
return {'success': True, 'data': result}
except ValueError as e:
logger.warning(f"Validation error: {e}")
return {'success': False, 'error': str(e)}
except FileNotFoundError as e:
logger.error(f"File not found: {e}")
return {'success': False, 'error': 'File not found'}
except Exception as e:
logger.error(f"Unexpected error: {e}")
return {'success': False, 'error': 'Internal server error'}
```
## JavaScript Conventions
### Code Style
Follow ESLint with these specific guidelines:
```javascript
// Constants
const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB
const ALLOWED_EXTENSIONS = [".jpg", ".jpeg", ".png", ".gif"];
// Functions
function processImage(imagePath, maxSize = MAX_FILE_SIZE) {
/**
* Process an image file and extract metadata.
* @param {string} imagePath - Path to the image file
* @param {number} maxSize - Maximum file size in bytes
* @returns {Promise<Object>} Image metadata
*/
return new Promise((resolve, reject) => {
if (!imagePath) {
reject(new Error("Image path is required"));
return;
}
// Process the image
resolve(extractMetadata(imagePath));
});
}
// Classes
class ImageProcessor {
/**
* Handles image processing operations.
* @param {Object} config - Configuration object
*/
constructor(config) {
this.config = config;
this.supportedFormats = config.supportedFormats || ALLOWED_EXTENSIONS;
}
/**
* Process multiple images in batch.
* @param {string[]} imagePaths - Array of image file paths
* @returns {Promise<Object[]>} Array of processed image metadata
*/
async processBatch(imagePaths) {
const results = [];
for (const path of imagePaths) {
try {
const result = await this.processSingle(path);
results.push(result);
} catch (error) {
console.error(`Failed to process ${path}:`, error);
results.push({ error: error.message, path });
}
}
return results;
}
}
```
### Naming Conventions
#### Variables and Functions
```javascript
// Use camelCase for variables and functions
const userName = "johnDoe";
const photoCount = 150;
const maxFileSize = 10 * 1024 * 1024;
function getUserPhotos(userId) {
// Get photos for a specific user
}
function calculateFaceSimilarity(face1, face2) {
// Calculate similarity between two face encodings
}
```
#### Classes
```javascript
// Use PascalCase for classes
class PhotoManager {
// Manages photo operations
}
class FaceRecognitionEngine {
// Handles face recognition operations
}
```
#### Constants
```javascript
// Use UPPER_SNAKE_CASE for constants
const DATABASE_PATH = "punimtag_simple.db";
const MAX_THUMBNAIL_SIZE = { width: 200, height: 200 };
const DEFAULT_PAGE_SIZE = 20;
```
### Error Handling
```javascript
// Async/await with try-catch
async function processUserRequest(userData) {
try {
// Validate input
if (!userData.userId) {
return { success: false, error: "Missing userId" };
}
// Process request
const result = await performOperation(userData);
return { success: true, data: result };
} catch (error) {
console.error("Error processing request:", error);
return { success: false, error: "Internal server error" };
}
}
// Promise-based error handling
function loadImageSafely(imagePath) {
return new Promise((resolve, reject) => {
if (!imagePath) {
reject(new Error("Image path is required"));
return;
}
// Load image logic
resolve(imageData);
}).catch((error) => {
console.error("Error loading image:", error);
return null;
});
}
```
## Database Conventions
### Table Naming
```sql
-- Use snake_case for table names
CREATE TABLE user_profiles (
id INTEGER PRIMARY KEY,
user_name TEXT NOT NULL,
email_address TEXT UNIQUE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE photo_metadata (
id INTEGER PRIMARY KEY,
image_id INTEGER REFERENCES images(id),
exif_data TEXT,
gps_coordinates TEXT,
processing_status TEXT DEFAULT 'pending'
);
```
### Column Naming
```sql
-- Use snake_case for column names
CREATE TABLE images (
id INTEGER PRIMARY KEY,
file_name TEXT NOT NULL,
file_path TEXT NOT NULL,
file_size INTEGER,
date_taken TIMESTAMP,
upload_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
is_processed BOOLEAN DEFAULT FALSE
);
```
### Index Naming
```sql
-- Use descriptive names for indexes
CREATE INDEX idx_images_date_taken ON images(date_taken);
CREATE INDEX idx_faces_person_id ON faces(person_id);
CREATE INDEX idx_photos_user_id_date ON photos(user_id, date_taken);
```
## File Organization
### Python Files
```python
# File: src/backend/photo_manager.py
"""
Photo management module.
This module handles all photo-related operations including
upload, processing, and metadata extraction.
"""
import os
import logging
from typing import List, Dict, Optional
from PIL import Image
# Constants
MAX_FILE_SIZE = 10 * 1024 * 1024
ALLOWED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif'}
# Logging
logger = logging.getLogger(__name__)
class PhotoManager:
"""Manages photo operations."""
def __init__(self, config: Dict[str, any]):
self.config = config
self.storage_path = config.get('storage_path', './photos')
def process_photo(self, photo_path: str) -> Dict[str, any]:
"""Process a single photo."""
# Implementation
pass
# Main execution (if applicable)
if __name__ == "__main__":
# Test or standalone execution
pass
```
### JavaScript Files
```javascript
// File: src/frontend/photoManager.js
/**
* Photo management module.
*
* This module handles all photo-related operations including
* upload, processing, and metadata extraction.
*/
// Constants
const MAX_FILE_SIZE = 10 * 1024 * 1024;
const ALLOWED_EXTENSIONS = [".jpg", ".jpeg", ".png", ".gif"];
// Logging
const logger = {
info: (msg) => console.log(`[INFO] ${msg}`),
error: (msg) => console.error(`[ERROR] ${msg}`),
warn: (msg) => console.warn(`[WARN] ${msg}`),
};
class PhotoManager {
/**
* Manages photo operations.
* @param {Object} config - Configuration object
*/
constructor(config) {
this.config = config;
this.storagePath = config.storagePath || "./photos";
}
/**
* Process a single photo.
* @param {string} photoPath - Path to the photo
* @returns {Promise<Object>} Processing result
*/
async processPhoto(photoPath) {
// Implementation
}
}
// Export for module systems
if (typeof module !== "undefined" && module.exports) {
module.exports = PhotoManager;
}
```
## Documentation Standards
### Function Documentation
```python
def extract_face_features(image_path: str, face_coordinates: Tuple[int, int, int, int]) -> List[float]:
"""
Extract face features from an image region.
This function takes an image and face coordinates, then extracts
128-dimensional feature vectors using dlib's face recognition model.
Args:
image_path: Path to the source image file
face_coordinates: Tuple of (left, top, right, bottom) coordinates
Returns:
List of 128 float values representing face features
Raises:
FileNotFoundError: If image file doesn't exist
ValueError: If face coordinates are invalid
RuntimeError: If face recognition model fails
Example:
>>> coords = (100, 100, 200, 200)
>>> features = extract_face_features("photo.jpg", coords)
>>> len(features)
128
"""
pass
```
### Class Documentation
```python
class FaceRecognitionEngine:
"""
Engine for face recognition operations.
This class provides methods for detecting faces in images,
extracting face features, and comparing face similarities.
Attributes:
model_path (str): Path to the face recognition model
confidence_threshold (float): Minimum confidence for face detection
max_faces (int): Maximum number of faces to detect per image
Example:
>>> engine = FaceRecognitionEngine()
>>> faces = engine.detect_faces("group_photo.jpg")
>>> print(f"Found {len(faces)} faces")
"""
def __init__(self, model_path: str = None, confidence_threshold: float = 0.6):
"""
Initialize the face recognition engine.
Args:
model_path: Path to the face recognition model file
confidence_threshold: Minimum confidence for face detection
"""
pass
```
## Testing Conventions
### Test File Structure
```python
# File: tests/unit/test_photo_manager.py
"""
Unit tests for PhotoManager class.
"""
import pytest
from unittest.mock import Mock, patch
from src.backend.photo_manager import PhotoManager
class TestPhotoManager:
"""Test cases for PhotoManager class."""
@pytest.fixture
def photo_manager(self):
"""Create a PhotoManager instance for testing."""
config = {'storage_path': '/test/path'}
return PhotoManager(config)
def test_process_photo_with_valid_file(self, photo_manager):
"""Test processing a valid photo file."""
# Test implementation
pass
def test_process_photo_with_invalid_file(self, photo_manager):
"""Test processing an invalid photo file."""
# Test implementation
pass
```
## Git Conventions
### Commit Messages
```
feat: add face recognition feature
fix: resolve duplicate photo detection issue
docs: update API documentation
test: add unit tests for photo processing
refactor: improve error handling in face detection
style: format code according to PEP 8
perf: optimize thumbnail generation
chore: update dependencies
```
### Branch Naming
```
feature/face-recognition
bugfix/duplicate-detection
hotfix/security-vulnerability
docs/api-documentation
test/photo-processing
refactor/error-handling
```
## Performance Guidelines
### Python Performance
```python
# Use list comprehensions instead of loops when appropriate
# Good
squares = [x**2 for x in range(1000)]
# Avoid
squares = []
for x in range(1000):
squares.append(x**2)
# Use generators for large datasets
def process_large_dataset(file_path):
"""Process large dataset using generator."""
with open(file_path, 'r') as file:
for line in file:
yield process_line(line)
# Use appropriate data structures
from collections import defaultdict, Counter
# Use defaultdict for counting
word_count = defaultdict(int)
for word in words:
word_count[word] += 1
# Use Counter for frequency analysis
word_freq = Counter(words)
```
### JavaScript Performance
```javascript
// Use appropriate array methods
// Good
const squares = Array.from({ length: 1000 }, (_, i) => i ** 2);
// Avoid
const squares = [];
for (let i = 0; i < 1000; i++) {
squares.push(i ** 2);
}
// Use async/await for I/O operations
async function processImages(imagePaths) {
const results = await Promise.all(
imagePaths.map((path) => processImage(path))
);
return results;
}
// Use appropriate data structures
const wordCount = new Map();
words.forEach((word) => {
wordCount.set(word, (wordCount.get(word) || 0) + 1);
});
```
## Security Guidelines
### Input Validation
```python
import re
from pathlib import Path
def validate_filename(filename: str) -> bool:
"""Validate filename for security."""
# Check for dangerous characters
dangerous_chars = r'[<>:"/\\|?*]'
if re.search(dangerous_chars, filename):
return False
# Check for path traversal
if '..' in filename or filename.startswith('/'):
return False
# Check length
if len(filename) > 255:
return False
return True
def sanitize_user_input(user_input: str) -> str:
"""Sanitize user input to prevent injection attacks."""
# Remove HTML tags
import html
sanitized = html.escape(user_input)
# Remove SQL injection patterns
sql_patterns = [';', '--', '/*', '*/', 'union', 'select', 'drop']
for pattern in sql_patterns:
sanitized = sanitized.replace(pattern.lower(), '')
return sanitized
```
### Database Security
```python
# Always use parameterized queries
def get_user_photos(user_id: int):
"""Get photos for a user using parameterized query."""
cursor.execute(
'SELECT * FROM photos WHERE user_id = ?',
(user_id,)
)
return cursor.fetchall()
# Never use string formatting for SQL
# BAD - vulnerable to SQL injection
def bad_get_user_photos(user_id: int):
cursor.execute(f'SELECT * FROM photos WHERE user_id = {user_id}')
return cursor.fetchall()
```

View File

@ -1,69 +0,0 @@
# PunimTag Product Vision
## Overview
PunimTag is an intelligent photo management system that uses face recognition to automatically organize, tag, and manage personal photo collections.
## Core Value Proposition
- **Automatic Face Recognition**: Identify and tag people in photos without manual effort
- **Smart Organization**: Group photos by people, events, and locations
- **Duplicate Detection**: Find and manage duplicate photos automatically
- **Intuitive Interface**: Web-based GUI that's easy to use for non-technical users
- **Privacy-First**: Local processing, no cloud dependencies
## Target Users
- **Primary**: Individuals with large photo collections (families, photographers, content creators)
- **Secondary**: Small businesses needing photo organization (real estate, events, etc.)
## Key Features
### 1. Photo Management
- Upload and organize photos by date, location, and content
- Automatic metadata extraction (EXIF data, GPS coordinates)
- Batch operations for efficiency
### 2. Face Recognition & Tagging
- Automatic face detection in photos
- Face identification and naming
- Group photos by people
- Handle multiple faces per photo
### 3. Duplicate Management
- Find duplicate photos automatically
- Visual comparison tools
- Bulk removal options
- Keep best quality versions
### 4. Search & Discovery
- Search by person name
- Filter by date ranges
- Tag-based filtering
- Similar face suggestions
### 5. User Experience
- Progressive loading for large collections
- Responsive web interface
- Custom dialogs (no browser alerts)
- Real-time notifications
## Success Metrics
- **User Engagement**: Time spent organizing photos
- **Accuracy**: Face recognition precision
- **Performance**: Load times for large collections
- **Usability**: User satisfaction and ease of use
## Future Roadmap
- Cloud sync capabilities
- Mobile app companion
- Advanced AI features (emotion detection, age progression)
- Social sharing features
- Integration with existing photo services

View File

@ -1,109 +0,0 @@
# PunimTag Project Structure
## Directory Organization
```
PunimTag/
├── src/ # Main application source code
│ ├── backend/ # Flask backend and API
│ ├── frontend/ # JavaScript and UI components
│ └── utils/ # Utility functions and helpers
├── docs/ # Documentation and steering documents
├── tests/ # All test files and test utilities
├── data/ # Database files and user data
├── assets/ # Static assets (images, CSS, etc.)
├── config/ # Configuration files
└── scripts/ # Build and deployment scripts
```
## Core Components
### Backend (Flask)
- **Main Application**: `simple_web_gui.py` - Primary Flask app
- **Database Management**: `db_manager.py` - Database operations
- **Face Recognition**: `visual_identifier.py` - Face detection and recognition
- **Configuration**: `config.py` - App configuration
### Frontend (JavaScript)
- **UI Components**: Embedded in Flask templates
- **Progressive Loading**: Handles large photo collections
- **Custom Dialogs**: Replaces browser alerts
- **Face Management**: Face identification and tagging interface
### Data Layer
- **SQLite Database**: `punimtag_simple.db` - Main database
- **Image Storage**: `photos/` directory
- **Thumbnails**: Generated on-demand
- **Face Encodings**: Stored as binary data
## Architecture Principles
### 1. Separation of Concerns
- **Backend**: Business logic, data processing, API endpoints
- **Frontend**: User interface, interactions, state management
- **Data**: Persistent storage, caching, optimization
### 2. Progressive Enhancement
- **Core Functionality**: Works without JavaScript
- **Enhanced Features**: Progressive loading, real-time updates
- **Fallbacks**: Graceful degradation for older browsers
### 3. Performance Optimization
- **Lazy Loading**: Images and data loaded on demand
- **Caching**: Thumbnails and frequently accessed data
- **Batch Operations**: Efficient bulk processing
### 4. User Experience
- **Responsive Design**: Works on all screen sizes
- **Accessibility**: Keyboard navigation, screen reader support
- **Error Handling**: Graceful error recovery and user feedback
## File Naming Conventions
### Python Files
- **snake_case** for file names and functions
- **PascalCase** for classes
- **UPPER_CASE** for constants
### JavaScript Files
- **camelCase** for functions and variables
- **PascalCase** for classes and components
- **kebab-case** for CSS classes
### Database
- **snake_case** for table and column names
- **Descriptive names** that clearly indicate purpose
## Dependencies
### Backend Dependencies
- **Flask**: Web framework
- **SQLite**: Database
- **dlib**: Face recognition
- **Pillow**: Image processing
- **numpy**: Numerical operations
### Frontend Dependencies
- **Vanilla JavaScript**: No external frameworks
- **CSS Grid/Flexbox**: Layout system
- **Fetch API**: HTTP requests
- **Intersection Observer**: Progressive loading
## Configuration Management
- **Environment Variables**: For sensitive data
- **JSON Config Files**: For application settings
- **Database Migrations**: For schema changes
- **Feature Flags**: For experimental features

View File

@ -1,136 +0,0 @@
# PunimTag Technical Architecture
## Technology Stack
### Backend
- **Framework**: Flask (Python web framework)
- **Database**: SQLite (lightweight, file-based)
- **Face Recognition**: dlib (C++ library with Python bindings)
- **Image Processing**: Pillow (PIL fork)
- **Data Processing**: NumPy (numerical operations)
### Frontend
- **Language**: Vanilla JavaScript (ES6+)
- **Styling**: CSS3 with Grid/Flexbox
- **HTTP Client**: Fetch API
- **Progressive Loading**: Intersection Observer API
- **No Frameworks**: Pure JavaScript for simplicity
### Development Tools
- **Version Control**: Git
- **Package Management**: pip (Python), npm (optional for frontend tools)
- **Testing**: pytest (Python), Jest (JavaScript)
- **Code Quality**: flake8, black (Python), ESLint (JavaScript)
## Core Technologies
### Face Recognition Pipeline
1. **Image Loading**: Pillow for image processing
2. **Face Detection**: dlib's CNN face detector
3. **Feature Extraction**: dlib's 128-dimensional face encodings
4. **Similarity Matching**: Euclidean distance calculation
5. **Storage**: Binary encoding storage in SQLite
### Database Schema
```sql
-- Core tables
images (id, filename, path, date_taken, metadata)
faces (id, image_id, person_id, encoding, coordinates, confidence)
people (id, name, created_date)
tags (id, name)
image_tags (image_id, tag_id)
-- Supporting tables
face_encodings (id, face_id, encoding_data)
photo_metadata (image_id, exif_data, gps_data)
```
### API Design
- **RESTful Endpoints**: Standard HTTP methods (GET, POST, DELETE)
- **JSON Responses**: Consistent response format
- **Error Handling**: HTTP status codes with descriptive messages
- **Pagination**: Offset-based for large datasets
## Performance Considerations
### Image Processing
- **Thumbnail Generation**: On-demand with caching
- **Face Detection**: Optimized for speed vs accuracy
- **Batch Processing**: Efficient handling of large photo sets
- **Memory Management**: Streaming for large images
### Database Optimization
- **Indexing**: Strategic indexes on frequently queried columns
- **Connection Pooling**: Efficient database connections
- **Query Optimization**: Minimize N+1 query problems
- **Data Archiving**: Move old data to separate tables
### Frontend Performance
- **Progressive Loading**: Load data in chunks
- **Image Lazy Loading**: Load images as they become visible
- **Caching**: Browser caching for static assets
- **Debouncing**: Prevent excessive API calls
## Security Considerations
### Data Protection
- **Local Storage**: No cloud dependencies
- **Input Validation**: Sanitize all user inputs
- **SQL Injection Prevention**: Parameterized queries
- **File Upload Security**: Validate file types and sizes
### Privacy
- **Face Data**: Stored locally, not shared
- **Metadata**: User controls what's stored
- **Access Control**: Local access only
- **Data Export**: User can export/delete their data
## Scalability
### Current Limitations
- **Single User**: Designed for personal use
- **Local Storage**: Limited by disk space
- **Processing Power**: CPU-intensive face recognition
- **Memory**: Large photo collections require significant RAM
### Future Scalability
- **Multi-User Support**: Database schema supports multiple users
- **Cloud Integration**: Optional cloud storage and processing
- **Distributed Processing**: GPU acceleration for face recognition
- **Microservices**: Separate services for different functions
## Development Workflow
### Code Organization
- **Modular Design**: Separate concerns into modules
- **Configuration Management**: Environment-based settings
- **Error Handling**: Comprehensive error catching and logging
- **Documentation**: Inline code documentation
### Testing Strategy
- **Unit Tests**: Test individual functions and classes
- **Integration Tests**: Test API endpoints and database operations
- **End-to-End Tests**: Test complete user workflows
- **Performance Tests**: Test with large datasets
### Deployment
- **Local Development**: Flask development server
- **Production**: WSGI server (Gunicorn) with reverse proxy
- **Containerization**: Docker for consistent environments
- **Monitoring**: Logging and health checks

View File

@ -1,531 +0,0 @@
# PunimTag Testing Standards
## Overview
This document defines the standards for writing and organizing tests in PunimTag.
## Test Organization
### Directory Structure
```
tests/
├── unit/ # Unit tests for individual functions
├── integration/ # Integration tests for API endpoints
├── e2e/ # End-to-end tests for complete workflows
├── fixtures/ # Test data and fixtures
├── utils/ # Test utilities and helpers
└── conftest.py # pytest configuration and shared fixtures
```
### Test File Naming
- **Unit Tests**: `test_<module_name>.py`
- **Integration Tests**: `test_<feature>_integration.py`
- **E2E Tests**: `test_<workflow>_e2e.py`
- **Test Utilities**: `test_<utility_name>.py`
## Test Categories
### Unit Tests
Test individual functions and classes in isolation.
```python
# tests/unit/test_face_recognition.py
import pytest
from src.utils.face_recognition import detect_faces, encode_face
def test_detect_faces_with_valid_image():
"""Test face detection with a valid image."""
image_path = "tests/fixtures/valid_face.jpg"
faces = detect_faces(image_path)
assert len(faces) > 0
assert all(hasattr(face, 'left') for face in faces)
assert all(hasattr(face, 'top') for face in faces)
def test_detect_faces_with_no_faces():
"""Test face detection with an image containing no faces."""
image_path = "tests/fixtures/no_faces.jpg"
faces = detect_faces(image_path)
assert len(faces) == 0
def test_encode_face_with_valid_face():
"""Test face encoding with a valid face."""
face_image = load_test_face_image()
encoding = encode_face(face_image)
assert len(encoding) == 128
assert all(isinstance(x, float) for x in encoding)
```
### Integration Tests
Test API endpoints and database interactions.
```python
# tests/integration/test_photo_api.py
import pytest
from src.app import app
@pytest.fixture
def client():
"""Create a test client."""
app.config['TESTING'] = True
app.config['DATABASE'] = 'test.db'
with app.test_client() as client:
yield client
def test_get_photos_endpoint(client):
"""Test the GET /photos endpoint."""
response = client.get('/photos')
assert response.status_code == 200
data = response.get_json()
assert data['success'] == True
assert 'photos' in data
def test_create_photo_endpoint(client):
"""Test the POST /photos endpoint."""
photo_data = {
'filename': 'test.jpg',
'path': '/test/path/test.jpg'
}
response = client.post('/photos', json=photo_data)
assert response.status_code == 201
data = response.get_json()
assert data['success'] == True
assert 'photo_id' in data
def test_get_photo_not_found(client):
"""Test getting a non-existent photo."""
response = client.get('/photos/99999')
assert response.status_code == 404
data = response.get_json()
assert data['success'] == False
assert 'error' in data
```
### End-to-End Tests
Test complete user workflows.
```python
# tests/e2e/test_photo_workflow.py
import pytest
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
@pytest.fixture
def driver():
"""Create a web driver for E2E tests."""
driver = webdriver.Chrome()
driver.implicitly_wait(10)
yield driver
driver.quit()
def test_upload_and_identify_photo(driver):
"""Test the complete workflow of uploading and identifying a photo."""
# Navigate to the app
driver.get("http://localhost:5000")
# Upload a photo
file_input = driver.find_element(By.ID, "photo-upload")
file_input.send_keys("tests/fixtures/test_photo.jpg")
# Wait for upload to complete
WebDriverWait(driver, 30).until(
EC.presence_of_element_located((By.CLASS_NAME, "photo-card"))
)
# Click on the photo to open details
photo_card = driver.find_element(By.CLASS_NAME, "photo-card")
photo_card.click()
# Wait for photo details to load
WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.ID, "photoDetails"))
)
# Verify faces are detected
faces = driver.find_elements(By.CLASS_NAME, "face-item")
assert len(faces) > 0
# Identify a face
face_input = driver.find_element(By.CLASS_NAME, "face-name-input")
face_input.send_keys("Test Person")
identify_button = driver.find_element(By.CLASS_NAME, "identify-face-btn")
identify_button.click()
# Verify identification
WebDriverWait(driver, 10).until(
EC.text_to_be_present_in_element((By.CLASS_NAME, "face-name"), "Test Person")
)
```
## Test Fixtures
### Database Fixtures
```python
# tests/conftest.py
import pytest
import sqlite3
import tempfile
import os
@pytest.fixture
def test_db():
"""Create a temporary test database."""
db_fd, db_path = tempfile.mkstemp()
# Create test database schema
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
cursor.execute('''
CREATE TABLE images (
id INTEGER PRIMARY KEY,
filename TEXT NOT NULL,
path TEXT NOT NULL,
date_taken TEXT
)
''')
cursor.execute('''
CREATE TABLE faces (
id INTEGER PRIMARY KEY,
image_id INTEGER,
person_id INTEGER,
encoding BLOB,
left INTEGER,
top INTEGER,
right INTEGER,
bottom INTEGER
)
''')
conn.commit()
conn.close()
yield db_path
# Cleanup
os.close(db_fd)
os.unlink(db_path)
@pytest.fixture
def sample_photos(test_db):
"""Add sample photos to the test database."""
conn = sqlite3.connect(test_db)
cursor = conn.cursor()
photos = [
('photo1.jpg', '/test/path/photo1.jpg', '2023-01-01'),
('photo2.jpg', '/test/path/photo2.jpg', '2023-01-02'),
('photo3.jpg', '/test/path/photo3.jpg', '2023-01-03')
]
cursor.executemany(
'INSERT INTO images (filename, path, date_taken) VALUES (?, ?, ?)',
photos
)
conn.commit()
conn.close()
return photos
```
### Mock Fixtures
```python
# tests/conftest.py
import pytest
from unittest.mock import Mock, patch
@pytest.fixture
def mock_face_recognition():
"""Mock face recognition functions."""
with patch('src.utils.face_recognition.detect_faces') as mock_detect:
with patch('src.utils.face_recognition.encode_face') as mock_encode:
mock_detect.return_value = [
Mock(left=100, top=100, right=200, bottom=200)
]
mock_encode.return_value = [0.1] * 128
yield {
'detect': mock_detect,
'encode': mock_encode
}
@pytest.fixture
def mock_file_system():
"""Mock file system operations."""
with patch('os.path.exists') as mock_exists:
with patch('os.path.getsize') as mock_size:
mock_exists.return_value = True
mock_size.return_value = 1024 * 1024 # 1MB
yield {
'exists': mock_exists,
'size': mock_size
}
```
## Test Data Management
### Test Images
```python
# tests/fixtures/test_images.py
import os
from PIL import Image
import numpy as np
def create_test_image(width=100, height=100, filename="test.jpg"):
"""Create a test image for testing."""
# Create a simple test image
image = Image.new('RGB', (width, height), color='red')
# Add a simple face-like pattern
pixels = np.array(image)
# Draw a simple face outline
pixels[30:70, 40:60] = [255, 255, 255] # White face
pixels[40:50, 45:55] = [0, 0, 0] # Black eyes
test_image = Image.fromarray(pixels)
test_path = f"tests/fixtures/{filename}"
test_image.save(test_path)
return test_path
def cleanup_test_images():
"""Clean up test images."""
fixture_dir = "tests/fixtures"
for file in os.listdir(fixture_dir):
if file.endswith(('.jpg', '.png', '.jpeg')):
os.remove(os.path.join(fixture_dir, file))
```
## Performance Testing
### Load Testing
```python
# tests/performance/test_load.py
import pytest
import time
import concurrent.futures
from src.app import app
def test_concurrent_photo_requests():
"""Test handling multiple concurrent photo requests."""
client = app.test_client()
def make_request():
return client.get('/photos?page=1&per_page=20')
# Make 10 concurrent requests
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
futures = [executor.submit(make_request) for _ in range(10)]
responses = [future.result() for future in futures]
# All requests should succeed
for response in responses:
assert response.status_code == 200
# Check response times
start_time = time.time()
for _ in range(5):
client.get('/photos?page=1&per_page=20')
end_time = time.time()
avg_time = (end_time - start_time) / 5
assert avg_time < 1.0 # Should respond within 1 second
def test_large_photo_collection():
"""Test performance with a large photo collection."""
# This would require setting up a large test dataset
pass
```
## Test Configuration
### pytest Configuration
```ini
# pytest.ini
[tool:pytest]
testpaths = tests
python_files = test_*.py
python_classes = Test*
python_functions = test_*
addopts =
-v
--tb=short
--strict-markers
--disable-warnings
markers =
unit: Unit tests
integration: Integration tests
e2e: End-to-end tests
slow: Slow running tests
performance: Performance tests
```
### Test Environment Variables
```python
# tests/conftest.py
import os
@pytest.fixture(autouse=True)
def test_environment():
"""Set up test environment variables."""
os.environ['TESTING'] = 'true'
os.environ['DATABASE_PATH'] = 'test.db'
os.environ['PHOTOS_DIR'] = 'tests/fixtures/photos'
yield
# Cleanup
if 'TESTING' in os.environ:
del os.environ['TESTING']
```
## Code Coverage
### Coverage Configuration
```ini
# .coveragerc
[run]
source = src
omit =
*/tests/*
*/venv/*
*/__pycache__/*
*/migrations/*
[report]
exclude_lines =
pragma: no cover
def __repr__
raise AssertionError
raise NotImplementedError
if 0:
if __name__ == .__main__.:
```
### Coverage Testing
```python
# tests/test_coverage.py
import pytest
import coverage
def test_code_coverage():
"""Ensure code coverage meets minimum requirements."""
cov = coverage.Coverage()
cov.start()
# Run the application
from src.app import app
client = app.test_client()
client.get('/photos')
cov.stop()
cov.save()
# Generate coverage report
cov.report()
# Check coverage percentage
total_coverage = cov.report()
assert total_coverage >= 80.0 # Minimum 80% coverage
```
## Continuous Integration
### GitHub Actions
```yaml
# .github/workflows/test.yml
name: Tests
on: [push, pull_request]
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Install dependencies
run: |
pip install -r requirements.txt
pip install pytest pytest-cov
- name: Run tests
run: |
pytest tests/ --cov=src --cov-report=xml
- name: Upload coverage
uses: codecov/codecov-action@v1
with:
file: ./coverage.xml
```
## Best Practices
### Test Naming
- Use descriptive test names that explain what is being tested
- Follow the pattern: `test_<function>_<scenario>_<expected_result>`
- Example: `test_detect_faces_with_multiple_faces_returns_correct_count`
### Test Independence
- Each test should be independent and not rely on other tests
- Use fixtures to set up test data
- Clean up after each test
### Test Data
- Use realistic but minimal test data
- Create helper functions for generating test data
- Keep test data in fixtures directory
### Error Testing
- Test both success and failure scenarios
- Test edge cases and boundary conditions
- Test error handling and recovery
### Performance
- Keep tests fast and efficient
- Use mocking for slow operations
- Separate slow tests with `@pytest.mark.slow`
### Documentation
- Document complex test scenarios
- Explain the purpose of each test
- Keep test code readable and maintainable

19
main.py
View File

@ -1,19 +0,0 @@
#!/usr/bin/env python3
"""
PunimTag - Intelligent Photo Management System
Main entry point for the PunimTag application.
"""
import sys
import os

# Make the sibling `src/` directory importable so `backend.app` resolves
# regardless of the current working directory the script is launched from.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

from backend.app import app

if __name__ == '__main__':
    print("Starting PunimTag...")
    print("Access the application at: http://localhost:5000")
    # debug=True enables the reloader/debugger — development only,
    # not suitable for production deployment.
    app.run(host='0.0.0.0', port=5000, debug=True)

479
photo_tagger.py Normal file
View File

@ -0,0 +1,479 @@
#!/usr/bin/env python3
"""
PunimTag CLI - Minimal Photo Face Tagger
Simple command-line tool for face recognition and photo tagging
"""
import os
import sqlite3
import argparse
import face_recognition
from pathlib import Path
from PIL import Image
import pickle
import numpy as np
from typing import List, Dict, Tuple, Optional
import sys
class PhotoTagger:
    """SQLite-backed store for photos, detected faces, people, and tags.

    Every public method opens and closes its own short-lived sqlite3
    connection, so an instance holds no open handles between calls and
    can be reused freely across CLI commands.
    """

    def __init__(self, db_path: str = "photos.db"):
        """Initialize the photo tagger with database.

        Args:
            db_path: Path to the SQLite database file (created if missing).
        """
        self.db_path = db_path
        self.init_database()

    def init_database(self):
        """Create database tables if they don't exist."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        # Photos table: one row per file on disk; `processed` flags whether
        # face detection has already run (0 = pending, 1 = done).
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS photos (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                path TEXT UNIQUE NOT NULL,
                filename TEXT NOT NULL,
                date_added DATETIME DEFAULT CURRENT_TIMESTAMP,
                processed BOOLEAN DEFAULT 0
            )
        ''')
        # People table: unique display names assigned during identification.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS people (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                created_date DATETIME DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        # Faces table: one row per detected face. `person_id` stays NULL
        # until identified; `encoding` holds the raw face-encoding bytes and
        # `location` a str() of the bounding-box tuple.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS faces (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                photo_id INTEGER NOT NULL,
                person_id INTEGER,
                encoding BLOB NOT NULL,
                location TEXT NOT NULL,
                confidence REAL DEFAULT 0.0,
                FOREIGN KEY (photo_id) REFERENCES photos (id),
                FOREIGN KEY (person_id) REFERENCES people (id)
            )
        ''')
        # Tags table: free-form labels, many per photo.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS tags (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                photo_id INTEGER NOT NULL,
                tag_name TEXT NOT NULL,
                created_date DATETIME DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (photo_id) REFERENCES photos (id)
            )
        ''')
        conn.commit()
        conn.close()
        print(f"✅ Database initialized: {self.db_path}")

    def scan_folder(self, folder_path: str, recursive: bool = True) -> int:
        """Scan folder for photos and add to database.

        Args:
            folder_path: Directory to scan.
            recursive: Walk subdirectories when True.

        Returns:
            Number of photos newly added (duplicates are ignored).
        """
        if not os.path.exists(folder_path):
            print(f"❌ Folder not found: {folder_path}")
            return 0

        photo_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff'}
        found_photos = []

        if recursive:
            for root, dirs, files in os.walk(folder_path):
                for file in files:
                    if Path(file).suffix.lower() in photo_extensions:
                        photo_path = os.path.join(root, file)
                        found_photos.append((photo_path, file))
        else:
            for file in os.listdir(folder_path):
                if Path(file).suffix.lower() in photo_extensions:
                    photo_path = os.path.join(folder_path, file)
                    found_photos.append((photo_path, file))

        if not found_photos:
            print(f"📁 No photos found in {folder_path}")
            return 0

        # Add to database; UNIQUE(path) + INSERT OR IGNORE makes rescans safe.
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        added_count = 0
        for photo_path, filename in found_photos:
            try:
                cursor.execute(
                    'INSERT OR IGNORE INTO photos (path, filename) VALUES (?, ?)',
                    (photo_path, filename)
                )
                # rowcount is 0 when the path already existed.
                if cursor.rowcount > 0:
                    added_count += 1
            except Exception as e:
                # BUGFIX: message previously printed the literal "(unknown)"
                # instead of the offending filename.
                print(f"⚠️ Error adding {filename}: {e}")
        conn.commit()
        conn.close()
        print(f"📁 Found {len(found_photos)} photos, added {added_count} new photos")
        return added_count

    def process_faces(self, limit: int = 50, model: str = "hog") -> int:
        """Process unprocessed photos for faces.

        Args:
            limit: Maximum number of photos to process in this batch.
            model: face_recognition detection model ('hog' or 'cnn').

        Returns:
            Number of photos successfully processed.
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        cursor.execute(
            'SELECT id, path, filename FROM photos WHERE processed = 0 LIMIT ?',
            (limit,)
        )
        unprocessed = cursor.fetchall()
        if not unprocessed:
            print("✅ No unprocessed photos found")
            conn.close()
            return 0

        print(f"🔍 Processing {len(unprocessed)} photos for faces...")
        processed_count = 0
        for photo_id, photo_path, filename in unprocessed:
            if not os.path.exists(photo_path):
                # File vanished since scanning; mark processed so it is not
                # retried forever.
                print(f"❌ File not found: {photo_path}")
                cursor.execute('UPDATE photos SET processed = 1 WHERE id = ?', (photo_id,))
                continue
            try:
                # Load image and find faces
                print(f"📸 Processing: {filename}")
                image = face_recognition.load_image_file(photo_path)
                face_locations = face_recognition.face_locations(image, model=model)
                if face_locations:
                    face_encodings = face_recognition.face_encodings(image, face_locations)
                    print(f"   👤 Found {len(face_locations)} faces")
                    # Save faces to database (encoding stored as raw bytes).
                    for encoding, location in zip(face_encodings, face_locations):
                        cursor.execute(
                            'INSERT INTO faces (photo_id, encoding, location) VALUES (?, ?, ?)',
                            (photo_id, encoding.tobytes(), str(location))
                        )
                else:
                    print(f"   👤 No faces found")
                # Mark as processed
                cursor.execute('UPDATE photos SET processed = 1 WHERE id = ?', (photo_id,))
                processed_count += 1
            except Exception as e:
                # Mark failed photos as processed too, so one corrupt file
                # cannot stall the whole batch pipeline.
                print(f"❌ Error processing {filename}: {e}")
                cursor.execute('UPDATE photos SET processed = 1 WHERE id = ?', (photo_id,))
        conn.commit()
        conn.close()
        print(f"✅ Processed {processed_count} photos")
        return processed_count

    def identify_faces(self, batch_size: int = 20) -> int:
        """Interactive face identification.

        Prompts on stdin for each unidentified face. Commands: a name to
        identify, 's' to skip, 'q' to quit, 'list' to show known people.

        Returns:
            Number of faces identified in this session.
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        cursor.execute('''
            SELECT f.id, f.photo_id, p.path, p.filename, f.location
            FROM faces f
            JOIN photos p ON f.photo_id = p.id
            WHERE f.person_id IS NULL
            LIMIT ?
        ''', (batch_size,))
        unidentified = cursor.fetchall()
        if not unidentified:
            print("🎉 All faces have been identified!")
            conn.close()
            return 0

        print(f"\n👤 Found {len(unidentified)} unidentified faces")
        print("Commands: [name] = identify, 's' = skip, 'q' = quit, 'list' = show people\n")
        identified_count = 0
        for i, (face_id, photo_id, photo_path, filename, location) in enumerate(unidentified):
            print(f"\n--- Face {i+1}/{len(unidentified)} ---")
            print(f"📁 Photo: {filename}")
            print(f"📍 Face location: {location}")
            while True:
                command = input("👤 Person name (or command): ").strip()
                if command.lower() == 'q':
                    print("Quitting...")
                    # BUGFIX: commit before closing, otherwise every
                    # identification made this session was silently lost
                    # when quitting early.
                    conn.commit()
                    conn.close()
                    return identified_count
                elif command.lower() == 's':
                    print("⏭️ Skipped")
                    break
                elif command.lower() == 'list':
                    self._show_people_list(cursor)
                    continue
                elif command:
                    try:
                        # Add person if doesn't exist, then fetch their id.
                        cursor.execute('INSERT OR IGNORE INTO people (name) VALUES (?)', (command,))
                        cursor.execute('SELECT id FROM people WHERE name = ?', (command,))
                        person_id = cursor.fetchone()[0]
                        # Assign face to person
                        cursor.execute(
                            'UPDATE faces SET person_id = ? WHERE id = ?',
                            (person_id, face_id)
                        )
                        print(f"✅ Identified as: {command}")
                        identified_count += 1
                        break
                    except Exception as e:
                        print(f"❌ Error: {e}")
                else:
                    print("Please enter a name, 's' to skip, 'q' to quit, or 'list' to see people")
        conn.commit()
        conn.close()
        print(f"\n✅ Identified {identified_count} faces")
        return identified_count

    def _show_people_list(self, cursor):
        """Show list of known people using the caller's open cursor."""
        cursor.execute('SELECT name FROM people ORDER BY name')
        people = cursor.fetchall()
        if people:
            print("👥 Known people:", ", ".join([p[0] for p in people]))
        else:
            print("👥 No people identified yet")

    def add_tags(self, photo_pattern: str = None, batch_size: int = 10) -> int:
        """Add custom tags to photos interactively.

        Args:
            photo_pattern: Optional substring to filter filenames.
            batch_size: Maximum number of photos to prompt for.

        Returns:
            Number of photos that received at least one tag.
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        if photo_pattern:
            cursor.execute(
                'SELECT id, filename FROM photos WHERE filename LIKE ? LIMIT ?',
                (f'%{photo_pattern}%', batch_size)
            )
        else:
            cursor.execute('SELECT id, filename FROM photos LIMIT ?', (batch_size,))
        photos = cursor.fetchall()
        if not photos:
            print("No photos found")
            conn.close()
            return 0

        print(f"🏷️ Tagging {len(photos)} photos (enter comma-separated tags)")
        tagged_count = 0
        for photo_id, filename in photos:
            # BUGFIX: prompt previously printed the literal "(unknown)"
            # instead of the photo's filename.
            print(f"\n📸 {filename}")
            tags_input = input("🏷️ Tags: ").strip()
            if tags_input.lower() == 'q':
                break
            if tags_input:
                tags = [tag.strip() for tag in tags_input.split(',') if tag.strip()]
                for tag in tags:
                    cursor.execute(
                        'INSERT INTO tags (photo_id, tag_name) VALUES (?, ?)',
                        (photo_id, tag)
                    )
                print(f"   ✅ Added {len(tags)} tags")
                tagged_count += 1
        conn.commit()
        conn.close()
        print(f"✅ Tagged {tagged_count} photos")
        return tagged_count

    def stats(self) -> Dict:
        """Show database statistics and return them as a dict.

        Returns:
            Dict with counts: total_photos, processed_photos, total_faces,
            identified_faces, total_people, unique_tags, and top_people
            (list of (name, face_count) pairs, up to 5).
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        stats = {}
        # Basic counts
        cursor.execute('SELECT COUNT(*) FROM photos')
        stats['total_photos'] = cursor.fetchone()[0]
        cursor.execute('SELECT COUNT(*) FROM photos WHERE processed = 1')
        stats['processed_photos'] = cursor.fetchone()[0]
        cursor.execute('SELECT COUNT(*) FROM faces')
        stats['total_faces'] = cursor.fetchone()[0]
        cursor.execute('SELECT COUNT(*) FROM faces WHERE person_id IS NOT NULL')
        stats['identified_faces'] = cursor.fetchone()[0]
        cursor.execute('SELECT COUNT(*) FROM people')
        stats['total_people'] = cursor.fetchone()[0]
        cursor.execute('SELECT COUNT(DISTINCT tag_name) FROM tags')
        stats['unique_tags'] = cursor.fetchone()[0]
        # Top people by number of assigned faces.
        cursor.execute('''
            SELECT p.name, COUNT(f.id) as face_count
            FROM people p
            LEFT JOIN faces f ON p.id = f.person_id
            GROUP BY p.id
            ORDER BY face_count DESC
            LIMIT 5
        ''')
        stats['top_people'] = cursor.fetchall()
        conn.close()

        # Display stats
        print(f"\n📊 Database Statistics")
        print("=" * 40)
        print(f"Photos: {stats['processed_photos']}/{stats['total_photos']} processed")
        print(f"Faces: {stats['identified_faces']}/{stats['total_faces']} identified")
        print(f"People: {stats['total_people']} unique")
        print(f"Tags: {stats['unique_tags']} unique")
        if stats['top_people']:
            print(f"\n👥 Top People:")
            for name, count in stats['top_people']:
                print(f"   {name}: {count} faces")
        unidentified = stats['total_faces'] - stats['identified_faces']
        if unidentified > 0:
            print(f"\n⚠️ {unidentified} faces still need identification")
        return stats

    def search_faces(self, person_name: str) -> List[str]:
        """Search for photos containing a specific person.

        Args:
            person_name: Substring matched against people.name (LIKE).

        Returns:
            List of photo paths containing a matching person.
        """
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        cursor.execute('''
            SELECT DISTINCT p.filename, p.path
            FROM photos p
            JOIN faces f ON p.id = f.photo_id
            JOIN people pe ON f.person_id = pe.id
            WHERE pe.name LIKE ?
        ''', (f'%{person_name}%',))
        results = cursor.fetchall()
        conn.close()
        if results:
            print(f"\n🔍 Found {len(results)} photos with '{person_name}':")
            for filename, path in results:
                # BUGFIX: listing previously printed the literal "(unknown)"
                # instead of each matching filename.
                print(f"  📸 {filename}")
        else:
            print(f"🔍 No photos found with '{person_name}'")
        return [path for filename, path in results]
def main():
    """Main CLI interface.

    Parses command-line arguments and dispatches to the matching
    PhotoTagger method. Returns a process exit code: 0 on success,
    1 on usage error, interrupt, or unexpected failure.
    """
    parser = argparse.ArgumentParser(
        description="PunimTag CLI - Simple photo face tagger",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  photo_tagger.py scan /path/to/photos     # Scan folder for photos
  photo_tagger.py process --limit 20       # Process 20 photos for faces
  photo_tagger.py identify --batch 10      # Identify 10 faces interactively
  photo_tagger.py tag --pattern "vacation" # Tag photos matching pattern
  photo_tagger.py search "John"            # Find photos with John
  photo_tagger.py stats                    # Show statistics
        """
    )
    parser.add_argument('command',
                        choices=['scan', 'process', 'identify', 'tag', 'search', 'stats'],
                        help='Command to execute')
    # `target` is polymorphic: folder for scan, person name for search,
    # filename pattern for tag.
    parser.add_argument('target', nargs='?',
                        help='Target folder (scan), person name (search), or pattern (tag)')
    parser.add_argument('--db', default='photos.db',
                        help='Database file path (default: photos.db)')
    parser.add_argument('--limit', type=int, default=50,
                        help='Batch size limit for processing (default: 50)')
    parser.add_argument('--batch', type=int, default=20,
                        help='Batch size for identification (default: 20)')
    parser.add_argument('--pattern',
                        help='Pattern for filtering photos when tagging')
    parser.add_argument('--model', choices=['hog', 'cnn'], default='hog',
                        help='Face detection model: hog (faster) or cnn (more accurate)')
    parser.add_argument('--recursive', action='store_true',
                        help='Scan folders recursively')
    args = parser.parse_args()

    # Initialize tagger (creates the database/tables on first use).
    tagger = PhotoTagger(args.db)

    try:
        if args.command == 'scan':
            if not args.target:
                print("❌ Please specify a folder to scan")
                return 1
            tagger.scan_folder(args.target, args.recursive)
        elif args.command == 'process':
            tagger.process_faces(args.limit, args.model)
        elif args.command == 'identify':
            tagger.identify_faces(args.batch)
        elif args.command == 'tag':
            # --pattern takes precedence; a bare positional target also works.
            tagger.add_tags(args.pattern or args.target, args.batch)
        elif args.command == 'search':
            if not args.target:
                print("❌ Please specify a person name to search for")
                return 1
            tagger.search_faces(args.target)
        elif args.command == 'stats':
            tagger.stats()
        return 0
    except KeyboardInterrupt:
        # Ctrl-C during interactive prompts is an expected way to leave.
        print("\n\n⚠️ Interrupted by user")
        return 1
    except Exception as e:
        print(f"❌ Error: {e}")
        return 1

if __name__ == "__main__":
    sys.exit(main())

BIN
photos.db Normal file

Binary file not shown.

BIN
photos.db-journal Normal file

Binary file not shown.

View File

@ -1,8 +1,8 @@
click==8.2.1
dlib==20.0.0
# Minimal dependencies for CLI photo tagger
face-recognition==1.3.0
face-recognition-models==0.3.0
numpy==2.2.6
pillow==11.3.0
opencv-python==4.10.0.84
scikit-learn==1.7.0
dlib>=20.0.0
numpy>=1.21.0
pillow>=8.0.0
click>=8.0.0
setuptools>=40.0.0

34
run.sh Normal file
View File

@ -0,0 +1,34 @@
#!/bin/bash
# PunimTag Runner Script
# Automatically activates virtual environment and runs commands

# Check if virtual environment exists; without it the CLI's dependencies
# (face_recognition, dlib, etc.) are unavailable.
if [ ! -d "venv" ]; then
    echo "❌ Virtual environment not found!"
    echo "Run: python3 -m venv venv && source venv/bin/activate && python3 setup.py"
    exit 1
fi

# Activate virtual environment
source venv/bin/activate

# Check if no arguments provided — print usage help and exit successfully.
if [ $# -eq 0 ]; then
    echo "🎯 PunimTag CLI"
    echo "Usage: ./run.sh <command> [arguments]"
    echo ""
    echo "Examples:"
    echo "  ./run.sh scan /path/to/photos --recursive"
    echo "  ./run.sh process --limit 20"
    echo "  ./run.sh identify --batch 10"
    echo "  ./run.sh search 'John'"
    echo "  ./run.sh stats"
    echo ""
    echo "Or run directly:"
    echo "  source venv/bin/activate"
    echo "  python3 photo_tagger.py --help"
    exit 0
fi

# Run the command, forwarding all arguments verbatim.
python3 photo_tagger.py "$@"

View File

@ -1,160 +0,0 @@
#!/usr/bin/env python3
"""
Cleanup script for PunimTag project organization.
This script removes redundant test files and organizes the project structure.
"""
import os
import shutil
from pathlib import Path
def cleanup_old_tests():
    """Remove old test files that are now consolidated into tests/."""
    # Legacy ad-hoc test pages and scripts superseded by the tests/ suite.
    obsolete = (
        "test_syntax_fix.html",
        "test_js_validator.html",
        "test_direct_error_check.html",
        "test_console_tracker.html",
        "test_syntax_check.html",
        "test_progressive.html",
        "test_simple_main.html",
        "test_diagnostic.html",
        "test_minimal.html",
        "debug_ui.html",
        "test_backend.py",
        "test_punimtag.py",
        "test_web_api.py",
    )
    removed = 0
    for name in obsolete:
        candidate = Path(name)
        if not candidate.exists():
            continue
        try:
            candidate.unlink()
        except Exception as exc:
            print(f"❌ Failed to remove {name}: {exc}")
        else:
            print(f"✅ Removed: {name}")
            removed += 1
    print(f"\n📊 Cleanup complete: {removed} files removed")
def verify_structure():
    """Verify that the new structure is properly organized.

    Returns True when every expected file exists, False otherwise.
    """
    # Expected layout after reorganization: path -> human description.
    expected = {
        "src/backend/app.py": "Main Flask application",
        "src/backend/db_manager.py": "Database manager",
        "src/backend/visual_identifier.py": "Face recognition",
        "src/utils/tag_manager.py": "Tag management",
        "config/settings.py": "Configuration settings",
        "data/punimtag_simple.db": "Main database",
        "tests/test_main.py": "Main test suite",
        "docs/product.md": "Product vision",
        "docs/structure.md": "Project structure",
        "docs/tech.md": "Technical architecture",
        "docs/api-standards.md": "API standards",
        "docs/testing-standards.md": "Testing standards",
        "docs/code-conventions.md": "Code conventions",
    }

    print("\n🔍 Verifying project structure:")
    print("=" * 50)

    ok = True
    for rel_path, description in expected.items():
        if not Path(rel_path).exists():
            print(f"{rel_path} - {description} (MISSING)")
            ok = False
        else:
            print(f"{rel_path} - {description}")
    return ok
def create_gitignore():
    """Create or update .gitignore file with the project's standard rules."""
    # Rules grouped by concern: Python build artifacts, virtualenvs,
    # local databases, temp files, IDE/OS noise, logs, and secrets.
    gitignore_content = """# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Virtual environments
venv/
env/
ENV/
# Database files (keep structure, ignore content)
data/*.db
data/*.sqlite
# Temporary files
*.tmp
*.temp
temp_face_crop_*.jpg
# IDE
.vscode/
.idea/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db
# Logs
*.log
# Environment variables
.env
"""
    with open(".gitignore", "w") as handle:
        handle.write(gitignore_content)
    print("✅ Updated .gitignore file")
def main():
    """Run the full project cleanup: prune old tests, verify layout, refresh .gitignore."""
    print("🧹 PunimTag Project Cleanup")
    print("=" * 50)

    # Remove superseded test files from the repository root.
    cleanup_old_tests()

    # Confirm the reorganized directory layout is complete.
    layout_ok = verify_structure()

    # Refresh the ignore rules last so they reflect the new layout.
    create_gitignore()

    print("\n" + "=" * 50)
    if layout_ok:
        print("🎉 Project cleanup completed successfully!")
        print("\n📋 Next steps:")
        print("1. Review the steering documents in docs/")
        print("2. Run tests: python tests/test_main.py")
        print("3. Start the app: python main.py")
    else:
        print("⚠️ Project cleanup completed with issues.")
        print("Please check the missing files above.")
# Run the cleanup workflow when this file is executed as a script.
if __name__ == "__main__":
    main()

View File

@ -1,436 +0,0 @@
#!/usr/bin/env python3
"""
Face Clustering GUI for PunimTag
Visual interface for viewing and identifying clustered unknown faces
"""
import tkinter as tk
from tkinter import ttk, messagebox, simpledialog
from PIL import Image, ImageTk
import os
import sqlite3
from typing import List, Dict
import pickle
import numpy as np
class FaceClusterGUI:
    """Tkinter window for reviewing clusters of unidentified faces.

    Groups unassigned faces by encoding similarity, shows each cluster's
    thumbnails, and lets the user name all faces of a cluster at once.
    Reads and writes the SQLite database at ``db_path``.
    """

    def __init__(self, db_path: str = 'punimtag_simple.db'):
        self.db_path = db_path
        self.root = tk.Tk()
        self.root.title("PunimTag - Face Clustering")
        self.root.geometry("1200x800")
        # Current cluster data
        self.clusters = []
        self.current_cluster_index = 0
        # Image cache: face_id -> PhotoImage; also keeps Tk image refs alive.
        self.image_cache = {}
        self.setup_ui()
        self.load_clusters()

    def setup_ui(self):
        """Setup the user interface"""
        # Main frame
        main_frame = ttk.Frame(self.root, padding="10")
        main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
        # Configure grid weights so the center panel absorbs resizes.
        self.root.columnconfigure(0, weight=1)
        self.root.rowconfigure(0, weight=1)
        main_frame.columnconfigure(1, weight=1)
        main_frame.rowconfigure(1, weight=1)
        # Title
        title_label = ttk.Label(main_frame, text="Unknown Face Clusters",
                                font=('Arial', 16, 'bold'))
        title_label.grid(row=0, column=0, columnspan=3, pady=(0, 10))
        # Left panel - cluster list
        left_frame = ttk.LabelFrame(main_frame, text="Clusters", padding="5")
        left_frame.grid(row=1, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), padx=(0, 10))
        # Cluster listbox
        self.cluster_listbox = tk.Listbox(left_frame, width=30)
        self.cluster_listbox.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
        self.cluster_listbox.bind('<<ListboxSelect>>', self.on_cluster_select)
        scrollbar = ttk.Scrollbar(left_frame, orient="vertical", command=self.cluster_listbox.yview)
        scrollbar.grid(row=0, column=1, sticky=(tk.N, tk.S))
        self.cluster_listbox.configure(yscrollcommand=scrollbar.set)
        left_frame.columnconfigure(0, weight=1)
        left_frame.rowconfigure(0, weight=1)
        # Center panel - face display
        center_frame = ttk.LabelFrame(main_frame, text="Faces in Cluster", padding="5")
        center_frame.grid(row=1, column=1, sticky=(tk.W, tk.E, tk.N, tk.S))
        # Canvas for face thumbnails
        self.canvas = tk.Canvas(center_frame, bg='white')
        self.canvas.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
        # Scrollbars for canvas
        v_scrollbar = ttk.Scrollbar(center_frame, orient="vertical", command=self.canvas.yview)
        v_scrollbar.grid(row=0, column=1, sticky=(tk.N, tk.S))
        h_scrollbar = ttk.Scrollbar(center_frame, orient="horizontal", command=self.canvas.xview)
        h_scrollbar.grid(row=1, column=0, sticky=(tk.W, tk.E))
        self.canvas.configure(yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set)
        center_frame.columnconfigure(0, weight=1)
        center_frame.rowconfigure(0, weight=1)
        # Right panel - actions
        right_frame = ttk.LabelFrame(main_frame, text="Actions", padding="5")
        right_frame.grid(row=1, column=2, sticky=(tk.W, tk.E, tk.N, tk.S), padx=(10, 0))
        # Cluster info
        self.info_label = ttk.Label(right_frame, text="Select a cluster", wraplength=200)
        self.info_label.grid(row=0, column=0, pady=(0, 10), sticky=(tk.W, tk.E))
        # Identify button (disabled until a cluster is selected)
        self.identify_button = ttk.Button(right_frame, text="Identify as Person",
                                          command=self.identify_cluster, state='disabled')
        self.identify_button.grid(row=1, column=0, pady=5, sticky=(tk.W, tk.E))
        # Skip button
        self.skip_button = ttk.Button(right_frame, text="Skip Cluster",
                                      command=self.skip_cluster, state='disabled')
        self.skip_button.grid(row=2, column=0, pady=5, sticky=(tk.W, tk.E))
        # Refresh button
        self.refresh_button = ttk.Button(right_frame, text="Refresh Clusters",
                                         command=self.refresh_clusters)
        self.refresh_button.grid(row=3, column=0, pady=5, sticky=(tk.W, tk.E))
        # Statistics
        self.stats_label = ttk.Label(right_frame, text="", wraplength=200)
        self.stats_label.grid(row=4, column=0, pady=(20, 0), sticky=(tk.W, tk.E))
        right_frame.columnconfigure(0, weight=1)
        # Status bar
        self.status_bar = ttk.Label(main_frame, text="Ready", relief=tk.SUNKEN)
        self.status_bar.grid(row=2, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=(10, 0))

    def load_clusters(self):
        """Load face clusters from database"""
        try:
            self.status_bar.config(text="Loading clusters...")
            self.root.update()
            # Get clusters using simple clustering (without sklearn)
            clusters = self.get_simple_clusters()
            self.clusters = clusters
            self.populate_cluster_list()
            self.update_statistics()
            self.status_bar.config(text=f"Loaded {len(clusters)} clusters")
        except Exception as e:
            messagebox.showerror("Error", f"Failed to load clusters: {e}")
            self.status_bar.config(text="Error loading clusters")

    def get_simple_clusters(self) -> List[Dict]:
        """Simple clustering without sklearn - group by face encoding similarity.

        Greedy single-pass grouping: each face not yet clustered seeds a
        cluster that absorbs all later faces within the distance threshold.
        O(n^2) in the number of unidentified faces.
        """
        if not os.path.exists(self.db_path):
            return []
        conn = sqlite3.connect(self.db_path)
        c = conn.cursor()
        try:
            # Get unidentified faces
            c.execute('''SELECT f.id, f.image_id, i.path, f.top, f.right, f.bottom, f.left, f.encoding
                         FROM faces f
                         JOIN images i ON f.image_id = i.id
                         WHERE f.person_id IS NULL''')
            faces = c.fetchall()
            if len(faces) < 2:
                return []
            # Simple clustering by face encoding similarity
            clusters = []
            used_faces = set()
            for i, face1 in enumerate(faces):
                if face1[0] in used_faces:
                    continue
                cluster_faces = [face1]
                used_faces.add(face1[0])
                # Encodings are stored pickled in the `encoding` column.
                encoding1 = pickle.loads(face1[7])
                # Find similar faces among the remaining (later) faces only.
                for j, face2 in enumerate(faces[i+1:], i+1):
                    if face2[0] in used_faces:
                        continue
                    encoding2 = pickle.loads(face2[7])
                    # Calculate similarity (simple distance)
                    try:
                        import face_recognition
                        distance = face_recognition.face_distance([encoding1], encoding2)[0]
                        if distance < 0.5:  # Similar faces
                            cluster_faces.append(face2)
                            used_faces.add(face2[0])
                    # NOTE(review): bare `except:` also swallows non-import
                    # errors (e.g. bad encodings); consider narrowing.
                    except:
                        # Fallback to numpy distance if face_recognition not available
                        distance = np.linalg.norm(encoding1 - encoding2)
                        if distance < 0.8:
                            cluster_faces.append(face2)
                            used_faces.add(face2[0])
                # Only create cluster if it has multiple faces
                if len(cluster_faces) >= 2:
                    cluster_data = {
                        'cluster_id': len(clusters),
                        'face_count': len(cluster_faces),
                        'faces': []
                    }
                    for face in cluster_faces:
                        cluster_data['faces'].append({
                            'face_id': face[0],
                            'image_id': face[1],
                            'image_path': face[2],
                            'location': (face[3], face[4], face[5], face[6])
                        })
                    clusters.append(cluster_data)
            # Sort by face count (largest clusters first)
            clusters.sort(key=lambda x: x['face_count'], reverse=True)
            return clusters
        except Exception as e:
            print(f"Error in simple clustering: {e}")
            return []
        finally:
            conn.close()

    def populate_cluster_list(self):
        """Populate the cluster list"""
        self.cluster_listbox.delete(0, tk.END)
        for i, cluster in enumerate(self.clusters):
            label = f"Cluster {i+1} ({cluster['face_count']} faces)"
            self.cluster_listbox.insert(tk.END, label)

    def on_cluster_select(self, event):
        """Handle cluster selection"""
        selection = self.cluster_listbox.curselection()
        if not selection:
            return
        self.current_cluster_index = selection[0]
        self.display_cluster_faces()
        self.identify_button.config(state='normal')
        self.skip_button.config(state='normal')

    def display_cluster_faces(self):
        """Display faces in the selected cluster"""
        if not self.clusters or self.current_cluster_index >= len(self.clusters):
            return
        cluster = self.clusters[self.current_cluster_index]
        # Update info
        info_text = f"Cluster {self.current_cluster_index + 1}\n"
        info_text += f"{cluster['face_count']} faces\n"
        info_text += f"Click 'Identify as Person' to name these faces"
        self.info_label.config(text=info_text)
        # Clear canvas
        self.canvas.delete("all")
        # Display face thumbnails in a simple 4-per-row grid.
        x, y = 10, 10
        max_width = 0
        row_height = 0
        for i, face in enumerate(cluster['faces']):
            try:
                thumbnail = self.get_face_thumbnail(face)
                if thumbnail:
                    # Create image on canvas
                    image_id = self.canvas.create_image(x, y, anchor=tk.NW, image=thumbnail)
                    # Add image path as text below
                    filename = os.path.basename(face['image_path'])
                    self.canvas.create_text(x + 50, y + 110, text=filename,
                                            width=100, font=('Arial', 8))
                    x += 120
                    max_width = max(max_width, x)
                    row_height = max(row_height, 130)
                    # New row after 4 images
                    if (i + 1) % 4 == 0:
                        x = 10
                        y += row_height
                        row_height = 0
            except Exception as e:
                print(f"Error displaying face thumbnail: {e}")
        # Update canvas scroll region
        self.canvas.configure(scrollregion=self.canvas.bbox("all"))

    def get_face_thumbnail(self, face: Dict) -> ImageTk.PhotoImage:
        """Get thumbnail image of a face.

        Returns a cached PhotoImage when available; None when the source
        image is missing or unreadable.
        """
        cache_key = f"{face['face_id']}"
        if cache_key in self.image_cache:
            return self.image_cache[cache_key]
        try:
            # Load image
            image_path = face['image_path']
            if not os.path.exists(image_path):
                return None
            img = Image.open(image_path)
            # Crop face region (face_recognition-style top/right/bottom/left).
            top, right, bottom, left = face['location']
            # Add some padding around the face, clamped to the image bounds.
            padding = 20
            left = max(0, left - padding)
            top = max(0, top - padding)
            right = min(img.width, right + padding)
            bottom = min(img.height, bottom + padding)
            face_img = img.crop((left, top, right, bottom))
            # Resize to thumbnail
            face_img.thumbnail((100, 100), Image.Resampling.LANCZOS)
            # Convert to PhotoImage
            photo = ImageTk.PhotoImage(face_img)
            # Cache it (also prevents Tk from garbage-collecting the image).
            self.image_cache[cache_key] = photo
            return photo
        except Exception as e:
            print(f"Error creating thumbnail: {e}")
            return None

    def identify_cluster(self):
        """Identify all faces in cluster as a person"""
        if not self.clusters or self.current_cluster_index >= len(self.clusters):
            return
        cluster = self.clusters[self.current_cluster_index]
        # Get person name
        name = simpledialog.askstring("Identify Person",
                                      f"Enter name for {cluster['face_count']} faces:")
        if not name or not name.strip():
            return
        name = name.strip()
        try:
            # Add person to database and assign faces
            conn = sqlite3.connect(self.db_path)
            c = conn.cursor()
            # Add person (idempotent for an existing name)
            c.execute('INSERT OR IGNORE INTO people (name) VALUES (?)', (name,))
            c.execute('SELECT id FROM people WHERE name = ?', (name,))
            person_id = c.fetchone()[0]
            # Assign all faces in cluster
            for face in cluster['faces']:
                c.execute('''UPDATE faces
                             SET person_id = ?, is_confirmed = 1
                             WHERE id = ?''',
                          (person_id, face['face_id']))
            conn.commit()
            conn.close()
            messagebox.showinfo("Success", f"Identified {cluster['face_count']} faces as {name}")
            # Refresh clusters
            self.refresh_clusters()
        except Exception as e:
            messagebox.showerror("Error", f"Failed to identify cluster: {e}")

    def skip_cluster(self):
        """Skip current cluster"""
        if self.current_cluster_index < len(self.clusters) - 1:
            self.cluster_listbox.selection_set(self.current_cluster_index + 1)
            self.on_cluster_select(None)
        else:
            messagebox.showinfo("Info", "This is the last cluster")

    def refresh_clusters(self):
        """Reload clusters from database"""
        # Clear cache
        self.image_cache.clear()
        # Reload
        self.load_clusters()
        # Reset selection
        self.identify_button.config(state='disabled')
        self.skip_button.config(state='disabled')
        self.canvas.delete("all")
        self.info_label.config(text="Select a cluster")

    def update_statistics(self):
        """Update statistics display"""
        if not os.path.exists(self.db_path):
            return
        try:
            conn = sqlite3.connect(self.db_path)
            c = conn.cursor()
            c.execute("SELECT COUNT(*) FROM faces WHERE person_id IS NULL")
            unidentified = c.fetchone()[0]
            c.execute("SELECT COUNT(*) FROM faces WHERE person_id IS NOT NULL")
            identified = c.fetchone()[0]
            conn.close()
            stats_text = f"Statistics:\n"
            stats_text += f"Unidentified: {unidentified}\n"
            stats_text += f"Identified: {identified}\n"
            stats_text += f"Clusters: {len(self.clusters)}"
            self.stats_label.config(text=stats_text)
        except Exception as e:
            print(f"Error updating statistics: {e}")

    def run(self):
        """Start the GUI"""
        self.root.mainloop()
def main():
    """Entry point: build the clustering window and hand control to Tk's main loop."""
    FaceClusterGUI().run()
# Launch the clustering GUI when executed as a script.
if __name__ == "__main__":
    main()

View File

@ -1,193 +0,0 @@
#!/usr/bin/env python3
"""
Interactive Face Identifier for PunimTag
Allows users to identify unknown faces in the database
"""
import os
import cv2
import numpy as np
from punimtag import PunimTag
from typing import Optional
import sys
class InteractiveFaceIdentifier:
    """Visual identification loop: shows each unidentified face in an OpenCV
    window and prompts for a name on the console.

    Requires OpenCV; the sibling CLIFaceIdentifier covers the no-OpenCV case.
    """

    def __init__(self, db_path: str = 'punimtag.db'):
        # PunimTag wraps all database access (faces, people, assignments).
        self.tagger = PunimTag(db_path=db_path)
        self.window_name = 'Face Identifier'

    def display_face(self, image_path: str, location: tuple) -> np.ndarray:
        """Load and display image with face highlighted.

        location is (top, right, bottom, left). Returns the (possibly
        downscaled) BGR image array, or None if the file cannot be read.
        """
        img = cv2.imread(image_path)
        if img is None:
            print(f"Error: Could not load image {image_path}")
            return None
        # Get face coordinates
        top, right, bottom, left = location
        # Draw rectangle around face
        cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 3)
        # Calculate display size (max 800x600), preserving aspect ratio.
        height, width = img.shape[:2]
        max_height, max_width = 600, 800
        if height > max_height or width > max_width:
            scale = min(max_height/height, max_width/width)
            new_width = int(width * scale)
            new_height = int(height * scale)
            img = cv2.resize(img, (new_width, new_height))
        return img

    def get_user_input(self, face_info: dict) -> Optional[str]:
        """Get user input for face identification.

        Returns the entered name, None to skip, or the sentinel 'QUIT'.
        """
        print("\n" + "="*50)
        print(f"Image: {face_info['image_path']}")
        print(f"Face ID: {face_info['face_id']}")
        print("\nOptions:")
        print("1. Enter person's name")
        print("2. Skip this face (press Enter)")
        print("3. Quit (press 'q')")
        print("="*50)
        user_input = input("\nEnter person's name (or press Enter to skip): ").strip()
        if user_input.lower() == 'q':
            return 'QUIT'
        elif user_input == '':
            return None
        else:
            return user_input

    def run(self):
        """Run the interactive identification process"""
        print("PunimTag Interactive Face Identifier")
        print("=" * 50)
        # Get unidentified faces
        unidentified = self.tagger.get_unidentified_faces()
        if not unidentified:
            print("No unidentified faces found!")
            return
        print(f"Found {len(unidentified)} unidentified faces")
        # Create window
        cv2.namedWindow(self.window_name, cv2.WINDOW_NORMAL)
        identified_count = 0
        for i, face_info in enumerate(unidentified):
            print(f"\nProcessing face {i+1} of {len(unidentified)}")
            # Display the face
            img = self.display_face(face_info['image_path'], face_info['location'])
            if img is None:
                continue
            cv2.imshow(self.window_name, img)
            cv2.waitKey(1)  # Allow window to update
            # Get user input
            name = self.get_user_input(face_info)
            if name == 'QUIT':
                print("\nQuitting...")
                break
            elif name:
                # Add person and assign face
                person_id = self.tagger.add_person(name)
                self.tagger.assign_face_to_person(face_info['face_id'], person_id, is_confirmed=True)
                identified_count += 1
                print(f"✓ Identified as: {name}")
            else:
                print("⊘ Skipped")
        cv2.destroyAllWindows()
        print(f"\n{'='*50}")
        print(f"Identification complete!")
        print(f"Identified {identified_count} faces")
        print(f"Skipped {len(unidentified) - identified_count} faces")
        self.tagger.close()
class CLIFaceIdentifier:
    """Command-line only face identifier (no OpenCV required).

    Same workflow as InteractiveFaceIdentifier but describes each face's
    coordinates in text instead of showing the image.
    """

    def __init__(self, db_path: str = 'punimtag.db'):
        # PunimTag wraps all database access (faces, people, assignments).
        self.tagger = PunimTag(db_path=db_path)

    def run(self):
        """Run CLI-based identification"""
        print("PunimTag CLI Face Identifier")
        print("=" * 50)
        # Get unidentified faces
        unidentified = self.tagger.get_unidentified_faces()
        if not unidentified:
            print("No unidentified faces found!")
            return
        print(f"Found {len(unidentified)} unidentified faces\n")
        identified_count = 0
        for i, face_info in enumerate(unidentified):
            print(f"\n{'='*50}")
            print(f"Face {i+1} of {len(unidentified)}")
            print(f"Image: {face_info['image_path']}")
            # location is (top, right, bottom, left) per the query order.
            print(f"Location in image: top={face_info['location'][0]}, right={face_info['location'][1]}, "
                  f"bottom={face_info['location'][2]}, left={face_info['location'][3]}")
            print(f"Face ID: {face_info['face_id']}")
            name = input("\nEnter person's name (or press Enter to skip, 'q' to quit): ").strip()
            if name.lower() == 'q':
                print("\nQuitting...")
                break
            elif name:
                # Add person and assign face
                person_id = self.tagger.add_person(name)
                self.tagger.assign_face_to_person(face_info['face_id'], person_id, is_confirmed=True)
                identified_count += 1
                print(f"✓ Identified as: {name}")
            else:
                print("⊘ Skipped")
        print(f"\n{'='*50}")
        print(f"Identification complete!")
        print(f"Identified {identified_count} faces")
        print(f"Skipped {len(unidentified) - identified_count} faces")
        self.tagger.close()
def main():
    """Entry point: choose the visual identifier when OpenCV is installed, else the CLI one."""
    try:
        import cv2  # probe availability only
    except ImportError:
        print("OpenCV not available - using CLI identifier")
        print("Install opencv-python for visual identification: pip install opencv-python")
        identifier = CLIFaceIdentifier()
    else:
        print("OpenCV available - using visual identifier")
        identifier = InteractiveFaceIdentifier()
    try:
        identifier.run()
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to leave the interactive loop.
        print("\n\nInterrupted by user")
        sys.exit(0)
# Start the appropriate identifier when executed as a script.
if __name__ == "__main__":
    main()

View File

@ -1,118 +0,0 @@
#!/usr/bin/env python3
"""
Simple Face Identifier for PunimTag
Works with the punimtag_simple.db database
"""
import sqlite3
import os
from PIL import Image
import pickle
class SimpleFaceIdentifier:
    """Command-line helper for naming unidentified faces in punimtag_simple.db.

    Every method opens a short-lived sqlite3 connection, so the object holds
    no database state beyond the path.
    """

    def __init__(self, db_path: str = 'punimtag_simple.db'):
        # Path to the SQLite database produced by the simple pipeline.
        self.db_path = db_path

    def _connect(self):
        """Open a fresh connection to the configured database."""
        return sqlite3.connect(self.db_path)

    def get_unidentified_faces(self, limit: int = 10) -> list:
        """Return up to *limit* faces with no person assigned.

        Each row is (face_id, image_id, path, filename, top, right, bottom, left).
        """
        conn = self._connect()
        try:
            c = conn.cursor()
            c.execute('''SELECT f.id, f.image_id, i.path, i.filename, f.top, f.right, f.bottom, f.left
                         FROM faces f
                         JOIN images i ON f.image_id = i.id
                         WHERE f.person_id IS NULL
                         LIMIT ?''', (limit,))
            return c.fetchall()
        finally:
            conn.close()

    def add_person(self, name: str) -> int:
        """Create *name* in the people table if new; return its row id."""
        conn = self._connect()
        try:
            c = conn.cursor()
            # INSERT OR IGNORE keeps the call idempotent for existing names.
            c.execute('INSERT OR IGNORE INTO people (name) VALUES (?)', (name,))
            c.execute('SELECT id FROM people WHERE name = ?', (name,))
            person_id = c.fetchone()[0]
            conn.commit()
            return person_id
        finally:
            conn.close()

    def assign_face(self, face_id: int, person_id: int) -> None:
        """Link a detected face to a person and mark the match as confirmed."""
        conn = self._connect()
        try:
            c = conn.cursor()
            c.execute('UPDATE faces SET person_id = ?, is_confirmed = 1 WHERE id = ?',
                      (person_id, face_id))
            conn.commit()
        finally:
            conn.close()

    def run_cli_identifier(self) -> None:
        """Interactively prompt for a name for each unidentified face."""
        print("\n🏷️ Simple Face Identifier")
        print("=" * 50)
        faces = self.get_unidentified_faces(50)  # Process at most 50 faces per run
        if not faces:
            print("No unidentified faces found!")
            return
        print(f"Found {len(faces)} unidentified faces to process...")
        print("For each face, enter the person's name or 's' to skip\n")
        for i, (face_id, image_id, path, filename, top, right, bottom, left) in enumerate(faces):
            print(f"\nFace {i+1}/{len(faces)}")
            # Bug fix: show the face's actual filename (previously the
            # unpacked `filename` value was never displayed).
            print(f"📁 File: {filename}")
            print(f"📍 Location: top={top}, right={right}, bottom={bottom}, left={left}")
            # Best effort: report the source image dimensions if readable.
            try:
                if os.path.exists(path):
                    with Image.open(path) as img:
                        print(f"🖼️ Image size: {img.size}")
                else:
                    print("⚠️ Image file not found")
            except Exception as e:
                print(f"⚠️ Could not read image: {e}")
            while True:
                name = input("👤 Who is this person? (or 's' to skip): ").strip()
                if name.lower() == 's':
                    print("⏭️ Skipped")
                    break
                elif name:
                    try:
                        person_id = self.add_person(name)
                        self.assign_face(face_id, person_id)
                        print(f"✅ Identified as '{name}'")
                        break
                    except Exception as e:
                        print(f"❌ Error: {e}")
                else:
                    print("Please enter a name or 's' to skip")
        print(f"\n🎉 Completed processing {len(faces)} faces!")
        # Show remaining count
        remaining = self.get_remaining_count()
        if remaining > 0:
            print(f"📊 {remaining} unidentified faces remaining")
            print("Run the script again to continue identifying faces")
        else:
            print("🏆 All faces have been identified!")

    def get_remaining_count(self) -> int:
        """Return how many faces still lack a person assignment."""
        conn = self._connect()
        try:
            c = conn.cursor()
            c.execute('SELECT COUNT(*) FROM faces WHERE person_id IS NULL')
            return c.fetchone()[0]
        finally:
            conn.close()
# Script entry point: launch the interactive CLI identification loop.
if __name__ == "__main__":
    identifier = SimpleFaceIdentifier()
    identifier.run_cli_identifier()

View File

@ -1,224 +0,0 @@
#!/usr/bin/env python3
"""
PunimTag GUI Starter
Simple script to demonstrate the system and guide next steps
"""
import os
import subprocess
import sys
def check_requirements():
    """Ensure the photos directory exists and report database statistics.

    Returns a (image_count, face_count, unidentified_count) tuple; all
    zeros when the database is missing or unreadable.
    """
    print("🔍 Checking requirements...")

    # Make sure the photo drop-off directory is available.
    if os.path.exists('photos'):
        print("✅ Photos directory exists")
    else:
        print("❌ Photos directory not found")
        print(" Creating photos/ directory...")
        os.makedirs('photos', exist_ok=True)
        print(" ✅ Created photos/ directory")

    if not os.path.exists('punimtag_simple.db'):
        print("❌ Database not found")
        print(" Run 'python punimtag_simple.py' first to process images")
        return 0, 0, 0

    print("✅ Database exists")
    try:
        import sqlite3
        conn = sqlite3.connect('punimtag_simple.db')
        c = conn.cursor()
        counts = []
        for query in ("SELECT COUNT(*) FROM images",
                      "SELECT COUNT(*) FROM faces",
                      "SELECT COUNT(*) FROM faces WHERE person_id IS NULL"):
            c.execute(query)
            counts.append(c.fetchone()[0])
        conn.close()
        image_count, face_count, unidentified_count = counts
        print(f" 📊 {image_count} images, {face_count} faces, {unidentified_count} unidentified")
        return image_count, face_count, unidentified_count
    except Exception as e:
        print(f" ⚠️ Error reading database: {e}")
        return 0, 0, 0
def show_menu(image_count, face_count, unidentified_count):
    """Print the main menu, tailored to how much data has been processed."""
    print("\n" + "="*60)
    print("🏷️ PUNIMTAG - NEXT STEPS")
    print("="*60)

    if image_count == 0:
        # Nothing processed yet: only show onboarding instructions.
        print("📝 GETTING STARTED:")
        print(" 1. Add photos to the 'photos/' directory")
        print(" 2. Run: python punimtag_simple.py")
        print(" 3. Come back here for face identification")
        print("\n💡 TIP: Start with 10-20 photos for testing")
        return

    print("📊 CURRENT STATUS:")
    print(f" Images processed: {image_count}")
    print(f" Faces detected: {face_count}")
    print(f" Unidentified faces: {unidentified_count}")
    print("\n🎯 AVAILABLE ACTIONS:")
    print(" 1. Process more images")
    print(" 2. Identify unknown faces (CLI)")
    print(" 3. Manage database")
    print(" 4. View statistics")
    if unidentified_count > 0:
        # The web option is only relevant while faces remain unnamed.
        print(" 5. Start simple web interface (coming soon)")
    print(" 6. Exit")
def process_images():
    """Run the image-processing script in a child process and echo its output."""
    print("\n📷 Processing images...")
    try:
        proc = subprocess.run([sys.executable, 'punimtag_simple.py'],
                              capture_output=True, text=True)
        print(proc.stdout)
        if proc.stderr:
            print("Errors:", proc.stderr)
    except Exception as exc:
        print(f"Error: {exc}")
def identify_faces():
    """Launch the interactive CLI identification tool as a child process."""
    print("\n👥 Starting face identification...")
    script = 'interactive_identifier.py'
    try:
        subprocess.run([sys.executable, script])
    except Exception as exc:
        print(f"Error: {exc}")
def manage_database():
    """Open the database management utility in a child process."""
    print("\n🗄️ Starting database manager...")
    script = 'db_manager.py'
    try:
        subprocess.run([sys.executable, script])
    except Exception as exc:
        print(f"Error: {exc}")
def show_statistics():
    """Print a detailed database report via the DatabaseManager utility."""
    print("\n📊 DETAILED STATISTICS")
    print("="*40)
    try:
        # Imported lazily so the menu still works if db_manager is absent.
        from db_manager import DatabaseManager
        DatabaseManager('punimtag_simple.db').inspect_database()
    except Exception as exc:
        print(f"Error: {exc}")
def show_next_gui_steps():
    """Summarize completed, in-progress, and planned GUI work."""
    # Fixed status report; printed one line at a time to keep output identical.
    report = (
        "\n🚀 NEXT GUI DEVELOPMENT STEPS",
        "=" * 50,
        "We have a working backend! Next steps for GUI:",
        "",
        "✅ COMPLETED:",
        " • Face recognition and clustering",
        " • Database management",
        " • Jewish organization features",
        " • Search functionality",
        " • CLI tools for identification",
        "",
        "🔄 IN PROGRESS:",
        " • Web-based GUI with Flask",
        " • Face clustering visualization",
        " • Interactive face identification",
        "",
        "📋 PLANNED:",
        " • Advanced search interface",
        " • Tag management GUI",
        " • Statistics dashboard",
        " • Photo gallery with face highlights",
        "",
        "💡 TO TEST THE WEB GUI:",
        " 1. Make sure you have processed some images",
        " 2. Run: python web_gui.py (when ready)",
        " 3. Open http://localhost:5000 in browser",
    )
    for line in report:
        print(line)
def main():
    """Main function: interactive menu loop for the PunimTag starter.

    Re-reads database statistics after any action that can change them
    (options 1-3) so the menu always reflects the current state.
    """
    print("🏷️ PunimTag GUI Starter")
    print("Welcome to the face recognition and photo tagging system!")
    # Check requirements
    image_count, face_count, unidentified_count = check_requirements()
    while True:
        show_menu(image_count, face_count, unidentified_count)
        try:
            choice = input(f"\n➤ Select option: ").strip()
            if choice == '1':
                process_images()
                # Refresh stats
                image_count, face_count, unidentified_count = check_requirements()
            elif choice == '2':
                if face_count == 0:
                    print("❌ No faces to identify. Process images first.")
                else:
                    identify_faces()
                    # Refresh stats
                    image_count, face_count, unidentified_count = check_requirements()
            elif choice == '3':
                manage_database()
                # Refresh stats
                image_count, face_count, unidentified_count = check_requirements()
            elif choice == '4':
                show_statistics()
            elif choice == '5':
                # Placeholder: the web interface is not shipped yet.
                if unidentified_count > 0:
                    print("🌐 Web interface is in development!")
                    print("For now, use option 2 for CLI identification.")
                else:
                    print("✅ All faces are identified!")
            elif choice == '6':
                break
            else:
                print("❌ Invalid option")
            input("\nPress Enter to continue...")
        except KeyboardInterrupt:
            # Ctrl-C exits the loop cleanly.
            print("\n\n👋 Goodbye!")
            break
        except Exception as e:
            print(f"Error: {e}")
            input("Press Enter to continue...")
    print("\n🎉 Thank you for using PunimTag!")
    show_next_gui_steps()
# Start the interactive menu when executed as a script.
if __name__ == "__main__":
    main()

122
setup.py Normal file
View File

@ -0,0 +1,122 @@
#!/usr/bin/env python3
"""
PunimTag CLI Setup Script
Simple setup for the minimal photo tagger
"""
import os
import sys
import subprocess
from pathlib import Path
def check_python_version():
    """Verify the interpreter is at least Python 3.7; report the outcome."""
    major, minor = sys.version_info[:2]
    if (major, minor) < (3, 7):
        print("❌ Python 3.7+ is required")
        return False
    print(f"✅ Python {major}.{minor} detected")
    return True
def install_requirements():
    """Install packages from requirements.txt via pip; True on success."""
    if not Path("requirements.txt").exists():
        print("❌ requirements.txt not found!")
        return False
    print("📦 Installing Python dependencies...")
    try:
        # Use the current interpreter's pip so the right environment is targeted.
        subprocess.run(
            [sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt'],
            check=True,
        )
    except subprocess.CalledProcessError as exc:
        print(f"❌ Failed to install dependencies: {exc}")
        return False
    print("✅ Dependencies installed successfully")
    return True
def create_directories():
    """Create the runtime directories the app expects (idempotent)."""
    for name in ('data', 'logs'):
        Path(name).mkdir(exist_ok=True)
        print(f"✅ Created directory: {name}")
def test_installation():
    """Check that the face-recognition stack imports cleanly; True when it does."""
    print("🧪 Testing face recognition installation...")
    try:
        # Import the full stack the tagger needs; failure of any one is fatal.
        import face_recognition  # noqa: F401
        import numpy as np  # noqa: F401
        from PIL import Image  # noqa: F401
    except ImportError as exc:
        print(f"❌ Import error: {exc}")
        return False
    print("✅ All required modules imported successfully")
    return True
def main():
    """Main setup function.

    Runs the setup steps in order (version check, venv warning, directories,
    dependency install, import smoke test) and returns a process exit code:
    0 on success, 1 on any failure or user cancellation.
    """
    print("🚀 PunimTag CLI Setup")
    print("=" * 40)
    # Check Python version
    if not check_python_version():
        return 1
    # Check if we're in a virtual environment (recommended)
    # sys.prefix == sys.base_prefix means we are NOT inside a venv.
    if sys.prefix == sys.base_prefix:
        print("⚠️ Not in a virtual environment!")
        print(" Recommended: python -m venv venv && source venv/bin/activate")
        response = input(" Continue anyway? (y/N): ").strip().lower()
        if response != 'y':
            print("Setup cancelled. Create a virtual environment first.")
            return 1
    else:
        print("✅ Virtual environment detected")
    print()
    # Create directories
    print("📁 Creating directories...")
    create_directories()
    print()
    # Install requirements
    if not install_requirements():
        return 1
    print()
    # Test installation (face_recognition needs native build tools to compile)
    if not test_installation():
        print("⚠️ Installation test failed. You may need to install additional dependencies.")
        print(" For Ubuntu/Debian: sudo apt-get install build-essential cmake")
        print(" For macOS: brew install cmake")
        return 1
    print()
    print("✅ Setup complete!")
    print()
    print("🎯 Quick Start:")
    print(" 1. Add photos: python3 photo_tagger.py scan /path/to/photos")
    print(" 2. Process faces: python3 photo_tagger.py process")
    print(" 3. Identify faces: python3 photo_tagger.py identify")
    print(" 4. View stats: python3 photo_tagger.py stats")
    print()
    print("📖 For help: python3 photo_tagger.py --help")
    print()
    print("⚠️ IMPORTANT: Always activate virtual environment first!")
    print(" source venv/bin/activate")
    return 0
# Propagate the setup result as the process exit code.
if __name__ == '__main__':
    sys.exit(main())

View File

@ -1,8 +0,0 @@
"""
PunimTag Source Package
This package contains all the source code for the PunimTag application.
"""
# Package version and author metadata.
__version__ = "1.0.0"
__author__ = "PunimTag Team"

View File

@ -1,6 +0,0 @@
"""
PunimTag Backend Package
This package contains all backend-related code including Flask app, database operations,
and face recognition functionality.
"""

File diff suppressed because it is too large Load Diff

View File

@ -1,308 +0,0 @@
#!/usr/bin/env python3
"""
Database Management Utility for PunimTag
Clean, reset, inspect, and manage the database
"""
import os
import sqlite3
import shutil
from datetime import datetime
from typing import Dict, List
import json
class DatabaseManager:
    def __init__(self, db_path: str = 'punimtag_simple.db'):
        # Path to the SQLite database this manager operates on.
        self.db_path = db_path
def backup_database(self, backup_name: str = None) -> str:
"""Create a backup of the current database"""
if not os.path.exists(self.db_path):
print(f"Database {self.db_path} does not exist")
return None
if backup_name is None:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
backup_name = f"{self.db_path}.backup_{timestamp}"
shutil.copy2(self.db_path, backup_name)
print(f"✅ Database backed up to: {backup_name}")
return backup_name
def clean_database(self):
"""Clean all data but keep schema"""
if not os.path.exists(self.db_path):
print(f"Database {self.db_path} does not exist")
return
# Backup first
self.backup_database()
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
try:
# Clear all data but keep schema
c.execute("DELETE FROM image_tags")
c.execute("DELETE FROM faces")
c.execute("DELETE FROM tags")
c.execute("DELETE FROM people")
c.execute("DELETE FROM images")
# Reset auto-increment counters
c.execute("DELETE FROM sqlite_sequence")
conn.commit()
print("✅ Database cleaned successfully")
except Exception as e:
print(f"❌ Error cleaning database: {e}")
conn.rollback()
finally:
conn.close()
def delete_database(self):
"""Completely delete the database file"""
if os.path.exists(self.db_path):
# Backup first
self.backup_database()
os.remove(self.db_path)
print(f"✅ Database {self.db_path} deleted")
else:
print(f"Database {self.db_path} does not exist")
def get_database_stats(self) -> Dict:
"""Get comprehensive database statistics"""
if not os.path.exists(self.db_path):
return {"error": "Database does not exist"}
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
try:
stats = {}
# Basic counts
c.execute("SELECT COUNT(*) FROM images")
stats['images'] = c.fetchone()[0]
c.execute("SELECT COUNT(*) FROM faces")
stats['faces'] = c.fetchone()[0]
c.execute("SELECT COUNT(*) FROM faces WHERE person_id IS NOT NULL")
stats['identified_faces'] = c.fetchone()[0]
c.execute("SELECT COUNT(*) FROM people")
stats['people'] = c.fetchone()[0]
c.execute("SELECT COUNT(*) FROM tags")
stats['tags'] = c.fetchone()[0]
c.execute("SELECT COUNT(*) FROM image_tags")
stats['tagged_images'] = c.fetchone()[0]
# Derived stats
stats['unidentified_faces'] = stats['faces'] - stats['identified_faces']
# Top people by face count
c.execute("""SELECT p.name, COUNT(f.id) as face_count
FROM people p
JOIN faces f ON p.id = f.person_id
GROUP BY p.id
ORDER BY face_count DESC
LIMIT 5""")
stats['top_people'] = [{"name": row[0], "faces": row[1]} for row in c.fetchall()]
# Top tags
c.execute("""SELECT t.name, t.category, COUNT(it.image_id) as usage_count
FROM tags t
JOIN image_tags it ON t.id = it.tag_id
GROUP BY t.id
ORDER BY usage_count DESC
LIMIT 5""")
stats['top_tags'] = [{"name": row[0], "category": row[1], "usage": row[2]} for row in c.fetchall()]
# Database file size
stats['file_size_bytes'] = os.path.getsize(self.db_path)
stats['file_size_mb'] = round(stats['file_size_bytes'] / (1024 * 1024), 2)
return stats
except Exception as e:
return {"error": str(e)}
finally:
conn.close()
def inspect_database(self):
    """Print a formatted, human-readable report of database contents."""
    report = self.get_database_stats()
    if "error" in report:
        print(f"{report['error']}")
        return
    print("\n📊 DATABASE INSPECTION")
    print("=" * 50)
    print(f"Database: {self.db_path}")
    print(f"File size: {report['file_size_mb']} MB")
    print()
    print("📈 COUNTS:")
    print(f" Images: {report['images']}")
    print(f" Faces: {report['faces']}")
    print(f" - Identified: {report['identified_faces']}")
    print(f" - Unidentified: {report['unidentified_faces']}")
    print(f" People: {report['people']}")
    print(f" Tags: {report['tags']}")
    print(f" Tagged images: {report['tagged_images']}")
    print()
    if report['top_people']:
        print("👥 TOP PEOPLE:")
        for entry in report['top_people']:
            print(f" {entry['name']}: {entry['faces']} faces")
        print()
    if report['top_tags']:
        print("🏷️ TOP TAGS:")
        for entry in report['top_tags']:
            # Category is optional; omit the parenthetical when absent.
            suffix = f"({entry['category']})" if entry['category'] else ""
            print(f" {entry['name']} {suffix}: {entry['usage']} uses")
def list_all_people(self):
    """Print a table of every person with face count and creation date."""
    if not os.path.exists(self.db_path):
        print("Database does not exist")
        return
    conn = sqlite3.connect(self.db_path)
    cur = conn.cursor()
    try:
        cur.execute("""SELECT p.id, p.name, COUNT(f.id) as face_count, p.created_at
                     FROM people p
                     LEFT JOIN faces f ON p.id = f.person_id
                     GROUP BY p.id
                     ORDER BY face_count DESC""")
        rows = cur.fetchall()
        if not rows:
            print("No people found in database")
            return
        print("\n👥 ALL PEOPLE:")
        print("-" * 60)
        print(f"{'ID':<4} {'Name':<25} {'Faces':<8} {'Created':<15}")
        print("-" * 60)
        for pid, name, n_faces, created_at in rows:
            # Trim the timestamp to its date portion for display.
            created_short = created_at[:10] if created_at else "N/A"
            print(f"{pid:<4} {name:<25} {n_faces:<8} {created_short:<15}")
    except Exception as e:
        print(f"Error: {e}")
    finally:
        conn.close()
def list_all_tags(self):
    """Print a table of every tag with its category and usage count."""
    if not os.path.exists(self.db_path):
        print("Database does not exist")
        return
    conn = sqlite3.connect(self.db_path)
    cur = conn.cursor()
    try:
        cur.execute("""SELECT t.id, t.name, t.category, COUNT(it.image_id) as usage_count
                     FROM tags t
                     LEFT JOIN image_tags it ON t.id = it.tag_id
                     GROUP BY t.id
                     ORDER BY t.category, usage_count DESC""")
        rows = cur.fetchall()
        if not rows:
            print("No tags found in database")
            return
        print("\n🏷️ ALL TAGS:")
        print("-" * 60)
        print(f"{'ID':<4} {'Name':<25} {'Category':<15} {'Usage':<8}")
        print("-" * 60)
        for tag_id, name, category, usage in rows:
            # Uncategorized tags render as the literal string "None".
            label = category or "None"
            print(f"{tag_id:<4} {name:<25} {label:<15} {usage:<8}")
    except Exception as e:
        print(f"Error: {e}")
    finally:
        conn.close()
def main():
    """Interactive command-line loop for managing a PunimTag database.

    The database path may be given as the first CLI argument; otherwise
    'punimtag_simple.db' is used.
    """
    import sys
    db_path = sys.argv[1] if len(sys.argv) > 1 else 'punimtag_simple.db'
    manager = DatabaseManager(db_path)
    while True:
        print("\n🗄️ DATABASE MANAGER")
        print("=" * 30)
        print("1. Inspect database")
        print("2. Clean database (keep schema)")
        print("3. Delete database completely")
        print("4. Backup database")
        print("5. List all people")
        print("6. List all tags")
        print("7. Exit")
        try:
            selection = input("\nSelect option (1-7): ").strip()
            if selection == '1':
                manager.inspect_database()
            elif selection == '2':
                # Destructive: require explicit confirmation.
                if input("⚠️ Clean all data? (y/N): ").strip().lower() == 'y':
                    manager.clean_database()
                else:
                    print("Cancelled")
            elif selection == '3':
                # Destructive: require explicit confirmation; exit after delete.
                if input("⚠️ Delete database completely? (y/N): ").strip().lower() == 'y':
                    manager.delete_database()
                    break
                print("Cancelled")
            elif selection == '4':
                # Empty input means "auto-generate a backup name".
                backup_name = input("Backup name (or Enter for auto): ").strip() or None
                manager.backup_database(backup_name)
            elif selection == '5':
                manager.list_all_people()
            elif selection == '6':
                manager.list_all_tags()
            elif selection == '7':
                break
            else:
                print("Invalid option")
        except KeyboardInterrupt:
            print("\n\nExiting...")
            break
        except Exception as e:
            print(f"Error: {e}")
if __name__ == "__main__":
    main()

View File

@ -1,744 +0,0 @@
import os
import sqlite3
import face_recognition
import numpy as np
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
from datetime import datetime
import json
from typing import List, Dict, Tuple, Optional
import pickle
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import concurrent.futures
from config import get_config
import dlib
class PunimTag:
    """Face-recognition photo organizer backed by a SQLite database.

    Extracts EXIF metadata, detects and encodes faces, and supports people,
    tags, clustering, and search over the indexed photo library.
    """

    def __init__(self, db_path: str = 'punimtag.db', photos_dir: str = 'photos'):
        """Open (or create) the database and load project configuration.

        db_path: SQLite file used for all persistent state.
        photos_dir: root directory walked by process_directory().
        """
        self.db_path = db_path
        self.photos_dir = photos_dir
        self.config = get_config()  # project settings (detection model, batching, clustering)
        self.conn = self._init_db()  # single long-lived connection shared by all methods
def _init_db(self) -> sqlite3.Connection:
    """Initialize database with comprehensive schema.

    Creates all tables and indexes if they do not already exist (idempotent
    DDL, safe on an existing database) and returns the open connection.
    """
    conn = sqlite3.connect(self.db_path)
    c = conn.cursor()
    # Images table with metadata (one row per photo file; path is unique).
    c.execute('''CREATE TABLE IF NOT EXISTS images (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        path TEXT UNIQUE NOT NULL,
        filename TEXT NOT NULL,
        date_taken TIMESTAMP,
        latitude REAL,
        longitude REAL,
        camera_make TEXT,
        camera_model TEXT,
        width INTEGER,
        height INTEGER,
        file_size INTEGER,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )''')
    # People table for identified individuals
    c.execute('''CREATE TABLE IF NOT EXISTS people (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT UNIQUE NOT NULL,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )''')
    # Faces table with locations and encodings. The bounding box is stored as
    # (top, right, bottom, left), matching face_recognition's convention.
    # Deleting an image cascades to its faces; deleting a person orphans them.
    c.execute('''CREATE TABLE IF NOT EXISTS faces (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        image_id INTEGER NOT NULL,
        person_id INTEGER,
        top INTEGER NOT NULL,
        right INTEGER NOT NULL,
        bottom INTEGER NOT NULL,
        left INTEGER NOT NULL,
        encoding BLOB NOT NULL,
        confidence REAL,
        is_confirmed BOOLEAN DEFAULT 0,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY(image_id) REFERENCES images(id) ON DELETE CASCADE,
        FOREIGN KEY(person_id) REFERENCES people(id) ON DELETE SET NULL
    )''')
    # Tags table
    c.execute('''CREATE TABLE IF NOT EXISTS tags (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT UNIQUE NOT NULL,
        category TEXT,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )''')
    # Image-tag relationship (many-to-many join table).
    c.execute('''CREATE TABLE IF NOT EXISTS image_tags (
        image_id INTEGER NOT NULL,
        tag_id INTEGER NOT NULL,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY(image_id, tag_id),
        FOREIGN KEY(image_id) REFERENCES images(id) ON DELETE CASCADE,
        FOREIGN KEY(tag_id) REFERENCES tags(id) ON DELETE CASCADE
    )''')
    # Create indexes for performance on the common lookup paths.
    c.execute('CREATE INDEX IF NOT EXISTS idx_faces_person ON faces(person_id)')
    c.execute('CREATE INDEX IF NOT EXISTS idx_faces_image ON faces(image_id)')
    c.execute('CREATE INDEX IF NOT EXISTS idx_image_tags_image ON image_tags(image_id)')
    c.execute('CREATE INDEX IF NOT EXISTS idx_image_tags_tag ON image_tags(tag_id)')
    conn.commit()
    return conn
def extract_metadata(self, image_path: str) -> Dict:
    """Extract EXIF metadata from image.

    Returns a dict with keys date_taken, latitude, longitude, camera_make,
    camera_model, width, height, file_size. Fields that cannot be read are
    left as None; any parse failure is printed, not raised.
    """
    metadata = {
        'date_taken': None,
        'latitude': None,
        'longitude': None,
        'camera_make': None,
        'camera_model': None,
        'width': None,
        'height': None,
        'file_size': os.path.getsize(image_path)  # raises OSError if the file is missing
    }
    try:
        with Image.open(image_path) as img:
            metadata['width'], metadata['height'] = img.size
            exifdata = img.getexif()
            if exifdata:
                for tag_id, value in exifdata.items():
                    tag = TAGS.get(tag_id, tag_id)  # numeric id -> readable name
                    if tag == 'DateTime':
                        # NOTE(review): a malformed DateTime raises here and
                        # aborts the rest of the EXIF loop via the outer except.
                        metadata['date_taken'] = datetime.strptime(value, '%Y:%m:%d %H:%M:%S')
                    elif tag == 'Make':
                        metadata['camera_make'] = value
                    elif tag == 'Model':
                        metadata['camera_model'] = value
                    elif tag == 'GPSInfo':
                        # Re-key the nested GPS IFD with readable names.
                        gps_data = {}
                        for t in value:
                            sub_tag = GPSTAGS.get(t, t)
                            gps_data[sub_tag] = value[t]
                        # Extract GPS coordinates; refs 'S'/'W' negate the value.
                        if 'GPSLatitude' in gps_data and 'GPSLongitude' in gps_data:
                            lat = self._convert_to_degrees(gps_data['GPSLatitude'])
                            lon = self._convert_to_degrees(gps_data['GPSLongitude'])
                            if gps_data.get('GPSLatitudeRef') == 'S':
                                lat = -lat
                            if gps_data.get('GPSLongitudeRef') == 'W':
                                lon = -lon
                            metadata['latitude'] = lat
                            metadata['longitude'] = lon
    except Exception as e:
        print(f"Error extracting metadata from {image_path}: {e}")
    return metadata
def _convert_to_degrees(self, value):
"""Convert GPS coordinates to degrees"""
d, m, s = value
return d + (m / 60.0) + (s / 3600.0)
def process_image(self, image_path: str) -> int:
    """Process a single image and return its database ID.

    Extracts EXIF metadata, upserts the image row, then detects faces,
    encodes them, and attempts to identify each one against known people.
    Face-detection failures are printed, not raised.
    """
    c = self.conn.cursor()
    # Extract metadata
    metadata = self.extract_metadata(image_path)
    filename = os.path.basename(image_path)
    # Insert or update image record.
    # NOTE(review): INSERT OR REPLACE deletes any existing row for this path,
    # so a re-processed image gets a NEW id and its old faces are removed via
    # the ON DELETE CASCADE on faces.image_id.
    c.execute('''INSERT OR REPLACE INTO images
                 (path, filename, date_taken, latitude, longitude,
                  camera_make, camera_model, width, height, file_size)
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
              (image_path, filename, metadata['date_taken'],
               metadata['latitude'], metadata['longitude'],
               metadata['camera_make'], metadata['camera_model'],
               metadata['width'], metadata['height'], metadata['file_size']))
    image_id = c.lastrowid
    # Detect and process faces
    try:
        image = face_recognition.load_image_file(image_path)
        # Use the configured (typically CNN) detector only when dlib was built
        # with CUDA and GPU use is enabled; otherwise fall back to CPU HOG.
        model = self.config.face_recognition.detection_model if dlib.DLIB_USE_CUDA and self.config.face_recognition.enable_gpu else 'hog'
        face_locations = face_recognition.face_locations(image, model=model)
        face_encodings = face_recognition.face_encodings(image, face_locations)
        for location, encoding in zip(face_locations, face_encodings):
            top, right, bottom, left = location
            # Serialize the 128-d numpy encoding for BLOB storage.
            encoding_blob = pickle.dumps(encoding)
            # Try to identify the person against confirmed known faces.
            person_id, confidence = self.identify_face(encoding)
            c.execute('''INSERT INTO faces
                         (image_id, person_id, top, right, bottom, left, encoding, confidence)
                         VALUES (?, ?, ?, ?, ?, ?, ?, ?)''',
                      (image_id, person_id, top, right, bottom, left, encoding_blob, confidence))
    except Exception as e:
        print(f"Error processing faces in {image_path}: {e}")
    self.conn.commit()
    return image_id
def identify_face(self, unknown_encoding: np.ndarray, threshold: float = 0.6) -> Tuple[Optional[int], Optional[float]]:
    """Match an unknown face encoding against confirmed known faces.

    Returns (person_id, confidence) for the closest person whose minimum
    face distance is below `threshold`, or (None, None) when there are no
    confirmed faces or no match is close enough. Confidence is 1 - distance.
    """
    c = self.conn.cursor()
    # Only confirmed assignments are trusted as ground truth.
    c.execute('''SELECT f.person_id, f.encoding
                 FROM faces f
                 WHERE f.person_id IS NOT NULL
                 AND f.is_confirmed = 1''')
    known_faces = c.fetchall()
    if not known_faces:
        return None, None
    # Group deserialized encodings by person for per-person distance batches.
    person_encodings = {}
    for person_id, encoding_blob in known_faces:
        person_encodings.setdefault(person_id, []).append(pickle.loads(encoding_blob))
    # Track the closest person under the threshold.
    best_match = None
    best_distance = float('inf')
    for person_id, encodings in person_encodings.items():
        min_distance = np.min(face_recognition.face_distance(encodings, unknown_encoding))
        if min_distance < best_distance and min_distance < threshold:
            best_distance = min_distance
            best_match = person_id
    # Fix: compare against None, not truthiness — a person_id of 0 would
    # otherwise be silently dropped.
    if best_match is not None:
        return best_match, 1.0 - best_distance
    return None, None
def add_person(self, name: str) -> int:
    """Insert a person if missing and return their id.

    Uses INSERT OR IGNORE so repeated calls with the same name are
    idempotent and always return the same id.
    """
    c = self.conn.cursor()
    c.execute('INSERT OR IGNORE INTO people (name) VALUES (?)', (name,))
    self.conn.commit()
    # The row is guaranteed to exist now (either just inserted or already
    # present), so a single lookup suffices. The original re-ran the same
    # query in a dead else-branch that could only raise TypeError.
    c.execute('SELECT id FROM people WHERE name = ?', (name,))
    return c.fetchone()[0]
def assign_face_to_person(self, face_id: int, person_id: int, is_confirmed: bool = True):
    """Link a detected face row to a person, optionally marking it confirmed."""
    self.conn.execute(
        '''UPDATE faces
           SET person_id = ?, is_confirmed = ?
           WHERE id = ?''',
        (person_id, is_confirmed, face_id),
    )
    self.conn.commit()
def add_tag(self, name: str, category: Optional[str] = None) -> int:
    """Create a tag if it does not exist yet and return its id."""
    cur = self.conn.cursor()
    # INSERT OR IGNORE keeps this idempotent for existing tag names.
    cur.execute('INSERT OR IGNORE INTO tags (name, category) VALUES (?, ?)',
                (name, category))
    self.conn.commit()
    cur.execute('SELECT id FROM tags WHERE name = ?', (name,))
    return cur.fetchone()[0]
def tag_image(self, image_id: int, tag_id: int):
    """Attach a tag to an image; duplicate pairs are silently ignored."""
    self.conn.execute(
        'INSERT OR IGNORE INTO image_tags (image_id, tag_id) VALUES (?, ?)',
        (image_id, tag_id),
    )
    self.conn.commit()
def get_unidentified_faces(self) -> List[Dict]:
    """Return every detected face that has no person assigned.

    Each entry carries face_id, image_id, image_path and the bounding box
    as a (top, right, bottom, left) tuple, ordered by image path.
    """
    cur = self.conn.cursor()
    cur.execute('''SELECT f.id, f.image_id, i.path, f.top, f.right, f.bottom, f.left
                 FROM faces f
                 JOIN images i ON f.image_id = i.id
                 WHERE f.person_id IS NULL
                 ORDER BY i.path''')
    return [
        {
            'face_id': face_id,
            'image_id': image_id,
            'image_path': path,
            'location': (top, right, bottom, left),
        }
        for face_id, image_id, path, top, right, bottom, left in cur.fetchall()
    ]
def search_images(self, people: Optional[List[str]] = None, tags: Optional[List[str]] = None,
                  date_from: Optional[datetime] = None, date_to: Optional[datetime] = None) -> List[Dict]:
    """Search images by people, tags, and date range.

    All filters are optional and combined with AND; results are ordered by
    date_taken descending.
    """
    cur = self.conn.cursor()
    base = '''SELECT DISTINCT i.id, i.path, i.filename, i.date_taken
                   FROM images i'''
    join_parts: List[str] = []
    where_parts: List[str] = []
    bind_values: list = []
    if people:
        # Filter on identified people via the faces join.
        join_parts.append('JOIN faces f ON i.id = f.image_id')
        join_parts.append('JOIN people p ON f.person_id = p.id')
        marks = ','.join('?' * len(people))
        where_parts.append(f'p.name IN ({marks})')
        bind_values.extend(people)
    if tags:
        join_parts.append('JOIN image_tags it ON i.id = it.image_id')
        join_parts.append('JOIN tags t ON it.tag_id = t.id')
        marks = ','.join('?' * len(tags))
        where_parts.append(f't.name IN ({marks})')
        bind_values.extend(tags)
    if date_from:
        where_parts.append('i.date_taken >= ?')
        bind_values.append(date_from)
    if date_to:
        where_parts.append('i.date_taken <= ?')
        bind_values.append(date_to)
    if join_parts:
        base += ' ' + ' '.join(join_parts)
    if where_parts:
        base += ' WHERE ' + ' AND '.join(where_parts)
    base += ' ORDER BY i.date_taken DESC'
    cur.execute(base, bind_values)
    return [
        {'id': row[0], 'path': row[1], 'filename': row[2], 'date_taken': row[3]}
        for row in cur.fetchall()
    ]
def process_directory(self):
    """Walk photos_dir recursively and process every supported image file.

    Returns the number of images processed successfully; per-file errors
    are printed and skipped.
    """
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif'}
    processed = 0
    for root, _, files in os.walk(self.photos_dir):
        for name in files:
            if not any(name.lower().endswith(ext) for ext in image_extensions):
                continue
            image_path = os.path.join(root, name)
            print(f"Processing: {image_path}")
            try:
                self.process_image(image_path)
            except Exception as e:
                print(f"Error processing {image_path}: {e}")
            else:
                processed += 1
    print(f"\nProcessed {processed} images")
    return processed
def calculate_face_quality(self, face_encoding: np.ndarray, face_location: Tuple[int, int, int, int]) -> float:
    """Score a face from its pixel size and encoding variance.

    face_location is (top, right, bottom, left). Faces of 100x100 px or more
    receive full size credit; higher encoding variance means more distinctive
    features. Returns a weighted blend where distinctiveness dominates.
    """
    top, right, bottom, left = face_location
    area = (right - left) * (bottom - top)
    # Normalize against a 100x100 baseline, capped at 1.0.
    size_score = min(area / (100 * 100), 1.0)
    # Normalize variance against a typical value of 0.01, capped at 1.0.
    variance_score = min(np.var(face_encoding) / 0.01, 1.0)
    # 30% size, 70% distinctiveness.
    return (size_score * 0.3) + (variance_score * 0.7)
def cluster_unknown_faces(self) -> Dict[int, List[int]]:
    """Group unidentified faces into clusters of visually similar faces.

    Runs DBSCAN over the stored 128-d encodings using epsilon/min-size from
    the project configuration. Returns {cluster_id: [face_id, ...]}; noise
    points (label -1) are excluded. Empty when fewer than two candidates.
    """
    cur = self.conn.cursor()
    cur.execute('''SELECT id, encoding FROM faces WHERE person_id IS NULL''')
    candidates = cur.fetchall()
    if len(candidates) < 2:
        # DBSCAN needs at least two samples to be meaningful.
        return {}
    print(f"Clustering {len(candidates)} unidentified faces...")
    face_ids = [fid for fid, _ in candidates]
    vectors = np.array([pickle.loads(blob) for _, blob in candidates])
    labels = DBSCAN(
        eps=self.config.face_recognition.cluster_epsilon,
        min_samples=self.config.face_recognition.cluster_min_size,
        metric='euclidean'
    ).fit(vectors).labels_
    # Bucket face ids by cluster label, skipping DBSCAN noise (-1).
    groups: Dict[int, List[int]] = {}
    for face_id, label in zip(face_ids, labels):
        if label != -1:
            groups.setdefault(label, []).append(face_id)
    print(f"Found {len(groups)} face clusters")
    return groups
def get_face_clusters(self) -> List[Dict]:
    """Return face clusters with a representative face each.

    Clusters are ordered by size (largest first); the representative face is
    the first member of each cluster.
    """
    groups = self.cluster_unknown_faces()
    cur = self.conn.cursor()
    result: List[Dict] = []
    for cluster_id, member_ids in groups.items():
        # Use the first face of the cluster as its thumbnail/representative.
        rep_id = member_ids[0]
        cur.execute('''SELECT f.id, f.image_id, i.path, f.top, f.right, f.bottom, f.left
                     FROM faces f
                     JOIN images i ON f.image_id = i.id
                     WHERE f.id = ?''', (rep_id,))
        row = cur.fetchone()
        if not row:
            continue
        result.append({
            'cluster_id': cluster_id,
            'face_count': len(member_ids),
            'face_ids': member_ids,
            'representative_face': {
                'face_id': row[0],
                'image_id': row[1],
                'image_path': row[2],
                'location': (row[3], row[4], row[5], row[6]),
            },
        })
    # Largest clusters (most frequently seen faces) first.
    result.sort(key=lambda entry: entry['face_count'], reverse=True)
    return result
def assign_cluster_to_person(self, cluster_id: int, person_name: str):
    """Assign every face in a cluster to the named person.

    Re-runs clustering to resolve the cluster id; returns False when the id
    is unknown, True after all members are confirmed-assigned.
    """
    groups = self.cluster_unknown_faces()
    members = groups.get(cluster_id)
    if members is None:
        return False
    target_person = self.add_person(person_name)
    for face_id in members:
        self.assign_face_to_person(face_id, target_person, is_confirmed=True)
    print(f"Assigned {len(members)} faces to {person_name}")
    return True
def get_most_common_faces(self, limit: int = 20) -> List[Dict]:
    """Return identified people ordered by how often they were photographed.

    Each entry includes the person, their confirmed face count, and one
    sample face (the lowest face id) with its image path and bounding box.
    """
    cur = self.conn.cursor()
    # Only confirmed faces count toward a person's frequency.
    cur.execute('''SELECT p.id, p.name, COUNT(f.id) as face_count,
                        MIN(f.id) as sample_face_id
                 FROM people p
                 JOIN faces f ON p.id = f.person_id
                 WHERE f.is_confirmed = 1
                 GROUP BY p.id
                 ORDER BY face_count DESC
                 LIMIT ?''', (limit,))
    ranking: List[Dict] = []
    for person_id, name, face_count, sample_face_id in cur.fetchall():
        cur.execute('''SELECT f.image_id, i.path, f.top, f.right, f.bottom, f.left
                     FROM faces f
                     JOIN images i ON f.image_id = i.id
                     WHERE f.id = ?''', (sample_face_id,))
        sample = cur.fetchone()
        if not sample:
            continue
        ranking.append({
            'person_id': person_id,
            'name': name,
            'face_count': face_count,
            'sample_face': {
                'face_id': sample_face_id,
                'image_id': sample[0],
                'image_path': sample[1],
                'location': (sample[2], sample[3], sample[4], sample[5]),
            },
        })
    return ranking
def verify_person_faces(self, person_id: int) -> List[Dict]:
    """List every face assigned to a person, highest-confidence first.

    Intended for manual review of automatic assignments.
    """
    cur = self.conn.cursor()
    cur.execute('''SELECT f.id, f.image_id, i.path, f.top, f.right, f.bottom, f.left, f.confidence
                 FROM faces f
                 JOIN images i ON f.image_id = i.id
                 WHERE f.person_id = ?
                 ORDER BY f.confidence DESC''', (person_id,))
    return [
        {
            'face_id': face_id,
            'image_id': image_id,
            'image_path': path,
            'location': (top, right, bottom, left),
            'confidence': confidence,
        }
        for face_id, image_id, path, top, right, bottom, left, confidence in cur.fetchall()
    ]
def remove_incorrect_face_assignment(self, face_id: int):
    """Detach a face from its person and clear the confirmed flag."""
    self.conn.execute(
        'UPDATE faces SET person_id = NULL, is_confirmed = 0 WHERE id = ?',
        (face_id,),
    )
    self.conn.commit()
def batch_process_images(self, image_paths: List[str], batch_size: int = None) -> Dict[str, int]:
    """Process a list of image paths in batches.

    batch_size defaults to the configured processing batch size. Returns a
    summary dict with processed/errors/skipped/faces_detected counts.
    Already-indexed paths are skipped when skip_processed is configured.
    """
    if batch_size is None:
        batch_size = self.config.processing.batch_size
    summary = {
        'processed': 0,
        'errors': 0,
        'skipped': 0,
        'faces_detected': 0
    }
    print(f"Processing {len(image_paths)} images in batches of {batch_size}")
    total_batches = (len(image_paths) + batch_size - 1) // batch_size
    for start in range(0, len(image_paths), batch_size):
        print(f"Processing batch {start // batch_size + 1}/{total_batches}")
        for image_path in image_paths[start:start + batch_size]:
            try:
                if self.config.processing.skip_processed:
                    # Skip paths that already have an image row.
                    cur = self.conn.cursor()
                    cur.execute('SELECT id FROM images WHERE path = ?', (image_path,))
                    if cur.fetchone():
                        summary['skipped'] += 1
                        continue
                new_image_id = self.process_image(image_path)
                # Tally how many faces were stored for this image.
                cur = self.conn.cursor()
                cur.execute('SELECT COUNT(*) FROM faces WHERE image_id = ?', (new_image_id,))
                summary['processed'] += 1
                summary['faces_detected'] += cur.fetchone()[0]
            except Exception as e:
                print(f"Error processing {image_path}: {e}")
                summary['errors'] += 1
    return summary
def advanced_search(self, **kwargs) -> List[Dict]:
    """Advanced search with multiple criteria and complex queries.

    Supported kwargs: people, tags (str or list), date_from, date_to,
    latitude_min/max, longitude_min/max, camera_make (substring match),
    min_people (minimum distinct identified people per image), limit.
    All filters combine with AND; results are grouped per image and ordered
    by date_taken descending.

    NOTE(review): numeric filters use truthiness checks, so a value of 0
    (e.g. latitude_min=0) is silently ignored. min_people and limit are
    interpolated via f-strings, not bound parameters — they must be trusted
    integers, never raw user input.
    """
    c = self.conn.cursor()
    # Base query
    query = '''SELECT DISTINCT i.id, i.path, i.filename, i.date_taken,
                    i.latitude, i.longitude, i.camera_make, i.camera_model'''
    from_clause = ' FROM images i'
    joins = []
    conditions = []
    params = []
    # People filter
    if 'people' in kwargs and kwargs['people']:
        joins.append('JOIN faces f ON i.id = f.image_id')
        joins.append('JOIN people p ON f.person_id = p.id')
        people_names = kwargs['people']
        if isinstance(people_names, str):
            people_names = [people_names]
        placeholders = ','.join(['?' for _ in people_names])
        conditions.append(f'p.name IN ({placeholders})')
        params.extend(people_names)
    # Tags filter
    if 'tags' in kwargs and kwargs['tags']:
        joins.append('JOIN image_tags it ON i.id = it.image_id')
        joins.append('JOIN tags t ON it.tag_id = t.id')
        tags = kwargs['tags']
        if isinstance(tags, str):
            tags = [tags]
        placeholders = ','.join(['?' for _ in tags])
        conditions.append(f't.name IN ({placeholders})')
        params.extend(tags)
    # Date range filters
    if 'date_from' in kwargs and kwargs['date_from']:
        conditions.append('i.date_taken >= ?')
        params.append(kwargs['date_from'])
    if 'date_to' in kwargs and kwargs['date_to']:
        conditions.append('i.date_taken <= ?')
        params.append(kwargs['date_to'])
    # Location filters (bounding box; falsy values are skipped — see note).
    if 'latitude_min' in kwargs and kwargs['latitude_min']:
        conditions.append('i.latitude >= ?')
        params.append(kwargs['latitude_min'])
    if 'latitude_max' in kwargs and kwargs['latitude_max']:
        conditions.append('i.latitude <= ?')
        params.append(kwargs['latitude_max'])
    if 'longitude_min' in kwargs and kwargs['longitude_min']:
        conditions.append('i.longitude >= ?')
        params.append(kwargs['longitude_min'])
    if 'longitude_max' in kwargs and kwargs['longitude_max']:
        conditions.append('i.longitude <= ?')
        params.append(kwargs['longitude_max'])
    # Camera filters
    if 'camera_make' in kwargs and kwargs['camera_make']:
        conditions.append('i.camera_make LIKE ?')
        params.append(f"%{kwargs['camera_make']}%")
    # Multiple people requirement
    if 'min_people' in kwargs and kwargs['min_people']:
        if 'people' not in kwargs:  # Add people join if not already added
            joins.append('JOIN faces f ON i.id = f.image_id')
            joins.append('JOIN people p ON f.person_id = p.id')
        # This requires a subquery to count distinct people per image
        having_clause = f' HAVING COUNT(DISTINCT p.id) >= {kwargs["min_people"]}'
    else:
        having_clause = ''
    # Build final query
    full_query = query + from_clause
    if joins:
        full_query += ' ' + ' '.join(joins)
    if conditions:
        full_query += ' WHERE ' + ' AND '.join(conditions)
    # Group by image to handle multiple joins
    full_query += ' GROUP BY i.id'
    if having_clause:
        full_query += having_clause
    # Order by date
    full_query += ' ORDER BY i.date_taken DESC'
    # Limit results
    if 'limit' in kwargs and kwargs['limit']:
        full_query += f' LIMIT {kwargs["limit"]}'
    c.execute(full_query, params)
    results = []
    for row in c.fetchall():
        results.append({
            'id': row[0],
            'path': row[1],
            'filename': row[2],
            'date_taken': row[3],
            'latitude': row[4],
            'longitude': row[5],
            'camera_make': row[6],
            'camera_model': row[7]
        })
    return results
def close(self):
    """Release the underlying SQLite connection."""
    self.conn.close()
def main():
    """Main entry point: index the photo directory and report progress."""
    tagger = PunimTag()
    print("PunimTag - Face Recognition and Photo Tagging System")
    print("=" * 50)
    # Walk the configured photos directory and index every image.
    tagger.process_directory()
    # Report how many faces still need a human to name them.
    unidentified = tagger.get_unidentified_faces()
    print(f"\nFound {len(unidentified)} unidentified faces")
    tagger.close()
if __name__ == "__main__":
    main()

View File

@ -1,462 +0,0 @@
#!/usr/bin/env python3
"""
Simplified PunimTag for initial testing
Core functionality without advanced clustering (sklearn dependency)
"""
import os
import sqlite3
import face_recognition
import numpy as np
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
from datetime import datetime
import json
from typing import List, Dict, Tuple, Optional
import pickle
class SimplePunimTag:
    """Simplified variant of PunimTag: same schema and workflow, but without
    the sklearn-based clustering, so it runs with fewer dependencies."""

    def __init__(self, db_path: str = 'punimtag_simple.db', photos_dir: str = 'photos'):
        """Open (or create) the SQLite database for this photo library.

        db_path: SQLite file holding all persistent state.
        photos_dir: root directory that image processing walks.
        """
        self.db_path = db_path
        self.photos_dir = photos_dir
        self.conn = self._init_db()  # single long-lived connection shared by all methods
def _init_db(self) -> sqlite3.Connection:
    """Initialize database with comprehensive schema.

    Creates all tables and indexes if they do not already exist (idempotent
    DDL, safe on an existing database) and returns the open connection.
    Schema is identical to the full PunimTag implementation.
    """
    conn = sqlite3.connect(self.db_path)
    c = conn.cursor()
    # Images table with metadata (one row per photo file; path is unique).
    c.execute('''CREATE TABLE IF NOT EXISTS images (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        path TEXT UNIQUE NOT NULL,
        filename TEXT NOT NULL,
        date_taken TIMESTAMP,
        latitude REAL,
        longitude REAL,
        camera_make TEXT,
        camera_model TEXT,
        width INTEGER,
        height INTEGER,
        file_size INTEGER,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )''')
    # People table for identified individuals
    c.execute('''CREATE TABLE IF NOT EXISTS people (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT UNIQUE NOT NULL,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )''')
    # Faces table with locations and encodings. The bounding box is stored as
    # (top, right, bottom, left), matching face_recognition's convention.
    c.execute('''CREATE TABLE IF NOT EXISTS faces (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        image_id INTEGER NOT NULL,
        person_id INTEGER,
        top INTEGER NOT NULL,
        right INTEGER NOT NULL,
        bottom INTEGER NOT NULL,
        left INTEGER NOT NULL,
        encoding BLOB NOT NULL,
        confidence REAL,
        is_confirmed BOOLEAN DEFAULT 0,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY(image_id) REFERENCES images(id) ON DELETE CASCADE,
        FOREIGN KEY(person_id) REFERENCES people(id) ON DELETE SET NULL
    )''')
    # Tags table
    c.execute('''CREATE TABLE IF NOT EXISTS tags (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT UNIQUE NOT NULL,
        category TEXT,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )''')
    # Image-tag relationship (many-to-many join table).
    c.execute('''CREATE TABLE IF NOT EXISTS image_tags (
        image_id INTEGER NOT NULL,
        tag_id INTEGER NOT NULL,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY(image_id, tag_id),
        FOREIGN KEY(image_id) REFERENCES images(id) ON DELETE CASCADE,
        FOREIGN KEY(tag_id) REFERENCES tags(id) ON DELETE CASCADE
    )''')
    # Create indexes for performance on the common lookup paths.
    c.execute('CREATE INDEX IF NOT EXISTS idx_faces_person ON faces(person_id)')
    c.execute('CREATE INDEX IF NOT EXISTS idx_faces_image ON faces(image_id)')
    c.execute('CREATE INDEX IF NOT EXISTS idx_image_tags_image ON image_tags(image_id)')
    c.execute('CREATE INDEX IF NOT EXISTS idx_image_tags_tag ON image_tags(tag_id)')
    conn.commit()
    return conn
def extract_metadata(self, image_path: str) -> Dict:
    """Extract EXIF metadata from image with better error handling.

    Returns a dict with keys date_taken, latitude, longitude, camera_make,
    camera_model, width, height, file_size. Per-tag parse errors are printed
    and skipped; a missing file short-circuits with all fields None.

    NOTE(review): non-date fields that remain unreadable are set to the
    string 'N/A' (mixing str into numeric columns) — preserved for
    compatibility with existing callers, but worth revisiting.
    """
    metadata = {
        'date_taken': None,
        'latitude': None,
        'longitude': None,
        'camera_make': None,
        'camera_model': None,
        'width': None,
        'height': None,
        'file_size': None
    }
    try:
        # Get file size
        if os.path.exists(image_path):
            metadata['file_size'] = os.path.getsize(image_path)
        else:
            print(f"Warning: File not found: {image_path}")
            return metadata
        # Fix: use a context manager so the image file handle is always
        # released (the original Image.open() leaked it).
        with Image.open(image_path) as img:
            metadata['width'], metadata['height'] = img.size
            # Extract EXIF data
            exifdata = img.getexif()
            if exifdata:
                for tag_id, value in exifdata.items():
                    tag = TAGS.get(tag_id, tag_id)
                    try:
                        if tag == 'DateTime':
                            metadata['date_taken'] = datetime.strptime(value, '%Y:%m:%d %H:%M:%S')
                        elif tag == 'Make':
                            metadata['camera_make'] = str(value).strip()
                        elif tag == 'Model':
                            metadata['camera_model'] = str(value).strip()
                        elif tag == 'GPSInfo':
                            gps_data = {}
                            for t in value:
                                sub_tag = GPSTAGS.get(t, t)
                                gps_data[sub_tag] = value[t]
                            # Extract GPS coordinates; refs 'S'/'W' negate.
                            if 'GPSLatitude' in gps_data and 'GPSLongitude' in gps_data:
                                try:
                                    lat = self._convert_to_degrees(gps_data['GPSLatitude'])
                                    lon = self._convert_to_degrees(gps_data['GPSLongitude'])
                                    if gps_data.get('GPSLatitudeRef') == 'S':
                                        lat = -lat
                                    if gps_data.get('GPSLongitudeRef') == 'W':
                                        lon = -lon
                                    metadata['latitude'] = lat
                                    metadata['longitude'] = lon
                                except Exception as e:
                                    print(f"Error parsing GPS data: {e}")
                    except Exception as e:
                        print(f"Error parsing EXIF tag {tag}: {e}")
    except Exception as e:
        print(f"Error extracting metadata from {image_path}: {e}")
    # Set defaults for missing values (see NOTE above on 'N/A').
    for key, value in metadata.items():
        if value is None and key not in ['date_taken', 'latitude', 'longitude']:
            metadata[key] = 'N/A'
    return metadata
def _convert_to_degrees(self, value):
"""Convert GPS coordinates to degrees"""
if len(value) == 3:
d, m, s = value
return d + (m / 60.0) + (s / 3600.0)
return 0.0
def process_image(self, image_path: str) -> int:
    """Process a single image and return its database ID.

    Extracts metadata, upserts the image row, then detects faces with the
    CPU-only HOG model, encodes them, and tries to identify each against
    known people. Face-detection failures are printed, not raised.
    """
    c = self.conn.cursor()
    print(f"Processing: {image_path}")
    # Extract metadata
    metadata = self.extract_metadata(image_path)
    filename = os.path.basename(image_path)
    # Insert or update image record.
    # NOTE(review): INSERT OR REPLACE deletes any existing row for this path,
    # so a re-processed image gets a NEW id and its old faces are removed via
    # the ON DELETE CASCADE on faces.image_id.
    c.execute('''INSERT OR REPLACE INTO images
                 (path, filename, date_taken, latitude, longitude,
                  camera_make, camera_model, width, height, file_size)
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
              (image_path, filename, metadata['date_taken'],
               metadata['latitude'], metadata['longitude'],
               metadata['camera_make'], metadata['camera_model'],
               metadata['width'], metadata['height'], metadata['file_size']))
    image_id = c.lastrowid
    # Detect and process faces
    try:
        image = face_recognition.load_image_file(image_path)
        # 'hog' is the CPU detector — slower than CNN but dependency-free.
        face_locations = face_recognition.face_locations(image, model='hog')
        face_encodings = face_recognition.face_encodings(image, face_locations)
        print(f" Found {len(face_locations)} faces")
        for location, encoding in zip(face_locations, face_encodings):
            top, right, bottom, left = location
            # Serialize the 128-d numpy encoding for BLOB storage.
            encoding_blob = pickle.dumps(encoding)
            # Try to identify the person against confirmed known faces.
            person_id, confidence = self.identify_face(encoding)
            c.execute('''INSERT INTO faces
                         (image_id, person_id, top, right, bottom, left, encoding, confidence)
                         VALUES (?, ?, ?, ?, ?, ?, ?, ?)''',
                      (image_id, person_id, top, right, bottom, left, encoding_blob, confidence))
    except Exception as e:
        print(f" Error processing faces: {e}")
    self.conn.commit()
    return image_id
def identify_face(self, unknown_encoding: np.ndarray, threshold: float = 0.6) -> Tuple[Optional[int], Optional[float]]:
    """Match an unknown face encoding against confirmed known faces.

    Returns (person_id, confidence) for the closest person whose minimum
    face distance is below `threshold`, or (None, None) when there are no
    confirmed faces or no match is close enough. Confidence is 1 - distance.
    """
    c = self.conn.cursor()
    # Only confirmed assignments are trusted as ground truth.
    c.execute('''SELECT f.person_id, f.encoding
                 FROM faces f
                 WHERE f.person_id IS NOT NULL
                 AND f.is_confirmed = 1''')
    known_faces = c.fetchall()
    if not known_faces:
        return None, None
    # Group deserialized encodings by person for per-person distance batches.
    person_encodings = {}
    for person_id, encoding_blob in known_faces:
        person_encodings.setdefault(person_id, []).append(pickle.loads(encoding_blob))
    # Track the closest person under the threshold.
    best_match = None
    best_distance = float('inf')
    for person_id, encodings in person_encodings.items():
        min_distance = np.min(face_recognition.face_distance(encodings, unknown_encoding))
        if min_distance < best_distance and min_distance < threshold:
            best_distance = min_distance
            best_match = person_id
    # Fix: compare against None, not truthiness — a person_id of 0 would
    # otherwise be silently dropped.
    if best_match is not None:
        return best_match, 1.0 - best_distance
    return None, None
def add_person(self, name: str) -> int:
    """Insert a person if absent and return their id (idempotent)."""
    cur = self.conn.cursor()
    cur.execute('INSERT OR IGNORE INTO people (name) VALUES (?)', (name,))
    self.conn.commit()
    cur.execute('SELECT id FROM people WHERE name = ?', (name,))
    row = cur.fetchone()
    return row[0] if row else None
def assign_face_to_person(self, face_id: int, person_id: int, is_confirmed: bool = True):
    """Link a detected face row to a person record and persist it."""
    cursor = self.conn.cursor()
    cursor.execute(
        '''UPDATE faces
                SET person_id = ?, is_confirmed = ?
                WHERE id = ?''',
        (person_id, is_confirmed, face_id),
    )
    self.conn.commit()
def add_tag(self, name: str, category: str = None) -> int:
    """Create a tag if it does not exist yet and return its id."""
    cursor = self.conn.cursor()
    cursor.execute('INSERT OR IGNORE INTO tags (name, category) VALUES (?, ?)',
                   (name, category))
    self.conn.commit()
    # Look the id up afterwards so existing tags resolve correctly too.
    cursor.execute('SELECT id FROM tags WHERE name = ?', (name,))
    row = cursor.fetchone()
    if row:
        return row[0]
    return None
def tag_image(self, image_id: int, tag_id: int):
    """Attach a tag to an image; duplicate pairs are silently ignored."""
    cursor = self.conn.cursor()
    cursor.execute(
        'INSERT OR IGNORE INTO image_tags (image_id, tag_id) VALUES (?, ?)',
        (image_id, tag_id),
    )
    self.conn.commit()
def get_unidentified_faces(self) -> List[Dict]:
    """Return every face row that has no person assigned yet.

    Each entry holds face_id, image_id, image_path and the (top, right,
    bottom, left) pixel box, ordered by image path.
    """
    cursor = self.conn.cursor()
    cursor.execute('''SELECT f.id, f.image_id, i.path, f.top, f.right, f.bottom, f.left
                FROM faces f
                JOIN images i ON f.image_id = i.id
                WHERE f.person_id IS NULL
                ORDER BY i.path''')
    return [
        {
            'face_id': face_id,
            'image_id': image_id,
            'image_path': path,
            'location': (top, right, bottom, left),
        }
        for face_id, image_id, path, top, right, bottom, left in cursor.fetchall()
    ]
def simple_search(self, people: List[str] = None, tags: List[str] = None,
                  date_from: datetime = None, date_to: datetime = None) -> List[Dict]:
    """Search images filtered by people names, tag names and a date window.

    All filters are optional and combined with AND; results are newest first.
    """
    sql = '''SELECT DISTINCT i.id, i.path, i.filename, i.date_taken
                FROM images i'''
    join_clauses = []
    where_clauses = []
    params = []
    if people:
        join_clauses += ['JOIN faces f ON i.id = f.image_id',
                         'JOIN people p ON f.person_id = p.id']
        # One "?" per requested name.
        where_clauses.append('p.name IN (%s)' % ','.join('?' * len(people)))
        params += people
    if tags:
        join_clauses += ['JOIN image_tags it ON i.id = it.image_id',
                         'JOIN tags t ON it.tag_id = t.id']
        where_clauses.append('t.name IN (%s)' % ','.join('?' * len(tags)))
        params += tags
    if date_from:
        where_clauses.append('i.date_taken >= ?')
        params.append(date_from)
    if date_to:
        where_clauses.append('i.date_taken <= ?')
        params.append(date_to)
    if join_clauses:
        sql += ' ' + ' '.join(join_clauses)
    if where_clauses:
        sql += ' WHERE ' + ' AND '.join(where_clauses)
    sql += ' ORDER BY i.date_taken DESC'
    cursor = self.conn.cursor()
    cursor.execute(sql, params)
    return [
        {'id': image_id, 'path': path, 'filename': filename, 'date_taken': taken}
        for image_id, path, filename, taken in cursor.fetchall()
    ]
def process_directory(self):
    """Walk self.photos_dir recursively and index every supported image.

    Returns the number of successfully processed images; per-file failures
    are reported but do not stop the walk.
    """
    supported = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.gif'}
    ok_count = 0
    error_count = 0
    for folder, _, filenames in os.walk(self.photos_dir):
        for name in filenames:
            # Case-insensitive extension check.
            if not any(name.lower().endswith(suffix) for suffix in supported):
                continue
            full_path = os.path.join(folder, name)
            try:
                self.process_image(full_path)
                ok_count += 1
            except Exception as e:
                print(f"Error processing {full_path}: {e}")
                error_count += 1
    print(f"\nProcessed {ok_count} images, {error_count} errors")
    return ok_count
def get_stats(self) -> Dict:
    """Return headline counts: images, faces (split by identified), people, tags."""
    cursor = self.conn.cursor()

    def count(sql):
        # Tiny helper: run a COUNT(*) query and return the scalar.
        cursor.execute(sql)
        return cursor.fetchone()[0]

    images = count("SELECT COUNT(*) FROM images")
    faces = count("SELECT COUNT(*) FROM faces")
    identified = count("SELECT COUNT(*) FROM faces WHERE person_id IS NOT NULL")
    people = count("SELECT COUNT(*) FROM people")
    tags = count("SELECT COUNT(*) FROM tags")
    return {
        'images': images,
        'faces': faces,
        'identified_faces': identified,
        'unidentified_faces': faces - identified,
        'people': people,
        'tags': tags,
    }
def close(self):
    """Close the underlying SQLite connection; the instance is unusable afterwards."""
    self.conn.close()
def main():
    """Smoke-test the backend: process the photos directory and print stats."""
    print("SimplePunimTag - Testing Backend")
    print("=" * 50)
    tagger = SimplePunimTag()
    # Baseline counts before any processing.
    print(f"Initial stats: {tagger.get_stats()}")
    if os.path.exists(tagger.photos_dir):
        tagger.process_directory()
        final_stats = tagger.get_stats()
        print(f"\nFinal stats: {final_stats}")
        unidentified = tagger.get_unidentified_faces()
        print(f"Unidentified faces: {len(unidentified)}")
    else:
        print(f"Photos directory '{tagger.photos_dir}' not found")
        print("Create the directory and add some photos to test")
    tagger.close()
    print("\nTesting completed!")
# Run the smoke test only when executed directly, not when imported.
if __name__ == "__main__":
    main()

View File

@ -1,204 +0,0 @@
#!/usr/bin/env python3
"""
Visual Face Identifier for PunimTag
Shows face crops so you can see who you're identifying
"""
import sqlite3
import os
from PIL import Image
import subprocess
import sys
class VisualFaceIdentifier:
    """Interactive CLI helper: crops unidentified faces out of photos and
    opens them in the system image viewer so a human can name each person.
    """

    def __init__(self, db_path='punimtag_simple.db'):
        # Only the path is stored; every method opens its own short-lived
        # connection so the tool can run alongside the indexing backend.
        self.db_path = db_path

    def get_unidentified_faces(self, limit=10):
        """Get a limited number of unidentified faces."""
        conn = sqlite3.connect(self.db_path)
        c = conn.cursor()
        c.execute('''SELECT f.id, f.image_id, i.path, i.filename, f.top, f.right, f.bottom, f.left
                FROM faces f
                JOIN images i ON f.image_id = i.id
                WHERE f.person_id IS NULL
                LIMIT ?''', (limit,))
        faces = c.fetchall()
        conn.close()
        return faces

    def extract_face_crop(self, image_path, top, right, bottom, left):
        """Extract a padded face crop and save it to a temp JPEG; return its path or None."""
        try:
            if not os.path.exists(image_path):
                return None
            with Image.open(image_path) as img:
                # Crop the face with some padding, clamped to the image bounds.
                padding = 20
                crop_top = max(0, top - padding)
                crop_left = max(0, left - padding)
                crop_bottom = min(img.height, bottom + padding)
                crop_right = min(img.width, right + padding)
                face_crop = img.crop((crop_left, crop_top, crop_right, crop_bottom))
                # PID in the name keeps concurrent runs from clobbering each other.
                temp_path = f"temp_face_crop_{os.getpid()}.jpg"
                face_crop.save(temp_path, "JPEG")
                return temp_path
        except Exception as e:
            print(f"Error extracting face: {e}")
            return None

    def open_image(self, image_path):
        """Open image with the default system viewer; return True on success."""
        try:
            import platform
            if platform.system() == "Windows":
                # For Windows
                os.startfile(image_path)
                return True
            elif image_path.startswith('/mnt/c/'):
                # Convert WSL path to Windows path for explorer
                win_path = image_path.replace('/mnt/c/', 'C:\\').replace('/', '\\')
                subprocess.run(['explorer.exe', win_path], check=True)
                return True
            else:
                # For Linux/Mac
                subprocess.run(['xdg-open', image_path], check=True)
                return True
        except Exception as e:
            print(f"Could not open image: {e}")
            return False

    def add_person(self, name):
        """Add a new person (idempotent) and return their id."""
        conn = sqlite3.connect(self.db_path)
        c = conn.cursor()
        c.execute('INSERT OR IGNORE INTO people (name) VALUES (?)', (name,))
        c.execute('SELECT id FROM people WHERE name = ?', (name,))
        person_id = c.fetchone()[0]
        conn.commit()
        conn.close()
        return person_id

    def assign_face(self, face_id, person_id):
        """Assign a face to a person and mark it confirmed."""
        conn = sqlite3.connect(self.db_path)
        c = conn.cursor()
        c.execute('UPDATE faces SET person_id = ?, is_confirmed = 1 WHERE id = ?',
                  (person_id, face_id))
        conn.commit()
        conn.close()

    def run_visual_identifier(self):
        """Run the interactive loop: show each face crop, prompt for a name."""
        print("\n🏷️ Visual Face Identifier")
        print("=" * 50)
        print("This will show you face crops to help identify people")
        print()
        faces = self.get_unidentified_faces(20)  # Process 20 at a time
        if not faces:
            print("No unidentified faces found!")
            return
        print(f"Found {len(faces)} unidentified faces to process...")
        print("Commands:")
        print("  - Enter person's name to identify")
        print("  - 's' to skip")
        print("  - 'o' to open original image")
        print("  - 'q' to quit")
        print()
        try:
            for i, (face_id, image_id, path, filename, top, right, bottom, left) in enumerate(faces):
                print(f"\n{'='*60}")
                print(f"Face {i+1}/{len(faces)}")
                print(f"📁 File: (unknown)")
                print(f"📍 Face location: top={top}, right={right}, bottom={bottom}, left={left}")
                # Check if original file exists
                if not os.path.exists(path):
                    print("⚠️ Original image file not found, skipping...")
                    continue
                # Extract and show face crop
                face_crop_path = self.extract_face_crop(path, top, right, bottom, left)
                if face_crop_path:
                    print(f"🖼️ Face crop saved as: {face_crop_path}")
                    print("📖 Opening face crop...")
                    if self.open_image(face_crop_path):
                        print("✅ Face crop opened in image viewer")
                    else:
                        print("❌ Could not open image viewer")
                        print(f"   You can manually open: {face_crop_path}")
                while True:
                    response = input(f"\n👤 Who is this person? (name/'s'/'o'/'q'): ").strip()
                    if response.lower() == 'q':
                        print("🛑 Quitting...")
                        return
                    elif response.lower() == 's':
                        print("⏭️ Skipped")
                        break
                    elif response.lower() == 'o':
                        print("📖 Opening original image...")
                        if self.open_image(path):
                            print("✅ Original image opened")
                        else:
                            print(f"❌ Could not open: {path}")
                    elif response:
                        try:
                            person_id = self.add_person(response)
                            self.assign_face(face_id, person_id)
                            print(f"✅ Identified as '{response}'")
                            break
                        except Exception as e:
                            print(f"❌ Error: {e}")
                    else:
                        print("Please enter a name, 's', 'o', or 'q'")
                # Clean up this face crop
                if face_crop_path and os.path.exists(face_crop_path):
                    os.remove(face_crop_path)
        finally:
            # Clean up any remaining temp files
            for file in os.listdir('.'):
                if file.startswith(f'temp_face_crop_{os.getpid()}'):
                    try:
                        os.remove(file)
                    except OSError:
                        # Best-effort cleanup; a locked or vanished temp file
                        # is not fatal (was a bare except, which also
                        # swallowed KeyboardInterrupt).
                        pass
        print(f"\n🎉 Completed processing!")
        # Show remaining count
        remaining = self.get_remaining_count()
        if remaining > 0:
            print(f"📊 {remaining} unidentified faces remaining")
            print("Run the script again to continue identifying faces")
        else:
            print("🏆 All faces have been identified!")

    def get_remaining_count(self):
        """Get count of remaining unidentified faces."""
        conn = sqlite3.connect(self.db_path)
        c = conn.cursor()
        c.execute('SELECT COUNT(*) FROM faces WHERE person_id IS NULL')
        count = c.fetchone()[0]
        conn.close()
        return count
# Launch the interactive identification loop when run as a script.
if __name__ == "__main__":
    identifier = VisualFaceIdentifier()
    identifier.run_visual_identifier()

View File

@ -1,431 +0,0 @@
#!/usr/bin/env python3
"""
Web GUI for PunimTag using Flask
Face clustering and identification interface
"""
from flask import Flask, render_template, request, jsonify, send_from_directory
import os
import sqlite3
import base64
from io import BytesIO
from PIL import Image
import pickle
import numpy as np
from typing import List, Dict
app = Flask(__name__)  # Flask application serving the face-tagging web UI

# SQLite database shared with the CLI backend (punimtag_simple.py).
DB_PATH = 'punimtag_simple.db'
def get_face_clusters() -> List[Dict]:
    """Greedily group unidentified faces by encoding similarity.

    Returns clusters of >= 2 faces each, largest first. Cluster ids are just
    list positions, so they are only stable while the underlying data does
    not change. O(n^2) in the number of unidentified faces.
    """
    if not os.path.exists(DB_PATH):
        return []
    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()
    try:
        # Get unidentified faces
        c.execute('''SELECT f.id, f.image_id, i.path, f.top, f.right, f.bottom, f.left, f.encoding
                FROM faces f
                JOIN images i ON f.image_id = i.id
                WHERE f.person_id IS NULL''')
        faces = c.fetchall()
        if len(faces) < 2:
            return []
        # Greedy single-pass clustering: each not-yet-used face seeds a
        # cluster and absorbs every later face within the distance threshold.
        clusters = []
        used_faces = set()
        for i, face1 in enumerate(faces):
            if face1[0] in used_faces:
                continue
            cluster_faces = [face1]
            used_faces.add(face1[0])
            encoding1 = pickle.loads(face1[7])
            # Find similar faces among the remaining ones
            for j, face2 in enumerate(faces[i+1:], i+1):
                if face2[0] in used_faces:
                    continue
                encoding2 = pickle.loads(face2[7])
                # Euclidean distance between stored encodings. NOTE(review):
                # 0.8 is looser than face_recognition's usual 0.6 match
                # threshold — presumably intentional to over-group; confirm.
                distance = np.linalg.norm(encoding1 - encoding2)
                if distance < 0.8:  # Similar faces
                    cluster_faces.append(face2)
                    used_faces.add(face2[0])
            # Only create cluster if it has multiple faces
            if len(cluster_faces) >= 2:
                cluster_data = {
                    'cluster_id': len(clusters),
                    'face_count': len(cluster_faces),
                    'faces': []
                }
                for face in cluster_faces:
                    cluster_data['faces'].append({
                        'face_id': face[0],
                        'image_id': face[1],
                        'image_path': face[2],
                        'location': (face[3], face[4], face[5], face[6])
                    })
                clusters.append(cluster_data)
        # Sort by face count (largest clusters first)
        clusters.sort(key=lambda x: x['face_count'], reverse=True)
        return clusters
    except Exception as e:
        print(f"Error in clustering: {e}")
        return []
    finally:
        conn.close()
def get_face_thumbnail_base64(face: Dict) -> str:
    """Return a data-URI JPEG thumbnail for one face dict, or '' on any failure."""
    try:
        source_path = face['image_path']
        if not os.path.exists(source_path):
            return ""
        photo = Image.open(source_path)
        top, right, bottom, left = face['location']
        # Pad the crop box by 20px, clamped to the image bounds.
        pad = 20
        box = (max(0, left - pad),
               max(0, top - pad),
               min(photo.width, right + pad),
               min(photo.height, bottom + pad))
        thumb = photo.crop(box)
        thumb.thumbnail((150, 150), Image.Resampling.LANCZOS)
        # Serialize to an inline data URI so templates need no extra routes.
        buffer = BytesIO()
        thumb.save(buffer, format='JPEG')
        encoded = base64.b64encode(buffer.getvalue()).decode()
        return f"data:image/jpeg;base64,{encoded}"
    except Exception as e:
        print(f"Error creating thumbnail: {e}")
        return ""
def get_database_stats() -> Dict:
    """Collect headline counts from the SQLite database; {} if unavailable."""
    if not os.path.exists(DB_PATH):
        return {}
    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()
    try:
        stats = {}
        # Run each scalar COUNT query and store it under its key.
        for key, sql in (
            ('images', "SELECT COUNT(*) FROM images"),
            ('faces', "SELECT COUNT(*) FROM faces"),
            ('identified_faces', "SELECT COUNT(*) FROM faces WHERE person_id IS NOT NULL"),
            ('people', "SELECT COUNT(*) FROM people"),
        ):
            c.execute(sql)
            stats[key] = c.fetchone()[0]
        stats['unidentified_faces'] = stats['faces'] - stats['identified_faces']
        return stats
    except Exception as e:
        print(f"Error getting stats: {e}")
        return {}
    finally:
        conn.close()
@app.route('/')
def index():
    """Render the landing page with database stats and unknown-face clusters."""
    context = {
        'stats': get_database_stats(),
        'clusters': get_face_clusters(),
    }
    return render_template('index.html', **context)
@app.route('/cluster/<int:cluster_id>')
def cluster_detail(cluster_id):
    """Render one cluster with an inline thumbnail for each face."""
    clusters = get_face_clusters()
    if cluster_id >= len(clusters):
        return "Cluster not found", 404
    selected = clusters[cluster_id]
    # Enrich each face with display data the template needs.
    for face in selected['faces']:
        face['thumbnail'] = get_face_thumbnail_base64(face)
        face['filename'] = os.path.basename(face['image_path'])
    return render_template('cluster_detail.html', cluster=selected, cluster_id=cluster_id)
@app.route('/identify_cluster', methods=['POST'])
def identify_cluster():
    """Assign every face in a cluster to a (possibly new) person.

    Expects JSON {cluster_id, person_name}; responds with the project's
    standard {'success': bool, ...} envelope.
    """
    data = request.json
    cluster_id = data.get('cluster_id')
    person_name = data.get('person_name', '').strip()
    if not person_name:
        return jsonify({'success': False, 'error': 'Person name is required'})
    try:
        clusters = get_face_clusters()
        if cluster_id >= len(clusters):
            return jsonify({'success': False, 'error': 'Cluster not found'})
        cluster = clusters[cluster_id]
        # Add person to database and assign faces
        conn = sqlite3.connect(DB_PATH)
        try:
            c = conn.cursor()
            # Create the person if needed, then fetch its id.
            c.execute('INSERT OR IGNORE INTO people (name) VALUES (?)', (person_name,))
            c.execute('SELECT id FROM people WHERE name = ?', (person_name,))
            person_id = c.fetchone()[0]
            # Assign all faces in cluster
            for face in cluster['faces']:
                c.execute('''UPDATE faces
                            SET person_id = ?, is_confirmed = 1
                            WHERE id = ?''',
                          (person_id, face['face_id']))
            conn.commit()
        finally:
            # Previously the connection leaked if any statement raised.
            conn.close()
        return jsonify({
            'success': True,
            'message': f"Identified {cluster['face_count']} faces as {person_name}"
        })
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)})
@app.route('/people')
def people_list():
    """List all identified people with their face counts, most faces first."""
    if not os.path.exists(DB_PATH):
        return render_template('people.html', people=[])
    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()
    try:
        # LEFT JOIN keeps people that have zero assigned faces.
        c.execute('''SELECT p.id, p.name, COUNT(f.id) as face_count, p.created_at
                FROM people p
                LEFT JOIN faces f ON p.id = f.person_id
                GROUP BY p.id
                ORDER BY face_count DESC''')
        people = [
            {'id': person_id, 'name': name, 'face_count': n_faces, 'created_at': created}
            for person_id, name, n_faces, created in c.fetchall()
        ]
        return render_template('people.html', people=people)
    except Exception as e:
        return render_template('people.html', people=[], error=str(e))
    finally:
        conn.close()
@app.route('/search')
def search():
    """Search interface"""
    # Static page; actual querying happens client-side via POST /api/search.
    return render_template('search.html')
@app.route('/api/search', methods=['POST'])
def api_search():
    """JSON search endpoint: {people: [...], tags: [...]} -> matching images."""
    payload = request.json
    people = payload.get('people', [])
    tags = payload.get('tags', [])
    try:
        # Imported lazily so the web app starts even if the backend module
        # (and its heavy dependencies) are unavailable until first use.
        from punimtag_simple import SimplePunimTag
        tagger = SimplePunimTag(DB_PATH)
        results = tagger.simple_search(people=people or None,
                                       tags=tags or None)
        tagger.close()
        return jsonify({'success': True, 'results': results})
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)})
def create_html_templates():
    """Create basic HTML templates under templates/ (overwritten each call)."""
    # Ensure the target directory exists even when called standalone.
    os.makedirs('templates', exist_ok=True)
    # Base template
    base_template = '''<!DOCTYPE html>
<html>
<head>
    <title>PunimTag - {% block title %}{% endblock %}</title>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <style>
        body { font-family: Arial, sans-serif; margin: 0; padding: 20px; background: #f5f5f5; }
        .container { max-width: 1200px; margin: 0 auto; background: white; padding: 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }
        .header { border-bottom: 2px solid #007bff; padding-bottom: 10px; margin-bottom: 20px; }
        .header h1 { color: #007bff; margin: 0; }
        .nav { margin: 20px 0; }
        .nav a { margin-right: 20px; color: #007bff; text-decoration: none; }
        .nav a:hover { text-decoration: underline; }
        .stats { display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 15px; margin: 20px 0; }
        .stat-card { background: #f8f9fa; padding: 15px; border-radius: 5px; text-align: center; }
        .stat-card h3 { margin: 0; color: #007bff; }
        .stat-card p { margin: 5px 0 0 0; font-size: 24px; font-weight: bold; }
        .cluster-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(300px, 1fr)); gap: 20px; margin: 20px 0; }
        .cluster-card { border: 1px solid #ddd; border-radius: 8px; padding: 15px; background: white; }
        .cluster-card h3 { margin: 0 0 10px 0; color: #007bff; }
        .face-thumbnails { display: flex; flex-wrap: wrap; gap: 10px; }
        .face-thumb { width: 80px; height: 80px; border-radius: 5px; object-fit: cover; }
        .btn { background: #007bff; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer; text-decoration: none; display: inline-block; }
        .btn:hover { background: #0056b3; }
        .btn-success { background: #28a745; }
        .btn-success:hover { background: #1e7e34; }
        .alert { padding: 15px; margin: 20px 0; border-radius: 5px; }
        .alert-success { background: #d4edda; color: #155724; border: 1px solid #c3e6cb; }
        .alert-error { background: #f8d7da; color: #721c24; border: 1px solid #f5c6cb; }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>🏷 PunimTag</h1>
            <div class="nav">
                <a href="/">Face Clusters</a>
                <a href="/people">People</a>
                <a href="/search">Search</a>
            </div>
        </div>
        {% block content %}{% endblock %}
    </div>
</body>
</html>'''
    # Index page
    # NOTE(review): index.html calls get_face_thumbnail_base64() in Jinja,
    # but index() does not pass that function into the template context —
    # verify before relying on the cluster thumbnails rendering.
    index_template = '''{% extends "base.html" %}
{% block title %}Face Clusters{% endblock %}
{% block content %}
<h2>📊 Database Statistics</h2>
<div class="stats">
    <div class="stat-card">
        <h3>Images</h3>
        <p>{{ stats.images or 0 }}</p>
    </div>
    <div class="stat-card">
        <h3>Total Faces</h3>
        <p>{{ stats.faces or 0 }}</p>
    </div>
    <div class="stat-card">
        <h3>Identified</h3>
        <p>{{ stats.identified_faces or 0 }}</p>
    </div>
    <div class="stat-card">
        <h3>Unidentified</h3>
        <p>{{ stats.unidentified_faces or 0 }}</p>
    </div>
    <div class="stat-card">
        <h3>People</h3>
        <p>{{ stats.people or 0 }}</p>
    </div>
</div>
<h2>👥 Unknown Face Clusters</h2>
{% if clusters %}
<p>Click on a cluster to identify the faces:</p>
<div class="cluster-grid">
    {% for cluster in clusters %}
    <div class="cluster-card">
        <h3>Cluster {{ loop.index }} ({{ cluster.face_count }} faces)</h3>
        <div class="face-thumbnails">
            {% for face in cluster.faces[:4] %}
            <img src="{{ get_face_thumbnail_base64(face) }}" class="face-thumb" alt="Face">
            {% endfor %}
            {% if cluster.face_count > 4 %}
            <div style="padding: 10px;">+{{ cluster.face_count - 4 }} more</div>
            {% endif %}
        </div>
        <br>
        <a href="/cluster/{{ cluster.cluster_id }}" class="btn">View & Identify</a>
    </div>
    {% endfor %}
</div>
{% else %}
<div class="alert alert-success">
    <strong>Great!</strong> No unknown face clusters found. All faces have been identified or there are no faces to process.
</div>
<p>To get started:</p>
<ol>
    <li>Add photos to the <code>photos/</code> directory</li>
    <li>Run <code>python punimtag_simple.py</code> to process them</li>
    <li>Return here to identify unknown faces</li>
</ol>
{% endif %}
{% endblock %}'''
    # Write templates — explicit UTF-8 so the emoji headers survive any locale.
    with open('templates/base.html', 'w', encoding='utf-8') as f:
        f.write(base_template)
    with open('templates/index.html', 'w', encoding='utf-8') as f:
        f.write(index_template)
    print("✅ Created basic HTML templates")


if __name__ == '__main__':
    # This guard must come AFTER create_html_templates() is defined —
    # previously it ran first in the file and raised NameError on startup.
    create_html_templates()
    print("🌐 Starting PunimTag Web GUI...")
    print("📊 Open http://localhost:5000 in your browser")
    print("🔄 Use Ctrl+C to stop")
    app.run(debug=True, host='0.0.0.0', port=5000)

View File

@ -1,5 +0,0 @@
"""
PunimTag Utils Package
This package contains utility functions and helper modules.
"""

View File

@ -1,289 +0,0 @@
#!/usr/bin/env python3
"""
Tag Manager for PunimTag
Manage tags and assign them to images
"""
import os
from punimtag import PunimTag
from datetime import datetime
from typing import List, Optional
class TagManager:
    """Interactive CLI for creating tags and attaching them to images."""

    def __init__(self, db_path: str = 'punimtag.db'):
        self.tagger = PunimTag(db_path=db_path)
        # Predefined tag categories and suggestions shown during creation.
        self.tag_categories = {
            'location': ['home', 'work', 'vacation', 'beach', 'mountain', 'city', 'park', 'restaurant'],
            'event': ['birthday', 'wedding', 'graduation', 'holiday', 'party', 'meeting', 'conference'],
            'scene': ['indoor', 'outdoor', 'nature', 'urban', 'rural', 'night', 'day', 'sunset', 'sunrise'],
            'activity': ['sports', 'eating', 'working', 'playing', 'traveling', 'celebration', 'relaxing'],
            'mood': ['happy', 'formal', 'casual', 'candid', 'posed', 'artistic'],
            'season': ['spring', 'summer', 'fall', 'winter'],
            'weather': ['sunny', 'cloudy', 'rainy', 'snowy'],
            'group': ['family', 'friends', 'colleagues', 'solo', 'couple', 'group']
        }

    def list_tags(self):
        """List all existing tags grouped by category."""
        c = self.tagger.conn.cursor()
        c.execute('SELECT id, name, category FROM tags ORDER BY category, name')
        tags = c.fetchall()
        if not tags:
            print("No tags found in database.")
            return
        print("\nExisting Tags:")
        print("=" * 50)
        current_category = None
        for tag_id, name, category in tags:
            # Print a heading whenever the category changes (rows are sorted).
            if category != current_category:
                current_category = category or "Uncategorized"
                print(f"\n{current_category}:")
            print(f" [{tag_id}] {name}")

    def create_tag(self):
        """Interactive tag creation with category suggestions."""
        print("\nCreate New Tag")
        print("=" * 50)
        # Show categories
        print("\nAvailable categories:")
        for i, cat in enumerate(self.tag_categories.keys(), 1):
            print(f"{i}. {cat}")
        print(f"{len(self.tag_categories) + 1}. Other (no category)")
        # Get category
        try:
            choice = int(input("\nSelect category (number): "))
            if 1 <= choice <= len(self.tag_categories):
                category = list(self.tag_categories.keys())[choice - 1]
                print(f"\nSuggested tags for {category}:")
                for tag in self.tag_categories[category]:
                    print(f" - {tag}")
            else:
                category = None
        except ValueError:
            # Non-numeric input falls back to "no category". (Was a bare
            # except, which also swallowed Ctrl-C.)
            category = None
        # Get tag name
        name = input("\nEnter tag name: ").strip()
        if not name:
            print("Tag name cannot be empty!")
            return
        # Create tag
        tag_id = self.tagger.add_tag(name, category)
        print(f"✓ Created tag '{name}' with ID {tag_id}")

    def tag_images_by_search(self):
        """Apply one tag to every image matching interactive search criteria."""
        print("\nTag Images by Search")
        print("=" * 50)
        # Get search criteria
        print("\nSearch criteria (leave blank to skip):")
        # People filter
        people_input = input("People (comma-separated names): ").strip()
        people = [p.strip() for p in people_input.split(',')] if people_input else None
        # Existing tags filter
        tags_input = input("Existing tags (comma-separated): ").strip()
        tags = [t.strip() for t in tags_input.split(',')] if tags_input else None
        # Date range filter
        date_from_input = input("Date from (YYYY-MM-DD): ").strip()
        date_from = datetime.strptime(date_from_input, '%Y-%m-%d') if date_from_input else None
        date_to_input = input("Date to (YYYY-MM-DD): ").strip()
        date_to = datetime.strptime(date_to_input, '%Y-%m-%d') if date_to_input else None
        # Search images
        results = self.tagger.search_images(people, tags, date_from, date_to)
        if not results:
            print("\nNo images found matching criteria!")
            return
        print(f"\nFound {len(results)} images")
        # Get tag to apply
        self.list_tags()
        tag_name = input("\nEnter tag name to apply: ").strip()
        if not tag_name:
            print("Cancelled")
            return
        # Get or create tag
        tag_id = self.tagger.add_tag(tag_name)
        # Apply tag to all results
        count = 0
        for img in results:
            self.tagger.tag_image(img['id'], tag_id)
            count += 1
        print(f"✓ Applied tag '{tag_name}' to {count} images")

    def tag_single_image(self):
        """Tag a single image by path, indexing it first if unknown."""
        print("\nTag Single Image")
        print("=" * 50)
        # Get image path
        image_path = input("Enter image path: ").strip()
        if not os.path.exists(image_path):
            print(f"Error: Image not found at {image_path}")
            return
        # Check if image is in database
        c = self.tagger.conn.cursor()
        c.execute('SELECT id FROM images WHERE path = ?', (image_path,))
        result = c.fetchone()
        if not result:
            print("Image not found in database. Processing it now...")
            image_id = self.tagger.process_image(image_path)
        else:
            image_id = result[0]
        # Show current tags
        c.execute('''SELECT t.name FROM tags t
                JOIN image_tags it ON t.id = it.tag_id
                WHERE it.image_id = ?''', (image_id,))
        current_tags = [row[0] for row in c.fetchall()]
        if current_tags:
            print(f"\nCurrent tags: {', '.join(current_tags)}")
        else:
            print("\nNo tags currently assigned")
        # Add tags until the user presses Enter on an empty line
        while True:
            tag_name = input("\nEnter tag to add (or press Enter to finish): ").strip()
            if not tag_name:
                break
            tag_id = self.tagger.add_tag(tag_name)
            self.tagger.tag_image(image_id, tag_id)
            print(f"✓ Added tag '{tag_name}'")

    def auto_tag_suggestions(self):
        """Auto-tag untagged images from metadata (season by date, geotagged by GPS)."""
        print("\nAuto-Tag Suggestions")
        print("=" * 50)
        c = self.tagger.conn.cursor()
        # Find images without tags
        c.execute('''SELECT i.id, i.path, i.date_taken, i.latitude, i.longitude
                FROM images i
                LEFT JOIN image_tags it ON i.id = it.image_id
                WHERE it.tag_id IS NULL''')
        untagged = c.fetchall()
        if not untagged:
            print("All images are already tagged!")
            return
        print(f"Found {len(untagged)} untagged images")
        # Auto-tag based on date (month tuples -> season name)
        seasons = {
            (3, 4, 5): 'spring',
            (6, 7, 8): 'summer',
            (9, 10, 11): 'fall',
            (12, 1, 2): 'winter'
        }
        season_counts = {s: 0 for s in seasons.values()}
        location_count = 0
        for img_id, path, date_taken, lat, lon in untagged:
            suggestions = []
            # Season based on date
            if date_taken:
                month = datetime.strptime(date_taken, '%Y-%m-%d %H:%M:%S').month
                for months, season in seasons.items():
                    if month in months:
                        tag_id = self.tagger.add_tag(season, 'season')
                        self.tagger.tag_image(img_id, tag_id)
                        season_counts[season] += 1
                        break
            # Location-based tags
            if lat and lon:
                # You could integrate with a geocoding API here
                # For now, just tag as "geotagged"
                tag_id = self.tagger.add_tag('geotagged', 'location')
                self.tagger.tag_image(img_id, tag_id)
                location_count += 1
        print("\nAuto-tagging complete:")
        for season, count in season_counts.items():
            if count > 0:
                print(f" - Tagged {count} images as '{season}'")
        if location_count > 0:
            print(f" - Tagged {location_count} images as 'geotagged'")

    def run(self):
        """Main menu loop; exits on option 6, Ctrl-C, or end of input."""
        while True:
            print("\n" + "=" * 50)
            print("PunimTag - Tag Manager")
            print("=" * 50)
            print("1. List all tags")
            print("2. Create new tag")
            print("3. Tag images by search")
            print("4. Tag single image")
            print("5. Auto-tag suggestions")
            print("6. Exit")
            try:
                choice = int(input("\nSelect option: "))
                if choice == 1:
                    self.list_tags()
                elif choice == 2:
                    self.create_tag()
                elif choice == 3:
                    self.tag_images_by_search()
                elif choice == 4:
                    self.tag_single_image()
                elif choice == 5:
                    self.auto_tag_suggestions()
                elif choice == 6:
                    break
                else:
                    print("Invalid option!")
            except KeyboardInterrupt:
                print("\n\nInterrupted by user")
                break
            except EOFError:
                # stdin closed: exit instead of looping forever — previously
                # the broad handler below caught EOFError and re-prompted,
                # printing "Error: ..." in an infinite loop.
                break
            except Exception as e:
                print(f"Error: {e}")
        self.tagger.close()
        print("\nGoodbye!")
def main():
    """Entry point: launch the interactive tag-manager menu."""
    manager = TagManager()
    manager.run()


if __name__ == "__main__":
    main()

142
test_basic.py Normal file
View File

@ -0,0 +1,142 @@
#!/usr/bin/env python3
"""
Basic test for photo_tagger.py without face recognition dependencies
Tests database initialization and basic functionality
"""
import sys
import os
import tempfile
import sqlite3
# Add current directory to path
sys.path.insert(0, '.')
def test_database_init():
    """Test database initialization without face recognition"""
    # Reserve a temp database path; the file survives the context manager
    # (delete=False) so sqlite can reopen it below.
    with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
        test_db = tmp.name
    try:
        # Import and test database creation
        from photo_tagger import PhotoTagger
        # This should fail because face_recognition is not installed
        # But we can test the import and class structure
        print("✅ PhotoTagger class imported successfully")
        conn = sqlite3.connect(test_db)
        cursor = conn.cursor()
        # Create the tables manually to exercise the schema.
        ddl_statements = (
            '''
            CREATE TABLE IF NOT EXISTS photos (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                path TEXT UNIQUE NOT NULL,
                filename TEXT NOT NULL,
                date_added DATETIME DEFAULT CURRENT_TIMESTAMP,
                processed BOOLEAN DEFAULT 0
            )
            ''',
            '''
            CREATE TABLE IF NOT EXISTS people (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                created_date DATETIME DEFAULT CURRENT_TIMESTAMP
            )
            ''',
        )
        for ddl in ddl_statements:
            cursor.execute(ddl)
        conn.commit()
        # Test basic operations
        cursor.execute("INSERT INTO photos (path, filename) VALUES (?, ?)",
                       ("/test/path.jpg", "test.jpg"))
        cursor.execute("INSERT INTO people (name) VALUES (?)", ("Test Person",))
        cursor.execute("SELECT COUNT(*) FROM photos")
        photo_count = cursor.fetchone()[0]
        cursor.execute("SELECT COUNT(*) FROM people")
        people_count = cursor.fetchone()[0]
        conn.close()
        print(f"✅ Database schema created successfully")
        print(f"✅ Test data inserted: {photo_count} photos, {people_count} people")
        return True
    except ImportError as e:
        print(f"⚠️ Import error (expected): {e}")
        print("✅ This is expected without face_recognition installed")
        return True
    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        return False
    finally:
        # Clean up
        if os.path.exists(test_db):
            os.unlink(test_db)
def test_cli_structure():
    """Test CLI argument parsing structure"""
    try:
        import argparse
        # Rebuild the CLI's argument layout and check it parses.
        parser = argparse.ArgumentParser(description="Test parser")
        argument_specs = (
            (('command',), {'choices': ['scan', 'process', 'identify', 'tag', 'search', 'stats']}),
            (('target',), {'nargs': '?'}),
            (('--db',), {'default': 'photos.db'}),
            (('--limit',), {'type': int, 'default': 50}),
        )
        for names, options in argument_specs:
            parser.add_argument(*names, **options)
        # Test parsing
        args = parser.parse_args(['stats'])
        print(f"✅ CLI argument parsing works: command={args.command}")
        return True
    except Exception as e:
        print(f"❌ CLI structure error: {e}")
        return False
def main():
    """Run basic tests"""
    print("🧪 Running Basic Tests for PunimTag CLI")
    print("=" * 50)
    suite = [
        ("Database Schema", test_database_init),
        ("CLI Structure", test_cli_structure),
    ]
    passed = 0
    for test_name, test_func in suite:
        print(f"\n📋 Testing: {test_name}")
        try:
            if test_func():
                print(f"{test_name}: PASSED")
                passed += 1
            else:
                print(f"{test_name}: FAILED")
        except Exception as e:
            print(f"{test_name}: ERROR - {e}")
    total = len(suite)
    print(f"\n📊 Results: {passed}/{total} tests passed")
    if passed == total:
        print("🎉 All basic tests passed!")
        print("\n📦 Next steps:")
        print("1. Install dependencies: pip install -r requirements.txt")
        print("2. Test full functionality: python photo_tagger.py stats")
        return 0
    # At least one test failed: non-zero exit code for the caller.
    print("⚠️ Some tests failed")
    return 1
if __name__ == "__main__":
    # Propagate the test result as the process exit code (0 = all passed).
    sys.exit(main())

View File

@ -1,139 +0,0 @@
"""
PunimTag Test Configuration
Shared fixtures and configuration for all tests.
"""
import pytest
import sqlite3
import tempfile
import os
import sys
from pathlib import Path
# Add src to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from backend.app import app
@pytest.fixture
def test_db():
    """Create a temporary test database."""
    db_fd, db_path = tempfile.mkstemp()
    # Create test database schema
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    ddl_statements = [
        '''
        CREATE TABLE images (
            id INTEGER PRIMARY KEY,
            filename TEXT NOT NULL,
            path TEXT NOT NULL,
            date_taken TEXT
        )
        ''',
        '''
        CREATE TABLE faces (
            id INTEGER PRIMARY KEY,
            image_id INTEGER,
            person_id INTEGER,
            encoding BLOB,
            left INTEGER,
            top INTEGER,
            right INTEGER,
            bottom INTEGER
        )
        ''',
        '''
        CREATE TABLE people (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            created_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        ''',
        '''
        CREATE TABLE tags (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL UNIQUE
        )
        ''',
        '''
        CREATE TABLE image_tags (
            image_id INTEGER,
            tag_id INTEGER,
            PRIMARY KEY (image_id, tag_id)
        )
        ''',
    ]
    for ddl in ddl_statements:
        cursor.execute(ddl)
    conn.commit()
    conn.close()
    yield db_path
    # Cleanup after the test finishes.
    os.close(db_fd)
    os.unlink(db_path)
@pytest.fixture
def client(test_db):
    """Create a test client with test database.

    Depends on the ``test_db`` fixture and points the Flask app at the
    temporary database before yielding the client.
    """
    app.config['TESTING'] = True
    # NOTE(review): assumes the backend reads DATABASE_PATH per request,
    # not once at import time — confirm against backend.app.
    app.config['DATABASE_PATH'] = test_db
    with app.test_client() as client:
        yield client
@pytest.fixture
def sample_photos(test_db):
    """Insert three sample photo rows and return the inserted tuples."""
    rows = [
        ('photo1.jpg', '/test/path/photo1.jpg', '2023-01-01'),
        ('photo2.jpg', '/test/path/photo2.jpg', '2023-01-02'),
        ('photo3.jpg', '/test/path/photo3.jpg', '2023-01-03'),
    ]

    connection = sqlite3.connect(test_db)
    connection.cursor().executemany(
        'INSERT INTO images (filename, path, date_taken) VALUES (?, ?, ?)',
        rows,
    )
    connection.commit()
    connection.close()

    return rows
@pytest.fixture
def sample_faces(test_db):
    """Add sample faces to the test database.

    Returns the inserted rows as tuples of
    (image_id, person_id, encoding, left, top, right, bottom).
    """
    conn = sqlite3.connect(test_db)
    cursor = conn.cursor()
    # Add a person first
    cursor.execute('INSERT INTO people (name) VALUES (?)', ('Test Person',))
    person_id = cursor.lastrowid
    # Add faces
    faces = [
        (1, person_id, b'fake_encoding_1', 100, 100, 200, 200),
        (2, person_id, b'fake_encoding_2', 150, 150, 250, 250),
        (3, None, b'fake_encoding_3', 200, 200, 300, 300),  # Unidentified face
    ]
    # NOTE(review): "left" and "right" are SQL keywords; SQLite accepts them
    # unquoted as column names here, but other engines may not.
    cursor.executemany(
        'INSERT INTO faces (image_id, person_id, encoding, left, top, right, bottom) VALUES (?, ?, ?, ?, ?, ?, ?)',
        faces
    )
    conn.commit()
    conn.close()
    return faces

View File

@ -1,448 +0,0 @@
#!/usr/bin/env python3
"""
Comprehensive Backend Test Suite for PunimTag
Tests all backend functionality including face clustering, enhanced recognition, and complex queries
"""
import os
import tempfile
import shutil
import unittest
import uuid
import pickle
from datetime import datetime, timedelta
import numpy as np
from punimtag import PunimTag
from config import PunimTagConfig, create_default_config
from typing import List
class TestBackendFunctionality(unittest.TestCase):
    """Test all backend features thoroughly.

    Every test runs against a fresh temporary directory, database, and
    config created in setUp and removed in tearDown.
    """

    def setUp(self):
        """Set up test environment with temporary database and config"""
        self.test_dir = tempfile.mkdtemp()
        self.db_path = os.path.join(self.test_dir, 'test.db')
        self.photos_dir = os.path.join(self.test_dir, 'photos')
        self.config_path = os.path.join(self.test_dir, 'test_config.json')
        os.makedirs(self.photos_dir, exist_ok=True)
        # Create test configuration
        self.config = PunimTagConfig(self.config_path)
        self.config.face_recognition.confidence_threshold = 0.5
        self.config.auto_tagging.enabled = True
        self.config.processing.batch_size = 50
        self.config.save()
        # Initialize PunimTag with test database
        self.tagger = PunimTag(db_path=self.db_path, photos_dir=self.photos_dir)

    def tearDown(self):
        """Clean up test environment"""
        self.tagger.close()
        shutil.rmtree(self.test_dir)

    def test_configuration_system(self):
        """Test configuration loading and saving"""
        # Test default values
        self.assertEqual(self.config.face_recognition.confidence_threshold, 0.5)
        self.assertTrue(self.config.auto_tagging.enabled)
        # Test updating settings
        success = self.config.update_setting('face_recognition', 'confidence_threshold', 0.7)
        self.assertTrue(success)
        self.assertEqual(self.config.face_recognition.confidence_threshold, 0.7)
        # Test getting settings
        value = self.config.get_setting('processing', 'batch_size')
        self.assertEqual(value, 50)
        # Test tag suggestions
        event_tags = self.config.get_tag_suggestions('event')
        self.assertIn('wedding', event_tags)
        self.assertIn('bar_mitzvah', event_tags)

    def test_jewish_org_tags(self):
        """Test Jewish organization specific tag functionality"""
        # Test adding Jewish event tags
        for tag_name in ['shabbat', 'chanukah', 'passover']:
            tag_id = self.tagger.add_tag(tag_name, 'event')
            self.assertIsNotNone(tag_id)
        # Test location tags
        for tag_name in ['synagogue', 'sanctuary', 'sukkah']:
            tag_id = self.tagger.add_tag(tag_name, 'location')
            self.assertIsNotNone(tag_id)
        # Verify tags exist in database
        c = self.tagger.conn.cursor()
        c.execute("SELECT COUNT(*) FROM tags WHERE category = 'event'")
        event_count = c.fetchone()[0]
        self.assertGreaterEqual(event_count, 3)

    def test_face_clustering(self):
        """Test face clustering functionality"""
        # Create mock face data
        face_ids = self._create_mock_faces(10)
        # Test clustering
        clusters = self.tagger.cluster_unknown_faces()
        self.assertIsInstance(clusters, dict)
        # Test getting cluster data
        cluster_data = self.tagger.get_face_clusters()
        self.assertIsInstance(cluster_data, list)
        # Each cluster should have required fields
        for cluster in cluster_data:
            self.assertIn('cluster_id', cluster)
            self.assertIn('face_count', cluster)
            self.assertIn('face_ids', cluster)
            self.assertIn('representative_face', cluster)

    def test_cluster_assignment(self):
        """Test assigning clusters to people"""
        # Create mock faces and cluster them
        face_ids = self._create_mock_faces(5)
        clusters = self.tagger.cluster_unknown_faces()
        # Random mock encodings may not form any cluster, so assignment is
        # only exercised when at least one cluster was produced.
        if clusters:
            cluster_id = list(clusters.keys())[0]
            success = self.tagger.assign_cluster_to_person(cluster_id, "Test Person")
            self.assertTrue(success)
            # Verify assignment
            c = self.tagger.conn.cursor()
            c.execute("SELECT COUNT(*) FROM faces WHERE person_id IS NOT NULL")
            assigned_count = c.fetchone()[0]
            self.assertGreater(assigned_count, 0)

    def test_most_common_faces(self):
        """Test getting most frequently photographed people"""
        # Add some people and faces
        person1_id = self.tagger.add_person("John Doe")
        person2_id = self.tagger.add_person("Jane Smith")
        # Create mock faces assigned to people
        face_ids = self._create_mock_faces(10)
        # Assign faces to people: 5 to John, 2 to Jane, 3 left unassigned
        for i, face_id in enumerate(face_ids[:5]):
            self.tagger.assign_face_to_person(face_id, person1_id, True)
        for face_id in face_ids[5:7]:
            self.tagger.assign_face_to_person(face_id, person2_id, True)
        # Test getting most common faces
        common_faces = self.tagger.get_most_common_faces(limit=10)
        self.assertIsInstance(common_faces, list)
        if common_faces:
            # Should be sorted by face count (John Doe should be first)
            self.assertEqual(common_faces[0]['name'], "John Doe")
            self.assertEqual(common_faces[0]['face_count'], 5)

    def test_face_verification(self):
        """Test face verification functionality"""
        person_id = self.tagger.add_person("Test Person")
        face_ids = self._create_mock_faces(3)
        # Assign faces to person
        for face_id in face_ids:
            self.tagger.assign_face_to_person(face_id, person_id, True)
        # Test verification
        faces = self.tagger.verify_person_faces(person_id)
        self.assertEqual(len(faces), 3)
        # Test removing incorrect assignment
        self.tagger.remove_incorrect_face_assignment(face_ids[0])
        # Verify removal
        faces_after = self.tagger.verify_person_faces(person_id)
        self.assertEqual(len(faces_after), 2)

    def test_batch_processing(self):
        """Test batch image processing"""
        # Create mock image paths
        image_paths = [
            os.path.join(self.photos_dir, f'test_{i}.jpg')
            for i in range(5)
        ]
        # Create empty test files
        for path in image_paths:
            with open(path, 'w') as f:
                f.write('')  # Empty file for testing
        # Test batch processing (will fail on actual processing but test the logic)
        try:
            results = self.tagger.batch_process_images(image_paths, batch_size=2)
            self.assertIn('processed', results)
            self.assertIn('errors', results)
            self.assertIn('skipped', results)
        except Exception:
            # Expected to fail with empty files, but structure should be correct
            pass

    def test_advanced_search(self):
        """Test advanced search functionality"""
        # Setup test data
        person_id = self.tagger.add_person("Search Test Person")
        tag_id = self.tagger.add_tag("test_event", "event")
        # Create mock image
        image_id = self._create_mock_image()
        # Add mock face and tag
        face_id = self._create_mock_face(image_id)
        self.tagger.assign_face_to_person(face_id, person_id, True)
        self.tagger.tag_image(image_id, tag_id)
        # Test various search scenarios
        # Search by person
        results = self.tagger.advanced_search(people=["Search Test Person"])
        self.assertIsInstance(results, list)
        # Search by tag
        results = self.tagger.advanced_search(tags=["test_event"])
        self.assertIsInstance(results, list)
        # Search by person and tag
        results = self.tagger.advanced_search(
            people=["Search Test Person"],
            tags=["test_event"]
        )
        self.assertIsInstance(results, list)
        # Search with date range
        today = datetime.now()
        yesterday = today - timedelta(days=1)
        tomorrow = today + timedelta(days=1)
        results = self.tagger.advanced_search(
            date_from=yesterday,
            date_to=tomorrow
        )
        self.assertIsInstance(results, list)
        # Search with location bounds
        results = self.tagger.advanced_search(
            latitude_min=40.0,
            latitude_max=41.0,
            longitude_min=-74.0,
            longitude_max=-73.0
        )
        self.assertIsInstance(results, list)
        # Search with minimum people requirement
        results = self.tagger.advanced_search(min_people=1)
        self.assertIsInstance(results, list)
        # Search with limit
        results = self.tagger.advanced_search(limit=5)
        self.assertIsInstance(results, list)
        self.assertLessEqual(len(results), 5)

    def test_face_quality_calculation(self):
        """Test face quality scoring"""
        # Test with different face sizes and encodings.
        # Tuples are (top, right, bottom, left) pixel coordinates.
        small_face = (10, 30, 30, 10)  # 20x20 face
        large_face = (10, 110, 110, 10)  # 100x100 face
        encoding = np.random.rand(128)
        small_quality = self.tagger.calculate_face_quality(encoding, small_face)
        large_quality = self.tagger.calculate_face_quality(encoding, large_face)
        # Larger faces should have higher quality scores
        self.assertGreater(large_quality, small_quality)
        # Quality should be between 0 and 1
        self.assertGreaterEqual(small_quality, 0)
        self.assertLessEqual(small_quality, 1)
        self.assertGreaterEqual(large_quality, 0)
        self.assertLessEqual(large_quality, 1)

    def test_database_integrity(self):
        """Test database integrity and relationships"""
        # Test foreign key relationships
        person_id = self.tagger.add_person("Integrity Test")
        image_id = self._create_mock_image()
        face_id = self._create_mock_face(image_id)
        tag_id = self.tagger.add_tag("integrity_test")
        # Test assignments
        self.tagger.assign_face_to_person(face_id, person_id, True)
        self.tagger.tag_image(image_id, tag_id)
        # Verify relationships exist
        c = self.tagger.conn.cursor()
        # Check face-person relationship
        c.execute("SELECT person_id FROM faces WHERE id = ?", (face_id,))
        result = c.fetchone()
        self.assertEqual(result[0], person_id)
        # Check image-tag relationship
        c.execute("SELECT tag_id FROM image_tags WHERE image_id = ?", (image_id,))
        result = c.fetchone()
        self.assertEqual(result[0], tag_id)

    def test_search_edge_cases(self):
        """Test search functionality with edge cases"""
        # Search with empty parameters
        results = self.tagger.advanced_search()
        self.assertIsInstance(results, list)
        # Search with non-existent person
        results = self.tagger.advanced_search(people=["Non Existent Person"])
        self.assertEqual(len(results), 0)
        # Search with non-existent tag
        results = self.tagger.advanced_search(tags=["non_existent_tag"])
        self.assertEqual(len(results), 0)
        # Search with invalid date range (from after to) must match nothing
        future_date = datetime.now() + timedelta(days=365)
        past_date = datetime.now() - timedelta(days=365)
        results = self.tagger.advanced_search(
            date_from=future_date,
            date_to=past_date
        )
        self.assertEqual(len(results), 0)

    # Helper methods
    def _create_mock_image(self) -> int:
        """Create a mock image entry in database and return its row id."""
        import uuid
        # Random suffix keeps the path unique across repeated calls.
        unique_path = f'test_path_{uuid.uuid4().hex[:8]}.jpg'
        c = self.tagger.conn.cursor()
        c.execute('''INSERT INTO images
                     (path, filename, date_taken, width, height, file_size)
                     VALUES (?, ?, ?, ?, ?, ?)''',
                  (unique_path, unique_path, datetime.now(),
                   800, 600, 12345))
        self.tagger.conn.commit()
        return c.lastrowid

    def _create_mock_face(self, image_id: int) -> int:
        """Create a mock face entry in database and return its row id."""
        import pickle
        # 128-dim random vector; presumably matches the face_recognition
        # encoding size — confirm against the backend's encoder.
        encoding = np.random.rand(128)
        encoding_blob = pickle.dumps(encoding)
        c = self.tagger.conn.cursor()
        c.execute('''INSERT INTO faces
                     (image_id, top, right, bottom, left, encoding)
                     VALUES (?, ?, ?, ?, ?, ?)''',
                  (image_id, 10, 110, 110, 10, encoding_blob))
        self.tagger.conn.commit()
        return c.lastrowid

    def _create_mock_faces(self, count: int) -> List[int]:
        """Create multiple mock faces, one per fresh mock image."""
        face_ids = []
        for i in range(count):
            image_id = self._create_mock_image()
            face_id = self._create_mock_face(image_id)
            face_ids.append(face_id)
        return face_ids
def run_performance_tests():
    """Run performance tests with larger datasets.

    Creates 1000 mock images/faces in a throwaway database, then times
    clustering and advanced search. Results are printed, not returned.
    """
    print("\nRunning Performance Tests")
    print("=" * 50)
    with tempfile.TemporaryDirectory() as temp_dir:
        db_path = os.path.join(temp_dir, 'perf_test.db')
        tagger = PunimTag(db_path=db_path)
        try:
            # Test with larger numbers of faces
            print("Creating 1000 mock faces...")
            start_time = datetime.now()
            face_ids = []
            for i in range(1000):
                # Create image
                c = tagger.conn.cursor()
                c.execute('''INSERT INTO images
                             (path, filename, date_taken, width, height, file_size)
                             VALUES (?, ?, ?, ?, ?, ?)''',
                          (f'perf_test_{i}_{uuid.uuid4().hex[:8]}.jpg', f'perf_test_{i}.jpg',
                           datetime.now(), 800, 600, 12345))
                image_id = c.lastrowid
                # Create face with a random 128-dim encoding blob
                encoding = np.random.rand(128)
                encoding_blob = pickle.dumps(encoding)
                c.execute('''INSERT INTO faces
                             (image_id, top, right, bottom, left, encoding)
                             VALUES (?, ?, ?, ?, ?, ?)''',
                          (image_id, 10, 110, 110, 10, encoding_blob))
                face_ids.append(c.lastrowid)
                if i % 100 == 0:
                    print(f"Created {i} faces...")
            # Single commit after the loop keeps insertion fast.
            tagger.conn.commit()
            creation_time = (datetime.now() - start_time).total_seconds()
            print(f"Created 1000 faces in {creation_time:.2f} seconds")
            # Test clustering performance
            print("Testing clustering performance...")
            start_time = datetime.now()
            clusters = tagger.cluster_unknown_faces()
            clustering_time = (datetime.now() - start_time).total_seconds()
            print(f"Clustered faces in {clustering_time:.2f} seconds")
            print(f"Found {len(clusters)} clusters")
            # Test search performance
            print("Testing search performance...")
            start_time = datetime.now()
            results = tagger.advanced_search(limit=100)
            search_time = (datetime.now() - start_time).total_seconds()
            print(f"Search completed in {search_time:.2f} seconds")
            print(f"Found {len(results)} results")
        finally:
            tagger.close()
def main():
    """Run the unit suite; on success, follow up with performance tests.

    Returns True when all unit tests pass, False otherwise.
    """
    print("PunimTag Backend Test Suite")
    print("=" * 50)
    # Run unit tests
    print("Running unit tests...")

    test_suite = unittest.TestLoader().loadTestsFromTestCase(TestBackendFunctionality)
    outcome = unittest.TextTestRunner(verbosity=2).run(test_suite)

    # Bail out early when the unit tests did not all pass.
    if not outcome.wasSuccessful():
        print("\n❌ Some tests failed. Please fix issues before proceeding.")
        return False

    print("\n✅ All unit tests passed!")
    # Run performance tests
    run_performance_tests()
    print("\n🎉 Backend testing completed successfully!")
    print("\nBackend is ready for UI development.")
    return True


if __name__ == "__main__":
    success = main()
    exit(0 if success else 1)

View File

@ -1,200 +0,0 @@
"""
Main test suite for PunimTag
Consolidated tests covering core functionality.
"""
import json
import os
import sys
from pathlib import Path
# Add src to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
def test_imports():
    """Test that all modules can be imported."""
    try:
        from backend import app
    except ImportError as err:
        print(f"❌ Import error: {err}")
        return False

    print("✅ Flask app imported successfully")
    # Check for the Flask application instance on the module.
    if hasattr(app, 'app'):
        print("✅ Flask app instance found")
    else:
        print("⚠️ Flask app instance not found, but module imported")
    return True
def test_database_connection():
    """Test database connection and basic operations."""
    try:
        import sqlite3

        database = Path(__file__).parent.parent / "data" / "punimtag_simple.db"
        if not database.exists():
            # A missing database is fine on a fresh checkout.
            print("⚠️ Database file not found, but this is normal for fresh installs")
            return True

        connection = sqlite3.connect(str(database))
        row = connection.cursor().execute("SELECT 1").fetchone()
        connection.close()

        if row and row[0] == 1:
            print("✅ Database connection successful")
            return True
        print("❌ Database query failed")
        return False
    except Exception as err:
        print(f"❌ Database error: {err}")
        return False
def test_face_recognition_import():
    """Test face recognition module import."""
    try:
        from backend import visual_identifier  # noqa: F401 - import is the test
    except ImportError as err:
        print(f"❌ Face recognition import error: {err}")
        return False
    print("✅ Face recognition module imported successfully")
    return True
def test_config_loading():
    """Test configuration loading."""
    try:
        cfg_dir = Path(__file__).parent.parent / "config"
        if not cfg_dir.exists():
            print("❌ Configuration directory not found")
            return False

        py_files = list(cfg_dir.glob("*.py"))
        if py_files:
            print(f"✅ Configuration directory found with {len(py_files)} files")
        else:
            print("⚠️ Configuration directory exists but no Python files found")
        # The directory itself being present is what this check cares about.
        return True
    except Exception as err:
        print(f"❌ Configuration error: {err}")
        return False
def test_directory_structure():
    """Test that all required directories exist.

    Paths are resolved relative to the project root (two levels above this
    file) rather than the current working directory, so the check gives the
    same answer no matter where the test runner is launched from — this is
    consistent with test_database_connection() and test_main_app_file(),
    which already anchor at Path(__file__).parent.parent.

    Returns:
        bool: True when every required directory is present.
    """
    required_dirs = [
        "src/backend",
        "src/frontend",
        "src/utils",
        "tests",
        "data",
        "config",
        "docs",
        "photos",
        "scripts",
        "assets",
    ]
    project_root = Path(__file__).parent.parent
    missing_dirs = [d for d in required_dirs if not (project_root / d).exists()]
    if missing_dirs:
        print(f"❌ Missing directories: {missing_dirs}")
        return False
    print("✅ All required directories exist")
    return True
def test_steering_documents():
    """Test that steering documents exist.

    Resolved against the project root (two levels above this file) instead
    of the current working directory, matching the other path-based checks
    in this suite so the result does not depend on where pytest is run.

    Returns:
        bool: True when every steering document is present.
    """
    required_docs = [
        "docs/product.md",
        "docs/structure.md",
        "docs/tech.md",
        "docs/api-standards.md",
        "docs/testing-standards.md",
        "docs/code-conventions.md",
    ]
    project_root = Path(__file__).parent.parent
    missing_docs = [d for d in required_docs if not (project_root / d).exists()]
    if missing_docs:
        print(f"❌ Missing steering documents: {missing_docs}")
        return False
    print("✅ All steering documents exist")
    return True
def test_main_app_file():
    """Test that the main application file exists and is accessible."""
    try:
        app_file = Path(__file__).parent.parent / "src" / "backend" / "app.py"
        if not app_file.exists():
            print("❌ Main app file not found")
            return False

        print(f"✅ Main app file found: {app_file}")
        # Readability check plus a light sanity grep for Flask markers.
        source = app_file.read_text()
        if 'Flask' in source and 'app' in source:
            print("✅ Main app file contains Flask app")
        else:
            print("⚠️ Main app file exists but doesn't contain expected Flask content")
        return True
    except Exception as err:
        print(f"❌ Main app file error: {err}")
        return False
def run_all_tests():
    """Run every check in sequence and print a pass/fail summary.

    Returns True when all checks pass, False otherwise.
    """
    print("🧪 Running PunimTag Test Suite")
    print("=" * 50)

    checks = [
        test_imports,
        test_database_connection,
        test_face_recognition_import,
        test_config_loading,
        test_directory_structure,
        test_steering_documents,
        test_main_app_file,
    ]

    passed = 0
    for check in checks:
        try:
            if check():
                passed += 1
        except Exception as exc:
            # A crashing check counts as a failure but does not stop the run.
            print(f"❌ Test {check.__name__} failed with exception: {exc}")

    total = len(checks)
    print("=" * 50)
    print(f"📊 Test Results: {passed}/{total} tests passed")
    if passed == total:
        print("🎉 All tests passed!")
        return True
    print("⚠️ Some tests failed")
    return False


if __name__ == "__main__":
    success = run_all_tests()
    sys.exit(0 if success else 1)

View File

@ -1,250 +0,0 @@
#!/usr/bin/env python3
"""
Test script for PunimTag
Tests core functionality including face detection, recognition, tagging, and search
"""
import os
import shutil
import tempfile
import unittest
from datetime import datetime
from punimtag import PunimTag
import numpy as np
class TestPunimTag(unittest.TestCase):
    """Core PunimTag behavior: schema, people, tags, metadata, and search."""

    def setUp(self):
        """Set up test environment"""
        # Create temporary directory for test database
        self.test_dir = tempfile.mkdtemp()
        self.db_path = os.path.join(self.test_dir, 'test.db')
        self.photos_dir = os.path.join(self.test_dir, 'photos')
        os.makedirs(self.photos_dir, exist_ok=True)
        # Initialize PunimTag with test database
        self.tagger = PunimTag(db_path=self.db_path, photos_dir=self.photos_dir)

    def tearDown(self):
        """Clean up test environment"""
        self.tagger.close()
        shutil.rmtree(self.test_dir)

    def test_database_creation(self):
        """Test that database tables are created correctly"""
        c = self.tagger.conn.cursor()
        # Check tables exist
        c.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = {row[0] for row in c.fetchall()}
        # Set intersection: extra tables are allowed, missing ones are not.
        expected_tables = {'images', 'people', 'faces', 'tags', 'image_tags'}
        self.assertEqual(tables & expected_tables, expected_tables)

    def test_add_person(self):
        """Test adding people to database"""
        # Add person
        person_id = self.tagger.add_person("John Doe")
        self.assertIsNotNone(person_id)
        # Verify person exists
        c = self.tagger.conn.cursor()
        c.execute("SELECT name FROM people WHERE id = ?", (person_id,))
        result = c.fetchone()
        self.assertEqual(result[0], "John Doe")
        # Test duplicate handling: re-adding the same name returns the same id
        person_id2 = self.tagger.add_person("John Doe")
        self.assertEqual(person_id, person_id2)

    def test_add_tag(self):
        """Test tag creation"""
        # Add tag without category
        tag_id1 = self.tagger.add_tag("vacation")
        self.assertIsNotNone(tag_id1)
        # Add tag with category
        tag_id2 = self.tagger.add_tag("beach", "location")
        self.assertIsNotNone(tag_id2)
        # Verify tags exist
        c = self.tagger.conn.cursor()
        c.execute("SELECT name, category FROM tags WHERE id = ?", (tag_id2,))
        result = c.fetchone()
        self.assertEqual(result[0], "beach")
        self.assertEqual(result[1], "location")

    def test_metadata_extraction(self):
        """Test metadata extraction from images"""
        # Test with a non-existent file - should handle gracefully
        try:
            metadata = self.tagger.extract_metadata("nonexistent.jpg")
            # If it doesn't raise an exception, check default values
            self.assertIsNone(metadata['date_taken'])
            self.assertIsNone(metadata['latitude'])
            self.assertIsNone(metadata['longitude'])
        except FileNotFoundError:
            # This is also acceptable behavior
            pass

    def test_face_identification(self):
        """Test face identification logic"""
        # Test with no known faces: a random encoding must match nobody
        result = self.tagger.identify_face(np.random.rand(128))
        self.assertEqual(result, (None, None))
        # Would need actual face encodings for more thorough testing

    def test_search_functionality(self):
        """Test search capabilities"""
        # Search with no data should return empty
        results = self.tagger.search_images()
        self.assertEqual(len(results), 0)
        # Test with filters
        results = self.tagger.search_images(
            people=["John Doe"],
            tags=["vacation"],
            date_from=datetime(2023, 1, 1),
            date_to=datetime(2023, 12, 31)
        )
        self.assertEqual(len(results), 0)

    def test_unidentified_faces(self):
        """Test getting unidentified faces"""
        faces = self.tagger.get_unidentified_faces()
        self.assertEqual(len(faces), 0)  # Should be empty initially
class TestImageProcessing(unittest.TestCase):
    """Test image processing with actual images.

    Test images are created once per class in setUpClass; each test gets a
    fresh database in the shared class-level temp directory.
    """

    @classmethod
    def setUpClass(cls):
        """Create test images"""
        cls.test_dir = tempfile.mkdtemp()
        cls.photos_dir = os.path.join(cls.test_dir, 'photos')
        os.makedirs(cls.photos_dir, exist_ok=True)
        # Create test images (simple colored squares)
        try:
            from PIL import Image
            # Create a few test images
            for i, color in enumerate(['red', 'green', 'blue']):
                img = Image.new('RGB', (100, 100), color)
                img.save(os.path.join(cls.photos_dir, f'test_{color}.jpg'))
        except ImportError:
            # Tests still run without Pillow; the directory is just empty.
            print("PIL not available, skipping image creation")

    @classmethod
    def tearDownClass(cls):
        """Clean up test images"""
        shutil.rmtree(cls.test_dir)

    def setUp(self):
        """Set up for each test"""
        # self.test_dir resolves to the class attribute set in setUpClass.
        self.db_path = os.path.join(self.test_dir, 'test.db')
        self.tagger = PunimTag(db_path=self.db_path, photos_dir=self.photos_dir)

    def tearDown(self):
        """Clean up after each test"""
        self.tagger.close()
        # Remove the per-test database so the next test starts clean.
        if os.path.exists(self.db_path):
            os.remove(self.db_path)

    def test_process_directory(self):
        """Test processing a directory of images"""
        # Process all images
        processed = self.tagger.process_directory()
        # Should process the test images (if created)
        self.assertGreaterEqual(processed, 0)
        # Check images were added to database
        c = self.tagger.conn.cursor()
        c.execute("SELECT COUNT(*) FROM images")
        count = c.fetchone()[0]
        self.assertEqual(count, processed)
def test_with_sample_images(image_paths):
    """
    Test PunimTag with actual image files
    Args:
        image_paths: List of paths to test images
    """
    print("Testing PunimTag with sample images")
    print("=" * 50)

    # Reserve a temporary file name for the throwaway database.
    with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp:
        db_path = tmp.name

    try:
        # Initialize PunimTag
        tagger = PunimTag(db_path=db_path)

        # Process each image
        print(f"\nProcessing {len(image_paths)} images...")
        for candidate in image_paths:
            if not os.path.exists(candidate):
                print(f" ✗ File not found: {candidate}")
                continue
            print(f"Processing: {candidate}")
            try:
                new_id = tagger.process_image(candidate)
                print(f" ✓ Added to database with ID: {new_id}")
            except Exception as err:
                print(f" ✗ Error: {err}")

        # Show statistics
        cur = tagger.conn.cursor()
        cur.execute("SELECT COUNT(*) FROM images")
        print(f"\nTotal images: {cur.fetchone()[0]}")
        cur.execute("SELECT COUNT(*) FROM faces")
        print(f"Total faces detected: {cur.fetchone()[0]}")

        # Get unidentified faces
        print(f"Unidentified faces: {len(tagger.get_unidentified_faces())}")

        # Close connection
        tagger.close()
        print("\n✓ Test completed successfully!")
    finally:
        # Clean up
        if os.path.exists(db_path):
            os.remove(db_path)
def main():
    """Main test runner"""
    print("PunimTag Test Suite")
    print("=" * 50)

    # Run unit tests; exit=False keeps unittest from calling sys.exit()
    # so the script can continue afterwards.
    print("\nRunning unit tests...")
    unittest.main(argv=[''], exit=False, verbosity=2)

    print("\n" + "=" * 50)
    print("To test with actual images, call:")
    print("python test_punimtag.py image1.jpg image2.jpg ...")

    # Any extra command-line arguments are treated as image paths.
    import sys
    extra_args = sys.argv[1:]
    if extra_args:
        test_with_sample_images(extra_args)


if __name__ == "__main__":
    main()

View File

@ -1,286 +0,0 @@
#!/usr/bin/env python3
"""
API Test Suite for PunimTag Web GUI
Tests all web endpoints to identify issues with pre-load check
"""
import requests
import json
import time
import sys
from urllib.parse import urljoin
class WebAPITester:
def __init__(self, base_url="http://127.0.0.1:5000"):
self.base_url = base_url
self.session = requests.Session()
self.session.headers.update({
'Content-Type': 'application/json',
'Accept': 'application/json'
})
def test_endpoint(self, endpoint, method='GET', data=None, timeout=10, expected_status=200):
"""Test a single endpoint with timeout and error handling"""
url = urljoin(self.base_url, endpoint)
print(f"Testing {method} {endpoint}...")
start_time = time.time()
try:
if method == 'GET':
response = self.session.get(url, timeout=timeout)
elif method == 'POST':
response = self.session.post(url, json=data, timeout=timeout)
else:
raise ValueError(f"Unsupported method: {method}")
elapsed = time.time() - start_time
if response.status_code == expected_status:
print(f" ✅ SUCCESS ({elapsed:.2f}s) - Status: {response.status_code}")
try:
return response.json()
except:
return response.text
else:
print(f" ❌ FAILED ({elapsed:.2f}s) - Status: {response.status_code}")
print(f" Response: {response.text[:200]}")
return None
except requests.exceptions.Timeout:
elapsed = time.time() - start_time
print(f" ⏰ TIMEOUT ({elapsed:.2f}s) - Endpoint took too long")
return None
except requests.exceptions.ConnectionError:
print(f" 🔌 CONNECTION ERROR - Cannot connect to {self.base_url}")
return None
except Exception as e:
elapsed = time.time() - start_time
print(f" 💥 ERROR ({elapsed:.2f}s) - {str(e)}")
return None
def test_preload_endpoints(self):
"""Test all endpoints used in pre-load check"""
print("\n🔍 Testing Pre-load Check Endpoints")
print("=" * 50)
# Test database connection
db_result = self.test_endpoint('/check_database')
if not db_result:
print(" ❌ Database check failed - this will cause pre-load issues")
return False
# Test system status
status_result = self.test_endpoint('/system_status')
if not status_result:
print(" ❌ System status failed - this will cause pre-load issues")
return False
# Test debug endpoint
debug_result = self.test_endpoint('/debug/preload_test')
if not debug_result:
print(" ❌ Debug endpoint failed - this will cause pre-load issues")
return False
print(" ✅ All pre-load endpoints working correctly")
return True
def test_main_endpoints(self):
"""Test main application endpoints"""
print("\n📱 Testing Main Application Endpoints")
print("=" * 50)
# Test main page
main_result = self.test_endpoint('/', expected_status=200)
if not main_result:
print(" ❌ Main page failed")
return False
# Test photos endpoint
photos_result = self.test_endpoint('/get_photos?tab=all_photos&page=1&per_page=1')
if not photos_result:
print(" ❌ Photos endpoint failed")
return False
# Test faces endpoint
faces_result = self.test_endpoint('/get_faces?tab=unidentified&page=1&per_page=1')
if not faces_result:
print(" ❌ Faces endpoint failed")
return False
print(" ✅ All main endpoints working correctly")
return True
def test_thumbnail_endpoints(self):
"""Test thumbnail generation endpoints"""
print("\n🖼️ Testing Thumbnail Endpoints")
print("=" * 50)
# First get a face ID to test with
faces_result = self.test_endpoint('/get_faces?tab=unidentified&page=1&per_page=1')
if not faces_result or not isinstance(faces_result, dict) or not faces_result.get('faces'):
print(" ⚠️ No faces available for thumbnail testing")
return True
faces = faces_result.get('faces', [])
if not faces:
print(" ⚠️ No faces available for thumbnail testing")
return True
face_id = faces[0].get('face_id')
if not face_id:
print(" ⚠️ No valid face ID found for thumbnail testing")
return True
# Test face thumbnail
thumbnail_result = self.test_endpoint(f'/get_thumbnail/{face_id}')
if not thumbnail_result:
print(" ❌ Face thumbnail endpoint failed")
return False
# Test photo thumbnail
photos_result = self.test_endpoint('/get_photos?tab=all_photos&page=1&per_page=1')
if photos_result and isinstance(photos_result, dict) and photos_result.get('photos'):
photos = photos_result.get('photos', [])
if photos:
photo_id = photos[0].get('image_id')
if photo_id:
photo_thumbnail_result = self.test_endpoint(f'/get_photo_thumbnail/{photo_id}')
if not photo_thumbnail_result:
print(" ❌ Photo thumbnail endpoint failed")
return False
print(" ✅ All thumbnail endpoints working correctly")
return True
def test_performance(self):
"""Test endpoint performance"""
print("\n⚡ Performance Testing")
print("=" * 50)
endpoints = [
'/check_database',
'/system_status',
'/debug/preload_test',
'/get_photos?tab=all_photos&page=1&per_page=1',
'/get_faces?tab=unidentified&page=1&per_page=1'
]
performance_results = {}
for endpoint in endpoints:
times = []
for i in range(3): # Test each endpoint 3 times
start_time = time.time()
result = self.test_endpoint(endpoint, timeout=30)
elapsed = time.time() - start_time
times.append(elapsed)
time.sleep(0.5) # Small delay between tests
avg_time = sum(times) / len(times)
performance_results[endpoint] = {
'avg_time': avg_time,
'min_time': min(times),
'max_time': max(times),
'success': all(t < 30 for t in times) # All under 30s timeout
}
status = "" if performance_results[endpoint]['success'] else ""
print(f" {status} {endpoint}: {avg_time:.2f}s avg ({min(times):.2f}s-{max(times):.2f}s)")
return performance_results
def test_browser_simulation(self):
"""Simulate browser behavior for pre-load check"""
print("\n🌐 Browser Simulation Test")
print("=" * 50)
# Simulate the exact pre-load check sequence
checks = [
{ 'name': 'Database Connection', 'endpoint': '/check_database' },
{ 'name': 'System Status', 'endpoint': '/system_status' },
{ 'name': 'Debug Test', 'endpoint': '/debug/preload_test' }
]
all_passed = True
for check in checks:
print(f"Testing {check['name']}...")
result = self.test_endpoint(check['endpoint'], timeout=10)
if result:
print(f"{check['name']} passed")
else:
print(f"{check['name']} failed")
all_passed = False
if all_passed:
print(" 🎉 All browser simulation tests passed!")
else:
print(" 💥 Some browser simulation tests failed!")
return all_passed
def run_all_tests(self):
"""Run all tests"""
print("🚀 Starting PunimTag Web API Test Suite")
print("=" * 60)
# Test server connectivity first
print("\n🔌 Testing Server Connectivity")
print("-" * 30)
try:
response = self.session.get(self.base_url, timeout=5)
print(f"✅ Server is running at {self.base_url}")
except Exception as e:
print(f"❌ Cannot connect to server: {e}")
print("Make sure the server is running with: python simple_web_gui.py")
return False
# Run all test suites
results = {
'preload': self.test_preload_endpoints(),
'main': self.test_main_endpoints(),
'thumbnails': self.test_thumbnail_endpoints(),
'performance': self.test_performance(),
'browser_sim': self.test_browser_simulation()
}
# Summary
print("\n📊 Test Summary")
print("=" * 60)
passed = sum(1 for result in results.values() if result)
total = len(results)
for test_name, result in results.items():
status = "✅ PASS" if result else "❌ FAIL"
print(f" {status} {test_name.replace('_', ' ').title()}")
print(f"\nOverall: {passed}/{total} test suites passed")
if passed == total:
print("🎉 All tests passed! The web API is working correctly.")
print("If the browser is still stuck, the issue might be:")
print(" - Browser cache (try Ctrl+F5)")
print(" - CORS issues (check browser console)")
print(" - JavaScript errors (check browser console)")
else:
print("💥 Some tests failed. Check the output above for details.")
return passed == total
def main():
    """Entry point: run the full suite and exit with a shell status code.

    An optional first CLI argument overrides the default local server URL.
    Exits 0 on success, 1 on any failure.
    """
    base_url = sys.argv[1] if len(sys.argv) > 1 else "http://127.0.0.1:5000"
    tester = WebAPITester(base_url)
    sys.exit(0 if tester.run_all_tests() else 1)
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()