feat: Add PrismaCompatibleDate type and enhance date validation in photo extraction

This commit introduces a new `PrismaCompatibleDate` type that stores dates as `YYYY-MM-DD HH:MM:SS` strings (with a `00:00:00` time component) so that Prisma's SQLite driver can read them. Additionally, the `extract_exif_date`, `extract_video_date`, and `extract_photo_date` functions now validate extracted dates, rejecting future dates and dates before 1900, to improve data integrity during photo and video metadata extraction.
This commit is contained in:
Tanya 2026-01-05 15:05:03 -05:00
parent c69604573d
commit fe01ff51b8
2 changed files with 115 additions and 6 deletions

View File

@ -89,6 +89,89 @@ class PrismaCompatibleDateTime(TypeDecorator):
return value
class PrismaCompatibleDate(TypeDecorator):
    """
    Date type that stores in DateTime format for Prisma compatibility.

    Prisma's SQLite driver expects DateTime format (YYYY-MM-DD HH:MM:SS) even for dates.
    This type stores dates with a time component (00:00:00) so Prisma can read them correctly,
    while still using Python's date type in the application.

    Uses String as the underlying type for SQLite to have full control over the format.

    Note: when a *string* that already carries a well-formed ``HH:MM:SS`` time is bound,
    that time is kept as-is; only the date part is used when the value is read back.
    """

    impl = String
    cache_ok = True

    # Time component appended to every stored date.
    _MIDNIGHT = "00:00:00"

    def process_bind_param(self, value, dialect):
        """Convert a Python value to the space-separated DateTime string that is stored.

        Args:
            value: ``None``, a ``date``/``datetime``, or a date(-time) string in
                ``YYYY-MM-DD``, ``YYYY-MM-DD HH:MM:SS`` or ``YYYY-MM-DDTHH:MM:SS`` form.
            dialect: The SQL dialect in use (unused).

        Returns:
            ``None`` for ``None``; a ``YYYY-MM-DD HH:MM:SS`` string for dates,
            datetimes and recognised strings; any other value is returned unchanged.
        """
        if value is None:
            return None
        # datetime is a subclass of date, so it must be tested first.
        if isinstance(value, datetime):
            value = value.date()
        if isinstance(value, date):
            # isoformat() always zero-pads the year, unlike strftime('%Y') on some platforms.
            return f"{value.isoformat()} {self._MIDNIGHT}"
        if isinstance(value, str):
            return self._normalize_string(value)
        return value

    @classmethod
    def _normalize_string(cls, value):
        """Rewrite a date(-time) string into ``YYYY-MM-DD HH:MM:SS`` form where possible."""
        if 'T' in value:
            # ISO format with T - convert to space-separated and drop the
            # UTC offset, 'Z' suffix and fractional seconds.
            date_part, time_part = value.split('T', 1)
            time_part = time_part.split('+')[0].split('-')[0].split('Z')[0].split('.')[0]
        elif ' ' in value:
            # Already space-separated - only remove microseconds if present.
            date_part, time_part = value.split(' ', 1)
            time_part = time_part.split('.')[0]
        else:
            # Just date (YYYY-MM-DD) - add time component.
            try:
                parsed = datetime.strptime(value, '%Y-%m-%d').date()
            except ValueError:
                # If parsing fails, return as-is
                return value
            return f"{parsed.isoformat()} {cls._MIDNIGHT}"

        if len(time_part.split(':')) == 3:
            return f"{date_part} {time_part}"
        # Missing or malformed time component - replace it with midnight.
        return f"{date_part} {cls._MIDNIGHT}"

    def process_result_value(self, value, dialect):
        """Convert the stored SQL value back to a Python date.

        Args:
            value: ``None``, a stored string (ISO 8601 with ``T``, space-separated
                DateTime, or plain ``YYYY-MM-DD``), or an already-converted
                ``date``/``datetime``.
            dialect: The SQL dialect in use (unused).

        Returns:
            ``None`` for ``None``; a ``date`` for strings, dates and datetimes;
            any other value is returned unchanged.

        Raises:
            ValueError: If a string value contains no parseable date.
        """
        if value is None:
            return None
        if isinstance(value, datetime):
            return value.date()
        if isinstance(value, date):
            return value
        if isinstance(value, str):
            # The date is the first token, whether the separator is 'T' or whitespace.
            tokens = value.replace('T', ' ').split()
            if not tokens:
                raise ValueError(f"Cannot parse a date from {value!r}")
            date_token = tokens[0]
            try:
                return datetime.strptime(date_token, '%Y-%m-%d').date()
            except ValueError:
                # Fallback to ISO format parser; a ValueError from here propagates.
                return datetime.fromisoformat(date_token).date()
        return value
class Photo(Base):
"""Photo model - matches desktop schema exactly."""
@ -98,7 +181,7 @@ class Photo(Base):
path = Column(Text, unique=True, nullable=False, index=True)
filename = Column(Text, nullable=False)
date_added = Column(PrismaCompatibleDateTime, default=datetime.utcnow, nullable=False)
date_taken = Column(Date, nullable=True, index=True)
date_taken = Column(PrismaCompatibleDate, nullable=True, index=True)
processed = Column(Boolean, default=False, nullable=False, index=True)
file_hash = Column(Text, nullable=True, index=True) # Nullable to support existing photos without hashes
media_type = Column(Text, default="image", nullable=False, index=True) # "image" or "video"

View File

@ -100,12 +100,20 @@ def extract_exif_date(image_path: str) -> Optional[date]:
# Parse EXIF date format (YYYY:MM:DD HH:MM:SS)
try:
dt = datetime.strptime(date_str, "%Y:%m:%d %H:%M:%S")
return dt.date()
extracted_date = dt.date()
# Validate date before returning (reject future dates and dates before 1900)
if extracted_date > date.today() or extracted_date < date(1900, 1, 1):
continue # Skip invalid dates
return extracted_date
except ValueError:
# Try alternative format
try:
dt = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")
return dt.date()
extracted_date = dt.date()
# Validate date before returning (reject future dates and dates before 1900)
if extracted_date > date.today() or extracted_date < date(1900, 1, 1):
continue # Skip invalid dates
return extracted_date
except ValueError:
continue
except (KeyError, TypeError):
@ -123,11 +131,19 @@ def extract_exif_date(image_path: str) -> Optional[date]:
if date_str:
try:
dt = datetime.strptime(date_str, "%Y:%m:%d %H:%M:%S")
return dt.date()
extracted_date = dt.date()
# Validate date before returning (reject future dates and dates before 1900)
if extracted_date > date.today() or extracted_date < date(1900, 1, 1):
continue # Skip invalid dates
return extracted_date
except ValueError:
try:
dt = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")
return dt.date()
extracted_date = dt.date()
# Validate date before returning (reject future dates and dates before 1900)
if extracted_date > date.today() or extracted_date < date(1900, 1, 1):
continue # Skip invalid dates
return extracted_date
except ValueError:
continue
except Exception:
@ -216,7 +232,11 @@ def extract_video_date(video_path: str) -> Optional[date]:
else:
# Try other common formats
dt = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")
return dt.date()
extracted_date = dt.date()
# Validate date before returning (reject future dates and dates before 1900)
if extracted_date > date.today() or extracted_date < date(1900, 1, 1):
continue # Skip invalid dates
return extracted_date
except (ValueError, AttributeError):
continue
except (subprocess.TimeoutExpired, FileNotFoundError, json.JSONDecodeError, Exception) as e:
@ -230,6 +250,9 @@ def extract_video_date(video_path: str) -> Optional[date]:
if os.path.exists(video_path):
mtime = os.path.getmtime(video_path)
mtime_date = datetime.fromtimestamp(mtime).date()
# Validate date before returning (reject future dates and dates before 1900)
if mtime_date > date.today() or mtime_date < date(1900, 1, 1):
return None # Skip invalid dates
return mtime_date
except Exception as e:
# Log error for debugging (but don't fail the import)
@ -260,6 +283,9 @@ def extract_photo_date(image_path: str) -> Optional[date]:
if os.path.exists(image_path):
mtime = os.path.getmtime(image_path)
mtime_date = datetime.fromtimestamp(mtime).date()
# Validate date before returning (reject future dates and dates before 1900)
if mtime_date > date.today() or mtime_date < date(1900, 1, 1):
return None # Skip invalid dates
return mtime_date
except Exception as e:
# Log error for debugging (but don't fail the import)