From 7dd95cbcd0b868dc0e8b5c581fd934bebd9f7917 Mon Sep 17 00:00:00 2001
From: Tanya <tatiana.romlit@gmail.com>
Date: Thu, 8 Jan 2026 14:33:51 -0500
Subject: [PATCH] chore: Add Gitleaks configuration and enhance CI workflow for
 backend validation

This commit adds a Gitleaks configuration file that allowlists known false positives (a test JWT fixture and the demo_photos sample files) so that secret scanning only reports actionable findings. It also adds a CI build step that validates the backend by importing its settings, API routers, and app factory without starting the server or connecting to a database, and makes the workflow summary step tolerate an unset GITHUB_STEP_SUMMARY or a summary path whose directory does not exist yet.
---
 .gitea/workflows/ci.yml | 78 +++++++++++++++++++++++++++++++++++++++--
 .gitleaks.toml          | 25 +++++++++++++
 tests/test_api_auth.py  |  1 +
 3 files changed, 102 insertions(+), 2 deletions(-)
 create mode 100644 .gitleaks.toml

diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml
index df89dfe..8e4da8b 100644
--- a/.gitea/workflows/ci.yml
+++ b/.gitea/workflows/ci.yml
@@ -417,6 +417,74 @@ jobs:
       - name: Check out code
         uses: actions/checkout@v4
 
+      - name: Validate backend (imports and app instantiation)
+        run: |
+          # Install Python and pip
+          apt-get update && apt-get install -y python3 python3-pip python3-venv
+          
+          # Create virtual environment
+          python3 -m venv /tmp/backend-venv
+          source /tmp/backend-venv/bin/activate
+          
+          # Install core dependencies (skip heavy ML dependencies for faster build)
+          # NOTE: requirements.txt is not read here; this hand-picked list only has to satisfy the import checks below
+          pip install --no-cache-dir fastapi uvicorn pydantic sqlalchemy psycopg2-binary redis rq python-jose python-multipart python-dotenv bcrypt
+          
+          # Set environment variables for validation
+          export PYTHONPATH=$(pwd)
+          export SKIP_DEEPFACE_IN_TESTS=1
+          # Use dummy database URLs - we're only validating imports, not connections
+          export DATABASE_URL=postgresql+psycopg2://postgres:postgres@localhost:5432/punimtag_test
+          export DATABASE_URL_AUTH=postgresql+psycopg2://postgres:postgres@localhost:5432/punimtag_auth_test
+          export REDIS_URL=redis://localhost:6379/0
+          
+          # Validate imports and app instantiation (without starting server or connecting to DB)
+          echo "🔍 Validating backend imports and structure..."
+          python3 << 'EOF'
+          import sys
+          import traceback
+          sys.path.insert(0, '.')
+          
+          # Every check below prints the error plus a traceback and exits 1, so the CI step fails visibly.
+          # Test that core modules can be imported
+          try:
+              from backend.settings import APP_TITLE, APP_VERSION
+              print(f'✅ App settings loaded: {APP_TITLE} v{APP_VERSION}')
+          except ImportError as e:
+              print(f'❌ Settings import error: {e}')
+              traceback.print_exc()
+              sys.exit(1)
+          
+          # Test that all API routers can be imported (validates import structure)
+          try:
+              from backend.api import (
+                  auth, faces, health, jobs, metrics, people,
+                  pending_identifications, pending_linkages, photos,
+                  reported_photos, pending_photos, tags, users,
+                  auth_users, role_permissions, videos, version
+              )
+              print('✅ All API routers imported successfully')
+          except ImportError as e:
+              print(f'❌ API router import error: {e}')
+              traceback.print_exc()
+              sys.exit(1)
+          
+          # Test that app factory can be imported
+          try:
+              from backend.app import create_app
+              print('✅ App factory imported successfully')
+          except ImportError as e:
+              print(f'❌ App factory import error: {e}')
+              traceback.print_exc()
+              sys.exit(1)
+          
+          # Note: We don't actually call create_app() here because it would trigger
+          # database initialization in the lifespan, which requires a real database.
+          # The import validation above is sufficient to catch most build-time errors.
+          print('✅ Backend structure validated (imports and dependencies)')
+          EOF
+          echo "✅ Backend validation complete"
+
       - name: Install admin-frontend dependencies
         run: |
           cd admin-frontend
@@ -612,6 +680,12 @@ jobs:
     steps:
       - name: Generate workflow summary
         run: |
+          SUMMARY_FILE="${GITHUB_STEP_SUMMARY:-/dev/stdout}"
+          # Ensure directory exists if using a file path (for act/local runners)
+          if [ "$SUMMARY_FILE" != "/dev/stdout" ] && [ "$SUMMARY_FILE" != "/dev/stderr" ]; then
+            mkdir -p "$(dirname "$SUMMARY_FILE")" || true
+            touch "$SUMMARY_FILE" || true
+          fi
           {
             echo "## 🔍 CI Workflow Summary"
             echo ""
@@ -624,7 +698,7 @@ jobs:
             echo "| 📝 Lint & Type Check | Runs ESLint on the admin UI and TypeScript type-checks the viewer UI | ${{ needs.lint-and-type-check.result }} |"
             echo "| 🐍 Python Lint | Runs Python style and syntax checks over the backend | ${{ needs.python-lint.result }} |"
             echo "| 🧪 Backend Tests | Runs \`pytest tests/ -v\` against the FastAPI backend (with coverage) | ${{ needs.test-backend.result }} |"
-            echo "| 🏗️ Build | Builds the admin frontend (Vite) and viewer frontend (Next.js) | ${{ needs.build.result }} |"
+            echo "| 🏗️ Build | Validates backend imports/structure, builds admin frontend (Vite), and viewer frontend (Next.js) | ${{ needs.build.result }} |"
             echo "| 🔐 Secret Scanning | Uses Gitleaks to look for committed secrets | ${{ needs.secret-scanning.result }} |"
             echo "| 📦 Dependency Scan | Uses Trivy to scan dependencies for HIGH/CRITICAL vulns | ${{ needs.dependency-scan.result }} |"
             echo "| 🔍 SAST Scan | Uses Semgrep to look for insecure code patterns | ${{ needs.sast-scan.result }} |"
@@ -642,6 +716,6 @@ jobs:
             echo "  1. Open the **test-backend** job in this workflow run."
             echo "  2. Look at the **Run backend tests** step to see the \`pytest -v\` output."
             echo "  3. For local debugging, run \`pytest tests/ -v\` in your dev environment."
-          } >> \"${GITHUB_STEP_SUMMARY:-/dev/stdout}\"
+          } >> "$SUMMARY_FILE" || true
         continue-on-error: true
 
diff --git a/.gitleaks.toml b/.gitleaks.toml
new file mode 100644
index 0000000..86dc85e
--- /dev/null
+++ b/.gitleaks.toml
@@ -0,0 +1,25 @@
+# Gitleaks configuration file
+# This file configures gitleaks to ignore known false positives
+
+title = "PunimTag Gitleaks Configuration"
+
+[allowlist]
+description = "Allowlist for known false positives and test files"
+
+# Ignore demo photos directory (contains sample/test HTML files)
+paths = [
+  '''demo_photos/.*''',
+]
+
+# Ignore specific commits that contain known false positives
+# These are test tokens or sample files, not real secrets
+commits = [
+  "77ffbdcc5041cd732bfcbc00ba513bccb87cfe96",  # test_api_auth.py expired_token test
+  "d300eb1122d12ffb2cdc3fab6dada520b53c20da",  # demo_photos/imgres.html sample file
+]
+
+# Allowlist the intentionally invalid JWT fixture from tests/test_api_auth.py (regexes are matched against the detected secret, not the file path)
+regexes = [
+  '''eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9\.eyJzdWIiOiJhZG1pbiIsImV4cCI6MTYwOTQ1NjgwMH0\.invalid''',
+]
+
diff --git a/tests/test_api_auth.py b/tests/test_api_auth.py
index 42adcd3..766a3a2 100644
--- a/tests/test_api_auth.py
+++ b/tests/test_api_auth.py
@@ -432,6 +432,7 @@ class TestAuthenticationMiddleware:
     def test_get_current_user_with_expired_token(self, test_client: TestClient):
         """Verify 401 with expired JWT."""
         # Create an obviously invalid/expired token
+        # Note: This is a test fixture, not a real secret. The token is intentionally invalid.
         expired_token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJhZG1pbiIsImV4cCI6MTYwOTQ1NjgwMH0.invalid"
         
         response = test_client.get(