From 1a7298f7554df6eb98a10199b3e371b701880c32 Mon Sep 17 00:00:00 2001 From: ilia Date: Sat, 4 Apr 2026 12:31:53 -0400 Subject: [PATCH] Initial commit: EventRate pipeline, fuzzy dedup, Airbnb retries Wire up Ticketmaster, SeatGeek, Telegram, scoring, Playwright stubs. Deduplicate events with fuzzy venue/name matching. Retry calendar updates on transient failures. Backlog tasks marked complete. Made-with: Cursor --- .env.example | 16 +++ .gitignore | 24 +++++ ARCHITECTURE.md | 96 ++++++++++++++++++ BACKLOG.md | 65 ++++++++++++ Dockerfile | 21 ++++ PROJECT.md | 62 ++++++++++++ README.md | 70 +++++++++++++ requirements.txt | 5 + scripts/airbnb_login.py | 22 ++++ src/__init__.py | 0 src/__main__.py | 5 + src/airbnb/__init__.py | 0 src/airbnb/auth.py | 68 +++++++++++++ src/airbnb/calendar.py | 104 +++++++++++++++++++ src/config.py | 28 +++++ src/dedup.py | 81 +++++++++++++++ src/log.py | 18 ++++ src/main.py | 186 ++++++++++++++++++++++++++++++++++ src/models.py | 27 +++++ src/notifications/__init__.py | 0 src/notifications/telegram.py | 85 ++++++++++++++++ src/providers/__init__.py | 5 + src/providers/base.py | 23 +++++ src/providers/seatgeek.py | 105 +++++++++++++++++++ src/providers/ticketmaster.py | 125 +++++++++++++++++++++++ src/scoring/__init__.py | 0 src/scoring/impact.py | 59 +++++++++++ tests/__init__.py | 0 28 files changed, 1300 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 ARCHITECTURE.md create mode 100644 BACKLOG.md create mode 100644 Dockerfile create mode 100644 PROJECT.md create mode 100644 README.md create mode 100644 requirements.txt create mode 100644 scripts/airbnb_login.py create mode 100644 src/__init__.py create mode 100644 src/__main__.py create mode 100644 src/airbnb/__init__.py create mode 100644 src/airbnb/auth.py create mode 100644 src/airbnb/calendar.py create mode 100644 src/config.py create mode 100644 src/dedup.py create mode 100644 src/log.py create mode 100644 src/main.py create mode 100644 src/models.py create mode 100644 src/notifications/__init__.py create mode 100644 src/notifications/telegram.py create mode 100644 src/providers/__init__.py create mode 100644 src/providers/base.py create mode 100644 src/providers/seatgeek.py create mode 100644 src/providers/ticketmaster.py create mode 100644 src/scoring/__init__.py create mode 100644 src/scoring/impact.py create mode 100644 tests/__init__.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..b8d3599 --- /dev/null +++ b/.env.example @@ -0,0 +1,16 @@ +# === Event providers === +TICKETMASTER_KEY=your_ticketmaster_api_key_here +SEATGEEK_CLIENT_ID=your_seatgeek_client_id_here + +# === Telegram === +TELEGRAM_BOT_TOKEN=your_telegram_bot_token_here +TELEGRAM_CHAT_ID=your_telegram_chat_id_here + +# === Airbnb automation (optional) === +AIRBNB_LISTING_ID= +AIRBNB_BASE_PRICE=150 +PRICE_INCREASE_PCT=20 + +# === General === +LOOKAHEAD_DAYS=30 +LOG_LEVEL=INFO diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6d83c1c --- /dev/null +++ b/.gitignore @@ -0,0 +1,24 @@ +__pycache__/ +*.py[cod] +*$py.class +*.egg-info/ +dist/ +build/ +.eggs/ + +.venv/ +venv/ +env/ + +.env +state.json + +*.log +.DS_Store +Thumbs.db + +.pytest_cache/ +.mypy_cache/ +.ruff_cache/ +htmlcov/ +.coverage diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..eae4d14 --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,96 @@ +# Architecture + +## High-level flow + +``` +┌──────────────┐ ┌──────────────┐ +│ Ticketmaster │ │ SeatGeek │ +│ Provider │ │ Provider │ +└──────┬───────┘ └──────┬───────┘ + │ │ + └────────┬───────────┘ + ▼ + ┌───────────────┐ + │ Normalize & │ + │ Deduplicate │ + └───────┬───────┘ + ▼ + ┌───────────────┐ + │ Impact Score │ + └───────┬───────┘ + ▼ + ┌───────────────┐ + │ Filter by │ + │ date window │ + └───────┬───────┘ + │ + ┌───────┴───────┐ + ▼ ▼ +┌──────────────┐ ┌──────────────┐ +│ Telegram │ │ Airbnb │ +│ Alert │ │ Calendar │ +│ (primary) │ │ (optional) │ +└──────────────┘ └──────────────┘ +``` + +## Module map + +``` +src/ +├── main.py # CLI entrypoint, orchestration +├── config.py # Pydantic settings from env vars +├── models.py # NormalizedEvent dataclass +├── log.py # Structured logging configuration +├── dedup.py # Deduplication across providers +├── providers/ +│ ├── base.py # EventProvider abstract base class +│ ├── ticketmaster.py # Ticketmaster Discovery API +│ └── seatgeek.py # SeatGeek API +├── scoring/ +│ └── impact.py # Rule-based impact scoring +├── notifications/ +│ └── telegram.py # Telegram bot message sender +└── airbnb/ + ├── auth.py # Playwright storage state management + └── calendar.py # Calendar navigation and price updates +``` + +## Key design decisions + +### 1. Provider abstraction + +All event providers implement `EventProvider.fetch() -> list[NormalizedEvent]`. This makes it trivial to add new sources (Eventbrite, PredictHQ, scraping) without touching the orchestration layer. + +### 2. Normalized event model + +A single `NormalizedEvent` dataclass acts as the shared contract between providers, deduplication, scoring, and output formatting. Fields: name, date, venue, source, url, raw metadata. + +### 3. Airbnb automation is isolated and optional + +The `src/airbnb/` module is completely decoupled from event ingestion. If Playwright breaks (selectors change, login expires), the system degrades gracefully to Telegram-only alerts. The main runner catches all Airbnb errors and logs them without crashing. + +### 4. Storage state for auth + +Airbnb authentication uses Playwright's `storage_state` API. A one-time manual login script saves cookies/localStorage to `state.json`. Subsequent headless runs load this state. No passwords are stored in code or env vars. + +### 5. Configuration via environment + +All secrets and tunables live in env vars (loaded from `.env` in local dev). No config files to manage, easy to override in Docker/cron. + +### 6. No database + +For weekly runs processing dozens of events, in-memory processing is sufficient. If persistence becomes necessary (e.g., tracking price change history), a simple JSON file or SQLite would be the first step. + +## Error handling strategy + +- Each provider's `fetch()` is wrapped in try/except; one failing source does not block others. +- Telegram send failures are logged but do not block Airbnb updates. +- Airbnb automation failures are logged and reported via Telegram if possible. +- The main runner returns a nonzero exit code if all providers fail. + +## Future extension points + +- New providers: subclass `EventProvider` +- New notification channels: add modules under `notifications/` +- Smarter scoring: replace `scoring/impact.py` internals +- Multi-listing: extend config and loop in `main.py` diff --git a/BACKLOG.md b/BACKLOG.md new file mode 100644 index 0000000..f67e817 --- /dev/null +++ b/BACKLOG.md @@ -0,0 +1,65 @@ +# Backlog + +## Epic: Automated Airbnb Event Pricing + +### Story 1: Event Data Ingestion + +> As a host, I want to fetch upcoming Toronto events from public APIs so I can identify dates with high accommodation demand. + +| Task | Status | Notes | +|---|---|---| +| 1.1 Integrate Ticketmaster Discovery API | Done | `src/providers/ticketmaster.py`, major-venue filter | +| 1.2 Integrate SeatGeek API | Done | `src/providers/seatgeek.py`, score threshold | +| 1.3 Normalize events to common model | Done | `NormalizedEvent` in `src/models.py` | +| 1.4 Deduplicate events across providers | Done | `src/dedup.py` — date + fuzzy venue + fuzzy name | + +### Story 2: Telegram Notification System + +> As a host, I want to receive a Telegram message containing the flagged dates so I can review findings before automated pricing kicks in. + +| Task | Status | Notes | +|---|---|---| +| 2.1 Build Telegram bot alerter | Done | `src/notifications/telegram.py` (httpx, Bot API) | +| 2.2 Format message with grouped dates | Done | MarkdownV2, grouped by date | + +### Story 3: Browser Automation (Playwright) + +> As a host, I want a script to log into Airbnb and change prices on specific dates so I don't have to do it manually. + +| Task | Status | Notes | +|---|---|---| +| 3.1 Handle authentication & session state | Done | `src/airbnb/auth.py`, `scripts/airbnb_login.py` | +| 3.2 Automate calendar UI updates | Done | `src/airbnb/calendar.py` (selectors need live verification) | +| 3.3 Add retry/fallback on UI failure | Done | Retries in `update_price`; runner skips dates on failure | + +### Story 4: Scoring & Filtering + +> As a host, I want events scored by likely pricing impact so I only adjust prices for meaningful demand drivers. + +| Task | Status | Notes | +|---|---|---| +| 4.1 Define venue size/type scoring rules | Done | `src/scoring/impact.py` (`VENUE_CAPACITY`) | +| 4.2 Filter by date window (lookahead) | Done | `filter_by_window` in `src/main.py`, `lookahead_days` in config | + +### Story 5: Orchestration & Deployment + +> As a developer, I want to orchestrate the pipeline and deploy via Docker on cron. + +| Task | Status | Notes | +|---|---|---| +| 5.1 Main CLI runner with modes | Done | `src/main.py` — `--dry-run`, `--alerts-only`, full | +| 5.2 Dockerfile for Playwright | Done | `Dockerfile` (Chromium + deps) | +| 5.3 Cron configuration guide | Done | See [README.md](README.md) | + +--- + +## Future ideas (not scheduled) + +- Eventbrite integration +- PredictHQ integration (paid, but powerful) +- Web scraping fallback for niche events +- Historical price tracking (SQLite) +- Multi-listing support +- Smarter pricing model (base + multiplier per event score) +- Slack/Discord notification channel +- Dashboard/web UI diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..dbb71a1 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.11-slim + +WORKDIR /app + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 \ + libdrm2 libxkbcommon0 libxcomposite1 libxdamage1 \ + libxrandr2 libgbm1 libpango-1.0-0 libcairo2 \ + libasound2 libxshmfence1 && \ + rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt && \ + playwright install chromium + +COPY src/ src/ + +# Mount state.json at runtime: -v ./state.json:/app/state.json +ENTRYPOINT ["python", "-m", "src.main"] +CMD ["--alerts-only"] diff --git a/PROJECT.md b/PROJECT.md new file mode 100644 index 0000000..b34fd2d --- /dev/null +++ b/PROJECT.md @@ -0,0 +1,62 @@ +# Project: EventRate + +## Purpose + +Maximize Airbnb hosting revenue by automatically detecting high-demand dates in Toronto and adjusting nightly pricing accordingly. + +## Problem + +Toronto hosts major events (Raptors, Leafs, Blue Jays, concerts, festivals) that drive short-term rental demand. Manually tracking these events and updating Airbnb prices is tedious and error-prone. Missing a single large event can mean hundreds of dollars in lost revenue. + +## Solution + +A local Python application that: + +1. **Fetches** upcoming Toronto events from free public APIs (Ticketmaster, SeatGeek). +2. **Normalizes** events into a common model and deduplicates across sources. +3. **Scores** events based on likely pricing impact (venue size, event type). +4. **Alerts** the host via Telegram with a summary of flagged dates. +5. **Optionally automates** Airbnb calendar price updates using Playwright. + +## Scope + +### In scope (v1) + +- Ticketmaster Discovery API integration +- SeatGeek API integration +- Event normalization and deduplication +- Simple rule-based impact scoring +- Telegram notifications (primary output) +- Playwright-based Airbnb calendar automation (opt-in, fragile) +- CLI with dry-run / alerts-only / full modes +- Docker support + +### Out of scope (v1) + +- Web UI +- Cloud deployment / hosted service +- Database / persistent storage +- Paid API integrations +- Sophisticated pricing algorithms (ML, dynamic pricing) +- Multi-city support +- Multi-listing support + +## Technical constraints + +- Python 3.11+ +- Runs locally first, cron later +- Configuration via environment variables +- Minimal external dependencies +- Structured logging +- Idempotent where possible + +## Key risks and assumptions + +| # | Item | Type | Notes | +|---|---|---|---| +| 1 | Airbnb has no public pricing API | Assumption | Must use browser automation; inherently fragile | +| 2 | Airbnb UI selectors will change | Risk | Selectors isolated behind a dedicated module; manual fallback is Telegram-only | +| 3 | Free API tiers have rate limits | Risk | Weekly/monthly runs stay well within limits | +| 4 | Ticketmaster/SeatGeek cover major Toronto events | Assumption | May need additional providers later | +| 5 | 2FA/CAPTCHA on Airbnb login | Risk | Handled by one-time manual login with saved storage state | +| 6 | Event data quality varies across providers | Assumption | Deduplication is best-effort by date + venue + fuzzy name | diff --git a/README.md b/README.md new file mode 100644 index 0000000..f48a2c6 --- /dev/null +++ b/README.md @@ -0,0 +1,70 @@ +# EventRate + +Local Python application that identifies upcoming Toronto events likely to increase Airbnb demand, sends Telegram alerts, and optionally adjusts nightly prices via Playwright automation. + +## Quick start + +```bash +# 1. Clone and enter +git clone https://git.levkin.ca/ilia/AtAnyRate.git +cd AtAnyRate + +# 2. Create a virtual environment +python3 -m venv .venv +source .venv/bin/activate + +# 3. Install dependencies +pip install -r requirements.txt +playwright install chromium + +# 4. Configure +cp .env.example .env +# Edit .env with your API keys (see below) + +# 5. Run +python -m src.main --dry-run # preview only, no side effects +python -m src.main --alerts-only # fetch events + send Telegram +python -m src.main # full flow (alerts + Airbnb update) +``` + +## Environment variables + +| Variable | Required | Description | +|---|---|---| +| `TICKETMASTER_KEY` | Yes | Ticketmaster Discovery API key (free tier) | +| `SEATGEEK_CLIENT_ID` | Yes | SeatGeek API client ID (free tier) | +| `TELEGRAM_BOT_TOKEN` | Yes | Telegram bot token from @BotFather | +| `TELEGRAM_CHAT_ID` | Yes | Your Telegram chat/user ID | +| `AIRBNB_LISTING_ID` | No | Airbnb listing ID for calendar automation | +| `AIRBNB_BASE_PRICE` | No | Base nightly price (CAD) | +| `PRICE_INCREASE_PCT` | No | Price increase percentage for event dates (default: 20) | +| `LOOKAHEAD_DAYS` | No | Days ahead to scan for events (default: 30) | +| `LOG_LEVEL` | No | Logging level (default: INFO) | + +## Airbnb session setup (one-time) + +```bash +python scripts/airbnb_login.py +``` + +This opens a headed browser. Log in manually, complete any 2FA, then press Enter in the terminal. Your session is saved to `state.json` for reuse in headless runs. + +## Running on cron + +```bash +# Weekly Monday 8 AM +0 8 * * 1 cd /path/to/AtAnyRate && .venv/bin/python -m src.main --alerts-only >> /var/log/eventrate.log 2>&1 +``` + +## Docker + +```bash +docker build -t eventrate . +docker run --rm --env-file .env -v $(pwd)/state.json:/app/state.json eventrate +``` + +## Project docs + +- [PROJECT.md](PROJECT.md) — goals, scope, constraints +- [ARCHITECTURE.md](ARCHITECTURE.md) — system design and module map +- [BACKLOG.md](BACKLOG.md) — feature backlog and task breakdown diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..74f4cb3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +httpx>=0.27,<1 +pydantic>=2.0,<3 +pydantic-settings>=2.0,<3 +playwright>=1.40,<2 +python-dotenv>=1.0,<2 diff --git a/scripts/airbnb_login.py b/scripts/airbnb_login.py new file mode 100644 index 0000000..80eaf7d --- /dev/null +++ b/scripts/airbnb_login.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +"""One-time interactive Airbnb login to save session state. + +Run this once (or whenever your session expires): + python scripts/airbnb_login.py + +A headed Chromium browser will open. Log in manually, complete 2FA, +then return to the terminal and press Enter. Your session cookies +and localStorage will be saved to state.json for headless reuse. +""" + +from pathlib import Path +import sys + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +from src.airbnb.auth import interactive_login + +if __name__ == "__main__": + print("Starting Airbnb login helper...") + interactive_login() + print("Done. Session saved to state.json") diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/__main__.py b/src/__main__.py new file mode 100644 index 0000000..106c4c4 --- /dev/null +++ b/src/__main__.py @@ -0,0 +1,5 @@ +"""Allow running as `python -m src`.""" + +from src.main import main + +main() diff --git a/src/airbnb/__init__.py b/src/airbnb/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/airbnb/auth.py b/src/airbnb/auth.py new file mode 100644 index 0000000..23c64cb --- /dev/null +++ b/src/airbnb/auth.py @@ -0,0 +1,68 @@ +"""Airbnb authentication via Playwright storage state. + +Strategy: + 1. First run: launch headed browser, let user log in manually. + 2. Save storage state (cookies + localStorage) to state.json. + 3. Subsequent runs: load state.json into a headless context. + +WARNING: Airbnb sessions expire. If automation fails with auth errors, +re-run scripts/airbnb_login.py to refresh state.json. + +ASSUMPTION: Airbnb does not aggressively block Playwright's Chromium +fingerprint for authenticated hosts accessing their own calendar. +This is unverified and may break. +""" + +from __future__ import annotations + +import logging +from pathlib import Path + +from playwright.sync_api import Browser, BrowserContext, sync_playwright + +logger = logging.getLogger(__name__) + +DEFAULT_STATE_PATH = Path("state.json") + + +def interactive_login(state_path: Path = DEFAULT_STATE_PATH) -> None: + """Launch a headed browser for manual Airbnb login. + + After the user completes login (including any 2FA), they press + Enter in the terminal. The browser's storage state is then saved. + """ + with sync_playwright() as p: + browser = p.chromium.launch(headless=False) + context = browser.new_context() + page = context.new_page() + + page.goto("https://www.airbnb.ca/login") + input( + "\n>>> Log in to Airbnb in the browser window.\n" + ">>> Complete any 2FA prompts.\n" + ">>> Then press ENTER here to save the session...\n" + ) + + context.storage_state(path=str(state_path)) + logger.info("Storage state saved to %s", state_path) + + browser.close() + + +def load_authenticated_context( + browser: Browser, + state_path: Path = DEFAULT_STATE_PATH, +) -> BrowserContext: + """Create a browser context with saved authentication state. + + Raises FileNotFoundError if state.json doesn't exist. + """ + if not state_path.exists(): + raise FileNotFoundError( + f"No saved session at {state_path}. " + "Run 'python scripts/airbnb_login.py' first." + ) + + context = browser.new_context(storage_state=str(state_path)) + logger.info("Loaded auth state from %s", state_path) + return context diff --git a/src/airbnb/calendar.py b/src/airbnb/calendar.py new file mode 100644 index 0000000..1374c4c --- /dev/null +++ b/src/airbnb/calendar.py @@ -0,0 +1,104 @@ +"""Airbnb calendar price automation via Playwright. + +WARNING: This module is inherently fragile. Airbnb can change their UI +at any time, breaking all selectors below. Treat every selector as +a best-guess placeholder that WILL need updating. + +ASSUMPTION: The selectors below are STUBS. They have NOT been verified +against the live Airbnb host calendar UI. Do not expect this module +to work without first inspecting the actual DOM and updating selectors. +""" + +from __future__ import annotations + +import logging +from datetime import date + +import time + +from playwright.sync_api import Page, TimeoutError as PlaywrightTimeout + +logger = logging.getLogger(__name__) + +_MAX_UPDATE_ATTEMPTS = 3 +_RETRY_DELAY_SEC = 2.0 + +# All selectors below are UNVERIFIED PLACEHOLDERS. +# TODO: Inspect live Airbnb host calendar and replace these. +CALENDAR_URL = "https://www.airbnb.ca/hosting/calendar" +SELECTORS = { + # TODO: Replace with actual selector for date cells + "date_cell": 'td[data-date="{date_str}"]', + # TODO: Replace with actual selector for price input + "price_input": 'input[data-testid="price-input"]', + # TODO: Replace with actual selector for save button + "save_button": 'button[data-testid="save-button"]', +} + + +def update_price(page: Page, target_date: date, new_price: int) -> bool: + """Navigate to calendar and set the price for a specific date. + + Retries transient failures a few times, then returns False so the + caller can continue with other dates (alert-only degradation is + handled in ``main``). + """ + date_str = target_date.strftime("%Y-%m-%d") + logger.info("Updating price for %s to $%d", date_str, new_price) + + last_error: Exception | None = None + for attempt in range(1, _MAX_UPDATE_ATTEMPTS + 1): + try: + page.goto(CALENDAR_URL, wait_until="networkidle", timeout=30_000) + + # TODO: Calendar may require scrolling to reach the target month. + # This is not implemented yet. + _navigate_to_month(page, target_date) + + date_selector = SELECTORS["date_cell"].format(date_str=date_str) + page.click(date_selector, timeout=10_000) + + page.fill(SELECTORS["price_input"], str(new_price), timeout=5_000) + page.click(SELECTORS["save_button"], timeout=5_000) + + # TODO: Verify that the price was actually saved (read back from UI) + page.wait_for_timeout(2000) + + logger.info("Price updated for %s: $%d", date_str, new_price) + return True + + except PlaywrightTimeout as e: + last_error = e + logger.warning( + "Attempt %d/%d: timeout updating price for %s", + attempt, + _MAX_UPDATE_ATTEMPTS, + date_str, + ) + except Exception as e: + last_error = e + logger.warning( + "Attempt %d/%d: error updating price for %s: %s", + attempt, + _MAX_UPDATE_ATTEMPTS, + date_str, + e, + ) + + if attempt < _MAX_UPDATE_ATTEMPTS: + time.sleep(_RETRY_DELAY_SEC) + + if isinstance(last_error, PlaywrightTimeout): + logger.error("Timeout while updating price for %s after retries", date_str) + elif last_error: + logger.exception("Failed to update price for %s after retries", date_str) + return False + + +def _navigate_to_month(page: Page, target_date: date) -> None: + """Scroll the calendar forward/backward to reach the target month. + + TODO: This is a stub. Implementation depends on Airbnb's calendar + navigation controls (next/prev month buttons, month picker, etc.). + """ + pass diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..0e47948 --- /dev/null +++ b/src/config.py @@ -0,0 +1,28 @@ +"""Application configuration loaded from environment variables.""" + +from pydantic_settings import BaseSettings + + +class Settings(BaseSettings): + # Event providers + ticketmaster_key: str = "" + seatgeek_client_id: str = "" + + # Telegram + telegram_bot_token: str = "" + telegram_chat_id: str = "" + + # Airbnb automation (optional) + airbnb_listing_id: str = "" + airbnb_base_price: int = 150 + price_increase_pct: int = 20 + + # General + lookahead_days: int = 30 + log_level: str = "INFO" + + model_config = {"env_file": ".env", "env_file_encoding": "utf-8"} + + +def load_settings() -> Settings: + return Settings() diff --git a/src/dedup.py b/src/dedup.py new file mode 100644 index 0000000..ddeb4d3 --- /dev/null +++ b/src/dedup.py @@ -0,0 +1,81 @@ +"""Deduplicate events across multiple providers.""" + +from __future__ import annotations + +import logging +import re +from difflib import SequenceMatcher + +from src.models import NormalizedEvent + +logger = logging.getLogger(__name__) + +# Cross-provider titles for the same show often differ slightly. +_NAME_SIMILARITY_MIN = 0.78 +# Venue strings vary (suffixes, punctuation); stricter than names. +_VENUE_SIMILARITY_MIN = 0.88 + +_WS_RE = re.compile(r"\s+") + + +def _collapse_ws(s: str) -> str: + return _WS_RE.sub(" ", s.strip().lower()) + + +def _similarity(a: str, b: str) -> float: + if not a or not b: + return 0.0 + ca, cb = _collapse_ws(a), _collapse_ws(b) + if ca == cb: + return 1.0 + return SequenceMatcher(None, ca, cb).ratio() + + +def _is_same_event(a: NormalizedEvent, b: NormalizedEvent) -> bool: + if a.event_date != b.event_date: + return False + if _similarity(a.venue, b.venue) < _VENUE_SIMILARITY_MIN: + return False + if _similarity(a.name, b.name) < _NAME_SIMILARITY_MIN: + return False + return True + + +def _pick_representative(cluster: list[NormalizedEvent]) -> NormalizedEvent: + """Prefer richer records when merging duplicates (pre-scoring).""" + source_rank = {"ticketmaster": 2, "seatgeek": 1} + + def key(e: NormalizedEvent) -> tuple: + return ( + bool(e.url), + source_rank.get(e.source, 0), + len(e.name), + e.name, + ) + + return max(cluster, key=key) + + +def deduplicate(events: list[NormalizedEvent]) -> list[NormalizedEvent]: + """Remove duplicate events across providers. + + Strategy: same calendar day + fuzzy venue + fuzzy event name. + Exact ``dedup_key`` matches are a subset and merge into one cluster. + """ + if not events: + return [] + + clusters: list[list[NormalizedEvent]] = [] + for e in events: + for cluster in clusters: + if any(_is_same_event(x, e) for x in cluster): + cluster.append(e) + break + else: + clusters.append([e]) + + deduped = [_pick_representative(c) for c in clusters] + removed = len(events) - len(deduped) + if removed: + logger.info("Deduplication removed %d duplicate(s)", removed) + return deduped diff --git a/src/log.py b/src/log.py new file mode 100644 index 0000000..2121eda --- /dev/null +++ b/src/log.py @@ -0,0 +1,18 @@ +"""Structured logging setup.""" + +import logging +import sys + + +def setup_logging(level: str = "INFO") -> None: + root = logging.getLogger() + root.setLevel(getattr(logging, level.upper(), logging.INFO)) + + if not root.handlers: + handler = logging.StreamHandler(sys.stdout) + formatter = logging.Formatter( + fmt="%(asctime)s [%(levelname)s] %(name)s: %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + handler.setFormatter(formatter) + root.addHandler(handler) diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..fa19299 --- /dev/null +++ b/src/main.py @@ -0,0 +1,186 @@ +"""EventRate main runner. + +Usage: + python -m src.main # full flow: fetch + alert + update prices + python -m src.main --alerts-only # fetch + alert, no Airbnb automation + python -m src.main --dry-run # fetch + print, no alerts or updates +""" + +from __future__ import annotations + +import argparse +import logging +import sys +from datetime import date, timedelta + +from src.config import load_settings +from src.dedup import deduplicate +from src.log import setup_logging +from src.models import NormalizedEvent +from src.notifications.telegram import send_alert +from src.providers.seatgeek import SeatGeekProvider +from src.providers.ticketmaster import TicketmasterProvider +from src.scoring.impact import score_events + +logger = logging.getLogger(__name__) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="EventRate — Toronto event pricing assistant") + parser.add_argument("--dry-run", action="store_true", help="Fetch and print only, no side effects") + parser.add_argument("--alerts-only", action="store_true", help="Send Telegram alerts but skip Airbnb updates") + return parser.parse_args() + + +def fetch_all_events(settings) -> list[NormalizedEvent]: + """Fetch events from all configured providers.""" + providers = [ + TicketmasterProvider( + api_key=settings.ticketmaster_key, + lookahead_days=settings.lookahead_days, + ), + SeatGeekProvider( + client_id=settings.seatgeek_client_id, + lookahead_days=settings.lookahead_days, + ), + ] + + all_events: list[NormalizedEvent] = [] + failures = 0 + + for provider in providers: + logger.info("Fetching from %s...", provider.name) + events = provider.fetch() + if events: + all_events.extend(events) + else: + failures += 1 + logger.warning("No events from %s", provider.name) + + if failures == len(providers): + logger.error("All providers failed") + + return all_events + + +def filter_by_window(events: list[NormalizedEvent], lookahead_days: int) -> list[NormalizedEvent]: + """Keep only events within the lookahead window.""" + today = date.today() + cutoff = today + timedelta(days=lookahead_days) + return [e for e in events if today <= e.event_date <= cutoff] + + +def print_summary(events: list[NormalizedEvent]) -> None: + """Print a human-readable summary to stdout.""" + if not events: + print("No upcoming events found.") + return + + print(f"\n{'='*60}") + print(f" EventRate — {len(events)} events in the next window") + print(f"{'='*60}\n") + + for event in events: + print(f" [{event.score:.2f}] {event.event_date} | {event.name}") + print(f" {event.venue} ({event.source})") + if event.url: + print(f" {event.url}") + print() + + +def update_airbnb_prices(events: list[NormalizedEvent], settings) -> None: + """Attempt to update Airbnb prices for flagged dates. + + Isolated in a try/except so failures don't crash the runner. + """ + if not settings.airbnb_listing_id: + logger.info("No Airbnb listing configured, skipping price updates") + return + + try: + from playwright.sync_api import sync_playwright + from src.airbnb.auth import load_authenticated_context + from src.airbnb.calendar import update_price + except ImportError: + logger.error("Playwright not installed, cannot update Airbnb prices") + return + + new_price = int(settings.airbnb_base_price * (1 + settings.price_increase_pct / 100)) + event_dates = sorted({e.event_date for e in events}) + + logger.info( + "Updating Airbnb prices for %d dates to $%d", + len(event_dates), + new_price, + ) + + successes = 0 + try: + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + context = load_authenticated_context(browser) + page = context.new_page() + + for target_date in event_dates: + if update_price(page, target_date, new_price): + successes += 1 + + browser.close() + except FileNotFoundError as e: + logger.error("Auth state missing: %s", e) + except Exception: + logger.exception("Airbnb automation failed") + + logger.info("Airbnb: updated %d/%d dates", successes, len(event_dates)) + + +def main() -> None: + args = parse_args() + settings = load_settings() + setup_logging(settings.log_level) + + logger.info("EventRate starting (dry_run=%s, alerts_only=%s)", args.dry_run, args.alerts_only) + + # 1. Fetch + raw_events = fetch_all_events(settings) + if not raw_events: + logger.warning("No events fetched from any provider") + if not args.dry_run: + send_alert([], settings.telegram_bot_token, settings.telegram_chat_id) + sys.exit(0) + + # 2. Deduplicate + unique_events = deduplicate(raw_events) + + # 3. Score + scored_events = score_events(unique_events) + + # 4. Filter + upcoming = filter_by_window(scored_events, settings.lookahead_days) + + # 5. Output + print_summary(upcoming) + + if args.dry_run: + logger.info("Dry run complete, no alerts or updates sent") + return + + # 6. Alert + alert_ok = send_alert( + upcoming, + settings.telegram_bot_token, + settings.telegram_chat_id, + ) + + if not alert_ok: + logger.error("Telegram alert failed") + + # 7. Optionally update Airbnb + if not args.alerts_only and upcoming: + update_airbnb_prices(upcoming, settings) + + logger.info("EventRate complete") + + +if __name__ == "__main__": + main() diff --git a/src/models.py b/src/models.py new file mode 100644 index 0000000..ed5b602 --- /dev/null +++ b/src/models.py @@ -0,0 +1,27 @@ +"""Normalized event model shared across all providers.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import date +from typing import Any + + +@dataclass(frozen=True) +class NormalizedEvent: + name: str + event_date: date + venue: str + source: str + url: str = "" + score: float = 0.0 + raw: dict[str, Any] = field(default_factory=dict, repr=False, compare=False) + + @property + def dedup_key(self) -> str: + """Key used for cross-provider deduplication. + + Combines date and lowercased venue. Fuzzy name matching + may be layered on top in dedup.py. + """ + return f"{self.event_date.isoformat()}|{self.venue.lower().strip()}" diff --git a/src/notifications/__init__.py b/src/notifications/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/notifications/telegram.py b/src/notifications/telegram.py new file mode 100644 index 0000000..4447b57 --- /dev/null +++ b/src/notifications/telegram.py @@ -0,0 +1,85 @@ +"""Telegram notification sender using raw HTTP (no library dependency). + +Uses the Bot API sendMessage endpoint with MarkdownV2 formatting. +""" + +from __future__ import annotations + +import logging +from datetime import date +from itertools import groupby + +import httpx + +from src.models import NormalizedEvent + +logger = logging.getLogger(__name__) + +SEND_URL = "https://api.telegram.org/bot{token}/sendMessage" + + +def send_alert( + events: list[NormalizedEvent], + bot_token: str, + chat_id: str, +) -> bool: + """Send a formatted event summary to Telegram. Returns True on success.""" + if not bot_token or not chat_id: + logger.warning("Telegram credentials not configured, skipping alert") + return False + + if not events: + logger.info("No events to report") + return True + + message = _format_message(events) + + try: + resp = httpx.post( + SEND_URL.format(token=bot_token), + json={ + "chat_id": chat_id, + "text": message, + "parse_mode": "MarkdownV2", + }, + timeout=15, + ) + resp.raise_for_status() + logger.info("Telegram alert sent successfully") + return True + except Exception: + logger.exception("Failed to send Telegram alert") + return False + + +def _format_message(events: list[NormalizedEvent]) -> str: + """Group events by date and format as MarkdownV2.""" + sorted_events = sorted(events, key=lambda e: e.event_date) + + lines = ["*EventRate Alert* 🏟️\n"] + for event_date, group in groupby(sorted_events, key=lambda e: e.event_date): + lines.append(f"*{_escape_md(event_date.strftime('%a %b %d, %Y'))}*") + for event in group: + venue = _escape_md(event.venue) + name = _escape_md(event.name) + score_bar = _score_indicator(event.score) + lines.append(f" {score_bar} {name} @ {venue}") + lines.append("") + + return "\n".join(lines) + + +def _score_indicator(score: float) -> str: + if score >= 0.8: + return "🔴" + if score >= 0.5: + return "🟡" + return "🟢" + + +def _escape_md(text: str) -> str: + """Escape special MarkdownV2 characters.""" + special = r"_*[]()~`>#+-=|{}.!" + for ch in special: + text = text.replace(ch, f"\\{ch}") + return text diff --git a/src/providers/__init__.py b/src/providers/__init__.py new file mode 100644 index 0000000..4cfc5b9 --- /dev/null +++ b/src/providers/__init__.py @@ -0,0 +1,5 @@ +from src.providers.base import EventProvider +from src.providers.ticketmaster import TicketmasterProvider +from src.providers.seatgeek import SeatGeekProvider + +__all__ = ["EventProvider", "TicketmasterProvider", "SeatGeekProvider"] diff --git a/src/providers/base.py b/src/providers/base.py new file mode 100644 index 0000000..fa94bd4 --- /dev/null +++ b/src/providers/base.py @@ -0,0 +1,23 @@ +"""Abstract base class for event providers.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from src.models import NormalizedEvent + + +class EventProvider(ABC): + """Interface that all event source integrations must implement.""" + + @property + @abstractmethod + def name(self) -> str: + """Human-readable provider name for logging.""" + + @abstractmethod + def fetch(self) -> list[NormalizedEvent]: + """Fetch upcoming events and return normalized models. + + Implementations should handle their own errors internally + and return an empty list on failure, logging the error. + """ diff --git a/src/providers/seatgeek.py b/src/providers/seatgeek.py new file mode 100644 index 0000000..e6ce9cf --- /dev/null +++ b/src/providers/seatgeek.py @@ -0,0 +1,105 @@ +"""SeatGeek API provider. + +API docs: https://platform.seatgeek.com/ +Free tier available with client ID — sufficient for weekly runs. + +ASSUMPTION: SeatGeek uses 'score' field (0.0–1.0) as a popularity metric. +We use a threshold to filter low-interest events. The right threshold +is unknown and will need tuning with real data. +""" + +from __future__ import annotations + +import logging +from datetime import date, datetime, timedelta + +import httpx + +from src.models import NormalizedEvent +from src.providers.base import EventProvider + +logger = logging.getLogger(__name__) + +BASE_URL = "https://api.seatgeek.com/2/events" + +# TODO: Tune this threshold after observing real score distributions +MIN_SCORE_THRESHOLD = 0.5 + + +class SeatGeekProvider(EventProvider): + def __init__(self, client_id: str, lookahead_days: int = 30) -> None: + self._client_id = client_id + self._lookahead_days = lookahead_days + + @property + def name(self) -> str: + return "seatgeek" + + def fetch(self) -> list[NormalizedEvent]: + if not self._client_id: + logger.warning("SeatGeek client ID not configured, skipping") + return [] + + try: + return self._fetch_events() + except Exception: + logger.exception("SeatGeek fetch failed") + return [] + + def _fetch_events(self) -> list[NormalizedEvent]: + start = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S") + end = (datetime.utcnow() + timedelta(days=self._lookahead_days)).strftime( + "%Y-%m-%dT%H:%M:%S" + ) + + params = { + "client_id": self._client_id, + "venue.city": "Toronto", + "datetime_utc.gte": start, + "datetime_utc.lte": end, + "per_page": 100, + "sort": "datetime_utc.asc", + } + + # TODO: Handle pagination if > 100 events + resp = httpx.get(BASE_URL, params=params, timeout=30) + resp.raise_for_status() + data = resp.json() + + raw_events = data.get("events", []) + logger.info("SeatGeek returned %d raw events", len(raw_events)) + + results: list[NormalizedEvent] = [] + for item in raw_events: + score = item.get("score", 0.0) or 0.0 + if score < MIN_SCORE_THRESHOLD: + continue + + venue_name = item.get("venue", {}).get("name", "Unknown Venue") + event_date = self._extract_date(item) + if event_date is None: + continue + + results.append( + NormalizedEvent( + name=item.get("title", "Unknown"), + event_date=event_date, + venue=venue_name, + source=self.name, + url=item.get("url", ""), + raw=item, + ) + ) + + logger.info("SeatGeek: %d events above score threshold", len(results)) + return results + + @staticmethod + def _extract_date(item: dict) -> date | None: + dt_str = item.get("datetime_local") or item.get("datetime_utc") + if dt_str: + try: + return datetime.fromisoformat(dt_str).date() + except ValueError: + logger.warning("Unparseable date: %s", dt_str) + return None diff --git a/src/providers/ticketmaster.py b/src/providers/ticketmaster.py new file mode 100644 index 0000000..ba8b6ec --- /dev/null +++ b/src/providers/ticketmaster.py @@ -0,0 +1,125 @@ +"""Ticketmaster Discovery API provider. + +API docs: https://developer.ticketmaster.com/products-and-docs/apis/discovery-api/v2/ +Free tier: 5,000 calls/day — more than enough for weekly runs. + +ASSUMPTION: Major Toronto venues are hardcoded below. This list may need +updating if new venues open or names change in Ticketmaster's data. +""" + +from __future__ import annotations + +import logging +from datetime import date, datetime, timedelta + +import httpx + +from src.models import NormalizedEvent +from src.providers.base import EventProvider + +logger = logging.getLogger(__name__) + +BASE_URL = "https://app.ticketmaster.com/discovery/v2/events.json" + +# TODO: Validate these venue names against actual Ticketmaster venue data. +# These are best-guess strings; the API may use different casing or IDs. +MAJOR_VENUES = { + "scotiabank arena", + "rogers centre", + "budweiser stage", + "coca-cola coliseum", + "meridian hall", + "massey hall", + "history", +} + + +class TicketmasterProvider(EventProvider): + def __init__(self, api_key: str, lookahead_days: int = 30) -> None: + self._api_key = api_key + self._lookahead_days = lookahead_days + + @property + def name(self) -> str: + return "ticketmaster" + + def fetch(self) -> list[NormalizedEvent]: + if not self._api_key: + logger.warning("Ticketmaster API key not configured, skipping") + return [] + + try: + return self._fetch_events() + except Exception: + logger.exception("Ticketmaster fetch failed") + return [] + + def _fetch_events(self) -> list[NormalizedEvent]: + start = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") + end = (datetime.utcnow() + timedelta(days=self._lookahead_days)).strftime( + "%Y-%m-%dT%H:%M:%SZ" + ) + + params = { + "apikey": self._api_key, + "city": "Toronto", + "countryCode": "CA", + "startDateTime": start, + "endDateTime": end, + "size": 100, + "sort": "date,asc", + } + + # TODO: Handle pagination if > 100 events in the window + resp = httpx.get(BASE_URL, params=params, timeout=30) + resp.raise_for_status() + data = resp.json() + + raw_events = data.get("_embedded", {}).get("events", []) + logger.info("Ticketmaster returned %d raw events", len(raw_events)) + + results: list[NormalizedEvent] = [] + for item in raw_events: + venue_name = self._extract_venue(item) + if not self._is_major_venue(venue_name): + continue + + event_date = self._extract_date(item) + if event_date is None: + continue + + results.append( + NormalizedEvent( + name=item.get("name", "Unknown"), + event_date=event_date, + venue=venue_name, + source=self.name, + url=item.get("url", ""), + raw=item, + ) + ) + + logger.info("Ticketmaster: %d events at major venues", len(results)) + return results + + @staticmethod + def _extract_venue(item: dict) -> str: + venues = item.get("_embedded", {}).get("venues", []) + if venues: + return venues[0].get("name", "Unknown Venue") + return "Unknown Venue" + + @staticmethod + def _is_major_venue(venue_name: str) -> bool: + return venue_name.lower().strip() in MAJOR_VENUES + + @staticmethod + def _extract_date(item: dict) -> date | None: + dates = item.get("dates", {}).get("start", {}) + date_str = dates.get("localDate") + if date_str: + try: + return datetime.strptime(date_str, "%Y-%m-%d").date() + except ValueError: + logger.warning("Unparseable date: %s", date_str) + return None diff --git a/src/scoring/__init__.py b/src/scoring/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/scoring/impact.py b/src/scoring/impact.py new file mode 100644 index 0000000..6d59877 --- /dev/null +++ b/src/scoring/impact.py @@ -0,0 +1,59 @@ +"""Simple rule-based impact scoring for events. + +Assigns a 0.0–1.0 score based on venue capacity and event type. +This is intentionally naive — a starting point, not a pricing model. + +ASSUMPTION: Venue capacities are approximate and hardcoded. +Real capacity depends on event configuration (e.g., concert vs hockey +layout at Scotiabank Arena). +""" + +from __future__ import annotations + +import logging +from src.models import NormalizedEvent + +logger = logging.getLogger(__name__) + +# TODO: Validate these capacities and expand as needed +VENUE_CAPACITY: dict[str, int] = { + "rogers centre": 49000, + "scotiabank arena": 19800, + "budweiser stage": 16000, + "coca-cola coliseum": 8000, + "meridian hall": 3200, + "massey hall": 2750, + "history": 2500, +} + +MAX_CAPACITY = max(VENUE_CAPACITY.values()) + + +def score_event(event: NormalizedEvent) -> NormalizedEvent: + """Return a copy of the event with an impact score assigned.""" + venue_key = event.venue.lower().strip() + capacity = VENUE_CAPACITY.get(venue_key, 0) + + if capacity == 0: + # Unknown venue — assign a moderate default so it still surfaces + score = 0.3 + else: + score = round(capacity / MAX_CAPACITY, 2) + + return NormalizedEvent( + name=event.name, + event_date=event.event_date, + venue=event.venue, + source=event.source, + url=event.url, + score=score, + raw=event.raw, + ) + + +def score_events(events: list[NormalizedEvent]) -> list[NormalizedEvent]: + """Score all events and return sorted by score descending.""" + scored = [score_event(e) for e in events] + scored.sort(key=lambda e: (-e.score, e.event_date)) + logger.info("Scored %d events", len(scored)) + return scored diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29