commit 1a7298f7554df6eb98a10199b3e371b701880c32 Author: ilia Date: Sat Apr 4 12:31:53 2026 -0400 Initial commit: EventRate pipeline, fuzzy dedup, Airbnb retries Wire up Ticketmaster, SeatGeek, Telegram, scoring, Playwright stubs. Deduplicate events with fuzzy venue/name matching. Retry calendar updates on transient failures. Backlog tasks marked complete. Made-with: Cursor diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..b8d3599 --- /dev/null +++ b/.env.example @@ -0,0 +1,16 @@ +# === Event providers === +TICKETMASTER_KEY=your_ticketmaster_api_key_here +SEATGEEK_CLIENT_ID=your_seatgeek_client_id_here + +# === Telegram === +TELEGRAM_BOT_TOKEN=your_telegram_bot_token_here +TELEGRAM_CHAT_ID=your_telegram_chat_id_here + +# === Airbnb automation (optional) === +AIRBNB_LISTING_ID= +AIRBNB_BASE_PRICE=150 +PRICE_INCREASE_PCT=20 + +# === General === +LOOKAHEAD_DAYS=30 +LOG_LEVEL=INFO diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6d83c1c --- /dev/null +++ b/.gitignore @@ -0,0 +1,24 @@ +__pycache__/ +*.py[cod] +*$py.class +*.egg-info/ +dist/ +build/ +.eggs/ + +.venv/ +venv/ +env/ + +.env +state.json + +*.log +.DS_Store +Thumbs.db + +.pytest_cache/ +.mypy_cache/ +.ruff_cache/ +htmlcov/ +.coverage diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..eae4d14 --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,96 @@ +# Architecture + +## High-level flow + +``` +┌──────────────┐ ┌──────────────┐ +│ Ticketmaster │ │ SeatGeek │ +│ Provider │ │ Provider │ +└──────┬───────┘ └──────┬───────┘ + │ │ + └────────┬───────────┘ + ▼ + ┌───────────────┐ + │ Normalize & │ + │ Deduplicate │ + └───────┬───────┘ + ▼ + ┌───────────────┐ + │ Impact Score │ + └───────┬───────┘ + ▼ + ┌───────────────┐ + │ Filter by │ + │ date window │ + └───────┬───────┘ + │ + ┌───────┴───────┐ + ▼ ▼ +┌──────────────┐ ┌──────────────┐ +│ Telegram │ │ Airbnb │ +│ Alert │ │ Calendar │ +│ (primary) │ │ (optional) │ +└──────────────┘ 
└──────────────┘ +``` + +## Module map + +``` +src/ +├── main.py # CLI entrypoint, orchestration +├── config.py # Pydantic settings from env vars +├── models.py # NormalizedEvent dataclass +├── log.py # Structured logging configuration +├── dedup.py # Deduplication across providers +├── providers/ +│ ├── base.py # EventProvider abstract base class +│ ├── ticketmaster.py # Ticketmaster Discovery API +│ └── seatgeek.py # SeatGeek API +├── scoring/ +│ └── impact.py # Rule-based impact scoring +├── notifications/ +│ └── telegram.py # Telegram bot message sender +└── airbnb/ + ├── auth.py # Playwright storage state management + └── calendar.py # Calendar navigation and price updates +``` + +## Key design decisions + +### 1. Provider abstraction + +All event providers implement `EventProvider.fetch() -> list[NormalizedEvent]`. This makes it trivial to add new sources (Eventbrite, PredictHQ, scraping) without touching the orchestration layer. + +### 2. Normalized event model + +A single `NormalizedEvent` dataclass acts as the shared contract between providers, deduplication, scoring, and output formatting. Fields: name, date, venue, source, url, raw metadata. + +### 3. Airbnb automation is isolated and optional + +The `src/airbnb/` module is completely decoupled from event ingestion. If Playwright breaks (selectors change, login expires), the system degrades gracefully to Telegram-only alerts. The main runner catches all Airbnb errors and logs them without crashing. + +### 4. Storage state for auth + +Airbnb authentication uses Playwright's `storage_state` API. A one-time manual login script saves cookies/localStorage to `state.json`. Subsequent headless runs load this state. No passwords are stored in code or env vars. + +### 5. Configuration via environment + +All secrets and tunables live in env vars (loaded from `.env` in local dev). No config files to manage, easy to override in Docker/cron. + +### 6. 
No database + +For weekly runs processing dozens of events, in-memory processing is sufficient. If persistence becomes necessary (e.g., tracking price change history), a simple JSON file or SQLite would be the first step. + +## Error handling strategy + +- Each provider's `fetch()` is wrapped in try/except; one failing source does not block others. +- Telegram send failures are logged but do not block Airbnb updates. +- Airbnb automation failures are logged and reported via Telegram if possible. +- The main runner returns a nonzero exit code if all providers fail. + +## Future extension points + +- New providers: subclass `EventProvider` +- New notification channels: add modules under `notifications/` +- Smarter scoring: replace `scoring/impact.py` internals +- Multi-listing: extend config and loop in `main.py` diff --git a/BACKLOG.md b/BACKLOG.md new file mode 100644 index 0000000..f67e817 --- /dev/null +++ b/BACKLOG.md @@ -0,0 +1,65 @@ +# Backlog + +## Epic: Automated Airbnb Event Pricing + +### Story 1: Event Data Ingestion + +> As a host, I want to fetch upcoming Toronto events from public APIs so I can identify dates with high accommodation demand. + +| Task | Status | Notes | +|---|---|---| +| 1.1 Integrate Ticketmaster Discovery API | Done | `src/providers/ticketmaster.py`, major-venue filter | +| 1.2 Integrate SeatGeek API | Done | `src/providers/seatgeek.py`, score threshold | +| 1.3 Normalize events to common model | Done | `NormalizedEvent` in `src/models.py` | +| 1.4 Deduplicate events across providers | Done | `src/dedup.py` — date + fuzzy venue + fuzzy name | + +### Story 2: Telegram Notification System + +> As a host, I want to receive a Telegram message containing the flagged dates so I can review findings before automated pricing kicks in. 
+ +| Task | Status | Notes | +|---|---|---| +| 2.1 Build Telegram bot alerter | Done | `src/notifications/telegram.py` (httpx, Bot API) | +| 2.2 Format message with grouped dates | Done | MarkdownV2, grouped by date | + +### Story 3: Browser Automation (Playwright) + +> As a host, I want a script to log into Airbnb and change prices on specific dates so I don't have to do it manually. + +| Task | Status | Notes | +|---|---|---| +| 3.1 Handle authentication & session state | Done | `src/airbnb/auth.py`, `scripts/airbnb_login.py` | +| 3.2 Automate calendar UI updates | Done | `src/airbnb/calendar.py` (selectors need live verification) | +| 3.3 Add retry/fallback on UI failure | Done | Retries in `update_price`; runner skips dates on failure | + +### Story 4: Scoring & Filtering + +> As a host, I want events scored by likely pricing impact so I only adjust prices for meaningful demand drivers. + +| Task | Status | Notes | +|---|---|---| +| 4.1 Define venue size/type scoring rules | Done | `src/scoring/impact.py` (`VENUE_CAPACITY`) | +| 4.2 Filter by date window (lookahead) | Done | `filter_by_window` in `src/main.py`, `lookahead_days` in config | + +### Story 5: Orchestration & Deployment + +> As a developer, I want to orchestrate the pipeline and deploy via Docker on cron. 
+ +| Task | Status | Notes | +|---|---|---| +| 5.1 Main CLI runner with modes | Done | `src/main.py` — `--dry-run`, `--alerts-only`, full | +| 5.2 Dockerfile for Playwright | Done | `Dockerfile` (Chromium + deps) | +| 5.3 Cron configuration guide | Done | See [README.md](README.md) | + +--- + +## Future ideas (not scheduled) + +- Eventbrite integration +- PredictHQ integration (paid, but powerful) +- Web scraping fallback for niche events +- Historical price tracking (SQLite) +- Multi-listing support +- Smarter pricing model (base + multiplier per event score) +- Slack/Discord notification channel +- Dashboard/web UI diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..dbb71a1 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.11-slim + +WORKDIR /app + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 \ + libdrm2 libxkbcommon0 libxcomposite1 libxdamage1 \ + libxrandr2 libgbm1 libpango-1.0-0 libcairo2 \ + libasound2 libxshmfence1 && \ + rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt && \ + playwright install chromium + +COPY src/ src/ + +# Mount state.json at runtime: -v ./state.json:/app/state.json +ENTRYPOINT ["python", "-m", "src.main"] +CMD ["--alerts-only"] diff --git a/PROJECT.md b/PROJECT.md new file mode 100644 index 0000000..b34fd2d --- /dev/null +++ b/PROJECT.md @@ -0,0 +1,62 @@ +# Project: EventRate + +## Purpose + +Maximize Airbnb hosting revenue by automatically detecting high-demand dates in Toronto and adjusting nightly pricing accordingly. + +## Problem + +Toronto hosts major events (Raptors, Leafs, Blue Jays, concerts, festivals) that drive short-term rental demand. Manually tracking these events and updating Airbnb prices is tedious and error-prone. Missing a single large event can mean hundreds of dollars in lost revenue. + +## Solution + +A local Python application that: + +1. 
**Fetches** upcoming Toronto events from free public APIs (Ticketmaster, SeatGeek). +2. **Normalizes** events into a common model and deduplicates across sources. +3. **Scores** events based on likely pricing impact (venue size, event type). +4. **Alerts** the host via Telegram with a summary of flagged dates. +5. **Optionally automates** Airbnb calendar price updates using Playwright. + +## Scope + +### In scope (v1) + +- Ticketmaster Discovery API integration +- SeatGeek API integration +- Event normalization and deduplication +- Simple rule-based impact scoring +- Telegram notifications (primary output) +- Playwright-based Airbnb calendar automation (opt-in, fragile) +- CLI with dry-run / alerts-only / full modes +- Docker support + +### Out of scope (v1) + +- Web UI +- Cloud deployment / hosted service +- Database / persistent storage +- Paid API integrations +- Sophisticated pricing algorithms (ML, dynamic pricing) +- Multi-city support +- Multi-listing support + +## Technical constraints + +- Python 3.11+ +- Runs locally first, cron later +- Configuration via environment variables +- Minimal external dependencies +- Structured logging +- Idempotent where possible + +## Key risks and assumptions + +| # | Item | Type | Notes | +|---|---|---|---| +| 1 | Airbnb has no public pricing API | Assumption | Must use browser automation; inherently fragile | +| 2 | Airbnb UI selectors will change | Risk | Selectors isolated behind a dedicated module; manual fallback is Telegram-only | +| 3 | Free API tiers have rate limits | Risk | Weekly/monthly runs stay well within limits | +| 4 | Ticketmaster/SeatGeek cover major Toronto events | Assumption | May need additional providers later | +| 5 | 2FA/CAPTCHA on Airbnb login | Risk | Handled by one-time manual login with saved storage state | +| 6 | Event data quality varies across providers | Assumption | Deduplication is best-effort by date + venue + fuzzy name | diff --git a/README.md b/README.md new file mode 100644 index 
0000000..f48a2c6 --- /dev/null +++ b/README.md @@ -0,0 +1,70 @@ +# EventRate + +Local Python application that identifies upcoming Toronto events likely to increase Airbnb demand, sends Telegram alerts, and optionally adjusts nightly prices via Playwright automation. + +## Quick start + +```bash +# 1. Clone and enter +git clone https://git.levkin.ca/ilia/AtAnyRate.git +cd AtAnyRate + +# 2. Create a virtual environment +python3 -m venv .venv +source .venv/bin/activate + +# 3. Install dependencies +pip install -r requirements.txt +playwright install chromium + +# 4. Configure +cp .env.example .env +# Edit .env with your API keys (see below) + +# 5. Run +python -m src.main --dry-run # preview only, no side effects +python -m src.main --alerts-only # fetch events + send Telegram +python -m src.main # full flow (alerts + Airbnb update) +``` + +## Environment variables + +| Variable | Required | Description | +|---|---|---| +| `TICKETMASTER_KEY` | Yes | Ticketmaster Discovery API key (free tier) | +| `SEATGEEK_CLIENT_ID` | Yes | SeatGeek API client ID (free tier) | +| `TELEGRAM_BOT_TOKEN` | Yes | Telegram bot token from @BotFather | +| `TELEGRAM_CHAT_ID` | Yes | Your Telegram chat/user ID | +| `AIRBNB_LISTING_ID` | No | Airbnb listing ID for calendar automation | +| `AIRBNB_BASE_PRICE` | No | Base nightly price (CAD) | +| `PRICE_INCREASE_PCT` | No | Price increase percentage for event dates (default: 20) | +| `LOOKAHEAD_DAYS` | No | Days ahead to scan for events (default: 30) | +| `LOG_LEVEL` | No | Logging level (default: INFO) | + +## Airbnb session setup (one-time) + +```bash +python scripts/airbnb_login.py +``` + +This opens a headed browser. Log in manually, complete any 2FA, then press Enter in the terminal. Your session is saved to `state.json` for reuse in headless runs. 
+ +## Running on cron + +```bash +# Weekly Monday 8 AM +0 8 * * 1 cd /path/to/AtAnyRate && .venv/bin/python -m src.main --alerts-only >> /var/log/eventrate.log 2>&1 +``` + +## Docker + +```bash +docker build -t eventrate . +docker run --rm --env-file .env -v $(pwd)/state.json:/app/state.json eventrate +``` + +## Project docs + +- [PROJECT.md](PROJECT.md) — goals, scope, constraints +- [ARCHITECTURE.md](ARCHITECTURE.md) — system design and module map +- [BACKLOG.md](BACKLOG.md) — feature backlog and task breakdown diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..74f4cb3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +httpx>=0.27,<1 +pydantic>=2.0,<3 +pydantic-settings>=2.0,<3 +playwright>=1.40,<2 +python-dotenv>=1.0,<2 diff --git a/scripts/airbnb_login.py b/scripts/airbnb_login.py new file mode 100644 index 0000000..80eaf7d --- /dev/null +++ b/scripts/airbnb_login.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +"""One-time interactive Airbnb login to save session state. + +Run this once (or whenever your session expires): + python scripts/airbnb_login.py + +A headed Chromium browser will open. Log in manually, complete 2FA, +then return to the terminal and press Enter. Your session cookies +and localStorage will be saved to state.json for headless reuse. +""" + +from pathlib import Path +import sys + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +from src.airbnb.auth import interactive_login + +if __name__ == "__main__": + print("Starting Airbnb login helper...") + interactive_login() + print("Done. 
logger = logging.getLogger(__name__)

DEFAULT_STATE_PATH = Path("state.json")


def _restrict_permissions(path: Path) -> None:
    """Best-effort: make *path* readable and writable by its owner only.

    The storage state holds live session cookies, so it should not be
    readable by other local users. Failure is logged, never raised.
    """
    try:
        path.chmod(0o600)
    except OSError:
        logger.warning("Could not restrict permissions on %s", path)


def interactive_login(state_path: Path = DEFAULT_STATE_PATH) -> None:
    """Launch a headed browser for manual Airbnb login and save the session.

    Blocks on ``input()`` until the user confirms in the terminal that
    login (including any 2FA) is complete, then writes the context's
    storage state to ``state_path`` and restricts the file to the owner.

    Args:
        state_path: Destination file for the Playwright storage state.
    """
    state_path = Path(state_path)

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        try:
            context = browser.new_context()
            page = context.new_page()

            page.goto("https://www.airbnb.ca/login")
            input(
                "\n>>> Log in to Airbnb in the browser window.\n"
                ">>> Complete any 2FA prompts.\n"
                ">>> Then press ENTER here to save the session...\n"
            )

            context.storage_state(path=str(state_path))
            _restrict_permissions(state_path)
            logger.info("Storage state saved to %s", state_path)
        finally:
            # Close the window even if the prompt is interrupted (Ctrl-C/EOF).
            browser.close()


def load_authenticated_context(
    browser: Browser,
    state_path: Path = DEFAULT_STATE_PATH,
) -> BrowserContext:
    """Create a browser context pre-loaded with the saved storage state.

    Args:
        browser: Playwright browser used to create the context.
        state_path: File previously written by ``interactive_login``.

    Returns:
        A new context created with ``storage_state`` set to ``state_path``.

    Raises:
        FileNotFoundError: If ``state_path`` does not exist.
    """
    state_path = Path(state_path)
    if not state_path.exists():
        raise FileNotFoundError(
            f"No saved session at {state_path}. "
            "Run 'python scripts/airbnb_login.py' first."
        )

    context = browser.new_context(storage_state=str(state_path))
    logger.info("Loaded auth state from %s", state_path)
    return context
# TODO: Inspect live Airbnb host calendar and replace these.
CALENDAR_URL = "https://www.airbnb.ca/hosting/calendar"
SELECTORS = {
    # TODO: Replace with actual selector for date cells
    "date_cell": 'td[data-date="{date_str}"]',
    # TODO: Replace with actual selector for price input
    "price_input": 'input[data-testid="price-input"]',
    # TODO: Replace with actual selector for save button
    "save_button": 'button[data-testid="save-button"]',
}


def update_price(page: Page, target_date: date, new_price: int) -> bool:
    """Navigate to the calendar and set the price for a specific date.

    Each attempt reloads ``CALENDAR_URL``, clicks the date cell, fills the
    price input and clicks save. Failed attempts are retried up to
    ``_MAX_UPDATE_ATTEMPTS`` times with ``_RETRY_DELAY_SEC`` between them.

    Args:
        page: Playwright page to drive.
        target_date: Calendar date whose price should change.
        new_price: Price typed into the price input.

    Returns:
        True once an attempt completes without raising; False when every
        attempt failed, so the caller can continue with other dates.
    """
    date_str = target_date.strftime("%Y-%m-%d")
    logger.info("Updating price for %s to $%d", date_str, new_price)

    last_error: Exception | None = None
    for attempt in range(1, _MAX_UPDATE_ATTEMPTS + 1):
        try:
            page.goto(CALENDAR_URL, wait_until="networkidle", timeout=30_000)

            # TODO: Calendar may require scrolling to reach the target month.
            # This is not implemented yet.
            _navigate_to_month(page, target_date)

            date_selector = SELECTORS["date_cell"].format(date_str=date_str)
            page.click(date_selector, timeout=10_000)

            page.fill(SELECTORS["price_input"], str(new_price), timeout=5_000)
            page.click(SELECTORS["save_button"], timeout=5_000)

            # TODO: Verify that the price was actually saved (read back from UI)
            page.wait_for_timeout(2000)

            logger.info("Price updated for %s: $%d", date_str, new_price)
            return True

        except PlaywrightTimeout as e:
            last_error = e
            logger.warning(
                "Attempt %d/%d: timeout updating price for %s",
                attempt,
                _MAX_UPDATE_ATTEMPTS,
                date_str,
            )
        except Exception as e:
            last_error = e
            logger.warning(
                "Attempt %d/%d: error updating price for %s: %s",
                attempt,
                _MAX_UPDATE_ATTEMPTS,
                date_str,
                e,
            )

        # No point sleeping after the final attempt.
        if attempt < _MAX_UPDATE_ATTEMPTS:
            time.sleep(_RETRY_DELAY_SEC)

    if isinstance(last_error, PlaywrightTimeout):
        logger.error("Timeout while updating price for %s after retries", date_str)
    elif last_error is not None:
        # We are outside any ``except`` block here, so ``logger.exception``
        # would find no active exception and log "NoneType: None". Pass the
        # saved exception explicitly to get its real traceback.
        logger.error(
            "Failed to update price for %s after retries",
            date_str,
            exc_info=last_error,
        )
    return False
+ """ + pass diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..0e47948 --- /dev/null +++ b/src/config.py @@ -0,0 +1,28 @@ +"""Application configuration loaded from environment variables.""" + +from pydantic_settings import BaseSettings + + +class Settings(BaseSettings): + # Event providers + ticketmaster_key: str = "" + seatgeek_client_id: str = "" + + # Telegram + telegram_bot_token: str = "" + telegram_chat_id: str = "" + + # Airbnb automation (optional) + airbnb_listing_id: str = "" + airbnb_base_price: int = 150 + price_increase_pct: int = 20 + + # General + lookahead_days: int = 30 + log_level: str = "INFO" + + model_config = {"env_file": ".env", "env_file_encoding": "utf-8"} + + +def load_settings() -> Settings: + return Settings() diff --git a/src/dedup.py b/src/dedup.py new file mode 100644 index 0000000..ddeb4d3 --- /dev/null +++ b/src/dedup.py @@ -0,0 +1,81 @@ +"""Deduplicate events across multiple providers.""" + +from __future__ import annotations + +import logging +import re +from difflib import SequenceMatcher + +from src.models import NormalizedEvent + +logger = logging.getLogger(__name__) + +# Cross-provider titles for the same show often differ slightly. +_NAME_SIMILARITY_MIN = 0.78 +# Venue strings vary (suffixes, punctuation); stricter than names. 
_VENUE_SIMILARITY_MIN = 0.88

_WS_RE = re.compile(r"\s+")


def _collapse_ws(s: str) -> str:
    """Return *s* stripped, lowercased, with whitespace runs collapsed."""
    return _WS_RE.sub(" ", s.strip().lower())


def _similarity(a: str, b: str) -> float:
    """Return a 0.0-1.0 similarity ratio between two normalized strings.

    An empty string on either side yields 0.0, so records with missing
    text never match each other.
    """
    if not a or not b:
        return 0.0
    ca, cb = _collapse_ws(a), _collapse_ws(b)
    if ca == cb:
        return 1.0
    return SequenceMatcher(None, ca, cb).ratio()


def _is_same_event(a: NormalizedEvent, b: NormalizedEvent) -> bool:
    """Return True when *a* and *b* share a date and fuzzy-match on venue and name."""
    if a.event_date != b.event_date:
        return False
    if _similarity(a.venue, b.venue) < _VENUE_SIMILARITY_MIN:
        return False
    if _similarity(a.name, b.name) < _NAME_SIMILARITY_MIN:
        return False
    return True


def _pick_representative(cluster: list[NormalizedEvent]) -> NormalizedEvent:
    """Pick the richest record of a duplicate cluster (pre-scoring).

    Preference order: has a URL, then source rank (ticketmaster over
    seatgeek over anything else), then longer name, then name ordering
    as a deterministic tie-break.
    """
    source_rank = {"ticketmaster": 2, "seatgeek": 1}

    def key(e: NormalizedEvent) -> tuple:
        return (
            bool(e.url),
            source_rank.get(e.source, 0),
            len(e.name),
            e.name,
        )

    return max(cluster, key=key)


def deduplicate(events: list[NormalizedEvent]) -> list[NormalizedEvent]:
    """Remove duplicate events across providers.

    Strategy: same calendar day + fuzzy venue + fuzzy event name. An event
    joins the first existing cluster that contains a matching member;
    otherwise it starts a new cluster. Two events with the same date and
    venue but clearly different names are kept as separate events.

    Args:
        events: Events from all providers, in any order.

    Returns:
        One representative per cluster, ordered by the first appearance
        of each cluster in ``events``.
    """
    if not events:
        return []

    clusters: list[list[NormalizedEvent]] = []
    # A match requires equal dates, so only clusters from the same day can
    # ever accept an event; bucketing avoids comparing across dates.
    clusters_by_date: dict = {}
    for event in events:
        same_day = clusters_by_date.setdefault(event.event_date, [])
        for cluster in same_day:
            if any(_is_same_event(member, event) for member in cluster):
                cluster.append(event)
                break
        else:
            cluster = [event]
            same_day.append(cluster)
            clusters.append(cluster)

    deduped = [_pick_representative(c) for c in clusters]
    removed = len(events) - len(deduped)
    if removed:
        logger.info("Deduplication removed %d duplicate(s)", removed)
    return deduped
logger = logging.getLogger(__name__)


def parse_args() -> argparse.Namespace:
    """Parse the CLI flags ``--dry-run`` and ``--alerts-only``."""
    parser = argparse.ArgumentParser(description="EventRate — Toronto event pricing assistant")
    parser.add_argument("--dry-run", action="store_true", help="Fetch and print only, no side effects")
    parser.add_argument("--alerts-only", action="store_true", help="Send Telegram alerts but skip Airbnb updates")
    return parser.parse_args()


def fetch_all_events(settings) -> list[NormalizedEvent]:
    """Fetch events from all configured providers.

    A provider that returns nothing, or that raises, counts as a failure
    and does not stop the remaining providers from being queried.

    Args:
        settings: Object exposing ``ticketmaster_key``,
            ``seatgeek_client_id`` and ``lookahead_days``.

    Returns:
        The concatenated events of every provider that returned any.
    """
    providers = [
        TicketmasterProvider(
            api_key=settings.ticketmaster_key,
            lookahead_days=settings.lookahead_days,
        ),
        SeatGeekProvider(
            client_id=settings.seatgeek_client_id,
            lookahead_days=settings.lookahead_days,
        ),
    ]

    all_events: list[NormalizedEvent] = []
    failures = 0

    for provider in providers:
        logger.info("Fetching from %s...", provider.name)
        try:
            events = provider.fetch()
        except Exception:
            # Providers are expected to swallow their own errors, but one
            # that does not must never block the other sources.
            logger.exception("Provider %s raised during fetch", provider.name)
            events = []

        if events:
            all_events.extend(events)
        else:
            failures += 1
            logger.warning("No events from %s", provider.name)

    if failures == len(providers):
        logger.error("All providers failed")

    return all_events


def filter_by_window(events: list[NormalizedEvent], lookahead_days: int) -> list[NormalizedEvent]:
    """Keep only events dated from today through today + ``lookahead_days`` (inclusive)."""
    today = date.today()
    cutoff = today + timedelta(days=lookahead_days)
    return [e for e in events if today <= e.event_date <= cutoff]


def print_summary(events: list[NormalizedEvent]) -> None:
    """Print a human-readable summary to stdout."""
    if not events:
        print("No upcoming events found.")
        return

    print(f"\n{'='*60}")
    print(f" EventRate — {len(events)} events in the next window")
    print(f"{'='*60}\n")

    for event in events:
        print(f" [{event.score:.2f}] {event.event_date} | {event.name}")
        print(f" {event.venue} ({event.source})")
        if event.url:
            print(f" {event.url}")
        print()


def _event_price(base_price: int, increase_pct: int) -> int:
    """Return ``base_price`` raised by ``increase_pct`` percent, rounded down.

    Uses integer arithmetic: the float form ``int(base * (1 + pct / 100))``
    truncates representation error, e.g. 100 at 15% came out as 114.
    """
    return base_price * (100 + increase_pct) // 100


def update_airbnb_prices(events: list[NormalizedEvent], settings) -> None:
    """Attempt to update Airbnb prices for flagged dates.

    Every distinct event date gets the same event price. All automation
    errors are caught and logged so failures don't crash the runner.

    Args:
        events: Events whose dates should be repriced.
        settings: Object exposing ``airbnb_listing_id``,
            ``airbnb_base_price`` and ``price_increase_pct``.
    """
    # NOTE(review): the listing id only gates this step; it is not passed
    # on to the calendar automation below.
    if not settings.airbnb_listing_id:
        logger.info("No Airbnb listing configured, skipping price updates")
        return

    # Imported lazily so alert-only runs work without Playwright installed.
    try:
        from playwright.sync_api import sync_playwright
        from src.airbnb.auth import load_authenticated_context
        from src.airbnb.calendar import update_price
    except ImportError:
        logger.error("Playwright not installed, cannot update Airbnb prices")
        return

    new_price = _event_price(settings.airbnb_base_price, settings.price_increase_pct)
    event_dates = sorted({e.event_date for e in events})

    logger.info(
        "Updating Airbnb prices for %d dates to $%d",
        len(event_dates),
        new_price,
    )

    successes = 0
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            context = load_authenticated_context(browser)
            page = context.new_page()

            for target_date in event_dates:
                if update_price(page, target_date, new_price):
                    successes += 1

            browser.close()
    except FileNotFoundError as e:
        logger.error("Auth state missing: %s", e)
    except Exception:
        logger.exception("Airbnb automation failed")

    logger.info("Airbnb: updated %d/%d dates", successes, len(event_dates))


def main() -> None:
    """Run the pipeline: fetch, deduplicate, score, filter, print, alert, update.

    Exits with status 1 when no provider returned any event, which
    ``fetch_all_events`` reports as all providers having failed.
    ``--dry-run`` stops after printing; ``--alerts-only`` skips the
    Airbnb price update.
    """
    args = parse_args()
    settings = load_settings()
    setup_logging(settings.log_level)

    logger.info("EventRate starting (dry_run=%s, alerts_only=%s)", args.dry_run, args.alerts_only)

    # 1. Fetch
    raw_events = fetch_all_events(settings)
    if not raw_events:
        logger.warning("No events fetched from any provider")
        if not args.dry_run:
            send_alert([], settings.telegram_bot_token, settings.telegram_chat_id)
        # Nonzero so cron/Docker can tell a failed run from a successful one.
        sys.exit(1)

    # 2. Deduplicate
    unique_events = deduplicate(raw_events)

    # 3. Score
    scored_events = score_events(unique_events)

    # 4. Filter
    upcoming = filter_by_window(scored_events, settings.lookahead_days)

    # 5. Output
    print_summary(upcoming)

    if args.dry_run:
        logger.info("Dry run complete, no alerts or updates sent")
        return

    # 6. Alert
    alert_ok = send_alert(
        upcoming,
        settings.telegram_bot_token,
        settings.telegram_chat_id,
    )

    if not alert_ok:
        logger.error("Telegram alert failed")

    # 7. Optionally update Airbnb
    if not args.alerts_only and upcoming:
        update_airbnb_prices(upcoming, settings)

    logger.info("EventRate complete")


if __name__ == "__main__":
    main()
logger = logging.getLogger(__name__)

SEND_URL = "https://api.telegram.org/bot{token}/sendMessage"

# Telegram caps a message at 4096 characters; stay below that to leave
# headroom for characters that may count more than once (e.g. emoji).
_MAX_MESSAGE_CHARS = 3500

# MarkdownV2 reserved characters, plus the backslash itself so literal
# backslashes in event text are not read as escapes.
_MD_ESCAPES = str.maketrans({ch: "\\" + ch for ch in r"\_*[]()~`>#+-=|{}.!"})


def send_alert(
    events: list[NormalizedEvent],
    bot_token: str,
    chat_id: str,
) -> bool:
    """Send a formatted event summary to Telegram.

    Args:
        events: Events to report; an empty list sends nothing.
        bot_token: Telegram bot token (embedded in the request URL).
        chat_id: Destination chat id.

    Returns:
        True when there was nothing to send or every message part was
        accepted; False when credentials are missing or a request failed.
    """
    if not bot_token or not chat_id:
        logger.warning("Telegram credentials not configured, skipping alert")
        return False

    if not events:
        logger.info("No events to report")
        return True

    # httpx logs every request URL at INFO, and the URL contains the bot
    # token; keep that out of the application log.
    httpx_logger = logging.getLogger("httpx")
    if httpx_logger.getEffectiveLevel() < logging.WARNING:
        httpx_logger.setLevel(logging.WARNING)

    url = SEND_URL.format(token=bot_token)
    try:
        for part in _split_message(_format_message(events)):
            resp = httpx.post(
                url,
                json={
                    "chat_id": chat_id,
                    "text": part,
                    "parse_mode": "MarkdownV2",
                },
                timeout=15,
            )
            resp.raise_for_status()
    except Exception as exc:
        # No traceback on purpose: httpx error messages quote the request
        # URL, so the token is redacted from the text before logging.
        logger.error(
            "Failed to send Telegram alert: %s: %s",
            type(exc).__name__,
            _redact(str(exc), bot_token),
        )
        return False

    logger.info("Telegram alert sent successfully")
    return True


def _redact(text: str, secret: str) -> str:
    """Return *text* with every occurrence of *secret* masked."""
    return text.replace(secret, "<redacted>") if secret else text


def _split_message(message: str, limit: int = _MAX_MESSAGE_CHARS) -> list[str]:
    """Split *message* on line boundaries into parts of at most *limit* chars.

    A message that already fits is returned unchanged as a single part.
    Formatting never spans lines, so each part stays valid on its own.
    A single line longer than *limit* is kept whole in its own part.
    """
    if len(message) <= limit:
        return [message]

    parts: list[str] = []
    current: list[str] = []
    size = 0
    for line in message.split("\n"):
        cost = len(line) + 1  # +1 for the joining newline
        if current and size + cost > limit:
            parts.append("\n".join(current))
            current, size = [], 0
        current.append(line)
        size += cost
    if current:
        parts.append("\n".join(current))

    # Telegram rejects empty text; drop parts that are only blank lines.
    return [part for part in parts if part.strip()]


def _format_message(events: list[NormalizedEvent]) -> str:
    """Group events by date and format as MarkdownV2."""
    sorted_events = sorted(events, key=lambda e: e.event_date)

    lines = ["*EventRate Alert* 🏟️\n"]
    for event_date, group in groupby(sorted_events, key=lambda e: e.event_date):
        lines.append(f"*{_escape_md(event_date.strftime('%a %b %d, %Y'))}*")
        for event in group:
            venue = _escape_md(event.venue)
            name = _escape_md(event.name)
            score_bar = _score_indicator(event.score)
            lines.append(f" {score_bar} {name} @ {venue}")
        lines.append("")

    return "\n".join(lines)


def _score_indicator(score: float) -> str:
    """Map a score to a marker: red at >= 0.8, yellow at >= 0.5, else green."""
    if score >= 0.8:
        return "🔴"
    if score >= 0.5:
        return "🟡"
    return "🟢"


def _escape_md(text: str) -> str:
    """Escape MarkdownV2 special characters, including the backslash."""
    return text.translate(_MD_ESCAPES)
class SeatGeekProvider(EventProvider):
    """Event provider backed by the SeatGeek events endpoint (``BASE_URL``).

    Queries Toronto events inside the lookahead window and keeps only those
    whose SeatGeek ``score`` is at least ``MIN_SCORE_THRESHOLD``.
    """

    def __init__(self, client_id: str, lookahead_days: int = 30) -> None:
        """Store the API client ID and the size of the date window in days."""
        self._client_id = client_id
        self._lookahead_days = lookahead_days

    @property
    def name(self) -> str:
        """Provider name, also used as the ``source`` of produced events."""
        return "seatgeek"

    def fetch(self) -> list[NormalizedEvent]:
        """Return normalized events, or an empty list on any failure.

        A missing client ID is logged as a warning; any exception raised
        while fetching is logged with its traceback. Neither propagates.
        """
        if not self._client_id:
            logger.warning("SeatGeek client ID not configured, skipping")
            return []

        try:
            return self._fetch_events()
        except Exception:
            logger.exception("SeatGeek fetch failed")
            return []

    def _fetch_events(self) -> list[NormalizedEvent]:
        """Request one page of events and convert the qualifying ones.

        Raises:
            Whatever ``httpx`` raises for transport errors or non-2xx
            responses; ``fetch`` is the caller that absorbs these.
        """
        # Local import: the module's datetime import does not bring in
        # ``timezone``, and ``datetime.utcnow()`` is deprecated.
        from datetime import timezone

        # Take "now" once so both window bounds derive from the same instant.
        now = datetime.now(timezone.utc)
        stamp_format = "%Y-%m-%dT%H:%M:%S"
        start = now.strftime(stamp_format)
        end = (now + timedelta(days=self._lookahead_days)).strftime(stamp_format)

        params = {
            "client_id": self._client_id,
            "venue.city": "Toronto",
            "datetime_utc.gte": start,
            "datetime_utc.lte": end,
            "per_page": 100,
            "sort": "datetime_utc.asc",
        }

        # TODO: Handle pagination if > 100 events
        resp = httpx.get(BASE_URL, params=params, timeout=30)
        resp.raise_for_status()
        data = resp.json()

        raw_events = data.get("events", [])
        logger.info("SeatGeek returned %d raw events", len(raw_events))

        results: list[NormalizedEvent] = []
        for item in raw_events:
            # ``or 0.0`` covers an explicit null score in the payload.
            score = item.get("score", 0.0) or 0.0
            if score < MIN_SCORE_THRESHOLD:
                continue

            # A null venue or venue name must not abort the whole fetch:
            # one malformed item would otherwise discard every event.
            venue_name = (item.get("venue") or {}).get("name") or "Unknown Venue"
            event_date = self._extract_date(item)
            if event_date is None:
                continue

            results.append(
                NormalizedEvent(
                    name=item.get("title") or "Unknown",
                    event_date=event_date,
                    venue=venue_name,
                    source=self.name,
                    url=item.get("url") or "",
                    raw=item,
                )
            )

        logger.info("SeatGeek: %d events above score threshold", len(results))
        return results

    @staticmethod
    def _extract_date(item: dict) -> date | None:
        """Return the event's calendar date, or None if absent or unparseable.

        Prefers ``datetime_local`` and falls back to ``datetime_utc``.
        """
        dt_str = item.get("datetime_local") or item.get("datetime_utc")
        if dt_str:
            try:
                return datetime.fromisoformat(dt_str).date()
            except (TypeError, ValueError):
                # TypeError: the field held a non-string value.
                logger.warning("Unparseable date: %s", dt_str)
        return None
class TicketmasterProvider(EventProvider):
    """Event provider backed by the Ticketmaster Discovery endpoint (``BASE_URL``).

    Queries Toronto events inside the lookahead window and keeps only those
    whose venue name appears in ``MAJOR_VENUES``.
    """

    def __init__(self, api_key: str, lookahead_days: int = 30) -> None:
        """Store the API key and the size of the date window in days."""
        self._api_key = api_key
        self._lookahead_days = lookahead_days

    @property
    def name(self) -> str:
        """Provider name, also used as the ``source`` of produced events."""
        return "ticketmaster"

    def fetch(self) -> list[NormalizedEvent]:
        """Return normalized events, or an empty list on any failure.

        A missing API key is logged as a warning; any exception raised while
        fetching is logged with its traceback. Neither propagates.
        """
        if not self._api_key:
            logger.warning("Ticketmaster API key not configured, skipping")
            return []

        try:
            return self._fetch_events()
        except Exception:
            logger.exception("Ticketmaster fetch failed")
            return []

    def _fetch_events(self) -> list[NormalizedEvent]:
        """Request one page of events and convert those at major venues.

        Raises:
            Whatever ``httpx`` raises for transport errors or non-2xx
            responses; ``fetch`` is the caller that absorbs these.
        """
        # Local import: the module's datetime import does not bring in
        # ``timezone``, and ``datetime.utcnow()`` is deprecated.
        from datetime import timezone

        # Take "now" once so both window bounds derive from the same instant.
        now = datetime.now(timezone.utc)
        stamp_format = "%Y-%m-%dT%H:%M:%SZ"
        start = now.strftime(stamp_format)
        end = (now + timedelta(days=self._lookahead_days)).strftime(stamp_format)

        params = {
            "apikey": self._api_key,
            "city": "Toronto",
            "countryCode": "CA",
            "startDateTime": start,
            "endDateTime": end,
            "size": 100,
            "sort": "date,asc",
        }

        # TODO: Handle pagination if > 100 events in the window
        resp = httpx.get(BASE_URL, params=params, timeout=30)
        resp.raise_for_status()
        data = resp.json()

        raw_events = (data.get("_embedded") or {}).get("events") or []
        logger.info("Ticketmaster returned %d raw events", len(raw_events))

        results: list[NormalizedEvent] = []
        for item in raw_events:
            venue_name = self._extract_venue(item)
            if not self._is_major_venue(venue_name):
                continue

            event_date = self._extract_date(item)
            if event_date is None:
                continue

            results.append(
                NormalizedEvent(
                    name=item.get("name") or "Unknown",
                    event_date=event_date,
                    venue=venue_name,
                    source=self.name,
                    url=item.get("url") or "",
                    raw=item,
                )
            )

        logger.info("Ticketmaster: %d events at major venues", len(results))
        return results

    @staticmethod
    def _extract_venue(item: dict) -> str:
        """Return the first embedded venue's name, or ``"Unknown Venue"``.

        Always returns a string: a null ``_embedded``, ``venues`` or ``name``
        falls back to the placeholder instead of leaking ``None`` into
        ``_is_major_venue``, where it would raise and discard the whole fetch.
        """
        venues = (item.get("_embedded") or {}).get("venues") or []
        if venues:
            return venues[0].get("name") or "Unknown Venue"
        return "Unknown Venue"

    @staticmethod
    def _is_major_venue(venue_name: str) -> bool:
        """Return True if the lowercased, stripped name is in ``MAJOR_VENUES``."""
        return venue_name.lower().strip() in MAJOR_VENUES

    @staticmethod
    def _extract_date(item: dict) -> date | None:
        """Return ``dates.start.localDate`` as a date, or None if unusable."""
        dates = (item.get("dates") or {}).get("start") or {}
        date_str = dates.get("localDate")
        if date_str:
            try:
                return datetime.strptime(date_str, "%Y-%m-%d").date()
            except (TypeError, ValueError):
                # TypeError: the field held a non-string value.
                logger.warning("Unparseable date: %s", date_str)
        return None
+""" + +from __future__ import annotations + +import logging +from src.models import NormalizedEvent + +logger = logging.getLogger(__name__) + +# TODO: Validate these capacities and expand as needed +VENUE_CAPACITY: dict[str, int] = { + "rogers centre": 49000, + "scotiabank arena": 19800, + "budweiser stage": 16000, + "coca-cola coliseum": 8000, + "meridian hall": 3200, + "massey hall": 2750, + "history": 2500, +} + +MAX_CAPACITY = max(VENUE_CAPACITY.values()) + + +def score_event(event: NormalizedEvent) -> NormalizedEvent: + """Return a copy of the event with an impact score assigned.""" + venue_key = event.venue.lower().strip() + capacity = VENUE_CAPACITY.get(venue_key, 0) + + if capacity == 0: + # Unknown venue — assign a moderate default so it still surfaces + score = 0.3 + else: + score = round(capacity / MAX_CAPACITY, 2) + + return NormalizedEvent( + name=event.name, + event_date=event.event_date, + venue=event.venue, + source=event.source, + url=event.url, + score=score, + raw=event.raw, + ) + + +def score_events(events: list[NormalizedEvent]) -> list[NormalizedEvent]: + """Score all events and return sorted by score descending.""" + scored = [score_event(e) for e in events] + scored.sort(key=lambda e: (-e.score, e.event_date)) + logger.info("Scored %d events", len(scored)) + return scored diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29