diff --git a/.env.example b/.env.example index 41321f5..f9434eb 100644 --- a/.env.example +++ b/.env.example @@ -1,5 +1,6 @@ # === Event providers === TICKETMASTER_KEY=your_ticketmaster_api_key_here +# Optional — leave placeholder to skip SeatGeek (Ticketmaster-only still works) SEATGEEK_CLIENT_ID=your_seatgeek_client_id_here # === Telegram === diff --git a/BACKLOG.md b/BACKLOG.md index f67e817..cff1c06 100644 --- a/BACKLOG.md +++ b/BACKLOG.md @@ -50,6 +50,23 @@ | 5.1 Main CLI runner with modes | Done | `src/main.py` — `--dry-run`, `--alerts-only`, full | | 5.2 Dockerfile for Playwright | Done | `Dockerfile` (Chromium + deps) | | 5.3 Cron configuration guide | Done | See [README.md](README.md) | +| 5.4 Deploy on automationlab | Done | `/opt/atanyrate`, ansible `make deploy-atanyrate` | +| 5.5 Secrets in Ansible vault | Done | `vault_atanyrate_*` keys | + +--- + +## Outstanding (continue here) + +| Priority | Task | Status | Notes | +|---|---|---|---| +| P0 | **Refresh Airbnb `state.json`** | Blocked (manual) | April session on server; price panel timed out — re-run `airbnb_login.py` on Mac | +| P1 | **Verify calendar price update E2E** | In progress | Calendar loads; `PriceInput-basePrice` not appearing — UI or stale session | +| P2 | **Optional stealth browser** | Done (code) | `AIRBNB_STEALTH=1` + [invisible_playwright](https://github.com/feder-cr/invisible_playwright); try if Airbnb blocks Chromium | +| P2 | Push repo changes to Gitea | Todo | calendar.py, auth, browser.py, main.py | +| P3 | SeatGeek client ID on server | Done | Both providers 200 OK | +| P3 | Beszel agent on automationlab | Todo | ansible `beszel-install-agents.sh` | +| P3 | Dedicated Docker LXC | Deferred | Only if isolation/disk allows | +| P3 | Tune `MIN_ALERT_SCORE` / scoring | Todo | Observe real Telegram alerts first | --- diff --git a/README.md b/README.md index f48a2c6..ccfb4e3 100644 --- a/README.md +++ b/README.md @@ -41,13 +41,46 @@ python -m src.main # full flow (alerts + Airbnb 
update) | `LOOKAHEAD_DAYS` | No | Days ahead to scan for events (default: 30) | | `LOG_LEVEL` | No | Logging level (default: INFO) | -## Airbnb session setup (one-time) +## Airbnb session setup (one-time / refresh) + +Airbnb requires a logged-in browser session saved to `state.json`. Headless servers cannot complete 2FA — run login on a machine with a display, then copy the file to the server. + +### Local login (Mac) ```bash python scripts/airbnb_login.py ``` -This opens a headed browser. Log in manually, complete any 2FA, then press Enter in the terminal. Your session is saved to `state.json` for reuse in headless runs. +Chromium opens. Log in, complete any 2FA, then press Enter in the terminal. Session cookies are saved to `state.json`. + +### Production (automationlab @ 10.0.10.45) + +Recommended: login on your Mac, then copy: + +```bash +scp state.json root@10.0.10.45:/opt/atanyrate/state.json +ssh root@10.0.10.45 'chmod 600 /opt/atanyrate/state.json' +``` + +Or use the ansible deploy script: `ATANYRATE_STATE=~/path/to/state.json make deploy-atanyrate` (see `docs/guides/atanyrate-deploy.md` in the ansible repo). + +### Session expiry + +Airbnb sessions expire (weeks to months). When calendar automation fails with login/auth errors or empty calendar pages, re-run `scripts/airbnb_login.py` and push a fresh `state.json`. Weekly cron uses `--alerts-only` by default so Telegram alerts keep working if Airbnb auth is stale. 
+ +### Stealth browser (optional) + +If Airbnb blocks or challenges stock Playwright Chromium, try [invisible_playwright](https://github.com/feder-cr/invisible_playwright) (patched Firefox, anti-detect): + +```bash +pip install 'git+https://github.com/feder-cr/invisible_playwright.git' +python -m invisible_playwright fetch # ~100 MB one-time + +AIRBNB_STEALTH=1 python scripts/airbnb_login.py # login + save state.json +AIRBNB_STEALTH=1 python -m src.main # headless calendar run +``` + +Use **the same engine** for login and automation — `state.json` from Chromium does not work in Firefox and vice versa. Stealth mode does **not** bypass 2FA; you still log in manually in the headed window. ## Running on cron @@ -63,6 +96,15 @@ docker build -t eventrate . docker run --rm --env-file .env -v $(pwd)/state.json:/app/state.json eventrate ``` +## Production deploy + +Deployed on **automationlab** (`10.0.10.45`) at `/opt/atanyrate`. Full guide: ansible repo `docs/guides/atanyrate-deploy.md`. + +```bash +# From ~/Documents/code/ansible +ATANYRATE_ENV=~/Documents/code/@AnyRate/.env make deploy-atanyrate +``` + ## Project docs - [PROJECT.md](PROJECT.md) — goals, scope, constraints diff --git a/docs/HANDOFF.md b/docs/HANDOFF.md new file mode 100644 index 0000000..b63e06c --- /dev/null +++ b/docs/HANDOFF.md @@ -0,0 +1,44 @@ +# AtAnyRate — handoff + +**Repo:** `gitea@git.levkin.ca:ilia/AtAnyRate.git` · local `~/Documents/code/AtAnyRate` +**Deploy:** pve10 LXC **automationlab** @ `10.0.10.59` — `make deploy-atanyrate` (ansible) +**Vikunja:** [todo.levkin.ca → Business → AtAnyRate](https://todo.levkin.ca) (`AAR`) +**Epic backlog:** [../BACKLOG.md](../BACKLOG.md) + +--- + +## Open tasks + +| P | Task | Owner | Status | +|---|------|-------|--------| +| **P0** | Refresh Airbnb `state.json` (Mac `airbnb_login.py` → scp to guest) | @you | blocked — needs Mac browser login | +| **P1** | Verify calendar price update E2E (`PriceInput-basePrice` selector) | @agent | todo | +| **P2** | Push 
pending repo changes to Gitea (calendar.py, auth, browser.py) | @agent | ⏳ local changes on `main` | +| **P3** | Beszel agent on automationlab | @agent | todo | +| **P3** | Tune `MIN_ALERT_SCORE` after real Telegram alerts | @you | todo | + +--- + +## Done (reference) + +- Ticketmaster + SeatGeek providers, Telegram alerter, Playwright calendar automation +- Deploy on automationlab; ansible vault `vault_atanyrate_*` +- SeatGeek client ID on server (both providers 200 OK) + +--- + +## Commands + +```bash +cd ~/Documents/code/AtAnyRate +# alerts only (no Airbnb browser) +python -m src.main --alerts-only --dry-run + +# on Mac: refresh session +python scripts/airbnb_login.py +scp state.json root@10.0.10.59:/opt/atanyrate/state.json +``` + +--- + +*Updated 2026-06-02* diff --git a/scripts/airbnb_login.py b/scripts/airbnb_login.py index 80eaf7d..d80c292 100644 --- a/scripts/airbnb_login.py +++ b/scripts/airbnb_login.py @@ -1,12 +1,22 @@ #!/usr/bin/env python3 """One-time interactive Airbnb login to save session state. -Run this once (or whenever your session expires): +Run on a machine with a display (Mac recommended). Re-run when Airbnb +sessions expire or calendar automation hits login/auth errors. + +Default (Chromium): + python scripts/airbnb_login.py -A headed Chromium browser will open. Log in manually, complete 2FA, -then return to the terminal and press Enter. Your session cookies -and localStorage will be saved to state.json for headless reuse. 
+Optional stealth Firefox (if Airbnb blocks Chromium — same mode for login + runs): + + pip install 'git+https://github.com/feder-cr/invisible_playwright.git' + python -m invisible_playwright fetch + AIRBNB_STEALTH=1 python scripts/airbnb_login.py + +Then copy to automationlab: + + scp state.json root@10.0.10.45:/opt/atanyrate/state.json """ from pathlib import Path diff --git a/src/airbnb/auth.py b/src/airbnb/auth.py index 23c64cb..aa2ed87 100644 --- a/src/airbnb/auth.py +++ b/src/airbnb/auth.py @@ -5,12 +5,15 @@ Strategy: 2. Save storage state (cookies + localStorage) to state.json. 3. Subsequent runs: load state.json into a headless context. -WARNING: Airbnb sessions expire. If automation fails with auth errors, -re-run scripts/airbnb_login.py to refresh state.json. +Optional AIRBNB_STEALTH=1 uses invisible_playwright (Firefox anti-detect). +Use the same mode for login and automation — storage state is not +portable between Chromium and Firefox. -ASSUMPTION: Airbnb does not aggressively block Playwright's Chromium -fingerprint for authenticated hosts accessing their own calendar. -This is unverified and may break. +WARNING: Airbnb sessions expire (typically weeks to months). If automation +fails with auth errors, login redirects, or empty calendar pages, re-run +``scripts/airbnb_login.py`` on a machine with a display and copy the new +``state.json`` to the server. Cron runs ``--alerts-only`` by default so +Telegram alerts continue even when Airbnb auth is stale. 
""" from __future__ import annotations @@ -18,11 +21,16 @@ from __future__ import annotations import logging from pathlib import Path -from playwright.sync_api import Browser, BrowserContext, sync_playwright +from playwright.sync_api import Browser, BrowserContext + +from src.airbnb.browser import open_browser, use_stealth_browser +from src.airbnb.calendar import _dismiss_cookie_banner_if_present logger = logging.getLogger(__name__) DEFAULT_STATE_PATH = Path("state.json") +_LOGIN_URL = "https://www.airbnb.ca/login" +_CALENDAR_URL = "https://www.airbnb.ca/hosting/calendar" def interactive_login(state_path: Path = DEFAULT_STATE_PATH) -> None: @@ -31,22 +39,26 @@ def interactive_login(state_path: Path = DEFAULT_STATE_PATH) -> None: After the user completes login (including any 2FA), they press Enter in the terminal. The browser's storage state is then saved. """ - with sync_playwright() as p: - browser = p.chromium.launch(headless=False) + engine = "stealth Firefox (invisible_playwright)" if use_stealth_browser() else "Chromium" + logger.info("Airbnb login: opening headed %s", engine) + + with open_browser(headless=False) as browser: context = browser.new_context() page = context.new_page() - page.goto("https://www.airbnb.ca/login") + page.goto(_LOGIN_URL, wait_until="domcontentloaded", timeout=45_000) + _dismiss_cookie_banner_if_present(page) input( "\n>>> Log in to Airbnb in the browser window.\n" ">>> Complete any 2FA prompts.\n" - ">>> Then press ENTER here to save the session...\n" + ">>> Then press ENTER here after login succeeds...\n" ) + page.goto(_CALENDAR_URL, wait_until="domcontentloaded", timeout=45_000) + page.wait_for_timeout(1500) + _dismiss_cookie_banner_if_present(page) context.storage_state(path=str(state_path)) - logger.info("Storage state saved to %s", state_path) - - browser.close() + logger.info("Storage state saved to %s", state_path.resolve()) def load_authenticated_context( @@ -57,12 +69,18 @@ def load_authenticated_context( Raises 
FileNotFoundError if state.json doesn't exist. """ + resolved = state_path.resolve() + logger.info("Airbnb auth: state file path=%s exists=%s", resolved, state_path.exists()) if not state_path.exists(): raise FileNotFoundError( f"No saved session at {state_path}. " "Run 'python scripts/airbnb_login.py' first." ) - context = browser.new_context(storage_state=str(state_path)) - logger.info("Loaded auth state from %s", state_path) + context = browser.new_context( + storage_state=str(state_path), + viewport={"width": 1440, "height": 900}, + locale="en-CA", + ) + logger.info("Airbnb auth: loaded storage state from %s", resolved) return context diff --git a/src/airbnb/browser.py b/src/airbnb/browser.py new file mode 100644 index 0000000..65aef6f --- /dev/null +++ b/src/airbnb/browser.py @@ -0,0 +1,51 @@ +"""Browser launcher for Airbnb Playwright automation. + +Optional stealth mode uses invisible_playwright (patched Firefox) when +AIRBNB_STEALTH=1. Login and headless runs must use the same engine — +Chromium state.json is not compatible with Firefox and vice versa. +""" + +from __future__ import annotations + +import os +from contextlib import contextmanager +from typing import Iterator + +from playwright.sync_api import Browser + +_STEALTH_TRUTHY = frozenset({"1", "true", "yes", "on"}) + + +def use_stealth_browser() -> bool: + return os.environ.get("AIRBNB_STEALTH", "").strip().lower() in _STEALTH_TRUTHY + + +@contextmanager +def open_browser(*, headless: bool = True, slow_mo: int = 0) -> Iterator[Browser]: + """Launch Chromium (default) or stealth Firefox (AIRBNB_STEALTH=1).""" + if use_stealth_browser(): + try: + from invisible_playwright import InvisiblePlaywright + except ImportError as e: + raise ImportError( + "AIRBNB_STEALTH=1 requires invisible_playwright. 
Install:\n" + " pip install 'git+https://github.com/feder-cr/invisible_playwright.git'\n" + " python -m invisible_playwright fetch" + ) from e + with InvisiblePlaywright(headless=headless) as browser: + yield browser + else: + from playwright.sync_api import sync_playwright + + launch_kw: dict = { + "headless": headless, + "args": ["--disable-blink-features=AutomationControlled"], + } + if slow_mo > 0: + launch_kw["slow_mo"] = slow_mo + with sync_playwright() as p: + browser = p.chromium.launch(**launch_kw) + try: + yield browser + finally: + browser.close() diff --git a/src/airbnb/calendar.py b/src/airbnb/calendar.py index 1374c4c..8c3c1d1 100644 --- a/src/airbnb/calendar.py +++ b/src/airbnb/calendar.py @@ -1,87 +1,111 @@ -"""Airbnb calendar price automation via Playwright. +"""Airbnb host calendar price automation via Playwright. -WARNING: This module is inherently fragile. Airbnb can change their UI -at any time, breaking all selectors below. Treat every selector as -a best-guess placeholder that WILL need updating. - -ASSUMPTION: The selectors below are STUBS. They have NOT been verified -against the live Airbnb host calendar UI. Do not expect this module -to work without first inspecting the actual DOM and updating selectors. +Multicalendar day cells (2025+ hyperloop): ``button[data-date="YYYY-MM-DD"]`` / +``id="date-YYYY-MM-DD"`` — often **no** ``role=application`` or +``data-state--date-string``. Older builds used the latter; this module tries both. +Price field: ``data-testid="PriceInput-basePrice"``. If Airbnb changes the DOM again, +inspect a real calendar page (or ``AIRBNB_CALENDAR_DEBUG=1`` HTML dumps). 
""" from __future__ import annotations +import calendar import logging +import os +import re +import time from datetime import date -import time - -from playwright.sync_api import Page, TimeoutError as PlaywrightTimeout +from playwright.sync_api import Locator, Page, TimeoutError as PlaywrightTimeout logger = logging.getLogger(__name__) + +def _calendar_debug() -> bool: + return bool(os.environ.get("AIRBNB_CALENDAR_DEBUG")) + + +def _debug_assets_enabled() -> bool: + return bool(os.environ.get("AIRBNB_DEBUG_SCREENSHOT") or _calendar_debug()) + _MAX_UPDATE_ATTEMPTS = 3 _RETRY_DELAY_SEC = 2.0 -# All selectors below are UNVERIFIED PLACEHOLDERS. -# TODO: Inspect live Airbnb host calendar and replace these. -CALENDAR_URL = "https://www.airbnb.ca/hosting/calendar" -SELECTORS = { - # TODO: Replace with actual selector for date cells - "date_cell": 'td[data-date="{date_str}"]', - # TODO: Replace with actual selector for price input - "price_input": 'input[data-testid="price-input"]', - # TODO: Replace with actual selector for save button - "save_button": 'button[data-testid="save-button"]', +DEFAULT_HOST_ORIGIN = "https://www.airbnb.ca" + +# Multicalendar: month navigation (exact aria-label copy from host UI) +_ARIA_MONTH_FORWARD = "Move forward to switch to the next month." +_ARIA_MONTH_BACK = "Move backward to switch to the previous month." + +_MONTH_HEADING_RE = re.compile( + r"(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{4})", + re.I, +) + +_MONTH_NAME_TO_INT = { + calendar.month_name[i].lower(): i for i in range(1, 13) } +# Day cells: hyperloop uses data-date; legacy host UI used data-state--date-string. +_DAY_BUTTON_LOCATOR = 'button[data-date], button[data-state--date-string]' -def update_price(page: Page, target_date: date, new_price: int) -> bool: - """Navigate to calendar and set the price for a specific date. 
- Retries transient failures a few times, then returns False so the - caller can continue with other dates (alert-only degradation is - handled in ``main``). - """ +def resolve_calendar_url(listing_id: str, calendar_url_override: str) -> str: + """Build the calendar URL, or use a user-supplied override from the browser.""" + override = (calendar_url_override or "").strip() + if override: + return override.rstrip("/") + lid = (listing_id or "").strip() + if not lid: + return f"{DEFAULT_HOST_ORIGIN}/hosting/calendar" + return f"{DEFAULT_HOST_ORIGIN}/multicalendar/{lid}" + + +def parse_month_heading_text(text: str) -> tuple[int, int] | None: + """Parse ``April 2026``-style text into ``(year, month)``.""" + m = _MONTH_HEADING_RE.search(text.strip()) + if not m: + return None + name, ys = m.group(1), m.group(2) + mi = _MONTH_NAME_TO_INT.get(name.lower()) + if mi is None: + return None + return int(ys), mi + + +def update_price( + page: Page, + target_date: date, + new_price: int, + calendar_url: str, +) -> bool: + """Open the host calendar and set a custom nightly price for one date.""" date_str = target_date.strftime("%Y-%m-%d") - logger.info("Updating price for %s to $%d", date_str, new_price) + logger.info("Updating price for %s to $%d (calendar=%s)", date_str, new_price, calendar_url) last_error: Exception | None = None for attempt in range(1, _MAX_UPDATE_ATTEMPTS + 1): try: - page.goto(CALENDAR_URL, wait_until="networkidle", timeout=30_000) - - # TODO: Calendar may require scrolling to reach the target month. - # This is not implemented yet. 
- _navigate_to_month(page, target_date) - - date_selector = SELECTORS["date_cell"].format(date_str=date_str) - page.click(date_selector, timeout=10_000) - - page.fill(SELECTORS["price_input"], str(new_price), timeout=5_000) - page.click(SELECTORS["save_button"], timeout=5_000) - - # TODO: Verify that the price was actually saved (read back from UI) - page.wait_for_timeout(2000) - + _run_price_update_attempt(page, target_date, new_price, calendar_url) logger.info("Price updated for %s: $%d", date_str, new_price) return True - except PlaywrightTimeout as e: last_error = e logger.warning( - "Attempt %d/%d: timeout updating price for %s", + "Attempt %d/%d: timeout updating price for %s: %s", attempt, _MAX_UPDATE_ATTEMPTS, date_str, + e, ) except Exception as e: last_error = e logger.warning( - "Attempt %d/%d: error updating price for %s: %s", + "Attempt %d/%d: error updating price for %s (%s): %s", attempt, _MAX_UPDATE_ATTEMPTS, date_str, + type(e).__name__, e, ) @@ -91,14 +115,756 @@ def update_price(page: Page, target_date: date, new_price: int) -> bool: if isinstance(last_error, PlaywrightTimeout): logger.error("Timeout while updating price for %s after retries", date_str) elif last_error: - logger.exception("Failed to update price for %s after retries", date_str) + logger.error( + "Failed to update price for %s after retries: %s: %s", + date_str, + type(last_error).__name__, + last_error, + exc_info=(type(last_error), last_error, last_error.__traceback__), + ) return False -def _navigate_to_month(page: Page, target_date: date) -> None: - """Scroll the calendar forward/backward to reach the target month. +def _debug_screenshot(page: Page, tag: str) -> None: + if not _debug_assets_enabled(): + return + path = f"airbnb-debug-{tag}.png" + try: + page.screenshot(path=path, full_page=True) + logger.error("Wrote debug screenshot: %s", path) + except Exception as e: + logger.debug("Screenshot failed: %s", e) - TODO: This is a stub. 
Implementation depends on Airbnb's calendar - navigation controls (next/prev month buttons, month picker, etc.). + +def _debug_dump_html(page: Page, tag: str, max_chars: int = 400_000) -> None: + if not _calendar_debug(): + return + path = f"airbnb-debug-{tag}.html" + try: + html = page.content() + if len(html) > max_chars: + html = html[:max_chars] + "\n\n" + with open(path, "w", encoding="utf-8") as f: + f.write(html) + logger.error("Wrote debug HTML: %s (%d chars)", path, len(html)) + except Exception as e: + logger.debug("HTML dump failed: %s", e) + + +def _log_calendar_probe(page: Page, where: str) -> None: + """Log quick DOM signals so long waits are explainable (main frame + iframes).""" + parts: list[str] = [] + try: + title = page.title() + parts.append(f"title={title[:80]!r}") + except Exception as e: + parts.append(f"title=(error:{e!s})") + + try: + n_app = page.locator('[role="application"]').count() + n_dd = page.locator("button[data-date]").count() + n_legacy = page.locator("button[data-state--date-string]").count() + n_if = page.locator("iframe").count() + parts.append( + f"main_frame role=application={n_app} day[data-date]={n_dd} " + f"day[data-state--date-string]={n_legacy} iframe_tags={n_if}" + ) + except Exception as e: + parts.append(f"main_counts=(error:{e!s})") + + try: + frs = list(page.frames) + parts.append(f"frames={len(frs)}") + main = page.main_frame + for fr in frs: + if fr == main: + continue + try: + u = (fr.url or "")[:120] + except Exception: + u = "(url error)" + try: + nd = fr.locator(_DAY_BUTTON_LOCATOR).count() + except Exception: + nd = -1 + parts.append(f" subframe day_cells={nd} url={u!r}") + except Exception as e: + parts.append(f"frames=(error:{e!s})") + + logger.warning("Airbnb calendar probe [%s]: %s", where, " | ".join(parts)) + + try: + page.locator(_DAY_BUTTON_LOCATOR).first.wait_for(state="attached", timeout=2_000) + logger.warning( + "Airbnb calendar probe [%s]: day cell exists in DOM (attached) but visible waits 
failed — " + "overlay, off-screen grid, or session wall?", + where, + ) + except PlaywrightTimeout: + pass + except Exception as e: + logger.debug("attached probe: %s", e) + + if _calendar_debug(): + _debug_screenshot(page, f"probe-{where.replace(' ', '-')}") + _debug_dump_html(page, f"probe-{where.replace(' ', '-')}") + + +def _log_page_context(page: Page, where: str) -> None: + logger.info("Playwright %s — URL: %s", where, page.url) + + +def _abort_if_login_required(page: Page) -> None: + """Fail fast if session expired and Airbnb shows a login/signup flow.""" + u = page.url.lower() + if "/login" in u or "/signup" in u or "sign_in" in u or "/authenticate" in u: + logger.warning("Airbnb: login/signup URL detected — session may be expired: %s", page.url[:200]) + _debug_screenshot(page, "login-redirect") + raise PlaywrightTimeout( + "Airbnb redirected to login or signup. Refresh session: python scripts/airbnb_login.py" + ) + n = page.locator('input[type="password"]').count() + if n > 0 and page.locator('input[type="password"]').first.is_visible(): + logger.warning("Airbnb: password field visible — likely logged out (url=%s)", page.url[:200]) + _debug_screenshot(page, "login-form") + raise PlaywrightTimeout( + "Login form visible; session likely expired. 
Run: python scripts/airbnb_login.py" + ) + + +def _dismiss_cookie_banner_if_present(page: Page) -> None: + """Dismiss Airbnb's cookie consent overlay so it does not block the calendar.""" + try: + banner = page.get_by_test_id("main-cookies-banner-container") + banner.first.wait_for(state="visible", timeout=4_000) + except PlaywrightTimeout: + return + try: + btn = banner.get_by_role("button", name=re.compile(r"only\s*necessary", re.I)) + btn.click(timeout=8_000) + except PlaywrightTimeout: + logger.debug("Cookie banner visible but 'Only necessary' button not found or clickable") + return + except Exception as e: + logger.debug("Cookie banner dismiss failed: %s", e) + return + page.wait_for_timeout(500) + logger.info("Dismissed Airbnb cookie banner (Only necessary)") + + +def _enter_listing_calendar_if_needed(page: Page) -> None: + """Some multicalendar views require selecting a listing card first.""" + # User-provided hint: click listing name card to enter the "real" calendar. + candidates = [ + page.get_by_text(re.compile(r"Double Sauna, private backyard, convenient, clean", re.I)), + page.locator("div._1esgcndk"), + ] + for loc in candidates: + try: + if loc.count() == 0: + continue + loc.first.wait_for(state="visible", timeout=5_000) + logger.info("Airbnb calendar: selecting listing card to enter calendar") + loc.first.click(timeout=10_000) + page.wait_for_timeout(1_200) + # Often triggers a client-side route change; give it a moment. 
+ try: + page.wait_for_load_state("domcontentloaded", timeout=10_000) + except PlaywrightTimeout: + pass + return + except Exception: + continue + + +_OVERLAY_BUTTON_RE = re.compile( + r"^(got it|ok|close|skip|maybe later|not now|dismiss|no thanks|continue)$", + re.I, +) + + +def _dismiss_blocking_overlays(page: Page) -> None: + """Close coach marks, tooltips, and one-off modals that intercept calendar clicks.""" + for _ in range(4): + dismissed = False + for pat in (_OVERLAY_BUTTON_RE, re.compile(r"only\s*necessary", re.I)): + try: + btn = page.get_by_role("button", name=pat) + if btn.count() > 0 and btn.first.is_visible(): + btn.first.click(timeout=2_500) + dismissed = True + page.wait_for_timeout(400) + except Exception: + continue + try: + close = page.get_by_role("button", name=re.compile(r"^close$", re.I)) + if close.count() > 0 and close.first.is_visible(): + close.first.click(timeout=2_000) + dismissed = True + page.wait_for_timeout(300) + except Exception: + pass + if not dismissed: + break + page.keyboard.press("Escape") + page.wait_for_timeout(200) + + +def _pricing_panel(page: Page) -> Locator: + """Side panel / dialog that opens after selecting a calendar day.""" + return page.locator('[role="dialog"], aside').filter( + has=page.get_by_text(re.compile(r"nightly|custom.*price|pricing|availability", re.I)) + ) + + +def _price_panel_visible(page: Page) -> bool: + try: + if _pna_price_panel(page).first.is_visible(): + return True + except Exception: + pass + try: + return page.get_by_text(re.compile(r"price per night", re.I)).first.is_visible() + except Exception: + return False + + +def _pna_price_panel(page: Page) -> Locator: + """Current multicalendar nightly price block (2025+ host UI). + + Airbnb's host UI is A/B tested; sometimes the wrapper has ``data-testid="pna-price"``, + sometimes only the visible label/value are stable. 
""" - pass + by_testid = page.locator('[data-testid="pna-price"]') + by_label = page.locator("div").filter( + has=page.get_by_text(re.compile(r"^price per night$", re.I)) + ).filter( + has=page.get_by_text(re.compile(r"^\$\s*\d", re.I)) + ) + return by_testid.or_(by_label) + + +def _wait_for_pricing_panel(page: Page) -> None: + deadline = time.monotonic() + 22.0 + while time.monotonic() < deadline: + if _price_panel_visible(page): + logger.info("Airbnb calendar: pna-price / Price per night panel visible") + return + page.wait_for_timeout(500) + logger.warning("No pna-price panel after day click") + if _debug_assets_enabled(): + _debug_screenshot(page, "after-day-click-no-panel") + _debug_dump_html(page, "after-day-click-no-panel") + + +def _wait_calendar_in_iframes(page: Page, total_ms: int = 28_000) -> Locator | None: + """Poll child frames; the grid sometimes lives in a late-loading iframe.""" + deadline = time.monotonic() + total_ms / 1000.0 + main = page.main_frame + round_n = 0 + logger.info( + "Airbnb calendar: scanning iframes up to ~%.0fs for visible day cells", + total_ms / 1000.0, + ) + while time.monotonic() < deadline: + round_n += 1 + for fr in page.frames: + if fr == main: + continue + try: + fr.locator(_DAY_BUTTON_LOCATOR).first.wait_for( + state="visible", + timeout=2_500, + ) + logger.info( + "Found calendar via iframe (pass %d): %s", + round_n, + (fr.url or "")[:200], + ) + return fr.locator("body") + except PlaywrightTimeout: + continue + except Exception as e: + logger.debug("iframe calendar wait skipped: %s", e) + page.wait_for_timeout(1_600) + return None + + +def _wait_for_host_calendar(page: Page) -> Locator: + """Wait until the multicalendar grid is present (English/French/German names + fallbacks).""" + _log_page_context(page, "before calendar wait") + _abort_if_login_required(page) + + # Current hyperloop multicalendar: day cells use data-date; no role=application on the grid. 
+ try: + logger.info( + "Airbnb calendar: waiting up to 25s for button[data-date] (hyperloop / current UI)" + ) + page.locator("button[data-date]").first.wait_for(state="visible", timeout=25_000) + n = page.locator("button[data-date]").count() + logger.info( + "Found %d calendar day cell(s) via data-date; using page scope for month controls", + n, + ) + return page.locator("body") + except PlaywrightTimeout: + logger.debug("No visible button[data-date]; trying legacy role=application selectors") + + # Accessible name varies by locale (Calendar / Calendrier / Kalender). + by_role = page.get_by_role( + "application", + name=re.compile(r"calendar|calendrier|kalender", re.I), + ) + try: + logger.info("Airbnb calendar: waiting up to 45s for role=application + name (Calendar/…)") + by_role.first.wait_for(state="visible", timeout=45_000) + logger.info("Found calendar via role=application + name") + return by_role.first + except PlaywrightTimeout: + logger.warning("Calendar not matched by accessible name; trying fallbacks") + _log_calendar_probe(page, "after-role-name-timeout") + + # Grid mounts without a reliable aria-label in some builds. + grid = page.locator('[role="application"]').filter( + has=page.locator(_DAY_BUTTON_LOCATOR), + ) + try: + logger.info("Airbnb calendar: waiting up to 25s for role=application containing day buttons") + grid.first.wait_for(state="visible", timeout=25_000) + logger.info("Found calendar via application + day button cells") + return grid.first + except PlaywrightTimeout: + logger.warning("Calendar not matched by application+day filter; trying first role=application") + _log_calendar_probe(page, "after-application-day-filter-timeout") + + # Some builds expose a visible application region without the expected accessible name. 
+ generic_app = page.locator('[role="application"]').first + try: + logger.info("Airbnb calendar: waiting up to 20s for first visible role=application") + generic_app.wait_for(state="visible", timeout=20_000) + page.wait_for_timeout(1_200) + n_in = generic_app.locator(_DAY_BUTTON_LOCATOR).count() + if n_in > 0: + logger.info("Found calendar via first visible role=application with day cells inside") + return generic_app + logger.warning( + "First role=application is visible but has 0 day buttons inside (UI change or wrong region)" + ) + except PlaywrightTimeout: + logger.info("No visible role=application within 20s") + + _log_calendar_probe(page, "after-generic-application") + + cal_if = _wait_calendar_in_iframes(page, total_ms=28_000) + if cal_if is not None: + return cal_if + + # SPA may render day cells without wrapping role=application (or with a delayed wrapper). + day_cells = page.locator(_DAY_BUTTON_LOCATOR) + try: + logger.info( + "Airbnb calendar: waiting up to 30s on main frame for day cells " + "(data-date or data-state--date-string; do not close the browser; Ctrl+C aborts)" + ) + day_cells.first.wait_for(state="visible", timeout=30_000) + n = day_cells.count() + logger.info( + "Found %d day cell button(s); scoping to body for month/nav", + n, + ) + return page.locator("body") + except PlaywrightTimeout: + _log_calendar_probe(page, "after-main-frame-day-timeout") + _debug_screenshot(page, "no-calendar") + _log_page_context(page, "after calendar wait failure") + raise PlaywrightTimeout( + "Host calendar never appeared. Check --airbnb-headed: login wall, captcha, or UI change. " + "Run with AIRBNB_CALENDAR_DEBUG=1 for DOM probe + HTML/PNG. " + "Set AIRBNB_DEBUG_SCREENSHOT=1 for a final PNG only. See README: Playwright debugging." 
+        )
+
+
+def _read_calendar_month_year(cal: Locator) -> tuple[int, int] | None:
+    """Read ``(year, month)`` from the month heading (prefer the grid section, not random page h2)."""
+    h2_candidates = (
+        cal.locator("section").filter(has=cal.locator("button[data-date]")).locator("h2").first,
+        cal.locator('[role="grid"]').locator("h2").first,
+        cal.locator("h2").first,
+    )
+    for h2 in h2_candidates:
+        try:
+            text = h2.inner_text(timeout=3_500)
+        except PlaywrightTimeout:
+            continue
+        if not isinstance(text, str):
+            continue
+        parsed = parse_month_heading_text(text)
+        if parsed is not None:
+            return parsed
+    return None
+
+
+def _click_month_summary_button(page: Page) -> None:
+    """Toolbar control: button that shows current month in an ``h3`` (opens month context)."""
+    try:
+        page.locator("button").filter(has=page.locator("h3")).first.click(timeout=4_000)
+        page.wait_for_timeout(400)
+    except PlaywrightTimeout:
+        pass
+    except Exception as e:
+        logger.debug("Month summary button: %s", e)
+
+
+def _ensure_calendar_month(page: Page, cal: Locator, target: date) -> None:
+    """Step the visible grid to ``target``'s month with the month arrows (at most 24 clicks)."""
+    logger.info(
+        "Airbnb calendar: ensuring visible month is %04d-%02d",
+        target.year,
+        target.month,
+    )
+    for attempt in range(24):
+        visible = _read_calendar_month_year(cal)
+        if visible is None and attempt == 0:
+            _click_month_summary_button(page)
+            page.wait_for_timeout(500)
+            visible = _read_calendar_month_year(cal)
+        if visible is None:
+            raise PlaywrightTimeout("Could not read calendar month from h2 heading")
+
+        vy, vm = visible
+        if (vy, vm) == (target.year, target.month):
+            logger.info(
+                "Airbnb calendar: already showing %04d-%02d — no month arrow clicks needed",
+                vy,
+                vm,
+            )
+            return
+
+        logger.debug("Calendar at %04d-%02d, need %04d-%02d", vy, vm, target.year, target.month)
+        forward = (target.year, target.month) > (vy, vm)
+        label = _ARIA_MONTH_FORWARD if forward else _ARIA_MONTH_BACK
+        scoped = cal.locator(f'button[aria-label="{label}"]')
+        if scoped.count() > 0:
+            scoped.first.click(timeout=8_000)
+        else:
+            page.locator(f'button[aria-label="{label}"]').first.click(timeout=8_000)
+        page.wait_for_timeout(500)
+
+    raise PlaywrightTimeout(
+        f"Could not reach calendar month {target.year}-{target.month:02d} after navigation"
+    )
+
+
+def _click_calendar_day(page: Page, cal: Locator, target: date) -> None:
+    """Open the host day cell (hyperloop ``data-date`` or legacy ``data-state--date-string``)."""
+    ds = target.strftime("%Y-%m-%d")
+    logger.info("Airbnb calendar: opening day cell %s", ds)
+    _dismiss_blocking_overlays(page)
+
+    cells: list[tuple[Locator, str]] = [
+        (cal.locator(f'button[data-date="{ds}"]'), "data-date"),
+        (cal.locator(f'button[data-state--date-string="{ds}"]'), "data-state--date-string"),
+        (page.locator(f'button[data-date="{ds}"]'), "data-date-page"),
+    ]
+    for cell, kind in cells:
+        try:
+            if cell.count() == 0:
+                continue
+            cell.first.wait_for(state="visible", timeout=12_000)
+        except PlaywrightTimeout:
+            continue
+        disabled = cell.first.get_attribute("disabled")
+        if disabled is not None:
+            raise PlaywrightTimeout(
+                f"Calendar day {ds} is disabled in host UI ({kind}); pick an available night."
+            )
+        cell.first.scroll_into_view_if_needed(timeout=8_000)
+        page.wait_for_timeout(300)
+        _dismiss_blocking_overlays(page)
+        try:
+            cell.first.click(timeout=12_000)
+            page.wait_for_timeout(800)
+            if not _price_panel_visible(page):
+                cell.first.dblclick(timeout=12_000)
+        except PlaywrightTimeout:
+            logger.warning(
+                "Day cell click timed out (modal or overlay intercepting); "
+                "retrying with Escape + center click"
+            )
+            page.keyboard.press("Escape")
+            page.wait_for_timeout(350)
+            _dismiss_blocking_overlays(page)
+            box = cell.first.bounding_box()
+            if box:
+                page.mouse.click(
+                    box["x"] + box["width"] / 2,
+                    box["y"] + box["height"] / 2,
+                )
+            else:
+                cell.first.click(timeout=12_000, force=True)
+        page.wait_for_timeout(1_500)
+        _wait_for_pricing_panel(page)
+        if not _price_input_visible(page):
+            _click_inline_price_on_day(page, cell.first, ds)
+        return
+    raise PlaywrightTimeout(f"Calendar day {ds} not found (no day button in current month view).")
+
+
+def _price_input_visible(page: Page) -> bool:
+    return _price_panel_visible(page)  # alias: "input visible" is defined as "price panel visible"
+
+
+def _click_inline_price_on_day(page: Page, cell: Locator, ds: str) -> None:
+    """Multicalendar shows price on the cell; click it to open the edit field."""
+    logger.info("Airbnb calendar: trying inline price click on %s", ds)
+    price = cell.locator("span.p1ysqtdd").first
+    try:
+        price.wait_for(state="visible", timeout=5_000)
+        price.click(timeout=8_000)
+        page.wait_for_timeout(800)
+        _wait_for_pricing_panel(page)
+        return
+    except PlaywrightTimeout:
+        pass
+    try:
+        cell.get_by_text(re.compile(r"nightly price", re.I)).first.click(timeout=5_000)
+        page.wait_for_timeout(800)
+        _wait_for_pricing_panel(page)
+    except PlaywrightTimeout:
+        logger.warning("Inline price label not clickable for %s", ds)
+
+
+def _price_input_locator(page: Page) -> Locator:
+    """Resolve the editable nightly price field."""
+    pna = _pna_price_panel(page)
+    loc = pna.locator(
+        'input[inputmode="decimal"], input[inputmode="numeric"], input[type="text"], [contenteditable="true"]'
+    )
+    loc = loc.or_(pna)
+    loc = loc.or_(page.get_by_test_id("PriceInput-basePrice"))
+    loc = loc.or_(page.locator('[data-testid*="PriceInput"]'))
+    loc = loc.or_(page.locator('[role="dialog"] input[inputmode="decimal"]'))
+    loc = loc.or_(page.get_by_placeholder(re.compile(r"price|\$|CAD", re.I)))
+    return loc.first
+
+
+def _read_current_price_text(page: Page) -> str:
+    """Read displayed or input value from the active price editor."""
+    pna = _pna_price_panel(page)
+    try:
+        inp = pna.locator("input").first
+        if inp.count() > 0 and inp.is_visible():
+            return inp.input_value(timeout=3_000)
+    except Exception:
+        pass
+    try:
+        display = pna.locator(".pokbdf7").first
+        if display.is_visible():
+            return display.inner_text(timeout=3_000)
+    except Exception:
+        pass
+    try:
+        return _price_input_locator(page).input_value(timeout=3_000)
+    except Exception:
+        return ""
+
+
+def _activate_price_editor(page: Page) -> Locator:
+    """Focus the pna-price editor; the returned ``input`` locator may match nothing (last resort)."""
+    pna = _pna_price_panel(page)
+    try:
+        pna.first.wait_for(state="visible", timeout=18_000)
+    except PlaywrightTimeout:
+        logger.warning("Airbnb price: pna-price panel never appeared")
+        if _debug_assets_enabled():
+            _debug_screenshot(page, "pna-price-missing")
+            _debug_dump_html(page, "pna-price-missing")
+        raise
+    pna.first.click(timeout=8_000)
+    page.wait_for_timeout(500)
+    inp = pna.locator("input").first
+    try:
+        inp.wait_for(state="visible", timeout=4_000)
+        inp.click(timeout=5_000)
+        return inp
+    except PlaywrightTimeout:
+        pass
+    for sel in (".pokbdf7", ".pubh5mh", "div.t1eeg6oc"):
+        try:
+            pna.locator(sel).first.click(timeout=4_000)
+            page.wait_for_timeout(400)
+            inp = pna.locator("input").first
+            inp.wait_for(state="visible", timeout=4_000)
+            inp.click(timeout=5_000)
+            return inp
+        except PlaywrightTimeout:
+            continue
+    # Last resort: focus panel and type (some builds use contenteditable without input)
+    pna.first.click(timeout=5_000)
+    page.keyboard.press("Control+A")
+    page.wait_for_timeout(200)
+    return pna.locator("input").first
+
+
+def _try_expand_price_panel_clicks(page: Page) -> None:
+    """Reveal a collapsed pricing row (chevron SVG paths and labels change often)."""
+    scopes: list[Page | Locator] = [page]
+    try:
+        panel = _pricing_panel(page)
+        if panel.count() > 0:
+            scopes.insert(0, panel.first)
+    except Exception:
+        pass
+
+    # Current UI: pencil/edit icon is an SVG with aria-label="Edit".
+    for scope in scopes:
+        try:
+            edit_svg = scope.locator('svg[aria-label="Edit"]')
+            if edit_svg.count() > 0:
+                edit_svg.first.click(timeout=3_500)
+                logger.info("Airbnb price: clicked SVG Edit icon")
+                page.wait_for_timeout(500)
+                return
+        except Exception:
+            pass
+
+    path_frags = (
+        "m12 4 11.3",
+        "M12 4",
+        "m12 4 l11.3",
+        "4 11.3",
+    )
+    for scope in scopes:
+        for frag in path_frags:
+            try:
+                scope.locator(f"path[d*='{frag}']").first.click(timeout=1_800)
+                logger.info("Airbnb price: clicked chevron path fragment %r", frag)
+                page.wait_for_timeout(400)
+                return
+            except Exception:
+                continue
+    for scope in scopes:
+        try:
+            scope.get_by_role(
+                "button",
+                name=re.compile(
+                    r"edit(\s+nightly)?\s+price|set\s+price|custom\s+price|pricing|show\s+details",
+                    re.I,
+                ),
+            ).first.click(timeout=3_500)
+            logger.info("Airbnb price: clicked pricing-related control by name")
+            page.wait_for_timeout(400)
+            return
+        except Exception as e:
+            logger.debug("Airbnb price: named expand button not found: %s", e)
+    try:
+        page.locator('[role="dialog"] button[aria-expanded="false"]').first.click(timeout=2_500)
+        page.wait_for_timeout(400)
+    except Exception:
+        pass
+
+
+def _maybe_expand_price_panel(page: Page) -> None:
+    """Wait for pna-price or legacy PriceInput; click expand controls once if neither is visible."""
+    try:
+        _pna_price_panel(page).first.wait_for(state="visible", timeout=8_000)
+        logger.info("Airbnb price: pna-price panel already visible")
+        return
+    except PlaywrightTimeout:
+        pass
+    inp = _price_input_locator(page)
+    try:
+        inp.wait_for(state="visible", timeout=4_000)
+        logger.info("Airbnb price: nightly price input already visible")
+        return
+    except PlaywrightTimeout:
+        logger.info("Airbnb price: input not visible yet; trying expand controls")
+
+    _try_expand_price_panel_clicks(page)
+    try:
+        _pna_price_panel(page).first.wait_for(state="visible", timeout=18_000)
+    except PlaywrightTimeout:
+        try:
+            inp.wait_for(state="visible", timeout=12_000)
+        except PlaywrightTimeout:
+            logger.warning("Airbnb price: editor never appeared after expand attempts")
+            if _debug_assets_enabled():
+                _debug_screenshot(page, "price-editor-never-appeared")
+                _debug_dump_html(page, "price-editor-never-appeared")
+            raise
+    logger.info("Airbnb price: nightly price editor visible after expand/wait")
+
+
+def _click_save(page: Page) -> None:
+    """Click the button named Save, else a ``[data-button-content="true"]`` node with text Save."""
+    save = page.get_by_role("button", name=re.compile(r"^save$", re.I))
+    try:
+        save.first.wait_for(state="visible", timeout=8_000)
+        save.first.click(timeout=10_000)
+        return
+    except PlaywrightTimeout:
+        pass
+    page.locator('[data-button-content="true"]').filter(
+        has_text=re.compile(r"^Save$", re.I)
+    ).first.click(timeout=10_000)
+
+
+def _fill_price_and_save(page: Page, new_price: int) -> None:
+    """Set the nightly price to ``new_price`` and Save; skips both when the field already shows it."""
+    _maybe_expand_price_panel(page)
+    inp = _activate_price_editor(page)
+
+    raw = _read_current_price_text(page)
+    current_digits = re.sub(r"\D", "", raw or "")
+    logger.info(
+        "Airbnb price: field raw=%r parsed_digits=%r target=%s",
+        raw,
+        current_digits or "(empty)",
+        new_price,
+    )
+    if current_digits == str(new_price):
+        logger.info("Nightly price already %s; skipping edit and save", new_price)
+        return
+
+    try:
+        inp.click(timeout=5_000)  # inside try: the last-resort locator may match no <input>
+        inp.fill("", timeout=5_000)
+        inp.fill(str(new_price), timeout=5_000)
+    except PlaywrightTimeout:
+        page.keyboard.press("Control+A")
+        page.keyboard.type(str(new_price), delay=50)
+    page.wait_for_timeout(300)
+
+    logger.info("Airbnb price: clicking Save")
+    _click_save(page)
+    page.wait_for_timeout(600)
+    logger.info("Airbnb price: Save clicked, short wait complete")
+
+
+def _run_price_update_attempt(
+    page: Page,
+    target_date: date,
+    new_price: int,
+    calendar_url: str,
+) -> None:  # single attempt: goto, month, day, price editor, save
+    logger.info(
+        "Airbnb step: goto calendar_url (wait=domcontentloaded) target_date=%s",
+        target_date.isoformat(),
+    )
+    page.goto(calendar_url, wait_until="domcontentloaded", timeout=45_000)
+    _log_page_context(page, "after calendar goto")
+    page.wait_for_timeout(800)
+    try:
+        page.wait_for_load_state("load", timeout=25_000)
+    except PlaywrightTimeout:
+        logger.debug("wait_for_load_state(load) timed out after calendar goto; continuing")
+    _dismiss_cookie_banner_if_present(page)
+    _dismiss_blocking_overlays(page)
+    # Some accounts first land on a listing chooser; select the listing card.
+    _enter_listing_calendar_if_needed(page)
+    cal = _wait_for_host_calendar(page)
+    _ensure_calendar_month(page, cal, target_date)
+    _click_calendar_day(page, cal, target_date)
+    _maybe_expand_price_panel(page)  # repeated at the top of _fill_price_and_save
+    _fill_price_and_save(page, new_price)
+    _log_page_context(page, "after fill/save")
diff --git a/src/config.py b/src/config.py
index 4df10cc..d645831 100644
--- a/src/config.py
+++ b/src/config.py
@@ -14,8 +14,11 @@ class Settings(BaseSettings):
 
     # Airbnb automation (optional)
     airbnb_listing_id: str = ""
+    airbnb_calendar_url: str = ""
     airbnb_base_price: int = 150
     price_increase_pct: int = 20
+    airbnb_headed: bool = False
+    airbnb_slow_mo_ms: int = 0
 
     # Search location (default: Thornhill, ON — covers Toronto + GTA)
     search_lat: float = 43.8083
diff --git a/src/main.py b/src/main.py
index ca78e93..e4b7521 100644
--- a/src/main.py
+++ b/src/main.py
@@ -10,6 +10,7 @@ from __future__ import annotations
 
 import argparse
 import logging
+import os
 import sys
 from datetime import date, timedelta
 
@@ -139,34 +140,45 @@ def update_airbnb_prices(events: list[NormalizedEvent], settings) -> None:
         return
 
     try:
-        from playwright.sync_api import sync_playwright
         from src.airbnb.auth import load_authenticated_context
-        from src.airbnb.calendar import update_price
+        from src.airbnb.browser import open_browser
+        from src.airbnb.calendar import resolve_calendar_url, update_price
     except ImportError:
         logger.error("Playwright not installed, cannot update Airbnb prices")
         return
 
     new_price = int(settings.airbnb_base_price * (1 + settings.price_increase_pct / 100))
     event_dates = sorted({e.event_date for e in events})
+    calendar_url = resolve_calendar_url(
+        settings.airbnb_listing_id,
+        settings.airbnb_calendar_url,
+    )
+
+    use_headed = settings.airbnb_headed or os.environ.get("AIRBNB_HEADED", "").lower() in (
+        "1",
+        "true",
+        "yes",
+    )
+    slow_mo = settings.airbnb_slow_mo_ms if use_headed else 0
+    if use_headed:
+        logger.info("Airbnb: headed browser (slow_mo=%d ms)", slow_mo)
 
     logger.info(
-        "Updating Airbnb prices for %d dates to $%d",
+        "Updating Airbnb prices for %d dates to $%d (calendar: %s)",
         len(event_dates),
         new_price,
+        calendar_url,
     )
 
     successes = 0
     try:
-        with sync_playwright() as p:
-            browser = p.chromium.launch(headless=True)
+        with open_browser(headless=not use_headed, slow_mo=slow_mo) as browser:
             context = load_authenticated_context(browser)
             page = context.new_page()
 
             for target_date in event_dates:
-                if update_price(page, target_date, new_price):
+                if update_price(page, target_date, new_price, calendar_url):
                     successes += 1
-
-            browser.close()
     except FileNotFoundError as e:
         logger.error("Auth state missing: %s", e)
     except Exception:
diff --git a/tests/airbnb/test_browser.py b/tests/airbnb/test_browser.py
new file mode 100644
index 0000000..a508d6b
--- /dev/null
+++ b/tests/airbnb/test_browser.py
@@ -0,0 +1,39 @@
+"""Tests for optional stealth browser launcher."""
+
+import os
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from src.airbnb.browser import open_browser, use_stealth_browser
+
+
+class TestUseStealthBrowser:
+    def test_default_off(self, monkeypatch):
+        monkeypatch.delenv("AIRBNB_STEALTH", raising=False)
+        assert use_stealth_browser() is False
+
+    def test_enabled(self, monkeypatch):
+        monkeypatch.setenv("AIRBNB_STEALTH", "1")
+        assert use_stealth_browser() is True
+
+
+class TestOpenBrowser:
+    def test_chromium_path(self, monkeypatch):
+        monkeypatch.delenv("AIRBNB_STEALTH", raising=False)
+        mock_browser = MagicMock()
+        mock_pw = MagicMock()
+        mock_pw.chromium.launch.return_value = mock_browser
+
+        with patch("playwright.sync_api.sync_playwright") as mock_sync:
+            mock_sync.return_value.__enter__.return_value = mock_pw
+            with open_browser(headless=True) as browser:
+                assert browser is mock_browser
+            mock_browser.close.assert_called_once()
+
+    def test_stealth_requires_package(self, monkeypatch):
+        monkeypatch.setenv("AIRBNB_STEALTH", "1")
+        with patch.dict("sys.modules", {"invisible_playwright": None}):
+            with pytest.raises(ImportError, match="invisible_playwright"):
+                with open_browser(headless=True):
+                    pass