AtAnyRate/tests/test_dedup.py
ilia c8a82e264c Add tests, geo search, noise filtering, sports scoring, and dedup improvements.
Tests cover providers, dedup, Telegram, scoring, main runner, and Airbnb stubs.
Ticketmaster and SeatGeek use configurable lat/lon/radius (Thornhill default).
Pipeline filters noise listings, merges same-day sports duplicates, optional
MIN_ALERT_SCORE, and Telegram severity summary.

Made-with: Cursor
2026-04-04 15:25:35 -04:00

137 lines
5.0 KiB
Python

"""Tests for event deduplication."""
from datetime import date
from src.dedup import deduplicate, _similarity, _is_same_event
from src.models import NormalizedEvent
def _make_event(name: str, event_date: date, venue: str, source: str = "test") -> NormalizedEvent:
    """Build a ``NormalizedEvent`` from the four fields these tests vary.

    Args:
        name: Event title.
        event_date: Calendar day of the event.
        venue: Venue name.
        source: Provider label; defaults to ``"test"``.

    Returns:
        A ``NormalizedEvent`` constructed with exactly these keyword arguments.
    """
    fields = {
        "name": name,
        "event_date": event_date,
        "venue": venue,
        "source": source,
    }
    return NormalizedEvent(**fields)
class TestSimilarity:
    """Expectations for the ``_similarity`` string-comparison helper."""

    def test_identical_strings(self):
        """Two equal strings are expected to score exactly 1.0."""
        score = _similarity("hello", "hello")
        assert score == 1.0

    def test_empty_strings(self):
        """Any pairing that involves an empty string is expected to score 0.0."""
        pairs = (
            ("", "hello"),
            ("hello", ""),
            ("", ""),
        )
        for left, right in pairs:
            assert _similarity(left, right) == 0.0

    def test_similar_strings(self):
        """Strings differing only in letter case are expected to score 1.0."""
        # The inputs differ only by the case of one letter.
        assert _similarity("Scotiabank Arena", "Scotiabank arena") == 1.0

    def test_different_strings(self):
        """Unrelated venue names are expected to score below 0.5."""
        assert _similarity("Rogers Centre", "Budweiser Stage") < 0.5

    def test_whitespace_collapse(self):
        """Surrounding whitespace and case are expected not to affect the score."""
        padded = " Scotiabank Arena "
        plain = "scotiabank arena"
        assert _similarity(padded, plain) == 1.0
class TestIsSameEvent:
    """Expectations for the pairwise ``_is_same_event`` predicate."""

    def test_same_event_different_sources(self):
        """Near-identical names at the same venue and date match across sources."""
        day = date(2026, 5, 10)
        first = _make_event("Raptors vs Celtics", day, "Scotiabank Arena", "ticketmaster")
        second = _make_event("Raptors vs. Celtics", day, "Scotiabank Arena", "seatgeek")
        assert _is_same_event(first, second)

    def test_different_dates(self):
        """The same name and venue on consecutive days must not match."""
        first = _make_event("Raptors", date(2026, 5, 10), "Scotiabank Arena")
        second = _make_event("Raptors", date(2026, 5, 11), "Scotiabank Arena")
        assert not _is_same_event(first, second)

    def test_different_venues(self):
        """The same name and date at different venues must not match."""
        day = date(2026, 5, 10)
        first = _make_event("Concert", day, "Scotiabank Arena")
        second = _make_event("Concert", day, "Rogers Centre")
        assert not _is_same_event(first, second)

    def test_very_different_names_same_venue_date(self):
        """Sharing a venue and date is not enough when the names are unrelated."""
        day = date(2026, 5, 10)
        first = _make_event("Raptors Game", day, "Scotiabank Arena")
        second = _make_event("Drake Concert", day, "Scotiabank Arena")
        assert not _is_same_event(first, second)
class TestDeduplicate:
    """Expectations for the list-level ``deduplicate`` entry point."""

    def test_empty_list(self):
        """An empty input is expected to produce an empty list."""
        unique = deduplicate([])
        assert unique == []

    def test_no_duplicates(self):
        """Events differing in name, date and venue are all kept."""
        listings = [
            _make_event("Event A", date(2026, 5, 10), "Scotiabank Arena"),
            _make_event("Event B", date(2026, 5, 11), "Rogers Centre"),
        ]
        assert len(deduplicate(listings)) == 2

    def test_removes_cross_provider_duplicates(self):
        """The same game reported by two providers collapses to one entry."""
        day = date(2026, 5, 10)
        listings = [
            _make_event("Raptors vs Celtics", day, "Scotiabank Arena", "ticketmaster"),
            _make_event("Raptors vs. Celtics", day, "Scotiabank Arena", "seatgeek"),
        ]
        assert len(deduplicate(listings)) == 1

    def test_prefers_ticketmaster_with_url(self):
        """When both duplicates carry a URL, the ticketmaster one is expected to survive."""
        shared = {
            "event_date": date(2026, 5, 10),
            "venue": "Scotiabank Arena",
        }
        tm = NormalizedEvent(
            name="Raptors vs Celtics",
            source="ticketmaster",
            url="https://ticketmaster.ca/event",
            **shared,
        )
        sg = NormalizedEvent(
            name="Raptors vs. Celtics",
            source="seatgeek",
            url="https://seatgeek.com/event",
            **shared,
        )
        unique = deduplicate([tm, sg])
        assert len(unique) == 1
        assert unique[0].source == "ticketmaster"

    def test_keeps_different_events_same_date(self):
        """Different games at different venues on the same day both remain."""
        day = date(2026, 5, 10)
        listings = [
            _make_event("Raptors Game", day, "Scotiabank Arena"),
            _make_event("Blue Jays Game", day, "Rogers Centre"),
        ]
        assert len(deduplicate(listings)) == 2

    def test_three_duplicates_become_one(self):
        """Three near-identical listings from three sources collapse to one."""
        day = date(2026, 5, 10)
        venue = "Scotiabank Arena"
        listings = [
            _make_event("Big Show", day, venue, "ticketmaster"),
            _make_event("Big Show", day, venue, "seatgeek"),
            _make_event("The Big Show", day, venue, "other"),
        ]
        assert len(deduplicate(listings)) == 1

    def test_merges_jays_promo_variant_same_slot(self):
        """A promo-suffixed listing merges with the plain one; the plain name is kept."""
        day = date(2026, 5, 10)
        promo = _make_event(
            "Toronto Blue Jays vs. Dodgers (Loonie Dogs Night)",
            day,
            "Rogers Centre",
        )
        plain = _make_event(
            "Toronto Blue Jays vs. Los Angeles Dodgers",
            day,
            "Rogers Centre",
        )
        unique = deduplicate([promo, plain])
        assert len(unique) == 1
        # The surviving entry must not carry the promotional wording.
        assert "Loonie" not in unique[0].name

    def test_does_not_merge_jays_on_different_days(self):
        """Games at the same venue on different days stay separate."""
        listings = [
            _make_event("Toronto Blue Jays vs. Yankees", date(2026, 5, 10), "Rogers Centre"),
            _make_event("Toronto Blue Jays vs. Red Sox", date(2026, 5, 11), "Rogers Centre"),
        ]
        assert len(deduplicate(listings)) == 2