Tests cover providers, dedup, Telegram, scoring, the main runner, and Airbnb stubs. Ticketmaster and SeatGeek use a configurable lat/lon/radius (Thornhill by default). The pipeline filters noise listings, merges same-day sports duplicates, and adds an optional MIN_ALERT_SCORE and a Telegram severity summary. Made-with: Cursor
137 lines
5.0 KiB
Python
137 lines
5.0 KiB
Python
"""Tests for event deduplication."""
|
|
|
|
from datetime import date
|
|
|
|
from src.dedup import deduplicate, _similarity, _is_same_event
|
|
from src.models import NormalizedEvent
|
|
|
|
|
|
def _make_event(name: str, event_date: date, venue: str, source: str = "test") -> NormalizedEvent:
    """Build a NormalizedEvent from the four fields these tests vary.

    ``source`` defaults to ``"test"`` for cases where the provider is irrelevant.
    """
    fields = {
        "name": name,
        "event_date": event_date,
        "venue": venue,
        "source": source,
    }
    return NormalizedEvent(**fields)
|
|
|
|
|
|
class TestSimilarity:
    """Checks on the ``_similarity`` string-comparison helper."""

    def test_identical_strings(self):
        """Two equal strings are expected to score exactly 1.0."""
        score = _similarity("hello", "hello")
        assert score == 1.0

    def test_empty_strings(self):
        """An empty string on either side (or both) is expected to score 0.0."""
        pairs = (
            ("", "hello"),
            ("hello", ""),
            ("", ""),
        )
        for left, right in pairs:
            assert _similarity(left, right) == 0.0

    def test_similar_strings(self):
        """Strings that differ only in letter case are expected to score 1.0."""
        # lowercased, identical
        assert _similarity("Scotiabank Arena", "Scotiabank arena") == 1.0

    def test_different_strings(self):
        """Two unrelated venue names are expected to score below 0.5."""
        unrelated = _similarity("Rogers Centre", "Budweiser Stage")
        assert unrelated < 0.5

    def test_whitespace_collapse(self):
        """A padded, mixed-case name is expected to match its trimmed lowercase form."""
        padded = " Scotiabank Arena "
        clean = "scotiabank arena"
        assert _similarity(padded, clean) == 1.0
|
|
|
|
|
|
class TestIsSameEvent:
    """Pairwise checks on the ``_is_same_event`` predicate."""

    def test_same_event_different_sources(self):
        """Near-identical names on the same date and venue match across providers."""
        when = date(2026, 5, 10)
        arena = "Scotiabank Arena"
        from_tm = _make_event("Raptors vs Celtics", when, arena, "ticketmaster")
        from_sg = _make_event("Raptors vs. Celtics", when, arena, "seatgeek")
        assert _is_same_event(from_tm, from_sg)

    def test_different_dates(self):
        """Same name and venue on consecutive days must not match."""
        arena = "Scotiabank Arena"
        first_day = _make_event("Raptors", date(2026, 5, 10), arena)
        next_day = _make_event("Raptors", date(2026, 5, 11), arena)
        assert not _is_same_event(first_day, next_day)

    def test_different_venues(self):
        """Same name and date at two different venues must not match."""
        when = date(2026, 5, 10)
        at_arena = _make_event("Concert", when, "Scotiabank Arena")
        at_centre = _make_event("Concert", when, "Rogers Centre")
        assert not _is_same_event(at_arena, at_centre)

    def test_very_different_names_same_venue_date(self):
        """Unrelated names must not match even when date and venue coincide."""
        when = date(2026, 5, 10)
        arena = "Scotiabank Arena"
        game = _make_event("Raptors Game", when, arena)
        concert = _make_event("Drake Concert", when, arena)
        assert not _is_same_event(game, concert)
|
|
|
|
|
|
class TestDeduplicate:
    """End-to-end checks on ``deduplicate`` over small event lists."""

    def test_empty_list(self):
        """An empty input yields an empty list."""
        result = deduplicate([])
        assert result == []

    def test_no_duplicates(self):
        """Two unrelated events on different days and venues are both kept."""
        distinct = [
            _make_event("Event A", date(2026, 5, 10), "Scotiabank Arena"),
            _make_event("Event B", date(2026, 5, 11), "Rogers Centre"),
        ]
        assert len(deduplicate(distinct)) == 2

    def test_removes_cross_provider_duplicates(self):
        """The same game listed by two providers collapses to one entry."""
        when = date(2026, 5, 10)
        arena = "Scotiabank Arena"
        listings = [
            _make_event("Raptors vs Celtics", when, arena, "ticketmaster"),
            _make_event("Raptors vs. Celtics", when, arena, "seatgeek"),
        ]
        assert len(deduplicate(listings)) == 1

    def test_prefers_ticketmaster_with_url(self):
        """When both providers carry a URL, the Ticketmaster record survives."""
        shared = {
            "event_date": date(2026, 5, 10),
            "venue": "Scotiabank Arena",
        }
        tm = NormalizedEvent(
            name="Raptors vs Celtics",
            source="ticketmaster",
            url="https://ticketmaster.ca/event",
            **shared,
        )
        sg = NormalizedEvent(
            name="Raptors vs. Celtics",
            source="seatgeek",
            url="https://seatgeek.com/event",
            **shared,
        )
        survivors = deduplicate([tm, sg])
        assert len(survivors) == 1
        assert survivors[0].source == "ticketmaster"

    def test_keeps_different_events_same_date(self):
        """Different events at different venues on one day are both kept."""
        when = date(2026, 5, 10)
        same_day = [
            _make_event("Raptors Game", when, "Scotiabank Arena"),
            _make_event("Blue Jays Game", when, "Rogers Centre"),
        ]
        assert len(deduplicate(same_day)) == 2

    def test_three_duplicates_become_one(self):
        """Three listings of one show from three sources collapse to one entry."""
        when = date(2026, 5, 10)
        arena = "Scotiabank Arena"
        listings = [
            _make_event("Big Show", when, arena, "ticketmaster"),
            _make_event("Big Show", when, arena, "seatgeek"),
            _make_event("The Big Show", when, arena, "other"),
        ]
        assert len(deduplicate(listings)) == 1

    def test_merges_jays_promo_variant_same_slot(self):
        """A promo-titled listing and the plain one merge; the kept name has no promo text."""
        when = date(2026, 5, 10)
        stadium = "Rogers Centre"
        promo = _make_event(
            "Toronto Blue Jays vs. Dodgers (Loonie Dogs Night)",
            when,
            stadium,
        )
        plain = _make_event(
            "Toronto Blue Jays vs. Los Angeles Dodgers",
            when,
            stadium,
        )
        merged = deduplicate([promo, plain])
        assert len(merged) == 1
        assert "Loonie" not in merged[0].name

    def test_does_not_merge_jays_on_different_days(self):
        """Home games against different opponents on consecutive days stay separate."""
        stadium = "Rogers Centre"
        series = [
            _make_event("Toronto Blue Jays vs. Yankees", date(2026, 5, 10), stadium),
            _make_event("Toronto Blue Jays vs. Red Sox", date(2026, 5, 11), stadium),
        ]
        assert len(deduplicate(series)) == 2
|