"""Tests for event deduplication."""

from datetime import date

from src.dedup import deduplicate, _similarity, _is_same_event
from src.models import NormalizedEvent


def _make_event(
    name: str, event_date: date, venue: str, source: str = "test"
) -> NormalizedEvent:
    """Build a ``NormalizedEvent`` from the four fields the tests vary.

    Args:
        name: Event title.
        event_date: Calendar date of the event.
        venue: Venue name.
        source: Provider label; defaults to ``"test"``.

    Returns:
        A ``NormalizedEvent`` constructed with exactly these keyword arguments.
    """
    return NormalizedEvent(
        name=name, event_date=event_date, venue=venue, source=source
    )


class TestSimilarity:
    """Expectations for the ``_similarity`` string score."""

    def test_identical_strings(self) -> None:
        """Identical inputs score exactly 1.0."""
        assert _similarity("hello", "hello") == 1.0

    def test_empty_strings(self) -> None:
        """Any empty operand (including both empty) scores 0.0."""
        assert _similarity("", "hello") == 0.0
        assert _similarity("hello", "") == 0.0
        assert _similarity("", "") == 0.0

    def test_similar_strings(self) -> None:
        """Inputs differing only in letter case score 1.0."""
        score = _similarity("Scotiabank Arena", "Scotiabank arena")
        assert score == 1.0  # lowercased, identical

    def test_different_strings(self) -> None:
        """Unrelated venue names score below 0.5."""
        score = _similarity("Rogers Centre", "Budweiser Stage")
        assert score < 0.5

    def test_whitespace_collapse(self) -> None:
        """Padding and letter case do not reduce the score."""
        score = _similarity("  Scotiabank Arena  ", "scotiabank arena")
        assert score == 1.0


class TestIsSameEvent:
    """Expectations for the pairwise ``_is_same_event`` predicate."""

    def test_same_event_different_sources(self) -> None:
        """Same date and venue with a punctuation-only name difference match."""
        a = _make_event(
            "Raptors vs Celtics", date(2026, 5, 10), "Scotiabank Arena", "ticketmaster"
        )
        b = _make_event(
            "Raptors vs. Celtics", date(2026, 5, 10), "Scotiabank Arena", "seatgeek"
        )
        assert _is_same_event(a, b)

    def test_different_dates(self) -> None:
        """Identical name and venue on different dates do not match."""
        a = _make_event("Raptors", date(2026, 5, 10), "Scotiabank Arena")
        b = _make_event("Raptors", date(2026, 5, 11), "Scotiabank Arena")
        assert not _is_same_event(a, b)

    def test_different_venues(self) -> None:
        """Identical name and date at different venues do not match."""
        a = _make_event("Concert", date(2026, 5, 10), "Scotiabank Arena")
        b = _make_event("Concert", date(2026, 5, 10), "Rogers Centre")
        assert not _is_same_event(a, b)

    def test_very_different_names_same_venue_date(self) -> None:
        """Sharing venue and date is not enough when the names differ widely."""
        a = _make_event("Raptors Game", date(2026, 5, 10), "Scotiabank Arena")
        b = _make_event("Drake Concert", date(2026, 5, 10), "Scotiabank Arena")
        assert not _is_same_event(a, b)


class TestDeduplicate:
    """Expectations for the list-level ``deduplicate`` entry point."""

    def test_empty_list(self) -> None:
        """An empty input yields an empty list."""
        assert deduplicate([]) == []

    def test_no_duplicates(self) -> None:
        """Two unrelated events are both kept."""
        events = [
            _make_event("Event A", date(2026, 5, 10), "Scotiabank Arena"),
            _make_event("Event B", date(2026, 5, 11), "Rogers Centre"),
        ]
        result = deduplicate(events)
        assert len(result) == 2

    def test_removes_cross_provider_duplicates(self) -> None:
        """The same event reported by two providers collapses to one entry."""
        events = [
            _make_event(
                "Raptors vs Celtics", date(2026, 5, 10), "Scotiabank Arena", "ticketmaster"
            ),
            _make_event(
                "Raptors vs. Celtics", date(2026, 5, 10), "Scotiabank Arena", "seatgeek"
            ),
        ]
        result = deduplicate(events)
        assert len(result) == 1

    def test_prefers_ticketmaster_with_url(self) -> None:
        """When both duplicates carry a URL, the ticketmaster entry survives."""
        tm = NormalizedEvent(
            name="Raptors vs Celtics",
            event_date=date(2026, 5, 10),
            venue="Scotiabank Arena",
            source="ticketmaster",
            url="https://ticketmaster.ca/event",
        )
        sg = NormalizedEvent(
            name="Raptors vs. Celtics",
            event_date=date(2026, 5, 10),
            venue="Scotiabank Arena",
            source="seatgeek",
            url="https://seatgeek.com/event",
        )
        result = deduplicate([tm, sg])
        assert len(result) == 1
        assert result[0].source == "ticketmaster"

    def test_keeps_different_events_same_date(self) -> None:
        """Different events at different venues on one date are both kept."""
        events = [
            _make_event("Raptors Game", date(2026, 5, 10), "Scotiabank Arena"),
            _make_event("Blue Jays Game", date(2026, 5, 10), "Rogers Centre"),
        ]
        result = deduplicate(events)
        assert len(result) == 2

    def test_three_duplicates_become_one(self) -> None:
        """Three listings of one show, one with a leading article, become one."""
        events = [
            _make_event("Big Show", date(2026, 5, 10), "Scotiabank Arena", "ticketmaster"),
            _make_event("Big Show", date(2026, 5, 10), "Scotiabank Arena", "seatgeek"),
            _make_event("The Big Show", date(2026, 5, 10), "Scotiabank Arena", "other"),
        ]
        result = deduplicate(events)
        assert len(result) == 1

    def test_merges_jays_promo_variant_same_slot(self) -> None:
        """A promo-suffixed listing merges with the plain one; the plain name wins."""
        events = [
            _make_event(
                "Toronto Blue Jays vs. Dodgers (Loonie Dogs Night)",
                date(2026, 5, 10),
                "Rogers Centre",
            ),
            _make_event(
                "Toronto Blue Jays vs. Los Angeles Dodgers",
                date(2026, 5, 10),
                "Rogers Centre",
            ),
        ]
        result = deduplicate(events)
        assert len(result) == 1
        # The surviving entry must not carry the promotional suffix.
        assert "Loonie" not in result[0].name

    def test_does_not_merge_jays_on_different_days(self) -> None:
        """Games on consecutive days at the same venue stay separate."""
        events = [
            _make_event("Toronto Blue Jays vs. Yankees", date(2026, 5, 10), "Rogers Centre"),
            _make_event("Toronto Blue Jays vs. Red Sox", date(2026, 5, 11), "Rogers Centre"),
        ]
        result = deduplicate(events)
        assert len(result) == 2