fix(discovery): enforce search-country allow-list when Canada (etc.) is selected
Some checks failed
CI / Linting (Biome) (push) Failing after 41s
CI / Tests (push) Successful in 5m19s
CI / Type Check (adzuna-extractor) (push) Successful in 1m10s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m13s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m10s
CI / Type Check (orchestrator) (push) Successful in 1m27s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m10s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m10s
CI / Documentation (push) Successful in 1m59s

Reject vague Remote/Worldwide and any non-selected country at ingest; hide mismatched jobs in the UI and stop bypassing country filters for remote listings.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
ilia 2026-05-16 18:34:35 -04:00
parent 71f34853fd
commit 5401f384c1
7 changed files with 199 additions and 38 deletions

View File

@ -1,4 +1,5 @@
import { useSettings } from "@client/hooks/useSettings";
import { inferCountryKeyFromSearchGeography } from "@shared/search-cities";
import type React from "react";
import { useCallback, useEffect, useMemo, useState } from "react";
import { useNavigate, useParams } from "react-router-dom";
@ -160,6 +161,12 @@ export const OrchestratorPage: React.FC = () => {
[settings],
);
// Country key inferred from the saved `searchCities` setting value (null is
// passed when the setting is absent). It is forwarded to the job list filter
// extras so listings outside that country can be hidden.
const searchGeographyCountryKey = useMemo(() => {
  const searchCitiesValue = settings?.searchCities?.value ?? null;
  return inferCountryKeyFromSearchGeography(searchCitiesValue);
}, [settings?.searchCities?.value]);
const jobListFilterExtras = useMemo(
() => ({
foundAfterYmd,
@ -169,6 +176,7 @@ export const OrchestratorPage: React.FC = () => {
settingsBlockedEmployerKeywords: applySettingsCompanySkipList
? settingsSkipEmployerKeywords
: [],
searchGeographyCountryKey,
}),
[
foundAfterYmd,
@ -177,6 +185,7 @@ export const OrchestratorPage: React.FC = () => {
employerExcludeFilter,
applySettingsCompanySkipList,
settingsSkipEmployerKeywords,
searchGeographyCountryKey,
],
);

View File

@ -257,7 +257,7 @@ describe("useFilteredJobs", () => {
expect(result.current.map((j) => j.id).sort()).toEqual(["in", "li"]);
});
it("filters by country but always keeps remote listings", () => {
it("filters by country including remote jobs when location matches", () => {
const jobs: Job[] = [
{
...baseJob,
@ -293,13 +293,42 @@ describe("useFilteredJobs", () => {
{ key: "score", direction: "desc" },
),
);
expect(result.current.map((j) => j.id).sort()).toEqual([
"uk-onsite",
"us-remote",
]);
expect(result.current.map((j) => j.id)).toEqual(["uk-onsite"]);
});
it("excludes by country but keeps remote listings", () => {
it("hides jobs outside search geography country from settings", () => {
  // Three fixtures: a clearly Canadian listing, a remote listing tied to
  // India, and a vague "Remote" listing that names no country at all.
  const canadianJob: Job = {
    ...baseJob,
    id: "ca",
    location: "Toronto, ON, Canada",
  };
  const indiaRemoteJob: Job = {
    ...baseJob,
    id: "in",
    location: "Bangalore, India",
    isRemote: true,
  };
  const vagueRemoteJob: Job = {
    ...baseJob,
    id: "vague",
    location: "Remote",
    isRemote: true,
  };
  const jobs: Job[] = [canadianJob, indiaRemoteJob, vagueRemoteJob];

  // No tab/source/country/sponsor filters are active; only the settings-level
  // search geography ("canada") is supplied through the extras argument.
  const { result } = renderHook(() =>
    useFilteredJobs(
      jobs,
      "all",
      [],
      [],
      [],
      [],
      "all",
      "all",
      { mode: "at_least", min: null, max: null },
      { key: "score", direction: "desc" },
      {
        foundAfterYmd: null,
        foundBeforeYmd: null,
        employerInclude: [],
        employerExclude: [],
        settingsBlockedEmployerKeywords: [],
        searchGeographyCountryKey: "canada",
      },
    ),
  );

  // Only the Canadian listing survives; being remote does not bypass the gate.
  const visibleIds = result.current.map(({ id }) => id);
  expect(visibleIds).toEqual(["ca"]);
});
it("excludes by country including remote jobs in that country", () => {
const jobs: Job[] = [
{
...baseJob,
@ -335,10 +364,7 @@ describe("useFilteredJobs", () => {
{ key: "score", direction: "desc" },
),
);
expect(result.current.map((j) => j.id).sort()).toEqual([
"uk-remote",
"us-onsite",
]);
expect(result.current.map((j) => j.id)).toEqual(["us-onsite"]);
});
it("excludes sources", () => {

View File

@ -1,5 +1,5 @@
import { jobMatchesAllowedCountry } from "@shared/blocked-countries";
import { textMatchesKeyword } from "@shared/keyword-match";
import { inferCountryKeysFromJobLocation } from "@shared/search-cities";
import type { JobListItem, JobSource } from "@shared/types";
import { useMemo } from "react";
import type {
@ -17,6 +17,8 @@ export type JobListFilterExtras = {
employerInclude: string[];
employerExclude: string[];
settingsBlockedEmployerKeywords: string[];
/** When settings search geography is a country (e.g. Canada), hide other countries. */
searchGeographyCountryKey?: string | null;
};
const startOfLocalDayMs = (ymd: string): number =>
@ -61,6 +63,7 @@ export const useFilteredJobs = (
employerInclude: [],
employerExclude: [],
settingsBlockedEmployerKeywords: [],
searchGeographyCountryKey: null,
},
) =>
useMemo(() => {
@ -103,22 +106,37 @@ export const useFilteredJobs = (
filtered = filtered.filter((job) => !deny.has(job.source));
}
// Settings-level geography gate: when the filter extras carry a search
// geography country key, keep only the jobs that jobMatchesAllowedCountry
// accepts for that key, judged from each job's location and title.
const searchCountryKey = listExtras.searchGeographyCountryKey;
if (searchCountryKey) {
filtered = filtered.filter((job) =>
jobMatchesAllowedCountry(
{ location: job.location, title: job.title },
searchCountryKey,
),
);
}
if (countriesFilter.length > 0) {
const allowCountries = new Set(countriesFilter);
filtered = filtered.filter((job) => {
if (job.isRemote === true) return true;
const jobCountries = inferCountryKeysFromJobLocation(job.location);
return jobCountries.some((key) => allowCountries.has(key));
});
filtered = filtered.filter((job) =>
countriesFilter.some((countryKey) =>
jobMatchesAllowedCountry(
{ location: job.location, title: job.title },
countryKey,
),
),
);
}
if (countriesExcludeFilter.length > 0) {
const denyCountries = new Set(countriesExcludeFilter);
filtered = filtered.filter((job) => {
if (job.isRemote === true) return true;
const jobCountries = inferCountryKeysFromJobLocation(job.location);
return !jobCountries.some((key) => denyCountries.has(key));
});
// Drop every job that jobMatchesAllowedCountry accepts for at least one of the
// excluded country keys (judged from the job's location and title).
// NOTE(review): jobMatchesAllowedCountry is an allow-list predicate. It is true
// only when every substantive country signal equals the given key, and it
// returns true for keys outside the supported set. Negated here, a job that
// mentions the excluded country together with another country is NOT excluded,
// and an unsupported key would exclude every job. Confirm this is intended.
filtered = filtered.filter(
(job) =>
!countriesExcludeFilter.some((countryKey) =>
jobMatchesAllowedCountry(
{ location: job.location, title: job.title },
countryKey,
),
),
);
}
if (sponsorFilter !== "all") {

View File

@ -371,6 +371,7 @@ describe("discoverJobsStep", () => {
vi.mocked(settingsRepo.getAllSettings).mockResolvedValue({
searchTerms: JSON.stringify(["sdet"]),
searchCities: "Canada",
blockedCountries: JSON.stringify(["india"]),
} as any);
@ -387,11 +388,8 @@ describe("discoverJobsStep", () => {
},
});
expect(result.discoveredJobs).toHaveLength(2);
expect(result.discoveredJobs.map((job) => job.jobUrl)).toEqual([
"https://example.com/job-ca",
"https://example.com/job-remote",
]);
expect(result.discoveredJobs).toHaveLength(1);
expect(result.discoveredJobs[0]?.jobUrl).toBe("https://example.com/job-ca");
});
it("drops co-op titles via company skip list and coop deal-breaker token", async () => {

View File

@ -7,18 +7,19 @@ import { getProfileById } from "@server/repositories/profiles";
import * as settingsRepo from "@server/repositories/settings";
import { asyncPool } from "@server/utils/async-pool";
import {
jobMatchesAllowedCountry,
jobMatchesBlockedCountries,
resolveBlockedCountriesFromStoredString,
} from "@shared/blocked-countries.js";
import {
textMatchesAnyKeyword,
textMatchesKeyword,
} from "@shared/keyword-match.js";
import {
formatCountryLabel,
isSourceAllowedForCountry,
normalizeCountryKey,
} from "@shared/location-support.js";
import {
textMatchesAnyKeyword,
textMatchesKeyword,
} from "@shared/keyword-match.js";
import { resolveBlockedCompanyKeywordsFromStoredString } from "@shared/resolve-blocked-company-keywords.js";
import {
inferCountryKeyFromSearchGeography,
@ -584,8 +585,32 @@ export async function discoverJobsStep(args: {
});
}
// Allow-list pass: when geographyCountryKey is set, keep only the jobs that
// jobMatchesAllowedCountry accepts for it. Location, description and title are
// all passed as signals. Without a key the list passes through unchanged.
let allowedCountryFilteredJobs = filteredDiscoveredJobs;
if (geographyCountryKey) {
allowedCountryFilteredJobs = filteredDiscoveredJobs.filter((job) =>
jobMatchesAllowedCountry(
{
location: job.location,
jobDescription: job.jobDescription,
title: job.title,
},
geographyCountryKey,
),
);
// Number of jobs removed by this pass; logged only when non-zero.
const allowedDroppedCount =
filteredDiscoveredJobs.length - allowedCountryFilteredJobs.length;
if (allowedDroppedCount > 0) {
logger.info("Dropped discovered jobs outside selected search country", {
step: "discover-jobs",
droppedCount: allowedDroppedCount,
allowedCountry: geographyCountryKey,
allowedCountryLabel: formatCountryLabel(geographyCountryKey),
});
}
}
if (args.shouldCancel?.()) {
return { discoveredJobs: filteredDiscoveredJobs, sourceErrors };
return { discoveredJobs: allowedCountryFilteredJobs, sourceErrors };
}
const strictProfileFilteringEnabled =
@ -594,14 +619,14 @@ export async function discoverJobsStep(args: {
searchProfileDealBreakers.length > 0;
const profileFiltered = strictProfileFilteringEnabled
? filterJobsBySearchProfile({
jobs: filteredDiscoveredJobs,
jobs: allowedCountryFilteredJobs,
targetRolePhrases: searchProfileTargetRoles.length
? searchProfileTargetRoles
: searchTerms,
mustHaveSkills: searchProfileMustHaveSkills,
dealBreakers: searchProfileDealBreakers,
})
: { jobs: filteredDiscoveredJobs, dropped: 0 };
: { jobs: allowedCountryFilteredJobs, dropped: 0 };
if (profileFiltered.dropped > 0) {
logger.info("Dropped discovered jobs that didn't match search profile", {

View File

@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
import {
inferCountryKeysFromJobText,
isVagueJobLocation,
jobMatchesAllowedCountry,
jobMatchesBlockedCountries,
normalizeBlockedCountryTokens,
resolveBlockedCountriesFromStoredString,
@ -73,4 +74,34 @@ describe("blocked-countries", () => {
),
).toContain("india");
});
it("allows only jobs clearly in the selected search country", () => {
  type AllowedCountryInput = Parameters<typeof jobMatchesAllowedCountry>[0];

  // Each row is [input, expected result when "canada" is the allowed country].
  const cases: Array<[AllowedCountryInput, boolean]> = [
    // Plain location strings.
    ["Toronto, ON, Canada", true],
    ["Vancouver, BC", true],
    ["Bangalore, Karnataka, India", false],
    ["Remote", false],
    ["Worldwide", false],
    // Vague location: the description decides.
    [{ location: "Remote", jobDescription: "Hiring in India only." }, false],
    [{ location: "Remote", jobDescription: "Remote across Canada." }, true],
  ];

  for (const [input, expected] of cases) {
    expect(jobMatchesAllowedCountry(input, "canada")).toBe(expected);
  }
});
});

View File

@ -64,7 +64,9 @@ export function normalizeBlockedCountryTokens(tokens: string[]): string[] {
return [...keys];
}
export function isVagueJobLocation(location: string | null | undefined): boolean {
export function isVagueJobLocation(
location: string | null | undefined,
): boolean {
if (!location?.trim()) return true;
const normalized = location.trim().toLowerCase();
if (VAGUE_LOCATION_VALUES.has(normalized)) return true;
@ -92,12 +94,35 @@ export function inferCountryKeysFromJobText(
return [...keys];
}
function collectJobCountryKeys(signals: JobBlockedCountrySignals): string[] {
/**
 * Full names of Canadian provinces/territories plus a list of Canadian city
 * names, matched case-insensitively on word boundaries.
 */
const CANADA_LOCATION_HINT_RE =
  /\b(ontario|quebec|british columbia|alberta|manitoba|saskatchewan|nova scotia|new brunswick|newfoundland|prince edward island|northwest territories|yukon|nunavut|toronto|vancouver|montreal|ottawa|calgary|edmonton|winnipeg|mississauga|halifax|victoria|saskatoon|regina|hamilton|kitchener)\b/i;

/**
 * Two-letter province/territory codes. Case-sensitive (upper-case only) and
 * required to be delimited by string start/end, a comma, whitespace, a
 * parenthesis, or (on the right) a period.
 */
const CANADA_PROVINCE_ABBREV_RE =
  /(?:^|[,\s(])(ON|BC|AB|QC|MB|SK|NS|NB|NL|PE|YT|NT|NU)(?:[,\s).]|$)/;

/**
 * Reports whether a location string contains a Canadian place hint.
 *
 * @param location Raw job location; null, undefined, and blank strings yield false.
 * @returns true when either the name pattern or the abbreviation pattern matches.
 */
function locationSuggestsCanada(location: string | null | undefined): boolean {
  const text = location ?? "";
  if (text.trim() === "") return false;
  // Neither pattern uses the global flag, so `test` keeps no state between calls.
  const patterns = [CANADA_LOCATION_HINT_RE, CANADA_PROVINCE_ABBREV_RE];
  return patterns.some((pattern) => pattern.test(text));
}
function collectJobCountryKeys(
signals: JobBlockedCountrySignals,
options?: { alwaysScanText?: boolean },
): string[] {
const keys = new Set<string>();
for (const key of inferCountryKeysFromJobLocation(signals.location)) {
keys.add(key);
}
if (isVagueJobLocation(signals.location)) {
if (locationSuggestsCanada(signals.location)) {
keys.add("canada");
}
const scanText =
options?.alwaysScanText === true || isVagueJobLocation(signals.location);
if (scanText) {
const blob = [signals.title, signals.jobDescription]
.filter(Boolean)
.join("\n");
@ -108,6 +133,35 @@ function collectJobCountryKeys(signals: JobBlockedCountrySignals): string[] {
return [...keys];
}
/**
 * Returns the keys that are not members of VAGUE_COUNTRY_KEYS, preserving
 * their input order. The input array is not modified.
 */
function substantiveCountryKeys(keys: readonly string[]): string[] {
  const concrete: string[] = [];
  for (const key of keys) {
    if (VAGUE_COUNTRY_KEYS.has(key)) continue;
    concrete.push(key);
  }
  return concrete;
}
/**
 * Allow-list check used when the search geography is a single country
 * (e.g. Canada): a job passes only if it clearly hires in that country.
 *
 * - An allowed key outside the supported set disables the check (returns true).
 * - A job with no substantive country signal (vague "Remote"/"Worldwide" and
 *   nothing else) is rejected.
 * - A job with any substantive country signal other than the allowed one is
 *   rejected, even if the allowed country is also present.
 *
 * @param locationOrSignals Either a bare location string (or null/undefined)
 *   or a signals object; a bare value is wrapped as `{ location }`.
 * @param allowedCountryKey Country key to allow; normalized before comparison.
 * @returns true when the job should be kept.
 */
export function jobMatchesAllowedCountry(
  locationOrSignals: string | null | undefined | JobBlockedCountrySignals,
  allowedCountryKey: string,
): boolean {
  const allowed = normalizeCountryKey(allowedCountryKey);
  // Unknown country key: fail open rather than rejecting every job.
  if (!supportedCountryKeySet.has(allowed)) return true;

  const signals: JobBlockedCountrySignals =
    locationOrSignals !== null && typeof locationOrSignals === "object"
      ? locationOrSignals
      : { location: locationOrSignals };

  // Title/description scanning is requested even when the location is specific.
  const detectedKeys = collectJobCountryKeys(signals, { alwaysScanText: true });
  const substantive = substantiveCountryKeys(detectedKeys);

  // Non-empty and made up solely of the allowed country.
  return (
    substantive.length > 0 && substantive.every((key) => key === allowed)
  );
}
/**
* True when the job mentions a blocked country in location and/or (when location
* is vague) title/description. Unknown location with no country in text is kept.