fix(filters): match co-op/intern skip tokens in titles with hyphen normalization
Some checks failed
CI / Linting (Biome) (push) Failing after 40s
CI / Tests (push) Successful in 5m12s
CI / Type Check (adzuna-extractor) (push) Successful in 1m8s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m13s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m9s
CI / Type Check (orchestrator) (push) Failing after 1m17s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m10s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m12s
CI / Documentation (push) Successful in 2m0s

Company skip list and profile deal-breakers now ignore hyphens when matching,
apply skip keywords to job titles as well as employers, and short-circuit
scoring when a deal-breaker hits so co-op posts cannot score 80 via the LLM.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
ilia 2026-05-16 17:31:59 -04:00
parent 03d293699a
commit 0de7f90278
10 changed files with 243 additions and 30 deletions

View File

@ -454,6 +454,37 @@ describe("useFilteredJobs", () => {
); );
expect(settingsSkip.current.map((j) => j.id)).toEqual(["good"]); expect(settingsSkip.current.map((j) => j.id)).toEqual(["good"]);
const { result: coOpTitle } = renderHook(() =>
useFilteredJobs(
[
{
...baseJob,
id: "coop",
employer: "Global Relay Communications Inc.",
title: "Co-op Software Development Engineer in Test (SDET)",
},
{ ...baseJob, id: "good", employer: "Contoso", title: "SDET" },
],
"all",
[],
[],
[],
[],
"all",
"all",
{ mode: "at_least", min: null, max: null },
{ key: "score", direction: "desc" },
{
foundAfterYmd: null,
foundBeforeYmd: null,
employerInclude: [],
employerExclude: [],
settingsBlockedEmployerKeywords: ["co-op"],
},
),
);
expect(coOpTitle.current.map((j) => j.id)).toEqual(["good"]);
const { result: includeOnly } = renderHook(() => const { result: includeOnly } = renderHook(() =>
useFilteredJobs( useFilteredJobs(
jobs, jobs,

View File

@ -1,3 +1,4 @@
import { textMatchesKeyword } from "@shared/keyword-match";
import { inferCountryKeysFromJobLocation } from "@shared/search-cities"; import { inferCountryKeysFromJobLocation } from "@shared/search-cities";
import type { JobListItem, JobSource } from "@shared/types"; import type { JobListItem, JobSource } from "@shared/types";
import { useMemo } from "react"; import { useMemo } from "react";
@ -24,13 +25,16 @@ const startOfLocalDayMs = (ymd: string): number =>
const endOfLocalDayMs = (ymd: string): number => const endOfLocalDayMs = (ymd: string): number =>
new Date(`${ymd}T23:59:59.999`).getTime(); new Date(`${ymd}T23:59:59.999`).getTime();
function employerMatchesAnyKeyword( function jobMatchesAnyBlockedKeyword(
employer: string, job: Pick<JobListItem, "employer" | "title">,
keywordsLower: string[], keywords: string[],
): boolean { ): boolean {
if (keywordsLower.length === 0) return false; if (keywords.length === 0) return false;
const normalized = employer.toLowerCase(); return keywords.some(
return keywordsLower.some((keyword) => normalized.includes(keyword)); (keyword) =>
textMatchesKeyword(job.employer, keyword) ||
textMatchesKeyword(job.title, keyword),
);
} }
const getSponsorCategory = (score: number | null): SponsorFilter => { const getSponsorCategory = (score: number | null): SponsorFilter => {
@ -63,12 +67,10 @@ export const useFilteredJobs = (
const employerIncludeLower = listExtras.employerInclude.map((value) => const employerIncludeLower = listExtras.employerInclude.map((value) =>
value.toLowerCase(), value.toLowerCase(),
); );
const employerExcludeLower = [ const employerExcludeLower = listExtras.employerExclude.map((value) =>
...listExtras.settingsBlockedEmployerKeywords.map((value) => value.toLowerCase(),
value.toLowerCase(), );
), const settingsBlockedKeywords = listExtras.settingsBlockedEmployerKeywords;
...listExtras.employerExclude.map((value) => value.toLowerCase()),
];
const uniqueEmployerExcludeLower = [...new Set(employerExcludeLower)]; const uniqueEmployerExcludeLower = [...new Set(employerExcludeLower)];
let filtered = [...jobs]; let filtered = [...jobs];
@ -164,14 +166,24 @@ export const useFilteredJobs = (
if (employerIncludeLower.length > 0) { if (employerIncludeLower.length > 0) {
filtered = filtered.filter((job) => filtered = filtered.filter((job) =>
employerMatchesAnyKeyword(job.employer, employerIncludeLower), employerIncludeLower.some((keyword) =>
job.employer.toLowerCase().includes(keyword),
),
);
}
if (settingsBlockedKeywords.length > 0) {
filtered = filtered.filter(
(job) => !jobMatchesAnyBlockedKeyword(job, settingsBlockedKeywords),
); );
} }
if (uniqueEmployerExcludeLower.length > 0) { if (uniqueEmployerExcludeLower.length > 0) {
filtered = filtered.filter( filtered = filtered.filter(
(job) => (job) =>
!employerMatchesAnyKeyword(job.employer, uniqueEmployerExcludeLower), !uniqueEmployerExcludeLower.some((keyword) =>
job.employer.toLowerCase().includes(keyword),
),
); );
} }

View File

@ -235,7 +235,7 @@ export const ScoringSettingsSection: React.FC<ScoringSettingsSectionProps> = ({
setValue("blockedCompanyKeywords", value, { shouldDirty: true }) setValue("blockedCompanyKeywords", value, { shouldDirty: true })
} }
placeholder='e.g. "recruitment", "staffing"' placeholder='e.g. "recruitment", "staffing"'
helperText="Maintained here and saved with Settings. Each token is a case-insensitive substring match on the employer name. Matching jobs are dropped during discovery (not removed from the database if already imported). See docs: /docs/features/company-skip-list" helperText="Maintained here and saved with Settings. Each token matches employer or job title (hyphens ignored, so co-op matches coop). Matching jobs are dropped during discovery. See docs: /docs/features/company-skip-list"
removeLabelPrefix="Remove blocked keyword" removeLabelPrefix="Remove blocked keyword"
disabled={isLoading || isSaving} disabled={isLoading || isSaving}
/> />

View File

@ -394,6 +394,58 @@ describe("discoverJobsStep", () => {
]); ]);
}); });
// The skip token "co-op" is configured under blockedCompanyKeywords, yet it has
// to drop a posting whose *title* contains it while the employer name is clean.
it("drops co-op titles via company skip list and coop deal-breaker token", async () => {
  const settingsRepo = await import("@server/repositories/settings");
  const registryModule = await import("@server/extractors/registry");

  const sourceId = "bctenet";
  const keptJobUrl = "https://example.com/job-sdet";

  // Only the title of this posting carries the blocked token.
  const coOpPosting = {
    source: sourceId,
    title: "Co-op Software Development Engineer in Test (SDET)",
    employer: "Global Relay Communications Inc.",
    location: "British Columbia, Canada",
    jobUrl: "https://example.com/job-coop",
  };
  // Control posting: nothing in title or employer matches the token.
  const regularPosting = {
    source: sourceId,
    title: "SDET",
    employer: "Contoso",
    location: "Vancouver, BC, Canada",
    jobUrl: keptJobUrl,
  };

  const bcManifest = {
    id: sourceId,
    displayName: "BC T-Net",
    providesSources: [sourceId],
    run: vi.fn().mockResolvedValue({
      success: true,
      jobs: [coOpPosting, regularPosting],
    }),
  };

  vi.mocked(settingsRepo.getAllSettings).mockResolvedValue({
    searchTerms: JSON.stringify(["sdet"]),
    searchCities: "Canada",
    blockedCompanyKeywords: JSON.stringify(["co-op"]),
  } as any);
  vi.mocked(registryModule.getExtractorRegistry).mockResolvedValue({
    manifests: new Map([[sourceId, bcManifest as any]]),
    manifestBySource: new Map([[sourceId, bcManifest as any]]),
    availableSources: [sourceId],
  } as any);

  const { discoveredJobs } = await discoverJobsStep({
    mergedConfig: { ...baseConfig, sources: [sourceId] },
  });

  // Exactly one survivor, and it is the non-co-op posting.
  expect(discoveredJobs.map((job) => job.jobUrl)).toEqual([keptJobUrl]);
});
it("drops jobs with blocked country in description when location is worldwide", async () => { it("drops jobs with blocked country in description when location is worldwide", async () => {
const settingsRepo = await import("@server/repositories/settings"); const settingsRepo = await import("@server/repositories/settings");
const registryModule = await import("@server/extractors/registry"); const registryModule = await import("@server/extractors/registry");

View File

@ -15,6 +15,7 @@ import {
isSourceAllowedForCountry, isSourceAllowedForCountry,
normalizeCountryKey, normalizeCountryKey,
} from "@shared/location-support.js"; } from "@shared/location-support.js";
import { textMatchesAnyKeyword } from "@shared/keyword-match.js";
import { resolveBlockedCompanyKeywordsFromStoredString } from "@shared/resolve-blocked-company-keywords.js"; import { resolveBlockedCompanyKeywordsFromStoredString } from "@shared/resolve-blocked-company-keywords.js";
import { import {
inferCountryKeyFromSearchGeography, inferCountryKeyFromSearchGeography,
@ -40,15 +41,14 @@ type DiscoverySourceTask = {
run: () => Promise<DiscoveryTaskResult>; run: () => Promise<DiscoveryTaskResult>;
}; };
function isBlockedEmployer( function jobMatchesBlockedCompanyKeywords(
employer: string | null | undefined, job: CreateJobInput,
blockedKeywordsLowerCase: string[], blockedKeywords: string[],
): boolean { ): boolean {
if (!employer) return false; if (blockedKeywords.length === 0) return false;
if (blockedKeywordsLowerCase.length === 0) return false; return (
const normalizedEmployer = employer.toLowerCase(); textMatchesAnyKeyword(job.employer, blockedKeywords) ||
return blockedKeywordsLowerCase.some((keyword) => textMatchesAnyKeyword(job.title, blockedKeywords)
normalizedEmployer.includes(keyword),
); );
} }
@ -156,7 +156,6 @@ function filterJobsBySearchProfile(args: {
const roleMatchers = buildRoleMatchers(targetRolePhrases); const roleMatchers = buildRoleMatchers(targetRolePhrases);
const mustHaveLower = mustHaveSkills.map(normalizeText).filter(Boolean); const mustHaveLower = mustHaveSkills.map(normalizeText).filter(Boolean);
const dealBreakersLower = dealBreakers.map(normalizeText).filter(Boolean);
const filtered = jobs.filter((job) => { const filtered = jobs.filter((job) => {
const title = normalizeText(job.title); const title = normalizeText(job.title);
@ -164,8 +163,8 @@ function filterJobsBySearchProfile(args: {
const haystack = `${title}\n${body}`; const haystack = `${title}\n${body}`;
if ( if (
dealBreakersLower.length > 0 && dealBreakers.length > 0 &&
matchesAny(haystack, dealBreakersLower) textMatchesAnyKeyword(haystack, dealBreakers)
) { ) {
return false; return false;
} }
@ -521,11 +520,8 @@ export async function discoverJobsStep(args: {
const blockedCompanyKeywords = resolveBlockedCompanyKeywordsFromStoredString( const blockedCompanyKeywords = resolveBlockedCompanyKeywordsFromStoredString(
settings.blockedCompanyKeywords, settings.blockedCompanyKeywords,
); );
const blockedKeywordsLowerCase = blockedCompanyKeywords.map((value) =>
value.toLowerCase(),
);
const afterCompanyFilter = cityFilteredJobs.filter( const afterCompanyFilter = cityFilteredJobs.filter(
(job) => !isBlockedEmployer(job.employer, blockedKeywordsLowerCase), (job) => !jobMatchesBlockedCompanyKeywords(job, blockedCompanyKeywords),
); );
const companyDroppedCount = const companyDroppedCount =
cityFilteredJobs.length - afterCompanyFilter.length; cityFilteredJobs.length - afterCompanyFilter.length;

View File

@ -284,6 +284,49 @@ describe("salary penalty", () => {
vi.restoreAllMocks(); vi.restoreAllMocks();
}); });
describe("profile deal-breakers", () => {
  // A deal-breaker hit must produce a low score without ever calling the LLM.
  it("short-circuits scoring when co-op matches coop deal-breaker token", async () => {
    const { scoreJobSuitability } = await import("./scorer");
    const { LlmService } = await import("./llm/service");
    const llmSpy = vi.spyOn(LlmService.prototype, "callJson");

    // The profile spells the token "coop"; the job title below spells it "Co-op".
    const profileWithCoopDealBreaker = {
      targetRoles: ["SDET"],
      experienceLevel: "Senior",
      mustHaveSkills: [],
      niceToHaveSkills: [],
      dealBreakers: ["coop"],
      preferredWorkArrangement: ["remote"],
      preferredLocations: ["Canada"],
      minimumSalary: "",
      industriesToTarget: [],
      industriesToAvoid: [],
      aboutMe: "",
    };

    getEffectiveSettingsMock.mockResolvedValue({
      jobSearchProfile: {
        value: profileWithCoopDealBreaker,
        default: null,
        override: null,
      },
      penalizeMissingSalary: { value: false, default: false, override: null },
      missingSalaryPenalty: { value: 10, default: 10, override: null },
      scoringInstructions: { value: "", default: "", override: null },
      rxresumeBaseResumeId: "base-resume-123",
    } as any);

    const coOpJob = createJob({
      title: "Co-op Software Development Engineer in Test (SDET)",
      employer: "Global Relay Communications Inc.",
    });

    const outcome = await scoreJobSuitability(coOpJob, {});

    expect(llmSpy).not.toHaveBeenCalled();
    expect(outcome.score).toBeLessThanOrEqual(15);
    expect(outcome.analysis?.dealBreakerHits?.length).toBeGreaterThan(0);
  });
});
describe("isSalaryMissing detection", () => { describe("isSalaryMissing detection", () => {
it("should detect null salary as missing", async () => { it("should detect null salary as missing", async () => {
const { scoreJobSuitability } = await import("./scorer"); const { scoreJobSuitability } = await import("./scorer");

View File

@ -3,6 +3,7 @@
*/ */
import { logger } from "@infra/logger"; import { logger } from "@infra/logger";
import { textMatchesKeyword } from "@shared/keyword-match";
import type { Job, JobSearchProfile, SuitabilityAnalysis } from "@shared/types"; import type { Job, JobSearchProfile, SuitabilityAnalysis } from "@shared/types";
import { jobLikelyRequiresOfficePresence } from "@shared/work-arrangement"; import { jobLikelyRequiresOfficePresence } from "@shared/work-arrangement";
import { LlmService } from "./llm/service"; import { LlmService } from "./llm/service";
@ -177,6 +178,30 @@ function extractAnalysis(data: ScoringLlmResponse): SuitabilityAnalysis | null {
* Score a job's suitability based on profile and job description. * Score a job's suitability based on profile and job description.
* Includes retry logic for when AI returns garbage responses. * Includes retry logic for when AI returns garbage responses.
*/ */
/**
 * Deterministic deal-breaker check on a job's title and description.
 *
 * Finds the first entry of `jobSearchProfile.dealBreakers` that
 * `textMatchesKeyword` reports as present in the title or description and
 * turns it into a fixed low-score result naming that entry.
 *
 * @param job - Job whose `title` and `jobDescription` are searched.
 * @param jobSearchProfile - Profile supplying the `dealBreakers` tokens.
 * @returns A score-10 result describing the matched deal-breaker, or `null`
 *   when the profile has no deal-breakers or none of them match.
 */
function scoreForProfileDealBreakers(
  job: Job,
  jobSearchProfile: JobSearchProfile,
): SuitabilityResult | null {
  const { dealBreakers } = jobSearchProfile;
  if (dealBreakers.length === 0) return null;

  // Title and description are searched together as a single text.
  const haystack = `${job.title}\n${job.jobDescription ?? ""}`;
  const hit = dealBreakers.find((breaker) =>
    textMatchesKeyword(haystack, breaker),
  );
  if (!hit) return null;

  return {
    score: 10,
    reason: `Deal-breaker "${hit}" matched in the job title or description.`,
    analysis: {
      roleTypeMatch: 15,
      workArrangementMatch: undefined,
      strengths: [],
      gaps: [`Posting matches profile deal-breaker: ${hit}`],
      // Name the deal-breaker that actually matched; it may be any profile
      // token, not only co-op/intern role types.
      suggestions: [`Skip postings that match your profile deal-breaker "${hit}"`],
      dealBreakerHits: [`Profile deal-breaker: ${hit}`],
    },
  };
}
export async function scoreJobSuitability( export async function scoreJobSuitability(
job: Job, job: Job,
profile: Record<string, unknown>, profile: Record<string, unknown>,
@ -189,6 +214,11 @@ export async function scoreJobSuitability(
const jobSearchProfile = settings.jobSearchProfile?.value ?? null; const jobSearchProfile = settings.jobSearchProfile?.value ?? null;
const hasProfile = jobSearchProfile && hasNonEmptyProfile(jobSearchProfile); const hasProfile = jobSearchProfile && hasNonEmptyProfile(jobSearchProfile);
if (hasProfile && jobSearchProfile) {
const dealBreakerScore = scoreForProfileDealBreakers(job, jobSearchProfile);
if (dealBreakerScore) return dealBreakerScore;
}
const prompt = buildScoringPrompt(job, sanitizeProfileForPrompt(profile), { const prompt = buildScoringPrompt(job, sanitizeProfileForPrompt(profile), {
instructions: settings.scoringInstructions?.value ?? "", instructions: settings.scoringInstructions?.value ?? "",
jobSearchProfile: hasProfile ? jobSearchProfile : null, jobSearchProfile: hasProfile ? jobSearchProfile : null,

View File

@ -3,5 +3,6 @@ export * from "./job-fingerprint";
export * from "./job-url-canonical"; export * from "./job-url-canonical";
export * from "./location-support"; export * from "./location-support";
export * from "./work-arrangement"; export * from "./work-arrangement";
export * from "./keyword-match";
export * from "./types"; export * from "./types";
export * from "./utils/type-conversion"; export * from "./utils/type-conversion";

View File

@ -0,0 +1,21 @@
import { describe, expect, it } from "vitest";
import { textMatchesAnyKeyword, textMatchesKeyword } from "./keyword-match.js";
describe("keyword-match", () => {
  it("matches co-op title against coop deal-breaker token", () => {
    expect(
      textMatchesKeyword(
        "Co-op Software Development Engineer in Test (SDET)",
        "coop",
      ),
    ).toBe(true);
    expect(textMatchesKeyword("Co-op SDET", "co-op")).toBe(true);
  });

  it("matches intern in employer or title", () => {
    expect(textMatchesAnyKeyword("Acme Intern Program", ["intern"])).toBe(true);
    expect(
      textMatchesAnyKeyword("Global Relay Communications Inc.", ["intern"]),
    ).toBe(false);
  });

  // Spaces and underscores are ignored the same way hyphens are, on both the
  // haystack side and the keyword side.
  it("treats spaces and underscores like hyphens", () => {
    expect(textMatchesKeyword("Co op SDET", "coop")).toBe(true);
    expect(textMatchesKeyword("co_op placement", "co-op")).toBe(true);
    expect(textMatchesKeyword("COOP term", "Co Op")).toBe(true);
  });

  // A keyword that normalizes to nothing must not match every haystack.
  it("never matches a missing haystack or an empty keyword", () => {
    expect(textMatchesKeyword(null, "coop")).toBe(false);
    expect(textMatchesKeyword(undefined, "coop")).toBe(false);
    expect(textMatchesKeyword("Co-op SDET", "")).toBe(false);
    expect(textMatchesKeyword("Co-op SDET", " - _ ")).toBe(false);
    expect(textMatchesAnyKeyword("Co-op SDET", [])).toBe(false);
  });
});

View File

@ -0,0 +1,27 @@
/**
 * Canonical form used for keyword comparison: lower-cased with every separator
 * removed, so "co-op", "co op", "co_op", and "coop" all collapse to "coop".
 *
 * Separators are whitespace, underscores, the ASCII hyphen, the soft hyphen
 * (U+00AD), the Unicode hyphen/dash range U+2010–U+2015 (including the
 * non-breaking hyphen and en/em dashes), and the minus sign (U+2212), so text
 * typeset with "Co‑op" or "Co–op" normalizes the same way as "Co-op".
 *
 * @param value - Raw text (haystack or keyword) to normalize.
 * @returns The lower-cased text with all separators stripped.
 */
export function normalizeForKeywordMatch(value: string): string {
  // No trim needed: leading/trailing whitespace is already removed by the
  // separator class below.
  return value
    .toLowerCase()
    .replace(/[\s_\-\u00AD\u2010-\u2015\u2212]+/g, "");
}
/**
 * Reports whether `keyword` occurs in `haystack` once both have been passed
 * through `normalizeForKeywordMatch`.
 *
 * @param haystack - Text to search; `null`/`undefined` is treated as empty.
 * @param keyword - Token to look for.
 * @returns `true` when the normalized keyword is non-empty and is a substring
 *   of the normalized haystack; `false` otherwise.
 */
export function textMatchesKeyword(
  haystack: string | null | undefined,
  keyword: string,
): boolean {
  const text = normalizeForKeywordMatch(haystack ?? "");
  const needle = normalizeForKeywordMatch(keyword);

  // A keyword that normalizes to nothing would be a substring of every text,
  // so it is rejected explicitly.
  if (needle.length === 0) {
    return false;
  }
  return text.indexOf(needle) !== -1;
}
/**
 * Reports whether at least one of `keywords` matches `haystack` according to
 * `textMatchesKeyword`.
 *
 * @param haystack - Text to search; may be `null` or `undefined`.
 * @param keywords - Tokens to try, in order; checking stops at the first match.
 * @returns `true` on the first matching keyword, `false` when none match or
 *   the list is empty.
 */
export function textMatchesAnyKeyword(
  haystack: string | null | undefined,
  keywords: readonly string[],
): boolean {
  for (const candidate of keywords) {
    if (textMatchesKeyword(haystack, candidate)) {
      return true;
    }
  }
  return false;
}