From 0de7f90278caeee2da233631f55ad8912b206ef7 Mon Sep 17 00:00:00 2001 From: ilia Date: Sat, 16 May 2026 17:31:59 -0400 Subject: [PATCH] fix(filters): match co-op/intern skip tokens in titles with hyphen normalization Company skip list and profile deal-breakers now ignore hyphens when matching, apply skip keywords to job titles as well as employers, and short-circuit scoring when a deal-breaker hits so co-op posts cannot score 80 via the LLM. Co-authored-by: Cursor --- .../orchestrator/useFilteredJobs.test.ts | 31 +++++++++++ .../pages/orchestrator/useFilteredJobs.ts | 40 +++++++++----- .../components/ScoringSettingsSection.tsx | 2 +- .../pipeline/steps/discover-jobs.test.ts | 52 +++++++++++++++++++ .../server/pipeline/steps/discover-jobs.ts | 26 ++++------ .../src/server/services/scorer.test.ts | 43 +++++++++++++++ orchestrator/src/server/services/scorer.ts | 30 +++++++++++ shared/src/index.ts | 1 + shared/src/keyword-match.test.ts | 21 ++++++++ shared/src/keyword-match.ts | 27 ++++++++++ 10 files changed, 243 insertions(+), 30 deletions(-) create mode 100644 shared/src/keyword-match.test.ts create mode 100644 shared/src/keyword-match.ts diff --git a/orchestrator/src/client/pages/orchestrator/useFilteredJobs.test.ts b/orchestrator/src/client/pages/orchestrator/useFilteredJobs.test.ts index d5acb05..7df6807 100644 --- a/orchestrator/src/client/pages/orchestrator/useFilteredJobs.test.ts +++ b/orchestrator/src/client/pages/orchestrator/useFilteredJobs.test.ts @@ -454,6 +454,37 @@ describe("useFilteredJobs", () => { ); expect(settingsSkip.current.map((j) => j.id)).toEqual(["good"]); + const { result: coOpTitle } = renderHook(() => + useFilteredJobs( + [ + { + ...baseJob, + id: "coop", + employer: "Global Relay Communications Inc.", + title: "Co-op Software Development Engineer in Test (SDET)", + }, + { ...baseJob, id: "good", employer: "Contoso", title: "SDET" }, + ], + "all", + [], + [], + [], + [], + "all", + "all", + { mode: "at_least", min: null, max: null }, + { 
key: "score", direction: "desc" }, + { + foundAfterYmd: null, + foundBeforeYmd: null, + employerInclude: [], + employerExclude: [], + settingsBlockedEmployerKeywords: ["co-op"], + }, + ), + ); + expect(coOpTitle.current.map((j) => j.id)).toEqual(["good"]); + const { result: includeOnly } = renderHook(() => useFilteredJobs( jobs, diff --git a/orchestrator/src/client/pages/orchestrator/useFilteredJobs.ts b/orchestrator/src/client/pages/orchestrator/useFilteredJobs.ts index fa5dd86..38a630d 100644 --- a/orchestrator/src/client/pages/orchestrator/useFilteredJobs.ts +++ b/orchestrator/src/client/pages/orchestrator/useFilteredJobs.ts @@ -1,3 +1,4 @@ +import { textMatchesKeyword } from "@shared/keyword-match"; import { inferCountryKeysFromJobLocation } from "@shared/search-cities"; import type { JobListItem, JobSource } from "@shared/types"; import { useMemo } from "react"; @@ -24,13 +25,16 @@ const startOfLocalDayMs = (ymd: string): number => const endOfLocalDayMs = (ymd: string): number => new Date(`${ymd}T23:59:59.999`).getTime(); -function employerMatchesAnyKeyword( - employer: string, - keywordsLower: string[], +function jobMatchesAnyBlockedKeyword( + job: Pick, + keywords: string[], ): boolean { - if (keywordsLower.length === 0) return false; - const normalized = employer.toLowerCase(); - return keywordsLower.some((keyword) => normalized.includes(keyword)); + if (keywords.length === 0) return false; + return keywords.some( + (keyword) => + textMatchesKeyword(job.employer, keyword) || + textMatchesKeyword(job.title, keyword), + ); } const getSponsorCategory = (score: number | null): SponsorFilter => { @@ -63,12 +67,10 @@ export const useFilteredJobs = ( const employerIncludeLower = listExtras.employerInclude.map((value) => value.toLowerCase(), ); - const employerExcludeLower = [ - ...listExtras.settingsBlockedEmployerKeywords.map((value) => - value.toLowerCase(), - ), - ...listExtras.employerExclude.map((value) => value.toLowerCase()), - ]; + const 
employerExcludeLower = listExtras.employerExclude.map((value) => + value.toLowerCase(), + ); + const settingsBlockedKeywords = listExtras.settingsBlockedEmployerKeywords; const uniqueEmployerExcludeLower = [...new Set(employerExcludeLower)]; let filtered = [...jobs]; @@ -164,14 +166,24 @@ export const useFilteredJobs = ( if (employerIncludeLower.length > 0) { filtered = filtered.filter((job) => - employerMatchesAnyKeyword(job.employer, employerIncludeLower), + employerIncludeLower.some((keyword) => + job.employer.toLowerCase().includes(keyword), + ), + ); + } + + if (settingsBlockedKeywords.length > 0) { + filtered = filtered.filter( + (job) => !jobMatchesAnyBlockedKeyword(job, settingsBlockedKeywords), ); } if (uniqueEmployerExcludeLower.length > 0) { filtered = filtered.filter( (job) => - !employerMatchesAnyKeyword(job.employer, uniqueEmployerExcludeLower), + !uniqueEmployerExcludeLower.some((keyword) => + job.employer.toLowerCase().includes(keyword), + ), ); } diff --git a/orchestrator/src/client/pages/settings/components/ScoringSettingsSection.tsx b/orchestrator/src/client/pages/settings/components/ScoringSettingsSection.tsx index ae4baa4..12e124b 100644 --- a/orchestrator/src/client/pages/settings/components/ScoringSettingsSection.tsx +++ b/orchestrator/src/client/pages/settings/components/ScoringSettingsSection.tsx @@ -235,7 +235,7 @@ export const ScoringSettingsSection: React.FC = ({ setValue("blockedCompanyKeywords", value, { shouldDirty: true }) } placeholder='e.g. "recruitment", "staffing"' - helperText="Maintained here and saved with Settings. Each token is a case-insensitive substring match on the employer name. Matching jobs are dropped during discovery (not removed from the database if already imported). See docs: /docs/features/company-skip-list" + helperText="Maintained here and saved with Settings. Each token matches employer or job title (hyphens ignored, so co-op matches coop). Matching jobs are dropped during discovery. 
See docs: /docs/features/company-skip-list" removeLabelPrefix="Remove blocked keyword" disabled={isLoading || isSaving} /> diff --git a/orchestrator/src/server/pipeline/steps/discover-jobs.test.ts b/orchestrator/src/server/pipeline/steps/discover-jobs.test.ts index 1388cfb..023803d 100644 --- a/orchestrator/src/server/pipeline/steps/discover-jobs.test.ts +++ b/orchestrator/src/server/pipeline/steps/discover-jobs.test.ts @@ -394,6 +394,58 @@ describe("discoverJobsStep", () => { ]); }); + it("drops co-op titles via company skip list and coop deal-breaker token", async () => { + const settingsRepo = await import("@server/repositories/settings"); + const registryModule = await import("@server/extractors/registry"); + + const bcManifest = { + id: "bctenet", + displayName: "BC T-Net", + providesSources: ["bctenet"], + run: vi.fn().mockResolvedValue({ + success: true, + jobs: [ + { + source: "bctenet", + title: "Co-op Software Development Engineer in Test (SDET)", + employer: "Global Relay Communications Inc.", + location: "British Columbia, Canada", + jobUrl: "https://example.com/job-coop", + }, + { + source: "bctenet", + title: "SDET", + employer: "Contoso", + location: "Vancouver, BC, Canada", + jobUrl: "https://example.com/job-sdet", + }, + ], + }), + }; + + vi.mocked(settingsRepo.getAllSettings).mockResolvedValue({ + searchTerms: JSON.stringify(["sdet"]), + searchCities: "Canada", + blockedCompanyKeywords: JSON.stringify(["co-op"]), + } as any); + + vi.mocked(registryModule.getExtractorRegistry).mockResolvedValue({ + manifests: new Map([["bctenet", bcManifest as any]]), + manifestBySource: new Map([["bctenet", bcManifest as any]]), + availableSources: ["bctenet"], + } as any); + + const result = await discoverJobsStep({ + mergedConfig: { + ...baseConfig, + sources: ["bctenet"], + }, + }); + + expect(result.discoveredJobs).toHaveLength(1); + expect(result.discoveredJobs[0]?.jobUrl).toBe("https://example.com/job-sdet"); + }); + it("drops jobs with blocked country in 
description when location is worldwide", async () => { const settingsRepo = await import("@server/repositories/settings"); const registryModule = await import("@server/extractors/registry"); diff --git a/orchestrator/src/server/pipeline/steps/discover-jobs.ts b/orchestrator/src/server/pipeline/steps/discover-jobs.ts index e3c506c..a29d5b8 100644 --- a/orchestrator/src/server/pipeline/steps/discover-jobs.ts +++ b/orchestrator/src/server/pipeline/steps/discover-jobs.ts @@ -15,6 +15,7 @@ import { isSourceAllowedForCountry, normalizeCountryKey, } from "@shared/location-support.js"; +import { textMatchesAnyKeyword } from "@shared/keyword-match.js"; import { resolveBlockedCompanyKeywordsFromStoredString } from "@shared/resolve-blocked-company-keywords.js"; import { inferCountryKeyFromSearchGeography, @@ -40,15 +41,14 @@ type DiscoverySourceTask = { run: () => Promise; }; -function isBlockedEmployer( - employer: string | null | undefined, - blockedKeywordsLowerCase: string[], +function jobMatchesBlockedCompanyKeywords( + job: CreateJobInput, + blockedKeywords: string[], ): boolean { - if (!employer) return false; - if (blockedKeywordsLowerCase.length === 0) return false; - const normalizedEmployer = employer.toLowerCase(); - return blockedKeywordsLowerCase.some((keyword) => - normalizedEmployer.includes(keyword), + if (blockedKeywords.length === 0) return false; + return ( + textMatchesAnyKeyword(job.employer, blockedKeywords) || + textMatchesAnyKeyword(job.title, blockedKeywords) ); } @@ -156,7 +156,6 @@ function filterJobsBySearchProfile(args: { const roleMatchers = buildRoleMatchers(targetRolePhrases); const mustHaveLower = mustHaveSkills.map(normalizeText).filter(Boolean); - const dealBreakersLower = dealBreakers.map(normalizeText).filter(Boolean); const filtered = jobs.filter((job) => { const title = normalizeText(job.title); @@ -164,8 +163,8 @@ function filterJobsBySearchProfile(args: { const haystack = `${title}\n${body}`; if ( - dealBreakersLower.length > 0 && - 
matchesAny(haystack, dealBreakersLower) + dealBreakers.length > 0 && + textMatchesAnyKeyword(haystack, dealBreakers) + ) { return false; } @@ -521,11 +520,8 @@ export async function discoverJobsStep(args: { const blockedCompanyKeywords = resolveBlockedCompanyKeywordsFromStoredString( settings.blockedCompanyKeywords, ); - const blockedKeywordsLowerCase = blockedCompanyKeywords.map((value) => - value.toLowerCase(), - ); const afterCompanyFilter = cityFilteredJobs.filter( - (job) => !isBlockedEmployer(job.employer, blockedKeywordsLowerCase), + (job) => !jobMatchesBlockedCompanyKeywords(job, blockedCompanyKeywords), ); const companyDroppedCount = cityFilteredJobs.length - afterCompanyFilter.length; diff --git a/orchestrator/src/server/services/scorer.test.ts b/orchestrator/src/server/services/scorer.test.ts index 0243cbe..afaffc0 100644 --- a/orchestrator/src/server/services/scorer.test.ts +++ b/orchestrator/src/server/services/scorer.test.ts @@ -284,6 +284,49 @@ describe("salary penalty", () => { vi.restoreAllMocks(); }); + describe("profile deal-breakers", () => { + it("short-circuits scoring when co-op matches coop deal-breaker token", async () => { + const { scoreJobSuitability } = await import("./scorer"); + const { LlmService } = await import("./llm/service"); + + const callJson = vi.spyOn(LlmService.prototype, "callJson"); + + getEffectiveSettingsMock.mockResolvedValue({ + jobSearchProfile: { + value: { + targetRoles: ["SDET"], + experienceLevel: "Senior", + mustHaveSkills: [], + niceToHaveSkills: [], + dealBreakers: ["coop"], + preferredWorkArrangement: ["remote"], + preferredLocations: ["Canada"], + minimumSalary: "", + industriesToTarget: [], + industriesToAvoid: [], + aboutMe: "", + }, + default: null, + override: null, + }, + penalizeMissingSalary: { value: false, default: false, override: null }, + missingSalaryPenalty: { value: 10, default: 10, override: null }, + scoringInstructions: { value: "", default: "", override: null }, + 
rxresumeBaseResumeId: "base-resume-123", + } as any); + + const job = createJob({ + title: "Co-op Software Development Engineer in Test (SDET)", + employer: "Global Relay Communications Inc.", + }); + const result = await scoreJobSuitability(job, {}); + + expect(callJson).not.toHaveBeenCalled(); + expect(result.score).toBeLessThanOrEqual(15); + expect(result.analysis?.dealBreakerHits?.length).toBeGreaterThan(0); + }); + }); + describe("isSalaryMissing detection", () => { it("should detect null salary as missing", async () => { const { scoreJobSuitability } = await import("./scorer"); diff --git a/orchestrator/src/server/services/scorer.ts b/orchestrator/src/server/services/scorer.ts index 8522809..d8921fb 100644 --- a/orchestrator/src/server/services/scorer.ts +++ b/orchestrator/src/server/services/scorer.ts @@ -3,6 +3,7 @@ */ import { logger } from "@infra/logger"; +import { textMatchesKeyword } from "@shared/keyword-match"; import type { Job, JobSearchProfile, SuitabilityAnalysis } from "@shared/types"; import { jobLikelyRequiresOfficePresence } from "@shared/work-arrangement"; import { LlmService } from "./llm/service"; @@ -177,6 +178,30 @@ function extractAnalysis(data: ScoringLlmResponse): SuitabilityAnalysis | null { * Score a job's suitability based on profile and job description. * Includes retry logic for when AI returns garbage responses. */ +function scoreForProfileDealBreakers( + job: Job, + jobSearchProfile: JobSearchProfile, +): SuitabilityResult | null { + if (jobSearchProfile.dealBreakers.length === 0) return null; + const haystack = `${job.title}\n${job.jobDescription ?? 
""}`; + const hit = jobSearchProfile.dealBreakers.find((breaker) => + textMatchesKeyword(haystack, breaker), + ); + if (!hit) return null; + return { + score: 10, + reason: `Deal-breaker "${hit}" matched in the job title or description.`, + analysis: { + roleTypeMatch: 15, + workArrangementMatch: undefined, + strengths: [], + gaps: [`Posting matches profile deal-breaker: ${hit}`], + suggestions: ["Skip co-op, intern, and other excluded role types"], + dealBreakerHits: [`Profile deal-breaker: ${hit}`], + }, + }; +} + export async function scoreJobSuitability( job: Job, profile: Record, @@ -189,6 +214,11 @@ export async function scoreJobSuitability( const jobSearchProfile = settings.jobSearchProfile?.value ?? null; const hasProfile = jobSearchProfile && hasNonEmptyProfile(jobSearchProfile); + if (hasProfile && jobSearchProfile) { + const dealBreakerScore = scoreForProfileDealBreakers(job, jobSearchProfile); + if (dealBreakerScore) return dealBreakerScore; + } + const prompt = buildScoringPrompt(job, sanitizeProfileForPrompt(profile), { instructions: settings.scoringInstructions?.value ?? "", jobSearchProfile: hasProfile ? 
jobSearchProfile : null, diff --git a/shared/src/index.ts b/shared/src/index.ts index a82b7e4..0d984c4 100644 --- a/shared/src/index.ts +++ b/shared/src/index.ts @@ -3,5 +3,6 @@ export * from "./job-fingerprint"; export * from "./job-url-canonical"; export * from "./location-support"; export * from "./work-arrangement"; +export * from "./keyword-match"; export * from "./types"; export * from "./utils/type-conversion"; diff --git a/shared/src/keyword-match.test.ts b/shared/src/keyword-match.test.ts new file mode 100644 index 0000000..1920ce5 --- /dev/null +++ b/shared/src/keyword-match.test.ts @@ -0,0 +1,21 @@ +import { describe, expect, it } from "vitest"; +import { textMatchesAnyKeyword, textMatchesKeyword } from "./keyword-match.js"; + +describe("keyword-match", () => { + it("matches co-op title against coop deal-breaker token", () => { + expect( + textMatchesKeyword( + "Co-op Software Development Engineer in Test (SDET)", + "coop", + ), + ).toBe(true); + expect(textMatchesKeyword("Co-op SDET", "co-op")).toBe(true); + }); + + it("matches intern in employer or title", () => { + expect(textMatchesAnyKeyword("Acme Intern Program", ["intern"])).toBe(true); + expect( + textMatchesAnyKeyword("Global Relay Communications Inc.", ["intern"]), + ).toBe(false); + }); +}); diff --git a/shared/src/keyword-match.ts b/shared/src/keyword-match.ts new file mode 100644 index 0000000..3cd0c09 --- /dev/null +++ b/shared/src/keyword-match.ts @@ -0,0 +1,27 @@ +/** + * Case-insensitive substring matching that ignores spaces, hyphens, and underscores + * so "co-op", "co op", and "coop" all match the same token. + */ +export function normalizeForKeywordMatch(value: string): string { + return value + .toLowerCase() + .replace(/[\s\-_]+/g, "") + .trim(); +} + +export function textMatchesKeyword( + haystack: string | null | undefined, + keyword: string, +): boolean { + const normalizedHaystack = normalizeForKeywordMatch(haystack ?? 
""); + const normalizedKeyword = normalizeForKeywordMatch(keyword); + if (!normalizedKeyword) return false; + return normalizedHaystack.includes(normalizedKeyword); +} + +export function textMatchesAnyKeyword( + haystack: string | null | undefined, + keywords: readonly string[], +): boolean { + return keywords.some((keyword) => textMatchesKeyword(haystack, keyword)); +}