fix(filters): match co-op/intern skip tokens in titles with hyphen normalization
Some checks failed
CI / Linting (Biome) (push) Failing after 40s
CI / Tests (push) Successful in 5m12s
CI / Type Check (adzuna-extractor) (push) Successful in 1m8s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m13s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m9s
CI / Type Check (orchestrator) (push) Failing after 1m17s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m10s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m12s
CI / Documentation (push) Successful in 2m0s
Some checks failed
CI / Linting (Biome) (push) Failing after 40s
CI / Tests (push) Successful in 5m12s
CI / Type Check (adzuna-extractor) (push) Successful in 1m8s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m13s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m9s
CI / Type Check (orchestrator) (push) Failing after 1m17s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m10s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m12s
CI / Documentation (push) Successful in 2m0s
Company skip list and profile deal-breakers now ignore hyphens when matching, apply skip keywords to job titles as well as employers, and short-circuit scoring when a deal-breaker hits so co-op posts cannot score 80 via the LLM. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
03d293699a
commit
0de7f90278
@ -454,6 +454,37 @@ describe("useFilteredJobs", () => {
|
||||
);
|
||||
expect(settingsSkip.current.map((j) => j.id)).toEqual(["good"]);
|
||||
|
||||
const { result: coOpTitle } = renderHook(() =>
|
||||
useFilteredJobs(
|
||||
[
|
||||
{
|
||||
...baseJob,
|
||||
id: "coop",
|
||||
employer: "Global Relay Communications Inc.",
|
||||
title: "Co-op Software Development Engineer in Test (SDET)",
|
||||
},
|
||||
{ ...baseJob, id: "good", employer: "Contoso", title: "SDET" },
|
||||
],
|
||||
"all",
|
||||
[],
|
||||
[],
|
||||
[],
|
||||
[],
|
||||
"all",
|
||||
"all",
|
||||
{ mode: "at_least", min: null, max: null },
|
||||
{ key: "score", direction: "desc" },
|
||||
{
|
||||
foundAfterYmd: null,
|
||||
foundBeforeYmd: null,
|
||||
employerInclude: [],
|
||||
employerExclude: [],
|
||||
settingsBlockedEmployerKeywords: ["co-op"],
|
||||
},
|
||||
),
|
||||
);
|
||||
expect(coOpTitle.current.map((j) => j.id)).toEqual(["good"]);
|
||||
|
||||
const { result: includeOnly } = renderHook(() =>
|
||||
useFilteredJobs(
|
||||
jobs,
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import { textMatchesKeyword } from "@shared/keyword-match";
|
||||
import { inferCountryKeysFromJobLocation } from "@shared/search-cities";
|
||||
import type { JobListItem, JobSource } from "@shared/types";
|
||||
import { useMemo } from "react";
|
||||
@ -24,13 +25,16 @@ const startOfLocalDayMs = (ymd: string): number =>
|
||||
const endOfLocalDayMs = (ymd: string): number =>
|
||||
new Date(`${ymd}T23:59:59.999`).getTime();
|
||||
|
||||
function employerMatchesAnyKeyword(
|
||||
employer: string,
|
||||
keywordsLower: string[],
|
||||
function jobMatchesAnyBlockedKeyword(
|
||||
job: Pick<JobListItem, "employer" | "title">,
|
||||
keywords: string[],
|
||||
): boolean {
|
||||
if (keywordsLower.length === 0) return false;
|
||||
const normalized = employer.toLowerCase();
|
||||
return keywordsLower.some((keyword) => normalized.includes(keyword));
|
||||
if (keywords.length === 0) return false;
|
||||
return keywords.some(
|
||||
(keyword) =>
|
||||
textMatchesKeyword(job.employer, keyword) ||
|
||||
textMatchesKeyword(job.title, keyword),
|
||||
);
|
||||
}
|
||||
|
||||
const getSponsorCategory = (score: number | null): SponsorFilter => {
|
||||
@ -63,12 +67,10 @@ export const useFilteredJobs = (
|
||||
const employerIncludeLower = listExtras.employerInclude.map((value) =>
|
||||
value.toLowerCase(),
|
||||
);
|
||||
const employerExcludeLower = [
|
||||
...listExtras.settingsBlockedEmployerKeywords.map((value) =>
|
||||
value.toLowerCase(),
|
||||
),
|
||||
...listExtras.employerExclude.map((value) => value.toLowerCase()),
|
||||
];
|
||||
const employerExcludeLower = listExtras.employerExclude.map((value) =>
|
||||
value.toLowerCase(),
|
||||
);
|
||||
const settingsBlockedKeywords = listExtras.settingsBlockedEmployerKeywords;
|
||||
const uniqueEmployerExcludeLower = [...new Set(employerExcludeLower)];
|
||||
|
||||
let filtered = [...jobs];
|
||||
@ -164,14 +166,24 @@ export const useFilteredJobs = (
|
||||
|
||||
if (employerIncludeLower.length > 0) {
|
||||
filtered = filtered.filter((job) =>
|
||||
employerMatchesAnyKeyword(job.employer, employerIncludeLower),
|
||||
employerIncludeLower.some((keyword) =>
|
||||
job.employer.toLowerCase().includes(keyword),
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
if (settingsBlockedKeywords.length > 0) {
|
||||
filtered = filtered.filter(
|
||||
(job) => !jobMatchesAnyBlockedKeyword(job, settingsBlockedKeywords),
|
||||
);
|
||||
}
|
||||
|
||||
if (uniqueEmployerExcludeLower.length > 0) {
|
||||
filtered = filtered.filter(
|
||||
(job) =>
|
||||
!employerMatchesAnyKeyword(job.employer, uniqueEmployerExcludeLower),
|
||||
!uniqueEmployerExcludeLower.some((keyword) =>
|
||||
job.employer.toLowerCase().includes(keyword),
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@ -235,7 +235,7 @@ export const ScoringSettingsSection: React.FC<ScoringSettingsSectionProps> = ({
|
||||
setValue("blockedCompanyKeywords", value, { shouldDirty: true })
|
||||
}
|
||||
placeholder='e.g. "recruitment", "staffing"'
|
||||
helperText="Maintained here and saved with Settings. Each token is a case-insensitive substring match on the employer name. Matching jobs are dropped during discovery (not removed from the database if already imported). See docs: /docs/features/company-skip-list"
|
||||
helperText="Maintained here and saved with Settings. Each token matches employer or job title (hyphens ignored, so co-op matches coop). Matching jobs are dropped during discovery. See docs: /docs/features/company-skip-list"
|
||||
removeLabelPrefix="Remove blocked keyword"
|
||||
disabled={isLoading || isSaving}
|
||||
/>
|
||||
|
||||
@ -394,6 +394,58 @@ describe("discoverJobsStep", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("drops co-op titles via company skip list and coop deal-breaker token", async () => {
|
||||
const settingsRepo = await import("@server/repositories/settings");
|
||||
const registryModule = await import("@server/extractors/registry");
|
||||
|
||||
const bcManifest = {
|
||||
id: "bctenet",
|
||||
displayName: "BC T-Net",
|
||||
providesSources: ["bctenet"],
|
||||
run: vi.fn().mockResolvedValue({
|
||||
success: true,
|
||||
jobs: [
|
||||
{
|
||||
source: "bctenet",
|
||||
title: "Co-op Software Development Engineer in Test (SDET)",
|
||||
employer: "Global Relay Communications Inc.",
|
||||
location: "British Columbia, Canada",
|
||||
jobUrl: "https://example.com/job-coop",
|
||||
},
|
||||
{
|
||||
source: "bctenet",
|
||||
title: "SDET",
|
||||
employer: "Contoso",
|
||||
location: "Vancouver, BC, Canada",
|
||||
jobUrl: "https://example.com/job-sdet",
|
||||
},
|
||||
],
|
||||
}),
|
||||
};
|
||||
|
||||
vi.mocked(settingsRepo.getAllSettings).mockResolvedValue({
|
||||
searchTerms: JSON.stringify(["sdet"]),
|
||||
searchCities: "Canada",
|
||||
blockedCompanyKeywords: JSON.stringify(["co-op"]),
|
||||
} as any);
|
||||
|
||||
vi.mocked(registryModule.getExtractorRegistry).mockResolvedValue({
|
||||
manifests: new Map([["bctenet", bcManifest as any]]),
|
||||
manifestBySource: new Map([["bctenet", bcManifest as any]]),
|
||||
availableSources: ["bctenet"],
|
||||
} as any);
|
||||
|
||||
const result = await discoverJobsStep({
|
||||
mergedConfig: {
|
||||
...baseConfig,
|
||||
sources: ["bctenet"],
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.discoveredJobs).toHaveLength(1);
|
||||
expect(result.discoveredJobs[0]?.jobUrl).toBe("https://example.com/job-sdet");
|
||||
});
|
||||
|
||||
it("drops jobs with blocked country in description when location is worldwide", async () => {
|
||||
const settingsRepo = await import("@server/repositories/settings");
|
||||
const registryModule = await import("@server/extractors/registry");
|
||||
|
||||
@ -15,6 +15,7 @@ import {
|
||||
isSourceAllowedForCountry,
|
||||
normalizeCountryKey,
|
||||
} from "@shared/location-support.js";
|
||||
import { textMatchesAnyKeyword } from "@shared/keyword-match.js";
|
||||
import { resolveBlockedCompanyKeywordsFromStoredString } from "@shared/resolve-blocked-company-keywords.js";
|
||||
import {
|
||||
inferCountryKeyFromSearchGeography,
|
||||
@ -40,15 +41,14 @@ type DiscoverySourceTask = {
|
||||
run: () => Promise<DiscoveryTaskResult>;
|
||||
};
|
||||
|
||||
function isBlockedEmployer(
|
||||
employer: string | null | undefined,
|
||||
blockedKeywordsLowerCase: string[],
|
||||
function jobMatchesBlockedCompanyKeywords(
|
||||
job: CreateJobInput,
|
||||
blockedKeywords: string[],
|
||||
): boolean {
|
||||
if (!employer) return false;
|
||||
if (blockedKeywordsLowerCase.length === 0) return false;
|
||||
const normalizedEmployer = employer.toLowerCase();
|
||||
return blockedKeywordsLowerCase.some((keyword) =>
|
||||
normalizedEmployer.includes(keyword),
|
||||
if (blockedKeywords.length === 0) return false;
|
||||
return (
|
||||
textMatchesAnyKeyword(job.employer, blockedKeywords) ||
|
||||
textMatchesAnyKeyword(job.title, blockedKeywords)
|
||||
);
|
||||
}
|
||||
|
||||
@ -156,7 +156,6 @@ function filterJobsBySearchProfile(args: {
|
||||
|
||||
const roleMatchers = buildRoleMatchers(targetRolePhrases);
|
||||
const mustHaveLower = mustHaveSkills.map(normalizeText).filter(Boolean);
|
||||
const dealBreakersLower = dealBreakers.map(normalizeText).filter(Boolean);
|
||||
|
||||
const filtered = jobs.filter((job) => {
|
||||
const title = normalizeText(job.title);
|
||||
@ -164,8 +163,8 @@ function filterJobsBySearchProfile(args: {
|
||||
const haystack = `${title}\n${body}`;
|
||||
|
||||
if (
|
||||
dealBreakersLower.length > 0 &&
|
||||
matchesAny(haystack, dealBreakersLower)
|
||||
dealBreakers.length > 0 &&
|
||||
dealBreakers.some((breaker) => textMatchesAnyKeyword(haystack, breaker))
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
@ -521,11 +520,8 @@ export async function discoverJobsStep(args: {
|
||||
const blockedCompanyKeywords = resolveBlockedCompanyKeywordsFromStoredString(
|
||||
settings.blockedCompanyKeywords,
|
||||
);
|
||||
const blockedKeywordsLowerCase = blockedCompanyKeywords.map((value) =>
|
||||
value.toLowerCase(),
|
||||
);
|
||||
const afterCompanyFilter = cityFilteredJobs.filter(
|
||||
(job) => !isBlockedEmployer(job.employer, blockedKeywordsLowerCase),
|
||||
(job) => !jobMatchesBlockedCompanyKeywords(job, blockedCompanyKeywords),
|
||||
);
|
||||
const companyDroppedCount =
|
||||
cityFilteredJobs.length - afterCompanyFilter.length;
|
||||
|
||||
@ -284,6 +284,49 @@ describe("salary penalty", () => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
describe("profile deal-breakers", () => {
|
||||
it("short-circuits scoring when co-op matches coop deal-breaker token", async () => {
|
||||
const { scoreJobSuitability } = await import("./scorer");
|
||||
const { LlmService } = await import("./llm/service");
|
||||
|
||||
const callJson = vi.spyOn(LlmService.prototype, "callJson");
|
||||
|
||||
getEffectiveSettingsMock.mockResolvedValue({
|
||||
jobSearchProfile: {
|
||||
value: {
|
||||
targetRoles: ["SDET"],
|
||||
experienceLevel: "Senior",
|
||||
mustHaveSkills: [],
|
||||
niceToHaveSkills: [],
|
||||
dealBreakers: ["coop"],
|
||||
preferredWorkArrangement: ["remote"],
|
||||
preferredLocations: ["Canada"],
|
||||
minimumSalary: "",
|
||||
industriesToTarget: [],
|
||||
industriesToAvoid: [],
|
||||
aboutMe: "",
|
||||
},
|
||||
default: null,
|
||||
override: null,
|
||||
},
|
||||
penalizeMissingSalary: { value: false, default: false, override: null },
|
||||
missingSalaryPenalty: { value: 10, default: 10, override: null },
|
||||
scoringInstructions: { value: "", default: "", override: null },
|
||||
rxresumeBaseResumeId: "base-resume-123",
|
||||
} as any);
|
||||
|
||||
const job = createJob({
|
||||
title: "Co-op Software Development Engineer in Test (SDET)",
|
||||
employer: "Global Relay Communications Inc.",
|
||||
});
|
||||
const result = await scoreJobSuitability(job, {});
|
||||
|
||||
expect(callJson).not.toHaveBeenCalled();
|
||||
expect(result.score).toBeLessThanOrEqual(15);
|
||||
expect(result.analysis?.dealBreakerHits?.length).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("isSalaryMissing detection", () => {
|
||||
it("should detect null salary as missing", async () => {
|
||||
const { scoreJobSuitability } = await import("./scorer");
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
*/
|
||||
|
||||
import { logger } from "@infra/logger";
|
||||
import { textMatchesKeyword } from "@shared/keyword-match";
|
||||
import type { Job, JobSearchProfile, SuitabilityAnalysis } from "@shared/types";
|
||||
import { jobLikelyRequiresOfficePresence } from "@shared/work-arrangement";
|
||||
import { LlmService } from "./llm/service";
|
||||
@ -177,6 +178,30 @@ function extractAnalysis(data: ScoringLlmResponse): SuitabilityAnalysis | null {
|
||||
* Score a job's suitability based on profile and job description.
|
||||
* Includes retry logic for when AI returns garbage responses.
|
||||
*/
|
||||
/**
 * Builds a fixed low-suitability result when a job hits a profile deal-breaker.
 *
 * The job title and description (empty when absent) are joined with a newline
 * and each entry of `jobSearchProfile.dealBreakers` is tested against that
 * text with `textMatchesKeyword`. The first matching entry is the one reported.
 *
 * @param job - Job whose `title` and `jobDescription` are inspected.
 * @param jobSearchProfile - Profile supplying the `dealBreakers` list.
 * @returns `null` when the list is empty or nothing matches; otherwise a
 *   result with score 10 whose reason and analysis name the matched entry.
 */
function scoreForProfileDealBreakers(
  job: Job,
  jobSearchProfile: JobSearchProfile,
): SuitabilityResult | null {
  const { dealBreakers } = jobSearchProfile;
  if (dealBreakers.length === 0) return null;

  const searchableText = `${job.title}\n${job.jobDescription ?? ""}`;

  // Walk the list in order so the first matching deal-breaker wins.
  let matched: string | undefined;
  for (const breaker of dealBreakers) {
    if (textMatchesKeyword(searchableText, breaker)) {
      matched = breaker;
      break;
    }
  }
  if (!matched) return null;

  const analysis = {
    roleTypeMatch: 15,
    workArrangementMatch: undefined,
    strengths: [],
    gaps: [`Posting matches profile deal-breaker: ${matched}`],
    suggestions: ["Skip co-op, intern, and other excluded role types"],
    dealBreakerHits: [`Profile deal-breaker: ${matched}`],
  };

  return {
    score: 10,
    reason: `Deal-breaker "${matched}" matched in the job title or description.`,
    analysis,
  };
}
|
||||
|
||||
export async function scoreJobSuitability(
|
||||
job: Job,
|
||||
profile: Record<string, unknown>,
|
||||
@ -189,6 +214,11 @@ export async function scoreJobSuitability(
|
||||
const jobSearchProfile = settings.jobSearchProfile?.value ?? null;
|
||||
const hasProfile = jobSearchProfile && hasNonEmptyProfile(jobSearchProfile);
|
||||
|
||||
if (hasProfile && jobSearchProfile) {
|
||||
const dealBreakerScore = scoreForProfileDealBreakers(job, jobSearchProfile);
|
||||
if (dealBreakerScore) return dealBreakerScore;
|
||||
}
|
||||
|
||||
const prompt = buildScoringPrompt(job, sanitizeProfileForPrompt(profile), {
|
||||
instructions: settings.scoringInstructions?.value ?? "",
|
||||
jobSearchProfile: hasProfile ? jobSearchProfile : null,
|
||||
|
||||
@ -3,5 +3,6 @@ export * from "./job-fingerprint";
|
||||
export * from "./job-url-canonical";
|
||||
export * from "./location-support";
|
||||
export * from "./work-arrangement";
|
||||
export * from "./keyword-match";
|
||||
export * from "./types";
|
||||
export * from "./utils/type-conversion";
|
||||
|
||||
21
shared/src/keyword-match.test.ts
Normal file
21
shared/src/keyword-match.test.ts
Normal file
@ -0,0 +1,21 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { textMatchesAnyKeyword, textMatchesKeyword } from "./keyword-match.js";
|
||||
|
||||
// Unit tests for the shared keyword matcher: separator-insensitive,
// case-insensitive substring matching used by skip lists and deal-breakers.
describe("keyword-match", () => {
  it("matches co-op title against coop deal-breaker token", () => {
    expect(
      textMatchesKeyword(
        "Co-op Software Development Engineer in Test (SDET)",
        "coop",
      ),
    ).toBe(true);
    expect(textMatchesKeyword("Co-op SDET", "co-op")).toBe(true);
  });

  it("matches intern in employer or title", () => {
    expect(textMatchesAnyKeyword("Acme Intern Program", ["intern"])).toBe(true);
    expect(
      textMatchesAnyKeyword("Global Relay Communications Inc.", ["intern"]),
    ).toBe(false);
  });

  it("treats spaces and underscores like hyphens", () => {
    expect(textMatchesKeyword("Co op SDET", "co_op")).toBe(true);
    expect(textMatchesKeyword("CO_OP sdet", "co op")).toBe(true);
  });

  it("never matches an empty keyword or an empty keyword list", () => {
    expect(textMatchesKeyword("Co-op SDET", "")).toBe(false);
    // A keyword made only of separators normalizes to the empty string.
    expect(textMatchesKeyword("Co-op SDET", " - ")).toBe(false);
    expect(textMatchesAnyKeyword("Co-op SDET", [])).toBe(false);
  });

  it("treats a missing haystack as empty text", () => {
    expect(textMatchesKeyword(null, "coop")).toBe(false);
    expect(textMatchesAnyKeyword(undefined, ["coop"])).toBe(false);
  });
});
|
||||
27
shared/src/keyword-match.ts
Normal file
27
shared/src/keyword-match.ts
Normal file
@ -0,0 +1,27 @@
|
||||
/**
 * Separator characters dropped before keyword comparison: whitespace,
 * underscores, ASCII hyphen-minus, and the Unicode dash look-alikes that
 * commonly appear in scraped text (U+2010 to U+2015, U+2212 minus sign,
 * U+FE58 / U+FE63 small forms, U+FF0D fullwidth hyphen-minus).
 */
const KEYWORD_SEPARATOR_PATTERN =
  /[\s_\-\u2010-\u2015\u2212\uFE58\uFE63\uFF0D]+/g;

/**
 * Case-insensitive normalization for substring matching that ignores spaces,
 * hyphens, and underscores so "co-op", "co op", and "coop" all collapse to
 * the same token.
 *
 * @param value - Raw text (keyword or haystack) to normalize.
 * @returns The lower-cased text with every run of separators removed.
 */
export function normalizeForKeywordMatch(value: string): string {
  // No trailing trim() is needed: the pattern already removes all whitespace.
  return value.toLowerCase().replace(KEYWORD_SEPARATOR_PATTERN, "");
}
|
||||
|
||||
/**
 * Reports whether `keyword` occurs in `haystack` once both have been passed
 * through `normalizeForKeywordMatch`.
 *
 * @param haystack - Text to search; `null` or `undefined` is treated as empty.
 * @param keyword - Token to look for.
 * @returns `false` when the keyword normalizes to an empty string, otherwise
 *   whether the normalized haystack contains the normalized keyword.
 */
export function textMatchesKeyword(
  haystack: string | null | undefined,
  keyword: string,
): boolean {
  const needle = normalizeForKeywordMatch(keyword);
  const searchableText = normalizeForKeywordMatch(haystack ?? "");
  // An empty needle would be "contained" in every string, so reject it.
  return needle !== "" && searchableText.includes(needle);
}
|
||||
|
||||
/**
 * Reports whether at least one of `keywords` occurs in `haystack`, using the
 * same separator- and case-insensitive comparison as `textMatchesKeyword`.
 *
 * The haystack is normalized once up front rather than once per keyword,
 * since callers may pass a full job description with many keywords.
 *
 * @param haystack - Text to search; `null` or `undefined` is treated as empty.
 * @param keywords - Candidate tokens; entries that normalize to an empty
 *   string never match.
 * @returns `true` if any keyword matches, `false` otherwise (including for an
 *   empty keyword list).
 */
export function textMatchesAnyKeyword(
  haystack: string | null | undefined,
  keywords: readonly string[],
): boolean {
  if (keywords.length === 0) return false;

  const normalizedHaystack = normalizeForKeywordMatch(haystack ?? "");
  return keywords.some((keyword) => {
    const normalizedKeyword = normalizeForKeywordMatch(keyword);
    return (
      normalizedKeyword !== "" &&
      normalizedHaystack.includes(normalizedKeyword)
    );
  });
}
|
||||
Loading…
x
Reference in New Issue
Block a user