diff --git a/docs-site/docs/features/settings.md b/docs-site/docs/features/settings.md index f0058da..ffd9025 100644 --- a/docs-site/docs/features/settings.md +++ b/docs-site/docs/features/settings.md @@ -119,6 +119,7 @@ Readiness requires: - Penalize missing salary data - Set penalty amount - Optional auto-skip threshold for low-score jobs +- Block jobs from companies that match configured keyword tokens ### Danger Zone diff --git a/orchestrator/src/client/pages/SettingsPage.test.tsx b/orchestrator/src/client/pages/SettingsPage.test.tsx index 13f8aac..36e3e71 100644 --- a/orchestrator/src/client/pages/SettingsPage.test.tsx +++ b/orchestrator/src/client/pages/SettingsPage.test.tsx @@ -261,4 +261,38 @@ describe("SettingsPage", () => { }), ); }); + + it("saves blocked company keywords from scoring settings", async () => { + vi.mocked(api.getSettings).mockResolvedValue(baseSettings); + vi.mocked(api.updateSettings).mockResolvedValue({ + ...baseSettings, + blockedCompanyKeywords: { + value: ["staffing"], + default: [], + override: ["staffing"], + }, + }); + + renderPage(); + + const scoringTrigger = await screen.findByRole("button", { + name: /scoring settings/i, + }); + fireEvent.click(scoringTrigger); + + const input = screen.getByPlaceholderText('e.g. "recruitment", "staffing"'); + fireEvent.change(input, { target: { value: "staffing" } }); + fireEvent.keyDown(input, { key: "Enter" }); + + const saveButton = screen.getByRole("button", { name: /^save$/i }); + await waitFor(() => expect(saveButton).toBeEnabled()); + fireEvent.click(saveButton); + + await waitFor(() => expect(api.updateSettings).toHaveBeenCalled()); + expect(api.updateSettings).toHaveBeenCalledWith( + expect.objectContaining({ + blockedCompanyKeywords: ["staffing"], + }), + ); + }); }); diff --git a/orchestrator/src/client/pages/SettingsPage.tsx b/orchestrator/src/client/pages/SettingsPage.tsx index bbc0bec..36559f4 100644 --- a/orchestrator/src/client/pages/SettingsPage.tsx +++ b/orchestrator/src/client/pages/SettingsPage.tsx @@ -18,6 +18,7 @@ import { resumeProjectsEqual, } from "@client/pages/settings/utils"; import { zodResolver } from "@hookform/resolvers/zod"; +import { normalizeStringArray } from "@shared/normalize-string-array.js"; import { type UpdateSettingsInput, updateSettingsSchema, @@ -72,6 +73,7 @@ const DEFAULT_FORM_VALUES: UpdateSettingsInput = { penalizeMissingSalary: null, missingSalaryPenalty: null, autoSkipScoreThreshold: null, + blockedCompanyKeywords: [], }; type LlmProviderValue = LlmProviderId | null; @@ -114,6 +116,7 @@ const NULL_SETTINGS_PAYLOAD: UpdateSettingsInput = { penalizeMissingSalary: null, missingSalaryPenalty: null, autoSkipScoreThreshold: null, + blockedCompanyKeywords: null, }; const mapSettingsToForm = (data: AppSettings): UpdateSettingsInput => ({ @@ -149,6 +152,7 @@ const mapSettingsToForm = (data: AppSettings): UpdateSettingsInput => ({ penalizeMissingSalary: data.penalizeMissingSalary.override, missingSalaryPenalty: data.missingSalaryPenalty.override, autoSkipScoreThreshold: data.autoSkipScoreThreshold.override, + blockedCompanyKeywords: data.blockedCompanyKeywords.override ?? [], }); const normalizeString = (value: string | null | undefined) => { @@ -162,6 +166,11 @@ const normalizePrivateInput = (value: string | null | undefined) => { return trimmed || undefined; }; +const stringArraysEqual = (left: string[], right: string[]): boolean => { + if (left.length !== right.length) return false; + return left.every((value, index) => value === right[index]); +}; + const nullIfSame = (value: T | null | undefined, defaultValue: T) => value === defaultValue ? null : (value ?? null); @@ -291,6 +300,10 @@ const getDerivedSettings = (settings: AppSettings | null) => { effective: settings?.autoSkipScoreThreshold?.value ?? null, default: settings?.autoSkipScoreThreshold?.default ?? null, }, + blockedCompanyKeywords: { + effective: settings?.blockedCompanyKeywords?.value ?? [], + default: settings?.blockedCompanyKeywords?.default ?? [], + }, }, }; }; @@ -627,6 +640,15 @@ export const SettingsPage: React.FC = () => { data.missingSalaryPenalty, scoring.missingSalaryPenalty.default, ), + blockedCompanyKeywords: (() => { + const normalized = normalizeStringArray(data.blockedCompanyKeywords); + const normalizedDefault = normalizeStringArray( + scoring.blockedCompanyKeywords.default, + ); + return stringArraysEqual(normalized, normalizedDefault) + ? null + : normalized; + })(), ...envPayload, }; diff --git a/orchestrator/src/client/pages/orchestrator/TokenizedInput.tsx b/orchestrator/src/client/pages/orchestrator/TokenizedInput.tsx index 02c395d..f22ea1c 100644 --- a/orchestrator/src/client/pages/orchestrator/TokenizedInput.tsx +++ b/orchestrator/src/client/pages/orchestrator/TokenizedInput.tsx @@ -16,6 +16,7 @@ interface TokenizedInputProps { helperText: string; removeLabelPrefix: string; collapsedTextLimit?: number; + disabled?: boolean; } function mergeUnique(values: string[], nextValues: string[]): string[] { @@ -41,6 +42,7 @@ export const TokenizedInput: React.FC = ({ helperText, removeLabelPrefix, collapsedTextLimit = 3, + disabled = false, }) => { const [isFocused, setIsFocused] = useState(false); const tokensRef = useRef(null); @@ -118,6 +120,7 @@ export const TokenizedInput: React.FC = ({ } }} placeholder={placeholder} + disabled={disabled} />

{helperText}

{values.length > 0 ? ( @@ -152,6 +155,7 @@ export const TokenizedInput: React.FC = ({ variant="outline" className="h-auto rounded-full px-2 py-1 text-xs text-muted-foreground" aria-label={`${removeLabelPrefix} ${value}`} + disabled={disabled} onPointerDown={(event) => event.preventDefault()} onClick={() => onValuesChange( diff --git a/orchestrator/src/client/pages/settings/components/ScoringSettingsSection.tsx b/orchestrator/src/client/pages/settings/components/ScoringSettingsSection.tsx index dd42d7d..e227458 100644 --- a/orchestrator/src/client/pages/settings/components/ScoringSettingsSection.tsx +++ b/orchestrator/src/client/pages/settings/components/ScoringSettingsSection.tsx @@ -1,7 +1,9 @@ +import { TokenizedInput } from "@client/pages/orchestrator/TokenizedInput"; import { SettingsInput } from "@client/pages/settings/components/SettingsInput"; import type { ScoringValues } from "@client/pages/settings/types"; import type { UpdateSettingsInput } from "@shared/settings-schema.js"; import type React from "react"; +import { useState } from "react"; import { Controller, useFormContext } from "react-hook-form"; import { AccordionContent, @@ -17,6 +19,13 @@ type ScoringSettingsSectionProps = { isSaving: boolean; }; +function parseTokenizedKeywordInput(input: string): string[] { + return input + .split(/[\n,]/g) + .map((value) => value.trim()) + .filter(Boolean); +} + export const ScoringSettingsSection: React.FC = ({ values, isLoading, @@ -26,8 +35,11 @@ export const ScoringSettingsSection: React.FC = ({ penalizeMissingSalary, missingSalaryPenalty, autoSkipScoreThreshold, + blockedCompanyKeywords, } = values; - const { control, watch } = useFormContext(); + const { control, watch, setValue } = useFormContext(); + const [blockedCompanyKeywordDraft, setBlockedCompanyKeywordDraft] = + useState(""); // Watch the current form value to conditionally show/hide penalty input const currentPenalizeEnabled = @@ -35,6 +47,8 @@ export const ScoringSettingsSection: React.FC = ({ // Watch auto-skip threshold to show current value const currentAutoSkipThreshold = watch("autoSkipScoreThreshold"); + const blockedCompanyKeywordValues = + watch("blockedCompanyKeywords") ?? blockedCompanyKeywords.default; return ( @@ -154,6 +168,41 @@ export const ScoringSettingsSection: React.FC = ({ +
+ + + setValue("blockedCompanyKeywords", value, { shouldDirty: true }) + } + placeholder='e.g. "recruitment", "staffing"' + helperText="Jobs whose company name contains one of these keywords will be dropped during discovery." + removeLabelPrefix="Remove blocked keyword" + disabled={isLoading || isSaving} + /> +
+ Effective:{" "} + {blockedCompanyKeywordValues.length > 0 + ? blockedCompanyKeywordValues.join(", ") + : "None"}{" "} + | Default:{" "} + {blockedCompanyKeywords.default.length > 0 + ? blockedCompanyKeywords.default.join(", ") + : "None"} +
+
+ + + {/* Effective/Default values display */}
diff --git a/orchestrator/src/client/pages/settings/types.ts b/orchestrator/src/client/pages/settings/types.ts index fd4b5b3..cfef14f 100644 --- a/orchestrator/src/client/pages/settings/types.ts +++ b/orchestrator/src/client/pages/settings/types.ts @@ -48,4 +48,5 @@ export type ScoringValues = { penalizeMissingSalary: EffectiveDefault; missingSalaryPenalty: EffectiveDefault; autoSkipScoreThreshold: EffectiveDefault; + blockedCompanyKeywords: EffectiveDefault; }; diff --git a/orchestrator/src/index.css b/orchestrator/src/index.css index cb09191..dadc1ba 100644 --- a/orchestrator/src/index.css +++ b/orchestrator/src/index.css @@ -76,12 +76,12 @@ :root { --radius: 0.5rem; --background: oklch(0.9818 0.0054 95.0986); - --foreground: oklch(0.3438 0.0269 95.7226); + --foreground: oklch(28.815% 0.01599 96.892); --card: oklch(0.9818 0.0054 95.0986); --card-foreground: oklch(0.1908 0.002 106.5859); --popover: oklch(1 0 0); --popover-foreground: oklch(0.2671 0.0196 98.939); - --primary: oklch(0.6171 0.1375 39.0427); + --primary: oklch(0.6916 0.1719 40.93); --primary-foreground: oklch(1 0 0); --secondary: oklch(0.9245 0.0138 92.9892); --secondary-foreground: oklch(0.4334 0.0177 98.6048); @@ -143,7 +143,7 @@ --card-foreground: oklch(0.9818 0.0054 95.0986); --popover: oklch(0.3085 0.0035 106.6039); --popover-foreground: oklch(0.9211 0.004 106.4781); - --primary: oklch(0.6724 0.1308 38.7559); + --primary: oklch(0.6916 0.1719 40.93); --primary-foreground: oklch(1 0 0); --secondary: oklch(0.9818 0.0054 95.0986); --secondary-foreground: oklch(0.3085 0.0035 106.6039); @@ -155,7 +155,7 @@ --border: oklch(0.3618 0.0101 106.8928); --input: oklch(0.4336 0.0113 100.2195); --ring: oklch(0.6724 0.1308 38.7559); - --chart-1: oklch(0.5583 0.1276 42.9956); + --chart-1: oklch(0.6916 0.1719 40.93); --chart-2: oklch(0.6898 0.1581 290.4107); --chart-3: oklch(0.213 0.0078 95.4245); --chart-4: oklch(0.3074 0.0516 289.323); diff --git a/orchestrator/src/server/pipeline/steps/discover-jobs.test.ts b/orchestrator/src/server/pipeline/steps/discover-jobs.test.ts index 638a9e5..53eaf4c 100644 --- a/orchestrator/src/server/pipeline/steps/discover-jobs.test.ts +++ b/orchestrator/src/server/pipeline/steps/discover-jobs.test.ts @@ -646,6 +646,44 @@ describe("discoverJobsStep", () => { expect(vi.mocked(ukVisa.runUkVisaJobs)).not.toHaveBeenCalled(); }); + it("drops discovered jobs when employer matches blocked company keywords", async () => { + const settingsRepo = await import("../../repositories/settings"); + const jobSpy = await import("../../services/jobspy"); + + vi.mocked(settingsRepo.getAllSettings).mockResolvedValue({ + searchTerms: JSON.stringify(["engineer"]), + blockedCompanyKeywords: JSON.stringify(["recruit", "staffing"]), + } as any); + + vi.mocked(jobSpy.runJobSpy).mockResolvedValue({ + success: true, + jobs: [ + { + source: "linkedin", + title: "Engineer", + employer: "Acme Staffing", + jobUrl: "https://example.com/job-1", + }, + { + source: "linkedin", + title: "Engineer II", + employer: "Contoso", + jobUrl: "https://example.com/job-2", + }, + ], + } as any); + + const result = await discoverJobsStep({ + mergedConfig: { + ...config, + sources: ["linkedin"], + }, + }); + + expect(result.discoveredJobs).toHaveLength(1); + expect(result.discoveredJobs[0]?.employer).toBe("Contoso"); + }); + it("tracks source completion counters across source transitions", async () => { const settingsRepo = await import("../../repositories/settings"); const jobSpy = await import("../../services/jobspy"); diff --git a/orchestrator/src/server/pipeline/steps/discover-jobs.ts b/orchestrator/src/server/pipeline/steps/discover-jobs.ts index df136d1..a0195be 100644 --- a/orchestrator/src/server/pipeline/steps/discover-jobs.ts +++ b/orchestrator/src/server/pipeline/steps/discover-jobs.ts @@ -5,6 +5,7 @@ import { isSourceAllowedForCountry, normalizeCountryKey, } from "@shared/location-support.js"; +import { normalizeStringArray } from "@shared/normalize-string-array.js"; import { parseSearchCitiesSetting } from "@shared/search-cities.js"; import type { CreateJobInput, PipelineConfig } from "@shared/types"; import * as jobsRepo from "../../repositories/jobs"; @@ -31,6 +32,31 @@ type DiscoverySourceTask = { run: () => Promise; }; +function parseBlockedCompanyKeywords(raw: string | undefined): string[] { + if (!raw) return []; + try { + const parsed = JSON.parse(raw); + if (!Array.isArray(parsed)) return []; + return normalizeStringArray( + parsed.filter((value): value is string => typeof value === "string"), + ); + } catch { + return []; + } +} + +function isBlockedEmployer( + employer: string | null | undefined, + blockedKeywordsLowerCase: string[], +): boolean { + if (!employer) return false; + if (blockedKeywordsLowerCase.length === 0) return false; + const normalizedEmployer = employer.toLowerCase(); + return blockedKeywordsLowerCase.some((keyword) => + normalizedEmployer.includes(keyword), + ); +} + export async function discoverJobsStep(args: { mergedConfig: PipelineConfig; shouldCancel?: () => boolean; @@ -525,11 +551,41 @@ export async function discoverJobsStep(args: { sourceErrors.push(...sourceResult.sourceErrors); } - if (args.shouldCancel?.()) { - return { discoveredJobs, sourceErrors }; + const blockedCompanyKeywords = parseBlockedCompanyKeywords( + settings.blockedCompanyKeywords, + ); + const blockedKeywordsLowerCase = blockedCompanyKeywords.map((value) => + value.toLowerCase(), + ); + const filteredDiscoveredJobs = discoveredJobs.filter( + (job) => !isBlockedEmployer(job.employer, blockedKeywordsLowerCase), + ); + const droppedCount = discoveredJobs.length - filteredDiscoveredJobs.length; + + if (droppedCount > 0) { + const blockedCompanyKeywordsPreview = blockedCompanyKeywords.slice(0, 10); + const blockedCompanyKeywordsTruncated = + blockedCompanyKeywordsPreview.length < blockedCompanyKeywords.length; + + logger.info("Dropped discovered jobs matching blocked company keywords", { + step: "discover-jobs", + droppedCount, + blockedKeywordCount: blockedCompanyKeywords.length, + blockedCompanyKeywordsPreview, + blockedCompanyKeywordsTruncated, + }); + + logger.debug("Full blocked company keywords used for filtering", { + step: "discover-jobs", + blockedCompanyKeywords, + }); } - if (discoveredJobs.length === 0 && sourceErrors.length > 0) { + if (args.shouldCancel?.()) { + return { discoveredJobs: filteredDiscoveredJobs, sourceErrors }; + } + + if (filteredDiscoveredJobs.length === 0 && sourceErrors.length > 0) { throw new Error(`All sources failed: ${sourceErrors.join("; ")}`); } @@ -537,7 +593,7 @@ export async function discoverJobsStep(args: { logger.warn("Some discovery sources failed", { sourceErrors }); } - progressHelpers.crawlingComplete(discoveredJobs.length); + progressHelpers.crawlingComplete(filteredDiscoveredJobs.length); - return { discoveredJobs, sourceErrors }; + return { discoveredJobs: filteredDiscoveredJobs, sourceErrors }; } diff --git a/shared/src/normalize-string-array.test.ts b/shared/src/normalize-string-array.test.ts new file mode 100644 index 0000000..bcb2bf5 --- /dev/null +++ b/shared/src/normalize-string-array.test.ts @@ -0,0 +1,22 @@ +import { describe, expect, it } from "vitest"; +import { normalizeStringArray } from "./normalize-string-array"; + +describe("normalizeStringArray", () => { + it("returns empty array for nullish/empty input", () => { + expect(normalizeStringArray(undefined)).toEqual([]); + expect(normalizeStringArray(null)).toEqual([]); + expect(normalizeStringArray([])).toEqual([]); + }); + + it("trims values and removes empty entries", () => { + expect(normalizeStringArray([" staffing ", " ", "\n"])).toEqual([ + "staffing", + ]); + }); + + it("deduplicates values case-insensitively while preserving first casing", () => { + expect( + normalizeStringArray(["Recruit", "staffing", "recruit", "STAFFING"]), + ).toEqual(["Recruit", "staffing"]); + }); +}); diff --git a/shared/src/normalize-string-array.ts b/shared/src/normalize-string-array.ts new file mode 100644 index 0000000..e8fb4ed --- /dev/null +++ b/shared/src/normalize-string-array.ts @@ -0,0 +1,21 @@ +export function normalizeStringArray( + values: readonly string[] | null | undefined, +): string[] { + if (!values || values.length === 0) return []; + + const seen = new Set(); + const normalized: string[] = []; + + for (const value of values) { + const trimmed = value.trim(); + if (!trimmed) continue; + + const key = trimmed.toLowerCase(); + if (seen.has(key)) continue; + + seen.add(key); + normalized.push(trimmed); + } + + return normalized; +} diff --git a/shared/src/settings-registry.ts b/shared/src/settings-registry.ts index 95fe849..0c8e79e 100644 --- a/shared/src/settings-registry.ts +++ b/shared/src/settings-registry.ts @@ -177,6 +177,13 @@ export const settingsRegistry = { parse: parseJsonArrayOrNull, serialize: serializeNullableJsonArray, }, + blockedCompanyKeywords: { + kind: "typed" as const, + schema: z.array(z.string().trim().min(1).max(200)).max(200), + default: (): string[] => [], + parse: parseJsonArrayOrNull, + serialize: serializeNullableJsonArray, + }, searchCities: { kind: "typed" as const, schema: z.string().trim().max(100), diff --git a/shared/src/testing/factories.ts b/shared/src/testing/factories.ts index a4e4d44..2a9fd5f 100644 --- a/shared/src/testing/factories.ts +++ b/shared/src/testing/factories.ts @@ -156,6 +156,11 @@ export const createAppSettings = ( default: ["Software Engineer"], override: null, }, + blockedCompanyKeywords: { + value: [], + default: [], + override: null, + }, searchCities: { value: "United Kingdom", default: "United Kingdom", diff --git a/shared/src/types/settings.ts b/shared/src/types/settings.ts index e15c502..709810e 100644 --- a/shared/src/types/settings.ts +++ b/shared/src/types/settings.ts @@ -123,6 +123,7 @@ export interface AppSettings { adzunaMaxJobsPerTerm: Resolved; gradcrackerMaxJobsPerTerm: Resolved; searchTerms: Resolved; + blockedCompanyKeywords: Resolved; searchCities: Resolved; jobspyResultsWanted: Resolved; jobspyCountryIndeed: Resolved;