Feat/company blacklist tokenized input (#219)

* initial commit

* docs mention!

* Update orchestrator/src/server/pipeline/steps/discover-jobs.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* normalizeStringArray

* poppier orange

* comments

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Shaheer Sarfaraz 2026-02-21 04:07:06 +00:00 committed by GitHub
parent 60ca350da8
commit cc7cacd7f5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 271 additions and 10 deletions

View File

@ -119,6 +119,7 @@ Readiness requires:
- Penalize missing salary data
- Set penalty amount
- Optional auto-skip threshold for low-score jobs
- Block jobs whose company name contains any configured keyword (case-insensitive match)
### Danger Zone

View File

@ -261,4 +261,38 @@ describe("SettingsPage", () => {
}),
);
});
// Verifies that a keyword entered in the scoring section's blocked-company
// input ends up in the payload passed to api.updateSettings on save.
it("saves blocked company keywords from scoring settings", async () => {
// Initial load returns the shared baseSettings fixture.
vi.mocked(api.getSettings).mockResolvedValue(baseSettings);
// The save call resolves with settings that now carry the "staffing" override.
vi.mocked(api.updateSettings).mockResolvedValue({
...baseSettings,
blockedCompanyKeywords: {
value: ["staffing"],
default: [],
override: ["staffing"],
},
});
renderPage();
// The input lives inside the scoring accordion section, so open it first.
const scoringTrigger = await screen.findByRole("button", {
name: /scoring settings/i,
});
fireEvent.click(scoringTrigger);
// Type a keyword into the tokenized input (located by its placeholder) and
// press Enter — presumably this commits the draft text as a token.
const input = screen.getByPlaceholderText('e.g. "recruitment", "staffing"');
fireEvent.change(input, { target: { value: "staffing" } });
fireEvent.keyDown(input, { key: "Enter" });
// Wait until the Save button is enabled before clicking it.
const saveButton = screen.getByRole("button", { name: /^save$/i });
await waitFor(() => expect(saveButton).toBeEnabled());
fireEvent.click(saveButton);
await waitFor(() => expect(api.updateSettings).toHaveBeenCalled());
// Only the blockedCompanyKeywords field of the payload is pinned here.
expect(api.updateSettings).toHaveBeenCalledWith(
expect.objectContaining({
blockedCompanyKeywords: ["staffing"],
}),
);
});
});

View File

@ -18,6 +18,7 @@ import {
resumeProjectsEqual,
} from "@client/pages/settings/utils";
import { zodResolver } from "@hookform/resolvers/zod";
import { normalizeStringArray } from "@shared/normalize-string-array.js";
import {
type UpdateSettingsInput,
updateSettingsSchema,
@ -72,6 +73,7 @@ const DEFAULT_FORM_VALUES: UpdateSettingsInput = {
penalizeMissingSalary: null,
missingSalaryPenalty: null,
autoSkipScoreThreshold: null,
blockedCompanyKeywords: [],
};
type LlmProviderValue = LlmProviderId | null;
@ -114,6 +116,7 @@ const NULL_SETTINGS_PAYLOAD: UpdateSettingsInput = {
penalizeMissingSalary: null,
missingSalaryPenalty: null,
autoSkipScoreThreshold: null,
blockedCompanyKeywords: null,
};
const mapSettingsToForm = (data: AppSettings): UpdateSettingsInput => ({
@ -149,6 +152,7 @@ const mapSettingsToForm = (data: AppSettings): UpdateSettingsInput => ({
penalizeMissingSalary: data.penalizeMissingSalary.override,
missingSalaryPenalty: data.missingSalaryPenalty.override,
autoSkipScoreThreshold: data.autoSkipScoreThreshold.override,
blockedCompanyKeywords: data.blockedCompanyKeywords.override ?? [],
});
const normalizeString = (value: string | null | undefined) => {
@ -162,6 +166,11 @@ const normalizePrivateInput = (value: string | null | undefined) => {
return trimmed || undefined;
};
/**
 * Order-sensitive, element-wise equality check for two string arrays.
 *
 * @param left - First array to compare.
 * @param right - Second array to compare.
 * @returns `true` when both arrays have the same length and hold strictly
 *   equal strings at every position; `false` otherwise.
 */
const stringArraysEqual = (left: string[], right: string[]): boolean => {
  const sameLength = left.length === right.length;
  // Equal arrays have no position at which the two entries differ.
  return (
    sameLength && !left.some((entry, position) => entry !== right[position])
  );
};
/**
 * Collapses a value to `null` when it is strictly equal (`===`) to the given
 * default, and also when it is `null` or `undefined`. Any other value is
 * returned unchanged.
 *
 * Comparison is by identity, so two distinct arrays or objects with the same
 * contents are NOT treated as "same".
 */
const nullIfSame = <T,>(value: T | null | undefined, defaultValue: T) => {
  if (value === defaultValue) {
    return null;
  }
  return value ?? null;
};
@ -291,6 +300,10 @@ const getDerivedSettings = (settings: AppSettings | null) => {
effective: settings?.autoSkipScoreThreshold?.value ?? null,
default: settings?.autoSkipScoreThreshold?.default ?? null,
},
blockedCompanyKeywords: {
effective: settings?.blockedCompanyKeywords?.value ?? [],
default: settings?.blockedCompanyKeywords?.default ?? [],
},
},
};
};
@ -627,6 +640,15 @@ export const SettingsPage: React.FC = () => {
data.missingSalaryPenalty,
scoring.missingSalaryPenalty.default,
),
blockedCompanyKeywords: (() => {
const normalized = normalizeStringArray(data.blockedCompanyKeywords);
const normalizedDefault = normalizeStringArray(
scoring.blockedCompanyKeywords.default,
);
return stringArraysEqual(normalized, normalizedDefault)
? null
: normalized;
})(),
...envPayload,
};

View File

@ -16,6 +16,7 @@ interface TokenizedInputProps {
helperText: string;
removeLabelPrefix: string;
collapsedTextLimit?: number;
disabled?: boolean;
}
function mergeUnique(values: string[], nextValues: string[]): string[] {
@ -41,6 +42,7 @@ export const TokenizedInput: React.FC<TokenizedInputProps> = ({
helperText,
removeLabelPrefix,
collapsedTextLimit = 3,
disabled = false,
}) => {
const [isFocused, setIsFocused] = useState(false);
const tokensRef = useRef<HTMLDivElement | null>(null);
@ -118,6 +120,7 @@ export const TokenizedInput: React.FC<TokenizedInputProps> = ({
}
}}
placeholder={placeholder}
disabled={disabled}
/>
<p className="text-xs text-muted-foreground">{helperText}</p>
{values.length > 0 ? (
@ -152,6 +155,7 @@ export const TokenizedInput: React.FC<TokenizedInputProps> = ({
variant="outline"
className="h-auto rounded-full px-2 py-1 text-xs text-muted-foreground"
aria-label={`${removeLabelPrefix} ${value}`}
disabled={disabled}
onPointerDown={(event) => event.preventDefault()}
onClick={() =>
onValuesChange(

View File

@ -1,7 +1,9 @@
import { TokenizedInput } from "@client/pages/orchestrator/TokenizedInput";
import { SettingsInput } from "@client/pages/settings/components/SettingsInput";
import type { ScoringValues } from "@client/pages/settings/types";
import type { UpdateSettingsInput } from "@shared/settings-schema.js";
import type React from "react";
import { useState } from "react";
import { Controller, useFormContext } from "react-hook-form";
import {
AccordionContent,
@ -17,6 +19,13 @@ type ScoringSettingsSectionProps = {
isSaving: boolean;
};
/**
 * Turns raw keyword text into individual tokens.
 *
 * The text is split on commas and newlines; each piece is trimmed and pieces
 * that are empty after trimming are discarded. Duplicates are kept.
 *
 * @param input - Raw text typed or pasted by the user.
 * @returns The non-empty, trimmed tokens in their original order.
 */
function parseTokenizedKeywordInput(input: string): string[] {
  const tokens: string[] = [];
  for (const piece of input.split(/[\n,]/g)) {
    const token = piece.trim();
    if (token) {
      tokens.push(token);
    }
  }
  return tokens;
}
export const ScoringSettingsSection: React.FC<ScoringSettingsSectionProps> = ({
values,
isLoading,
@ -26,8 +35,11 @@ export const ScoringSettingsSection: React.FC<ScoringSettingsSectionProps> = ({
penalizeMissingSalary,
missingSalaryPenalty,
autoSkipScoreThreshold,
blockedCompanyKeywords,
} = values;
const { control, watch } = useFormContext<UpdateSettingsInput>();
const { control, watch, setValue } = useFormContext<UpdateSettingsInput>();
const [blockedCompanyKeywordDraft, setBlockedCompanyKeywordDraft] =
useState("");
// Watch the current form value to conditionally show/hide penalty input
const currentPenalizeEnabled =
@ -35,6 +47,8 @@ export const ScoringSettingsSection: React.FC<ScoringSettingsSectionProps> = ({
// Watch auto-skip threshold to show current value
const currentAutoSkipThreshold = watch("autoSkipScoreThreshold");
const blockedCompanyKeywordValues =
watch("blockedCompanyKeywords") ?? blockedCompanyKeywords.default;
return (
<AccordionItem value="scoring" className="border rounded-lg px-4">
@ -154,6 +168,41 @@ export const ScoringSettingsSection: React.FC<ScoringSettingsSectionProps> = ({
<Separator />
<div className="space-y-3">
<label
htmlFor="blocked-company-keywords"
className="text-sm font-medium leading-none"
>
Blocked Company Keywords
</label>
<TokenizedInput
id="blocked-company-keywords"
values={blockedCompanyKeywordValues}
draft={blockedCompanyKeywordDraft}
parseInput={parseTokenizedKeywordInput}
onDraftChange={setBlockedCompanyKeywordDraft}
onValuesChange={(value) =>
setValue("blockedCompanyKeywords", value, { shouldDirty: true })
}
placeholder='e.g. "recruitment", "staffing"'
helperText="Jobs whose company name contains one of these keywords will be dropped during discovery."
removeLabelPrefix="Remove blocked keyword"
disabled={isLoading || isSaving}
/>
<div className="break-words font-mono text-xs text-muted-foreground">
Effective:{" "}
{blockedCompanyKeywordValues.length > 0
? blockedCompanyKeywordValues.join(", ")
: "None"}{" "}
| Default:{" "}
{blockedCompanyKeywords.default.length > 0
? blockedCompanyKeywords.default.join(", ")
: "None"}
</div>
</div>
<Separator />
{/* Effective/Default values display */}
<div className="grid gap-2 text-sm sm:grid-cols-2">
<div>

View File

@ -48,4 +48,5 @@ export type ScoringValues = {
penalizeMissingSalary: EffectiveDefault<boolean>;
missingSalaryPenalty: EffectiveDefault<number>;
autoSkipScoreThreshold: EffectiveDefault<number | null>;
blockedCompanyKeywords: EffectiveDefault<string[]>;
};

View File

@ -76,12 +76,12 @@
:root {
--radius: 0.5rem;
--background: oklch(0.9818 0.0054 95.0986);
--foreground: oklch(0.3438 0.0269 95.7226);
--foreground: oklch(28.815% 0.01599 96.892);
--card: oklch(0.9818 0.0054 95.0986);
--card-foreground: oklch(0.1908 0.002 106.5859);
--popover: oklch(1 0 0);
--popover-foreground: oklch(0.2671 0.0196 98.939);
--primary: oklch(0.6171 0.1375 39.0427);
--primary: oklch(0.6916 0.1719 40.93);
--primary-foreground: oklch(1 0 0);
--secondary: oklch(0.9245 0.0138 92.9892);
--secondary-foreground: oklch(0.4334 0.0177 98.6048);
@ -143,7 +143,7 @@
--card-foreground: oklch(0.9818 0.0054 95.0986);
--popover: oklch(0.3085 0.0035 106.6039);
--popover-foreground: oklch(0.9211 0.004 106.4781);
--primary: oklch(0.6724 0.1308 38.7559);
--primary: oklch(0.6916 0.1719 40.93);
--primary-foreground: oklch(1 0 0);
--secondary: oklch(0.9818 0.0054 95.0986);
--secondary-foreground: oklch(0.3085 0.0035 106.6039);
@ -155,7 +155,7 @@
--border: oklch(0.3618 0.0101 106.8928);
--input: oklch(0.4336 0.0113 100.2195);
--ring: oklch(0.6724 0.1308 38.7559);
--chart-1: oklch(0.5583 0.1276 42.9956);
--chart-1: oklch(0.6916 0.1719 40.93);
--chart-2: oklch(0.6898 0.1581 290.4107);
--chart-3: oklch(0.213 0.0078 95.4245);
--chart-4: oklch(0.3074 0.0516 289.323);

View File

@ -646,6 +646,44 @@ describe("discoverJobsStep", () => {
expect(vi.mocked(ukVisa.runUkVisaJobs)).not.toHaveBeenCalled();
});
// Verifies that discoverJobsStep removes jobs whose employer contains one of
// the blocked company keywords stored in settings.
it("drops discovered jobs when employer matches blocked company keywords", async () => {
const settingsRepo = await import("../../repositories/settings");
const jobSpy = await import("../../services/jobspy");
// Settings are provided as JSON-encoded strings; two keywords are blocked.
vi.mocked(settingsRepo.getAllSettings).mockResolvedValue({
searchTerms: JSON.stringify(["engineer"]),
blockedCompanyKeywords: JSON.stringify(["recruit", "staffing"]),
} as any);
// The mocked source returns one job that should be blocked ("Acme Staffing"
// contains "staffing" when compared case-insensitively) and one that should
// pass through ("Contoso").
vi.mocked(jobSpy.runJobSpy).mockResolvedValue({
success: true,
jobs: [
{
source: "linkedin",
title: "Engineer",
employer: "Acme Staffing",
jobUrl: "https://example.com/job-1",
},
{
source: "linkedin",
title: "Engineer II",
employer: "Contoso",
jobUrl: "https://example.com/job-2",
},
],
} as any);
const result = await discoverJobsStep({
mergedConfig: {
...config,
sources: ["linkedin"],
},
});
// Only the non-blocked employer remains in the step output.
expect(result.discoveredJobs).toHaveLength(1);
expect(result.discoveredJobs[0]?.employer).toBe("Contoso");
});
it("tracks source completion counters across source transitions", async () => {
const settingsRepo = await import("../../repositories/settings");
const jobSpy = await import("../../services/jobspy");

View File

@ -5,6 +5,7 @@ import {
isSourceAllowedForCountry,
normalizeCountryKey,
} from "@shared/location-support.js";
import { normalizeStringArray } from "@shared/normalize-string-array.js";
import { parseSearchCitiesSetting } from "@shared/search-cities.js";
import type { CreateJobInput, PipelineConfig } from "@shared/types";
import * as jobsRepo from "../../repositories/jobs";
@ -31,6 +32,31 @@ type DiscoverySourceTask = {
run: () => Promise<DiscoveryTaskResult>;
};
/**
 * Decodes the stored blocked-company-keywords setting into a keyword list.
 *
 * The raw value is expected to be a JSON-encoded array. Non-string entries
 * are discarded and the remaining strings are passed through
 * `normalizeStringArray`. A missing/empty value, a JSON value that is not an
 * array, or any error while decoding yields an empty list instead of
 * throwing.
 *
 * @param raw - Stored setting value, if any.
 * @returns The normalized keywords, or `[]` when nothing usable is stored.
 */
function parseBlockedCompanyKeywords(raw: string | undefined): string[] {
  if (!raw) {
    return [];
  }

  try {
    const decoded: unknown = JSON.parse(raw);
    if (!Array.isArray(decoded)) {
      return [];
    }

    // Keep only the string entries; anything else in the array is ignored.
    const stringEntries: string[] = [];
    for (const entry of decoded) {
      if (typeof entry === "string") {
        stringEntries.push(entry);
      }
    }
    return normalizeStringArray(stringEntries);
  } catch {
    // Deliberate best-effort: an unreadable setting means "block nothing".
    return [];
  }
}
/**
 * Reports whether an employer name contains any blocked keyword.
 *
 * The employer name is lower-cased before the substring check; the keywords
 * are used as given, so callers must pass them already lower-cased.
 *
 * @param employer - Employer name of a job; may be missing.
 * @param blockedKeywordsLowerCase - Lower-cased keywords to look for.
 * @returns `true` when the employer contains at least one keyword; `false`
 *   when the employer is missing/empty, the keyword list is empty, or no
 *   keyword matches.
 */
function isBlockedEmployer(
  employer: string | null | undefined,
  blockedKeywordsLowerCase: string[],
): boolean {
  if (!employer || blockedKeywordsLowerCase.length === 0) {
    return false;
  }

  const haystack = employer.toLowerCase();
  const appearsInEmployer = (keyword: string): boolean =>
    haystack.includes(keyword);
  return blockedKeywordsLowerCase.some(appearsInEmployer);
}
export async function discoverJobsStep(args: {
mergedConfig: PipelineConfig;
shouldCancel?: () => boolean;
@ -525,11 +551,41 @@ export async function discoverJobsStep(args: {
sourceErrors.push(...sourceResult.sourceErrors);
}
if (args.shouldCancel?.()) {
return { discoveredJobs, sourceErrors };
const blockedCompanyKeywords = parseBlockedCompanyKeywords(
settings.blockedCompanyKeywords,
);
const blockedKeywordsLowerCase = blockedCompanyKeywords.map((value) =>
value.toLowerCase(),
);
const filteredDiscoveredJobs = discoveredJobs.filter(
(job) => !isBlockedEmployer(job.employer, blockedKeywordsLowerCase),
);
const droppedCount = discoveredJobs.length - filteredDiscoveredJobs.length;
if (droppedCount > 0) {
const blockedCompanyKeywordsPreview = blockedCompanyKeywords.slice(0, 10);
const blockedCompanyKeywordsTruncated =
blockedCompanyKeywordsPreview.length < blockedCompanyKeywords.length;
logger.info("Dropped discovered jobs matching blocked company keywords", {
step: "discover-jobs",
droppedCount,
blockedKeywordCount: blockedCompanyKeywords.length,
blockedCompanyKeywordsPreview,
blockedCompanyKeywordsTruncated,
});
logger.debug("Full blocked company keywords used for filtering", {
step: "discover-jobs",
blockedCompanyKeywords,
});
}
if (discoveredJobs.length === 0 && sourceErrors.length > 0) {
if (args.shouldCancel?.()) {
return { discoveredJobs: filteredDiscoveredJobs, sourceErrors };
}
if (filteredDiscoveredJobs.length === 0 && sourceErrors.length > 0) {
throw new Error(`All sources failed: ${sourceErrors.join("; ")}`);
}
@ -537,7 +593,7 @@ export async function discoverJobsStep(args: {
logger.warn("Some discovery sources failed", { sourceErrors });
}
progressHelpers.crawlingComplete(discoveredJobs.length);
progressHelpers.crawlingComplete(filteredDiscoveredJobs.length);
return { discoveredJobs, sourceErrors };
return { discoveredJobs: filteredDiscoveredJobs, sourceErrors };
}

View File

@ -0,0 +1,22 @@
import { describe, expect, it } from "vitest";
import { normalizeStringArray } from "./normalize-string-array";
// Unit tests for normalizeStringArray: nullish/empty handling, trimming, and
// case-insensitive de-duplication that keeps the first-seen casing.
describe("normalizeStringArray", () => {
  it("returns empty array for nullish/empty input", () => {
    const fromUndefined = normalizeStringArray(undefined);
    const fromNull = normalizeStringArray(null);
    const fromEmpty = normalizeStringArray([]);

    expect(fromUndefined).toEqual([]);
    expect(fromNull).toEqual([]);
    expect(fromEmpty).toEqual([]);
  });

  it("trims values and removes empty entries", () => {
    const input = [" staffing ", " ", "\n"];

    const result = normalizeStringArray(input);

    expect(result).toEqual(["staffing"]);
  });

  it("deduplicates values case-insensitively while preserving first casing", () => {
    const input = ["Recruit", "staffing", "recruit", "STAFFING"];

    const result = normalizeStringArray(input);

    expect(result).toEqual(["Recruit", "staffing"]);
  });
});

View File

@ -0,0 +1,21 @@
/**
 * Cleans up a list of strings.
 *
 * Each value is trimmed; values that are empty after trimming are dropped.
 * Duplicates are removed case-insensitively, keeping the first occurrence
 * with its original casing. Input order is preserved.
 *
 * @param values - Strings to normalize; `null`/`undefined` is treated as empty.
 * @returns A new array of trimmed, unique (case-insensitive) strings.
 */
export function normalizeStringArray(
  values: readonly string[] | null | undefined,
): string[] {
  // Maps the lower-cased key to the first trimmed spelling seen for it.
  // Map iteration follows insertion order, which preserves input order.
  const firstSpellingByKey = new Map<string, string>();

  for (const raw of values ?? []) {
    const cleaned = raw.trim();
    if (cleaned.length === 0) continue;

    const foldedKey = cleaned.toLowerCase();
    if (!firstSpellingByKey.has(foldedKey)) {
      firstSpellingByKey.set(foldedKey, cleaned);
    }
  }

  return Array.from(firstSpellingByKey.values());
}

View File

@ -177,6 +177,13 @@ export const settingsRegistry = {
parse: parseJsonArrayOrNull,
serialize: serializeNullableJsonArray,
},
blockedCompanyKeywords: {
kind: "typed" as const,
schema: z.array(z.string().trim().min(1).max(200)).max(200),
default: (): string[] => [],
parse: parseJsonArrayOrNull,
serialize: serializeNullableJsonArray,
},
searchCities: {
kind: "typed" as const,
schema: z.string().trim().max(100),

View File

@ -156,6 +156,11 @@ export const createAppSettings = (
default: ["Software Engineer"],
override: null,
},
blockedCompanyKeywords: {
value: [],
default: [],
override: null,
},
searchCities: {
value: "United Kingdom",
default: "United Kingdom",

View File

@ -123,6 +123,7 @@ export interface AppSettings {
adzunaMaxJobsPerTerm: Resolved<number>;
gradcrackerMaxJobsPerTerm: Resolved<number>;
searchTerms: Resolved<string[]>;
blockedCompanyKeywords: Resolved<string[]>;
searchCities: Resolved<string>;
jobspyResultsWanted: Resolved<number>;
jobspyCountryIndeed: Resolved<string>;