Feat/company blacklist tokenized input (#219)
* initial commit * docs mention! * Update orchestrator/src/server/pipeline/steps/discover-jobs.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * normalizeStringArray * poppier orange * comments --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
parent
60ca350da8
commit
cc7cacd7f5
@ -119,6 +119,7 @@ Readiness requires:
|
||||
- Penalize missing salary data
|
||||
- Set penalty amount
|
||||
- Optional auto-skip threshold for low-score jobs
|
||||
- Block jobs whose company name contains any configured keyword (case-insensitive substring match)
|
||||
|
||||
### Danger Zone
|
||||
|
||||
|
||||
@ -261,4 +261,38 @@ describe("SettingsPage", () => {
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("saves blocked company keywords from scoring settings", async () => {
|
||||
vi.mocked(api.getSettings).mockResolvedValue(baseSettings);
|
||||
vi.mocked(api.updateSettings).mockResolvedValue({
|
||||
...baseSettings,
|
||||
blockedCompanyKeywords: {
|
||||
value: ["staffing"],
|
||||
default: [],
|
||||
override: ["staffing"],
|
||||
},
|
||||
});
|
||||
|
||||
renderPage();
|
||||
|
||||
const scoringTrigger = await screen.findByRole("button", {
|
||||
name: /scoring settings/i,
|
||||
});
|
||||
fireEvent.click(scoringTrigger);
|
||||
|
||||
const input = screen.getByPlaceholderText('e.g. "recruitment", "staffing"');
|
||||
fireEvent.change(input, { target: { value: "staffing" } });
|
||||
fireEvent.keyDown(input, { key: "Enter" });
|
||||
|
||||
const saveButton = screen.getByRole("button", { name: /^save$/i });
|
||||
await waitFor(() => expect(saveButton).toBeEnabled());
|
||||
fireEvent.click(saveButton);
|
||||
|
||||
await waitFor(() => expect(api.updateSettings).toHaveBeenCalled());
|
||||
expect(api.updateSettings).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
blockedCompanyKeywords: ["staffing"],
|
||||
}),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@ -18,6 +18,7 @@ import {
|
||||
resumeProjectsEqual,
|
||||
} from "@client/pages/settings/utils";
|
||||
import { zodResolver } from "@hookform/resolvers/zod";
|
||||
import { normalizeStringArray } from "@shared/normalize-string-array.js";
|
||||
import {
|
||||
type UpdateSettingsInput,
|
||||
updateSettingsSchema,
|
||||
@ -72,6 +73,7 @@ const DEFAULT_FORM_VALUES: UpdateSettingsInput = {
|
||||
penalizeMissingSalary: null,
|
||||
missingSalaryPenalty: null,
|
||||
autoSkipScoreThreshold: null,
|
||||
blockedCompanyKeywords: [],
|
||||
};
|
||||
|
||||
type LlmProviderValue = LlmProviderId | null;
|
||||
@ -114,6 +116,7 @@ const NULL_SETTINGS_PAYLOAD: UpdateSettingsInput = {
|
||||
penalizeMissingSalary: null,
|
||||
missingSalaryPenalty: null,
|
||||
autoSkipScoreThreshold: null,
|
||||
blockedCompanyKeywords: null,
|
||||
};
|
||||
|
||||
const mapSettingsToForm = (data: AppSettings): UpdateSettingsInput => ({
|
||||
@ -149,6 +152,7 @@ const mapSettingsToForm = (data: AppSettings): UpdateSettingsInput => ({
|
||||
penalizeMissingSalary: data.penalizeMissingSalary.override,
|
||||
missingSalaryPenalty: data.missingSalaryPenalty.override,
|
||||
autoSkipScoreThreshold: data.autoSkipScoreThreshold.override,
|
||||
blockedCompanyKeywords: data.blockedCompanyKeywords.override ?? [],
|
||||
});
|
||||
|
||||
const normalizeString = (value: string | null | undefined) => {
|
||||
@ -162,6 +166,11 @@ const normalizePrivateInput = (value: string | null | undefined) => {
|
||||
return trimmed || undefined;
|
||||
};
|
||||
|
||||
/**
 * Order-sensitive, case-sensitive equality check for two string arrays.
 *
 * Returns true only when both arrays have the same length and hold strictly
 * equal values at every index.
 */
const stringArraysEqual = (left: string[], right: string[]): boolean => {
  const sameLength = left.length === right.length;
  if (!sameLength) return false;

  // Equal exactly when no position holds a differing value.
  const hasMismatch = left.some((item, position) => item !== right[position]);
  return !hasMismatch;
};
|
||||
|
||||
const nullIfSame = <T,>(value: T | null | undefined, defaultValue: T) =>
|
||||
value === defaultValue ? null : (value ?? null);
|
||||
|
||||
@ -291,6 +300,10 @@ const getDerivedSettings = (settings: AppSettings | null) => {
|
||||
effective: settings?.autoSkipScoreThreshold?.value ?? null,
|
||||
default: settings?.autoSkipScoreThreshold?.default ?? null,
|
||||
},
|
||||
blockedCompanyKeywords: {
|
||||
effective: settings?.blockedCompanyKeywords?.value ?? [],
|
||||
default: settings?.blockedCompanyKeywords?.default ?? [],
|
||||
},
|
||||
},
|
||||
};
|
||||
};
|
||||
@ -627,6 +640,15 @@ export const SettingsPage: React.FC = () => {
|
||||
data.missingSalaryPenalty,
|
||||
scoring.missingSalaryPenalty.default,
|
||||
),
|
||||
blockedCompanyKeywords: (() => {
|
||||
const normalized = normalizeStringArray(data.blockedCompanyKeywords);
|
||||
const normalizedDefault = normalizeStringArray(
|
||||
scoring.blockedCompanyKeywords.default,
|
||||
);
|
||||
return stringArraysEqual(normalized, normalizedDefault)
|
||||
? null
|
||||
: normalized;
|
||||
})(),
|
||||
...envPayload,
|
||||
};
|
||||
|
||||
|
||||
@ -16,6 +16,7 @@ interface TokenizedInputProps {
|
||||
helperText: string;
|
||||
removeLabelPrefix: string;
|
||||
collapsedTextLimit?: number;
|
||||
disabled?: boolean;
|
||||
}
|
||||
|
||||
function mergeUnique(values: string[], nextValues: string[]): string[] {
|
||||
@ -41,6 +42,7 @@ export const TokenizedInput: React.FC<TokenizedInputProps> = ({
|
||||
helperText,
|
||||
removeLabelPrefix,
|
||||
collapsedTextLimit = 3,
|
||||
disabled = false,
|
||||
}) => {
|
||||
const [isFocused, setIsFocused] = useState(false);
|
||||
const tokensRef = useRef<HTMLDivElement | null>(null);
|
||||
@ -118,6 +120,7 @@ export const TokenizedInput: React.FC<TokenizedInputProps> = ({
|
||||
}
|
||||
}}
|
||||
placeholder={placeholder}
|
||||
disabled={disabled}
|
||||
/>
|
||||
<p className="text-xs text-muted-foreground">{helperText}</p>
|
||||
{values.length > 0 ? (
|
||||
@ -152,6 +155,7 @@ export const TokenizedInput: React.FC<TokenizedInputProps> = ({
|
||||
variant="outline"
|
||||
className="h-auto rounded-full px-2 py-1 text-xs text-muted-foreground"
|
||||
aria-label={`${removeLabelPrefix} ${value}`}
|
||||
disabled={disabled}
|
||||
onPointerDown={(event) => event.preventDefault()}
|
||||
onClick={() =>
|
||||
onValuesChange(
|
||||
|
||||
@ -1,7 +1,9 @@
|
||||
import { TokenizedInput } from "@client/pages/orchestrator/TokenizedInput";
|
||||
import { SettingsInput } from "@client/pages/settings/components/SettingsInput";
|
||||
import type { ScoringValues } from "@client/pages/settings/types";
|
||||
import type { UpdateSettingsInput } from "@shared/settings-schema.js";
|
||||
import type React from "react";
|
||||
import { useState } from "react";
|
||||
import { Controller, useFormContext } from "react-hook-form";
|
||||
import {
|
||||
AccordionContent,
|
||||
@ -17,6 +19,13 @@ type ScoringSettingsSectionProps = {
|
||||
isSaving: boolean;
|
||||
};
|
||||
|
||||
/**
 * Splits raw draft text into keyword tokens.
 *
 * The text is split on commas and newlines; each piece is trimmed and pieces
 * that end up empty are discarded. Order of appearance is preserved and no
 * de-duplication is performed here.
 */
function parseTokenizedKeywordInput(input: string): string[] {
  const keywords: string[] = [];

  for (const segment of input.split(/[\n,]/g)) {
    const keyword = segment.trim();
    if (keyword) {
      keywords.push(keyword);
    }
  }

  return keywords;
}
|
||||
|
||||
export const ScoringSettingsSection: React.FC<ScoringSettingsSectionProps> = ({
|
||||
values,
|
||||
isLoading,
|
||||
@ -26,8 +35,11 @@ export const ScoringSettingsSection: React.FC<ScoringSettingsSectionProps> = ({
|
||||
penalizeMissingSalary,
|
||||
missingSalaryPenalty,
|
||||
autoSkipScoreThreshold,
|
||||
blockedCompanyKeywords,
|
||||
} = values;
|
||||
const { control, watch } = useFormContext<UpdateSettingsInput>();
|
||||
const { control, watch, setValue } = useFormContext<UpdateSettingsInput>();
|
||||
const [blockedCompanyKeywordDraft, setBlockedCompanyKeywordDraft] =
|
||||
useState("");
|
||||
|
||||
// Watch the current form value to conditionally show/hide penalty input
|
||||
const currentPenalizeEnabled =
|
||||
@ -35,6 +47,8 @@ export const ScoringSettingsSection: React.FC<ScoringSettingsSectionProps> = ({
|
||||
|
||||
// Watch auto-skip threshold to show current value
|
||||
const currentAutoSkipThreshold = watch("autoSkipScoreThreshold");
|
||||
const blockedCompanyKeywordValues =
|
||||
watch("blockedCompanyKeywords") ?? blockedCompanyKeywords.default;
|
||||
|
||||
return (
|
||||
<AccordionItem value="scoring" className="border rounded-lg px-4">
|
||||
@ -154,6 +168,41 @@ export const ScoringSettingsSection: React.FC<ScoringSettingsSectionProps> = ({
|
||||
|
||||
<Separator />
|
||||
|
||||
<div className="space-y-3">
|
||||
<label
|
||||
htmlFor="blocked-company-keywords"
|
||||
className="text-sm font-medium leading-none"
|
||||
>
|
||||
Blocked Company Keywords
|
||||
</label>
|
||||
<TokenizedInput
|
||||
id="blocked-company-keywords"
|
||||
values={blockedCompanyKeywordValues}
|
||||
draft={blockedCompanyKeywordDraft}
|
||||
parseInput={parseTokenizedKeywordInput}
|
||||
onDraftChange={setBlockedCompanyKeywordDraft}
|
||||
onValuesChange={(value) =>
|
||||
setValue("blockedCompanyKeywords", value, { shouldDirty: true })
|
||||
}
|
||||
placeholder='e.g. "recruitment", "staffing"'
|
||||
helperText="Jobs whose company name contains one of these keywords will be dropped during discovery."
|
||||
removeLabelPrefix="Remove blocked keyword"
|
||||
disabled={isLoading || isSaving}
|
||||
/>
|
||||
<div className="break-words font-mono text-xs text-muted-foreground">
|
||||
Effective:{" "}
|
||||
{blockedCompanyKeywordValues.length > 0
|
||||
? blockedCompanyKeywordValues.join(", ")
|
||||
: "None"}{" "}
|
||||
| Default:{" "}
|
||||
{blockedCompanyKeywords.default.length > 0
|
||||
? blockedCompanyKeywords.default.join(", ")
|
||||
: "None"}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<Separator />
|
||||
|
||||
{/* Effective/Default values display */}
|
||||
<div className="grid gap-2 text-sm sm:grid-cols-2">
|
||||
<div>
|
||||
|
||||
@ -48,4 +48,5 @@ export type ScoringValues = {
|
||||
penalizeMissingSalary: EffectiveDefault<boolean>;
|
||||
missingSalaryPenalty: EffectiveDefault<number>;
|
||||
autoSkipScoreThreshold: EffectiveDefault<number | null>;
|
||||
blockedCompanyKeywords: EffectiveDefault<string[]>;
|
||||
};
|
||||
|
||||
@ -76,12 +76,12 @@
|
||||
:root {
|
||||
--radius: 0.5rem;
|
||||
--background: oklch(0.9818 0.0054 95.0986);
|
||||
--foreground: oklch(0.3438 0.0269 95.7226);
|
||||
--foreground: oklch(28.815% 0.01599 96.892);
|
||||
--card: oklch(0.9818 0.0054 95.0986);
|
||||
--card-foreground: oklch(0.1908 0.002 106.5859);
|
||||
--popover: oklch(1 0 0);
|
||||
--popover-foreground: oklch(0.2671 0.0196 98.939);
|
||||
--primary: oklch(0.6171 0.1375 39.0427);
|
||||
--primary: oklch(0.6916 0.1719 40.93);
|
||||
--primary-foreground: oklch(1 0 0);
|
||||
--secondary: oklch(0.9245 0.0138 92.9892);
|
||||
--secondary-foreground: oklch(0.4334 0.0177 98.6048);
|
||||
@ -143,7 +143,7 @@
|
||||
--card-foreground: oklch(0.9818 0.0054 95.0986);
|
||||
--popover: oklch(0.3085 0.0035 106.6039);
|
||||
--popover-foreground: oklch(0.9211 0.004 106.4781);
|
||||
--primary: oklch(0.6724 0.1308 38.7559);
|
||||
--primary: oklch(0.6916 0.1719 40.93);
|
||||
--primary-foreground: oklch(1 0 0);
|
||||
--secondary: oklch(0.9818 0.0054 95.0986);
|
||||
--secondary-foreground: oklch(0.3085 0.0035 106.6039);
|
||||
@ -155,7 +155,7 @@
|
||||
--border: oklch(0.3618 0.0101 106.8928);
|
||||
--input: oklch(0.4336 0.0113 100.2195);
|
||||
--ring: oklch(0.6724 0.1308 38.7559);
|
||||
--chart-1: oklch(0.5583 0.1276 42.9956);
|
||||
--chart-1: oklch(0.6916 0.1719 40.93);
|
||||
--chart-2: oklch(0.6898 0.1581 290.4107);
|
||||
--chart-3: oklch(0.213 0.0078 95.4245);
|
||||
--chart-4: oklch(0.3074 0.0516 289.323);
|
||||
|
||||
@ -646,6 +646,44 @@ describe("discoverJobsStep", () => {
|
||||
expect(vi.mocked(ukVisa.runUkVisaJobs)).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("drops discovered jobs when employer matches blocked company keywords", async () => {
|
||||
const settingsRepo = await import("../../repositories/settings");
|
||||
const jobSpy = await import("../../services/jobspy");
|
||||
|
||||
vi.mocked(settingsRepo.getAllSettings).mockResolvedValue({
|
||||
searchTerms: JSON.stringify(["engineer"]),
|
||||
blockedCompanyKeywords: JSON.stringify(["recruit", "staffing"]),
|
||||
} as any);
|
||||
|
||||
vi.mocked(jobSpy.runJobSpy).mockResolvedValue({
|
||||
success: true,
|
||||
jobs: [
|
||||
{
|
||||
source: "linkedin",
|
||||
title: "Engineer",
|
||||
employer: "Acme Staffing",
|
||||
jobUrl: "https://example.com/job-1",
|
||||
},
|
||||
{
|
||||
source: "linkedin",
|
||||
title: "Engineer II",
|
||||
employer: "Contoso",
|
||||
jobUrl: "https://example.com/job-2",
|
||||
},
|
||||
],
|
||||
} as any);
|
||||
|
||||
const result = await discoverJobsStep({
|
||||
mergedConfig: {
|
||||
...config,
|
||||
sources: ["linkedin"],
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.discoveredJobs).toHaveLength(1);
|
||||
expect(result.discoveredJobs[0]?.employer).toBe("Contoso");
|
||||
});
|
||||
|
||||
it("tracks source completion counters across source transitions", async () => {
|
||||
const settingsRepo = await import("../../repositories/settings");
|
||||
const jobSpy = await import("../../services/jobspy");
|
||||
|
||||
@ -5,6 +5,7 @@ import {
|
||||
isSourceAllowedForCountry,
|
||||
normalizeCountryKey,
|
||||
} from "@shared/location-support.js";
|
||||
import { normalizeStringArray } from "@shared/normalize-string-array.js";
|
||||
import { parseSearchCitiesSetting } from "@shared/search-cities.js";
|
||||
import type { CreateJobInput, PipelineConfig } from "@shared/types";
|
||||
import * as jobsRepo from "../../repositories/jobs";
|
||||
@ -31,6 +32,31 @@ type DiscoverySourceTask = {
|
||||
run: () => Promise<DiscoveryTaskResult>;
|
||||
};
|
||||
|
||||
/**
 * Decodes the stored blocked-company-keywords setting into a keyword list.
 *
 * The raw setting is expected to be a JSON-encoded array. Non-string entries
 * are discarded and the remaining strings are passed through
 * `normalizeStringArray`. A missing/empty value, a payload that is not an
 * array, or any error raised while decoding yields an empty list.
 */
function parseBlockedCompanyKeywords(raw: string | undefined): string[] {
  if (!raw) return [];

  try {
    const decoded: unknown = JSON.parse(raw);
    if (Array.isArray(decoded)) {
      const onlyStrings = decoded.filter(
        (entry): entry is string => typeof entry === "string",
      );
      return normalizeStringArray(onlyStrings);
    }
  } catch {
    // Best-effort: an undecodable setting is treated as "no blocked keywords".
  }

  return [];
}
|
||||
|
||||
/**
 * Reports whether an employer name matches any blocked keyword.
 *
 * Matching is a case-insensitive substring test: the employer name is
 * lower-cased here and compared against keywords the caller has already
 * lower-cased.
 *
 * @param employer - Employer name of a discovered job; a missing or empty
 *   name is never considered blocked.
 * @param blockedKeywordsLowerCase - Blocked keywords, already lower-cased by
 *   the caller.
 * @returns `true` when the employer name contains at least one non-empty
 *   keyword, otherwise `false`.
 */
function isBlockedEmployer(
  employer: string | null | undefined,
  blockedKeywordsLowerCase: string[],
): boolean {
  if (!employer) return false;
  if (blockedKeywordsLowerCase.length === 0) return false;

  const normalizedEmployer = employer.toLowerCase();
  return blockedKeywordsLowerCase.some(
    (keyword) =>
      // Every string "includes" the empty string, so an empty keyword would
      // otherwise block every employer; skip such keywords.
      keyword.length > 0 && normalizedEmployer.includes(keyword),
  );
}
|
||||
|
||||
export async function discoverJobsStep(args: {
|
||||
mergedConfig: PipelineConfig;
|
||||
shouldCancel?: () => boolean;
|
||||
@ -525,11 +551,41 @@ export async function discoverJobsStep(args: {
|
||||
sourceErrors.push(...sourceResult.sourceErrors);
|
||||
}
|
||||
|
||||
if (args.shouldCancel?.()) {
|
||||
return { discoveredJobs, sourceErrors };
|
||||
const blockedCompanyKeywords = parseBlockedCompanyKeywords(
|
||||
settings.blockedCompanyKeywords,
|
||||
);
|
||||
const blockedKeywordsLowerCase = blockedCompanyKeywords.map((value) =>
|
||||
value.toLowerCase(),
|
||||
);
|
||||
const filteredDiscoveredJobs = discoveredJobs.filter(
|
||||
(job) => !isBlockedEmployer(job.employer, blockedKeywordsLowerCase),
|
||||
);
|
||||
const droppedCount = discoveredJobs.length - filteredDiscoveredJobs.length;
|
||||
|
||||
if (droppedCount > 0) {
|
||||
const blockedCompanyKeywordsPreview = blockedCompanyKeywords.slice(0, 10);
|
||||
const blockedCompanyKeywordsTruncated =
|
||||
blockedCompanyKeywordsPreview.length < blockedCompanyKeywords.length;
|
||||
|
||||
logger.info("Dropped discovered jobs matching blocked company keywords", {
|
||||
step: "discover-jobs",
|
||||
droppedCount,
|
||||
blockedKeywordCount: blockedCompanyKeywords.length,
|
||||
blockedCompanyKeywordsPreview,
|
||||
blockedCompanyKeywordsTruncated,
|
||||
});
|
||||
|
||||
logger.debug("Full blocked company keywords used for filtering", {
|
||||
step: "discover-jobs",
|
||||
blockedCompanyKeywords,
|
||||
});
|
||||
}
|
||||
|
||||
if (discoveredJobs.length === 0 && sourceErrors.length > 0) {
|
||||
if (args.shouldCancel?.()) {
|
||||
return { discoveredJobs: filteredDiscoveredJobs, sourceErrors };
|
||||
}
|
||||
|
||||
if (filteredDiscoveredJobs.length === 0 && sourceErrors.length > 0) {
|
||||
throw new Error(`All sources failed: ${sourceErrors.join("; ")}`);
|
||||
}
|
||||
|
||||
@ -537,7 +593,7 @@ export async function discoverJobsStep(args: {
|
||||
logger.warn("Some discovery sources failed", { sourceErrors });
|
||||
}
|
||||
|
||||
progressHelpers.crawlingComplete(discoveredJobs.length);
|
||||
progressHelpers.crawlingComplete(filteredDiscoveredJobs.length);
|
||||
|
||||
return { discoveredJobs, sourceErrors };
|
||||
return { discoveredJobs: filteredDiscoveredJobs, sourceErrors };
|
||||
}
|
||||
|
||||
22
shared/src/normalize-string-array.test.ts
Normal file
22
shared/src/normalize-string-array.test.ts
Normal file
@ -0,0 +1,22 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { normalizeStringArray } from "./normalize-string-array";
|
||||
|
||||
describe("normalizeStringArray", () => {
|
||||
it("returns empty array for nullish/empty input", () => {
|
||||
expect(normalizeStringArray(undefined)).toEqual([]);
|
||||
expect(normalizeStringArray(null)).toEqual([]);
|
||||
expect(normalizeStringArray([])).toEqual([]);
|
||||
});
|
||||
|
||||
it("trims values and removes empty entries", () => {
|
||||
expect(normalizeStringArray([" staffing ", " ", "\n"])).toEqual([
|
||||
"staffing",
|
||||
]);
|
||||
});
|
||||
|
||||
it("deduplicates values case-insensitively while preserving first casing", () => {
|
||||
expect(
|
||||
normalizeStringArray(["Recruit", "staffing", "recruit", "STAFFING"]),
|
||||
).toEqual(["Recruit", "staffing"]);
|
||||
});
|
||||
});
|
||||
21
shared/src/normalize-string-array.ts
Normal file
21
shared/src/normalize-string-array.ts
Normal file
@ -0,0 +1,21 @@
|
||||
/**
 * Cleans up a list of strings.
 *
 * Each entry is trimmed; entries that are empty after trimming are dropped.
 * Duplicates are removed case-insensitively, keeping the casing and position
 * of the first occurrence. Nullish or empty input yields an empty array.
 */
export function normalizeStringArray(
  values: readonly string[] | null | undefined,
): string[] {
  if (!values || values.length === 0) return [];

  // Lower-cased key -> first trimmed spelling seen. A Map iterates in
  // insertion order, so first-occurrence order is preserved.
  const firstSpellingByKey = new Map<string, string>();

  for (const raw of values) {
    const candidate = raw.trim();
    if (candidate === "") continue;

    const foldedKey = candidate.toLowerCase();
    if (!firstSpellingByKey.has(foldedKey)) {
      firstSpellingByKey.set(foldedKey, candidate);
    }
  }

  return Array.from(firstSpellingByKey.values());
}
|
||||
@ -177,6 +177,13 @@ export const settingsRegistry = {
|
||||
parse: parseJsonArrayOrNull,
|
||||
serialize: serializeNullableJsonArray,
|
||||
},
|
||||
blockedCompanyKeywords: {
|
||||
kind: "typed" as const,
|
||||
schema: z.array(z.string().trim().min(1).max(200)).max(200),
|
||||
default: (): string[] => [],
|
||||
parse: parseJsonArrayOrNull,
|
||||
serialize: serializeNullableJsonArray,
|
||||
},
|
||||
searchCities: {
|
||||
kind: "typed" as const,
|
||||
schema: z.string().trim().max(100),
|
||||
|
||||
@ -156,6 +156,11 @@ export const createAppSettings = (
|
||||
default: ["Software Engineer"],
|
||||
override: null,
|
||||
},
|
||||
blockedCompanyKeywords: {
|
||||
value: [],
|
||||
default: [],
|
||||
override: null,
|
||||
},
|
||||
searchCities: {
|
||||
value: "United Kingdom",
|
||||
default: "United Kingdom",
|
||||
|
||||
@ -123,6 +123,7 @@ export interface AppSettings {
|
||||
adzunaMaxJobsPerTerm: Resolved<number>;
|
||||
gradcrackerMaxJobsPerTerm: Resolved<number>;
|
||||
searchTerms: Resolved<string[]>;
|
||||
blockedCompanyKeywords: Resolved<string[]>;
|
||||
searchCities: Resolved<string>;
|
||||
jobspyResultsWanted: Resolved<number>;
|
||||
jobspyCountryIndeed: Resolved<string>;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user