Jobber/orchestrator/src/server/pipeline/steps/discover-jobs.test.ts
ilia 5401f384c1
Some checks failed
CI / Linting (Biome) (push) Failing after 41s
CI / Tests (push) Successful in 5m19s
CI / Type Check (adzuna-extractor) (push) Successful in 1m10s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m13s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m10s
CI / Type Check (orchestrator) (push) Successful in 1m27s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m10s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m10s
CI / Documentation (push) Successful in 1m59s
fix(discovery): enforce search-country allow-list when Canada (etc.) is selected
Reject vague Remote/Worldwide and any non-selected country at ingest; hide mismatched jobs in the UI and stop bypassing country filters for remote listings.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-16 18:34:35 -04:00

655 lines
20 KiB
TypeScript

import type { PipelineConfig } from "@shared/types";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { getProgress, resetProgress } from "../progress";
import { discoverJobsStep } from "./discover-jobs";
// Module mocks for the collaborators imported by the step under test.
// Vitest hoists `vi.mock` calls above the imports, so each factory must be
// self-contained and may not close over variables declared in this file.

// Settings repository: each test supplies its own resolved settings.
vi.mock("@server/repositories/settings", () => {
  return { getAllSettings: vi.fn() };
});

// Jobs repository: resolves to no known job URLs unless a test overrides it.
vi.mock("@server/repositories/jobs", () => {
  return { getAllJobUrls: vi.fn().mockResolvedValue([]) };
});

// Profiles repository: resolves to `null` for every profile lookup.
vi.mock("@server/repositories/profiles", () => {
  return { getProfileById: vi.fn().mockResolvedValue(null) };
});

// Extractor registry: each test supplies the manifests it wants discovered.
vi.mock("@server/extractors/registry", () => {
  return { getExtractorRegistry: vi.fn() };
});
/**
 * Pipeline configuration shared by every test in this file.
 *
 * Tests that need a different source selection spread this object and
 * override `sources`; all other fields are used as-is.
 */
const baseConfig: PipelineConfig = {
  topN: 10,
  minSuitabilityScore: 50,
  // Default source selection; most tests replace it.
  sources: ["indeed", "linkedin", "ukvisajobs"],
  outputDir: "./tmp",
  // Asserted on discovered jobs via `ownerProfileId`.
  ownerProfileId: "__default__",
  enableCrawling: true,
  enableScoring: true,
  enableImporting: true,
  enableAutoTailoring: true,
};
describe("discoverJobsStep", () => {
  /** Outcome an extractor stub resolves with when its `run` mock is invoked. */
  type StubRunResult = {
    success: boolean;
    jobs: Array<Record<string, unknown>>;
    error?: string;
  };

  /**
   * Builds a fake extractor manifest whose `run` mock resolves with
   * `runResult`.
   *
   * @param id - Manifest id, used as the key in the registry's `manifests` map.
   * @param displayName - Human-readable name (appears in source error text).
   * @param providesSources - Source ids this manifest is registered for.
   * @param runResult - Value the `run` mock resolves with.
   * @returns The manifest stub; `run` is a Vitest mock that can be asserted on.
   */
  const stubManifest = (
    id: string,
    displayName: string,
    providesSources: string[],
    runResult: StubRunResult,
  ) => ({
    id,
    displayName,
    // Copy so manifests never share one array instance.
    providesSources: [...providesSources],
    run: vi.fn().mockResolvedValue(runResult),
  });
  type ManifestStub = ReturnType<typeof stubManifest>;

  /** Source ids the JobSpy manifest is registered for in most tests. */
  const jobspySources = ["indeed", "linkedin", "glassdoor"];

  /** Makes the mocked settings repository resolve with `settings`. */
  const mockSettings = async (
    settings: Record<string, string>,
  ): Promise<void> => {
    const settingsRepo = await import("@server/repositories/settings");
    vi.mocked(settingsRepo.getAllSettings).mockResolvedValue(settings as any);
  };

  /**
   * Makes the mocked extractor registry resolve with a registry derived from
   * `manifests`: `manifests` is keyed by manifest id, `manifestBySource` maps
   * every provided source to its manifest, and `availableSources` lists those
   * sources in registration order. Calling it with no manifests yields an
   * empty registry.
   */
  const mockRegistry = async (...manifests: ManifestStub[]): Promise<void> => {
    const registryModule = await import("@server/extractors/registry");
    const manifestBySource = new Map<string, ManifestStub>();
    for (const manifest of manifests) {
      for (const source of manifest.providesSources) {
        manifestBySource.set(source, manifest);
      }
    }
    vi.mocked(registryModule.getExtractorRegistry).mockResolvedValue({
      manifests: new Map(
        manifests.map((manifest): [string, ManifestStub] => [
          manifest.id,
          manifest,
        ]),
      ),
      manifestBySource,
      availableSources: [...manifestBySource.keys()],
    } as any);
  };

  /** Runs the step with `baseConfig`, overriding only the selected sources. */
  const runWithSources = (sources: PipelineConfig["sources"]) =>
    discoverJobsStep({ mergedConfig: { ...baseConfig, sources } });

  beforeEach(async () => {
    vi.clearAllMocks();
    // clearAllMocks only wipes call history; implementations survive. Restore
    // the default "no known URLs" answer so a test that overrides it cannot
    // influence the others, whatever order they run in.
    const jobsRepo = await import("@server/repositories/jobs");
    vi.mocked(jobsRepo.getAllJobUrls).mockResolvedValue([]);
    resetProgress();
  });

  // One extractor succeeds and one fails: the failure is reported in
  // `sourceErrors` while the successful extractor's job is still returned.
  it("aggregates source errors for enabled sources", async () => {
    const jobspyManifest = stubManifest("jobspy", "JobSpy", jobspySources, {
      success: true,
      jobs: [
        {
          source: "linkedin",
          title: "Engineer",
          employer: "ACME",
          jobUrl: "https://example.com/job",
        },
      ],
    });
    const ukvisaManifest = stubManifest(
      "ukvisajobs",
      "UK Visa Jobs",
      ["ukvisajobs"],
      { success: false, jobs: [], error: "login failed" },
    );
    await mockSettings({ searchTerms: JSON.stringify(["engineer"]) });
    await mockRegistry(jobspyManifest, ukvisaManifest);

    const result = await discoverJobsStep({ mergedConfig: baseConfig });

    expect(result.discoveredJobs).toHaveLength(1);
    expect(result.discoveredJobs[0]?.ownerProfileId).toBe("__default__");
    expect(result.sourceErrors).toEqual([
      "UK Visa Jobs: login failed (sources: ukvisajobs)",
    ]);
    // Only the requested sources this manifest provides are forwarded to it.
    expect(jobspyManifest.run).toHaveBeenCalledWith(
      expect.objectContaining({ selectedSources: ["indeed", "linkedin"] }),
    );
  });

  // `searchCities` is "UK" while the stored Indeed country is "united states";
  // the extractor is expected to receive "united kingdom".
  it("aligns JobSpy Indeed country to country-level search geography when settings disagree", async () => {
    const jobspyManifest = stubManifest("jobspy", "JobSpy", jobspySources, {
      success: true,
      jobs: [
        {
          source: "linkedin",
          title: "Engineer",
          employer: "ACME",
          jobUrl: "https://example.com/job",
        },
      ],
    });
    await mockSettings({
      searchTerms: JSON.stringify(["engineer"]),
      searchCities: "UK",
      jobspyCountryIndeed: "united states",
    });
    await mockRegistry(jobspyManifest);

    await runWithSources(["indeed", "linkedin"]);

    expect(jobspyManifest.run).toHaveBeenCalledWith(
      expect.objectContaining({
        settings: expect.objectContaining({
          jobspyCountryIndeed: "united kingdom",
        }),
      }),
    );
  });

  it("throws when all enabled sources fail", async () => {
    const ukvisaManifest = stubManifest(
      "ukvisajobs",
      "UK Visa Jobs",
      ["ukvisajobs"],
      { success: false, jobs: [], error: "boom" },
    );
    await mockSettings({ searchTerms: JSON.stringify(["engineer"]) });
    await mockRegistry(ukvisaManifest);

    await expect(runWithSources(["ukvisajobs"])).rejects.toThrow(
      "All sources failed: UK Visa Jobs: boom (sources: ukvisajobs)",
    );
  });

  // Both requested sources are expected to be rejected for a United States
  // search, leaving nothing to run.
  it("throws when all requested sources are incompatible for country", async () => {
    await mockSettings({
      searchTerms: JSON.stringify(["engineer"]),
      jobspyCountryIndeed: "united states",
    });
    await mockRegistry();

    await expect(
      runWithSources(["gradcracker", "ukvisajobs"]),
    ).rejects.toThrow(
      "No compatible sources for selected country: United States",
    );
  });

  // An empty selection is not an error: the step resolves with empty results.
  it("does not throw when no sources are requested", async () => {
    await mockSettings({
      searchTerms: JSON.stringify(["engineer"]),
      jobspyCountryIndeed: "united states",
    });
    await mockRegistry();

    const result = await runWithSources([]);

    expect(result.discoveredJobs).toEqual([]);
    expect(result.sourceErrors).toEqual([]);
  });

  // Blocked keywords stored as a JSON array: "Acme Staffing" is expected to be
  // dropped, "Contoso" kept.
  it("drops discovered jobs when employer matches blocked company keywords", async () => {
    const jobspyManifest = stubManifest("jobspy", "JobSpy", jobspySources, {
      success: true,
      jobs: [
        {
          source: "linkedin",
          title: "Engineer",
          employer: "Acme Staffing",
          jobUrl: "https://example.com/job-1",
        },
        {
          source: "linkedin",
          title: "Engineer II",
          employer: "Contoso",
          jobUrl: "https://example.com/job-2",
        },
      ],
    });
    await mockSettings({
      searchTerms: JSON.stringify(["engineer"]),
      blockedCompanyKeywords: JSON.stringify(["recruit", "staffing"]),
    });
    await mockRegistry(jobspyManifest);

    const result = await runWithSources(["linkedin"]);

    expect(result.discoveredJobs).toHaveLength(1);
    expect(result.discoveredJobs[0]?.employer).toBe("Contoso");
  });

  // Same filter, but the setting is a plain comma-separated string rather
  // than JSON.
  it("drops discovered jobs when blocked keywords use legacy comma-separated storage", async () => {
    const jobspyManifest = stubManifest("jobspy", "JobSpy", ["linkedin"], {
      success: true,
      jobs: [
        {
          source: "linkedin",
          title: "Engineer",
          employer: "Acme Staffing",
          jobUrl: "https://example.com/job-legacy",
        },
      ],
    });
    await mockSettings({
      searchTerms: JSON.stringify(["engineer"]),
      blockedCompanyKeywords: "staffing, irrelevant",
    });
    await mockRegistry(jobspyManifest);

    const result = await runWithSources(["linkedin"]);

    expect(result.discoveredJobs).toHaveLength(0);
  });

  // With the search set to Canada and India blocked, only the Toronto listing
  // is expected back: the India job and the bare "Remote" job are both dropped.
  it("drops discovered jobs when location is in a blocked country", async () => {
    const jobspyManifest = stubManifest("jobspy", "JobSpy", ["linkedin"], {
      success: true,
      jobs: [
        {
          source: "linkedin",
          title: "SDET",
          employer: "Acme",
          location: "Bangalore, India",
          jobUrl: "https://example.com/job-in",
        },
        {
          source: "linkedin",
          title: "SDET",
          employer: "Contoso",
          location: "Toronto, ON, Canada",
          jobUrl: "https://example.com/job-ca",
        },
        {
          source: "linkedin",
          title: "SDET",
          employer: "Remote Co",
          location: "Remote",
          jobUrl: "https://example.com/job-remote",
        },
      ],
    });
    await mockSettings({
      searchTerms: JSON.stringify(["sdet"]),
      searchCities: "Canada",
      blockedCountries: JSON.stringify(["india"]),
    });
    await mockRegistry(jobspyManifest);

    const result = await runWithSources(["linkedin"]);

    expect(result.discoveredJobs).toHaveLength(1);
    expect(result.discoveredJobs[0]?.jobUrl).toBe("https://example.com/job-ca");
  });

  // The blocked keyword "co-op" appears in the first job's title, not in its
  // employer; that job is expected to be dropped and the plain SDET role kept.
  it("drops co-op titles via company skip list and coop deal-breaker token", async () => {
    const bcManifest = stubManifest("bctenet", "BC T-Net", ["bctenet"], {
      success: true,
      jobs: [
        {
          source: "bctenet",
          title: "Co-op Software Development Engineer in Test (SDET)",
          employer: "Global Relay Communications Inc.",
          location: "British Columbia, Canada",
          jobUrl: "https://example.com/job-coop",
        },
        {
          source: "bctenet",
          title: "SDET",
          employer: "Contoso",
          location: "Vancouver, BC, Canada",
          jobUrl: "https://example.com/job-sdet",
        },
      ],
    });
    await mockSettings({
      searchTerms: JSON.stringify(["sdet"]),
      searchCities: "Canada",
      blockedCompanyKeywords: JSON.stringify(["co-op"]),
    });
    await mockRegistry(bcManifest);

    const result = await runWithSources(["bctenet"]);

    expect(result.discoveredJobs).toHaveLength(1);
    expect(result.discoveredJobs[0]?.jobUrl).toBe(
      "https://example.com/job-sdet",
    );
  });

  // The first job's location is only "Worldwide", but its description names
  // India (blocked); it is expected to be dropped.
  it("drops jobs with blocked country in description when location is worldwide", async () => {
    const qaManifest = stubManifest(
      "qajobsboard",
      "QAJobsBoard",
      ["qajobsboard"],
      {
        success: true,
        jobs: [
          {
            source: "qajobsboard",
            title: "Sr. QA Automation Engineer",
            employer: "Harrier",
            location: "Worldwide",
            jobDescription:
              "Job Location: Mumbai/Nagpur. Open to candidates in India.",
            jobUrl: "https://example.com/job-in",
          },
          {
            source: "qajobsboard",
            title: "SDET",
            employer: "Contoso",
            location: "Toronto, ON, Canada",
            jobUrl: "https://example.com/job-ca",
          },
        ],
      },
    );
    await mockSettings({
      searchTerms: JSON.stringify(["sdet"]),
      blockedCountries: JSON.stringify(["india"]),
      searchCities: "Canada",
    });
    await mockRegistry(qaManifest);

    const result = await runWithSources(["qajobsboard"]);

    expect(result.discoveredJobs).toHaveLength(1);
    expect(result.discoveredJobs[0]?.jobUrl).toBe("https://example.com/job-ca");
  });

  // Three jobs come back across two extractors; with `searchCities` set to
  // "Leeds" only the two Leeds listings are expected to survive.
  it("applies shared city filtering for sources without native city filtering", async () => {
    const gradcrackerManifest = stubManifest(
      "gradcracker",
      "Gradcracker",
      ["gradcracker"],
      {
        success: true,
        jobs: [
          {
            source: "gradcracker",
            title: "Engineer - Leeds",
            employer: "ACME",
            location: "Leeds, England, UK",
            jobUrl: "https://example.com/grad-1",
          },
          {
            source: "gradcracker",
            title: "Engineer - London",
            employer: "ACME",
            location: "London, England, UK",
            jobUrl: "https://example.com/grad-2",
          },
        ],
      },
    );
    const ukvisaManifest = stubManifest(
      "ukvisajobs",
      "UK Visa Jobs",
      ["ukvisajobs"],
      {
        success: true,
        jobs: [
          {
            source: "ukvisajobs",
            title: "Developer - Leeds",
            employer: "Contoso",
            location: "Leeds, England, UK",
            jobUrl: "https://example.com/ukv-1",
          },
        ],
      },
    );
    await mockSettings({
      searchTerms: JSON.stringify(["engineer"]),
      searchCities: "Leeds",
      jobspyCountryIndeed: "united kingdom",
    });
    await mockRegistry(gradcrackerManifest, ukvisaManifest);

    const result = await runWithSources(["gradcracker", "ukvisajobs"]);

    expect(result.discoveredJobs).toHaveLength(2);
    expect(
      result.discoveredJobs.every((job) => job.location?.includes("Leeds")),
    ).toBe(true);
  });

  // Three requested sources map to three extractors; progress should count
  // all of them, and each extractor receives a `getExistingJobUrls` callback
  // that yields the URLs from the jobs repository.
  it("tracks source completion counters across source transitions", async () => {
    const jobsRepo = await import("@server/repositories/jobs");
    const emptySuccess: StubRunResult = { success: true, jobs: [] };
    const jobspyManifest = stubManifest(
      "jobspy",
      "JobSpy",
      jobspySources,
      emptySuccess,
    );
    const gradcrackerManifest = stubManifest(
      "gradcracker",
      "Gradcracker",
      ["gradcracker"],
      emptySuccess,
    );
    const ukvisaManifest = stubManifest(
      "ukvisajobs",
      "UK Visa Jobs",
      ["ukvisajobs"],
      emptySuccess,
    );
    await mockSettings({ searchTerms: JSON.stringify(["engineer"]) });
    vi.mocked(jobsRepo.getAllJobUrls).mockResolvedValue([
      "https://example.com/existing",
    ]);
    await mockRegistry(jobspyManifest, gradcrackerManifest, ukvisaManifest);

    await runWithSources(["linkedin", "gradcracker", "ukvisajobs"]);

    const progress = getProgress();
    expect(progress.crawlingSourcesTotal).toBe(3);
    expect(progress.crawlingSourcesCompleted).toBe(3);
    expect(gradcrackerManifest.run).toHaveBeenCalledWith(
      expect.objectContaining({
        getExistingJobUrls: expect.any(Function),
      }),
    );
    // Pull the callback out of the first recorded call and invoke it directly.
    const [{ getExistingJobUrls }] = gradcrackerManifest.run.mock.calls[0] as [
      { getExistingJobUrls: () => Promise<string[]> },
    ];
    await expect(getExistingJobUrls()).resolves.toEqual([
      "https://example.com/existing",
    ]);
  });
});