Jobber/extractors/adzuna/manifest.ts
Shaheer Sarfaraz 82e142a8a8
Auto-Registering Extractor System (#223)
* initial commit?

* Address PR feedback on extractor discovery and startup resilience

* Address latest PR review comments

* fix city resolution fallback when input parses empty

* address PR feedback on extractor registry and pipeline validation

* address copilot comments on manifests and registry startup

* fix extractor discovery export handling and env isolation in tests

* enforce duplicate manifest id failures in strict mode

* Fix remaining extractor registry and runtime review comments

* docs

* docs

* test all, logic remains in extractors

* Address PR review feedback on extractor registry and validation

* Revert extractor moduleResolution to bundler

* Enforce shared city filtering across all discovery sources

* Deduplicate extractor strict city post-filtering
2026-02-21 17:44:07 +00:00

121 lines
3.2 KiB
TypeScript

import { getAdzunaCountryCode } from "@shared/location-support.js";
import { resolveSearchCities } from "@shared/search-cities.js";
import type {
ExtractorManifest,
ExtractorProgressEvent,
} from "@shared/types/extractors";
import { runAdzuna } from "./src/run";
/**
 * Translates a progress event emitted by the Adzuna runner into the shared
 * `ExtractorProgressEvent` shape. Every result is reported under the "list"
 * phase.
 *
 * - `"term_start"`: counts the terms before the current one as processed
 *   (clamped so it never drops below 0) and uses the search term as the
 *   current "URL".
 * - `"page_fetched"`: same term accounting, plus the fetched page number and
 *   the running collected total; either value defaults to 0 when absent.
 * - any other `type`: treated as the completion of the current term, so the
 *   current term itself is counted as processed.
 *
 * @param event - Raw runner event; `pageNo` and `totalCollected` are only
 *   read for `"page_fetched"` events.
 * @returns The progress payload to hand to the extractor's progress callback.
 */
function toProgress(event: {
  type: string;
  termIndex: number;
  termTotal: number;
  searchTerm: string;
  pageNo?: number;
  totalCollected?: number;
}): ExtractorProgressEvent {
  const { type, termIndex, termTotal, searchTerm } = event;

  switch (type) {
    case "term_start":
      return {
        phase: "list",
        // The current term has only just started, so it is not counted yet.
        termsProcessed: Math.max(termIndex - 1, 0),
        termsTotal: termTotal,
        currentUrl: searchTerm,
        detail: `Adzuna: term ${termIndex}/${termTotal} (${searchTerm})`,
      };

    case "page_fetched": {
      const page = event.pageNo ?? 0;
      const collected = event.totalCollected ?? 0;
      return {
        phase: "list",
        termsProcessed: Math.max(termIndex - 1, 0),
        termsTotal: termTotal,
        listPagesProcessed: page,
        // The collected total is reported as both enqueued and processed.
        jobPagesEnqueued: collected,
        jobPagesProcessed: collected,
        currentUrl: `page ${page}`,
        detail: `Adzuna: term ${termIndex}/${termTotal}, page ${page} (${collected} collected)`,
      };
    }

    default:
      // Anything else is reported as "term completed".
      return {
        phase: "list",
        termsProcessed: termIndex,
        termsTotal: termTotal,
        currentUrl: searchTerm,
        detail: `Adzuna: completed term ${termIndex}/${termTotal} (${searchTerm})`,
      };
  }
}
/**
 * Extractor manifest for the Adzuna job source.
 *
 * Declares the extractor's identity (`id`, `displayName`), the source it
 * provides, the environment variables it lists as required, and the `run`
 * entry point that drives `runAdzuna`.
 */
export const manifest: ExtractorManifest = {
  id: "adzuna",
  displayName: "Adzuna",
  providesSources: ["adzuna"],
  requiredEnvVars: ["ADZUNA_APP_ID", "ADZUNA_APP_KEY"],

  /**
   * Runs the Adzuna extractor for the given pipeline context.
   *
   * Never throws: every failure is reported as
   * `{ success: false, jobs: [], error }`.
   *
   * @param context - Supplies the selected country, search terms, settings,
   *   and the optional `shouldCancel` / `onProgress` hooks.
   * @returns `{ success: true, jobs }` on success (an empty list when the run
   *   was already cancelled before starting), otherwise a failure result
   *   carrying an error message.
   */
  async run(context) {
    // Cancelled before any work started: report an empty, successful run.
    if (context.shouldCancel?.()) {
      return { success: true, jobs: [] };
    }

    const countryCode = getAdzunaCountryCode(context.selectedCountry);
    if (!countryCode) {
      return {
        success: false,
        jobs: [],
        error: `unsupported country ${context.selectedCountry}`,
      };
    }

    const defaultMaxJobsPerTerm = 50;
    const parsedMaxJobsPerTerm = context.settings.adzunaMaxJobsPerTerm
      ? parseInt(context.settings.adzunaMaxJobsPerTerm, 10)
      : defaultMaxJobsPerTerm;
    // parseInt yields NaN for non-numeric input (e.g. "abc"); fall back to the
    // default rather than handing NaN to the runner as a job limit.
    const maxJobsPerTerm = Number.isNaN(parsedMaxJobsPerTerm)
      ? defaultMaxJobsPerTerm
      : parsedMaxJobsPerTerm;

    let result: Awaited<ReturnType<typeof runAdzuna>>;
    try {
      result = await runAdzuna({
        country: countryCode,
        countryKey: context.selectedCountry,
        searchTerms: context.searchTerms,
        locations: resolveSearchCities({
          single:
            context.settings.searchCities ?? context.settings.jobspyLocation,
        }),
        maxJobsPerTerm,
        onProgress: (event) => {
          // Stop forwarding progress once cancellation has been requested.
          if (context.shouldCancel?.()) return;
          context.onProgress?.(toProgress(event));
        },
      });
    } catch (error) {
      // Normalise anything thrown (Error, string, or other) into a message.
      const message =
        error instanceof Error
          ? error.message
          : typeof error === "string"
            ? error
            : "Unexpected error while running Adzuna extractor.";
      return {
        success: false,
        jobs: [],
        error: message,
      };
    }

    if (!result.success) {
      return {
        success: false,
        jobs: [],
        error: result.error,
      };
    }

    return {
      success: true,
      jobs: result.jobs,
    };
  },
};

// The same manifest is also exposed as the module's default export.
export default manifest;