Shaheer Sarfaraz 82e142a8a8
Auto-Registering Extractor System (#223)
* initial commit?

* Address PR feedback on extractor discovery and startup resilience

* Address latest PR review comments

* fix city resolution fallback when input parses empty

* address PR feedback on extractor registry and pipeline validation

* address copilot comments on manifests and registry startup

* fix extractor discovery export handling and env isolation in tests

* enforce duplicate manifest id failures in strict mode

* Fix remaining extractor registry and runtime review comments

* docs

* docs

* test all, logic remains in extractors

* Address PR review feedback on extractor registry and validation

* Revert extractor moduleResolution to bundler

* Enforce shared city filtering across all discovery sources

* Deduplicate extractor strict city post-filtering
2026-02-21 17:44:07 +00:00

57 lines
1.5 KiB
TypeScript

import type {
ExtractorManifest,
ExtractorRuntimeContext,
} from "@shared/types/extractors";
import { runCrawler } from "./src/run";
/**
 * Extractor manifest for the Gradcracker source.
 *
 * Declares the extractor's id, display name and the source it provides, and
 * exposes `run`, which delegates the actual crawl to `runCrawler`.
 */
export const manifest: ExtractorManifest = {
  id: "gradcracker",
  displayName: "Gradcracker",
  providesSources: ["gradcracker"],

  /**
   * Runs the Gradcracker crawler with the search terms and settings taken
   * from the runtime context.
   *
   * @param context Runtime context supplying search terms, settings and the
   *   optional `shouldCancel`, `getExistingJobUrls` and `onProgress` hooks.
   * @returns `{ success: true, jobs }` when the crawler succeeds (or an empty
   *   job list when cancellation was requested before starting), otherwise
   *   `{ success: false, jobs: [], error }` carrying the crawler's error.
   */
  async run(context: ExtractorRuntimeContext) {
    // Cancellation requested before any work started: report an empty,
    // successful result rather than an error.
    if (context.shouldCancel?.()) {
      return { success: true, jobs: [] };
    }

    const existingJobUrls = await context.getExistingJobUrls?.();

    // The setting is parsed as a base-10 integer. A missing/empty value, or a
    // value that does not parse (parseInt yields NaN), falls back to the
    // default so the crawler never receives NaN as its per-term limit.
    const defaultMaxJobsPerTerm = 50;
    const rawMaxJobsPerTerm = context.settings.gradcrackerMaxJobsPerTerm;
    const parsedMaxJobsPerTerm = rawMaxJobsPerTerm
      ? parseInt(rawMaxJobsPerTerm, 10)
      : defaultMaxJobsPerTerm;
    const maxJobsPerTerm = Number.isNaN(parsedMaxJobsPerTerm)
      ? defaultMaxJobsPerTerm
      : parsedMaxJobsPerTerm;

    const result = await runCrawler({
      existingJobUrls,
      searchTerms: context.searchTerms,
      maxJobsPerTerm,
      onProgress: (progress) => {
        // Stop forwarding progress once cancellation has been requested.
        if (context.shouldCancel?.()) return;
        // Forward only this explicit set of progress fields to the context.
        context.onProgress?.({
          phase: progress.phase,
          currentUrl: progress.currentUrl,
          listPagesProcessed: progress.listPagesProcessed,
          listPagesTotal: progress.listPagesTotal,
          jobCardsFound: progress.jobCardsFound,
          jobPagesEnqueued: progress.jobPagesEnqueued,
          jobPagesSkipped: progress.jobPagesSkipped,
          jobPagesProcessed: progress.jobPagesProcessed,
        });
      },
    });

    if (!result.success) {
      return {
        success: false,
        jobs: [],
        error: result.error,
      };
    }

    return {
      success: true,
      jobs: result.jobs,
    };
  },
};

export default manifest;