* initial commit? * Address PR feedback on extractor discovery and startup resilience * Address latest PR review comments * fix city resolution fallback when input parses empty * address PR feedback on extractor registry and pipeline validation * address copilot comments on manifests and registry startup * fix extractor discovery export handling and env isolation in tests * enforce duplicate manifest id failures in strict mode * Fix remaining extractor registry and runtime review comments * docs * docs * test all, logic remains in extractors * Address PR review feedback on extractor registry and validation * Revert extractor moduleResolution to bundler * Enforce shared city filtering across all discovery sources * Deduplicate extractor strict city post-filtering
57 lines
1.5 KiB
TypeScript
57 lines
1.5 KiB
TypeScript
import type {
|
|
ExtractorManifest,
|
|
ExtractorRuntimeContext,
|
|
} from "@shared/types/extractors";
|
|
import { runCrawler } from "./src/run";
|
|
|
|
/** Per-search-term job cap used when the setting is missing or unusable. */
const DEFAULT_MAX_JOBS_PER_TERM = 50;

/**
 * Extractor manifest for the "gradcracker" source.
 *
 * Exposes the extractor's identity (`id`, `displayName`, `providesSources`)
 * and a `run` entry point that delegates the actual crawl to `runCrawler`.
 */
export const manifest: ExtractorManifest = {
  id: "gradcracker",
  displayName: "Gradcracker",
  providesSources: ["gradcracker"],

  /**
   * Runs the crawler and adapts its result to the extractor result shape.
   *
   * @param context Runtime context supplying search terms, settings, and the
   *   optional `shouldCancel`, `getExistingJobUrls` and `onProgress` hooks.
   * @returns `{ success: true, jobs }` on success (with an empty `jobs` list
   *   when cancellation was requested before the crawl started), or
   *   `{ success: false, jobs: [], error }` when the crawler reports failure.
   */
  async run(context: ExtractorRuntimeContext) {
    if (context.shouldCancel?.()) {
      return { success: true, jobs: [] };
    }

    const existingJobUrls = await context.getExistingJobUrls?.();

    // Cancellation may have been requested while the lookup above was
    // pending; check again so a crawl is not started needlessly.
    if (context.shouldCancel?.()) {
      return { success: true, jobs: [] };
    }

    // The setting is parsed with parseInt, so a non-numeric value yields NaN.
    // NaN and negative values are not meaningful caps: fall back to the
    // default instead of forwarding them to the crawler. Zero is passed
    // through unchanged because its meaning is defined by runCrawler.
    const rawMaxJobsPerTerm = context.settings.gradcrackerMaxJobsPerTerm;
    const parsedMaxJobsPerTerm = rawMaxJobsPerTerm
      ? parseInt(rawMaxJobsPerTerm, 10)
      : DEFAULT_MAX_JOBS_PER_TERM;
    const maxJobsPerTerm =
      Number.isNaN(parsedMaxJobsPerTerm) || parsedMaxJobsPerTerm < 0
        ? DEFAULT_MAX_JOBS_PER_TERM
        : parsedMaxJobsPerTerm;

    const result = await runCrawler({
      existingJobUrls,
      searchTerms: context.searchTerms,
      maxJobsPerTerm,
      onProgress: (progress) => {
        // Stop forwarding progress once cancellation has been requested.
        if (context.shouldCancel?.()) return;

        // Forward an explicit set of fields rather than the whole object.
        context.onProgress?.({
          phase: progress.phase,
          currentUrl: progress.currentUrl,
          listPagesProcessed: progress.listPagesProcessed,
          listPagesTotal: progress.listPagesTotal,
          jobCardsFound: progress.jobCardsFound,
          jobPagesEnqueued: progress.jobPagesEnqueued,
          jobPagesSkipped: progress.jobPagesSkipped,
          jobPagesProcessed: progress.jobPagesProcessed,
        });
      },
    });

    if (!result.success) {
      return {
        success: false,
        jobs: [],
        error: result.error,
      };
    }

    return {
      success: true,
      jobs: result.jobs,
    };
  },
};
|
|
|
|
// Also expose the manifest as this module's default export, alongside the
// named `manifest` export.
export default manifest;
|