/**
 * Tiny smoke-test for new extractors: imports each manifest, runs it with a
 * minimal context, and prints the count of mapped jobs + a few samples.
 *
 * Run from repo root: npx tsx scripts/smoke-extractors.ts [comma,separated,ids]
 *
 * Loads repo-root `.env` so keyed extractors match orchestrator behavior (plain
 * `tsx` does not read `.env` automatically).
 */
import path from "node:path";
import { fileURLToPath } from "node:url";
import { config as loadEnv } from "dotenv";
import type {
  ExtractorManifest,
  ExtractorRuntimeContext,
} from "../shared/src/types/extractors";

// The repo root is one directory above the directory containing this script.
const repoRoot = path.resolve(
  path.dirname(fileURLToPath(import.meta.url)),
  "..",
);

loadEnv({ path: path.join(repoRoot, ".env") });

/** One extractor to smoke-test. */
interface Target {
  /** Extractor id; used as the CLI filter key and as the context `source`. */
  id: string;
  /** Module specifier handed to dynamic `import()`. */
  importPath: string;
  /** Env vars required to run; the target is skipped if any is missing. */
  needs?: string[];
  /** Settings passed through to the extractor via the runtime context. */
  settings?: Record<string, string>;
}

const ALL_TARGETS: Target[] = [
  {
    id: "jobicy",
    importPath: "../extractors/jobicy/manifest",
    settings: { jobicyMaxJobsPerTerm: "10" },
  },
  {
    id: "themuse",
    importPath: "../extractors/themuse/manifest",
    settings: { themuseMaxJobsPerTerm: "10" },
  },
  {
    id: "usajobs",
    importPath: "../extractors/usajobs/manifest",
    needs: ["USAJOBS_API_KEY", "USAJOBS_USER_AGENT"],
    settings: { usajobsMaxJobsPerTerm: "10" },
  },
  {
    id: "jooble",
    importPath: "../extractors/jooble/manifest",
    needs: ["JOOBLE_API_KEY"],
    settings: { joobleMaxJobsPerTerm: "10" },
  },
  {
    id: "careerjet",
    importPath: "../extractors/careerjet/manifest",
    needs: ["CAREERJET_AFFID", "CAREERJET_REFERER", "CAREERJET_USER_IP"],
    settings: { careerjetMaxJobsPerTerm: "10" },
  },
  {
    id: "reed",
    importPath: "../extractors/reed/manifest",
    needs: ["REED_API_KEY"],
    settings: { reedMaxJobsPerTerm: "10" },
  },
  {
    id: "lever",
    importPath: "../extractors/lever/manifest",
    settings: {
      // Known active public Lever board used purely as a connectivity check.
      leverCompanies: JSON.stringify(["palantir", "netflix"]),
    },
  },
  {
    id: "ashby",
    importPath: "../extractors/ashby/manifest",
    settings: {
      ashbyCompanies: JSON.stringify(["ramp", "linear"]),
    },
  },
  {
    id: "greenhouse",
    importPath: "../extractors/greenhouse/manifest",
    settings: {
      greenhouseCompanies: JSON.stringify(["stripe", "airbnb"]),
    },
  },
  {
    id: "workday",
    importPath: "../extractors/workday/manifest",
    settings: {
      workdayTenants: JSON.stringify([
        "https://nvidia.wd5.myworkdayjobs.com/en-US/NVIDIAExternalCareerSite",
      ]),
    },
  },
  {
    id: "remoteok",
    importPath: "../extractors/remoteok/manifest",
    settings: { remoteokMaxJobsPerTerm: "10" },
  },
  {
    id: "remotive",
    importPath: "../extractors/remotive/manifest",
    settings: { remotiveMaxJobsPerTerm: "10" },
  },
  {
    id: "arbeitnow",
    importPath: "../extractors/arbeitnow/manifest",
    settings: { arbeitnowMaxJobsPerTerm: "10" },
  },
  {
    id: "himalayas",
    importPath: "../extractors/himalayas/manifest",
    settings: { himalayasMaxJobsPerTerm: "10" },
  },
  {
    id: "weworkremotely",
    importPath: "../extractors/weworkremotely/manifest",
    settings: { weworkremotelyMaxJobsPerTerm: "10" },
  },
  {
    id: "fourdayweek",
    importPath: "../extractors/fourdayweek/manifest",
    settings: { fourdayweekMaxJobsPerTerm: "10" },
  },
];

/**
 * Builds the minimal runtime context handed to an extractor: a single source,
 * one fixed search term and country, no known job URLs, never cancelled, and
 * a no-op progress callback.
 *
 * @param source - Extractor id; also used as the only selected source.
 * @param settings - Settings forwarded unchanged to the extractor.
 * @returns The context object passed to `manifest.run`.
 */
function buildContext(
  source: string,
  settings: Record<string, string>,
): ExtractorRuntimeContext {
  return {
    source,
    selectedSources: [source],
    settings,
    searchTerms: ["software engineer"],
    selectedCountry: "United States",
    getExistingJobUrls: async () => [],
    shouldCancel: () => false,
    onProgress: () => {},
  };
}

/**
 * Right-pads `s` with spaces to at least `n` characters; strings already that
 * long are returned unchanged.
 */
function pad(s: string, n: number): string {
  return s.padEnd(n);
}

/**
 * Extracts a printable message from a caught value. Thrown values are not
 * guaranteed to be `Error` instances, so anything else is stringified instead
 * of reading a `.message` property that may not exist.
 */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Runs one target and prints exactly one status line for it:
 * `SKIP` (required env vars missing), `FAIL` (import error or no manifest
 * export), `OK`/`ERR` (extractor returned, per `result.success`), or `CRASH`
 * (extractor threw). Never rejects for these cases, so one bad extractor does
 * not stop the rest of the run.
 *
 * @param target - The extractor to exercise.
 */
async function runOne(target: Target): Promise<void> {
  const label = pad(target.id, 12);

  const missing = (target.needs ?? []).filter((k) => !process.env[k]);
  if (missing.length > 0) {
    console.log(`${label} SKIP missing env: ${missing.join(", ")}`);
    return;
  }

  let mod: { manifest?: ExtractorManifest; default?: ExtractorManifest };
  try {
    mod = await import(target.importPath);
  } catch (err: unknown) {
    console.log(`${label} FAIL import error: ${errorMessage(err)}`);
    return;
  }

  // Accept either a named `manifest` export or a default export.
  const manifest = mod.manifest ?? mod.default;
  if (!manifest) {
    console.log(`${label} FAIL manifest export missing`);
    return;
  }

  const started = Date.now();
  try {
    const ctx = buildContext(target.id, target.settings ?? {});
    const result = await manifest.run(ctx);
    const ms = Date.now() - started;
    const status = result.success ? "OK " : "ERR ";
    const sample = result.jobs[0];
    const sampleStr = sample
      ? ` | first: "${sample.title}" @ ${sample.employer}`
      : "";
    const errorStr = result.error ? ` | error: ${result.error}` : "";
    console.log(
      `${label} ${status} jobs=${result.jobs.length} ${ms}ms${errorStr}${sampleStr}`,
    );
  } catch (err: unknown) {
    const ms = Date.now() - started;
    console.log(`${label} CRASH ${ms}ms ${errorMessage(err)}`);
  }
}

/**
 * Entry point. Reads an optional comma-separated id list from the first CLI
 * argument, selects the matching targets (all targets when no list is given),
 * and runs them one at a time so status lines appear in list order.
 */
async function main(): Promise<void> {
  const requested = (process.argv[2] ?? "").trim();
  const filter = requested
    ? new Set(
        requested
          .split(",")
          .map((s) => s.trim())
          .filter(Boolean),
      )
    : null;

  if (filter) {
    // Surface typos: ids that match no target would otherwise be dropped
    // silently and the run would report fewer extractors than requested.
    const known = new Set(ALL_TARGETS.map((t) => t.id));
    const unknown = Array.from(filter).filter((id) => !known.has(id));
    if (unknown.length > 0) {
      console.warn(`Unknown extractor id(s) ignored: ${unknown.join(", ")}`);
    }
  }

  const targets = filter
    ? ALL_TARGETS.filter((t) => filter.has(t.id))
    : ALL_TARGETS;

  console.log(`Smoke testing ${targets.length} extractor(s)...\n`);
  for (const t of targets) {
    await runOne(t);
  }
}

main().catch((err: unknown) => {
  console.error(err);
  process.exit(1);
});