/**
 * Smoke-test helper for extractor manifests: imports each manifest, runs it with a
 * minimal context, and prints mapped job counts + a sample row.
 *
 * Run from repo root (`run-smoke-extractors.mjs` applies repo tsconfig for `@shared/*`):
 *   npm run smoke:extractors
 *   npm run smoke:extractors -- arcdev,icims
 *   npm run smoke:extractors -- indeed   # alias → `jobspy` (same manifest)
 *
 * Keep `ALL_TARGETS` aligned with every shipped manifest under each
 * `extractors/<name>/` package (`manifest.ts` or `src/manifest.ts`).
 *
 * Loads repo-root `.env` so keyed extractors match orchestrator behavior (plain
 * `tsx` does not read `.env` automatically).
 *
 * Resolves `@shared/*` imports in extractor manifests via `scripts/smoke-resolve-shared.mjs`.
 */
import { existsSync } from "node:fs";
import { register } from "node:module";
import { homedir } from "node:os";
import path from "node:path";
import { fileURLToPath, pathToFileURL } from "node:url";
import { config as loadEnv } from "dotenv";
import type {
  ExtractorManifest,
  ExtractorRuntimeContext,
} from "job-ops-shared/types/extractors";

// Install the resolver hook that sits next to this script before any manifest
// is dynamically imported.
register(
  pathToFileURL(
    path.join(
      path.dirname(fileURLToPath(import.meta.url)),
      "smoke-resolve-shared.mjs",
    ),
  ).href,
  import.meta.url,
);

/** Parent directory of the directory containing this script. */
const repoRoot = path.resolve(
  path.dirname(fileURLToPath(import.meta.url)),
  "..",
);

loadEnv({ path: path.join(repoRoot, ".env") });

/** Left column width for log alignment (longest pipeline source id today). */
const ID_COL = 15;

/** JobSpy serves Indeed / LinkedIn / Glassdoor; CLI filter accepts those ids as aliases. */
const JOBSPY_SITE_IDS = ["indeed", "linkedin", "glassdoor"] as const;

/**
 * Returns a copy of `ids` that additionally contains `"jobspy"` when any of the
 * JobSpy site aliases was requested. The input set is not mutated and the
 * alias ids themselves are kept in the result.
 */
function expandSmokeFilter(ids: Set<string>): Set<string> {
  const next = new Set(ids);
  if (JOBSPY_SITE_IDS.some((site) => next.has(site))) {
    next.add("jobspy");
  }
  return next;
}

/** One extractor manifest to smoke-test, plus the inputs to run it with. */
interface Target {
  /** Source id; also the value matched against the CLI filter. */
  id: string;
  /** Module specifier handed to dynamic `import()`. */
  importPath: string;
  /** Env vars required to run; the target is skipped if any is missing. */
  needs?: string[];
  /** When set, skip with this message (e.g. local Camoufox not installed). */
  skipReason?: string;
  /** Settings passed to the manifest via the runtime context. */
  settings?: Record<string, string>;
  /** When set, replaces the default smoke search terms (use [] for sources that filter client-side). */
  searchTerms?: string[];
  /** Geography passed as `selectedCountry` (must match what each extractor expects). */
  selectedCountry?: string;
}

/** True when a Camoufox `version.json` exists in either of the two probed cache directories. */
function camoufoxInstalled(): boolean {
  const home = homedir();
  const candidates = [
    path.join(home, "Library", "Caches", "camoufox", "version.json"),
    path.join(home, ".cache", "camoufox", "version.json"),
  ];
  return candidates.some((filePath) => existsSync(filePath));
}

/**
 * Skip message shared by the targets that set it below; `undefined` when
 * Camoufox is present. Computed once so the filesystem is not probed per target.
 */
const CAMOUFOX_SKIP_REASON: string | undefined = camoufoxInstalled()
  ? undefined
  : "Camoufox not installed (run: npx camoufox-js fetch)";

const ALL_TARGETS: Target[] = [
  {
    id: "adzuna",
    importPath: "../extractors/adzuna/manifest",
    needs: ["ADZUNA_APP_ID", "ADZUNA_APP_KEY"],
    selectedCountry: "United States",
    settings: {
      adzunaMaxJobsPerTerm: "10",
      searchCities: "United States",
    },
  },
  {
    id: "arbeitnow",
    importPath: "../extractors/arbeitnow/manifest",
    settings: { arbeitnowMaxJobsPerTerm: "10" },
  },
  {
    id: "arcdev",
    importPath: "../extractors/arcdev/manifest",
    settings: {
      arcRemoteJobsPaths: JSON.stringify(["/remote-jobs/playwright"]),
      arcMaxJobsPerPath: "20",
    },
  },
  {
    id: "ashby",
    importPath: "../extractors/ashby/manifest",
    settings: {
      ashbyCompanies: JSON.stringify(["ramp", "linear"]),
    },
  },
  {
    id: "bctenet",
    importPath: "../extractors/bctenet/manifest",
    selectedCountry: "Canada",
    settings: {
      bctenetMaxJobsPerTerm: "25",
    },
  },
  {
    id: "careerjet",
    importPath: "../extractors/careerjet/manifest",
    needs: ["CAREERJET_AFFID", "CAREERJET_REFERER", "CAREERJET_USER_IP"],
    settings: { careerjetMaxJobsPerTerm: "10", searchCities: "United States" },
  },
  {
    id: "eluta",
    importPath: "../extractors/eluta/manifest",
    selectedCountry: "Canada",
    settings: {
      elutaRssLocations: JSON.stringify(["Toronto, ON"]),
      elutaMaxJobsPerTerm: "15",
    },
  },
  {
    id: "fourdayweek",
    importPath: "../extractors/fourdayweek/manifest",
    settings: { fourdayweekMaxJobsPerTerm: "10" },
  },
  {
    id: "gradcracker",
    importPath: "../extractors/gradcracker/manifest",
    selectedCountry: "United Kingdom",
    settings: { gradcrackerMaxJobsPerTerm: "10" },
    skipReason: CAMOUFOX_SKIP_REASON,
  },
  {
    id: "greenhouse",
    importPath: "../extractors/greenhouse/manifest",
    settings: {
      greenhouseCompanies: JSON.stringify(["stripe", "airbnb"]),
    },
  },
  {
    id: "himalayas",
    importPath: "../extractors/himalayas/manifest",
    settings: { himalayasMaxJobsPerTerm: "10" },
  },
  {
    id: "hiringcafe",
    importPath: "../extractors/hiringcafe/manifest",
    selectedCountry: "United Kingdom",
    settings: {
      searchCities: "UK",
      jobspyResultsWanted: "10",
      workplaceTypes: JSON.stringify(["remote", "hybrid", "onsite"]),
    },
    skipReason: CAMOUFOX_SKIP_REASON,
  },
  {
    id: "icims",
    importPath: "../extractors/icims/manifest",
    searchTerms: [],
    settings: {
      icimsTenants: JSON.stringify(["careers-appliedsystems.icims.com"]),
      icimsMaxJobsPerTenant: "15",
      icimsMaxPagesPerSearch: "2",
    },
  },
  {
    id: "jobicy",
    importPath: "../extractors/jobicy/manifest",
    settings: { jobicyMaxJobsPerTerm: "10" },
  },
  {
    id: "jobspy",
    importPath: "../extractors/jobspy/manifest",
    selectedCountry: "United Kingdom",
    settings: {
      searchCities: "UK",
      jobspyCountryIndeed: "UK",
      jobspyResultsWanted: "5",
      workplaceTypes: JSON.stringify(["remote", "hybrid", "onsite"]),
    },
  },
  {
    id: "jooble",
    importPath: "../extractors/jooble/manifest",
    needs: ["JOOBLE_API_KEY"],
    settings: { joobleMaxJobsPerTerm: "10", searchCities: "United States" },
  },
  {
    id: "lever",
    importPath: "../extractors/lever/manifest",
    settings: {
      leverCompanies: JSON.stringify(["palantir", "netflix"]),
    },
  },
  {
    id: "qajobsboard",
    importPath: "../extractors/qajobsboard/manifest",
    settings: { qajobsboardMaxJobsPerTerm: "25" },
  },
  {
    id: "reed",
    importPath: "../extractors/reed/manifest",
    needs: ["REED_API_KEY"],
    selectedCountry: "United Kingdom",
    settings: { reedMaxJobsPerTerm: "10" },
  },
  {
    id: "remoteok",
    importPath: "../extractors/remoteok/manifest",
    settings: { remoteokMaxJobsPerTerm: "10" },
  },
  {
    id: "remotive",
    importPath: "../extractors/remotive/manifest",
    settings: { remotiveMaxJobsPerTerm: "10" },
  },
  {
    id: "smartrecruiters",
    importPath: "../extractors/smartrecruiters/manifest",
    settings: {
      smartrecruitersCompanies: JSON.stringify(["smartrecruiters"]),
      smartrecruitersMaxJobsPerCompany: "5",
    },
  },
  {
    id: "startupjobs",
    importPath: "../extractors/startupjobs/src/manifest",
    selectedCountry: "United Kingdom",
    settings: {
      searchCities: "UK",
      startupjobsMaxJobsPerTerm: "10",
      workplaceTypes: JSON.stringify(["remote", "hybrid", "onsite"]),
    },
  },
  {
    id: "themuse",
    importPath: "../extractors/themuse/manifest",
    settings: { themuseMaxJobsPerTerm: "10" },
  },
  {
    id: "ukvisajobs",
    importPath: "../extractors/ukvisajobs/manifest",
    needs: ["UKVISAJOBS_EMAIL", "UKVISAJOBS_PASSWORD"],
    selectedCountry: "United Kingdom",
    settings: { ukvisajobsMaxJobs: "10" },
  },
  {
    id: "usajobs",
    importPath: "../extractors/usajobs/manifest",
    needs: ["USAJOBS_API_KEY", "USAJOBS_USER_AGENT"],
    settings: { usajobsMaxJobsPerTerm: "10" },
  },
  {
    id: "weworkremotely",
    importPath: "../extractors/weworkremotely/manifest",
    settings: { weworkremotelyMaxJobsPerTerm: "10" },
  },
  {
    id: "workday",
    importPath: "../extractors/workday/manifest",
    settings: {
      workdayTenants: JSON.stringify([
        "https://nvidia.wd5.myworkdayjobs.com/en-US/NVIDIAExternalCareerSite",
      ]),
    },
  },
];

/**
 * Builds the runtime context handed to `manifest.run`.
 *
 * Defaults: search terms fall back to `["software engineer"]` only when
 * `searchTerms` is omitted (an explicit `[]` is passed through), and
 * `selectedCountry` falls back to `"United States"`. The callbacks are stubs:
 * no existing job URLs, never cancelled, progress ignored.
 */
function buildContext(args: {
  source: string;
  settings: Record<string, string>;
  searchTerms?: string[];
  selectedCountry?: string;
}): ExtractorRuntimeContext {
  return {
    source: args.source,
    selectedSources: [args.source],
    settings: args.settings,
    searchTerms: args.searchTerms ?? ["software engineer"],
    selectedCountry: args.selectedCountry ?? "United States",
    getExistingJobUrls: async () => [],
    shouldCancel: () => false,
    onProgress: () => {},
  };
}

/** Right-pads `s` with spaces to at least `n` characters; longer strings are returned unchanged. */
function pad(s: string, n: number): string {
  return s.padEnd(n);
}

/** Extracts a printable message from a caught value without assuming it is an `Error`. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Runs a single target and prints exactly one status line for it:
 * SKIP (explicit skip reason or missing env vars), FAIL (import error or no
 * manifest export), OK / ERR (per `result.success`), or CRASH (`run` threw).
 * Never rejects for those cases, so one broken extractor does not stop the sweep.
 */
async function runOne(target: Target): Promise<void> {
  const label = pad(target.id, ID_COL);

  if (target.skipReason) {
    console.log(`${label} SKIP ${target.skipReason}`);
    return;
  }

  const missing = (target.needs ?? []).filter((k) => !process.env[k]);
  if (missing.length > 0) {
    console.log(`${label} SKIP missing env: ${missing.join(", ")}`);
    return;
  }

  let mod: { manifest?: ExtractorManifest; default?: ExtractorManifest };
  try {
    mod = await import(target.importPath);
  } catch (err: unknown) {
    console.log(`${label} FAIL import error: ${errorMessage(err)}`);
    return;
  }

  // Accept either a named `manifest` export or a default export.
  const manifest = mod.manifest ?? mod.default;
  if (!manifest) {
    console.log(`${label} FAIL manifest export missing`);
    return;
  }

  const started = Date.now();
  try {
    const ctx = buildContext({
      source: target.id,
      settings: target.settings ?? {},
      searchTerms: target.searchTerms,
      selectedCountry: target.selectedCountry,
    });
    const result = await manifest.run(ctx);
    const ms = Date.now() - started;
    const status = result.success ? "OK " : "ERR ";
    const sample = result.jobs[0];
    const sampleStr = sample
      ? ` | first: "${sample.title}" @ ${sample.employer}`
      : "";
    console.log(
      `${label} ${status} jobs=${result.jobs.length} ${ms}ms${result.error ? ` | error: ${result.error}` : ""}${sampleStr}`,
    );
  } catch (err: unknown) {
    const ms = Date.now() - started;
    console.log(`${label} CRASH ${ms}ms ${errorMessage(err)}`);
  }
}

/**
 * Entry point. Reads an optional comma-separated id list from the first CLI
 * argument, expands JobSpy aliases, and runs the selected targets one at a
 * time in `ALL_TARGETS` order. With no argument every target runs.
 */
async function main(): Promise<void> {
  const requested = (process.argv[2] ?? "").trim();
  const filter = requested
    ? expandSmokeFilter(
        new Set(
          requested
            .split(",")
            .map((s) => s.trim())
            .filter(Boolean),
        ),
      )
    : null;

  if (filter) {
    // A mistyped id would otherwise just shrink the run with no explanation.
    const known = new Set<string>([
      ...ALL_TARGETS.map((t) => t.id),
      ...JOBSPY_SITE_IDS,
    ]);
    const unknown = Array.from(filter).filter((id) => !known.has(id));
    if (unknown.length > 0) {
      console.warn(`Unknown extractor id(s) ignored: ${unknown.join(", ")}`);
    }
  }

  const targets = filter
    ? ALL_TARGETS.filter((t) => filter.has(t.id))
    : ALL_TARGETS;

  console.log(`Smoke testing ${targets.length} extractor(s)...\n`);
  for (const t of targets) {
    await runOne(t);
  }
}

main().catch((err) => {
  console.error(err);
  process.exit(1);
});