/**
 * Greenhouse public job boards API.
 *
 * https://developers.greenhouse.io/job-board.html
 * GET https://boards-api.greenhouse.io/v1/boards/{company}/jobs?content=true
 *
 * No auth. Each entry in `greenhouseCompanies` is fetched independently.
 */
import type {
  ExtractorManifest,
  ExtractorRunResult,
} from "@shared/types/extractors";
import type { CreateJobInput } from "@shared/types/jobs";

interface GhDepartment {
  id?: number;
  name?: string;
}

interface GhMetadata {
  name?: string;
  value?: unknown;
}

interface GhJob {
  id?: number;
  title?: string;
  absolute_url?: string;
  internal_job_id?: number;
  updated_at?: string;
  requisition_id?: string | null;
  location?: { name?: string };
  content?: string; // HTML, may be entity-encoded
  metadata?: GhMetadata[];
  departments?: GhDepartment[];
  offices?: Array<{ name?: string }>;
}

interface GhResponse {
  jobs?: GhJob[];
}

/** Returns the trimmed string, or `undefined` for non-strings and blank strings. */
function asString(value: unknown): string | undefined {
  if (typeof value !== "string") return undefined;
  const trimmed = value.trim();
  return trimmed ? trimmed : undefined;
}

/**
 * Parses the configured company list.
 *
 * Accepts either a JSON array (non-string entries are dropped) or a plain
 * string of slugs separated by newlines, commas, semicolons or pipes.
 *
 * @param raw - Raw setting value; `undefined` or empty yields an empty list.
 * @returns Trimmed, non-empty company slugs in their original order.
 */
function readCompanies(raw: string | undefined): string[] {
  if (!raw) return [];
  try {
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed)) {
      return parsed
        .map((entry: unknown) => (typeof entry === "string" ? entry.trim() : ""))
        .filter(Boolean);
    }
  } catch {
    // Not JSON: fall through to delimiter-based splitting.
  }
  return raw
    .split(/[\n,;|]+/)
    .map((entry) => entry.trim())
    .filter(Boolean);
}

/**
 * Named entities this module decodes. A Map (rather than an object literal)
 * keeps names such as `constructor` from resolving to prototype members.
 */
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", " "],
]);

/**
 * Decodes one level of HTML entity encoding.
 *
 * Handles the named entities in `NAMED_ENTITIES` plus decimal (`&#39;`) and
 * hexadecimal (`&#x27;`) character references. Everything is decoded in a
 * single pass, so `&amp;lt;` becomes `&lt;` rather than `<`. Unknown or
 * out-of-range references are left untouched.
 *
 * @param value - Text that may contain HTML entities.
 * @returns The text with recognised entities replaced by their characters.
 */
function decodeHtmlEntities(value: string): string {
  return value.replace(
    /&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z]+);/g,
    (match: string, body: string): string => {
      if (!body.startsWith("#")) {
        return NAMED_ENTITIES.get(body) ?? match;
      }
      const isHex = body[1] === "x" || body[1] === "X";
      const codePoint = Number.parseInt(
        body.slice(isHex ? 2 : 1),
        isHex ? 16 : 10,
      );
      // String.fromCodePoint throws on values above U+10FFFF; keep the
      // original text for anything that is not a usable code point.
      const isValid =
        Number.isInteger(codePoint) && codePoint > 0 && codePoint <= 0x10ffff;
      return isValid ? String.fromCodePoint(codePoint) : match;
    },
  );
}

/**
 * Converts a Greenhouse job into a `CreateJobInput`.
 *
 * @param job - Job entry from the board response.
 * @param company - Board slug; also used to derive a display name
 *   (`acme-corp` becomes `Acme Corp`).
 * @returns The mapped job, or `null` when the job has no usable
 *   `absolute_url`.
 */
function mapJob(job: GhJob, company: string): CreateJobInput | null {
  const jobUrl = asString(job.absolute_url);
  if (!jobUrl) return null;

  const employer = company
    .split(/[-_]/)
    .filter(Boolean)
    .map((part) => part.charAt(0).toUpperCase() + part.slice(1))
    .join(" ");

  const officeNames =
    job.offices
      ?.map((office) => asString(office.name))
      .filter((name): name is string => Boolean(name)) ?? [];

  const departmentNames =
    job.departments
      ?.map((dept) => asString(dept.name))
      .filter((name): name is string => Boolean(name)) ?? [];

  const description = job.content
    ? decodeHtmlEntities(job.content)
    : undefined;

  return {
    source: "greenhouse",
    sourceJobId: job.id != null ? String(job.id) : undefined,
    title: asString(job.title) ?? "Unknown Title",
    employer: employer || company,
    jobUrl,
    applicationLink: jobUrl,
    // Prefer the job's own location; fall back to the joined office names.
    location:
      asString(job.location?.name) ?? (officeNames.join("; ") || undefined),
    jobFunction:
      departmentNames.length > 0 ? departmentNames.join(", ") : undefined,
    datePosted: asString(job.updated_at),
    jobDescription: description,
  };
}

/**
 * Fetches every job on one company's board.
 *
 * @param company - Board slug; URL-encoded before being placed in the path.
 * @returns The board's jobs; an empty list when the board responds 404 or
 *   the body carries no `jobs` array.
 * @throws Error when the response status is any other non-2xx value.
 */
async function fetchCompany(company: string): Promise<GhJob[]> {
  const url = `https://boards-api.greenhouse.io/v1/boards/${encodeURIComponent(company)}/jobs?content=true`;
  const response = await fetch(url, {
    headers: { Accept: "application/json" },
  });
  if (response.status === 404) return [];
  if (!response.ok) {
    throw new Error(
      `Greenhouse request for "${company}" failed with status ${response.status}`,
    );
  }
  // The body is external input: tolerate `null` or a missing `jobs` field.
  const body = (await response.json()) as GhResponse | null;
  const jobs = body?.jobs;
  return Array.isArray(jobs) ? jobs : [];
}

/**
 * Extractor manifest for Greenhouse boards.
 *
 * `run` walks the configured companies sequentially, reports progress before
 * and after each one, and de-duplicates jobs by source id (or URL when the id
 * is missing). Cancellation is checked before each company and yields the
 * jobs gathered so far. A failed request stops the run and returns the jobs
 * gathered so far with `success: false`.
 */
export const manifest: ExtractorManifest = {
  id: "greenhouse",
  displayName: "Greenhouse (ATS)",
  providesSources: ["greenhouse"],

  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    const companies = readCompanies(context.settings.greenhouseCompanies);
    if (companies.length === 0) {
      return {
        success: true,
        jobs: [],
        error:
          "No Greenhouse companies configured. Set GREENHOUSE_COMPANIES or the greenhouseCompanies setting (comma- or newline-separated slugs).",
      };
    }

    const seen = new Set<string>();
    const out: CreateJobInput[] = [];

    try {
      for (const [i, company] of companies.entries()) {
        if (context.shouldCancel?.()) break;

        context.onProgress?.({
          phase: "list",
          termsProcessed: i,
          termsTotal: companies.length,
          currentUrl: company,
          detail: `Greenhouse: ${company} (${i + 1}/${companies.length})`,
        });

        let added = 0;
        const jobs = await fetchCompany(company);
        for (const job of jobs) {
          const mapped = mapJob(job, company);
          if (!mapped) continue;
          const key = mapped.sourceJobId || mapped.jobUrl;
          if (seen.has(key)) continue;
          seen.add(key);
          out.push(mapped);
          added += 1;
        }

        context.onProgress?.({
          phase: "list",
          termsProcessed: i + 1,
          termsTotal: companies.length,
          currentUrl: company,
          jobPagesProcessed: out.length,
          detail: `Greenhouse: ${company} → ${added} jobs (${out.length} total)`,
        });
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      return { success: false, jobs: out, error: message };
    }

    return { success: true, jobs: out };
  },
};

export default manifest;