/**
 * Arc.dev remote jobs — parse embedded Next.js __NEXT_DATA__ from SSR HTML.
 *
 * Listing URLs look like https://arc.dev/remote-jobs/playwright
 */
import type {
  ExtractorManifest,
  ExtractorRunResult,
} from "@shared/types/extractors";
import type { CreateJobInput } from "@shared/types/jobs";

/** Origin of the Arc.dev site. */
const ORIGIN = "https://arc.dev";

/** Category entry attached to a job; both fields are optional. */
interface ArcCategory {
  name?: string;
  urlString?: string;
}

/** Company object nested inside a job; every field is optional. */
interface ArcCompanyJson {
  randomKey?: string | null;
  urlString?: string;
  name?: string;
}

/**
 * Job record as read by this module. Every field is optional, so each helper
 * below checks a field before using it.
 */
interface ArcJobJson {
  randomKey?: string;
  title?: string;
  jobType?: string;
  jobRole?: string;
  urlString?: string;
  postedAt?: number;
  company?: ArcCompanyJson;
  categories?: ArcCategory[];
  requiredCountries?: string[];
  minAnnualSalary?: number | null;
  maxAnnualSalary?: number | null;
  minHourlyRate?: number | null;
  maxHourlyRate?: number | null;
  timeZone?: string | null;
  positionType?: string;
  experienceLevel?: string;
  experienceLevels?: string[];
}

/**
 * Parses a list of paths from a raw configuration string.
 *
 * Two formats are accepted:
 * - a JSON array, from which trimmed, non-empty string entries are kept
 *   (non-string entries are dropped);
 * - any other text, which is split on newlines, commas, semicolons and pipes.
 *
 * @param raw Raw configuration value; `undefined` or empty yields `[]`.
 * @returns The trimmed, non-empty entries in their original order.
 */
function readPaths(raw: string | undefined): string[] {
  if (!raw) return [];

  try {
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed)) {
      return parsed
        .map((entry) => (typeof entry === "string" ? entry.trim() : ""))
        .filter(Boolean);
    }
  } catch {
    // Not JSON: deliberately fall through to the delimiter-based parsing.
  }

  // Also reached for valid JSON that is not an array.
  return raw
    .split(/[\n,;|]+/)
    .map((entry) => entry.trim())
    .filter(Boolean);
}

/**
 * Returns the listing paths to use: the value of the `ARC_REMOTE_JOBS_PATHS`
 * environment variable when it yields at least one entry, otherwise the two
 * built-in defaults.
 */
function defaultArcPaths(): string[] {
  // `process` may be absent outside Node; treat that as "not configured".
  const raw =
    typeof process !== "undefined" ? process.env.ARC_REMOTE_JOBS_PATHS : "";
  const parsed = readPaths(raw);
  return parsed.length > 0
    ? parsed
    : ["/remote-jobs/playwright", "/remote-jobs/cypress"];
}

/**
 * Narrows an unknown value to a trimmed, non-empty string.
 *
 * @returns The trimmed string, or `undefined` for non-strings and blank text.
 */
function asString(value: unknown): string | undefined {
  if (typeof value !== "string") return undefined;
  const t = value.trim();
  return t ? t : undefined;
}

/**
 * Builds one lower-cased search string from the names and URL slugs of a
 * job's categories.
 *
 * @returns The joined text, or `""` when `categories` is not an array.
 */
function categoryHaystack(job: ArcJobJson): string {
  if (!Array.isArray(job.categories)) return "";
  return job.categories
    // `c?.` tolerates null entries, which the static type does not rule out
    // for parsed JSON.
    .map((c) => `${c?.name ?? ""} ${c?.urlString ?? ""}`)
    .join(" ")
    .toLowerCase();
}

/**
 * Case-insensitive substring match of `term` against the job's title,
 * categories, role, position type and experience level(s).
 *
 * @param job Job record to inspect.
 * @param term Search term; compared in lower case.
 * @returns `true` if any inspected field contains the term.
 */
function matchesTerm(job: ArcJobJson, term: string): boolean {
  const lower = term.toLowerCase();
  // Non-string values never match instead of throwing on `.toLowerCase()`.
  const contains = (value: unknown): boolean =>
    typeof value === "string" && value.toLowerCase().includes(lower);

  if (contains(job.title)) return true;
  if (categoryHaystack(job).includes(lower)) return true;
  if (contains(job.jobRole)) return true;
  if (contains(job.positionType)) return true;
  if (
    Array.isArray(job.experienceLevels) &&
    job.experienceLevels.some((level) => contains(level))
  ) {
    return true;
  }
  return contains(job.experienceLevel);
}

/**
 * Formats the job's salary information as a human-readable string.
 *
 * Annual part: `USD min–max / yr`, `USD min+ / yr` (only a minimum) or
 * `USD up to max / yr` (only a maximum). Hourly part: `$min–max / hr`, with
 * `?` standing in for a missing bound. Parts are joined with `"; "`.
 *
 * @returns The formatted text, or `undefined` when no salary figure is present.
 */
function salaryParts(job: ArcJobJson): string | undefined {
  const bits: string[] = [];
  const hasMinAnnual = typeof job.minAnnualSalary === "number";
  const hasMaxAnnual = typeof job.maxAnnualSalary === "number";

  if (hasMinAnnual && hasMaxAnnual) {
    bits.push(`USD ${job.minAnnualSalary}–${job.maxAnnualSalary} / yr`);
  } else if (hasMinAnnual) {
    bits.push(`USD ${job.minAnnualSalary}+ / yr`);
  } else if (hasMaxAnnual) {
    // A maximum without a minimum used to be dropped silently.
    bits.push(`USD up to ${job.maxAnnualSalary} / yr`);
  }

  if (
    typeof job.minHourlyRate === "number" ||
    typeof job.maxHourlyRate === "number"
  ) {
    bits.push(`$${job.minHourlyRate ?? "?"}–${job.maxHourlyRate ?? "?"} / hr`);
  }

  return bits.length > 0 ? bits.join("; ") : undefined;
}

/**
 * Picks a location label for the job: the required countries joined with
 * `", "`, else the time zone, else the literal `"Remote"`.
 */
function locationLine(job: ArcJobJson): string {
  if (
    Array.isArray(job.requiredCountries) &&
    job.requiredCountries.length > 0
  ) {
    return job.requiredCountries.join(", ");
  }
  if (job.timeZone) return job.timeZone;
  return "Remote";
}

/**
 * Converts a `postedAt` timestamp to an ISO-8601 string. The value is
 * multiplied by 1000 before being handed to `Date`, i.e. it is treated as
 * seconds.
 *
 * @returns The ISO string, or `undefined` when the input is missing, not a
 *   finite number, or falls outside the range `Date` can represent.
 */
function postedIso(postedAt: number | undefined): string | undefined {
  if (typeof postedAt !== "number" || !Number.isFinite(postedAt)) {
    return undefined;
  }
  const date = new Date(postedAt * 1000);
  // A finite input can still exceed the Date range (|ms| > 8.64e15); calling
  // toISOString() on the resulting invalid Date would throw a RangeError.
  if (Number.isNaN(date.getTime())) return undefined;
  return date.toISOString();
}

function parseNextPageProps(html: string): {
  arcJobs: ArcJobJson[];
  externalJobs: ArcJobJson[];
} | null {
  const match = html.match( /