ilia c840f289e1
Some checks failed
CI / Linting (Biome) (push) Failing after 40s
CI / Tests (push) Successful in 5m54s
CI / Type Check (adzuna-extractor) (push) Successful in 1m8s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m11s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m8s
CI / Type Check (orchestrator) (push) Successful in 1m23s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m6s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m7s
CI / Documentation (push) Successful in 1m54s
feat(extractors): expand catalog, smoke coverage, and sourcing docs
Adds Arc.dev, BC T-Net, Eluta, iCIMS tenants, QAJobsBoard, and SmartRecruiters
manifests with registry/settings/UI wiring; registers full extractor list in
smoke-extractors and documents supplementary board access paths. Aligns Careerjet
v4 with the url query parameter and fixes strict typing in QAJobsBoard.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-15 22:36:23 -04:00

218 lines
6.1 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* QAJobsBoard (JobBoardly) — public jobs listing JSON.
*
* https://qajobsboard.jobboardly.com/jobs.json
*/
import type {
ExtractorManifest,
ExtractorRunResult,
} from "@shared/types/extractors";
import type { CreateJobInput } from "@shared/types/jobs";
/** Public JSON endpoint fetched by the extractor to obtain the job listings. */
const JOBS_URL = "https://qajobsboard.jobboardly.com/jobs.json";
/** One entry of a listing's `categories` array; only `name` is read in this file. */
interface JobCategory {
name?: string;
}
/** Salary details of a listing, turned into a display string by `salaryLabel`. */
interface SalaryBand {
/** Used as a `"<schedule>: "` prefix of the salary label when present. */
schedule?: string;
/** Lower bound; may be absent or `null`. */
minimum?: number | null;
/** Upper bound; may be absent or `null`. */
maximum?: number | null;
}
/** Wrapper around a listing's description markup. */
interface DescriptionBlock {
/** HTML markup; reduced to plain text with `stripHtml` before use. */
html?: string;
}
/**
* Shape of one element of the jobs.json array as consumed by this file.
*
* Every field is optional because the payload is cast from `unknown` without
* validation, so readers must tolerate missing values.
*/
interface QaJobBoardlyJob {
/** Job title; searched by `matchesTerm` and copied to the mapped job. */
title?: string;
/** Mapped to the job's `jobType`. */
arrangement?: string;
/** Fallback location text; also compared against "remote" to set `isRemote`. */
location?: string;
/** Preferred source of the location label when it has non-blank entries. */
location_limits?: string[];
/** Mapped to the job's `datePosted`. */
published_at?: string;
/** Application URL; the listing URL is used when this is missing. */
application_link?: string;
description?: DescriptionBlock;
company?: { name?: string; logo?: string };
salary?: SalaryBand;
categories?: JobCategory[];
/** `self` is the listing URL; rows without it are dropped by `mapJob`. */
links?: { self?: string };
}
/**
 * Normalizes an arbitrary value to a trimmed, non-empty string.
 *
 * @param value - Any value, typically a field of the untyped JSON payload.
 * @returns The trimmed text, or `undefined` when `value` is not a string or is
 * empty / whitespace-only.
 */
function asString(value: unknown): string | undefined {
  if (typeof value === "string") {
    const text = value.trim();
    if (text.length > 0) {
      return text;
    }
  }
  return undefined;
}
/**
 * Decodes the common HTML character references found in listing text.
 *
 * Decoding happens in a single pass so that already-decoded output is never
 * decoded again: `&amp;lt;` yields the literal text `&lt;`, not `<`.
 *
 * Supported references: `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, `&nbsp;`
 * (mapped to a regular space) and numeric references in decimal (`&#39;`) or
 * hexadecimal (`&#x27;`) form. Unknown names and out-of-range code points are
 * left untouched.
 *
 * @param value - Text that may contain HTML character references.
 * @returns The text with supported references replaced by their characters.
 */
function decodeHtmlEntities(value: string): string {
  return value.replace(
    /&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z]+);/g,
    (entity: string, body: string): string => {
      if (body.charAt(0) === "#") {
        const marker = body.charAt(1);
        const code =
          marker === "x" || marker === "X"
            ? Number.parseInt(body.slice(2), 16)
            : Number.parseInt(body.slice(1), 10);
        // String.fromCodePoint throws above U+10FFFF; keep such input verbatim.
        return code > 0 && code <= 0x10ffff
          ? String.fromCodePoint(code)
          : entity;
      }
      switch (body) {
        case "amp":
          return "&";
        case "lt":
          return "<";
        case "gt":
          return ">";
        case "quot":
          return '"';
        case "apos":
          return "'";
        case "nbsp":
          return " ";
        default:
          return entity;
      }
    },
  );
}
/**
 * Reduces an HTML fragment to single-line plain text.
 *
 * Each tag is replaced by a space (so adjacent elements do not fuse into one
 * word), character references are decoded, and runs of whitespace collapse to
 * one space.
 *
 * @param html - HTML markup.
 * @returns Trimmed plain text.
 */
function stripHtml(html: string): string {
  const withoutTags = html.replace(/<[^>]+>/g, " ");
  const decoded = decodeHtmlEntities(withoutTags);
  const collapsed = decoded.replace(/\s+/g, " ");
  return collapsed.trim();
}
/**
 * Builds a human-readable salary label from a salary band.
 *
 * Formats (each optionally prefixed with `"<schedule>: "`):
 * - both bounds: `"<min>-<max>"`
 * - minimum only: `"<min>+"`
 * - maximum only: `"up to <max>"`
 * - no usable bound: the schedule prefix alone, or `undefined` without one
 *
 * A bound is usable only when it is a finite number; `null`, `NaN` and
 * infinities are ignored.
 *
 * @param raw - Salary band from the listing, if any.
 * @returns The label, or `undefined` when there is nothing to show.
 */
function salaryLabel(raw: SalaryBand | undefined): string | undefined {
  if (!raw) return undefined;

  const prefix = raw.schedule ? `${raw.schedule}: ` : "";
  const min =
    typeof raw.minimum === "number" && Number.isFinite(raw.minimum)
      ? raw.minimum
      : undefined;
  const max =
    typeof raw.maximum === "number" && Number.isFinite(raw.maximum)
      ? raw.maximum
      : undefined;

  if (min !== undefined && max !== undefined) {
    // The bounds need a separator, otherwise 50000 and 70000 read as 5000070000.
    return `${prefix}${min}-${max}`;
  }
  if (min !== undefined) return `${prefix}${min}+`;
  // Mark an upper bound explicitly so it is not mistaken for an exact figure.
  if (max !== undefined) return `${prefix}up to ${max}`;

  return prefix.trim() || undefined;
}
/**
 * Picks the location text for a listing.
 *
 * Non-blank `location_limits` entries win and are joined with ", "; otherwise
 * the trimmed `location` field is used; otherwise the result is "Unknown".
 *
 * @param job - Raw listing.
 * @returns A non-empty location label.
 */
function locationLabel(job: QaJobBoardlyJob): string {
  if (Array.isArray(job.location_limits)) {
    const regions: string[] = [];
    for (const entry of job.location_limits) {
      if (typeof entry === "string" && entry.trim().length > 0) {
        regions.push(entry);
      }
    }
    if (regions.length > 0) {
      return regions.join(", ");
    }
  }
  return asString(job.location) ?? "Unknown";
}
/**
 * Tells whether a listing mentions a search term.
 *
 * The comparison is a case-insensitive substring match, tried in order
 * against the title, the space-joined category names, and the plain-text
 * description. Fields with an unexpected runtime type (the payload is not
 * validated) are treated as empty instead of throwing, so one malformed row
 * cannot abort the whole extraction.
 *
 * @param job - Raw listing.
 * @param term - Search term.
 * @returns `true` when any of the searched fields contains `term`.
 */
function matchesTerm(job: QaJobBoardlyJob, term: string): boolean {
  const needle = term.toLowerCase();

  if (
    typeof job.title === "string" &&
    job.title.toLowerCase().includes(needle)
  ) {
    return true;
  }

  // `c?.name` mirrors mapJob: category entries may be null in the raw payload.
  const categoryText = Array.isArray(job.categories)
    ? job.categories
        .map((c) => (typeof c?.name === "string" ? c.name.toLowerCase() : ""))
        .join(" ")
    : "";
  if (categoryText.includes(needle)) return true;

  const html = job.description?.html;
  return (
    typeof html === "string" && stripHtml(html).toLowerCase().includes(needle)
  );
}
/**
 * Converts a raw listing into the job-creation input.
 *
 * @param raw - Raw listing from the jobs.json payload.
 * @returns The mapped job, or `null` when the listing has no `links.self` URL
 * (without it there is nothing to link to or deduplicate on).
 */
function mapJob(raw: QaJobBoardlyJob): CreateJobInput | null {
  const jobUrl = asString(raw.links?.self);
  if (!jobUrl) return null;

  // Drop leading whitespace/dash decoration from the company name.
  const employer =
    asString(raw.company?.name)
      ?.replace(/^[\s-]+/, "")
      .trim() || "Unknown Employer";

  const applicationLink = asString(raw.application_link) ?? jobUrl;

  // Guard the type: the payload is unvalidated and stripHtml needs a string.
  const descHtml = raw.description?.html;
  const jobDescription =
    typeof descHtml === "string" ? stripHtml(descHtml) || undefined : undefined;

  const salary = salaryLabel(raw.salary);

  // An empty category list yields `undefined`, same as a missing one.
  const cats = Array.isArray(raw.categories)
    ? raw.categories
        .map((c) => asString(c?.name))
        .filter((v): v is string => Boolean(v))
        .join(", ") || undefined
    : undefined;

  // Ignore trailing slashes so ".../jobs/123/" still produces "123".
  const sourceJobId =
    jobUrl.replace(/\/+$/, "").split("/").pop() || undefined;

  return {
    source: "qajobsboard",
    sourceJobId,
    title: asString(raw.title) ?? "Unknown Title",
    employer,
    jobUrl,
    applicationLink,
    location: locationLabel(raw),
    isRemote: asString(raw.location)?.toLowerCase() === "remote",
    datePosted: asString(raw.published_at),
    jobDescription,
    jobType: asString(raw.arrangement),
    salary,
    disciplines: cats,
    companyLogo: asString(raw.company?.logo),
  };
}
/**
 * Extractor manifest for QAJobsBoard.
 *
 * `run` downloads the complete jobs.json listing once, keeps the rows that
 * match at least one search term (all rows when no terms are given), maps
 * them, removes duplicates, and stops at the configured cap.
 */
export const manifest: ExtractorManifest = {
  id: "qajobsboard",
  displayName: "QAJobsBoard",
  providesSources: ["qajobsboard"],
  /**
   * Fetches and filters the listings.
   *
   * @param context - Run context supplying search terms, settings, and the
   * optional progress / cancellation callbacks.
   * @returns `success: true` with the mapped jobs (empty when cancelled), or
   * `success: false` with an error message when the request or parsing fails.
   */
  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    // Cap: setting value clamped to [1, 500]; 100 when unset or not a number.
    const maxJobs = context.settings.qajobsboardMaxJobsPerTerm
      ? Number.parseInt(context.settings.qajobsboardMaxJobsPerTerm, 10)
      : 100;
    const cap = Number.isFinite(maxJobs)
      ? Math.min(Math.max(maxJobs, 1), 500)
      : 100;
    const terms = context.searchTerms;

    context.onProgress?.({
      phase: "list",
      termsProcessed: 0,
      termsTotal: 1,
      currentUrl: JOBS_URL,
      detail: "QAJobsBoard: fetching jobs.json",
    });

    try {
      const response = await fetch(JOBS_URL, {
        headers: { Accept: "application/json", "User-Agent": "JobOps/1.0" },
      });
      if (!response.ok) {
        throw new Error(
          `QAJobsBoard request failed with status ${response.status}`,
        );
      }

      const body = (await response.json()) as unknown;

      // The download may take a while; honor a cancellation requested meanwhile.
      if (context.shouldCancel?.()) return { success: true, jobs: [] };

      // A non-array payload is treated as "no listings".
      const rows: unknown[] = Array.isArray(body) ? body : [];
      const seen = new Set<string>();
      const out: CreateJobInput[] = [];

      for (const row of rows) {
        if (out.length >= cap) break;
        // Skip malformed entries instead of letting one bad row fail the run.
        if (typeof row !== "object" || row === null) continue;
        const job = row as QaJobBoardlyJob;

        if (terms.length > 0 && !terms.some((t) => matchesTerm(job, t)))
          continue;

        const mapped = mapJob(job);
        if (!mapped) continue;

        // Deduplicate on the source id, falling back to the listing URL.
        const key = mapped.sourceJobId || mapped.jobUrl;
        if (seen.has(key)) continue;
        seen.add(key);
        out.push(mapped);
      }

      context.onProgress?.({
        phase: "list",
        termsProcessed: 1,
        termsTotal: 1,
        currentUrl: JOBS_URL,
        jobPagesProcessed: out.length,
        detail: `QAJobsBoard: ${out.length} matched (${rows.length} total listings)`,
      });

      return { success: true, jobs: out };
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      return { success: false, jobs: [], error: message };
    }
  },
};
export default manifest;