Some checks failed
CI / Linting (Biome) (push) Failing after 41s
CI / Tests (push) Successful in 6m10s
CI / Type Check (adzuna-extractor) (push) Successful in 1m9s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m13s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m9s
CI / Type Check (orchestrator) (push) Failing after 1m16s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m9s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m10s
CI / Documentation (push) Successful in 1m56s
Probe application links for closed listings and feed expires_at; enrich vague Remote/Worldwide rows with real country before blocked-countries filtering. Co-authored-by: Cursor <cursoragent@cursor.com>
291 lines
8.1 KiB
TypeScript
291 lines
8.1 KiB
TypeScript
/**
 * QAJobsBoard (JobBoardly) — public jobs listing JSON.
 *
 * https://qajobsboard.jobboardly.com/jobs.json
 */
|
||
|
||
import type {
|
||
ExtractorManifest,
|
||
ExtractorRunResult,
|
||
} from "@shared/types/extractors";
|
||
import type { CreateJobInput } from "@shared/types/jobs";
|
||
import {
|
||
isLinkedInJobUrl,
|
||
isPostingExpiredByDate,
|
||
probeApplicationLink,
|
||
} from "./src/application-link.js";
|
||
import {
|
||
extractJobLocationFromText,
|
||
fetchQaJobDetailEnrichment,
|
||
stripHtml,
|
||
} from "./src/detail-page.js";
|
||
|
||
// Endpoint of the QAJobsBoard public jobs listing (JSON); requested once per extractor run.
const JOBS_URL = "https://qajobsboard.jobboardly.com/jobs.json";
|
||
|
||
/**
 * One entry of a feed row's `categories` array.
 *
 * Category names are searched when matching search terms and are joined into
 * the mapped job's `disciplines` string.
 */
interface JobCategory {
  /** Display name of the category; may be absent in the feed. */
  name?: string;
}
|
||
|
||
/**
 * Salary block of a feed row, rendered into a display string by `salaryLabel`.
 */
interface SalaryBand {
  /** Label rendered as a prefix of the salary string (presumably the pay period — confirm against the feed). */
  schedule?: string;
  /** Lower bound of the band; the feed may send `null` or omit it. */
  minimum?: number | null;
  /** Upper bound of the band; the feed may send `null` or omit it. */
  maximum?: number | null;
}
|
||
|
||
/**
 * Description block of a feed row.
 */
interface DescriptionBlock {
  /** Job description as HTML; it is passed through `stripHtml` before being searched or stored. */
  html?: string;
}
|
||
|
||
/**
 * Shape of one row of the QAJobsBoard `jobs.json` feed, limited to the fields
 * this extractor reads. Every field is optional because the feed is external,
 * unvalidated JSON.
 */
interface QaJobBoardlyJob {
  /** Job title; mapped to `title`. */
  title?: string;
  /** Mapped to `jobType`. */
  arrangement?: string;
  /** Free-text location; the job is flagged remote when this equals "remote" (case-insensitive). */
  location?: string;
  /** Location restrictions; preferred over `location` when building the location label. */
  location_limits?: string[];
  /** Mapped to `datePosted`. */
  published_at?: string;
  /** Expiry date; rows already expired according to `isPostingExpiredByDate` are skipped. */
  expires_at?: string;
  /** External application URL; falls back to `links.self` when absent. */
  application_link?: string;
  /** HTML description wrapper. */
  description?: DescriptionBlock;
  /** Employer name and logo URL. */
  company?: { name?: string; logo?: string };
  /** Salary band rendered by `salaryLabel`. */
  salary?: SalaryBand;
  /** Categories used for term matching and for the `disciplines` field. */
  categories?: JobCategory[];
  /** `self` is the job's own URL; rows without it are dropped by `mapJob`. */
  links?: { self?: string };
}
|
||
|
||
/**
 * Normalizes an untyped value to a trimmed, non-empty string.
 *
 * @param value - Any value, typically a raw field of a parsed feed row.
 * @returns The trimmed string, or `undefined` when `value` is not a string or
 *   contains only whitespace.
 */
function asString(value: unknown): string | undefined {
  if (typeof value !== "string") return undefined;
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
|
||
|
||
/**
 * Renders a feed salary band as a short display string.
 *
 * Formats, with an optional `"<schedule>: "` prefix:
 * - both bounds finite: `"<min>–<max>"`
 * - only a finite minimum: `"<min>+"`
 * - only a finite maximum: `"≤<max>"`
 * - no usable bound: the bare schedule (without a trailing colon)
 *
 * @param raw - Salary block of a feed row, if any.
 * @returns The label, or `undefined` when there is nothing to show.
 */
function salaryLabel(raw: SalaryBand | undefined): string | undefined {
  if (!raw) return undefined;

  // Trim the schedule so stray whitespace never leaks into the label.
  const schedule = typeof raw.schedule === "string" ? raw.schedule.trim() : "";
  const prefix = schedule ? `${schedule}: ` : "";

  // `null`, missing, NaN and ±Infinity all count as "no bound".
  const min =
    typeof raw.minimum === "number" && Number.isFinite(raw.minimum)
      ? raw.minimum
      : undefined;
  const max =
    typeof raw.maximum === "number" && Number.isFinite(raw.maximum)
      ? raw.maximum
      : undefined;

  if (min !== undefined && max !== undefined) return `${prefix}${min}–${max}`;
  if (min !== undefined) return `${prefix}${min}+`;
  if (max !== undefined) return `${prefix}≤${max}`;

  // Schedule only: return it as-is rather than with a dangling ":".
  return schedule || undefined;
}
|
||
|
||
/**
 * Lower-cased location labels that carry no usable geographic information.
 * A location equal to one of these is treated as "still unknown".
 */
const VAGUE_LOCATION_LABELS: ReadonlySet<string> = new Set([
  "worldwide",
  "global",
  "anywhere",
  "remote",
  "unknown",
]);

/**
 * Tells whether a location string is one of the vague placeholder labels.
 *
 * The comparison ignores case and surrounding whitespace, and matches whole
 * labels only ("Remote (UK)" is not vague).
 *
 * @param value - Location text to classify.
 * @returns `true` when `value` is exactly one of the vague labels.
 */
function isVagueLocationLabel(value: string): boolean {
  return VAGUE_LOCATION_LABELS.has(value.trim().toLowerCase());
}
|
||
|
||
/**
 * Picks the most specific location string available for a feed row.
 *
 * Precedence:
 * 1. non-vague entries of `location_limits`, joined with ", "
 * 2. `location`, when it is not a vague label
 * 3. a location extracted from the HTML-stripped description
 * 4. `location` even if vague
 * 5. the literal "Unknown"
 *
 * @param job - Raw feed row.
 * @returns A non-empty location label.
 */
function locationLabel(job: QaJobBoardlyJob): string {
  const limits = Array.isArray(job.location_limits)
    ? job.location_limits
        // The feed is unvalidated JSON: tolerate non-string entries.
        .map((v) => (typeof v === "string" ? v.trim() : ""))
        .filter((v) => v.length > 0 && !isVagueLocationLabel(v))
    : [];
  if (limits.length > 0) return limits.join(", ");

  const loc = asString(job.location);
  if (loc && !isVagueLocationLabel(loc)) return loc;

  const fromDescription = job.description?.html
    ? extractJobLocationFromText(stripHtml(job.description.html))
    : undefined;
  if (fromDescription) return fromDescription;

  // A vague feed label still beats the generic placeholder.
  return loc ?? "Unknown";
}
|
||
|
||
/**
 * Case-insensitive substring match of a search term against a feed row.
 *
 * The title is checked first, then the category names, then the HTML-stripped
 * description; the first hit wins. An empty term matches every row.
 *
 * Malformed rows (non-string title, `null` category entries, non-string
 * description) are treated as "no text" for that field instead of throwing,
 * so one bad row cannot fail the whole extraction.
 *
 * @param job - Raw feed row.
 * @param term - Search term; compared lower-cased.
 * @returns `true` when any of the three fields contains the term.
 */
function matchesTerm(job: QaJobBoardlyJob, term: string): boolean {
  const needle = term.toLowerCase();

  if (typeof job.title === "string" && job.title.toLowerCase().includes(needle)) {
    return true;
  }

  const categoryText = Array.isArray(job.categories)
    ? job.categories
        .map((c) => (typeof c?.name === "string" ? c.name.toLowerCase() : ""))
        .join(" ")
    : "";
  // Note: `"".includes("")` is true, so an empty term always matches here.
  if (categoryText.includes(needle)) return true;

  const html = job.description?.html;
  // Past this point `needle` is non-empty, so an absent description cannot match.
  if (typeof html !== "string" || html.length === 0) return false;

  return stripHtml(html).toLowerCase().includes(needle);
}
|
||
|
||
/**
 * Converts one feed row into the pipeline's `CreateJobInput`.
 *
 * @param raw - Raw feed row.
 * @returns The mapped job, or `null` when the row has no `links.self` URL
 *   (the URL is the job's identity, so such rows cannot be stored).
 */
function mapJob(raw: QaJobBoardlyJob): CreateJobInput | null {
  const jobUrl = asString(raw.links?.self);
  if (!jobUrl) return null;

  // Drop leading whitespace, hyphens and en dashes from the employer name.
  const employer =
    asString(raw.company?.name)
      ?.replace(/^[\s–-]+/, "")
      .trim() || "Unknown Employer";

  const applicationLink = asString(raw.application_link) ?? jobUrl;

  const descHtml = raw.description?.html;
  const jobDescription = descHtml ? stripHtml(descHtml) : undefined;

  const salary = salaryLabel(raw.salary);

  const categoryNames = Array.isArray(raw.categories)
    ? raw.categories
        .map((c) => asString(c?.name))
        .filter((v): v is string => v !== undefined)
    : [];
  // Joining an empty list would yield ""; report "no disciplines" as undefined.
  const disciplines =
    categoryNames.length > 0 ? categoryNames.join(", ") : undefined;

  // Last non-empty path segment, so a trailing "/" does not produce an empty id.
  const sourceJobId = jobUrl
    .split("/")
    .filter((segment) => segment.length > 0)
    .pop();

  return {
    source: "qajobsboard",
    sourceJobId,
    title: asString(raw.title) ?? "Unknown Title",
    employer,
    jobUrl,
    applicationLink,
    location: locationLabel(raw),
    isRemote: asString(raw.location)?.toLowerCase() === "remote",
    datePosted: asString(raw.published_at),
    jobDescription,
    jobType: asString(raw.arrangement),
    salary,
    disciplines,
    companyLogo: asString(raw.company?.logo),
  };
}
|
||
|
||
/**
 * Decides whether a job's location is too uninformative to keep as-is.
 *
 * @param location - Current location of the job, if any.
 * @returns `true` when the location is missing, blank, or a vague label
 *   according to `isVagueLocationLabel`.
 */
function needsDetailEnrichment(location: string | undefined): boolean {
  if (!location?.trim()) return true;
  return isVagueLocationLabel(location);
}
|
||
|
||
/**
 * Refines mapped jobs with data that is not in the feed, and drops closed ones.
 *
 * For each job, in order and one at a time:
 * 1. If the application link is a LinkedIn job URL, it is probed. A probe that
 *    reports `expired` drops the job; a probe location replaces a missing or
 *    vague location.
 * 2. If the location is still missing or vague, the job's own detail page is
 *    fetched. A page that reports `expired` drops the job; otherwise its
 *    location and description override the feed values when present.
 *
 * Probe and fetch failures are deliberately ignored: the feed row is kept
 * unchanged rather than lost.
 *
 * @param jobs - Jobs mapped from the feed; the input objects are not mutated.
 * @param shouldCancel - Optional cancellation check, polled before each job.
 *   When it returns `true` the loop stops and only the jobs processed so far
 *   are returned.
 * @returns The surviving jobs, in input order.
 */
async function enrichJobsFromDetailPages(
  jobs: CreateJobInput[],
  shouldCancel?: () => boolean,
): Promise<CreateJobInput[]> {
  const enriched: CreateJobInput[] = [];

  for (const job of jobs) {
    if (shouldCancel?.()) break;

    // Work on a copy so the caller's objects stay untouched.
    let current = { ...job };

    const applicationLink = job.applicationLink?.trim();
    if (applicationLink && isLinkedInJobUrl(applicationLink)) {
      try {
        const probe = await probeApplicationLink(applicationLink);
        if (probe?.expired) continue;
        if (probe?.location && needsDetailEnrichment(current.location)) {
          current = { ...current, location: probe.location };
        }
      } catch {
        // keep row when LinkedIn probe fails
      }
    }

    if (needsDetailEnrichment(current.location)) {
      try {
        const detail = await fetchQaJobDetailEnrichment(current.jobUrl);
        if (detail?.expired) continue;
        current = {
          ...current,
          ...(detail?.location ? { location: detail.location } : {}),
          ...(detail?.jobDescription
            ? { jobDescription: detail.jobDescription }
            : {}),
        };
      } catch {
        // keep feed row when detail page fetch fails
      }
    }

    enriched.push(current);
  }

  return enriched;
}
|
||
|
||
/**
 * Extractor manifest for QAJobsBoard.
 *
 * `run` downloads the whole jobs feed in a single request, filters it locally
 * by the configured search terms, maps the rows to `CreateJobInput`, and then
 * enriches or prunes them via `enrichJobsFromDetailPages`.
 */
export const manifest: ExtractorManifest = {
  id: "qajobsboard",
  displayName: "QAJobsBoard",
  providesSources: ["qajobsboard"],

  /**
   * Runs one extraction.
   *
   * @param context - Run context: `settings.qajobsboardMaxJobsPerTerm` (job
   *   cap, default 100, clamped to 1–500), `searchTerms` (a row is kept when
   *   any term matches; no terms means keep all), plus the optional
   *   `shouldCancel` and `onProgress` hooks.
   * @returns `success: true` with the collected jobs, or `success: false` with
   *   an error message when the feed request or processing fails. Failures
   *   inside the try block are reported in the result rather than thrown.
   */
  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    const rawMax = context.settings.qajobsboardMaxJobsPerTerm;
    const maxJobs = rawMax ? Number.parseInt(rawMax, 10) : 100;
    // Unparseable setting falls back to the default; otherwise clamp to 1–500.
    const cap = Number.isFinite(maxJobs)
      ? Math.min(Math.max(maxJobs, 1), 500)
      : 100;

    const terms = Array.isArray(context.searchTerms) ? context.searchTerms : [];

    context.onProgress?.({
      phase: "list",
      termsProcessed: 0,
      termsTotal: 1,
      currentUrl: JOBS_URL,
      detail: "QAJobsBoard: fetching jobs.json",
    });

    try {
      const response = await fetch(JOBS_URL, {
        headers: { Accept: "application/json", "User-Agent": "JobOps/1.0" },
      });
      if (!response.ok) {
        throw new Error(
          `QAJobsBoard request failed with status ${response.status}`,
        );
      }

      const body = (await response.json()) as unknown;
      // Anything other than a top-level array is treated as an empty feed.
      const rows: unknown[] = Array.isArray(body) ? body : [];

      const seen = new Set<string>();
      const out: CreateJobInput[] = [];

      for (const entry of rows) {
        if (out.length >= cap) break;
        // Skip malformed entries so a single bad row cannot abort the run.
        if (typeof entry !== "object" || entry === null) continue;
        const row = entry as QaJobBoardlyJob;

        if (isPostingExpiredByDate(asString(row.expires_at))) continue;
        if (terms.length > 0 && !terms.some((t) => matchesTerm(row, t))) {
          continue;
        }

        const mapped = mapJob(row);
        if (!mapped) continue;

        // De-duplicate on the source id, falling back to the URL.
        const key = mapped.sourceJobId || mapped.jobUrl;
        if (seen.has(key)) continue;
        seen.add(key);
        out.push(mapped);
      }

      const withDetails = await enrichJobsFromDetailPages(
        out,
        context.shouldCancel,
      );

      context.onProgress?.({
        phase: "list",
        termsProcessed: 1,
        termsTotal: 1,
        currentUrl: JOBS_URL,
        jobPagesProcessed: withDetails.length,
        detail: `QAJobsBoard: ${withDetails.length} matched (${rows.length} total listings, detail pages for vague locations)`,
      });

      return { success: true, jobs: withDetails };
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      return { success: false, jobs: [], error: message };
    }
  },
};

export default manifest;
|