ilia d28a6221e4
Some checks failed
CI / Linting (Biome) (push) Failing after 41s
CI / Tests (push) Successful in 6m10s
CI / Type Check (adzuna-extractor) (push) Successful in 1m9s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m13s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m9s
CI / Type Check (orchestrator) (push) Failing after 1m16s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m9s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m10s
CI / Documentation (push) Successful in 1m56s
fix(qajobsboard): drop expired LinkedIn reposts and resolve hiring location
Probe application links for closed listings and feed expires_at; enrich vague Remote/Worldwide rows with real country before blocked-countries filtering.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-16 17:42:19 -04:00

291 lines
8.1 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* QAJobsBoard (JobBoardly) — public jobs listing JSON.
*
* https://qajobsboard.jobboardly.com/jobs.json
*/
import type {
ExtractorManifest,
ExtractorRunResult,
} from "@shared/types/extractors";
import type { CreateJobInput } from "@shared/types/jobs";
import {
isLinkedInJobUrl,
isPostingExpiredByDate,
probeApplicationLink,
} from "./src/application-link.js";
import {
extractJobLocationFromText,
fetchQaJobDetailEnrichment,
stripHtml,
} from "./src/detail-page.js";
/** JSON listing endpoint requested by the extractor's `run` and reported as `currentUrl` in progress events. */
const JOBS_URL = "https://qajobsboard.jobboardly.com/jobs.json";
/** Category entry attached to a listing; only `name` is read by this extractor. */
interface JobCategory {
name?: string;
}
/** Salary figures for a listing, rendered into a display label by `salaryLabel`. */
interface SalaryBand {
// Prefixed to the rendered label as "<schedule>: " when present.
schedule?: string;
// Bounds may be absent or null; `salaryLabel` only uses finite numbers.
minimum?: number | null;
maximum?: number | null;
}
/** Wrapper around a listing's description; `html` is passed through `stripHtml` before it is searched or stored. */
interface DescriptionBlock {
html?: string;
}
/**
 * One listing as this extractor reads it from the jobs feed.
 *
 * Every field is optional because rows are cast from untyped JSON, so each
 * consumer has to tolerate missing values.
 */
interface QaJobBoardlyJob {
title?: string;
// Mapped to `jobType` by `mapJob`.
arrangement?: string;
// Free-text location; used by `locationLabel` only when `location_limits` yields nothing specific.
location?: string;
// Preferred location source in `locationLabel`; vague entries (e.g. "Worldwide") are ignored there.
location_limits?: string[];
// Mapped to `datePosted` by `mapJob`.
published_at?: string;
// Checked with `isPostingExpiredByDate` in `run`; expired rows are skipped.
expires_at?: string;
// External apply URL; `mapJob` falls back to the listing URL when it is missing.
application_link?: string;
description?: DescriptionBlock;
company?: { name?: string; logo?: string };
salary?: SalaryBand;
categories?: JobCategory[];
// `self` is the listing URL; `mapJob` drops rows that do not have one.
links?: { self?: string };
}
/**
 * Normalizes an untyped value into a trimmed, non-empty string.
 *
 * @param value - Any value, typically a field read from feed JSON.
 * @returns The trimmed text, or `undefined` when `value` is not a string or
 *   contains only whitespace.
 */
function asString(value: unknown): string | undefined {
  if (typeof value === "string") {
    const text = value.trim();
    if (text.length > 0) return text;
  }
  return undefined;
}
/**
 * Builds a display label from a listing's salary band.
 *
 * Formats produced (each optionally prefixed with "<schedule>: "):
 * - both bounds finite: "<min>–<max>" (en dash separator)
 * - only the minimum:   "<min>+"
 * - only the maximum:   "<max>"
 * - no usable bound:    the schedule alone (e.g. "yearly:"), or `undefined`
 *
 * @param raw - Salary band from the feed, if any.
 * @returns The label, or `undefined` when there is nothing to show.
 */
function salaryLabel(raw: SalaryBand | undefined): string | undefined {
  if (!raw) return undefined;
  const schedule = raw.schedule ? `${raw.schedule}: ` : "";
  // Bounds may be null, missing, or non-finite; only finite numbers are rendered.
  const hasMin =
    typeof raw.minimum === "number" && Number.isFinite(raw.minimum);
  const hasMax =
    typeof raw.maximum === "number" && Number.isFinite(raw.maximum);
  if (hasMin && hasMax) {
    // The separator is written as an escape so it cannot be silently lost or
    // confused with an ASCII hyphen; without it 50000 and 70000 would render
    // as "5000070000".
    return `${schedule}${raw.minimum}\u2013${raw.maximum}`;
  }
  if (hasMin) return `${schedule}${raw.minimum}+`;
  if (hasMax) return `${schedule}${raw.maximum}`;
  return schedule.trim() || undefined;
}
/** Lower-case location labels that name no concrete place. */
const VAGUE_LOCATION_LABELS = new Set<string>([
  "worldwide",
  "global",
  "anywhere",
  "remote",
  "unknown",
]);

/**
 * Tells whether a location label is one of the known placeholder values.
 *
 * The comparison ignores case and surrounding whitespace and requires an
 * exact match, so "Remote, UK" is not considered vague.
 *
 * @param value - Location label to test.
 * @returns `true` when the normalized label is in `VAGUE_LOCATION_LABELS`.
 */
function isVagueLocationLabel(value: string): boolean {
  const normalized = value.trim().toLowerCase();
  return VAGUE_LOCATION_LABELS.has(normalized);
}
/**
 * Picks the most specific location label available for a listing.
 *
 * Sources are tried in this order:
 * 1. non-vague entries of `location_limits`, joined with ", "
 * 2. the feed's `location`, when it is not a vague placeholder
 * 3. a location extracted from the plain-text description
 * 4. the feed's `location` even if vague
 * 5. the literal "Unknown"
 *
 * @param job - Listing from the feed.
 * @returns A non-empty location label.
 */
function locationLabel(job: QaJobBoardlyJob): string {
  const specificLimits: string[] = [];
  if (Array.isArray(job.location_limits)) {
    for (const entry of job.location_limits) {
      // Feed data is untyped at runtime, so non-string entries are ignored.
      if (typeof entry !== "string") continue;
      const label = entry.trim();
      if (label.length > 0 && !isVagueLocationLabel(label)) {
        specificLimits.push(label);
      }
    }
  }
  if (specificLimits.length > 0) return specificLimits.join(", ");

  const feedLocation = asString(job.location);
  if (feedLocation && !isVagueLocationLabel(feedLocation)) {
    return feedLocation;
  }

  const html = job.description?.html;
  const fromDescription = html
    ? extractJobLocationFromText(stripHtml(html))
    : undefined;

  // A vague feed label is still better than nothing; "Unknown" is the last resort.
  return fromDescription || feedLocation || "Unknown";
}
/**
 * Case-insensitive substring search for a term across a listing.
 *
 * The title is checked first, then the category names (joined with single
 * spaces), and finally the description with HTML stripped. The search stops
 * at the first hit.
 *
 * @param job - Listing from the feed.
 * @param term - Search term; compared in lower case.
 * @returns `true` when any of the three texts contains the term.
 */
function matchesTerm(job: QaJobBoardlyJob, term: string): boolean {
  const needle = term.toLowerCase();

  const titleHit = job.title?.toLowerCase().includes(needle) ?? false;
  if (titleHit) return true;

  let categoryText = "";
  if (Array.isArray(job.categories)) {
    categoryText = job.categories
      .map((category) => category.name?.toLowerCase() ?? "")
      .join(" ");
  }
  if (categoryText.includes(needle)) return true;

  const plainDescription = stripHtml(job.description?.html ?? "");
  return plainDescription.toLowerCase().includes(needle);
}
/**
 * Converts one feed listing into the shared `CreateJobInput` shape.
 *
 * @param raw - Listing from the feed.
 * @returns The mapped job, or `null` when the listing has no `links.self`
 *   URL (there is nothing to identify or link the job by).
 */
function mapJob(raw: QaJobBoardlyJob): CreateJobInput | null {
  const jobUrl = asString(raw.links?.self);
  if (!jobUrl) return null;

  // Strip leading whitespace and dash characters from the company name; the
  // class covers the ASCII hyphen plus en/em dashes so typographic prefixes
  // are removed as well.
  const employer =
    asString(raw.company?.name)
      ?.replace(/^[\s\-\u2013\u2014]+/, "")
      .trim() || "Unknown Employer";

  // Fall back to the listing page when the feed has no external apply link.
  const applicationLink = asString(raw.application_link) ?? jobUrl;

  const descHtml = raw.description?.html;
  const jobDescription = descHtml ? stripHtml(descHtml) : undefined;
  const salary = salaryLabel(raw.salary);

  const cats = Array.isArray(raw.categories)
    ? raw.categories
        .map((c) => c?.name?.trim())
        .filter((v): v is string => Boolean(v))
        .join(", ")
    : undefined;

  // Use the last path segment as the id. Trailing slashes are removed first
  // so ".../jobs/123/" yields "123" instead of an empty string; an empty
  // result becomes undefined rather than a blank id.
  const sourceJobId =
    jobUrl.replace(/\/+$/, "").split("/").pop() || undefined;

  return {
    source: "qajobsboard",
    sourceJobId,
    title: asString(raw.title) ?? "Unknown Title",
    employer,
    jobUrl,
    applicationLink,
    location: locationLabel(raw),
    // Only an exact "remote" feed location (case-insensitive) counts as remote.
    isRemote: asString(raw.location)?.toLowerCase() === "remote",
    datePosted: asString(raw.published_at),
    jobDescription,
    jobType: asString(raw.arrangement),
    salary,
    disciplines: cats,
    companyLogo: asString(raw.company?.logo),
  };
}
/**
 * Decides whether a job's location is too weak to keep as-is.
 *
 * @param location - Current location label, if any.
 * @returns `true` when the label is missing, blank, or a vague placeholder
 *   such as "Remote" or "Worldwide".
 */
function needsDetailEnrichment(location: string | undefined): boolean {
  const label = location?.trim();
  return !label || isVagueLocationLabel(label);
}
/**
 * Drops expired jobs and fills in weak locations using remote lookups.
 *
 * Jobs are processed one at a time, in order:
 * - When the application link is a LinkedIn job URL it is probed; a job the
 *   probe reports as expired is dropped, and the probe's location replaces a
 *   missing or vague one.
 * - When the location is still missing or vague, the listing's detail page
 *   is fetched; an expired result drops the job, otherwise its location and
 *   description (when present) overwrite the feed values.
 *
 * A failed probe or fetch never drops a job: the feed row is kept unchanged.
 *
 * @param jobs - Jobs mapped from the feed; the input objects are not mutated.
 * @param shouldCancel - Optional cancellation check, polled before each job.
 *   When it returns `true` processing stops and the jobs handled so far are
 *   returned.
 * @returns The surviving jobs, in input order.
 */
async function enrichJobsFromDetailPages(
  jobs: CreateJobInput[],
  shouldCancel?: () => boolean,
): Promise<CreateJobInput[]> {
  const kept: CreateJobInput[] = [];

  for (const original of jobs) {
    if (shouldCancel?.()) break;

    let row = { ...original };
    let expired = false;

    const link = original.applicationLink?.trim();
    if (link && isLinkedInJobUrl(link)) {
      try {
        const probe = await probeApplicationLink(link);
        if (probe?.expired) {
          expired = true;
        } else if (probe?.location && needsDetailEnrichment(row.location)) {
          row = { ...row, location: probe.location };
        }
      } catch {
        // keep row when LinkedIn probe fails
      }
    }
    if (expired) continue;

    if (needsDetailEnrichment(row.location)) {
      try {
        const detail = await fetchQaJobDetailEnrichment(row.jobUrl);
        if (detail?.expired) {
          expired = true;
        } else {
          const locationPatch = detail?.location
            ? { location: detail.location }
            : {};
          const descriptionPatch = detail?.jobDescription
            ? { jobDescription: detail.jobDescription }
            : {};
          row = { ...row, ...locationPatch, ...descriptionPatch };
        }
      } catch {
        // keep feed row when detail page fetch fails
      }
    }

    if (!expired) kept.push(row);
  }

  return kept;
}
/**
 * Extractor manifest for QAJobsBoard.
 *
 * `run` fetches the `jobs.json` feed once, filters it locally, and then
 * enriches the surviving jobs via `enrichJobsFromDetailPages`.
 */
export const manifest: ExtractorManifest = {
  id: "qajobsboard",
  displayName: "QAJobsBoard",
  providesSources: ["qajobsboard"],
  /**
   * Runs one extraction pass.
   *
   * Rows are skipped when they are not objects, are expired by date, match
   * none of the search terms (when any are given), have no listing URL, or
   * duplicate an earlier row. Collection stops once the cap is reached.
   *
   * @param context - Run context supplying settings, search terms, and the
   *   optional cancellation and progress callbacks.
   * @returns `success: true` with the extracted jobs, or `success: false`
   *   with an error message when the request or processing throws. Errors are
   *   reported in the result rather than rethrown.
   */
  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    // The setting is parsed as a base-10 integer; an unset or unparsable value
    // falls back to 100, and the result is clamped to the range 1..500.
    const maxJobs = context.settings.qajobsboardMaxJobsPerTerm
      ? Number.parseInt(context.settings.qajobsboardMaxJobsPerTerm, 10)
      : 100;
    const cap = Number.isFinite(maxJobs)
      ? Math.min(Math.max(maxJobs, 1), 500)
      : 100;
    const terms = context.searchTerms;

    context.onProgress?.({
      phase: "list",
      termsProcessed: 0,
      termsTotal: 1,
      currentUrl: JOBS_URL,
      detail: "QAJobsBoard: fetching jobs.json",
    });

    try {
      const response = await fetch(JOBS_URL, {
        headers: { Accept: "application/json", "User-Agent": "JobOps/1.0" },
      });
      if (!response.ok) {
        throw new Error(
          `QAJobsBoard request failed with status ${response.status}`,
        );
      }

      const body = (await response.json()) as unknown;
      // Anything other than a top-level array is treated as an empty feed.
      const rows: unknown[] = Array.isArray(body) ? body : [];
      const seen = new Set<string>();
      const out: CreateJobInput[] = [];

      for (const entry of rows) {
        if (out.length >= cap) break;
        // The feed is untyped JSON: skip null/primitive entries so a single
        // malformed row cannot throw and fail the whole run.
        if (typeof entry !== "object" || entry === null) continue;
        const row = entry as QaJobBoardlyJob;

        if (isPostingExpiredByDate(asString(row.expires_at))) continue;
        // No search terms means "take everything".
        if (terms.length > 0 && !terms.some((t) => matchesTerm(row, t)))
          continue;

        const mapped = mapJob(row);
        if (!mapped) continue;

        const key = mapped.sourceJobId || mapped.jobUrl;
        if (seen.has(key)) continue;
        seen.add(key);
        out.push(mapped);
      }

      const withDetails = await enrichJobsFromDetailPages(
        out,
        context.shouldCancel,
      );

      context.onProgress?.({
        phase: "list",
        termsProcessed: 1,
        termsTotal: 1,
        currentUrl: JOBS_URL,
        jobPagesProcessed: withDetails.length,
        detail: `QAJobsBoard: ${withDetails.length} matched (${rows.length} total listings, detail pages for vague locations)`,
      });

      return { success: true, jobs: withDetails };
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      return { success: false, jobs: [], error: message };
    }
  },
};
export default manifest;