ilia c840f289e1
Some checks failed
CI / Linting (Biome) (push) Failing after 40s
CI / Tests (push) Successful in 5m54s
CI / Type Check (adzuna-extractor) (push) Successful in 1m8s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m11s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m8s
CI / Type Check (orchestrator) (push) Successful in 1m23s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m6s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m7s
CI / Documentation (push) Successful in 1m54s
feat(extractors): expand catalog, smoke coverage, and sourcing docs
Adds Arc.dev, BC T-Net, Eluta, iCIMS tenants, QAJobsBoard, and SmartRecruiters
manifests with registry/settings/UI wiring; registers full extractor list in
smoke-extractors and documents supplementary board access paths. Aligns Careerjet
v4 with the url query parameter and fixes strict typing in QAJobsBoard.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-15 22:36:23 -04:00

288 lines
8.7 KiB
TypeScript

/**
* SmartRecruiters public Posting API (no auth for public boards).
*
* https://developers.smartrecruiters.com/reference/v1listpostings
* GET https://api.smartrecruiters.com/v1/companies/{companyIdentifier}/postings
* GET https://api.smartrecruiters.com/v1/companies/{companyIdentifier}/postings/{postingId}
*/
import type {
ExtractorManifest,
ExtractorRunResult,
} from "@shared/types/extractors";
import type { CreateJobInput } from "@shared/types/jobs";
/**
 * Page size sent as the `limit` query parameter on every list request, and
 * echoed as `limit` in the synthetic empty page returned for a 404.
 */
const LIST_LIMIT = 100;
/**
 * Company object embedded in a posting.
 *
 * When deriving the employer label, `name` is tried first and `identifier`
 * second; `name` is also searched when filtering postings by search term.
 */
interface SrCompany {
identifier?: string;
name?: string;
}
/**
 * Location object embedded in a posting.
 *
 * `fullLocation` is used verbatim when present; otherwise `city`, `region`
 * and `country` are joined. `remote` drives the `isRemote` flag of the mapped
 * job. `hybrid` is declared but not read anywhere in this module.
 */
interface SrLocation {
fullLocation?: string;
city?: string;
region?: string;
country?: string;
remote?: boolean;
hybrid?: boolean;
}
/**
 * One element of the list response's `content` array.
 *
 * Every field is optional because responses are cast, not validated, at
 * runtime. `id` is required in practice: rows without one are skipped before
 * the detail request is made.
 */
interface SrPostingSummary {
id?: string;
name?: string;
releasedDate?: string;
company?: SrCompany;
location?: SrLocation;
typeOfEmployment?: { label?: string };
experienceLevel?: { id?: string; label?: string };
}
/**
 * Envelope returned by the list-postings request.
 *
 * `content` holds the page of summaries and `totalFound` bounds pagination.
 * `offset` and `limit` are only written by this module (in the synthetic
 * empty page used for a 404), never read.
 */
interface SrListResponse {
content?: SrPostingSummary[];
totalFound?: number;
offset?: number;
limit?: number;
}
/**
 * Body of the single-posting request: the summary fields plus URLs and the
 * job advert.
 *
 * `applyUrl` / `postingUrl` supply the job link, and the `text` of every
 * entry in `jobAd.sections` is concatenated into the job description.
 */
interface SrDetail extends SrPostingSummary {
postingUrl?: string;
applyUrl?: string;
jobAd?: {
sections?: Record<string, { title?: string; text?: string } | undefined>;
};
}
/**
 * Narrows an arbitrary value to a trimmed, non-empty string.
 *
 * @param value - Anything, typically a field read from an unvalidated payload.
 * @returns The trimmed text, or `undefined` when `value` is not a string or
 *   contains only whitespace.
 */
function asString(value: unknown): string | undefined {
  if (typeof value === "string") {
    const text = value.trim();
    if (text.length > 0) return text;
  }
  return undefined;
}
/**
 * Parses the configured list of SmartRecruiters company identifiers.
 *
 * Accepted shapes:
 * - a JSON array (`["acme", "globex"]`); non-string elements are ignored;
 * - a JSON string (`"acme, globex"`), which is unwrapped and then split;
 * - plain text separated by newlines, commas, semicolons or pipes.
 *
 * Entries are trimmed, blanks are dropped and duplicates are removed (first
 * occurrence wins) so the same company is never listed or fetched twice.
 *
 * @param raw - Raw setting value; `undefined` or empty yields an empty list.
 * @returns Unique, trimmed identifiers in their original order.
 */
function readCompanies(raw: string | undefined): string[] {
  if (!raw) return [];

  let jsonEntries: unknown[] | undefined;
  let text = raw;
  try {
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed)) {
      jsonEntries = parsed;
    } else if (typeof parsed === "string") {
      // A quoted scalar: split its contents, not the raw text with its quotes.
      text = parsed;
    }
  } catch {
    // Not JSON; treat the value as delimiter-separated text.
  }

  const candidates: unknown[] = jsonEntries ?? text.split(/[\n,;|]+/);
  const unique = new Set<string>();
  for (const candidate of candidates) {
    if (typeof candidate !== "string") continue;
    const identifier = candidate.trim();
    if (identifier) unique.add(identifier);
  }
  return [...unique];
}
/** Named character references recognised by {@link decodeHtmlEntities}. */
const HTML_NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", " "],
]);

/**
 * Decodes HTML character references in a single pass.
 *
 * Handles the named references in `HTML_NAMED_ENTITIES` plus decimal
 * (`&#8217;`) and hexadecimal (`&#x27;`) numeric references. Each reference is
 * decoded exactly once, so already-escaped text such as `&amp;lt;` becomes
 * `&lt;` rather than being double-decoded to `<`.
 *
 * Unknown names and numeric references outside the valid code point range
 * are left untouched.
 *
 * @param value - Text that may contain HTML character references.
 * @returns The text with recognised references replaced by their characters;
 *   non-breaking spaces are emitted as ordinary spaces.
 */
function decodeHtmlEntities(value: string): string {
  return value.replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z][a-zA-Z0-9]*));/g,
    (entity: string, dec?: string, hex?: string, name?: string): string => {
      if (name !== undefined) {
        return HTML_NAMED_ENTITIES.get(name) ?? entity;
      }
      const codePoint =
        dec !== undefined
          ? Number.parseInt(dec, 10)
          : Number.parseInt(hex ?? "", 16);
      // Reject NUL, surrogates and out-of-range values instead of emitting
      // broken characters (String.fromCodePoint throws above 0x10FFFF).
      const isScalar =
        codePoint >= 1 &&
        codePoint <= 0x10ffff &&
        !(codePoint >= 0xd800 && codePoint <= 0xdfff);
      if (!isScalar) return entity;
      // Keep numeric NBSP consistent with the named `&nbsp;` mapping.
      return codePoint === 0xa0 ? " " : String.fromCodePoint(codePoint);
    },
  );
}
/**
 * Reduces an HTML fragment to single-line plain text.
 *
 * Tags are replaced by spaces first, then character references are decoded,
 * and finally every run of whitespace is collapsed to one space.
 *
 * @param html - HTML fragment.
 * @returns Trimmed plain text without tags.
 */
function stripHtml(html: string): string {
  const withoutTags = html.replace(/<[^>]+>/g, " ");
  const decoded = decodeHtmlEntities(withoutTags);
  const collapsed = decoded.replace(/\s+/g, " ");
  return collapsed.trim();
}
/**
 * Builds a display string for a posting location.
 *
 * @param loc - Location object from a posting, if any.
 * @returns `fullLocation` when it is a non-blank string; otherwise the
 *   non-blank `city`, `region` and `country` values joined with ", ";
 *   `"Unknown"` when nothing usable is present.
 */
function locationString(loc: SrLocation | undefined): string {
  if (!loc) return "Unknown";

  const explicit = asString(loc.fullLocation);
  if (explicit) return explicit;

  const pieces: string[] = [];
  for (const candidate of [loc.city, loc.region, loc.country]) {
    const cleaned = asString(candidate);
    if (cleaned) pieces.push(cleaned);
  }
  return pieces.length === 0 ? "Unknown" : pieces.join(", ");
}
/**
 * Produces a plain-text description from the sections of a job advert.
 *
 * @param detail - Posting detail payload.
 * @returns The non-blank section texts joined and stripped of HTML, or
 *   `undefined` when the posting has no sections or none carries text.
 */
function extractDescription(detail: SrDetail): string | undefined {
  const sections = detail.jobAd?.sections;
  if (!sections || typeof sections !== "object") return undefined;

  const bodies = Object.values(sections)
    .map((section) =>
      section && typeof section.text === "string" ? section.text : "",
    )
    .filter((body) => body.trim() !== "");

  return bodies.length > 0 ? stripHtml(bodies.join("\n\n")) : undefined;
}
/**
 * Case-insensitive substring match of a search term against a posting.
 *
 * The title, the formatted location and the company name are checked in that
 * order, stopping at the first hit.
 *
 * @param summary - Posting summary from the list response.
 * @param term - Search term to look for.
 * @returns `true` when any of the three fields contains the term.
 */
function matchesTerm(summary: SrPostingSummary, term: string): boolean {
  const needle = term.toLowerCase();
  return (
    (summary.name?.toLowerCase().includes(needle) ?? false) ||
    locationString(summary.location).toLowerCase().includes(needle) ||
    (summary.company?.name?.toLowerCase().includes(needle) ?? false)
  );
}
/**
 * Requests one page of public postings for a company.
 *
 * @param company - Company identifier; URL-encoded before use.
 * @param offset - Zero-based index of the first posting to return.
 * @returns The parsed response body (cast, not validated). A 404 is reported
 *   as an empty page rather than an error.
 * @throws Error when the response has any other non-success status.
 */
async function fetchPostingsPage(
  company: string,
  offset: number,
): Promise<SrListResponse> {
  const endpoint = `https://api.smartrecruiters.com/v1/companies/${encodeURIComponent(company)}/postings`;
  const requestUrl = `${endpoint}?destination=PUBLIC&limit=${LIST_LIMIT}&offset=${offset}`;
  const response = await fetch(requestUrl, {
    headers: { Accept: "application/json" },
  });

  if (response.ok) {
    return (await response.json()) as SrListResponse;
  }
  if (response.status === 404) {
    // Unknown company: behave as if it simply has no postings.
    return { content: [], totalFound: 0, offset: 0, limit: LIST_LIMIT };
  }
  throw new Error(
    `SmartRecruiters list for "${company}" failed with status ${response.status}`,
  );
}
/**
 * Requests the full detail of a single posting.
 *
 * @param company - Company identifier; URL-encoded before use.
 * @param postingId - Posting id from the list response; URL-encoded.
 * @returns The parsed response body (cast, not validated), or `null` for any
 *   non-success status so the caller can skip the posting.
 */
async function fetchPostingDetail(
  company: string,
  postingId: string,
): Promise<SrDetail | null> {
  const companySegment = encodeURIComponent(company);
  const postingSegment = encodeURIComponent(postingId);
  const response = await fetch(
    `https://api.smartrecruiters.com/v1/companies/${companySegment}/postings/${postingSegment}`,
    { headers: { Accept: "application/json" } },
  );
  return response.ok ? ((await response.json()) as SrDetail) : null;
}
/**
 * Converts a posting detail payload into a job record.
 *
 * @param detail - Posting detail payload.
 * @returns The job input, or `null` when the posting has no usable id or no
 *   usable URL (`applyUrl`, falling back to `postingUrl`).
 */
function mapDetailToJob(detail: SrDetail): CreateJobInput | null {
  const sourceJobId = asString(detail.id);
  if (sourceJobId === undefined) return null;

  // NOTE(review): `applyUrl` wins for `jobUrl`, so `applicationLink` always
  // equals `jobUrl`. Confirm whether `postingUrl` was meant to be preferred.
  const applyUrl = asString(detail.applyUrl);
  const jobUrl = applyUrl ?? asString(detail.postingUrl);
  if (jobUrl === undefined) return null;

  const { company, location } = detail;
  return {
    source: "smartrecruiters",
    sourceJobId,
    title: asString(detail.name) ?? "Unknown Title",
    employer:
      asString(company?.name) ??
      asString(company?.identifier) ??
      "Unknown Employer",
    jobUrl,
    applicationLink: applyUrl ?? jobUrl,
    location: locationString(location),
    isRemote: location?.remote === true,
    datePosted: asString(detail.releasedDate),
    jobDescription: extractDescription(detail),
    // asString already maps blank labels to undefined.
    jobType: asString(detail.typeOfEmployment?.label),
    jobLevel: asString(detail.experienceLevel?.label),
  };
}
/**
 * Extractor manifest for SmartRecruiters-hosted career sites.
 *
 * `run` iterates the configured company identifiers and, for each one:
 * 1. pages through the list endpoint, keeping postings that match at least
 *    one search term (or all postings when no usable term is given), up to a
 *    per-company cap;
 * 2. fetches the detail of each kept posting and maps it to a job, skipping
 *    postings whose detail request fails or cannot be mapped;
 * 3. de-duplicates jobs across companies by posting id.
 *
 * The per-company cap comes from `smartrecruitersMaxJobsPerCompany`
 * (default 100, clamped to 1..500). Cancellation is checked before each
 * company, each list page and each detail request.
 *
 * A failed list request aborts the run: the result carries `success: false`,
 * the error message, and the jobs collected so far.
 */
export const manifest: ExtractorManifest = {
  id: "smartrecruiters",
  displayName: "SmartRecruiters (ATS)",
  providesSources: ["smartrecruiters"],
  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    const companies = readCompanies(context.settings.smartrecruitersCompanies);
    if (companies.length === 0) {
      return {
        success: true,
        jobs: [],
        error:
          "No SmartRecruiters companies configured. Set SMARTRECRUITERS_COMPANIES or smartrecruitersCompanies (comma- or newline-separated company identifiers).",
      };
    }

    const maxPerCompany = context.settings.smartrecruitersMaxJobsPerCompany
      ? Number.parseInt(context.settings.smartrecruitersMaxJobsPerCompany, 10)
      : 100;
    const cap = Number.isFinite(maxPerCompany)
      ? Math.min(Math.max(maxPerCompany, 1), 500)
      : 100;

    // A blank term would match every posting (`"x".includes("")` is true) and
    // silently disable filtering, so blank terms are discarded up front.
    const terms = context.searchTerms
      .map((term) => term.trim())
      .filter((term) => term.length > 0);
    const matchesAnyTerm = (row: SrPostingSummary): boolean =>
      terms.length === 0 || terms.some((term) => matchesTerm(row, term));

    const seen = new Set<string>();
    const out: CreateJobInput[] = [];

    try {
      for (const [i, company] of companies.entries()) {
        if (context.shouldCancel?.()) break;

        context.onProgress?.({
          phase: "list",
          termsProcessed: i,
          termsTotal: companies.length,
          currentUrl: company,
          detail: `SmartRecruiters: ${company} (${i + 1}/${companies.length})`,
        });

        // Phase 1: collect matching summaries from the paginated list.
        const matchedSummaries: SrPostingSummary[] = [];
        let offset = 0;
        let totalFound = Number.POSITIVE_INFINITY;
        while (matchedSummaries.length < cap && offset < totalFound) {
          if (context.shouldCancel?.()) break;
          const page = await fetchPostingsPage(company, offset);
          const batch = Array.isArray(page.content) ? page.content : [];
          // When the response omits `totalFound`, the current offset stands in
          // for it, which ends pagination after this page is processed.
          totalFound =
            typeof page.totalFound === "number" ? page.totalFound : offset;
          if (batch.length === 0) break;
          for (const row of batch) {
            if (matchedSummaries.length >= cap) break;
            if (matchesAnyTerm(row)) matchedSummaries.push(row);
          }
          offset += batch.length;
        }

        // Phase 2: resolve each summary to a full job record.
        let added = 0;
        for (const summary of matchedSummaries) {
          if (context.shouldCancel?.()) break;
          const id = asString(summary.id);
          if (!id) continue;
          const detail = await fetchPostingDetail(company, id);
          if (!detail) continue;
          const mapped = mapDetailToJob(detail);
          if (!mapped) continue;
          const key = mapped.sourceJobId || mapped.jobUrl;
          if (seen.has(key)) continue;
          seen.add(key);
          out.push(mapped);
          added += 1;
        }

        context.onProgress?.({
          phase: "list",
          termsProcessed: i + 1,
          termsTotal: companies.length,
          currentUrl: company,
          jobPagesProcessed: out.length,
          // Separator added: the company name and the count used to run
          // together (e.g. "acme5 jobs").
          detail: `SmartRecruiters: ${company} - ${added} jobs (${out.length} total)`,
        });
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      return { success: false, jobs: out, error: message };
    }

    return { success: true, jobs: out };
  },
};
export default manifest;