Some checks failed
CI / Linting (Biome) (push) Failing after 40s
CI / Tests (push) Successful in 5m54s
CI / Type Check (adzuna-extractor) (push) Successful in 1m8s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m11s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m8s
CI / Type Check (orchestrator) (push) Successful in 1m23s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m6s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m7s
CI / Documentation (push) Successful in 1m54s
Adds Arc.dev, BC T-Net, Eluta, iCIMS tenants, QAJobsBoard, and SmartRecruiters manifests with registry/settings/UI wiring; registers full extractor list in smoke-extractors and documents supplementary board access paths. Aligns Careerjet v4 with the url query parameter and fixes strict typing in QAJobsBoard. Co-authored-by: Cursor <cursoragent@cursor.com>
288 lines
8.7 KiB
TypeScript
288 lines
8.7 KiB
TypeScript
/**
 * SmartRecruiters public Posting API (no auth for public boards).
 *
 * https://developers.smartrecruiters.com/reference/v1listpostings
 * GET https://api.smartrecruiters.com/v1/companies/{companyIdentifier}/postings
 * GET https://api.smartrecruiters.com/v1/companies/{companyIdentifier}/postings/{postingId}
 */
|
|
|
|
import type {
|
|
ExtractorManifest,
|
|
ExtractorRunResult,
|
|
} from "@shared/types/extractors";
|
|
import type { CreateJobInput } from "@shared/types/jobs";
|
|
|
|
// Page size sent as the `limit` query parameter on every list-postings request.
const LIST_LIMIT = 100;
|
|
|
|
/**
 * Company reference embedded in a posting payload.
 *
 * Both members are optional because the payload is cast, not validated.
 */
interface SrCompany {
  /** Company identifier; used as the employer fallback when `name` is absent. */
  identifier?: string;
  /** Display name; preferred employer value and searched by term matching. */
  name?: string;
}
|
|
|
|
/**
 * Location object attached to a posting.
 *
 * Every member is optional because the payload is cast, not validated.
 */
interface SrLocation {
  /** Preformatted location; used verbatim when non-empty. */
  fullLocation?: string;
  /** Joined with `region` and `country` when `fullLocation` is missing. */
  city?: string;
  region?: string;
  country?: string;
  /** Only a strict `true` marks the mapped job as remote. */
  remote?: boolean;
  /** Declared for completeness; not read by the code in this file. */
  hybrid?: boolean;
}
|
|
|
|
/**
 * One entry of the list-postings response.
 *
 * Every member is optional because the payload is cast, not validated.
 */
interface SrPostingSummary {
  /** Posting id; needed to request the posting detail and used as the job id. */
  id?: string;
  /** Posting title. */
  name?: string;
  /** Mapped to the job's `datePosted` as-is. */
  releasedDate?: string;
  company?: SrCompany;
  location?: SrLocation;
  /** Only `label` is read; it becomes the job type. */
  typeOfEmployment?: { label?: string };
  /** Only `label` is read; it becomes the job level. */
  experienceLevel?: { id?: string; label?: string };
}
|
|
|
|
/**
 * Envelope returned by the list-postings endpoint.
 *
 * Every member is optional because the payload is cast, not validated.
 */
interface SrListResponse {
  /** Postings on the current page. */
  content?: SrPostingSummary[];
  /** Total number of postings; drives the pagination stop condition. */
  totalFound?: number;
  /** Echoed paging values; not read by the code in this file. */
  offset?: number;
  limit?: number;
}
|
|
|
|
/**
 * Payload of the single-posting endpoint: the summary fields plus links and
 * the job advertisement body.
 */
interface SrDetail extends SrPostingSummary {
  /** Fallback job URL when `applyUrl` is absent. */
  postingUrl?: string;
  /** Preferred job URL and application link. */
  applyUrl?: string;
  /** Advertisement body; each section's `text` is concatenated into the description. */
  jobAd?: {
    sections?: Record<string, { title?: string; text?: string } | undefined>;
  };
}
|
|
|
|
/**
 * Normalizes an untyped value to a trimmed, non-empty string.
 *
 * @param value - Any value, typically a field read from an unvalidated payload.
 * @returns The trimmed string, or `undefined` when `value` is not a string or
 *   contains only whitespace.
 */
function asString(value: unknown): string | undefined {
  if (typeof value !== "string") return undefined;
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
|
|
|
|
/**
 * Parses the configured SmartRecruiters company identifiers.
 *
 * Accepted formats:
 * - a JSON array of strings (non-string and blank entries are dropped);
 * - a JSON-encoded string, whose content is split like plain text;
 * - plain text separated by newlines, commas, semicolons or pipes.
 *
 * @param raw - Raw setting value; `undefined` or empty yields an empty list.
 * @returns Trimmed, non-empty identifiers in first-seen order, without
 *   duplicates (so each company is only fetched once).
 */
function readCompanies(raw: string | undefined): string[] {
  if (!raw) return [];

  const separators = /[\n,;|]+/;
  let entries: string[] | undefined;

  try {
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed)) {
      entries = parsed.map((entry: unknown) =>
        typeof entry === "string" ? entry : "",
      );
    } else if (typeof parsed === "string") {
      // A quoted value such as "acme, globex": split the decoded content so
      // the surrounding quotes do not end up inside the identifiers.
      entries = parsed.split(separators);
    }
  } catch {
    // Not JSON: treat the raw value as a delimiter-separated list below.
  }

  const candidates = entries ?? raw.split(separators);
  const unique = new Set<string>();
  for (const candidate of candidates) {
    const trimmed = candidate.trim();
    if (trimmed) unique.add(trimmed);
  }
  return [...unique];
}
|
|
|
|
/**
 * Decodes the HTML character references that commonly appear in job ads.
 *
 * Supported references: the named entities `amp`, `lt`, `gt`, `quot`, `apos`
 * and `nbsp` (decoded to a regular space), plus decimal and hexadecimal
 * numeric references. Anything else is left untouched.
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded a second time (an escaped ampersand followed by `lt;` yields
 * the literal entity text, not `<`).
 *
 * @param value - Text that may contain character references.
 * @returns The text with supported references replaced by their characters.
 */
function decodeHtmlEntities(value: string): string {
  // A Map (rather than an object literal) avoids accidental hits on inherited
  // keys such as "constructor".
  const named = new Map<string, string>([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    ["nbsp", " "],
  ]);

  return value.replace(
    /&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);/g,
    (match: string, body: string): string => {
      if (!body.startsWith("#")) return named.get(body) ?? match;

      const isHex = body[1] === "x" || body[1] === "X";
      const codePoint = Number.parseInt(
        body.slice(isHex ? 2 : 1),
        isHex ? 16 : 10,
      );
      // Only decode valid Unicode scalar values; keep NUL, surrogates and
      // out-of-range numbers as written instead of throwing.
      const isScalar =
        codePoint > 0 &&
        codePoint <= 0x10ffff &&
        !(codePoint >= 0xd800 && codePoint <= 0xdfff);
      return isScalar ? String.fromCodePoint(codePoint) : match;
    },
  );
}
|
|
|
|
/**
 * Converts an HTML fragment to single-line plain text.
 *
 * Tags are replaced by spaces, entities are decoded, then every whitespace
 * run is collapsed to one space and the result is trimmed.
 *
 * @param html - HTML fragment.
 * @returns Plain text without markup.
 */
function stripHtml(html: string): string {
  const withoutTags = html.replace(/<[^>]+>/g, " ");
  const decoded = decodeHtmlEntities(withoutTags);
  const collapsed = decoded.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
|
|
/**
 * Builds a human-readable location for a posting.
 *
 * @param loc - Location object from the payload, if any.
 * @returns `fullLocation` when it is a non-empty string; otherwise the
 *   non-empty `city`, `region` and `country` values joined with ", ";
 *   otherwise the placeholder "Unknown".
 */
function locationString(loc: SrLocation | undefined): string {
  if (!loc) return "Unknown";

  const full = asString(loc.fullLocation);
  if (full) return full;

  // Collect parts with a checked push instead of `filter(Boolean) as string[]`,
  // so no type assertion is needed.
  const parts: string[] = [];
  for (const field of [loc.city, loc.region, loc.country]) {
    const text = asString(field);
    if (text) parts.push(text);
  }
  return parts.length > 0 ? parts.join(", ") : "Unknown";
}
|
|
|
|
/**
 * Assembles a plain-text job description from the advertisement sections.
 *
 * @param detail - Posting detail payload.
 * @returns The non-blank section texts joined and stripped of HTML, or
 *   `undefined` when there are no sections or none has text.
 */
function extractDescription(detail: SrDetail): string | undefined {
  const sections = detail.jobAd?.sections;
  if (!sections || typeof sections !== "object") return undefined;

  const texts = Object.values(sections)
    .map((section) =>
      section && typeof section.text === "string" ? section.text : "",
    )
    .filter((text) => text.trim() !== "");

  return texts.length > 0 ? stripHtml(texts.join("\n\n")) : undefined;
}
|
|
|
|
/**
 * Case-insensitive substring match of a search term against a posting's
 * title, location and company name.
 *
 * @param summary - Posting summary from the list endpoint.
 * @param term - Search term; an empty term matches every posting.
 * @returns `true` when any of the searched fields contains the term.
 */
function matchesTerm(summary: SrPostingSummary, term: string): boolean {
  const needle = term.toLowerCase();
  // An empty needle is contained in every string; keep that "match all"
  // behavior explicit rather than relying on the location placeholder.
  if (needle === "") return true;

  const haystacks: unknown[] = [summary.name, summary.company?.name];

  // locationString() returns the placeholder "Unknown" when no location is
  // available. Searching it would make terms such as "now" or "know" match
  // every posting without a location, so the placeholder is skipped.
  const place = locationString(summary.location);
  if (place !== "Unknown") haystacks.push(place);

  // The payload is unvalidated, so only real strings are searched.
  return haystacks.some(
    (text) => typeof text === "string" && text.toLowerCase().includes(needle),
  );
}
|
|
|
|
/**
 * Requests one page of public postings for a company.
 *
 * @param company - Company identifier; URL-encoded into the path.
 * @param offset - Zero-based index of the first posting to return.
 * @returns The response body cast to `SrListResponse` (not validated). A 404
 *   response is reported as an empty page rather than an error.
 * @throws Error when the response has any other non-success status.
 */
async function fetchPostingsPage(
  company: string,
  offset: number,
): Promise<SrListResponse> {
  const endpoint = `https://api.smartrecruiters.com/v1/companies/${encodeURIComponent(company)}/postings`;
  const query = `destination=PUBLIC&limit=${LIST_LIMIT}&offset=${offset}`;

  const response = await fetch(`${endpoint}?${query}`, {
    headers: { Accept: "application/json" },
  });

  if (response.status === 404) {
    return { content: [], totalFound: 0, offset: 0, limit: LIST_LIMIT };
  }
  if (response.ok) {
    return (await response.json()) as SrListResponse;
  }
  throw new Error(
    `SmartRecruiters list for "${company}" failed with status ${response.status}`,
  );
}
|
|
|
|
/**
 * Requests the full detail of a single posting.
 *
 * @param company - Company identifier; URL-encoded into the path.
 * @param postingId - Posting id; URL-encoded into the path.
 * @returns The response body cast to `SrDetail` (not validated), or `null`
 *   for any non-success status.
 */
async function fetchPostingDetail(
  company: string,
  postingId: string,
): Promise<SrDetail | null> {
  const companySegment = encodeURIComponent(company);
  const postingSegment = encodeURIComponent(postingId);
  const response = await fetch(
    `https://api.smartrecruiters.com/v1/companies/${companySegment}/postings/${postingSegment}`,
    { headers: { Accept: "application/json" } },
  );

  if (response.ok) {
    return (await response.json()) as SrDetail;
  }
  return null;
}
|
|
|
|
/**
 * Converts a posting detail payload into a job record.
 *
 * @param detail - Posting detail payload.
 * @returns The mapped job, or `null` when the posting has no usable id or
 *   neither an apply URL nor a posting URL.
 */
function mapDetailToJob(detail: SrDetail): CreateJobInput | null {
  const sourceJobId = asString(detail.id);
  if (!sourceJobId) return null;

  // The apply URL is preferred; the posting URL is only a fallback.
  const applyUrl = asString(detail.applyUrl);
  const jobUrl = applyUrl ?? asString(detail.postingUrl);
  if (!jobUrl) return null;

  const company = detail.company;
  const employer =
    asString(company?.name) ??
    asString(company?.identifier) ??
    "Unknown Employer";

  return {
    source: "smartrecruiters",
    sourceJobId,
    title: asString(detail.name) ?? "Unknown Title",
    employer,
    jobUrl,
    applicationLink: applyUrl ?? jobUrl,
    location: locationString(detail.location),
    isRemote: detail.location?.remote === true,
    datePosted: asString(detail.releasedDate),
    jobDescription: extractDescription(detail),
    // asString never returns an empty string, so no extra normalization is needed.
    jobType: asString(detail.typeOfEmployment?.label),
    jobLevel: asString(detail.experienceLevel?.label),
  };
}
|
|
|
|
/**
 * Extractor manifest for SmartRecruiters-hosted career sites.
 *
 * `run` walks the configured companies one by one. For each company it pages
 * through the public postings list, keeps the postings that match at least
 * one search term (all postings when there are no terms), fetches the detail
 * of each kept posting, and maps it to a job. Jobs are de-duplicated across
 * companies by source job id.
 *
 * Settings read from `context.settings`:
 * - `smartrecruitersCompanies`: company identifiers (see `readCompanies`).
 * - `smartrecruitersMaxJobsPerCompany`: per-company cap on matched postings,
 *   clamped to 1..500; defaults to 100 when missing or not a number.
 *
 * A failed list request aborts the run and returns `success: false` together
 * with the jobs collected so far. A failed detail request only skips that
 * posting.
 */
export const manifest: ExtractorManifest = {
  id: "smartrecruiters",
  displayName: "SmartRecruiters (ATS)",
  providesSources: ["smartrecruiters"],

  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    const companies = readCompanies(context.settings.smartrecruitersCompanies);
    if (companies.length === 0) {
      return {
        success: true,
        jobs: [],
        error:
          "No SmartRecruiters companies configured. Set SMARTRECRUITERS_COMPANIES or smartrecruitersCompanies (comma- or newline-separated company identifiers).",
      };
    }

    const maxPerCompany = context.settings.smartrecruitersMaxJobsPerCompany
      ? Number.parseInt(context.settings.smartrecruitersMaxJobsPerCompany, 10)
      : 100;
    const cap = Number.isFinite(maxPerCompany)
      ? Math.min(Math.max(maxPerCompany, 1), 500)
      : 100;

    // Normalize search terms: surrounding whitespace would prevent matches at
    // the edges of a field, and a blank term matches every posting, silently
    // disabling the filter for the other terms.
    const terms = context.searchTerms
      .map((term) => term.trim())
      .filter((term) => term.length > 0);

    const seen = new Set<string>();
    const out: CreateJobInput[] = [];

    try {
      for (const [i, company] of companies.entries()) {
        if (context.shouldCancel?.()) break;

        context.onProgress?.({
          phase: "list",
          termsProcessed: i,
          termsTotal: companies.length,
          currentUrl: company,
          detail: `SmartRecruiters: ${company} (${i + 1}/${companies.length})`,
        });

        // Phase 1: page through the list endpoint and collect matching rows.
        const matchedSummaries: SrPostingSummary[] = [];
        let offset = 0;
        let totalFound = Number.POSITIVE_INFINITY;

        while (matchedSummaries.length < cap && offset < totalFound) {
          if (context.shouldCancel?.()) break;

          const page = await fetchPostingsPage(company, offset);
          const batch = Array.isArray(page.content) ? page.content : [];
          // Without a numeric total, fall back to the current offset so that
          // paging stops after this page instead of looping indefinitely.
          totalFound =
            typeof page.totalFound === "number" ? page.totalFound : offset;
          if (batch.length === 0) break;

          for (const row of batch) {
            if (matchedSummaries.length >= cap) break;
            if (terms.length > 0 && !terms.some((t) => matchesTerm(row, t))) {
              continue;
            }
            matchedSummaries.push(row);
          }

          offset += batch.length;
          if (offset >= totalFound) break;
        }

        // Phase 2: fetch the detail of every matched posting and map it.
        let added = 0;
        for (const summary of matchedSummaries) {
          if (context.shouldCancel?.()) break;

          const id = asString(summary.id);
          if (!id) continue;

          const detail = await fetchPostingDetail(company, id);
          if (!detail) continue;

          const mapped = mapDetailToJob(detail);
          if (!mapped) continue;

          const key = mapped.sourceJobId || mapped.jobUrl;
          if (seen.has(key)) continue;
          seen.add(key);

          out.push(mapped);
          added += 1;
        }

        context.onProgress?.({
          phase: "list",
          termsProcessed: i + 1,
          termsTotal: companies.length,
          currentUrl: company,
          jobPagesProcessed: out.length,
          detail: `SmartRecruiters: ${company} → ${added} jobs (${out.length} total)`,
        });
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      // Keep whatever was collected before the failure.
      return { success: false, jobs: out, error: message };
    }

    return { success: true, jobs: out };
  },
};

export default manifest;
|