/**
 * iCIMS tenant portal — anonymous HTML search (`/jobs/search`) pattern.
 *
 * Many tenants expose listings suitable for HTML extraction when loaded with
 * `ss=1` + `in_iframe=1`. Job links typically follow `/jobs/{id}/{slug}/job`.
 */

import type {
  ExtractorManifest,
  ExtractorRunResult,
} from "@shared/types/extractors";
import type { CreateJobInput } from "@shared/types/jobs";

/** A single job listing scraped from one search-results page. */
interface ParsedJobRow {
  url: string;
  title: string;
}

/** Default / upper bound for result pages requested per (tenant, term). */
const DEFAULT_MAX_PAGES = 10;
const MAX_PAGES_LIMIT = 50;

/** Default / upper bound for jobs collected from a single tenant. */
const DEFAULT_MAX_JOBS_PER_TENANT = 250;
const MAX_JOBS_PER_TENANT_LIMIT = 2000;

/**
 * Job link that also carries a `title="{id} - {title}"` attribute.
 * Group 1 is the job URL without its query string, group 2 the title text.
 */
const TITLED_LINK_PATTERN =
  /href="(https:\/\/[^"]+\/jobs\/\d+\/[^"]+\/job)(?:\?[^"]*)?"[^>]*title="\d+\s*-\s*([^"]+)"/gi;

/**
 * Job link without a usable title attribute.
 * Group 1 is the job URL without its query string, group 2 the URL slug.
 */
const BARE_LINK_PATTERN =
  /href="(https:\/\/[^"]+\/jobs\/\d+\/([^"/]+)\/job)(?:\?[^"]*)?"[^>]*>/gi;

/**
 * Named entities handled by {@link decodeHtmlEntities}. A `Map` is used so
 * that names such as `constructor` cannot resolve to inherited properties.
 * `nbsp` maps to a plain space so decoded titles stay searchable.
 */
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", " "],
]);

/**
 * Parses the tenant setting into a list of non-empty, trimmed entries.
 *
 * Accepts either a JSON array (non-string elements are dropped) or a plain
 * string separated by commas and/or newlines.
 *
 * @param raw - Raw setting value; `undefined` or empty yields `[]`.
 * @returns The configured entries in their original order.
 */
function parseHosts(raw: string | undefined): string[] {
  if (!raw) return [];
  try {
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed)) {
      return parsed
        .map((entry: unknown) => (typeof entry === "string" ? entry.trim() : ""))
        .filter(Boolean);
    }
  } catch {
    // Not JSON — treat the value as a delimited list below.
  }
  return raw
    .split(/[\n,]+/)
    .map((entry) => entry.trim())
    .filter(Boolean);
}

/**
 * Reduces a host, protocol-relative host, or full URL to a bare `host[:port]`.
 *
 * Scheme-less input is parsed as if it were `https://…`, so a trailing path
 * or query is dropped and the host name is lower-cased by the URL parser.
 * Input the URL parser rejects is returned trimmed, minus a leading `//`.
 *
 * @param hostOrUrl - Host name or URL as configured by the user.
 * @returns The normalized host, or `""` for blank input.
 */
function normalizeHost(hostOrUrl: string): string {
  const trimmed = hostOrUrl.trim();
  if (!trimmed) return "";
  const withoutSlashes = trimmed.replace(/^\/\//, "");
  const candidate = trimmed.includes("://")
    ? trimmed
    : `https://${withoutSlashes}`;
  try {
    return new URL(candidate).host;
  } catch {
    return withoutSlashes;
  }
}

/**
 * Strips the query string from a job URL so the same posting always maps to
 * the same key. The fragment, if any, is kept.
 *
 * @param url - Absolute job URL.
 * @returns The URL without its query string.
 */
function canonicalJobUrl(url: string): string {
  try {
    const parsed = new URL(url);
    parsed.search = "";
    return parsed.toString();
  } catch {
    // Unparseable input: remove everything from `?` up to a fragment.
    return url.replace(/\?[^#]*/, "");
  }
}

/**
 * Decodes the numeric and common named HTML entities found in attribute
 * values. Unknown or out-of-range entities are left untouched.
 *
 * @param text - Raw attribute text.
 * @returns The text with recognised entities replaced.
 */
function decodeHtmlEntities(text: string): string {
  return text.replace(
    /&(#x[0-9a-f]+|#\d+|[a-z]+);/gi,
    (whole: string, body: string) => {
      if (!body.startsWith("#")) {
        return NAMED_ENTITIES.get(body.toLowerCase()) ?? whole;
      }
      const isHex = body.charAt(1).toLowerCase() === "x";
      const codePoint = Number.parseInt(
        body.slice(isHex ? 2 : 1),
        isHex ? 16 : 10,
      );
      // String.fromCodePoint throws outside the Unicode range.
      return codePoint > 0 && codePoint <= 0x10ffff
        ? String.fromCodePoint(codePoint)
        : whole;
    },
  );
}

/**
 * Turns a URL slug into a fallback title: `+` becomes a space and percent
 * escapes are decoded. A malformed escape sequence must not abort the whole
 * extraction, so in that case the undecoded text is returned instead.
 *
 * @param slug - Slug segment captured from the job URL.
 * @returns A best-effort readable title.
 */
function decodeSlug(slug: string): string {
  const spaced = slug.replace(/\+/g, " ");
  try {
    return decodeURIComponent(spaced);
  } catch {
    return spaced;
  }
}

/**
 * Extracts job rows from a search-results page.
 *
 * Links carrying a `title="{id} - {title}"` attribute are collected first;
 * remaining job links fall back to a title derived from the URL slug. Rows
 * are de-duplicated by canonical URL, first occurrence wins.
 *
 * @param html - Raw HTML of one search-results page.
 * @returns Unique rows, titled links first, each group in document order.
 */
function extractRows(html: string): ParsedJobRow[] {
  const rows: ParsedJobRow[] = [];
  const seen = new Set<string>();

  for (const match of html.matchAll(TITLED_LINK_PATTERN)) {
    const [, href = "", rawTitle = ""] = match;
    const url = canonicalJobUrl(href);
    // Attribute values are entity-encoded in the page source.
    const title = decodeHtmlEntities(rawTitle).trim();
    if (!url || !title || seen.has(url)) continue;
    seen.add(url);
    rows.push({ url, title });
  }

  for (const match of html.matchAll(BARE_LINK_PATTERN)) {
    const [, href = "", slug = ""] = match;
    const url = canonicalJobUrl(href);
    if (!url || seen.has(url)) continue;
    seen.add(url);
    rows.push({ url, title: slug ? decodeSlug(slug) : "Unknown Title" });
  }

  return rows;
}

/**
 * Case-insensitive check that a row's title contains the search term.
 *
 * @param row - Parsed job row.
 * @param term - Search term; an empty term matches every row.
 */
function matchesTerm(row: ParsedJobRow, term: string): boolean {
  const lower = term.toLowerCase();
  return row.title.toLowerCase().includes(lower);
}

/**
 * Derives a display name for the employer from the tenant host by removing
 * a leading `careers-` / `careers.` and a trailing `.icims.com`, then
 * replacing `-`, `_` and `.` with spaces.
 *
 * @param host - Normalized tenant host.
 * @returns The derived name, or `host` itself when nothing is left.
 */
function employerFromHost(host: string): string {
  const prefix = host.replace(/^careers-/i, "").replace(/^careers\./i, "");
  const base = prefix.replace(/\.icims\.com$/i, "");
  return base.replace(/[-_.]/g, " ").trim() || host;
}

/**
 * Parses an integer setting and clamps it to `[min, max]`.
 *
 * @param raw - Raw setting value.
 * @param fallback - Value used when `raw` is missing, empty or not numeric.
 * @param min - Lower bound applied to a parsed value.
 * @param max - Upper bound applied to a parsed value.
 */
function parseBoundedInt(
  raw: string | undefined,
  fallback: number,
  min: number,
  max: number,
): number {
  const parsed = raw ? Number.parseInt(raw, 10) : fallback;
  return Number.isFinite(parsed)
    ? Math.min(Math.max(parsed, min), max)
    : fallback;
}

/**
 * Downloads one search-results page from a tenant portal.
 *
 * @param host - Normalized tenant host.
 * @param term - Keyword sent as `searchKeyword` (may be empty).
 * @param page - Page number sent as `pr`.
 * @returns The response body as text.
 * @throws Error when the portal answers with a non-2xx status.
 */
async function fetchSearchPage(
  host: string,
  term: string,
  page: number,
): Promise<string> {
  const query = new URLSearchParams({
    ss: "1",
    in_iframe: "1",
    searchKeyword: term,
    pr: String(page),
  });
  const searchUrl = `https://${host}/jobs/search?${query.toString()}`;
  const response = await fetch(searchUrl, {
    headers: {
      Accept: "text/html",
      "User-Agent": "Mozilla/5.0 (compatible; JobOps/1.0) iCIMS portal reader",
    },
  });
  if (!response.ok) {
    throw new Error(`iCIMS fetch failed (${host}): ${response.status}`);
  }
  return response.text();
}

/**
 * Extractor for iCIMS-hosted career portals.
 *
 * Settings read from `context.settings`:
 * - `icimsTenants` — JSON array or comma/newline separated hosts or URLs.
 * - `icimsMaxPagesPerSearch` — pages per (tenant, term); default 10, 1–50.
 * - `icimsMaxJobsPerTenant` — job cap per tenant; default 250, 1–2000.
 */
export const manifest: ExtractorManifest = {
  id: "icims",
  displayName: "iCIMS tenants (HTML)",
  providesSources: ["icims"],

  /**
   * Walks every configured tenant, every search term and every result page,
   * collecting jobs that are unique by canonical URL across the whole run.
   *
   * @returns `success: true` with the collected jobs (possibly partial when
   * cancelled), or `success: false` with an `error` message and the jobs
   * gathered before the failure.
   */
  async run(context): Promise<ExtractorRunResult> {
    const isCancelled = (): boolean => Boolean(context.shouldCancel?.());
    if (isCancelled()) return { success: true, jobs: [] };

    // De-duplicate so a tenant listed twice is only fetched once.
    const hosts = [
      ...new Set(
        parseHosts(context.settings.icimsTenants)
          .map(normalizeHost)
          .filter(Boolean),
      ),
    ];
    if (hosts.length === 0) {
      return {
        success: false,
        jobs: [],
        error: "No icimsTenants configured",
      };
    }

    const pages = parseBoundedInt(
      context.settings.icimsMaxPagesPerSearch,
      DEFAULT_MAX_PAGES,
      1,
      MAX_PAGES_LIMIT,
    );
    const tenantCap = parseBoundedInt(
      context.settings.icimsMaxJobsPerTenant,
      DEFAULT_MAX_JOBS_PER_TENANT,
      1,
      MAX_JOBS_PER_TENANT_LIMIT,
    );
    // With no search terms, run a single unfiltered search per tenant.
    const terms = context.searchTerms.length > 0 ? context.searchTerms : [""];

    const jobs: CreateJobInput[] = [];
    const seenGlobal = new Set<string>();

    try {
      for (const [index, host] of hosts.entries()) {
        if (isCancelled()) break;
        const tenantIndex = index + 1;
        const employer = employerFromHost(host);
        let tenantCount = 0;

        context.onProgress?.({
          phase: "list",
          termsProcessed: tenantIndex - 1,
          termsTotal: hosts.length,
          currentUrl: host,
          detail: `iCIMS tenant ${tenantIndex}/${hosts.length}: ${host}`,
        });

        tenantSearch: for (const term of terms) {
          // URLs seen while paging this one search, used to detect a portal
          // that keeps answering with rows it has already served.
          const seenInSearch = new Set<string>();

          // NOTE(review): paging starts at `pr=1`; confirm against a live
          // tenant whether `pr` is zero- or one-based.
          for (let page = 1; page <= pages; page += 1) {
            // Re-check before every request so a long crawl stops promptly.
            if (tenantCount >= tenantCap || isCancelled()) break tenantSearch;

            const html = await fetchSearchPage(host, term, page);
            const pageRows = extractRows(html);
            // A page without any job links marks the end of the results.
            if (pageRows.length === 0) break;

            let sawNewRow = false;
            for (const row of pageRows) {
              if (seenInSearch.has(row.url)) continue;
              seenInSearch.add(row.url);
              sawNewRow = true;
            }
            // Nothing new on this page: further pages would only repeat.
            if (!sawNewRow) break;

            // Filter after the end-of-results checks so that a page whose
            // titles happen not to match does not stop pagination early.
            const rows = term
              ? pageRows.filter((row) => matchesTerm(row, term))
              : pageRows;

            for (const row of rows) {
              if (tenantCount >= tenantCap) break;
              if (seenGlobal.has(row.url)) continue;
              seenGlobal.add(row.url);
              tenantCount += 1;
              jobs.push({
                source: "icims",
                sourceJobId: row.url,
                title: row.title,
                employer,
                jobUrl: row.url,
                applicationLink: row.url,
              });
            }

            context.onProgress?.({
              phase: "list",
              termsProcessed: tenantIndex - 1,
              termsTotal: hosts.length,
              currentUrl: host,
              jobPagesProcessed: jobs.length,
              detail: `iCIMS ${host}: page ${page}, +${rows.length} rows`,
            });
          }
        }
      }
      return { success: true, jobs };
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : "Unknown error";
      // Keep whatever was collected before the failure.
      return { success: false, jobs, error: message };
    }
  },
};

export default manifest;