Jobber/extractors/eluta/manifest.ts
ilia c840f289e1
Some checks failed
CI / Linting (Biome) (push) Failing after 40s
CI / Tests (push) Successful in 5m54s
CI / Type Check (adzuna-extractor) (push) Successful in 1m8s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m11s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m8s
CI / Type Check (orchestrator) (push) Successful in 1m23s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m6s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m7s
CI / Documentation (push) Successful in 1m54s
feat(extractors): expand catalog, smoke coverage, and sourcing docs
Adds Arc.dev, BC T-Net, Eluta, iCIMS tenants, QAJobsBoard, and SmartRecruiters
manifests with registry/settings/UI wiring; registers full extractor list in
smoke-extractors and documents supplementary board access paths. Aligns Careerjet
v4 with the url query parameter and fixes strict typing in QAJobsBoard.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-15 22:36:23 -04:00

202 lines
5.9 KiB
TypeScript

/**
* Eluta.ca — public RSS feeds (Canadian employer-direct listings).
*
* Example: https://www.eluta.ca/rss?location=Toronto%2C%20ON
*
* No auth. Multiple `elutaRssLocations` values each fetch a feed; results are
* merged and de-duplicated by guid/link.
*/
import type {
ExtractorManifest,
ExtractorRunResult,
} from "@shared/types/extractors";
import type { CreateJobInput } from "@shared/types/jobs";
/** Base URL of the Eluta RSS endpoint; `run` appends a URL-encoded `location` query parameter for each configured location. */
const RSS_BASE = "https://www.eluta.ca/rss";
/**
 * One `<item>` entry of an Eluta RSS feed, as produced by `parseItems`.
 *
 * Every field is optional: a child element that is missing or has empty
 * content is represented as `undefined`.
 */
interface ElutaItem {
/** Text of the `<title>` element. */
title?: string;
/** Text of the `<link>` element; preferred source of the job URL. */
link?: string;
/** Text of the `<guid>` element; preferred source of the job identifier. */
guid?: string;
/** Text of the `<description>` element. */
description?: string;
/** Text of the `<pubDate>` element, kept as the raw string from the feed. */
pubDate?: string;
/** Text of the `<employer>` element. */
employer?: string;
/** Text of the `<location>` element. */
location?: string;
}
/**
 * Extracts the text content of the first `<tag>…</tag>` element found in `xml`.
 *
 * CDATA wrappers are unwrapped and the result is trimmed. Entities are NOT
 * decoded here; callers decode where appropriate.
 *
 * @param xml - XML fragment to search.
 * @param tag - Element name to look for (matched literally, attributes allowed).
 * @returns The trimmed content, or `undefined` when the element is absent or
 *   its content is empty/whitespace-only.
 */
function xmlText(xml: string, tag: string): string | undefined {
  // Escape regex metacharacters so the tag name is always matched literally.
  const name = tag.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // After the name require either `>` or whitespace + attributes, so that
  // "link" never matches "<linkedin>". The lookbehind rejects self-closing
  // tags (`<link/>`), which have no content and no closing tag of their own.
  const pattern = new RegExp(
    `<${name}(?:\\s[^>]*)?(?<!/)>([\\s\\S]*?)</${name}\\s*>`,
  );
  const match = pattern.exec(xml);
  if (!match?.[1]) return undefined;
  const content = match[1].replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, "$1").trim();
  return content || undefined;
}
/**
 * Splits an RSS document into its `<item>` entries and reads the fields this
 * extractor cares about from each one.
 *
 * @param xml - Raw RSS feed body.
 * @returns One `ElutaItem` per `<item>…</item>` pair, in document order; an
 *   empty array when the feed contains no items.
 */
function parseItems(xml: string): ElutaItem[] {
  const OPEN_TAG = "<item>";
  const CLOSE_TAG = "</item>";
  const rawItems = xml.match(/<item>([\s\S]*?)<\/item>/g) ?? [];
  return rawItems.map((rawItem) => {
    // Each match starts with the opening tag and ends with the closing tag,
    // so slicing them off leaves just the item's inner XML.
    const inner = rawItem.slice(OPEN_TAG.length, rawItem.length - CLOSE_TAG.length);
    const field = (tag: string): string | undefined => xmlText(inner, tag);
    return {
      title: field("title"),
      link: field("link"),
      guid: field("guid"),
      description: field("description"),
      pubDate: field("pubDate"),
      employer: field("employer"),
      location: field("location"),
    };
  });
}
/**
 * Parses the `elutaRssLocations` setting into a list of location strings.
 *
 * Accepted forms:
 * - a JSON array of strings (non-string and blank entries are dropped);
 * - a plain list separated by newlines, `;`, `|` or commas.
 *
 * In the plain form a Canadian province/territory code that directly follows
 * another entry in a comma list is treated as that entry's suffix, so
 * `"Toronto, ON|Vancouver, BC"` yields `["Toronto, ON", "Vancouver, BC"]`
 * instead of four separate locations. Comma lists without province codes
 * (`"Toronto,Vancouver"`) are split exactly as before.
 *
 * @param raw - Raw setting value; `undefined` or empty yields `[]`.
 * @returns Trimmed, non-empty location strings in input order.
 */
function readLocations(raw: string | undefined): string[] {
  if (!raw) return [];
  try {
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed)) {
      return parsed
        .map((entry: unknown) => (typeof entry === "string" ? entry.trim() : ""))
        .filter(Boolean);
    }
  } catch {
    // Not JSON — fall through to the delimiter-separated form.
  }

  const provinceCode = /^(?:AB|BC|MB|NB|NL|NS|NT|NU|ON|PE|QC|SK|YT)$/i;
  const locations: string[] = [];
  for (const group of raw.split(/[\n;|]+/)) {
    // Index of the latest entry in this group that can still take a province
    // suffix; -1 when there is none (group start, or suffix already attached).
    let cityIndex = -1;
    for (const piece of group.split(",")) {
      const token = piece.trim();
      if (!token) continue;
      const isProvince = provinceCode.test(token);
      if (isProvince && cityIndex >= 0) {
        locations[cityIndex] = `${locations[cityIndex]}, ${token}`;
        cityIndex = -1;
        continue;
      }
      // A bare province code stands alone and cannot itself take a suffix.
      cityIndex = isProvince ? -1 : locations.length;
      locations.push(token);
    }
  }
  return locations;
}
/**
 * Decodes one level of HTML/XML character references in `html`.
 *
 * Handles `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, `&nbsp;` and numeric
 * references (`&#39;`, `&#x27;`). Decoding happens in a single pass, so
 * already-escaped text such as `&amp;lt;` becomes `&lt;` rather than being
 * decoded twice into `<`. Unknown or out-of-range references are left as-is.
 *
 * @param html - Text that may contain character references.
 * @returns The text with recognised references replaced.
 */
function decodeHtmlEntities(html: string): string {
  // A Map (not an object literal) so names like "constructor" cannot resolve
  // to inherited properties.
  const named = new Map<string, string>([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    ["nbsp", "\u00a0"],
  ]);
  return html.replace(
    /&(#\d+|#[xX][0-9a-fA-F]+|[a-zA-Z]+);/g,
    (entity: string, body: string): string => {
      if (!body.startsWith("#")) return named.get(body) ?? entity;
      const isHex = body[1] === "x" || body[1] === "X";
      const codePoint = Number.parseInt(
        body.slice(isHex ? 2 : 1),
        isHex ? 16 : 10,
      );
      // String.fromCodePoint throws a RangeError above U+10FFFF; keep such
      // references (and NUL) verbatim instead.
      return codePoint > 0 && codePoint <= 0x10ffff
        ? String.fromCodePoint(codePoint)
        : entity;
    },
  );
}
/**
 * Reports whether a search term occurs, case-insensitively, in any of the
 * item's title, description, employer or location.
 *
 * @param item - Parsed feed item to inspect.
 * @param term - Search term; compared as a lower-cased substring.
 * @returns `true` on the first field containing the term, otherwise `false`.
 */
function matchesTerm(item: ElutaItem, term: string): boolean {
  const needle = term.toLowerCase();
  const searchable = [item.title, item.description, item.employer, item.location];
  return searchable.some(
    (text) => text != null && text.toLowerCase().includes(needle),
  );
}
/**
 * Converts a parsed feed item into a `CreateJobInput`.
 *
 * All human-readable fields and the URL are entity-decoded, because RSS text
 * content is XML-escaped (`&amp;` in URLs and in names such as
 * "Johnson &amp; Johnson"). Missing values fall back to "Unknown Title",
 * "Unknown Employer" and "Canada".
 *
 * @param item - Parsed feed item.
 * @returns The job input, or `null` when the item has neither link nor guid
 *   and therefore no usable URL.
 */
function mapJob(item: ElutaItem): CreateJobInput | null {
  const rawUrl = item.link || item.guid;
  if (!rawUrl) return null;
  const jobUrl = decodeHtmlEntities(rawUrl);
  const title = item.title ? decodeHtmlEntities(item.title) : "Unknown Title";
  const employer =
    decodeHtmlEntities(item.employer ?? "").trim() || "Unknown Employer";
  const location = decodeHtmlEntities(item.location ?? "").trim() || "Canada";
  return {
    source: "eluta",
    // Left exactly as it appears in the feed so the identifier used for
    // de-duplication does not change.
    sourceJobId: item.guid ?? item.link,
    title,
    employer,
    jobUrl,
    applicationLink: jobUrl,
    location,
    datePosted: item.pubDate,
    jobDescription: item.description
      ? decodeHtmlEntities(item.description)
      : undefined,
  };
}
/**
 * Extractor manifest for Eluta.
 *
 * `run` fetches one RSS feed per configured location, optionally filters the
 * items by the context's search terms, maps them to jobs and de-duplicates
 * them by source job id (falling back to the job URL).
 *
 * Result contract:
 * - cancelled before start: `success: true` with no jobs;
 * - no locations configured: `success: true`, no jobs, and an explanatory
 *   `error` message;
 * - any failed or non-OK fetch: `success: false` with the jobs collected so
 *   far and the error message;
 * - otherwise: `success: true` with the collected jobs.
 */
export const manifest: ExtractorManifest = {
  id: "eluta",
  displayName: "Eluta",
  providesSources: ["eluta"],
  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    const locations = readLocations(context.settings.elutaRssLocations);
    if (locations.length === 0) {
      return {
        success: true,
        jobs: [],
        error:
          'No Eluta RSS locations configured. Set ELUTA_RSS_LOCATIONS or elutaRssLocations (comma- or newline-separated, e.g. "Toronto, ON|Vancouver, BC").',
      };
    }

    // Per-term cap: default 100, clamped to 1..500; unparsable values fall
    // back to the default.
    const maxJobs = context.settings.elutaMaxJobsPerTerm
      ? Number.parseInt(context.settings.elutaMaxJobsPerTerm, 10)
      : 100;
    const cap = Number.isFinite(maxJobs)
      ? Math.min(Math.max(maxJobs, 1), 500)
      : 100;

    const terms = context.searchTerms;
    // With no search terms every item matches, so budget for a single "term".
    const maxTotal = cap * Math.max(terms.length, 1);
    const seen = new Set<string>();
    const out: CreateJobInput[] = [];

    try {
      for (const [i, loc] of locations.entries()) {
        if (context.shouldCancel?.()) break;
        // Once the overall cap is reached nothing more can be added, so do
        // not fetch the remaining feeds.
        if (out.length >= maxTotal) break;

        const rssUrl = `${RSS_BASE}?location=${encodeURIComponent(loc)}`;
        context.onProgress?.({
          phase: "list",
          termsProcessed: i,
          termsTotal: locations.length,
          currentUrl: rssUrl,
          detail: `Eluta: fetching RSS (${i + 1}/${locations.length}) — ${loc}`,
        });

        const response = await fetch(rssUrl, {
          headers: {
            Accept: "application/rss+xml, application/xml, text/xml",
            "User-Agent": "JobOps/1.0 (+https://github.com) Eluta RSS consumer",
          },
        });
        if (!response.ok) {
          throw new Error(`Eluta RSS failed (${loc}): ${response.status}`);
        }

        const xml = await response.text();
        const items = parseItems(xml);
        for (const item of items) {
          if (out.length >= maxTotal) break;
          if (terms.length > 0 && !terms.some((t) => matchesTerm(item, t))) {
            continue;
          }
          const mapped = mapJob(item);
          if (!mapped) continue;
          // The same posting can appear in several location feeds.
          const key = mapped.sourceJobId || mapped.jobUrl;
          if (seen.has(key)) continue;
          seen.add(key);
          out.push(mapped);
        }

        context.onProgress?.({
          phase: "list",
          termsProcessed: i + 1,
          termsTotal: locations.length,
          currentUrl: rssUrl,
          jobPagesProcessed: out.length,
          detail: `Eluta: ${loc} — ${items.length} items in feed (${out.length} matched total)`,
        });
      }
      return { success: true, jobs: out };
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      // Keep whatever was collected before the failure.
      return { success: false, jobs: out, error: message };
    }
  },
};
export default manifest;