/**
 * Eluta.ca — public RSS feeds (Canadian employer-direct listings).
 *
 * Example: https://www.eluta.ca/rss?location=Toronto%2C%20ON
 *
 * No auth. Multiple `elutaRssLocations` values each fetch a feed; results are
 * merged and de-duplicated by guid/link.
 */
import type {
  ExtractorManifest,
  ExtractorRunResult,
} from "@shared/types/extractors";
import type { CreateJobInput } from "@shared/types/jobs";

const RSS_BASE = "https://www.eluta.ca/rss";

/**
 * Two-letter Canadian province/territory codes. Used to tell
 * "City, ON" (one location) apart from "CityA, CityB" (two locations)
 * when a comma-separated list is parsed.
 */
const PROVINCE_CODES: ReadonlySet<string> = new Set([
  "AB",
  "BC",
  "MB",
  "NB",
  "NL",
  "NS",
  "NT",
  "NU",
  "ON",
  "PE",
  "QC",
  "SK",
  "YT",
]);

/** Raw fields pulled out of one `<item>` element of the feed. */
interface ElutaItem {
  title?: string;
  link?: string;
  guid?: string;
  description?: string;
  pubDate?: string;
  employer?: string;
  location?: string;
}

/**
 * Returns the trimmed text content of the first `<tag>…</tag>` element in
 * `xml`, with any CDATA wrappers removed.
 *
 * @param xml - XML fragment to search.
 * @param tag - Element name; interpolated into a regex, so it must not
 *   contain regex metacharacters.
 * @returns The element text, or `undefined` when the element is absent or empty.
 */
function xmlText(xml: string, tag: string): string | undefined {
  // `(?:\s[^>]*)?` allows attributes while keeping `guid` from matching `<guidance>`.
  const pattern = new RegExp(
    `<${tag}(?:\\s[^>]*)?>([\\s\\S]*?)</${tag}>`,
  );
  const match = xml.match(pattern);
  if (!match?.[1]) return undefined;
  return (
    match[1].replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, "$1").trim() || undefined
  );
}

/**
 * Extracts every `<item>…</item>` element from an RSS document.
 *
 * @param xml - Full RSS response body.
 * @returns One {@link ElutaItem} per item, in document order.
 */
function parseItems(xml: string): ElutaItem[] {
  const items: ElutaItem[] = [];
  for (const match of xml.matchAll(/<item(?:\s[^>]*)?>([\s\S]*?)<\/item>/g)) {
    const block = match[1] ?? "";
    items.push({
      title: xmlText(block, "title"),
      link: xmlText(block, "link"),
      guid: xmlText(block, "guid"),
      description: xmlText(block, "description"),
      pubDate: xmlText(block, "pubDate"),
      employer: xmlText(block, "employer"),
      location: xmlText(block, "location"),
    });
  }
  return items;
}

/**
 * Splits a delimiter-separated location list.
 *
 * Newline, `;` and `|` always separate entries. Commas separate entries too,
 * except that a province/territory code following a city is re-attached to
 * it, so `"Toronto, ON|Vancouver, BC"` yields two locations rather than four.
 */
function splitLocationList(raw: string): string[] {
  const locations: string[] = [];
  for (const chunk of raw.split(/[\n;|]+/)) {
    const merged: string[] = [];
    for (const part of chunk.split(",")) {
      const piece = part.trim();
      if (!piece) continue;
      const lastIndex = merged.length - 1;
      const previous = merged[lastIndex];
      const attachesToPrevious =
        previous !== undefined &&
        !previous.includes(",") &&
        !PROVINCE_CODES.has(previous.toUpperCase()) &&
        PROVINCE_CODES.has(piece.toUpperCase());
      if (attachesToPrevious) {
        merged[lastIndex] = `${previous}, ${piece}`;
      } else {
        merged.push(piece);
      }
    }
    locations.push(...merged);
  }
  return locations;
}

/**
 * Parses the `elutaRssLocations` setting.
 *
 * Accepts either a JSON array of strings (entries are kept verbatim apart
 * from trimming; non-string entries are dropped) or a delimiter-separated
 * list (see {@link splitLocationList}).
 *
 * @param raw - Raw setting value.
 * @returns Non-empty, trimmed location strings; `[]` when `raw` is empty.
 */
function readLocations(raw: string | undefined): string[] {
  if (!raw) return [];
  try {
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed)) {
      return parsed
        .map((entry) => (typeof entry === "string" ? entry.trim() : ""))
        .filter(Boolean);
    }
  } catch {
    // Not JSON — treat the value as a delimiter-separated list below.
  }
  return splitLocationList(raw);
}

/**
 * Decodes the basic named/numeric HTML entities found in feed text.
 *
 * `&amp;` is decoded last so that an escaped entity such as `&amp;lt;`
 * becomes the literal text `&lt;` instead of being decoded twice.
 */
function decodeHtmlEntities(html: string): string {
  return html
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&(?:#39|apos);/g, "'")
    .replace(/&amp;/g, "&");
}

/**
 * Case-insensitive substring match of `term` against the item's title,
 * description, employer and location.
 */
function matchesTerm(item: ElutaItem, term: string): boolean {
  const needle = term.toLowerCase();
  return [item.title, item.description, item.employer, item.location].some(
    (field) => field?.toLowerCase().includes(needle) ?? false,
  );
}

/**
 * Converts a feed item into a job record.
 *
 * @returns `null` when the item has neither a link nor a guid, since there
 *   is then no URL to point the job at.
 */
function mapJob(item: ElutaItem): CreateJobInput | null {
  const jobUrl = item.link || item.guid;
  if (!jobUrl) return null;

  const title = item.title ? decodeHtmlEntities(item.title) : "Unknown Title";
  // Employer/location are decoded like the title so "AT&amp;T" reads "AT&T".
  const employer =
    decodeHtmlEntities(item.employer ?? "").trim() || "Unknown Employer";
  const location = decodeHtmlEntities(item.location ?? "").trim() || "Canada";

  return {
    source: "eluta",
    sourceJobId: item.guid ?? item.link,
    title,
    employer,
    jobUrl,
    applicationLink: jobUrl,
    location,
    datePosted: item.pubDate,
    jobDescription: item.description
      ? decodeHtmlEntities(item.description)
      : undefined,
  };
}

export const manifest: ExtractorManifest = {
  id: "eluta",
  displayName: "Eluta",
  providesSources: ["eluta"],

  /**
   * Fetches one RSS feed per configured location, keeps items matching any
   * search term (all items when no terms are given), and returns the merged,
   * de-duplicated jobs.
   *
   * A failed fetch stops the run; jobs collected so far are still returned
   * alongside `success: false` and the error message.
   */
  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    const locations = readLocations(context.settings.elutaRssLocations);
    if (locations.length === 0) {
      return {
        success: true,
        jobs: [],
        error:
          'No Eluta RSS locations configured. Set ELUTA_RSS_LOCATIONS or elutaRssLocations (comma- or newline-separated, e.g. "Toronto, ON|Vancouver, BC").',
      };
    }

    const maxJobs = context.settings.elutaMaxJobsPerTerm
      ? Number.parseInt(context.settings.elutaMaxJobsPerTerm, 10)
      : 100;
    // Clamp the per-term cap to [1, 500]; fall back to 100 on unparsable input.
    const cap = Number.isFinite(maxJobs)
      ? Math.min(Math.max(maxJobs, 1), 500)
      : 100;
    const terms = context.searchTerms;
    const maxTotal = cap * Math.max(terms.length, 1);

    const seen = new Set<string>();
    const out: CreateJobInput[] = [];

    try {
      for (const [i, loc] of locations.entries()) {
        if (context.shouldCancel?.()) break;

        const rssUrl = `${RSS_BASE}?location=${encodeURIComponent(loc)}`;
        context.onProgress?.({
          phase: "list",
          termsProcessed: i,
          termsTotal: locations.length,
          currentUrl: rssUrl,
          detail: `Eluta: fetching RSS (${i + 1}/${locations.length}) — ${loc}`,
        });

        const response = await fetch(rssUrl, {
          headers: {
            Accept: "application/rss+xml, application/xml, text/xml",
            "User-Agent": "JobOps/1.0 (+https://github.com) Eluta RSS consumer",
          },
        });
        if (!response.ok) {
          throw new Error(`Eluta RSS failed (${loc}): ${response.status}`);
        }

        const xml = await response.text();
        const items = parseItems(xml);

        for (const item of items) {
          if (out.length >= maxTotal) break;
          if (terms.length > 0 && !terms.some((t) => matchesTerm(item, t))) {
            continue;
          }
          const mapped = mapJob(item);
          if (!mapped) continue;
          // The same posting can appear in several location feeds.
          const key = mapped.sourceJobId || mapped.jobUrl;
          if (seen.has(key)) continue;
          seen.add(key);
          out.push(mapped);
        }

        context.onProgress?.({
          phase: "list",
          termsProcessed: i + 1,
          termsTotal: locations.length,
          currentUrl: rssUrl,
          jobPagesProcessed: out.length,
          detail: `Eluta: ${loc} → ${items.length} items in feed (${out.length} matched total)`,
        });
      }
      return { success: true, jobs: out };
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      return { success: false, jobs: out, error: message };
    }
  },
};

export default manifest;