Some checks failed
CI / Linting (Biome) (push) Failing after 40s
CI / Tests (push) Successful in 5m54s
CI / Type Check (adzuna-extractor) (push) Successful in 1m8s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m11s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m8s
CI / Type Check (orchestrator) (push) Successful in 1m23s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m6s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m7s
CI / Documentation (push) Successful in 1m54s
Adds Arc.dev, BC T-Net, Eluta, iCIMS tenants, QAJobsBoard, and SmartRecruiters manifests with registry/settings/UI wiring; registers full extractor list in smoke-extractors and documents supplementary board access paths. Aligns Careerjet v4 with the url query parameter and fixes strict typing in QAJobsBoard. Co-authored-by: Cursor <cursoragent@cursor.com>
202 lines
5.9 KiB
TypeScript
202 lines
5.9 KiB
TypeScript
/**
 * Eluta.ca — public RSS feeds (Canadian employer-direct listings).
 *
 * Example: https://www.eluta.ca/rss?location=Toronto%2C%20ON
 *
 * No auth. Multiple `elutaRssLocations` values each fetch a feed; results are
 * merged and de-duplicated by guid/link.
 */
|
|
|
|
import type {
|
|
ExtractorManifest,
|
|
ExtractorRunResult,
|
|
} from "@shared/types/extractors";
|
|
import type { CreateJobInput } from "@shared/types/jobs";
|
|
|
|
// Base URL of the Eluta RSS endpoint; a `location` query parameter is appended for each feed request.
const RSS_BASE = "https://www.eluta.ca/rss";
|
|
|
|
/**
 * Raw fields pulled from a single `<item>` element of an Eluta RSS feed.
 *
 * Every field is optional: a field is `undefined` when the corresponding tag
 * is missing from the item or contains only whitespace.
 */
interface ElutaItem {
  /** Text of the `<guid>` tag; preferred as the source job id. */
  guid?: string;
  /** Text of the `<link>` tag; preferred as the job URL. */
  link?: string;
  /** Text of the `<title>` tag. */
  title?: string;
  /** Text of the `<description>` tag. */
  description?: string;
  /** Text of the `<pubDate>` tag, kept as the raw string from the feed. */
  pubDate?: string;
  /** Text of the `<employer>` tag. */
  employer?: string;
  /** Text of the `<location>` tag. */
  location?: string;
}
|
|
|
|
/**
 * Extracts the trimmed text content of the first `<tag>…</tag>` element in `xml`.
 *
 * CDATA wrappers inside the element are unwrapped. Entity references are NOT
 * decoded here; callers decode them where needed.
 *
 * @param xml - XML fragment to search.
 * @param tag - Exact element name to look for (matched literally).
 * @returns The trimmed text, or `undefined` when the element is missing or
 *   contains only whitespace.
 */
function xmlText(xml: string, tag: string): string | undefined {
  // Escape regex metacharacters so the tag name is always matched literally.
  const name = tag.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  // The name must be followed by whitespace (attributes) or `>`; otherwise a
  // lookup for "location" would also match a sibling such as `<locationId>`.
  const pattern = new RegExp(
    `<${name}(?:\\s[^>]*)?>([\\s\\S]*?)</${name}\\s*>`,
  );

  const inner = pattern.exec(xml)?.[1];
  if (!inner) return undefined;

  const text = inner.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, "$1").trim();
  return text || undefined;
}
|
|
|
|
/**
 * Splits an RSS document into its `<item>` elements and reads the fields this
 * extractor uses from each one.
 *
 * The opening tag may carry attributes (for example RSS 1.0 feeds write
 * `<item rdf:about="…">`); such items are parsed the same way as a bare
 * `<item>`.
 *
 * @param xml - Full RSS document text.
 * @returns One entry per `<item>` in document order; fields that are missing
 *   or blank in the feed are `undefined`.
 */
function parseItems(xml: string): ElutaItem[] {
  // Whitespace is required before any attributes so that a tag which merely
  // starts with "item" (e.g. `<items>`) is not treated as an item.
  const itemPattern = /<item(?:\s[^>]*)?>([\s\S]*?)<\/item>/g;

  return Array.from(xml.matchAll(itemPattern), (match): ElutaItem => {
    // Capture group 1 is the element body, without the surrounding tags.
    const block = match[1] ?? "";
    return {
      title: xmlText(block, "title"),
      link: xmlText(block, "link"),
      guid: xmlText(block, "guid"),
      description: xmlText(block, "description"),
      pubDate: xmlText(block, "pubDate"),
      employer: xmlText(block, "employer"),
      location: xmlText(block, "location"),
    };
  });
}
|
|
|
|
/**
 * Parses the configured Eluta locations setting into a list of location strings.
 *
 * Accepted formats, tried in order:
 * 1. A JSON array — string entries are trimmed; non-string and blank entries
 *    are dropped.
 * 2. A JSON string — treated as a single location.
 * 3. Plain text — split on newlines, `;` or `|` when any of those is present,
 *    so that commas inside an entry ("Toronto, ON|Vancouver, BC") are kept.
 *    Only when none of those delimiters occurs is the text split on commas
 *    ("Toronto,Vancouver").
 *
 * @param raw - Raw setting value; may be undefined or empty.
 * @returns Trimmed, non-empty location strings in input order.
 */
function readLocations(raw: string | undefined): string[] {
  if (!raw) return [];

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Not JSON: handled as delimiter-separated text below.
    parsed = undefined;
  }

  if (Array.isArray(parsed)) {
    return parsed
      .map((entry: unknown) => (typeof entry === "string" ? entry.trim() : ""))
      .filter(Boolean);
  }
  if (typeof parsed === "string") {
    const single = parsed.trim();
    return single ? [single] : [];
  }

  // Locations commonly contain a comma ("City, Province"), so a comma only
  // acts as a separator when no stronger delimiter is used in the value.
  const delimiter = /[\n;|]/.test(raw) ? /[\n;|]+/ : /,+/;
  return raw
    .split(delimiter)
    .map((entry) => entry.trim())
    .filter(Boolean);
}
|
|
|
|
/** Named character references that are decoded; anything else is left as-is. */
const HTML_NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", "\u00a0"],
]);

/**
 * Decodes HTML/XML character references in `html`.
 *
 * Handles the named references amp, lt, gt, quot, apos and nbsp plus decimal
 * and hexadecimal numeric references. Decoding happens in a single pass, so
 * text is decoded exactly one level: an escaped ampersand followed by "lt;"
 * becomes a literal ampersand followed by "lt;", not "<".
 *
 * @param html - Text that may contain character references.
 * @returns The text with recognised references replaced; unknown names and
 *   out-of-range numeric references are kept verbatim.
 */
function decodeHtmlEntities(html: string): string {
  return html.replace(
    /&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z][a-zA-Z0-9]*);/g,
    (entity: string, body: string): string => {
      if (body.startsWith("#")) {
        const isHex = body[1] === "x" || body[1] === "X";
        const codePoint = Number.parseInt(
          body.slice(isHex ? 2 : 1),
          isHex ? 16 : 10,
        );
        // String.fromCodePoint throws above U+10FFFF; leave such input untouched.
        return codePoint > 0 && codePoint <= 0x10ffff
          ? String.fromCodePoint(codePoint)
          : entity;
      }
      return HTML_NAMED_ENTITIES.get(body) ?? entity;
    },
  );
}
|
|
|
|
/**
 * Reports whether `term` occurs, ignoring case, in any of the item's
 * searchable text fields (title, description, employer, location).
 *
 * @param item - Parsed feed item.
 * @param term - Search term to look for.
 * @returns `true` as soon as one field contains the term, otherwise `false`.
 */
function matchesTerm(item: ElutaItem, term: string): boolean {
  const needle = term.toLowerCase();
  const searchable = [
    item.title,
    item.description,
    item.employer,
    item.location,
  ];
  // Missing fields evaluate to undefined and therefore never match.
  return searchable.some(
    (field) => field?.toLowerCase().includes(needle) === true,
  );
}
|
|
|
|
/**
 * Converts a parsed feed item into the job shape produced by this extractor.
 *
 * The job URL is the item's link, falling back to its guid; the source job id
 * is the guid, falling back to the link. Text fields shown to users (title,
 * employer, location, description) are all entity-decoded, so an employer or
 * location containing an escaped ampersand is handled the same way as a title.
 *
 * @param item - Parsed feed item.
 * @returns The mapped job, or `null` when the item has neither link nor guid.
 */
function mapJob(item: ElutaItem): CreateJobInput | null {
  const jobUrl = item.link || item.guid;
  if (!jobUrl) return null;

  const title = item.title ? decodeHtmlEntities(item.title) : "Unknown Title";
  // Decode before trimming so a blank value still falls back to the default.
  const employer =
    (item.employer ? decodeHtmlEntities(item.employer).trim() : "") ||
    "Unknown Employer";
  const location =
    (item.location ? decodeHtmlEntities(item.location).trim() : "") ||
    "Canada";

  return {
    source: "eluta",
    sourceJobId: item.guid ?? item.link,
    title,
    employer,
    jobUrl,
    applicationLink: jobUrl,
    location,
    datePosted: item.pubDate,
    jobDescription: item.description
      ? decodeHtmlEntities(item.description)
      : undefined,
  };
}
|
|
|
|
/**
 * Extractor manifest for Eluta.
 *
 * `run` fetches one RSS feed per configured location
 * (`settings.elutaRssLocations`), keeps the items that match at least one
 * search term (or all items when no terms are given), maps them to jobs and
 * de-duplicates them by source job id / job URL.
 *
 * The total number of jobs is capped at `elutaMaxJobsPerTerm` (default 100,
 * clamped to 1–500) multiplied by the number of search terms (at least 1).
 * Once the cap is reached no further feeds are requested.
 *
 * Failure handling: a non-OK HTTP response or a thrown fetch error stops the
 * run and returns `success: false` together with the jobs collected so far.
 * Cancellation via `context.shouldCancel` returns the jobs collected so far
 * with `success: true`.
 */
export const manifest: ExtractorManifest = {
  id: "eluta",
  displayName: "Eluta",
  providesSources: ["eluta"],
  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    const locations = readLocations(context.settings.elutaRssLocations);
    if (locations.length === 0) {
      return {
        success: true,
        jobs: [],
        error:
          'No Eluta RSS locations configured. Set ELUTA_RSS_LOCATIONS or elutaRssLocations (comma- or newline-separated, e.g. "Toronto, ON|Vancouver, BC").',
      };
    }

    const maxJobs = context.settings.elutaMaxJobsPerTerm
      ? Number.parseInt(context.settings.elutaMaxJobsPerTerm, 10)
      : 100;
    // A non-numeric setting parses to NaN; fall back to the default in that case.
    const cap = Number.isFinite(maxJobs)
      ? Math.min(Math.max(maxJobs, 1), 500)
      : 100;

    const terms = context.searchTerms;
    const maxTotal = cap * Math.max(terms.length, 1);
    const seen = new Set<string>();
    const out: CreateJobInput[] = [];

    try {
      for (const [index, loc] of locations.entries()) {
        if (context.shouldCancel?.()) break;
        // Nothing from later feeds could be kept once the cap is reached, so
        // skip the remaining network requests entirely.
        if (out.length >= maxTotal) break;

        const rssUrl = `${RSS_BASE}?location=${encodeURIComponent(loc)}`;

        context.onProgress?.({
          phase: "list",
          termsProcessed: index,
          termsTotal: locations.length,
          currentUrl: rssUrl,
          detail: `Eluta: fetching RSS (${index + 1}/${locations.length}) — ${loc}`,
        });

        const response = await fetch(rssUrl, {
          headers: {
            Accept: "application/rss+xml, application/xml, text/xml",
            "User-Agent": "JobOps/1.0 (+https://github.com) Eluta RSS consumer",
          },
        });
        if (!response.ok) {
          throw new Error(`Eluta RSS failed (${loc}): ${response.status}`);
        }

        const xml = await response.text();
        const items = parseItems(xml);

        for (const item of items) {
          if (out.length >= maxTotal) break;
          if (terms.length > 0 && !terms.some((t) => matchesTerm(item, t))) {
            continue;
          }
          const mapped = mapJob(item);
          if (!mapped) continue;
          // The same posting can appear in several location feeds.
          const key = mapped.sourceJobId || mapped.jobUrl;
          if (seen.has(key)) continue;
          seen.add(key);
          out.push(mapped);
        }

        context.onProgress?.({
          phase: "list",
          termsProcessed: index + 1,
          termsTotal: locations.length,
          currentUrl: rssUrl,
          jobPagesProcessed: out.length,
          detail: `Eluta: ${loc} → ${items.length} items in feed (${out.length} matched total)`,
        });
      }

      return { success: true, jobs: out };
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      // Keep whatever was collected before the failure.
      return { success: false, jobs: out, error: message };
    }
  },
};
|
|
|
|
// Default export of the same manifest object that is also exported by name as `manifest`.
export default manifest;
|