/**
 * BC T-Net — public RSS aggregate of BC tech jobs.
 *
 * Default: https://www.bctechnology.com/rss/jobs/tnetjobs.xml
 *
 * Feeds may embed `<![CDATA[&]]>` inside `<link>` URLs — normalized to a plain
 * `&` while the feed is parsed.
 */
import type {
  ExtractorManifest,
  ExtractorRunResult,
} from "@shared/types/extractors";
import type { CreateJobInput } from "@shared/types/jobs";

/** Feed used when no URLs are configured in settings. */
const DEFAULT_FEED_URL = "https://www.bctechnology.com/rss/jobs/tnetjobs.xml";

/** Per-term job cap used when the setting is absent or not a number. */
const DEFAULT_MAX_JOBS_PER_TERM = 400;

/** Upper bound applied to the configured per-term job cap. */
const MAX_JOBS_PER_TERM_CEILING = 2000;

/** Raw text fields pulled out of a single RSS `<item>` element. */
interface BcItem {
  title?: string;
  link?: string;
  guid?: string;
  description?: string;
  pubDate?: string;
  category?: string;
}

/**
 * Extracts the text of the first `<tag>…</tag>` element found in `xml`.
 *
 * CDATA wrappers inside the element are removed (their content is kept) and
 * the result is trimmed.
 *
 * @param xml - XML fragment to search.
 * @param tag - Element name; interpolated into a regular expression, so it
 *   must not contain regex metacharacters.
 * @returns The element text, or `undefined` when the element is missing or
 *   its text is empty after trimming.
 */
function xmlText(xml: string, tag: string): string | undefined {
  // `[^>]*` tolerates attributes on the opening tag (e.g. `<guid isPermaLink="false">`).
  const pattern = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`);
  const match = xml.match(pattern);
  if (!match?.[1]) return undefined;
  return (
    match[1].replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, "$1").trim() || undefined
  );
}

/**
 * Cleans a `<link>` value: replaces any `<![CDATA[&]]>` marker with a plain
 * `&` and trims surrounding whitespace.
 */
function normalizeFeedLink(raw: string): string {
  return raw.replace(/<!\[CDATA\[&\]\]>/g, "&").trim();
}

/**
 * Splits an RSS document into its `<item>` elements and extracts the fields
 * this extractor uses from each one.
 *
 * @param xml - Full RSS document text.
 * @returns One entry per `<item>…</item>` block, in document order. Fields
 *   that are missing or empty are `undefined`.
 */
function parseItems(xml: string): BcItem[] {
  const items: BcItem[] = [];
  const blocks = xml.match(/<item>([\s\S]*?)<\/item>/g) ?? [];
  for (const raw of blocks) {
    // Drop the wrapping tags so field lookups only see the item body.
    const block = raw.replace(/^<item>/, "").replace(/<\/item>$/, "");
    const linkRaw = xmlText(block, "link");
    items.push({
      title: xmlText(block, "title"),
      link: linkRaw ? normalizeFeedLink(linkRaw) : undefined,
      guid: xmlText(block, "guid"),
      description: xmlText(block, "description"),
      pubDate: xmlText(block, "pubDate"),
      category: xmlText(block, "category"),
    });
  }
  return items;
}

/**
 * Parses the feed-URL setting.
 *
 * Accepts either a JSON array (non-string entries are dropped) or a plain
 * string whose entries are separated by newlines and/or `|`.
 *
 * @param raw - Raw setting value.
 * @returns Trimmed, non-empty entries; an empty array when `raw` is empty or
 *   undefined.
 */
function readUrls(raw: string | undefined): string[] {
  if (!raw) return [];
  try {
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed)) {
      return parsed
        .map((entry: unknown) => (typeof entry === "string" ? entry.trim() : ""))
        .filter(Boolean);
    }
  } catch {
    // Not JSON — treat the value as a delimiter-separated list below.
  }
  return raw
    .split(/[\n|]+/)
    .map((entry) => entry.trim())
    .filter(Boolean);
}

/**
 * Decodes the small set of HTML entities handled by this extractor:
 * `&amp;`, `&lt;`, `&gt;`, and the numeric forms of `/` and `&`.
 *
 * `&amp;` is decoded first, so double-escaped input such as `&amp;lt;` ends up
 * fully decoded to `<`.
 */
function decodeHtmlEntities(html: string): string {
  return html
    .replace(/&amp;/g, "&")
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&#(?:x2F|47);/gi, "/")
    .replace(/&#(?:x26|38);/gi, "&");
}

/**
 * Case-insensitive substring test of `term` against the item's title,
 * description and category.
 */
function matchesTerm(item: BcItem, term: string): boolean {
  const lower = term.toLowerCase();
  const hay =
    `${item.title ?? ""} ${item.description ?? ""} ${item.category ?? ""}`.toLowerCase();
  return hay.includes(lower);
}

/**
 * Converts a parsed feed item into a job record.
 *
 * @returns `null` when the item has no link; otherwise a job whose
 *   `sourceJobId` is the item's guid, falling back to the link.
 */
function mapJob(item: BcItem): CreateJobInput | null {
  const jobUrl = item.link?.trim();
  if (!jobUrl) return null;

  const title = item.title ? decodeHtmlEntities(item.title) : "Unknown Title";
  // The feed's <category> value is used as the employer name.
  const employer = item.category?.trim() || "Unknown Employer";

  return {
    source: "bctenet",
    sourceJobId: item.guid ?? jobUrl,
    title,
    employer,
    jobUrl,
    applicationLink: jobUrl,
    location: "British Columbia, Canada",
    datePosted: item.pubDate,
    jobDescription: item.description
      ? decodeHtmlEntities(item.description)
      : undefined,
  };
}

export const manifest: ExtractorManifest = {
  id: "bctenet",
  displayName: "BC T-Net (RSS)",
  providesSources: ["bctenet"],

  /**
   * Fetches each configured feed in order, keeps items matching at least one
   * search term (all items when there are no terms), and de-duplicates them
   * by `sourceJobId`.
   *
   * The total number of kept jobs is capped at the per-term cap multiplied by
   * the number of search terms (at least one). A non-OK response or thrown
   * error stops processing and yields `success: false` together with the jobs
   * collected so far.
   */
  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    const configured = readUrls(context.settings.bctenetRssUrls);
    const urls = configured.length > 0 ? configured : [DEFAULT_FEED_URL];

    const maxJobs = context.settings.bctenetMaxJobsPerTerm
      ? Number.parseInt(context.settings.bctenetMaxJobsPerTerm, 10)
      : DEFAULT_MAX_JOBS_PER_TERM;
    // A non-numeric setting parses to NaN; fall back to the default then.
    const cap = Number.isFinite(maxJobs)
      ? Math.min(Math.max(maxJobs, 1), MAX_JOBS_PER_TERM_CEILING)
      : DEFAULT_MAX_JOBS_PER_TERM;

    const terms = context.searchTerms;
    const maxTotal = cap * Math.max(terms.length, 1);

    const seen = new Set<string>();
    const out: CreateJobInput[] = [];

    try {
      for (let i = 0; i < urls.length; i += 1) {
        if (context.shouldCancel?.()) break;
        const rssUrl = urls[i];

        context.onProgress?.({
          phase: "list",
          termsProcessed: i,
          termsTotal: urls.length,
          currentUrl: rssUrl,
          detail: `BC T-Net: fetching (${i + 1}/${urls.length})`,
        });

        const response = await fetch(rssUrl, {
          headers: {
            Accept: "application/rss+xml, application/xml, text/xml",
            "User-Agent":
              "Mozilla/5.0 (compatible; JobOps/1.0) BC T-Net RSS consumer",
          },
        });
        if (!response.ok) {
          throw new Error(`BC T-Net RSS failed: ${response.status}`);
        }

        const xml = await response.text();
        const items = parseItems(xml);

        for (const item of items) {
          if (out.length >= maxTotal) break;
          if (terms.length > 0 && !terms.some((t) => matchesTerm(item, t))) {
            continue;
          }

          const mapped = mapJob(item);
          if (!mapped) continue;

          // The same posting can appear in several feeds; keep the first one.
          const key = mapped.sourceJobId || mapped.jobUrl;
          if (seen.has(key)) continue;
          seen.add(key);
          out.push(mapped);
        }

        context.onProgress?.({
          phase: "list",
          termsProcessed: i + 1,
          termsTotal: urls.length,
          currentUrl: rssUrl,
          jobPagesProcessed: out.length,
          detail: `BC T-Net: ${items.length} items (${out.length} kept total)`,
        });
      }

      return { success: true, jobs: out };
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      return { success: false, jobs: out, error: message };
    }
  },
};

export default manifest;