/**
 * Probe external application URLs (mainly LinkedIn) for expiry and hiring location.
 */

/** Outcome of probing a job-application link. */
export interface ApplicationLinkProbe {
  /** True when the final URL, the page body, or a 404 status indicates a closed posting. */
  expired: boolean;
  /** Best-effort location scraped from the page; omitted when expired or not found. */
  location?: string;
}

const LINKEDIN_JOB_RE = /^https?:\/\/(?:[a-z]+\.)?linkedin\.com\/jobs\/view\/\d+/i;
const EXPIRED_URL_RE = /expired_jd_redirect|unavailable|no longer available/i;
const EXPIRED_BODY_RE =
  /\bno longer accepting applications\b|\bjob you were looking for is no longer available\b|\bthis job is no longer available\b/i;

/**
 * Patterns for location data embedded as JSON-style key/value pairs in the page.
 * `groups` records what each capture group holds so the output order does not
 * depend on guessing which captured string "looks like" a country.
 */
const STRUCTURED_LOCATION_PATTERNS: ReadonlyArray<{
  readonly regex: RegExp;
  readonly groups: "locality-country" | "country-locality" | "name";
}> = [
  {
    regex:
      /"addressLocality"\s*:\s*"([^"]+)"[^}]*"addressCountry"\s*:\s*"([^"]+)"/i,
    groups: "locality-country",
  },
  {
    regex:
      /"addressCountry"\s*:\s*"([^"]+)"[^}]*"addressLocality"\s*:\s*"([^"]+)"/i,
    groups: "country-locality",
  },
  {
    regex: /"jobLocation"\s*:\s*\{[^}]*"name"\s*:\s*"([^"]+)"/i,
    groups: "name",
  },
];

/** Free-text fallback: a known Indian city followed within 40 characters by "India". */
const INDIA_CITY_RE =
  /\b(Bengaluru|Bangalore|Mumbai|Hyderabad|Pune|Chennai|Delhi|Gurgaon|Noida)[^<]{0,40}\bIndia\b/i;

/** Last-resort fallback: a known country name shortly after the word "location". */
const COUNTRY_ONLY_RE =
  /\b(?:job\s+)?location[^<]{0,40}\b(India|Canada|United States|United Kingdom)\b/i;

/**
 * Reports whether `url` (after trimming) looks like a LinkedIn job-view URL,
 * i.e. `http(s)://[sub.]linkedin.com/jobs/view/<digits>`.
 *
 * @param url Candidate URL; `undefined`, empty, or whitespace-only yields `false`.
 */
export function isLinkedInJobUrl(url: string | undefined): boolean {
  const trimmed = url?.trim();
  return Boolean(trimmed && LINKEDIN_JOB_RE.test(trimmed));
}

/**
 * Parses a date string with `Date.parse`.
 *
 * @param value Date string; `undefined` or blank yields `null`.
 * @returns The parsed `Date`, or `null` when the value is missing or unparseable.
 */
export function parseIsoDate(value: string | undefined): Date | null {
  if (!value?.trim()) return null;
  const parsed = Date.parse(value);
  return Number.isNaN(parsed) ? null : new Date(parsed);
}

/**
 * Reports whether a posting's expiry timestamp lies strictly before `now`.
 *
 * @param expiresAt Expiry date string; missing or unparseable values are treated as not expired.
 * @param now Reference instant (defaults to the current time).
 */
export function isPostingExpiredByDate(
  expiresAt: string | undefined,
  now: Date = new Date(),
): boolean {
  const expiry = parseIsoDate(expiresAt);
  if (!expiry) return false;
  return expiry.getTime() < now.getTime();
}

/**
 * Extracts a human-readable location from job-page HTML.
 *
 * Tries, in order: the structured key/value patterns, a known Indian city
 * followed by "India", and finally a bare country name near the word "location".
 *
 * @param html Raw page markup.
 * @returns The first location found, or `undefined` when nothing matches.
 */
function extractLinkedInLocationFromHtml(html: string): string | undefined {
  for (const { regex, groups } of STRUCTURED_LOCATION_PATTERNS) {
    const match = html.match(regex);
    if (!match) continue;

    const first = match[1]?.trim();
    const second = match[2]?.trim();

    if (groups !== "name" && first && second) {
      // Always emit "locality, country", whichever key appeared first in the markup.
      return groups === "locality-country"
        ? `${first}, ${second}`
        : `${second}, ${first}`;
    }
    // Single-group pattern, or one side was blank: fall back to the first capture.
    if (first) return first;
  }

  const indiaCity = html.match(INDIA_CITY_RE);
  if (indiaCity?.[0]) {
    // The match may span line breaks or runs of spaces; collapse them.
    return indiaCity[0].replace(/\s+/g, " ").trim();
  }

  const countryOnly = html.match(COUNTRY_ONLY_RE);
  if (countryOnly?.[1]) return countryOnly[1].trim();

  return undefined;
}

/**
 * Fetches a LinkedIn job URL and reports whether the posting is expired and,
 * if not, where it is located.
 *
 * A posting counts as expired when the post-redirect URL matches
 * `EXPIRED_URL_RE`, the body matches `EXPIRED_BODY_RE`, or the status is 404.
 *
 * @param url Application link to probe.
 * @param fetchImpl Fetch implementation to use (injectable for tests).
 * @returns `null` when `url` is blank or not a LinkedIn job URL; otherwise the probe result.
 *   Errors thrown by `fetchImpl` or by reading the body are not caught here.
 */
export async function probeApplicationLink(
  url: string,
  fetchImpl: typeof fetch = fetch,
): Promise<ApplicationLinkProbe | null> {
  // `?.` guards against a runtime null/undefined from untyped callers.
  const target = url?.trim();
  if (!target || !isLinkedInJobUrl(target)) return null;

  const response = await fetchImpl(target, {
    redirect: "follow",
    headers: {
      Accept: "text/html,application/xhtml+xml",
      "User-Agent": "JobOps/1.0",
    },
  });

  // After redirects the response URL may differ from the requested one.
  const finalUrl = response.url ?? target;
  const html = await response.text();

  const expired =
    EXPIRED_URL_RE.test(finalUrl) ||
    EXPIRED_BODY_RE.test(html) ||
    response.status === 404;

  // Location is not extracted for expired postings.
  const location = expired ? undefined : extractLinkedInLocationFromHtml(html);

  return { expired, ...(location ? { location } : {}) };
}