Some checks failed
CI / Linting (Biome) (push) Failing after 36s
CI / Tests (push) Successful in 5m54s
CI / Type Check (adzuna-extractor) (push) Successful in 1m6s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m9s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m5s
CI / Type Check (orchestrator) (push) Successful in 1m21s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m4s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m4s
CI / Documentation (push) Successful in 1m52s
Adds extractor packages: arbeitnow, ashby, careerjet, fourdayweek,
greenhouse, himalayas, jobicy, jooble, lever, reed, remoteok, remotive,
themuse, usajobs, weworkremotely, workday — each with manifest, package
metadata and README.
Pipeline / shared:
- shared/job-fingerprint: stable hash for cross-source dedup, with tests
- discover-jobs: dedup via fingerprint and richer per-source merging
- jobs repository: fingerprint-aware upsert / lookup
- settings-registry, settings types/routes, demo-defaults: knobs for the
new sources
- shared extractors index: register the new manifests
- location-support, profiles route: small fixes for the new sources
Tooling:
- scripts/smoke-extractors.ts to sanity-check each source locally
- scripts/jobber-cron-{cherepaha,dobkin}.env.example: per-host cron
templates (CHANGEME placeholders only)
- .env.example: documented env vars for the new extractors
- .gitignore: ignore extractors/*/storage/ runtime caches (was ukvisajobs only)
Co-authored-by: Cursor <cursoragent@cursor.com>
264 lines
7.7 KiB
TypeScript
264 lines
7.7 KiB
TypeScript
/**
 * Workday public career-site extractor.
 *
 * Workday tenants expose their public job board over a JSON CXS endpoint:
 *   POST {tenantUrl}/wday/cxs/{tenant}/{site}/jobs
 *   { appliedFacets: {}, limit: 20, offset: 0, searchText: "..." }
 *
 * `workdayTenants` accepts entries shaped as JSON objects (preferred) or as
 * career-page URLs that are parsed on a best-effort basis. When the tenant and
 * site cannot be recovered from an entry, that entry is skipped and processing
 * continues with the next one.
 */
|
|
|
|
import type {
|
|
ExtractorManifest,
|
|
ExtractorRunResult,
|
|
} from "@shared/types/extractors";
|
|
import type { CreateJobInput } from "@shared/types/jobs";
|
|
|
|
/** One Workday career site to query, as resolved from the `workdayTenants` setting. */
interface WorkdayTarget {
  /** Employer name; copied onto every job and used in progress/error messages. */
  company: string;
  /** Base URL of the tenant, e.g. `https://acme.wd5.myworkdayjobs.com`. */
  tenantUrl: string;
  /** Tenant identifier used in the CXS endpoint path. */
  tenant: string;
  /** Career-site name used in the CXS endpoint path and in job URLs. */
  site: string;
  /** Locale path segment for job URLs; `en-US` is used when absent. */
  locale?: string;
}
|
|
|
|
/** The subset of a job-posting record from the CXS response that this extractor reads. */
interface WorkdayJobPosting {
  /** Job title. */
  title?: string;
  /** Path of the posting; appended to the site URL and used as the source job id. */
  externalPath?: string;
  /** Location text, passed through as the job location. */
  locationsText?: string;
  /** Posting-date text, passed through unmodified as `datePosted`. */
  postedOn?: string;
  /** Extra fields; the first non-empty one is mapped to `jobType`. */
  bulletFields?: string[];
}
|
|
/** Shape of the CXS `jobs` response body, limited to the fields read here. */
interface WorkdayResponse {
  /** Number of postings matching the search, when the endpoint reports it. */
  total?: number;
  /** Postings contained in the requested page. */
  jobPostings?: WorkdayJobPosting[];
}
|
|
|
|
/**
 * Normalizes an arbitrary value to a trimmed, non-empty string.
 *
 * @param value - Any value; only strings are considered.
 * @returns The trimmed string, or `undefined` when `value` is not a string or
 *   is empty after trimming.
 */
function asString(value: unknown): string | undefined {
  if (typeof value === "string") {
    const text = value.trim();
    if (text.length > 0) return text;
  }
  return undefined;
}
|
|
|
|
/**
 * Extracts the tenant name from a Workday jobs host.
 *
 * A host such as `acme.wd5.myworkdayjobs.com` yields the tenant `acme`.
 *
 * @param host - Host name to inspect.
 * @returns The first host label when the host has the form
 *   `<tenant>.wd<digits>.myworkdayjobs.com` (case-insensitive), else `null`.
 */
function inferTenantFromHost(host: string): string | null {
  const parsed = /^([^.]+)\.wd\d+\.myworkdayjobs\.com$/i.exec(host);
  if (parsed === null) return null;
  return parsed[1];
}
|
|
|
|
/**
 * Parses one `workdayTenants` entry into a {@link WorkdayTarget}.
 *
 * Two entry forms are understood:
 *
 * 1. A JSON object with string `company`, `tenantUrl`, `tenant` and `site`
 *    fields and an optional `locale`. Values are trimmed; `tenantUrl`,
 *    `tenant` and `site` must be non-blank because they are used to build the
 *    endpoint URL. A blank `company` falls back to the tenant name, and a
 *    blank `locale` is treated as absent.
 * 2. A career-page URL on a `<tenant>.wd<N>.myworkdayjobs.com` host, with or
 *    without a leading locale segment, e.g.
 *    `https://nvidia.wd5.myworkdayjobs.com/en-US/NVIDIAExternalCareerSite` or
 *    `https://nvidia.wd5.myworkdayjobs.com/NVIDIAExternalCareerSite`.
 *
 * @param entry - Raw entry text (JSON object or URL).
 * @returns The parsed target, or `null` when tenant and site cannot be recovered.
 */
function parseTargetEntry(entry: string): WorkdayTarget | null {
  const trimmed = entry.trim();
  if (!trimmed) return null;

  // First, try the JSON object form.
  let parsed: unknown;
  try {
    parsed = JSON.parse(trimmed);
  } catch {
    parsed = undefined; // Not JSON; handled as a URL below.
  }
  if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
    const fields = parsed as Record<string, unknown>;
    // Strip every trailing slash so later `${tenantUrl}/...` joins stay clean.
    const tenantUrl = asString(fields.tenantUrl)?.replace(/\/+$/, "");
    const tenant = asString(fields.tenant);
    const site = asString(fields.site);
    if (typeof fields.company === "string" && tenantUrl && tenant && site) {
      return {
        company: asString(fields.company) ?? tenant,
        tenantUrl,
        tenant,
        site,
        locale: asString(fields.locale),
      };
    }
  }

  // URL form.
  let url: URL;
  try {
    url = new URL(trimmed);
  } catch {
    return null;
  }

  // `hostname` excludes any port, which would otherwise defeat host matching.
  const tenant = inferTenantFromHost(url.hostname);
  if (!tenant) return null;

  const segments = url.pathname.split("/").filter(Boolean);
  const first = segments[0];
  if (first === undefined) return null;

  // The locale segment is optional. Only treat the first segment as a locale
  // when it looks like one (`en`, `en-US`, ...) AND a site segment follows it;
  // otherwise the first segment is the site itself.
  const second = segments[1];
  const hasLocale =
    second !== undefined && /^[a-z]{2}(?:-[a-z]{2})?$/i.test(first);

  return {
    company: tenant,
    tenantUrl: `${url.protocol}//${url.host}`,
    tenant,
    site: hasLocale ? second : first,
    locale: hasLocale ? first : undefined,
  };
}
|
|
|
|
/**
 * Resolves the raw `workdayTenants` setting into a list of targets.
 *
 * Accepted encodings of `raw`:
 * - a JSON array whose elements are entry strings or entry objects,
 * - a single JSON object (one entry, possibly pretty-printed over several lines),
 * - a JSON-encoded string containing newline-separated entries,
 * - plain text with one entry (URL or single-line JSON object) per line.
 *
 * Entries that `parseTargetEntry` cannot understand are skipped.
 *
 * @param raw - Setting value; `undefined` or empty yields no targets.
 * @returns Parsed targets in input order.
 */
function readTargets(raw: string | undefined): WorkdayTarget[] {
  if (!raw) return [];

  const splitLines = (text: string): string[] =>
    text
      .split(/\n+/)
      .map((line) => line.trim())
      .filter(Boolean);

  let entries: string[] = [];
  try {
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed)) {
      // Re-serialize object elements so every entry goes through the same
      // per-entry parser as plain-text input.
      entries = parsed
        .filter((entry) => entry != null)
        .map((entry) =>
          typeof entry === "string" ? entry : JSON.stringify(entry),
        )
        .filter(Boolean);
    } else if (typeof parsed === "string") {
      entries = splitLines(parsed);
    } else if (typeof parsed === "object" && parsed !== null) {
      // A lone object is one entry; collapsing it to a single line keeps a
      // pretty-printed object from being torn apart by the line splitter.
      entries = [JSON.stringify(parsed)];
    }
  } catch {
    // Not JSON as a whole; treated as line-separated text below.
  }

  if (entries.length === 0) entries = splitLines(raw);

  const out: WorkdayTarget[] = [];
  for (const entry of entries) {
    const target = parseTargetEntry(entry);
    if (target) out.push(target);
  }
  return out;
}
|
|
|
|
/**
 * Converts one Workday posting into the pipeline's job input shape.
 *
 * @param posting - Posting record from the CXS response.
 * @param target - Career site the posting was fetched from.
 * @returns The mapped job, or `null` when the posting has no usable
 *   `externalPath` (without it no URL or id can be built).
 */
function mapPosting(
  posting: WorkdayJobPosting,
  target: WorkdayTarget,
): CreateJobInput | null {
  const externalPath = asString(posting.externalPath);
  if (!externalPath) return null;

  // A blank locale would produce `//` in the URL, so treat it as absent.
  const locale = asString(target.locale) ?? "en-US";
  // Guarantee exactly one separator between the site and the posting path.
  const pathSuffix = externalPath.startsWith("/")
    ? externalPath
    : `/${externalPath}`;
  const jobUrl = `${target.tenantUrl}/${locale}/${target.site}${pathSuffix}`;

  // First bullet field that is a non-blank string. Going through `asString`
  // skips whitespace-only and non-string entries instead of returning "" or
  // throwing on `.trim()`.
  // NOTE(review): confirm that bulletFields really carries the job type for
  // the configured tenants; the payload is not validated here.
  let jobType: string | undefined;
  if (Array.isArray(posting.bulletFields)) {
    for (const field of posting.bulletFields) {
      jobType = asString(field);
      if (jobType) break;
    }
  }

  return {
    source: "workday",
    // Kept as the raw path so ids of previously stored jobs stay stable.
    sourceJobId: externalPath,
    title: asString(posting.title) ?? "Unknown Title",
    employer: target.company,
    jobUrl,
    applicationLink: jobUrl,
    location: asString(posting.locationsText),
    datePosted: asString(posting.postedOn),
    jobType,
  };
}
|
|
|
|
/**
 * Requests one page of postings from a tenant's CXS `jobs` endpoint.
 *
 * @param args.target - Career site to query; `tenant` and `site` are
 *   URL-encoded into the endpoint path.
 * @param args.searchText - Search text sent to the endpoint.
 * @param args.offset - Zero-based index of the first posting to return.
 * @param args.limit - Maximum number of postings to return.
 * @returns The response body, cast to {@link WorkdayResponse} without validation.
 * @throws Error when the HTTP status is not OK; network and JSON-parse
 *   failures from `fetch` / `response.json()` propagate unchanged.
 */
async function fetchPage(args: {
  target: WorkdayTarget;
  searchText: string;
  offset: number;
  limit: number;
}): Promise<WorkdayResponse> {
  const { target, searchText, offset, limit } = args;

  const tenantSegment = encodeURIComponent(target.tenant);
  const siteSegment = encodeURIComponent(target.site);
  const endpoint = `${target.tenantUrl}/wday/cxs/${tenantSegment}/${siteSegment}/jobs`;

  const payload = JSON.stringify({
    appliedFacets: {},
    limit,
    offset,
    searchText,
  });

  const response = await fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Accept: "application/json",
    },
    body: payload,
  });

  if (response.ok) {
    return (await response.json()) as WorkdayResponse;
  }
  throw new Error(
    `Workday request for "${target.company}" failed with status ${response.status}`,
  );
}
|
|
|
|
/**
 * Extractor manifest for Workday-hosted career sites.
 *
 * `run` resolves the configured targets from `context.settings.workdayTenants`,
 * then for every target and every search term pages through the CXS `jobs`
 * endpoint, mapping and de-duplicating postings as it goes.
 *
 * Result contract:
 * - no targets configured: `success: true`, no jobs, explanatory `error` text;
 * - every target failed and nothing was collected: `success: false`;
 * - otherwise `success: true` with the collected jobs, and `error` listing
 *   any per-target failures.
 *
 * A failure while processing a target abandons that target's remaining
 * search terms and moves on to the next target. Cancellation via
 * `context.shouldCancel` is checked before each target, term and page; jobs
 * collected so far are still returned.
 */
export const manifest: ExtractorManifest = {
  id: "workday",
  displayName: "Workday (ATS)",
  providesSources: ["workday"],
  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    const targets = readTargets(context.settings.workdayTenants);
    if (targets.length === 0) {
      return {
        success: true,
        jobs: [],
        error:
          "No Workday tenants configured. Set WORKDAY_TENANTS or the workdayTenants setting to a list of career-site URLs (or JSON entries with company/tenantUrl/tenant/site).",
      };
    }

    // Page size sent as `limit`, and the per-search cap on how deep to page.
    const pageSize = 20;
    const maxOffsetPerSearch = 1000;

    const terms = context.searchTerms.length > 0 ? context.searchTerms : [""];
    const seen = new Set<string>();
    const out: CreateJobInput[] = [];
    const errors: string[] = [];

    for (const [targetIndex, target] of targets.entries()) {
      if (context.shouldCancel?.()) break;
      try {
        for (const [termIndex, rawTerm] of terms.entries()) {
          if (context.shouldCancel?.()) break;
          const term = rawTerm.trim();
          context.onProgress?.({
            phase: "list",
            termsProcessed: targetIndex * terms.length + termIndex,
            termsTotal: targets.length * terms.length,
            currentUrl: `${target.company} (${term || "all"})`,
            detail: `Workday: ${target.company} term ${termIndex + 1}/${terms.length}`,
          });

          let offset = 0;
          let total = Number.POSITIVE_INFINITY;
          while (offset < total && offset < maxOffsetPerSearch) {
            if (context.shouldCancel?.()) break;
            const body = await fetchPage({
              target,
              searchText: term,
              offset,
              limit: pageSize,
            });

            // Only a positive total is allowed to bound the loop. A zero or
            // missing total on a later page (reportedly sent by some tenants;
            // unverified here) must not end pagination early. Ignoring it is
            // safe because an empty or short page already stops the loop.
            if (typeof body.total === "number" && body.total > 0) {
              total = body.total;
            }

            const postings = Array.isArray(body.jobPostings)
              ? body.jobPostings
              : [];
            if (postings.length === 0) break;

            for (const posting of postings) {
              const mapped = mapPosting(posting, target);
              if (!mapped) continue;
              // Qualify the key with the tenant: posting paths are only
              // unique within a tenant, so a bare path could make two
              // different employers' jobs collide and drop one of them.
              const key = `${target.tenantUrl}\n${mapped.sourceJobId || mapped.jobUrl}`;
              if (seen.has(key)) continue;
              seen.add(key);
              out.push(mapped);
            }

            offset += postings.length;
            if (postings.length < pageSize) break;
          }
        }
      } catch (error) {
        const message =
          error instanceof Error ? error.message : "Unknown error";
        errors.push(`${target.company}: ${message}`);
      }
    }

    if (out.length === 0 && errors.length > 0) {
      return { success: false, jobs: out, error: errors.join("; ") };
    }
    return {
      success: true,
      jobs: out,
      error: errors.length > 0 ? errors.join("; ") : undefined,
    };
  },
};

export default manifest;
|