Some checks failed
CI / Linting (Biome) (push) Failing after 36s
CI / Tests (push) Successful in 5m54s
CI / Type Check (adzuna-extractor) (push) Successful in 1m6s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m9s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m5s
CI / Type Check (orchestrator) (push) Successful in 1m21s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m4s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m4s
CI / Documentation (push) Successful in 1m52s
Adds extractor packages: arbeitnow, ashby, careerjet, fourdayweek,
greenhouse, himalayas, jobicy, jooble, lever, reed, remoteok, remotive,
themuse, usajobs, weworkremotely, workday — each with manifest, package
metadata and README.
Pipeline / shared:
- shared/job-fingerprint: stable hash for cross-source dedup, with tests
- discover-jobs: dedup via fingerprint and richer per-source merging
- jobs repository: fingerprint-aware upsert / lookup
- settings-registry, settings types/routes, demo-defaults: knobs for the
new sources
- shared extractors index: register the new manifests
- location-support, profiles route: small fixes for the new sources
Tooling:
- scripts/smoke-extractors.ts to sanity-check each source locally
- scripts/jobber-cron-{cherepaha,dobkin}.env.example: per-host cron
templates (CHANGEME placeholders only)
- .env.example: documented env vars for the new extractors
- .gitignore: ignore extractors/*/storage/ runtime caches (was ukvisajobs only)
Co-authored-by: Cursor <cursoragent@cursor.com>
264 lines
8.0 KiB
TypeScript
264 lines
8.0 KiB
TypeScript
/**
 * USAJOBS public search API.
 *
 * https://developer.usajobs.gov/api-reference/get-api-search
 *
 * Requires:
 *   - USAJOBS_API_KEY (`usajobsApiKey` setting)
 *   - USAJOBS_USER_AGENT (`usajobsUserAgent` setting) — must be a real
 *     contact email per their TOS
 *
 * The orchestrator already gates this source to the United States via
 * `isSourceAllowedForCountry`, so we don't re-validate country here.
 */
|
|
|
|
import type {
|
|
ExtractorManifest,
|
|
ExtractorRunResult,
|
|
} from "@shared/types/extractors";
|
|
import type { CreateJobInput } from "@shared/types/jobs";
|
|
|
|
/** Base URL of the USAJOBS search endpoint; `fetchPage` appends the query string. */
const API_URL = "https://data.usajobs.gov/api/Search";
|
|
|
|
/** One entry of a descriptor's `PositionLocation` list. */
interface UsaJobsLocation {
  LocationName?: string;
  CountryCode?: string;
}

/**
 * One entry of a descriptor's `PositionRemuneration` list.
 * The range values are typed as strings and are converted to numbers by
 * `mapJob`; `RateIntervalCode` is translated by `mapInterval`.
 */
interface UsaJobsRemuneration {
  MinimumRange?: string;
  MaximumRange?: string;
  RateIntervalCode?: string;
}

/**
 * The subset of a posting's fields this module declares. Every field is
 * optional, so consumers must tolerate any of them being absent.
 */
interface UsaJobsDescriptor {
  // Identity and links. `mapJob` skips a posting without a usable PositionURI.
  PositionID?: string;
  PositionTitle?: string;
  PositionURI?: string;
  ApplyURI?: string[];

  // Where the job is and who is hiring.
  PositionLocationDisplay?: string;
  PositionLocation?: UsaJobsLocation[];
  OrganizationName?: string;
  DepartmentName?: string;

  // Dates, passed through as strings.
  PublicationStartDate?: string;
  PositionStartDate?: string;
  PositionEndDate?: string;

  // Pay, summary text and schedule.
  PositionRemuneration?: UsaJobsRemuneration[];
  UserArea?: { Details?: { JobSummary?: string } };
  PositionSchedule?: { Name?: string }[];
}

/** A single search hit wrapping the posting descriptor. */
interface UsaJobsSearchResultItem {
  MatchedObjectDescriptor?: UsaJobsDescriptor;
}

/** Top-level shape of the search response body as read by this module. */
interface UsaJobsSearchResult {
  SearchResult?: {
    // Total number of matches; `run` uses it to stop paging.
    SearchResultCountAll?: number;
    SearchResultItems?: UsaJobsSearchResultItem[];
  };
}
|
|
|
|
/**
 * Normalizes an arbitrary value into a trimmed, non-empty string.
 *
 * @param value - Any value, typically a field read from the API response.
 * @returns The trimmed text, or `undefined` when `value` is not a string or
 *   contains only whitespace.
 */
function asString(value: unknown): string | undefined {
  if (typeof value === "string") {
    const text = value.trim();
    if (text.length > 0) return text;
  }
  return undefined;
}
|
|
|
|
/**
 * Matches a leading, well-formed thousands-grouped integer part such as
 * "85,000" or " -1,250,000" (optionally followed by a fraction or other text).
 */
const GROUPED_NUMBER_PREFIX = /^\s*[+-]?\d{1,3}(?:,\d{3})+(?!\d)/;

/**
 * Converts a value to a finite number when possible.
 *
 * Numbers are returned as-is when finite. Strings are parsed with
 * `Number.parseFloat`, which accepts leading whitespace and ignores trailing
 * non-numeric text. Because `parseFloat` stops at the first comma, a
 * thousands-grouped amount like "85,000" would otherwise be read as 85; the
 * grouping commas of a well-formed leading number are therefore removed
 * before parsing.
 *
 * @param value - Any value, typically a salary field from the API response.
 * @returns A finite number, or `undefined` when the value is not a finite
 *   number and cannot be parsed as one.
 */
function toNumberOrUndefined(value: unknown): number | undefined {
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : undefined;
  }
  if (typeof value !== "string") return undefined;

  // Only strip commas from a properly grouped prefix so that other inputs
  // keep parsing exactly as plain parseFloat would.
  const normalized = value.replace(GROUPED_NUMBER_PREFIX, (prefix) =>
    prefix.replace(/,/g, ""),
  );
  const parsed = Number.parseFloat(normalized);
  return Number.isFinite(parsed) ? parsed : undefined;
}
|
|
|
|
/**
 * Lower-cased rate interval code -> salary interval label.
 *
 * NOTE(review): the `pw` -> "weekly" entry mirrors the existing behavior, but
 * OPM pay-basis tables appear to use PW for "piece work" and BW for
 * bi-weekly — confirm against the USAJOBS code list.
 */
const SALARY_INTERVAL_BY_CODE: ReadonlyMap<string, string> = new Map([
  ["py", "yearly"],
  ["pa", "yearly"],
  ["ph", "hourly"],
  ["pd", "daily"],
  ["pm", "monthly"],
  ["pw", "weekly"],
]);

/**
 * Translates a `RateIntervalCode` into a salary interval label.
 *
 * @param code - Interval code from the remuneration entry; matched
 *   case-insensitively.
 * @returns "yearly", "hourly", "daily", "monthly" or "weekly", or `undefined`
 *   when the code is missing, empty, or not one of the known codes.
 */
function mapInterval(code: string | undefined): string | undefined {
  if (!code) return undefined;
  return SALARY_INTERVAL_BY_CODE.get(code.toLowerCase());
}
|
|
|
|
/**
 * Converts one USAJOBS search hit into a `CreateJobInput`.
 *
 * All text fields are normalized through `asString`, so blank or non-string
 * values become `undefined` (or the documented fallback) instead of leaking
 * through or throwing.
 *
 * @param item - A single entry of `SearchResultItems`.
 * @returns The mapped job, or `null` when the item has no descriptor or no
 *   usable `PositionURI`.
 */
function mapJob(item: UsaJobsSearchResultItem): CreateJobInput | null {
  const descriptor = item.MatchedObjectDescriptor;
  if (!descriptor) return null;

  // The posting URL is mandatory; it is also the fallback application link.
  const jobUrl = asString(descriptor.PositionURI);
  if (!jobUrl) return null;

  // Only the first remuneration entry is considered.
  const remuneration = descriptor.PositionRemuneration?.[0];
  const min = toNumberOrUndefined(remuneration?.MinimumRange);
  const max = toNumberOrUndefined(remuneration?.MaximumRange);
  const interval = mapInterval(remuneration?.RateIntervalCode);

  // Prefer the first apply URI; fall back to the posting URL when it is
  // missing, blank, or ApplyURI is not an array.
  const applyArr = descriptor.ApplyURI;
  const firstApplyUri = Array.isArray(applyArr)
    ? asString(applyArr[0])
    : undefined;
  const applicationLink = firstApplyUri ?? jobUrl;

  return {
    source: "usajobs",
    sourceJobId: asString(descriptor.PositionID),
    title: asString(descriptor.PositionTitle) ?? "Unknown Title",
    employer:
      asString(descriptor.OrganizationName) ??
      asString(descriptor.DepartmentName) ??
      "U.S. Federal Government",
    jobUrl,
    applicationLink,
    location: asString(descriptor.PositionLocationDisplay),
    datePosted: asString(descriptor.PublicationStartDate),
    deadline: asString(descriptor.PositionEndDate),
    jobDescription: asString(descriptor.UserArea?.Details?.JobSummary),
    // Routed through asString like every other text field: a non-string Name
    // in the response yields undefined instead of throwing on `.trim()`.
    jobType: asString(descriptor.PositionSchedule?.[0]?.Name),
    salaryMinAmount: min,
    salaryMaxAmount: max,
    // Truthiness check: a currency is reported only when at least one bound
    // is a non-zero number.
    salaryCurrency: min || max ? "USD" : undefined,
    salaryInterval: interval,
  };
}
|
|
|
|
/**
 * Requests one page of USAJOBS search results, newest postings first.
 *
 * @param args.apiKey - Sent as the `Authorization-Key` header.
 * @param args.userAgent - Sent as the `User-Agent` header.
 * @param args.keyword - Sent as the `Keyword` query parameter (always set,
 *   even when empty).
 * @param args.locationName - Sent as `LocationName` only when non-empty.
 * @param args.page - Page number to request.
 * @param args.resultsPerPage - Page size to request.
 * @returns The parsed JSON body, cast (not validated) to `UsaJobsSearchResult`.
 * @throws Error when the response status is not OK.
 */
async function fetchPage(args: {
  apiKey: string;
  userAgent: string;
  keyword: string;
  locationName?: string;
  page: number;
  resultsPerPage: number;
}): Promise<UsaJobsSearchResult> {
  const { apiKey, userAgent, keyword, locationName, page, resultsPerPage } =
    args;

  // Assemble the query in the same order the parameters are documented above.
  const query: Array<[string, string]> = [["Keyword", keyword]];
  if (locationName) {
    query.push(["LocationName", locationName]);
  }
  query.push(
    ["ResultsPerPage", String(resultsPerPage)],
    ["Page", String(page)],
    ["SortField", "OpenDate"],
    ["SortDirection", "Desc"],
  );

  const endpoint = new URL(API_URL);
  for (const [name, value] of query) {
    endpoint.searchParams.set(name, value);
  }

  const requestHeaders = {
    Host: "data.usajobs.gov",
    "User-Agent": userAgent,
    "Authorization-Key": apiKey,
    Accept: "application/json",
  };
  const response = await fetch(endpoint.toString(), {
    headers: requestHeaders,
  });

  if (response.ok) {
    return (await response.json()) as UsaJobsSearchResult;
  }
  throw new Error(`USAJOBS request failed with status ${response.status}`);
}
|
|
|
|
/**
 * Extractor manifest for USAJOBS.
 *
 * `run` searches once per search term (or once with an empty keyword when no
 * terms are given), pages through the results, maps each hit with `mapJob`,
 * and de-duplicates across terms by `sourceJobId` (falling back to `jobUrl`).
 */
export const manifest: ExtractorManifest = {
  id: "usajobs",
  displayName: "USAJOBS",
  providesSources: ["usajobs"],
  requiredEnvVars: ["USAJOBS_API_KEY", "USAJOBS_USER_AGENT"],

  /**
   * Runs the extraction.
   *
   * @returns `success: true` with the collected jobs, including when the run
   *   was cancelled part-way. `success: false` when credentials are missing
   *   (no jobs) or when a request/mapping step throws (jobs collected so far
   *   are still returned alongside the error message).
   */
  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    // Settings take precedence over environment variables.
    const apiKey =
      context.settings.usajobsApiKey?.trim() ||
      process.env.USAJOBS_API_KEY?.trim();
    const userAgent =
      context.settings.usajobsUserAgent?.trim() ||
      process.env.USAJOBS_USER_AGENT?.trim();
    if (!apiKey || !userAgent) {
      return {
        success: false,
        jobs: [],
        error:
          "USAJOBS extractor requires USAJOBS_API_KEY and USAJOBS_USER_AGENT (a contact email)",
      };
    }

    // A non-numeric setting parses to NaN, and `collected < NaN` is always
    // false, which would silently skip every term. Fall back to the default
    // instead. Values <= 0 are kept as-is and collect nothing.
    const defaultMaxJobsPerTerm = 100;
    const rawMaxJobsPerTerm = context.settings.usajobsMaxJobsPerTerm;
    const parsedMaxJobsPerTerm = rawMaxJobsPerTerm
      ? Number.parseInt(rawMaxJobsPerTerm, 10)
      : defaultMaxJobsPerTerm;
    const maxJobsPerTerm = Number.isNaN(parsedMaxJobsPerTerm)
      ? defaultMaxJobsPerTerm
      : parsedMaxJobsPerTerm;
    // USAJOBS caps page size at 500, but smaller pages are friendlier on retry.
    const resultsPerPage = 50;

    // With no search terms, issue a single search with an empty keyword.
    const terms = context.searchTerms.length > 0 ? context.searchTerms : [""];
    // Only the first "|"-separated city is used as the location filter.
    const locationName =
      context.settings.searchCities?.split("|")[0]?.trim() || undefined;

    // `seen` spans all terms so a job matched by several terms is kept once.
    const seen = new Set<string>();
    const out: CreateJobInput[] = [];

    try {
      for (let i = 0; i < terms.length; i += 1) {
        if (context.shouldCancel?.()) break;
        const term = terms[i].trim();
        context.onProgress?.({
          phase: "list",
          termsProcessed: i,
          termsTotal: terms.length,
          currentUrl: term || "(all)",
          detail: `USAJOBS: term ${i + 1}/${terms.length}`,
        });

        let collected = 0;
        let page = 1;
        // Unknown until the first response reports SearchResultCountAll.
        let total = Number.POSITIVE_INFINITY;
        while (
          collected < maxJobsPerTerm &&
          (page - 1) * resultsPerPage < total &&
          page < 200 // hard stop against runaway paging
        ) {
          if (context.shouldCancel?.()) break;
          const body = await fetchPage({
            apiKey,
            userAgent,
            keyword: term,
            locationName,
            page,
            resultsPerPage,
          });
          if (typeof body.SearchResult?.SearchResultCountAll === "number") {
            total = body.SearchResult.SearchResultCountAll;
          }
          const items = body.SearchResult?.SearchResultItems ?? [];
          if (items.length === 0) break;
          for (const item of items) {
            const mapped = mapJob(item);
            if (!mapped) continue;
            const key = mapped.sourceJobId || mapped.jobUrl;
            if (seen.has(key)) continue;
            seen.add(key);
            out.push(mapped);
            // Only newly added (non-duplicate) jobs count toward the cap.
            collected += 1;
            if (collected >= maxJobsPerTerm) break;
          }
          // A short page means there is nothing further to fetch.
          if (items.length < resultsPerPage) break;
          page += 1;
        }

        context.onProgress?.({
          phase: "list",
          termsProcessed: i + 1,
          termsTotal: terms.length,
          currentUrl: term || "(all)",
          jobPagesProcessed: out.length,
          detail: `USAJOBS: completed term ${i + 1}/${terms.length} (${collected} found)`,
        });
      }
    } catch (error) {
      // Report the failure but keep whatever was collected before it.
      const message = error instanceof Error ? error.message : "Unknown error";
      return { success: false, jobs: out, error: message };
    }

    return { success: true, jobs: out };
  },
};

export default manifest;
|