Some checks failed
CI / Linting (Biome) (push) Failing after 36s
CI / Tests (push) Successful in 5m54s
CI / Type Check (adzuna-extractor) (push) Successful in 1m6s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m9s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m5s
CI / Type Check (orchestrator) (push) Successful in 1m21s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m4s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m4s
CI / Documentation (push) Successful in 1m52s
Adds extractor packages: arbeitnow, ashby, careerjet, fourdayweek,
greenhouse, himalayas, jobicy, jooble, lever, reed, remoteok, remotive,
themuse, usajobs, weworkremotely, workday — each with manifest, package
metadata and README.
Pipeline / shared:
- shared/job-fingerprint: stable hash for cross-source dedup, with tests
- discover-jobs: dedup via fingerprint and richer per-source merging
- jobs repository: fingerprint-aware upsert / lookup
- settings-registry, settings types/routes, demo-defaults: knobs for the
new sources
- shared extractors index: register the new manifests
- location-support, profiles route: small fixes for the new sources
Tooling:
- scripts/smoke-extractors.ts to sanity-check each source locally
- scripts/jobber-cron-{cherepaha,dobkin}.env.example: per-host cron
templates (CHANGEME placeholders only)
- .env.example: documented env vars for the new extractors
- .gitignore: ignore extractors/*/storage/ runtime caches (was ukvisajobs only)
Co-authored-by: Cursor <cursoragent@cursor.com>
268 lines
7.6 KiB
TypeScript
268 lines
7.6 KiB
TypeScript
/**
 * Careerjet publisher search API (v4).
 *
 * https://www.careerjet.com/partners/api/
 * GET https://search.api.careerjet.net/v4/query
 *
 * Uses Basic auth (username = publisher API key, password empty). Requires a
 * Referer header and `user_ip` / `user_agent` query params. Register your
 * server's outbound IP(s) in the Careerjet publisher dashboard.
 *
 * Env: CAREERJET_AFFID (API key), CAREERJET_REFERER (job-search page URL),
 * CAREERJET_USER_IP (must match an allowlisted IP), optional CAREERJET_USER_AGENT.
 */
|
|
|
|
import type {
|
|
ExtractorManifest,
|
|
ExtractorRunResult,
|
|
} from "@shared/types/extractors";
|
|
import type { CreateJobInput } from "@shared/types/jobs";
|
|
|
|
/** Endpoint queried for every search page. */
const API_URL = "https://search.api.careerjet.net/v4/query";

/** Value sent as the `user_agent` query param when none is configured. */
const DEFAULT_USER_AGENT =
  "Mozilla/5.0 (compatible; JobOps/1.0; job-search pipeline)";
|
|
|
|
/**
 * Fields this module reads from one entry of the response's `jobs` array.
 * Every field is optional because the payload is not validated before use.
 */
interface CareerjetJob {
  title?: string;
  description?: string;
  company?: string;
  salary?: string;
  date?: string;
  /** Required for a job to be kept: entries without a URL are dropped by `mapJob`. */
  url?: string;
  site?: string;
  locations?: string;
}
|
|
/**
 * Fields this module reads from a search response body.
 * Only `jobs` and `pages` are consumed by the extractor's paging loop.
 */
interface CareerjetResponse {
  type?: string;
  jobs?: CareerjetJob[];
  hits?: number;
  /** When numeric, used as the upper bound on the page number requested. */
  pages?: number;
}
|
|
|
|
/**
 * Normalizes an untrusted value to a trimmed, non-empty string.
 *
 * @param value - Any value, typically a field from the API payload.
 * @returns The trimmed string, or `undefined` when `value` is not a string
 *   or is empty / whitespace-only.
 */
function asString(value: unknown): string | undefined {
  if (typeof value !== "string") return undefined;
  const trimmed = value.trim();
  return trimmed ? trimmed : undefined;
}
|
|
|
|
/**
 * Converts one raw Careerjet result into the pipeline's job input shape.
 *
 * @param raw - A single entry from the response's `jobs` array.
 * @returns The mapped job, or `null` when the entry has no usable `url`.
 */
function mapJob(raw: CareerjetJob): CreateJobInput | null {
  const jobUrl = asString(raw.url);
  // The URL doubles as the application link, so a job without one is unusable.
  if (!jobUrl) return null;
  return {
    source: "careerjet",
    title: asString(raw.title) ?? "Unknown Title",
    employer: asString(raw.company) ?? "Unknown Employer",
    jobUrl,
    applicationLink: jobUrl,
    location: asString(raw.locations),
    salary: asString(raw.salary),
    datePosted: asString(raw.date),
    jobDescription: asString(raw.description),
    // NOTE(review): `site` is stored as the company description — confirm
    // this is the intended target field for that value.
    companyDescription: asString(raw.site),
  };
}
|
|
|
|
/** Careerjet `locale_code` values keyed by lower-cased country name or alias. */
const LOCALE_BY_COUNTRY: ReadonlyMap<string, string> = new Map([
  ["united kingdom", "en_GB"],
  ["uk", "en_GB"],
  ["united states", "en_US"],
  ["usa", "en_US"],
  ["us", "en_US"],
  ["canada", "en_CA"],
  ["australia", "en_AU"],
  ["germany", "de_DE"],
  ["france", "fr_FR"],
  ["spain", "es_ES"],
  ["italy", "it_IT"],
  ["netherlands", "nl_NL"],
]);

/** Locale used when the country is empty or not present in the table. */
const FALLBACK_LOCALE = "en_GB";

/**
 * Resolves the `locale_code` query value for a country name.
 *
 * @param country - Country name or alias; matched case-insensitively after
 *   trimming surrounding whitespace.
 * @returns The matching locale code, or `en_GB` for unknown or empty input.
 */
function localeForCountry(country: string): string {
  return LOCALE_BY_COUNTRY.get(country.trim().toLowerCase()) ?? FALLBACK_LOCALE;
}
|
|
|
|
/**
 * Builds the HTTP Basic `Authorization` header value for the API key.
 *
 * @param apiKey - Publisher API key, used as the username; the password is empty.
 * @returns `Basic ` followed by the base64 encoding of `<apiKey>:`.
 */
function basicAuthorizationHeader(apiKey: string): string {
  const credentials = `${apiKey}:`;
  const encoded = Buffer.from(credentials, "utf8").toString("base64");
  return `Basic ${encoded}`;
}
|
|
|
|
/**
 * Fetches one page of search results from the Careerjet API.
 *
 * @param args - Request inputs: `apiKey` for Basic auth, `referer` for the
 *   Referer header, and the rest sent as query params (`location` is only
 *   sent when it is non-empty).
 * @returns The JSON body, cast to `CareerjetResponse` without validation.
 * @throws Error when the response status is not OK; the message carries the
 *   status and up to 200 characters of the response body.
 */
async function fetchPage(args: {
  apiKey: string;
  keywords: string;
  location?: string;
  page: number;
  pageSize: number;
  localeCode: string;
  referer: string;
  userIp: string;
  userAgent: string;
}): Promise<CareerjetResponse> {
  const url = new URL(API_URL);
  url.searchParams.set("locale_code", args.localeCode);
  url.searchParams.set("keywords", args.keywords);
  if (args.location) url.searchParams.set("location", args.location);
  url.searchParams.set("page", String(args.page));
  url.searchParams.set("page_size", String(args.pageSize));
  url.searchParams.set("user_ip", args.userIp);
  url.searchParams.set("user_agent", args.userAgent);

  const response = await fetch(url.toString(), {
    headers: {
      Accept: "application/json",
      Authorization: basicAuthorizationHeader(args.apiKey),
      Referer: args.referer,
    },
  });
  if (!response.ok) {
    // If the error body cannot be read, still report the HTTP status rather
    // than letting the body-read failure replace it.
    const snippet = (await response.text().catch(() => "")).slice(0, 200);
    throw new Error(
      `Careerjet request failed with status ${response.status}${snippet ? `: ${snippet}` : ""}`,
    );
  }
  return (await response.json()) as CareerjetResponse;
}
|
|
|
|
/**
 * Careerjet extractor manifest.
 *
 * `run` searches the Careerjet API once per search term, pages through the
 * results, and returns the jobs de-duplicated by URL across all terms.
 */
export const manifest: ExtractorManifest = {
  id: "careerjet",
  displayName: "Careerjet",
  providesSources: ["careerjet"],
  requiredEnvVars: [
    "CAREERJET_AFFID",
    "CAREERJET_REFERER",
    "CAREERJET_USER_IP",
  ],
  /**
   * Runs the extraction.
   *
   * @param context - Run context supplying settings, search terms, the
   *   selected country, and optional cancel / progress callbacks.
   * @returns `success: false` with an `error` when a required credential is
   *   missing or a request fails (jobs collected before the failure are still
   *   returned); otherwise `success: true` with the collected jobs, which may
   *   be partial if the run was cancelled.
   */
  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    // Settings win over environment variables; blank values fall through.
    const apiKey =
      context.settings.careerjetAffid?.trim() ||
      process.env.CAREERJET_AFFID?.trim();
    const referer =
      context.settings.careerjetReferer?.trim() ||
      process.env.CAREERJET_REFERER?.trim();
    const userIp =
      context.settings.careerjetUserIp?.trim() ||
      process.env.CAREERJET_USER_IP?.trim();
    const userAgent =
      context.settings.careerjetUserAgent?.trim() ||
      process.env.CAREERJET_USER_AGENT?.trim() ||
      DEFAULT_USER_AGENT;

    if (!apiKey) {
      return {
        success: false,
        jobs: [],
        error:
          "Careerjet requires CAREERJET_AFFID (publisher API key for Basic auth).",
      };
    }
    if (!referer) {
      return {
        success: false,
        jobs: [],
        error:
          "Careerjet v4 requires CAREERJET_REFERER (the Referer URL of your job-search page, per Careerjet docs).",
      };
    }
    if (!userIp) {
      return {
        success: false,
        jobs: [],
        error:
          "Careerjet v4 requires CAREERJET_USER_IP. Use an IP you have allowlisted in the Careerjet publisher dashboard (typically your server's public egress IP).",
      };
    }

    const defaultMaxJobsPerTerm = 100;
    const parsedMaxJobs = context.settings.careerjetMaxJobsPerTerm
      ? Number.parseInt(context.settings.careerjetMaxJobsPerTerm, 10)
      : defaultMaxJobsPerTerm;
    // A non-numeric setting parses to NaN, and `collected < NaN` is always
    // false, which would silently skip every request. Use the default instead.
    const maxJobsPerTerm = Number.isNaN(parsedMaxJobs)
      ? defaultMaxJobsPerTerm
      : parsedMaxJobs;
    const pageSize = 50;
    // Hard cap on requests per term, regardless of how many pages the API reports.
    const maxPagesPerTerm = 10;
    const localeCode = localeForCountry(context.selectedCountry || "");

    // With no search terms, run a single search with empty keywords.
    const terms = context.searchTerms.length > 0 ? context.searchTerms : [""];
    // Only the first entry of the pipe-separated city list is sent.
    const location =
      context.settings.searchCities?.split("|")[0]?.trim() || undefined;

    // Job URLs already emitted, shared across terms.
    const seen = new Set<string>();
    const out: CreateJobInput[] = [];

    try {
      for (const [i, rawTerm] of terms.entries()) {
        if (context.shouldCancel?.()) break;
        const term = rawTerm.trim();
        context.onProgress?.({
          phase: "list",
          termsProcessed: i,
          termsTotal: terms.length,
          currentUrl: term || "(all)",
          detail: `Careerjet: term ${i + 1}/${terms.length}`,
        });

        // Counts only jobs newly added for this term (duplicates are skipped).
        let collected = 0;
        let page = 1;
        // Unknown until a response reports a numeric `pages`.
        let totalPages = Number.POSITIVE_INFINITY;
        while (
          collected < maxJobsPerTerm &&
          page <= totalPages &&
          page <= maxPagesPerTerm
        ) {
          if (context.shouldCancel?.()) break;
          const body = await fetchPage({
            apiKey,
            keywords: term,
            location,
            page,
            pageSize,
            localeCode,
            referer,
            userIp,
            userAgent,
          });
          if (typeof body.pages === "number") totalPages = body.pages;
          const items = Array.isArray(body.jobs) ? body.jobs : [];
          // An empty page ends paging for this term.
          if (items.length === 0) break;
          for (const raw of items) {
            const mapped = mapJob(raw);
            if (!mapped) continue;
            const key = mapped.jobUrl;
            if (seen.has(key)) continue;
            seen.add(key);
            out.push(mapped);
            collected += 1;
            if (collected >= maxJobsPerTerm) break;
          }
          page += 1;
        }

        context.onProgress?.({
          phase: "list",
          termsProcessed: i + 1,
          termsTotal: terms.length,
          currentUrl: term || "(all)",
          jobPagesProcessed: out.length,
          detail: `Careerjet: completed term ${i + 1}/${terms.length} (${collected} found)`,
        });
      }
    } catch (error) {
      // Report the failure but keep whatever was collected before it.
      const message = error instanceof Error ? error.message : "Unknown error";
      return { success: false, jobs: out, error: message };
    }

    return { success: true, jobs: out };
  },
};
|
|
|
|
// Default export of the same object that is exported by name as `manifest`.
export default manifest;
|