Some checks failed
CI / Linting (Biome) (push) Failing after 36s
CI / Tests (push) Successful in 5m54s
CI / Type Check (adzuna-extractor) (push) Successful in 1m6s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m9s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m5s
CI / Type Check (orchestrator) (push) Successful in 1m21s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m4s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m4s
CI / Documentation (push) Successful in 1m52s
Adds extractor packages: arbeitnow, ashby, careerjet, fourdayweek,
greenhouse, himalayas, jobicy, jooble, lever, reed, remoteok, remotive,
themuse, usajobs, weworkremotely, workday — each with manifest, package
metadata and README.
Pipeline / shared:
- shared/job-fingerprint: stable hash for cross-source dedup, with tests
- discover-jobs: dedup via fingerprint and richer per-source merging
- jobs repository: fingerprint-aware upsert / lookup
- settings-registry, settings types/routes, demo-defaults: knobs for the
new sources
- shared extractors index: register the new manifests
- location-support, profiles route: small fixes for the new sources
Tooling:
- scripts/smoke-extractors.ts to sanity-check each source locally
- scripts/jobber-cron-{cherepaha,dobkin}.env.example: per-host cron
templates (CHANGEME placeholders only)
- .env.example: documented env vars for the new extractors
- .gitignore: ignore extractors/*/storage/ runtime caches (was ukvisajobs only)
Co-authored-by: Cursor <cursoragent@cursor.com>
173 lines
4.8 KiB
TypeScript
173 lines
4.8 KiB
TypeScript
/**
 * Arbeitnow public job board API.
 *
 * https://www.arbeitnow.com/api/job-board-api?page=N
 *
 * No auth. Returns 100 results per page, sorted by creation date.
 * No server-side search — we paginate and filter client-side by
 * title + tags against each pipeline search term.
 *
 * Aggregates listings from Greenhouse, SmartRecruiters, Join,
 * TeamTailor, Recruitee, and Comeet.
 */
|
|
|
|
import type {
|
|
ExtractorManifest,
|
|
ExtractorRunResult,
|
|
} from "@shared/types/extractors";
|
|
import type { CreateJobInput } from "@shared/types/jobs";
|
|
|
|
// Base endpoint of the Arbeitnow job-board API; `fetchPage` appends `?page=N`.
const API_URL = "https://www.arbeitnow.com/api/job-board-api";
|
|
// Upper bound on the number of API pages fetched in a single extractor run.
const MAX_PAGES = 5;
|
|
|
|
/**
 * One listing as it appears in the Arbeitnow API payload.
 *
 * Every field is optional: the response JSON is cast to this shape without
 * validation, so consumers must check each value before relying on it.
 */
interface ArbeitnowJob {
  /** Used as the source-side job ID when mapping to a pipeline job. */
  slug?: string;
  title?: string;
  company_name?: string;
  /** Link to the listing; listings without a usable URL are dropped. */
  url?: string;
  location?: string;
  remote?: boolean;
  description?: string;
  /** Free-form labels; matched against search terms alongside the title. */
  tags?: string[];
  job_types?: string[];
  /** Multiplied by 1000 before building a `Date`, i.e. treated as epoch seconds. */
  created_at?: number;
}
|
|
|
|
/**
 * Envelope of one page of the Arbeitnow API response.
 *
 * All members are optional because the JSON body is cast, not validated.
 */
interface ArbeitnowResponse {
  /** Pagination metadata. */
  meta?: { current_page?: number };
  /** Pagination links; a missing or null `next` ends paging. */
  links?: { next?: string | null };
  /** Listings contained in this page. */
  data?: ArbeitnowJob[];
}
|
|
|
|
/**
 * Normalizes an arbitrary value to a trimmed, non-empty string.
 *
 * @param value - Any value, e.g. a field taken from an unvalidated payload.
 * @returns The trimmed text, or `undefined` when `value` is not a string or
 *   contains nothing but whitespace.
 */
function asString(value: unknown): string | undefined {
  if (typeof value === "string") {
    const text = value.trim();
    if (text.length > 0) return text;
  }
  return undefined;
}
|
|
|
|
/**
 * Tests whether a listing matches a search term.
 *
 * The comparison is a case-insensitive substring check against the title and
 * against every tag. Values that are not strings are ignored rather than
 * dereferenced, because the listing comes from an unvalidated JSON payload
 * and a non-string title would otherwise throw.
 *
 * @param job - Raw listing from the API.
 * @param term - Search term to look for.
 * @returns `true` when the title or at least one tag contains `term`.
 */
function matchesTerm(job: ArbeitnowJob, term: string): boolean {
  const needle = term.toLowerCase();
  // Single guarded predicate so the title gets the same type check as tags.
  const contains = (value: unknown): boolean =>
    typeof value === "string" && value.toLowerCase().includes(needle);

  if (contains(job.title)) return true;
  return Array.isArray(job.tags) && job.tags.some((tag) => contains(tag));
}
|
|
|
|
/**
 * Converts a raw Arbeitnow listing into the pipeline's job input shape.
 *
 * Missing title, employer and location fall back to placeholder strings.
 * Tags and job types are reduced to their non-empty string entries and joined
 * with ", ". A `created_at` that does not produce a valid date is dropped
 * instead of throwing, so one malformed timestamp cannot abort the whole run.
 *
 * @param raw - Listing as received from the API (unvalidated).
 * @returns The mapped job, or `null` when the listing has no usable URL.
 */
function mapJob(raw: ArbeitnowJob): CreateJobInput | null {
  const jobUrl = asString(raw.url);
  if (!jobUrl) return null;

  // Keeps only the non-empty string entries of a value expected to be a list.
  const cleanList = (value: unknown): string[] =>
    Array.isArray(value)
      ? value.filter(
          (item): item is string => typeof item === "string" && item.length > 0,
        )
      : [];

  const tags = cleanList(raw.tags);
  const jobTypes = cleanList(raw.job_types);

  // created_at is scaled by 1000, i.e. treated as epoch seconds.
  // toISOString() throws a RangeError on an invalid Date (NaN, Infinity or an
  // out-of-range value), so the date is validated before it is serialized.
  let datePosted: string | undefined;
  if (typeof raw.created_at === "number") {
    const posted = new Date(raw.created_at * 1000);
    if (!Number.isNaN(posted.getTime())) {
      datePosted = posted.toISOString();
    }
  }

  return {
    source: "arbeitnow",
    sourceJobId: asString(raw.slug),
    title: asString(raw.title) ?? "Unknown Title",
    employer: asString(raw.company_name) ?? "Unknown Employer",
    jobUrl,
    applicationLink: jobUrl,
    location: asString(raw.location) ?? "Unknown",
    isRemote: raw.remote === true,
    jobType: jobTypes.length > 0 ? jobTypes.join(", ") : undefined,
    datePosted,
    jobDescription: asString(raw.description),
    disciplines: tags.length > 0 ? tags.join(", ") : undefined,
  };
}
|
|
|
|
/**
 * Fetches one page of listings from the Arbeitnow API.
 *
 * @param page - Page number sent as the `page` query parameter.
 * @returns The parsed JSON body, cast (not validated) to `ArbeitnowResponse`.
 * @throws Error when the HTTP response status is not OK.
 */
async function fetchPage(page: number): Promise<ArbeitnowResponse> {
  const requestInit = { headers: { Accept: "application/json" } };
  const res = await fetch(`${API_URL}?page=${page}`, requestInit);

  if (res.ok) {
    return (await res.json()) as ArbeitnowResponse;
  }

  throw new Error(`Arbeitnow request failed with status ${res.status}`);
}
|
|
|
|
/**
 * Extractor manifest for the Arbeitnow job board.
 *
 * `run` walks up to `MAX_PAGES` pages of the API, keeps the listings whose
 * title or tags match at least one search term (or every listing when no
 * terms are given), de-duplicates them by source job ID (falling back to the
 * job URL) and returns them as pipeline job inputs.
 */
export const manifest: ExtractorManifest = {
  id: "arbeitnow",
  displayName: "Arbeitnow",
  providesSources: ["arbeitnow"],

  /**
   * Runs the extraction.
   *
   * @param context - Run context supplying search terms, settings, and the
   *   optional cancellation and progress callbacks.
   * @returns `success: true` with the collected jobs, also when the run was
   *   cancelled part-way; `success: false` with the jobs gathered so far and
   *   the error message when a request or mapping step throws.
   */
  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    const defaultMaxJobsPerTerm = 100;
    const rawMax = context.settings.arbeitnowMaxJobsPerTerm;
    const parsedMax = rawMax
      ? Number.parseInt(rawMax, 10)
      : defaultMaxJobsPerTerm;
    // parseInt yields NaN for unparsable input; comparing against NaN is
    // always false, which would silently disable the cap.
    const maxJobs = Number.isNaN(parsedMax) ? defaultMaxJobsPerTerm : parsedMax;

    const terms = context.searchTerms;
    // Total budget for the run: the per-term limit scaled by the term count.
    const jobBudget = maxJobs * Math.max(terms.length, 1);
    const seen = new Set<string>();
    const out: CreateJobInput[] = [];

    try {
      for (let page = 1; page <= MAX_PAGES; page += 1) {
        if (context.shouldCancel?.()) break;
        if (out.length >= jobBudget) break;

        context.onProgress?.({
          phase: "list",
          termsProcessed: 0,
          termsTotal: 1,
          currentUrl: `page ${page}`,
          detail: `Arbeitnow: fetching page ${page}`,
        });

        const body = await fetchPage(page);
        const jobs = Array.isArray(body.data) ? body.data : [];

        if (jobs.length === 0) break;

        for (const raw of jobs) {
          // Enforce the budget per job, not only per page, so a page with
          // many matches cannot push the result past the configured limit.
          if (out.length >= jobBudget) break;
          if (terms.length > 0 && !terms.some((t) => matchesTerm(raw, t))) {
            continue;
          }
          const mapped = mapJob(raw);
          if (!mapped) continue;
          const key = mapped.sourceJobId || mapped.jobUrl;
          if (seen.has(key)) continue;
          seen.add(key);
          out.push(mapped);
        }

        context.onProgress?.({
          phase: "list",
          termsProcessed: 0,
          termsTotal: 1,
          currentUrl: `page ${page}`,
          jobPagesProcessed: out.length,
          detail: `Arbeitnow: page ${page} done (${out.length} matched so far)`,
        });

        if (!body.links?.next) break;
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      return { success: false, jobs: out, error: message };
    }

    return { success: true, jobs: out };
  },
};
|
|
|
|
// Default export of the same manifest object as the named `manifest` export.
export default manifest;
|