ilia 7b3dfb002a
Some checks failed
CI / Linting (Biome) (push) Failing after 36s
CI / Tests (push) Successful in 5m54s
CI / Type Check (adzuna-extractor) (push) Successful in 1m6s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m9s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m5s
CI / Type Check (orchestrator) (push) Successful in 1m21s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m4s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m4s
CI / Documentation (push) Successful in 1m52s
feat(extractors): add 17 job source extractors and cross-source dedup
Adds extractor packages: arbeitnow, ashby, careerjet, fourdayweek,
greenhouse, himalayas, jobicy, jooble, lever, reed, remoteok, remotive,
themuse, usajobs, weworkremotely, workday — each with manifest, package
metadata and README.

Pipeline / shared:
- shared/job-fingerprint: stable hash for cross-source dedup, with tests
- discover-jobs: dedup via fingerprint and richer per-source merging
- jobs repository: fingerprint-aware upsert / lookup
- settings-registry, settings types/routes, demo-defaults: knobs for the
  new sources
- shared extractors index: register the new manifests
- location-support, profiles route: small fixes for the new sources

Tooling:
- scripts/smoke-extractors.ts to sanity-check each source locally
- scripts/jobber-cron-{cherepaha,dobkin}.env.example: per-host cron
  templates (CHANGEME placeholders only)
- .env.example: documented env vars for the new extractors
- .gitignore: ignore extractors/*/storage/ runtime caches (was ukvisajobs only)

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-12 20:17:52 -04:00

227 lines
6.4 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* 4 Day Week public jobs API.
*
* https://4dayweek.io/api/jobs?page=N
*
* No auth. Paginated JSON. No description in listing response —
* we link to https://4dayweek.io/job/{slug} for details.
* Supports category filtering server-side; we also filter
* client-side by title + stack tags against pipeline search terms.
*/
import type {
ExtractorManifest,
ExtractorRunResult,
} from "@shared/types/extractors";
import type { CreateJobInput } from "@shared/types/jobs";
// Base URL of the public listing endpoint; `fetchPage` appends `?page=N` to it.
const API_URL = "https://4dayweek.io/api/jobs";
// Upper bound on how many listing pages a single extractor run will request.
const MAX_PAGES = 3;
/**
 * Company object nested inside a listing entry.
 * Every field is optional because the payload is not validated before use.
 */
interface FdwCompany {
// Preferred source for the mapped job's employer name.
name?: string;
// Not read by this extractor.
slug?: string;
// Passed through as the mapped job's company logo.
logo_url?: string;
}
/**
 * One entry of a job's `remote_allowed` list.
 * Only `country` is consumed: the string values are joined into the location text.
 */
interface FdwRemoteAllowed {
country?: string;
// Not read by this extractor.
continent?: string;
// Not read by this extractor.
is_primary?: boolean;
}
/**
 * One technology tag attached to a job.
 * `name` is used both for search-term matching and for the mapped `disciplines` string.
 */
interface FdwStackItem {
name?: string;
// Not read by this extractor.
slug?: string;
}
/**
 * A single job entry from the listing response.
 * All fields are optional; the mapping code supplies fallbacks or skips the
 * entry (when `slug` is missing) rather than trusting the payload.
 */
interface FdwJob {
// Preferred value for `sourceJobId`; the slug is used when this is absent.
id?: string;
// Matched against search terms; falls back to "Unknown Title" when blank.
title?: string;
// Required for mapping: builds the https://4dayweek.io/job/{slug} URL.
slug?: string;
// Fallback employer name when `company.name` is absent.
company_name?: string;
company?: FdwCompany;
// Compared against the literal "remote" to derive `isRemote` and the location fallback.
work_arrangement?: string;
remote_allowed?: FdwRemoteAllowed[];
// Not read by this extractor.
timezones?: string[];
// Multiplied by 1000 before building a Date, i.e. treated as Unix seconds — TODO confirm against the API.
posted?: number;
// Underscores are replaced with spaces to form the job type label.
schedule_type?: string;
stack?: FdwStackItem[];
// Matched against search terms and passed through as the company industry.
category?: string;
// Not read by this extractor.
level?: string;
// Pre-formatted salary text; takes precedence over the numeric fields below.
salary?: string;
// Numeric bounds are divided by 100 when formatted, i.e. treated as minor units — TODO confirm against the API.
salary_lower?: number;
salary_upper?: number;
// Defaults to "USD" when formatting numeric bounds.
salary_currency?: string;
// Defaults to "year" when formatting numeric bounds.
salary_period?: string;
// Truthy entries are skipped by the extractor.
is_expired?: boolean;
// Not read by this extractor.
work_life_score?: number;
}
/**
 * Envelope of one listing page as this extractor reads it.
 * The JSON body is cast to this shape without runtime validation.
 */
interface FdwResponse {
// Page entries; a missing or empty list ends pagination.
jobs?: FdwJob[];
// Not read by this extractor.
total?: number;
// Not read by this extractor.
page?: number;
// A falsy value ends pagination after the current page.
has_more?: boolean;
}
/**
 * Normalises an arbitrary value to a trimmed, non-empty string.
 *
 * @param value - Anything taken from an unvalidated payload.
 * @returns The trimmed text, or `undefined` when `value` is not a string or
 *   contains only whitespace.
 */
function asString(value: unknown): string | undefined {
  if (typeof value === "string") {
    const cleaned = value.trim();
    if (cleaned) return cleaned;
  }
  return undefined;
}
/**
 * Case-insensitive substring test of a search term against a job.
 *
 * The term is checked, in order, against the title, the category and the
 * name of every stack tag; the first hit wins.
 *
 * @param job - Listing entry to inspect.
 * @param term - Search term; compared in lower case.
 * @returns `true` when any of the inspected fields contains the term.
 */
function matchesTerm(job: FdwJob, term: string): boolean {
  const needle = term.toLowerCase();
  const contains = (text: string | undefined): boolean =>
    text?.toLowerCase().includes(needle) ?? false;

  if (contains(job.title) || contains(job.category)) {
    return true;
  }
  if (!Array.isArray(job.stack)) {
    return false;
  }
  // Tags without a string name are ignored rather than treated as a mismatch error.
  return job.stack.some(
    (tag) => typeof tag.name === "string" && contains(tag.name),
  );
}
/**
 * Turns the API's snake_case schedule type into a human-readable label.
 *
 * @param raw - Schedule type as received, if any.
 * @returns The value with underscores replaced by spaces, or "4-day week"
 *   when no value was provided.
 */
function formatSchedule(raw: string | undefined): string {
  return raw ? raw.split("_").join(" ") : "4-day week";
}
/**
 * Builds the location text for a job.
 *
 * @param job - Listing entry to describe.
 * @returns The string `country` values from `remote_allowed`, comma-separated,
 *   when there is at least one; otherwise "Remote" for a remote work
 *   arrangement and "Unknown" for anything else.
 */
function formatLocation(job: FdwJob): string {
  const named: string[] = [];
  if (Array.isArray(job.remote_allowed)) {
    job.remote_allowed.forEach((region) => {
      const { country } = region;
      if (typeof country === "string") named.push(country);
    });
  }

  if (named.length > 0) {
    return named.join(", ");
  }
  if (job.work_arrangement === "remote") {
    return "Remote";
  }
  return "Unknown";
}
/**
 * Produces a display string for a job's salary.
 *
 * A pre-formatted `salary` string from the API wins. Otherwise the numeric
 * bounds are rendered as "CUR lower–upper / period" (both bounds present) or
 * "CUR amount / period" (only one bound present).
 *
 * Amounts are divided by 100 before formatting, i.e. the API values are
 * treated as minor units — NOTE(review): confirm against the API.
 *
 * @param job - Listing entry to describe.
 * @returns The salary text, or `undefined` when the job carries no salary data.
 */
function formatSalary(job: FdwJob): string | undefined {
  if (job.salary) return job.salary;

  const { salary_lower: lower, salary_upper: upper } = job;
  if (lower == null && upper == null) return undefined;

  const cur = job.salary_currency ?? "USD";
  const period = job.salary_period ?? "year";
  const fmt = (amount: number): string => (amount / 100).toLocaleString();

  if (lower != null && upper != null) {
    // The bounds need an explicit range separator; without it the two numbers
    // run together (500 and 900 would read as "500900"). The en dash is
    // written as an escape so the source stays free of look-alike characters.
    return `${cur} ${fmt(lower)}\u2013${fmt(upper)} / ${period}`;
  }

  const single = lower ?? upper;
  return single != null ? `${cur} ${fmt(single)} / ${period}` : undefined;
}
/**
 * Converts one listing entry into the pipeline's job input shape.
 *
 * The listing carries no description, so both the job URL and the application
 * link point at the public job page derived from the slug.
 *
 * @param raw - Listing entry as received from the API.
 * @returns The mapped job, or `null` when the entry has no usable slug (and
 *   therefore no URL).
 */
function mapJob(raw: FdwJob): CreateJobInput | null {
  const slug = asString(raw.slug);
  if (!slug) return null;

  const jobUrl = `https://4dayweek.io/job/${slug}`;

  const stackTags = Array.isArray(raw.stack)
    ? raw.stack
        .map((s) => s.name)
        .filter((n): n is string => typeof n === "string")
    : [];

  // `posted` is treated as Unix seconds. An out-of-range number yields an
  // invalid Date, and toISOString() throws RangeError on those; drop the date
  // instead of letting one malformed record abort the whole run.
  let datePosted: string | undefined;
  if (typeof raw.posted === "number") {
    const postedAt = new Date(raw.posted * 1000);
    if (!Number.isNaN(postedAt.getTime())) {
      datePosted = postedAt.toISOString();
    }
  }

  return {
    source: "fourdayweek",
    sourceJobId: raw.id ?? slug,
    title: asString(raw.title) ?? "Unknown Title",
    // Run both candidates through asString so blank names fall through to the
    // next candidate / placeholder, the same way blank titles do.
    employer:
      asString(raw.company?.name) ??
      asString(raw.company_name) ??
      "Unknown Employer",
    jobUrl,
    applicationLink: jobUrl,
    location: formatLocation(raw),
    isRemote: raw.work_arrangement === "remote",
    jobType: formatSchedule(raw.schedule_type),
    // `?? undefined` normalises a JSON null to undefined.
    companyLogo: raw.company?.logo_url ?? undefined,
    datePosted,
    salary: formatSalary(raw),
    disciplines: stackTags.length > 0 ? stackTags.join(", ") : undefined,
    companyIndustry: asString(raw.category),
  };
}
/**
 * Downloads one page of the public job listing.
 *
 * @param page - Page number, sent as the `page` query parameter.
 * @returns The parsed JSON body, cast (not validated) to `FdwResponse`.
 * @throws Error when the server answers with a non-2xx status; network and
 *   JSON parsing failures propagate from `fetch` / `response.json()`.
 */
async function fetchPage(page: number): Promise<FdwResponse> {
  const response = await fetch(`${API_URL}?page=${page}`, {
    headers: { Accept: "application/json" },
  });

  if (response.ok) {
    const payload = await response.json();
    return payload as FdwResponse;
  }

  throw new Error(`4 Day Week request failed with status ${response.status}`);
}
/**
 * Extractor manifest for the 4 Day Week job board.
 *
 * `run` walks the listing page by page (at most `MAX_PAGES`), drops expired
 * entries, keeps entries matching at least one search term (or everything
 * when no terms are given), de-duplicates by source job id / URL and returns
 * the mapped jobs.
 */
export const manifest: ExtractorManifest = {
  id: "fourdayweek",
  displayName: "4 Day Week",
  providesSources: ["fourdayweek"],

  /**
   * Runs one extraction pass.
   *
   * @param context - Pipeline context: settings, search terms and the
   *   optional `shouldCancel` / `onProgress` hooks.
   * @returns `success: true` with the collected jobs (possibly partial when
   *   cancelled), or `success: false` with the jobs collected so far and the
   *   error message when a page request fails.
   */
  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    const defaultMaxJobs = 100;
    const rawMax = context.settings.fourdayweekMaxJobsPerTerm;
    const parsedMax = rawMax ? Number.parseInt(rawMax, 10) : defaultMaxJobs;
    // A non-numeric setting parses to NaN, and `length >= NaN` is never true,
    // which would silently switch the cap off. Fall back to the default.
    const maxJobs = Number.isNaN(parsedMax) ? defaultMaxJobs : parsedMax;

    const terms = context.searchTerms;
    // Overall budget: the per-term limit scaled by the number of terms, since
    // the listing is fetched once and filtered against all terms together.
    const cap = maxJobs * Math.max(terms.length, 1);

    const seen = new Set<string>();
    const out: CreateJobInput[] = [];

    try {
      for (let page = 1; page <= MAX_PAGES; page += 1) {
        if (context.shouldCancel?.()) break;
        if (out.length >= cap) break;

        context.onProgress?.({
          phase: "list",
          termsProcessed: 0,
          termsTotal: 1,
          currentUrl: `page ${page}`,
          detail: `4 Day Week: fetching page ${page}`,
        });

        const body = await fetchPage(page);
        const jobs = Array.isArray(body.jobs) ? body.jobs : [];
        if (jobs.length === 0) break;

        for (const raw of jobs) {
          // Enforce the budget per job, not only per page, so a single large
          // page cannot overshoot the configured limit.
          if (out.length >= cap) break;
          if (raw.is_expired) continue;
          if (terms.length > 0 && !terms.some((t) => matchesTerm(raw, t))) {
            continue;
          }

          const mapped = mapJob(raw);
          if (!mapped) continue;

          const key = mapped.sourceJobId || mapped.jobUrl;
          if (seen.has(key)) continue;
          seen.add(key);
          out.push(mapped);
        }

        context.onProgress?.({
          phase: "list",
          termsProcessed: 0,
          termsTotal: 1,
          currentUrl: `page ${page}`,
          jobPagesProcessed: out.length,
          detail: `4 Day Week: page ${page} done (${out.length} matched so far)`,
        });

        if (!body.has_more) break;
      }
    } catch (error) {
      // Keep whatever was collected before the failure so the caller can
      // decide whether partial results are usable.
      const message = error instanceof Error ? error.message : "Unknown error";
      return { success: false, jobs: out, error: message };
    }

    return { success: true, jobs: out };
  },
};

export default manifest;