Some checks failed
CI / Linting (Biome) (push) Failing after 36s
CI / Tests (push) Successful in 5m54s
CI / Type Check (adzuna-extractor) (push) Successful in 1m6s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m9s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m5s
CI / Type Check (orchestrator) (push) Successful in 1m21s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m4s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m4s
CI / Documentation (push) Successful in 1m52s
Adds extractor packages: arbeitnow, ashby, careerjet, fourdayweek,
greenhouse, himalayas, jobicy, jooble, lever, reed, remoteok, remotive,
themuse, usajobs, weworkremotely, workday — each with manifest, package
metadata and README.
Pipeline / shared:
- shared/job-fingerprint: stable hash for cross-source dedup, with tests
- discover-jobs: dedup via fingerprint and richer per-source merging
- jobs repository: fingerprint-aware upsert / lookup
- settings-registry, settings types/routes, demo-defaults: knobs for the
new sources
- shared extractors index: register the new manifests
- location-support, profiles route: small fixes for the new sources
Tooling:
- scripts/smoke-extractors.ts to sanity-check each source locally
- scripts/jobber-cron-{cherepaha,dobkin}.env.example: per-host cron
templates (CHANGEME placeholders only)
- .env.example: documented env vars for the new extractors
- .gitignore: ignore extractors/*/storage/ runtime caches (was ukvisajobs only)
Co-authored-by: Cursor <cursoragent@cursor.com>
196 lines
5.6 KiB
TypeScript
196 lines
5.6 KiB
TypeScript
/**
|
||
* Himalayas public remote-jobs API.
|
||
*
|
||
* https://himalayas.app/jobs/api?limit=N&offset=M
|
||
*
|
||
* No auth. Returns up to `limit` results per call. No server-side
|
||
* search — we paginate and filter client-side by title + categories.
|
||
*/
|
||
|
||
import type {
|
||
ExtractorManifest,
|
||
ExtractorRunResult,
|
||
} from "@shared/types/extractors";
|
||
import type { CreateJobInput } from "@shared/types/jobs";
|
||
|
||
/** Endpoint of the public Himalayas jobs listing API. */
const API_URL = "https://himalayas.app/jobs/api";

/** Number of listings requested per API call (sent as the `limit` query parameter). */
const PAGE_SIZE = 50;

/** Upper bound on pages fetched per run, i.e. at most PAGE_SIZE * MAX_PAGES listings are scanned. */
const MAX_PAGES = 5;
|
||
|
||
/**
 * One job entry as returned by the Himalayas API.
 *
 * Every field is optional because the payload is not validated before use;
 * consumers in this file narrow each value at runtime.
 */
interface HimalayasJob {
  // --- Listing text ---
  /** Job title; searched when filtering by term. */
  title?: string;
  excerpt?: string;
  /** Full description; mapped to the job description. */
  description?: string;

  // --- Employer ---
  companyName?: string;
  companySlug?: string;
  companyLogo?: string;

  // --- Terms of employment ---
  employmentType?: string;
  minSalary?: number | null;
  maxSalary?: number | null;
  /** Currency label prefixed to the formatted salary. */
  currency?: string;
  seniority?: string[];

  // --- Restrictions ---
  /** Joined with ", " to form the job location when non-empty. */
  locationRestrictions?: string[];
  timezoneRestrictions?: number[];

  // --- Classification ---
  /** Category slugs; searched (with hyphens treated as spaces) when filtering by term. */
  categories?: string[];
  parentCategories?: string[];

  // --- Dates ---
  /** Publication time; this file multiplies it by 1000 before building a Date, i.e. treats it as epoch seconds. */
  pubDate?: number;
  expiryDate?: number;

  // --- Identity / links ---
  /** Preferred job URL. */
  applicationLink?: string;
  /** Used as the source job id and as the fallback job URL. */
  guid?: string;
}
|
||
|
||
/** Top-level shape of a Himalayas API response, as far as this file reads it. */
interface HimalayasResponse {
  /** Listings for the requested page; treated as empty when absent or not an array. */
  jobs?: HimalayasJob[];
}
|
||
|
||
/**
 * Coerces an untrusted value to a trimmed, non-empty string.
 *
 * @param value - Any value, typically a field from the API payload.
 * @returns The trimmed string, or `undefined` when `value` is not a string
 *   or contains only whitespace.
 */
function asString(value: unknown): string | undefined {
  if (typeof value === "string") {
    const text = value.trim();
    if (text.length > 0) return text;
  }
  return undefined;
}
|
||
|
||
/**
 * Reports whether a job matches a search term, case-insensitively.
 *
 * A job matches when the term is a substring of its title, or a substring of
 * any of its categories once hyphens are treated as spaces.
 *
 * @param job - Raw API job entry.
 * @param term - Search term as entered by the user.
 * @returns `true` when the title or any category contains the term.
 */
function matchesTerm(job: HimalayasJob, term: string): boolean {
  const needle = term.toLowerCase();
  if (job.title?.toLowerCase().includes(needle)) return true;

  if (!Array.isArray(job.categories)) return false;

  // Categories are compared with hyphens replaced by spaces, so the term must
  // be normalized the same way; otherwise a hyphenated term such as
  // "full-stack" could never match any category.
  const categoryNeedle = needle.replace(/-/g, " ");
  return job.categories.some(
    (category) =>
      typeof category === "string" &&
      category.toLowerCase().replace(/-/g, " ").includes(categoryNeedle),
  );
}
|
||
|
||
/**
 * Builds a human-readable salary string from a job's salary fields.
 *
 * Numbers are always formatted with the "en-US" locale so that the stored
 * string does not depend on the locale of the machine running the extractor.
 *
 * @param job - Raw API job entry.
 * @returns `"<currency> <min>–<max>"` when both bounds are present,
 *   `"<currency> <value>"` when only one is, or `undefined` when neither is.
 *   The currency falls back to "USD" when missing or blank.
 */
function formatSalary(job: HimalayasJob): string | undefined {
  const { minSalary, maxSalary } = job;
  if (minSalary == null && maxSalary == null) return undefined;

  // A blank or non-string currency would otherwise yield a leading-space result.
  const trimmedCurrency =
    typeof job.currency === "string" ? job.currency.trim() : "";
  const currency = trimmedCurrency || "USD";

  const formatAmount = (amount: number): string =>
    amount.toLocaleString("en-US");

  if (minSalary != null && maxSalary != null) {
    return `${currency} ${formatAmount(minSalary)}–${formatAmount(maxSalary)}`;
  }

  const single = minSalary ?? maxSalary;
  return single != null ? `${currency} ${formatAmount(single)}` : undefined;
}
|
||
|
||
/**
 * Converts a raw Himalayas job entry into the pipeline's job input shape.
 *
 * @param raw - Raw API job entry.
 * @returns The mapped job, or `null` when the entry has neither a usable
 *   `applicationLink` nor a usable `guid` to serve as its URL.
 */
function mapJob(raw: HimalayasJob): CreateJobInput | null {
  const jobUrl = asString(raw.applicationLink) ?? asString(raw.guid);
  if (!jobUrl) return null;

  // The payload is unvalidated, so keep only non-empty string entries.
  const nonEmptyStrings = (values: unknown): string[] =>
    Array.isArray(values)
      ? values.filter(
          (v): v is string => typeof v === "string" && v.length > 0,
        )
      : [];

  const categories = nonEmptyStrings(raw.categories);
  const locations = nonEmptyStrings(raw.locationRestrictions);

  // `toISOString()` throws a RangeError on an invalid Date (NaN or an
  // out-of-range timestamp). One bad `pubDate` must not abort the whole run,
  // so an unrepresentable date is simply omitted.
  let datePosted: string | undefined;
  if (typeof raw.pubDate === "number") {
    const posted = new Date(raw.pubDate * 1000);
    if (!Number.isNaN(posted.getTime())) {
      datePosted = posted.toISOString();
    }
  }

  return {
    source: "himalayas",
    sourceJobId: asString(raw.guid),
    title: asString(raw.title) ?? "Unknown Title",
    employer: asString(raw.companyName) ?? "Unknown Employer",
    jobUrl,
    applicationLink: jobUrl,
    location: locations.length > 0 ? locations.join(", ") : "Remote",
    isRemote: true,
    jobType: asString(raw.employmentType),
    companyLogo: asString(raw.companyLogo),
    datePosted,
    salary: formatSalary(raw),
    jobDescription: asString(raw.description),
    disciplines: categories.length > 0 ? categories.join(", ") : undefined,
  };
}
|
||
|
||
/**
 * Fetches one page of listings from the Himalayas API.
 *
 * @param offset - Zero-based index of the first listing to return.
 * @param limit - Maximum number of listings to request.
 * @returns The `jobs` array from the response, or an empty array when the
 *   response has no array under `jobs`.
 * @throws Error when the HTTP response status is not OK.
 */
async function fetchPage(
  offset: number,
  limit: number,
): Promise<HimalayasJob[]> {
  const res = await fetch(`${API_URL}?limit=${limit}&offset=${offset}`, {
    headers: { Accept: "application/json" },
  });

  if (res.ok) {
    const payload = (await res.json()) as HimalayasResponse;
    return Array.isArray(payload.jobs) ? payload.jobs : [];
  }

  throw new Error(`Himalayas request failed with status ${res.status}`);
}
|
||
|
||
/**
 * Extractor manifest for the Himalayas source.
 *
 * `run` pages through the listing API (at most MAX_PAGES pages of PAGE_SIZE
 * entries), keeps entries matching any search term (or all entries when no
 * terms are given), de-duplicates them by source job id / URL, and reports
 * progress after each page. On a fetch failure it returns `success: false`
 * together with whatever jobs were collected before the error.
 */
export const manifest: ExtractorManifest = {
  id: "himalayas",
  displayName: "Himalayas",
  providesSources: ["himalayas"],

  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    // An unparsable setting makes parseInt return NaN; every `>= NaN`
    // comparison is false, which would silently disable the cap. Fall back
    // to the default instead.
    const defaultMaxJobsPerTerm = 100;
    const rawMaxJobs = context.settings.himalayasMaxJobsPerTerm;
    const parsedMaxJobs = rawMaxJobs
      ? Number.parseInt(rawMaxJobs, 10)
      : defaultMaxJobsPerTerm;
    const maxJobs = Number.isNaN(parsedMaxJobs)
      ? defaultMaxJobsPerTerm
      : parsedMaxJobs;

    // A blank term is a substring of every title, so it would defeat the
    // filter entirely; ignore such terms.
    const terms = context.searchTerms.filter((t) => t.trim().length > 0);

    // The API has no server-side search, so the per-term limit is applied
    // as one overall budget across all terms.
    const jobCap = maxJobs * Math.max(terms.length, 1);

    const seen = new Set<string>();
    const out: CreateJobInput[] = [];

    try {
      for (let page = 0; page < MAX_PAGES; page += 1) {
        if (context.shouldCancel?.()) break;
        if (out.length >= jobCap) break;

        const offset = page * PAGE_SIZE;
        context.onProgress?.({
          phase: "list",
          termsProcessed: 0,
          termsTotal: 1,
          currentUrl: `offset ${offset}`,
          detail: `Himalayas: fetching page ${page + 1}`,
        });

        const raw = await fetchPage(offset, PAGE_SIZE);
        if (raw.length === 0) break;

        for (const item of raw) {
          // Enforce the cap per item, not only per page, so the result
          // never overshoots the configured limit.
          if (out.length >= jobCap) break;

          if (terms.length > 0 && !terms.some((t) => matchesTerm(item, t))) {
            continue;
          }
          const mapped = mapJob(item);
          if (!mapped) continue;

          const key = mapped.sourceJobId || mapped.jobUrl;
          if (seen.has(key)) continue;
          seen.add(key);
          out.push(mapped);
        }

        context.onProgress?.({
          phase: "list",
          termsProcessed: 0,
          termsTotal: 1,
          currentUrl: `offset ${offset}`,
          jobPagesProcessed: out.length,
          detail: `Himalayas: page ${page + 1} done (${out.length} matched so far)`,
        });

        // A short page means the listing is exhausted.
        if (raw.length < PAGE_SIZE) break;
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      return { success: false, jobs: out, error: message };
    }

    return { success: true, jobs: out };
  },
};

export default manifest;
|