Jobber/extractors/jobicy/manifest.ts
ilia 7b3dfb002a
Some checks failed
CI / Linting (Biome) (push) Failing after 36s
CI / Tests (push) Successful in 5m54s
CI / Type Check (adzuna-extractor) (push) Successful in 1m6s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m9s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m5s
CI / Type Check (orchestrator) (push) Successful in 1m21s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m4s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m4s
CI / Documentation (push) Successful in 1m52s
feat(extractors): add 17 job source extractors and cross-source dedup
Adds extractor packages: arbeitnow, ashby, careerjet, fourdayweek,
greenhouse, himalayas, jobicy, jooble, lever, reed, remoteok, remotive,
themuse, usajobs, weworkremotely, workday — each with manifest, package
metadata and README.

Pipeline / shared:
- shared/job-fingerprint: stable hash for cross-source dedup, with tests
- discover-jobs: dedup via fingerprint and richer per-source merging
- jobs repository: fingerprint-aware upsert / lookup
- settings-registry, settings types/routes, demo-defaults: knobs for the
  new sources
- shared extractors index: register the new manifests
- location-support, profiles route: small fixes for the new sources

Tooling:
- scripts/smoke-extractors.ts to sanity-check each source locally
- scripts/jobber-cron-{cherepaha,dobkin}.env.example: per-host cron
  templates (CHANGEME placeholders only)
- .env.example: documented env vars for the new extractors
- .gitignore: ignore extractors/*/storage/ runtime caches (was ukvisajobs only)

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-12 20:17:52 -04:00

187 lines
5.8 KiB
TypeScript

/**
* Jobicy remote-jobs feed.
*
* Public, unauthenticated JSON endpoint:
* https://jobicy.com/api/v2/remote-jobs?count=50
*
* The feed is intentionally remote-only; we still pass each `searchTerm` as a
* `tag` so the same pipeline-level term iteration drives results. We do *not*
* try to invent a country filter — Jobicy postings are remote-friendly by
* design and the registry already restricts ukOnly extractors elsewhere.
*/
import type {
ExtractorManifest,
ExtractorRunResult,
} from "@shared/types/extractors";
import type { CreateJobInput } from "@shared/types/jobs";
/** Base URL of the Jobicy v2 remote-jobs JSON endpoint; `fetchJobicy` appends the query parameters per request. */
const API_URL = "https://jobicy.com/api/v2/remote-jobs";
/**
 * One posting as it appears in the `jobs` array of the Jobicy response.
 *
 * Every field is optional and loosely typed: the payload is cast from parsed
 * JSON without validation, so `mapJob` normalizes each value defensively.
 */
interface JobicyRawJob {
/** Feed identifier; stringified into `sourceJobId` when present. */
id?: number | string;
/** Posting URL; used for both `jobUrl` and `applicationLink`. Postings without it are dropped. */
url?: string;
/** Job title; `mapJob` substitutes "Unknown Title" when missing or blank. */
jobTitle?: string;
/** Employer name; `mapJob` substitutes "Unknown Employer" when missing or blank. */
companyName?: string;
/** Mapped to `companyLogo` after trimming. */
companyLogo?: string;
/** Either a single string or a list; lists are joined with ", " into `companyIndustry`. */
jobIndustry?: string[] | string;
/** Either a single string or a list; lists are joined with ", " into `jobType`. */
jobType?: string[] | string;
/** Mapped to `location`; `mapJob` falls back to "Remote" when missing or blank. */
jobGeo?: string;
/** Mapped to `jobLevel` after trimming. */
jobLevel?: string;
/** Used as the description only when `jobDescription` is missing or blank. */
jobExcerpt?: string;
/** Preferred source for `jobDescription`. */
jobDescription?: string;
/** Passed through to `datePosted` as a trimmed string; no date parsing is done here. */
pubDate?: string;
/** Lower salary bound; numeric strings are parsed with `Number.parseFloat`. */
annualSalaryMin?: number | string;
/** Upper salary bound; numeric strings are parsed with `Number.parseFloat`. */
annualSalaryMax?: number | string;
/** Mapped to `salaryCurrency` after trimming. */
salaryCurrency?: string;
}
/**
 * Top-level shape of the Jobicy response body, as far as this extractor reads it.
 */
interface JobicyResponse {
/** Postings in the response; `fetchJobicy` treats a missing or non-array value as an empty list. */
jobs?: JobicyRawJob[];
}
/**
 * Normalizes an arbitrary value to a trimmed, non-empty string.
 *
 * @param value - Any value, typically a raw field from the feed payload.
 * @returns The trimmed text, or `undefined` when `value` is not a string or
 *   contains only whitespace.
 */
function asString(value: unknown): string | undefined {
  if (typeof value === "string") {
    const text = value.trim();
    if (text.length > 0) return text;
  }
  return undefined;
}
/**
 * Collapses a field that may be a single string or a list of strings into one
 * display string.
 *
 * @param value - A string, an array, or anything else from the raw payload.
 * @returns For arrays: the trimmed, non-empty string entries joined with
 *   ", " (non-string entries are ignored), or `undefined` if none remain.
 *   For any other value: the result of `asString(value)`.
 */
function joinList(value: unknown): string | undefined {
  if (!Array.isArray(value)) return asString(value);

  const parts: string[] = [];
  for (const entry of value) {
    if (typeof entry !== "string") continue;
    const text = entry.trim();
    if (text) parts.push(text);
  }
  return parts.length === 0 ? undefined : parts.join(", ");
}
/**
 * Coerces a raw payload value to a finite number.
 *
 * Strings are parsed with `Number.parseFloat`, so a leading numeric prefix is
 * accepted (e.g. "80k" yields 80).
 *
 * @param value - A number, a numeric string, or anything else.
 * @returns The finite number, or `undefined` when the value is neither a
 *   number nor a string, or does not yield a finite result.
 */
function toNumberOrUndefined(value: unknown): number | undefined {
  let candidate = Number.NaN;
  if (typeof value === "number") {
    candidate = value;
  } else if (typeof value === "string") {
    candidate = Number.parseFloat(value);
  }
  return Number.isFinite(candidate) ? candidate : undefined;
}
/**
 * Converts one raw Jobicy posting into the pipeline's job input shape.
 *
 * @param raw - A posting from the feed's `jobs` array.
 * @returns The mapped job, or `null` when the posting has no usable URL
 *   (the URL serves as both `jobUrl` and `applicationLink`).
 */
function mapJob(raw: JobicyRawJob): CreateJobInput | null {
  const link = asString(raw.url);
  if (link === undefined) return null;

  const externalId =
    raw.id === undefined || raw.id === null ? undefined : String(raw.id);
  const salaryFloor = toNumberOrUndefined(raw.annualSalaryMin);
  const salaryCeiling = toNumberOrUndefined(raw.annualSalaryMax);
  // The fields are annual amounts, so any truthy bound implies a yearly interval.
  const hasSalary = Boolean(salaryFloor || salaryCeiling);
  const description =
    asString(raw.jobDescription) ?? asString(raw.jobExcerpt);

  return {
    source: "jobicy",
    sourceJobId: externalId,
    title: asString(raw.jobTitle) ?? "Unknown Title",
    employer: asString(raw.companyName) ?? "Unknown Employer",
    jobUrl: link,
    applicationLink: link,
    location: asString(raw.jobGeo) ?? "Remote",
    // Every posting is marked remote regardless of the geo label.
    isRemote: true,
    jobType: joinList(raw.jobType),
    jobLevel: asString(raw.jobLevel),
    companyIndustry: joinList(raw.jobIndustry),
    companyLogo: asString(raw.companyLogo),
    datePosted: asString(raw.pubDate),
    jobDescription: description,
    salaryMinAmount: salaryFloor,
    salaryMaxAmount: salaryCeiling,
    salaryCurrency: asString(raw.salaryCurrency),
    salaryInterval: hasSalary ? "yearly" : undefined,
  };
}
/**
 * Requests one batch of postings from the Jobicy feed.
 *
 * @param tag - Value for the `tag` query parameter; omitted when null or empty.
 * @param count - Requested number of postings; clamped to the range 1..50
 *   before being sent.
 * @returns The `jobs` array from the response, or an empty array when the
 *   body has no array under `jobs`.
 * @throws Error when the HTTP response status is not OK; fetch and JSON
 *   parsing failures propagate unchanged.
 */
async function fetchJobicy(
  tag: string | null,
  count: number,
): Promise<JobicyRawJob[]> {
  const clampedCount = Math.min(Math.max(count, 1), 50);

  const endpoint = new URL(API_URL);
  endpoint.searchParams.set("count", String(clampedCount));
  if (tag) {
    endpoint.searchParams.set("tag", tag);
  }

  const requestInit = { headers: { Accept: "application/json" } };
  const response = await fetch(endpoint.toString(), requestInit);
  if (!response.ok) {
    throw new Error(`Jobicy request failed with status ${response.status}`);
  }

  const payload = (await response.json()) as JobicyResponse;
  const listings = payload.jobs;
  return Array.isArray(listings) ? listings : [];
}
/**
 * Extractor manifest for the Jobicy remote-jobs feed.
 *
 * `run` queries the feed once per search term (or once without a tag when no
 * terms are configured), maps each posting to a `CreateJobInput`, and
 * de-duplicates postings across all terms by source id, falling back to URL.
 */
export const manifest: ExtractorManifest = {
  id: "jobicy",
  displayName: "Jobicy (Remote)",
  providesSources: ["jobicy"],
  /**
   * Collects postings for every search term in `context`.
   *
   * @param context - Run context supplying `searchTerms`, `settings`, and the
   *   optional `shouldCancel` / `onProgress` hooks.
   * @returns `success: true` with the collected jobs (possibly partial when
   *   cancelled), or `success: false` with the jobs gathered so far and the
   *   error message when a request fails.
   */
  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    const defaultMaxJobsPerTerm = 100;
    const configuredMax = context.settings.jobicyMaxJobsPerTerm
      ? Number.parseInt(context.settings.jobicyMaxJobsPerTerm, 10)
      : defaultMaxJobsPerTerm;
    // A non-numeric setting parses to NaN, and `collected < NaN` is always
    // false, which would silently fetch nothing. Fall back to the default.
    const maxJobsPerTerm = Number.isNaN(configuredMax)
      ? defaultMaxJobsPerTerm
      : configuredMax;

    const terms = context.searchTerms.length > 0 ? context.searchTerms : [null];
    // Shared across terms so a posting matched by several tags is emitted once.
    const seen = new Set<string>();
    const out: CreateJobInput[] = [];

    try {
      for (let i = 0; i < terms.length; i += 1) {
        if (context.shouldCancel?.()) break;
        const term = terms[i];
        const tag = term ? term.trim().toLowerCase() : null;
        context.onProgress?.({
          phase: "list",
          termsProcessed: i,
          termsTotal: terms.length,
          currentUrl: tag ?? "(all remote)",
          detail: `Jobicy: term ${i + 1}/${terms.length}`,
        });

        // Each request is clamped to 50 postings by `fetchJobicy`. Keep
        // requesting until the per-term cap is reached, the feed returns a
        // short batch, or a batch contributes nothing new.
        let collected = 0;
        let safetyHops = 0;
        while (collected < maxJobsPerTerm && safetyHops < 10) {
          const take = Math.min(50, maxJobsPerTerm - collected);
          const raw = await fetchJobicy(tag, take);
          if (raw.length === 0) break;

          const collectedBefore = collected;
          for (const item of raw) {
            const mapped = mapJob(item);
            if (!mapped) continue;
            const key = mapped.sourceJobId || mapped.jobUrl;
            if (seen.has(key)) continue;
            seen.add(key);
            out.push(mapped);
            collected += 1;
            if (collected >= maxJobsPerTerm) break;
          }

          if (raw.length < take) break;
          // No offset or page is sent, so a repeat request returns the same
          // listings. Once a batch adds nothing new, further hops would only
          // repeat identical calls, so stop.
          if (collected === collectedBefore) break;
          safetyHops += 1;
        }

        context.onProgress?.({
          phase: "list",
          termsProcessed: i + 1,
          termsTotal: terms.length,
          currentUrl: tag ?? "(all remote)",
          jobPagesProcessed: out.length,
          detail: `Jobicy: completed term ${i + 1}/${terms.length} (${collected} found)`,
        });
      }
    } catch (error) {
      // Report the failure but keep whatever was collected before it.
      const message = error instanceof Error ? error.message : "Unknown error";
      return { success: false, jobs: out, error: message };
    }

    return { success: true, jobs: out };
  },
};

export default manifest;