Jobber/extractors/reed/manifest.ts
ilia 7b3dfb002a
Some checks failed
CI / Linting (Biome) (push) Failing after 36s
CI / Tests (push) Successful in 5m54s
CI / Type Check (adzuna-extractor) (push) Successful in 1m6s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m9s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m5s
CI / Type Check (orchestrator) (push) Successful in 1m21s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m4s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m4s
CI / Documentation (push) Successful in 1m52s
feat(extractors): add 17 job source extractors and cross-source dedup
Adds extractor packages: arbeitnow, ashby, careerjet, fourdayweek,
greenhouse, himalayas, jobicy, jooble, lever, reed, remoteok, remotive,
themuse, usajobs, weworkremotely, workday — each with manifest, package
metadata and README.

Pipeline / shared:
- shared/job-fingerprint: stable hash for cross-source dedup, with tests
- discover-jobs: dedup via fingerprint and richer per-source merging
- jobs repository: fingerprint-aware upsert / lookup
- settings-registry, settings types/routes, demo-defaults: knobs for the
  new sources
- shared extractors index: register the new manifests
- location-support, profiles route: small fixes for the new sources

Tooling:
- scripts/smoke-extractors.ts to sanity-check each source locally
- scripts/jobber-cron-{cherepaha,dobkin}.env.example: per-host cron
  templates (CHANGEME placeholders only)
- .env.example: documented env vars for the new extractors
- .gitignore: ignore extractors/*/storage/ runtime caches (was ukvisajobs only)

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-12 20:17:52 -04:00

189 lines
5.7 KiB
TypeScript

/**
* Reed.co.uk Jobseeker API.
*
* https://www.reed.co.uk/developers/jobseeker
* GET https://www.reed.co.uk/api/1.0/search?...
* HTTP Basic with the API key as the username and an empty password.
*
* Requires REED_API_KEY (`reedApiKey` setting). The catalog gates this source
* to UK only via `isSourceAllowedForCountry`.
*/
import type {
ExtractorManifest,
ExtractorRunResult,
} from "@shared/types/extractors";
import type { CreateJobInput } from "@shared/types/jobs";
/** Base URL of the Reed search endpoint; query parameters are added per request. */
const API_URL = "https://www.reed.co.uk/api/1.0/search";
/**
 * One entry of the `results` array in a Reed search response.
 *
 * Every field is optional: the response JSON is cast to this shape without
 * runtime validation, so consumers must treat any property as possibly absent
 * or of an unexpected type.
 */
interface ReedJob {
jobId?: number;
jobTitle?: string;
employerName?: string;
employerProfileUrl?: string;
jobDescription?: string;
jobUrl?: string;
locationName?: string;
// NOTE(review): presumably the posting date; the string format is not validated in this file.
date?: string;
expirationDate?: string;
applications?: number;
currency?: string;
minimumSalary?: number;
maximumSalary?: number;
// NOTE(review): presumably annualised equivalents of the two salary fields
// above — confirm against the Reed API documentation.
yearlyMinimumSalary?: number;
yearlyMaximumSalary?: number;
}
/**
 * Envelope of a Reed search response as consumed by this extractor.
 *
 * Both fields are optional because the JSON body is cast to this type rather
 * than validated.
 */
interface ReedResponse {
// NOTE(review): presumably the total match count across all pages — confirm against Reed docs.
totalResults?: number;
// The page of job entries for the current request.
results?: ReedJob[];
}
/**
 * Normalise an arbitrary value into a trimmed, non-empty string.
 *
 * @param value - Any value, typically a field taken from untyped JSON.
 * @returns The value with surrounding whitespace removed, or `undefined` when
 *   the value is not a string or is empty/whitespace-only.
 */
function asString(value: unknown): string | undefined {
  if (typeof value === "string") {
    const text = value.trim();
    if (text.length > 0) return text;
  }
  return undefined;
}
/**
 * Convert one Reed search result into the pipeline's `CreateJobInput`.
 *
 * Salary handling: when Reed supplies an annualised figure
 * (`yearlyMinimumSalary` and/or `yearlyMaximumSalary`) those figures are
 * reported together with `salaryInterval: "yearly"`, so the amounts and the
 * interval always describe the same thing. When no annualised figure is
 * present the raw `minimumSalary` / `maximumSalary` are passed through and the
 * interval is left undefined because this payload does not state it.
 *
 * @param raw - A single entry from the response's `results` array. Fields are
 *   untrusted: anything missing or of the wrong type is treated as absent.
 * @returns The mapped job, or `null` when the entry has no usable `jobUrl`
 *   (the URL doubles as the application link, so the job cannot be stored
 *   without it).
 */
function mapJob(raw: ReedJob): CreateJobInput | null {
  const jobUrl = asString(raw.jobUrl);
  if (!jobUrl) return null;

  // The payload is an unvalidated cast, so check the runtime type explicitly.
  const asAmount = (value: unknown): number | undefined =>
    typeof value === "number" ? value : undefined;

  const yearlyMin = asAmount(raw.yearlyMinimumSalary);
  const yearlyMax = asAmount(raw.yearlyMaximumSalary);
  const hasYearly = yearlyMin !== undefined || yearlyMax !== undefined;

  return {
    source: "reed",
    sourceJobId: raw.jobId != null ? String(raw.jobId) : undefined,
    title: asString(raw.jobTitle) ?? "Unknown Title",
    employer: asString(raw.employerName) ?? "Unknown Employer",
    employerUrl: asString(raw.employerProfileUrl),
    jobUrl,
    applicationLink: jobUrl,
    location: asString(raw.locationName),
    datePosted: asString(raw.date),
    deadline: asString(raw.expirationDate),
    jobDescription: asString(raw.jobDescription),
    // Never mix annualised and raw figures: once a yearly value is known, only
    // yearly values are reported so they match the "yearly" interval below.
    salaryMinAmount: hasYearly ? yearlyMin : asAmount(raw.minimumSalary),
    salaryMaxAmount: hasYearly ? yearlyMax : asAmount(raw.maximumSalary),
    salaryCurrency: asString(raw.currency) ?? "GBP",
    salaryInterval: hasYearly ? "yearly" : undefined,
  };
}
/**
 * Fetch a single page of results from the Reed search endpoint.
 *
 * Authenticates with HTTP Basic, sending the API key as the username and an
 * empty password.
 *
 * @param args.apiKey - Reed API key.
 * @param args.keywords - Search keywords; always sent, even when empty.
 * @param args.locationName - Optional location filter; omitted from the query
 *   when falsy.
 * @param args.resultsToTake - Page size requested.
 * @param args.resultsToSkip - Offset of the first result to return.
 * @returns The parsed JSON body, cast (not validated) to `ReedResponse`.
 * @throws Error when the HTTP status is not OK; network and JSON parsing
 *   failures propagate from `fetch` / `response.json()`.
 */
async function fetchPage(args: {
  apiKey: string;
  keywords: string;
  locationName?: string;
  resultsToTake: number;
  resultsToSkip: number;
}): Promise<ReedResponse> {
  const { apiKey, keywords, locationName, resultsToTake, resultsToSkip } = args;

  const endpoint = new URL(API_URL);
  const query = endpoint.searchParams;
  query.set("keywords", keywords);
  if (locationName) {
    query.set("locationName", locationName);
  }
  query.set("resultsToTake", String(resultsToTake));
  query.set("resultsToSkip", String(resultsToSkip));

  // Basic credentials are "<username>:<password>" with an empty password.
  const credentials = Buffer.from(`${apiKey}:`).toString("base64");
  const headers = {
    Accept: "application/json",
    Authorization: `Basic ${credentials}`,
  };

  const response = await fetch(endpoint.toString(), { headers });
  if (response.ok) {
    return (await response.json()) as ReedResponse;
  }
  throw new Error(`Reed request failed with status ${response.status}`);
}
/**
 * Resolve the per-term job cap from the `reedMaxJobsPerTerm` setting.
 *
 * An unset/empty setting, or one that does not parse as an integer, falls back
 * to 100. Without this guard a non-numeric value yields `NaN`, every
 * `collected < NaN` comparison is false, and the extractor would silently
 * report success with zero jobs. Parsed values are returned unchanged, so a
 * cap of 0 (or below) still means "collect nothing".
 */
function resolveMaxJobsPerTerm(raw: string | null | undefined): number {
  const fallback = 100;
  if (!raw) return fallback;
  const parsed = Number.parseInt(raw, 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Extractor manifest for Reed.
 *
 * `run` iterates over the search terms (or a single empty term when none are
 * configured), pages through the search endpoint for each, and returns the
 * de-duplicated jobs. Only the first entry of the `|`-separated
 * `searchCities` setting is used as the location filter.
 *
 * Result contract of `run`:
 * - cancelled before or during the run: `success: true` with whatever was
 *   collected so far;
 * - no API key in settings or `REED_API_KEY`: `success: false`, no jobs;
 * - a request fails: `success: false` with the jobs collected before the
 *   failure and the error message.
 */
export const manifest: ExtractorManifest = {
  id: "reed",
  displayName: "Reed",
  providesSources: ["reed"],
  requiredEnvVars: ["REED_API_KEY"],

  async run(context): Promise<ExtractorRunResult> {
    if (context.shouldCancel?.()) return { success: true, jobs: [] };

    // The setting takes precedence over the environment variable.
    const apiKey =
      context.settings.reedApiKey?.trim() || process.env.REED_API_KEY?.trim();
    if (!apiKey) {
      return {
        success: false,
        jobs: [],
        error: "Reed extractor requires REED_API_KEY",
      };
    }

    const maxJobsPerTerm = resolveMaxJobsPerTerm(
      context.settings.reedMaxJobsPerTerm,
    );
    // Reed accepts up to 100 per page.
    const pageSize = Math.min(100, maxJobsPerTerm);
    const terms = context.searchTerms.length > 0 ? context.searchTerms : [""];
    const locationName =
      context.settings.searchCities?.split("|")[0]?.trim() || undefined;

    // `seen` spans all terms so a job matched by several terms is kept once.
    const seen = new Set<string>();
    const out: CreateJobInput[] = [];

    try {
      for (let i = 0; i < terms.length; i += 1) {
        if (context.shouldCancel?.()) break;
        const term = terms[i].trim();

        context.onProgress?.({
          phase: "list",
          termsProcessed: i,
          termsTotal: terms.length,
          currentUrl: term || "(all)",
          detail: `Reed: term ${i + 1}/${terms.length}`,
        });

        let collected = 0;
        let resultsToSkip = 0;
        while (collected < maxJobsPerTerm) {
          if (context.shouldCancel?.()) break;

          const body = await fetchPage({
            apiKey,
            keywords: term,
            locationName,
            resultsToTake: pageSize,
            resultsToSkip,
          });
          const items = Array.isArray(body.results) ? body.results : [];
          if (items.length === 0) break;

          for (const raw of items) {
            const mapped = mapJob(raw);
            if (!mapped) continue;
            // Prefer Reed's own id; fall back to the URL when it is missing.
            const key = mapped.sourceJobId || mapped.jobUrl;
            if (seen.has(key)) continue;
            seen.add(key);
            out.push(mapped);
            collected += 1;
            if (collected >= maxJobsPerTerm) break;
          }

          // A short page means there is nothing further to fetch.
          if (items.length < pageSize) break;
          resultsToSkip += pageSize;
          // Safety stop so a single term cannot page indefinitely.
          if (resultsToSkip > 5000) break;
        }

        context.onProgress?.({
          phase: "list",
          termsProcessed: i + 1,
          termsTotal: terms.length,
          currentUrl: term || "(all)",
          jobPagesProcessed: out.length,
          detail: `Reed: completed term ${i + 1}/${terms.length} (${collected} found)`,
        });
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      // Hand back what was gathered before the failure alongside the error.
      return { success: false, jobs: out, error: message };
    }

    return { success: true, jobs: out };
  },
};

export default manifest;