Shaheer Sarfaraz 16a8f1d15a
Use logger! add shim to convert backend responses to same format (#84)
* chore(orchestrator): add @infra import alias

* feat(server): add error/http/context/logger/sanitize infrastructure

* refactor(core): propagate request context, structured logs, and sanitization

* test/docs: update API contract assertions and contributor standards

* all pages working

* normalizing
2026-02-04 23:07:24 +00:00

160 lines
4.9 KiB
TypeScript

import { logger } from "@infra/logger";
import type { CreateJobInput, PipelineConfig } from "@shared/types";
import * as jobsRepo from "../../repositories/jobs";
import * as settingsRepo from "../../repositories/settings";
import { runCrawler } from "../../services/crawler";
import { runJobSpy } from "../../services/jobspy";
import { runUkVisaJobs } from "../../services/ukvisajobs";
import { progressHelpers, updateProgress } from "../progress";
/** Search terms used when neither the stored setting nor the environment supplies any. */
const DEFAULT_SEARCH_TERMS = "web developer";

/** Job cap used for Gradcracker and UKVisaJobs when their setting is missing or not a number. */
const DEFAULT_MAX_JOBS = 50;

/** Raw value as read from the settings store before any parsing. */
type RawSetting = string | null | undefined;

/**
 * Parses a base-10 integer setting.
 *
 * @param raw - Raw setting value; empty, null and undefined count as "not set".
 * @param fallback - Value returned when the setting is not set or is not a number.
 * @returns The parsed integer, or `fallback` (never `NaN`).
 */
function parseIntSetting<T extends number | undefined>(
  raw: RawSetting,
  fallback: T,
): number | T {
  if (!raw) return fallback;
  const parsed = parseInt(raw, 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Parses a "1"/other flag setting.
 *
 * @returns `undefined` when the setting is null/undefined, otherwise whether it equals "1".
 */
function parseFlagSetting(raw: RawSetting): boolean | undefined {
  return raw === null || raw === undefined ? undefined : raw === "1";
}

/**
 * Builds the default search terms from the pipe-separated `JOBSPY_SEARCH_TERMS`
 * environment variable, or from the built-in default when it is unset/empty.
 */
function defaultSearchTerms(): string[] {
  const source = process.env.JOBSPY_SEARCH_TERMS || DEFAULT_SEARCH_TERMS;
  return source
    .split("|")
    .map((term) => term.trim())
    .filter(Boolean);
}

/**
 * Resolves the search terms for this run.
 *
 * The stored setting is expected to be a JSON array of strings. If it is
 * missing, the defaults are used. If it is present but malformed (invalid JSON
 * or not an array of strings), a warning is logged and the defaults are used
 * instead of letting the parse error abort the whole discovery step.
 */
function resolveSearchTerms(raw: RawSetting): string[] {
  if (!raw) return defaultSearchTerms();

  try {
    const parsed: unknown = JSON.parse(raw);
    if (
      Array.isArray(parsed) &&
      parsed.every((term): term is string => typeof term === "string")
    ) {
      return parsed;
    }
    logger.warn("Ignoring searchTerms setting: expected a JSON array of strings");
  } catch (error) {
    logger.warn("Ignoring searchTerms setting: invalid JSON", {
      error: error instanceof Error ? error.message : String(error),
    });
  }
  return defaultSearchTerms();
}

/**
 * Narrows the requested JobSpy sites to those listed in the `jobspySites`
 * setting (a JSON array). A missing, unparsable or non-array setting leaves
 * the requested sites unchanged; an unparsable one is logged.
 */
function restrictJobSpySites<T extends string>(
  sites: T[],
  rawAllowed: RawSetting,
): T[] {
  if (!rawAllowed) return sites;

  try {
    const allowed: unknown = JSON.parse(rawAllowed);
    // A non-array value carries no allow-list, so it is ignored.
    if (!Array.isArray(allowed)) return sites;
    return sites.filter((site) => allowed.includes(site));
  } catch (error) {
    logger.warn("Ignoring jobspySites setting: invalid JSON", {
      error: error instanceof Error ? error.message : String(error),
    });
    return sites;
  }
}

/**
 * Runs every job source enabled in `mergedConfig.sources` (JobSpy for
 * indeed/linkedin, Gradcracker, UKVisaJobs) one after another and collects
 * the jobs they return.
 *
 * A source that reports `success: false` contributes an entry to
 * `sourceErrors` instead of jobs. Progress is reported through
 * `progressHelpers` / `updateProgress` along the way.
 *
 * @param args.mergedConfig - Pipeline configuration; only `sources` is read here.
 * @returns The jobs discovered across all sources plus one message per failed source.
 * @throws Error when no jobs were discovered and at least one source failed.
 */
export async function discoverJobsStep(args: {
  mergedConfig: PipelineConfig;
}): Promise<{
  discoveredJobs: CreateJobInput[];
  sourceErrors: string[];
}> {
  logger.info("Running discovery step");
  progressHelpers.startCrawling();

  const discoveredJobs: CreateJobInput[] = [];
  const sourceErrors: string[] = [];

  const settings = await settingsRepo.getAllSettings();
  const searchTerms = resolveSearchTerms(settings.searchTerms);

  // JobSpy only handles indeed/linkedin; the setting can narrow that further.
  const jobSpySites = restrictJobSpySites(
    args.mergedConfig.sources.filter(
      (source): source is "indeed" | "linkedin" =>
        source === "indeed" || source === "linkedin",
    ),
    settings.jobspySites,
  );

  if (jobSpySites.length > 0) {
    updateProgress({
      step: "crawling",
      detail: `JobSpy: scraping ${jobSpySites.join(", ")}...`,
    });

    const jobSpyResult = await runJobSpy({
      sites: jobSpySites,
      searchTerms,
      location: settings.jobspyLocation ?? undefined,
      // Unset or non-numeric values become undefined rather than NaN.
      resultsWanted: parseIntSetting(settings.jobspyResultsWanted, undefined),
      hoursOld: parseIntSetting(settings.jobspyHoursOld, undefined),
      countryIndeed: settings.jobspyCountryIndeed ?? undefined,
      linkedinFetchDescription: parseFlagSetting(
        settings.jobspyLinkedinFetchDescription,
      ),
      isRemote: parseFlagSetting(settings.jobspyIsRemote),
    });

    if (!jobSpyResult.success) {
      sourceErrors.push(`jobspy: ${jobSpyResult.error ?? "unknown error"}`);
    } else {
      discoveredJobs.push(...jobSpyResult.jobs);
    }
  }

  if (args.mergedConfig.sources.includes("gradcracker")) {
    updateProgress({ step: "crawling", detail: "Gradcracker: scraping..." });

    const existingJobUrls = await jobsRepo.getAllJobUrls();
    const crawlerResult = await runCrawler({
      existingJobUrls,
      searchTerms,
      maxJobsPerTerm: parseIntSetting(
        settings.gradcrackerMaxJobsPerTerm,
        DEFAULT_MAX_JOBS,
      ),
      onProgress: (progress) => {
        // Without a positive page total there is no percentage to report.
        if (progress.listPagesTotal && progress.listPagesTotal > 0) {
          const processed = progress.listPagesProcessed ?? 0;
          const percent = Math.round(
            (processed / progress.listPagesTotal) * 100,
          );
          updateProgress({
            step: "crawling",
            detail: `Gradcracker: ${percent}% (scan ${processed}/${progress.listPagesTotal}, found ${progress.jobCardsFound})`,
          });
        }
      },
    });

    if (!crawlerResult.success) {
      sourceErrors.push(
        `gradcracker: ${crawlerResult.error ?? "unknown error"}`,
      );
    } else {
      discoveredJobs.push(...crawlerResult.jobs);
    }
  }

  if (args.mergedConfig.sources.includes("ukvisajobs")) {
    updateProgress({
      step: "crawling",
      detail: "UKVisaJobs: scraping visa-sponsoring jobs...",
    });

    const ukVisaResult = await runUkVisaJobs({
      maxJobs: parseIntSetting(settings.ukvisajobsMaxJobs, DEFAULT_MAX_JOBS),
      searchTerms,
    });

    if (!ukVisaResult.success) {
      sourceErrors.push(`ukvisajobs: ${ukVisaResult.error ?? "unknown error"}`);
    } else {
      discoveredJobs.push(...ukVisaResult.jobs);
    }
  }

  // Partial failures are tolerated; only a run with nothing to show for it is fatal.
  if (discoveredJobs.length === 0 && sourceErrors.length > 0) {
    throw new Error(`All sources failed: ${sourceErrors.join("; ")}`);
  }

  if (sourceErrors.length > 0) {
    logger.warn("Some discovery sources failed", { sourceErrors });
  }

  progressHelpers.crawlingComplete(discoveredJobs.length);
  return { discoveredJobs, sourceErrors };
}