Shaheer Sarfaraz 16a8f1d15a
Use logger! add shim to convert backend responses to same format (#84)
* chore(orchestrator): add @infra import alias

* feat(server): add error/http/context/logger/sanitize infrastructure

* refactor(core): propagate request context, structured logs, and sanitization

* test/docs: update API contract assertions and contributor standards

* all pages working

* normalizing
2026-02-04 23:07:24 +00:00

295 lines
8.1 KiB
TypeScript

import { logger } from "@infra/logger";
/**
* Pipeline progress tracking with Server-Sent Events.
*/
export type PipelineStep =
| "idle"
| "crawling"
| "importing"
| "scoring"
| "processing"
| "completed"
| "failed";
export interface PipelineProgress {
step: PipelineStep;
message: string;
detail?: string;
crawlingListPagesProcessed: number;
crawlingListPagesTotal: number;
crawlingJobCardsFound: number;
crawlingJobPagesEnqueued: number;
crawlingJobPagesSkipped: number;
crawlingJobPagesProcessed: number;
crawlingPhase?: "list" | "job";
crawlingCurrentUrl?: string;
jobsDiscovered: number;
jobsScored: number;
jobsProcessed: number;
totalToProcess: number;
currentJob?: {
id: string;
title: string;
employer: string;
};
error?: string;
startedAt?: string;
completedAt?: string;
}
// Event emitter for progress updates
type ProgressListener = (progress: PipelineProgress) => void;
const listeners: Set<ProgressListener> = new Set();
let currentProgress: PipelineProgress = {
step: "idle",
message: "Ready",
crawlingListPagesProcessed: 0,
crawlingListPagesTotal: 0,
crawlingJobCardsFound: 0,
crawlingJobPagesEnqueued: 0,
crawlingJobPagesSkipped: 0,
crawlingJobPagesProcessed: 0,
jobsDiscovered: 0,
jobsScored: 0,
jobsProcessed: 0,
totalToProcess: 0,
};
/**
* Update the current progress and notify all listeners.
*/
export function updateProgress(update: Partial<PipelineProgress>): void {
currentProgress = { ...currentProgress, ...update };
// Notify all listeners
for (const listener of listeners) {
try {
listener(currentProgress);
} catch (error) {
logger.error("Error in progress listener", error);
}
}
}
/**
* Get the current progress state.
*/
export function getProgress(): PipelineProgress {
return { ...currentProgress };
}
/**
* Subscribe to progress updates.
*/
export function subscribeToProgress(listener: ProgressListener): () => void {
listeners.add(listener);
// Send current state immediately
listener(currentProgress);
// Return unsubscribe function
return () => {
listeners.delete(listener);
};
}
/**
* Reset progress to idle state.
*/
export function resetProgress(): void {
currentProgress = {
step: "idle",
message: "Ready",
crawlingListPagesProcessed: 0,
crawlingListPagesTotal: 0,
crawlingJobCardsFound: 0,
crawlingJobPagesEnqueued: 0,
crawlingJobPagesSkipped: 0,
crawlingJobPagesProcessed: 0,
crawlingPhase: undefined,
crawlingCurrentUrl: undefined,
jobsDiscovered: 0,
jobsScored: 0,
jobsProcessed: 0,
totalToProcess: 0,
};
}
/**
* Helper to create progress updates for each step.
*/
export const progressHelpers = {
startCrawling: () =>
updateProgress({
step: "crawling",
message: "Fetching jobs from sources...",
detail: "Starting crawler",
startedAt: new Date().toISOString(),
crawlingListPagesProcessed: 0,
crawlingListPagesTotal: 0,
crawlingJobCardsFound: 0,
crawlingJobPagesEnqueued: 0,
crawlingJobPagesSkipped: 0,
crawlingJobPagesProcessed: 0,
crawlingPhase: undefined,
crawlingCurrentUrl: undefined,
jobsDiscovered: 0,
jobsScored: 0,
jobsProcessed: 0,
totalToProcess: 0,
}),
crawlingUpdate: (update: {
listPagesProcessed?: number;
listPagesTotal?: number;
jobCardsFound?: number;
jobPagesEnqueued?: number;
jobPagesSkipped?: number;
jobPagesProcessed?: number;
phase?: "list" | "job";
currentUrl?: string;
}) => {
const current = getProgress();
const next = {
...current,
crawlingListPagesProcessed:
update.listPagesProcessed ?? current.crawlingListPagesProcessed,
crawlingListPagesTotal:
update.listPagesTotal ?? current.crawlingListPagesTotal,
crawlingJobCardsFound:
update.jobCardsFound ?? current.crawlingJobCardsFound,
crawlingJobPagesEnqueued:
update.jobPagesEnqueued ?? current.crawlingJobPagesEnqueued,
crawlingJobPagesSkipped:
update.jobPagesSkipped ?? current.crawlingJobPagesSkipped,
crawlingJobPagesProcessed:
update.jobPagesProcessed ?? current.crawlingJobPagesProcessed,
crawlingPhase: update.phase ?? current.crawlingPhase,
crawlingCurrentUrl: update.currentUrl ?? current.crawlingCurrentUrl,
};
const sourcesPart =
next.crawlingListPagesTotal > 0
? `${next.crawlingListPagesProcessed}/${next.crawlingListPagesTotal}`
: `${next.crawlingListPagesProcessed}`;
const pagesPart = `${next.crawlingJobPagesProcessed}/${next.crawlingJobPagesEnqueued}`;
const skippedPart =
next.crawlingJobPagesSkipped > 0
? `, skipped ${next.crawlingJobPagesSkipped}`
: "";
const cardsPart =
next.crawlingJobCardsFound > 0
? `, cards ${next.crawlingJobCardsFound}`
: "";
const message = `Crawling jobs (${sourcesPart} sources, pages ${pagesPart}${skippedPart}${cardsPart})...`;
const detail =
next.crawlingCurrentUrl && next.crawlingPhase
? `${next.crawlingPhase === "list" ? "List" : "Job"}: ${next.crawlingCurrentUrl}`
: next.crawlingCurrentUrl
? next.crawlingCurrentUrl
: "Running crawler";
updateProgress({
step: "crawling",
message,
detail,
crawlingListPagesProcessed: next.crawlingListPagesProcessed,
crawlingListPagesTotal: next.crawlingListPagesTotal,
crawlingJobCardsFound: next.crawlingJobCardsFound,
crawlingJobPagesEnqueued: next.crawlingJobPagesEnqueued,
crawlingJobPagesSkipped: next.crawlingJobPagesSkipped,
crawlingJobPagesProcessed: next.crawlingJobPagesProcessed,
crawlingPhase: next.crawlingPhase,
crawlingCurrentUrl: next.crawlingCurrentUrl,
});
},
crawlingComplete: (jobsFound: number) =>
updateProgress({
step: "importing",
message: `Found ${jobsFound} jobs, importing to database...`,
detail: "Deduplicating and saving",
jobsDiscovered: jobsFound,
crawlingCurrentUrl: undefined,
}),
importComplete: (created: number, skipped: number) =>
updateProgress({
step: "scoring",
message: `Imported ${created} new jobs (${skipped} duplicates). Scoring...`,
detail: "Using AI to evaluate job fit",
}),
scoringJob: (index: number, total: number, title: string) =>
updateProgress({
step: "scoring",
message: `Scoring jobs (${index}/${total})...`,
detail: title,
jobsScored: index,
}),
scoringComplete: (totalScored: number) =>
updateProgress({
step: "scoring",
message: `Scored ${totalScored} jobs.`,
detail: "Ready for manual processing",
jobsScored: totalScored,
totalToProcess: 0,
jobsProcessed: 0,
currentJob: undefined,
}),
processingJob: (
index: number,
total: number,
job: { id: string; title: string; employer: string },
) =>
updateProgress({
step: "processing",
message: `Processing job ${index}/${total}...`,
detail: `${job.title} @ ${job.employer}`,
jobsProcessed: index - 1,
totalToProcess: total,
currentJob: job,
}),
generatingSummary: (job: { title: string; employer: string }) =>
updateProgress({
detail: `Generating summary for ${job.title}...`,
}),
generatingPdf: (job: { title: string; employer: string }) =>
updateProgress({
detail: `Generating PDF for ${job.title}...`,
}),
jobComplete: (index: number, total: number) =>
updateProgress({
jobsProcessed: index,
detail: `Completed ${index}/${total} jobs`,
}),
complete: (discovered: number, processed: number) =>
updateProgress({
step: "completed",
message: `Pipeline complete! Discovered ${discovered} jobs, processed ${processed}.`,
detail: "Ready for review",
completedAt: new Date().toISOString(),
currentJob: undefined,
}),
failed: (error: string) =>
updateProgress({
step: "failed",
message: "Pipeline failed",
detail: error,
error,
completedAt: new Date().toISOString(),
}),
};