feat: add option to fetch from a link
Clean raw HTML to JSON to pass to an LLM
This commit is contained in:
parent
03649f3d17
commit
867a13cf22
18
orchestrator/package-lock.json
generated
18
orchestrator/package-lock.json
generated
@ -47,6 +47,7 @@
|
||||
"@types/better-sqlite3": "^7.6.8",
|
||||
"@types/cors": "^2.8.17",
|
||||
"@types/express": "^4.17.21",
|
||||
"@types/jsdom": "^27.0.0",
|
||||
"@types/node": "^22.10.1",
|
||||
"@types/react": "^18.3.12",
|
||||
"@types/react-dom": "^18.3.1",
|
||||
@ -3345,6 +3346,17 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/jsdom": {
|
||||
"version": "27.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-27.0.0.tgz",
|
||||
"integrity": "sha512-NZyFl/PViwKzdEkQg96gtnB8wm+1ljhdDay9ahn4hgb+SfVtPCbm3TlmDUFXTA+MGN3CijicnMhG18SI5H3rFw==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"@types/node": "*",
|
||||
"@types/tough-cookie": "*",
|
||||
"parse5": "^7.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/mdast": {
|
||||
"version": "4.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz",
|
||||
@ -3457,6 +3469,12 @@
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/tough-cookie": {
|
||||
"version": "4.0.5",
|
||||
"resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.5.tgz",
|
||||
"integrity": "sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/unist": {
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz",
|
||||
|
||||
@ -59,6 +59,7 @@
|
||||
"@types/better-sqlite3": "^7.6.8",
|
||||
"@types/cors": "^2.8.17",
|
||||
"@types/express": "^4.17.21",
|
||||
"@types/jsdom": "^27.0.0",
|
||||
"@types/node": "^22.10.1",
|
||||
"@types/react": "^18.3.12",
|
||||
"@types/react-dom": "^18.3.1",
|
||||
|
||||
@ -17,6 +17,7 @@ import type {
|
||||
CreateJobInput,
|
||||
ManualJobDraft,
|
||||
ManualJobInferenceResponse,
|
||||
ManualJobFetchResponse,
|
||||
VisaSponsorSearchResponse,
|
||||
VisaSponsorStatusResponse,
|
||||
VisaSponsor,
|
||||
@ -38,7 +39,16 @@ async function fetchApi<T>(
|
||||
},
|
||||
});
|
||||
|
||||
const data: ApiResponse<T> = await response.json();
|
||||
const text = await response.text();
|
||||
|
||||
let data: ApiResponse<T>;
|
||||
try {
|
||||
data = JSON.parse(text);
|
||||
} catch {
|
||||
// If the response is not JSON, it's likely an HTML error page
|
||||
console.error('API returned non-JSON response:', text.substring(0, 500));
|
||||
throw new Error(`Server error (${response.status}): Expected JSON but received HTML. Is the backend server running?`);
|
||||
}
|
||||
|
||||
if (!data.success) {
|
||||
throw new Error(data.error || 'API request failed');
|
||||
@ -148,6 +158,15 @@ export async function importUkVisaJobs(input: {
|
||||
}
|
||||
|
||||
// Manual Job Import API
|
||||
/**
 * Ask the backend to download a job posting and return its extracted content.
 *
 * Serialises `input` as the JSON body of a POST to `/manual-jobs/fetch` and
 * delegates to `fetchApi`, so the returned promise rejects whenever that
 * helper does.
 *
 * @param input - Object carrying the URL of the posting to fetch.
 * @returns The fetch result as delivered by `fetchApi`.
 */
export async function fetchJobFromUrl(input: {
  url: string;
}): Promise<ManualJobFetchResponse> {
  const body = JSON.stringify(input);
  return fetchApi<ManualJobFetchResponse>('/manual-jobs/fetch', { method: 'POST', body });
}
|
||||
|
||||
export async function inferManualJob(input: {
|
||||
jobDescription: string;
|
||||
}): Promise<ManualJobInferenceResponse> {
|
||||
|
||||
@ -6,6 +6,7 @@ import * as api from "../api";
|
||||
import { toast } from "sonner";
|
||||
|
||||
vi.mock("../api", () => ({
|
||||
fetchJobFromUrl: vi.fn(),
|
||||
inferManualJob: vi.fn(),
|
||||
importManualJob: vi.fn(),
|
||||
}));
|
||||
@ -41,7 +42,7 @@ describe("ManualImportSheet", () => {
|
||||
);
|
||||
|
||||
fireEvent.change(
|
||||
screen.getByPlaceholderText("Paste the full job description here..."),
|
||||
screen.getByPlaceholderText("Paste the full job description here, or enter a URL above to fetch it..."),
|
||||
{ target: { value: rawDescription } }
|
||||
);
|
||||
fireEvent.click(screen.getByRole("button", { name: /analyze jd/i }));
|
||||
@ -92,7 +93,7 @@ describe("ManualImportSheet", () => {
|
||||
);
|
||||
|
||||
fireEvent.change(
|
||||
screen.getByPlaceholderText("Paste the full job description here..."),
|
||||
screen.getByPlaceholderText("Paste the full job description here, or enter a URL above to fetch it..."),
|
||||
{ target: { value: rawDescription } }
|
||||
);
|
||||
fireEvent.click(screen.getByRole("button", { name: /analyze jd/i }));
|
||||
@ -122,7 +123,7 @@ describe("ManualImportSheet", () => {
|
||||
);
|
||||
|
||||
fireEvent.change(
|
||||
screen.getByPlaceholderText("Paste the full job description here..."),
|
||||
screen.getByPlaceholderText("Paste the full job description here, or enter a URL above to fetch it..."),
|
||||
{ target: { value: rawDescription } }
|
||||
);
|
||||
fireEvent.click(screen.getByRole("button", { name: /analyze jd/i }));
|
||||
@ -150,7 +151,7 @@ describe("ManualImportSheet", () => {
|
||||
);
|
||||
|
||||
fireEvent.change(
|
||||
screen.getByPlaceholderText("Paste the full job description here..."),
|
||||
screen.getByPlaceholderText("Paste the full job description here, or enter a URL above to fetch it..."),
|
||||
{ target: { value: "Backend Engineer role." } }
|
||||
);
|
||||
fireEvent.click(screen.getByRole("button", { name: /analyze jd/i }));
|
||||
@ -165,4 +166,137 @@ describe("ManualImportSheet", () => {
|
||||
expect(onOpenChange).not.toHaveBeenCalled();
|
||||
expect(screen.getByRole("button", { name: /import job/i })).toBeEnabled();
|
||||
});
|
||||
|
||||
describe("URL fetch functionality", () => {
  // Placeholder texts used to locate the inputs under test.
  const URL_PLACEHOLDER = "https://example.com/job-posting";
  const DESCRIPTION_PLACEHOLDER =
    "Paste the full job description here, or enter a URL above to fetch it...";
  const TITLE_PLACEHOLDER = "e.g. Junior Backend Engineer";
  const JOB_URL = "https://example.com/job";

  // Mount the sheet already open, with throwaway callbacks.
  const renderSheet = () =>
    render(<ManualImportSheet open onOpenChange={vi.fn()} onImported={vi.fn()} />);

  // Type a URL into the "fetch from link" input.
  const typeUrl = (url: string) => {
    fireEvent.change(screen.getByPlaceholderText(URL_PLACEHOLDER), {
      target: { value: url },
    });
  };

  // Press the button next to the URL input (labelled "Fetch" once a URL is present).
  const clickFetch = () => {
    fireEvent.click(screen.getByRole("button", { name: /fetch/i }));
  };

  // Make the fetch endpoint succeed with the given page content.
  const stubFetchedPage = (content: string) => {
    vi.mocked(api.fetchJobFromUrl).mockResolvedValue({ content, url: JOB_URL });
  };

  it("shows Paste button when URL field is empty, Fetch when URL is entered", async () => {
    renderSheet();

    // With nothing typed, the button offers to paste from the clipboard.
    expect(screen.getByRole("button", { name: /paste/i })).toBeInTheDocument();

    typeUrl(JOB_URL);

    // With a URL present, the same button switches to fetching.
    expect(screen.getByRole("button", { name: /fetch/i })).toBeInTheDocument();
  });

  it("fetches URL and proceeds to review on successful fetch", async () => {
    stubFetchedPage("Software Engineer role at Acme Corp");
    vi.mocked(api.inferManualJob).mockResolvedValue({
      job: {
        title: "Software Engineer",
        employer: "Acme Corp",
        location: "Remote",
        jobDescription: "Great opportunity to join our team.",
      },
    });

    renderSheet();
    typeUrl(JOB_URL);
    clickFetch();

    // The review form appearing means both the fetch and the inference resolved.
    await screen.findByPlaceholderText(TITLE_PLACEHOLDER);

    expect(api.fetchJobFromUrl).toHaveBeenCalledWith({
      url: JOB_URL,
    });
    expect(api.inferManualJob).toHaveBeenCalledWith({
      jobDescription: "Software Engineer role at Acme Corp",
    });

    // The inferred fields are pre-filled in the review form.
    expect(screen.getByPlaceholderText(TITLE_PLACEHOLDER)).toHaveValue("Software Engineer");
    expect(screen.getByPlaceholderText("e.g. Acme Labs")).toHaveValue("Acme Corp");
  });

  it("preserves fetched URL in the job URL field", async () => {
    stubFetchedPage("Job description content");
    vi.mocked(api.inferManualJob).mockResolvedValue({
      job: {
        title: "Engineer",
        employer: "Company",
      },
    });

    renderSheet();
    typeUrl(JOB_URL);
    clickFetch();

    await screen.findByPlaceholderText(TITLE_PLACEHOLDER);

    // Several inputs share the "https://..." placeholder; the first one is Job URL.
    const [jobUrlInput] = screen.getAllByPlaceholderText("https://...");
    expect(jobUrlInput).toHaveValue(JOB_URL);
  });

  it("shows error and returns to paste step when fetch fails", async () => {
    vi.mocked(api.fetchJobFromUrl).mockRejectedValue(new Error("Failed to fetch URL"));

    renderSheet();
    typeUrl("https://example.com/bad-url");
    clickFetch();

    await screen.findByText("Failed to fetch URL");

    // The description textarea only exists on the paste step.
    expect(screen.getByPlaceholderText(DESCRIPTION_PLACEHOLDER)).toBeInTheDocument();
  });

  it("shows error when inference fails after fetch", async () => {
    stubFetchedPage("Job content");
    vi.mocked(api.inferManualJob).mockRejectedValue(new Error("Inference failed"));

    renderSheet();
    typeUrl(JOB_URL);
    clickFetch();

    await screen.findByText("Inference failed");

    // A failed inference also drops the user back on the paste step.
    expect(screen.getByPlaceholderText(DESCRIPTION_PLACEHOLDER)).toBeInTheDocument();
  });
});
|
||||
});
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
*/
|
||||
|
||||
import React, { useEffect, useMemo, useState } from "react";
|
||||
import { ArrowLeft, FileText, Loader2, Sparkles } from "lucide-react";
|
||||
import { ArrowLeft, ClipboardPaste, FileText, Link, Loader2, Sparkles } from "lucide-react";
|
||||
import { toast } from "sonner";
|
||||
|
||||
import { Button } from "@/components/ui/button";
|
||||
@ -112,6 +112,8 @@ export const ManualImportSheet: React.FC<ManualImportSheetProps> = ({
|
||||
}) => {
|
||||
const [step, setStep] = useState<ManualImportStep>("paste");
|
||||
const [rawDescription, setRawDescription] = useState("");
|
||||
const [fetchUrl, setFetchUrl] = useState("");
|
||||
const [isFetching, setIsFetching] = useState(false);
|
||||
const [draft, setDraft] = useState<ManualJobDraftState>(emptyDraft);
|
||||
const [warning, setWarning] = useState<string | null>(null);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
@ -121,6 +123,8 @@ export const ManualImportSheet: React.FC<ManualImportSheetProps> = ({
|
||||
if (!open) {
|
||||
setStep("paste");
|
||||
setRawDescription("");
|
||||
setFetchUrl("");
|
||||
setIsFetching(false);
|
||||
setDraft(emptyDraft);
|
||||
setWarning(null);
|
||||
setError(null);
|
||||
@ -132,6 +136,7 @@ export const ManualImportSheet: React.FC<ManualImportSheetProps> = ({
|
||||
const stepLabel = ["Paste JD", "Infer details", "Review & import"][stepIndex];
|
||||
|
||||
const canAnalyze = rawDescription.trim().length > 0 && step !== "loading";
|
||||
const canFetch = fetchUrl.trim().length > 0 && !isFetching && step === "paste";
|
||||
const canImport = useMemo(() => {
|
||||
if (step !== "review") return false;
|
||||
return (
|
||||
@ -141,6 +146,43 @@ export const ManualImportSheet: React.FC<ManualImportSheetProps> = ({
|
||||
);
|
||||
}, [draft, step]);
|
||||
|
||||
/**
 * Fetch the posting at the URL typed by the user, then run inference on the
 * returned content and move to the review step.
 *
 * Any failure (download or inference) is surfaced through `error` and the
 * sheet returns to the paste step.
 */
const handleFetch = async () => {
  const requestedUrl = fetchUrl.trim();
  // Nothing to fetch until a non-blank URL has been entered.
  if (requestedUrl.length === 0) return;

  try {
    // Clear feedback left over from a previous attempt and mark the download as in flight.
    setError(null);
    setWarning(null);
    setIsFetching(true);

    // Step 1: let the backend download the page and hand back its content.
    const { content: pageContent, url: sourceUrl } = await api.fetchJobFromUrl({
      url: requestedUrl,
    });
    setIsFetching(false);

    // Step 2: continue straight into inference, no second click required.
    setStep("loading");
    const inference = await api.inferManualJob({ jobDescription: pageContent });

    // Unlike handleAnalyze, the fetched page content is deliberately not passed
    // to normalizeDraft as a second argument; only the inferred job is used.
    const nextDraft = normalizeDraft(inference.job);

    // Fall back to the fetched URL when inference did not produce a job URL.
    if (!nextDraft.jobUrl) {
      nextDraft.jobUrl = sourceUrl;
    }

    setDraft(nextDraft);
    setWarning(inference.warning ?? null);
    setStep("review");
  } catch (err) {
    // Either request may have rejected: show the reason and go back to step one.
    setError(err instanceof Error ? err.message : "Failed to fetch URL");
    setIsFetching(false);
    setStep("paste");
  }
};
|
||||
|
||||
const handleAnalyze = async () => {
|
||||
if (!rawDescription.trim()) {
|
||||
setError("Paste a job description to continue.");
|
||||
@ -152,7 +194,12 @@ export const ManualImportSheet: React.FC<ManualImportSheetProps> = ({
|
||||
setWarning(null);
|
||||
setStep("loading");
|
||||
const response = await api.inferManualJob({ jobDescription: rawDescription });
|
||||
setDraft(normalizeDraft(response.job, rawDescription.trim()));
|
||||
const normalized = normalizeDraft(response.job, rawDescription.trim());
|
||||
// Preserve the fetched URL if we fetched from a URL
|
||||
if (draft.jobUrl && !normalized.jobUrl) {
|
||||
normalized.jobUrl = draft.jobUrl;
|
||||
}
|
||||
setDraft(normalized);
|
||||
setWarning(response.warning ?? null);
|
||||
setStep("review");
|
||||
} catch (err) {
|
||||
@ -217,6 +264,53 @@ export const ManualImportSheet: React.FC<ManualImportSheetProps> = ({
|
||||
<div className="mt-4 flex-1 overflow-y-auto pr-1">
|
||||
{step === "paste" && (
|
||||
<div className="space-y-4">
|
||||
<div className="space-y-2">
|
||||
<label className="text-xs font-semibold uppercase tracking-wide text-muted-foreground">
|
||||
Job URL (optional)
|
||||
</label>
|
||||
<div className="flex gap-2">
|
||||
<Input
|
||||
value={fetchUrl}
|
||||
onChange={(event) => setFetchUrl(event.target.value)}
|
||||
placeholder="https://example.com/job-posting"
|
||||
className="flex-1"
|
||||
onKeyDown={(event) => {
|
||||
if (event.key === "Enter" && canFetch) {
|
||||
event.preventDefault();
|
||||
handleFetch();
|
||||
}
|
||||
}}
|
||||
/>
|
||||
<Button
|
||||
type="button"
|
||||
variant="secondary"
|
||||
disabled={isFetching}
|
||||
className="gap-2 shrink-0"
|
||||
onClick={async () => {
|
||||
if (fetchUrl.trim()) {
|
||||
handleFetch();
|
||||
} else {
|
||||
try {
|
||||
const text = await navigator.clipboard.readText();
|
||||
if (text) setFetchUrl(text.trim());
|
||||
} catch {
|
||||
// Clipboard access denied
|
||||
}
|
||||
}
|
||||
}}
|
||||
>
|
||||
{isFetching ? (
|
||||
<Loader2 className="h-4 w-4 animate-spin" />
|
||||
) : fetchUrl.trim() ? (
|
||||
<Link className="h-4 w-4" />
|
||||
) : (
|
||||
<ClipboardPaste className="h-4 w-4" />
|
||||
)}
|
||||
{isFetching ? "Fetching..." : fetchUrl.trim() ? "Fetch" : "Paste"}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="space-y-2">
|
||||
<label className="text-xs font-semibold uppercase tracking-wide text-muted-foreground">
|
||||
Job description
|
||||
@ -224,8 +318,8 @@ export const ManualImportSheet: React.FC<ManualImportSheetProps> = ({
|
||||
<Textarea
|
||||
value={rawDescription}
|
||||
onChange={(event) => setRawDescription(event.target.value)}
|
||||
placeholder="Paste the full job description here..."
|
||||
className="min-h-[220px] font-mono text-sm leading-relaxed"
|
||||
placeholder="Paste the full job description here, or enter a URL above to fetch it..."
|
||||
className="min-h-[200px] font-mono text-sm leading-relaxed"
|
||||
/>
|
||||
</div>
|
||||
|
||||
@ -236,12 +330,16 @@ export const ManualImportSheet: React.FC<ManualImportSheetProps> = ({
|
||||
)}
|
||||
|
||||
<Button
|
||||
onClick={handleAnalyze}
|
||||
disabled={!canAnalyze}
|
||||
onClick={fetchUrl.trim() ? handleFetch : handleAnalyze}
|
||||
disabled={isFetching || (!canFetch && !canAnalyze)}
|
||||
className="w-full h-10 gap-2"
|
||||
>
|
||||
<Sparkles className="h-4 w-4" />
|
||||
Analyze JD
|
||||
{isFetching ? (
|
||||
<Loader2 className="h-4 w-4 animate-spin" />
|
||||
) : (
|
||||
<Sparkles className="h-4 w-4" />
|
||||
)}
|
||||
{isFetching ? "Fetching..." : "Analyze JD"}
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@ -16,6 +16,28 @@ describe.sequential('Manual jobs API routes', () => {
|
||||
await stopServer({ server, closeDb, tempDir });
|
||||
});
|
||||
|
||||
describe('POST /api/manual-jobs/fetch', () => {
|
||||
it('rejects invalid URLs', async () => {
|
||||
const res = await fetch(`${baseUrl}/api/manual-jobs/fetch`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ url: 'not-a-valid-url' }),
|
||||
});
|
||||
|
||||
expect(res.status).toBe(400);
|
||||
});
|
||||
|
||||
it('rejects empty payload', async () => {
|
||||
const res = await fetch(`${baseUrl}/api/manual-jobs/fetch`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({}),
|
||||
});
|
||||
|
||||
expect(res.status).toBe(400);
|
||||
});
|
||||
});
|
||||
|
||||
it('infers manual jobs and rejects empty payloads', async () => {
|
||||
const badRes = await fetch(`${baseUrl}/api/manual-jobs/infer`, {
|
||||
method: 'POST',
|
||||
|
||||
@ -1,16 +1,21 @@
|
||||
import { Router, Request, Response } from 'express';
|
||||
import { randomUUID } from 'crypto';
|
||||
import { z } from 'zod';
|
||||
import { JSDOM } from 'jsdom';
|
||||
import * as jobsRepo from '../../repositories/jobs.js';
|
||||
import { inferManualJobDetails } from '../../services/manualJob.js';
|
||||
import { scoreJobSuitability } from '../../services/scorer.js';
|
||||
import { getProfile } from '../../services/profile.js';
|
||||
import type { ApiResponse, ManualJobInferenceResponse } from '../../../shared/types.js';
|
||||
import type { ApiResponse, ManualJobInferenceResponse, ManualJobFetchResponse } from '../../../shared/types.js';
|
||||
|
||||
export const manualJobsRouter = Router();
|
||||
|
||||
/**
 * Request body accepted by POST /fetch.
 *
 * `url` is trimmed, must parse as a URL, may be at most 2000 characters long
 * and must use the http or https scheme.
 */
const manualJobFetchSchema = z.object({
  url: z
    .string()
    .trim()
    .url()
    .max(2000)
    // `.url()` on its own only checks that the value parses as a URL, so schemes
    // such as file:, ftp: or javascript: would pass validation and then fail
    // inside the server-side fetch with a generic 500. Reject them here so the
    // client receives a 400 validation error instead.
    .refine((value) => /^https?:\/\//i.test(value), {
      message: 'URL must start with http:// or https://',
    }),
});
|
||||
|
||||
const manualJobInferenceSchema = z.object({
|
||||
jobDescription: z.string().trim().min(1).max(40000),
|
||||
jobDescription: z.string().trim().min(1).max(60000),
|
||||
});
|
||||
|
||||
const manualJobImportSchema = z.object({
|
||||
@ -38,6 +43,110 @@ const cleanOptional = (value?: string | null) => {
|
||||
return trimmed.length > 0 ? trimmed : undefined;
|
||||
};
|
||||
|
||||
/**
 * POST /api/manual-jobs/fetch - Fetch and extract job content from a URL
 *
 * Request body: `{ url }`, validated with `manualJobFetchSchema`.
 *
 * Responses:
 * - 200 `{ success: true, data: { content, url } }` where `content` is a short
 *   metadata header (page title, Open Graph fields, meta description) followed
 *   by the cleaned page text, capped at 50,000 characters.
 * - 400 when the body fails validation or the remote server answers non-2xx.
 * - 408 when the download does not finish within 15 seconds.
 * - 500 for any other failure.
 */
manualJobsRouter.post('/fetch', async (req: Request, res: Response) => {
  const FETCH_TIMEOUT_MS = 15000;
  const MAX_CONTENT_LENGTH = 50000;

  const controller = new AbortController();
  let timeout: ReturnType<typeof setTimeout> | undefined;

  try {
    const input = manualJobFetchSchema.parse(req.body ?? {});

    // NOTE(review): the server requests whatever URL the caller supplies, which
    // also reaches hosts on the server's own network (SSRF). Consider blocking
    // private/loopback addresses if this endpoint is exposed to untrusted users.

    // One deadline for the whole download: it stays armed while the body is
    // read, so a server that sends headers and then stalls is aborted as well.
    timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

    const response = await fetch(input.url, {
      signal: controller.signal,
      headers: {
        // Browser-like headers; presumably some job boards reject default clients.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      },
    });

    if (!response.ok) {
      return res.status(400).json({
        success: false,
        error: `Failed to fetch URL: ${response.status} ${response.statusText}`,
      });
    }

    const html = await response.text();
    const dom = new JSDOM(html);
    const document = dom.window.document;

    // Trimmed `content` attribute of the first element matching `selector`, or ''.
    const readMeta = (selector: string): string =>
      document.querySelector(selector)?.getAttribute('content')?.trim() ?? '';

    // Extract page title (often contains job title)
    const pageTitle = document.querySelector('title')?.textContent?.trim() ?? '';

    // Extract meta description
    const metaDescription = readMeta('meta[name="description"]');

    // Extract Open Graph data. The standard property name is `og:site_name`
    // (underscore); the hyphenated spelling is kept as a fallback for pages
    // that use it.
    const ogTitle = readMeta('meta[property="og:title"]');
    const ogDescription = readMeta('meta[property="og:description"]');
    const ogSiteName =
      readMeta('meta[property="og:site_name"]') || readMeta('meta[property="og:site-name"]');

    // Remove non-content elements. This must happen after the metadata reads
    // above and before the main-content lookup below.
    const elementsToRemove = document.querySelectorAll(
      'script, style, nav, header, footer, aside, iframe, noscript, ' +
        '[role="navigation"], [role="banner"], [role="contentinfo"], ' +
        '.nav, .navbar, .header, .footer, .sidebar, .menu, .cookie, .popup, .modal, .ad, .advertisement'
    );
    elementsToRemove.forEach((el) => el.remove());

    // Try to find the main job content area; fall back to the whole body.
    const mainContent =
      document.querySelector(
        'main, [role="main"], article, ' +
          '.job-description, .job-details, .job-content, .vacancy-description, ' +
          '#job-description, #job-details, #job-content, ' +
          '[class*="job-desc"], [class*="jobDesc"], [class*="vacancy"], [class*="posting"]'
      ) || document.body;

    // Get text content and clean up whitespace: collapse runs of spaces/tabs
    // and squeeze blank-line runs down to a single blank line.
    const textContent = (mainContent?.textContent || '')
      .replace(/[\t ]+/g, ' ')
      .replace(/\n\s*\n/g, '\n\n')
      .replace(/\n{3,}/g, '\n\n')
      .trim();

    // Build enriched content with extracted metadata
    let enrichedContent = '';
    if (pageTitle) enrichedContent += `Page Title: ${pageTitle}\n`;
    if (ogTitle && ogTitle !== pageTitle) enrichedContent += `Job Title: ${ogTitle}\n`;
    if (ogSiteName) enrichedContent += `Company/Site: ${ogSiteName}\n`;
    if (ogDescription) enrichedContent += `Summary: ${ogDescription}\n`;
    if (metaDescription && metaDescription !== ogDescription) enrichedContent += `Description: ${metaDescription}\n`;
    if (enrichedContent) enrichedContent += '\n---\n\n';
    enrichedContent += textContent;

    // Limit to reasonable size
    if (enrichedContent.length > MAX_CONTENT_LENGTH) {
      enrichedContent = enrichedContent.substring(0, MAX_CONTENT_LENGTH);
    }

    const result: ApiResponse<ManualJobFetchResponse> = {
      success: true,
      data: {
        content: enrichedContent,
        url: input.url,
      },
    };

    res.json(result);
  } catch (error) {
    if (error instanceof z.ZodError) {
      return res.status(400).json({ success: false, error: error.message });
    }
    // Raised when the deadline above aborts the request or the body read.
    if (error instanceof Error && error.name === 'AbortError') {
      return res.status(408).json({ success: false, error: 'Request timed out' });
    }
    const message = error instanceof Error ? error.message : 'Unknown error';
    res.status(500).json({ success: false, error: message });
  } finally {
    // Always disarm the deadline, including on validation and network
    // failures, so no stray timer is left pending after the response is sent.
    clearTimeout(timeout);
  }
});
|
||||
|
||||
/**
|
||||
* POST /api/manual-jobs/infer - Infer job details from a pasted description
|
||||
*/
|
||||
|
||||
@ -26,6 +26,7 @@ interface ManualJobApiResponse {
|
||||
disciplines: string;
|
||||
degreeRequired: string;
|
||||
starting: string;
|
||||
jobDescription: string;
|
||||
}
|
||||
|
||||
/** JSON schema for manual job extraction response */
|
||||
@ -47,8 +48,9 @@ const MANUAL_JOB_SCHEMA: JsonSchemaDefinition = {
|
||||
disciplines: { type: 'string', description: 'Required disciplines or fields' },
|
||||
degreeRequired: { type: 'string', description: 'Required degree or education' },
|
||||
starting: { type: 'string', description: 'Start date information' },
|
||||
jobDescription: { type: 'string', description: 'Clean text job description with responsibilities and requirements' },
|
||||
},
|
||||
required: ['title', 'employer', 'location', 'salary', 'deadline', 'jobUrl', 'applicationLink', 'jobType', 'jobLevel', 'jobFunction', 'disciplines', 'degreeRequired', 'starting'],
|
||||
required: ['title', 'employer', 'location', 'salary', 'deadline', 'jobUrl', 'applicationLink', 'jobType', 'jobLevel', 'jobFunction', 'disciplines', 'degreeRequired', 'starting', 'jobDescription'],
|
||||
additionalProperties: false,
|
||||
},
|
||||
};
|
||||
@ -84,26 +86,28 @@ export async function inferManualJobDetails(jobDescription: string): Promise<Man
|
||||
|
||||
function buildInferencePrompt(jd: string): string {
|
||||
return `
|
||||
You are extracting structured data from a job description.
|
||||
You are extracting structured data from a job posting.
|
||||
The input may be raw HTML from a job listing page or plain text - extract the relevant job information either way.
|
||||
Return JSON only with the keys listed below. Use empty string if unknown.
|
||||
Do not guess or invent data.
|
||||
Do not guess or invent data. Ignore navigation, headers, footers, and other non-job content.
|
||||
|
||||
Keys:
|
||||
- title
|
||||
- employer
|
||||
- location
|
||||
- salary
|
||||
- deadline
|
||||
- jobUrl (the listing URL, if present)
|
||||
- title (job title)
|
||||
- employer (company name)
|
||||
- location (job location)
|
||||
- salary (salary/compensation info)
|
||||
- deadline (application deadline)
|
||||
- jobUrl (the listing URL, if present in the content)
|
||||
- applicationLink (the apply URL, if present)
|
||||
- jobType
|
||||
- jobLevel
|
||||
- jobFunction
|
||||
- disciplines
|
||||
- degreeRequired
|
||||
- starting
|
||||
- jobType (full-time, part-time, contract, etc.)
|
||||
- jobLevel (entry, mid, senior, etc.)
|
||||
- jobFunction (engineering, marketing, etc.)
|
||||
- disciplines (required fields/disciplines)
|
||||
- degreeRequired (required education)
|
||||
- starting (start date)
|
||||
- jobDescription (clean plain text of the job description including responsibilities and requirements - extract this from the HTML/content)
|
||||
|
||||
JOB DESCRIPTION:
|
||||
JOB POSTING CONTENT:
|
||||
${jd}
|
||||
|
||||
OUTPUT FORMAT (JSON ONLY):
|
||||
@ -120,7 +124,8 @@ OUTPUT FORMAT (JSON ONLY):
|
||||
"jobFunction": "",
|
||||
"disciplines": "",
|
||||
"degreeRequired": "",
|
||||
"starting": ""
|
||||
"starting": "",
|
||||
"jobDescription": ""
|
||||
}
|
||||
`.trim();
|
||||
}
|
||||
@ -142,6 +147,7 @@ function normalizeDraft(parsed: ManualJobApiResponse): ManualJobDraft {
|
||||
if (parsed.disciplines?.trim()) out.disciplines = parsed.disciplines.trim();
|
||||
if (parsed.degreeRequired?.trim()) out.degreeRequired = parsed.degreeRequired.trim();
|
||||
if (parsed.starting?.trim()) out.starting = parsed.starting.trim();
|
||||
if (parsed.jobDescription?.trim()) out.jobDescription = parsed.jobDescription.trim();
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
@ -154,6 +154,11 @@ export interface ManualJobInferenceResponse {
|
||||
warning?: string | null;
|
||||
}
|
||||
|
||||
/** Payload returned by POST /api/manual-jobs/fetch. */
export interface ManualJobFetchResponse {
  /** Text extracted from the fetched page, prefixed with any page metadata that was found. */
  content: string;
  /** The URL that was requested. */
  url: string;
}
|
||||
|
||||
export interface UpdateJobInput {
|
||||
status?: JobStatus;
|
||||
jobDescription?: string;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user