job_spy implementation
This commit is contained in:
parent
4a00b3b900
commit
cefb75a9ec
12
.env.example
12
.env.example
@ -23,3 +23,15 @@ NOTION_DATABASE_ID=
|
||||
|
||||
# Optional: Webhook secret for n8n automation
|
||||
WEBHOOK_SECRET=
|
||||
|
||||
# =============================================================================
|
||||
# JobSpy (Indeed/LinkedIn scraping) - optional
|
||||
# =============================================================================
|
||||
# These control the Python JobSpy scraper used by the pipeline.
|
||||
JOBSPY_SITES=indeed,linkedin
|
||||
JOBSPY_SEARCH_TERM=web developer
|
||||
JOBSPY_LOCATION=UK
|
||||
JOBSPY_RESULTS_WANTED=200
|
||||
JOBSPY_HOURS_OLD=72
|
||||
JOBSPY_COUNTRY_INDEED=UK
|
||||
JOBSPY_LINKEDIN_FETCH_DESCRIPTION=1
|
||||
|
||||
@ -21,8 +21,8 @@ RUN apt-get update && apt-get install -y \
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Install Playwright and Firefox only
|
||||
RUN pip3 install --no-cache-dir --break-system-packages playwright && \
|
||||
# Install Playwright and Firefox only (plus JobSpy for Indeed/LinkedIn scraping)
|
||||
RUN pip3 install --no-cache-dir --break-system-packages playwright python-jobspy && \
|
||||
npx playwright install firefox
|
||||
|
||||
# Copy package files first for better caching
|
||||
@ -43,6 +43,7 @@ RUN npx camoufox fetch
|
||||
WORKDIR /app
|
||||
COPY orchestrator ./orchestrator
|
||||
COPY job-extractor ./job-extractor
|
||||
COPY jobspy-extractor ./jobspy-extractor
|
||||
COPY resume-generator ./resume-generator
|
||||
|
||||
# Build the orchestrator (client + server)
|
||||
|
||||
@ -33,6 +33,15 @@ services:
|
||||
- PIPELINE_TOP_N=${PIPELINE_TOP_N:-10}
|
||||
- PIPELINE_MIN_SCORE=${PIPELINE_MIN_SCORE:-50}
|
||||
|
||||
# JobSpy (Indeed/LinkedIn scraping) - optional
|
||||
- JOBSPY_SITES=${JOBSPY_SITES:-indeed,linkedin}
|
||||
- JOBSPY_SEARCH_TERM=${JOBSPY_SEARCH_TERM:-web developer}
|
||||
- JOBSPY_LOCATION=${JOBSPY_LOCATION:-UK}
|
||||
- JOBSPY_RESULTS_WANTED=${JOBSPY_RESULTS_WANTED:-200}
|
||||
- JOBSPY_HOURS_OLD=${JOBSPY_HOURS_OLD:-72}
|
||||
- JOBSPY_COUNTRY_INDEED=${JOBSPY_COUNTRY_INDEED:-UK}
|
||||
- JOBSPY_LINKEDIN_FETCH_DESCRIPTION=${JOBSPY_LINKEDIN_FETCH_DESCRIPTION:-1}
|
||||
|
||||
# Optional: Notion integration
|
||||
- NOTION_API_KEY=${NOTION_API_KEY:-}
|
||||
- NOTION_DATABASE_ID=${NOTION_DATABASE_ID:-}
|
||||
|
||||
1
jobspy-extractor/requirements.txt
Normal file
1
jobspy-extractor/requirements.txt
Normal file
@ -0,0 +1 @@
|
||||
python-jobspy
|
||||
77
jobspy-extractor/scrape_jobs.py
Normal file
77
jobspy-extractor/scrape_jobs.py
Normal file
@ -0,0 +1,77 @@
|
||||
import csv
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from jobspy import scrape_jobs
|
||||
|
||||
|
||||
def _env_str(name: str, default: str) -> str:
    """Return environment variable *name* as a trimmed string.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset, empty, or
            contains only whitespace.

    Returns:
        The variable's value with surrounding whitespace removed, or
        ``default`` when no usable value is present.
    """
    value = os.getenv(name)
    if value is None:
        return default
    # Whitespace-only values already count as "unset"; trim the returned value
    # too so padded input such as " UK " does not carry stray spaces into
    # search terms or output paths.
    trimmed = value.strip()
    return trimmed or default
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
    """Read environment variable *name* as an integer.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset, blank, or not
            parseable by ``int()``.

    Returns:
        The parsed integer, or ``default``.
    """
    raw = os.getenv(name, "")
    if not raw.strip():
        # Unset and blank values are treated the same way.
        return default
    try:
        parsed = int(raw)
    except ValueError:
        # Best-effort: an unparseable value falls back instead of aborting.
        return default
    return parsed
|
||||
|
||||
|
||||
def _env_bool(name: str, default: bool) -> bool:
    """Read environment variable *name* as a boolean flag.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset or blank.

    Returns:
        ``True`` when the trimmed, lower-cased value is one of ``1``,
        ``true``, ``yes``, ``y`` or ``on``; ``False`` for any other non-blank
        value; ``default`` otherwise.
    """
    token = (os.getenv(name) or "").strip().lower()
    if not token:
        return default
    return token in {"1", "true", "yes", "y", "on"}
|
||||
|
||||
|
||||
def _parse_sites(raw: str) -> list[str]:
    """Split a comma-separated string into trimmed, non-empty entries.

    Args:
        raw: Comma-separated text, e.g. ``"indeed,linkedin"``.

    Returns:
        The entries in their original order, each stripped of surrounding
        whitespace, with empty entries dropped.
    """
    trimmed = map(str.strip, raw.split(","))
    # filter(None, ...) discards the empty strings left by blank entries.
    return list(filter(None, trimmed))
|
||||
|
||||
|
||||
def main() -> int:
    """Scrape job listings via JobSpy and write them to CSV and JSON files.

    All settings come from ``JOBSPY_*`` environment variables; each falls back
    to a built-in default when the variable is unset or blank. The JSON path
    defaults to the CSV path with its suffix replaced by ``.json``.

    Returns:
        ``0`` after both output files have been written.
    """
    # Keyword arguments forwarded unchanged to ``scrape_jobs``.
    scrape_options = {
        "site_name": _parse_sites(_env_str("JOBSPY_SITES", "indeed,linkedin")),
        "search_term": _env_str("JOBSPY_SEARCH_TERM", "web developer"),
        "location": _env_str("JOBSPY_LOCATION", "UK"),
        "results_wanted": _env_int("JOBSPY_RESULTS_WANTED", 200),
        "hours_old": _env_int("JOBSPY_HOURS_OLD", 72),
        "country_indeed": _env_str("JOBSPY_COUNTRY_INDEED", "UK"),
        "linkedin_fetch_description": _env_bool(
            "JOBSPY_LINKEDIN_FETCH_DESCRIPTION", True
        ),
    }

    csv_path = Path(_env_str("JOBSPY_OUTPUT_CSV", "jobs.csv"))
    default_json_path = str(csv_path.with_suffix(".json"))
    json_path = Path(_env_str("JOBSPY_OUTPUT_JSON", default_json_path))

    # Create the destination directories before scraping starts.
    for target in (csv_path, json_path):
        target.parent.mkdir(parents=True, exist_ok=True)

    # NOTE(review): the result is used like a pandas DataFrame
    # (len / to_csv / to_json) - confirm against the JobSpy docs.
    listings = scrape_jobs(**scrape_options)
    print(f"Found {len(listings)} jobs")

    listings.to_csv(
        csv_path,
        quoting=csv.QUOTE_NONNUMERIC,
        escapechar="\\",
        index=False,
    )
    listings.to_json(json_path, orient="records", force_ascii=False)

    print(f"Wrote CSV: {csv_path}")
    print(f"Wrote JSON: {json_path}")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|
||||
|
||||
@ -19,3 +19,14 @@ PIPELINE_MIN_SCORE=50
|
||||
# RXResume credentials (for PDF generation)
|
||||
RXRESUME_EMAIL=
|
||||
RXRESUME_PASSWORD=
|
||||
|
||||
# =============================================================================
|
||||
# JobSpy (Indeed/LinkedIn scraping) - optional
|
||||
# =============================================================================
|
||||
JOBSPY_SITES=indeed,linkedin
|
||||
JOBSPY_SEARCH_TERM=web developer
|
||||
JOBSPY_LOCATION=UK
|
||||
JOBSPY_RESULTS_WANTED=200
|
||||
JOBSPY_HOURS_OLD=72
|
||||
JOBSPY_COUNTRY_INDEED=UK
|
||||
JOBSPY_LINKEDIN_FETCH_DESCRIPTION=1
|
||||
|
||||
211
orchestrator/package-lock.json
generated
211
orchestrator/package-lock.json
generated
@ -9,6 +9,7 @@
|
||||
"version": "1.0.0",
|
||||
"dependencies": {
|
||||
"@radix-ui/react-alert-dialog": "^1.1.15",
|
||||
"@radix-ui/react-dropdown-menu": "^2.1.15",
|
||||
"@radix-ui/react-progress": "^1.1.8",
|
||||
"@radix-ui/react-separator": "^1.1.8",
|
||||
"@radix-ui/react-slot": "^1.2.4",
|
||||
@ -1229,6 +1230,40 @@
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@floating-ui/core": {
|
||||
"version": "1.7.3",
|
||||
"resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.3.tgz",
|
||||
"integrity": "sha512-sGnvb5dmrJaKEZ+LDIpguvdX3bDlEllmv4/ClQ9awcmCZrlx5jQyyMWFM5kBI+EyNOCDDiKk8il0zeuX3Zlg/w==",
|
||||
"dependencies": {
|
||||
"@floating-ui/utils": "^0.2.10"
|
||||
}
|
||||
},
|
||||
"node_modules/@floating-ui/dom": {
|
||||
"version": "1.7.4",
|
||||
"resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.7.4.tgz",
|
||||
"integrity": "sha512-OOchDgh4F2CchOX94cRVqhvy7b3AFb+/rQXyswmzmGakRfkMgoWVjfnLWkRirfLEfuD4ysVW16eXzwt3jHIzKA==",
|
||||
"dependencies": {
|
||||
"@floating-ui/core": "^1.7.3",
|
||||
"@floating-ui/utils": "^0.2.10"
|
||||
}
|
||||
},
|
||||
"node_modules/@floating-ui/react-dom": {
|
||||
"version": "2.1.6",
|
||||
"resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.6.tgz",
|
||||
"integrity": "sha512-4JX6rEatQEvlmgU80wZyq9RT96HZJa88q8hp0pBd+LrczeDI4o6uA2M+uvxngVHo4Ihr8uibXxH6+70zhAFrVw==",
|
||||
"dependencies": {
|
||||
"@floating-ui/dom": "^1.7.4"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": ">=16.8.0",
|
||||
"react-dom": ">=16.8.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@floating-ui/utils": {
|
||||
"version": "0.2.10",
|
||||
"resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.10.tgz",
|
||||
"integrity": "sha512-aGTxbpbg8/b5JfU1HXSrbH3wXZuLPJcNEcZQFMxLs3oSzgtVu6nFPkbbGGUvBcUjKV2YyB9Wxxabo+HEH9tcRQ=="
|
||||
},
|
||||
"node_modules/@jridgewell/gen-mapping": {
|
||||
"version": "0.3.13",
|
||||
"resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz",
|
||||
@ -1335,6 +1370,28 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-arrow": {
|
||||
"version": "1.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
|
||||
"integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==",
|
||||
"dependencies": {
|
||||
"@radix-ui/react-primitive": "2.1.3"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"@types/react-dom": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
},
|
||||
"@types/react-dom": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-collection": {
|
||||
"version": "1.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz",
|
||||
@ -1497,6 +1554,34 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-dropdown-menu": {
|
||||
"version": "2.1.16",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-dropdown-menu/-/react-dropdown-menu-2.1.16.tgz",
|
||||
"integrity": "sha512-1PLGQEynI/3OX/ftV54COn+3Sud/Mn8vALg2rWnBLnRaGtJDduNW/22XjlGgPdpcIbiQxjKtb7BkcjP00nqfJw==",
|
||||
"dependencies": {
|
||||
"@radix-ui/primitive": "1.1.3",
|
||||
"@radix-ui/react-compose-refs": "1.1.2",
|
||||
"@radix-ui/react-context": "1.1.2",
|
||||
"@radix-ui/react-id": "1.1.1",
|
||||
"@radix-ui/react-menu": "2.1.16",
|
||||
"@radix-ui/react-primitive": "2.1.3",
|
||||
"@radix-ui/react-use-controllable-state": "1.2.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"@types/react-dom": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
},
|
||||
"@types/react-dom": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-focus-guards": {
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz",
|
||||
@ -1552,6 +1637,93 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-menu": {
|
||||
"version": "2.1.16",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.1.16.tgz",
|
||||
"integrity": "sha512-72F2T+PLlphrqLcAotYPp0uJMr5SjP5SL01wfEspJbru5Zs5vQaSHb4VB3ZMJPimgHHCHG7gMOeOB9H3Hdmtxg==",
|
||||
"dependencies": {
|
||||
"@radix-ui/primitive": "1.1.3",
|
||||
"@radix-ui/react-collection": "1.1.7",
|
||||
"@radix-ui/react-compose-refs": "1.1.2",
|
||||
"@radix-ui/react-context": "1.1.2",
|
||||
"@radix-ui/react-direction": "1.1.1",
|
||||
"@radix-ui/react-dismissable-layer": "1.1.11",
|
||||
"@radix-ui/react-focus-guards": "1.1.3",
|
||||
"@radix-ui/react-focus-scope": "1.1.7",
|
||||
"@radix-ui/react-id": "1.1.1",
|
||||
"@radix-ui/react-popper": "1.2.8",
|
||||
"@radix-ui/react-portal": "1.1.9",
|
||||
"@radix-ui/react-presence": "1.1.5",
|
||||
"@radix-ui/react-primitive": "2.1.3",
|
||||
"@radix-ui/react-roving-focus": "1.1.11",
|
||||
"@radix-ui/react-slot": "1.2.3",
|
||||
"@radix-ui/react-use-callback-ref": "1.1.1",
|
||||
"aria-hidden": "^1.2.4",
|
||||
"react-remove-scroll": "^2.6.3"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"@types/react-dom": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
},
|
||||
"@types/react-dom": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-menu/node_modules/@radix-ui/react-slot": {
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
|
||||
"integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
|
||||
"dependencies": {
|
||||
"@radix-ui/react-compose-refs": "1.1.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-popper": {
|
||||
"version": "1.2.8",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz",
|
||||
"integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==",
|
||||
"dependencies": {
|
||||
"@floating-ui/react-dom": "^2.0.0",
|
||||
"@radix-ui/react-arrow": "1.1.7",
|
||||
"@radix-ui/react-compose-refs": "1.1.2",
|
||||
"@radix-ui/react-context": "1.1.2",
|
||||
"@radix-ui/react-primitive": "2.1.3",
|
||||
"@radix-ui/react-use-callback-ref": "1.1.1",
|
||||
"@radix-ui/react-use-layout-effect": "1.1.1",
|
||||
"@radix-ui/react-use-rect": "1.1.1",
|
||||
"@radix-ui/react-use-size": "1.1.1",
|
||||
"@radix-ui/rect": "1.1.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"@types/react-dom": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
},
|
||||
"@types/react-dom": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-portal": {
|
||||
"version": "1.1.9",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
|
||||
@ -1896,6 +2068,45 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-use-rect": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-rect/-/react-use-rect-1.1.1.tgz",
|
||||
"integrity": "sha512-QTYuDesS0VtuHNNvMh+CjlKJ4LJickCMUAqjlE3+j8w+RlRpwyX3apEQKGFzbZGdo7XNG1tXa+bQqIE7HIXT2w==",
|
||||
"dependencies": {
|
||||
"@radix-ui/rect": "1.1.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-use-size": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-size/-/react-use-size-1.1.1.tgz",
|
||||
"integrity": "sha512-ewrXRDTAqAXlkl6t/fkXWNAhFX9I+CkKlw6zjEwk86RSPKwZr3xpBRso655aqYafwtnbpHLj6toFzmd6xdVptQ==",
|
||||
"dependencies": {
|
||||
"@radix-ui/react-use-layout-effect": "1.1.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/rect": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/rect/-/rect-1.1.1.tgz",
|
||||
"integrity": "sha512-HPwpGIzkl28mWyZqG52jiqDJ12waP11Pa1lGoiyUkIEuMLBP0oeK/C89esbXrxsky5we7dfd8U58nm0SgAWpVw=="
|
||||
},
|
||||
"node_modules/@rolldown/pluginutils": {
|
||||
"version": "1.0.0-beta.27",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.27.tgz",
|
||||
|
||||
@ -18,6 +18,7 @@
|
||||
"pipeline:run": "tsx src/server/pipeline/run.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@radix-ui/react-dropdown-menu": "^2.1.15",
|
||||
"@radix-ui/react-alert-dialog": "^1.1.15",
|
||||
"@radix-ui/react-progress": "^1.1.8",
|
||||
"@radix-ui/react-separator": "^1.1.8",
|
||||
|
||||
@ -6,10 +6,13 @@ import React, { useCallback, useEffect, useState } from "react";
|
||||
import { toast } from "sonner";
|
||||
|
||||
import { Toaster } from "@/components/ui/sonner";
|
||||
import type { Job, JobStatus } from "../shared/types";
|
||||
import type { Job, JobSource, JobStatus } from "../shared/types";
|
||||
import { Header, JobList, PipelineProgress, Stats } from "./components";
|
||||
import * as api from "./api";
|
||||
|
||||
const DEFAULT_PIPELINE_SOURCES: JobSource[] = ["gradcracker", "indeed", "linkedin"];
|
||||
const PIPELINE_SOURCES_STORAGE_KEY = "jobops.pipeline.sources";
|
||||
|
||||
export const App: React.FC = () => {
|
||||
const [jobs, setJobs] = useState<Job[]>([]);
|
||||
const [stats, setStats] = useState<Record<JobStatus, number>>({
|
||||
@ -24,6 +27,27 @@ export const App: React.FC = () => {
|
||||
const [isPipelineRunning, setIsPipelineRunning] = useState(false);
|
||||
const [processingJobId, setProcessingJobId] = useState<string | null>(null);
|
||||
const [isProcessingAll, setIsProcessingAll] = useState(false);
|
||||
const [pipelineSources, setPipelineSources] = useState<JobSource[]>(() => {
|
||||
try {
|
||||
const raw = localStorage.getItem(PIPELINE_SOURCES_STORAGE_KEY);
|
||||
if (!raw) return DEFAULT_PIPELINE_SOURCES;
|
||||
const parsed = JSON.parse(raw) as unknown;
|
||||
const allowed: JobSource[] = ["gradcracker", "indeed", "linkedin"];
|
||||
if (!Array.isArray(parsed)) return DEFAULT_PIPELINE_SOURCES;
|
||||
const next = parsed.filter((value): value is JobSource => allowed.includes(value));
|
||||
return next.length > 0 ? next : DEFAULT_PIPELINE_SOURCES;
|
||||
} catch {
|
||||
return DEFAULT_PIPELINE_SOURCES;
|
||||
}
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
try {
|
||||
localStorage.setItem(PIPELINE_SOURCES_STORAGE_KEY, JSON.stringify(pipelineSources));
|
||||
} catch {
|
||||
// Ignore localStorage errors
|
||||
}
|
||||
}, [pipelineSources]);
|
||||
|
||||
const loadJobs = useCallback(async () => {
|
||||
try {
|
||||
@ -63,8 +87,10 @@ export const App: React.FC = () => {
|
||||
const handleRunPipeline = async () => {
|
||||
try {
|
||||
setIsPipelineRunning(true);
|
||||
await api.runPipeline();
|
||||
toast.message("Pipeline started", { description: "This may take a few minutes." });
|
||||
await api.runPipeline({ sources: pipelineSources });
|
||||
toast.message("Pipeline started", {
|
||||
description: `Sources: ${pipelineSources.join(", ")}. This may take a few minutes.`,
|
||||
});
|
||||
|
||||
const pollInterval = setInterval(async () => {
|
||||
try {
|
||||
@ -170,6 +196,8 @@ export const App: React.FC = () => {
|
||||
onClearDatabase={handleClearDatabase}
|
||||
isPipelineRunning={isPipelineRunning}
|
||||
isLoading={isLoading}
|
||||
pipelineSources={pipelineSources}
|
||||
onPipelineSourcesChange={setPipelineSources}
|
||||
/>
|
||||
|
||||
<main className="container mx-auto max-w-7xl space-y-6 px-4 py-6 pb-12">
|
||||
@ -190,4 +218,3 @@ export const App: React.FC = () => {
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
|
||||
@ -7,6 +7,7 @@ import type {
|
||||
ApiResponse,
|
||||
JobsListResponse,
|
||||
PipelineStatusResponse,
|
||||
JobSource,
|
||||
PipelineRun
|
||||
} from '../../shared/types';
|
||||
|
||||
@ -83,6 +84,7 @@ export async function getPipelineRuns(): Promise<PipelineRun[]> {
|
||||
export async function runPipeline(config?: {
|
||||
topN?: number;
|
||||
minSuitabilityScore?: number;
|
||||
sources?: JobSource[];
|
||||
}): Promise<{ message: string }> {
|
||||
return fetchApi<{ message: string }>('/pipeline/run', {
|
||||
method: 'POST',
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
*/
|
||||
|
||||
import React from "react";
|
||||
import { Loader2, Play, RefreshCcw, Rocket, Trash2 } from "lucide-react";
|
||||
import { ChevronDown, Loader2, Play, RefreshCcw, Rocket, Trash2 } from "lucide-react";
|
||||
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
@ -17,6 +17,16 @@ import {
|
||||
AlertDialogTitle,
|
||||
AlertDialogTrigger,
|
||||
} from "@/components/ui/alert-dialog";
|
||||
import {
|
||||
DropdownMenu,
|
||||
DropdownMenuCheckboxItem,
|
||||
DropdownMenuContent,
|
||||
DropdownMenuItem,
|
||||
DropdownMenuLabel,
|
||||
DropdownMenuSeparator,
|
||||
DropdownMenuTrigger,
|
||||
} from "@/components/ui/dropdown-menu";
|
||||
import type { JobSource } from "../../shared/types";
|
||||
|
||||
interface HeaderProps {
|
||||
onRunPipeline: () => void;
|
||||
@ -24,6 +34,8 @@ interface HeaderProps {
|
||||
onClearDatabase: () => void;
|
||||
isPipelineRunning: boolean;
|
||||
isLoading: boolean;
|
||||
pipelineSources: JobSource[];
|
||||
onPipelineSourcesChange: (sources: JobSource[]) => void;
|
||||
}
|
||||
|
||||
export const Header: React.FC<HeaderProps> = ({
|
||||
@ -32,7 +44,26 @@ export const Header: React.FC<HeaderProps> = ({
|
||||
onClearDatabase,
|
||||
isPipelineRunning,
|
||||
isLoading,
|
||||
pipelineSources,
|
||||
onPipelineSourcesChange,
|
||||
}) => {
|
||||
const sourceLabel: Record<JobSource, string> = {
|
||||
gradcracker: "Gradcracker",
|
||||
indeed: "Indeed",
|
||||
linkedin: "LinkedIn",
|
||||
};
|
||||
|
||||
const orderedSources: JobSource[] = ["gradcracker", "indeed", "linkedin"];
|
||||
|
||||
const toggleSource = (source: JobSource, checked: boolean) => {
|
||||
const next = checked
|
||||
? Array.from(new Set([...pipelineSources, source]))
|
||||
: pipelineSources.filter((s) => s !== source);
|
||||
|
||||
if (next.length === 0) return;
|
||||
onPipelineSourcesChange(next);
|
||||
};
|
||||
|
||||
return (
|
||||
<header className="sticky top-0 z-40 border-b bg-background/80 backdrop-blur supports-[backdrop-filter]:bg-background/60">
|
||||
<div className="container mx-auto flex max-w-7xl items-center justify-between gap-4 px-4 py-4">
|
||||
@ -81,7 +112,13 @@ export const Header: React.FC<HeaderProps> = ({
|
||||
<span className="hidden sm:inline">Refresh</span>
|
||||
</Button>
|
||||
|
||||
<Button size="sm" onClick={onRunPipeline} disabled={isPipelineRunning}>
|
||||
<div className="flex items-center">
|
||||
<Button
|
||||
size="sm"
|
||||
onClick={onRunPipeline}
|
||||
disabled={isPipelineRunning}
|
||||
className="rounded-r-none"
|
||||
>
|
||||
{isPipelineRunning ? (
|
||||
<>
|
||||
<Loader2 className="h-4 w-4 animate-spin" />
|
||||
@ -94,6 +131,43 @@ export const Header: React.FC<HeaderProps> = ({
|
||||
</>
|
||||
)}
|
||||
</Button>
|
||||
|
||||
<DropdownMenu>
|
||||
<DropdownMenuTrigger asChild>
|
||||
<Button
|
||||
size="sm"
|
||||
disabled={isPipelineRunning}
|
||||
className="rounded-l-none border-l border-primary-foreground/20 px-2"
|
||||
aria-label="Select pipeline sources"
|
||||
>
|
||||
<ChevronDown className="h-4 w-4" />
|
||||
</Button>
|
||||
</DropdownMenuTrigger>
|
||||
<DropdownMenuContent align="end" className="w-56">
|
||||
<DropdownMenuLabel>Sources</DropdownMenuLabel>
|
||||
<DropdownMenuSeparator />
|
||||
{orderedSources.map((source) => (
|
||||
<DropdownMenuCheckboxItem
|
||||
key={source}
|
||||
checked={pipelineSources.includes(source)}
|
||||
onCheckedChange={(checked) => toggleSource(source, Boolean(checked))}
|
||||
>
|
||||
{sourceLabel[source]}
|
||||
</DropdownMenuCheckboxItem>
|
||||
))}
|
||||
<DropdownMenuSeparator />
|
||||
<DropdownMenuItem onSelect={() => onPipelineSourcesChange(orderedSources)}>
|
||||
All sources
|
||||
</DropdownMenuItem>
|
||||
<DropdownMenuItem onSelect={() => onPipelineSourcesChange(["gradcracker"])}>
|
||||
Gradcracker only
|
||||
</DropdownMenuItem>
|
||||
<DropdownMenuItem onSelect={() => onPipelineSourcesChange(["indeed", "linkedin"])}>
|
||||
Indeed + LinkedIn only
|
||||
</DropdownMenuItem>
|
||||
</DropdownMenuContent>
|
||||
</DropdownMenu>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
@ -55,6 +55,8 @@ export const JobCard: React.FC<JobCardProps> = ({
|
||||
}) => {
|
||||
const sourceLabel: Record<Job["source"], string> = {
|
||||
gradcracker: "Gradcracker",
|
||||
indeed: "Indeed",
|
||||
linkedin: "LinkedIn",
|
||||
};
|
||||
|
||||
const hasPdf = !!job.pdfPath;
|
||||
|
||||
193
orchestrator/src/components/ui/dropdown-menu.tsx
Normal file
193
orchestrator/src/components/ui/dropdown-menu.tsx
Normal file
@ -0,0 +1,193 @@
|
||||
import * as React from "react"
|
||||
import * as DropdownMenuPrimitive from "@radix-ui/react-dropdown-menu"
|
||||
import { Check, ChevronRight, Circle } from "lucide-react"
|
||||
|
||||
import { cn } from "@/lib/utils"
|
||||
|
||||
// Radix primitives exposed under local names without any wrapping.
const {
  Root: DropdownMenu,
  Trigger: DropdownMenuTrigger,
  Group: DropdownMenuGroup,
  Portal: DropdownMenuPortal,
  Sub: DropdownMenuSub,
  RadioGroup: DropdownMenuRadioGroup,
} = DropdownMenuPrimitive
|
||||
|
||||
/**
 * Styled wrapper around the Radix `SubTrigger` primitive.
 * Renders its children followed by a `ChevronRight` icon; `inset` adds the
 * `pl-8` class. The ref and all remaining props are forwarded.
 */
const DropdownMenuSubTrigger = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.SubTrigger>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.SubTrigger> & {
    inset?: boolean
  }
>((allProps, forwardedRef) => {
  const { className, inset, children, ...rest } = allProps
  const classes = cn(
    "flex cursor-default select-none items-center rounded-sm px-2 py-1.5 text-sm outline-none focus:bg-accent data-[state=open]:bg-accent",
    inset && "pl-8",
    className
  )

  return (
    <DropdownMenuPrimitive.SubTrigger
      ref={forwardedRef}
      className={classes}
      {...rest}
    >
      {children}
      <ChevronRight className="ml-auto h-4 w-4" />
    </DropdownMenuPrimitive.SubTrigger>
  )
})
DropdownMenuSubTrigger.displayName = DropdownMenuPrimitive.SubTrigger.displayName
|
||||
|
||||
/**
 * Styled wrapper around the Radix `SubContent` primitive.
 * Caller-supplied `className` is merged after the base classes via `cn`.
 */
const DropdownMenuSubContent = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.SubContent>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.SubContent>
>((allProps, forwardedRef) => {
  const { className, ...rest } = allProps
  const classes = cn(
    "z-50 min-w-[8rem] overflow-hidden rounded-md border bg-popover p-1 text-popover-foreground shadow-md data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2",
    className
  )

  return (
    <DropdownMenuPrimitive.SubContent
      ref={forwardedRef}
      className={classes}
      {...rest}
    />
  )
})
DropdownMenuSubContent.displayName = DropdownMenuPrimitive.SubContent.displayName
|
||||
|
||||
/**
 * Styled wrapper around the Radix `Content` primitive, rendered inside a
 * Radix `Portal`. `sideOffset` defaults to 4 when the caller omits it.
 */
const DropdownMenuContent = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Content>
>((allProps, forwardedRef) => {
  const { className, sideOffset = 4, ...rest } = allProps
  const classes = cn(
    "z-50 min-w-[8rem] overflow-hidden rounded-md border bg-popover p-1 text-popover-foreground shadow-md data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2",
    className
  )

  return (
    <DropdownMenuPrimitive.Portal>
      <DropdownMenuPrimitive.Content
        ref={forwardedRef}
        sideOffset={sideOffset}
        className={classes}
        {...rest}
      />
    </DropdownMenuPrimitive.Portal>
  )
})
DropdownMenuContent.displayName = DropdownMenuPrimitive.Content.displayName
|
||||
|
||||
/**
 * Styled wrapper around the Radix `Item` primitive.
 * `inset` adds the `pl-8` class; the ref and remaining props are forwarded.
 */
const DropdownMenuItem = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Item>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Item> & {
    inset?: boolean
  }
>((allProps, forwardedRef) => {
  const { className, inset, ...rest } = allProps
  const classes = cn(
    "relative flex cursor-default select-none items-center rounded-sm px-2 py-1.5 text-sm outline-none transition-colors focus:bg-accent focus:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50",
    inset && "pl-8",
    className
  )

  return (
    <DropdownMenuPrimitive.Item
      ref={forwardedRef}
      className={classes}
      {...rest}
    />
  )
})
DropdownMenuItem.displayName = DropdownMenuPrimitive.Item.displayName
|
||||
|
||||
/**
 * Styled wrapper around the Radix `CheckboxItem` primitive.
 * A `Check` icon sits inside the Radix `ItemIndicator` in a left-hand slot,
 * followed by the item's children.
 */
const DropdownMenuCheckboxItem = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.CheckboxItem>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.CheckboxItem>
>((allProps, forwardedRef) => {
  const { className, children, checked, ...rest } = allProps
  const classes = cn(
    "relative flex cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 text-sm outline-none transition-colors focus:bg-accent focus:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50",
    className
  )

  return (
    <DropdownMenuPrimitive.CheckboxItem
      ref={forwardedRef}
      className={classes}
      checked={checked}
      {...rest}
    >
      <span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
        <DropdownMenuPrimitive.ItemIndicator>
          <Check className="h-4 w-4" />
        </DropdownMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </DropdownMenuPrimitive.CheckboxItem>
  )
})
DropdownMenuCheckboxItem.displayName =
  DropdownMenuPrimitive.CheckboxItem.displayName
|
||||
|
||||
/**
 * Styled wrapper around the Radix `RadioItem` primitive.
 * A filled `Circle` icon sits inside the Radix `ItemIndicator` in a left-hand
 * slot, followed by the item's children.
 */
const DropdownMenuRadioItem = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.RadioItem>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.RadioItem>
>((allProps, forwardedRef) => {
  const { className, children, ...rest } = allProps
  const classes = cn(
    "relative flex cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 text-sm outline-none transition-colors focus:bg-accent focus:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50",
    className
  )

  return (
    <DropdownMenuPrimitive.RadioItem
      ref={forwardedRef}
      className={classes}
      {...rest}
    >
      <span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
        <DropdownMenuPrimitive.ItemIndicator>
          <Circle className="h-2 w-2 fill-current" />
        </DropdownMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </DropdownMenuPrimitive.RadioItem>
  )
})
DropdownMenuRadioItem.displayName = DropdownMenuPrimitive.RadioItem.displayName
|
||||
|
||||
/**
 * Styled wrapper around the Radix `Label` primitive.
 * `inset` adds the `pl-8` class; the ref and remaining props are forwarded.
 */
const DropdownMenuLabel = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Label>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Label> & {
    inset?: boolean
  }
>((allProps, forwardedRef) => {
  const { className, inset, ...rest } = allProps
  const classes = cn("px-2 py-1.5 text-sm font-semibold", inset && "pl-8", className)

  return (
    <DropdownMenuPrimitive.Label
      ref={forwardedRef}
      className={classes}
      {...rest}
    />
  )
})
DropdownMenuLabel.displayName = DropdownMenuPrimitive.Label.displayName
|
||||
|
||||
/**
 * Styled wrapper around the Radix `Separator` primitive.
 * Caller-supplied `className` is merged after the base classes via `cn`.
 */
const DropdownMenuSeparator = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Separator>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Separator>
>((allProps, forwardedRef) => {
  const { className, ...rest } = allProps
  const classes = cn("-mx-1 my-1 h-px bg-muted", className)

  return (
    <DropdownMenuPrimitive.Separator
      ref={forwardedRef}
      className={classes}
      {...rest}
    />
  )
})
DropdownMenuSeparator.displayName = DropdownMenuPrimitive.Separator.displayName
|
||||
|
||||
/**
 * Plain `<span>` with right-aligned, small, dimmed text styling.
 * All standard span attributes are passed through.
 */
const DropdownMenuShortcut = (
  spanProps: React.HTMLAttributes<HTMLSpanElement>
) => {
  const { className, ...rest } = spanProps
  const classes = cn("ml-auto text-xs tracking-widest opacity-60", className)

  return <span className={classes} {...rest} />
}
DropdownMenuShortcut.displayName = "DropdownMenuShortcut"
|
||||
|
||||
export {
|
||||
DropdownMenu,
|
||||
DropdownMenuTrigger,
|
||||
DropdownMenuContent,
|
||||
DropdownMenuItem,
|
||||
DropdownMenuCheckboxItem,
|
||||
DropdownMenuRadioItem,
|
||||
DropdownMenuLabel,
|
||||
DropdownMenuSeparator,
|
||||
DropdownMenuShortcut,
|
||||
DropdownMenuGroup,
|
||||
DropdownMenuPortal,
|
||||
DropdownMenuSub,
|
||||
DropdownMenuSubContent,
|
||||
DropdownMenuSubTrigger,
|
||||
DropdownMenuRadioGroup,
|
||||
}
|
||||
|
||||
@ -280,6 +280,7 @@ apiRouter.get('/pipeline/runs', async (req: Request, res: Response) => {
|
||||
const runPipelineSchema = z.object({
|
||||
topN: z.number().min(1).max(50).optional(),
|
||||
minSuitabilityScore: z.number().min(0).max(100).optional(),
|
||||
sources: z.array(z.enum(['gradcracker', 'indeed', 'linkedin'])).min(1).optional(),
|
||||
});
|
||||
|
||||
apiRouter.post('/pipeline/run', async (req: Request, res: Response) => {
|
||||
|
||||
@ -26,6 +26,33 @@ const migrations = [
|
||||
`CREATE TABLE IF NOT EXISTS jobs (
|
||||
id TEXT PRIMARY KEY,
|
||||
source TEXT NOT NULL DEFAULT 'gradcracker',
|
||||
source_job_id TEXT,
|
||||
job_url_direct TEXT,
|
||||
date_posted TEXT,
|
||||
job_type TEXT,
|
||||
salary_source TEXT,
|
||||
salary_interval TEXT,
|
||||
salary_min_amount REAL,
|
||||
salary_max_amount REAL,
|
||||
salary_currency TEXT,
|
||||
is_remote INTEGER,
|
||||
job_level TEXT,
|
||||
job_function TEXT,
|
||||
listing_type TEXT,
|
||||
emails TEXT,
|
||||
company_industry TEXT,
|
||||
company_logo TEXT,
|
||||
company_url_direct TEXT,
|
||||
company_addresses TEXT,
|
||||
company_num_employees TEXT,
|
||||
company_revenue TEXT,
|
||||
company_description TEXT,
|
||||
skills TEXT,
|
||||
experience_range TEXT,
|
||||
company_rating REAL,
|
||||
company_reviews_count INTEGER,
|
||||
vacancy_count INTEGER,
|
||||
work_from_home_type TEXT,
|
||||
title TEXT NOT NULL,
|
||||
employer TEXT NOT NULL,
|
||||
employer_url TEXT,
|
||||
@ -65,6 +92,35 @@ const migrations = [
|
||||
`ALTER TABLE jobs ADD COLUMN source TEXT NOT NULL DEFAULT 'gradcracker'`,
|
||||
`UPDATE jobs SET source = 'gradcracker' WHERE source IS NULL OR source = ''`,
|
||||
|
||||
// Add JobSpy columns for existing databases (safe to skip if already present)
|
||||
`ALTER TABLE jobs ADD COLUMN source_job_id TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN job_url_direct TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN date_posted TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN job_type TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN salary_source TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN salary_interval TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN salary_min_amount REAL`,
|
||||
`ALTER TABLE jobs ADD COLUMN salary_max_amount REAL`,
|
||||
`ALTER TABLE jobs ADD COLUMN salary_currency TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN is_remote INTEGER`,
|
||||
`ALTER TABLE jobs ADD COLUMN job_level TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN job_function TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN listing_type TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN emails TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN company_industry TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN company_logo TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN company_url_direct TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN company_addresses TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN company_num_employees TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN company_revenue TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN company_description TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN skills TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN experience_range TEXT`,
|
||||
`ALTER TABLE jobs ADD COLUMN company_rating REAL`,
|
||||
`ALTER TABLE jobs ADD COLUMN company_reviews_count INTEGER`,
|
||||
`ALTER TABLE jobs ADD COLUMN vacancy_count INTEGER`,
|
||||
`ALTER TABLE jobs ADD COLUMN work_from_home_type TEXT`,
|
||||
|
||||
`CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_jobs_discovered_at ON jobs(discovered_at)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_pipeline_runs_started_at ON pipeline_runs(started_at)`,
|
||||
@ -78,12 +134,12 @@ for (const migration of migrations) {
|
||||
console.log('✅ Migration applied');
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
const isDuplicateSourceColumn =
|
||||
migration.includes('ALTER TABLE jobs ADD COLUMN source') &&
|
||||
const isDuplicateColumn =
|
||||
migration.toLowerCase().includes('alter table jobs add column') &&
|
||||
message.toLowerCase().includes('duplicate column name');
|
||||
|
||||
if (isDuplicateSourceColumn) {
|
||||
console.log('↩️ Migration skipped (source column already exists)');
|
||||
if (isDuplicateColumn) {
|
||||
console.log('↩️ Migration skipped (column already exists)');
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@ -9,7 +9,10 @@ export const jobs = sqliteTable('jobs', {
|
||||
id: text('id').primaryKey(),
|
||||
|
||||
// From crawler
|
||||
source: text('source', { enum: ['gradcracker'] }).notNull().default('gradcracker'),
|
||||
source: text('source', { enum: ['gradcracker', 'indeed', 'linkedin'] }).notNull().default('gradcracker'),
|
||||
sourceJobId: text('source_job_id'),
|
||||
jobUrlDirect: text('job_url_direct'),
|
||||
datePosted: text('date_posted'),
|
||||
title: text('title').notNull(),
|
||||
employer: text('employer').notNull(),
|
||||
employerUrl: text('employer_url'),
|
||||
@ -23,6 +26,32 @@ export const jobs = sqliteTable('jobs', {
|
||||
starting: text('starting'),
|
||||
jobDescription: text('job_description'),
|
||||
|
||||
// JobSpy fields (nullable for other sources)
|
||||
jobType: text('job_type'),
|
||||
salarySource: text('salary_source'),
|
||||
salaryInterval: text('salary_interval'),
|
||||
salaryMinAmount: real('salary_min_amount'),
|
||||
salaryMaxAmount: real('salary_max_amount'),
|
||||
salaryCurrency: text('salary_currency'),
|
||||
isRemote: integer('is_remote', { mode: 'boolean' }),
|
||||
jobLevel: text('job_level'),
|
||||
jobFunction: text('job_function'),
|
||||
listingType: text('listing_type'),
|
||||
emails: text('emails'),
|
||||
companyIndustry: text('company_industry'),
|
||||
companyLogo: text('company_logo'),
|
||||
companyUrlDirect: text('company_url_direct'),
|
||||
companyAddresses: text('company_addresses'),
|
||||
companyNumEmployees: text('company_num_employees'),
|
||||
companyRevenue: text('company_revenue'),
|
||||
companyDescription: text('company_description'),
|
||||
skills: text('skills'),
|
||||
experienceRange: text('experience_range'),
|
||||
companyRating: real('company_rating'),
|
||||
companyReviewsCount: integer('company_reviews_count'),
|
||||
vacancyCount: integer('vacancy_count'),
|
||||
workFromHomeType: text('work_from_home_type'),
|
||||
|
||||
// Orchestrator enrichments
|
||||
status: text('status', {
|
||||
enum: ['discovered', 'processing', 'ready', 'applied', 'rejected', 'expired']
|
||||
|
||||
@ -14,13 +14,14 @@ import { readFile } from 'fs/promises';
|
||||
import { join, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { runCrawler } from '../services/crawler.js';
|
||||
import { runJobSpy } from '../services/jobspy.js';
|
||||
import { scoreAndRankJobs, scoreJobSuitability } from '../services/scorer.js';
|
||||
import { generateSummary } from '../services/summary.js';
|
||||
import { generatePdf } from '../services/pdf.js';
|
||||
import * as jobsRepo from '../repositories/jobs.js';
|
||||
import * as pipelineRepo from '../repositories/pipeline.js';
|
||||
import { progressHelpers, resetProgress } from './progress.js';
|
||||
import type { Job, PipelineConfig } from '../../shared/types.js';
|
||||
import { progressHelpers, resetProgress, updateProgress } from './progress.js';
|
||||
import type { CreateJobInput, Job, JobSource, PipelineConfig } from '../../shared/types.js';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const DEFAULT_PROFILE_PATH = join(__dirname, '../../../../resume-generator/base.json');
|
||||
@ -28,7 +29,7 @@ const DEFAULT_PROFILE_PATH = join(__dirname, '../../../../resume-generator/base.
|
||||
const DEFAULT_CONFIG: PipelineConfig = {
|
||||
topN: 10,
|
||||
minSuitabilityScore: 50,
|
||||
sources: ['gradcracker'],
|
||||
sources: ['gradcracker', 'indeed', 'linkedin'],
|
||||
profilePath: DEFAULT_PROFILE_PATH,
|
||||
outputDir: join(__dirname, '../../../data/pdfs'),
|
||||
};
|
||||
@ -73,6 +74,11 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
console.log('\n🕷️ Running crawler...');
|
||||
progressHelpers.startCrawling();
|
||||
const existingJobUrls = await jobsRepo.getAllJobUrls();
|
||||
|
||||
const discoveredJobs: CreateJobInput[] = [];
|
||||
const sourceErrors: string[] = [];
|
||||
|
||||
if (mergedConfig.sources.includes('gradcracker')) {
|
||||
const crawlerResult = await runCrawler({
|
||||
existingJobUrls,
|
||||
onProgress: (update) => {
|
||||
@ -90,14 +96,43 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
});
|
||||
|
||||
if (!crawlerResult.success) {
|
||||
throw new Error(`Crawler failed: ${crawlerResult.error}`);
|
||||
sourceErrors.push(`gradcracker: ${crawlerResult.error ?? 'unknown error'}`);
|
||||
} else {
|
||||
discoveredJobs.push(...crawlerResult.jobs);
|
||||
}
|
||||
}
|
||||
|
||||
progressHelpers.crawlingComplete(crawlerResult.jobs.length);
|
||||
const jobSpySites = mergedConfig.sources.filter(
|
||||
(s): s is 'indeed' | 'linkedin' => s === 'indeed' || s === 'linkedin'
|
||||
);
|
||||
|
||||
if (jobSpySites.length > 0) {
|
||||
updateProgress({
|
||||
step: 'crawling',
|
||||
detail: `JobSpy: scraping ${jobSpySites.join(', ')}...`,
|
||||
});
|
||||
|
||||
const jobSpyResult = await runJobSpy({ sites: jobSpySites });
|
||||
if (!jobSpyResult.success) {
|
||||
sourceErrors.push(`jobspy: ${jobSpyResult.error ?? 'unknown error'}`);
|
||||
} else {
|
||||
discoveredJobs.push(...jobSpyResult.jobs);
|
||||
}
|
||||
}
|
||||
|
||||
if (discoveredJobs.length === 0 && sourceErrors.length > 0) {
|
||||
throw new Error(`All sources failed: ${sourceErrors.join('; ')}`);
|
||||
}
|
||||
|
||||
if (sourceErrors.length > 0) {
|
||||
console.warn(`⚠️ Some sources failed: ${sourceErrors.join('; ')}`);
|
||||
}
|
||||
|
||||
progressHelpers.crawlingComplete(discoveredJobs.length);
|
||||
|
||||
// Step 3: Import discovered jobs
|
||||
console.log('\n💾 Importing jobs to database...');
|
||||
const { created, skipped } = await jobsRepo.bulkCreateJobs(crawlerResult.jobs);
|
||||
const { created, skipped } = await jobsRepo.bulkCreateJobs(discoveredJobs);
|
||||
console.log(` Created: ${created}, Skipped (duplicates): ${skipped}`);
|
||||
|
||||
progressHelpers.importComplete(created, skipped);
|
||||
|
||||
@ -61,6 +61,9 @@ export async function createJob(input: CreateJobInput): Promise<Job> {
|
||||
await db.insert(jobs).values({
|
||||
id,
|
||||
source: input.source,
|
||||
sourceJobId: input.sourceJobId ?? null,
|
||||
jobUrlDirect: input.jobUrlDirect ?? null,
|
||||
datePosted: input.datePosted ?? null,
|
||||
title: input.title,
|
||||
employer: input.employer,
|
||||
employerUrl: input.employerUrl ?? null,
|
||||
@ -73,6 +76,30 @@ export async function createJob(input: CreateJobInput): Promise<Job> {
|
||||
degreeRequired: input.degreeRequired ?? null,
|
||||
starting: input.starting ?? null,
|
||||
jobDescription: input.jobDescription ?? null,
|
||||
jobType: input.jobType ?? null,
|
||||
salarySource: input.salarySource ?? null,
|
||||
salaryInterval: input.salaryInterval ?? null,
|
||||
salaryMinAmount: input.salaryMinAmount ?? null,
|
||||
salaryMaxAmount: input.salaryMaxAmount ?? null,
|
||||
salaryCurrency: input.salaryCurrency ?? null,
|
||||
isRemote: input.isRemote ?? null,
|
||||
jobLevel: input.jobLevel ?? null,
|
||||
jobFunction: input.jobFunction ?? null,
|
||||
listingType: input.listingType ?? null,
|
||||
emails: input.emails ?? null,
|
||||
companyIndustry: input.companyIndustry ?? null,
|
||||
companyLogo: input.companyLogo ?? null,
|
||||
companyUrlDirect: input.companyUrlDirect ?? null,
|
||||
companyAddresses: input.companyAddresses ?? null,
|
||||
companyNumEmployees: input.companyNumEmployees ?? null,
|
||||
companyRevenue: input.companyRevenue ?? null,
|
||||
companyDescription: input.companyDescription ?? null,
|
||||
skills: input.skills ?? null,
|
||||
experienceRange: input.experienceRange ?? null,
|
||||
companyRating: input.companyRating ?? null,
|
||||
companyReviewsCount: input.companyReviewsCount ?? null,
|
||||
vacancyCount: input.vacancyCount ?? null,
|
||||
workFromHomeType: input.workFromHomeType ?? null,
|
||||
status: 'discovered',
|
||||
discoveredAt: now,
|
||||
createdAt: now,
|
||||
@ -173,6 +200,9 @@ function mapRowToJob(row: typeof jobs.$inferSelect): Job {
|
||||
return {
|
||||
id: row.id,
|
||||
source: row.source as Job['source'],
|
||||
sourceJobId: row.sourceJobId ?? null,
|
||||
jobUrlDirect: row.jobUrlDirect ?? null,
|
||||
datePosted: row.datePosted ?? null,
|
||||
title: row.title,
|
||||
employer: row.employer,
|
||||
employerUrl: row.employerUrl,
|
||||
@ -191,6 +221,30 @@ function mapRowToJob(row: typeof jobs.$inferSelect): Job {
|
||||
tailoredSummary: row.tailoredSummary,
|
||||
pdfPath: row.pdfPath,
|
||||
notionPageId: row.notionPageId,
|
||||
jobType: row.jobType ?? null,
|
||||
salarySource: row.salarySource ?? null,
|
||||
salaryInterval: row.salaryInterval ?? null,
|
||||
salaryMinAmount: row.salaryMinAmount ?? null,
|
||||
salaryMaxAmount: row.salaryMaxAmount ?? null,
|
||||
salaryCurrency: row.salaryCurrency ?? null,
|
||||
isRemote: row.isRemote ?? null,
|
||||
jobLevel: row.jobLevel ?? null,
|
||||
jobFunction: row.jobFunction ?? null,
|
||||
listingType: row.listingType ?? null,
|
||||
emails: row.emails ?? null,
|
||||
companyIndustry: row.companyIndustry ?? null,
|
||||
companyLogo: row.companyLogo ?? null,
|
||||
companyUrlDirect: row.companyUrlDirect ?? null,
|
||||
companyAddresses: row.companyAddresses ?? null,
|
||||
companyNumEmployees: row.companyNumEmployees ?? null,
|
||||
companyRevenue: row.companyRevenue ?? null,
|
||||
companyDescription: row.companyDescription ?? null,
|
||||
skills: row.skills ?? null,
|
||||
experienceRange: row.experienceRange ?? null,
|
||||
companyRating: row.companyRating ?? null,
|
||||
companyReviewsCount: row.companyReviewsCount ?? null,
|
||||
vacancyCount: row.vacancyCount ?? null,
|
||||
workFromHomeType: row.workFromHomeType ?? null,
|
||||
discoveredAt: row.discoveredAt,
|
||||
processedAt: row.processedAt,
|
||||
appliedAt: row.appliedAt,
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
export * from './crawler.js';
|
||||
export * from './jobspy.js';
|
||||
export * from './scorer.js';
|
||||
export * from './summary.js';
|
||||
export * from './pdf.js';
|
||||
|
||||
241
orchestrator/src/server/services/jobspy.ts
Normal file
241
orchestrator/src/server/services/jobspy.ts
Normal file
@ -0,0 +1,241 @@
|
||||
/**
|
||||
* Service for scraping jobs via JobSpy (Indeed/LinkedIn/etc) and mapping them into our DB shape.
|
||||
*
|
||||
* Uses a small Python wrapper script that writes both CSV + JSON to disk; we ingest the JSON.
|
||||
*/
|
||||
|
||||
import { spawn } from 'child_process';
|
||||
import { readFile, mkdir } from 'fs/promises';
|
||||
import { join, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import type { CreateJobInput, JobSource } from '../../shared/types.js';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const JOBSPY_DIR = join(__dirname, '../../../../jobspy-extractor');
|
||||
const JOBSPY_SCRIPT = join(JOBSPY_DIR, 'scrape_jobs.py');
|
||||
|
||||
/**
 * Resolve the Python executable used to launch the JobSpy wrapper script.
 *
 * A non-empty `PYTHON_PATH` environment variable takes precedence; otherwise
 * `python` is used on Windows and `python3` on every other platform.
 */
function getPythonPath(): string {
  const override = process.env.PYTHON_PATH;
  if (override) {
    return override;
  }

  if (process.platform === 'win32') {
    return 'python';
  }
  return 'python3';
}
|
||||
|
||||
/**
 * Resolve the directory where pipeline data is stored.
 *
 * A non-empty `DATA_DIR` environment variable takes precedence; otherwise the
 * `data` directory three levels above this module is used.
 */
function getDataDir(): string {
  const configured = process.env.DATA_DIR;
  return configured ? configured : join(__dirname, '../../../data');
}
|
||||
|
||||
/**
 * Coerce a loosely-typed value into a trimmed, non-empty string.
 *
 * - strings are trimmed; an empty result becomes `null`
 * - numbers and booleans are converted with `String(...)`
 * - everything else (including `null`, `undefined`, objects) becomes `null`
 */
function toStringOrNull(value: unknown): string | null {
  switch (typeof value) {
    case 'string': {
      const text = value.trim();
      return text === '' ? null : text;
    }
    case 'number':
    case 'boolean':
      return String(value);
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Coerce a loosely-typed value into a finite number.
 *
 * Numbers are accepted as-is and non-blank strings are parsed with `Number(...)`.
 * Non-finite results, blank strings and every other input type yield `null`.
 */
function toNumberOrNull(value: unknown): number | null {
  let candidate: number;

  if (typeof value === 'number') {
    candidate = value;
  } else if (typeof value === 'string') {
    const text = value.trim();
    if (text === '') return null;
    candidate = Number(text);
  } else {
    return null;
  }

  return Number.isFinite(candidate) ? candidate : null;
}
|
||||
|
||||
/**
 * Coerce a loosely-typed value into a boolean.
 *
 * - booleans are returned unchanged
 * - numbers map to `value !== 0`
 * - strings are trimmed and lower-cased, then matched against the accepted
 *   truthy (`1/true/yes/y/on`) and falsy (`0/false/no/n/off`) tokens
 * - anything else, including unrecognised strings, yields `null`
 */
function toBooleanOrNull(value: unknown): boolean | null {
  switch (typeof value) {
    case 'boolean':
      return value;
    case 'number':
      return value !== 0;
    case 'string':
      switch (value.trim().toLowerCase()) {
        case '1':
        case 'true':
        case 'yes':
        case 'y':
        case 'on':
          return true;
        case '0':
        case 'false':
        case 'no':
        case 'n':
        case 'off':
          return false;
        default:
          return null;
      }
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Produce a string representation suitable for a TEXT column.
 *
 * Strings go through `toStringOrNull` (trimmed, empty becomes `null`); any
 * other non-nullish value is serialised with `JSON.stringify`. A value that
 * cannot be serialised yields `null`.
 */
function toJsonStringOrNull(value: unknown): string | null {
  if (value == null) return null;

  if (typeof value !== 'string') {
    try {
      return JSON.stringify(value);
    } catch {
      return null;
    }
  }

  return toStringOrNull(value);
}
|
||||
|
||||
/**
 * Map a raw `site` value onto one of the known job sources.
 *
 * The value is normalised via `toStringOrNull` and lower-cased before matching;
 * anything that is not a recognised source yields `null`.
 */
function toJobSource(site: unknown): JobSource | null {
  const name = toStringOrNull(site)?.toLowerCase();

  switch (name) {
    case 'gradcracker':
      return 'gradcracker';
    case 'indeed':
      return 'indeed';
    case 'linkedin':
      return 'linkedin';
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Build a short human-readable salary string from structured salary fields.
 *
 * Amounts are rounded to whole numbers. The result has the shape
 * `[currency ]<range>[ / interval]` where `<range>` is `min-max`, `min+`
 * (only a minimum is known) or `max` (only a maximum is known).
 *
 * @returns the formatted salary, or `null` when neither amount is available.
 */
function formatSalary(params: {
  minAmount: number | null;
  maxAmount: number | null;
  currency: string | null;
  interval: string | null;
}): string | null {
  const { minAmount: low, maxAmount: high, currency, interval } = params;

  // Plain integer rendering keeps the output locale-independent.
  const whole = (amount: number): string => String(Math.round(amount));

  let range: string;
  if (low !== null) {
    range = high !== null ? `${whole(low)}-${whole(high)}` : `${whole(low)}+`;
  } else if (high !== null) {
    range = whole(high);
  } else {
    return null;
  }

  const prefix = currency ? `${currency} ` : '';
  const suffix = interval ? ` / ${interval}` : '';
  return (prefix + range + suffix).trim();
}
|
||||
|
||||
/**
 * Optional overrides for a JobSpy scrape.
 *
 * `runJobSpy` resolves most omitted fields from the matching `JOBSPY_*`
 * environment variable and then from a built-in default; `sites` has no
 * environment fallback.
 */
export interface RunJobSpyOptions {
  /** Sites to scrape; `runJobSpy` forwards only `indeed` and `linkedin`. */
  sites?: JobSource[];
  /** Search query (env: `JOBSPY_SEARCH_TERM`). */
  searchTerm?: string;
  /** Search location (env: `JOBSPY_LOCATION`). */
  location?: string;
  /** Value passed as `JOBSPY_RESULTS_WANTED`. */
  resultsWanted?: number;
  /** Value passed as `JOBSPY_HOURS_OLD`. */
  hoursOld?: number;
  /** Value passed as `JOBSPY_COUNTRY_INDEED`. */
  countryIndeed?: string;
  /** Value passed as `JOBSPY_LINKEDIN_FETCH_DESCRIPTION`. */
  linkedinFetchDescription?: boolean;
}
|
||||
|
||||
/** Outcome of a JobSpy scrape as returned by `runJobSpy`. */
export interface JobSpyResult {
  /** `true` when the scraper ran and its output was ingested. */
  success: boolean;
  /** Jobs mapped into the database input shape; empty on failure. */
  jobs: CreateJobInput[];
  /** Failure message; set by `runJobSpy` only when `success` is `false`. */
  error?: string;
}
|
||||
|
||||
/**
 * Convert one raw JobSpy record into a `CreateJobInput`.
 *
 * Returns `null` when the record cannot be imported: its `site` is not a known
 * job source, or it has no usable `job_url`.
 */
function mapJobSpyRow(row: Record<string, unknown>): CreateJobInput | null {
  const source = toJobSource(row.site);
  if (!source) return null;

  const jobUrl = toStringOrNull(row.job_url);
  if (!jobUrl) return null;

  const jobUrlDirect = toStringOrNull(row.job_url_direct);

  const minAmount = toNumberOrNull(row.min_amount);
  const maxAmount = toNumberOrNull(row.max_amount);
  const currency = toStringOrNull(row.currency);
  const interval = toStringOrNull(row.interval);
  const salary = formatSalary({ minAmount, maxAmount, currency, interval });

  return {
    source,
    sourceJobId: toStringOrNull(row.id) ?? undefined,
    jobUrlDirect: jobUrlDirect ?? undefined,
    datePosted: toStringOrNull(row.date_posted) ?? undefined,

    // title/employer are NOT NULL in the jobs table, so fall back to placeholders.
    title: toStringOrNull(row.title) ?? 'Unknown Title',
    employer: toStringOrNull(row.company) ?? 'Unknown Employer',
    employerUrl: toStringOrNull(row.company_url) ?? undefined,
    jobUrl,
    // Prefer the direct URL when the source provides one.
    applicationLink: jobUrlDirect ?? jobUrl,
    location: toStringOrNull(row.location) ?? undefined,
    jobDescription: toStringOrNull(row.description) ?? undefined,
    salary: salary ?? undefined,

    jobType: toStringOrNull(row.job_type) ?? undefined,
    salarySource: toStringOrNull(row.salary_source) ?? undefined,
    salaryInterval: interval ?? undefined,
    salaryMinAmount: minAmount ?? undefined,
    salaryMaxAmount: maxAmount ?? undefined,
    salaryCurrency: currency ?? undefined,
    isRemote: toBooleanOrNull(row.is_remote) ?? undefined,
    jobLevel: toStringOrNull(row.job_level) ?? undefined,
    jobFunction: toStringOrNull(row.job_function) ?? undefined,
    listingType: toStringOrNull(row.listing_type) ?? undefined,
    emails: toJsonStringOrNull(row.emails) ?? undefined,
    companyIndustry: toStringOrNull(row.company_industry) ?? undefined,
    companyLogo: toStringOrNull(row.company_logo) ?? undefined,
    companyUrlDirect: toStringOrNull(row.company_url_direct) ?? undefined,
    companyAddresses: toJsonStringOrNull(row.company_addresses) ?? undefined,
    companyNumEmployees: toStringOrNull(row.company_num_employees) ?? undefined,
    companyRevenue: toStringOrNull(row.company_revenue) ?? undefined,
    companyDescription: toStringOrNull(row.company_description) ?? undefined,
    skills: toJsonStringOrNull(row.skills) ?? undefined,
    experienceRange: toJsonStringOrNull(row.experience_range) ?? undefined,
    companyRating: toNumberOrNull(row.company_rating) ?? undefined,
    companyReviewsCount: toNumberOrNull(row.company_reviews_count) ?? undefined,
    vacancyCount: toNumberOrNull(row.vacancy_count) ?? undefined,
    workFromHomeType: toStringOrNull(row.work_from_home_type) ?? undefined,
  };
}

/**
 * Run the JobSpy Python wrapper with the given environment.
 *
 * Resolves when the process exits with code 0; rejects if it cannot be
 * spawned, exits with a non-zero code, or is terminated by a signal.
 */
function runJobSpyScript(env: NodeJS.ProcessEnv): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const child = spawn(getPythonPath(), [JOBSPY_SCRIPT], {
      cwd: JOBSPY_DIR,
      shell: false,
      stdio: 'inherit',
      env,
    });

    child.on('error', reject);
    child.on('close', (code, signal) => {
      if (code === 0) {
        resolve();
      } else if (signal) {
        // `code` is null when the process was killed by a signal.
        reject(new Error(`JobSpy was terminated by signal ${signal}`));
      } else {
        reject(new Error(`JobSpy exited with code ${code}`));
      }
    });
  });
}

/**
 * Scrape jobs with JobSpy and map them into `CreateJobInput` records.
 *
 * The Python wrapper is configured through `JOBSPY_*` environment variables
 * (explicit `options` win over the current environment, which wins over the
 * built-in defaults) and writes CSV + JSON files into `<dataDir>/imports`; the
 * JSON file is then read and converted. Records with an unknown site or no job
 * URL are skipped.
 *
 * Failures are never thrown: any error (directory creation, spawn, non-zero
 * exit, unreadable or malformed output) is reported as
 * `{ success: false, jobs: [], error }`.
 *
 * @param options Optional overrides for the scrape parameters.
 * @returns The mapped jobs on success, or an error message on failure.
 */
export async function runJobSpy(options: RunJobSpyOptions = {}): Promise<JobSpyResult> {
  try {
    const outputDir = join(getDataDir(), 'imports');
    await mkdir(outputDir, { recursive: true });

    const outputCsv = join(outputDir, 'jobspy_jobs.csv');
    const outputJson = join(outputDir, 'jobspy_jobs.json');

    // Only the sites JobSpy handles are forwarded; an empty list falls back to both.
    const sites = (options.sites ?? ['indeed', 'linkedin'])
      .filter((s) => s === 'indeed' || s === 'linkedin')
      .join(',');

    // Encode an explicit boolean as 1/0 to match the JOBSPY_LINKEDIN_FETCH_DESCRIPTION
    // env convention instead of passing "true"/"false".
    // NOTE(review): assumes the Python wrapper treats "0" as disabled - confirm.
    const fetchDescription = options.linkedinFetchDescription;
    const fetchDescriptionFlag =
      fetchDescription == null
        ? process.env.JOBSPY_LINKEDIN_FETCH_DESCRIPTION ?? '1'
        : fetchDescription
          ? '1'
          : '0';

    await runJobSpyScript({
      ...process.env,
      JOBSPY_SITES: sites || 'indeed,linkedin',
      JOBSPY_SEARCH_TERM: options.searchTerm ?? process.env.JOBSPY_SEARCH_TERM ?? 'web developer',
      JOBSPY_LOCATION: options.location ?? process.env.JOBSPY_LOCATION ?? 'UK',
      JOBSPY_RESULTS_WANTED: String(options.resultsWanted ?? process.env.JOBSPY_RESULTS_WANTED ?? 200),
      JOBSPY_HOURS_OLD: String(options.hoursOld ?? process.env.JOBSPY_HOURS_OLD ?? 72),
      JOBSPY_COUNTRY_INDEED: options.countryIndeed ?? process.env.JOBSPY_COUNTRY_INDEED ?? 'UK',
      JOBSPY_LINKEDIN_FETCH_DESCRIPTION: fetchDescriptionFlag,
      JOBSPY_OUTPUT_CSV: outputCsv,
      JOBSPY_OUTPUT_JSON: outputJson,
    });

    const parsed: unknown = JSON.parse(await readFile(outputJson, 'utf-8'));
    if (!Array.isArray(parsed)) {
      throw new Error('JobSpy output is not a JSON array');
    }

    const jobs: CreateJobInput[] = [];
    for (const row of parsed) {
      // A stray null/primitive entry must not abort the whole import.
      if (row === null || typeof row !== 'object') continue;

      const job = mapJobSpyRow(row as Record<string, unknown>);
      if (job) jobs.push(job);
    }

    return { success: true, jobs };
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error';
    return { success: false, jobs: [], error: message };
  }
}
|
||||
@ -11,13 +11,20 @@ export type JobStatus =
|
||||
| 'expired'; // Deadline passed
|
||||
|
||||
export type JobSource =
|
||||
| 'gradcracker';
|
||||
| 'gradcracker'
|
||||
| 'indeed'
|
||||
| 'linkedin';
|
||||
|
||||
export interface Job {
|
||||
id: string;
|
||||
|
||||
// From crawler
|
||||
// Source / provenance
|
||||
source: JobSource;
|
||||
sourceJobId: string | null; // External ID (if provided)
|
||||
jobUrlDirect: string | null; // Source-provided direct URL (if provided)
|
||||
datePosted: string | null; // Source-provided posting date (if provided)
|
||||
|
||||
// From crawler (normalized)
|
||||
title: string;
|
||||
employer: string;
|
||||
employerUrl: string | null;
|
||||
@ -39,6 +46,32 @@ export interface Job {
|
||||
pdfPath: string | null; // Path to generated PDF
|
||||
notionPageId: string | null; // Notion page ID if synced
|
||||
|
||||
// JobSpy fields (nullable for non-JobSpy sources)
|
||||
jobType: string | null;
|
||||
salarySource: string | null;
|
||||
salaryInterval: string | null;
|
||||
salaryMinAmount: number | null;
|
||||
salaryMaxAmount: number | null;
|
||||
salaryCurrency: string | null;
|
||||
isRemote: boolean | null;
|
||||
jobLevel: string | null;
|
||||
jobFunction: string | null;
|
||||
listingType: string | null;
|
||||
emails: string | null;
|
||||
companyIndustry: string | null;
|
||||
companyLogo: string | null;
|
||||
companyUrlDirect: string | null;
|
||||
companyAddresses: string | null;
|
||||
companyNumEmployees: string | null;
|
||||
companyRevenue: string | null;
|
||||
companyDescription: string | null;
|
||||
skills: string | null;
|
||||
experienceRange: string | null;
|
||||
companyRating: number | null;
|
||||
companyReviewsCount: number | null;
|
||||
vacancyCount: number | null;
|
||||
workFromHomeType: string | null;
|
||||
|
||||
// Timestamps
|
||||
discoveredAt: string;
|
||||
processedAt: string | null;
|
||||
@ -61,6 +94,35 @@ export interface CreateJobInput {
|
||||
degreeRequired?: string;
|
||||
starting?: string;
|
||||
jobDescription?: string;
|
||||
|
||||
// JobSpy fields (optional)
|
||||
sourceJobId?: string;
|
||||
jobUrlDirect?: string;
|
||||
datePosted?: string;
|
||||
jobType?: string;
|
||||
salarySource?: string;
|
||||
salaryInterval?: string;
|
||||
salaryMinAmount?: number;
|
||||
salaryMaxAmount?: number;
|
||||
salaryCurrency?: string;
|
||||
isRemote?: boolean;
|
||||
jobLevel?: string;
|
||||
jobFunction?: string;
|
||||
listingType?: string;
|
||||
emails?: string;
|
||||
companyIndustry?: string;
|
||||
companyLogo?: string;
|
||||
companyUrlDirect?: string;
|
||||
companyAddresses?: string;
|
||||
companyNumEmployees?: string;
|
||||
companyRevenue?: string;
|
||||
companyDescription?: string;
|
||||
skills?: string;
|
||||
experienceRange?: string;
|
||||
companyRating?: number;
|
||||
companyReviewsCount?: number;
|
||||
vacancyCount?: number;
|
||||
workFromHomeType?: string;
|
||||
}
|
||||
|
||||
export interface UpdateJobInput {
|
||||
@ -76,7 +138,7 @@ export interface UpdateJobInput {
|
||||
export interface PipelineConfig {
|
||||
topN: number; // Number of top jobs to process
|
||||
minSuitabilityScore: number; // Minimum score to auto-process
|
||||
sources: string[]; // Job sources to crawl
|
||||
sources: JobSource[]; // Job sources to crawl
|
||||
profilePath: string; // Path to profile JSON
|
||||
outputDir: string; // Directory for generated PDFs
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user