job_spy implementation
This commit is contained in:
parent
4a00b3b900
commit
cefb75a9ec
12
.env.example
12
.env.example
@ -23,3 +23,15 @@ NOTION_DATABASE_ID=
|
|||||||
|
|
||||||
# Optional: Webhook secret for n8n automation
|
# Optional: Webhook secret for n8n automation
|
||||||
WEBHOOK_SECRET=
|
WEBHOOK_SECRET=
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# JobSpy (Indeed/LinkedIn scraping) - optional
|
||||||
|
# =============================================================================
|
||||||
|
# These control the Python JobSpy scraper used by the pipeline.
|
||||||
|
JOBSPY_SITES=indeed,linkedin
|
||||||
|
JOBSPY_SEARCH_TERM=web developer
|
||||||
|
JOBSPY_LOCATION=UK
|
||||||
|
JOBSPY_RESULTS_WANTED=200
|
||||||
|
JOBSPY_HOURS_OLD=72
|
||||||
|
JOBSPY_COUNTRY_INDEED=UK
|
||||||
|
JOBSPY_LINKEDIN_FETCH_DESCRIPTION=1
|
||||||
|
|||||||
@ -21,8 +21,8 @@ RUN apt-get update && apt-get install -y \
|
|||||||
# Set working directory
|
# Set working directory
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Install Playwright and Firefox only
|
# Install Playwright (Firefox browser only), plus JobSpy for Indeed/LinkedIn scraping
|
||||||
RUN pip3 install --no-cache-dir --break-system-packages playwright && \
|
RUN pip3 install --no-cache-dir --break-system-packages playwright python-jobspy && \
|
||||||
npx playwright install firefox
|
npx playwright install firefox
|
||||||
|
|
||||||
# Copy package files first for better caching
|
# Copy package files first for better caching
|
||||||
@ -43,6 +43,7 @@ RUN npx camoufox fetch
|
|||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
COPY orchestrator ./orchestrator
|
COPY orchestrator ./orchestrator
|
||||||
COPY job-extractor ./job-extractor
|
COPY job-extractor ./job-extractor
|
||||||
|
COPY jobspy-extractor ./jobspy-extractor
|
||||||
COPY resume-generator ./resume-generator
|
COPY resume-generator ./resume-generator
|
||||||
|
|
||||||
# Build the orchestrator (client + server)
|
# Build the orchestrator (client + server)
|
||||||
|
|||||||
@ -33,6 +33,15 @@ services:
|
|||||||
- PIPELINE_TOP_N=${PIPELINE_TOP_N:-10}
|
- PIPELINE_TOP_N=${PIPELINE_TOP_N:-10}
|
||||||
- PIPELINE_MIN_SCORE=${PIPELINE_MIN_SCORE:-50}
|
- PIPELINE_MIN_SCORE=${PIPELINE_MIN_SCORE:-50}
|
||||||
|
|
||||||
|
# JobSpy (Indeed/LinkedIn scraping) - optional
|
||||||
|
- JOBSPY_SITES=${JOBSPY_SITES:-indeed,linkedin}
|
||||||
|
- JOBSPY_SEARCH_TERM=${JOBSPY_SEARCH_TERM:-web developer}
|
||||||
|
- JOBSPY_LOCATION=${JOBSPY_LOCATION:-UK}
|
||||||
|
- JOBSPY_RESULTS_WANTED=${JOBSPY_RESULTS_WANTED:-200}
|
||||||
|
- JOBSPY_HOURS_OLD=${JOBSPY_HOURS_OLD:-72}
|
||||||
|
- JOBSPY_COUNTRY_INDEED=${JOBSPY_COUNTRY_INDEED:-UK}
|
||||||
|
- JOBSPY_LINKEDIN_FETCH_DESCRIPTION=${JOBSPY_LINKEDIN_FETCH_DESCRIPTION:-1}
|
||||||
|
|
||||||
# Optional: Notion integration
|
# Optional: Notion integration
|
||||||
- NOTION_API_KEY=${NOTION_API_KEY:-}
|
- NOTION_API_KEY=${NOTION_API_KEY:-}
|
||||||
- NOTION_DATABASE_ID=${NOTION_DATABASE_ID:-}
|
- NOTION_DATABASE_ID=${NOTION_DATABASE_ID:-}
|
||||||
|
|||||||
1
jobspy-extractor/requirements.txt
Normal file
1
jobspy-extractor/requirements.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
python-jobspy
|
||||||
77
jobspy-extractor/scrape_jobs.py
Normal file
77
jobspy-extractor/scrape_jobs.py
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
import csv
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from jobspy import scrape_jobs
|
||||||
|
|
||||||
|
|
||||||
|
def _env_str(name: str, default: str) -> str:
    """Return environment variable *name* as a trimmed string.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset, empty, or
            contains only whitespace.

    Returns:
        The variable's value with leading/trailing whitespace removed, or
        *default* when nothing usable is set.
    """
    # Strip before returning so padded values (e.g. " UK " or "jobs.csv ")
    # do not leak whitespace into search parameters or output paths; this
    # matches how the int/bool readers already tolerate padding.
    value = os.getenv(name, "").strip()
    return value or default
|
||||||
|
|
||||||
|
|
||||||
|
def _env_int(name: str, default: int) -> int:
    """Read environment variable *name* as an integer.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset, blank, or not
            parseable by ``int()``.

    Returns:
        The parsed integer, or *default* as a fallback.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        parsed = int(raw)
    except ValueError:
        # Malformed numbers are deliberately tolerated: fall back quietly.
        parsed = default
    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def _env_bool(name: str, default: bool) -> bool:
    """Read environment variable *name* as a boolean flag.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset or blank.

    Returns:
        ``True`` when the trimmed, lower-cased value is one of ``1``,
        ``true``, ``yes``, ``y`` or ``on``; ``False`` for any other
        non-blank value; *default* when unset or blank.
    """
    normalized = (os.getenv(name) or "").strip().lower()
    if not normalized:
        return default
    return normalized in {"1", "true", "yes", "y", "on"}
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_sites(raw: str) -> list[str]:
    """Split a comma-separated site list into trimmed, non-empty names.

    Args:
        raw: Comma-separated string such as ``"indeed, linkedin"``.

    Returns:
        The names in their original order, with surrounding whitespace
        removed and empty entries dropped.
    """
    return [site for site in map(str.strip, raw.split(",")) if site]
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Scrape jobs via JobSpy using env-var settings and write CSV + JSON.

    Settings are read from ``JOBSPY_*`` environment variables, each with a
    built-in default. Output paths come from ``JOBSPY_OUTPUT_CSV`` (default
    ``jobs.csv``) and ``JOBSPY_OUTPUT_JSON`` (default: the CSV path with a
    ``.json`` suffix); missing parent directories are created.

    Returns:
        ``0`` once both output files have been written.
    """
    # Resolve both output locations and make sure their folders exist.
    csv_path = Path(_env_str("JOBSPY_OUTPUT_CSV", "jobs.csv"))
    json_path = Path(_env_str("JOBSPY_OUTPUT_JSON", str(csv_path.with_suffix(".json"))))
    for destination in (csv_path, json_path):
        destination.parent.mkdir(parents=True, exist_ok=True)

    scrape_options = {
        "site_name": _parse_sites(_env_str("JOBSPY_SITES", "indeed,linkedin")),
        "search_term": _env_str("JOBSPY_SEARCH_TERM", "web developer"),
        "location": _env_str("JOBSPY_LOCATION", "UK"),
        "results_wanted": _env_int("JOBSPY_RESULTS_WANTED", 200),
        "hours_old": _env_int("JOBSPY_HOURS_OLD", 72),
        "country_indeed": _env_str("JOBSPY_COUNTRY_INDEED", "UK"),
        "linkedin_fetch_description": _env_bool("JOBSPY_LINKEDIN_FETCH_DESCRIPTION", True),
    }
    # NOTE(review): the result is used like a pandas DataFrame (len/to_csv/to_json) — confirm.
    jobs = scrape_jobs(**scrape_options)
    print(f"Found {len(jobs)} jobs")

    # Quote every non-numeric field and use a backslash as the escape character.
    csv_options = {"quoting": csv.QUOTE_NONNUMERIC, "escapechar": "\\", "index": False}
    jobs.to_csv(csv_path, **csv_options)
    jobs.to_json(json_path, orient="records", force_ascii=False)

    print(f"Wrote CSV: {csv_path}")
    print(f"Wrote JSON: {json_path}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
|
|
||||||
@ -19,3 +19,14 @@ PIPELINE_MIN_SCORE=50
|
|||||||
# RXResume credentials (for PDF generation)
|
# RXResume credentials (for PDF generation)
|
||||||
RXRESUME_EMAIL=
|
RXRESUME_EMAIL=
|
||||||
RXRESUME_PASSWORD=
|
RXRESUME_PASSWORD=
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# JobSpy (Indeed/LinkedIn scraping) - optional
|
||||||
|
# =============================================================================
|
||||||
|
JOBSPY_SITES=indeed,linkedin
|
||||||
|
JOBSPY_SEARCH_TERM=web developer
|
||||||
|
JOBSPY_LOCATION=UK
|
||||||
|
JOBSPY_RESULTS_WANTED=200
|
||||||
|
JOBSPY_HOURS_OLD=72
|
||||||
|
JOBSPY_COUNTRY_INDEED=UK
|
||||||
|
JOBSPY_LINKEDIN_FETCH_DESCRIPTION=1
|
||||||
|
|||||||
211
orchestrator/package-lock.json
generated
211
orchestrator/package-lock.json
generated
@ -9,6 +9,7 @@
|
|||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@radix-ui/react-alert-dialog": "^1.1.15",
|
"@radix-ui/react-alert-dialog": "^1.1.15",
|
||||||
|
"@radix-ui/react-dropdown-menu": "^2.1.15",
|
||||||
"@radix-ui/react-progress": "^1.1.8",
|
"@radix-ui/react-progress": "^1.1.8",
|
||||||
"@radix-ui/react-separator": "^1.1.8",
|
"@radix-ui/react-separator": "^1.1.8",
|
||||||
"@radix-ui/react-slot": "^1.2.4",
|
"@radix-ui/react-slot": "^1.2.4",
|
||||||
@ -1229,6 +1230,40 @@
|
|||||||
"node": ">=12"
|
"node": ">=12"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@floating-ui/core": {
|
||||||
|
"version": "1.7.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.3.tgz",
|
||||||
|
"integrity": "sha512-sGnvb5dmrJaKEZ+LDIpguvdX3bDlEllmv4/ClQ9awcmCZrlx5jQyyMWFM5kBI+EyNOCDDiKk8il0zeuX3Zlg/w==",
|
||||||
|
"dependencies": {
|
||||||
|
"@floating-ui/utils": "^0.2.10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@floating-ui/dom": {
|
||||||
|
"version": "1.7.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.7.4.tgz",
|
||||||
|
"integrity": "sha512-OOchDgh4F2CchOX94cRVqhvy7b3AFb+/rQXyswmzmGakRfkMgoWVjfnLWkRirfLEfuD4ysVW16eXzwt3jHIzKA==",
|
||||||
|
"dependencies": {
|
||||||
|
"@floating-ui/core": "^1.7.3",
|
||||||
|
"@floating-ui/utils": "^0.2.10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@floating-ui/react-dom": {
|
||||||
|
"version": "2.1.6",
|
||||||
|
"resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.6.tgz",
|
||||||
|
"integrity": "sha512-4JX6rEatQEvlmgU80wZyq9RT96HZJa88q8hp0pBd+LrczeDI4o6uA2M+uvxngVHo4Ihr8uibXxH6+70zhAFrVw==",
|
||||||
|
"dependencies": {
|
||||||
|
"@floating-ui/dom": "^1.7.4"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"react": ">=16.8.0",
|
||||||
|
"react-dom": ">=16.8.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@floating-ui/utils": {
|
||||||
|
"version": "0.2.10",
|
||||||
|
"resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.10.tgz",
|
||||||
|
"integrity": "sha512-aGTxbpbg8/b5JfU1HXSrbH3wXZuLPJcNEcZQFMxLs3oSzgtVu6nFPkbbGGUvBcUjKV2YyB9Wxxabo+HEH9tcRQ=="
|
||||||
|
},
|
||||||
"node_modules/@jridgewell/gen-mapping": {
|
"node_modules/@jridgewell/gen-mapping": {
|
||||||
"version": "0.3.13",
|
"version": "0.3.13",
|
||||||
"resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz",
|
"resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz",
|
||||||
@ -1335,6 +1370,28 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@radix-ui/react-arrow": {
|
||||||
|
"version": "1.1.7",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
|
||||||
|
"integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/react-primitive": "2.1.3"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"@types/react-dom": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||||
|
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@types/react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@radix-ui/react-collection": {
|
"node_modules/@radix-ui/react-collection": {
|
||||||
"version": "1.1.7",
|
"version": "1.1.7",
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz",
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz",
|
||||||
@ -1497,6 +1554,34 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@radix-ui/react-dropdown-menu": {
|
||||||
|
"version": "2.1.16",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-dropdown-menu/-/react-dropdown-menu-2.1.16.tgz",
|
||||||
|
"integrity": "sha512-1PLGQEynI/3OX/ftV54COn+3Sud/Mn8vALg2rWnBLnRaGtJDduNW/22XjlGgPdpcIbiQxjKtb7BkcjP00nqfJw==",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/primitive": "1.1.3",
|
||||||
|
"@radix-ui/react-compose-refs": "1.1.2",
|
||||||
|
"@radix-ui/react-context": "1.1.2",
|
||||||
|
"@radix-ui/react-id": "1.1.1",
|
||||||
|
"@radix-ui/react-menu": "2.1.16",
|
||||||
|
"@radix-ui/react-primitive": "2.1.3",
|
||||||
|
"@radix-ui/react-use-controllable-state": "1.2.2"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"@types/react-dom": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||||
|
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@types/react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@radix-ui/react-focus-guards": {
|
"node_modules/@radix-ui/react-focus-guards": {
|
||||||
"version": "1.1.3",
|
"version": "1.1.3",
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz",
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz",
|
||||||
@ -1552,6 +1637,93 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@radix-ui/react-menu": {
|
||||||
|
"version": "2.1.16",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.1.16.tgz",
|
||||||
|
"integrity": "sha512-72F2T+PLlphrqLcAotYPp0uJMr5SjP5SL01wfEspJbru5Zs5vQaSHb4VB3ZMJPimgHHCHG7gMOeOB9H3Hdmtxg==",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/primitive": "1.1.3",
|
||||||
|
"@radix-ui/react-collection": "1.1.7",
|
||||||
|
"@radix-ui/react-compose-refs": "1.1.2",
|
||||||
|
"@radix-ui/react-context": "1.1.2",
|
||||||
|
"@radix-ui/react-direction": "1.1.1",
|
||||||
|
"@radix-ui/react-dismissable-layer": "1.1.11",
|
||||||
|
"@radix-ui/react-focus-guards": "1.1.3",
|
||||||
|
"@radix-ui/react-focus-scope": "1.1.7",
|
||||||
|
"@radix-ui/react-id": "1.1.1",
|
||||||
|
"@radix-ui/react-popper": "1.2.8",
|
||||||
|
"@radix-ui/react-portal": "1.1.9",
|
||||||
|
"@radix-ui/react-presence": "1.1.5",
|
||||||
|
"@radix-ui/react-primitive": "2.1.3",
|
||||||
|
"@radix-ui/react-roving-focus": "1.1.11",
|
||||||
|
"@radix-ui/react-slot": "1.2.3",
|
||||||
|
"@radix-ui/react-use-callback-ref": "1.1.1",
|
||||||
|
"aria-hidden": "^1.2.4",
|
||||||
|
"react-remove-scroll": "^2.6.3"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"@types/react-dom": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||||
|
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@types/react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@radix-ui/react-menu/node_modules/@radix-ui/react-slot": {
|
||||||
|
"version": "1.2.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
|
||||||
|
"integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/react-compose-refs": "1.1.2"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@radix-ui/react-popper": {
|
||||||
|
"version": "1.2.8",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz",
|
||||||
|
"integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==",
|
||||||
|
"dependencies": {
|
||||||
|
"@floating-ui/react-dom": "^2.0.0",
|
||||||
|
"@radix-ui/react-arrow": "1.1.7",
|
||||||
|
"@radix-ui/react-compose-refs": "1.1.2",
|
||||||
|
"@radix-ui/react-context": "1.1.2",
|
||||||
|
"@radix-ui/react-primitive": "2.1.3",
|
||||||
|
"@radix-ui/react-use-callback-ref": "1.1.1",
|
||||||
|
"@radix-ui/react-use-layout-effect": "1.1.1",
|
||||||
|
"@radix-ui/react-use-rect": "1.1.1",
|
||||||
|
"@radix-ui/react-use-size": "1.1.1",
|
||||||
|
"@radix-ui/rect": "1.1.1"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"@types/react-dom": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||||
|
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@types/react-dom": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@radix-ui/react-portal": {
|
"node_modules/@radix-ui/react-portal": {
|
||||||
"version": "1.1.9",
|
"version": "1.1.9",
|
||||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
|
||||||
@ -1896,6 +2068,45 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@radix-ui/react-use-rect": {
|
||||||
|
"version": "1.1.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-rect/-/react-use-rect-1.1.1.tgz",
|
||||||
|
"integrity": "sha512-QTYuDesS0VtuHNNvMh+CjlKJ4LJickCMUAqjlE3+j8w+RlRpwyX3apEQKGFzbZGdo7XNG1tXa+bQqIE7HIXT2w==",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/rect": "1.1.1"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@radix-ui/react-use-size": {
|
||||||
|
"version": "1.1.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/react-use-size/-/react-use-size-1.1.1.tgz",
|
||||||
|
"integrity": "sha512-ewrXRDTAqAXlkl6t/fkXWNAhFX9I+CkKlw6zjEwk86RSPKwZr3xpBRso655aqYafwtnbpHLj6toFzmd6xdVptQ==",
|
||||||
|
"dependencies": {
|
||||||
|
"@radix-ui/react-use-layout-effect": "1.1.1"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@types/react": "*",
|
||||||
|
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@types/react": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@radix-ui/rect": {
|
||||||
|
"version": "1.1.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/@radix-ui/rect/-/rect-1.1.1.tgz",
|
||||||
|
"integrity": "sha512-HPwpGIzkl28mWyZqG52jiqDJ12waP11Pa1lGoiyUkIEuMLBP0oeK/C89esbXrxsky5we7dfd8U58nm0SgAWpVw=="
|
||||||
|
},
|
||||||
"node_modules/@rolldown/pluginutils": {
|
"node_modules/@rolldown/pluginutils": {
|
||||||
"version": "1.0.0-beta.27",
|
"version": "1.0.0-beta.27",
|
||||||
"resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.27.tgz",
|
"resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.27.tgz",
|
||||||
|
|||||||
@ -18,6 +18,7 @@
|
|||||||
"pipeline:run": "tsx src/server/pipeline/run.ts"
|
"pipeline:run": "tsx src/server/pipeline/run.ts"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@radix-ui/react-dropdown-menu": "^2.1.15",
|
||||||
"@radix-ui/react-alert-dialog": "^1.1.15",
|
"@radix-ui/react-alert-dialog": "^1.1.15",
|
||||||
"@radix-ui/react-progress": "^1.1.8",
|
"@radix-ui/react-progress": "^1.1.8",
|
||||||
"@radix-ui/react-separator": "^1.1.8",
|
"@radix-ui/react-separator": "^1.1.8",
|
||||||
|
|||||||
@ -6,10 +6,13 @@ import React, { useCallback, useEffect, useState } from "react";
|
|||||||
import { toast } from "sonner";
|
import { toast } from "sonner";
|
||||||
|
|
||||||
import { Toaster } from "@/components/ui/sonner";
|
import { Toaster } from "@/components/ui/sonner";
|
||||||
import type { Job, JobStatus } from "../shared/types";
|
import type { Job, JobSource, JobStatus } from "../shared/types";
|
||||||
import { Header, JobList, PipelineProgress, Stats } from "./components";
|
import { Header, JobList, PipelineProgress, Stats } from "./components";
|
||||||
import * as api from "./api";
|
import * as api from "./api";
|
||||||
|
|
||||||
|
const DEFAULT_PIPELINE_SOURCES: JobSource[] = ["gradcracker", "indeed", "linkedin"];
|
||||||
|
const PIPELINE_SOURCES_STORAGE_KEY = "jobops.pipeline.sources";
|
||||||
|
|
||||||
export const App: React.FC = () => {
|
export const App: React.FC = () => {
|
||||||
const [jobs, setJobs] = useState<Job[]>([]);
|
const [jobs, setJobs] = useState<Job[]>([]);
|
||||||
const [stats, setStats] = useState<Record<JobStatus, number>>({
|
const [stats, setStats] = useState<Record<JobStatus, number>>({
|
||||||
@ -24,6 +27,27 @@ export const App: React.FC = () => {
|
|||||||
const [isPipelineRunning, setIsPipelineRunning] = useState(false);
|
const [isPipelineRunning, setIsPipelineRunning] = useState(false);
|
||||||
const [processingJobId, setProcessingJobId] = useState<string | null>(null);
|
const [processingJobId, setProcessingJobId] = useState<string | null>(null);
|
||||||
const [isProcessingAll, setIsProcessingAll] = useState(false);
|
const [isProcessingAll, setIsProcessingAll] = useState(false);
|
||||||
|
const [pipelineSources, setPipelineSources] = useState<JobSource[]>(() => {
|
||||||
|
try {
|
||||||
|
const raw = localStorage.getItem(PIPELINE_SOURCES_STORAGE_KEY);
|
||||||
|
if (!raw) return DEFAULT_PIPELINE_SOURCES;
|
||||||
|
const parsed = JSON.parse(raw) as unknown;
|
||||||
|
const allowed: JobSource[] = ["gradcracker", "indeed", "linkedin"];
|
||||||
|
if (!Array.isArray(parsed)) return DEFAULT_PIPELINE_SOURCES;
|
||||||
|
const next = parsed.filter((value): value is JobSource => allowed.includes(value));
|
||||||
|
return next.length > 0 ? next : DEFAULT_PIPELINE_SOURCES;
|
||||||
|
} catch {
|
||||||
|
return DEFAULT_PIPELINE_SOURCES;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
try {
|
||||||
|
localStorage.setItem(PIPELINE_SOURCES_STORAGE_KEY, JSON.stringify(pipelineSources));
|
||||||
|
} catch {
|
||||||
|
// Ignore localStorage errors
|
||||||
|
}
|
||||||
|
}, [pipelineSources]);
|
||||||
|
|
||||||
const loadJobs = useCallback(async () => {
|
const loadJobs = useCallback(async () => {
|
||||||
try {
|
try {
|
||||||
@ -63,8 +87,10 @@ export const App: React.FC = () => {
|
|||||||
const handleRunPipeline = async () => {
|
const handleRunPipeline = async () => {
|
||||||
try {
|
try {
|
||||||
setIsPipelineRunning(true);
|
setIsPipelineRunning(true);
|
||||||
await api.runPipeline();
|
await api.runPipeline({ sources: pipelineSources });
|
||||||
toast.message("Pipeline started", { description: "This may take a few minutes." });
|
toast.message("Pipeline started", {
|
||||||
|
description: `Sources: ${pipelineSources.join(", ")}. This may take a few minutes.`,
|
||||||
|
});
|
||||||
|
|
||||||
const pollInterval = setInterval(async () => {
|
const pollInterval = setInterval(async () => {
|
||||||
try {
|
try {
|
||||||
@ -170,6 +196,8 @@ export const App: React.FC = () => {
|
|||||||
onClearDatabase={handleClearDatabase}
|
onClearDatabase={handleClearDatabase}
|
||||||
isPipelineRunning={isPipelineRunning}
|
isPipelineRunning={isPipelineRunning}
|
||||||
isLoading={isLoading}
|
isLoading={isLoading}
|
||||||
|
pipelineSources={pipelineSources}
|
||||||
|
onPipelineSourcesChange={setPipelineSources}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<main className="container mx-auto max-w-7xl space-y-6 px-4 py-6 pb-12">
|
<main className="container mx-auto max-w-7xl space-y-6 px-4 py-6 pb-12">
|
||||||
@ -190,4 +218,3 @@ export const App: React.FC = () => {
|
|||||||
</>
|
</>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -7,6 +7,7 @@ import type {
|
|||||||
ApiResponse,
|
ApiResponse,
|
||||||
JobsListResponse,
|
JobsListResponse,
|
||||||
PipelineStatusResponse,
|
PipelineStatusResponse,
|
||||||
|
JobSource,
|
||||||
PipelineRun
|
PipelineRun
|
||||||
} from '../../shared/types';
|
} from '../../shared/types';
|
||||||
|
|
||||||
@ -83,6 +84,7 @@ export async function getPipelineRuns(): Promise<PipelineRun[]> {
|
|||||||
export async function runPipeline(config?: {
|
export async function runPipeline(config?: {
|
||||||
topN?: number;
|
topN?: number;
|
||||||
minSuitabilityScore?: number;
|
minSuitabilityScore?: number;
|
||||||
|
sources?: JobSource[];
|
||||||
}): Promise<{ message: string }> {
|
}): Promise<{ message: string }> {
|
||||||
return fetchApi<{ message: string }>('/pipeline/run', {
|
return fetchApi<{ message: string }>('/pipeline/run', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
|
|||||||
@ -3,7 +3,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import React from "react";
|
import React from "react";
|
||||||
import { Loader2, Play, RefreshCcw, Rocket, Trash2 } from "lucide-react";
|
import { ChevronDown, Loader2, Play, RefreshCcw, Rocket, Trash2 } from "lucide-react";
|
||||||
|
|
||||||
import { Button } from "@/components/ui/button";
|
import { Button } from "@/components/ui/button";
|
||||||
import {
|
import {
|
||||||
@ -17,6 +17,16 @@ import {
|
|||||||
AlertDialogTitle,
|
AlertDialogTitle,
|
||||||
AlertDialogTrigger,
|
AlertDialogTrigger,
|
||||||
} from "@/components/ui/alert-dialog";
|
} from "@/components/ui/alert-dialog";
|
||||||
|
import {
|
||||||
|
DropdownMenu,
|
||||||
|
DropdownMenuCheckboxItem,
|
||||||
|
DropdownMenuContent,
|
||||||
|
DropdownMenuItem,
|
||||||
|
DropdownMenuLabel,
|
||||||
|
DropdownMenuSeparator,
|
||||||
|
DropdownMenuTrigger,
|
||||||
|
} from "@/components/ui/dropdown-menu";
|
||||||
|
import type { JobSource } from "../../shared/types";
|
||||||
|
|
||||||
interface HeaderProps {
|
interface HeaderProps {
|
||||||
onRunPipeline: () => void;
|
onRunPipeline: () => void;
|
||||||
@ -24,6 +34,8 @@ interface HeaderProps {
|
|||||||
onClearDatabase: () => void;
|
onClearDatabase: () => void;
|
||||||
isPipelineRunning: boolean;
|
isPipelineRunning: boolean;
|
||||||
isLoading: boolean;
|
isLoading: boolean;
|
||||||
|
pipelineSources: JobSource[];
|
||||||
|
onPipelineSourcesChange: (sources: JobSource[]) => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
export const Header: React.FC<HeaderProps> = ({
|
export const Header: React.FC<HeaderProps> = ({
|
||||||
@ -32,7 +44,26 @@ export const Header: React.FC<HeaderProps> = ({
|
|||||||
onClearDatabase,
|
onClearDatabase,
|
||||||
isPipelineRunning,
|
isPipelineRunning,
|
||||||
isLoading,
|
isLoading,
|
||||||
|
pipelineSources,
|
||||||
|
onPipelineSourcesChange,
|
||||||
}) => {
|
}) => {
|
||||||
|
const sourceLabel: Record<JobSource, string> = {
|
||||||
|
gradcracker: "Gradcracker",
|
||||||
|
indeed: "Indeed",
|
||||||
|
linkedin: "LinkedIn",
|
||||||
|
};
|
||||||
|
|
||||||
|
const orderedSources: JobSource[] = ["gradcracker", "indeed", "linkedin"];
|
||||||
|
|
||||||
|
const toggleSource = (source: JobSource, checked: boolean) => {
|
||||||
|
const next = checked
|
||||||
|
? Array.from(new Set([...pipelineSources, source]))
|
||||||
|
: pipelineSources.filter((s) => s !== source);
|
||||||
|
|
||||||
|
if (next.length === 0) return;
|
||||||
|
onPipelineSourcesChange(next);
|
||||||
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<header className="sticky top-0 z-40 border-b bg-background/80 backdrop-blur supports-[backdrop-filter]:bg-background/60">
|
<header className="sticky top-0 z-40 border-b bg-background/80 backdrop-blur supports-[backdrop-filter]:bg-background/60">
|
||||||
<div className="container mx-auto flex max-w-7xl items-center justify-between gap-4 px-4 py-4">
|
<div className="container mx-auto flex max-w-7xl items-center justify-between gap-4 px-4 py-4">
|
||||||
@ -81,19 +112,62 @@ export const Header: React.FC<HeaderProps> = ({
|
|||||||
<span className="hidden sm:inline">Refresh</span>
|
<span className="hidden sm:inline">Refresh</span>
|
||||||
</Button>
|
</Button>
|
||||||
|
|
||||||
<Button size="sm" onClick={onRunPipeline} disabled={isPipelineRunning}>
|
<div className="flex items-center">
|
||||||
{isPipelineRunning ? (
|
<Button
|
||||||
<>
|
size="sm"
|
||||||
<Loader2 className="h-4 w-4 animate-spin" />
|
onClick={onRunPipeline}
|
||||||
Running...
|
disabled={isPipelineRunning}
|
||||||
</>
|
className="rounded-r-none"
|
||||||
) : (
|
>
|
||||||
<>
|
{isPipelineRunning ? (
|
||||||
<Play className="h-4 w-4" />
|
<>
|
||||||
Run Pipeline
|
<Loader2 className="h-4 w-4 animate-spin" />
|
||||||
</>
|
Running...
|
||||||
)}
|
</>
|
||||||
</Button>
|
) : (
|
||||||
|
<>
|
||||||
|
<Play className="h-4 w-4" />
|
||||||
|
Run Pipeline
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</Button>
|
||||||
|
|
||||||
|
<DropdownMenu>
|
||||||
|
<DropdownMenuTrigger asChild>
|
||||||
|
<Button
|
||||||
|
size="sm"
|
||||||
|
disabled={isPipelineRunning}
|
||||||
|
className="rounded-l-none border-l border-primary-foreground/20 px-2"
|
||||||
|
aria-label="Select pipeline sources"
|
||||||
|
>
|
||||||
|
<ChevronDown className="h-4 w-4" />
|
||||||
|
</Button>
|
||||||
|
</DropdownMenuTrigger>
|
||||||
|
<DropdownMenuContent align="end" className="w-56">
|
||||||
|
<DropdownMenuLabel>Sources</DropdownMenuLabel>
|
||||||
|
<DropdownMenuSeparator />
|
||||||
|
{orderedSources.map((source) => (
|
||||||
|
<DropdownMenuCheckboxItem
|
||||||
|
key={source}
|
||||||
|
checked={pipelineSources.includes(source)}
|
||||||
|
onCheckedChange={(checked) => toggleSource(source, Boolean(checked))}
|
||||||
|
>
|
||||||
|
{sourceLabel[source]}
|
||||||
|
</DropdownMenuCheckboxItem>
|
||||||
|
))}
|
||||||
|
<DropdownMenuSeparator />
|
||||||
|
<DropdownMenuItem onSelect={() => onPipelineSourcesChange(orderedSources)}>
|
||||||
|
All sources
|
||||||
|
</DropdownMenuItem>
|
||||||
|
<DropdownMenuItem onSelect={() => onPipelineSourcesChange(["gradcracker"])}>
|
||||||
|
Gradcracker only
|
||||||
|
</DropdownMenuItem>
|
||||||
|
<DropdownMenuItem onSelect={() => onPipelineSourcesChange(["indeed", "linkedin"])}>
|
||||||
|
Indeed + LinkedIn only
|
||||||
|
</DropdownMenuItem>
|
||||||
|
</DropdownMenuContent>
|
||||||
|
</DropdownMenu>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</header>
|
</header>
|
||||||
|
|||||||
@ -55,6 +55,8 @@ export const JobCard: React.FC<JobCardProps> = ({
|
|||||||
}) => {
|
}) => {
|
||||||
const sourceLabel: Record<Job["source"], string> = {
|
const sourceLabel: Record<Job["source"], string> = {
|
||||||
gradcracker: "Gradcracker",
|
gradcracker: "Gradcracker",
|
||||||
|
indeed: "Indeed",
|
||||||
|
linkedin: "LinkedIn",
|
||||||
};
|
};
|
||||||
|
|
||||||
const hasPdf = !!job.pdfPath;
|
const hasPdf = !!job.pdfPath;
|
||||||
|
|||||||
193
orchestrator/src/components/ui/dropdown-menu.tsx
Normal file
193
orchestrator/src/components/ui/dropdown-menu.tsx
Normal file
@ -0,0 +1,193 @@
|
|||||||
|
import * as React from "react"
|
||||||
|
import * as DropdownMenuPrimitive from "@radix-ui/react-dropdown-menu"
|
||||||
|
import { Check, ChevronRight, Circle } from "lucide-react"
|
||||||
|
|
||||||
|
import { cn } from "@/lib/utils"
|
||||||
|
|
||||||
|
const DropdownMenu = DropdownMenuPrimitive.Root
|
||||||
|
|
||||||
|
const DropdownMenuTrigger = DropdownMenuPrimitive.Trigger
|
||||||
|
|
||||||
|
const DropdownMenuGroup = DropdownMenuPrimitive.Group
|
||||||
|
|
||||||
|
const DropdownMenuPortal = DropdownMenuPrimitive.Portal
|
||||||
|
|
||||||
|
const DropdownMenuSub = DropdownMenuPrimitive.Sub
|
||||||
|
|
||||||
|
const DropdownMenuRadioGroup = DropdownMenuPrimitive.RadioGroup
|
||||||
|
|
||||||
|
// Sub-menu trigger row: renders its children followed by a right-pointing
// chevron. The optional `inset` prop adds extra left padding ("pl-8").
const DropdownMenuSubTrigger = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.SubTrigger>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.SubTrigger> & {
    inset?: boolean
  }
>(({ className, inset, children, ...rest }, forwardedRef) => {
  // Caller-supplied className is passed last to cn().
  const triggerClassName = cn(
    "flex cursor-default select-none items-center rounded-sm px-2 py-1.5 text-sm outline-none focus:bg-accent data-[state=open]:bg-accent",
    inset && "pl-8",
    className
  )

  return (
    <DropdownMenuPrimitive.SubTrigger
      ref={forwardedRef}
      className={triggerClassName}
      {...rest}
    >
      {children}
      <ChevronRight className="ml-auto h-4 w-4" />
    </DropdownMenuPrimitive.SubTrigger>
  )
})
DropdownMenuSubTrigger.displayName = DropdownMenuPrimitive.SubTrigger.displayName
|
||||||
|
|
||||||
|
/**
 * Styled wrapper around the Radix `SubContent` primitive.
 *
 * Caller-supplied `className` is merged after the default popover classes.
 */
const DropdownMenuSubContent = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.SubContent>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.SubContent>
>((allProps, forwardedRef) => {
  const { className, ...rest } = allProps
  const classes = cn(
    "z-50 min-w-[8rem] overflow-hidden rounded-md border bg-popover p-1 text-popover-foreground shadow-md data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2",
    className
  )

  return (
    <DropdownMenuPrimitive.SubContent
      ref={forwardedRef}
      className={classes}
      {...rest}
    />
  )
})
DropdownMenuSubContent.displayName = DropdownMenuPrimitive.SubContent.displayName
|
||||||
|
|
||||||
|
/**
 * Styled wrapper around the Radix `Content` primitive, rendered inside a
 * Radix `Portal`.
 *
 * `sideOffset` defaults to 4 when the caller does not provide one.
 */
const DropdownMenuContent = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Content>
>((allProps, forwardedRef) => {
  const { className, sideOffset = 4, ...rest } = allProps
  const classes = cn(
    "z-50 min-w-[8rem] overflow-hidden rounded-md border bg-popover p-1 text-popover-foreground shadow-md data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2",
    className
  )

  return (
    <DropdownMenuPrimitive.Portal>
      <DropdownMenuPrimitive.Content
        ref={forwardedRef}
        sideOffset={sideOffset}
        className={classes}
        {...rest}
      />
    </DropdownMenuPrimitive.Portal>
  )
})
DropdownMenuContent.displayName = DropdownMenuPrimitive.Content.displayName
|
||||||
|
|
||||||
|
/**
 * Styled wrapper around the Radix `Item` primitive.
 *
 * The optional `inset` prop adds extra left padding (`pl-8`).
 */
const DropdownMenuItem = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Item>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Item> & {
    inset?: boolean
  }
>((allProps, forwardedRef) => {
  const { className, inset, ...rest } = allProps
  const classes = cn(
    "relative flex cursor-default select-none items-center rounded-sm px-2 py-1.5 text-sm outline-none transition-colors focus:bg-accent focus:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50",
    inset && "pl-8",
    className
  )

  return (
    <DropdownMenuPrimitive.Item
      ref={forwardedRef}
      className={classes}
      {...rest}
    />
  )
})
DropdownMenuItem.displayName = DropdownMenuPrimitive.Item.displayName
|
||||||
|
|
||||||
|
/**
 * Styled wrapper around the Radix `CheckboxItem` primitive.
 *
 * A check icon is placed inside the Radix `ItemIndicator`, positioned in the
 * left gutter ahead of the item's children.
 */
const DropdownMenuCheckboxItem = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.CheckboxItem>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.CheckboxItem>
>((allProps, forwardedRef) => {
  const { className, children, checked, ...rest } = allProps
  const classes = cn(
    "relative flex cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 text-sm outline-none transition-colors focus:bg-accent focus:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50",
    className
  )

  return (
    <DropdownMenuPrimitive.CheckboxItem
      ref={forwardedRef}
      className={classes}
      checked={checked}
      {...rest}
    >
      <span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
        <DropdownMenuPrimitive.ItemIndicator>
          <Check className="h-4 w-4" />
        </DropdownMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </DropdownMenuPrimitive.CheckboxItem>
  )
})
DropdownMenuCheckboxItem.displayName =
  DropdownMenuPrimitive.CheckboxItem.displayName
|
||||||
|
|
||||||
|
/**
 * Styled wrapper around the Radix `RadioItem` primitive.
 *
 * A small filled circle is placed inside the Radix `ItemIndicator`,
 * positioned in the left gutter ahead of the item's children.
 */
const DropdownMenuRadioItem = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.RadioItem>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.RadioItem>
>((allProps, forwardedRef) => {
  const { className, children, ...rest } = allProps
  const classes = cn(
    "relative flex cursor-default select-none items-center rounded-sm py-1.5 pl-8 pr-2 text-sm outline-none transition-colors focus:bg-accent focus:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50",
    className
  )

  return (
    <DropdownMenuPrimitive.RadioItem
      ref={forwardedRef}
      className={classes}
      {...rest}
    >
      <span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
        <DropdownMenuPrimitive.ItemIndicator>
          <Circle className="h-2 w-2 fill-current" />
        </DropdownMenuPrimitive.ItemIndicator>
      </span>
      {children}
    </DropdownMenuPrimitive.RadioItem>
  )
})
DropdownMenuRadioItem.displayName = DropdownMenuPrimitive.RadioItem.displayName
|
||||||
|
|
||||||
|
/**
 * Styled wrapper around the Radix `Label` primitive.
 *
 * The optional `inset` prop adds extra left padding (`pl-8`).
 */
const DropdownMenuLabel = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Label>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Label> & {
    inset?: boolean
  }
>((allProps, forwardedRef) => {
  const { className, inset, ...rest } = allProps
  const classes = cn("px-2 py-1.5 text-sm font-semibold", inset && "pl-8", className)

  return (
    <DropdownMenuPrimitive.Label
      ref={forwardedRef}
      className={classes}
      {...rest}
    />
  )
})
DropdownMenuLabel.displayName = DropdownMenuPrimitive.Label.displayName
|
||||||
|
|
||||||
|
/**
 * Styled wrapper around the Radix `Separator` primitive: a one-pixel-high
 * muted rule between groups of items.
 */
const DropdownMenuSeparator = React.forwardRef<
  React.ElementRef<typeof DropdownMenuPrimitive.Separator>,
  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Separator>
>((allProps, forwardedRef) => {
  const { className, ...rest } = allProps
  const classes = cn("-mx-1 my-1 h-px bg-muted", className)

  return (
    <DropdownMenuPrimitive.Separator
      ref={forwardedRef}
      className={classes}
      {...rest}
    />
  )
})
DropdownMenuSeparator.displayName = DropdownMenuPrimitive.Separator.displayName
|
||||||
|
|
||||||
|
/**
 * Plain `<span>` pushed to the right edge of a menu item (`ml-auto`), styled
 * as small, widely tracked, dimmed text.
 */
const DropdownMenuShortcut = (
  allProps: React.HTMLAttributes<HTMLSpanElement>
) => {
  const { className, ...rest } = allProps
  const classes = cn("ml-auto text-xs tracking-widest opacity-60", className)

  return <span className={classes} {...rest} />
}
DropdownMenuShortcut.displayName = "DropdownMenuShortcut"
|
||||||
|
|
||||||
|
export {
|
||||||
|
DropdownMenu,
|
||||||
|
DropdownMenuTrigger,
|
||||||
|
DropdownMenuContent,
|
||||||
|
DropdownMenuItem,
|
||||||
|
DropdownMenuCheckboxItem,
|
||||||
|
DropdownMenuRadioItem,
|
||||||
|
DropdownMenuLabel,
|
||||||
|
DropdownMenuSeparator,
|
||||||
|
DropdownMenuShortcut,
|
||||||
|
DropdownMenuGroup,
|
||||||
|
DropdownMenuPortal,
|
||||||
|
DropdownMenuSub,
|
||||||
|
DropdownMenuSubContent,
|
||||||
|
DropdownMenuSubTrigger,
|
||||||
|
DropdownMenuRadioGroup,
|
||||||
|
}
|
||||||
|
|
||||||
@ -280,6 +280,7 @@ apiRouter.get('/pipeline/runs', async (req: Request, res: Response) => {
|
|||||||
const runPipelineSchema = z.object({
|
const runPipelineSchema = z.object({
|
||||||
topN: z.number().min(1).max(50).optional(),
|
topN: z.number().min(1).max(50).optional(),
|
||||||
minSuitabilityScore: z.number().min(0).max(100).optional(),
|
minSuitabilityScore: z.number().min(0).max(100).optional(),
|
||||||
|
sources: z.array(z.enum(['gradcracker', 'indeed', 'linkedin'])).min(1).optional(),
|
||||||
});
|
});
|
||||||
|
|
||||||
apiRouter.post('/pipeline/run', async (req: Request, res: Response) => {
|
apiRouter.post('/pipeline/run', async (req: Request, res: Response) => {
|
||||||
|
|||||||
@ -26,6 +26,33 @@ const migrations = [
|
|||||||
`CREATE TABLE IF NOT EXISTS jobs (
|
`CREATE TABLE IF NOT EXISTS jobs (
|
||||||
id TEXT PRIMARY KEY,
|
id TEXT PRIMARY KEY,
|
||||||
source TEXT NOT NULL DEFAULT 'gradcracker',
|
source TEXT NOT NULL DEFAULT 'gradcracker',
|
||||||
|
source_job_id TEXT,
|
||||||
|
job_url_direct TEXT,
|
||||||
|
date_posted TEXT,
|
||||||
|
job_type TEXT,
|
||||||
|
salary_source TEXT,
|
||||||
|
salary_interval TEXT,
|
||||||
|
salary_min_amount REAL,
|
||||||
|
salary_max_amount REAL,
|
||||||
|
salary_currency TEXT,
|
||||||
|
is_remote INTEGER,
|
||||||
|
job_level TEXT,
|
||||||
|
job_function TEXT,
|
||||||
|
listing_type TEXT,
|
||||||
|
emails TEXT,
|
||||||
|
company_industry TEXT,
|
||||||
|
company_logo TEXT,
|
||||||
|
company_url_direct TEXT,
|
||||||
|
company_addresses TEXT,
|
||||||
|
company_num_employees TEXT,
|
||||||
|
company_revenue TEXT,
|
||||||
|
company_description TEXT,
|
||||||
|
skills TEXT,
|
||||||
|
experience_range TEXT,
|
||||||
|
company_rating REAL,
|
||||||
|
company_reviews_count INTEGER,
|
||||||
|
vacancy_count INTEGER,
|
||||||
|
work_from_home_type TEXT,
|
||||||
title TEXT NOT NULL,
|
title TEXT NOT NULL,
|
||||||
employer TEXT NOT NULL,
|
employer TEXT NOT NULL,
|
||||||
employer_url TEXT,
|
employer_url TEXT,
|
||||||
@ -65,6 +92,35 @@ const migrations = [
|
|||||||
`ALTER TABLE jobs ADD COLUMN source TEXT NOT NULL DEFAULT 'gradcracker'`,
|
`ALTER TABLE jobs ADD COLUMN source TEXT NOT NULL DEFAULT 'gradcracker'`,
|
||||||
`UPDATE jobs SET source = 'gradcracker' WHERE source IS NULL OR source = ''`,
|
`UPDATE jobs SET source = 'gradcracker' WHERE source IS NULL OR source = ''`,
|
||||||
|
|
||||||
|
// Add JobSpy columns for existing databases (safe to skip if already present)
|
||||||
|
`ALTER TABLE jobs ADD COLUMN source_job_id TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN job_url_direct TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN date_posted TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN job_type TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN salary_source TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN salary_interval TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN salary_min_amount REAL`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN salary_max_amount REAL`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN salary_currency TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN is_remote INTEGER`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN job_level TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN job_function TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN listing_type TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN emails TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN company_industry TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN company_logo TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN company_url_direct TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN company_addresses TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN company_num_employees TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN company_revenue TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN company_description TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN skills TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN experience_range TEXT`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN company_rating REAL`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN company_reviews_count INTEGER`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN vacancy_count INTEGER`,
|
||||||
|
`ALTER TABLE jobs ADD COLUMN work_from_home_type TEXT`,
|
||||||
|
|
||||||
`CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status)`,
|
`CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status)`,
|
||||||
`CREATE INDEX IF NOT EXISTS idx_jobs_discovered_at ON jobs(discovered_at)`,
|
`CREATE INDEX IF NOT EXISTS idx_jobs_discovered_at ON jobs(discovered_at)`,
|
||||||
`CREATE INDEX IF NOT EXISTS idx_pipeline_runs_started_at ON pipeline_runs(started_at)`,
|
`CREATE INDEX IF NOT EXISTS idx_pipeline_runs_started_at ON pipeline_runs(started_at)`,
|
||||||
@ -78,12 +134,12 @@ for (const migration of migrations) {
|
|||||||
console.log('✅ Migration applied');
|
console.log('✅ Migration applied');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
const message = error instanceof Error ? error.message : String(error);
|
const message = error instanceof Error ? error.message : String(error);
|
||||||
const isDuplicateSourceColumn =
|
const isDuplicateColumn =
|
||||||
migration.includes('ALTER TABLE jobs ADD COLUMN source') &&
|
migration.toLowerCase().includes('alter table jobs add column') &&
|
||||||
message.toLowerCase().includes('duplicate column name');
|
message.toLowerCase().includes('duplicate column name');
|
||||||
|
|
||||||
if (isDuplicateSourceColumn) {
|
if (isDuplicateColumn) {
|
||||||
console.log('↩️ Migration skipped (source column already exists)');
|
console.log('↩️ Migration skipped (column already exists)');
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -9,7 +9,10 @@ export const jobs = sqliteTable('jobs', {
|
|||||||
id: text('id').primaryKey(),
|
id: text('id').primaryKey(),
|
||||||
|
|
||||||
// From crawler
|
// From crawler
|
||||||
source: text('source', { enum: ['gradcracker'] }).notNull().default('gradcracker'),
|
source: text('source', { enum: ['gradcracker', 'indeed', 'linkedin'] }).notNull().default('gradcracker'),
|
||||||
|
sourceJobId: text('source_job_id'),
|
||||||
|
jobUrlDirect: text('job_url_direct'),
|
||||||
|
datePosted: text('date_posted'),
|
||||||
title: text('title').notNull(),
|
title: text('title').notNull(),
|
||||||
employer: text('employer').notNull(),
|
employer: text('employer').notNull(),
|
||||||
employerUrl: text('employer_url'),
|
employerUrl: text('employer_url'),
|
||||||
@ -22,6 +25,32 @@ export const jobs = sqliteTable('jobs', {
|
|||||||
degreeRequired: text('degree_required'),
|
degreeRequired: text('degree_required'),
|
||||||
starting: text('starting'),
|
starting: text('starting'),
|
||||||
jobDescription: text('job_description'),
|
jobDescription: text('job_description'),
|
||||||
|
|
||||||
|
// JobSpy fields (nullable for other sources)
|
||||||
|
jobType: text('job_type'),
|
||||||
|
salarySource: text('salary_source'),
|
||||||
|
salaryInterval: text('salary_interval'),
|
||||||
|
salaryMinAmount: real('salary_min_amount'),
|
||||||
|
salaryMaxAmount: real('salary_max_amount'),
|
||||||
|
salaryCurrency: text('salary_currency'),
|
||||||
|
isRemote: integer('is_remote', { mode: 'boolean' }),
|
||||||
|
jobLevel: text('job_level'),
|
||||||
|
jobFunction: text('job_function'),
|
||||||
|
listingType: text('listing_type'),
|
||||||
|
emails: text('emails'),
|
||||||
|
companyIndustry: text('company_industry'),
|
||||||
|
companyLogo: text('company_logo'),
|
||||||
|
companyUrlDirect: text('company_url_direct'),
|
||||||
|
companyAddresses: text('company_addresses'),
|
||||||
|
companyNumEmployees: text('company_num_employees'),
|
||||||
|
companyRevenue: text('company_revenue'),
|
||||||
|
companyDescription: text('company_description'),
|
||||||
|
skills: text('skills'),
|
||||||
|
experienceRange: text('experience_range'),
|
||||||
|
companyRating: real('company_rating'),
|
||||||
|
companyReviewsCount: integer('company_reviews_count'),
|
||||||
|
vacancyCount: integer('vacancy_count'),
|
||||||
|
workFromHomeType: text('work_from_home_type'),
|
||||||
|
|
||||||
// Orchestrator enrichments
|
// Orchestrator enrichments
|
||||||
status: text('status', {
|
status: text('status', {
|
||||||
|
|||||||
@ -14,13 +14,14 @@ import { readFile } from 'fs/promises';
|
|||||||
import { join, dirname } from 'path';
|
import { join, dirname } from 'path';
|
||||||
import { fileURLToPath } from 'url';
|
import { fileURLToPath } from 'url';
|
||||||
import { runCrawler } from '../services/crawler.js';
|
import { runCrawler } from '../services/crawler.js';
|
||||||
|
import { runJobSpy } from '../services/jobspy.js';
|
||||||
import { scoreAndRankJobs, scoreJobSuitability } from '../services/scorer.js';
|
import { scoreAndRankJobs, scoreJobSuitability } from '../services/scorer.js';
|
||||||
import { generateSummary } from '../services/summary.js';
|
import { generateSummary } from '../services/summary.js';
|
||||||
import { generatePdf } from '../services/pdf.js';
|
import { generatePdf } from '../services/pdf.js';
|
||||||
import * as jobsRepo from '../repositories/jobs.js';
|
import * as jobsRepo from '../repositories/jobs.js';
|
||||||
import * as pipelineRepo from '../repositories/pipeline.js';
|
import * as pipelineRepo from '../repositories/pipeline.js';
|
||||||
import { progressHelpers, resetProgress } from './progress.js';
|
import { progressHelpers, resetProgress, updateProgress } from './progress.js';
|
||||||
import type { Job, PipelineConfig } from '../../shared/types.js';
|
import type { CreateJobInput, Job, JobSource, PipelineConfig } from '../../shared/types.js';
|
||||||
|
|
||||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||||
const DEFAULT_PROFILE_PATH = join(__dirname, '../../../../resume-generator/base.json');
|
const DEFAULT_PROFILE_PATH = join(__dirname, '../../../../resume-generator/base.json');
|
||||||
@ -28,7 +29,7 @@ const DEFAULT_PROFILE_PATH = join(__dirname, '../../../../resume-generator/base.
|
|||||||
const DEFAULT_CONFIG: PipelineConfig = {
|
const DEFAULT_CONFIG: PipelineConfig = {
|
||||||
topN: 10,
|
topN: 10,
|
||||||
minSuitabilityScore: 50,
|
minSuitabilityScore: 50,
|
||||||
sources: ['gradcracker'],
|
sources: ['gradcracker', 'indeed', 'linkedin'],
|
||||||
profilePath: DEFAULT_PROFILE_PATH,
|
profilePath: DEFAULT_PROFILE_PATH,
|
||||||
outputDir: join(__dirname, '../../../data/pdfs'),
|
outputDir: join(__dirname, '../../../data/pdfs'),
|
||||||
};
|
};
|
||||||
@ -73,31 +74,65 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
|||||||
console.log('\n🕷️ Running crawler...');
|
console.log('\n🕷️ Running crawler...');
|
||||||
progressHelpers.startCrawling();
|
progressHelpers.startCrawling();
|
||||||
const existingJobUrls = await jobsRepo.getAllJobUrls();
|
const existingJobUrls = await jobsRepo.getAllJobUrls();
|
||||||
const crawlerResult = await runCrawler({
|
|
||||||
existingJobUrls,
|
const discoveredJobs: CreateJobInput[] = [];
|
||||||
onProgress: (update) => {
|
const sourceErrors: string[] = [];
|
||||||
progressHelpers.crawlingUpdate({
|
|
||||||
listPagesProcessed: update.listPagesProcessed,
|
if (mergedConfig.sources.includes('gradcracker')) {
|
||||||
listPagesTotal: update.listPagesTotal,
|
const crawlerResult = await runCrawler({
|
||||||
jobCardsFound: update.jobCardsFound,
|
existingJobUrls,
|
||||||
jobPagesEnqueued: update.jobPagesEnqueued,
|
onProgress: (update) => {
|
||||||
jobPagesSkipped: update.jobPagesSkipped,
|
progressHelpers.crawlingUpdate({
|
||||||
jobPagesProcessed: update.jobPagesProcessed,
|
listPagesProcessed: update.listPagesProcessed,
|
||||||
phase: update.phase,
|
listPagesTotal: update.listPagesTotal,
|
||||||
currentUrl: update.currentUrl,
|
jobCardsFound: update.jobCardsFound,
|
||||||
});
|
jobPagesEnqueued: update.jobPagesEnqueued,
|
||||||
},
|
jobPagesSkipped: update.jobPagesSkipped,
|
||||||
});
|
jobPagesProcessed: update.jobPagesProcessed,
|
||||||
|
phase: update.phase,
|
||||||
if (!crawlerResult.success) {
|
currentUrl: update.currentUrl,
|
||||||
throw new Error(`Crawler failed: ${crawlerResult.error}`);
|
});
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!crawlerResult.success) {
|
||||||
|
sourceErrors.push(`gradcracker: ${crawlerResult.error ?? 'unknown error'}`);
|
||||||
|
} else {
|
||||||
|
discoveredJobs.push(...crawlerResult.jobs);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
progressHelpers.crawlingComplete(crawlerResult.jobs.length);
|
const jobSpySites = mergedConfig.sources.filter(
|
||||||
|
(s): s is 'indeed' | 'linkedin' => s === 'indeed' || s === 'linkedin'
|
||||||
|
);
|
||||||
|
|
||||||
|
if (jobSpySites.length > 0) {
|
||||||
|
updateProgress({
|
||||||
|
step: 'crawling',
|
||||||
|
detail: `JobSpy: scraping ${jobSpySites.join(', ')}...`,
|
||||||
|
});
|
||||||
|
|
||||||
|
const jobSpyResult = await runJobSpy({ sites: jobSpySites });
|
||||||
|
if (!jobSpyResult.success) {
|
||||||
|
sourceErrors.push(`jobspy: ${jobSpyResult.error ?? 'unknown error'}`);
|
||||||
|
} else {
|
||||||
|
discoveredJobs.push(...jobSpyResult.jobs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (discoveredJobs.length === 0 && sourceErrors.length > 0) {
|
||||||
|
throw new Error(`All sources failed: ${sourceErrors.join('; ')}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sourceErrors.length > 0) {
|
||||||
|
console.warn(`⚠️ Some sources failed: ${sourceErrors.join('; ')}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
progressHelpers.crawlingComplete(discoveredJobs.length);
|
||||||
|
|
||||||
// Step 3: Import discovered jobs
|
// Step 3: Import discovered jobs
|
||||||
console.log('\n💾 Importing jobs to database...');
|
console.log('\n💾 Importing jobs to database...');
|
||||||
const { created, skipped } = await jobsRepo.bulkCreateJobs(crawlerResult.jobs);
|
const { created, skipped } = await jobsRepo.bulkCreateJobs(discoveredJobs);
|
||||||
console.log(` Created: ${created}, Skipped (duplicates): ${skipped}`);
|
console.log(` Created: ${created}, Skipped (duplicates): ${skipped}`);
|
||||||
|
|
||||||
progressHelpers.importComplete(created, skipped);
|
progressHelpers.importComplete(created, skipped);
|
||||||
|
|||||||
@ -61,6 +61,9 @@ export async function createJob(input: CreateJobInput): Promise<Job> {
|
|||||||
await db.insert(jobs).values({
|
await db.insert(jobs).values({
|
||||||
id,
|
id,
|
||||||
source: input.source,
|
source: input.source,
|
||||||
|
sourceJobId: input.sourceJobId ?? null,
|
||||||
|
jobUrlDirect: input.jobUrlDirect ?? null,
|
||||||
|
datePosted: input.datePosted ?? null,
|
||||||
title: input.title,
|
title: input.title,
|
||||||
employer: input.employer,
|
employer: input.employer,
|
||||||
employerUrl: input.employerUrl ?? null,
|
employerUrl: input.employerUrl ?? null,
|
||||||
@ -73,6 +76,30 @@ export async function createJob(input: CreateJobInput): Promise<Job> {
|
|||||||
degreeRequired: input.degreeRequired ?? null,
|
degreeRequired: input.degreeRequired ?? null,
|
||||||
starting: input.starting ?? null,
|
starting: input.starting ?? null,
|
||||||
jobDescription: input.jobDescription ?? null,
|
jobDescription: input.jobDescription ?? null,
|
||||||
|
jobType: input.jobType ?? null,
|
||||||
|
salarySource: input.salarySource ?? null,
|
||||||
|
salaryInterval: input.salaryInterval ?? null,
|
||||||
|
salaryMinAmount: input.salaryMinAmount ?? null,
|
||||||
|
salaryMaxAmount: input.salaryMaxAmount ?? null,
|
||||||
|
salaryCurrency: input.salaryCurrency ?? null,
|
||||||
|
isRemote: input.isRemote ?? null,
|
||||||
|
jobLevel: input.jobLevel ?? null,
|
||||||
|
jobFunction: input.jobFunction ?? null,
|
||||||
|
listingType: input.listingType ?? null,
|
||||||
|
emails: input.emails ?? null,
|
||||||
|
companyIndustry: input.companyIndustry ?? null,
|
||||||
|
companyLogo: input.companyLogo ?? null,
|
||||||
|
companyUrlDirect: input.companyUrlDirect ?? null,
|
||||||
|
companyAddresses: input.companyAddresses ?? null,
|
||||||
|
companyNumEmployees: input.companyNumEmployees ?? null,
|
||||||
|
companyRevenue: input.companyRevenue ?? null,
|
||||||
|
companyDescription: input.companyDescription ?? null,
|
||||||
|
skills: input.skills ?? null,
|
||||||
|
experienceRange: input.experienceRange ?? null,
|
||||||
|
companyRating: input.companyRating ?? null,
|
||||||
|
companyReviewsCount: input.companyReviewsCount ?? null,
|
||||||
|
vacancyCount: input.vacancyCount ?? null,
|
||||||
|
workFromHomeType: input.workFromHomeType ?? null,
|
||||||
status: 'discovered',
|
status: 'discovered',
|
||||||
discoveredAt: now,
|
discoveredAt: now,
|
||||||
createdAt: now,
|
createdAt: now,
|
||||||
@ -173,6 +200,9 @@ function mapRowToJob(row: typeof jobs.$inferSelect): Job {
|
|||||||
return {
|
return {
|
||||||
id: row.id,
|
id: row.id,
|
||||||
source: row.source as Job['source'],
|
source: row.source as Job['source'],
|
||||||
|
sourceJobId: row.sourceJobId ?? null,
|
||||||
|
jobUrlDirect: row.jobUrlDirect ?? null,
|
||||||
|
datePosted: row.datePosted ?? null,
|
||||||
title: row.title,
|
title: row.title,
|
||||||
employer: row.employer,
|
employer: row.employer,
|
||||||
employerUrl: row.employerUrl,
|
employerUrl: row.employerUrl,
|
||||||
@ -191,6 +221,30 @@ function mapRowToJob(row: typeof jobs.$inferSelect): Job {
|
|||||||
tailoredSummary: row.tailoredSummary,
|
tailoredSummary: row.tailoredSummary,
|
||||||
pdfPath: row.pdfPath,
|
pdfPath: row.pdfPath,
|
||||||
notionPageId: row.notionPageId,
|
notionPageId: row.notionPageId,
|
||||||
|
jobType: row.jobType ?? null,
|
||||||
|
salarySource: row.salarySource ?? null,
|
||||||
|
salaryInterval: row.salaryInterval ?? null,
|
||||||
|
salaryMinAmount: row.salaryMinAmount ?? null,
|
||||||
|
salaryMaxAmount: row.salaryMaxAmount ?? null,
|
||||||
|
salaryCurrency: row.salaryCurrency ?? null,
|
||||||
|
isRemote: row.isRemote ?? null,
|
||||||
|
jobLevel: row.jobLevel ?? null,
|
||||||
|
jobFunction: row.jobFunction ?? null,
|
||||||
|
listingType: row.listingType ?? null,
|
||||||
|
emails: row.emails ?? null,
|
||||||
|
companyIndustry: row.companyIndustry ?? null,
|
||||||
|
companyLogo: row.companyLogo ?? null,
|
||||||
|
companyUrlDirect: row.companyUrlDirect ?? null,
|
||||||
|
companyAddresses: row.companyAddresses ?? null,
|
||||||
|
companyNumEmployees: row.companyNumEmployees ?? null,
|
||||||
|
companyRevenue: row.companyRevenue ?? null,
|
||||||
|
companyDescription: row.companyDescription ?? null,
|
||||||
|
skills: row.skills ?? null,
|
||||||
|
experienceRange: row.experienceRange ?? null,
|
||||||
|
companyRating: row.companyRating ?? null,
|
||||||
|
companyReviewsCount: row.companyReviewsCount ?? null,
|
||||||
|
vacancyCount: row.vacancyCount ?? null,
|
||||||
|
workFromHomeType: row.workFromHomeType ?? null,
|
||||||
discoveredAt: row.discoveredAt,
|
discoveredAt: row.discoveredAt,
|
||||||
processedAt: row.processedAt,
|
processedAt: row.processedAt,
|
||||||
appliedAt: row.appliedAt,
|
appliedAt: row.appliedAt,
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
export * from './crawler.js';
|
export * from './crawler.js';
|
||||||
|
export * from './jobspy.js';
|
||||||
export * from './scorer.js';
|
export * from './scorer.js';
|
||||||
export * from './summary.js';
|
export * from './summary.js';
|
||||||
export * from './pdf.js';
|
export * from './pdf.js';
|
||||||
|
|||||||
241
orchestrator/src/server/services/jobspy.ts
Normal file
241
orchestrator/src/server/services/jobspy.ts
Normal file
@ -0,0 +1,241 @@
|
|||||||
|
/**
|
||||||
|
* Service for scraping jobs via JobSpy (Indeed/LinkedIn/etc) and mapping them into our DB shape.
|
||||||
|
*
|
||||||
|
* Uses a small Python wrapper script that writes both CSV + JSON to disk; we ingest the JSON.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { spawn } from 'child_process';
|
||||||
|
import { readFile, mkdir } from 'fs/promises';
|
||||||
|
import { join, dirname } from 'path';
|
||||||
|
import { fileURLToPath } from 'url';
|
||||||
|
import type { CreateJobInput, JobSource } from '../../shared/types.js';
|
||||||
|
|
||||||
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||||
|
const JOBSPY_DIR = join(__dirname, '../../../../jobspy-extractor');
|
||||||
|
const JOBSPY_SCRIPT = join(JOBSPY_DIR, 'scrape_jobs.py');
|
||||||
|
|
||||||
|
/**
 * Pick the Python executable used to launch the JobSpy script.
 *
 * A non-empty `PYTHON_PATH` environment variable wins; otherwise the result
 * is `python` on Windows and `python3` on every other platform.
 */
function getPythonPath(): string {
  const override = process.env.PYTHON_PATH;
  if (override) {
    return override;
  }

  const onWindows = process.platform === 'win32';
  return onWindows ? 'python' : 'python3';
}
|
||||||
|
|
||||||
|
/**
 * Resolve the directory used for generated data files.
 *
 * A non-empty `DATA_DIR` environment variable wins; otherwise the path is
 * `../../../data` relative to this module's directory.
 */
function getDataDir(): string {
  const configured = process.env.DATA_DIR;
  return configured ? configured : join(__dirname, '../../../data');
}
|
||||||
|
|
||||||
|
/**
 * Coerce a loosely typed value to a trimmed string.
 *
 * - strings are trimmed; an empty result becomes `null`
 * - numbers and booleans are converted with `String()`
 * - everything else (including `null` / `undefined` / objects) yields `null`
 */
function toStringOrNull(value: unknown): string | null {
  switch (typeof value) {
    case 'string': {
      const text = value.trim();
      return text.length === 0 ? null : text;
    }
    case 'number':
    case 'boolean':
      return String(value);
    default:
      return null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Coerce a loosely typed value to a finite number.
 *
 * Numbers pass through; strings are trimmed and parsed with `Number()`.
 * Blank strings, non-finite results (NaN / ±Infinity) and any other input
 * type yield `null`.
 */
function toNumberOrNull(value: unknown): number | null {
  let candidate: number;

  if (typeof value === 'number') {
    candidate = value;
  } else if (typeof value === 'string') {
    const text = value.trim();
    if (text === '') return null;
    candidate = Number(text);
  } else {
    return null;
  }

  return Number.isFinite(candidate) ? candidate : null;
}
|
||||||
|
|
||||||
|
/**
 * Coerce a loosely typed value to a boolean.
 *
 * - booleans pass through
 * - numbers: `0` is `false`, any other number is `true`; `NaN` is treated as
 *   "no value" and yields `null` (previously it was reported as `true`
 *   because `NaN !== 0`)
 * - strings are trimmed and lower-cased, then matched against
 *   `1/true/yes/y/on` and `0/false/no/n/off`
 * - anything else, including unrecognised strings, yields `null`
 */
function toBooleanOrNull(value: unknown): boolean | null {
  if (value === null || value === undefined) return null;
  if (typeof value === 'boolean') return value;
  if (typeof value === 'number') {
    // NaN carries no truth value; do not let it fall through as `true`.
    return Number.isNaN(value) ? null : value !== 0;
  }
  if (typeof value === 'string') {
    const normalized = value.trim().toLowerCase();
    if (!normalized) return null;
    if (['1', 'true', 'yes', 'y', 'on'].includes(normalized)) return true;
    if (['0', 'false', 'no', 'n', 'off'].includes(normalized)) return false;
  }
  return null;
}
|
||||||
|
|
||||||
|
/**
 * Serialise a value for storage in a text column.
 *
 * Strings are passed through `toStringOrNull` (trimmed, blank becomes
 * `null`). Any other value is serialised with `JSON.stringify`. Returns
 * `null` when the value is `null`/`undefined`, when serialisation throws
 * (e.g. circular structures, BigInt), or when `JSON.stringify` produces no
 * output (functions, symbols).
 */
function toJsonStringOrNull(value: unknown): string | null {
  if (value === null || value === undefined) return null;
  if (typeof value === 'string') return toStringOrNull(value);
  try {
    // JSON.stringify is typed as returning string but yields undefined for
    // functions and symbols; normalise that to null to honour the contract.
    const serialized: string | undefined = JSON.stringify(value);
    return typeof serialized === 'string' ? serialized : null;
  } catch {
    return null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Map a raw site label to one of the recognised job sources.
 *
 * The label is normalised through `toStringOrNull` and lower-cased; only
 * `gradcracker`, `indeed` and `linkedin` are accepted. Anything else yields
 * `null`.
 */
function toJobSource(site: unknown): JobSource | null {
  const label = toStringOrNull(site)?.toLowerCase();
  const recognised: readonly JobSource[] = ['gradcracker', 'indeed', 'linkedin'];
  const match = recognised.find((candidate) => candidate === label);
  return match ?? null;
}
|
||||||
|
|
||||||
|
/**
 * Build a short human-readable salary string from structured salary fields.
 *
 * Amounts are rounded to whole numbers and printed without locale
 * formatting. Shapes produced:
 * - both bounds: `"<currency> <min>-<max> / <interval>"`
 * - min only:    `"<currency> <min>+ / <interval>"`
 * - max only:    `"<currency> <max> / <interval>"`
 * The currency prefix and interval suffix are omitted when they are null or
 * empty.
 *
 * @returns the formatted string, or `null` when neither bound is present.
 */
function formatSalary(params: {
  minAmount: number | null;
  maxAmount: number | null;
  currency: string | null;
  interval: string | null;
}): string | null {
  const { minAmount, maxAmount, currency, interval } = params;

  // Avoid locale ambiguity; keep it simple.
  const fmt = (n: number): string => `${Math.round(n)}`;

  let range: string;
  if (minAmount !== null && maxAmount !== null) {
    range = `${fmt(minAmount)}-${fmt(maxAmount)}`;
  } else if (minAmount !== null) {
    range = `${fmt(minAmount)}+`;
  } else if (maxAmount !== null) {
    range = fmt(maxAmount);
  } else {
    // Single exit for "no salary data"; replaces a duplicated early return
    // that made this branch unreachable.
    return null;
  }

  const currencyPart = currency ? `${currency} ` : '';
  const intervalPart = interval ? ` / ${interval}` : '';
  return `${currencyPart}${range}${intervalPart}`.trim();
}
|
||||||
|
|
||||||
|
/**
 * Optional overrides for a JobSpy scrape. `runJobSpy` passes each value to
 * the Python script through a `JOBSPY_*` environment variable.
 */
export interface RunJobSpyOptions {
  // Sites to scrape; runJobSpy keeps only 'indeed' and 'linkedin'.
  sites?: JobSource[];
  // Search query (exported as JOBSPY_SEARCH_TERM).
  searchTerm?: string;
  // Search location (exported as JOBSPY_LOCATION).
  location?: string;
  // Exported as JOBSPY_RESULTS_WANTED.
  resultsWanted?: number;
  // Exported as JOBSPY_HOURS_OLD.
  hoursOld?: number;
  // Exported as JOBSPY_COUNTRY_INDEED.
  countryIndeed?: string;
  // Exported as JOBSPY_LINKEDIN_FETCH_DESCRIPTION.
  linkedinFetchDescription?: boolean;
}
|
||||||
|
|
||||||
|
/** Outcome of a `runJobSpy` call. */
export interface JobSpyResult {
  // Whether the scrape succeeded.
  success: boolean;
  // Scraped jobs in the shape expected for job creation.
  jobs: Array<CreateJobInput>;
  // Optional failure description.
  error?: string;
}
|
||||||
|
|
||||||
|
/**
 * Spawn the JobSpy Python script and wait for it to finish.
 *
 * The child inherits this process's stdio, so scraper output appears directly
 * in the parent's console.
 *
 * @param env - Complete environment for the child process.
 * @returns A promise that resolves when the script exits with code 0 and
 *          rejects on a non-zero exit code or when the process cannot be spawned.
 */
function runJobSpyProcess(env: NodeJS.ProcessEnv): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const child = spawn(getPythonPath(), [JOBSPY_SCRIPT], {
      cwd: JOBSPY_DIR,
      shell: false,
      stdio: 'inherit',
      env,
    });

    child.on('close', (code) => {
      if (code === 0) resolve();
      else reject(new Error(`JobSpy exited with code ${code}`));
    });
    child.on('error', reject);
  });
}

/**
 * Convert one row of the JobSpy JSON output into a CreateJobInput.
 *
 * @param row - One element of the parsed JSON array.
 * @returns The mapped job, or `null` when the row is not an object, has an
 *          unrecognised `site`, or has no `job_url`.
 */
function jobSpyRowToCreateJobInput(row: unknown): CreateJobInput | null {
  // A malformed element must not abort the whole import.
  if (typeof row !== 'object' || row === null) return null;
  const record = row as Record<string, unknown>;

  const source = toJobSource(record.site);
  if (!source) return null;

  const jobUrl = toStringOrNull(record.job_url);
  if (!jobUrl) return null;

  const jobUrlDirect = toStringOrNull(record.job_url_direct);

  const minAmount = toNumberOrNull(record.min_amount);
  const maxAmount = toNumberOrNull(record.max_amount);
  const currency = toStringOrNull(record.currency);
  const interval = toStringOrNull(record.interval);
  const salary = formatSalary({ minAmount, maxAmount, currency, interval });

  return {
    source,
    sourceJobId: toStringOrNull(record.id) ?? undefined,
    jobUrlDirect: jobUrlDirect ?? undefined,
    datePosted: toStringOrNull(record.date_posted) ?? undefined,

    title: toStringOrNull(record.title) ?? 'Unknown Title',
    employer: toStringOrNull(record.company) ?? 'Unknown Employer',
    employerUrl: toStringOrNull(record.company_url) ?? undefined,
    jobUrl,
    // Prefer the direct URL when the source provides one.
    applicationLink: jobUrlDirect ?? jobUrl,
    location: toStringOrNull(record.location) ?? undefined,
    jobDescription: toStringOrNull(record.description) ?? undefined,
    salary: salary ?? undefined,

    jobType: toStringOrNull(record.job_type) ?? undefined,
    salarySource: toStringOrNull(record.salary_source) ?? undefined,
    salaryInterval: interval ?? undefined,
    salaryMinAmount: minAmount ?? undefined,
    salaryMaxAmount: maxAmount ?? undefined,
    salaryCurrency: currency ?? undefined,
    isRemote: toBooleanOrNull(record.is_remote) ?? undefined,
    jobLevel: toStringOrNull(record.job_level) ?? undefined,
    jobFunction: toStringOrNull(record.job_function) ?? undefined,
    listingType: toStringOrNull(record.listing_type) ?? undefined,
    emails: toJsonStringOrNull(record.emails) ?? undefined,
    companyIndustry: toStringOrNull(record.company_industry) ?? undefined,
    companyLogo: toStringOrNull(record.company_logo) ?? undefined,
    companyUrlDirect: toStringOrNull(record.company_url_direct) ?? undefined,
    companyAddresses: toJsonStringOrNull(record.company_addresses) ?? undefined,
    companyNumEmployees: toStringOrNull(record.company_num_employees) ?? undefined,
    companyRevenue: toStringOrNull(record.company_revenue) ?? undefined,
    companyDescription: toStringOrNull(record.company_description) ?? undefined,
    skills: toJsonStringOrNull(record.skills) ?? undefined,
    experienceRange: toJsonStringOrNull(record.experience_range) ?? undefined,
    companyRating: toNumberOrNull(record.company_rating) ?? undefined,
    companyReviewsCount: toNumberOrNull(record.company_reviews_count) ?? undefined,
    vacancyCount: toNumberOrNull(record.vacancy_count) ?? undefined,
    workFromHomeType: toStringOrNull(record.work_from_home_type) ?? undefined,
  };
}

/**
 * Run the JobSpy Python scraper and convert its output into CreateJobInput records.
 *
 * The script is configured entirely through JOBSPY_* environment variables.
 * Each setting is resolved in this order: explicit option, then the
 * corresponding variable already present in `process.env`, then a built-in
 * default. After the script exits successfully, the JSON file at the output
 * path handed to it is read and mapped row by row; rows without a recognised
 * site or without a job URL are skipped.
 *
 * @param options - Optional per-call overrides for the scraper settings.
 * @returns `{ success: true, jobs }` on success. Failures while running the
 *          script or while reading/parsing its output are reported as
 *          `{ success: false, jobs: [], error }` instead of being thrown.
 */
export async function runJobSpy(options: RunJobSpyOptions = {}): Promise<JobSpyResult> {
  const dataDir = getDataDir();
  const outputDir = join(dataDir, 'imports');
  await mkdir(outputDir, { recursive: true });

  // Both paths are handed to the script; only the JSON file is read back here.
  const outputCsv = join(outputDir, 'jobspy_jobs.csv');
  const outputJson = join(outputDir, 'jobspy_jobs.json');

  // Only the sites JobSpy is used for here are forwarded. When the caller
  // gives no usable site, fall back to JOBSPY_SITES from the environment
  // (as every other setting does) before the built-in default.
  const requestedSites = (options.sites ?? [])
    .filter((s) => s === 'indeed' || s === 'linkedin')
    .join(',');
  const sites = requestedSites || process.env.JOBSPY_SITES || 'indeed,linkedin';

  try {
    await runJobSpyProcess({
      ...process.env,
      JOBSPY_SITES: sites,
      JOBSPY_SEARCH_TERM: options.searchTerm ?? process.env.JOBSPY_SEARCH_TERM ?? 'web developer',
      JOBSPY_LOCATION: options.location ?? process.env.JOBSPY_LOCATION ?? 'UK',
      JOBSPY_RESULTS_WANTED: String(options.resultsWanted ?? process.env.JOBSPY_RESULTS_WANTED ?? 200),
      JOBSPY_HOURS_OLD: String(options.hoursOld ?? process.env.JOBSPY_HOURS_OLD ?? 72),
      JOBSPY_COUNTRY_INDEED: options.countryIndeed ?? process.env.JOBSPY_COUNTRY_INDEED ?? 'UK',
      JOBSPY_LINKEDIN_FETCH_DESCRIPTION: String(
        options.linkedinFetchDescription ?? process.env.JOBSPY_LINKEDIN_FETCH_DESCRIPTION ?? '1'
      ),
      JOBSPY_OUTPUT_CSV: outputCsv,
      JOBSPY_OUTPUT_JSON: outputJson,
    });

    const raw = await readFile(outputJson, 'utf-8');
    const parsed: unknown = JSON.parse(raw);
    if (!Array.isArray(parsed)) {
      throw new Error('JobSpy output is not a JSON array');
    }

    const jobs: CreateJobInput[] = [];
    for (const row of parsed) {
      const job = jobSpyRowToCreateJobInput(row);
      if (job) jobs.push(job);
    }

    return { success: true, jobs };
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error';
    return { success: false, jobs: [], error: message };
  }
}
|
||||||
@ -11,13 +11,20 @@ export type JobStatus =
|
|||||||
| 'expired'; // Deadline passed
|
| 'expired'; // Deadline passed
|
||||||
|
|
||||||
export type JobSource =
|
export type JobSource =
|
||||||
| 'gradcracker';
|
| 'gradcracker'
|
||||||
|
| 'indeed'
|
||||||
|
| 'linkedin';
|
||||||
|
|
||||||
export interface Job {
|
export interface Job {
|
||||||
id: string;
|
id: string;
|
||||||
|
|
||||||
// From crawler
|
// Source / provenance
|
||||||
source: JobSource;
|
source: JobSource;
|
||||||
|
sourceJobId: string | null; // External ID (if provided)
|
||||||
|
jobUrlDirect: string | null; // Source-provided direct URL (if provided)
|
||||||
|
datePosted: string | null; // Source-provided posting date (if provided)
|
||||||
|
|
||||||
|
// From crawler (normalized)
|
||||||
title: string;
|
title: string;
|
||||||
employer: string;
|
employer: string;
|
||||||
employerUrl: string | null;
|
employerUrl: string | null;
|
||||||
@ -38,6 +45,32 @@ export interface Job {
|
|||||||
tailoredSummary: string | null; // Generated resume summary
|
tailoredSummary: string | null; // Generated resume summary
|
||||||
pdfPath: string | null; // Path to generated PDF
|
pdfPath: string | null; // Path to generated PDF
|
||||||
notionPageId: string | null; // Notion page ID if synced
|
notionPageId: string | null; // Notion page ID if synced
|
||||||
|
|
||||||
|
// JobSpy fields (nullable for non-JobSpy sources)
|
||||||
|
jobType: string | null;
|
||||||
|
salarySource: string | null;
|
||||||
|
salaryInterval: string | null;
|
||||||
|
salaryMinAmount: number | null;
|
||||||
|
salaryMaxAmount: number | null;
|
||||||
|
salaryCurrency: string | null;
|
||||||
|
isRemote: boolean | null;
|
||||||
|
jobLevel: string | null;
|
||||||
|
jobFunction: string | null;
|
||||||
|
listingType: string | null;
|
||||||
|
emails: string | null;
|
||||||
|
companyIndustry: string | null;
|
||||||
|
companyLogo: string | null;
|
||||||
|
companyUrlDirect: string | null;
|
||||||
|
companyAddresses: string | null;
|
||||||
|
companyNumEmployees: string | null;
|
||||||
|
companyRevenue: string | null;
|
||||||
|
companyDescription: string | null;
|
||||||
|
skills: string | null;
|
||||||
|
experienceRange: string | null;
|
||||||
|
companyRating: number | null;
|
||||||
|
companyReviewsCount: number | null;
|
||||||
|
vacancyCount: number | null;
|
||||||
|
workFromHomeType: string | null;
|
||||||
|
|
||||||
// Timestamps
|
// Timestamps
|
||||||
discoveredAt: string;
|
discoveredAt: string;
|
||||||
@ -61,6 +94,35 @@ export interface CreateJobInput {
|
|||||||
degreeRequired?: string;
|
degreeRequired?: string;
|
||||||
starting?: string;
|
starting?: string;
|
||||||
jobDescription?: string;
|
jobDescription?: string;
|
||||||
|
|
||||||
|
// JobSpy fields (optional)
|
||||||
|
sourceJobId?: string;
|
||||||
|
jobUrlDirect?: string;
|
||||||
|
datePosted?: string;
|
||||||
|
jobType?: string;
|
||||||
|
salarySource?: string;
|
||||||
|
salaryInterval?: string;
|
||||||
|
salaryMinAmount?: number;
|
||||||
|
salaryMaxAmount?: number;
|
||||||
|
salaryCurrency?: string;
|
||||||
|
isRemote?: boolean;
|
||||||
|
jobLevel?: string;
|
||||||
|
jobFunction?: string;
|
||||||
|
listingType?: string;
|
||||||
|
emails?: string;
|
||||||
|
companyIndustry?: string;
|
||||||
|
companyLogo?: string;
|
||||||
|
companyUrlDirect?: string;
|
||||||
|
companyAddresses?: string;
|
||||||
|
companyNumEmployees?: string;
|
||||||
|
companyRevenue?: string;
|
||||||
|
companyDescription?: string;
|
||||||
|
skills?: string;
|
||||||
|
experienceRange?: string;
|
||||||
|
companyRating?: number;
|
||||||
|
companyReviewsCount?: number;
|
||||||
|
vacancyCount?: number;
|
||||||
|
workFromHomeType?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface UpdateJobInput {
|
export interface UpdateJobInput {
|
||||||
@ -76,7 +138,7 @@ export interface UpdateJobInput {
|
|||||||
export interface PipelineConfig {
|
export interface PipelineConfig {
|
||||||
topN: number; // Number of top jobs to process
|
topN: number; // Number of top jobs to process
|
||||||
minSuitabilityScore: number; // Minimum score to auto-process
|
minSuitabilityScore: number; // Minimum score to auto-process
|
||||||
sources: string[]; // Job sources to crawl
|
sources: JobSource[]; // Job sources to crawl
|
||||||
profilePath: string; // Path to profile JSON
|
profilePath: string; // Path to profile JSON
|
||||||
outputDir: string; // Directory for generated PDFs
|
outputDir: string; // Directory for generated PDFs
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user