From 7f517776dfb49338f0b4ad0c9ce0600204c5927a Mon Sep 17 00:00:00 2001 From: 0x1355 <0x1355@gmail.com> Date: Fri, 20 Mar 2026 09:18:27 +0100 Subject: [PATCH] fix: auto-detect jobspy venv so contributors don't need PYTHON_PATH (#293) --- CONTRIBUTING.md | 9 +++++++++ extractors/jobspy/src/run.ts | 18 +++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 560984a..68d138d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -34,6 +34,15 @@ npm --workspace orchestrator run db:migrate npm --workspace orchestrator run dev ``` +If you are working with extractors that use Glassdoor, Indeed, or LinkedIn (powered by python-jobspy), set up the Python venv once: + +```bash +python3 -m venv extractors/jobspy/.venv +extractors/jobspy/.venv/bin/pip install -r extractors/jobspy/requirements.txt +``` + +The runner auto-detects the venv — no need to set `PYTHON_PATH`. + If you are editing docs: ```bash diff --git a/extractors/jobspy/src/run.ts b/extractors/jobspy/src/run.ts index f2cdba4..3344bec 100644 --- a/extractors/jobspy/src/run.ts +++ b/extractors/jobspy/src/run.ts @@ -1,4 +1,5 @@ import { spawn } from "node:child_process"; +import { existsSync } from "node:fs"; import { mkdir, readFile, unlink } from "node:fs/promises"; import { dirname, join } from "node:path"; import { createInterface } from "node:readline"; @@ -180,11 +181,22 @@ export async function runJobSpy( const outputJson = join(OUTPUT_DIR, `jobspy_jobs_${suffix}.json`); await new Promise((resolve, reject) => { + // Auto-detect venv if present, so contributors don't need to set + // PYTHON_PATH manually. The venv is created once with: + // python3 -m venv .venv && .venv/bin/pip install -r requirements.txt + // In Docker, PYTHON_PATH is set explicitly to /usr/bin/python3. + const venvPython = join( + EXTRACTOR_DIR, + ".venv", + process.platform === "win32" ? "Scripts/python.exe" : "bin/python3", + ); const pythonPath = process.env.PYTHON_PATH ? process.env.PYTHON_PATH - : process.platform === "win32" - ? "python" - : "python3"; + : existsSync(venvPython) + ? venvPython + : process.platform === "win32" + ? "python" + : "python3"; const child = spawn(pythonPath, [JOBSPY_SCRIPT], { cwd: EXTRACTOR_DIR,