From 0f36d9b8a6b5dade6f161d1cb6483e2044abd931 Mon Sep 17 00:00:00 2001 From: DaKheera47 Date: Fri, 26 Dec 2025 20:17:05 +0000 Subject: [PATCH] initial implementation --- .env.example | 11 + Dockerfile | 5 + README.md | 3 +- docker-compose.yml | 8 + extractors/ukvisajobs/.gitignore | 11 + extractors/ukvisajobs/README.md | 40 ++ extractors/ukvisajobs/package-lock.json | 599 ++++++++++++++++++ extractors/ukvisajobs/package.json | 22 + extractors/ukvisajobs/src/main.ts | 303 +++++++++ extractors/ukvisajobs/tsconfig.json | 12 + orchestrator/.env.example | 9 + orchestrator/src/client/App.tsx | 4 +- orchestrator/src/client/components/Header.tsx | 3 +- orchestrator/src/server/api/routes.ts | 80 +-- orchestrator/src/server/db/schema.ts | 12 +- .../src/server/pipeline/orchestrator.ts | 76 ++- .../src/server/services/ukvisajobs.ts | 125 ++++ orchestrator/src/shared/types.ts | 11 +- 18 files changed, 1249 insertions(+), 85 deletions(-) create mode 100644 extractors/ukvisajobs/.gitignore create mode 100644 extractors/ukvisajobs/README.md create mode 100644 extractors/ukvisajobs/package-lock.json create mode 100644 extractors/ukvisajobs/package.json create mode 100644 extractors/ukvisajobs/src/main.ts create mode 100644 extractors/ukvisajobs/tsconfig.json create mode 100644 orchestrator/src/server/services/ukvisajobs.ts diff --git a/.env.example b/.env.example index 8598d5e..3a3d8d1 100644 --- a/.env.example +++ b/.env.example @@ -37,3 +37,14 @@ JOBSPY_RESULTS_WANTED=200 JOBSPY_HOURS_OLD=72 JOBSPY_COUNTRY_INDEED=UK JOBSPY_LINKEDIN_FETCH_DESCRIPTION=1 + +# ============================================================================= +# UKVisaJobs (UK visa sponsorship jobs) - optional +# ============================================================================= +# Get these tokens from browser dev tools after logging into my.ukvisajobs.com +# See extractors/ukvisajobs/README.md for detailed instructions. 
+UKVISAJOBS_TOKEN= +UKVISAJOBS_AUTH_TOKEN= +UKVISAJOBS_CSRF_TOKEN= +UKVISAJOBS_CI_SESSION= +UKVISAJOBS_MAX_JOBS=50 diff --git a/Dockerfile b/Dockerfile index cbb5f5c..8245f0e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,6 +28,7 @@ RUN pip3 install --no-cache-dir --break-system-packages playwright python-jobspy # Copy package files first for better caching COPY orchestrator/package*.json ./orchestrator/ COPY extractors/gradcracker/package*.json ./extractors/gradcracker/ +COPY extractors/ukvisajobs/package*.json ./extractors/ukvisajobs/ # Install Node.js dependencies WORKDIR /app/orchestrator @@ -39,11 +40,15 @@ RUN npm install --production=false # Install Camoufox browser (downloads its own Firefox fork) RUN npx camoufox fetch +WORKDIR /app/extractors/ukvisajobs +RUN npm install --production=false + # Copy source code WORKDIR /app COPY orchestrator ./orchestrator COPY extractors/gradcracker ./extractors/gradcracker COPY extractors/jobspy ./extractors/jobspy +COPY extractors/ukvisajobs ./extractors/ukvisajobs COPY resume-generator ./resume-generator # Build the orchestrator (client + server) diff --git a/README.md b/README.md index db4f764..b44c3f5 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,7 @@ job-ops/ src/shared/ # shared types (Job, PipelineRun, etc.) extractors/gradcracker/ # Crawlee crawler (Gradcracker) extractors/jobspy/ # JobSpy wrapper (Indeed/LinkedIn/etc) + extractors/ukvisajobs/ # UK Visa Jobs API extractor resume-generator/ # Python Playwright automation for rxresu.me base.json # your exported base resume (template) data/ # persisted runtime artifacts (Docker default) @@ -87,7 +88,7 @@ job-ops/ ## Data model (SQLite) - `jobs` - - from crawl: `title`, `employer`, `jobUrl`, `applicationLink`, `deadline`, `salary`, `location`, `jobDescription`, etc. + - from crawl: `title`, `employer`, `jobUrl`, `applicationLink`, `deadline`, `salary`, `location`, `jobDescription`, `source` (gradcracker/indeed/linkedin/ukvisajobs), etc. 
- enrichments: `status` (`discovered` -> `processing` -> `ready` -> `applied`/`rejected`), `suitabilityScore`, `suitabilityReason`, `tailoredSummary`, `pdfPath`, `notionPageId` - `pipeline_runs`: audit log of runs (`running`/`completed`/`failed`, counts, error) diff --git a/docker-compose.yml b/docker-compose.yml index 3b3ded9..b563b5e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -50,6 +50,14 @@ services: # Optional: Webhook secret for n8n - WEBHOOK_SECRET=${WEBHOOK_SECRET:-} + # UKVisaJobs (UK visa sponsorship jobs) - optional + - UKVISAJOBS_TOKEN=${UKVISAJOBS_TOKEN:-} + - UKVISAJOBS_AUTH_TOKEN=${UKVISAJOBS_AUTH_TOKEN:-} + - UKVISAJOBS_CSRF_TOKEN=${UKVISAJOBS_CSRF_TOKEN:-} + - UKVISAJOBS_CI_SESSION=${UKVISAJOBS_CI_SESSION:-} + - UKVISAJOBS_MAX_JOBS=${UKVISAJOBS_MAX_JOBS:-50} + - UKVISAJOBS_SEARCH_KEYWORD=${UKVISAJOBS_SEARCH_KEYWORD:-} + # Python path (uses system python in container) - PYTHON_PATH=/usr/bin/python3 restart: unless-stopped diff --git a/extractors/ukvisajobs/.gitignore b/extractors/ukvisajobs/.gitignore new file mode 100644 index 0000000..c2e6c96 --- /dev/null +++ b/extractors/ukvisajobs/.gitignore @@ -0,0 +1,11 @@ +# Dependencies +node_modules/ + +# Build output +dist/ + +# Crawlee storage +storage/ + +# Logs +*.log diff --git a/extractors/ukvisajobs/README.md b/extractors/ukvisajobs/README.md new file mode 100644 index 0000000..4412b83 --- /dev/null +++ b/extractors/ukvisajobs/README.md @@ -0,0 +1,40 @@ +# UK Visa Jobs Extractor + +Fetches job listings from [my.ukvisajobs.com](https://my.ukvisajobs.com) that may sponsor work visas. 
+ +## Setup + +```bash +npm install +``` + +## Configuration + +Set the following environment variables (you can get these from your browser's dev tools after logging in): + +| Variable | Description | +|----------|-------------| +| `UKVISAJOBS_TOKEN` | JWT token from the request body (required) | +| `UKVISAJOBS_AUTH_TOKEN` | Auth cookie token (defaults to UKVISAJOBS_TOKEN) | +| `UKVISAJOBS_CSRF_TOKEN` | CSRF token from cookies | +| `UKVISAJOBS_CI_SESSION` | CI session ID from cookies | +| `UKVISAJOBS_MAX_JOBS` | Maximum jobs to fetch (default: 50, max: 200) | +| `UKVISAJOBS_SEARCH_KEYWORD` | Optional search filter | + +## How to get tokens + +1. Log into `my.ukvisajobs.com` in your browser +2. Open Developer Tools → Network tab +3. Navigate to the jobs page +4. Find the `fetch-jobs-data` POST request +5. Copy values: + - From **Request Body**: copy the `token` field → `UKVISAJOBS_TOKEN` + - From **Cookies**: copy `authToken`, `csrf_token`, `ci_session` + +## Running + +```bash +npm start +``` + +Output is written to `storage/datasets/default/` as JSON files. 
diff --git a/extractors/ukvisajobs/package-lock.json b/extractors/ukvisajobs/package-lock.json new file mode 100644 index 0000000..71e9a57 --- /dev/null +++ b/extractors/ukvisajobs/package-lock.json @@ -0,0 +1,599 @@ +{ + "name": "ukvisajobs-extractor", + "version": "0.0.1", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "ukvisajobs-extractor", + "version": "0.0.1", + "license": "ISC", + "devDependencies": { + "@apify/tsconfig": "^0.1.0", + "@types/node": "^24.0.0", + "tsx": "^4.4.0", + "typescript": "~5.9.0" + } + }, + "node_modules/@apify/tsconfig": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/@apify/tsconfig/-/tsconfig-0.1.1.tgz", + "integrity": "sha512-cS7mwN2UW1UXcluGXRDHH0Vr2VsSLkw2DwLTwoSBkcJSe8fvCr3MPryTSq0uod4MashpMURxJ7CsLKxs82VmOQ==", + "dev": true, + "license": "Apache-2.0" + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.2.tgz", + "integrity": "sha512-GZMB+a0mOMZs4MpDbj8RJp4cw+w1WV5NYD6xzgvzUJ5Ek2jerwfO2eADyI6ExDSUED+1X8aMbegahsJi+8mgpw==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.2.tgz", + "integrity": "sha512-DVNI8jlPa7Ujbr1yjU2PfUSRtAUZPG9I1RwW4F4xFB1Imiu2on0ADiI/c3td+KmDtVKNbi+nffGDQMfcIMkwIA==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.2.tgz", + "integrity": "sha512-pvz8ZZ7ot/RBphf8fv60ljmaoydPU12VuXHImtAs0XhLLw+EXBi2BLe3OYSBslR4rryHvweW5gmkKFwTiFy6KA==", + "cpu": [ + "arm64" + ], + "dev": true, + 
"license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.2.tgz", + "integrity": "sha512-z8Ank4Byh4TJJOh4wpz8g2vDy75zFL0TlZlkUkEwYXuPSgX8yzep596n6mT7905kA9uHZsf/o2OJZubl2l3M7A==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.2.tgz", + "integrity": "sha512-davCD2Zc80nzDVRwXTcQP/28fiJbcOwvdolL0sOiOsbwBa72kegmVU0Wrh1MYrbuCL98Omp5dVhQFWRKR2ZAlg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.2.tgz", + "integrity": "sha512-ZxtijOmlQCBWGwbVmwOF/UCzuGIbUkqB1faQRf5akQmxRJ1ujusWsb3CVfk/9iZKr2L5SMU5wPBi1UWbvL+VQA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.2.tgz", + "integrity": "sha512-lS/9CN+rgqQ9czogxlMcBMGd+l8Q3Nj1MFQwBZJyoEKI50XGxwuzznYdwcav6lpOGv5BqaZXqvBSiB/kJ5op+g==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.2.tgz", + "integrity": 
"sha512-tAfqtNYb4YgPnJlEFu4c212HYjQWSO/w/h/lQaBK7RbwGIkBOuNKQI9tqWzx7Wtp7bTPaGC6MJvWI608P3wXYA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.2.tgz", + "integrity": "sha512-vWfq4GaIMP9AIe4yj1ZUW18RDhx6EPQKjwe7n8BbIecFtCQG4CfHGaHuh7fdfq+y3LIA2vGS/o9ZBGVxIDi9hw==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.2.tgz", + "integrity": "sha512-hYxN8pr66NsCCiRFkHUAsxylNOcAQaxSSkHMMjcpx0si13t1LHFphxJZUiGwojB1a/Hd5OiPIqDdXONia6bhTw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.2.tgz", + "integrity": "sha512-MJt5BRRSScPDwG2hLelYhAAKh9imjHK5+NE/tvnRLbIqUWa+0E9N4WNMjmp/kXXPHZGqPLxggwVhz7QP8CTR8w==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.2.tgz", + "integrity": "sha512-lugyF1atnAT463aO6KPshVCJK5NgRnU4yb3FUumyVz+cGvZbontBgzeGFO1nF+dPueHD367a2ZXe1NtUkAjOtg==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.27.2", + "resolved": 
"https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.2.tgz", + "integrity": "sha512-nlP2I6ArEBewvJ2gjrrkESEZkB5mIoaTswuqNFRv/WYd+ATtUpe9Y09RnJvgvdag7he0OWgEZWhviS1OTOKixw==", + "cpu": [ + "mips64el" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.2.tgz", + "integrity": "sha512-C92gnpey7tUQONqg1n6dKVbx3vphKtTHJaNG2Ok9lGwbZil6DrfyecMsp9CrmXGQJmZ7iiVXvvZH6Ml5hL6XdQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.2.tgz", + "integrity": "sha512-B5BOmojNtUyN8AXlK0QJyvjEZkWwy/FKvakkTDCziX95AowLZKR6aCDhG7LeF7uMCXEJqwa8Bejz5LTPYm8AvA==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.2.tgz", + "integrity": "sha512-p4bm9+wsPwup5Z8f4EpfN63qNagQ47Ua2znaqGH6bqLlmJ4bx97Y9JdqxgGZ6Y8xVTixUnEkoKSHcpRlDnNr5w==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.2.tgz", + "integrity": "sha512-uwp2Tip5aPmH+NRUwTcfLb+W32WXjpFejTIOWZFw/v7/KnpCDKG66u4DLcurQpiYTiYwQ9B7KOeMJvLCu/OvbA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + 
} + }, + "node_modules/@esbuild/netbsd-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.2.tgz", + "integrity": "sha512-Kj6DiBlwXrPsCRDeRvGAUb/LNrBASrfqAIok+xB0LxK8CHqxZ037viF13ugfsIpePH93mX7xfJp97cyDuTZ3cw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.2.tgz", + "integrity": "sha512-HwGDZ0VLVBY3Y+Nw0JexZy9o/nUAWq9MlV7cahpaXKW6TOzfVno3y3/M8Ga8u8Yr7GldLOov27xiCnqRZf0tCA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.2.tgz", + "integrity": "sha512-DNIHH2BPQ5551A7oSHD0CKbwIA/Ox7+78/AWkbS5QoRzaqlev2uFayfSxq68EkonB+IKjiuxBFoV8ESJy8bOHA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.2.tgz", + "integrity": "sha512-/it7w9Nb7+0KFIzjalNJVR5bOzA9Vay+yIPLVHfIQYG/j+j9VTH84aNB8ExGKPU4AzfaEvN9/V4HV+F+vo8OEg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openharmony-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.2.tgz", + "integrity": "sha512-LRBbCmiU51IXfeXk59csuX/aSaToeG7w48nMwA6049Y4J4+VbWALAuXcs+qcD04rHDuSCSRKdmY63sruDS5qag==", + "cpu": [ + "arm64" + ], + "dev": 
true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.2.tgz", + "integrity": "sha512-kMtx1yqJHTmqaqHPAzKCAkDaKsffmXkPHThSfRwZGyuqyIeBvf08KSsYXl+abf5HDAPMJIPnbBfXvP2ZC2TfHg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.2.tgz", + "integrity": "sha512-Yaf78O/B3Kkh+nKABUF++bvJv5Ijoy9AN1ww904rOXZFLWVc5OLOfL56W+C8F9xn5JQZa3UX6m+IktJnIb1Jjg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.2.tgz", + "integrity": "sha512-Iuws0kxo4yusk7sw70Xa2E2imZU5HoixzxfGCdxwBdhiDgt9vX9VUCBhqcwY7/uh//78A1hMkkROMJq9l27oLQ==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.2.tgz", + "integrity": "sha512-sRdU18mcKf7F+YgheI/zGf5alZatMUTKj/jNS6l744f9u3WFu4v7twcUI9vu4mknF4Y9aDlblIie0IM+5xxaqQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@types/node": { + "version": "24.10.4", + "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.4.tgz", + "integrity": 
"sha512-vnDVpYPMzs4wunl27jHrfmwojOGKya0xyM3sH+UE5iv5uPS6vX7UIoh6m+vQc5LGBq52HBKPIn/zcSZVzeDEZg==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~7.16.0" + } + }, + "node_modules/esbuild": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.2.tgz", + "integrity": "sha512-HyNQImnsOC7X9PMNaCIeAm4ISCQXs5a5YasTXVliKv4uuBo1dKrG0A+uQS8M5eXjVMnLg3WgXaKvprHlFJQffw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.27.2", + "@esbuild/android-arm": "0.27.2", + "@esbuild/android-arm64": "0.27.2", + "@esbuild/android-x64": "0.27.2", + "@esbuild/darwin-arm64": "0.27.2", + "@esbuild/darwin-x64": "0.27.2", + "@esbuild/freebsd-arm64": "0.27.2", + "@esbuild/freebsd-x64": "0.27.2", + "@esbuild/linux-arm": "0.27.2", + "@esbuild/linux-arm64": "0.27.2", + "@esbuild/linux-ia32": "0.27.2", + "@esbuild/linux-loong64": "0.27.2", + "@esbuild/linux-mips64el": "0.27.2", + "@esbuild/linux-ppc64": "0.27.2", + "@esbuild/linux-riscv64": "0.27.2", + "@esbuild/linux-s390x": "0.27.2", + "@esbuild/linux-x64": "0.27.2", + "@esbuild/netbsd-arm64": "0.27.2", + "@esbuild/netbsd-x64": "0.27.2", + "@esbuild/openbsd-arm64": "0.27.2", + "@esbuild/openbsd-x64": "0.27.2", + "@esbuild/openharmony-arm64": "0.27.2", + "@esbuild/sunos-x64": "0.27.2", + "@esbuild/win32-arm64": "0.27.2", + "@esbuild/win32-ia32": "0.27.2", + "@esbuild/win32-x64": "0.27.2" + } + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/get-tsconfig": { 
+ "version": "4.13.0", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.0.tgz", + "integrity": "sha512-1VKTZJCwBrvbd+Wn3AOgQP/2Av+TfTCOlE4AcRJE72W1ksZXbAx8PPBR9RzgTeSPzlPMHrbANMH3LbltH73wxQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "resolve-pkg-maps": "^1.0.0" + }, + "funding": { + "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" + } + }, + "node_modules/resolve-pkg-maps": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", + "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" + } + }, + "node_modules/tsx": { + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz", + "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==", + "dev": true, + "license": "MIT", + "dependencies": { + "esbuild": "~0.27.0", + "get-tsconfig": "^4.7.5" + }, + "bin": { + "tsx": "dist/cli.mjs" + }, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "7.16.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", + "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", + "dev": true, + "license": "MIT" + } + } +} diff --git 
a/extractors/ukvisajobs/package.json b/extractors/ukvisajobs/package.json new file mode 100644 index 0000000..e72e9c4 --- /dev/null +++ b/extractors/ukvisajobs/package.json @@ -0,0 +1,22 @@ +{ + "name": "ukvisajobs-extractor", + "version": "0.0.1", + "type": "module", + "description": "UK Visa Jobs extractor - fetches job listings that may sponsor work visas", + "main": "dist/main.js", + "dependencies": {}, + "devDependencies": { + "@apify/tsconfig": "^0.1.0", + "@types/node": "^24.0.0", + "tsx": "^4.4.0", + "typescript": "~5.9.0" + }, + "scripts": { + "start": "npm run start:dev", + "start:prod": "node dist/main.js", + "start:dev": "tsx src/main.ts", + "build": "tsc" + }, + "author": "", + "license": "ISC" +} \ No newline at end of file diff --git a/extractors/ukvisajobs/src/main.ts b/extractors/ukvisajobs/src/main.ts new file mode 100644 index 0000000..4e3dabe --- /dev/null +++ b/extractors/ukvisajobs/src/main.ts @@ -0,0 +1,303 @@ +/** + * UK Visa Jobs Extractor + * + * Fetches job listings from my.ukvisajobs.com that may sponsor work visas. + * Writes JSON files to storage/datasets/default/ for the orchestrator to consume. 
+ * + * Environment variables: + * UKVISAJOBS_TOKEN - JWT token (required) + * UKVISAJOBS_AUTH_TOKEN - Auth cookie token (defaults to UKVISAJOBS_TOKEN) + * UKVISAJOBS_CSRF_TOKEN - CSRF token cookie + * UKVISAJOBS_CI_SESSION - CI session cookie + * UKVISAJOBS_MAX_JOBS - Maximum jobs to fetch (default: 50, max: 200) + * UKVISAJOBS_SEARCH_KEYWORD - Optional search filter + */ + +import { mkdir, writeFile } from 'fs/promises'; +import { join, dirname } from 'path'; +import { fileURLToPath } from 'url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +const API_URL = 'https://my.ukvisajobs.com/ukvisa-api/api/fetch-jobs-data'; +const JOBS_PER_PAGE = 15; +const DEFAULT_MAX_JOBS = 50; +const MAX_ALLOWED_JOBS = 200; + +interface UkVisaJobsApiJob { + id: string; + title: string; + company_name: string; + company_link?: string; + job_link: string; + city: string; + created_date: string; + job_expire: string; + description?: string; + min_salary?: string; + max_salary?: string; + salary_interval?: string; + salary_method?: string; + degree_requirement?: string; + job_type?: string; + job_level?: string; + job_industry?: string; + visa_acceptance?: string; + applicants_outside_uk?: string; + likely_to_sponsor?: string; + definitely_sponsored?: string; + new_entrant?: string; + student_graduate?: string; + image?: string; + computed_cos_total?: string; +} + +interface UkVisaJobsApiResponse { + status: number; + totalJobs: number; + query?: string; + jobs: UkVisaJobsApiJob[]; +} + +interface ExtractedJob { + source: 'ukvisajobs'; + sourceJobId: string; + title: string; + employer: string; + employerUrl?: string; + jobUrl: string; + applicationLink: string; + location?: string; + deadline?: string; + salary?: string; + jobDescription?: string; + datePosted?: string; + degreeRequired?: string; + jobType?: string; + jobLevel?: string; +} + +function toStringOrNull(value: unknown): string | null { + if (value === null || value === undefined) return null; + if (typeof 
value === 'string') { + const trimmed = value.trim(); + return trimmed.length > 0 ? trimmed : null; + } + if (typeof value === 'number' || typeof value === 'boolean') return String(value); + return null; +} + +function toNumberOrNull(value: unknown): number | null { + if (value === null || value === undefined) return null; + if (typeof value === 'number') return Number.isFinite(value) ? value : null; + if (typeof value === 'string') { + const trimmed = value.trim(); + if (!trimmed) return null; + const parsed = Number(trimmed); + return Number.isFinite(parsed) ? parsed : null; + } + return null; +} + +async function fetchPage( + pageNo: number, + token: string, + cookies: string, + options: { searchKeyword?: string } = {} +): Promise { + // Use native FormData API (Node.js 18+) + const formData = new FormData(); + formData.append('is_global', '0'); + formData.append('sortBy', 'desc'); + formData.append('pageNo', String(pageNo)); + formData.append('visaAcceptance', 'false'); + formData.append('applicants_outside_uk', 'false'); + formData.append('searchKeyword', options.searchKeyword || 'null'); + formData.append('token', token); + + const response = await fetch(API_URL, { + method: 'POST', + headers: { + 'accept': 'application/json, text/plain, */*', + 'accept-language': 'en-US,en;q=0.9', + 'cookie': cookies, + 'origin': 'https://my.ukvisajobs.com', + 'referer': `https://my.ukvisajobs.com/open-jobs/1?is_global=0&sortBy=desc&pageNo=${pageNo}&visaAcceptance=false&applicants_outside_uk=false`, + 'user-agent': 'Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Mobile Safari/537.36', + }, + body: formData, + }); + + if (!response.ok) { + const text = await response.text(); + throw new Error(`UKVisaJobs API returned ${response.status}: ${response.statusText} - ${text}`); + } + + return response.json() as Promise; +} + +function mapJob(raw: UkVisaJobsApiJob): ExtractedJob { + // Build salary string from min/max + let salary: 
string | undefined = undefined; + const minSalary = toNumberOrNull(raw.min_salary); + const maxSalary = toNumberOrNull(raw.max_salary); + + if (minSalary !== null && minSalary > 0 && maxSalary !== null && maxSalary > 0) { + salary = `Ā£${minSalary.toLocaleString()}-${maxSalary.toLocaleString()}`; + if (raw.salary_interval) { + salary += ` / ${raw.salary_interval}`; + } + } else if (maxSalary !== null && maxSalary > 0) { + salary = `Ā£${maxSalary.toLocaleString()}`; + if (raw.salary_interval) { + salary += ` / ${raw.salary_interval}`; + } + } + + // Build a description from visa sponsorship fields + const visaInfo: string[] = []; + if (raw.visa_acceptance?.toLowerCase() === 'yes') visaInfo.push('Visa acceptance: Yes'); + if (raw.applicants_outside_uk?.toLowerCase() === 'yes') visaInfo.push('Accepts applicants outside UK'); + if (raw.likely_to_sponsor?.toLowerCase() === 'yes') visaInfo.push('Likely to sponsor'); + if (raw.definitely_sponsored?.toLowerCase() === 'yes') visaInfo.push('Definitely sponsored'); + if (raw.new_entrant?.toLowerCase() === 'yes') visaInfo.push('New entrant friendly'); + if (raw.student_graduate?.toLowerCase() === 'yes') visaInfo.push('Student/Graduate friendly'); + + const description = raw.description + ? raw.description + : visaInfo.length > 0 + ? `Visa sponsorship info: ${visaInfo.join(', ')}` + : undefined; + + return { + source: 'ukvisajobs', + sourceJobId: raw.id, + title: raw.title || 'Unknown Title', + employer: raw.company_name || 'Unknown Employer', + employerUrl: toStringOrNull(raw.company_link) ?? undefined, + jobUrl: raw.job_link, + applicationLink: raw.job_link, + location: raw.city || undefined, + deadline: raw.job_expire || undefined, + salary, + jobDescription: description, + datePosted: raw.created_date || undefined, + degreeRequired: toStringOrNull(raw.degree_requirement) ?? undefined, + jobType: toStringOrNull(raw.job_type) ?? undefined, + jobLevel: toStringOrNull(raw.job_level) ?? 
undefined, + }; +} + +async function main(): Promise { + console.log('šŸ‡¬šŸ‡§ UK Visa Jobs Extractor starting...'); + + // Get credentials from environment + const token = process.env.UKVISAJOBS_TOKEN; + const authToken = process.env.UKVISAJOBS_AUTH_TOKEN || token; + const csrfToken = process.env.UKVISAJOBS_CSRF_TOKEN || ''; + const ciSession = process.env.UKVISAJOBS_CI_SESSION || ''; + const searchKeyword = process.env.UKVISAJOBS_SEARCH_KEYWORD || undefined; + + if (!token) { + console.error('āŒ UKVISAJOBS_TOKEN environment variable is not set'); + process.exit(1); + } + + // Build cookies string + const cookieParts: string[] = []; + if (csrfToken) cookieParts.push(`csrf_token=${csrfToken}`); + if (ciSession) cookieParts.push(`ci_session=${ciSession}`); + if (authToken) cookieParts.push(`authToken=${authToken}`); + const cookies = cookieParts.join('; '); + + console.log(` Cookies configured: ${cookieParts.length > 0 ? 'Yes' : 'No'}`); + console.log(` Token length: ${token.length}`); + + // Get max jobs from environment + const maxJobsEnv = toNumberOrNull(process.env.UKVISAJOBS_MAX_JOBS); + const maxJobs = Math.min(maxJobsEnv ?? 
DEFAULT_MAX_JOBS, MAX_ALLOWED_JOBS); + const maxPages = Math.ceil(maxJobs / JOBS_PER_PAGE); + + console.log(` Max jobs: ${maxJobs} (${maxPages} pages)`); + if (searchKeyword) { + console.log(` Search keyword: ${searchKeyword}`); + } + + const allJobs: ExtractedJob[] = []; + const seenIds = new Set(); + let totalAvailable = 0; + let pageNo = 1; + + try { + while (pageNo <= maxPages && allJobs.length < maxJobs) { + console.log(` Fetching page ${pageNo}/${maxPages}...`); + + const response = await fetchPage(pageNo, token, cookies, { searchKeyword }); + + if (response.status !== 1) { + console.warn(` āš ļø API returned status ${response.status} on page ${pageNo}`); + break; + } + + if (pageNo === 1) { + totalAvailable = response.totalJobs; + console.log(` Total available: ${totalAvailable} jobs`); + } + + if (!response.jobs || response.jobs.length === 0) { + console.log(` No more jobs on page ${pageNo}`); + break; + } + + for (const rawJob of response.jobs) { + if (allJobs.length >= maxJobs) break; + + // Deduplicate by ID + if (seenIds.has(rawJob.id)) continue; + seenIds.add(rawJob.id); + + const mapped = mapJob(rawJob); + allJobs.push(mapped); + } + + // If we got fewer jobs than a full page, we're at the end + if (response.jobs.length < JOBS_PER_PAGE) { + break; + } + + pageNo++; + + // Small delay to be nice to the API + await new Promise((resolve) => setTimeout(resolve, 500)); + } + + console.log(`āœ… Scraped ${allJobs.length} jobs`); + + // Write output to storage directory (similar to Crawlee dataset structure) + const storageDir = join(__dirname, '../storage/datasets/default'); + await mkdir(storageDir, { recursive: true }); + + // Write each job as a separate JSON file (Crawlee dataset format) + for (let i = 0; i < allJobs.length; i++) { + const filename = join(storageDir, `${String(i + 1).padStart(6, '0')}.json`); + await writeFile(filename, JSON.stringify(allJobs[i], null, 2)); + } + + // Also write a combined output file for easier consumption + const 
outputFile = join(storageDir, 'jobs.json'); + await writeFile(outputFile, JSON.stringify(allJobs, null, 2)); + + console.log(` Output written to: ${storageDir}`); + console.log(` Jobs file: ${outputFile}`); + + } catch (error) { + const message = error instanceof Error ? error.message : 'Unknown error'; + console.error(`āŒ Error: ${message}`); + process.exit(1); + } +} + +main().catch((error) => { + console.error('Fatal error:', error); + process.exit(1); +}); diff --git a/extractors/ukvisajobs/tsconfig.json b/extractors/ukvisajobs/tsconfig.json new file mode 100644 index 0000000..cc141ac --- /dev/null +++ b/extractors/ukvisajobs/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "@apify/tsconfig", + "compilerOptions": { + "module": "NodeNext", + "moduleResolution": "NodeNext", + "target": "ES2022", + "outDir": "dist", + "noUnusedLocals": false, + "lib": ["DOM"] + }, + "include": ["./src/**/*"] +} diff --git a/orchestrator/.env.example b/orchestrator/.env.example index a12357a..501197e 100644 --- a/orchestrator/.env.example +++ b/orchestrator/.env.example @@ -32,3 +32,12 @@ JOBSPY_RESULTS_WANTED=200 JOBSPY_HOURS_OLD=72 JOBSPY_COUNTRY_INDEED=UK JOBSPY_LINKEDIN_FETCH_DESCRIPTION=1 + +# ============================================================================= +# UKVisaJobs (UK visa sponsorship job scraping) - optional +# ============================================================================= +# Get these tokens from browser dev tools after logging into my.ukvisajobs.com +UKVISAJOBS_TOKEN= +UKVISAJOBS_AUTH_TOKEN= +UKVISAJOBS_CSRF_TOKEN= +UKVISAJOBS_CI_SESSION= diff --git a/orchestrator/src/client/App.tsx b/orchestrator/src/client/App.tsx index 5f4fbb0..48b409a 100644 --- a/orchestrator/src/client/App.tsx +++ b/orchestrator/src/client/App.tsx @@ -12,7 +12,7 @@ import { Header, JobList, PipelineProgress, Stats } from "./components"; import * as api from "./api"; import { SettingsPage } from "./pages/SettingsPage"; -const DEFAULT_PIPELINE_SOURCES: JobSource[] = 
["gradcracker", "indeed", "linkedin"]; +const DEFAULT_PIPELINE_SOURCES: JobSource[] = ["gradcracker", "indeed", "linkedin", "ukvisajobs"]; const PIPELINE_SOURCES_STORAGE_KEY = "jobops.pipeline.sources"; export const App: React.FC = () => { @@ -33,7 +33,7 @@ export const App: React.FC = () => { const raw = localStorage.getItem(PIPELINE_SOURCES_STORAGE_KEY); if (!raw) return DEFAULT_PIPELINE_SOURCES; const parsed = JSON.parse(raw) as unknown; - const allowed: JobSource[] = ["gradcracker", "indeed", "linkedin"]; + const allowed: JobSource[] = ["gradcracker", "indeed", "linkedin", "ukvisajobs"]; if (!Array.isArray(parsed)) return DEFAULT_PIPELINE_SOURCES; const next = parsed.filter((value): value is JobSource => allowed.includes(value)); return next.length > 0 ? next : DEFAULT_PIPELINE_SOURCES; diff --git a/orchestrator/src/client/components/Header.tsx b/orchestrator/src/client/components/Header.tsx index 5498dd0..0e6f579 100644 --- a/orchestrator/src/client/components/Header.tsx +++ b/orchestrator/src/client/components/Header.tsx @@ -60,9 +60,10 @@ export const Header: React.FC = ({ gradcracker: "Gradcracker", indeed: "Indeed", linkedin: "LinkedIn", + ukvisajobs: "UK Visa Jobs", }; - const orderedSources: JobSource[] = ["gradcracker", "indeed", "linkedin"]; + const orderedSources: JobSource[] = ["gradcracker", "indeed", "linkedin", "ukvisajobs"]; const toggleSource = (source: JobSource, checked: boolean) => { const next = checked diff --git a/orchestrator/src/server/api/routes.ts b/orchestrator/src/server/api/routes.ts index 0a8911f..2b0a475 100644 --- a/orchestrator/src/server/api/routes.ts +++ b/orchestrator/src/server/api/routes.ts @@ -60,10 +60,10 @@ apiRouter.get('/jobs', async (req: Request, res: Response) => { try { const statusFilter = req.query.status as string | undefined; const statuses = statusFilter?.split(',').filter(Boolean) as JobStatus[] | undefined; - + const jobs = await jobsRepo.getAllJobs(statuses); const stats = await jobsRepo.getJobStats(); - + 
const response: ApiResponse = { success: true, data: { @@ -72,7 +72,7 @@ apiRouter.get('/jobs', async (req: Request, res: Response) => { byStatus: stats, }, }; - + res.json(response); } catch (error) { const message = error instanceof Error ? error.message : 'Unknown error'; @@ -86,11 +86,11 @@ apiRouter.get('/jobs', async (req: Request, res: Response) => { apiRouter.get('/jobs/:id', async (req: Request, res: Response) => { try { const job = await jobsRepo.getJobById(req.params.id); - + if (!job) { return res.status(404).json({ success: false, error: 'Job not found' }); } - + res.json({ success: true, data: job }); } catch (error) { const message = error instanceof Error ? error.message : 'Unknown error'; @@ -113,11 +113,11 @@ apiRouter.patch('/jobs/:id', async (req: Request, res: Response) => { try { const input = updateJobSchema.parse(req.body); const job = await jobsRepo.updateJob(req.params.id, input); - + if (!job) { return res.status(404).json({ success: false, error: 'Job not found' }); } - + res.json({ success: true, data: job }); } catch (error) { if (error instanceof z.ZodError) { @@ -137,11 +137,11 @@ apiRouter.post('/jobs/:id/process', async (req: Request, res: Response) => { const force = forceRaw === '1' || forceRaw === 'true'; const result = await processJob(req.params.id, { force }); - + if (!result.success) { return res.status(400).json({ success: false, error: result.error }); } - + const job = await jobsRepo.getJobById(req.params.id); res.json({ success: true, data: job }); } catch (error) { @@ -156,13 +156,13 @@ apiRouter.post('/jobs/:id/process', async (req: Request, res: Response) => { apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => { try { const job = await jobsRepo.getJobById(req.params.id); - + if (!job) { return res.status(404).json({ success: false, error: 'Job not found' }); } - + const appliedAt = new Date().toISOString(); - + // Sync to Notion const notionResult = await createNotionEntry({ id: job.id, @@ -175,7 
+175,7 @@ apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => { pdfPath: job.pdfPath, appliedAt, }); - + // Update job status const updatedJob = await jobsRepo.updateJob(job.id, { status: 'applied', @@ -186,7 +186,7 @@ apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => { if (updatedJob) { notifyJobCompleteWebhook(updatedJob).catch(console.warn) } - + res.json({ success: true, data: updatedJob }); } catch (error) { const message = error instanceof Error ? error.message : 'Unknown error'; @@ -200,11 +200,11 @@ apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => { apiRouter.post('/jobs/:id/reject', async (req: Request, res: Response) => { try { const job = await jobsRepo.updateJob(req.params.id, { status: 'rejected' }); - + if (!job) { return res.status(404).json({ success: false, error: 'Job not found' }); } - + res.json({ success: true, data: job }); } catch (error) { const message = error instanceof Error ? error.message : 'Unknown error'; @@ -351,7 +351,7 @@ apiRouter.get('/pipeline/status', async (req: Request, res: Response) => { try { const { isRunning } = getPipelineStatus(); const lastRun = await pipelineRepo.getLatestPipelineRun(); - + const response: ApiResponse = { success: true, data: { @@ -360,7 +360,7 @@ apiRouter.get('/pipeline/status', async (req: Request, res: Response) => { nextScheduledRun: null, // Would come from n8n }, }; - + res.json(response); } catch (error) { const message = error instanceof Error ? 
error.message : 'Unknown error'; @@ -377,20 +377,20 @@ apiRouter.get('/pipeline/progress', (req: Request, res: Response) => { res.setHeader('Cache-Control', 'no-cache'); res.setHeader('Connection', 'keep-alive'); res.setHeader('X-Accel-Buffering', 'no'); // Disable Nginx buffering - + // Send initial progress const sendProgress = (data: unknown) => { res.write(`data: ${JSON.stringify(data)}\n\n`); }; - + // Subscribe to progress updates const unsubscribe = subscribeToProgress(sendProgress); - + // Send heartbeat every 30 seconds to keep connection alive const heartbeat = setInterval(() => { res.write(': heartbeat\n\n'); }, 30000); - + // Cleanup on close req.on('close', () => { clearInterval(heartbeat); @@ -417,19 +417,19 @@ apiRouter.get('/pipeline/runs', async (req: Request, res: Response) => { const runPipelineSchema = z.object({ topN: z.number().min(1).max(50).optional(), minSuitabilityScore: z.number().min(0).max(100).optional(), - sources: z.array(z.enum(['gradcracker', 'indeed', 'linkedin'])).min(1).optional(), + sources: z.array(z.enum(['gradcracker', 'indeed', 'linkedin', 'ukvisajobs'])).min(1).optional(), }); apiRouter.post('/pipeline/run', async (req: Request, res: Response) => { try { const config = runPipelineSchema.parse(req.body); - + // Start pipeline in background runPipeline(config).catch(console.error); - - res.json({ - success: true, - data: { message: 'Pipeline started' } + + res.json({ + success: true, + data: { message: 'Pipeline started' } }); } catch (error) { if (error instanceof z.ZodError) { @@ -451,21 +451,21 @@ apiRouter.post('/webhook/trigger', async (req: Request, res: Response) => { // Optional: Add authentication check const authHeader = req.headers.authorization; const expectedToken = process.env.WEBHOOK_SECRET; - + if (expectedToken && authHeader !== `Bearer ${expectedToken}`) { return res.status(401).json({ success: false, error: 'Unauthorized' }); } - + try { // Start pipeline in background runPipeline().catch(console.error); - 
- res.json({ - success: true, - data: { + + res.json({ + success: true, + data: { message: 'Pipeline triggered', triggeredAt: new Date().toISOString(), - } + } }); } catch (error) { const message = error instanceof Error ? error.message : 'Unknown error'; @@ -483,14 +483,14 @@ apiRouter.post('/webhook/trigger', async (req: Request, res: Response) => { apiRouter.delete('/database', async (req: Request, res: Response) => { try { const result = clearDatabase(); - - res.json({ - success: true, - data: { + + res.json({ + success: true, + data: { message: 'Database cleared', jobsDeleted: result.jobsDeleted, runsDeleted: result.runsDeleted, - } + } }); } catch (error) { const message = error instanceof Error ? error.message : 'Unknown error'; diff --git a/orchestrator/src/server/db/schema.ts b/orchestrator/src/server/db/schema.ts index f317983..b61252a 100644 --- a/orchestrator/src/server/db/schema.ts +++ b/orchestrator/src/server/db/schema.ts @@ -7,9 +7,9 @@ import { sql } from 'drizzle-orm'; export const jobs = sqliteTable('jobs', { id: text('id').primaryKey(), - + // From crawler - source: text('source', { enum: ['gradcracker', 'indeed', 'linkedin'] }).notNull().default('gradcracker'), + source: text('source', { enum: ['gradcracker', 'indeed', 'linkedin', 'ukvisajobs'] }).notNull().default('gradcracker'), sourceJobId: text('source_job_id'), jobUrlDirect: text('job_url_direct'), datePosted: text('date_posted'), @@ -51,17 +51,17 @@ export const jobs = sqliteTable('jobs', { companyReviewsCount: integer('company_reviews_count'), vacancyCount: integer('vacancy_count'), workFromHomeType: text('work_from_home_type'), - + // Orchestrator enrichments - status: text('status', { - enum: ['discovered', 'processing', 'ready', 'applied', 'rejected', 'expired'] + status: text('status', { + enum: ['discovered', 'processing', 'ready', 'applied', 'rejected', 'expired'] }).notNull().default('discovered'), suitabilityScore: real('suitability_score'), suitabilityReason: 
text('suitability_reason'), tailoredSummary: text('tailored_summary'), pdfPath: text('pdf_path'), notionPageId: text('notion_page_id'), - + // Timestamps discoveredAt: text('discovered_at').notNull().default(sql`(datetime('now'))`), processedAt: text('processed_at'), diff --git a/orchestrator/src/server/pipeline/orchestrator.ts b/orchestrator/src/server/pipeline/orchestrator.ts index b5e52cc..100588a 100644 --- a/orchestrator/src/server/pipeline/orchestrator.ts +++ b/orchestrator/src/server/pipeline/orchestrator.ts @@ -12,6 +12,7 @@ import { join, dirname } from 'path'; import { fileURLToPath } from 'url'; import { runCrawler } from '../services/crawler.js'; import { runJobSpy } from '../services/jobspy.js'; +import { runUkVisaJobs } from '../services/ukvisajobs.js'; import { scoreJobSuitability } from '../services/scorer.js'; import { generateSummary } from '../services/summary.js'; import { generatePdf } from '../services/pdf.js'; @@ -27,7 +28,7 @@ const DEFAULT_PROFILE_PATH = join(__dirname, '../../../../resume-generator/base. 
const DEFAULT_CONFIG: PipelineConfig = { topN: 10, minSuitabilityScore: 50, - sources: ['gradcracker', 'indeed', 'linkedin'], + sources: ['gradcracker', 'indeed', 'linkedin', 'ukvisajobs'], profilePath: DEFAULT_PROFILE_PATH, outputDir: join(__dirname, '../../../data/pdfs'), }; @@ -88,22 +89,22 @@ export async function runPipeline(config: Partial = {}): Promise error: 'Pipeline is already running', }; } - + isPipelineRunning = true; resetProgress(); const mergedConfig = { ...DEFAULT_CONFIG, ...config }; - + // Create pipeline run record const pipelineRun = await pipelineRepo.createPipelineRun(); - + console.log('šŸš€ Starting job pipeline...'); console.log(` Config: topN=${mergedConfig.topN}, minScore=${mergedConfig.minSuitabilityScore} (manual processing)`); - + try { // Step 1: Load profile console.log('\nšŸ“‹ Loading profile...'); const profile = await loadProfile(mergedConfig.profilePath); - + // Step 2: Run crawler console.log('\nšŸ•·ļø Running crawler...'); progressHelpers.startCrawling(); @@ -154,6 +155,21 @@ export async function runPipeline(config: Partial = {}): Promise } } + // Run UKVisaJobs extractor if selected + if (mergedConfig.sources.includes('ukvisajobs')) { + updateProgress({ + step: 'crawling', + detail: 'UKVisaJobs: scraping visa-sponsoring jobs...', + }); + + const ukVisaResult = await runUkVisaJobs({ maxJobs: 50 }); + if (!ukVisaResult.success) { + sourceErrors.push(`ukvisajobs: ${ukVisaResult.error ?? 
'unknown error'}`); + } else { + discoveredJobs.push(...ukVisaResult.jobs); + } + } + if (discoveredJobs.length === 0 && sourceErrors.length > 0) { throw new Error(`All sources failed: ${sourceErrors.join('; ')}`); } @@ -163,18 +179,18 @@ export async function runPipeline(config: Partial = {}): Promise } progressHelpers.crawlingComplete(discoveredJobs.length); - + // Step 3: Import discovered jobs console.log('\nšŸ’¾ Importing jobs to database...'); const { created, skipped } = await jobsRepo.bulkCreateJobs(discoveredJobs); console.log(` Created: ${created}, Skipped (duplicates): ${skipped}`); - + progressHelpers.importComplete(created, skipped); - + await pipelineRepo.updatePipelineRun(pipelineRun.id, { jobsDiscovered: created, }); - + // Step 4: Score all discovered jobs missing a score console.log('\nšŸŽÆ Scoring jobs for suitability...'); const unprocessedJobs = await jobsRepo.getUnscoredDiscoveredJobs(); @@ -187,7 +203,7 @@ export async function runPipeline(config: Partial = {}): Promise totalToProcess: 0, currentJob: undefined, }); - + // Score jobs with progress updates const scoredJobs: Array = []; for (let i = 0; i < unprocessedJobs.length; i++) { @@ -217,21 +233,21 @@ export async function runPipeline(config: Partial = {}): Promise suitabilityReason: reason, }); } - + progressHelpers.scoringComplete(scoredJobs.length); console.log(`\nšŸ“Š Scored ${scoredJobs.length} jobs. 
Ready for manual processing.`); - + // Update pipeline run as completed await pipelineRepo.updatePipelineRun(pipelineRun.id, { status: 'completed', completedAt: new Date().toISOString(), jobsProcessed: 0, }); - + console.log('\nšŸŽ‰ Pipeline completed!'); console.log(` Jobs discovered: ${created}`); console.log(' Jobs processed: 0 (manual)'); - + progressHelpers.complete(created, 0); await notifyPipelineWebhook('pipeline.completed', { @@ -241,22 +257,22 @@ export async function runPipeline(config: Partial = {}): Promise jobsProcessed: 0, }) isPipelineRunning = false; - + return { success: true, jobsDiscovered: created, jobsProcessed: 0, }; - + } catch (error) { const message = error instanceof Error ? error.message : 'Unknown error'; - + await pipelineRepo.updatePipelineRun(pipelineRun.id, { status: 'failed', completedAt: new Date().toISOString(), errorMessage: message, }); - + progressHelpers.failed(message); await notifyPipelineWebhook('pipeline.failed', { @@ -264,9 +280,9 @@ export async function runPipeline(config: Partial = {}): Promise error: message, }) isPipelineRunning = false; - + console.error('\nāŒ Pipeline failed:', message); - + return { success: false, jobsDiscovered: 0, @@ -287,7 +303,7 @@ export async function processJob( error?: string; }> { console.log(`šŸ“ Processing job ${jobId}...`); - + try { const job = await jobsRepo.getJobById(jobId); if (!job) { @@ -297,9 +313,9 @@ export async function processJob( if (job.status !== 'discovered' && job.status !== 'ready') { return { success: false, error: `Job cannot be processed from status: ${job.status}` }; } - + const profile = await loadProfile(DEFAULT_PROFILE_PATH); - + // Mark as processing await jobsRepo.updateJob(job.id, { status: 'processing' }); @@ -314,7 +330,7 @@ export async function processJob( job.suitabilityScore = suitability.score; job.suitabilityReason = suitability.reason; } - + // Generate summary (AI) // If forcing, always recompute; otherwise compute if missing. 
if (options?.force || !job.tailoredSummary) { @@ -323,7 +339,7 @@ export async function processJob( job.jobDescription || '', profile ); - + if (summaryResult.success) { await jobsRepo.updateJob(job.id, { tailoredSummary: summaryResult.summary, @@ -331,7 +347,7 @@ export async function processJob( job.tailoredSummary = summaryResult.summary ?? null; } } - + // Generate PDF console.log(' Generating PDF...'); const pdfResult = await generatePdf( @@ -340,16 +356,16 @@ export async function processJob( job.jobDescription || '', DEFAULT_PROFILE_PATH ); - + // Mark as ready await jobsRepo.updateJob(job.id, { status: 'ready', pdfPath: pdfResult.pdfPath ?? undefined, }); - + console.log(' āœ… Done!'); return { success: true }; - + } catch (error) { const message = error instanceof Error ? error.message : 'Unknown error'; return { success: false, error: message }; diff --git a/orchestrator/src/server/services/ukvisajobs.ts b/orchestrator/src/server/services/ukvisajobs.ts new file mode 100644 index 0000000..8002545 --- /dev/null +++ b/orchestrator/src/server/services/ukvisajobs.ts @@ -0,0 +1,125 @@ +/** + * Service for running the UK Visa Jobs extractor (extractors/ukvisajobs). + * + * Spawns the extractor as a child process and reads its output dataset. + */ + +import { spawn } from 'child_process'; +import { readdir, readFile, rm, mkdir } from 'fs/promises'; +import { join, dirname } from 'path'; +import { fileURLToPath } from 'url'; +import type { CreateJobInput } from '../../shared/types.js'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const UKVISAJOBS_DIR = join(__dirname, '../../../../extractors/ukvisajobs'); +const STORAGE_DIR = join(UKVISAJOBS_DIR, 'storage/datasets/default'); + +export interface RunUkVisaJobsOptions { + /** Maximum number of jobs to fetch. Defaults to 50, max 200. 
*/ + maxJobs?: number; + /** Search keyword filter (optional) */ + searchKeyword?: string; +} + +export interface UkVisaJobsResult { + success: boolean; + jobs: CreateJobInput[]; + error?: string; +} + +/** + * Clear previous extraction results. + */ +async function clearStorageDataset(): Promise { + try { + await rm(STORAGE_DIR, { recursive: true, force: true }); + } catch { + // Ignore if directory doesn't exist + } +} + +/** + * Run the UK Visa Jobs extractor. + */ +export async function runUkVisaJobs(options: RunUkVisaJobsOptions = {}): Promise { + console.log('šŸ‡¬šŸ‡§ Running UK Visa Jobs extractor...'); + + try { + // Clear previous results + await clearStorageDataset(); + await mkdir(STORAGE_DIR, { recursive: true }); + + // Run the extractor using npx tsx directly (more reliable in Docker/different environments) + await new Promise((resolve, reject) => { + const child = spawn('npx', ['tsx', 'src/main.ts'], { + cwd: UKVISAJOBS_DIR, + stdio: 'inherit', + env: { + ...process.env, + UKVISAJOBS_MAX_JOBS: String(options.maxJobs ?? 50), + UKVISAJOBS_SEARCH_KEYWORD: options.searchKeyword ?? '', + }, + }); + + child.on('close', (code) => { + if (code === 0) resolve(); + else reject(new Error(`UK Visa Jobs extractor exited with code ${code}`)); + }); + child.on('error', reject); + }); + + // Read the output dataset + const jobs = await readDataset(); + console.log(`āœ… UK Visa Jobs: imported ${jobs.length} jobs`); + + return { success: true, jobs }; + } catch (error) { + const message = error instanceof Error ? error.message : 'Unknown error'; + console.error(`āŒ UK Visa Jobs failed: ${message}`); + return { success: false, jobs: [], error: message }; + } +} + +/** + * Read jobs from the extractor's output dataset. 
+ */ +async function readDataset(): Promise { + const jobs: CreateJobInput[] = []; + + try { + const files = await readdir(STORAGE_DIR); + const jsonFiles = files.filter((f) => f.endsWith('.json') && f !== 'jobs.json'); + + for (const file of jsonFiles.sort()) { + try { + const content = await readFile(join(STORAGE_DIR, file), 'utf-8'); + const job = JSON.parse(content); + + // Map to CreateJobInput format + jobs.push({ + source: 'ukvisajobs', + sourceJobId: job.sourceJobId, + title: job.title || 'Unknown Title', + employer: job.employer || 'Unknown Employer', + employerUrl: job.employerUrl, + jobUrl: job.jobUrl, + applicationLink: job.applicationLink || job.jobUrl, + location: job.location, + deadline: job.deadline, + salary: job.salary, + jobDescription: job.jobDescription, + datePosted: job.datePosted, + degreeRequired: job.degreeRequired, + jobType: job.jobType, + jobLevel: job.jobLevel, + }); + } catch { + // Skip invalid files + } + } + } catch { + // Dataset directory doesn't exist yet + } + + return jobs; +} diff --git a/orchestrator/src/shared/types.ts b/orchestrator/src/shared/types.ts index 430cb35..a7ad297 100644 --- a/orchestrator/src/shared/types.ts +++ b/orchestrator/src/shared/types.ts @@ -2,7 +2,7 @@ * Shared types for the job-ops orchestrator. 
*/ -export type JobStatus = +export type JobStatus = | 'discovered' // Crawled but not processed | 'processing' // Currently generating resume | 'ready' // PDF generated, waiting for user to apply @@ -13,11 +13,12 @@ export type JobStatus = export type JobSource = | 'gradcracker' | 'indeed' - | 'linkedin'; + | 'linkedin' + | 'ukvisajobs'; export interface Job { id: string; - + // Source / provenance source: JobSource; sourceJobId: string | null; // External ID (if provided) @@ -37,7 +38,7 @@ export interface Job { degreeRequired: string | null; starting: string | null; jobDescription: string | null; - + // Orchestrator enrichments status: JobStatus; suitabilityScore: number | null; // 0-100 AI-generated score @@ -71,7 +72,7 @@ export interface Job { companyReviewsCount: number | null; vacancyCount: number | null; workFromHomeType: string | null; - + // Timestamps discoveredAt: string; processedAt: string | null;