initial implementation

This commit is contained in:
DaKheera47 2025-12-26 20:17:05 +00:00
parent 8a236c0be9
commit 0f36d9b8a6
18 changed files with 1249 additions and 85 deletions

View File

@ -37,3 +37,14 @@ JOBSPY_RESULTS_WANTED=200
JOBSPY_HOURS_OLD=72
JOBSPY_COUNTRY_INDEED=UK
JOBSPY_LINKEDIN_FETCH_DESCRIPTION=1
# =============================================================================
# UKVisaJobs (UK visa sponsorship jobs) - optional
# =============================================================================
# Get these tokens from browser dev tools after logging into my.ukvisajobs.com
# See extractors/ukvisajobs/README.md for detailed instructions.
UKVISAJOBS_TOKEN=
UKVISAJOBS_AUTH_TOKEN=
UKVISAJOBS_CSRF_TOKEN=
UKVISAJOBS_CI_SESSION=
UKVISAJOBS_MAX_JOBS=50

View File

@ -28,6 +28,7 @@ RUN pip3 install --no-cache-dir --break-system-packages playwright python-jobspy
# Copy package files first for better caching
COPY orchestrator/package*.json ./orchestrator/
COPY extractors/gradcracker/package*.json ./extractors/gradcracker/
COPY extractors/ukvisajobs/package*.json ./extractors/ukvisajobs/
# Install Node.js dependencies
WORKDIR /app/orchestrator
@ -39,11 +40,15 @@ RUN npm install --production=false
# Install Camoufox browser (downloads its own Firefox fork)
RUN npx camoufox fetch
WORKDIR /app/extractors/ukvisajobs
RUN npm install --production=false
# Copy source code
WORKDIR /app
COPY orchestrator ./orchestrator
COPY extractors/gradcracker ./extractors/gradcracker
COPY extractors/jobspy ./extractors/jobspy
COPY extractors/ukvisajobs ./extractors/ukvisajobs
COPY resume-generator ./resume-generator
# Build the orchestrator (client + server)

View File

@ -75,6 +75,7 @@ job-ops/
src/shared/ # shared types (Job, PipelineRun, etc.)
extractors/gradcracker/ # Crawlee crawler (Gradcracker)
extractors/jobspy/ # JobSpy wrapper (Indeed/LinkedIn/etc)
extractors/ukvisajobs/ # UK Visa Jobs API extractor
resume-generator/ # Python Playwright automation for rxresu.me
base.json # your exported base resume (template)
data/ # persisted runtime artifacts (Docker default)
@ -87,7 +88,7 @@ job-ops/
## Data model (SQLite)
- `jobs`
- from crawl: `title`, `employer`, `jobUrl`, `applicationLink`, `deadline`, `salary`, `location`, `jobDescription`, etc.
- from crawl: `title`, `employer`, `jobUrl`, `applicationLink`, `deadline`, `salary`, `location`, `jobDescription`, `source` (gradcracker/indeed/linkedin/ukvisajobs), etc.
- enrichments: `status` (`discovered` -> `processing` -> `ready` -> `applied`/`rejected`), `suitabilityScore`, `suitabilityReason`, `tailoredSummary`, `pdfPath`, `notionPageId`
- `pipeline_runs`: audit log of runs (`running`/`completed`/`failed`, counts, error)

View File

@ -50,6 +50,14 @@ services:
# Optional: Webhook secret for n8n
- WEBHOOK_SECRET=${WEBHOOK_SECRET:-}
# UKVisaJobs (UK visa sponsorship jobs) - optional
- UKVISAJOBS_TOKEN=${UKVISAJOBS_TOKEN:-}
- UKVISAJOBS_AUTH_TOKEN=${UKVISAJOBS_AUTH_TOKEN:-}
- UKVISAJOBS_CSRF_TOKEN=${UKVISAJOBS_CSRF_TOKEN:-}
- UKVISAJOBS_CI_SESSION=${UKVISAJOBS_CI_SESSION:-}
- UKVISAJOBS_MAX_JOBS=${UKVISAJOBS_MAX_JOBS:-50}
- UKVISAJOBS_SEARCH_KEYWORD=${UKVISAJOBS_SEARCH_KEYWORD:-}
# Python path (uses system python in container)
- PYTHON_PATH=/usr/bin/python3
restart: unless-stopped

11
extractors/ukvisajobs/.gitignore vendored Normal file
View File

@ -0,0 +1,11 @@
# Dependencies
node_modules/
# Build output
dist/
# Crawlee storage
storage/
# Logs
*.log

View File

@ -0,0 +1,40 @@
# UK Visa Jobs Extractor
Fetches job listings from [my.ukvisajobs.com](https://my.ukvisajobs.com) for roles whose employers may sponsor work visas.
## Setup
```bash
npm install
```
## Configuration
Set the following environment variables (you can get these from your browser's dev tools after logging in):
| Variable | Description |
|----------|-------------|
| `UKVISAJOBS_TOKEN` | JWT token from the request body (required) |
| `UKVISAJOBS_AUTH_TOKEN` | Auth cookie token (defaults to UKVISAJOBS_TOKEN) |
| `UKVISAJOBS_CSRF_TOKEN` | CSRF token from cookies |
| `UKVISAJOBS_CI_SESSION` | CI session ID from cookies |
| `UKVISAJOBS_MAX_JOBS` | Maximum jobs to fetch (default: 50, max: 200) |
| `UKVISAJOBS_SEARCH_KEYWORD` | Optional search filter |
## How to get tokens
1. Log into `my.ukvisajobs.com` in your browser
2. Open Developer Tools → Network tab
3. Navigate to the jobs page
4. Find the `fetch-jobs-data` POST request
5. Copy values:
- From **Request Body**: copy the `token` field → `UKVISAJOBS_TOKEN`
- From **Cookies**: copy `authToken` → `UKVISAJOBS_AUTH_TOKEN`, `csrf_token` → `UKVISAJOBS_CSRF_TOKEN`, `ci_session` → `UKVISAJOBS_CI_SESSION`
## Running
```bash
npm start
```
Output is written to `storage/datasets/default/` as JSON files: one numbered file per job (`000001.json`, `000002.json`, …) plus a combined `jobs.json`.

599
extractors/ukvisajobs/package-lock.json generated Normal file
View File

@ -0,0 +1,599 @@
{
"name": "ukvisajobs-extractor",
"version": "0.0.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "ukvisajobs-extractor",
"version": "0.0.1",
"license": "ISC",
"devDependencies": {
"@apify/tsconfig": "^0.1.0",
"@types/node": "^24.0.0",
"tsx": "^4.4.0",
"typescript": "~5.9.0"
}
},
"node_modules/@apify/tsconfig": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/@apify/tsconfig/-/tsconfig-0.1.1.tgz",
"integrity": "sha512-cS7mwN2UW1UXcluGXRDHH0Vr2VsSLkw2DwLTwoSBkcJSe8fvCr3MPryTSq0uod4MashpMURxJ7CsLKxs82VmOQ==",
"dev": true,
"license": "Apache-2.0"
},
"node_modules/@esbuild/aix-ppc64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.2.tgz",
"integrity": "sha512-GZMB+a0mOMZs4MpDbj8RJp4cw+w1WV5NYD6xzgvzUJ5Ek2jerwfO2eADyI6ExDSUED+1X8aMbegahsJi+8mgpw==",
"cpu": [
"ppc64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"aix"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/android-arm": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.2.tgz",
"integrity": "sha512-DVNI8jlPa7Ujbr1yjU2PfUSRtAUZPG9I1RwW4F4xFB1Imiu2on0ADiI/c3td+KmDtVKNbi+nffGDQMfcIMkwIA==",
"cpu": [
"arm"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/android-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.2.tgz",
"integrity": "sha512-pvz8ZZ7ot/RBphf8fv60ljmaoydPU12VuXHImtAs0XhLLw+EXBi2BLe3OYSBslR4rryHvweW5gmkKFwTiFy6KA==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/android-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.2.tgz",
"integrity": "sha512-z8Ank4Byh4TJJOh4wpz8g2vDy75zFL0TlZlkUkEwYXuPSgX8yzep596n6mT7905kA9uHZsf/o2OJZubl2l3M7A==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/darwin-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.2.tgz",
"integrity": "sha512-davCD2Zc80nzDVRwXTcQP/28fiJbcOwvdolL0sOiOsbwBa72kegmVU0Wrh1MYrbuCL98Omp5dVhQFWRKR2ZAlg==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/darwin-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.2.tgz",
"integrity": "sha512-ZxtijOmlQCBWGwbVmwOF/UCzuGIbUkqB1faQRf5akQmxRJ1ujusWsb3CVfk/9iZKr2L5SMU5wPBi1UWbvL+VQA==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/freebsd-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.2.tgz",
"integrity": "sha512-lS/9CN+rgqQ9czogxlMcBMGd+l8Q3Nj1MFQwBZJyoEKI50XGxwuzznYdwcav6lpOGv5BqaZXqvBSiB/kJ5op+g==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/freebsd-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.2.tgz",
"integrity": "sha512-tAfqtNYb4YgPnJlEFu4c212HYjQWSO/w/h/lQaBK7RbwGIkBOuNKQI9tqWzx7Wtp7bTPaGC6MJvWI608P3wXYA==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-arm": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.2.tgz",
"integrity": "sha512-vWfq4GaIMP9AIe4yj1ZUW18RDhx6EPQKjwe7n8BbIecFtCQG4CfHGaHuh7fdfq+y3LIA2vGS/o9ZBGVxIDi9hw==",
"cpu": [
"arm"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.2.tgz",
"integrity": "sha512-hYxN8pr66NsCCiRFkHUAsxylNOcAQaxSSkHMMjcpx0si13t1LHFphxJZUiGwojB1a/Hd5OiPIqDdXONia6bhTw==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-ia32": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.2.tgz",
"integrity": "sha512-MJt5BRRSScPDwG2hLelYhAAKh9imjHK5+NE/tvnRLbIqUWa+0E9N4WNMjmp/kXXPHZGqPLxggwVhz7QP8CTR8w==",
"cpu": [
"ia32"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-loong64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.2.tgz",
"integrity": "sha512-lugyF1atnAT463aO6KPshVCJK5NgRnU4yb3FUumyVz+cGvZbontBgzeGFO1nF+dPueHD367a2ZXe1NtUkAjOtg==",
"cpu": [
"loong64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-mips64el": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.2.tgz",
"integrity": "sha512-nlP2I6ArEBewvJ2gjrrkESEZkB5mIoaTswuqNFRv/WYd+ATtUpe9Y09RnJvgvdag7he0OWgEZWhviS1OTOKixw==",
"cpu": [
"mips64el"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-ppc64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.2.tgz",
"integrity": "sha512-C92gnpey7tUQONqg1n6dKVbx3vphKtTHJaNG2Ok9lGwbZil6DrfyecMsp9CrmXGQJmZ7iiVXvvZH6Ml5hL6XdQ==",
"cpu": [
"ppc64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-riscv64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.2.tgz",
"integrity": "sha512-B5BOmojNtUyN8AXlK0QJyvjEZkWwy/FKvakkTDCziX95AowLZKR6aCDhG7LeF7uMCXEJqwa8Bejz5LTPYm8AvA==",
"cpu": [
"riscv64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-s390x": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.2.tgz",
"integrity": "sha512-p4bm9+wsPwup5Z8f4EpfN63qNagQ47Ua2znaqGH6bqLlmJ4bx97Y9JdqxgGZ6Y8xVTixUnEkoKSHcpRlDnNr5w==",
"cpu": [
"s390x"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.2.tgz",
"integrity": "sha512-uwp2Tip5aPmH+NRUwTcfLb+W32WXjpFejTIOWZFw/v7/KnpCDKG66u4DLcurQpiYTiYwQ9B7KOeMJvLCu/OvbA==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/netbsd-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.2.tgz",
"integrity": "sha512-Kj6DiBlwXrPsCRDeRvGAUb/LNrBASrfqAIok+xB0LxK8CHqxZ037viF13ugfsIpePH93mX7xfJp97cyDuTZ3cw==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"netbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/netbsd-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.2.tgz",
"integrity": "sha512-HwGDZ0VLVBY3Y+Nw0JexZy9o/nUAWq9MlV7cahpaXKW6TOzfVno3y3/M8Ga8u8Yr7GldLOov27xiCnqRZf0tCA==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"netbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/openbsd-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.2.tgz",
"integrity": "sha512-DNIHH2BPQ5551A7oSHD0CKbwIA/Ox7+78/AWkbS5QoRzaqlev2uFayfSxq68EkonB+IKjiuxBFoV8ESJy8bOHA==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"openbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/openbsd-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.2.tgz",
"integrity": "sha512-/it7w9Nb7+0KFIzjalNJVR5bOzA9Vay+yIPLVHfIQYG/j+j9VTH84aNB8ExGKPU4AzfaEvN9/V4HV+F+vo8OEg==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"openbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/openharmony-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.2.tgz",
"integrity": "sha512-LRBbCmiU51IXfeXk59csuX/aSaToeG7w48nMwA6049Y4J4+VbWALAuXcs+qcD04rHDuSCSRKdmY63sruDS5qag==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"openharmony"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/sunos-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.2.tgz",
"integrity": "sha512-kMtx1yqJHTmqaqHPAzKCAkDaKsffmXkPHThSfRwZGyuqyIeBvf08KSsYXl+abf5HDAPMJIPnbBfXvP2ZC2TfHg==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"sunos"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/win32-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.2.tgz",
"integrity": "sha512-Yaf78O/B3Kkh+nKABUF++bvJv5Ijoy9AN1ww904rOXZFLWVc5OLOfL56W+C8F9xn5JQZa3UX6m+IktJnIb1Jjg==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/win32-ia32": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.2.tgz",
"integrity": "sha512-Iuws0kxo4yusk7sw70Xa2E2imZU5HoixzxfGCdxwBdhiDgt9vX9VUCBhqcwY7/uh//78A1hMkkROMJq9l27oLQ==",
"cpu": [
"ia32"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/win32-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.2.tgz",
"integrity": "sha512-sRdU18mcKf7F+YgheI/zGf5alZatMUTKj/jNS6l744f9u3WFu4v7twcUI9vu4mknF4Y9aDlblIie0IM+5xxaqQ==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@types/node": {
"version": "24.10.4",
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.4.tgz",
"integrity": "sha512-vnDVpYPMzs4wunl27jHrfmwojOGKya0xyM3sH+UE5iv5uPS6vX7UIoh6m+vQc5LGBq52HBKPIn/zcSZVzeDEZg==",
"dev": true,
"license": "MIT",
"dependencies": {
"undici-types": "~7.16.0"
}
},
"node_modules/esbuild": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.2.tgz",
"integrity": "sha512-HyNQImnsOC7X9PMNaCIeAm4ISCQXs5a5YasTXVliKv4uuBo1dKrG0A+uQS8M5eXjVMnLg3WgXaKvprHlFJQffw==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"bin": {
"esbuild": "bin/esbuild"
},
"engines": {
"node": ">=18"
},
"optionalDependencies": {
"@esbuild/aix-ppc64": "0.27.2",
"@esbuild/android-arm": "0.27.2",
"@esbuild/android-arm64": "0.27.2",
"@esbuild/android-x64": "0.27.2",
"@esbuild/darwin-arm64": "0.27.2",
"@esbuild/darwin-x64": "0.27.2",
"@esbuild/freebsd-arm64": "0.27.2",
"@esbuild/freebsd-x64": "0.27.2",
"@esbuild/linux-arm": "0.27.2",
"@esbuild/linux-arm64": "0.27.2",
"@esbuild/linux-ia32": "0.27.2",
"@esbuild/linux-loong64": "0.27.2",
"@esbuild/linux-mips64el": "0.27.2",
"@esbuild/linux-ppc64": "0.27.2",
"@esbuild/linux-riscv64": "0.27.2",
"@esbuild/linux-s390x": "0.27.2",
"@esbuild/linux-x64": "0.27.2",
"@esbuild/netbsd-arm64": "0.27.2",
"@esbuild/netbsd-x64": "0.27.2",
"@esbuild/openbsd-arm64": "0.27.2",
"@esbuild/openbsd-x64": "0.27.2",
"@esbuild/openharmony-arm64": "0.27.2",
"@esbuild/sunos-x64": "0.27.2",
"@esbuild/win32-arm64": "0.27.2",
"@esbuild/win32-ia32": "0.27.2",
"@esbuild/win32-x64": "0.27.2"
}
},
"node_modules/fsevents": {
"version": "2.3.3",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
"node_modules/get-tsconfig": {
"version": "4.13.0",
"resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.0.tgz",
"integrity": "sha512-1VKTZJCwBrvbd+Wn3AOgQP/2Av+TfTCOlE4AcRJE72W1ksZXbAx8PPBR9RzgTeSPzlPMHrbANMH3LbltH73wxQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"resolve-pkg-maps": "^1.0.0"
},
"funding": {
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
}
},
"node_modules/resolve-pkg-maps": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
"integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
"dev": true,
"license": "MIT",
"funding": {
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
}
},
"node_modules/tsx": {
"version": "4.21.0",
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz",
"integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==",
"dev": true,
"license": "MIT",
"dependencies": {
"esbuild": "~0.27.0",
"get-tsconfig": "^4.7.5"
},
"bin": {
"tsx": "dist/cli.mjs"
},
"engines": {
"node": ">=18.0.0"
},
"optionalDependencies": {
"fsevents": "~2.3.3"
}
},
"node_modules/typescript": {
"version": "5.9.3",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
"dev": true,
"license": "Apache-2.0",
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
},
"engines": {
"node": ">=14.17"
}
},
"node_modules/undici-types": {
"version": "7.16.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==",
"dev": true,
"license": "MIT"
}
}
}

View File

@ -0,0 +1,22 @@
{
"name": "ukvisajobs-extractor",
"version": "0.0.1",
"type": "module",
"description": "UK Visa Jobs extractor - fetches job listings that may sponsor work visas",
"main": "dist/main.js",
"dependencies": {},
"devDependencies": {
"@apify/tsconfig": "^0.1.0",
"@types/node": "^24.0.0",
"tsx": "^4.4.0",
"typescript": "~5.9.0"
},
"scripts": {
"start": "npm run start:dev",
"start:prod": "node dist/main.js",
"start:dev": "tsx src/main.ts",
"build": "tsc"
},
"author": "",
"license": "ISC"
}

View File

@ -0,0 +1,303 @@
/**
* UK Visa Jobs Extractor
*
* Fetches job listings from my.ukvisajobs.com that may sponsor work visas.
* Writes the results as JSON files under storage/datasets/default/ for the orchestrator to consume.
*
* Environment variables:
* UKVISAJOBS_TOKEN - JWT token (required)
* UKVISAJOBS_AUTH_TOKEN - Auth cookie token (defaults to UKVISAJOBS_TOKEN)
* UKVISAJOBS_CSRF_TOKEN - CSRF token cookie
* UKVISAJOBS_CI_SESSION - CI session cookie
* UKVISAJOBS_MAX_JOBS - Maximum jobs to fetch (default: 50, max: 200)
* UKVISAJOBS_SEARCH_KEYWORD - Optional search filter
*/
import { mkdir, rm, writeFile } from 'fs/promises';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
// ES modules have no built-in __dirname, so derive this module's directory from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Endpoint that fetchPage POSTs the search form to.
const API_URL = 'https://my.ukvisajobs.com/ukvisa-api/api/fetch-jobs-data';
// Page size assumed for pagination: main() derives the page count from it and
// treats a page with fewer jobs than this as the last one.
const JOBS_PER_PAGE = 15;
// Job limit used when UKVISAJOBS_MAX_JOBS is unset, empty, or not numeric.
const DEFAULT_MAX_JOBS = 50;
// Upper bound on the job limit, whatever UKVISAJOBS_MAX_JOBS asks for.
const MAX_ALLOWED_JOBS = 200;
/**
 * Shape of a single entry in the `jobs` array of the fetch-jobs-data response.
 *
 * All fields are declared as strings. `mapJob` parses the salary bounds into
 * numbers and compares the sponsorship flags case-insensitively against "yes".
 */
interface UkVisaJobsApiJob {
  // --- Identity and links ---
  id: string;
  title: string;
  company_name: string;
  company_link?: string;
  job_link: string;

  // --- Location and dates ---
  city: string;
  created_date: string;
  job_expire: string;

  // --- Free-text description (optional) ---
  description?: string;

  // --- Salary (min/max are parsed as numbers by mapJob) ---
  min_salary?: string;
  max_salary?: string;
  salary_interval?: string;
  salary_method?: string;

  // --- Role classification ---
  degree_requirement?: string;
  job_type?: string;
  job_level?: string;
  job_industry?: string;

  // --- Sponsorship flags (mapJob treats "yes", in any casing, as set) ---
  visa_acceptance?: string;
  applicants_outside_uk?: string;
  likely_to_sponsor?: string;
  definitely_sponsored?: string;
  new_entrant?: string;
  student_graduate?: string;

  // --- Fields not read by this extractor ---
  image?: string;
  computed_cos_total?: string;
}
/**
 * Envelope returned by the fetch-jobs-data endpoint.
 *
 * `main` only continues paging while `status` equals 1, and logs `totalJobs`
 * from the first page.
 */
interface UkVisaJobsApiResponse {
  /** Result code; `main` stops paging on any value other than 1. */
  status: number;
  /** Total number of matching jobs as reported by the API. */
  totalJobs: number;
  /** Optional field present on the response; not read by this extractor. */
  query?: string;
  /** Jobs on the requested page. */
  jobs: UkVisaJobsApiJob[];
}
/**
 * Normalised job record produced by `mapJob` and written to the dataset files.
 */
interface ExtractedJob {
  /** Always the literal 'ukvisajobs', identifying this extractor. */
  source: 'ukvisajobs';
  /** The API's `id` for the job. */
  sourceJobId: string;

  // --- Required display fields (mapJob substitutes placeholders when empty) ---
  title: string;
  employer: string;

  // --- Links (mapJob sets jobUrl and applicationLink to the same job_link) ---
  employerUrl?: string;
  jobUrl: string;
  applicationLink: string;

  // --- Optional details ---
  location?: string;
  deadline?: string;
  salary?: string;
  jobDescription?: string;
  datePosted?: string;
  degreeRequired?: string;
  jobType?: string;
  jobLevel?: string;
}
/**
 * Coerces a loosely-typed value to a non-empty string.
 *
 * @param value - Any value, typically a field from the API payload.
 * @returns The trimmed string for non-blank strings, `String(value)` for
 *   numbers and booleans, and `null` for everything else (including blank
 *   strings, `null`, `undefined`, and objects).
 */
function toStringOrNull(value: unknown): string | null {
  switch (typeof value) {
    case 'string': {
      const text = value.trim();
      return text === '' ? null : text;
    }
    case 'number':
    case 'boolean':
      return String(value);
    default:
      // null reports typeof 'object', so it lands here along with undefined.
      return null;
  }
}
/**
 * Coerces a loosely-typed value to a finite number.
 *
 * @param value - Any value, typically an env var or an API payload field.
 * @returns The number itself when it is finite, the `Number(...)` parse of a
 *   non-blank string when that parse is finite, and `null` otherwise.
 */
function toNumberOrNull(value: unknown): number | null {
  let candidate: number;

  if (typeof value === 'number') {
    candidate = value;
  } else if (typeof value === 'string' && value.trim() !== '') {
    candidate = Number(value.trim());
  } else {
    // null, undefined, blank strings, and every other type.
    return null;
  }

  // Rejects NaN and +/-Infinity from either branch.
  return Number.isFinite(candidate) ? candidate : null;
}
/**
 * Requests one page of job listings from the UKVisaJobs API.
 *
 * @param pageNo - Page number sent as the `pageNo` form field.
 * @param token - Token sent as the `token` form field.
 * @param cookies - Pre-built value for the `cookie` request header.
 * @param options - Optional filters; `searchKeyword` is sent as the literal
 *   string 'null' when absent or empty.
 * @returns The parsed JSON body, typed as `UkVisaJobsApiResponse` without
 *   runtime validation.
 * @throws Error when the HTTP response is not OK; the message carries the
 *   status code, status text, and response body.
 */
async function fetchPage(
  pageNo: number,
  token: string,
  cookies: string,
  options: { searchKeyword?: string } = {}
): Promise<UkVisaJobsApiResponse> {
  // Form fields in the order they are appended to the multipart body.
  const fields: Array<[string, string]> = [
    ['is_global', '0'],
    ['sortBy', 'desc'],
    ['pageNo', String(pageNo)],
    ['visaAcceptance', 'false'],
    ['applicants_outside_uk', 'false'],
    ['searchKeyword', options.searchKeyword || 'null'],
    ['token', token],
  ];

  // Native FormData (Node.js 18+); fetch derives the multipart content-type from it.
  const body = new FormData();
  for (const [name, value] of fields) {
    body.append(name, value);
  }

  const headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'en-US,en;q=0.9',
    'cookie': cookies,
    'origin': 'https://my.ukvisajobs.com',
    'referer': `https://my.ukvisajobs.com/open-jobs/1?is_global=0&sortBy=desc&pageNo=${pageNo}&visaAcceptance=false&applicants_outside_uk=false`,
    'user-agent': 'Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Mobile Safari/537.36',
  };

  const response = await fetch(API_URL, { method: 'POST', headers, body });

  if (response.ok) {
    return response.json() as Promise<UkVisaJobsApiResponse>;
  }

  // Include the body in the error so the caller's log shows what the API said.
  const text = await response.text();
  throw new Error(`UKVisaJobs API returned ${response.status}: ${response.statusText} - ${text}`);
}
/**
 * Converts a raw API job into the normalised `ExtractedJob` shape.
 *
 * Salary: built from `min_salary` / `max_salary` when they parse to positive
 * numbers — "£min-max" when both are present, "£max" for max only, and
 * "From £min" for min only — followed by " / <salary_interval>" when an
 * interval is given. Amounts are formatted with the en-GB locale so the stored
 * string does not depend on the host's locale settings.
 *
 * Description: the API's `description` when non-empty; otherwise a summary of
 * whichever sponsorship flags are set to "yes"; otherwise undefined.
 *
 * @param raw - Job record from the API response.
 * @returns The mapped job. `title` and `employer` fall back to placeholder
 *   text when empty; `jobUrl` and `applicationLink` both carry `job_link`.
 */
function mapJob(raw: UkVisaJobsApiJob): ExtractedJob {
  // Pin the locale: a bare toLocaleString() follows the host's default locale.
  const formatAmount = (amount: number): string => amount.toLocaleString('en-GB');

  // Treat zero, negative, and unparseable bounds as "not provided".
  const parsedMin = toNumberOrNull(raw.min_salary);
  const parsedMax = toNumberOrNull(raw.max_salary);
  const minSalary = parsedMin !== null && parsedMin > 0 ? parsedMin : null;
  const maxSalary = parsedMax !== null && parsedMax > 0 ? parsedMax : null;

  let salary: string | undefined;
  if (minSalary !== null && maxSalary !== null) {
    salary = `£${formatAmount(minSalary)}-${formatAmount(maxSalary)}`;
  } else if (maxSalary !== null) {
    salary = `£${formatAmount(maxSalary)}`;
  } else if (minSalary !== null) {
    // Previously a min-only salary was dropped entirely.
    salary = `From £${formatAmount(minSalary)}`;
  }
  if (salary !== undefined && raw.salary_interval) {
    salary += ` / ${raw.salary_interval}`;
  }

  // The payload is unvalidated JSON, so guard the type before calling string
  // methods; a non-string flag is simply treated as not set.
  const isYes = (flag: unknown): boolean =>
    typeof flag === 'string' && flag.toLowerCase() === 'yes';

  // Build a description from visa sponsorship fields
  const visaInfo: string[] = [];
  if (isYes(raw.visa_acceptance)) visaInfo.push('Visa acceptance: Yes');
  if (isYes(raw.applicants_outside_uk)) visaInfo.push('Accepts applicants outside UK');
  if (isYes(raw.likely_to_sponsor)) visaInfo.push('Likely to sponsor');
  if (isYes(raw.definitely_sponsored)) visaInfo.push('Definitely sponsored');
  if (isYes(raw.new_entrant)) visaInfo.push('New entrant friendly');
  if (isYes(raw.student_graduate)) visaInfo.push('Student/Graduate friendly');

  let description: string | undefined;
  if (raw.description) {
    description = raw.description;
  } else if (visaInfo.length > 0) {
    description = `Visa sponsorship info: ${visaInfo.join(', ')}`;
  }

  return {
    source: 'ukvisajobs',
    sourceJobId: raw.id,
    title: raw.title || 'Unknown Title',
    employer: raw.company_name || 'Unknown Employer',
    employerUrl: toStringOrNull(raw.company_link) ?? undefined,
    jobUrl: raw.job_link,
    applicationLink: raw.job_link,
    location: raw.city || undefined,
    deadline: raw.job_expire || undefined,
    salary,
    jobDescription: description,
    datePosted: raw.created_date || undefined,
    degreeRequired: toStringOrNull(raw.degree_requirement) ?? undefined,
    jobType: toStringOrNull(raw.job_type) ?? undefined,
    jobLevel: toStringOrNull(raw.job_level) ?? undefined,
  };
}
/**
 * Entry point: reads credentials and limits from the environment, pages through
 * the UKVisaJobs API, and writes the mapped jobs to `storage/datasets/default`
 * (one numbered JSON file per job plus a combined `jobs.json`).
 *
 * The dataset directory is emptied before writing so that files from an earlier,
 * larger run cannot be mistaken for results of this one.
 *
 * Exits the process with code 1 when UKVISAJOBS_TOKEN is not set, or when a
 * request or file operation throws.
 */
async function main(): Promise<void> {
  console.log('🇬🇧 UK Visa Jobs Extractor starting...');

  // Get credentials from environment
  const token = process.env.UKVISAJOBS_TOKEN;
  const authToken = process.env.UKVISAJOBS_AUTH_TOKEN || token;
  const csrfToken = process.env.UKVISAJOBS_CSRF_TOKEN || '';
  const ciSession = process.env.UKVISAJOBS_CI_SESSION || '';
  const searchKeyword = process.env.UKVISAJOBS_SEARCH_KEYWORD || undefined;

  if (!token) {
    console.error('❌ UKVISAJOBS_TOKEN environment variable is not set');
    process.exit(1);
  }

  // Build cookies string from whichever cookie values were provided
  const cookieParts: string[] = [];
  if (csrfToken) cookieParts.push(`csrf_token=${csrfToken}`);
  if (ciSession) cookieParts.push(`ci_session=${ciSession}`);
  if (authToken) cookieParts.push(`authToken=${authToken}`);
  const cookies = cookieParts.join('; ');

  console.log(` Cookies configured: ${cookieParts.length > 0 ? 'Yes' : 'No'}`);
  // Log only the length so the token itself never reaches the logs.
  console.log(` Token length: ${token.length}`);

  // Get max jobs from environment, normalised to a whole number in
  // [0, MAX_ALLOWED_JOBS]: a fractional value would otherwise fetch one job
  // too many, and a negative one would flow through as a negative limit.
  const maxJobsEnv = toNumberOrNull(process.env.UKVISAJOBS_MAX_JOBS);
  const requestedJobs = Math.floor(maxJobsEnv ?? DEFAULT_MAX_JOBS);
  const maxJobs = Math.min(Math.max(requestedJobs, 0), MAX_ALLOWED_JOBS);
  const maxPages = Math.ceil(maxJobs / JOBS_PER_PAGE);

  console.log(` Max jobs: ${maxJobs} (${maxPages} pages)`);
  if (searchKeyword) {
    console.log(` Search keyword: ${searchKeyword}`);
  }

  const allJobs: ExtractedJob[] = [];
  const seenIds = new Set<string>();
  let pageNo = 1;

  try {
    while (pageNo <= maxPages && allJobs.length < maxJobs) {
      console.log(` Fetching page ${pageNo}/${maxPages}...`);
      const response = await fetchPage(pageNo, token, cookies, { searchKeyword });

      if (response.status !== 1) {
        console.warn(` ⚠️ API returned status ${response.status} on page ${pageNo}`);
        break;
      }

      if (pageNo === 1) {
        console.log(` Total available: ${response.totalJobs} jobs`);
      }

      if (!response.jobs || response.jobs.length === 0) {
        console.log(` No more jobs on page ${pageNo}`);
        break;
      }

      for (const rawJob of response.jobs) {
        if (allJobs.length >= maxJobs) break;

        // Deduplicate by ID
        if (seenIds.has(rawJob.id)) continue;
        seenIds.add(rawJob.id);

        allJobs.push(mapJob(rawJob));
      }

      // If we got fewer jobs than a full page, we're at the end
      if (response.jobs.length < JOBS_PER_PAGE) {
        break;
      }

      pageNo++;

      // Small delay to be nice to the API
      await new Promise((resolve) => setTimeout(resolve, 500));
    }

    console.log(`✅ Scraped ${allJobs.length} jobs`);

    // Write output to storage directory (similar to Crawlee dataset structure).
    // Purge it first, and only now that scraping has succeeded: a failed run
    // leaves the previous dataset untouched, while a successful one never
    // leaves behind numbered files from an earlier run.
    const storageDir = join(__dirname, '../storage/datasets/default');
    await rm(storageDir, { recursive: true, force: true });
    await mkdir(storageDir, { recursive: true });

    // Write each job as a separate JSON file (Crawlee dataset format)
    for (let i = 0; i < allJobs.length; i++) {
      const filename = join(storageDir, `${String(i + 1).padStart(6, '0')}.json`);
      await writeFile(filename, JSON.stringify(allJobs[i], null, 2));
    }

    // Also write a combined output file for easier consumption
    const outputFile = join(storageDir, 'jobs.json');
    await writeFile(outputFile, JSON.stringify(allJobs, null, 2));

    console.log(` Output written to: ${storageDir}`);
    console.log(` Jobs file: ${outputFile}`);
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error';
    console.error(`❌ Error: ${message}`);
    process.exit(1);
  }
}
// Last-resort handler: main() deals with its own expected failures, so anything
// that still rejects here is logged in full and turned into a non-zero exit.
const reportFatal = (reason: unknown): never => {
  console.error('Fatal error:', reason);
  process.exit(1);
};

main().then(undefined, reportFatal);

View File

@ -0,0 +1,12 @@
{
  "extends": "@apify/tsconfig",
  "compilerOptions": {
    "module": "NodeNext",
    "moduleResolution": "NodeNext",
    "target": "ES2022",
    "outDir": "dist",
    "noUnusedLocals": false,
    "lib": ["ES2022", "DOM"]
  },
  "include": ["./src/**/*"]
}

View File

@ -32,3 +32,12 @@ JOBSPY_RESULTS_WANTED=200
JOBSPY_HOURS_OLD=72
JOBSPY_COUNTRY_INDEED=UK
JOBSPY_LINKEDIN_FETCH_DESCRIPTION=1
# =============================================================================
# UKVisaJobs (UK visa sponsorship job scraping) - optional
# =============================================================================
# Get these tokens from browser dev tools after logging into my.ukvisajobs.com
UKVISAJOBS_TOKEN=
UKVISAJOBS_AUTH_TOKEN=
UKVISAJOBS_CSRF_TOKEN=
UKVISAJOBS_CI_SESSION=

View File

@ -12,7 +12,7 @@ import { Header, JobList, PipelineProgress, Stats } from "./components";
import * as api from "./api";
import { SettingsPage } from "./pages/SettingsPage";
const DEFAULT_PIPELINE_SOURCES: JobSource[] = ["gradcracker", "indeed", "linkedin"];
const DEFAULT_PIPELINE_SOURCES: JobSource[] = ["gradcracker", "indeed", "linkedin", "ukvisajobs"];
const PIPELINE_SOURCES_STORAGE_KEY = "jobops.pipeline.sources";
export const App: React.FC = () => {
@ -33,7 +33,7 @@ export const App: React.FC = () => {
const raw = localStorage.getItem(PIPELINE_SOURCES_STORAGE_KEY);
if (!raw) return DEFAULT_PIPELINE_SOURCES;
const parsed = JSON.parse(raw) as unknown;
const allowed: JobSource[] = ["gradcracker", "indeed", "linkedin"];
const allowed: JobSource[] = ["gradcracker", "indeed", "linkedin", "ukvisajobs"];
if (!Array.isArray(parsed)) return DEFAULT_PIPELINE_SOURCES;
const next = parsed.filter((value): value is JobSource => allowed.includes(value));
return next.length > 0 ? next : DEFAULT_PIPELINE_SOURCES;

View File

@ -60,9 +60,10 @@ export const Header: React.FC<HeaderProps> = ({
gradcracker: "Gradcracker",
indeed: "Indeed",
linkedin: "LinkedIn",
ukvisajobs: "UK Visa Jobs",
};
const orderedSources: JobSource[] = ["gradcracker", "indeed", "linkedin"];
const orderedSources: JobSource[] = ["gradcracker", "indeed", "linkedin", "ukvisajobs"];
const toggleSource = (source: JobSource, checked: boolean) => {
const next = checked

View File

@ -60,10 +60,10 @@ apiRouter.get('/jobs', async (req: Request, res: Response) => {
try {
const statusFilter = req.query.status as string | undefined;
const statuses = statusFilter?.split(',').filter(Boolean) as JobStatus[] | undefined;
const jobs = await jobsRepo.getAllJobs(statuses);
const stats = await jobsRepo.getJobStats();
const response: ApiResponse<JobsListResponse> = {
success: true,
data: {
@ -72,7 +72,7 @@ apiRouter.get('/jobs', async (req: Request, res: Response) => {
byStatus: stats,
},
};
res.json(response);
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
@ -86,11 +86,11 @@ apiRouter.get('/jobs', async (req: Request, res: Response) => {
apiRouter.get('/jobs/:id', async (req: Request, res: Response) => {
try {
const job = await jobsRepo.getJobById(req.params.id);
if (!job) {
return res.status(404).json({ success: false, error: 'Job not found' });
}
res.json({ success: true, data: job });
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
@ -113,11 +113,11 @@ apiRouter.patch('/jobs/:id', async (req: Request, res: Response) => {
try {
const input = updateJobSchema.parse(req.body);
const job = await jobsRepo.updateJob(req.params.id, input);
if (!job) {
return res.status(404).json({ success: false, error: 'Job not found' });
}
res.json({ success: true, data: job });
} catch (error) {
if (error instanceof z.ZodError) {
@ -137,11 +137,11 @@ apiRouter.post('/jobs/:id/process', async (req: Request, res: Response) => {
const force = forceRaw === '1' || forceRaw === 'true';
const result = await processJob(req.params.id, { force });
if (!result.success) {
return res.status(400).json({ success: false, error: result.error });
}
const job = await jobsRepo.getJobById(req.params.id);
res.json({ success: true, data: job });
} catch (error) {
@ -156,13 +156,13 @@ apiRouter.post('/jobs/:id/process', async (req: Request, res: Response) => {
apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
try {
const job = await jobsRepo.getJobById(req.params.id);
if (!job) {
return res.status(404).json({ success: false, error: 'Job not found' });
}
const appliedAt = new Date().toISOString();
// Sync to Notion
const notionResult = await createNotionEntry({
id: job.id,
@ -175,7 +175,7 @@ apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
pdfPath: job.pdfPath,
appliedAt,
});
// Update job status
const updatedJob = await jobsRepo.updateJob(job.id, {
status: 'applied',
@ -186,7 +186,7 @@ apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
if (updatedJob) {
notifyJobCompleteWebhook(updatedJob).catch(console.warn)
}
res.json({ success: true, data: updatedJob });
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
@ -200,11 +200,11 @@ apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
apiRouter.post('/jobs/:id/reject', async (req: Request, res: Response) => {
try {
const job = await jobsRepo.updateJob(req.params.id, { status: 'rejected' });
if (!job) {
return res.status(404).json({ success: false, error: 'Job not found' });
}
res.json({ success: true, data: job });
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
@ -351,7 +351,7 @@ apiRouter.get('/pipeline/status', async (req: Request, res: Response) => {
try {
const { isRunning } = getPipelineStatus();
const lastRun = await pipelineRepo.getLatestPipelineRun();
const response: ApiResponse<PipelineStatusResponse> = {
success: true,
data: {
@ -360,7 +360,7 @@ apiRouter.get('/pipeline/status', async (req: Request, res: Response) => {
nextScheduledRun: null, // Would come from n8n
},
};
res.json(response);
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
@ -377,20 +377,20 @@ apiRouter.get('/pipeline/progress', (req: Request, res: Response) => {
res.setHeader('Cache-Control', 'no-cache');
res.setHeader('Connection', 'keep-alive');
res.setHeader('X-Accel-Buffering', 'no'); // Disable Nginx buffering
// Send initial progress
const sendProgress = (data: unknown) => {
res.write(`data: ${JSON.stringify(data)}\n\n`);
};
// Subscribe to progress updates
const unsubscribe = subscribeToProgress(sendProgress);
// Send heartbeat every 30 seconds to keep connection alive
const heartbeat = setInterval(() => {
res.write(': heartbeat\n\n');
}, 30000);
// Cleanup on close
req.on('close', () => {
clearInterval(heartbeat);
@ -417,19 +417,19 @@ apiRouter.get('/pipeline/runs', async (req: Request, res: Response) => {
const runPipelineSchema = z.object({
topN: z.number().min(1).max(50).optional(),
minSuitabilityScore: z.number().min(0).max(100).optional(),
sources: z.array(z.enum(['gradcracker', 'indeed', 'linkedin'])).min(1).optional(),
sources: z.array(z.enum(['gradcracker', 'indeed', 'linkedin', 'ukvisajobs'])).min(1).optional(),
});
apiRouter.post('/pipeline/run', async (req: Request, res: Response) => {
try {
const config = runPipelineSchema.parse(req.body);
// Start pipeline in background
runPipeline(config).catch(console.error);
res.json({
success: true,
data: { message: 'Pipeline started' }
res.json({
success: true,
data: { message: 'Pipeline started' }
});
} catch (error) {
if (error instanceof z.ZodError) {
@ -451,21 +451,21 @@ apiRouter.post('/webhook/trigger', async (req: Request, res: Response) => {
// Optional: Add authentication check
const authHeader = req.headers.authorization;
const expectedToken = process.env.WEBHOOK_SECRET;
if (expectedToken && authHeader !== `Bearer ${expectedToken}`) {
return res.status(401).json({ success: false, error: 'Unauthorized' });
}
try {
// Start pipeline in background
runPipeline().catch(console.error);
res.json({
success: true,
data: {
res.json({
success: true,
data: {
message: 'Pipeline triggered',
triggeredAt: new Date().toISOString(),
}
}
});
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
@ -483,14 +483,14 @@ apiRouter.post('/webhook/trigger', async (req: Request, res: Response) => {
apiRouter.delete('/database', async (req: Request, res: Response) => {
try {
const result = clearDatabase();
res.json({
success: true,
data: {
res.json({
success: true,
data: {
message: 'Database cleared',
jobsDeleted: result.jobsDeleted,
runsDeleted: result.runsDeleted,
}
}
});
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';

View File

@ -7,9 +7,9 @@ import { sql } from 'drizzle-orm';
export const jobs = sqliteTable('jobs', {
id: text('id').primaryKey(),
// From crawler
source: text('source', { enum: ['gradcracker', 'indeed', 'linkedin'] }).notNull().default('gradcracker'),
source: text('source', { enum: ['gradcracker', 'indeed', 'linkedin', 'ukvisajobs'] }).notNull().default('gradcracker'),
sourceJobId: text('source_job_id'),
jobUrlDirect: text('job_url_direct'),
datePosted: text('date_posted'),
@ -51,17 +51,17 @@ export const jobs = sqliteTable('jobs', {
companyReviewsCount: integer('company_reviews_count'),
vacancyCount: integer('vacancy_count'),
workFromHomeType: text('work_from_home_type'),
// Orchestrator enrichments
status: text('status', {
enum: ['discovered', 'processing', 'ready', 'applied', 'rejected', 'expired']
status: text('status', {
enum: ['discovered', 'processing', 'ready', 'applied', 'rejected', 'expired']
}).notNull().default('discovered'),
suitabilityScore: real('suitability_score'),
suitabilityReason: text('suitability_reason'),
tailoredSummary: text('tailored_summary'),
pdfPath: text('pdf_path'),
notionPageId: text('notion_page_id'),
// Timestamps
discoveredAt: text('discovered_at').notNull().default(sql`(datetime('now'))`),
processedAt: text('processed_at'),

View File

@ -12,6 +12,7 @@ import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { runCrawler } from '../services/crawler.js';
import { runJobSpy } from '../services/jobspy.js';
import { runUkVisaJobs } from '../services/ukvisajobs.js';
import { scoreJobSuitability } from '../services/scorer.js';
import { generateSummary } from '../services/summary.js';
import { generatePdf } from '../services/pdf.js';
@ -27,7 +28,7 @@ const DEFAULT_PROFILE_PATH = join(__dirname, '../../../../resume-generator/base.
const DEFAULT_CONFIG: PipelineConfig = {
topN: 10,
minSuitabilityScore: 50,
sources: ['gradcracker', 'indeed', 'linkedin'],
sources: ['gradcracker', 'indeed', 'linkedin', 'ukvisajobs'],
profilePath: DEFAULT_PROFILE_PATH,
outputDir: join(__dirname, '../../../data/pdfs'),
};
@ -88,22 +89,22 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
error: 'Pipeline is already running',
};
}
isPipelineRunning = true;
resetProgress();
const mergedConfig = { ...DEFAULT_CONFIG, ...config };
// Create pipeline run record
const pipelineRun = await pipelineRepo.createPipelineRun();
console.log('🚀 Starting job pipeline...');
console.log(` Config: topN=${mergedConfig.topN}, minScore=${mergedConfig.minSuitabilityScore} (manual processing)`);
try {
// Step 1: Load profile
console.log('\n📋 Loading profile...');
const profile = await loadProfile(mergedConfig.profilePath);
// Step 2: Run crawler
console.log('\n🕷 Running crawler...');
progressHelpers.startCrawling();
@ -154,6 +155,21 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
}
}
// Run UKVisaJobs extractor if selected
if (mergedConfig.sources.includes('ukvisajobs')) {
updateProgress({
step: 'crawling',
detail: 'UKVisaJobs: scraping visa-sponsoring jobs...',
});
const ukVisaResult = await runUkVisaJobs({ maxJobs: 50 });
if (!ukVisaResult.success) {
sourceErrors.push(`ukvisajobs: ${ukVisaResult.error ?? 'unknown error'}`);
} else {
discoveredJobs.push(...ukVisaResult.jobs);
}
}
if (discoveredJobs.length === 0 && sourceErrors.length > 0) {
throw new Error(`All sources failed: ${sourceErrors.join('; ')}`);
}
@ -163,18 +179,18 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
}
progressHelpers.crawlingComplete(discoveredJobs.length);
// Step 3: Import discovered jobs
console.log('\n💾 Importing jobs to database...');
const { created, skipped } = await jobsRepo.bulkCreateJobs(discoveredJobs);
console.log(` Created: ${created}, Skipped (duplicates): ${skipped}`);
progressHelpers.importComplete(created, skipped);
await pipelineRepo.updatePipelineRun(pipelineRun.id, {
jobsDiscovered: created,
});
// Step 4: Score all discovered jobs missing a score
console.log('\n🎯 Scoring jobs for suitability...');
const unprocessedJobs = await jobsRepo.getUnscoredDiscoveredJobs();
@ -187,7 +203,7 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
totalToProcess: 0,
currentJob: undefined,
});
// Score jobs with progress updates
const scoredJobs: Array<Job & { suitabilityScore: number; suitabilityReason: string }> = [];
for (let i = 0; i < unprocessedJobs.length; i++) {
@ -217,21 +233,21 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
suitabilityReason: reason,
});
}
progressHelpers.scoringComplete(scoredJobs.length);
console.log(`\n📊 Scored ${scoredJobs.length} jobs. Ready for manual processing.`);
// Update pipeline run as completed
await pipelineRepo.updatePipelineRun(pipelineRun.id, {
status: 'completed',
completedAt: new Date().toISOString(),
jobsProcessed: 0,
});
console.log('\n🎉 Pipeline completed!');
console.log(` Jobs discovered: ${created}`);
console.log(' Jobs processed: 0 (manual)');
progressHelpers.complete(created, 0);
await notifyPipelineWebhook('pipeline.completed', {
@ -241,22 +257,22 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
jobsProcessed: 0,
})
isPipelineRunning = false;
return {
success: true,
jobsDiscovered: created,
jobsProcessed: 0,
};
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
await pipelineRepo.updatePipelineRun(pipelineRun.id, {
status: 'failed',
completedAt: new Date().toISOString(),
errorMessage: message,
});
progressHelpers.failed(message);
await notifyPipelineWebhook('pipeline.failed', {
@ -264,9 +280,9 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
error: message,
})
isPipelineRunning = false;
console.error('\n❌ Pipeline failed:', message);
return {
success: false,
jobsDiscovered: 0,
@ -287,7 +303,7 @@ export async function processJob(
error?: string;
}> {
console.log(`📝 Processing job ${jobId}...`);
try {
const job = await jobsRepo.getJobById(jobId);
if (!job) {
@ -297,9 +313,9 @@ export async function processJob(
if (job.status !== 'discovered' && job.status !== 'ready') {
return { success: false, error: `Job cannot be processed from status: ${job.status}` };
}
const profile = await loadProfile(DEFAULT_PROFILE_PATH);
// Mark as processing
await jobsRepo.updateJob(job.id, { status: 'processing' });
@ -314,7 +330,7 @@ export async function processJob(
job.suitabilityScore = suitability.score;
job.suitabilityReason = suitability.reason;
}
// Generate summary (AI)
// If forcing, always recompute; otherwise compute if missing.
if (options?.force || !job.tailoredSummary) {
@ -323,7 +339,7 @@ export async function processJob(
job.jobDescription || '',
profile
);
if (summaryResult.success) {
await jobsRepo.updateJob(job.id, {
tailoredSummary: summaryResult.summary,
@ -331,7 +347,7 @@ export async function processJob(
job.tailoredSummary = summaryResult.summary ?? null;
}
}
// Generate PDF
console.log(' Generating PDF...');
const pdfResult = await generatePdf(
@ -340,16 +356,16 @@ export async function processJob(
job.jobDescription || '',
DEFAULT_PROFILE_PATH
);
// Mark as ready
await jobsRepo.updateJob(job.id, {
status: 'ready',
pdfPath: pdfResult.pdfPath ?? undefined,
});
console.log(' ✅ Done!');
return { success: true };
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
return { success: false, error: message };

View File

@ -0,0 +1,125 @@
/**
* Service for running the UK Visa Jobs extractor (extractors/ukvisajobs).
*
* Spawns the extractor as a child process and reads its output dataset.
*/
import { spawn } from 'child_process';
import { readdir, readFile, rm, mkdir } from 'fs/promises';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import type { CreateJobInput } from '../../shared/types.js';
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Directory of the extractor package, resolved relative to this file; also used as the child process cwd.
const UKVISAJOBS_DIR = join(__dirname, '../../../../extractors/ukvisajobs');
// Dataset directory that is wiped before each run and read back afterwards.
// NOTE(review): presumably the extractor writes one JSON file per job here — confirm against the extractor.
const STORAGE_DIR = join(UKVISAJOBS_DIR, 'storage/datasets/default');
/**
 * Options accepted by `runUkVisaJobs`. Both values are forwarded to the
 * extractor child process through environment variables.
 */
export interface RunUkVisaJobsOptions {
  /**
   * Maximum number of jobs to fetch; forwarded as `UKVISAJOBS_MAX_JOBS`.
   * When omitted, `runUkVisaJobs` supplies a default.
   * NOTE(review): an upper bound of 200 is mentioned for this value but is not
   * enforced in this module — presumably the extractor applies it; confirm.
   */
  maxJobs?: number;
  /** Search keyword filter (optional); forwarded as `UKVISAJOBS_SEARCH_KEYWORD`, empty string when omitted. */
  searchKeyword?: string;
}
/**
 * Outcome of a `runUkVisaJobs` call.
 */
export interface UkVisaJobsResult {
  /** `true` when the extractor exited with code 0 and the dataset was read back. */
  success: boolean;
  /** Jobs mapped from the extractor's dataset; an empty array on failure. */
  jobs: CreateJobInput[];
  /** Failure message; only set when `success` is `false`. */
  error?: string;
}
/**
 * Remove the dataset directory left behind by a previous extraction run.
 *
 * `force: true` already makes `rm` succeed silently when the directory does
 * not exist, so any error that still surfaces (for example a permissions
 * problem) is a genuine failure. It is deliberately allowed to propagate:
 * swallowing it would leave stale files in place, and they would then be read
 * back as if they were results of the current run.
 *
 * @throws If the directory exists but cannot be removed.
 */
async function clearStorageDataset(): Promise<void> {
  await rm(STORAGE_DIR, { recursive: true, force: true });
}
/**
 * Run the UK Visa Jobs extractor and return the jobs it produced.
 *
 * Steps: wipe and recreate the dataset directory, spawn the extractor
 * (`npx tsx src/main.ts`) inside the extractor package, wait for it to exit,
 * then read the dataset files back.
 *
 * Failures (spawn errors, non-zero exit, termination by signal, filesystem
 * errors) are caught, logged, and reported through the returned object rather
 * than thrown.
 *
 * @param options - Optional limit and keyword filter forwarded to the extractor
 *   via the `UKVISAJOBS_MAX_JOBS` and `UKVISAJOBS_SEARCH_KEYWORD` environment variables.
 * @returns `{ success: true, jobs }` on success, otherwise
 *   `{ success: false, jobs: [], error }`.
 */
export async function runUkVisaJobs(options: RunUkVisaJobsOptions = {}): Promise<UkVisaJobsResult> {
  console.log('🇬🇧 Running UK Visa Jobs extractor...');

  // An explicit option wins. Otherwise honour a non-empty UKVISAJOBS_MAX_JOBS
  // already present in the environment instead of overwriting it, and only
  // then fall back to 50.
  const maxJobs = options.maxJobs ?? (process.env.UKVISAJOBS_MAX_JOBS || 50);

  try {
    // Clear previous results so only files from this run are read back
    await clearStorageDataset();
    await mkdir(STORAGE_DIR, { recursive: true });

    // Run the extractor using npx tsx directly (more reliable in Docker/different environments)
    await new Promise<void>((resolve, reject) => {
      const child = spawn('npx', ['tsx', 'src/main.ts'], {
        cwd: UKVISAJOBS_DIR,
        stdio: 'inherit',
        env: {
          ...process.env,
          UKVISAJOBS_MAX_JOBS: String(maxJobs),
          UKVISAJOBS_SEARCH_KEYWORD: options.searchKeyword ?? '',
        },
      });

      child.on('close', (code, signal) => {
        if (code === 0) {
          resolve();
          return;
        }
        // `code` is null when the child was killed by a signal; report the
        // signal instead of the misleading "exited with code null".
        const reason =
          code === null ? `was terminated by signal ${signal}` : `exited with code ${code}`;
        reject(new Error(`UK Visa Jobs extractor ${reason}`));
      });
      // Emitted when the process could not be spawned at all
      child.on('error', reject);
    });

    // Read the output dataset
    const jobs = await readDataset();
    console.log(`✅ UK Visa Jobs: imported ${jobs.length} jobs`);
    return { success: true, jobs };
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error';
    console.error(`❌ UK Visa Jobs failed: ${message}`);
    return { success: false, jobs: [], error: message };
  }
}
/**
 * Load every job record the extractor wrote into the dataset directory.
 *
 * Each `*.json` file (except the aggregate `jobs.json`) is parsed and mapped
 * onto a `CreateJobInput` tagged with the `ukvisajobs` source. Files are
 * processed in sorted name order. A file that cannot be read or parsed is
 * skipped, and an unreadable directory yields an empty list.
 *
 * @returns The mapped jobs; possibly empty.
 */
async function readDataset(): Promise<CreateJobInput[]> {
  let entries: string[];
  try {
    entries = await readdir(STORAGE_DIR);
  } catch {
    // Dataset directory doesn't exist yet
    return [];
  }

  const datasetFiles = entries
    .filter((name) => name !== 'jobs.json' && name.endsWith('.json'))
    .sort();

  const collected: CreateJobInput[] = [];
  for (const name of datasetFiles) {
    try {
      const record = JSON.parse(await readFile(join(STORAGE_DIR, name), 'utf-8'));

      // Translate the extractor's record into the orchestrator's input shape
      collected.push({
        source: 'ukvisajobs',
        sourceJobId: record.sourceJobId,
        title: record.title || 'Unknown Title',
        employer: record.employer || 'Unknown Employer',
        employerUrl: record.employerUrl,
        jobUrl: record.jobUrl,
        applicationLink: record.applicationLink || record.jobUrl,
        location: record.location,
        deadline: record.deadline,
        salary: record.salary,
        jobDescription: record.jobDescription,
        datePosted: record.datePosted,
        degreeRequired: record.degreeRequired,
        jobType: record.jobType,
        jobLevel: record.jobLevel,
      });
    } catch {
      // Skip invalid files
    }
  }

  return collected;
}

View File

@ -2,7 +2,7 @@
* Shared types for the job-ops orchestrator.
*/
export type JobStatus =
export type JobStatus =
| 'discovered' // Crawled but not processed
| 'processing' // Currently generating resume
| 'ready' // PDF generated, waiting for user to apply
@ -13,11 +13,12 @@ export type JobStatus =
export type JobSource =
| 'gradcracker'
| 'indeed'
| 'linkedin';
| 'linkedin'
| 'ukvisajobs';
export interface Job {
id: string;
// Source / provenance
source: JobSource;
sourceJobId: string | null; // External ID (if provided)
@ -37,7 +38,7 @@ export interface Job {
degreeRequired: string | null;
starting: string | null;
jobDescription: string | null;
// Orchestrator enrichments
status: JobStatus;
suitabilityScore: number | null; // 0-100 AI-generated score
@ -71,7 +72,7 @@ export interface Job {
companyReviewsCount: number | null;
vacancyCount: number | null;
workFromHomeType: string | null;
// Timestamps
discoveredAt: string;
processedAt: string | null;