initial implementation
This commit is contained in:
parent
8a236c0be9
commit
0f36d9b8a6
11
.env.example
11
.env.example
@ -37,3 +37,14 @@ JOBSPY_RESULTS_WANTED=200
|
||||
JOBSPY_HOURS_OLD=72
|
||||
JOBSPY_COUNTRY_INDEED=UK
|
||||
JOBSPY_LINKEDIN_FETCH_DESCRIPTION=1
|
||||
|
||||
# =============================================================================
|
||||
# UKVisaJobs (UK visa sponsorship jobs) - optional
|
||||
# =============================================================================
|
||||
# Get these tokens from browser dev tools after logging into my.ukvisajobs.com
|
||||
# See extractors/ukvisajobs/README.md for detailed instructions.
|
||||
UKVISAJOBS_TOKEN=
|
||||
UKVISAJOBS_AUTH_TOKEN=
|
||||
UKVISAJOBS_CSRF_TOKEN=
|
||||
UKVISAJOBS_CI_SESSION=
|
||||
UKVISAJOBS_MAX_JOBS=50
|
||||
|
||||
@ -28,6 +28,7 @@ RUN pip3 install --no-cache-dir --break-system-packages playwright python-jobspy
|
||||
# Copy package files first for better caching
|
||||
COPY orchestrator/package*.json ./orchestrator/
|
||||
COPY extractors/gradcracker/package*.json ./extractors/gradcracker/
|
||||
COPY extractors/ukvisajobs/package*.json ./extractors/ukvisajobs/
|
||||
|
||||
# Install Node.js dependencies
|
||||
WORKDIR /app/orchestrator
|
||||
@ -39,11 +40,15 @@ RUN npm install --production=false
|
||||
# Install Camoufox browser (downloads its own Firefox fork)
|
||||
RUN npx camoufox fetch
|
||||
|
||||
WORKDIR /app/extractors/ukvisajobs
|
||||
RUN npm install --production=false
|
||||
|
||||
# Copy source code
|
||||
WORKDIR /app
|
||||
COPY orchestrator ./orchestrator
|
||||
COPY extractors/gradcracker ./extractors/gradcracker
|
||||
COPY extractors/jobspy ./extractors/jobspy
|
||||
COPY extractors/ukvisajobs ./extractors/ukvisajobs
|
||||
COPY resume-generator ./resume-generator
|
||||
|
||||
# Build the orchestrator (client + server)
|
||||
|
||||
@ -75,6 +75,7 @@ job-ops/
|
||||
src/shared/ # shared types (Job, PipelineRun, etc.)
|
||||
extractors/gradcracker/ # Crawlee crawler (Gradcracker)
|
||||
extractors/jobspy/ # JobSpy wrapper (Indeed/LinkedIn/etc)
|
||||
extractors/ukvisajobs/ # UK Visa Jobs API extractor
|
||||
resume-generator/ # Python Playwright automation for rxresu.me
|
||||
base.json # your exported base resume (template)
|
||||
data/ # persisted runtime artifacts (Docker default)
|
||||
@ -87,7 +88,7 @@ job-ops/
|
||||
## Data model (SQLite)
|
||||
|
||||
- `jobs`
|
||||
- from crawl: `title`, `employer`, `jobUrl`, `applicationLink`, `deadline`, `salary`, `location`, `jobDescription`, etc.
|
||||
- from crawl: `title`, `employer`, `jobUrl`, `applicationLink`, `deadline`, `salary`, `location`, `jobDescription`, `source` (gradcracker/indeed/linkedin/ukvisajobs), etc.
|
||||
- enrichments: `status` (`discovered` -> `processing` -> `ready` -> `applied`/`rejected`), `suitabilityScore`, `suitabilityReason`, `tailoredSummary`, `pdfPath`, `notionPageId`
|
||||
- `pipeline_runs`: audit log of runs (`running`/`completed`/`failed`, counts, error)
|
||||
|
||||
|
||||
@ -50,6 +50,14 @@ services:
|
||||
# Optional: Webhook secret for n8n
|
||||
- WEBHOOK_SECRET=${WEBHOOK_SECRET:-}
|
||||
|
||||
# UKVisaJobs (UK visa sponsorship jobs) - optional
|
||||
- UKVISAJOBS_TOKEN=${UKVISAJOBS_TOKEN:-}
|
||||
- UKVISAJOBS_AUTH_TOKEN=${UKVISAJOBS_AUTH_TOKEN:-}
|
||||
- UKVISAJOBS_CSRF_TOKEN=${UKVISAJOBS_CSRF_TOKEN:-}
|
||||
- UKVISAJOBS_CI_SESSION=${UKVISAJOBS_CI_SESSION:-}
|
||||
- UKVISAJOBS_MAX_JOBS=${UKVISAJOBS_MAX_JOBS:-50}
|
||||
- UKVISAJOBS_SEARCH_KEYWORD=${UKVISAJOBS_SEARCH_KEYWORD:-}
|
||||
|
||||
# Python path (uses system python in container)
|
||||
- PYTHON_PATH=/usr/bin/python3
|
||||
restart: unless-stopped
|
||||
|
||||
11
extractors/ukvisajobs/.gitignore
vendored
Normal file
11
extractors/ukvisajobs/.gitignore
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
# Dependencies
|
||||
node_modules/
|
||||
|
||||
# Build output
|
||||
dist/
|
||||
|
||||
# Crawlee storage
|
||||
storage/
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
40
extractors/ukvisajobs/README.md
Normal file
40
extractors/ukvisajobs/README.md
Normal file
@ -0,0 +1,40 @@
|
||||
# UK Visa Jobs Extractor
|
||||
|
||||
Fetches job listings from [my.ukvisajobs.com](https://my.ukvisajobs.com) for roles whose employers may sponsor work visas.
|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
Set the following environment variables (you can get these from your browser's dev tools after logging in):
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `UKVISAJOBS_TOKEN` | JWT token from the request body (required) |
|
||||
| `UKVISAJOBS_AUTH_TOKEN` | Auth cookie token (defaults to UKVISAJOBS_TOKEN) |
|
||||
| `UKVISAJOBS_CSRF_TOKEN` | CSRF token from cookies |
|
||||
| `UKVISAJOBS_CI_SESSION` | CI session ID from cookies |
|
||||
| `UKVISAJOBS_MAX_JOBS` | Maximum jobs to fetch (default: 50, max: 200) |
|
||||
| `UKVISAJOBS_SEARCH_KEYWORD` | Optional search filter |
|
||||
|
||||
## How to get tokens
|
||||
|
||||
1. Log into `my.ukvisajobs.com` in your browser
|
||||
2. Open Developer Tools → Network tab
|
||||
3. Navigate to the jobs page
|
||||
4. Find the `fetch-jobs-data` POST request
|
||||
5. Copy values:
|
||||
- From **Request Body**: copy the `token` field → `UKVISAJOBS_TOKEN`
|
||||
- From **Cookies**: copy `authToken` → `UKVISAJOBS_AUTH_TOKEN`, `csrf_token` → `UKVISAJOBS_CSRF_TOKEN`, `ci_session` → `UKVISAJOBS_CI_SESSION`
|
||||
|
||||
## Running
|
||||
|
||||
```bash
|
||||
npm start
|
||||
```
|
||||
|
||||
Output is written to `storage/datasets/default/` as JSON files.
|
||||
599
extractors/ukvisajobs/package-lock.json
generated
Normal file
599
extractors/ukvisajobs/package-lock.json
generated
Normal file
@ -0,0 +1,599 @@
|
||||
{
|
||||
"name": "ukvisajobs-extractor",
|
||||
"version": "0.0.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "ukvisajobs-extractor",
|
||||
"version": "0.0.1",
|
||||
"license": "ISC",
|
||||
"devDependencies": {
|
||||
"@apify/tsconfig": "^0.1.0",
|
||||
"@types/node": "^24.0.0",
|
||||
"tsx": "^4.4.0",
|
||||
"typescript": "~5.9.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@apify/tsconfig": {
|
||||
"version": "0.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@apify/tsconfig/-/tsconfig-0.1.1.tgz",
|
||||
"integrity": "sha512-cS7mwN2UW1UXcluGXRDHH0Vr2VsSLkw2DwLTwoSBkcJSe8fvCr3MPryTSq0uod4MashpMURxJ7CsLKxs82VmOQ==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/@esbuild/aix-ppc64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.2.tgz",
|
||||
"integrity": "sha512-GZMB+a0mOMZs4MpDbj8RJp4cw+w1WV5NYD6xzgvzUJ5Ek2jerwfO2eADyI6ExDSUED+1X8aMbegahsJi+8mgpw==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"aix"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-arm": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.2.tgz",
|
||||
"integrity": "sha512-DVNI8jlPa7Ujbr1yjU2PfUSRtAUZPG9I1RwW4F4xFB1Imiu2on0ADiI/c3td+KmDtVKNbi+nffGDQMfcIMkwIA==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-pvz8ZZ7ot/RBphf8fv60ljmaoydPU12VuXHImtAs0XhLLw+EXBi2BLe3OYSBslR4rryHvweW5gmkKFwTiFy6KA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-z8Ank4Byh4TJJOh4wpz8g2vDy75zFL0TlZlkUkEwYXuPSgX8yzep596n6mT7905kA9uHZsf/o2OJZubl2l3M7A==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/darwin-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-davCD2Zc80nzDVRwXTcQP/28fiJbcOwvdolL0sOiOsbwBa72kegmVU0Wrh1MYrbuCL98Omp5dVhQFWRKR2ZAlg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/darwin-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-ZxtijOmlQCBWGwbVmwOF/UCzuGIbUkqB1faQRf5akQmxRJ1ujusWsb3CVfk/9iZKr2L5SMU5wPBi1UWbvL+VQA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/freebsd-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-lS/9CN+rgqQ9czogxlMcBMGd+l8Q3Nj1MFQwBZJyoEKI50XGxwuzznYdwcav6lpOGv5BqaZXqvBSiB/kJ5op+g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"freebsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/freebsd-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-tAfqtNYb4YgPnJlEFu4c212HYjQWSO/w/h/lQaBK7RbwGIkBOuNKQI9tqWzx7Wtp7bTPaGC6MJvWI608P3wXYA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"freebsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-arm": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.2.tgz",
|
||||
"integrity": "sha512-vWfq4GaIMP9AIe4yj1ZUW18RDhx6EPQKjwe7n8BbIecFtCQG4CfHGaHuh7fdfq+y3LIA2vGS/o9ZBGVxIDi9hw==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-hYxN8pr66NsCCiRFkHUAsxylNOcAQaxSSkHMMjcpx0si13t1LHFphxJZUiGwojB1a/Hd5OiPIqDdXONia6bhTw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-ia32": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.2.tgz",
|
||||
"integrity": "sha512-MJt5BRRSScPDwG2hLelYhAAKh9imjHK5+NE/tvnRLbIqUWa+0E9N4WNMjmp/kXXPHZGqPLxggwVhz7QP8CTR8w==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-loong64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.2.tgz",
|
||||
"integrity": "sha512-lugyF1atnAT463aO6KPshVCJK5NgRnU4yb3FUumyVz+cGvZbontBgzeGFO1nF+dPueHD367a2ZXe1NtUkAjOtg==",
|
||||
"cpu": [
|
||||
"loong64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-mips64el": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.2.tgz",
|
||||
"integrity": "sha512-nlP2I6ArEBewvJ2gjrrkESEZkB5mIoaTswuqNFRv/WYd+ATtUpe9Y09RnJvgvdag7he0OWgEZWhviS1OTOKixw==",
|
||||
"cpu": [
|
||||
"mips64el"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-ppc64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.2.tgz",
|
||||
"integrity": "sha512-C92gnpey7tUQONqg1n6dKVbx3vphKtTHJaNG2Ok9lGwbZil6DrfyecMsp9CrmXGQJmZ7iiVXvvZH6Ml5hL6XdQ==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-riscv64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.2.tgz",
|
||||
"integrity": "sha512-B5BOmojNtUyN8AXlK0QJyvjEZkWwy/FKvakkTDCziX95AowLZKR6aCDhG7LeF7uMCXEJqwa8Bejz5LTPYm8AvA==",
|
||||
"cpu": [
|
||||
"riscv64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-s390x": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.2.tgz",
|
||||
"integrity": "sha512-p4bm9+wsPwup5Z8f4EpfN63qNagQ47Ua2znaqGH6bqLlmJ4bx97Y9JdqxgGZ6Y8xVTixUnEkoKSHcpRlDnNr5w==",
|
||||
"cpu": [
|
||||
"s390x"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-uwp2Tip5aPmH+NRUwTcfLb+W32WXjpFejTIOWZFw/v7/KnpCDKG66u4DLcurQpiYTiYwQ9B7KOeMJvLCu/OvbA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/netbsd-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-Kj6DiBlwXrPsCRDeRvGAUb/LNrBASrfqAIok+xB0LxK8CHqxZ037viF13ugfsIpePH93mX7xfJp97cyDuTZ3cw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"netbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/netbsd-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-HwGDZ0VLVBY3Y+Nw0JexZy9o/nUAWq9MlV7cahpaXKW6TOzfVno3y3/M8Ga8u8Yr7GldLOov27xiCnqRZf0tCA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"netbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openbsd-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-DNIHH2BPQ5551A7oSHD0CKbwIA/Ox7+78/AWkbS5QoRzaqlev2uFayfSxq68EkonB+IKjiuxBFoV8ESJy8bOHA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openbsd-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-/it7w9Nb7+0KFIzjalNJVR5bOzA9Vay+yIPLVHfIQYG/j+j9VTH84aNB8ExGKPU4AzfaEvN9/V4HV+F+vo8OEg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openharmony-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-LRBbCmiU51IXfeXk59csuX/aSaToeG7w48nMwA6049Y4J4+VbWALAuXcs+qcD04rHDuSCSRKdmY63sruDS5qag==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openharmony"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/sunos-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-kMtx1yqJHTmqaqHPAzKCAkDaKsffmXkPHThSfRwZGyuqyIeBvf08KSsYXl+abf5HDAPMJIPnbBfXvP2ZC2TfHg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"sunos"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-Yaf78O/B3Kkh+nKABUF++bvJv5Ijoy9AN1ww904rOXZFLWVc5OLOfL56W+C8F9xn5JQZa3UX6m+IktJnIb1Jjg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-ia32": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.2.tgz",
|
||||
"integrity": "sha512-Iuws0kxo4yusk7sw70Xa2E2imZU5HoixzxfGCdxwBdhiDgt9vX9VUCBhqcwY7/uh//78A1hMkkROMJq9l27oLQ==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-sRdU18mcKf7F+YgheI/zGf5alZatMUTKj/jNS6l744f9u3WFu4v7twcUI9vu4mknF4Y9aDlblIie0IM+5xxaqQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "24.10.4",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.4.tgz",
|
||||
"integrity": "sha512-vnDVpYPMzs4wunl27jHrfmwojOGKya0xyM3sH+UE5iv5uPS6vX7UIoh6m+vQc5LGBq52HBKPIn/zcSZVzeDEZg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~7.16.0"
|
||||
}
|
||||
},
|
||||
"node_modules/esbuild": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.2.tgz",
|
||||
"integrity": "sha512-HyNQImnsOC7X9PMNaCIeAm4ISCQXs5a5YasTXVliKv4uuBo1dKrG0A+uQS8M5eXjVMnLg3WgXaKvprHlFJQffw==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"esbuild": "bin/esbuild"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@esbuild/aix-ppc64": "0.27.2",
|
||||
"@esbuild/android-arm": "0.27.2",
|
||||
"@esbuild/android-arm64": "0.27.2",
|
||||
"@esbuild/android-x64": "0.27.2",
|
||||
"@esbuild/darwin-arm64": "0.27.2",
|
||||
"@esbuild/darwin-x64": "0.27.2",
|
||||
"@esbuild/freebsd-arm64": "0.27.2",
|
||||
"@esbuild/freebsd-x64": "0.27.2",
|
||||
"@esbuild/linux-arm": "0.27.2",
|
||||
"@esbuild/linux-arm64": "0.27.2",
|
||||
"@esbuild/linux-ia32": "0.27.2",
|
||||
"@esbuild/linux-loong64": "0.27.2",
|
||||
"@esbuild/linux-mips64el": "0.27.2",
|
||||
"@esbuild/linux-ppc64": "0.27.2",
|
||||
"@esbuild/linux-riscv64": "0.27.2",
|
||||
"@esbuild/linux-s390x": "0.27.2",
|
||||
"@esbuild/linux-x64": "0.27.2",
|
||||
"@esbuild/netbsd-arm64": "0.27.2",
|
||||
"@esbuild/netbsd-x64": "0.27.2",
|
||||
"@esbuild/openbsd-arm64": "0.27.2",
|
||||
"@esbuild/openbsd-x64": "0.27.2",
|
||||
"@esbuild/openharmony-arm64": "0.27.2",
|
||||
"@esbuild/sunos-x64": "0.27.2",
|
||||
"@esbuild/win32-arm64": "0.27.2",
|
||||
"@esbuild/win32-ia32": "0.27.2",
|
||||
"@esbuild/win32-x64": "0.27.2"
|
||||
}
|
||||
},
|
||||
"node_modules/fsevents": {
|
||||
"version": "2.3.3",
|
||||
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
|
||||
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/get-tsconfig": {
|
||||
"version": "4.13.0",
|
||||
"resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.0.tgz",
|
||||
"integrity": "sha512-1VKTZJCwBrvbd+Wn3AOgQP/2Av+TfTCOlE4AcRJE72W1ksZXbAx8PPBR9RzgTeSPzlPMHrbANMH3LbltH73wxQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"resolve-pkg-maps": "^1.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/resolve-pkg-maps": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
|
||||
"integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/tsx": {
|
||||
"version": "4.21.0",
|
||||
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz",
|
||||
"integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"esbuild": "~0.27.0",
|
||||
"get-tsconfig": "^4.7.5"
|
||||
},
|
||||
"bin": {
|
||||
"tsx": "dist/cli.mjs"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"fsevents": "~2.3.3"
|
||||
}
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "5.9.3",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
|
||||
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.17"
|
||||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "7.16.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
||||
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
}
|
||||
}
|
||||
}
|
||||
22
extractors/ukvisajobs/package.json
Normal file
22
extractors/ukvisajobs/package.json
Normal file
@ -0,0 +1,22 @@
|
||||
{
|
||||
"name": "ukvisajobs-extractor",
|
||||
"version": "0.0.1",
|
||||
"type": "module",
|
||||
"description": "UK Visa Jobs extractor - fetches job listings that may sponsor work visas",
|
||||
"main": "dist/main.js",
|
||||
"dependencies": {},
|
||||
"devDependencies": {
|
||||
"@apify/tsconfig": "^0.1.0",
|
||||
"@types/node": "^24.0.0",
|
||||
"tsx": "^4.4.0",
|
||||
"typescript": "~5.9.0"
|
||||
},
|
||||
"scripts": {
|
||||
"start": "npm run start:dev",
|
||||
"start:prod": "node dist/main.js",
|
||||
"start:dev": "tsx src/main.ts",
|
||||
"build": "tsc"
|
||||
},
|
||||
"author": "",
|
||||
"license": "ISC"
|
||||
}
|
||||
303
extractors/ukvisajobs/src/main.ts
Normal file
303
extractors/ukvisajobs/src/main.ts
Normal file
@ -0,0 +1,303 @@
|
||||
/**
|
||||
* UK Visa Jobs Extractor
|
||||
*
|
||||
* Fetches job listings from my.ukvisajobs.com that may sponsor work visas.
|
||||
* Writes the extracted jobs as JSON files (the README gives storage/datasets/default/) for the orchestrator to consume; progress logs go to stdout.
|
||||
*
|
||||
* Environment variables:
|
||||
* UKVISAJOBS_TOKEN - JWT token (required)
|
||||
* UKVISAJOBS_AUTH_TOKEN - Auth cookie token (defaults to UKVISAJOBS_TOKEN)
|
||||
* UKVISAJOBS_CSRF_TOKEN - CSRF token cookie
|
||||
* UKVISAJOBS_CI_SESSION - CI session cookie
|
||||
* UKVISAJOBS_MAX_JOBS - Maximum jobs to fetch (default: 50, max: 200)
|
||||
* UKVISAJOBS_SEARCH_KEYWORD - Optional search filter
|
||||
*/
|
||||
|
||||
import { mkdir, writeFile } from 'fs/promises';
|
||||
import { join, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
const API_URL = 'https://my.ukvisajobs.com/ukvisa-api/api/fetch-jobs-data';
|
||||
const JOBS_PER_PAGE = 15;
|
||||
const DEFAULT_MAX_JOBS = 50;
|
||||
const MAX_ALLOWED_JOBS = 200;
|
||||
|
||||
interface UkVisaJobsApiJob {
|
||||
id: string;
|
||||
title: string;
|
||||
company_name: string;
|
||||
company_link?: string;
|
||||
job_link: string;
|
||||
city: string;
|
||||
created_date: string;
|
||||
job_expire: string;
|
||||
description?: string;
|
||||
min_salary?: string;
|
||||
max_salary?: string;
|
||||
salary_interval?: string;
|
||||
salary_method?: string;
|
||||
degree_requirement?: string;
|
||||
job_type?: string;
|
||||
job_level?: string;
|
||||
job_industry?: string;
|
||||
visa_acceptance?: string;
|
||||
applicants_outside_uk?: string;
|
||||
likely_to_sponsor?: string;
|
||||
definitely_sponsored?: string;
|
||||
new_entrant?: string;
|
||||
student_graduate?: string;
|
||||
image?: string;
|
||||
computed_cos_total?: string;
|
||||
}
|
||||
|
||||
/**
 * Envelope returned by the fetch-jobs-data endpoint.
 *
 * The caller treats any `status` other than 1 as a warning condition.
 */
interface UkVisaJobsApiResponse {
  /** Result code reported by the API. */
  status: number;
  /** Total job count reported by the API. */
  totalJobs: number;
  /** Optional query string reported by the API. */
  query?: string;
  /** Job records for the requested page. */
  jobs: UkVisaJobsApiJob[];
}
|
||||
|
||||
/**
 * Normalized job record produced by mapJob from a raw UKVisaJobs API record.
 */
interface ExtractedJob {
  /** Constant tag naming this extractor as the origin of the record. */
  source: 'ukvisajobs';
  /** The API's `id` for the job. */
  sourceJobId: string;

  // --- Core listing data ---
  title: string;
  employer: string;
  employerUrl?: string;

  // --- Links (mapJob fills both from the API's `job_link`) ---
  jobUrl: string;
  applicationLink: string;

  // --- Optional details ---
  location?: string;
  deadline?: string;
  salary?: string;
  jobDescription?: string;
  datePosted?: string;
  degreeRequired?: string;
  jobType?: string;
  jobLevel?: string;
}
|
||||
|
||||
/**
 * Coerces a loosely typed value to a non-empty string.
 *
 * @param value - Any value, typically a field from unvalidated JSON.
 * @returns The trimmed text for a non-blank string, the `String(...)` form of a
 *   number or boolean, and `null` for everything else (including `null`,
 *   `undefined`, blank strings and objects).
 */
function toStringOrNull(value: unknown): string | null {
  switch (typeof value) {
    case 'string': {
      const text = value.trim();
      return text === '' ? null : text;
    }
    case 'number':
    case 'boolean':
      return String(value);
    default:
      // Covers undefined, null (typeof 'object'), objects, functions, symbols, bigints.
      return null;
  }
}
|
||||
|
||||
/**
 * Coerces a loosely typed value to a finite number.
 *
 * @param value - Any value, typically an environment variable or a JSON field.
 * @returns The number itself when it is finite, the `Number(...)` parse of a
 *   non-blank string when that parse is finite, and `null` otherwise.
 */
function toNumberOrNull(value: unknown): number | null {
  let candidate: number;

  if (typeof value === 'number') {
    candidate = value;
  } else if (typeof value === 'string' && value.trim() !== '') {
    candidate = Number(value.trim());
  } else {
    // null, undefined, blank strings and every non-string/non-number type.
    return null;
  }

  // Rejects NaN and +/-Infinity from either branch.
  return Number.isFinite(candidate) ? candidate : null;
}
|
||||
|
||||
/**
 * Fetches one page of job listings from the UKVisaJobs API.
 *
 * Sends a multipart form POST to API_URL with fixed filter values, the page
 * number, the search keyword and the token, plus a fixed set of browser-like
 * request headers and the supplied cookie string.
 *
 * @param pageNo - Page number to request (sent as `pageNo`).
 * @param token - Token sent in the form body.
 * @param cookies - Pre-built `Cookie` header value.
 * @param options - Optional settings; `searchKeyword` filters the results.
 * @returns The parsed response body. Only JSON well-formedness is checked; the
 *   shape is asserted, not validated.
 * @throws Error when the HTTP status is not 2xx, or when the body is not valid
 *   JSON. Both messages include at most a short excerpt of the body.
 */
async function fetchPage(
  pageNo: number,
  token: string,
  cookies: string,
  options: { searchKeyword?: string } = {}
): Promise<UkVisaJobsApiResponse> {
  // Error bodies can be whole HTML pages; keep thrown messages readable in logs.
  const MAX_BODY_CHARS_IN_ERROR = 500;
  const excerpt = (body: string): string =>
    body.length > MAX_BODY_CHARS_IN_ERROR
      ? `${body.slice(0, MAX_BODY_CHARS_IN_ERROR)}… [truncated]`
      : body;

  // Use native FormData API (Node.js 18+)
  const formData = new FormData();
  formData.append('is_global', '0');
  formData.append('sortBy', 'desc');
  formData.append('pageNo', String(pageNo));
  formData.append('visaAcceptance', 'false');
  formData.append('applicants_outside_uk', 'false');
  // With no keyword, the field is sent as the literal string 'null'.
  formData.append('searchKeyword', options.searchKeyword || 'null');
  formData.append('token', token);

  const response = await fetch(API_URL, {
    method: 'POST',
    headers: {
      'accept': 'application/json, text/plain, */*',
      'accept-language': 'en-US,en;q=0.9',
      'cookie': cookies,
      'origin': 'https://my.ukvisajobs.com',
      'referer': `https://my.ukvisajobs.com/open-jobs/1?is_global=0&sortBy=desc&pageNo=${pageNo}&visaAcceptance=false&applicants_outside_uk=false`,
      'user-agent': 'Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Mobile Safari/537.36',
    },
    body: formData,
  });

  // Read the body once as text so both failure paths can quote it.
  const bodyText = await response.text();

  if (!response.ok) {
    throw new Error(`UKVisaJobs API returned ${response.status}: ${response.statusText} - ${excerpt(bodyText)}`);
  }

  try {
    return JSON.parse(bodyText) as UkVisaJobsApiResponse;
  } catch {
    // A 2xx response that is not JSON would otherwise surface as a bare
    // SyntaxError with no hint of which request produced it.
    throw new Error(
      `UKVisaJobs API returned a non-JSON body for page ${pageNo} (HTTP ${response.status}): ${excerpt(bodyText)}`
    );
  }
}
|
||||
|
||||
/**
 * Converts a raw UKVisaJobs API record into the normalized ExtractedJob shape.
 *
 * - Salary: "£min-max" when both bounds parse to positive numbers, "£max" when
 *   only the upper bound does. " / <interval>" is appended when
 *   `salary_interval` is set. A record with only a lower bound yields no salary.
 * - Description: the API's `description` when present. Otherwise a summary of
 *   the visa-related flags equal to "yes" (case-insensitive, surrounding
 *   whitespace ignored), or `undefined` when none are.
 * - `jobUrl` and `applicationLink` are both taken from `job_link`.
 * - Missing title/employer fall back to "Unknown Title" / "Unknown Employer".
 *
 * @param raw - One entry of the API response's `jobs` array.
 * @returns The normalized job record.
 */
function mapJob(raw: UkVisaJobsApiJob): ExtractedJob {
  // Pin the locale so the thousands separator does not vary with the host's
  // default locale (a bare toLocaleString() could give "30.000" or "30 000").
  const formatAmount = (amount: number): string => amount.toLocaleString('en-GB');

  // The flags are typed as strings but come from unvalidated JSON. Going through
  // toStringOrNull means a boolean or number cannot throw on .toLowerCase().
  const isYes = (flag: unknown): boolean => toStringOrNull(flag)?.toLowerCase() === 'yes';

  // Build salary string from min/max
  const minSalary = toNumberOrNull(raw.min_salary);
  const maxSalary = toNumberOrNull(raw.max_salary);

  let salary: string | undefined;
  if (maxSalary !== null && maxSalary > 0) {
    const amount =
      minSalary !== null && minSalary > 0
        ? `${formatAmount(minSalary)}-${formatAmount(maxSalary)}`
        : formatAmount(maxSalary);
    salary = `£${amount}`;
    if (raw.salary_interval) {
      salary += ` / ${raw.salary_interval}`;
    }
  }

  // Build a description from visa sponsorship fields
  const visaFlags: ReadonlyArray<readonly [unknown, string]> = [
    [raw.visa_acceptance, 'Visa acceptance: Yes'],
    [raw.applicants_outside_uk, 'Accepts applicants outside UK'],
    [raw.likely_to_sponsor, 'Likely to sponsor'],
    [raw.definitely_sponsored, 'Definitely sponsored'],
    [raw.new_entrant, 'New entrant friendly'],
    [raw.student_graduate, 'Student/Graduate friendly'],
  ];
  const visaInfo = visaFlags.filter(([flag]) => isYes(flag)).map(([, label]) => label);

  // Prefer the real description; fall back to the visa summary only when it is absent.
  let description: string | undefined;
  if (raw.description) {
    description = raw.description;
  } else if (visaInfo.length > 0) {
    description = `Visa sponsorship info: ${visaInfo.join(', ')}`;
  }

  return {
    source: 'ukvisajobs',
    sourceJobId: raw.id,
    title: raw.title || 'Unknown Title',
    employer: raw.company_name || 'Unknown Employer',
    employerUrl: toStringOrNull(raw.company_link) ?? undefined,
    jobUrl: raw.job_link,
    applicationLink: raw.job_link,
    location: raw.city || undefined,
    deadline: raw.job_expire || undefined,
    salary,
    jobDescription: description,
    datePosted: raw.created_date || undefined,
    degreeRequired: toStringOrNull(raw.degree_requirement) ?? undefined,
    jobType: toStringOrNull(raw.job_type) ?? undefined,
    jobLevel: toStringOrNull(raw.job_level) ?? undefined,
  };
}
|
||||
|
||||
/**
 * Entry point of the UK Visa Jobs extractor.
 *
 * Reads credentials and limits from the environment, walks the listing API
 * page by page through `fetchPage`, de-duplicates jobs by their ID, maps each
 * one with `mapJob`, and writes the result under `../storage/datasets/default`
 * as one zero-padded JSON file per job plus a combined `jobs.json`.
 *
 * The process exits with code 1 when `UKVISAJOBS_TOKEN` is not set or when
 * fetching or writing throws.
 */
async function main(): Promise<void> {
  console.log('🇬🇧 UK Visa Jobs Extractor starting...');

  const env = process.env;

  // The main token is mandatory; everything else has a fallback.
  const token = env.UKVISAJOBS_TOKEN;
  if (!token) {
    console.error('❌ UKVISAJOBS_TOKEN environment variable is not set');
    process.exit(1);
  }

  const authToken = env.UKVISAJOBS_AUTH_TOKEN || token;
  const csrfToken = env.UKVISAJOBS_CSRF_TOKEN || '';
  const ciSession = env.UKVISAJOBS_CI_SESSION || '';
  const searchKeyword = env.UKVISAJOBS_SEARCH_KEYWORD || undefined;

  // Assemble the Cookie header from whichever values are actually present.
  const cookieParts: string[] = [
    csrfToken && `csrf_token=${csrfToken}`,
    ciSession && `ci_session=${ciSession}`,
    authToken && `authToken=${authToken}`,
  ].filter((part) => part !== '');
  const cookies = cookieParts.join('; ');

  console.log(` Cookies configured: ${cookieParts.length > 0 ? 'Yes' : 'No'}`);
  console.log(` Token length: ${token.length}`);

  // Requested job count, capped at the allowed maximum, then turned into a page budget.
  const requestedMax = toNumberOrNull(env.UKVISAJOBS_MAX_JOBS);
  const maxJobs = Math.min(requestedMax ?? DEFAULT_MAX_JOBS, MAX_ALLOWED_JOBS);
  const maxPages = Math.ceil(maxJobs / JOBS_PER_PAGE);

  console.log(` Max jobs: ${maxJobs} (${maxPages} pages)`);
  if (searchKeyword) {
    console.log(` Search keyword: ${searchKeyword}`);
  }

  const collected: ExtractedJob[] = [];
  const seenIds = new Set<string>();

  try {
    for (let pageNo = 1; pageNo <= maxPages && collected.length < maxJobs; pageNo++) {
      console.log(` Fetching page ${pageNo}/${maxPages}...`);

      const page = await fetchPage(pageNo, token, cookies, { searchKeyword });

      // Any status other than 1 ends pagination; jobs gathered so far are kept.
      if (page.status !== 1) {
        console.warn(` ⚠️ API returned status ${page.status} on page ${pageNo}`);
        break;
      }

      if (pageNo === 1) {
        console.log(` Total available: ${page.totalJobs} jobs`);
      }

      const pageJobs = page.jobs;
      if (!pageJobs || pageJobs.length === 0) {
        console.log(` No more jobs on page ${pageNo}`);
        break;
      }

      for (const rawJob of pageJobs) {
        if (collected.length >= maxJobs) break;

        // Only the first occurrence of each job ID is kept.
        if (!seenIds.has(rawJob.id)) {
          seenIds.add(rawJob.id);
          collected.push(mapJob(rawJob));
        }
      }

      // A short page means there is nothing after it.
      if (pageJobs.length < JOBS_PER_PAGE) {
        break;
      }

      // Pause between requests to go easy on the API.
      await new Promise((resolve) => setTimeout(resolve, 500));
    }

    console.log(`✅ Scraped ${collected.length} jobs`);

    // Output layout mirrors a Crawlee dataset: one numbered JSON file per item.
    const storageDir = join(__dirname, '../storage/datasets/default');
    await mkdir(storageDir, { recursive: true });

    for (const [index, job] of collected.entries()) {
      const filename = join(storageDir, `${String(index + 1).padStart(6, '0')}.json`);
      await writeFile(filename, JSON.stringify(job, null, 2));
    }

    // Combined file for consumers that prefer a single document.
    const outputFile = join(storageDir, 'jobs.json');
    await writeFile(outputFile, JSON.stringify(collected, null, 2));

    console.log(` Output written to: ${storageDir}`);
    console.log(` Jobs file: ${outputFile}`);
  } catch (error) {
    let message = 'Unknown error';
    if (error instanceof Error) {
      message = error.message;
    }
    console.error(`❌ Error: ${message}`);
    process.exit(1);
  }
}
|
||||
|
||||
// Start the extractor; a rejection that escapes main() is logged and turned into exit code 1.
main().catch((fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});
|
||||
12
extractors/ukvisajobs/tsconfig.json
Normal file
12
extractors/ukvisajobs/tsconfig.json
Normal file
@ -0,0 +1,12 @@
|
||||
{
|
||||
"extends": "@apify/tsconfig",
|
||||
"compilerOptions": {
|
||||
"module": "NodeNext",
|
||||
"moduleResolution": "NodeNext",
|
||||
"target": "ES2022",
|
||||
"outDir": "dist",
|
||||
"noUnusedLocals": false,
|
||||
"lib": ["DOM"]
|
||||
},
|
||||
"include": ["./src/**/*"]
|
||||
}
|
||||
@ -32,3 +32,12 @@ JOBSPY_RESULTS_WANTED=200
|
||||
JOBSPY_HOURS_OLD=72
|
||||
JOBSPY_COUNTRY_INDEED=UK
|
||||
JOBSPY_LINKEDIN_FETCH_DESCRIPTION=1
|
||||
|
||||
# =============================================================================
|
||||
# UKVisaJobs (UK visa sponsorship job scraping) - optional
|
||||
# =============================================================================
|
||||
# Get these tokens from browser dev tools after logging into my.ukvisajobs.com
|
||||
UKVISAJOBS_TOKEN=
|
||||
UKVISAJOBS_AUTH_TOKEN=
|
||||
UKVISAJOBS_CSRF_TOKEN=
|
||||
UKVISAJOBS_CI_SESSION=
|
||||
|
||||
@ -12,7 +12,7 @@ import { Header, JobList, PipelineProgress, Stats } from "./components";
|
||||
import * as api from "./api";
|
||||
import { SettingsPage } from "./pages/SettingsPage";
|
||||
|
||||
const DEFAULT_PIPELINE_SOURCES: JobSource[] = ["gradcracker", "indeed", "linkedin"];
|
||||
const DEFAULT_PIPELINE_SOURCES: JobSource[] = ["gradcracker", "indeed", "linkedin", "ukvisajobs"];
|
||||
const PIPELINE_SOURCES_STORAGE_KEY = "jobops.pipeline.sources";
|
||||
|
||||
export const App: React.FC = () => {
|
||||
@ -33,7 +33,7 @@ export const App: React.FC = () => {
|
||||
const raw = localStorage.getItem(PIPELINE_SOURCES_STORAGE_KEY);
|
||||
if (!raw) return DEFAULT_PIPELINE_SOURCES;
|
||||
const parsed = JSON.parse(raw) as unknown;
|
||||
const allowed: JobSource[] = ["gradcracker", "indeed", "linkedin"];
|
||||
const allowed: JobSource[] = ["gradcracker", "indeed", "linkedin", "ukvisajobs"];
|
||||
if (!Array.isArray(parsed)) return DEFAULT_PIPELINE_SOURCES;
|
||||
const next = parsed.filter((value): value is JobSource => allowed.includes(value));
|
||||
return next.length > 0 ? next : DEFAULT_PIPELINE_SOURCES;
|
||||
|
||||
@ -60,9 +60,10 @@ export const Header: React.FC<HeaderProps> = ({
|
||||
gradcracker: "Gradcracker",
|
||||
indeed: "Indeed",
|
||||
linkedin: "LinkedIn",
|
||||
ukvisajobs: "UK Visa Jobs",
|
||||
};
|
||||
|
||||
const orderedSources: JobSource[] = ["gradcracker", "indeed", "linkedin"];
|
||||
const orderedSources: JobSource[] = ["gradcracker", "indeed", "linkedin", "ukvisajobs"];
|
||||
|
||||
const toggleSource = (source: JobSource, checked: boolean) => {
|
||||
const next = checked
|
||||
|
||||
@ -60,10 +60,10 @@ apiRouter.get('/jobs', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const statusFilter = req.query.status as string | undefined;
|
||||
const statuses = statusFilter?.split(',').filter(Boolean) as JobStatus[] | undefined;
|
||||
|
||||
|
||||
const jobs = await jobsRepo.getAllJobs(statuses);
|
||||
const stats = await jobsRepo.getJobStats();
|
||||
|
||||
|
||||
const response: ApiResponse<JobsListResponse> = {
|
||||
success: true,
|
||||
data: {
|
||||
@ -72,7 +72,7 @@ apiRouter.get('/jobs', async (req: Request, res: Response) => {
|
||||
byStatus: stats,
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
res.json(response);
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
@ -86,11 +86,11 @@ apiRouter.get('/jobs', async (req: Request, res: Response) => {
|
||||
apiRouter.get('/jobs/:id', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const job = await jobsRepo.getJobById(req.params.id);
|
||||
|
||||
|
||||
if (!job) {
|
||||
return res.status(404).json({ success: false, error: 'Job not found' });
|
||||
}
|
||||
|
||||
|
||||
res.json({ success: true, data: job });
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
@ -113,11 +113,11 @@ apiRouter.patch('/jobs/:id', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const input = updateJobSchema.parse(req.body);
|
||||
const job = await jobsRepo.updateJob(req.params.id, input);
|
||||
|
||||
|
||||
if (!job) {
|
||||
return res.status(404).json({ success: false, error: 'Job not found' });
|
||||
}
|
||||
|
||||
|
||||
res.json({ success: true, data: job });
|
||||
} catch (error) {
|
||||
if (error instanceof z.ZodError) {
|
||||
@ -137,11 +137,11 @@ apiRouter.post('/jobs/:id/process', async (req: Request, res: Response) => {
|
||||
const force = forceRaw === '1' || forceRaw === 'true';
|
||||
|
||||
const result = await processJob(req.params.id, { force });
|
||||
|
||||
|
||||
if (!result.success) {
|
||||
return res.status(400).json({ success: false, error: result.error });
|
||||
}
|
||||
|
||||
|
||||
const job = await jobsRepo.getJobById(req.params.id);
|
||||
res.json({ success: true, data: job });
|
||||
} catch (error) {
|
||||
@ -156,13 +156,13 @@ apiRouter.post('/jobs/:id/process', async (req: Request, res: Response) => {
|
||||
apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const job = await jobsRepo.getJobById(req.params.id);
|
||||
|
||||
|
||||
if (!job) {
|
||||
return res.status(404).json({ success: false, error: 'Job not found' });
|
||||
}
|
||||
|
||||
|
||||
const appliedAt = new Date().toISOString();
|
||||
|
||||
|
||||
// Sync to Notion
|
||||
const notionResult = await createNotionEntry({
|
||||
id: job.id,
|
||||
@ -175,7 +175,7 @@ apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
|
||||
pdfPath: job.pdfPath,
|
||||
appliedAt,
|
||||
});
|
||||
|
||||
|
||||
// Update job status
|
||||
const updatedJob = await jobsRepo.updateJob(job.id, {
|
||||
status: 'applied',
|
||||
@ -186,7 +186,7 @@ apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
|
||||
if (updatedJob) {
|
||||
notifyJobCompleteWebhook(updatedJob).catch(console.warn)
|
||||
}
|
||||
|
||||
|
||||
res.json({ success: true, data: updatedJob });
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
@ -200,11 +200,11 @@ apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
|
||||
apiRouter.post('/jobs/:id/reject', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const job = await jobsRepo.updateJob(req.params.id, { status: 'rejected' });
|
||||
|
||||
|
||||
if (!job) {
|
||||
return res.status(404).json({ success: false, error: 'Job not found' });
|
||||
}
|
||||
|
||||
|
||||
res.json({ success: true, data: job });
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
@ -351,7 +351,7 @@ apiRouter.get('/pipeline/status', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { isRunning } = getPipelineStatus();
|
||||
const lastRun = await pipelineRepo.getLatestPipelineRun();
|
||||
|
||||
|
||||
const response: ApiResponse<PipelineStatusResponse> = {
|
||||
success: true,
|
||||
data: {
|
||||
@ -360,7 +360,7 @@ apiRouter.get('/pipeline/status', async (req: Request, res: Response) => {
|
||||
nextScheduledRun: null, // Would come from n8n
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
res.json(response);
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
@ -377,20 +377,20 @@ apiRouter.get('/pipeline/progress', (req: Request, res: Response) => {
|
||||
res.setHeader('Cache-Control', 'no-cache');
|
||||
res.setHeader('Connection', 'keep-alive');
|
||||
res.setHeader('X-Accel-Buffering', 'no'); // Disable Nginx buffering
|
||||
|
||||
|
||||
// Send initial progress
|
||||
const sendProgress = (data: unknown) => {
|
||||
res.write(`data: ${JSON.stringify(data)}\n\n`);
|
||||
};
|
||||
|
||||
|
||||
// Subscribe to progress updates
|
||||
const unsubscribe = subscribeToProgress(sendProgress);
|
||||
|
||||
|
||||
// Send heartbeat every 30 seconds to keep connection alive
|
||||
const heartbeat = setInterval(() => {
|
||||
res.write(': heartbeat\n\n');
|
||||
}, 30000);
|
||||
|
||||
|
||||
// Cleanup on close
|
||||
req.on('close', () => {
|
||||
clearInterval(heartbeat);
|
||||
@ -417,19 +417,19 @@ apiRouter.get('/pipeline/runs', async (req: Request, res: Response) => {
|
||||
const runPipelineSchema = z.object({
|
||||
topN: z.number().min(1).max(50).optional(),
|
||||
minSuitabilityScore: z.number().min(0).max(100).optional(),
|
||||
sources: z.array(z.enum(['gradcracker', 'indeed', 'linkedin'])).min(1).optional(),
|
||||
sources: z.array(z.enum(['gradcracker', 'indeed', 'linkedin', 'ukvisajobs'])).min(1).optional(),
|
||||
});
|
||||
|
||||
apiRouter.post('/pipeline/run', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const config = runPipelineSchema.parse(req.body);
|
||||
|
||||
|
||||
// Start pipeline in background
|
||||
runPipeline(config).catch(console.error);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: { message: 'Pipeline started' }
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: { message: 'Pipeline started' }
|
||||
});
|
||||
} catch (error) {
|
||||
if (error instanceof z.ZodError) {
|
||||
@ -451,21 +451,21 @@ apiRouter.post('/webhook/trigger', async (req: Request, res: Response) => {
|
||||
// Optional: Add authentication check
|
||||
const authHeader = req.headers.authorization;
|
||||
const expectedToken = process.env.WEBHOOK_SECRET;
|
||||
|
||||
|
||||
if (expectedToken && authHeader !== `Bearer ${expectedToken}`) {
|
||||
return res.status(401).json({ success: false, error: 'Unauthorized' });
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
// Start pipeline in background
|
||||
runPipeline().catch(console.error);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
message: 'Pipeline triggered',
|
||||
triggeredAt: new Date().toISOString(),
|
||||
}
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
@ -483,14 +483,14 @@ apiRouter.post('/webhook/trigger', async (req: Request, res: Response) => {
|
||||
apiRouter.delete('/database', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const result = clearDatabase();
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
message: 'Database cleared',
|
||||
jobsDeleted: result.jobsDeleted,
|
||||
runsDeleted: result.runsDeleted,
|
||||
}
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
|
||||
@ -7,9 +7,9 @@ import { sql } from 'drizzle-orm';
|
||||
|
||||
export const jobs = sqliteTable('jobs', {
|
||||
id: text('id').primaryKey(),
|
||||
|
||||
|
||||
// From crawler
|
||||
source: text('source', { enum: ['gradcracker', 'indeed', 'linkedin'] }).notNull().default('gradcracker'),
|
||||
source: text('source', { enum: ['gradcracker', 'indeed', 'linkedin', 'ukvisajobs'] }).notNull().default('gradcracker'),
|
||||
sourceJobId: text('source_job_id'),
|
||||
jobUrlDirect: text('job_url_direct'),
|
||||
datePosted: text('date_posted'),
|
||||
@ -51,17 +51,17 @@ export const jobs = sqliteTable('jobs', {
|
||||
companyReviewsCount: integer('company_reviews_count'),
|
||||
vacancyCount: integer('vacancy_count'),
|
||||
workFromHomeType: text('work_from_home_type'),
|
||||
|
||||
|
||||
// Orchestrator enrichments
|
||||
status: text('status', {
|
||||
enum: ['discovered', 'processing', 'ready', 'applied', 'rejected', 'expired']
|
||||
status: text('status', {
|
||||
enum: ['discovered', 'processing', 'ready', 'applied', 'rejected', 'expired']
|
||||
}).notNull().default('discovered'),
|
||||
suitabilityScore: real('suitability_score'),
|
||||
suitabilityReason: text('suitability_reason'),
|
||||
tailoredSummary: text('tailored_summary'),
|
||||
pdfPath: text('pdf_path'),
|
||||
notionPageId: text('notion_page_id'),
|
||||
|
||||
|
||||
// Timestamps
|
||||
discoveredAt: text('discovered_at').notNull().default(sql`(datetime('now'))`),
|
||||
processedAt: text('processed_at'),
|
||||
|
||||
@ -12,6 +12,7 @@ import { join, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { runCrawler } from '../services/crawler.js';
|
||||
import { runJobSpy } from '../services/jobspy.js';
|
||||
import { runUkVisaJobs } from '../services/ukvisajobs.js';
|
||||
import { scoreJobSuitability } from '../services/scorer.js';
|
||||
import { generateSummary } from '../services/summary.js';
|
||||
import { generatePdf } from '../services/pdf.js';
|
||||
@ -27,7 +28,7 @@ const DEFAULT_PROFILE_PATH = join(__dirname, '../../../../resume-generator/base.
|
||||
const DEFAULT_CONFIG: PipelineConfig = {
|
||||
topN: 10,
|
||||
minSuitabilityScore: 50,
|
||||
sources: ['gradcracker', 'indeed', 'linkedin'],
|
||||
sources: ['gradcracker', 'indeed', 'linkedin', 'ukvisajobs'],
|
||||
profilePath: DEFAULT_PROFILE_PATH,
|
||||
outputDir: join(__dirname, '../../../data/pdfs'),
|
||||
};
|
||||
@ -88,22 +89,22 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
error: 'Pipeline is already running',
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
isPipelineRunning = true;
|
||||
resetProgress();
|
||||
const mergedConfig = { ...DEFAULT_CONFIG, ...config };
|
||||
|
||||
|
||||
// Create pipeline run record
|
||||
const pipelineRun = await pipelineRepo.createPipelineRun();
|
||||
|
||||
|
||||
console.log('🚀 Starting job pipeline...');
|
||||
console.log(` Config: topN=${mergedConfig.topN}, minScore=${mergedConfig.minSuitabilityScore} (manual processing)`);
|
||||
|
||||
|
||||
try {
|
||||
// Step 1: Load profile
|
||||
console.log('\n📋 Loading profile...');
|
||||
const profile = await loadProfile(mergedConfig.profilePath);
|
||||
|
||||
|
||||
// Step 2: Run crawler
|
||||
console.log('\n🕷️ Running crawler...');
|
||||
progressHelpers.startCrawling();
|
||||
@ -154,6 +155,21 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
}
|
||||
}
|
||||
|
||||
// Run UKVisaJobs extractor if selected
|
||||
if (mergedConfig.sources.includes('ukvisajobs')) {
|
||||
updateProgress({
|
||||
step: 'crawling',
|
||||
detail: 'UKVisaJobs: scraping visa-sponsoring jobs...',
|
||||
});
|
||||
|
||||
const ukVisaResult = await runUkVisaJobs({ maxJobs: 50 });
|
||||
if (!ukVisaResult.success) {
|
||||
sourceErrors.push(`ukvisajobs: ${ukVisaResult.error ?? 'unknown error'}`);
|
||||
} else {
|
||||
discoveredJobs.push(...ukVisaResult.jobs);
|
||||
}
|
||||
}
|
||||
|
||||
if (discoveredJobs.length === 0 && sourceErrors.length > 0) {
|
||||
throw new Error(`All sources failed: ${sourceErrors.join('; ')}`);
|
||||
}
|
||||
@ -163,18 +179,18 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
}
|
||||
|
||||
progressHelpers.crawlingComplete(discoveredJobs.length);
|
||||
|
||||
|
||||
// Step 3: Import discovered jobs
|
||||
console.log('\n💾 Importing jobs to database...');
|
||||
const { created, skipped } = await jobsRepo.bulkCreateJobs(discoveredJobs);
|
||||
console.log(` Created: ${created}, Skipped (duplicates): ${skipped}`);
|
||||
|
||||
|
||||
progressHelpers.importComplete(created, skipped);
|
||||
|
||||
|
||||
await pipelineRepo.updatePipelineRun(pipelineRun.id, {
|
||||
jobsDiscovered: created,
|
||||
});
|
||||
|
||||
|
||||
// Step 4: Score all discovered jobs missing a score
|
||||
console.log('\n🎯 Scoring jobs for suitability...');
|
||||
const unprocessedJobs = await jobsRepo.getUnscoredDiscoveredJobs();
|
||||
@ -187,7 +203,7 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
totalToProcess: 0,
|
||||
currentJob: undefined,
|
||||
});
|
||||
|
||||
|
||||
// Score jobs with progress updates
|
||||
const scoredJobs: Array<Job & { suitabilityScore: number; suitabilityReason: string }> = [];
|
||||
for (let i = 0; i < unprocessedJobs.length; i++) {
|
||||
@ -217,21 +233,21 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
suitabilityReason: reason,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
progressHelpers.scoringComplete(scoredJobs.length);
|
||||
console.log(`\n📊 Scored ${scoredJobs.length} jobs. Ready for manual processing.`);
|
||||
|
||||
|
||||
// Update pipeline run as completed
|
||||
await pipelineRepo.updatePipelineRun(pipelineRun.id, {
|
||||
status: 'completed',
|
||||
completedAt: new Date().toISOString(),
|
||||
jobsProcessed: 0,
|
||||
});
|
||||
|
||||
|
||||
console.log('\n🎉 Pipeline completed!');
|
||||
console.log(` Jobs discovered: ${created}`);
|
||||
console.log(' Jobs processed: 0 (manual)');
|
||||
|
||||
|
||||
progressHelpers.complete(created, 0);
|
||||
|
||||
await notifyPipelineWebhook('pipeline.completed', {
|
||||
@ -241,22 +257,22 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
jobsProcessed: 0,
|
||||
})
|
||||
isPipelineRunning = false;
|
||||
|
||||
|
||||
return {
|
||||
success: true,
|
||||
jobsDiscovered: created,
|
||||
jobsProcessed: 0,
|
||||
};
|
||||
|
||||
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
|
||||
|
||||
await pipelineRepo.updatePipelineRun(pipelineRun.id, {
|
||||
status: 'failed',
|
||||
completedAt: new Date().toISOString(),
|
||||
errorMessage: message,
|
||||
});
|
||||
|
||||
|
||||
progressHelpers.failed(message);
|
||||
|
||||
await notifyPipelineWebhook('pipeline.failed', {
|
||||
@ -264,9 +280,9 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
error: message,
|
||||
})
|
||||
isPipelineRunning = false;
|
||||
|
||||
|
||||
console.error('\n❌ Pipeline failed:', message);
|
||||
|
||||
|
||||
return {
|
||||
success: false,
|
||||
jobsDiscovered: 0,
|
||||
@ -287,7 +303,7 @@ export async function processJob(
|
||||
error?: string;
|
||||
}> {
|
||||
console.log(`📝 Processing job ${jobId}...`);
|
||||
|
||||
|
||||
try {
|
||||
const job = await jobsRepo.getJobById(jobId);
|
||||
if (!job) {
|
||||
@ -297,9 +313,9 @@ export async function processJob(
|
||||
if (job.status !== 'discovered' && job.status !== 'ready') {
|
||||
return { success: false, error: `Job cannot be processed from status: ${job.status}` };
|
||||
}
|
||||
|
||||
|
||||
const profile = await loadProfile(DEFAULT_PROFILE_PATH);
|
||||
|
||||
|
||||
// Mark as processing
|
||||
await jobsRepo.updateJob(job.id, { status: 'processing' });
|
||||
|
||||
@ -314,7 +330,7 @@ export async function processJob(
|
||||
job.suitabilityScore = suitability.score;
|
||||
job.suitabilityReason = suitability.reason;
|
||||
}
|
||||
|
||||
|
||||
// Generate summary (AI)
|
||||
// If forcing, always recompute; otherwise compute if missing.
|
||||
if (options?.force || !job.tailoredSummary) {
|
||||
@ -323,7 +339,7 @@ export async function processJob(
|
||||
job.jobDescription || '',
|
||||
profile
|
||||
);
|
||||
|
||||
|
||||
if (summaryResult.success) {
|
||||
await jobsRepo.updateJob(job.id, {
|
||||
tailoredSummary: summaryResult.summary,
|
||||
@ -331,7 +347,7 @@ export async function processJob(
|
||||
job.tailoredSummary = summaryResult.summary ?? null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Generate PDF
|
||||
console.log(' Generating PDF...');
|
||||
const pdfResult = await generatePdf(
|
||||
@ -340,16 +356,16 @@ export async function processJob(
|
||||
job.jobDescription || '',
|
||||
DEFAULT_PROFILE_PATH
|
||||
);
|
||||
|
||||
|
||||
// Mark as ready
|
||||
await jobsRepo.updateJob(job.id, {
|
||||
status: 'ready',
|
||||
pdfPath: pdfResult.pdfPath ?? undefined,
|
||||
});
|
||||
|
||||
|
||||
console.log(' ✅ Done!');
|
||||
return { success: true };
|
||||
|
||||
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
return { success: false, error: message };
|
||||
|
||||
125
orchestrator/src/server/services/ukvisajobs.ts
Normal file
125
orchestrator/src/server/services/ukvisajobs.ts
Normal file
@ -0,0 +1,125 @@
|
||||
/**
|
||||
* Service for running the UK Visa Jobs extractor (extractors/ukvisajobs).
|
||||
*
|
||||
* Spawns the extractor as a child process and reads its output dataset.
|
||||
*/
|
||||
|
||||
import { spawn } from 'child_process';
|
||||
import { readdir, readFile, rm, mkdir } from 'fs/promises';
|
||||
import { join, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import type { CreateJobInput } from '../../shared/types.js';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const UKVISAJOBS_DIR = join(__dirname, '../../../../extractors/ukvisajobs');
|
||||
const STORAGE_DIR = join(UKVISAJOBS_DIR, 'storage/datasets/default');
|
||||
|
||||
/**
 * Options accepted by `runUkVisaJobs`. Every field is optional.
 */
export interface RunUkVisaJobsOptions {
  /**
   * How many jobs to fetch at most. `runUkVisaJobs` passes 50 to the
   * extractor when this is omitted; the upper limit is documented as 200.
   */
  maxJobs?: number;

  /** Keyword used to filter the search; leave unset for an unfiltered run. */
  searchKeyword?: string;
}
|
||||
|
||||
/**
 * Outcome of a `runUkVisaJobs` call.
 */
export interface UkVisaJobsResult {
  /** `true` when the extractor ran and its dataset was read; `false` on any failure. */
  success: boolean;

  /** Jobs read from the extractor's dataset; an empty array when the run failed. */
  jobs: CreateJobInput[];

  /** Failure message; only set when `success` is `false`. */
  error?: string;
}
|
||||
|
||||
/**
 * Remove the extractor's output dataset directory so a new run starts clean.
 *
 * `force: true` already makes `rm` succeed when the directory does not exist,
 * so an error that still surfaces is a genuine failure (for example a
 * permissions problem). It is deliberately not swallowed: carrying on would
 * leave the previous run's files in place, and `readDataset` would import
 * them as if they belonged to the current run. The only caller,
 * `runUkVisaJobs`, invokes this inside its `try` block and reports the
 * failure through its result object.
 *
 * @throws When the directory exists but cannot be removed.
 */
async function clearStorageDataset(): Promise<void> {
  await rm(STORAGE_DIR, { recursive: true, force: true });
}
|
||||
|
||||
/**
 * Run the UK Visa Jobs extractor as a child process and collect its output.
 *
 * Steps: clear and recreate the dataset directory, run `npx tsx src/main.ts`
 * inside the extractor package with the options forwarded as environment
 * variables, then read the dataset the extractor wrote.
 *
 * This function does not throw. Every failure (cleanup, spawn, non-zero exit,
 * termination by signal) is logged and returned as
 * `{ success: false, jobs: [], error }`.
 *
 * @param options - Optional job limit and search keyword; `maxJobs` falls
 *   back to 50 and `searchKeyword` to an empty string.
 * @returns The jobs read from the dataset on success, or a failure result.
 */
export async function runUkVisaJobs(options: RunUkVisaJobsOptions = {}): Promise<UkVisaJobsResult> {
  console.log('🇬🇧 Running UK Visa Jobs extractor...');

  try {
    // Start from an empty dataset directory so only this run's files are read back.
    await clearStorageDataset();
    await mkdir(STORAGE_DIR, { recursive: true });

    // Run the extractor using npx tsx directly (more reliable in Docker/different environments).
    await new Promise<void>((resolve, reject) => {
      const child = spawn('npx', ['tsx', 'src/main.ts'], {
        cwd: UKVISAJOBS_DIR,
        stdio: 'inherit',
        env: {
          ...process.env,
          UKVISAJOBS_MAX_JOBS: String(options.maxJobs ?? 50),
          UKVISAJOBS_SEARCH_KEYWORD: options.searchKeyword ?? '',
        },
      });

      child.on('close', (code, signal) => {
        if (code === 0) {
          resolve();
          return;
        }
        // A child killed by a signal closes with code === null; name the signal
        // instead of reporting "exited with code null".
        const reason = signal
          ? `was terminated by signal ${signal}`
          : `exited with code ${code}`;
        reject(new Error(`UK Visa Jobs extractor ${reason}`));
      });

      // Emitted when the process could not be spawned at all.
      child.on('error', reject);
    });

    // Read the output dataset
    const jobs = await readDataset();
    console.log(`✅ UK Visa Jobs: imported ${jobs.length} jobs`);

    return { success: true, jobs };
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error';
    console.error(`❌ UK Visa Jobs failed: ${message}`);
    return { success: false, jobs: [], error: message };
  }
}
|
||||
|
||||
/**
 * Read the jobs the extractor wrote to its output dataset directory.
 *
 * Every `*.json` file except the combined `jobs.json` is read in sorted
 * file-name order and mapped to a `CreateJobInput` with `source` fixed to
 * `'ukvisajobs'`. Reading is best-effort: a file that cannot be read or
 * parsed, does not contain a plain JSON object, or has no usable `jobUrl`
 * is skipped with a warning rather than failing the whole import.
 *
 * @returns The mapped jobs; an empty array when the directory cannot be listed.
 */
async function readDataset(): Promise<CreateJobInput[]> {
  const jobs: CreateJobInput[] = [];

  let files: string[];
  try {
    files = await readdir(STORAGE_DIR);
  } catch {
    // Dataset directory doesn't exist yet
    return jobs;
  }

  const jsonFiles = files
    .filter((name) => name.endsWith('.json') && name !== 'jobs.json')
    .sort();

  for (const file of jsonFiles) {
    try {
      const content = await readFile(join(STORAGE_DIR, file), 'utf-8');
      const parsed: unknown = JSON.parse(content);

      // JSON.parse can yield null, primitives or arrays; only a plain object is a job record.
      if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
        console.warn(`⚠️ UK Visa Jobs: skipping ${file} (not a JSON object)`);
        continue;
      }

      const job = parsed as Partial<CreateJobInput>;

      // NOTE(review): jobUrl is treated as mandatory here — confirm against CreateJobInput.
      if (typeof job.jobUrl !== 'string' || job.jobUrl === '') {
        console.warn(`⚠️ UK Visa Jobs: skipping ${file} (missing jobUrl)`);
        continue;
      }

      // Map to CreateJobInput format
      jobs.push({
        source: 'ukvisajobs',
        sourceJobId: job.sourceJobId,
        title: job.title || 'Unknown Title',
        employer: job.employer || 'Unknown Employer',
        employerUrl: job.employerUrl,
        jobUrl: job.jobUrl,
        applicationLink: job.applicationLink || job.jobUrl,
        location: job.location,
        deadline: job.deadline,
        salary: job.salary,
        jobDescription: job.jobDescription,
        datePosted: job.datePosted,
        degreeRequired: job.degreeRequired,
        jobType: job.jobType,
        jobLevel: job.jobLevel,
      });
    } catch (error) {
      // Unreadable or malformed file: keep going, but leave a trace of what was dropped.
      const reason = error instanceof Error ? error.message : 'Unknown error';
      console.warn(`⚠️ UK Visa Jobs: skipping ${file} (${reason})`);
    }
  }

  return jobs;
}
|
||||
@ -2,7 +2,7 @@
|
||||
* Shared types for the job-ops orchestrator.
|
||||
*/
|
||||
|
||||
export type JobStatus =
|
||||
export type JobStatus =
|
||||
| 'discovered' // Crawled but not processed
|
||||
| 'processing' // Currently generating resume
|
||||
| 'ready' // PDF generated, waiting for user to apply
|
||||
@ -13,11 +13,12 @@ export type JobStatus =
|
||||
export type JobSource =
|
||||
| 'gradcracker'
|
||||
| 'indeed'
|
||||
| 'linkedin';
|
||||
| 'linkedin'
|
||||
| 'ukvisajobs';
|
||||
|
||||
export interface Job {
|
||||
id: string;
|
||||
|
||||
|
||||
// Source / provenance
|
||||
source: JobSource;
|
||||
sourceJobId: string | null; // External ID (if provided)
|
||||
@ -37,7 +38,7 @@ export interface Job {
|
||||
degreeRequired: string | null;
|
||||
starting: string | null;
|
||||
jobDescription: string | null;
|
||||
|
||||
|
||||
// Orchestrator enrichments
|
||||
status: JobStatus;
|
||||
suitabilityScore: number | null; // 0-100 AI-generated score
|
||||
@ -71,7 +72,7 @@ export interface Job {
|
||||
companyReviewsCount: number | null;
|
||||
vacancyCount: number | null;
|
||||
workFromHomeType: string | null;
|
||||
|
||||
|
||||
// Timestamps
|
||||
discoveredAt: string;
|
||||
processedAt: string | null;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user