Merge pull request #4 from DaKheera47/extractor-ukvisajobs

Extractor ukvisajobs
This commit is contained in:
Shaheer Sarfaraz 2026-01-02 14:54:30 +00:00 committed by GitHub
commit a6310af294
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
23 changed files with 1955 additions and 173 deletions

View File

@ -30,10 +30,19 @@ JOB_COMPLETE_WEBHOOK_URL=
# JobSpy (Indeed/LinkedIn scraping) - optional
# =============================================================================
# These control the Python JobSpy scraper used by the pipeline.
JOBSPY_SITES=indeed,linkedin
JOBSPY_SEARCH_TERM=web developer
JOBSPY_LOCATION=UK
JOBSPY_RESULTS_WANTED=200
JOBSPY_HOURS_OLD=72
JOBSPY_COUNTRY_INDEED=UK
JOBSPY_LINKEDIN_FETCH_DESCRIPTION=1
# =============================================================================
# UKVisaJobs (UK visa sponsorship jobs) - optional
# =============================================================================
# Get these tokens from browser dev tools after logging into my.ukvisajobs.com
# See extractors/ukvisajobs/README.md for detailed instructions.
UKVISAJOBS_TOKEN=
UKVISAJOBS_AUTH_TOKEN=
UKVISAJOBS_CSRF_TOKEN=
UKVISAJOBS_CI_SESSION=

View File

@ -28,6 +28,7 @@ RUN pip3 install --no-cache-dir --break-system-packages playwright python-jobspy
# Copy package files first for better caching
COPY orchestrator/package*.json ./orchestrator/
COPY extractors/gradcracker/package*.json ./extractors/gradcracker/
COPY extractors/ukvisajobs/package*.json ./extractors/ukvisajobs/
# Install Node.js dependencies
WORKDIR /app/orchestrator
@ -39,11 +40,15 @@ RUN npm install --production=false
# Install Camoufox browser (downloads its own Firefox fork)
RUN npx camoufox fetch
WORKDIR /app/extractors/ukvisajobs
RUN npm install --production=false
# Copy source code
WORKDIR /app
COPY orchestrator ./orchestrator
COPY extractors/gradcracker ./extractors/gradcracker
COPY extractors/jobspy ./extractors/jobspy
COPY extractors/ukvisajobs ./extractors/ukvisajobs
COPY resume-generator ./resume-generator
# Build the orchestrator (client + server)

View File

@ -75,6 +75,7 @@ job-ops/
src/shared/ # shared types (Job, PipelineRun, etc.)
extractors/gradcracker/ # Crawlee crawler (Gradcracker)
extractors/jobspy/ # JobSpy wrapper (Indeed/LinkedIn/etc)
extractors/ukvisajobs/ # UK Visa Jobs API extractor
resume-generator/ # Python Playwright automation for rxresu.me
base.json # your exported base resume (template)
data/ # persisted runtime artifacts (Docker default)
@ -87,7 +88,7 @@ job-ops/
## Data model (SQLite)
- `jobs`
- from crawl: `title`, `employer`, `jobUrl`, `applicationLink`, `deadline`, `salary`, `location`, `jobDescription`, etc.
- from crawl: `title`, `employer`, `jobUrl`, `applicationLink`, `deadline`, `salary`, `location`, `jobDescription`, `source` (gradcracker/indeed/linkedin/ukvisajobs), etc.
- enrichments: `status` (`discovered` -> `processing` -> `ready` -> `applied`/`rejected`), `suitabilityScore`, `suitabilityReason`, `tailoredSummary`, `pdfPath`, `notionPageId`
- `pipeline_runs`: audit log of runs (`running`/`completed`/`failed`, counts, error)

View File

@ -34,7 +34,7 @@ services:
- PIPELINE_MIN_SCORE=${PIPELINE_MIN_SCORE:-50}
# JobSpy (Indeed/LinkedIn scraping) - optional
- JOBSPY_SITES=${JOBSPY_SITES:-indeed,linkedin}
# Preferred: pipe-separated list, e.g. "web developer|frontend developer|react developer"
- JOBSPY_SEARCH_TERMS=${JOBSPY_SEARCH_TERMS:-web developer|graduate web developer|react developer|graduate software engineer|graduate react developer|next js developer|graduate front end developer}
- JOBSPY_LOCATION=${JOBSPY_LOCATION:-UK}
@ -50,6 +50,13 @@ services:
# Optional: Webhook secret for n8n
- WEBHOOK_SECRET=${WEBHOOK_SECRET:-}
# UKVisaJobs (UK visa sponsorship jobs) - optional
- UKVISAJOBS_TOKEN=${UKVISAJOBS_TOKEN:-}
- UKVISAJOBS_AUTH_TOKEN=${UKVISAJOBS_AUTH_TOKEN:-}
- UKVISAJOBS_CSRF_TOKEN=${UKVISAJOBS_CSRF_TOKEN:-}
- UKVISAJOBS_CI_SESSION=${UKVISAJOBS_CI_SESSION:-}
- UKVISAJOBS_SEARCH_KEYWORD=${UKVISAJOBS_SEARCH_KEYWORD:-}
# Python path (uses system python in container)
- PYTHON_PATH=/usr/bin/python3
restart: unless-stopped
@ -59,6 +66,27 @@ services:
timeout: 10s
retries: 3
start_period: 10s
develop:
watch:
# Rebuild container when package.json changes
- path: ./orchestrator/package.json
action: rebuild
- path: ./orchestrator/package-lock.json
action: rebuild
# Sync source code changes into the container and restart it
- path: ./orchestrator/src
target: /app/orchestrator/src
action: sync+restart
# Sync extractor changes
- path: ./extractors/gradcracker/src
target: /app/extractors/gradcracker/src
action: sync+restart
- path: ./extractors/ukvisajobs/src
target: /app/extractors/ukvisajobs/src
action: sync+restart
- path: ./extractors/jobspy
target: /app/extractors/jobspy
action: sync+restart
# Volumes for data persistence
volumes:

View File

@ -17,11 +17,30 @@ const locations = [
];
// roles
const roles = [
const defaultRoles = [
"web-development",
"software-systems",
];
// Roles default to the built-in list. GRADCRACKER_SEARCH_TERMS, when set, must be
// a JSON array of strings; each term is slugified (lower-case, runs of
// non-alphanumerics collapsed to "-", leading/trailing "-" stripped) and replaces
// the defaults as long as at least one usable slug remains.
let roles = defaultRoles;
const envRolesRaw = process.env.GRADCRACKER_SEARCH_TERMS;
if (envRolesRaw) {
  try {
    // JSON.parse output is untrusted: keep it as `unknown` and narrow explicitly.
    const parsed: unknown = JSON.parse(envRolesRaw);
    if (Array.isArray(parsed)) {
      const slugs = parsed
        // Skip non-string entries instead of letting one bad entry discard the whole list.
        .filter((term): term is string => typeof term === 'string')
        .map((term) =>
          term
            .toLowerCase()
            .replace(/[^a-z0-9]+/g, '-')
            .replace(/^-+|-+$/g, '')
        )
        // A term made only of punctuation slugifies to '' and would yield an empty role.
        .filter((slug) => slug.length > 0);
      // De-duplicate so the same role is not combined with each location twice.
      const uniqueSlugs = [...new Set(slugs)];
      if (uniqueSlugs.length > 0) {
        roles = uniqueSlugs;
        console.log(`Using configured search terms: ${roles.join(', ')}`);
      } else if (parsed.length > 0) {
        console.warn('GRADCRACKER_SEARCH_TERMS contained no usable search terms; using defaults');
      }
    }
  } catch (e) {
    console.warn('Failed to parse GRADCRACKER_SEARCH_TERMS', e);
  }
}
// combo of locations and roles
const gradcrackerUrls = locations.flatMap((location) => {
return roles.map((role) => {

11
extractors/ukvisajobs/.gitignore vendored Normal file
View File

@ -0,0 +1,11 @@
# Dependencies
node_modules/
# Build output
dist/
# Crawlee storage
storage/
# Logs
*.log

View File

@ -0,0 +1,40 @@
# UK Visa Jobs Extractor
Fetches listings from [my.ukvisajobs.com](https://my.ukvisajobs.com) for jobs whose employers may sponsor work visas.
## Setup
```bash
npm install
```
## Configuration
Set the following environment variables (you can get these from your browser's dev tools after logging in):
| Variable | Description |
|----------|-------------|
| `UKVISAJOBS_TOKEN` | JWT token from the request body (required) |
| `UKVISAJOBS_AUTH_TOKEN` | Auth cookie token (defaults to UKVISAJOBS_TOKEN) |
| `UKVISAJOBS_CSRF_TOKEN` | CSRF token from cookies |
| `UKVISAJOBS_CI_SESSION` | CI session ID from cookies |
| `UKVISAJOBS_MAX_JOBS` | Maximum jobs to fetch (default: 50, max: 200) |
| `UKVISAJOBS_SEARCH_KEYWORD` | Optional search filter |
## How to get tokens
1. Log into `my.ukvisajobs.com` in your browser
2. Open Developer Tools → Network tab
3. Navigate to the jobs page
4. Find the `fetch-jobs-data` POST request
5. Copy values:
- From **Request Body**: copy the `token` field → `UKVISAJOBS_TOKEN`
- From **Cookies**: copy `authToken` → `UKVISAJOBS_AUTH_TOKEN`, `csrf_token` → `UKVISAJOBS_CSRF_TOKEN`, `ci_session` → `UKVISAJOBS_CI_SESSION`
## Running
```bash
npm start
```
Output is written to `storage/datasets/default/`: one zero-padded, numbered JSON file per job (`000001.json`, `000002.json`, …) plus a combined `jobs.json`.

599
extractors/ukvisajobs/package-lock.json generated Normal file
View File

@ -0,0 +1,599 @@
{
"name": "ukvisajobs-extractor",
"version": "0.0.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "ukvisajobs-extractor",
"version": "0.0.1",
"license": "ISC",
"devDependencies": {
"@apify/tsconfig": "^0.1.0",
"@types/node": "^24.0.0",
"tsx": "^4.4.0",
"typescript": "~5.9.0"
}
},
"node_modules/@apify/tsconfig": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/@apify/tsconfig/-/tsconfig-0.1.1.tgz",
"integrity": "sha512-cS7mwN2UW1UXcluGXRDHH0Vr2VsSLkw2DwLTwoSBkcJSe8fvCr3MPryTSq0uod4MashpMURxJ7CsLKxs82VmOQ==",
"dev": true,
"license": "Apache-2.0"
},
"node_modules/@esbuild/aix-ppc64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.2.tgz",
"integrity": "sha512-GZMB+a0mOMZs4MpDbj8RJp4cw+w1WV5NYD6xzgvzUJ5Ek2jerwfO2eADyI6ExDSUED+1X8aMbegahsJi+8mgpw==",
"cpu": [
"ppc64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"aix"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/android-arm": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.2.tgz",
"integrity": "sha512-DVNI8jlPa7Ujbr1yjU2PfUSRtAUZPG9I1RwW4F4xFB1Imiu2on0ADiI/c3td+KmDtVKNbi+nffGDQMfcIMkwIA==",
"cpu": [
"arm"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/android-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.2.tgz",
"integrity": "sha512-pvz8ZZ7ot/RBphf8fv60ljmaoydPU12VuXHImtAs0XhLLw+EXBi2BLe3OYSBslR4rryHvweW5gmkKFwTiFy6KA==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/android-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.2.tgz",
"integrity": "sha512-z8Ank4Byh4TJJOh4wpz8g2vDy75zFL0TlZlkUkEwYXuPSgX8yzep596n6mT7905kA9uHZsf/o2OJZubl2l3M7A==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/darwin-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.2.tgz",
"integrity": "sha512-davCD2Zc80nzDVRwXTcQP/28fiJbcOwvdolL0sOiOsbwBa72kegmVU0Wrh1MYrbuCL98Omp5dVhQFWRKR2ZAlg==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/darwin-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.2.tgz",
"integrity": "sha512-ZxtijOmlQCBWGwbVmwOF/UCzuGIbUkqB1faQRf5akQmxRJ1ujusWsb3CVfk/9iZKr2L5SMU5wPBi1UWbvL+VQA==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/freebsd-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.2.tgz",
"integrity": "sha512-lS/9CN+rgqQ9czogxlMcBMGd+l8Q3Nj1MFQwBZJyoEKI50XGxwuzznYdwcav6lpOGv5BqaZXqvBSiB/kJ5op+g==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/freebsd-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.2.tgz",
"integrity": "sha512-tAfqtNYb4YgPnJlEFu4c212HYjQWSO/w/h/lQaBK7RbwGIkBOuNKQI9tqWzx7Wtp7bTPaGC6MJvWI608P3wXYA==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-arm": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.2.tgz",
"integrity": "sha512-vWfq4GaIMP9AIe4yj1ZUW18RDhx6EPQKjwe7n8BbIecFtCQG4CfHGaHuh7fdfq+y3LIA2vGS/o9ZBGVxIDi9hw==",
"cpu": [
"arm"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.2.tgz",
"integrity": "sha512-hYxN8pr66NsCCiRFkHUAsxylNOcAQaxSSkHMMjcpx0si13t1LHFphxJZUiGwojB1a/Hd5OiPIqDdXONia6bhTw==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-ia32": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.2.tgz",
"integrity": "sha512-MJt5BRRSScPDwG2hLelYhAAKh9imjHK5+NE/tvnRLbIqUWa+0E9N4WNMjmp/kXXPHZGqPLxggwVhz7QP8CTR8w==",
"cpu": [
"ia32"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-loong64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.2.tgz",
"integrity": "sha512-lugyF1atnAT463aO6KPshVCJK5NgRnU4yb3FUumyVz+cGvZbontBgzeGFO1nF+dPueHD367a2ZXe1NtUkAjOtg==",
"cpu": [
"loong64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-mips64el": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.2.tgz",
"integrity": "sha512-nlP2I6ArEBewvJ2gjrrkESEZkB5mIoaTswuqNFRv/WYd+ATtUpe9Y09RnJvgvdag7he0OWgEZWhviS1OTOKixw==",
"cpu": [
"mips64el"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-ppc64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.2.tgz",
"integrity": "sha512-C92gnpey7tUQONqg1n6dKVbx3vphKtTHJaNG2Ok9lGwbZil6DrfyecMsp9CrmXGQJmZ7iiVXvvZH6Ml5hL6XdQ==",
"cpu": [
"ppc64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-riscv64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.2.tgz",
"integrity": "sha512-B5BOmojNtUyN8AXlK0QJyvjEZkWwy/FKvakkTDCziX95AowLZKR6aCDhG7LeF7uMCXEJqwa8Bejz5LTPYm8AvA==",
"cpu": [
"riscv64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-s390x": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.2.tgz",
"integrity": "sha512-p4bm9+wsPwup5Z8f4EpfN63qNagQ47Ua2znaqGH6bqLlmJ4bx97Y9JdqxgGZ6Y8xVTixUnEkoKSHcpRlDnNr5w==",
"cpu": [
"s390x"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.2.tgz",
"integrity": "sha512-uwp2Tip5aPmH+NRUwTcfLb+W32WXjpFejTIOWZFw/v7/KnpCDKG66u4DLcurQpiYTiYwQ9B7KOeMJvLCu/OvbA==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/netbsd-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.2.tgz",
"integrity": "sha512-Kj6DiBlwXrPsCRDeRvGAUb/LNrBASrfqAIok+xB0LxK8CHqxZ037viF13ugfsIpePH93mX7xfJp97cyDuTZ3cw==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"netbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/netbsd-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.2.tgz",
"integrity": "sha512-HwGDZ0VLVBY3Y+Nw0JexZy9o/nUAWq9MlV7cahpaXKW6TOzfVno3y3/M8Ga8u8Yr7GldLOov27xiCnqRZf0tCA==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"netbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/openbsd-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.2.tgz",
"integrity": "sha512-DNIHH2BPQ5551A7oSHD0CKbwIA/Ox7+78/AWkbS5QoRzaqlev2uFayfSxq68EkonB+IKjiuxBFoV8ESJy8bOHA==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"openbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/openbsd-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.2.tgz",
"integrity": "sha512-/it7w9Nb7+0KFIzjalNJVR5bOzA9Vay+yIPLVHfIQYG/j+j9VTH84aNB8ExGKPU4AzfaEvN9/V4HV+F+vo8OEg==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"openbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/openharmony-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.2.tgz",
"integrity": "sha512-LRBbCmiU51IXfeXk59csuX/aSaToeG7w48nMwA6049Y4J4+VbWALAuXcs+qcD04rHDuSCSRKdmY63sruDS5qag==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"openharmony"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/sunos-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.2.tgz",
"integrity": "sha512-kMtx1yqJHTmqaqHPAzKCAkDaKsffmXkPHThSfRwZGyuqyIeBvf08KSsYXl+abf5HDAPMJIPnbBfXvP2ZC2TfHg==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"sunos"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/win32-arm64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.2.tgz",
"integrity": "sha512-Yaf78O/B3Kkh+nKABUF++bvJv5Ijoy9AN1ww904rOXZFLWVc5OLOfL56W+C8F9xn5JQZa3UX6m+IktJnIb1Jjg==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/win32-ia32": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.2.tgz",
"integrity": "sha512-Iuws0kxo4yusk7sw70Xa2E2imZU5HoixzxfGCdxwBdhiDgt9vX9VUCBhqcwY7/uh//78A1hMkkROMJq9l27oLQ==",
"cpu": [
"ia32"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/win32-x64": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.2.tgz",
"integrity": "sha512-sRdU18mcKf7F+YgheI/zGf5alZatMUTKj/jNS6l744f9u3WFu4v7twcUI9vu4mknF4Y9aDlblIie0IM+5xxaqQ==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@types/node": {
"version": "24.10.4",
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.4.tgz",
"integrity": "sha512-vnDVpYPMzs4wunl27jHrfmwojOGKya0xyM3sH+UE5iv5uPS6vX7UIoh6m+vQc5LGBq52HBKPIn/zcSZVzeDEZg==",
"dev": true,
"license": "MIT",
"dependencies": {
"undici-types": "~7.16.0"
}
},
"node_modules/esbuild": {
"version": "0.27.2",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.2.tgz",
"integrity": "sha512-HyNQImnsOC7X9PMNaCIeAm4ISCQXs5a5YasTXVliKv4uuBo1dKrG0A+uQS8M5eXjVMnLg3WgXaKvprHlFJQffw==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"bin": {
"esbuild": "bin/esbuild"
},
"engines": {
"node": ">=18"
},
"optionalDependencies": {
"@esbuild/aix-ppc64": "0.27.2",
"@esbuild/android-arm": "0.27.2",
"@esbuild/android-arm64": "0.27.2",
"@esbuild/android-x64": "0.27.2",
"@esbuild/darwin-arm64": "0.27.2",
"@esbuild/darwin-x64": "0.27.2",
"@esbuild/freebsd-arm64": "0.27.2",
"@esbuild/freebsd-x64": "0.27.2",
"@esbuild/linux-arm": "0.27.2",
"@esbuild/linux-arm64": "0.27.2",
"@esbuild/linux-ia32": "0.27.2",
"@esbuild/linux-loong64": "0.27.2",
"@esbuild/linux-mips64el": "0.27.2",
"@esbuild/linux-ppc64": "0.27.2",
"@esbuild/linux-riscv64": "0.27.2",
"@esbuild/linux-s390x": "0.27.2",
"@esbuild/linux-x64": "0.27.2",
"@esbuild/netbsd-arm64": "0.27.2",
"@esbuild/netbsd-x64": "0.27.2",
"@esbuild/openbsd-arm64": "0.27.2",
"@esbuild/openbsd-x64": "0.27.2",
"@esbuild/openharmony-arm64": "0.27.2",
"@esbuild/sunos-x64": "0.27.2",
"@esbuild/win32-arm64": "0.27.2",
"@esbuild/win32-ia32": "0.27.2",
"@esbuild/win32-x64": "0.27.2"
}
},
"node_modules/fsevents": {
"version": "2.3.3",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
"node_modules/get-tsconfig": {
"version": "4.13.0",
"resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.0.tgz",
"integrity": "sha512-1VKTZJCwBrvbd+Wn3AOgQP/2Av+TfTCOlE4AcRJE72W1ksZXbAx8PPBR9RzgTeSPzlPMHrbANMH3LbltH73wxQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"resolve-pkg-maps": "^1.0.0"
},
"funding": {
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
}
},
"node_modules/resolve-pkg-maps": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
"integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
"dev": true,
"license": "MIT",
"funding": {
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
}
},
"node_modules/tsx": {
"version": "4.21.0",
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz",
"integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==",
"dev": true,
"license": "MIT",
"dependencies": {
"esbuild": "~0.27.0",
"get-tsconfig": "^4.7.5"
},
"bin": {
"tsx": "dist/cli.mjs"
},
"engines": {
"node": ">=18.0.0"
},
"optionalDependencies": {
"fsevents": "~2.3.3"
}
},
"node_modules/typescript": {
"version": "5.9.3",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
"dev": true,
"license": "Apache-2.0",
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
},
"engines": {
"node": ">=14.17"
}
},
"node_modules/undici-types": {
"version": "7.16.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==",
"dev": true,
"license": "MIT"
}
}
}

View File

@ -0,0 +1,22 @@
{
"name": "ukvisajobs-extractor",
"version": "0.0.1",
"type": "module",
"description": "UK Visa Jobs extractor - fetches job listings that may sponsor work visas",
"main": "dist/main.js",
"dependencies": {},
"devDependencies": {
"@apify/tsconfig": "^0.1.0",
"@types/node": "^24.0.0",
"tsx": "^4.4.0",
"typescript": "~5.9.0"
},
"scripts": {
"start": "npm run start:dev",
"start:prod": "node dist/main.js",
"start:dev": "tsx src/main.ts",
"build": "tsc"
},
"author": "",
"license": "ISC"
}

View File

@ -0,0 +1,303 @@
/**
* UK Visa Jobs Extractor
*
* Fetches job listings from my.ukvisajobs.com that may sponsor work visas.
* Writes JSON files to storage/datasets/default/ for the orchestrator to consume.
*
* Environment variables:
* UKVISAJOBS_TOKEN - JWT token (required)
* UKVISAJOBS_AUTH_TOKEN - Auth cookie token (defaults to UKVISAJOBS_TOKEN)
* UKVISAJOBS_CSRF_TOKEN - CSRF token cookie
* UKVISAJOBS_CI_SESSION - CI session cookie
* UKVISAJOBS_MAX_JOBS - Maximum jobs to fetch (default: 50, max: 200) - Set via UI Settings
* UKVISAJOBS_SEARCH_KEYWORD - Optional search filter
*/
import { mkdir, writeFile } from 'fs/promises';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
// Directory of this module, derived from import.meta.url; main() resolves the
// output directory relative to it.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Endpoint that fetchPage() POSTs the search form to.
const API_URL = 'https://my.ukvisajobs.com/ukvisa-api/api/fetch-jobs-data';
// Page size used to derive the number of pages to request and to detect a short
// (final) page. NOTE(review): presumably mirrors the API's fixed page size — confirm.
const JOBS_PER_PAGE = 15;
// Job limit used when UKVISAJOBS_MAX_JOBS does not provide a usable value.
const DEFAULT_MAX_JOBS = 50;
// Upper bound applied to the requested job limit.
const MAX_ALLOWED_JOBS = 200;
/**
 * One job record as declared for the `jobs` array of the API response.
 *
 * NOTE(review): these types describe the expected payload; nothing in this
 * file validates the response against them at runtime.
 */
interface UkVisaJobsApiJob {
// Used by main() to de-duplicate jobs across pages and emitted as sourceJobId.
id: string;
title: string;
company_name: string;
company_link?: string;
// Emitted as both jobUrl and applicationLink by mapJob().
job_link: string;
city: string;
created_date: string;
job_expire: string;
description?: string;
// Salary bounds arrive as strings; mapJob() parses them with toNumberOrNull().
min_salary?: string;
max_salary?: string;
salary_interval?: string;
salary_method?: string;
degree_requirement?: string;
job_type?: string;
job_level?: string;
job_industry?: string;
// The six flags below are compared case-insensitively against 'yes' by mapJob().
visa_acceptance?: string;
applicants_outside_uk?: string;
likely_to_sponsor?: string;
definitely_sponsored?: string;
new_entrant?: string;
student_graduate?: string;
image?: string;
computed_cos_total?: string;
}
/** Declared shape of the JSON body returned by fetchPage(). */
interface UkVisaJobsApiResponse {
// main() stops paging when this is anything other than 1.
status: number;
// Logged by main() after the first page as the total number of available jobs.
totalJobs: number;
query?: string;
// Jobs for the requested page; main() treats a missing or empty array as the end of results.
jobs: UkVisaJobsApiJob[];
}
/**
 * Output record produced by mapJob() and serialised to JSON by main().
 * Optional fields are left undefined when the API record has no usable value.
 */
interface ExtractedJob {
// Constant tag identifying this extractor as the origin of the record.
source: 'ukvisajobs';
sourceJobId: string;
title: string;
employer: string;
employerUrl?: string;
jobUrl: string;
applicationLink: string;
location?: string;
deadline?: string;
// Human-readable salary string assembled by mapJob() from the min/max bounds and interval.
salary?: string;
// The API description when present, otherwise a summary of the visa-sponsorship flags.
jobDescription?: string;
datePosted?: string;
degreeRequired?: string;
jobType?: string;
jobLevel?: string;
}
/**
 * Normalises a loosely-typed value to a string.
 *
 * Strings are trimmed and become null when nothing is left; numbers and
 * booleans are stringified; every other input (null, undefined, objects, …)
 * yields null.
 */
function toStringOrNull(value: unknown): string | null {
  switch (typeof value) {
    case 'string': {
      const text = value.trim();
      return text.length > 0 ? text : null;
    }
    case 'number':
    case 'boolean':
      return String(value);
    default:
      // Covers null (typeof 'object'), undefined, objects, functions, symbols, bigints.
      return null;
  }
}
/**
 * Normalises a loosely-typed value to a finite number.
 *
 * Numbers pass through when finite; non-blank strings are trimmed and
 * converted with Number(); anything else — or any non-finite result — is null.
 */
function toNumberOrNull(value: unknown): number | null {
  let candidate: number;
  if (typeof value === 'number') {
    candidate = value;
  } else if (typeof value === 'string' && value.trim() !== '') {
    candidate = Number(value.trim());
  } else {
    return null;
  }
  return Number.isFinite(candidate) ? candidate : null;
}
/**
 * Requests one page of job listings from the UK Visa Jobs API.
 *
 * @param pageNo  Page number sent as the `pageNo` form field and echoed in the referer.
 * @param token   Value sent as the `token` form field.
 * @param cookies Pre-built cookie header value.
 * @param options Optional search keyword; when absent or empty the literal
 *                string 'null' is sent for `searchKeyword`.
 * @returns The parsed JSON body, typed (not validated) as UkVisaJobsApiResponse.
 * @throws Error when the HTTP status is not OK; the message includes the
 *         status, status text and response body.
 */
async function fetchPage(
  pageNo: number,
  token: string,
  cookies: string,
  options: { searchKeyword?: string } = {}
): Promise<UkVisaJobsApiResponse> {
  // Form fields in the order they are appended to the multipart body.
  const fields: Array<[string, string]> = [
    ['is_global', '0'],
    ['sortBy', 'desc'],
    ['pageNo', String(pageNo)],
    ['visaAcceptance', 'false'],
    ['applicants_outside_uk', 'false'],
    ['searchKeyword', options.searchKeyword || 'null'],
    ['token', token],
  ];

  // Native FormData (Node.js 18+); fetch derives the multipart content-type from it.
  const body = new FormData();
  for (const [name, value] of fields) {
    body.append(name, value);
  }

  const requestHeaders = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'en-US,en;q=0.9',
    'cookie': cookies,
    'origin': 'https://my.ukvisajobs.com',
    'referer': `https://my.ukvisajobs.com/open-jobs/1?is_global=0&sortBy=desc&pageNo=${pageNo}&visaAcceptance=false&applicants_outside_uk=false`,
    'user-agent': 'Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Mobile Safari/537.36',
  };

  const response = await fetch(API_URL, {
    method: 'POST',
    headers: requestHeaders,
    body,
  });

  if (response.ok) {
    return response.json() as Promise<UkVisaJobsApiResponse>;
  }

  // Include the response body so failures can be diagnosed from the log.
  const text = await response.text();
  throw new Error(`UKVisaJobs API returned ${response.status}: ${response.statusText} - ${text}`);
}
/**
 * Converts one raw API record into the extractor's output shape.
 *
 * Salary: built from the positive min/max bounds as `£min-max`, `£max` when
 * only the upper bound is usable, or `£min+` when only the lower bound is
 * usable, followed by ` / interval` when an interval is present. Amounts are
 * formatted with the fixed 'en-GB' locale so the output does not depend on
 * the host's locale settings.
 *
 * Description: the API description when non-blank; otherwise a summary of the
 * visa-sponsorship flags that are set to "yes"; otherwise undefined.
 *
 * @param raw Job record from the API response (not validated at runtime, so
 *            field values are narrowed defensively here).
 * @returns The mapped job; title and employer fall back to placeholders.
 */
function mapJob(raw: UkVisaJobsApiJob): ExtractedJob {
  // Fixed locale keeps thousands separators stable across machines.
  const formatAmount = (amount: number): string => amount.toLocaleString('en-GB');
  // Tolerates non-string values coming from the unvalidated payload.
  const isYes = (flag: unknown): boolean =>
    typeof flag === 'string' && flag.trim().toLowerCase() === 'yes';

  // Build salary string from min/max
  const minSalary = toNumberOrNull(raw.min_salary);
  const maxSalary = toNumberOrNull(raw.max_salary);
  let salary: string | undefined;
  if (minSalary !== null && minSalary > 0 && maxSalary !== null && maxSalary > 0) {
    salary = `£${formatAmount(minSalary)}-${formatAmount(maxSalary)}`;
  } else if (maxSalary !== null && maxSalary > 0) {
    salary = `£${formatAmount(maxSalary)}`;
  } else if (minSalary !== null && minSalary > 0) {
    // Only a lower bound is known; keep it rather than dropping the salary.
    salary = `£${formatAmount(minSalary)}+`;
  }
  const interval = toStringOrNull(raw.salary_interval);
  if (salary !== undefined && interval !== null) {
    salary += ` / ${interval}`;
  }

  // Build a fallback description from visa sponsorship fields
  const flagLabels: Array<[unknown, string]> = [
    [raw.visa_acceptance, 'Visa acceptance: Yes'],
    [raw.applicants_outside_uk, 'Accepts applicants outside UK'],
    [raw.likely_to_sponsor, 'Likely to sponsor'],
    [raw.definitely_sponsored, 'Definitely sponsored'],
    [raw.new_entrant, 'New entrant friendly'],
    [raw.student_graduate, 'Student/Graduate friendly'],
  ];
  const visaInfo = flagLabels.filter(([flag]) => isYes(flag)).map(([, label]) => label);

  const description =
    toStringOrNull(raw.description) ??
    (visaInfo.length > 0 ? `Visa sponsorship info: ${visaInfo.join(', ')}` : undefined);

  return {
    source: 'ukvisajobs',
    sourceJobId: raw.id,
    title: raw.title || 'Unknown Title',
    employer: raw.company_name || 'Unknown Employer',
    employerUrl: toStringOrNull(raw.company_link) ?? undefined,
    jobUrl: raw.job_link,
    applicationLink: raw.job_link,
    location: raw.city || undefined,
    deadline: raw.job_expire || undefined,
    salary,
    jobDescription: description,
    datePosted: raw.created_date || undefined,
    degreeRequired: toStringOrNull(raw.degree_requirement) ?? undefined,
    jobType: toStringOrNull(raw.job_type) ?? undefined,
    jobLevel: toStringOrNull(raw.job_level) ?? undefined,
  };
}
/**
 * Extractor entry point.
 *
 * Reads credentials and limits from the environment, pages through the API
 * until the job limit, the page limit, an empty/short page or a non-1 status
 * is reached, de-duplicates jobs by id, and writes the results as JSON files
 * under ../storage/datasets/default (one numbered file per job plus jobs.json).
 *
 * Exits the process with code 1 when UKVISAJOBS_TOKEN is missing or when any
 * request or file write throws.
 *
 * UKVISAJOBS_MAX_JOBS is honoured only when it parses to a number >= 1; it is
 * floored to an integer and capped at MAX_ALLOWED_JOBS. Any other value falls
 * back to DEFAULT_MAX_JOBS instead of producing a zero, negative or fractional
 * limit.
 */
async function main(): Promise<void> {
  console.log('🇬🇧 UK Visa Jobs Extractor starting...');

  // Get credentials from environment
  const token = process.env.UKVISAJOBS_TOKEN;
  const authToken = process.env.UKVISAJOBS_AUTH_TOKEN || token;
  const csrfToken = process.env.UKVISAJOBS_CSRF_TOKEN || '';
  const ciSession = process.env.UKVISAJOBS_CI_SESSION || '';
  const searchKeyword = process.env.UKVISAJOBS_SEARCH_KEYWORD || undefined;

  if (!token) {
    console.error('❌ UKVISAJOBS_TOKEN environment variable is not set');
    process.exit(1);
  }

  // Build cookies string
  const cookieParts: string[] = [];
  if (csrfToken) cookieParts.push(`csrf_token=${csrfToken}`);
  if (ciSession) cookieParts.push(`ci_session=${ciSession}`);
  if (authToken) cookieParts.push(`authToken=${authToken}`);
  const cookies = cookieParts.join('; ');

  console.log(` Cookies configured: ${cookieParts.length > 0 ? 'Yes' : 'No'}`);
  console.log(` Token length: ${token.length}`);

  // Get max jobs from environment, rejecting values that cannot be a job count
  const rawMaxJobs = process.env.UKVISAJOBS_MAX_JOBS;
  const parsedMaxJobs = toNumberOrNull(rawMaxJobs);
  let requestedJobs = DEFAULT_MAX_JOBS;
  if (parsedMaxJobs !== null && parsedMaxJobs >= 1) {
    requestedJobs = Math.floor(parsedMaxJobs);
  } else if (rawMaxJobs !== undefined && rawMaxJobs.trim() !== '') {
    console.warn(` ⚠️ Ignoring invalid UKVISAJOBS_MAX_JOBS "${rawMaxJobs}", using default ${DEFAULT_MAX_JOBS}`);
  }
  const maxJobs = Math.min(requestedJobs, MAX_ALLOWED_JOBS);
  const maxPages = Math.ceil(maxJobs / JOBS_PER_PAGE);

  console.log(` Max jobs: ${maxJobs} (${maxPages} pages)`);
  if (searchKeyword) {
    console.log(` Search keyword: ${searchKeyword}`);
  }

  const allJobs: ExtractedJob[] = [];
  const seenIds = new Set<string>();
  let totalAvailable = 0;
  let pageNo = 1;

  try {
    while (pageNo <= maxPages && allJobs.length < maxJobs) {
      console.log(` Fetching page ${pageNo}/${maxPages}...`);
      const response = await fetchPage(pageNo, token, cookies, { searchKeyword });

      // Any status other than 1 ends paging; jobs collected so far are still written.
      if (response.status !== 1) {
        console.warn(` ⚠️ API returned status ${response.status} on page ${pageNo}`);
        break;
      }

      if (pageNo === 1) {
        totalAvailable = response.totalJobs;
        console.log(` Total available: ${totalAvailable} jobs`);
      }

      if (!response.jobs || response.jobs.length === 0) {
        console.log(` No more jobs on page ${pageNo}`);
        break;
      }

      for (const rawJob of response.jobs) {
        if (allJobs.length >= maxJobs) break;
        // Deduplicate by ID
        if (seenIds.has(rawJob.id)) continue;
        seenIds.add(rawJob.id);
        allJobs.push(mapJob(rawJob));
      }

      // If we got fewer jobs than a full page, we're at the end
      if (response.jobs.length < JOBS_PER_PAGE) {
        break;
      }

      pageNo++;
      // Small delay to be nice to the API
      await new Promise((resolve) => setTimeout(resolve, 500));
    }

    console.log(`✅ Scraped ${allJobs.length} jobs`);

    // Write output to storage directory (similar to Crawlee dataset structure)
    // NOTE(review): files from a previous, larger run are not removed here, so
    // stale numbered files may remain — confirm whether the caller clears the directory.
    const storageDir = join(__dirname, '../storage/datasets/default');
    await mkdir(storageDir, { recursive: true });

    // Write each job as a separate JSON file (Crawlee dataset format)
    for (let i = 0; i < allJobs.length; i++) {
      const filename = join(storageDir, `${String(i + 1).padStart(6, '0')}.json`);
      await writeFile(filename, JSON.stringify(allJobs[i], null, 2));
    }

    // Also write a combined output file for easier consumption
    const outputFile = join(storageDir, 'jobs.json');
    await writeFile(outputFile, JSON.stringify(allJobs, null, 2));

    console.log(` Output written to: ${storageDir}`);
    console.log(` Jobs file: ${outputFile}`);
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error';
    console.error(`❌ Error: ${message}`);
    process.exit(1);
  }
}

// Run the extractor; anything that escapes main() is reported and fails the process.
main().catch((error) => {
  console.error('Fatal error:', error);
  process.exit(1);
});

View File

@ -0,0 +1,12 @@
{
"extends": "@apify/tsconfig",
"compilerOptions": {
"module": "NodeNext",
"moduleResolution": "NodeNext",
"target": "ES2022",
"outDir": "dist",
"noUnusedLocals": false,
"lib": ["DOM"]
},
"include": ["./src/**/*"]
}

View File

@ -1,34 +0,0 @@
# Server
PORT=3001
# OpenRouter API (for AI features)
OPENROUTER_API_KEY=your_openrouter_api_key_here
MODEL=openai/gpt-4o-mini
# Notion integration (optional)
NOTION_API_KEY=
NOTION_DATABASE_ID=
# Webhook security (optional)
WEBHOOK_SECRET=
PIPELINE_WEBHOOK_URL=
JOB_COMPLETE_WEBHOOK_URL=
# Pipeline configuration
PIPELINE_TOP_N=10
PIPELINE_MIN_SCORE=50
# RXResume credentials (for PDF generation)
RXRESUME_EMAIL=
RXRESUME_PASSWORD=
# =============================================================================
# JobSpy (Indeed/LinkedIn scraping) - optional
# =============================================================================
JOBSPY_SITES=indeed,linkedin
JOBSPY_SEARCH_TERM=web developer
JOBSPY_LOCATION=UK
JOBSPY_RESULTS_WANTED=200
JOBSPY_HOURS_OLD=72
JOBSPY_COUNTRY_INDEED=UK
JOBSPY_LINKEDIN_FETCH_DESCRIPTION=1

View File

@ -12,7 +12,7 @@ import { Header, JobList, PipelineProgress, Stats } from "./components";
import * as api from "./api";
import { SettingsPage } from "./pages/SettingsPage";
const DEFAULT_PIPELINE_SOURCES: JobSource[] = ["gradcracker", "indeed", "linkedin"];
const DEFAULT_PIPELINE_SOURCES: JobSource[] = ["gradcracker", "indeed", "linkedin", "ukvisajobs"];
const PIPELINE_SOURCES_STORAGE_KEY = "jobops.pipeline.sources";
export const App: React.FC = () => {
@ -33,7 +33,7 @@ export const App: React.FC = () => {
const raw = localStorage.getItem(PIPELINE_SOURCES_STORAGE_KEY);
if (!raw) return DEFAULT_PIPELINE_SOURCES;
const parsed = JSON.parse(raw) as unknown;
const allowed: JobSource[] = ["gradcracker", "indeed", "linkedin"];
const allowed: JobSource[] = ["gradcracker", "indeed", "linkedin", "ukvisajobs"];
if (!Array.isArray(parsed)) return DEFAULT_PIPELINE_SOURCES;
const next = parsed.filter((value): value is JobSource => allowed.includes(value));
return next.length > 0 ? next : DEFAULT_PIPELINE_SOURCES;

View File

@ -2,10 +2,10 @@
* API client for the orchestrator backend.
*/
import type {
Job,
ApiResponse,
JobsListResponse,
import type {
Job,
ApiResponse,
JobsListResponse,
PipelineStatusResponse,
JobSource,
PipelineRun,
@ -26,13 +26,13 @@ async function fetchApi<T>(
...options?.headers,
},
});
const data: ApiResponse<T> = await response.json();
if (!data.success) {
throw new Error(data.error || 'API request failed');
}
return data.data as T;
}
@ -47,7 +47,7 @@ export async function getJob(id: string): Promise<Job> {
}
export async function updateJob(
id: string,
id: string,
update: Partial<Job>
): Promise<Job> {
return fetchApi<Job>(`/jobs/${id}`, {
@ -105,6 +105,13 @@ export async function updateSettings(update: {
pipelineWebhookUrl?: string | null
jobCompleteWebhookUrl?: string | null
resumeProjects?: ResumeProjectsSettings | null
ukvisajobsMaxJobs?: number | null
searchTerms?: string[] | null
jobspyLocation?: string | null
jobspyResultsWanted?: number | null
jobspyHoursOld?: number | null
jobspyCountryIndeed?: string | null
jobspyLinkedinFetchDescription?: boolean | null
}): Promise<AppSettings> {
return fetchApi<AppSettings>('/settings', {
method: 'PATCH',

View File

@ -60,9 +60,10 @@ export const Header: React.FC<HeaderProps> = ({
gradcracker: "Gradcracker",
indeed: "Indeed",
linkedin: "LinkedIn",
ukvisajobs: "UK Visa Jobs",
};
const orderedSources: JobSource[] = ["gradcracker", "indeed", "linkedin"];
const orderedSources: JobSource[] = ["gradcracker", "indeed", "linkedin", "ukvisajobs"];
const toggleSource = (source: JobSource, checked: boolean) => {
const next = checked

View File

@ -42,6 +42,13 @@ export const SettingsPage: React.FC = () => {
const [pipelineWebhookUrlDraft, setPipelineWebhookUrlDraft] = useState("")
const [jobCompleteWebhookUrlDraft, setJobCompleteWebhookUrlDraft] = useState("")
const [resumeProjectsDraft, setResumeProjectsDraft] = useState<ResumeProjectsSettings | null>(null)
const [ukvisajobsMaxJobsDraft, setUkvisajobsMaxJobsDraft] = useState<number | null>(null)
const [searchTermsDraft, setSearchTermsDraft] = useState<string[] | null>(null)
const [jobspyLocationDraft, setJobspyLocationDraft] = useState<string | null>(null)
const [jobspyResultsWantedDraft, setJobspyResultsWantedDraft] = useState<number | null>(null)
const [jobspyHoursOldDraft, setJobspyHoursOldDraft] = useState<number | null>(null)
const [jobspyCountryIndeedDraft, setJobspyCountryIndeedDraft] = useState<string | null>(null)
const [jobspyLinkedinFetchDescriptionDraft, setJobspyLinkedinFetchDescriptionDraft] = useState<boolean | null>(null)
const [isSaving, setIsSaving] = useState(false)
const [isLoading, setIsLoading] = useState(true)
@ -57,6 +64,13 @@ export const SettingsPage: React.FC = () => {
setPipelineWebhookUrlDraft(data.overridePipelineWebhookUrl ?? "")
setJobCompleteWebhookUrlDraft(data.overrideJobCompleteWebhookUrl ?? "")
setResumeProjectsDraft(data.resumeProjects)
setUkvisajobsMaxJobsDraft(data.overrideUkvisajobsMaxJobs)
setSearchTermsDraft(data.overrideSearchTerms)
setJobspyLocationDraft(data.overrideJobspyLocation)
setJobspyResultsWantedDraft(data.overrideJobspyResultsWanted)
setJobspyHoursOldDraft(data.overrideJobspyHoursOld)
setJobspyCountryIndeedDraft(data.overrideJobspyCountryIndeed)
setJobspyLinkedinFetchDescriptionDraft(data.overrideJobspyLinkedinFetchDescription)
})
.catch((error) => {
const message = error instanceof Error ? error.message : "Failed to load settings"
@ -81,6 +95,27 @@ export const SettingsPage: React.FC = () => {
const effectiveJobCompleteWebhookUrl = settings?.jobCompleteWebhookUrl ?? ""
const defaultJobCompleteWebhookUrl = settings?.defaultJobCompleteWebhookUrl ?? ""
const overrideJobCompleteWebhookUrl = settings?.overrideJobCompleteWebhookUrl
const effectiveUkvisajobsMaxJobs = settings?.ukvisajobsMaxJobs ?? 50
const defaultUkvisajobsMaxJobs = settings?.defaultUkvisajobsMaxJobs ?? 50
const overrideUkvisajobsMaxJobs = settings?.overrideUkvisajobsMaxJobs
const effectiveSearchTerms = settings?.searchTerms ?? []
const defaultSearchTerms = settings?.defaultSearchTerms ?? []
const overrideSearchTerms = settings?.overrideSearchTerms
const effectiveJobspyLocation = settings?.jobspyLocation ?? ""
const defaultJobspyLocation = settings?.defaultJobspyLocation ?? ""
const overrideJobspyLocation = settings?.overrideJobspyLocation
const effectiveJobspyResultsWanted = settings?.jobspyResultsWanted ?? 200
const defaultJobspyResultsWanted = settings?.defaultJobspyResultsWanted ?? 200
const overrideJobspyResultsWanted = settings?.overrideJobspyResultsWanted
const effectiveJobspyHoursOld = settings?.jobspyHoursOld ?? 72
const defaultJobspyHoursOld = settings?.defaultJobspyHoursOld ?? 72
const overrideJobspyHoursOld = settings?.overrideJobspyHoursOld
const effectiveJobspyCountryIndeed = settings?.jobspyCountryIndeed ?? ""
const defaultJobspyCountryIndeed = settings?.defaultJobspyCountryIndeed ?? ""
const overrideJobspyCountryIndeed = settings?.overrideJobspyCountryIndeed
const effectiveJobspyLinkedinFetchDescription = settings?.jobspyLinkedinFetchDescription ?? true
const defaultJobspyLinkedinFetchDescription = settings?.defaultJobspyLinkedinFetchDescription ?? true
const overrideJobspyLinkedinFetchDescription = settings?.overrideJobspyLinkedinFetchDescription
const profileProjects = settings?.profileProjects ?? []
const maxProjectsTotal = profileProjects.length
const lockedCount = resumeProjectsDraft?.lockedProjectIds.length ?? 0
@ -93,11 +128,20 @@ export const SettingsPage: React.FC = () => {
const currentWebhook = (overridePipelineWebhookUrl ?? "").trim()
const nextJobCompleteWebhook = jobCompleteWebhookUrlDraft.trim()
const currentJobCompleteWebhook = (overrideJobCompleteWebhookUrl ?? "").trim()
const ukvisajobsChanged = ukvisajobsMaxJobsDraft !== (overrideUkvisajobsMaxJobs ?? null)
const searchTermsChanged = JSON.stringify(searchTermsDraft) !== JSON.stringify(overrideSearchTerms ?? null)
return (
next !== current ||
nextWebhook !== currentWebhook ||
nextJobCompleteWebhook !== currentJobCompleteWebhook ||
!resumeProjectsEqual(resumeProjectsDraft, settings.resumeProjects)
!resumeProjectsEqual(resumeProjectsDraft, settings.resumeProjects) ||
ukvisajobsChanged ||
searchTermsChanged ||
jobspyLocationDraft !== (overrideJobspyLocation ?? null) ||
jobspyResultsWantedDraft !== (overrideJobspyResultsWanted ?? null) ||
jobspyHoursOldDraft !== (overrideJobspyHoursOld ?? null) ||
jobspyCountryIndeedDraft !== (overrideJobspyCountryIndeed ?? null) ||
jobspyLinkedinFetchDescriptionDraft !== (overrideJobspyLinkedinFetchDescription ?? null)
)
}, [
settings,
@ -108,6 +152,20 @@ export const SettingsPage: React.FC = () => {
overridePipelineWebhookUrl,
overrideJobCompleteWebhookUrl,
resumeProjectsDraft,
ukvisajobsMaxJobsDraft,
overrideUkvisajobsMaxJobs,
searchTermsDraft,
overrideSearchTerms,
jobspyLocationDraft,
jobspyResultsWantedDraft,
jobspyHoursOldDraft,
jobspyCountryIndeedDraft,
jobspyLinkedinFetchDescriptionDraft,
overrideJobspyLocation,
overrideJobspyResultsWanted,
overrideJobspyHoursOld,
overrideJobspyCountryIndeed,
overrideJobspyLinkedinFetchDescription,
])
const handleSave = async () => {
@ -120,17 +178,38 @@ export const SettingsPage: React.FC = () => {
const resumeProjectsOverride = resumeProjectsEqual(resumeProjectsDraft, settings.defaultResumeProjects)
? null
: resumeProjectsDraft
// Work out what to persist for each setting. `null` clears the stored override so the
// default applies; a draft that equals the default is also sent as null.
const ukvisajobsMaxJobsOverride = ukvisajobsMaxJobsDraft === defaultUkvisajobsMaxJobs ? null : ukvisajobsMaxJobsDraft
// The textarea keeps blank lines in the draft while typing. Trim and drop them here as
// well, because the settings API validates every term as a non-empty string and the
// blur-time clean-up may not have run before saving.
const cleanedSearchTerms = searchTermsDraft === null
  ? null
  : searchTermsDraft.map((term) => term.trim()).filter(Boolean)
const searchTermsOverride = cleanedSearchTerms === null || arraysEqual(cleanedSearchTerms, defaultSearchTerms)
  ? null
  : cleanedSearchTerms
// A cleared or whitespace-only text field means "no override". Sending "" would be
// rejected by the settings API (non-empty string required) and make the whole save fail.
const jobspyLocationTrimmed = (jobspyLocationDraft ?? "").trim()
const jobspyLocationOverride = jobspyLocationTrimmed === "" || jobspyLocationTrimmed === defaultJobspyLocation
  ? null
  : jobspyLocationTrimmed
const jobspyResultsWantedOverride = jobspyResultsWantedDraft === defaultJobspyResultsWanted ? null : jobspyResultsWantedDraft
const jobspyHoursOldOverride = jobspyHoursOldDraft === defaultJobspyHoursOld ? null : jobspyHoursOldDraft
const jobspyCountryIndeedTrimmed = (jobspyCountryIndeedDraft ?? "").trim()
const jobspyCountryIndeedOverride = jobspyCountryIndeedTrimmed === "" || jobspyCountryIndeedTrimmed === defaultJobspyCountryIndeed
  ? null
  : jobspyCountryIndeedTrimmed
const jobspyLinkedinFetchDescriptionOverride = jobspyLinkedinFetchDescriptionDraft === defaultJobspyLinkedinFetchDescription ? null : jobspyLinkedinFetchDescriptionDraft
const updated = await api.updateSettings({
model: trimmed.length > 0 ? trimmed : null,
pipelineWebhookUrl: webhookTrimmed.length > 0 ? webhookTrimmed : null,
jobCompleteWebhookUrl: jobCompleteTrimmed.length > 0 ? jobCompleteTrimmed : null,
resumeProjects: resumeProjectsOverride,
ukvisajobsMaxJobs: ukvisajobsMaxJobsOverride,
searchTerms: searchTermsOverride,
jobspyLocation: jobspyLocationOverride,
jobspyResultsWanted: jobspyResultsWantedOverride,
jobspyHoursOld: jobspyHoursOldOverride,
jobspyCountryIndeed: jobspyCountryIndeedOverride,
jobspyLinkedinFetchDescription: jobspyLinkedinFetchDescriptionOverride,
})
setSettings(updated)
setModelDraft(updated.overrideModel ?? "")
setPipelineWebhookUrlDraft(updated.overridePipelineWebhookUrl ?? "")
setJobCompleteWebhookUrlDraft(updated.overrideJobCompleteWebhookUrl ?? "")
setResumeProjectsDraft(updated.resumeProjects)
setUkvisajobsMaxJobsDraft(updated.overrideUkvisajobsMaxJobs)
setSearchTermsDraft(updated.overrideSearchTerms)
setJobspyLocationDraft(updated.overrideJobspyLocation)
setJobspyResultsWantedDraft(updated.overrideJobspyResultsWanted)
setJobspyHoursOldDraft(updated.overrideJobspyHoursOld)
setJobspyCountryIndeedDraft(updated.overrideJobspyCountryIndeed)
setJobspyLinkedinFetchDescriptionDraft(updated.overrideJobspyLinkedinFetchDescription)
toast.success("Settings saved")
} catch (error) {
const message = error instanceof Error ? error.message : "Failed to save settings"
@ -148,12 +227,26 @@ export const SettingsPage: React.FC = () => {
pipelineWebhookUrl: null,
jobCompleteWebhookUrl: null,
resumeProjects: null,
ukvisajobsMaxJobs: null,
searchTerms: null,
jobspyLocation: null,
jobspyResultsWanted: null,
jobspyHoursOld: null,
jobspyCountryIndeed: null,
jobspyLinkedinFetchDescription: null,
})
setSettings(updated)
setModelDraft("")
setPipelineWebhookUrlDraft("")
setJobCompleteWebhookUrlDraft("")
setResumeProjectsDraft(updated.resumeProjects)
setUkvisajobsMaxJobsDraft(null)
setSearchTermsDraft(null)
setJobspyLocationDraft(null)
setJobspyResultsWantedDraft(null)
setJobspyHoursOldDraft(null)
setJobspyCountryIndeedDraft(null)
setJobspyLinkedinFetchDescriptionDraft(null)
toast.success("Reset to default")
} catch (error) {
const message = error instanceof Error ? error.message : "Failed to reset settings"
@ -272,6 +365,220 @@ export const SettingsPage: React.FC = () => {
</CardContent>
</Card>
<Card>
<CardHeader>
<CardTitle className="text-base">UKVisaJobs Extractor</CardTitle>
</CardHeader>
<CardContent className="space-y-4">
<div className="space-y-2">
<div className="text-sm font-medium">Max jobs to fetch</div>
<Input
type="number"
inputMode="numeric"
min={1}
max={200}
value={ukvisajobsMaxJobsDraft ?? defaultUkvisajobsMaxJobs}
onChange={(event) => {
const value = parseInt(event.target.value, 10)
if (Number.isNaN(value)) {
setUkvisajobsMaxJobsDraft(null)
} else {
setUkvisajobsMaxJobsDraft(Math.min(200, Math.max(1, value)))
}
}}
disabled={isLoading || isSaving}
/>
<div className="text-xs text-muted-foreground">
Maximum number of jobs to fetch from UKVisaJobs per pipeline run. Range: 1-200.
</div>
</div>
<Separator />
<div className="grid gap-2 text-sm sm:grid-cols-2">
<div>
<div className="text-xs text-muted-foreground">Effective</div>
<div className="break-words font-mono text-xs">{effectiveUkvisajobsMaxJobs}</div>
</div>
<div>
<div className="text-xs text-muted-foreground">Default</div>
<div className="break-words font-mono text-xs">{defaultUkvisajobsMaxJobs}</div>
</div>
</div>
</CardContent>
</Card>
<Card>
<CardHeader>
<CardTitle className="text-base">Search Terms</CardTitle>
</CardHeader>
<CardContent className="space-y-4">
<div className="space-y-2">
<div className="text-sm font-medium">Global search terms</div>
<textarea
className="flex min-h-[80px] w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
value={searchTermsDraft ? searchTermsDraft.join('\n') : (defaultSearchTerms ?? []).join('\n')}
onChange={(event) => {
const text = event.target.value
const terms = text.split('\n') // Don't filter here to allow empty lines while typing
setSearchTermsDraft(terms)
}}
onBlur={() => {
// Clean up on blur
if (searchTermsDraft) {
setSearchTermsDraft(searchTermsDraft.map(t => t.trim()).filter(Boolean))
}
}}
placeholder="e.g. web developer"
disabled={isLoading || isSaving}
rows={5}
/>
<div className="text-xs text-muted-foreground">
One term per line. Applies to UKVisaJobs and other supported extractors.
</div>
</div>
<Separator />
<div className="grid gap-2 text-sm sm:grid-cols-2">
<div>
<div className="text-xs text-muted-foreground">Effective</div>
<div className="break-words font-mono text-xs">{(effectiveSearchTerms || []).join(', ') || "—"}</div>
</div>
<div>
<div className="text-xs text-muted-foreground">Default (env)</div>
<div className="break-words font-mono text-xs">{(defaultSearchTerms || []).join(', ') || "—"}</div>
</div>
</div>
</CardContent>
</Card>
<Card>
<CardHeader>
<CardTitle className="text-base">JobSpy Scraper</CardTitle>
</CardHeader>
<CardContent className="space-y-6">
<div className="grid gap-6 md:grid-cols-2">
<div className="space-y-2">
<div className="text-sm font-medium">Location</div>
<Input
value={jobspyLocationDraft ?? defaultJobspyLocation}
onChange={(event) => setJobspyLocationDraft(event.target.value)}
placeholder={defaultJobspyLocation || "UK"}
disabled={isLoading || isSaving}
/>
<div className="text-xs text-muted-foreground">
Location to search for jobs (e.g. "UK", "London", "Remote").
</div>
<div className="flex gap-2 text-xs text-muted-foreground">
<span>Effective: {effectiveJobspyLocation || "—"}</span>
<span>Default: {defaultJobspyLocation || "—"}</span>
</div>
</div>
<div className="space-y-2">
<div className="text-sm font-medium">Results Wanted</div>
<Input
type="number"
inputMode="numeric"
min={1}
max={500}
value={jobspyResultsWantedDraft ?? defaultJobspyResultsWanted}
onChange={(event) => {
const value = parseInt(event.target.value, 10)
if (Number.isNaN(value)) {
setJobspyResultsWantedDraft(null)
} else {
setJobspyResultsWantedDraft(Math.min(500, Math.max(1, value)))
}
}}
disabled={isLoading || isSaving}
/>
<div className="text-xs text-muted-foreground">
Number of results to fetch per term per site. Max 500.
</div>
<div className="flex gap-2 text-xs text-muted-foreground">
<span>Effective: {effectiveJobspyResultsWanted}</span>
<span>Default: {defaultJobspyResultsWanted}</span>
</div>
</div>
<div className="space-y-2">
<div className="text-sm font-medium">Hours Old</div>
<Input
type="number"
inputMode="numeric"
min={1}
max={168}
value={jobspyHoursOldDraft ?? defaultJobspyHoursOld}
onChange={(event) => {
const value = parseInt(event.target.value, 10)
if (Number.isNaN(value)) {
setJobspyHoursOldDraft(null)
} else {
setJobspyHoursOldDraft(Math.min(168, Math.max(1, value)))
}
}}
disabled={isLoading || isSaving}
/>
<div className="text-xs text-muted-foreground">
Max age of jobs in hours (e.g. 72 for 3 days).
</div>
<div className="flex gap-2 text-xs text-muted-foreground">
<span>Effective: {effectiveJobspyHoursOld}h</span>
<span>Default: {defaultJobspyHoursOld}h</span>
</div>
</div>
<div className="space-y-2">
<div className="text-sm font-medium">Indeed Country</div>
<Input
value={jobspyCountryIndeedDraft ?? defaultJobspyCountryIndeed}
onChange={(event) => setJobspyCountryIndeedDraft(event.target.value)}
placeholder={defaultJobspyCountryIndeed || "UK"}
disabled={isLoading || isSaving}
/>
<div className="text-xs text-muted-foreground">
Country domain for Indeed (e.g. "UK" for indeed.co.uk).
</div>
<div className="flex gap-2 text-xs text-muted-foreground">
<span>Effective: {effectiveJobspyCountryIndeed || "—"}</span>
<span>Default: {defaultJobspyCountryIndeed || "—"}</span>
</div>
</div>
</div>
<Separator />
<div className="flex items-center space-x-2">
<Checkbox
id="linkedin-desc"
checked={jobspyLinkedinFetchDescriptionDraft ?? defaultJobspyLinkedinFetchDescription}
onCheckedChange={(checked) => setJobspyLinkedinFetchDescriptionDraft(!!checked)}
disabled={isLoading || isSaving}
/>
<div className="grid gap-1.5 leading-none">
<label
htmlFor="linkedin-desc"
className="text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70"
>
Fetch LinkedIn Description
</label>
<p className="text-xs text-muted-foreground">
If enabled, JobSpy will make extra requests to fetch full descriptions. Slower but better data.
</p>
<div className="flex gap-2 text-xs text-muted-foreground">
<span>Effective: {effectiveJobspyLinkedinFetchDescription ? "Yes" : "No"}</span>
<span>Default: {defaultJobspyLinkedinFetchDescription ? "Yes" : "No"}</span>
</div>
</div>
</div>
</CardContent>
</Card>
<Card>
<CardHeader>
<CardTitle className="text-base">Resume Projects</CardTitle>

View File

@ -60,10 +60,10 @@ apiRouter.get('/jobs', async (req: Request, res: Response) => {
try {
const statusFilter = req.query.status as string | undefined;
const statuses = statusFilter?.split(',').filter(Boolean) as JobStatus[] | undefined;
const jobs = await jobsRepo.getAllJobs(statuses);
const stats = await jobsRepo.getJobStats();
const response: ApiResponse<JobsListResponse> = {
success: true,
data: {
@ -72,7 +72,7 @@ apiRouter.get('/jobs', async (req: Request, res: Response) => {
byStatus: stats,
},
};
res.json(response);
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
@ -86,11 +86,11 @@ apiRouter.get('/jobs', async (req: Request, res: Response) => {
apiRouter.get('/jobs/:id', async (req: Request, res: Response) => {
try {
const job = await jobsRepo.getJobById(req.params.id);
if (!job) {
return res.status(404).json({ success: false, error: 'Job not found' });
}
res.json({ success: true, data: job });
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
@ -113,11 +113,11 @@ apiRouter.patch('/jobs/:id', async (req: Request, res: Response) => {
try {
const input = updateJobSchema.parse(req.body);
const job = await jobsRepo.updateJob(req.params.id, input);
if (!job) {
return res.status(404).json({ success: false, error: 'Job not found' });
}
res.json({ success: true, data: job });
} catch (error) {
if (error instanceof z.ZodError) {
@ -137,11 +137,11 @@ apiRouter.post('/jobs/:id/process', async (req: Request, res: Response) => {
const force = forceRaw === '1' || forceRaw === 'true';
const result = await processJob(req.params.id, { force });
if (!result.success) {
return res.status(400).json({ success: false, error: result.error });
}
const job = await jobsRepo.getJobById(req.params.id);
res.json({ success: true, data: job });
} catch (error) {
@ -156,13 +156,13 @@ apiRouter.post('/jobs/:id/process', async (req: Request, res: Response) => {
apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
try {
const job = await jobsRepo.getJobById(req.params.id);
if (!job) {
return res.status(404).json({ success: false, error: 'Job not found' });
}
const appliedAt = new Date().toISOString();
// Sync to Notion
const notionResult = await createNotionEntry({
id: job.id,
@ -175,7 +175,7 @@ apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
pdfPath: job.pdfPath,
appliedAt,
});
// Update job status
const updatedJob = await jobsRepo.updateJob(job.id, {
status: 'applied',
@ -186,7 +186,7 @@ apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
if (updatedJob) {
notifyJobCompleteWebhook(updatedJob).catch(console.warn)
}
res.json({ success: true, data: updatedJob });
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
@ -200,11 +200,11 @@ apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
apiRouter.post('/jobs/:id/reject', async (req: Request, res: Response) => {
try {
const job = await jobsRepo.updateJob(req.params.id, { status: 'rejected' });
if (!job) {
return res.status(404).json({ success: false, error: 'Job not found' });
}
res.json({ success: true, data: job });
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
@ -238,6 +238,43 @@ apiRouter.get('/settings', async (_req: Request, res: Response) => {
const overrideResumeProjectsRaw = await settingsRepo.getSetting('resumeProjects');
const resumeProjectsData = resolveResumeProjectsSettings({ catalog, overrideRaw: overrideResumeProjectsRaw });
// Each setting below is resolved as a triple: the stored override (null when unset), the
// default, and the effective value (override if present, otherwise the default).

// Integer settings are stored as strings and some defaults come from env vars. Missing,
// empty, or non-numeric input resolves to null so NaN never reaches the response.
const parseIntOrNull = (raw: string | null | undefined): number | null => {
  if (!raw) return null;
  const parsed = parseInt(raw, 10);
  return Number.isNaN(parsed) ? null : parsed;
};
// Search terms are stored as a JSON array of strings. A value that fails to parse or has
// the wrong shape is ignored (so the default applies) instead of failing the request.
const parseStringArrayOrNull = (raw: string | null | undefined): string[] | null => {
  if (!raw) return null;
  try {
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed) && parsed.every((item) => typeof item === 'string')) {
      return parsed as string[];
    }
  } catch {
    // Not valid JSON; handled by the warning below.
  }
  console.warn('Ignoring invalid stored searchTerms setting');
  return null;
};

const overrideUkvisajobsMaxJobsRaw = await settingsRepo.getSetting('ukvisajobsMaxJobs');
const defaultUkvisajobsMaxJobs = 50;
const overrideUkvisajobsMaxJobs = parseIntOrNull(overrideUkvisajobsMaxJobsRaw);
const ukvisajobsMaxJobs = overrideUkvisajobsMaxJobs ?? defaultUkvisajobsMaxJobs;

const overrideSearchTermsRaw = await settingsRepo.getSetting('searchTerms');
// Default terms come from a pipe-separated env var.
// NOTE(review): .env.example documents JOBSPY_SEARCH_TERM (singular); confirm that
// JOBSPY_SEARCH_TERMS is the intended variable name.
const defaultSearchTermsEnv = process.env.JOBSPY_SEARCH_TERMS || 'web developer';
const defaultSearchTerms = defaultSearchTermsEnv.split('|').map(s => s.trim()).filter(Boolean);
const overrideSearchTerms = parseStringArrayOrNull(overrideSearchTermsRaw);
const searchTerms = overrideSearchTerms ?? defaultSearchTerms;

// JobSpy settings
const overrideJobspyLocation = await settingsRepo.getSetting('jobspyLocation');
const defaultJobspyLocation = process.env.JOBSPY_LOCATION || 'UK';
const jobspyLocation = overrideJobspyLocation || defaultJobspyLocation;

const overrideJobspyResultsWantedRaw = await settingsRepo.getSetting('jobspyResultsWanted');
// A malformed env value falls back to the built-in default rather than producing NaN.
const defaultJobspyResultsWanted = parseIntOrNull(process.env.JOBSPY_RESULTS_WANTED) ?? 200;
const overrideJobspyResultsWanted = parseIntOrNull(overrideJobspyResultsWantedRaw);
const jobspyResultsWanted = overrideJobspyResultsWanted ?? defaultJobspyResultsWanted;

const overrideJobspyHoursOldRaw = await settingsRepo.getSetting('jobspyHoursOld');
const defaultJobspyHoursOld = parseIntOrNull(process.env.JOBSPY_HOURS_OLD) ?? 72;
const overrideJobspyHoursOld = parseIntOrNull(overrideJobspyHoursOldRaw);
const jobspyHoursOld = overrideJobspyHoursOld ?? defaultJobspyHoursOld;

const overrideJobspyCountryIndeed = await settingsRepo.getSetting('jobspyCountryIndeed');
const defaultJobspyCountryIndeed = process.env.JOBSPY_COUNTRY_INDEED || 'UK';
const jobspyCountryIndeed = overrideJobspyCountryIndeed || defaultJobspyCountryIndeed;

const overrideJobspyLinkedinFetchDescriptionRaw = await settingsRepo.getSetting('jobspyLinkedinFetchDescription');
// The env default is enabled only when the variable is exactly '1' (or unset/empty).
const defaultJobspyLinkedinFetchDescription = (process.env.JOBSPY_LINKEDIN_FETCH_DESCRIPTION || '1') === '1';
// Stored overrides are read as true for 'true' or '1'; any other non-empty value is false.
const overrideJobspyLinkedinFetchDescription = overrideJobspyLinkedinFetchDescriptionRaw
  ? overrideJobspyLinkedinFetchDescriptionRaw === 'true' || overrideJobspyLinkedinFetchDescriptionRaw === '1'
  : null;
const jobspyLinkedinFetchDescription = overrideJobspyLinkedinFetchDescription ?? defaultJobspyLinkedinFetchDescription;
res.json({
success: true,
data: {
@ -251,6 +288,27 @@ apiRouter.get('/settings', async (_req: Request, res: Response) => {
defaultJobCompleteWebhookUrl,
overrideJobCompleteWebhookUrl,
...resumeProjectsData,
ukvisajobsMaxJobs,
defaultUkvisajobsMaxJobs,
overrideUkvisajobsMaxJobs,
searchTerms,
defaultSearchTerms,
overrideSearchTerms,
jobspyLocation,
defaultJobspyLocation,
overrideJobspyLocation,
jobspyResultsWanted,
defaultJobspyResultsWanted,
overrideJobspyResultsWanted,
jobspyHoursOld,
defaultJobspyHoursOld,
overrideJobspyHoursOld,
jobspyCountryIndeed,
defaultJobspyCountryIndeed,
overrideJobspyCountryIndeed,
jobspyLinkedinFetchDescription,
defaultJobspyLinkedinFetchDescription,
overrideJobspyLinkedinFetchDescription,
},
});
} catch (error) {
@ -268,6 +326,13 @@ const updateSettingsSchema = z.object({
lockedProjectIds: z.array(z.string().trim().min(1)).max(200),
aiSelectableProjectIds: z.array(z.string().trim().min(1)).max(200),
}).nullable().optional(),
ukvisajobsMaxJobs: z.number().int().min(1).max(200).nullable().optional(),
searchTerms: z.array(z.string().trim().min(1).max(200)).max(50).nullable().optional(),
jobspyLocation: z.string().trim().min(1).max(100).nullable().optional(),
jobspyResultsWanted: z.number().int().min(1).max(500).nullable().optional(),
jobspyHoursOld: z.number().int().min(1).max(168).nullable().optional(),
jobspyCountryIndeed: z.string().trim().min(1).max(100).nullable().optional(),
jobspyLinkedinFetchDescription: z.boolean().nullable().optional(),
});
/**
@ -306,6 +371,41 @@ apiRouter.patch('/settings', async (req: Request, res: Response) => {
}
}
if ('ukvisajobsMaxJobs' in input) {
const ukvisajobsMaxJobs = input.ukvisajobsMaxJobs ?? null;
await settingsRepo.setSetting('ukvisajobsMaxJobs', ukvisajobsMaxJobs !== null ? String(ukvisajobsMaxJobs) : null);
}
if ('searchTerms' in input) {
const searchTerms = input.searchTerms ?? null;
await settingsRepo.setSetting('searchTerms', searchTerms !== null ? JSON.stringify(searchTerms) : null);
}
if ('jobspyLocation' in input) {
const value = input.jobspyLocation ?? null;
await settingsRepo.setSetting('jobspyLocation', value);
}
if ('jobspyResultsWanted' in input) {
const value = input.jobspyResultsWanted ?? null;
await settingsRepo.setSetting('jobspyResultsWanted', value !== null ? String(value) : null);
}
if ('jobspyHoursOld' in input) {
const value = input.jobspyHoursOld ?? null;
await settingsRepo.setSetting('jobspyHoursOld', value !== null ? String(value) : null);
}
if ('jobspyCountryIndeed' in input) {
const value = input.jobspyCountryIndeed ?? null;
await settingsRepo.setSetting('jobspyCountryIndeed', value);
}
if ('jobspyLinkedinFetchDescription' in input) {
const value = input.jobspyLinkedinFetchDescription ?? null;
await settingsRepo.setSetting('jobspyLinkedinFetchDescription', value !== null ? (value ? '1' : '0') : null);
}
const overrideModel = await settingsRepo.getSetting('model');
const defaultModel = process.env.MODEL || 'openai/gpt-4o-mini';
const model = overrideModel || defaultModel;
@ -323,6 +423,44 @@ apiRouter.patch('/settings', async (req: Request, res: Response) => {
const overrideResumeProjectsRaw = await settingsRepo.getSetting('resumeProjects');
const resumeProjectsData = resolveResumeProjectsSettings({ catalog, overrideRaw: overrideResumeProjectsRaw });
// Re-read every setting after the writes above so the response reflects what is stored.
// Each one is resolved as a triple: stored override (null when unset), default, and the
// effective value (override if present, otherwise the default).

// Integer settings are stored as strings and some defaults come from env vars. Missing,
// empty, or non-numeric input resolves to null so NaN never reaches the response.
const parseIntOrNull = (raw: string | null | undefined): number | null => {
  if (!raw) return null;
  const parsed = parseInt(raw, 10);
  return Number.isNaN(parsed) ? null : parsed;
};
// Search terms are stored as a JSON array of strings. A value that fails to parse or has
// the wrong shape is ignored (so the default applies) instead of failing the request.
const parseStringArrayOrNull = (raw: string | null | undefined): string[] | null => {
  if (!raw) return null;
  try {
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed) && parsed.every((item) => typeof item === 'string')) {
      return parsed as string[];
    }
  } catch {
    // Not valid JSON; handled by the warning below.
  }
  console.warn('Ignoring invalid stored searchTerms setting');
  return null;
};

const overrideUkvisajobsMaxJobsRaw = await settingsRepo.getSetting('ukvisajobsMaxJobs');
const defaultUkvisajobsMaxJobs = 50;
const overrideUkvisajobsMaxJobs = parseIntOrNull(overrideUkvisajobsMaxJobsRaw);
const ukvisajobsMaxJobs = overrideUkvisajobsMaxJobs ?? defaultUkvisajobsMaxJobs;

// Search terms - stored as JSON array, default from env var (pipe-separated)
// NOTE(review): .env.example documents JOBSPY_SEARCH_TERM (singular); confirm that
// JOBSPY_SEARCH_TERMS is the intended variable name.
const overrideSearchTermsRaw = await settingsRepo.getSetting('searchTerms');
const defaultSearchTermsEnv = process.env.JOBSPY_SEARCH_TERMS || 'web developer';
const defaultSearchTerms = defaultSearchTermsEnv.split('|').map(s => s.trim()).filter(Boolean);
const overrideSearchTerms = parseStringArrayOrNull(overrideSearchTermsRaw);
const searchTerms = overrideSearchTerms ?? defaultSearchTerms;

// JobSpy settings (re-fetch to update response)
const overrideJobspyLocation = await settingsRepo.getSetting('jobspyLocation');
const defaultJobspyLocation = process.env.JOBSPY_LOCATION || 'UK';
const jobspyLocation = overrideJobspyLocation || defaultJobspyLocation;

const overrideJobspyResultsWantedRaw = await settingsRepo.getSetting('jobspyResultsWanted');
// A malformed env value falls back to the built-in default rather than producing NaN.
const defaultJobspyResultsWanted = parseIntOrNull(process.env.JOBSPY_RESULTS_WANTED) ?? 200;
const overrideJobspyResultsWanted = parseIntOrNull(overrideJobspyResultsWantedRaw);
const jobspyResultsWanted = overrideJobspyResultsWanted ?? defaultJobspyResultsWanted;

const overrideJobspyHoursOldRaw = await settingsRepo.getSetting('jobspyHoursOld');
const defaultJobspyHoursOld = parseIntOrNull(process.env.JOBSPY_HOURS_OLD) ?? 72;
const overrideJobspyHoursOld = parseIntOrNull(overrideJobspyHoursOldRaw);
const jobspyHoursOld = overrideJobspyHoursOld ?? defaultJobspyHoursOld;

const overrideJobspyCountryIndeed = await settingsRepo.getSetting('jobspyCountryIndeed');
const defaultJobspyCountryIndeed = process.env.JOBSPY_COUNTRY_INDEED || 'UK';
const jobspyCountryIndeed = overrideJobspyCountryIndeed || defaultJobspyCountryIndeed;

const overrideJobspyLinkedinFetchDescriptionRaw = await settingsRepo.getSetting('jobspyLinkedinFetchDescription');
// The env default is enabled only when the variable is exactly '1' (or unset/empty).
const defaultJobspyLinkedinFetchDescription = (process.env.JOBSPY_LINKEDIN_FETCH_DESCRIPTION || '1') === '1';
// Stored overrides are read as true for 'true' or '1'; any other non-empty value is false.
const overrideJobspyLinkedinFetchDescription = overrideJobspyLinkedinFetchDescriptionRaw
  ? overrideJobspyLinkedinFetchDescriptionRaw === 'true' || overrideJobspyLinkedinFetchDescriptionRaw === '1'
  : null;
const jobspyLinkedinFetchDescription = overrideJobspyLinkedinFetchDescription ?? defaultJobspyLinkedinFetchDescription;
res.json({
success: true,
data: {
@ -336,6 +474,27 @@ apiRouter.patch('/settings', async (req: Request, res: Response) => {
defaultJobCompleteWebhookUrl,
overrideJobCompleteWebhookUrl,
...resumeProjectsData,
ukvisajobsMaxJobs,
defaultUkvisajobsMaxJobs,
overrideUkvisajobsMaxJobs,
searchTerms,
defaultSearchTerms,
overrideSearchTerms,
jobspyLocation,
defaultJobspyLocation,
overrideJobspyLocation,
jobspyResultsWanted,
defaultJobspyResultsWanted,
overrideJobspyResultsWanted,
jobspyHoursOld,
defaultJobspyHoursOld,
overrideJobspyHoursOld,
jobspyCountryIndeed,
defaultJobspyCountryIndeed,
overrideJobspyCountryIndeed,
jobspyLinkedinFetchDescription,
defaultJobspyLinkedinFetchDescription,
overrideJobspyLinkedinFetchDescription,
},
});
} catch (error) {
@ -351,7 +510,7 @@ apiRouter.get('/pipeline/status', async (req: Request, res: Response) => {
try {
const { isRunning } = getPipelineStatus();
const lastRun = await pipelineRepo.getLatestPipelineRun();
const response: ApiResponse<PipelineStatusResponse> = {
success: true,
data: {
@ -360,7 +519,7 @@ apiRouter.get('/pipeline/status', async (req: Request, res: Response) => {
nextScheduledRun: null, // Would come from n8n
},
};
res.json(response);
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
@ -377,20 +536,20 @@ apiRouter.get('/pipeline/progress', (req: Request, res: Response) => {
res.setHeader('Cache-Control', 'no-cache');
res.setHeader('Connection', 'keep-alive');
res.setHeader('X-Accel-Buffering', 'no'); // Disable Nginx buffering
// Send initial progress
const sendProgress = (data: unknown) => {
res.write(`data: ${JSON.stringify(data)}\n\n`);
};
// Subscribe to progress updates
const unsubscribe = subscribeToProgress(sendProgress);
// Send heartbeat every 30 seconds to keep connection alive
const heartbeat = setInterval(() => {
res.write(': heartbeat\n\n');
}, 30000);
// Cleanup on close
req.on('close', () => {
clearInterval(heartbeat);
@ -417,19 +576,19 @@ apiRouter.get('/pipeline/runs', async (req: Request, res: Response) => {
const runPipelineSchema = z.object({
topN: z.number().min(1).max(50).optional(),
minSuitabilityScore: z.number().min(0).max(100).optional(),
sources: z.array(z.enum(['gradcracker', 'indeed', 'linkedin'])).min(1).optional(),
sources: z.array(z.enum(['gradcracker', 'indeed', 'linkedin', 'ukvisajobs'])).min(1).optional(),
});
apiRouter.post('/pipeline/run', async (req: Request, res: Response) => {
try {
const config = runPipelineSchema.parse(req.body);
// Start pipeline in background
runPipeline(config).catch(console.error);
res.json({
success: true,
data: { message: 'Pipeline started' }
res.json({
success: true,
data: { message: 'Pipeline started' }
});
} catch (error) {
if (error instanceof z.ZodError) {
@ -451,21 +610,21 @@ apiRouter.post('/webhook/trigger', async (req: Request, res: Response) => {
// Optional: Add authentication check
const authHeader = req.headers.authorization;
const expectedToken = process.env.WEBHOOK_SECRET;
if (expectedToken && authHeader !== `Bearer ${expectedToken}`) {
return res.status(401).json({ success: false, error: 'Unauthorized' });
}
try {
// Start pipeline in background
runPipeline().catch(console.error);
res.json({
success: true,
data: {
res.json({
success: true,
data: {
message: 'Pipeline triggered',
triggeredAt: new Date().toISOString(),
}
}
});
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
@ -483,14 +642,14 @@ apiRouter.post('/webhook/trigger', async (req: Request, res: Response) => {
apiRouter.delete('/database', async (req: Request, res: Response) => {
try {
const result = clearDatabase();
res.json({
success: true,
data: {
res.json({
success: true,
data: {
message: 'Database cleared',
jobsDeleted: result.jobsDeleted,
runsDeleted: result.runsDeleted,
}
}
});
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';

View File

@ -7,9 +7,9 @@ import { sql } from 'drizzle-orm';
export const jobs = sqliteTable('jobs', {
id: text('id').primaryKey(),
// From crawler
source: text('source', { enum: ['gradcracker', 'indeed', 'linkedin'] }).notNull().default('gradcracker'),
source: text('source', { enum: ['gradcracker', 'indeed', 'linkedin', 'ukvisajobs'] }).notNull().default('gradcracker'),
sourceJobId: text('source_job_id'),
jobUrlDirect: text('job_url_direct'),
datePosted: text('date_posted'),
@ -51,17 +51,17 @@ export const jobs = sqliteTable('jobs', {
companyReviewsCount: integer('company_reviews_count'),
vacancyCount: integer('vacancy_count'),
workFromHomeType: text('work_from_home_type'),
// Orchestrator enrichments
status: text('status', {
enum: ['discovered', 'processing', 'ready', 'applied', 'rejected', 'expired']
status: text('status', {
enum: ['discovered', 'processing', 'ready', 'applied', 'rejected', 'expired']
}).notNull().default('discovered'),
suitabilityScore: real('suitability_score'),
suitabilityReason: text('suitability_reason'),
tailoredSummary: text('tailored_summary'),
pdfPath: text('pdf_path'),
notionPageId: text('notion_page_id'),
// Timestamps
discoveredAt: text('discovered_at').notNull().default(sql`(datetime('now'))`),
processedAt: text('processed_at'),

View File

@ -12,6 +12,7 @@ import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { runCrawler } from '../services/crawler.js';
import { runJobSpy } from '../services/jobspy.js';
import { runUkVisaJobs } from '../services/ukvisajobs.js';
import { scoreJobSuitability } from '../services/scorer.js';
import { generateSummary } from '../services/summary.js';
import { generatePdf } from '../services/pdf.js';
@ -27,7 +28,7 @@ const DEFAULT_PROFILE_PATH = join(__dirname, '../../../../resume-generator/base.
const DEFAULT_CONFIG: PipelineConfig = {
topN: 10,
minSuitabilityScore: 50,
sources: ['gradcracker', 'indeed', 'linkedin'],
sources: ['gradcracker', 'indeed', 'linkedin', 'ukvisajobs'],
profilePath: DEFAULT_PROFILE_PATH,
outputDir: join(__dirname, '../../../data/pdfs'),
};
@ -88,54 +89,41 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
error: 'Pipeline is already running',
};
}
isPipelineRunning = true;
resetProgress();
const mergedConfig = { ...DEFAULT_CONFIG, ...config };
// Create pipeline run record
const pipelineRun = await pipelineRepo.createPipelineRun();
console.log('🚀 Starting job pipeline...');
console.log(` Config: topN=${mergedConfig.topN}, minScore=${mergedConfig.minSuitabilityScore} (manual processing)`);
try {
// Step 1: Load profile
console.log('\n📋 Loading profile...');
const profile = await loadProfile(mergedConfig.profilePath);
// Step 2: Run crawler
console.log('\n🕷 Running crawler...');
progressHelpers.startCrawling();
const existingJobUrls = await jobsRepo.getAllJobUrls();
const discoveredJobs: CreateJobInput[] = [];
const sourceErrors: string[] = [];
if (mergedConfig.sources.includes('gradcracker')) {
const crawlerResult = await runCrawler({
existingJobUrls,
onProgress: (update) => {
progressHelpers.crawlingUpdate({
listPagesProcessed: update.listPagesProcessed,
listPagesTotal: update.listPagesTotal,
jobCardsFound: update.jobCardsFound,
jobPagesEnqueued: update.jobPagesEnqueued,
jobPagesSkipped: update.jobPagesSkipped,
jobPagesProcessed: update.jobPagesProcessed,
phase: update.phase,
currentUrl: update.currentUrl,
});
},
});
// Read search terms setting
const searchTermsSetting = await settingsRepo.getSetting('searchTerms');
let searchTerms: string[] = [];
if (!crawlerResult.success) {
sourceErrors.push(`gradcracker: ${crawlerResult.error ?? 'unknown error'}`);
} else {
discoveredJobs.push(...crawlerResult.jobs);
}
if (searchTermsSetting) {
searchTerms = JSON.parse(searchTermsSetting) as string[];
} else {
// Default from env var
const defaultSearchTermsEnv = process.env.JOBSPY_SEARCH_TERMS || 'web developer';
searchTerms = defaultSearchTermsEnv.split('|').map(s => s.trim()).filter(Boolean);
}
// Run JobSpy (Indeed/LinkedIn) if selected
const jobSpySites = mergedConfig.sources.filter(
(s): s is 'indeed' | 'linkedin' => s === 'indeed' || s === 'linkedin'
);
@ -146,7 +134,21 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
detail: `JobSpy: scraping ${jobSpySites.join(', ')}...`,
});
const jobSpyResult = await runJobSpy({ sites: jobSpySites });
const jobspyLocationSetting = await settingsRepo.getSetting('jobspyLocation');
const jobspyResultsWantedSetting = await settingsRepo.getSetting('jobspyResultsWanted');
const jobspyHoursOldSetting = await settingsRepo.getSetting('jobspyHoursOld');
const jobspyCountryIndeedSetting = await settingsRepo.getSetting('jobspyCountryIndeed');
const jobspyLinkedinFetchDescriptionSetting = await settingsRepo.getSetting('jobspyLinkedinFetchDescription');
const jobSpyResult = await runJobSpy({
sites: jobSpySites,
searchTerms,
location: jobspyLocationSetting ?? undefined,
resultsWanted: jobspyResultsWantedSetting ? parseInt(jobspyResultsWantedSetting, 10) : undefined,
hoursOld: jobspyHoursOldSetting ? parseInt(jobspyHoursOldSetting, 10) : undefined,
countryIndeed: jobspyCountryIndeedSetting ?? undefined,
linkedinFetchDescription: jobspyLinkedinFetchDescriptionSetting !== null ? jobspyLinkedinFetchDescriptionSetting === '1' : undefined,
});
if (!jobSpyResult.success) {
sourceErrors.push(`jobspy: ${jobSpyResult.error ?? 'unknown error'}`);
} else {
@ -154,6 +156,61 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
}
}
// Run Gradcracker crawler if selected
if (mergedConfig.sources.includes('gradcracker')) {
updateProgress({
step: 'crawling',
detail: 'Gradcracker: scraping...',
});
// Pass existing URLs to avoid clicking "Apply" on jobs we already have
const existingJobUrls = await jobsRepo.getAllJobUrls();
const crawlerResult = await runCrawler({
existingJobUrls,
searchTerms,
onProgress: (progress) => {
// Calculate overall progress based on list pages processed vs total
// This is rough but better than nothing
if (progress.listPagesTotal && progress.listPagesTotal > 0) {
const percent = Math.round((progress.listPagesProcessed ?? 0) / progress.listPagesTotal * 100);
updateProgress({
step: 'crawling',
detail: `Gradcracker: ${percent}% (scan ${progress.listPagesProcessed}/${progress.listPagesTotal}, found ${progress.jobCardsFound})`,
});
}
},
});
if (!crawlerResult.success) {
sourceErrors.push(`gradcracker: ${crawlerResult.error ?? 'unknown error'}`);
} else {
discoveredJobs.push(...crawlerResult.jobs);
}
}
// Run UKVisaJobs extractor if selected
if (mergedConfig.sources.includes('ukvisajobs')) {
updateProgress({
step: 'crawling',
detail: 'UKVisaJobs: scraping visa-sponsoring jobs...',
});
// Read max jobs setting from database (default to 50 if not set)
const ukvisajobsMaxJobsSetting = await settingsRepo.getSetting('ukvisajobsMaxJobs');
const ukvisajobsMaxJobs = ukvisajobsMaxJobsSetting ? parseInt(ukvisajobsMaxJobsSetting, 10) : 50;
const ukVisaResult = await runUkVisaJobs({
maxJobs: ukvisajobsMaxJobs,
searchTerms,
});
if (!ukVisaResult.success) {
sourceErrors.push(`ukvisajobs: ${ukVisaResult.error ?? 'unknown error'}`);
} else {
discoveredJobs.push(...ukVisaResult.jobs);
}
}
if (discoveredJobs.length === 0 && sourceErrors.length > 0) {
throw new Error(`All sources failed: ${sourceErrors.join('; ')}`);
}
@ -163,18 +220,18 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
}
progressHelpers.crawlingComplete(discoveredJobs.length);
// Step 3: Import discovered jobs
console.log('\n💾 Importing jobs to database...');
const { created, skipped } = await jobsRepo.bulkCreateJobs(discoveredJobs);
console.log(` Created: ${created}, Skipped (duplicates): ${skipped}`);
progressHelpers.importComplete(created, skipped);
await pipelineRepo.updatePipelineRun(pipelineRun.id, {
jobsDiscovered: created,
});
// Step 4: Score all discovered jobs missing a score
console.log('\n🎯 Scoring jobs for suitability...');
const unprocessedJobs = await jobsRepo.getUnscoredDiscoveredJobs();
@ -187,7 +244,7 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
totalToProcess: 0,
currentJob: undefined,
});
// Score jobs with progress updates
const scoredJobs: Array<Job & { suitabilityScore: number; suitabilityReason: string }> = [];
for (let i = 0; i < unprocessedJobs.length; i++) {
@ -217,46 +274,83 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
suitabilityReason: reason,
});
}
progressHelpers.scoringComplete(scoredJobs.length);
console.log(`\n📊 Scored ${scoredJobs.length} jobs. Ready for manual processing.`);
console.log(`\n📊 Scored ${scoredJobs.length} jobs.`);
// Step 5: Auto-process top jobs
console.log('\n🏭 Auto-processing top jobs...');
const jobsToProcess = scoredJobs
.filter(j => (j.suitabilityScore ?? 0) >= mergedConfig.minSuitabilityScore)
.sort((a, b) => (b.suitabilityScore ?? 0) - (a.suitabilityScore ?? 0))
.slice(0, mergedConfig.topN);
console.log(` Found ${jobsToProcess.length} candidates (score >= ${mergedConfig.minSuitabilityScore}, top ${mergedConfig.topN})`);
let processedCount = 0;
if (jobsToProcess.length > 0) {
updateProgress({
step: 'processing',
jobsProcessed: 0,
totalToProcess: jobsToProcess.length,
});
for (let i = 0; i < jobsToProcess.length; i++) {
const job = jobsToProcess[i];
progressHelpers.processingJob(i + 1, jobsToProcess.length, job);
// Process job (Generate Summary + PDF)
// We catch errors here to ensure one failure doesn't stop the whole batch
const result = await processJob(job.id);
if (result.success) {
processedCount++;
} else {
console.warn(` ⚠️ Failed to process job ${job.id}: ${result.error}`);
}
progressHelpers.jobComplete(i + 1, jobsToProcess.length);
}
}
// Update pipeline run as completed
await pipelineRepo.updatePipelineRun(pipelineRun.id, {
status: 'completed',
completedAt: new Date().toISOString(),
jobsProcessed: 0,
jobsProcessed: processedCount,
});
console.log('\n🎉 Pipeline completed!');
console.log(` Jobs discovered: ${created}`);
console.log(' Jobs processed: 0 (manual)');
progressHelpers.complete(created, 0);
console.log(` Jobs processed: ${processedCount}`);
progressHelpers.complete(created, processedCount);
await notifyPipelineWebhook('pipeline.completed', {
pipelineRunId: pipelineRun.id,
jobsDiscovered: created,
jobsScored: unprocessedJobs.length,
jobsProcessed: 0,
jobsProcessed: processedCount,
})
isPipelineRunning = false;
return {
success: true,
jobsDiscovered: created,
jobsProcessed: 0,
jobsProcessed: processedCount,
};
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
await pipelineRepo.updatePipelineRun(pipelineRun.id, {
status: 'failed',
completedAt: new Date().toISOString(),
errorMessage: message,
});
progressHelpers.failed(message);
await notifyPipelineWebhook('pipeline.failed', {
@ -264,9 +358,9 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
error: message,
})
isPipelineRunning = false;
console.error('\n❌ Pipeline failed:', message);
return {
success: false,
jobsDiscovered: 0,
@ -287,7 +381,7 @@ export async function processJob(
error?: string;
}> {
console.log(`📝 Processing job ${jobId}...`);
try {
const job = await jobsRepo.getJobById(jobId);
if (!job) {
@ -297,9 +391,9 @@ export async function processJob(
if (job.status !== 'discovered' && job.status !== 'ready') {
return { success: false, error: `Job cannot be processed from status: ${job.status}` };
}
const profile = await loadProfile(DEFAULT_PROFILE_PATH);
// Mark as processing
await jobsRepo.updateJob(job.id, { status: 'processing' });
@ -314,7 +408,7 @@ export async function processJob(
job.suitabilityScore = suitability.score;
job.suitabilityReason = suitability.reason;
}
// Generate summary (AI)
// If forcing, always recompute; otherwise compute if missing.
if (options?.force || !job.tailoredSummary) {
@ -323,7 +417,7 @@ export async function processJob(
job.jobDescription || '',
profile
);
if (summaryResult.success) {
await jobsRepo.updateJob(job.id, {
tailoredSummary: summaryResult.summary,
@ -331,7 +425,7 @@ export async function processJob(
job.tailoredSummary = summaryResult.summary ?? null;
}
}
// Generate PDF
console.log(' Generating PDF...');
const pdfResult = await generatePdf(
@ -340,16 +434,16 @@ export async function processJob(
job.jobDescription || '',
DEFAULT_PROFILE_PATH
);
// Mark as ready
await jobsRepo.updateJob(job.id, {
status: 'ready',
pdfPath: pdfResult.pdfPath ?? undefined,
});
console.log(' ✅ Done!');
return { success: true };
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
return { success: false, error: message };

View File

@ -11,6 +11,13 @@ export type SettingKey = 'model'
| 'pipelineWebhookUrl'
| 'jobCompleteWebhookUrl'
| 'resumeProjects'
| 'ukvisajobsMaxJobs'
| 'searchTerms'
| 'jobspyLocation'
| 'jobspyResultsWanted'
| 'jobspyHoursOld'
| 'jobspyCountryIndeed'
| 'jobspyLinkedinFetchDescription'
export async function getSetting(key: SettingKey): Promise<string | null> {
const [row] = await db.select().from(settings).where(eq(settings.key, key))

View File

@ -32,6 +32,11 @@ export interface RunCrawlerOptions {
* Optional callback for live crawl progress emitted by the Gradcracker extractor.
*/
onProgress?: (update: JobExtractorProgress) => void;
/**
* List of search terms to be used as roles for URL generation.
*/
searchTerms?: string[];
}
interface JobExtractorProgress {
@ -61,13 +66,13 @@ async function writeExistingJobUrlsFile(existingJobUrls: string[] | undefined):
*/
export async function runCrawler(options: RunCrawlerOptions = {}): Promise<CrawlerResult> {
console.log('🕷️ Starting job crawler...');
try {
// Clear previous results
await clearStorageDataset();
const existingJobUrlsFile = await writeExistingJobUrlsFile(options.existingJobUrls);
// Run the crawler
await new Promise<void>((resolve, reject) => {
const child = spawn('npm', ['run', 'start'], {
@ -78,6 +83,7 @@ export async function runCrawler(options: RunCrawlerOptions = {}): Promise<Crawl
...process.env,
JOBOPS_SKIP_APPLY_FOR_EXISTING: '1',
JOBOPS_EMIT_PROGRESS: '1',
GRADCRACKER_SEARCH_TERMS: options.searchTerms ? JSON.stringify(options.searchTerms) : '',
...(existingJobUrlsFile ? { JOBOPS_EXISTING_JOB_URLS_FILE: existingJobUrlsFile } : {}),
},
});
@ -101,7 +107,7 @@ export async function runCrawler(options: RunCrawlerOptions = {}): Promise<Crawl
stdoutRl?.on('line', (line) => handleLine(line, process.stdout));
stderrRl?.on('line', (line) => handleLine(line, process.stderr));
child.on('close', (code) => {
stdoutRl?.close();
stderrRl?.close();
@ -111,15 +117,15 @@ export async function runCrawler(options: RunCrawlerOptions = {}): Promise<Crawl
reject(new Error(`Crawler exited with code ${code}`));
}
});
child.on('error', reject);
});
// Read crawled jobs from storage
const jobs = await readCrawledJobs();
console.log(`✅ Crawler completed. Found ${jobs.length} jobs.`);
return { success: true, jobs };
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
@ -135,13 +141,13 @@ async function readCrawledJobs(): Promise<CreateJobInput[]> {
try {
const files = await readdir(STORAGE_DIR);
const jsonFiles = files.filter(f => f.endsWith('.json'));
const jobs: CreateJobInput[] = [];
for (const file of jsonFiles) {
const content = await readFile(join(STORAGE_DIR, file), 'utf-8');
const data = JSON.parse(content);
// Map crawler output to our job input format
jobs.push({
source: 'gradcracker',
@ -159,7 +165,7 @@ async function readCrawledJobs(): Promise<CreateJobInput[]> {
jobDescription: data.jobDescription,
});
}
return jobs;
} catch (error) {
console.error('Failed to read crawled jobs:', error);

View File

@ -0,0 +1,164 @@
/**
* Service for running the UK Visa Jobs extractor (extractors/ukvisajobs).
*
* Spawns the extractor as a child process and reads its output dataset.
*/
import { spawn } from 'child_process';
import { readdir, readFile, rm, mkdir } from 'fs/promises';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import type { CreateJobInput } from '../../shared/types.js';
// Directory containing this module, derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Root of the UK Visa Jobs extractor package, resolved relative to this module; used as the child process cwd.
const UKVISAJOBS_DIR = join(__dirname, '../../../../extractors/ukvisajobs');
// Dataset directory inside the extractor package: removed before each extractor run and read back afterwards.
const STORAGE_DIR = join(UKVISAJOBS_DIR, 'storage/datasets/default');
/** Options accepted by runUkVisaJobs. */
export interface RunUkVisaJobsOptions {
/**
* Maximum number of jobs to fetch per search term; runUkVisaJobs uses 50 when this is omitted.
* NOTE(review): a cap of 200 was documented here, but nothing in this file enforces it;
* presumably the extractor applies it. Confirm before relying on it.
*/
maxJobs?: number;
/** Single search keyword (legacy support); only used when searchTerms is missing or empty. */
searchKeyword?: string;
/** List of search terms to run sequentially, one extractor run per term; takes precedence over searchKeyword. */
searchTerms?: string[];
}
/** Outcome of a runUkVisaJobs call. */
export interface UkVisaJobsResult {
/** Whether the run is reported as successful. */
success: boolean;
/** Jobs collected by the run, mapped to the orchestrator's job input shape. */
jobs: CreateJobInput[];
/** Description of what went wrong, when available. */
error?: string;
}
/**
 * Clear previous extraction results.
 *
 * Removes the extractor's dataset directory and everything in it. A missing
 * directory is not an error because `force: true` makes `rm` ignore it. Any
 * other failure (permissions, a locked file, ...) is propagated instead of
 * being swallowed: if the old dataset cannot be removed, the files from an
 * earlier run would otherwise be read back as if they were fresh results.
 *
 * @throws If the directory exists but cannot be removed.
 */
async function clearStorageDataset(): Promise<void> {
  await rm(STORAGE_DIR, { recursive: true, force: true });
}
/**
 * Run the UK Visa Jobs extractor once per search term and collect the results.
 *
 * Term selection: `options.searchTerms` when non-empty, otherwise the legacy
 * `options.searchKeyword`, otherwise a single run with an empty keyword.
 * For every term the dataset directory is reset, the extractor is spawned as
 * a child process (`npx tsx src/main.ts` in the extractor directory) with
 * `UKVISAJOBS_MAX_JOBS` and `UKVISAJOBS_SEARCH_KEYWORD` set, and its output
 * dataset is read back. Jobs are de-duplicated across terms by
 * `sourceJobId`, falling back to `jobUrl`. Runs are separated by a 5 second
 * pause.
 *
 * A failing term is logged and skipped so the remaining terms still run.
 * Only when every term fails does the function report `success: false`, with
 * the per-term messages joined into `error`, so the caller can tell a broken
 * extractor apart from a search that simply returned nothing.
 *
 * @param options - Search terms and per-term job limit.
 * @returns The unique jobs collected, or a failure result when no term succeeded.
 */
export async function runUkVisaJobs(options: RunUkVisaJobsOptions = {}): Promise<UkVisaJobsResult> {
  console.log('🇬🇧 Running UK Visa Jobs extractor...');

  // Determine terms to run
  const terms: string[] = [];
  if (options.searchTerms && options.searchTerms.length > 0) {
    terms.push(...options.searchTerms);
  } else if (options.searchKeyword) {
    terms.push(options.searchKeyword);
  } else {
    // No search terms = run once without keyword
    terms.push('');
  }

  // A non-finite limit (e.g. NaN from a failed parseInt upstream) would reach
  // the extractor as the string "NaN"; fall back to the default instead.
  const requestedMax = options.maxJobs;
  const maxJobs = typeof requestedMax === 'number' && Number.isFinite(requestedMax) ? requestedMax : 50;

  const allJobs: CreateJobInput[] = [];
  const seenIds = new Set<string>();
  const failures: string[] = [];

  for (let i = 0; i < terms.length; i++) {
    const term = terms[i];
    const termLabel = term ? `"${term}"` : 'all jobs';
    console.log(` Running for ${termLabel}...`);

    try {
      // Clear previous results for this run
      await clearStorageDataset();
      await mkdir(STORAGE_DIR, { recursive: true });

      // Run the extractor
      await new Promise<void>((resolve, reject) => {
        const child = spawn('npx', ['tsx', 'src/main.ts'], {
          cwd: UKVISAJOBS_DIR,
          stdio: 'inherit',
          env: {
            ...process.env,
            UKVISAJOBS_MAX_JOBS: String(maxJobs),
            UKVISAJOBS_SEARCH_KEYWORD: term,
          },
        });
        child.on('close', (code) => {
          if (code === 0) resolve();
          else reject(new Error(`UK Visa Jobs extractor exited with code ${code}`));
        });
        child.on('error', reject);
      });

      // Read the output dataset and accumulate
      const runJobs = await readDataset();
      let newCount = 0;
      for (const job of runJobs) {
        // Deduplicate by sourceJobId or jobUrl
        const id = job.sourceJobId || job.jobUrl;
        if (!seenIds.has(id)) {
          seenIds.add(id);
          allJobs.push(job);
          newCount++;
        }
      }
      console.log(` ✅ Fetched ${runJobs.length} jobs for ${termLabel} (${newCount} new unique)`);
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Unknown error';
      console.error(`❌ UK Visa Jobs failed for ${termLabel}: ${message}`);
      // Remember the failure but continue to the next term instead of failing completely
      failures.push(`${termLabel}: ${message}`);
    }

    // Delay between terms
    if (i < terms.length - 1) {
      console.log(' Waiting 5s before next search term...');
      await new Promise((resolve) => setTimeout(resolve, 5000));
    }
  }

  // `terms` always holds at least one entry, so equality means nothing succeeded.
  if (failures.length === terms.length) {
    return { success: false, jobs: [], error: failures.join('; ') };
  }

  console.log(`✅ UK Visa Jobs: imported total ${allJobs.length} unique jobs`);
  return { success: true, jobs: allJobs };
}
/**
 * Read jobs from the extractor's output dataset.
 *
 * Every `*.json` file in the dataset directory except `jobs.json` is parsed,
 * in sorted file-name order, and mapped to `CreateJobInput` with
 * `source: 'ukvisajobs'`. Missing titles and employers are replaced by
 * placeholders and `applicationLink` falls back to `jobUrl`.
 *
 * The function is best-effort and never throws: unreadable or malformed
 * files are skipped, and an unreadable directory yields an empty list. Unlike
 * a silent skip, each of those cases is logged so a broken extractor output
 * is visible; only a missing directory (nothing was written) stays quiet.
 * Records without a `jobUrl` are skipped as well, because the caller relies
 * on `jobUrl` as a de-duplication key.
 *
 * @returns The jobs that could be read; empty when there are none.
 */
async function readDataset(): Promise<CreateJobInput[]> {
  const jobs: CreateJobInput[] = [];

  let files: string[];
  try {
    files = await readdir(STORAGE_DIR);
  } catch (error) {
    // Dataset directory doesn't exist yet: nothing to read, nothing to report.
    const isMissing = error instanceof Error && 'code' in error && error.code === 'ENOENT';
    if (!isMissing) {
      const message = error instanceof Error ? error.message : 'Unknown error';
      console.warn(`UK Visa Jobs: could not read dataset directory: ${message}`);
    }
    return jobs;
  }

  const jsonFiles = files.filter((f) => f.endsWith('.json') && f !== 'jobs.json').sort();

  for (const file of jsonFiles) {
    try {
      const content = await readFile(join(STORAGE_DIR, file), 'utf-8');
      const job = JSON.parse(content);

      if (typeof job?.jobUrl !== 'string' || job.jobUrl === '') {
        console.warn(`UK Visa Jobs: skipping ${file}: record has no jobUrl`);
        continue;
      }

      // Map to CreateJobInput format
      jobs.push({
        source: 'ukvisajobs',
        sourceJobId: job.sourceJobId,
        title: job.title || 'Unknown Title',
        employer: job.employer || 'Unknown Employer',
        employerUrl: job.employerUrl,
        jobUrl: job.jobUrl,
        applicationLink: job.applicationLink || job.jobUrl,
        location: job.location,
        deadline: job.deadline,
        salary: job.salary,
        jobDescription: job.jobDescription,
        datePosted: job.datePosted,
        degreeRequired: job.degreeRequired,
        jobType: job.jobType,
        jobLevel: job.jobLevel,
      });
    } catch (error) {
      // Skip invalid files, but leave a trace of which one and why
      const message = error instanceof Error ? error.message : 'Unknown error';
      console.warn(`UK Visa Jobs: skipping ${file}: ${message}`);
    }
  }

  return jobs;
}

View File

@ -2,7 +2,7 @@
* Shared types for the job-ops orchestrator.
*/
export type JobStatus =
export type JobStatus =
| 'discovered' // Crawled but not processed
| 'processing' // Currently generating resume
| 'ready' // PDF generated, waiting for user to apply
@ -13,11 +13,12 @@ export type JobStatus =
export type JobSource =
| 'gradcracker'
| 'indeed'
| 'linkedin';
| 'linkedin'
| 'ukvisajobs';
export interface Job {
id: string;
// Source / provenance
source: JobSource;
sourceJobId: string | null; // External ID (if provided)
@ -37,7 +38,7 @@ export interface Job {
degreeRequired: string | null;
starting: string | null;
jobDescription: string | null;
// Orchestrator enrichments
status: JobStatus;
suitabilityScore: number | null; // 0-100 AI-generated score
@ -71,7 +72,7 @@ export interface Job {
companyReviewsCount: number | null;
vacancyCount: number | null;
workFromHomeType: string | null;
// Timestamps
discoveredAt: string;
processedAt: string | null;
@ -200,4 +201,25 @@ export interface AppSettings {
resumeProjects: ResumeProjectsSettings;
defaultResumeProjects: ResumeProjectsSettings;
overrideResumeProjects: ResumeProjectsSettings | null;
ukvisajobsMaxJobs: number;
defaultUkvisajobsMaxJobs: number;
overrideUkvisajobsMaxJobs: number | null;
searchTerms: string[];
defaultSearchTerms: string[];
overrideSearchTerms: string[] | null;
jobspyLocation: string;
defaultJobspyLocation: string;
overrideJobspyLocation: string | null;
jobspyResultsWanted: number;
defaultJobspyResultsWanted: number;
overrideJobspyResultsWanted: number | null;
jobspyHoursOld: number;
defaultJobspyHoursOld: number;
overrideJobspyHoursOld: number | null;
jobspyCountryIndeed: string;
defaultJobspyCountryIndeed: string;
overrideJobspyCountryIndeed: string | null;
jobspyLinkedinFetchDescription: boolean;
defaultJobspyLinkedinFetchDescription: boolean;
overrideJobspyLinkedinFetchDescription: boolean | null;
}