Merge pull request #4 from DaKheera47/extractor-ukvisajobs
Extractor ukvisajobs
This commit is contained in:
commit
a6310af294
13
.env.example
13
.env.example
@ -30,10 +30,19 @@ JOB_COMPLETE_WEBHOOK_URL=
|
||||
# JobSpy (Indeed/LinkedIn scraping) - optional
|
||||
# =============================================================================
|
||||
# These control the Python JobSpy scraper used by the pipeline.
|
||||
JOBSPY_SITES=indeed,linkedin
|
||||
JOBSPY_SEARCH_TERM=web developer
|
||||
|
||||
JOBSPY_LOCATION=UK
|
||||
JOBSPY_RESULTS_WANTED=200
|
||||
JOBSPY_HOURS_OLD=72
|
||||
JOBSPY_COUNTRY_INDEED=UK
|
||||
JOBSPY_LINKEDIN_FETCH_DESCRIPTION=1
|
||||
|
||||
# =============================================================================
|
||||
# UKVisaJobs (UK visa sponsorship jobs) - optional
|
||||
# =============================================================================
|
||||
# Get these tokens from browser dev tools after logging into my.ukvisajobs.com
|
||||
# See extractors/ukvisajobs/README.md for detailed instructions.
|
||||
UKVISAJOBS_TOKEN=
|
||||
UKVISAJOBS_AUTH_TOKEN=
|
||||
UKVISAJOBS_CSRF_TOKEN=
|
||||
UKVISAJOBS_CI_SESSION=
|
||||
|
||||
@ -28,6 +28,7 @@ RUN pip3 install --no-cache-dir --break-system-packages playwright python-jobspy
|
||||
# Copy package files first for better caching
|
||||
COPY orchestrator/package*.json ./orchestrator/
|
||||
COPY extractors/gradcracker/package*.json ./extractors/gradcracker/
|
||||
COPY extractors/ukvisajobs/package*.json ./extractors/ukvisajobs/
|
||||
|
||||
# Install Node.js dependencies
|
||||
WORKDIR /app/orchestrator
|
||||
@ -39,11 +40,15 @@ RUN npm install --production=false
|
||||
# Install Camoufox browser (downloads its own Firefox fork)
|
||||
RUN npx camoufox fetch
|
||||
|
||||
WORKDIR /app/extractors/ukvisajobs
|
||||
RUN npm install --production=false
|
||||
|
||||
# Copy source code
|
||||
WORKDIR /app
|
||||
COPY orchestrator ./orchestrator
|
||||
COPY extractors/gradcracker ./extractors/gradcracker
|
||||
COPY extractors/jobspy ./extractors/jobspy
|
||||
COPY extractors/ukvisajobs ./extractors/ukvisajobs
|
||||
COPY resume-generator ./resume-generator
|
||||
|
||||
# Build the orchestrator (client + server)
|
||||
|
||||
@ -75,6 +75,7 @@ job-ops/
|
||||
src/shared/ # shared types (Job, PipelineRun, etc.)
|
||||
extractors/gradcracker/ # Crawlee crawler (Gradcracker)
|
||||
extractors/jobspy/ # JobSpy wrapper (Indeed/LinkedIn/etc)
|
||||
extractors/ukvisajobs/ # UK Visa Jobs API extractor
|
||||
resume-generator/ # Python Playwright automation for rxresu.me
|
||||
base.json # your exported base resume (template)
|
||||
data/ # persisted runtime artifacts (Docker default)
|
||||
@ -87,7 +88,7 @@ job-ops/
|
||||
## Data model (SQLite)
|
||||
|
||||
- `jobs`
|
||||
- from crawl: `title`, `employer`, `jobUrl`, `applicationLink`, `deadline`, `salary`, `location`, `jobDescription`, etc.
|
||||
- from crawl: `title`, `employer`, `jobUrl`, `applicationLink`, `deadline`, `salary`, `location`, `jobDescription`, `source` (gradcracker/indeed/linkedin/ukvisajobs), etc.
|
||||
- enrichments: `status` (`discovered` -> `processing` -> `ready` -> `applied`/`rejected`), `suitabilityScore`, `suitabilityReason`, `tailoredSummary`, `pdfPath`, `notionPageId`
|
||||
- `pipeline_runs`: audit log of runs (`running`/`completed`/`failed`, counts, error)
|
||||
|
||||
|
||||
@ -34,7 +34,7 @@ services:
|
||||
- PIPELINE_MIN_SCORE=${PIPELINE_MIN_SCORE:-50}
|
||||
|
||||
# JobSpy (Indeed/LinkedIn scraping) - optional
|
||||
- JOBSPY_SITES=${JOBSPY_SITES:-indeed,linkedin}
|
||||
|
||||
# Preferred: pipe-separated list, e.g. "web developer|frontend developer|react developer"
|
||||
- JOBSPY_SEARCH_TERMS=${JOBSPY_SEARCH_TERMS:-web developer|graduate web developer|react developer|graduate software engineer|graduate react developer|next js developer|graduate front end developer}
|
||||
- JOBSPY_LOCATION=${JOBSPY_LOCATION:-UK}
|
||||
@ -50,6 +50,13 @@ services:
|
||||
# Optional: Webhook secret for n8n
|
||||
- WEBHOOK_SECRET=${WEBHOOK_SECRET:-}
|
||||
|
||||
# UKVisaJobs (UK visa sponsorship jobs) - optional
|
||||
- UKVISAJOBS_TOKEN=${UKVISAJOBS_TOKEN:-}
|
||||
- UKVISAJOBS_AUTH_TOKEN=${UKVISAJOBS_AUTH_TOKEN:-}
|
||||
- UKVISAJOBS_CSRF_TOKEN=${UKVISAJOBS_CSRF_TOKEN:-}
|
||||
- UKVISAJOBS_CI_SESSION=${UKVISAJOBS_CI_SESSION:-}
|
||||
- UKVISAJOBS_SEARCH_KEYWORD=${UKVISAJOBS_SEARCH_KEYWORD:-}
|
||||
|
||||
# Python path (uses system python in container)
|
||||
- PYTHON_PATH=/usr/bin/python3
|
||||
restart: unless-stopped
|
||||
@ -59,6 +66,27 @@ services:
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
develop:
|
||||
watch:
|
||||
# Rebuild container when package.json changes
|
||||
- path: ./orchestrator/package.json
|
||||
action: rebuild
|
||||
- path: ./orchestrator/package-lock.json
|
||||
action: rebuild
|
||||
# Sync source code changes into the container and restart the service
|
||||
- path: ./orchestrator/src
|
||||
target: /app/orchestrator/src
|
||||
action: sync+restart
|
||||
# Sync extractor changes
|
||||
- path: ./extractors/gradcracker/src
|
||||
target: /app/extractors/gradcracker/src
|
||||
action: sync+restart
|
||||
- path: ./extractors/ukvisajobs/src
|
||||
target: /app/extractors/ukvisajobs/src
|
||||
action: sync+restart
|
||||
- path: ./extractors/jobspy
|
||||
target: /app/extractors/jobspy
|
||||
action: sync+restart
|
||||
|
||||
# Volumes for data persistence
|
||||
volumes:
|
||||
|
||||
@ -17,11 +17,30 @@ const locations = [
|
||||
];
|
||||
|
||||
// roles
|
||||
const roles = [
|
||||
/** Role slugs searched when GRADCRACKER_SEARCH_TERMS is unset or unusable. */
const defaultRoles = [
  "web-development",
  "software-systems",
];

/**
 * Converts the raw GRADCRACKER_SEARCH_TERMS value into URL-friendly role slugs.
 *
 * The value is expected to be a JSON array of strings. Each string is
 * lower-cased, runs of non-alphanumeric characters become a single `-`, and
 * leading/trailing dashes are stripped.
 *
 * Non-string elements and terms that slugify to an empty string are dropped
 * rather than aborting the whole list or yielding an empty slug.
 *
 * @param raw - Raw environment variable value.
 * @returns The usable slugs; empty when the JSON is not an array or nothing survives.
 * @throws SyntaxError when `raw` is not valid JSON.
 */
function parseRoleSlugs(raw: string): string[] {
  const parsed: unknown = JSON.parse(raw);
  if (!Array.isArray(parsed)) return [];

  return parsed
    .filter((term): term is string => typeof term === 'string')
    .map(term =>
      term.toLowerCase()
        .replace(/[^a-z0-9]+/g, '-')
        .replace(/^-+|-+$/g, '')
    )
    .filter(slug => slug.length > 0);
}

let roles = defaultRoles;
const envRolesRaw = process.env.GRADCRACKER_SEARCH_TERMS;

if (envRolesRaw) {
  try {
    const configuredRoles = parseRoleSlugs(envRolesRaw);
    if (configuredRoles.length > 0) {
      roles = configuredRoles;
      console.log(`Using configured search terms: ${roles.join(', ')}`);
    } else {
      // Valid JSON but nothing usable: say so instead of silently falling back.
      console.warn('GRADCRACKER_SEARCH_TERMS contained no usable search terms; using defaults');
    }
  } catch (e) {
    console.warn('Failed to parse GRADCRACKER_SEARCH_TERMS', e);
  }
}
|
||||
|
||||
// combo of locations and roles
|
||||
const gradcrackerUrls = locations.flatMap((location) => {
|
||||
return roles.map((role) => {
|
||||
|
||||
11
extractors/ukvisajobs/.gitignore
vendored
Normal file
11
extractors/ukvisajobs/.gitignore
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
# Dependencies
|
||||
node_modules/
|
||||
|
||||
# Build output
|
||||
dist/
|
||||
|
||||
# Crawlee storage
|
||||
storage/
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
40
extractors/ukvisajobs/README.md
Normal file
40
extractors/ukvisajobs/README.md
Normal file
@ -0,0 +1,40 @@
|
||||
# UK Visa Jobs Extractor
|
||||
|
||||
Fetches job listings from [my.ukvisajobs.com](https://my.ukvisajobs.com) for roles whose employers may sponsor work visas.
|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
Set the following environment variables (you can get these from your browser's dev tools after logging in):
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `UKVISAJOBS_TOKEN` | JWT token from the request body (required) |
|
||||
| `UKVISAJOBS_AUTH_TOKEN` | Auth cookie token (defaults to UKVISAJOBS_TOKEN) |
|
||||
| `UKVISAJOBS_CSRF_TOKEN` | CSRF token from cookies |
|
||||
| `UKVISAJOBS_CI_SESSION` | CI session ID from cookies |
|
||||
| `UKVISAJOBS_MAX_JOBS` | Maximum jobs to fetch (default: 50, max: 200) |
|
||||
| `UKVISAJOBS_SEARCH_KEYWORD` | Optional search filter |
|
||||
|
||||
## How to get tokens
|
||||
|
||||
1. Log into `my.ukvisajobs.com` in your browser
|
||||
2. Open Developer Tools → Network tab
|
||||
3. Navigate to the jobs page
|
||||
4. Find the `fetch-jobs-data` POST request
|
||||
5. Copy values:
|
||||
- From **Request Body**: copy the `token` field → `UKVISAJOBS_TOKEN`
|
||||
- From **Cookies**: copy `authToken` → `UKVISAJOBS_AUTH_TOKEN`, `csrf_token` → `UKVISAJOBS_CSRF_TOKEN`, `ci_session` → `UKVISAJOBS_CI_SESSION`
|
||||
|
||||
## Running
|
||||
|
||||
```bash
|
||||
npm start
|
||||
```
|
||||
|
||||
Output is written to `storage/datasets/default/` as JSON files.
|
||||
599
extractors/ukvisajobs/package-lock.json
generated
Normal file
599
extractors/ukvisajobs/package-lock.json
generated
Normal file
@ -0,0 +1,599 @@
|
||||
{
|
||||
"name": "ukvisajobs-extractor",
|
||||
"version": "0.0.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "ukvisajobs-extractor",
|
||||
"version": "0.0.1",
|
||||
"license": "ISC",
|
||||
"devDependencies": {
|
||||
"@apify/tsconfig": "^0.1.0",
|
||||
"@types/node": "^24.0.0",
|
||||
"tsx": "^4.4.0",
|
||||
"typescript": "~5.9.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@apify/tsconfig": {
|
||||
"version": "0.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@apify/tsconfig/-/tsconfig-0.1.1.tgz",
|
||||
"integrity": "sha512-cS7mwN2UW1UXcluGXRDHH0Vr2VsSLkw2DwLTwoSBkcJSe8fvCr3MPryTSq0uod4MashpMURxJ7CsLKxs82VmOQ==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/@esbuild/aix-ppc64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.2.tgz",
|
||||
"integrity": "sha512-GZMB+a0mOMZs4MpDbj8RJp4cw+w1WV5NYD6xzgvzUJ5Ek2jerwfO2eADyI6ExDSUED+1X8aMbegahsJi+8mgpw==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"aix"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-arm": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.2.tgz",
|
||||
"integrity": "sha512-DVNI8jlPa7Ujbr1yjU2PfUSRtAUZPG9I1RwW4F4xFB1Imiu2on0ADiI/c3td+KmDtVKNbi+nffGDQMfcIMkwIA==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-pvz8ZZ7ot/RBphf8fv60ljmaoydPU12VuXHImtAs0XhLLw+EXBi2BLe3OYSBslR4rryHvweW5gmkKFwTiFy6KA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-z8Ank4Byh4TJJOh4wpz8g2vDy75zFL0TlZlkUkEwYXuPSgX8yzep596n6mT7905kA9uHZsf/o2OJZubl2l3M7A==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/darwin-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-davCD2Zc80nzDVRwXTcQP/28fiJbcOwvdolL0sOiOsbwBa72kegmVU0Wrh1MYrbuCL98Omp5dVhQFWRKR2ZAlg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/darwin-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-ZxtijOmlQCBWGwbVmwOF/UCzuGIbUkqB1faQRf5akQmxRJ1ujusWsb3CVfk/9iZKr2L5SMU5wPBi1UWbvL+VQA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/freebsd-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-lS/9CN+rgqQ9czogxlMcBMGd+l8Q3Nj1MFQwBZJyoEKI50XGxwuzznYdwcav6lpOGv5BqaZXqvBSiB/kJ5op+g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"freebsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/freebsd-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-tAfqtNYb4YgPnJlEFu4c212HYjQWSO/w/h/lQaBK7RbwGIkBOuNKQI9tqWzx7Wtp7bTPaGC6MJvWI608P3wXYA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"freebsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-arm": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.2.tgz",
|
||||
"integrity": "sha512-vWfq4GaIMP9AIe4yj1ZUW18RDhx6EPQKjwe7n8BbIecFtCQG4CfHGaHuh7fdfq+y3LIA2vGS/o9ZBGVxIDi9hw==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-hYxN8pr66NsCCiRFkHUAsxylNOcAQaxSSkHMMjcpx0si13t1LHFphxJZUiGwojB1a/Hd5OiPIqDdXONia6bhTw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-ia32": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.2.tgz",
|
||||
"integrity": "sha512-MJt5BRRSScPDwG2hLelYhAAKh9imjHK5+NE/tvnRLbIqUWa+0E9N4WNMjmp/kXXPHZGqPLxggwVhz7QP8CTR8w==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-loong64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.2.tgz",
|
||||
"integrity": "sha512-lugyF1atnAT463aO6KPshVCJK5NgRnU4yb3FUumyVz+cGvZbontBgzeGFO1nF+dPueHD367a2ZXe1NtUkAjOtg==",
|
||||
"cpu": [
|
||||
"loong64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-mips64el": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.2.tgz",
|
||||
"integrity": "sha512-nlP2I6ArEBewvJ2gjrrkESEZkB5mIoaTswuqNFRv/WYd+ATtUpe9Y09RnJvgvdag7he0OWgEZWhviS1OTOKixw==",
|
||||
"cpu": [
|
||||
"mips64el"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-ppc64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.2.tgz",
|
||||
"integrity": "sha512-C92gnpey7tUQONqg1n6dKVbx3vphKtTHJaNG2Ok9lGwbZil6DrfyecMsp9CrmXGQJmZ7iiVXvvZH6Ml5hL6XdQ==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-riscv64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.2.tgz",
|
||||
"integrity": "sha512-B5BOmojNtUyN8AXlK0QJyvjEZkWwy/FKvakkTDCziX95AowLZKR6aCDhG7LeF7uMCXEJqwa8Bejz5LTPYm8AvA==",
|
||||
"cpu": [
|
||||
"riscv64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-s390x": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.2.tgz",
|
||||
"integrity": "sha512-p4bm9+wsPwup5Z8f4EpfN63qNagQ47Ua2znaqGH6bqLlmJ4bx97Y9JdqxgGZ6Y8xVTixUnEkoKSHcpRlDnNr5w==",
|
||||
"cpu": [
|
||||
"s390x"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-uwp2Tip5aPmH+NRUwTcfLb+W32WXjpFejTIOWZFw/v7/KnpCDKG66u4DLcurQpiYTiYwQ9B7KOeMJvLCu/OvbA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/netbsd-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-Kj6DiBlwXrPsCRDeRvGAUb/LNrBASrfqAIok+xB0LxK8CHqxZ037viF13ugfsIpePH93mX7xfJp97cyDuTZ3cw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"netbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/netbsd-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-HwGDZ0VLVBY3Y+Nw0JexZy9o/nUAWq9MlV7cahpaXKW6TOzfVno3y3/M8Ga8u8Yr7GldLOov27xiCnqRZf0tCA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"netbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openbsd-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-DNIHH2BPQ5551A7oSHD0CKbwIA/Ox7+78/AWkbS5QoRzaqlev2uFayfSxq68EkonB+IKjiuxBFoV8ESJy8bOHA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openbsd-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-/it7w9Nb7+0KFIzjalNJVR5bOzA9Vay+yIPLVHfIQYG/j+j9VTH84aNB8ExGKPU4AzfaEvN9/V4HV+F+vo8OEg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openharmony-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-LRBbCmiU51IXfeXk59csuX/aSaToeG7w48nMwA6049Y4J4+VbWALAuXcs+qcD04rHDuSCSRKdmY63sruDS5qag==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openharmony"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/sunos-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-kMtx1yqJHTmqaqHPAzKCAkDaKsffmXkPHThSfRwZGyuqyIeBvf08KSsYXl+abf5HDAPMJIPnbBfXvP2ZC2TfHg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"sunos"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-arm64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.2.tgz",
|
||||
"integrity": "sha512-Yaf78O/B3Kkh+nKABUF++bvJv5Ijoy9AN1ww904rOXZFLWVc5OLOfL56W+C8F9xn5JQZa3UX6m+IktJnIb1Jjg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-ia32": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.2.tgz",
|
||||
"integrity": "sha512-Iuws0kxo4yusk7sw70Xa2E2imZU5HoixzxfGCdxwBdhiDgt9vX9VUCBhqcwY7/uh//78A1hMkkROMJq9l27oLQ==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-x64": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.2.tgz",
|
||||
"integrity": "sha512-sRdU18mcKf7F+YgheI/zGf5alZatMUTKj/jNS6l744f9u3WFu4v7twcUI9vu4mknF4Y9aDlblIie0IM+5xxaqQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "24.10.4",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.4.tgz",
|
||||
"integrity": "sha512-vnDVpYPMzs4wunl27jHrfmwojOGKya0xyM3sH+UE5iv5uPS6vX7UIoh6m+vQc5LGBq52HBKPIn/zcSZVzeDEZg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~7.16.0"
|
||||
}
|
||||
},
|
||||
"node_modules/esbuild": {
|
||||
"version": "0.27.2",
|
||||
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.2.tgz",
|
||||
"integrity": "sha512-HyNQImnsOC7X9PMNaCIeAm4ISCQXs5a5YasTXVliKv4uuBo1dKrG0A+uQS8M5eXjVMnLg3WgXaKvprHlFJQffw==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"esbuild": "bin/esbuild"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@esbuild/aix-ppc64": "0.27.2",
|
||||
"@esbuild/android-arm": "0.27.2",
|
||||
"@esbuild/android-arm64": "0.27.2",
|
||||
"@esbuild/android-x64": "0.27.2",
|
||||
"@esbuild/darwin-arm64": "0.27.2",
|
||||
"@esbuild/darwin-x64": "0.27.2",
|
||||
"@esbuild/freebsd-arm64": "0.27.2",
|
||||
"@esbuild/freebsd-x64": "0.27.2",
|
||||
"@esbuild/linux-arm": "0.27.2",
|
||||
"@esbuild/linux-arm64": "0.27.2",
|
||||
"@esbuild/linux-ia32": "0.27.2",
|
||||
"@esbuild/linux-loong64": "0.27.2",
|
||||
"@esbuild/linux-mips64el": "0.27.2",
|
||||
"@esbuild/linux-ppc64": "0.27.2",
|
||||
"@esbuild/linux-riscv64": "0.27.2",
|
||||
"@esbuild/linux-s390x": "0.27.2",
|
||||
"@esbuild/linux-x64": "0.27.2",
|
||||
"@esbuild/netbsd-arm64": "0.27.2",
|
||||
"@esbuild/netbsd-x64": "0.27.2",
|
||||
"@esbuild/openbsd-arm64": "0.27.2",
|
||||
"@esbuild/openbsd-x64": "0.27.2",
|
||||
"@esbuild/openharmony-arm64": "0.27.2",
|
||||
"@esbuild/sunos-x64": "0.27.2",
|
||||
"@esbuild/win32-arm64": "0.27.2",
|
||||
"@esbuild/win32-ia32": "0.27.2",
|
||||
"@esbuild/win32-x64": "0.27.2"
|
||||
}
|
||||
},
|
||||
"node_modules/fsevents": {
|
||||
"version": "2.3.3",
|
||||
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
|
||||
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/get-tsconfig": {
|
||||
"version": "4.13.0",
|
||||
"resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.0.tgz",
|
||||
"integrity": "sha512-1VKTZJCwBrvbd+Wn3AOgQP/2Av+TfTCOlE4AcRJE72W1ksZXbAx8PPBR9RzgTeSPzlPMHrbANMH3LbltH73wxQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"resolve-pkg-maps": "^1.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/resolve-pkg-maps": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
|
||||
"integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/tsx": {
|
||||
"version": "4.21.0",
|
||||
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz",
|
||||
"integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"esbuild": "~0.27.0",
|
||||
"get-tsconfig": "^4.7.5"
|
||||
},
|
||||
"bin": {
|
||||
"tsx": "dist/cli.mjs"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"fsevents": "~2.3.3"
|
||||
}
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "5.9.3",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
|
||||
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.17"
|
||||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "7.16.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
||||
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
}
|
||||
}
|
||||
}
|
||||
22
extractors/ukvisajobs/package.json
Normal file
22
extractors/ukvisajobs/package.json
Normal file
@ -0,0 +1,22 @@
|
||||
{
|
||||
"name": "ukvisajobs-extractor",
|
||||
"version": "0.0.1",
|
||||
"type": "module",
|
||||
"description": "UK Visa Jobs extractor - fetches job listings that may sponsor work visas",
|
||||
"main": "dist/main.js",
|
||||
"dependencies": {},
|
||||
"devDependencies": {
|
||||
"@apify/tsconfig": "^0.1.0",
|
||||
"@types/node": "^24.0.0",
|
||||
"tsx": "^4.4.0",
|
||||
"typescript": "~5.9.0"
|
||||
},
|
||||
"scripts": {
|
||||
"start": "npm run start:dev",
|
||||
"start:prod": "node dist/main.js",
|
||||
"start:dev": "tsx src/main.ts",
|
||||
"build": "tsc"
|
||||
},
|
||||
"author": "",
|
||||
"license": "ISC"
|
||||
}
|
||||
303
extractors/ukvisajobs/src/main.ts
Normal file
303
extractors/ukvisajobs/src/main.ts
Normal file
@ -0,0 +1,303 @@
|
||||
/**
|
||||
* UK Visa Jobs Extractor
|
||||
*
|
||||
* Fetches job listings from my.ukvisajobs.com that may sponsor work visas.
|
||||
* Outputs JSON to stdout for the orchestrator to consume.
|
||||
*
|
||||
* Environment variables:
|
||||
* UKVISAJOBS_TOKEN - JWT token (required)
|
||||
* UKVISAJOBS_AUTH_TOKEN - Auth cookie token (defaults to UKVISAJOBS_TOKEN)
|
||||
* UKVISAJOBS_CSRF_TOKEN - CSRF token cookie
|
||||
* UKVISAJOBS_CI_SESSION - CI session cookie
|
||||
* UKVISAJOBS_MAX_JOBS - Maximum jobs to fetch (default: 50, max: 200) - Set via UI Settings
|
||||
* UKVISAJOBS_SEARCH_KEYWORD - Optional search filter
|
||||
*/
|
||||
|
||||
import { mkdir, writeFile } from 'fs/promises';
|
||||
import { join, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
/** Endpoint that `fetchPage` POSTs to for each page of job listings. */
const API_URL = 'https://my.ukvisajobs.com/ukvisa-api/api/fetch-jobs-data';

/** Page size used for pagination — presumably the number of jobs the API returns per page; verify against a live response. */
const JOBS_PER_PAGE = 15;

/** Number of jobs fetched when UKVISAJOBS_MAX_JOBS is not set (see file header). */
const DEFAULT_MAX_JOBS = 50;

/** Upper bound on UKVISAJOBS_MAX_JOBS (see file header). */
const MAX_ALLOWED_JOBS = 200;
|
||||
|
||||
/**
 * A single job record as it appears in the `jobs` array of the
 * fetch-jobs-data response. Every field is typed as a string here, including
 * the numeric-looking and yes/no style ones.
 */
interface UkVisaJobsApiJob {
  // --- Identity and links ---
  id: string;
  title: string;
  company_name: string;
  company_link?: string;
  job_link: string;

  // --- Location and dates ---
  city: string;
  created_date: string;
  job_expire: string;

  // --- Description and pay ---
  description?: string;
  /** Numeric string; `mapJob` parses it with `toNumberOrNull`. */
  min_salary?: string;
  /** Numeric string; `mapJob` parses it with `toNumberOrNull`. */
  max_salary?: string;
  /** Appended to the formatted salary after " / " when present. */
  salary_interval?: string;
  salary_method?: string;

  // --- Classification ---
  degree_requirement?: string;
  job_type?: string;
  job_level?: string;
  job_industry?: string;

  // --- Visa sponsorship signals ---
  /** `mapJob` compares this case-insensitively against "yes". */
  visa_acceptance?: string;
  /** `mapJob` compares this case-insensitively against "yes". */
  applicants_outside_uk?: string;
  /** `mapJob` compares this case-insensitively against "yes". */
  likely_to_sponsor?: string;
  // NOTE(review): the remaining flags are presumably yes/no strings too — confirm.
  definitely_sponsored?: string;
  new_entrant?: string;
  student_graduate?: string;

  // --- Miscellaneous ---
  image?: string;
  computed_cos_total?: string;
}
|
||||
|
||||
/** JSON body that `fetchPage` resolves with for one page of results. */
interface UkVisaJobsApiResponse {
  /** Status value reported inside the response body. */
  status: number;
  /** Job count reported by the API — presumably the total across all pages; verify. */
  totalJobs: number;
  query?: string;
  /** Job records contained in this page. */
  jobs: UkVisaJobsApiJob[];
}
|
||||
|
||||
/**
 * Normalised job record returned by `mapJob` for each raw API job.
 * Optional fields are omitted when no value is available.
 */
interface ExtractedJob {
  // --- Provenance ---
  /** Always the literal `'ukvisajobs'` for records from this extractor. */
  source: 'ukvisajobs';
  sourceJobId: string;

  // --- Core listing details ---
  title: string;
  employer: string;
  employerUrl?: string;
  jobUrl: string;
  applicationLink: string;

  // --- Optional enrichment ---
  location?: string;
  deadline?: string;
  /** Human-readable salary text built by `mapJob` from the min/max salary fields. */
  salary?: string;
  jobDescription?: string;
  datePosted?: string;
  degreeRequired?: string;
  jobType?: string;
  jobLevel?: string;
}
|
||||
|
||||
/**
 * Coerces a loosely typed value to a non-empty string.
 *
 * @param value - Any value.
 * @returns The trimmed text for a non-blank string, `String(value)` for a
 *   number or boolean, and `null` for everything else (including `null`,
 *   `undefined`, blank strings, and objects).
 */
function toStringOrNull(value: unknown): string | null {
  switch (typeof value) {
    case 'string': {
      const text = value.trim();
      return text.length === 0 ? null : text;
    }
    case 'number':
    case 'boolean':
      return String(value);
    default:
      // Covers null (typeof 'object'), undefined, objects, symbols, etc.
      return null;
  }
}
|
||||
|
||||
/**
 * Coerces a loosely typed value to a finite number.
 *
 * @param value - Any value.
 * @returns The number itself when it is finite; for a non-blank string, the
 *   result of `Number()` on its trimmed text when that is finite; otherwise
 *   `null` (booleans, objects, `NaN`, infinities, and blank or non-numeric
 *   strings all give `null`).
 */
function toNumberOrNull(value: unknown): number | null {
  let candidate: number;

  if (typeof value === 'number') {
    candidate = value;
  } else if (typeof value === 'string' && value.trim() !== '') {
    candidate = Number(value.trim());
  } else {
    return null;
  }

  // One shared exit rejects NaN and +/-Infinity from either path.
  return Number.isFinite(candidate) ? candidate : null;
}
|
||||
|
||||
/**
 * Requests one page of job listings from the UKVisaJobs API.
 *
 * Sends a multipart form POST to `API_URL` with fixed filter values, the page
 * number, and the caller's token, along with browser-like request headers.
 *
 * @param pageNo - Page number to request; also embedded in the referer header.
 * @param token - Value sent as the `token` form field.
 * @param cookies - Raw string sent as the `cookie` request header.
 * @param options - Optional filters. When `searchKeyword` is missing or empty,
 *   the literal string `'null'` is sent instead.
 * @returns The response body parsed as JSON.
 * @throws Error when the response status is not OK; the message includes the
 *   status code, status text, and response body text.
 */
async function fetchPage(
  pageNo: number,
  token: string,
  cookies: string,
  options: { searchKeyword?: string } = {}
): Promise<UkVisaJobsApiResponse> {
  // Form fields in the order they are appended to the request body.
  const fields: ReadonlyArray<readonly [string, string]> = [
    ['is_global', '0'],
    ['sortBy', 'desc'],
    ['pageNo', String(pageNo)],
    ['visaAcceptance', 'false'],
    ['applicants_outside_uk', 'false'],
    ['searchKeyword', options.searchKeyword || 'null'],
    ['token', token],
  ];

  // Native FormData (Node.js 18+).
  const body = new FormData();
  for (const [key, val] of fields) {
    body.append(key, val);
  }

  const headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'en-US,en;q=0.9',
    'cookie': cookies,
    'origin': 'https://my.ukvisajobs.com',
    'referer': `https://my.ukvisajobs.com/open-jobs/1?is_global=0&sortBy=desc&pageNo=${pageNo}&visaAcceptance=false&applicants_outside_uk=false`,
    'user-agent': 'Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Mobile Safari/537.36',
  };

  const response = await fetch(API_URL, { method: 'POST', headers, body });

  if (response.ok) {
    return (await response.json()) as UkVisaJobsApiResponse;
  }

  // Include the body text so the failure reason is visible to the caller.
  const text = await response.text();
  throw new Error(`UKVisaJobs API returned ${response.status}: ${response.statusText} - ${text}`);
}
|
||||
|
||||
/**
 * Converts one raw UKVisaJobs API listing into the extractor's output shape.
 *
 * Salary text:
 * - both bounds positive: `£min-max`
 * - only the upper bound positive: `£max`
 * - only the lower bound positive: `From £min`
 * - otherwise the salary is left undefined.
 * When a salary was produced and `salary_interval` is set, ` / <interval>` is
 * appended. Amounts are formatted with the fixed `en-GB` locale so the output
 * does not depend on the locale of the host running the extractor.
 *
 * Description: the listing's own `description` is used when present;
 * otherwise a one-line summary of the visa-sponsorship flags set to "yes"
 * (case-insensitive) is used, or `undefined` when none are set.
 *
 * @param raw - A single job object as returned by the API.
 * @returns The normalised job record.
 */
function mapJob(raw: UkVisaJobsApiJob): ExtractedJob {
  // Non-positive or unparsable bounds are treated as "not provided".
  const positiveOrNull = (amount: number | null): number | null =>
    amount !== null && amount > 0 ? amount : null;
  const formatAmount = (amount: number): string => amount.toLocaleString('en-GB');

  const minSalary = positiveOrNull(toNumberOrNull(raw.min_salary));
  const maxSalary = positiveOrNull(toNumberOrNull(raw.max_salary));

  let salary: string | undefined;
  if (minSalary !== null && maxSalary !== null) {
    salary = `£${formatAmount(minSalary)}-${formatAmount(maxSalary)}`;
  } else if (maxSalary !== null) {
    salary = `£${formatAmount(maxSalary)}`;
  } else if (minSalary !== null) {
    salary = `From £${formatAmount(minSalary)}`;
  }
  if (salary !== undefined && raw.salary_interval) {
    salary += ` / ${raw.salary_interval}`;
  }

  // The flags arrive as loosely-typed API data; only a string equal to "yes"
  // (any case) counts, and non-string values are ignored rather than throwing.
  const isYes = (flag: unknown): boolean =>
    typeof flag === 'string' && flag.toLowerCase() === 'yes';

  const visaFlags: Array<[unknown, string]> = [
    [raw.visa_acceptance, 'Visa acceptance: Yes'],
    [raw.applicants_outside_uk, 'Accepts applicants outside UK'],
    [raw.likely_to_sponsor, 'Likely to sponsor'],
    [raw.definitely_sponsored, 'Definitely sponsored'],
    [raw.new_entrant, 'New entrant friendly'],
    [raw.student_graduate, 'Student/Graduate friendly'],
  ];
  const visaInfo = visaFlags.filter(([flag]) => isYes(flag)).map(([, label]) => label);

  const description = raw.description
    ? raw.description
    : visaInfo.length > 0
      ? `Visa sponsorship info: ${visaInfo.join(', ')}`
      : undefined;

  return {
    source: 'ukvisajobs',
    sourceJobId: raw.id,
    title: raw.title || 'Unknown Title',
    employer: raw.company_name || 'Unknown Employer',
    employerUrl: toStringOrNull(raw.company_link) ?? undefined,
    // The API exposes a single link, used for both viewing and applying.
    jobUrl: raw.job_link,
    applicationLink: raw.job_link,
    location: raw.city || undefined,
    deadline: raw.job_expire || undefined,
    salary,
    jobDescription: description,
    datePosted: raw.created_date || undefined,
    degreeRequired: toStringOrNull(raw.degree_requirement) ?? undefined,
    jobType: toStringOrNull(raw.job_type) ?? undefined,
    jobLevel: toStringOrNull(raw.job_level) ?? undefined,
  };
}
|
||||
|
||||
/**
 * Extractor entry point: pages through the UKVisaJobs API and writes the
 * mapped jobs to `../storage/datasets/default` relative to this module.
 *
 * Environment variables read:
 * - `UKVISAJOBS_TOKEN` (required; the process exits with code 1 when unset)
 * - `UKVISAJOBS_AUTH_TOKEN` (falls back to the token), `UKVISAJOBS_CSRF_TOKEN`,
 *   `UKVISAJOBS_CI_SESSION` — combined into the cookie header
 * - `UKVISAJOBS_SEARCH_KEYWORD` — optional search filter
 * - `UKVISAJOBS_MAX_JOBS` — job limit, capped at `MAX_ALLOWED_JOBS`; defaults
 *   to `DEFAULT_MAX_JOBS` when unset or not numeric
 *
 * Output: one zero-padded `NNNNNN.json` file per job plus a combined
 * `jobs.json`. Any error during fetching or writing is logged and the process
 * exits with code 1.
 */
async function main(): Promise<void> {
  console.log('🇬🇧 UK Visa Jobs Extractor starting...');

  // Credentials and filters come from the environment.
  const env = process.env;
  const token = env.UKVISAJOBS_TOKEN;
  const authToken = env.UKVISAJOBS_AUTH_TOKEN || token;
  const csrfToken = env.UKVISAJOBS_CSRF_TOKEN || '';
  const ciSession = env.UKVISAJOBS_CI_SESSION || '';
  const searchKeyword = env.UKVISAJOBS_SEARCH_KEYWORD || undefined;

  if (!token) {
    console.error('❌ UKVISAJOBS_TOKEN environment variable is not set');
    process.exit(1);
  }

  // Assemble the cookie header from whichever values are actually set.
  const cookieCandidates: Array<[string, string | undefined]> = [
    ['csrf_token', csrfToken],
    ['ci_session', ciSession],
    ['authToken', authToken],
  ];
  const cookieParts: string[] = cookieCandidates
    .filter(([, cookieValue]) => Boolean(cookieValue))
    .map(([cookieName, cookieValue]) => `${cookieName}=${cookieValue}`);
  const cookies = cookieParts.join('; ');

  console.log(` Cookies configured: ${cookieParts.length > 0 ? 'Yes' : 'No'}`);
  console.log(` Token length: ${token.length}`);

  // Work out how many jobs, and therefore pages, to request.
  const requestedMax = toNumberOrNull(process.env.UKVISAJOBS_MAX_JOBS);
  const maxJobs = Math.min(requestedMax ?? DEFAULT_MAX_JOBS, MAX_ALLOWED_JOBS);
  const maxPages = Math.ceil(maxJobs / JOBS_PER_PAGE);

  console.log(` Max jobs: ${maxJobs} (${maxPages} pages)`);
  if (searchKeyword) {
    console.log(` Search keyword: ${searchKeyword}`);
  }

  const allJobs: ExtractedJob[] = [];
  const seenIds = new Set<string>();
  let totalAvailable = 0;

  try {
    for (let pageNo = 1; pageNo <= maxPages && allJobs.length < maxJobs; pageNo++) {
      console.log(` Fetching page ${pageNo}/${maxPages}...`);

      const response = await fetchPage(pageNo, token, cookies, { searchKeyword });

      if (response.status !== 1) {
        console.warn(` ⚠️ API returned status ${response.status} on page ${pageNo}`);
        break;
      }

      if (pageNo === 1) {
        totalAvailable = response.totalJobs;
        console.log(` Total available: ${totalAvailable} jobs`);
      }

      const pageJobs = response.jobs;
      if (!pageJobs || pageJobs.length === 0) {
        console.log(` No more jobs on page ${pageNo}`);
        break;
      }

      for (const rawJob of pageJobs) {
        if (allJobs.length >= maxJobs) break;

        // Skip listings already collected from an earlier page.
        if (seenIds.has(rawJob.id)) continue;
        seenIds.add(rawJob.id);

        allJobs.push(mapJob(rawJob));
      }

      // A short page means there is nothing further to fetch.
      if (pageJobs.length < JOBS_PER_PAGE) {
        break;
      }

      // Pause briefly between pages to go easy on the API.
      await new Promise((done) => setTimeout(done, 500));
    }

    console.log(`✅ Scraped ${allJobs.length} jobs`);

    // Output layout mirrors a Crawlee dataset directory.
    // NOTE(review): files left over from earlier runs are not removed here —
    // confirm that whatever consumes this directory clears it beforehand.
    const storageDir = join(__dirname, '../storage/datasets/default');
    await mkdir(storageDir, { recursive: true });

    // One numbered JSON file per job, written sequentially.
    for (const [index, job] of allJobs.entries()) {
      const filename = join(storageDir, `${String(index + 1).padStart(6, '0')}.json`);
      await writeFile(filename, JSON.stringify(job, null, 2));
    }

    // Plus a single combined file for easier consumption.
    const outputFile = join(storageDir, 'jobs.json');
    await writeFile(outputFile, JSON.stringify(allJobs, null, 2));

    console.log(` Output written to: ${storageDir}`);
    console.log(` Jobs file: ${outputFile}`);
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error';
    console.error(`❌ Error: ${message}`);
    process.exit(1);
  }
}
|
||||
|
||||
// Start the extractor. Any rejection that escapes main() is reported and
// turned into a non-zero exit code.
main().then(undefined, (fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});
|
||||
12
extractors/ukvisajobs/tsconfig.json
Normal file
12
extractors/ukvisajobs/tsconfig.json
Normal file
@ -0,0 +1,12 @@
|
||||
{
|
||||
"extends": "@apify/tsconfig",
|
||||
"compilerOptions": {
|
||||
"module": "NodeNext",
|
||||
"moduleResolution": "NodeNext",
|
||||
"target": "ES2022",
|
||||
"outDir": "dist",
|
||||
"noUnusedLocals": false,
|
||||
"lib": ["DOM"]
|
||||
},
|
||||
"include": ["./src/**/*"]
|
||||
}
|
||||
@ -1,34 +0,0 @@
|
||||
# Server
|
||||
PORT=3001
|
||||
|
||||
# OpenRouter API (for AI features)
|
||||
OPENROUTER_API_KEY=your_openrouter_api_key_here
|
||||
MODEL=openai/gpt-4o-mini
|
||||
|
||||
# Notion integration (optional)
|
||||
NOTION_API_KEY=
|
||||
NOTION_DATABASE_ID=
|
||||
|
||||
# Webhook security (optional)
|
||||
WEBHOOK_SECRET=
|
||||
PIPELINE_WEBHOOK_URL=
|
||||
JOB_COMPLETE_WEBHOOK_URL=
|
||||
|
||||
# Pipeline configuration
|
||||
PIPELINE_TOP_N=10
|
||||
PIPELINE_MIN_SCORE=50
|
||||
|
||||
# RXResume credentials (for PDF generation)
|
||||
RXRESUME_EMAIL=
|
||||
RXRESUME_PASSWORD=
|
||||
|
||||
# =============================================================================
|
||||
# JobSpy (Indeed/LinkedIn scraping) - optional
|
||||
# =============================================================================
|
||||
JOBSPY_SITES=indeed,linkedin
|
||||
JOBSPY_SEARCH_TERM=web developer
|
||||
JOBSPY_LOCATION=UK
|
||||
JOBSPY_RESULTS_WANTED=200
|
||||
JOBSPY_HOURS_OLD=72
|
||||
JOBSPY_COUNTRY_INDEED=UK
|
||||
JOBSPY_LINKEDIN_FETCH_DESCRIPTION=1
|
||||
@ -12,7 +12,7 @@ import { Header, JobList, PipelineProgress, Stats } from "./components";
|
||||
import * as api from "./api";
|
||||
import { SettingsPage } from "./pages/SettingsPage";
|
||||
|
||||
const DEFAULT_PIPELINE_SOURCES: JobSource[] = ["gradcracker", "indeed", "linkedin"];
|
||||
const DEFAULT_PIPELINE_SOURCES: JobSource[] = ["gradcracker", "indeed", "linkedin", "ukvisajobs"];
|
||||
const PIPELINE_SOURCES_STORAGE_KEY = "jobops.pipeline.sources";
|
||||
|
||||
export const App: React.FC = () => {
|
||||
@ -33,7 +33,7 @@ export const App: React.FC = () => {
|
||||
const raw = localStorage.getItem(PIPELINE_SOURCES_STORAGE_KEY);
|
||||
if (!raw) return DEFAULT_PIPELINE_SOURCES;
|
||||
const parsed = JSON.parse(raw) as unknown;
|
||||
const allowed: JobSource[] = ["gradcracker", "indeed", "linkedin"];
|
||||
const allowed: JobSource[] = ["gradcracker", "indeed", "linkedin", "ukvisajobs"];
|
||||
if (!Array.isArray(parsed)) return DEFAULT_PIPELINE_SOURCES;
|
||||
const next = parsed.filter((value): value is JobSource => allowed.includes(value));
|
||||
return next.length > 0 ? next : DEFAULT_PIPELINE_SOURCES;
|
||||
|
||||
@ -2,10 +2,10 @@
|
||||
* API client for the orchestrator backend.
|
||||
*/
|
||||
|
||||
import type {
|
||||
Job,
|
||||
ApiResponse,
|
||||
JobsListResponse,
|
||||
import type {
|
||||
Job,
|
||||
ApiResponse,
|
||||
JobsListResponse,
|
||||
PipelineStatusResponse,
|
||||
JobSource,
|
||||
PipelineRun,
|
||||
@ -26,13 +26,13 @@ async function fetchApi<T>(
|
||||
...options?.headers,
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
const data: ApiResponse<T> = await response.json();
|
||||
|
||||
|
||||
if (!data.success) {
|
||||
throw new Error(data.error || 'API request failed');
|
||||
}
|
||||
|
||||
|
||||
return data.data as T;
|
||||
}
|
||||
|
||||
@ -47,7 +47,7 @@ export async function getJob(id: string): Promise<Job> {
|
||||
}
|
||||
|
||||
export async function updateJob(
|
||||
id: string,
|
||||
id: string,
|
||||
update: Partial<Job>
|
||||
): Promise<Job> {
|
||||
return fetchApi<Job>(`/jobs/${id}`, {
|
||||
@ -105,6 +105,13 @@ export async function updateSettings(update: {
|
||||
pipelineWebhookUrl?: string | null
|
||||
jobCompleteWebhookUrl?: string | null
|
||||
resumeProjects?: ResumeProjectsSettings | null
|
||||
ukvisajobsMaxJobs?: number | null
|
||||
searchTerms?: string[] | null
|
||||
jobspyLocation?: string | null
|
||||
jobspyResultsWanted?: number | null
|
||||
jobspyHoursOld?: number | null
|
||||
jobspyCountryIndeed?: string | null
|
||||
jobspyLinkedinFetchDescription?: boolean | null
|
||||
}): Promise<AppSettings> {
|
||||
return fetchApi<AppSettings>('/settings', {
|
||||
method: 'PATCH',
|
||||
|
||||
@ -60,9 +60,10 @@ export const Header: React.FC<HeaderProps> = ({
|
||||
gradcracker: "Gradcracker",
|
||||
indeed: "Indeed",
|
||||
linkedin: "LinkedIn",
|
||||
ukvisajobs: "UK Visa Jobs",
|
||||
};
|
||||
|
||||
const orderedSources: JobSource[] = ["gradcracker", "indeed", "linkedin"];
|
||||
const orderedSources: JobSource[] = ["gradcracker", "indeed", "linkedin", "ukvisajobs"];
|
||||
|
||||
const toggleSource = (source: JobSource, checked: boolean) => {
|
||||
const next = checked
|
||||
|
||||
@ -42,6 +42,13 @@ export const SettingsPage: React.FC = () => {
|
||||
const [pipelineWebhookUrlDraft, setPipelineWebhookUrlDraft] = useState("")
|
||||
const [jobCompleteWebhookUrlDraft, setJobCompleteWebhookUrlDraft] = useState("")
|
||||
const [resumeProjectsDraft, setResumeProjectsDraft] = useState<ResumeProjectsSettings | null>(null)
|
||||
const [ukvisajobsMaxJobsDraft, setUkvisajobsMaxJobsDraft] = useState<number | null>(null)
|
||||
const [searchTermsDraft, setSearchTermsDraft] = useState<string[] | null>(null)
|
||||
const [jobspyLocationDraft, setJobspyLocationDraft] = useState<string | null>(null)
|
||||
const [jobspyResultsWantedDraft, setJobspyResultsWantedDraft] = useState<number | null>(null)
|
||||
const [jobspyHoursOldDraft, setJobspyHoursOldDraft] = useState<number | null>(null)
|
||||
const [jobspyCountryIndeedDraft, setJobspyCountryIndeedDraft] = useState<string | null>(null)
|
||||
const [jobspyLinkedinFetchDescriptionDraft, setJobspyLinkedinFetchDescriptionDraft] = useState<boolean | null>(null)
|
||||
const [isSaving, setIsSaving] = useState(false)
|
||||
const [isLoading, setIsLoading] = useState(true)
|
||||
|
||||
@ -57,6 +64,13 @@ export const SettingsPage: React.FC = () => {
|
||||
setPipelineWebhookUrlDraft(data.overridePipelineWebhookUrl ?? "")
|
||||
setJobCompleteWebhookUrlDraft(data.overrideJobCompleteWebhookUrl ?? "")
|
||||
setResumeProjectsDraft(data.resumeProjects)
|
||||
setUkvisajobsMaxJobsDraft(data.overrideUkvisajobsMaxJobs)
|
||||
setSearchTermsDraft(data.overrideSearchTerms)
|
||||
setJobspyLocationDraft(data.overrideJobspyLocation)
|
||||
setJobspyResultsWantedDraft(data.overrideJobspyResultsWanted)
|
||||
setJobspyHoursOldDraft(data.overrideJobspyHoursOld)
|
||||
setJobspyCountryIndeedDraft(data.overrideJobspyCountryIndeed)
|
||||
setJobspyLinkedinFetchDescriptionDraft(data.overrideJobspyLinkedinFetchDescription)
|
||||
})
|
||||
.catch((error) => {
|
||||
const message = error instanceof Error ? error.message : "Failed to load settings"
|
||||
@ -81,6 +95,27 @@ export const SettingsPage: React.FC = () => {
|
||||
const effectiveJobCompleteWebhookUrl = settings?.jobCompleteWebhookUrl ?? ""
|
||||
const defaultJobCompleteWebhookUrl = settings?.defaultJobCompleteWebhookUrl ?? ""
|
||||
const overrideJobCompleteWebhookUrl = settings?.overrideJobCompleteWebhookUrl
|
||||
const effectiveUkvisajobsMaxJobs = settings?.ukvisajobsMaxJobs ?? 50
|
||||
const defaultUkvisajobsMaxJobs = settings?.defaultUkvisajobsMaxJobs ?? 50
|
||||
const overrideUkvisajobsMaxJobs = settings?.overrideUkvisajobsMaxJobs
|
||||
const effectiveSearchTerms = settings?.searchTerms ?? []
|
||||
const defaultSearchTerms = settings?.defaultSearchTerms ?? []
|
||||
const overrideSearchTerms = settings?.overrideSearchTerms
|
||||
const effectiveJobspyLocation = settings?.jobspyLocation ?? ""
|
||||
const defaultJobspyLocation = settings?.defaultJobspyLocation ?? ""
|
||||
const overrideJobspyLocation = settings?.overrideJobspyLocation
|
||||
const effectiveJobspyResultsWanted = settings?.jobspyResultsWanted ?? 200
|
||||
const defaultJobspyResultsWanted = settings?.defaultJobspyResultsWanted ?? 200
|
||||
const overrideJobspyResultsWanted = settings?.overrideJobspyResultsWanted
|
||||
const effectiveJobspyHoursOld = settings?.jobspyHoursOld ?? 72
|
||||
const defaultJobspyHoursOld = settings?.defaultJobspyHoursOld ?? 72
|
||||
const overrideJobspyHoursOld = settings?.overrideJobspyHoursOld
|
||||
const effectiveJobspyCountryIndeed = settings?.jobspyCountryIndeed ?? ""
|
||||
const defaultJobspyCountryIndeed = settings?.defaultJobspyCountryIndeed ?? ""
|
||||
const overrideJobspyCountryIndeed = settings?.overrideJobspyCountryIndeed
|
||||
const effectiveJobspyLinkedinFetchDescription = settings?.jobspyLinkedinFetchDescription ?? true
|
||||
const defaultJobspyLinkedinFetchDescription = settings?.defaultJobspyLinkedinFetchDescription ?? true
|
||||
const overrideJobspyLinkedinFetchDescription = settings?.overrideJobspyLinkedinFetchDescription
|
||||
const profileProjects = settings?.profileProjects ?? []
|
||||
const maxProjectsTotal = profileProjects.length
|
||||
const lockedCount = resumeProjectsDraft?.lockedProjectIds.length ?? 0
|
||||
@ -93,11 +128,20 @@ export const SettingsPage: React.FC = () => {
|
||||
const currentWebhook = (overridePipelineWebhookUrl ?? "").trim()
|
||||
const nextJobCompleteWebhook = jobCompleteWebhookUrlDraft.trim()
|
||||
const currentJobCompleteWebhook = (overrideJobCompleteWebhookUrl ?? "").trim()
|
||||
const ukvisajobsChanged = ukvisajobsMaxJobsDraft !== (overrideUkvisajobsMaxJobs ?? null)
|
||||
const searchTermsChanged = JSON.stringify(searchTermsDraft) !== JSON.stringify(overrideSearchTerms ?? null)
|
||||
return (
|
||||
next !== current ||
|
||||
nextWebhook !== currentWebhook ||
|
||||
nextJobCompleteWebhook !== currentJobCompleteWebhook ||
|
||||
!resumeProjectsEqual(resumeProjectsDraft, settings.resumeProjects)
|
||||
!resumeProjectsEqual(resumeProjectsDraft, settings.resumeProjects) ||
|
||||
ukvisajobsChanged ||
|
||||
searchTermsChanged ||
|
||||
jobspyLocationDraft !== (overrideJobspyLocation ?? null) ||
|
||||
jobspyResultsWantedDraft !== (overrideJobspyResultsWanted ?? null) ||
|
||||
jobspyHoursOldDraft !== (overrideJobspyHoursOld ?? null) ||
|
||||
jobspyCountryIndeedDraft !== (overrideJobspyCountryIndeed ?? null) ||
|
||||
jobspyLinkedinFetchDescriptionDraft !== (overrideJobspyLinkedinFetchDescription ?? null)
|
||||
)
|
||||
}, [
|
||||
settings,
|
||||
@ -108,6 +152,20 @@ export const SettingsPage: React.FC = () => {
|
||||
overridePipelineWebhookUrl,
|
||||
overrideJobCompleteWebhookUrl,
|
||||
resumeProjectsDraft,
|
||||
ukvisajobsMaxJobsDraft,
|
||||
overrideUkvisajobsMaxJobs,
|
||||
searchTermsDraft,
|
||||
overrideSearchTerms,
|
||||
jobspyLocationDraft,
|
||||
jobspyResultsWantedDraft,
|
||||
jobspyHoursOldDraft,
|
||||
jobspyCountryIndeedDraft,
|
||||
jobspyLinkedinFetchDescriptionDraft,
|
||||
overrideJobspyLocation,
|
||||
overrideJobspyResultsWanted,
|
||||
overrideJobspyHoursOld,
|
||||
overrideJobspyCountryIndeed,
|
||||
overrideJobspyLinkedinFetchDescription,
|
||||
])
|
||||
|
||||
const handleSave = async () => {
|
||||
@ -120,17 +178,38 @@ export const SettingsPage: React.FC = () => {
|
||||
const resumeProjectsOverride = resumeProjectsEqual(resumeProjectsDraft, settings.defaultResumeProjects)
|
||||
? null
|
||||
: resumeProjectsDraft
|
||||
const ukvisajobsMaxJobsOverride = ukvisajobsMaxJobsDraft === defaultUkvisajobsMaxJobs ? null : ukvisajobsMaxJobsDraft
|
||||
const searchTermsOverride = arraysEqual(searchTermsDraft ?? [], defaultSearchTerms) ? null : searchTermsDraft
|
||||
const jobspyLocationOverride = jobspyLocationDraft === defaultJobspyLocation ? null : jobspyLocationDraft
|
||||
const jobspyResultsWantedOverride = jobspyResultsWantedDraft === defaultJobspyResultsWanted ? null : jobspyResultsWantedDraft
|
||||
const jobspyHoursOldOverride = jobspyHoursOldDraft === defaultJobspyHoursOld ? null : jobspyHoursOldDraft
|
||||
const jobspyCountryIndeedOverride = jobspyCountryIndeedDraft === defaultJobspyCountryIndeed ? null : jobspyCountryIndeedDraft
|
||||
const jobspyLinkedinFetchDescriptionOverride = jobspyLinkedinFetchDescriptionDraft === defaultJobspyLinkedinFetchDescription ? null : jobspyLinkedinFetchDescriptionDraft
|
||||
const updated = await api.updateSettings({
|
||||
model: trimmed.length > 0 ? trimmed : null,
|
||||
pipelineWebhookUrl: webhookTrimmed.length > 0 ? webhookTrimmed : null,
|
||||
jobCompleteWebhookUrl: jobCompleteTrimmed.length > 0 ? jobCompleteTrimmed : null,
|
||||
resumeProjects: resumeProjectsOverride,
|
||||
ukvisajobsMaxJobs: ukvisajobsMaxJobsOverride,
|
||||
searchTerms: searchTermsOverride,
|
||||
jobspyLocation: jobspyLocationOverride,
|
||||
jobspyResultsWanted: jobspyResultsWantedOverride,
|
||||
jobspyHoursOld: jobspyHoursOldOverride,
|
||||
jobspyCountryIndeed: jobspyCountryIndeedOverride,
|
||||
jobspyLinkedinFetchDescription: jobspyLinkedinFetchDescriptionOverride,
|
||||
})
|
||||
setSettings(updated)
|
||||
setModelDraft(updated.overrideModel ?? "")
|
||||
setPipelineWebhookUrlDraft(updated.overridePipelineWebhookUrl ?? "")
|
||||
setJobCompleteWebhookUrlDraft(updated.overrideJobCompleteWebhookUrl ?? "")
|
||||
setResumeProjectsDraft(updated.resumeProjects)
|
||||
setUkvisajobsMaxJobsDraft(updated.overrideUkvisajobsMaxJobs)
|
||||
setSearchTermsDraft(updated.overrideSearchTerms)
|
||||
setJobspyLocationDraft(updated.overrideJobspyLocation)
|
||||
setJobspyResultsWantedDraft(updated.overrideJobspyResultsWanted)
|
||||
setJobspyHoursOldDraft(updated.overrideJobspyHoursOld)
|
||||
setJobspyCountryIndeedDraft(updated.overrideJobspyCountryIndeed)
|
||||
setJobspyLinkedinFetchDescriptionDraft(updated.overrideJobspyLinkedinFetchDescription)
|
||||
toast.success("Settings saved")
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : "Failed to save settings"
|
||||
@ -148,12 +227,26 @@ export const SettingsPage: React.FC = () => {
|
||||
pipelineWebhookUrl: null,
|
||||
jobCompleteWebhookUrl: null,
|
||||
resumeProjects: null,
|
||||
ukvisajobsMaxJobs: null,
|
||||
searchTerms: null,
|
||||
jobspyLocation: null,
|
||||
jobspyResultsWanted: null,
|
||||
jobspyHoursOld: null,
|
||||
jobspyCountryIndeed: null,
|
||||
jobspyLinkedinFetchDescription: null,
|
||||
})
|
||||
setSettings(updated)
|
||||
setModelDraft("")
|
||||
setPipelineWebhookUrlDraft("")
|
||||
setJobCompleteWebhookUrlDraft("")
|
||||
setResumeProjectsDraft(updated.resumeProjects)
|
||||
setUkvisajobsMaxJobsDraft(null)
|
||||
setSearchTermsDraft(null)
|
||||
setJobspyLocationDraft(null)
|
||||
setJobspyResultsWantedDraft(null)
|
||||
setJobspyHoursOldDraft(null)
|
||||
setJobspyCountryIndeedDraft(null)
|
||||
setJobspyLinkedinFetchDescriptionDraft(null)
|
||||
toast.success("Reset to default")
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : "Failed to reset settings"
|
||||
@ -272,6 +365,220 @@ export const SettingsPage: React.FC = () => {
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle className="text-base">UKVisaJobs Extractor</CardTitle>
|
||||
</CardHeader>
|
||||
|
||||
<CardContent className="space-y-4">
|
||||
<div className="space-y-2">
|
||||
<div className="text-sm font-medium">Max jobs to fetch</div>
|
||||
<Input
|
||||
type="number"
|
||||
inputMode="numeric"
|
||||
min={1}
|
||||
max={200}
|
||||
value={ukvisajobsMaxJobsDraft ?? defaultUkvisajobsMaxJobs}
|
||||
onChange={(event) => {
|
||||
const value = parseInt(event.target.value, 10)
|
||||
if (Number.isNaN(value)) {
|
||||
setUkvisajobsMaxJobsDraft(null)
|
||||
} else {
|
||||
setUkvisajobsMaxJobsDraft(Math.min(200, Math.max(1, value)))
|
||||
}
|
||||
}}
|
||||
disabled={isLoading || isSaving}
|
||||
/>
|
||||
<div className="text-xs text-muted-foreground">
|
||||
Maximum number of jobs to fetch from UKVisaJobs per pipeline run. Range: 1-200.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<Separator />
|
||||
|
||||
<div className="grid gap-2 text-sm sm:grid-cols-2">
|
||||
<div>
|
||||
<div className="text-xs text-muted-foreground">Effective</div>
|
||||
<div className="break-words font-mono text-xs">{effectiveUkvisajobsMaxJobs}</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-xs text-muted-foreground">Default</div>
|
||||
<div className="break-words font-mono text-xs">{defaultUkvisajobsMaxJobs}</div>
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle className="text-base">Search Terms</CardTitle>
|
||||
</CardHeader>
|
||||
|
||||
<CardContent className="space-y-4">
|
||||
<div className="space-y-2">
|
||||
<div className="text-sm font-medium">Global search terms</div>
|
||||
<textarea
|
||||
className="flex min-h-[80px] w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
|
||||
value={searchTermsDraft ? searchTermsDraft.join('\n') : (defaultSearchTerms ?? []).join('\n')}
|
||||
onChange={(event) => {
|
||||
const text = event.target.value
|
||||
const terms = text.split('\n') // Don't filter here to allow empty lines while typing
|
||||
setSearchTermsDraft(terms)
|
||||
}}
|
||||
onBlur={() => {
|
||||
// Clean up on blur
|
||||
if (searchTermsDraft) {
|
||||
setSearchTermsDraft(searchTermsDraft.map(t => t.trim()).filter(Boolean))
|
||||
}
|
||||
}}
|
||||
placeholder="e.g. web developer"
|
||||
disabled={isLoading || isSaving}
|
||||
rows={5}
|
||||
/>
|
||||
<div className="text-xs text-muted-foreground">
|
||||
One term per line. Applies to UKVisaJobs and other supported extractors.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<Separator />
|
||||
|
||||
<div className="grid gap-2 text-sm sm:grid-cols-2">
|
||||
<div>
|
||||
<div className="text-xs text-muted-foreground">Effective</div>
|
||||
<div className="break-words font-mono text-xs">{(effectiveSearchTerms || []).join(', ') || "—"}</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-xs text-muted-foreground">Default (env)</div>
|
||||
<div className="break-words font-mono text-xs">{(defaultSearchTerms || []).join(', ') || "—"}</div>
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle className="text-base">JobSpy Scraper</CardTitle>
|
||||
</CardHeader>
|
||||
|
||||
<CardContent className="space-y-6">
|
||||
<div className="grid gap-6 md:grid-cols-2">
|
||||
<div className="space-y-2">
|
||||
<div className="text-sm font-medium">Location</div>
|
||||
<Input
|
||||
value={jobspyLocationDraft ?? defaultJobspyLocation}
|
||||
onChange={(event) => setJobspyLocationDraft(event.target.value)}
|
||||
placeholder={defaultJobspyLocation || "UK"}
|
||||
disabled={isLoading || isSaving}
|
||||
/>
|
||||
<div className="text-xs text-muted-foreground">
|
||||
Location to search for jobs (e.g. "UK", "London", "Remote").
|
||||
</div>
|
||||
<div className="flex gap-2 text-xs text-muted-foreground">
|
||||
<span>Effective: {effectiveJobspyLocation || "—"}</span>
|
||||
<span>Default: {defaultJobspyLocation || "—"}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="space-y-2">
|
||||
<div className="text-sm font-medium">Results Wanted</div>
|
||||
<Input
|
||||
type="number"
|
||||
inputMode="numeric"
|
||||
min={1}
|
||||
max={500}
|
||||
value={jobspyResultsWantedDraft ?? defaultJobspyResultsWanted}
|
||||
onChange={(event) => {
|
||||
const value = parseInt(event.target.value, 10)
|
||||
if (Number.isNaN(value)) {
|
||||
setJobspyResultsWantedDraft(null)
|
||||
} else {
|
||||
setJobspyResultsWantedDraft(Math.min(500, Math.max(1, value)))
|
||||
}
|
||||
}}
|
||||
disabled={isLoading || isSaving}
|
||||
/>
|
||||
<div className="text-xs text-muted-foreground">
|
||||
Number of results to fetch per term per site. Max 500.
|
||||
</div>
|
||||
<div className="flex gap-2 text-xs text-muted-foreground">
|
||||
<span>Effective: {effectiveJobspyResultsWanted}</span>
|
||||
<span>Default: {defaultJobspyResultsWanted}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="space-y-2">
|
||||
<div className="text-sm font-medium">Hours Old</div>
|
||||
<Input
|
||||
type="number"
|
||||
inputMode="numeric"
|
||||
min={1}
|
||||
max={168}
|
||||
value={jobspyHoursOldDraft ?? defaultJobspyHoursOld}
|
||||
onChange={(event) => {
|
||||
const value = parseInt(event.target.value, 10)
|
||||
if (Number.isNaN(value)) {
|
||||
setJobspyHoursOldDraft(null)
|
||||
} else {
|
||||
setJobspyHoursOldDraft(Math.min(168, Math.max(1, value)))
|
||||
}
|
||||
}}
|
||||
disabled={isLoading || isSaving}
|
||||
/>
|
||||
<div className="text-xs text-muted-foreground">
|
||||
Max age of jobs in hours (e.g. 72 for 3 days).
|
||||
</div>
|
||||
<div className="flex gap-2 text-xs text-muted-foreground">
|
||||
<span>Effective: {effectiveJobspyHoursOld}h</span>
|
||||
<span>Default: {defaultJobspyHoursOld}h</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="space-y-2">
|
||||
<div className="text-sm font-medium">Indeed Country</div>
|
||||
<Input
|
||||
value={jobspyCountryIndeedDraft ?? defaultJobspyCountryIndeed}
|
||||
onChange={(event) => setJobspyCountryIndeedDraft(event.target.value)}
|
||||
placeholder={defaultJobspyCountryIndeed || "UK"}
|
||||
disabled={isLoading || isSaving}
|
||||
/>
|
||||
<div className="text-xs text-muted-foreground">
|
||||
Country domain for Indeed (e.g. "UK" for indeed.co.uk).
|
||||
</div>
|
||||
<div className="flex gap-2 text-xs text-muted-foreground">
|
||||
<span>Effective: {effectiveJobspyCountryIndeed || "—"}</span>
|
||||
<span>Default: {defaultJobspyCountryIndeed || "—"}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<Separator />
|
||||
|
||||
<div className="flex items-center space-x-2">
|
||||
<Checkbox
|
||||
id="linkedin-desc"
|
||||
checked={jobspyLinkedinFetchDescriptionDraft ?? defaultJobspyLinkedinFetchDescription}
|
||||
onCheckedChange={(checked) => setJobspyLinkedinFetchDescriptionDraft(!!checked)}
|
||||
disabled={isLoading || isSaving}
|
||||
/>
|
||||
<div className="grid gap-1.5 leading-none">
|
||||
<label
|
||||
htmlFor="linkedin-desc"
|
||||
className="text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70"
|
||||
>
|
||||
Fetch LinkedIn Description
|
||||
</label>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
If enabled, JobSpy will make extra requests to fetch full descriptions. Slower but better data.
|
||||
</p>
|
||||
<div className="flex gap-2 text-xs text-muted-foreground">
|
||||
<span>Effective: {effectiveJobspyLinkedinFetchDescription ? "Yes" : "No"}</span>
|
||||
<span>Default: {defaultJobspyLinkedinFetchDescription ? "Yes" : "No"}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle className="text-base">Resume Projects</CardTitle>
|
||||
|
||||
@ -60,10 +60,10 @@ apiRouter.get('/jobs', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const statusFilter = req.query.status as string | undefined;
|
||||
const statuses = statusFilter?.split(',').filter(Boolean) as JobStatus[] | undefined;
|
||||
|
||||
|
||||
const jobs = await jobsRepo.getAllJobs(statuses);
|
||||
const stats = await jobsRepo.getJobStats();
|
||||
|
||||
|
||||
const response: ApiResponse<JobsListResponse> = {
|
||||
success: true,
|
||||
data: {
|
||||
@ -72,7 +72,7 @@ apiRouter.get('/jobs', async (req: Request, res: Response) => {
|
||||
byStatus: stats,
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
res.json(response);
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
@ -86,11 +86,11 @@ apiRouter.get('/jobs', async (req: Request, res: Response) => {
|
||||
apiRouter.get('/jobs/:id', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const job = await jobsRepo.getJobById(req.params.id);
|
||||
|
||||
|
||||
if (!job) {
|
||||
return res.status(404).json({ success: false, error: 'Job not found' });
|
||||
}
|
||||
|
||||
|
||||
res.json({ success: true, data: job });
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
@ -113,11 +113,11 @@ apiRouter.patch('/jobs/:id', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const input = updateJobSchema.parse(req.body);
|
||||
const job = await jobsRepo.updateJob(req.params.id, input);
|
||||
|
||||
|
||||
if (!job) {
|
||||
return res.status(404).json({ success: false, error: 'Job not found' });
|
||||
}
|
||||
|
||||
|
||||
res.json({ success: true, data: job });
|
||||
} catch (error) {
|
||||
if (error instanceof z.ZodError) {
|
||||
@ -137,11 +137,11 @@ apiRouter.post('/jobs/:id/process', async (req: Request, res: Response) => {
|
||||
const force = forceRaw === '1' || forceRaw === 'true';
|
||||
|
||||
const result = await processJob(req.params.id, { force });
|
||||
|
||||
|
||||
if (!result.success) {
|
||||
return res.status(400).json({ success: false, error: result.error });
|
||||
}
|
||||
|
||||
|
||||
const job = await jobsRepo.getJobById(req.params.id);
|
||||
res.json({ success: true, data: job });
|
||||
} catch (error) {
|
||||
@ -156,13 +156,13 @@ apiRouter.post('/jobs/:id/process', async (req: Request, res: Response) => {
|
||||
apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const job = await jobsRepo.getJobById(req.params.id);
|
||||
|
||||
|
||||
if (!job) {
|
||||
return res.status(404).json({ success: false, error: 'Job not found' });
|
||||
}
|
||||
|
||||
|
||||
const appliedAt = new Date().toISOString();
|
||||
|
||||
|
||||
// Sync to Notion
|
||||
const notionResult = await createNotionEntry({
|
||||
id: job.id,
|
||||
@ -175,7 +175,7 @@ apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
|
||||
pdfPath: job.pdfPath,
|
||||
appliedAt,
|
||||
});
|
||||
|
||||
|
||||
// Update job status
|
||||
const updatedJob = await jobsRepo.updateJob(job.id, {
|
||||
status: 'applied',
|
||||
@ -186,7 +186,7 @@ apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
|
||||
if (updatedJob) {
|
||||
notifyJobCompleteWebhook(updatedJob).catch(console.warn)
|
||||
}
|
||||
|
||||
|
||||
res.json({ success: true, data: updatedJob });
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
@ -200,11 +200,11 @@ apiRouter.post('/jobs/:id/apply', async (req: Request, res: Response) => {
|
||||
apiRouter.post('/jobs/:id/reject', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const job = await jobsRepo.updateJob(req.params.id, { status: 'rejected' });
|
||||
|
||||
|
||||
if (!job) {
|
||||
return res.status(404).json({ success: false, error: 'Job not found' });
|
||||
}
|
||||
|
||||
|
||||
res.json({ success: true, data: job });
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
@ -238,6 +238,43 @@ apiRouter.get('/settings', async (_req: Request, res: Response) => {
|
||||
const overrideResumeProjectsRaw = await settingsRepo.getSetting('resumeProjects');
|
||||
const resumeProjectsData = resolveResumeProjectsSettings({ catalog, overrideRaw: overrideResumeProjectsRaw });
|
||||
|
||||
const overrideUkvisajobsMaxJobsRaw = await settingsRepo.getSetting('ukvisajobsMaxJobs');
|
||||
const defaultUkvisajobsMaxJobs = 50;
|
||||
const overrideUkvisajobsMaxJobs = overrideUkvisajobsMaxJobsRaw ? parseInt(overrideUkvisajobsMaxJobsRaw, 10) : null;
|
||||
const ukvisajobsMaxJobs = overrideUkvisajobsMaxJobs ?? defaultUkvisajobsMaxJobs;
|
||||
|
||||
const overrideSearchTermsRaw = await settingsRepo.getSetting('searchTerms');
|
||||
const defaultSearchTermsEnv = process.env.JOBSPY_SEARCH_TERMS || 'web developer';
|
||||
const defaultSearchTerms = defaultSearchTermsEnv.split('|').map(s => s.trim()).filter(Boolean);
|
||||
const overrideSearchTerms = overrideSearchTermsRaw ? JSON.parse(overrideSearchTermsRaw) as string[] : null;
|
||||
const searchTerms = overrideSearchTerms ?? defaultSearchTerms;
|
||||
|
||||
// JobSpy settings
|
||||
const overrideJobspyLocation = await settingsRepo.getSetting('jobspyLocation');
|
||||
const defaultJobspyLocation = process.env.JOBSPY_LOCATION || 'UK';
|
||||
const jobspyLocation = overrideJobspyLocation || defaultJobspyLocation;
|
||||
|
||||
const overrideJobspyResultsWantedRaw = await settingsRepo.getSetting('jobspyResultsWanted');
|
||||
const defaultJobspyResultsWanted = parseInt(process.env.JOBSPY_RESULTS_WANTED || '200', 10);
|
||||
const overrideJobspyResultsWanted = overrideJobspyResultsWantedRaw ? parseInt(overrideJobspyResultsWantedRaw, 10) : null;
|
||||
const jobspyResultsWanted = overrideJobspyResultsWanted ?? defaultJobspyResultsWanted;
|
||||
|
||||
const overrideJobspyHoursOldRaw = await settingsRepo.getSetting('jobspyHoursOld');
|
||||
const defaultJobspyHoursOld = parseInt(process.env.JOBSPY_HOURS_OLD || '72', 10);
|
||||
const overrideJobspyHoursOld = overrideJobspyHoursOldRaw ? parseInt(overrideJobspyHoursOldRaw, 10) : null;
|
||||
const jobspyHoursOld = overrideJobspyHoursOld ?? defaultJobspyHoursOld;
|
||||
|
||||
const overrideJobspyCountryIndeed = await settingsRepo.getSetting('jobspyCountryIndeed');
|
||||
const defaultJobspyCountryIndeed = process.env.JOBSPY_COUNTRY_INDEED || 'UK';
|
||||
const jobspyCountryIndeed = overrideJobspyCountryIndeed || defaultJobspyCountryIndeed;
|
||||
|
||||
const overrideJobspyLinkedinFetchDescriptionRaw = await settingsRepo.getSetting('jobspyLinkedinFetchDescription');
|
||||
const defaultJobspyLinkedinFetchDescription = (process.env.JOBSPY_LINKEDIN_FETCH_DESCRIPTION || '1') === '1';
|
||||
const overrideJobspyLinkedinFetchDescription = overrideJobspyLinkedinFetchDescriptionRaw
|
||||
? overrideJobspyLinkedinFetchDescriptionRaw === 'true' || overrideJobspyLinkedinFetchDescriptionRaw === '1'
|
||||
: null;
|
||||
const jobspyLinkedinFetchDescription = overrideJobspyLinkedinFetchDescription ?? defaultJobspyLinkedinFetchDescription;
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
@ -251,6 +288,27 @@ apiRouter.get('/settings', async (_req: Request, res: Response) => {
|
||||
defaultJobCompleteWebhookUrl,
|
||||
overrideJobCompleteWebhookUrl,
|
||||
...resumeProjectsData,
|
||||
ukvisajobsMaxJobs,
|
||||
defaultUkvisajobsMaxJobs,
|
||||
overrideUkvisajobsMaxJobs,
|
||||
searchTerms,
|
||||
defaultSearchTerms,
|
||||
overrideSearchTerms,
|
||||
jobspyLocation,
|
||||
defaultJobspyLocation,
|
||||
overrideJobspyLocation,
|
||||
jobspyResultsWanted,
|
||||
defaultJobspyResultsWanted,
|
||||
overrideJobspyResultsWanted,
|
||||
jobspyHoursOld,
|
||||
defaultJobspyHoursOld,
|
||||
overrideJobspyHoursOld,
|
||||
jobspyCountryIndeed,
|
||||
defaultJobspyCountryIndeed,
|
||||
overrideJobspyCountryIndeed,
|
||||
jobspyLinkedinFetchDescription,
|
||||
defaultJobspyLinkedinFetchDescription,
|
||||
overrideJobspyLinkedinFetchDescription,
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
@ -268,6 +326,13 @@ const updateSettingsSchema = z.object({
|
||||
lockedProjectIds: z.array(z.string().trim().min(1)).max(200),
|
||||
aiSelectableProjectIds: z.array(z.string().trim().min(1)).max(200),
|
||||
}).nullable().optional(),
|
||||
ukvisajobsMaxJobs: z.number().int().min(1).max(200).nullable().optional(),
|
||||
searchTerms: z.array(z.string().trim().min(1).max(200)).max(50).nullable().optional(),
|
||||
jobspyLocation: z.string().trim().min(1).max(100).nullable().optional(),
|
||||
jobspyResultsWanted: z.number().int().min(1).max(500).nullable().optional(),
|
||||
jobspyHoursOld: z.number().int().min(1).max(168).nullable().optional(),
|
||||
jobspyCountryIndeed: z.string().trim().min(1).max(100).nullable().optional(),
|
||||
jobspyLinkedinFetchDescription: z.boolean().nullable().optional(),
|
||||
});
|
||||
|
||||
/**
|
||||
@ -306,6 +371,41 @@ apiRouter.patch('/settings', async (req: Request, res: Response) => {
|
||||
}
|
||||
}
|
||||
|
||||
if ('ukvisajobsMaxJobs' in input) {
|
||||
const ukvisajobsMaxJobs = input.ukvisajobsMaxJobs ?? null;
|
||||
await settingsRepo.setSetting('ukvisajobsMaxJobs', ukvisajobsMaxJobs !== null ? String(ukvisajobsMaxJobs) : null);
|
||||
}
|
||||
|
||||
if ('searchTerms' in input) {
|
||||
const searchTerms = input.searchTerms ?? null;
|
||||
await settingsRepo.setSetting('searchTerms', searchTerms !== null ? JSON.stringify(searchTerms) : null);
|
||||
}
|
||||
|
||||
if ('jobspyLocation' in input) {
|
||||
const value = input.jobspyLocation ?? null;
|
||||
await settingsRepo.setSetting('jobspyLocation', value);
|
||||
}
|
||||
|
||||
if ('jobspyResultsWanted' in input) {
|
||||
const value = input.jobspyResultsWanted ?? null;
|
||||
await settingsRepo.setSetting('jobspyResultsWanted', value !== null ? String(value) : null);
|
||||
}
|
||||
|
||||
if ('jobspyHoursOld' in input) {
|
||||
const value = input.jobspyHoursOld ?? null;
|
||||
await settingsRepo.setSetting('jobspyHoursOld', value !== null ? String(value) : null);
|
||||
}
|
||||
|
||||
if ('jobspyCountryIndeed' in input) {
|
||||
const value = input.jobspyCountryIndeed ?? null;
|
||||
await settingsRepo.setSetting('jobspyCountryIndeed', value);
|
||||
}
|
||||
|
||||
if ('jobspyLinkedinFetchDescription' in input) {
|
||||
const value = input.jobspyLinkedinFetchDescription ?? null;
|
||||
await settingsRepo.setSetting('jobspyLinkedinFetchDescription', value !== null ? (value ? '1' : '0') : null);
|
||||
}
|
||||
|
||||
const overrideModel = await settingsRepo.getSetting('model');
|
||||
const defaultModel = process.env.MODEL || 'openai/gpt-4o-mini';
|
||||
const model = overrideModel || defaultModel;
|
||||
@ -323,6 +423,44 @@ apiRouter.patch('/settings', async (req: Request, res: Response) => {
|
||||
const overrideResumeProjectsRaw = await settingsRepo.getSetting('resumeProjects');
|
||||
const resumeProjectsData = resolveResumeProjectsSettings({ catalog, overrideRaw: overrideResumeProjectsRaw });
|
||||
|
||||
const overrideUkvisajobsMaxJobsRaw = await settingsRepo.getSetting('ukvisajobsMaxJobs');
|
||||
const defaultUkvisajobsMaxJobs = 50;
|
||||
const overrideUkvisajobsMaxJobs = overrideUkvisajobsMaxJobsRaw ? parseInt(overrideUkvisajobsMaxJobsRaw, 10) : null;
|
||||
const ukvisajobsMaxJobs = overrideUkvisajobsMaxJobs ?? defaultUkvisajobsMaxJobs;
|
||||
|
||||
// Search terms - stored as JSON array, default from env var (pipe-separated)
|
||||
const overrideSearchTermsRaw = await settingsRepo.getSetting('searchTerms');
|
||||
const defaultSearchTermsEnv = process.env.JOBSPY_SEARCH_TERMS || 'web developer';
|
||||
const defaultSearchTerms = defaultSearchTermsEnv.split('|').map(s => s.trim()).filter(Boolean);
|
||||
const overrideSearchTerms = overrideSearchTermsRaw ? JSON.parse(overrideSearchTermsRaw) as string[] : null;
|
||||
const searchTerms = overrideSearchTerms ?? defaultSearchTerms;
|
||||
|
||||
// JobSpy settings (re-fetch to update response)
|
||||
const overrideJobspyLocation = await settingsRepo.getSetting('jobspyLocation');
|
||||
const defaultJobspyLocation = process.env.JOBSPY_LOCATION || 'UK';
|
||||
const jobspyLocation = overrideJobspyLocation || defaultJobspyLocation;
|
||||
|
||||
const overrideJobspyResultsWantedRaw = await settingsRepo.getSetting('jobspyResultsWanted');
|
||||
const defaultJobspyResultsWanted = parseInt(process.env.JOBSPY_RESULTS_WANTED || '200', 10);
|
||||
const overrideJobspyResultsWanted = overrideJobspyResultsWantedRaw ? parseInt(overrideJobspyResultsWantedRaw, 10) : null;
|
||||
const jobspyResultsWanted = overrideJobspyResultsWanted ?? defaultJobspyResultsWanted;
|
||||
|
||||
const overrideJobspyHoursOldRaw = await settingsRepo.getSetting('jobspyHoursOld');
|
||||
const defaultJobspyHoursOld = parseInt(process.env.JOBSPY_HOURS_OLD || '72', 10);
|
||||
const overrideJobspyHoursOld = overrideJobspyHoursOldRaw ? parseInt(overrideJobspyHoursOldRaw, 10) : null;
|
||||
const jobspyHoursOld = overrideJobspyHoursOld ?? defaultJobspyHoursOld;
|
||||
|
||||
const overrideJobspyCountryIndeed = await settingsRepo.getSetting('jobspyCountryIndeed');
|
||||
const defaultJobspyCountryIndeed = process.env.JOBSPY_COUNTRY_INDEED || 'UK';
|
||||
const jobspyCountryIndeed = overrideJobspyCountryIndeed || defaultJobspyCountryIndeed;
|
||||
|
||||
const overrideJobspyLinkedinFetchDescriptionRaw = await settingsRepo.getSetting('jobspyLinkedinFetchDescription');
|
||||
const defaultJobspyLinkedinFetchDescription = (process.env.JOBSPY_LINKEDIN_FETCH_DESCRIPTION || '1') === '1';
|
||||
const overrideJobspyLinkedinFetchDescription = overrideJobspyLinkedinFetchDescriptionRaw
|
||||
? overrideJobspyLinkedinFetchDescriptionRaw === 'true' || overrideJobspyLinkedinFetchDescriptionRaw === '1'
|
||||
: null;
|
||||
const jobspyLinkedinFetchDescription = overrideJobspyLinkedinFetchDescription ?? defaultJobspyLinkedinFetchDescription;
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
@ -336,6 +474,27 @@ apiRouter.patch('/settings', async (req: Request, res: Response) => {
|
||||
defaultJobCompleteWebhookUrl,
|
||||
overrideJobCompleteWebhookUrl,
|
||||
...resumeProjectsData,
|
||||
ukvisajobsMaxJobs,
|
||||
defaultUkvisajobsMaxJobs,
|
||||
overrideUkvisajobsMaxJobs,
|
||||
searchTerms,
|
||||
defaultSearchTerms,
|
||||
overrideSearchTerms,
|
||||
jobspyLocation,
|
||||
defaultJobspyLocation,
|
||||
overrideJobspyLocation,
|
||||
jobspyResultsWanted,
|
||||
defaultJobspyResultsWanted,
|
||||
overrideJobspyResultsWanted,
|
||||
jobspyHoursOld,
|
||||
defaultJobspyHoursOld,
|
||||
overrideJobspyHoursOld,
|
||||
jobspyCountryIndeed,
|
||||
defaultJobspyCountryIndeed,
|
||||
overrideJobspyCountryIndeed,
|
||||
jobspyLinkedinFetchDescription,
|
||||
defaultJobspyLinkedinFetchDescription,
|
||||
overrideJobspyLinkedinFetchDescription,
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
@ -351,7 +510,7 @@ apiRouter.get('/pipeline/status', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { isRunning } = getPipelineStatus();
|
||||
const lastRun = await pipelineRepo.getLatestPipelineRun();
|
||||
|
||||
|
||||
const response: ApiResponse<PipelineStatusResponse> = {
|
||||
success: true,
|
||||
data: {
|
||||
@ -360,7 +519,7 @@ apiRouter.get('/pipeline/status', async (req: Request, res: Response) => {
|
||||
nextScheduledRun: null, // Would come from n8n
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
res.json(response);
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
@ -377,20 +536,20 @@ apiRouter.get('/pipeline/progress', (req: Request, res: Response) => {
|
||||
res.setHeader('Cache-Control', 'no-cache');
|
||||
res.setHeader('Connection', 'keep-alive');
|
||||
res.setHeader('X-Accel-Buffering', 'no'); // Disable Nginx buffering
|
||||
|
||||
|
||||
// Send initial progress
|
||||
const sendProgress = (data: unknown) => {
|
||||
res.write(`data: ${JSON.stringify(data)}\n\n`);
|
||||
};
|
||||
|
||||
|
||||
// Subscribe to progress updates
|
||||
const unsubscribe = subscribeToProgress(sendProgress);
|
||||
|
||||
|
||||
// Send heartbeat every 30 seconds to keep connection alive
|
||||
const heartbeat = setInterval(() => {
|
||||
res.write(': heartbeat\n\n');
|
||||
}, 30000);
|
||||
|
||||
|
||||
// Cleanup on close
|
||||
req.on('close', () => {
|
||||
clearInterval(heartbeat);
|
||||
@ -417,19 +576,19 @@ apiRouter.get('/pipeline/runs', async (req: Request, res: Response) => {
|
||||
/**
 * Validation schema for the body of POST /pipeline/run; the handler calls
 * runPipelineSchema.parse(req.body) and passes the result to runPipeline.
 *
 * Every field is optional:
 * - topN: number between 1 and 50.
 * - minSuitabilityScore: number between 0 and 100.
 * - sources: non-empty array of known job-source names.
 */
const runPipelineSchema = z.object({
|
||||
topN: z.number().min(1).max(50).optional(),
|
||||
minSuitabilityScore: z.number().min(0).max(100).optional(),
|
||||
// NOTE(review): the two `sources` entries below look like the removed and added
// sides of one diff line; the second one extends the enum with 'ukvisajobs'.
sources: z.array(z.enum(['gradcracker', 'indeed', 'linkedin'])).min(1).optional(),
|
||||
sources: z.array(z.enum(['gradcracker', 'indeed', 'linkedin', 'ukvisajobs'])).min(1).optional(),
|
||||
});
|
||||
|
||||
apiRouter.post('/pipeline/run', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const config = runPipelineSchema.parse(req.body);
|
||||
|
||||
|
||||
// Start pipeline in background
|
||||
runPipeline(config).catch(console.error);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: { message: 'Pipeline started' }
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: { message: 'Pipeline started' }
|
||||
});
|
||||
} catch (error) {
|
||||
if (error instanceof z.ZodError) {
|
||||
@ -451,21 +610,21 @@ apiRouter.post('/webhook/trigger', async (req: Request, res: Response) => {
|
||||
// Optional: Add authentication check
|
||||
const authHeader = req.headers.authorization;
|
||||
const expectedToken = process.env.WEBHOOK_SECRET;
|
||||
|
||||
|
||||
if (expectedToken && authHeader !== `Bearer ${expectedToken}`) {
|
||||
return res.status(401).json({ success: false, error: 'Unauthorized' });
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
// Start pipeline in background
|
||||
runPipeline().catch(console.error);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
message: 'Pipeline triggered',
|
||||
triggeredAt: new Date().toISOString(),
|
||||
}
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
@ -483,14 +642,14 @@ apiRouter.post('/webhook/trigger', async (req: Request, res: Response) => {
|
||||
apiRouter.delete('/database', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const result = clearDatabase();
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
message: 'Database cleared',
|
||||
jobsDeleted: result.jobsDeleted,
|
||||
runsDeleted: result.runsDeleted,
|
||||
}
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
|
||||
@ -7,9 +7,9 @@ import { sql } from 'drizzle-orm';
|
||||
|
||||
export const jobs = sqliteTable('jobs', {
|
||||
id: text('id').primaryKey(),
|
||||
|
||||
|
||||
// From crawler
|
||||
source: text('source', { enum: ['gradcracker', 'indeed', 'linkedin'] }).notNull().default('gradcracker'),
|
||||
source: text('source', { enum: ['gradcracker', 'indeed', 'linkedin', 'ukvisajobs'] }).notNull().default('gradcracker'),
|
||||
sourceJobId: text('source_job_id'),
|
||||
jobUrlDirect: text('job_url_direct'),
|
||||
datePosted: text('date_posted'),
|
||||
@ -51,17 +51,17 @@ export const jobs = sqliteTable('jobs', {
|
||||
companyReviewsCount: integer('company_reviews_count'),
|
||||
vacancyCount: integer('vacancy_count'),
|
||||
workFromHomeType: text('work_from_home_type'),
|
||||
|
||||
|
||||
// Orchestrator enrichments
|
||||
status: text('status', {
|
||||
enum: ['discovered', 'processing', 'ready', 'applied', 'rejected', 'expired']
|
||||
status: text('status', {
|
||||
enum: ['discovered', 'processing', 'ready', 'applied', 'rejected', 'expired']
|
||||
}).notNull().default('discovered'),
|
||||
suitabilityScore: real('suitability_score'),
|
||||
suitabilityReason: text('suitability_reason'),
|
||||
tailoredSummary: text('tailored_summary'),
|
||||
pdfPath: text('pdf_path'),
|
||||
notionPageId: text('notion_page_id'),
|
||||
|
||||
|
||||
// Timestamps
|
||||
discoveredAt: text('discovered_at').notNull().default(sql`(datetime('now'))`),
|
||||
processedAt: text('processed_at'),
|
||||
|
||||
@ -12,6 +12,7 @@ import { join, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { runCrawler } from '../services/crawler.js';
|
||||
import { runJobSpy } from '../services/jobspy.js';
|
||||
import { runUkVisaJobs } from '../services/ukvisajobs.js';
|
||||
import { scoreJobSuitability } from '../services/scorer.js';
|
||||
import { generateSummary } from '../services/summary.js';
|
||||
import { generatePdf } from '../services/pdf.js';
|
||||
@ -27,7 +28,7 @@ const DEFAULT_PROFILE_PATH = join(__dirname, '../../../../resume-generator/base.
|
||||
/**
 * Baseline pipeline configuration. runPipeline spreads the caller's partial
 * config over this object ({ ...DEFAULT_CONFIG, ...config }), so any field the
 * caller omits falls back to the value given here.
 */
const DEFAULT_CONFIG: PipelineConfig = {
|
||||
topN: 10,
|
||||
minSuitabilityScore: 50,
|
||||
// NOTE(review): the two `sources` entries below look like the removed and added
// sides of one diff line; the second one adds 'ukvisajobs' to the default sources.
sources: ['gradcracker', 'indeed', 'linkedin'],
|
||||
sources: ['gradcracker', 'indeed', 'linkedin', 'ukvisajobs'],
|
||||
profilePath: DEFAULT_PROFILE_PATH,
|
||||
// Resolved relative to this module's directory (__dirname).
outputDir: join(__dirname, '../../../data/pdfs'),
|
||||
};
|
||||
@ -88,54 +89,41 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
error: 'Pipeline is already running',
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
isPipelineRunning = true;
|
||||
resetProgress();
|
||||
const mergedConfig = { ...DEFAULT_CONFIG, ...config };
|
||||
|
||||
|
||||
// Create pipeline run record
|
||||
const pipelineRun = await pipelineRepo.createPipelineRun();
|
||||
|
||||
|
||||
console.log('🚀 Starting job pipeline...');
|
||||
console.log(` Config: topN=${mergedConfig.topN}, minScore=${mergedConfig.minSuitabilityScore} (manual processing)`);
|
||||
|
||||
|
||||
try {
|
||||
// Step 1: Load profile
|
||||
console.log('\n📋 Loading profile...');
|
||||
const profile = await loadProfile(mergedConfig.profilePath);
|
||||
|
||||
|
||||
// Step 2: Run crawler
|
||||
console.log('\n🕷️ Running crawler...');
|
||||
progressHelpers.startCrawling();
|
||||
const existingJobUrls = await jobsRepo.getAllJobUrls();
|
||||
|
||||
const discoveredJobs: CreateJobInput[] = [];
|
||||
const sourceErrors: string[] = [];
|
||||
|
||||
if (mergedConfig.sources.includes('gradcracker')) {
|
||||
const crawlerResult = await runCrawler({
|
||||
existingJobUrls,
|
||||
onProgress: (update) => {
|
||||
progressHelpers.crawlingUpdate({
|
||||
listPagesProcessed: update.listPagesProcessed,
|
||||
listPagesTotal: update.listPagesTotal,
|
||||
jobCardsFound: update.jobCardsFound,
|
||||
jobPagesEnqueued: update.jobPagesEnqueued,
|
||||
jobPagesSkipped: update.jobPagesSkipped,
|
||||
jobPagesProcessed: update.jobPagesProcessed,
|
||||
phase: update.phase,
|
||||
currentUrl: update.currentUrl,
|
||||
});
|
||||
},
|
||||
});
|
||||
// Read search terms setting
|
||||
const searchTermsSetting = await settingsRepo.getSetting('searchTerms');
|
||||
let searchTerms: string[] = [];
|
||||
|
||||
if (!crawlerResult.success) {
|
||||
sourceErrors.push(`gradcracker: ${crawlerResult.error ?? 'unknown error'}`);
|
||||
} else {
|
||||
discoveredJobs.push(...crawlerResult.jobs);
|
||||
}
|
||||
if (searchTermsSetting) {
|
||||
searchTerms = JSON.parse(searchTermsSetting) as string[];
|
||||
} else {
|
||||
// Default from env var
|
||||
const defaultSearchTermsEnv = process.env.JOBSPY_SEARCH_TERMS || 'web developer';
|
||||
searchTerms = defaultSearchTermsEnv.split('|').map(s => s.trim()).filter(Boolean);
|
||||
}
|
||||
|
||||
// Run JobSpy (Indeed/LinkedIn) if selected
|
||||
const jobSpySites = mergedConfig.sources.filter(
|
||||
(s): s is 'indeed' | 'linkedin' => s === 'indeed' || s === 'linkedin'
|
||||
);
|
||||
@ -146,7 +134,21 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
detail: `JobSpy: scraping ${jobSpySites.join(', ')}...`,
|
||||
});
|
||||
|
||||
const jobSpyResult = await runJobSpy({ sites: jobSpySites });
|
||||
const jobspyLocationSetting = await settingsRepo.getSetting('jobspyLocation');
|
||||
const jobspyResultsWantedSetting = await settingsRepo.getSetting('jobspyResultsWanted');
|
||||
const jobspyHoursOldSetting = await settingsRepo.getSetting('jobspyHoursOld');
|
||||
const jobspyCountryIndeedSetting = await settingsRepo.getSetting('jobspyCountryIndeed');
|
||||
const jobspyLinkedinFetchDescriptionSetting = await settingsRepo.getSetting('jobspyLinkedinFetchDescription');
|
||||
|
||||
const jobSpyResult = await runJobSpy({
|
||||
sites: jobSpySites,
|
||||
searchTerms,
|
||||
location: jobspyLocationSetting ?? undefined,
|
||||
resultsWanted: jobspyResultsWantedSetting ? parseInt(jobspyResultsWantedSetting, 10) : undefined,
|
||||
hoursOld: jobspyHoursOldSetting ? parseInt(jobspyHoursOldSetting, 10) : undefined,
|
||||
countryIndeed: jobspyCountryIndeedSetting ?? undefined,
|
||||
linkedinFetchDescription: jobspyLinkedinFetchDescriptionSetting !== null ? jobspyLinkedinFetchDescriptionSetting === '1' : undefined,
|
||||
});
|
||||
if (!jobSpyResult.success) {
|
||||
sourceErrors.push(`jobspy: ${jobSpyResult.error ?? 'unknown error'}`);
|
||||
} else {
|
||||
@ -154,6 +156,61 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
}
|
||||
}
|
||||
|
||||
// Run Gradcracker crawler if selected
|
||||
if (mergedConfig.sources.includes('gradcracker')) {
|
||||
updateProgress({
|
||||
step: 'crawling',
|
||||
detail: 'Gradcracker: scraping...',
|
||||
});
|
||||
|
||||
// Pass existing URLs to avoid clicking "Apply" on jobs we already have
|
||||
const existingJobUrls = await jobsRepo.getAllJobUrls();
|
||||
|
||||
const crawlerResult = await runCrawler({
|
||||
existingJobUrls,
|
||||
searchTerms,
|
||||
onProgress: (progress) => {
|
||||
// Calculate overall progress based on list pages processed vs total
|
||||
// This is rough but better than nothing
|
||||
if (progress.listPagesTotal && progress.listPagesTotal > 0) {
|
||||
const percent = Math.round((progress.listPagesProcessed ?? 0) / progress.listPagesTotal * 100);
|
||||
updateProgress({
|
||||
step: 'crawling',
|
||||
detail: `Gradcracker: ${percent}% (scan ${progress.listPagesProcessed}/${progress.listPagesTotal}, found ${progress.jobCardsFound})`,
|
||||
});
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
if (!crawlerResult.success) {
|
||||
sourceErrors.push(`gradcracker: ${crawlerResult.error ?? 'unknown error'}`);
|
||||
} else {
|
||||
discoveredJobs.push(...crawlerResult.jobs);
|
||||
}
|
||||
}
|
||||
|
||||
// Run UKVisaJobs extractor if selected
|
||||
if (mergedConfig.sources.includes('ukvisajobs')) {
|
||||
updateProgress({
|
||||
step: 'crawling',
|
||||
detail: 'UKVisaJobs: scraping visa-sponsoring jobs...',
|
||||
});
|
||||
|
||||
// Read max jobs setting from database (default to 50 if not set)
|
||||
const ukvisajobsMaxJobsSetting = await settingsRepo.getSetting('ukvisajobsMaxJobs');
|
||||
const ukvisajobsMaxJobs = ukvisajobsMaxJobsSetting ? parseInt(ukvisajobsMaxJobsSetting, 10) : 50;
|
||||
|
||||
const ukVisaResult = await runUkVisaJobs({
|
||||
maxJobs: ukvisajobsMaxJobs,
|
||||
searchTerms,
|
||||
});
|
||||
if (!ukVisaResult.success) {
|
||||
sourceErrors.push(`ukvisajobs: ${ukVisaResult.error ?? 'unknown error'}`);
|
||||
} else {
|
||||
discoveredJobs.push(...ukVisaResult.jobs);
|
||||
}
|
||||
}
|
||||
|
||||
if (discoveredJobs.length === 0 && sourceErrors.length > 0) {
|
||||
throw new Error(`All sources failed: ${sourceErrors.join('; ')}`);
|
||||
}
|
||||
@ -163,18 +220,18 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
}
|
||||
|
||||
progressHelpers.crawlingComplete(discoveredJobs.length);
|
||||
|
||||
|
||||
// Step 3: Import discovered jobs
|
||||
console.log('\n💾 Importing jobs to database...');
|
||||
const { created, skipped } = await jobsRepo.bulkCreateJobs(discoveredJobs);
|
||||
console.log(` Created: ${created}, Skipped (duplicates): ${skipped}`);
|
||||
|
||||
|
||||
progressHelpers.importComplete(created, skipped);
|
||||
|
||||
|
||||
await pipelineRepo.updatePipelineRun(pipelineRun.id, {
|
||||
jobsDiscovered: created,
|
||||
});
|
||||
|
||||
|
||||
// Step 4: Score all discovered jobs missing a score
|
||||
console.log('\n🎯 Scoring jobs for suitability...');
|
||||
const unprocessedJobs = await jobsRepo.getUnscoredDiscoveredJobs();
|
||||
@ -187,7 +244,7 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
totalToProcess: 0,
|
||||
currentJob: undefined,
|
||||
});
|
||||
|
||||
|
||||
// Score jobs with progress updates
|
||||
const scoredJobs: Array<Job & { suitabilityScore: number; suitabilityReason: string }> = [];
|
||||
for (let i = 0; i < unprocessedJobs.length; i++) {
|
||||
@ -217,46 +274,83 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
suitabilityReason: reason,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
progressHelpers.scoringComplete(scoredJobs.length);
|
||||
console.log(`\n📊 Scored ${scoredJobs.length} jobs. Ready for manual processing.`);
|
||||
|
||||
console.log(`\n📊 Scored ${scoredJobs.length} jobs.`);
|
||||
|
||||
// Step 5: Auto-process top jobs
|
||||
console.log('\n🏭 Auto-processing top jobs...');
|
||||
|
||||
const jobsToProcess = scoredJobs
|
||||
.filter(j => (j.suitabilityScore ?? 0) >= mergedConfig.minSuitabilityScore)
|
||||
.sort((a, b) => (b.suitabilityScore ?? 0) - (a.suitabilityScore ?? 0))
|
||||
.slice(0, mergedConfig.topN);
|
||||
|
||||
console.log(` Found ${jobsToProcess.length} candidates (score >= ${mergedConfig.minSuitabilityScore}, top ${mergedConfig.topN})`);
|
||||
|
||||
let processedCount = 0;
|
||||
|
||||
if (jobsToProcess.length > 0) {
|
||||
updateProgress({
|
||||
step: 'processing',
|
||||
jobsProcessed: 0,
|
||||
totalToProcess: jobsToProcess.length,
|
||||
});
|
||||
|
||||
for (let i = 0; i < jobsToProcess.length; i++) {
|
||||
const job = jobsToProcess[i];
|
||||
progressHelpers.processingJob(i + 1, jobsToProcess.length, job);
|
||||
|
||||
// Process job (Generate Summary + PDF)
|
||||
// We catch errors here to ensure one failure doesn't stop the whole batch
|
||||
const result = await processJob(job.id);
|
||||
|
||||
if (result.success) {
|
||||
processedCount++;
|
||||
} else {
|
||||
console.warn(` ⚠️ Failed to process job ${job.id}: ${result.error}`);
|
||||
}
|
||||
|
||||
progressHelpers.jobComplete(i + 1, jobsToProcess.length);
|
||||
}
|
||||
}
|
||||
|
||||
// Update pipeline run as completed
|
||||
await pipelineRepo.updatePipelineRun(pipelineRun.id, {
|
||||
status: 'completed',
|
||||
completedAt: new Date().toISOString(),
|
||||
jobsProcessed: 0,
|
||||
jobsProcessed: processedCount,
|
||||
});
|
||||
|
||||
|
||||
console.log('\n🎉 Pipeline completed!');
|
||||
console.log(` Jobs discovered: ${created}`);
|
||||
console.log(' Jobs processed: 0 (manual)');
|
||||
|
||||
progressHelpers.complete(created, 0);
|
||||
console.log(` Jobs processed: ${processedCount}`);
|
||||
|
||||
progressHelpers.complete(created, processedCount);
|
||||
|
||||
await notifyPipelineWebhook('pipeline.completed', {
|
||||
pipelineRunId: pipelineRun.id,
|
||||
jobsDiscovered: created,
|
||||
jobsScored: unprocessedJobs.length,
|
||||
jobsProcessed: 0,
|
||||
jobsProcessed: processedCount,
|
||||
})
|
||||
isPipelineRunning = false;
|
||||
|
||||
|
||||
return {
|
||||
success: true,
|
||||
jobsDiscovered: created,
|
||||
jobsProcessed: 0,
|
||||
jobsProcessed: processedCount,
|
||||
};
|
||||
|
||||
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
|
||||
|
||||
await pipelineRepo.updatePipelineRun(pipelineRun.id, {
|
||||
status: 'failed',
|
||||
completedAt: new Date().toISOString(),
|
||||
errorMessage: message,
|
||||
});
|
||||
|
||||
|
||||
progressHelpers.failed(message);
|
||||
|
||||
await notifyPipelineWebhook('pipeline.failed', {
|
||||
@ -264,9 +358,9 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
error: message,
|
||||
})
|
||||
isPipelineRunning = false;
|
||||
|
||||
|
||||
console.error('\n❌ Pipeline failed:', message);
|
||||
|
||||
|
||||
return {
|
||||
success: false,
|
||||
jobsDiscovered: 0,
|
||||
@ -287,7 +381,7 @@ export async function processJob(
|
||||
error?: string;
|
||||
}> {
|
||||
console.log(`📝 Processing job ${jobId}...`);
|
||||
|
||||
|
||||
try {
|
||||
const job = await jobsRepo.getJobById(jobId);
|
||||
if (!job) {
|
||||
@ -297,9 +391,9 @@ export async function processJob(
|
||||
if (job.status !== 'discovered' && job.status !== 'ready') {
|
||||
return { success: false, error: `Job cannot be processed from status: ${job.status}` };
|
||||
}
|
||||
|
||||
|
||||
const profile = await loadProfile(DEFAULT_PROFILE_PATH);
|
||||
|
||||
|
||||
// Mark as processing
|
||||
await jobsRepo.updateJob(job.id, { status: 'processing' });
|
||||
|
||||
@ -314,7 +408,7 @@ export async function processJob(
|
||||
job.suitabilityScore = suitability.score;
|
||||
job.suitabilityReason = suitability.reason;
|
||||
}
|
||||
|
||||
|
||||
// Generate summary (AI)
|
||||
// If forcing, always recompute; otherwise compute if missing.
|
||||
if (options?.force || !job.tailoredSummary) {
|
||||
@ -323,7 +417,7 @@ export async function processJob(
|
||||
job.jobDescription || '',
|
||||
profile
|
||||
);
|
||||
|
||||
|
||||
if (summaryResult.success) {
|
||||
await jobsRepo.updateJob(job.id, {
|
||||
tailoredSummary: summaryResult.summary,
|
||||
@ -331,7 +425,7 @@ export async function processJob(
|
||||
job.tailoredSummary = summaryResult.summary ?? null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Generate PDF
|
||||
console.log(' Generating PDF...');
|
||||
const pdfResult = await generatePdf(
|
||||
@ -340,16 +434,16 @@ export async function processJob(
|
||||
job.jobDescription || '',
|
||||
DEFAULT_PROFILE_PATH
|
||||
);
|
||||
|
||||
|
||||
// Mark as ready
|
||||
await jobsRepo.updateJob(job.id, {
|
||||
status: 'ready',
|
||||
pdfPath: pdfResult.pdfPath ?? undefined,
|
||||
});
|
||||
|
||||
|
||||
console.log(' ✅ Done!');
|
||||
return { success: true };
|
||||
|
||||
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
return { success: false, error: message };
|
||||
|
||||
@ -11,6 +11,13 @@ export type SettingKey = 'model'
|
||||
| 'pipelineWebhookUrl'
|
||||
| 'jobCompleteWebhookUrl'
|
||||
| 'resumeProjects'
|
||||
| 'ukvisajobsMaxJobs'
|
||||
| 'searchTerms'
|
||||
| 'jobspyLocation'
|
||||
| 'jobspyResultsWanted'
|
||||
| 'jobspyHoursOld'
|
||||
| 'jobspyCountryIndeed'
|
||||
| 'jobspyLinkedinFetchDescription'
|
||||
|
||||
export async function getSetting(key: SettingKey): Promise<string | null> {
|
||||
const [row] = await db.select().from(settings).where(eq(settings.key, key))
|
||||
|
||||
@ -32,6 +32,11 @@ export interface RunCrawlerOptions {
|
||||
* Optional callback for live crawl progress emitted by the Gradcracker extractor.
|
||||
*/
|
||||
onProgress?: (update: JobExtractorProgress) => void;
|
||||
|
||||
/**
|
||||
* List of search terms to be used as roles for URL generation.
|
||||
*/
|
||||
searchTerms?: string[];
|
||||
}
|
||||
|
||||
interface JobExtractorProgress {
|
||||
@ -61,13 +66,13 @@ async function writeExistingJobUrlsFile(existingJobUrls: string[] | undefined):
|
||||
*/
|
||||
export async function runCrawler(options: RunCrawlerOptions = {}): Promise<CrawlerResult> {
|
||||
console.log('🕷️ Starting job crawler...');
|
||||
|
||||
|
||||
try {
|
||||
// Clear previous results
|
||||
await clearStorageDataset();
|
||||
|
||||
const existingJobUrlsFile = await writeExistingJobUrlsFile(options.existingJobUrls);
|
||||
|
||||
|
||||
// Run the crawler
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
const child = spawn('npm', ['run', 'start'], {
|
||||
@ -78,6 +83,7 @@ export async function runCrawler(options: RunCrawlerOptions = {}): Promise<Crawl
|
||||
...process.env,
|
||||
JOBOPS_SKIP_APPLY_FOR_EXISTING: '1',
|
||||
JOBOPS_EMIT_PROGRESS: '1',
|
||||
GRADCRACKER_SEARCH_TERMS: options.searchTerms ? JSON.stringify(options.searchTerms) : '',
|
||||
...(existingJobUrlsFile ? { JOBOPS_EXISTING_JOB_URLS_FILE: existingJobUrlsFile } : {}),
|
||||
},
|
||||
});
|
||||
@ -101,7 +107,7 @@ export async function runCrawler(options: RunCrawlerOptions = {}): Promise<Crawl
|
||||
|
||||
stdoutRl?.on('line', (line) => handleLine(line, process.stdout));
|
||||
stderrRl?.on('line', (line) => handleLine(line, process.stderr));
|
||||
|
||||
|
||||
child.on('close', (code) => {
|
||||
stdoutRl?.close();
|
||||
stderrRl?.close();
|
||||
@ -111,15 +117,15 @@ export async function runCrawler(options: RunCrawlerOptions = {}): Promise<Crawl
|
||||
reject(new Error(`Crawler exited with code ${code}`));
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
child.on('error', reject);
|
||||
});
|
||||
|
||||
|
||||
// Read crawled jobs from storage
|
||||
const jobs = await readCrawledJobs();
|
||||
|
||||
|
||||
console.log(`✅ Crawler completed. Found ${jobs.length} jobs.`);
|
||||
|
||||
|
||||
return { success: true, jobs };
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
@ -135,13 +141,13 @@ async function readCrawledJobs(): Promise<CreateJobInput[]> {
|
||||
try {
|
||||
const files = await readdir(STORAGE_DIR);
|
||||
const jsonFiles = files.filter(f => f.endsWith('.json'));
|
||||
|
||||
|
||||
const jobs: CreateJobInput[] = [];
|
||||
|
||||
|
||||
for (const file of jsonFiles) {
|
||||
const content = await readFile(join(STORAGE_DIR, file), 'utf-8');
|
||||
const data = JSON.parse(content);
|
||||
|
||||
|
||||
// Map crawler output to our job input format
|
||||
jobs.push({
|
||||
source: 'gradcracker',
|
||||
@ -159,7 +165,7 @@ async function readCrawledJobs(): Promise<CreateJobInput[]> {
|
||||
jobDescription: data.jobDescription,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
return jobs;
|
||||
} catch (error) {
|
||||
console.error('Failed to read crawled jobs:', error);
|
||||
|
||||
164
orchestrator/src/server/services/ukvisajobs.ts
Normal file
164
orchestrator/src/server/services/ukvisajobs.ts
Normal file
@ -0,0 +1,164 @@
|
||||
/**
|
||||
* Service for running the UK Visa Jobs extractor (extractors/ukvisajobs).
|
||||
*
|
||||
* Spawns the extractor as a child process and reads its output dataset.
|
||||
*/
|
||||
|
||||
import { spawn } from 'child_process';
|
||||
import { readdir, readFile, rm, mkdir } from 'fs/promises';
|
||||
import { join, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import type { CreateJobInput } from '../../shared/types.js';
|
||||
|
||||
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Root of the extractor package, located four directory levels above this file.
const UKVISAJOBS_DIR = join(__dirname, '..', '..', '..', '..', 'extractors', 'ukvisajobs');

// Directory the extractor's output dataset is read from.
const STORAGE_DIR = join(UKVISAJOBS_DIR, 'storage', 'datasets', 'default');
|
||||
|
||||
/**
 * Options accepted by `runUkVisaJobs`. Every field is optional.
 */
export interface RunUkVisaJobsOptions {
  /**
   * Cap on the number of jobs fetched for each search term.
   * Defaults to 50; the maximum is 200.
   */
  maxJobs?: number;

  /**
   * A single keyword to search for (legacy).
   * Only consulted when `searchTerms` is missing or empty.
   */
  searchKeyword?: string;

  /**
   * Search terms to run one after another.
   * Takes precedence over `searchKeyword` when non-empty.
   */
  searchTerms?: string[];
}
|
||||
|
||||
/**
 * Outcome of a `runUkVisaJobs` invocation.
 */
export interface UkVisaJobsResult {
  /** Whether the run is reported as successful. */
  success: boolean;

  /** Jobs collected by the run, mapped to the orchestrator's job input shape. */
  jobs: CreateJobInput[];

  /** Optional error description. */
  error?: string;
}
|
||||
|
||||
/**
|
||||
* Clear previous extraction results.
|
||||
*/
|
||||
/**
 * Remove the extractor's output dataset directory so the next run starts empty.
 *
 * This is best-effort: a failure is logged and not rethrown. Because
 * `force: true` already makes `rm` succeed when the directory is missing,
 * anything caught here is a genuine failure (for example a permissions
 * problem), in which case files from an earlier run may still be present.
 */
async function clearStorageDataset(): Promise<void> {
  try {
    await rm(STORAGE_DIR, { recursive: true, force: true });
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error';
    console.warn(`⚠️ Could not clear UK Visa Jobs dataset at ${STORAGE_DIR}: ${message}`);
  }
}
|
||||
|
||||
/** Number of jobs requested per search term when the caller does not specify one. */
const UKVISAJOBS_DEFAULT_MAX_JOBS = 50;

/** Documented upper bound for `RunUkVisaJobsOptions.maxJobs`. */
const UKVISAJOBS_MAX_JOBS_LIMIT = 200;

/** Pause between consecutive search terms, in milliseconds. */
const UKVISAJOBS_TERM_DELAY_MS = 5000;

/** Turn the caller-supplied `maxJobs` into a whole number within 1..200 (default 50). */
function resolveUkVisaJobsMaxJobs(requested: number | undefined): number {
  if (requested === undefined || !Number.isFinite(requested)) {
    return UKVISAJOBS_DEFAULT_MAX_JOBS;
  }
  return Math.min(UKVISAJOBS_MAX_JOBS_LIMIT, Math.max(1, Math.trunc(requested)));
}

/** Run the extractor once as a child process; resolves on exit code 0, rejects otherwise. */
function spawnUkVisaJobsExtractor(term: string, maxJobs: number): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const child = spawn('npx', ['tsx', 'src/main.ts'], {
      cwd: UKVISAJOBS_DIR,
      stdio: 'inherit',
      env: {
        ...process.env,
        UKVISAJOBS_MAX_JOBS: String(maxJobs),
        UKVISAJOBS_SEARCH_KEYWORD: term,
      },
    });

    child.on('close', (code) => {
      if (code === 0) resolve();
      else reject(new Error(`UK Visa Jobs extractor exited with code ${code}`));
    });
    // Emitted when the process could not be spawned at all.
    child.on('error', reject);
  });
}

/**
 * Run the UK Visa Jobs extractor once per search term and merge the results.
 *
 * Terms come from `options.searchTerms` when non-empty, otherwise from
 * `options.searchKeyword`, otherwise a single run without a keyword is made.
 * Before each run the dataset directory is cleared and recreated; afterwards
 * the dataset is read and its jobs are de-duplicated across terms by
 * `sourceJobId`, falling back to `jobUrl`.
 *
 * A failing term is logged and skipped so the remaining terms still run.
 *
 * @param options - Optional limits and search terms.
 * @returns `success: true` with the merged jobs when at least one term
 *   completed; `success: false` with an `error` summary when every term failed.
 */
export async function runUkVisaJobs(options: RunUkVisaJobsOptions = {}): Promise<UkVisaJobsResult> {
  console.log('🇬🇧 Running UK Visa Jobs extractor...');

  // Determine terms to run
  const terms: string[] = [];
  if (options.searchTerms && options.searchTerms.length > 0) {
    terms.push(...options.searchTerms);
  } else if (options.searchKeyword) {
    terms.push(options.searchKeyword);
  } else {
    // No search terms = run once without keyword
    terms.push('');
  }

  const maxJobs = resolveUkVisaJobsMaxJobs(options.maxJobs);
  const allJobs: CreateJobInput[] = [];
  const seenIds = new Set<string>();
  const failures: string[] = [];

  for (let i = 0; i < terms.length; i++) {
    const term = terms[i];
    const termLabel = term ? `"${term}"` : 'all jobs';
    console.log(` Running for ${termLabel}...`);

    try {
      // Start from an empty dataset so only this term's output is read back.
      await clearStorageDataset();
      await mkdir(STORAGE_DIR, { recursive: true });

      await spawnUkVisaJobsExtractor(term, maxJobs);

      // Read the output dataset and accumulate
      const runJobs = await readDataset();
      let newCount = 0;

      for (const job of runJobs) {
        // Deduplicate by sourceJobId or jobUrl. A job with neither has no
        // identity to compare, so it is kept rather than treated as a
        // duplicate of every other key-less job.
        const id = job.sourceJobId || job.jobUrl;
        if (id) {
          if (seenIds.has(id)) continue;
          seenIds.add(id);
        }
        allJobs.push(job);
        newCount++;
      }

      console.log(` ✅ Fetched ${runJobs.length} jobs for ${termLabel} (${newCount} new unique)`);
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Unknown error';
      console.error(`❌ UK Visa Jobs failed for ${termLabel}: ${message}`);
      failures.push(`${termLabel}: ${message}`);
      // Continue to next term instead of failing completely
    }

    // Delay between terms
    if (i < terms.length - 1) {
      console.log(' Waiting 5s before next search term...');
      await new Promise((resolve) => setTimeout(resolve, UKVISAJOBS_TERM_DELAY_MS));
    }
  }

  // `terms` always holds at least one entry, so this means nothing succeeded.
  if (failures.length === terms.length) {
    return { success: false, jobs: [], error: failures.join('; ') };
  }

  console.log(`✅ UK Visa Jobs: imported total ${allJobs.length} unique jobs`);
  return { success: true, jobs: allJobs };
}
|
||||
|
||||
/**
|
||||
* Read jobs from the extractor's output dataset.
|
||||
*/
|
||||
/**
 * Read jobs from the extractor's output dataset.
 *
 * Every `*.json` file in the dataset directory except `jobs.json` is parsed,
 * in sorted filename order, and mapped to `CreateJobInput`. Files that cannot
 * be read or parsed, or whose content is not an object with a non-empty string
 * `jobUrl`, are skipped with a warning instead of aborting the whole read.
 *
 * @returns The mapped jobs; an empty array when the directory is missing or
 *   cannot be listed.
 */
async function readDataset(): Promise<CreateJobInput[]> {
  let files: string[];
  try {
    files = await readdir(STORAGE_DIR);
  } catch (error) {
    // A missing directory just means nothing was written; anything else is
    // worth surfacing.
    const code = error instanceof Error ? (error as { code?: unknown }).code : undefined;
    if (code !== 'ENOENT') {
      const message = error instanceof Error ? error.message : 'Unknown error';
      console.warn(`⚠️ Could not list UK Visa Jobs dataset at ${STORAGE_DIR}: ${message}`);
    }
    return [];
  }

  const jsonFiles = files.filter((f) => f.endsWith('.json') && f !== 'jobs.json').sort();
  const jobs: CreateJobInput[] = [];

  for (const file of jsonFiles) {
    try {
      const content = await readFile(join(STORAGE_DIR, file), 'utf-8');
      const job = JSON.parse(content);

      // JSON.parse can yield primitives, arrays or null; only a plain object
      // carrying a job URL can be turned into a usable job.
      if (
        typeof job !== 'object' ||
        job === null ||
        Array.isArray(job) ||
        typeof job.jobUrl !== 'string' ||
        job.jobUrl === ''
      ) {
        console.warn(`⚠️ Skipping UK Visa Jobs dataset file ${file}: not a job record with a jobUrl`);
        continue;
      }

      // Map to CreateJobInput format
      jobs.push({
        source: 'ukvisajobs',
        sourceJobId: job.sourceJobId,
        title: job.title || 'Unknown Title',
        employer: job.employer || 'Unknown Employer',
        employerUrl: job.employerUrl,
        jobUrl: job.jobUrl,
        applicationLink: job.applicationLink || job.jobUrl,
        location: job.location,
        deadline: job.deadline,
        salary: job.salary,
        jobDescription: job.jobDescription,
        datePosted: job.datePosted,
        degreeRequired: job.degreeRequired,
        jobType: job.jobType,
        jobLevel: job.jobLevel,
      });
    } catch (error) {
      // Unreadable or malformed file: skip it but leave a trace.
      const message = error instanceof Error ? error.message : 'Unknown error';
      console.warn(`⚠️ Skipping UK Visa Jobs dataset file ${file}: ${message}`);
    }
  }

  return jobs;
}
|
||||
@ -2,7 +2,7 @@
|
||||
* Shared types for the job-ops orchestrator.
|
||||
*/
|
||||
|
||||
export type JobStatus =
|
||||
export type JobStatus =
|
||||
| 'discovered' // Crawled but not processed
|
||||
| 'processing' // Currently generating resume
|
||||
| 'ready' // PDF generated, waiting for user to apply
|
||||
@ -13,11 +13,12 @@ export type JobStatus =
|
||||
export type JobSource =
|
||||
| 'gradcracker'
|
||||
| 'indeed'
|
||||
| 'linkedin';
|
||||
| 'linkedin'
|
||||
| 'ukvisajobs';
|
||||
|
||||
export interface Job {
|
||||
id: string;
|
||||
|
||||
|
||||
// Source / provenance
|
||||
source: JobSource;
|
||||
sourceJobId: string | null; // External ID (if provided)
|
||||
@ -37,7 +38,7 @@ export interface Job {
|
||||
degreeRequired: string | null;
|
||||
starting: string | null;
|
||||
jobDescription: string | null;
|
||||
|
||||
|
||||
// Orchestrator enrichments
|
||||
status: JobStatus;
|
||||
suitabilityScore: number | null; // 0-100 AI-generated score
|
||||
@ -71,7 +72,7 @@ export interface Job {
|
||||
companyReviewsCount: number | null;
|
||||
vacancyCount: number | null;
|
||||
workFromHomeType: string | null;
|
||||
|
||||
|
||||
// Timestamps
|
||||
discoveredAt: string;
|
||||
processedAt: string | null;
|
||||
@ -200,4 +201,25 @@ export interface AppSettings {
|
||||
resumeProjects: ResumeProjectsSettings;
|
||||
defaultResumeProjects: ResumeProjectsSettings;
|
||||
overrideResumeProjects: ResumeProjectsSettings | null;
|
||||
ukvisajobsMaxJobs: number;
|
||||
defaultUkvisajobsMaxJobs: number;
|
||||
overrideUkvisajobsMaxJobs: number | null;
|
||||
searchTerms: string[];
|
||||
defaultSearchTerms: string[];
|
||||
overrideSearchTerms: string[] | null;
|
||||
jobspyLocation: string;
|
||||
defaultJobspyLocation: string;
|
||||
overrideJobspyLocation: string | null;
|
||||
jobspyResultsWanted: number;
|
||||
defaultJobspyResultsWanted: number;
|
||||
overrideJobspyResultsWanted: number | null;
|
||||
jobspyHoursOld: number;
|
||||
defaultJobspyHoursOld: number;
|
||||
overrideJobspyHoursOld: number | null;
|
||||
jobspyCountryIndeed: string;
|
||||
defaultJobspyCountryIndeed: string;
|
||||
overrideJobspyCountryIndeed: string | null;
|
||||
jobspyLinkedinFetchDescription: boolean;
|
||||
defaultJobspyLinkedinFetchDescription: boolean;
|
||||
overrideJobspyLinkedinFetchDescription: boolean | null;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user