autologin for ukvisajobs

This commit is contained in:
DaKheera47 2026-01-07 23:53:01 +00:00
parent 4726c463c8
commit 2b2af06bb8
8 changed files with 1417 additions and 71 deletions

View File

@ -40,9 +40,8 @@ JOBSPY_LINKEDIN_FETCH_DESCRIPTION=1
# =============================================================================
# UKVisaJobs (UK visa sponsorship jobs) - optional
# =============================================================================
# Get these tokens from browser dev tools after logging into my.ukvisajobs.com
# Provide email/password for automatic login and token refresh.
# See extractors/ukvisajobs/README.md for detailed instructions.
UKVISAJOBS_TOKEN=
UKVISAJOBS_AUTH_TOKEN=
UKVISAJOBS_CSRF_TOKEN=
UKVISAJOBS_CI_SESSION=
UKVISAJOBS_EMAIL=
UKVISAJOBS_PASSWORD=
UKVISAJOBS_HEADLESS=true

5
.gitignore vendored
View File

@ -5,6 +5,9 @@
# Data directory (bind mount in Docker)
data/
# Extractor storage outputs and cached auth
extractors/ukvisajobs/storage/
# OS files
.DS_Store
Thumbs.db
Thumbs.db

View File

@ -51,10 +51,9 @@ services:
- WEBHOOK_SECRET=${WEBHOOK_SECRET:-}
# UKVisaJobs (UK visa sponsorship jobs) - optional
- UKVISAJOBS_TOKEN=${UKVISAJOBS_TOKEN:-}
- UKVISAJOBS_AUTH_TOKEN=${UKVISAJOBS_AUTH_TOKEN:-}
- UKVISAJOBS_CSRF_TOKEN=${UKVISAJOBS_CSRF_TOKEN:-}
- UKVISAJOBS_CI_SESSION=${UKVISAJOBS_CI_SESSION:-}
- UKVISAJOBS_EMAIL=${UKVISAJOBS_EMAIL:-}
- UKVISAJOBS_PASSWORD=${UKVISAJOBS_PASSWORD:-}
- UKVISAJOBS_HEADLESS=${UKVISAJOBS_HEADLESS:-true}
- UKVISAJOBS_SEARCH_KEYWORD=${UKVISAJOBS_SEARCH_KEYWORD:-}
# Python path (uses system python in container)

View File

@ -1,4 +1,4 @@
# UK Visa Jobs Extractor
# UK Visa Jobs Extractor
Fetches job listings from [my.ukvisajobs.com](https://my.ukvisajobs.com) that may sponsor work visas.
@ -8,28 +8,38 @@ Fetches job listings from [my.ukvisajobs.com](https://my.ukvisajobs.com) that ma
npm install
```
If Playwright browsers are skipped in your environment, install Firefox:
```bash
npx playwright install firefox
```
If Camoufox assets are missing, fetch them:
```bash
npx camoufox-js fetch
```
## Configuration
Set the following environment variables (you can get these from your browser's dev tools after logging in):
Set the following environment variables:
| Variable | Description |
|----------|-------------|
| `UKVISAJOBS_TOKEN` | JWT token from the request body (required) |
| `UKVISAJOBS_AUTH_TOKEN` | Auth cookie token (defaults to UKVISAJOBS_TOKEN) |
| `UKVISAJOBS_CSRF_TOKEN` | CSRF token from cookies |
| `UKVISAJOBS_CI_SESSION` | CI session ID from cookies |
| `UKVISAJOBS_EMAIL` | Login email for automatic token refresh |
| `UKVISAJOBS_PASSWORD` | Login password for automatic token refresh |
| `UKVISAJOBS_HEADLESS` | Set to `false` to show the browser (default: true) |
| `UKVISAJOBS_MAX_JOBS` | Maximum jobs to fetch (default: 50, max: 200) |
| `UKVISAJOBS_SEARCH_KEYWORD` | Optional search filter |
## How to get tokens
## Automatic login & cache
1. Log into `my.ukvisajobs.com` in your browser
2. Open Developer Tools → Network tab
3. Navigate to the jobs page
4. Find the `fetch-jobs-data` POST request
5. Copy values:
- From **Request Body**: copy the `token` field → `UKVISAJOBS_TOKEN`
- From **Cookies**: copy `authToken`, `csrf_token`, `ci_session`
The extractor will:
1. Launch a Camoufox (Playwright Firefox) browser and sign in
2. Navigate to the open jobs page and capture the token/cookies
3. Cache the session to `storage/ukvisajobs-auth.json`
4. Reuse the cached values until the API reports an expired token, then refresh
## Running
@ -38,3 +48,4 @@ npm start
```
Output is written to `storage/datasets/default/` as JSON files.

File diff suppressed because it is too large Load Diff

View File

@ -4,7 +4,10 @@
"type": "module",
"description": "UK Visa Jobs extractor - fetches job listings that may sponsor work visas",
"main": "dist/main.js",
"dependencies": {},
"dependencies": {
"camoufox-js": "^0.8.0",
"playwright": "^1.57.0"
},
"devDependencies": {
"@apify/tsconfig": "^0.1.0",
"@types/node": "^24.0.0",
@ -15,8 +18,10 @@
"start": "npm run start:dev",
"start:prod": "node dist/main.js",
"start:dev": "tsx src/main.ts",
"build": "tsc"
"build": "tsc",
"get-binaries": "camoufox-js fetch",
"postinstall": "npm run get-binaries"
},
"author": "",
"license": "ISC"
}
}

View File

@ -1,25 +1,28 @@
/**
/**
* UK Visa Jobs Extractor
*
* Fetches job listings from my.ukvisajobs.com that may sponsor work visas.
* Outputs JSON to stdout for the orchestrator to consume.
*
* Environment variables:
* UKVISAJOBS_TOKEN - JWT token (required)
* UKVISAJOBS_AUTH_TOKEN - Auth cookie token (defaults to UKVISAJOBS_TOKEN)
* UKVISAJOBS_CSRF_TOKEN - CSRF token cookie
* UKVISAJOBS_CI_SESSION - CI session cookie
* UKVISAJOBS_EMAIL - Login email for auto-refresh
* UKVISAJOBS_PASSWORD - Login password for auto-refresh
* UKVISAJOBS_HEADLESS - Set to "false" to show the browser (default: true)
* UKVISAJOBS_MAX_JOBS - Maximum jobs to fetch (default: 50, max: 200) - Set via UI Settings
* UKVISAJOBS_SEARCH_KEYWORD - Optional search filter
*/
import { mkdir, writeFile } from 'fs/promises';
import { mkdir, writeFile, readFile } from 'fs/promises';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import type { Request } from 'playwright';
const __dirname = dirname(fileURLToPath(import.meta.url));
const API_URL = 'https://my.ukvisajobs.com/ukvisa-api/api/fetch-jobs-data';
const SIGNIN_URL = 'https://my.ukvisajobs.com/signin';
const OPEN_JOBS_URL = 'https://my.ukvisajobs.com/open-jobs/1?is_global=0&sortBy=desc&visaAcceptance=false&applicants_outside_uk=false&pageNo=1';
const AUTH_CACHE_PATH = join(__dirname, '../storage/ukvisajobs-auth.json');
const JOBS_PER_PAGE = 15;
const DEFAULT_MAX_JOBS = 50;
const MAX_ALLOWED_JOBS = 200;
@ -77,6 +80,27 @@ interface ExtractedJob {
jobLevel?: string;
}
/**
 * Credentials needed to call the UKVisaJobs jobs API, together with
 * bookkeeping about when and how they were obtained.
 */
interface UkVisaJobsAuthSession {
  /** Value sent as the `token` form field of the jobs API request. */
  token: string;
  /** Value sent as the `authToken` cookie. */
  authToken: string;
  /** Value sent as the `csrf_token` cookie (empty string when unavailable). */
  csrfToken: string;
  /** Value sent as the `ci_session` cookie (empty string when unavailable). */
  ciSession: string;
  /** ISO-8601 timestamp recorded when the session object was created. */
  fetchedAt: string;
  /** Whether the session was read from the on-disk cache or captured from a browser login. */
  source: 'cache' | 'browser';
}
/**
 * Error raised when the UKVisaJobs API response indicates that the current
 * credentials were rejected or have expired. Carries the HTTP status and the
 * raw response body so callers can inspect them.
 */
class UkVisaJobsAuthError extends Error {
  /** HTTP status code of the failed response. */
  status: number;
  /** Raw response body text of the failed response. */
  responseText: string;

  /**
   * @param message - Human-readable error description.
   * @param status - HTTP status code returned by the API.
   * @param responseText - Response body returned by the API.
   */
  constructor(message: string, status: number, responseText: string) {
    super(message);
    // Override the default "Error" name so logs identify the error class.
    this.name = 'UkVisaJobsAuthError';
    this.status = status;
    this.responseText = responseText;
  }
}
function toStringOrNull(value: unknown): string | null {
if (value === null || value === undefined) return null;
if (typeof value === 'string') {
@ -101,8 +125,7 @@ function toNumberOrNull(value: unknown): number | null {
async function fetchPage(
pageNo: number,
token: string,
cookies: string,
session: UkVisaJobsAuthSession,
options: { searchKeyword?: string } = {}
): Promise<UkVisaJobsApiResponse> {
// Use native FormData API (Node.js 18+)
@ -113,7 +136,9 @@ async function fetchPage(
formData.append('visaAcceptance', 'false');
formData.append('applicants_outside_uk', 'false');
formData.append('searchKeyword', options.searchKeyword || 'null');
formData.append('token', token);
formData.append('token', session.token);
const cookies = buildCookieHeader(session);
const response = await fetch(API_URL, {
method: 'POST',
@ -130,6 +155,13 @@ async function fetchPage(
if (!response.ok) {
const text = await response.text();
if (isAuthErrorResponse(response.status, text)) {
throw new UkVisaJobsAuthError(
`UKVisaJobs API returned ${response.status}: ${response.statusText} - ${text}`,
response.status,
text
);
}
throw new Error(`UKVisaJobs API returned ${response.status}: ${response.statusText} - ${text}`);
}
@ -143,12 +175,12 @@ function mapJob(raw: UkVisaJobsApiJob): ExtractedJob {
const maxSalary = toNumberOrNull(raw.max_salary);
if (minSalary !== null && minSalary > 0 && maxSalary !== null && maxSalary > 0) {
salary = `£${minSalary.toLocaleString()}-${maxSalary.toLocaleString()}`;
salary = `£${minSalary.toLocaleString()}-${maxSalary.toLocaleString()}`;
if (raw.salary_interval) {
salary += ` / ${raw.salary_interval}`;
}
} else if (maxSalary !== null && maxSalary > 0) {
salary = `£${maxSalary.toLocaleString()}`;
salary = `£${maxSalary.toLocaleString()}`;
if (raw.salary_interval) {
salary += ` / ${raw.salary_interval}`;
}
@ -188,30 +220,181 @@ function mapJob(raw: UkVisaJobsApiJob): ExtractedJob {
};
}
async function main(): Promise<void> {
console.log('🇬🇧 UK Visa Jobs Extractor starting...');
/**
 * Builds the `Cookie` header value for a session.
 *
 * Cookies are emitted in the order `csrf_token`, `ci_session`, `authToken`;
 * any cookie whose value is empty is left out.
 *
 * @param session - Session whose cookie values should be serialised.
 * @returns The `name=value` pairs joined with `"; "`, or `""` when all values are empty.
 */
function buildCookieHeader(session: UkVisaJobsAuthSession): string {
  const candidates: Array<[string, string]> = [
    ['csrf_token', session.csrfToken],
    ['ci_session', session.ciSession],
    ['authToken', session.authToken],
  ];

  return candidates
    .filter(([, value]) => Boolean(value))
    .map(([name, value]) => `${name}=${value}`)
    .join('; ');
}
// Get credentials from environment
const token = process.env.UKVISAJOBS_TOKEN;
const authToken = process.env.UKVISAJOBS_AUTH_TOKEN || token;
const csrfToken = process.env.UKVISAJOBS_CSRF_TOKEN || '';
const ciSession = process.env.UKVISAJOBS_CI_SESSION || '';
/**
 * Reads the login credentials from the `UKVISAJOBS_EMAIL` and
 * `UKVISAJOBS_PASSWORD` environment variables.
 *
 * @returns The email/password pair, or `null` when either variable is unset or empty.
 */
function getLoginCredentials(): { email: string; password: string } | null {
  const { UKVISAJOBS_EMAIL: email, UKVISAJOBS_PASSWORD: password } = process.env;
  return email && password ? { email, password } : null;
}
/**
 * Loads a previously saved auth session from `AUTH_CACHE_PATH`.
 *
 * The file content is untrusted JSON, so every field is type-checked instead
 * of being cast: fields that are not strings are treated as missing. A missing
 * `authToken` falls back to `token`, missing cookies become empty strings, and
 * a missing `fetchedAt` is replaced with the current time.
 *
 * @returns The cached session (with `source: 'cache'`), or `null` when the
 *   file cannot be read, is not valid JSON, is not a JSON object, or has no
 *   non-empty string `token`.
 */
async function loadCachedAuthSession(): Promise<UkVisaJobsAuthSession | null> {
  try {
    const parsed: unknown = JSON.parse(await readFile(AUTH_CACHE_PATH, 'utf8'));
    if (typeof parsed !== 'object' || parsed === null) return null;

    const fields = parsed as Record<string, unknown>;
    const asString = (value: unknown): string => (typeof value === 'string' ? value : '');

    const token = asString(fields.token);
    if (!token) return null;

    return {
      token,
      authToken: asString(fields.authToken) || token,
      csrfToken: asString(fields.csrfToken),
      ciSession: asString(fields.ciSession),
      fetchedAt: asString(fields.fetchedAt) || new Date().toISOString(),
      source: 'cache',
    };
  } catch {
    // Best effort: an unreadable or corrupt cache is treated as "no cache".
    return null;
  }
}
/**
 * Persists an auth session to `AUTH_CACHE_PATH` as pretty-printed JSON,
 * creating the parent directory when needed.
 *
 * Only the known session fields are written. Because the file holds live
 * credentials, it is created with owner-only permissions (0o600). Note that
 * the mode only applies when the file is created; an existing file keeps its
 * current permissions.
 *
 * @param session - Session to persist.
 */
async function saveCachedAuthSession(session: UkVisaJobsAuthSession): Promise<void> {
  const payload = {
    token: session.token,
    authToken: session.authToken,
    csrfToken: session.csrfToken,
    ciSession: session.ciSession,
    fetchedAt: session.fetchedAt,
    source: session.source,
  };

  await mkdir(dirname(AUTH_CACHE_PATH), { recursive: true });
  await writeFile(AUTH_CACHE_PATH, JSON.stringify(payload, null, 2), { mode: 0o600 });
}
/**
 * Extracts the value of a single field from a raw multipart/form-data body.
 *
 * The field is located by its `name="<field>"` marker. The value starts after
 * the blank line that ends the part headers and runs to the next line break.
 * Both CRLF and LF-only bodies are supported: whichever line-break style
 * separates the headers from the value is also used to find the end of the
 * value, so an LF-only body no longer returns the rest of the payload.
 *
 * @param body - Raw request body text.
 * @param field - Form field name to look up.
 * @returns The trimmed first line of the field value, or `null` when the field
 *   or its header/value separator cannot be found.
 */
function extractMultipartField(body: string, field: string): string | null {
  const nameToken = `name="${field}"`;
  const index = body.indexOf(nameToken);
  if (index === -1) return null;

  const afterName = body.slice(index + nameToken.length);

  // Prefer the standard CRLF blank line; fall back to a bare LF blank line.
  let lineBreak = '\r\n';
  let separatorIndex = afterName.indexOf('\r\n\r\n');
  if (separatorIndex === -1) {
    lineBreak = '\n';
    separatorIndex = afterName.indexOf('\n\n');
  }
  if (separatorIndex === -1) return null;

  // Skip the blank line (two line breaks) to reach the start of the value.
  const remainder = afterName.slice(separatorIndex + lineBreak.length * 2);
  const endIndex = remainder.indexOf(lineBreak);
  const value = endIndex === -1 ? remainder : remainder.slice(0, endIndex);
  return value.trim();
}
/**
 * Pulls the `token` form field out of a captured request's POST body.
 *
 * The body is first treated as multipart/form-data; if no token is found that
 * way it is parsed as URL-encoded form data instead.
 *
 * @param request - Captured Playwright request.
 * @returns The token, or `null` when the request has no body or no non-empty
 *   `token` field.
 */
function extractTokenFromRequest(request: Request): string | null {
  const body = request.postData();
  if (!body) return null;

  const fromMultipart = extractMultipartField(body, 'token');
  if (fromMultipart) return fromMultipart;

  try {
    return new URLSearchParams(body).get('token') || null;
  } catch {
    return null;
  }
}
/**
 * Decides whether a failed API response means the credentials are invalid or
 * expired.
 *
 * - 401 and 403 always count as auth errors.
 * - 400 counts only when the body signals expiry: a JSON `errorType` equal to
 *   `"expired"`, a JSON `message` containing "expired", or the raw body text
 *   containing "expired" (case-insensitive).
 * - Every other status is not an auth error.
 *
 * @param status - HTTP status code of the response.
 * @param bodyText - Raw response body.
 */
function isAuthErrorResponse(status: number, bodyText: string): boolean {
  switch (status) {
    case 401:
    case 403:
      return true;
    case 400:
      break;
    default:
      return false;
  }

  const mentionsExpired = (text: string): boolean => text.toLowerCase().includes('expired');

  try {
    const payload = JSON.parse(bodyText) as { errorType?: string; message?: string };
    if (payload?.errorType === 'expired') return true;
    if (payload?.message && mentionsExpired(payload.message)) return true;
  } catch {
    // Body is not usable JSON; fall back to the plain-text check below.
  }

  return mentionsExpired(bodyText);
}
/**
 * Signs in to my.ukvisajobs.com in a browser and captures the credentials
 * needed to call the jobs API directly.
 *
 * Steps:
 * 1. Launch Playwright Firefox with launch options produced by `camoufox-js`.
 * 2. Fill in and submit the sign-in form at `SIGNIN_URL`.
 * 3. Open `OPEN_JOBS_URL` while watching for the page's own POST to the
 *    `fetch-jobs-data` endpoint, and read the `token` field from that request.
 * 4. Read the `csrf_token`, `ci_session` and `authToken` cookies.
 *
 * If no `fetch-jobs-data` request is seen, or no token can be parsed from it,
 * the `authToken` cookie is used as the token instead.
 *
 * The browser runs headless unless `UKVISAJOBS_HEADLESS` is exactly `"false"`.
 * It is always closed before this function returns or throws.
 *
 * @param email - Account email typed into the sign-in form.
 * @param password - Account password typed into the sign-in form.
 * @returns A freshly captured session with `source: 'browser'`.
 * @throws Error when neither the captured request nor the cookies yield a token,
 *   or when any browser step fails.
 */
async function loginWithBrowser(email: string, password: string): Promise<UkVisaJobsAuthSession> {
  const [{ launchOptions }, { firefox }] = await Promise.all([
    import('camoufox-js'),
    import('playwright'),
  ]);

  const headless = process.env.UKVISAJOBS_HEADLESS !== 'false';
  const browser = await firefox.launch(await launchOptions({
    headless,
    humanize: true,
    geoip: true,
  }));

  try {
    // Created inside the try block so the browser is closed even if this setup fails.
    const context = await browser.newContext();
    const page = await context.newPage();

    await page.goto(SIGNIN_URL, { waitUntil: 'domcontentloaded' });
    await page.waitForSelector('#email', { timeout: 15000 });
    await page.fill('#email', email);
    await page.fill('#password', password);
    await page.keyboard.press('Enter');
    // Fixed pause after submitting the form (presumably to let sign-in finish).
    await page.waitForTimeout(7000);

    // Start listening before navigating so the request cannot be missed.
    // The rejection (timeout or page closed) is handled immediately so it can
    // never surface as an unhandled rejection if a later step throws first.
    const requestPromise: Promise<Request | null> = page
      .waitForRequest(
        (request) => request.url().includes('/ukvisa-api/api/fetch-jobs-data') && request.method() === 'POST',
        { timeout: 30000 }
      )
      .catch(() => null);

    await page.goto(OPEN_JOBS_URL, { waitUntil: 'networkidle' });
    await page.waitForTimeout(5000);

    const fetchRequest = await requestPromise;

    const cookies = await context.cookies('https://my.ukvisajobs.com');
    const cookieValue = (name: string): string =>
      cookies.find((cookie) => cookie.name === name)?.value || '';
    const csrfToken = cookieValue('csrf_token');
    const ciSession = cookieValue('ci_session');
    const authToken = cookieValue('authToken');

    // Prefer the token the page itself sent; fall back to the auth cookie when
    // the request was not captured or carried no parsable token.
    const capturedToken = fetchRequest ? extractTokenFromRequest(fetchRequest) : null;
    const token = capturedToken || authToken;
    if (!token) {
      throw new Error('Failed to locate auth token from browser session.');
    }

    return {
      token,
      authToken: authToken || token,
      csrfToken,
      ciSession,
      fetchedAt: new Date().toISOString(),
      source: 'browser',
    };
  } finally {
    await browser.close();
  }
}
async function main(): Promise<void> {
console.log('🇬🇧 UK Visa Jobs Extractor starting...');
const credentials = getLoginCredentials();
const searchKeyword = process.env.UKVISAJOBS_SEARCH_KEYWORD || undefined;
if (!token) {
console.error('❌ UKVISAJOBS_TOKEN environment variable is not set');
process.exit(1);
let authSession = await loadCachedAuthSession();
if (!authSession) {
if (!credentials) {
console.error('ERROR: UKVISAJOBS_EMAIL and UKVISAJOBS_PASSWORD must be set');
process.exit(1);
}
console.log(' No cached session found. Logging in to refresh tokens...');
authSession = await loginWithBrowser(credentials.email, credentials.password);
await saveCachedAuthSession(authSession);
}
// Build cookies string
const cookieParts: string[] = [];
if (csrfToken) cookieParts.push(`csrf_token=${csrfToken}`);
if (ciSession) cookieParts.push(`ci_session=${ciSession}`);
if (authToken) cookieParts.push(`authToken=${authToken}`);
const cookies = cookieParts.join('; ');
console.log(` Cookies configured: ${cookieParts.length > 0 ? 'Yes' : 'No'}`);
console.log(` Token length: ${token.length}`);
const cookies = buildCookieHeader(authSession);
console.log(` Auth source: ${authSession.source}`);
console.log(` Cookies configured: ${cookies ? 'Yes' : 'No'}`);
console.log(` Token length: ${authSession.token.length}`);
// Get max jobs from environment
const maxJobsEnv = toNumberOrNull(process.env.UKVISAJOBS_MAX_JOBS);
@ -232,10 +415,25 @@ async function main(): Promise<void> {
while (pageNo <= maxPages && allJobs.length < maxJobs) {
console.log(` Fetching page ${pageNo}/${maxPages}...`);
const response = await fetchPage(pageNo, token, cookies, { searchKeyword });
let response: UkVisaJobsApiResponse;
try {
response = await fetchPage(pageNo, authSession, { searchKeyword });
} catch (error) {
if (error instanceof UkVisaJobsAuthError) {
if (!credentials) {
throw new Error('UKVisaJobs auth expired. Set UKVISAJOBS_EMAIL and UKVISAJOBS_PASSWORD to refresh.');
}
console.log(' Auth expired. Refreshing tokens...');
authSession = await loginWithBrowser(credentials.email, credentials.password);
await saveCachedAuthSession(authSession);
response = await fetchPage(pageNo, authSession, { searchKeyword });
} else {
throw error;
}
}
if (response.status !== 1) {
console.warn(` ⚠️ API returned status ${response.status} on page ${pageNo}`);
console.warn(` ⚠️ API returned status ${response.status} on page ${pageNo}`);
break;
}
@ -271,7 +469,7 @@ async function main(): Promise<void> {
await new Promise((resolve) => setTimeout(resolve, 500));
}
console.log(` Scraped ${allJobs.length} jobs`);
console.log(`✅ Scraped ${allJobs.length} jobs`);
// Write output to storage directory (similar to Crawlee dataset structure)
const storageDir = join(__dirname, '../storage/datasets/default');
@ -292,7 +490,7 @@ async function main(): Promise<void> {
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
console.error(` Error: ${message}`);
console.error(`❌ Error: ${message}`);
process.exit(1);
}
}
@ -301,3 +499,6 @@ main().catch((error) => {
console.error('Fatal error:', error);
process.exit(1);
});

View File

@ -1,4 +1,4 @@
/**
/**
* Service for running the UK Visa Jobs extractor (extractors/ukvisajobs).
*
* Spawns the extractor as a child process and reads its output dataset.
@ -13,6 +13,14 @@ import type { CreateJobInput } from '../../shared/types.js';
const __dirname = dirname(fileURLToPath(import.meta.url));
const UKVISAJOBS_DIR = join(__dirname, '../../../../extractors/ukvisajobs');
const STORAGE_DIR = join(UKVISAJOBS_DIR, 'storage/datasets/default');
const AUTH_CACHE_PATH = join(UKVISAJOBS_DIR, 'storage/ukvisajobs-auth.json');
/**
 * Shape of the auth data read from the extractor's cache file
 * (`AUTH_CACHE_PATH`). Every field is optional because the file content is
 * not validated when it is loaded.
 */
interface UkVisaJobsAuthSession {
  /** API token; used for the `authToken` cookie when `authToken` is absent. */
  token?: string;
  /** Value for the `authToken` cookie. */
  authToken?: string;
  /** Value for the `csrf_token` cookie. */
  csrfToken?: string;
  /** Value for the `ci_session` cookie. */
  ciSession?: string;
}
export interface RunUkVisaJobsOptions {
/** Maximum number of jobs to fetch per search term. Defaults to 50, max 200. */
@ -73,11 +81,11 @@ async function fetchJobDescription(url: string): Promise<string | null> {
try {
console.log(` Fetching description from ${url}...`);
// Build cookies if present in env (similar to extractor)
const authSession = await loadCachedAuthSession();
const cookieParts: string[] = [];
if (process.env.UKVISAJOBS_CSRF_TOKEN) cookieParts.push(`csrf_token=${process.env.UKVISAJOBS_CSRF_TOKEN}`);
if (process.env.UKVISAJOBS_CI_SESSION) cookieParts.push(`ci_session=${process.env.UKVISAJOBS_CI_SESSION}`);
const token = process.env.UKVISAJOBS_AUTH_TOKEN || process.env.UKVISAJOBS_TOKEN;
if (authSession?.csrfToken) cookieParts.push(`csrf_token=${authSession.csrfToken}`);
if (authSession?.ciSession) cookieParts.push(`ci_session=${authSession.ciSession}`);
const token = authSession?.authToken || authSession?.token;
if (token) cookieParts.push(`authToken=${token}`);
const headers: Record<string, string> = {
@ -101,7 +109,16 @@ async function fetchJobDescription(url: string): Promise<string | null> {
// If we only got a tiny bit of text, it might have failed
return cleaned.length > 100 ? cleaned : null;
} catch (error) {
console.warn(` ⚠️ Failed to fetch description: ${error instanceof Error ? error.message : 'Unknown error'}`);
console.warn(` ⚠️ Failed to fetch description: ${error instanceof Error ? error.message : 'Unknown error'}`);
return null;
}
}
/**
 * Reads the auth session that the extractor cached at `AUTH_CACHE_PATH`.
 *
 * @returns The parsed JSON content (not validated), or `null` when the file
 *   cannot be read or does not contain valid JSON.
 */
async function loadCachedAuthSession(): Promise<UkVisaJobsAuthSession | null> {
  let raw: string;
  try {
    raw = await readFile(AUTH_CACHE_PATH, 'utf-8');
  } catch {
    // Missing or unreadable cache file.
    return null;
  }

  try {
    return JSON.parse(raw) as UkVisaJobsAuthSession;
  } catch {
    // Corrupt cache file.
    return null;
  }
}
@ -118,7 +135,7 @@ async function clearStorageDataset(): Promise<void> {
}
export async function runUkVisaJobs(options: RunUkVisaJobsOptions = {}): Promise<UkVisaJobsResult> {
console.log('🇬🇧 Running UK Visa Jobs extractor...');
console.log('🇬🇧 Running UK Visa Jobs extractor...');
// Determine terms to run
const terms: string[] = [];
@ -192,11 +209,11 @@ export async function runUkVisaJobs(options: RunUkVisaJobsOptions = {}): Promise
}
}
console.log(` Fetched ${runJobs.length} jobs for ${termLabel} (${newCount} new unique)`);
console.log(` ✅ Fetched ${runJobs.length} jobs for ${termLabel} (${newCount} new unique)`);
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
console.error(` UK Visa Jobs failed for ${termLabel}: ${message}`);
console.error(`❌ UK Visa Jobs failed for ${termLabel}: ${message}`);
// Continue to next term instead of failing completely
}
@ -207,7 +224,7 @@ export async function runUkVisaJobs(options: RunUkVisaJobsOptions = {}): Promise
}
}
console.log(` UK Visa Jobs: imported total ${allJobs.length} unique jobs`);
console.log(`✅ UK Visa Jobs: imported total ${allJobs.length} unique jobs`);
return { success: true, jobs: allJobs };
}
@ -254,3 +271,4 @@ async function readDataset(): Promise<CreateJobInput[]> {
return jobs;
}