Add startup.jobs extractor support (#279)

* Add startup.jobs extractor support

* Harden startup.jobs extractor inputs

* Wire startupjobs into Docker and CI

* Tighten startupjobs review follow-ups

* fix: publish ghcr during release workflow

* feat: add startupjobs max jobs configuration and update related tests
This commit is contained in:
Shaheer Sarfaraz 2026-03-17 12:20:45 +00:00 committed by GitHub
parent 26275e4ee8
commit 71e640b563
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
28 changed files with 1177 additions and 132 deletions

View File

@ -54,6 +54,7 @@ jobs:
- adzuna-extractor
- hiringcafe-extractor
- gradcracker-extractor
- startupjobs-extractor
- ukvisajobs-extractor
steps:
- uses: actions/checkout@v4

View File

@ -7,9 +7,14 @@ on:
description: "Next release version (x.y.z)"
required: true
type: string
release_title:
description: "Optional release title shown on GitHub (defaults to vX.Y.Z)"
required: false
type: string
permissions:
contents: write
packages: write
concurrency:
group: release-${{ inputs.version }}
@ -83,8 +88,50 @@ jobs:
git tag "v$RELEASE_VERSION"
git push origin "v$RELEASE_VERSION"
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to GHCR
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Docker meta (tags/labels)
id: docker-meta
uses: docker/metadata-action@v5
with:
images: ghcr.io/${{ github.repository_owner }}/job-ops
tags: |
type=raw,value=v${{ inputs.version }}
type=raw,value=latest
type=sha
- name: Build and push GHCR image
uses: docker/build-push-action@v6
with:
context: .
file: ./Dockerfile
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.docker-meta.outputs.tags }}
labels: ${{ steps.docker-meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Create GitHub release
env:
GH_TOKEN: ${{ github.token }}
RELEASE_VERSION: ${{ inputs.version }}
run: gh release create "v$RELEASE_VERSION" --title "v$RELEASE_VERSION" --generate-notes
INPUT_RELEASE_TITLE: ${{ inputs.release_title }}
run: |
RELEASE_TITLE="$(printf '%s' "$INPUT_RELEASE_TITLE" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"
if [ -z "$RELEASE_TITLE" ]; then
RELEASE_TITLE="v$RELEASE_VERSION"
fi
gh release create "v$RELEASE_VERSION" --title "$RELEASE_TITLE" --generate-notes

View File

@ -60,7 +60,8 @@ Releases are driven from GitHub Actions.
1. Open the `release` workflow in GitHub Actions.
2. Enter the next version as `x.y.z` (for example `0.1.30`).
3. Run the workflow.
3. Optionally enter a separate release title for GitHub (for example `Google Dorks!`).
4. Run the workflow.
The workflow will:
@ -68,9 +69,10 @@ The workflow will:
- update `package-lock.json`
- commit the version bump to `main`
- create and push tag `vX.Y.Z`
- create the GitHub release
- publish the `ghcr.io/.../job-ops` image for that release
- create the GitHub release using either the custom title or `vX.Y.Z`
The app version shown in the UI is sourced from `orchestrator/package.json`, so the release version, tag, and displayed app version stay aligned.
The app version shown in the UI is sourced from `orchestrator/package.json`, so the release version, tag, and displayed app version stay aligned even when the GitHub release title is customized separately.
## Validation Before PR (CI-Parity Checks)

View File

@ -38,6 +38,7 @@ COPY orchestrator/package*.json ./orchestrator/
COPY extractors/adzuna/package*.json ./extractors/adzuna/
COPY extractors/hiringcafe/package*.json ./extractors/hiringcafe/
COPY extractors/gradcracker/package*.json ./extractors/gradcracker/
COPY extractors/startupjobs/package*.json ./extractors/startupjobs/
COPY extractors/ukvisajobs/package*.json ./extractors/ukvisajobs/
# Install Node dependencies with npm cache (dev deps needed for build)
@ -59,6 +60,7 @@ COPY extractors/adzuna ./extractors/adzuna
COPY extractors/hiringcafe ./extractors/hiringcafe
COPY extractors/gradcracker ./extractors/gradcracker
COPY extractors/jobspy ./extractors/jobspy
COPY extractors/startupjobs ./extractors/startupjobs
COPY extractors/ukvisajobs ./extractors/ukvisajobs
# Build documentation site bundle
@ -105,6 +107,7 @@ COPY orchestrator/package*.json ./orchestrator/
COPY extractors/adzuna/package*.json ./extractors/adzuna/
COPY extractors/hiringcafe/package*.json ./extractors/hiringcafe/
COPY extractors/gradcracker/package*.json ./extractors/gradcracker/
COPY extractors/startupjobs/package*.json ./extractors/startupjobs/
COPY extractors/ukvisajobs/package*.json ./extractors/ukvisajobs/
# Install production Node dependencies only
@ -122,6 +125,7 @@ COPY extractors/adzuna ./extractors/adzuna
COPY extractors/hiringcafe ./extractors/hiringcafe
COPY extractors/gradcracker ./extractors/gradcracker
COPY extractors/jobspy ./extractors/jobspy
COPY extractors/startupjobs ./extractors/startupjobs
COPY extractors/ukvisajobs ./extractors/ukvisajobs
# Reuse Camoufox binaries from builder instead of fetching again

View File

@ -17,6 +17,7 @@ Extractor integrations are now registered through manifests and loaded automatic
| [JobSpy](/docs/next/extractors/jobspy) | Multi-source discovery (Indeed, LinkedIn, Glassdoor) | Requires Python wrapper execution per term; source availability and quality vary by site/location | `JOBSPY_SITES`, `JOBSPY_SEARCH_TERMS`, `JOBSPY_RESULTS_WANTED`, `JOBSPY_HOURS_OLD`, `JOBSPY_LINKEDIN_FETCH_DESCRIPTION` | Produces JSON per term, then orchestrator normalizes and de-duplicates by `jobUrl` |
| [Adzuna](/docs/next/extractors/adzuna) | API-based multi-country discovery with low scraping overhead | Requires valid App ID/App Key; country must be in Adzuna-supported list | `ADZUNA_APP_ID`, `ADZUNA_APP_KEY`, `ADZUNA_MAX_JOBS_PER_TERM` | API pagination to dataset output; orchestrator maps progress and de-duplicates by `sourceJobId`/`jobUrl` |
| [Hiring Cafe](/docs/next/extractors/hiring-cafe) | Browser-backed discovery using Hiring Cafe search APIs | Subject to upstream anti-bot checks; uses browser context and encoded search-state payloads | `HIRING_CAFE_SEARCH_TERMS`, `HIRING_CAFE_COUNTRY`, `HIRING_CAFE_MAX_JOBS_PER_TERM`, `HIRING_CAFE_DATE_FETCHED_PAST_N_DAYS` | Uses existing pipeline term/country/budget knobs and maps directly to normalized jobs |
| [startup.jobs](/docs/next/extractors/startup-jobs) | Startup-focused discovery through the published `startup-jobs-scraper` package | No credentials required; detail enrichment depends on Playwright browser binaries being installed | existing pipeline `searchTerms`, selected country/cities, `startupjobsMaxJobsPerTerm` (falling back to `jobspyResultsWanted`); `npx playwright install` for fresh environments | Algolia-backed search plus detail-page enrichment via package import; orchestrator maps normalized records and de-duplicates by `jobUrl` |
| [UKVisaJobs](/docs/next/extractors/ukvisajobs) | UK visa sponsorship-focused roles | Requires authenticated session and periodic token/cookie refresh | `UKVISAJOBS_EMAIL`, `UKVISAJOBS_PASSWORD`, `UKVISAJOBS_MAX_JOBS`, `UKVISAJOBS_SEARCH_KEYWORD` | API pagination + dataset output; orchestrator de-dupes and may fetch missing descriptions |
| [Manual Import](/docs/next/extractors/manual) | One-off jobs not covered by scrapers | Inference quality depends on model/provider and input quality; some URLs cannot be fetched reliably | App/API endpoints (`/api/manual-jobs/infer`, `/api/manual-jobs/import`) | Accepts text/HTML/URL, runs inference, then saves and scores job after review |
@ -25,6 +26,7 @@ Extractor integrations are now registered through manifests and loaded automatic
- Use **JobSpy** for broad first-pass sourcing across common boards.
- Use **Adzuna** when you want API-first discovery in supported non-UK markets.
- Use **Hiring Cafe** when you want another term/country-driven source without adding credentials.
- Use **startup.jobs** when you want startup-heavy listings without maintaining another scraper locally.
- Use **Gradcracker** when targeting graduate pipelines in the UK.
- Use **UKVisaJobs** for sponsorship-specific UK searches.
- Use **Manual Import** when you already have a specific posting and need direct import.
@ -37,6 +39,7 @@ Many runs combine sources: broad discovery first, then manual import for high-pr
- [JobSpy](/docs/next/extractors/jobspy)
- [Adzuna](/docs/next/extractors/adzuna)
- [Hiring Cafe](/docs/next/extractors/hiring-cafe)
- [startup.jobs](/docs/next/extractors/startup-jobs)
- [UKVisaJobs](/docs/next/extractors/ukvisajobs)
- [Manual Import](/docs/next/extractors/manual)
- [Add an Extractor](/docs/next/workflows/add-an-extractor)

View File

@ -0,0 +1,64 @@
---
id: startup-jobs
title: startup.jobs Extractor
description: startup.jobs extraction integrated through the startup-jobs-scraper package.
sidebar_position: 8
---
## What it is
Original website: [startup.jobs](https://startup.jobs)
This extractor wraps the published [`startup-jobs-scraper`](https://www.npmjs.com/package/startup-jobs-scraper) package and feeds normalized startup.jobs listings into the existing pipeline.
Implementation split:
1. `extractors/startupjobs/src/run.ts` calls `scrapeStartupJobsViaAlgolia` and maps package records into `CreateJobInput`.
2. `extractors/startupjobs/src/manifest.ts` adapts pipeline settings, emits progress updates, and registers the source for runtime discovery.
## Why it exists
startup.jobs adds a startup-focused board to job-ops without introducing another bespoke scraper in this repository.
Using the published package also keeps the integration small and makes it easier to evolve the scraping logic independently from the app.
## How to use it
1. Open **Run jobs** and choose **Automatic**.
2. Leave **startup.jobs** enabled in **Sources** or toggle it on.
3. Set your usual automatic run controls:
- `searchTerms` are sent as `query`.
- country or city filters are reused as the package `location` option.
- the run budget is passed as `requestedCount` per term: `startupjobsMaxJobsPerTerm` is used when set, otherwise `jobspyResultsWanted`, otherwise a default of 50.
4. Start the run and monitor progress in the pipeline progress card.
Defaults and constraints:
- No new credentials are required.
- The integration runs with `enrichDetails: true`, so it opens job detail pages for richer records.
- Browser binaries are not downloaded automatically with the package. Install them with `npx playwright install` before using this extractor in a fresh environment.
- When **Search cities** is set, the extractor runs once for every combination of city and search term.
- Without explicit cities, the selected country is used as the location filter except for broad modes such as `worldwide` and `usa/ca`.
## Common problems
### startup.jobs does not appear in sources
- Check that the app is running a build that includes the new extractor manifest.
- This source does not require credentials, so it should appear as soon as the updated build is loaded.
### Results are broader than expected
- If no city is configured, the extractor uses the selected country when possible and otherwise falls back to a broad search.
- Add **Search cities** when you want tighter geographic filtering.
### Job descriptions are missing
- Detail enrichment depends on Playwright browser binaries being installed locally.
- Run `npx playwright install` and retry if the extractor cannot open job detail pages.
## Related pages
- [Extractors Overview](/docs/next/extractors/overview)
- [Pipeline Run](/docs/next/features/pipeline-run)
- [Add an Extractor](/docs/next/workflows/add-an-extractor)

View File

@ -49,6 +49,7 @@ const sidebars: SidebarsConfig = {
"extractors/jobspy",
"extractors/adzuna",
"extractors/hiring-cafe",
"extractors/startup-jobs",
"extractors/manual",
"extractors/ukvisajobs",
],

View File

@ -0,0 +1,10 @@
# startup.jobs Extractor
Extractor wrapper around the published `startup-jobs-scraper` package.
## Notes
- Uses `scrapeStartupJobsViaAlgolia` directly from `startup-jobs-scraper`.
- Runs with `enrichDetails: true` so job descriptions and other detail-page fields are fetched during pipeline runs.
- Browser binaries are not downloaded automatically. Install them with `npx playwright install` or `npm --workspace startupjobs-extractor run get-binaries`.
- Reuses the pipeline's existing search terms, country, city, and budget controls.

View File

@ -0,0 +1,17 @@
{
"name": "startupjobs-extractor",
"version": "0.0.1",
"type": "module",
"description": "startup.jobs extractor backed by the startup-jobs-scraper package",
"dependencies": {
"startup-jobs-scraper": "^0.1.0"
},
"devDependencies": {
"@types/node": "^24.0.0",
"typescript": "~5.9.0"
},
"scripts": {
"check:types": "tsc --noEmit",
"get-binaries": "npx playwright install"
}
}

View File

@ -0,0 +1,89 @@
import { resolveSearchCities } from "@shared/search-cities.js";
import type {
ExtractorManifest,
ExtractorProgressEvent,
} from "@shared/types/extractors";
import { runStartupJobs } from "./run";
/**
 * Converts a startup.jobs runner event into the generic extractor progress
 * shape reported to the pipeline.
 *
 * @param event - Runner event; any `type` other than `"term_start"` is
 *   treated as a completed term.
 * @returns A `"list"`-phase progress event whose `currentUrl` and `detail`
 *   describe the search term (and location, when one is set).
 */
function toProgress(event: {
  type: string;
  termIndex: number;
  termTotal: number;
  searchTerm: string;
  location?: string;
  jobsFoundTerm?: number;
}): ExtractorProgressEvent {
  const { type, termIndex, termTotal, searchTerm, location, jobsFoundTerm } =
    event;

  // Human-readable label for the term, e.g. "backend @ London".
  let scope = searchTerm;
  if (location) {
    scope = `${searchTerm} @ ${location}`;
  }

  if (type !== "term_start") {
    const found = jobsFoundTerm ?? 0;
    return {
      phase: "list",
      termsProcessed: termIndex,
      termsTotal: termTotal,
      currentUrl: scope,
      jobPagesProcessed: found,
      jobPagesEnqueued: found,
      detail: `startup.jobs: completed ${termIndex}/${termTotal} (${scope}) with ${found} jobs`,
    };
  }

  // The term that is just starting has not been processed yet, so report the
  // count of terms before it (never below zero).
  return {
    phase: "list",
    termsProcessed: Math.max(termIndex - 1, 0),
    termsTotal: termTotal,
    currentUrl: scope,
    detail: `startup.jobs: term ${termIndex}/${termTotal} (${scope})`,
  };
}
/**
 * Extractor manifest that registers startup.jobs as the `startupjobs` source
 * and adapts pipeline settings into `runStartupJobs` options.
 */
export const manifest: ExtractorManifest = {
  id: "startupjobs",
  displayName: "startup.jobs",
  providesSources: ["startupjobs"],
  /**
   * Runs the extractor for the given pipeline context.
   *
   * Returns an empty successful result when cancellation was already
   * requested; otherwise forwards the runner's jobs or its error.
   */
  async run(context) {
    if (context.shouldCancel?.()) {
      return { success: true, jobs: [] };
    }

    const { settings } = context;

    // The dedicated startup.jobs budget wins; the shared JobSpy budget is
    // only consulted when the dedicated one is not set.
    let requestedBudget = Number.NaN;
    if (settings.startupjobsMaxJobsPerTerm) {
      requestedBudget = Number.parseInt(settings.startupjobsMaxJobsPerTerm, 10);
    } else if (settings.jobspyResultsWanted) {
      requestedBudget = Number.parseInt(settings.jobspyResultsWanted, 10);
    }
    // Unparsable or missing budgets default to 50; parsed values are at least 1.
    const maxJobsPerTerm = Number.isFinite(requestedBudget)
      ? Math.max(1, requestedBudget)
      : 50;

    const locations = resolveSearchCities({
      single: settings.searchCities ?? settings.jobspyLocation,
    });

    const outcome = await runStartupJobs({
      selectedCountry: context.selectedCountry,
      searchTerms: context.searchTerms,
      locations,
      maxJobsPerTerm,
      shouldCancel: context.shouldCancel,
      onProgress: (event) => {
        // Stop reporting progress once the run has been cancelled.
        if (!context.shouldCancel?.()) {
          context.onProgress?.(toProgress(event));
        }
      },
    });

    if (outcome.success) {
      return {
        success: true,
        jobs: outcome.jobs,
      };
    }

    return {
      success: false,
      jobs: [],
      error: outcome.error,
    };
  },
};
export default manifest;

View File

@ -0,0 +1,198 @@
import {
formatCountryLabel,
normalizeCountryKey,
} from "@shared/location-support.js";
import { resolveSearchCities } from "@shared/search-cities.js";
import type { CreateJobInput } from "@shared/types/jobs";
import {
type StartupJobRecord,
scrapeStartupJobsViaAlgolia,
} from "startup-jobs-scraper";
/**
 * Progress events emitted by `runStartupJobs` through `onProgress`.
 *
 * One `term_start` is emitted before the scraper is called for a
 * (location, search term) pair and one `term_complete` after its records
 * have been mapped. `termIndex` is a 1-based counter across all pairs and
 * `termTotal` is the number of search terms multiplied by the number of
 * run locations. `location` is left undefined when no location filter is
 * applied.
 */
export type StartupJobsProgressEvent =
| {
type: "term_start";
termIndex: number;
termTotal: number;
searchTerm: string;
location?: string;
}
| {
type: "term_complete";
termIndex: number;
termTotal: number;
searchTerm: string;
location?: string;
// Jobs newly added for this pair, after de-duplication by job URL.
jobsFoundTerm: number;
};
/** Options accepted by `runStartupJobs`; every field is optional. */
export interface RunStartupJobsOptions {
// Queries to run; a missing or empty list falls back to ["software engineer"].
searchTerms?: string[];
// Country used as the location filter when no explicit location remains.
selectedCountry?: string;
// Explicit locations to search; each one is combined with every search term.
locations?: string[];
// Per-term result budget passed to the scraper as `requestedCount`; defaults to 50.
maxJobsPerTerm?: number;
// Receives a start and a complete event for every (location, term) pair.
onProgress?: (event: StartupJobsProgressEvent) => void;
// Polled before each term; returning true ends the run with the jobs collected so far.
shouldCancel?: () => boolean;
}
/**
 * Outcome of a `runStartupJobs` call.
 *
 * When `runStartupJobs` catches an error it returns `success: false`, an
 * empty `jobs` array and the error message in `error`.
 */
export interface StartupJobsResult {
success: boolean;
jobs: CreateJobInput[];
error?: string;
}
/**
 * Coerces a numeric setting to a positive integer.
 *
 * @param value - A number, a base-10 numeric string, or undefined.
 * @param fallback - Returned as-is when `value` is missing or not finite.
 * @returns The floored value clamped to a minimum of 1, or `fallback`.
 */
function toPositiveIntOrFallback(
  value: number | string | undefined,
  fallback: number,
): number {
  let candidate = Number.NaN;
  if (typeof value === "number") {
    candidate = value;
  } else if (typeof value === "string") {
    candidate = Number.parseInt(value, 10);
  }

  // NaN and +/-Infinity are rejected; zero and negatives are clamped up to 1.
  return Number.isFinite(candidate)
    ? Math.max(1, Math.floor(candidate))
    : fallback;
}
/**
 * Derives a job type from a pipe-separated disciplines string.
 *
 * @param disciplines - Text such as `"Engineering | Full-time"`.
 * @returns The last non-empty segment when there are at least two segments;
 *   otherwise undefined.
 */
function inferJobType(disciplines: string | undefined): string | undefined {
  if (!disciplines) return undefined;

  const parts: string[] = [];
  for (const raw of disciplines.split("|")) {
    const piece = raw.trim();
    if (piece) parts.push(piece);
  }

  // A single segment is treated as the discipline itself, not a job type.
  if (parts.length <= 1) return undefined;
  return parts[parts.length - 1];
}
/**
 * Maps a scraper record into the pipeline's `CreateJobInput` shape.
 *
 * @param row - Record returned by `startup-jobs-scraper`.
 * @returns The mapped job, or null when the record has no job URL.
 */
function mapStartupJob(row: StartupJobRecord): CreateJobInput | null {
  const {
    jobUrl,
    title,
    employer,
    employerUrl,
    applicationLink,
    disciplines,
    deadline,
    salary,
    location,
    degreeRequired,
    starting,
    jobDescription,
  } = row;

  // A record without a URL is dropped.
  if (!jobUrl) return null;

  return {
    source: "startupjobs",
    title: title || "Unknown Title",
    employer: employer || "Unknown Employer",
    employerUrl: employerUrl || undefined,
    jobUrl,
    // Fall back to the listing itself when no separate apply link exists.
    applicationLink: applicationLink || jobUrl,
    disciplines: disciplines || undefined,
    deadline: deadline || undefined,
    salary: salary || undefined,
    location: location || undefined,
    degreeRequired: degreeRequired || undefined,
    starting: starting || undefined,
    jobDescription: jobDescription || undefined,
    jobType: inferJobType(disciplines),
    // Undefined when the location is missing; otherwise a substring check.
    isRemote: location?.toLowerCase().includes("remote"),
  };
}
/**
 * Resolves the list of location filters the scraper should be run with.
 *
 * Explicit locations win: each one is normalized, the broad sentinels
 * (`worldwide`, `usa/ca`) and empty keys are dropped, and duplicates that
 * normalize to the same key (for example an alias and its full name) are
 * collapsed so the same location is never scraped twice. When nothing
 * remains, the selected country is used instead.
 *
 * @param args.selectedCountry - Country to fall back to.
 * @param args.locations - Explicit locations requested for the run.
 * @returns Formatted location labels in first-seen order, or `[null]` when
 *   the run should not apply a location filter.
 */
function resolveRunLocations(args: {
  selectedCountry?: string;
  locations?: string[];
}): Array<string | null> {
  const locations = resolveSearchCities({
    list: args.locations,
  });

  const normalizedLocations = locations
    .map((location) => normalizeCountryKey(location))
    .filter(
      (key, index, all) =>
        // Mirror the country branch below: an empty key means "no filter".
        Boolean(key) &&
        key !== "worldwide" &&
        key !== "usa/ca" &&
        // Keep only the first occurrence of each normalized key.
        all.indexOf(key) === index,
    );

  if (normalizedLocations.length > 0) {
    return normalizedLocations.map((location) => formatCountryLabel(location));
  }

  const countryKey = normalizeCountryKey(args.selectedCountry);
  if (!countryKey || countryKey === "worldwide" || countryKey === "usa/ca") {
    // A single unfiltered run.
    return [null];
  }

  return [formatCountryLabel(countryKey)];
}
/**
 * Runs the startup.jobs scraper once for every (location, search term) pair
 * and returns the mapped jobs, de-duplicated by job URL.
 *
 * Progress is reported through `options.onProgress` before and after each
 * pair. `options.shouldCancel` is polled before each pair; when it returns
 * true the jobs collected so far are returned as a successful result.
 *
 * @param options - Search terms, location inputs, per-term budget and hooks.
 * @returns `success: true` with the collected jobs, or `success: false` with
 *   an empty job list and an error message when the scraper throws.
 */
export async function runStartupJobs(
  options: RunStartupJobsOptions = {},
): Promise<StartupJobsResult> {
  let searchTerms = options.searchTerms ?? [];
  if (searchTerms.length === 0) {
    searchTerms = ["software engineer"];
  }

  const runLocations = resolveRunLocations({
    selectedCountry: options.selectedCountry,
    locations: options.locations,
  });
  const maxJobsPerTerm = toPositiveIntOrFallback(options.maxJobsPerTerm, 50);
  const termTotal = searchTerms.length * runLocations.length;

  const collected: CreateJobInput[] = [];
  const seenJobUrls = new Set<string>();
  let termIndex = 0;

  try {
    for (const runLocation of runLocations) {
      // A null entry means "no location filter" and is passed on as undefined.
      const location = runLocation ?? undefined;

      for (const searchTerm of searchTerms) {
        termIndex += 1;

        if (options.shouldCancel?.()) {
          return { success: true, jobs: collected };
        }

        const scope = { termIndex, termTotal, searchTerm, location };
        options.onProgress?.({ type: "term_start", ...scope });

        const records = await scrapeStartupJobsViaAlgolia({
          query: searchTerm,
          requestedCount: maxJobsPerTerm,
          enrichDetails: true,
          location,
        });

        // Only jobs whose URL has not been seen earlier in this run are kept.
        const countBefore = collected.length;
        for (const record of records) {
          const mapped = mapStartupJob(record);
          if (mapped === null || seenJobUrls.has(mapped.jobUrl)) continue;
          seenJobUrls.add(mapped.jobUrl);
          collected.push(mapped);
        }

        options.onProgress?.({
          type: "term_complete",
          ...scope,
          jobsFoundTerm: collected.length - countBefore,
        });
      }
    }

    return { success: true, jobs: collected };
  } catch (error) {
    let message: string;
    if (error instanceof Error) {
      message = error.message;
    } else if (typeof error === "string") {
      message = error;
    } else {
      message = "Unexpected error while running startup.jobs extractor.";
    }

    // Heuristic: a message mentioning both a browser and an install step is
    // taken to mean the Playwright binaries are missing.
    const mentionsBrowser = /playwright|browser|executable/i.test(message);
    const missingBrowser = mentionsBrowser && /install/i.test(message);

    if (missingBrowser) {
      return {
        success: false,
        jobs: [],
        error: `${message}. Install browser binaries with 'npx playwright install'.`,
      };
    }

    return { success: false, jobs: [], error: message };
  }
}

View File

@ -0,0 +1,38 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
// Replace the real runner with a mock so only the manifest's mapping from
// pipeline settings to runner options is exercised.
vi.mock("../src/run", () => ({
runStartupJobs: vi.fn(),
}));
// Tests for how the startupjobs manifest derives the options it passes to the runner.
describe("startupjobs manifest", () => {
beforeEach(() => {
// Start every test with clean mock call history.
vi.clearAllMocks();
});
it("prefers startupjobsMaxJobsPerTerm when provided", async () => {
// `../src/run` resolves to the mock registered at the top of this file.
const { manifest } = await import("../src/manifest");
const { runStartupJobs } = await import("../src/run");
const runStartupJobsMock = vi.mocked(runStartupJobs);
runStartupJobsMock.mockResolvedValue({
success: true,
jobs: [],
});
// Both budgets are supplied with different values so the assertion below
// can tell which one the manifest used.
await manifest.run({
source: "startupjobs",
selectedSources: ["startupjobs"],
settings: {
startupjobsMaxJobsPerTerm: "70",
jobspyResultsWanted: "30",
},
searchTerms: ["software engineer"],
selectedCountry: "united kingdom",
});
// 70 (the dedicated setting) must win over 30 (the shared JobSpy budget).
expect(runStartupJobsMock).toHaveBeenCalledWith(
expect.objectContaining({
maxJobsPerTerm: 70,
}),
);
});
});

View File

@ -0,0 +1,75 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
// Mock the scraper package so the tests only inspect the arguments that
// runStartupJobs passes to it.
vi.mock("startup-jobs-scraper", () => ({
scrapeStartupJobsViaAlgolia: vi.fn(),
}));
// Tests for how runStartupJobs turns its options into scraper arguments.
describe("runStartupJobs", () => {
beforeEach(() => {
// Start every test with clean mock call history.
vi.clearAllMocks();
});
it("falls back to the default max jobs per term when options.maxJobsPerTerm is NaN", async () => {
const { scrapeStartupJobsViaAlgolia } = await import(
"startup-jobs-scraper"
);
const scrapeMock = vi.mocked(scrapeStartupJobsViaAlgolia);
// One search term and no locations means a single scraper call.
scrapeMock.mockResolvedValueOnce([]);
const { runStartupJobs } = await import("../src/run");
await runStartupJobs({
searchTerms: ["backend engineer"],
maxJobsPerTerm: Number.NaN,
});
// A NaN budget must be replaced by the default of 50.
expect(scrapeMock).toHaveBeenCalledWith(
expect.objectContaining({
requestedCount: 50,
}),
);
});
it("drops broad location sentinels and falls back to selectedCountry behavior", async () => {
const { scrapeStartupJobsViaAlgolia } = await import(
"startup-jobs-scraper"
);
const scrapeMock = vi.mocked(scrapeStartupJobsViaAlgolia);
scrapeMock.mockResolvedValueOnce([]);
const { runStartupJobs } = await import("../src/run");
// Both the explicit location and the country are the broad "worldwide" value.
await runStartupJobs({
searchTerms: ["platform engineer"],
selectedCountry: "worldwide",
locations: ["Worldwide"],
});
// With nothing narrower available, the scraper runs without a location filter.
expect(scrapeMock).toHaveBeenCalledWith(
expect.objectContaining({
location: undefined,
}),
);
});
it("normalizes explicit city-country aliases before passing location to the scraper", async () => {
const { scrapeStartupJobsViaAlgolia } = await import(
"startup-jobs-scraper"
);
const scrapeMock = vi.mocked(scrapeStartupJobsViaAlgolia);
scrapeMock.mockResolvedValueOnce([]);
const { runStartupJobs } = await import("../src/run");
await runStartupJobs({
searchTerms: ["software engineer"],
locations: ["UK"],
});
// The "UK" alias must reach the scraper as the full country label.
expect(scrapeMock).toHaveBeenCalledWith(
expect.objectContaining({
location: "United Kingdom",
}),
);
});
});

View File

@ -0,0 +1,16 @@
{
"compilerOptions": {
"module": "ESNext",
"moduleResolution": "bundler",
"target": "ES2022",
"strict": true,
"noUnusedLocals": false,
"lib": ["ES2022", "DOM"],
"types": ["node"],
"baseUrl": ".",
"paths": {
"@shared/*": ["../../shared/src/*"]
}
},
"include": ["./src/**/*"]
}

View File

@ -753,6 +753,7 @@ describe("OrchestratorPage", () => {
gradcrackerMaxJobsPerTerm: 150,
ukvisajobsMaxJobs: 150,
adzunaMaxJobsPerTerm: 150,
startupjobsMaxJobsPerTerm: 150,
jobspyCountryIndeed: "united kingdom",
searchCities: "United Kingdom",
});

View File

@ -182,6 +182,7 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
const rememberedRunBudget =
settings?.jobspyResultsWanted?.value ??
settings?.startupjobsMaxJobsPerTerm?.value ??
settings?.adzunaMaxJobsPerTerm?.value ??
settings?.gradcrackerMaxJobsPerTerm?.value ??
settings?.ukvisajobsMaxJobs?.value ??

View File

@ -52,6 +52,17 @@ describe("automatic-run utilities", () => {
expect(cap).toBeLessThanOrEqual(750);
});
it("assigns a dedicated startupjobs max-jobs limit", () => {
const limits = deriveExtractorLimits({
budget: 120,
searchTerms: ["backend", "platform"],
sources: ["startupjobs"],
});
expect(limits.startupjobsMaxJobsPerTerm).toBeGreaterThan(0);
expect(limits.startupjobsMaxJobsPerTerm).toBeLessThanOrEqual(120);
});
it("returns zero estimate when no search terms are provided", () => {
const estimate = calculateAutomaticEstimate({
values: {
@ -112,4 +123,21 @@ describe("automatic-run utilities", () => {
expect(estimate.discovered.cap).toBeGreaterThan(0);
expect(estimate.discovered.cap).toBeLessThanOrEqual(120);
});
it("includes startupjobs in estimate caps using the shared term budget", () => {
const estimate = calculateAutomaticEstimate({
values: {
topN: 10,
minSuitabilityScore: 50,
searchTerms: ["backend", "platform"],
runBudget: 120,
country: "united kingdom",
cityLocations: [],
},
sources: ["startupjobs"],
});
expect(estimate.discovered.cap).toBeGreaterThan(0);
expect(estimate.discovered.cap).toBeLessThanOrEqual(120);
});
});

View File

@ -66,6 +66,7 @@ export interface ExtractorLimits {
gradcrackerMaxJobsPerTerm: number;
ukvisajobsMaxJobs: number;
adzunaMaxJobsPerTerm: number;
startupjobsMaxJobsPerTerm: number;
}
export function deriveExtractorLimits(args: {
@ -82,6 +83,7 @@ export function deriveExtractorLimits(args: {
const includesUkVisaJobs = args.sources.includes("ukvisajobs");
const includesAdzuna = args.sources.includes("adzuna");
const includesHiringCafe = args.sources.includes("hiringcafe");
const includesStartupJobs = args.sources.includes("startupjobs");
const weightedContributors =
(includesIndeed ? termCount : 0) +
@ -90,7 +92,8 @@ export function deriveExtractorLimits(args: {
(includesGradcracker ? termCount : 0) +
(includesUkVisaJobs ? 1 : 0) +
(includesAdzuna ? termCount : 0) +
(includesHiringCafe ? termCount : 0);
(includesHiringCafe ? termCount : 0) +
(includesStartupJobs ? termCount : 0);
if (weightedContributors <= 0) {
return {
@ -98,6 +101,7 @@ export function deriveExtractorLimits(args: {
gradcrackerMaxJobsPerTerm: budget,
ukvisajobsMaxJobs: budget,
adzunaMaxJobsPerTerm: budget,
startupjobsMaxJobsPerTerm: budget,
};
}
@ -109,6 +113,7 @@ export function deriveExtractorLimits(args: {
gradcrackerMaxJobsPerTerm: perUnit,
ukvisajobsMaxJobs: Math.min(budget, perUnit + remainder),
adzunaMaxJobsPerTerm: perUnit,
startupjobsMaxJobsPerTerm: perUnit,
};
}
@ -173,6 +178,7 @@ export function calculateAutomaticEstimate(args: {
const hasGlassdoor = sources.includes("glassdoor");
const hasAdzuna = sources.includes("adzuna");
const hasHiringCafe = sources.includes("hiringcafe");
const hasStartupJobs = sources.includes("startupjobs");
const limits = deriveExtractorLimits({
budget: values.runBudget,
searchTerms: values.searchTerms,
@ -191,9 +197,17 @@ export function calculateAutomaticEstimate(args: {
const hiringCafeCap = hasHiringCafe
? limits.jobspyResultsWanted * termCount
: 0;
const startupJobsCap = hasStartupJobs
? limits.startupjobsMaxJobsPerTerm * termCount
: 0;
const discoveredCap =
jobspyCap + gradcrackerCap + ukvisaCap + adzunaCap + hiringCafeCap;
jobspyCap +
gradcrackerCap +
ukvisaCap +
adzunaCap +
hiringCafeCap +
startupJobsCap;
const discoveredMin = Math.round(discoveredCap * 0.35);
const discoveredMax = Math.round(discoveredCap * 0.75);
const processedMin = Math.min(values.topN, discoveredMin);

View File

@ -181,11 +181,13 @@ export function usePipelineControls(
);
const hasAdzuna = compatibleSources.includes("adzuna");
const hasHiringCafe = compatibleSources.includes("hiringcafe");
const hasStartupJobs = compatibleSources.includes("startupjobs");
const serializedCities = serializeCityLocationsSetting(
values.cityLocations,
);
const searchCities =
(hasJobSpySite || hasAdzuna || hasHiringCafe) && serializedCities
(hasJobSpySite || hasAdzuna || hasHiringCafe || hasStartupJobs) &&
serializedCities
? serializedCities
: formatCountryLabel(values.country);
await api.updateSettings({
@ -194,6 +196,7 @@ export function usePipelineControls(
gradcrackerMaxJobsPerTerm: limits.gradcrackerMaxJobsPerTerm,
ukvisajobsMaxJobs: limits.ukvisajobsMaxJobs,
adzunaMaxJobsPerTerm: limits.adzunaMaxJobsPerTerm,
startupjobsMaxJobsPerTerm: limits.startupjobsMaxJobsPerTerm,
jobspyCountryIndeed: values.country,
searchCities,
});

View File

@ -17,6 +17,10 @@ describe("orchestrator utils", () => {
expect(getEnabledSources(withoutKey)).not.toContain("adzuna");
});
it("enables startupjobs without credentials", () => {
expect(getEnabledSources(createAppSettings())).toContain("startupjobs");
});
it("counts processing jobs in ready and discovered tabs", () => {
const jobs = [
createJob({ id: "ready", status: "ready", closedAt: null }),

View File

@ -195,6 +195,10 @@ export const getEnabledSources = (
enabled.push(source);
continue;
}
if (source === "startupjobs") {
enabled.push(source);
continue;
}
if (
source === "indeed" ||
source === "linkedin" ||

View File

@ -254,6 +254,7 @@ export const DEMO_SOURCE_BASE_URLS: Record<JobSource, string> = {
ukvisajobs: "https://www.ukvisajobs.com",
adzuna: "https://www.adzuna.com",
hiringcafe: "https://hiring.cafe",
startupjobs: "https://startup.jobs",
manual: "https://example.com",
};

650
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -8,6 +8,7 @@ export const EXTRACTOR_SOURCE_IDS = [
"ukvisajobs",
"adzuna",
"hiringcafe",
"startupjobs",
"manual",
] as const;
@ -48,6 +49,7 @@ export const EXTRACTOR_SOURCE_METADATA: Record<
requiresCredentials: true,
},
hiringcafe: { label: "Hiring Cafe", order: 70, category: "pipeline" },
startupjobs: { label: "startup.jobs", order: 80, category: "pipeline" },
manual: { label: "Manual", order: 90, category: "manual" },
};

View File

@ -55,6 +55,10 @@ describe("location-support", () => {
expect(isSourceAllowedForCountry("glassdoor", "japan")).toBe(false);
expect(isSourceAllowedForCountry("adzuna", "united states")).toBe(true);
expect(isSourceAllowedForCountry("adzuna", "japan")).toBe(false);
expect(isSourceAllowedForCountry("startupjobs", "united states")).toBe(
true,
);
expect(isSourceAllowedForCountry("startupjobs", "worldwide")).toBe(true);
});
it("filters incompatible sources while preserving compatible order", () => {
@ -66,11 +70,12 @@ describe("location-support", () => {
"glassdoor",
"ukvisajobs",
"adzuna",
"startupjobs",
"linkedin",
],
"united states",
),
).toEqual(["indeed", "glassdoor", "adzuna", "linkedin"]);
).toEqual(["indeed", "glassdoor", "adzuna", "startupjobs", "linkedin"]);
});
it("supports glassdoor only in explicitly supported countries", () => {

View File

@ -217,6 +217,19 @@ export const settingsRegistry = {
parse: parseIntOrNull,
serialize: serializeNullableNumber,
},
startupjobsMaxJobsPerTerm: {
kind: "typed" as const,
schema: z.number().int().min(1).max(1000),
default: (): number =>
parseInt(
typeof process !== "undefined"
? process.env.STARTUPJOBS_MAX_RESULTS || "50"
: "50",
10,
),
parse: parseIntOrNull,
serialize: serializeNullableNumber,
},
searchTerms: {
kind: "typed" as const,
schema: z.array(z.string().trim().min(1).max(200)).max(100),

View File

@ -153,6 +153,7 @@ export const createAppSettings = (
ukvisajobsMaxJobs: { value: 50, default: 50, override: null },
adzunaMaxJobsPerTerm: { value: 50, default: 50, override: null },
gradcrackerMaxJobsPerTerm: { value: 50, default: 50, override: null },
startupjobsMaxJobsPerTerm: { value: 50, default: 50, override: null },
searchTerms: {
value: ["Software Engineer"],
default: ["Software Engineer"],

View File

@ -152,6 +152,7 @@ export interface AppSettings {
ukvisajobsMaxJobs: Resolved<number>;
adzunaMaxJobsPerTerm: Resolved<number>;
gradcrackerMaxJobsPerTerm: Resolved<number>;
startupjobsMaxJobsPerTerm: Resolved<number>;
searchTerms: Resolved<string[]>;
blockedCompanyKeywords: Resolved<string[]>;
scoringInstructions: Resolved<string>;