Add startup.jobs extractor support (#279)

* Add startup.jobs extractor support * Harden startup.jobs extractor inputs * Wire startupjobs into Docker and CI * Tighten startupjobs review follow-ups * fix: publish ghcr during release workflow * feat: add startupjobs max jobs configuration and update related tests
2026-03-17 12:20:45 +00:00 · 2026-03-17 12:20:45 +00:00 · 71e640b563
commit 71e640b563
parent 26275e4ee8
28 changed files with 1177 additions and 132 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -54,6 +54,7 @@ jobs:
          - adzuna-extractor
          - hiringcafe-extractor
          - gradcracker-extractor
+          - startupjobs-extractor
          - ukvisajobs-extractor
    steps:
      - uses: actions/checkout@v4
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -7,9 +7,14 @@ on:
        description: "Next release version (x.y.z)"
        required: true
        type: string
+      release_title:
+        description: "Optional release title shown on GitHub (defaults to vX.Y.Z)"
+        required: false
+        type: string

 permissions:
  contents: write
+  packages: write

 concurrency:
  group: release-${{ inputs.version }}
@ -83,8 +88,50 @@ jobs:
          git tag "v$RELEASE_VERSION"
          git push origin "v$RELEASE_VERSION"

+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Docker meta (tags/labels)
+        id: docker-meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ghcr.io/${{ github.repository_owner }}/job-ops
+          tags: |
+            type=raw,value=v${{ inputs.version }}
+            type=raw,value=latest
+            type=sha
+
+      - name: Build and push GHCR image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./Dockerfile
+          push: true
+          platforms: linux/amd64,linux/arm64
+          tags: ${{ steps.docker-meta.outputs.tags }}
+          labels: ${{ steps.docker-meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
      - name: Create GitHub release
        env:
          GH_TOKEN: ${{ github.token }}
          RELEASE_VERSION: ${{ inputs.version }}
-        run: gh release create "v$RELEASE_VERSION" --title "v$RELEASE_VERSION" --generate-notes
+          INPUT_RELEASE_TITLE: ${{ inputs.release_title }}
+        run: |
+          RELEASE_TITLE="$(printf '%s' "$INPUT_RELEASE_TITLE" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"
+          if [ -z "$RELEASE_TITLE" ]; then
+            RELEASE_TITLE="v$RELEASE_VERSION"
+          fi
+
+          gh release create "v$RELEASE_VERSION" --title "$RELEASE_TITLE" --generate-notes
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -60,7 +60,8 @@ Releases are driven from GitHub Actions.

 1. Open the `release` workflow in GitHub Actions.
 2. Enter the next version as `x.y.z` (for example `0.1.30`).
-3. Run the workflow.
+3. Optionally enter a separate release title for GitHub (for example `Google Dorks!`).
+4. Run the workflow.

 The workflow will:

@ -68,9 +69,10 @@ The workflow will:
 - update `package-lock.json`
 - commit the version bump to `main`
 - create and push tag `vX.Y.Z`
-- create the GitHub release
+- publish the `ghcr.io/.../job-ops` image for that release
+- create the GitHub release using either the custom title or `vX.Y.Z`

-The app version shown in the UI is sourced from `orchestrator/package.json`, so the release version, tag, and displayed app version stay aligned.
+The app version shown in the UI is sourced from `orchestrator/package.json`, so the release version, tag, and displayed app version stay aligned even when the GitHub release title is customized separately.

 ## Validation Before PR (CI-Parity Checks)

--- a/Dockerfile
+++ b/Dockerfile
@ -38,6 +38,7 @@ COPY orchestrator/package*.json ./orchestrator/
 COPY extractors/adzuna/package*.json ./extractors/adzuna/
 COPY extractors/hiringcafe/package*.json ./extractors/hiringcafe/
 COPY extractors/gradcracker/package*.json ./extractors/gradcracker/
+COPY extractors/startupjobs/package*.json ./extractors/startupjobs/
 COPY extractors/ukvisajobs/package*.json ./extractors/ukvisajobs/

 # Install Node dependencies with npm cache (dev deps needed for build)
@ -59,6 +60,7 @@ COPY extractors/adzuna ./extractors/adzuna
 COPY extractors/hiringcafe ./extractors/hiringcafe
 COPY extractors/gradcracker ./extractors/gradcracker
 COPY extractors/jobspy ./extractors/jobspy
+COPY extractors/startupjobs ./extractors/startupjobs
 COPY extractors/ukvisajobs ./extractors/ukvisajobs

 # Build documentation site bundle
@ -105,6 +107,7 @@ COPY orchestrator/package*.json ./orchestrator/
 COPY extractors/adzuna/package*.json ./extractors/adzuna/
 COPY extractors/hiringcafe/package*.json ./extractors/hiringcafe/
 COPY extractors/gradcracker/package*.json ./extractors/gradcracker/
+COPY extractors/startupjobs/package*.json ./extractors/startupjobs/
 COPY extractors/ukvisajobs/package*.json ./extractors/ukvisajobs/

 # Install production Node dependencies only
@ -122,6 +125,7 @@ COPY extractors/adzuna ./extractors/adzuna
 COPY extractors/hiringcafe ./extractors/hiringcafe
 COPY extractors/gradcracker ./extractors/gradcracker
 COPY extractors/jobspy ./extractors/jobspy
+COPY extractors/startupjobs ./extractors/startupjobs
 COPY extractors/ukvisajobs ./extractors/ukvisajobs

 # Reuse Camoufox binaries from builder instead of fetching again
--- a/docs-site/docs/extractors/overview.md
+++ b/docs-site/docs/extractors/overview.md
@ -17,6 +17,7 @@ Extractor integrations are now registered through manifests and loaded automatic
 | [JobSpy](/docs/next/extractors/jobspy) | Multi-source discovery (Indeed, LinkedIn, Glassdoor) | Requires Python wrapper execution per term; source availability and quality vary by site/location | `JOBSPY_SITES`, `JOBSPY_SEARCH_TERMS`, `JOBSPY_RESULTS_WANTED`, `JOBSPY_HOURS_OLD`, `JOBSPY_LINKEDIN_FETCH_DESCRIPTION` | Produces JSON per term, then orchestrator normalizes and de-duplicates by `jobUrl` |
 | [Adzuna](/docs/next/extractors/adzuna) | API-based multi-country discovery with low scraping overhead | Requires valid App ID/App Key; country must be in Adzuna-supported list | `ADZUNA_APP_ID`, `ADZUNA_APP_KEY`, `ADZUNA_MAX_JOBS_PER_TERM` | API pagination to dataset output; orchestrator maps progress and de-duplicates by `sourceJobId`/`jobUrl` |
 | [Hiring Cafe](/docs/next/extractors/hiring-cafe) | Browser-backed discovery using Hiring Cafe search APIs | Subject to upstream anti-bot checks; uses browser context and encoded search-state payloads | `HIRING_CAFE_SEARCH_TERMS`, `HIRING_CAFE_COUNTRY`, `HIRING_CAFE_MAX_JOBS_PER_TERM`, `HIRING_CAFE_DATE_FETCHED_PAST_N_DAYS` | Uses existing pipeline term/country/budget knobs and maps directly to normalized jobs |
+| [startup.jobs](/docs/next/extractors/startup-jobs) | Startup-focused discovery through the published `startup-jobs-scraper` package | No credentials required; detail enrichment depends on Playwright browser binaries being installed | existing pipeline `searchTerms`, selected country/cities, `startupjobsMaxJobsPerTerm` (falls back to `jobspyResultsWanted`); `npx playwright install` for fresh environments | Algolia-backed search plus detail-page enrichment via package import; orchestrator maps normalized records and de-duplicates by `jobUrl` |
 | [UKVisaJobs](/docs/next/extractors/ukvisajobs) | UK visa sponsorship-focused roles | Requires authenticated session and periodic token/cookie refresh | `UKVISAJOBS_EMAIL`, `UKVISAJOBS_PASSWORD`, `UKVISAJOBS_MAX_JOBS`, `UKVISAJOBS_SEARCH_KEYWORD` | API pagination + dataset output; orchestrator de-dupes and may fetch missing descriptions |
 | [Manual Import](/docs/next/extractors/manual) | One-off jobs not covered by scrapers | Inference quality depends on model/provider and input quality; some URLs cannot be fetched reliably | App/API endpoints (`/api/manual-jobs/infer`, `/api/manual-jobs/import`) | Accepts text/HTML/URL, runs inference, then saves and scores job after review |

@ -25,6 +26,7 @@ Extractor integrations are now registered through manifests and loaded automatic
 - Use **JobSpy** for broad first-pass sourcing across common boards.
 - Use **Adzuna** when you want API-first discovery in supported non-UK markets.
 - Use **Hiring Cafe** when you want another term/country-driven source without adding credentials.
+- Use **startup.jobs** when you want startup-heavy listings without maintaining another scraper locally.
 - Use **Gradcracker** when targeting graduate pipelines in the UK.
 - Use **UKVisaJobs** for sponsorship-specific UK searches.
 - Use **Manual Import** when you already have a specific posting and need direct import.
@ -37,6 +39,7 @@ Many runs combine sources: broad discovery first, then manual import for high-pr
 - [JobSpy](/docs/next/extractors/jobspy)
 - [Adzuna](/docs/next/extractors/adzuna)
 - [Hiring Cafe](/docs/next/extractors/hiring-cafe)
+- [startup.jobs](/docs/next/extractors/startup-jobs)
 - [UKVisaJobs](/docs/next/extractors/ukvisajobs)
 - [Manual Import](/docs/next/extractors/manual)
 - [Add an Extractor](/docs/next/workflows/add-an-extractor)
--- a/docs-site/docs/extractors/startup-jobs.md
+++ b/docs-site/docs/extractors/startup-jobs.md
@ -0,0 +1,64 @@
+---
+id: startup-jobs
+title: startup.jobs Extractor
+description: startup.jobs extraction integrated through the startup-jobs-scraper package.
+sidebar_position: 8
+---
+
+## What it is
+
+Original website: [startup.jobs](https://startup.jobs)
+
+This extractor wraps the published [`startup-jobs-scraper`](https://www.npmjs.com/package/startup-jobs-scraper) package and feeds normalized startup.jobs listings into the existing pipeline.
+
+Implementation split:
+
+1. `extractors/startupjobs/src/run.ts` calls `scrapeStartupJobsViaAlgolia` and maps package records into `CreateJobInput`.
+2. `extractors/startupjobs/src/manifest.ts` adapts pipeline settings, emits progress updates, and registers the source for runtime discovery.
+
+## Why it exists
+
+startup.jobs adds a startup-focused board to job-ops without introducing another bespoke scraper in this repository.
+
+Using the published package also keeps the integration small and makes it easier to evolve the scraping logic independently from the app.
+
+## How to use it
+
+1. Open **Run jobs** and choose **Automatic**.
+2. Leave **startup.jobs** enabled in **Sources** or toggle it on.
+3. Set your usual automatic run controls:
+   - `searchTerms` are sent as `query`.
+   - country or city filters are reused as the package `location` option.
+   - the run budget is passed as `requestedCount` per term: `startupjobsMaxJobsPerTerm` when set, otherwise `jobspyResultsWanted`, otherwise a default of 50.
+4. Start the run and monitor progress in the pipeline progress card.
+
+Defaults and constraints:
+
+- No new credentials are required.
+- The integration runs with `enrichDetails: true`, so it opens job detail pages for richer records.
+- Browser binaries are not downloaded automatically with the package. Install them with `npx playwright install` before using this extractor in a fresh environment.
+- When **Search cities** is set, the extractor runs once for every combination of city and search term.
+- Without explicit cities, the selected country is used as the location filter except for broad modes such as `worldwide` and `usa/ca`.
+
+## Common problems
+
+### startup.jobs does not appear in sources
+
+- Check that the app is running a build that includes the new extractor manifest.
+- This source does not require credentials, so it should appear as soon as the updated build is loaded.
+
+### Results are broader than expected
+
+- If no city is configured, the extractor uses the selected country when possible and otherwise falls back to a broad search.
+- Add **Search cities** when you want tighter geographic filtering.
+
+### Job descriptions are missing
+
+- Detail enrichment depends on Playwright browser binaries being installed locally.
+- Run `npx playwright install` and retry if the extractor cannot open job detail pages.
+
+## Related pages
+
+- [Extractors Overview](/docs/next/extractors/overview)
+- [Pipeline Run](/docs/next/features/pipeline-run)
+- [Add an Extractor](/docs/next/workflows/add-an-extractor)
--- a/docs-site/sidebars.ts
+++ b/docs-site/sidebars.ts
@ -49,6 +49,7 @@ const sidebars: SidebarsConfig = {
        "extractors/jobspy",
        "extractors/adzuna",
        "extractors/hiring-cafe",
+        "extractors/startup-jobs",
        "extractors/manual",
        "extractors/ukvisajobs",
      ],
--- a/extractors/startupjobs/README.md
+++ b/extractors/startupjobs/README.md
@ -0,0 +1,10 @@
+# startup.jobs Extractor
+
+Extractor wrapper around the published `startup-jobs-scraper` package.
+
+## Notes
+
+- Uses `scrapeStartupJobsViaAlgolia` directly from `startup-jobs-scraper`.
+- Runs with `enrichDetails: true` so job descriptions and other detail-page fields are fetched during pipeline runs.
+- Browser binaries are not downloaded automatically. Install them with `npx playwright install` or `npm --workspace startupjobs-extractor run get-binaries`.
+- Reuses the pipeline's existing search terms, country, city, and budget controls.
--- a/extractors/startupjobs/package.json
+++ b/extractors/startupjobs/package.json
@ -0,0 +1,17 @@
+{
+  "name": "startupjobs-extractor",
+  "version": "0.0.1",
+  "type": "module",
+  "description": "startup.jobs extractor backed by the startup-jobs-scraper package",
+  "dependencies": {
+    "startup-jobs-scraper": "^0.1.0"
+  },
+  "devDependencies": {
+    "@types/node": "^24.0.0",
+    "typescript": "~5.9.0"
+  },
+  "scripts": {
+    "check:types": "tsc --noEmit",
+    "get-binaries": "npx playwright install"
+  }
+}
--- a/extractors/startupjobs/src/manifest.ts
+++ b/extractors/startupjobs/src/manifest.ts
@ -0,0 +1,89 @@
+import { resolveSearchCities } from "@shared/search-cities.js";
+import type {
+  ExtractorManifest,
+  ExtractorProgressEvent,
+} from "@shared/types/extractors";
+import { runStartupJobs } from "./run";
+
+/**
+ * Converts a startup.jobs run event into the pipeline's progress shape.
+ *
+ * A `term_start` event reports the previous term count as processed
+ * (never below zero); any other event is treated as a completion and also
+ * carries the number of jobs found for that term.
+ */
+function toProgress(event: {
+  type: string;
+  termIndex: number;
+  termTotal: number;
+  searchTerm: string;
+  location?: string;
+  jobsFoundTerm?: number;
+}): ExtractorProgressEvent {
+  const { type, termIndex, termTotal, searchTerm, location, jobsFoundTerm } =
+    event;
+  // Human-readable label: "<term> @ <location>" when a location is present.
+  const label = location ? `${searchTerm} @ ${location}` : searchTerm;
+
+  if (type !== "term_start") {
+    const found = jobsFoundTerm ?? 0;
+    return {
+      phase: "list",
+      termsProcessed: termIndex,
+      termsTotal: termTotal,
+      currentUrl: label,
+      jobPagesProcessed: found,
+      jobPagesEnqueued: found,
+      detail: `startup.jobs: completed ${termIndex}/${termTotal} (${label}) with ${found} jobs`,
+    };
+  }
+
+  return {
+    phase: "list",
+    termsProcessed: Math.max(termIndex - 1, 0),
+    termsTotal: termTotal,
+    currentUrl: label,
+    detail: `startup.jobs: term ${termIndex}/${termTotal} (${label})`,
+  };
+}
+
+/**
+ * Extractor manifest that registers startup.jobs as a pipeline source.
+ *
+ * `run` adapts the pipeline context into `runStartupJobs` options:
+ * - the per-term job limit comes from `startupjobsMaxJobsPerTerm`, then
+ *   `jobspyResultsWanted`, then a default of 50; a value that is missing or
+ *   does not parse as an integer is skipped, and the result is at least 1;
+ * - locations are resolved from `searchCities`, falling back to
+ *   `jobspyLocation`;
+ * - progress events are converted with `toProgress` and dropped once
+ *   cancellation has been requested.
+ *
+ * Returns `{ success: true, jobs: [] }` immediately when cancellation is
+ * already requested; otherwise returns the jobs, or the error, reported by
+ * the run.
+ */
+export const manifest: ExtractorManifest = {
+  id: "startupjobs",
+  displayName: "startup.jobs",
+  providesSources: ["startupjobs"],
+  async run(context) {
+    if (context.shouldCancel?.()) {
+      return { success: true, jobs: [] };
+    }
+
+    // Candidates in priority order. A present-but-unparseable value must not
+    // mask a valid lower-priority setting, so take the first finite parse.
+    const parsedMaxJobsPerTerm = [
+      context.settings.startupjobsMaxJobsPerTerm,
+      context.settings.jobspyResultsWanted,
+    ]
+      .map((raw) => (raw ? Number.parseInt(raw, 10) : Number.NaN))
+      .find((parsed) => Number.isFinite(parsed));
+    const maxJobsPerTerm =
+      parsedMaxJobsPerTerm === undefined
+        ? 50
+        : Math.max(1, parsedMaxJobsPerTerm);
+
+    const result = await runStartupJobs({
+      selectedCountry: context.selectedCountry,
+      searchTerms: context.searchTerms,
+      locations: resolveSearchCities({
+        single:
+          context.settings.searchCities ?? context.settings.jobspyLocation,
+      }),
+      maxJobsPerTerm,
+      shouldCancel: context.shouldCancel,
+      onProgress: (event) => {
+        // Do not surface progress for a run the caller has already cancelled.
+        if (context.shouldCancel?.()) return;
+        context.onProgress?.(toProgress(event));
+      },
+    });
+
+    if (!result.success) {
+      return {
+        success: false,
+        jobs: [],
+        error: result.error,
+      };
+    }
+
+    return {
+      success: true,
+      jobs: result.jobs,
+    };
+  },
+};
+
+export default manifest;
--- a/extractors/startupjobs/src/run.ts
+++ b/extractors/startupjobs/src/run.ts
@ -0,0 +1,198 @@
+import {
+  formatCountryLabel,
+  normalizeCountryKey,
+} from "@shared/location-support.js";
+import { resolveSearchCities } from "@shared/search-cities.js";
+import type { CreateJobInput } from "@shared/types/jobs";
+import {
+  type StartupJobRecord,
+  scrapeStartupJobsViaAlgolia,
+} from "startup-jobs-scraper";
+
+/**
+ * Progress notifications emitted by `runStartupJobs`, discriminated by `type`.
+ *
+ * `termIndex` is the 1-based position of the current (location, search term)
+ * run and `termTotal` is the number of search terms multiplied by the number
+ * of resolved locations. `term_complete` additionally reports how many new
+ * (not previously seen) jobs the run contributed.
+ */
+export type StartupJobsProgressEvent =
+  | {
+      type: "term_start";
+      termIndex: number;
+      termTotal: number;
+      searchTerm: string;
+      location?: string;
+    }
+  | {
+      type: "term_complete";
+      termIndex: number;
+      termTotal: number;
+      searchTerm: string;
+      location?: string;
+      jobsFoundTerm: number;
+    };
+
+/** Options accepted by `runStartupJobs`; every field is optional. */
+export interface RunStartupJobsOptions {
+  /** Queries to run; `["software engineer"]` is used when missing or empty. */
+  searchTerms?: string[];
+  /** Country used as the location filter when no explicit location remains. */
+  selectedCountry?: string;
+  /** Explicit locations; each one is searched with every search term. */
+  locations?: string[];
+  /** Requested result count per run; non-finite values fall back to 50. */
+  maxJobsPerTerm?: number;
+  /** Called at the start and at the completion of each (location, term) run. */
+  onProgress?: (event: StartupJobsProgressEvent) => void;
+  /** Polled before each run; returning true stops the loop early. */
+  shouldCancel?: () => boolean;
+}
+
+/** Outcome of `runStartupJobs`; `error` is set only when `success` is false. */
+export interface StartupJobsResult {
+  success: boolean;
+  jobs: CreateJobInput[];
+  error?: string;
+}
+
+/**
+ * Coerces a number or numeric string to an integer of at least 1.
+ *
+ * Strings are parsed base-10; numbers are floored. Anything that does not
+ * yield a finite number (undefined, NaN, Infinity, unparseable text) returns
+ * `fallback` unchanged.
+ */
+function toPositiveIntOrFallback(
+  value: number | string | undefined,
+  fallback: number,
+): number {
+  let candidate = Number.NaN;
+  if (typeof value === "number") {
+    candidate = value;
+  } else if (typeof value === "string") {
+    candidate = Number.parseInt(value, 10);
+  }
+
+  return Number.isFinite(candidate)
+    ? Math.max(1, Math.floor(candidate))
+    : fallback;
+}
+
+/**
+ * Derives a job type from a pipe-separated disciplines string.
+ *
+ * Returns the last non-empty segment, but only when there are at least two
+ * segments; a missing value or a single segment yields `undefined`.
+ */
+function inferJobType(disciplines: string | undefined): string | undefined {
+  if (!disciplines) return undefined;
+
+  const parts: string[] = [];
+  for (const raw of disciplines.split("|")) {
+    const piece = raw.trim();
+    if (piece) parts.push(piece);
+  }
+
+  if (parts.length <= 1) return undefined;
+  return parts[parts.length - 1];
+}
+
+/**
+ * Maps a scraper record onto the pipeline's `CreateJobInput`.
+ *
+ * Records without a `jobUrl` are rejected with `null`. Title and employer get
+ * placeholder text when empty, the application link defaults to the job URL,
+ * and the remaining optional fields become `undefined` when falsy.
+ */
+function mapStartupJob(row: StartupJobRecord): CreateJobInput | null {
+  const { jobUrl, location } = row;
+  if (!jobUrl) return null;
+
+  // Stays undefined (not false) when the record carries no location at all.
+  const mentionsRemote = location?.toLowerCase().includes("remote");
+
+  const job: CreateJobInput = {
+    source: "startupjobs",
+    title: row.title || "Unknown Title",
+    employer: row.employer || "Unknown Employer",
+    employerUrl: row.employerUrl || undefined,
+    jobUrl,
+    applicationLink: row.applicationLink || jobUrl,
+    disciplines: row.disciplines || undefined,
+    deadline: row.deadline || undefined,
+    salary: row.salary || undefined,
+    location: location || undefined,
+    degreeRequired: row.degreeRequired || undefined,
+    starting: row.starting || undefined,
+    jobDescription: row.jobDescription || undefined,
+    jobType: inferJobType(row.disciplines),
+    isRemote: mentionsRemote,
+  };
+  return job;
+}
+
+/**
+ * Decides which location filters the extractor should run with.
+ *
+ * Explicit locations win: they are normalized, the broad sentinels
+ * (`worldwide`, `usa/ca`) are dropped, and duplicates that only appear after
+ * normalization (for example two aliases of the same country) are collapsed
+ * so the same location is not scraped twice. When nothing remains, the
+ * selected country is used unless it is missing or itself a broad sentinel,
+ * in which case a single `null` entry requests an unfiltered search.
+ *
+ * @returns Display labels to pass to the scraper, or `[null]` for no filter.
+ */
+function resolveRunLocations(args: {
+  selectedCountry?: string;
+  locations?: string[];
+}): Array<string | null> {
+  const locations = resolveSearchCities({
+    list: args.locations,
+  });
+
+  // A Set keeps first-occurrence order, so run order stays stable.
+  const normalizedLocations = Array.from(
+    new Set(
+      locations
+        .map((location) => normalizeCountryKey(location))
+        .filter(
+          (location) => location !== "worldwide" && location !== "usa/ca",
+        ),
+    ),
+  );
+
+  if (normalizedLocations.length > 0) {
+    return normalizedLocations.map((location) => formatCountryLabel(location));
+  }
+
+  const countryKey = normalizeCountryKey(args.selectedCountry);
+  if (!countryKey || countryKey === "worldwide" || countryKey === "usa/ca") {
+    return [null];
+  }
+
+  return [formatCountryLabel(countryKey)];
+}
+
+/**
+ * Runs the startup.jobs scraper for every (location, search term) pair and
+ * returns the mapped, de-duplicated jobs.
+ *
+ * - Search terms default to `["software engineer"]` when none are given.
+ * - Locations come from `resolveRunLocations`; a `null` entry means the
+ *   scraper is called without a location filter.
+ * - `onProgress` receives a `term_start` event before each scrape and a
+ *   `term_complete` event after it.
+ * - `shouldCancel` is checked before each scrape; on cancel the jobs
+ *   collected so far are returned with `success: true`.
+ *
+ * Never throws: any error raised inside the loop is converted into
+ * `{ success: false, jobs: [], error }`, discarding jobs collected before
+ * the failure.
+ */
+export async function runStartupJobs(
+  options: RunStartupJobsOptions = {},
+): Promise<StartupJobsResult> {
+  const searchTerms =
+    options.searchTerms && options.searchTerms.length > 0
+      ? options.searchTerms
+      : ["software engineer"];
+  const runLocations = resolveRunLocations({
+    selectedCountry: options.selectedCountry,
+    locations: options.locations,
+  });
+  const maxJobsPerTerm = toPositiveIntOrFallback(options.maxJobsPerTerm, 50);
+  // Progress is reported per (location, term) run, not per search term alone.
+  const termTotal = searchTerms.length * runLocations.length;
+  const jobs: CreateJobInput[] = [];
+  // Job URLs already emitted, shared across all runs of this invocation.
+  const seen = new Set<string>();
+  let runIndex = 0;
+
+  try {
+    for (const location of runLocations) {
+      for (const searchTerm of searchTerms) {
+        runIndex += 1;
+        // Cancellation keeps whatever has been collected so far.
+        if (options.shouldCancel?.()) {
+          return { success: true, jobs };
+        }
+
+        options.onProgress?.({
+          type: "term_start",
+          termIndex: runIndex,
+          termTotal,
+          searchTerm,
+          location: location ?? undefined,
+        });
+
+        const records = await scrapeStartupJobsViaAlgolia({
+          query: searchTerm,
+          requestedCount: maxJobsPerTerm,
+          enrichDetails: true,
+          location: location ?? undefined,
+        });
+
+        // Counts only jobs that were new in this run (after de-duplication).
+        let jobsFoundTerm = 0;
+        for (const record of records) {
+          const mapped = mapStartupJob(record);
+          if (!mapped) continue;
+          const dedupeKey = mapped.jobUrl;
+          if (seen.has(dedupeKey)) continue;
+          seen.add(dedupeKey);
+          jobs.push(mapped);
+          jobsFoundTerm += 1;
+        }
+
+        options.onProgress?.({
+          type: "term_complete",
+          termIndex: runIndex,
+          termTotal,
+          searchTerm,
+          location: location ?? undefined,
+          jobsFoundTerm,
+        });
+      }
+    }
+
+    return {
+      success: true,
+      jobs,
+    };
+  } catch (error) {
+    const message =
+      error instanceof Error
+        ? error.message
+        : typeof error === "string"
+          ? error
+          : "Unexpected error while running startup.jobs extractor.";
+    // Heuristic: the message mentions a browser/executable and an install step,
+    // which is treated as a missing-browser-binaries failure.
+    const missingBrowser =
+      /playwright|browser|executable/i.test(message) &&
+      /install/i.test(message);
+    return {
+      success: false,
+      jobs: [],
+      error: missingBrowser
+        ? `${message}. Install browser binaries with 'npx playwright install'.`
+        : message,
+    };
+  }
+}
--- a/extractors/startupjobs/tests/manifest.test.ts
+++ b/extractors/startupjobs/tests/manifest.test.ts
@ -0,0 +1,38 @@
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+// Replace the run module so manifest.run is exercised without calling the
+// real scraper; each test supplies the resolved value it needs.
+vi.mock("../src/run", () => ({
+  runStartupJobs: vi.fn(),
+}));
+
+describe("startupjobs manifest", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  // Both limits are set; the dedicated startupjobs value (70) must be the
+  // one forwarded to runStartupJobs, not the shared run budget (30).
+  it("prefers startupjobsMaxJobsPerTerm when provided", async () => {
+    const { manifest } = await import("../src/manifest");
+    const { runStartupJobs } = await import("../src/run");
+    const runStartupJobsMock = vi.mocked(runStartupJobs);
+    runStartupJobsMock.mockResolvedValue({
+      success: true,
+      jobs: [],
+    });
+
+    await manifest.run({
+      source: "startupjobs",
+      selectedSources: ["startupjobs"],
+      settings: {
+        startupjobsMaxJobsPerTerm: "70",
+        jobspyResultsWanted: "30",
+      },
+      searchTerms: ["software engineer"],
+      selectedCountry: "united kingdom",
+    });
+
+    expect(runStartupJobsMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        maxJobsPerTerm: 70,
+      }),
+    );
+  });
+});
--- a/extractors/startupjobs/tests/run.test.ts
+++ b/extractors/startupjobs/tests/run.test.ts
@ -0,0 +1,75 @@
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+// Stub the scraper package so the tests only observe the options that
+// runStartupJobs passes to it.
+vi.mock("startup-jobs-scraper", () => ({
+  scrapeStartupJobsViaAlgolia: vi.fn(),
+}));
+
+describe("runStartupJobs", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  // A NaN limit must not reach the scraper; the default of 50 is expected.
+  it("falls back to the default max jobs per term when options.maxJobsPerTerm is NaN", async () => {
+    const { scrapeStartupJobsViaAlgolia } = await import(
+      "startup-jobs-scraper"
+    );
+    const scrapeMock = vi.mocked(scrapeStartupJobsViaAlgolia);
+    scrapeMock.mockResolvedValueOnce([]);
+
+    const { runStartupJobs } = await import("../src/run");
+
+    await runStartupJobs({
+      searchTerms: ["backend engineer"],
+      maxJobsPerTerm: Number.NaN,
+    });
+
+    expect(scrapeMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        requestedCount: 50,
+      }),
+    );
+  });
+
+  // "Worldwide" as both explicit location and country should result in an
+  // unfiltered search (location: undefined).
+  it("drops broad location sentinels and falls back to selectedCountry behavior", async () => {
+    const { scrapeStartupJobsViaAlgolia } = await import(
+      "startup-jobs-scraper"
+    );
+    const scrapeMock = vi.mocked(scrapeStartupJobsViaAlgolia);
+    scrapeMock.mockResolvedValueOnce([]);
+
+    const { runStartupJobs } = await import("../src/run");
+
+    await runStartupJobs({
+      searchTerms: ["platform engineer"],
+      selectedCountry: "worldwide",
+      locations: ["Worldwide"],
+    });
+
+    expect(scrapeMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        location: undefined,
+      }),
+    );
+  });
+
+  // The alias "UK" is expected to reach the scraper as "United Kingdom".
+  it("normalizes explicit city-country aliases before passing location to the scraper", async () => {
+    const { scrapeStartupJobsViaAlgolia } = await import(
+      "startup-jobs-scraper"
+    );
+    const scrapeMock = vi.mocked(scrapeStartupJobsViaAlgolia);
+    scrapeMock.mockResolvedValueOnce([]);
+
+    const { runStartupJobs } = await import("../src/run");
+
+    await runStartupJobs({
+      searchTerms: ["software engineer"],
+      locations: ["UK"],
+    });
+
+    expect(scrapeMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        location: "United Kingdom",
+      }),
+    );
+  });
+});
--- a/extractors/startupjobs/tsconfig.json
+++ b/extractors/startupjobs/tsconfig.json
@ -0,0 +1,16 @@
+{
+  "compilerOptions": {
+    "module": "ESNext",
+    "moduleResolution": "bundler",
+    "target": "ES2022",
+    "strict": true,
+    "noUnusedLocals": false,
+    "lib": ["ES2022", "DOM"],
+    "types": ["node"],
+    "baseUrl": ".",
+    "paths": {
+      "@shared/*": ["../../shared/src/*"]
+    }
+  },
+  "include": ["./src/**/*"]
+}
--- a/orchestrator/src/client/pages/OrchestratorPage.test.tsx
+++ b/orchestrator/src/client/pages/OrchestratorPage.test.tsx
@ -753,6 +753,7 @@ describe("OrchestratorPage", () => {
        gradcrackerMaxJobsPerTerm: 150,
        ukvisajobsMaxJobs: 150,
        adzunaMaxJobsPerTerm: 150,
+        startupjobsMaxJobsPerTerm: 150,
        jobspyCountryIndeed: "united kingdom",
        searchCities: "United Kingdom",
      });
--- a/orchestrator/src/client/pages/orchestrator/AutomaticRunTab.tsx
+++ b/orchestrator/src/client/pages/orchestrator/AutomaticRunTab.tsx
@ -182,6 +182,7 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({

    const rememberedRunBudget =
      settings?.jobspyResultsWanted?.value ??
+      settings?.startupjobsMaxJobsPerTerm?.value ??
      settings?.adzunaMaxJobsPerTerm?.value ??
      settings?.gradcrackerMaxJobsPerTerm?.value ??
      settings?.ukvisajobsMaxJobs?.value ??
--- a/orchestrator/src/client/pages/orchestrator/automatic-run.test.ts
+++ b/orchestrator/src/client/pages/orchestrator/automatic-run.test.ts
@ -52,6 +52,17 @@ describe("automatic-run utilities", () => {
    expect(cap).toBeLessThanOrEqual(750);
  });

+  it("assigns a dedicated startupjobs max-jobs limit", () => {
+    const limits = deriveExtractorLimits({
+      budget: 120,
+      searchTerms: ["backend", "platform"],
+      sources: ["startupjobs"],
+    });
+
+    expect(limits.startupjobsMaxJobsPerTerm).toBeGreaterThan(0);
+    expect(limits.startupjobsMaxJobsPerTerm).toBeLessThanOrEqual(120);
+  });
+
  it("returns zero estimate when no search terms are provided", () => {
    const estimate = calculateAutomaticEstimate({
      values: {
@ -112,4 +123,21 @@ describe("automatic-run utilities", () => {
    expect(estimate.discovered.cap).toBeGreaterThan(0);
    expect(estimate.discovered.cap).toBeLessThanOrEqual(120);
  });
+
+  it("includes startupjobs in estimate caps using the shared term budget", () => {
+    const estimate = calculateAutomaticEstimate({
+      values: {
+        topN: 10,
+        minSuitabilityScore: 50,
+        searchTerms: ["backend", "platform"],
+        runBudget: 120,
+        country: "united kingdom",
+        cityLocations: [],
+      },
+      sources: ["startupjobs"],
+    });
+
+    expect(estimate.discovered.cap).toBeGreaterThan(0);
+    expect(estimate.discovered.cap).toBeLessThanOrEqual(120);
+  });
 });
--- a/orchestrator/src/client/pages/orchestrator/automatic-run.ts
+++ b/orchestrator/src/client/pages/orchestrator/automatic-run.ts
@ -66,6 +66,7 @@ export interface ExtractorLimits {
  gradcrackerMaxJobsPerTerm: number;
  ukvisajobsMaxJobs: number;
  adzunaMaxJobsPerTerm: number;
+  startupjobsMaxJobsPerTerm: number;
 }

 export function deriveExtractorLimits(args: {
@ -82,6 +83,7 @@ export function deriveExtractorLimits(args: {
  const includesUkVisaJobs = args.sources.includes("ukvisajobs");
  const includesAdzuna = args.sources.includes("adzuna");
  const includesHiringCafe = args.sources.includes("hiringcafe");
+  const includesStartupJobs = args.sources.includes("startupjobs");

  const weightedContributors =
    (includesIndeed ? termCount : 0) +
@ -90,7 +92,8 @@ export function deriveExtractorLimits(args: {
    (includesGradcracker ? termCount : 0) +
    (includesUkVisaJobs ? 1 : 0) +
    (includesAdzuna ? termCount : 0) +
-    (includesHiringCafe ? termCount : 0);
+    (includesHiringCafe ? termCount : 0) +
+    (includesStartupJobs ? termCount : 0);

  if (weightedContributors <= 0) {
    return {
@ -98,6 +101,7 @@ export function deriveExtractorLimits(args: {
      gradcrackerMaxJobsPerTerm: budget,
      ukvisajobsMaxJobs: budget,
      adzunaMaxJobsPerTerm: budget,
+      startupjobsMaxJobsPerTerm: budget,
    };
  }

@ -109,6 +113,7 @@ export function deriveExtractorLimits(args: {
    gradcrackerMaxJobsPerTerm: perUnit,
    ukvisajobsMaxJobs: Math.min(budget, perUnit + remainder),
    adzunaMaxJobsPerTerm: perUnit,
+    startupjobsMaxJobsPerTerm: perUnit,
  };
 }

@ -173,6 +178,7 @@ export function calculateAutomaticEstimate(args: {
  const hasGlassdoor = sources.includes("glassdoor");
  const hasAdzuna = sources.includes("adzuna");
  const hasHiringCafe = sources.includes("hiringcafe");
+  const hasStartupJobs = sources.includes("startupjobs");
  const limits = deriveExtractorLimits({
    budget: values.runBudget,
    searchTerms: values.searchTerms,
@ -191,9 +197,17 @@ export function calculateAutomaticEstimate(args: {
  const hiringCafeCap = hasHiringCafe
    ? limits.jobspyResultsWanted * termCount
    : 0;
+  const startupJobsCap = hasStartupJobs
+    ? limits.startupjobsMaxJobsPerTerm * termCount
+    : 0;

  const discoveredCap =
-    jobspyCap + gradcrackerCap + ukvisaCap + adzunaCap + hiringCafeCap;
+    jobspyCap +
+    gradcrackerCap +
+    ukvisaCap +
+    adzunaCap +
+    hiringCafeCap +
+    startupJobsCap;
  const discoveredMin = Math.round(discoveredCap * 0.35);
  const discoveredMax = Math.round(discoveredCap * 0.75);
  const processedMin = Math.min(values.topN, discoveredMin);
--- a/orchestrator/src/client/pages/orchestrator/usePipelineControls.ts
+++ b/orchestrator/src/client/pages/orchestrator/usePipelineControls.ts
@ -181,11 +181,13 @@ export function usePipelineControls(
      );
      const hasAdzuna = compatibleSources.includes("adzuna");
      const hasHiringCafe = compatibleSources.includes("hiringcafe");
+      const hasStartupJobs = compatibleSources.includes("startupjobs");
      const serializedCities = serializeCityLocationsSetting(
        values.cityLocations,
      );
      const searchCities =
-        (hasJobSpySite || hasAdzuna || hasHiringCafe) && serializedCities
+        (hasJobSpySite || hasAdzuna || hasHiringCafe || hasStartupJobs) &&
+        serializedCities
          ? serializedCities
          : formatCountryLabel(values.country);
      await api.updateSettings({
@ -194,6 +196,7 @@ export function usePipelineControls(
        gradcrackerMaxJobsPerTerm: limits.gradcrackerMaxJobsPerTerm,
        ukvisajobsMaxJobs: limits.ukvisajobsMaxJobs,
        adzunaMaxJobsPerTerm: limits.adzunaMaxJobsPerTerm,
+        startupjobsMaxJobsPerTerm: limits.startupjobsMaxJobsPerTerm,
        jobspyCountryIndeed: values.country,
        searchCities,
      });
--- a/orchestrator/src/client/pages/orchestrator/utils.test.ts
+++ b/orchestrator/src/client/pages/orchestrator/utils.test.ts
@ -17,6 +17,10 @@ describe("orchestrator utils", () => {
    expect(getEnabledSources(withoutKey)).not.toContain("adzuna");
  });

+  it("enables startupjobs without credentials", () => {
+    expect(getEnabledSources(createAppSettings())).toContain("startupjobs");
+  });
+
  it("counts processing jobs in ready and discovered tabs", () => {
    const jobs = [
      createJob({ id: "ready", status: "ready", closedAt: null }),
--- a/orchestrator/src/client/pages/orchestrator/utils.ts
+++ b/orchestrator/src/client/pages/orchestrator/utils.ts
@ -195,6 +195,10 @@ export const getEnabledSources = (
      enabled.push(source);
      continue;
    }
+    if (source === "startupjobs") {
+      enabled.push(source);
+      continue;
+    }
    if (
      source === "indeed" ||
      source === "linkedin" ||
--- a/orchestrator/src/server/config/demo-defaults.data.ts
+++ b/orchestrator/src/server/config/demo-defaults.data.ts
@ -254,6 +254,7 @@ export const DEMO_SOURCE_BASE_URLS: Record<JobSource, string> = {
  ukvisajobs: "https://www.ukvisajobs.com",
  adzuna: "https://www.adzuna.com",
  hiringcafe: "https://hiring.cafe",
+  startupjobs: "https://startup.jobs",
  manual: "https://example.com",
 };

--- a/package-lock.json
+++ b/package-lock.json
--- a/shared/src/extractors/index.ts
+++ b/shared/src/extractors/index.ts
@ -8,6 +8,7 @@ export const EXTRACTOR_SOURCE_IDS = [
  "ukvisajobs",
  "adzuna",
  "hiringcafe",
+  "startupjobs",
  "manual",
 ] as const;

@ -48,6 +49,7 @@ export const EXTRACTOR_SOURCE_METADATA: Record<
    requiresCredentials: true,
  },
  hiringcafe: { label: "Hiring Cafe", order: 70, category: "pipeline" },
+  startupjobs: { label: "startup.jobs", order: 80, category: "pipeline" },
  manual: { label: "Manual", order: 90, category: "manual" },
 };

--- a/shared/src/location-support.test.ts
+++ b/shared/src/location-support.test.ts
@ -55,6 +55,10 @@ describe("location-support", () => {
    expect(isSourceAllowedForCountry("glassdoor", "japan")).toBe(false);
    expect(isSourceAllowedForCountry("adzuna", "united states")).toBe(true);
    expect(isSourceAllowedForCountry("adzuna", "japan")).toBe(false);
+    expect(isSourceAllowedForCountry("startupjobs", "united states")).toBe(
+      true,
+    );
+    expect(isSourceAllowedForCountry("startupjobs", "worldwide")).toBe(true);
  });

  it("filters incompatible sources while preserving compatible order", () => {
@ -66,11 +70,12 @@ describe("location-support", () => {
          "glassdoor",
          "ukvisajobs",
          "adzuna",
+          "startupjobs",
          "linkedin",
        ],
        "united states",
      ),
-    ).toEqual(["indeed", "glassdoor", "adzuna", "linkedin"]);
+    ).toEqual(["indeed", "glassdoor", "adzuna", "startupjobs", "linkedin"]);
  });

  it("supports glassdoor only in explicitly supported countries", () => {
--- a/shared/src/settings-registry.ts
+++ b/shared/src/settings-registry.ts
@ -217,6 +217,19 @@ export const settingsRegistry = {
    parse: parseIntOrNull,
    serialize: serializeNullableNumber,
  },
+  startupjobsMaxJobsPerTerm: { // startup.jobs job cap, applied per search term
+    kind: "typed" as const,
+    schema: z.number().int().min(1).max(1000), // integer in [1, 1000]
+    default: (): number =>
+      parseInt(
+        typeof process !== "undefined" // guard for runtimes without `process`
+          ? process.env.STARTUPJOBS_MAX_RESULTS || "50"
+          : "50",
+        10,
+      ) || 50, // NaN (non-numeric env value) or 0 falls back to 50
+    parse: parseIntOrNull,
+    serialize: serializeNullableNumber,
+  },
  searchTerms: {
    kind: "typed" as const,
    schema: z.array(z.string().trim().min(1).max(200)).max(100),
--- a/shared/src/testing/factories.ts
+++ b/shared/src/testing/factories.ts
@ -153,6 +153,7 @@ export const createAppSettings = (
  ukvisajobsMaxJobs: { value: 50, default: 50, override: null },
  adzunaMaxJobsPerTerm: { value: 50, default: 50, override: null },
  gradcrackerMaxJobsPerTerm: { value: 50, default: 50, override: null },
+  startupjobsMaxJobsPerTerm: { value: 50, default: 50, override: null },
  searchTerms: {
    value: ["Software Engineer"],
    default: ["Software Engineer"],
--- a/shared/src/types/settings.ts
+++ b/shared/src/types/settings.ts
@ -152,6 +152,7 @@ export interface AppSettings {
  ukvisajobsMaxJobs: Resolved<number>;
  adzunaMaxJobsPerTerm: Resolved<number>;
  gradcrackerMaxJobsPerTerm: Resolved<number>;
+  startupjobsMaxJobsPerTerm: Resolved<number>;
  searchTerms: Resolved<string[]>;
  blockedCompanyKeywords: Resolved<string[]>;
  scoringInstructions: Resolved<string>;