Enable Glassdoor as a JobSpy source (#126)

* feat(shared): add glassdoor to job source model

* feat(jobspy): support glassdoor site in scraper and discovery

* feat(pipeline): include glassdoor in source selection and API schema

* feat(ui): add glassdoor toggle to jobspy settings and run estimates

* test/docs: cover glassdoor jobspy integration end-to-end

* fix(jobspy): make glassdoor always-on without settings toggle

* fix(jobspy): fallback glassdoor when location is country-level

* refactor(jobspy): drop direct pandas usage in wrapper

* feat(pipeline): gate glassdoor by supported countries

* fix(jobspy): restore pandas output and keep glassdoor disable copy

* fix(jobspy): map country-level glassdoor searches to city fallbacks

* feat(ui): require glassdoor city for country-level runs
This commit is contained in:
Shaheer Sarfaraz 2026-02-10 17:57:49 +00:00 committed by GitHub
parent 2c8de6c92e
commit 4e1ea28301
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
28 changed files with 530 additions and 69 deletions

View File

@ -4,7 +4,7 @@ AI-powered job discovery and application pipeline. Automatically finds jobs, sco
## Workflow
1. **Search**: Scrapes Gradcracker, Indeed, LinkedIn, and UK Visa Sponsorship jobs.
1. **Search**: Scrapes Gradcracker, Indeed, LinkedIn, Glassdoor, and UK Visa Sponsorship jobs.
2. **Score**: AI ranks jobs by suitability using the configured LLM provider (OpenRouter by default).
3. **Tailor**: Generates a custom resume summary for top-tier matches.
4. **Export**: Uses [RxResume v4](https://v4.rxresu.me) to create tailored PDFs.

View File

@ -1,6 +1,6 @@
# JobSpy Extractor (How It Works)
This is a simple walkthrough of the JobSpy extractor used for Indeed and LinkedIn.
This is a simple walkthrough of the JobSpy extractor used for Indeed, LinkedIn, and Glassdoor.
## Big picture
@ -34,7 +34,7 @@ The Node service (`orchestrator/src/server/services/jobspy.ts`) controls the run
The mapper normalizes fields like salary ranges, converts empty values to null, and keeps extra metadata (skills, company rating, remote flag, etc.) when available.
If a row is missing a valid site (`indeed` or `linkedin`) or a job URL, it gets skipped.
If a row is missing a valid site (`indeed`, `linkedin`, or `glassdoor`) or a job URL, it gets skipped.
## Notes

View File

@ -1 +1,2 @@
python-jobspy
pandas

View File

@ -3,9 +3,41 @@ import json
import os
from pathlib import Path
import pandas as pd
from jobspy import scrape_jobs
PROGRESS_PREFIX = "JOBOPS_PROGRESS "
# Alias table consulted by _normalize_country_token. Keys are compared after
# the input has been lowercased and had its whitespace collapsed, so they must
# be written in that form. Tokens that are not listed pass through unchanged,
# which makes the identity entries (e.g. "united kingdom") no-ops that simply
# document the canonical spelling.
COUNTRY_ALIASES = {
"uk": "united kingdom",
"united kingdom": "united kingdom",
"us": "united states",
"usa": "united states",
"united states": "united states",
"türkiye": "turkey",
"czech republic": "czechia",
}
# City used as the Glassdoor search location when the requested location is
# only a country name (see the Glassdoor branch in main). Keys are canonical
# country names as produced by _normalize_country_token; a country missing
# from this table keeps its original location.
# NOTE(review): the key set appears to mirror GLASSDOOR_SUPPORTED_COUNTRIES in
# the shared TypeScript location-support module - keep the two in sync.
GLASSDOOR_COUNTRY_TO_CITY = {
"australia": "Sydney",
"austria": "Vienna",
"belgium": "Brussels",
"brazil": "Sao Paulo",
"canada": "Toronto",
"france": "Paris",
"germany": "Berlin",
"hong kong": "Hong Kong",
"india": "Bengaluru",
"ireland": "Dublin",
"italy": "Milan",
"mexico": "Mexico City",
"netherlands": "Amsterdam",
"new zealand": "Auckland",
"singapore": "Singapore",
"spain": "Madrid",
"switzerland": "Zurich",
"united kingdom": "London",
"united states": "New York",
"vietnam": "Ho Chi Minh City",
}
def _env_str(name: str, default: str) -> str:
@ -39,6 +71,47 @@ def _parse_sites(raw: str) -> list[str]:
return [s.strip() for s in raw.split(",") if s.strip()]
def _normalize_country_token(value: str) -> str:
    """Return the canonical lowercase form of a country token.

    The input is lowercased and every run of whitespace (including leading
    and trailing whitespace) is reduced to a single space. If the result is
    a key of COUNTRY_ALIASES the mapped name is returned; otherwise the
    cleaned token itself is returned.
    """
    # str.split() with no arguments already discards leading/trailing
    # whitespace, so no separate strip() is needed.
    token = " ".join(value.lower().split())
    if token in COUNTRY_ALIASES:
        return COUNTRY_ALIASES[token]
    return token
def _is_country_level_location(
    location: str | None, country_indeed: str | None
) -> bool:
    """Report whether ``location`` names the same country as ``country_indeed``.

    Both values are compared after normalization with
    _normalize_country_token, so aliases such as "UK" and "United Kingdom"
    are treated as equal.

    Args:
        location: Free-text search location; may be None or blank.
        country_indeed: Country setting passed to the scraper; may be None
            or blank.

    Returns:
        True only when both values are non-blank and normalize to the same
        country token. A missing or blank value on either side yields False.
    """
    # The scraper wrapper accepts ``location=None``; guard here as well so a
    # missing location is treated as "not country-level" instead of raising
    # AttributeError on ``None.strip()``.
    if not location or not country_indeed:
        return False
    if not location.strip() or not country_indeed.strip():
        return False
    return _normalize_country_token(location) == _normalize_country_token(
        country_indeed
    )
def _glassdoor_city_for_country(country_indeed: str, location: str) -> str | None:
    """Look up the fallback Glassdoor city for a country.

    ``country_indeed`` is used when it is truthy; otherwise ``location`` is
    used as the country name. The chosen value is normalized with
    _normalize_country_token and looked up in GLASSDOOR_COUNTRY_TO_CITY.

    Returns:
        The mapped city, or None when the country has no entry.
    """
    raw_country = country_indeed if country_indeed else location
    return GLASSDOOR_COUNTRY_TO_CITY.get(_normalize_country_token(raw_country))
def _scrape_for_sites(
    *,
    sites: list[str],
    search_term: str,
    location: str | None,
    results_wanted: int,
    hours_old: int,
    country_indeed: str,
    linkedin_fetch_description: bool,
    is_remote: bool,
) -> pd.DataFrame:
    """Call ``scrape_jobs`` for the given sites and return its result.

    All arguments are keyword-only and are forwarded to ``scrape_jobs``
    (``sites`` is passed as ``site_name``). The ``location`` argument is
    forwarded only when it is a non-blank string; for None, empty, or
    whitespace-only values the keyword is left out of the call entirely.
    """
    scrape_args: dict[str, object] = dict(
        site_name=sites,
        search_term=search_term,
        results_wanted=results_wanted,
        hours_old=hours_old,
        country_indeed=country_indeed,
        linkedin_fetch_description=linkedin_fetch_description,
        is_remote=is_remote,
    )

    # Omit the keyword rather than passing a blank/None location.
    has_location = bool(location and location.strip())
    if has_location:
        scrape_args["location"] = location

    return scrape_jobs(**scrape_args)
def main() -> int:
sites = _parse_sites(_env_str("JOBSPY_SITES", "indeed,linkedin"))
search_term = _env_str("JOBSPY_SEARCH_TERM", "web developer")
@ -68,16 +141,52 @@ def main() -> int:
"searchTerm": search_term,
},
)
jobs = scrape_jobs(
site_name=sites,
search_term=search_term,
location=location,
results_wanted=results_wanted,
hours_old=hours_old,
country_indeed=country_indeed,
linkedin_fetch_description=linkedin_fetch_description,
is_remote=is_remote,
)
frames: list[pd.DataFrame] = []
non_glassdoor_sites = [site for site in sites if site != "glassdoor"]
if non_glassdoor_sites:
frames.append(
_scrape_for_sites(
sites=non_glassdoor_sites,
search_term=search_term,
location=location,
results_wanted=results_wanted,
hours_old=hours_old,
country_indeed=country_indeed,
linkedin_fetch_description=linkedin_fetch_description,
is_remote=is_remote,
)
)
if "glassdoor" in sites:
glassdoor_location = location
if _is_country_level_location(location, country_indeed):
# Glassdoor works best with city-level location terms.
fallback_city = _glassdoor_city_for_country(country_indeed, location)
if fallback_city:
glassdoor_location = fallback_city
print(
"jobspy: Glassdoor location matched country; using city fallback "
f"({fallback_city})"
)
else:
print(
"jobspy: Glassdoor location matched country; keeping original location"
)
frames.append(
_scrape_for_sites(
sites=["glassdoor"],
search_term=search_term,
location=glassdoor_location,
results_wanted=results_wanted,
hours_old=hours_old,
country_indeed=country_indeed,
linkedin_fetch_description=linkedin_fetch_description,
is_remote=is_remote,
)
)
jobs = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
print(f"Found {len(jobs)} jobs")
_emit_progress(
@ -96,7 +205,6 @@ def main() -> int:
escapechar="\\",
index=False,
)
jobs.to_json(output_json, orient="records", force_ascii=False)
print(f"Wrote CSV: {output_csv}")

View File

@ -257,13 +257,17 @@ export const OrchestratorPage: React.FC = () => {
searchTerms: values.searchTerms,
sources: compatibleSources,
});
const jobspyLocation = compatibleSources.includes("glassdoor")
? (values.glassdoorLocation ?? "").trim() ||
formatCountryLabel(values.country)
: formatCountryLabel(values.country);
await api.updateSettings({
searchTerms: values.searchTerms,
jobspyResultsWanted: limits.jobspyResultsWanted,
gradcrackerMaxJobsPerTerm: limits.gradcrackerMaxJobsPerTerm,
ukvisajobsMaxJobs: limits.ukvisajobsMaxJobs,
jobspyCountryIndeed: values.country,
jobspyLocation: formatCountryLabel(values.country),
jobspyLocation,
});
await refreshSettings();
await startPipelineRun({

View File

@ -236,6 +236,14 @@ const nullIfSameSortedList = (
defaultValue: string[],
) => (isSameSortedStringList(value, defaultValue) ? null : (value ?? null));
/**
 * Returns the given JobSpy site list with falsy entries removed, duplicates
 * dropped (first occurrence wins), and "glassdoor" guaranteed to be present.
 *
 * When "glassdoor" is already in the list it keeps its original position;
 * otherwise it is appended at the end. A null/undefined list yields
 * `["glassdoor"]`.
 */
const withAlwaysOnGlassdoor = (
  sites: string[] | null | undefined,
): string[] => {
  const result: string[] = [];
  // Treat "glassdoor" as one more candidate so the same dedupe pass decides
  // whether it still needs to be added.
  for (const site of [...(sites ?? []), "glassdoor"]) {
    if (site && !result.includes(site)) {
      result.push(site);
    }
  }
  return result;
};
const getDerivedSettings = (settings: AppSettings | null) => {
const profileProjects = settings?.profileProjects ?? [];
@ -289,8 +297,12 @@ const getDerivedSettings = (settings: AppSettings | null) => {
default: settings?.defaultJobspyCountryIndeed ?? "",
},
sites: {
effective: settings?.jobspySites ?? ["indeed", "linkedin"],
default: settings?.defaultJobspySites ?? ["indeed", "linkedin"],
effective: withAlwaysOnGlassdoor(
settings?.jobspySites ?? ["indeed", "linkedin", "glassdoor"],
),
default: withAlwaysOnGlassdoor(
settings?.defaultJobspySites ?? ["indeed", "linkedin", "glassdoor"],
),
},
linkedinFetchDescription: {
effective: settings?.jobspyLinkedinFetchDescription ?? true,
@ -691,7 +703,7 @@ export const SettingsPage: React.FC = () => {
jobspy.countryIndeed.default,
),
jobspySites: nullIfSameSortedList(
data.jobspySites,
withAlwaysOnGlassdoor(data.jobspySites),
jobspy.sites.default,
),
jobspyLinkedinFetchDescription: nullIfSame(

View File

@ -96,4 +96,69 @@ describe("AutomaticRunTab", () => {
),
).toBeInTheDocument();
});
it("disables glassdoor for unsupported countries with guidance copy", async () => {
const onSetPipelineSources = vi.fn();
render(
<AutomaticRunTab
open
settings={
{
searchTerms: ["backend engineer"],
jobspyCountryIndeed: "japan",
} as AppSettings
}
enabledSources={["linkedin", "glassdoor"]}
pipelineSources={["linkedin", "glassdoor"]}
onToggleSource={vi.fn()}
onSetPipelineSources={onSetPipelineSources}
isPipelineRunning={false}
onSaveAndRun={vi.fn().mockResolvedValue(undefined)}
/>,
);
await waitFor(() => {
expect(onSetPipelineSources).toHaveBeenCalledWith(["linkedin"]);
});
const glassdoorButton = screen.getByRole("button", { name: "Glassdoor" });
expect(glassdoorButton).toBeDisabled();
expect(glassdoorButton.getAttribute("title")).toContain(
"Glassdoor is not available for the selected country.",
);
});
it("disables glassdoor for supported countries until city is provided", async () => {
const onSetPipelineSources = vi.fn();
render(
<AutomaticRunTab
open
settings={
{
searchTerms: ["backend engineer"],
jobspyCountryIndeed: "united kingdom",
jobspyLocation: "United Kingdom",
} as AppSettings
}
enabledSources={["linkedin", "glassdoor"]}
pipelineSources={["linkedin", "glassdoor"]}
onToggleSource={vi.fn()}
onSetPipelineSources={onSetPipelineSources}
isPipelineRunning={false}
onSaveAndRun={vi.fn().mockResolvedValue(undefined)}
/>,
);
await waitFor(() => {
expect(onSetPipelineSources).toHaveBeenCalledWith(["linkedin"]);
});
const glassdoorButton = screen.getByRole("button", { name: "Glassdoor" });
expect(glassdoorButton).toBeDisabled();
expect(glassdoorButton.getAttribute("title")).toContain(
"Set a Glassdoor city in Advanced settings to enable Glassdoor.",
);
});
});

View File

@ -1,14 +1,13 @@
import * as PopoverPrimitive from "@radix-ui/react-popover";
import {
formatCountryLabel,
getCompatibleSourcesForCountry,
isSourceAllowedForCountry,
normalizeCountryKey,
SUPPORTED_COUNTRY_KEYS,
} from "@shared/location-support.js";
import type { AppSettings, JobSource } from "@shared/types";
import { Check, ChevronsUpDown, Loader2, Sparkles, X } from "lucide-react";
import { useEffect, useMemo, useState } from "react";
import { useCallback, useEffect, useMemo, useState } from "react";
import { useForm } from "react-hook-form";
import {
Accordion,
@ -71,12 +70,18 @@ interface AutomaticRunFormValues {
minSuitabilityScore: string;
runBudget: string;
country: string;
glassdoorLocation: string;
searchTerms: string[];
searchTermDraft: string;
}
type AutomaticPresetSelection = AutomaticPresetId | "custom";
// Explanations shown (button title and tooltip) when the Glassdoor source
// button is disabled in the source picker.
// Used when the selected country does not allow Glassdoor.
const GLASSDOOR_COUNTRY_REASON =
"Glassdoor is not available for the selected country.";
// Used when the country allows Glassdoor but no Glassdoor city has been set.
const GLASSDOOR_LOCATION_REASON =
"Set a Glassdoor city in Advanced settings to enable Glassdoor.";
function toNumber(input: string, min: number, max: number, fallback: number) {
const parsed = Number.parseInt(input, 10);
if (Number.isNaN(parsed)) return fallback;
@ -134,6 +139,7 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
minSuitabilityScore: String(DEFAULT_VALUES.minSuitabilityScore),
runBudget: String(DEFAULT_VALUES.runBudget),
country: DEFAULT_VALUES.country,
glassdoorLocation: "",
searchTerms: DEFAULT_VALUES.searchTerms,
searchTermDraft: "",
},
@ -144,6 +150,7 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
const minScoreInput = watch("minSuitabilityScore");
const runBudgetInput = watch("runBudget");
const countryInput = watch("country");
const glassdoorLocationInput = watch("glassdoorLocation");
const searchTerms = watch("searchTerms");
const searchTermDraft = watch("searchTermDraft");
@ -164,12 +171,24 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
settings?.jobspyLocation ??
DEFAULT_VALUES.country,
);
const rememberedCountryKey = rememberedCountry || DEFAULT_VALUES.country;
const rememberedLocationRaw = settings?.jobspyLocation?.trim() ?? "";
const rememberedLocationNormalized = normalizeCountryKey(
rememberedLocationRaw,
);
const rememberedGlassdoorLocation =
rememberedLocationRaw &&
rememberedLocationNormalized &&
rememberedLocationNormalized !== normalizeCountryKey(rememberedCountryKey)
? rememberedLocationRaw
: "";
reset({
topN: String(topN),
minSuitabilityScore: String(minSuitabilityScore),
runBudget: String(rememberedRunBudget),
country: rememberedCountry || DEFAULT_VALUES.country,
glassdoorLocation: rememberedGlassdoorLocation,
searchTerms: settings?.searchTerms ?? DEFAULT_VALUES.searchTerms,
searchTermDraft: "",
});
@ -200,27 +219,40 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
),
runBudget: toNumber(runBudgetInput, 1, 1000, DEFAULT_VALUES.runBudget),
country: normalizedCountry || DEFAULT_VALUES.country,
glassdoorLocation: glassdoorLocationInput.trim() || undefined,
searchTerms,
};
}, [topNInput, minScoreInput, runBudgetInput, countryInput, searchTerms]);
}, [
topNInput,
minScoreInput,
runBudgetInput,
countryInput,
glassdoorLocationInput,
searchTerms,
]);
const isSourceAvailableForRun = useCallback(
(source: JobSource) => {
if (!isSourceAllowedForCountry(source, values.country)) return false;
if (source === "glassdoor" && !values.glassdoorLocation) return false;
return true;
},
[values.country, values.glassdoorLocation],
);
const compatibleEnabledSources = useMemo(
() =>
enabledSources.filter((source) =>
isSourceAllowedForCountry(source, values.country),
),
[enabledSources, values.country],
() => enabledSources.filter((source) => isSourceAvailableForRun(source)),
[enabledSources, isSourceAvailableForRun],
);
const compatiblePipelineSources = useMemo(
() => getCompatibleSourcesForCountry(pipelineSources, values.country),
[pipelineSources, values.country],
() => pipelineSources.filter((source) => isSourceAvailableForRun(source)),
[pipelineSources, isSourceAvailableForRun],
);
useEffect(() => {
const filtered = getCompatibleSourcesForCountry(
pipelineSources,
values.country,
const filtered = pipelineSources.filter((source) =>
isSourceAvailableForRun(source),
);
if (filtered.length === pipelineSources.length) return;
if (filtered.length > 0) {
@ -232,9 +264,9 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
}
}, [
compatibleEnabledSources,
isSourceAvailableForRun,
onSetPipelineSources,
pipelineSources,
values.country,
]);
const estimate = useMemo(
@ -441,6 +473,23 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
}
/>
</div>
<div className="space-y-2 md:col-span-3">
<Label htmlFor="glassdoor-location">Glassdoor city</Label>
<Input
id="glassdoor-location"
value={glassdoorLocationInput}
onChange={(event) =>
setValue("glassdoorLocation", event.target.value, {
shouldDirty: true,
})
}
placeholder='e.g. "London"'
/>
<p className="text-xs text-muted-foreground">
Required only for Glassdoor. Use a city (not country) to
keep results localized.
</p>
</div>
</div>
</AccordionContent>
</AccordionItem>
@ -526,12 +575,18 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
<CardContent className="flex flex-wrap gap-2">
<TooltipProvider>
{enabledSources.map((source) => {
const allowed = isSourceAllowedForCountry(
const countryAllowed = isSourceAllowedForCountry(
source,
values.country,
);
const allowed = isSourceAvailableForRun(source);
const selected = compatiblePipelineSources.includes(source);
const disabledReason = `${sourceLabel[source]} is available only when country is United Kingdom.`;
const disabledReason =
source === "glassdoor"
? countryAllowed
? GLASSDOOR_LOCATION_REASON
: GLASSDOOR_COUNTRY_REASON
: `${sourceLabel[source]} is available only when country is United Kingdom.`;
const button = (
<Button
@ -540,6 +595,7 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
size="sm"
variant={selected ? "default" : "outline"}
disabled={!allowed}
title={!allowed ? disabledReason : undefined}
onClick={() => onToggleSource(source, !selected)}
>
{sourceLabel[source]}
@ -553,9 +609,7 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
return (
<Tooltip key={source}>
<TooltipTrigger asChild>
<span className="inline-flex" title={disabledReason}>
{button}
</span>
<span className="inline-flex">{button}</span>
</TooltipTrigger>
<TooltipContent side="top">{disabledReason}</TooltipContent>
</Tooltip>

View File

@ -42,11 +42,11 @@ describe("automatic-run utilities", () => {
const limits = deriveExtractorLimits({
budget: 750,
searchTerms: ["a", "b", "c"],
sources: ["indeed", "linkedin", "gradcracker"],
sources: ["indeed", "linkedin", "glassdoor", "gradcracker"],
});
const cap =
2 * limits.jobspyResultsWanted * 3 + limits.gradcrackerMaxJobsPerTerm * 3;
3 * limits.jobspyResultsWanted * 3 + limits.gradcrackerMaxJobsPerTerm * 3;
expect(cap).toBeLessThanOrEqual(750);
});

View File

@ -8,6 +8,7 @@ export interface AutomaticRunValues {
searchTerms: string[];
runBudget: number;
country: string;
glassdoorLocation?: string;
}
export interface AutomaticPresetValues {
@ -71,12 +72,14 @@ export function deriveExtractorLimits(args: {
const termCount = Math.max(1, args.searchTerms.length);
const includesIndeed = args.sources.includes("indeed");
const includesLinkedIn = args.sources.includes("linkedin");
const includesGlassdoor = args.sources.includes("glassdoor");
const includesGradcracker = args.sources.includes("gradcracker");
const includesUkVisaJobs = args.sources.includes("ukvisajobs");
const weightedContributors =
(includesIndeed ? termCount : 0) +
(includesLinkedIn ? termCount : 0) +
(includesGlassdoor ? termCount : 0) +
(includesGradcracker ? termCount : 0) +
(includesUkVisaJobs ? 1 : 0);
@ -133,13 +136,16 @@ export function calculateAutomaticEstimate(args: {
const hasUkVisaJobs = sources.includes("ukvisajobs");
const hasIndeed = sources.includes("indeed");
const hasLinkedIn = sources.includes("linkedin");
const hasGlassdoor = sources.includes("glassdoor");
const limits = deriveExtractorLimits({
budget: values.runBudget,
searchTerms: values.searchTerms,
sources,
});
const jobspySitesCount = [hasIndeed, hasLinkedIn].filter(Boolean).length;
const jobspySitesCount = [hasIndeed, hasLinkedIn, hasGlassdoor].filter(
Boolean,
).length;
const jobspyCap = jobspySitesCount * limits.jobspyResultsWanted * termCount;
const gradcrackerCap = hasGradcracker
? limits.gradcrackerMaxJobsPerTerm * termCount

View File

@ -12,6 +12,7 @@ export const orderedSources: JobSource[] = [
"gradcracker",
"indeed",
"linkedin",
"glassdoor",
"ukvisajobs",
];
export const orderedFilterSources: JobSource[] = [...orderedSources, "manual"];

View File

@ -1,6 +1,10 @@
import type { AppSettings, JobListItem, JobSource } from "@shared/types";
import type { FilterTab, JobSort } from "./constants";
import { orderedFilterSources, orderedSources } from "./constants";
import {
DEFAULT_PIPELINE_SOURCES,
orderedFilterSources,
orderedSources,
} from "./constants";
const dateValue = (value: string | null) => {
if (!value) return null;
@ -159,7 +163,7 @@ export const getSourcesWithJobs = (jobs: JobListItem[]): JobSource[] => {
export const getEnabledSources = (
settings: AppSettings | null,
): JobSource[] => {
if (!settings) return [...orderedSources];
if (!settings) return [...DEFAULT_PIPELINE_SOURCES, "glassdoor"];
const enabled: JobSource[] = [];
const jobspySites = settings.jobspySites ?? [];
@ -176,10 +180,16 @@ export const getEnabledSources = (
if (hasUkVisaJobsAuth) enabled.push(source);
continue;
}
if (source === "indeed" || source === "linkedin") {
if (jobspySites.includes(source)) enabled.push(source);
if (
source === "indeed" ||
source === "linkedin" ||
source === "glassdoor"
) {
if (source === "glassdoor" || jobspySites.includes(source)) {
enabled.push(source);
}
}
}
return enabled.length > 0 ? enabled : [...orderedSources];
return enabled.length > 0 ? enabled : [...DEFAULT_PIPELINE_SOURCES];
};

View File

@ -8,7 +8,7 @@ import { JobspySection } from "./JobspySection";
const JobspyHarness = () => {
const methods = useForm<UpdateSettingsInput>({
defaultValues: {
jobspySites: ["indeed", "linkedin"],
jobspySites: ["indeed", "linkedin", "glassdoor"],
jobspyLocation: "UK",
jobspyResultsWanted: 200,
jobspyHoursOld: 72,
@ -24,8 +24,8 @@ const JobspyHarness = () => {
<JobspySection
values={{
sites: {
default: ["indeed", "linkedin"],
effective: ["indeed", "linkedin"],
default: ["indeed", "linkedin", "glassdoor"],
effective: ["indeed", "linkedin", "glassdoor"],
},
location: { default: "UK", effective: "UK" },
resultsWanted: { default: 200, effective: 200 },
@ -51,6 +51,7 @@ describe("JobspySection", () => {
expect(indeedCheckbox).toBeChecked();
expect(linkedinCheckbox).toBeChecked();
expect(screen.queryByLabelText(/glassdoor/i)).not.toBeInTheDocument();
fireEvent.click(indeedCheckbox);
expect(indeedCheckbox).not.toBeChecked();

View File

@ -43,6 +43,12 @@ export const JobspySection: React.FC<JobspySectionProps> = ({
linkedinFetchDescription,
isRemote,
} = values;
const configurableDefaultSites = sites.default.filter(
(site) => site !== "glassdoor",
);
const configurableEffectiveSites = sites.effective.filter(
(site) => site !== "glassdoor",
);
const {
control,
register,
@ -130,13 +136,13 @@ export const JobspySection: React.FC<JobspySectionProps> = ({
</p>
)}
<div className="text-xs text-muted-foreground">
Select which sites JobSpy should scrape.
Select configurable sites JobSpy should scrape.
</div>
<div className="flex gap-2 text-xs text-muted-foreground">
<span>
Effective: {(sites.effective || []).join(", ") || "None"}
Effective: {configurableEffectiveSites.join(", ") || "None"}
</span>
<span>Default: {(sites.default || []).join(", ")}</span>
<span>Default: {configurableDefaultSites.join(", ")}</span>
</div>
</div>

View File

@ -138,6 +138,7 @@ export const sourceLabel: Record<Job["source"], string> = {
gradcracker: "Gradcracker",
indeed: "Indeed",
linkedin: "LinkedIn",
glassdoor: "Glassdoor",
ukvisajobs: "UK Visa Jobs",
manual: "Manual",
};

View File

@ -44,6 +44,17 @@ describe.sequential("Pipeline API routes", () => {
topN: 5,
sources: ["gradcracker"],
});
const glassdoorRunRes = await fetch(`${baseUrl}/api/pipeline/run`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ sources: ["glassdoor"] }),
});
const glassdoorRunBody = await glassdoorRunRes.json();
expect(glassdoorRunBody.ok).toBe(true);
expect(runPipeline).toHaveBeenNthCalledWith(2, {
sources: ["glassdoor"],
});
});
it("returns conflict when cancelling with no active pipeline", async () => {

View File

@ -98,7 +98,9 @@ const runPipelineSchema = z.object({
topN: z.number().min(1).max(50).optional(),
minSuitabilityScore: z.number().min(0).max(100).optional(),
sources: z
.array(z.enum(["gradcracker", "indeed", "linkedin", "ukvisajobs"]))
.array(
z.enum(["gradcracker", "indeed", "linkedin", "glassdoor", "ukvisajobs"]),
)
.min(1)
.optional(),
});

View File

@ -28,7 +28,7 @@ export const DEMO_DEFAULT_SETTINGS: DemoDefaultSettings = {
jobspyResultsWanted: "25",
jobspyHoursOld: "72",
jobspyCountryIndeed: "US",
jobspySites: JSON.stringify(["linkedin", "indeed"]),
jobspySites: JSON.stringify(["linkedin", "indeed", "glassdoor"]),
jobspyLinkedinFetchDescription: "1",
jobspyIsRemote: "0",
resumeProjects: JSON.stringify({
@ -253,6 +253,7 @@ export const COMPANY_SUFFIXES = [
export const DEMO_SOURCE_BASE_URLS: Record<JobSource, string> = {
linkedin: "https://www.linkedin.com",
indeed: "https://www.indeed.com",
glassdoor: "https://www.glassdoor.com",
gradcracker: "https://www.gradcracker.com",
ukvisajobs: "https://www.ukvisajobs.com",
manual: "https://example.com",

View File

@ -17,7 +17,14 @@ export const jobs = sqliteTable("jobs", {
// From crawler
source: text("source", {
enum: ["gradcracker", "indeed", "linkedin", "ukvisajobs", "manual"],
enum: [
"gradcracker",
"indeed",
"linkedin",
"glassdoor",
"ukvisajobs",
"manual",
],
})
.notNull()
.default("gradcracker"),

View File

@ -37,6 +37,7 @@ import {
const DEFAULT_CONFIG: PipelineConfig = {
topN: 10,
minSuitabilityScore: 50,
// Keep Glassdoor opt-in via source picker/settings; do not enable by default.
sources: ["gradcracker", "indeed", "linkedin", "ukvisajobs"],
outputDir: join(getDataDir(), "pdfs"),
enableCrawling: true,

View File

@ -76,6 +76,92 @@ describe("discoverJobsStep", () => {
);
});
it("passes glassdoor through to JobSpy when selected", async () => {
const settingsRepo = await import("../../repositories/settings");
const jobSpy = await import("../../services/jobspy");
vi.mocked(settingsRepo.getAllSettings).mockResolvedValue({
searchTerms: JSON.stringify(["engineer"]),
jobspySites: JSON.stringify(["glassdoor"]),
} as any);
vi.mocked(jobSpy.runJobSpy).mockResolvedValue({
success: true,
jobs: [
{
source: "glassdoor",
title: "Engineer",
employer: "ACME",
jobUrl: "https://example.com/job",
},
],
} as any);
const result = await discoverJobsStep({
mergedConfig: {
...config,
sources: ["glassdoor"],
},
});
expect(result.discoveredJobs).toHaveLength(1);
expect(vi.mocked(jobSpy.runJobSpy)).toHaveBeenCalledWith(
expect.objectContaining({ sites: ["glassdoor"] }),
);
});
it("keeps glassdoor enabled even when jobspySites override omits it", async () => {
const settingsRepo = await import("../../repositories/settings");
const jobSpy = await import("../../services/jobspy");
vi.mocked(settingsRepo.getAllSettings).mockResolvedValue({
searchTerms: JSON.stringify(["engineer"]),
jobspySites: JSON.stringify(["linkedin"]),
} as any);
vi.mocked(jobSpy.runJobSpy).mockResolvedValue({
success: true,
jobs: [],
} as any);
await discoverJobsStep({
mergedConfig: {
...config,
sources: ["glassdoor", "linkedin"],
},
});
expect(vi.mocked(jobSpy.runJobSpy)).toHaveBeenCalledWith(
expect.objectContaining({ sites: ["glassdoor", "linkedin"] }),
);
});
it("filters out glassdoor for unsupported countries", async () => {
const settingsRepo = await import("../../repositories/settings");
const jobSpy = await import("../../services/jobspy");
vi.mocked(settingsRepo.getAllSettings).mockResolvedValue({
searchTerms: JSON.stringify(["engineer"]),
jobspyCountryIndeed: "japan",
} as any);
vi.mocked(jobSpy.runJobSpy).mockResolvedValue({
success: true,
jobs: [],
} as any);
await discoverJobsStep({
mergedConfig: {
...config,
sources: ["glassdoor", "linkedin"],
},
});
expect(vi.mocked(jobSpy.runJobSpy)).toHaveBeenCalledWith(
expect.objectContaining({ sites: ["linkedin"] }),
);
});
it("throws when all enabled sources fail", async () => {
const settingsRepo = await import("../../repositories/settings");
const ukVisa = await import("../../services/ukvisajobs");

View File

@ -67,8 +67,8 @@ export async function discoverJobsStep(args: {
}
let jobSpySites = compatibleSources.filter(
(source): source is "indeed" | "linkedin" =>
source === "indeed" || source === "linkedin",
(source): source is "indeed" | "linkedin" | "glassdoor" =>
source === "indeed" || source === "linkedin" || source === "glassdoor",
);
const jobspySitesSettingRaw = settings.jobspySites;
@ -76,7 +76,9 @@ export async function discoverJobsStep(args: {
try {
const allowed = JSON.parse(jobspySitesSettingRaw);
if (Array.isArray(allowed)) {
jobSpySites = jobSpySites.filter((site) => allowed.includes(site));
jobSpySites = jobSpySites.filter(
(site) => site === "glassdoor" || allowed.includes(site),
);
}
} catch {
// ignore JSON parse error

View File

@ -105,6 +105,7 @@ function toJobSource(site: unknown): JobSource | null {
if (raw === "gradcracker") return "gradcracker";
if (raw === "indeed") return "indeed";
if (raw === "linkedin") return "linkedin";
if (raw === "glassdoor") return "glassdoor";
return null;
}
@ -164,8 +165,8 @@ export async function runJobSpy(
const outputDir = join(dataDir, "imports");
await mkdir(outputDir, { recursive: true });
const sites = (options.sites ?? ["indeed", "linkedin"])
.filter((s) => s === "indeed" || s === "linkedin")
const sites = (options.sites ?? ["indeed", "linkedin", "glassdoor"])
.filter((s) => s === "indeed" || s === "linkedin" || s === "glassdoor")
.join(",");
const searchTerms = resolveSearchTerms(options);
@ -191,7 +192,7 @@ export async function runJobSpy(
stdio: ["ignore", "pipe", "pipe"],
env: {
...process.env,
JOBSPY_SITES: sites || "indeed,linkedin",
JOBSPY_SITES: sites || "indeed,linkedin,glassdoor",
JOBSPY_SEARCH_TERM: searchTerm,
JOBSPY_TERM_INDEX: String(i + 1),
JOBSPY_TERM_TOTAL: String(searchTerms.length),

View File

@ -79,6 +79,26 @@ describe("settings-conversion", () => {
expect(malformedOverride.value).toEqual(["web developer"]);
});
it("always includes glassdoor in resolved jobspySites", () => {
delete process.env.JOBSPY_SITES;
expect(resolveSettingValue("jobspySites", undefined).value).toEqual([
"indeed",
"linkedin",
"glassdoor",
]);
process.env.JOBSPY_SITES = "indeed,linkedin";
expect(resolveSettingValue("jobspySites", undefined).value).toEqual([
"indeed",
"linkedin",
"glassdoor",
]);
expect(
resolveSettingValue("jobspySites", JSON.stringify(["linkedin"])).value,
).toEqual(["linkedin", "glassdoor"]);
});
it("round-trips penalizeMissingSalary boolean setting", () => {
expect(serializeSettingValue("penalizeMissingSalary", true)).toBe("1");
expect(serializeSettingValue("penalizeMissingSalary", false)).toBe("0");

View File

@ -57,6 +57,24 @@ function parseJsonArrayOrNull(raw: string | undefined): string[] | null {
}
}
/**
 * Cleans a JobSpy site list and forces "glassdoor" into it.
 *
 * Each entry is trimmed; entries that are empty after trimming are dropped
 * and duplicates are removed, keeping the first occurrence. If "glassdoor"
 * is not already present it is appended as the last element.
 *
 * @param value - Raw site names (may contain padding, blanks, duplicates).
 * @returns The cleaned list, always containing "glassdoor".
 */
function normalizeJobspySites(value: string[]): string[] {
  const cleaned = value
    .map((site) => site.trim())
    .filter((site) => site.length > 0);

  // A Set keeps first-insertion order, so appending "glassdoor" here is a
  // no-op when it is already present and an append-at-end when it is not.
  return Array.from(new Set([...cleaned, "glassdoor"]));
}
function parseBitBoolOrNull(raw: string | undefined): boolean | null {
if (!raw) return null;
return raw === "true" || raw === "1";
@ -143,13 +161,13 @@ export const settingsConversionMetadata: SettingsConversionMetadata = {
},
jobspySites: {
defaultValue: () =>
(process.env.JOBSPY_SITES || "indeed,linkedin")
.split(",")
.map((value) => value.trim())
.filter(Boolean),
normalizeJobspySites(
(process.env.JOBSPY_SITES || "indeed,linkedin,glassdoor").split(","),
),
parseOverride: parseJsonArrayOrNull,
serialize: serializeNullableJsonArray,
resolve: resolveWithNullishFallback,
resolve: ({ defaultValue, overrideValue }) =>
normalizeJobspySites(overrideValue ?? defaultValue),
},
jobspyLinkedinFetchDescription: {
defaultValue: () =>

View File

@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
import {
formatCountryLabel,
getCompatibleSourcesForCountry,
isGlassdoorCountry,
isSourceAllowedForCountry,
isUkCountry,
normalizeCountryKey,
@ -49,14 +50,24 @@ describe("location-support", () => {
expect(isSourceAllowedForCountry("ukvisajobs", "worldwide")).toBe(false);
expect(isSourceAllowedForCountry("indeed", "united states")).toBe(true);
expect(isSourceAllowedForCountry("linkedin", "worldwide")).toBe(true);
expect(isSourceAllowedForCountry("glassdoor", "united states")).toBe(true);
expect(isSourceAllowedForCountry("glassdoor", "japan")).toBe(false);
});
it("filters incompatible sources while preserving compatible order", () => {
expect(
getCompatibleSourcesForCountry(
["gradcracker", "indeed", "ukvisajobs", "linkedin"],
["gradcracker", "indeed", "glassdoor", "ukvisajobs", "linkedin"],
"united states",
),
).toEqual(["indeed", "linkedin"]);
).toEqual(["indeed", "glassdoor", "linkedin"]);
});
it("supports glassdoor only in explicitly supported countries", () => {
expect(isGlassdoorCountry("united kingdom")).toBe(true);
expect(isGlassdoorCountry("uk")).toBe(true);
expect(isGlassdoorCountry("usa")).toBe(true);
expect(isGlassdoorCountry("japan")).toBe(false);
expect(isGlassdoorCountry("worldwide")).toBe(false);
});
});

View File

@ -100,6 +100,30 @@ export const SUPPORTED_COUNTRY_INPUTS = [
] as const;
const UK_ONLY_SOURCES = new Set<JobSource>(["gradcracker", "ukvisajobs"]);
/**
 * Countries for which the Glassdoor source may be selected, stored as
 * normalized country keys so lookups can use `normalizeCountryKey(input)`.
 *
 * `normalizeCountryKey` is a function declaration further down this module,
 * so it is hoisted and callable during this initializer.
 * NOTE(review): this relies on `normalizeCountryKey` not reading any
 * module-level `const` that is declared after this point - confirm.
 * NOTE(review): the JobSpy Python wrapper keeps a matching country-to-city
 * table (GLASSDOOR_COUNTRY_TO_CITY); keep both lists in sync.
 */
const GLASSDOOR_SUPPORTED_COUNTRIES = new Set(
[
"australia",
"austria",
"belgium",
"brazil",
"canada",
"france",
"germany",
"hong kong",
"india",
"ireland",
"italy",
"mexico",
"netherlands",
"new zealand",
"singapore",
"spain",
"switzerland",
"united kingdom",
"united states",
"vietnam",
].map((country) => normalizeCountryKey(country)),
);
export function normalizeCountryKey(value: string | null | undefined): string {
const normalized = value?.trim().toLowerCase() ?? "";
@ -125,12 +149,19 @@ export function isUkCountry(country: string | null | undefined): boolean {
return normalizeCountryKey(country) === "united kingdom";
}
/**
 * Reports whether Glassdoor is supported for the given country.
 *
 * @param country - Country name or alias; null/undefined are accepted and
 *   passed through `normalizeCountryKey` like any other value.
 * @returns `true` when the normalized key is in
 *   `GLASSDOOR_SUPPORTED_COUNTRIES`, otherwise `false`.
 */
export function isGlassdoorCountry(
  country: string | null | undefined,
): boolean {
  const countryKey = normalizeCountryKey(country);
  return GLASSDOOR_SUPPORTED_COUNTRIES.has(countryKey);
}
export function isSourceAllowedForCountry(
source: JobSource,
country: string | null | undefined,
): boolean {
if (!UK_ONLY_SOURCES.has(source)) return true;
return isUkCountry(country);
if (UK_ONLY_SOURCES.has(source)) return isUkCountry(country);
if (source === "glassdoor") return isGlassdoorCountry(country);
return true;
}
export function getCompatibleSourcesForCountry(

View File

@ -122,6 +122,7 @@ export type JobSource =
| "gradcracker"
| "indeed"
| "linkedin"
| "glassdoor"
| "ukvisajobs"
| "manual";