Enable Glassdoor as a JobSpy source (#126)
* feat(shared): add glassdoor to job source model
* feat(jobspy): support glassdoor site in scraper and discovery
* feat(pipeline): include glassdoor in source selection and API schema
* feat(ui): add glassdoor toggle to jobspy settings and run estimates
* test/docs: cover glassdoor jobspy integration end-to-end
* fix(jobspy): make glassdoor always-on without settings toggle
* fix(jobspy): fallback glassdoor when location is country-level
* refactor(jobspy): drop direct pandas usage in wrapper
* feat(pipeline): gate glassdoor by supported countries
* fix(jobspy): restore pandas output and keep glassdoor disable copy
* fix(jobspy): map country-level glassdoor searches to city fallbacks
* feat(ui): require glassdoor city for country-level runs
This commit is contained in:
parent
2c8de6c92e
commit
4e1ea28301
@ -4,7 +4,7 @@ AI-powered job discovery and application pipeline. Automatically finds jobs, sco
|
|||||||
|
|
||||||
## Workflow
|
## Workflow
|
||||||
|
|
||||||
1. **Search**: Scrapes Gradcracker, Indeed, LinkedIn, and UK Visa Sponsorship jobs.
|
1. **Search**: Scrapes Gradcracker, Indeed, LinkedIn, Glassdoor, and UK Visa Sponsorship jobs.
|
||||||
2. **Score**: AI ranks jobs by suitability using the configured LLM provider (OpenRouter by default).
|
2. **Score**: AI ranks jobs by suitability using the configured LLM provider (OpenRouter by default).
|
||||||
3. **Tailor**: Generates a custom resume summary for top-tier matches.
|
3. **Tailor**: Generates a custom resume summary for top-tier matches.
|
||||||
4. **Export**: Uses [RxResume v4](https://v4.rxresu.me) to create tailored PDFs.
|
4. **Export**: Uses [RxResume v4](https://v4.rxresu.me) to create tailored PDFs.
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
# JobSpy Extractor (How It Works)
|
# JobSpy Extractor (How It Works)
|
||||||
|
|
||||||
This is a simple walkthrough of the JobSpy extractor used for Indeed and LinkedIn.
|
This is a simple walkthrough of the JobSpy extractor used for Indeed, LinkedIn, and Glassdoor.
|
||||||
|
|
||||||
## Big picture
|
## Big picture
|
||||||
|
|
||||||
@ -34,7 +34,7 @@ The Node service (`orchestrator/src/server/services/jobspy.ts`) controls the run
|
|||||||
|
|
||||||
The mapper normalizes fields like salary ranges, converts empty values to null, and keeps extra metadata (skills, company rating, remote flag, etc.) when available.
|
The mapper normalizes fields like salary ranges, converts empty values to null, and keeps extra metadata (skills, company rating, remote flag, etc.) when available.
|
||||||
|
|
||||||
If a row is missing a valid site (`indeed` or `linkedin`) or a job URL, it gets skipped.
|
If a row is missing a valid site (`indeed`, `linkedin`, or `glassdoor`) or a job URL, it gets skipped.
|
||||||
|
|
||||||
## Notes
|
## Notes
|
||||||
|
|
||||||
|
|||||||
@ -1 +1,2 @@
|
|||||||
python-jobspy
|
python-jobspy
|
||||||
|
pandas
|
||||||
|
|||||||
@ -3,9 +3,41 @@ import json
|
|||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
from jobspy import scrape_jobs
|
from jobspy import scrape_jobs
|
||||||
|
|
||||||
PROGRESS_PREFIX = "JOBOPS_PROGRESS "
|
PROGRESS_PREFIX = "JOBOPS_PROGRESS "
|
||||||
|
# Alternate country spellings mapped to one canonical lowercase name.
# Keys are lowercase, single-spaced tokens; some canonical names are listed
# explicitly as their own alias.
COUNTRY_ALIASES = {
    alias: canonical
    for canonical, aliases in (
        ("united kingdom", ("uk", "united kingdom")),
        ("united states", ("us", "usa", "united states")),
        ("turkey", ("türkiye",)),
        ("czechia", ("czech republic",)),
    )
    for alias in aliases
}
|
||||||
|
# City used in place of a country-wide location for Glassdoor searches,
# keyed by lowercase country name.
GLASSDOOR_COUNTRY_TO_CITY = dict(
    (
        ("australia", "Sydney"),
        ("austria", "Vienna"),
        ("belgium", "Brussels"),
        ("brazil", "Sao Paulo"),
        ("canada", "Toronto"),
        ("france", "Paris"),
        ("germany", "Berlin"),
        ("hong kong", "Hong Kong"),
        ("india", "Bengaluru"),
        ("ireland", "Dublin"),
        ("italy", "Milan"),
        ("mexico", "Mexico City"),
        ("netherlands", "Amsterdam"),
        ("new zealand", "Auckland"),
        ("singapore", "Singapore"),
        ("spain", "Madrid"),
        ("switzerland", "Zurich"),
        ("united kingdom", "London"),
        ("united states", "New York"),
        ("vietnam", "Ho Chi Minh City"),
    )
)
|
||||||
|
|
||||||
|
|
||||||
def _env_str(name: str, default: str) -> str:
|
def _env_str(name: str, default: str) -> str:
|
||||||
@ -39,6 +71,47 @@ def _parse_sites(raw: str) -> list[str]:
|
|||||||
return [s.strip() for s in raw.split(",") if s.strip()]
|
return [s.strip() for s in raw.split(",") if s.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_country_token(value: str) -> str:
    """Return the canonical lookup form of a country string.

    The value is stripped, lowercased, and has internal whitespace runs
    collapsed to single spaces. If the result is a key of
    ``COUNTRY_ALIASES`` the mapped name is returned; otherwise the collapsed
    token itself is returned.
    """
    tokens = value.strip().lower().split()
    collapsed = " ".join(tokens)
    if collapsed in COUNTRY_ALIASES:
        return COUNTRY_ALIASES[collapsed]
    return collapsed
|
||||||
|
|
||||||
|
|
||||||
|
def _is_country_level_location(location: str, country_indeed: str) -> bool:
    """Tell whether ``location`` names the same country as ``country_indeed``.

    Both values are compared after ``_normalize_country_token``. If either
    value is empty or whitespace-only the answer is ``False``.
    """
    # Same short-circuit order as a guard on location first, then country.
    if location.strip() and country_indeed.strip():
        normalized_location = _normalize_country_token(location)
        normalized_country = _normalize_country_token(country_indeed)
        return normalized_location == normalized_country
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _glassdoor_city_for_country(country_indeed: str, location: str) -> str | None:
    """Look up the fallback Glassdoor city for a country.

    ``country_indeed`` is used when truthy, otherwise ``location``. The chosen
    value is normalized with ``_normalize_country_token`` and looked up in
    ``GLASSDOOR_COUNTRY_TO_CITY``.

    Returns the mapped city, or ``None`` when the country has no entry.
    """
    country_source = country_indeed if country_indeed else location
    lookup_key = _normalize_country_token(country_source)
    return GLASSDOOR_COUNTRY_TO_CITY.get(lookup_key)
|
||||||
|
|
||||||
|
|
||||||
|
def _scrape_for_sites(
    *,
    sites: list[str],
    search_term: str,
    location: str | None,
    results_wanted: int,
    hours_old: int,
    country_indeed: str,
    linkedin_fetch_description: bool,
    is_remote: bool,
) -> pd.DataFrame:
    """Run a single ``scrape_jobs`` call for ``sites`` and return its result.

    All parameters are keyword-only and are forwarded under the same names,
    except ``sites``, which is passed as ``site_name``. ``location`` is
    forwarded only when it is a non-blank string; otherwise the argument is
    omitted from the call entirely.
    """
    has_location = bool(location and location.strip())
    # Leave "location" out of the call rather than sending None or blank text.
    location_kwargs: dict[str, object] = {"location": location} if has_location else {}
    return scrape_jobs(
        site_name=sites,
        search_term=search_term,
        results_wanted=results_wanted,
        hours_old=hours_old,
        country_indeed=country_indeed,
        linkedin_fetch_description=linkedin_fetch_description,
        is_remote=is_remote,
        **location_kwargs,
    )
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
sites = _parse_sites(_env_str("JOBSPY_SITES", "indeed,linkedin"))
|
sites = _parse_sites(_env_str("JOBSPY_SITES", "indeed,linkedin"))
|
||||||
search_term = _env_str("JOBSPY_SEARCH_TERM", "web developer")
|
search_term = _env_str("JOBSPY_SEARCH_TERM", "web developer")
|
||||||
@ -68,16 +141,52 @@ def main() -> int:
|
|||||||
"searchTerm": search_term,
|
"searchTerm": search_term,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
jobs = scrape_jobs(
|
frames: list[pd.DataFrame] = []
|
||||||
site_name=sites,
|
non_glassdoor_sites = [site for site in sites if site != "glassdoor"]
|
||||||
search_term=search_term,
|
|
||||||
location=location,
|
if non_glassdoor_sites:
|
||||||
results_wanted=results_wanted,
|
frames.append(
|
||||||
hours_old=hours_old,
|
_scrape_for_sites(
|
||||||
country_indeed=country_indeed,
|
sites=non_glassdoor_sites,
|
||||||
linkedin_fetch_description=linkedin_fetch_description,
|
search_term=search_term,
|
||||||
is_remote=is_remote,
|
location=location,
|
||||||
)
|
results_wanted=results_wanted,
|
||||||
|
hours_old=hours_old,
|
||||||
|
country_indeed=country_indeed,
|
||||||
|
linkedin_fetch_description=linkedin_fetch_description,
|
||||||
|
is_remote=is_remote,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if "glassdoor" in sites:
|
||||||
|
glassdoor_location = location
|
||||||
|
if _is_country_level_location(location, country_indeed):
|
||||||
|
# Glassdoor works best with city-level location terms.
|
||||||
|
fallback_city = _glassdoor_city_for_country(country_indeed, location)
|
||||||
|
if fallback_city:
|
||||||
|
glassdoor_location = fallback_city
|
||||||
|
print(
|
||||||
|
"jobspy: Glassdoor location matched country; using city fallback "
|
||||||
|
f"({fallback_city})"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
print(
|
||||||
|
"jobspy: Glassdoor location matched country; keeping original location"
|
||||||
|
)
|
||||||
|
frames.append(
|
||||||
|
_scrape_for_sites(
|
||||||
|
sites=["glassdoor"],
|
||||||
|
search_term=search_term,
|
||||||
|
location=glassdoor_location,
|
||||||
|
results_wanted=results_wanted,
|
||||||
|
hours_old=hours_old,
|
||||||
|
country_indeed=country_indeed,
|
||||||
|
linkedin_fetch_description=linkedin_fetch_description,
|
||||||
|
is_remote=is_remote,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
jobs = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
|
||||||
|
|
||||||
print(f"Found {len(jobs)} jobs")
|
print(f"Found {len(jobs)} jobs")
|
||||||
_emit_progress(
|
_emit_progress(
|
||||||
@ -96,7 +205,6 @@ def main() -> int:
|
|||||||
escapechar="\\",
|
escapechar="\\",
|
||||||
index=False,
|
index=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
jobs.to_json(output_json, orient="records", force_ascii=False)
|
jobs.to_json(output_json, orient="records", force_ascii=False)
|
||||||
|
|
||||||
print(f"Wrote CSV: {output_csv}")
|
print(f"Wrote CSV: {output_csv}")
|
||||||
|
|||||||
@ -257,13 +257,17 @@ export const OrchestratorPage: React.FC = () => {
|
|||||||
searchTerms: values.searchTerms,
|
searchTerms: values.searchTerms,
|
||||||
sources: compatibleSources,
|
sources: compatibleSources,
|
||||||
});
|
});
|
||||||
|
const jobspyLocation = compatibleSources.includes("glassdoor")
|
||||||
|
? (values.glassdoorLocation ?? "").trim() ||
|
||||||
|
formatCountryLabel(values.country)
|
||||||
|
: formatCountryLabel(values.country);
|
||||||
await api.updateSettings({
|
await api.updateSettings({
|
||||||
searchTerms: values.searchTerms,
|
searchTerms: values.searchTerms,
|
||||||
jobspyResultsWanted: limits.jobspyResultsWanted,
|
jobspyResultsWanted: limits.jobspyResultsWanted,
|
||||||
gradcrackerMaxJobsPerTerm: limits.gradcrackerMaxJobsPerTerm,
|
gradcrackerMaxJobsPerTerm: limits.gradcrackerMaxJobsPerTerm,
|
||||||
ukvisajobsMaxJobs: limits.ukvisajobsMaxJobs,
|
ukvisajobsMaxJobs: limits.ukvisajobsMaxJobs,
|
||||||
jobspyCountryIndeed: values.country,
|
jobspyCountryIndeed: values.country,
|
||||||
jobspyLocation: formatCountryLabel(values.country),
|
jobspyLocation,
|
||||||
});
|
});
|
||||||
await refreshSettings();
|
await refreshSettings();
|
||||||
await startPipelineRun({
|
await startPipelineRun({
|
||||||
|
|||||||
@ -236,6 +236,14 @@ const nullIfSameSortedList = (
|
|||||||
defaultValue: string[],
|
defaultValue: string[],
|
||||||
) => (isSameSortedStringList(value, defaultValue) ? null : (value ?? null));
|
) => (isSameSortedStringList(value, defaultValue) ? null : (value ?? null));
|
||||||
|
|
||||||
|
/**
 * Returns the given JobSpy site list with "glassdoor" guaranteed present.
 *
 * Falsy entries are dropped and duplicates removed, keeping first-seen order.
 * "glassdoor" is appended only when it is not already in the list.
 * `null` and `undefined` are treated as an empty list.
 */
const withAlwaysOnGlassdoor = (
  sites: string[] | null | undefined,
): string[] => {
  const result: string[] = [];
  for (const site of [...(sites ?? []), "glassdoor"]) {
    if (site && !result.includes(site)) {
      result.push(site);
    }
  }
  return result;
};
|
||||||
|
|
||||||
const getDerivedSettings = (settings: AppSettings | null) => {
|
const getDerivedSettings = (settings: AppSettings | null) => {
|
||||||
const profileProjects = settings?.profileProjects ?? [];
|
const profileProjects = settings?.profileProjects ?? [];
|
||||||
|
|
||||||
@ -289,8 +297,12 @@ const getDerivedSettings = (settings: AppSettings | null) => {
|
|||||||
default: settings?.defaultJobspyCountryIndeed ?? "",
|
default: settings?.defaultJobspyCountryIndeed ?? "",
|
||||||
},
|
},
|
||||||
sites: {
|
sites: {
|
||||||
effective: settings?.jobspySites ?? ["indeed", "linkedin"],
|
effective: withAlwaysOnGlassdoor(
|
||||||
default: settings?.defaultJobspySites ?? ["indeed", "linkedin"],
|
settings?.jobspySites ?? ["indeed", "linkedin", "glassdoor"],
|
||||||
|
),
|
||||||
|
default: withAlwaysOnGlassdoor(
|
||||||
|
settings?.defaultJobspySites ?? ["indeed", "linkedin", "glassdoor"],
|
||||||
|
),
|
||||||
},
|
},
|
||||||
linkedinFetchDescription: {
|
linkedinFetchDescription: {
|
||||||
effective: settings?.jobspyLinkedinFetchDescription ?? true,
|
effective: settings?.jobspyLinkedinFetchDescription ?? true,
|
||||||
@ -691,7 +703,7 @@ export const SettingsPage: React.FC = () => {
|
|||||||
jobspy.countryIndeed.default,
|
jobspy.countryIndeed.default,
|
||||||
),
|
),
|
||||||
jobspySites: nullIfSameSortedList(
|
jobspySites: nullIfSameSortedList(
|
||||||
data.jobspySites,
|
withAlwaysOnGlassdoor(data.jobspySites),
|
||||||
jobspy.sites.default,
|
jobspy.sites.default,
|
||||||
),
|
),
|
||||||
jobspyLinkedinFetchDescription: nullIfSame(
|
jobspyLinkedinFetchDescription: nullIfSame(
|
||||||
|
|||||||
@ -96,4 +96,69 @@ describe("AutomaticRunTab", () => {
|
|||||||
),
|
),
|
||||||
).toBeInTheDocument();
|
).toBeInTheDocument();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("disables glassdoor for unsupported countries with guidance copy", async () => {
  // Settings select "japan"; the assertions below expect Glassdoor to be
  // dropped from the pipeline sources and its button disabled with the
  // country-specific reason.
  const handleSetPipelineSources = vi.fn();
  const settings = {
    searchTerms: ["backend engineer"],
    jobspyCountryIndeed: "japan",
  } as AppSettings;

  render(
    <AutomaticRunTab
      open
      settings={settings}
      enabledSources={["linkedin", "glassdoor"]}
      pipelineSources={["linkedin", "glassdoor"]}
      onToggleSource={vi.fn()}
      onSetPipelineSources={handleSetPipelineSources}
      isPipelineRunning={false}
      onSaveAndRun={vi.fn().mockResolvedValue(undefined)}
    />,
  );

  // The component is expected to prune Glassdoor from the selection.
  await waitFor(() => {
    expect(handleSetPipelineSources).toHaveBeenCalledWith(["linkedin"]);
  });

  const glassdoorToggle = screen.getByRole("button", { name: "Glassdoor" });
  expect(glassdoorToggle).toBeDisabled();
  expect(glassdoorToggle.getAttribute("title")).toContain(
    "Glassdoor is not available for the selected country.",
  );
});
|
||||||
|
|
||||||
|
it("disables glassdoor for supported countries until city is provided", async () => {
  // The remembered location equals the country ("United Kingdom"), so no
  // Glassdoor city is set; the assertions expect Glassdoor to be pruned and
  // its button disabled with the "set a city" reason.
  const handleSetPipelineSources = vi.fn();
  const settings = {
    searchTerms: ["backend engineer"],
    jobspyCountryIndeed: "united kingdom",
    jobspyLocation: "United Kingdom",
  } as AppSettings;

  render(
    <AutomaticRunTab
      open
      settings={settings}
      enabledSources={["linkedin", "glassdoor"]}
      pipelineSources={["linkedin", "glassdoor"]}
      onToggleSource={vi.fn()}
      onSetPipelineSources={handleSetPipelineSources}
      isPipelineRunning={false}
      onSaveAndRun={vi.fn().mockResolvedValue(undefined)}
    />,
  );

  // The component is expected to prune Glassdoor from the selection.
  await waitFor(() => {
    expect(handleSetPipelineSources).toHaveBeenCalledWith(["linkedin"]);
  });

  const glassdoorToggle = screen.getByRole("button", { name: "Glassdoor" });
  expect(glassdoorToggle).toBeDisabled();
  expect(glassdoorToggle.getAttribute("title")).toContain(
    "Set a Glassdoor city in Advanced settings to enable Glassdoor.",
  );
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -1,14 +1,13 @@
|
|||||||
import * as PopoverPrimitive from "@radix-ui/react-popover";
|
import * as PopoverPrimitive from "@radix-ui/react-popover";
|
||||||
import {
|
import {
|
||||||
formatCountryLabel,
|
formatCountryLabel,
|
||||||
getCompatibleSourcesForCountry,
|
|
||||||
isSourceAllowedForCountry,
|
isSourceAllowedForCountry,
|
||||||
normalizeCountryKey,
|
normalizeCountryKey,
|
||||||
SUPPORTED_COUNTRY_KEYS,
|
SUPPORTED_COUNTRY_KEYS,
|
||||||
} from "@shared/location-support.js";
|
} from "@shared/location-support.js";
|
||||||
import type { AppSettings, JobSource } from "@shared/types";
|
import type { AppSettings, JobSource } from "@shared/types";
|
||||||
import { Check, ChevronsUpDown, Loader2, Sparkles, X } from "lucide-react";
|
import { Check, ChevronsUpDown, Loader2, Sparkles, X } from "lucide-react";
|
||||||
import { useEffect, useMemo, useState } from "react";
|
import { useCallback, useEffect, useMemo, useState } from "react";
|
||||||
import { useForm } from "react-hook-form";
|
import { useForm } from "react-hook-form";
|
||||||
import {
|
import {
|
||||||
Accordion,
|
Accordion,
|
||||||
@ -71,12 +70,18 @@ interface AutomaticRunFormValues {
|
|||||||
minSuitabilityScore: string;
|
minSuitabilityScore: string;
|
||||||
runBudget: string;
|
runBudget: string;
|
||||||
country: string;
|
country: string;
|
||||||
|
glassdoorLocation: string;
|
||||||
searchTerms: string[];
|
searchTerms: string[];
|
||||||
searchTermDraft: string;
|
searchTermDraft: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
type AutomaticPresetSelection = AutomaticPresetId | "custom";
|
type AutomaticPresetSelection = AutomaticPresetId | "custom";
|
||||||
|
|
||||||
|
/** Disabled-button copy for Glassdoor when the selected country does not allow it. */
const GLASSDOOR_COUNTRY_REASON =
  "Glassdoor is not available for the selected country.";

/** Disabled-button copy for Glassdoor when the country allows it but no city is set. */
const GLASSDOOR_LOCATION_REASON =
  "Set a Glassdoor city in Advanced settings to enable Glassdoor.";
|
||||||
|
|
||||||
function toNumber(input: string, min: number, max: number, fallback: number) {
|
function toNumber(input: string, min: number, max: number, fallback: number) {
|
||||||
const parsed = Number.parseInt(input, 10);
|
const parsed = Number.parseInt(input, 10);
|
||||||
if (Number.isNaN(parsed)) return fallback;
|
if (Number.isNaN(parsed)) return fallback;
|
||||||
@ -134,6 +139,7 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
|
|||||||
minSuitabilityScore: String(DEFAULT_VALUES.minSuitabilityScore),
|
minSuitabilityScore: String(DEFAULT_VALUES.minSuitabilityScore),
|
||||||
runBudget: String(DEFAULT_VALUES.runBudget),
|
runBudget: String(DEFAULT_VALUES.runBudget),
|
||||||
country: DEFAULT_VALUES.country,
|
country: DEFAULT_VALUES.country,
|
||||||
|
glassdoorLocation: "",
|
||||||
searchTerms: DEFAULT_VALUES.searchTerms,
|
searchTerms: DEFAULT_VALUES.searchTerms,
|
||||||
searchTermDraft: "",
|
searchTermDraft: "",
|
||||||
},
|
},
|
||||||
@ -144,6 +150,7 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
|
|||||||
const minScoreInput = watch("minSuitabilityScore");
|
const minScoreInput = watch("minSuitabilityScore");
|
||||||
const runBudgetInput = watch("runBudget");
|
const runBudgetInput = watch("runBudget");
|
||||||
const countryInput = watch("country");
|
const countryInput = watch("country");
|
||||||
|
const glassdoorLocationInput = watch("glassdoorLocation");
|
||||||
const searchTerms = watch("searchTerms");
|
const searchTerms = watch("searchTerms");
|
||||||
const searchTermDraft = watch("searchTermDraft");
|
const searchTermDraft = watch("searchTermDraft");
|
||||||
|
|
||||||
@ -164,12 +171,24 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
|
|||||||
settings?.jobspyLocation ??
|
settings?.jobspyLocation ??
|
||||||
DEFAULT_VALUES.country,
|
DEFAULT_VALUES.country,
|
||||||
);
|
);
|
||||||
|
const rememberedCountryKey = rememberedCountry || DEFAULT_VALUES.country;
|
||||||
|
const rememberedLocationRaw = settings?.jobspyLocation?.trim() ?? "";
|
||||||
|
const rememberedLocationNormalized = normalizeCountryKey(
|
||||||
|
rememberedLocationRaw,
|
||||||
|
);
|
||||||
|
const rememberedGlassdoorLocation =
|
||||||
|
rememberedLocationRaw &&
|
||||||
|
rememberedLocationNormalized &&
|
||||||
|
rememberedLocationNormalized !== normalizeCountryKey(rememberedCountryKey)
|
||||||
|
? rememberedLocationRaw
|
||||||
|
: "";
|
||||||
|
|
||||||
reset({
|
reset({
|
||||||
topN: String(topN),
|
topN: String(topN),
|
||||||
minSuitabilityScore: String(minSuitabilityScore),
|
minSuitabilityScore: String(minSuitabilityScore),
|
||||||
runBudget: String(rememberedRunBudget),
|
runBudget: String(rememberedRunBudget),
|
||||||
country: rememberedCountry || DEFAULT_VALUES.country,
|
country: rememberedCountry || DEFAULT_VALUES.country,
|
||||||
|
glassdoorLocation: rememberedGlassdoorLocation,
|
||||||
searchTerms: settings?.searchTerms ?? DEFAULT_VALUES.searchTerms,
|
searchTerms: settings?.searchTerms ?? DEFAULT_VALUES.searchTerms,
|
||||||
searchTermDraft: "",
|
searchTermDraft: "",
|
||||||
});
|
});
|
||||||
@ -200,27 +219,40 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
|
|||||||
),
|
),
|
||||||
runBudget: toNumber(runBudgetInput, 1, 1000, DEFAULT_VALUES.runBudget),
|
runBudget: toNumber(runBudgetInput, 1, 1000, DEFAULT_VALUES.runBudget),
|
||||||
country: normalizedCountry || DEFAULT_VALUES.country,
|
country: normalizedCountry || DEFAULT_VALUES.country,
|
||||||
|
glassdoorLocation: glassdoorLocationInput.trim() || undefined,
|
||||||
searchTerms,
|
searchTerms,
|
||||||
};
|
};
|
||||||
}, [topNInput, minScoreInput, runBudgetInput, countryInput, searchTerms]);
|
}, [
|
||||||
|
topNInput,
|
||||||
|
minScoreInput,
|
||||||
|
runBudgetInput,
|
||||||
|
countryInput,
|
||||||
|
glassdoorLocationInput,
|
||||||
|
searchTerms,
|
||||||
|
]);
|
||||||
|
|
||||||
|
// A source can run when the selected country allows it and, for Glassdoor
// only, a Glassdoor city has been entered.
const isSourceAvailableForRun = useCallback(
  (source: JobSource) => {
    const countryAllowed = isSourceAllowedForCountry(source, values.country);
    const missingGlassdoorCity =
      source === "glassdoor" && !values.glassdoorLocation;
    return countryAllowed ? !missingGlassdoorCity : false;
  },
  [values.country, values.glassdoorLocation],
);
|
||||||
|
|
||||||
const compatibleEnabledSources = useMemo(
|
const compatibleEnabledSources = useMemo(
|
||||||
() =>
|
() => enabledSources.filter((source) => isSourceAvailableForRun(source)),
|
||||||
enabledSources.filter((source) =>
|
[enabledSources, isSourceAvailableForRun],
|
||||||
isSourceAllowedForCountry(source, values.country),
|
|
||||||
),
|
|
||||||
[enabledSources, values.country],
|
|
||||||
);
|
);
|
||||||
|
|
||||||
const compatiblePipelineSources = useMemo(
|
const compatiblePipelineSources = useMemo(
|
||||||
() => getCompatibleSourcesForCountry(pipelineSources, values.country),
|
() => pipelineSources.filter((source) => isSourceAvailableForRun(source)),
|
||||||
[pipelineSources, values.country],
|
[pipelineSources, isSourceAvailableForRun],
|
||||||
);
|
);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
const filtered = getCompatibleSourcesForCountry(
|
const filtered = pipelineSources.filter((source) =>
|
||||||
pipelineSources,
|
isSourceAvailableForRun(source),
|
||||||
values.country,
|
|
||||||
);
|
);
|
||||||
if (filtered.length === pipelineSources.length) return;
|
if (filtered.length === pipelineSources.length) return;
|
||||||
if (filtered.length > 0) {
|
if (filtered.length > 0) {
|
||||||
@ -232,9 +264,9 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
|
|||||||
}
|
}
|
||||||
}, [
|
}, [
|
||||||
compatibleEnabledSources,
|
compatibleEnabledSources,
|
||||||
|
isSourceAvailableForRun,
|
||||||
onSetPipelineSources,
|
onSetPipelineSources,
|
||||||
pipelineSources,
|
pipelineSources,
|
||||||
values.country,
|
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const estimate = useMemo(
|
const estimate = useMemo(
|
||||||
@ -441,6 +473,23 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
|
|||||||
}
|
}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
<div className="space-y-2 md:col-span-3">
|
||||||
|
<Label htmlFor="glassdoor-location">Glassdoor city</Label>
|
||||||
|
<Input
|
||||||
|
id="glassdoor-location"
|
||||||
|
value={glassdoorLocationInput}
|
||||||
|
onChange={(event) =>
|
||||||
|
setValue("glassdoorLocation", event.target.value, {
|
||||||
|
shouldDirty: true,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
placeholder='e.g. "London"'
|
||||||
|
/>
|
||||||
|
<p className="text-xs text-muted-foreground">
|
||||||
|
Required only for Glassdoor. Use a city (not country) to
|
||||||
|
keep results localized.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</AccordionContent>
|
</AccordionContent>
|
||||||
</AccordionItem>
|
</AccordionItem>
|
||||||
@ -526,12 +575,18 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
|
|||||||
<CardContent className="flex flex-wrap gap-2">
|
<CardContent className="flex flex-wrap gap-2">
|
||||||
<TooltipProvider>
|
<TooltipProvider>
|
||||||
{enabledSources.map((source) => {
|
{enabledSources.map((source) => {
|
||||||
const allowed = isSourceAllowedForCountry(
|
const countryAllowed = isSourceAllowedForCountry(
|
||||||
source,
|
source,
|
||||||
values.country,
|
values.country,
|
||||||
);
|
);
|
||||||
|
const allowed = isSourceAvailableForRun(source);
|
||||||
const selected = compatiblePipelineSources.includes(source);
|
const selected = compatiblePipelineSources.includes(source);
|
||||||
const disabledReason = `${sourceLabel[source]} is available only when country is United Kingdom.`;
|
const disabledReason =
|
||||||
|
source === "glassdoor"
|
||||||
|
? countryAllowed
|
||||||
|
? GLASSDOOR_LOCATION_REASON
|
||||||
|
: GLASSDOOR_COUNTRY_REASON
|
||||||
|
: `${sourceLabel[source]} is available only when country is United Kingdom.`;
|
||||||
|
|
||||||
const button = (
|
const button = (
|
||||||
<Button
|
<Button
|
||||||
@ -540,6 +595,7 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
|
|||||||
size="sm"
|
size="sm"
|
||||||
variant={selected ? "default" : "outline"}
|
variant={selected ? "default" : "outline"}
|
||||||
disabled={!allowed}
|
disabled={!allowed}
|
||||||
|
title={!allowed ? disabledReason : undefined}
|
||||||
onClick={() => onToggleSource(source, !selected)}
|
onClick={() => onToggleSource(source, !selected)}
|
||||||
>
|
>
|
||||||
{sourceLabel[source]}
|
{sourceLabel[source]}
|
||||||
@ -553,9 +609,7 @@ export const AutomaticRunTab: React.FC<AutomaticRunTabProps> = ({
|
|||||||
return (
|
return (
|
||||||
<Tooltip key={source}>
|
<Tooltip key={source}>
|
||||||
<TooltipTrigger asChild>
|
<TooltipTrigger asChild>
|
||||||
<span className="inline-flex" title={disabledReason}>
|
<span className="inline-flex">{button}</span>
|
||||||
{button}
|
|
||||||
</span>
|
|
||||||
</TooltipTrigger>
|
</TooltipTrigger>
|
||||||
<TooltipContent side="top">{disabledReason}</TooltipContent>
|
<TooltipContent side="top">{disabledReason}</TooltipContent>
|
||||||
</Tooltip>
|
</Tooltip>
|
||||||
|
|||||||
@ -42,11 +42,11 @@ describe("automatic-run utilities", () => {
|
|||||||
const limits = deriveExtractorLimits({
|
const limits = deriveExtractorLimits({
|
||||||
budget: 750,
|
budget: 750,
|
||||||
searchTerms: ["a", "b", "c"],
|
searchTerms: ["a", "b", "c"],
|
||||||
sources: ["indeed", "linkedin", "gradcracker"],
|
sources: ["indeed", "linkedin", "glassdoor", "gradcracker"],
|
||||||
});
|
});
|
||||||
|
|
||||||
const cap =
|
const cap =
|
||||||
2 * limits.jobspyResultsWanted * 3 + limits.gradcrackerMaxJobsPerTerm * 3;
|
3 * limits.jobspyResultsWanted * 3 + limits.gradcrackerMaxJobsPerTerm * 3;
|
||||||
|
|
||||||
expect(cap).toBeLessThanOrEqual(750);
|
expect(cap).toBeLessThanOrEqual(750);
|
||||||
});
|
});
|
||||||
|
|||||||
@ -8,6 +8,7 @@ export interface AutomaticRunValues {
|
|||||||
searchTerms: string[];
|
searchTerms: string[];
|
||||||
runBudget: number;
|
runBudget: number;
|
||||||
country: string;
|
country: string;
|
||||||
|
glassdoorLocation?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface AutomaticPresetValues {
|
export interface AutomaticPresetValues {
|
||||||
@ -71,12 +72,14 @@ export function deriveExtractorLimits(args: {
|
|||||||
const termCount = Math.max(1, args.searchTerms.length);
|
const termCount = Math.max(1, args.searchTerms.length);
|
||||||
const includesIndeed = args.sources.includes("indeed");
|
const includesIndeed = args.sources.includes("indeed");
|
||||||
const includesLinkedIn = args.sources.includes("linkedin");
|
const includesLinkedIn = args.sources.includes("linkedin");
|
||||||
|
const includesGlassdoor = args.sources.includes("glassdoor");
|
||||||
const includesGradcracker = args.sources.includes("gradcracker");
|
const includesGradcracker = args.sources.includes("gradcracker");
|
||||||
const includesUkVisaJobs = args.sources.includes("ukvisajobs");
|
const includesUkVisaJobs = args.sources.includes("ukvisajobs");
|
||||||
|
|
||||||
const weightedContributors =
|
const weightedContributors =
|
||||||
(includesIndeed ? termCount : 0) +
|
(includesIndeed ? termCount : 0) +
|
||||||
(includesLinkedIn ? termCount : 0) +
|
(includesLinkedIn ? termCount : 0) +
|
||||||
|
(includesGlassdoor ? termCount : 0) +
|
||||||
(includesGradcracker ? termCount : 0) +
|
(includesGradcracker ? termCount : 0) +
|
||||||
(includesUkVisaJobs ? 1 : 0);
|
(includesUkVisaJobs ? 1 : 0);
|
||||||
|
|
||||||
@ -133,13 +136,16 @@ export function calculateAutomaticEstimate(args: {
|
|||||||
const hasUkVisaJobs = sources.includes("ukvisajobs");
|
const hasUkVisaJobs = sources.includes("ukvisajobs");
|
||||||
const hasIndeed = sources.includes("indeed");
|
const hasIndeed = sources.includes("indeed");
|
||||||
const hasLinkedIn = sources.includes("linkedin");
|
const hasLinkedIn = sources.includes("linkedin");
|
||||||
|
const hasGlassdoor = sources.includes("glassdoor");
|
||||||
const limits = deriveExtractorLimits({
|
const limits = deriveExtractorLimits({
|
||||||
budget: values.runBudget,
|
budget: values.runBudget,
|
||||||
searchTerms: values.searchTerms,
|
searchTerms: values.searchTerms,
|
||||||
sources,
|
sources,
|
||||||
});
|
});
|
||||||
|
|
||||||
const jobspySitesCount = [hasIndeed, hasLinkedIn].filter(Boolean).length;
|
const jobspySitesCount = [hasIndeed, hasLinkedIn, hasGlassdoor].filter(
|
||||||
|
Boolean,
|
||||||
|
).length;
|
||||||
const jobspyCap = jobspySitesCount * limits.jobspyResultsWanted * termCount;
|
const jobspyCap = jobspySitesCount * limits.jobspyResultsWanted * termCount;
|
||||||
const gradcrackerCap = hasGradcracker
|
const gradcrackerCap = hasGradcracker
|
||||||
? limits.gradcrackerMaxJobsPerTerm * termCount
|
? limits.gradcrackerMaxJobsPerTerm * termCount
|
||||||
|
|||||||
@ -12,6 +12,7 @@ export const orderedSources: JobSource[] = [
|
|||||||
"gradcracker",
|
"gradcracker",
|
||||||
"indeed",
|
"indeed",
|
||||||
"linkedin",
|
"linkedin",
|
||||||
|
"glassdoor",
|
||||||
"ukvisajobs",
|
"ukvisajobs",
|
||||||
];
|
];
|
||||||
export const orderedFilterSources: JobSource[] = [...orderedSources, "manual"];
|
export const orderedFilterSources: JobSource[] = [...orderedSources, "manual"];
|
||||||
|
|||||||
@ -1,6 +1,10 @@
|
|||||||
import type { AppSettings, JobListItem, JobSource } from "@shared/types";
|
import type { AppSettings, JobListItem, JobSource } from "@shared/types";
|
||||||
import type { FilterTab, JobSort } from "./constants";
|
import type { FilterTab, JobSort } from "./constants";
|
||||||
import { orderedFilterSources, orderedSources } from "./constants";
|
import {
|
||||||
|
DEFAULT_PIPELINE_SOURCES,
|
||||||
|
orderedFilterSources,
|
||||||
|
orderedSources,
|
||||||
|
} from "./constants";
|
||||||
|
|
||||||
const dateValue = (value: string | null) => {
|
const dateValue = (value: string | null) => {
|
||||||
if (!value) return null;
|
if (!value) return null;
|
||||||
@ -159,7 +163,7 @@ export const getSourcesWithJobs = (jobs: JobListItem[]): JobSource[] => {
|
|||||||
export const getEnabledSources = (
|
export const getEnabledSources = (
|
||||||
settings: AppSettings | null,
|
settings: AppSettings | null,
|
||||||
): JobSource[] => {
|
): JobSource[] => {
|
||||||
if (!settings) return [...orderedSources];
|
if (!settings) return [...DEFAULT_PIPELINE_SOURCES, "glassdoor"];
|
||||||
|
|
||||||
const enabled: JobSource[] = [];
|
const enabled: JobSource[] = [];
|
||||||
const jobspySites = settings.jobspySites ?? [];
|
const jobspySites = settings.jobspySites ?? [];
|
||||||
@ -176,10 +180,16 @@ export const getEnabledSources = (
|
|||||||
if (hasUkVisaJobsAuth) enabled.push(source);
|
if (hasUkVisaJobsAuth) enabled.push(source);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (source === "indeed" || source === "linkedin") {
|
if (
|
||||||
if (jobspySites.includes(source)) enabled.push(source);
|
source === "indeed" ||
|
||||||
|
source === "linkedin" ||
|
||||||
|
source === "glassdoor"
|
||||||
|
) {
|
||||||
|
if (source === "glassdoor" || jobspySites.includes(source)) {
|
||||||
|
enabled.push(source);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return enabled.length > 0 ? enabled : [...orderedSources];
|
return enabled.length > 0 ? enabled : [...DEFAULT_PIPELINE_SOURCES];
|
||||||
};
|
};
|
||||||
|
|||||||
@ -8,7 +8,7 @@ import { JobspySection } from "./JobspySection";
|
|||||||
const JobspyHarness = () => {
|
const JobspyHarness = () => {
|
||||||
const methods = useForm<UpdateSettingsInput>({
|
const methods = useForm<UpdateSettingsInput>({
|
||||||
defaultValues: {
|
defaultValues: {
|
||||||
jobspySites: ["indeed", "linkedin"],
|
jobspySites: ["indeed", "linkedin", "glassdoor"],
|
||||||
jobspyLocation: "UK",
|
jobspyLocation: "UK",
|
||||||
jobspyResultsWanted: 200,
|
jobspyResultsWanted: 200,
|
||||||
jobspyHoursOld: 72,
|
jobspyHoursOld: 72,
|
||||||
@ -24,8 +24,8 @@ const JobspyHarness = () => {
|
|||||||
<JobspySection
|
<JobspySection
|
||||||
values={{
|
values={{
|
||||||
sites: {
|
sites: {
|
||||||
default: ["indeed", "linkedin"],
|
default: ["indeed", "linkedin", "glassdoor"],
|
||||||
effective: ["indeed", "linkedin"],
|
effective: ["indeed", "linkedin", "glassdoor"],
|
||||||
},
|
},
|
||||||
location: { default: "UK", effective: "UK" },
|
location: { default: "UK", effective: "UK" },
|
||||||
resultsWanted: { default: 200, effective: 200 },
|
resultsWanted: { default: 200, effective: 200 },
|
||||||
@ -51,6 +51,7 @@ describe("JobspySection", () => {
|
|||||||
|
|
||||||
expect(indeedCheckbox).toBeChecked();
|
expect(indeedCheckbox).toBeChecked();
|
||||||
expect(linkedinCheckbox).toBeChecked();
|
expect(linkedinCheckbox).toBeChecked();
|
||||||
|
expect(screen.queryByLabelText(/glassdoor/i)).not.toBeInTheDocument();
|
||||||
|
|
||||||
fireEvent.click(indeedCheckbox);
|
fireEvent.click(indeedCheckbox);
|
||||||
expect(indeedCheckbox).not.toBeChecked();
|
expect(indeedCheckbox).not.toBeChecked();
|
||||||
|
|||||||
@ -43,6 +43,12 @@ export const JobspySection: React.FC<JobspySectionProps> = ({
|
|||||||
linkedinFetchDescription,
|
linkedinFetchDescription,
|
||||||
isRemote,
|
isRemote,
|
||||||
} = values;
|
} = values;
|
||||||
|
const configurableDefaultSites = sites.default.filter(
|
||||||
|
(site) => site !== "glassdoor",
|
||||||
|
);
|
||||||
|
const configurableEffectiveSites = sites.effective.filter(
|
||||||
|
(site) => site !== "glassdoor",
|
||||||
|
);
|
||||||
const {
|
const {
|
||||||
control,
|
control,
|
||||||
register,
|
register,
|
||||||
@ -130,13 +136,13 @@ export const JobspySection: React.FC<JobspySectionProps> = ({
|
|||||||
</p>
|
</p>
|
||||||
)}
|
)}
|
||||||
<div className="text-xs text-muted-foreground">
|
<div className="text-xs text-muted-foreground">
|
||||||
Select which sites JobSpy should scrape.
|
Select configurable sites JobSpy should scrape.
|
||||||
</div>
|
</div>
|
||||||
<div className="flex gap-2 text-xs text-muted-foreground">
|
<div className="flex gap-2 text-xs text-muted-foreground">
|
||||||
<span>
|
<span>
|
||||||
Effective: {(sites.effective || []).join(", ") || "None"}
|
Effective: {configurableEffectiveSites.join(", ") || "None"}
|
||||||
</span>
|
</span>
|
||||||
<span>Default: {(sites.default || []).join(", ")}</span>
|
<span>Default: {configurableDefaultSites.join(", ")}</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@ -138,6 +138,7 @@ export const sourceLabel: Record<Job["source"], string> = {
|
|||||||
gradcracker: "Gradcracker",
|
gradcracker: "Gradcracker",
|
||||||
indeed: "Indeed",
|
indeed: "Indeed",
|
||||||
linkedin: "LinkedIn",
|
linkedin: "LinkedIn",
|
||||||
|
glassdoor: "Glassdoor",
|
||||||
ukvisajobs: "UK Visa Jobs",
|
ukvisajobs: "UK Visa Jobs",
|
||||||
manual: "Manual",
|
manual: "Manual",
|
||||||
};
|
};
|
||||||
|
|||||||
@ -44,6 +44,17 @@ describe.sequential("Pipeline API routes", () => {
|
|||||||
topN: 5,
|
topN: 5,
|
||||||
sources: ["gradcracker"],
|
sources: ["gradcracker"],
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const glassdoorRunRes = await fetch(`${baseUrl}/api/pipeline/run`, {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "Content-Type": "application/json" },
|
||||||
|
body: JSON.stringify({ sources: ["glassdoor"] }),
|
||||||
|
});
|
||||||
|
const glassdoorRunBody = await glassdoorRunRes.json();
|
||||||
|
expect(glassdoorRunBody.ok).toBe(true);
|
||||||
|
expect(runPipeline).toHaveBeenNthCalledWith(2, {
|
||||||
|
sources: ["glassdoor"],
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns conflict when cancelling with no active pipeline", async () => {
|
it("returns conflict when cancelling with no active pipeline", async () => {
|
||||||
|
|||||||
@ -98,7 +98,9 @@ const runPipelineSchema = z.object({
|
|||||||
topN: z.number().min(1).max(50).optional(),
|
topN: z.number().min(1).max(50).optional(),
|
||||||
minSuitabilityScore: z.number().min(0).max(100).optional(),
|
minSuitabilityScore: z.number().min(0).max(100).optional(),
|
||||||
sources: z
|
sources: z
|
||||||
.array(z.enum(["gradcracker", "indeed", "linkedin", "ukvisajobs"]))
|
.array(
|
||||||
|
z.enum(["gradcracker", "indeed", "linkedin", "glassdoor", "ukvisajobs"]),
|
||||||
|
)
|
||||||
.min(1)
|
.min(1)
|
||||||
.optional(),
|
.optional(),
|
||||||
});
|
});
|
||||||
|
|||||||
@ -28,7 +28,7 @@ export const DEMO_DEFAULT_SETTINGS: DemoDefaultSettings = {
|
|||||||
jobspyResultsWanted: "25",
|
jobspyResultsWanted: "25",
|
||||||
jobspyHoursOld: "72",
|
jobspyHoursOld: "72",
|
||||||
jobspyCountryIndeed: "US",
|
jobspyCountryIndeed: "US",
|
||||||
jobspySites: JSON.stringify(["linkedin", "indeed"]),
|
jobspySites: JSON.stringify(["linkedin", "indeed", "glassdoor"]),
|
||||||
jobspyLinkedinFetchDescription: "1",
|
jobspyLinkedinFetchDescription: "1",
|
||||||
jobspyIsRemote: "0",
|
jobspyIsRemote: "0",
|
||||||
resumeProjects: JSON.stringify({
|
resumeProjects: JSON.stringify({
|
||||||
@ -253,6 +253,7 @@ export const COMPANY_SUFFIXES = [
|
|||||||
export const DEMO_SOURCE_BASE_URLS: Record<JobSource, string> = {
|
export const DEMO_SOURCE_BASE_URLS: Record<JobSource, string> = {
|
||||||
linkedin: "https://www.linkedin.com",
|
linkedin: "https://www.linkedin.com",
|
||||||
indeed: "https://www.indeed.com",
|
indeed: "https://www.indeed.com",
|
||||||
|
glassdoor: "https://www.glassdoor.com",
|
||||||
gradcracker: "https://www.gradcracker.com",
|
gradcracker: "https://www.gradcracker.com",
|
||||||
ukvisajobs: "https://www.ukvisajobs.com",
|
ukvisajobs: "https://www.ukvisajobs.com",
|
||||||
manual: "https://example.com",
|
manual: "https://example.com",
|
||||||
|
|||||||
@ -17,7 +17,14 @@ export const jobs = sqliteTable("jobs", {
|
|||||||
|
|
||||||
// From crawler
|
// From crawler
|
||||||
source: text("source", {
|
source: text("source", {
|
||||||
enum: ["gradcracker", "indeed", "linkedin", "ukvisajobs", "manual"],
|
enum: [
|
||||||
|
"gradcracker",
|
||||||
|
"indeed",
|
||||||
|
"linkedin",
|
||||||
|
"glassdoor",
|
||||||
|
"ukvisajobs",
|
||||||
|
"manual",
|
||||||
|
],
|
||||||
})
|
})
|
||||||
.notNull()
|
.notNull()
|
||||||
.default("gradcracker"),
|
.default("gradcracker"),
|
||||||
|
|||||||
@ -37,6 +37,7 @@ import {
|
|||||||
const DEFAULT_CONFIG: PipelineConfig = {
|
const DEFAULT_CONFIG: PipelineConfig = {
|
||||||
topN: 10,
|
topN: 10,
|
||||||
minSuitabilityScore: 50,
|
minSuitabilityScore: 50,
|
||||||
|
// Keep Glassdoor opt-in via source picker/settings; do not enable by default.
|
||||||
sources: ["gradcracker", "indeed", "linkedin", "ukvisajobs"],
|
sources: ["gradcracker", "indeed", "linkedin", "ukvisajobs"],
|
||||||
outputDir: join(getDataDir(), "pdfs"),
|
outputDir: join(getDataDir(), "pdfs"),
|
||||||
enableCrawling: true,
|
enableCrawling: true,
|
||||||
|
|||||||
@ -76,6 +76,92 @@ describe("discoverJobsStep", () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("passes glassdoor through to JobSpy when selected", async () => {
|
||||||
|
const settingsRepo = await import("../../repositories/settings");
|
||||||
|
const jobSpy = await import("../../services/jobspy");
|
||||||
|
|
||||||
|
vi.mocked(settingsRepo.getAllSettings).mockResolvedValue({
|
||||||
|
searchTerms: JSON.stringify(["engineer"]),
|
||||||
|
jobspySites: JSON.stringify(["glassdoor"]),
|
||||||
|
} as any);
|
||||||
|
|
||||||
|
vi.mocked(jobSpy.runJobSpy).mockResolvedValue({
|
||||||
|
success: true,
|
||||||
|
jobs: [
|
||||||
|
{
|
||||||
|
source: "glassdoor",
|
||||||
|
title: "Engineer",
|
||||||
|
employer: "ACME",
|
||||||
|
jobUrl: "https://example.com/job",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
} as any);
|
||||||
|
|
||||||
|
const result = await discoverJobsStep({
|
||||||
|
mergedConfig: {
|
||||||
|
...config,
|
||||||
|
sources: ["glassdoor"],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.discoveredJobs).toHaveLength(1);
|
||||||
|
expect(vi.mocked(jobSpy.runJobSpy)).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({ sites: ["glassdoor"] }),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("keeps glassdoor enabled even when jobspySites override omits it", async () => {
|
||||||
|
const settingsRepo = await import("../../repositories/settings");
|
||||||
|
const jobSpy = await import("../../services/jobspy");
|
||||||
|
|
||||||
|
vi.mocked(settingsRepo.getAllSettings).mockResolvedValue({
|
||||||
|
searchTerms: JSON.stringify(["engineer"]),
|
||||||
|
jobspySites: JSON.stringify(["linkedin"]),
|
||||||
|
} as any);
|
||||||
|
|
||||||
|
vi.mocked(jobSpy.runJobSpy).mockResolvedValue({
|
||||||
|
success: true,
|
||||||
|
jobs: [],
|
||||||
|
} as any);
|
||||||
|
|
||||||
|
await discoverJobsStep({
|
||||||
|
mergedConfig: {
|
||||||
|
...config,
|
||||||
|
sources: ["glassdoor", "linkedin"],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(vi.mocked(jobSpy.runJobSpy)).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({ sites: ["glassdoor", "linkedin"] }),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("filters out glassdoor for unsupported countries", async () => {
|
||||||
|
const settingsRepo = await import("../../repositories/settings");
|
||||||
|
const jobSpy = await import("../../services/jobspy");
|
||||||
|
|
||||||
|
vi.mocked(settingsRepo.getAllSettings).mockResolvedValue({
|
||||||
|
searchTerms: JSON.stringify(["engineer"]),
|
||||||
|
jobspyCountryIndeed: "japan",
|
||||||
|
} as any);
|
||||||
|
|
||||||
|
vi.mocked(jobSpy.runJobSpy).mockResolvedValue({
|
||||||
|
success: true,
|
||||||
|
jobs: [],
|
||||||
|
} as any);
|
||||||
|
|
||||||
|
await discoverJobsStep({
|
||||||
|
mergedConfig: {
|
||||||
|
...config,
|
||||||
|
sources: ["glassdoor", "linkedin"],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(vi.mocked(jobSpy.runJobSpy)).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({ sites: ["linkedin"] }),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
it("throws when all enabled sources fail", async () => {
|
it("throws when all enabled sources fail", async () => {
|
||||||
const settingsRepo = await import("../../repositories/settings");
|
const settingsRepo = await import("../../repositories/settings");
|
||||||
const ukVisa = await import("../../services/ukvisajobs");
|
const ukVisa = await import("../../services/ukvisajobs");
|
||||||
|
|||||||
@ -67,8 +67,8 @@ export async function discoverJobsStep(args: {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let jobSpySites = compatibleSources.filter(
|
let jobSpySites = compatibleSources.filter(
|
||||||
(source): source is "indeed" | "linkedin" =>
|
(source): source is "indeed" | "linkedin" | "glassdoor" =>
|
||||||
source === "indeed" || source === "linkedin",
|
source === "indeed" || source === "linkedin" || source === "glassdoor",
|
||||||
);
|
);
|
||||||
|
|
||||||
const jobspySitesSettingRaw = settings.jobspySites;
|
const jobspySitesSettingRaw = settings.jobspySites;
|
||||||
@ -76,7 +76,9 @@ export async function discoverJobsStep(args: {
|
|||||||
try {
|
try {
|
||||||
const allowed = JSON.parse(jobspySitesSettingRaw);
|
const allowed = JSON.parse(jobspySitesSettingRaw);
|
||||||
if (Array.isArray(allowed)) {
|
if (Array.isArray(allowed)) {
|
||||||
jobSpySites = jobSpySites.filter((site) => allowed.includes(site));
|
jobSpySites = jobSpySites.filter(
|
||||||
|
(site) => site === "glassdoor" || allowed.includes(site),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
// ignore JSON parse error
|
// ignore JSON parse error
|
||||||
|
|||||||
@ -105,6 +105,7 @@ function toJobSource(site: unknown): JobSource | null {
|
|||||||
if (raw === "gradcracker") return "gradcracker";
|
if (raw === "gradcracker") return "gradcracker";
|
||||||
if (raw === "indeed") return "indeed";
|
if (raw === "indeed") return "indeed";
|
||||||
if (raw === "linkedin") return "linkedin";
|
if (raw === "linkedin") return "linkedin";
|
||||||
|
if (raw === "glassdoor") return "glassdoor";
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -164,8 +165,8 @@ export async function runJobSpy(
|
|||||||
const outputDir = join(dataDir, "imports");
|
const outputDir = join(dataDir, "imports");
|
||||||
await mkdir(outputDir, { recursive: true });
|
await mkdir(outputDir, { recursive: true });
|
||||||
|
|
||||||
const sites = (options.sites ?? ["indeed", "linkedin"])
|
const sites = (options.sites ?? ["indeed", "linkedin", "glassdoor"])
|
||||||
.filter((s) => s === "indeed" || s === "linkedin")
|
.filter((s) => s === "indeed" || s === "linkedin" || s === "glassdoor")
|
||||||
.join(",");
|
.join(",");
|
||||||
|
|
||||||
const searchTerms = resolveSearchTerms(options);
|
const searchTerms = resolveSearchTerms(options);
|
||||||
@ -191,7 +192,7 @@ export async function runJobSpy(
|
|||||||
stdio: ["ignore", "pipe", "pipe"],
|
stdio: ["ignore", "pipe", "pipe"],
|
||||||
env: {
|
env: {
|
||||||
...process.env,
|
...process.env,
|
||||||
JOBSPY_SITES: sites || "indeed,linkedin",
|
JOBSPY_SITES: sites || "indeed,linkedin,glassdoor",
|
||||||
JOBSPY_SEARCH_TERM: searchTerm,
|
JOBSPY_SEARCH_TERM: searchTerm,
|
||||||
JOBSPY_TERM_INDEX: String(i + 1),
|
JOBSPY_TERM_INDEX: String(i + 1),
|
||||||
JOBSPY_TERM_TOTAL: String(searchTerms.length),
|
JOBSPY_TERM_TOTAL: String(searchTerms.length),
|
||||||
|
|||||||
@ -79,6 +79,26 @@ describe("settings-conversion", () => {
|
|||||||
expect(malformedOverride.value).toEqual(["web developer"]);
|
expect(malformedOverride.value).toEqual(["web developer"]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("always includes glassdoor in resolved jobspySites", () => {
|
||||||
|
delete process.env.JOBSPY_SITES;
|
||||||
|
expect(resolveSettingValue("jobspySites", undefined).value).toEqual([
|
||||||
|
"indeed",
|
||||||
|
"linkedin",
|
||||||
|
"glassdoor",
|
||||||
|
]);
|
||||||
|
|
||||||
|
process.env.JOBSPY_SITES = "indeed,linkedin";
|
||||||
|
expect(resolveSettingValue("jobspySites", undefined).value).toEqual([
|
||||||
|
"indeed",
|
||||||
|
"linkedin",
|
||||||
|
"glassdoor",
|
||||||
|
]);
|
||||||
|
|
||||||
|
expect(
|
||||||
|
resolveSettingValue("jobspySites", JSON.stringify(["linkedin"])).value,
|
||||||
|
).toEqual(["linkedin", "glassdoor"]);
|
||||||
|
});
|
||||||
|
|
||||||
it("round-trips penalizeMissingSalary boolean setting", () => {
|
it("round-trips penalizeMissingSalary boolean setting", () => {
|
||||||
expect(serializeSettingValue("penalizeMissingSalary", true)).toBe("1");
|
expect(serializeSettingValue("penalizeMissingSalary", true)).toBe("1");
|
||||||
expect(serializeSettingValue("penalizeMissingSalary", false)).toBe("0");
|
expect(serializeSettingValue("penalizeMissingSalary", false)).toBe("0");
|
||||||
|
|||||||
@ -57,6 +57,24 @@ function parseJsonArrayOrNull(raw: string | undefined): string[] | null {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function normalizeJobspySites(value: string[]): string[] {
|
||||||
|
const seen = new Set<string>();
|
||||||
|
const normalized: string[] = [];
|
||||||
|
|
||||||
|
for (const site of value) {
|
||||||
|
const trimmed = site.trim();
|
||||||
|
if (!trimmed || seen.has(trimmed)) continue;
|
||||||
|
seen.add(trimmed);
|
||||||
|
normalized.push(trimmed);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!seen.has("glassdoor")) {
|
||||||
|
normalized.push("glassdoor");
|
||||||
|
}
|
||||||
|
|
||||||
|
return normalized;
|
||||||
|
}
|
||||||
|
|
||||||
function parseBitBoolOrNull(raw: string | undefined): boolean | null {
|
function parseBitBoolOrNull(raw: string | undefined): boolean | null {
|
||||||
if (!raw) return null;
|
if (!raw) return null;
|
||||||
return raw === "true" || raw === "1";
|
return raw === "true" || raw === "1";
|
||||||
@ -143,13 +161,13 @@ export const settingsConversionMetadata: SettingsConversionMetadata = {
|
|||||||
},
|
},
|
||||||
jobspySites: {
|
jobspySites: {
|
||||||
defaultValue: () =>
|
defaultValue: () =>
|
||||||
(process.env.JOBSPY_SITES || "indeed,linkedin")
|
normalizeJobspySites(
|
||||||
.split(",")
|
(process.env.JOBSPY_SITES || "indeed,linkedin,glassdoor").split(","),
|
||||||
.map((value) => value.trim())
|
),
|
||||||
.filter(Boolean),
|
|
||||||
parseOverride: parseJsonArrayOrNull,
|
parseOverride: parseJsonArrayOrNull,
|
||||||
serialize: serializeNullableJsonArray,
|
serialize: serializeNullableJsonArray,
|
||||||
resolve: resolveWithNullishFallback,
|
resolve: ({ defaultValue, overrideValue }) =>
|
||||||
|
normalizeJobspySites(overrideValue ?? defaultValue),
|
||||||
},
|
},
|
||||||
jobspyLinkedinFetchDescription: {
|
jobspyLinkedinFetchDescription: {
|
||||||
defaultValue: () =>
|
defaultValue: () =>
|
||||||
|
|||||||
@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
|
|||||||
import {
|
import {
|
||||||
formatCountryLabel,
|
formatCountryLabel,
|
||||||
getCompatibleSourcesForCountry,
|
getCompatibleSourcesForCountry,
|
||||||
|
isGlassdoorCountry,
|
||||||
isSourceAllowedForCountry,
|
isSourceAllowedForCountry,
|
||||||
isUkCountry,
|
isUkCountry,
|
||||||
normalizeCountryKey,
|
normalizeCountryKey,
|
||||||
@ -49,14 +50,24 @@ describe("location-support", () => {
|
|||||||
expect(isSourceAllowedForCountry("ukvisajobs", "worldwide")).toBe(false);
|
expect(isSourceAllowedForCountry("ukvisajobs", "worldwide")).toBe(false);
|
||||||
expect(isSourceAllowedForCountry("indeed", "united states")).toBe(true);
|
expect(isSourceAllowedForCountry("indeed", "united states")).toBe(true);
|
||||||
expect(isSourceAllowedForCountry("linkedin", "worldwide")).toBe(true);
|
expect(isSourceAllowedForCountry("linkedin", "worldwide")).toBe(true);
|
||||||
|
expect(isSourceAllowedForCountry("glassdoor", "united states")).toBe(true);
|
||||||
|
expect(isSourceAllowedForCountry("glassdoor", "japan")).toBe(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("filters incompatible sources while preserving compatible order", () => {
|
it("filters incompatible sources while preserving compatible order", () => {
|
||||||
expect(
|
expect(
|
||||||
getCompatibleSourcesForCountry(
|
getCompatibleSourcesForCountry(
|
||||||
["gradcracker", "indeed", "ukvisajobs", "linkedin"],
|
["gradcracker", "indeed", "glassdoor", "ukvisajobs", "linkedin"],
|
||||||
"united states",
|
"united states",
|
||||||
),
|
),
|
||||||
).toEqual(["indeed", "linkedin"]);
|
).toEqual(["indeed", "glassdoor", "linkedin"]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("supports glassdoor only in explicitly supported countries", () => {
|
||||||
|
expect(isGlassdoorCountry("united kingdom")).toBe(true);
|
||||||
|
expect(isGlassdoorCountry("uk")).toBe(true);
|
||||||
|
expect(isGlassdoorCountry("usa")).toBe(true);
|
||||||
|
expect(isGlassdoorCountry("japan")).toBe(false);
|
||||||
|
expect(isGlassdoorCountry("worldwide")).toBe(false);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -100,6 +100,30 @@ export const SUPPORTED_COUNTRY_INPUTS = [
|
|||||||
] as const;
|
] as const;
|
||||||
|
|
||||||
const UK_ONLY_SOURCES = new Set<JobSource>(["gradcracker", "ukvisajobs"]);
|
const UK_ONLY_SOURCES = new Set<JobSource>(["gradcracker", "ukvisajobs"]);
|
||||||
|
const GLASSDOOR_SUPPORTED_COUNTRIES = new Set(
|
||||||
|
[
|
||||||
|
"australia",
|
||||||
|
"austria",
|
||||||
|
"belgium",
|
||||||
|
"brazil",
|
||||||
|
"canada",
|
||||||
|
"france",
|
||||||
|
"germany",
|
||||||
|
"hong kong",
|
||||||
|
"india",
|
||||||
|
"ireland",
|
||||||
|
"italy",
|
||||||
|
"mexico",
|
||||||
|
"netherlands",
|
||||||
|
"new zealand",
|
||||||
|
"singapore",
|
||||||
|
"spain",
|
||||||
|
"switzerland",
|
||||||
|
"united kingdom",
|
||||||
|
"united states",
|
||||||
|
"vietnam",
|
||||||
|
].map((country) => normalizeCountryKey(country)),
|
||||||
|
);
|
||||||
|
|
||||||
export function normalizeCountryKey(value: string | null | undefined): string {
|
export function normalizeCountryKey(value: string | null | undefined): string {
|
||||||
const normalized = value?.trim().toLowerCase() ?? "";
|
const normalized = value?.trim().toLowerCase() ?? "";
|
||||||
@ -125,12 +149,19 @@ export function isUkCountry(country: string | null | undefined): boolean {
|
|||||||
return normalizeCountryKey(country) === "united kingdom";
|
return normalizeCountryKey(country) === "united kingdom";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function isGlassdoorCountry(
|
||||||
|
country: string | null | undefined,
|
||||||
|
): boolean {
|
||||||
|
return GLASSDOOR_SUPPORTED_COUNTRIES.has(normalizeCountryKey(country));
|
||||||
|
}
|
||||||
|
|
||||||
export function isSourceAllowedForCountry(
|
export function isSourceAllowedForCountry(
|
||||||
source: JobSource,
|
source: JobSource,
|
||||||
country: string | null | undefined,
|
country: string | null | undefined,
|
||||||
): boolean {
|
): boolean {
|
||||||
if (!UK_ONLY_SOURCES.has(source)) return true;
|
if (UK_ONLY_SOURCES.has(source)) return isUkCountry(country);
|
||||||
return isUkCountry(country);
|
if (source === "glassdoor") return isGlassdoorCountry(country);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function getCompatibleSourcesForCountry(
|
export function getCompatibleSourcesForCountry(
|
||||||
|
|||||||
@ -122,6 +122,7 @@ export type JobSource =
|
|||||||
| "gradcracker"
|
| "gradcracker"
|
||||||
| "indeed"
|
| "indeed"
|
||||||
| "linkedin"
|
| "linkedin"
|
||||||
|
| "glassdoor"
|
||||||
| "ukvisajobs"
|
| "ukvisajobs"
|
||||||
| "manual";
|
| "manual";
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user