Some checks failed
CI / Linting (Biome) (push) Failing after 41s
CI / Tests (push) Successful in 5m22s
CI / Type Check (adzuna-extractor) (push) Successful in 1m9s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m14s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m11s
CI / Type Check (orchestrator) (push) Successful in 1m28s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m13s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m12s
CI / Documentation (push) Successful in 2m0s
QAJobsBoard and similar feeds often store Worldwide/Remote while the real country is only in the description. Scan title and description when location is vague, and prefer concrete locations from QAJobsBoard postings. Co-authored-by: Cursor <cursoragent@cursor.com>
634 lines
19 KiB
TypeScript
634 lines
19 KiB
TypeScript
import { logger } from "@infra/logger";
|
|
import { sanitizeUnknown } from "@infra/sanitize";
|
|
import { getExtractorRegistry } from "@server/extractors/registry";
|
|
import { DEFAULT_JOB_OWNER_PROFILE_ID } from "@server/infra/job-owner-context";
|
|
import { getAllJobUrls } from "@server/repositories/jobs";
|
|
import { getProfileById } from "@server/repositories/profiles";
|
|
import * as settingsRepo from "@server/repositories/settings";
|
|
import { asyncPool } from "@server/utils/async-pool";
|
|
import {
|
|
jobMatchesBlockedCountries,
|
|
resolveBlockedCountriesFromStoredString,
|
|
} from "@shared/blocked-countries.js";
|
|
import {
|
|
formatCountryLabel,
|
|
isSourceAllowedForCountry,
|
|
normalizeCountryKey,
|
|
} from "@shared/location-support.js";
|
|
import { resolveBlockedCompanyKeywordsFromStoredString } from "@shared/resolve-blocked-company-keywords.js";
|
|
import {
|
|
inferCountryKeyFromSearchGeography,
|
|
matchesRequestedCity,
|
|
resolveSearchCities,
|
|
shouldApplyStrictCityFilter,
|
|
} from "@shared/search-cities.js";
|
|
import { jobSearchProfileSchema } from "@shared/settings-registry.js";
|
|
import type { CreateJobInput, PipelineConfig } from "@shared/types";
|
|
import { type CrawlSource, progressHelpers, updateProgress } from "../progress";
|
|
|
|
/**
 * Value passed as the `concurrency` option to `asyncPool` when
 * `discoverJobsStep` runs its per-manifest source tasks.
 */
const DISCOVERY_CONCURRENCY = 3;
|
|
|
|
/**
 * Outcome of running a single discovery source task.
 *
 * A task reports failure by returning an empty `discoveredJobs` list together
 * with one or more human-readable messages in `sourceErrors`.
 */
type DiscoveryTaskResult = {
  /** Jobs produced by the task (empty when the task failed). */
  discoveredJobs: CreateJobInput[];
  /** Error messages collected for the task (empty on success). */
  sourceErrors: string[];
};
|
|
|
|
/**
 * One unit of work scheduled by `discoverJobsStep`: a runnable extractor
 * invocation plus the metadata handed to the progress helpers when it starts.
 */
type DiscoverySourceTask = {
  /** Source identifier reported to progress tracking and used in error text. */
  source: CrawlSource;
  /** Number of search terms, forwarded to `progressHelpers.startSource`. */
  termsTotal?: number;
  /** Progress detail text shown when the task starts. */
  detail: string;
  /** Executes the task and resolves with its jobs and/or error messages. */
  run: () => Promise<DiscoveryTaskResult>;
};
|
|
|
|
/**
 * Reports whether an employer name contains any blocked keyword.
 *
 * The employer is lower-cased before comparison; the keywords are used as-is,
 * so the caller must supply them already lower-cased. Matching is by plain
 * substring.
 *
 * @param employer - Employer name from the job posting; may be missing.
 * @param blockedKeywordsLowerCase - Lower-cased keywords to look for.
 * @returns `true` when the employer contains at least one keyword; `false`
 *   when the employer is missing/empty or the keyword list is empty.
 */
function isBlockedEmployer(
  employer: string | null | undefined,
  blockedKeywordsLowerCase: string[],
): boolean {
  if (!employer) return false;
  if (blockedKeywordsLowerCase.length === 0) return false;
  const normalizedEmployer = employer.toLowerCase();
  return blockedKeywordsLowerCase.some((keyword) =>
    normalizedEmployer.includes(keyword),
  );
}
|
|
|
|
/**
 * Keeps only the jobs that are acceptable for the requested cities.
 *
 * With no requested cities the input array is returned unchanged. Otherwise a
 * job is kept when, for at least one requested city, either
 * `shouldApplyStrictCityFilter` says strict filtering does not apply to that
 * city for the selected country, or `matchesRequestedCity` accepts the job's
 * location for that city.
 *
 * @param args.jobs - Candidate jobs.
 * @param args.selectedCountry - Country key the search is running against.
 * @param args.requestedCities - Cities requested by the user.
 * @returns The jobs that passed the city check.
 */
function filterJobsByRequestedCities(args: {
  jobs: CreateJobInput[];
  selectedCountry: string;
  requestedCities: string[];
}): CreateJobInput[] {
  const { jobs, selectedCountry, requestedCities } = args;
  if (requestedCities.length === 0) return jobs;

  return jobs.filter((job) =>
    requestedCities.some((requestedCity) => {
      const strict = shouldApplyStrictCityFilter(
        requestedCity,
        selectedCountry,
      );
      // A city that is not strictly filtered lets every job through.
      if (!strict) return true;
      return matchesRequestedCity(job.location, requestedCity);
    }),
  );
}
|
|
|
|
/**
 * Tokens that `buildRoleMatchers` discards when splitting target-role phrases
 * into individual token matchers: articles and prepositions, seniority and
 * level markers, work-arrangement words, and a few very generic role words.
 */
const ROLE_TOKEN_STOPWORDS = new Set([
  "a",
  "an",
  "and",
  "the",
  "of",
  "to",
  "for",
  "in",
  "on",
  "with",
  "at",
  "by",
  "from",
  "senior",
  "sr",
  "jr",
  "junior",
  "lead",
  "principal",
  "staff",
  "i",
  "ii",
  "iii",
  "iv",
  "v",
  "remote",
  "hybrid",
  "onsite",
  // These are too generic and cause massive false positives.
  "software",
  "development",
  "developer",
  "engineer",
  "engineering",
]);
|
|
|
|
/**
 * Normalizes free text for case-insensitive substring matching.
 *
 * @param value - Text to normalize; `null`/`undefined` are treated as empty.
 * @returns The lower-cased text with every whitespace run collapsed to a
 *   single space and leading/trailing whitespace removed.
 */
function normalizeText(value: string | null | undefined): string {
  return (value ?? "").toLowerCase().replace(/\s+/g, " ").trim();
}
|
|
|
|
/**
 * Builds the two needle lists used for role matching.
 *
 * - `phraseMatchers`: each input phrase normalized with `normalizeText`, with
 *   empty results removed.
 * - `tokenMatchers`: the distinct tokens obtained by splitting those phrases
 *   on any run of characters outside `a-z`, `0-9`, `+`, `.` and `#`, skipping
 *   tokens shorter than two characters and tokens in `ROLE_TOKEN_STOPWORDS`.
 *   The tokens `qa`, `sdet`, `test`, `testing` and `automation` are always
 *   included, regardless of the input phrases.
 *
 * @param phrases - Target-role phrases as entered by the user.
 * @returns The phrase and token needle lists.
 */
function buildRoleMatchers(phrases: string[]): {
  phraseMatchers: string[];
  tokenMatchers: string[];
} {
  const phraseMatchers = phrases.map((p) => normalizeText(p)).filter(Boolean);

  const tokenSet = new Set<string>();
  for (const phrase of phraseMatchers) {
    // Keep `+`, `.` and `#` inside tokens so names like "c++", "c#" or
    // "node.js" survive as single tokens.
    for (const token of phrase.split(/[^a-z0-9+.#]+/g)) {
      const cleaned = token.trim();
      if (!cleaned) continue;
      if (cleaned.length < 2) continue;
      if (ROLE_TOKEN_STOPWORDS.has(cleaned)) continue;
      tokenSet.add(cleaned);
    }
  }

  // Ensure common QA acronyms remain even if user only typed long-form roles.
  for (const token of ["qa", "sdet", "test", "testing", "automation"]) {
    tokenSet.add(token);
  }

  return { phraseMatchers, tokenMatchers: [...tokenSet] };
}
|
|
|
|
/**
 * Reports whether `text` contains at least one of the given needles.
 *
 * Matching is a plain, case-sensitive substring test (`String.includes`), so
 * callers are expected to normalize both sides first, and a short needle also
 * matches inside longer words (for example "test" inside "latest").
 *
 * @param text - Text to search; an empty string never matches.
 * @param needles - Candidate substrings; empty needles are ignored.
 * @returns `true` when any non-empty needle occurs in `text`.
 */
function matchesAny(text: string, needles: string[]): boolean {
  if (!text) return false;
  for (const needle of needles) {
    if (needle && text.includes(needle)) return true;
  }
  return false;
}
|
|
|
|
/**
 * Filters jobs against the user's search profile.
 *
 * Title and description are normalized with `normalizeText` and matched by
 * substring. A job is dropped when:
 * 1. any deal-breaker occurs in the title or description; or
 * 2. target-role phrases were supplied and neither the title nor the
 *    description contains any role phrase or role token (see
 *    `buildRoleMatchers`); or
 * 3. must-have skills were supplied and none of them occurs in the title or
 *    description (one matching skill is enough to pass).
 *
 * @param args.jobs - Candidate jobs.
 * @param args.targetRolePhrases - Role phrases used for the role check.
 * @param args.mustHaveSkills - Skills of which at least one must be mentioned.
 * @param args.dealBreakers - Terms that exclude a job when mentioned.
 * @returns The surviving jobs and the number of jobs removed.
 */
function filterJobsBySearchProfile(args: {
  jobs: CreateJobInput[];
  targetRolePhrases: string[];
  mustHaveSkills: string[];
  dealBreakers: string[];
}): { jobs: CreateJobInput[]; dropped: number } {
  const { jobs, targetRolePhrases, mustHaveSkills, dealBreakers } = args;

  const roleMatchers = buildRoleMatchers(targetRolePhrases);
  const mustHaveLower = mustHaveSkills.map(normalizeText).filter(Boolean);
  const dealBreakersLower = dealBreakers.map(normalizeText).filter(Boolean);

  const filtered = jobs.filter((job) => {
    const title = normalizeText(job.title);
    const body = normalizeText(job.jobDescription);
    const haystack = `${title}\n${body}`;

    if (
      dealBreakersLower.length > 0 &&
      matchesAny(haystack, dealBreakersLower)
    ) {
      return false;
    }

    // If the user specified target roles, enforce a strict role match so we
    // don't surface irrelevant jobs (e.g. legal/sales/finance) in Discovered.
    if (roleMatchers.phraseMatchers.length > 0) {
      const roleMatch =
        matchesAny(title, roleMatchers.phraseMatchers) ||
        matchesAny(title, roleMatchers.tokenMatchers) ||
        matchesAny(body, roleMatchers.phraseMatchers) ||
        matchesAny(body, roleMatchers.tokenMatchers);
      if (!roleMatch) return false;
    }

    if (mustHaveLower.length > 0 && !matchesAny(haystack, mustHaveLower)) {
      return false;
    }

    return true;
  });

  return { jobs: filtered, dropped: jobs.length - filtered.length };
}
|
|
|
|
/**
 * Pipeline discovery step: runs the configured job sources and returns the
 * jobs that survive all post-crawl filters.
 *
 * Outline:
 * 1. Load settings and the extractor registry.
 * 2. Resolve search terms from the `searchTerms` setting (a JSON array) or,
 *    when unset, from `JOBSPY_SEARCH_TERMS` split on `|` (default
 *    "web developer"), then append target roles from the job search profile.
 * 3. Resolve the selected country and drop sources that
 *    `isSourceAllowedForCountry` rejects for it.
 * 4. Group the remaining sources by extractor manifest and run one task per
 *    manifest through `asyncPool`.
 * 5. Filter the combined results by requested cities, blocked company
 *    keywords, blocked countries and, when the profile defines any criteria,
 *    the search profile.
 * 6. Stamp every remaining job with the owner profile id.
 *
 * Cancellation: when `shouldCancel` returns true before crawling starts, the
 * lists collected so far are returned. When it returns true after the
 * blocked-country filter, the jobs are returned without the search-profile
 * filter and without the owner profile stamp.
 *
 * @param args.mergedConfig - Pipeline configuration; `sources` and
 *   `ownerProfileId` are read here.
 * @param args.shouldCancel - Optional cancellation probe.
 * @returns The discovered jobs and the error messages of failed sources.
 * @throws SyntaxError when the `searchTerms` setting is not valid JSON.
 * @throws Error when the `searchTerms` setting is valid JSON but not an array.
 * @throws Error when sources were requested but none is compatible with the
 *   selected country.
 * @throws Error when no source produced any job and at least one source
 *   reported an error.
 */
export async function discoverJobsStep(args: {
  mergedConfig: PipelineConfig;
  shouldCancel?: () => boolean;
}): Promise<{
  discoveredJobs: CreateJobInput[];
  sourceErrors: string[];
}> {
  logger.info("Running discovery step");

  const discoveredJobs: CreateJobInput[] = [];
  const sourceErrors: string[] = [];

  const settings = await settingsRepo.getAllSettings();
  const registry = await getExtractorRegistry();

  const searchTermsSetting = settings.searchTerms;
  let searchTerms: string[] = [];

  if (searchTermsSetting) {
    // Malformed JSON still throws here. A well-formed but non-array value is
    // rejected explicitly instead of flowing downstream as a bogus term list.
    const parsedTerms: unknown = JSON.parse(searchTermsSetting);
    if (!Array.isArray(parsedTerms)) {
      throw new Error("searchTerms setting must be a JSON array of strings");
    }
    searchTerms = parsedTerms.filter(
      (term): term is string => typeof term === "string",
    );
  } else {
    const defaultSearchTermsEnv =
      process.env.JOBSPY_SEARCH_TERMS || "web developer";
    searchTerms = defaultSearchTermsEnv
      .split("|")
      .map((term) => term.trim())
      .filter(Boolean);
  }

  const ownerProfileId =
    args.mergedConfig.ownerProfileId ?? DEFAULT_JOB_OWNER_PROFILE_ID;
  let searchProfileTargetRoles: string[] = [];
  let searchProfileMustHaveSkills: string[] = [];
  let searchProfileDealBreakers: string[] = [];

  // Appends profile target roles to the search terms, skipping non-strings,
  // blanks and case-insensitive duplicates of terms already present.
  const mergeTargetRoles = (targetRoles: unknown) => {
    if (!Array.isArray(targetRoles) || targetRoles.length === 0) return;
    const existingLower = new Set(searchTerms.map((t) => t.toLowerCase()));
    let addedRoles = 0;
    for (const role of targetRoles) {
      if (typeof role !== "string") continue;
      const trimmedRole = role.trim();
      const roleKey = trimmedRole.toLowerCase();
      if (!trimmedRole || existingLower.has(roleKey)) continue;
      searchTerms.push(trimmedRole);
      existingLower.add(roleKey);
      addedRoles += 1;
    }
    logger.info("Augmented search terms with profile target roles", {
      // Number of roles actually appended, not the number of candidates.
      addedRoles,
      totalTerms: searchTerms.length,
    });
  };

  if (ownerProfileId && ownerProfileId !== DEFAULT_JOB_OWNER_PROFILE_ID) {
    // Non-default owner: the search profile comes from the stored profile row.
    const row = await getProfileById(ownerProfileId);
    if (row?.data) {
      const parsed = jobSearchProfileSchema.safeParse(row.data);
      if (parsed.success) {
        searchProfileTargetRoles = parsed.data.targetRoles ?? [];
        searchProfileMustHaveSkills = parsed.data.mustHaveSkills ?? [];
        searchProfileDealBreakers = parsed.data.dealBreakers ?? [];
        if (searchProfileTargetRoles.length > 0) {
          mergeTargetRoles(searchProfileTargetRoles);
        }
      } else if (row.data.targetRoles?.length) {
        // Legacy profile shapes: keep augmenting terms but we won't enforce strict filtering.
        mergeTargetRoles(row.data.targetRoles);
      }
    }
  } else {
    // Default owner: the search profile comes from the settings table.
    const profileSetting = settings.jobSearchProfile;
    if (profileSetting) {
      try {
        const profile = JSON.parse(profileSetting);
        const parsed = jobSearchProfileSchema.safeParse(profile);
        if (parsed.success) {
          searchProfileTargetRoles = parsed.data.targetRoles ?? [];
          searchProfileMustHaveSkills = parsed.data.mustHaveSkills ?? [];
          searchProfileDealBreakers = parsed.data.dealBreakers ?? [];
          if (searchProfileTargetRoles.length > 0) {
            mergeTargetRoles(searchProfileTargetRoles);
          }
        } else if (
          Array.isArray((profile as { targetRoles?: unknown }).targetRoles) &&
          (profile as { targetRoles: unknown[] }).targetRoles.length > 0
        ) {
          mergeTargetRoles((profile as { targetRoles: unknown }).targetRoles);
        }
      } catch {
        // malformed profile JSON, continue with existing terms
      }
    }
  }

  const geographyCountryKey = inferCountryKeyFromSearchGeography(
    settings.searchCities,
    settings.jobspyLocation,
  );
  const configuredIndeedKey = settings.jobspyCountryIndeed?.trim()
    ? normalizeCountryKey(settings.jobspyCountryIndeed)
    : null;
  if (
    geographyCountryKey &&
    configuredIndeedKey &&
    geographyCountryKey !== configuredIndeedKey
  ) {
    logger.warn(
      "Indeed country setting disagrees with country-level search geography; aligning JobSpy and source routing to geography",
      {
        step: "discover-jobs",
        geographyCountryKey,
        jobspyCountryIndeed: configuredIndeedKey,
      },
    );
  }

  // The country inferred from the search geography wins over the configured
  // Indeed country; remaining settings are fallbacks in the order listed.
  const selectedCountry = normalizeCountryKey(
    geographyCountryKey ??
      settings.jobspyCountryIndeed ??
      settings.searchCities ??
      settings.jobspyLocation ??
      "united kingdom",
  );

  const effectiveJobspyCountryIndeed =
    geographyCountryKey ?? settings.jobspyCountryIndeed;
  const compatibleSources = args.mergedConfig.sources.filter((source) =>
    isSourceAllowedForCountry(source, selectedCountry),
  );

  // Existing job URLs are fetched at most once, on first request, and the
  // same promise is shared by every extractor that asks for them.
  let existingJobUrlsPromise: Promise<string[]> | null = null;
  const getExistingJobUrls = (): Promise<string[]> => {
    if (!existingJobUrlsPromise) {
      existingJobUrlsPromise = getAllJobUrls(ownerProfileId);
    }
    return existingJobUrlsPromise;
  };
  const skippedSources = args.mergedConfig.sources.filter(
    (source) => !compatibleSources.includes(source),
  );

  if (skippedSources.length > 0) {
    logger.info("Skipping incompatible sources for selected country", {
      step: "discover-jobs",
      country: selectedCountry,
      countryLabel: formatCountryLabel(selectedCountry),
      requestedSources: args.mergedConfig.sources,
      skippedSources,
    });
  }

  if (args.mergedConfig.sources.length > 0 && compatibleSources.length === 0) {
    throw new Error(
      `No compatible sources for selected country: ${formatCountryLabel(selectedCountry)}`,
    );
  }

  // Several sources can be served by the same extractor manifest; group them
  // so each manifest runs once with all of its selected sources.
  const groupedByManifest = new Map<
    string,
    { sources: string[]; detail: string; termsTotal?: number }
  >();

  for (const source of compatibleSources) {
    const manifest = registry.manifestBySource.get(source);
    if (!manifest) {
      sourceErrors.push(`${source}: extractor manifest not registered`);
      continue;
    }

    const existing = groupedByManifest.get(manifest.id);
    if (existing) {
      existing.sources.push(source);
      continue;
    }

    groupedByManifest.set(manifest.id, {
      sources: [source],
      termsTotal: searchTerms.length,
      detail: `${manifest.displayName}: fetching jobs...`,
    });
  }

  const sourceTasks: DiscoverySourceTask[] = [];

  for (const [manifestId, grouped] of groupedByManifest) {
    const manifest = registry.manifests.get(manifestId);
    if (!manifest) continue;

    sourceTasks.push({
      source: manifest.id,
      termsTotal: grouped.termsTotal,
      detail:
        grouped.sources.length > 1
          ? `${manifest.displayName}: ${grouped.sources.join(", ")}...`
          : grouped.detail,
      run: async () => {
        // Extractors only receive string-valued (or undefined) settings.
        const filteredSettings = Object.fromEntries(
          Object.entries(settings).filter(
            ([, value]) =>
              typeof value === "string" || typeof value === "undefined",
          ),
        ) as Record<string, string | undefined>;

        if (effectiveJobspyCountryIndeed !== undefined) {
          filteredSettings.jobspyCountryIndeed = effectiveJobspyCountryIndeed;
        }

        const result = await manifest.run({
          source: grouped.sources[0],
          selectedSources: grouped.sources,
          settings: filteredSettings,
          searchTerms,
          selectedCountry,
          getExistingJobUrls,
          shouldCancel: args.shouldCancel,
          onProgress: (event) => {
            progressHelpers.crawlingUpdate({
              source: manifest.id,
              termsProcessed: event.termsProcessed,
              termsTotal: event.termsTotal,
              listPagesProcessed: event.listPagesProcessed,
              listPagesTotal: event.listPagesTotal,
              jobCardsFound: event.jobCardsFound,
              jobPagesEnqueued: event.jobPagesEnqueued,
              jobPagesSkipped: event.jobPagesSkipped,
              jobPagesProcessed: event.jobPagesProcessed,
              phase: event.phase,
              currentUrl: event.currentUrl,
            });

            if (event.detail) {
              updateProgress({
                step: "crawling",
                detail: event.detail,
              });
            }
          },
        });

        if (!result.success) {
          return {
            discoveredJobs: [],
            sourceErrors: [
              `${manifest.displayName || manifest.id}: ${result.error ?? "unknown error"} (sources: ${grouped.sources.join(",")})`,
            ],
          };
        }

        return {
          discoveredJobs: result.jobs,
          sourceErrors: [],
        };
      },
    });
  }

  const totalSources = sourceTasks.length;
  let completedSources = 0;

  progressHelpers.startCrawling(totalSources);

  if (args.shouldCancel?.()) {
    return { discoveredJobs, sourceErrors };
  }

  const sourceResults = await asyncPool({
    items: sourceTasks,
    concurrency: DISCOVERY_CONCURRENCY,
    shouldStop: args.shouldCancel,
    onTaskStarted: (sourceTask) => {
      progressHelpers.startSource(
        sourceTask.source,
        completedSources,
        totalSources,
        {
          termsTotal: sourceTask.termsTotal,
          detail: sourceTask.detail,
        },
      );
    },
    onTaskSettled: () => {
      completedSources += 1;
      progressHelpers.completeSource(completedSources, totalSources);
    },
    task: async (sourceTask) => {
      // A throwing task is converted into a source error so that one failing
      // extractor does not abort the others.
      try {
        return await sourceTask.run();
      } catch (error) {
        logger.warn("Discovery source task failed", {
          sourceTask: sourceTask.source,
          error: sanitizeUnknown(error),
        });

        return {
          discoveredJobs: [],
          sourceErrors: [
            `${sourceTask.source}: ${error instanceof Error ? error.message : "unknown error"}`,
          ],
        };
      }
    },
  });

  for (const sourceResult of sourceResults) {
    discoveredJobs.push(...sourceResult.discoveredJobs);
    sourceErrors.push(...sourceResult.sourceErrors);
  }

  const requestedCities = resolveSearchCities({
    single: settings.searchCities ?? settings.jobspyLocation,
  });
  const cityFilteredJobs = filterJobsByRequestedCities({
    jobs: discoveredJobs,
    selectedCountry,
    requestedCities,
  });
  const cityFilteredOutCount = discoveredJobs.length - cityFilteredJobs.length;

  if (cityFilteredOutCount > 0) {
    logger.info("Dropped discovered jobs that did not match requested cities", {
      step: "discover-jobs",
      droppedCount: cityFilteredOutCount,
      requestedCities,
      selectedCountry,
    });
  }

  const blockedCompanyKeywords = resolveBlockedCompanyKeywordsFromStoredString(
    settings.blockedCompanyKeywords,
  );
  const blockedKeywordsLowerCase = blockedCompanyKeywords.map((value) =>
    value.toLowerCase(),
  );
  const afterCompanyFilter = cityFilteredJobs.filter(
    (job) => !isBlockedEmployer(job.employer, blockedKeywordsLowerCase),
  );
  const companyDroppedCount =
    cityFilteredJobs.length - afterCompanyFilter.length;

  if (companyDroppedCount > 0) {
    // Info-level log carries at most ten keywords; the full list goes to debug.
    const blockedCompanyKeywordsPreview = blockedCompanyKeywords.slice(0, 10);
    const blockedCompanyKeywordsTruncated =
      blockedCompanyKeywordsPreview.length < blockedCompanyKeywords.length;

    logger.info("Dropped discovered jobs matching blocked company keywords", {
      step: "discover-jobs",
      droppedCount: companyDroppedCount,
      blockedKeywordCount: blockedCompanyKeywords.length,
      blockedCompanyKeywordsPreview,
      blockedCompanyKeywordsTruncated,
    });

    logger.debug("Full blocked company keywords used for filtering", {
      step: "discover-jobs",
      blockedCompanyKeywords,
    });
  }

  const blockedCountryKeys = resolveBlockedCountriesFromStoredString(
    settings.blockedCountries,
  );
  const filteredDiscoveredJobs = afterCompanyFilter.filter(
    (job) =>
      !jobMatchesBlockedCountries(
        {
          location: job.location,
          jobDescription: job.jobDescription,
          title: job.title,
        },
        blockedCountryKeys,
      ),
  );
  const countryDroppedCount =
    afterCompanyFilter.length - filteredDiscoveredJobs.length;

  if (countryDroppedCount > 0) {
    // Info-level log carries at most ten keys; the full list goes to debug.
    const blockedCountriesPreview = blockedCountryKeys.slice(0, 10);
    const blockedCountriesTruncated =
      blockedCountriesPreview.length < blockedCountryKeys.length;

    logger.info("Dropped discovered jobs in blocked countries", {
      step: "discover-jobs",
      droppedCount: countryDroppedCount,
      blockedCountryCount: blockedCountryKeys.length,
      blockedCountriesPreview,
      blockedCountriesTruncated,
    });

    logger.debug("Full blocked countries used for filtering", {
      step: "discover-jobs",
      blockedCountryKeys,
    });
  }

  if (args.shouldCancel?.()) {
    // NOTE(review): jobs returned on this path are neither profile-filtered
    // nor stamped with ownerProfileId; confirm the caller discards them.
    return { discoveredJobs: filteredDiscoveredJobs, sourceErrors };
  }

  const strictProfileFilteringEnabled =
    searchProfileTargetRoles.length > 0 ||
    searchProfileMustHaveSkills.length > 0 ||
    searchProfileDealBreakers.length > 0;
  const profileFiltered = strictProfileFilteringEnabled
    ? filterJobsBySearchProfile({
        jobs: filteredDiscoveredJobs,
        targetRolePhrases: searchProfileTargetRoles.length
          ? searchProfileTargetRoles
          : searchTerms,
        mustHaveSkills: searchProfileMustHaveSkills,
        dealBreakers: searchProfileDealBreakers,
      })
    : { jobs: filteredDiscoveredJobs, dropped: 0 };

  if (profileFiltered.dropped > 0) {
    logger.info("Dropped discovered jobs that didn't match search profile", {
      step: "discover-jobs",
      droppedCount: profileFiltered.dropped,
      targetRolesCount: searchProfileTargetRoles.length,
      mustHaveSkillsCount: searchProfileMustHaveSkills.length,
      dealBreakersCount: searchProfileDealBreakers.length,
    });
  }

  // Judge total failure on what the sources returned, not on what survived
  // the filters: an empty result caused only by city/company/country/profile
  // filtering is not a source failure.
  if (discoveredJobs.length === 0 && sourceErrors.length > 0) {
    throw new Error(`All sources failed: ${sourceErrors.join("; ")}`);
  }

  if (sourceErrors.length > 0) {
    logger.warn("Some discovery sources failed", { sourceErrors });
  }

  progressHelpers.crawlingComplete(profileFiltered.jobs.length);

  const stamped = profileFiltered.jobs.map((job) => ({
    ...job,
    ownerProfileId,
  }));

  return { discoveredJobs: stamped, sourceErrors };
}
|