keywords can be set from UI
This commit is contained in:
parent
bd7baafbec
commit
572cb1d42d
@ -17,11 +17,30 @@ const locations = [
|
||||
];
|
||||
|
||||
// roles
|
||||
const roles = [
|
||||
// Role slugs crawled when no usable search terms are supplied via the environment.
const defaultRoles = [
  "web-development",
  "software-systems",
];

/**
 * Turn a free-text search term into a role slug: lower-cased, every run of
 * non-alphanumeric characters collapsed to a single '-', and leading/trailing
 * dashes stripped.
 */
function slugifyRole(term: string): string {
  return term
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '');
}

/**
 * Resolve the role slugs to crawl from the raw GRADCRACKER_SEARCH_TERMS value.
 *
 * @param raw - Raw env value; expected to be a JSON array of strings. Undefined
 *   or empty means "not configured".
 * @param fallback - Roles returned whenever `raw` yields no usable slug.
 * @returns De-duplicated, non-empty slugs in their original order, or `fallback`.
 */
function resolveRoles(raw: string | undefined, fallback: string[]): string[] {
  if (!raw) return fallback;

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (e) {
    console.warn('Failed to parse GRADCRACKER_SEARCH_TERMS', e);
    return fallback;
  }

  if (!Array.isArray(parsed)) {
    console.warn('GRADCRACKER_SEARCH_TERMS is not a JSON array; using default roles');
    return fallback;
  }

  // Drop non-string entries and terms that slugify to nothing (e.g. "!!!"), and
  // collapse duplicates so the same role URL is not generated twice.
  const slugs = [
    ...new Set(
      parsed
        .filter((term): term is string => typeof term === 'string')
        .map(slugifyRole)
        .filter((slug) => slug.length > 0),
    ),
  ];

  if (slugs.length === 0) return fallback;

  console.log(`Using configured search terms: ${slugs.join(', ')}`);
  return slugs;
}

const roles = resolveRoles(process.env.GRADCRACKER_SEARCH_TERMS, defaultRoles);
|
||||
|
||||
// combo of locations and roles
|
||||
const gradcrackerUrls = locations.flatMap((location) => {
|
||||
return roles.map((role) => {
|
||||
|
||||
@ -106,6 +106,7 @@ export async function updateSettings(update: {
|
||||
jobCompleteWebhookUrl?: string | null
|
||||
resumeProjects?: ResumeProjectsSettings | null
|
||||
ukvisajobsMaxJobs?: number | null
|
||||
searchTerms?: string[] | null
|
||||
}): Promise<AppSettings> {
|
||||
return fetchApi<AppSettings>('/settings', {
|
||||
method: 'PATCH',
|
||||
|
||||
@ -43,6 +43,7 @@ export const SettingsPage: React.FC = () => {
|
||||
const [jobCompleteWebhookUrlDraft, setJobCompleteWebhookUrlDraft] = useState("")
|
||||
const [resumeProjectsDraft, setResumeProjectsDraft] = useState<ResumeProjectsSettings | null>(null)
|
||||
const [ukvisajobsMaxJobsDraft, setUkvisajobsMaxJobsDraft] = useState<number | null>(null)
|
||||
const [searchTermsDraft, setSearchTermsDraft] = useState<string[] | null>(null)
|
||||
const [isSaving, setIsSaving] = useState(false)
|
||||
const [isLoading, setIsLoading] = useState(true)
|
||||
|
||||
@ -59,6 +60,7 @@ export const SettingsPage: React.FC = () => {
|
||||
setJobCompleteWebhookUrlDraft(data.overrideJobCompleteWebhookUrl ?? "")
|
||||
setResumeProjectsDraft(data.resumeProjects)
|
||||
setUkvisajobsMaxJobsDraft(data.overrideUkvisajobsMaxJobs)
|
||||
setSearchTermsDraft(data.overrideSearchTerms)
|
||||
})
|
||||
.catch((error) => {
|
||||
const message = error instanceof Error ? error.message : "Failed to load settings"
|
||||
@ -86,6 +88,9 @@ export const SettingsPage: React.FC = () => {
|
||||
const effectiveUkvisajobsMaxJobs = settings?.ukvisajobsMaxJobs ?? 50
|
||||
const defaultUkvisajobsMaxJobs = settings?.defaultUkvisajobsMaxJobs ?? 50
|
||||
const overrideUkvisajobsMaxJobs = settings?.overrideUkvisajobsMaxJobs
|
||||
const effectiveSearchTerms = settings?.searchTerms ?? []
|
||||
const defaultSearchTerms = settings?.defaultSearchTerms ?? []
|
||||
const overrideSearchTerms = settings?.overrideSearchTerms
|
||||
const profileProjects = settings?.profileProjects ?? []
|
||||
const maxProjectsTotal = profileProjects.length
|
||||
const lockedCount = resumeProjectsDraft?.lockedProjectIds.length ?? 0
|
||||
@ -99,12 +104,14 @@ export const SettingsPage: React.FC = () => {
|
||||
const nextJobCompleteWebhook = jobCompleteWebhookUrlDraft.trim()
|
||||
const currentJobCompleteWebhook = (overrideJobCompleteWebhookUrl ?? "").trim()
|
||||
const ukvisajobsChanged = ukvisajobsMaxJobsDraft !== (overrideUkvisajobsMaxJobs ?? null)
|
||||
const searchTermsChanged = JSON.stringify(searchTermsDraft) !== JSON.stringify(overrideSearchTerms ?? null)
|
||||
return (
|
||||
next !== current ||
|
||||
nextWebhook !== currentWebhook ||
|
||||
nextJobCompleteWebhook !== currentJobCompleteWebhook ||
|
||||
!resumeProjectsEqual(resumeProjectsDraft, settings.resumeProjects) ||
|
||||
ukvisajobsChanged
|
||||
ukvisajobsChanged ||
|
||||
searchTermsChanged
|
||||
)
|
||||
}, [
|
||||
settings,
|
||||
@ -117,6 +124,8 @@ export const SettingsPage: React.FC = () => {
|
||||
resumeProjectsDraft,
|
||||
ukvisajobsMaxJobsDraft,
|
||||
overrideUkvisajobsMaxJobs,
|
||||
searchTermsDraft,
|
||||
overrideSearchTerms,
|
||||
])
|
||||
|
||||
const handleSave = async () => {
|
||||
@ -130,12 +139,14 @@ export const SettingsPage: React.FC = () => {
|
||||
? null
|
||||
: resumeProjectsDraft
|
||||
const ukvisajobsMaxJobsOverride = ukvisajobsMaxJobsDraft === defaultUkvisajobsMaxJobs ? null : ukvisajobsMaxJobsDraft
|
||||
const searchTermsOverride = arraysEqual(searchTermsDraft ?? [], defaultSearchTerms) ? null : searchTermsDraft
|
||||
const updated = await api.updateSettings({
|
||||
model: trimmed.length > 0 ? trimmed : null,
|
||||
pipelineWebhookUrl: webhookTrimmed.length > 0 ? webhookTrimmed : null,
|
||||
jobCompleteWebhookUrl: jobCompleteTrimmed.length > 0 ? jobCompleteTrimmed : null,
|
||||
resumeProjects: resumeProjectsOverride,
|
||||
ukvisajobsMaxJobs: ukvisajobsMaxJobsOverride,
|
||||
searchTerms: searchTermsOverride,
|
||||
})
|
||||
setSettings(updated)
|
||||
setModelDraft(updated.overrideModel ?? "")
|
||||
@ -143,6 +154,7 @@ export const SettingsPage: React.FC = () => {
|
||||
setJobCompleteWebhookUrlDraft(updated.overrideJobCompleteWebhookUrl ?? "")
|
||||
setResumeProjectsDraft(updated.resumeProjects)
|
||||
setUkvisajobsMaxJobsDraft(updated.overrideUkvisajobsMaxJobs)
|
||||
setSearchTermsDraft(updated.overrideSearchTerms)
|
||||
toast.success("Settings saved")
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : "Failed to save settings"
|
||||
@ -161,6 +173,7 @@ export const SettingsPage: React.FC = () => {
|
||||
jobCompleteWebhookUrl: null,
|
||||
resumeProjects: null,
|
||||
ukvisajobsMaxJobs: null,
|
||||
searchTerms: null,
|
||||
})
|
||||
setSettings(updated)
|
||||
setModelDraft("")
|
||||
@ -168,6 +181,7 @@ export const SettingsPage: React.FC = () => {
|
||||
setJobCompleteWebhookUrlDraft("")
|
||||
setResumeProjectsDraft(updated.resumeProjects)
|
||||
setUkvisajobsMaxJobsDraft(null)
|
||||
setSearchTermsDraft(null)
|
||||
toast.success("Reset to default")
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : "Failed to reset settings"
|
||||
@ -330,6 +344,52 @@ export const SettingsPage: React.FC = () => {
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle className="text-base">Search Terms</CardTitle>
|
||||
</CardHeader>
|
||||
|
||||
<CardContent className="space-y-4">
|
||||
<div className="space-y-2">
|
||||
<div className="text-sm font-medium">Global search terms</div>
|
||||
<textarea
|
||||
className="flex min-h-[80px] w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
|
||||
value={searchTermsDraft ? searchTermsDraft.join('\n') : (defaultSearchTerms ?? []).join('\n')}
|
||||
onChange={(event) => {
|
||||
const text = event.target.value
|
||||
const terms = text.split('\n') // Don't filter here to allow empty lines while typing
|
||||
setSearchTermsDraft(terms)
|
||||
}}
|
||||
onBlur={() => {
|
||||
// Clean up on blur
|
||||
if (searchTermsDraft) {
|
||||
setSearchTermsDraft(searchTermsDraft.map(t => t.trim()).filter(Boolean))
|
||||
}
|
||||
}}
|
||||
placeholder="e.g. web developer"
|
||||
disabled={isLoading || isSaving}
|
||||
rows={5}
|
||||
/>
|
||||
<div className="text-xs text-muted-foreground">
|
||||
One term per line. Applies to UKVisaJobs and other supported extractors.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<Separator />
|
||||
|
||||
<div className="grid gap-2 text-sm sm:grid-cols-2">
|
||||
<div>
|
||||
<div className="text-xs text-muted-foreground">Effective</div>
|
||||
<div className="break-words font-mono text-xs">{(effectiveSearchTerms || []).join(', ') || "—"}</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-xs text-muted-foreground">Default (env)</div>
|
||||
<div className="break-words font-mono text-xs">{(defaultSearchTerms || []).join(', ') || "—"}</div>
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle className="text-base">Resume Projects</CardTitle>
|
||||
|
||||
@ -243,6 +243,13 @@ apiRouter.get('/settings', async (_req: Request, res: Response) => {
|
||||
const overrideUkvisajobsMaxJobs = overrideUkvisajobsMaxJobsRaw ? parseInt(overrideUkvisajobsMaxJobsRaw, 10) : null;
|
||||
const ukvisajobsMaxJobs = overrideUkvisajobsMaxJobs ?? defaultUkvisajobsMaxJobs;
|
||||
|
||||
// Search terms - stored as JSON array, default from env var (pipe-separated)
|
||||
const overrideSearchTermsRaw = await settingsRepo.getSetting('searchTerms');
|
||||
const defaultSearchTermsEnv = process.env.JOBSPY_SEARCH_TERMS || 'web developer';
|
||||
const defaultSearchTerms = defaultSearchTermsEnv.split('|').map(s => s.trim()).filter(Boolean);
|
||||
const overrideSearchTerms = overrideSearchTermsRaw ? JSON.parse(overrideSearchTermsRaw) as string[] : null;
|
||||
const searchTerms = overrideSearchTerms ?? defaultSearchTerms;
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
@ -259,6 +266,9 @@ apiRouter.get('/settings', async (_req: Request, res: Response) => {
|
||||
ukvisajobsMaxJobs,
|
||||
defaultUkvisajobsMaxJobs,
|
||||
overrideUkvisajobsMaxJobs,
|
||||
searchTerms,
|
||||
defaultSearchTerms,
|
||||
overrideSearchTerms,
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
@ -277,6 +287,7 @@ const updateSettingsSchema = z.object({
|
||||
aiSelectableProjectIds: z.array(z.string().trim().min(1)).max(200),
|
||||
}).nullable().optional(),
|
||||
ukvisajobsMaxJobs: z.number().int().min(1).max(200).nullable().optional(),
|
||||
searchTerms: z.array(z.string().trim().min(1).max(200)).max(50).nullable().optional(),
|
||||
});
|
||||
|
||||
/**
|
||||
@ -320,6 +331,11 @@ apiRouter.patch('/settings', async (req: Request, res: Response) => {
|
||||
await settingsRepo.setSetting('ukvisajobsMaxJobs', ukvisajobsMaxJobs !== null ? String(ukvisajobsMaxJobs) : null);
|
||||
}
|
||||
|
||||
if ('searchTerms' in input) {
|
||||
const searchTerms = input.searchTerms ?? null;
|
||||
await settingsRepo.setSetting('searchTerms', searchTerms !== null ? JSON.stringify(searchTerms) : null);
|
||||
}
|
||||
|
||||
const overrideModel = await settingsRepo.getSetting('model');
|
||||
const defaultModel = process.env.MODEL || 'openai/gpt-4o-mini';
|
||||
const model = overrideModel || defaultModel;
|
||||
@ -342,6 +358,13 @@ apiRouter.patch('/settings', async (req: Request, res: Response) => {
|
||||
const overrideUkvisajobsMaxJobs = overrideUkvisajobsMaxJobsRaw ? parseInt(overrideUkvisajobsMaxJobsRaw, 10) : null;
|
||||
const ukvisajobsMaxJobs = overrideUkvisajobsMaxJobs ?? defaultUkvisajobsMaxJobs;
|
||||
|
||||
// Search terms - stored as JSON array, default from env var (pipe-separated)
|
||||
const overrideSearchTermsRaw = await settingsRepo.getSetting('searchTerms');
|
||||
const defaultSearchTermsEnv = process.env.JOBSPY_SEARCH_TERMS || 'web developer';
|
||||
const defaultSearchTerms = defaultSearchTermsEnv.split('|').map(s => s.trim()).filter(Boolean);
|
||||
const overrideSearchTerms = overrideSearchTermsRaw ? JSON.parse(overrideSearchTermsRaw) as string[] : null;
|
||||
const searchTerms = overrideSearchTerms ?? defaultSearchTerms;
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
@ -358,6 +381,9 @@ apiRouter.patch('/settings', async (req: Request, res: Response) => {
|
||||
ukvisajobsMaxJobs,
|
||||
defaultUkvisajobsMaxJobs,
|
||||
overrideUkvisajobsMaxJobs,
|
||||
searchTerms,
|
||||
defaultSearchTerms,
|
||||
overrideSearchTerms,
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
|
||||
@ -108,35 +108,22 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
// Step 2: Run crawler
|
||||
console.log('\n🕷️ Running crawler...');
|
||||
progressHelpers.startCrawling();
|
||||
const existingJobUrls = await jobsRepo.getAllJobUrls();
|
||||
|
||||
const discoveredJobs: CreateJobInput[] = [];
|
||||
const sourceErrors: string[] = [];
|
||||
|
||||
if (mergedConfig.sources.includes('gradcracker')) {
|
||||
const crawlerResult = await runCrawler({
|
||||
existingJobUrls,
|
||||
onProgress: (update) => {
|
||||
progressHelpers.crawlingUpdate({
|
||||
listPagesProcessed: update.listPagesProcessed,
|
||||
listPagesTotal: update.listPagesTotal,
|
||||
jobCardsFound: update.jobCardsFound,
|
||||
jobPagesEnqueued: update.jobPagesEnqueued,
|
||||
jobPagesSkipped: update.jobPagesSkipped,
|
||||
jobPagesProcessed: update.jobPagesProcessed,
|
||||
phase: update.phase,
|
||||
currentUrl: update.currentUrl,
|
||||
});
|
||||
},
|
||||
});
|
||||
// Read search terms setting
|
||||
const searchTermsSetting = await settingsRepo.getSetting('searchTerms');
|
||||
let searchTerms: string[] = [];
|
||||
|
||||
if (!crawlerResult.success) {
|
||||
sourceErrors.push(`gradcracker: ${crawlerResult.error ?? 'unknown error'}`);
|
||||
} else {
|
||||
discoveredJobs.push(...crawlerResult.jobs);
|
||||
}
|
||||
if (searchTermsSetting) {
|
||||
searchTerms = JSON.parse(searchTermsSetting) as string[];
|
||||
} else {
|
||||
// Default from env var
|
||||
const defaultSearchTermsEnv = process.env.JOBSPY_SEARCH_TERMS || 'web developer';
|
||||
searchTerms = defaultSearchTermsEnv.split('|').map(s => s.trim()).filter(Boolean);
|
||||
}
|
||||
|
||||
// Run JobSpy (Indeed/LinkedIn) if selected
|
||||
const jobSpySites = mergedConfig.sources.filter(
|
||||
(s): s is 'indeed' | 'linkedin' => s === 'indeed' || s === 'linkedin'
|
||||
);
|
||||
@ -147,7 +134,10 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
detail: `JobSpy: scraping ${jobSpySites.join(', ')}...`,
|
||||
});
|
||||
|
||||
const jobSpyResult = await runJobSpy({ sites: jobSpySites });
|
||||
const jobSpyResult = await runJobSpy({
|
||||
sites: jobSpySites,
|
||||
searchTerms,
|
||||
});
|
||||
if (!jobSpyResult.success) {
|
||||
sourceErrors.push(`jobspy: ${jobSpyResult.error ?? 'unknown error'}`);
|
||||
} else {
|
||||
@ -155,6 +145,39 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
}
|
||||
}
|
||||
|
||||
// Run Gradcracker crawler if selected
|
||||
if (mergedConfig.sources.includes('gradcracker')) {
|
||||
updateProgress({
|
||||
step: 'crawling',
|
||||
detail: 'Gradcracker: scraping...',
|
||||
});
|
||||
|
||||
// Pass existing URLs to avoid clicking "Apply" on jobs we already have
|
||||
const existingJobUrls = await jobsRepo.getAllJobUrls();
|
||||
|
||||
const crawlerResult = await runCrawler({
|
||||
existingJobUrls,
|
||||
searchTerms,
|
||||
onProgress: (progress) => {
|
||||
// Calculate overall progress based on list pages processed vs total
|
||||
// This is rough but better than nothing
|
||||
if (progress.listPagesTotal && progress.listPagesTotal > 0) {
|
||||
const percent = Math.round((progress.listPagesProcessed ?? 0) / progress.listPagesTotal * 100);
|
||||
updateProgress({
|
||||
step: 'crawling',
|
||||
detail: `Gradcracker: ${percent}% (scan ${progress.listPagesProcessed}/${progress.listPagesTotal}, found ${progress.jobCardsFound})`,
|
||||
});
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
if (!crawlerResult.success) {
|
||||
sourceErrors.push(`gradcracker: ${crawlerResult.error ?? 'unknown error'}`);
|
||||
} else {
|
||||
discoveredJobs.push(...crawlerResult.jobs);
|
||||
}
|
||||
}
|
||||
|
||||
// Run UKVisaJobs extractor if selected
|
||||
if (mergedConfig.sources.includes('ukvisajobs')) {
|
||||
updateProgress({
|
||||
@ -166,7 +189,10 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
const ukvisajobsMaxJobsSetting = await settingsRepo.getSetting('ukvisajobsMaxJobs');
|
||||
const ukvisajobsMaxJobs = ukvisajobsMaxJobsSetting ? parseInt(ukvisajobsMaxJobsSetting, 10) : 50;
|
||||
|
||||
const ukVisaResult = await runUkVisaJobs({ maxJobs: ukvisajobsMaxJobs });
|
||||
const ukVisaResult = await runUkVisaJobs({
|
||||
maxJobs: ukvisajobsMaxJobs,
|
||||
searchTerms,
|
||||
});
|
||||
if (!ukVisaResult.success) {
|
||||
sourceErrors.push(`ukvisajobs: ${ukVisaResult.error ?? 'unknown error'}`);
|
||||
} else {
|
||||
|
||||
@ -12,6 +12,7 @@ export type SettingKey = 'model'
|
||||
| 'jobCompleteWebhookUrl'
|
||||
| 'resumeProjects'
|
||||
| 'ukvisajobsMaxJobs'
|
||||
| 'searchTerms'
|
||||
|
||||
export async function getSetting(key: SettingKey): Promise<string | null> {
|
||||
const [row] = await db.select().from(settings).where(eq(settings.key, key))
|
||||
|
||||
@ -32,6 +32,11 @@ export interface RunCrawlerOptions {
|
||||
* Optional callback for live crawl progress emitted by the Gradcracker extractor.
|
||||
*/
|
||||
onProgress?: (update: JobExtractorProgress) => void;
|
||||
|
||||
/**
|
||||
* List of search terms to be used as roles for URL generation.
|
||||
*/
|
||||
searchTerms?: string[];
|
||||
}
|
||||
|
||||
interface JobExtractorProgress {
|
||||
@ -61,13 +66,13 @@ async function writeExistingJobUrlsFile(existingJobUrls: string[] | undefined):
|
||||
*/
|
||||
export async function runCrawler(options: RunCrawlerOptions = {}): Promise<CrawlerResult> {
|
||||
console.log('🕷️ Starting job crawler...');
|
||||
|
||||
|
||||
try {
|
||||
// Clear previous results
|
||||
await clearStorageDataset();
|
||||
|
||||
const existingJobUrlsFile = await writeExistingJobUrlsFile(options.existingJobUrls);
|
||||
|
||||
|
||||
// Run the crawler
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
const child = spawn('npm', ['run', 'start'], {
|
||||
@ -78,6 +83,7 @@ export async function runCrawler(options: RunCrawlerOptions = {}): Promise<Crawl
|
||||
...process.env,
|
||||
JOBOPS_SKIP_APPLY_FOR_EXISTING: '1',
|
||||
JOBOPS_EMIT_PROGRESS: '1',
|
||||
GRADCRACKER_SEARCH_TERMS: options.searchTerms ? JSON.stringify(options.searchTerms) : '',
|
||||
...(existingJobUrlsFile ? { JOBOPS_EXISTING_JOB_URLS_FILE: existingJobUrlsFile } : {}),
|
||||
},
|
||||
});
|
||||
@ -101,7 +107,7 @@ export async function runCrawler(options: RunCrawlerOptions = {}): Promise<Crawl
|
||||
|
||||
stdoutRl?.on('line', (line) => handleLine(line, process.stdout));
|
||||
stderrRl?.on('line', (line) => handleLine(line, process.stderr));
|
||||
|
||||
|
||||
child.on('close', (code) => {
|
||||
stdoutRl?.close();
|
||||
stderrRl?.close();
|
||||
@ -111,15 +117,15 @@ export async function runCrawler(options: RunCrawlerOptions = {}): Promise<Crawl
|
||||
reject(new Error(`Crawler exited with code ${code}`));
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
child.on('error', reject);
|
||||
});
|
||||
|
||||
|
||||
// Read crawled jobs from storage
|
||||
const jobs = await readCrawledJobs();
|
||||
|
||||
|
||||
console.log(`✅ Crawler completed. Found ${jobs.length} jobs.`);
|
||||
|
||||
|
||||
return { success: true, jobs };
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : 'Unknown error';
|
||||
@ -135,13 +141,13 @@ async function readCrawledJobs(): Promise<CreateJobInput[]> {
|
||||
try {
|
||||
const files = await readdir(STORAGE_DIR);
|
||||
const jsonFiles = files.filter(f => f.endsWith('.json'));
|
||||
|
||||
|
||||
const jobs: CreateJobInput[] = [];
|
||||
|
||||
|
||||
for (const file of jsonFiles) {
|
||||
const content = await readFile(join(STORAGE_DIR, file), 'utf-8');
|
||||
const data = JSON.parse(content);
|
||||
|
||||
|
||||
// Map crawler output to our job input format
|
||||
jobs.push({
|
||||
source: 'gradcracker',
|
||||
@ -159,7 +165,7 @@ async function readCrawledJobs(): Promise<CreateJobInput[]> {
|
||||
jobDescription: data.jobDescription,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
return jobs;
|
||||
} catch (error) {
|
||||
console.error('Failed to read crawled jobs:', error);
|
||||
|
||||
@ -15,10 +15,12 @@ const UKVISAJOBS_DIR = join(__dirname, '../../../../extractors/ukvisajobs');
|
||||
const STORAGE_DIR = join(UKVISAJOBS_DIR, 'storage/datasets/default');
|
||||
|
||||
/** Options accepted by the UK Visa Jobs extractor runner. */
export interface RunUkVisaJobsOptions {
  /** Maximum number of jobs to fetch per search term. Defaults to 50, max 200. */
  maxJobs?: number;
  /** Single search keyword (legacy). Only used when `searchTerms` is missing or empty. */
  searchKeyword?: string;
  /** Search terms to run one after another; the results are merged and de-duplicated. */
  searchTerms?: string[];
}
|
||||
|
||||
export interface UkVisaJobsResult {
|
||||
@ -38,46 +40,76 @@ async function clearStorageDataset(): Promise<void> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Decide which keywords the extractor should be run with.
 *
 * Non-blank `searchTerms` win (trimmed, duplicates removed); otherwise the
 * legacy `searchKeyword` is used; otherwise a single empty keyword, which
 * means one run without a keyword filter.
 */
function resolveUkVisaJobsTerms(options: RunUkVisaJobsOptions): string[] {
  const cleaned = (options.searchTerms ?? [])
    .map((term) => term.trim())
    .filter((term) => term.length > 0);

  if (cleaned.length > 0) return [...new Set(cleaned)];
  if (options.searchKeyword) return [options.searchKeyword];
  return [''];
}

/**
 * Spawn the extractor process once for a single keyword and wait for it to exit.
 * Rejects when the process cannot be started or exits with a non-zero code.
 */
function spawnUkVisaJobsExtractor(term: string, maxJobs: number): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const child = spawn('npx', ['tsx', 'src/main.ts'], {
      cwd: UKVISAJOBS_DIR,
      stdio: 'inherit',
      env: {
        ...process.env,
        UKVISAJOBS_MAX_JOBS: String(maxJobs),
        UKVISAJOBS_SEARCH_KEYWORD: term,
      },
    });

    child.on('close', (code) => {
      if (code === 0) resolve();
      else reject(new Error(`UK Visa Jobs extractor exited with code ${code}`));
    });
    child.on('error', reject);
  });
}

/**
 * Run the UK Visa Jobs extractor once per search term and merge the results.
 *
 * Runs are sequential; before each run the storage dataset is cleared and its
 * directory re-created, then the dataset written by that run is read back.
 * Jobs are de-duplicated across runs by `sourceJobId`, falling back to `jobUrl`.
 *
 * A failure for one term is logged and the remaining terms still run. Only when
 * every term fails is the result reported as unsuccessful, so the caller can
 * record the source error instead of seeing an empty "success".
 *
 * @param options - Max jobs per term (default 50) and the search terms/keyword.
 * @returns `success: true` with the unique jobs when at least one run succeeded;
 *   otherwise `success: false` with the collected per-term error messages.
 */
export async function runUkVisaJobs(options: RunUkVisaJobsOptions = {}): Promise<UkVisaJobsResult> {
  console.log('🇬🇧 Running UK Visa Jobs extractor...');

  const terms = resolveUkVisaJobsTerms(options);
  const maxJobs = options.maxJobs ?? 50;

  const allJobs: CreateJobInput[] = [];
  const seenIds = new Set<string>();
  const failures: string[] = [];

  for (const term of terms) {
    const termLabel = term ? `"${term}"` : 'all jobs';
    console.log(` Running for ${termLabel}...`);

    try {
      // Clear previous results for this run
      await clearStorageDataset();
      await mkdir(STORAGE_DIR, { recursive: true });

      await spawnUkVisaJobsExtractor(term, maxJobs);

      // Read the output dataset and accumulate
      const runJobs = await readDataset();
      let newCount = 0;

      for (const job of runJobs) {
        // Deduplicate by sourceJobId or jobUrl
        const id = job.sourceJobId || job.jobUrl;
        if (!seenIds.has(id)) {
          seenIds.add(id);
          allJobs.push(job);
          newCount++;
        }
      }

      console.log(` ✅ Fetched ${runJobs.length} jobs for ${termLabel} (${newCount} new unique)`);
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Unknown error';
      console.error(`❌ UK Visa Jobs failed for ${termLabel}: ${message}`);
      // Continue to next term instead of failing completely
      failures.push(`${termLabel}: ${message}`);
    }
  }

  // `terms` always has at least one entry, so this is true only when no run succeeded.
  if (failures.length === terms.length) {
    return { success: false, jobs: [], error: failures.join('; ') };
  }

  console.log(`✅ UK Visa Jobs: imported total ${allJobs.length} unique jobs`);
  return { success: true, jobs: allJobs };
}
|
||||
|
||||
/**
|
||||
|
||||
@ -204,4 +204,7 @@ export interface AppSettings {
|
||||
ukvisajobsMaxJobs: number;
|
||||
defaultUkvisajobsMaxJobs: number;
|
||||
overrideUkvisajobsMaxJobs: number | null;
|
||||
searchTerms: string[];
|
||||
defaultSearchTerms: string[];
|
||||
overrideSearchTerms: string[] | null;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user