gradcracker limits
This commit is contained in:
parent
8f278a228a
commit
2cf9249159
@ -44,15 +44,18 @@ if (envRolesRaw) {
|
||||
// combo of locations and roles
|
||||
const gradcrackerUrls = locations.flatMap((location) => {
|
||||
return roles.map((role) => {
|
||||
return `https://www.gradcracker.com/search/computing-technology/${role}-graduate-jobs-in-${location}?order=dateAdded`;
|
||||
return {
|
||||
url: `https://www.gradcracker.com/search/computing-technology/${role}-graduate-jobs-in-${location}?order=dateAdded`,
|
||||
role
|
||||
};
|
||||
});
|
||||
});
|
||||
|
||||
console.log(`Total gradcracker URLs: ${gradcrackerUrls.length}`)
|
||||
|
||||
const startUrls = gradcrackerUrls.map((url) => ({
|
||||
const startUrls = gradcrackerUrls.map(({ url, role }) => ({
|
||||
url,
|
||||
userData: { label: "gradcracker-list-page" },
|
||||
userData: { label: "gradcracker-list-page", role },
|
||||
}));
|
||||
|
||||
initJobOpsProgress(startUrls.length);
|
||||
|
||||
@ -44,6 +44,10 @@ function getExistingJobUrlSet(): Set<string> {
|
||||
const SKIP_APPLY_FOR_EXISTING = process.env.JOBOPS_SKIP_APPLY_FOR_EXISTING === "1";
|
||||
const EXISTING_JOB_URLS = getExistingJobUrlSet();
|
||||
|
||||
// Global counters for max jobs per search term
|
||||
const jobCounts = new Map<string, number>();
|
||||
// Per-role cap on job pages enqueued from list pages, read from the
// GRADCRACKER_MAX_JOBS_PER_TERM environment variable.
// Unset, empty, non-numeric or non-positive values are normalised to 0, which
// disables the cap (the handlers only enforce it when this value is > 0).
const MAX_JOBS_PER_TERM = (() => {
  const parsed = parseInt(process.env.GRADCRACKER_MAX_JOBS_PER_TERM || "0", 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
})();
|
||||
|
||||
interface Job {
|
||||
title: string | null;
|
||||
jobUrl: string | null;
|
||||
@ -62,7 +66,22 @@ export const router = createPlaywrightRouter();
|
||||
router.addHandler(
|
||||
"gradcracker-list-page",
|
||||
async ({ page, request, enqueueLinks }) => {
|
||||
log.info(`Processing: ${request.url}`);
|
||||
const { role } = request.userData;
|
||||
log.info(`Processing: ${request.url} (Role: ${role})`);
|
||||
|
||||
if (MAX_JOBS_PER_TERM > 0) {
|
||||
const currentCount = jobCounts.get(role) || 0;
|
||||
if (currentCount >= MAX_JOBS_PER_TERM) {
|
||||
log.info(`Max jobs (${MAX_JOBS_PER_TERM}) already enqueued for role "${role}". Skipping list page.`);
|
||||
markListPageDone({
|
||||
currentUrl: request.url,
|
||||
jobCardsFound: 0,
|
||||
jobPagesEnqueued: 0,
|
||||
jobPagesSkipped: 0,
|
||||
});
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Wait until the job cards are rendered
|
||||
await page.waitForSelector("article[wire\\:key]", { timeout: 10000 });
|
||||
@ -172,6 +191,16 @@ router.addHandler(
|
||||
if (isKnownJob) {
|
||||
skippedKnownJobs++;
|
||||
} else {
|
||||
// Check if we reached the limit for this search term
|
||||
if (MAX_JOBS_PER_TERM > 0) {
|
||||
const currentCount = jobCounts.get(role) || 0;
|
||||
if (currentCount >= MAX_JOBS_PER_TERM) {
|
||||
log.info(`Reached max jobs limit (${MAX_JOBS_PER_TERM}) for role "${role}" while processing list. Stopping.`);
|
||||
break;
|
||||
}
|
||||
jobCounts.set(role, currentCount + 1);
|
||||
}
|
||||
|
||||
await enqueueLinks({
|
||||
urls: [jobUrl],
|
||||
userData: {
|
||||
|
||||
@ -154,6 +154,7 @@ export async function updateSettings(update: {
|
||||
jobCompleteWebhookUrl?: string | null
|
||||
resumeProjects?: ResumeProjectsSettings | null
|
||||
ukvisajobsMaxJobs?: number | null
|
||||
gradcrackerMaxJobsPerTerm?: number | null
|
||||
searchTerms?: string[] | null
|
||||
jobspyLocation?: string | null
|
||||
jobspyResultsWanted?: number | null
|
||||
|
||||
@ -78,6 +78,7 @@ export const SettingsPage: React.FC = () => {
|
||||
const [jobCompleteWebhookUrlDraft, setJobCompleteWebhookUrlDraft] = useState("")
|
||||
const [resumeProjectsDraft, setResumeProjectsDraft] = useState<ResumeProjectsSettings | null>(null)
|
||||
const [ukvisajobsMaxJobsDraft, setUkvisajobsMaxJobsDraft] = useState<number | null>(null)
|
||||
const [gradcrackerMaxJobsPerTermDraft, setGradcrackerMaxJobsPerTermDraft] = useState<number | null>(null)
|
||||
const [searchTermsDraft, setSearchTermsDraft] = useState<string[] | null>(null)
|
||||
const [jobspyLocationDraft, setJobspyLocationDraft] = useState<string | null>(null)
|
||||
const [jobspyResultsWantedDraft, setJobspyResultsWantedDraft] = useState<number | null>(null)
|
||||
@ -105,6 +106,7 @@ export const SettingsPage: React.FC = () => {
|
||||
setJobCompleteWebhookUrlDraft(data.overrideJobCompleteWebhookUrl ?? "")
|
||||
setResumeProjectsDraft(data.resumeProjects)
|
||||
setUkvisajobsMaxJobsDraft(data.overrideUkvisajobsMaxJobs)
|
||||
setGradcrackerMaxJobsPerTermDraft(data.overrideGradcrackerMaxJobsPerTerm)
|
||||
setSearchTermsDraft(data.overrideSearchTerms)
|
||||
setJobspyLocationDraft(data.overrideJobspyLocation)
|
||||
setJobspyResultsWantedDraft(data.overrideJobspyResultsWanted)
|
||||
@ -145,6 +147,9 @@ export const SettingsPage: React.FC = () => {
|
||||
const effectiveUkvisajobsMaxJobs = settings?.ukvisajobsMaxJobs ?? 50
|
||||
const defaultUkvisajobsMaxJobs = settings?.defaultUkvisajobsMaxJobs ?? 50
|
||||
const overrideUkvisajobsMaxJobs = settings?.overrideUkvisajobsMaxJobs
|
||||
const effectiveGradcrackerMaxJobsPerTerm = settings?.gradcrackerMaxJobsPerTerm ?? 50
|
||||
const defaultGradcrackerMaxJobsPerTerm = settings?.defaultGradcrackerMaxJobsPerTerm ?? 50
|
||||
const overrideGradcrackerMaxJobsPerTerm = settings?.overrideGradcrackerMaxJobsPerTerm
|
||||
const effectiveSearchTerms = settings?.searchTerms ?? []
|
||||
const defaultSearchTerms = settings?.defaultSearchTerms ?? []
|
||||
const overrideSearchTerms = settings?.overrideSearchTerms
|
||||
@ -185,6 +190,7 @@ export const SettingsPage: React.FC = () => {
|
||||
const nextJobCompleteWebhook = jobCompleteWebhookUrlDraft.trim()
|
||||
const currentJobCompleteWebhook = (overrideJobCompleteWebhookUrl ?? "").trim()
|
||||
const ukvisajobsChanged = ukvisajobsMaxJobsDraft !== (overrideUkvisajobsMaxJobs ?? null)
|
||||
const gradcrackerChanged = gradcrackerMaxJobsPerTermDraft !== (overrideGradcrackerMaxJobsPerTerm ?? null)
|
||||
const searchTermsChanged = JSON.stringify(searchTermsDraft) !== JSON.stringify(overrideSearchTerms ?? null)
|
||||
return (
|
||||
next !== current ||
|
||||
@ -195,6 +201,7 @@ export const SettingsPage: React.FC = () => {
|
||||
nextJobCompleteWebhook !== currentJobCompleteWebhook ||
|
||||
!resumeProjectsEqual(resumeProjectsDraft, settings.resumeProjects) ||
|
||||
ukvisajobsChanged ||
|
||||
gradcrackerChanged ||
|
||||
searchTermsChanged ||
|
||||
jobspyLocationDraft !== (overrideJobspyLocation ?? null) ||
|
||||
jobspyResultsWantedDraft !== (overrideJobspyResultsWanted ?? null) ||
|
||||
@ -220,6 +227,8 @@ export const SettingsPage: React.FC = () => {
|
||||
resumeProjectsDraft,
|
||||
ukvisajobsMaxJobsDraft,
|
||||
overrideUkvisajobsMaxJobs,
|
||||
gradcrackerMaxJobsPerTermDraft,
|
||||
overrideGradcrackerMaxJobsPerTerm,
|
||||
searchTermsDraft,
|
||||
overrideSearchTerms,
|
||||
jobspyLocationDraft,
|
||||
@ -250,6 +259,7 @@ export const SettingsPage: React.FC = () => {
|
||||
? null
|
||||
: resumeProjectsDraft
|
||||
const ukvisajobsMaxJobsOverride = ukvisajobsMaxJobsDraft === defaultUkvisajobsMaxJobs ? null : ukvisajobsMaxJobsDraft
|
||||
const gradcrackerMaxJobsPerTermOverride = gradcrackerMaxJobsPerTermDraft === defaultGradcrackerMaxJobsPerTerm ? null : gradcrackerMaxJobsPerTermDraft
|
||||
const searchTermsOverride = arraysEqual(searchTermsDraft ?? [], defaultSearchTerms) ? null : searchTermsDraft
|
||||
const jobspyLocationOverride = jobspyLocationDraft === defaultJobspyLocation ? null : jobspyLocationDraft
|
||||
const jobspyResultsWantedOverride = jobspyResultsWantedDraft === defaultJobspyResultsWanted ? null : jobspyResultsWantedDraft
|
||||
@ -266,6 +276,7 @@ export const SettingsPage: React.FC = () => {
|
||||
jobCompleteWebhookUrl: jobCompleteTrimmed.length > 0 ? jobCompleteTrimmed : null,
|
||||
resumeProjects: resumeProjectsOverride,
|
||||
ukvisajobsMaxJobs: ukvisajobsMaxJobsOverride,
|
||||
gradcrackerMaxJobsPerTerm: gradcrackerMaxJobsPerTermOverride,
|
||||
searchTerms: searchTermsOverride,
|
||||
jobspyLocation: jobspyLocationOverride,
|
||||
jobspyResultsWanted: jobspyResultsWantedOverride,
|
||||
@ -283,6 +294,7 @@ export const SettingsPage: React.FC = () => {
|
||||
setJobCompleteWebhookUrlDraft(updated.overrideJobCompleteWebhookUrl ?? "")
|
||||
setResumeProjectsDraft(updated.resumeProjects)
|
||||
setUkvisajobsMaxJobsDraft(updated.overrideUkvisajobsMaxJobs)
|
||||
setGradcrackerMaxJobsPerTermDraft(updated.overrideGradcrackerMaxJobsPerTerm)
|
||||
setSearchTermsDraft(updated.overrideSearchTerms)
|
||||
setJobspyLocationDraft(updated.overrideJobspyLocation)
|
||||
setJobspyResultsWantedDraft(updated.overrideJobspyResultsWanted)
|
||||
@ -365,6 +377,7 @@ export const SettingsPage: React.FC = () => {
|
||||
jobCompleteWebhookUrl: null,
|
||||
resumeProjects: null,
|
||||
ukvisajobsMaxJobs: null,
|
||||
gradcrackerMaxJobsPerTerm: null,
|
||||
searchTerms: null,
|
||||
jobspyLocation: null,
|
||||
jobspyResultsWanted: null,
|
||||
@ -382,6 +395,7 @@ export const SettingsPage: React.FC = () => {
|
||||
setJobCompleteWebhookUrlDraft("")
|
||||
setResumeProjectsDraft(updated.resumeProjects)
|
||||
setUkvisajobsMaxJobsDraft(null)
|
||||
setGradcrackerMaxJobsPerTermDraft(null)
|
||||
setSearchTermsDraft(null)
|
||||
setJobspyLocationDraft(null)
|
||||
setJobspyResultsWantedDraft(null)
|
||||
@ -573,20 +587,20 @@ export const SettingsPage: React.FC = () => {
|
||||
type="number"
|
||||
inputMode="numeric"
|
||||
min={1}
|
||||
max={200}
|
||||
max={1000}
|
||||
value={ukvisajobsMaxJobsDraft ?? defaultUkvisajobsMaxJobs}
|
||||
onChange={(event) => {
|
||||
const value = parseInt(event.target.value, 10)
|
||||
if (Number.isNaN(value)) {
|
||||
setUkvisajobsMaxJobsDraft(null)
|
||||
} else {
|
||||
setUkvisajobsMaxJobsDraft(Math.min(200, Math.max(1, value)))
|
||||
setUkvisajobsMaxJobsDraft(Math.min(1000, Math.max(1, value)))
|
||||
}
|
||||
}}
|
||||
disabled={isLoading || isSaving}
|
||||
/>
|
||||
<div className="text-xs text-muted-foreground">
|
||||
Maximum number of jobs to fetch from UKVisaJobs per pipeline run. Range: 1-200.
|
||||
Maximum number of jobs to fetch from UKVisaJobs per pipeline run. Range: 1-1000.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -599,7 +613,52 @@ export const SettingsPage: React.FC = () => {
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-xs text-muted-foreground">Default</div>
|
||||
<div className="break-words font-mono text-xs">{defaultUkvisajobsMaxJobs}</div>
|
||||
<div className="break-words font-mono text-xs font-semibold">{defaultUkvisajobsMaxJobs}</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</AccordionContent>
|
||||
</AccordionItem>
|
||||
|
||||
<AccordionItem value="gradcracker" className="border rounded-lg px-4">
|
||||
<AccordionTrigger className="hover:no-underline py-4">
|
||||
<span className="text-base font-semibold">Gradcracker Extractor</span>
|
||||
</AccordionTrigger>
|
||||
<AccordionContent className="pb-4">
|
||||
<div className="space-y-4">
|
||||
<div className="space-y-2">
|
||||
<div className="text-sm font-medium">Max jobs per search term</div>
|
||||
<Input
|
||||
type="number"
|
||||
inputMode="numeric"
|
||||
min={1}
|
||||
max={1000}
|
||||
value={gradcrackerMaxJobsPerTermDraft ?? defaultGradcrackerMaxJobsPerTerm}
|
||||
onChange={(event) => {
|
||||
const value = parseInt(event.target.value, 10)
|
||||
if (Number.isNaN(value)) {
|
||||
setGradcrackerMaxJobsPerTermDraft(null)
|
||||
} else {
|
||||
setGradcrackerMaxJobsPerTermDraft(Math.min(1000, Math.max(1, value)))
|
||||
}
|
||||
}}
|
||||
disabled={isLoading || isSaving}
|
||||
/>
|
||||
<div className="text-xs text-muted-foreground">
|
||||
Maximum number of jobs to fetch for EACH search term from Gradcracker. Range: 1-1000.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<Separator />
|
||||
|
||||
<div className="grid gap-2 text-sm sm:grid-cols-2">
|
||||
<div>
|
||||
<div className="text-xs text-muted-foreground">Effective</div>
|
||||
<div className="break-words font-mono text-xs">{effectiveGradcrackerMaxJobsPerTerm}</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-xs text-muted-foreground">Default</div>
|
||||
<div className="break-words font-mono text-xs font-semibold">{defaultGradcrackerMaxJobsPerTerm}</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -299,6 +299,11 @@ apiRouter.get('/settings', async (_req: Request, res: Response) => {
|
||||
const overrideUkvisajobsMaxJobs = overrideUkvisajobsMaxJobsRaw ? parseInt(overrideUkvisajobsMaxJobsRaw, 10) : null;
|
||||
const ukvisajobsMaxJobs = overrideUkvisajobsMaxJobs ?? defaultUkvisajobsMaxJobs;
|
||||
|
||||
const overrideGradcrackerMaxJobsPerTermRaw = await settingsRepo.getSetting('gradcrackerMaxJobsPerTerm');
|
||||
const defaultGradcrackerMaxJobsPerTerm = 50;
|
||||
const overrideGradcrackerMaxJobsPerTerm = overrideGradcrackerMaxJobsPerTermRaw ? parseInt(overrideGradcrackerMaxJobsPerTermRaw, 10) : null;
|
||||
const gradcrackerMaxJobsPerTerm = overrideGradcrackerMaxJobsPerTerm ?? defaultGradcrackerMaxJobsPerTerm;
|
||||
|
||||
const overrideSearchTermsRaw = await settingsRepo.getSetting('searchTerms');
|
||||
const defaultSearchTermsEnv = process.env.JOBSPY_SEARCH_TERMS || 'web developer';
|
||||
const defaultSearchTerms = defaultSearchTermsEnv.split('|').map(s => s.trim()).filter(Boolean);
|
||||
@ -358,6 +363,9 @@ apiRouter.get('/settings', async (_req: Request, res: Response) => {
|
||||
ukvisajobsMaxJobs,
|
||||
defaultUkvisajobsMaxJobs,
|
||||
overrideUkvisajobsMaxJobs,
|
||||
gradcrackerMaxJobsPerTerm,
|
||||
defaultGradcrackerMaxJobsPerTerm,
|
||||
overrideGradcrackerMaxJobsPerTerm,
|
||||
searchTerms,
|
||||
defaultSearchTerms,
|
||||
overrideSearchTerms,
|
||||
@ -400,6 +408,7 @@ const updateSettingsSchema = z.object({
|
||||
aiSelectableProjectIds: z.array(z.string().trim().min(1)).max(200),
|
||||
}).nullable().optional(),
|
||||
// Upper bound matches the settings UI, which clamps this field to 1-1000.
ukvisajobsMaxJobs: z.number().int().min(1).max(1000).nullable().optional(),
|
||||
// Upper bound matches the settings UI, which clamps this field to 1-1000.
gradcrackerMaxJobsPerTerm: z.number().int().min(1).max(1000).nullable().optional(),
|
||||
searchTerms: z.array(z.string().trim().min(1).max(200)).max(50).nullable().optional(),
|
||||
jobspyLocation: z.string().trim().min(1).max(100).nullable().optional(),
|
||||
jobspyResultsWanted: z.number().int().min(1).max(500).nullable().optional(),
|
||||
@ -460,6 +469,11 @@ apiRouter.patch('/settings', async (req: Request, res: Response) => {
|
||||
await settingsRepo.setSetting('ukvisajobsMaxJobs', ukvisajobsMaxJobs !== null ? String(ukvisajobsMaxJobs) : null);
|
||||
}
|
||||
|
||||
if ('gradcrackerMaxJobsPerTerm' in input) {
|
||||
const gradcrackerMaxJobsPerTerm = input.gradcrackerMaxJobsPerTerm ?? null;
|
||||
await settingsRepo.setSetting('gradcrackerMaxJobsPerTerm', gradcrackerMaxJobsPerTerm !== null ? String(gradcrackerMaxJobsPerTerm) : null);
|
||||
}
|
||||
|
||||
if ('searchTerms' in input) {
|
||||
const searchTerms = input.searchTerms ?? null;
|
||||
await settingsRepo.setSetting('searchTerms', searchTerms !== null ? JSON.stringify(searchTerms) : null);
|
||||
@ -526,6 +540,11 @@ apiRouter.patch('/settings', async (req: Request, res: Response) => {
|
||||
const overrideUkvisajobsMaxJobs = overrideUkvisajobsMaxJobsRaw ? parseInt(overrideUkvisajobsMaxJobsRaw, 10) : null;
|
||||
const ukvisajobsMaxJobs = overrideUkvisajobsMaxJobs ?? defaultUkvisajobsMaxJobs;
|
||||
|
||||
const overrideGradcrackerMaxJobsPerTermRaw = await settingsRepo.getSetting('gradcrackerMaxJobsPerTerm');
|
||||
const defaultGradcrackerMaxJobsPerTerm = 50;
|
||||
const overrideGradcrackerMaxJobsPerTerm = overrideGradcrackerMaxJobsPerTermRaw ? parseInt(overrideGradcrackerMaxJobsPerTermRaw, 10) : null;
|
||||
const gradcrackerMaxJobsPerTerm = overrideGradcrackerMaxJobsPerTerm ?? defaultGradcrackerMaxJobsPerTerm;
|
||||
|
||||
// Search terms - stored as JSON array, default from env var (pipe-separated)
|
||||
const overrideSearchTermsRaw = await settingsRepo.getSetting('searchTerms');
|
||||
const defaultSearchTermsEnv = process.env.JOBSPY_SEARCH_TERMS || 'web developer';
|
||||
@ -586,6 +605,9 @@ apiRouter.patch('/settings', async (req: Request, res: Response) => {
|
||||
ukvisajobsMaxJobs,
|
||||
defaultUkvisajobsMaxJobs,
|
||||
overrideUkvisajobsMaxJobs,
|
||||
gradcrackerMaxJobsPerTerm,
|
||||
defaultGradcrackerMaxJobsPerTerm,
|
||||
overrideGradcrackerMaxJobsPerTerm,
|
||||
searchTerms,
|
||||
defaultSearchTerms,
|
||||
overrideSearchTerms,
|
||||
|
||||
@ -182,9 +182,13 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
|
||||
// Pass existing URLs to avoid clicking "Apply" on jobs we already have
|
||||
const existingJobUrls = await jobsRepo.getAllJobUrls();
|
||||
|
||||
const gradcrackerMaxJobsSetting = await settingsRepo.getSetting('gradcrackerMaxJobsPerTerm');
|
||||
const gradcrackerMaxJobs = gradcrackerMaxJobsSetting ? parseInt(gradcrackerMaxJobsSetting, 10) : 50;
|
||||
|
||||
const crawlerResult = await runCrawler({
|
||||
existingJobUrls,
|
||||
searchTerms,
|
||||
maxJobsPerTerm: gradcrackerMaxJobs,
|
||||
onProgress: (progress) => {
|
||||
// Calculate overall progress based on list pages processed vs total
|
||||
// This is rough but better than nothing
|
||||
|
||||
@ -15,6 +15,7 @@ export type SettingKey = 'model'
|
||||
| 'jobCompleteWebhookUrl'
|
||||
| 'resumeProjects'
|
||||
| 'ukvisajobsMaxJobs'
|
||||
| 'gradcrackerMaxJobsPerTerm'
|
||||
| 'searchTerms'
|
||||
| 'jobspyLocation'
|
||||
| 'jobspyResultsWanted'
|
||||
|
||||
@ -37,6 +37,11 @@ export interface RunCrawlerOptions {
|
||||
* List of search terms to be used as roles for URL generation.
|
||||
*/
|
||||
searchTerms?: string[];
|
||||
|
||||
/**
|
||||
* Max jobs to fetch per search term.
* Omitted or 0 disables the limit.
|
||||
*/
|
||||
maxJobsPerTerm?: number;
|
||||
}
|
||||
|
||||
interface JobExtractorProgress {
|
||||
@ -84,6 +89,7 @@ export async function runCrawler(options: RunCrawlerOptions = {}): Promise<Crawl
|
||||
JOBOPS_SKIP_APPLY_FOR_EXISTING: '1',
|
||||
JOBOPS_EMIT_PROGRESS: '1',
|
||||
GRADCRACKER_SEARCH_TERMS: options.searchTerms ? JSON.stringify(options.searchTerms) : '',
|
||||
GRADCRACKER_MAX_JOBS_PER_TERM: options.maxJobsPerTerm ? String(options.maxJobsPerTerm) : '',
|
||||
...(existingJobUrlsFile ? { JOBOPS_EXISTING_JOB_URLS_FILE: existingJobUrlsFile } : {}),
|
||||
},
|
||||
});
|
||||
|
||||
@ -262,6 +262,9 @@ export interface AppSettings {
|
||||
ukvisajobsMaxJobs: number;
|
||||
defaultUkvisajobsMaxJobs: number;
|
||||
overrideUkvisajobsMaxJobs: number | null;
|
||||
gradcrackerMaxJobsPerTerm: number;
|
||||
defaultGradcrackerMaxJobsPerTerm: number;
|
||||
overrideGradcrackerMaxJobsPerTerm: number | null;
|
||||
searchTerms: string[];
|
||||
defaultSearchTerms: string[];
|
||||
overrideSearchTerms: string[] | null;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user