gradcracker limits

This commit is contained in:
DaKheera47 2026-01-15 19:17:23 +00:00
parent 8f278a228a
commit 2cf9249159
9 changed files with 136 additions and 8 deletions

View File

@ -44,15 +44,18 @@ if (envRolesRaw) {
// combo of locations and roles
const gradcrackerUrls = locations.flatMap((location) => {
return roles.map((role) => {
return `https://www.gradcracker.com/search/computing-technology/${role}-graduate-jobs-in-${location}?order=dateAdded`;
return {
url: `https://www.gradcracker.com/search/computing-technology/${role}-graduate-jobs-in-${location}?order=dateAdded`,
role
};
});
});
console.log(`Total gradcracker URLs: ${gradcrackerUrls.length}`)
const startUrls = gradcrackerUrls.map((url) => ({
const startUrls = gradcrackerUrls.map(({ url, role }) => ({
url,
userData: { label: "gradcracker-list-page" },
userData: { label: "gradcracker-list-page", role },
}));
initJobOpsProgress(startUrls.length);

View File

@ -44,6 +44,10 @@ function getExistingJobUrlSet(): Set<string> {
const SKIP_APPLY_FOR_EXISTING = process.env.JOBOPS_SKIP_APPLY_FOR_EXISTING === "1";
const EXISTING_JOB_URLS = getExistingJobUrlSet();
// Module-level tally of job pages counted per role; the list-page handler reads and bumps it.
const jobCounts: Map<string, number> = new Map();
// Cap taken from GRADCRACKER_MAX_JOBS_PER_TERM. An unset or empty variable parses as 0,
// and the handler only enforces the cap when this value is greater than 0.
const MAX_JOBS_PER_TERM = Number.parseInt(
  process.env.GRADCRACKER_MAX_JOBS_PER_TERM || "0",
  10,
);
interface Job {
title: string | null;
jobUrl: string | null;
@ -62,7 +66,22 @@ export const router = createPlaywrightRouter();
router.addHandler(
"gradcracker-list-page",
async ({ page, request, enqueueLinks }) => {
log.info(`Processing: ${request.url}`);
const { role } = request.userData;
log.info(`Processing: ${request.url} (Role: ${role})`);
// Short-circuit: when a cap is configured and this role's tally has already reached it,
// report the list page as done with zero counts and return without scraping it.
if (MAX_JOBS_PER_TERM > 0 && (jobCounts.get(role) || 0) >= MAX_JOBS_PER_TERM) {
  log.info(`Max jobs (${MAX_JOBS_PER_TERM}) already enqueued for role "${role}". Skipping list page.`);
  markListPageDone({
    currentUrl: request.url,
    jobCardsFound: 0,
    jobPagesEnqueued: 0,
    jobPagesSkipped: 0,
  });
  return;
}
// Wait until the job cards are rendered
await page.waitForSelector("article[wire\\:key]", { timeout: 10000 });
@ -172,6 +191,16 @@ router.addHandler(
if (isKnownJob) {
skippedKnownJobs++;
} else {
// Enforce the per-role cap, but only when one is configured: leave the enclosing loop once
// the role's tally has reached the cap, otherwise count this job before it is enqueued.
if (MAX_JOBS_PER_TERM > 0) {
  const enqueuedForRole = jobCounts.get(role) || 0;
  const capReached = enqueuedForRole >= MAX_JOBS_PER_TERM;
  if (capReached) {
    log.info(`Reached max jobs limit (${MAX_JOBS_PER_TERM}) for role "${role}" while processing list. Stopping.`);
    break;
  }
  jobCounts.set(role, enqueuedForRole + 1);
}
await enqueueLinks({
urls: [jobUrl],
userData: {

View File

@ -154,6 +154,7 @@ export async function updateSettings(update: {
jobCompleteWebhookUrl?: string | null
resumeProjects?: ResumeProjectsSettings | null
ukvisajobsMaxJobs?: number | null
gradcrackerMaxJobsPerTerm?: number | null
searchTerms?: string[] | null
jobspyLocation?: string | null
jobspyResultsWanted?: number | null

View File

@ -78,6 +78,7 @@ export const SettingsPage: React.FC = () => {
const [jobCompleteWebhookUrlDraft, setJobCompleteWebhookUrlDraft] = useState("")
const [resumeProjectsDraft, setResumeProjectsDraft] = useState<ResumeProjectsSettings | null>(null)
const [ukvisajobsMaxJobsDraft, setUkvisajobsMaxJobsDraft] = useState<number | null>(null)
const [gradcrackerMaxJobsPerTermDraft, setGradcrackerMaxJobsPerTermDraft] = useState<number | null>(null)
const [searchTermsDraft, setSearchTermsDraft] = useState<string[] | null>(null)
const [jobspyLocationDraft, setJobspyLocationDraft] = useState<string | null>(null)
const [jobspyResultsWantedDraft, setJobspyResultsWantedDraft] = useState<number | null>(null)
@ -105,6 +106,7 @@ export const SettingsPage: React.FC = () => {
setJobCompleteWebhookUrlDraft(data.overrideJobCompleteWebhookUrl ?? "")
setResumeProjectsDraft(data.resumeProjects)
setUkvisajobsMaxJobsDraft(data.overrideUkvisajobsMaxJobs)
setGradcrackerMaxJobsPerTermDraft(data.overrideGradcrackerMaxJobsPerTerm)
setSearchTermsDraft(data.overrideSearchTerms)
setJobspyLocationDraft(data.overrideJobspyLocation)
setJobspyResultsWantedDraft(data.overrideJobspyResultsWanted)
@ -145,6 +147,9 @@ export const SettingsPage: React.FC = () => {
const effectiveUkvisajobsMaxJobs = settings?.ukvisajobsMaxJobs ?? 50
const defaultUkvisajobsMaxJobs = settings?.defaultUkvisajobsMaxJobs ?? 50
const overrideUkvisajobsMaxJobs = settings?.overrideUkvisajobsMaxJobs
const effectiveGradcrackerMaxJobsPerTerm = settings?.gradcrackerMaxJobsPerTerm ?? 50
const defaultGradcrackerMaxJobsPerTerm = settings?.defaultGradcrackerMaxJobsPerTerm ?? 50
const overrideGradcrackerMaxJobsPerTerm = settings?.overrideGradcrackerMaxJobsPerTerm
const effectiveSearchTerms = settings?.searchTerms ?? []
const defaultSearchTerms = settings?.defaultSearchTerms ?? []
const overrideSearchTerms = settings?.overrideSearchTerms
@ -185,6 +190,7 @@ export const SettingsPage: React.FC = () => {
const nextJobCompleteWebhook = jobCompleteWebhookUrlDraft.trim()
const currentJobCompleteWebhook = (overrideJobCompleteWebhookUrl ?? "").trim()
const ukvisajobsChanged = ukvisajobsMaxJobsDraft !== (overrideUkvisajobsMaxJobs ?? null)
const gradcrackerChanged = gradcrackerMaxJobsPerTermDraft !== (overrideGradcrackerMaxJobsPerTerm ?? null)
const searchTermsChanged = JSON.stringify(searchTermsDraft) !== JSON.stringify(overrideSearchTerms ?? null)
return (
next !== current ||
@ -195,6 +201,7 @@ export const SettingsPage: React.FC = () => {
nextJobCompleteWebhook !== currentJobCompleteWebhook ||
!resumeProjectsEqual(resumeProjectsDraft, settings.resumeProjects) ||
ukvisajobsChanged ||
gradcrackerChanged ||
searchTermsChanged ||
jobspyLocationDraft !== (overrideJobspyLocation ?? null) ||
jobspyResultsWantedDraft !== (overrideJobspyResultsWanted ?? null) ||
@ -220,6 +227,8 @@ export const SettingsPage: React.FC = () => {
resumeProjectsDraft,
ukvisajobsMaxJobsDraft,
overrideUkvisajobsMaxJobs,
gradcrackerMaxJobsPerTermDraft,
overrideGradcrackerMaxJobsPerTerm,
searchTermsDraft,
overrideSearchTerms,
jobspyLocationDraft,
@ -250,6 +259,7 @@ export const SettingsPage: React.FC = () => {
? null
: resumeProjectsDraft
const ukvisajobsMaxJobsOverride = ukvisajobsMaxJobsDraft === defaultUkvisajobsMaxJobs ? null : ukvisajobsMaxJobsDraft
const gradcrackerMaxJobsPerTermOverride = gradcrackerMaxJobsPerTermDraft === defaultGradcrackerMaxJobsPerTerm ? null : gradcrackerMaxJobsPerTermDraft
const searchTermsOverride = arraysEqual(searchTermsDraft ?? [], defaultSearchTerms) ? null : searchTermsDraft
const jobspyLocationOverride = jobspyLocationDraft === defaultJobspyLocation ? null : jobspyLocationDraft
const jobspyResultsWantedOverride = jobspyResultsWantedDraft === defaultJobspyResultsWanted ? null : jobspyResultsWantedDraft
@ -266,6 +276,7 @@ export const SettingsPage: React.FC = () => {
jobCompleteWebhookUrl: jobCompleteTrimmed.length > 0 ? jobCompleteTrimmed : null,
resumeProjects: resumeProjectsOverride,
ukvisajobsMaxJobs: ukvisajobsMaxJobsOverride,
gradcrackerMaxJobsPerTerm: gradcrackerMaxJobsPerTermOverride,
searchTerms: searchTermsOverride,
jobspyLocation: jobspyLocationOverride,
jobspyResultsWanted: jobspyResultsWantedOverride,
@ -283,6 +294,7 @@ export const SettingsPage: React.FC = () => {
setJobCompleteWebhookUrlDraft(updated.overrideJobCompleteWebhookUrl ?? "")
setResumeProjectsDraft(updated.resumeProjects)
setUkvisajobsMaxJobsDraft(updated.overrideUkvisajobsMaxJobs)
setGradcrackerMaxJobsPerTermDraft(updated.overrideGradcrackerMaxJobsPerTerm)
setSearchTermsDraft(updated.overrideSearchTerms)
setJobspyLocationDraft(updated.overrideJobspyLocation)
setJobspyResultsWantedDraft(updated.overrideJobspyResultsWanted)
@ -365,6 +377,7 @@ export const SettingsPage: React.FC = () => {
jobCompleteWebhookUrl: null,
resumeProjects: null,
ukvisajobsMaxJobs: null,
gradcrackerMaxJobsPerTerm: null,
searchTerms: null,
jobspyLocation: null,
jobspyResultsWanted: null,
@ -382,6 +395,7 @@ export const SettingsPage: React.FC = () => {
setJobCompleteWebhookUrlDraft("")
setResumeProjectsDraft(updated.resumeProjects)
setUkvisajobsMaxJobsDraft(null)
setGradcrackerMaxJobsPerTermDraft(null)
setSearchTermsDraft(null)
setJobspyLocationDraft(null)
setJobspyResultsWantedDraft(null)
@ -573,20 +587,20 @@ export const SettingsPage: React.FC = () => {
type="number"
inputMode="numeric"
min={1}
max={200}
max={1000}
value={ukvisajobsMaxJobsDraft ?? defaultUkvisajobsMaxJobs}
onChange={(event) => {
const value = parseInt(event.target.value, 10)
if (Number.isNaN(value)) {
setUkvisajobsMaxJobsDraft(null)
} else {
setUkvisajobsMaxJobsDraft(Math.min(200, Math.max(1, value)))
setUkvisajobsMaxJobsDraft(Math.min(1000, Math.max(1, value)))
}
}}
disabled={isLoading || isSaving}
/>
<div className="text-xs text-muted-foreground">
Maximum number of jobs to fetch from UKVisaJobs per pipeline run. Range: 1-200.
Maximum number of jobs to fetch from UKVisaJobs per pipeline run. Range: 1-1000.
</div>
</div>
@ -599,7 +613,52 @@ export const SettingsPage: React.FC = () => {
</div>
<div>
<div className="text-xs text-muted-foreground">Default</div>
<div className="break-words font-mono text-xs">{defaultUkvisajobsMaxJobs}</div>
<div className="break-words font-mono text-xs font-semibold">{defaultUkvisajobsMaxJobs}</div>
</div>
</div>
</div>
</AccordionContent>
</AccordionItem>
<AccordionItem value="gradcracker" className="border rounded-lg px-4">
<AccordionTrigger className="hover:no-underline py-4">
<span className="text-base font-semibold">Gradcracker Extractor</span>
</AccordionTrigger>
<AccordionContent className="pb-4">
<div className="space-y-4">
<div className="space-y-2">
<div className="text-sm font-medium">Max jobs per search term</div>
<Input
type="number"
inputMode="numeric"
min={1}
max={1000}
value={gradcrackerMaxJobsPerTermDraft ?? defaultGradcrackerMaxJobsPerTerm}
onChange={(event) => {
const value = parseInt(event.target.value, 10)
if (Number.isNaN(value)) {
setGradcrackerMaxJobsPerTermDraft(null)
} else {
setGradcrackerMaxJobsPerTermDraft(Math.min(1000, Math.max(1, value)))
}
}}
disabled={isLoading || isSaving}
/>
<div className="text-xs text-muted-foreground">
Maximum number of jobs to fetch for EACH search term from Gradcracker. Range: 1-1000.
</div>
</div>
<Separator />
<div className="grid gap-2 text-sm sm:grid-cols-2">
<div>
<div className="text-xs text-muted-foreground">Effective</div>
<div className="break-words font-mono text-xs">{effectiveGradcrackerMaxJobsPerTerm}</div>
</div>
<div>
<div className="text-xs text-muted-foreground">Default</div>
<div className="break-words font-mono text-xs font-semibold">{defaultGradcrackerMaxJobsPerTerm}</div>
</div>
</div>
</div>

View File

@ -299,6 +299,11 @@ apiRouter.get('/settings', async (_req: Request, res: Response) => {
const overrideUkvisajobsMaxJobs = overrideUkvisajobsMaxJobsRaw ? parseInt(overrideUkvisajobsMaxJobsRaw, 10) : null;
const ukvisajobsMaxJobs = overrideUkvisajobsMaxJobs ?? defaultUkvisajobsMaxJobs;
// Gradcracker max jobs per search term - override is read as a string and parsed as a base-10 integer; default is 50.
// NOTE(review): a stored value that is not numeric would parse to NaN, and `??` does not
// replace NaN with the default - confirm the stored value is always written from a validated number.
const overrideGradcrackerMaxJobsPerTermRaw = await settingsRepo.getSetting('gradcrackerMaxJobsPerTerm');
const defaultGradcrackerMaxJobsPerTerm = 50;
const overrideGradcrackerMaxJobsPerTerm = overrideGradcrackerMaxJobsPerTermRaw ? parseInt(overrideGradcrackerMaxJobsPerTermRaw, 10) : null;
// Effective value: the override when one is set, otherwise the default.
const gradcrackerMaxJobsPerTerm = overrideGradcrackerMaxJobsPerTerm ?? defaultGradcrackerMaxJobsPerTerm;
const overrideSearchTermsRaw = await settingsRepo.getSetting('searchTerms');
const defaultSearchTermsEnv = process.env.JOBSPY_SEARCH_TERMS || 'web developer';
const defaultSearchTerms = defaultSearchTermsEnv.split('|').map(s => s.trim()).filter(Boolean);
@ -358,6 +363,9 @@ apiRouter.get('/settings', async (_req: Request, res: Response) => {
ukvisajobsMaxJobs,
defaultUkvisajobsMaxJobs,
overrideUkvisajobsMaxJobs,
gradcrackerMaxJobsPerTerm,
defaultGradcrackerMaxJobsPerTerm,
overrideGradcrackerMaxJobsPerTerm,
searchTerms,
defaultSearchTerms,
overrideSearchTerms,
@ -400,6 +408,7 @@ const updateSettingsSchema = z.object({
aiSelectableProjectIds: z.array(z.string().trim().min(1)).max(200),
}).nullable().optional(),
// Upper bounds match the settings page inputs, which clamp to and advertise a 1-1000 range;
// a lower bound here would reject values the UI lets the user save.
ukvisajobsMaxJobs: z.number().int().min(1).max(1000).nullable().optional(),
gradcrackerMaxJobsPerTerm: z.number().int().min(1).max(1000).nullable().optional(),
searchTerms: z.array(z.string().trim().min(1).max(200)).max(50).nullable().optional(),
jobspyLocation: z.string().trim().min(1).max(100).nullable().optional(),
jobspyResultsWanted: z.number().int().min(1).max(500).nullable().optional(),
@ -460,6 +469,11 @@ apiRouter.patch('/settings', async (req: Request, res: Response) => {
await settingsRepo.setSetting('ukvisajobsMaxJobs', ukvisajobsMaxJobs !== null ? String(ukvisajobsMaxJobs) : null);
}
// Persist the Gradcracker per-term cap only when the request body carries the key.
// A null or undefined value is forwarded as null; a number is forwarded in string form.
if ('gradcrackerMaxJobsPerTerm' in input) {
  const requestedCap = input.gradcrackerMaxJobsPerTerm ?? null;
  const serializedCap = requestedCap === null ? null : String(requestedCap);
  await settingsRepo.setSetting('gradcrackerMaxJobsPerTerm', serializedCap);
}
if ('searchTerms' in input) {
const searchTerms = input.searchTerms ?? null;
await settingsRepo.setSetting('searchTerms', searchTerms !== null ? JSON.stringify(searchTerms) : null);
@ -526,6 +540,11 @@ apiRouter.patch('/settings', async (req: Request, res: Response) => {
const overrideUkvisajobsMaxJobs = overrideUkvisajobsMaxJobsRaw ? parseInt(overrideUkvisajobsMaxJobsRaw, 10) : null;
const ukvisajobsMaxJobs = overrideUkvisajobsMaxJobs ?? defaultUkvisajobsMaxJobs;
// Gradcracker max jobs per search term - override is read as a string and parsed as a base-10 integer; default is 50.
// NOTE(review): a stored value that is not numeric would parse to NaN, and `??` does not
// replace NaN with the default - confirm the stored value is always written from a validated number.
const overrideGradcrackerMaxJobsPerTermRaw = await settingsRepo.getSetting('gradcrackerMaxJobsPerTerm');
const defaultGradcrackerMaxJobsPerTerm = 50;
const overrideGradcrackerMaxJobsPerTerm = overrideGradcrackerMaxJobsPerTermRaw ? parseInt(overrideGradcrackerMaxJobsPerTermRaw, 10) : null;
// Effective value: the override when one is set, otherwise the default.
const gradcrackerMaxJobsPerTerm = overrideGradcrackerMaxJobsPerTerm ?? defaultGradcrackerMaxJobsPerTerm;
// Search terms - stored as JSON array, default from env var (pipe-separated)
const overrideSearchTermsRaw = await settingsRepo.getSetting('searchTerms');
const defaultSearchTermsEnv = process.env.JOBSPY_SEARCH_TERMS || 'web developer';
@ -586,6 +605,9 @@ apiRouter.patch('/settings', async (req: Request, res: Response) => {
ukvisajobsMaxJobs,
defaultUkvisajobsMaxJobs,
overrideUkvisajobsMaxJobs,
gradcrackerMaxJobsPerTerm,
defaultGradcrackerMaxJobsPerTerm,
overrideGradcrackerMaxJobsPerTerm,
searchTerms,
defaultSearchTerms,
overrideSearchTerms,

View File

@ -182,9 +182,13 @@ export async function runPipeline(config: Partial<PipelineConfig> = {}): Promise
// Pass existing URLs to avoid clicking "Apply" on jobs we already have
const existingJobUrls = await jobsRepo.getAllJobUrls();
// Per-term cap for the Gradcracker crawl: use the stored setting when it parses to an
// integer, otherwise fall back to 50. The fallback covers both a missing and a non-numeric
// value, so a corrupt setting cannot become NaN and be passed on as maxJobsPerTerm.
const gradcrackerMaxJobsSetting = await settingsRepo.getSetting('gradcrackerMaxJobsPerTerm');
const gradcrackerMaxJobsParsed = gradcrackerMaxJobsSetting ? parseInt(gradcrackerMaxJobsSetting, 10) : NaN;
const gradcrackerMaxJobs = Number.isNaN(gradcrackerMaxJobsParsed) ? 50 : gradcrackerMaxJobsParsed;
const crawlerResult = await runCrawler({
existingJobUrls,
searchTerms,
maxJobsPerTerm: gradcrackerMaxJobs,
onProgress: (progress) => {
// Calculate overall progress based on list pages processed vs total
// This is rough but better than nothing

View File

@ -15,6 +15,7 @@ export type SettingKey = 'model'
| 'jobCompleteWebhookUrl'
| 'resumeProjects'
| 'ukvisajobsMaxJobs'
| 'gradcrackerMaxJobsPerTerm'
| 'searchTerms'
| 'jobspyLocation'
| 'jobspyResultsWanted'

View File

@ -37,6 +37,11 @@ export interface RunCrawlerOptions {
* List of search terms to be used as roles for URL generation.
*/
searchTerms?: string[];
/**
* Max jobs to fetch per search term.
* Forwarded to the crawler as the GRADCRACKER_MAX_JOBS_PER_TERM environment variable;
* when omitted or 0 the variable is set to an empty string instead of a number.
*/
maxJobsPerTerm?: number;
}
interface JobExtractorProgress {
@ -84,6 +89,7 @@ export async function runCrawler(options: RunCrawlerOptions = {}): Promise<Crawl
JOBOPS_SKIP_APPLY_FOR_EXISTING: '1',
JOBOPS_EMIT_PROGRESS: '1',
GRADCRACKER_SEARCH_TERMS: options.searchTerms ? JSON.stringify(options.searchTerms) : '',
GRADCRACKER_MAX_JOBS_PER_TERM: options.maxJobsPerTerm ? String(options.maxJobsPerTerm) : '',
...(existingJobUrlsFile ? { JOBOPS_EXISTING_JOB_URLS_FILE: existingJobUrlsFile } : {}),
},
});

View File

@ -262,6 +262,9 @@ export interface AppSettings {
ukvisajobsMaxJobs: number;
defaultUkvisajobsMaxJobs: number;
overrideUkvisajobsMaxJobs: number | null;
gradcrackerMaxJobsPerTerm: number;
defaultGradcrackerMaxJobsPerTerm: number;
overrideGradcrackerMaxJobsPerTerm: number | null;
searchTerms: string[];
defaultSearchTerms: string[];
overrideSearchTerms: string[] | null;