diff --git a/.env.example b/.env.example index a1e14a4..5ae395c 100644 --- a/.env.example +++ b/.env.example @@ -5,6 +5,12 @@ MODEL=google/gemini-3-flash-preview +# Self-hosted Ollama (e.g. 16GB GPU): use a 22B-class model for scoring/tailoring; pull the tag on the server first. +# MODEL=mistral-small:22b +# LLM_PROVIDER=ollama +# LLM_BASE_URL=http://127.0.0.1:11434 +# Heavier option (may offload some layers to CPU on a 16GB GPU): qwen2.5:32b + # DEPRECATED (auto-copied to LLM_API_KEY for compatibility) # OPENROUTER_API_KEY=your_openrouter_api_key_here @@ -64,11 +70,18 @@ UKVISAJOBS_HEADLESS=true # ============================================================================= # Adzuna (multi-country API source) - optional # ============================================================================= -# App credentials from Adzuna developer account. +# App credentials: register at https://developer.adzuna.com/admin/access_details ADZUNA_APP_ID= ADZUNA_APP_KEY= -# Optional default per-term cap (can be overridden by UI run budget logic). +# Default cap per search term (orchestrator run budget / settings can override). # ADZUNA_MAX_JOBS_PER_TERM=50 +# API page size (Adzuna max 50). +# ADZUNA_RESULTS_PER_PAGE=50 +# Optional global `where` text for Adzuna. Pipeline runs usually use Settings → search cities +# instead; leave unset unless you want a fixed location for standalone extractor use. +# ADZUNA_LOCATION_QUERY= +# Only needed when running the extractor CLI on its own; the pipeline sets the country from your run (us / ca / gb / …). 
+# ADZUNA_COUNTRY=gb # ============================================================================= # JobSpy - Job search configuration diff --git a/orchestrator/src/server/services/scorer.ts b/orchestrator/src/server/services/scorer.ts index 96bd458..b5f2ade 100644 --- a/orchestrator/src/server/services/scorer.ts +++ b/orchestrator/src/server/services/scorer.ts @@ -202,10 +202,19 @@ export async function scoreJobSuitability( }); } - const clampedScore = Math.min(100, Math.max(0, Math.round(data.score))); - const clampedReason = data.reason || "No explanation provided"; + let clampedScore = Math.min(100, Math.max(0, Math.round(data.score))); + let clampedReason = data.reason || "No explanation provided"; const analysis = extractAnalysis(data); + const roleTypeMatchCap = applyRoleTypeMatchCap( + clampedScore, + clampedReason, + data, + hasProfile ? jobSearchProfile : null, + ); + clampedScore = roleTypeMatchCap.score; + clampedReason = roleTypeMatchCap.reason; + const penaltyResult = applySalaryPenalty(job, clampedScore, clampedReason, { penalizeMissingSalary: settings.penalizeMissingSalary.value, missingSalaryPenalty: settings.missingSalaryPenalty.value, @@ -218,6 +227,47 @@ export async function scoreJobSuitability( }; } +const ROLE_TYPE_MATCH_CAP_THRESHOLD = 40; +const ROLE_TYPE_MATCH_SCORE_CEILING = 25; + +/** + * When the LLM reports a low roleTypeMatch but still assigns a high overall + * score (common with smaller models that overweight keyword overlap), enforce + * a hard ceiling so role-type mismatches can't float to the top of the list. + */ +function applyRoleTypeMatchCap( + score: number, + reason: string, + data: ScoringLlmResponse, + jobSearchProfile: JobSearchProfile | null, +): { score: number; reason: string } { + if (!jobSearchProfile || jobSearchProfile.targetRoles.length === 0) { + return { score, reason }; + } + + const roleTypeMatch = + typeof data.roleTypeMatch === "number" ? 
data.roleTypeMatch : null; + if ( + roleTypeMatch === null || + roleTypeMatch >= ROLE_TYPE_MATCH_CAP_THRESHOLD + ) { + return { score, reason }; + } + + if (score <= ROLE_TYPE_MATCH_SCORE_CEILING) { + return { score, reason }; + } + + const cappedScore = Math.min(score, ROLE_TYPE_MATCH_SCORE_CEILING); + const capNote = `Capped from ${score} to ${cappedScore} (role type match ${roleTypeMatch}% < ${ROLE_TYPE_MATCH_CAP_THRESHOLD}% threshold).`; + logger.info("Applied role-type match cap", { + originalScore: score, + cappedScore, + roleTypeMatch, + }); + return { score: cappedScore, reason: `${reason} ${capNote}` }; +} + function hasNonEmptyProfile(p: JobSearchProfile): boolean { return ( p.targetRoles.length > 0 || @@ -339,24 +389,39 @@ About the Candidate: ${p.aboutMe || "Not provided"}` const dealBreakerRules = hasProfilePrefs ? ` -DEAL-BREAKER RULES (STRICTLY ENFORCE): -- If the job's primary role type fundamentally mismatches the candidate's target roles, score MUST be 0-20. - Example: If candidate wants "automation tester" roles, a "Full Stack Developer" job should score very low - even if the description mentions testing tools. The JOB TITLE and PRIMARY RESPONSIBILITIES matter most. +DEAL-BREAKER RULES (STRICTLY ENFORCE — these override all other criteria): +- ROLE TYPE IS THE PRIMARY GATE. Evaluate the JOB TITLE and PRIMARY RESPONSIBILITIES first. + If the core role type mismatches the candidate's target roles, score MUST be 0-20 and roleTypeMatch MUST be 0-30. + Do NOT inflate the score because the candidate's resume languages (e.g. C#, Java, Python) overlap with the job's language requirements. + Shared programming languages alone do NOT make a role a good fit. 
+- Examples of role-type mismatches (all should score 0-20): + * Candidate targets "QA Automation / SDET" → "Senior Software Engineer" (core dev, not QA) = score 10-20 + * Candidate targets "QA Automation / SDET" → "Backend Engineer" (core dev) = score 10-20 + * Candidate targets "QA Automation / SDET" → "DevOps Engineer" (infra, not QA) = score 10-15 + * Candidate targets "QA Automation / SDET" → "Data Scientist" = score 0-10 +- Examples of good role-type matches (eligible for full score range): + * Candidate targets "QA Automation / SDET" → "SDET" = roleTypeMatch 95-100 + * Candidate targets "QA Automation / SDET" → "QA Automation Engineer" = roleTypeMatch 90-100 + * Candidate targets "QA Automation / SDET" → "Software Engineer in Test" = roleTypeMatch 85-95 + * Candidate targets "QA Automation / SDET" → "Test Automation Engineer" = roleTypeMatch 85-95 - If any deal-breaker keywords appear in the job title or core requirements, score MUST be 0-15. - If the job requires experience far beyond the candidate's level, reduce score by 30-50 points. - A job mentioning a candidate's skill as a minor "nice-to-have" does NOT make it a good match - if the core role is completely different from what the candidate wants.` + if the core role is completely different from what the candidate wants. +- When in doubt about role type, err on the side of a LOWER score. The candidate would rather miss + a borderline match than waste time on roles that don't align with their career focus.` : ""; const scoringCriteria = hasProfilePrefs ? `SCORING CRITERIA (with candidate preferences): -- Role type alignment with target roles: 0-35 points (MOST IMPORTANT - is this the KIND of job they want?) 
-- Skills match (must-haves weighted 3x, nice-to-haves 1x): 0-25 points +- Role type alignment with target roles: 0-40 points (GATING FACTOR — if this is below 15, the total score MUST be below 25 regardless of other criteria) +- Skills match with role-relevant skills (must-haves weighted 3x, nice-to-haves 1x): 0-25 points - Experience level match: 0-15 points - Location/remote work alignment with preferences: 0-10 points -- Industry/domain fit: 0-10 points -- Career growth and salary alignment: 0-5 points` +- Industry/domain fit: 0-5 points +- Career growth and salary alignment: 0-5 points + +CRITICAL: A "Senior Software Engineer" role and a "QA Automation Engineer" role are FUNDAMENTALLY DIFFERENT job types even if they share programming languages. Evaluate the PRIMARY function of the role, not just keyword overlap.` : `SCORING CRITERIA: - Skills match (technologies, frameworks, languages): 0-30 points - Experience level match: 0-25 points @@ -403,8 +468,13 @@ RULES FOR ANALYSIS FIELDS: - "suggestions": 1-3 actionable things the candidate could do to be stronger for this type of role. - "dealBreakerHits": List any deal-breakers triggered. Empty array if none. -EXAMPLE VALID RESPONSE: -{"score": 25, "reason": "This is a full-stack developer role but the candidate is targeting automation testing positions. 
The mention of Playwright in the description is minor and not the core focus.", "roleTypeMatch": 15, "strengths": ["Has Playwright experience mentioned in the job description", "Located in the same city"], "gaps": ["No React/Node.js full-stack experience", "Job requires 3+ years of backend development"], "suggestions": ["If interested in full-stack, build portfolio projects with React and Node.js", "Consider SDET roles that bridge testing and development"], "dealBreakerHits": ["Role type mismatch: Full Stack Developer vs target of Automation Tester"]}`; +EXAMPLE RESPONSES: + +Role mismatch (candidate wants QA/SDET, job is core software engineering): +{"score": 15, "reason": "This is a core software engineering role focused on GPU infrastructure and platform development. Despite shared languages (C#, Java, Python), the primary responsibilities are software development, not testing or quality assurance.", "roleTypeMatch": 10, "strengths": ["Experience with C# and Java", "Familiar with Azure cloud"], "gaps": ["Role is software development, not QA/SDET", "No GPU/HPC infrastructure experience", "No hardware/software interaction experience"], "suggestions": ["Focus on SDET or QA Automation roles at Microsoft instead", "Look for test infrastructure roles in cloud platform teams"], "dealBreakerHits": ["Role type mismatch: Senior Software Engineer (development) vs target of QA Automation/SDET"]} + +Good match (role aligns with candidate's target): +{"score": 78, "reason": "Strong QA automation role with Playwright requirement matching the candidate's core expertise. 
CI/CD pipeline ownership aligns well with their DevOps experience.", "roleTypeMatch": 90, "strengths": ["5+ years Playwright experience exceeds the 2-year requirement", "Strong CI/CD pipeline experience with GitHub Actions"], "gaps": ["No experience with the company's specific domain"], "suggestions": ["Highlight regulated-industry QA experience from iGaming role"], "dealBreakerHits": []}`; } export function sanitizeProfileForPrompt( @@ -427,10 +497,11 @@ export function sanitizeProfileForPrompt( ? p.sections?.projects?.items.slice(0, 6) : []; + const basics = p.basics as Record | undefined; return { basics: { - label: p.basics?.label, - summary: p.basics?.summary, + headline: basics?.headline || basics?.label || null, + summary: basics?.summary, }, skills: p.sections?.skills ?? null, experience: experienceItems, @@ -439,6 +510,48 @@ export function sanitizeProfileForPrompt( }; } +const MOCK_TITLE_BOOST_KEYWORDS = [ + "qa", + "qe", + "sdet", + "test", + "quality", + "automation", + "playwright", + "cypress", + "selenium", +]; + +const MOCK_JD_BOOST_KEYWORDS = [ + "playwright", + "cypress", + "selenium", + "test automation", + "qa automation", + "quality assurance", + "sdet", + "shift-left", + "bdd", + "e2e", + "end-to-end", + "regression", + "ci/cd", + "typescript", + "javascript", +]; + +const MOCK_PENALTY_KEYWORDS = [ + "principal", + "staff", + "manager", + "director", + "vp ", + "vice president", + "instructor", + "trainer", + "teacher", +]; + async function mockScore( job: Job, settings: { penalizeMissingSalary: boolean; missingSalaryPenalty: number }, @@ -446,41 +559,22 @@ async function mockScore( const jd = (job.jobDescription || "").toLowerCase(); const title = job.title.toLowerCase(); - const goodKeywords = [ - "typescript", - "react", - "node", - "python", - "web", - "frontend", - "backend", - "fullstack", - "software", - "engineer", - "developer", - ]; - const badKeywords = [ - "senior", - "5+ years", - "10+ years", - "principal", - "staff", - 
"manager", - ]; + let score = 30; - let score = 50; + const titleHasQa = MOCK_TITLE_BOOST_KEYWORDS.some((kw) => title.includes(kw)); + if (titleHasQa) score += 25; - for (const kw of goodKeywords) { - if (jd.includes(kw) || title.includes(kw)) score += 5; + for (const kw of MOCK_JD_BOOST_KEYWORDS) { + if (jd.includes(kw)) score += 3; } - for (const kw of badKeywords) { - if (jd.includes(kw) || title.includes(kw)) score -= 10; + for (const kw of MOCK_PENALTY_KEYWORDS) { + if (title.includes(kw)) score -= 15; } score = Math.min(100, Math.max(0, score)); - const baseReason = "Scored using keyword matching (API key not configured)"; + const baseReason = "Scored using keyword matching (LLM unavailable)"; const penaltyResult = applySalaryPenalty(job, score, baseReason, settings);