Improve job scoring for QA vs generic SWE roles; document Ollama on 16GB GPU

- Cap suitability score when roleTypeMatch is low and target roles are set
- Harden scoring prompt (role-type gating, examples, headline in resume payload)
- QA-oriented mock scoring when LLM is unavailable
- .env.example: optional Ollama MODEL/LLM_BASE_URL block for self-hosted LLM

Made-with: Cursor
2026-04-05 21:35:40 -04:00 · 2026-04-05 21:35:40 -04:00 · 14a6da4bdf
commit 14a6da4bdf
parent 1d450d7cc5
2 changed files with 151 additions and 44 deletions
--- a/.env.example
+++ b/.env.example
@ -5,6 +5,12 @@
 MODEL=google/gemini-3-flash-preview
 # Self-hosted Ollama (e.g. 16GB GPU): use a 22B-class model for scoring/tailoring; pull the tag on the server first.
 # MODEL=mistral-small:22b
 # LLM_PROVIDER=ollama
 # LLM_BASE_URL=http://127.0.0.1:11434
 # Heavier option (may offload layers to CPU on 16GB): qwen2.5:32b
 # DEPRECATED (auto-copied to LLM_API_KEY for compatibility)
 # OPENROUTER_API_KEY=your_openrouter_api_key_here
@ -64,11 +70,18 @@ UKVISAJOBS_HEADLESS=true
 # =============================================================================
 # Adzuna (multi-country API source) - optional
 # =============================================================================
-# App credentials from Adzuna developer account.
+# Register at https://developer.adzuna.com/admin/access_details
 ADZUNA_APP_ID=
 ADZUNA_APP_KEY=
-# Optional default per-term cap (can be overridden by UI run budget logic).
+# Default cap per search term (orchestrator run budget / settings can override).
 # ADZUNA_MAX_JOBS_PER_TERM=50
 # API page size (Adzuna max 50).
 # ADZUNA_RESULTS_PER_PAGE=50
 # Optional global `where` text for Adzuna. Pipeline runs usually use Settings → search cities
 # instead; leave unset unless you want a fixed location for standalone extractor use.
 # ADZUNA_LOCATION_QUERY=
 # Only for running the extractor CLI alone; the pipeline sets country from your run (us / ca / gb / …).
 # ADZUNA_COUNTRY=gb
 # =============================================================================
 # JobSpy - Job search configuration
--- a/orchestrator/src/server/services/scorer.ts
+++ b/orchestrator/src/server/services/scorer.ts
@ -202,10 +202,19 @@ export async function scoreJobSuitability(
    });
  }
-  const clampedScore = Math.min(100, Math.max(0, Math.round(data.score)));
+  let clampedScore = Math.min(100, Math.max(0, Math.round(data.score)));
-  const clampedReason = data.reason || "No explanation provided";
+  let clampedReason = data.reason || "No explanation provided";
  const analysis = extractAnalysis(data);
  const roleTypeMatchCap = applyRoleTypeMatchCap(
    clampedScore,
    clampedReason,
    data,
    hasProfile ? jobSearchProfile : null,
  );
  clampedScore = roleTypeMatchCap.score;
  clampedReason = roleTypeMatchCap.reason;
  const penaltyResult = applySalaryPenalty(job, clampedScore, clampedReason, {
    penalizeMissingSalary: settings.penalizeMissingSalary.value,
    missingSalaryPenalty: settings.missingSalaryPenalty.value,
@ -218,6 +227,47 @@ export async function scoreJobSuitability(
  };
 }
 /** A roleTypeMatch strictly below this value triggers the score cap. */
 const ROLE_TYPE_MATCH_CAP_THRESHOLD = 40;
 /** Highest overall score a job may keep once the cap is triggered. */
 const ROLE_TYPE_MATCH_SCORE_CEILING = 25;
 /**
  * Enforces a hard score ceiling when the LLM reports a low roleTypeMatch but
  * still assigns a high overall score (common with smaller models that
  * overweight keyword overlap), so role-type mismatches can't float to the top
  * of the list.
  *
  * The cap is skipped when:
  * - there is no job-search profile, or the profile lists no target roles;
  * - roleTypeMatch is absent, not a number, or not finite (NaN / ±Infinity) —
  *   an unusable value is treated the same as a missing one;
  * - roleTypeMatch is at or above ROLE_TYPE_MATCH_CAP_THRESHOLD;
  * - the score is already at or below ROLE_TYPE_MATCH_SCORE_CEILING.
  *
  * @param score - Overall score to (possibly) cap.
  * @param reason - Explanation text; a cap note is appended when capping.
  * @param data - LLM response; only `roleTypeMatch` is read here.
  * @param jobSearchProfile - Candidate preferences, or null when unavailable.
  * @returns The input score/reason unchanged, or the ceiling score together
  *   with the reason extended by a note describing the cap.
  */
 function applyRoleTypeMatchCap(
   score: number,
   reason: string,
   data: ScoringLlmResponse,
   jobSearchProfile: JobSearchProfile | null,
 ): { score: number; reason: string } {
   // Without declared target roles there is nothing to measure a mismatch against.
   if (!jobSearchProfile || jobSearchProfile.targetRoles.length === 0) {
     return { score, reason };
   }
   // `typeof NaN === "number"`, so a bare typeof check would let NaN through,
   // fail the threshold comparison below, and cap the score with a "NaN%" note.
   const rawMatch = data.roleTypeMatch;
   const roleTypeMatch =
     typeof rawMatch === "number" && Number.isFinite(rawMatch)
       ? rawMatch
       : null;
   if (
     roleTypeMatch === null ||
     roleTypeMatch >= ROLE_TYPE_MATCH_CAP_THRESHOLD
   ) {
     return { score, reason };
   }
   if (score <= ROLE_TYPE_MATCH_SCORE_CEILING) {
     return { score, reason };
   }
   // The guard above guarantees score > ceiling, so the capped value is the ceiling.
   const cappedScore = ROLE_TYPE_MATCH_SCORE_CEILING;
   const capNote = `Capped from ${score} to ${cappedScore} (role type match ${roleTypeMatch}% < ${ROLE_TYPE_MATCH_CAP_THRESHOLD}% threshold).`;
   logger.info("Applied role-type match cap", {
     originalScore: score,
     cappedScore,
     roleTypeMatch,
   });
   return { score: cappedScore, reason: `${reason} ${capNote}` };
 }
 function hasNonEmptyProfile(p: JobSearchProfile): boolean {
  return (
    p.targetRoles.length > 0 ||
@ -339,24 +389,39 @@ About the Candidate: ${p.aboutMe || "Not provided"}`
  const dealBreakerRules = hasProfilePrefs
    ? `
-DEAL-BREAKER RULES (STRICTLY ENFORCE):
+DEAL-BREAKER RULES (STRICTLY ENFORCE — these override all other criteria):
- If the job's primary role type fundamentally mismatches the candidate's target roles, score MUST be 0-20.
+- ROLE TYPE IS THE PRIMARY GATE. Evaluate the JOB TITLE and PRIMARY RESPONSIBILITIES first.
-  Example: If candidate wants "automation tester" roles, a "Full Stack Developer" job should score very low
+  If the core role type mismatches the candidate's target roles, score MUST be 0-20 and roleTypeMatch MUST be 0-30.
-  even if the description mentions testing tools. The JOB TITLE and PRIMARY RESPONSIBILITIES matter most.
+  Do NOT inflate the score because the candidate's resume languages (e.g. C#, Java, Python) overlap with the job's language requirements.
  Shared programming languages alone do NOT make a role a good fit.
 - Examples of role-type mismatches (all should score 0-20):
  * Candidate targets "QA Automation / SDET" → "Senior Software Engineer" (core dev, not QA) = score 10-20
  * Candidate targets "QA Automation / SDET" → "Backend Engineer" (core dev) = score 10-20
  * Candidate targets "QA Automation / SDET" → "DevOps Engineer" (infra, not QA) = score 10-15
  * Candidate targets "QA Automation / SDET" → "Data Scientist" = score 0-10
 - Examples of good role-type matches (eligible for full score range):
  * Candidate targets "QA Automation / SDET" → "SDET" = roleTypeMatch 95-100
  * Candidate targets "QA Automation / SDET" → "QA Automation Engineer" = roleTypeMatch 90-100
  * Candidate targets "QA Automation / SDET" → "Software Engineer in Test" = roleTypeMatch 85-95
  * Candidate targets "QA Automation / SDET" → "Test Automation Engineer" = roleTypeMatch 85-95
 - If any deal-breaker keywords appear in the job title or core requirements, score MUST be 0-15.
 - If the job requires experience far beyond the candidate's level, reduce score by 30-50 points.
 - A job mentioning a candidate's skill as a minor "nice-to-have" does NOT make it a good match
-  if the core role is completely different from what the candidate wants.`
+  if the core role is completely different from what the candidate wants.
 - When in doubt about role type, err on the side of a LOWER score. The candidate would rather miss
  a borderline match than waste time on roles that don't align with their career focus.`
    : "";
  const scoringCriteria = hasProfilePrefs
    ? `SCORING CRITERIA (with candidate preferences):
- Role type alignment with target roles: 0-35 points (MOST IMPORTANT - is this the KIND of job they want?)
+- Role type alignment with target roles: 0-40 points (GATING FACTOR — if this is below 15, the total score MUST be below 25 regardless of other criteria)
- Skills match (must-haves weighted 3x, nice-to-haves 1x): 0-25 points
+- Skills match with role-relevant skills (must-haves weighted 3x, nice-to-haves 1x): 0-25 points
 - Experience level match: 0-15 points
 - Location/remote work alignment with preferences: 0-10 points
- Industry/domain fit: 0-10 points
+- Industry/domain fit: 0-5 points
- Career growth and salary alignment: 0-5 points`
+- Career growth and salary alignment: 0-5 points
 CRITICAL: A "Senior Software Engineer" role and a "QA Automation Engineer" role are FUNDAMENTALLY DIFFERENT job types even if they share programming languages. Evaluate the PRIMARY function of the role, not just keyword overlap.`
    : `SCORING CRITERIA:
 - Skills match (technologies, frameworks, languages): 0-30 points
 - Experience level match: 0-25 points
@ -403,8 +468,13 @@ RULES FOR ANALYSIS FIELDS:
 - "suggestions": 1-3 actionable things the candidate could do to be stronger for this type of role.
 - "dealBreakerHits": List any deal-breakers triggered. Empty array if none.
-EXAMPLE VALID RESPONSE:
+EXAMPLE RESPONSES:
-{"score": 25, "reason": "This is a full-stack developer role but the candidate is targeting automation testing positions. The mention of Playwright in the description is minor and not the core focus.", "roleTypeMatch": 15, "strengths": ["Has Playwright experience mentioned in the job description", "Located in the same city"], "gaps": ["No React/Node.js full-stack experience", "Job requires 3+ years of backend development"], "suggestions": ["If interested in full-stack, build portfolio projects with React and Node.js", "Consider SDET roles that bridge testing and development"], "dealBreakerHits": ["Role type mismatch: Full Stack Developer vs target of Automation Tester"]}`;
+
 Role mismatch (candidate wants QA/SDET, job is core software engineering):
 {"score": 15, "reason": "This is a core software engineering role focused on GPU infrastructure and platform development. Despite shared languages (C#, Java, Python), the primary responsibilities are software development, not testing or quality assurance.", "roleTypeMatch": 10, "strengths": ["Experience with C# and Java", "Familiar with Azure cloud"], "gaps": ["Role is software development, not QA/SDET", "No GPU/HPC infrastructure experience", "No hardware/software interaction experience"], "suggestions": ["Focus on SDET or QA Automation roles at Microsoft instead", "Look for test infrastructure roles in cloud platform teams"], "dealBreakerHits": ["Role type mismatch: Senior Software Engineer (development) vs target of QA Automation/SDET"]}
 Good match (role aligns with candidate's target):
 {"score": 78, "reason": "Strong QA automation role with Playwright requirement matching the candidate's core expertise. CI/CD pipeline ownership aligns well with their DevOps experience.", "roleTypeMatch": 90, "strengths": ["5+ years Playwright experience exceeds the 2-year requirement", "Strong CI/CD pipeline experience with GitHub Actions"], "gaps": ["No experience with the company's specific domain"], "suggestions": ["Highlight regulated-industry QA experience from iGaming role"], "dealBreakerHits": []}`;
 }
 export function sanitizeProfileForPrompt(
@ -427,10 +497,11 @@ export function sanitizeProfileForPrompt(
    ? p.sections?.projects?.items.slice(0, 6)
    : [];
  const basics = p.basics as Record<string, unknown> | undefined;
  return {
    basics: {
-      label: p.basics?.label,
+      headline: basics?.headline || basics?.label || null,
-      summary: p.basics?.summary,
+      summary: basics?.summary,
    },
    skills: p.sections?.skills ?? null,
    experience: experienceItems,
@ -439,6 +510,48 @@ export function sanitizeProfileForPrompt(
  };
 }
 // Keyword tables for the keyword-matching fallback scorer (mockScore).
 // All entries are lowercase because they are compared with String.includes
 // against lowercased text; matching is by substring, not by whole word.
 //
 // Title keywords: if ANY of these occurs in the lowercased job title, the
 // mock score receives a single +25 boost (not one boost per keyword).
 // NOTE(review): short entries such as "qa", "qe" and "test" can also match
 // inside unrelated words — confirm that is acceptable for the fallback.
 const MOCK_TITLE_BOOST_KEYWORDS = [
  "qa",
  "qe",
  "sdet",
  "test",
  "quality",
  "automation",
  "playwright",
  "cypress",
  "selenium",
 ];
 // Job-description keywords: EACH entry found in the lowercased job
 // description adds +3 to the mock score, so overlapping entries
 // (e.g. "e2e" and "end-to-end") can both contribute.
 const MOCK_JD_BOOST_KEYWORDS = [
  "playwright",
  "cypress",
  "selenium",
  "test automation",
  "qa automation",
  "quality assurance",
  "sdet",
  "shift-left",
  "bdd",
  "e2e",
  "end-to-end",
  "regression",
  "ci/cd",
  "typescript",
  "javascript",
 ];
 // Title penalty keywords: EACH entry found in the lowercased job title
 // subtracts 15 from the mock score.
 const MOCK_PENALTY_KEYWORDS = [
  "principal",
  "staff",
  "manager",
  "director",
  // The trailing space is part of the match: "vp" must be followed by a space,
  // so a title that ends in "vp" does not match. Presumably this avoids hits
  // inside longer words — TODO confirm intent.
  "vp ",
  "vice president",
  "instructor",
  "trainer",
  "teacher",
 ];
 async function mockScore(
  job: Job,
  settings: { penalizeMissingSalary: boolean; missingSalaryPenalty: number },
@ -446,41 +559,22 @@ async function mockScore(
  const jd = (job.jobDescription || "").toLowerCase();
  const title = job.title.toLowerCase();
-  const goodKeywords = [
+  let score = 30;
    "typescript",
    "react",
    "node",
    "python",
    "web",
    "frontend",
    "backend",
    "fullstack",
    "software",
    "engineer",
    "developer",
  ];
  const badKeywords = [
    "senior",
    "5+ years",
    "10+ years",
    "principal",
    "staff",
    "manager",
  ];
-  let score = 50;
+  const titleHasQa = MOCK_TITLE_BOOST_KEYWORDS.some((kw) => title.includes(kw));
  if (titleHasQa) score += 25;
-  for (const kw of goodKeywords) {
+  for (const kw of MOCK_JD_BOOST_KEYWORDS) {
-    if (jd.includes(kw) || title.includes(kw)) score += 5;
+    if (jd.includes(kw)) score += 3;
  }
-  for (const kw of badKeywords) {
+  for (const kw of MOCK_PENALTY_KEYWORDS) {
-    if (jd.includes(kw) || title.includes(kw)) score -= 10;
+    if (title.includes(kw)) score -= 15;
  }
  score = Math.min(100, Math.max(0, score));
-  const baseReason = "Scored using keyword matching (API key not configured)";
+  const baseReason = "Scored using keyword matching (LLM unavailable)";
  const penaltyResult = applySalaryPenalty(job, score, baseReason, settings);