Improve job scoring for QA vs generic SWE roles; document Ollama on 16GB GPU

- Cap suitability score when roleTypeMatch is low and target roles are set
- Harden scoring prompt (role-type gating, examples, headline in resume payload)
- QA-oriented mock scoring when LLM is unavailable
- .env.example: optional Ollama MODEL/LLM_BASE_URL block for self-hosted LLM

Made-with: Cursor
2026-04-05 21:35:40 -04:00 · 2026-04-05 21:35:40 -04:00 · 14a6da4bdf
commit 14a6da4bdf
parent 1d450d7cc5
2 changed files with 151 additions and 44 deletions
--- a/.env.example
+++ b/.env.example
@ -5,6 +5,12 @@

 MODEL=google/gemini-3-flash-preview

+# Self-hosted Ollama (e.g. 16GB GPU): use a 22B-class model for scoring/tailoring; pull the tag on the server first.
+# MODEL=mistral-small:22b
+# LLM_PROVIDER=ollama
+# LLM_BASE_URL=http://127.0.0.1:11434
+# Heavier option (may offload some layers to CPU on a 16GB GPU): qwen2.5:32b
+
 # DEPRECATED (auto-copied to LLM_API_KEY for compatibility)
 # OPENROUTER_API_KEY=your_openrouter_api_key_here

@ -64,11 +70,18 @@ UKVISAJOBS_HEADLESS=true
 # =============================================================================
 # Adzuna (multi-country API source) - optional
 # =============================================================================
-# App credentials from Adzuna developer account.
+# Register at https://developer.adzuna.com/admin/access_details
 ADZUNA_APP_ID=
 ADZUNA_APP_KEY=
-# Optional default per-term cap (can be overridden by UI run budget logic).
+# Default cap per search term (orchestrator run budget / settings can override).
 # ADZUNA_MAX_JOBS_PER_TERM=50
+# API page size (Adzuna max 50).
+# ADZUNA_RESULTS_PER_PAGE=50
+# Optional global `where` text for Adzuna. Pipeline runs usually use Settings → search cities
+# instead; leave unset unless you want a fixed location for standalone extractor use.
+# ADZUNA_LOCATION_QUERY=
+# Only for running the extractor CLI alone; the pipeline sets country from your run (us / ca / gb / …).
+# ADZUNA_COUNTRY=gb

 # =============================================================================
 # JobSpy - Job search configuration
--- a/orchestrator/src/server/services/scorer.ts
+++ b/orchestrator/src/server/services/scorer.ts
@ -202,10 +202,19 @@ export async function scoreJobSuitability(
    });
  }

-  const clampedScore = Math.min(100, Math.max(0, Math.round(data.score)));
-  const clampedReason = data.reason || "No explanation provided";
+  let clampedScore = Math.min(100, Math.max(0, Math.round(data.score)));
+  let clampedReason = data.reason || "No explanation provided";
  const analysis = extractAnalysis(data);

+  const roleTypeMatchCap = applyRoleTypeMatchCap(
+    clampedScore,
+    clampedReason,
+    data,
+    hasProfile ? jobSearchProfile : null,
+  );
+  clampedScore = roleTypeMatchCap.score;
+  clampedReason = roleTypeMatchCap.reason;
+
  const penaltyResult = applySalaryPenalty(job, clampedScore, clampedReason, {
    penalizeMissingSalary: settings.penalizeMissingSalary.value,
    missingSalaryPenalty: settings.missingSalaryPenalty.value,
@ -218,6 +227,47 @@ export async function scoreJobSuitability(
  };
 }

+const ROLE_TYPE_MATCH_CAP_THRESHOLD = 40; // roleTypeMatch values strictly below this trigger the cap
+const ROLE_TYPE_MATCH_SCORE_CEILING = 25; // highest score allowed once the cap is triggered
+
+/**
+ * When the LLM reports a low roleTypeMatch but still assigns a high overall
+ * score (common with smaller models that overweight keyword overlap), enforce
+ * a hard ceiling so role-type mismatches can't float to the top of the list.
+ *
+ * The inputs are returned unchanged when any of the following holds:
+ * - there is no job search profile, or its targetRoles list is empty;
+ * - data.roleTypeMatch is not a number;
+ * - roleTypeMatch is at or above ROLE_TYPE_MATCH_CAP_THRESHOLD;
+ * - the score is already at or below ROLE_TYPE_MATCH_SCORE_CEILING.
+ *
+ * Otherwise the score is lowered to ROLE_TYPE_MATCH_SCORE_CEILING, a note
+ * describing the cap is appended to the reason, and an info-level log entry
+ * records the original score, the capped score and the roleTypeMatch value.
+ *
+ * @param score - Overall suitability score; the visible call site passes the
+ *   value already rounded and clamped to 0-100.
+ * @param reason - Explanation text that accompanies the score.
+ * @param data - Parsed LLM scoring response; only roleTypeMatch is read here.
+ * @param jobSearchProfile - Candidate preferences, or null when no profile
+ *   should be considered (the visible caller passes null if hasProfile is false).
+ * @returns The possibly capped score and the possibly annotated reason.
+ */
+function applyRoleTypeMatchCap(
+  score: number,
+  reason: string,
+  data: ScoringLlmResponse,
+  jobSearchProfile: JobSearchProfile | null,
+): { score: number; reason: string } {
+  if (!jobSearchProfile || jobSearchProfile.targetRoles.length === 0) {
+    return { score, reason }; // without target roles there is nothing to mismatch against
+  }
+
+  const roleTypeMatch =
+    typeof data.roleTypeMatch === "number" ? data.roleTypeMatch : null; // missing or non-numeric values mean "no signal"
+  if (
+    roleTypeMatch === null ||
+    roleTypeMatch >= ROLE_TYPE_MATCH_CAP_THRESHOLD
+  ) {
+    return { score, reason };
+  }
+
+  if (score <= ROLE_TYPE_MATCH_SCORE_CEILING) {
+    return { score, reason }; // already within the ceiling; skip the note and the log entry
+  }
+
+  const cappedScore = Math.min(score, ROLE_TYPE_MATCH_SCORE_CEILING); // score exceeds the ceiling here, so this always yields the ceiling
+  const capNote = `Capped from ${score} to ${cappedScore} (role type match ${roleTypeMatch}% < ${ROLE_TYPE_MATCH_CAP_THRESHOLD}% threshold).`;
+  logger.info("Applied role-type match cap", {
+    originalScore: score,
+    cappedScore,
+    roleTypeMatch,
+  });
+  return { score: cappedScore, reason: `${reason} ${capNote}` }; // the note is appended after the original reason, separated by a single space
+}
+
 function hasNonEmptyProfile(p: JobSearchProfile): boolean {
  return (
    p.targetRoles.length > 0 ||
@ -339,24 +389,39 @@ About the Candidate: ${p.aboutMe || "Not provided"}`

  const dealBreakerRules = hasProfilePrefs
    ? `
-DEAL-BREAKER RULES (STRICTLY ENFORCE):
- If the job's primary role type fundamentally mismatches the candidate's target roles, score MUST be 0-20.
-  Example: If candidate wants "automation tester" roles, a "Full Stack Developer" job should score very low
-  even if the description mentions testing tools. The JOB TITLE and PRIMARY RESPONSIBILITIES matter most.
+DEAL-BREAKER RULES (STRICTLY ENFORCE — these override all other criteria):
+- ROLE TYPE IS THE PRIMARY GATE. Evaluate the JOB TITLE and PRIMARY RESPONSIBILITIES first.
+  If the core role type mismatches the candidate's target roles, score MUST be 0-20 and roleTypeMatch MUST be 0-30.
+  Do NOT inflate the score because the candidate's resume languages (e.g. C#, Java, Python) overlap with the job's language requirements.
+  Shared programming languages alone do NOT make a role a good fit.
+- Examples of role-type mismatches (all should score 0-20):
+  * Candidate targets "QA Automation / SDET" → "Senior Software Engineer" (core dev, not QA) = score 10-20
+  * Candidate targets "QA Automation / SDET" → "Backend Engineer" (core dev) = score 10-20
+  * Candidate targets "QA Automation / SDET" → "DevOps Engineer" (infra, not QA) = score 10-15
+  * Candidate targets "QA Automation / SDET" → "Data Scientist" = score 0-10
+- Examples of good role-type matches (eligible for full score range):
+  * Candidate targets "QA Automation / SDET" → "SDET" = roleTypeMatch 95-100
+  * Candidate targets "QA Automation / SDET" → "QA Automation Engineer" = roleTypeMatch 90-100
+  * Candidate targets "QA Automation / SDET" → "Software Engineer in Test" = roleTypeMatch 85-95
+  * Candidate targets "QA Automation / SDET" → "Test Automation Engineer" = roleTypeMatch 85-95
 - If any deal-breaker keywords appear in the job title or core requirements, score MUST be 0-15.
 - If the job requires experience far beyond the candidate's level, reduce score by 30-50 points.
 - A job mentioning a candidate's skill as a minor "nice-to-have" does NOT make it a good match
-  if the core role is completely different from what the candidate wants.`
+  if the core role is completely different from what the candidate wants.
+- When in doubt about role type, err on the side of a LOWER score. The candidate would rather miss
+  a borderline match than waste time on roles that don't align with their career focus.`
    : "";

  const scoringCriteria = hasProfilePrefs
    ? `SCORING CRITERIA (with candidate preferences):
- Role type alignment with target roles: 0-35 points (MOST IMPORTANT - is this the KIND of job they want?)
- Skills match (must-haves weighted 3x, nice-to-haves 1x): 0-25 points
+- Role type alignment with target roles: 0-40 points (GATING FACTOR — if this is below 15, the total score MUST be below 25 regardless of other criteria)
+- Skills match with role-relevant skills (must-haves weighted 3x, nice-to-haves 1x): 0-25 points
 - Experience level match: 0-15 points
 - Location/remote work alignment with preferences: 0-10 points
- Industry/domain fit: 0-10 points
- Career growth and salary alignment: 0-5 points`
+- Industry/domain fit: 0-5 points
+- Career growth and salary alignment: 0-5 points
+
+CRITICAL: A "Senior Software Engineer" role and a "QA Automation Engineer" role are FUNDAMENTALLY DIFFERENT job types even if they share programming languages. Evaluate the PRIMARY function of the role, not just keyword overlap.`
    : `SCORING CRITERIA:
 - Skills match (technologies, frameworks, languages): 0-30 points
 - Experience level match: 0-25 points
@ -403,8 +468,13 @@ RULES FOR ANALYSIS FIELDS:
 - "suggestions": 1-3 actionable things the candidate could do to be stronger for this type of role.
 - "dealBreakerHits": List any deal-breakers triggered. Empty array if none.

-EXAMPLE VALID RESPONSE:
-{"score": 25, "reason": "This is a full-stack developer role but the candidate is targeting automation testing positions. The mention of Playwright in the description is minor and not the core focus.", "roleTypeMatch": 15, "strengths": ["Has Playwright experience mentioned in the job description", "Located in the same city"], "gaps": ["No React/Node.js full-stack experience", "Job requires 3+ years of backend development"], "suggestions": ["If interested in full-stack, build portfolio projects with React and Node.js", "Consider SDET roles that bridge testing and development"], "dealBreakerHits": ["Role type mismatch: Full Stack Developer vs target of Automation Tester"]}`;
+EXAMPLE RESPONSES:
+
+Role mismatch (candidate wants QA/SDET, job is core software engineering):
+{"score": 15, "reason": "This is a core software engineering role focused on GPU infrastructure and platform development. Despite shared languages (C#, Java, Python), the primary responsibilities are software development, not testing or quality assurance.", "roleTypeMatch": 10, "strengths": ["Experience with C# and Java", "Familiar with Azure cloud"], "gaps": ["Role is software development, not QA/SDET", "No GPU/HPC infrastructure experience", "No hardware/software interaction experience"], "suggestions": ["Focus on SDET or QA Automation roles at Microsoft instead", "Look for test infrastructure roles in cloud platform teams"], "dealBreakerHits": ["Role type mismatch: Senior Software Engineer (development) vs target of QA Automation/SDET"]}
+
+Good match (role aligns with candidate's target):
+{"score": 78, "reason": "Strong QA automation role with Playwright requirement matching the candidate's core expertise. CI/CD pipeline ownership aligns well with their DevOps experience.", "roleTypeMatch": 90, "strengths": ["5+ years Playwright experience exceeds the 2-year requirement", "Strong CI/CD pipeline experience with GitHub Actions"], "gaps": ["No experience with the company's specific domain"], "suggestions": ["Highlight regulated-industry QA experience from iGaming role"], "dealBreakerHits": []}`;
 }

 export function sanitizeProfileForPrompt(
@ -427,10 +497,11 @@ export function sanitizeProfileForPrompt(
    ? p.sections?.projects?.items.slice(0, 6)
    : [];

+  const basics = p.basics as Record<string, unknown> | undefined;
  return {
    basics: {
-      label: p.basics?.label,
-      summary: p.basics?.summary,
+      headline: basics?.headline || basics?.label || null,
+      summary: basics?.summary,
    },
    skills: p.sections?.skills ?? null,
    experience: experienceItems,
@ -439,6 +510,48 @@ export function sanitizeProfileForPrompt(
  };
 }

+const MOCK_TITLE_BOOST_KEYWORDS = [ // substrings looked up in the lowercased job title by mockScore; any single hit adds one flat +25 boost
+  "qa",
+  "qe",
+  "sdet",
+  "test", // NOTE(review): matched with includes(), so this also hits words that merely contain "test" - confirm that is acceptable
+  "quality",
+  "automation",
+  "playwright",
+  "cypress",
+  "selenium",
+];
+
+const MOCK_JD_BOOST_KEYWORDS = [ // substrings looked up in the lowercased job description by mockScore; each distinct keyword found adds +3
+  "playwright",
+  "cypress",
+  "selenium",
+  "test automation",
+  "qa automation",
+  "quality assurance",
+  "sdet",
+  "shift-left",
+  "bdd",
+  "e2e",
+  "end-to-end",
+  "regression",
+  "ci/cd",
+  "typescript",
+  "javascript",
+];
+
+const MOCK_PENALTY_KEYWORDS = [ // substrings looked up in the lowercased job title by mockScore; each keyword found subtracts 15
+  "principal",
+  "staff",
+  "manager",
+  "director",
+  "vp ", // NOTE(review): the trailing space means a title that ends in "vp" will not match - confirm this is intended
+  "vice president",
+  "instructor",
+  "trainer",
+  "teacher",
+];
+
 async function mockScore(
  job: Job,
  settings: { penalizeMissingSalary: boolean; missingSalaryPenalty: number },
@ -446,41 +559,22 @@ async function mockScore(
  const jd = (job.jobDescription || "").toLowerCase();
  const title = job.title.toLowerCase();

-  const goodKeywords = [
-    "typescript",
-    "react",
-    "node",
-    "python",
-    "web",
-    "frontend",
-    "backend",
-    "fullstack",
-    "software",
-    "engineer",
-    "developer",
-  ];
-  const badKeywords = [
-    "senior",
-    "5+ years",
-    "10+ years",
-    "principal",
-    "staff",
-    "manager",
-  ];
+  let score = 30;

-  let score = 50;
+  const titleHasQa = MOCK_TITLE_BOOST_KEYWORDS.some((kw) => title.includes(kw));
+  if (titleHasQa) score += 25;

-  for (const kw of goodKeywords) {
-    if (jd.includes(kw) || title.includes(kw)) score += 5;
+  for (const kw of MOCK_JD_BOOST_KEYWORDS) {
+    if (jd.includes(kw)) score += 3;
  }

-  for (const kw of badKeywords) {
-    if (jd.includes(kw) || title.includes(kw)) score -= 10;
+  for (const kw of MOCK_PENALTY_KEYWORDS) {
+    if (title.includes(kw)) score -= 15;
  }

  score = Math.min(100, Math.max(0, score));

-  const baseReason = "Scored using keyword matching (API key not configured)";
+  const baseReason = "Scored using keyword matching (LLM unavailable)";

  const penaltyResult = applySalaryPenalty(job, score, baseReason, settings);