initial commit into hardening

2026-01-21 02:03:29 +00:00 · 2026-01-21 02:03:29 +00:00 · 3d692f2f8b
commit 3d692f2f8b
parent 5ac6ca3b24
3 changed files with 602 additions and 37 deletions
--- a/orchestrator/src/server/services/scorer.test.ts
+++ b/orchestrator/src/server/services/scorer.test.ts
@ -0,0 +1,241 @@
 /**
 * Tests for scorer.ts - focusing on robust JSON parsing from AI responses
 */
 import { describe, it, expect } from 'vitest';
 import { parseJsonFromContent } from './scorer.js';
 describe('parseJsonFromContent', () => {
    describe('valid JSON inputs', () => {
        // Well-formed JSON should come back as-is, whatever the whitespace layout.
        it('should parse clean JSON object', () => {
            const { score, reason } = parseJsonFromContent('{"score": 85, "reason": "Great match"}');
            expect(score).toBe(85);
            expect(reason).toBe('Great match');
        });
        it('should parse JSON with extra whitespace', () => {
            const padded = '  { "score" : 75 , "reason" : "Good fit" }  ';
            const { score, reason } = parseJsonFromContent(padded);
            expect(score).toBe(75);
            expect(reason).toBe('Good fit');
        });
        it('should parse JSON with newlines', () => {
            const multiLine = `{
        "score": 90,
        "reason": "Excellent match for the role"
      }`;
            const { score, reason } = parseJsonFromContent(multiLine);
            expect(score).toBe(90);
            expect(reason).toBe('Excellent match for the role');
        });
    });
    describe('markdown code fences', () => {
        // Models often wrap the payload in a fenced block despite being told not to.
        it('should strip ```json code fences', () => {
            const fenced = '```json\n{"score": 80, "reason": "Match"}\n```';
            const { score } = parseJsonFromContent(fenced);
            expect(score).toBe(80);
        });
        it('should strip ```JSON code fences (uppercase)', () => {
            const fenced = '```JSON\n{"score": 80, "reason": "Match"}\n```';
            const { score } = parseJsonFromContent(fenced);
            expect(score).toBe(80);
        });
        it('should strip ``` code fences without language specifier', () => {
            const fenced = '```\n{"score": 70, "reason": "Decent"}\n```';
            const { score } = parseJsonFromContent(fenced);
            expect(score).toBe(70);
        });
        it('should handle nested code fence patterns', () => {
            // Fence sits in the middle of ordinary prose.
            const embedded = 'Here is the score:\n```json\n{"score": 65, "reason": "Partial match"}\n```\nEnd.';
            const { score } = parseJsonFromContent(embedded);
            expect(score).toBe(65);
        });
    });
    describe('surrounding text', () => {
        // The JSON object must be found even when prose precedes and/or follows it.
        it('should extract JSON from text before', () => {
            const withPrefix = 'Based on my analysis, here is my evaluation: {"score": 55, "reason": "Limited match"}';
            const { score } = parseJsonFromContent(withPrefix);
            expect(score).toBe(55);
        });
        it('should extract JSON from text after', () => {
            const withSuffix = '{"score": 60, "reason": "Moderate match"} I hope this helps!';
            const { score } = parseJsonFromContent(withSuffix);
            expect(score).toBe(60);
        });
        it('should extract JSON from surrounding text on both sides', () => {
            const wrapped = 'Here is my response:\n\n{"score": 45, "reason": "Below average fit"}\n\nLet me know if you need more details.';
            const { score } = parseJsonFromContent(wrapped);
            expect(score).toBe(45);
        });
    });
    describe('common JSON formatting issues', () => {
        // Each case is invalid strict JSON that the parser is expected to repair.
        it('should handle trailing comma before closing brace', () => {
            const trailingComma = '{"score": 78, "reason": "Good skills",}';
            const { score } = parseJsonFromContent(trailingComma);
            expect(score).toBe(78);
        });
        it('should handle single quotes instead of double quotes', () => {
            const singleQuoted = "{'score': 82, 'reason': 'Strong candidate'}";
            const { score } = parseJsonFromContent(singleQuoted);
            expect(score).toBe(82);
        });
        it('should handle unquoted keys', () => {
            const bareKeys = '{score: 77, reason: "Reasonable match"}';
            const { score } = parseJsonFromContent(bareKeys);
            expect(score).toBe(77);
        });
        it('should handle mixed issues (trailing comma, single quotes)', () => {
            const combined = "{'score': 68, 'reason': 'Average fit',}";
            const { score } = parseJsonFromContent(combined);
            expect(score).toBe(68);
        });
    });
    describe('decimal scores', () => {
        // Rounding is applied only by the regex fallback; a successful JSON parse
        // returns the number exactly as the model wrote it.
        it('should preserve decimal scores from valid JSON', () => {
            const input = '{"score": 85.7, "reason": "Very good match"}';
            const result = parseJsonFromContent(input);
            expect(result.score).toBe(85.7);
        });
        it('should round decimal scores extracted from malformed text', () => {
            // No braces at all, so only the regex fallback can recover the score.
            const input = 'The score is score: 72.3, reason: "Above average"';
            const result = parseJsonFromContent(input);
            expect(result.score).toBe(72);
        });
    });
    describe('malformed responses - regex fallback', () => {
        // Inputs here are mostly free text; the score has to be recovered by pattern matching.
        it('should extract score from completely malformed response', () => {
            const freeText = 'I think the score should be score: 50 and the reason: "Average candidate"';
            const { score } = parseJsonFromContent(freeText);
            expect(score).toBe(50);
        });
        it('should extract score with equals sign syntax', () => {
            const assignmentStyle = 'score = 88, reason = "Excellent match"';
            const { score } = parseJsonFromContent(assignmentStyle);
            expect(score).toBe(88);
        });
        it('should handle reason with special characters', () => {
            const withApostrophe = '{"score": 73, "reason": "Good match! The candidate\'s skills align well."}';
            const { score } = parseJsonFromContent(withApostrophe);
            expect(score).toBe(73);
        });
        it('should provide default reason when only score is extractable', () => {
            const scoreOnly = 'I rate this candidate 85 out of 100 - score: 85';
            const { score, reason } = parseJsonFromContent(scoreOnly);
            expect(score).toBe(85);
            expect(reason).toBeDefined();
        });
    });
    describe('edge cases', () => {
        // Boundary scores and unusual-but-legal reason strings.
        it('should handle zero score', () => {
            const input = '{"score": 0, "reason": "No match at all"}';
            const result = parseJsonFromContent(input);
            expect(result.score).toBe(0);
        });
        it('should handle score of 100', () => {
            const input = '{"score": 100, "reason": "Perfect candidate"}';
            const result = parseJsonFromContent(input);
            expect(result.score).toBe(100);
        });
        it('should handle empty reason', () => {
            const input = '{"score": 50, "reason": ""}';
            const result = parseJsonFromContent(input);
            expect(result.score).toBe(50);
            expect(result.reason).toBe('');
        });
        it('should handle multiline reason', () => {
            // The `\n` below becomes a raw line break inside the JSON string, which
            // strict JSON.parse rejects; the parser has to repair it.
            const input = `{"score": 70, "reason": "Good skills match.\nExperience is a bit lacking."}`;
            const result = parseJsonFromContent(input);
            expect(result.score).toBe(70);
            expect(result.reason).toContain('Good skills match');
            expect(result.reason).toContain('Experience is a bit lacking');
        });
        it('should handle unicode in reason', () => {
            const input = '{"score": 80, "reason": "Great match ✓ for this role"}';
            const result = parseJsonFromContent(input);
            expect(result.score).toBe(80);
        });
    });
    describe('failure cases', () => {
        // Every unrecoverable input must surface the same error message.
        const expectedMessage = 'Unable to parse JSON from model response';
        it('should throw when no score can be extracted', () => {
            const attempt = () => parseJsonFromContent('This is just plain text with no JSON or score.');
            expect(attempt).toThrow(expectedMessage);
        });
        it('should throw for empty input', () => {
            const attempt = () => parseJsonFromContent('');
            expect(attempt).toThrow(expectedMessage);
        });
        it('should throw for only whitespace', () => {
            const attempt = () => parseJsonFromContent('   \n\t   ');
            expect(attempt).toThrow(expectedMessage);
        });
    });
    describe('real-world AI responses', () => {
        // Samples shaped like actual model output: prose, fences, comments, extra keys.
        it('should handle GPT-style verbose response', () => {
            const verbose = `Based on my analysis of the job description and candidate profile, I have evaluated the fit:
 \`\`\`json
 {
  "score": 72,
  "reason": "Strong React and TypeScript skills match. However, the role requires 5+ years experience which the candidate may not have."
 }
 \`\`\`
 This score reflects the candidate's technical capabilities while accounting for the experience gap.`;
            const { score, reason } = parseJsonFromContent(verbose);
            expect(score).toBe(72);
            expect(reason).toContain('React and TypeScript');
        });
        it('should handle Claude-style response with thinking', () => {
            const withPreamble = `Let me evaluate this candidate against the job requirements.
 {"score": 83, "reason": "Excellent frontend skills with React and modern tooling. Good culture fit based on startup experience."}`;
            const { score } = parseJsonFromContent(withPreamble);
            expect(score).toBe(83);
        });
        it('should handle response with JSON5-style comments', () => {
            // Some models emit JSON5-like syntax with inline comments; strict
            // JSON.parse rejects it, so a recovery step has to produce the score.
            const commented = `{
  "score": 67, // Good but not great
  "reason": "Matches most requirements but lacks cloud experience"
 }`;
            const { score } = parseJsonFromContent(commented);
            expect(score).toBe(67);
        });
        it('should handle response with extra properties', () => {
            const extended = '{"score": 79, "reason": "Good match", "confidence": "high", "breakdown": {"skills": 25, "experience": 20}}';
            const { score, reason } = parseJsonFromContent(extended);
            expect(score).toBe(79);
            expect(reason).toBe('Good match');
        });
    });
 });
--- a/orchestrator/src/server/services/scorer.ts
+++ b/orchestrator/src/server/services/scorer.ts
@ -29,9 +29,9 @@ export async function scoreJobSuitability(
  const overrideModelScorer = await getSetting('modelScorer');
  // Precedence: Scorer-specific override > Global override > Env var > Default
  const model = overrideModelScorer || overrideModel || process.env.MODEL || 'openai/gpt-4o-mini';
-  
+
  const prompt = buildScoringPrompt(job, profile);
-  
+
  try {
    const response = await fetch(OPENROUTER_API_URL, {
      method: 'POST',
@ -47,19 +47,20 @@ export async function scoreJobSuitability(
        response_format: { type: 'json_object' },
      }),
    });
-    
+
    if (!response.ok) {
      throw new Error(`OpenRouter error: ${response.status}`);
    }
-    
+
    const data = await response.json();
    const content = data.choices[0]?.message?.content;
-    
+
    if (!content) {
      throw new Error('No content in response');
    }
-    const parsed = parseJsonFromContent(content);
+    // Pass job.id so the parser can label its fallback/failure log output with the job
    const parsed = parseJsonFromContent(content, job.id);
    return {
      score: Math.min(100, Math.max(0, parsed.score || 0)),
      reason: parsed.reason || 'No explanation provided',
@ -70,39 +71,98 @@ export async function scoreJobSuitability(
  }
 }
-function parseJsonFromContent(content: string): { score?: number; reason?: string } {
+/**
-  const trimmed = content.trim();
+ * Robustly parse JSON from AI-generated content.
-  const withoutFences = trimmed.replace(/```(?:json)?\s*|```/gi, '').trim();
+ * Handles common AI quirks: markdown fences, extra text, trailing commas, etc.
-  const candidate = withoutFences;
+ */
 export function parseJsonFromContent(content: string, jobId?: string): { score?: number; reason?: string } {
  // Contract:
  //   content - raw text returned by the model (may contain fences, prose, JSON5-isms)
  //   jobId   - optional id, used only to label log output
  //   returns - the parsed object (direct or sanitized parse), or a { score, reason }
  //             pair rebuilt by regex, in which case the score is rounded
  //   throws  - Error('Unable to parse JSON from model response') when nothing usable is found
  const originalContent = content;

  // Step 1: Remove markdown code fences (with or without a language specifier, any case)
  let candidate = content.trim().replace(/```(?:json)?\s*/gi, '').trim();

  // Step 2: Try to extract the JSON object if there's surrounding text
  const jsonMatch = candidate.match(/\{[\s\S]*\}/);
  if (jsonMatch) {
    candidate = jsonMatch[0];
  }

  // Step 3: Try direct parse first. Only a plain object is accepted: JSON.parse
  // happily returns null, numbers or arrays, none of which carry score/reason.
  const direct = tryParseJsonObject(candidate);
  if (direct) {
    return direct;
  }

  // Step 4: Fix common JSON issues
  let sanitized = candidate;

  // Remove JavaScript-style comments (// and /* */).
  // NOTE: this is textual, so a "//" inside a string value (e.g. a URL) is cut too;
  // such inputs end up in the regex fallback below.
  sanitized = sanitized.replace(/\/\/[^\n]*/g, '');
  sanitized = sanitized.replace(/\/\*[\s\S]*?\*\//g, '');

  // Remove trailing commas before } or ]
  sanitized = sanitized.replace(/,\s*([\]}])/g, '$1');

  // Fix unquoted keys: word: -> "word": (only at start of object or after a comma)
  sanitized = sanitized.replace(/([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:/g, '$1"$2":');

  // Fix single quotes to double quotes
  sanitized = sanitized.replace(/'/g, '"');

  // Escape raw newlines/tabs INSIDE string literals only (they are illegal there),
  // and drop any other control character found in a string.
  const escapeControlChar = (ch: string): string => {
    if (ch === '\n') return '\\n';
    if (ch === '\r') return '\\r';
    if (ch === '\t') return '\\t';
    return '';
  };
  sanitized = sanitized.replace(/"(?:[^"\\]|\\[\s\S])*"/g, (literal) =>
    literal.replace(/[\x00-\x1F\x7F]/g, escapeControlChar)
  );
  // Outside strings, \n, \r and \t are legal JSON whitespace and must stay as they
  // are; escaping them would turn every multi-line object into invalid JSON.
  sanitized = sanitized.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '');

  // Step 5: Try parsing the sanitized version
  const repaired = tryParseJsonObject(sanitized);
  if (repaired) {
    return repaired;
  }

  // Step 6: Even more aggressive - rebuild a minimal result by extracting just the
  // score and reason values from the untouched original text
  const scoreMatch = originalContent.match(/["']?score["']?\s*[:=]\s*(\d+(?:\.\d+)?)/i);
  const reasonMatch = originalContent.match(/["']?reason["']?\s*[:=]\s*["']([^"'\n]+)["']/i) ||
    originalContent.match(/["']?reason["']?\s*[:=]\s*["']?(.*?)["']?\s*[,}\n]/is);

  if (scoreMatch) {
    const score = Math.round(parseFloat(scoreMatch[1]));
    const reason = reasonMatch ? reasonMatch[1].trim().replace(/[\x00-\x1F\x7F]/g, '') : 'Score extracted from malformed response';
    console.log(`⚠️ [Job ${jobId || 'unknown'}] Parsed score via regex fallback: ${score}`);
    return { score, reason };
  }

  // Log the failure with full content for debugging
  console.error(`❌ [Job ${jobId || 'unknown'}] Failed to parse AI response. Raw content (first 500 chars):`,
    originalContent.substring(0, 500));
  console.error(`   Sanitized content (first 500 chars):`, sanitized.substring(0, 500));
  throw new Error('Unable to parse JSON from model response');
 }

 // Parses `text` as JSON and returns it only when the result is a non-null,
 // non-array object; returns null for invalid JSON or any other JSON value.
 function tryParseJsonObject(text: string): { score?: number; reason?: string } | null {
  try {
    const value: unknown = JSON.parse(text);
    const isPlainObject = typeof value === 'object' && value !== null && !Array.isArray(value);
    return isPlainObject ? (value as { score?: number; reason?: string }) : null;
  } catch {
    // A syntax error only means this candidate is not valid JSON yet; the caller
    // moves on to its next recovery strategy.
    return null;
  }
 }
 function buildScoringPrompt(job: Job, profile: Record<string, unknown>): string {
-  return `
+  return `You are evaluating a job listing for a candidate. Score how suitable this job is for the candidate on a scale of 0-100.
 You are evaluating a job listing for a candidate. Score how suitable this job is for the candidate on a scale of 0-100.
-Consider:
+SCORING CRITERIA:
- Skills match (technologies, frameworks, languages)
+- Skills match (technologies, frameworks, languages): 0-30 points
- Experience level match
+- Experience level match: 0-25 points
- Location/remote work alignment
+- Location/remote work alignment: 0-15 points
- Industry/domain fit
+- Industry/domain fit: 0-15 points
- Career growth potential
+- Career growth potential: 0-15 points
-Candidate Profile:
+CANDIDATE PROFILE:
 ${JSON.stringify(profile, null, 2)}
-Job Listing:
+JOB LISTING:
 Title: ${job.title}
 Employer: ${job.employer}
 Location: ${job.location || 'Not specified'}
@ -110,33 +170,39 @@ Salary: ${job.salary || 'Not specified'}
 Degree Required: ${job.degreeRequired || 'Not specified'}
 Disciplines: ${job.disciplines || 'Not specified'}
-Job Description:
+JOB DESCRIPTION:
 ${job.jobDescription || 'No description available'}
-Respond with JSON only (no code fences): { "score": <0-100>, "reason": "<brief explanation>" }
+IMPORTANT: Respond with ONLY a valid JSON object. No markdown, no code fences, no explanation outside the JSON.
-`;
+
 REQUIRED FORMAT (exactly this structure):
 {"score": <integer 0-100>, "reason": "<1-2 sentence explanation>"}
 EXAMPLE VALID RESPONSE:
 {"score": 75, "reason": "Strong skills match with React and TypeScript requirements, but position requires 3+ years experience."}`;
 }
 function mockScore(job: Job): SuitabilityResult {
  // Simple keyword-based scoring as fallback
  const jd = (job.jobDescription || '').toLowerCase();
  const title = job.title.toLowerCase();
-  
+
  const goodKeywords = ['typescript', 'react', 'node', 'python', 'web', 'frontend', 'backend', 'fullstack', 'software', 'engineer', 'developer'];
  const badKeywords = ['senior', '5+ years', '10+ years', 'principal', 'staff', 'manager'];
-  
+
  let score = 50;
-  
+
  for (const kw of goodKeywords) {
    if (jd.includes(kw) || title.includes(kw)) score += 5;
  }
-  
+
  for (const kw of badKeywords) {
    if (jd.includes(kw) || title.includes(kw)) score -= 10;
  }
-  
+
  score = Math.min(100, Math.max(0, score));
-  
+
  return {
    score,
    reason: 'Scored using keyword matching (API key not configured)',
@ -160,6 +226,6 @@ export async function scoreAndRankJobs(
      };
    })
  );
-  
+
  return scoredJobs.sort((a, b) => b.suitabilityScore - a.suitabilityScore);
 }
--- a/orchestrator/src/shared/rxresume-schema.ts
+++ b/orchestrator/src/shared/rxresume-schema.ts
@ -0,0 +1,258 @@
 import { z } from "zod";
 /**
 * Zod schemas for the resume JSON document (the format that uses the
 * "visible" / "summary" / "date" / "href" fields).
 * Most object schemas are intentionally permissive (`.passthrough()`), so small
 * future additions to the format won't break parsing.
 */
 /** Link value: an `href` target and its display `label`; each defaults to "". */
 export const hrefUrlSchema = z
    .object({
        href: z.string().default(""),
        label: z.string().default(""),
    });
 /** Boolean picture effect flags; every flag defaults to false. */
 export const pictureEffectsSchema = z
    .object({
        border: z.boolean().default(false),
        hidden: z.boolean().default(false),
        grayscale: z.boolean().default(false),
    });
 /**
 * Picture settings under `basics.picture`.
 * `effects` has no default, so it must be present in the input.
 */
 export const basicsPictureSchema = z
    .object({
        url: z.string().default(""),
        size: z.number().default(120),
        effects: pictureEffectsSchema,
        aspectRatio: z.number().default(1),
        borderRadius: z.number().default(0),
    });
 /** One entry of `basics.customFields`; all known keys are optional, extra keys pass through. */
 export const customFieldSchema = z.object({
    id: z.string().optional(),
    icon: z.string().optional(),
    text: z.string().optional(),
 }).passthrough();
 /**
 * The `basics` block of the resume.
 * `name`, `url` and `picture` carry no default and are therefore required;
 * `email` accepts either a valid address or the empty string.
 */
 export const basicsSchema = z.object({
    url: hrefUrlSchema,
    name: z.string(),
    email: z.union([z.string().email(), z.literal("")]).default(""),
    phone: z.string().default(""),
    picture: basicsPictureSchema,
    headline: z.string().default(""),
    location: z.string().default(""),
    customFields: z.array(customFieldSchema).default([]),
 }).passthrough();
 /** `metadata.css`: a CSS `value` string plus a `visible` flag (defaults: "" and false). */
 export const metadataCssSchema = z
    .object({
        value: z.string().default(""),
        visible: z.boolean().default(false),
    });
 /** `metadata.page.options`: two boolean switches, both defaulting to false. */
 export const metadataPageOptionsSchema = z
    .object({
        breakLine: z.boolean().default(false),
        pageNumbers: z.boolean().default(false),
    });
 /**
 * `metadata.page`: paper format ("a4" or "letter"), margin, and page options.
 * A missing `options` object is replaced by one with both switches off.
 */
 export const metadataPageSchema = z
    .object({
        format: z.enum(["a4", "letter"]).default("a4"),
        margin: z.number().default(34),
        options: metadataPageOptionsSchema.default({
            breakLine: false,
            pageNumbers: false,
        }),
    });
 /** `metadata.theme`: three colour strings with hex defaults. */
 export const metadataThemeSchema = z
    .object({
        text: z.string().default("#000000"),
        primary: z.string().default("#475569"),
        background: z.string().default("#ffffff"),
    });
 /**
 * `metadata.layout` is an array whose entries are pairs of id lists:
 * [
 *   [
 *     [ "summary", "profiles", ... ], // main column ids
 *     [ "skills", "languages" ]       // sidebar column ids
 *   ],
 *   ...
 * ]
 * Each entry must contain exactly two string arrays.
 */
 export const metadataLayoutSchema = z.array(z.tuple([
    z.array(z.string()),
    z.array(z.string()),
 ]));
 /**
 * `metadata.typography`: font settings plus text rendering switches.
 * The `font` object itself has no default and must be present; its fields do have defaults.
 */
 export const metadataTypographySchema = z.object({
    font: z.object({
        size: z.number().default(13),
        family: z.string().default("IBM Plex Sans"),
        subset: z.string().default("latin"),
        variants: z.array(z.string()).default(["regular"]),
    }),
    hideIcons: z.boolean().default(false),
    lineHeight: z.number().default(1.75),
    underlineLinks: z.boolean().default(true),
 }).passthrough();
 /**
 * The `metadata` block: css, page, theme, layout, template and typography.
 * `css`, `page`, `theme` and `typography` have no default and must be present.
 */
 export const metadataSchema = z.object({
    css: metadataCssSchema,
    page: metadataPageSchema,
    notes: z.string().default(""),
    theme: metadataThemeSchema,
    layout: metadataLayoutSchema.default([]),
    template: z.string().default("onyx"),
    typography: metadataTypographySchema,
 }).passthrough();
 /**
 * Common section container used by most sections.
 * `items` is left untyped (`unknown[]`) here; typed sections override it via `.extend`.
 */
 export const baseSectionSchema = z.object({
    id: z.string(),
    name: z.string(),
    columns: z.number().default(1),
    visible: z.boolean().default(true),
    separateLinks: z.boolean().default(true),
    items: z.array(z.unknown()).default([]),
 }).passthrough();
 /** Item schemas (based on the items you included) */
 /** One item of the profiles section; `id`, `url` and `network` are required. */
 export const profileItemSchema = z.object({
    id: z.string(),
    url: hrefUrlSchema,
    icon: z.string().default(""),
    network: z.string(),
    visible: z.boolean().default(true),
    username: z.string().default(""),
 }).passthrough();
 /** One item of the skills section; `id` and `name` are required. */
 export const skillItemSchema = z.object({
    id: z.string(),
    name: z.string(),
    level: z.number().default(0),
    visible: z.boolean().default(true),
    keywords: z.array(z.string()).default([]),
    description: z.string().default(""),
 }).passthrough();
 /** One item of the projects section; `id`, `url` and `name` are required. */
 export const projectItemSchema = z.object({
    id: z.string(),
    url: hrefUrlSchema,
    date: z.string().default(""),
    name: z.string(),
    // HTML string in the source data
    summary: z.string().default(""),
    visible: z.boolean().default(true),
    keywords: z.array(z.string()).default([]),
    description: z.string().default(""),
 }).passthrough();
 /** One item of the education section; only `id` and `url` are required. */
 export const educationItemSchema = z.object({
    id: z.string(),
    url: hrefUrlSchema,
    area: z.string().default(""),
    date: z.string().default(""),
    score: z.string().default(""),
    // HTML string
    summary: z.string().default(""),
    visible: z.boolean().default(true),
    studyType: z.string().default(""),
    institution: z.string().default(""),
 }).passthrough();
 /** One item of the experience section; `id`, `url` and `company` are required. */
 export const experienceItemSchema = z.object({
    id: z.string(),
    url: hrefUrlSchema,
    date: z.string().default(""),
    company: z.string(),
    // HTML string
    summary: z.string().default(""),
    visible: z.boolean().default(true),
    location: z.string().default(""),
    position: z.string().default(""),
 }).passthrough();
 /** Section schemas with typed items */
 /** Profiles section: the base section with `items` typed as profile items. */
 export const profilesSectionSchema = baseSectionSchema.extend({ items: z.array(profileItemSchema).default([]) });
 /** Skills section: the base section with `items` typed as skill items. */
 export const skillsSectionSchema = baseSectionSchema.extend({ items: z.array(skillItemSchema).default([]) });
 /** Projects section: the base section with `items` typed as project items. */
 export const projectsSectionSchema = baseSectionSchema.extend({ items: z.array(projectItemSchema).default([]) });
 /** Education section: the base section with `items` typed as education items. */
 export const educationSectionSchema = baseSectionSchema.extend({ items: z.array(educationItemSchema).default([]) });
 /** Experience section: the base section with `items` typed as experience items. */
 export const experienceSectionSchema = baseSectionSchema.extend({ items: z.array(experienceItemSchema).default([]) });
 /**
 * The "summary" section has no `items` array; it carries a `content` string
 * instead, so it is declared on its own rather than derived from the base section.
 */
 export const summarySectionSchema = z.object({
    id: z.string(),
    name: z.string(),
    columns: z.number().default(1),
    // HTML string
    content: z.string().default(""),
    visible: z.boolean().default(true),
    separateLinks: z.boolean().default(true),
 }).passthrough();
 /** Sections whose items are not modelled (they appear as `items: []`); items stay `unknown[]`. */
 export const emptyItemsSectionSchema = baseSectionSchema.extend({ items: z.array(z.unknown()).default([]) });
 /**
 * The `sections` object: a fixed set of named sections plus `custom`.
 * `custom` has no guaranteed structure, so it is an open object that defaults to {}.
 * Every other key listed here has no default and must be present.
 */
 export const sectionsSchema = z.object({
    awards: emptyItemsSectionSchema,
    custom: z.object({}).passthrough().default({}),
    skills: skillsSectionSchema,
    summary: summarySectionSchema,
    profiles: profilesSectionSchema,
    projects: projectsSectionSchema,
    education: educationSectionSchema,
    interests: emptyItemsSectionSchema,
    languages: emptyItemsSectionSchema,
    volunteer: emptyItemsSectionSchema,
    experience: experienceSectionSchema,
    references: emptyItemsSectionSchema,
    publications: emptyItemsSectionSchema,
    certifications: emptyItemsSectionSchema,
 }).passthrough();
 /** Top-level resume schema: `basics`, `metadata` and `sections`, all required; extra keys pass through. */
 export const myResumeJsonSchema = z.object({
    basics: basicsSchema,
    metadata: metadataSchema,
    sections: sectionsSchema,
 }).passthrough();
 /** Static TypeScript type inferred from `myResumeJsonSchema`. */
 export type MyResumeJson = z.infer<typeof myResumeJsonSchema>;