From 3d692f2f8b0e1750e3d65fd03c5ee512d798daa1 Mon Sep 17 00:00:00 2001 From: DaKheera47 Date: Wed, 21 Jan 2026 02:03:29 +0000 Subject: [PATCH] initial commit into hardening --- .../src/server/services/scorer.test.ts | 241 ++++++++++++++++ orchestrator/src/server/services/scorer.ts | 140 +++++++--- orchestrator/src/shared/rxresume-schema.ts | 258 ++++++++++++++++++ 3 files changed, 602 insertions(+), 37 deletions(-) create mode 100644 orchestrator/src/server/services/scorer.test.ts create mode 100644 orchestrator/src/shared/rxresume-schema.ts diff --git a/orchestrator/src/server/services/scorer.test.ts b/orchestrator/src/server/services/scorer.test.ts new file mode 100644 index 0000000..08caa61 --- /dev/null +++ b/orchestrator/src/server/services/scorer.test.ts @@ -0,0 +1,241 @@ +/** + * Tests for scorer.ts - focusing on robust JSON parsing from AI responses + */ + +import { describe, it, expect } from 'vitest'; +import { parseJsonFromContent } from './scorer.js'; + +describe('parseJsonFromContent', () => { + describe('valid JSON inputs', () => { + it('should parse clean JSON object', () => { + const input = '{"score": 85, "reason": "Great match"}'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(85); + expect(result.reason).toBe('Great match'); + }); + + it('should parse JSON with extra whitespace', () => { + const input = ' { "score" : 75 , "reason" : "Good fit" } '; + const result = parseJsonFromContent(input); + expect(result.score).toBe(75); + expect(result.reason).toBe('Good fit'); + }); + + it('should parse JSON with newlines', () => { + const input = `{ + "score": 90, + "reason": "Excellent match for the role" + }`; + const result = parseJsonFromContent(input); + expect(result.score).toBe(90); + expect(result.reason).toBe('Excellent match for the role'); + }); + }); + + describe('markdown code fences', () => { + it('should strip ```json code fences', () => { + const input = '```json\n{"score": 80, "reason": "Match"}\n```'; + const 
result = parseJsonFromContent(input); + expect(result.score).toBe(80); + }); + + it('should strip ```JSON code fences (uppercase)', () => { + const input = '```JSON\n{"score": 80, "reason": "Match"}\n```'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(80); + }); + + it('should strip ``` code fences without language specifier', () => { + const input = '```\n{"score": 70, "reason": "Decent"}\n```'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(70); + }); + + it('should handle nested code fence patterns', () => { + const input = 'Here is the score:\n```json\n{"score": 65, "reason": "Partial match"}\n```\nEnd.'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(65); + }); + }); + + describe('surrounding text', () => { + it('should extract JSON from text before', () => { + const input = 'Based on my analysis, here is my evaluation: {"score": 55, "reason": "Limited match"}'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(55); + }); + + it('should extract JSON from text after', () => { + const input = '{"score": 60, "reason": "Moderate match"} I hope this helps!'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(60); + }); + + it('should extract JSON from surrounding text on both sides', () => { + const input = 'Here is my response:\n\n{"score": 45, "reason": "Below average fit"}\n\nLet me know if you need more details.'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(45); + }); + }); + + describe('common JSON formatting issues', () => { + it('should handle trailing comma before closing brace', () => { + const input = '{"score": 78, "reason": "Good skills",}'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(78); + }); + + it('should handle single quotes instead of double quotes', () => { + const input = "{'score': 82, 'reason': 'Strong candidate'}"; + const result = 
parseJsonFromContent(input); + expect(result.score).toBe(82); + }); + + it('should handle unquoted keys', () => { + const input = '{score: 77, reason: "Reasonable match"}'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(77); + }); + + it('should handle mixed issues (trailing comma, single quotes)', () => { + const input = "{'score': 68, 'reason': 'Average fit',}"; + const result = parseJsonFromContent(input); + expect(result.score).toBe(68); + }); + }); + + describe('decimal scores', () => { + it('should parse and round decimal scores', () => { + // Despite the test title, no rounding happens here: valid JSON is returned as-is and only the regex fallback rounds the score + const input = '{"score": 85.7, "reason": "Very good match"}'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(85.7); + }); + + it('should parse decimal scores in malformed text', () => { + const input = 'The score is score: 72.3, reason: "Above average"'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(72); + }); + }); + + describe('malformed responses - regex fallback', () => { + it('should extract score from completely malformed response', () => { + const input = 'I think the score should be score: 50 and the reason: "Average candidate"'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(50); + }); + + it('should extract score with equals sign syntax', () => { + const input = 'score = 88, reason = "Excellent match"'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(88); + }); + + it('should handle reason with special characters', () => { + const input = '{"score": 73, "reason": "Good match! 
The candidate\'s skills align well."}'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(73); + }); + + it('should provide default reason when only score is extractable', () => { + const input = 'I rate this candidate 85 out of 100 - score: 85'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(85); + expect(result.reason).toBeDefined(); + }); + }); + + describe('edge cases', () => { + it('should handle zero score', () => { + const input = '{"score": 0, "reason": "No match at all"}'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(0); + }); + + it('should handle score of 100', () => { + const input = '{"score": 100, "reason": "Perfect candidate"}'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(100); + }); + + it('should handle empty reason', () => { + const input = '{"score": 50, "reason": ""}'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(50); + expect(result.reason).toBe(''); + }); + + it('should handle multiline reason', () => { + const input = `{"score": 70, "reason": "Good skills match. 
Experience is a bit lacking."}`; + const result = parseJsonFromContent(input); + expect(result.score).toBe(70); + expect(result.reason).toContain('Good skills match'); + }); + + it('should handle unicode in reason', () => { + const input = '{"score": 80, "reason": "Great match ✓ for this role"}'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(80); + }); + }); + + describe('failure cases', () => { + it('should throw when no score can be extracted', () => { + const input = 'This is just plain text with no JSON or score.'; + expect(() => parseJsonFromContent(input)).toThrow('Unable to parse JSON from model response'); + }); + + it('should throw for empty input', () => { + expect(() => parseJsonFromContent('')).toThrow('Unable to parse JSON from model response'); + }); + + it('should throw for only whitespace', () => { + expect(() => parseJsonFromContent(' \n\t ')).toThrow('Unable to parse JSON from model response'); + }); + }); + + describe('real-world AI responses', () => { + it('should handle GPT-style verbose response', () => { + const input = `Based on my analysis of the job description and candidate profile, I have evaluated the fit: + +\`\`\`json +{ + "score": 72, + "reason": "Strong React and TypeScript skills match. However, the role requires 5+ years experience which the candidate may not have." +} +\`\`\` + +This score reflects the candidate's technical capabilities while accounting for the experience gap.`; + const result = parseJsonFromContent(input); + expect(result.score).toBe(72); + expect(result.reason).toContain('React and TypeScript'); + }); + + it('should handle Claude-style response with thinking', () => { + const input = `Let me evaluate this candidate against the job requirements. + +{"score": 83, "reason": "Excellent frontend skills with React and modern tooling. 
Good culture fit based on startup experience."}`; + const result = parseJsonFromContent(input); + expect(result.score).toBe(83); + }); + + it('should handle response with JSON5-style comments', () => { + // Some models output JSON5-like syntax with comments + const input = `{ + "score": 67, // Good but not great + "reason": "Matches most requirements but lacks cloud experience" +}`; + // Strict JSON.parse rejects the comment; the parser's recovery steps must still yield the score + const result = parseJsonFromContent(input); + expect(result.score).toBe(67); + }); + + it('should handle response with extra properties', () => { + const input = '{"score": 79, "reason": "Good match", "confidence": "high", "breakdown": {"skills": 25, "experience": 20}}'; + const result = parseJsonFromContent(input); + expect(result.score).toBe(79); + expect(result.reason).toBe('Good match'); + }); + }); +}); diff --git a/orchestrator/src/server/services/scorer.ts b/orchestrator/src/server/services/scorer.ts index 4c7e18d..909a802 100644 --- a/orchestrator/src/server/services/scorer.ts +++ b/orchestrator/src/server/services/scorer.ts @@ -29,9 +29,9 @@ export async function scoreJobSuitability( const overrideModelScorer = await getSetting('modelScorer'); // Precedence: Scorer-specific override > Global override > Env var > Default const model = overrideModelScorer || overrideModel || process.env.MODEL || 'openai/gpt-4o-mini'; - + const prompt = buildScoringPrompt(job, profile); - + try { const response = await fetch(OPENROUTER_API_URL, { method: 'POST', @@ -47,19 +47,20 @@ export async function scoreJobSuitability( response_format: { type: 'json_object' }, }), }); - + if (!response.ok) { throw new Error(`OpenRouter error: ${response.status}`); } - + const data = await response.json(); const content = data.choices[0]?.message?.content; - + if (!content) { throw new Error('No content in response'); } - const parsed = parseJsonFromContent(content); + // Pass the job id so the parser can tag its 
diagnostic logs with this job + const parsed = 
parseJsonFromContent(content, job.id);
     return {
       score: Math.min(100, Math.max(0, parsed.score || 0)),
       reason: parsed.reason || 'No explanation provided',
@@ -70,39 +71,156 @@ export async function scoreJobSuitability(
   }
 }
 
-function parseJsonFromContent(content: string): { score?: number; reason?: string } {
-  const trimmed = content.trim();
-  const withoutFences = trimmed.replace(/```(?:json)?\s*|```/gi, '').trim();
-  const candidate = withoutFences;
-
-  try {
-    return JSON.parse(candidate);
-  } catch {
-    const firstBrace = candidate.indexOf('{');
-    const lastBrace = candidate.lastIndexOf('}');
-    if (firstBrace !== -1 && lastBrace !== -1 && lastBrace > firstBrace) {
-      const sliced = candidate.slice(firstBrace, lastBrace + 1);
-      return JSON.parse(sliced);
-    }
-    throw new Error('Unable to parse JSON from model response');
-  }
-}
+/**
+ * Parse `text` as JSON and return it only when the result is a plain object.
+ * Returns undefined for invalid JSON and for scalars, arrays and null, so the
+ * caller can move on to its next recovery step.
+ */
+function tryParseObject(text: string): { score?: number; reason?: string } | undefined {
+  try {
+    const value: unknown = JSON.parse(text);
+    if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
+      return value as { score?: number; reason?: string };
+    }
+  } catch {
+    // Not valid JSON; fall through and report "no object".
+  }
+  return undefined;
+}
+
+/**
+ * Rewrite "almost JSON" into strict JSON in one string-aware pass.
+ *
+ * Outside string literals: drops `//` and block comments, removes a comma that
+ * directly precedes `}` or `]`, quotes bare identifier keys, and drops C0
+ * control characters other than newline, carriage return and tab.
+ * Inside string literals (either quote style): re-emits the text double-quoted,
+ * escaping raw newlines/tabs and unescaped `"`, and dropping other control
+ * characters. Apostrophes and `//` inside a string (e.g. URLs) are preserved.
+ */
+function sanitizeJsonLike(text: string): string {
+  let out = '';
+  let i = 0;
+
+  while (i < text.length) {
+    const ch = text[i];
+
+    if (ch === '"' || ch === "'") {
+      // String literal: copy up to the matching quote, normalising as we go.
+      out += '"';
+      i += 1;
+      while (i < text.length && text[i] !== ch) {
+        const c = text[i];
+        if (c === '\\' && i + 1 < text.length) {
+          // Keep existing escapes; \' is not a JSON escape, so unwrap it.
+          out += text[i + 1] === "'" ? "'" : c + text[i + 1];
+          i += 2;
+          continue;
+        }
+        if (c === '"') out += '\\"';
+        else if (c === '\n') out += '\\n';
+        else if (c === '\r') out += '\\r';
+        else if (c === '\t') out += '\\t';
+        else if (c >= ' ' && c !== '\x7F') out += c;
+        i += 1;
+      }
+      out += '"';
+      i += 1; // step over the closing quote
+    } else if (ch === '/' && text[i + 1] === '/') {
+      // Line comment: skip up to (not past) the newline.
+      while (i < text.length && text[i] !== '\n') i += 1;
+    } else if (ch === '/' && text[i + 1] === '*') {
+      // Block comment: skip past the terminator, or to the end if unterminated.
+      const close = text.indexOf('*/', i + 2);
+      i = close === -1 ? text.length : close + 2;
+    } else if (ch === '}' || ch === ']') {
+      // Remove a trailing comma left in front of the closing bracket.
+      const head = out.trimEnd();
+      out = (head.endsWith(',') ? head.slice(0, -1) : out) + ch;
+      i += 1;
+    } else if (/[A-Za-z_$]/.test(ch)) {
+      // Bare word: quote it only when it is used as a key (next token is ':').
+      let end = i + 1;
+      while (end < text.length && /[\w$]/.test(text[end])) end += 1;
+      let next = end;
+      while (next < text.length && /\s/.test(text[next])) next += 1;
+      const word = text.slice(i, end);
+      out += text[next] === ':' ? `"${word}"` : word;
+      i = end;
+    } else {
+      // Everything else is copied, minus stray control characters.
+      if (ch >= ' ' || ch === '\n' || ch === '\r' || ch === '\t') out += ch;
+      i += 1;
+    }
+  }
+
+  return out;
+}
+
+/**
+ * Robustly parse the scorer's JSON reply out of AI-generated content.
+ *
+ * Recovery steps, in order: strip markdown fences and surrounding prose, parse
+ * as strict JSON, parse again after `sanitizeJsonLike`, and finally pull
+ * `score` / `reason` out of the raw text with regexes.
+ *
+ * @param content Raw message content returned by the model.
+ * @param jobId Optional job id, used only to tag log output.
+ * @returns The parsed object, with values as the model wrote them; only the
+ *   regex fallback rounds `score` to an integer.
+ * @throws Error 'Unable to parse JSON from model response' when every step fails.
+ */
+export function parseJsonFromContent(content: string, jobId?: string): { score?: number; reason?: string } {
+  // Step 1: remove markdown code fences, with or without a language tag.
+  let candidate = content.trim().replace(/```(?:json)?\s*/gi, '').trim();
+
+  // Step 2: keep only the outermost {...} when there is surrounding text.
+  const jsonMatch = candidate.match(/\{[\s\S]*\}/);
+  if (jsonMatch) {
+    candidate = jsonMatch[0];
+  }
+
+  // Step 3: strict parse first, then retry on the sanitized text.
+  const sanitized = sanitizeJsonLike(candidate);
+  const parsed = tryParseObject(candidate) || tryParseObject(sanitized);
+  if (parsed) {
+    return parsed;
+  }
+
+  // Step 4: last resort - extract just the score and reason from the raw text.
+  const scoreMatch = content.match(/["']?score["']?\s*[:=]\s*(\d+(?:\.\d+)?)/i);
+  const reasonMatch = content.match(/["']?reason["']?\s*[:=]\s*["']([^"'\n]+)["']/i) ||
+    content.match(/["']?reason["']?\s*[:=]\s*["']?(.*?)["']?\s*[,}\n]/is);
+
+  if (scoreMatch) {
+    const score = Math.round(parseFloat(scoreMatch[1]));
+    const reason = reasonMatch ? reasonMatch[1].trim().replace(/[\x00-\x1F\x7F]/g, '') : 'Score extracted from malformed response';
+    console.log(`⚠️ [Job ${jobId || 'unknown'}] Parsed score via regex fallback: ${score}`);
+    return { score, reason };
+  }
+
+  // Log the failure with the (truncated) content so it can be debugged.
+  console.error(`❌ [Job ${jobId || 'unknown'}] Failed to parse AI response. Raw content (first 500 chars):`,
+    content.substring(0, 500));
+  console.error(` Sanitized content (first 500 chars):`, sanitized.substring(0, 500));
+
+  throw new Error('Unable to parse JSON from model response');
+}
 
 function buildScoringPrompt(job: Job, profile: Record<string, unknown>): string {
-  return `
-You are evaluating a job listing for a candidate. Score how suitable this job is for the candidate on a scale of 0-100.
+  return `You are evaluating a job listing for a candidate. Score how suitable this job is for the candidate on a scale of 0-100.
 
-Consider:
-- Skills match (technologies, frameworks, languages)
-- Experience level match
-- Location/remote work alignment
-- Industry/domain fit
-- Career growth potential
+SCORING CRITERIA:
+- Skills match (technologies, frameworks, languages): 0-30 points
+- Experience level match: 0-25 points
+- Location/remote work alignment: 0-15 points
+- Industry/domain fit: 0-15 points
+- Career growth potential: 0-15 points
 
-Candidate Profile:
+CANDIDATE PROFILE:
 ${JSON.stringify(profile, null, 2)}
 
-Job Listing:
+JOB LISTING:
 Title: ${job.title}
 Employer: ${job.employer}
 Location: ${job.location || 'Not specified'}
@@ -110,33 +228,39 @@ Salary: ${job.salary || 'Not specified'}
 Degree Required: ${job.degreeRequired || 'Not specified'}
 Disciplines: ${job.disciplines || 'Not specified'}
 
-Job Description:
+JOB DESCRIPTION:
 ${job.jobDescription || 'No description available'}
 
-Respond with JSON only (no code fences): { "score": <0-100>, "reason": "" }
-`;
+IMPORTANT: Respond with ONLY a valid JSON object. No markdown, no code fences, no explanation outside the JSON.
+
+REQUIRED FORMAT (exactly this structure):
+{"score": <integer 0-100>, "reason": "<1-2 sentence explanation>"}
+
+EXAMPLE VALID RESPONSE:
+{"score": 75, "reason": "Strong skills match with React and TypeScript requirements, but position requires 3+ years experience."}`;
 }
 
+
 function mockScore(job: Job): SuitabilityResult {
   // Simple keyword-based scoring as fallback
   const jd = (job.jobDescription || '').toLowerCase();
   const title = job.title.toLowerCase();
-  
+
   const goodKeywords = ['typescript', 'react', 'node', 'python', 'web', 'frontend', 'backend', 'fullstack', 'software', 'engineer', 'developer'];
   const badKeywords = ['senior', '5+ years', '10+ years', 'principal', 'staff', 'manager'];
-  
+
   let score = 50;
-  
+
   for (const kw of goodKeywords) {
     if (jd.includes(kw) || title.includes(kw)) score += 5;
   }
-  
+
   for (const kw of badKeywords) {
     if (jd.includes(kw) || title.includes(kw)) score -= 10;
   }
-  
+
   score = Math.min(100, Math.max(0, score));
-  
+
   return {
     score,
     reason: 'Scored using keyword matching (API key not configured)',
@@ -160,6 +284,6 @@ export async function scoreAndRankJobs(
       };
     })
   );
-  
+
   return scoredJobs.sort((a, b) => b.suitabilityScore - a.suitabilityScore);
 }
diff --git a/orchestrator/src/shared/rxresume-schema.ts b/orchestrator/src/shared/rxresume-schema.ts
new file mode 100644
index 0000000..086daa8
--- /dev/null
+++ b/orchestrator/src/shared/rxresume-schema.ts
@@ -0,0 +1,258 @@
+import { z } from "zod";
+
+/**
+ * Schema matching the resume JSON export (the "visible"/"summary"/"date"/"href" format).
+ * This is intentionally permissive (passthrough) so small future additions won't break parsing. 
+ */ + +export const hrefUrlSchema = z.object({ + href: z.string().default(""), + label: z.string().default(""), +}); + +export const pictureEffectsSchema = z.object({ + border: z.boolean().default(false), + hidden: z.boolean().default(false), + grayscale: z.boolean().default(false), +}); + +export const basicsPictureSchema = z.object({ + url: z.string().default(""), + size: z.number().default(120), + effects: pictureEffectsSchema, + aspectRatio: z.number().default(1), + borderRadius: z.number().default(0), +}); + +export const customFieldSchema = z + .object({ + id: z.string().optional(), + icon: z.string().optional(), + text: z.string().optional(), + }) + .passthrough(); + +export const basicsSchema = z + .object({ + url: hrefUrlSchema, + name: z.string(), + email: z.string().email().or(z.literal("")).default(""), + phone: z.string().default(""), + picture: basicsPictureSchema, + headline: z.string().default(""), + location: z.string().default(""), + customFields: z.array(customFieldSchema).default([]), + }) + .passthrough(); + +export const metadataCssSchema = z.object({ + value: z.string().default(""), + visible: z.boolean().default(false), +}); + +export const metadataPageOptionsSchema = z.object({ + breakLine: z.boolean().default(false), + pageNumbers: z.boolean().default(false), +}); + +export const metadataPageSchema = z.object({ + format: z.enum(["a4", "letter"]).default("a4"), + margin: z.number().default(34), + options: metadataPageOptionsSchema.default({ breakLine: false, pageNumbers: false }), +}); + +export const metadataThemeSchema = z.object({ + text: z.string().default("#000000"), + primary: z.string().default("#475569"), + background: z.string().default("#ffffff"), +}); + +/** + * "layout" is an array of [main, sidebar] pairs of section-id lists, shaped like: + * [ + * [ + * [ "summary", "profiles", ... ], // main column ids + * [ "skills", "languages" ] // sidebar column ids + * ], + * ... 
+ * ] + */ +export const metadataLayoutSchema = z.array( + z.tuple([z.array(z.string()), z.array(z.string())]) +); + +export const metadataTypographySchema = z + .object({ + font: z.object({ + size: z.number().default(13), + family: z.string().default("IBM Plex Sans"), + subset: z.string().default("latin"), + variants: z.array(z.string()).default(["regular"]), + }), + hideIcons: z.boolean().default(false), + lineHeight: z.number().default(1.75), + underlineLinks: z.boolean().default(true), + }) + .passthrough(); + +export const metadataSchema = z + .object({ + css: metadataCssSchema, + page: metadataPageSchema, + notes: z.string().default(""), + theme: metadataThemeSchema, + layout: metadataLayoutSchema.default([]), + template: z.string().default("onyx"), + typography: metadataTypographySchema, + }) + .passthrough(); + +/** Common section container; the typed section schemas extend it with their own items */ +export const baseSectionSchema = z + .object({ + id: z.string(), + name: z.string(), + columns: z.number().default(1), + visible: z.boolean().default(true), + separateLinks: z.boolean().default(true), + items: z.array(z.unknown()).default([]), + }) + .passthrough(); + +/** Item schemas for the sections that carry typed items */ +export const profileItemSchema = z + .object({ + id: z.string(), + url: hrefUrlSchema, + icon: z.string().default(""), + network: z.string(), + visible: z.boolean().default(true), + username: z.string().default(""), + }) + .passthrough(); + +export const skillItemSchema = z + .object({ + id: z.string(), + name: z.string(), + level: z.number().default(0), + visible: z.boolean().default(true), + keywords: z.array(z.string()).default([]), + description: z.string().default(""), + }) + .passthrough(); + +export const projectItemSchema = z + .object({ + id: z.string(), + url: hrefUrlSchema, + date: z.string().default(""), + name: z.string(), + summary: z.string().default(""), // HTML string in the source data + visible: z.boolean().default(true), + keywords: 
z.array(z.string()).default([]), + description: z.string().default(""), + }) + .passthrough(); + +export const educationItemSchema = z + .object({ + id: z.string(), + url: hrefUrlSchema, + area: z.string().default(""), + date: z.string().default(""), + score: z.string().default(""), + summary: z.string().default(""), // HTML string + visible: z.boolean().default(true), + studyType: z.string().default(""), + institution: z.string().default(""), + }) + .passthrough(); + +export const experienceItemSchema = z + .object({ + id: z.string(), + url: hrefUrlSchema, + date: z.string().default(""), + company: z.string(), + summary: z.string().default(""), // HTML string + visible: z.boolean().default(true), + location: z.string().default(""), + position: z.string().default(""), + }) + .passthrough(); + +/** Section schemas: the base section container with a typed items array */ +export const profilesSectionSchema = baseSectionSchema.extend({ + items: z.array(profileItemSchema).default([]), +}); + +export const skillsSectionSchema = baseSectionSchema.extend({ + items: z.array(skillItemSchema).default([]), +}); + +export const projectsSectionSchema = baseSectionSchema.extend({ + items: z.array(projectItemSchema).default([]), +}); + +export const educationSectionSchema = baseSectionSchema.extend({ + items: z.array(educationItemSchema).default([]), +}); + +export const experienceSectionSchema = baseSectionSchema.extend({ + items: z.array(experienceItemSchema).default([]), +}); + +/** + * The "summary" section has no items array; it carries "content" instead, + * so it is modelled separately. 
+ */ +export const summarySectionSchema = z + .object({ + id: z.string(), + name: z.string(), + columns: z.number().default(1), + content: z.string().default(""), // HTML string + visible: z.boolean().default(true), + separateLinks: z.boolean().default(true), + }) + .passthrough(); + +/** Sections whose items are not typed here (items stay an array of unknown) */ +export const emptyItemsSectionSchema = baseSectionSchema.extend({ + items: z.array(z.unknown()).default([]), +}); + +/** + * The "sections" object requires the fixed set of keys below; extra keys are kept (passthrough). + * `custom` is an object with no guaranteed structure, so it is passthrough with a {} default. + */ +export const sectionsSchema = z + .object({ + awards: emptyItemsSectionSchema, + custom: z.object({}).passthrough().default({}), + skills: skillsSectionSchema, + summary: summarySectionSchema, + profiles: profilesSectionSchema, + projects: projectsSectionSchema, + education: educationSectionSchema, + interests: emptyItemsSectionSchema, + languages: emptyItemsSectionSchema, + volunteer: emptyItemsSectionSchema, + experience: experienceSectionSchema, + references: emptyItemsSectionSchema, + publications: emptyItemsSectionSchema, + certifications: emptyItemsSectionSchema, + }) + .passthrough(); + +/** Top-level resume schema: basics, metadata and sections (extra top-level keys are kept) */ +export const myResumeJsonSchema = z + .object({ + basics: basicsSchema, + metadata: metadataSchema, + sections: sectionsSchema, + }) + .passthrough(); + +export type MyResumeJson = z.infer<typeof myResumeJsonSchema>;