initial commit into hardening

This commit is contained in:
DaKheera47 2026-01-21 02:03:29 +00:00
parent 5ac6ca3b24
commit 3d692f2f8b
3 changed files with 602 additions and 37 deletions

View File

@ -0,0 +1,241 @@
/**
* Tests for scorer.ts - focusing on robust JSON parsing from AI responses
*/
import { describe, it, expect } from 'vitest';
import { parseJsonFromContent } from './scorer.js';
describe('parseJsonFromContent', () => {
describe('valid JSON inputs', () => {
  // Strictly valid JSON must come back with both fields intact.
  it('should parse clean JSON object', () => {
    const { score, reason } = parseJsonFromContent('{"score": 85, "reason": "Great match"}');
    expect(score).toBe(85);
    expect(reason).toBe('Great match');
  });

  // Padding around the object and around tokens is legal JSON whitespace.
  it('should parse JSON with extra whitespace', () => {
    const padded = ' { "score" : 75 , "reason" : "Good fit" } ';
    const { score, reason } = parseJsonFromContent(padded);
    expect(score).toBe(75);
    expect(reason).toBe('Good fit');
  });

  // Same payload spread over several lines, as a pretty-printing model would emit it.
  it('should parse JSON with newlines', () => {
    const multiline = ['{', '"score": 90,', '"reason": "Excellent match for the role"', '}'].join('\n');
    const { score, reason } = parseJsonFromContent(multiline);
    expect(score).toBe(90);
    expect(reason).toBe('Excellent match for the role');
  });
});
describe('markdown code fences', () => {
  // Each case wraps a JSON payload in a different fence flavour; only the score is checked.
  const fenceCases = [
    {
      title: 'should strip ```json code fences',
      raw: '```json\n{"score": 80, "reason": "Match"}\n```',
      score: 80,
    },
    {
      title: 'should strip ```JSON code fences (uppercase)',
      raw: '```JSON\n{"score": 80, "reason": "Match"}\n```',
      score: 80,
    },
    {
      title: 'should strip ``` code fences without language specifier',
      raw: '```\n{"score": 70, "reason": "Decent"}\n```',
      score: 70,
    },
    {
      title: 'should handle nested code fence patterns',
      raw: 'Here is the score:\n```json\n{"score": 65, "reason": "Partial match"}\n```\nEnd.',
      score: 65,
    },
  ];

  for (const { title, raw, score } of fenceCases) {
    it(title, () => {
      expect(parseJsonFromContent(raw).score).toBe(score);
    });
  }
});
describe('surrounding text', () => {
  // Prose in front of the object.
  it('should extract JSON from text before', () => {
    const withLeadIn = 'Based on my analysis, here is my evaluation: {"score": 55, "reason": "Limited match"}';
    expect(parseJsonFromContent(withLeadIn).score).toBe(55);
  });

  // Prose trailing the object.
  it('should extract JSON from text after', () => {
    const withSignOff = '{"score": 60, "reason": "Moderate match"} I hope this helps!';
    expect(parseJsonFromContent(withSignOff).score).toBe(60);
  });

  // Prose on both sides, separated from the object by blank lines.
  it('should extract JSON from surrounding text on both sides', () => {
    const sandwiched =
      'Here is my response:\n\n{"score": 45, "reason": "Below average fit"}\n\nLet me know if you need more details.';
    expect(parseJsonFromContent(sandwiched).score).toBe(45);
  });
});
describe('common JSON formatting issues', () => {
  // All of these inputs are rejected by a strict JSON.parse and rely on the parser's repair steps.
  it('should handle trailing comma before closing brace', () => {
    const { score } = parseJsonFromContent('{"score": 78, "reason": "Good skills",}');
    expect(score).toBe(78);
  });

  it('should handle single quotes instead of double quotes', () => {
    const { score } = parseJsonFromContent("{'score': 82, 'reason': 'Strong candidate'}");
    expect(score).toBe(82);
  });

  it('should handle unquoted keys', () => {
    const { score } = parseJsonFromContent('{score: 77, reason: "Reasonable match"}');
    expect(score).toBe(77);
  });

  it('should handle mixed issues (trailing comma, single quotes)', () => {
    const { score } = parseJsonFromContent("{'score': 68, 'reason': 'Average fit',}");
    expect(score).toBe(68);
  });
});
describe('decimal scores', () => {
  // Valid JSON is handed back exactly as parsed, so a fractional score is NOT rounded here.
  it('should preserve decimal scores from valid JSON (no rounding)', () => {
    const input = '{"score": 85.7, "reason": "Very good match"}';
    const result = parseJsonFromContent(input);
    expect(result.score).toBe(85.7);
  });

  // No JSON object can be recovered from this text, so the score is scraped from the prose
  // and rounded to the nearest integer.
  it('should round decimal scores recovered from malformed text', () => {
    const input = 'The score is score: 72.3, reason: "Above average"';
    const result = parseJsonFromContent(input);
    expect(result.score).toBe(72);
  });
});
describe('malformed responses - regex fallback', () => {
  // These inputs contain no parseable JSON object; both fields have to be scraped from the text,
  // so the reason is asserted alongside the score.
  it('should extract score from completely malformed response', () => {
    const input = 'I think the score should be score: 50 and the reason: "Average candidate"';
    const result = parseJsonFromContent(input);
    expect(result.score).toBe(50);
    expect(result.reason).toBe('Average candidate');
  });

  it('should extract score with equals sign syntax', () => {
    const input = 'score = 88, reason = "Excellent match"';
    const result = parseJsonFromContent(input);
    expect(result.score).toBe(88);
    expect(result.reason).toBe('Excellent match');
  });

  // Valid JSON whose reason contains punctuation and an apostrophe: the text must survive untouched.
  it('should handle reason with special characters', () => {
    const input = '{"score": 73, "reason": "Good match! The candidate\'s skills align well."}';
    const result = parseJsonFromContent(input);
    expect(result.score).toBe(73);
    expect(result.reason).toBe("Good match! The candidate's skills align well.");
  });

  // With no reason anywhere in the text, the parser must still supply one.
  it('should provide default reason when only score is extractable', () => {
    const input = 'I rate this candidate 85 out of 100 - score: 85';
    const result = parseJsonFromContent(input);
    expect(result.score).toBe(85);
    expect(result.reason).toBeDefined();
  });
});
describe('edge cases', () => {
  // Boundary scores: 0 is falsy, so it must not be mistaken for "missing".
  it('should handle zero score', () => {
    const input = '{"score": 0, "reason": "No match at all"}';
    const result = parseJsonFromContent(input);
    expect(result.score).toBe(0);
  });

  it('should handle score of 100', () => {
    const input = '{"score": 100, "reason": "Perfect candidate"}';
    const result = parseJsonFromContent(input);
    expect(result.score).toBe(100);
  });

  it('should handle empty reason', () => {
    const input = '{"score": 50, "reason": ""}';
    const result = parseJsonFromContent(input);
    expect(result.score).toBe(50);
    expect(result.reason).toBe('');
  });

  // The reason contains a RAW line break (the `\n` below is a real newline inside the JSON string),
  // which strict JSON forbids; both halves of the reason must still be recovered.
  it('should handle multiline reason', () => {
    const input = '{"score": 70, "reason": "Good skills match.\nExperience is a bit lacking."}';
    const result = parseJsonFromContent(input);
    expect(result.score).toBe(70);
    expect(result.reason).toContain('Good skills match');
    expect(result.reason).toContain('Experience is a bit lacking');
  });

  it('should handle unicode in reason', () => {
    const input = '{"score": 80, "reason": "Great match ✓ for this role"}';
    const result = parseJsonFromContent(input);
    expect(result.score).toBe(80);
    expect(result.reason).toBe('Great match ✓ for this role');
  });
});
describe('failure cases', () => {
  // Every unrecoverable input is expected to surface this exact message.
  const PARSE_ERROR = 'Unable to parse JSON from model response';

  it('should throw when no score can be extracted', () => {
    const attempt = () => parseJsonFromContent('This is just plain text with no JSON or score.');
    expect(attempt).toThrow(PARSE_ERROR);
  });

  it('should throw for empty input', () => {
    const attempt = () => parseJsonFromContent('');
    expect(attempt).toThrow(PARSE_ERROR);
  });

  it('should throw for only whitespace', () => {
    const attempt = () => parseJsonFromContent(' \n\t ');
    expect(attempt).toThrow(PARSE_ERROR);
  });
});
describe('real-world AI responses', () => {
  // Fenced JSON sandwiched between an introduction and a closing remark.
  it('should handle GPT-style verbose response', () => {
    const verbose = [
      'Based on my analysis of the job description and candidate profile, I have evaluated the fit:',
      '```json',
      '{',
      '"score": 72,',
      '"reason": "Strong React and TypeScript skills match. However, the role requires 5+ years experience which the candidate may not have."',
      '}',
      '```',
      "This score reflects the candidate's technical capabilities while accounting for the experience gap.",
    ].join('\n');
    const { score, reason } = parseJsonFromContent(verbose);
    expect(score).toBe(72);
    expect(reason).toContain('React and TypeScript');
  });

  // A sentence of preamble followed by a bare JSON object on the next line.
  it('should handle Claude-style response with thinking', () => {
    const withPreamble = [
      'Let me evaluate this candidate against the job requirements.',
      '{"score": 83, "reason": "Excellent frontend skills with React and modern tooling. Good culture fit based on startup experience."}',
    ].join('\n');
    expect(parseJsonFromContent(withPreamble).score).toBe(83);
  });

  // Some models emit JSON5-like output with inline comments. Strict JSON.parse rejects it,
  // so one of the parser's recovery paths has to produce the score.
  it('should handle response with JSON5-style comments', () => {
    const commented = [
      '{',
      '"score": 67, // Good but not great',
      '"reason": "Matches most requirements but lacks cloud experience"',
      '}',
    ].join('\n');
    expect(parseJsonFromContent(commented).score).toBe(67);
  });

  // Extra keys next to score/reason must not disturb the two fields we care about.
  it('should handle response with extra properties', () => {
    const extended =
      '{"score": 79, "reason": "Good match", "confidence": "high", "breakdown": {"skills": 25, "experience": 20}}';
    const { score, reason } = parseJsonFromContent(extended);
    expect(score).toBe(79);
    expect(reason).toBe('Good match');
  });
});
});

View File

@ -29,9 +29,9 @@ export async function scoreJobSuitability(
const overrideModelScorer = await getSetting('modelScorer'); const overrideModelScorer = await getSetting('modelScorer');
// Precedence: Scorer-specific override > Global override > Env var > Default // Precedence: Scorer-specific override > Global override > Env var > Default
const model = overrideModelScorer || overrideModel || process.env.MODEL || 'openai/gpt-4o-mini'; const model = overrideModelScorer || overrideModel || process.env.MODEL || 'openai/gpt-4o-mini';
const prompt = buildScoringPrompt(job, profile); const prompt = buildScoringPrompt(job, profile);
try { try {
const response = await fetch(OPENROUTER_API_URL, { const response = await fetch(OPENROUTER_API_URL, {
method: 'POST', method: 'POST',
@ -47,19 +47,20 @@ export async function scoreJobSuitability(
response_format: { type: 'json_object' }, response_format: { type: 'json_object' },
}), }),
}); });
if (!response.ok) { if (!response.ok) {
throw new Error(`OpenRouter error: ${response.status}`); throw new Error(`OpenRouter error: ${response.status}`);
} }
const data = await response.json(); const data = await response.json();
const content = data.choices[0]?.message?.content; const content = data.choices[0]?.message?.content;
if (!content) { if (!content) {
throw new Error('No content in response'); throw new Error('No content in response');
} }
const parsed = parseJsonFromContent(content); // Log raw response for debugging when issues occur
const parsed = parseJsonFromContent(content, job.id);
return { return {
score: Math.min(100, Math.max(0, parsed.score || 0)), score: Math.min(100, Math.max(0, parsed.score || 0)),
reason: parsed.reason || 'No explanation provided', reason: parsed.reason || 'No explanation provided',
@ -70,39 +71,98 @@ export async function scoreJobSuitability(
} }
} }
/**
 * Robustly parse the scoring object out of AI-generated content.
 *
 * Recovery is attempted in order of increasing leniency:
 *   1. strip markdown code fences and isolate the outermost `{ ... }` span;
 *   2. strict `JSON.parse`;
 *   3. string-aware repair (comments, trailing commas, unquoted keys, single-quoted strings,
 *      raw control characters inside strings) followed by a second `JSON.parse`;
 *   4. regex extraction of `score` / `reason` from the raw text.
 *
 * Only plain JSON objects are accepted from steps 2 and 3; `null`, numbers, strings and arrays
 * fall through to the next step instead of being returned under the wrong type.
 *
 * @param content - Raw text returned by the model.
 * @param jobId - Optional job identifier, used only to tag log output.
 * @returns The parsed object. On the regex path `score` is rounded to an integer and `reason`
 *   falls back to a fixed placeholder when none can be extracted.
 * @throws Error with message 'Unable to parse JSON from model response' when no score can be recovered.
 */
export function parseJsonFromContent(content: string, jobId?: string): { score?: number; reason?: string } {
  const originalContent = content;

  // Step 1: drop markdown code fences, with or without a (case-insensitive) `json` tag.
  let candidate = content.trim().replace(/```(?:json)?\s*/gi, '').trim();

  // Step 2: isolate the outermost {...} span when the model wrapped the JSON in prose.
  const jsonMatch = candidate.match(/\{[\s\S]*\}/);
  if (jsonMatch) {
    candidate = jsonMatch[0];
  }

  // Step 3: strict parse first - the common case needs no repair.
  const strict = tryParseJsonObject(candidate);
  if (strict) {
    return strict;
  }

  // Steps 4-5: repair common model quirks, then parse again.
  const sanitized = sanitizeJsonLike(candidate);
  const repaired = tryParseJsonObject(sanitized);
  if (repaired) {
    return repaired;
  }

  // Step 6: last resort - scrape the two values straight out of the raw text.
  const scoreMatch = originalContent.match(/["']?score["']?\s*[:=]\s*(\d+(?:\.\d+)?)/i);
  if (scoreMatch) {
    const score = Math.round(parseFloat(scoreMatch[1]));
    const reason = extractReasonFromText(originalContent) ?? 'Score extracted from malformed response';
    console.log(`⚠️ [Job ${jobId || 'unknown'}] Parsed score via regex fallback: ${score}`);
    return { score, reason };
  }

  // Log the failure with (truncated) content for debugging
  console.error(`❌ [Job ${jobId || 'unknown'}] Failed to parse AI response. Raw content (first 500 chars):`,
    originalContent.substring(0, 500));
  console.error(` Sanitized content (first 500 chars):`, sanitized.substring(0, 500));
  throw new Error('Unable to parse JSON from model response');
}

/** Shape returned by the scoring parser. */
type ParsedScore = { score?: number; reason?: string };

/**
 * Parse `text` as JSON and return it only when it is a plain (non-null, non-array) object.
 * A syntax error is an expected outcome here and is reported as `null`, not thrown.
 */
function tryParseJsonObject(text: string): ParsedScore | null {
  try {
    const value: unknown = JSON.parse(text);
    const isPlainObject = value !== null && typeof value === 'object' && !Array.isArray(value);
    return isPlainObject ? (value as ParsedScore) : null;
  } catch {
    return null;
  }
}

/**
 * Rewrite JSON-like text into something `JSON.parse` is more likely to accept.
 *
 * Pass 1 walks the text character by character so that string contents and structure are
 * treated differently:
 *   - inside strings: single-quoted strings become double-quoted (inner `"` escaped, `\'`
 *     unescaped), raw newline/CR/tab become `\n`/`\r`/`\t`, other control characters are dropped;
 *   - outside strings: `//` and block comments are removed and non-whitespace control
 *     characters are dropped, while structural whitespace is left untouched.
 * Pass 2 removes trailing commas and quotes bare keys, skipping over string literals.
 *
 * An unescaped apostrophe inside a single-quoted string is ambiguous and cannot be repaired here.
 */
function sanitizeJsonLike(text: string): string {
  const controlChar = /[\x00-\x1F\x7F]/;
  let normalized = '';
  let i = 0;

  while (i < text.length) {
    const ch = text.charAt(i);

    if (ch === '"' || ch === "'") {
      // String literal: copy its body, normalising quotes and control characters.
      const quote = ch;
      let body = '';
      i += 1;
      while (i < text.length && text.charAt(i) !== quote) {
        const c = text.charAt(i);
        if (c === '\\' && i + 1 < text.length) {
          const escaped = text.charAt(i + 1);
          // `\'` is not a legal JSON escape; everything else is kept as written.
          body += escaped === "'" ? "'" : c + escaped;
          i += 2;
        } else {
          if (c === '"') body += '\\"';
          else if (c === '\n') body += '\\n';
          else if (c === '\r') body += '\\r';
          else if (c === '\t') body += '\\t';
          else if (!controlChar.test(c)) body += c;
          i += 1;
        }
      }
      i += 1; // step over the closing quote (or past the end when unterminated)
      normalized += `"${body}"`;
      continue;
    }

    if (ch === '/' && text.charAt(i + 1) === '/') {
      // Line comment: skip up to, but not including, the newline.
      while (i < text.length && text.charAt(i) !== '\n') {
        i += 1;
      }
      continue;
    }

    if (ch === '/' && text.charAt(i + 1) === '*') {
      // Block comment: skip through the closing marker, or to the end when unterminated.
      const end = text.indexOf('*/', i + 2);
      i = end === -1 ? text.length : end + 2;
      continue;
    }

    const isStructuralWhitespace = ch === '\n' || ch === '\r' || ch === '\t';
    if (!controlChar.test(ch) || isStructuralWhitespace) {
      normalized += ch;
    }
    i += 1;
  }

  // Pass 2: the first alternative consumes whole string literals so that commas and
  // identifiers inside them are never touched.
  const stringOrFix = /"(?:[^"\\]|\\[\s\S])*"|,(?=\s*[\]}])|([{,]\s*)([A-Za-z_][A-Za-z0-9_]*)(?=\s*:)/g;
  return normalized.replace(stringOrFix, (match: string, prefix?: string, key?: string) => {
    if (match.charAt(0) === '"') {
      return match; // string literal - leave as is
    }
    if (key !== undefined) {
      return `${prefix}"${key}"`; // bare key -> quoted key
    }
    return ''; // trailing comma
  });
}

/**
 * Scrape a `reason` value out of free text.
 * Double-quoted and single-quoted values are matched separately so an apostrophe inside a
 * double-quoted reason does not cut the value short; an unquoted value is the final fallback.
 *
 * @returns The trimmed reason with control characters removed, or `null` when nothing matches.
 */
function extractReasonFromText(text: string): string | null {
  const match =
    text.match(/["']?reason["']?\s*[:=]\s*"((?:[^"\\\n]|\\.)+)"/i) ||
    text.match(/["']?reason["']?\s*[:=]\s*'((?:[^'\\\n]|\\.)+)'/i) ||
    text.match(/["']?reason["']?\s*[:=]\s*["']?(.*?)["']?\s*[,}\n]/is);
  return match ? match[1].trim().replace(/[\x00-\x1F\x7F]/g, '') : null;
}
function buildScoringPrompt(job: Job, profile: Record<string, unknown>): string { function buildScoringPrompt(job: Job, profile: Record<string, unknown>): string {
return ` return `You are evaluating a job listing for a candidate. Score how suitable this job is for the candidate on a scale of 0-100.
You are evaluating a job listing for a candidate. Score how suitable this job is for the candidate on a scale of 0-100.
Consider: SCORING CRITERIA:
- Skills match (technologies, frameworks, languages) - Skills match (technologies, frameworks, languages): 0-30 points
- Experience level match - Experience level match: 0-25 points
- Location/remote work alignment - Location/remote work alignment: 0-15 points
- Industry/domain fit - Industry/domain fit: 0-15 points
- Career growth potential - Career growth potential: 0-15 points
Candidate Profile: CANDIDATE PROFILE:
${JSON.stringify(profile, null, 2)} ${JSON.stringify(profile, null, 2)}
Job Listing: JOB LISTING:
Title: ${job.title} Title: ${job.title}
Employer: ${job.employer} Employer: ${job.employer}
Location: ${job.location || 'Not specified'} Location: ${job.location || 'Not specified'}
@ -110,33 +170,39 @@ Salary: ${job.salary || 'Not specified'}
Degree Required: ${job.degreeRequired || 'Not specified'} Degree Required: ${job.degreeRequired || 'Not specified'}
Disciplines: ${job.disciplines || 'Not specified'} Disciplines: ${job.disciplines || 'Not specified'}
Job Description: JOB DESCRIPTION:
${job.jobDescription || 'No description available'} ${job.jobDescription || 'No description available'}
Respond with JSON only (no code fences): { "score": <0-100>, "reason": "<brief explanation>" } IMPORTANT: Respond with ONLY a valid JSON object. No markdown, no code fences, no explanation outside the JSON.
`;
REQUIRED FORMAT (exactly this structure):
{"score": <integer 0-100>, "reason": "<1-2 sentence explanation>"}
EXAMPLE VALID RESPONSE:
{"score": 75, "reason": "Strong skills match with React and TypeScript requirements, but position requires 3+ years experience."}`;
} }
function mockScore(job: Job): SuitabilityResult { function mockScore(job: Job): SuitabilityResult {
// Simple keyword-based scoring as fallback // Simple keyword-based scoring as fallback
const jd = (job.jobDescription || '').toLowerCase(); const jd = (job.jobDescription || '').toLowerCase();
const title = job.title.toLowerCase(); const title = job.title.toLowerCase();
const goodKeywords = ['typescript', 'react', 'node', 'python', 'web', 'frontend', 'backend', 'fullstack', 'software', 'engineer', 'developer']; const goodKeywords = ['typescript', 'react', 'node', 'python', 'web', 'frontend', 'backend', 'fullstack', 'software', 'engineer', 'developer'];
const badKeywords = ['senior', '5+ years', '10+ years', 'principal', 'staff', 'manager']; const badKeywords = ['senior', '5+ years', '10+ years', 'principal', 'staff', 'manager'];
let score = 50; let score = 50;
for (const kw of goodKeywords) { for (const kw of goodKeywords) {
if (jd.includes(kw) || title.includes(kw)) score += 5; if (jd.includes(kw) || title.includes(kw)) score += 5;
} }
for (const kw of badKeywords) { for (const kw of badKeywords) {
if (jd.includes(kw) || title.includes(kw)) score -= 10; if (jd.includes(kw) || title.includes(kw)) score -= 10;
} }
score = Math.min(100, Math.max(0, score)); score = Math.min(100, Math.max(0, score));
return { return {
score, score,
reason: 'Scored using keyword matching (API key not configured)', reason: 'Scored using keyword matching (API key not configured)',
@ -160,6 +226,6 @@ export async function scoreAndRankJobs(
}; };
}) })
); );
return scoredJobs.sort((a, b) => b.suitabilityScore - a.suitabilityScore); return scoredJobs.sort((a, b) => b.suitabilityScore - a.suitabilityScore);
} }

View File

@ -0,0 +1,258 @@
import { z } from "zod";
/**
* Zod schemas for the resume JSON document (the format whose entries carry "visible"/"summary"/"date"/"href" fields).
* Object schemas marked `.passthrough()` keep unknown keys, so small future additions won't break parsing.
*/
/** Link value made of an `href` and a `label`; each defaults to "" when omitted. */
export const hrefUrlSchema = z.object({
href: z.string().default(""),
label: z.string().default(""),
});
/** Boolean flags (`border`, `hidden`, `grayscale`) used by `basicsPictureSchema.effects`; each defaults to false. */
export const pictureEffectsSchema = z.object({
border: z.boolean().default(false),
hidden: z.boolean().default(false),
grayscale: z.boolean().default(false),
});
/**
* Picture block used by `basicsSchema.picture`.
* Every scalar field has a default, but `effects` carries no `.default()`, so the key itself
* must be present in the input (its own flags may be omitted).
*/
export const basicsPictureSchema = z.object({
url: z.string().default(""),
size: z.number().default(120),
effects: pictureEffectsSchema,
aspectRatio: z.number().default(1),
borderRadius: z.number().default(0),
});
/**
* One entry of `basics.customFields`.
* `id`, `icon` and `text` are optional strings; unknown keys are kept (passthrough).
*/
export const customFieldSchema = z
.object({
id: z.string().optional(),
icon: z.string().optional(),
text: z.string().optional(),
})
.passthrough();
/**
* The `basics` block of the resume.
* `name`, `url` and `picture` have no default and must be present; the remaining fields
* fall back to their defaults. Unknown keys are kept (passthrough).
*/
export const basicsSchema = z
.object({
url: hrefUrlSchema,
name: z.string(),
email: z.string().email().or(z.literal("")).default(""), // a valid address, or "" when unset
phone: z.string().default(""),
picture: basicsPictureSchema,
headline: z.string().default(""),
location: z.string().default(""),
customFields: z.array(customFieldSchema).default([]),
})
.passthrough();
/** `metadata.css` entry: a string `value` and a boolean `visible`, defaulting to "" and false. */
export const metadataCssSchema = z.object({
value: z.string().default(""),
visible: z.boolean().default(false),
});
/** Boolean page options (`breakLine`, `pageNumbers`) used by `metadataPageSchema.options`; both default to false. */
export const metadataPageOptionsSchema = z.object({
breakLine: z.boolean().default(false),
pageNumbers: z.boolean().default(false),
});
/**
* `metadata.page` entry: paper `format` ("a4" or "letter"), numeric `margin`, and `options`.
* All three fields have defaults, so an empty object is accepted.
*/
export const metadataPageSchema = z.object({
format: z.enum(["a4", "letter"]).default("a4"),
margin: z.number().default(34),
// Full default object is spelled out rather than relying on the nested field defaults.
options: metadataPageOptionsSchema.default({ breakLine: false, pageNumbers: false }),
});
/** `metadata.theme` entry: three string fields whose defaults are hex colour codes. */
export const metadataThemeSchema = z.object({
text: z.string().default("#000000"),
primary: z.string().default("#475569"),
background: z.string().default("#ffffff"),
});
/**
* `metadata.layout`: an array of two-element tuples, each element being an array of string ids
* (one outer entry presumably per page — TODO confirm). In the sample data it is shaped like:
* [
* [
* [ "summary", "profiles", ... ], // main column ids
* [ "skills", "languages" ] // sidebar column ids
* ],
* ...
* ]
*/
export const metadataLayoutSchema = z.array(
z.tuple([z.array(z.string()), z.array(z.string())])
);
/**
* `metadata.typography` entry.
* `font` has no `.default()` and must be present, although each of its fields has one;
* the top-level flags and `lineHeight` all have defaults. Unknown top-level keys are kept (passthrough).
*/
export const metadataTypographySchema = z
.object({
font: z.object({
size: z.number().default(13),
family: z.string().default("IBM Plex Sans"),
subset: z.string().default("latin"),
variants: z.array(z.string()).default(["regular"]),
}),
hideIcons: z.boolean().default(false),
lineHeight: z.number().default(1.75),
underlineLinks: z.boolean().default(true),
})
.passthrough();
/**
* The `metadata` block of the resume.
* `css`, `page`, `theme` and `typography` must be present (no default on the key itself);
* `notes`, `layout` and `template` fall back to defaults. Unknown keys are kept (passthrough).
*/
export const metadataSchema = z
.object({
css: metadataCssSchema,
page: metadataPageSchema,
notes: z.string().default(""),
theme: metadataThemeSchema,
layout: metadataLayoutSchema.default([]),
template: z.string().default("onyx"),
typography: metadataTypographySchema,
})
.passthrough();
/**
* Common section container shared by the item-based sections.
* `id` and `name` are required; `items` is an untyped array here and is narrowed by the
* section schemas that `.extend()` this one. Unknown keys are kept (passthrough).
*/
export const baseSectionSchema = z
.object({
id: z.string(),
name: z.string(),
columns: z.number().default(1),
visible: z.boolean().default(true),
separateLinks: z.boolean().default(true),
items: z.array(z.unknown()).default([]),
})
.passthrough();
/** Item schemas (based on the items present in the sample data) */

/**
* One entry of the profiles section.
* `id`, `url` and `network` are required; the rest have defaults. Unknown keys are kept (passthrough).
*/
export const profileItemSchema = z
.object({
id: z.string(),
url: hrefUrlSchema,
icon: z.string().default(""),
network: z.string(),
visible: z.boolean().default(true),
username: z.string().default(""),
})
.passthrough();
/**
* One entry of the skills section.
* `id` and `name` are required; `level` defaults to 0 and `keywords` to an empty array.
* Unknown keys are kept (passthrough).
*/
export const skillItemSchema = z
.object({
id: z.string(),
name: z.string(),
level: z.number().default(0),
visible: z.boolean().default(true),
keywords: z.array(z.string()).default([]),
description: z.string().default(""),
})
.passthrough();
/**
* One entry of the projects section.
* `id`, `url` and `name` are required; the rest have defaults. Unknown keys are kept (passthrough).
*/
export const projectItemSchema = z
.object({
id: z.string(),
url: hrefUrlSchema,
date: z.string().default(""),
name: z.string(),
summary: z.string().default(""), // HTML string in the sample data
visible: z.boolean().default(true),
keywords: z.array(z.string()).default([]),
description: z.string().default(""),
})
.passthrough();
/**
* One entry of the education section.
* Only `id` and `url` are required; note that `score` is validated as a string, not a number.
* Unknown keys are kept (passthrough).
*/
export const educationItemSchema = z
.object({
id: z.string(),
url: hrefUrlSchema,
area: z.string().default(""),
date: z.string().default(""),
score: z.string().default(""),
summary: z.string().default(""), // HTML string in the sample data
visible: z.boolean().default(true),
studyType: z.string().default(""),
institution: z.string().default(""),
})
.passthrough();
/**
* One entry of the experience section.
* `id`, `url` and `company` are required; the rest have defaults. Unknown keys are kept (passthrough).
*/
export const experienceItemSchema = z
.object({
id: z.string(),
url: hrefUrlSchema,
date: z.string().default(""),
company: z.string(),
summary: z.string().default(""), // HTML string in the sample data
visible: z.boolean().default(true),
location: z.string().default(""),
position: z.string().default(""),
})
.passthrough();
/** Section schemas with typed items */

/** Profiles section: `baseSectionSchema` with `items` narrowed to `profileItemSchema` entries. */
export const profilesSectionSchema = baseSectionSchema.extend({
items: z.array(profileItemSchema).default([]),
});
/** Skills section: `baseSectionSchema` with `items` narrowed to `skillItemSchema` entries. */
export const skillsSectionSchema = baseSectionSchema.extend({
items: z.array(skillItemSchema).default([]),
});
/** Projects section: `baseSectionSchema` with `items` narrowed to `projectItemSchema` entries. */
export const projectsSectionSchema = baseSectionSchema.extend({
items: z.array(projectItemSchema).default([]),
});
/** Education section: `baseSectionSchema` with `items` narrowed to `educationItemSchema` entries. */
export const educationSectionSchema = baseSectionSchema.extend({
items: z.array(educationItemSchema).default([]),
});
/** Experience section: `baseSectionSchema` with `items` narrowed to `experienceItemSchema` entries. */
export const experienceSectionSchema = baseSectionSchema.extend({
items: z.array(experienceItemSchema).default([]),
});
/**
* The "summary" section has no `items` array; it carries a `content` string instead,
* so it is declared on its own rather than extending `baseSectionSchema`.
* `id` and `name` are required; unknown keys are kept (passthrough).
*/
export const summarySectionSchema = z
.object({
id: z.string(),
name: z.string(),
columns: z.number().default(1),
content: z.string().default(""), // HTML string in the sample data
visible: z.boolean().default(true),
separateLinks: z.boolean().default(true),
})
.passthrough();
/**
* Sections whose items are not typed (they appear as `items: []` in the sample data).
* The `items` override repeats the definition already on `baseSectionSchema`: an array of
* unknown values defaulting to [].
*/
export const emptyItemsSectionSchema = baseSectionSchema.extend({
items: z.array(z.unknown()).default([]),
});
/**
* The "sections" object: a fixed set of section keys plus `custom`.
* Every listed section key except `custom` is required. `custom` has no guaranteed structure
* in the sample data, so it is an open (passthrough) object defaulting to {}.
* Unknown section keys are kept (passthrough).
*/
export const sectionsSchema = z
.object({
awards: emptyItemsSectionSchema,
custom: z.object({}).passthrough().default({}),
skills: skillsSectionSchema,
summary: summarySectionSchema,
profiles: profilesSectionSchema,
projects: projectsSectionSchema,
education: educationSectionSchema,
interests: emptyItemsSectionSchema,
languages: emptyItemsSectionSchema,
volunteer: emptyItemsSectionSchema,
experience: experienceSectionSchema,
references: emptyItemsSectionSchema,
publications: emptyItemsSectionSchema,
certifications: emptyItemsSectionSchema,
})
.passthrough();
/**
* Top-level schema for the resume document.
* `basics`, `metadata` and `sections` are all required; unknown top-level keys are kept (passthrough).
*/
export const myResumeJsonSchema = z
.object({
basics: basicsSchema,
metadata: metadataSchema,
sections: sectionsSchema,
})
.passthrough();
/** Parsed (output) type of `myResumeJsonSchema`, i.e. the shape after defaults have been applied. */
export type MyResumeJson = z.infer<typeof myResumeJsonSchema>;