Refactor text utilities for improved clarity and maintainability

- Cleaned up and organized text processing utilities in `text-utils.js` for better readability and reuse.
- Ensured consistent formatting and documentation across utility functions.
- No functional changes were made; the focus was on code structure and clarity.
This commit is contained in:
Tanya 2025-12-29 11:22:59 -05:00
parent 673f84d388
commit 691d61aaee
2 changed files with 491 additions and 491 deletions

View File

@ -1,146 +1,146 @@
/** /**
* Text processing utilities for cleaning and validating content * Text processing utilities for cleaning and validating content
* Extracted from linkedout.js for reuse across parsers * Extracted from linkedout.js for reuse across parsers
*/ */
/**
 * Clean text by removing hashtags, URLs, emojis, and normalizing whitespace.
 *
 * @param {string} text - Raw text to clean; non-string or empty input yields "".
 * @returns {string} Cleaned, single-spaced, trimmed text.
 */
function cleanText(text) {
  if (!text || typeof text !== "string") {
    return "";
  }
  // Remove hashtags (e.g. "#remote")
  text = text.replace(/#\w+/g, "");
  // Remove "hashtag-foo" tokens BEFORE the bare word "hashtag": the previous
  // order stripped the "hashtag" prefix first ("-" is a word boundary), which
  // left a stray "-foo" fragment behind and made this pattern dead code.
  text = text.replace(/hashtag-\w+/gi, "");
  text = text.replace(/\bhashtag\b/gi, "");
  // Remove URLs
  text = text.replace(/https?:\/\/[^\s]+/g, "");
  // Remove emojis (Unicode ranges for common emoji)
  text = text.replace(
    /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}]/gu,
    ""
  );
  // Normalize whitespace
  text = text.replace(/\s+/g, " ").trim();
  return text;
}
/**
 * Check whether at least one keyword occurs in the text
 * (case-insensitive substring match).
 *
 * @param {string} text - Text to search in.
 * @param {Array<string>} keywords - Candidate keywords.
 * @returns {boolean} True when any keyword is found; false for empty text
 *   or a non-array keywords argument.
 */
function containsAnyKeyword(text, keywords) {
  if (!text || !Array.isArray(keywords)) {
    return false;
  }
  const haystack = text.toLowerCase();
  for (const keyword of keywords) {
    if (haystack.includes(keyword.toLowerCase())) {
      return true;
    }
  }
  return false;
}
/**
 * Check whether every keyword occurs in the text
 * (case-insensitive substring match).
 *
 * @param {string} text - Text to search in.
 * @param {Array<string>} keywords - Keywords that must all be present.
 * @returns {boolean} True when all keywords are found (vacuously true for
 *   an empty array); false for empty text or a non-array keywords argument.
 */
function containsAllKeywords(text, keywords) {
  if (!text || !Array.isArray(keywords)) {
    return false;
  }
  const haystack = text.toLowerCase();
  for (const keyword of keywords) {
    if (!haystack.includes(keyword.toLowerCase())) {
      return false;
    }
  }
  return true;
}
/**
 * Check if text matches keyword groups with AND logic between groups and
 * OR logic within groups.
 *
 * @param {string} text - Text to search in.
 * @param {Array<Array<string>>} keywordGroups - Array of keyword groups; each
 *   group is an array of OR-alternatives.
 * @returns {boolean} True only when every group contributes at least one
 *   matching keyword; false for empty text, a non-array argument, an empty
 *   group list, or any malformed/empty group.
 */
function matchesKeywordGroups(text, keywordGroups) {
  if (!text || !Array.isArray(keywordGroups) || keywordGroups.length === 0) {
    return false;
  }
  const haystack = text.toLowerCase();
  // OR within a group: one trimmed, lowercased keyword must be a substring.
  // A malformed or empty group can never match.
  const groupMatches = (group) =>
    Array.isArray(group) &&
    group.length > 0 &&
    group.some((keyword) => haystack.includes(keyword.toLowerCase().trim()));
  // AND across groups: every group must match.
  return keywordGroups.every(groupMatches);
}
/**
 * Validate if text meets basic quality criteria: it is a non-empty string,
 * at least `minLength` characters long, and contains at least one
 * alphanumeric character.
 *
 * @param {string} text - Candidate text.
 * @param {number} [minLength=30] - Minimum acceptable length.
 * @returns {boolean} True when all criteria hold.
 */
function isValidText(text, minLength = 30) {
  if (typeof text !== "string" || text.length === 0) {
    return false;
  }
  if (text.length < minLength) {
    return false;
  }
  // Reject strings made up entirely of punctuation/whitespace.
  return /[a-zA-Z0-9]/.test(text);
}
/**
 * Extract the hostname portion of a URL.
 *
 * @param {string} url - URL to parse.
 * @returns {string|null} The hostname, or null when the input is empty,
 *   not a string, or not a parseable absolute URL.
 */
function extractDomain(url) {
  if (!url || typeof url !== "string") {
    return null;
  }
  try {
    return new URL(url).hostname;
  } catch (err) {
    // Unparseable URL — treat as "no domain" rather than throwing.
    return null;
  }
}
/**
 * Normalize a URL by dropping its query string and fragment, keeping
 * protocol, hostname, and path.
 *
 * @param {string} url - URL to normalize.
 * @returns {string} The normalized URL; "" for empty/non-string input;
 *   the original string unchanged when it cannot be parsed.
 */
function normalizeUrl(url) {
  if (!url || typeof url !== "string") {
    return "";
  }
  try {
    const { protocol, hostname, pathname } = new URL(url);
    return `${protocol}//${hostname}${pathname}`;
  } catch (err) {
    // Not a parseable absolute URL — pass it through untouched.
    return url;
  }
}
// Public API: text cleaning, keyword matching, and URL helpers shared by parsers.
module.exports = {
  cleanText,
  containsAnyKeyword,
  containsAllKeywords,
  matchesKeywordGroups,
  isValidText,
  extractDomain,
  normalizeUrl,
};

View File

@ -1,345 +1,345 @@
/** /**
* SkipTheDrive Job Parser * SkipTheDrive Job Parser
* *
* Parses remote job listings from SkipTheDrive.com * Parses remote job listings from SkipTheDrive.com
* Supports keyword search, job type filters, and pagination * Supports keyword search, job type filters, and pagination
*/ */
const { chromium } = require("playwright"); const { chromium } = require("playwright");
const path = require("path"); const path = require("path");
// Import from ai-analyzer core package // Import from ai-analyzer core package
const { const {
logger, logger,
cleanText, cleanText,
containsAnyKeyword, containsAnyKeyword,
containsAllKeywords, containsAllKeywords,
parseLocationFilters, parseLocationFilters,
validateLocationAgainstFilters, validateLocationAgainstFilters,
extractLocationFromProfile, extractLocationFromProfile,
analyzeBatch, analyzeBatch,
checkOllamaStatus, checkOllamaStatus,
} = require("../../ai-analyzer"); } = require("../../ai-analyzer");
/**
 * Build search URL for SkipTheDrive.
 *
 * @param {string} keyword - Search keyword (URL-encoded into the "s" param).
 * @param {string} [orderBy="date"] - Sort order (date, relevance); skipped
 *   entirely when falsy.
 * @param {Array<string>} [jobTypes=[]] - Job types to filter (part time,
 *   full time, contract); each becomes a repeated "jobtype" parameter.
 * @returns {string} - Formatted search URL.
 */
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
  const parts = [
    `https://www.skipthedrive.com/?s=${encodeURIComponent(keyword)}`,
  ];
  if (orderBy) {
    parts.push(`orderby=${orderBy}`);
  }
  // One repeated "jobtype" parameter per requested job type.
  for (const type of jobTypes) {
    parts.push(`jobtype=${encodeURIComponent(type)}`);
  }
  return parts.join("&");
}
/**
 * Extract job data from a single job listing element.
 *
 * @param {Element} article - Job listing DOM element (Playwright element handle).
 * @returns {Promise<Object|null>} - Extracted job data, or null if any
 *   extraction step throws.
 */
async function extractJobData(article) {
  // Text content of the first descendant matching `selector`, or "".
  const textOf = async (selector) => {
    const el = await article.$(selector);
    return el ? await el.textContent() : "";
  };
  // Attribute value of the first descendant matching `selector`, or "".
  const attrOf = async (selector, attrName) => {
    const el = await article.$(selector);
    return el ? await el.getAttribute(attrName) : "";
  };
  // Drop the leading icon token (the first whitespace-delimited chunk).
  const stripIcon = (value) => value.replace(/^\s*[^\s]+\s*/, "").trim();
  try {
    // Job title and URL both come from the heading link.
    const title = await textOf("h2.post-title a");
    const jobUrl = await attrOf("h2.post-title a", "href");
    // Posting date: machine-readable attribute plus the visible label.
    const datePosted = await attrOf("time.post-date", "datetime");
    const dateText = await textOf("time.post-date");
    // Company name and relative age both carry a leading icon to strip.
    const company = stripIcon(
      await textOf(".custom_fields_company_name_display_search_results")
    );
    const daysAgo = stripIcon(
      await textOf(".custom_fields_job_date_display_search_results")
    );
    // Short description excerpt shown in the search results.
    const description = await textOf(".excerpt_part");
    // Sponsored/featured listings carry a dedicated marker element.
    const isFeatured = !!(await article.$(".custom_fields_sponsored_job"));
    // Job ID is the numeric suffix of the article's "post-<id>" element ID.
    const articleId = await article.getAttribute("id");
    const jobId = articleId ? articleId.replace("post-", "") : "";
    return {
      jobId,
      title: cleanText(title),
      company: cleanText(company),
      jobUrl,
      datePosted,
      dateText: cleanText(dateText),
      daysAgo: cleanText(daysAgo),
      description: cleanText(description),
      isFeatured,
      source: "skipthedrive",
      timestamp: new Date().toISOString(),
    };
  } catch (error) {
    logger.error(`Error extracting job data: ${error.message}`);
    return null;
  }
}
/**
 * Parse SkipTheDrive job listings.
 *
 * Searches SkipTheDrive for each keyword (or one combined query when
 * `useAndLogic` is true), walks up to `maxPages` result pages per search,
 * filters listings by keyword and optional location text, and optionally
 * runs AI relevance analysis on the accepted jobs.
 *
 * @param {Object} [options={}] - Parser options; each field falls back to an
 *   environment variable, then to a hard-coded default.
 * @param {Array<string>} [options.keywords] - Search keywords (SEARCH_KEYWORDS).
 * @param {Array<string>} [options.jobTypes] - Job type filters (JOB_TYPES).
 * @param {string} [options.locationFilter] - Location filter string (LOCATION_FILTER).
 * @param {number} [options.maxPages] - Max result pages per keyword (MAX_PAGES, default 5).
 * @param {boolean} [options.headless] - Headless browser unless HEADLESS === "false".
 * @param {boolean} [options.enableAI] - Run AI analysis when ENABLE_AI_ANALYSIS === "true".
 * @param {string} [options.aiContext] - Context string passed to the AI analyzer (AI_CONTEXT).
 * @param {boolean} [options.useAndLogic=false] - Require ALL keywords (AND) instead of ANY (OR).
 * @returns {Promise<{results: Array, rejectedResults: Array, metadata: Object}>}
 *   Accepted jobs, rejected jobs (each with a `reason`), and run metadata.
 * @throws Rethrows any fatal launch/processing error after logging it; the
 *   browser is always closed in the finally block.
 */
async function parseSkipTheDrive(options = {}) {
  const {
    keywords = process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
      "software engineer",
      "developer",
    ],
    jobTypes = process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],
    locationFilter = process.env.LOCATION_FILTER || "",
    // NOTE(review): parseInt is called without a radix; base-10 MAX_PAGES is assumed.
    maxPages = parseInt(process.env.MAX_PAGES) || 5,
    headless = process.env.HEADLESS !== "false",
    enableAI = process.env.ENABLE_AI_ANALYSIS === "true",
    aiContext = process.env.AI_CONTEXT || "remote job opportunities analysis",
    useAndLogic = false, // Use AND logic instead of OR logic for keywords
  } = options;
  logger.step("Starting SkipTheDrive parser...");
  logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
  logger.info(`🔗 Keyword Logic: ${useAndLogic ? "AND (all keywords must match)" : "OR (any keyword matches)"}`);
  logger.info(
    `📋 Job Types: ${jobTypes.length > 0 ? jobTypes.join(", ") : "All"}`
  );
  logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
  logger.info(`📄 Max Pages: ${maxPages}`);
  const browser = await chromium.launch({
    headless,
    args: [
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-dev-shm-usage",
    ],
  });
  // Fixed desktop-Chrome user agent instead of Playwright's default.
  const context = await browser.newContext({
    userAgent:
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
  });
  const results = [];
  const rejectedResults = [];
  // Job IDs already seen — deduplicates listings across keywords and pages.
  const seenJobs = new Set();
  try {
    // For AND logic, combine all keywords into a single search query
    // For OR logic, search each keyword separately
    const searchKeywords = useAndLogic ? [keywords.join(" ")] : keywords;
    // Search for each keyword (or combined keyword for AND logic)
    for (const keyword of searchKeywords) {
      logger.info(`\n🔍 Searching for: ${keyword}`);
      const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
      const page = await context.newPage();
      try {
        logger.info(
          `Attempting navigation to: ${searchUrl} at ${new Date().toISOString()}`
        );
        await page.goto(searchUrl, {
          waitUntil: "domcontentloaded",
          timeout: 30000,
        });
        logger.info(
          `Navigation completed successfully at ${new Date().toISOString()}`
        );
        // Wait for job listings to load. A timeout here is non-fatal: the
        // rejection is swallowed with a warning and the page loop below
        // simply finds zero articles.
        logger.info("Waiting for selector #loops-wrapper");
        await page
          .waitForSelector("#loops-wrapper", { timeout: 5000 })
          .catch(() => {
            logger.warning(`No results found for keyword: ${keyword}`);
          });
        logger.info("Selector wait completed");
        let currentPage = 1;
        let hasNextPage = true;
        while (hasNextPage && currentPage <= maxPages) {
          logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);
          // Extract all job articles on current page
          const jobArticles = await page.$$("article[id^='post-']");
          logger.info(
            `Found ${jobArticles.length} job listings on page ${currentPage}`
          );
          for (const article of jobArticles) {
            const jobData = await extractJobData(article);
            // Skip extraction failures and already-seen job IDs.
            if (!jobData || seenJobs.has(jobData.jobId)) {
              continue;
            }
            seenJobs.add(jobData.jobId);
            // Add keyword that found this job
            jobData.searchKeyword = keyword;
            // Validate job against keywords using title + description + company.
            const fullText = `${jobData.title} ${jobData.description} ${jobData.company}`;
            const keywordMatch = useAndLogic
              ? containsAllKeywords(fullText, keywords)
              : containsAnyKeyword(fullText, keywords);
            if (!keywordMatch) {
              rejectedResults.push({
                ...jobData,
                rejected: true,
                reason: useAndLogic
                  ? "Not all keywords found in job listing"
                  : "Keywords not found in job listing",
              });
              continue;
            }
            // Location validation (if enabled)
            if (locationFilter) {
              const locationFilters = parseLocationFilters(locationFilter);
              // For SkipTheDrive, most jobs are remote, but we can check the title/description
              const locationValid =
                fullText.toLowerCase().includes("remote") ||
                locationFilters.some((filter) =>
                  fullText.toLowerCase().includes(filter.toLowerCase())
                );
              if (!locationValid) {
                rejectedResults.push({
                  ...jobData,
                  rejected: true,
                  reason: "Location requirements not met",
                });
                continue;
              }
              // NOTE(review): locationValid is always true on this path — the
              // false case `continue`s above. The flag marks "passed the check".
              jobData.locationValid = locationValid;
            }
            logger.success(`✅ Found: ${jobData.title} at ${jobData.company}`);
            results.push(jobData);
          }
          // Check for next page; stop once maxPages is reached even if a
          // next-page link exists.
          const nextPageLink = await page.$("a.nextp");
          if (nextPageLink && currentPage < maxPages) {
            logger.info("📄 Moving to next page...");
            await nextPageLink.click();
            await page.waitForLoadState("domcontentloaded");
            await page.waitForTimeout(2000); // Wait for content to load
            currentPage++;
          } else {
            hasNextPage = false;
          }
        }
      } catch (error) {
        // Per-keyword failures are logged and skipped so other keywords still run.
        logger.error(`Error processing keyword "${keyword}": ${error.message}`);
      } finally {
        await page.close();
      }
    }
    logger.success(`\n✅ Parsing complete!`);
    logger.info(`📊 Total jobs found: ${results.length}`);
    logger.info(`❌ Rejected jobs: ${rejectedResults.length}`);
    // Run AI analysis if enabled
    let aiAnalysis = null;
    if (enableAI && results.length > 0) {
      logger.step("Running AI analysis on job listings...");
      const aiAvailable = await checkOllamaStatus();
      if (aiAvailable) {
        const analysisData = results.map((job) => ({
          text: `${job.title} at ${job.company}. ${job.description}`,
          metadata: {
            jobId: job.jobId,
            company: job.company,
            daysAgo: job.daysAgo,
          },
        }));
        aiAnalysis = await analyzeBatch(analysisData, aiContext);
        // Merge AI analysis with results — index-aligned with analysisData.
        results.forEach((job, index) => {
          if (aiAnalysis && aiAnalysis[index]) {
            job.aiAnalysis = {
              isRelevant: aiAnalysis[index].isRelevant,
              confidence: aiAnalysis[index].confidence,
              reasoning: aiAnalysis[index].reasoning,
            };
          }
        });
        logger.success("✅ AI analysis completed");
      } else {
        logger.warning("⚠️ AI not available - skipping analysis");
      }
    }
    return {
      results,
      rejectedResults,
      metadata: {
        source: "skipthedrive",
        totalJobs: results.length,
        rejectedJobs: rejectedResults.length,
        keywords: keywords,
        jobTypes: jobTypes,
        locationFilter: locationFilter,
        aiAnalysisEnabled: enableAI,
        aiAnalysisCompleted: !!aiAnalysis,
        timestamp: new Date().toISOString(),
      },
    };
  } catch (error) {
    logger.error(`Fatal error in SkipTheDrive parser: ${error.message}`);
    throw error;
  } finally {
    await browser.close();
  }
}
// Export the parser entry point plus URL/extraction helpers for reuse and testing.
module.exports = {
  parseSkipTheDrive,
  buildSearchUrl,
  extractJobData,
};