Refactor text utilities for improved clarity and maintainability

- Cleaned up and organized text processing utilities in `text-utils.js` for better readability and reuse.
- Ensured consistent formatting and documentation across utility functions.
- No functional changes were made; the focus was on code structure and clarity.
2025-12-29 11:22:59 -05:00 · 2025-12-29 11:22:59 -05:00 · 691d61aaee
commit 691d61aaee
parent 673f84d388
2 changed files with 491 additions and 491 deletions
--- a/ai-analyzer/src/text-utils.js
+++ b/ai-analyzer/src/text-utils.js
@@ -1,146 +1,146 @@
-/**
+/**
- * Text processing utilities for cleaning and validating content
+ * Text processing utilities for cleaning and validating content
- * Extracted from linkedout.js for reuse across parsers
+ * Extracted from linkedout.js for reuse across parsers
- */
+ */
-
+
-/**
+/**
- * Clean text by removing hashtags, URLs, emojis, and normalizing whitespace
+ * Clean text by removing hashtags, URLs, emojis, and normalizing whitespace
- */
+ */
-function cleanText(text) {
+function cleanText(text) {
-  if (!text || typeof text !== "string") {
+  if (!text || typeof text !== "string") {
-    return "";
+    return "";
-  }
+  }
-
+
-  // Remove hashtags
+  // Remove hashtags
-  text = text.replace(/#\w+/g, "");
+  text = text.replace(/#\w+/g, "");
-
+
-  // Remove hashtag mentions
+  // Remove hashtag mentions
-  text = text.replace(/\bhashtag\b/gi, "");
+  text = text.replace(/\bhashtag\b/gi, "");
-  text = text.replace(/hashtag-\w+/gi, "");
+  text = text.replace(/hashtag-\w+/gi, "");
-
+
-  // Remove URLs
+  // Remove URLs
-  text = text.replace(/https?:\/\/[^\s]+/g, "");
+  text = text.replace(/https?:\/\/[^\s]+/g, "");
-
+
-  // Remove emojis (Unicode ranges for common emoji)
+  // Remove emojis (Unicode ranges for common emoji)
-  text = text.replace(
+  text = text.replace(
-    /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}]/gu,
+    /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}]/gu,
-    ""
+    ""
-  );
+  );
-
+
-  // Normalize whitespace
+  // Normalize whitespace
-  text = text.replace(/\s+/g, " ").trim();
+  text = text.replace(/\s+/g, " ").trim();
-
+
-  return text;
+  return text;
-}
+}
-
+
-/**
+/**
- * Check if text contains any of the specified keywords (case insensitive)
+ * Check if text contains any of the specified keywords (case insensitive)
- */
+ */
-function containsAnyKeyword(text, keywords) {
+function containsAnyKeyword(text, keywords) {
-  if (!text || !Array.isArray(keywords)) {
+  if (!text || !Array.isArray(keywords)) {
-    return false;
+    return false;
-  }
+  }
-
+
-  const lowerText = text.toLowerCase();
+  const lowerText = text.toLowerCase();
-  return keywords.some((keyword) => lowerText.includes(keyword.toLowerCase()));
+  return keywords.some((keyword) => lowerText.includes(keyword.toLowerCase()));
-}
+}
-
+
-/**
+/**
- * Check if text contains all of the specified keywords (case insensitive)
+ * Check if text contains all of the specified keywords (case insensitive)
- */
+ */
-function containsAllKeywords(text, keywords) {
+function containsAllKeywords(text, keywords) {
-  if (!text || !Array.isArray(keywords)) {
+  if (!text || !Array.isArray(keywords)) {
-    return false;
+    return false;
-  }
+  }
-
+
-  const lowerText = text.toLowerCase();
+  const lowerText = text.toLowerCase();
-  return keywords.every((keyword) => lowerText.includes(keyword.toLowerCase()));
+  return keywords.every((keyword) => lowerText.includes(keyword.toLowerCase()));
-}
+}
-
+
-/**
+/**
- * Check if text matches keyword groups with AND logic between groups and OR logic within groups
+ * Check if text matches keyword groups with AND logic between groups and OR logic within groups
- * @param {string} text - Text to search in
+ * @param {string} text - Text to search in
- * @param {Array<Array<string>>} keywordGroups - Array of keyword groups, each group is an array of OR keywords
+ * @param {Array<Array<string>>} keywordGroups - Array of keyword groups, each group is an array of OR keywords
- * @returns {boolean} - True if text matches all groups (AND logic) and at least one keyword in each group (OR logic)
+ * @returns {boolean} - True if text matches all groups (AND logic) and at least one keyword in each group (OR logic)
- */
+ */
-function matchesKeywordGroups(text, keywordGroups) {
+function matchesKeywordGroups(text, keywordGroups) {
-  if (!text || !Array.isArray(keywordGroups) || keywordGroups.length === 0) {
+  if (!text || !Array.isArray(keywordGroups) || keywordGroups.length === 0) {
-    return false;
+    return false;
-  }
+  }
-
+
-  const lowerText = text.toLowerCase();
+  const lowerText = text.toLowerCase();
-  
+  
-  // All groups must match (AND logic)
+  // All groups must match (AND logic)
-  return keywordGroups.every((group) => {
+  return keywordGroups.every((group) => {
-    if (!Array.isArray(group) || group.length === 0) {
+    if (!Array.isArray(group) || group.length === 0) {
-      return false;
+      return false;
-    }
+    }
-    // At least one keyword in the group must match (OR logic)
+    // At least one keyword in the group must match (OR logic)
-    return group.some((keyword) => 
+    return group.some((keyword) => 
-      lowerText.includes(keyword.toLowerCase().trim())
+      lowerText.includes(keyword.toLowerCase().trim())
-    );
+    );
-  });
+  });
-}
+}
-
+
-/**
+/**
- * Validate if text meets basic quality criteria
+ * Validate if text meets basic quality criteria
- */
+ */
-function isValidText(text, minLength = 30) {
+function isValidText(text, minLength = 30) {
-  if (!text || typeof text !== "string") {
+  if (!text || typeof text !== "string") {
-    return false;
+    return false;
-  }
+  }
-
+
-  // Check minimum length
+  // Check minimum length
-  if (text.length < minLength) {
+  if (text.length < minLength) {
-    return false;
+    return false;
-  }
+  }
-
+
-  // Check if text contains alphanumeric characters
+  // Check if text contains alphanumeric characters
-  if (!/[a-zA-Z0-9]/.test(text)) {
+  if (!/[a-zA-Z0-9]/.test(text)) {
-    return false;
+    return false;
-  }
+  }
-
+
-  return true;
+  return true;
-}
+}
-
+
-/**
+/**
- * Extract domain from URL
+ * Extract domain from URL
- */
+ */
-function extractDomain(url) {
+function extractDomain(url) {
-  if (!url || typeof url !== "string") {
+  if (!url || typeof url !== "string") {
-    return null;
+    return null;
-  }
+  }
-
+
-  try {
+  try {
-    const urlObj = new URL(url);
+    const urlObj = new URL(url);
-    return urlObj.hostname;
+    return urlObj.hostname;
-  } catch (error) {
+  } catch (error) {
-    return null;
+    return null;
-  }
+  }
-}
+}
-
+
-/**
+/**
- * Normalize URL by removing query parameters and fragments
+ * Normalize URL by removing query parameters and fragments
- */
+ */
-function normalizeUrl(url) {
+function normalizeUrl(url) {
-  if (!url || typeof url !== "string") {
+  if (!url || typeof url !== "string") {
-    return "";
+    return "";
-  }
+  }
-
+
-  try {
+  try {
-    const urlObj = new URL(url);
+    const urlObj = new URL(url);
-    return `${urlObj.protocol}//${urlObj.hostname}${urlObj.pathname}`;
+    return `${urlObj.protocol}//${urlObj.hostname}${urlObj.pathname}`;
-  } catch (error) {
+  } catch (error) {
-    return url;
+    return url;
-  }
+  }
-}
+}
-
+
-module.exports = {
+module.exports = {
-  cleanText,
+  cleanText,
-  containsAnyKeyword,
+  containsAnyKeyword,
-  containsAllKeywords,
+  containsAllKeywords,
-  matchesKeywordGroups,
+  matchesKeywordGroups,
-  isValidText,
+  isValidText,
-  extractDomain,
+  extractDomain,
-  normalizeUrl,
+  normalizeUrl,
-};
+};
--- a/job-search-parser/parsers/skipthedrive.js
+++ b/job-search-parser/parsers/skipthedrive.js
@@ -1,345 +1,345 @@
-/**
+/**
- * SkipTheDrive Job Parser
+ * SkipTheDrive Job Parser
- *
+ *
- * Parses remote job listings from SkipTheDrive.com
+ * Parses remote job listings from SkipTheDrive.com
- * Supports keyword search, job type filters, and pagination
+ * Supports keyword search, job type filters, and pagination
- */
+ */
-
+
-const { chromium } = require("playwright");
+const { chromium } = require("playwright");
-const path = require("path");
+const path = require("path");
-
+
-// Import from ai-analyzer core package
+// Import from ai-analyzer core package
-const {
+const {
-  logger,
+  logger,
-  cleanText,
+  cleanText,
-  containsAnyKeyword,
+  containsAnyKeyword,
-  containsAllKeywords,
+  containsAllKeywords,
-  parseLocationFilters,
+  parseLocationFilters,
-  validateLocationAgainstFilters,
+  validateLocationAgainstFilters,
-  extractLocationFromProfile,
+  extractLocationFromProfile,
-  analyzeBatch,
+  analyzeBatch,
-  checkOllamaStatus,
+  checkOllamaStatus,
-} = require("../../ai-analyzer");
+} = require("../../ai-analyzer");
-
+
-/**
+/**
- * Build search URL for SkipTheDrive
+ * Build search URL for SkipTheDrive
- * @param {string} keyword - Search keyword
+ * @param {string} keyword - Search keyword
- * @param {string} orderBy - Sort order (date, relevance)
+ * @param {string} orderBy - Sort order (date, relevance)
- * @param {Array<string>} jobTypes - Job types to filter (part time, full time, contract)
+ * @param {Array<string>} jobTypes - Job types to filter (part time, full time, contract)
- * @returns {string} - Formatted search URL
+ * @returns {string} - Formatted search URL
- */
+ */
-function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
+function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
-  let url = `https://www.skipthedrive.com/?s=${encodeURIComponent(keyword)}`;
+  let url = `https://www.skipthedrive.com/?s=${encodeURIComponent(keyword)}`;
-
+
-  if (orderBy) {
+  if (orderBy) {
-    url += `&orderby=${orderBy}`;
+    url += `&orderby=${orderBy}`;
-  }
+  }
-
+
-  // Add job type filters
+  // Add job type filters
-  jobTypes.forEach((type) => {
+  jobTypes.forEach((type) => {
-    url += `&jobtype=${encodeURIComponent(type)}`;
+    url += `&jobtype=${encodeURIComponent(type)}`;
-  });
+  });
-
+
-  return url;
+  return url;
-}
+}
-
+
-/**
+/**
- * Extract job data from a single job listing element
+ * Extract job data from a single job listing element
- * @param {Element} article - Job listing DOM element
+ * @param {Element} article - Job listing DOM element
- * @returns {Object} - Extracted job data
+ * @returns {Object} - Extracted job data
- */
+ */
-async function extractJobData(article) {
+async function extractJobData(article) {
-  try {
+  try {
-    // Extract job title and URL
+    // Extract job title and URL
-    const titleElement = await article.$("h2.post-title a");
+    const titleElement = await article.$("h2.post-title a");
-    const title = titleElement ? await titleElement.textContent() : "";
+    const title = titleElement ? await titleElement.textContent() : "";
-    const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";
+    const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";
-
+
-    // Extract date
+    // Extract date
-    const dateElement = await article.$("time.post-date");
+    const dateElement = await article.$("time.post-date");
-    const datePosted = dateElement
+    const datePosted = dateElement
-      ? await dateElement.getAttribute("datetime")
+      ? await dateElement.getAttribute("datetime")
-      : "";
+      : "";
-    const dateText = dateElement ? await dateElement.textContent() : "";
+    const dateText = dateElement ? await dateElement.textContent() : "";
-
+
-    // Extract company name
+    // Extract company name
-    const companyElement = await article.$(
+    const companyElement = await article.$(
-      ".custom_fields_company_name_display_search_results"
+      ".custom_fields_company_name_display_search_results"
-    );
+    );
-    let company = companyElement ? await companyElement.textContent() : "";
+    let company = companyElement ? await companyElement.textContent() : "";
-    company = company.replace(/^\s*[^\s]+\s*/, "").trim(); // Remove icon
+    company = company.replace(/^\s*[^\s]+\s*/, "").trim(); // Remove icon
-
+
-    // Extract days ago
+    // Extract days ago
-    const daysAgoElement = await article.$(
+    const daysAgoElement = await article.$(
-      ".custom_fields_job_date_display_search_results"
+      ".custom_fields_job_date_display_search_results"
-    );
+    );
-    let daysAgo = daysAgoElement ? await daysAgoElement.textContent() : "";
+    let daysAgo = daysAgoElement ? await daysAgoElement.textContent() : "";
-    daysAgo = daysAgo.replace(/^\s*[^\s]+\s*/, "").trim(); // Remove icon
+    daysAgo = daysAgo.replace(/^\s*[^\s]+\s*/, "").trim(); // Remove icon
-
+
-    // Extract job description excerpt
+    // Extract job description excerpt
-    const excerptElement = await article.$(".excerpt_part");
+    const excerptElement = await article.$(".excerpt_part");
-    const description = excerptElement
+    const description = excerptElement
-      ? await excerptElement.textContent()
+      ? await excerptElement.textContent()
-      : "";
+      : "";
-
+
-    // Check if featured/sponsored
+    // Check if featured/sponsored
-    const featuredElement = await article.$(".custom_fields_sponsored_job");
+    const featuredElement = await article.$(".custom_fields_sponsored_job");
-    const isFeatured = !!featuredElement;
+    const isFeatured = !!featuredElement;
-
+
-    // Extract job ID from article ID
+    // Extract job ID from article ID
-    const articleId = await article.getAttribute("id");
+    const articleId = await article.getAttribute("id");
-    const jobId = articleId ? articleId.replace("post-", "") : "";
+    const jobId = articleId ? articleId.replace("post-", "") : "";
-
+
-    return {
+    return {
-      jobId,
+      jobId,
-      title: cleanText(title),
+      title: cleanText(title),
-      company: cleanText(company),
+      company: cleanText(company),
-      jobUrl,
+      jobUrl,
-      datePosted,
+      datePosted,
-      dateText: cleanText(dateText),
+      dateText: cleanText(dateText),
-      daysAgo: cleanText(daysAgo),
+      daysAgo: cleanText(daysAgo),
-      description: cleanText(description),
+      description: cleanText(description),
-      isFeatured,
+      isFeatured,
-      source: "skipthedrive",
+      source: "skipthedrive",
-      timestamp: new Date().toISOString(),
+      timestamp: new Date().toISOString(),
-    };
+    };
-  } catch (error) {
+  } catch (error) {
-    logger.error(`Error extracting job data: ${error.message}`);
+    logger.error(`Error extracting job data: ${error.message}`);
-    return null;
+    return null;
-  }
+  }
-}
+}
-
+
-/**
+/**
- * Parse SkipTheDrive job listings
+ * Parse SkipTheDrive job listings
- * @param {Object} options - Parser options
+ * @param {Object} options - Parser options
- * @returns {Promise<Array>} - Array of parsed job listings
+ * @returns {Promise<Array>} - Array of parsed job listings
- */
+ */
-async function parseSkipTheDrive(options = {}) {
+async function parseSkipTheDrive(options = {}) {
-  const {
+  const {
-    keywords = process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
+    keywords = process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
-      "software engineer",
+      "software engineer",
-      "developer",
+      "developer",
-    ],
+    ],
-    jobTypes = process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],
+    jobTypes = process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],
-    locationFilter = process.env.LOCATION_FILTER || "",
+    locationFilter = process.env.LOCATION_FILTER || "",
-    maxPages = parseInt(process.env.MAX_PAGES) || 5,
+    maxPages = parseInt(process.env.MAX_PAGES) || 5,
-    headless = process.env.HEADLESS !== "false",
+    headless = process.env.HEADLESS !== "false",
-    enableAI = process.env.ENABLE_AI_ANALYSIS === "true",
+    enableAI = process.env.ENABLE_AI_ANALYSIS === "true",
-    aiContext = process.env.AI_CONTEXT || "remote job opportunities analysis",
+    aiContext = process.env.AI_CONTEXT || "remote job opportunities analysis",
-    useAndLogic = false, // Use AND logic instead of OR logic for keywords
+    useAndLogic = false, // Use AND logic instead of OR logic for keywords
-  } = options;
+  } = options;
-
+
-  logger.step("Starting SkipTheDrive parser...");
+  logger.step("Starting SkipTheDrive parser...");
-  logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
+  logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
-  logger.info(`🔗 Keyword Logic: ${useAndLogic ? "AND (all keywords must match)" : "OR (any keyword matches)"}`);
+  logger.info(`🔗 Keyword Logic: ${useAndLogic ? "AND (all keywords must match)" : "OR (any keyword matches)"}`);
-  logger.info(
+  logger.info(
-    `📋 Job Types: ${jobTypes.length > 0 ? jobTypes.join(", ") : "All"}`
+    `📋 Job Types: ${jobTypes.length > 0 ? jobTypes.join(", ") : "All"}`
-  );
+  );
-  logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
+  logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
-  logger.info(`📄 Max Pages: ${maxPages}`);
+  logger.info(`📄 Max Pages: ${maxPages}`);
-
+
-  const browser = await chromium.launch({
+  const browser = await chromium.launch({
-    headless,
+    headless,
-    args: [
+    args: [
-      "--no-sandbox",
+      "--no-sandbox",
-      "--disable-setuid-sandbox",
+      "--disable-setuid-sandbox",
-      "--disable-dev-shm-usage",
+      "--disable-dev-shm-usage",
-    ],
+    ],
-  });
+  });
-
+
-  const context = await browser.newContext({
+  const context = await browser.newContext({
-    userAgent:
+    userAgent:
-      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
-  });
+  });
-
+
-  const results = [];
+  const results = [];
-  const rejectedResults = [];
+  const rejectedResults = [];
-  const seenJobs = new Set();
+  const seenJobs = new Set();
-
+
-  try {
+  try {
-    // For AND logic, combine all keywords into a single search query
+    // For AND logic, combine all keywords into a single search query
-    // For OR logic, search each keyword separately
+    // For OR logic, search each keyword separately
-    const searchKeywords = useAndLogic ? [keywords.join(" ")] : keywords;
+    const searchKeywords = useAndLogic ? [keywords.join(" ")] : keywords;
-
+
-    // Search for each keyword (or combined keyword for AND logic)
+    // Search for each keyword (or combined keyword for AND logic)
-    for (const keyword of searchKeywords) {
+    for (const keyword of searchKeywords) {
-      logger.info(`\n🔍 Searching for: ${keyword}`);
+      logger.info(`\n🔍 Searching for: ${keyword}`);
-
+
-      const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
+      const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
-      const page = await context.newPage();
+      const page = await context.newPage();
-
+
-      try {
+      try {
-        logger.info(
+        logger.info(
-          `Attempting navigation to: ${searchUrl} at ${new Date().toISOString()}`
+          `Attempting navigation to: ${searchUrl} at ${new Date().toISOString()}`
-        );
+        );
-        await page.goto(searchUrl, {
+        await page.goto(searchUrl, {
-          waitUntil: "domcontentloaded",
+          waitUntil: "domcontentloaded",
-          timeout: 30000,
+          timeout: 30000,
-        });
+        });
-        logger.info(
+        logger.info(
-          `Navigation completed successfully at ${new Date().toISOString()}`
+          `Navigation completed successfully at ${new Date().toISOString()}`
-        );
+        );
-
+
-        // Wait for job listings to load
+        // Wait for job listings to load
-        logger.info("Waiting for selector #loops-wrapper");
+        logger.info("Waiting for selector #loops-wrapper");
-        await page
+        await page
-          .waitForSelector("#loops-wrapper", { timeout: 5000 })
+          .waitForSelector("#loops-wrapper", { timeout: 5000 })
-          .catch(() => {
+          .catch(() => {
-            logger.warning(`No results found for keyword: ${keyword}`);
+            logger.warning(`No results found for keyword: ${keyword}`);
-          });
+          });
-        logger.info("Selector wait completed");
+        logger.info("Selector wait completed");
-
+
-        let currentPage = 1;
+        let currentPage = 1;
-        let hasNextPage = true;
+        let hasNextPage = true;
-
+
-        while (hasNextPage && currentPage <= maxPages) {
+        while (hasNextPage && currentPage <= maxPages) {
-          logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);
+          logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);
-
+
-          // Extract all job articles on current page
+          // Extract all job articles on current page
-          const jobArticles = await page.$$("article[id^='post-']");
+          const jobArticles = await page.$$("article[id^='post-']");
-          logger.info(
+          logger.info(
-            `Found ${jobArticles.length} job listings on page ${currentPage}`
+            `Found ${jobArticles.length} job listings on page ${currentPage}`
-          );
+          );
-
+
-          for (const article of jobArticles) {
+          for (const article of jobArticles) {
-            const jobData = await extractJobData(article);
+            const jobData = await extractJobData(article);
-
+
-            if (!jobData || seenJobs.has(jobData.jobId)) {
+            if (!jobData || seenJobs.has(jobData.jobId)) {
-              continue;
+              continue;
-            }
+            }
-
+
-            seenJobs.add(jobData.jobId);
+            seenJobs.add(jobData.jobId);
-
+
-            // Add keyword that found this job
+            // Add keyword that found this job
-            jobData.searchKeyword = keyword;
+            jobData.searchKeyword = keyword;
-
+
-            // Validate job against keywords
+            // Validate job against keywords
-            const fullText = `${jobData.title} ${jobData.description} ${jobData.company}`;
+            const fullText = `${jobData.title} ${jobData.description} ${jobData.company}`;
-            const keywordMatch = useAndLogic 
+            const keywordMatch = useAndLogic 
-              ? containsAllKeywords(fullText, keywords)
+              ? containsAllKeywords(fullText, keywords)
-              : containsAnyKeyword(fullText, keywords);
+              : containsAnyKeyword(fullText, keywords);
-            
+            
-            if (!keywordMatch) {
+            if (!keywordMatch) {
-              rejectedResults.push({
+              rejectedResults.push({
-                ...jobData,
+                ...jobData,
-                rejected: true,
+                rejected: true,
-                reason: useAndLogic 
+                reason: useAndLogic 
-                  ? "Not all keywords found in job listing" 
+                  ? "Not all keywords found in job listing" 
-                  : "Keywords not found in job listing",
+                  : "Keywords not found in job listing",
-              });
+              });
-              continue;
+              continue;
-            }
+            }
-
+
-            // Location validation (if enabled)
+            // Location validation (if enabled)
-            if (locationFilter) {
+            if (locationFilter) {
-              const locationFilters = parseLocationFilters(locationFilter);
+              const locationFilters = parseLocationFilters(locationFilter);
-              // For SkipTheDrive, most jobs are remote, but we can check the title/description
+              // For SkipTheDrive, most jobs are remote, but we can check the title/description
-              const locationValid =
+              const locationValid =
-                fullText.toLowerCase().includes("remote") ||
+                fullText.toLowerCase().includes("remote") ||
-                locationFilters.some((filter) =>
+                locationFilters.some((filter) =>
-                  fullText.toLowerCase().includes(filter.toLowerCase())
+                  fullText.toLowerCase().includes(filter.toLowerCase())
-                );
+                );
-
+
-              if (!locationValid) {
+              if (!locationValid) {
-                rejectedResults.push({
+                rejectedResults.push({
-                  ...jobData,
+                  ...jobData,
-                  rejected: true,
+                  rejected: true,
-                  reason: "Location requirements not met",
+                  reason: "Location requirements not met",
-                });
+                });
-                continue;
+                continue;
-              }
+              }
-
+
-              jobData.locationValid = locationValid;
+              jobData.locationValid = locationValid;
-            }
+            }
-
+
-            logger.success(`✅ Found: ${jobData.title} at ${jobData.company}`);
+            logger.success(`✅ Found: ${jobData.title} at ${jobData.company}`);
-            results.push(jobData);
+            results.push(jobData);
-          }
+          }
-
+
-          // Check for next page
+          // Check for next page
-          const nextPageLink = await page.$("a.nextp");
+          const nextPageLink = await page.$("a.nextp");
-          if (nextPageLink && currentPage < maxPages) {
+          if (nextPageLink && currentPage < maxPages) {
-            logger.info("📄 Moving to next page...");
+            logger.info("📄 Moving to next page...");
-            await nextPageLink.click();
+            await nextPageLink.click();
-            await page.waitForLoadState("domcontentloaded");
+            await page.waitForLoadState("domcontentloaded");
-            await page.waitForTimeout(2000); // Wait for content to load
+            await page.waitForTimeout(2000); // Wait for content to load
-            currentPage++;
+            currentPage++;
-          } else {
+          } else {
-            hasNextPage = false;
+            hasNextPage = false;
-          }
+          }
-        }
+        }
-      } catch (error) {
+      } catch (error) {
-        logger.error(`Error processing keyword "${keyword}": ${error.message}`);
+        logger.error(`Error processing keyword "${keyword}": ${error.message}`);
-      } finally {
+      } finally {
-        await page.close();
+        await page.close();
-      }
+      }
-    }
+    }
-
+
-    logger.success(`\n✅ Parsing complete!`);
+    logger.success(`\n✅ Parsing complete!`);
-    logger.info(`📊 Total jobs found: ${results.length}`);
+    logger.info(`📊 Total jobs found: ${results.length}`);
-    logger.info(`❌ Rejected jobs: ${rejectedResults.length}`);
+    logger.info(`❌ Rejected jobs: ${rejectedResults.length}`);
-
+
-    // Run AI analysis if enabled
+    // Run AI analysis if enabled
-    let aiAnalysis = null;
+    let aiAnalysis = null;
-    if (enableAI && results.length > 0) {
+    if (enableAI && results.length > 0) {
-      logger.step("Running AI analysis on job listings...");
+      logger.step("Running AI analysis on job listings...");
-
+
-      const aiAvailable = await checkOllamaStatus();
+      const aiAvailable = await checkOllamaStatus();
-      if (aiAvailable) {
+      if (aiAvailable) {
-        const analysisData = results.map((job) => ({
+        const analysisData = results.map((job) => ({
-          text: `${job.title} at ${job.company}. ${job.description}`,
+          text: `${job.title} at ${job.company}. ${job.description}`,
-          metadata: {
+          metadata: {
-            jobId: job.jobId,
+            jobId: job.jobId,
-            company: job.company,
+            company: job.company,
-            daysAgo: job.daysAgo,
+            daysAgo: job.daysAgo,
-          },
+          },
-        }));
+        }));
-
+
-        aiAnalysis = await analyzeBatch(analysisData, aiContext);
+        aiAnalysis = await analyzeBatch(analysisData, aiContext);
-
+
-        // Merge AI analysis with results
+        // Merge AI analysis with results
-        results.forEach((job, index) => {
+        results.forEach((job, index) => {
-          if (aiAnalysis && aiAnalysis[index]) {
+          if (aiAnalysis && aiAnalysis[index]) {
-            job.aiAnalysis = {
+            job.aiAnalysis = {
-              isRelevant: aiAnalysis[index].isRelevant,
+              isRelevant: aiAnalysis[index].isRelevant,
-              confidence: aiAnalysis[index].confidence,
+              confidence: aiAnalysis[index].confidence,
-              reasoning: aiAnalysis[index].reasoning,
+              reasoning: aiAnalysis[index].reasoning,
-            };
+            };
-          }
+          }
-        });
+        });
-
+
-        logger.success("✅ AI analysis completed");
+        logger.success("✅ AI analysis completed");
-      } else {
+      } else {
-        logger.warning("⚠️ AI not available - skipping analysis");
+        logger.warning("⚠️ AI not available - skipping analysis");
-      }
+      }
-    }
+    }
-
+
-    return {
+    return {
-      results,
+      results,
-      rejectedResults,
+      rejectedResults,
-      metadata: {
+      metadata: {
-        source: "skipthedrive",
+        source: "skipthedrive",
-        totalJobs: results.length,
+        totalJobs: results.length,
-        rejectedJobs: rejectedResults.length,
+        rejectedJobs: rejectedResults.length,
-        keywords: keywords,
+        keywords: keywords,
-        jobTypes: jobTypes,
+        jobTypes: jobTypes,
-        locationFilter: locationFilter,
+        locationFilter: locationFilter,
-        aiAnalysisEnabled: enableAI,
+        aiAnalysisEnabled: enableAI,
-        aiAnalysisCompleted: !!aiAnalysis,
+        aiAnalysisCompleted: !!aiAnalysis,
-        timestamp: new Date().toISOString(),
+        timestamp: new Date().toISOString(),
-      },
+      },
-    };
+    };
-  } catch (error) {
+  } catch (error) {
-    logger.error(`Fatal error in SkipTheDrive parser: ${error.message}`);
+    logger.error(`Fatal error in SkipTheDrive parser: ${error.message}`);
-    throw error;
+    throw error;
-  } finally {
+  } finally {
-    await browser.close();
+    await browser.close();
-  }
+  }
-}
+}
-
+
-// Export the parser
+// Export the parser
-module.exports = {
+module.exports = {
-  parseSkipTheDrive,
+  parseSkipTheDrive,
-  buildSearchUrl,
+  buildSearchUrl,
-  extractJobData,
+  extractJobData,
-};
+};