Enhance job search parser with LinkedIn strategy and configuration updates

- Added LinkedIn jobs parsing strategy to support job extraction from LinkedIn.
- Updated job search parser to include new site strategy and improved argument parsing for max pages and exclusion of rejected results.
- Enhanced README documentation to reflect new features and usage examples.
- Refactored existing strategies for consistency and improved error handling.
This commit is contained in:
tanyar09 2025-12-16 23:17:12 -05:00
parent bbfd3c84aa
commit 4099b23744
8 changed files with 2431 additions and 888 deletions

View File

@ -62,3 +62,5 @@ class CoreParser {
module.exports = CoreParser; module.exports = CoreParser;

File diff suppressed because it is too large Load Diff

View File

@ -10,6 +10,7 @@ const path = require("path");
const fs = require("fs"); const fs = require("fs");
const CoreParser = require("../core-parser"); const CoreParser = require("../core-parser");
const { skipthedriveStrategy } = require("./strategies/skipthedrive-strategy"); const { skipthedriveStrategy } = require("./strategies/skipthedrive-strategy");
const { linkedinJobsStrategy } = require("./strategies/linkedin-jobs-strategy");
const { logger, analyzeBatch, checkOllamaStatus } = require("ai-analyzer"); const { logger, analyzeBatch, checkOllamaStatus } = require("ai-analyzer");
// Load environment variables // Load environment variables
@ -18,14 +19,16 @@ require("dotenv").config({ path: path.join(__dirname, ".env") });
// Configuration from environment // Configuration from environment
const HEADLESS = process.env.HEADLESS !== "false"; const HEADLESS = process.env.HEADLESS !== "false";
const SEARCH_KEYWORDS = const SEARCH_KEYWORDS =
process.env.SEARCH_KEYWORDS || "software engineer,developer,programmer"; process.env.SEARCH_KEYWORDS || "co-op,intern";//"software engineer,developer,programmer";
const LOCATION_FILTER = process.env.LOCATION_FILTER; const LOCATION_FILTER = process.env.LOCATION_FILTER;
const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS === "true"; const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS === "true";
const MAX_PAGES = parseInt(process.env.MAX_PAGES) || 5; const MAX_PAGES = parseInt(process.env.MAX_PAGES) || 5;
const EXCLUDE_REJECTED = process.env.EXCLUDE_REJECTED === "true";
// Available site strategies // Available site strategies
const SITE_STRATEGIES = { const SITE_STRATEGIES = {
skipthedrive: skipthedriveStrategy, skipthedrive: skipthedriveStrategy,
linkedin: linkedinJobsStrategy,
// Add more site strategies here // Add more site strategies here
// indeed: indeedStrategy, // indeed: indeedStrategy,
// glassdoor: glassdoorStrategy, // glassdoor: glassdoorStrategy,
@ -41,6 +44,7 @@ function parseArguments() {
keywords: null, keywords: null,
locationFilter: null, locationFilter: null,
maxPages: MAX_PAGES, maxPages: MAX_PAGES,
excludeRejected: EXCLUDE_REJECTED,
}; };
args.forEach((arg) => { args.forEach((arg) => {
@ -57,7 +61,15 @@ function parseArguments() {
} else if (arg.startsWith("--location=")) { } else if (arg.startsWith("--location=")) {
options.locationFilter = arg.split("=")[1]; options.locationFilter = arg.split("=")[1];
} else if (arg.startsWith("--max-pages=")) { } else if (arg.startsWith("--max-pages=")) {
options.maxPages = parseInt(arg.split("=")[1]) || MAX_PAGES; const value = arg.split("=")[1];
// Support "all" or "0" to mean unlimited pages
if (value === "all" || value === "0") {
options.maxPages = 0; // 0 means unlimited
} else {
options.maxPages = parseInt(value) || MAX_PAGES;
}
} else if (arg === "--no-rejected" || arg === "--exclude-rejected") {
options.excludeRejected = true;
} }
}); });
@ -84,6 +96,7 @@ async function startJobSearchParser(options = {}) {
finalOptions.keywords || SEARCH_KEYWORDS.split(",").map((k) => k.trim()); finalOptions.keywords || SEARCH_KEYWORDS.split(",").map((k) => k.trim());
const locationFilter = finalOptions.locationFilter || LOCATION_FILTER; const locationFilter = finalOptions.locationFilter || LOCATION_FILTER;
const sites = finalOptions.sites; const sites = finalOptions.sites;
const excludeRejected = finalOptions.excludeRejected !== undefined ? finalOptions.excludeRejected : EXCLUDE_REJECTED;
logger.info(`📦 Selected job sites: ${sites.join(", ")}`); logger.info(`📦 Selected job sites: ${sites.join(", ")}`);
logger.info(`🔍 Search Keywords: ${keywords.join(", ")}`); logger.info(`🔍 Search Keywords: ${keywords.join(", ")}`);
@ -108,18 +121,46 @@ async function startJobSearchParser(options = {}) {
logger.step(`\n🌐 Parsing ${site}...`); logger.step(`\n🌐 Parsing ${site}...`);
const startTime = Date.now(); const startTime = Date.now();
const parseResult = await strategy(coreParser, { // Prepare strategy options
const strategyOptions = {
keywords, keywords,
locationFilter, locationFilter,
maxPages: finalOptions.maxPages, maxPages: finalOptions.maxPages,
}); };
// Add credentials for LinkedIn
if (site === "linkedin") {
const LINKEDIN_USERNAME = process.env.LINKEDIN_USERNAME;
const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD;
if (!LINKEDIN_USERNAME || !LINKEDIN_PASSWORD) {
logger.error(`❌ LinkedIn credentials not found. Please set LINKEDIN_USERNAME and LINKEDIN_PASSWORD in .env file`);
siteResults[site] = {
count: 0,
rejected: 0,
duration: "0s",
error: "LinkedIn credentials not found",
};
continue;
}
strategyOptions.credentials = {
username: LINKEDIN_USERNAME,
password: LINKEDIN_PASSWORD,
};
strategyOptions.location = process.env.LINKEDIN_JOB_LOCATION || "";
}
const parseResult = await strategy(coreParser, strategyOptions);
const { results, rejectedResults, summary } = parseResult; const { results, rejectedResults, summary } = parseResult;
const duration = ((Date.now() - startTime) / 1000).toFixed(2); const duration = ((Date.now() - startTime) / 1000).toFixed(2);
// Collect results // Collect results
logger.info(`📦 Strategy returned: ${results.length} results, ${rejectedResults.length} rejected`);
allResults.push(...results); allResults.push(...results);
allRejectedResults.push(...rejectedResults); allRejectedResults.push(...rejectedResults);
logger.info(`📦 Total accumulated: ${allResults.length} results, ${allRejectedResults.length} rejected`);
siteResults[site] = { siteResults[site] = {
count: results.length, count: results.length,
@ -162,6 +203,9 @@ async function startJobSearchParser(options = {}) {
} }
// Save results // Save results
logger.info(`💾 Preparing to save: ${allResults.length} results, ${allRejectedResults.length} rejected`);
logger.info(`💾 EXCLUDE_REJECTED env: ${process.env.EXCLUDE_REJECTED}, excludeRejected variable: ${excludeRejected}`);
const outputData = { const outputData = {
metadata: { metadata: {
extractedAt: new Date().toISOString(), extractedAt: new Date().toISOString(),
@ -171,11 +215,21 @@ async function startJobSearchParser(options = {}) {
keywords: keywords.join(", "), keywords: keywords.join(", "),
locationFilter, locationFilter,
analysisResults, analysisResults,
rejectedJobsExcluded: excludeRejected,
}, },
results: allResults, results: allResults,
rejectedResults: allRejectedResults,
siteResults, siteResults,
}; };
// Always include rejectedResults if not excluded (make it explicit, not using spread)
if (!excludeRejected) {
outputData.rejectedResults = allRejectedResults;
logger.info(`✅ Including ${allRejectedResults.length} rejected results in output`);
} else {
logger.info(`⏭️ Excluding rejected results (EXCLUDE_REJECTED=true)`);
}
logger.info(`💾 Final output: ${outputData.results.length} results, ${outputData.rejectedResults?.length || 0} rejected`);
const resultsDir = path.join(__dirname, "results"); const resultsDir = path.join(__dirname, "results");
if (!fs.existsSync(resultsDir)) { if (!fs.existsSync(resultsDir)) {

File diff suppressed because it is too large Load Diff

View File

@ -1,302 +1,299 @@
/** /**
* SkipTheDrive Parsing Strategy * SkipTheDrive Parsing Strategy
* *
* Uses core-parser for browser management and ai-analyzer for utilities * Uses core-parser for browser management and ai-analyzer for utilities
*/ */
const { const {
logger, logger,
cleanText, cleanText,
containsAnyKeyword, containsAnyKeyword,
validateLocationAgainstFilters, validateLocationAgainstFilters,
} = require("ai-analyzer"); } = require("ai-analyzer");
/**
 * Build a SkipTheDrive search URL for a keyword.
 *
 * @param {string} keyword - Search term placed in the `s` query parameter.
 * @param {string} [orderBy="date"] - Sort order for results.
 * @param {string[]} [jobTypes=[]] - Optional job-type filters, comma-joined
 *   into a single `job_type` parameter.
 * @returns {string} Fully-qualified search URL.
 */
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
  const url = new URL("https://www.skipthedrive.com/");
  url.searchParams.set("s", keyword);
  url.searchParams.set("orderby", orderBy);

  if (jobTypes && jobTypes.length > 0) {
    url.searchParams.set("job_type", jobTypes.join(","));
  }

  return url.toString();
}
/**
 * SkipTheDrive parsing strategy.
 *
 * Drives a core-parser managed browser page through SkipTheDrive search
 * results for each keyword, extracts job listings, de-duplicates them by
 * job ID, and (optionally) partitions them by a location filter.
 *
 * @param {object} coreParser - Browser manager exposing createPage/navigateTo.
 * @param {object} [options]
 * @param {string[]} [options.keywords] - Search terms; one search per keyword.
 * @param {string|null} [options.locationFilter] - When set, jobs failing
 *   validateLocationAgainstFilters go to rejectedResults instead of results.
 * @param {number} [options.maxPages=5] - Result pages per keyword; 0 means
 *   unlimited (matches the runner's `--max-pages=all` / `0` convention).
 * @param {string[]} [options.jobTypes] - Optional job-type URL filters.
 * @returns {Promise<{results: object[], rejectedResults: object[], summary: object}>}
 * @throws Re-throws any top-level failure after logging it; per-keyword
 *   failures are logged and skipped.
 */
async function skipthedriveStrategy(coreParser, options = {}) {
  const {
    keywords = ["software engineer", "developer", "programmer"],
    locationFilter = null,
    maxPages = 5,
    jobTypes = [],
  } = options;

  const results = [];
  const rejectedResults = [];
  const seenJobs = new Set();

  try {
    // Create main page
    const page = await coreParser.createPage("skipthedrive-main");

    logger.info("🚀 Starting SkipTheDrive parser...");
    logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
    logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
    logger.info(`📄 Max Pages: ${maxPages}`);

    // Search for each keyword
    for (const keyword of keywords) {
      logger.info(`\n🔍 Searching for: ${keyword}`);

      const searchUrl = buildSearchUrl(keyword, "date", jobTypes);

      try {
        // Navigate to search results
        await coreParser.navigateTo(searchUrl, {
          pageId: "skipthedrive-main",
          retries: 2,
          timeout: 30000,
        });

        // Wait for job listings to load
        const hasResults = await page
          .waitForSelector("#loops-wrapper", {
            timeout: 5000,
          })
          .then(() => true)
          .catch(() => {
            logger.warning(`No results found for keyword: ${keyword}`);
            return false;
          });

        if (!hasResults) {
          continue;
        }

        // Process multiple pages.
        // FIX: maxPages === 0 now means "unlimited", matching the CLI's
        // `--max-pages=all`/`--max-pages=0` handling in the job-search
        // runner; previously 0 made `currentPage <= maxPages` false and
        // silently skipped every page.
        const unlimited = maxPages === 0;
        let currentPage = 1;
        let hasNextPage = true;

        while (hasNextPage && (unlimited || currentPage <= maxPages)) {
          logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);

          // Extract jobs from current page
          const pageJobs = await extractJobsFromPage(
            page,
            keyword,
            locationFilter
          );

          for (const job of pageJobs) {
            // Skip duplicates
            if (seenJobs.has(job.jobId)) continue;
            seenJobs.add(job.jobId);

            // Validate location if filtering enabled
            if (locationFilter) {
              const locationValid = validateLocationAgainstFilters(
                job.location,
                locationFilter
              );

              if (!locationValid) {
                rejectedResults.push({
                  ...job,
                  rejectionReason: "Location filter mismatch",
                });
                continue;
              }
            }

            results.push(job);
          }

          // Check for next page
          hasNextPage = await hasNextPageAvailable(page);
          if (hasNextPage && (unlimited || currentPage < maxPages)) {
            await navigateToNextPage(page, currentPage + 1);
            currentPage++;

            // Wait for new page to load
            await page.waitForTimeout(2000);
          } else {
            hasNextPage = false;
          }
        }
      } catch (error) {
        logger.error(`Error processing keyword "${keyword}": ${error.message}`);
      }
    }

    logger.info(
      `🎯 SkipTheDrive parsing completed: ${results.length} jobs found, ${rejectedResults.length} rejected`
    );

    return {
      results,
      rejectedResults,
      summary: {
        totalJobs: results.length,
        totalRejected: rejectedResults.length,
        keywords: keywords.join(", "),
        locationFilter,
        source: "skipthedrive",
      },
    };
  } catch (error) {
    logger.error(`❌ SkipTheDrive parsing failed: ${error.message}`);
    throw error;
  }
}
/**
 * Collect job records from the currently loaded search-results page.
 *
 * @param {object} page - Playwright-style page handle.
 * @param {string} keyword - Keyword the page was searched with (tagged onto jobs).
 * @param {string|null} locationFilter - Accepted for interface parity; filtering
 *   itself happens in the caller.
 * @returns {Promise<object[]>} Successfully extracted jobs; per-listing
 *   failures are logged and skipped.
 */
async function extractJobsFromPage(page, keyword, locationFilter) {
  const jobs = [];

  try {
    // Every listing is rendered as an <article class="job_listing"> element.
    const listings = await page.$$("article.job_listing");

    for (const listing of listings) {
      try {
        const job = await extractJobData(listing, keyword);
        if (job) {
          jobs.push(job);
        }
      } catch (error) {
        logger.warning(`Failed to extract job data: ${error.message}`);
      }
    }
  } catch (error) {
    logger.error(`Failed to extract jobs from page: ${error.message}`);
  }

  return jobs;
}
/**
 * Extract a single job record from one `article.job_listing` element.
 *
 * Missing sub-elements degrade to empty strings rather than failing the
 * whole listing; a null return means extraction failed entirely.
 *
 * @param {object} jobElement - Element handle for the listing article.
 * @param {string} keyword - Search keyword to tag the record with.
 * @returns {Promise<object|null>} Job record, or null on extraction error.
 */
async function extractJobData(jobElement, keyword) {
  try {
    // Job ID comes from the article's DOM id, e.g. "post-12345".
    const articleId = (await jobElement.getAttribute("id")) || "";
    const jobId = articleId ? articleId.replace("post-", "") : "";

    // Title and canonical job URL
    const titleElement = await jobElement.$(".job_listing-title a");
    const title = titleElement
      ? cleanText(await titleElement.textContent())
      : "";
    const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";

    // Company
    const companyElement = await jobElement.$(".company");
    const company = companyElement
      ? cleanText(await companyElement.textContent())
      : "";

    // Location
    const locationElement = await jobElement.$(".location");
    const location = locationElement
      ? cleanText(await locationElement.textContent())
      : "";

    // Relative posting date, e.g. "3 days ago"
    const dateElement = await jobElement.$(".job-date");
    const dateText = dateElement
      ? cleanText(await dateElement.textContent())
      : "";

    // Description snippet
    const descElement = await jobElement.$(".job_listing-description");
    const description = descElement
      ? cleanText(await descElement.textContent())
      : "";

    // Featured flag
    const featuredElement = await jobElement.$(".featured");
    const isFeatured = featuredElement !== null;

    // Convert "N days ago" into an ISO date string.
    let datePosted = null;
    let daysAgo = null;

    if (dateText) {
      const match = dateText.match(/(\d+)\s+days?\s+ago/);
      if (match) {
        // FIX: always pass an explicit radix to parseInt.
        daysAgo = Number.parseInt(match[1], 10);
        const date = new Date();
        date.setDate(date.getDate() - daysAgo);
        datePosted = date.toISOString().split("T")[0];
      }
    }

    return {
      jobId,
      title,
      company,
      location,
      jobUrl,
      datePosted,
      dateText,
      daysAgo,
      description,
      isFeatured,
      keyword,
      extractedAt: new Date().toISOString(),
      source: "skipthedrive",
    };
  } catch (error) {
    logger.warning(`Error extracting job data: ${error.message}`);
    return null;
  }
}
/**
 * Determine whether a ".next-page" pagination control exists on the page.
 *
 * @param {object} page - Playwright-style page handle.
 * @returns {Promise<boolean>} True when a next-page link is present; false
 *   when absent or when the lookup itself throws.
 */
async function hasNextPageAvailable(page) {
  try {
    return (await page.$(".next-page")) !== null;
  } catch {
    return false;
  }
}
/**
 * Click the ".next-page" pagination control, if present.
 *
 * Failures are logged rather than thrown so pagination degrades gracefully.
 *
 * @param {object} page - Playwright-style page handle.
 * @param {number} pageNumber - Target page number (used only in the warning).
 */
async function navigateToNextPage(page, pageNumber) {
  try {
    const nextButton = await page.$(".next-page");
    await nextButton?.click();
  } catch (error) {
    logger.warning(
      `Failed to navigate to page ${pageNumber}: ${error.message}`
    );
  }
}
module.exports = { extractJobsFromPage,
skipthedriveStrategy, extractJobData,
buildSearchUrl, };
extractJobsFromPage,
extractJobData,
};

View File

@ -31,12 +31,13 @@ const LINKEDIN_USERNAME = process.env.LINKEDIN_USERNAME;
const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD; const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD;
const HEADLESS = process.env.HEADLESS !== "false"; const HEADLESS = process.env.HEADLESS !== "false";
const SEARCH_KEYWORDS = const SEARCH_KEYWORDS =
process.env.SEARCH_KEYWORDS || "layoff,downsizing";//,job cuts"; process.env.SEARCH_KEYWORDS || "layoff";//,downsizing";//,job cuts";
const LOCATION_FILTER = process.env.LOCATION_FILTER; const LOCATION_FILTER = process.env.LOCATION_FILTER;
const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS !== "false"; const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS !== "false";
const AI_CONTEXT = process.env.AI_CONTEXT || "job market analysis and trends"; const AI_CONTEXT = process.env.AI_CONTEXT || "job market analysis and trends";
const OLLAMA_MODEL = process.env.OLLAMA_MODEL || DEFAULT_MODEL; const OLLAMA_MODEL = process.env.OLLAMA_MODEL || DEFAULT_MODEL;
const MAX_RESULTS = parseInt(process.env.MAX_RESULTS) || 50; const MAX_RESULTS = parseInt(process.env.MAX_RESULTS) || 50;
const EXTRACT_LOCATION_FROM_PROFILE = process.env.EXTRACT_LOCATION_FROM_PROFILE === "true";
/** /**
* Main LinkedIn parser function * Main LinkedIn parser function
@ -71,6 +72,7 @@ async function startLinkedInParser(options = {}) {
keywords, keywords,
locationFilter: LOCATION_FILTER, locationFilter: LOCATION_FILTER,
maxResults: MAX_RESULTS, maxResults: MAX_RESULTS,
extractLocationFromProfile: EXTRACT_LOCATION_FROM_PROFILE,
credentials: { credentials: {
username: LINKEDIN_USERNAME, username: LINKEDIN_USERNAME,
password: LINKEDIN_PASSWORD, password: LINKEDIN_PASSWORD,

View File

@ -21,6 +21,7 @@ async function linkedinStrategy(coreParser, options = {}) {
keywords = ["layoff", "downsizing", "job cuts"], keywords = ["layoff", "downsizing", "job cuts"],
locationFilter = null, locationFilter = null,
maxResults = 50, maxResults = 50,
extractLocationFromProfile = false,
credentials = {}, credentials = {},
} = options; } = options;
@ -106,7 +107,7 @@ async function linkedinStrategy(coreParser, options = {}) {
} }
// Extract posts from current page // Extract posts from current page
const posts = await extractPostsFromPage(page, keyword); const posts = await extractPostsFromPage(page, keyword, extractLocationFromProfile);
logger.info(`📊 Found ${posts.length} posts for keyword "${keyword}"`); logger.info(`📊 Found ${posts.length} posts for keyword "${keyword}"`);
for (const post of posts) { for (const post of posts) {
@ -172,7 +173,7 @@ async function linkedinStrategy(coreParser, options = {}) {
/** /**
* Extract posts from current search results page * Extract posts from current search results page
*/ */
async function extractPostsFromPage(page, keyword) { async function extractPostsFromPage(page, keyword, extractLocationFromProfile = false) {
const posts = []; const posts = [];
try { try {
@ -254,10 +255,26 @@ async function extractPostsFromPage(page, keyword) {
const post = await extractPostData(postElements[i], keyword); const post = await extractPostData(postElements[i], keyword);
if (post) { if (post) {
// If location is missing and we're enabled to extract from profile, try to get it
if (!post.location && extractLocationFromProfile && post.authorUrl) {
try {
logger.debug(`📍 Location missing for post ${i + 1}, attempting to extract from profile...`);
const profileLocation = await extractLocationFromProfilePage(page, post.authorUrl);
if (profileLocation) {
post.location = profileLocation;
post.profileLocation = profileLocation;
logger.debug(`✅ Extracted location from profile: ${profileLocation}`);
}
} catch (error) {
logger.debug(`⚠️ Could not extract location from profile: ${error.message}`);
}
}
posts.push(post); posts.push(post);
const hasContent = post.content && post.content.length > 0; const hasContent = post.content && post.content.length > 0;
const hasAuthor = post.authorName && post.authorName.length > 0; const hasAuthor = post.authorName && post.authorName.length > 0;
logger.debug(`✅ Extracted post ${i + 1}/${postElements.length}: ${post.postId.substring(0, 20)}... (content: ${hasContent ? 'yes' : 'no'}, author: ${hasAuthor ? 'yes' : 'no'})`); const hasLocation = post.location && post.location.length > 0;
logger.debug(`✅ Extracted post ${i + 1}/${postElements.length}: ${post.postId.substring(0, 20)}... (content: ${hasContent ? 'yes' : 'no'}, author: ${hasAuthor ? 'yes' : 'no'}, location: ${hasLocation ? 'yes' : 'no'})`);
} else { } else {
logger.debug(`⏭️ Post ${i + 1}/${postElements.length} filtered out (no keyword match or missing data)`); logger.debug(`⏭️ Post ${i + 1}/${postElements.length} filtered out (no keyword match or missing data)`);
} }
@ -626,6 +643,42 @@ async function extractPostData(postElement, keyword) {
} }
} }
} }
// Try to extract from data attributes or hidden elements
if (!data.location) {
// Check for data attributes that might contain location
const actorSection = el.querySelector(".feed-shared-actor");
if (actorSection) {
// Check all data attributes
for (const attr of actorSection.attributes) {
if (attr.name.startsWith("data-") && attr.value) {
const value = attr.value.toLowerCase();
// Look for location-like patterns in data attributes
if (/(ontario|alberta|british columbia|quebec|toronto|vancouver|calgary|ottawa|montreal)/i.test(value)) {
// Try to extract the actual location text
const locationMatch = attr.value.match(/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*,\s*[A-Z][a-z]+)/);
if (locationMatch) {
data.location = locationMatch[0];
break;
}
}
}
}
// Check for hidden spans or divs with location info
const hiddenElements = actorSection.querySelectorAll("span[style*='display: none'], div[style*='display: none'], [aria-hidden='true']");
for (const hiddenElem of hiddenElements) {
const text = hiddenElem.textContent || hiddenElem.getAttribute("aria-label") || "";
if (text && /(ontario|alberta|british columbia|quebec|toronto|vancouver)/i.test(text)) {
const locationMatch = text.match(/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*,\s*[A-Z][a-z]+)/);
if (locationMatch) {
data.location = locationMatch[0].trim();
break;
}
}
}
}
}
// Extract engagement metrics - try multiple approaches // Extract engagement metrics - try multiple approaches
const likesSelectors = [ const likesSelectors = [
@ -799,6 +852,48 @@ async function extractPostData(postElement, keyword) {
} }
} }
/**
 * Visit a post author's LinkedIn profile in a throwaway tab and read their
 * location via the ai-analyzer `extractLocationFromProfile` helper.
 *
 * @param {object} page - Existing page whose browser context is reused for the tab.
 * @param {string} profileUrl - Absolute or site-relative profile URL.
 * @returns {Promise<string>} The location text, or "" when it cannot be read.
 */
async function extractLocationFromProfilePage(page, profileUrl) {
  try {
    // Ensure URL is complete
    let fullUrl = profileUrl;
    if (!fullUrl.startsWith("http")) {
      fullUrl = `https://www.linkedin.com${fullUrl}`;
    }

    // Remove query parameters that might cause issues
    fullUrl = fullUrl.split("?")[0];

    // Open profile in new tab
    const profilePage = await page.context().newPage();

    // FIX: close the tab in `finally` so it is released exactly once on both
    // the success and failure paths (was: two separate close() calls, with
    // the error-path close able to mask the original error).
    try {
      await profilePage.goto(fullUrl, {
        waitUntil: "domcontentloaded",
        timeout: 15000,
      });

      // Wait a bit for content to load
      await new Promise((resolve) => setTimeout(resolve, 2000));

      // Use the extractLocationFromProfile utility from ai-analyzer
      return await extractLocationFromProfile(profilePage);
    } finally {
      await profilePage.close();
    }
  } catch (error) {
    logger.debug(`Failed to extract location from profile ${profileUrl}: ${error.message}`);
    return "";
  }
}
/** /**
* Extract numbers from text (e.g., "15 likes" -> 15) * Extract numbers from text (e.g., "15 likes" -> 15)
*/ */

View File

@ -1,80 +1,80 @@
/**
 * AI Analyzer logic tests.
 *
 * Runs analyzeSinglePost over the "positive" fixtures in test-data.json and
 * asserts the result shape and value ranges; skips cleanly when the Ollama
 * backend is not running.
 */
const fs = require("fs");
const assert = require("assert");
const { analyzeSinglePost, checkOllamaStatus } = require("../ai-analyzer");

console.log("AI Analyzer logic tests");

// Fixture posts known to be relevant ("positive") live next to this file.
const testData = JSON.parse(
  fs.readFileSync(__dirname + "/test-data.json", "utf-8")
);
const aiResults = testData.positive;
const context = "job layoffs and workforce reduction";
const model = process.env.OLLAMA_MODEL || "mistral"; // Use OLLAMA_MODEL from env or default to mistral

(async () => {
  // Check if Ollama is available
  const ollamaAvailable = await checkOllamaStatus(model);
  if (!ollamaAvailable) {
    console.log("SKIP: Ollama not available - skipping AI analyzer tests");
    console.log("PASS: AI analyzer tests skipped (Ollama not running)");
    return;
  }

  console.log(`Testing AI analyzer with ${aiResults.length} posts...`);

  for (const [i, post] of aiResults.entries()) {
    console.log(`Testing post ${i + 1}: "${post.text.substring(0, 50)}..."`);

    const aiOutput = await analyzeSinglePost(post.text, context, model);

    // Shape checks: the analyzer must always return this exact structure.
    assert(
      typeof aiOutput === "object" && aiOutput !== null,
      `Post ${i} output is not an object`
    );
    assert(
      typeof aiOutput.isRelevant === "boolean",
      `Post ${i} isRelevant is not a boolean: ${typeof aiOutput.isRelevant}`
    );
    assert(
      typeof aiOutput.confidence === "number",
      `Post ${i} confidence is not a number: ${typeof aiOutput.confidence}`
    );
    assert(
      typeof aiOutput.reasoning === "string",
      `Post ${i} reasoning is not a string: ${typeof aiOutput.reasoning}`
    );

    // Value checks: confidence within [0, 1], reasoning present, relevance boolean.
    assert(
      aiOutput.confidence >= 0 && aiOutput.confidence <= 1,
      `Post ${i} confidence out of range: ${aiOutput.confidence} (should be 0-1)`
    );
    assert(
      aiOutput.reasoning && aiOutput.reasoning.length > 0,
      `Post ${i} missing or empty reasoning`
    );
    assert(
      aiOutput.isRelevant === true || aiOutput.isRelevant === false,
      `Post ${i} isRelevant is not a valid boolean: ${aiOutput.isRelevant}`
    );

    console.log(
      ` ✓ Post ${i + 1}: relevant=${aiOutput.isRelevant}, confidence=${
        aiOutput.confidence
      }`
    );
  }

  console.log(
    "PASS: AI analyzer returns valid structure and values for all test posts."
  );
})();