Enhance job search parser with LinkedIn strategy and configuration updates
- Added a LinkedIn jobs parsing strategy to support job extraction from LinkedIn.
- Updated the job search parser to include the new site strategy and improved argument parsing for max pages and exclusion of rejected results (see the usage sketch below).
- Enhanced README documentation to reflect new features and usage examples.
- Refactored existing strategies for consistency and improved error handling.
Parent: bbfd3c84aa
Commit: 4099b23744
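Before the diffs, a minimal usage sketch of the new options surface. This is hypothetical: it assumes job-search-parser/index.js exports startJobSearchParser (the export is not shown in this commit) and mirrors the CLI flags added below (--max-pages=all, --no-rejected / --exclude-rejected).

    // Hypothetical sketch, assuming index.js exports startJobSearchParser.
    // LinkedIn parsing additionally requires LINKEDIN_USERNAME and
    // LINKEDIN_PASSWORD to be set in .env.
    const { startJobSearchParser } = require("./job-search-parser");

    startJobSearchParser({
      sites: ["skipthedrive", "linkedin"],
      keywords: ["co-op", "intern"],
      maxPages: 0,           // 0 means unlimited, equivalent to --max-pages=all
      excludeRejected: true, // equivalent to --no-rejected / --exclude-rejected
    });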
@@ -62,3 +62,5 @@ class CoreParser {

module.exports = CoreParser;
File diff suppressed because it is too large
@@ -10,6 +10,7 @@ const path = require("path");
 const fs = require("fs");
 const CoreParser = require("../core-parser");
 const { skipthedriveStrategy } = require("./strategies/skipthedrive-strategy");
+const { linkedinJobsStrategy } = require("./strategies/linkedin-jobs-strategy");
 const { logger, analyzeBatch, checkOllamaStatus } = require("ai-analyzer");

 // Load environment variables
@@ -18,14 +19,16 @@ require("dotenv").config({ path: path.join(__dirname, ".env") });
 // Configuration from environment
 const HEADLESS = process.env.HEADLESS !== "false";
 const SEARCH_KEYWORDS =
-  process.env.SEARCH_KEYWORDS || "software engineer,developer,programmer";
+  process.env.SEARCH_KEYWORDS || "co-op,intern";//"software engineer,developer,programmer";
 const LOCATION_FILTER = process.env.LOCATION_FILTER;
 const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS === "true";
 const MAX_PAGES = parseInt(process.env.MAX_PAGES) || 5;
+const EXCLUDE_REJECTED = process.env.EXCLUDE_REJECTED === "true";

 // Available site strategies
 const SITE_STRATEGIES = {
   skipthedrive: skipthedriveStrategy,
+  linkedin: linkedinJobsStrategy,
   // Add more site strategies here
   // indeed: indeedStrategy,
   // glassdoor: glassdoorStrategy,
@@ -41,6 +44,7 @@ function parseArguments() {
     keywords: null,
     locationFilter: null,
     maxPages: MAX_PAGES,
+    excludeRejected: EXCLUDE_REJECTED,
   };

   args.forEach((arg) => {
@@ -57,7 +61,15 @@ function parseArguments() {
     } else if (arg.startsWith("--location=")) {
       options.locationFilter = arg.split("=")[1];
     } else if (arg.startsWith("--max-pages=")) {
-      options.maxPages = parseInt(arg.split("=")[1]) || MAX_PAGES;
+      const value = arg.split("=")[1];
+      // Support "all" or "0" to mean unlimited pages
+      if (value === "all" || value === "0") {
+        options.maxPages = 0; // 0 means unlimited
+      } else {
+        options.maxPages = parseInt(value) || MAX_PAGES;
+      }
+    } else if (arg === "--no-rejected" || arg === "--exclude-rejected") {
+      options.excludeRejected = true;
     }
   });

@@ -84,6 +96,7 @@ async function startJobSearchParser(options = {}) {
     finalOptions.keywords || SEARCH_KEYWORDS.split(",").map((k) => k.trim());
   const locationFilter = finalOptions.locationFilter || LOCATION_FILTER;
   const sites = finalOptions.sites;
+  const excludeRejected = finalOptions.excludeRejected !== undefined ? finalOptions.excludeRejected : EXCLUDE_REJECTED;

   logger.info(`📦 Selected job sites: ${sites.join(", ")}`);
   logger.info(`🔍 Search Keywords: ${keywords.join(", ")}`);
@@ -108,18 +121,46 @@ async function startJobSearchParser(options = {}) {
     logger.step(`\n🌐 Parsing ${site}...`);
     const startTime = Date.now();

-    const parseResult = await strategy(coreParser, {
+    // Prepare strategy options
+    const strategyOptions = {
       keywords,
       locationFilter,
       maxPages: finalOptions.maxPages,
-    });
+    };
+
+    // Add credentials for LinkedIn
+    if (site === "linkedin") {
+      const LINKEDIN_USERNAME = process.env.LINKEDIN_USERNAME;
+      const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD;
+
+      if (!LINKEDIN_USERNAME || !LINKEDIN_PASSWORD) {
+        logger.error(`❌ LinkedIn credentials not found. Please set LINKEDIN_USERNAME and LINKEDIN_PASSWORD in .env file`);
+        siteResults[site] = {
+          count: 0,
+          rejected: 0,
+          duration: "0s",
+          error: "LinkedIn credentials not found",
+        };
+        continue;
+      }
+
+      strategyOptions.credentials = {
+        username: LINKEDIN_USERNAME,
+        password: LINKEDIN_PASSWORD,
+      };
+      strategyOptions.location = process.env.LINKEDIN_JOB_LOCATION || "";
+    }
+
+    const parseResult = await strategy(coreParser, strategyOptions);

     const { results, rejectedResults, summary } = parseResult;
     const duration = ((Date.now() - startTime) / 1000).toFixed(2);

     // Collect results
+    logger.info(`📦 Strategy returned: ${results.length} results, ${rejectedResults.length} rejected`);
     allResults.push(...results);
     allRejectedResults.push(...rejectedResults);
+    logger.info(`📦 Total accumulated: ${allResults.length} results, ${allRejectedResults.length} rejected`);

     siteResults[site] = {
       count: results.length,
@@ -162,6 +203,9 @@ async function startJobSearchParser(options = {}) {
   }

   // Save results
+  logger.info(`💾 Preparing to save: ${allResults.length} results, ${allRejectedResults.length} rejected`);
+  logger.info(`💾 EXCLUDE_REJECTED env: ${process.env.EXCLUDE_REJECTED}, excludeRejected variable: ${excludeRejected}`);

   const outputData = {
     metadata: {
       extractedAt: new Date().toISOString(),
@@ -171,11 +215,21 @@ async function startJobSearchParser(options = {}) {
       keywords: keywords.join(", "),
       locationFilter,
       analysisResults,
+      rejectedJobsExcluded: excludeRejected,
     },
     results: allResults,
     rejectedResults: allRejectedResults,
     siteResults,
   };

+  // Always include rejectedResults if not excluded (make it explicit, not using spread)
+  if (!excludeRejected) {
+    outputData.rejectedResults = allRejectedResults;
+    logger.info(`✅ Including ${allRejectedResults.length} rejected results in output`);
+  } else {
+    logger.info(`⏭️ Excluding rejected results (EXCLUDE_REJECTED=true)`);
+  }
+
+  logger.info(`💾 Final output: ${outputData.results.length} results, ${outputData.rejectedResults?.length || 0} rejected`);
+
   const resultsDir = path.join(__dirname, "results");
   if (!fs.existsSync(resultsDir)) {
1360  job-search-parser/strategies/linkedin-jobs-strategy.js  (new file)
File diff suppressed because it is too large
@@ -1,302 +1,299 @@
/**
 * SkipTheDrive Parsing Strategy
 *
 * Uses core-parser for browser management and ai-analyzer for utilities
 */

const {
  logger,
  cleanText,
  containsAnyKeyword,
  validateLocationAgainstFilters,
} = require("ai-analyzer");

/**
 * SkipTheDrive URL builder
 */
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
  const baseUrl = "https://www.skipthedrive.com/";
  const params = new URLSearchParams({
    s: keyword,
    orderby: orderBy,
  });

  if (jobTypes && jobTypes.length > 0) {
    params.append("job_type", jobTypes.join(","));
  }

  return `${baseUrl}?${params.toString()}`;
}

/**
 * SkipTheDrive parsing strategy function
 */
async function skipthedriveStrategy(coreParser, options = {}) {
  const {
    keywords = ["software engineer", "developer", "programmer"],
    locationFilter = null,
    maxPages = 5,
    jobTypes = [],
  } = options;

  const results = [];
  const rejectedResults = [];
  const seenJobs = new Set();

  try {
    // Create main page
    const page = await coreParser.createPage("skipthedrive-main");

    logger.info("🚀 Starting SkipTheDrive parser...");
    logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
    logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
    logger.info(`📄 Max Pages: ${maxPages}`);

    // Search for each keyword
    for (const keyword of keywords) {
      logger.info(`\n🔍 Searching for: ${keyword}`);

      const searchUrl = buildSearchUrl(keyword, "date", jobTypes);

      try {
        // Navigate to search results
        await coreParser.navigateTo(searchUrl, {
          pageId: "skipthedrive-main",
          retries: 2,
          timeout: 30000,
        });

        // Wait for job listings to load
-        const hasResults = await coreParser
-          .waitForSelector(
-            "#loops-wrapper",
-            {
-              timeout: 5000,
-            },
-            "skipthedrive-main"
-          )
-          .catch(() => {
-            logger.warning(`No results found for keyword: ${keyword}`);
-            return false;
-          });
+        const hasResults = await page
+          .waitForSelector("#loops-wrapper", {
+            timeout: 5000,
+          })
+          .then(() => true)
+          .catch(() => {
+            logger.warning(`No results found for keyword: ${keyword}`);
+            return false;
+          });

        if (!hasResults) {
          continue;
        }

        // Process multiple pages
        let currentPage = 1;
        let hasNextPage = true;

        while (hasNextPage && currentPage <= maxPages) {
          logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);

          // Extract jobs from current page
          const pageJobs = await extractJobsFromPage(
            page,
            keyword,
            locationFilter
          );

          for (const job of pageJobs) {
            // Skip duplicates
            if (seenJobs.has(job.jobId)) continue;
            seenJobs.add(job.jobId);

            // Validate location if filtering enabled
            if (locationFilter) {
              const locationValid = validateLocationAgainstFilters(
                job.location,
                locationFilter
              );

              if (!locationValid) {
                rejectedResults.push({
                  ...job,
                  rejectionReason: "Location filter mismatch",
                });
                continue;
              }
            }

            results.push(job);
          }

          // Check for next page
          hasNextPage = await hasNextPageAvailable(page);
          if (hasNextPage && currentPage < maxPages) {
            await navigateToNextPage(page, currentPage + 1);
            currentPage++;

            // Wait for new page to load
            await page.waitForTimeout(2000);
          } else {
            hasNextPage = false;
          }
        }
      } catch (error) {
        logger.error(`Error processing keyword "${keyword}": ${error.message}`);
      }
    }

    logger.info(
      `🎯 SkipTheDrive parsing completed: ${results.length} jobs found, ${rejectedResults.length} rejected`
    );

    return {
      results,
      rejectedResults,
      summary: {
        totalJobs: results.length,
        totalRejected: rejectedResults.length,
        keywords: keywords.join(", "),
        locationFilter,
        source: "skipthedrive",
      },
    };
  } catch (error) {
    logger.error(`❌ SkipTheDrive parsing failed: ${error.message}`);
    throw error;
  }
}

/**
 * Extract jobs from current page
 */
async function extractJobsFromPage(page, keyword, locationFilter) {
  const jobs = [];

  try {
    // Get all job article elements
    const jobElements = await page.$$("article.job_listing");

    for (const jobElement of jobElements) {
      try {
        const job = await extractJobData(jobElement, keyword);
        if (job) {
          jobs.push(job);
        }
      } catch (error) {
        logger.warning(`Failed to extract job data: ${error.message}`);
      }
    }
  } catch (error) {
    logger.error(`Failed to extract jobs from page: ${error.message}`);
  }

  return jobs;
}

/**
 * Extract data from individual job element
 */
async function extractJobData(jobElement, keyword) {
  try {
    // Extract job ID
    const articleId = (await jobElement.getAttribute("id")) || "";
    const jobId = articleId ? articleId.replace("post-", "") : "";

    // Extract title
    const titleElement = await jobElement.$(".job_listing-title a");
    const title = titleElement
      ? cleanText(await titleElement.textContent())
      : "";
    const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";

    // Extract company
    const companyElement = await jobElement.$(".company");
    const company = companyElement
      ? cleanText(await companyElement.textContent())
      : "";

    // Extract location
    const locationElement = await jobElement.$(".location");
    const location = locationElement
      ? cleanText(await locationElement.textContent())
      : "";

    // Extract date posted
    const dateElement = await jobElement.$(".job-date");
    const dateText = dateElement
      ? cleanText(await dateElement.textContent())
      : "";

    // Extract description
    const descElement = await jobElement.$(".job_listing-description");
    const description = descElement
      ? cleanText(await descElement.textContent())
      : "";

    // Check if featured
    const featuredElement = await jobElement.$(".featured");
    const isFeatured = featuredElement !== null;

    // Parse date
    let datePosted = null;
    let daysAgo = null;

    if (dateText) {
      const match = dateText.match(/(\d+)\s+days?\s+ago/);
      if (match) {
        daysAgo = parseInt(match[1]);
        const date = new Date();
        date.setDate(date.getDate() - daysAgo);
        datePosted = date.toISOString().split("T")[0];
      }
    }

    return {
      jobId,
      title,
      company,
      location,
      jobUrl,
      datePosted,
      dateText,
      daysAgo,
      description,
      isFeatured,
      keyword,
      extractedAt: new Date().toISOString(),
      source: "skipthedrive",
    };
  } catch (error) {
    logger.warning(`Error extracting job data: ${error.message}`);
    return null;
  }
}

/**
 * Check if next page is available
 */
async function hasNextPageAvailable(page) {
  try {
    const nextButton = await page.$(".next-page");
    return nextButton !== null;
  } catch {
    return false;
  }
}

/**
 * Navigate to next page
 */
async function navigateToNextPage(page, pageNumber) {
  try {
    const nextButton = await page.$(".next-page");
    if (nextButton) {
      await nextButton.click();
    }
  } catch (error) {
    logger.warning(
      `Failed to navigate to page ${pageNumber}: ${error.message}`
    );
  }
}

module.exports = {
  skipthedriveStrategy,
  buildSearchUrl,
  extractJobsFromPage,
  extractJobData,
};
@@ -31,12 +31,13 @@ const LINKEDIN_USERNAME = process.env.LINKEDIN_USERNAME;
 const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD;
 const HEADLESS = process.env.HEADLESS !== "false";
 const SEARCH_KEYWORDS =
-  process.env.SEARCH_KEYWORDS || "layoff,downsizing";//,job cuts";
+  process.env.SEARCH_KEYWORDS || "layoff";//,downsizing";//,job cuts";
 const LOCATION_FILTER = process.env.LOCATION_FILTER;
 const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS !== "false";
 const AI_CONTEXT = process.env.AI_CONTEXT || "job market analysis and trends";
 const OLLAMA_MODEL = process.env.OLLAMA_MODEL || DEFAULT_MODEL;
 const MAX_RESULTS = parseInt(process.env.MAX_RESULTS) || 50;
+const EXTRACT_LOCATION_FROM_PROFILE = process.env.EXTRACT_LOCATION_FROM_PROFILE === "true";

 /**
  * Main LinkedIn parser function
@@ -71,6 +72,7 @@ async function startLinkedInParser(options = {}) {
     keywords,
     locationFilter: LOCATION_FILTER,
     maxResults: MAX_RESULTS,
+    extractLocationFromProfile: EXTRACT_LOCATION_FROM_PROFILE,
     credentials: {
       username: LINKEDIN_USERNAME,
       password: LINKEDIN_PASSWORD,

@@ -21,6 +21,7 @@ async function linkedinStrategy(coreParser, options = {}) {
     keywords = ["layoff", "downsizing", "job cuts"],
     locationFilter = null,
     maxResults = 50,
+    extractLocationFromProfile = false,
     credentials = {},
   } = options;

@@ -106,7 +107,7 @@ async function linkedinStrategy(coreParser, options = {}) {
   }

   // Extract posts from current page
-  const posts = await extractPostsFromPage(page, keyword);
+  const posts = await extractPostsFromPage(page, keyword, extractLocationFromProfile);
   logger.info(`📊 Found ${posts.length} posts for keyword "${keyword}"`);

   for (const post of posts) {
@@ -172,7 +173,7 @@ async function linkedinStrategy(coreParser, options = {}) {
 /**
  * Extract posts from current search results page
  */
-async function extractPostsFromPage(page, keyword) {
+async function extractPostsFromPage(page, keyword, extractLocationFromProfile = false) {
   const posts = [];

   try {
@@ -254,10 +255,26 @@ async function extractPostsFromPage(page, keyword) {

       const post = await extractPostData(postElements[i], keyword);
       if (post) {
+        // If location is missing and we're enabled to extract from profile, try to get it
+        if (!post.location && extractLocationFromProfile && post.authorUrl) {
+          try {
+            logger.debug(`📍 Location missing for post ${i + 1}, attempting to extract from profile...`);
+            const profileLocation = await extractLocationFromProfilePage(page, post.authorUrl);
+            if (profileLocation) {
+              post.location = profileLocation;
+              post.profileLocation = profileLocation;
+              logger.debug(`✅ Extracted location from profile: ${profileLocation}`);
+            }
+          } catch (error) {
+            logger.debug(`⚠️ Could not extract location from profile: ${error.message}`);
+          }
+        }
+
         posts.push(post);
         const hasContent = post.content && post.content.length > 0;
         const hasAuthor = post.authorName && post.authorName.length > 0;
-        logger.debug(`✅ Extracted post ${i + 1}/${postElements.length}: ${post.postId.substring(0, 20)}... (content: ${hasContent ? 'yes' : 'no'}, author: ${hasAuthor ? 'yes' : 'no'})`);
+        const hasLocation = post.location && post.location.length > 0;
+        logger.debug(`✅ Extracted post ${i + 1}/${postElements.length}: ${post.postId.substring(0, 20)}... (content: ${hasContent ? 'yes' : 'no'}, author: ${hasAuthor ? 'yes' : 'no'}, location: ${hasLocation ? 'yes' : 'no'})`);
       } else {
         logger.debug(`⏭️ Post ${i + 1}/${postElements.length} filtered out (no keyword match or missing data)`);
       }
@@ -626,6 +643,42 @@ async function extractPostData(postElement, keyword) {
       }
     }
   }

+  // Try to extract from data attributes or hidden elements
+  if (!data.location) {
+    // Check for data attributes that might contain location
+    const actorSection = el.querySelector(".feed-shared-actor");
+    if (actorSection) {
+      // Check all data attributes
+      for (const attr of actorSection.attributes) {
+        if (attr.name.startsWith("data-") && attr.value) {
+          const value = attr.value.toLowerCase();
+          // Look for location-like patterns in data attributes
+          if (/(ontario|alberta|british columbia|quebec|toronto|vancouver|calgary|ottawa|montreal)/i.test(value)) {
+            // Try to extract the actual location text
+            const locationMatch = attr.value.match(/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*,\s*[A-Z][a-z]+)/);
+            if (locationMatch) {
+              data.location = locationMatch[0];
+              break;
+            }
+          }
+        }
+      }
+
+      // Check for hidden spans or divs with location info
+      const hiddenElements = actorSection.querySelectorAll("span[style*='display: none'], div[style*='display: none'], [aria-hidden='true']");
+      for (const hiddenElem of hiddenElements) {
+        const text = hiddenElem.textContent || hiddenElem.getAttribute("aria-label") || "";
+        if (text && /(ontario|alberta|british columbia|quebec|toronto|vancouver)/i.test(text)) {
+          const locationMatch = text.match(/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*,\s*[A-Z][a-z]+)/);
+          if (locationMatch) {
+            data.location = locationMatch[0].trim();
+            break;
+          }
+        }
+      }
+    }
+  }

   // Extract engagement metrics - try multiple approaches
   const likesSelectors = [
@@ -799,6 +852,48 @@ async function extractPostData(postElement, keyword) {
   }
 }

+/**
+ * Extract location from a LinkedIn profile page
+ */
+async function extractLocationFromProfilePage(page, profileUrl) {
+  try {
+    // Ensure URL is complete
+    let fullUrl = profileUrl;
+    if (!fullUrl.startsWith("http")) {
+      fullUrl = `https://www.linkedin.com${fullUrl}`;
+    }
+
+    // Remove query parameters that might cause issues
+    fullUrl = fullUrl.split("?")[0];
+
+    // Open profile in new tab
+    const profilePage = await page.context().newPage();
+
+    try {
+      await profilePage.goto(fullUrl, {
+        waitUntil: "domcontentloaded",
+        timeout: 15000,
+      });
+
+      // Wait a bit for content to load
+      await new Promise(resolve => setTimeout(resolve, 2000));
+
+      // Use the extractLocationFromProfile utility from ai-analyzer
+      const location = await extractLocationFromProfile(profilePage);
+
+      await profilePage.close();
+
+      return location;
+    } catch (error) {
+      await profilePage.close();
+      throw error;
+    }
+  } catch (error) {
+    logger.debug(`Failed to extract location from profile ${profileUrl}: ${error.message}`);
+    return "";
+  }
+}

 /**
  * Extract numbers from text (e.g., "15 likes" -> 15)
  */
@@ -1,80 +1,80 @@
const fs = require("fs");
const assert = require("assert");
const { analyzeSinglePost, checkOllamaStatus } = require("../ai-analyzer");

console.log("AI Analyzer logic tests");

const testData = JSON.parse(
  fs.readFileSync(__dirname + "/test-data.json", "utf-8")
);
const aiResults = testData.positive;
const context = "job layoffs and workforce reduction";
const model = process.env.OLLAMA_MODEL || "mistral"; // Use OLLAMA_MODEL from env or default to mistral

(async () => {
  // Check if Ollama is available
  const ollamaAvailable = await checkOllamaStatus(model);
  if (!ollamaAvailable) {
    console.log("SKIP: Ollama not available - skipping AI analyzer tests");
    console.log("PASS: AI analyzer tests skipped (Ollama not running)");
    return;
  }

  console.log(`Testing AI analyzer with ${aiResults.length} posts...`);

  for (let i = 0; i < aiResults.length; i++) {
    const post = aiResults[i];
    console.log(`Testing post ${i + 1}: "${post.text.substring(0, 50)}..."`);

    const aiOutput = await analyzeSinglePost(post.text, context, model);

    // Test that the function returns the expected structure
    assert(
      typeof aiOutput === "object" && aiOutput !== null,
      `Post ${i} output is not an object`
    );

    assert(
      typeof aiOutput.isRelevant === "boolean",
      `Post ${i} isRelevant is not a boolean: ${typeof aiOutput.isRelevant}`
    );

    assert(
      typeof aiOutput.confidence === "number",
      `Post ${i} confidence is not a number: ${typeof aiOutput.confidence}`
    );

    assert(
      typeof aiOutput.reasoning === "string",
      `Post ${i} reasoning is not a string: ${typeof aiOutput.reasoning}`
    );

    // Test that confidence is within valid range
    assert(
      aiOutput.confidence >= 0 && aiOutput.confidence <= 1,
      `Post ${i} confidence out of range: ${aiOutput.confidence} (should be 0-1)`
    );

    // Test that reasoning exists and is not empty
    assert(
      aiOutput.reasoning && aiOutput.reasoning.length > 0,
      `Post ${i} missing or empty reasoning`
    );

    // Test that relevance is a boolean value
    assert(
      aiOutput.isRelevant === true || aiOutput.isRelevant === false,
      `Post ${i} isRelevant is not a valid boolean: ${aiOutput.isRelevant}`
    );

    console.log(
      ` ✓ Post ${i + 1}: relevant=${aiOutput.isRelevant}, confidence=${
        aiOutput.confidence
      }`
    );
  }

  console.log(
    "PASS: AI analyzer returns valid structure and values for all test posts."
  );
})();