Refactor AI analysis utilities and enhance LinkedIn parser

- Updated `ai-utils.js` to improve AI response parsing and added timeout handling for API requests.
- Modified `linkedin-parser` to refine search keyword handling and improve post extraction reliability.
- Enhanced location filtering logic and added more robust selectors for extracting post data.
- Improved logging for debugging purposes, including detailed extraction results and fallback mechanisms.
This commit is contained in:
tanyar09 2025-12-12 15:45:07 -05:00
parent 8de65bc04c
commit bbfd3c84aa
3 changed files with 996 additions and 411 deletions

View File

@ -1,305 +1,442 @@
const { logger } = require("./logger"); const { logger } = require("./logger");
/** /**
* AI Analysis utilities for post processing with Ollama * AI Analysis utilities for post processing with Ollama
* Extracted from ai-analyzer-local.js for reuse across parsers * Extracted from ai-analyzer-local.js for reuse across parsers
*/ */
// Default model from environment variable or fallback to "mistral" // Default model from environment variable or fallback to "mistral"
const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "mistral"; const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "mistral";
/** /**
* Check if Ollama is running and the model is available * Check if Ollama is running and the model is available
*/ */
async function checkOllamaStatus( async function checkOllamaStatus(
model = DEFAULT_MODEL, model = DEFAULT_MODEL,
ollamaHost = "http://localhost:11434" ollamaHost = "http://localhost:11434"
) { ) {
try { try {
// Check if Ollama is running // Check if Ollama is running
const response = await fetch(`${ollamaHost}/api/tags`); const response = await fetch(`${ollamaHost}/api/tags`);
if (!response.ok) { if (!response.ok) {
throw new Error(`Ollama not running on ${ollamaHost}`); throw new Error(`Ollama not running on ${ollamaHost}`);
} }
const data = await response.json(); const data = await response.json();
const availableModels = data.models.map((m) => m.name); const availableModels = data.models.map((m) => m.name);
logger.ai("Ollama is running"); logger.ai("Ollama is running");
logger.info( logger.info(
`📦 Available models: ${availableModels `📦 Available models: ${availableModels
.map((m) => m.split(":")[0]) .map((m) => m.split(":")[0])
.join(", ")}` .join(", ")}`
); );
// Check if requested model is available // Check if requested model is available
const modelExists = availableModels.some((m) => m.startsWith(model)); const modelExists = availableModels.some((m) => m.startsWith(model));
if (!modelExists) { if (!modelExists) {
logger.error(`Model "${model}" not found`); logger.error(`Model "${model}" not found`);
logger.error(`💡 Install it with: ollama pull ${model}`); logger.error(`💡 Install it with: ollama pull ${model}`);
logger.error( logger.error(
`💡 Or choose from: ${availableModels `💡 Or choose from: ${availableModels
.map((m) => m.split(":")[0]) .map((m) => m.split(":")[0])
.join(", ")}` .join(", ")}`
); );
return false; return false;
} }
logger.success(`Using model: ${model}`); logger.success(`Using model: ${model}`);
return true; return true;
} catch (error) { } catch (error) {
logger.error(`Error connecting to Ollama: ${error.message}`); logger.error(`Error connecting to Ollama: ${error.message}`);
logger.error("💡 Make sure Ollama is installed and running:"); logger.error("💡 Make sure Ollama is installed and running:");
logger.error(" 1. Install: https://ollama.ai/"); logger.error(" 1. Install: https://ollama.ai/");
logger.error(" 2. Start: ollama serve"); logger.error(" 2. Start: ollama serve");
logger.error(` 3. Install model: ollama pull ${model}`); logger.error(` 3. Install model: ollama pull ${model}`);
return false; return false;
} }
} }
/** /**
* Analyze multiple posts using local Ollama * Analyze multiple posts using local Ollama
*/ */
async function analyzeBatch( async function analyzeBatch(
posts, posts,
context, context,
model = DEFAULT_MODEL, model = DEFAULT_MODEL,
ollamaHost = "http://localhost:11434" ollamaHost = "http://localhost:11434"
) { ) {
logger.ai(`Analyzing batch of ${posts.length} posts with ${model}...`); logger.ai(`Analyzing batch of ${posts.length} posts with ${model}...`);
try { try {
const prompt = `You are an expert at analyzing LinkedIn posts for relevance to specific contexts. const prompt = `Analyze ${posts.length} LinkedIn posts for relevance to: "${context}"
CONTEXT TO MATCH: "${context}" POSTS:
${posts
Analyze these ${ .map(
posts.length (post, i) => `
} LinkedIn posts and determine if each relates to the context above. POST ${i + 1}:
"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}"
POSTS: `
${posts )
.map( .join("")}
(post, i) => `
POST ${i + 1}: REQUIRED FORMAT - Respond with EXACTLY ${posts.length} lines, one per post:
"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}" POST 1: YES | 0.8 | reason here
` POST 2: NO | 0.2 | reason here
) POST 3: YES | 0.9 | reason here
.join("")}
RULES:
For each post, provide: - Use YES or NO (uppercase)
- Is it relevant to "${context}"? (YES/NO) - Use pipe character | as separator
- Confidence level (0.0 to 1.0) - Confidence must be 0.0 to 1.0 (decimal number)
- Brief reasoning - Keep reasoning brief (one sentence)
- MUST include all ${posts.length} posts in order
Respond in this EXACT format for each post:
POST 1: YES/NO | 0.X | brief reason Examples:
POST 2: YES/NO | 0.X | brief reason POST 1: YES | 0.9 | mentions layoffs and job cuts
POST 3: YES/NO | 0.X | brief reason POST 2: NO | 0.1 | unrelated topic about vacation
POST 3: YES | 0.7 | discusses workforce reduction`;
Examples:
- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs // Add timeout to prevent hanging (5 minutes max)
- For hiring context: "we're hiring developers" = YES | 0.8 | job posting const controller = new AbortController();
- Unrelated content = NO | 0.1 | not relevant to context`; const timeoutId = setTimeout(() => controller.abort(), 5 * 60 * 1000); // 5 minutes
const response = await fetch(`${ollamaHost}/api/generate`, { try {
method: "POST", const response = await fetch(`${ollamaHost}/api/generate`, {
headers: { method: "POST",
"Content-Type": "application/json", headers: {
}, "Content-Type": "application/json",
body: JSON.stringify({ },
model: model, body: JSON.stringify({
prompt: prompt, model: model,
stream: false, prompt: prompt,
options: { stream: false,
temperature: 0.3, options: {
top_p: 0.9, temperature: 0.3,
}, top_p: 0.9,
}), },
}); }),
signal: controller.signal,
if (!response.ok) { });
throw new Error(
`Ollama API error: ${response.status} ${response.statusText}` clearTimeout(timeoutId);
);
} if (!response.ok) {
throw new Error(
const data = await response.json(); `Ollama API error: ${response.status} ${response.statusText}`
const aiResponse = data.response.trim(); );
}
// Parse the response
const analyses = []; const data = await response.json();
const lines = aiResponse.split("\n").filter((line) => line.trim()); const aiResponse = data.response.trim();
for (let i = 0; i < posts.length; i++) { // Parse the response
let analysis = { const analyses = [];
postIndex: i + 1, const lines = aiResponse.split("\n").filter((line) => line.trim());
isRelevant: false,
confidence: 0.5, // Log the raw response for debugging
reasoning: "Could not parse AI response", logger.debug(`AI Response length: ${aiResponse.length} chars`);
}; if (aiResponse.length > 0) {
logger.debug(`AI Response (first 1000 chars):\n${aiResponse.substring(0, 1000)}`);
// Look for lines that match "POST X:" pattern } else {
const postPattern = new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i"); logger.warning("⚠️ AI response is empty!");
}
for (const line of lines) {
const match = line.match(postPattern); for (let i = 0; i < posts.length; i++) {
if (match) { let analysis = {
const content = match[1].trim(); postIndex: i + 1,
isRelevant: false,
// Parse: YES/NO | 0.X | reasoning confidence: 0.5,
const parts = content.split("|").map((p) => p.trim()); reasoning: "Could not parse AI response",
};
if (parts.length >= 3) {
analysis.isRelevant = parts[0].toUpperCase().includes("YES"); // Try multiple patterns to find the post analysis
analysis.confidence = Math.max( // IMPORTANT: Try numbered patterns first, only use generic pattern as last resort
0, const numberedPatterns = [
Math.min(1, parseFloat(parts[1]) || 0.5) // Exact format: POST 1: YES | 0.8 | reason
); new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i"),
analysis.reasoning = parts[2] || "No reasoning provided"; // Numbered list: 1. YES | 0.8 | reason
} else { new RegExp(`^\\s*${i + 1}[.)]\\s*(.+)`, "i"),
// Fallback parsing // Just the number: 1: YES | 0.8 | reason
analysis.isRelevant = new RegExp(`^\\s*${i + 1}:\\s*(.+)`, "i"),
content.toUpperCase().includes("YES") || ];
content.toLowerCase().includes("relevant");
analysis.confidence = 0.6; let found = false;
analysis.reasoning = content.substring(0, 100); let matchedContent = null;
}
break; // First, try to find a line with the specific post number
} for (const line of lines) {
} for (const pattern of numberedPatterns) {
const match = line.match(pattern);
analyses.push(analysis); if (match) {
} matchedContent = match[1].trim();
found = true;
// If we didn't get enough analyses, fill in defaults break;
while (analyses.length < posts.length) { }
analyses.push({ }
postIndex: analyses.length + 1, if (found) break;
isRelevant: false, }
confidence: 0.3,
reasoning: "AI response parsing failed", // If not found with numbered patterns, try position-based matching as fallback
}); if (!found && lines.length > i) {
} const targetLine = lines[i];
if (targetLine) {
return analyses; // Try to parse the line even without post number
} catch (error) { const genericMatch = targetLine.match(/^(?:POST\s*\d+:?\s*)?(.+)$/i);
logger.error(`Error in batch AI analysis: ${error.message}`); if (genericMatch) {
matchedContent = genericMatch[1].trim();
// Fallback: mark all as relevant with low confidence found = true;
return posts.map((_, i) => ({ }
postIndex: i + 1, }
isRelevant: true, }
confidence: 0.3,
reasoning: `Analysis failed: ${error.message}`, if (found && matchedContent) {
})); const content = matchedContent;
}
} // Try to parse: YES/NO | 0.X | reasoning
let parts = content.split("|").map((p) => p.trim());
/**
* Analyze a single post using local Ollama (fallback) // If no pipe separator, try other separators
*/ if (parts.length < 2) {
async function analyzeSinglePost( // Try colon separator: YES: 0.8: reason
text, parts = content.split(":").map((p) => p.trim());
context, }
model = DEFAULT_MODEL, if (parts.length < 2) {
ollamaHost = "http://localhost:11434" // Try dash separator: YES - 0.8 - reason
) { parts = content.split("-").map((p) => p.trim());
const prompt = `Analyze this LinkedIn post for relevance to: "${context}" }
Post: "${text}" // Extract YES/NO
const relevanceText = parts[0] || content;
Is this post relevant to "${context}"? Provide: analysis.isRelevant =
1. YES or NO relevanceText.toUpperCase().includes("YES") ||
2. Confidence (0.0 to 1.0) relevanceText.toLowerCase().includes("relevant") ||
3. Brief reason relevanceText.toLowerCase().includes("yes");
Format: YES/NO | 0.X | reason`; // Extract confidence (look for number between 0 and 1)
if (parts.length >= 2) {
try { const confidenceMatch = parts[1].match(/(0?\.\d+|1\.0|0|1)/);
const response = await fetch(`${ollamaHost}/api/generate`, { if (confidenceMatch) {
method: "POST", analysis.confidence = Math.max(
headers: { 0,
"Content-Type": "application/json", Math.min(1, parseFloat(confidenceMatch[1]) || 0.5)
}, );
body: JSON.stringify({ }
model: model, } else {
prompt: prompt, // Try to find confidence in the whole content
stream: false, const confidenceMatch = content.match(/(0?\.\d+|1\.0|0|1)/);
options: { if (confidenceMatch) {
temperature: 0.3, analysis.confidence = Math.max(
}, 0,
}), Math.min(1, parseFloat(confidenceMatch[1]) || 0.5)
}); );
}
if (!response.ok) { }
throw new Error(`Ollama API error: ${response.status}`);
} // Extract reasoning (everything after confidence, or whole content if no structure)
if (parts.length >= 3) {
const data = await response.json(); analysis.reasoning = parts.slice(2).join(" ").trim() || parts[2] || "No reasoning provided";
const aiResponse = data.response.trim(); } else if (parts.length === 2) {
// If only 2 parts, second part might be reasoning
// Parse response analysis.reasoning = parts[1].substring(0, 200);
const parts = aiResponse.split("|").map((p) => p.trim()); } else {
// Use the whole content as reasoning, but remove YES/NO and confidence
if (parts.length >= 3) { let reasoning = content
return { .replace(/YES|NO/gi, "")
isRelevant: parts[0].toUpperCase().includes("YES"), .replace(/0?\.\d+|1\.0/g, "")
confidence: Math.max(0, Math.min(1, parseFloat(parts[1]) || 0.5)), .replace(/\|/g, "")
reasoning: parts[2], .trim();
}; analysis.reasoning = reasoning || "Analysis provided but format unclear";
} else { }
// Fallback parsing }
return {
isRelevant: // If still not found, try to extract from the entire response by position
aiResponse.toLowerCase().includes("yes") || if (!found && lines.length > 0) {
aiResponse.toLowerCase().includes("relevant"), // Try to get the line at position i (allowing for some variance)
confidence: 0.6, const targetLine = lines[Math.min(i, lines.length - 1)];
reasoning: aiResponse.substring(0, 100), if (targetLine) {
}; // Extract any YES/NO indication
} analysis.isRelevant =
} catch (error) { targetLine.toUpperCase().includes("YES") ||
return { targetLine.toLowerCase().includes("relevant");
isRelevant: true, // Default to include on error
confidence: 0.3, // Extract confidence
reasoning: `Analysis failed: ${error.message}`, const confidenceMatch = targetLine.match(/(0?\.\d+|1\.0|0|1)/);
}; if (confidenceMatch) {
} analysis.confidence = Math.max(
} 0,
Math.min(1, parseFloat(confidenceMatch[1]) || 0.5)
/** );
* Find the most recent results file if none specified }
*/
function findLatestResultsFile(resultsDir = "results") { // Use the line as reasoning
const fs = require("fs"); analysis.reasoning = targetLine.substring(0, 200).trim() || "Parsed from unstructured response";
const path = require("path"); found = true;
}
if (!fs.existsSync(resultsDir)) { }
throw new Error("Results directory not found. Run the scraper first.");
} // Last resort: if still not found, try to extract from the entire response text
if (!found && aiResponse.length > 0) {
const files = fs // Look for any mention of relevance in the response
.readdirSync(resultsDir) const responseLower = aiResponse.toLowerCase();
.filter( const hasRelevant = responseLower.includes("relevant") || responseLower.includes("yes");
(f) => analysis.isRelevant = hasRelevant;
(f.startsWith("results-") || f.startsWith("linkedin-results-")) &&
f.endsWith(".json") && // Try to find any confidence number
!f.includes("-ai-") const allConfidenceMatches = aiResponse.match(/(0?\.\d+|1\.0|0|1)/g);
) if (allConfidenceMatches && allConfidenceMatches.length > i) {
.sort() analysis.confidence = Math.max(
.reverse(); 0,
Math.min(1, parseFloat(allConfidenceMatches[i]) || 0.5)
if (files.length === 0) { );
throw new Error("No results files found. Run the scraper first."); }
}
// Use a portion of the response as reasoning
return path.join(resultsDir, files[0]); const responseSnippet = aiResponse.substring(i * 100, (i + 1) * 200).trim();
} analysis.reasoning = responseSnippet || "Could not parse structured response, using fallback";
module.exports = { logger.warning(`⚠️ Post ${i + 1}: Using fallback parsing - AI response format unclear`);
checkOllamaStatus, }
analyzeBatch,
analyzeSinglePost, analyses.push(analysis);
findLatestResultsFile, }
DEFAULT_MODEL, // Export so other modules can use it
}; // If we didn't get enough analyses, fill in defaults
while (analyses.length < posts.length) {
analyses.push({
postIndex: analyses.length + 1,
isRelevant: false,
confidence: 0.3,
reasoning: "AI response parsing failed",
});
}
return analyses;
} catch (error) {
clearTimeout(timeoutId);
if (error.name === 'AbortError') {
throw new Error('Request timeout: AI analysis took longer than 5 minutes');
}
throw error;
}
} catch (error) {
logger.error(`Error in batch AI analysis: ${error.message}`);
// Fallback: mark all as relevant with low confidence
return posts.map((_, i) => ({
postIndex: i + 1,
isRelevant: true,
confidence: 0.3,
reasoning: `Analysis failed: ${error.message}`,
}));
}
}
/**
 * Analyze a single post using local Ollama (fallback for when batch
 * analysis fails or needs a per-post retry).
 *
 * @param {string} text - Post text to analyze.
 * @param {string} context - Context description to match relevance against.
 * @param {string} [model=DEFAULT_MODEL] - Ollama model name.
 * @param {string} [ollamaHost="http://localhost:11434"] - Ollama server URL.
 * @returns {Promise<{isRelevant: boolean, confidence: number, reasoning: string}>}
 *   Analysis result; on any error the post defaults to relevant with low
 *   confidence so it is not silently dropped.
 */
async function analyzeSinglePost(
  text,
  context,
  model = DEFAULT_MODEL,
  ollamaHost = "http://localhost:11434"
) {
  const prompt = `Analyze this LinkedIn post for relevance to: "${context}"
Post: "${text}"
Is this post relevant to "${context}"? Provide:
1. YES or NO
2. Confidence (0.0 to 1.0)
3. Brief reason
Format: YES/NO | 0.X | reason`;
  // Abort hung requests after 2 minutes so a stalled Ollama call cannot
  // block the pipeline (mirrors the 5-minute timeout added to analyzeBatch).
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 2 * 60 * 1000);
  try {
    const response = await fetch(`${ollamaHost}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: model,
        prompt: prompt,
        stream: false,
        options: {
          temperature: 0.3,
        },
      }),
      signal: controller.signal,
    });
    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.status}`);
    }
    const data = await response.json();
    const aiResponse = data.response.trim();
    // Expected response format: "YES/NO | 0.X | reason"
    const parts = aiResponse.split("|").map((p) => p.trim());
    if (parts.length >= 3) {
      // Explicit NaN check instead of `|| 0.5` so a legitimate confidence
      // of 0 is preserved rather than being coerced to the default.
      const parsed = parseFloat(parts[1]);
      const confidence = Number.isNaN(parsed) ? 0.5 : parsed;
      return {
        isRelevant: parts[0].toUpperCase().includes("YES"),
        confidence: Math.max(0, Math.min(1, confidence)),
        reasoning: parts[2],
      };
    }
    // Fallback parsing when the model ignored the requested format.
    return {
      isRelevant:
        aiResponse.toLowerCase().includes("yes") ||
        aiResponse.toLowerCase().includes("relevant"),
      confidence: 0.6,
      reasoning: aiResponse.substring(0, 100),
    };
  } catch (error) {
    const message =
      error.name === "AbortError"
        ? "Request timeout: single-post analysis took longer than 2 minutes"
        : error.message;
    return {
      isRelevant: true, // Default to include on error
      confidence: 0.3,
      reasoning: `Analysis failed: ${message}`,
    };
  } finally {
    clearTimeout(timeoutId);
  }
}
/**
 * Find the most recent scraper results file in a directory.
 *
 * @param {string} [resultsDir="results"] - Directory to search for result files.
 * @returns {string} Path to the newest matching results file.
 * @throws {Error} If the directory does not exist or contains no result files.
 */
function findLatestResultsFile(resultsDir = "results") {
  const fs = require("fs");
  const path = require("path");
  if (!fs.existsSync(resultsDir)) {
    throw new Error("Results directory not found. Run the scraper first.");
  }
  // A scraper output file starts with a known prefix, is JSON, and is
  // not an AI-annotated derivative.
  const isResultsFile = (name) =>
    (name.startsWith("results-") || name.startsWith("linkedin-results-")) &&
    name.endsWith(".json") &&
    !name.includes("-ai-");
  const candidates = fs.readdirSync(resultsDir).filter(isResultsFile);
  if (candidates.length === 0) {
    throw new Error("No results files found. Run the scraper first.");
  }
  // Timestamped filenames sort lexicographically in chronological order,
  // so the last entry of an ascending sort is the newest file.
  candidates.sort();
  return path.join(resultsDir, candidates[candidates.length - 1]);
}
// Public API of the AI analysis utilities module.
module.exports = {
  checkOllamaStatus,
  analyzeBatch,
  analyzeSinglePost,
  findLatestResultsFile,
  DEFAULT_MODEL, // Export so other modules can use it
};

View File

@ -31,7 +31,7 @@ const LINKEDIN_USERNAME = process.env.LINKEDIN_USERNAME;
const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD; const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD;
const HEADLESS = process.env.HEADLESS !== "false"; const HEADLESS = process.env.HEADLESS !== "false";
const SEARCH_KEYWORDS = const SEARCH_KEYWORDS =
process.env.SEARCH_KEYWORDS || "layoff,downsizing,job cuts"; process.env.SEARCH_KEYWORDS || "layoff,downsizing";//,job cuts";
const LOCATION_FILTER = process.env.LOCATION_FILTER; const LOCATION_FILTER = process.env.LOCATION_FILTER;
const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS !== "false"; const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS !== "false";
const AI_CONTEXT = process.env.AI_CONTEXT || "job market analysis and trends"; const AI_CONTEXT = process.env.AI_CONTEXT || "job market analysis and trends";

View File

@ -10,6 +10,7 @@ const {
containsAnyKeyword, containsAnyKeyword,
validateLocationAgainstFilters, validateLocationAgainstFilters,
extractLocationFromProfile, extractLocationFromProfile,
parseLocationFilters,
} = require("ai-analyzer"); } = require("ai-analyzer");
/** /**
@ -48,28 +49,44 @@ async function linkedinStrategy(coreParser, options = {}) {
await coreParser.navigateTo(searchUrl, { await coreParser.navigateTo(searchUrl, {
pageId: "linkedin-main", pageId: "linkedin-main",
retries: 2, retries: 2,
waitUntil: "networkidle", // Wait for network to be idle
}); });
// Wait for page to load - use delay utility instead of waitForTimeout // Wait for page to load and content to render
await new Promise(resolve => setTimeout(resolve, 3000)); // Give LinkedIn time to render await new Promise(resolve => setTimeout(resolve, 5000)); // Give LinkedIn time to render dynamic content
// Scroll down a bit to trigger lazy loading
try {
await page.evaluate(() => {
window.scrollTo(0, 500);
});
await new Promise(resolve => setTimeout(resolve, 2000));
} catch (e) {
logger.debug(`Could not scroll page: ${e.message}`);
}
// Wait for search results - try multiple selectors // Wait for search results - try multiple selectors
let hasResults = false; let hasResults = false;
const possibleSelectors = [ const possibleSelectors = [
".feed-shared-update-v2",
"article[data-urn*='urn:li:activity']",
"article",
".search-results-container", ".search-results-container",
".search-results__list", ".search-results__list",
".reusable-search__result-container", ".reusable-search__result-container",
"[data-test-id='search-results']", "[data-test-id='search-results']",
".feed-shared-update-v2",
"article",
]; ];
for (const selector of possibleSelectors) { for (const selector of possibleSelectors) {
try { try {
await page.waitForSelector(selector, { timeout: 5000 }); await page.waitForSelector(selector, { timeout: 10000 });
hasResults = true; // Verify we actually have post elements
logger.info(`✅ Found results container with selector: ${selector}`); const count = await page.$$(selector).then(elements => elements.length);
break; if (count > 0) {
hasResults = true;
logger.info(`✅ Found ${count} post elements with selector: ${selector}`);
break;
}
} catch (e) { } catch (e) {
// Try next selector // Try next selector
} }
@ -100,20 +117,24 @@ async function linkedinStrategy(coreParser, options = {}) {
// Validate location if filtering enabled // Validate location if filtering enabled
if (locationFilter) { if (locationFilter) {
const postLocation = post.location || post.profileLocation || ""; const postLocation = post.location || post.profileLocation || "";
// Parse locationFilter string into array if it's a string
const locationFiltersArray = typeof locationFilter === 'string'
? parseLocationFilters(locationFilter)
: locationFilter;
const locationValid = validateLocationAgainstFilters( const locationValid = validateLocationAgainstFilters(
postLocation, postLocation,
locationFilter locationFiltersArray
); );
if (!locationValid) { if (!locationValid.isValid) {
logger.debug(`⏭️ Post rejected: location "${postLocation}" doesn't match filter "${locationFilter}"`); logger.debug(`⏭️ Post rejected: location "${postLocation}" doesn't match filter "${locationFilter}"`);
rejectedResults.push({ rejectedResults.push({
...post, ...post,
rejectionReason: `Location filter mismatch: "${postLocation}" not in "${locationFilter}"`, rejectionReason: locationValid.reasoning || `Location filter mismatch: "${postLocation}" not in "${locationFilter}"`,
}); });
continue; continue;
} else { } else {
logger.debug(`✅ Post location "${postLocation}" matches filter "${locationFilter}"`); logger.debug(`✅ Post location "${postLocation}" matches filter "${locationFilter}" (${locationValid.reasoning || 'matched'})`);
} }
} }
@ -156,9 +177,12 @@ async function extractPostsFromPage(page, keyword) {
try { try {
// Try multiple selectors for post elements (LinkedIn changes these frequently) // Try multiple selectors for post elements (LinkedIn changes these frequently)
// Prioritize selectors that are more specific to actual posts
const postSelectors = [ const postSelectors = [
".feed-shared-update-v2", "article[data-urn*='urn:li:activity']", // Most specific - posts with activity ID
".feed-shared-update-v2[data-urn*='urn:li:activity']",
"article.feed-shared-update-v2", "article.feed-shared-update-v2",
".feed-shared-update-v2",
"[data-urn*='urn:li:activity']", "[data-urn*='urn:li:activity']",
".reusable-search__result-container", ".reusable-search__result-container",
".search-result__wrapper", ".search-result__wrapper",
@ -170,11 +194,30 @@ async function extractPostsFromPage(page, keyword) {
for (const selector of postSelectors) { for (const selector of postSelectors) {
try { try {
// Wait a bit for elements to be available
await page.waitForSelector(selector, { timeout: 3000 }).catch(() => {});
postElements = await page.$$(selector); postElements = await page.$$(selector);
// Filter to only elements that have a data-urn attribute (actual posts)
if (postElements.length > 0) { if (postElements.length > 0) {
usedSelector = selector; const validElements = [];
logger.info(`✅ Found ${postElements.length} post elements using selector: ${selector}`); for (const elem of postElements) {
break; try {
const dataUrn = await elem.getAttribute("data-urn");
if (dataUrn && dataUrn.includes("urn:li:activity")) {
validElements.push(elem);
}
} catch (e) {
// Element might have been detached, skip it
}
}
if (validElements.length > 0) {
postElements = validElements;
usedSelector = selector;
logger.info(`✅ Found ${postElements.length} valid post elements using selector: ${selector}`);
break;
}
} }
} catch (e) { } catch (e) {
// Try next selector // Try next selector
@ -199,10 +242,22 @@ async function extractPostsFromPage(page, keyword) {
for (let i = 0; i < postElements.length; i++) { for (let i = 0; i < postElements.length; i++) {
try { try {
// Scroll element into view to ensure it's fully rendered
try {
await postElements[i].evaluate((el) => {
el.scrollIntoView({ behavior: 'smooth', block: 'center' });
});
await new Promise(resolve => setTimeout(resolve, 500)); // Small delay for rendering
} catch (e) {
// Element might already be in view or detached, continue anyway
}
const post = await extractPostData(postElements[i], keyword); const post = await extractPostData(postElements[i], keyword);
if (post) { if (post) {
posts.push(post); posts.push(post);
logger.debug(`✅ Extracted post ${i + 1}/${postElements.length}: ${post.postId.substring(0, 20)}...`); const hasContent = post.content && post.content.length > 0;
const hasAuthor = post.authorName && post.authorName.length > 0;
logger.debug(`✅ Extracted post ${i + 1}/${postElements.length}: ${post.postId.substring(0, 20)}... (content: ${hasContent ? 'yes' : 'no'}, author: ${hasAuthor ? 'yes' : 'no'})`);
} else { } else {
logger.debug(`⏭️ Post ${i + 1}/${postElements.length} filtered out (no keyword match or missing data)`); logger.debug(`⏭️ Post ${i + 1}/${postElements.length} filtered out (no keyword match or missing data)`);
} }
@ -222,131 +277,524 @@ async function extractPostsFromPage(page, keyword) {
/** /**
* Extract data from individual post element * Extract data from individual post element
* Uses evaluate() to extract data directly from DOM for better reliability
*/ */
async function extractPostData(postElement, keyword) { async function extractPostData(postElement, keyword) {
try { try {
// Extract post ID // Use evaluate to extract data directly from the DOM element
const postId = (await postElement.getAttribute("data-urn")) || ""; // This is more reliable than using selectors which may not match
const postData = await postElement.evaluate((el, keyword) => {
const data = {
postId: "",
authorName: "",
authorUrl: "",
content: "",
timestamp: "",
location: "",
likes: 0,
comments: 0,
};
// Extract author info // Extract post ID from data-urn attribute
const authorElement = await postElement.$(".feed-shared-actor__name"); data.postId = el.getAttribute("data-urn") ||
const authorName = authorElement el.getAttribute("data-activity-id") ||
? cleanText(await authorElement.textContent()) el.querySelector("[data-urn]")?.getAttribute("data-urn") || "";
: "";
const authorLinkElement = await postElement.$(".feed-shared-actor__name a"); // Extract author name - try multiple selectors and approaches
const authorUrl = authorLinkElement const authorSelectors = [
? await authorLinkElement.getAttribute("href") ".feed-shared-actor__name",
: ""; ".feed-shared-actor__name-link",
".update-components-actor__name",
".feed-shared-actor__name a",
"[data-test-id='actor-name']",
"span[aria-label*='name']",
"a[href*='/in/'] span",
".feed-shared-actor a span",
".feed-shared-actor span",
".feed-shared-actor__name-link span",
];
// Extract post content for (const selector of authorSelectors) {
const contentElement = await postElement.$(".feed-shared-text"); const elem = el.querySelector(selector);
const content = contentElement if (elem) {
? cleanText(await contentElement.textContent()) const text = elem.textContent?.trim() || elem.innerText?.trim();
: ""; if (text && text.length > 0 && text.length < 100) { // Reasonable name length
data.authorName = text;
// Extract timestamp // Try to get link from same element or parent
const timeElement = await postElement.$( const link = elem.closest("a") || elem.querySelector("a");
".feed-shared-actor__sub-description time" if (link) {
); data.authorUrl = link.getAttribute("href") || "";
const timestamp = timeElement }
? await timeElement.getAttribute("datetime")
: "";
// Extract location from profile (try multiple selectors)
let location = "";
const locationSelectors = [
".feed-shared-actor__sub-description .feed-shared-actor__sub-description-link",
".feed-shared-actor__sub-description .feed-shared-actor__sub-description-link--without-hover",
".feed-shared-actor__sub-description span[aria-label*='location']",
".feed-shared-actor__sub-description span[aria-label*='Location']",
];
for (const selector of locationSelectors) {
try {
const locationElement = await postElement.$(selector);
if (locationElement) {
const locationText = await locationElement.textContent();
if (locationText && locationText.trim()) {
location = cleanText(locationText);
break; break;
} }
} }
} catch (e) {
// Try next selector
} }
}
// If no location found in sub-description, try to extract from author link hover or profile // If author name found but no URL, try to find link separately
if (!location) { if (data.authorName && !data.authorUrl) {
try { const authorLink = el.querySelector(".feed-shared-actor__name-link, .feed-shared-actor__name a, a[href*='/in/']");
// Try to get location from data attributes or other sources if (authorLink) {
const subDescElement = await postElement.$(".feed-shared-actor__sub-description"); data.authorUrl = authorLink.getAttribute("href") || "";
if (subDescElement) { }
const subDescText = await subDescElement.textContent(); }
// Look for location patterns (City, Province/State, Country)
const locationMatch = subDescText.match(/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*),\s*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)(?:,\s*([A-Z][a-z]+))?/); // Fallback: Look for any link with /in/ pattern and get the name from nearby text
if (locationMatch) { if (!data.authorName) {
location = cleanText(locationMatch[0]); const profileLinks = el.querySelectorAll("a[href*='/in/']");
for (const link of profileLinks) {
// Skip if it's a company link
if (link.getAttribute("href")?.includes("/company/")) continue;
// Get text from the link or nearby
const linkText = link.textContent?.trim() || link.innerText?.trim();
if (linkText && linkText.length > 0 && linkText.length < 100 && !linkText.includes("View")) {
data.authorName = linkText;
data.authorUrl = link.getAttribute("href") || "";
break;
}
// Try to get text from first child span
const childSpan = link.querySelector("span");
if (childSpan) {
const spanText = childSpan.textContent?.trim() || childSpan.innerText?.trim();
if (spanText && spanText.length > 0 && spanText.length < 100) {
data.authorName = spanText;
data.authorUrl = link.getAttribute("href") || "";
break;
}
}
// Try to get text from parent
const parentText = link.parentElement?.textContent?.trim();
if (parentText && parentText.length < 100 && !parentText.includes("View")) {
// Extract just the name part (first line or first few words)
const namePart = parentText.split("\n")[0].split("·")[0].trim();
if (namePart.length > 0 && namePart.length < 100) {
data.authorName = namePart;
data.authorUrl = link.getAttribute("href") || "";
break;
}
} }
} }
} catch (e) {
// Location extraction failed, continue without it
} }
// Last resort: Extract from actor section by looking at all text
if (!data.authorName) {
const actorSection = el.querySelector(".feed-shared-actor, .update-components-actor, [class*='actor']");
if (actorSection) {
const actorText = actorSection.textContent || actorSection.innerText || "";
const lines = actorText.split("\n").map(l => l.trim()).filter(l => l.length > 0);
// First non-empty line is often the name
for (const line of lines) {
if (line.length > 0 && line.length < 100 &&
!line.includes("·") &&
!line.includes("ago") &&
!line.match(/^\d+/) &&
!line.toLowerCase().includes("view")) {
data.authorName = line;
// Try to find associated link
const link = actorSection.querySelector("a[href*='/in/']");
if (link) {
data.authorUrl = link.getAttribute("href") || "";
}
break;
}
}
}
}
// Extract post content - try multiple selectors
const contentSelectors = [
".feed-shared-text",
".feed-shared-text__text-view",
".feed-shared-update-v2__description",
".update-components-text",
"[data-test-id='post-text']",
".feed-shared-text span",
".feed-shared-update-v2__description-wrapper",
];
for (const selector of contentSelectors) {
const elem = el.querySelector(selector);
if (elem) {
const text = elem.textContent?.trim() || elem.innerText?.trim();
if (text && text.length > 10) { // Only use if substantial content
data.content = text;
break;
}
}
}
// Extract timestamp
const timeSelectors = [
".feed-shared-actor__sub-description time",
"time[datetime]",
"[data-test-id='timestamp']",
".feed-shared-actor__sub-description time[datetime]",
"time",
".feed-shared-actor__sub-description time",
"span[aria-label*='time']",
"span[aria-label*='ago']",
];
for (const selector of timeSelectors) {
const elem = el.querySelector(selector);
if (elem) {
data.timestamp = elem.getAttribute("datetime") ||
elem.getAttribute("title") ||
elem.getAttribute("aria-label") ||
elem.textContent?.trim() || "";
if (data.timestamp) break;
}
}
// Fallback: Look for time-like patterns in sub-description
if (!data.timestamp) {
const subDesc = el.querySelector(".feed-shared-actor__sub-description");
if (subDesc) {
const subDescText = subDesc.textContent || subDesc.innerText || "";
// Look for patterns like "2h", "3d", "1w", "2 months ago", etc.
const timePatterns = [
/\d+\s*(minute|hour|day|week|month|year)s?\s*ago/i,
/\d+\s*(h|d|w|mo|yr)/i,
/(just now|today|yesterday)/i,
];
for (const pattern of timePatterns) {
const match = subDescText.match(pattern);
if (match) {
data.timestamp = match[0];
break;
}
}
}
}
// Extract location - try multiple approaches
const locationSelectors = [
".feed-shared-actor__sub-description .feed-shared-actor__sub-description-link",
".feed-shared-actor__sub-description-link--without-hover",
"span[aria-label*='location' i]",
"span[aria-label*='Location']",
".feed-shared-actor__sub-description span",
".feed-shared-actor__sub-description a",
"a[href*='/company/']",
"a[href*='/location/']",
];
for (const selector of locationSelectors) {
const elem = el.querySelector(selector);
if (elem) {
const text = elem.textContent?.trim() || elem.getAttribute("aria-label") || elem.innerText?.trim() || "";
// Check if it looks like a location (contains comma or common location words)
if (text && text.length > 2 && text.length < 100) {
// More flexible location detection
if (text.includes(",") ||
/(city|province|state|country|region|ontario|alberta|british columbia|quebec|manitoba|saskatchewan|nova scotia|new brunswick|newfoundland|prince edward island|yukon|northwest territories|nunavut)/i.test(text) ||
/^[A-Z][a-z]+,\s*[A-Z][a-z]+/i.test(text)) {
data.location = text;
break;
}
}
}
}
// If no location found, try parsing from sub-description text
if (!data.location) {
const subDesc = el.querySelector(".feed-shared-actor__sub-description");
if (subDesc) {
const subDescText = subDesc.textContent || subDesc.innerText || "";
// First, try to get all links in sub-description (location is often a link)
const subDescLinks = subDesc.querySelectorAll("a");
for (const link of subDescLinks) {
const linkText = link.textContent?.trim() || link.innerText?.trim() || "";
const linkHref = link.getAttribute("href") || "";
// Skip if it's a time/date link or company link
if (linkHref.includes("/company/") || linkText.match(/\d+\s*(minute|hour|day|week|month|year|h|d|w)/i)) {
continue;
}
// If link text looks like a location
if (linkText && linkText.length > 2 && linkText.length < 100) {
if (linkText.includes(",") ||
/(ontario|alberta|british columbia|quebec|manitoba|saskatchewan|nova scotia|new brunswick|newfoundland|prince edward island|yukon|northwest territories|nunavut|toronto|vancouver|calgary|ottawa|montreal|winnipeg|edmonton|halifax|victoria|regina|saskatoon|windsor|kitchener|hamilton|london|st\.?\s*catharines|oshawa|barrie|greater sudbury|sherbrooke|kelowna|abbotsford|trois-rivières|guelph|cambridge|coquitlam|saanich|saint john|thunder bay|waterloo|delta|chatham|red deer|kamloops|brantford|whitehorse|yellowknife|iqaluit)/i.test(linkText)) {
data.location = linkText;
break;
}
}
}
// If still no location, try pattern matching on the full text
if (!data.location && subDescText) {
// Look for location patterns (City, Province/State, Country)
const locationPatterns = [
// Full location: "City, Province, Country"
/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*),\s*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)(?:,\s*([A-Z][a-z]+))?/,
// City, Province
/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s*,\s*([A-Z][a-z]+)/,
// Just province/state names
/\b(Ontario|Alberta|British Columbia|Quebec|Manitoba|Saskatchewan|Nova Scotia|New Brunswick|Newfoundland|Prince Edward Island|Yukon|Northwest Territories|Nunavut|ON|AB|BC|QC|MB|SK|NS|NB|NL|PE|YT|NT|NU)\b/i,
// Major cities
/\b(Toronto|Vancouver|Calgary|Ottawa|Montreal|Winnipeg|Edmonton|Halifax|Victoria|Regina|Saskatoon)\b/i,
];
for (const pattern of locationPatterns) {
const match = subDescText.match(pattern);
if (match) {
// Get more context around the match
const matchIndex = subDescText.indexOf(match[0]);
const contextStart = Math.max(0, matchIndex - 30);
const contextEnd = Math.min(subDescText.length, matchIndex + match[0].length + 30);
const context = subDescText.substring(contextStart, contextEnd).trim();
// Extract just the location part (remove time/date info)
let locationText = match[0].trim();
// If we have more context, try to get a better location string
if (context.includes(",") && context.length < 100) {
// Try to extract "City, Province" pattern from context
const cityProvinceMatch = context.match(/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*),\s*([A-Z][a-z]+)/);
if (cityProvinceMatch) {
locationText = cityProvinceMatch[0].trim();
}
}
data.location = locationText;
break;
}
}
}
// Last resort: extract any text that looks location-like from sub-description
if (!data.location && subDescText) {
// Split by common separators and look for location-like text
const parts = subDescText.split(/[·•|]/).map(p => p.trim()).filter(p => p.length > 0);
for (const part of parts) {
// Skip if it looks like time/date
if (part.match(/\d+\s*(minute|hour|day|week|month|year|h|d|w|ago)/i)) {
continue;
}
// Check if it looks like a location
if (part.length > 2 && part.length < 100 &&
(part.includes(",") ||
/(ontario|alberta|british columbia|quebec|manitoba|toronto|vancouver|calgary|ottawa|montreal)/i.test(part))) {
data.location = part;
break;
}
}
}
}
}
// Final fallback: look anywhere in the actor section for location-like text
if (!data.location) {
const actorSection = el.querySelector(".feed-shared-actor, .update-components-actor");
if (actorSection) {
const actorText = actorSection.textContent || actorSection.innerText || "";
// Look for province names
const provinceMatch = actorText.match(/\b(Ontario|Alberta|British Columbia|Quebec|Manitoba|Saskatchewan|Nova Scotia|New Brunswick|Newfoundland|Prince Edward Island|Yukon|Northwest Territories|Nunavut)\b/i);
if (provinceMatch) {
// Try to get city, province if available
const cityProvinceMatch = actorText.match(/([A-Z][a-z]+),\s*(Ontario|Alberta|British Columbia|Quebec|Manitoba|Saskatchewan|Nova Scotia|New Brunswick|Newfoundland|Prince Edward Island|Yukon|Northwest Territories|Nunavut)/i);
if (cityProvinceMatch) {
data.location = cityProvinceMatch[0].trim();
} else {
data.location = provinceMatch[0].trim();
}
}
}
}
// Try to extract from any hover cards or mini profiles in the DOM
if (!data.location) {
// Look for mini profile cards or tooltips
const miniProfileSelectors = [
"[data-control-name='hovercard']",
".artdeco-hoverable-trigger",
".feed-shared-actor__meta",
".pv-text-details__left-panel",
];
for (const selector of miniProfileSelectors) {
const elem = el.querySelector(selector);
if (elem) {
const text = elem.textContent || elem.innerText || "";
// Look for location patterns
const locationMatch = text.match(/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*),\s*(Ontario|Alberta|British Columbia|Quebec|Manitoba|Saskatchewan|Nova Scotia|New Brunswick|Newfoundland|Prince Edward Island|Yukon|Northwest Territories|Nunavut)/i);
if (locationMatch) {
data.location = locationMatch[0].trim();
break;
}
}
}
}
// Extract engagement metrics - try multiple approaches
const likesSelectors = [
".social-counts-reactions__count",
"[data-test-id='reactions-count']",
".social-counts__reactions-count",
".feed-shared-social-action-bar__reactions-count",
"button[aria-label*='reaction']",
"button[aria-label*='like']",
".social-actions-button__reactions-count",
"[data-test-id='social-actions__reactions-count']",
];
for (const selector of likesSelectors) {
const elem = el.querySelector(selector);
if (elem) {
const text = elem.textContent?.trim() || elem.getAttribute("aria-label") || "";
const match = text.match(/(\d+)/);
if (match) {
data.likes = parseInt(match[1], 10) || 0;
break;
}
}
}
// Fallback: Look for any button or element with reaction/like text
if (data.likes === 0) {
const allButtons = el.querySelectorAll("button, span, div");
for (const btn of allButtons) {
const text = btn.textContent?.trim() || btn.getAttribute("aria-label") || "";
if (/reaction|like/i.test(text)) {
const match = text.match(/(\d+)/);
if (match) {
data.likes = parseInt(match[1], 10) || 0;
break;
}
}
}
}
const commentsSelectors = [
".social-counts-comments__count",
"[data-test-id='comments-count']",
".social-counts__comments-count",
".feed-shared-social-action-bar__comments-count",
"button[aria-label*='comment']",
".social-actions-button__comments-count",
"[data-test-id='social-actions__comments-count']",
];
for (const selector of commentsSelectors) {
const elem = el.querySelector(selector);
if (elem) {
const text = elem.textContent?.trim() || elem.getAttribute("aria-label") || "";
const match = text.match(/(\d+)/);
if (match) {
data.comments = parseInt(match[1], 10) || 0;
break;
}
}
}
// Fallback: Look for any button or element with comment text
if (data.comments === 0) {
const allButtons = el.querySelectorAll("button, span, div");
for (const btn of allButtons) {
const text = btn.textContent?.trim() || btn.getAttribute("aria-label") || "";
if (/comment/i.test(text)) {
const match = text.match(/(\d+)/);
if (match) {
data.comments = parseInt(match[1], 10) || 0;
break;
}
}
}
}
return data;
}, keyword);
// Clean and format the extracted data
const authorName = cleanText(postData.authorName);
let authorUrl = postData.authorUrl || "";
if (authorUrl && !authorUrl.startsWith("http")) {
authorUrl = `https://www.linkedin.com${authorUrl}`;
} }
// Extract engagement metrics const content = cleanText(postData.content);
const likesElement = await postElement.$(".social-counts-reactions__count"); const location = cleanText(postData.location);
const likesText = likesElement const timestamp = postData.timestamp || "";
? cleanText(await likesElement.textContent())
: "0";
const commentsElement = await postElement.$(
".social-counts-comments__count"
);
const commentsText = commentsElement
? cleanText(await commentsElement.textContent())
: "0";
// Note: LinkedIn search already filters by keyword semantically
// We don't filter by content keyword match because:
// 1. LinkedIn's search is semantic - it finds related posts, not just exact matches
// 2. The keyword might be in comments, hashtags, or metadata, not visible text
// 3. Posts might be about the topic without using the exact keyword
//
// Optional: Log if keyword appears in content (for debugging, but don't filter)
const keywordLower = keyword.toLowerCase();
const contentLower = content.toLowerCase();
const hasKeywordInContent = contentLower.includes(keywordLower);
if (!hasKeywordInContent && content.length > 50) {
logger.debug(` Post doesn't contain keyword "${keyword}" in visible content, but including it (LinkedIn search matched it)`);
}
// Validate we have minimum required data // Validate we have minimum required data
if (!postId && !content) { if (!postData.postId && !content) {
logger.debug(`⏭️ Post filtered: missing both postId and content`); logger.debug(`⏭️ Post filtered: missing both postId and content`);
return null; return null;
} }
// Log extraction results for debugging
const missingFields = [];
if (!authorName) missingFields.push("authorName");
if (!authorUrl) missingFields.push("authorUrl");
if (!location) missingFields.push("location");
if (!timestamp) missingFields.push("timestamp");
if (postData.likes === 0 && postData.comments === 0) missingFields.push("engagement");
if (missingFields.length > 0 && postData.postId) {
logger.debug(`⚠️ Post ${postData.postId.substring(0, 20)}... missing: ${missingFields.join(", ")}`);
// If location is missing, log sub-description content for debugging
if (!location && process.env.DEBUG_EXTRACTION === "true") {
try {
const subDescInfo = await postElement.evaluate((el) => {
const subDesc = el.querySelector(".feed-shared-actor__sub-description");
if (subDesc) {
return {
text: subDesc.textContent || subDesc.innerText || "",
html: subDesc.innerHTML.substring(0, 500),
links: Array.from(subDesc.querySelectorAll("a")).map(a => ({
text: a.textContent?.trim(),
href: a.getAttribute("href")
}))
};
}
return null;
});
if (subDescInfo) {
logger.debug(`Sub-description text: "${subDescInfo.text}"`);
logger.debug(`Sub-description links: ${JSON.stringify(subDescInfo.links)}`);
}
} catch (e) {
// Ignore errors in debugging
}
}
// Optionally log HTML structure for first failed extraction (to help debug)
if (process.env.DEBUG_EXTRACTION === "true" && missingFields.length >= 3) {
try {
const htmlSnippet = await postElement.evaluate((el) => {
// Get the outer HTML of the element (limited to first 2000 chars)
const html = el.outerHTML || "";
return html.substring(0, 2000);
});
logger.debug(`HTML structure (first 2000 chars):\n${htmlSnippet}`);
} catch (e) {
// Ignore errors in debugging
}
}
}
return { return {
postId: cleanText(postId), postId: cleanText(postData.postId),
authorName, authorName,
authorUrl, authorUrl,
profileLink: authorUrl ? (authorUrl.startsWith("http") ? authorUrl : `https://www.linkedin.com${authorUrl}`) : "", profileLink: authorUrl,
text: content, text: content,
content: content, content: content,
location: location, location: location,
profileLocation: location, // Alias for compatibility profileLocation: location, // Alias for compatibility
timestamp, timestamp,
keyword, keyword,
likes: extractNumber(likesText), likes: postData.likes || 0,
comments: extractNumber(commentsText), comments: postData.comments || 0,
extractedAt: new Date().toISOString(), extractedAt: new Date().toISOString(),
source: "linkedin", source: "linkedin",
parser: "linkedout-parser", parser: "linkedout-parser",
}; };
} catch (error) { } catch (error) {
logger.warning(`Error extracting post data: ${error.message}`); logger.warning(`Error extracting post data: ${error.message}`);
logger.debug(`Stack trace: ${error.stack}`);
return null; return null;
} }
} }