Refactor AI analysis utilities and enhance LinkedIn parser

- Updated `ai-utils.js` to improve AI response parsing and added timeout handling for API requests.
- Modified `linkedin-parser` to refine search keyword handling and improve post extraction reliability.
- Enhanced location filtering logic and added more robust selectors for extracting post data.
- Improved logging for debugging purposes, including detailed extraction results and fallback mechanisms.
This commit is contained in:
tanyar09 2025-12-12 15:45:07 -05:00
parent 8de65bc04c
commit bbfd3c84aa
3 changed files with 996 additions and 411 deletions

View File

@@ -1,305 +1,442 @@
const { logger } = require("./logger");
/**
* AI Analysis utilities for post processing with Ollama
* Extracted from ai-analyzer-local.js for reuse across parsers
*/
// Default model from environment variable or fallback to "mistral"
const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "mistral";
/**
* Check if Ollama is running and the model is available
*/
async function checkOllamaStatus(
  model = DEFAULT_MODEL,
  ollamaHost = "http://localhost:11434"
) {
  try {
    // Check if Ollama is running by probing its tags endpoint.
    const response = await fetch(`${ollamaHost}/api/tags`);
    if (!response.ok) {
      throw new Error(`Ollama not running on ${ollamaHost}`);
    }
    const data = await response.json();
    // BUGFIX: guard against a malformed /api/tags payload. Previously a
    // missing "models" array threw a TypeError that was caught below and
    // misreported as a connection failure; now it is handled as
    // "model not found" with accurate guidance.
    const availableModels = (data.models ?? []).map((m) => m.name);
    // Strip the ":tag" suffix (e.g. "mistral:latest" -> "mistral") for display.
    const shortNames = availableModels.map((m) => m.split(":")[0]).join(", ");
    logger.ai("Ollama is running");
    logger.info(`📦 Available models: ${shortNames}`);
    // Check if requested model is available; installed models may carry a
    // tag suffix, so match by prefix.
    const modelExists = availableModels.some((m) => m.startsWith(model));
    if (!modelExists) {
      logger.error(`Model "${model}" not found`);
      logger.error(`💡 Install it with: ollama pull ${model}`);
      logger.error(`💡 Or choose from: ${shortNames}`);
      return false;
    }
    logger.success(`Using model: ${model}`);
    return true;
  } catch (error) {
    // Connection-level failure (daemon down, bad host, network error).
    logger.error(`Error connecting to Ollama: ${error.message}`);
    logger.error("💡 Make sure Ollama is installed and running:");
    logger.error(" 1. Install: https://ollama.ai/");
    logger.error(" 2. Start: ollama serve");
    logger.error(` 3. Install model: ollama pull ${model}`);
    return false;
  }
}
/**
* Analyze multiple posts using local Ollama
*/
async function analyzeBatch(
posts,
context,
model = DEFAULT_MODEL,
ollamaHost = "http://localhost:11434"
) {
logger.ai(`Analyzing batch of ${posts.length} posts with ${model}...`);
try {
// Build one prompt covering every post; each post's text is truncated to
// 400 characters to keep the prompt compact.
const prompt = `You are an expert at analyzing LinkedIn posts for relevance to specific contexts.
CONTEXT TO MATCH: "${context}"
Analyze these ${
posts.length
} LinkedIn posts and determine if each relates to the context above.
POSTS:
${posts
.map(
(post, i) => `
POST ${i + 1}:
"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}"
`
)
.join("")}
For each post, provide:
- Is it relevant to "${context}"? (YES/NO)
- Confidence level (0.0 to 1.0)
- Brief reasoning
Respond in this EXACT format for each post:
POST 1: YES/NO | 0.X | brief reason
POST 2: YES/NO | 0.X | brief reason
POST 3: YES/NO | 0.X | brief reason
Examples:
- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs
- For hiring context: "we're hiring developers" = YES | 0.8 | job posting
- Unrelated content = NO | 0.1 | not relevant to context`;
// Non-streaming generation request; low temperature for stable formatting.
const response = await fetch(`${ollamaHost}/api/generate`, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
model: model,
prompt: prompt,
stream: false,
options: {
temperature: 0.3,
top_p: 0.9,
},
}),
});
if (!response.ok) {
throw new Error(
`Ollama API error: ${response.status} ${response.statusText}`
);
}
const data = await response.json();
const aiResponse = data.response.trim();
// Parse the response
const analyses = [];
const lines = aiResponse.split("\n").filter((line) => line.trim());
// One analysis per post; a post whose "POST N:" line never appears keeps
// the pessimistic default below.
for (let i = 0; i < posts.length; i++) {
let analysis = {
postIndex: i + 1,
isRelevant: false,
confidence: 0.5,
reasoning: "Could not parse AI response",
};
// Look for lines that match "POST X:" pattern
const postPattern = new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i");
for (const line of lines) {
const match = line.match(postPattern);
if (match) {
const content = match[1].trim();
// Parse: YES/NO | 0.X | reasoning
const parts = content.split("|").map((p) => p.trim());
if (parts.length >= 3) {
analysis.isRelevant = parts[0].toUpperCase().includes("YES");
// NOTE(review): `parseFloat(parts[1]) || 0.5` also replaces a
// legitimately parsed confidence of 0 with 0.5 — confirm intended.
analysis.confidence = Math.max(
0,
Math.min(1, parseFloat(parts[1]) || 0.5)
);
analysis.reasoning = parts[2] || "No reasoning provided";
} else {
// Fallback parsing
analysis.isRelevant =
content.toUpperCase().includes("YES") ||
content.toLowerCase().includes("relevant");
analysis.confidence = 0.6;
analysis.reasoning = content.substring(0, 100);
}
break;
}
}
analyses.push(analysis);
}
// If we didn't get enough analyses, fill in defaults
while (analyses.length < posts.length) {
analyses.push({
postIndex: analyses.length + 1,
isRelevant: false,
confidence: 0.3,
reasoning: "AI response parsing failed",
});
}
return analyses;
} catch (error) {
logger.error(`Error in batch AI analysis: ${error.message}`);
// Fallback: mark all as relevant with low confidence
// (fail-open so downstream filtering still sees every post).
return posts.map((_, i) => ({
postIndex: i + 1,
isRelevant: true,
confidence: 0.3,
reasoning: `Analysis failed: ${error.message}`,
}));
}
}
/**
* Analyze a single post using local Ollama (fallback)
*/
async function analyzeSinglePost(
  text,
  context,
  model = DEFAULT_MODEL,
  ollamaHost = "http://localhost:11434"
) {
  // Prompt requesting a single "YES/NO | confidence | reason" line.
  const prompt = `Analyze this LinkedIn post for relevance to: "${context}"
Post: "${text}"
Is this post relevant to "${context}"? Provide:
1. YES or NO
2. Confidence (0.0 to 1.0)
3. Brief reason
Format: YES/NO | 0.X | reason`;
  try {
    // Non-streaming generation request; low temperature for stable formatting.
    const response = await fetch(`${ollamaHost}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: model,
        prompt: prompt,
        stream: false,
        options: {
          temperature: 0.3,
        },
      }),
    });
    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.status}`);
    }
    const data = await response.json();
    const aiResponse = data.response.trim();
    // Parse response of the form "YES | 0.8 | reason".
    const parts = aiResponse.split("|").map((p) => p.trim());
    if (parts.length >= 3) {
      // BUGFIX: the previous `parseFloat(parts[1]) || 0.5` treated a parsed
      // confidence of exactly 0 as "missing" and silently replaced it with
      // 0.5. Only fall back when the value is not a number at all.
      const parsed = Number.parseFloat(parts[1]);
      const confidence = Number.isNaN(parsed) ? 0.5 : parsed;
      return {
        isRelevant: parts[0].toUpperCase().includes("YES"),
        confidence: Math.max(0, Math.min(1, confidence)),
        reasoning: parts[2],
      };
    } else {
      // Fallback parsing for unstructured replies.
      return {
        isRelevant:
          aiResponse.toLowerCase().includes("yes") ||
          aiResponse.toLowerCase().includes("relevant"),
        confidence: 0.6,
        reasoning: aiResponse.substring(0, 100),
      };
    }
  } catch (error) {
    return {
      isRelevant: true, // Default to include on error
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    };
  }
}
/**
* Find the most recent results file if none specified
*/
function findLatestResultsFile(resultsDir = "results") {
  const fs = require("fs");
  const path = require("path");
  if (!fs.existsSync(resultsDir)) {
    throw new Error("Results directory not found. Run the scraper first.");
  }
  // Candidate scraper outputs; AI-annotated copies ("-ai-") are excluded.
  const files = fs
    .readdirSync(resultsDir)
    .filter(
      (f) =>
        (f.startsWith("results-") || f.startsWith("linkedin-results-")) &&
        f.endsWith(".json") &&
        !f.includes("-ai-")
    );
  if (files.length === 0) {
    throw new Error("No results files found. Run the scraper first.");
  }
  // BUGFIX: pick the newest file by modification time. The previous
  // lexicographic sort always ranked "results-*" above "linkedin-results-*"
  // regardless of age, because the two prefixes differ.
  const newest = files
    .map((f) => ({
      file: f,
      mtime: fs.statSync(path.join(resultsDir, f)).mtimeMs,
    }))
    .sort((a, b) => b.mtime - a.mtime)[0];
  return path.join(resultsDir, newest.file);
}
// Public API of the AI utilities module (CommonJS).
module.exports = {
checkOllamaStatus,
analyzeBatch,
analyzeSinglePost,
findLatestResultsFile,
DEFAULT_MODEL, // Export so other modules can use it
};
const { logger } = require("./logger");
/**
* AI Analysis utilities for post processing with Ollama
* Extracted from ai-analyzer-local.js for reuse across parsers
*/
// Default model from environment variable or fallback to "mistral"
const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "mistral";
/**
* Check if Ollama is running and the model is available
*/
async function checkOllamaStatus(
  model = DEFAULT_MODEL,
  ollamaHost = "http://localhost:11434"
) {
  try {
    // Probe the tags endpoint to confirm the Ollama daemon is reachable.
    const tagsResponse = await fetch(`${ollamaHost}/api/tags`);
    if (!tagsResponse.ok) {
      throw new Error(`Ollama not running on ${ollamaHost}`);
    }
    const payload = await tagsResponse.json();
    const availableModels = payload.models.map((entry) => entry.name);
    // Display names with the ":tag" suffix stripped (e.g. "mistral:latest" -> "mistral").
    const shortNames = availableModels
      .map((name) => name.split(":")[0])
      .join(", ");
    logger.ai("Ollama is running");
    logger.info(`📦 Available models: ${shortNames}`);
    // Installed models may carry a tag suffix, so match by prefix.
    const hasRequestedModel = availableModels.some((name) =>
      name.startsWith(model)
    );
    if (!hasRequestedModel) {
      logger.error(`Model "${model}" not found`);
      logger.error(`💡 Install it with: ollama pull ${model}`);
      logger.error(`💡 Or choose from: ${shortNames}`);
      return false;
    }
    logger.success(`Using model: ${model}`);
    return true;
  } catch (error) {
    // Daemon unreachable or response unusable: log setup hints and report failure.
    logger.error(`Error connecting to Ollama: ${error.message}`);
    logger.error("💡 Make sure Ollama is installed and running:");
    logger.error(" 1. Install: https://ollama.ai/");
    logger.error(" 2. Start: ollama serve");
    logger.error(` 3. Install model: ollama pull ${model}`);
    return false;
  }
}
/**
* Analyze multiple posts using local Ollama
*/
async function analyzeBatch(
posts,
context,
model = DEFAULT_MODEL,
ollamaHost = "http://localhost:11434"
) {
logger.ai(`Analyzing batch of ${posts.length} posts with ${model}...`);
try {
// Build one compact prompt covering every post (texts truncated to 400
// chars) with strict output-format instructions the parser below relies on.
const prompt = `Analyze ${posts.length} LinkedIn posts for relevance to: "${context}"
POSTS:
${posts
.map(
(post, i) => `
POST ${i + 1}:
"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}"
`
)
.join("")}
REQUIRED FORMAT - Respond with EXACTLY ${posts.length} lines, one per post:
POST 1: YES | 0.8 | reason here
POST 2: NO | 0.2 | reason here
POST 3: YES | 0.9 | reason here
RULES:
- Use YES or NO (uppercase)
- Use pipe character | as separator
- Confidence must be 0.0 to 1.0 (decimal number)
- Keep reasoning brief (one sentence)
- MUST include all ${posts.length} posts in order
Examples:
POST 1: YES | 0.9 | mentions layoffs and job cuts
POST 2: NO | 0.1 | unrelated topic about vacation
POST 3: YES | 0.7 | discusses workforce reduction`;
// Add timeout to prevent hanging (5 minutes max)
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 5 * 60 * 1000); // 5 minutes
try {
// Non-streaming generation request; aborted via the controller on timeout.
const response = await fetch(`${ollamaHost}/api/generate`, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
model: model,
prompt: prompt,
stream: false,
options: {
temperature: 0.3,
top_p: 0.9,
},
}),
signal: controller.signal,
});
clearTimeout(timeoutId);
if (!response.ok) {
throw new Error(
`Ollama API error: ${response.status} ${response.statusText}`
);
}
const data = await response.json();
const aiResponse = data.response.trim();
// Parse the response
const analyses = [];
const lines = aiResponse.split("\n").filter((line) => line.trim());
// Log the raw response for debugging
logger.debug(`AI Response length: ${aiResponse.length} chars`);
if (aiResponse.length > 0) {
logger.debug(`AI Response (first 1000 chars):\n${aiResponse.substring(0, 1000)}`);
} else {
logger.warning("⚠️ AI response is empty!");
}
// Walk each post and apply progressively looser parsing strategies:
// numbered-line match -> position-based line -> whole-response heuristics.
for (let i = 0; i < posts.length; i++) {
// Pessimistic default, kept only if every strategy below fails.
let analysis = {
postIndex: i + 1,
isRelevant: false,
confidence: 0.5,
reasoning: "Could not parse AI response",
};
// Try multiple patterns to find the post analysis
// IMPORTANT: Try numbered patterns first, only use generic pattern as last resort
const numberedPatterns = [
// Exact format: POST 1: YES | 0.8 | reason
new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i"),
// Numbered list: 1. YES | 0.8 | reason
new RegExp(`^\\s*${i + 1}[.)]\\s*(.+)`, "i"),
// Just the number: 1: YES | 0.8 | reason
new RegExp(`^\\s*${i + 1}:\\s*(.+)`, "i"),
];
let found = false;
let matchedContent = null;
// First, try to find a line with the specific post number
for (const line of lines) {
for (const pattern of numberedPatterns) {
const match = line.match(pattern);
if (match) {
matchedContent = match[1].trim();
found = true;
break;
}
}
if (found) break;
}
// If not found with numbered patterns, try position-based matching as fallback
// (assumes the model emitted one line per post, in order).
if (!found && lines.length > i) {
const targetLine = lines[i];
if (targetLine) {
// Try to parse the line even without post number
const genericMatch = targetLine.match(/^(?:POST\s*\d+:?\s*)?(.+)$/i);
if (genericMatch) {
matchedContent = genericMatch[1].trim();
found = true;
}
}
}
if (found && matchedContent) {
const content = matchedContent;
// Try to parse: YES/NO | 0.X | reasoning
let parts = content.split("|").map((p) => p.trim());
// If no pipe separator, try other separators
if (parts.length < 2) {
// Try colon separator: YES: 0.8: reason
parts = content.split(":").map((p) => p.trim());
}
if (parts.length < 2) {
// Try dash separator: YES - 0.8 - reason
// NOTE(review): this also splits hyphenated words in the reasoning
// text, which can truncate the reasoning field — confirm acceptable.
parts = content.split("-").map((p) => p.trim());
}
// Extract YES/NO
const relevanceText = parts[0] || content;
analysis.isRelevant =
relevanceText.toUpperCase().includes("YES") ||
relevanceText.toLowerCase().includes("relevant") ||
relevanceText.toLowerCase().includes("yes");
// Extract confidence (look for number between 0 and 1)
if (parts.length >= 2) {
const confidenceMatch = parts[1].match(/(0?\.\d+|1\.0|0|1)/);
if (confidenceMatch) {
// NOTE(review): `parseFloat(...) || 0.5` also replaces a parsed
// confidence of exactly 0 with 0.5 — confirm intended.
analysis.confidence = Math.max(
0,
Math.min(1, parseFloat(confidenceMatch[1]) || 0.5)
);
}
} else {
// Try to find confidence in the whole content
const confidenceMatch = content.match(/(0?\.\d+|1\.0|0|1)/);
if (confidenceMatch) {
analysis.confidence = Math.max(
0,
Math.min(1, parseFloat(confidenceMatch[1]) || 0.5)
);
}
}
// Extract reasoning (everything after confidence, or whole content if no structure)
if (parts.length >= 3) {
analysis.reasoning = parts.slice(2).join(" ").trim() || parts[2] || "No reasoning provided";
} else if (parts.length === 2) {
// If only 2 parts, second part might be reasoning
analysis.reasoning = parts[1].substring(0, 200);
} else {
// Use the whole content as reasoning, but remove YES/NO and confidence
let reasoning = content
.replace(/YES|NO/gi, "")
.replace(/0?\.\d+|1\.0/g, "")
.replace(/\|/g, "")
.trim();
analysis.reasoning = reasoning || "Analysis provided but format unclear";
}
}
// If still not found, try to extract from the entire response by position
// NOTE(review): the position-based fallback above already sets `found`
// whenever lines.length > i, so this branch only runs when the response
// has fewer lines than posts (it then reuses the last line).
if (!found && lines.length > 0) {
// Try to get the line at position i (allowing for some variance)
const targetLine = lines[Math.min(i, lines.length - 1)];
if (targetLine) {
// Extract any YES/NO indication
analysis.isRelevant =
targetLine.toUpperCase().includes("YES") ||
targetLine.toLowerCase().includes("relevant");
// Extract confidence
const confidenceMatch = targetLine.match(/(0?\.\d+|1\.0|0|1)/);
if (confidenceMatch) {
analysis.confidence = Math.max(
0,
Math.min(1, parseFloat(confidenceMatch[1]) || 0.5)
);
}
// Use the line as reasoning
analysis.reasoning = targetLine.substring(0, 200).trim() || "Parsed from unstructured response";
found = true;
}
}
// Last resort: if still not found, try to extract from the entire response text
if (!found && aiResponse.length > 0) {
// Look for any mention of relevance in the response
const responseLower = aiResponse.toLowerCase();
const hasRelevant = responseLower.includes("relevant") || responseLower.includes("yes");
analysis.isRelevant = hasRelevant;
// Try to find any confidence number
const allConfidenceMatches = aiResponse.match(/(0?\.\d+|1\.0|0|1)/g);
if (allConfidenceMatches && allConfidenceMatches.length > i) {
analysis.confidence = Math.max(
0,
Math.min(1, parseFloat(allConfidenceMatches[i]) || 0.5)
);
}
// Use a portion of the response as reasoning
// NOTE(review): the substring windows (i*100 .. (i+1)*200) overlap, so
// snippets for adjacent posts share text — confirm intended.
const responseSnippet = aiResponse.substring(i * 100, (i + 1) * 200).trim();
analysis.reasoning = responseSnippet || "Could not parse structured response, using fallback";
logger.warning(`⚠️ Post ${i + 1}: Using fallback parsing - AI response format unclear`);
}
analyses.push(analysis);
}
// If we didn't get enough analyses, fill in defaults
while (analyses.length < posts.length) {
analyses.push({
postIndex: analyses.length + 1,
isRelevant: false,
confidence: 0.3,
reasoning: "AI response parsing failed",
});
}
return analyses;
} catch (error) {
// Map the AbortController timeout to a clearer error; rethrow everything
// else to the outer handler.
clearTimeout(timeoutId);
if (error.name === 'AbortError') {
throw new Error('Request timeout: AI analysis took longer than 5 minutes');
}
throw error;
}
} catch (error) {
logger.error(`Error in batch AI analysis: ${error.message}`);
// Fallback: mark all as relevant with low confidence
// (fail-open so downstream filtering still sees every post).
return posts.map((_, i) => ({
postIndex: i + 1,
isRelevant: true,
confidence: 0.3,
reasoning: `Analysis failed: ${error.message}`,
}));
}
}
/**
* Analyze a single post using local Ollama (fallback)
*/
async function analyzeSinglePost(
  text,
  context,
  model = DEFAULT_MODEL,
  ollamaHost = "http://localhost:11434"
) {
  // Prompt requesting a single "YES/NO | confidence | reason" line.
  const prompt = `Analyze this LinkedIn post for relevance to: "${context}"
Post: "${text}"
Is this post relevant to "${context}"? Provide:
1. YES or NO
2. Confidence (0.0 to 1.0)
3. Brief reason
Format: YES/NO | 0.X | reason`;
  try {
    // Non-streaming generation request; low temperature for stable formatting.
    const apiResponse = await fetch(`${ollamaHost}/api/generate`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: model,
        prompt: prompt,
        stream: false,
        options: { temperature: 0.3 },
      }),
    });
    if (!apiResponse.ok) {
      throw new Error(`Ollama API error: ${apiResponse.status}`);
    }
    const payload = await apiResponse.json();
    const answer = payload.response.trim();
    // Expected reply shape: "YES | 0.8 | reason".
    const fields = answer.split("|").map((field) => field.trim());
    if (fields.length >= 3) {
      return {
        isRelevant: fields[0].toUpperCase().includes("YES"),
        confidence: Math.max(0, Math.min(1, parseFloat(fields[1]) || 0.5)),
        reasoning: fields[2],
      };
    }
    // Unstructured reply: fall back to keyword sniffing on the raw text.
    return {
      isRelevant:
        answer.toLowerCase().includes("yes") ||
        answer.toLowerCase().includes("relevant"),
      confidence: 0.6,
      reasoning: answer.substring(0, 100),
    };
  } catch (error) {
    return {
      isRelevant: true, // Default to include on error
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    };
  }
}
/**
* Find the most recent results file if none specified
*/
function findLatestResultsFile(resultsDir = "results") {
  const fs = require("fs");
  const path = require("path");
  if (!fs.existsSync(resultsDir)) {
    throw new Error("Results directory not found. Run the scraper first.");
  }
  // Keep only raw scraper outputs; AI-annotated copies ("-ai-") are skipped.
  const isCandidate = (name) =>
    (name.startsWith("results-") || name.startsWith("linkedin-results-")) &&
    name.endsWith(".json") &&
    !name.includes("-ai-");
  const candidates = fs.readdirSync(resultsDir).filter(isCandidate);
  if (candidates.length === 0) {
    throw new Error("No results files found. Run the scraper first.");
  }
  // Lexicographically greatest name first (timestamped names sort by date).
  candidates.sort();
  candidates.reverse();
  return path.join(resultsDir, candidates[0]);
}
// Public API of the AI utilities module (CommonJS).
module.exports = {
checkOllamaStatus,
analyzeBatch,
analyzeSinglePost,
findLatestResultsFile,
DEFAULT_MODEL, // Export so other modules can use it
};

View File

@@ -31,7 +31,7 @@ const LINKEDIN_USERNAME = process.env.LINKEDIN_USERNAME;
const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD;
const HEADLESS = process.env.HEADLESS !== "false";
const SEARCH_KEYWORDS =
process.env.SEARCH_KEYWORDS || "layoff,downsizing,job cuts";
process.env.SEARCH_KEYWORDS || "layoff,downsizing";//,job cuts";
const LOCATION_FILTER = process.env.LOCATION_FILTER;
const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS !== "false";
const AI_CONTEXT = process.env.AI_CONTEXT || "job market analysis and trends";

View File

@@ -10,6 +10,7 @@ const {
containsAnyKeyword,
validateLocationAgainstFilters,
extractLocationFromProfile,
parseLocationFilters,
} = require("ai-analyzer");
/**
@ -48,28 +49,44 @@ async function linkedinStrategy(coreParser, options = {}) {
await coreParser.navigateTo(searchUrl, {
pageId: "linkedin-main",
retries: 2,
waitUntil: "networkidle", // Wait for network to be idle
});
// Wait for page to load - use delay utility instead of waitForTimeout
await new Promise(resolve => setTimeout(resolve, 3000)); // Give LinkedIn time to render
// Wait for page to load and content to render
await new Promise(resolve => setTimeout(resolve, 5000)); // Give LinkedIn time to render dynamic content
// Scroll down a bit to trigger lazy loading
try {
await page.evaluate(() => {
window.scrollTo(0, 500);
});
await new Promise(resolve => setTimeout(resolve, 2000));
} catch (e) {
logger.debug(`Could not scroll page: ${e.message}`);
}
// Wait for search results - try multiple selectors
let hasResults = false;
const possibleSelectors = [
".feed-shared-update-v2",
"article[data-urn*='urn:li:activity']",
"article",
".search-results-container",
".search-results__list",
".reusable-search__result-container",
"[data-test-id='search-results']",
".feed-shared-update-v2",
"article",
];
for (const selector of possibleSelectors) {
try {
await page.waitForSelector(selector, { timeout: 5000 });
hasResults = true;
logger.info(`✅ Found results container with selector: ${selector}`);
break;
await page.waitForSelector(selector, { timeout: 10000 });
// Verify we actually have post elements
const count = await page.$$(selector).then(elements => elements.length);
if (count > 0) {
hasResults = true;
logger.info(`✅ Found ${count} post elements with selector: ${selector}`);
break;
}
} catch (e) {
// Try next selector
}
@ -100,20 +117,24 @@ async function linkedinStrategy(coreParser, options = {}) {
// Validate location if filtering enabled
if (locationFilter) {
const postLocation = post.location || post.profileLocation || "";
// Parse locationFilter string into array if it's a string
const locationFiltersArray = typeof locationFilter === 'string'
? parseLocationFilters(locationFilter)
: locationFilter;
const locationValid = validateLocationAgainstFilters(
postLocation,
locationFilter
locationFiltersArray
);
if (!locationValid) {
if (!locationValid.isValid) {
logger.debug(`⏭️ Post rejected: location "${postLocation}" doesn't match filter "${locationFilter}"`);
rejectedResults.push({
...post,
rejectionReason: `Location filter mismatch: "${postLocation}" not in "${locationFilter}"`,
rejectionReason: locationValid.reasoning || `Location filter mismatch: "${postLocation}" not in "${locationFilter}"`,
});
continue;
} else {
logger.debug(`✅ Post location "${postLocation}" matches filter "${locationFilter}"`);
logger.debug(`✅ Post location "${postLocation}" matches filter "${locationFilter}" (${locationValid.reasoning || 'matched'})`);
}
}
@ -156,9 +177,12 @@ async function extractPostsFromPage(page, keyword) {
try {
// Try multiple selectors for post elements (LinkedIn changes these frequently)
// Prioritize selectors that are more specific to actual posts
const postSelectors = [
".feed-shared-update-v2",
"article[data-urn*='urn:li:activity']", // Most specific - posts with activity ID
".feed-shared-update-v2[data-urn*='urn:li:activity']",
"article.feed-shared-update-v2",
".feed-shared-update-v2",
"[data-urn*='urn:li:activity']",
".reusable-search__result-container",
".search-result__wrapper",
@ -170,11 +194,30 @@ async function extractPostsFromPage(page, keyword) {
for (const selector of postSelectors) {
try {
// Wait a bit for elements to be available
await page.waitForSelector(selector, { timeout: 3000 }).catch(() => {});
postElements = await page.$$(selector);
// Filter to only elements that have a data-urn attribute (actual posts)
if (postElements.length > 0) {
usedSelector = selector;
logger.info(`✅ Found ${postElements.length} post elements using selector: ${selector}`);
break;
const validElements = [];
for (const elem of postElements) {
try {
const dataUrn = await elem.getAttribute("data-urn");
if (dataUrn && dataUrn.includes("urn:li:activity")) {
validElements.push(elem);
}
} catch (e) {
// Element might have been detached, skip it
}
}
if (validElements.length > 0) {
postElements = validElements;
usedSelector = selector;
logger.info(`✅ Found ${postElements.length} valid post elements using selector: ${selector}`);
break;
}
}
} catch (e) {
// Try next selector
@ -199,10 +242,22 @@ async function extractPostsFromPage(page, keyword) {
for (let i = 0; i < postElements.length; i++) {
try {
// Scroll element into view to ensure it's fully rendered
try {
await postElements[i].evaluate((el) => {
el.scrollIntoView({ behavior: 'smooth', block: 'center' });
});
await new Promise(resolve => setTimeout(resolve, 500)); // Small delay for rendering
} catch (e) {
// Element might already be in view or detached, continue anyway
}
const post = await extractPostData(postElements[i], keyword);
if (post) {
posts.push(post);
logger.debug(`✅ Extracted post ${i + 1}/${postElements.length}: ${post.postId.substring(0, 20)}...`);
const hasContent = post.content && post.content.length > 0;
const hasAuthor = post.authorName && post.authorName.length > 0;
logger.debug(`✅ Extracted post ${i + 1}/${postElements.length}: ${post.postId.substring(0, 20)}... (content: ${hasContent ? 'yes' : 'no'}, author: ${hasAuthor ? 'yes' : 'no'})`);
} else {
logger.debug(`⏭️ Post ${i + 1}/${postElements.length} filtered out (no keyword match or missing data)`);
}
@ -222,131 +277,524 @@ async function extractPostsFromPage(page, keyword) {
/**
* Extract data from individual post element
* Uses evaluate() to extract data directly from DOM for better reliability
*/
async function extractPostData(postElement, keyword) {
try {
// Extract post ID
const postId = (await postElement.getAttribute("data-urn")) || "";
// Use evaluate to extract data directly from the DOM element
// This is more reliable than using selectors which may not match
const postData = await postElement.evaluate((el, keyword) => {
const data = {
postId: "",
authorName: "",
authorUrl: "",
content: "",
timestamp: "",
location: "",
likes: 0,
comments: 0,
};
// Extract author info
const authorElement = await postElement.$(".feed-shared-actor__name");
const authorName = authorElement
? cleanText(await authorElement.textContent())
: "";
// Extract post ID from data-urn attribute
data.postId = el.getAttribute("data-urn") ||
el.getAttribute("data-activity-id") ||
el.querySelector("[data-urn]")?.getAttribute("data-urn") || "";
const authorLinkElement = await postElement.$(".feed-shared-actor__name a");
const authorUrl = authorLinkElement
? await authorLinkElement.getAttribute("href")
: "";
// Extract author name - try multiple selectors and approaches
const authorSelectors = [
".feed-shared-actor__name",
".feed-shared-actor__name-link",
".update-components-actor__name",
".feed-shared-actor__name a",
"[data-test-id='actor-name']",
"span[aria-label*='name']",
"a[href*='/in/'] span",
".feed-shared-actor a span",
".feed-shared-actor span",
".feed-shared-actor__name-link span",
];
// Extract post content
const contentElement = await postElement.$(".feed-shared-text");
const content = contentElement
? cleanText(await contentElement.textContent())
: "";
// Extract timestamp
const timeElement = await postElement.$(
".feed-shared-actor__sub-description time"
);
const timestamp = timeElement
? await timeElement.getAttribute("datetime")
: "";
// Extract location from profile (try multiple selectors)
let location = "";
const locationSelectors = [
".feed-shared-actor__sub-description .feed-shared-actor__sub-description-link",
".feed-shared-actor__sub-description .feed-shared-actor__sub-description-link--without-hover",
".feed-shared-actor__sub-description span[aria-label*='location']",
".feed-shared-actor__sub-description span[aria-label*='Location']",
];
for (const selector of locationSelectors) {
try {
const locationElement = await postElement.$(selector);
if (locationElement) {
const locationText = await locationElement.textContent();
if (locationText && locationText.trim()) {
location = cleanText(locationText);
for (const selector of authorSelectors) {
const elem = el.querySelector(selector);
if (elem) {
const text = elem.textContent?.trim() || elem.innerText?.trim();
if (text && text.length > 0 && text.length < 100) { // Reasonable name length
data.authorName = text;
// Try to get link from same element or parent
const link = elem.closest("a") || elem.querySelector("a");
if (link) {
data.authorUrl = link.getAttribute("href") || "";
}
break;
}
}
} catch (e) {
// Try next selector
}
}
// If no location found in sub-description, try to extract from author link hover or profile
if (!location) {
try {
// Try to get location from data attributes or other sources
const subDescElement = await postElement.$(".feed-shared-actor__sub-description");
if (subDescElement) {
const subDescText = await subDescElement.textContent();
// Look for location patterns (City, Province/State, Country)
const locationMatch = subDescText.match(/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*),\s*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)(?:,\s*([A-Z][a-z]+))?/);
if (locationMatch) {
location = cleanText(locationMatch[0]);
// If author name found but no URL, try to find link separately
if (data.authorName && !data.authorUrl) {
const authorLink = el.querySelector(".feed-shared-actor__name-link, .feed-shared-actor__name a, a[href*='/in/']");
if (authorLink) {
data.authorUrl = authorLink.getAttribute("href") || "";
}
}
// Fallback: Look for any link with /in/ pattern and get the name from nearby text
if (!data.authorName) {
const profileLinks = el.querySelectorAll("a[href*='/in/']");
for (const link of profileLinks) {
// Skip if it's a company link
if (link.getAttribute("href")?.includes("/company/")) continue;
// Get text from the link or nearby
const linkText = link.textContent?.trim() || link.innerText?.trim();
if (linkText && linkText.length > 0 && linkText.length < 100 && !linkText.includes("View")) {
data.authorName = linkText;
data.authorUrl = link.getAttribute("href") || "";
break;
}
// Try to get text from first child span
const childSpan = link.querySelector("span");
if (childSpan) {
const spanText = childSpan.textContent?.trim() || childSpan.innerText?.trim();
if (spanText && spanText.length > 0 && spanText.length < 100) {
data.authorName = spanText;
data.authorUrl = link.getAttribute("href") || "";
break;
}
}
// Try to get text from parent
const parentText = link.parentElement?.textContent?.trim();
if (parentText && parentText.length < 100 && !parentText.includes("View")) {
// Extract just the name part (first line or first few words)
const namePart = parentText.split("\n")[0].split("·")[0].trim();
if (namePart.length > 0 && namePart.length < 100) {
data.authorName = namePart;
data.authorUrl = link.getAttribute("href") || "";
break;
}
}
}
} catch (e) {
// Location extraction failed, continue without it
}
// Last resort: Extract from actor section by looking at all text
if (!data.authorName) {
const actorSection = el.querySelector(".feed-shared-actor, .update-components-actor, [class*='actor']");
if (actorSection) {
const actorText = actorSection.textContent || actorSection.innerText || "";
const lines = actorText.split("\n").map(l => l.trim()).filter(l => l.length > 0);
// First non-empty line is often the name
for (const line of lines) {
if (line.length > 0 && line.length < 100 &&
!line.includes("·") &&
!line.includes("ago") &&
!line.match(/^\d+/) &&
!line.toLowerCase().includes("view")) {
data.authorName = line;
// Try to find associated link
const link = actorSection.querySelector("a[href*='/in/']");
if (link) {
data.authorUrl = link.getAttribute("href") || "";
}
break;
}
}
}
}
// Extract post content - try multiple selectors
const contentSelectors = [
".feed-shared-text",
".feed-shared-text__text-view",
".feed-shared-update-v2__description",
".update-components-text",
"[data-test-id='post-text']",
".feed-shared-text span",
".feed-shared-update-v2__description-wrapper",
];
for (const selector of contentSelectors) {
const elem = el.querySelector(selector);
if (elem) {
const text = elem.textContent?.trim() || elem.innerText?.trim();
if (text && text.length > 10) { // Only use if substantial content
data.content = text;
break;
}
}
}
// Extract the post timestamp. Probe machine-readable <time> nodes and
// aria-labelled spans first; if none match, fall back to pattern-matching
// relative times ("2h", "3 days ago") in the actor sub-description.
// Fix: the selector list previously contained
// ".feed-shared-actor__sub-description time" twice; the duplicate is removed
// (querying the same selector again can never yield a new result).
const timeSelectors = [
  ".feed-shared-actor__sub-description time",
  "time[datetime]",
  "[data-test-id='timestamp']",
  ".feed-shared-actor__sub-description time[datetime]",
  "time",
  "span[aria-label*='time']",
  "span[aria-label*='ago']",
];
for (const selector of timeSelectors) {
  const elem = el.querySelector(selector);
  if (elem) {
    // Prefer the machine-readable datetime attribute, then title/aria-label,
    // then visible text as a last resort.
    data.timestamp = elem.getAttribute("datetime") ||
      elem.getAttribute("title") ||
      elem.getAttribute("aria-label") ||
      elem.textContent?.trim() || "";
    if (data.timestamp) break;
  }
}
// Fallback: Look for time-like patterns in sub-description
if (!data.timestamp) {
  const subDesc = el.querySelector(".feed-shared-actor__sub-description");
  if (subDesc) {
    const subDescText = subDesc.textContent || subDesc.innerText || "";
    // Look for patterns like "2h", "3d", "1w", "2 months ago", etc.
    const timePatterns = [
      /\d+\s*(minute|hour|day|week|month|year)s?\s*ago/i,
      /\d+\s*(h|d|w|mo|yr)/i,
      /(just now|today|yesterday)/i,
    ];
    for (const pattern of timePatterns) {
      const match = subDescText.match(pattern);
      if (match) {
        data.timestamp = match[0];
        break;
      }
    }
  }
}
// Extract location - try multiple approaches
// Pass 1: direct selectors that sometimes carry a location string.
const locationSelectors = [
".feed-shared-actor__sub-description .feed-shared-actor__sub-description-link",
".feed-shared-actor__sub-description-link--without-hover",
"span[aria-label*='location' i]",
"span[aria-label*='Location']",
".feed-shared-actor__sub-description span",
".feed-shared-actor__sub-description a",
"a[href*='/company/']",
"a[href*='/location/']",
];
for (const selector of locationSelectors) {
const elem = el.querySelector(selector);
if (elem) {
const text = elem.textContent?.trim() || elem.getAttribute("aria-label") || elem.innerText?.trim() || "";
// Check if it looks like a location (contains comma or common location words)
if (text && text.length > 2 && text.length < 100) {
// More flexible location detection: accept "City, Region" shapes,
// generic location words, or Canadian province/territory names.
if (text.includes(",") ||
/(city|province|state|country|region|ontario|alberta|british columbia|quebec|manitoba|saskatchewan|nova scotia|new brunswick|newfoundland|prince edward island|yukon|northwest territories|nunavut)/i.test(text) ||
/^[A-Z][a-z]+,\s*[A-Z][a-z]+/i.test(text)) {
data.location = text;
break;
}
}
}
}
// Pass 2: If no location found, try parsing from sub-description text
if (!data.location) {
const subDesc = el.querySelector(".feed-shared-actor__sub-description");
if (subDesc) {
const subDescText = subDesc.textContent || subDesc.innerText || "";
// First, try to get all links in sub-description (location is often a link)
const subDescLinks = subDesc.querySelectorAll("a");
for (const link of subDescLinks) {
const linkText = link.textContent?.trim() || link.innerText?.trim() || "";
const linkHref = link.getAttribute("href") || "";
// Skip if it's a time/date link or company link
if (linkHref.includes("/company/") || linkText.match(/\d+\s*(minute|hour|day|week|month|year|h|d|w)/i)) {
continue;
}
// If link text looks like a location (comma-separated, or a known
// Canadian province/territory or major-city name)
if (linkText && linkText.length > 2 && linkText.length < 100) {
if (linkText.includes(",") ||
/(ontario|alberta|british columbia|quebec|manitoba|saskatchewan|nova scotia|new brunswick|newfoundland|prince edward island|yukon|northwest territories|nunavut|toronto|vancouver|calgary|ottawa|montreal|winnipeg|edmonton|halifax|victoria|regina|saskatoon|windsor|kitchener|hamilton|london|st\.?\s*catharines|oshawa|barrie|greater sudbury|sherbrooke|kelowna|abbotsford|trois-rivières|guelph|cambridge|coquitlam|saanich|saint john|thunder bay|waterloo|delta|chatham|red deer|kamloops|brantford|whitehorse|yellowknife|iqaluit)/i.test(linkText)) {
data.location = linkText;
break;
}
}
}
// If still no location, try pattern matching on the full text
if (!data.location && subDescText) {
// Look for location patterns (City, Province/State, Country)
// NOTE(review): these patterns are Canada-centric; non-Canadian
// locations only match via the generic "City, Region" shapes.
const locationPatterns = [
// Full location: "City, Province, Country"
/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*),\s*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)(?:,\s*([A-Z][a-z]+))?/,
// City, Province
/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s*,\s*([A-Z][a-z]+)/,
// Just province/state names
/\b(Ontario|Alberta|British Columbia|Quebec|Manitoba|Saskatchewan|Nova Scotia|New Brunswick|Newfoundland|Prince Edward Island|Yukon|Northwest Territories|Nunavut|ON|AB|BC|QC|MB|SK|NS|NB|NL|PE|YT|NT|NU)\b/i,
// Major cities
/\b(Toronto|Vancouver|Calgary|Ottawa|Montreal|Winnipeg|Edmonton|Halifax|Victoria|Regina|Saskatoon)\b/i,
];
for (const pattern of locationPatterns) {
const match = subDescText.match(pattern);
if (match) {
// Get more context around the match (30 chars each side) so a
// partial match can be widened to a full "City, Province" string
const matchIndex = subDescText.indexOf(match[0]);
const contextStart = Math.max(0, matchIndex - 30);
const contextEnd = Math.min(subDescText.length, matchIndex + match[0].length + 30);
const context = subDescText.substring(contextStart, contextEnd).trim();
// Extract just the location part (remove time/date info)
let locationText = match[0].trim();
// If we have more context, try to get a better location string
if (context.includes(",") && context.length < 100) {
// Try to extract "City, Province" pattern from context
const cityProvinceMatch = context.match(/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*),\s*([A-Z][a-z]+)/);
if (cityProvinceMatch) {
locationText = cityProvinceMatch[0].trim();
}
}
data.location = locationText;
break;
}
}
}
// Last resort: extract any text that looks location-like from sub-description
if (!data.location && subDescText) {
// Split by common separators and look for location-like text
// ("·", "•", "|" are LinkedIn's metadata separators)
const parts = subDescText.split(/[·•|]/).map(p => p.trim()).filter(p => p.length > 0);
for (const part of parts) {
// Skip if it looks like time/date
if (part.match(/\d+\s*(minute|hour|day|week|month|year|h|d|w|ago)/i)) {
continue;
}
// Check if it looks like a location
if (part.length > 2 && part.length < 100 &&
(part.includes(",") ||
/(ontario|alberta|british columbia|quebec|manitoba|toronto|vancouver|calgary|ottawa|montreal)/i.test(part))) {
data.location = part;
break;
}
}
}
}
}
// Pass 3: Final fallback: look anywhere in the actor section for location-like text
if (!data.location) {
const actorSection = el.querySelector(".feed-shared-actor, .update-components-actor");
if (actorSection) {
const actorText = actorSection.textContent || actorSection.innerText || "";
// Look for province names
const provinceMatch = actorText.match(/\b(Ontario|Alberta|British Columbia|Quebec|Manitoba|Saskatchewan|Nova Scotia|New Brunswick|Newfoundland|Prince Edward Island|Yukon|Northwest Territories|Nunavut)\b/i);
if (provinceMatch) {
// Try to get city, province if available (prefer the richer string)
const cityProvinceMatch = actorText.match(/([A-Z][a-z]+),\s*(Ontario|Alberta|British Columbia|Quebec|Manitoba|Saskatchewan|Nova Scotia|New Brunswick|Newfoundland|Prince Edward Island|Yukon|Northwest Territories|Nunavut)/i);
if (cityProvinceMatch) {
data.location = cityProvinceMatch[0].trim();
} else {
data.location = provinceMatch[0].trim();
}
}
}
}
// Pass 4: Try to extract from any hover cards or mini profiles in the DOM
// (these only exist if LinkedIn has pre-rendered them into the post markup)
if (!data.location) {
// Look for mini profile cards or tooltips
const miniProfileSelectors = [
"[data-control-name='hovercard']",
".artdeco-hoverable-trigger",
".feed-shared-actor__meta",
".pv-text-details__left-panel",
];
for (const selector of miniProfileSelectors) {
const elem = el.querySelector(selector);
if (elem) {
const text = elem.textContent || elem.innerText || "";
// Look for "City, Province" patterns with a Canadian province name
const locationMatch = text.match(/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*),\s*(Ontario|Alberta|British Columbia|Quebec|Manitoba|Saskatchewan|Nova Scotia|New Brunswick|Newfoundland|Prince Edward Island|Yukon|Northwest Territories|Nunavut)/i);
if (locationMatch) {
data.location = locationMatch[0].trim();
break;
}
}
}
}
// Extract engagement metrics - try multiple approaches
const likesSelectors = [
".social-counts-reactions__count",
"[data-test-id='reactions-count']",
".social-counts__reactions-count",
".feed-shared-social-action-bar__reactions-count",
"button[aria-label*='reaction']",
"button[aria-label*='like']",
".social-actions-button__reactions-count",
"[data-test-id='social-actions__reactions-count']",
];
for (const selector of likesSelectors) {
const elem = el.querySelector(selector);
if (elem) {
const text = elem.textContent?.trim() || elem.getAttribute("aria-label") || "";
const match = text.match(/(\d+)/);
if (match) {
data.likes = parseInt(match[1], 10) || 0;
break;
}
}
}
// Fallback: Look for any button or element with reaction/like text
if (data.likes === 0) {
const allButtons = el.querySelectorAll("button, span, div");
for (const btn of allButtons) {
const text = btn.textContent?.trim() || btn.getAttribute("aria-label") || "";
if (/reaction|like/i.test(text)) {
const match = text.match(/(\d+)/);
if (match) {
data.likes = parseInt(match[1], 10) || 0;
break;
}
}
}
}
const commentsSelectors = [
".social-counts-comments__count",
"[data-test-id='comments-count']",
".social-counts__comments-count",
".feed-shared-social-action-bar__comments-count",
"button[aria-label*='comment']",
".social-actions-button__comments-count",
"[data-test-id='social-actions__comments-count']",
];
for (const selector of commentsSelectors) {
const elem = el.querySelector(selector);
if (elem) {
const text = elem.textContent?.trim() || elem.getAttribute("aria-label") || "";
const match = text.match(/(\d+)/);
if (match) {
data.comments = parseInt(match[1], 10) || 0;
break;
}
}
}
// Fallback: Look for any button or element with comment text
if (data.comments === 0) {
const allButtons = el.querySelectorAll("button, span, div");
for (const btn of allButtons) {
const text = btn.textContent?.trim() || btn.getAttribute("aria-label") || "";
if (/comment/i.test(text)) {
const match = text.match(/(\d+)/);
if (match) {
data.comments = parseInt(match[1], 10) || 0;
break;
}
}
}
}
return data;
}, keyword);
// Clean and format the extracted data
const authorName = cleanText(postData.authorName);
let authorUrl = postData.authorUrl || "";
if (authorUrl && !authorUrl.startsWith("http")) {
authorUrl = `https://www.linkedin.com${authorUrl}`;
}
// Extract engagement metrics
const likesElement = await postElement.$(".social-counts-reactions__count");
const likesText = likesElement
? cleanText(await likesElement.textContent())
: "0";
const commentsElement = await postElement.$(
".social-counts-comments__count"
);
const commentsText = commentsElement
? cleanText(await commentsElement.textContent())
: "0";
// Note: LinkedIn search already filters by keyword semantically
// We don't filter by content keyword match because:
// 1. LinkedIn's search is semantic - it finds related posts, not just exact matches
// 2. The keyword might be in comments, hashtags, or metadata, not visible text
// 3. Posts might be about the topic without using the exact keyword
//
// Optional: Log if keyword appears in content (for debugging, but don't filter)
const keywordLower = keyword.toLowerCase();
const contentLower = content.toLowerCase();
const hasKeywordInContent = contentLower.includes(keywordLower);
if (!hasKeywordInContent && content.length > 50) {
logger.debug(` Post doesn't contain keyword "${keyword}" in visible content, but including it (LinkedIn search matched it)`);
}
const content = cleanText(postData.content);
const location = cleanText(postData.location);
const timestamp = postData.timestamp || "";
// Validate we have minimum required data
if (!postId && !content) {
if (!postData.postId && !content) {
logger.debug(`⏭️ Post filtered: missing both postId and content`);
return null;
}
// Log extraction results for debugging
const missingFields = [];
if (!authorName) missingFields.push("authorName");
if (!authorUrl) missingFields.push("authorUrl");
if (!location) missingFields.push("location");
if (!timestamp) missingFields.push("timestamp");
if (postData.likes === 0 && postData.comments === 0) missingFields.push("engagement");
if (missingFields.length > 0 && postData.postId) {
logger.debug(`⚠️ Post ${postData.postId.substring(0, 20)}... missing: ${missingFields.join(", ")}`);
// If location is missing, log sub-description content for debugging
if (!location && process.env.DEBUG_EXTRACTION === "true") {
try {
const subDescInfo = await postElement.evaluate((el) => {
const subDesc = el.querySelector(".feed-shared-actor__sub-description");
if (subDesc) {
return {
text: subDesc.textContent || subDesc.innerText || "",
html: subDesc.innerHTML.substring(0, 500),
links: Array.from(subDesc.querySelectorAll("a")).map(a => ({
text: a.textContent?.trim(),
href: a.getAttribute("href")
}))
};
}
return null;
});
if (subDescInfo) {
logger.debug(`Sub-description text: "${subDescInfo.text}"`);
logger.debug(`Sub-description links: ${JSON.stringify(subDescInfo.links)}`);
}
} catch (e) {
// Ignore errors in debugging
}
}
// Optionally log HTML structure for first failed extraction (to help debug)
if (process.env.DEBUG_EXTRACTION === "true" && missingFields.length >= 3) {
try {
const htmlSnippet = await postElement.evaluate((el) => {
// Get the outer HTML of the element (limited to first 2000 chars)
const html = el.outerHTML || "";
return html.substring(0, 2000);
});
logger.debug(`HTML structure (first 2000 chars):\n${htmlSnippet}`);
} catch (e) {
// Ignore errors in debugging
}
}
}
return {
postId: cleanText(postId),
postId: cleanText(postData.postId),
authorName,
authorUrl,
profileLink: authorUrl ? (authorUrl.startsWith("http") ? authorUrl : `https://www.linkedin.com${authorUrl}`) : "",
profileLink: authorUrl,
text: content,
content: content,
location: location,
profileLocation: location, // Alias for compatibility
timestamp,
keyword,
likes: extractNumber(likesText),
comments: extractNumber(commentsText),
likes: postData.likes || 0,
comments: postData.comments || 0,
extractedAt: new Date().toISOString(),
source: "linkedin",
parser: "linkedout-parser",
};
} catch (error) {
logger.warning(`Error extracting post data: ${error.message}`);
logger.debug(`Stack trace: ${error.stack}`);
return null;
}
}