Compare commits

...

No commits in common. "indeed" and "master" have entirely different histories.

18 changed files with 1931 additions and 13699 deletions

7
.gitignore vendored
View File

@ -8,10 +8,3 @@ zip*
*.7z *.7z
*obfuscated.js *obfuscated.js
.history .history
# Debug files
debug-*.js
debug-*.png
*.png
*.log
# Install scripts (optional - remove if you want to commit)
install-ollama.sh

2
ai-analyzer/cli.js Executable file → Normal file
View File

@ -1,4 +1,4 @@
#!/usr/bin/env node #!/usr/bin/env node
/** /**
* AI Analyzer CLI * AI Analyzer CLI

View File

@ -1,491 +1,301 @@
const { logger } = require("./logger"); const { logger } = require("./logger");
/** /**
* AI Analysis utilities for post processing with Ollama * AI Analysis utilities for post processing with Ollama
* Extracted from ai-analyzer-local.js for reuse across parsers * Extracted from ai-analyzer-local.js for reuse across parsers
*/ */
// Default model from environment variable or fallback to "mistral" /**
const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "mistral"; * Check if Ollama is running and the model is available
*/
/** async function checkOllamaStatus(
* Check if Ollama is running and the model is available model = "mistral",
*/ ollamaHost = "http://localhost:11434"
async function checkOllamaStatus( ) {
model = DEFAULT_MODEL, try {
ollamaHost = "http://localhost:11434" // Check if Ollama is running
) { const response = await fetch(`${ollamaHost}/api/tags`);
try { if (!response.ok) {
// Check if Ollama is running throw new Error(`Ollama not running on ${ollamaHost}`);
const response = await fetch(`${ollamaHost}/api/tags`); }
if (!response.ok) {
throw new Error(`Ollama not running on ${ollamaHost}`); const data = await response.json();
} const availableModels = data.models.map((m) => m.name);
const data = await response.json(); logger.ai("Ollama is running");
const availableModels = data.models.map((m) => m.name); logger.info(
`📦 Available models: ${availableModels
logger.ai("Ollama is running"); .map((m) => m.split(":")[0])
logger.info( .join(", ")}`
`📦 Available models: ${availableModels );
.map((m) => m.split(":")[0])
.join(", ")}` // Check if requested model is available
); const modelExists = availableModels.some((m) => m.startsWith(model));
if (!modelExists) {
// Check if requested model is available logger.error(`Model "${model}" not found`);
const modelExists = availableModels.some((m) => m.startsWith(model)); logger.error(`💡 Install it with: ollama pull ${model}`);
if (!modelExists) { logger.error(
logger.error(`Model "${model}" not found`); `💡 Or choose from: ${availableModels
logger.error(`💡 Install it with: ollama pull ${model}`); .map((m) => m.split(":")[0])
logger.error( .join(", ")}`
`💡 Or choose from: ${availableModels );
.map((m) => m.split(":")[0]) return false;
.join(", ")}` }
);
return false; logger.success(`Using model: ${model}`);
} return true;
} catch (error) {
logger.success(`Using model: ${model}`); logger.error(`Error connecting to Ollama: ${error.message}`);
return true; logger.error("💡 Make sure Ollama is installed and running:");
} catch (error) { logger.error(" 1. Install: https://ollama.ai/");
logger.error(`Error connecting to Ollama: ${error.message}`); logger.error(" 2. Start: ollama serve");
logger.error("💡 Make sure Ollama is installed and running:"); logger.error(` 3. Install model: ollama pull ${model}`);
logger.error(" 1. Install: https://ollama.ai/"); return false;
logger.error(" 2. Start: ollama serve"); }
logger.error(` 3. Install model: ollama pull ${model}`); }
return false;
} /**
} * Analyze multiple posts using local Ollama
*/
/** async function analyzeBatch(
* Analyze multiple posts using local Ollama posts,
*/ context,
async function analyzeBatch( model = "mistral",
posts, ollamaHost = "http://localhost:11434"
context, ) {
model = DEFAULT_MODEL, logger.ai(`Analyzing batch of ${posts.length} posts with ${model}...`);
ollamaHost = "http://localhost:11434"
) { try {
logger.ai(`Analyzing batch of ${posts.length} posts with ${model}...`); const prompt = `You are an expert at analyzing LinkedIn posts for relevance to specific contexts.
try { CONTEXT TO MATCH: "${context}"
// Detect if context is about a student profile
const isStudentContext = /student|undergraduate|first year|second year|third year|fourth year|freshman|sophomore|junior|senior|co-op|internship/i.test(context); Analyze these ${
posts.length
// Build enhanced prompt based on context type } LinkedIn posts and determine if each relates to the context above.
let analysisInstructions = "";
if (isStudentContext) { POSTS:
analysisInstructions = ` ${posts
ANALYSIS FOCUS (Student Context Detected): .map(
- Pay special attention to the "Requirements" section (post, i) => `
- Evaluate if the job requirements match the student's level (${context}) POST ${i + 1}:
- Consider: Are requirements too advanced? Are they appropriate for entry-level/co-op/internship? "${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}"
- Check if the role duties are suitable for a student's skill level `
- Look for keywords like "co-op", "internship", "entry-level", "student", "junior" )
- If requirements mention "years of experience", "senior", "expert", "PhD", etc., this may not be suitable .join("")}
- If requirements are reasonable for a student (basic skills, willingness to learn), mark as relevant`;
} else { For each post, provide:
analysisInstructions = ` - Is it relevant to "${context}"? (YES/NO)
ANALYSIS FOCUS: - Confidence level (0.0 to 1.0)
- Evaluate overall relevance to: "${context}" - Brief reasoning
- Consider job title, description, duties, and requirements
- Assess if the job matches the specified criteria`; Respond in this EXACT format for each post:
} POST 1: YES/NO | 0.X | brief reason
POST 2: YES/NO | 0.X | brief reason
const prompt = `Analyze ${posts.length} job postings for relevance to: "${context}" POST 3: YES/NO | 0.X | brief reason
${analysisInstructions} Examples:
- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs
JOB POSTINGS: - For hiring context: "we're hiring developers" = YES | 0.8 | job posting
${posts - Unrelated content = NO | 0.1 | not relevant to context`;
.map(
(post, i) => { const response = await fetch(`${ollamaHost}/api/generate`, {
// For student contexts, prioritize Requirements section if text is too long method: "POST",
let jobText = post.text; headers: {
if (isStudentContext && jobText.length > 1200) { "Content-Type": "application/json",
// Try to extract Requirements section if present },
const requirementsMatch = jobText.match(/Requirements?:[\s\S]{0,600}/i); body: JSON.stringify({
const dutiesMatch = jobText.match(/Role Duties?:[\s\S]{0,300}/i); model: model,
const titleMatch = jobText.match(/Title:[\s\S]{0,100}/i); prompt: prompt,
stream: false,
if (requirementsMatch) { options: {
// Prioritize: Title + Requirements (most important for students) temperature: 0.3,
jobText = (titleMatch ? titleMatch[0] + "\n\n" : "") + top_p: 0.9,
(requirementsMatch ? requirementsMatch[0] : "") + },
(dutiesMatch ? "\n\n" + dutiesMatch[0] : ""); }),
} else { });
// Fallback to truncation
jobText = jobText.substring(0, 1200) + "..."; if (!response.ok) {
} throw new Error(
} else if (jobText.length > 1200) { `Ollama API error: ${response.status} ${response.statusText}`
jobText = jobText.substring(0, 1200) + "..."; );
} }
return ` const data = await response.json();
JOB ${i + 1}: const aiResponse = data.response.trim();
${jobText}
`; // Parse the response
} const analyses = [];
) const lines = aiResponse.split("\n").filter((line) => line.trim());
.join("")}
for (let i = 0; i < posts.length; i++) {
REQUIRED FORMAT - Respond with EXACTLY ${posts.length} lines, one per post: let analysis = {
JOB 1: YES | 0.8 | reason here postIndex: i + 1,
JOB 2: NO | 0.2 | reason here isRelevant: false,
JOB 3: YES | 0.9 | reason here confidence: 0.5,
reasoning: "Could not parse AI response",
RULES: };
- Use YES or NO (uppercase)
- Use pipe character | as separator // Look for lines that match "POST X:" pattern
- Confidence must be 0.0 to 1.0 (decimal number) const postPattern = new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i");
- Keep reasoning brief (one sentence)
- MUST include all ${posts.length} jobs in order for (const line of lines) {
${isStudentContext ? "- When analyzing requirements, explicitly mention if requirements are too advanced or appropriate for the student level" : ""} const match = line.match(postPattern);
if (match) {
Examples: const content = match[1].trim();
JOB 1: YES | 0.9 | co-op position suitable for first year students
JOB 2: NO | 0.2 | requires 5+ years experience, too advanced // Parse: YES/NO | 0.X | reasoning
JOB 3: YES | 0.7 | entry-level role with basic requirements appropriate for students`; const parts = content.split("|").map((p) => p.trim());
// Add timeout to prevent hanging (5 minutes max) if (parts.length >= 3) {
const controller = new AbortController(); analysis.isRelevant = parts[0].toUpperCase().includes("YES");
const timeoutId = setTimeout(() => controller.abort(), 5 * 60 * 1000); // 5 minutes analysis.confidence = Math.max(
0,
try { Math.min(1, parseFloat(parts[1]) || 0.5)
const response = await fetch(`${ollamaHost}/api/generate`, { );
method: "POST", analysis.reasoning = parts[2] || "No reasoning provided";
headers: { } else {
"Content-Type": "application/json", // Fallback parsing
}, analysis.isRelevant =
body: JSON.stringify({ content.toUpperCase().includes("YES") ||
model: model, content.toLowerCase().includes("relevant");
prompt: prompt, analysis.confidence = 0.6;
stream: false, analysis.reasoning = content.substring(0, 100);
options: { }
temperature: 0.3, break;
top_p: 0.9, }
}, }
}),
signal: controller.signal, analyses.push(analysis);
}); }
clearTimeout(timeoutId); // If we didn't get enough analyses, fill in defaults
while (analyses.length < posts.length) {
if (!response.ok) { analyses.push({
throw new Error( postIndex: analyses.length + 1,
`Ollama API error: ${response.status} ${response.statusText}` isRelevant: false,
); confidence: 0.3,
} reasoning: "AI response parsing failed",
});
const data = await response.json(); }
const aiResponse = data.response.trim();
return analyses;
// Parse the response } catch (error) {
const analyses = []; logger.error(`Error in batch AI analysis: ${error.message}`);
const lines = aiResponse.split("\n").filter((line) => line.trim());
// Fallback: mark all as relevant with low confidence
// Log the raw response for debugging return posts.map((_, i) => ({
logger.debug(`AI Response length: ${aiResponse.length} chars`); postIndex: i + 1,
if (aiResponse.length > 0) { isRelevant: true,
logger.debug(`AI Response (first 1000 chars):\n${aiResponse.substring(0, 1000)}`); confidence: 0.3,
} else { reasoning: `Analysis failed: ${error.message}`,
logger.warning("⚠️ AI response is empty!"); }));
} }
}
for (let i = 0; i < posts.length; i++) {
let analysis = { /**
postIndex: i + 1, * Analyze a single post using local Ollama (fallback)
isRelevant: false, */
confidence: 0.5, async function analyzeSinglePost(
reasoning: "Could not parse AI response", text,
}; context,
model = "mistral",
// Try multiple patterns to find the post analysis ollamaHost = "http://localhost:11434"
// IMPORTANT: Try numbered patterns first, only use generic pattern as last resort ) {
const numberedPatterns = [ const prompt = `Analyze this LinkedIn post for relevance to: "${context}"
// Exact format: POST 1: YES | 0.8 | reason
new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i"), Post: "${text}"
// Numbered list: 1. YES | 0.8 | reason
new RegExp(`^\\s*${i + 1}[.)]\\s*(.+)`, "i"), Is this post relevant to "${context}"? Provide:
// Just the number: 1: YES | 0.8 | reason 1. YES or NO
new RegExp(`^\\s*${i + 1}:\\s*(.+)`, "i"), 2. Confidence (0.0 to 1.0)
]; 3. Brief reason
let found = false; Format: YES/NO | 0.X | reason`;
let matchedContent = null;
try {
// First, try to find a line with the specific post number const response = await fetch(`${ollamaHost}/api/generate`, {
for (const line of lines) { method: "POST",
for (const pattern of numberedPatterns) { headers: {
const match = line.match(pattern); "Content-Type": "application/json",
if (match) { },
matchedContent = match[1].trim(); body: JSON.stringify({
found = true; model: model,
break; prompt: prompt,
} stream: false,
} options: {
if (found) break; temperature: 0.3,
} },
}),
// If not found with numbered patterns, try position-based matching as fallback });
if (!found && lines.length > i) {
const targetLine = lines[i]; if (!response.ok) {
if (targetLine) { throw new Error(`Ollama API error: ${response.status}`);
// Try to parse the line even without post number }
const genericMatch = targetLine.match(/^(?:POST\s*\d+:?\s*)?(.+)$/i);
if (genericMatch) { const data = await response.json();
matchedContent = genericMatch[1].trim(); const aiResponse = data.response.trim();
found = true;
} // Parse response
} const parts = aiResponse.split("|").map((p) => p.trim());
}
if (parts.length >= 3) {
if (found && matchedContent) { return {
const content = matchedContent; isRelevant: parts[0].toUpperCase().includes("YES"),
confidence: Math.max(0, Math.min(1, parseFloat(parts[1]) || 0.5)),
// Try to parse: YES/NO | 0.X | reasoning reasoning: parts[2],
let parts = content.split("|").map((p) => p.trim()); };
} else {
// If no pipe separator, try other separators // Fallback parsing
if (parts.length < 2) { return {
// Try colon separator: YES: 0.8: reason isRelevant:
parts = content.split(":").map((p) => p.trim()); aiResponse.toLowerCase().includes("yes") ||
} aiResponse.toLowerCase().includes("relevant"),
if (parts.length < 2) { confidence: 0.6,
// Try dash separator: YES - 0.8 - reason reasoning: aiResponse.substring(0, 100),
parts = content.split("-").map((p) => p.trim()); };
} }
} catch (error) {
// Extract YES/NO return {
const relevanceText = parts[0] || content; isRelevant: true, // Default to include on error
analysis.isRelevant = confidence: 0.3,
relevanceText.toUpperCase().includes("YES") || reasoning: `Analysis failed: ${error.message}`,
relevanceText.toLowerCase().includes("relevant") || };
relevanceText.toLowerCase().includes("yes"); }
}
// Extract confidence (look for number between 0 and 1)
if (parts.length >= 2) { /**
const confidenceMatch = parts[1].match(/(0?\.\d+|1\.0|0|1)/); * Find the most recent results file if none specified
if (confidenceMatch) { */
analysis.confidence = Math.max( function findLatestResultsFile(resultsDir = "results") {
0, const fs = require("fs");
Math.min(1, parseFloat(confidenceMatch[1]) || 0.5) const path = require("path");
);
} if (!fs.existsSync(resultsDir)) {
} else { throw new Error("Results directory not found. Run the scraper first.");
// Try to find confidence in the whole content }
const confidenceMatch = content.match(/(0?\.\d+|1\.0|0|1)/);
if (confidenceMatch) { const files = fs
analysis.confidence = Math.max( .readdirSync(resultsDir)
0, .filter(
Math.min(1, parseFloat(confidenceMatch[1]) || 0.5) (f) =>
); (f.startsWith("results-") || f.startsWith("linkedin-results-")) &&
} f.endsWith(".json") &&
} !f.includes("-ai-")
)
// Extract reasoning (everything after confidence, or whole content if no structure) .sort()
if (parts.length >= 3) { .reverse();
analysis.reasoning = parts.slice(2).join(" ").trim() || parts[2] || "No reasoning provided";
} else if (parts.length === 2) { if (files.length === 0) {
// If only 2 parts, second part might be reasoning throw new Error("No results files found. Run the scraper first.");
analysis.reasoning = parts[1].substring(0, 200); }
} else {
// Use the whole content as reasoning, but remove YES/NO and confidence return path.join(resultsDir, files[0]);
let reasoning = content }
.replace(/YES|NO/gi, "")
.replace(/0?\.\d+|1\.0/g, "") module.exports = {
.replace(/\|/g, "") checkOllamaStatus,
.trim(); analyzeBatch,
analysis.reasoning = reasoning || "Analysis provided but format unclear"; analyzeSinglePost,
} findLatestResultsFile,
} };
// If still not found, try to extract from the entire response by position
if (!found && lines.length > 0) {
// Try to get the line at position i (allowing for some variance)
const targetLine = lines[Math.min(i, lines.length - 1)];
if (targetLine) {
// Extract any YES/NO indication
analysis.isRelevant =
targetLine.toUpperCase().includes("YES") ||
targetLine.toLowerCase().includes("relevant");
// Extract confidence
const confidenceMatch = targetLine.match(/(0?\.\d+|1\.0|0|1)/);
if (confidenceMatch) {
analysis.confidence = Math.max(
0,
Math.min(1, parseFloat(confidenceMatch[1]) || 0.5)
);
}
// Use the line as reasoning
analysis.reasoning = targetLine.substring(0, 200).trim() || "Parsed from unstructured response";
found = true;
}
}
// Last resort: if still not found, try to extract from the entire response text
if (!found && aiResponse.length > 0) {
// Look for any mention of relevance in the response
const responseLower = aiResponse.toLowerCase();
const hasRelevant = responseLower.includes("relevant") || responseLower.includes("yes");
analysis.isRelevant = hasRelevant;
// Try to find any confidence number
const allConfidenceMatches = aiResponse.match(/(0?\.\d+|1\.0|0|1)/g);
if (allConfidenceMatches && allConfidenceMatches.length > i) {
analysis.confidence = Math.max(
0,
Math.min(1, parseFloat(allConfidenceMatches[i]) || 0.5)
);
}
// Use a portion of the response as reasoning
const responseSnippet = aiResponse.substring(i * 100, (i + 1) * 200).trim();
analysis.reasoning = responseSnippet || "Could not parse structured response, using fallback";
logger.warning(`⚠️ Post ${i + 1}: Using fallback parsing - AI response format unclear`);
}
analyses.push(analysis);
}
// If we didn't get enough analyses, fill in defaults
while (analyses.length < posts.length) {
analyses.push({
postIndex: analyses.length + 1,
isRelevant: false,
confidence: 0.3,
reasoning: "AI response parsing failed",
});
}
return analyses;
} catch (error) {
clearTimeout(timeoutId);
if (error.name === 'AbortError') {
throw new Error('Request timeout: AI analysis took longer than 5 minutes');
}
throw error;
}
} catch (error) {
logger.error(`Error in batch AI analysis: ${error.message}`);
// Fallback: mark all as relevant with low confidence
return posts.map((_, i) => ({
postIndex: i + 1,
isRelevant: true,
confidence: 0.3,
reasoning: `Analysis failed: ${error.message}`,
}));
}
}
/**
* Analyze a single post using local Ollama (fallback)
*/
async function analyzeSinglePost(
text,
context,
model = DEFAULT_MODEL,
ollamaHost = "http://localhost:11434"
) {
const prompt = `Analyze this LinkedIn post for relevance to: "${context}"
Post: "${text}"
Is this post relevant to "${context}"? Provide:
1. YES or NO
2. Confidence (0.0 to 1.0)
3. Brief reason
Format: YES/NO | 0.X | reason`;
try {
const response = await fetch(`${ollamaHost}/api/generate`, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
model: model,
prompt: prompt,
stream: false,
options: {
temperature: 0.3,
},
}),
});
if (!response.ok) {
throw new Error(`Ollama API error: ${response.status}`);
}
const data = await response.json();
const aiResponse = data.response.trim();
// Parse response
const parts = aiResponse.split("|").map((p) => p.trim());
if (parts.length >= 3) {
return {
isRelevant: parts[0].toUpperCase().includes("YES"),
confidence: Math.max(0, Math.min(1, parseFloat(parts[1]) || 0.5)),
reasoning: parts[2],
};
} else {
// Fallback parsing
return {
isRelevant:
aiResponse.toLowerCase().includes("yes") ||
aiResponse.toLowerCase().includes("relevant"),
confidence: 0.6,
reasoning: aiResponse.substring(0, 100),
};
}
} catch (error) {
return {
isRelevant: true, // Default to include on error
confidence: 0.3,
reasoning: `Analysis failed: ${error.message}`,
};
}
}
/**
* Find the most recent results file if none specified
*/
function findLatestResultsFile(resultsDir = "results") {
const fs = require("fs");
const path = require("path");
if (!fs.existsSync(resultsDir)) {
throw new Error("Results directory not found. Run the scraper first.");
}
const files = fs
.readdirSync(resultsDir)
.filter(
(f) =>
(f.startsWith("results-") || f.startsWith("linkedin-results-")) &&
f.endsWith(".json") &&
!f.includes("-ai-")
)
.sort()
.reverse();
if (files.length === 0) {
throw new Error("No results files found. Run the scraper first.");
}
return path.join(resultsDir, files[0]);
}
module.exports = {
checkOllamaStatus,
analyzeBatch,
analyzeSinglePost,
findLatestResultsFile,
DEFAULT_MODEL, // Export so other modules can use it
};

View File

@ -1,146 +1,107 @@
/** /**
* Text processing utilities for cleaning and validating content * Text processing utilities for cleaning and validating content
* Extracted from linkedout.js for reuse across parsers * Extracted from linkedout.js for reuse across parsers
*/ */
/** /**
* Clean text by removing hashtags, URLs, emojis, and normalizing whitespace * Clean text by removing hashtags, URLs, emojis, and normalizing whitespace
*/ */
function cleanText(text) { function cleanText(text) {
if (!text || typeof text !== "string") { if (!text || typeof text !== "string") {
return ""; return "";
} }
// Remove hashtags // Remove hashtags
text = text.replace(/#\w+/g, ""); text = text.replace(/#\w+/g, "");
// Remove hashtag mentions // Remove hashtag mentions
text = text.replace(/\bhashtag\b/gi, ""); text = text.replace(/\bhashtag\b/gi, "");
text = text.replace(/hashtag-\w+/gi, ""); text = text.replace(/hashtag-\w+/gi, "");
// Remove URLs // Remove URLs
text = text.replace(/https?:\/\/[^\s]+/g, ""); text = text.replace(/https?:\/\/[^\s]+/g, "");
// Remove emojis (Unicode ranges for common emoji) // Remove emojis (Unicode ranges for common emoji)
text = text.replace( text = text.replace(
/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}]/gu, /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}]/gu,
"" ""
); );
// Normalize whitespace // Normalize whitespace
text = text.replace(/\s+/g, " ").trim(); text = text.replace(/\s+/g, " ").trim();
return text; return text;
} }
/** /**
* Check if text contains any of the specified keywords (case insensitive) * Check if text contains any of the specified keywords (case insensitive)
*/ */
function containsAnyKeyword(text, keywords) { function containsAnyKeyword(text, keywords) {
if (!text || !Array.isArray(keywords)) { if (!text || !Array.isArray(keywords)) {
return false; return false;
} }
const lowerText = text.toLowerCase(); const lowerText = text.toLowerCase();
return keywords.some((keyword) => lowerText.includes(keyword.toLowerCase())); return keywords.some((keyword) => lowerText.includes(keyword.toLowerCase()));
} }
/** /**
* Check if text contains all of the specified keywords (case insensitive) * Validate if text meets basic quality criteria
*/ */
function containsAllKeywords(text, keywords) { function isValidText(text, minLength = 30) {
if (!text || !Array.isArray(keywords)) { if (!text || typeof text !== "string") {
return false; return false;
} }
const lowerText = text.toLowerCase(); // Check minimum length
return keywords.every((keyword) => lowerText.includes(keyword.toLowerCase())); if (text.length < minLength) {
} return false;
}
/**
* Check if text matches keyword groups with AND logic between groups and OR logic within groups // Check if text contains alphanumeric characters
* @param {string} text - Text to search in if (!/[a-zA-Z0-9]/.test(text)) {
* @param {Array<Array<string>>} keywordGroups - Array of keyword groups, each group is an array of OR keywords return false;
* @returns {boolean} - True if text matches all groups (AND logic) and at least one keyword in each group (OR logic) }
*/
function matchesKeywordGroups(text, keywordGroups) { return true;
if (!text || !Array.isArray(keywordGroups) || keywordGroups.length === 0) { }
return false;
} /**
* Extract domain from URL
const lowerText = text.toLowerCase(); */
function extractDomain(url) {
// All groups must match (AND logic) if (!url || typeof url !== "string") {
return keywordGroups.every((group) => { return null;
if (!Array.isArray(group) || group.length === 0) { }
return false;
} try {
// At least one keyword in the group must match (OR logic) const urlObj = new URL(url);
return group.some((keyword) => return urlObj.hostname;
lowerText.includes(keyword.toLowerCase().trim()) } catch (error) {
); return null;
}); }
} }
/** /**
* Validate if text meets basic quality criteria * Normalize URL by removing query parameters and fragments
*/ */
function isValidText(text, minLength = 30) { function normalizeUrl(url) {
if (!text || typeof text !== "string") { if (!url || typeof url !== "string") {
return false; return "";
} }
// Check minimum length try {
if (text.length < minLength) { const urlObj = new URL(url);
return false; return `${urlObj.protocol}//${urlObj.hostname}${urlObj.pathname}`;
} } catch (error) {
return url;
// Check if text contains alphanumeric characters }
if (!/[a-zA-Z0-9]/.test(text)) { }
return false;
} module.exports = {
cleanText,
return true; containsAnyKeyword,
} isValidText,
extractDomain,
/** normalizeUrl,
* Extract domain from URL };
*/
function extractDomain(url) {
if (!url || typeof url !== "string") {
return null;
}
try {
const urlObj = new URL(url);
return urlObj.hostname;
} catch (error) {
return null;
}
}
/**
* Normalize URL by removing query parameters and fragments
*/
function normalizeUrl(url) {
if (!url || typeof url !== "string") {
return "";
}
try {
const urlObj = new URL(url);
return `${urlObj.protocol}//${urlObj.hostname}${urlObj.pathname}`;
} catch (error) {
return url;
}
}
module.exports = {
cleanText,
containsAnyKeyword,
containsAllKeywords,
matchesKeywordGroups,
isValidText,
extractDomain,
normalizeUrl,
};

View File

@ -20,26 +20,7 @@ class CoreParser {
this.browser = await playwright.chromium.launch({ this.browser = await playwright.chromium.launch({
headless: this.config.headless headless: this.config.headless
}); });
this.context = await this.browser.newContext();
// Create context with user agent to appear more like a real browser
const contextOptions = {
userAgent: this.config.userAgent || 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
viewport: { width: 1920, height: 1080 },
locale: 'en-US',
timezoneId: 'America/New_York',
};
// Add extra HTTP headers to appear more legitimate
contextOptions.extraHTTPHeaders = {
'Accept-Language': 'en-US,en;q=0.9',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate, br',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
};
this.context = await this.browser.newContext(contextOptions);
} }
async createPage(id) { async createPage(id) {
@ -80,7 +61,3 @@ class CoreParser {
} }
module.exports = CoreParser; module.exports = CoreParser;

View File

@ -1,9 +1,7 @@
{ {
"name": "core-parser", "name": "core-parser",
"version": "1.0.0", "version": "1.0.0",
"main": "index.js", "main": "index.js",
"description": "Core parser utilities for browser management", "description": "Core parser utilities for browser management",
"dependencies": { "dependencies": {}
"playwright": "^1.40.0" }
}
}

File diff suppressed because it is too large Load Diff

View File

@ -10,10 +10,7 @@ const path = require("path");
const fs = require("fs"); const fs = require("fs");
const CoreParser = require("../core-parser"); const CoreParser = require("../core-parser");
const { skipthedriveStrategy } = require("./strategies/skipthedrive-strategy"); const { skipthedriveStrategy } = require("./strategies/skipthedrive-strategy");
const { linkedinJobsStrategy } = require("./strategies/linkedin-jobs-strategy"); const { logger, analyzeBatch, checkOllamaStatus } = require("ai-analyzer");
const { indeedStrategy } = require("./strategies/indeed-strategy");
const { logger, analyzeBatch, checkOllamaStatus, DEFAULT_MODEL } = require("ai-analyzer");
const { convertResultsToCsv } = require("./src/csv-utils");
// Load environment variables // Load environment variables
require("dotenv").config({ path: path.join(__dirname, ".env") }); require("dotenv").config({ path: path.join(__dirname, ".env") });
@ -21,23 +18,16 @@ require("dotenv").config({ path: path.join(__dirname, ".env") });
// Configuration from environment // Configuration from environment
const HEADLESS = process.env.HEADLESS !== "false"; const HEADLESS = process.env.HEADLESS !== "false";
const SEARCH_KEYWORDS = const SEARCH_KEYWORDS =
process.env.SEARCH_KEYWORDS || "co-op,intern";//"software engineer,developer,programmer"; process.env.SEARCH_KEYWORDS || "software engineer,developer,programmer";
const LOCATION_FILTER = process.env.LOCATION_FILTER; const LOCATION_FILTER = process.env.LOCATION_FILTER;
const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS === "true"; const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS === "true";
const AI_CONTEXT = process.env.AI_CONTEXT || "Job market analysis focusing on job postings, skills, and trends";
const OLLAMA_MODEL = process.env.OLLAMA_MODEL || DEFAULT_MODEL;
const MAX_PAGES = parseInt(process.env.MAX_PAGES) || 5; const MAX_PAGES = parseInt(process.env.MAX_PAGES) || 5;
const EXCLUDE_REJECTED = process.env.EXCLUDE_REJECTED === "true";
const OUTPUT_FORMAT = process.env.OUTPUT_FORMAT || "json"; // "json", "csv", or "both"
const MIN_DATE = process.env.MIN_DATE; // Minimum posted date (format: YYYY-MM-DD)
const USE_AND_LOGIC = process.env.USE_AND_LOGIC === "true"; // Use AND logic for keywords
// Available site strategies // Available site strategies
const SITE_STRATEGIES = { const SITE_STRATEGIES = {
skipthedrive: skipthedriveStrategy, skipthedrive: skipthedriveStrategy,
linkedin: linkedinJobsStrategy,
indeed: indeedStrategy,
// Add more site strategies here // Add more site strategies here
// indeed: indeedStrategy,
// glassdoor: glassdoorStrategy, // glassdoor: glassdoorStrategy,
}; };
@ -51,10 +41,6 @@ function parseArguments() {
keywords: null, keywords: null,
locationFilter: null, locationFilter: null,
maxPages: MAX_PAGES, maxPages: MAX_PAGES,
excludeRejected: EXCLUDE_REJECTED,
outputFormat: OUTPUT_FORMAT,
minDate: MIN_DATE,
useAndLogic: USE_AND_LOGIC, // Use AND logic instead of OR logic for keywords (from env or CLI)
}; };
args.forEach((arg) => { args.forEach((arg) => {
@ -71,26 +57,7 @@ function parseArguments() {
} else if (arg.startsWith("--location=")) { } else if (arg.startsWith("--location=")) {
options.locationFilter = arg.split("=")[1]; options.locationFilter = arg.split("=")[1];
} else if (arg.startsWith("--max-pages=")) { } else if (arg.startsWith("--max-pages=")) {
const value = arg.split("=")[1]; options.maxPages = parseInt(arg.split("=")[1]) || MAX_PAGES;
// Support "all" or "0" to mean unlimited pages
if (value === "all" || value === "0") {
options.maxPages = 0; // 0 means unlimited
} else {
options.maxPages = parseInt(value) || MAX_PAGES;
}
} else if (arg === "--no-rejected" || arg === "--exclude-rejected") {
options.excludeRejected = true;
} else if (arg.startsWith("--output=") || arg.startsWith("--format=")) {
const format = arg.split("=")[1].toLowerCase();
if (["json", "csv", "both"].includes(format)) {
options.outputFormat = format;
} else {
logger.warning(`⚠️ Unknown output format: ${format}. Using default: json`);
}
} else if (arg.startsWith("--min-date=")) {
options.minDate = arg.split("=")[1];
} else if (arg === "--and" || arg === "--all-keywords") {
options.useAndLogic = true; // CLI flag overrides env variable
} }
}); });
@ -113,136 +80,21 @@ async function startJobSearchParser(options = {}) {
logger.step("🚀 Job Search Parser Starting..."); logger.step("🚀 Job Search Parser Starting...");
// Parse keywords // Parse keywords
let keywords = const keywords =
finalOptions.keywords || SEARCH_KEYWORDS.split(",").map((k) => k.trim()); finalOptions.keywords || SEARCH_KEYWORDS.split(",").map((k) => k.trim());
// Parse keyword groups if AND logic is enabled and keywords contain pipe (|) separator
// Format: "co-op|intern,summer 2026" means (co-op OR intern) AND (summer 2026)
let keywordGroups = null;
if (finalOptions.useAndLogic && keywords.some(k => k.includes('|'))) {
keywordGroups = keywords.map(group =>
group.split('|').map(k => k.trim()).filter(k => k.length > 0)
);
logger.info(`🔍 Keyword Groups: ${keywordGroups.map(g => `(${g.join(' OR ')})`).join(' AND ')}`);
}
const locationFilter = finalOptions.locationFilter || LOCATION_FILTER; const locationFilter = finalOptions.locationFilter || LOCATION_FILTER;
const sites = finalOptions.sites; const sites = finalOptions.sites;
const excludeRejected = finalOptions.excludeRejected !== undefined ? finalOptions.excludeRejected : EXCLUDE_REJECTED;
logger.info(`📦 Selected job sites: ${sites.join(", ")}`); logger.info(`📦 Selected job sites: ${sites.join(", ")}`);
logger.info(`🔍 Search Keywords: ${keywords.join(", ")}`); logger.info(`🔍 Search Keywords: ${keywords.join(", ")}`);
if (keywordGroups) {
logger.info(`🔗 Keyword Logic: Grouped AND/OR - ${keywordGroups.map(g => `(${g.join(' OR ')})`).join(' AND ')}`);
} else {
logger.info(`🔗 Keyword Logic: ${finalOptions.useAndLogic ? "AND (all keywords must match)" : "OR (any keyword matches)"}`);
}
logger.info(`📍 Location Filter: ${locationFilter || "None"}`); logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
const minDate = finalOptions.minDate || MIN_DATE;
if (minDate) {
logger.info(`📅 Min Date Filter: ${minDate} (jobs posted after this date)`);
}
logger.info( logger.info(
`🧠 AI Analysis: ${ENABLE_AI_ANALYSIS ? "Enabled" : "Disabled"}` `🧠 AI Analysis: ${ENABLE_AI_ANALYSIS ? "Enabled" : "Disabled"}`
); );
if (ENABLE_AI_ANALYSIS) {
logger.info(` Context: "${AI_CONTEXT}"`);
logger.info(` Model: ${OLLAMA_MODEL}`);
}
const allResults = []; const allResults = [];
const allRejectedResults = []; const allRejectedResults = [];
const siteResults = {}; const siteResults = {};
let analysisResults = null;
// Initialize results directory and file for incremental saving
const resultsDir = path.join(__dirname, "results");
if (!fs.existsSync(resultsDir)) {
fs.mkdirSync(resultsDir, { recursive: true });
}
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
const outputFormat = finalOptions.outputFormat || OUTPUT_FORMAT;
let incrementalJsonFilepath = null;
let incrementalCsvFilepath = null;
// Initialize incremental save files
if (outputFormat === "json" || outputFormat === "both") {
const jsonFilename = `job-search-results-${timestamp}.json`;
incrementalJsonFilepath = path.join(resultsDir, jsonFilename);
}
if (outputFormat === "csv" || outputFormat === "both") {
const csvFilename = `job-search-results-${timestamp}.csv`;
incrementalCsvFilepath = path.join(resultsDir, csvFilename);
}
/**
* Save results incrementally as they're found
*/
const saveIncrementalResults = (currentResults, currentRejectedResults, currentSiteResults, currentAnalysisResults = null, isComplete = false) => {
try {
const outputData = {
metadata: {
extractedAt: new Date().toISOString(),
parser: "job-search-parser",
version: "2.0.0",
sites: sites,
keywords: keywords.join(", "),
locationFilter,
aiAnalysisEnabled: ENABLE_AI_ANALYSIS,
aiContext: ENABLE_AI_ANALYSIS ? AI_CONTEXT : undefined,
aiModel: ENABLE_AI_ANALYSIS ? OLLAMA_MODEL : undefined,
analysisResults: currentAnalysisResults,
rejectedJobsExcluded: excludeRejected,
isComplete: isComplete,
lastUpdated: new Date().toISOString(),
},
results: currentResults,
siteResults: currentSiteResults,
};
if (!excludeRejected) {
outputData.rejectedResults = currentRejectedResults;
}
// Save JSON incrementally
if (incrementalJsonFilepath) {
fs.writeFileSync(incrementalJsonFilepath, JSON.stringify(outputData, null, 2));
}
// Save CSV incrementally (convert on each save)
if (incrementalCsvFilepath) {
const csvContent = convertResultsToCsv(outputData);
fs.writeFileSync(incrementalCsvFilepath, csvContent);
}
if (!isComplete) {
logger.info(`💾 Incremental save: ${currentResults.length} results saved to ${incrementalJsonFilepath || incrementalCsvFilepath}`);
}
} catch (error) {
logger.warning(`⚠️ Failed to save incremental results: ${error.message}`);
}
};
// Save initial empty state
saveIncrementalResults([], [], {}, null, false);
// Set up signal handlers for graceful shutdown
let isShuttingDown = false;
const gracefulShutdown = async (signal) => {
if (isShuttingDown) return;
isShuttingDown = true;
logger.warning(`\n⚠️ Received ${signal}, saving current results before exit...`);
saveIncrementalResults(allResults, allRejectedResults, siteResults, analysisResults, false);
logger.info(`💾 Saved ${allResults.length} results before shutdown`);
await coreParser.cleanup();
process.exit(0);
};
process.on('SIGINT', () => gracefulShutdown('SIGINT'));
process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
// Process each selected site // Process each selected site
for (const site of sites) { for (const site of sites) {
@ -256,49 +108,18 @@ async function startJobSearchParser(options = {}) {
logger.step(`\n🌐 Parsing ${site}...`); logger.step(`\n🌐 Parsing ${site}...`);
const startTime = Date.now(); const startTime = Date.now();
// Prepare strategy options const parseResult = await strategy(coreParser, {
const strategyOptions = {
keywords, keywords,
keywordGroups, // Pass grouped keywords if available
locationFilter, locationFilter,
maxPages: finalOptions.maxPages, maxPages: finalOptions.maxPages,
useAndLogic: finalOptions.useAndLogic || false, });
};
// Add credentials for LinkedIn
if (site === "linkedin") {
const LINKEDIN_USERNAME = process.env.LINKEDIN_USERNAME;
const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD;
if (!LINKEDIN_USERNAME || !LINKEDIN_PASSWORD) {
logger.error(`❌ LinkedIn credentials not found. Please set LINKEDIN_USERNAME and LINKEDIN_PASSWORD in .env file`);
siteResults[site] = {
count: 0,
rejected: 0,
duration: "0s",
error: "LinkedIn credentials not found",
};
continue;
}
strategyOptions.credentials = {
username: LINKEDIN_USERNAME,
password: LINKEDIN_PASSWORD,
};
strategyOptions.location = process.env.LINKEDIN_JOB_LOCATION || "";
strategyOptions.minDate = minDate; // Add date filter for LinkedIn
}
const parseResult = await strategy(coreParser, strategyOptions);
const { results, rejectedResults, summary } = parseResult; const { results, rejectedResults, summary } = parseResult;
const duration = ((Date.now() - startTime) / 1000).toFixed(2); const duration = ((Date.now() - startTime) / 1000).toFixed(2);
// Collect results // Collect results
logger.info(`📦 Strategy returned: ${results.length} results, ${rejectedResults.length} rejected`);
allResults.push(...results); allResults.push(...results);
allRejectedResults.push(...rejectedResults); allRejectedResults.push(...rejectedResults);
logger.info(`📦 Total accumulated: ${allResults.length} results, ${allRejectedResults.length} rejected`);
siteResults[site] = { siteResults[site] = {
count: results.length, count: results.length,
@ -310,9 +131,6 @@ async function startJobSearchParser(options = {}) {
logger.success( logger.success(
`${site} completed in ${duration}s - Found ${results.length} jobs` `${site} completed in ${duration}s - Found ${results.length} jobs`
); );
// Save results incrementally after each site
saveIncrementalResults(allResults, allRejectedResults, siteResults, analysisResults, false);
} catch (error) { } catch (error) {
logger.error(`${site} parsing failed: ${error.message}`); logger.error(`${site} parsing failed: ${error.message}`);
siteResults[site] = { siteResults[site] = {
@ -321,126 +139,60 @@ async function startJobSearchParser(options = {}) {
duration: "0s", duration: "0s",
error: error.message, error: error.message,
}; };
// Save even on error to preserve what we have
saveIncrementalResults(allResults, allRejectedResults, siteResults, analysisResults, false);
} }
} }
// AI Analysis if enabled // AI Analysis if enabled
// Save results before AI analysis (in case AI analysis takes a long time) let analysisResults = null;
if (allResults.length > 0) {
saveIncrementalResults(allResults, allRejectedResults, siteResults, null, false);
}
if (ENABLE_AI_ANALYSIS && allResults.length > 0) { if (ENABLE_AI_ANALYSIS && allResults.length > 0) {
logger.step("🧠 Running AI Analysis..."); logger.step("🧠 Running AI Analysis...");
const ollamaAvailable = await checkOllamaStatus(OLLAMA_MODEL); const ollamaStatus = await checkOllamaStatus();
if (ollamaAvailable) { if (ollamaStatus.available) {
// Prepare data for analysis (analyzeBatch expects objects with 'text' field) analysisResults = await analyzeBatch(allResults, {
const analysisData = allResults.map((job) => { context:
// Build comprehensive text including all available job information "Job market analysis focusing on job postings, skills, and trends",
const parts = [];
if (job.title) parts.push(`Title: ${job.title}`);
if (job.company) parts.push(`Company: ${job.company}`);
if (job.description) parts.push(`Description: ${job.description}`);
if (job.roleDuties) parts.push(`Role Duties: ${job.roleDuties}`);
if (job.jobRequirements) parts.push(`Requirements: ${job.jobRequirements}`);
return {
text: parts.join("\n\n"),
location: job.location || "",
keyword: job.keyword || "",
timestamp: job.extractedAt || job.postedDate || "",
roleDuties: job.roleDuties || "",
jobRequirements: job.jobRequirements || "",
};
}); });
// Process in smaller batches to avoid timeouts (5 jobs per batch)
const BATCH_SIZE = parseInt(process.env.AI_BATCH_SIZE) || 5;
analysisResults = [];
for (let i = 0; i < analysisData.length; i += BATCH_SIZE) {
const batch = analysisData.slice(i, i + BATCH_SIZE);
const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
const totalBatches = Math.ceil(analysisData.length / BATCH_SIZE);
logger.info(` Processing batch ${batchNumber}/${totalBatches} (${batch.length} jobs)...`);
try {
const batchResults = await analyzeBatch(
batch,
AI_CONTEXT,
OLLAMA_MODEL
);
analysisResults.push(...batchResults);
logger.success(` ✅ Batch ${batchNumber} completed`);
} catch (error) {
logger.error(` ❌ Batch ${batchNumber} failed: ${error.message}`);
// Add fallback results for this batch
const fallbackResults = batch.map((_, idx) => ({
postIndex: i + idx + 1,
isRelevant: true,
confidence: 0.3,
reasoning: `Analysis failed: ${error.message}`,
}));
analysisResults.push(...fallbackResults);
}
}
// Embed AI analysis into each job result
allResults.forEach((job, index) => {
if (analysisResults && analysisResults[index]) {
job.aiAnalysis = {
isRelevant: analysisResults[index].isRelevant,
confidence: analysisResults[index].confidence,
reasoning: analysisResults[index].reasoning,
context: AI_CONTEXT,
model: OLLAMA_MODEL,
analyzedAt: new Date().toISOString(),
};
}
});
logger.success( logger.success(
`✅ AI Analysis completed for ${allResults.length} jobs` `✅ AI Analysis completed for ${allResults.length} jobs`
); );
// Save results after AI analysis completes
saveIncrementalResults(allResults, allRejectedResults, siteResults, analysisResults, false);
} else { } else {
logger.warning("⚠️ Ollama not available, skipping AI analysis"); logger.warning("⚠️ Ollama not available, skipping AI analysis");
} }
} }
// Final save with complete flag // Save results
logger.info(`💾 Preparing final save: ${allResults.length} results, ${allRejectedResults.length} rejected`); const outputData = {
logger.info(`💾 EXCLUDE_REJECTED env: ${process.env.EXCLUDE_REJECTED}, excludeRejected variable: ${excludeRejected}`); metadata: {
extractedAt: new Date().toISOString(),
if (!excludeRejected) { parser: "job-search-parser",
logger.info(`✅ Including ${allRejectedResults.length} rejected results in output`); version: "2.0.0",
} else { sites: sites,
logger.info(`⏭️ Excluding rejected results (EXCLUDE_REJECTED=true)`); keywords: keywords.join(", "),
} locationFilter,
analysisResults,
logger.info(`💾 Final output: ${allResults.length} results, ${allRejectedResults.length} rejected`); },
results: allResults,
rejectedResults: allRejectedResults,
siteResults,
};
// Final save with isComplete flag const resultsDir = path.join(__dirname, "results");
saveIncrementalResults(allResults, allRejectedResults, siteResults, analysisResults, true); if (!fs.existsSync(resultsDir)) {
fs.mkdirSync(resultsDir, { recursive: true });
const savedFiles = []; }
if (incrementalJsonFilepath) savedFiles.push(incrementalJsonFilepath);
if (incrementalCsvFilepath) savedFiles.push(incrementalCsvFilepath); const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
const filename = `job-search-results-${timestamp}.json`;
const filepath = path.join(resultsDir, filename);
fs.writeFileSync(filepath, JSON.stringify(outputData, null, 2));
// Final summary // Final summary
logger.step("\n📊 Job Search Parser Summary"); logger.step("\n📊 Job Search Parser Summary");
logger.success(`✅ Total jobs found: ${allResults.length}`); logger.success(`✅ Total jobs found: ${allResults.length}`);
logger.info(`❌ Total rejected: ${allRejectedResults.length}`); logger.info(`❌ Total rejected: ${allRejectedResults.length}`);
logger.info(`📁 Results saved to:`); logger.info(`📁 Results saved to: ${filepath}`);
savedFiles.forEach(filepath => {
logger.info(` ${filepath}`);
});
logger.info("\n📈 Results by site:"); logger.info("\n📈 Results by site:");
for (const [site, stats] of Object.entries(siteResults)) { for (const [site, stats] of Object.entries(siteResults)) {
@ -455,31 +207,6 @@ async function startJobSearchParser(options = {}) {
logger.success("\n✅ Job Search Parser completed successfully!"); logger.success("\n✅ Job Search Parser completed successfully!");
// Construct output data for return
const outputData = {
metadata: {
extractedAt: new Date().toISOString(),
parser: "job-search-parser",
version: "2.0.0",
sites: sites,
keywords: keywords.join(", "),
locationFilter,
aiAnalysisEnabled: ENABLE_AI_ANALYSIS,
aiContext: ENABLE_AI_ANALYSIS ? AI_CONTEXT : undefined,
aiModel: ENABLE_AI_ANALYSIS ? OLLAMA_MODEL : undefined,
analysisResults: analysisResults,
rejectedJobsExcluded: excludeRejected,
isComplete: true,
lastUpdated: new Date().toISOString(),
},
results: allResults,
siteResults: siteResults,
};
if (!excludeRejected) {
outputData.rejectedResults = allRejectedResults;
}
return outputData; return outputData;
} catch (error) { } catch (error) {
logger.error(`❌ Job Search Parser failed: ${error.message}`); logger.error(`❌ Job Search Parser failed: ${error.message}`);

View File

@ -1,345 +1,332 @@
/** /**
* SkipTheDrive Job Parser * SkipTheDrive Job Parser
* *
* Parses remote job listings from SkipTheDrive.com * Parses remote job listings from SkipTheDrive.com
* Supports keyword search, job type filters, and pagination * Supports keyword search, job type filters, and pagination
*/ */
const { chromium } = require("playwright"); const { chromium } = require("playwright");
const path = require("path"); const path = require("path");
// Import from ai-analyzer core package // Import from ai-analyzer core package
const { const {
logger, logger,
cleanText, cleanText,
containsAnyKeyword, containsAnyKeyword,
containsAllKeywords, parseLocationFilters,
parseLocationFilters, validateLocationAgainstFilters,
validateLocationAgainstFilters, extractLocationFromProfile,
extractLocationFromProfile, analyzeBatch,
analyzeBatch, checkOllamaStatus,
checkOllamaStatus, } = require("../../ai-analyzer");
} = require("../../ai-analyzer");
/**
/** * Build search URL for SkipTheDrive
* Build search URL for SkipTheDrive * @param {string} keyword - Search keyword
* @param {string} keyword - Search keyword * @param {string} orderBy - Sort order (date, relevance)
* @param {string} orderBy - Sort order (date, relevance) * @param {Array<string>} jobTypes - Job types to filter (part time, full time, contract)
* @param {Array<string>} jobTypes - Job types to filter (part time, full time, contract) * @returns {string} - Formatted search URL
* @returns {string} - Formatted search URL */
*/ function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) { let url = `https://www.skipthedrive.com/?s=${encodeURIComponent(keyword)}`;
let url = `https://www.skipthedrive.com/?s=${encodeURIComponent(keyword)}`;
if (orderBy) {
if (orderBy) { url += `&orderby=${orderBy}`;
url += `&orderby=${orderBy}`; }
}
// Add job type filters
// Add job type filters jobTypes.forEach((type) => {
jobTypes.forEach((type) => { url += `&jobtype=${encodeURIComponent(type)}`;
url += `&jobtype=${encodeURIComponent(type)}`; });
});
return url;
return url; }
}
/**
/** * Extract job data from a single job listing element
* Extract job data from a single job listing element * @param {Element} article - Job listing DOM element
* @param {Element} article - Job listing DOM element * @returns {Object} - Extracted job data
* @returns {Object} - Extracted job data */
*/ async function extractJobData(article) {
async function extractJobData(article) { try {
try { // Extract job title and URL
// Extract job title and URL const titleElement = await article.$("h2.post-title a");
const titleElement = await article.$("h2.post-title a"); const title = titleElement ? await titleElement.textContent() : "";
const title = titleElement ? await titleElement.textContent() : ""; const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";
const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";
// Extract date
// Extract date const dateElement = await article.$("time.post-date");
const dateElement = await article.$("time.post-date"); const datePosted = dateElement
const datePosted = dateElement ? await dateElement.getAttribute("datetime")
? await dateElement.getAttribute("datetime") : "";
: ""; const dateText = dateElement ? await dateElement.textContent() : "";
const dateText = dateElement ? await dateElement.textContent() : "";
// Extract company name
// Extract company name const companyElement = await article.$(
const companyElement = await article.$( ".custom_fields_company_name_display_search_results"
".custom_fields_company_name_display_search_results" );
); let company = companyElement ? await companyElement.textContent() : "";
let company = companyElement ? await companyElement.textContent() : ""; company = company.replace(/^\s*[^\s]+\s*/, "").trim(); // Remove icon
company = company.replace(/^\s*[^\s]+\s*/, "").trim(); // Remove icon
// Extract days ago
// Extract days ago const daysAgoElement = await article.$(
const daysAgoElement = await article.$( ".custom_fields_job_date_display_search_results"
".custom_fields_job_date_display_search_results" );
); let daysAgo = daysAgoElement ? await daysAgoElement.textContent() : "";
let daysAgo = daysAgoElement ? await daysAgoElement.textContent() : ""; daysAgo = daysAgo.replace(/^\s*[^\s]+\s*/, "").trim(); // Remove icon
daysAgo = daysAgo.replace(/^\s*[^\s]+\s*/, "").trim(); // Remove icon
// Extract job description excerpt
// Extract job description excerpt const excerptElement = await article.$(".excerpt_part");
const excerptElement = await article.$(".excerpt_part"); const description = excerptElement
const description = excerptElement ? await excerptElement.textContent()
? await excerptElement.textContent() : "";
: "";
// Check if featured/sponsored
// Check if featured/sponsored const featuredElement = await article.$(".custom_fields_sponsored_job");
const featuredElement = await article.$(".custom_fields_sponsored_job"); const isFeatured = !!featuredElement;
const isFeatured = !!featuredElement;
// Extract job ID from article ID
// Extract job ID from article ID const articleId = await article.getAttribute("id");
const articleId = await article.getAttribute("id"); const jobId = articleId ? articleId.replace("post-", "") : "";
const jobId = articleId ? articleId.replace("post-", "") : "";
return {
return { jobId,
jobId, title: cleanText(title),
title: cleanText(title), company: cleanText(company),
company: cleanText(company), jobUrl,
jobUrl, datePosted,
datePosted, dateText: cleanText(dateText),
dateText: cleanText(dateText), daysAgo: cleanText(daysAgo),
daysAgo: cleanText(daysAgo), description: cleanText(description),
description: cleanText(description), isFeatured,
isFeatured, source: "skipthedrive",
source: "skipthedrive", timestamp: new Date().toISOString(),
timestamp: new Date().toISOString(), };
}; } catch (error) {
} catch (error) { logger.error(`Error extracting job data: ${error.message}`);
logger.error(`Error extracting job data: ${error.message}`); return null;
return null; }
} }
}
/**
/** * Parse SkipTheDrive job listings
* Parse SkipTheDrive job listings * @param {Object} options - Parser options
* @param {Object} options - Parser options * @returns {Promise<Array>} - Array of parsed job listings
* @returns {Promise<Array>} - Array of parsed job listings */
*/ async function parseSkipTheDrive(options = {}) {
async function parseSkipTheDrive(options = {}) { const {
const { keywords = process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
keywords = process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [ "software engineer",
"software engineer", "developer",
"developer", ],
], jobTypes = process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],
jobTypes = process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [], locationFilter = process.env.LOCATION_FILTER || "",
locationFilter = process.env.LOCATION_FILTER || "", maxPages = parseInt(process.env.MAX_PAGES) || 5,
maxPages = parseInt(process.env.MAX_PAGES) || 5, headless = process.env.HEADLESS !== "false",
headless = process.env.HEADLESS !== "false", enableAI = process.env.ENABLE_AI_ANALYSIS === "true",
enableAI = process.env.ENABLE_AI_ANALYSIS === "true", aiContext = process.env.AI_CONTEXT || "remote job opportunities analysis",
aiContext = process.env.AI_CONTEXT || "remote job opportunities analysis", } = options;
useAndLogic = false, // Use AND logic instead of OR logic for keywords
} = options; logger.step("Starting SkipTheDrive parser...");
logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
logger.step("Starting SkipTheDrive parser..."); logger.info(
logger.info(`🔍 Keywords: ${keywords.join(", ")}`); `📋 Job Types: ${jobTypes.length > 0 ? jobTypes.join(", ") : "All"}`
logger.info(`🔗 Keyword Logic: ${useAndLogic ? "AND (all keywords must match)" : "OR (any keyword matches)"}`); );
logger.info( logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
`📋 Job Types: ${jobTypes.length > 0 ? jobTypes.join(", ") : "All"}` logger.info(`📄 Max Pages: ${maxPages}`);
);
logger.info(`📍 Location Filter: ${locationFilter || "None"}`); const browser = await chromium.launch({
logger.info(`📄 Max Pages: ${maxPages}`); headless,
args: [
const browser = await chromium.launch({ "--no-sandbox",
headless, "--disable-setuid-sandbox",
args: [ "--disable-dev-shm-usage",
"--no-sandbox", ],
"--disable-setuid-sandbox", });
"--disable-dev-shm-usage",
], const context = await browser.newContext({
}); userAgent:
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
const context = await browser.newContext({ });
userAgent:
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", const results = [];
}); const rejectedResults = [];
const seenJobs = new Set();
const results = [];
const rejectedResults = []; try {
const seenJobs = new Set(); // Search for each keyword
for (const keyword of keywords) {
try { logger.info(`\n🔍 Searching for: ${keyword}`);
// For AND logic, combine all keywords into a single search query
// For OR logic, search each keyword separately const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
const searchKeywords = useAndLogic ? [keywords.join(" ")] : keywords; const page = await context.newPage();
// Search for each keyword (or combined keyword for AND logic) try {
for (const keyword of searchKeywords) { logger.info(
logger.info(`\n🔍 Searching for: ${keyword}`); `Attempting navigation to: ${searchUrl} at ${new Date().toISOString()}`
);
const searchUrl = buildSearchUrl(keyword, "date", jobTypes); await page.goto(searchUrl, {
const page = await context.newPage(); waitUntil: "domcontentloaded",
timeout: 30000,
try { });
logger.info( logger.info(
`Attempting navigation to: ${searchUrl} at ${new Date().toISOString()}` `Navigation completed successfully at ${new Date().toISOString()}`
); );
await page.goto(searchUrl, {
waitUntil: "domcontentloaded", // Wait for job listings to load
timeout: 30000, logger.info("Waiting for selector #loops-wrapper");
}); await page
logger.info( .waitForSelector("#loops-wrapper", { timeout: 5000 })
`Navigation completed successfully at ${new Date().toISOString()}` .catch(() => {
); logger.warning(`No results found for keyword: ${keyword}`);
});
// Wait for job listings to load logger.info("Selector wait completed");
logger.info("Waiting for selector #loops-wrapper");
await page let currentPage = 1;
.waitForSelector("#loops-wrapper", { timeout: 5000 }) let hasNextPage = true;
.catch(() => {
logger.warning(`No results found for keyword: ${keyword}`); while (hasNextPage && currentPage <= maxPages) {
}); logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);
logger.info("Selector wait completed");
// Extract all job articles on current page
let currentPage = 1; const jobArticles = await page.$$("article[id^='post-']");
let hasNextPage = true; logger.info(
`Found ${jobArticles.length} job listings on page ${currentPage}`
while (hasNextPage && currentPage <= maxPages) { );
logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);
for (const article of jobArticles) {
// Extract all job articles on current page const jobData = await extractJobData(article);
const jobArticles = await page.$$("article[id^='post-']");
logger.info( if (!jobData || seenJobs.has(jobData.jobId)) {
`Found ${jobArticles.length} job listings on page ${currentPage}` continue;
); }
for (const article of jobArticles) { seenJobs.add(jobData.jobId);
const jobData = await extractJobData(article);
// Add keyword that found this job
if (!jobData || seenJobs.has(jobData.jobId)) { jobData.searchKeyword = keyword;
continue;
} // Validate job against keywords
const fullText = `${jobData.title} ${jobData.description} ${jobData.company}`;
seenJobs.add(jobData.jobId); if (!containsAnyKeyword(fullText, keywords)) {
rejectedResults.push({
// Add keyword that found this job ...jobData,
jobData.searchKeyword = keyword; rejected: true,
reason: "Keywords not found in job listing",
// Validate job against keywords });
const fullText = `${jobData.title} ${jobData.description} ${jobData.company}`; continue;
const keywordMatch = useAndLogic }
? containsAllKeywords(fullText, keywords)
: containsAnyKeyword(fullText, keywords); // Location validation (if enabled)
if (locationFilter) {
if (!keywordMatch) { const locationFilters = parseLocationFilters(locationFilter);
rejectedResults.push({ // For SkipTheDrive, most jobs are remote, but we can check the title/description
...jobData, const locationValid =
rejected: true, fullText.toLowerCase().includes("remote") ||
reason: useAndLogic locationFilters.some((filter) =>
? "Not all keywords found in job listing" fullText.toLowerCase().includes(filter.toLowerCase())
: "Keywords not found in job listing", );
});
continue; if (!locationValid) {
} rejectedResults.push({
...jobData,
// Location validation (if enabled) rejected: true,
if (locationFilter) { reason: "Location requirements not met",
const locationFilters = parseLocationFilters(locationFilter); });
// For SkipTheDrive, most jobs are remote, but we can check the title/description continue;
const locationValid = }
fullText.toLowerCase().includes("remote") ||
locationFilters.some((filter) => jobData.locationValid = locationValid;
fullText.toLowerCase().includes(filter.toLowerCase()) }
);
logger.success(`✅ Found: ${jobData.title} at ${jobData.company}`);
if (!locationValid) { results.push(jobData);
rejectedResults.push({ }
...jobData,
rejected: true, // Check for next page
reason: "Location requirements not met", const nextPageLink = await page.$("a.nextp");
}); if (nextPageLink && currentPage < maxPages) {
continue; logger.info("📄 Moving to next page...");
} await nextPageLink.click();
await page.waitForLoadState("domcontentloaded");
jobData.locationValid = locationValid; await page.waitForTimeout(2000); // Wait for content to load
} currentPage++;
} else {
logger.success(`✅ Found: ${jobData.title} at ${jobData.company}`); hasNextPage = false;
results.push(jobData); }
} }
} catch (error) {
// Check for next page logger.error(`Error processing keyword "${keyword}": ${error.message}`);
const nextPageLink = await page.$("a.nextp"); } finally {
if (nextPageLink && currentPage < maxPages) { await page.close();
logger.info("📄 Moving to next page..."); }
await nextPageLink.click(); }
await page.waitForLoadState("domcontentloaded");
await page.waitForTimeout(2000); // Wait for content to load logger.success(`\n✅ Parsing complete!`);
currentPage++; logger.info(`📊 Total jobs found: ${results.length}`);
} else { logger.info(`❌ Rejected jobs: ${rejectedResults.length}`);
hasNextPage = false;
} // Run AI analysis if enabled
} let aiAnalysis = null;
} catch (error) { if (enableAI && results.length > 0) {
logger.error(`Error processing keyword "${keyword}": ${error.message}`); logger.step("Running AI analysis on job listings...");
} finally {
await page.close(); const aiAvailable = await checkOllamaStatus();
} if (aiAvailable) {
} const analysisData = results.map((job) => ({
text: `${job.title} at ${job.company}. ${job.description}`,
logger.success(`\n✅ Parsing complete!`); metadata: {
logger.info(`📊 Total jobs found: ${results.length}`); jobId: job.jobId,
logger.info(`❌ Rejected jobs: ${rejectedResults.length}`); company: job.company,
daysAgo: job.daysAgo,
// Run AI analysis if enabled },
let aiAnalysis = null; }));
if (enableAI && results.length > 0) {
logger.step("Running AI analysis on job listings..."); aiAnalysis = await analyzeBatch(analysisData, aiContext);
const aiAvailable = await checkOllamaStatus(); // Merge AI analysis with results
if (aiAvailable) { results.forEach((job, index) => {
const analysisData = results.map((job) => ({ if (aiAnalysis && aiAnalysis[index]) {
text: `${job.title} at ${job.company}. ${job.description}`, job.aiAnalysis = {
metadata: { isRelevant: aiAnalysis[index].isRelevant,
jobId: job.jobId, confidence: aiAnalysis[index].confidence,
company: job.company, reasoning: aiAnalysis[index].reasoning,
daysAgo: job.daysAgo, };
}, }
})); });
aiAnalysis = await analyzeBatch(analysisData, aiContext); logger.success("✅ AI analysis completed");
} else {
// Merge AI analysis with results logger.warning("⚠️ AI not available - skipping analysis");
results.forEach((job, index) => { }
if (aiAnalysis && aiAnalysis[index]) { }
job.aiAnalysis = {
isRelevant: aiAnalysis[index].isRelevant, return {
confidence: aiAnalysis[index].confidence, results,
reasoning: aiAnalysis[index].reasoning, rejectedResults,
}; metadata: {
} source: "skipthedrive",
}); totalJobs: results.length,
rejectedJobs: rejectedResults.length,
logger.success("✅ AI analysis completed"); keywords: keywords,
} else { jobTypes: jobTypes,
logger.warning("⚠️ AI not available - skipping analysis"); locationFilter: locationFilter,
} aiAnalysisEnabled: enableAI,
} aiAnalysisCompleted: !!aiAnalysis,
timestamp: new Date().toISOString(),
return { },
results, };
rejectedResults, } catch (error) {
metadata: { logger.error(`Fatal error in SkipTheDrive parser: ${error.message}`);
source: "skipthedrive", throw error;
totalJobs: results.length, } finally {
rejectedJobs: rejectedResults.length, await browser.close();
keywords: keywords, }
jobTypes: jobTypes, }
locationFilter: locationFilter,
aiAnalysisEnabled: enableAI, // Export the parser
aiAnalysisCompleted: !!aiAnalysis, module.exports = {
timestamp: new Date().toISOString(), parseSkipTheDrive,
}, buildSearchUrl,
}; extractJobData,
} catch (error) { };
logger.error(`Fatal error in SkipTheDrive parser: ${error.message}`);
throw error;
} finally {
await browser.close();
}
}
// Export the parser
module.exports = {
parseSkipTheDrive,
buildSearchUrl,
extractJobData,
};

View File

@ -1,116 +0,0 @@
/**
* CSV Utilities
*
* Functions for converting job search results to CSV format
*/
/**
 * Escapes a single CSV field value per RFC 4180.
 *
 * Wraps the value in double quotes when it contains a comma, a double
 * quote, or a line break — including a bare "\r" (previously only "\n"
 * was checked, so carriage returns from Windows-style text could break
 * row structure) — doubling any embedded quotes.
 *
 * @param {*} value - The value to escape; null/undefined become "".
 * @returns {string} - The escaped value, safe to join with commas.
 */
function escapeCsvField(value) {
  if (value === null || value === undefined) {
    return "";
  }
  const stringValue = String(value);
  // Quote when the value contains a delimiter, a quote, or any line break.
  if (/[",\n\r]/.test(stringValue)) {
    return `"${stringValue.replace(/"/g, '""')}"`;
  }
  return stringValue;
}
/**
 * Converts job results to CSV format.
 *
 * Emits one header row followed by one row per job. Plain columns are
 * read straight off the job object; "ai*" columns are pulled from the
 * nested job.aiAnalysis object when present (blank otherwise).
 *
 * Fix: field lookups now use `??` instead of `||` so legitimate falsy
 * values (e.g. a numeric confidence of 0, a boolean false) survive into
 * the CSV instead of being blanked.
 *
 * @param {Array<Object>} jobs - Array of job objects.
 * @param {Object} [metadata=null] - Metadata object (currently unused;
 *   kept for backward compatibility with existing callers).
 * @returns {string} - CSV string ("" when jobs is empty or missing).
 */
function convertJobsToCsv(jobs, metadata = null) {
  if (!jobs || jobs.length === 0) {
    return "";
  }

  // CSV columns based on the job object structure.
  const columns = [
    "jobId",
    "title",
    "company",
    "location",
    "jobUrl",
    "postedDate",
    "description",
    "roleDuties",
    "jobRequirements",
    "jobType",
    "experienceLevel",
    "keyword",
    "extractedAt",
    "source",
    "aiRelevant",
    "aiConfidence",
    "aiReasoning",
    "aiContext",
    "aiModel",
    "aiAnalyzedAt",
  ];

  // Explicit extractor per AI column, replacing the previous fragile
  // string-surgery mapping ("aiRelevant" -> "relevant", etc.).
  const aiExtractors = {
    aiRelevant: (ai) => (ai.isRelevant ? "Yes" : "No"),
    aiConfidence: (ai) => ai.confidence ?? "",
    aiReasoning: (ai) => ai.reasoning ?? "",
    aiContext: (ai) => ai.context ?? "",
    aiModel: (ai) => ai.model ?? "",
    aiAnalyzedAt: (ai) => ai.analyzedAt ?? "",
  };

  // Header row.
  const headerRow = columns.map((col) => escapeCsvField(col)).join(",");

  // Data rows.
  const dataRows = jobs.map((job) =>
    columns
      .map((col) => {
        if (col in aiExtractors) {
          // AI fields live on the nested aiAnalysis object; blank when
          // no analysis was attached to this job.
          return job.aiAnalysis
            ? escapeCsvField(aiExtractors[col](job.aiAnalysis))
            : "";
        }
        return escapeCsvField(job[col] ?? "");
      })
      .join(",")
  );

  return [headerRow, ...dataRows].join("\n");
}
/**
 * Converts a full results object (with metadata) to CSV.
 *
 * @param {Object} resultsData - Full results object containing a
 *   `results` array and optional `metadata`.
 * @returns {string} - CSV string ("" when there is nothing to convert).
 */
function convertResultsToCsv(resultsData) {
  const jobs = resultsData?.results;
  if (!jobs) {
    return "";
  }
  return convertJobsToCsv(jobs, resultsData.metadata);
}
// Public API: CSV conversion helpers for job search results.
module.exports = {
  convertJobsToCsv,
  convertResultsToCsv,
  escapeCsvField,
};

View File

@ -1,947 +0,0 @@
/**
* Indeed Parsing Strategy
*
* Uses core-parser for browser management and ai-analyzer for utilities
*/
const {
logger,
cleanText,
containsAnyKeyword,
containsAllKeywords,
matchesKeywordGroups,
validateLocationAgainstFilters,
} = require("ai-analyzer");
/**
 * Builds an Indeed job-search URL, always sorted newest-first.
 *
 * @param {string} keyword - Search query ("q" parameter).
 * @param {string} [location=""] - Location string ("l" parameter).
 * @param {Object} [filters={}] - Optional filters: fromage (days back),
 *   jobType, remote, experienceLevel.
 * @returns {string} - Fully-qualified Indeed search URL.
 */
function buildSearchUrl(keyword, location = "", filters = {}) {
  const url = new URL("https://www.indeed.com/jobs");
  url.searchParams.append("q", keyword);
  // Sort by date (newest first).
  url.searchParams.append("sort", "date");

  if (location) {
    url.searchParams.append("l", location);
  }
  if (filters.fromage) {
    // fromage is in days (e.g., 1 = last 24 hours, 7 = last 7 days, 30 = last 30 days)
    url.searchParams.append("fromage", filters.fromage);
  }
  if (filters.jobType) {
    // jt=fulltime, parttime, contract, internship, temporary
    url.searchParams.append("jt", filters.jobType);
  }
  if (filters.remote) {
    url.searchParams.append("remote", "true");
  }
  if (filters.experienceLevel) {
    // explvl=entry_level, mid_level, senior_level
    url.searchParams.append("explvl", filters.experienceLevel);
  }
  return url.toString();
}
/**
 * Indeed parsing strategy function.
 *
 * Drives a core-parser-managed browser page through Indeed search results
 * for each configured keyword: builds the search URL, navigates (with
 * fallback navigation modes), detects CAPTCHA/blocking pages, paginates,
 * and filters extracted jobs by keyword logic and location.
 *
 * @param {Object} coreParser - Browser manager exposing createPage() and
 *   navigateTo(); see core-parser (not visible in this file).
 * @param {Object} [options={}] - Search options; see destructured
 *   defaults below for the full list.
 * @returns {Promise<{results: Array, rejectedResults: Array, summary: Object}>}
 *   Never rejects: a fatal error is caught and reported via summary.error.
 */
async function indeedStrategy(coreParser, options = {}) {
  const {
    keywords = ["software engineer", "developer"],
    keywordGroups = null, // Array of keyword groups for grouped AND/OR logic
    locationFilter = null,
    maxPages = 5,
    location = "", // Indeed location search (e.g., "Toronto, ON", "Canada")
    minDate = null, // Minimum posted date (format: YYYY-MM-DD)
    useAndLogic = false, // Use AND logic instead of OR logic for keywords
  } = options;
  const results = [];
  const rejectedResults = [];
  const seenJobs = new Set(); // de-dupes by jobId across keywords/pages
  try {
    // Create main page
    const page = await coreParser.createPage("indeed-main");
    logger.info("🚀 Starting Indeed parser...");
    logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
    if (keywordGroups) {
      logger.info(`🔗 Keyword Logic: Grouped AND/OR - ${keywordGroups.map(g => `(${g.join(' OR ')})`).join(' AND ')}`);
    } else {
      logger.info(`🔗 Keyword Logic: ${useAndLogic ? "AND (all keywords must match)" : "OR (any keyword matches)"}`);
    }
    logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
    logger.info(`🌍 Indeed Location: ${location || "None"}`);
    logger.info(`📄 Max Pages: ${maxPages}`);
    // Convert minDate to fromage (days ago) — Indeed filters by relative age.
    let fromage = null;
    if (minDate) {
      try {
        const minDateObj = new Date(minDate);
        const now = new Date();
        const daysDiff = Math.floor((now - minDateObj) / (1000 * 60 * 60 * 24));
        if (daysDiff > 0 && daysDiff <= 30) {
          fromage = daysDiff;
          logger.info(`📅 Min Date Filter: ${minDate} (${fromage} days ago)`);
        } else if (daysDiff > 30) {
          fromage = 30; // Indeed's maximum is typically 30 days
          logger.info(`📅 Min Date Filter: ${minDate} (limited to 30 days)`);
        }
        // NOTE(review): an unparseable minDate yields an Invalid Date and
        // NaN daysDiff, which silently skips the filter rather than
        // reaching the catch below — confirm whether that is intended.
      } catch (error) {
        logger.warning(`⚠️ Invalid date format for minDate: ${minDate}. Expected format: YYYY-MM-DD`);
      }
    }
    // Determine search keywords based on logic type
    let searchKeywords;
    if (keywordGroups) {
      // For grouped AND/OR logic, search each keyword in each group (OR within groups)
      searchKeywords = keywordGroups.flat(); // Flatten all keywords from all groups
    } else if (useAndLogic) {
      // For simple AND logic, combine all keywords into a single search query
      searchKeywords = [keywords.join(" ")];
    } else {
      // For OR logic, search each keyword separately
      searchKeywords = keywords;
    }
    // Search for each keyword (or combined keyword for AND logic)
    for (const keyword of searchKeywords) {
      logger.info(`\n🔍 Searching Indeed for: "${keyword}"`);
      const searchUrl = buildSearchUrl(keyword, location, {
        fromage: fromage,
      });
      logger.info(`🔗 Search URL: ${searchUrl}`);
      try {
        // Navigate to job search results
        // Use domcontentloaded instead of networkidle for faster loading
        // Indeed can be slow to fully load, so we'll wait for DOM and then check for content
        try {
          await coreParser.navigateTo(searchUrl, {
            pageId: "indeed-main",
            retries: 2,
            waitUntil: "domcontentloaded",
            timeout: 60000, // Increase timeout to 60 seconds
          });
        } catch (navError) {
          // If navigation fails, try with load event instead
          logger.warning(`⚠️ Initial navigation failed, trying with 'load' event: ${navError.message}`);
          try {
            await coreParser.navigateTo(searchUrl, {
              pageId: "indeed-main",
              retries: 1,
              waitUntil: "load",
              timeout: 60000,
            });
          } catch (loadError) {
            // Last resort: try direct page navigation
            logger.warning(`⚠️ Load event failed, trying direct navigation: ${loadError.message}`);
            await page.goto(searchUrl, { timeout: 60000, waitUntil: "domcontentloaded" }).catch(() => {
              throw new Error(`Failed to navigate to Indeed after all attempts: ${loadError.message}`);
            });
          }
        }
        // Wait for page to load and let JavaScript execute
        await new Promise((resolve) => setTimeout(resolve, 5000));
        // Check if we're on the right page
        const currentUrl = page.url();
        logger.info(`📍 Current page URL: ${currentUrl}`);
        // Check if we were redirected or blocked (check URL first)
        if (currentUrl.includes('captcha') || currentUrl.includes('blocked') || currentUrl.includes('access-denied') || currentUrl.includes('verify')) {
          logger.error(`❌ Indeed appears to be blocking access. URL: ${currentUrl}`);
          throw new Error('Indeed is showing a CAPTCHA or verification page. Please try running in non-headless mode (set HEADLESS=false in .env) or wait and try again later.');
        }
        // Check page content for CAPTCHA/human verification indicators
        try {
          const pageContent = await page.evaluate(() => {
            const bodyText = document.body?.textContent?.toLowerCase() || '';
            const title = document.title?.toLowerCase() || '';
            // Check for common CAPTCHA/verification indicators
            const captchaIndicators = [
              'verify you\'re human',
              'verify you are human',
              'captcha',
              'prove you\'re not a robot',
              'unusual traffic',
              'automated queries',
              'please verify',
              'security check',
              'access denied',
              'blocked',
            ];
            const foundIndicators = captchaIndicators.filter(indicator =>
              bodyText.includes(indicator) || title.includes(indicator)
            );
            return {
              hasCaptcha: foundIndicators.length > 0,
              indicators: foundIndicators,
              title: document.title,
              bodyPreview: bodyText.substring(0, 500),
            };
          });
          if (pageContent.hasCaptcha) {
            logger.error(`❌ Indeed is showing a CAPTCHA/verification page.`);
            logger.error(` Detected indicators: ${pageContent.indicators.join(', ')}`);
            logger.error(` Page title: ${pageContent.title}`);
            logger.error(`\n💡 Solutions:`);
            logger.error(` 1. Run in non-headless mode: Set HEADLESS=false in .env file`);
            logger.error(` 2. Wait a few minutes and try again`);
            logger.error(` 3. Use a different IP address or VPN`);
            logger.error(` 4. Manually solve the CAPTCHA in a browser, then try again`);
            throw new Error(`Indeed CAPTCHA detected: ${pageContent.indicators.join(', ')}. Please see suggestions above.`);
          }
        } catch (checkError) {
          // If the check itself fails, log but don't throw (might be a different error)
          if (checkError.message.includes('CAPTCHA')) {
            throw checkError; // Re-throw CAPTCHA errors
          }
          logger.debug(`Could not check for CAPTCHA: ${checkError.message}`);
        }
        // Check for results count
        try {
          const resultsText = await page.evaluate(() => {
            const countElement = document.querySelector(".jobsearch-JobCountAndSortPane-jobCount");
            return countElement ? countElement.textContent : "No results count found";
          });
          logger.info(`📊 Indeed results info: ${resultsText}`);
        } catch (e) {
          logger.debug(`Could not get results count: ${e.message}`);
        }
        // Wait for job listings container — Indeed's markup varies, so
        // probe a cascade of known container selectors.
        let hasResults = false;
        const possibleSelectors = [
          "#mosaic-provider-jobcards",
          ".job_seen_beacon",
          "[data-jk]",
          ".jobsearch-SerpJobCard",
          ".jobCard",
        ];
        for (const selector of possibleSelectors) {
          try {
            await page.waitForSelector(selector, { timeout: 5000 });
            const count = await page.$$(selector).then((elements) => elements.length);
            if (count > 0) {
              hasResults = true;
              logger.info(`✅ Found job results container with selector: ${selector} (${count} jobs)`);
              break;
            }
          } catch (e) {
            // Try next selector
            continue;
          }
        }
        if (!hasResults) {
          logger.warning(`⚠️ No job results container found for keyword: ${keyword}`);
          continue;
        }
        // Process multiple pages
        let currentPage = 1;
        const maxPagesToProcess = maxPages > 0 ? maxPages : 999; // 0 means unlimited
        logger.info(`📄 Processing pages (max: ${maxPagesToProcess === 999 ? 'unlimited' : maxPagesToProcess}) for "${keyword}"...`);
        while (currentPage <= maxPagesToProcess) {
          logger.info(`📄 Processing page ${currentPage}...`);
          // Wait for page to fully load
          await new Promise((resolve) => setTimeout(resolve, 2000));
          // Extract jobs from current page
          const pageJobs = await extractJobsFromPage(page, keyword, locationFilter);
          logger.info(`📋 Extracted ${pageJobs.length} jobs from page ${currentPage}`);
          if (pageJobs.length === 0) {
            logger.warning(`⚠️ No jobs found on page ${currentPage}, stopping pagination`);
            break;
          }
          // Process each job
          for (const job of pageJobs) {
            // Skip duplicates
            if (seenJobs.has(job.jobId)) {
              continue;
            }
            seenJobs.add(job.jobId);
            // Validate keywords based on logic type
            if (keywordGroups) {
              // Grouped AND/OR logic: all groups must match (AND), at least one keyword per group (OR)
              const fullText = `${job.title} ${job.description} ${job.company}`;
              if (!matchesKeywordGroups(fullText, keywordGroups)) {
                rejectedResults.push({
                  ...job,
                  rejectionReason: "Job does not match all keyword groups",
                });
                continue;
              }
            } else if (useAndLogic) {
              // Simple AND logic: all keywords must match
              const fullText = `${job.title} ${job.description} ${job.company}`.toLowerCase();
              if (!containsAllKeywords(fullText, keywords)) {
                rejectedResults.push({
                  ...job,
                  rejectionReason: "Not all keywords found in job listing",
                });
                continue;
              }
            }
            // Validate location if filtering enabled
            if (locationFilter) {
              const locationValid = validateLocationAgainstFilters(
                job.location,
                locationFilter
              );
              if (!locationValid.isValid) {
                rejectedResults.push({
                  ...job,
                  rejectionReason: locationValid.reasoning || "Location filter mismatch",
                });
                continue;
              }
            }
            results.push(job);
          }
          // Check if there's a next page
          const hasNext = await hasNextPageAvailable(page);
          if (!hasNext) {
            logger.info(`✅ No more pages available. Total jobs extracted: ${results.length}`);
            break;
          }
          // Navigate to next page if we haven't reached maxPages
          if (currentPage < maxPagesToProcess) {
            logger.info(`➡️ Navigating to page ${currentPage + 1}...`);
            const navigationSuccess = await navigateToNextPage(page);
            if (!navigationSuccess) {
              logger.warning(`⚠️ Failed to navigate to next page, stopping pagination`);
              break;
            }
            currentPage++;
          } else {
            logger.info(`📊 Reached max pages limit (${maxPagesToProcess}). Total jobs extracted: ${results.length}`);
            break;
          }
        }
        // NOTE(review): results/rejectedResults accumulate across keywords,
        // so the per-keyword totals below include earlier keywords' jobs.
        const totalExtracted = results.length + rejectedResults.length;
        logger.info(`📋 Extracted ${results.length} accepted jobs, ${rejectedResults.length} rejected jobs (${totalExtracted} total) across ${currentPage} page(s) for "${keyword}"`);
      } catch (error) {
        // Per-keyword failure is non-fatal: log and try the next keyword.
        logger.error(`Error processing keyword "${keyword}": ${error.message}`);
        logger.error(`Stack: ${error.stack}`);
      }
    }
    logger.info(
      `🎯 Indeed parsing completed: ${results.length} jobs found, ${rejectedResults.length} rejected`
    );
    return {
      results,
      rejectedResults,
      summary: {
        totalJobs: results.length,
        totalRejected: rejectedResults.length,
        keywords: keywords.join(", "),
        locationFilter,
        source: "indeed",
      },
    };
  } catch (error) {
    // Fatal failure: still return whatever was collected, with the error
    // recorded in the summary.
    logger.error(`❌ Indeed parsing failed: ${error.message}`);
    logger.error(`Stack: ${error.stack}`);
    return {
      results,
      rejectedResults,
      summary: {
        totalJobs: results.length,
        totalRejected: rejectedResults.length,
        keywords: keywords.join(", "),
        locationFilter,
        source: "indeed",
        error: error.message,
      },
    };
  }
}
/**
 * Collects job records from every job card visible on the current page.
 *
 * Locates job-card elements via a cascade of fallback selectors (Indeed
 * job listings are typically divs with a data-jk attribute), scrolls each
 * card into view on a best-effort basis, and delegates per-card scraping
 * to extractJobData().
 *
 * @param {Object} page - Browser page handle.
 * @param {string} keyword - Search keyword that produced these results.
 * @param {string|Object|null} locationFilter - Accepted for signature
 *   compatibility; location filtering happens in the caller.
 * @returns {Promise<Array<Object>>} - Extracted jobs (possibly empty).
 */
async function extractJobsFromPage(page, keyword, locationFilter) {
  const jobs = [];
  // Indeed job listings are typically in divs with data-jk attribute (job key)
  const cardSelectors = [
    "[data-jk]",
    ".job_seen_beacon",
    ".jobsearch-SerpJobCard",
    ".jobCard",
    "div[data-testid='job-card']",
  ];
  try {
    let jobElements = [];
    for (const selector of cardSelectors) {
      try {
        await page.waitForSelector(selector, { timeout: 3000 }).catch(() => {});
        const found = await page.$$(selector);
        if (found.length > 0) {
          jobElements = found;
          logger.info(`✅ Found ${jobElements.length} job elements using selector: ${selector}`);
          break;
        }
      } catch (e) {
        continue;
      }
    }
    if (jobElements.length === 0) {
      logger.warning(`⚠️ No job elements found with any selector`);
      return jobs;
    }
    for (const card of jobElements) {
      try {
        // Best-effort scroll: some cards live in hidden or lazy-loaded
        // containers, so never let a scroll failure block extraction.
        try {
          const scrollTimeout = new Promise((_, reject) =>
            setTimeout(() => reject(new Error('Scroll timeout')), 2000)
          );
          await Promise.race([card.scrollIntoViewIfNeeded(), scrollTimeout]);
          await new Promise((resolve) => setTimeout(resolve, 100));
        } catch (scrollError) {
          // Fall back to a plain DOM scrollIntoView.
          try {
            await card.evaluate((el) => {
              el.scrollIntoView({ behavior: 'auto', block: 'center' });
            });
            await new Promise((resolve) => setTimeout(resolve, 100));
          } catch (simpleScrollError) {
            // Data extraction can still proceed without scrolling.
            logger.debug(`Could not scroll element into view, continuing anyway: ${simpleScrollError.message}`);
          }
        }
        const job = await extractJobData(card, keyword);
        if (job && (job.title || job.jobId)) {
          jobs.push(job);
        }
      } catch (error) {
        logger.warning(`Failed to extract job data: ${error.message}`);
      }
    }
  } catch (error) {
    logger.error(`Failed to extract jobs from page: ${error.message}`);
  }
  return jobs;
}
/**
 * Extracts structured job data from a single Indeed job-card element.
 *
 * Phase 1 runs in the page context, scraping raw fields through cascades
 * of fallback selectors (Indeed's markup varies across experiments).
 * Phase 2 runs Node-side: cleans text, resolves the job URL, and stamps
 * provenance metadata.
 *
 * Fixes over the previous version:
 *  - salary condition had an operator-precedence bug:
 *    `text && text.includes("$") || text.match(/\d+/)` parsed as
 *    `(text && includes) || match`; now explicitly parenthesized;
 *  - the "nothing extracted" null-return ran *after* a fallback jobId was
 *    generated, so it could never fire; the check now precedes it;
 *  - deprecated `substr` replaced with `slice`; `parseInt` given a radix.
 *
 * @param {Object} jobElement - Element handle for one job card.
 * @param {string} keyword - Search keyword that produced this card.
 * @returns {Promise<Object|null>} - Normalized job record, or null when
 *   the card yields neither a job id nor a title, or extraction throws.
 */
async function extractJobData(jobElement, keyword) {
  try {
    const jobData = await jobElement.evaluate((el) => {
      const data = {
        jobId: "",
        title: "",
        company: "",
        location: "",
        jobUrl: "",
        postedDate: "",
        description: "",
        salary: "",
        jobType: "",
      };
      // Extract job ID from data-jk attribute (Indeed's "job key").
      data.jobId = el.getAttribute("data-jk") || "";
      // Extract title and URL (the title anchor usually carries the link).
      const titleSelectors = [
        "h2.jobTitle a",
        "h2.jobTitle",
        "a[data-jk]",
        "h2 a",
        ".jobTitle a",
        "[class*='jobTitle'] a",
      ];
      for (const selector of titleSelectors) {
        const titleElement = el.querySelector(selector);
        if (titleElement) {
          data.title = titleElement.textContent?.trim() || titleElement.innerText?.trim() || "";
          if (titleElement.tagName === "A") {
            data.jobUrl = titleElement.getAttribute("href") || "";
          } else {
            const link = titleElement.querySelector("a");
            if (link) {
              data.jobUrl = link.getAttribute("href") || "";
            }
          }
          if (data.title) break;
        }
      }
      // Extract company name.
      const companySelectors = [
        "[data-testid='company-name']",
        ".companyName",
        "[class*='companyName']",
        "span.companyName",
        "a[data-testid='company-name']",
      ];
      for (const selector of companySelectors) {
        const companyElement = el.querySelector(selector);
        if (companyElement) {
          const text = companyElement.textContent?.trim() || companyElement.innerText?.trim() || "";
          if (text && text.length > 0) {
            data.company = text;
            break;
          }
        }
      }
      // Extract location.
      const locationSelectors = [
        "[data-testid='job-location']",
        ".companyLocation",
        "[class*='companyLocation']",
        "[class*='location']",
      ];
      for (const selector of locationSelectors) {
        const locationElement = el.querySelector(selector);
        if (locationElement) {
          const text = locationElement.textContent?.trim() || locationElement.innerText?.trim() || "";
          if (text && text.length > 0) {
            data.location = text;
            break;
          }
        }
      }
      // Extract salary.
      const salarySelectors = [
        "[data-testid='attribute_snippet_testid']",
        ".salary-snippet",
        "[class*='salary']",
        ".salaryText",
      ];
      for (const selector of salarySelectors) {
        const salaryElement = el.querySelector(selector);
        if (salaryElement) {
          const text = salaryElement.textContent?.trim() || salaryElement.innerText?.trim() || "";
          // Accept only text that looks like a salary: contains "$" or a digit.
          if (text && (text.includes("$") || text.match(/\d+/))) {
            data.salary = text;
            break;
          }
        }
      }
      // Extract posted date.
      const dateSelectors = [
        "[data-testid='myJobsStateDate']",
        ".date",
        "[class*='date']",
        "span.date",
      ];
      for (const selector of dateSelectors) {
        const dateElement = el.querySelector(selector);
        if (dateElement) {
          const text = dateElement.textContent?.trim() || dateElement.innerText?.trim() || "";
          if (text) {
            // Parse relative dates like "2 days ago", "Just posted", etc.
            // into YYYY-MM-DD; anything unrecognized is kept verbatim.
            const now = new Date();
            if (text.match(/just posted|today/i)) {
              data.postedDate = now.toISOString().split("T")[0];
            } else if (text.match(/\d+\s*(day|days)/i)) {
              const match = text.match(/(\d+)\s*day/i);
              if (match) {
                const daysAgo = parseInt(match[1], 10);
                const date = new Date(now);
                date.setDate(date.getDate() - daysAgo);
                data.postedDate = date.toISOString().split("T")[0];
              }
            } else {
              data.postedDate = text;
            }
            break;
          }
        }
      }
      // Extract description snippet.
      const descSelectors = [
        ".job-snippet",
        "[class*='job-snippet']",
        "[class*='summary']",
        ".summary",
      ];
      for (const selector of descSelectors) {
        const descElement = el.querySelector(selector);
        if (descElement) {
          const text = descElement.textContent?.trim() || descElement.innerText?.trim() || "";
          // Require a minimum length so stray labels aren't mistaken for
          // the snippet.
          if (text && text.length > 20) {
            data.description = text.substring(0, 500); // Limit description length
            break;
          }
        }
      }
      return data;
    });
    // Clean and format Node-side.
    const title = cleanText(jobData.title);
    let jobUrl = jobData.jobUrl || "";
    // Make URL absolute if relative; fall back to the canonical viewjob URL.
    if (jobUrl && !jobUrl.startsWith("http")) {
      if (jobUrl.startsWith("/")) {
        jobUrl = `https://www.indeed.com${jobUrl}`;
      } else {
        jobUrl = `https://www.indeed.com/viewjob?jk=${jobData.jobId}`;
      }
    } else if (!jobUrl && jobData.jobId) {
      jobUrl = `https://www.indeed.com/viewjob?jk=${jobData.jobId}`;
    }
    // Bail out when the card yielded neither a real Indeed job key nor a
    // title — there is nothing useful to report.
    if (!jobData.jobId && !title) {
      return null;
    }
    // Generate a synthetic id when Indeed didn't expose data-jk.
    const jobId =
      jobData.jobId ||
      `indeed-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`;
    return {
      jobId,
      title,
      company: cleanText(jobData.company),
      location: cleanText(jobData.location),
      jobUrl,
      postedDate: jobData.postedDate,
      description: cleanText(jobData.description),
      salary: cleanText(jobData.salary),
      jobType: jobData.jobType,
      keyword,
      extractedAt: new Date().toISOString(),
      source: "indeed",
    };
  } catch (error) {
    logger.warning(`Error extracting job data: ${error.message}`);
    return null;
  }
}
/**
 * Splits a raw job description into duties text and requirements text.
 *
 * The description is broken into blank-line-separated blocks; each block
 * is routed to the "duties" or "requirements" bucket based on the most
 * recently seen section header (requirement headers take priority when a
 * block matches both). Blocks before any header default to duties. If no
 * header is ever matched and both buckets end up empty, the text is split
 * ~60/40 between the two fields as a fallback.
 *
 * @param {string} description - Raw job description text.
 * @returns {{duties: string, requirements: string}} - Separated sections
 *   (both "" for empty/blank input).
 */
function parseDutiesAndRequirements(description) {
  if (!description || description.trim().length === 0) {
    return { duties: "", requirements: "" };
  }

  // Headers that signal a duties/responsibilities section.
  const dutyHeaders = [
    /responsibilities?:/i,
    /duties?:/i,
    /what you['\u2019]ll do/i,
    /key responsibilities/i,
    /your role/i,
    /position overview/i,
    /about the role/i,
  ];
  // Headers that signal a requirements/qualifications section.
  const requirementHeaders = [
    /requirements?:/i,
    /qualifications?:/i,
    /must have/i,
    /required:/i,
    /what you['\u2019]ll bring/i,
    /you have:/i,
    /skills required/i,
    /minimum requirements/i,
  ];

  // Break into blank-line-separated blocks, dropping empty ones.
  const blocks = description
    .split(/\n\s*\n|\r\n\s*\r\n/)
    .filter((block) => block.trim().length > 0);

  let mode = "duties";
  const duties = [];
  const requirements = [];

  for (const block of blocks) {
    if (requirementHeaders.some((re) => re.test(block))) {
      mode = "requirements";
    } else if (dutyHeaders.some((re) => re.test(block))) {
      mode = "duties";
    }
    (mode === "requirements" ? requirements : duties).push(block.trim());
  }

  let dutiesText = duties.join("\n\n");
  let requirementsText = requirements.join("\n\n");

  // Fallback: nothing was bucketed at all — split ~60/40 so both fields
  // carry content.
  if (!dutiesText && !requirementsText) {
    const cut = Math.floor(description.length * 0.6);
    dutiesText = description.substring(0, cut).trim();
    requirementsText = description.substring(cut).trim();
  }

  return {
    duties: dutiesText,
    requirements: requirementsText,
  };
}
/**
 * Checks whether an enabled "Next" pagination control exists on the page.
 *
 * Probes a cascade of known next-button selectors; a match counts only
 * when the element is not marked disabled (attribute, aria-disabled, or
 * a "disabled" CSS class).
 *
 * @param {Object} page - Browser page handle.
 * @returns {Promise<boolean>} - true when a clickable next-page control
 *   was found; false otherwise (including on errors).
 */
async function hasNextPageAvailable(page) {
  const candidates = [
    "a[aria-label='Next']",
    "a[aria-label='Next Page']",
    "a[data-testid='pagination-page-next']",
    "[data-testid='pagination-page-next']",
    "a[aria-label*='Next']",
  ];
  try {
    for (const selector of candidates) {
      try {
        const link = await page.$(selector);
        if (!link) {
          continue;
        }
        // Treat the control as unusable when it is explicitly disabled.
        const disabled = await link
          .evaluate(
            (el) =>
              el.hasAttribute("disabled") ||
              el.getAttribute("aria-disabled") === "true" ||
              el.classList.contains("disabled")
          )
          .catch(() => false);
        if (!disabled) {
          return true;
        }
      } catch (e) {
        continue;
      }
    }
    return false;
  } catch (error) {
    logger.debug(`Error checking for next page: ${error.message}`);
    return false;
  }
}
/**
 * Navigate to the next page of Indeed search results.
 *
 * Finds an enabled "Next" control, clicks it, then waits for either a URL
 * change (full navigation) or new job cards (AJAX navigation), running
 * CAPTCHA checks along the way.
 *
 * @param {Object} page - Browser page handle.
 * @returns {Promise<boolean>} - true when job cards are present after the
 *   click; false when no usable next button exists or the new page shows
 *   no job cards.
 */
async function navigateToNextPage(page) {
  try {
    const nextButtonSelectors = [
      "a[aria-label='Next']",
      "a[aria-label='Next Page']",
      "a[data-testid='pagination-page-next']",
      "[data-testid='pagination-page-next']",
      "a[aria-label*='Next']",
    ];
    for (const selector of nextButtonSelectors) {
      try {
        const nextButton = await page.$(selector);
        if (nextButton) {
          // Skip buttons marked disabled via attribute, ARIA, or class.
          const isDisabled = await nextButton.evaluate((el) => {
            return el.hasAttribute("disabled") ||
              el.getAttribute("aria-disabled") === "true" ||
              el.classList.contains("disabled");
          }).catch(() => false);
          if (!isDisabled) {
            // Get current URL before navigation
            const urlBefore = page.url();
            await nextButton.scrollIntoViewIfNeeded().catch(() => {});
            await new Promise((resolve) => setTimeout(resolve, 500));
            await nextButton.click();
            logger.info(`✅ Clicked next page button`);
            // Wait for navigation to complete (URL change or content load)
            // Indeed might use AJAX, so wait for either URL change or content update
            let navigationComplete = false;
            const maxWaitTime = 10000; // 10 seconds max wait
            const startTime = Date.now();
            while (!navigationComplete && (Date.now() - startTime) < maxWaitTime) {
              await new Promise((resolve) => setTimeout(resolve, 500));
              // Check if URL changed (full page navigation)
              const currentUrl = page.url();
              if (currentUrl !== urlBefore) {
                logger.info(`📍 URL changed to: ${currentUrl}`);
                navigationComplete = true;
                break;
              }
              // Check if job elements appeared (AJAX navigation)
              const jobCount = await page.$$eval(
                "[data-jk], .job_seen_beacon, .jobsearch-SerpJobCard, .jobCard",
                (elements) => elements.length
              ).catch(() => 0);
              if (jobCount > 0) {
                logger.info(`✅ Found ${jobCount} job elements (AJAX navigation)`);
                navigationComplete = true;
                break;
              }
            }
            // Additional wait for content to stabilize
            await new Promise((resolve) => setTimeout(resolve, 2000));
            // Check for CAPTCHA after navigation.
            // NOTE(review): CAPTCHA errors thrown from here are caught by
            // the per-selector `catch (e) { continue; }` below, so they do
            // not propagate to the caller — confirm whether that is
            // intended.
            const currentUrl = page.url();
            if (currentUrl.includes('captcha') || currentUrl.includes('verify') || currentUrl.includes('blocked')) {
              logger.error(`❌ CAPTCHA detected after navigation to page. URL: ${currentUrl}`);
              throw new Error('Indeed is showing a CAPTCHA. Please run in non-headless mode (HEADLESS=false) or wait and try again.');
            }
            // Check page content for CAPTCHA
            try {
              const hasCaptcha = await page.evaluate(() => {
                const bodyText = document.body?.textContent?.toLowerCase() || '';
                const indicators = ['verify you\'re human', 'captcha', 'unusual traffic', 'automated queries'];
                return indicators.some(ind => bodyText.includes(ind));
              });
              if (hasCaptcha) {
                logger.error(`❌ CAPTCHA detected on page content after navigation`);
                throw new Error('Indeed CAPTCHA detected. Please run in non-headless mode (HEADLESS=false) to solve it manually.');
              }
            } catch (captchaError) {
              // Re-throw only CAPTCHA errors; evaluate() failures are ignored.
              if (captchaError.message.includes('CAPTCHA')) {
                throw captchaError;
              }
            }
            // Scroll page to trigger any lazy loading
            try {
              await page.evaluate(() => {
                window.scrollTo(0, 300);
              });
              await new Promise((resolve) => setTimeout(resolve, 1000));
            } catch (e) {
              // Ignore scroll errors
            }
            // Final check for job elements with multiple selectors
            const finalJobCount = await page.$$eval(
              "[data-jk], .job_seen_beacon, .jobsearch-SerpJobCard, .jobCard, div[data-testid='job-card']",
              (elements) => elements.length
            ).catch(() => 0);
            if (finalJobCount > 0) {
              logger.info(`✅ Navigation successful, found ${finalJobCount} job elements`);
              return true;
            } else {
              logger.warning(`⚠️ No job elements found after navigation (waited ${maxWaitTime}ms)`);
              // Debug: check what's on the page
              try {
                const pageTitle = await page.title();
                const pageUrl = page.url();
                logger.debug(`Page title: ${pageTitle}, URL: ${pageUrl}`);
                // Check if it's a CAPTCHA page
                const bodyText = await page.evaluate(() => document.body?.textContent?.toLowerCase() || '');
                if (bodyText.includes('captcha') || bodyText.includes('verify')) {
                  logger.error(`❌ Page appears to be a CAPTCHA page`);
                  throw new Error('Indeed is showing a CAPTCHA. Please run in non-headless mode (HEADLESS=false).');
                }
              } catch (e) {
                if (e.message.includes('CAPTCHA')) {
                  throw e;
                }
                // Ignore other debug errors
              }
              return false;
            }
          }
        }
      } catch (e) {
        // Try the next selector on any failure (see NOTE(review) above:
        // this also swallows the CAPTCHA errors thrown in this block).
        continue;
      }
    }
    logger.warning(`⚠️ Could not find or click next page button`);
    return false;
  } catch (error) {
    logger.warning(`Failed to navigate to next page: ${error.message}`);
    return false;
  }
}
// Public API: the Indeed strategy entry point and its URL builder.
module.exports = {
  indeedStrategy,
  buildSearchUrl,
};

File diff suppressed because it is too large Load Diff

View File

@ -1,493 +1,302 @@
/** /**
* SkipTheDrive Parsing Strategy * SkipTheDrive Parsing Strategy
* *
* Uses core-parser for browser management and ai-analyzer for utilities * Uses core-parser for browser management and ai-analyzer for utilities
*/ */
const { const {
logger, logger,
cleanText, cleanText,
containsAnyKeyword, containsAnyKeyword,
containsAllKeywords, validateLocationAgainstFilters,
matchesKeywordGroups, } = require("ai-analyzer");
validateLocationAgainstFilters,
} = require("ai-analyzer"); /**
* SkipTheDrive URL builder
/** */
* SkipTheDrive URL builder function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
*/ const baseUrl = "https://www.skipthedrive.com/";
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) { const params = new URLSearchParams({
const baseUrl = "https://www.skipthedrive.com/"; s: keyword,
const params = new URLSearchParams({ orderby: orderBy,
s: keyword, });
orderby: orderBy,
}); if (jobTypes && jobTypes.length > 0) {
params.append("job_type", jobTypes.join(","));
if (jobTypes && jobTypes.length > 0) { }
params.append("job_type", jobTypes.join(","));
} return `${baseUrl}?${params.toString()}`;
}
return `${baseUrl}?${params.toString()}`;
} /**
* SkipTheDrive parsing strategy function
/** */
* SkipTheDrive parsing strategy function async function skipthedriveStrategy(coreParser, options = {}) {
*/ const {
async function skipthedriveStrategy(coreParser, options = {}) { keywords = ["software engineer", "developer", "programmer"],
const { locationFilter = null,
keywords = ["software engineer", "developer", "programmer"], maxPages = 5,
keywordGroups = null, // Array of keyword groups for grouped AND/OR logic jobTypes = [],
locationFilter = null, } = options;
maxPages = 5,
jobTypes = [], const results = [];
useAndLogic = false, // Use AND logic instead of OR logic for keywords const rejectedResults = [];
} = options; const seenJobs = new Set();
const results = []; try {
const rejectedResults = []; // Create main page
const seenJobs = new Set(); const page = await coreParser.createPage("skipthedrive-main");
try { logger.info("🚀 Starting SkipTheDrive parser...");
// Create main page logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
const page = await coreParser.createPage("skipthedrive-main"); logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
logger.info(`📄 Max Pages: ${maxPages}`);
logger.info("🚀 Starting SkipTheDrive parser...");
logger.info(`🔍 Keywords: ${keywords.join(", ")}`); // Search for each keyword
if (keywordGroups) { for (const keyword of keywords) {
logger.info(`🔗 Keyword Logic: Grouped AND/OR - ${keywordGroups.map(g => `(${g.join(' OR ')})`).join(' AND ')}`); logger.info(`\n🔍 Searching for: ${keyword}`);
} else {
logger.info(`🔗 Keyword Logic: ${useAndLogic ? "AND (all keywords must match)" : "OR (any keyword matches)"}`); const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
}
logger.info(`📍 Location Filter: ${locationFilter || "None"}`); try {
logger.info(`📄 Max Pages: ${maxPages}`); // Navigate to search results
await coreParser.navigateTo(searchUrl, {
// Determine search keywords based on logic type pageId: "skipthedrive-main",
let searchKeywords; retries: 2,
if (keywordGroups) { timeout: 30000,
// For grouped AND/OR logic, search each keyword in each group (OR within groups) });
searchKeywords = keywordGroups.flat(); // Flatten all keywords from all groups
} else if (useAndLogic) { // Wait for job listings to load
// For simple AND logic, combine all keywords into a single search query const hasResults = await coreParser
searchKeywords = [keywords.join(" ")]; .waitForSelector(
} else { "#loops-wrapper",
// For OR logic, search each keyword separately {
searchKeywords = keywords; timeout: 5000,
} },
"skipthedrive-main"
// Search for each keyword (or combined keyword for AND logic) )
for (const keyword of searchKeywords) { .catch(() => {
logger.info(`\n🔍 Searching for: ${keyword}`); logger.warning(`No results found for keyword: ${keyword}`);
return false;
const searchUrl = buildSearchUrl(keyword, "date", jobTypes); });
try { if (!hasResults) {
// Navigate to search results continue;
await coreParser.navigateTo(searchUrl, { }
pageId: "skipthedrive-main",
retries: 2, // Process multiple pages
timeout: 30000, let currentPage = 1;
}); let hasNextPage = true;
// Wait for job listings to load while (hasNextPage && currentPage <= maxPages) {
const hasResults = await page logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);
.waitForSelector("#loops-wrapper", {
timeout: 5000, // Extract jobs from current page
}) const pageJobs = await extractJobsFromPage(
.then(() => true) page,
.catch(() => { keyword,
logger.warning(`No results found for keyword: ${keyword}`); locationFilter
return false; );
});
for (const job of pageJobs) {
if (!hasResults) { // Skip duplicates
continue; if (seenJobs.has(job.jobId)) continue;
} seenJobs.add(job.jobId);
// Process multiple pages // Validate location if filtering enabled
let currentPage = 1; if (locationFilter) {
let hasNextPage = true; const locationValid = validateLocationAgainstFilters(
job.location,
while (hasNextPage && currentPage <= maxPages) { locationFilter
logger.info(`📄 Processing page ${currentPage} for "${keyword}"`); );
// Extract jobs from current page if (!locationValid) {
const pageJobs = await extractJobsFromPage( rejectedResults.push({
page, ...job,
keyword, rejectionReason: "Location filter mismatch",
locationFilter, });
keywords, continue;
keywordGroups, }
useAndLogic }
);
results.push(job);
for (const job of pageJobs) { }
// Skip duplicates
if (seenJobs.has(job.jobId)) continue; // Check for next page
seenJobs.add(job.jobId); hasNextPage = await hasNextPageAvailable(page);
if (hasNextPage && currentPage < maxPages) {
// Validate keywords based on logic type await navigateToNextPage(page, currentPage + 1);
if (keywordGroups) { currentPage++;
// Grouped AND/OR logic: all groups must match (AND), at least one keyword per group (OR)
const fullText = `${job.title} ${job.description} ${job.company}`; // Wait for new page to load
if (!matchesKeywordGroups(fullText, keywordGroups)) { await page.waitForTimeout(2000);
rejectedResults.push({ } else {
...job, hasNextPage = false;
rejectionReason: "Job does not match all keyword groups", }
}); }
continue; } catch (error) {
} logger.error(`Error processing keyword "${keyword}": ${error.message}`);
} else if (useAndLogic) { }
// Simple AND logic: all keywords must match }
const fullText = `${job.title} ${job.description} ${job.company}`.toLowerCase();
if (!containsAllKeywords(fullText, keywords)) { logger.info(
rejectedResults.push({ `🎯 SkipTheDrive parsing completed: ${results.length} jobs found, ${rejectedResults.length} rejected`
...job, );
rejectionReason: "Not all keywords found in job listing",
}); return {
continue; results,
} rejectedResults,
} summary: {
totalJobs: results.length,
// Validate location if filtering enabled totalRejected: rejectedResults.length,
if (locationFilter) { keywords: keywords.join(", "),
const locationValid = validateLocationAgainstFilters( locationFilter,
job.location, source: "skipthedrive",
locationFilter },
); };
} catch (error) {
if (!locationValid) { logger.error(`❌ SkipTheDrive parsing failed: ${error.message}`);
rejectedResults.push({ throw error;
...job, }
rejectionReason: "Location filter mismatch", }
});
continue; /**
} * Extract jobs from current page
} */
async function extractJobsFromPage(page, keyword, locationFilter) {
results.push(job); const jobs = [];
}
try {
// Check for next page // Get all job article elements
hasNextPage = await hasNextPageAvailable(page); const jobElements = await page.$$("article.job_listing");
if (hasNextPage && currentPage < maxPages) {
await navigateToNextPage(page, currentPage + 1); for (const jobElement of jobElements) {
currentPage++; try {
const job = await extractJobData(jobElement, keyword);
// Wait for new page to load if (job) {
await page.waitForTimeout(2000); jobs.push(job);
} else { }
hasNextPage = false; } catch (error) {
} logger.warning(`Failed to extract job data: ${error.message}`);
} }
} catch (error) { }
logger.error(`Error processing keyword "${keyword}": ${error.message}`); } catch (error) {
} logger.error(`Failed to extract jobs from page: ${error.message}`);
} }
logger.info( return jobs;
`🎯 SkipTheDrive parsing completed: ${results.length} jobs found, ${rejectedResults.length} rejected` }
);
/**
return { * Extract data from individual job element
results, */
rejectedResults, async function extractJobData(jobElement, keyword) {
summary: { try {
totalJobs: results.length, // Extract job ID
totalRejected: rejectedResults.length, const articleId = (await jobElement.getAttribute("id")) || "";
keywords: keywords.join(", "), const jobId = articleId ? articleId.replace("post-", "") : "";
locationFilter,
source: "skipthedrive", // Extract title
}, const titleElement = await jobElement.$(".job_listing-title a");
}; const title = titleElement
} catch (error) { ? cleanText(await titleElement.textContent())
logger.error(`❌ SkipTheDrive parsing failed: ${error.message}`); : "";
throw error; const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";
}
} // Extract company
const companyElement = await jobElement.$(".company");
/** const company = companyElement
* Extract jobs from current page ? cleanText(await companyElement.textContent())
*/ : "";
async function extractJobsFromPage(page, keyword, locationFilter, allKeywords = [], keywordGroups = null, useAndLogic = false) {
const jobs = []; // Extract location
const locationElement = await jobElement.$(".location");
try { const location = locationElement
// Get all job article elements ? cleanText(await locationElement.textContent())
const jobElements = await page.$$("article.job_listing"); : "";
for (const jobElement of jobElements) { // Extract date posted
try { const dateElement = await jobElement.$(".job-date");
const job = await extractJobData(jobElement, keyword); const dateText = dateElement
if (job) { ? cleanText(await dateElement.textContent())
jobs.push(job); : "";
}
} catch (error) { // Extract description
logger.warning(`Failed to extract job data: ${error.message}`); const descElement = await jobElement.$(".job_listing-description");
} const description = descElement
} ? cleanText(await descElement.textContent())
} catch (error) { : "";
logger.error(`Failed to extract jobs from page: ${error.message}`);
} // Check if featured
const featuredElement = await jobElement.$(".featured");
return jobs; const isFeatured = featuredElement !== null;
}
// Parse date
/** let datePosted = null;
* Parse job description to separate role duties from job requirements let daysAgo = null;
*/
function parseDutiesAndRequirements(description) { if (dateText) {
if (!description || description.trim().length === 0) { const match = dateText.match(/(\d+)\s+days?\s+ago/);
return { duties: "", requirements: "" }; if (match) {
} daysAgo = parseInt(match[1]);
const date = new Date();
// Common section headers that indicate duties/responsibilities date.setDate(date.getDate() - daysAgo);
const dutiesKeywords = [ datePosted = date.toISOString().split("T")[0];
/responsibilities?:/i, }
/duties?:/i, }
/what you['\u2019]ll do/i,
/key responsibilities/i, return {
/your role/i, jobId,
/position overview/i, title,
/about the role/i, company,
/role overview/i, location,
/what we need/i, jobUrl,
/you will:/i, datePosted,
/you['\u2019]ll be responsible/i, dateText,
]; daysAgo,
description,
// Common section headers that indicate requirements/qualifications isFeatured,
const requirementsKeywords = [ keyword,
/requirements?:/i, extractedAt: new Date().toISOString(),
/qualifications?:/i, source: "skipthedrive",
/must have/i, };
/required:/i, } catch (error) {
/what you['\u2019]ll bring/i, logger.warning(`Error extracting job data: ${error.message}`);
/you have:/i, return null;
/skills required/i, }
/minimum requirements/i, }
/preferred qualifications/i,
/education:/i, /**
/experience:/i, * Check if next page is available
/you must have/i, */
/we['\u2019]re looking for/i, async function hasNextPageAvailable(page) {
]; try {
const nextButton = await page.$(".next-page");
// Split description into sections (by common delimiters) return nextButton !== null;
const sections = description.split(/\n\s*\n|\r\n\s*\r\n/).filter(s => s.trim().length > 0); } catch {
return false;
let currentSection = "duties"; // Default to duties }
let dutiesText = ""; }
let requirementsText = "";
/**
for (const section of sections) { * Navigate to next page
const sectionLower = section.toLowerCase(); */
async function navigateToNextPage(page, pageNumber) {
// Check if this section is about requirements try {
let isRequirementsSection = false; const nextButton = await page.$(".next-page");
for (const keyword of requirementsKeywords) { if (nextButton) {
if (keyword.test(section)) { await nextButton.click();
isRequirementsSection = true; }
currentSection = "requirements"; } catch (error) {
break; logger.warning(
} `Failed to navigate to page ${pageNumber}: ${error.message}`
} );
}
// Check if this section is about duties/responsibilities }
if (!isRequirementsSection) {
for (const keyword of dutiesKeywords) { module.exports = {
if (keyword.test(section)) { skipthedriveStrategy,
currentSection = "duties"; buildSearchUrl,
break; extractJobsFromPage,
} extractJobData,
} };
}
// Add to appropriate section
if (currentSection === "requirements") {
requirementsText += (requirementsText ? "\n\n" : "") + section.trim();
} else {
dutiesText += (dutiesText ? "\n\n" : "") + section.trim();
}
}
// If we couldn't split by sections, try to find bullet points or numbered lists
if (!dutiesText && !requirementsText) {
const lines = description.split(/\n/);
let foundRequirementsHeader = false;
for (let i = 0; i < lines.length; i++) {
const line = lines[i].trim();
if (line.length === 0) continue;
// Check if this line is a requirements header
for (const keyword of requirementsKeywords) {
if (keyword.test(line)) {
foundRequirementsHeader = true;
break;
}
}
if (foundRequirementsHeader) {
requirementsText += (requirementsText ? "\n" : "") + line;
} else {
// Check if it's a duties header
let isDutiesHeader = false;
for (const keyword of dutiesKeywords) {
if (keyword.test(line)) {
isDutiesHeader = true;
break;
}
}
if (!isDutiesHeader) {
// Add to duties if we haven't found requirements header yet
if (!foundRequirementsHeader) {
dutiesText += (dutiesText ? "\n" : "") + line;
} else {
requirementsText += (requirementsText ? "\n" : "") + line;
}
} else {
dutiesText += (dutiesText ? "\n" : "") + line;
}
}
}
}
// Fallback: if we still have nothing separated, put first 60% in duties, rest in requirements
if (!dutiesText && !requirementsText && description) {
const midPoint = Math.floor(description.length * 0.6);
const lastRequirementsKeyword = description.toLowerCase().lastIndexOf("requirement");
const lastQualificationsKeyword = description.toLowerCase().lastIndexOf("qualification");
const splitPoint = Math.max(
lastRequirementsKeyword > 0 ? lastRequirementsKeyword : midPoint,
lastQualificationsKeyword > 0 ? lastQualificationsKeyword : midPoint
);
dutiesText = description.substring(0, splitPoint).trim();
requirementsText = description.substring(splitPoint).trim();
}
return {
duties: dutiesText.trim(),
requirements: requirementsText.trim(),
};
}
/**
 * Extract a normalized job record from a single SkipTheDrive listing element.
 *
 * Reads the DOM id, title link, company, location, relative posting date,
 * description, and "featured" marker from an `article.job_listing` node,
 * then splits the description into duties vs. requirements.
 *
 * @param {object} jobElement - Element handle exposing `$`, `getAttribute`,
 *   and `textContent` (Playwright-style) for one job article.
 * @param {string} keyword - Search keyword that surfaced this listing;
 *   copied onto the result for traceability.
 * @returns {Promise<object|null>} Job record, or null when extraction fails
 *   (the error is logged as a warning and swallowed).
 */
async function extractJobData(jobElement, keyword) {
  try {
    // Job ID is derived from the article's DOM id, e.g. "post-12345" -> "12345".
    const articleId = (await jobElement.getAttribute("id")) || "";
    const jobId = articleId ? articleId.replace("post-", "") : "";

    // Title and canonical job URL come from the same anchor.
    const titleElement = await jobElement.$(".job_listing-title a");
    const title = titleElement
      ? cleanText(await titleElement.textContent())
      : "";
    const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";

    // Company name.
    const companyElement = await jobElement.$(".company");
    const company = companyElement
      ? cleanText(await companyElement.textContent())
      : "";

    // Location string (may be empty; location filtering happens upstream).
    const locationElement = await jobElement.$(".location");
    const location = locationElement
      ? cleanText(await locationElement.textContent())
      : "";

    // Relative posting date, e.g. "3 days ago".
    const dateElement = await jobElement.$(".job-date");
    const dateText = dateElement
      ? cleanText(await dateElement.textContent())
      : "";

    // Full listing description (split into duties/requirements below).
    const descElement = await jobElement.$(".job_listing-description");
    const description = descElement
      ? cleanText(await descElement.textContent())
      : "";

    // A ".featured" child marks promoted listings.
    const featuredElement = await jobElement.$(".featured");
    const isFeatured = featuredElement !== null;

    // Convert "N day(s) ago" into an absolute ISO date (YYYY-MM-DD).
    // NOTE(review): other phrasings such as "today" are left unparsed
    // (datePosted stays null) — confirm the site never emits them.
    let datePosted = null;
    let daysAgo = null;

    if (dateText) {
      const match = dateText.match(/(\d+)\s+days?\s+ago/);
      if (match) {
        // Always pass an explicit radix; bare parseInt is a known footgun.
        daysAgo = Number.parseInt(match[1], 10);
        const date = new Date();
        date.setDate(date.getDate() - daysAgo);
        datePosted = date.toISOString().split("T")[0];
      }
    }

    // Separate free-text description into duties vs. requirements sections.
    const parsed = parseDutiesAndRequirements(description);

    return {
      jobId,
      title,
      company,
      location,
      jobUrl,
      datePosted,
      dateText,
      daysAgo,
      description,
      roleDuties: parsed.duties,
      jobRequirements: parsed.requirements,
      isFeatured,
      keyword,
      extractedAt: new Date().toISOString(),
      source: "skipthedrive",
    };
  } catch (error) {
    // Extraction is best-effort per listing: log and let the caller skip it.
    logger.warning(`Error extracting job data: ${error.message}`);
    return null;
  }
}
/**
 * Determine whether the current results page offers a "next page" control.
 *
 * @param {object} page - Page handle exposing an async `$` selector lookup.
 * @returns {Promise<boolean>} true when a ".next-page" element exists;
 *   false when it is absent or the lookup itself throws.
 */
async function hasNextPageAvailable(page) {
  let pagerControl = null;
  try {
    pagerControl = await page.$(".next-page");
  } catch {
    // Lookup failures (e.g. detached page, navigation in flight) count as
    // "no next page" so the pagination loop simply stops.
    return false;
  }
  return pagerControl !== null;
}
}
/**
 * Click through to the next results page, if a pager control is present.
 *
 * @param {object} page - Page handle exposing an async `$` selector lookup.
 * @param {number} pageNumber - Target page number (used only in the warning
 *   message when navigation fails).
 * @returns {Promise<void>}
 */
async function navigateToNextPage(page, pageNumber) {
  try {
    const pagerLink = await page.$(".next-page");
    // No control present: nothing to click, return silently.
    if (!pagerLink) {
      return;
    }
    await pagerLink.click();
  } catch (error) {
    // Non-fatal: the caller's pagination loop will simply stop advancing.
    logger.warning(
      `Failed to navigate to page ${pageNumber}: ${error.message}`
    );
  }
}
// Public API: the main strategy entry point plus the extraction helpers,
// which are also exported so they can be unit-tested in isolation.
module.exports = {
  skipthedriveStrategy,
  buildSearchUrl,
  extractJobsFromPage,
  extractJobData,
};

View File

@ -10,34 +10,20 @@ const path = require("path");
const fs = require("fs"); const fs = require("fs");
const CoreParser = require("../core-parser"); const CoreParser = require("../core-parser");
const { linkedinStrategy } = require("./strategies/linkedin-strategy"); const { linkedinStrategy } = require("./strategies/linkedin-strategy");
const { logger, analyzeBatch, checkOllamaStatus, DEFAULT_MODEL } = require("ai-analyzer"); const { logger, analyzeBatch, checkOllamaStatus } = require("ai-analyzer");
// Load environment variables - check both linkedin-parser/.env and root .env // Load environment variables
const localEnvPath = path.join(__dirname, ".env"); require("dotenv").config({ path: path.join(__dirname, ".env") });
const rootEnvPath = path.join(__dirname, "..", ".env");
// Try local .env first, then root .env
if (fs.existsSync(localEnvPath)) {
require("dotenv").config({ path: localEnvPath });
} else if (fs.existsSync(rootEnvPath)) {
require("dotenv").config({ path: rootEnvPath });
} else {
// Try default dotenv behavior (looks in current directory and parent directories)
require("dotenv").config();
}
// Configuration from environment // Configuration from environment
const LINKEDIN_USERNAME = process.env.LINKEDIN_USERNAME; const LINKEDIN_USERNAME = process.env.LINKEDIN_USERNAME;
const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD; const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD;
const HEADLESS = process.env.HEADLESS !== "false"; const HEADLESS = process.env.HEADLESS !== "false";
const SEARCH_KEYWORDS = const SEARCH_KEYWORDS =
process.env.SEARCH_KEYWORDS || "layoff";//,downsizing";//,job cuts"; process.env.SEARCH_KEYWORDS || "layoff,downsizing,job cuts";
const LOCATION_FILTER = process.env.LOCATION_FILTER; const LOCATION_FILTER = process.env.LOCATION_FILTER;
const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS !== "false"; const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS === "true";
const AI_CONTEXT = process.env.AI_CONTEXT || "job market analysis and trends";
const OLLAMA_MODEL = process.env.OLLAMA_MODEL || DEFAULT_MODEL;
const MAX_RESULTS = parseInt(process.env.MAX_RESULTS) || 50; const MAX_RESULTS = parseInt(process.env.MAX_RESULTS) || 50;
const EXTRACT_LOCATION_FROM_PROFILE = process.env.EXTRACT_LOCATION_FROM_PROFILE === "true";
/** /**
* Main LinkedIn parser function * Main LinkedIn parser function
@ -72,7 +58,6 @@ async function startLinkedInParser(options = {}) {
keywords, keywords,
locationFilter: LOCATION_FILTER, locationFilter: LOCATION_FILTER,
maxResults: MAX_RESULTS, maxResults: MAX_RESULTS,
extractLocationFromProfile: EXTRACT_LOCATION_FROM_PROFILE,
credentials: { credentials: {
username: LINKEDIN_USERNAME, username: LINKEDIN_USERNAME,
password: LINKEDIN_PASSWORD, password: LINKEDIN_PASSWORD,
@ -81,109 +66,52 @@ async function startLinkedInParser(options = {}) {
const { results, rejectedResults, summary } = parseResult; const { results, rejectedResults, summary } = parseResult;
// AI Analysis if enabled - embed results into each post // AI Analysis if enabled
let resultsWithAI = results; let analysisResults = null;
let aiAnalysisCompleted = false;
if (ENABLE_AI_ANALYSIS && results.length > 0) { if (ENABLE_AI_ANALYSIS && results.length > 0) {
logger.step("🧠 Running AI Analysis..."); logger.step("🧠 Running AI Analysis...");
const ollamaAvailable = await checkOllamaStatus(OLLAMA_MODEL); const ollamaStatus = await checkOllamaStatus();
if (ollamaAvailable) { if (ollamaStatus.available) {
// Prepare data for analysis (analyzeBatch expects posts with 'text' field) analysisResults = await analyzeBatch(results, {
const analysisData = results.map((post) => ({ context:
text: post.text || post.content || "", "LinkedIn posts analysis focusing on job market trends and layoffs",
location: post.location || "",
keyword: post.keyword || "",
timestamp: post.timestamp || post.extractedAt || "",
}));
const analysisResults = await analyzeBatch(
analysisData,
AI_CONTEXT,
OLLAMA_MODEL
);
// Embed AI analysis into each result
resultsWithAI = results.map((post, index) => {
const aiResult = analysisResults[index];
return {
...post,
aiAnalysis: {
isRelevant: aiResult.isRelevant,
confidence: aiResult.confidence,
reasoning: aiResult.reasoning,
context: AI_CONTEXT,
model: OLLAMA_MODEL,
analyzedAt: new Date().toISOString(),
},
};
}); });
aiAnalysisCompleted = true;
logger.success(`✅ AI Analysis completed for ${results.length} posts`); logger.success(`✅ AI Analysis completed for ${results.length} posts`);
} else { } else {
logger.warning("⚠️ Ollama not available, skipping AI analysis"); logger.warning("⚠️ Ollama not available, skipping AI analysis");
} }
} }
// Prepare results with embedded AI analysis // Save results
const outputData = { const outputData = {
metadata: { metadata: {
timestamp: new Date().toISOString(), extractedAt: new Date().toISOString(),
totalPosts: resultsWithAI.length,
rejectedPosts: rejectedResults.length,
aiAnalysisEnabled: ENABLE_AI_ANALYSIS,
aiAnalysisCompleted: aiAnalysisCompleted,
aiContext: aiAnalysisCompleted ? AI_CONTEXT : undefined,
aiModel: aiAnalysisCompleted ? OLLAMA_MODEL : undefined,
locationFilter: LOCATION_FILTER || undefined,
parser: "linkedin-parser", parser: "linkedin-parser",
version: "2.0.0", version: "2.0.0",
summary,
analysisResults,
}, },
results: resultsWithAI, results,
rejectedResults,
}; };
// Prepare rejected posts file
const rejectedData = rejectedResults.map((post) => ({
rejected: true,
reason: post.rejectionReason || "Location filter failed: Location not in filter",
keyword: post.keyword,
text: post.text || post.content,
profileLink: post.profileLink || post.authorUrl,
location: post.location || post.profileLocation,
timestamp: post.timestamp || post.extractedAt,
}));
const resultsDir = path.join(__dirname, "results"); const resultsDir = path.join(__dirname, "results");
if (!fs.existsSync(resultsDir)) { if (!fs.existsSync(resultsDir)) {
fs.mkdirSync(resultsDir, { recursive: true }); fs.mkdirSync(resultsDir, { recursive: true });
} }
const timestamp = new Date().toISOString().replace(/[:.]/g, "-"); const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
const resultsFilename = `linkedin-results-${timestamp}.json`; const filename = `linkedin-results-${timestamp}.json`;
const rejectedFilename = `linkedin-rejected-${timestamp}.json`; const filepath = path.join(resultsDir, filename);
const resultsFilepath = path.join(resultsDir, resultsFilename);
const rejectedFilepath = path.join(resultsDir, rejectedFilename);
// Save results with AI analysis fs.writeFileSync(filepath, JSON.stringify(outputData, null, 2));
fs.writeFileSync(resultsFilepath, JSON.stringify(outputData, null, 2));
// Save rejected posts separately
if (rejectedData.length > 0) {
fs.writeFileSync(
rejectedFilepath,
JSON.stringify(rejectedData, null, 2)
);
}
// Final summary // Final summary
logger.success("✅ LinkedIn parsing completed successfully!"); logger.success("✅ LinkedIn parsing completed successfully!");
logger.info(`📊 Total posts found: ${resultsWithAI.length}`); logger.info(`📊 Total posts found: ${results.length}`);
logger.info(`❌ Total rejected: ${rejectedResults.length}`); logger.info(`❌ Total rejected: ${rejectedResults.length}`);
logger.info(`📁 Results saved to: ${resultsFilepath}`); logger.info(`📁 Results saved to: ${filepath}`);
if (rejectedData.length > 0) {
logger.info(`📁 Rejected posts saved to: ${rejectedFilepath}`);
}
return outputData; return outputData;
} catch (error) { } catch (error) {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

3667
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -1,80 +1,80 @@
const fs = require("fs"); const fs = require("fs");
const assert = require("assert"); const assert = require("assert");
const { analyzeSinglePost, checkOllamaStatus } = require("../ai-analyzer"); const { analyzeSinglePost, checkOllamaStatus } = require("../ai-analyzer");
console.log("AI Analyzer logic tests"); console.log("AI Analyzer logic tests");
const testData = JSON.parse( const testData = JSON.parse(
fs.readFileSync(__dirname + "/test-data.json", "utf-8") fs.readFileSync(__dirname + "/test-data.json", "utf-8")
); );
const aiResults = testData.positive; const aiResults = testData.positive;
const context = "job layoffs and workforce reduction"; const context = "job layoffs and workforce reduction";
const model = process.env.OLLAMA_MODEL || "mistral"; // Use OLLAMA_MODEL from env or default to mistral const model = "mistral"; // or your default model
(async () => { (async () => {
// Check if Ollama is available // Check if Ollama is available
const ollamaAvailable = await checkOllamaStatus(model); const ollamaAvailable = await checkOllamaStatus(model);
if (!ollamaAvailable) { if (!ollamaAvailable) {
console.log("SKIP: Ollama not available - skipping AI analyzer tests"); console.log("SKIP: Ollama not available - skipping AI analyzer tests");
console.log("PASS: AI analyzer tests skipped (Ollama not running)"); console.log("PASS: AI analyzer tests skipped (Ollama not running)");
return; return;
} }
console.log(`Testing AI analyzer with ${aiResults.length} posts...`); console.log(`Testing AI analyzer with ${aiResults.length} posts...`);
for (let i = 0; i < aiResults.length; i++) { for (let i = 0; i < aiResults.length; i++) {
const post = aiResults[i]; const post = aiResults[i];
console.log(`Testing post ${i + 1}: "${post.text.substring(0, 50)}..."`); console.log(`Testing post ${i + 1}: "${post.text.substring(0, 50)}..."`);
const aiOutput = await analyzeSinglePost(post.text, context, model); const aiOutput = await analyzeSinglePost(post.text, context, model);
// Test that the function returns the expected structure // Test that the function returns the expected structure
assert( assert(
typeof aiOutput === "object" && aiOutput !== null, typeof aiOutput === "object" && aiOutput !== null,
`Post ${i} output is not an object` `Post ${i} output is not an object`
); );
assert( assert(
typeof aiOutput.isRelevant === "boolean", typeof aiOutput.isRelevant === "boolean",
`Post ${i} isRelevant is not a boolean: ${typeof aiOutput.isRelevant}` `Post ${i} isRelevant is not a boolean: ${typeof aiOutput.isRelevant}`
); );
assert( assert(
typeof aiOutput.confidence === "number", typeof aiOutput.confidence === "number",
`Post ${i} confidence is not a number: ${typeof aiOutput.confidence}` `Post ${i} confidence is not a number: ${typeof aiOutput.confidence}`
); );
assert( assert(
typeof aiOutput.reasoning === "string", typeof aiOutput.reasoning === "string",
`Post ${i} reasoning is not a string: ${typeof aiOutput.reasoning}` `Post ${i} reasoning is not a string: ${typeof aiOutput.reasoning}`
); );
// Test that confidence is within valid range // Test that confidence is within valid range
assert( assert(
aiOutput.confidence >= 0 && aiOutput.confidence <= 1, aiOutput.confidence >= 0 && aiOutput.confidence <= 1,
`Post ${i} confidence out of range: ${aiOutput.confidence} (should be 0-1)` `Post ${i} confidence out of range: ${aiOutput.confidence} (should be 0-1)`
); );
// Test that reasoning exists and is not empty // Test that reasoning exists and is not empty
assert( assert(
aiOutput.reasoning && aiOutput.reasoning.length > 0, aiOutput.reasoning && aiOutput.reasoning.length > 0,
`Post ${i} missing or empty reasoning` `Post ${i} missing or empty reasoning`
); );
// Test that relevance is a boolean value // Test that relevance is a boolean value
assert( assert(
aiOutput.isRelevant === true || aiOutput.isRelevant === false, aiOutput.isRelevant === true || aiOutput.isRelevant === false,
`Post ${i} isRelevant is not a valid boolean: ${aiOutput.isRelevant}` `Post ${i} isRelevant is not a valid boolean: ${aiOutput.isRelevant}`
); );
console.log( console.log(
` ✓ Post ${i + 1}: relevant=${aiOutput.isRelevant}, confidence=${ ` ✓ Post ${i + 1}: relevant=${aiOutput.isRelevant}, confidence=${
aiOutput.confidence aiOutput.confidence
}` }`
); );
} }
console.log( console.log(
"PASS: AI analyzer returns valid structure and values for all test posts." "PASS: AI analyzer returns valid structure and values for all test posts."
); );
})(); })();