Enhance job search parser with advanced keyword filtering and job detail extraction
- Implemented grouped AND/OR logic for keyword searches, allowing for more flexible job matching criteria.
- Added a minimum date filter to restrict job results to postings after a specified date.
- Enhanced job detail extraction to include role duties and job requirements from job descriptions.
- Updated README with new command line options and examples for using date filters and keyword logic.
- Improved logging to provide clearer insights into keyword matching logic and job search parameters.
This commit is contained in:
parent
00c4cf1b6f
commit
47cdc03fb8
@ -69,34 +69,83 @@ async function analyzeBatch(
|
|||||||
logger.ai(`Analyzing batch of ${posts.length} posts with ${model}...`);
|
logger.ai(`Analyzing batch of ${posts.length} posts with ${model}...`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const prompt = `Analyze ${posts.length} LinkedIn posts for relevance to: "${context}"
|
// Detect if context is about a student profile
|
||||||
|
const isStudentContext = /student|undergraduate|first year|second year|third year|fourth year|freshman|sophomore|junior|senior|co-op|internship/i.test(context);
|
||||||
|
|
||||||
|
// Build enhanced prompt based on context type
|
||||||
|
let analysisInstructions = "";
|
||||||
|
if (isStudentContext) {
|
||||||
|
analysisInstructions = `
|
||||||
|
ANALYSIS FOCUS (Student Context Detected):
|
||||||
|
- Pay special attention to the "Requirements" section
|
||||||
|
- Evaluate if the job requirements match the student's level (${context})
|
||||||
|
- Consider: Are requirements too advanced? Are they appropriate for entry-level/co-op/internship?
|
||||||
|
- Check if the role duties are suitable for a student's skill level
|
||||||
|
- Look for keywords like "co-op", "internship", "entry-level", "student", "junior"
|
||||||
|
- If requirements mention "years of experience", "senior", "expert", "PhD", etc., this may not be suitable
|
||||||
|
- If requirements are reasonable for a student (basic skills, willingness to learn), mark as relevant`;
|
||||||
|
} else {
|
||||||
|
analysisInstructions = `
|
||||||
|
ANALYSIS FOCUS:
|
||||||
|
- Evaluate overall relevance to: "${context}"
|
||||||
|
- Consider job title, description, duties, and requirements
|
||||||
|
- Assess if the job matches the specified criteria`;
|
||||||
|
}
|
||||||
|
|
||||||
POSTS:
|
const prompt = `Analyze ${posts.length} job postings for relevance to: "${context}"
|
||||||
|
|
||||||
|
${analysisInstructions}
|
||||||
|
|
||||||
|
JOB POSTINGS:
|
||||||
${posts
|
${posts
|
||||||
.map(
|
.map(
|
||||||
(post, i) => `
|
(post, i) => {
|
||||||
POST ${i + 1}:
|
// For student contexts, prioritize Requirements section if text is too long
|
||||||
"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}"
|
let jobText = post.text;
|
||||||
`
|
if (isStudentContext && jobText.length > 1200) {
|
||||||
|
// Try to extract Requirements section if present
|
||||||
|
const requirementsMatch = jobText.match(/Requirements?:[\s\S]{0,600}/i);
|
||||||
|
const dutiesMatch = jobText.match(/Role Duties?:[\s\S]{0,300}/i);
|
||||||
|
const titleMatch = jobText.match(/Title:[\s\S]{0,100}/i);
|
||||||
|
|
||||||
|
if (requirementsMatch) {
|
||||||
|
// Prioritize: Title + Requirements (most important for students)
|
||||||
|
jobText = (titleMatch ? titleMatch[0] + "\n\n" : "") +
|
||||||
|
(requirementsMatch ? requirementsMatch[0] : "") +
|
||||||
|
(dutiesMatch ? "\n\n" + dutiesMatch[0] : "");
|
||||||
|
} else {
|
||||||
|
// Fallback to truncation
|
||||||
|
jobText = jobText.substring(0, 1200) + "...";
|
||||||
|
}
|
||||||
|
} else if (jobText.length > 1200) {
|
||||||
|
jobText = jobText.substring(0, 1200) + "...";
|
||||||
|
}
|
||||||
|
|
||||||
|
return `
|
||||||
|
JOB ${i + 1}:
|
||||||
|
${jobText}
|
||||||
|
`;
|
||||||
|
}
|
||||||
)
|
)
|
||||||
.join("")}
|
.join("")}
|
||||||
|
|
||||||
REQUIRED FORMAT - Respond with EXACTLY ${posts.length} lines, one per post:
|
REQUIRED FORMAT - Respond with EXACTLY ${posts.length} lines, one per post:
|
||||||
POST 1: YES | 0.8 | reason here
|
JOB 1: YES | 0.8 | reason here
|
||||||
POST 2: NO | 0.2 | reason here
|
JOB 2: NO | 0.2 | reason here
|
||||||
POST 3: YES | 0.9 | reason here
|
JOB 3: YES | 0.9 | reason here
|
||||||
|
|
||||||
RULES:
|
RULES:
|
||||||
- Use YES or NO (uppercase)
|
- Use YES or NO (uppercase)
|
||||||
- Use pipe character | as separator
|
- Use pipe character | as separator
|
||||||
- Confidence must be 0.0 to 1.0 (decimal number)
|
- Confidence must be 0.0 to 1.0 (decimal number)
|
||||||
- Keep reasoning brief (one sentence)
|
- Keep reasoning brief (one sentence)
|
||||||
- MUST include all ${posts.length} posts in order
|
- MUST include all ${posts.length} jobs in order
|
||||||
|
${isStudentContext ? "- When analyzing requirements, explicitly mention if requirements are too advanced or appropriate for the student level" : ""}
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
POST 1: YES | 0.9 | mentions layoffs and job cuts
|
JOB 1: YES | 0.9 | co-op position suitable for first year students
|
||||||
POST 2: NO | 0.1 | unrelated topic about vacation
|
JOB 2: NO | 0.2 | requires 5+ years experience, too advanced
|
||||||
POST 3: YES | 0.7 | discusses workforce reduction`;
|
JOB 3: YES | 0.7 | entry-level role with basic requirements appropriate for students`;
|
||||||
|
|
||||||
// Add timeout to prevent hanging (5 minutes max)
|
// Add timeout to prevent hanging (5 minutes max)
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
|
|||||||
@ -45,6 +45,43 @@ function containsAnyKeyword(text, keywords) {
|
|||||||
return keywords.some((keyword) => lowerText.includes(keyword.toLowerCase()));
|
return keywords.some((keyword) => lowerText.includes(keyword.toLowerCase()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Check if text contains ALL of the specified keywords (case insensitive).
 *
 * Consistency fixes versus the sibling helpers in this module:
 * - An empty keyword list now returns false (previously `every` on an empty
 *   array vacuously returned true, which would have matched every job);
 *   this matches `containsAnyKeyword` (empty → false) and the explicit
 *   empty-group rejection in `matchesKeywordGroups`.
 * - Keywords are trimmed before comparison, matching `matchesKeywordGroups`.
 *
 * @param {string} text - Text to search in
 * @param {Array<string>} keywords - Keywords that must all be present
 * @returns {boolean} - True only if every keyword occurs in the text
 */
function containsAllKeywords(text, keywords) {
  if (!text || !Array.isArray(keywords) || keywords.length === 0) {
    return false;
  }

  const lowerText = text.toLowerCase();
  return keywords.every((keyword) =>
    lowerText.includes(keyword.toLowerCase().trim())
  );
}
|
||||||
|
|
||||||
|
/**
 * Test text against grouped keyword logic: every group must be satisfied
 * (AND across groups), and a group is satisfied when any one of its
 * keywords appears in the text (OR within a group). Matching is
 * case-insensitive and each keyword is trimmed before comparison.
 *
 * @param {string} text - Text to search in
 * @param {Array<Array<string>>} keywordGroups - Array of keyword groups, each group is an array of OR keywords
 * @returns {boolean} - True if text matches all groups (AND logic) and at least one keyword in each group (OR logic)
 */
function matchesKeywordGroups(text, keywordGroups) {
  // Guard clauses: no text, malformed input, or nothing to match against.
  if (!text || !Array.isArray(keywordGroups) || keywordGroups.length === 0) {
    return false;
  }

  const haystack = text.toLowerCase();

  for (const group of keywordGroups) {
    // A malformed or empty group can never be satisfied.
    if (!Array.isArray(group) || group.length === 0) {
      return false;
    }

    // OR within the group: one hit is enough to satisfy it.
    const groupSatisfied = group.some((keyword) =>
      haystack.includes(keyword.toLowerCase().trim())
    );

    // AND across groups: a single unsatisfied group fails the whole match.
    if (!groupSatisfied) {
      return false;
    }
  }

  return true;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validate if text meets basic quality criteria
|
* Validate if text meets basic quality criteria
|
||||||
*/
|
*/
|
||||||
@ -101,6 +138,8 @@ function normalizeUrl(url) {
|
|||||||
module.exports = {
|
module.exports = {
|
||||||
cleanText,
|
cleanText,
|
||||||
containsAnyKeyword,
|
containsAnyKeyword,
|
||||||
|
containsAllKeywords,
|
||||||
|
matchesKeywordGroups,
|
||||||
isValidText,
|
isValidText,
|
||||||
extractDomain,
|
extractDomain,
|
||||||
normalizeUrl,
|
normalizeUrl,
|
||||||
|
|||||||
@ -92,10 +92,32 @@ node index.js --sites=linkedin --keywords="software engineer,developer"
|
|||||||
# Search with location filter
|
# Search with location filter
|
||||||
node index.js --sites=linkedin --keywords="co-op" --location="Ontario"
|
node index.js --sites=linkedin --keywords="co-op" --location="Ontario"
|
||||||
|
|
||||||
|
# Search with date filter (jobs posted after specific date)
|
||||||
|
node index.js --sites=linkedin --keywords="co-op" --min-date="2025-12-01"
|
||||||
|
|
||||||
|
# Combine filters
|
||||||
|
node index.js --sites=linkedin --keywords="co-op" --location="Ontario" --min-date="2025-12-01"
|
||||||
|
|
||||||
# Combine multiple sites
|
# Combine multiple sites
|
||||||
node index.js --sites=linkedin,skipthedrive --keywords="intern,co-op"
|
node index.js --sites=linkedin,skipthedrive --keywords="intern,co-op"
|
||||||
|
|
||||||
|
# Use AND logic - jobs must match ALL keywords (e.g., "co-op" AND "summer 2026")
|
||||||
|
node index.js --sites=linkedin --keywords="co-op,summer 2026" --and
|
||||||
|
|
||||||
|
# Use grouped AND/OR logic - (co-op OR intern) AND (summer 2026)
|
||||||
|
# Use | (pipe) for OR within groups, , (comma) to separate AND groups
|
||||||
|
node index.js --sites=linkedin --keywords="co-op|intern,summer 2026" --and
|
||||||
|
|
||||||
|
# Multiple AND groups - (co-op OR intern) AND (summer 2026) AND (remote)
|
||||||
|
node index.js --sites=linkedin --keywords="co-op|intern,summer 2026,remote" --and
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Date Filter Notes:**
|
||||||
|
- The date filter uses LinkedIn's `f_TPR` parameter to filter at the LinkedIn level before parsing
|
||||||
|
- Format: `YYYY-MM-DD` (e.g., `2025-12-01`)
|
||||||
|
- LinkedIn supports relative timeframes up to ~30 days
|
||||||
|
- For dates older than 30 days, LinkedIn may limit results to the maximum supported timeframe
|
||||||
|
|
||||||
### 🚧 Planned Parsers
|
### 🚧 Planned Parsers
|
||||||
|
|
||||||
- **Indeed**: Comprehensive job aggregator
|
- **Indeed**: Comprehensive job aggregator
|
||||||
@ -128,6 +150,9 @@ Create a `.env` file in the parser directory:
|
|||||||
```env
|
```env
|
||||||
# Job Search Configuration
|
# Job Search Configuration
|
||||||
SEARCH_KEYWORDS=software engineer,developer,programmer
|
SEARCH_KEYWORDS=software engineer,developer,programmer
|
||||||
|
# For grouped AND/OR logic, use pipe (|) for OR within groups and comma (,) for AND groups:
|
||||||
|
# SEARCH_KEYWORDS=co-op|intern,summer 2026,remote # (co-op OR intern) AND (summer 2026) AND (remote)
|
||||||
|
USE_AND_LOGIC=false # Set to "true" to enable AND logic (required for grouped keywords)
|
||||||
LOCATION_FILTER=Ontario,Canada
|
LOCATION_FILTER=Ontario,Canada
|
||||||
MAX_PAGES=5
|
MAX_PAGES=5
|
||||||
|
|
||||||
@ -136,6 +161,9 @@ LINKEDIN_USERNAME=your_email@example.com
|
|||||||
LINKEDIN_PASSWORD=your_password
|
LINKEDIN_PASSWORD=your_password
|
||||||
LINKEDIN_JOB_LOCATION=Canada # Optional: LinkedIn location search
|
LINKEDIN_JOB_LOCATION=Canada # Optional: LinkedIn location search
|
||||||
|
|
||||||
|
# Date Filter (LinkedIn only - filters at LinkedIn level before parsing)
|
||||||
|
MIN_DATE=2025-12-01 # Format: YYYY-MM-DD (jobs posted after this date)
|
||||||
|
|
||||||
# Analysis Configuration
|
# Analysis Configuration
|
||||||
ENABLE_AI_ANALYSIS=false
|
ENABLE_AI_ANALYSIS=false
|
||||||
HEADLESS=true
|
HEADLESS=true
|
||||||
@ -144,6 +172,22 @@ HEADLESS=true
|
|||||||
OUTPUT_FORMAT=json # Options: "json", "csv", or "both"
|
OUTPUT_FORMAT=json # Options: "json", "csv", or "both"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Keyword Examples in .env:**
|
||||||
|
|
||||||
|
```env
|
||||||
|
# Simple OR logic (default) - matches ANY keyword
|
||||||
|
SEARCH_KEYWORDS=co-op,intern
|
||||||
|
USE_AND_LOGIC=false
|
||||||
|
|
||||||
|
# Simple AND logic - matches ALL keywords
|
||||||
|
SEARCH_KEYWORDS=co-op,summer 2026
|
||||||
|
USE_AND_LOGIC=true
|
||||||
|
|
||||||
|
# Grouped AND/OR logic - (co-op OR intern) AND (summer 2026) AND (remote)
|
||||||
|
SEARCH_KEYWORDS=co-op|intern,summer 2026,remote
|
||||||
|
USE_AND_LOGIC=true
|
||||||
|
```
|
||||||
|
|
||||||
### Command Line Options
|
### Command Line Options
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@ -168,16 +212,34 @@ node index.js --no-rejected
|
|||||||
# Output format (json, csv, or both)
|
# Output format (json, csv, or both)
|
||||||
node index.js --output=csv
|
node index.js --output=csv
|
||||||
node index.js --output=both
|
node index.js --output=both
|
||||||
|
|
||||||
|
# Date filter (LinkedIn only - filters at LinkedIn level)
|
||||||
|
node index.js --sites=linkedin --min-date="2025-12-01"
|
||||||
|
|
||||||
|
# Use AND logic for keywords (all keywords must match)
|
||||||
|
node index.js --sites=linkedin --keywords="co-op,summer 2026" --and
|
||||||
|
|
||||||
|
# Use grouped AND/OR logic: (co-op OR intern) AND (summer 2026)
|
||||||
|
# Use | (pipe) for OR within groups, , (comma) to separate AND groups
|
||||||
|
node index.js --sites=linkedin --keywords="co-op|intern,summer 2026" --and
|
||||||
|
|
||||||
|
# Multiple AND groups: (co-op OR intern) AND (summer 2026) AND (remote)
|
||||||
|
node index.js --sites=linkedin --keywords="co-op|intern,summer 2026,remote" --and
|
||||||
```
|
```
|
||||||
|
|
||||||
**Available Options:**
|
**Available Options:**
|
||||||
|
|
||||||
- `--sites="site1,site2"`: Job sites to parse (linkedin, skipthedrive)
|
- `--sites="site1,site2"`: Job sites to parse (linkedin, skipthedrive)
|
||||||
- `--keywords="keyword1,keyword2"`: Search keywords
|
- `--keywords="keyword1,keyword2"`: Search keywords
|
||||||
|
- Use `|` (pipe) to separate OR keywords within a group: `"co-op|intern"` means "co-op" OR "intern"
|
||||||
|
- Use `,` (comma) to separate AND groups when using `--and`: `"co-op|intern,summer 2026"` means (co-op OR intern) AND (summer 2026)
|
||||||
- `--location="LOCATION"`: Location filter
|
- `--location="LOCATION"`: Location filter
|
||||||
- `--max-pages=NUMBER`: Maximum pages to parse (0 or "all" for unlimited)
|
- `--max-pages=NUMBER`: Maximum pages to parse (0 or "all" for unlimited)
|
||||||
|
- `--min-date="YYYY-MM-DD"`: Minimum posted date filter (LinkedIn only - filters at LinkedIn level before parsing)
|
||||||
- `--no-rejected` or `--exclude-rejected`: Exclude rejected results from output
|
- `--no-rejected` or `--exclude-rejected`: Exclude rejected results from output
|
||||||
- `--output=FORMAT` or `--format=FORMAT`: Output format - "json", "csv", or "both" (default: "json")
|
- `--output=FORMAT` or `--format=FORMAT`: Output format - "json", "csv", or "both" (default: "json")
|
||||||
|
- `--and` or `--all-keywords`: Use AND logic for keywords (all keywords must match). Default is OR logic (any keyword matches)
|
||||||
|
- When combined with `|` (pipe) in keywords, enables grouped AND/OR logic
|
||||||
|
|
||||||
## 📊 Keywords
|
## 📊 Keywords
|
||||||
|
|
||||||
|
|||||||
@ -28,6 +28,8 @@ const OLLAMA_MODEL = process.env.OLLAMA_MODEL || DEFAULT_MODEL;
|
|||||||
const MAX_PAGES = parseInt(process.env.MAX_PAGES) || 5;
|
const MAX_PAGES = parseInt(process.env.MAX_PAGES) || 5;
|
||||||
const EXCLUDE_REJECTED = process.env.EXCLUDE_REJECTED === "true";
|
const EXCLUDE_REJECTED = process.env.EXCLUDE_REJECTED === "true";
|
||||||
const OUTPUT_FORMAT = process.env.OUTPUT_FORMAT || "json"; // "json", "csv", or "both"
|
const OUTPUT_FORMAT = process.env.OUTPUT_FORMAT || "json"; // "json", "csv", or "both"
|
||||||
|
const MIN_DATE = process.env.MIN_DATE; // Minimum posted date (format: YYYY-MM-DD)
|
||||||
|
const USE_AND_LOGIC = process.env.USE_AND_LOGIC === "true"; // Use AND logic for keywords
|
||||||
|
|
||||||
// Available site strategies
|
// Available site strategies
|
||||||
const SITE_STRATEGIES = {
|
const SITE_STRATEGIES = {
|
||||||
@ -50,6 +52,8 @@ function parseArguments() {
|
|||||||
maxPages: MAX_PAGES,
|
maxPages: MAX_PAGES,
|
||||||
excludeRejected: EXCLUDE_REJECTED,
|
excludeRejected: EXCLUDE_REJECTED,
|
||||||
outputFormat: OUTPUT_FORMAT,
|
outputFormat: OUTPUT_FORMAT,
|
||||||
|
minDate: MIN_DATE,
|
||||||
|
useAndLogic: USE_AND_LOGIC, // Use AND logic instead of OR logic for keywords (from env or CLI)
|
||||||
};
|
};
|
||||||
|
|
||||||
args.forEach((arg) => {
|
args.forEach((arg) => {
|
||||||
@ -82,6 +86,10 @@ function parseArguments() {
|
|||||||
} else {
|
} else {
|
||||||
logger.warning(`⚠️ Unknown output format: ${format}. Using default: json`);
|
logger.warning(`⚠️ Unknown output format: ${format}. Using default: json`);
|
||||||
}
|
}
|
||||||
|
} else if (arg.startsWith("--min-date=")) {
|
||||||
|
options.minDate = arg.split("=")[1];
|
||||||
|
} else if (arg === "--and" || arg === "--all-keywords") {
|
||||||
|
options.useAndLogic = true; // CLI flag overrides env variable
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -104,15 +112,35 @@ async function startJobSearchParser(options = {}) {
|
|||||||
logger.step("🚀 Job Search Parser Starting...");
|
logger.step("🚀 Job Search Parser Starting...");
|
||||||
|
|
||||||
// Parse keywords
|
// Parse keywords
|
||||||
const keywords =
|
let keywords =
|
||||||
finalOptions.keywords || SEARCH_KEYWORDS.split(",").map((k) => k.trim());
|
finalOptions.keywords || SEARCH_KEYWORDS.split(",").map((k) => k.trim());
|
||||||
|
|
||||||
|
// Parse keyword groups if AND logic is enabled and keywords contain pipe (|) separator
|
||||||
|
// Format: "co-op|intern,summer 2026" means (co-op OR intern) AND (summer 2026)
|
||||||
|
let keywordGroups = null;
|
||||||
|
if (finalOptions.useAndLogic && keywords.some(k => k.includes('|'))) {
|
||||||
|
keywordGroups = keywords.map(group =>
|
||||||
|
group.split('|').map(k => k.trim()).filter(k => k.length > 0)
|
||||||
|
);
|
||||||
|
logger.info(`🔍 Keyword Groups: ${keywordGroups.map(g => `(${g.join(' OR ')})`).join(' AND ')}`);
|
||||||
|
}
|
||||||
|
|
||||||
const locationFilter = finalOptions.locationFilter || LOCATION_FILTER;
|
const locationFilter = finalOptions.locationFilter || LOCATION_FILTER;
|
||||||
const sites = finalOptions.sites;
|
const sites = finalOptions.sites;
|
||||||
const excludeRejected = finalOptions.excludeRejected !== undefined ? finalOptions.excludeRejected : EXCLUDE_REJECTED;
|
const excludeRejected = finalOptions.excludeRejected !== undefined ? finalOptions.excludeRejected : EXCLUDE_REJECTED;
|
||||||
|
|
||||||
logger.info(`📦 Selected job sites: ${sites.join(", ")}`);
|
logger.info(`📦 Selected job sites: ${sites.join(", ")}`);
|
||||||
logger.info(`🔍 Search Keywords: ${keywords.join(", ")}`);
|
logger.info(`🔍 Search Keywords: ${keywords.join(", ")}`);
|
||||||
|
if (keywordGroups) {
|
||||||
|
logger.info(`🔗 Keyword Logic: Grouped AND/OR - ${keywordGroups.map(g => `(${g.join(' OR ')})`).join(' AND ')}`);
|
||||||
|
} else {
|
||||||
|
logger.info(`🔗 Keyword Logic: ${finalOptions.useAndLogic ? "AND (all keywords must match)" : "OR (any keyword matches)"}`);
|
||||||
|
}
|
||||||
logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
|
logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
|
||||||
|
const minDate = finalOptions.minDate || MIN_DATE;
|
||||||
|
if (minDate) {
|
||||||
|
logger.info(`📅 Min Date Filter: ${minDate} (jobs posted after this date)`);
|
||||||
|
}
|
||||||
logger.info(
|
logger.info(
|
||||||
`🧠 AI Analysis: ${ENABLE_AI_ANALYSIS ? "Enabled" : "Disabled"}`
|
`🧠 AI Analysis: ${ENABLE_AI_ANALYSIS ? "Enabled" : "Disabled"}`
|
||||||
);
|
);
|
||||||
@ -124,6 +152,96 @@ async function startJobSearchParser(options = {}) {
|
|||||||
const allResults = [];
|
const allResults = [];
|
||||||
const allRejectedResults = [];
|
const allRejectedResults = [];
|
||||||
const siteResults = {};
|
const siteResults = {};
|
||||||
|
let analysisResults = null;
|
||||||
|
|
||||||
|
// Initialize results directory and file for incremental saving
|
||||||
|
const resultsDir = path.join(__dirname, "results");
|
||||||
|
if (!fs.existsSync(resultsDir)) {
|
||||||
|
fs.mkdirSync(resultsDir, { recursive: true });
|
||||||
|
}
|
||||||
|
|
||||||
|
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
||||||
|
const outputFormat = finalOptions.outputFormat || OUTPUT_FORMAT;
|
||||||
|
let incrementalJsonFilepath = null;
|
||||||
|
let incrementalCsvFilepath = null;
|
||||||
|
|
||||||
|
// Initialize incremental save files
|
||||||
|
if (outputFormat === "json" || outputFormat === "both") {
|
||||||
|
const jsonFilename = `job-search-results-${timestamp}.json`;
|
||||||
|
incrementalJsonFilepath = path.join(resultsDir, jsonFilename);
|
||||||
|
}
|
||||||
|
if (outputFormat === "csv" || outputFormat === "both") {
|
||||||
|
const csvFilename = `job-search-results-${timestamp}.csv`;
|
||||||
|
incrementalCsvFilepath = path.join(resultsDir, csvFilename);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Save results incrementally as they're found
|
||||||
|
*/
|
||||||
|
const saveIncrementalResults = (currentResults, currentRejectedResults, currentSiteResults, currentAnalysisResults = null, isComplete = false) => {
|
||||||
|
try {
|
||||||
|
const outputData = {
|
||||||
|
metadata: {
|
||||||
|
extractedAt: new Date().toISOString(),
|
||||||
|
parser: "job-search-parser",
|
||||||
|
version: "2.0.0",
|
||||||
|
sites: sites,
|
||||||
|
keywords: keywords.join(", "),
|
||||||
|
locationFilter,
|
||||||
|
aiAnalysisEnabled: ENABLE_AI_ANALYSIS,
|
||||||
|
aiContext: ENABLE_AI_ANALYSIS ? AI_CONTEXT : undefined,
|
||||||
|
aiModel: ENABLE_AI_ANALYSIS ? OLLAMA_MODEL : undefined,
|
||||||
|
analysisResults: currentAnalysisResults,
|
||||||
|
rejectedJobsExcluded: excludeRejected,
|
||||||
|
isComplete: isComplete,
|
||||||
|
lastUpdated: new Date().toISOString(),
|
||||||
|
},
|
||||||
|
results: currentResults,
|
||||||
|
siteResults: currentSiteResults,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (!excludeRejected) {
|
||||||
|
outputData.rejectedResults = currentRejectedResults;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save JSON incrementally
|
||||||
|
if (incrementalJsonFilepath) {
|
||||||
|
fs.writeFileSync(incrementalJsonFilepath, JSON.stringify(outputData, null, 2));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save CSV incrementally (convert on each save)
|
||||||
|
if (incrementalCsvFilepath) {
|
||||||
|
const csvContent = convertResultsToCsv(outputData);
|
||||||
|
fs.writeFileSync(incrementalCsvFilepath, csvContent);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!isComplete) {
|
||||||
|
logger.info(`💾 Incremental save: ${currentResults.length} results saved to ${incrementalJsonFilepath || incrementalCsvFilepath}`);
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
logger.warning(`⚠️ Failed to save incremental results: ${error.message}`);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Save initial empty state
|
||||||
|
saveIncrementalResults([], [], {}, null, false);
|
||||||
|
|
||||||
|
// Set up signal handlers for graceful shutdown
|
||||||
|
let isShuttingDown = false;
|
||||||
|
const gracefulShutdown = async (signal) => {
|
||||||
|
if (isShuttingDown) return;
|
||||||
|
isShuttingDown = true;
|
||||||
|
|
||||||
|
logger.warning(`\n⚠️ Received ${signal}, saving current results before exit...`);
|
||||||
|
saveIncrementalResults(allResults, allRejectedResults, siteResults, analysisResults, false);
|
||||||
|
logger.info(`💾 Saved ${allResults.length} results before shutdown`);
|
||||||
|
|
||||||
|
await coreParser.cleanup();
|
||||||
|
process.exit(0);
|
||||||
|
};
|
||||||
|
|
||||||
|
process.on('SIGINT', () => gracefulShutdown('SIGINT'));
|
||||||
|
process.on('SIGTERM', () => gracefulShutdown('SIGTERM'));
|
||||||
|
|
||||||
// Process each selected site
|
// Process each selected site
|
||||||
for (const site of sites) {
|
for (const site of sites) {
|
||||||
@ -140,8 +258,10 @@ async function startJobSearchParser(options = {}) {
|
|||||||
// Prepare strategy options
|
// Prepare strategy options
|
||||||
const strategyOptions = {
|
const strategyOptions = {
|
||||||
keywords,
|
keywords,
|
||||||
|
keywordGroups, // Pass grouped keywords if available
|
||||||
locationFilter,
|
locationFilter,
|
||||||
maxPages: finalOptions.maxPages,
|
maxPages: finalOptions.maxPages,
|
||||||
|
useAndLogic: finalOptions.useAndLogic || false,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Add credentials for LinkedIn
|
// Add credentials for LinkedIn
|
||||||
@ -165,6 +285,7 @@ async function startJobSearchParser(options = {}) {
|
|||||||
password: LINKEDIN_PASSWORD,
|
password: LINKEDIN_PASSWORD,
|
||||||
};
|
};
|
||||||
strategyOptions.location = process.env.LINKEDIN_JOB_LOCATION || "";
|
strategyOptions.location = process.env.LINKEDIN_JOB_LOCATION || "";
|
||||||
|
strategyOptions.minDate = minDate; // Add date filter for LinkedIn
|
||||||
}
|
}
|
||||||
|
|
||||||
const parseResult = await strategy(coreParser, strategyOptions);
|
const parseResult = await strategy(coreParser, strategyOptions);
|
||||||
@ -188,6 +309,9 @@ async function startJobSearchParser(options = {}) {
|
|||||||
logger.success(
|
logger.success(
|
||||||
`✅ ${site} completed in ${duration}s - Found ${results.length} jobs`
|
`✅ ${site} completed in ${duration}s - Found ${results.length} jobs`
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Save results incrementally after each site
|
||||||
|
saveIncrementalResults(allResults, allRejectedResults, siteResults, analysisResults, false);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`❌ ${site} parsing failed: ${error.message}`);
|
logger.error(`❌ ${site} parsing failed: ${error.message}`);
|
||||||
siteResults[site] = {
|
siteResults[site] = {
|
||||||
@ -196,23 +320,41 @@ async function startJobSearchParser(options = {}) {
|
|||||||
duration: "0s",
|
duration: "0s",
|
||||||
error: error.message,
|
error: error.message,
|
||||||
};
|
};
|
||||||
|
// Save even on error to preserve what we have
|
||||||
|
saveIncrementalResults(allResults, allRejectedResults, siteResults, analysisResults, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// AI Analysis if enabled
|
// AI Analysis if enabled
|
||||||
let analysisResults = null;
|
// Save results before AI analysis (in case AI analysis takes a long time)
|
||||||
|
if (allResults.length > 0) {
|
||||||
|
saveIncrementalResults(allResults, allRejectedResults, siteResults, null, false);
|
||||||
|
}
|
||||||
|
|
||||||
if (ENABLE_AI_ANALYSIS && allResults.length > 0) {
|
if (ENABLE_AI_ANALYSIS && allResults.length > 0) {
|
||||||
logger.step("🧠 Running AI Analysis...");
|
logger.step("🧠 Running AI Analysis...");
|
||||||
|
|
||||||
const ollamaAvailable = await checkOllamaStatus(OLLAMA_MODEL);
|
const ollamaAvailable = await checkOllamaStatus(OLLAMA_MODEL);
|
||||||
if (ollamaAvailable) {
|
if (ollamaAvailable) {
|
||||||
// Prepare data for analysis (analyzeBatch expects objects with 'text' field)
|
// Prepare data for analysis (analyzeBatch expects objects with 'text' field)
|
||||||
const analysisData = allResults.map((job) => ({
|
const analysisData = allResults.map((job) => {
|
||||||
text: `${job.title || ""} at ${job.company || ""}. ${job.description || ""}`.trim(),
|
// Build comprehensive text including all available job information
|
||||||
location: job.location || "",
|
const parts = [];
|
||||||
keyword: job.keyword || "",
|
if (job.title) parts.push(`Title: ${job.title}`);
|
||||||
timestamp: job.extractedAt || job.postedDate || "",
|
if (job.company) parts.push(`Company: ${job.company}`);
|
||||||
}));
|
if (job.description) parts.push(`Description: ${job.description}`);
|
||||||
|
if (job.roleDuties) parts.push(`Role Duties: ${job.roleDuties}`);
|
||||||
|
if (job.jobRequirements) parts.push(`Requirements: ${job.jobRequirements}`);
|
||||||
|
|
||||||
|
return {
|
||||||
|
text: parts.join("\n\n"),
|
||||||
|
location: job.location || "",
|
||||||
|
keyword: job.keyword || "",
|
||||||
|
timestamp: job.extractedAt || job.postedDate || "",
|
||||||
|
roleDuties: job.roleDuties || "",
|
||||||
|
jobRequirements: job.jobRequirements || "",
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
// Process in smaller batches to avoid timeouts (5 jobs per batch)
|
// Process in smaller batches to avoid timeouts (5 jobs per batch)
|
||||||
const BATCH_SIZE = parseInt(process.env.AI_BATCH_SIZE) || 5;
|
const BATCH_SIZE = parseInt(process.env.AI_BATCH_SIZE) || 5;
|
||||||
@ -263,68 +405,32 @@ async function startJobSearchParser(options = {}) {
|
|||||||
logger.success(
|
logger.success(
|
||||||
`✅ AI Analysis completed for ${allResults.length} jobs`
|
`✅ AI Analysis completed for ${allResults.length} jobs`
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Save results after AI analysis completes
|
||||||
|
saveIncrementalResults(allResults, allRejectedResults, siteResults, analysisResults, false);
|
||||||
} else {
|
} else {
|
||||||
logger.warning("⚠️ Ollama not available, skipping AI analysis");
|
logger.warning("⚠️ Ollama not available, skipping AI analysis");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save results
|
// Final save with complete flag
|
||||||
logger.info(`💾 Preparing to save: ${allResults.length} results, ${allRejectedResults.length} rejected`);
|
logger.info(`💾 Preparing final save: ${allResults.length} results, ${allRejectedResults.length} rejected`);
|
||||||
logger.info(`💾 EXCLUDE_REJECTED env: ${process.env.EXCLUDE_REJECTED}, excludeRejected variable: ${excludeRejected}`);
|
logger.info(`💾 EXCLUDE_REJECTED env: ${process.env.EXCLUDE_REJECTED}, excludeRejected variable: ${excludeRejected}`);
|
||||||
|
|
||||||
const outputData = {
|
|
||||||
metadata: {
|
|
||||||
extractedAt: new Date().toISOString(),
|
|
||||||
parser: "job-search-parser",
|
|
||||||
version: "2.0.0",
|
|
||||||
sites: sites,
|
|
||||||
keywords: keywords.join(", "),
|
|
||||||
locationFilter,
|
|
||||||
aiAnalysisEnabled: ENABLE_AI_ANALYSIS,
|
|
||||||
aiContext: ENABLE_AI_ANALYSIS ? AI_CONTEXT : undefined,
|
|
||||||
aiModel: ENABLE_AI_ANALYSIS ? OLLAMA_MODEL : undefined,
|
|
||||||
analysisResults,
|
|
||||||
rejectedJobsExcluded: excludeRejected,
|
|
||||||
},
|
|
||||||
results: allResults,
|
|
||||||
siteResults,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Always include rejectedResults if not excluded (make it explicit, not using spread)
|
|
||||||
if (!excludeRejected) {
|
if (!excludeRejected) {
|
||||||
outputData.rejectedResults = allRejectedResults;
|
|
||||||
logger.info(`✅ Including ${allRejectedResults.length} rejected results in output`);
|
logger.info(`✅ Including ${allRejectedResults.length} rejected results in output`);
|
||||||
} else {
|
} else {
|
||||||
logger.info(`⏭️ Excluding rejected results (EXCLUDE_REJECTED=true)`);
|
logger.info(`⏭️ Excluding rejected results (EXCLUDE_REJECTED=true)`);
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info(`💾 Final output: ${outputData.results.length} results, ${outputData.rejectedResults?.length || 0} rejected`);
|
logger.info(`💾 Final output: ${allResults.length} results, ${allRejectedResults.length} rejected`);
|
||||||
|
|
||||||
const resultsDir = path.join(__dirname, "results");
|
// Final save with isComplete flag
|
||||||
if (!fs.existsSync(resultsDir)) {
|
saveIncrementalResults(allResults, allRejectedResults, siteResults, analysisResults, true);
|
||||||
fs.mkdirSync(resultsDir, { recursive: true });
|
|
||||||
}
|
|
||||||
|
|
||||||
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
|
||||||
const outputFormat = finalOptions.outputFormat || OUTPUT_FORMAT;
|
|
||||||
const savedFiles = [];
|
const savedFiles = [];
|
||||||
|
if (incrementalJsonFilepath) savedFiles.push(incrementalJsonFilepath);
|
||||||
// Save JSON if format is "json" or "both"
|
if (incrementalCsvFilepath) savedFiles.push(incrementalCsvFilepath);
|
||||||
if (outputFormat === "json" || outputFormat === "both") {
|
|
||||||
const jsonFilename = `job-search-results-${timestamp}.json`;
|
|
||||||
const jsonFilepath = path.join(resultsDir, jsonFilename);
|
|
||||||
fs.writeFileSync(jsonFilepath, JSON.stringify(outputData, null, 2));
|
|
||||||
savedFiles.push(jsonFilepath);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save CSV if format is "csv" or "both"
|
|
||||||
if (outputFormat === "csv" || outputFormat === "both") {
|
|
||||||
const csvFilename = `job-search-results-${timestamp}.csv`;
|
|
||||||
const csvFilepath = path.join(resultsDir, csvFilename);
|
|
||||||
const csvContent = convertResultsToCsv(outputData);
|
|
||||||
fs.writeFileSync(csvFilepath, csvContent);
|
|
||||||
savedFiles.push(csvFilepath);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Final summary
|
// Final summary
|
||||||
logger.step("\n📊 Job Search Parser Summary");
|
logger.step("\n📊 Job Search Parser Summary");
|
||||||
@ -348,6 +454,31 @@ async function startJobSearchParser(options = {}) {
|
|||||||
|
|
||||||
logger.success("\n✅ Job Search Parser completed successfully!");
|
logger.success("\n✅ Job Search Parser completed successfully!");
|
||||||
|
|
||||||
|
// Construct output data for return
|
||||||
|
const outputData = {
|
||||||
|
metadata: {
|
||||||
|
extractedAt: new Date().toISOString(),
|
||||||
|
parser: "job-search-parser",
|
||||||
|
version: "2.0.0",
|
||||||
|
sites: sites,
|
||||||
|
keywords: keywords.join(", "),
|
||||||
|
locationFilter,
|
||||||
|
aiAnalysisEnabled: ENABLE_AI_ANALYSIS,
|
||||||
|
aiContext: ENABLE_AI_ANALYSIS ? AI_CONTEXT : undefined,
|
||||||
|
aiModel: ENABLE_AI_ANALYSIS ? OLLAMA_MODEL : undefined,
|
||||||
|
analysisResults: analysisResults,
|
||||||
|
rejectedJobsExcluded: excludeRejected,
|
||||||
|
isComplete: true,
|
||||||
|
lastUpdated: new Date().toISOString(),
|
||||||
|
},
|
||||||
|
results: allResults,
|
||||||
|
siteResults: siteResults,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (!excludeRejected) {
|
||||||
|
outputData.rejectedResults = allRejectedResults;
|
||||||
|
}
|
||||||
|
|
||||||
return outputData;
|
return outputData;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`❌ Job Search Parser failed: ${error.message}`);
|
logger.error(`❌ Job Search Parser failed: ${error.message}`);
|
||||||
|
|||||||
@ -13,6 +13,7 @@ const {
|
|||||||
logger,
|
logger,
|
||||||
cleanText,
|
cleanText,
|
||||||
containsAnyKeyword,
|
containsAnyKeyword,
|
||||||
|
containsAllKeywords,
|
||||||
parseLocationFilters,
|
parseLocationFilters,
|
||||||
validateLocationAgainstFilters,
|
validateLocationAgainstFilters,
|
||||||
extractLocationFromProfile,
|
extractLocationFromProfile,
|
||||||
@ -125,10 +126,12 @@ async function parseSkipTheDrive(options = {}) {
|
|||||||
headless = process.env.HEADLESS !== "false",
|
headless = process.env.HEADLESS !== "false",
|
||||||
enableAI = process.env.ENABLE_AI_ANALYSIS === "true",
|
enableAI = process.env.ENABLE_AI_ANALYSIS === "true",
|
||||||
aiContext = process.env.AI_CONTEXT || "remote job opportunities analysis",
|
aiContext = process.env.AI_CONTEXT || "remote job opportunities analysis",
|
||||||
|
useAndLogic = false, // Use AND logic instead of OR logic for keywords
|
||||||
} = options;
|
} = options;
|
||||||
|
|
||||||
logger.step("Starting SkipTheDrive parser...");
|
logger.step("Starting SkipTheDrive parser...");
|
||||||
logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
|
logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
|
||||||
|
logger.info(`🔗 Keyword Logic: ${useAndLogic ? "AND (all keywords must match)" : "OR (any keyword matches)"}`);
|
||||||
logger.info(
|
logger.info(
|
||||||
`📋 Job Types: ${jobTypes.length > 0 ? jobTypes.join(", ") : "All"}`
|
`📋 Job Types: ${jobTypes.length > 0 ? jobTypes.join(", ") : "All"}`
|
||||||
);
|
);
|
||||||
@ -154,8 +157,12 @@ async function parseSkipTheDrive(options = {}) {
|
|||||||
const seenJobs = new Set();
|
const seenJobs = new Set();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Search for each keyword
|
// For AND logic, combine all keywords into a single search query
|
||||||
for (const keyword of keywords) {
|
// For OR logic, search each keyword separately
|
||||||
|
const searchKeywords = useAndLogic ? [keywords.join(" ")] : keywords;
|
||||||
|
|
||||||
|
// Search for each keyword (or combined keyword for AND logic)
|
||||||
|
for (const keyword of searchKeywords) {
|
||||||
logger.info(`\n🔍 Searching for: ${keyword}`);
|
logger.info(`\n🔍 Searching for: ${keyword}`);
|
||||||
|
|
||||||
const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
|
const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
|
||||||
@ -208,11 +215,17 @@ async function parseSkipTheDrive(options = {}) {
|
|||||||
|
|
||||||
// Validate job against keywords
|
// Validate job against keywords
|
||||||
const fullText = `${jobData.title} ${jobData.description} ${jobData.company}`;
|
const fullText = `${jobData.title} ${jobData.description} ${jobData.company}`;
|
||||||
if (!containsAnyKeyword(fullText, keywords)) {
|
const keywordMatch = useAndLogic
|
||||||
|
? containsAllKeywords(fullText, keywords)
|
||||||
|
: containsAnyKeyword(fullText, keywords);
|
||||||
|
|
||||||
|
if (!keywordMatch) {
|
||||||
rejectedResults.push({
|
rejectedResults.push({
|
||||||
...jobData,
|
...jobData,
|
||||||
rejected: true,
|
rejected: true,
|
||||||
reason: "Keywords not found in job listing",
|
reason: useAndLogic
|
||||||
|
? "Not all keywords found in job listing"
|
||||||
|
: "Keywords not found in job listing",
|
||||||
});
|
});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -44,6 +44,8 @@ function convertJobsToCsv(jobs, metadata = null) {
|
|||||||
"jobUrl",
|
"jobUrl",
|
||||||
"postedDate",
|
"postedDate",
|
||||||
"description",
|
"description",
|
||||||
|
"roleDuties",
|
||||||
|
"jobRequirements",
|
||||||
"jobType",
|
"jobType",
|
||||||
"experienceLevel",
|
"experienceLevel",
|
||||||
"keyword",
|
"keyword",
|
||||||
|
|||||||
@ -10,6 +10,8 @@ const {
|
|||||||
validateLocationAgainstFilters,
|
validateLocationAgainstFilters,
|
||||||
parseLocationFilters,
|
parseLocationFilters,
|
||||||
containsAnyKeyword,
|
containsAnyKeyword,
|
||||||
|
containsAllKeywords,
|
||||||
|
matchesKeywordGroups,
|
||||||
} = require("ai-analyzer");
|
} = require("ai-analyzer");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -34,6 +36,28 @@ function buildJobSearchUrl(keyword, location = "", filters = {}) {
|
|||||||
params.append("location", location);
|
params.append("location", location);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add date filter if provided (f_TPR parameter)
|
||||||
|
// LinkedIn uses f_TPR=r<seconds> where seconds is the time range
|
||||||
|
if (filters.minDate) {
|
||||||
|
try {
|
||||||
|
const minDate = new Date(filters.minDate);
|
||||||
|
const now = new Date();
|
||||||
|
const secondsDiff = Math.floor((now - minDate) / 1000);
|
||||||
|
|
||||||
|
// LinkedIn supports relative timeframes (f_TPR parameter)
|
||||||
|
// If date is in the future, don't add filter
|
||||||
|
if (secondsDiff > 0) {
|
||||||
|
// LinkedIn typically supports up to ~30 days (2592000 seconds)
|
||||||
|
// For dates older than 30 days, we'll still add it but LinkedIn may limit results
|
||||||
|
const maxSeconds = 2592000; // 30 days
|
||||||
|
const timeRange = Math.min(secondsDiff, maxSeconds);
|
||||||
|
params.append("f_TPR", `r${timeRange}`);
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
logger.warning(`⚠️ Invalid date format for minDate: ${filters.minDate}. Expected format: YYYY-MM-DD`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Add additional filters
|
// Add additional filters
|
||||||
if (filters.experienceLevel) {
|
if (filters.experienceLevel) {
|
||||||
params.append("f_E", filters.experienceLevel);
|
params.append("f_E", filters.experienceLevel);
|
||||||
@ -54,10 +78,13 @@ function buildJobSearchUrl(keyword, location = "", filters = {}) {
|
|||||||
async function linkedinJobsStrategy(coreParser, options = {}) {
|
async function linkedinJobsStrategy(coreParser, options = {}) {
|
||||||
const {
|
const {
|
||||||
keywords = ["software engineer", "developer"],
|
keywords = ["software engineer", "developer"],
|
||||||
|
keywordGroups = null, // Array of keyword groups for grouped AND/OR logic
|
||||||
locationFilter = null,
|
locationFilter = null,
|
||||||
maxPages = 5,
|
maxPages = 5,
|
||||||
credentials = {},
|
credentials = {},
|
||||||
location = "", // LinkedIn location search (e.g., "Canada", "Toronto, Ontario, Canada")
|
location = "", // LinkedIn location search (e.g., "Canada", "Toronto, Ontario, Canada")
|
||||||
|
minDate = null, // Minimum posted date (format: YYYY-MM-DD)
|
||||||
|
useAndLogic = false, // Use AND logic instead of OR logic for keywords
|
||||||
} = options;
|
} = options;
|
||||||
|
|
||||||
const results = [];
|
const results = [];
|
||||||
@ -79,15 +106,39 @@ async function linkedinJobsStrategy(coreParser, options = {}) {
|
|||||||
|
|
||||||
logger.info("🚀 Starting LinkedIn Jobs parser...");
|
logger.info("🚀 Starting LinkedIn Jobs parser...");
|
||||||
logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
|
logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
|
||||||
|
if (keywordGroups) {
|
||||||
|
logger.info(`🔗 Keyword Logic: Grouped AND/OR - ${keywordGroups.map(g => `(${g.join(' OR ')})`).join(' AND ')}`);
|
||||||
|
} else {
|
||||||
|
logger.info(`🔗 Keyword Logic: ${useAndLogic ? "AND (all keywords must match)" : "OR (any keyword matches)"}`);
|
||||||
|
}
|
||||||
logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
|
logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
|
||||||
logger.info(`🌍 LinkedIn Location: ${location || "None"}`);
|
logger.info(`🌍 LinkedIn Location: ${location || "None"}`);
|
||||||
logger.info(`📄 Max Pages: ${maxPages}`);
|
logger.info(`📄 Max Pages: ${maxPages}`);
|
||||||
|
if (minDate) {
|
||||||
|
logger.info(`📅 Min Date Filter: ${minDate} (jobs posted after this date)`);
|
||||||
|
}
|
||||||
|
|
||||||
// Search for each keyword
|
// Determine search keywords based on logic type
|
||||||
for (const keyword of keywords) {
|
let searchKeywords;
|
||||||
|
if (keywordGroups) {
|
||||||
|
// For grouped AND/OR logic, search each keyword in each group (OR within groups)
|
||||||
|
// We'll combine results and filter to ensure all groups match (AND between groups)
|
||||||
|
searchKeywords = keywordGroups.flat(); // Flatten all keywords from all groups
|
||||||
|
} else if (useAndLogic) {
|
||||||
|
// For simple AND logic, combine all keywords into a single search query
|
||||||
|
searchKeywords = [keywords.join(" ")];
|
||||||
|
} else {
|
||||||
|
// For OR logic, search each keyword separately
|
||||||
|
searchKeywords = keywords;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Search for each keyword (or combined keyword for AND logic)
|
||||||
|
for (const keyword of searchKeywords) {
|
||||||
logger.info(`\n🔍 Searching LinkedIn Jobs for: "${keyword}"`);
|
logger.info(`\n🔍 Searching LinkedIn Jobs for: "${keyword}"`);
|
||||||
|
|
||||||
const searchUrl = buildJobSearchUrl(keyword, location);
|
const searchUrl = buildJobSearchUrl(keyword, location, {
|
||||||
|
minDate: minDate,
|
||||||
|
});
|
||||||
logger.info(`🔗 Search URL: ${searchUrl}`);
|
logger.info(`🔗 Search URL: ${searchUrl}`);
|
||||||
|
|
||||||
// Check if page is still valid before proceeding
|
// Check if page is still valid before proceeding
|
||||||
@ -220,7 +271,7 @@ async function linkedinJobsStrategy(coreParser, options = {}) {
|
|||||||
await new Promise((resolve) => setTimeout(resolve, 2000));
|
await new Promise((resolve) => setTimeout(resolve, 2000));
|
||||||
|
|
||||||
// Extract jobs from current page
|
// Extract jobs from current page
|
||||||
const pageJobs = await extractJobsFromPage(page, keyword, locationFilter);
|
const pageJobs = await extractJobsFromPage(page, keyword, locationFilter, coreParser);
|
||||||
logger.info(`📋 Extracted ${pageJobs.length} jobs from page ${currentPage}`);
|
logger.info(`📋 Extracted ${pageJobs.length} jobs from page ${currentPage}`);
|
||||||
|
|
||||||
if (pageJobs.length === 0) {
|
if (pageJobs.length === 0) {
|
||||||
@ -317,10 +368,35 @@ async function linkedinJobsStrategy(coreParser, options = {}) {
|
|||||||
}
|
}
|
||||||
seenJobs.add(job.jobId);
|
seenJobs.add(job.jobId);
|
||||||
|
|
||||||
// REMOVED: Keyword validation - LinkedIn already filtered by keyword in search results
|
// Validate keywords based on logic type
|
||||||
// If LinkedIn returned this job in search results, it matches the keyword.
|
if (keywordGroups) {
|
||||||
// The snippet might not contain the keyword, but the full description does.
|
// Grouped AND/OR logic: all groups must match (AND), at least one keyword per group (OR)
|
||||||
// Trust LinkedIn's search algorithm rather than re-validating against snippets.
|
const fullText = `${job.title} ${job.description} ${job.company}`;
|
||||||
|
if (!matchesKeywordGroups(fullText, keywordGroups)) {
|
||||||
|
rejectedResults.push({
|
||||||
|
...job,
|
||||||
|
rejectionReason: "Job does not match all keyword groups",
|
||||||
|
});
|
||||||
|
if (process.env.DEBUG === "true") {
|
||||||
|
logger.debug(`🔍 Rejected (grouped logic): "${job.title}" - does not match all groups`);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else if (useAndLogic) {
|
||||||
|
// Simple AND logic: all keywords must match
|
||||||
|
const fullText = `${job.title} ${job.description} ${job.company}`.toLowerCase();
|
||||||
|
if (!containsAllKeywords(fullText, keywords)) {
|
||||||
|
rejectedResults.push({
|
||||||
|
...job,
|
||||||
|
rejectionReason: "Not all keywords found in job listing",
|
||||||
|
});
|
||||||
|
if (process.env.DEBUG === "true") {
|
||||||
|
logger.debug(`🔍 Rejected (AND logic): "${job.title}" - not all keywords found`);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// For OR logic, trust LinkedIn's search results (already filtered)
|
||||||
|
|
||||||
// Validate location if filtering enabled
|
// Validate location if filtering enabled
|
||||||
if (locationFilter) {
|
if (locationFilter) {
|
||||||
@ -514,7 +590,7 @@ async function scrollToLoadJobs(page) {
|
|||||||
/**
|
/**
|
||||||
* Extract jobs from current page
|
* Extract jobs from current page
|
||||||
*/
|
*/
|
||||||
async function extractJobsFromPage(page, keyword, locationFilter) {
|
async function extractJobsFromPage(page, keyword, locationFilter, coreParser = null) {
|
||||||
const jobs = [];
|
const jobs = [];
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@ -644,7 +720,7 @@ async function extractJobsFromPage(page, keyword, locationFilter) {
|
|||||||
logger.debug(`Could not scroll/hover job element ${i}: ${scrollError.message}`);
|
logger.debug(`Could not scroll/hover job element ${i}: ${scrollError.message}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const job = await extractJobData(jobElement, keyword);
|
const job = await extractJobData(jobElement, keyword, page, coreParser);
|
||||||
if (job && (job.title || job.jobId)) {
|
if (job && (job.title || job.jobId)) {
|
||||||
// Only add if we have at least a title or jobId
|
// Only add if we have at least a title or jobId
|
||||||
jobs.push(job);
|
jobs.push(job);
|
||||||
@ -671,10 +747,240 @@ async function extractJobsFromPage(page, keyword, locationFilter) {
|
|||||||
return jobs;
|
return jobs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract full job description from job detail page
|
||||||
|
*/
|
||||||
|
async function extractFullJobDescription(coreParser, jobUrl) {
|
||||||
|
try {
|
||||||
|
if (!jobUrl) {
|
||||||
|
return { fullDescription: "", roleDuties: "", jobRequirements: "" };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a separate page for detail extraction to avoid disrupting search results
|
||||||
|
const detailPage = await coreParser.createPage(`linkedin-job-detail-${Date.now()}`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Navigate to job detail page
|
||||||
|
await detailPage.goto(jobUrl, { waitUntil: "networkidle2", timeout: 30000 }).catch(() => {});
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 2000)); // Wait for content to load
|
||||||
|
|
||||||
|
const jobDetails = await detailPage.evaluate(() => {
|
||||||
|
const details = {
|
||||||
|
fullDescription: "",
|
||||||
|
roleDuties: "",
|
||||||
|
jobRequirements: "",
|
||||||
|
};
|
||||||
|
|
||||||
|
// Try multiple selectors for job description container
|
||||||
|
const descriptionSelectors = [
|
||||||
|
".description__text",
|
||||||
|
".show-more-less-html__markup",
|
||||||
|
"[class*='description__text']",
|
||||||
|
"[class*='job-description']",
|
||||||
|
".jobs-description__text",
|
||||||
|
".jobs-box__html-content",
|
||||||
|
"[data-test-id='job-description']",
|
||||||
|
".jobs-details__main-content",
|
||||||
|
".jobs-description-content__text",
|
||||||
|
];
|
||||||
|
|
||||||
|
let descriptionElement = null;
|
||||||
|
for (const selector of descriptionSelectors) {
|
||||||
|
descriptionElement = document.querySelector(selector);
|
||||||
|
if (descriptionElement) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (descriptionElement) {
|
||||||
|
details.fullDescription = descriptionElement.textContent?.trim() ||
|
||||||
|
descriptionElement.innerText?.trim() || "";
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we didn't find description, try to get from main content area
|
||||||
|
if (!details.fullDescription) {
|
||||||
|
const mainContent = document.querySelector("main") ||
|
||||||
|
document.querySelector("[class*='jobs-details']") ||
|
||||||
|
document.querySelector("[class*='job-details']");
|
||||||
|
if (mainContent) {
|
||||||
|
details.fullDescription = mainContent.textContent?.trim() ||
|
||||||
|
mainContent.innerText?.trim() || "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return details;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Parse duties and requirements from full description
|
||||||
|
const parsed = parseDutiesAndRequirements(jobDetails.fullDescription);
|
||||||
|
|
||||||
|
return {
|
||||||
|
fullDescription: jobDetails.fullDescription,
|
||||||
|
roleDuties: parsed.duties,
|
||||||
|
jobRequirements: parsed.requirements,
|
||||||
|
};
|
||||||
|
} finally {
|
||||||
|
// Close the detail page to free resources
|
||||||
|
try {
|
||||||
|
await detailPage.close();
|
||||||
|
} catch (closeError) {
|
||||||
|
// Ignore close errors
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
logger.warning(`Failed to extract full job description from ${jobUrl}: ${error.message}`);
|
||||||
|
return { fullDescription: "", roleDuties: "", jobRequirements: "" };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse job description to separate role duties from job requirements
|
||||||
|
*/
|
||||||
|
function parseDutiesAndRequirements(description) {
|
||||||
|
if (!description || description.trim().length === 0) {
|
||||||
|
return { duties: "", requirements: "" };
|
||||||
|
}
|
||||||
|
|
||||||
|
const duties = [];
|
||||||
|
const requirements = [];
|
||||||
|
|
||||||
|
// Common section headers that indicate duties/responsibilities
|
||||||
|
const dutiesKeywords = [
|
||||||
|
/responsibilities?:/i,
|
||||||
|
/duties?:/i,
|
||||||
|
/what you['\u2019]ll do/i,
|
||||||
|
/key responsibilities/i,
|
||||||
|
/your role/i,
|
||||||
|
/position overview/i,
|
||||||
|
/about the role/i,
|
||||||
|
/role overview/i,
|
||||||
|
/what we need/i,
|
||||||
|
/you will:/i,
|
||||||
|
/you['\u2019]ll be responsible/i,
|
||||||
|
];
|
||||||
|
|
||||||
|
// Common section headers that indicate requirements/qualifications
|
||||||
|
const requirementsKeywords = [
|
||||||
|
/requirements?:/i,
|
||||||
|
/qualifications?:/i,
|
||||||
|
/must have/i,
|
||||||
|
/required:/i,
|
||||||
|
/what you['\u2019]ll bring/i,
|
||||||
|
/you have:/i,
|
||||||
|
/skills required/i,
|
||||||
|
/minimum requirements/i,
|
||||||
|
/preferred qualifications/i,
|
||||||
|
/education:/i,
|
||||||
|
/experience:/i,
|
||||||
|
/you must have/i,
|
||||||
|
/we['\u2019]re looking for/i,
|
||||||
|
];
|
||||||
|
|
||||||
|
// Split description into sections (by common delimiters)
|
||||||
|
const sections = description.split(/\n\s*\n|\r\n\s*\r\n/).filter(s => s.trim().length > 0);
|
||||||
|
|
||||||
|
let currentSection = "duties"; // Default to duties
|
||||||
|
let dutiesText = "";
|
||||||
|
let requirementsText = "";
|
||||||
|
|
||||||
|
for (const section of sections) {
|
||||||
|
const sectionLower = section.toLowerCase();
|
||||||
|
|
||||||
|
// Check if this section is about requirements
|
||||||
|
let isRequirementsSection = false;
|
||||||
|
for (const keyword of requirementsKeywords) {
|
||||||
|
if (keyword.test(section)) {
|
||||||
|
isRequirementsSection = true;
|
||||||
|
currentSection = "requirements";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this section is about duties/responsibilities
|
||||||
|
if (!isRequirementsSection) {
|
||||||
|
for (const keyword of dutiesKeywords) {
|
||||||
|
if (keyword.test(section)) {
|
||||||
|
currentSection = "duties";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add to appropriate section
|
||||||
|
if (currentSection === "requirements") {
|
||||||
|
requirementsText += (requirementsText ? "\n\n" : "") + section.trim();
|
||||||
|
} else {
|
||||||
|
dutiesText += (dutiesText ? "\n\n" : "") + section.trim();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we couldn't split by sections, try to find bullet points or numbered lists
|
||||||
|
if (!dutiesText && !requirementsText) {
|
||||||
|
const lines = description.split(/\n/);
|
||||||
|
let foundRequirementsHeader = false;
|
||||||
|
|
||||||
|
for (let i = 0; i < lines.length; i++) {
|
||||||
|
const line = lines[i].trim();
|
||||||
|
if (line.length === 0) continue;
|
||||||
|
|
||||||
|
// Check if this line is a requirements header
|
||||||
|
for (const keyword of requirementsKeywords) {
|
||||||
|
if (keyword.test(line)) {
|
||||||
|
foundRequirementsHeader = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (foundRequirementsHeader) {
|
||||||
|
requirementsText += (requirementsText ? "\n" : "") + line;
|
||||||
|
} else {
|
||||||
|
// Check if it's a duties header
|
||||||
|
let isDutiesHeader = false;
|
||||||
|
for (const keyword of dutiesKeywords) {
|
||||||
|
if (keyword.test(line)) {
|
||||||
|
isDutiesHeader = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!isDutiesHeader) {
|
||||||
|
// Add to duties if we haven't found requirements header yet
|
||||||
|
if (!foundRequirementsHeader) {
|
||||||
|
dutiesText += (dutiesText ? "\n" : "") + line;
|
||||||
|
} else {
|
||||||
|
requirementsText += (requirementsText ? "\n" : "") + line;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
dutiesText += (dutiesText ? "\n" : "") + line;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: if we still have nothing separated, put first 60% in duties, rest in requirements
|
||||||
|
if (!dutiesText && !requirementsText && description) {
|
||||||
|
const midPoint = Math.floor(description.length * 0.6);
|
||||||
|
const lastRequirementsKeyword = description.toLowerCase().lastIndexOf("requirement");
|
||||||
|
const lastQualificationsKeyword = description.toLowerCase().lastIndexOf("qualification");
|
||||||
|
const splitPoint = Math.max(
|
||||||
|
lastRequirementsKeyword > 0 ? lastRequirementsKeyword : midPoint,
|
||||||
|
lastQualificationsKeyword > 0 ? lastQualificationsKeyword : midPoint
|
||||||
|
);
|
||||||
|
|
||||||
|
dutiesText = description.substring(0, splitPoint).trim();
|
||||||
|
requirementsText = description.substring(splitPoint).trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
duties: dutiesText.trim(),
|
||||||
|
requirements: requirementsText.trim(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract data from individual job element
|
* Extract data from individual job element
|
||||||
*/
|
*/
|
||||||
async function extractJobData(jobElement, keyword) {
|
async function extractJobData(jobElement, keyword, page = null, coreParser = null) {
|
||||||
try {
|
try {
|
||||||
const jobData = await jobElement.evaluate((el) => {
|
const jobData = await jobElement.evaluate((el) => {
|
||||||
const data = {
|
const data = {
|
||||||
@ -1191,6 +1497,20 @@ async function extractJobData(jobElement, keyword) {
|
|||||||
// Generate job ID if not found
|
// Generate job ID if not found
|
||||||
const jobId = jobData.jobId || `linkedin-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
|
const jobId = jobData.jobId || `linkedin-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
|
||||||
|
|
||||||
|
// Extract full job details if coreParser and jobUrl are provided
|
||||||
|
let fullDetails = { fullDescription: "", roleDuties: "", jobRequirements: "" };
|
||||||
|
if (coreParser && jobUrl) {
|
||||||
|
try {
|
||||||
|
fullDetails = await extractFullJobDescription(coreParser, jobUrl);
|
||||||
|
// If we got full description, update the description field
|
||||||
|
if (fullDetails.fullDescription) {
|
||||||
|
jobData.description = fullDetails.fullDescription;
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
logger.debug(`Could not extract full job details for ${jobUrl}: ${error.message}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
jobId,
|
jobId,
|
||||||
title,
|
title,
|
||||||
@ -1198,7 +1518,9 @@ async function extractJobData(jobElement, keyword) {
|
|||||||
location: cleanText(jobData.location),
|
location: cleanText(jobData.location),
|
||||||
jobUrl,
|
jobUrl,
|
||||||
postedDate: jobData.postedDate,
|
postedDate: jobData.postedDate,
|
||||||
description: cleanText(jobData.description),
|
description: cleanText(fullDetails.fullDescription || jobData.description),
|
||||||
|
roleDuties: cleanText(fullDetails.roleDuties),
|
||||||
|
jobRequirements: cleanText(fullDetails.jobRequirements),
|
||||||
jobType: jobData.jobType,
|
jobType: jobData.jobType,
|
||||||
experienceLevel: jobData.experienceLevel,
|
experienceLevel: jobData.experienceLevel,
|
||||||
keyword,
|
keyword,
|
||||||
|
|||||||
@ -8,6 +8,8 @@ const {
|
|||||||
logger,
|
logger,
|
||||||
cleanText,
|
cleanText,
|
||||||
containsAnyKeyword,
|
containsAnyKeyword,
|
||||||
|
containsAllKeywords,
|
||||||
|
matchesKeywordGroups,
|
||||||
validateLocationAgainstFilters,
|
validateLocationAgainstFilters,
|
||||||
} = require("ai-analyzer");
|
} = require("ai-analyzer");
|
||||||
|
|
||||||
@ -34,9 +36,11 @@ function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
|
|||||||
async function skipthedriveStrategy(coreParser, options = {}) {
|
async function skipthedriveStrategy(coreParser, options = {}) {
|
||||||
const {
|
const {
|
||||||
keywords = ["software engineer", "developer", "programmer"],
|
keywords = ["software engineer", "developer", "programmer"],
|
||||||
|
keywordGroups = null, // Array of keyword groups for grouped AND/OR logic
|
||||||
locationFilter = null,
|
locationFilter = null,
|
||||||
maxPages = 5,
|
maxPages = 5,
|
||||||
jobTypes = [],
|
jobTypes = [],
|
||||||
|
useAndLogic = false, // Use AND logic instead of OR logic for keywords
|
||||||
} = options;
|
} = options;
|
||||||
|
|
||||||
const results = [];
|
const results = [];
|
||||||
@ -49,11 +53,29 @@ async function skipthedriveStrategy(coreParser, options = {}) {
|
|||||||
|
|
||||||
logger.info("🚀 Starting SkipTheDrive parser...");
|
logger.info("🚀 Starting SkipTheDrive parser...");
|
||||||
logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
|
logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
|
||||||
|
if (keywordGroups) {
|
||||||
|
logger.info(`🔗 Keyword Logic: Grouped AND/OR - ${keywordGroups.map(g => `(${g.join(' OR ')})`).join(' AND ')}`);
|
||||||
|
} else {
|
||||||
|
logger.info(`🔗 Keyword Logic: ${useAndLogic ? "AND (all keywords must match)" : "OR (any keyword matches)"}`);
|
||||||
|
}
|
||||||
logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
|
logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
|
||||||
logger.info(`📄 Max Pages: ${maxPages}`);
|
logger.info(`📄 Max Pages: ${maxPages}`);
|
||||||
|
|
||||||
// Search for each keyword
|
// Determine search keywords based on logic type
|
||||||
for (const keyword of keywords) {
|
let searchKeywords;
|
||||||
|
if (keywordGroups) {
|
||||||
|
// For grouped AND/OR logic, search each keyword in each group (OR within groups)
|
||||||
|
searchKeywords = keywordGroups.flat(); // Flatten all keywords from all groups
|
||||||
|
} else if (useAndLogic) {
|
||||||
|
// For simple AND logic, combine all keywords into a single search query
|
||||||
|
searchKeywords = [keywords.join(" ")];
|
||||||
|
} else {
|
||||||
|
// For OR logic, search each keyword separately
|
||||||
|
searchKeywords = keywords;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Search for each keyword (or combined keyword for AND logic)
|
||||||
|
for (const keyword of searchKeywords) {
|
||||||
logger.info(`\n🔍 Searching for: ${keyword}`);
|
logger.info(`\n🔍 Searching for: ${keyword}`);
|
||||||
|
|
||||||
const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
|
const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
|
||||||
@ -92,7 +114,10 @@ async function skipthedriveStrategy(coreParser, options = {}) {
|
|||||||
const pageJobs = await extractJobsFromPage(
|
const pageJobs = await extractJobsFromPage(
|
||||||
page,
|
page,
|
||||||
keyword,
|
keyword,
|
||||||
locationFilter
|
locationFilter,
|
||||||
|
keywords,
|
||||||
|
keywordGroups,
|
||||||
|
useAndLogic
|
||||||
);
|
);
|
||||||
|
|
||||||
for (const job of pageJobs) {
|
for (const job of pageJobs) {
|
||||||
@ -100,6 +125,29 @@ async function skipthedriveStrategy(coreParser, options = {}) {
|
|||||||
if (seenJobs.has(job.jobId)) continue;
|
if (seenJobs.has(job.jobId)) continue;
|
||||||
seenJobs.add(job.jobId);
|
seenJobs.add(job.jobId);
|
||||||
|
|
||||||
|
// Validate keywords based on logic type
|
||||||
|
if (keywordGroups) {
|
||||||
|
// Grouped AND/OR logic: all groups must match (AND), at least one keyword per group (OR)
|
||||||
|
const fullText = `${job.title} ${job.description} ${job.company}`;
|
||||||
|
if (!matchesKeywordGroups(fullText, keywordGroups)) {
|
||||||
|
rejectedResults.push({
|
||||||
|
...job,
|
||||||
|
rejectionReason: "Job does not match all keyword groups",
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else if (useAndLogic) {
|
||||||
|
// Simple AND logic: all keywords must match
|
||||||
|
const fullText = `${job.title} ${job.description} ${job.company}`.toLowerCase();
|
||||||
|
if (!containsAllKeywords(fullText, keywords)) {
|
||||||
|
rejectedResults.push({
|
||||||
|
...job,
|
||||||
|
rejectionReason: "Not all keywords found in job listing",
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Validate location if filtering enabled
|
// Validate location if filtering enabled
|
||||||
if (locationFilter) {
|
if (locationFilter) {
|
||||||
const locationValid = validateLocationAgainstFilters(
|
const locationValid = validateLocationAgainstFilters(
|
||||||
@ -160,7 +208,7 @@ async function skipthedriveStrategy(coreParser, options = {}) {
|
|||||||
/**
|
/**
|
||||||
* Extract jobs from current page
|
* Extract jobs from current page
|
||||||
*/
|
*/
|
||||||
async function extractJobsFromPage(page, keyword, locationFilter) {
|
async function extractJobsFromPage(page, keyword, locationFilter, allKeywords = [], keywordGroups = null, useAndLogic = false) {
|
||||||
const jobs = [];
|
const jobs = [];
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@ -184,6 +232,147 @@ async function extractJobsFromPage(page, keyword, locationFilter) {
|
|||||||
return jobs;
|
return jobs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Parse a job description into two parts: role duties and job requirements.
 *
 * The description is split on blank lines into paragraph-like sections; each
 * section is routed to "duties" or "requirements" based on common section
 * headers (e.g. "Responsibilities:", "Requirements:"). Sections with no
 * recognizable header inherit the most recent section type, defaulting to
 * duties. Two defensive fallbacks handle descriptions that fail to split:
 * a line-by-line pass, then a positional split.
 *
 * @param {string} description - Raw job description text (may be null/empty).
 * @returns {{duties: string, requirements: string}} Separated text; either
 *   field may be "" when nothing could be attributed to it.
 */
function parseDutiesAndRequirements(description) {
  if (!description || description.trim().length === 0) {
    return { duties: "", requirements: "" };
  }

  // Common section headers that indicate duties/responsibilities
  const dutiesKeywords = [
    /responsibilities?:/i,
    /duties?:/i,
    /what you['\u2019]ll do/i,
    /key responsibilities/i,
    /your role/i,
    /position overview/i,
    /about the role/i,
    /role overview/i,
    /what we need/i,
    /you will:/i,
    /you['\u2019]ll be responsible/i,
  ];

  // Common section headers that indicate requirements/qualifications
  const requirementsKeywords = [
    /requirements?:/i,
    /qualifications?:/i,
    /must have/i,
    /required:/i,
    /what you['\u2019]ll bring/i,
    /you have:/i,
    /skills required/i,
    /minimum requirements/i,
    /preferred qualifications/i,
    /education:/i,
    /experience:/i,
    /you must have/i,
    /we['\u2019]re looking for/i,
  ];

  // None of the patterns carry /g, so .test() here is stateless.
  const matchesAny = (patterns, text) => patterns.some((re) => re.test(text));

  // Split description into sections (by blank-line delimiters)
  const sections = description
    .split(/\n\s*\n|\r\n\s*\r\n/)
    .filter((s) => s.trim().length > 0);

  let currentSection = "duties"; // Text before any header defaults to duties
  let dutiesText = "";
  let requirementsText = "";

  for (const section of sections) {
    // A requirements header wins over a duties header when both appear
    // in the same section (matches the original precedence).
    if (matchesAny(requirementsKeywords, section)) {
      currentSection = "requirements";
    } else if (matchesAny(dutiesKeywords, section)) {
      currentSection = "duties";
    }

    if (currentSection === "requirements") {
      requirementsText += (requirementsText ? "\n\n" : "") + section.trim();
    } else {
      dutiesText += (dutiesText ? "\n\n" : "") + section.trim();
    }
  }

  // NOTE(review): given the non-empty guard above, the section loop always
  // appends at least one section, so the fallbacks below are defensive only.

  // Fallback 1: route line-by-line, switching permanently to "requirements"
  // once a requirements header line is seen. (Equivalent to the original
  // logic: the original's nested duties-header checks all resolved to
  // "duties before the header, requirements after".)
  if (!dutiesText && !requirementsText) {
    let foundRequirementsHeader = false;

    for (const rawLine of description.split(/\n/)) {
      const line = rawLine.trim();
      if (line.length === 0) continue;

      if (matchesAny(requirementsKeywords, line)) {
        foundRequirementsHeader = true;
      }

      if (foundRequirementsHeader) {
        requirementsText += (requirementsText ? "\n" : "") + line;
      } else {
        dutiesText += (dutiesText ? "\n" : "") + line;
      }
    }
  }

  // Fallback 2: positional split — prefer the last occurrence of a
  // requirements-ish word, otherwise cut 60% of the way into the text.
  if (!dutiesText && !requirementsText) {
    const lower = description.toLowerCase();
    const midPoint = Math.floor(description.length * 0.6);
    const lastRequirementsKeyword = lower.lastIndexOf("requirement");
    const lastQualificationsKeyword = lower.lastIndexOf("qualification");
    // >= 0 (was > 0): a keyword at index 0 is a valid hit, not "not found".
    const splitPoint = Math.max(
      lastRequirementsKeyword >= 0 ? lastRequirementsKeyword : midPoint,
      lastQualificationsKeyword >= 0 ? lastQualificationsKeyword : midPoint
    );

    dutiesText = description.substring(0, splitPoint).trim();
    requirementsText = description.substring(splitPoint).trim();
  }

  return {
    duties: dutiesText.trim(),
    requirements: requirementsText.trim(),
  };
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract data from individual job element
|
* Extract data from individual job element
|
||||||
*/
|
*/
|
||||||
@ -242,6 +431,9 @@ async function extractJobData(jobElement, keyword) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse duties and requirements from description if available
|
||||||
|
const parsed = parseDutiesAndRequirements(description);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
jobId,
|
jobId,
|
||||||
title,
|
title,
|
||||||
@ -252,6 +444,8 @@ async function extractJobData(jobElement, keyword) {
|
|||||||
dateText,
|
dateText,
|
||||||
daysAgo,
|
daysAgo,
|
||||||
description,
|
description,
|
||||||
|
roleDuties: parsed.duties,
|
||||||
|
jobRequirements: parsed.requirements,
|
||||||
isFeatured,
|
isFeatured,
|
||||||
keyword,
|
keyword,
|
||||||
extractedAt: new Date().toISOString(),
|
extractedAt: new Date().toISOString(),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user