diff --git a/core-parser/index.js b/core-parser/index.js
index ae1a0e0..a721783 100644
--- a/core-parser/index.js
+++ b/core-parser/index.js
@@ -62,3 +62,5 @@ class CoreParser {
module.exports = CoreParser;
+
+
diff --git a/job-search-parser/README.md b/job-search-parser/README.md
index 487b08e..5896666 100644
--- a/job-search-parser/README.md
+++ b/job-search-parser/README.md
@@ -1,497 +1,530 @@
-# Job Search Parser - Job Market Intelligence
-
-Specialized parser for job market intelligence, tracking job postings, market trends, and competitive analysis. Focuses on tech roles and industry insights.
-
-## ๐ฏ Purpose
-
-The Job Search Parser is designed to:
-
-- **Track Job Market Trends**: Monitor demand for specific roles and skills
-- **Competitive Intelligence**: Analyze salary ranges and requirements
-- **Industry Insights**: Track hiring patterns across different sectors
-- **Skill Gap Analysis**: Identify in-demand technologies and frameworks
-- **Market Demand Forecasting**: Predict job market trends
-
-## ๐ Features
-
-### Core Functionality
-
-- **Multi-Source Aggregation**: Collect job data from multiple platforms
-- **Role-Specific Tracking**: Focus on tech roles and emerging positions
-- **Skill Analysis**: Extract and categorize required skills
-- **Salary Intelligence**: Track compensation ranges and trends
-- **Company Intelligence**: Monitor hiring companies and patterns
-
-### Advanced Features
-
-- **Market Trend Analysis**: Identify growing and declining job categories
-- **Geographic Distribution**: Track job distribution by location
-- **Experience Level Analysis**: Entry, mid, senior level tracking
-- **Remote Work Trends**: Monitor remote/hybrid work patterns
-- **Technology Stack Tracking**: Framework and tool popularity
-
-## ๐ Supported Job Sites
-
-### ✅ Implemented Parsers
-
-#### SkipTheDrive Parser
-
-Remote job board specializing in work-from-home positions.
-
-**Features:**
-
-- Keyword-based job search with relevance sorting
-- Job type filtering (full-time, part-time, contract)
-- Multi-page result parsing with pagination
-- Featured/sponsored job identification
-- AI-powered job relevance analysis
-- Automatic duplicate detection
-
-**Usage:**
-
-```bash
-# Parse SkipTheDrive for QA automation jobs
-node index.js --sites=skipthedrive --keywords="automation qa,qa engineer"
-
-# Filter by job type
-JOB_TYPES="full time,contract" node index.js --sites=skipthedrive
-
-# Run demo with limited results
-node index.js --sites=skipthedrive --demo
-```
-
-### ๐ง Planned Parsers
-
-- **Indeed**: Comprehensive job aggregator
-- **Glassdoor**: Jobs with company reviews and salary data
-- **Monster**: Traditional job board
-- **SimplyHired**: Job aggregator with salary estimates
-- **LinkedIn Jobs**: Professional network job postings
-- **AngelList**: Startup and tech jobs
-- **Remote.co**: Dedicated remote work jobs
-- **FlexJobs**: Flexible and remote positions
-
-## ๐ฆ Installation
-
-```bash
-# Install dependencies
-npm install
-
-# Run tests
-npm test
-
-# Run demo
-node demo.js
-```
-
-## ๐ง Configuration
-
-### Environment Variables
-
-Create a `.env` file in the parser directory:
-
-```env
-# Job Search Configuration
-SEARCH_SOURCES=linkedin,indeed,glassdoor
-TARGET_ROLES=software engineer,data scientist,product manager
-LOCATION_FILTER=Toronto,Vancouver,Calgary
-EXPERIENCE_LEVELS=entry,mid,senior
-REMOTE_PREFERENCE=remote,hybrid,onsite
-
-# Analysis Configuration
-ENABLE_SALARY_ANALYSIS=true
-ENABLE_SKILL_ANALYSIS=true
-ENABLE_TREND_ANALYSIS=true
-MIN_SALARY=50000
-MAX_SALARY=200000
-
-# Output Configuration
-OUTPUT_FORMAT=json,csv
-SAVE_RAW_DATA=true
-ANALYSIS_INTERVAL=daily
-```
-
-### Command Line Options
-
-```bash
-# Basic usage
-node index.js
-
-# Specific roles
-node index.js --roles="frontend developer,backend developer"
-
-# Geographic focus
-node index.js --locations="Toronto,Vancouver"
-
-# Experience level
-node index.js --experience="senior"
-
-# Output format
-node index.js --output=results/job-market-analysis.json
-```
-
-**Available Options:**
-
-- `--roles="role1,role2"`: Target job roles
-- `--locations="city1,city2"`: Geographic focus
-- `--experience="entry|mid|senior"`: Experience level
-- `--remote="remote|hybrid|onsite"`: Remote work preference
-- `--salary-min=NUMBER`: Minimum salary filter
-- `--salary-max=NUMBER`: Maximum salary filter
-- `--output=FILE`: Output filename
-- `--format=json|csv`: Output format
-- `--trends`: Enable trend analysis
-- `--skills`: Enable skill analysis
-
-## ๐ Keywords
-
-### Role-Specific Keywords
-
-Place keyword CSV files in the `keywords/` directory:
-
-```
-job-search-parser/
-โโโ keywords/
-โ โโโ job-search-keywords.csv # General job search terms
-โ โโโ tech-roles.csv # Technology roles
-โ โโโ data-roles.csv # Data science roles
-โ โโโ management-roles.csv # Management positions
-โ โโโ emerging-roles.csv # Emerging job categories
-โโโ index.js
-```
-
-### Tech Roles Keywords
-
-```csv
-keyword
-software engineer
-frontend developer
-backend developer
-full stack developer
-data scientist
-machine learning engineer
-devops engineer
-site reliability engineer
-cloud architect
-security engineer
-mobile developer
-iOS developer
-Android developer
-react developer
-vue developer
-angular developer
-node.js developer
-python developer
-java developer
-golang developer
-rust developer
-data engineer
-analytics engineer
-```
-
-### Data Science Keywords
-
-```csv
-keyword
-data scientist
-machine learning engineer
-data analyst
-business analyst
-data engineer
-analytics engineer
-ML engineer
-AI engineer
-statistician
-quantitative analyst
-research scientist
-data architect
-BI developer
-ETL developer
-```
-
-## ๐ Usage Examples
-
-### Basic Job Search
-
-```bash
-# Standard job market analysis
-node index.js
-
-# Specific tech roles
-node index.js --roles="software engineer,data scientist"
-
-# Geographic focus
-node index.js --locations="Toronto,Vancouver,Calgary"
-```
-
-### Advanced Analysis
-
-```bash
-# Senior level positions
-node index.js --experience="senior" --salary-min=100000
-
-# Remote work opportunities
-node index.js --remote="remote" --roles="frontend developer"
-
-# Trend analysis
-node index.js --trends --skills --output=results/trends.json
-```
-
-### Market Intelligence
-
-```bash
-# Salary analysis
-node index.js --salary-min=80000 --salary-max=150000
-
-# Skill gap analysis
-node index.js --skills --roles="machine learning engineer"
-
-# Competitive intelligence
-node index.js --companies="Google,Microsoft,Amazon"
-```
-
-## ๐ Output Format
-
-### JSON Structure
-
-```json
-{
- "metadata": {
- "timestamp": "2024-01-15T10:30:00Z",
- "search_parameters": {
- "roles": ["software engineer", "data scientist"],
- "locations": ["Toronto", "Vancouver"],
- "experience_levels": ["mid", "senior"],
- "remote_preference": ["remote", "hybrid"]
- },
- "total_jobs_found": 1250,
- "analysis_duration_seconds": 45
- },
- "market_overview": {
- "total_jobs": 1250,
- "average_salary": 95000,
- "salary_range": {
- "min": 65000,
- "max": 180000,
- "median": 92000
- },
- "remote_distribution": {
- "remote": 45,
- "hybrid": 35,
- "onsite": 20
- },
- "experience_distribution": {
- "entry": 15,
- "mid": 45,
- "senior": 40
- }
- },
- "trends": {
- "growing_skills": [
- { "skill": "React", "growth_rate": 25 },
- { "skill": "Python", "growth_rate": 18 },
- { "skill": "AWS", "growth_rate": 22 }
- ],
- "declining_skills": [
- { "skill": "jQuery", "growth_rate": -12 },
- { "skill": "PHP", "growth_rate": -8 }
- ],
- "emerging_roles": ["AI Engineer", "DevSecOps Engineer", "Data Engineer"]
- },
- "jobs": [
- {
- "id": "job_1",
- "title": "Senior Software Engineer",
- "company": "TechCorp",
- "location": "Toronto, Ontario",
- "remote_type": "hybrid",
- "salary": {
- "min": 100000,
- "max": 140000,
- "currency": "CAD"
- },
- "required_skills": ["React", "Node.js", "TypeScript", "AWS"],
- "preferred_skills": ["GraphQL", "Docker", "Kubernetes"],
- "experience_level": "senior",
- "job_url": "https://example.com/job/1",
- "posted_date": "2024-01-10T09:00:00Z",
- "scraped_at": "2024-01-15T10:30:00Z"
- }
- ],
- "analysis": {
- "skill_demand": {
- "React": { "count": 45, "avg_salary": 98000 },
- "Python": { "count": 38, "avg_salary": 102000 },
- "AWS": { "count": 32, "avg_salary": 105000 }
- },
- "company_insights": {
- "top_hirers": [
- { "company": "TechCorp", "jobs": 25 },
- { "company": "StartupXYZ", "jobs": 18 }
- ],
- "salary_leaders": [
- { "company": "BigTech", "avg_salary": 120000 },
- { "company": "FinTech", "avg_salary": 115000 }
- ]
- }
- }
-}
-```
-
-### CSV Output
-
-The parser can also generate CSV files for easy analysis:
-
-```csv
-job_id,title,company,location,remote_type,salary_min,salary_max,required_skills,experience_level,posted_date
-job_1,Senior Software Engineer,TechCorp,Toronto,hybrid,100000,140000,"React,Node.js,TypeScript",senior,2024-01-10
-job_2,Data Scientist,DataCorp,Vancouver,remote,90000,130000,"Python,SQL,ML",mid,2024-01-09
-```
-
-## ๐ Security & Best Practices
-
-### Data Privacy
-
-- Respect job site terms of service
-- Implement appropriate rate limiting
-- Store data securely and responsibly
-- Anonymize sensitive information
-
-### Rate Limiting
-
-- Implement delays between requests
-- Respect API rate limits
-- Use multiple data sources
-- Monitor for blocking/detection
-
-### Legal Compliance
-
-- Educational and research purposes only
-- Respect website terms of service
-- Implement data retention policies
-- Monitor for legal changes
-
-## ๐งช Testing
-
-### Run Tests
-
-```bash
-# All tests
-npm test
-
-# Specific test suites
-npm test -- --testNamePattern="JobSearch"
-npm test -- --testNamePattern="Analysis"
-npm test -- --testNamePattern="Trends"
-```
-
-### Test Coverage
-
-```bash
-npm run test:coverage
-```
-
-## ๐ Performance Optimization
-
-### Recommended Settings
-
-#### Fast Analysis
-
-```bash
-node index.js --roles="software engineer" --locations="Toronto"
-```
-
-#### Comprehensive Analysis
-
-```bash
-node index.js --trends --skills --experience="all"
-```
-
-#### Focused Intelligence
-
-```bash
-node index.js --salary-min=80000 --remote="remote" --trends
-```
-
-### Performance Tips
-
-- Use specific role filters to reduce data volume
-- Implement caching for repeated searches
-- Use parallel processing for multiple sources
-- Optimize data storage and retrieval
-
-## ๐ง Troubleshooting
-
-### Common Issues
-
-#### Rate Limiting
-
-```bash
-# Reduce request frequency
-export REQUEST_DELAY=2000
-node index.js
-```
-
-#### Data Source Issues
-
-```bash
-# Use specific sources
-node index.js --sources="linkedin,indeed"
-
-# Check source availability
-node index.js --test-sources
-```
-
-#### Output Issues
-
-```bash
-# Check output directory
-mkdir -p results
-node index.js --output=results/analysis.json
-
-# Verify file permissions
-chmod 755 results/
-```
-
-## ๐ Monitoring & Analytics
-
-### Key Metrics
-
-- **Job Volume**: Total jobs found per search
-- **Salary Trends**: Average and median salary changes
-- **Skill Demand**: Most requested skills
-- **Remote Adoption**: Remote work trend analysis
-- **Market Velocity**: Job posting frequency
-
-### Dashboard Integration
-
-- Real-time market monitoring
-- Trend visualization
-- Salary benchmarking
-- Skill gap analysis
-- Competitive intelligence
-
-## ๐ค Contributing
-
-### Development Setup
-
-1. Fork the repository
-2. Create feature branch
-3. Add tests for new functionality
-4. Ensure all tests pass
-5. Submit pull request
-
-### Code Standards
-
-- Follow existing code style
-- Add JSDoc comments
-- Maintain test coverage
-- Update documentation
-
-## ๐ License
-
-This parser is part of the LinkedOut platform and follows the same licensing terms.
-
----
-
-**Note**: This tool is designed for educational and research purposes. Always respect website terms of service and implement appropriate rate limiting and ethical usage practices.
+# Job Search Parser - Job Market Intelligence
+
+Specialized parser for job market intelligence, tracking job postings, market trends, and competitive analysis. Focuses on tech roles and industry insights.
+
+## ๐ฏ Purpose
+
+The Job Search Parser is designed to:
+
+- **Track Job Market Trends**: Monitor demand for specific roles and skills
+- **Competitive Intelligence**: Analyze salary ranges and requirements
+- **Industry Insights**: Track hiring patterns across different sectors
+- **Skill Gap Analysis**: Identify in-demand technologies and frameworks
+- **Market Demand Forecasting**: Predict job market trends
+
+## ๐ Features
+
+### Core Functionality
+
+- **Multi-Source Aggregation**: Collect job data from multiple platforms
+- **Role-Specific Tracking**: Focus on tech roles and emerging positions
+- **Skill Analysis**: Extract and categorize required skills
+- **Salary Intelligence**: Track compensation ranges and trends
+- **Company Intelligence**: Monitor hiring companies and patterns
+
+### Advanced Features
+
+- **Market Trend Analysis**: Identify growing and declining job categories
+- **Geographic Distribution**: Track job distribution by location
+- **Experience Level Analysis**: Entry, mid, senior level tracking
+- **Remote Work Trends**: Monitor remote/hybrid work patterns
+- **Technology Stack Tracking**: Framework and tool popularity
+
+## ๐ Supported Job Sites
+
+### ✅ Implemented Parsers
+
+#### SkipTheDrive Parser
+
+Remote job board specializing in work-from-home positions.
+
+**Features:**
+
+- Keyword-based job search with relevance sorting
+- Job type filtering (full-time, part-time, contract)
+- Multi-page result parsing with pagination
+- Featured/sponsored job identification
+- AI-powered job relevance analysis
+- Automatic duplicate detection
+
+**Usage:**
+
+```bash
+# Parse SkipTheDrive for QA automation jobs
+node index.js --sites=skipthedrive --keywords="automation qa,qa engineer"
+
+# Filter by job type
+JOB_TYPES="full time,contract" node index.js --sites=skipthedrive
+
+# Run demo with limited results
+node index.js --sites=skipthedrive --demo
+```
+
+#### LinkedIn Jobs Parser
+
+Professional network job postings with comprehensive job data.
+
+**Features:**
+
+- LinkedIn authentication support
+- Keyword-based job search
+- Location filtering (both LinkedIn location and post-extraction filter)
+- Multi-page result parsing with pagination
+- Job type and experience level extraction
+- Automatic duplicate detection
+- Infinite scroll handling
+
+**Requirements:**
+
+- LinkedIn credentials (username and password) must be set in `.env` file:
+ ```env
+  LINKEDIN_USERNAME=your_email@example.com
+  LINKEDIN_PASSWORD=your_password
+ LINKEDIN_JOB_LOCATION=Canada # Optional: LinkedIn location filter
+ ```
+
+**Usage:**
+
+```bash
+# Search LinkedIn jobs
+node index.js --sites=linkedin --keywords="software engineer,developer"
+
+# Search with location filter
+node index.js --sites=linkedin --keywords="co-op" --location="Ontario"
+
+# Combine multiple sites
+node index.js --sites=linkedin,skipthedrive --keywords="intern,co-op"
+```
+
+### ๐ง Planned Parsers
+
+- **Indeed**: Comprehensive job aggregator
+- **Glassdoor**: Jobs with company reviews and salary data
+- **Monster**: Traditional job board
+- **SimplyHired**: Job aggregator with salary estimates
+- **AngelList**: Startup and tech jobs
+- **Remote.co**: Dedicated remote work jobs
+- **FlexJobs**: Flexible and remote positions
+
+## ๐ฆ Installation
+
+```bash
+# Install dependencies
+npm install
+
+# Run tests
+npm test
+
+# Run demo
+node demo.js
+```
+
+## ๐ง Configuration
+
+### Environment Variables
+
+Create a `.env` file in the parser directory:
+
+```env
+# Job Search Configuration
+SEARCH_KEYWORDS=software engineer,developer,programmer
+LOCATION_FILTER=Ontario,Canada
+MAX_PAGES=5
+
+# LinkedIn Configuration (required for LinkedIn jobs)
+LINKEDIN_USERNAME=your_email@example.com
+LINKEDIN_PASSWORD=your_password
+LINKEDIN_JOB_LOCATION=Canada # Optional: LinkedIn location search
+
+# Analysis Configuration
+ENABLE_AI_ANALYSIS=false
+HEADLESS=true
+
+# Output Configuration
+OUTPUT_FORMAT=json
+```
+
+### Command Line Options
+
+```bash
+# Basic usage
+node index.js
+
+# Specific roles
+node index.js --roles="frontend developer,backend developer"
+
+# Geographic focus
+node index.js --locations="Toronto,Vancouver"
+
+# Experience level
+node index.js --experience="senior"
+
+# Output format
+node index.js --output=results/job-market-analysis.json
+```
+
+**Available Options:**
+
+- `--roles="role1,role2"`: Target job roles
+- `--locations="city1,city2"`: Geographic focus
+- `--experience="entry|mid|senior"`: Experience level
+- `--remote="remote|hybrid|onsite"`: Remote work preference
+- `--salary-min=NUMBER`: Minimum salary filter
+- `--salary-max=NUMBER`: Maximum salary filter
+- `--output=FILE`: Output filename
+- `--format=json|csv`: Output format
+- `--trends`: Enable trend analysis
+- `--skills`: Enable skill analysis
+
+## ๐ Keywords
+
+### Role-Specific Keywords
+
+Place keyword CSV files in the `keywords/` directory:
+
+```
+job-search-parser/
+โโโ keywords/
+โ โโโ job-search-keywords.csv # General job search terms
+โ โโโ tech-roles.csv # Technology roles
+โ โโโ data-roles.csv # Data science roles
+โ โโโ management-roles.csv # Management positions
+โ โโโ emerging-roles.csv # Emerging job categories
+โโโ index.js
+```
+
+### Tech Roles Keywords
+
+```csv
+keyword
+software engineer
+frontend developer
+backend developer
+full stack developer
+data scientist
+machine learning engineer
+devops engineer
+site reliability engineer
+cloud architect
+security engineer
+mobile developer
+iOS developer
+Android developer
+react developer
+vue developer
+angular developer
+node.js developer
+python developer
+java developer
+golang developer
+rust developer
+data engineer
+analytics engineer
+```
+
+### Data Science Keywords
+
+```csv
+keyword
+data scientist
+machine learning engineer
+data analyst
+business analyst
+data engineer
+analytics engineer
+ML engineer
+AI engineer
+statistician
+quantitative analyst
+research scientist
+data architect
+BI developer
+ETL developer
+```
+
+## ๐ Usage Examples
+
+### Basic Job Search
+
+```bash
+# Standard job market analysis
+node index.js
+
+# Specific tech roles
+node index.js --roles="software engineer,data scientist"
+
+# Geographic focus
+node index.js --locations="Toronto,Vancouver,Calgary"
+```
+
+### Advanced Analysis
+
+```bash
+# Senior level positions
+node index.js --experience="senior" --salary-min=100000
+
+# Remote work opportunities
+node index.js --remote="remote" --roles="frontend developer"
+
+# Trend analysis
+node index.js --trends --skills --output=results/trends.json
+```
+
+### Market Intelligence
+
+```bash
+# Salary analysis
+node index.js --salary-min=80000 --salary-max=150000
+
+# Skill gap analysis
+node index.js --skills --roles="machine learning engineer"
+
+# Competitive intelligence
+node index.js --companies="Google,Microsoft,Amazon"
+```
+
+## ๐ Output Format
+
+### JSON Structure
+
+```json
+{
+ "metadata": {
+ "timestamp": "2024-01-15T10:30:00Z",
+ "search_parameters": {
+ "roles": ["software engineer", "data scientist"],
+ "locations": ["Toronto", "Vancouver"],
+ "experience_levels": ["mid", "senior"],
+ "remote_preference": ["remote", "hybrid"]
+ },
+ "total_jobs_found": 1250,
+ "analysis_duration_seconds": 45
+ },
+ "market_overview": {
+ "total_jobs": 1250,
+ "average_salary": 95000,
+ "salary_range": {
+ "min": 65000,
+ "max": 180000,
+ "median": 92000
+ },
+ "remote_distribution": {
+ "remote": 45,
+ "hybrid": 35,
+ "onsite": 20
+ },
+ "experience_distribution": {
+ "entry": 15,
+ "mid": 45,
+ "senior": 40
+ }
+ },
+ "trends": {
+ "growing_skills": [
+ { "skill": "React", "growth_rate": 25 },
+ { "skill": "Python", "growth_rate": 18 },
+ { "skill": "AWS", "growth_rate": 22 }
+ ],
+ "declining_skills": [
+ { "skill": "jQuery", "growth_rate": -12 },
+ { "skill": "PHP", "growth_rate": -8 }
+ ],
+ "emerging_roles": ["AI Engineer", "DevSecOps Engineer", "Data Engineer"]
+ },
+ "jobs": [
+ {
+ "id": "job_1",
+ "title": "Senior Software Engineer",
+ "company": "TechCorp",
+ "location": "Toronto, Ontario",
+ "remote_type": "hybrid",
+ "salary": {
+ "min": 100000,
+ "max": 140000,
+ "currency": "CAD"
+ },
+ "required_skills": ["React", "Node.js", "TypeScript", "AWS"],
+ "preferred_skills": ["GraphQL", "Docker", "Kubernetes"],
+ "experience_level": "senior",
+ "job_url": "https://example.com/job/1",
+ "posted_date": "2024-01-10T09:00:00Z",
+ "scraped_at": "2024-01-15T10:30:00Z"
+ }
+ ],
+ "analysis": {
+ "skill_demand": {
+ "React": { "count": 45, "avg_salary": 98000 },
+ "Python": { "count": 38, "avg_salary": 102000 },
+ "AWS": { "count": 32, "avg_salary": 105000 }
+ },
+ "company_insights": {
+ "top_hirers": [
+ { "company": "TechCorp", "jobs": 25 },
+ { "company": "StartupXYZ", "jobs": 18 }
+ ],
+ "salary_leaders": [
+ { "company": "BigTech", "avg_salary": 120000 },
+ { "company": "FinTech", "avg_salary": 115000 }
+ ]
+ }
+ }
+}
+```
+
+### CSV Output
+
+The parser can also generate CSV files for easy analysis:
+
+```csv
+job_id,title,company,location,remote_type,salary_min,salary_max,required_skills,experience_level,posted_date
+job_1,Senior Software Engineer,TechCorp,Toronto,hybrid,100000,140000,"React,Node.js,TypeScript",senior,2024-01-10
+job_2,Data Scientist,DataCorp,Vancouver,remote,90000,130000,"Python,SQL,ML",mid,2024-01-09
+```
+
+## ๐ Security & Best Practices
+
+### Data Privacy
+
+- Respect job site terms of service
+- Implement appropriate rate limiting
+- Store data securely and responsibly
+- Anonymize sensitive information
+
+### Rate Limiting
+
+- Implement delays between requests
+- Respect API rate limits
+- Use multiple data sources
+- Monitor for blocking/detection
+
+### Legal Compliance
+
+- Educational and research purposes only
+- Respect website terms of service
+- Implement data retention policies
+- Monitor for legal changes
+
+## ๐งช Testing
+
+### Run Tests
+
+```bash
+# All tests
+npm test
+
+# Specific test suites
+npm test -- --testNamePattern="JobSearch"
+npm test -- --testNamePattern="Analysis"
+npm test -- --testNamePattern="Trends"
+```
+
+### Test Coverage
+
+```bash
+npm run test:coverage
+```
+
+## ๐ Performance Optimization
+
+### Recommended Settings
+
+#### Fast Analysis
+
+```bash
+node index.js --roles="software engineer" --locations="Toronto"
+```
+
+#### Comprehensive Analysis
+
+```bash
+node index.js --trends --skills --experience="all"
+```
+
+#### Focused Intelligence
+
+```bash
+node index.js --salary-min=80000 --remote="remote" --trends
+```
+
+### Performance Tips
+
+- Use specific role filters to reduce data volume
+- Implement caching for repeated searches
+- Use parallel processing for multiple sources
+- Optimize data storage and retrieval
+
+## ๐ง Troubleshooting
+
+### Common Issues
+
+#### Rate Limiting
+
+```bash
+# Reduce request frequency
+export REQUEST_DELAY=2000
+node index.js
+```
+
+#### Data Source Issues
+
+```bash
+# Use specific sources
+node index.js --sources="linkedin,indeed"
+
+# Check source availability
+node index.js --test-sources
+```
+
+#### Output Issues
+
+```bash
+# Check output directory
+mkdir -p results
+node index.js --output=results/analysis.json
+
+# Verify file permissions
+chmod 755 results/
+```
+
+## ๐ Monitoring & Analytics
+
+### Key Metrics
+
+- **Job Volume**: Total jobs found per search
+- **Salary Trends**: Average and median salary changes
+- **Skill Demand**: Most requested skills
+- **Remote Adoption**: Remote work trend analysis
+- **Market Velocity**: Job posting frequency
+
+### Dashboard Integration
+
+- Real-time market monitoring
+- Trend visualization
+- Salary benchmarking
+- Skill gap analysis
+- Competitive intelligence
+
+## ๐ค Contributing
+
+### Development Setup
+
+1. Fork the repository
+2. Create feature branch
+3. Add tests for new functionality
+4. Ensure all tests pass
+5. Submit pull request
+
+### Code Standards
+
+- Follow existing code style
+- Add JSDoc comments
+- Maintain test coverage
+- Update documentation
+
+## ๐ License
+
+This parser is part of the LinkedOut platform and follows the same licensing terms.
+
+---
+
+**Note**: This tool is designed for educational and research purposes. Always respect website terms of service and implement appropriate rate limiting and ethical usage practices.
diff --git a/job-search-parser/index.js b/job-search-parser/index.js
index 64a47df..656eed9 100644
--- a/job-search-parser/index.js
+++ b/job-search-parser/index.js
@@ -10,6 +10,7 @@ const path = require("path");
const fs = require("fs");
const CoreParser = require("../core-parser");
const { skipthedriveStrategy } = require("./strategies/skipthedrive-strategy");
+const { linkedinJobsStrategy } = require("./strategies/linkedin-jobs-strategy");
const { logger, analyzeBatch, checkOllamaStatus } = require("ai-analyzer");
// Load environment variables
@@ -18,14 +19,16 @@ require("dotenv").config({ path: path.join(__dirname, ".env") });
// Configuration from environment
const HEADLESS = process.env.HEADLESS !== "false";
const SEARCH_KEYWORDS =
- process.env.SEARCH_KEYWORDS || "software engineer,developer,programmer";
+  process.env.SEARCH_KEYWORDS || "software engineer,developer,programmer";
const LOCATION_FILTER = process.env.LOCATION_FILTER;
const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS === "true";
const MAX_PAGES = parseInt(process.env.MAX_PAGES) || 5;
+const EXCLUDE_REJECTED = process.env.EXCLUDE_REJECTED === "true";
// Available site strategies
const SITE_STRATEGIES = {
skipthedrive: skipthedriveStrategy,
+ linkedin: linkedinJobsStrategy,
// Add more site strategies here
// indeed: indeedStrategy,
// glassdoor: glassdoorStrategy,
@@ -41,6 +44,7 @@ function parseArguments() {
keywords: null,
locationFilter: null,
maxPages: MAX_PAGES,
+ excludeRejected: EXCLUDE_REJECTED,
};
args.forEach((arg) => {
@@ -57,7 +61,15 @@ function parseArguments() {
} else if (arg.startsWith("--location=")) {
options.locationFilter = arg.split("=")[1];
} else if (arg.startsWith("--max-pages=")) {
- options.maxPages = parseInt(arg.split("=")[1]) || MAX_PAGES;
+ const value = arg.split("=")[1];
+ // Support "all" or "0" to mean unlimited pages
+ if (value === "all" || value === "0") {
+ options.maxPages = 0; // 0 means unlimited
+ } else {
+ options.maxPages = parseInt(value) || MAX_PAGES;
+ }
+ } else if (arg === "--no-rejected" || arg === "--exclude-rejected") {
+ options.excludeRejected = true;
}
});
@@ -84,6 +96,7 @@ async function startJobSearchParser(options = {}) {
finalOptions.keywords || SEARCH_KEYWORDS.split(",").map((k) => k.trim());
const locationFilter = finalOptions.locationFilter || LOCATION_FILTER;
const sites = finalOptions.sites;
+ const excludeRejected = finalOptions.excludeRejected !== undefined ? finalOptions.excludeRejected : EXCLUDE_REJECTED;
logger.info(`๐ฆ Selected job sites: ${sites.join(", ")}`);
logger.info(`๐ Search Keywords: ${keywords.join(", ")}`);
@@ -108,18 +121,46 @@ async function startJobSearchParser(options = {}) {
logger.step(`\n๐ Parsing ${site}...`);
const startTime = Date.now();
- const parseResult = await strategy(coreParser, {
+ // Prepare strategy options
+ const strategyOptions = {
keywords,
locationFilter,
maxPages: finalOptions.maxPages,
- });
+ };
+
+ // Add credentials for LinkedIn
+ if (site === "linkedin") {
+ const LINKEDIN_USERNAME = process.env.LINKEDIN_USERNAME;
+ const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD;
+
+ if (!LINKEDIN_USERNAME || !LINKEDIN_PASSWORD) {
+ logger.error(`โ LinkedIn credentials not found. Please set LINKEDIN_USERNAME and LINKEDIN_PASSWORD in .env file`);
+ siteResults[site] = {
+ count: 0,
+ rejected: 0,
+ duration: "0s",
+ error: "LinkedIn credentials not found",
+ };
+ continue;
+ }
+
+ strategyOptions.credentials = {
+ username: LINKEDIN_USERNAME,
+ password: LINKEDIN_PASSWORD,
+ };
+ strategyOptions.location = process.env.LINKEDIN_JOB_LOCATION || "";
+ }
+
+ const parseResult = await strategy(coreParser, strategyOptions);
const { results, rejectedResults, summary } = parseResult;
const duration = ((Date.now() - startTime) / 1000).toFixed(2);
// Collect results
+ logger.info(`๐ฆ Strategy returned: ${results.length} results, ${rejectedResults.length} rejected`);
allResults.push(...results);
allRejectedResults.push(...rejectedResults);
+ logger.info(`๐ฆ Total accumulated: ${allResults.length} results, ${allRejectedResults.length} rejected`);
siteResults[site] = {
count: results.length,
@@ -162,6 +203,9 @@ async function startJobSearchParser(options = {}) {
}
// Save results
+ logger.info(`๐พ Preparing to save: ${allResults.length} results, ${allRejectedResults.length} rejected`);
+ logger.info(`๐พ EXCLUDE_REJECTED env: ${process.env.EXCLUDE_REJECTED}, excludeRejected variable: ${excludeRejected}`);
+
const outputData = {
metadata: {
extractedAt: new Date().toISOString(),
@@ -171,11 +215,21 @@ async function startJobSearchParser(options = {}) {
keywords: keywords.join(", "),
locationFilter,
analysisResults,
+ rejectedJobsExcluded: excludeRejected,
},
results: allResults,
- rejectedResults: allRejectedResults,
siteResults,
};
+
+ // Always include rejectedResults if not excluded (make it explicit, not using spread)
+ if (!excludeRejected) {
+ outputData.rejectedResults = allRejectedResults;
+    logger.info(`✅ Including ${allRejectedResults.length} rejected results in output`);
+ } else {
+ logger.info(`โญ๏ธ Excluding rejected results (EXCLUDE_REJECTED=true)`);
+ }
+
+ logger.info(`๐พ Final output: ${outputData.results.length} results, ${outputData.rejectedResults?.length || 0} rejected`);
const resultsDir = path.join(__dirname, "results");
if (!fs.existsSync(resultsDir)) {
diff --git a/job-search-parser/strategies/linkedin-jobs-strategy.js b/job-search-parser/strategies/linkedin-jobs-strategy.js
new file mode 100644
index 0000000..9cc4299
--- /dev/null
+++ b/job-search-parser/strategies/linkedin-jobs-strategy.js
@@ -0,0 +1,1360 @@
+/**
+ * LinkedIn Jobs Parsing Strategy
+ *
+ * Uses core-parser for browser management and ai-analyzer for utilities
+ */
+
+const {
+ logger,
+ cleanText,
+ validateLocationAgainstFilters,
+ parseLocationFilters,
+ containsAnyKeyword,
+} = require("ai-analyzer");
+
+/**
+ * LinkedIn Jobs URL builder
+ */
+function buildJobSearchUrl(keyword, location = "", filters = {}) {
+ const baseUrl = "https://www.linkedin.com/jobs/search/";
+
+ // Always wrap keywords in quotes to ensure exact phrase matching
+ // LinkedIn's search treats unquoted keywords as individual words (OR logic)
+ // e.g., "co-op" becomes "co" OR "op", "software engineer" becomes "software" OR "engineer"
+ // Wrapping in quotes forces LinkedIn to search for the exact phrase
+ // URLSearchParams will properly encode the quotes
+ const searchKeyword = `"${keyword}"`;
+
+ const params = new URLSearchParams({
+ keywords: searchKeyword,
+ sortBy: "DD", // Date posted (newest first)
+ });
+
+ if (location) {
+ params.append("location", location);
+ }
+
+ // Add additional filters
+ if (filters.experienceLevel) {
+ params.append("f_E", filters.experienceLevel);
+ }
+ if (filters.jobType) {
+ params.append("f_JT", filters.jobType); // F=Full-time, P=Part-time, C=Contract, T=Temporary, I=Internship
+ }
+ if (filters.remote) {
+ params.append("f_WT", "2"); // 2 = Remote
+ }
+
+ return `${baseUrl}?${params.toString()}`;
+}
+
+/**
+ * LinkedIn Jobs parsing strategy function
+ */
+async function linkedinJobsStrategy(coreParser, options = {}) {
+ const {
+ keywords = ["software engineer", "developer"],
+ locationFilter = null,
+ maxPages = 5,
+ credentials = {},
+ location = "", // LinkedIn location search (e.g., "Canada", "Toronto, Ontario, Canada")
+ } = options;
+
+ const results = [];
+ const rejectedResults = [];
+ const seenJobs = new Set();
+
+ // Create a backup to track results in case of issues
+ let resultsBackup = [];
+ let rejectedResultsBackup = [];
+
+ try {
+ // Create main page
+ const page = await coreParser.createPage("linkedin-jobs-main");
+
+ // Authenticate to LinkedIn
+ logger.info("๐ Authenticating to LinkedIn...");
+ await coreParser.authenticate("linkedin", credentials, "linkedin-jobs-main");
+ logger.info("✅ LinkedIn authentication successful");
+
+ logger.info("๐ Starting LinkedIn Jobs parser...");
+ logger.info(`๐ Keywords: ${keywords.join(", ")}`);
+ logger.info(`๐ Location Filter: ${locationFilter || "None"}`);
+ logger.info(`๐ LinkedIn Location: ${location || "None"}`);
+ logger.info(`๐ Max Pages: ${maxPages}`);
+
+ // Search for each keyword
+ for (const keyword of keywords) {
+ logger.info(`\n๐ Searching LinkedIn Jobs for: "${keyword}"`);
+
+ const searchUrl = buildJobSearchUrl(keyword, location);
+ logger.info(`๐ Search URL: ${searchUrl}`);
+
+ // Check if page is still valid before proceeding
+ try {
+ await page.evaluate(() => document.readyState).catch(() => {
+ throw new Error("Page is no longer valid - browser may have closed");
+ });
+ } catch (pageError) {
+ logger.error(`โ Page is no longer accessible: ${pageError.message}`);
+ logger.info(`โ ๏ธ Preserving ${results.length} jobs found so far`);
+ break; // Exit keyword loop if page is invalid
+ }
+
+ try {
+ // Navigate to job search results
+ await coreParser.navigateTo(searchUrl, {
+ pageId: "linkedin-jobs-main",
+ retries: 2,
+ waitUntil: "networkidle",
+ });
+
+ // Wait for page to load - reduced delay, use networkidle from navigation
+ await new Promise((resolve) => setTimeout(resolve, 2000));
+
+ // Verify we're on the right page and check what LinkedIn shows
+ const currentUrl = page.url();
+ logger.info(`๐ Current page URL: ${currentUrl}`);
+
+ // Check if LinkedIn shows any results count
+ try {
+ const resultsText = await page.evaluate(() => {
+ // Look for result count text like "Showing X results" or "X jobs"
+ const possibleTexts = [
+ document.querySelector("h1")?.textContent,
+ document.querySelector(".results-context-header__job-count")?.textContent,
+ document.querySelector("[class*='results-count']")?.textContent,
+ document.querySelector("[class*='job-count']")?.textContent,
+ ].filter(Boolean);
+ return possibleTexts.join(" | ") || "No results count found";
+ });
+ logger.info(`๐ LinkedIn results info: ${resultsText}`);
+ } catch (e) {
+ logger.debug(`Could not get results count: ${e.message}`);
+ }
+
+ // Scroll to trigger lazy loading - single scroll operation
+ try {
+ await page.evaluate(() => {
+ window.scrollTo(0, 500);
+ });
+ await new Promise((resolve) => setTimeout(resolve, 1000));
+ } catch (e) {
+ logger.debug(`Could not scroll page: ${e.message}`);
+ }
+
+ // Wait for job listings container - try multiple selectors
+ let hasResults = false;
+ const possibleSelectors = [
+ ".jobs-search-results-list",
+ ".jobs-search-results",
+ "[data-test-id='job-search-results-list']",
+ ".scaffold-layout__list-container",
+ "ul.scaffold-layout__list-container",
+ ".jobs-search__results-list",
+ "main .scaffold-layout__list",
+ ];
+
+ // Try selectors in parallel with shorter timeout
+ const selectorPromises = possibleSelectors.map(async (selector) => {
+ try {
+ await page.waitForSelector(selector, { timeout: 3000 });
+ const count = await page.$$(selector).then((elements) => elements.length);
+ if (count > 0) {
+ return { selector, count, success: true };
+ }
+ } catch (e) {
+ // Selector failed
+ }
+ return { selector, success: false };
+ });
+
+ // Wait for first successful selector
+ const selectorResults = await Promise.allSettled(selectorPromises);
+ for (const result of selectorResults) {
+ if (result.status === 'fulfilled' && result.value.success) {
+ hasResults = true;
+ logger.info(`✅ Found job results container with selector: ${result.value.selector}`);
+ break;
+ }
+ }
+
+ if (!hasResults) {
+ logger.warning(`โ ๏ธ No job results container found for keyword: ${keyword}`);
+
+ // Debug: Check what's actually on the page
+ try {
+ const pageTitle = await page.title();
+ const pageUrl = page.url();
+ logger.info(`๐ Page title: ${pageTitle}`);
+ logger.info(`๐ Page URL: ${pageUrl}`);
+
+ // Check for common LinkedIn elements
+ const hasMain = await page.$("main").then(el => el !== null).catch(() => false);
+ const hasJobsSection = await page.$("[class*='job']").then(el => el !== null).catch(() => false);
+ logger.info(`๐ Debug - Has main: ${hasMain}, Has jobs section: ${hasJobsSection}`);
+
+ // Take screenshot for debugging
+ const screenshotPath = `debug-linkedin-jobs-${keyword.replace(/\s+/g, '-')}-${Date.now()}.png`;
+ await page.screenshot({ path: screenshotPath, fullPage: true });
+ logger.info(`๐ธ Debug screenshot saved: ${screenshotPath}`);
+ } catch (e) {
+ logger.warning(`Could not capture debug info: ${e.message}`);
+ }
+
+ continue;
+ }
+
+ // LinkedIn uses pagination with a "Next" button
+ // Extract jobs from each page and navigate to next page
+ const allJobs = [];
+ let currentPage = 1;
+ const maxPagesToProcess = maxPages > 0 ? maxPages : 999; // 0 means unlimited
+
+ logger.info(`๐ Processing pages (max: ${maxPagesToProcess === 999 ? 'unlimited' : maxPagesToProcess}) for "${keyword}"...`);
+
+ while (currentPage <= maxPagesToProcess) {
+ logger.info(`๐ Processing page ${currentPage}...`);
+
+ // Wait for page to fully load
+ await new Promise((resolve) => setTimeout(resolve, 2000));
+
+ // Extract jobs from current page
+ const pageJobs = await extractJobsFromPage(page, keyword, locationFilter);
+ logger.info(`๐ Extracted ${pageJobs.length} jobs from page ${currentPage}`);
+
+ if (pageJobs.length === 0) {
+ logger.warning(`โ ๏ธ No jobs found on page ${currentPage}, stopping pagination`);
+ break;
+ }
+
+ allJobs.push(...pageJobs);
+
+ // Check if there's a next page
+ const hasNext = await hasNextPageAvailable(page);
+ if (!hasNext) {
+ logger.info(`✅ No more pages available. Total jobs extracted: ${allJobs.length}`);
+ break;
+ }
+
+ // Navigate to next page if we haven't reached maxPages
+ if (currentPage < maxPagesToProcess) {
+ logger.info(`โก๏ธ Navigating to page ${currentPage + 1}...`);
+ const navigationSuccess = await navigateToNextPage(page);
+
+ if (!navigationSuccess) {
+ logger.warning(`โ ๏ธ Failed to navigate to next page, stopping pagination`);
+ break;
+ }
+
+ currentPage++;
+
+ // Quick verification that job elements are present (navigateToNextPage already waited for them)
+ const jobCount = await page.$$eval(
+ "li[data-occludable-job-id], li.jobs-search-results__list-item, .scaffold-layout__list-item",
+ (elements) => elements.length
+ ).catch(() => 0);
+
+ if (jobCount === 0) {
+ logger.warning(`โ ๏ธ No job elements found on page ${currentPage} after navigation, stopping pagination`);
+ break;
+ }
+
+ logger.debug(`✅ Page ${currentPage} loaded with ${jobCount} job elements`);
+ } else {
+ logger.info(`๐ Reached max pages limit (${maxPagesToProcess}). Total jobs extracted: ${allJobs.length}`);
+ break;
+ }
+ }
+
+ logger.info(`๐ Extracted ${allJobs.length} total jobs across ${currentPage} page(s)`);
+
+ // Verify page is still valid after extraction
+ try {
+ await page.evaluate(() => document.readyState);
+ } catch (pageError) {
+ logger.warning(`โ ๏ธ Page became invalid after extraction, but we have ${allJobs.length} jobs extracted`);
+ }
+
+ // Log sample job data for debugging
+ if (allJobs.length > 0 && process.env.DEBUG === "true") {
+ const sampleJob = allJobs[0];
+ logger.debug(`๐ Sample job: ID=${sampleJob.jobId}, Title=${sampleJob.title}, Location=${sampleJob.location || 'N/A'}, Company=${sampleJob.company || 'N/A'}`);
+ }
+
+ let duplicateCount = 0;
+ let locationRejectedCount = 0;
+ let addedCount = 0;
+ let noJobIdCount = 0;
+
+ for (const job of allJobs) {
+ // Handle jobs without jobId - use URL as fallback identifier
+ if (!job.jobId || job.jobId === "") {
+ noJobIdCount++;
+ // Use job URL as identifier if no jobId
+ if (job.jobUrl) {
+ const urlMatch = job.jobUrl.match(/\/jobs\/view\/(\d+)/);
+ if (urlMatch) {
+ job.jobId = urlMatch[1];
+ } else {
+ // Generate a unique ID from URL
+ job.jobId = `linkedin-${job.jobUrl.replace(/[^a-zA-Z0-9]/g, '-')}`;
+ }
+ } else {
+ // No jobId and no URL - skip this job
+ logger.warning(`โ ๏ธ Job has no jobId or URL, skipping: ${job.title || 'Unknown'}`);
+ continue;
+ }
+ }
+
+ // Skip duplicates
+ if (seenJobs.has(job.jobId)) {
+ duplicateCount++;
+ if (process.env.DEBUG === "true") {
+ logger.debug(`โญ๏ธ Skipping duplicate job: ${job.jobId} - ${job.title}`);
+ }
+ continue;
+ }
+ seenJobs.add(job.jobId);
+
+ // REMOVED: Keyword validation - LinkedIn already filtered by keyword in search results
+ // If LinkedIn returned this job in search results, it matches the keyword.
+ // The snippet might not contain the keyword, but the full description does.
+ // Trust LinkedIn's search algorithm rather than re-validating against snippets.
+
+ // Validate location if filtering enabled
+ if (locationFilter) {
+ // Parse locationFilter string into array if it's a string
+ const locationFiltersArray = typeof locationFilter === 'string'
+ ? parseLocationFilters(locationFilter)
+ : locationFilter;
+
+ const locationValid = validateLocationAgainstFilters(
+ job.location,
+ locationFiltersArray
+ );
+
+ if (!locationValid.isValid) {
+ locationRejectedCount++;
+ rejectedResults.push({
+ ...job,
+ rejectionReason: locationValid.reasoning || "Location filter mismatch",
+ });
+ if (process.env.DEBUG === "true") {
+ logger.debug(`๐ Rejected location: "${job.location}" - ${locationValid.reasoning || "Location filter mismatch"}`);
+ }
+ continue;
+ }
+ }
+
+ results.push(job);
+ addedCount++;
+ }
+
+ // Backup results after each keyword processing
+ resultsBackup = [...results];
+ rejectedResultsBackup = [...rejectedResults];
+
+ logger.info(`๐ Processing complete: ${addedCount} added, ${locationRejectedCount} location-rejected, ${duplicateCount} duplicates, ${noJobIdCount} had no jobId`);
+ logger.info(`๐ Current results count: ${results.length} jobs accumulated so far`);
+ logger.info(`๐ Backup results count: ${resultsBackup.length} jobs in backup`);
+ } catch (error) {
+ logger.error(`Error processing keyword "${keyword}": ${error.message}`);
+ logger.error(`Stack: ${error.stack}`);
+ // Preserve results even if there's an error
+ logger.info(`โ ๏ธ Preserving ${results.length} jobs found before error`);
+ }
+ }
+
+ // Log results before returning
+ logger.info(`๐ Final results check: results.length=${results.length}, rejectedResults.length=${rejectedResults.length}`);
+ logger.info(`๐ Backup check: resultsBackup.length=${resultsBackup.length}, rejectedResultsBackup.length=${rejectedResultsBackup.length}`);
+
+ // If results array is empty but backup has data, use backup (defensive programming)
+ const finalResults = results.length > 0 ? results : resultsBackup;
+ const finalRejectedResults = rejectedResults.length > 0 ? rejectedResults : rejectedResultsBackup;
+
+ if (results.length === 0 && resultsBackup.length > 0) {
+ logger.warning(`โ ๏ธ Results array was empty but backup has ${resultsBackup.length} jobs - using backup!`);
+ }
+
+ if (finalResults.length > 0) {
+ logger.info(`๐ First result sample: ${JSON.stringify(finalResults[0], null, 2).substring(0, 200)}...`);
+ }
+
+ logger.info(
+ `๐ฏ LinkedIn Jobs parsing completed: ${finalResults.length} jobs found, ${finalRejectedResults.length} rejected`
+ );
+
+ // Final verification - log if results seem wrong
+ if (finalResults.length === 0 && finalRejectedResults.length === 0) {
+ logger.warning(`โ ๏ธ No jobs found or rejected - this might indicate an extraction issue`);
+ }
+
+ const returnValue = {
+ results: [...finalResults], // Create a copy to ensure we're returning the right data
+ rejectedResults: [...finalRejectedResults],
+ summary: {
+ totalJobs: finalResults.length,
+ totalRejected: finalRejectedResults.length,
+ keywords: keywords.join(", "),
+ locationFilter,
+ source: "linkedin-jobs",
+ },
+ };
+
+ logger.info(`๐ฆ Returning: ${returnValue.results.length} results, ${returnValue.rejectedResults.length} rejected`);
+ return returnValue;
+ } catch (error) {
+ logger.error(`โ LinkedIn Jobs parsing failed: ${error.message}`);
+ logger.error(`Stack: ${error.stack}`);
+ // Return whatever results we have, even if there was an error
+ logger.info(`โ ๏ธ Returning ${results.length} jobs found before fatal error`);
+ return {
+ results,
+ rejectedResults,
+ summary: {
+ totalJobs: results.length,
+ totalRejected: rejectedResults.length,
+ keywords: keywords.join(", "),
+ locationFilter,
+ source: "linkedin-jobs",
+ error: error.message,
+ },
+ };
+ }
+}
+
+/**
+ * Scroll to load more jobs (LinkedIn uses infinite scroll) - improved to load all jobs
+ */
+async function scrollToLoadJobs(page) {
+ try {
+ let previousJobCount = 0;
+ let currentJobCount = 0;
+ let scrollAttempts = 0;
+ let noChangeCount = 0; // Count how many times count hasn't changed
+ const maxScrollAttempts = 50; // Increased for large result sets
+ const maxNoChangeAttempts = 3; // Stop if count doesn't change 3 times in a row
+
+ logger.info(`๐ Starting to scroll and load jobs...`);
+
+ // Keep scrolling until no more jobs load
+ while (scrollAttempts < maxScrollAttempts) {
+ // Count current jobs
+ currentJobCount = await page.$$eval(
+ "li[data-occludable-job-id], li.jobs-search-results__list-item, .scaffold-layout__list-item",
+ (elements) => elements.length
+ ).catch(() => 0);
+
+ // If no new jobs loaded, increment no-change counter
+ if (currentJobCount === previousJobCount && scrollAttempts > 0) {
+ noChangeCount++;
+ // If count hasn't changed 3 times in a row, we're probably done
+ if (noChangeCount >= maxNoChangeAttempts) {
+ logger.info(`๐ Loaded ${currentJobCount} jobs after ${scrollAttempts} scrolls (no new jobs for ${noChangeCount} attempts)`);
+ break;
+ }
+ } else {
+ // Count changed, reset no-change counter
+ noChangeCount = 0;
+ }
+
+ previousJobCount = currentJobCount;
+
+ // Scroll down - use smooth scrolling to trigger lazy loading
+ await page.evaluate(() => {
+ window.scrollTo({
+ top: document.body.scrollHeight,
+ behavior: 'smooth'
+ });
+ });
+
+ // Wait for new content to load - LinkedIn sometimes needs more time
+ await new Promise((resolve) => setTimeout(resolve, 2500));
+
+ // Also try scrolling in smaller increments to trigger lazy loading
+ if (scrollAttempts % 3 === 0) {
+ await page.evaluate(() => {
+ window.scrollBy(0, 1000);
+ });
+ await new Promise((resolve) => setTimeout(resolve, 1000));
+ }
+
+ scrollAttempts++;
+
+ // Log progress every 5 scrolls
+ if (scrollAttempts % 5 === 0) {
+ const newCount = await page.$$eval(
+ "li[data-occludable-job-id], li.jobs-search-results__list-item, .scaffold-layout__list-item",
+ (elements) => elements.length
+ ).catch(() => 0);
+ logger.info(`๐ Scrolled ${scrollAttempts} times, loaded ${newCount} jobs so far...`);
+ }
+ }
+
+ // Final scroll to ensure everything is loaded
+ await page.evaluate(() => {
+ window.scrollTo(0, document.body.scrollHeight);
+ });
+ await new Promise((resolve) => setTimeout(resolve, 2000));
+
+ // Final count
+ const finalCount = await page.$$eval(
+ "li[data-occludable-job-id], li.jobs-search-results__list-item, .scaffold-layout__list-item",
+ (elements) => elements.length
+ ).catch(() => 0);
+ logger.info(`✅ Finished scrolling. Total jobs loaded: ${finalCount}`);
+
+ } catch (error) {
+ logger.warning(`Could not scroll page: ${error.message}`);
+ }
+}
+
+/**
+ * Extract jobs from current page
+ */
+async function extractJobsFromPage(page, keyword, locationFilter) {
+ const jobs = [];
+
+ try {
+ // LinkedIn job listings are in a <ul> with class "jobs-search-results__list"
+ // Each job is a <li> with class "jobs-search-results__list-item"
+ // Try multiple selectors as LinkedIn changes their structure
+ const jobSelectors = [
+ "li.jobs-search-results__list-item",
+ "li[data-occludable-job-id]",
+ ".job-card-container",
+ "[data-test-id='job-search-result']",
+ ".scaffold-layout__list-item",
+ "li.scaffold-layout__list-item",
+ "ul.scaffold-layout__list-container > li",
+ "main ul li",
+ "[class*='job-card']",
+ "[class*='job-search-result']",
+ ];
+
+ // Try selectors in parallel for faster detection
+ let jobElements = [];
+ const selectorChecks = jobSelectors.map(async (selector) => {
+ try {
+ await page.waitForSelector(selector, { timeout: 2000 }).catch(() => {});
+ const elements = await page.$$(selector);
+ if (elements.length > 0) {
+ return { selector, elements, success: true };
+ }
+ } catch (e) {
+ // Selector failed
+ }
+ return { selector, elements: [], success: false };
+ });
+
+ const selectorResults = await Promise.allSettled(selectorChecks);
+ for (const result of selectorResults) {
+ if (result.status === 'fulfilled' && result.value.success) {
+ jobElements = result.value.elements;
+ logger.info(`✅ Found ${jobElements.length} job elements using selector: ${result.value.selector}`);
+ break;
+ }
+ }
+
+ if (jobElements.length === 0) {
+ logger.warning(`โ ๏ธ No job elements found with any selector`);
+
+ // Fallback: Try to find job links directly and use their parent containers
+ try {
+ logger.info(`๐ Trying fallback: searching for job links directly...`);
+ const jobLinks = await page.$$("a[href*='/jobs/view/']");
+ if (jobLinks.length > 0) {
+ logger.info(`✅ Found ${jobLinks.length} job links using fallback method`);
+
+ // Get unique parent containers for each link
+ const seenUrls = new Set();
+ const parentElements = [];
+
+ for (const link of jobLinks) {
+ try {
+ // Extract job URL to check for duplicates
+ const href = await link.getAttribute("href");
+ if (!href || seenUrls.has(href)) continue;
+ seenUrls.add(href);
+
+ // Get parent element using evaluate
+ const parentHandle = await link.evaluateHandle((el) => {
+ return el.closest("li") || el.closest("[class*='card']") || el.closest("div") || el.parentElement;
+ });
+
+ if (parentHandle) {
+ parentElements.push(parentHandle);
+ }
+ } catch (e) {
+ // Skip if we can't process this link
+ }
+ }
+
+ if (parentElements.length > 0) {
+ jobElements = parentElements;
+ logger.info(`✅ Using ${jobElements.length} unique job elements from fallback`);
+ }
+ }
+ } catch (e) {
+ logger.warning(`Fallback method failed: ${e.message}`);
+ }
+
+ // Debug: Log what selectors we can find
+ if (jobElements.length === 0) {
+ try {
+ const allLis = await page.$$("li").then(elements => elements.length);
+ const allDivs = await page.$$("div[class*='job']").then(elements => elements.length);
+ const jobLinks = await page.$$("a[href*='/jobs/']").then(elements => elements.length);
+ logger.info(`๐ Debug - Found ${allLis} <li> elements, ${allDivs} job-related divs, ${jobLinks} job links`);
+
+ // Try to find any list container
+ const listContainers = await page.$$("ul, ol").then(elements => elements.length);
+ logger.info(`๐ Debug - Found ${listContainers} list containers`);
+ } catch (e) {
+ // Ignore debug errors
+ }
+ }
+
+ if (jobElements.length === 0) {
+ return jobs;
+ }
+ }
+
+ let extractedCount = 0;
+ let failedCount = 0;
+
+ for (let i = 0; i < jobElements.length; i++) {
+ const jobElement = jobElements[i];
+ try {
+ // Scroll job into view and hover to trigger lazy loading of content
+ try {
+ await jobElement.scrollIntoViewIfNeeded();
+ await new Promise((resolve) => setTimeout(resolve, 100)); // Small delay for content to load
+
+ // Hover over the element to trigger LinkedIn's lazy loading
+ await jobElement.hover().catch(() => {
+ // If hover fails, try scrolling again
+ jobElement.scrollIntoViewIfNeeded();
+ });
+ await new Promise((resolve) => setTimeout(resolve, 200)); // Wait for content to load after hover
+ } catch (scrollError) {
+ // If scrolling/hovering fails, continue anyway - might still have data
+ logger.debug(`Could not scroll/hover job element ${i}: ${scrollError.message}`);
+ }
+
+ const job = await extractJobData(jobElement, keyword);
+ if (job && (job.title || job.jobId)) {
+ // Only add if we have at least a title or jobId
+ jobs.push(job);
+ extractedCount++;
+ } else {
+ failedCount++;
+ if (process.env.DEBUG === "true") {
+ logger.debug(`Job ${i} extraction returned empty: jobId=${job?.jobId || 'none'}, title=${job?.title || 'none'}`);
+ }
+ }
+ } catch (error) {
+ logger.warning(`Failed to extract job data for element ${i}: ${error.message}`);
+ failedCount++;
+ }
+ }
+
+ if (jobElements.length > 0) {
+ logger.info(`๐ Extraction summary: ${extractedCount} successful, ${failedCount} failed out of ${jobElements.length} job elements`);
+ }
+ } catch (error) {
+ logger.error(`Failed to extract jobs from page: ${error.message}`);
+ }
+
+ return jobs;
+}
+
+/**
+ * Extract data from individual job element
+ */
+async function extractJobData(jobElement, keyword) {
+ try {
+ const jobData = await jobElement.evaluate((el) => {
+ const data = {
+ jobId: "",
+ title: "",
+ company: "",
+ location: "",
+ jobUrl: "",
+ postedDate: "",
+ description: "",
+ jobType: "",
+ experienceLevel: "",
+ };
+
+ // Extract job ID from data-job-id or link
+ const jobIdAttr = el.getAttribute("data-job-id") ||
+ el.getAttribute("data-occludable-job-id") ||
+ el.querySelector("[data-job-id]")?.getAttribute("data-job-id");
+
+ if (jobIdAttr) {
+ data.jobId = jobIdAttr.toString();
+ }
+
+ // Extract title and URL - try multiple selectors (updated for LinkedIn's current structure)
+ const titleSelectors = [
+ "a.job-card-list__title",
+ ".job-card-list__title-link",
+ "a[data-test-id='job-title']",
+ ".base-search-card__title a",
+ "h3 a",
+ ".job-card-container__link",
+ "a[href*='/jobs/view/']",
+ ".job-card-list__title a",
+ ".base-search-card__title",
+ "h3.base-search-card__title a",
+ "[class*='job-title'] a",
+ "[class*='job-card'] a[href*='/jobs/']",
+ "a[href*='/jobs/view/'] span", // LinkedIn sometimes wraps title in span
+ "h3[class*='title'] a",
+ "h4[class*='title'] a",
+ ".job-search-card__title a",
+ ".jobs-search-results__list-item a[href*='/jobs/view/']",
+ ];
+
+ for (const selector of titleSelectors) {
+ const link = el.querySelector(selector);
+ if (link) {
+ data.jobUrl = link.getAttribute("href") || "";
+ // Extract job ID from URL if not found
+ if (!data.jobId && data.jobUrl) {
+ const match = data.jobUrl.match(/\/jobs\/view\/(\d+)/);
+ if (match) {
+ data.jobId = match[1];
+ }
+ }
+ // Try to get text from link or its children
+ data.title = link.textContent?.trim() || link.innerText?.trim() || "";
+ // If link has no text, try getting from child span or div
+ if (!data.title) {
+ const child = link.querySelector("span, div");
+ if (child) {
+ data.title = child.textContent?.trim() || child.innerText?.trim() || "";
+ }
+ }
+ if (data.title) break;
+ }
+ }
+
+ // Fallback: Get title from any link with job URL pattern
+ if (!data.title) {
+ const allLinks = el.querySelectorAll("a[href*='/jobs/view/']");
+ for (const link of allLinks) {
+ const href = link.getAttribute("href") || "";
+ if (href.includes("/jobs/view/")) {
+ data.jobUrl = href;
+ // Extract job ID from URL
+ const match = href.match(/\/jobs\/view\/(\d+)/);
+ if (match) {
+ data.jobId = match[1];
+ }
+ // Get text from link or any visible child
+ data.title = link.textContent?.trim() || link.innerText?.trim() || "";
+ if (!data.title) {
+ const visibleChild = Array.from(link.querySelectorAll("*")).find(
+ child => child.textContent?.trim() && child.offsetParent !== null
+ );
+ if (visibleChild) {
+ data.title = visibleChild.textContent?.trim() || "";
+ }
+ }
+ if (data.title) break;
+ }
+ }
+ }
+
+ // Last resort: Extract from aria-label or title attribute
+ if (!data.title) {
+ const linkWithAria = el.querySelector("a[aria-label], a[title]");
+ if (linkWithAria) {
+ data.title = linkWithAria.getAttribute("aria-label") ||
+ linkWithAria.getAttribute("title") || "";
+ if (linkWithAria.getAttribute("href")?.includes("/jobs/view/")) {
+ data.jobUrl = linkWithAria.getAttribute("href");
+ }
+ }
+ }
+
+ // Extract company name - try multiple selectors and patterns
+ const companySelectors = [
+ ".job-card-container__company-name",
+ ".job-card-container__primary-description",
+ "a[data-test-id='job-company-name']",
+ ".base-search-card__subtitle",
+ ".job-card-container__company-name-link",
+ "[class*='company-name']",
+ "[class*='job-card-container__company']",
+ ".base-search-card__subtitle-link",
+ "a[href*='/company/']",
+ "[class*='subtitle']",
+ "[class*='primary-description']",
+ ];
+
+ for (const selector of companySelectors) {
+ const companyElement = el.querySelector(selector);
+ if (companyElement) {
+ const text = companyElement.textContent?.trim() ||
+ companyElement.innerText?.trim() || "";
+ // Filter out common non-company text
+ if (text &&
+ text.length > 1 &&
+ text.length < 100 &&
+ !text.match(/^\d+\s*(minute|hour|day|week|month|year|h|d|w|ago)/i) &&
+ !text.match(/^(Full-time|Part-time|Contract|Internship|Temporary)$/i) &&
+ !text.includes(",") && // Location usually has comma, company usually doesn't
+ !text.match(/^[A-Z][a-z]+,\s*[A-Z]/)) { // Not a location pattern
+ data.company = text;
+ break;
+ }
+ }
+ }
+
+ // Fallback: Look for company link and get its text
+ if (!data.company) {
+ const companyLink = el.querySelector("a[href*='/company/']");
+ if (companyLink) {
+ const linkText = companyLink.textContent?.trim() || companyLink.innerText?.trim() || "";
+ if (linkText && linkText.length > 1 && linkText.length < 100) {
+ data.company = linkText;
+ }
+ }
+ }
+
+ // Fallback: Look for text that appears after the title but before location/metadata
+ if (!data.company) {
+ const titleElement = el.querySelector("a[href*='/jobs/view/']");
+ if (titleElement) {
+ // Get the next sibling or parent's next child
+ let current = titleElement.parentElement;
+ if (current) {
+ const siblings = Array.from(current.children);
+ const titleIndex = siblings.indexOf(titleElement);
+ // Check next few siblings
+ for (let i = titleIndex + 1; i < Math.min(titleIndex + 4, siblings.length); i++) {
+ const sibling = siblings[i];
+ const text = sibling.textContent?.trim() || sibling.innerText?.trim() || "";
+ if (text &&
+ text.length > 1 &&
+ text.length < 100 &&
+ !text.match(/^\d+\s*(minute|hour|day|week|month|year|h|d|w|ago)/i) &&
+ !text.match(/^(Full-time|Part-time|Contract|Internship|Temporary)$/i) &&
+ !text.includes(",")) {
+ data.company = text;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // Extract location - try multiple selectors and patterns
+ const locationSelectors = [
+ ".job-card-container__metadata-item",
+ ".job-card-container__metadata-wrapper .job-card-container__metadata-item",
+ "[data-test-id='job-location']",
+ ".base-search-card__metadata",
+ ".job-card-container__metadata",
+ "[class*='metadata']",
+ "[class*='location']",
+ ];
+
+ for (const selector of locationSelectors) {
+ const locationElements = el.querySelectorAll(selector);
+ for (const locationElement of locationElements) {
+ const text = locationElement.textContent?.trim() ||
+ locationElement.innerText?.trim() || "";
+ // Check if it looks like a location (not a date or job type)
+ if (text &&
+ !text.match(/\d+\s*(minute|hour|day|week|month|year|h|d|w|ago)/i) &&
+ !text.match(/^(Full-time|Part-time|Contract|Internship|Temporary)$/i) &&
+ (text.includes(",") ||
+ text.match(/^[A-Z][a-z]+,\s*[A-Z]/) || // City, State/Province pattern
+ /(ontario|alberta|british columbia|quebec|manitoba|saskatchewan|nova scotia|new brunswick|newfoundland|toronto|vancouver|calgary|ottawa|montreal|canada|united states|usa)/i.test(text))) {
+ data.location = text;
+ break;
+ }
+ }
+ if (data.location) break;
+ }
+
+ // Fallback: Look for location link
+ if (!data.location) {
+ const locationLink = el.querySelector("a[href*='/location/']");
+ if (locationLink) {
+ const linkText = locationLink.textContent?.trim() || locationLink.innerText?.trim() || "";
+ if (linkText && linkText.length > 2) {
+ data.location = linkText;
+ }
+ }
+ }
+
+ // Fallback: Look for text patterns that look like locations
+ if (!data.location) {
+ // Get all text nodes and look for location-like patterns
+ const allText = el.innerText || el.textContent || "";
+ const lines = allText.split("\n").map(l => l.trim()).filter(l => l.length > 0);
+
+ for (const line of lines) {
+ // Skip if it's the title, company, or a date
+ if (line === data.title ||
+ line === data.company ||
+ line.match(/^\d+\s*(minute|hour|day|week|month|year|h|d|w|ago)/i) ||
+ line.match(/^(Full-time|Part-time|Contract|Internship|Temporary)$/i)) {
+ continue;
+ }
+
+ // Check if it looks like a location
+ if (line.includes(",") ||
+ line.match(/^[A-Z][a-z]+,\s*[A-Z]/) ||
+ /(ontario|alberta|british columbia|quebec|manitoba|saskatchewan|toronto|vancouver|calgary|ottawa|montreal|canada)/i.test(line)) {
+ data.location = line;
+ break;
+ }
+ }
+ }
+
+ // Extract posted date
+ const dateSelectors = [
+ "time",
+ ".job-card-container__metadata-item time",
+ "[data-test-id='job-posted-date']",
+ "time[datetime]",
+ "[class*='date']",
+ "[class*='posted']",
+ ];
+
+ for (const selector of dateSelectors) {
+ const dateElement = el.querySelector(selector);
+ if (dateElement) {
+ const datetime = dateElement.getAttribute("datetime");
+ const title = dateElement.getAttribute("title");
+ const text = dateElement.textContent?.trim() || dateElement.innerText?.trim() || "";
+
+ if (datetime) {
+ data.postedDate = datetime;
+ break;
+ } else if (title && title.match(/\d{4}-\d{2}-\d{2}/)) {
+ data.postedDate = title;
+ break;
+ } else if (text && text.match(/\d+\s*(minute|hour|day|week|month|year|h|d|w|ago)/i)) {
+ // Parse relative dates like "2 days ago"
+ const match = text.match(/(\d+)\s*(minute|hour|day|week|month|year|h|d|w)/i);
+ if (match) {
+ const amount = parseInt(match[1]);
+ const unit = match[2].toLowerCase();
+ const date = new Date();
+
+ if (unit.includes("minute") || unit === "h") {
+ date.setMinutes(date.getMinutes() - amount);
+ } else if (unit.includes("hour") || unit === "h") {
+ date.setHours(date.getHours() - amount);
+ } else if (unit.includes("day") || unit === "d") {
+ date.setDate(date.getDate() - amount);
+ } else if (unit.includes("week") || unit === "w") {
+ date.setDate(date.getDate() - (amount * 7));
+ } else if (unit.includes("month")) {
+ date.setMonth(date.getMonth() - amount);
+ } else if (unit.includes("year")) {
+ date.setFullYear(date.getFullYear() - amount);
+ }
+
+ data.postedDate = date.toISOString().split("T")[0];
+ } else {
+ data.postedDate = text;
+ }
+ break;
+ }
+ }
+ }
+
+ // Fallback: Look for date patterns in metadata text
+ if (!data.postedDate) {
+ const metadataItems = el.querySelectorAll("[class*='metadata']");
+ for (const item of metadataItems) {
+ const text = item.textContent?.trim() || item.innerText?.trim() || "";
+ if (text && text.match(/\d+\s*(minute|hour|day|week|month|year|h|d|w|ago)/i)) {
+ const match = text.match(/(\d+)\s*(minute|hour|day|week|month|year|h|d|w)/i);
+ if (match) {
+ const amount = parseInt(match[1]);
+ const unit = match[2].toLowerCase();
+ const date = new Date();
+
+ if (unit.includes("day") || unit === "d") {
+ date.setDate(date.getDate() - amount);
+ data.postedDate = date.toISOString().split("T")[0];
+ break;
+ } else if (unit.includes("week") || unit === "w") {
+ date.setDate(date.getDate() - (amount * 7));
+ data.postedDate = date.toISOString().split("T")[0];
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // Extract job type and experience level from metadata
+ const metadataSelectors = [
+ ".job-card-container__metadata-item",
+ "[class*='metadata']",
+ "[class*='job-type']",
+ "[class*='experience']",
+ ];
+
+ for (const selector of metadataSelectors) {
+ const metadataItems = el.querySelectorAll(selector);
+ for (const item of metadataItems) {
+ const text = item.textContent?.trim() || item.innerText?.trim() || "";
+
+ // Check for job type
+ if (!data.jobType && text.match(/^(Full-time|Part-time|Contract|Internship|Temporary|Freelance)$/i)) {
+ data.jobType = text;
+ }
+
+ // Check for experience level
+ if (!data.experienceLevel && text.match(/(Entry level|Mid-Senior|Associate|Executive|Internship|Senior|Junior|Mid-level)/i)) {
+ data.experienceLevel = text;
+ }
+
+ if (data.jobType && data.experienceLevel) break;
+ }
+ if (data.jobType && data.experienceLevel) break;
+ }
+
+ // Fallback: Look in all text for job type and experience patterns
+ if (!data.jobType || !data.experienceLevel) {
+ const allText = el.innerText || el.textContent || "";
+ const words = allText.split(/\s+/);
+
+ if (!data.jobType) {
+ for (const word of words) {
+ if (word.match(/^(Full-time|Part-time|Contract|Internship|Temporary|Freelance)$/i)) {
+ data.jobType = word;
+ break;
+ }
+ }
+ }
+
+ if (!data.experienceLevel) {
+ for (let i = 0; i < words.length - 1; i++) {
+ const phrase = words.slice(i, i + 2).join(" ");
+ if (phrase.match(/(Entry level|Mid-Senior|Associate|Executive|Internship|Senior level|Junior level|Mid level)/i)) {
+ data.experienceLevel = phrase;
+ break;
+ }
+ }
+ }
+ }
+
+ // Try to get description snippet
+ const descSelectors = [
+ ".job-card-list__description",
+ ".job-card-container__description",
+ "[data-test-id='job-description']",
+ ".base-search-card__snippet",
+ "[class*='description']",
+ "[class*='snippet']",
+ "[class*='summary']",
+ ];
+
+ for (const selector of descSelectors) {
+ const descElement = el.querySelector(selector);
+ if (descElement) {
+ const text = descElement.textContent?.trim() ||
+ descElement.innerText?.trim() || "";
+ // Only use if it's substantial (more than just a few words)
+ if (text && text.length > 20) {
+ data.description = text.substring(0, 500); // Limit description length
+ break;
+ }
+ }
+ }
+
+ // Fallback: Extract description from any paragraph or div that's not title/company/location
+ if (!data.description) {
+ const allElements = el.querySelectorAll("p, div, span");
+ for (const elem of allElements) {
+ const text = elem.textContent?.trim() || elem.innerText?.trim() || "";
+ // Skip if it's title, company, location, or too short
+ if (text &&
+ text.length > 30 &&
+ text !== data.title &&
+ text !== data.company &&
+ text !== data.location &&
+ !text.match(/^\d+\s*(minute|hour|day|week|month|year|h|d|w|ago)/i) &&
+ !text.match(/^(Full-time|Part-time|Contract|Internship|Temporary)$/i)) {
+ data.description = text.substring(0, 500);
+ break;
+ }
+ }
+ }
+
+ return data;
+ });
+
+ // Clean and format
+ let title = cleanText(jobData.title);
+ let jobUrl = jobData.jobUrl || "";
+ if (jobUrl && !jobUrl.startsWith("http")) {
+ jobUrl = `https://www.linkedin.com${jobUrl}`;
+ }
+
+ // If we have no title and no jobId, try one more aggressive extraction
+ if (!jobData.jobId && !title) {
+ // Try to extract from the entire element's text content
+ try {
+ const allText = await jobElement.evaluate((el) => {
+ // Get all text, split by newlines
+ const text = el.innerText || el.textContent || "";
+ const lines = text.split("\n").map(l => l.trim()).filter(l => l.length > 0);
+
+ // Find job link
+ const jobLink = el.querySelector("a[href*='/jobs/view/']");
+ const jobUrl = jobLink?.getAttribute("href") || "";
+ const jobIdMatch = jobUrl.match(/\/jobs\/view\/(\d+)/);
+ const jobId = jobIdMatch ? jobIdMatch[1] : "";
+
+ // First non-empty line is usually the title
+ const title = lines[0] || "";
+
+ // Look for company (usually second line or contains company pattern)
+ let company = "";
+ for (const line of lines.slice(1, 5)) {
+ if (line && line.length < 100 && !line.includes(",") &&
+ !line.match(/^\d+\s*(minute|hour|day|week|month|year|h|d|w|ago)/i) &&
+ !line.match(/^(Full-time|Part-time|Contract|Internship|Temporary)$/i)) {
+ company = line;
+ break;
+ }
+ }
+
+ // Look for location (usually has comma or location keywords)
+ let location = "";
+ for (const line of lines) {
+ if (line.includes(",") ||
+ line.match(/^[A-Z][a-z]+,\s*[A-Z]/) ||
+ /(ontario|alberta|british columbia|quebec|manitoba|saskatchewan|toronto|vancouver|calgary|ottawa|montreal|canada|united states|usa)/i.test(line)) {
+ location = line;
+ break;
+ }
+ }
+
+ return { jobId, jobUrl, title, company, location };
+ });
+
+ if (allText.jobId || allText.title) {
+ // Use the extracted data
+ if (allText.jobId) jobData.jobId = allText.jobId;
+ if (allText.jobUrl) jobData.jobUrl = allText.jobUrl;
+ if (allText.title && !title) title = allText.title;
+ if (allText.company && !jobData.company) jobData.company = allText.company;
+ if (allText.location && !jobData.location) jobData.location = allText.location;
+ }
+ } catch (e) {
+ // Fallback extraction failed
+ }
+ }
+
+ // If we still have no title and no jobId, this extraction failed
+ if (!jobData.jobId && !title) {
+ logger.warning(`โ ๏ธ Job extraction failed: no jobId or title found`);
+ return null;
+ }
+
+ // Filter out jobs that have jobId but no meaningful data (title, company, or location)
+ // These are likely jobs that haven't loaded their content yet
+ if (jobData.jobId && !title && !jobData.company && !jobData.location) {
+ logger.debug(`โ ๏ธ Job ${jobData.jobId} has no extractable data (title, company, or location) - skipping`);
+ return null;
+ }
+
+ // Log if we're missing critical fields (only in debug mode to reduce noise)
+ if (process.env.DEBUG === "true") {
+ if (!title) {
+ logger.warning(`โ ๏ธ Job ${jobData.jobId} missing title`);
+ }
+ if (!jobData.company) {
+ logger.debug(`โ ๏ธ Job ${jobData.jobId || title} missing company`);
+ }
+ if (!jobData.location) {
+ logger.debug(`โ ๏ธ Job ${jobData.jobId || title} missing location`);
+ }
+ }
+
+ // Generate job ID if not found
+ const jobId = jobData.jobId || `linkedin-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
+
+ return {
+ jobId,
+ title,
+ company: cleanText(jobData.company),
+ location: cleanText(jobData.location),
+ jobUrl,
+ postedDate: jobData.postedDate,
+ description: cleanText(jobData.description),
+ jobType: jobData.jobType,
+ experienceLevel: jobData.experienceLevel,
+ keyword,
+ extractedAt: new Date().toISOString(),
+ source: "linkedin-jobs",
+ };
+ } catch (error) {
+ logger.warning(`Error extracting job data: ${error.message}`);
+ return null;
+ }
+}
+
+/**
+ * Check if next page is available
+ */
+async function hasNextPageAvailable(page) {
+ try {
+ // LinkedIn uses pagination buttons - try multiple selectors
+ const nextButtonSelectors = [
+ "button[aria-label='Next']",
+ "button[aria-label='Next page']",
+ "button[aria-label*='Next']",
+ ".artdeco-pagination__button--next",
+ "button[data-test-id='pagination-next-button']",
+ "button.pagination__button--next",
+ "button[class*='pagination'][class*='next']",
+ "li[class*='pagination'][class*='next'] button",
+ "a[aria-label='Next']",
+ "a[aria-label='Next page']",
+ ];
+
+ for (const selector of nextButtonSelectors) {
+ try {
+ const nextButton = await page.$(selector);
+ if (nextButton) {
+ // Check if button is disabled
+ const isDisabled = await nextButton.evaluate((el) => {
+ return el.hasAttribute("disabled") ||
+ el.getAttribute("aria-disabled") === "true" ||
+ el.classList.contains("disabled") ||
+ el.classList.contains("artdeco-button--disabled");
+ }).catch(() => false);
+
+ // Check if button is visible
+ const isVisible = await nextButton.isVisible().catch(() => false);
+
+ if (!isDisabled && isVisible) {
+ logger.debug(`โ
Found next page button with selector: ${selector}`);
+ return true;
+ }
+ }
+ } catch (e) {
+ // Try next selector
+ continue;
+ }
+ }
+
+ logger.debug(`โ No next page button found`);
+ return false;
+ } catch (error) {
+ logger.debug(`Error checking for next page: ${error.message}`);
+ return false;
+ }
+}
+
+/**
+ * Navigate to next page
+ */
+async function navigateToNextPage(page) {
+ try {
+ const nextButtonSelectors = [
+ "button[aria-label='Next']",
+ "button[aria-label='Next page']",
+ "button[aria-label*='Next']",
+ ".artdeco-pagination__button--next",
+ "button[data-test-id='pagination-next-button']",
+ "button.pagination__button--next",
+ "button[class*='pagination'][class*='next']",
+ "li[class*='pagination'][class*='next'] button",
+ "a[aria-label='Next']",
+ "a[aria-label='Next page']",
+ ];
+
+ for (const selector of nextButtonSelectors) {
+ try {
+ const nextButton = await page.$(selector);
+ if (nextButton) {
+ // Check if button is disabled
+ const isDisabled = await nextButton.evaluate((el) => {
+ return el.hasAttribute("disabled") ||
+ el.getAttribute("aria-disabled") === "true" ||
+ el.classList.contains("disabled") ||
+ el.classList.contains("artdeco-button--disabled");
+ }).catch(() => false);
+
+ if (!isDisabled) {
+ // Scroll button into view before clicking (minimal delay)
+ await nextButton.scrollIntoViewIfNeeded();
+ await new Promise((resolve) => setTimeout(resolve, 100));
+
+ // Click the button and wait for job elements to appear
+ // This is more efficient than waiting for fixed timeouts
+ const clickPromise = nextButton.click();
+ logger.info(`โ
Clicked next page button (selector: ${selector})`);
+
+ // Wait for click to complete
+ await clickPromise;
+
+ // Wait for job elements to appear (this is the key indicator that page loaded)
+ // Use Promise.race to wait for any of the common job element selectors
+ try {
+ await Promise.race([
+ page.waitForSelector("li[data-occludable-job-id]", { timeout: 6000 }),
+ page.waitForSelector("li.jobs-search-results__list-item", { timeout: 6000 }),
+ page.waitForSelector(".scaffold-layout__list-item", { timeout: 6000 }),
+ ]);
+
+ // Small buffer for content to fully render
+ await new Promise((resolve) => setTimeout(resolve, 300));
+ return true;
+ } catch (e) {
+ // If elements don't appear quickly, wait a bit more and check
+ logger.debug(`โ ๏ธ Job elements not detected immediately, waiting...`);
+ await new Promise((resolve) => setTimeout(resolve, 1500));
+
+ // Verify elements exist now
+ const jobCount = await page.$$eval(
+ "li[data-occludable-job-id], li.jobs-search-results__list-item, .scaffold-layout__list-item",
+ (elements) => elements.length
+ ).catch(() => 0);
+
+ if (jobCount > 0) {
+ return true;
+ } else {
+ logger.warning(`โ ๏ธ No job elements found after navigation`);
+ return false;
+ }
+ }
+ }
+ }
+ } catch (e) {
+ // Try next selector
+ continue;
+ }
+ }
+
+ logger.warning(`โ ๏ธ Could not find or click next page button`);
+ return false;
+ } catch (error) {
+ logger.warning(`Failed to navigate to next page: ${error.message}`);
+ return false;
+ }
+}
+
+module.exports = {
+ linkedinJobsStrategy,
+ buildJobSearchUrl,
+};
+
diff --git a/job-search-parser/strategies/skipthedrive-strategy.js b/job-search-parser/strategies/skipthedrive-strategy.js
index 5b39c7e..092ef09 100644
--- a/job-search-parser/strategies/skipthedrive-strategy.js
+++ b/job-search-parser/strategies/skipthedrive-strategy.js
@@ -1,302 +1,299 @@
-/**
- * SkipTheDrive Parsing Strategy
- *
- * Uses core-parser for browser management and ai-analyzer for utilities
- */
-
-const {
- logger,
- cleanText,
- containsAnyKeyword,
- validateLocationAgainstFilters,
-} = require("ai-analyzer");
-
-/**
- * SkipTheDrive URL builder
- */
-function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
- const baseUrl = "https://www.skipthedrive.com/";
- const params = new URLSearchParams({
- s: keyword,
- orderby: orderBy,
- });
-
- if (jobTypes && jobTypes.length > 0) {
- params.append("job_type", jobTypes.join(","));
- }
-
- return `${baseUrl}?${params.toString()}`;
-}
-
-/**
- * SkipTheDrive parsing strategy function
- */
-async function skipthedriveStrategy(coreParser, options = {}) {
- const {
- keywords = ["software engineer", "developer", "programmer"],
- locationFilter = null,
- maxPages = 5,
- jobTypes = [],
- } = options;
-
- const results = [];
- const rejectedResults = [];
- const seenJobs = new Set();
-
- try {
- // Create main page
- const page = await coreParser.createPage("skipthedrive-main");
-
- logger.info("๐ Starting SkipTheDrive parser...");
- logger.info(`๐ Keywords: ${keywords.join(", ")}`);
- logger.info(`๐ Location Filter: ${locationFilter || "None"}`);
- logger.info(`๐ Max Pages: ${maxPages}`);
-
- // Search for each keyword
- for (const keyword of keywords) {
- logger.info(`\n๐ Searching for: ${keyword}`);
-
- const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
-
- try {
- // Navigate to search results
- await coreParser.navigateTo(searchUrl, {
- pageId: "skipthedrive-main",
- retries: 2,
- timeout: 30000,
- });
-
- // Wait for job listings to load
- const hasResults = await coreParser
- .waitForSelector(
- "#loops-wrapper",
- {
- timeout: 5000,
- },
- "skipthedrive-main"
- )
- .catch(() => {
- logger.warning(`No results found for keyword: ${keyword}`);
- return false;
- });
-
- if (!hasResults) {
- continue;
- }
-
- // Process multiple pages
- let currentPage = 1;
- let hasNextPage = true;
-
- while (hasNextPage && currentPage <= maxPages) {
- logger.info(`๐ Processing page ${currentPage} for "${keyword}"`);
-
- // Extract jobs from current page
- const pageJobs = await extractJobsFromPage(
- page,
- keyword,
- locationFilter
- );
-
- for (const job of pageJobs) {
- // Skip duplicates
- if (seenJobs.has(job.jobId)) continue;
- seenJobs.add(job.jobId);
-
- // Validate location if filtering enabled
- if (locationFilter) {
- const locationValid = validateLocationAgainstFilters(
- job.location,
- locationFilter
- );
-
- if (!locationValid) {
- rejectedResults.push({
- ...job,
- rejectionReason: "Location filter mismatch",
- });
- continue;
- }
- }
-
- results.push(job);
- }
-
- // Check for next page
- hasNextPage = await hasNextPageAvailable(page);
- if (hasNextPage && currentPage < maxPages) {
- await navigateToNextPage(page, currentPage + 1);
- currentPage++;
-
- // Wait for new page to load
- await page.waitForTimeout(2000);
- } else {
- hasNextPage = false;
- }
- }
- } catch (error) {
- logger.error(`Error processing keyword "${keyword}": ${error.message}`);
- }
- }
-
- logger.info(
- `๐ฏ SkipTheDrive parsing completed: ${results.length} jobs found, ${rejectedResults.length} rejected`
- );
-
- return {
- results,
- rejectedResults,
- summary: {
- totalJobs: results.length,
- totalRejected: rejectedResults.length,
- keywords: keywords.join(", "),
- locationFilter,
- source: "skipthedrive",
- },
- };
- } catch (error) {
- logger.error(`โ SkipTheDrive parsing failed: ${error.message}`);
- throw error;
- }
-}
-
-/**
- * Extract jobs from current page
- */
-async function extractJobsFromPage(page, keyword, locationFilter) {
- const jobs = [];
-
- try {
- // Get all job article elements
- const jobElements = await page.$$("article.job_listing");
-
- for (const jobElement of jobElements) {
- try {
- const job = await extractJobData(jobElement, keyword);
- if (job) {
- jobs.push(job);
- }
- } catch (error) {
- logger.warning(`Failed to extract job data: ${error.message}`);
- }
- }
- } catch (error) {
- logger.error(`Failed to extract jobs from page: ${error.message}`);
- }
-
- return jobs;
-}
-
-/**
- * Extract data from individual job element
- */
-async function extractJobData(jobElement, keyword) {
- try {
- // Extract job ID
- const articleId = (await jobElement.getAttribute("id")) || "";
- const jobId = articleId ? articleId.replace("post-", "") : "";
-
- // Extract title
- const titleElement = await jobElement.$(".job_listing-title a");
- const title = titleElement
- ? cleanText(await titleElement.textContent())
- : "";
- const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";
-
- // Extract company
- const companyElement = await jobElement.$(".company");
- const company = companyElement
- ? cleanText(await companyElement.textContent())
- : "";
-
- // Extract location
- const locationElement = await jobElement.$(".location");
- const location = locationElement
- ? cleanText(await locationElement.textContent())
- : "";
-
- // Extract date posted
- const dateElement = await jobElement.$(".job-date");
- const dateText = dateElement
- ? cleanText(await dateElement.textContent())
- : "";
-
- // Extract description
- const descElement = await jobElement.$(".job_listing-description");
- const description = descElement
- ? cleanText(await descElement.textContent())
- : "";
-
- // Check if featured
- const featuredElement = await jobElement.$(".featured");
- const isFeatured = featuredElement !== null;
-
- // Parse date
- let datePosted = null;
- let daysAgo = null;
-
- if (dateText) {
- const match = dateText.match(/(\d+)\s+days?\s+ago/);
- if (match) {
- daysAgo = parseInt(match[1]);
- const date = new Date();
- date.setDate(date.getDate() - daysAgo);
- datePosted = date.toISOString().split("T")[0];
- }
- }
-
- return {
- jobId,
- title,
- company,
- location,
- jobUrl,
- datePosted,
- dateText,
- daysAgo,
- description,
- isFeatured,
- keyword,
- extractedAt: new Date().toISOString(),
- source: "skipthedrive",
- };
- } catch (error) {
- logger.warning(`Error extracting job data: ${error.message}`);
- return null;
- }
-}
-
-/**
- * Check if next page is available
- */
-async function hasNextPageAvailable(page) {
- try {
- const nextButton = await page.$(".next-page");
- return nextButton !== null;
- } catch {
- return false;
- }
-}
-
-/**
- * Navigate to next page
- */
-async function navigateToNextPage(page, pageNumber) {
- try {
- const nextButton = await page.$(".next-page");
- if (nextButton) {
- await nextButton.click();
- }
- } catch (error) {
- logger.warning(
- `Failed to navigate to page ${pageNumber}: ${error.message}`
- );
- }
-}
-
-module.exports = {
- skipthedriveStrategy,
- buildSearchUrl,
- extractJobsFromPage,
- extractJobData,
-};
+/**
+ * SkipTheDrive Parsing Strategy
+ *
+ * Uses core-parser for browser management and ai-analyzer for utilities
+ */
+
+const {
+ logger,
+ cleanText,
+ containsAnyKeyword,
+ validateLocationAgainstFilters,
+} = require("ai-analyzer");
+
+/**
+ * SkipTheDrive URL builder
+ */
+function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
+ const baseUrl = "https://www.skipthedrive.com/";
+ const params = new URLSearchParams({
+ s: keyword,
+ orderby: orderBy,
+ });
+
+ if (jobTypes && jobTypes.length > 0) {
+ params.append("job_type", jobTypes.join(","));
+ }
+
+ return `${baseUrl}?${params.toString()}`;
+}
+
+/**
+ * SkipTheDrive parsing strategy function
+ */
+async function skipthedriveStrategy(coreParser, options = {}) {
+ const {
+ keywords = ["software engineer", "developer", "programmer"],
+ locationFilter = null,
+ maxPages = 5,
+ jobTypes = [],
+ } = options;
+
+ const results = [];
+ const rejectedResults = [];
+ const seenJobs = new Set();
+
+ try {
+ // Create main page
+ const page = await coreParser.createPage("skipthedrive-main");
+
+ logger.info("๐ Starting SkipTheDrive parser...");
+ logger.info(`๐ Keywords: ${keywords.join(", ")}`);
+ logger.info(`๐ Location Filter: ${locationFilter || "None"}`);
+ logger.info(`๐ Max Pages: ${maxPages}`);
+
+ // Search for each keyword
+ for (const keyword of keywords) {
+ logger.info(`\n๐ Searching for: ${keyword}`);
+
+ const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
+
+ try {
+ // Navigate to search results
+ await coreParser.navigateTo(searchUrl, {
+ pageId: "skipthedrive-main",
+ retries: 2,
+ timeout: 30000,
+ });
+
+ // Wait for job listings to load
+ const hasResults = await page
+ .waitForSelector("#loops-wrapper", {
+ timeout: 5000,
+ })
+ .then(() => true)
+ .catch(() => {
+ logger.warning(`No results found for keyword: ${keyword}`);
+ return false;
+ });
+
+ if (!hasResults) {
+ continue;
+ }
+
+ // Process multiple pages
+ let currentPage = 1;
+ let hasNextPage = true;
+
+ while (hasNextPage && currentPage <= maxPages) {
+ logger.info(`๐ Processing page ${currentPage} for "${keyword}"`);
+
+ // Extract jobs from current page
+ const pageJobs = await extractJobsFromPage(
+ page,
+ keyword,
+ locationFilter
+ );
+
+ for (const job of pageJobs) {
+ // Skip duplicates
+ if (seenJobs.has(job.jobId)) continue;
+ seenJobs.add(job.jobId);
+
+ // Validate location if filtering enabled
+ if (locationFilter) {
+ const locationValid = validateLocationAgainstFilters(
+ job.location,
+ locationFilter
+ );
+
+ if (!locationValid) {
+ rejectedResults.push({
+ ...job,
+ rejectionReason: "Location filter mismatch",
+ });
+ continue;
+ }
+ }
+
+ results.push(job);
+ }
+
+ // Check for next page
+ hasNextPage = await hasNextPageAvailable(page);
+ if (hasNextPage && currentPage < maxPages) {
+ await navigateToNextPage(page, currentPage + 1);
+ currentPage++;
+
+ // Wait for new page to load
+ await page.waitForTimeout(2000);
+ } else {
+ hasNextPage = false;
+ }
+ }
+ } catch (error) {
+ logger.error(`Error processing keyword "${keyword}": ${error.message}`);
+ }
+ }
+
+ logger.info(
+ `๐ฏ SkipTheDrive parsing completed: ${results.length} jobs found, ${rejectedResults.length} rejected`
+ );
+
+ return {
+ results,
+ rejectedResults,
+ summary: {
+ totalJobs: results.length,
+ totalRejected: rejectedResults.length,
+ keywords: keywords.join(", "),
+ locationFilter,
+ source: "skipthedrive",
+ },
+ };
+ } catch (error) {
+ logger.error(`โ SkipTheDrive parsing failed: ${error.message}`);
+ throw error;
+ }
+}
+
+/**
+ * Extract jobs from current page
+ */
+async function extractJobsFromPage(page, keyword, locationFilter) {
+ const jobs = [];
+
+ try {
+ // Get all job article elements
+ const jobElements = await page.$$("article.job_listing");
+
+ for (const jobElement of jobElements) {
+ try {
+ const job = await extractJobData(jobElement, keyword);
+ if (job) {
+ jobs.push(job);
+ }
+ } catch (error) {
+ logger.warning(`Failed to extract job data: ${error.message}`);
+ }
+ }
+ } catch (error) {
+ logger.error(`Failed to extract jobs from page: ${error.message}`);
+ }
+
+ return jobs;
+}
+
+/**
+ * Extract data from individual job element
+ */
+async function extractJobData(jobElement, keyword) {
+ try {
+ // Extract job ID
+ const articleId = (await jobElement.getAttribute("id")) || "";
+ const jobId = articleId ? articleId.replace("post-", "") : "";
+
+ // Extract title
+ const titleElement = await jobElement.$(".job_listing-title a");
+ const title = titleElement
+ ? cleanText(await titleElement.textContent())
+ : "";
+ const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";
+
+ // Extract company
+ const companyElement = await jobElement.$(".company");
+ const company = companyElement
+ ? cleanText(await companyElement.textContent())
+ : "";
+
+ // Extract location
+ const locationElement = await jobElement.$(".location");
+ const location = locationElement
+ ? cleanText(await locationElement.textContent())
+ : "";
+
+ // Extract date posted
+ const dateElement = await jobElement.$(".job-date");
+ const dateText = dateElement
+ ? cleanText(await dateElement.textContent())
+ : "";
+
+ // Extract description
+ const descElement = await jobElement.$(".job_listing-description");
+ const description = descElement
+ ? cleanText(await descElement.textContent())
+ : "";
+
+ // Check if featured
+ const featuredElement = await jobElement.$(".featured");
+ const isFeatured = featuredElement !== null;
+
+ // Parse date
+ let datePosted = null;
+ let daysAgo = null;
+
+ if (dateText) {
+ const match = dateText.match(/(\d+)\s+days?\s+ago/);
+ if (match) {
+ daysAgo = parseInt(match[1]);
+ const date = new Date();
+ date.setDate(date.getDate() - daysAgo);
+ datePosted = date.toISOString().split("T")[0];
+ }
+ }
+
+ return {
+ jobId,
+ title,
+ company,
+ location,
+ jobUrl,
+ datePosted,
+ dateText,
+ daysAgo,
+ description,
+ isFeatured,
+ keyword,
+ extractedAt: new Date().toISOString(),
+ source: "skipthedrive",
+ };
+ } catch (error) {
+ logger.warning(`Error extracting job data: ${error.message}`);
+ return null;
+ }
+}
+
+/**
+ * Check if next page is available
+ */
+async function hasNextPageAvailable(page) {
+ try {
+ const nextButton = await page.$(".next-page");
+ return nextButton !== null;
+ } catch {
+ return false;
+ }
+}
+
+/**
+ * Navigate to next page
+ */
+async function navigateToNextPage(page, pageNumber) {
+ try {
+ const nextButton = await page.$(".next-page");
+ if (nextButton) {
+ await nextButton.click();
+ }
+ } catch (error) {
+ logger.warning(
+ `Failed to navigate to page ${pageNumber}: ${error.message}`
+ );
+ }
+}
+
+module.exports = {
+ skipthedriveStrategy,
+ buildSearchUrl,
+ extractJobsFromPage,
+ extractJobData,
+};
diff --git a/linkedin-parser/index.js b/linkedin-parser/index.js
index b7d6719..461f684 100644
--- a/linkedin-parser/index.js
+++ b/linkedin-parser/index.js
@@ -31,12 +31,13 @@ const LINKEDIN_USERNAME = process.env.LINKEDIN_USERNAME;
const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD;
const HEADLESS = process.env.HEADLESS !== "false";
const SEARCH_KEYWORDS =
- process.env.SEARCH_KEYWORDS || "layoff,downsizing";//,job cuts";
+ process.env.SEARCH_KEYWORDS || "layoff";//,downsizing";//,job cuts";
const LOCATION_FILTER = process.env.LOCATION_FILTER;
const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS !== "false";
const AI_CONTEXT = process.env.AI_CONTEXT || "job market analysis and trends";
const OLLAMA_MODEL = process.env.OLLAMA_MODEL || DEFAULT_MODEL;
const MAX_RESULTS = parseInt(process.env.MAX_RESULTS) || 50;
+const EXTRACT_LOCATION_FROM_PROFILE = process.env.EXTRACT_LOCATION_FROM_PROFILE === "true";
/**
* Main LinkedIn parser function
@@ -71,6 +72,7 @@ async function startLinkedInParser(options = {}) {
keywords,
locationFilter: LOCATION_FILTER,
maxResults: MAX_RESULTS,
+ extractLocationFromProfile: EXTRACT_LOCATION_FROM_PROFILE,
credentials: {
username: LINKEDIN_USERNAME,
password: LINKEDIN_PASSWORD,
diff --git a/linkedin-parser/strategies/linkedin-strategy.js b/linkedin-parser/strategies/linkedin-strategy.js
index dbc4da4..1a90da0 100644
--- a/linkedin-parser/strategies/linkedin-strategy.js
+++ b/linkedin-parser/strategies/linkedin-strategy.js
@@ -21,6 +21,7 @@ async function linkedinStrategy(coreParser, options = {}) {
keywords = ["layoff", "downsizing", "job cuts"],
locationFilter = null,
maxResults = 50,
+ extractLocationFromProfile = false,
credentials = {},
} = options;
@@ -106,7 +107,7 @@ async function linkedinStrategy(coreParser, options = {}) {
}
// Extract posts from current page
- const posts = await extractPostsFromPage(page, keyword);
+ const posts = await extractPostsFromPage(page, keyword, extractLocationFromProfile);
logger.info(`๐ Found ${posts.length} posts for keyword "${keyword}"`);
for (const post of posts) {
@@ -172,7 +173,7 @@ async function linkedinStrategy(coreParser, options = {}) {
/**
* Extract posts from current search results page
*/
-async function extractPostsFromPage(page, keyword) {
+async function extractPostsFromPage(page, keyword, extractLocationFromProfile = false) {
const posts = [];
try {
@@ -254,10 +255,26 @@ async function extractPostsFromPage(page, keyword) {
const post = await extractPostData(postElements[i], keyword);
if (post) {
+ // If location is missing and we're enabled to extract from profile, try to get it
+ if (!post.location && extractLocationFromProfile && post.authorUrl) {
+ try {
+ logger.debug(`๐ Location missing for post ${i + 1}, attempting to extract from profile...`);
+ const profileLocation = await extractLocationFromProfilePage(page, post.authorUrl);
+ if (profileLocation) {
+ post.location = profileLocation;
+ post.profileLocation = profileLocation;
+ logger.debug(`โ
Extracted location from profile: ${profileLocation}`);
+ }
+ } catch (error) {
+ logger.debug(`โ ๏ธ Could not extract location from profile: ${error.message}`);
+ }
+ }
+
posts.push(post);
const hasContent = post.content && post.content.length > 0;
const hasAuthor = post.authorName && post.authorName.length > 0;
- logger.debug(`โ
Extracted post ${i + 1}/${postElements.length}: ${post.postId.substring(0, 20)}... (content: ${hasContent ? 'yes' : 'no'}, author: ${hasAuthor ? 'yes' : 'no'})`);
+ const hasLocation = post.location && post.location.length > 0;
+ logger.debug(`โ
Extracted post ${i + 1}/${postElements.length}: ${post.postId.substring(0, 20)}... (content: ${hasContent ? 'yes' : 'no'}, author: ${hasAuthor ? 'yes' : 'no'}, location: ${hasLocation ? 'yes' : 'no'})`);
} else {
logger.debug(`โญ๏ธ Post ${i + 1}/${postElements.length} filtered out (no keyword match or missing data)`);
}
@@ -626,6 +643,42 @@ async function extractPostData(postElement, keyword) {
}
}
}
+
+ // Try to extract from data attributes or hidden elements
+ if (!data.location) {
+ // Check for data attributes that might contain location
+ const actorSection = el.querySelector(".feed-shared-actor");
+ if (actorSection) {
+ // Check all data attributes
+ for (const attr of actorSection.attributes) {
+ if (attr.name.startsWith("data-") && attr.value) {
+ const value = attr.value.toLowerCase();
+ // Look for location-like patterns in data attributes
+ if (/(ontario|alberta|british columbia|quebec|toronto|vancouver|calgary|ottawa|montreal)/i.test(value)) {
+ // Try to extract the actual location text
+ const locationMatch = attr.value.match(/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*,\s*[A-Z][a-z]+)/);
+ if (locationMatch) {
+ data.location = locationMatch[0];
+ break;
+ }
+ }
+ }
+ }
+
+ // Check for hidden spans or divs with location info
+ const hiddenElements = actorSection.querySelectorAll("span[style*='display: none'], div[style*='display: none'], [aria-hidden='true']");
+ for (const hiddenElem of hiddenElements) {
+ const text = hiddenElem.textContent || hiddenElem.getAttribute("aria-label") || "";
+ if (text && /(ontario|alberta|british columbia|quebec|toronto|vancouver)/i.test(text)) {
+ const locationMatch = text.match(/([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*,\s*[A-Z][a-z]+)/);
+ if (locationMatch) {
+ data.location = locationMatch[0].trim();
+ break;
+ }
+ }
+ }
+ }
+ }
// Extract engagement metrics - try multiple approaches
const likesSelectors = [
@@ -799,6 +852,48 @@ async function extractPostData(postElement, keyword) {
}
}
+/**
+ * Extract location from a LinkedIn profile page
+ */
+async function extractLocationFromProfilePage(page, profileUrl) {
+ try {
+ // Ensure URL is complete
+ let fullUrl = profileUrl;
+ if (!fullUrl.startsWith("http")) {
+ fullUrl = `https://www.linkedin.com${fullUrl}`;
+ }
+
+ // Remove query parameters that might cause issues
+ fullUrl = fullUrl.split("?")[0];
+
+ // Open profile in new tab
+ const profilePage = await page.context().newPage();
+
+ try {
+ await profilePage.goto(fullUrl, {
+ waitUntil: "domcontentloaded",
+ timeout: 15000,
+ });
+
+ // Wait a bit for content to load
+ await new Promise(resolve => setTimeout(resolve, 2000));
+
+ // Use the extractLocationFromProfile utility from ai-analyzer
+ const location = await extractLocationFromProfile(profilePage);
+
+ await profilePage.close();
+
+ return location;
+ } catch (error) {
+ await profilePage.close();
+ throw error;
+ }
+ } catch (error) {
+ logger.debug(`Failed to extract location from profile ${profileUrl}: ${error.message}`);
+ return "";
+ }
+}
+
/**
* Extract numbers from text (e.g., "15 likes" -> 15)
*/
diff --git a/test/ai-analyzer.test.js b/test/ai-analyzer.test.js
index 3f2bf98..ccf63d8 100644
--- a/test/ai-analyzer.test.js
+++ b/test/ai-analyzer.test.js
@@ -1,80 +1,80 @@
-const fs = require("fs");
-const assert = require("assert");
-const { analyzeSinglePost, checkOllamaStatus } = require("../ai-analyzer");
-
-console.log("AI Analyzer logic tests");
-
-const testData = JSON.parse(
- fs.readFileSync(__dirname + "/test-data.json", "utf-8")
-);
-const aiResults = testData.positive;
-const context = "job layoffs and workforce reduction";
-const model = process.env.OLLAMA_MODEL || "mistral"; // Use OLLAMA_MODEL from env or default to mistral
-
-(async () => {
- // Check if Ollama is available
- const ollamaAvailable = await checkOllamaStatus(model);
- if (!ollamaAvailable) {
- console.log("SKIP: Ollama not available - skipping AI analyzer tests");
- console.log("PASS: AI analyzer tests skipped (Ollama not running)");
- return;
- }
-
- console.log(`Testing AI analyzer with ${aiResults.length} posts...`);
-
- for (let i = 0; i < aiResults.length; i++) {
- const post = aiResults[i];
- console.log(`Testing post ${i + 1}: "${post.text.substring(0, 50)}..."`);
-
- const aiOutput = await analyzeSinglePost(post.text, context, model);
-
- // Test that the function returns the expected structure
- assert(
- typeof aiOutput === "object" && aiOutput !== null,
- `Post ${i} output is not an object`
- );
-
- assert(
- typeof aiOutput.isRelevant === "boolean",
- `Post ${i} isRelevant is not a boolean: ${typeof aiOutput.isRelevant}`
- );
-
- assert(
- typeof aiOutput.confidence === "number",
- `Post ${i} confidence is not a number: ${typeof aiOutput.confidence}`
- );
-
- assert(
- typeof aiOutput.reasoning === "string",
- `Post ${i} reasoning is not a string: ${typeof aiOutput.reasoning}`
- );
-
- // Test that confidence is within valid range
- assert(
- aiOutput.confidence >= 0 && aiOutput.confidence <= 1,
- `Post ${i} confidence out of range: ${aiOutput.confidence} (should be 0-1)`
- );
-
- // Test that reasoning exists and is not empty
- assert(
- aiOutput.reasoning && aiOutput.reasoning.length > 0,
- `Post ${i} missing or empty reasoning`
- );
-
- // Test that relevance is a boolean value
- assert(
- aiOutput.isRelevant === true || aiOutput.isRelevant === false,
- `Post ${i} isRelevant is not a valid boolean: ${aiOutput.isRelevant}`
- );
-
- console.log(
-      ` ✓ Post ${i + 1}: relevant=${aiOutput.isRelevant}, confidence=${
- aiOutput.confidence
- }`
- );
- }
-
- console.log(
- "PASS: AI analyzer returns valid structure and values for all test posts."
- );
-})();
+const fs = require("fs");
+const assert = require("assert");
+const { analyzeSinglePost, checkOllamaStatus } = require("../ai-analyzer");
+
+console.log("AI Analyzer logic tests");
+
+const testData = JSON.parse(
+ fs.readFileSync(__dirname + "/test-data.json", "utf-8")
+);
+const aiResults = testData.positive;
+const context = "job layoffs and workforce reduction";
+const model = process.env.OLLAMA_MODEL || "mistral"; // Use OLLAMA_MODEL from env or default to mistral
+
+(async () => {
+ // Check if Ollama is available
+ const ollamaAvailable = await checkOllamaStatus(model);
+ if (!ollamaAvailable) {
+ console.log("SKIP: Ollama not available - skipping AI analyzer tests");
+ console.log("PASS: AI analyzer tests skipped (Ollama not running)");
+ return;
+ }
+
+ console.log(`Testing AI analyzer with ${aiResults.length} posts...`);
+
+ for (let i = 0; i < aiResults.length; i++) {
+ const post = aiResults[i];
+ console.log(`Testing post ${i + 1}: "${post.text.substring(0, 50)}..."`);
+
+ const aiOutput = await analyzeSinglePost(post.text, context, model);
+
+ // Test that the function returns the expected structure
+ assert(
+ typeof aiOutput === "object" && aiOutput !== null,
+ `Post ${i} output is not an object`
+ );
+
+ assert(
+ typeof aiOutput.isRelevant === "boolean",
+ `Post ${i} isRelevant is not a boolean: ${typeof aiOutput.isRelevant}`
+ );
+
+ assert(
+ typeof aiOutput.confidence === "number",
+ `Post ${i} confidence is not a number: ${typeof aiOutput.confidence}`
+ );
+
+ assert(
+ typeof aiOutput.reasoning === "string",
+ `Post ${i} reasoning is not a string: ${typeof aiOutput.reasoning}`
+ );
+
+ // Test that confidence is within valid range
+ assert(
+ aiOutput.confidence >= 0 && aiOutput.confidence <= 1,
+ `Post ${i} confidence out of range: ${aiOutput.confidence} (should be 0-1)`
+ );
+
+ // Test that reasoning exists and is not empty
+ assert(
+ aiOutput.reasoning && aiOutput.reasoning.length > 0,
+ `Post ${i} missing or empty reasoning`
+ );
+
+ // Test that relevance is a boolean value
+ assert(
+ aiOutput.isRelevant === true || aiOutput.isRelevant === false,
+ `Post ${i} isRelevant is not a valid boolean: ${aiOutput.isRelevant}`
+ );
+
+ console.log(
+      ` ✓ Post ${i + 1}: relevant=${aiOutput.isRelevant}, confidence=${
+ aiOutput.confidence
+ }`
+ );
+ }
+
+ console.log(
+ "PASS: AI analyzer returns valid structure and values for all test posts."
+ );
+})();