diff --git a/.gitignore b/.gitignore index 4f3efa0..f45f482 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,10 @@ +.vscode/ +*.md +!README.md node_modules/ .env results/ -linkedout.exe -linkedout-macos zip* *.7z *obfuscated.js -.history \ No newline at end of file +.history diff --git a/README.md b/README.md new file mode 100644 index 0000000..59a5651 --- /dev/null +++ b/README.md @@ -0,0 +1,247 @@ +# LinkedOut - LinkedIn Posts Scraper + +A Node.js application that automates LinkedIn login and scrapes posts containing specific keywords. The tool is designed to help track job market trends, layoffs, and open work opportunities by monitoring LinkedIn content. + +## Features + +- **Automated LinkedIn Login**: Uses Playwright to automate browser interactions +- **Keyword-based Search**: Searches for posts containing keywords from CSV files or CLI +- **Flexible Keyword Sources**: Supports multiple CSV files in `keywords/` or CLI-only mode +- **Configurable Search Parameters**: Customizable date ranges, sorting options, city, and scroll behavior +- **Duplicate Detection**: Prevents duplicate posts and profiles in results +- **Clean Text Processing**: Removes hashtags, emojis, and URLs from post content +- **Timestamped Results**: Saves results to JSON files with timestamps +- **Command-line Overrides**: Support for runtime parameter adjustments +- **Enhanced Geographic Location Validation**: Validates user locations against 200+ Canadian cities with smart matching +- **Local AI Analysis (Ollama)**: Free, private, and fast post-processing with local LLMs +- **Flexible Processing**: Disable features, run AI analysis immediately, or process results later + +## Prerequisites + +- Node.js (v18 or higher) +- Valid LinkedIn account credentials +- [Ollama](https://ollama.ai/) with a model (free, private, local AI) + +## Installation + +1. Clone the repository or download the files +2. Install dependencies: + + ```bash + npm install + ``` + +3. 
Copy the configuration template and customize: + + ```bash + cp env-config.example .env + ``` + +4. Edit `.env` with your settings (see Configuration section below) + +## Configuration + +### Environment Variables (.env file) + +Create a `.env` file from `env-config.example`: + +```env +# LinkedIn Credentials (Required) +LINKEDIN_USERNAME=your_email@example.com +LINKEDIN_PASSWORD=your_password + +# Basic Settings +HEADLESS=true +KEYWORDS=keywords-layoff.csv # Just the filename; always looks in keywords/ unless path is given +DATE_POSTED=past-week +SORT_BY=date_posted +CITY=Toronto +WHEELS=5 + +# Enhanced Location Filtering +LOCATION_FILTER=Ontario,Manitoba +ENABLE_LOCATION_CHECK=true + +# Local AI Analysis (Ollama) +ENABLE_LOCAL_AI=true +OLLAMA_MODEL=mistral +OLLAMA_HOST=http://localhost:11434 +RUN_LOCAL_AI_AFTER_SCRAPING=false # true = run after scraping, false = run manually +AI_CONTEXT=job layoffs and workforce reduction +AI_CONFIDENCE=0.7 +AI_BATCH_SIZE=3 +``` + +### Configuration Options + +#### Required + +- `LINKEDIN_USERNAME`: Your LinkedIn email/username +- `LINKEDIN_PASSWORD`: Your LinkedIn password + +#### Basic Settings + +- `HEADLESS`: Browser headless mode (`true`/`false`, default: `true`) +- `KEYWORDS`: CSV file name (default: `keywords-layoff.csv` in `keywords/` folder) +- `DATE_POSTED`: Filter by date (`past-24h`, `past-week`, `past-month`, or empty) +- `SORT_BY`: Sort results (`relevance` or `date_posted`) +- `CITY`: Search location (default: `Toronto`) +- `WHEELS`: Number of scrolls to load posts (default: `5`) + +#### Enhanced Location Filtering + +- `LOCATION_FILTER`: Geographic filter - supports multiple provinces/cities: + - Single: `Ontario` or `Toronto` + - Multiple: `Ontario,Manitoba` or `Toronto,Vancouver` +- `ENABLE_LOCATION_CHECK`: Enable location validation (`true`/`false`) + +#### Local AI Analysis (Ollama) + +- `ENABLE_LOCAL_AI=true`: Enable local AI analysis +- `OLLAMA_MODEL`: Model to use (`mistral`, `llama2`, `codellama`) +- 
`OLLAMA_HOST`: Ollama server URL (default: `http://localhost:11434`) +- `RUN_LOCAL_AI_AFTER_SCRAPING`: Run AI immediately after scraping (`true`/`false`) +- `AI_CONTEXT`: Context for analysis (e.g., `job layoffs`) +- `AI_CONFIDENCE`: Minimum confidence threshold (0.0-1.0, default: 0.7) +- `AI_BATCH_SIZE`: Posts per batch (default: 3) + +## Usage + +### Basic Commands + +```bash +# Standard scraping with configured settings +node linkedout.js + +# Visual mode (see browser) +node linkedout.js --headless=false + +# Use only these keywords (ignore CSV) +node linkedout.js --keyword="layoff,downsizing" + +# Add extra keywords to CSV/CLI list +node linkedout.js --add-keyword="hiring freeze,open to work" + +# Override city and date +node linkedout.js --city="Vancouver" --date_posted=past-month + +# Custom output file +node linkedout.js --output=results/myfile.json + +# Skip location and AI filtering (fastest) +node linkedout.js --no-location --no-ai + +# Run AI analysis immediately after scraping +node linkedout.js --ai-after + +# Show help +node linkedout.js --help +``` + +### All Command-line Options + +- `--headless=true|false`: Override browser headless mode +- `--keyword="kw1,kw2"`: Use only these keywords (comma-separated, overrides CSV) +- `--add-keyword="kw1,kw2"`: Add extra keywords to CSV/CLI list +- `--city="CityName"`: Override city +- `--date_posted=VALUE`: Override date posted (past-24h, past-week, past-month, or empty) +- `--sort_by=VALUE`: Override sort by (date_posted or relevance) +- `--location_filter=VALUE`: Override location filter +- `--output=FILE`: Output file name +- `--no-location`: Disable location filtering +- `--no-ai`: Disable AI analysis +- `--ai-after`: Run local AI analysis after scraping +- `--help, -h`: Show help message + +### Keyword Files + +- Place all keyword CSVs in the `keywords/` folder +- Example: `keywords/keywords-layoff.csv`, `keywords/keywords-open-work.csv` +- Custom CSV format: header `keyword` with one keyword per line + 
+### Local AI Analysis Commands + +After scraping, you can run AI analysis on the results: + +```bash +# Analyze latest results +node ai-analyzer-local.js --context="job layoffs" + +# Analyze specific file +node ai-analyzer-local.js --input=results/results-2024-01-15.json --context="hiring" + +# Use different model +node ai-analyzer-local.js --model=llama2 --context="remote work" + +# Change confidence and batch size +node ai-analyzer-local.js --context="job layoffs" --confidence=0.8 --batch-size=5 +``` + +## Workflow Examples + +### 1. Quick Start (All Features) + +```bash +node linkedout.js --ai-after +``` + +### 2. Fast Scraping Only + +```bash +node linkedout.js --no-location --no-ai +``` + +### 3. Location-Only Filtering + +```bash +node linkedout.js --no-ai +``` + +### 4. Test Different AI Contexts + +```bash +node linkedout.js --no-ai +node ai-analyzer-local.js --context="job layoffs" +node ai-analyzer-local.js --context="hiring opportunities" +node ai-analyzer-local.js --context="remote work" +``` + +## Project Structure + +``` +linkedout/ +├── .env # Your configuration (create from template) +├── env-config.example # Configuration template +├── linkedout.js # Main scraper +├── ai-analyzer-local.js # Free local AI analyzer (Ollama) +├── location-utils.js # Enhanced location utilities +├── package.json # Dependencies +├── keywords/ # All keyword CSVs go here +│ ├── keywords-layoff.csv +│ └── keywords-open-work.csv +├── results/ # Output directory +└── README.md # This documentation +``` + +## Legal & Security + +- **Credentials**: Store securely in `.env`, add to `.gitignore` +- **LinkedIn ToS**: Respect rate limits and usage guidelines +- **Privacy**: Local AI keeps all data on your machine +- **Usage**: Educational and research purposes only + +## Dependencies + +- `playwright`: Browser automation +- `dotenv`: Environment variables +- `csv-parser`: CSV file reading +- Built-in: `fs`, `path`, `child_process` + +## Support + +For issues: + +1. 
Check this README +2. Verify `.env` configuration +3. Test with `--headless=false` for debugging +4. Check Ollama status: `ollama list` diff --git a/ai-analyzer-local.js b/ai-analyzer-local.js new file mode 100644 index 0000000..fea3a16 --- /dev/null +++ b/ai-analyzer-local.js @@ -0,0 +1,540 @@ +#!/usr/bin/env node + +/** + * Local AI Post-Processing Analyzer for LinkedOut + * + * Uses Ollama for completely FREE local AI analysis. + * + * FEATURES: + * - Analyze LinkedOut results for context relevance (layoffs, hiring, etc.) + * - Works on latest or specified results file + * - Batch processing for speed + * - Configurable context, model, confidence, batch size + * - CLI and .env configuration + * - 100% local, private, and free + * + * USAGE: + * node ai-analyzer-local.js [options] + * + * COMMAND-LINE OPTIONS: + * --input= Input JSON file (default: latest in results/) + * --context= AI context to analyze against (required) + * --confidence= Minimum confidence threshold (0.0-1.0, default: 0.7) + * --model= Ollama model to use (default: llama2) + * --batch-size= Number of posts to process at once (default: 3) + * --output= Output file (default: adds -ai-local suffix) + * --help, -h Show this help message + * + * EXAMPLES: + * node ai-analyzer-local.js --context="job layoffs" + * node ai-analyzer-local.js --input=results/results-2024-01-15.json --context="hiring" + * node ai-analyzer-local.js --model=mistral --context="remote work" + * node ai-analyzer-local.js --context="job layoffs" --confidence=0.8 --batch-size=5 + * + * ENVIRONMENT VARIABLES (.env file): + * AI_CONTEXT, AI_CONFIDENCE, AI_BATCH_SIZE, OLLAMA_MODEL, OLLAMA_HOST + * See README for full list. 
+ * + * OUTPUT: + * - Saves to results/ with -ai-local suffix unless --output is specified + * + * DEPENDENCIES: + * - Ollama (https://ollama.ai/) + * - Node.js built-ins: fs, path, fetch + * + * SECURITY & LEGAL: + * - All analysis is local, no data leaves your machine + * - Use responsibly for educational/research purposes + */ + +require("dotenv").config(); +const fs = require("fs"); +const path = require("path"); + +// Configuration from environment and command line +const DEFAULT_CONTEXT = + process.env.AI_CONTEXT || "job layoffs and workforce reduction"; +const DEFAULT_CONFIDENCE = parseFloat(process.env.AI_CONFIDENCE || "0.7"); +const DEFAULT_BATCH_SIZE = parseInt(process.env.AI_BATCH_SIZE || "3"); +const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "llama2"; +const OLLAMA_HOST = process.env.OLLAMA_HOST || "http://localhost:11434"; + +// Parse command line arguments +const args = process.argv.slice(2); +let inputFile = null; +let context = DEFAULT_CONTEXT; +let confidenceThreshold = DEFAULT_CONFIDENCE; +let batchSize = DEFAULT_BATCH_SIZE; +let model = DEFAULT_MODEL; +let outputFile = null; + +for (const arg of args) { + if (arg.startsWith("--input=")) { + inputFile = arg.split("=")[1]; + } else if (arg.startsWith("--context=")) { + context = arg.split("=")[1]; + } else if (arg.startsWith("--confidence=")) { + confidenceThreshold = parseFloat(arg.split("=")[1]); + } else if (arg.startsWith("--batch-size=")) { + batchSize = parseInt(arg.split("=")[1]); + } else if (arg.startsWith("--model=")) { + model = arg.split("=")[1]; + } else if (arg.startsWith("--output=")) { + outputFile = arg.split("=")[1]; + } +} + +if (!context) { + console.error("❌ Error: No AI context specified"); + console.error('Use --context="your context" or set AI_CONTEXT in .env'); + process.exit(1); +} + +/** + * Check if Ollama is running and the model is available + */ +async function checkOllamaStatus() { + try { + // Check if Ollama is running + const response = await 
fetch(`${OLLAMA_HOST}/api/tags`); + if (!response.ok) { + throw new Error(`Ollama not running on ${OLLAMA_HOST}`); + } + + const data = await response.json(); + const availableModels = data.models.map((m) => m.name); + + console.log(`🤖 Ollama is running`); + console.log( + `📦 Available models: ${availableModels + .map((m) => m.split(":")[0]) + .join(", ")}` + ); + + // Check if requested model is available + const modelExists = availableModels.some((m) => m.startsWith(model)); + if (!modelExists) { + console.error(`❌ Model "${model}" not found`); + console.error(`💡 Install it with: ollama pull ${model}`); + console.error( + `💡 Or choose from: ${availableModels + .map((m) => m.split(":")[0]) + .join(", ")}` + ); + process.exit(1); + } + + console.log(`✅ Using model: ${model}`); + return true; + } catch (error) { + console.error("❌ Error connecting to Ollama:", error.message); + console.error("💡 Make sure Ollama is installed and running:"); + console.error(" 1. Install: https://ollama.ai/"); + console.error(" 2. Start: ollama serve"); + console.error(` 3. Install model: ollama pull ${model}`); + process.exit(1); + } +} + +/** + * Find the most recent results file if none specified + */ +function findLatestResultsFile() { + const resultsDir = "results"; + if (!fs.existsSync(resultsDir)) { + throw new Error("Results directory not found. Run the scraper first."); + } + + const files = fs + .readdirSync(resultsDir) + .filter( + (f) => + f.startsWith("results-") && f.endsWith(".json") && !f.includes("-ai-") + ) + .sort() + .reverse(); + + if (files.length === 0) { + throw new Error("No results files found. 
Run the scraper first."); + } + + return path.join(resultsDir, files[0]); +} + +/** + * Analyze multiple posts using local Ollama + */ +async function analyzeBatch(posts, context, model) { + console.log(`🤖 Analyzing batch of ${posts.length} posts with ${model}...`); + + try { + const prompt = `You are an expert at analyzing LinkedIn posts for relevance to specific contexts. + +CONTEXT TO MATCH: "${context}" + +Analyze these ${ + posts.length + } LinkedIn posts and determine if each relates to the context above. + +POSTS: +${posts + .map( + (post, i) => ` +POST ${i + 1}: +"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}" +` + ) + .join("")} + +For each post, provide: +- Is it relevant to "${context}"? (YES/NO) +- Confidence level (0.0 to 1.0) +- Brief reasoning + +Respond in this EXACT format for each post: +POST 1: YES/NO | 0.X | brief reason +POST 2: YES/NO | 0.X | brief reason +POST 3: YES/NO | 0.X | brief reason + +Examples: +- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs +- For hiring context: "we're hiring developers" = YES | 0.8 | job posting +- Unrelated content = NO | 0.1 | not relevant to context`; + + const response = await fetch(`${OLLAMA_HOST}/api/generate`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: model, + prompt: prompt, + stream: false, + options: { + temperature: 0.3, + top_p: 0.9, + }, + }), + }); + + if (!response.ok) { + throw new Error( + `Ollama API error: ${response.status} ${response.statusText}` + ); + } + + const data = await response.json(); + const aiResponse = data.response.trim(); + + // Parse the response + const analyses = []; + const lines = aiResponse.split("\n").filter((line) => line.trim()); + + for (let i = 0; i < posts.length; i++) { + let analysis = { + postIndex: i + 1, + isRelevant: false, + confidence: 0.5, + reasoning: "Could not parse AI response", + }; + + // Look for lines that match "POST X:" pattern + 
const postPattern = new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i"); + + for (const line of lines) { + const match = line.match(postPattern); + if (match) { + const content = match[1].trim(); + + // Parse: YES/NO | 0.X | reasoning + const parts = content.split("|").map((p) => p.trim()); + + if (parts.length >= 3) { + analysis.isRelevant = parts[0].toUpperCase().includes("YES"); + analysis.confidence = Math.max( + 0, + Math.min(1, parseFloat(parts[1]) || 0.5) + ); + analysis.reasoning = parts[2] || "No reasoning provided"; + } else { + // Fallback parsing + analysis.isRelevant = + content.toUpperCase().includes("YES") || + content.toLowerCase().includes("relevant"); + analysis.confidence = 0.6; + analysis.reasoning = content.substring(0, 100); + } + break; + } + } + + analyses.push(analysis); + } + + // If we didn't get enough analyses, fill in defaults + while (analyses.length < posts.length) { + analyses.push({ + postIndex: analyses.length + 1, + isRelevant: false, + confidence: 0.3, + reasoning: "AI response parsing failed", + }); + } + + return analyses; + } catch (error) { + console.error(`❌ Error in batch AI analysis: ${error.message}`); + + // Fallback: mark all as relevant with low confidence + return posts.map((_, i) => ({ + postIndex: i + 1, + isRelevant: true, + confidence: 0.3, + reasoning: `Analysis failed: ${error.message}`, + })); + } +} + +/** + * Analyze a single post using local Ollama (fallback) + */ +async function analyzeSinglePost(text, context, model) { + const prompt = `Analyze this LinkedIn post for relevance to: "${context}" + +Post: "${text}" + +Is this post relevant to "${context}"? Provide: +1. YES or NO +2. Confidence (0.0 to 1.0) +3. 
Brief reason + +Format: YES/NO | 0.X | reason`; + + try { + const response = await fetch(`${OLLAMA_HOST}/api/generate`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: model, + prompt: prompt, + stream: false, + options: { + temperature: 0.3, + }, + }), + }); + + if (!response.ok) { + throw new Error(`Ollama API error: ${response.status}`); + } + + const data = await response.json(); + const aiResponse = data.response.trim(); + + // Parse response + const parts = aiResponse.split("|").map((p) => p.trim()); + + if (parts.length >= 3) { + return { + isRelevant: parts[0].toUpperCase().includes("YES"), + confidence: Math.max(0, Math.min(1, parseFloat(parts[1]) || 0.5)), + reasoning: parts[2], + }; + } else { + // Fallback parsing + return { + isRelevant: + aiResponse.toLowerCase().includes("yes") || + aiResponse.toLowerCase().includes("relevant"), + confidence: 0.6, + reasoning: aiResponse.substring(0, 100), + }; + } + } catch (error) { + return { + isRelevant: true, // Default to include on error + confidence: 0.3, + reasoning: `Analysis failed: ${error.message}`, + }; + } +} + +/** + * Main processing function + */ +async function main() { + try { + console.log("🚀 LinkedOut Local AI Analyzer Starting..."); + console.log(`📊 Context: "${context}"`); + console.log(`🎯 Confidence Threshold: ${confidenceThreshold}`); + console.log(`📦 Batch Size: ${batchSize}`); + console.log(`🤖 Model: ${model}`); + + // Check Ollama status + await checkOllamaStatus(); + + // Determine input file + if (!inputFile) { + inputFile = findLatestResultsFile(); + console.log(`📂 Using latest results file: ${inputFile}`); + } else { + console.log(`📂 Using specified file: ${inputFile}`); + } + + // Load results + if (!fs.existsSync(inputFile)) { + throw new Error(`Input file not found: ${inputFile}`); + } + + const rawData = fs.readFileSync(inputFile, "utf-8"); + const results = JSON.parse(rawData); + + if (!Array.isArray(results) || 
results.length === 0) { + throw new Error("No posts found in input file"); + } + + console.log(`📋 Loaded ${results.length} posts for analysis`); + + // Process in batches + const processedResults = []; + let totalRelevant = 0; + let totalProcessed = 0; + + for (let i = 0; i < results.length; i += batchSize) { + const batch = results.slice(i, i + batchSize); + console.log( + `\n📦 Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil( + results.length / batchSize + )} (${batch.length} posts)` + ); + + const analyses = await analyzeBatch(batch, context, model); + + // Apply analyses to posts + for (let j = 0; j < batch.length; j++) { + const post = batch[j]; + const analysis = analyses[j]; + + const enhancedPost = { + ...post, + aiRelevant: analysis.isRelevant, + aiConfidence: analysis.confidence, + aiReasoning: analysis.reasoning, + aiModel: model, + aiAnalyzedAt: new Date().toLocaleString("en-CA", { + year: "numeric", + month: "2-digit", + day: "2-digit", + hour: "2-digit", + minute: "2-digit", + second: "2-digit", + hour12: false, + }), + aiType: "local-ollama", + aiProcessed: true, + }; + + // Apply confidence threshold + if (analysis.confidence >= confidenceThreshold) { + if (analysis.isRelevant) { + processedResults.push(enhancedPost); + totalRelevant++; + } + } else { + // Include low-confidence posts but flag them + enhancedPost.lowConfidence = true; + processedResults.push(enhancedPost); + } + + totalProcessed++; + console.log( + ` ${ + analysis.isRelevant ? 
"✅" : "❌" + } Post ${totalProcessed}: ${analysis.confidence.toFixed( + 2 + )} confidence - ${analysis.reasoning.substring(0, 100)}...` + ); + } + + // Small delay between batches to be nice to the system + if (i + batchSize < results.length) { + console.log("⏳ Brief pause..."); + await new Promise((resolve) => setTimeout(resolve, 500)); + } + } + + // Determine output file + if (!outputFile) { + const inputBasename = path.basename(inputFile, ".json"); + const inputDir = path.dirname(inputFile); + outputFile = path.join(inputDir, `${inputBasename}-ai-local.json`); + } + + // Save results + fs.writeFileSync( + outputFile, + JSON.stringify(processedResults, null, 2), + "utf-8" + ); + + console.log("\n🎉 Local AI Analysis Complete!"); + console.log(`📊 Results:`); + console.log(` Total posts processed: ${totalProcessed}`); + console.log(` Relevant posts found: ${totalRelevant}`); + console.log(` Final results saved: ${processedResults.length}`); + console.log(`📁 Output saved to: ${outputFile}`); + console.log(`💰 Cost: $0.00 (completely free!)`); + } catch (error) { + console.error("❌ Error:", error.message); + process.exit(1); + } +} + +// Show help if requested +if (args.includes("--help") || args.includes("-h")) { + console.log(` +LinkedOut Local AI Analyzer (Ollama) + +🚀 FREE local AI analysis - No API costs, complete privacy! 
+ +Usage: node ai-analyzer-local.js [options] + +Options: + --input= Input JSON file (default: latest in results/) + --context= AI context to analyze against (required) + --confidence= Minimum confidence threshold (0.0-1.0, default: 0.7) + --model= Ollama model to use (default: llama2) + --batch-size= Number of posts to process at once (default: 3) + --output= Output file (default: adds -ai-local suffix) + --help, -h Show this help message + +Examples: + node ai-analyzer-local.js --context="job layoffs" + node ai-analyzer-local.js --model=mistral --context="hiring opportunities" + node ai-analyzer-local.js --context="remote work" --confidence=0.8 + +Prerequisites: + 1. Install Ollama: https://ollama.ai/ + 2. Install a model: ollama pull llama2 + 3. Start Ollama: ollama serve + +Popular Models: + - llama2 (good general purpose) + - mistral (fast and accurate) + - codellama (good for technical content) + - llama2:13b (more accurate, slower) + +Environment Variables: + AI_CONTEXT Default context for analysis + AI_CONFIDENCE Default confidence threshold + AI_BATCH_SIZE Default batch size + OLLAMA_MODEL Default model (llama2, mistral, etc.) 
+ OLLAMA_HOST Ollama host (default: http://localhost:11434) +`); + process.exit(0); +} + +// Run the analyzer +main(); diff --git a/keywords-layoff.csv b/keywords/keywords-layoff.csv similarity index 100% rename from keywords-layoff.csv rename to keywords/keywords-layoff.csv diff --git a/keywords-open-work.csv b/keywords/keywords-open-work.csv similarity index 100% rename from keywords-open-work.csv rename to keywords/keywords-open-work.csv diff --git a/linkedout.js b/linkedout.js index c2892d3..5b3773a 100644 --- a/linkedout.js +++ b/linkedout.js @@ -1,57 +1,132 @@ /** - * LinkedIn Posts Scraper (linkedout) + * LinkedIn Posts Scraper (LinkedOut) * - * This script logs into LinkedIn using credentials stored in a .env file, - * reads keywords from a CSV file (keywords.csv), and scrapes posts matching - * those keywords from LinkedIn's content search. + * A comprehensive tool for scraping LinkedIn posts based on keyword searches. + * Designed to track job market trends, layoffs, and open work opportunities + * by monitoring LinkedIn content automatically. * - * Usage: - * node linkedout.js [--headless=true|false] [--keyword=additional_keyword] + * FEATURES: + * - Automated LinkedIn login with browser automation + * - Keyword-based post searching from CSV files or CLI + * - Configurable search parameters (date, location, sorting) + * - Duplicate detection for posts and profiles + * - Text cleaning (removes hashtags, URLs, emojis) + * - Timestamped JSON output files + * - Command-line parameter overrides (see below) + * - Enhanced geographic location validation + * - Optional local AI-powered context analysis (Ollama) * - * Command-line Parameters: - * --headless: Override the headless mode (true or false). Defaults to value in .env (HEADLESS). - * --keyword: Append an additional keyword to the list of keywords from keywords.csv. + * USAGE: + * node linkedout.js [options] * - * Output: - * Saves results to a timestamped JSON file in the 'results' directory. 
+ * COMMAND-LINE OPTIONS: + * --headless=true|false Override browser headless mode + * --keyword="kw1,kw2" Use only these keywords (comma-separated, overrides CSV) + * --add-keyword="kw1,kw2" Add extra keywords to CSV/CLI list + * --city="CityName" Override city + * --date_posted=VALUE Override date posted (past-24h, past-week, past-month, or empty) + * --sort_by=VALUE Override sort by (date_posted or relevance) + * --location_filter=VALUE Override location filter + * --output=FILE Output file name + * --no-location Disable location filtering + * --no-ai Disable AI analysis + * --ai-after Run local AI analysis after scraping + * --help, -h Show this help message * - * Requirements: - * - Node.js environment (or use the compiled executable) - * - Playwright installed (or included in the binary) - * - dotenv package for environment variables - * - csv-parser package for reading CSV files + * EXAMPLES: + * node linkedout.js # Standard scraping + * node linkedout.js --headless=false # Visual mode + * node linkedout.js --keyword="layoff,downsizing" # Only these keywords + * node linkedout.js --add-keyword="hiring freeze" # Add extra keyword(s) + * node linkedout.js --city="Vancouver" --date_posted=past-month + * node linkedout.js --output=results/myfile.json + * node linkedout.js --no-location --no-ai # Fastest, no filters + * node linkedout.js --ai-after # Run AI after scraping * - * Environment Variables (.env): - * LINKEDIN_USERNAME - Your LinkedIn username - * LINKEDIN_PASSWORD - Your LinkedIn password - * HEADLESS - Default headless mode (true or false) + * POST-PROCESSING AI ANALYSIS: + * node ai-analyzer-local.js --context="job layoffs" # Run on latest results file + * node ai-analyzer-local.js --input=results/results-2024-01-15.json --context="hiring" * - * Example: - * node linkedout.js --headless=true --keyword=layoff + * ENVIRONMENT VARIABLES (.env file): + * KEYWORDS=keywords-layoff.csv (filename only, always looks in keywords/ folder unless path is given) + 
* See README for full list. + * + * OUTPUT: + * - Saves to results/results-YYYY-MM-DD-HH-MM.json (or as specified by --output) + * - Enhanced format with optional location validation and local AI analysis + * + * KEYWORD FILES: + * - Place all keyword CSVs in the keywords/ folder + * - keywords-layoff.csv: 33+ layoff-related terms + * - keywords-open-work.csv: Terms for finding people open to work + * - Custom CSV format: header "keyword" with one keyword per line + * + * DEPENDENCIES: + * - playwright: Browser automation + * - dotenv: Environment variable management + * - csv-parser: CSV file parsing + * - Node.js built-ins: fs, path, child_process + * + * SECURITY & LEGAL: + * - Store credentials securely in .env file + * - Respect LinkedIn's Terms of Service + * - Use responsibly for educational/research purposes + * - Consider rate limiting and LinkedIn API for production use */ -process.env.PLAYWRIGHT_BROWSERS_PATH = "0"; +//process.env.PLAYWRIGHT_BROWSERS_PATH = "0"; +// Suppress D-Bus notification errors in WSL +process.env.NO_AT_BRIDGE = "1"; +process.env.DBUS_SESSION_BUS_ADDRESS = "/dev/null"; const { chromium } = require("playwright"); const fs = require("fs"); const path = require("path"); require("dotenv").config(); const csv = require("csv-parser"); +const { spawn } = require("child_process"); -const DATE_POSTED = process.env.DATE_POSTED || "past-week"; // "past-24h", "past-week", "past-month", or "" -const SORT_BY = process.env.SORT_BY || "date_posted"; // "relevance", "date_posted" -const WHEELS = process.env.WHEELS || 5; +// Core configuration +const DATE_POSTED = process.env.DATE_POSTED || "past-week"; +const SORT_BY = process.env.SORT_BY || "date_posted"; +const WHEELS = parseInt(process.env.WHEELS) || 5; const CITY = process.env.CITY || "Toronto"; -// Read credentials and headless mode from .env +// Location filtering configuration +const LOCATION_FILTER = process.env.LOCATION_FILTER || ""; +const ENABLE_LOCATION_CHECK = 
process.env.ENABLE_LOCATION_CHECK === "true"; + +// Local AI analysis configuration +const ENABLE_LOCAL_AI = process.env.ENABLE_LOCAL_AI === "true"; +const RUN_LOCAL_AI_AFTER_SCRAPING = + process.env.RUN_LOCAL_AI_AFTER_SCRAPING === "true"; +const AI_CONTEXT = + process.env.AI_CONTEXT || "job layoffs and workforce reduction"; + +// Import enhanced location utilities +const { + parseLocationFilters, + validateLocationAgainstFilters, + extractLocationFromProfile, +} = require("./location-utils"); + +// Read credentials const LINKEDIN_USERNAME = process.env.LINKEDIN_USERNAME; const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD; - -// Default headless mode from .env let HEADLESS = process.env.HEADLESS === "true"; // Parse command-line arguments const args = process.argv.slice(2); -let additionalKeyword = null; +let cliKeywords = null; // If set, only use these +let additionalKeywords = []; +let disableLocation = false; +let disableAI = false; +let runAIAfter = RUN_LOCAL_AI_AFTER_SCRAPING; +let cliCity = null; +let cliDatePosted = null; +let cliSortBy = null; +let cliLocationFilter = null; +let cliOutput = null; +let showHelp = false; for (const arg of args) { if (arg.startsWith("--headless=")) { @@ -59,7 +134,99 @@ for (const arg of args) { HEADLESS = val === "true"; } if (arg.startsWith("--keyword=")) { - additionalKeyword = arg.split("=")[1]; + cliKeywords = arg + .split("=")[1] + .split(",") + .map((k) => k.trim()) + .filter(Boolean); + } + if (arg.startsWith("--add-keyword=")) { + additionalKeywords = additionalKeywords.concat( + arg + .split("=")[1] + .split(",") + .map((k) => k.trim()) + .filter(Boolean) + ); + } + if (arg === "--no-location") { + disableLocation = true; + } + if (arg === "--no-ai") { + disableAI = true; + } + if (arg === "--ai-after") { + runAIAfter = true; + } + if (arg.startsWith("--city=")) { + cliCity = arg.split("=")[1]; + } + if (arg.startsWith("--date_posted=")) { + cliDatePosted = arg.split("=")[1]; + } + if 
(arg.startsWith("--sort_by=")) { + cliSortBy = arg.split("=")[1]; + } + if (arg.startsWith("--location_filter=")) { + cliLocationFilter = arg.split("=")[1]; + } + if (arg.startsWith("--output=")) { + cliOutput = arg.split("=")[1]; + } + if (arg === "--help" || arg === "-h") { + showHelp = true; + } +} + +if (showHelp) { + console.log( + `\nLinkedOut - LinkedIn Posts Scraper\n\nUsage: node linkedout.js [options]\n\nOptions:\n --headless=true|false Override browser headless mode\n --keyword="kw1,kw2" Use only these keywords (comma-separated, overrides CSV)\n --add-keyword="kw1,kw2" Add extra keywords to CSV list\n --city="CityName" Override city\n --date_posted=VALUE Override date posted (past-24h, past-week, past-month or '')\n --sort_by=VALUE Override sort by (date_posted or relevance)\n --location_filter=VALUE Override location filter\n --output=FILE Output file name\n --no-location Disable location filtering\n --no-ai Disable AI analysis\n --ai-after Run local AI analysis after scraping\n --help, -h Show this help message\n\nExamples:\n node linkedout.js --keyword="layoff,downsizing"\n node linkedout.js --add-keyword="hiring freeze"\n node linkedout.js --city="Vancouver" --date_posted=past-month\n node linkedout.js --output=results/myfile.json\n` + ); + process.exit(0); +} + +// Use CLI overrides if provided +const EFFECTIVE_CITY = cliCity || CITY; +const EFFECTIVE_DATE_POSTED = cliDatePosted || DATE_POSTED; +const EFFECTIVE_SORT_BY = cliSortBy || SORT_BY; +const EFFECTIVE_LOCATION_FILTER = cliLocationFilter || LOCATION_FILTER; + +// Read keywords from CSV or CLI +const keywords = []; +let keywordEnv = process.env.KEYWORDS || "keywords-layoff.csv"; +let csvPath = path.join( + process.cwd(), + keywordEnv.includes("/") ? 
keywordEnv : `keywords/${keywordEnv}` +); + +function loadKeywordsAndStart() { + if (cliKeywords) { + // Only use CLI keywords + cliKeywords.forEach((k) => keywords.push(k)); + if (additionalKeywords.length > 0) { + additionalKeywords.forEach((k) => keywords.push(k)); + } + startScraper(); + } else { + // Load from CSV, then add any additional keywords + fs.createReadStream(csvPath) + .pipe(csv()) + .on("data", (row) => { + if (row.keyword) keywords.push(row.keyword.trim()); + }) + .on("end", () => { + if (keywords.length === 0) { + console.error("No keywords found in csv"); + process.exit(1); + } + if (additionalKeywords.length > 0) { + additionalKeywords.forEach((k) => keywords.push(k)); + console.log( + `Added additional keywords: ${additionalKeywords.join(", ")}` + ); + } + startScraper(); + }); } } @@ -84,9 +251,10 @@ function buildSearchUrl(keyword, city) { let url = `https://www.linkedin.com/search/results/content/?keywords=${encodeURIComponent( keyword + " " + city )}`; - if (DATE_POSTED) - url += `&datePosted=${encodeURIComponent(`"${DATE_POSTED}"`)}`; - if (SORT_BY) url += `&sortBy=${encodeURIComponent(`"${SORT_BY}"`)}`; + if (EFFECTIVE_DATE_POSTED) + url += `&datePosted=${encodeURIComponent(`"${EFFECTIVE_DATE_POSTED}"`)}`; + if (EFFECTIVE_SORT_BY) + url += `&sortBy=${encodeURIComponent(`"${EFFECTIVE_SORT_BY}"`)}`; url += `&origin=FACETED_SEARCH`; return url; } @@ -95,144 +263,386 @@ function containsAnyKeyword(text, keywords) { return keywords.some((k) => text.toLowerCase().includes(k.toLowerCase())); } -// Read keywords from CSV -const keywords = []; -const csvPath = path.join( - process.cwd(), - process.env.KEYWORDS || "keywords-layoff.csv" -); +/** + * Enhanced profile location validation with smart waiting (no timeouts) + * Uses a new tab to avoid disrupting the main scraping flow + */ +async function validateProfileLocation( + context, + profileLink, + locationFilterString +) { + if (!locationFilterString || !ENABLE_LOCATION_CHECK || 
disableLocation) { + return { + isValid: true, + location: "Not checked", + matchedFilter: null, + reasoning: "Location check disabled", + error: null, + }; + } -fs.createReadStream(csvPath) - .pipe(csv()) - .on("data", (row) => { - if (row.keyword) keywords.push(row.keyword.trim()); - }) - .on("end", async () => { - if (keywords.length === 0) { - console.error("No keywords found in csv"); - process.exit(1); - } + let profilePage = null; + try { + console.log(`🌍 Checking profile location: ${profileLink}`); - // Append additional keyword if provided - if (additionalKeyword) { - keywords.push(additionalKeyword); - console.log(`Added additional keyword from CLI: ${additionalKeyword}`); - } - - const browser = await chromium.launch({ - headless: HEADLESS, - args: ["--no-sandbox", "--disable-setuid-sandbox"], - }); - const context = await browser.newContext(); - const page = await Promise.race([ - context.newPage(), - new Promise((_, reject) => - setTimeout(() => reject(new Error("newPage timeout")), 10000) - ), - ]).catch((err) => { - console.error("Failed to create new page:", err); - process.exit(1); + // Create a new page/tab for profile validation + profilePage = await context.newPage(); + await profilePage.goto(profileLink, { + waitUntil: "domcontentloaded", + timeout: 10000, }); - try { - await page.goto("https://www.linkedin.com/login"); - await page.fill('input[name="session_key"]', LINKEDIN_USERNAME); - await page.fill('input[name="session_password"]', LINKEDIN_PASSWORD); - await page.click('button[type="submit"]'); - await page.waitForSelector("img.global-nav__me-photo", { - timeout: 10000, - }); + // Always use smart waiting for key profile elements + await Promise.race([ + profilePage.waitForSelector("h1", { timeout: 3000 }), + profilePage.waitForSelector("[data-field='experience_section']", { + timeout: 3000, + }), + profilePage.waitForSelector(".pv-text-details__left-panel", { + timeout: 3000, + }), + ]); - const seenPosts = new Set(); - const 
seenProfiles = new Set(); - const results = []; + // Use enhanced location extraction + const location = await extractLocationFromProfile(profilePage); - for (const keyword of keywords) { - const searchUrl = buildSearchUrl(keyword, CITY); - await page.goto(searchUrl, { waitUntil: "load" }); + if (!location) { + return { + isValid: false, + location: "Location not found", + matchedFilter: null, + reasoning: "Could not extract location from profile", + error: "Location extraction failed", + }; + } - try { - await page.waitForSelector(".feed-shared-update-v2", { - timeout: 3000, + // Parse location filters + const locationFilters = parseLocationFilters(locationFilterString); + + // Validate against filters + const validationResult = validateLocationAgainstFilters( + location, + locationFilters + ); + + return { + isValid: validationResult.isValid, + location, + matchedFilter: validationResult.matchedFilter, + reasoning: validationResult.reasoning, + error: validationResult.isValid ? null : validationResult.reasoning, + }; + } catch (error) { + console.error(`❌ Error checking profile location: ${error.message}`); + return { + isValid: false, + location: "Error checking location", + matchedFilter: null, + reasoning: `Error: ${error.message}`, + error: error.message, + }; + } finally { + // Always close the profile page to clean up + if (profilePage) { + try { + await profilePage.close(); + } catch (closeError) { + console.error(`⚠️ Error closing profile page: ${closeError.message}`); + } + } + } +} + +/** + * Run local AI analysis after scraping is complete + */ +async function runPostScrapingLocalAI(resultsFile) { + if (disableAI || !ENABLE_LOCAL_AI || !runAIAfter) { + return; + } + + console.log("\n🧠 Starting post-scraping local AI analysis..."); + + const analyzerScript = "ai-analyzer-local.js"; + const args = [`--input=${resultsFile}`, `--context=${AI_CONTEXT}`]; + + console.log(`🚀 Running: node ${analyzerScript} ${args.join(" ")}`); + + return new Promise((resolve, 
reject) => { + const child = spawn("node", [analyzerScript, ...args], { + stdio: "inherit", + cwd: process.cwd(), + }); + + child.on("close", (code) => { + if (code === 0) { + console.log("✅ Local AI analysis completed successfully"); + resolve(); + } else { + console.error(`❌ Local AI analysis failed with code ${code}`); + reject(new Error(`Local AI analysis process exited with code ${code}`)); + } + }); + + child.on("error", (error) => { + console.error(`❌ Failed to run local AI analysis: ${error.message}`); + reject(error); + }); + }); +} + +async function startScraper() { + console.log("\n🚀 LinkedOut Scraper Starting..."); + console.log(`📊 Keywords: ${keywords.length}`); + console.log( + `🌍 Location Filter: ${ + ENABLE_LOCATION_CHECK && !disableLocation + ? LOCATION_FILTER || "None" + : "Disabled" + }` + ); + console.log( + `🧠 Local AI Analysis: ${ + ENABLE_LOCAL_AI && !disableAI + ? runAIAfter + ? "After scraping" + : "Manual" + : "Disabled" + }` + ); + + const browser = await chromium.launch({ + headless: HEADLESS, + args: ["--no-sandbox", "--disable-setuid-sandbox"], + }); + const context = await browser.newContext(); + const page = await Promise.race([ + context.newPage(), + new Promise((_, reject) => + setTimeout(() => reject(new Error("newPage timeout")), 10000) + ), + ]).catch((err) => { + console.error("Failed to create new page:", err); + process.exit(1); + }); + + let scrapeError = null; + try { + await page.goto("https://www.linkedin.com/login"); + await page.fill('input[name="session_key"]', LINKEDIN_USERNAME); + await page.fill('input[name="session_password"]', LINKEDIN_PASSWORD); + await page.click('button[type="submit"]'); + await page.waitForSelector("img.global-nav__me-photo", { + timeout: 15000, + }); + + const seenPosts = new Set(); + const seenProfiles = new Set(); + const results = []; + const rejectedResults = []; + + for (const keyword of keywords) { + const searchUrl = buildSearchUrl(keyword, EFFECTIVE_CITY); + await page.goto(searchUrl, 
{ waitUntil: "load" }); + + try { + await page.waitForSelector(".feed-shared-update-v2", { + timeout: 3000, + }); + } catch (error) { + console.log( + `---\nNo posts found for keyword: ${keyword}\nCity: ${EFFECTIVE_CITY}\nDate posted: ${EFFECTIVE_DATE_POSTED}\nSort by: ${EFFECTIVE_SORT_BY}` + ); + continue; + } + + for (let i = 0; i < WHEELS; i++) { + await page.mouse.wheel(0, 1000); + await page.waitForTimeout(1000); + } + + const postContainers = await page.$$(".feed-shared-update-v2"); + for (const container of postContainers) { + let text = ""; + const textHandle = await container.$( + "div.update-components-text, span.break-words" + ); + if (textHandle) { + text = (await textHandle.textContent()) || ""; + text = cleanText(text); + } + if ( + !text || + seenPosts.has(text) || + text.length < 30 || + !/[a-zA-Z0-9]/.test(text) + ) { + rejectedResults.push({ + rejected: true, + reason: !text + ? "No text" + : seenPosts.has(text) + ? "Duplicate post" + : text.length < 30 + ? "Text too short" + : "No alphanumeric content", + keyword, + text, + profileLink: null, + timestamp: new Date().toISOString(), + }); + continue; + } + seenPosts.add(text); + + let profileLink = ""; + const profileLinkElement = await container.$('a[href*="/in/"]'); + if (profileLinkElement) { + profileLink = await profileLinkElement.getAttribute("href"); + if (profileLink && !profileLink.startsWith("http")) { + profileLink = `https://www.linkedin.com${profileLink}`; + } + profileLink = profileLink.split("?")[0]; + } + + if (!profileLink || seenProfiles.has(profileLink)) { + rejectedResults.push({ + rejected: true, + reason: !profileLink ? 
"No profile link" : "Duplicate profile", + keyword, + text, + profileLink, + timestamp: new Date().toISOString(), + }); + continue; + } + seenProfiles.add(profileLink); + + // Double-check keyword presence + if (!containsAnyKeyword(text, keywords)) { + rejectedResults.push({ + rejected: true, + reason: "Keyword not present", + keyword, + text, + profileLink, + timestamp: new Date().toISOString(), + }); + continue; + } + + console.log("---"); + console.log("Keyword:", keyword); + console.log("Post:", text.substring(0, 100) + "..."); + console.log("Profile:", profileLink); + + // Enhanced location validation + const locationCheck = await validateProfileLocation( + context, + profileLink, + EFFECTIVE_LOCATION_FILTER + ); + console.log("📍 Location:", locationCheck.location); + console.log("🎯 Match:", locationCheck.reasoning); + + if (!locationCheck.isValid) { + rejectedResults.push({ + rejected: true, + reason: `Location filter failed: ${locationCheck.error}`, + keyword, + text, + profileLink, + location: locationCheck.location, + locationReasoning: locationCheck.reasoning, + timestamp: new Date().toISOString(), }); - } catch (error) { console.log( - `---\nNo posts found for keyword: ${keyword}\nDate posted: ${DATE_POSTED}\nSort by: ${SORT_BY}` + "❌ Skipping - Location filter failed:", + locationCheck.error ); continue; } - for (let i = 0; i < WHEELS; i++) { - await page.mouse.wheel(0, 1000); - await page.waitForTimeout(1000); - } + console.log("✅ Post passed all filters"); - const postContainers = await page.$$(".feed-shared-update-v2"); - for (const container of postContainers) { - let text = ""; - const textHandle = await container.$( - "div.update-components-text, span.break-words" - ); - if (textHandle) { - text = (await textHandle.textContent()) || ""; - text = cleanText(text); - } - if ( - !text || - seenPosts.has(text) || - text.length < 30 || - !/[a-zA-Z0-9]/.test(text) - ) - continue; - seenPosts.add(text); - - let profileLink = ""; - const profileLinkElement 
= await container.$('a[href*="/in/"]'); - if (profileLinkElement) { - profileLink = await profileLinkElement.getAttribute("href"); - if (profileLink && !profileLink.startsWith("http")) { - profileLink = `https://www.linkedin.com${profileLink}`; - } - profileLink = profileLink.split("?")[0]; - } - - if (!profileLink || seenProfiles.has(profileLink)) continue; - seenProfiles.add(profileLink); - - // Double-check keyword presence - if (!containsAnyKeyword(text, keywords)) continue; - - console.log("---"); - console.log("Keyword:", keyword); - console.log("Post:", text); - console.log("Profile:", profileLink); - - results.push({ - keyword, - text, - profileLink, - }); - } + results.push({ + keyword, + text, + profileLink, + location: locationCheck.location, + locationValid: locationCheck.isValid, + locationMatchedFilter: locationCheck.matchedFilter, + locationReasoning: locationCheck.reasoning, + timestamp: new Date().toLocaleString("en-CA", { + year: "numeric", + month: "2-digit", + day: "2-digit", + hour: "2-digit", + minute: "2-digit", + second: "2-digit", + hour12: false, + }), + aiProcessed: false, + }); } + } - const now = new Date(); - const timestamp = `${now.getFullYear()}-${String( - now.getMonth() + 1 - ).padStart(2, "0")}-${String(now.getDate()).padStart(2, "0")}-${String( + const now = new Date(); + const timestamp = + cliOutput || + `${now.getFullYear()}-${String(now.getMonth() + 1).padStart( + 2, + "0" + )}-${String(now.getDate()).padStart(2, "0")}-${String( now.getHours() ).padStart(2, "0")}-${String(now.getMinutes()).padStart(2, "0")}`; - const resultsDir = "results"; - const resultsFile = `${resultsDir}/results-${timestamp}.json`; + const resultsDir = "results"; + const resultsFile = `${resultsDir}/results-${timestamp}.json`; + const rejectedFile = `${resultsDir}/results-${timestamp}-rejected.json`; - if (!fs.existsSync(resultsDir)) { - fs.mkdirSync(resultsDir); - } - - fs.writeFileSync(resultsFile, JSON.stringify(results, null, 2), "utf-8"); - 
console.log(`Saved ${results.length} posts to ${resultsFile}`); - } catch (err) { - console.error("Error:", err); - } finally { - await browser.close(); + if (!fs.existsSync(resultsDir)) { + fs.mkdirSync(resultsDir); } - }); + + fs.writeFileSync(resultsFile, JSON.stringify(results, null, 2), "utf-8"); + fs.writeFileSync( + rejectedFile, + JSON.stringify(rejectedResults, null, 2), + "utf-8" + ); + console.log(`\n🎉 Scraping Complete!`); + console.log(`📊 Saved ${results.length} posts to ${resultsFile}`); + console.log( + `📋 Saved ${rejectedResults.length} rejected posts to ${rejectedFile}` + ); + + // Run local AI analysis if requested + if (runAIAfter && results.length > 0 && !scrapeError) { + try { + await runPostScrapingLocalAI(resultsFile); + } catch (error) { + console.error( + "⚠️ Local AI analysis failed, but scraping completed successfully" + ); + } + } + + console.log(`\n💡 Next steps:`); + console.log(` 📋 Review results in ${resultsFile}`); + if (!runAIAfter && !disableAI) { + console.log(` 🧠 Local AI Analysis:`); + console.log(` node ai-analyzer-local.js --context="${AI_CONTEXT}"`); + console.log( + ` node ai-analyzer-local.js --input=${resultsFile} --context="your context"` + ); + } + } catch (err) { + scrapeError = err; + console.error("Error:", err); + } finally { + await browser.close(); + } +} + +loadKeywordsAndStart(); diff --git a/location-utils.js b/location-utils.js new file mode 100644 index 0000000..b6107b0 --- /dev/null +++ b/location-utils.js @@ -0,0 +1,1126 @@ +/** + * Enhanced Location Filtering Utilities - Improved Version + * + * Place all keyword CSVs in the keywords/ folder for use with LinkedOut. 
+ * + * These utilities provide: + * - Comprehensive city/province lookup for Canada + * - Fast O(1) city-to-province matching + * - Flexible location filter parsing and validation + * - Used by linkedout.js for profile location validation + * + * USAGE (for developers): + * const { parseLocationFilters, validateLocationAgainstFilters, extractLocationFromProfile } = require('./location-utils'); + * + * See linkedout.js for integration details. + */ +// Suppress D-Bus notification errors in WSL +process.env.NO_AT_BRIDGE = "1"; +process.env.DBUS_SESSION_BUS_ADDRESS = "/dev/null"; + +// Organized by province with comprehensive coverage +const CITIES_BY_PROVINCE = { + ontario: [ + // Greater Toronto Area + "toronto", + "mississauga", + "brampton", + "markham", + "vaughan", + "richmond hill", + "oakville", + "burlington", + "pickering", + "ajax", + "whitby", + "oshawa", + "milton", + "newmarket", + "aurora", + "georgina", + "king", + "whitchurch-stouffville", + "caledon", + "halton hills", + "clarington", + "scugog", + "uxbridge", + + // Southwestern Ontario + "london", + "windsor", + "kitchener", + "waterloo", + "cambridge", + "guelph", + "brantford", + "woodstock", + "stratford", + "sarnia", + "chatham", + "leamington", + "kingsville", + "amherstburg", + "tecumseh", + "lakeshore", + "essex", + "tilbury", + "st. thomas", + "ingersoll", + "tillsonburg", + "simcoe", + "delhi", + "port dover", + "welland", + "niagara falls", + "st. 
catharines", + "thorold", + "fort erie", + "grimsby", + "lincoln", + "pelham", + "wainfleet", + "west lincoln", + + // Central Ontario + "hamilton", + "barrie", + "orillia", + "midland", + "penetanguishene", + "collingwood", + "wasaga beach", + "blue mountains", + "clearview", + "springwater", + "innisfil", + "bradford west gwillimbury", + "essa", + "new tecumseth", + "adjala-tosorontio", + "mono", + "orangeville", + "shelburne", + "mulmur", + "amaranth", + "east garafraxa", + + // Eastern Ontario + "ottawa", + "gatineau", + "kingston", + "cornwall", + "pembroke", + "petawawa", + "deep river", + "arnprior", + "renfrew", + "carleton place", + "almonte", + "smiths falls", + "perth", + "brockville", + "prescott", + "iroquois", + "morrisburg", + "winchester", + "kemptville", + "merrickville-wolford", + "westport", + "gananoque", + "lansdowne", + "belleville", + "trenton", + "picton", + "napanee", + "deseronto", + "quinte west", + + // Northern Ontario + "sudbury", + "north bay", + "sault ste. 
marie", + "thunder bay", + "timmins", + "kirkland lake", + "cochrane", + "kapuskasing", + "hearst", + "iroquois falls", + "smooth rock falls", + "matheson", + "new liskeard", + "haileybury", + "cobalt", + "temiskaming shores", + "englehart", + "elliot lake", + "espanola", + "blind river", + "spanish", + "massey", + "thessalon", + "wawa", + "chapleau", + "white river", + "marathon", + "terrace bay", + "schreiber", + "nipigon", + "red rock", + "geraldton", + "longlac", + "beardmore", + "greenstone", + "ignace", + "dryden", + "kenora", + "fort frances", + "atikokan", + "rainy river", + "emo", + "sioux lookout", + "pickle lake", + "red lake", + + // Additional mid-size communities + "cobourg", + "port hope", + "peterborough", + "lindsay", + "fenelon falls", + "bobcaygeon", + "minden", + "haliburton", + "bancroft", + "barry's bay", + "huntsville", + "bracebridge", + "gravenhurst", + "parry sound", + "burk's falls", + "powassan", + "callander", + "sturgeon falls", + "west nipissing", + "french river", + "killarney", + "gore bay", + "little current", + "mindemoya", + "wikwemikong", + "m'chigeeng", + "aundeck omni kaning", + ], + + manitoba: [ + "winnipeg", + "brandon", + "steinbach", + "thompson", + "portage la prairie", + "winkler", + "selkirk", + "morden", + "dauphin", + "the pas", + "flin flon", + "swan river", + "neepawa", + "virden", + "souris", + "carman", + "stonewall", + "beausejour", + "gimli", + "arborg", + "teulon", + "ashern", + "eriksdale", + "fisher branch", + "riverton", + "winnipeg beach", + "dunnottar", + "altona", + "morris", + "emerson", + "killarney", + "boissevain", + "deloraine", + "melita", + "waskada", + "cartwright", + "crystal city", + "pilot mound", + "manitou", + "la riviere", + "glenboro", + "treherne", + "holland", + "hamiota", + "shoal lake", + "russell", + "roblin", + "grandview", + "minitonas", + "bowsman", + "birtle", + "rossburn", + "sandy lake", + ], + + "british columbia": [ + "vancouver", + "surrey", + "burnaby", + "richmond", + 
"abbotsford", + "coquitlam", + "langley", + "delta", + "north vancouver", + "west vancouver", + "new westminster", + "port coquitlam", + "maple ridge", + "white rock", + "pitt meadows", + "port moody", + "bowen island", + "anmore", + "belcarra", + "lions bay", + "victoria", + "saanich", + "esquimalt", + "oak bay", + "view royal", + "sidney", + "central saanich", + "north saanich", + "highlands", + "metchosin", + "sooke", + "colwood", + "langford", + "duncan", + "nanaimo", + "parksville", + "qualicum beach", + "courtenay", + "comox", + "campbell river", + "port alberni", + "tofino", + "ucluelet", + "kelowna", + "vernon", + "penticton", + "kamloops", + "salmon arm", + "revelstoke", + "golden", + "invermere", + "cranbrook", + "fernie", + "kimberley", + "nelson", + "castlegar", + "trail", + "rossland", + "grand forks", + "osoyoos", + "oliver", + "summerland", + "peachland", + "westbank", + "prince george", + "quesnel", + "williams lake", + "100 mile house", + "clinton", + "cache creek", + "ashcroft", + "merritt", + "princeton", + "hope", + "chilliwack", + "mission", + "harrison hot springs", + "agassiz", + "kent", + "fraser valley", + "squamish", + "whistler", + "pemberton", + "lillooet", + "lytton", + "prince rupert", + "terrace", + "kitimat", + "smithers", + "burns lake", + "vanderhoof", + "fort st. john", + "dawson creek", + "tumbler ridge", + "chetwynd", + "hudson's hope", + "fort nelson", + "fort st. 
james", + ], + + alberta: [ + "calgary", + "edmonton", + "red deer", + "lethbridge", + "medicine hat", + "grande prairie", + "airdrie", + "spruce grove", + "leduc", + "lloydminster", + "camrose", + "wetaskiwin", + "lacombe", + "ponoka", + "sylvan lake", + "blackfalds", + "innisfail", + "olds", + "didsbury", + "carstairs", + "cochrane", + "canmore", + "banff", + "okotoks", + "high river", + "strathmore", + "chestermere", + "drumheller", + "three hills", + "hanna", + "oyen", + "consort", + "provost", + "wainwright", + "vermilion", + "lloydminster", + "bonnyville", + "cold lake", + "st. paul", + "two hills", + "vegreville", + "mundare", + "lamont", + "bruderheim", + "morinville", + "legal", + "bon accord", + "gibbons", + "redwater", + "smoky lake", + "willingdon", + "andrew", + "chipman", + "fort saskatchewan", + "sherwood park", + "beaumont", + "devon", + "calmar", + "thorsby", + "warburg", + "breton", + "winfield", + "drayton valley", + "rocky mountain house", + "sundre", + "caroline", + "rimbey", + "bentley", + "blackfalds", + "penhold", + "bowden", + "eckville", + "rocky mountain house", + "sundre", + "olds", + "fort mcmurray", + "slave lake", + "high prairie", + "valleyview", + "fox creek", + "whitecourt", + "mayerthorpe", + "barrhead", + "westlock", + "athabasca", + "boyle", + "newbrook", + "wandering river", + "peace river", + "grimshaw", + "manning", + "fairview", + "high level", + "rainbow lake", + "zama city", + ], + + quebec: [ + "montreal", + "quebec city", + "laval", + "gatineau", + "longueuil", + "sherbrooke", + "saguenay", + "levis", + "trois-rivieres", + "terrebonne", + "saint-jean-sur-richelieu", + "repentigny", + "brossard", + "drummondville", + "saint-jerome", + "granby", + "blainville", + "saint-hyacinthe", + "shawinigan", + "dollard-des-ormeaux", + "rimouski", + "sorel-tracy", + "victoriaville", + "saint-eustache", + "vaudreuil-dorion", + "val-d'or", + "salaberry-de-valleyfield", + "sept-iles", + "rouyn-noranda", + "thetford mines", + "alma", + 
"joliette", + "saint-georges", + "baie-comeau", + "mascouche", + "beloeil", + "chateauguay", + "saint-constant", + "sainte-catherine", + "saint-bruno-de-montarville", + "boucherville", + "saint-lambert", + "candiac", + "la prairie", + "saint-basile-le-grand", + "carignan", + "chambly", + "saint-mathieu-de-beloeil", + ], + + saskatchewan: [ + "saskatoon", + "regina", + "prince albert", + "moose jaw", + "swift current", + "yorkton", + "north battleford", + "estevan", + "weyburn", + "lloydminster", + "martensville", + "warman", + "humboldt", + "kindersley", + "melville", + "tisdale", + "nipawin", + "melfort", + "unity", + "biggar", + "rosetown", + "outlook", + "davidson", + "watrous", + "lanigan", + "wynyard", + "foam lake", + "canora", + "preeceville", + "kamsack", + "roblin", + "hudson bay", + "carrot river", + "white fox", + "spiritwood", + "maidstone", + "lashburn", + "cut knife", + "wilkie", + "macklin", + "luseland", + "kerrobert", + "kindersley", + "eston", + "elrose", + "alsask", + "leader", + "maple creek", + "shaunavon", + "gull lake", + "cabri", + "kyle", + "rosetown", + "kindersley", + ], + + "nova scotia": [ + "halifax", + "dartmouth", + "sydney", + "truro", + "new glasgow", + "glace bay", + "yarmouth", + "bridgewater", + "kentville", + "amherst", + "new waterford", + "sydney mines", + "antigonish", + "stellarton", + "westville", + "pictou", + "digby", + "windsor", + "wolfville", + "middleton", + "annapolis royal", + "liverpool", + "shelburne", + "lockeport", + "lunenburg", + "mahone bay", + "chester", + "hubbards", + "tantallon", + "fall river", + "beaver bank", + "sackville", + "bedford", + "cole harbour", + "eastern passage", + "porters lake", + "musquodoboit harbour", + "sheet harbour", + "stewiacke", + "shubenacadie", + "elmsdale", + "enfield", + "lantz", + "milford", + "gay's river", + "mount uniacke", + "nine mile river", + ], + + "new brunswick": [ + "saint john", + "moncton", + "fredericton", + "dieppe", + "riverview", + "miramichi", + 
"edmundston", + "campbellton", + "bathurst", + "sackville", + "sussex", + "hampton", + "quispamsis", + "rothesay", + "grand bay-westfield", + "st. stephen", + "st. andrews", + "blacks harbour", + "grand manan", + "deer island", + "campobello island", + "woodstock", + "hartland", + "florenceville-bristol", + "perth-andover", + "grand falls", + "plaster rock", + "tobique first nation", + "nackawic", + "mcadam", + "harvey", + "chipman", + "minto", + "gagetown", + "oromocto", + "new maryland", + "hanwell", + "kingsclear", + "stanley", + "doaktown", + "blackville", + "renous", + "boiestown", + "caraquet", + "shippagan", + "tracadie", + "neguac", + "rogersville", + "rexton", + "richibucto", + "bouctouche", + "shediac", + "cap-pele", + "beaubassin-est", + ], + + "newfoundland and labrador": [ + "st. johns", + "mount pearl", + "corner brook", + "conception bay south", + "paradise", + "grand falls-windsor", + "happy valley-goose bay", + "gander", + "carbonear", + "stephenville", + "bay roberts", + "clarenville", + "marystown", + "deer lake", + "channel-port aux basques", + "labrador city", + "wabana", + "holyrood", + "portugal cove-st. 
philips", + "torbay", + "pouch cove", + "flatrock", + "logy bay-middle cove-outer cove", + "petty harbour-maddox cove", + "bauline", + "witless bay", + "ferryland", + "aquaforte", + "renews-cappahayden", + "trepassey", + "branch", + "placentia", + "come by chance", + "sunnyside", + "whitbourne", + "chapel arm", + "bluewater", + "norman's cove-long cove", + "heart's content", + "heart's delight-islington", + "cavendish", + "new melbourne", + "whiteway", + "trinity", + "bonavista", + ], + + "prince edward island": [ + "charlottetown", + "summerside", + "stratford", + "cornwall", + "montague", + "souris", + "kensington", + "alberton", + "tignish", + "o'leary", + "wellington", + "borden-carleton", + "murray river", + "georgetown", + "crapaud", + "breadalbane", + "hunter river", + "new london", + "cavendish", + "stanley bridge", + "rustico", + "brackley", + "winsloe", + "york", + "tea hill", + "miltonvale park", + "sherwood", + "warren grove", + "clyde river", + "bonshaw", + "vernon bridge", + "orwell", + "wood islands", + "belle river", + "murray harbour", + "little sands", + "gladstone", + "annandale", + "montague", + "brudenell", + "cardigan", + "launching", + "pooles corner", + "morell", + "st. 
peters", + "red point", + "lakeville", + "souris west", + ], + + "northwest territories": [ + "yellowknife", + "hay river", + "inuvik", + "fort simpson", + "fort smith", + "norman wells", + "iqaluit", + "rankin inlet", + "arviat", + "baker lake", + "cambridge bay", + "gjoa haven", + "kugluktuk", + "taloyoak", + "fort mcpherson", + "aklavik", + "tuktoyaktuk", + "paulatuk", + "sachs harbour", + "ulukhaktok", + "tsiigehtchic", + "fort good hope", + "colville lake", + "tulita", + "deline", + "wrigley", + "nahanni butte", + "jean marie river", + "kakisa", + "enterprise", + "fort resolution", + "lutselk'e", + "gameti", + "wekweeti", + "whati", + "behchoko", + ], + + yukon: [ + "whitehorse", + "dawson city", + "watson lake", + "haines junction", + "carmacks", + "mayo", + "faro", + "ross river", + "teslin", + "carcross", + "tagish", + "marsh lake", + "ibex valley", + "mount lorne", + "granger", + "takhini", + "fish lake", + "mendenhall", + "pelly crossing", + "stewart crossing", + "beaver creek", + "destruction bay", + "burwash landing", + "kluane lake", + "silver city", + "champagne", + "old crow", + "eagle plains", + "fort mcpherson", + ], + + nunavut: [ + "iqaluit", + "rankin inlet", + "arviat", + "baker lake", + "cambridge bay", + "gjoa haven", + "kugluktuk", + "taloyoak", + "kugaaruk", + "igloolik", + "hall beach", + "pond inlet", + "arctic bay", + "clyde river", + "pangnirtung", + "cape dorset", + "kimmirut", + "sanikiluaq", + "whale cove", + "chesterfield inlet", + "coral harbour", + "naujaat", + "igloolik", + "sanirajak", + "grise fiord", + "resolute", + "alert", + "eureka", + ], +}; + +// Create reverse lookup for faster searching +const CITY_TO_PROVINCE = {}; +for (const [province, cities] of Object.entries(CITIES_BY_PROVINCE)) { + for (const city of cities) { + CITY_TO_PROVINCE[city.toLowerCase()] = province.toLowerCase(); + } +} + +// Province name variations and abbreviations (unchanged) +const PROVINCE_VARIATIONS = { + ontario: ["ontario", "ont", "on"], + 
manitoba: ["manitoba", "man", "mb"], + "british columbia": ["british columbia", "bc", "b.c."], + alberta: ["alberta", "alta", "ab"], + quebec: ["quebec", "que", "qc", "québec"], + saskatchewan: ["saskatchewan", "sask", "sk"], + "nova scotia": ["nova scotia", "ns", "n.s."], + "new brunswick": ["new brunswick", "nb", "n.b."], + "newfoundland and labrador": [ + "newfoundland and labrador", + "nl", + "n.l.", + "newfoundland", + "nfld", + ], + "prince edward island": ["prince edward island", "pei", "p.e.i."], + "northwest territories": ["northwest territories", "nt", "n.w.t.", "nwt"], + yukon: ["yukon", "yt", "y.t."], + nunavut: ["nunavut", "nu", "nvt"], +}; + +/** + * Parse location filters from environment variable + * Supports multiple formats: + * - Single: "Ontario" + * - Multiple: "Ontario,Manitoba" or "Ontario|Manitoba" + * - Mixed: "Toronto,Ontario,Vancouver" + */ +function parseLocationFilters(locationFilterString) { + if (!locationFilterString) return []; + + // Split by comma or pipe + const filters = locationFilterString + .split(/[,|]/) + .map((f) => f.trim().toLowerCase()); + return filters.filter((f) => f.length > 0); +} + +/** + * Enhanced location validation with comprehensive city coverage + * @param {string} userLocation - User's location from LinkedIn profile + * @param {string[]} locationFilters - Array of location filters + * @returns {Object} - {isValid: boolean, matchedFilter: string, reasoning: string} + */ +function validateLocationAgainstFilters(userLocation, locationFilters) { + if (!userLocation || locationFilters.length === 0) { + return { + isValid: true, + matchedFilter: null, + reasoning: "No filtering applied", + }; + } + + const normalizedLocation = userLocation.toLowerCase(); + + // Check each filter + for (const filter of locationFilters) { + const normalizedFilter = filter.toLowerCase(); + + // 1. 
Direct string match with word boundaries + const filterRegex = new RegExp( + `\\b${normalizedFilter.replace(/[-/\\^$*+?.()|[\]{}]/g, "\\$&")}\\b`, + "i" + ); + if (filterRegex.test(normalizedLocation)) { + return { + isValid: true, + matchedFilter: filter, + reasoning: `Direct match: "${normalizedFilter}" found in "${userLocation}"`, + }; + } + + // 2. Check if filter is a province - look for cities in that province + const provinceVariations = PROVINCE_VARIATIONS[normalizedFilter] || []; + for (const variation of provinceVariations) { + const variationRegex = new RegExp( + `\\b${variation.replace(/[-/\\^$*+?.()|[\]{}]/g, "\\$&")}\\b`, + "i" + ); + if (variationRegex.test(normalizedLocation)) { + return { + isValid: true, + matchedFilter: filter, + reasoning: `Province match: "${variation}" found in "${userLocation}"`, + }; + } + } + + // 3. Check if any city in the location maps to the filtered province + for (const [city, province] of Object.entries(CITY_TO_PROVINCE)) { + // Use word boundary regex to match city as a whole word + const cityRegex = new RegExp( + `\\b${city.replace(/[-/\\^$*+?.()|[\]{}]/g, "\\$&")}\\b`, + "i" + ); + if (cityRegex.test(normalizedLocation) && province === normalizedFilter) { + return { + isValid: true, + matchedFilter: filter, + reasoning: `City-to-province match: "${city}" maps to "${province}"`, + }; + } + } + + // 4. Check if filter is a city and maps to a province mentioned in location + const mappedProvince = CITY_TO_PROVINCE[normalizedFilter]; + if (mappedProvince) { + const provinceVariations = PROVINCE_VARIATIONS[mappedProvince] || []; + for (const variation of provinceVariations) { + const variationRegex = new RegExp( + `\\b${variation.replace(/[-/\\^$*+?.()|[\]{}]/g, "\\$&")}\\b`, + "i" + ); + if (variationRegex.test(normalizedLocation)) { + return { + isValid: true, + matchedFilter: filter, + reasoning: `Reverse city match: "${filter}" is in "${mappedProvince}" which matches location`, + }; + } + } + } + + // 5. 
Partial city name matching (for areas like "Greater Toronto Area") + const words = normalizedLocation.split(/[\s,.-]+/); + for (const word of words) { + if (word.length > 3) { + // Avoid matching short words + // Use word boundary regex to match word as a whole city name + const mappedProvince = CITY_TO_PROVINCE[word]; + if (mappedProvince === normalizedFilter) { + return { + isValid: true, + matchedFilter: filter, + reasoning: `Partial city match: "${word}" from "${userLocation}" maps to "${normalizedFilter}"`, + }; + } + } + } + } + + return { + isValid: false, + matchedFilter: null, + reasoning: `Location "${userLocation}" does not match any of: ${locationFilters.join( + ", " + )}`, + }; +} + +/** + * Extract location from LinkedIn profile with improved selectors + * @param {Object} page - Playwright page object + * @returns {Promise} - Extracted location or empty string + */ +async function extractLocationFromProfile(page) { + // Enhanced selectors for location information + const locationSelectors = [ + // Primary location selectors + ".text-body-small.inline.t-black--light.break-words", + ".pv-text-details__left-panel .text-body-small", + ".pb2.pv-text-details__left-panel", + ".text-body-small.inline", + '[data-field="location_details"]', + + // Additional selectors for different LinkedIn layouts + ".pv-text-details__left-panel-item", + ".pv-entity__location", + ".pv-top-card__location", + ".pv-top-card--list-bullet .pv-top-card--list-bullet-item", + ".artdeco-entity-lockup__subtitle", + + // Mobile/responsive selectors + ".profile-topcard__location", + ".profile-topcard__location-data", + ]; + + for (const selector of locationSelectors) { + try { + const elements = await page.$$(selector); + + for (const element of elements) { + const text = await element.textContent(); + if (text && text.trim()) { + const cleanText = text.trim(); + + // Accept locations with OR without commas + // Common patterns: "Toronto, ON", "Toronto", "Toronto, Ontario, Canada" + if ( 
+ cleanText.length > 2 && + (cleanText.includes(",") || /^[a-zA-Z\s.-]+$/.test(cleanText)) && + !cleanText.toLowerCase().includes("connection") && + !cleanText.toLowerCase().includes("follower") && + !cleanText.toLowerCase().includes("experience") && + cleanText.length < 100 + ) { + return cleanText; + } + } + } + } catch (e) { + // Continue to next selector + } + } + + return ""; +} + +/** + * Get statistics about city coverage + */ +function getCoverageStats() { + const stats = {}; + for (const [province, cities] of Object.entries(CITIES_BY_PROVINCE)) { + stats[province] = cities.length; + } + stats.total = Object.keys(CITY_TO_PROVINCE).length; + return stats; +} + +module.exports = { + parseLocationFilters, + validateLocationAgainstFilters, + extractLocationFromProfile, + CITY_TO_PROVINCE, + CITIES_BY_PROVINCE, + PROVINCE_VARIATIONS, + getCoverageStats, +}; diff --git a/package-lock.json b/package-lock.json index e1c40a5..3f229ea 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,7 +11,7 @@ "dependencies": { "csv-parser": "^3.2.0", "dotenv": "^17.0.0", - "playwright": "^1.53.1" + "playwright": "^1.53.2" } }, "node_modules/csv-parser": { @@ -53,12 +53,12 @@ } }, "node_modules/playwright": { - "version": "1.53.1", - "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.53.1.tgz", - "integrity": "sha512-LJ13YLr/ocweuwxyGf1XNFWIU4M2zUSo149Qbp+A4cpwDjsxRPj7k6H25LBrEHiEwxvRbD8HdwvQmRMSvquhYw==", + "version": "1.53.2", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.53.2.tgz", + "integrity": "sha512-6K/qQxVFuVQhRQhFsVZ9fGeatxirtrpPgxzBYWyZLEXJzqYwuL4fuNmfOfD5et1tJE4GScKyPNeLhZeRwuTU3A==", "license": "Apache-2.0", "dependencies": { - "playwright-core": "1.53.1" + "playwright-core": "1.53.2" }, "bin": { "playwright": "cli.js" @@ -71,9 +71,9 @@ } }, "node_modules/playwright-core": { - "version": "1.53.1", - "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.53.1.tgz", - "integrity": 
"sha512-Z46Oq7tLAyT0lGoFx4DOuB1IA9D1TPj0QkYxpPVUnGDqHHvDpCftu1J2hM2PiWsNMoZh8+LQaarAWcDfPBc6zg==", + "version": "1.53.2", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.53.2.tgz", + "integrity": "sha512-ox/OytMy+2w1jcYEYlOo1Hhp8hZkLCximMTUTMBXjGUA1KoFfiSZ+DU+3a739jsPY0yoKH2TFy9S2fsJas8yAw==", "license": "Apache-2.0", "bin": { "playwright-core": "cli.js" diff --git a/package.json b/package.json index 58efb3d..758d179 100644 --- a/package.json +++ b/package.json @@ -13,6 +13,6 @@ "dependencies": { "csv-parser": "^3.2.0", "dotenv": "^17.0.0", - "playwright": "^1.53.1" + "playwright": "^1.53.2" } } diff --git a/test.js b/test/test.js similarity index 95% rename from test.js rename to test/test.js index 47d145e..86351f9 100644 --- a/test.js +++ b/test/test.js @@ -1,19 +1,19 @@ -console.log("START!"); - -const { chromium } = require("playwright"); -(async () => { - console.log("browser!"); - - const browser = await chromium.launch({ - headless: true, - args: ["--no-sandbox", "--disable-setuid-sandbox"], - }); - console.log("new page!"); - - const page = await browser.newPage(); - console.log("GOTO!"); - - await page.goto("https://example.com"); - console.log("Success!"); - await browser.close(); -})(); +console.log("START!"); + +const { chromium } = require("playwright"); +(async () => { + console.log("browser!"); + + const browser = await chromium.launch({ + headless: true, + args: ["--no-sandbox", "--disable-setuid-sandbox"], + }); + console.log("new page!"); + + const page = await browser.newPage(); + console.log("GOTO!"); + + await page.goto("https://example.com"); + console.log("Success!"); + await browser.close(); +})();