From a04e9fb374d8de748f1a532e9e6faa252e1d61ce Mon Sep 17 00:00:00 2001 From: ilia Date: Thu, 3 Jul 2025 21:43:45 -0400 Subject: [PATCH] update --- README.md | 494 +++++++++---------- ai-analyzer-local.js | 1080 +++++++++++++++++++++--------------------- test/test.js | 38 +- 3 files changed, 806 insertions(+), 806 deletions(-) diff --git a/README.md b/README.md index 59a5651..5b29249 100644 --- a/README.md +++ b/README.md @@ -1,247 +1,247 @@ -# LinkedOut - LinkedIn Posts Scraper - -A Node.js application that automates LinkedIn login and scrapes posts containing specific keywords. The tool is designed to help track job market trends, layoffs, and open work opportunities by monitoring LinkedIn content. - -## Features - -- **Automated LinkedIn Login**: Uses Playwright to automate browser interactions -- **Keyword-based Search**: Searches for posts containing keywords from CSV files or CLI -- **Flexible Keyword Sources**: Supports multiple CSV files in `keywords/` or CLI-only mode -- **Configurable Search Parameters**: Customizable date ranges, sorting options, city, and scroll behavior -- **Duplicate Detection**: Prevents duplicate posts and profiles in results -- **Clean Text Processing**: Removes hashtags, emojis, and URLs from post content -- **Timestamped Results**: Saves results to JSON files with timestamps -- **Command-line Overrides**: Support for runtime parameter adjustments -- **Enhanced Geographic Location Validation**: Validates user locations against 200+ Canadian cities with smart matching -- **Local AI Analysis (Ollama)**: Free, private, and fast post-processing with local LLMs -- **Flexible Processing**: Disable features, run AI analysis immediately, or process results later - -## Prerequisites - -- Node.js (v14 or higher) -- Valid LinkedIn account credentials -- [Ollama](https://ollama.ai/) with a model (free, private, local AI) - -## Installation - -1. Clone the repository or download the files -2. Install dependencies: - - ```bash - npm install - ``` - -3. Copy the configuration template and customize: - - ```bash - cp env-config.example .env - ``` - -4. Edit `.env` with your settings (see Configuration section below) - -## Configuration - -### Environment Variables (.env file) - -Create a `.env` file from `env-config.example`: - -```env -# LinkedIn Credentials (Required) -LINKEDIN_USERNAME=your_email@example.com -LINKEDIN_PASSWORD=your_password - -# Basic Settings -HEADLESS=true -KEYWORDS=keywords-layoff.csv # Just the filename; always looks in keywords/ unless path is given -DATE_POSTED=past-week -SORT_BY=date_posted -CITY=Toronto -WHEELS=5 - -# Enhanced Location Filtering -LOCATION_FILTER=Ontario,Manitoba -ENABLE_LOCATION_CHECK=true - -# Local AI Analysis (Ollama) -ENABLE_LOCAL_AI=true -OLLAMA_MODEL=mistral -OLLAMA_HOST=http://localhost:11434 -RUN_LOCAL_AI_AFTER_SCRAPING=false # true = run after scraping, false = run manually -AI_CONTEXT=job layoffs and workforce reduction -AI_CONFIDENCE=0.7 -AI_BATCH_SIZE=3 -``` - -### Configuration Options - -#### Required - -- `LINKEDIN_USERNAME`: Your LinkedIn email/username -- `LINKEDIN_PASSWORD`: Your LinkedIn password - -#### Basic Settings - -- `HEADLESS`: Browser headless mode (`true`/`false`, default: `true`) -- `KEYWORDS`: CSV file name (default: `keywords-layoff.csv` in `keywords/` folder) -- `DATE_POSTED`: Filter by date (`past-24h`, `past-week`, `past-month`, or empty) -- `SORT_BY`: Sort results (`relevance` or `date_posted`) -- `CITY`: Search location (default: `Toronto`) -- `WHEELS`: Number of scrolls to load posts (default: `5`) - -#### Enhanced Location Filtering - -- `LOCATION_FILTER`: Geographic filter - supports multiple provinces/cities: - - Single: `Ontario` or `Toronto` - - Multiple: `Ontario,Manitoba` or `Toronto,Vancouver` -- `ENABLE_LOCATION_CHECK`: Enable location validation (`true`/`false`) - -#### Local AI Analysis (Ollama) - -- `ENABLE_LOCAL_AI=true`: Enable local AI analysis -- `OLLAMA_MODEL`: Model to use (`mistral`, `llama2`, `codellama`) -- `OLLAMA_HOST`: Ollama server URL (default: `http://localhost:11434`) -- `RUN_LOCAL_AI_AFTER_SCRAPING`: Run AI immediately after scraping (`true`/`false`) -- `AI_CONTEXT`: Context for analysis (e.g., `job layoffs`) -- `AI_CONFIDENCE`: Minimum confidence threshold (0.0-1.0, default: 0.7) -- `AI_BATCH_SIZE`: Posts per batch (default: 3) - -## Usage - -### Basic Commands - -```bash -# Standard scraping with configured settings -node linkedout.js - -# Visual mode (see browser) -node linkedout.js --headless=false - -# Use only these keywords (ignore CSV) -node linkedout.js --keyword="layoff,downsizing" - -# Add extra keywords to CSV/CLI list -node linkedout.js --add-keyword="hiring freeze,open to work" - -# Override city and date -node linkedout.js --city="Vancouver" --date_posted=past-month - -# Custom output file -node linkedout.js --output=results/myfile.json - -# Skip location and AI filtering (fastest) -node linkedout.js --no-location --no-ai - -# Run AI analysis immediately after scraping -node linkedout.js --ai-after - -# Show help -node linkedout.js --help -``` - -### All Command-line Options - -- `--headless=true|false`: Override browser headless mode -- `--keyword="kw1,kw2"`: Use only these keywords (comma-separated, overrides CSV) -- `--add-keyword="kw1,kw2"`: Add extra keywords to CSV/CLI list -- `--city="CityName"`: Override city -- `--date_posted=VALUE`: Override date posted (past-24h, past-week, past-month, or empty) -- `--sort_by=VALUE`: Override sort by (date_posted or relevance) -- `--location_filter=VALUE`: Override location filter -- `--output=FILE`: Output file name -- `--no-location`: Disable location filtering -- `--no-ai`: Disable AI analysis -- `--ai-after`: Run local AI analysis after scraping -- `--help, -h`: Show help message - -### Keyword Files - -- Place all keyword CSVs in the `keywords/` folder -- Example: `keywords/keywords-layoff.csv`, `keywords/keywords-open-work.csv` -- Custom CSV format: header `keyword` with one keyword per line - -### Local AI Analysis Commands - -After scraping, you can run AI analysis on the results: - -```bash -# Analyze latest results -node ai-analyzer-local.js --context="job layoffs" - -# Analyze specific file -node ai-analyzer-local.js --input=results/results-2024-01-15.json --context="hiring" - -# Use different model -node ai-analyzer-local.js --model=llama2 --context="remote work" - -# Change confidence and batch size -node ai-analyzer-local.js --context="job layoffs" --confidence=0.8 --batch-size=5 -``` - -## Workflow Examples - -### 1. Quick Start (All Features) - -```bash -node linkedout.js --ai-after -``` - -### 2. Fast Scraping Only - -```bash -node linkedout.js --no-location --no-ai -``` - -### 3. Location-Only Filtering - -```bash -node linkedout.js --no-ai -``` - -### 4. Test Different AI Contexts - -```bash -node linkedout.js --no-ai -node ai-analyzer-local.js --context="job layoffs" -node ai-analyzer-local.js --context="hiring opportunities" -node ai-analyzer-local.js --context="remote work" -``` - -## Project Structure - -``` -linkedout/ -├── .env # Your configuration (create from template) -├── env-config.example # Configuration template -├── linkedout.js # Main scraper -├── ai-analyzer-local.js # Free local AI analyzer (Ollama) -├── location-utils.js # Enhanced location utilities -├── package.json # Dependencies -├── keywords/ # All keyword CSVs go here -│ ├── keywords-layoff.csv -│ └── keywords-open-work.csv -├── results/ # Output directory -└── README.md # This documentation -``` - -## Legal & Security - -- **Credentials**: Store securely in `.env`, add to `.gitignore` -- **LinkedIn ToS**: Respect rate limits and usage guidelines -- **Privacy**: Local AI keeps all data on your machine -- **Usage**: Educational and research purposes only - -## Dependencies - -- `playwright`: Browser automation -- `dotenv`: Environment variables -- `csv-parser`: CSV file reading -- Built-in: `fs`, `path`, `child_process` - -## Support - -For issues: - -1. Check this README -2. Verify `.env` configuration -3. Test with `--headless=false` for debugging -4. Check Ollama status: `ollama list` +# LinkedOut - LinkedIn Posts Scraper + +A Node.js application that automates LinkedIn login and scrapes posts containing specific keywords. The tool is designed to help track job market trends, layoffs, and open work opportunities by monitoring LinkedIn content. + +## Features + +- **Automated LinkedIn Login**: Uses Playwright to automate browser interactions +- **Keyword-based Search**: Searches for posts containing keywords from CSV files or CLI +- **Flexible Keyword Sources**: Supports multiple CSV files in `keywords/` or CLI-only mode +- **Configurable Search Parameters**: Customizable date ranges, sorting options, city, and scroll behavior +- **Duplicate Detection**: Prevents duplicate posts and profiles in results +- **Clean Text Processing**: Removes hashtags, emojis, and URLs from post content +- **Timestamped Results**: Saves results to JSON files with timestamps +- **Command-line Overrides**: Support for runtime parameter adjustments +- **Enhanced Geographic Location Validation**: Validates user locations against 200+ Canadian cities with smart matching +- **Local AI Analysis (Ollama)**: Free, private, and fast post-processing with local LLMs +- **Flexible Processing**: Disable features, run AI analysis immediately, or process results later + +## Prerequisites + +- Node.js (v14 or higher) +- Valid LinkedIn account credentials +- [Ollama](https://ollama.ai/) with a model (free, private, local AI) + +## Installation + +1. Clone the repository or download the files +2. Install dependencies: + + ```bash + npm install + ``` + +3. Copy the configuration template and customize: + + ```bash + cp env-config.example .env + ``` + +4. Edit `.env` with your settings (see Configuration section below) + +## Configuration + +### Environment Variables (.env file) + +Create a `.env` file from `env-config.example`: + +```env +# LinkedIn Credentials (Required) +LINKEDIN_USERNAME=your_email@example.com +LINKEDIN_PASSWORD=your_password + +# Basic Settings +HEADLESS=true +KEYWORDS=keywords-layoff.csv # Just the filename; always looks in keywords/ unless path is given +DATE_POSTED=past-week +SORT_BY=date_posted +CITY=Toronto +WHEELS=5 + +# Enhanced Location Filtering +LOCATION_FILTER=Ontario,Manitoba +ENABLE_LOCATION_CHECK=true + +# Local AI Analysis (Ollama) +ENABLE_LOCAL_AI=true +OLLAMA_MODEL=mistral +OLLAMA_HOST=http://localhost:11434 +RUN_LOCAL_AI_AFTER_SCRAPING=false # true = run after scraping, false = run manually +AI_CONTEXT=job layoffs and workforce reduction +AI_CONFIDENCE=0.7 +AI_BATCH_SIZE=3 +``` + +### Configuration Options + +#### Required + +- `LINKEDIN_USERNAME`: Your LinkedIn email/username +- `LINKEDIN_PASSWORD`: Your LinkedIn password + +#### Basic Settings + +- `HEADLESS`: Browser headless mode (`true`/`false`, default: `true`) +- `KEYWORDS`: CSV file name (default: `keywords-layoff.csv` in `keywords/` folder) +- `DATE_POSTED`: Filter by date (`past-24h`, `past-week`, `past-month`, or empty) +- `SORT_BY`: Sort results (`relevance` or `date_posted`) +- `CITY`: Search location (default: `Toronto`) +- `WHEELS`: Number of scrolls to load posts (default: `5`) + +#### Enhanced Location Filtering + +- `LOCATION_FILTER`: Geographic filter - supports multiple provinces/cities: + - Single: `Ontario` or `Toronto` + - Multiple: `Ontario,Manitoba` or `Toronto,Vancouver` +- `ENABLE_LOCATION_CHECK`: Enable location validation (`true`/`false`) + +#### Local AI Analysis (Ollama) + +- `ENABLE_LOCAL_AI=true`: Enable local AI analysis +- `OLLAMA_MODEL`: Model to use (`mistral`, `llama2`, `codellama`) +- `OLLAMA_HOST`: Ollama server URL (default: `http://localhost:11434`) +- `RUN_LOCAL_AI_AFTER_SCRAPING`: Run AI immediately after scraping (`true`/`false`) +- `AI_CONTEXT`: Context for analysis (e.g., `job layoffs`) +- `AI_CONFIDENCE`: Minimum confidence threshold (0.0-1.0, default: 0.7) +- `AI_BATCH_SIZE`: Posts per batch (default: 3) + +## Usage + +### Basic Commands + +```bash +# Standard scraping with configured settings +node linkedout.js + +# Visual mode (see browser) +node linkedout.js --headless=false + +# Use only these keywords (ignore CSV) +node linkedout.js --keyword="layoff,downsizing" + +# Add extra keywords to CSV/CLI list +node linkedout.js --add-keyword="hiring freeze,open to work" + +# Override city and date +node linkedout.js --city="Vancouver" --date_posted=past-month + +# Custom output file +node linkedout.js --output=results/myfile.json + +# Skip location and AI filtering (fastest) +node linkedout.js --no-location --no-ai + +# Run AI analysis immediately after scraping +node linkedout.js --ai-after + +# Show help +node linkedout.js --help +``` + +### All Command-line Options + +- `--headless=true|false`: Override browser headless mode +- `--keyword="kw1,kw2"`: Use only these keywords (comma-separated, overrides CSV) +- `--add-keyword="kw1,kw2"`: Add extra keywords to CSV/CLI list +- `--city="CityName"`: Override city +- `--date_posted=VALUE`: Override date posted (past-24h, past-week, past-month, or empty) +- `--sort_by=VALUE`: Override sort by (date_posted or relevance) +- `--location_filter=VALUE`: Override location filter +- `--output=FILE`: Output file name +- `--no-location`: Disable location filtering +- `--no-ai`: Disable AI analysis +- `--ai-after`: Run local AI analysis after scraping +- `--help, -h`: Show help message + +### Keyword Files + +- Place all keyword CSVs in the `keywords/` folder +- Example: `keywords/keywords-layoff.csv`, `keywords/keywords-open-work.csv` +- Custom CSV format: header `keyword` with one keyword per line + +### Local AI Analysis Commands + +After scraping, you can run AI analysis on the results: + +```bash +# Analyze latest results +node ai-analyzer-local.js --context="job layoffs" + +# Analyze specific file +node ai-analyzer-local.js --input=results/results-2024-01-15.json --context="hiring" + +# Use different model +node ai-analyzer-local.js --model=llama2 --context="remote work" + +# Change confidence and batch size +node ai-analyzer-local.js --context="job layoffs" --confidence=0.8 --batch-size=5 +``` + +## Workflow Examples + +### 1. Quick Start (All Features) + +```bash +node linkedout.js --ai-after +``` + +### 2. Fast Scraping Only + +```bash +node linkedout.js --no-location --no-ai +``` + +### 3. Location-Only Filtering + +```bash +node linkedout.js --no-ai +``` + +### 4. Test Different AI Contexts + +```bash +node linkedout.js --no-ai +node ai-analyzer-local.js --context="job layoffs" +node ai-analyzer-local.js --context="hiring opportunities" +node ai-analyzer-local.js --context="remote work" +``` + +## Project Structure + +``` +linkedout/ +├── .env # Your configuration (create from template) +├── env-config.example # Configuration template +├── linkedout.js # Main scraper +├── ai-analyzer-local.js # Free local AI analyzer (Ollama) +├── location-utils.js # Enhanced location utilities +├── package.json # Dependencies +├── keywords/ # All keyword CSVs go here +│ ├── keywords-layoff.csv +│ └── keywords-open-work.csv +├── results/ # Output directory +└── README.md # This documentation +``` + +## Legal & Security + +- **Credentials**: Store securely in `.env`, add to `.gitignore` +- **LinkedIn ToS**: Respect rate limits and usage guidelines +- **Privacy**: Local AI keeps all data on your machine +- **Usage**: Educational and research purposes only + +## Dependencies + +- `playwright`: Browser automation +- `dotenv`: Environment variables +- `csv-parser`: CSV file reading +- Built-in: `fs`, `path`, `child_process` + +## Support + +For issues: + +1. Check this README +2. Verify `.env` configuration +3. Test with `--headless=false` for debugging +4. Check Ollama status: `ollama list` diff --git a/ai-analyzer-local.js b/ai-analyzer-local.js index fea3a16..35576c5 100644 --- a/ai-analyzer-local.js +++ b/ai-analyzer-local.js @@ -1,540 +1,540 @@ -#!/usr/bin/env node - -/** - * Local AI Post-Processing Analyzer for LinkedOut - * - * Uses Ollama for completely FREE local AI analysis. - * - * FEATURES: - * - Analyze LinkedOut results for context relevance (layoffs, hiring, etc.) - * - Works on latest or specified results file - * - Batch processing for speed - * - Configurable context, model, confidence, batch size - * - CLI and .env configuration - * - 100% local, private, and free - * - * USAGE: - * node ai-analyzer-local.js [options] - * - * COMMAND-LINE OPTIONS: - * --input= Input JSON file (default: latest in results/) - * --context= AI context to analyze against (required) - * --confidence= Minimum confidence threshold (0.0-1.0, default: 0.7) - * --model= Ollama model to use (default: llama2) - * --batch-size= Number of posts to process at once (default: 3) - * --output= Output file (default: adds -ai-local suffix) - * --help, -h Show this help message - * - * EXAMPLES: - * node ai-analyzer-local.js --context="job layoffs" - * node ai-analyzer-local.js --input=results/results-2024-01-15.json --context="hiring" - * node ai-analyzer-local.js --model=mistral --context="remote work" - * node ai-analyzer-local.js --context="job layoffs" --confidence=0.8 --batch-size=5 - * - * ENVIRONMENT VARIABLES (.env file): - * AI_CONTEXT, AI_CONFIDENCE, AI_BATCH_SIZE, OLLAMA_MODEL, OLLAMA_HOST - * See README for full list. - * - * OUTPUT: - * - Saves to results/ with -ai-local suffix unless --output is specified - * - * DEPENDENCIES: - * - Ollama (https://ollama.ai/) - * - Node.js built-ins: fs, path, fetch - * - * SECURITY & LEGAL: - * - All analysis is local, no data leaves your machine - * - Use responsibly for educational/research purposes - */ - -require("dotenv").config(); -const fs = require("fs"); -const path = require("path"); - -// Configuration from environment and command line -const DEFAULT_CONTEXT = - process.env.AI_CONTEXT || "job layoffs and workforce reduction"; -const DEFAULT_CONFIDENCE = parseFloat(process.env.AI_CONFIDENCE || "0.7"); -const DEFAULT_BATCH_SIZE = parseInt(process.env.AI_BATCH_SIZE || "3"); -const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "llama2"; -const OLLAMA_HOST = process.env.OLLAMA_HOST || "http://localhost:11434"; - -// Parse command line arguments -const args = process.argv.slice(2); -let inputFile = null; -let context = DEFAULT_CONTEXT; -let confidenceThreshold = DEFAULT_CONFIDENCE; -let batchSize = DEFAULT_BATCH_SIZE; -let model = DEFAULT_MODEL; -let outputFile = null; - -for (const arg of args) { - if (arg.startsWith("--input=")) { - inputFile = arg.split("=")[1]; - } else if (arg.startsWith("--context=")) { - context = arg.split("=")[1]; - } else if (arg.startsWith("--confidence=")) { - confidenceThreshold = parseFloat(arg.split("=")[1]); - } else if (arg.startsWith("--batch-size=")) { - batchSize = parseInt(arg.split("=")[1]); - } else if (arg.startsWith("--model=")) { - model = arg.split("=")[1]; - } else if (arg.startsWith("--output=")) { - outputFile = arg.split("=")[1]; - } -} - -if (!context) { - console.error("❌ Error: No AI context specified"); - console.error('Use --context="your context" or set AI_CONTEXT in .env'); - process.exit(1); -} - -/** - * Check if Ollama is running and the model is available - */ -async function checkOllamaStatus() { - try { - // Check if Ollama is running - const response = await fetch(`${OLLAMA_HOST}/api/tags`); - if (!response.ok) { - throw new Error(`Ollama not running on ${OLLAMA_HOST}`); - } - - const data = await response.json(); - const availableModels = data.models.map((m) => m.name); - - console.log(`🤖 Ollama is running`); - console.log( - `📦 Available models: ${availableModels - .map((m) => m.split(":")[0]) - .join(", ")}` - ); - - // Check if requested model is available - const modelExists = availableModels.some((m) => m.startsWith(model)); - if (!modelExists) { - console.error(`❌ Model "${model}" not found`); - console.error(`💡 Install it with: ollama pull ${model}`); - console.error( - `💡 Or choose from: ${availableModels - .map((m) => m.split(":")[0]) - .join(", ")}` - ); - process.exit(1); - } - - console.log(`✅ Using model: ${model}`); - return true; - } catch (error) { - console.error("❌ Error connecting to Ollama:", error.message); - console.error("💡 Make sure Ollama is installed and running:"); - console.error(" 1. Install: https://ollama.ai/"); - console.error(" 2. Start: ollama serve"); - console.error(` 3. Install model: ollama pull ${model}`); - process.exit(1); - } -} - -/** - * Find the most recent results file if none specified - */ -function findLatestResultsFile() { - const resultsDir = "results"; - if (!fs.existsSync(resultsDir)) { - throw new Error("Results directory not found. Run the scraper first."); - } - - const files = fs - .readdirSync(resultsDir) - .filter( - (f) => - f.startsWith("results-") && f.endsWith(".json") && !f.includes("-ai-") - ) - .sort() - .reverse(); - - if (files.length === 0) { - throw new Error("No results files found. Run the scraper first."); - } - - return path.join(resultsDir, files[0]); -} - -/** - * Analyze multiple posts using local Ollama - */ -async function analyzeBatch(posts, context, model) { - console.log(`🤖 Analyzing batch of ${posts.length} posts with ${model}...`); - - try { - const prompt = `You are an expert at analyzing LinkedIn posts for relevance to specific contexts. - -CONTEXT TO MATCH: "${context}" - -Analyze these ${ - posts.length - } LinkedIn posts and determine if each relates to the context above. - -POSTS: -${posts - .map( - (post, i) => ` -POST ${i + 1}: -"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}" -` - ) - .join("")} - -For each post, provide: -- Is it relevant to "${context}"? (YES/NO) -- Confidence level (0.0 to 1.0) -- Brief reasoning - -Respond in this EXACT format for each post: -POST 1: YES/NO | 0.X | brief reason -POST 2: YES/NO | 0.X | brief reason -POST 3: YES/NO | 0.X | brief reason - -Examples: -- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs -- For hiring context: "we're hiring developers" = YES | 0.8 | job posting -- Unrelated content = NO | 0.1 | not relevant to context`; - - const response = await fetch(`${OLLAMA_HOST}/api/generate`, { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - model: model, - prompt: prompt, - stream: false, - options: { - temperature: 0.3, - top_p: 0.9, - }, - }), - }); - - if (!response.ok) { - throw new Error( - `Ollama API error: ${response.status} ${response.statusText}` - ); - } - - const data = await response.json(); - const aiResponse = data.response.trim(); - - // Parse the response - const analyses = []; - const lines = aiResponse.split("\n").filter((line) => line.trim()); - - for (let i = 0; i < posts.length; i++) { - let analysis = { - postIndex: i + 1, - isRelevant: false, - confidence: 0.5, - reasoning: "Could not parse AI response", - }; - - // Look for lines that match "POST X:" pattern - const postPattern = new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i"); - - for (const line of lines) { - const match = line.match(postPattern); - if (match) { - const content = match[1].trim(); - - // Parse: YES/NO | 0.X | reasoning - const parts = content.split("|").map((p) => p.trim()); - - if (parts.length >= 3) { - analysis.isRelevant = parts[0].toUpperCase().includes("YES"); - analysis.confidence = Math.max( - 0, - Math.min(1, parseFloat(parts[1]) || 0.5) - ); - analysis.reasoning = parts[2] || "No reasoning provided"; - } else { - // Fallback parsing - analysis.isRelevant = - content.toUpperCase().includes("YES") || - content.toLowerCase().includes("relevant"); - analysis.confidence = 0.6; - analysis.reasoning = content.substring(0, 100); - } - break; - } - } - - analyses.push(analysis); - } - - // If we didn't get enough analyses, fill in defaults - while (analyses.length < posts.length) { - analyses.push({ - postIndex: analyses.length + 1, - isRelevant: false, - confidence: 0.3, - reasoning: "AI response parsing failed", - }); - } - - return analyses; - } catch (error) { - console.error(`❌ Error in batch AI analysis: ${error.message}`); - - // Fallback: mark all as relevant with low confidence - return posts.map((_, i) => ({ - postIndex: i + 1, - isRelevant: true, - confidence: 0.3, - reasoning: `Analysis failed: ${error.message}`, - })); - } -} - -/** - * Analyze a single post using local Ollama (fallback) - */ -async function analyzeSinglePost(text, context, model) { - const prompt = `Analyze this LinkedIn post for relevance to: "${context}" - -Post: "${text}" - -Is this post relevant to "${context}"? Provide: -1. YES or NO -2. Confidence (0.0 to 1.0) -3. Brief reason - -Format: YES/NO | 0.X | reason`; - - try { - const response = await fetch(`${OLLAMA_HOST}/api/generate`, { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - model: model, - prompt: prompt, - stream: false, - options: { - temperature: 0.3, - }, - }), - }); - - if (!response.ok) { - throw new Error(`Ollama API error: ${response.status}`); - } - - const data = await response.json(); - const aiResponse = data.response.trim(); - - // Parse response - const parts = aiResponse.split("|").map((p) => p.trim()); - - if (parts.length >= 3) { - return { - isRelevant: parts[0].toUpperCase().includes("YES"), - confidence: Math.max(0, Math.min(1, parseFloat(parts[1]) || 0.5)), - reasoning: parts[2], - }; - } else { - // Fallback parsing - return { - isRelevant: - aiResponse.toLowerCase().includes("yes") || - aiResponse.toLowerCase().includes("relevant"), - confidence: 0.6, - reasoning: aiResponse.substring(0, 100), - }; - } - } catch (error) { - return { - isRelevant: true, // Default to include on error - confidence: 0.3, - reasoning: `Analysis failed: ${error.message}`, - }; - } -} - -/** - * Main processing function - */ -async function main() { - try { - console.log("🚀 LinkedOut Local AI Analyzer Starting..."); - console.log(`📊 Context: "${context}"`); - console.log(`🎯 Confidence Threshold: ${confidenceThreshold}`); - console.log(`📦 Batch Size: ${batchSize}`); - console.log(`🤖 Model: ${model}`); - - // Check Ollama status - await checkOllamaStatus(); - - // Determine input file - if (!inputFile) { - inputFile = findLatestResultsFile(); - console.log(`📂 Using latest results file: ${inputFile}`); - } else { - console.log(`📂 Using specified file: ${inputFile}`); - } - - // Load results - if (!fs.existsSync(inputFile)) { - throw new Error(`Input file not found: ${inputFile}`); - } - - const rawData = fs.readFileSync(inputFile, "utf-8"); - const results = JSON.parse(rawData); - - if (!Array.isArray(results) || results.length === 0) { - throw new Error("No posts found in input file"); - } - - console.log(`📋 Loaded ${results.length} posts for analysis`); - - // Process in batches - const processedResults = []; - let totalRelevant = 0; - let totalProcessed = 0; - - for (let i = 0; i < results.length; i += batchSize) { - const batch = results.slice(i, i + batchSize); - console.log( - `\n📦 Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil( - results.length / batchSize - )} (${batch.length} posts)` - ); - - const analyses = await analyzeBatch(batch, context, model); - - // Apply analyses to posts - for (let j = 0; j < batch.length; j++) { - const post = batch[j]; - const analysis = analyses[j]; - - const enhancedPost = { - ...post, - aiRelevant: analysis.isRelevant, - aiConfidence: analysis.confidence, - aiReasoning: analysis.reasoning, - aiModel: model, - aiAnalyzedAt: new Date().toLocaleString("en-CA", { - year: "numeric", - month: "2-digit", - day: "2-digit", - hour: "2-digit", - minute: "2-digit", - second: "2-digit", - hour12: false, - }), - aiType: "local-ollama", - aiProcessed: true, - }; - - // Apply confidence threshold - if (analysis.confidence >= confidenceThreshold) { - if (analysis.isRelevant) { - processedResults.push(enhancedPost); - totalRelevant++; - } - } else { - // Include low-confidence posts but flag them - enhancedPost.lowConfidence = true; - processedResults.push(enhancedPost); - } - - totalProcessed++; - console.log( - ` ${ - analysis.isRelevant ? "✅" : "❌" - } Post ${totalProcessed}: ${analysis.confidence.toFixed( - 2 - )} confidence - ${analysis.reasoning.substring(0, 100)}...` - ); - } - - // Small delay between batches to be nice to the system - if (i + batchSize < results.length) { - console.log("⏳ Brief pause..."); - await new Promise((resolve) => setTimeout(resolve, 500)); - } - } - - // Determine output file - if (!outputFile) { - const inputBasename = path.basename(inputFile, ".json"); - const inputDir = path.dirname(inputFile); - outputFile = path.join(inputDir, `${inputBasename}-ai-local.json`); - } - - // Save results - fs.writeFileSync( - outputFile, - JSON.stringify(processedResults, null, 2), - "utf-8" - ); - - console.log("\n🎉 Local AI Analysis Complete!"); - console.log(`📊 Results:`); - console.log(` Total posts processed: ${totalProcessed}`); - console.log(` Relevant posts found: ${totalRelevant}`); - console.log(` Final results saved: ${processedResults.length}`); - console.log(`📁 Output saved to: ${outputFile}`); - console.log(`💰 Cost: $0.00 (completely free!)`); - } catch (error) { - console.error("❌ Error:", error.message); - process.exit(1); - } -} - -// Show help if requested -if (args.includes("--help") || args.includes("-h")) { - console.log(` -LinkedOut Local AI Analyzer (Ollama) - -🚀 FREE local AI analysis - No API costs, complete privacy! - -Usage: node ai-analyzer-local.js [options] - -Options: - --input= Input JSON file (default: latest in results/) - --context= AI context to analyze against (required) - --confidence= Minimum confidence threshold (0.0-1.0, default: 0.7) - --model= Ollama model to use (default: llama2) - --batch-size= Number of posts to process at once (default: 3) - --output= Output file (default: adds -ai-local suffix) - --help, -h Show this help message - -Examples: - node ai-analyzer-local.js --context="job layoffs" - node ai-analyzer-local.js --model=mistral --context="hiring opportunities" - node ai-analyzer-local.js --context="remote work" --confidence=0.8 - -Prerequisites: - 1. Install Ollama: https://ollama.ai/ - 2. Install a model: ollama pull llama2 - 3. Start Ollama: ollama serve - -Popular Models: - - llama2 (good general purpose) - - mistral (fast and accurate) - - codellama (good for technical content) - - llama2:13b (more accurate, slower) - -Environment Variables: - AI_CONTEXT Default context for analysis - AI_CONFIDENCE Default confidence threshold - AI_BATCH_SIZE Default batch size - OLLAMA_MODEL Default model (llama2, mistral, etc.) - OLLAMA_HOST Ollama host (default: http://localhost:11434) -`); - process.exit(0); -} - -// Run the analyzer -main(); +#!/usr/bin/env node + +/** + * Local AI Post-Processing Analyzer for LinkedOut + * + * Uses Ollama for completely FREE local AI analysis. + * + * FEATURES: + * - Analyze LinkedOut results for context relevance (layoffs, hiring, etc.) + * - Works on latest or specified results file + * - Batch processing for speed + * - Configurable context, model, confidence, batch size + * - CLI and .env configuration + * - 100% local, private, and free + * + * USAGE: + * node ai-analyzer-local.js [options] + * + * COMMAND-LINE OPTIONS: + * --input= Input JSON file (default: latest in results/) + * --context= AI context to analyze against (required) + * --confidence= Minimum confidence threshold (0.0-1.0, default: 0.7) + * --model= Ollama model to use (default: llama2) + * --batch-size= Number of posts to process at once (default: 3) + * --output= Output file (default: adds -ai-local suffix) + * --help, -h Show this help message + * + * EXAMPLES: + * node ai-analyzer-local.js --context="job layoffs" + * node ai-analyzer-local.js --input=results/results-2024-01-15.json --context="hiring" + * node ai-analyzer-local.js --model=mistral --context="remote work" + * node ai-analyzer-local.js --context="job layoffs" --confidence=0.8 --batch-size=5 + * + * ENVIRONMENT VARIABLES (.env file): + * AI_CONTEXT, AI_CONFIDENCE, AI_BATCH_SIZE, OLLAMA_MODEL, OLLAMA_HOST + * See README for full list. + * + * OUTPUT: + * - Saves to results/ with -ai-local suffix unless --output is specified + * + * DEPENDENCIES: + * - Ollama (https://ollama.ai/) + * - Node.js built-ins: fs, path, fetch + * + * SECURITY & LEGAL: + * - All analysis is local, no data leaves your machine + * - Use responsibly for educational/research purposes + */ + +require("dotenv").config(); +const fs = require("fs"); +const path = require("path"); + +// Configuration from environment and command line +const DEFAULT_CONTEXT = + process.env.AI_CONTEXT || "job layoffs and workforce reduction"; +const DEFAULT_CONFIDENCE = parseFloat(process.env.AI_CONFIDENCE || "0.7"); +const DEFAULT_BATCH_SIZE = parseInt(process.env.AI_BATCH_SIZE || "3"); +const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "llama2"; +const OLLAMA_HOST = process.env.OLLAMA_HOST || "http://localhost:11434"; + +// Parse command line arguments +const args = process.argv.slice(2); +let inputFile = null; +let context = DEFAULT_CONTEXT; +let confidenceThreshold = DEFAULT_CONFIDENCE; +let batchSize = DEFAULT_BATCH_SIZE; +let model = DEFAULT_MODEL; +let outputFile = null; + +for (const arg of args) { + if (arg.startsWith("--input=")) { + inputFile = arg.split("=")[1]; + } else if (arg.startsWith("--context=")) { + context = arg.split("=")[1]; + } else if (arg.startsWith("--confidence=")) { + confidenceThreshold = parseFloat(arg.split("=")[1]); + } else if (arg.startsWith("--batch-size=")) { + batchSize = parseInt(arg.split("=")[1]); + } else if (arg.startsWith("--model=")) { + model = arg.split("=")[1]; + } else if (arg.startsWith("--output=")) { + outputFile = arg.split("=")[1]; + } +} + +if (!context) { + console.error("❌ Error: No AI context specified"); + console.error('Use --context="your context" or set AI_CONTEXT in .env'); + process.exit(1); +} + +/** + * Check if Ollama is running and the model is available + */ +async function checkOllamaStatus() { + try { + // Check if Ollama is running + const response = await fetch(`${OLLAMA_HOST}/api/tags`); + if (!response.ok) { + throw new Error(`Ollama not running on ${OLLAMA_HOST}`); + } + + const data = await response.json(); + const availableModels = data.models.map((m) => m.name); + + console.log(`🤖 Ollama is running`); + console.log( + `📦 Available models: ${availableModels + .map((m) => m.split(":")[0]) + .join(", ")}` + ); + + // Check if requested model is available + const modelExists = availableModels.some((m) => m.startsWith(model)); + if (!modelExists) { + console.error(`❌ Model "${model}" not found`); + console.error(`💡 Install it with: ollama pull ${model}`); + console.error( + `💡 Or choose from: ${availableModels + .map((m) => m.split(":")[0]) + .join(", ")}` + ); + process.exit(1); + } + + console.log(`✅ Using model: ${model}`); + return true; + } catch (error) { + console.error("❌ Error connecting to Ollama:", error.message); + console.error("💡 Make sure Ollama is installed and running:"); + console.error(" 1. Install: https://ollama.ai/"); + console.error(" 2. Start: ollama serve"); + console.error(` 3. Install model: ollama pull ${model}`); + process.exit(1); + } +} + +/** + * Find the most recent results file if none specified + */ +function findLatestResultsFile() { + const resultsDir = "results"; + if (!fs.existsSync(resultsDir)) { + throw new Error("Results directory not found. Run the scraper first."); + } + + const files = fs + .readdirSync(resultsDir) + .filter( + (f) => + f.startsWith("results-") && f.endsWith(".json") && !f.includes("-ai-") + ) + .sort() + .reverse(); + + if (files.length === 0) { + throw new Error("No results files found. Run the scraper first."); + } + + return path.join(resultsDir, files[0]); +} + +/** + * Analyze multiple posts using local Ollama + */ +async function analyzeBatch(posts, context, model) { + console.log(`🤖 Analyzing batch of ${posts.length} posts with ${model}...`); + + try { + const prompt = `You are an expert at analyzing LinkedIn posts for relevance to specific contexts. + +CONTEXT TO MATCH: "${context}" + +Analyze these ${ + posts.length + } LinkedIn posts and determine if each relates to the context above. + +POSTS: +${posts + .map( + (post, i) => ` +POST ${i + 1}: +"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}" +` + ) + .join("")} + +For each post, provide: +- Is it relevant to "${context}"? (YES/NO) +- Confidence level (0.0 to 1.0) +- Brief reasoning + +Respond in this EXACT format for each post: +POST 1: YES/NO | 0.X | brief reason +POST 2: YES/NO | 0.X | brief reason +POST 3: YES/NO | 0.X | brief reason + +Examples: +- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs +- For hiring context: "we're hiring developers" = YES | 0.8 | job posting +- Unrelated content = NO | 0.1 | not relevant to context`; + + const response = await fetch(`${OLLAMA_HOST}/api/generate`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: model, + prompt: prompt, + stream: false, + options: { + temperature: 0.3, + top_p: 0.9, + }, + }), + }); + + if (!response.ok) { + throw new Error( + `Ollama API error: ${response.status} ${response.statusText}` + ); + } + + const data = await response.json(); + const aiResponse = data.response.trim(); + + // Parse the response + const analyses = []; + const lines = aiResponse.split("\n").filter((line) => line.trim()); + + for (let i = 0; i < posts.length; i++) { + let analysis = { + postIndex: i + 1, + isRelevant: false, + confidence: 0.5, + reasoning: "Could not parse AI response", + }; + + // Look for lines that match "POST X:" pattern + const postPattern = new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i"); + + for (const line of lines) { + const match = line.match(postPattern); + if (match) { + const content = match[1].trim(); + + // Parse: YES/NO | 0.X | reasoning + const parts = content.split("|").map((p) => p.trim()); + + if (parts.length >= 3) { + analysis.isRelevant = parts[0].toUpperCase().includes("YES"); + analysis.confidence = Math.max( + 0, + Math.min(1, parseFloat(parts[1]) || 0.5) + ); + analysis.reasoning = parts[2] || "No reasoning provided"; + } else { + // Fallback parsing + analysis.isRelevant = + content.toUpperCase().includes("YES") || + content.toLowerCase().includes("relevant"); + analysis.confidence = 0.6; + analysis.reasoning = content.substring(0, 100); + } + break; + } + } + + analyses.push(analysis); + } + + // If we didn't get enough analyses, fill in defaults + while (analyses.length < posts.length) { + analyses.push({ + postIndex: analyses.length + 1, + isRelevant: false, + confidence: 0.3, + reasoning: "AI response parsing failed", + }); + } + + return analyses; + } catch (error) { + console.error(`❌ Error in batch AI analysis: ${error.message}`); + + // Fallback: mark all as relevant with low confidence + return posts.map((_, i) => ({ + postIndex: i + 1, + isRelevant: true, + confidence: 0.3, + reasoning: `Analysis failed: ${error.message}`, + })); + } +} + +/** + * Analyze a single post using local Ollama (fallback) + */ +async function analyzeSinglePost(text, context, model) { + const prompt = `Analyze this LinkedIn post for relevance to: "${context}" + +Post: "${text}" + +Is this post relevant to "${context}"? Provide: +1. YES or NO +2. Confidence (0.0 to 1.0) +3. Brief reason + +Format: YES/NO | 0.X | reason`; + + try { + const response = await fetch(`${OLLAMA_HOST}/api/generate`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: model, + prompt: prompt, + stream: false, + options: { + temperature: 0.3, + }, + }), + }); + + if (!response.ok) { + throw new Error(`Ollama API error: ${response.status}`); + } + + const data = await response.json(); + const aiResponse = data.response.trim(); + + // Parse response + const parts = aiResponse.split("|").map((p) => p.trim()); + + if (parts.length >= 3) { + return { + isRelevant: parts[0].toUpperCase().includes("YES"), + confidence: Math.max(0, Math.min(1, parseFloat(parts[1]) || 0.5)), + reasoning: parts[2], + }; + } else { + // Fallback parsing + return { + isRelevant: + aiResponse.toLowerCase().includes("yes") || + aiResponse.toLowerCase().includes("relevant"), + confidence: 0.6, + reasoning: aiResponse.substring(0, 100), + }; + } + } catch (error) { + return { + isRelevant: true, // Default to include on error + confidence: 0.3, + reasoning: `Analysis failed: ${error.message}`, + }; + } +} + +/** + * Main processing function + */ +async function main() { + try { + console.log("🚀 LinkedOut Local AI Analyzer Starting..."); + console.log(`📊 Context: "${context}"`); + console.log(`🎯 Confidence Threshold: ${confidenceThreshold}`); + console.log(`📦 Batch Size: ${batchSize}`); + console.log(`🤖 Model: ${model}`); + + // Check Ollama status + await checkOllamaStatus(); + + // Determine input file + if (!inputFile) { + inputFile = findLatestResultsFile(); + console.log(`📂 Using latest results file: ${inputFile}`); + } else { + console.log(`📂 Using specified file: ${inputFile}`); + } + + // Load results + if (!fs.existsSync(inputFile)) { + throw new Error(`Input file not found: ${inputFile}`); + } + + const rawData = fs.readFileSync(inputFile, "utf-8"); + const results = JSON.parse(rawData); + + if (!Array.isArray(results) || results.length === 0) { + throw new Error("No posts found in input file"); + } + + console.log(`📋 Loaded ${results.length} posts for analysis`); + + // Process in batches + const processedResults = []; + let totalRelevant = 0; + let totalProcessed = 0; + + for (let i = 0; i < results.length; i += batchSize) { + const batch = results.slice(i, i + batchSize); + console.log( + `\n📦 Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil( + results.length / batchSize + )} (${batch.length} posts)` + ); + + const analyses = await analyzeBatch(batch, context, model); + + // Apply analyses to posts + for (let j = 0; j < batch.length; j++) { + const post = batch[j]; + const analysis = analyses[j]; + + const enhancedPost = { + ...post, + aiRelevant: analysis.isRelevant, + aiConfidence: analysis.confidence, + aiReasoning: analysis.reasoning, + aiModel: model, + aiAnalyzedAt: new Date().toLocaleString("en-CA", { + year: "numeric", + month: "2-digit", + day: "2-digit", + hour: "2-digit", + minute: "2-digit", + second: "2-digit", + hour12: false, + }), + aiType: "local-ollama", + aiProcessed: true, + }; + + // Apply confidence threshold + if (analysis.confidence >= confidenceThreshold) { + if (analysis.isRelevant) { + processedResults.push(enhancedPost); + totalRelevant++; + } + } else { + // Include low-confidence posts but flag them + enhancedPost.lowConfidence = true; + processedResults.push(enhancedPost); + } + + totalProcessed++; + console.log( + ` ${ + analysis.isRelevant ? "✅" : "❌" + } Post ${totalProcessed}: ${analysis.confidence.toFixed( + 2 + )} confidence - ${analysis.reasoning.substring(0, 100)}...` + ); + } + + // Small delay between batches to be nice to the system + if (i + batchSize < results.length) { + console.log("⏳ Brief pause..."); + await new Promise((resolve) => setTimeout(resolve, 500)); + } + } + + // Determine output file + if (!outputFile) { + const inputBasename = path.basename(inputFile, ".json"); + const inputDir = path.dirname(inputFile); + outputFile = path.join(inputDir, `${inputBasename}-ai-local.json`); + } + + // Save results + fs.writeFileSync( + outputFile, + JSON.stringify(processedResults, null, 2), + "utf-8" + ); + + console.log("\n🎉 Local AI Analysis Complete!"); + console.log(`📊 Results:`); + console.log(` Total posts processed: ${totalProcessed}`); + console.log(` Relevant posts found: ${totalRelevant}`); + console.log(` Final results saved: ${processedResults.length}`); + console.log(`📁 Output saved to: ${outputFile}`); + console.log(`💰 Cost: $0.00 (completely free!)`); + } catch (error) { + console.error("❌ Error:", error.message); + process.exit(1); + } +} + +// Show help if requested +if (args.includes("--help") || args.includes("-h")) { + console.log(` +LinkedOut Local AI Analyzer (Ollama) + +🚀 FREE local AI analysis - No API costs, complete privacy! + +Usage: node ai-analyzer-local.js [options] + +Options: + --input= Input JSON file (default: latest in results/) + --context= AI context to analyze against (required) + --confidence= Minimum confidence threshold (0.0-1.0, default: 0.7) + --model= Ollama model to use (default: llama2) + --batch-size= Number of posts to process at once (default: 3) + --output= Output file (default: adds -ai-local suffix) + --help, -h Show this help message + +Examples: + node ai-analyzer-local.js --context="job layoffs" + node ai-analyzer-local.js --model=mistral --context="hiring opportunities" + node ai-analyzer-local.js --context="remote work" --confidence=0.8 + +Prerequisites: + 1. Install Ollama: https://ollama.ai/ + 2. Install a model: ollama pull llama2 + 3. Start Ollama: ollama serve + +Popular Models: + - llama2 (good general purpose) + - mistral (fast and accurate) + - codellama (good for technical content) + - llama2:13b (more accurate, slower) + +Environment Variables: + AI_CONTEXT Default context for analysis + AI_CONFIDENCE Default confidence threshold + AI_BATCH_SIZE Default batch size + OLLAMA_MODEL Default model (llama2, mistral, etc.) + OLLAMA_HOST Ollama host (default: http://localhost:11434) +`); + process.exit(0); +} + +// Run the analyzer +main(); diff --git a/test/test.js b/test/test.js index 86351f9..47d145e 100644 --- a/test/test.js +++ b/test/test.js @@ -1,19 +1,19 @@ -console.log("START!"); - -const { chromium } = require("playwright"); -(async () => { - console.log("browser!"); - - const browser = await chromium.launch({ - headless: true, - args: ["--no-sandbox", "--disable-setuid-sandbox"], - }); - console.log("new page!"); - - const page = await browser.newPage(); - console.log("GOTO!"); - - await page.goto("https://example.com"); - console.log("Success!"); - await browser.close(); -})(); +console.log("START!"); + +const { chromium } = require("playwright"); +(async () => { + console.log("browser!"); + + const browser = await chromium.launch({ + headless: true, + args: ["--no-sandbox", "--disable-setuid-sandbox"], + }); + console.log("new page!"); + + const page = await browser.newPage(); + console.log("GOTO!"); + + await page.goto("https://example.com"); + console.log("Success!"); + await browser.close(); +})();