#!/usr/bin/env node
/**
 * Local AI Post-Processing Analyzer for LinkedOut
 *
 * Uses Ollama for completely FREE local AI analysis.
 *
 * FEATURES:
 * - Analyze LinkedOut results for context relevance (layoffs, hiring, etc.)
 * - Works on latest or specified results file
 * - Batch processing for speed
 * - Configurable context, model, confidence, batch size
 * - CLI and .env configuration
 * - 100% local, private, and free
 *
 * USAGE:
 *   node ai-analyzer-local.js [options]
 *
 * COMMAND-LINE OPTIONS:
 *   --input=       Input JSON file (default: latest in results/)
 *   --context=     AI context to analyze against (required)
 *   --confidence=  Minimum confidence threshold (0.0-1.0, default: 0.7)
 *   --model=       Ollama model to use (default: llama2)
 *   --batch-size=  Number of posts to process at once (default: 3)
 *   --output=      Output file (default: adds -ai-local suffix)
 *   --help, -h     Show this help message
 *
 * EXAMPLES:
 *   node ai-analyzer-local.js --context="job layoffs"
 *   node ai-analyzer-local.js --input=results/results-2024-01-15.json --context="hiring"
 *   node ai-analyzer-local.js --model=mistral --context="remote work"
 *   node ai-analyzer-local.js --context="job layoffs" --confidence=0.8 --batch-size=5
 *
 * ENVIRONMENT VARIABLES (.env file):
 *   AI_CONTEXT, AI_CONFIDENCE, AI_BATCH_SIZE, OLLAMA_MODEL, OLLAMA_HOST
 *   See README for full list.
 *
 * OUTPUT:
 * - Saves to results/ with -ai-local suffix unless --output is specified
 *
 * DEPENDENCIES:
 * - Ollama (https://ollama.ai/)
 * - Node.js built-ins: fs, path, fetch
 *
 * SECURITY & LEGAL:
 * - All analysis is local, no data leaves your machine
 * - Use responsibly for educational/research purposes
 */

require("dotenv").config();
const fs = require("fs");
const path = require("path");

// Configuration from environment and command line
const DEFAULT_CONTEXT =
  process.env.AI_CONTEXT || "job layoffs and workforce reduction";
const DEFAULT_CONFIDENCE = parseFloat(process.env.AI_CONFIDENCE || "0.7");
// FIX: always pass a radix to parseInt.
const DEFAULT_BATCH_SIZE = parseInt(process.env.AI_BATCH_SIZE || "3", 10);
const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "llama2";
const OLLAMA_HOST = process.env.OLLAMA_HOST || "http://localhost:11434";

// Parse command line arguments
const args = process.argv.slice(2);

/**
 * Extract the value part of a `--flag=value` argument.
 *
 * FIX: the original used `arg.split("=")[1]`, which truncated any value
 * that itself contains "=" (e.g. --context="salary = 100k").
 *
 * @param {string} arg - Raw CLI argument of the form `--flag=value`.
 * @returns {string} Everything after the first `=`.
 */
function argValue(arg) {
  return arg.slice(arg.indexOf("=") + 1);
}

let inputFile = null;
let context = DEFAULT_CONTEXT;
let confidenceThreshold = DEFAULT_CONFIDENCE;
let batchSize = DEFAULT_BATCH_SIZE;
let model = DEFAULT_MODEL;
let outputFile = null;

for (const arg of args) {
  if (arg.startsWith("--input=")) {
    inputFile = argValue(arg);
  } else if (arg.startsWith("--context=")) {
    context = argValue(arg);
  } else if (arg.startsWith("--confidence=")) {
    confidenceThreshold = parseFloat(argValue(arg));
  } else if (arg.startsWith("--batch-size=")) {
    batchSize = parseInt(argValue(arg), 10);
  } else if (arg.startsWith("--model=")) {
    model = argValue(arg);
  } else if (arg.startsWith("--output=")) {
    outputFile = argValue(arg);
  }
}

// Show help if requested.
// FIX: this check now runs BEFORE option validation, so `--help` works even
// when other options are missing or invalid (previously an empty --context=
// printed an error instead of the help text).
if (args.includes("--help") || args.includes("-h")) {
  console.log(`
LinkedOut Local AI Analyzer (Ollama)

šŸš€ FREE local AI analysis - No API costs, complete privacy!

Usage: node ai-analyzer-local.js [options]

Options:
  --input=       Input JSON file (default: latest in results/)
  --context=     AI context to analyze against (required)
  --confidence=  Minimum confidence threshold (0.0-1.0, default: 0.7)
  --model=       Ollama model to use (default: llama2)
  --batch-size=  Number of posts to process at once (default: 3)
  --output=      Output file (default: adds -ai-local suffix)
  --help, -h     Show this help message

Examples:
  node ai-analyzer-local.js --context="job layoffs"
  node ai-analyzer-local.js --model=mistral --context="hiring opportunities"
  node ai-analyzer-local.js --context="remote work" --confidence=0.8

Prerequisites:
  1. Install Ollama: https://ollama.ai/
  2. Install a model: ollama pull llama2
  3. Start Ollama: ollama serve

Popular Models:
  - llama2 (good general purpose)
  - mistral (fast and accurate)
  - codellama (good for technical content)
  - llama2:13b (more accurate, slower)

Environment Variables:
  AI_CONTEXT      Default context for analysis
  AI_CONFIDENCE   Default confidence threshold
  AI_BATCH_SIZE   Default batch size
  OLLAMA_MODEL    Default model (llama2, mistral, etc.)
  OLLAMA_HOST     Ollama host (default: http://localhost:11434)
`);
  process.exit(0);
}

if (!context) {
  console.error("āŒ Error: No AI context specified");
  console.error('Use --context="your context" or set AI_CONTEXT in .env');
  process.exit(1);
}

// FIX: a non-numeric --confidence silently disabled threshold filtering
// (every `NaN >= x` comparison is false); reject it up front instead.
if (
  Number.isNaN(confidenceThreshold) ||
  confidenceThreshold < 0 ||
  confidenceThreshold > 1
) {
  console.error("āŒ Error: --confidence must be a number between 0.0 and 1.0");
  process.exit(1);
}

// FIX: a non-numeric --batch-size made the batch loop increment by NaN and
// spin forever; reject it up front instead.
if (!Number.isInteger(batchSize) || batchSize < 1) {
  console.error("āŒ Error: --batch-size must be a positive integer");
  process.exit(1);
}

/**
 * Check that Ollama is reachable on OLLAMA_HOST and that the requested model
 * is installed. Exits the process with a helpful message on any failure.
 *
 * @returns {Promise<boolean>} true when Ollama is up and the model exists.
 */
async function checkOllamaStatus() {
  try {
    // Check if Ollama is running
    const response = await fetch(`${OLLAMA_HOST}/api/tags`);
    if (!response.ok) {
      throw new Error(`Ollama not running on ${OLLAMA_HOST}`);
    }

    const data = await response.json();
    // FIX: guard against a tags response without a `models` array.
    const availableModels = (data.models ?? []).map((m) => m.name);
    console.log(`šŸ¤– Ollama is running`);
    console.log(
      `šŸ“¦ Available models: ${availableModels
        .map((m) => m.split(":")[0])
        .join(", ")}`
    );

    // Check if requested model is available (names may carry a ":tag" suffix)
    const modelExists = availableModels.some((m) => m.startsWith(model));
    if (!modelExists) {
      console.error(`āŒ Model "${model}" not found`);
      console.error(`šŸ’” Install it with: ollama pull ${model}`);
      console.error(
        `šŸ’” Or choose from: ${availableModels
          .map((m) => m.split(":")[0])
          .join(", ")}`
      );
      process.exit(1);
    }

    console.log(`āœ… Using model: ${model}`);
    return true;
  } catch (error) {
    console.error("āŒ Error connecting to Ollama:", error.message);
    console.error("šŸ’” Make sure Ollama is installed and running:");
    console.error("   1. Install: https://ollama.ai/");
    console.error("   2. Start: ollama serve");
    console.error(`   3. Install model: ollama pull ${model}`);
    process.exit(1);
  }
}

/**
 * Find the most recent scraper results file in results/, skipping files that
 * are themselves AI-analysis output (contain "-ai-").
 *
 * Relies on the timestamped "results-*.json" naming so a lexicographic sort
 * is also a chronological sort.
 *
 * @returns {string} Path to the newest results file.
 * @throws {Error} When the directory or any matching file is missing.
 */
function findLatestResultsFile() {
  const resultsDir = "results";
  if (!fs.existsSync(resultsDir)) {
    throw new Error("Results directory not found. Run the scraper first.");
  }

  const files = fs
    .readdirSync(resultsDir)
    .filter(
      (f) =>
        f.startsWith("results-") && f.endsWith(".json") && !f.includes("-ai-")
    )
    .sort()
    .reverse();

  if (files.length === 0) {
    throw new Error("No results files found. Run the scraper first.");
  }

  return path.join(resultsDir, files[0]);
}

/**
 * Parse a "YES/NO | 0.X | reason" confidence field.
 *
 * FIX: the original used `parseFloat(parts[1]) || 0.5`, which bumped an
 * explicit 0.0 confidence up to 0.5; only fall back when parsing fails.
 *
 * @param {string} raw - The confidence field from the model output.
 * @returns {number} Confidence clamped to [0, 1], 0.5 when unparseable.
 */
function parseConfidence(raw) {
  const value = parseFloat(raw);
  return Math.max(0, Math.min(1, Number.isNaN(value) ? 0.5 : value));
}

/**
 * Analyze a batch of posts against `context` with one Ollama generate call,
 * then parse the "POST n: YES/NO | 0.X | reason" lines out of the response.
 *
 * On any failure (network, API, parsing) this never throws: unparsed posts
 * get a low-confidence default, and a failed request marks the whole batch
 * relevant at 0.3 confidence so posts are not silently dropped.
 *
 * @param {Array<{text: string}>} posts - Posts to analyze.
 * @param {string} context - Context description to match against.
 * @param {string} model - Ollama model name.
 * @returns {Promise<Array<{postIndex: number, isRelevant: boolean, confidence: number, reasoning: string}>>}
 */
async function analyzeBatch(posts, context, model) {
  console.log(`šŸ¤– Analyzing batch of ${posts.length} posts with ${model}...`);

  try {
    const prompt = `You are an expert at analyzing LinkedIn posts for relevance to specific contexts.

CONTEXT TO MATCH: "${context}"

Analyze these ${
      posts.length
    } LinkedIn posts and determine if each relates to the context above.

POSTS:
${posts
  .map(
    (post, i) => `
POST ${i + 1}: "${post.text.substring(0, 400)}${
      post.text.length > 400 ? "..." : ""
    }"
`
  )
  .join("")}

For each post, provide:
- Is it relevant to "${context}"? (YES/NO)
- Confidence level (0.0 to 1.0)
- Brief reasoning

Respond in this EXACT format for each post:
POST 1: YES/NO | 0.X | brief reason
POST 2: YES/NO | 0.X | brief reason
POST 3: YES/NO | 0.X | brief reason

Examples:
- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs
- For hiring context: "we're hiring developers" = YES | 0.8 | job posting
- Unrelated content = NO | 0.1 | not relevant to context`;

    const response = await fetch(`${OLLAMA_HOST}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: model,
        prompt: prompt,
        stream: false,
        options: {
          // Low temperature keeps the structured output format stable.
          temperature: 0.3,
          top_p: 0.9,
        },
      }),
    });

    if (!response.ok) {
      throw new Error(
        `Ollama API error: ${response.status} ${response.statusText}`
      );
    }

    const data = await response.json();
    const aiResponse = data.response.trim();

    // Parse the response line by line
    const analyses = [];
    const lines = aiResponse.split("\n").filter((line) => line.trim());

    for (let i = 0; i < posts.length; i++) {
      let analysis = {
        postIndex: i + 1,
        isRelevant: false,
        confidence: 0.5,
        reasoning: "Could not parse AI response",
      };

      // Look for lines that match "POST X:" pattern
      const postPattern = new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i");

      for (const line of lines) {
        const match = line.match(postPattern);
        if (match) {
          const content = match[1].trim();
          // Parse: YES/NO | 0.X | reasoning
          const parts = content.split("|").map((p) => p.trim());

          if (parts.length >= 3) {
            analysis.isRelevant = parts[0].toUpperCase().includes("YES");
            analysis.confidence = parseConfidence(parts[1]);
            analysis.reasoning = parts[2] || "No reasoning provided";
          } else {
            // Fallback parsing when the model ignored the pipe format
            analysis.isRelevant =
              content.toUpperCase().includes("YES") ||
              content.toLowerCase().includes("relevant");
            analysis.confidence = 0.6;
            analysis.reasoning = content.substring(0, 100);
          }
          break;
        }
      }

      analyses.push(analysis);
    }

    // If we didn't get enough analyses, fill in defaults
    while (analyses.length < posts.length) {
      analyses.push({
        postIndex: analyses.length + 1,
        isRelevant: false,
        confidence: 0.3,
        reasoning: "AI response parsing failed",
      });
    }

    return analyses;
  } catch (error) {
    console.error(`āŒ Error in batch AI analysis: ${error.message}`);
    // Fallback: mark all as relevant with low confidence
    return posts.map((_, i) => ({
      postIndex: i + 1,
      isRelevant: true,
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    }));
  }
}

/**
 * Analyze a single post using local Ollama (fallback path; kept for callers
 * that want per-post analysis instead of batching).
 *
 * @param {string} text - Post text.
 * @param {string} context - Context description to match against.
 * @param {string} model - Ollama model name.
 * @returns {Promise<{isRelevant: boolean, confidence: number, reasoning: string}>}
 */
async function analyzeSinglePost(text, context, model) {
  const prompt = `Analyze this LinkedIn post for relevance to: "${context}"

Post: "${text}"

Is this post relevant to "${context}"? Provide:
1. YES or NO
2. Confidence (0.0 to 1.0)
3. Brief reason

Format: YES/NO | 0.X | reason`;

  try {
    const response = await fetch(`${OLLAMA_HOST}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: model,
        prompt: prompt,
        stream: false,
        options: {
          temperature: 0.3,
        },
      }),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.status}`);
    }

    const data = await response.json();
    const aiResponse = data.response.trim();

    // Parse response
    const parts = aiResponse.split("|").map((p) => p.trim());
    if (parts.length >= 3) {
      return {
        isRelevant: parts[0].toUpperCase().includes("YES"),
        confidence: parseConfidence(parts[1]),
        reasoning: parts[2],
      };
    } else {
      // Fallback parsing
      return {
        isRelevant:
          aiResponse.toLowerCase().includes("yes") ||
          aiResponse.toLowerCase().includes("relevant"),
        confidence: 0.6,
        reasoning: aiResponse.substring(0, 100),
      };
    }
  } catch (error) {
    return {
      isRelevant: true, // Default to include on error
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    };
  }
}

/**
 * Main processing function: load the input file, analyze posts in batches,
 * filter by relevance/confidence, and write the annotated results.
 *
 * Filtering rules (unchanged from original):
 * - confidence >= threshold and relevant  -> kept
 * - confidence >= threshold, not relevant -> dropped
 * - confidence < threshold                -> kept, flagged lowConfidence
 */
async function main() {
  try {
    console.log("šŸš€ LinkedOut Local AI Analyzer Starting...");
    console.log(`šŸ“Š Context: "${context}"`);
    console.log(`šŸŽÆ Confidence Threshold: ${confidenceThreshold}`);
    console.log(`šŸ“¦ Batch Size: ${batchSize}`);
    console.log(`šŸ¤– Model: ${model}`);

    // Check Ollama status
    await checkOllamaStatus();

    // Determine input file
    if (!inputFile) {
      inputFile = findLatestResultsFile();
      console.log(`šŸ“‚ Using latest results file: ${inputFile}`);
    } else {
      console.log(`šŸ“‚ Using specified file: ${inputFile}`);
    }

    // Load results
    if (!fs.existsSync(inputFile)) {
      throw new Error(`Input file not found: ${inputFile}`);
    }

    const rawData = fs.readFileSync(inputFile, "utf-8");
    const results = JSON.parse(rawData);

    if (!Array.isArray(results) || results.length === 0) {
      throw new Error("No posts found in input file");
    }

    console.log(`šŸ“‹ Loaded ${results.length} posts for analysis`);

    // Process in batches
    const processedResults = [];
    let totalRelevant = 0;
    let totalProcessed = 0;

    for (let i = 0; i < results.length; i += batchSize) {
      const batch = results.slice(i, i + batchSize);
      console.log(
        `\nšŸ“¦ Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(
          results.length / batchSize
        )} (${batch.length} posts)`
      );

      const analyses = await analyzeBatch(batch, context, model);

      // Apply analyses to posts
      for (let j = 0; j < batch.length; j++) {
        const post = batch[j];
        const analysis = analyses[j];

        const enhancedPost = {
          ...post,
          aiRelevant: analysis.isRelevant,
          aiConfidence: analysis.confidence,
          aiReasoning: analysis.reasoning,
          aiModel: model,
          aiAnalyzedAt: new Date().toLocaleString("en-CA", {
            year: "numeric",
            month: "2-digit",
            day: "2-digit",
            hour: "2-digit",
            minute: "2-digit",
            second: "2-digit",
            hour12: false,
          }),
          aiType: "local-ollama",
          aiProcessed: true,
        };

        // Apply confidence threshold
        if (analysis.confidence >= confidenceThreshold) {
          if (analysis.isRelevant) {
            processedResults.push(enhancedPost);
            totalRelevant++;
          }
        } else {
          // Include low-confidence posts but flag them
          enhancedPost.lowConfidence = true;
          processedResults.push(enhancedPost);
        }

        totalProcessed++;
        console.log(
          `  ${
            analysis.isRelevant ? "āœ…" : "āŒ"
          } Post ${totalProcessed}: ${analysis.confidence.toFixed(
            2
          )} confidence - ${analysis.reasoning.substring(0, 100)}...`
        );
      }

      // Small delay between batches to be nice to the system
      if (i + batchSize < results.length) {
        console.log("ā³ Brief pause...");
        await new Promise((resolve) => setTimeout(resolve, 500));
      }
    }

    // Determine output file
    if (!outputFile) {
      const inputBasename = path.basename(inputFile, ".json");
      const inputDir = path.dirname(inputFile);
      outputFile = path.join(inputDir, `${inputBasename}-ai-local.json`);
    }

    // Save results
    fs.writeFileSync(
      outputFile,
      JSON.stringify(processedResults, null, 2),
      "utf-8"
    );

    console.log("\nšŸŽ‰ Local AI Analysis Complete!");
    console.log(`šŸ“Š Results:`);
    console.log(`   Total posts processed: ${totalProcessed}`);
    console.log(`   Relevant posts found: ${totalRelevant}`);
    console.log(`   Final results saved: ${processedResults.length}`);
    console.log(`šŸ“ Output saved to: ${outputFile}`);
    console.log(`šŸ’° Cost: $0.00 (completely free!)`);
  } catch (error) {
    console.error("āŒ Error:", error.message);
    process.exit(1);
  }
}

// Run the analyzer
main();