linkedout/ai-analyzer-local.js
2025-07-03 21:41:56 -04:00

541 lines
16 KiB
JavaScript

#!/usr/bin/env node
/**
* Local AI Post-Processing Analyzer for LinkedOut
*
* Uses Ollama for completely FREE local AI analysis.
*
* FEATURES:
* - Analyze LinkedOut results for context relevance (layoffs, hiring, etc.)
* - Works on latest or specified results file
* - Batch processing for speed
* - Configurable context, model, confidence, batch size
* - CLI and .env configuration
* - 100% local, private, and free
*
* USAGE:
* node ai-analyzer-local.js [options]
*
* COMMAND-LINE OPTIONS:
* --input=<file> Input JSON file (default: latest in results/)
* --context=<text> AI context to analyze against (required)
* --confidence=<num> Minimum confidence threshold (0.0-1.0, default: 0.7)
* --model=<name> Ollama model to use (default: llama2)
* --batch-size=<num> Number of posts to process at once (default: 3)
* --output=<file> Output file (default: adds -ai-local suffix)
* --help, -h Show this help message
*
* EXAMPLES:
* node ai-analyzer-local.js --context="job layoffs"
* node ai-analyzer-local.js --input=results/results-2024-01-15.json --context="hiring"
* node ai-analyzer-local.js --model=mistral --context="remote work"
* node ai-analyzer-local.js --context="job layoffs" --confidence=0.8 --batch-size=5
*
* ENVIRONMENT VARIABLES (.env file):
* AI_CONTEXT, AI_CONFIDENCE, AI_BATCH_SIZE, OLLAMA_MODEL, OLLAMA_HOST
* See README for full list.
*
* OUTPUT:
* - Saves to results/ with -ai-local suffix unless --output is specified
*
* DEPENDENCIES:
* - Ollama (https://ollama.ai/)
* - Node.js built-ins: fs, path, fetch
*
* SECURITY & LEGAL:
* - All analysis is local, no data leaves your machine
* - Use responsibly for educational/research purposes
*/
require("dotenv").config();
const fs = require("fs");
const path = require("path");

// ---------------------------------------------------------------------------
// Configuration: .env values provide defaults, command-line flags override.
// ---------------------------------------------------------------------------
const DEFAULT_CONTEXT =
  process.env.AI_CONTEXT || "job layoffs and workforce reduction";
const DEFAULT_CONFIDENCE = parseFloat(process.env.AI_CONFIDENCE || "0.7");
// Always pass an explicit radix to parseInt.
const DEFAULT_BATCH_SIZE = parseInt(process.env.AI_BATCH_SIZE || "3", 10);
const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "llama2";
const OLLAMA_HOST = process.env.OLLAMA_HOST || "http://localhost:11434";

// Parse command line arguments.
const args = process.argv.slice(2);
let inputFile = null;
let context = DEFAULT_CONTEXT;
let confidenceThreshold = DEFAULT_CONFIDENCE;
let batchSize = DEFAULT_BATCH_SIZE;
let model = DEFAULT_MODEL;
let outputFile = null;

// Everything after the FIRST "=": keeps flag values that themselves contain
// "=" intact (the previous `arg.split("=")[1]` truncated e.g.
// --context="supply=demand" down to "supply").
const flagValue = (arg) => arg.slice(arg.indexOf("=") + 1);

for (const arg of args) {
  if (arg.startsWith("--input=")) {
    inputFile = flagValue(arg);
  } else if (arg.startsWith("--context=")) {
    context = flagValue(arg);
  } else if (arg.startsWith("--confidence=")) {
    confidenceThreshold = parseFloat(flagValue(arg));
  } else if (arg.startsWith("--batch-size=")) {
    batchSize = parseInt(flagValue(arg), 10);
  } else if (arg.startsWith("--model=")) {
    model = flagValue(arg);
  } else if (arg.startsWith("--output=")) {
    outputFile = flagValue(arg);
  }
}

// Reachable via `--context=` (empty value) or AI_CONTEXT="" in .env.
if (!context) {
  console.error("❌ Error: No AI context specified");
  console.error('Use --context="your context" or set AI_CONTEXT in .env');
  process.exit(1);
}
// Fail fast on non-numeric flag values instead of letting NaN silently
// break the threshold comparison / batch slicing later on.
if (Number.isNaN(confidenceThreshold)) {
  console.error("❌ Error: --confidence must be a number between 0.0 and 1.0");
  process.exit(1);
}
if (Number.isNaN(batchSize) || batchSize < 1) {
  console.error("❌ Error: --batch-size must be a positive integer");
  process.exit(1);
}
/**
 * Verify that the Ollama server is reachable and that the requested model
 * has been pulled. Prints installation hints and exits the process with
 * code 1 on any failure.
 *
 * @returns {Promise<boolean>} resolves true when server and model are ready.
 */
async function checkOllamaStatus() {
  try {
    const res = await fetch(`${OLLAMA_HOST}/api/tags`);
    if (!res.ok) {
      throw new Error(`Ollama not running on ${OLLAMA_HOST}`);
    }
    const { models } = await res.json();
    const installed = models.map((entry) => entry.name);
    // Tags look like "llama2:latest"; show only base names to the user.
    const shortNames = installed.map((tag) => tag.split(":")[0]).join(", ");
    console.log(`🤖 Ollama is running`);
    console.log(`📦 Available models: ${shortNames}`);
    // A bare model name (e.g. "llama2") should match any of its tags.
    const found = installed.some((tag) => tag.startsWith(model));
    if (!found) {
      console.error(`❌ Model "${model}" not found`);
      console.error(`💡 Install it with: ollama pull ${model}`);
      console.error(`💡 Or choose from: ${shortNames}`);
      process.exit(1);
    }
    console.log(`✅ Using model: ${model}`);
    return true;
  } catch (error) {
    console.error("❌ Error connecting to Ollama:", error.message);
    console.error("💡 Make sure Ollama is installed and running:");
    console.error(" 1. Install: https://ollama.ai/");
    console.error(" 2. Start: ollama serve");
    console.error(` 3. Install model: ollama pull ${model}`);
    process.exit(1);
  }
}
/**
 * Locate the newest raw scraper output in ./results.
 *
 * Relies on the timestamped "results-*.json" naming scheme, so a plain
 * lexicographic sort orders files chronologically; AI-annotated outputs
 * (containing "-ai-") are excluded so re-runs never analyze their own output.
 *
 * @returns {string} path to the most recent raw results file.
 * @throws {Error} when the directory or any matching file is missing.
 */
function findLatestResultsFile() {
  const resultsDir = "results";
  if (!fs.existsSync(resultsDir)) {
    throw new Error("Results directory not found. Run the scraper first.");
  }
  const isRawResult = (name) =>
    name.startsWith("results-") &&
    name.endsWith(".json") &&
    !name.includes("-ai-");
  const candidates = fs.readdirSync(resultsDir).filter(isRawResult).sort();
  if (candidates.length === 0) {
    throw new Error("No results files found. Run the scraper first.");
  }
  // Last entry of the ascending sort is the lexicographically newest file.
  return path.join(resultsDir, candidates[candidates.length - 1]);
}
/**
 * Analyze a batch of posts against `context` in a single Ollama request.
 *
 * Sends one prompt covering all posts and parses the model's line-oriented
 * reply ("POST n: YES/NO | 0.X | reason") back into structured per-post
 * results.
 *
 * @param {Array<{text: string}>} posts - Posts to classify (only .text used).
 * @param {string} context - Relevance context to match against.
 * @param {string} model - Ollama model name.
 * @returns {Promise<Array<{postIndex: number, isRelevant: boolean,
 *   confidence: number, reasoning: string}>>} one entry per post, in input
 *   order. On request failure every post is marked relevant with low
 *   confidence so nothing is silently dropped.
 */
async function analyzeBatch(posts, context, model) {
  console.log(`🤖 Analyzing batch of ${posts.length} posts with ${model}...`);
  try {
    const prompt = `You are an expert at analyzing LinkedIn posts for relevance to specific contexts.
CONTEXT TO MATCH: "${context}"
Analyze these ${posts.length} LinkedIn posts and determine if each relates to the context above.
POSTS:
${posts
  .map(
    (post, i) => `
POST ${i + 1}:
"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}"
`
  )
  .join("")}
For each post, provide:
- Is it relevant to "${context}"? (YES/NO)
- Confidence level (0.0 to 1.0)
- Brief reasoning
Respond in this EXACT format for each post:
POST 1: YES/NO | 0.X | brief reason
POST 2: YES/NO | 0.X | brief reason
POST 3: YES/NO | 0.X | brief reason
Examples:
- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs
- For hiring context: "we're hiring developers" = YES | 0.8 | job posting
- Unrelated content = NO | 0.1 | not relevant to context`;
    const response = await fetch(`${OLLAMA_HOST}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: model,
        prompt: prompt,
        stream: false,
        options: {
          // Low temperature: we want deterministic, format-following output.
          temperature: 0.3,
          top_p: 0.9,
        },
      }),
    });
    if (!response.ok) {
      throw new Error(
        `Ollama API error: ${response.status} ${response.statusText}`
      );
    }
    const data = await response.json();
    const aiResponse = data.response.trim();
    // Parse the line-oriented response back into one analysis per post.
    const analyses = [];
    const lines = aiResponse.split("\n").filter((line) => line.trim());
    for (let i = 0; i < posts.length; i++) {
      let analysis = {
        postIndex: i + 1,
        isRelevant: false,
        confidence: 0.5,
        reasoning: "Could not parse AI response",
      };
      // Look for lines matching "POST <n>:". The (?!\d) lookahead prevents
      // the pattern for POST 1 from also matching POST 10/11/... (the
      // previous pattern had this bug, breaking batch sizes >= 10).
      const postPattern = new RegExp(`POST\\s*${i + 1}(?!\\d):?\\s*(.+)`, "i");
      for (const line of lines) {
        const match = line.match(postPattern);
        if (match) {
          const content = match[1].trim();
          // Expected shape: YES/NO | 0.X | reasoning
          const parts = content.split("|").map((p) => p.trim());
          if (parts.length >= 3) {
            analysis.isRelevant = parts[0].toUpperCase().includes("YES");
            // Clamp to [0, 1]; fall back to 0.5 only when the value is not
            // numeric (the previous `|| 0.5` also clobbered a legitimate 0.0).
            const parsed = parseFloat(parts[1]);
            analysis.confidence = Number.isNaN(parsed)
              ? 0.5
              : Math.max(0, Math.min(1, parsed));
            analysis.reasoning = parts[2] || "No reasoning provided";
          } else {
            // Model ignored the format; fall back to a keyword scan.
            analysis.isRelevant =
              content.toUpperCase().includes("YES") ||
              content.toLowerCase().includes("relevant");
            analysis.confidence = 0.6;
            analysis.reasoning = content.substring(0, 100);
          }
          break;
        }
      }
      analyses.push(analysis);
    }
    // If the model answered fewer posts than asked, pad with defaults so the
    // caller can still zip analyses against the batch positionally.
    while (analyses.length < posts.length) {
      analyses.push({
        postIndex: analyses.length + 1,
        isRelevant: false,
        confidence: 0.3,
        reasoning: "AI response parsing failed",
      });
    }
    return analyses;
  } catch (error) {
    console.error(`❌ Error in batch AI analysis: ${error.message}`);
    // Fail open: keep every post (low confidence) rather than losing data.
    return posts.map((_, i) => ({
      postIndex: i + 1,
      isRelevant: true,
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    }));
  }
}
/**
 * Classify a single post against the context with one Ollama call.
 * Single-post fallback alongside the batch analyzer.
 *
 * @param {string} text - Raw post text.
 * @param {string} context - Relevance context to test against.
 * @param {string} model - Ollama model name.
 * @returns {Promise<{isRelevant: boolean, confidence: number, reasoning: string}>}
 *   On any failure, resolves with isRelevant=true at low confidence so the
 *   post is kept rather than silently dropped.
 */
async function analyzeSinglePost(text, context, model) {
  const prompt = `Analyze this LinkedIn post for relevance to: "${context}"
Post: "${text}"
Is this post relevant to "${context}"? Provide:
1. YES or NO
2. Confidence (0.0 to 1.0)
3. Brief reason
Format: YES/NO | 0.X | reason`;
  try {
    const res = await fetch(`${OLLAMA_HOST}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model,
        prompt,
        stream: false,
        options: {
          temperature: 0.3,
        },
      }),
    });
    if (!res.ok) {
      throw new Error(`Ollama API error: ${res.status}`);
    }
    const payload = await res.json();
    const reply = payload.response.trim();
    // Expected shape: "YES/NO | 0.X | reason"
    const fields = reply.split("|").map((piece) => piece.trim());
    if (fields.length < 3) {
      // Model ignored the format; fall back to a keyword scan of the reply.
      return {
        isRelevant:
          reply.toLowerCase().includes("yes") ||
          reply.toLowerCase().includes("relevant"),
        confidence: 0.6,
        reasoning: reply.substring(0, 100),
      };
    }
    return {
      isRelevant: fields[0].toUpperCase().includes("YES"),
      confidence: Math.max(0, Math.min(1, parseFloat(fields[1]) || 0.5)),
      reasoning: fields[2],
    };
  } catch (error) {
    // Network/parse failure: keep the post rather than losing it.
    return {
      isRelevant: true,
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    };
  }
}
/**
* Main processing function
*/
async function main() {
try {
console.log("🚀 LinkedOut Local AI Analyzer Starting...");
console.log(`📊 Context: "${context}"`);
console.log(`🎯 Confidence Threshold: ${confidenceThreshold}`);
console.log(`📦 Batch Size: ${batchSize}`);
console.log(`🤖 Model: ${model}`);
// Check Ollama status
await checkOllamaStatus();
// Determine input file
if (!inputFile) {
inputFile = findLatestResultsFile();
console.log(`📂 Using latest results file: ${inputFile}`);
} else {
console.log(`📂 Using specified file: ${inputFile}`);
}
// Load results
if (!fs.existsSync(inputFile)) {
throw new Error(`Input file not found: ${inputFile}`);
}
const rawData = fs.readFileSync(inputFile, "utf-8");
const results = JSON.parse(rawData);
if (!Array.isArray(results) || results.length === 0) {
throw new Error("No posts found in input file");
}
console.log(`📋 Loaded ${results.length} posts for analysis`);
// Process in batches
const processedResults = [];
let totalRelevant = 0;
let totalProcessed = 0;
for (let i = 0; i < results.length; i += batchSize) {
const batch = results.slice(i, i + batchSize);
console.log(
`\n📦 Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(
results.length / batchSize
)} (${batch.length} posts)`
);
const analyses = await analyzeBatch(batch, context, model);
// Apply analyses to posts
for (let j = 0; j < batch.length; j++) {
const post = batch[j];
const analysis = analyses[j];
const enhancedPost = {
...post,
aiRelevant: analysis.isRelevant,
aiConfidence: analysis.confidence,
aiReasoning: analysis.reasoning,
aiModel: model,
aiAnalyzedAt: new Date().toLocaleString("en-CA", {
year: "numeric",
month: "2-digit",
day: "2-digit",
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
hour12: false,
}),
aiType: "local-ollama",
aiProcessed: true,
};
// Apply confidence threshold
if (analysis.confidence >= confidenceThreshold) {
if (analysis.isRelevant) {
processedResults.push(enhancedPost);
totalRelevant++;
}
} else {
// Include low-confidence posts but flag them
enhancedPost.lowConfidence = true;
processedResults.push(enhancedPost);
}
totalProcessed++;
console.log(
` ${
analysis.isRelevant ? "✅" : "❌"
} Post ${totalProcessed}: ${analysis.confidence.toFixed(
2
)} confidence - ${analysis.reasoning.substring(0, 100)}...`
);
}
// Small delay between batches to be nice to the system
if (i + batchSize < results.length) {
console.log("⏳ Brief pause...");
await new Promise((resolve) => setTimeout(resolve, 500));
}
}
// Determine output file
if (!outputFile) {
const inputBasename = path.basename(inputFile, ".json");
const inputDir = path.dirname(inputFile);
outputFile = path.join(inputDir, `${inputBasename}-ai-local.json`);
}
// Save results
fs.writeFileSync(
outputFile,
JSON.stringify(processedResults, null, 2),
"utf-8"
);
console.log("\n🎉 Local AI Analysis Complete!");
console.log(`📊 Results:`);
console.log(` Total posts processed: ${totalProcessed}`);
console.log(` Relevant posts found: ${totalRelevant}`);
console.log(` Final results saved: ${processedResults.length}`);
console.log(`📁 Output saved to: ${outputFile}`);
console.log(`💰 Cost: $0.00 (completely free!)`);
} catch (error) {
console.error("❌ Error:", error.message);
process.exit(1);
}
}
// ---------------------------------------------------------------------------
// Entry point: show help and exit if requested, otherwise run the analyzer.
// ---------------------------------------------------------------------------
if (args.includes("--help") || args.includes("-h")) {
  console.log(`
LinkedOut Local AI Analyzer (Ollama)
🚀 FREE local AI analysis - No API costs, complete privacy!
Usage: node ai-analyzer-local.js [options]
Options:
--input=<file> Input JSON file (default: latest in results/)
--context=<text> AI context to analyze against (required)
--confidence=<num> Minimum confidence threshold (0.0-1.0, default: 0.7)
--model=<name> Ollama model to use (default: llama2)
--batch-size=<num> Number of posts to process at once (default: 3)
--output=<file> Output file (default: adds -ai-local suffix)
--help, -h Show this help message
Examples:
node ai-analyzer-local.js --context="job layoffs"
node ai-analyzer-local.js --model=mistral --context="hiring opportunities"
node ai-analyzer-local.js --context="remote work" --confidence=0.8
Prerequisites:
1. Install Ollama: https://ollama.ai/
2. Install a model: ollama pull llama2
3. Start Ollama: ollama serve
Popular Models:
- llama2 (good general purpose)
- mistral (fast and accurate)
- codellama (good for technical content)
- llama2:13b (more accurate, slower)
Environment Variables:
AI_CONTEXT Default context for analysis
AI_CONFIDENCE Default confidence threshold
AI_BATCH_SIZE Default batch size
OLLAMA_MODEL Default model (llama2, mistral, etc.)
OLLAMA_HOST Ollama host (default: http://localhost:11434)
`);
  process.exit(0);
}

// Run the analyzer. main() handles expected failures itself, but attach a
// rejection handler so the top-level promise is never left floating and any
// programming error that escapes main's try/catch still surfaces with a
// non-zero exit code (previously `main();` discarded the promise).
main().catch((error) => {
  console.error("❌ Unexpected error:", error);
  process.exit(1);
});