#!/usr/bin/env node
|
|
|
|
/**
|
|
* Local AI Post-Processing Analyzer for LinkedOut
|
|
*
|
|
* Uses Ollama for completely FREE local AI analysis.
|
|
*
|
|
* FEATURES:
|
|
* - Analyze LinkedOut results for context relevance (layoffs, hiring, etc.)
|
|
* - Works on latest or specified results file
|
|
* - Batch processing for speed
|
|
* - Configurable context, model, confidence, batch size
|
|
* - CLI and .env configuration
|
|
* - 100% local, private, and free
|
|
*
|
|
* USAGE:
|
|
* node ai-analyzer-local.js [options]
|
|
*
|
|
* COMMAND-LINE OPTIONS:
|
|
* --input=<file> Input JSON file (default: latest in results/)
|
|
* --context=<text> AI context to analyze against (required)
|
|
* --confidence=<num> Minimum confidence threshold (0.0-1.0, default: 0.7)
|
|
* --model=<name> Ollama model to use (default: llama2)
|
|
* --batch-size=<num> Number of posts to process at once (default: 3)
|
|
* --output=<file> Output file (default: adds -ai-local suffix)
|
|
* --help, -h Show this help message
|
|
*
|
|
* EXAMPLES:
|
|
* node ai-analyzer-local.js --context="job layoffs"
|
|
* node ai-analyzer-local.js --input=results/results-2024-01-15.json --context="hiring"
|
|
* node ai-analyzer-local.js --model=mistral --context="remote work"
|
|
* node ai-analyzer-local.js --context="job layoffs" --confidence=0.8 --batch-size=5
|
|
*
|
|
* ENVIRONMENT VARIABLES (.env file):
|
|
* AI_CONTEXT, AI_CONFIDENCE, AI_BATCH_SIZE, OLLAMA_MODEL, OLLAMA_HOST
|
|
* See README for full list.
|
|
*
|
|
* OUTPUT:
|
|
* - Saves to results/ with -ai-local suffix unless --output is specified
|
|
*
|
|
* DEPENDENCIES:
|
|
* - Ollama (https://ollama.ai/)
|
|
* - Node.js built-ins: fs, path, fetch
|
|
*
|
|
* SECURITY & LEGAL:
|
|
* - All analysis is local, no data leaves your machine
|
|
* - Use responsibly for educational/research purposes
|
|
*/
|
|
|
|
require("dotenv").config();
|
|
const fs = require("fs");
|
|
const path = require("path");
|
|
|
|
// Configuration from environment and command line.
// `||` (not `??`) is intentional: an empty-string env var falls back to the default.
const DEFAULT_CONTEXT =
  process.env.AI_CONTEXT || "job layoffs and workforce reduction";
const DEFAULT_CONFIDENCE = Number.parseFloat(process.env.AI_CONFIDENCE || "0.7");
const DEFAULT_BATCH_SIZE = Number.parseInt(process.env.AI_BATCH_SIZE || "3", 10);
const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "llama2";
const OLLAMA_HOST = process.env.OLLAMA_HOST || "http://localhost:11434";

// Parse command line arguments (--flag=value style).
const args = process.argv.slice(2);
let inputFile = null;
let context = DEFAULT_CONTEXT;
let confidenceThreshold = DEFAULT_CONFIDENCE;
let batchSize = DEFAULT_BATCH_SIZE;
let model = DEFAULT_MODEL;
let outputFile = null;

// Everything after the FIRST "=" is the value, so values that themselves
// contain "=" (e.g. --context="supply = demand") survive intact.
// (The original `arg.split("=")[1]` dropped anything past a second "=".)
const argValue = (arg) => arg.slice(arg.indexOf("=") + 1);

for (const arg of args) {
  if (arg.startsWith("--input=")) {
    inputFile = argValue(arg);
  } else if (arg.startsWith("--context=")) {
    context = argValue(arg);
  } else if (arg.startsWith("--confidence=")) {
    confidenceThreshold = Number.parseFloat(argValue(arg));
  } else if (arg.startsWith("--batch-size=")) {
    batchSize = Number.parseInt(argValue(arg), 10);
  } else if (arg.startsWith("--model=")) {
    model = argValue(arg);
  } else if (arg.startsWith("--output=")) {
    outputFile = argValue(arg);
  }
}

// A non-empty context is required; it can arrive via CLI, .env, or the
// built-in default (only an explicit `--context=` can make it empty).
if (!context) {
  console.error("❌ Error: No AI context specified");
  console.error('Use --context="your context" or set AI_CONTEXT in .env');
  process.exit(1);
}
/**
 * Check if Ollama is running and the requested model is available.
 *
 * Reads the module-level `model` and `OLLAMA_HOST` settings. Exits the
 * process with code 1 (after printing install/run hints) when Ollama is
 * unreachable or the model is not installed; resolves to true otherwise.
 *
 * @returns {Promise<boolean>} true when Ollama is reachable and the model exists.
 */
async function checkOllamaStatus() {
  try {
    // Ollama's /api/tags endpoint lists locally installed models.
    const response = await fetch(`${OLLAMA_HOST}/api/tags`);
    if (!response.ok) {
      throw new Error(`Ollama not running on ${OLLAMA_HOST}`);
    }

    const data = await response.json();
    // Guard against a malformed payload: treat a missing `models` field as
    // "no models installed" instead of crashing on undefined.map.
    const availableModels = (data.models ?? []).map((m) => m.name);

    console.log(`🤖 Ollama is running`);
    console.log(
      `📦 Available models: ${availableModels
        .map((m) => m.split(":")[0])
        .join(", ")}`
    );

    // Installed names carry a tag suffix (e.g. "llama2:13b"), so a prefix
    // match lets a bare "llama2" request resolve to any installed tag.
    const modelExists = availableModels.some((m) => m.startsWith(model));
    if (!modelExists) {
      console.error(`❌ Model "${model}" not found`);
      console.error(`💡 Install it with: ollama pull ${model}`);
      console.error(
        `💡 Or choose from: ${availableModels
          .map((m) => m.split(":")[0])
          .join(", ")}`
      );
      process.exit(1);
    }

    console.log(`✅ Using model: ${model}`);
    return true;
  } catch (error) {
    console.error("❌ Error connecting to Ollama:", error.message);
    console.error("💡 Make sure Ollama is installed and running:");
    console.error("   1. Install: https://ollama.ai/");
    console.error("   2. Start: ollama serve");
    console.error(`   3. Install model: ollama pull ${model}`);
    process.exit(1);
  }
}
/**
 * Locate the newest raw results file in the results/ directory.
 *
 * Considers only files named "results-*.json" that have not already been
 * AI-annotated (no "-ai-" in the name). Relies on the date-stamped naming
 * scheme, so lexicographic order matches chronological order.
 *
 * @returns {string} Path to the most recent matching results file.
 * @throws {Error} If the directory is missing or holds no matching files.
 */
function findLatestResultsFile() {
  const resultsDir = "results";
  if (!fs.existsSync(resultsDir)) {
    throw new Error("Results directory not found. Run the scraper first.");
  }

  const isRawResults = (name) =>
    name.startsWith("results-") &&
    name.endsWith(".json") &&
    !name.includes("-ai-");

  const candidates = fs.readdirSync(resultsDir).filter(isRawResults).sort();

  if (candidates.length === 0) {
    throw new Error("No results files found. Run the scraper first.");
  }

  // Last entry in ascending order is the newest file.
  return path.join(resultsDir, candidates[candidates.length - 1]);
}
/** Build the single prompt that covers every post in the batch. */
function buildBatchPrompt(posts, context) {
  return `You are an expert at analyzing LinkedIn posts for relevance to specific contexts.

CONTEXT TO MATCH: "${context}"

Analyze these ${posts.length} LinkedIn posts and determine if each relates to the context above.

POSTS:
${posts
  .map(
    (post, i) => `
POST ${i + 1}:
"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}"
`
  )
  .join("")}

For each post, provide:
- Is it relevant to "${context}"? (YES/NO)
- Confidence level (0.0 to 1.0)
- Brief reasoning

Respond in this EXACT format for each post:
POST 1: YES/NO | 0.X | brief reason
POST 2: YES/NO | 0.X | brief reason
POST 3: YES/NO | 0.X | brief reason

Examples:
- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs
- For hiring context: "we're hiring developers" = YES | 0.8 | job posting
- Unrelated content = NO | 0.1 | not relevant to context`;
}

/**
 * Parse the verdict for one post out of the model's response lines.
 *
 * Looks for a "POST n: YES/NO | 0.X | reason" line; falls back to loose
 * keyword matching when the model ignores the format, and to a neutral
 * "could not parse" record when no matching line exists at all.
 *
 * @param {string[]} lines - Non-empty lines of the model response.
 * @param {number} postNumber - 1-based post number to look for.
 * @returns {{postIndex: number, isRelevant: boolean, confidence: number, reasoning: string}}
 */
function parseAnalysisLine(lines, postNumber) {
  const analysis = {
    postIndex: postNumber,
    isRelevant: false,
    confidence: 0.5,
    reasoning: "Could not parse AI response",
  };

  // BUG FIX: `\b` after the number prevents "POST 1" from matching
  // "POST 10"/"POST 12" lines when batch sizes reach double digits.
  const postPattern = new RegExp(`POST\\s*${postNumber}\\b:?\\s*(.+)`, "i");

  for (const line of lines) {
    const match = line.match(postPattern);
    if (!match) continue;

    const content = match[1].trim();
    // Expected shape: YES/NO | 0.X | reasoning
    const parts = content.split("|").map((p) => p.trim());

    if (parts.length >= 3) {
      analysis.isRelevant = parts[0].toUpperCase().includes("YES");
      // BUG FIX: the original used `parseFloat(parts[1]) || 0.5`, which
      // coerced a legitimate 0.0 confidence back up to 0.5. Only fall back
      // when the value is genuinely not a number.
      const parsed = Number.parseFloat(parts[1]);
      analysis.confidence = Number.isFinite(parsed)
        ? Math.max(0, Math.min(1, parsed))
        : 0.5;
      analysis.reasoning = parts[2] || "No reasoning provided";
    } else {
      // Fallback parsing for free-form responses.
      analysis.isRelevant =
        content.toUpperCase().includes("YES") ||
        content.toLowerCase().includes("relevant");
      analysis.confidence = 0.6;
      analysis.reasoning = content.substring(0, 100);
    }
    break;
  }

  return analysis;
}

/**
 * Analyze a batch of posts against `context` using a local Ollama model.
 *
 * Sends one prompt covering the whole batch, then parses the model's
 * line-per-post verdicts. Exactly one analysis record is produced per post
 * (the original's trailing "fill in defaults" while-loop was dead code and
 * has been removed). On any failure every post is marked relevant with low
 * confidence so no data is silently dropped.
 *
 * @param {Array<{text: string}>} posts - Posts to classify.
 * @param {string} context - Context description to match against.
 * @param {string} model - Ollama model name.
 * @returns {Promise<Array<{postIndex: number, isRelevant: boolean, confidence: number, reasoning: string}>>}
 */
async function analyzeBatch(posts, context, model) {
  console.log(`🤖 Analyzing batch of ${posts.length} posts with ${model}...`);

  try {
    const prompt = buildBatchPrompt(posts, context);

    const response = await fetch(`${OLLAMA_HOST}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: model,
        prompt: prompt,
        stream: false, // single JSON response instead of a token stream
        options: {
          temperature: 0.3, // low temperature for consistent verdicts
          top_p: 0.9,
        },
      }),
    });

    if (!response.ok) {
      throw new Error(
        `Ollama API error: ${response.status} ${response.statusText}`
      );
    }

    const data = await response.json();
    // Guard: a payload without `response` becomes an empty string, which
    // parses to "could not parse" records rather than crashing.
    const aiResponse = (data.response ?? "").trim();

    const lines = aiResponse.split("\n").filter((line) => line.trim());
    return posts.map((_, i) => parseAnalysisLine(lines, i + 1));
  } catch (error) {
    console.error(`❌ Error in batch AI analysis: ${error.message}`);

    // Fallback: mark all as relevant with low confidence so a transient
    // Ollama failure never silently drops posts from the results.
    return posts.map((_, i) => ({
      postIndex: i + 1,
      isRelevant: true,
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    }));
  }
}
/**
 * Analyze a single post using local Ollama (fallback to the batch path).
 *
 * @param {string} text - Post text to classify.
 * @param {string} context - Context description to match against.
 * @param {string} model - Ollama model name.
 * @returns {Promise<{isRelevant: boolean, confidence: number, reasoning: string}>}
 *   Never rejects: on any failure the post is marked relevant with low
 *   confidence so it is not silently dropped.
 */
async function analyzeSinglePost(text, context, model) {
  const prompt = `Analyze this LinkedIn post for relevance to: "${context}"

Post: "${text}"

Is this post relevant to "${context}"? Provide:
1. YES or NO
2. Confidence (0.0 to 1.0)
3. Brief reason

Format: YES/NO | 0.X | reason`;

  try {
    const response = await fetch(`${OLLAMA_HOST}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: model,
        prompt: prompt,
        stream: false, // single JSON response instead of a token stream
        options: {
          temperature: 0.3, // low temperature for consistent verdicts
        },
      }),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.status}`);
    }

    const data = await response.json();
    const aiResponse = data.response.trim();

    // Expected shape: YES/NO | 0.X | reasoning
    const parts = aiResponse.split("|").map((p) => p.trim());

    if (parts.length >= 3) {
      // BUG FIX: the original `parseFloat(parts[1]) || 0.5` coerced an
      // explicit 0.0 confidence back up to 0.5; only fall back when the
      // value is genuinely not a number.
      const parsed = Number.parseFloat(parts[1]);
      return {
        isRelevant: parts[0].toUpperCase().includes("YES"),
        confidence: Number.isFinite(parsed)
          ? Math.max(0, Math.min(1, parsed))
          : 0.5,
        reasoning: parts[2],
      };
    }

    // Fallback parsing for free-form responses.
    return {
      isRelevant:
        aiResponse.toLowerCase().includes("yes") ||
        aiResponse.toLowerCase().includes("relevant"),
      confidence: 0.6,
      reasoning: aiResponse.substring(0, 100),
    };
  } catch (error) {
    return {
      isRelevant: true, // Default to include on error rather than drop data
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    };
  }
}
/**
 * Main processing function
 *
 * Orchestrates the full pipeline: verify Ollama is up, pick/load the input
 * results file, analyze posts in batches, filter by the confidence
 * threshold, and write the AI-annotated results back to disk. Any failure
 * is reported and exits the process with code 1.
 */
async function main() {
  try {
    console.log("🚀 LinkedOut Local AI Analyzer Starting...");
    console.log(`📊 Context: "${context}"`);
    console.log(`🎯 Confidence Threshold: ${confidenceThreshold}`);
    console.log(`📦 Batch Size: ${batchSize}`);
    console.log(`🤖 Model: ${model}`);

    // Check Ollama status (this exits the process itself if unavailable)
    await checkOllamaStatus();

    // Determine input file: fall back to the newest file in results/
    if (!inputFile) {
      inputFile = findLatestResultsFile();
      console.log(`📂 Using latest results file: ${inputFile}`);
    } else {
      console.log(`📂 Using specified file: ${inputFile}`);
    }

    // Load results
    if (!fs.existsSync(inputFile)) {
      throw new Error(`Input file not found: ${inputFile}`);
    }

    const rawData = fs.readFileSync(inputFile, "utf-8");
    const results = JSON.parse(rawData);

    if (!Array.isArray(results) || results.length === 0) {
      throw new Error("No posts found in input file");
    }

    console.log(`📋 Loaded ${results.length} posts for analysis`);

    // Process in batches
    const processedResults = [];
    let totalRelevant = 0;
    let totalProcessed = 0;

    for (let i = 0; i < results.length; i += batchSize) {
      const batch = results.slice(i, i + batchSize);
      console.log(
        `\n📦 Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(
          results.length / batchSize
        )} (${batch.length} posts)`
      );

      // analyzeBatch always returns one analysis per post, in batch order
      const analyses = await analyzeBatch(batch, context, model);

      // Apply analyses to posts
      for (let j = 0; j < batch.length; j++) {
        const post = batch[j];
        const analysis = analyses[j];

        // Copy the post and attach the AI verdict alongside its original fields
        const enhancedPost = {
          ...post,
          aiRelevant: analysis.isRelevant,
          aiConfidence: analysis.confidence,
          aiReasoning: analysis.reasoning,
          aiModel: model,
          // en-CA locale yields a sortable "YYYY-MM-DD, HH:MM:SS" timestamp
          aiAnalyzedAt: new Date().toLocaleString("en-CA", {
            year: "numeric",
            month: "2-digit",
            day: "2-digit",
            hour: "2-digit",
            minute: "2-digit",
            second: "2-digit",
            hour12: false,
          }),
          aiType: "local-ollama",
          aiProcessed: true,
        };

        // Apply confidence threshold:
        //  - confident + relevant   -> keep (counted in totalRelevant)
        //  - confident + irrelevant -> drop
        //  - low confidence         -> keep regardless of verdict, but flagged
        if (analysis.confidence >= confidenceThreshold) {
          if (analysis.isRelevant) {
            processedResults.push(enhancedPost);
            totalRelevant++;
          }
        } else {
          // Include low-confidence posts but flag them
          enhancedPost.lowConfidence = true;
          processedResults.push(enhancedPost);
        }

        totalProcessed++;
        console.log(
          `   ${
            analysis.isRelevant ? "✅" : "❌"
          } Post ${totalProcessed}: ${analysis.confidence.toFixed(
            2
          )} confidence - ${analysis.reasoning.substring(0, 100)}...`
        );
      }

      // Small delay between batches to be nice to the system
      if (i + batchSize < results.length) {
        console.log("⏳ Brief pause...");
        await new Promise((resolve) => setTimeout(resolve, 500));
      }
    }

    // Determine output file: "<input>-ai-local.json" next to the input file
    if (!outputFile) {
      const inputBasename = path.basename(inputFile, ".json");
      const inputDir = path.dirname(inputFile);
      outputFile = path.join(inputDir, `${inputBasename}-ai-local.json`);
    }

    // Save results
    fs.writeFileSync(
      outputFile,
      JSON.stringify(processedResults, null, 2),
      "utf-8"
    );

    console.log("\n🎉 Local AI Analysis Complete!");
    console.log(`📊 Results:`);
    console.log(`   Total posts processed: ${totalProcessed}`);
    console.log(`   Relevant posts found: ${totalRelevant}`);
    console.log(`   Final results saved: ${processedResults.length}`);
    console.log(`📁 Output saved to: ${outputFile}`);
    console.log(`💰 Cost: $0.00 (completely free!)`);
  } catch (error) {
    console.error("❌ Error:", error.message);
    process.exit(1);
  }
}
// Show help if requested. NOTE(review): this check runs after the CLI
// parsing and context validation above, so an explicitly empty
// `--context=` combined with --help prints the context error instead of
// this help text — confirm whether that ordering is intentional.
if (args.includes("--help") || args.includes("-h")) {
  console.log(`
LinkedOut Local AI Analyzer (Ollama)

🚀 FREE local AI analysis - No API costs, complete privacy!

Usage: node ai-analyzer-local.js [options]

Options:
  --input=<file>        Input JSON file (default: latest in results/)
  --context=<text>      AI context to analyze against (required)
  --confidence=<num>    Minimum confidence threshold (0.0-1.0, default: 0.7)
  --model=<name>        Ollama model to use (default: llama2)
  --batch-size=<num>    Number of posts to process at once (default: 3)
  --output=<file>       Output file (default: adds -ai-local suffix)
  --help, -h            Show this help message

Examples:
  node ai-analyzer-local.js --context="job layoffs"
  node ai-analyzer-local.js --model=mistral --context="hiring opportunities"
  node ai-analyzer-local.js --context="remote work" --confidence=0.8

Prerequisites:
  1. Install Ollama: https://ollama.ai/
  2. Install a model: ollama pull llama2
  3. Start Ollama: ollama serve

Popular Models:
  - llama2 (good general purpose)
  - mistral (fast and accurate)
  - codellama (good for technical content)
  - llama2:13b (more accurate, slower)

Environment Variables:
  AI_CONTEXT      Default context for analysis
  AI_CONFIDENCE   Default confidence threshold
  AI_BATCH_SIZE   Default batch size
  OLLAMA_MODEL    Default model (llama2, mistral, etc.)
  OLLAMA_HOST     Ollama host (default: http://localhost:11434)
`);
  process.exit(0);
}

// Run the analyzer (main handles its own errors and exit codes)
main();