linkedout/ai-analyzer-local.js
2025-07-03 21:41:56 -04:00

541 lines
16 KiB
JavaScript

#!/usr/bin/env node
/**
* Local AI Post-Processing Analyzer for LinkedOut
*
* Uses Ollama for completely FREE local AI analysis.
*
* FEATURES:
* - Analyze LinkedOut results for context relevance (layoffs, hiring, etc.)
* - Works on latest or specified results file
* - Batch processing for speed
* - Configurable context, model, confidence, batch size
* - CLI and .env configuration
* - 100% local, private, and free
*
* USAGE:
* node ai-analyzer-local.js [options]
*
* COMMAND-LINE OPTIONS:
* --input=<file> Input JSON file (default: latest in results/)
* --context=<text> AI context to analyze against (required)
* --confidence=<num> Minimum confidence threshold (0.0-1.0, default: 0.7)
* --model=<name> Ollama model to use (default: llama2)
* --batch-size=<num> Number of posts to process at once (default: 3)
* --output=<file> Output file (default: adds -ai-local suffix)
* --help, -h Show this help message
*
* EXAMPLES:
* node ai-analyzer-local.js --context="job layoffs"
* node ai-analyzer-local.js --input=results/results-2024-01-15.json --context="hiring"
* node ai-analyzer-local.js --model=mistral --context="remote work"
* node ai-analyzer-local.js --context="job layoffs" --confidence=0.8 --batch-size=5
*
* ENVIRONMENT VARIABLES (.env file):
* AI_CONTEXT, AI_CONFIDENCE, AI_BATCH_SIZE, OLLAMA_MODEL, OLLAMA_HOST
* See README for full list.
*
* OUTPUT:
* - Saves to results/ with -ai-local suffix unless --output is specified
*
* DEPENDENCIES:
* - Ollama (https://ollama.ai/)
* - Node.js built-ins: fs, path, fetch
*
* SECURITY & LEGAL:
* - All analysis is local, no data leaves your machine
* - Use responsibly for educational/research purposes
*/
require("dotenv").config();
const fs = require("fs");
const path = require("path");

// ---------------------------------------------------------------------------
// Configuration: .env values provide defaults, command-line flags override.
// ---------------------------------------------------------------------------
const DEFAULT_CONTEXT =
  process.env.AI_CONTEXT || "job layoffs and workforce reduction";
const DEFAULT_CONFIDENCE = parseFloat(process.env.AI_CONFIDENCE || "0.7");
// Always pass an explicit radix to parseInt.
const DEFAULT_BATCH_SIZE = parseInt(process.env.AI_BATCH_SIZE || "3", 10);
const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "llama2";
const OLLAMA_HOST = process.env.OLLAMA_HOST || "http://localhost:11434";

// Parse command line arguments.
const args = process.argv.slice(2);
let inputFile = null;
let context = DEFAULT_CONTEXT;
let confidenceThreshold = DEFAULT_CONFIDENCE;
let batchSize = DEFAULT_BATCH_SIZE;
let model = DEFAULT_MODEL;
let outputFile = null;

// Everything after the FIRST "=": keeps flag values that themselves contain
// "=" intact (the previous `arg.split("=")[1]` truncated e.g.
// --context="supply=demand" down to "supply").
const flagValue = (arg) => arg.slice(arg.indexOf("=") + 1);

for (const arg of args) {
  if (arg.startsWith("--input=")) {
    inputFile = flagValue(arg);
  } else if (arg.startsWith("--context=")) {
    context = flagValue(arg);
  } else if (arg.startsWith("--confidence=")) {
    confidenceThreshold = parseFloat(flagValue(arg));
  } else if (arg.startsWith("--batch-size=")) {
    batchSize = parseInt(flagValue(arg), 10);
  } else if (arg.startsWith("--model=")) {
    model = flagValue(arg);
  } else if (arg.startsWith("--output=")) {
    outputFile = flagValue(arg);
  }
}

// Reachable via `--context=` (empty value) or AI_CONTEXT="" in .env.
if (!context) {
  console.error("❌ Error: No AI context specified");
  console.error('Use --context="your context" or set AI_CONTEXT in .env');
  process.exit(1);
}
// Fail fast on non-numeric flag values instead of letting NaN silently
// break the threshold comparison / batch slicing later on.
if (Number.isNaN(confidenceThreshold)) {
  console.error("❌ Error: --confidence must be a number between 0.0 and 1.0");
  process.exit(1);
}
if (Number.isNaN(batchSize) || batchSize < 1) {
  console.error("❌ Error: --batch-size must be a positive integer");
  process.exit(1);
}
/**
 * Verify that the Ollama server is reachable and that the requested model
 * has been pulled. Prints installation hints and exits the process with
 * code 1 on any failure.
 *
 * @returns {Promise<boolean>} resolves true when server and model are ready.
 */
async function checkOllamaStatus() {
  try {
    const res = await fetch(`${OLLAMA_HOST}/api/tags`);
    if (!res.ok) {
      throw new Error(`Ollama not running on ${OLLAMA_HOST}`);
    }
    const { models } = await res.json();
    const installed = models.map((entry) => entry.name);
    // Tags look like "llama2:latest"; show only base names to the user.
    const shortNames = installed.map((tag) => tag.split(":")[0]).join(", ");
    console.log(`🤖 Ollama is running`);
    console.log(`📦 Available models: ${shortNames}`);
    // A bare model name (e.g. "llama2") should match any of its tags.
    const found = installed.some((tag) => tag.startsWith(model));
    if (!found) {
      console.error(`❌ Model "${model}" not found`);
      console.error(`💡 Install it with: ollama pull ${model}`);
      console.error(`💡 Or choose from: ${shortNames}`);
      process.exit(1);
    }
    console.log(`✅ Using model: ${model}`);
    return true;
  } catch (error) {
    console.error("❌ Error connecting to Ollama:", error.message);
    console.error("💡 Make sure Ollama is installed and running:");
    console.error(" 1. Install: https://ollama.ai/");
    console.error(" 2. Start: ollama serve");
    console.error(` 3. Install model: ollama pull ${model}`);
    process.exit(1);
  }
}
/**
 * Locate the newest raw scraper output in ./results.
 *
 * Relies on the timestamped "results-*.json" naming scheme, so a plain
 * lexicographic sort orders files chronologically; AI-annotated outputs
 * (containing "-ai-") are excluded so re-runs never analyze their own output.
 *
 * @returns {string} path to the most recent raw results file.
 * @throws {Error} when the directory or any matching file is missing.
 */
function findLatestResultsFile() {
  const resultsDir = "results";
  if (!fs.existsSync(resultsDir)) {
    throw new Error("Results directory not found. Run the scraper first.");
  }
  const isRawResult = (name) =>
    name.startsWith("results-") &&
    name.endsWith(".json") &&
    !name.includes("-ai-");
  const candidates = fs.readdirSync(resultsDir).filter(isRawResult).sort();
  if (candidates.length === 0) {
    throw new Error("No results files found. Run the scraper first.");
  }
  // Last entry of the ascending sort is the lexicographically newest file.
  return path.join(resultsDir, candidates[candidates.length - 1]);
}
/**
 * Analyze a batch of posts against `context` in a single Ollama request.
 *
 * Sends one prompt covering all posts and parses the model's line-oriented
 * reply ("POST n: YES/NO | 0.X | reason") back into structured per-post
 * results.
 *
 * @param {Array<{text: string}>} posts - Posts to classify (only .text used).
 * @param {string} context - Relevance context to match against.
 * @param {string} model - Ollama model name.
 * @returns {Promise<Array<{postIndex: number, isRelevant: boolean,
 *   confidence: number, reasoning: string}>>} one entry per post, in input
 *   order. On request failure every post is marked relevant with low
 *   confidence so nothing is silently dropped.
 */
async function analyzeBatch(posts, context, model) {
  console.log(`🤖 Analyzing batch of ${posts.length} posts with ${model}...`);
  try {
    const prompt = `You are an expert at analyzing LinkedIn posts for relevance to specific contexts.
CONTEXT TO MATCH: "${context}"
Analyze these ${posts.length} LinkedIn posts and determine if each relates to the context above.
POSTS:
${posts
  .map(
    (post, i) => `
POST ${i + 1}:
"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}"
`
  )
  .join("")}
For each post, provide:
- Is it relevant to "${context}"? (YES/NO)
- Confidence level (0.0 to 1.0)
- Brief reasoning
Respond in this EXACT format for each post:
POST 1: YES/NO | 0.X | brief reason
POST 2: YES/NO | 0.X | brief reason
POST 3: YES/NO | 0.X | brief reason
Examples:
- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs
- For hiring context: "we're hiring developers" = YES | 0.8 | job posting
- Unrelated content = NO | 0.1 | not relevant to context`;
    const response = await fetch(`${OLLAMA_HOST}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: model,
        prompt: prompt,
        stream: false,
        options: {
          // Low temperature: we want deterministic, format-following output.
          temperature: 0.3,
          top_p: 0.9,
        },
      }),
    });
    if (!response.ok) {
      throw new Error(
        `Ollama API error: ${response.status} ${response.statusText}`
      );
    }
    const data = await response.json();
    const aiResponse = data.response.trim();
    // Parse the line-oriented response back into one analysis per post.
    const analyses = [];
    const lines = aiResponse.split("\n").filter((line) => line.trim());
    for (let i = 0; i < posts.length; i++) {
      let analysis = {
        postIndex: i + 1,
        isRelevant: false,
        confidence: 0.5,
        reasoning: "Could not parse AI response",
      };
      // Look for lines matching "POST <n>:". The (?!\d) lookahead prevents
      // the pattern for POST 1 from also matching POST 10/11/... (the
      // previous pattern had this bug, breaking batch sizes >= 10).
      const postPattern = new RegExp(`POST\\s*${i + 1}(?!\\d):?\\s*(.+)`, "i");
      for (const line of lines) {
        const match = line.match(postPattern);
        if (match) {
          const content = match[1].trim();
          // Expected shape: YES/NO | 0.X | reasoning
          const parts = content.split("|").map((p) => p.trim());
          if (parts.length >= 3) {
            analysis.isRelevant = parts[0].toUpperCase().includes("YES");
            // Clamp to [0, 1]; fall back to 0.5 only when the value is not
            // numeric (the previous `|| 0.5` also clobbered a legitimate 0.0).
            const parsed = parseFloat(parts[1]);
            analysis.confidence = Number.isNaN(parsed)
              ? 0.5
              : Math.max(0, Math.min(1, parsed));
            analysis.reasoning = parts[2] || "No reasoning provided";
          } else {
            // Model ignored the format; fall back to a keyword scan.
            analysis.isRelevant =
              content.toUpperCase().includes("YES") ||
              content.toLowerCase().includes("relevant");
            analysis.confidence = 0.6;
            analysis.reasoning = content.substring(0, 100);
          }
          break;
        }
      }
      analyses.push(analysis);
    }
    // If the model answered fewer posts than asked, pad with defaults so the
    // caller can still zip analyses against the batch positionally.
    while (analyses.length < posts.length) {
      analyses.push({
        postIndex: analyses.length + 1,
        isRelevant: false,
        confidence: 0.3,
        reasoning: "AI response parsing failed",
      });
    }
    return analyses;
  } catch (error) {
    console.error(`❌ Error in batch AI analysis: ${error.message}`);
    // Fail open: keep every post (low confidence) rather than losing data.
    return posts.map((_, i) => ({
      postIndex: i + 1,
      isRelevant: true,
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    }));
  }
}
/**
 * Classify a single post against the context with one Ollama call.
 * Single-post fallback alongside the batch analyzer.
 *
 * @param {string} text - Raw post text.
 * @param {string} context - Relevance context to test against.
 * @param {string} model - Ollama model name.
 * @returns {Promise<{isRelevant: boolean, confidence: number, reasoning: string}>}
 *   On any failure, resolves with isRelevant=true at low confidence so the
 *   post is kept rather than silently dropped.
 */
async function analyzeSinglePost(text, context, model) {
  const prompt = `Analyze this LinkedIn post for relevance to: "${context}"
Post: "${text}"
Is this post relevant to "${context}"? Provide:
1. YES or NO
2. Confidence (0.0 to 1.0)
3. Brief reason
Format: YES/NO | 0.X | reason`;
  try {
    const res = await fetch(`${OLLAMA_HOST}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model,
        prompt,
        stream: false,
        options: {
          temperature: 0.3,
        },
      }),
    });
    if (!res.ok) {
      throw new Error(`Ollama API error: ${res.status}`);
    }
    const payload = await res.json();
    const reply = payload.response.trim();
    // Expected shape: "YES/NO | 0.X | reason"
    const fields = reply.split("|").map((piece) => piece.trim());
    if (fields.length < 3) {
      // Model ignored the format; fall back to a keyword scan of the reply.
      return {
        isRelevant:
          reply.toLowerCase().includes("yes") ||
          reply.toLowerCase().includes("relevant"),
        confidence: 0.6,
        reasoning: reply.substring(0, 100),
      };
    }
    return {
      isRelevant: fields[0].toUpperCase().includes("YES"),
      confidence: Math.max(0, Math.min(1, parseFloat(fields[1]) || 0.5)),
      reasoning: fields[2],
    };
  } catch (error) {
    // Network/parse failure: keep the post rather than losing it.
    return {
      isRelevant: true,
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    };
  }
}
/**
* Main processing function
*/
async function main() {
try {
console.log("🚀 LinkedOut Local AI Analyzer Starting...");
console.log(`📊 Context: "${context}"`);
console.log(`🎯 Confidence Threshold: ${confidenceThreshold}`);
console.log(`📦 Batch Size: ${batchSize}`);
console.log(`🤖 Model: ${model}`);
// Check Ollama status
await checkOllamaStatus();
// Determine input file
if (!inputFile) {
inputFile = findLatestResultsFile();
console.log(`📂 Using latest results file: ${inputFile}`);
} else {
console.log(`📂 Using specified file: ${inputFile}`);
}
// Load results
if (!fs.existsSync(inputFile)) {
throw new Error(`Input file not found: ${inputFile}`);
}
const rawData = fs.readFileSync(inputFile, "utf-8");
const results = JSON.parse(rawData);
if (!Array.isArray(results) || results.length === 0) {
throw new Error("No posts found in input file");
}
console.log(`📋 Loaded ${results.length} posts for analysis`);
// Process in batches
const processedResults = [];
let totalRelevant = 0;
let totalProcessed = 0;
for (let i = 0; i < results.length; i += batchSize) {
const batch = results.slice(i, i + batchSize);
console.log(
`\n📦 Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(
results.length / batchSize
)} (${batch.length} posts)`
);
const analyses = await analyzeBatch(batch, context, model);
// Apply analyses to posts
for (let j = 0; j < batch.length; j++) {
const post = batch[j];
const analysis = analyses[j];
const enhancedPost = {
...post,
aiRelevant: analysis.isRelevant,
aiConfidence: analysis.confidence,
aiReasoning: analysis.reasoning,
aiModel: model,
aiAnalyzedAt: new Date().toLocaleString("en-CA", {
year: "numeric",
month: "2-digit",
day: "2-digit",
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
hour12: false,
}),
aiType: "local-ollama",
aiProcessed: true,
};
// Apply confidence threshold
if (analysis.confidence >= confidenceThreshold) {
if (analysis.isRelevant) {
processedResults.push(enhancedPost);
totalRelevant++;
}
} else {
// Include low-confidence posts but flag them
enhancedPost.lowConfidence = true;
processedResults.push(enhancedPost);
}
totalProcessed++;
console.log(
` ${
analysis.isRelevant ? "✅" : "❌"
} Post ${totalProcessed}: ${analysis.confidence.toFixed(
2
)} confidence - ${analysis.reasoning.substring(0, 100)}...`
);
}
// Small delay between batches to be nice to the system
if (i + batchSize < results.length) {
console.log("⏳ Brief pause...");
await new Promise((resolve) => setTimeout(resolve, 500));
}
}
// Determine output file
if (!outputFile) {
const inputBasename = path.basename(inputFile, ".json");
const inputDir = path.dirname(inputFile);
outputFile = path.join(inputDir, `${inputBasename}-ai-local.json`);
}
// Save results
fs.writeFileSync(
outputFile,
JSON.stringify(processedResults, null, 2),
"utf-8"
);
console.log("\n🎉 Local AI Analysis Complete!");
console.log(`📊 Results:`);
console.log(` Total posts processed: ${totalProcessed}`);
console.log(` Relevant posts found: ${totalRelevant}`);
console.log(` Final results saved: ${processedResults.length}`);
console.log(`📁 Output saved to: ${outputFile}`);
console.log(`💰 Cost: $0.00 (completely free!)`);
} catch (error) {
console.error("❌ Error:", error.message);
process.exit(1);
}
}
// ---------------------------------------------------------------------------
// Entry point: show help and exit if requested, otherwise run the analyzer.
// ---------------------------------------------------------------------------
if (args.includes("--help") || args.includes("-h")) {
  console.log(`
LinkedOut Local AI Analyzer (Ollama)
🚀 FREE local AI analysis - No API costs, complete privacy!
Usage: node ai-analyzer-local.js [options]
Options:
--input=<file> Input JSON file (default: latest in results/)
--context=<text> AI context to analyze against (required)
--confidence=<num> Minimum confidence threshold (0.0-1.0, default: 0.7)
--model=<name> Ollama model to use (default: llama2)
--batch-size=<num> Number of posts to process at once (default: 3)
--output=<file> Output file (default: adds -ai-local suffix)
--help, -h Show this help message
Examples:
node ai-analyzer-local.js --context="job layoffs"
node ai-analyzer-local.js --model=mistral --context="hiring opportunities"
node ai-analyzer-local.js --context="remote work" --confidence=0.8
Prerequisites:
1. Install Ollama: https://ollama.ai/
2. Install a model: ollama pull llama2
3. Start Ollama: ollama serve
Popular Models:
- llama2 (good general purpose)
- mistral (fast and accurate)
- codellama (good for technical content)
- llama2:13b (more accurate, slower)
Environment Variables:
AI_CONTEXT Default context for analysis
AI_CONFIDENCE Default confidence threshold
AI_BATCH_SIZE Default batch size
OLLAMA_MODEL Default model (llama2, mistral, etc.)
OLLAMA_HOST Ollama host (default: http://localhost:11434)
`);
  process.exit(0);
}

// Run the analyzer. main() handles expected failures itself, but attach a
// rejection handler so the top-level promise is never left floating and any
// programming error that escapes main's try/catch still surfaces with a
// non-zero exit code (previously `main();` discarded the promise).
main().catch((error) => {
  console.error("❌ Unexpected error:", error);
  process.exit(1);
});