
#!/usr/bin/env node
/**
* AI Analyzer CLI
*
* Command-line interface for the ai-analyzer package
* Can be used by any parser to analyze JSON files
*/
const fs = require("fs");
const path = require("path");
// Import AI utilities from this package
const {
logger,
analyzeBatch,
checkOllamaStatus,
findLatestResultsFile,
} = require("./index");
// Default configuration (overridable via environment variables)
const DEFAULT_CONTEXT =
  process.env.AI_CONTEXT || "job market analysis and trends";
const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "mistral";
const DEFAULT_RESULTS_DIR = "results";

// Parse command line arguments
const args = process.argv.slice(2);
let inputFile = null;
let outputFile = null;
let context = DEFAULT_CONTEXT;
let model = DEFAULT_MODEL;
let findLatest = false;
let resultsDir = DEFAULT_RESULTS_DIR;

/**
 * Return the value part of a `--key=value` argument.
 * Slices at the FIRST "=" so values that themselves contain "="
 * (e.g. --context="salary=high") survive intact; the previous
 * `arg.split("=")[1]` silently truncated such values.
 * @param {string} arg - A raw argument of the form `--key=value`.
 * @returns {string} Everything after the first "=".
 */
const argValue = (arg) => arg.slice(arg.indexOf("=") + 1);

for (const arg of args) {
  if (arg.startsWith("--input=")) {
    inputFile = argValue(arg);
  } else if (arg.startsWith("--output=")) {
    outputFile = argValue(arg);
  } else if (arg.startsWith("--context=")) {
    context = argValue(arg);
  } else if (arg.startsWith("--model=")) {
    model = argValue(arg);
  } else if (arg.startsWith("--dir=")) {
    resultsDir = argValue(arg);
  } else if (arg === "--latest") {
    findLatest = true;
  } else if (arg === "--help" || arg === "-h") {
    console.log(`
AI Analyzer CLI
Usage: node cli.js [options]
Options:
--input=FILE Input JSON file
--output=FILE Output file (default: ai-analysis-{timestamp}.json)
--context="description" Analysis context (default: "${DEFAULT_CONTEXT}")
--model=MODEL Ollama model (default: ${DEFAULT_MODEL})
--latest Use latest results file from results directory
--dir=PATH Directory to look for results (default: 'results')
--help, -h Show this help
Examples:
node cli.js --input=results.json
node cli.js --latest --dir=results
node cli.js --input=results.json --context="job trends" --model=mistral
Environment Variables:
AI_CONTEXT Default analysis context
OLLAMA_MODEL Default Ollama model
`);
    process.exit(0);
  }
}
/**
 * Extract the posts array from parsed input JSON.
 * Accepts either a bare array or a `{ results: [...] }` wrapper.
 * @param {object|Array} data - Parsed JSON content of the input file.
 * @returns {Array<object>} The items to analyze.
 * @throws {Error} If the data matches neither supported shape.
 */
function extractPosts(data) {
  if (data.results && Array.isArray(data.results)) {
    logger.info(`Found ${data.results.length} items in results array`);
    return data.results;
  }
  if (Array.isArray(data)) {
    logger.info(`Found ${data.length} items in array`);
    return data;
  }
  throw new Error("Invalid JSON format - need array or {results: [...]}");
}

/**
 * CLI entry point: resolve the input file, load its JSON, run AI relevance
 * analysis on every item via Ollama, merge the per-item results back into
 * the original structure, and write the updated document to disk.
 * Exits the process with code 1 on any failure.
 */
async function main() {
  try {
    // Determine input file: --latest looks up the newest file in resultsDir
    if (findLatest) {
      try {
        inputFile = findLatestResultsFile(resultsDir);
        logger.info(`Found latest results file: ${inputFile}`);
      } catch (error) {
        logger.error(
          `❌ No results files found in '${resultsDir}': ${error.message}`
        );
        logger.info(`💡 To create results files:`);
        logger.info(` 1. Run a parser first (e.g., npm start in linkedin-parser)`);
        logger.info(` 2. Or provide a specific file with --input=FILE`);
        logger.info(` 3. Or create a sample JSON file to test with`);
        process.exit(1);
      }
    }

    // If inputFile is a relative path that doesn't resolve on its own,
    // retry it relative to the results directory.
    if (inputFile && !path.isAbsolute(inputFile) && !fs.existsSync(inputFile)) {
      const candidate = path.join(resultsDir, inputFile);
      if (fs.existsSync(candidate)) {
        inputFile = candidate;
      }
    }

    if (!inputFile) {
      logger.error("❌ Input file required. Use --input=FILE or --latest");
      logger.info(`💡 Examples:`);
      logger.info(` node cli.js --input=results.json`);
      logger.info(` node cli.js --latest --dir=results`);
      logger.info(` node cli.js --help`);
      process.exit(1);
    }

    // Load input file.
    // NOTE: `let` (not `const`) is required — the bare-array branch below
    // reassigns `data` to a wrapper object; declaring it `const` threw
    // "Assignment to constant variable" for plain-array inputs.
    logger.step(`Loading input file: ${inputFile}`);
    if (!fs.existsSync(inputFile)) {
      throw new Error(`Input file not found: ${inputFile}`);
    }
    let data = JSON.parse(fs.readFileSync(inputFile, "utf-8"));

    // Extract posts from the supported input formats
    const posts = extractPosts(data);
    if (posts.length === 0) {
      throw new Error("No items found to analyze");
    }

    // Check AI availability before doing any work
    logger.step("Checking AI availability");
    const aiAvailable = await checkOllamaStatus(model);
    if (!aiAvailable) {
      throw new Error(
        `AI not available. Make sure Ollama is running and model '${model}' is installed.`
      );
    }

    // Check if results already have AI analysis (re-running overwrites it)
    const hasExistingAI = posts.some((post) => post.aiAnalysis);
    if (hasExistingAI) {
      logger.info(
        `📋 Results already contain AI analysis - will update with new context`
      );
    }

    // Normalize each post into the field shape the analyzer expects;
    // different parsers store the body under text/content/post.
    const analysisData = posts.map((post) => ({
      text: post.text || post.content || post.post || "",
      location: post.location || "Unknown",
      keyword: post.keyword || "Unknown",
      timestamp: post.timestamp || new Date().toISOString(),
    }));

    // Run analysis
    logger.step(`Running AI analysis with context: "${context}"`);
    const analysis = await analyzeBatch(analysisData, context, model);

    // Integrate AI analysis back into the original results, item by item
    const updatedPosts = posts.map((post, index) => {
      const aiResult = analysis[index];
      return {
        ...post,
        aiAnalysis: {
          isRelevant: aiResult.isRelevant,
          confidence: aiResult.confidence,
          reasoning: aiResult.reasoning,
          context: context,
          model: model,
          analyzedAt: new Date().toISOString(),
        },
      };
    });

    // Update the original data structure
    if (data.results && Array.isArray(data.results)) {
      data.results = updatedPosts;
      // Update metadata alongside the results
      data.metadata = data.metadata || {};
      data.metadata.aiAnalysisUpdated = new Date().toISOString();
      data.metadata.aiContext = context;
      data.metadata.aiModel = model;
    } else {
      // If it's a simple array, wrap it in a proper {metadata, results} structure
      data = {
        metadata: {
          timestamp: new Date().toISOString(),
          totalItems: updatedPosts.length,
          aiContext: context,
          aiModel: model,
          analysisType: "cli",
        },
        results: updatedPosts,
      };
    }

    // Generate output filename if not provided: original name + "-ai" suffix,
    // placed next to the input file.
    if (!outputFile) {
      const originalName = path.basename(inputFile, path.extname(inputFile));
      outputFile = path.join(
        path.dirname(inputFile),
        `${originalName}-ai.json`
      );
    }

    // Save updated results back to file
    fs.writeFileSync(outputFile, JSON.stringify(data, null, 2));

    // Show summary statistics
    const relevant = analysis.filter((a) => a.isRelevant).length;
    const irrelevant = analysis.filter((a) => !a.isRelevant).length;
    const avgConfidence =
      analysis.reduce((sum, a) => sum + a.confidence, 0) / analysis.length;
    logger.success("✅ AI analysis completed and integrated");
    logger.info(`📊 Context: "${context}"`);
    logger.info(`📈 Total items analyzed: ${analysis.length}`);
    logger.info(
      `✅ Relevant items: ${relevant} (${(
        (relevant / analysis.length) *
        100
      ).toFixed(1)}%)`
    );
    logger.info(
      `❌ Irrelevant items: ${irrelevant} (${(
        (irrelevant / analysis.length) *
        100
      ).toFixed(1)}%)`
    );
    logger.info(`🎯 Average confidence: ${avgConfidence.toFixed(2)}`);
    logger.file(`🧠 Updated results saved to: ${outputFile}`);
  } catch (error) {
    logger.error(`❌ Analysis failed: ${error.message}`);
    process.exit(1);
  }
}

// Run the CLI
main();