linkedout/linkedin-parser/index.js

#!/usr/bin/env node

/**
 * LinkedIn Parser - Refactored
 *
 * Uses core-parser for browser management and linkedin-strategy for parsing logic
 */

const path = require("path");
const fs = require("fs");
const CoreParser = require("../core-parser");
const { linkedinStrategy } = require("./strategies/linkedin-strategy");
const { logger, analyzeBatch, checkOllamaStatus, DEFAULT_MODEL } = require("ai-analyzer");

// Environment loading: prefer linkedin-parser/.env, then the parent
// directory's .env, and otherwise defer to dotenv's default lookup.
const localEnvPath = path.join(__dirname, ".env");
const rootEnvPath = path.join(__dirname, "..", ".env");

// First candidate that exists on disk wins; later ones are not probed.
const envFilePath = [localEnvPath, rootEnvPath].find((candidate) =>
  fs.existsSync(candidate)
);

const dotenv = require("dotenv");
if (envFilePath) {
  dotenv.config({ path: envFilePath });
} else {
  // No explicit file found: let dotenv use its default behavior.
  dotenv.config();
}

// Configuration from environment
const LINKEDIN_USERNAME = process.env.LINKEDIN_USERNAME;
const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD;
// Headless unless HEADLESS is exactly the string "false".
const HEADLESS = process.env.HEADLESS !== "false";
// Comma-separated list; split and trimmed by startLinkedInParser.
const SEARCH_KEYWORDS = process.env.SEARCH_KEYWORDS || "layoff,downsizing";
const LOCATION_FILTER = process.env.LOCATION_FILTER;
// AI analysis is on unless ENABLE_AI_ANALYSIS is exactly the string "false".
const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS !== "false";
const AI_CONTEXT = process.env.AI_CONTEXT || "job market analysis and trends";
const OLLAMA_MODEL = process.env.OLLAMA_MODEL || DEFAULT_MODEL;
// Parse as base 10 and fall back to 50 for missing, non-numeric, zero or
// negative values, so a bad MAX_RESULTS can never reach the strategy.
const parsedMaxResults = Number.parseInt(process.env.MAX_RESULTS, 10);
const MAX_RESULTS = parsedMaxResults > 0 ? parsedMaxResults : 50;

/**
 * Main LinkedIn parser function.
 *
 * Runs `linkedinStrategy` with the environment-derived configuration,
 * optionally embeds an AI analysis into each result, writes the results (and
 * any rejected posts) as JSON files into the `results` directory next to this
 * file, and returns the data written to the results file.
 *
 * @param {object} [options={}] - Accepted for CLI compatibility; this function
 *   does not currently read it. All settings come from environment variables.
 * @returns {Promise<{metadata: object, results: Array<object>}>} The object
 *   that was serialized to the results file.
 * @throws {Error} If credentials are missing, if SEARCH_KEYWORDS contains no
 *   non-empty keyword, or if any later step fails. Failures are logged and
 *   rethrown; `coreParser.cleanup()` is awaited in every case.
 */
async function startLinkedInParser(options = {}) {
  const coreParser = new CoreParser({
    headless: HEADLESS,
    timeout: 30000,
  });

  try {
    logger.step("🚀 LinkedIn Parser Starting...");

    // Validate credentials
    if (!LINKEDIN_USERNAME || !LINKEDIN_PASSWORD) {
      throw new Error(
        "LinkedIn credentials not found. Please set LINKEDIN_USERNAME and LINKEDIN_PASSWORD in .env file"
      );
    }

    // Parse keywords; drop blanks so stray or trailing commas never produce
    // an empty-string search term.
    const keywords = SEARCH_KEYWORDS.split(",")
      .map((k) => k.trim())
      .filter((k) => k.length > 0);
    if (keywords.length === 0) {
      throw new Error(
        "No search keywords found. Please set SEARCH_KEYWORDS to a comma-separated list of keywords"
      );
    }

    logger.info(`🔍 Search Keywords: ${keywords.join(", ")}`);
    logger.info(`📍 Location Filter: ${LOCATION_FILTER || "None"}`);
    logger.info(
      `🧠 AI Analysis: ${ENABLE_AI_ANALYSIS ? "Enabled" : "Disabled"}`
    );
    logger.info(`📊 Max Results: ${MAX_RESULTS}`);

    // Run LinkedIn parsing strategy
    const parseResult = await linkedinStrategy(coreParser, {
      keywords,
      locationFilter: LOCATION_FILTER,
      maxResults: MAX_RESULTS,
      credentials: {
        username: LINKEDIN_USERNAME,
        password: LINKEDIN_PASSWORD,
      },
    });

    // Default rejectedResults to an empty list so a strategy result without
    // that field does not crash the metadata/summary code below.
    const { results, rejectedResults = [] } = parseResult;

    // AI Analysis if enabled - embed results into each post
    let resultsWithAI = results;
    let aiAnalysisCompleted = false;
    if (ENABLE_AI_ANALYSIS && results.length > 0) {
      logger.step("🧠 Running AI Analysis...");

      const ollamaAvailable = await checkOllamaStatus(OLLAMA_MODEL);
      if (ollamaAvailable) {
        // Prepare data for analysis (analyzeBatch expects posts with 'text' field)
        const analysisData = results.map((post) => ({
          text: post.text || post.content || "",
          location: post.location || "",
          keyword: post.keyword || "",
          timestamp: post.timestamp || post.extractedAt || "",
        }));

        const analysisResults = await analyzeBatch(
          analysisData,
          AI_CONTEXT,
          OLLAMA_MODEL
        );

        // Embed AI analysis into each result. Results are matched to posts by
        // index; a post with no matching entry is kept unchanged instead of
        // throwing on a missing analysis object.
        resultsWithAI = results.map((post, index) => {
          const aiResult = analysisResults[index];
          if (!aiResult) {
            return post;
          }
          return {
            ...post,
            aiAnalysis: {
              isRelevant: aiResult.isRelevant,
              confidence: aiResult.confidence,
              reasoning: aiResult.reasoning,
              context: AI_CONTEXT,
              model: OLLAMA_MODEL,
              analyzedAt: new Date().toISOString(),
            },
          };
        });

        const missingAnalysisCount = results.filter(
          (_, index) => !analysisResults[index]
        ).length;
        if (missingAnalysisCount > 0) {
          logger.warning(
            `⚠️  AI analysis returned no result for ${missingAnalysisCount} posts`
          );
        }

        aiAnalysisCompleted = true;
        logger.success(`✅ AI Analysis completed for ${results.length} posts`);
      } else {
        logger.warning("⚠️  Ollama not available, skipping AI analysis");
      }
    }

    // Prepare results with embedded AI analysis. Fields set to undefined are
    // omitted from the file by JSON.stringify.
    const outputData = {
      metadata: {
        timestamp: new Date().toISOString(),
        totalPosts: resultsWithAI.length,
        rejectedPosts: rejectedResults.length,
        aiAnalysisEnabled: ENABLE_AI_ANALYSIS,
        aiAnalysisCompleted: aiAnalysisCompleted,
        aiContext: aiAnalysisCompleted ? AI_CONTEXT : undefined,
        aiModel: aiAnalysisCompleted ? OLLAMA_MODEL : undefined,
        locationFilter: LOCATION_FILTER || undefined,
        parser: "linkedin-parser",
        version: "2.0.0",
      },
      results: resultsWithAI,
    };

    // Prepare rejected posts file
    const rejectedData = rejectedResults.map((post) => ({
      rejected: true,
      reason: post.rejectionReason || "Location filter failed: Location not in filter",
      keyword: post.keyword,
      text: post.text || post.content,
      profileLink: post.profileLink || post.authorUrl,
      location: post.location || post.profileLocation,
      timestamp: post.timestamp || post.extractedAt,
    }));

    const resultsDir = path.join(__dirname, "results");
    if (!fs.existsSync(resultsDir)) {
      fs.mkdirSync(resultsDir, { recursive: true });
    }

    // ":" and "." are replaced so the ISO timestamp is usable in a filename.
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
    const resultsFilename = `linkedin-results-${timestamp}.json`;
    const rejectedFilename = `linkedin-rejected-${timestamp}.json`;
    const resultsFilepath = path.join(resultsDir, resultsFilename);
    const rejectedFilepath = path.join(resultsDir, rejectedFilename);

    // Save results with AI analysis
    fs.writeFileSync(resultsFilepath, JSON.stringify(outputData, null, 2));

    // Save rejected posts separately (only when there are any)
    if (rejectedData.length > 0) {
      fs.writeFileSync(
        rejectedFilepath,
        JSON.stringify(rejectedData, null, 2)
      );
    }

    // Final summary
    logger.success("✅ LinkedIn parsing completed successfully!");
    logger.info(`📊 Total posts found: ${resultsWithAI.length}`);
    logger.info(`❌ Total rejected: ${rejectedResults.length}`);
    logger.info(`📁 Results saved to: ${resultsFilepath}`);
    if (rejectedData.length > 0) {
      logger.info(`📁 Rejected posts saved to: ${rejectedFilepath}`);
    }

    return outputData;
  } catch (error) {
    logger.error(`❌ LinkedIn parser failed: ${error.message}`);
    throw error;
  } finally {
    // Always release the parser, whether the run succeeded or failed.
    await coreParser.cleanup();
  }
}

/**
 * Convert command line arguments into an options object.
 *
 * Only arguments starting with `--` are considered. `--key=value` maps `key`
 * to the text after the FIRST `=`, so values may themselves contain `=`.
 * `--flag` (or an empty value, `--flag=`) maps `flag` to `true`.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {Object<string, string|boolean>} Parsed options.
 */
function parseCliOptions(argv) {
  const options = {};

  for (const arg of argv) {
    if (!arg.startsWith("--")) {
      continue;
    }

    const body = arg.slice(2);
    const separatorIndex = body.indexOf("=");
    const key = separatorIndex === -1 ? body : body.slice(0, separatorIndex);
    const value = separatorIndex === -1 ? "" : body.slice(separatorIndex + 1);
    options[key] = value || true;
  }

  return options;
}

// CLI handling: run the parser only when this file is executed directly,
// exiting with 0 on success and 1 on failure.
if (require.main === module) {
  const options = parseCliOptions(process.argv.slice(2));

  startLinkedInParser(options)
    .then(() => process.exit(0))
    .catch((error) => {
      console.error("Fatal error:", error.message);
      process.exit(1);
    });
}

module.exports = { startLinkedInParser };