#!/usr/bin/env node

/**
 * LinkedIn Parser - Refactored
 *
 * Uses core-parser for browser management and linkedin-strategy for parsing logic
 */

const path = require("path");
const fs = require("fs");
const CoreParser = require("../core-parser");
const { linkedinStrategy } = require("./strategies/linkedin-strategy");
// NOTE(review): ai-analyzer is required before the .env file is loaded below;
// if it reads process.env at import time it will not see .env values — confirm.
const {
  logger,
  analyzeBatch,
  checkOllamaStatus,
  DEFAULT_MODEL,
} = require("ai-analyzer");

// Load environment variables - check both linkedin-parser/.env and root .env
const localEnvPath = path.join(__dirname, ".env");
const rootEnvPath = path.join(__dirname, "..", ".env");

// Try local .env first, then root .env
if (fs.existsSync(localEnvPath)) {
  require("dotenv").config({ path: localEnvPath });
} else if (fs.existsSync(rootEnvPath)) {
  require("dotenv").config({ path: rootEnvPath });
} else {
  // Fall back to dotenv's default lookup (.env in the current working directory)
  require("dotenv").config();
}

// Fallback used when MAX_RESULTS is unset or not a positive integer
const DEFAULT_MAX_RESULTS = 50;

// Configuration from environment
const LINKEDIN_USERNAME = process.env.LINKEDIN_USERNAME;
const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD;
// Headless unless explicitly disabled with HEADLESS=false
const HEADLESS = process.env.HEADLESS !== "false";
// Comma-separated list, e.g. "layoff,downsizing,job cuts"
const SEARCH_KEYWORDS = process.env.SEARCH_KEYWORDS || "layoff";
const LOCATION_FILTER = process.env.LOCATION_FILTER;
// AI analysis is on unless explicitly disabled with ENABLE_AI_ANALYSIS=false
const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS !== "false";
const AI_CONTEXT = process.env.AI_CONTEXT || "job market analysis and trends";
const OLLAMA_MODEL = process.env.OLLAMA_MODEL || DEFAULT_MODEL;
const MAX_RESULTS = parseMaxResults(
  process.env.MAX_RESULTS,
  DEFAULT_MAX_RESULTS
);
// Opt-in: only enabled with EXTRACT_LOCATION_FROM_PROFILE=true
const EXTRACT_LOCATION_FROM_PROFILE =
  process.env.EXTRACT_LOCATION_FROM_PROFILE === "true";

/**
 * Parse a base-10 positive integer, falling back when the value is missing,
 * non-numeric, zero or negative.
 *
 * @param {string|undefined} rawValue - Raw value (typically from process.env).
 * @param {number} fallback - Value returned when rawValue is unusable.
 * @returns {number} The parsed positive integer or the fallback.
 */
function parseMaxResults(rawValue, fallback) {
  const parsed = Number.parseInt(rawValue, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * Split a comma-separated keyword string into trimmed, non-empty keywords.
 *
 * @param {string} rawKeywords - Comma-separated keywords.
 * @returns {string[]} Keywords with surrounding whitespace and empty entries removed.
 */
function parseKeywords(rawKeywords) {
  return rawKeywords
    .split(",")
    .map((keyword) => keyword.trim())
    .filter((keyword) => keyword.length > 0);
}

/**
 * Attach an `aiAnalysis` object to each post that has an analysis result at
 * the same index. Posts without a matching result are returned unchanged
 * instead of crashing the run.
 *
 * @param {Object[]} posts - Posts returned by the parsing strategy.
 * @param {Object[]} analysisResults - Value returned by analyzeBatch for those posts.
 * @returns {{posts: Object[], analyzedCount: number}} Enriched posts and the
 *   number of posts that received an analysis.
 */
function embedAiAnalysis(posts, analysisResults) {
  const available = Array.isArray(analysisResults) ? analysisResults : [];
  const enriched = [];
  let analyzedCount = 0;

  posts.forEach((post, index) => {
    const aiResult = available[index];
    if (!aiResult) {
      enriched.push(post);
      return;
    }

    analyzedCount += 1;
    enriched.push({
      ...post,
      aiAnalysis: {
        isRelevant: aiResult.isRelevant,
        confidence: aiResult.confidence,
        reasoning: aiResult.reasoning,
        context: AI_CONTEXT,
        model: OLLAMA_MODEL,
        analyzedAt: new Date().toISOString(),
      },
    });
  });

  return { posts: enriched, analyzedCount };
}

/**
 * Convert `--key=value` / `--flag` command line arguments into an object.
 * Arguments that do not start with `--` are ignored. Only the first `=`
 * separates key from value, so values may themselves contain `=`.
 * A flag without a value (or with an empty value) maps to `true`.
 *
 * @param {string[]} args - Raw arguments (process.argv without node/script).
 * @returns {Object} Parsed options keyed by argument name.
 */
function parseCliArgs(args) {
  const options = {};

  for (const arg of args) {
    if (!arg.startsWith("--")) {
      continue;
    }

    const body = arg.slice(2);
    const separatorIndex = body.indexOf("=");
    if (separatorIndex === -1) {
      options[body] = true;
      continue;
    }

    const key = body.slice(0, separatorIndex);
    const value = body.slice(separatorIndex + 1);
    options[key] = value || true;
  }

  return options;
}

/**
 * Main LinkedIn parser function.
 *
 * Runs the LinkedIn strategy with the environment-derived configuration,
 * optionally embeds AI analysis into each post, writes the results (and any
 * rejected posts) as JSON files under ./results, and returns the results
 * payload. The browser managed by CoreParser is cleaned up in all cases.
 *
 * @param {Object} [options={}] - Accepted for interface compatibility;
 *   currently not read by this function (configuration comes from
 *   environment variables).
 * @returns {Promise<{metadata: Object, results: Object[]}>} The data written
 *   to the results file.
 * @throws {Error} When credentials or search keywords are missing, or when
 *   parsing, analysis or file output fails (the error is logged and rethrown).
 */
async function startLinkedInParser(options = {}) {
  const coreParser = new CoreParser({
    headless: HEADLESS,
    timeout: 30000,
  });

  try {
    logger.step("🚀 LinkedIn Parser Starting...");

    // Validate credentials
    if (!LINKEDIN_USERNAME || !LINKEDIN_PASSWORD) {
      throw new Error(
        "LinkedIn credentials not found. Please set LINKEDIN_USERNAME and LINKEDIN_PASSWORD in .env file"
      );
    }

    // Parse keywords; empty entries (e.g. from a trailing comma) are dropped
    const keywords = parseKeywords(SEARCH_KEYWORDS);
    if (keywords.length === 0) {
      throw new Error(
        "No search keywords provided. Set SEARCH_KEYWORDS to a comma-separated list"
      );
    }

    logger.info(`🔍 Search Keywords: ${keywords.join(", ")}`);
    logger.info(`📍 Location Filter: ${LOCATION_FILTER || "None"}`);
    logger.info(
      `🧠 AI Analysis: ${ENABLE_AI_ANALYSIS ? "Enabled" : "Disabled"}`
    );
    logger.info(`📊 Max Results: ${MAX_RESULTS}`);

    // Run LinkedIn parsing strategy
    const parseResult = await linkedinStrategy(coreParser, {
      keywords,
      locationFilter: LOCATION_FILTER,
      maxResults: MAX_RESULTS,
      extractLocationFromProfile: EXTRACT_LOCATION_FROM_PROFILE,
      credentials: {
        username: LINKEDIN_USERNAME,
        password: LINKEDIN_PASSWORD,
      },
    });

    // Default to empty lists so a strategy that omits a field cannot crash us
    const { results = [], rejectedResults = [] } = parseResult;

    // AI Analysis if enabled - embed results into each post
    let resultsWithAI = results;
    let aiAnalysisCompleted = false;

    if (ENABLE_AI_ANALYSIS && results.length > 0) {
      logger.step("🧠 Running AI Analysis...");

      const ollamaAvailable = await checkOllamaStatus(OLLAMA_MODEL);

      if (ollamaAvailable) {
        // Prepare data for analysis (analyzeBatch expects posts with 'text' field)
        const analysisData = results.map((post) => ({
          text: post.text || post.content || "",
          location: post.location || "",
          keyword: post.keyword || "",
          timestamp: post.timestamp || post.extractedAt || "",
        }));

        const analysisResults = await analyzeBatch(
          analysisData,
          AI_CONTEXT,
          OLLAMA_MODEL
        );

        // Embed AI analysis into each result
        const embedded = embedAiAnalysis(results, analysisResults);
        resultsWithAI = embedded.posts;

        // Only report completion when every post received an analysis
        aiAnalysisCompleted = embedded.analyzedCount === results.length;
        if (aiAnalysisCompleted) {
          logger.success(
            `✅ AI Analysis completed for ${results.length} posts`
          );
        } else {
          logger.warning(
            `⚠️ AI analysis returned results for ${embedded.analyzedCount} of ${results.length} posts`
          );
        }
      } else {
        logger.warning("⚠️ Ollama not available, skipping AI analysis");
      }
    }

    // Single timestamp shared by the metadata and the output filenames
    const finishedAt = new Date().toISOString();

    // Prepare results with embedded AI analysis
    const outputData = {
      metadata: {
        timestamp: finishedAt,
        totalPosts: resultsWithAI.length,
        rejectedPosts: rejectedResults.length,
        aiAnalysisEnabled: ENABLE_AI_ANALYSIS,
        aiAnalysisCompleted: aiAnalysisCompleted,
        aiContext: aiAnalysisCompleted ? AI_CONTEXT : undefined,
        aiModel: aiAnalysisCompleted ? OLLAMA_MODEL : undefined,
        locationFilter: LOCATION_FILTER || undefined,
        parser: "linkedin-parser",
        version: "2.0.0",
      },
      results: resultsWithAI,
    };

    // Prepare rejected posts file
    const rejectedData = rejectedResults.map((post) => ({
      rejected: true,
      reason:
        post.rejectionReason ||
        "Location filter failed: Location not in filter",
      keyword: post.keyword,
      text: post.text || post.content,
      profileLink: post.profileLink || post.authorUrl,
      location: post.location || post.profileLocation,
      timestamp: post.timestamp || post.extractedAt,
    }));

    // recursive: true makes this a no-op when the directory already exists
    const resultsDir = path.join(__dirname, "results");
    fs.mkdirSync(resultsDir, { recursive: true });

    // ':' and '.' are replaced so the timestamp is safe to use in a filename
    const timestamp = finishedAt.replace(/[:.]/g, "-");
    const resultsFilename = `linkedin-results-${timestamp}.json`;
    const rejectedFilename = `linkedin-rejected-${timestamp}.json`;
    const resultsFilepath = path.join(resultsDir, resultsFilename);
    const rejectedFilepath = path.join(resultsDir, rejectedFilename);

    // Save results with AI analysis
    fs.writeFileSync(resultsFilepath, JSON.stringify(outputData, null, 2));

    // Save rejected posts separately
    if (rejectedData.length > 0) {
      fs.writeFileSync(
        rejectedFilepath,
        JSON.stringify(rejectedData, null, 2)
      );
    }

    // Final summary
    logger.success("✅ LinkedIn parsing completed successfully!");
    logger.info(`📊 Total posts found: ${resultsWithAI.length}`);
    logger.info(`❌ Total rejected: ${rejectedResults.length}`);
    logger.info(`📁 Results saved to: ${resultsFilepath}`);
    if (rejectedData.length > 0) {
      logger.info(`📁 Rejected posts saved to: ${rejectedFilepath}`);
    }

    return outputData;
  } catch (error) {
    logger.error(`❌ LinkedIn parser failed: ${error.message}`);
    throw error;
  } finally {
    await coreParser.cleanup();
  }
}

// CLI handling
if (require.main === module) {
  // Parse command line arguments
  const options = parseCliArgs(process.argv.slice(2));

  startLinkedInParser(options)
    .then(() => process.exit(0))
    .catch((error) => {
      console.error("Fatal error:", error.message);
      process.exit(1);
    });
}

module.exports = { startLinkedInParser };