#!/usr/bin/env node
/**
 * Job Search Parser - Refactored
 *
 * Uses core-parser for browser management and site-specific strategies for parsing logic
 */

const path = require("path");
const fs = require("fs");
const CoreParser = require("../core-parser");
const { skipthedriveStrategy } = require("./strategies/skipthedrive-strategy");
const { logger, analyzeBatch, checkOllamaStatus } = require("ai-analyzer");

// Load environment variables
require("dotenv").config({ path: path.join(__dirname, ".env") });

// Configuration from environment
const HEADLESS = process.env.HEADLESS !== "false";
const SEARCH_KEYWORDS =
  process.env.SEARCH_KEYWORDS || "software engineer,developer,programmer";
const LOCATION_FILTER = process.env.LOCATION_FILTER;
const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS === "true";
// FIX: always pass a radix to parseInt; falls back to 5 on unset/NaN/0.
const MAX_PAGES = Number.parseInt(process.env.MAX_PAGES, 10) || 5;

// Available site strategies, keyed by the site name accepted via --sites=
const SITE_STRATEGIES = {
  skipthedrive: skipthedriveStrategy,
  // Add more site strategies here
  // indeed: indeedStrategy,
  // glassdoor: glassdoorStrategy,
};

/**
 * Parse command line arguments.
 *
 * Recognized flags: --sites=a,b --keywords=x,y --location=loc --max-pages=N
 * Unrecognized arguments are ignored.
 *
 * @returns {{sites: string[], keywords: string[]|null, locationFilter: string|null, maxPages: number}}
 */
function parseArguments() {
  const args = process.argv.slice(2);
  const options = {
    sites: ["skipthedrive"], // default
    keywords: null,
    locationFilter: null,
    maxPages: MAX_PAGES,
  };

  for (const arg of args) {
    // FIX: take everything after the FIRST "=" so values that themselves
    // contain "=" (URLs, queries) are not truncated by split("=")[1].
    const eq = arg.indexOf("=");
    if (eq === -1) continue;
    const flag = arg.slice(0, eq);
    const value = arg.slice(eq + 1);

    if (flag === "--sites") {
      options.sites = value.split(",").map((s) => s.trim());
    } else if (flag === "--keywords") {
      options.keywords = value.split(",").map((k) => k.trim());
    } else if (flag === "--location") {
      options.locationFilter = value;
    } else if (flag === "--max-pages") {
      // FIX: radix passed explicitly; invalid/0 falls back to env default.
      options.maxPages = Number.parseInt(value, 10) || MAX_PAGES;
    }
  }

  return options;
}

/**
 * Persist the run's output to results/job-search-results-<timestamp>.json.
 *
 * @param {object} outputData - Full run payload (metadata + results).
 * @returns {string} Absolute path of the written file.
 */
function saveResults(outputData) {
  const resultsDir = path.join(__dirname, "results");
  if (!fs.existsSync(resultsDir)) {
    fs.mkdirSync(resultsDir, { recursive: true });
  }

  // ":" and "." are not filesystem-safe on all platforms.
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
  const filepath = path.join(resultsDir, `job-search-results-${timestamp}.json`);
  fs.writeFileSync(filepath, JSON.stringify(outputData, null, 2));
  return filepath;
}

/**
 * Main job search parser function.
 *
 * Runs every selected site strategy against a shared CoreParser browser,
 * optionally performs AI analysis via Ollama, saves a JSON report, and
 * returns the report object.
 *
 * @param {object} [options] - Overrides for CLI options (same shape as parseArguments()).
 * @returns {Promise<object>} The saved output payload.
 * @throws Re-throws any fatal error after logging; per-site failures are
 *         caught and recorded in siteResults instead of aborting the run.
 */
async function startJobSearchParser(options = {}) {
  const cliOptions = parseArguments();
  // Programmatic options win over CLI flags.
  const finalOptions = { ...cliOptions, ...options };

  const coreParser = new CoreParser({
    headless: HEADLESS,
    timeout: 30000,
  });

  try {
    logger.step("šŸš€ Job Search Parser Starting...");

    // Resolve effective search parameters (CLI/options > environment).
    const keywords =
      finalOptions.keywords || SEARCH_KEYWORDS.split(",").map((k) => k.trim());
    const locationFilter = finalOptions.locationFilter || LOCATION_FILTER;
    const sites = finalOptions.sites;

    logger.info(`šŸ“¦ Selected job sites: ${sites.join(", ")}`);
    logger.info(`šŸ” Search Keywords: ${keywords.join(", ")}`);
    logger.info(`šŸ“ Location Filter: ${locationFilter || "None"}`);
    logger.info(
      `🧠 AI Analysis: ${ENABLE_AI_ANALYSIS ? "Enabled" : "Disabled"}`
    );

    const allResults = [];
    const allRejectedResults = [];
    const siteResults = {};

    // Process each selected site sequentially (one shared browser instance).
    for (const site of sites) {
      const strategy = SITE_STRATEGIES[site];
      if (!strategy) {
        logger.error(`āŒ Unknown site strategy: ${site}`);
        continue;
      }

      try {
        logger.step(`\n🌐 Parsing ${site}...`);
        const startTime = Date.now();

        const parseResult = await strategy(coreParser, {
          keywords,
          locationFilter,
          maxPages: finalOptions.maxPages,
        });

        // FIX: default to empty arrays so a strategy that omits results or
        // rejectedResults doesn't crash the spread below with a TypeError.
        const { results = [], rejectedResults = [], summary } = parseResult;
        const duration = ((Date.now() - startTime) / 1000).toFixed(2);

        // Collect results
        allResults.push(...results);
        allRejectedResults.push(...rejectedResults);

        siteResults[site] = {
          count: results.length,
          rejected: rejectedResults.length,
          duration: `${duration}s`,
          summary,
        };

        logger.success(
          `āœ… ${site} completed in ${duration}s - Found ${results.length} jobs`
        );
      } catch (error) {
        // A single site failure is recorded but does not abort the run.
        logger.error(`āŒ ${site} parsing failed: ${error.message}`);
        siteResults[site] = {
          count: 0,
          rejected: 0,
          duration: "0s",
          error: error.message,
        };
      }
    }

    // AI Analysis if enabled
    let analysisResults = null;
    if (ENABLE_AI_ANALYSIS && allResults.length > 0) {
      logger.step("🧠 Running AI Analysis...");
      const ollamaStatus = await checkOllamaStatus();

      if (ollamaStatus.available) {
        analysisResults = await analyzeBatch(allResults, {
          context:
            "Job market analysis focusing on job postings, skills, and trends",
        });
        logger.success(
          `āœ… AI Analysis completed for ${allResults.length} jobs`
        );
      } else {
        logger.warning("āš ļø Ollama not available, skipping AI analysis");
      }
    }

    // Assemble and save the run report.
    const outputData = {
      metadata: {
        extractedAt: new Date().toISOString(),
        parser: "job-search-parser",
        version: "2.0.0",
        sites: sites,
        keywords: keywords.join(", "),
        locationFilter,
        analysisResults,
      },
      results: allResults,
      rejectedResults: allRejectedResults,
      siteResults,
    };

    const filepath = saveResults(outputData);

    // Final summary
    logger.step("\nšŸ“Š Job Search Parser Summary");
    logger.success(`āœ… Total jobs found: ${allResults.length}`);
    logger.info(`āŒ Total rejected: ${allRejectedResults.length}`);
    logger.info(`šŸ“ Results saved to: ${filepath}`);

    logger.info("\nšŸ“ˆ Results by site:");
    for (const [site, stats] of Object.entries(siteResults)) {
      if (stats.error) {
        logger.error(`   ${site}: ERROR - ${stats.error}`);
      } else {
        logger.info(
          `   ${site}: ${stats.count} jobs found, ${stats.rejected} rejected (${stats.duration})`
        );
      }
    }

    logger.success("\nāœ… Job Search Parser completed successfully!");
    return outputData;
  } catch (error) {
    logger.error(`āŒ Job Search Parser failed: ${error.message}`);
    throw error;
  } finally {
    // Always release the browser, even on failure.
    await coreParser.cleanup();
  }
}

// CLI handling
if (require.main === module) {
  startJobSearchParser()
    .then(() => process.exit(0))
    .catch((error) => {
      console.error("Fatal error:", error.message);
      process.exit(1);
    });
}

module.exports = { startJobSearchParser };