230 lines
6.5 KiB
JavaScript
230 lines
6.5 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
/**
 * Job Search Parser - Refactored
 *
 * Uses core-parser for browser management and site-specific strategies for parsing logic
 */
|
|
|
|
const path = require("path");
|
|
const fs = require("fs");
|
|
const CoreParser = require("../core-parser");
|
|
const { skipthedriveStrategy } = require("./strategies/skipthedrive-strategy");
|
|
const { logger, analyzeBatch, checkOllamaStatus } = require("ai-analyzer");
|
|
|
|
// Load environment variables
|
|
require("dotenv").config({ path: path.join(__dirname, ".env") });
|
|
|
|
// Configuration from environment

// Headless unless HEADLESS is explicitly set to the string "false".
const HEADLESS = process.env.HEADLESS !== "false";

// Comma-separated default keywords, used when no keywords are supplied elsewhere.
const SEARCH_KEYWORDS =
  process.env.SEARCH_KEYWORDS || "software engineer,developer,programmer";

// Optional location filter; undefined when the variable is not set.
const LOCATION_FILTER = process.env.LOCATION_FILTER;

// AI analysis is opt-in: only the exact string "true" enables it.
const ENABLE_AI_ANALYSIS = process.env.ENABLE_AI_ANALYSIS === "true";

// Page limit: parsed as base-10; anything that is not a positive integer
// (unset, non-numeric, zero, negative) falls back to 5.
const envMaxPages = Number.parseInt(process.env.MAX_PAGES, 10);
const MAX_PAGES = envMaxPages > 0 ? envMaxPages : 5;
|
|
|
|
// Available site strategies, keyed by the name accepted in `--sites=`.
// Built on a null-prototype object so that looking up a name such as
// "constructor" or "toString" yields undefined instead of an inherited
// Object.prototype member.
const SITE_STRATEGIES = Object.assign(Object.create(null), {
  skipthedrive: skipthedriveStrategy,
  // Add more site strategies here
  // indeed: indeedStrategy,
  // glassdoor: glassdoorStrategy,
});
|
|
|
|
/**
 * Parse command line arguments from `process.argv`.
 *
 * Recognised flags, all of the form `--name=value`:
 *   --sites=a,b       comma-separated site strategy names
 *   --keywords=a,b    comma-separated search keywords
 *   --location=text   location filter
 *   --max-pages=N     page limit (base-10); falls back to MAX_PAGES unless
 *                     N is a positive integer
 *
 * Everything after the first "=" is treated as the value, so values may
 * themselves contain "=". Empty list entries are dropped, and a list flag
 * that yields no entries leaves the default in place. Arguments that are not
 * one of the flags above are ignored. When a flag is repeated, the last
 * occurrence wins.
 *
 * @returns {{sites: string[], keywords: (string[]|null), locationFilter: (string|null), maxPages: number}}
 */
function parseArguments() {
  const options = {
    sites: ["skipthedrive"], // default
    keywords: null,
    locationFilter: null,
    maxPages: MAX_PAGES,
  };

  // Split a comma-separated value into trimmed, non-empty entries.
  const toList = (value) =>
    value
      .split(",")
      .map((entry) => entry.trim())
      .filter((entry) => entry.length > 0);

  for (const arg of process.argv.slice(2)) {
    const separatorIndex = arg.indexOf("=");
    if (separatorIndex === -1) {
      continue;
    }

    const flag = arg.slice(0, separatorIndex);
    const value = arg.slice(separatorIndex + 1);

    switch (flag) {
      case "--sites": {
        const sites = toList(value);
        if (sites.length > 0) {
          options.sites = sites;
        }
        break;
      }
      case "--keywords": {
        const keywords = toList(value);
        if (keywords.length > 0) {
          options.keywords = keywords;
        }
        break;
      }
      case "--location":
        options.locationFilter = value;
        break;
      case "--max-pages": {
        const pages = Number.parseInt(value, 10);
        // NaN, zero and negative values are not usable page limits.
        options.maxPages = pages > 0 ? pages : MAX_PAGES;
        break;
      }
      default:
        // Unknown flag: ignored.
        break;
    }
  }

  return options;
}
|
|
|
|
/**
 * Main job search parser function.
 *
 * Runs each selected site strategy against a shared CoreParser instance,
 * optionally runs AI analysis over the collected jobs, writes the combined
 * output to a timestamped JSON file in the `results` directory next to this
 * file, and logs a summary.
 *
 * Settings are resolved in this order: the `options` argument, then
 * command-line flags (parseArguments), then the environment-derived defaults.
 * A failure inside one site strategy is logged and recorded in `siteResults`
 * without stopping the remaining sites.
 *
 * @param {Object} [options] - Programmatic overrides; entries whose value is
 *   null or undefined are ignored so they cannot clobber CLI/default values.
 * @param {string[]} [options.sites] - Site strategy names to run.
 * @param {string[]} [options.keywords] - Search keywords.
 * @param {string} [options.locationFilter] - Location filter.
 * @param {number} [options.maxPages] - Page limit passed to each strategy.
 * @returns {Promise<{metadata: Object, results: Array, rejectedResults: Array, siteResults: Object}>}
 *   The same data that was written to the results file.
 * @throws Rethrows any error raised outside the per-site handling (for
 *   example from AI analysis or from writing the results file).
 */
async function startJobSearchParser(options = {}) {
  const finalOptions = parseArguments();
  for (const [key, value] of Object.entries(options || {})) {
    if (value !== undefined && value !== null) {
      finalOptions[key] = value;
    }
  }

  const coreParser = new CoreParser({
    headless: HEADLESS,
    timeout: 30000,
  });

  // Tracks whether the main body threw, so a cleanup failure in `finally`
  // never replaces the original error.
  let runFailed = false;

  try {
    logger.step("🚀 Job Search Parser Starting...");

    // Parse keywords
    const keywords =
      finalOptions.keywords || SEARCH_KEYWORDS.split(",").map((k) => k.trim());
    const locationFilter = finalOptions.locationFilter || LOCATION_FILTER;
    const sites = finalOptions.sites;

    logger.info(`📦 Selected job sites: ${sites.join(", ")}`);
    logger.info(`🔍 Search Keywords: ${keywords.join(", ")}`);
    logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
    logger.info(
      `🧠 AI Analysis: ${ENABLE_AI_ANALYSIS ? "Enabled" : "Disabled"}`
    );

    const allResults = [];
    const allRejectedResults = [];
    const siteResults = {};

    // Process each selected site
    for (const site of sites) {
      // Own-property check: names like "constructor" must not resolve to
      // members inherited from Object.prototype.
      const strategy = Object.prototype.hasOwnProperty.call(
        SITE_STRATEGIES,
        site
      )
        ? SITE_STRATEGIES[site]
        : undefined;
      if (!strategy) {
        logger.error(`❌ Unknown site strategy: ${site}`);
        continue;
      }

      // Started outside the try so a failed site still reports real elapsed time.
      const startTime = Date.now();
      const elapsedSeconds = () =>
        ((Date.now() - startTime) / 1000).toFixed(2);

      try {
        logger.step(`\n🌐 Parsing ${site}...`);

        const parseResult = await strategy(coreParser, {
          keywords,
          locationFilter,
          maxPages: finalOptions.maxPages,
        });

        // A strategy that omits either list is treated as returning none.
        const { results = [], rejectedResults = [], summary } = parseResult;
        const duration = elapsedSeconds();

        // Collect results one by one; spreading a very large array into
        // push() can exceed the engine's argument limit.
        for (const job of results) {
          allResults.push(job);
        }
        for (const rejected of rejectedResults) {
          allRejectedResults.push(rejected);
        }

        siteResults[site] = {
          count: results.length,
          rejected: rejectedResults.length,
          duration: `${duration}s`,
          summary,
        };

        logger.success(
          `✅ ${site} completed in ${duration}s - Found ${results.length} jobs`
        );
      } catch (error) {
        logger.error(`❌ ${site} parsing failed: ${error.message}`);
        siteResults[site] = {
          count: 0,
          rejected: 0,
          duration: `${elapsedSeconds()}s`,
          error: error.message,
        };
      }
    }

    // AI Analysis if enabled
    let analysisResults = null;
    if (ENABLE_AI_ANALYSIS && allResults.length > 0) {
      logger.step("🧠 Running AI Analysis...");

      const ollamaStatus = await checkOllamaStatus();
      if (ollamaStatus.available) {
        analysisResults = await analyzeBatch(allResults, {
          context:
            "Job market analysis focusing on job postings, skills, and trends",
        });
        logger.success(
          `✅ AI Analysis completed for ${allResults.length} jobs`
        );
      } else {
        logger.warning("⚠️ Ollama not available, skipping AI analysis");
      }
    }

    // Save results
    const outputData = {
      metadata: {
        extractedAt: new Date().toISOString(),
        parser: "job-search-parser",
        version: "2.0.0",
        sites: sites,
        keywords: keywords.join(", "),
        locationFilter,
        analysisResults,
      },
      results: allResults,
      rejectedResults: allRejectedResults,
      siteResults,
    };

    // With `recursive: true`, mkdirSync is a no-op when the directory already
    // exists, so no separate existence check is needed.
    const resultsDir = path.join(__dirname, "results");
    fs.mkdirSync(resultsDir, { recursive: true });

    // ":" and "." are replaced so the timestamp is safe to use in a filename.
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
    const filename = `job-search-results-${timestamp}.json`;
    const filepath = path.join(resultsDir, filename);

    fs.writeFileSync(filepath, JSON.stringify(outputData, null, 2));

    // Final summary
    logger.step("\n📊 Job Search Parser Summary");
    logger.success(`✅ Total jobs found: ${allResults.length}`);
    logger.info(`❌ Total rejected: ${allRejectedResults.length}`);
    logger.info(`📁 Results saved to: ${filepath}`);

    logger.info("\n📈 Results by site:");
    for (const [site, stats] of Object.entries(siteResults)) {
      if (stats.error) {
        logger.error(` ${site}: ERROR - ${stats.error}`);
      } else {
        logger.info(
          ` ${site}: ${stats.count} jobs found, ${stats.rejected} rejected (${stats.duration})`
        );
      }
    }

    logger.success("\n✅ Job Search Parser completed successfully!");

    return outputData;
  } catch (error) {
    runFailed = true;
    logger.error(`❌ Job Search Parser failed: ${error.message}`);
    throw error;
  } finally {
    try {
      await coreParser.cleanup();
    } catch (cleanupError) {
      logger.error(`❌ Core parser cleanup failed: ${cleanupError.message}`);
      // Surface the cleanup failure only when it is not hiding an earlier error.
      if (!runFailed) {
        throw cleanupError;
      }
    }
  }
}
|
|
|
|
// CLI handling: when this file is executed directly (not required as a
// module), run the parser and exit with 0 on success or 1 after reporting
// the failure.
if (require.main === module) {
  (async () => {
    try {
      await startJobSearchParser();
      process.exit(0);
    } catch (error) {
      console.error("Fatal error:", error.message);
      process.exit(1);
    }
  })();
}

module.exports = { startJobSearchParser };
|