update CoreParser to increase default timeout and change navigation waitUntil option to networkidle
This commit is contained in:
parent
ef9720abf2
commit
83ed86668e
@@ -1,250 +1,250 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* AI Analyzer CLI
|
||||
*
|
||||
* Command-line interface for the ai-analyzer package
|
||||
* Can be used by any parser to analyze JSON files
|
||||
*/
|
||||
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
// Import AI utilities from this package
|
||||
const {
|
||||
logger,
|
||||
analyzeBatch,
|
||||
checkOllamaStatus,
|
||||
findLatestResultsFile,
|
||||
} = require("./index");
|
||||
|
||||
// Default configuration
// Analysis context handed to the AI; overridable via the AI_CONTEXT env var.
const DEFAULT_CONTEXT =
  process.env.AI_CONTEXT || "job market analysis and trends";
// Ollama model name; overridable via the OLLAMA_MODEL env var.
const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "mistral";
// Directory searched when --latest is used.
const DEFAULT_RESULTS_DIR = "results";

// Parse command line arguments
const args = process.argv.slice(2);
// CLI state, populated by the flag-parsing loop below.
let inputFile = null; // --input=FILE
let outputFile = null; // --output=FILE
let context = DEFAULT_CONTEXT; // --context="description"
let model = DEFAULT_MODEL; // --model=MODEL
let findLatest = false; // --latest
let resultsDir = DEFAULT_RESULTS_DIR; // --dir=PATH
|
||||
|
||||
// Parse CLI flags. --name=value flags keep everything after the FIRST "=",
// so values that themselves contain "=" (e.g. --context="a=b") survive
// intact. The previous `arg.split("=")[1]` silently truncated such values.
const flagValue = (flag) => flag.slice(flag.indexOf("=") + 1);

for (const arg of args) {
  if (arg.startsWith("--input=")) {
    inputFile = flagValue(arg);
  } else if (arg.startsWith("--output=")) {
    outputFile = flagValue(arg);
  } else if (arg.startsWith("--context=")) {
    context = flagValue(arg);
  } else if (arg.startsWith("--model=")) {
    model = flagValue(arg);
  } else if (arg.startsWith("--dir=")) {
    resultsDir = flagValue(arg);
  } else if (arg === "--latest") {
    findLatest = true;
  } else if (arg === "--help" || arg === "-h") {
    // Print usage and exit successfully.
    console.log(`
AI Analyzer CLI

Usage: node cli.js [options]

Options:
  --input=FILE             Input JSON file
  --output=FILE            Output file (default: ai-analysis-{timestamp}.json)
  --context="description"  Analysis context (default: "${DEFAULT_CONTEXT}")
  --model=MODEL            Ollama model (default: ${DEFAULT_MODEL})
  --latest                 Use latest results file from results directory
  --dir=PATH               Directory to look for results (default: 'results')
  --help, -h               Show this help

Examples:
  node cli.js --input=results.json
  node cli.js --latest --dir=results
  node cli.js --input=results.json --context="job trends" --model=mistral

Environment Variables:
  AI_CONTEXT    Default analysis context
  OLLAMA_MODEL  Default Ollama model
`);
    process.exit(0);
  }
}
|
||||
|
||||
/**
 * Main CLI workflow:
 *   1. Resolve the input file (--input, --latest, or a --dir-relative path).
 *   2. Load the JSON payload and extract posts (bare array or {results: []}).
 *   3. Verify Ollama/model availability, then run the batch AI analysis.
 *   4. Merge each AI verdict back into its post and write the updated file.
 * Exits the process with code 1 on any failure.
 */
async function main() {
  try {
    // Determine input file
    if (findLatest) {
      try {
        inputFile = findLatestResultsFile(resultsDir);
        logger.info(`Found latest results file: ${inputFile}`);
      } catch (error) {
        logger.error(
          `❌ No results files found in '${resultsDir}': ${error.message}`
        );
        logger.info(`💡 To create results files:`);
        logger.info(
          ` 1. Run a parser first (e.g., npm start in linkedin-parser)`
        );
        logger.info(` 2. Or provide a specific file with --input=FILE`);
        logger.info(` 3. Or create a sample JSON file to test with`);
        process.exit(1);
      }
    }

    // If inputFile is a relative path and --dir is set, resolve it
    if (inputFile && !path.isAbsolute(inputFile) && !fs.existsSync(inputFile)) {
      const candidate = path.join(resultsDir, inputFile);
      if (fs.existsSync(candidate)) {
        inputFile = candidate;
      }
    }

    if (!inputFile) {
      logger.error("❌ Input file required. Use --input=FILE or --latest");
      logger.info(`💡 Examples:`);
      logger.info(` node cli.js --input=results.json`);
      logger.info(` node cli.js --latest --dir=results`);
      logger.info(` node cli.js --help`);
      process.exit(1);
    }

    // Load input file
    logger.step(`Loading input file: ${inputFile}`);

    if (!fs.existsSync(inputFile)) {
      throw new Error(`Input file not found: ${inputFile}`);
    }

    // FIX: declared with `let` (not `const`) because the bare-array branch
    // below reassigns `data` to a wrapped {metadata, results} structure.
    // With `const` this threw "TypeError: Assignment to constant variable"
    // at runtime whenever the input file was a plain JSON array.
    let data = JSON.parse(fs.readFileSync(inputFile, "utf-8"));

    // Extract posts from different formats
    let posts = [];
    if (data.results && Array.isArray(data.results)) {
      posts = data.results;
      logger.info(`Found ${posts.length} items in results array`);
    } else if (Array.isArray(data)) {
      posts = data;
      logger.info(`Found ${posts.length} items in array`);
    } else {
      throw new Error("Invalid JSON format - need array or {results: [...]}");
    }

    if (posts.length === 0) {
      throw new Error("No items found to analyze");
    }

    // Check AI availability
    logger.step("Checking AI availability");
    const aiAvailable = await checkOllamaStatus(model);
    if (!aiAvailable) {
      throw new Error(
        `AI not available. Make sure Ollama is running and model '${model}' is installed.`
      );
    }

    // Check if results already have AI analysis
    const hasExistingAI = posts.some((post) => post.aiAnalysis);
    if (hasExistingAI) {
      logger.info(
        `📋 Results already contain AI analysis - will update with new context`
      );
    }

    // Prepare data for analysis: normalize the fields the analyzer expects.
    // (Unused index parameter removed from the map callback.)
    const analysisData = posts.map((post) => ({
      text: post.text || post.content || post.post || "",
      location: post.location || "Unknown",
      keyword: post.keyword || "Unknown",
      timestamp: post.timestamp || new Date().toISOString(),
    }));

    // Run analysis
    logger.step(`Running AI analysis with context: "${context}"`);
    const analysis = await analyzeBatch(analysisData, context, model);

    // Integrate AI analysis back into the original results
    const updatedPosts = posts.map((post, index) => {
      const aiResult = analysis[index];
      return {
        ...post,
        aiAnalysis: {
          isRelevant: aiResult.isRelevant,
          confidence: aiResult.confidence,
          reasoning: aiResult.reasoning,
          context: context,
          model: model,
          analyzedAt: new Date().toISOString(),
        },
      };
    });

    // Update the original data structure
    if (data.results && Array.isArray(data.results)) {
      data.results = updatedPosts;
      // Update metadata
      data.metadata = data.metadata || {};
      data.metadata.aiAnalysisUpdated = new Date().toISOString();
      data.metadata.aiContext = context;
      data.metadata.aiModel = model;
    } else {
      // If it's a simple array, create a proper structure
      data = {
        metadata: {
          timestamp: new Date().toISOString(),
          totalItems: updatedPosts.length,
          aiContext: context,
          aiModel: model,
          analysisType: "cli",
        },
        results: updatedPosts,
      };
    }

    // Generate output filename if not provided
    if (!outputFile) {
      // Use the original filename with -ai suffix
      const originalName = path.basename(inputFile, path.extname(inputFile));
      outputFile = path.join(
        path.dirname(inputFile),
        `${originalName}-ai.json`
      );
    }

    // Save updated results back to file
    fs.writeFileSync(outputFile, JSON.stringify(data, null, 2));

    // Show summary
    const relevant = analysis.filter((a) => a.isRelevant).length;
    const irrelevant = analysis.filter((a) => !a.isRelevant).length;
    const avgConfidence =
      analysis.reduce((sum, a) => sum + a.confidence, 0) / analysis.length;

    logger.success("✅ AI analysis completed and integrated");
    logger.info(`📊 Context: "${context}"`);
    logger.info(`📈 Total items analyzed: ${analysis.length}`);
    logger.info(
      `✅ Relevant items: ${relevant} (${(
        (relevant / analysis.length) *
        100
      ).toFixed(1)}%)`
    );
    logger.info(
      `❌ Irrelevant items: ${irrelevant} (${(
        (irrelevant / analysis.length) *
        100
      ).toFixed(1)}%)`
    );
    logger.info(`🎯 Average confidence: ${avgConfidence.toFixed(2)}`);
    logger.file(`🧠 Updated results saved to: ${outputFile}`);
  } catch (error) {
    logger.error(`❌ Analysis failed: ${error.message}`);
    process.exit(1);
  }
}
|
||||
|
||||
// Run the CLI
// Top-level invocation; failures are handled (and exit non-zero) inside main().
main();
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* AI Analyzer CLI
|
||||
*
|
||||
* Command-line interface for the ai-analyzer package
|
||||
* Can be used by any parser to analyze JSON files
|
||||
*/
|
||||
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
// Import AI utilities from this package
|
||||
const {
|
||||
logger,
|
||||
analyzeBatch,
|
||||
checkOllamaStatus,
|
||||
findLatestResultsFile,
|
||||
} = require("./index");
|
||||
|
||||
// Default configuration
// Analysis context handed to the AI; overridable via the AI_CONTEXT env var.
const DEFAULT_CONTEXT =
  process.env.AI_CONTEXT || "job market analysis and trends";
// Ollama model name; overridable via the OLLAMA_MODEL env var.
const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "mistral";
// Directory searched when --latest is used.
const DEFAULT_RESULTS_DIR = "results";

// Parse command line arguments
const args = process.argv.slice(2);
// CLI state, populated by the flag-parsing loop below.
let inputFile = null; // --input=FILE
let outputFile = null; // --output=FILE
let context = DEFAULT_CONTEXT; // --context="description"
let model = DEFAULT_MODEL; // --model=MODEL
let findLatest = false; // --latest
let resultsDir = DEFAULT_RESULTS_DIR; // --dir=PATH
|
||||
|
||||
// Parse CLI flags. --name=value flags keep everything after the FIRST "=",
// so values that themselves contain "=" (e.g. --context="a=b") survive
// intact. The previous `arg.split("=")[1]` silently truncated such values.
const flagValue = (flag) => flag.slice(flag.indexOf("=") + 1);

for (const arg of args) {
  if (arg.startsWith("--input=")) {
    inputFile = flagValue(arg);
  } else if (arg.startsWith("--output=")) {
    outputFile = flagValue(arg);
  } else if (arg.startsWith("--context=")) {
    context = flagValue(arg);
  } else if (arg.startsWith("--model=")) {
    model = flagValue(arg);
  } else if (arg.startsWith("--dir=")) {
    resultsDir = flagValue(arg);
  } else if (arg === "--latest") {
    findLatest = true;
  } else if (arg === "--help" || arg === "-h") {
    // Print usage and exit successfully.
    console.log(`
AI Analyzer CLI

Usage: node cli.js [options]

Options:
  --input=FILE             Input JSON file
  --output=FILE            Output file (default: ai-analysis-{timestamp}.json)
  --context="description"  Analysis context (default: "${DEFAULT_CONTEXT}")
  --model=MODEL            Ollama model (default: ${DEFAULT_MODEL})
  --latest                 Use latest results file from results directory
  --dir=PATH               Directory to look for results (default: 'results')
  --help, -h               Show this help

Examples:
  node cli.js --input=results.json
  node cli.js --latest --dir=results
  node cli.js --input=results.json --context="job trends" --model=mistral

Environment Variables:
  AI_CONTEXT    Default analysis context
  OLLAMA_MODEL  Default Ollama model
`);
    process.exit(0);
  }
}
|
||||
|
||||
/**
 * Main CLI workflow:
 *   1. Resolve the input file (--input, --latest, or a --dir-relative path).
 *   2. Load the JSON payload and extract posts (bare array or {results: []}).
 *   3. Verify Ollama/model availability, then run the batch AI analysis.
 *   4. Merge each AI verdict back into its post and write the updated file.
 * Exits the process with code 1 on any failure.
 */
async function main() {
  try {
    // Determine input file
    if (findLatest) {
      try {
        inputFile = findLatestResultsFile(resultsDir);
        logger.info(`Found latest results file: ${inputFile}`);
      } catch (error) {
        logger.error(
          `❌ No results files found in '${resultsDir}': ${error.message}`
        );
        logger.info(`💡 To create results files:`);
        logger.info(
          ` 1. Run a parser first (e.g., npm start in linkedin-parser)`
        );
        logger.info(` 2. Or provide a specific file with --input=FILE`);
        logger.info(` 3. Or create a sample JSON file to test with`);
        process.exit(1);
      }
    }

    // If inputFile is a relative path and --dir is set, resolve it
    if (inputFile && !path.isAbsolute(inputFile) && !fs.existsSync(inputFile)) {
      const candidate = path.join(resultsDir, inputFile);
      if (fs.existsSync(candidate)) {
        inputFile = candidate;
      }
    }

    if (!inputFile) {
      logger.error("❌ Input file required. Use --input=FILE or --latest");
      logger.info(`💡 Examples:`);
      logger.info(` node cli.js --input=results.json`);
      logger.info(` node cli.js --latest --dir=results`);
      logger.info(` node cli.js --help`);
      process.exit(1);
    }

    // Load input file
    logger.step(`Loading input file: ${inputFile}`);

    if (!fs.existsSync(inputFile)) {
      throw new Error(`Input file not found: ${inputFile}`);
    }

    // FIX: declared with `let` (not `const`) because the bare-array branch
    // below reassigns `data` to a wrapped {metadata, results} structure.
    // With `const` this threw "TypeError: Assignment to constant variable"
    // at runtime whenever the input file was a plain JSON array.
    let data = JSON.parse(fs.readFileSync(inputFile, "utf-8"));

    // Extract posts from different formats
    let posts = [];
    if (data.results && Array.isArray(data.results)) {
      posts = data.results;
      logger.info(`Found ${posts.length} items in results array`);
    } else if (Array.isArray(data)) {
      posts = data;
      logger.info(`Found ${posts.length} items in array`);
    } else {
      throw new Error("Invalid JSON format - need array or {results: [...]}");
    }

    if (posts.length === 0) {
      throw new Error("No items found to analyze");
    }

    // Check AI availability
    logger.step("Checking AI availability");
    const aiAvailable = await checkOllamaStatus(model);
    if (!aiAvailable) {
      throw new Error(
        `AI not available. Make sure Ollama is running and model '${model}' is installed.`
      );
    }

    // Check if results already have AI analysis
    const hasExistingAI = posts.some((post) => post.aiAnalysis);
    if (hasExistingAI) {
      logger.info(
        `📋 Results already contain AI analysis - will update with new context`
      );
    }

    // Prepare data for analysis: normalize the fields the analyzer expects.
    // (Unused index parameter removed from the map callback.)
    const analysisData = posts.map((post) => ({
      text: post.text || post.content || post.post || "",
      location: post.location || "Unknown",
      keyword: post.keyword || "Unknown",
      timestamp: post.timestamp || new Date().toISOString(),
    }));

    // Run analysis
    logger.step(`Running AI analysis with context: "${context}"`);
    const analysis = await analyzeBatch(analysisData, context, model);

    // Integrate AI analysis back into the original results
    const updatedPosts = posts.map((post, index) => {
      const aiResult = analysis[index];
      return {
        ...post,
        aiAnalysis: {
          isRelevant: aiResult.isRelevant,
          confidence: aiResult.confidence,
          reasoning: aiResult.reasoning,
          context: context,
          model: model,
          analyzedAt: new Date().toISOString(),
        },
      };
    });

    // Update the original data structure
    if (data.results && Array.isArray(data.results)) {
      data.results = updatedPosts;
      // Update metadata
      data.metadata = data.metadata || {};
      data.metadata.aiAnalysisUpdated = new Date().toISOString();
      data.metadata.aiContext = context;
      data.metadata.aiModel = model;
    } else {
      // If it's a simple array, create a proper structure
      data = {
        metadata: {
          timestamp: new Date().toISOString(),
          totalItems: updatedPosts.length,
          aiContext: context,
          aiModel: model,
          analysisType: "cli",
        },
        results: updatedPosts,
      };
    }

    // Generate output filename if not provided
    if (!outputFile) {
      // Use the original filename with -ai suffix
      const originalName = path.basename(inputFile, path.extname(inputFile));
      outputFile = path.join(
        path.dirname(inputFile),
        `${originalName}-ai.json`
      );
    }

    // Save updated results back to file
    fs.writeFileSync(outputFile, JSON.stringify(data, null, 2));

    // Show summary
    const relevant = analysis.filter((a) => a.isRelevant).length;
    const irrelevant = analysis.filter((a) => !a.isRelevant).length;
    const avgConfidence =
      analysis.reduce((sum, a) => sum + a.confidence, 0) / analysis.length;

    logger.success("✅ AI analysis completed and integrated");
    logger.info(`📊 Context: "${context}"`);
    logger.info(`📈 Total items analyzed: ${analysis.length}`);
    logger.info(
      `✅ Relevant items: ${relevant} (${(
        (relevant / analysis.length) *
        100
      ).toFixed(1)}%)`
    );
    logger.info(
      `❌ Irrelevant items: ${irrelevant} (${(
        (irrelevant / analysis.length) *
        100
      ).toFixed(1)}%)`
    );
    logger.info(`🎯 Average confidence: ${avgConfidence.toFixed(2)}`);
    logger.file(`🧠 Updated results saved to: ${outputFile}`);
  } catch (error) {
    logger.error(`❌ Analysis failed: ${error.message}`);
    process.exit(1);
  }
}
|
||||
|
||||
// Run the CLI
// Top-level invocation; failures are handled (and exit non-zero) inside main().
main();
|
||||
|
||||
@@ -1,346 +1,346 @@
|
||||
/**
|
||||
* AI Analyzer Demo
|
||||
*
|
||||
* Demonstrates all the core utilities provided by the ai-analyzer package:
|
||||
* - Logger functionality
|
||||
* - Text processing utilities
|
||||
* - Location validation
|
||||
* - AI analysis capabilities
|
||||
* - Test utilities
|
||||
*/
|
||||
|
||||
const {
|
||||
logger,
|
||||
Logger,
|
||||
cleanText,
|
||||
containsAnyKeyword,
|
||||
parseLocationFilters,
|
||||
validateLocationAgainstFilters,
|
||||
extractLocationFromProfile,
|
||||
analyzeBatch,
|
||||
} = require("./index");
|
||||
|
||||
// Terminal colors for demo output
const colors = {
  reset: "\x1b[0m",
  bright: "\x1b[1m",
  cyan: "\x1b[36m",
  green: "\x1b[32m",
  yellow: "\x1b[33m",
  blue: "\x1b[34m",
  magenta: "\x1b[35m",
  red: "\x1b[31m",
};

// Console helpers used throughout the demo; each wraps console.log with an
// ANSI color (and, for the leveled helpers, an emoji prefix).
const demo = {
  title: (text) =>
    console.log(`\n${colors.bright}${colors.cyan}${text}${colors.reset}`),
  section: (text) =>
    console.log(`\n${colors.bright}${colors.magenta}${text}${colors.reset}`),
  success: (text) => console.log(`${colors.green}✅ ${text}${colors.reset}`),
  info: (text) => console.log(`${colors.blue}ℹ️ ${text}${colors.reset}`),
  warning: (text) => console.log(`${colors.yellow}⚠️ ${text}${colors.reset}`),
  error: (text) => console.log(`${colors.red}❌ ${text}${colors.reset}`),
  code: (text) => console.log(`${colors.cyan}${text}${colors.reset}`),
};
|
||||
|
||||
/**
 * Top-level demo driver: prints the banner, then runs each numbered
 * section in order (each section pauses for Enter internally), and
 * finally prints the completion summary.
 */
async function runDemo() {
  demo.title("=== AI Analyzer Demo ===");
  demo.info(
    "This demo showcases all the core utilities provided by the ai-analyzer package."
  );
  demo.info("Press Enter to continue through each section...\n");

  await waitForEnter();

  // Ordered section table; awaited one at a time so output never interleaves.
  const sections = [
    demonstrateLogger, // 1. Logger Demo
    demonstrateTextProcessing, // 2. Text Processing Demo
    demonstrateLocationValidation, // 3. Location Validation Demo
    demonstrateAIAnalysis, // 4. AI Analysis Demo
    demonstrateIntegration, // 5. Integration Demo
  ];
  for (const runSection of sections) {
    await runSection();
  }

  demo.title("=== Demo Complete ===");
  demo.success("All ai-analyzer utilities demonstrated successfully!");
  demo.info("Check the README.md for detailed API documentation.");
}
|
||||
|
||||
/**
 * Section 1: exercises the shared `logger` plus custom Logger instances —
 * standard levels, emoji convenience methods, and silent/verbose switching.
 */
async function demonstrateLogger() {
  demo.section("1. Logger Utilities");
  demo.info(
    "The logger provides consistent logging across all parsers with configurable levels and color support."
  );

  demo.code("// Using default logger");
  logger.info("This is an info message");
  logger.warning("This is a warning message");
  logger.error("This is an error message");
  logger.success("This is a success message");
  logger.debug("This is a debug message (if enabled)");

  demo.code("// Convenience methods with emoji prefixes");
  logger.step("Starting demo process");
  logger.search("Searching for keywords");
  logger.ai("Running AI analysis");
  logger.location("Validating location");
  logger.file("Saving results");

  demo.code("// Custom logger configuration");
  const customLogger = new Logger({
    debug: false,
    colors: true,
  });
  customLogger.info("Custom logger with debug disabled");
  customLogger.debug("This won't show"); // suppressed: debug is false above

  demo.code("// Silent mode");
  const silentLogger = new Logger();
  silentLogger.silent(); // mute all output
  silentLogger.info("This won't show");
  silentLogger.verbose(); // Re-enable all levels

  // Pause so the user can read this section before the next one starts.
  await waitForEnter();
}
|
||||
|
||||
/**
 * Section 2: demonstrates cleanText() on sample posts and keyword matching
 * via containsAnyKeyword(), printing which keywords matched each text.
 * (Fixed: removed the unused `index` parameter from the cleaning loop.)
 */
async function demonstrateTextProcessing() {
  demo.section("2. Text Processing Utilities");
  demo.info(
    "Text utilities provide content cleaning and keyword matching capabilities."
  );

  const sampleTexts = [
    "Check out this #awesome post! https://example.com 🚀",
    "Just got #laidoff from my job. Looking for new opportunities!",
    "Company is #downsizing and I'm affected. #RIF #layoff",
    "Great news! We're #hiring new developers! 🎉",
  ];

  demo.code("// Text cleaning examples:");
  sampleTexts.forEach((text) => {
    const cleaned = cleanText(text);
    demo.info(`Original: ${text}`);
    demo.success(`Cleaned: ${cleaned}`);
    console.log();
  });

  demo.code("// Keyword matching:");
  const keywords = ["layoff", "downsizing", "RIF", "hiring"];

  sampleTexts.forEach((text, index) => {
    const hasMatch = containsAnyKeyword(text, keywords);
    // Case-insensitive recomputation so we can SHOW which keywords matched
    // (containsAnyKeyword only returns a boolean).
    const matchedKeywords = keywords.filter((keyword) =>
      text.toLowerCase().includes(keyword.toLowerCase())
    );

    demo.info(
      `Text ${index + 1}: ${hasMatch ? "✅" : "❌"} ${
        matchedKeywords.join(", ") || "No matches"
      }`
    );
  });

  await waitForEnter();
}
|
||||
|
||||
/**
 * Section 3: demonstrates parsing comma-separated filter strings, validating
 * locations against those filters, and extracting a location from profile text.
 */
async function demonstrateLocationValidation() {
  demo.section("3. Location Validation Utilities");
  demo.info(
    "Location utilities provide geographic filtering and validation capabilities."
  );

  demo.code("// Location filter parsing:");
  // Filter strings exactly as a user would pass them on the CLI.
  const filterStrings = [
    "Ontario,Manitoba",
    "Toronto,Vancouver",
    "British Columbia,Alberta",
    "Canada",
  ];

  filterStrings.forEach((filterString) => {
    const filters = parseLocationFilters(filterString);
    demo.info(`Filter: "${filterString}"`);
    demo.success(`Parsed: [${filters.join(", ")}]`);
    console.log();
  });

  demo.code("// Location validation examples:");
  // Deliberate mix of matching and non-matching location/filter pairs.
  const testLocations = [
    { location: "Toronto, Ontario, Canada", filters: ["Ontario"] },
    { location: "Vancouver, BC", filters: ["British Columbia"] },
    { location: "Calgary, Alberta", filters: ["Ontario"] },
    { location: "Montreal, Quebec", filters: ["Ontario", "Manitoba"] },
    { location: "New York, NY", filters: ["Ontario"] },
  ];

  testLocations.forEach(({ location, filters }) => {
    const isValid = validateLocationAgainstFilters(location, filters);
    demo.info(`Location: "${location}"`);
    demo.info(`Filters: [${filters.join(", ")}]`);
    demo.success(`Valid: ${isValid ? "✅ Yes" : "❌ No"}`);
    console.log();
  });

  demo.code("// Profile location extraction:");
  // Headline-style profile strings, including one with no location ("Remote").
  const profileTexts = [
    "Software Engineer at Tech Corp • Toronto, Ontario",
    "Product Manager • Vancouver, BC",
    "Data Scientist • Remote",
    "CEO at Startup Inc • Montreal, Quebec, Canada",
  ];

  profileTexts.forEach((profileText) => {
    const location = extractLocationFromProfile(profileText);
    demo.info(`Profile: "${profileText}"`);
    demo.success(`Extracted: "${location || "No location found"}"`);
    console.log();
  });

  await waitForEnter();
}
|
||||
|
||||
/**
 * Section 4: simulated AI analysis over mock posts. No network calls happen
 * here; confidence values are fabricated purely to illustrate the output.
 */
async function demonstrateAIAnalysis() {
  demo.section("4. AI Analysis Utilities");
  demo.info(
    "AI utilities provide content analysis using OpenAI or local Ollama models."
  );

  // Mock posts for demo
  const mockPosts = [
    {
      id: "1",
      content:
        "Just got laid off from my software engineering role. Looking for new opportunities in Toronto.",
      author: "John Doe",
      location: "Toronto, Ontario",
    },
    {
      id: "2",
      content:
        "Our company is downsizing and I'm affected. This is really tough news.",
      author: "Jane Smith",
      location: "Vancouver, BC",
    },
    {
      id: "3",
      content:
        "We're hiring! Looking for talented developers to join our team.",
      author: "Bob Wilson",
      location: "Calgary, Alberta",
    },
  ];

  demo.code("// Mock AI analysis (simulated):");
  demo.info("In a real scenario, this would call Ollama or OpenAI API");

  mockPosts.forEach((post, index) => {
    demo.info(`Post ${index + 1}: ${post.content.substring(0, 50)}...`);
    // Fabricated confidences: 0.85, 0.90, 0.95 for the three mock posts.
    demo.success(
      `Analysis: Relevant to job layoffs (confidence: 0.${85 + index * 5})`
    );
    console.log();
  });

  demo.code("// Batch analysis simulation:");
  demo.info("Processing batch of 3 posts...");
  await simulateProcessing();
  demo.success("Batch analysis completed!");

  await waitForEnter();
}
|
||||
|
||||
/**
 * Section 5: end-to-end pipeline on one sample post — clean the text, match
 * keywords, validate the location, and (only if both checks pass) simulate
 * the AI analysis step. Mirrors how a real parser would chain the utilities.
 */
async function demonstrateIntegration() {
  demo.section("5. Integration Example");
  demo.info("Here's how all utilities work together in a real scenario:");

  const samplePost = {
    id: "demo-1",
    content:
      "Just got #laidoff from my job at TechCorp! Looking for new opportunities in #Toronto. This is really tough but I'm staying positive! 🚀",
    author: "Demo User",
    location: "Toronto, Ontario, Canada",
  };

  demo.code("// Processing pipeline:");

  // 1. Log the start
  logger.step("Processing new post");

  // 2. Clean the text
  const cleanedContent = cleanText(samplePost.content);
  logger.info(`Cleaned content: ${cleanedContent}`);

  // 3. Check for keywords
  const keywords = ["layoff", "downsizing", "RIF"];
  const hasKeywords = containsAnyKeyword(cleanedContent, keywords);
  logger.search(`Keyword match: ${hasKeywords ? "Found" : "Not found"}`);

  // 4. Validate location
  const locationFilters = parseLocationFilters("Ontario,Manitoba");
  const isValidLocation = validateLocationAgainstFilters(
    samplePost.location,
    locationFilters
  );
  logger.location(`Location valid: ${isValidLocation ? "Yes" : "No"}`);

  // 5. Simulate AI analysis — only runs when BOTH gates pass, just like
  // the real pipeline skips AI for filtered-out posts.
  if (hasKeywords && isValidLocation) {
    logger.ai("Running AI analysis...");
    await simulateProcessing();
    logger.success("Post accepted and analyzed!");
  } else {
    logger.warning("Post rejected - doesn't meet criteria");
  }

  await waitForEnter();
}
|
||||
|
||||
// Helper functions
|
||||
/**
 * Blocks until the user presses Enter on stdin.
 * A fresh readline interface is created per call and closed right after the
 * keypress so stdin is released between demo sections.
 * @returns {Promise<void>} resolves once Enter has been pressed.
 */
function waitForEnter() {
  return new Promise((resolve) => {
    const readline = require("readline");
    const prompt = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
    });
    prompt.question("\nPress Enter to continue...", () => {
      prompt.close();
      resolve();
    });
  });
}
|
||||
|
||||
/**
 * Shows an animated "Processing..." spinner on stdout, resolving when done.
 *
 * Generalized from the original hard-coded 2000ms/500ms timings so callers
 * (and tests) can control the duration; the defaults preserve the original
 * behavior exactly. The redundant `async` keyword was dropped — the function
 * already returns an explicit Promise.
 *
 * @param {number} [durationMs=2000] - Total time before resolving.
 * @param {number} [tickMs=500] - Delay between spinner frames.
 * @returns {Promise<void>}
 */
function simulateProcessing(durationMs = 2000, tickMs = 500) {
  return new Promise((resolve) => {
    const dots = [".", "..", "..."];
    let i = 0;
    const interval = setInterval(() => {
      process.stdout.write(`\rProcessing${dots[i]}`);
      i = (i + 1) % dots.length;
    }, tickMs);

    setTimeout(() => {
      clearInterval(interval); // stop the spinner before resolving
      process.stdout.write("\r");
      resolve();
    }, durationMs);
  });
}
|
||||
|
||||
// Run the demo if this file is executed directly
if (require.main === module) {
  runDemo().catch((error) => {
    // Surface the failure and exit non-zero so shells/CI notice it.
    demo.error(`Demo failed: ${error.message}`);
    process.exit(1);
  });
}

// Exported so other scripts can embed the demo programmatically.
module.exports = { runDemo };
|
||||
/**
|
||||
* AI Analyzer Demo
|
||||
*
|
||||
* Demonstrates all the core utilities provided by the ai-analyzer package:
|
||||
* - Logger functionality
|
||||
* - Text processing utilities
|
||||
* - Location validation
|
||||
* - AI analysis capabilities
|
||||
* - Test utilities
|
||||
*/
|
||||
|
||||
const {
|
||||
logger,
|
||||
Logger,
|
||||
cleanText,
|
||||
containsAnyKeyword,
|
||||
parseLocationFilters,
|
||||
validateLocationAgainstFilters,
|
||||
extractLocationFromProfile,
|
||||
analyzeBatch,
|
||||
} = require("./index");
|
||||
|
||||
// Terminal colors for demo output
const colors = {
  reset: "\x1b[0m",
  bright: "\x1b[1m",
  cyan: "\x1b[36m",
  green: "\x1b[32m",
  yellow: "\x1b[33m",
  blue: "\x1b[34m",
  magenta: "\x1b[35m",
  red: "\x1b[31m",
};

// Console helpers used throughout the demo; each wraps console.log with an
// ANSI color (and, for the leveled helpers, an emoji prefix).
const demo = {
  title: (text) =>
    console.log(`\n${colors.bright}${colors.cyan}${text}${colors.reset}`),
  section: (text) =>
    console.log(`\n${colors.bright}${colors.magenta}${text}${colors.reset}`),
  success: (text) => console.log(`${colors.green}✅ ${text}${colors.reset}`),
  info: (text) => console.log(`${colors.blue}ℹ️ ${text}${colors.reset}`),
  warning: (text) => console.log(`${colors.yellow}⚠️ ${text}${colors.reset}`),
  error: (text) => console.log(`${colors.red}❌ ${text}${colors.reset}`),
  code: (text) => console.log(`${colors.cyan}${text}${colors.reset}`),
};
|
||||
|
||||
/**
 * Top-level demo driver: prints the banner, then runs each numbered
 * section in order (each section pauses for Enter internally), and
 * finally prints the completion summary.
 */
async function runDemo() {
  demo.title("=== AI Analyzer Demo ===");
  demo.info(
    "This demo showcases all the core utilities provided by the ai-analyzer package."
  );
  demo.info("Press Enter to continue through each section...\n");

  await waitForEnter();

  // Ordered section table; awaited one at a time so output never interleaves.
  const sections = [
    demonstrateLogger, // 1. Logger Demo
    demonstrateTextProcessing, // 2. Text Processing Demo
    demonstrateLocationValidation, // 3. Location Validation Demo
    demonstrateAIAnalysis, // 4. AI Analysis Demo
    demonstrateIntegration, // 5. Integration Demo
  ];
  for (const runSection of sections) {
    await runSection();
  }

  demo.title("=== Demo Complete ===");
  demo.success("All ai-analyzer utilities demonstrated successfully!");
  demo.info("Check the README.md for detailed API documentation.");
}
|
||||
|
||||
/**
 * Section 1: exercises the shared `logger` plus custom Logger instances —
 * standard levels, emoji convenience methods, and silent/verbose switching.
 */
async function demonstrateLogger() {
  demo.section("1. Logger Utilities");
  demo.info(
    "The logger provides consistent logging across all parsers with configurable levels and color support."
  );

  demo.code("// Using default logger");
  logger.info("This is an info message");
  logger.warning("This is a warning message");
  logger.error("This is an error message");
  logger.success("This is a success message");
  logger.debug("This is a debug message (if enabled)");

  demo.code("// Convenience methods with emoji prefixes");
  logger.step("Starting demo process");
  logger.search("Searching for keywords");
  logger.ai("Running AI analysis");
  logger.location("Validating location");
  logger.file("Saving results");

  demo.code("// Custom logger configuration");
  const customLogger = new Logger({
    debug: false,
    colors: true,
  });
  customLogger.info("Custom logger with debug disabled");
  customLogger.debug("This won't show"); // suppressed: debug is false above

  demo.code("// Silent mode");
  const silentLogger = new Logger();
  silentLogger.silent(); // mute all output
  silentLogger.info("This won't show");
  silentLogger.verbose(); // Re-enable all levels

  // Pause so the user can read this section before the next one starts.
  await waitForEnter();
}
|
||||
|
||||
/**
 * Section 2: show text cleaning and keyword matching on a few sample
 * social-media style posts.
 */
async function demonstrateTextProcessing() {
  demo.section("2. Text Processing Utilities");
  demo.info(
    "Text utilities provide content cleaning and keyword matching capabilities."
  );

  const samples = [
    "Check out this #awesome post! https://example.com 🚀",
    "Just got #laidoff from my job. Looking for new opportunities!",
    "Company is #downsizing and I'm affected. #RIF #layoff",
    "Great news! We're #hiring new developers! 🎉",
  ];

  // Show each post before and after cleaning.
  demo.code("// Text cleaning examples:");
  for (const raw of samples) {
    demo.info(`Original: ${raw}`);
    demo.success(`Cleaned: ${cleanText(raw)}`);
    console.log();
  }

  demo.code("// Keyword matching:");
  const keywords = ["layoff", "downsizing", "RIF", "hiring"];

  // Report which keywords (case-insensitive) appear in each post.
  for (const [i, raw] of samples.entries()) {
    const matched = containsAnyKeyword(raw, keywords);
    const hits = keywords.filter((kw) =>
      raw.toLowerCase().includes(kw.toLowerCase())
    );
    demo.info(
      `Text ${i + 1}: ${matched ? "✅" : "❌"} ${hits.join(", ") || "No matches"}`
    );
  }

  await waitForEnter();
}
|
||||
|
||||
/**
 * Section 3: show location-filter parsing, validating locations against
 * filters, and extracting a location from a free-form profile headline.
 */
async function demonstrateLocationValidation() {
  demo.section("3. Location Validation Utilities");
  demo.info(
    "Location utilities provide geographic filtering and validation capabilities."
  );

  // Comma-separated filter strings are split into individual filters.
  demo.code("// Location filter parsing:");
  const filterStrings = [
    "Ontario,Manitoba",
    "Toronto,Vancouver",
    "British Columbia,Alberta",
    "Canada",
  ];

  filterStrings.forEach((filterString) => {
    const filters = parseLocationFilters(filterString);
    demo.info(`Filter: "${filterString}"`);
    demo.success(`Parsed: [${filters.join(", ")}]`);
    console.log();
  });

  // Mix of matching and non-matching location/filter pairs.
  demo.code("// Location validation examples:");
  const testLocations = [
    { location: "Toronto, Ontario, Canada", filters: ["Ontario"] },
    { location: "Vancouver, BC", filters: ["British Columbia"] },
    { location: "Calgary, Alberta", filters: ["Ontario"] },
    { location: "Montreal, Quebec", filters: ["Ontario", "Manitoba"] },
    { location: "New York, NY", filters: ["Ontario"] },
  ];

  testLocations.forEach(({ location, filters }) => {
    const isValid = validateLocationAgainstFilters(location, filters);
    demo.info(`Location: "${location}"`);
    demo.info(`Filters: [${filters.join(", ")}]`);
    demo.success(`Valid: ${isValid ? "✅ Yes" : "❌ No"}`);
    console.log();
  });

  // Headlines use "Role at Company • City, Region" style separators;
  // "Remote" exercises the no-location case.
  demo.code("// Profile location extraction:");
  const profileTexts = [
    "Software Engineer at Tech Corp • Toronto, Ontario",
    "Product Manager • Vancouver, BC",
    "Data Scientist • Remote",
    "CEO at Startup Inc • Montreal, Quebec, Canada",
  ];

  profileTexts.forEach((profileText) => {
    const location = extractLocationFromProfile(profileText);
    demo.info(`Profile: "${profileText}"`);
    demo.success(`Extracted: "${location || "No location found"}"`);
    console.log();
  });

  await waitForEnter();
}
|
||||
|
||||
/**
 * Section 4: simulate AI relevance analysis over hard-coded posts.
 * No model is actually called, so the demo works offline.
 */
async function demonstrateAIAnalysis() {
  demo.section("4. AI Analysis Utilities");
  demo.info(
    "AI utilities provide content analysis using OpenAI or local Ollama models."
  );

  // Mock posts for demo
  const mockPosts = [
    {
      id: "1",
      content:
        "Just got laid off from my software engineering role. Looking for new opportunities in Toronto.",
      author: "John Doe",
      location: "Toronto, Ontario",
    },
    {
      id: "2",
      content:
        "Our company is downsizing and I'm affected. This is really tough news.",
      author: "Jane Smith",
      location: "Vancouver, BC",
    },
    {
      id: "3",
      content:
        "We're hiring! Looking for talented developers to join our team.",
      author: "Bob Wilson",
      location: "Calgary, Alberta",
    },
  ];

  demo.code("// Mock AI analysis (simulated):");
  demo.info("In a real scenario, this would call Ollama or OpenAI API");

  mockPosts.forEach((post, index) => {
    demo.info(`Post ${index + 1}: ${post.content.substring(0, 50)}...`);
    // Canned confidence climbs with index: 0.85, 0.90, 0.95.
    demo.success(
      `Analysis: Relevant to job layoffs (confidence: 0.${85 + index * 5})`
    );
    console.log();
  });

  demo.code("// Batch analysis simulation:");
  demo.info("Processing batch of 3 posts...");
  await simulateProcessing();
  demo.success("Batch analysis completed!");

  await waitForEnter();
}
|
||||
|
||||
/**
 * Section 5: end-to-end pipeline on a single sample post —
 * clean → keyword match → location validation → (simulated) AI analysis.
 */
async function demonstrateIntegration() {
  demo.section("5. Integration Example");
  demo.info("Here's how all utilities work together in a real scenario:");

  const samplePost = {
    id: "demo-1",
    content:
      "Just got #laidoff from my job at TechCorp! Looking for new opportunities in #Toronto. This is really tough but I'm staying positive! 🚀",
    author: "Demo User",
    location: "Toronto, Ontario, Canada",
  };

  demo.code("// Processing pipeline:");

  // 1. Log the start
  logger.step("Processing new post");

  // 2. Clean the text
  const cleanedContent = cleanText(samplePost.content);
  logger.info(`Cleaned content: ${cleanedContent}`);

  // 3. Check for keywords
  const keywords = ["layoff", "downsizing", "RIF"];
  const hasKeywords = containsAnyKeyword(cleanedContent, keywords);
  logger.search(`Keyword match: ${hasKeywords ? "Found" : "Not found"}`);

  // 4. Validate location
  const locationFilters = parseLocationFilters("Ontario,Manitoba");
  const isValidLocation = validateLocationAgainstFilters(
    samplePost.location,
    locationFilters
  );
  logger.location(`Location valid: ${isValidLocation ? "Yes" : "No"}`);

  // 5. Simulate AI analysis — only runs when both gates pass.
  if (hasKeywords && isValidLocation) {
    logger.ai("Running AI analysis...");
    await simulateProcessing();
    logger.success("Post accepted and analyzed!");
  } else {
    logger.warning("Post rejected - doesn't meet criteria");
  }

  await waitForEnter();
}
|
||||
|
||||
// Helper functions
|
||||
/**
 * Pause until the user presses Enter on stdin.
 * @returns {Promise<void>} resolves once a line has been read
 */
function waitForEnter() {
  const readline = require("readline");
  const prompt = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });
  return new Promise((resolve) => {
    prompt.question("\nPress Enter to continue...", () => {
      prompt.close();
      resolve();
    });
  });
}
|
||||
|
||||
/**
 * Render an animated "Processing..." spinner on stdout for ~2 seconds.
 * @returns {Promise<void>} resolves after the animation finishes
 */
async function simulateProcessing() {
  return new Promise((resolve) => {
    const frames = [".", "..", "..."];
    let frame = 0;
    // Advance the dot animation twice a second.
    const ticker = setInterval(() => {
      process.stdout.write(`\rProcessing${frames[frame]}`);
      frame = (frame + 1) % frames.length;
    }, 500);
    // Stop after 2 s and return the cursor to column 0.
    setTimeout(() => {
      clearInterval(ticker);
      process.stdout.write("\r");
      resolve();
    }, 2000);
  });
}
|
||||
|
||||
// Run the demo if this file is executed directly (not when require()d).
if (require.main === module) {
  runDemo().catch((error) => {
    demo.error(`Demo failed: ${error.message}`);
    // Non-zero exit so shell callers can detect the failure.
    process.exit(1);
  });
}

// Also export so other scripts can embed the demo programmatically.
module.exports = { runDemo };
|
||||
|
||||
@ -1,22 +1,22 @@
|
||||
/**
 * ai-analyzer - Core utilities for parsers
 * Main entry point that exports all modules
 */

// Export all utilities with clean namespace.
// NOTE: spreads are shallow merges — if two modules export the same
// name, the later spread wins.
module.exports = {
  // Logger utilities
  ...require("./src/logger"),

  // AI analysis utilities
  ...require("./src/ai-utils"),

  // Text processing utilities
  ...require("./src/text-utils"),

  // Location validation utilities
  ...require("./src/location-utils"),

  // Test utilities
  ...require("./src/test-utils"),
};
|
||||
/**
|
||||
* ai-analyzer - Core utilities for parsers
|
||||
* Main entry point that exports all modules
|
||||
*/
|
||||
|
||||
// Export all utilities with clean namespace
|
||||
module.exports = {
|
||||
// Logger utilities
|
||||
...require("./src/logger"),
|
||||
|
||||
// AI analysis utilities
|
||||
...require("./src/ai-utils"),
|
||||
|
||||
// Text processing utilities
|
||||
...require("./src/text-utils"),
|
||||
|
||||
// Location validation utilities
|
||||
...require("./src/location-utils"),
|
||||
|
||||
// Test utilities
|
||||
...require("./src/test-utils"),
|
||||
};
|
||||
|
||||
7428
ai-analyzer/package-lock.json
generated
7428
ai-analyzer/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -1,301 +1,301 @@
|
||||
const { logger } = require("./logger");
|
||||
|
||||
/**
|
||||
* AI Analysis utilities for post processing with Ollama
|
||||
* Extracted from ai-analyzer-local.js for reuse across parsers
|
||||
*/
|
||||
|
||||
/**
|
||||
* Check if Ollama is running and the model is available
|
||||
*/
|
||||
/**
 * Check that an Ollama server is reachable and the requested model is
 * installed, logging guidance when either check fails.
 *
 * @param {string} [model="mistral"] - Model name; ":tag" suffix optional.
 * @param {string} [ollamaHost="http://localhost:11434"] - Server base URL.
 * @returns {Promise<boolean>} true when the server responds and the model
 *   exists; false (never throws) otherwise.
 */
async function checkOllamaStatus(
  model = "mistral",
  ollamaHost = "http://localhost:11434"
) {
  try {
    // Probe the server; /api/tags also lists the installed models.
    const response = await fetch(`${ollamaHost}/api/tags`);
    if (!response.ok) {
      throw new Error(`Ollama not running on ${ollamaHost}`);
    }

    const data = await response.json();
    // Guard against a payload without "models" so we report a clear
    // "model not found" below instead of an opaque TypeError from .map.
    const availableModels = (data.models ?? []).map((m) => m.name);

    logger.ai("Ollama is running");
    logger.info(
      `📦 Available models: ${availableModels
        .map((m) => m.split(":")[0])
        .join(", ")}`
    );

    // Installed names carry a ":tag" suffix (e.g. "mistral:latest"),
    // so a prefix match is used.
    const modelExists = availableModels.some((m) => m.startsWith(model));
    if (!modelExists) {
      logger.error(`Model "${model}" not found`);
      logger.error(`💡 Install it with: ollama pull ${model}`);
      logger.error(
        `💡 Or choose from: ${availableModels
          .map((m) => m.split(":")[0])
          .join(", ")}`
      );
      return false;
    }

    logger.success(`Using model: ${model}`);
    return true;
  } catch (error) {
    logger.error(`Error connecting to Ollama: ${error.message}`);
    logger.error("💡 Make sure Ollama is installed and running:");
    logger.error(" 1. Install: https://ollama.ai/");
    logger.error(" 2. Start: ollama serve");
    logger.error(` 3. Install model: ollama pull ${model}`);
    return false;
  }
}
|
||||
|
||||
/**
|
||||
* Analyze multiple posts using local Ollama
|
||||
*/
|
||||
/**
 * Analyze a batch of posts for relevance to `context` using a local
 * Ollama model, with a single combined prompt.
 *
 * @param {Array<{text: string}>} posts - Posts to score (order preserved).
 * @param {string} context - Relevance context, e.g. "job layoffs".
 * @param {string} [model="mistral"] - Ollama model name.
 * @param {string} [ollamaHost="http://localhost:11434"] - Server base URL.
 * @returns {Promise<Array<{postIndex: number, isRelevant: boolean,
 *   confidence: number, reasoning: string}>>} One entry per post. Never
 *   rejects: on failure every post is marked relevant at confidence 0.3
 *   so downstream filtering errs on the side of inclusion.
 */
async function analyzeBatch(
  posts,
  context,
  model = "mistral",
  ollamaHost = "http://localhost:11434"
) {
  logger.ai(`Analyzing batch of ${posts.length} posts with ${model}...`);

  try {
    // Posts are truncated to 400 chars to keep the prompt small.
    const prompt = `You are an expert at analyzing LinkedIn posts for relevance to specific contexts.

CONTEXT TO MATCH: "${context}"

Analyze these ${posts.length} LinkedIn posts and determine if each relates to the context above.

POSTS:
${posts
  .map(
    (post, i) => `
POST ${i + 1}:
"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}"
`
  )
  .join("")}

For each post, provide:
- Is it relevant to "${context}"? (YES/NO)
- Confidence level (0.0 to 1.0)
- Brief reasoning

Respond in this EXACT format for each post:
POST 1: YES/NO | 0.X | brief reason
POST 2: YES/NO | 0.X | brief reason
POST 3: YES/NO | 0.X | brief reason

Examples:
- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs
- For hiring context: "we're hiring developers" = YES | 0.8 | job posting
- Unrelated content = NO | 0.1 | not relevant to context`;

    const response = await fetch(`${ollamaHost}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: model,
        prompt: prompt,
        stream: false,
        options: {
          temperature: 0.3, // low temperature for consistent formatting
          top_p: 0.9,
        },
      }),
    });

    if (!response.ok) {
      throw new Error(
        `Ollama API error: ${response.status} ${response.statusText}`
      );
    }

    const data = await response.json();
    const aiResponse = data.response.trim();

    // Parse the response line by line, matching "POST <n>:" markers.
    const analyses = [];
    const lines = aiResponse.split("\n").filter((line) => line.trim());

    for (let i = 0; i < posts.length; i++) {
      let analysis = {
        postIndex: i + 1,
        isRelevant: false,
        confidence: 0.5,
        reasoning: "Could not parse AI response",
      };

      // Look for lines that match "POST X:" pattern
      const postPattern = new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i");

      for (const line of lines) {
        const match = line.match(postPattern);
        if (match) {
          const content = match[1].trim();

          // Parse: YES/NO | 0.X | reasoning
          const parts = content.split("|").map((p) => p.trim());

          if (parts.length >= 3) {
            analysis.isRelevant = parts[0].toUpperCase().includes("YES");
            // NaN check (not `|| 0.5`) so a legitimate 0.0 confidence
            // from the model is preserved instead of becoming 0.5.
            const parsed = parseFloat(parts[1]);
            analysis.confidence = Number.isNaN(parsed)
              ? 0.5
              : Math.max(0, Math.min(1, parsed));
            analysis.reasoning = parts[2] || "No reasoning provided";
          } else {
            // Fallback parsing when the model ignored the pipe format.
            analysis.isRelevant =
              content.toUpperCase().includes("YES") ||
              content.toLowerCase().includes("relevant");
            analysis.confidence = 0.6;
            analysis.reasoning = content.substring(0, 100);
          }
          break;
        }
      }

      analyses.push(analysis);
    }

    // If we didn't get enough analyses, fill in defaults
    while (analyses.length < posts.length) {
      analyses.push({
        postIndex: analyses.length + 1,
        isRelevant: false,
        confidence: 0.3,
        reasoning: "AI response parsing failed",
      });
    }

    return analyses;
  } catch (error) {
    logger.error(`Error in batch AI analysis: ${error.message}`);

    // Fallback: mark all as relevant with low confidence
    return posts.map((_, i) => ({
      postIndex: i + 1,
      isRelevant: true,
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    }));
  }
}
|
||||
|
||||
/**
|
||||
* Analyze a single post using local Ollama (fallback)
|
||||
*/
|
||||
/**
 * Analyze a single post for relevance to `context` using a local Ollama
 * model (fallback path when batch analysis is not used).
 *
 * @param {string} text - Post content to score.
 * @param {string} context - Relevance context, e.g. "job layoffs".
 * @param {string} [model="mistral"] - Ollama model name.
 * @param {string} [ollamaHost="http://localhost:11434"] - Server base URL.
 * @returns {Promise<{isRelevant: boolean, confidence: number, reasoning: string}>}
 *   Never rejects: on failure the post is marked relevant at confidence 0.3.
 */
async function analyzeSinglePost(
  text,
  context,
  model = "mistral",
  ollamaHost = "http://localhost:11434"
) {
  const prompt = `Analyze this LinkedIn post for relevance to: "${context}"

Post: "${text}"

Is this post relevant to "${context}"? Provide:
1. YES or NO
2. Confidence (0.0 to 1.0)
3. Brief reason

Format: YES/NO | 0.X | reason`;

  try {
    const response = await fetch(`${ollamaHost}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: model,
        prompt: prompt,
        stream: false,
        options: {
          temperature: 0.3, // low temperature for consistent formatting
        },
      }),
    });

    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.status}`);
    }

    const data = await response.json();
    const aiResponse = data.response.trim();

    // Expected shape: YES/NO | 0.X | reason
    const parts = aiResponse.split("|").map((p) => p.trim());

    if (parts.length >= 3) {
      // NaN check (not `|| 0.5`) so a legitimate 0.0 confidence from
      // the model is preserved instead of becoming 0.5.
      const parsed = parseFloat(parts[1]);
      return {
        isRelevant: parts[0].toUpperCase().includes("YES"),
        confidence: Number.isNaN(parsed)
          ? 0.5
          : Math.max(0, Math.min(1, parsed)),
        reasoning: parts[2],
      };
    } else {
      // Fallback parsing when the model ignored the pipe format.
      return {
        isRelevant:
          aiResponse.toLowerCase().includes("yes") ||
          aiResponse.toLowerCase().includes("relevant"),
        confidence: 0.6,
        reasoning: aiResponse.substring(0, 100),
      };
    }
  } catch (error) {
    return {
      isRelevant: true, // Default to include on error
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    };
  }
}
|
||||
|
||||
/**
|
||||
* Find the most recent results file if none specified
|
||||
*/
|
||||
/**
 * Locate the newest scraper results file in `resultsDir`.
 * Matches "results-*.json" / "linkedin-results-*.json", excluding
 * AI-annotated outputs ("-ai-").
 *
 * @param {string} [resultsDir="results"] - Directory to search.
 * @returns {string} path to the most recent results file
 * @throws {Error} when the directory or any matching file is missing
 */
function findLatestResultsFile(resultsDir = "results") {
  const fs = require("fs");
  const path = require("path");

  if (!fs.existsSync(resultsDir)) {
    throw new Error("Results directory not found. Run the scraper first.");
  }

  const isResultsFile = (name) =>
    (name.startsWith("results-") || name.startsWith("linkedin-results-")) &&
    name.endsWith(".json") &&
    !name.includes("-ai-");

  const candidates = fs.readdirSync(resultsDir).filter(isResultsFile).sort();

  if (candidates.length === 0) {
    throw new Error("No results files found. Run the scraper first.");
  }

  // Timestamped names sort lexicographically; the last entry is newest.
  return path.join(resultsDir, candidates[candidates.length - 1]);
}
|
||||
|
||||
// Public API of the AI analysis module.
module.exports = {
  checkOllamaStatus,
  analyzeBatch,
  analyzeSinglePost,
  findLatestResultsFile,
};
|
||||
const { logger } = require("./logger");
|
||||
|
||||
/**
|
||||
* AI Analysis utilities for post processing with Ollama
|
||||
* Extracted from ai-analyzer-local.js for reuse across parsers
|
||||
*/
|
||||
|
||||
/**
|
||||
* Check if Ollama is running and the model is available
|
||||
*/
|
||||
async function checkOllamaStatus(
|
||||
model = "mistral",
|
||||
ollamaHost = "http://localhost:11434"
|
||||
) {
|
||||
try {
|
||||
// Check if Ollama is running
|
||||
const response = await fetch(`${ollamaHost}/api/tags`);
|
||||
if (!response.ok) {
|
||||
throw new Error(`Ollama not running on ${ollamaHost}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
const availableModels = data.models.map((m) => m.name);
|
||||
|
||||
logger.ai("Ollama is running");
|
||||
logger.info(
|
||||
`📦 Available models: ${availableModels
|
||||
.map((m) => m.split(":")[0])
|
||||
.join(", ")}`
|
||||
);
|
||||
|
||||
// Check if requested model is available
|
||||
const modelExists = availableModels.some((m) => m.startsWith(model));
|
||||
if (!modelExists) {
|
||||
logger.error(`Model "${model}" not found`);
|
||||
logger.error(`💡 Install it with: ollama pull ${model}`);
|
||||
logger.error(
|
||||
`💡 Or choose from: ${availableModels
|
||||
.map((m) => m.split(":")[0])
|
||||
.join(", ")}`
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
logger.success(`Using model: ${model}`);
|
||||
return true;
|
||||
} catch (error) {
|
||||
logger.error(`Error connecting to Ollama: ${error.message}`);
|
||||
logger.error("💡 Make sure Ollama is installed and running:");
|
||||
logger.error(" 1. Install: https://ollama.ai/");
|
||||
logger.error(" 2. Start: ollama serve");
|
||||
logger.error(` 3. Install model: ollama pull ${model}`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyze multiple posts using local Ollama
|
||||
*/
|
||||
async function analyzeBatch(
|
||||
posts,
|
||||
context,
|
||||
model = "mistral",
|
||||
ollamaHost = "http://localhost:11434"
|
||||
) {
|
||||
logger.ai(`Analyzing batch of ${posts.length} posts with ${model}...`);
|
||||
|
||||
try {
|
||||
const prompt = `You are an expert at analyzing LinkedIn posts for relevance to specific contexts.
|
||||
|
||||
CONTEXT TO MATCH: "${context}"
|
||||
|
||||
Analyze these ${
|
||||
posts.length
|
||||
} LinkedIn posts and determine if each relates to the context above.
|
||||
|
||||
POSTS:
|
||||
${posts
|
||||
.map(
|
||||
(post, i) => `
|
||||
POST ${i + 1}:
|
||||
"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}"
|
||||
`
|
||||
)
|
||||
.join("")}
|
||||
|
||||
For each post, provide:
|
||||
- Is it relevant to "${context}"? (YES/NO)
|
||||
- Confidence level (0.0 to 1.0)
|
||||
- Brief reasoning
|
||||
|
||||
Respond in this EXACT format for each post:
|
||||
POST 1: YES/NO | 0.X | brief reason
|
||||
POST 2: YES/NO | 0.X | brief reason
|
||||
POST 3: YES/NO | 0.X | brief reason
|
||||
|
||||
Examples:
|
||||
- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs
|
||||
- For hiring context: "we're hiring developers" = YES | 0.8 | job posting
|
||||
- Unrelated content = NO | 0.1 | not relevant to context`;
|
||||
|
||||
const response = await fetch(`${ollamaHost}/api/generate`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: model,
|
||||
prompt: prompt,
|
||||
stream: false,
|
||||
options: {
|
||||
temperature: 0.3,
|
||||
top_p: 0.9,
|
||||
},
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(
|
||||
`Ollama API error: ${response.status} ${response.statusText}`
|
||||
);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
const aiResponse = data.response.trim();
|
||||
|
||||
// Parse the response
|
||||
const analyses = [];
|
||||
const lines = aiResponse.split("\n").filter((line) => line.trim());
|
||||
|
||||
for (let i = 0; i < posts.length; i++) {
|
||||
let analysis = {
|
||||
postIndex: i + 1,
|
||||
isRelevant: false,
|
||||
confidence: 0.5,
|
||||
reasoning: "Could not parse AI response",
|
||||
};
|
||||
|
||||
// Look for lines that match "POST X:" pattern
|
||||
const postPattern = new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i");
|
||||
|
||||
for (const line of lines) {
|
||||
const match = line.match(postPattern);
|
||||
if (match) {
|
||||
const content = match[1].trim();
|
||||
|
||||
// Parse: YES/NO | 0.X | reasoning
|
||||
const parts = content.split("|").map((p) => p.trim());
|
||||
|
||||
if (parts.length >= 3) {
|
||||
analysis.isRelevant = parts[0].toUpperCase().includes("YES");
|
||||
analysis.confidence = Math.max(
|
||||
0,
|
||||
Math.min(1, parseFloat(parts[1]) || 0.5)
|
||||
);
|
||||
analysis.reasoning = parts[2] || "No reasoning provided";
|
||||
} else {
|
||||
// Fallback parsing
|
||||
analysis.isRelevant =
|
||||
content.toUpperCase().includes("YES") ||
|
||||
content.toLowerCase().includes("relevant");
|
||||
analysis.confidence = 0.6;
|
||||
analysis.reasoning = content.substring(0, 100);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
analyses.push(analysis);
|
||||
}
|
||||
|
||||
// If we didn't get enough analyses, fill in defaults
|
||||
while (analyses.length < posts.length) {
|
||||
analyses.push({
|
||||
postIndex: analyses.length + 1,
|
||||
isRelevant: false,
|
||||
confidence: 0.3,
|
||||
reasoning: "AI response parsing failed",
|
||||
});
|
||||
}
|
||||
|
||||
return analyses;
|
||||
} catch (error) {
|
||||
logger.error(`Error in batch AI analysis: ${error.message}`);
|
||||
|
||||
// Fallback: mark all as relevant with low confidence
|
||||
return posts.map((_, i) => ({
|
||||
postIndex: i + 1,
|
||||
isRelevant: true,
|
||||
confidence: 0.3,
|
||||
reasoning: `Analysis failed: ${error.message}`,
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyze a single post using local Ollama (fallback)
|
||||
*/
|
||||
async function analyzeSinglePost(
|
||||
text,
|
||||
context,
|
||||
model = "mistral",
|
||||
ollamaHost = "http://localhost:11434"
|
||||
) {
|
||||
const prompt = `Analyze this LinkedIn post for relevance to: "${context}"
|
||||
|
||||
Post: "${text}"
|
||||
|
||||
Is this post relevant to "${context}"? Provide:
|
||||
1. YES or NO
|
||||
2. Confidence (0.0 to 1.0)
|
||||
3. Brief reason
|
||||
|
||||
Format: YES/NO | 0.X | reason`;
|
||||
|
||||
try {
|
||||
const response = await fetch(`${ollamaHost}/api/generate`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: model,
|
||||
prompt: prompt,
|
||||
stream: false,
|
||||
options: {
|
||||
temperature: 0.3,
|
||||
},
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Ollama API error: ${response.status}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
const aiResponse = data.response.trim();
|
||||
|
||||
// Parse response
|
||||
const parts = aiResponse.split("|").map((p) => p.trim());
|
||||
|
||||
if (parts.length >= 3) {
|
||||
return {
|
||||
isRelevant: parts[0].toUpperCase().includes("YES"),
|
||||
confidence: Math.max(0, Math.min(1, parseFloat(parts[1]) || 0.5)),
|
||||
reasoning: parts[2],
|
||||
};
|
||||
} else {
|
||||
// Fallback parsing
|
||||
return {
|
||||
isRelevant:
|
||||
aiResponse.toLowerCase().includes("yes") ||
|
||||
aiResponse.toLowerCase().includes("relevant"),
|
||||
confidence: 0.6,
|
||||
reasoning: aiResponse.substring(0, 100),
|
||||
};
|
||||
}
|
||||
} catch (error) {
|
||||
return {
|
||||
isRelevant: true, // Default to include on error
|
||||
confidence: 0.3,
|
||||
reasoning: `Analysis failed: ${error.message}`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the most recent results file if none specified
|
||||
*/
|
||||
function findLatestResultsFile(resultsDir = "results") {
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
if (!fs.existsSync(resultsDir)) {
|
||||
throw new Error("Results directory not found. Run the scraper first.");
|
||||
}
|
||||
|
||||
const files = fs
|
||||
.readdirSync(resultsDir)
|
||||
.filter(
|
||||
(f) =>
|
||||
(f.startsWith("results-") || f.startsWith("linkedin-results-")) &&
|
||||
f.endsWith(".json") &&
|
||||
!f.includes("-ai-")
|
||||
)
|
||||
.sort()
|
||||
.reverse();
|
||||
|
||||
if (files.length === 0) {
|
||||
throw new Error("No results files found. Run the scraper first.");
|
||||
}
|
||||
|
||||
return path.join(resultsDir, files[0]);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
checkOllamaStatus,
|
||||
analyzeBatch,
|
||||
analyzeSinglePost,
|
||||
findLatestResultsFile,
|
||||
};
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,123 +1,123 @@
|
||||
const chalk = require("chalk");
|
||||
|
||||
/**
|
||||
* Configurable logger with color support and level controls
|
||||
* Can enable/disable different log levels: debug, info, warning, error, success
|
||||
*/
|
||||
/**
 * Configurable logger with optional ANSI colors and per-level toggles.
 * Levels: debug, info, warning, error, success — each can be switched
 * on or off individually (setLevel) or all at once (silent/verbose).
 */
class Logger {
  /**
   * @param {object}  [options]
   * @param {boolean} [options.debug=true]   enable debug output
   * @param {boolean} [options.info=true]    enable info output
   * @param {boolean} [options.warning=true] enable warning output
   * @param {boolean} [options.error=true]   enable error output
   * @param {boolean} [options.success=true] enable success output
   * @param {boolean} [options.colors=true]  colorize output with chalk
   */
  constructor(options = {}) {
    // Every flag defaults to enabled; only an explicit `false` disables it.
    const enabled = (flag) => flag !== false;
    this.levels = {
      debug: enabled(options.debug),
      info: enabled(options.info),
      warning: enabled(options.warning),
      error: enabled(options.error),
      success: enabled(options.success),
    };
    this.colors = enabled(options.colors);
  }

  /**
   * Build the "[time] [LEVEL] message" line, colorized when enabled.
   * @param {string} level   level name (uppercased into the tag)
   * @param {string} message body text
   * @param {string} [prefix] text inserted before the message
   * @returns {string} formatted line
   */
  _formatMessage(level, message, prefix = "") {
    const stamp = new Date().toLocaleTimeString();
    const line = `[${stamp}] [${level.toUpperCase()}] ${prefix}${message}`;
    if (!this.colors) {
      return line;
    }
    switch (level) {
      case "debug":
        return chalk.gray(line);
      case "info":
        return chalk.blue(line);
      case "warning":
        return chalk.yellow(line);
      case "error":
        return chalk.red(line);
      case "success":
        return chalk.green(line);
      default:
        // Unknown levels print uncolored.
        return line;
    }
  }

  debug(message) {
    if (this.levels.debug) console.log(this._formatMessage("debug", message));
  }

  info(message) {
    if (this.levels.info) console.log(this._formatMessage("info", message));
  }

  warning(message) {
    if (this.levels.warning)
      console.warn(this._formatMessage("warning", message));
  }

  error(message) {
    if (this.levels.error)
      console.error(this._formatMessage("error", message));
  }

  success(message) {
    if (this.levels.success)
      console.log(this._formatMessage("success", message));
  }

  // Emoji-prefixed convenience wrappers (all log at info level).
  step(message) {
    this.info(`🚀 ${message}`);
  }

  search(message) {
    this.info(`🔍 ${message}`);
  }

  ai(message) {
    this.info(`🧠 ${message}`);
  }

  location(message) {
    this.info(`📍 ${message}`);
  }

  file(message) {
    this.info(`📄 ${message}`);
  }

  /** Toggle a single level on or off; unknown level names are ignored. */
  setLevel(level, enabled) {
    if (Object.prototype.hasOwnProperty.call(this.levels, level)) {
      this.levels[level] = enabled;
    }
  }

  /** Disable every log level. */
  silent() {
    for (const level of Object.keys(this.levels)) {
      this.levels[level] = false;
    }
  }

  /** Enable every log level. */
  verbose() {
    for (const level of Object.keys(this.levels)) {
      this.levels[level] = true;
    }
  }
}
|
||||
|
||||
// Create default logger instance — shared singleton for modules that
// don't need custom level configuration.
const logger = new Logger();

// Export both the class and default instance
module.exports = {
  Logger,
  logger,
};
|
||||
const chalk = require("chalk");
|
||||
|
||||
/**
|
||||
* Configurable logger with color support and level controls
|
||||
* Can enable/disable different log levels: debug, info, warning, error, success
|
||||
*/
|
||||
/**
 * Leveled console logger with per-level on/off switches
 * (debug, info, warning, error, success) and optional chalk colors.
 */
class Logger {
  /**
   * @param {Object} [options] - per-level flags (each defaults to true)
   *   plus `colors` (default true) to toggle chalk colorization.
   */
  constructor(options = {}) {
    // A level is enabled unless the caller explicitly passes `false`.
    this.levels = {
      debug: options.debug !== false,
      info: options.info !== false,
      warning: options.warning !== false,
      error: options.error !== false,
      success: options.success !== false,
    };
    this.colors = options.colors !== false;
  }

  /**
   * Build the final log line: "[HH:MM:SS] [LEVEL] prefix+message",
   * colorized per level when colors are enabled.
   */
  _formatMessage(level, message, prefix = "") {
    const stamp = new Date().toLocaleTimeString();
    const plain = `[${stamp}] [${level.toUpperCase()}] ${prefix}${message}`;
    if (!this.colors) {
      return plain;
    }
    // Known levels map to a chalk painter; anything else stays uncolored.
    const painters = {
      debug: chalk.gray,
      info: chalk.blue,
      warning: chalk.yellow,
      error: chalk.red,
      success: chalk.green,
    };
    const paint = painters[level];
    return paint ? paint(plain) : plain;
  }

  /** Log at debug level (stdout) when enabled. */
  debug(message) {
    if (!this.levels.debug) return;
    console.log(this._formatMessage("debug", message));
  }

  /** Log at info level (stdout) when enabled. */
  info(message) {
    if (!this.levels.info) return;
    console.log(this._formatMessage("info", message));
  }

  /** Log at warning level (console.warn) when enabled. */
  warning(message) {
    if (!this.levels.warning) return;
    console.warn(this._formatMessage("warning", message));
  }

  /** Log at error level (console.error) when enabled. */
  error(message) {
    if (!this.levels.error) return;
    console.error(this._formatMessage("error", message));
  }

  /** Log at success level (stdout) when enabled. */
  success(message) {
    if (!this.levels.success) return;
    console.log(this._formatMessage("success", message));
  }

  // Convenience methods with emoji prefixes for better UX.
  // All of them are info-level, so they honor the `info` switch.

  /** Info-level message announcing a pipeline step. */
  step(message) {
    this.info(`🚀 ${message}`);
  }

  /** Info-level message for search activity. */
  search(message) {
    this.info(`🔍 ${message}`);
  }

  /** Info-level message for AI/model activity. */
  ai(message) {
    this.info(`🧠 ${message}`);
  }

  /** Info-level message for location handling. */
  location(message) {
    this.info(`📍 ${message}`);
  }

  /** Info-level message for file I/O. */
  file(message) {
    this.info(`📄 ${message}`);
  }

  /**
   * Toggle a single level at runtime. Unknown level names are ignored
   * (own-property check keeps prototype names like "toString" out).
   */
  setLevel(level, enabled) {
    if (Object.prototype.hasOwnProperty.call(this.levels, level)) {
      this.levels[level] = enabled;
    }
  }

  /** Disable every level. */
  silent() {
    for (const level of Object.keys(this.levels)) {
      this.levels[level] = false;
    }
  }

  /** Enable every level. */
  verbose() {
    for (const level of Object.keys(this.levels)) {
      this.levels[level] = true;
    }
  }
}
|
||||
|
||||
// Shared ready-to-use logger for the package.
const logger = new Logger();

// Expose both the class (for custom configurations) and the default instance.
module.exports = { Logger, logger };
|
||||
|
||||
@ -1,124 +1,124 @@
|
||||
/**
|
||||
* Shared test utilities for parsers
|
||||
* Common mocks, helpers, and test data
|
||||
*/
|
||||
|
||||
/**
|
||||
* Mock Playwright page object for testing
|
||||
*/
|
||||
/**
 * Build a stubbed Playwright page whose async methods all resolve
 * immediately with neutral values. Requires the jest global, so it is
 * usable only inside test environments.
 */
function createMockPage() {
  const resolves = (value) => jest.fn().mockResolvedValue(value);
  return {
    goto: resolves(undefined),
    waitForSelector: resolves(undefined),
    $$: resolves([]),
    $: resolves(null),
    textContent: resolves(""),
    close: resolves(undefined),
  };
}
|
||||
|
||||
/**
|
||||
* Mock fetch for AI API calls
|
||||
*/
|
||||
/**
 * Build a jest mock of `fetch` that resolves to an OK response whose
 * json() yields `response`. Because `response` is spread last, its
 * top-level keys (e.g. ok, status, json) also override the response
 * object itself, letting tests simulate failures.
 */
function createMockFetch(response = {}) {
  const mockResponse = {
    ok: true,
    status: 200,
    json: jest.fn().mockResolvedValue(response),
    ...response,
  };
  return jest.fn().mockResolvedValue(mockResponse);
}
|
||||
|
||||
/**
|
||||
* Sample test data for posts
|
||||
*/
|
||||
// Fixture: two representative scraped posts (one layoff, one hiring).
// Shape matches what the parsers emit: { text, keyword, profileLink }.
const samplePosts = [
  {
    text: "We are laying off 100 employees due to economic downturn.",
    keyword: "layoff",
    profileLink: "https://linkedin.com/in/test-user-1",
  },
  {
    text: "Exciting opportunity! We are hiring senior developers for our team.",
    keyword: "hiring",
    profileLink: "https://linkedin.com/in/test-user-2",
  },
];
|
||||
|
||||
/**
|
||||
* Sample location test data
|
||||
*/
|
||||
// Fixture: Canadian locations in the mixed formats seen in real profiles —
// full "City, Province, Country" and abbreviated "City, XX".
const sampleLocations = [
  "Toronto, Ontario, Canada",
  "Vancouver, BC",
  "Calgary, Alberta",
  "Montreal, Quebec",
  "Halifax, Nova Scotia",
];
|
||||
|
||||
/**
|
||||
* Common test assertions
|
||||
*/
|
||||
/**
 * Assert (via jest `expect`) that `post` has the parser post shape:
 * a non-empty string `text`, plus `keyword` and `profileLink`.
 */
function expectValidPost(post) {
  for (const key of ["text", "keyword", "profileLink"]) {
    expect(post).toHaveProperty(key);
  }
  expect(typeof post.text).toBe("string");
  expect(post.text.length).toBeGreaterThan(0);
}
|
||||
|
||||
/**
 * Assert (via jest `expect`) that an AI analysis result carries a
 * boolean `isRelevant`, a `confidence` within [0, 1], and `reasoning`.
 */
function expectValidAIAnalysis(analysis) {
  for (const key of ["isRelevant", "confidence", "reasoning"]) {
    expect(analysis).toHaveProperty(key);
  }
  expect(typeof analysis.isRelevant).toBe("boolean");
  expect(analysis.confidence).toBeGreaterThanOrEqual(0);
  expect(analysis.confidence).toBeLessThanOrEqual(1);
}
|
||||
|
||||
/** Assert (via jest `expect`) that `location` is a non-empty string. */
function expectValidLocation(location) {
  expect(typeof location).toBe("string");
  expect(location.length).toBeGreaterThan(0);
}
|
||||
|
||||
/**
|
||||
* Test environment setup
|
||||
*/
|
||||
/**
 * Prepare the test environment: set the env vars the parsers read and
 * silence all console output. Pair with teardownTestEnv() in afterEach.
 */
function setupTestEnv() {
  // Environment the parsers and AI utilities read.
  Object.assign(process.env, {
    NODE_ENV: "test",
    OLLAMA_HOST: "http://localhost:11434",
    AI_CONTEXT: "test context",
  });

  // Swallow console output for the duration of the test run.
  for (const method of ["log", "error", "warn"]) {
    jest.spyOn(console, method).mockImplementation(() => {});
  }
}
|
||||
|
||||
/**
|
||||
* Clean up test environment
|
||||
*/
|
||||
/**
 * Undo setupTestEnv(): restore the console spies and clear the env vars.
 *
 * Fix: the original called `console.log.mockRestore()` unconditionally,
 * which throws a TypeError when setupTestEnv() was never run (the console
 * methods are then plain functions without `mockRestore`). The optional
 * call makes teardown safe to invoke on its own.
 */
function teardownTestEnv() {
  // Restore console methods; `mockRestore` exists only while the method
  // is a jest spy, so guard with an optional call.
  console.log.mockRestore?.();
  console.error.mockRestore?.();
  console.warn.mockRestore?.();

  // Clear the environment set up for the tests.
  delete process.env.NODE_ENV;
  delete process.env.OLLAMA_HOST;
  delete process.env.AI_CONTEXT;
}
|
||||
|
||||
// Public surface of the shared test utilities.
module.exports = {
  createMockPage,
  createMockFetch,
  samplePosts,
  sampleLocations,
  expectValidPost,
  expectValidAIAnalysis,
  expectValidLocation,
  setupTestEnv,
  teardownTestEnv,
};
|
||||
/**
|
||||
* Shared test utilities for parsers
|
||||
* Common mocks, helpers, and test data
|
||||
*/
|
||||
|
||||
/**
|
||||
* Mock Playwright page object for testing
|
||||
*/
|
||||
/**
 * Build a stubbed Playwright page whose async methods all resolve
 * immediately with neutral values. Requires the jest global, so it is
 * usable only inside test environments.
 */
function createMockPage() {
  const resolves = (value) => jest.fn().mockResolvedValue(value);
  return {
    goto: resolves(undefined),
    waitForSelector: resolves(undefined),
    $$: resolves([]),
    $: resolves(null),
    textContent: resolves(""),
    close: resolves(undefined),
  };
}
|
||||
|
||||
/**
|
||||
* Mock fetch for AI API calls
|
||||
*/
|
||||
/**
 * Build a jest mock of `fetch` that resolves to an OK response whose
 * json() yields `response`. Because `response` is spread last, its
 * top-level keys (e.g. ok, status, json) also override the response
 * object itself, letting tests simulate failures.
 */
function createMockFetch(response = {}) {
  const mockResponse = {
    ok: true,
    status: 200,
    json: jest.fn().mockResolvedValue(response),
    ...response,
  };
  return jest.fn().mockResolvedValue(mockResponse);
}
|
||||
|
||||
/**
|
||||
* Sample test data for posts
|
||||
*/
|
||||
// Fixture: two representative scraped posts (one layoff, one hiring).
// Shape matches what the parsers emit: { text, keyword, profileLink }.
const samplePosts = [
  {
    text: "We are laying off 100 employees due to economic downturn.",
    keyword: "layoff",
    profileLink: "https://linkedin.com/in/test-user-1",
  },
  {
    text: "Exciting opportunity! We are hiring senior developers for our team.",
    keyword: "hiring",
    profileLink: "https://linkedin.com/in/test-user-2",
  },
];
|
||||
|
||||
/**
|
||||
* Sample location test data
|
||||
*/
|
||||
// Fixture: Canadian locations in the mixed formats seen in real profiles —
// full "City, Province, Country" and abbreviated "City, XX".
const sampleLocations = [
  "Toronto, Ontario, Canada",
  "Vancouver, BC",
  "Calgary, Alberta",
  "Montreal, Quebec",
  "Halifax, Nova Scotia",
];
|
||||
|
||||
/**
|
||||
* Common test assertions
|
||||
*/
|
||||
/**
 * Assert (via jest `expect`) that `post` has the parser post shape:
 * a non-empty string `text`, plus `keyword` and `profileLink`.
 */
function expectValidPost(post) {
  for (const key of ["text", "keyword", "profileLink"]) {
    expect(post).toHaveProperty(key);
  }
  expect(typeof post.text).toBe("string");
  expect(post.text.length).toBeGreaterThan(0);
}
|
||||
|
||||
/**
 * Assert (via jest `expect`) that an AI analysis result carries a
 * boolean `isRelevant`, a `confidence` within [0, 1], and `reasoning`.
 */
function expectValidAIAnalysis(analysis) {
  for (const key of ["isRelevant", "confidence", "reasoning"]) {
    expect(analysis).toHaveProperty(key);
  }
  expect(typeof analysis.isRelevant).toBe("boolean");
  expect(analysis.confidence).toBeGreaterThanOrEqual(0);
  expect(analysis.confidence).toBeLessThanOrEqual(1);
}
|
||||
|
||||
/** Assert (via jest `expect`) that `location` is a non-empty string. */
function expectValidLocation(location) {
  expect(typeof location).toBe("string");
  expect(location.length).toBeGreaterThan(0);
}
|
||||
|
||||
/**
|
||||
* Test environment setup
|
||||
*/
|
||||
/**
 * Prepare the test environment: set the env vars the parsers read and
 * silence all console output. Pair with teardownTestEnv() in afterEach.
 */
function setupTestEnv() {
  // Environment the parsers and AI utilities read.
  Object.assign(process.env, {
    NODE_ENV: "test",
    OLLAMA_HOST: "http://localhost:11434",
    AI_CONTEXT: "test context",
  });

  // Swallow console output for the duration of the test run.
  for (const method of ["log", "error", "warn"]) {
    jest.spyOn(console, method).mockImplementation(() => {});
  }
}
|
||||
|
||||
/**
|
||||
* Clean up test environment
|
||||
*/
|
||||
/**
 * Undo setupTestEnv(): restore the console spies and clear the env vars.
 *
 * Fix: the original called `console.log.mockRestore()` unconditionally,
 * which throws a TypeError when setupTestEnv() was never run (the console
 * methods are then plain functions without `mockRestore`). The optional
 * call makes teardown safe to invoke on its own.
 */
function teardownTestEnv() {
  // Restore console methods; `mockRestore` exists only while the method
  // is a jest spy, so guard with an optional call.
  console.log.mockRestore?.();
  console.error.mockRestore?.();
  console.warn.mockRestore?.();

  // Clear the environment set up for the tests.
  delete process.env.NODE_ENV;
  delete process.env.OLLAMA_HOST;
  delete process.env.AI_CONTEXT;
}
|
||||
|
||||
// Public surface of the shared test utilities.
module.exports = {
  createMockPage,
  createMockFetch,
  samplePosts,
  sampleLocations,
  expectValidPost,
  expectValidAIAnalysis,
  expectValidLocation,
  setupTestEnv,
  teardownTestEnv,
};
|
||||
|
||||
@ -1,107 +1,107 @@
|
||||
/**
|
||||
* Text processing utilities for cleaning and validating content
|
||||
* Extracted from linkedout.js for reuse across parsers
|
||||
*/
|
||||
|
||||
/**
|
||||
* Clean text by removing hashtags, URLs, emojis, and normalizing whitespace
|
||||
*/
|
||||
/**
 * Strip hashtags, literal "hashtag" markers, URLs, and common emoji from
 * `text`, then collapse whitespace runs. Non-string or empty input
 * yields "".
 */
function cleanText(text) {
  if (typeof text !== "string" || !text) {
    return "";
  }

  const stripped = text
    .replace(/#\w+/g, "") // hashtags like "#hiring"
    .replace(/\bhashtag\b/gi, "") // the literal word "hashtag"
    .replace(/hashtag-\w+/gi, "") // "hashtag-foo" artifacts
    .replace(/https?:\/\/[^\s]+/g, "") // http/https URLs
    // Common emoji blocks: emoticons, symbols/pictographs, transport, flags.
    .replace(
      /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}]/gu,
      ""
    );

  // Collapse whitespace runs and trim the edges.
  return stripped.replace(/\s+/g, " ").trim();
}
|
||||
|
||||
/**
|
||||
* Check if text contains any of the specified keywords (case insensitive)
|
||||
*/
|
||||
/**
 * Case-insensitive check: does `text` contain at least one of `keywords`?
 * Returns false for falsy text or a non-array keyword list.
 */
function containsAnyKeyword(text, keywords) {
  if (!text || !Array.isArray(keywords)) {
    return false;
  }

  const haystack = text.toLowerCase();
  for (const keyword of keywords) {
    if (haystack.includes(keyword.toLowerCase())) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/**
|
||||
* Validate if text meets basic quality criteria
|
||||
*/
|
||||
/**
 * Quality gate for scraped text: must be a string of at least `minLength`
 * characters containing at least one alphanumeric character.
 */
function isValidText(text, minLength = 30) {
  if (typeof text !== "string" || !text) {
    return false;
  }

  const longEnough = text.length >= minLength;
  const hasAlnum = /[a-zA-Z0-9]/.test(text);
  return longEnough && hasAlnum;
}
|
||||
|
||||
/**
|
||||
* Extract domain from URL
|
||||
*/
|
||||
/**
 * Return the hostname of `url`, or null when the input is not a string
 * or cannot be parsed as a URL.
 */
function extractDomain(url) {
  if (typeof url !== "string" || !url) {
    return null;
  }

  try {
    return new URL(url).hostname;
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
|
||||
* Normalize URL by removing query parameters and fragments
|
||||
*/
|
||||
/**
 * Strip the query string and fragment, keeping protocol + hostname + path.
 * Unparseable strings come back unchanged; non-strings yield "".
 */
function normalizeUrl(url) {
  if (typeof url !== "string" || !url) {
    return "";
  }

  try {
    const { protocol, hostname, pathname } = new URL(url);
    return `${protocol}//${hostname}${pathname}`;
  } catch {
    // Not a parseable URL: hand it back untouched.
    return url;
  }
}
|
||||
|
||||
// Public surface of the text-processing helpers.
module.exports = {
  cleanText,
  containsAnyKeyword,
  isValidText,
  extractDomain,
  normalizeUrl,
};
|
||||
/**
|
||||
* Text processing utilities for cleaning and validating content
|
||||
* Extracted from linkedout.js for reuse across parsers
|
||||
*/
|
||||
|
||||
/**
|
||||
* Clean text by removing hashtags, URLs, emojis, and normalizing whitespace
|
||||
*/
|
||||
/**
 * Strip hashtags, literal "hashtag" markers, URLs, and common emoji from
 * `text`, then collapse whitespace runs. Non-string or empty input
 * yields "".
 */
function cleanText(text) {
  if (typeof text !== "string" || !text) {
    return "";
  }

  const stripped = text
    .replace(/#\w+/g, "") // hashtags like "#hiring"
    .replace(/\bhashtag\b/gi, "") // the literal word "hashtag"
    .replace(/hashtag-\w+/gi, "") // "hashtag-foo" artifacts
    .replace(/https?:\/\/[^\s]+/g, "") // http/https URLs
    // Common emoji blocks: emoticons, symbols/pictographs, transport, flags.
    .replace(
      /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}]/gu,
      ""
    );

  // Collapse whitespace runs and trim the edges.
  return stripped.replace(/\s+/g, " ").trim();
}
|
||||
|
||||
/**
|
||||
* Check if text contains any of the specified keywords (case insensitive)
|
||||
*/
|
||||
/**
 * Case-insensitive check: does `text` contain at least one of `keywords`?
 * Returns false for falsy text or a non-array keyword list.
 */
function containsAnyKeyword(text, keywords) {
  if (!text || !Array.isArray(keywords)) {
    return false;
  }

  const haystack = text.toLowerCase();
  for (const keyword of keywords) {
    if (haystack.includes(keyword.toLowerCase())) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/**
|
||||
* Validate if text meets basic quality criteria
|
||||
*/
|
||||
/**
 * Quality gate for scraped text: must be a string of at least `minLength`
 * characters containing at least one alphanumeric character.
 */
function isValidText(text, minLength = 30) {
  if (typeof text !== "string" || !text) {
    return false;
  }

  const longEnough = text.length >= minLength;
  const hasAlnum = /[a-zA-Z0-9]/.test(text);
  return longEnough && hasAlnum;
}
|
||||
|
||||
/**
|
||||
* Extract domain from URL
|
||||
*/
|
||||
/**
 * Return the hostname of `url`, or null when the input is not a string
 * or cannot be parsed as a URL.
 */
function extractDomain(url) {
  if (typeof url !== "string" || !url) {
    return null;
  }

  try {
    return new URL(url).hostname;
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
|
||||
* Normalize URL by removing query parameters and fragments
|
||||
*/
|
||||
/**
 * Strip the query string and fragment, keeping protocol + hostname + path.
 * Unparseable strings come back unchanged; non-strings yield "".
 */
function normalizeUrl(url) {
  if (typeof url !== "string" || !url) {
    return "";
  }

  try {
    const { protocol, hostname, pathname } = new URL(url);
    return `${protocol}//${hostname}${pathname}`;
  } catch {
    // Not a parseable URL: hand it back untouched.
    return url;
  }
}
|
||||
|
||||
// Public surface of the text-processing helpers.
module.exports = {
  cleanText,
  containsAnyKeyword,
  isValidText,
  extractDomain,
  normalizeUrl,
};
|
||||
|
||||
@ -1,194 +1,194 @@
|
||||
/**
|
||||
* Test file for logger functionality
|
||||
*/
|
||||
|
||||
const { Logger, logger } = require("../src/logger");
|
||||
|
||||
describe("Logger", () => {
|
||||
let consoleSpy;
|
||||
let consoleWarnSpy;
|
||||
let consoleErrorSpy;
|
||||
|
||||
beforeEach(() => {
|
||||
consoleSpy = jest.spyOn(console, "log").mockImplementation();
|
||||
consoleWarnSpy = jest.spyOn(console, "warn").mockImplementation();
|
||||
consoleErrorSpy = jest.spyOn(console, "error").mockImplementation();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
consoleSpy.mockRestore();
|
||||
consoleWarnSpy.mockRestore();
|
||||
consoleErrorSpy.mockRestore();
|
||||
});
|
||||
|
||||
test("should create default logger instance", () => {
|
||||
expect(logger).toBeDefined();
|
||||
expect(logger).toBeInstanceOf(Logger);
|
||||
});
|
||||
|
||||
test("should log info messages", () => {
|
||||
logger.info("Test message");
|
||||
expect(consoleSpy).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should create custom logger with disabled levels", () => {
|
||||
const customLogger = new Logger({ debug: false });
|
||||
customLogger.debug("This should not log");
|
||||
expect(consoleSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should use emoji prefixes for convenience methods", () => {
|
||||
logger.step("Test step");
|
||||
logger.ai("Test AI");
|
||||
logger.location("Test location");
|
||||
expect(consoleSpy).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
|
||||
test("should configure levels at runtime", () => {
|
||||
const customLogger = new Logger();
|
||||
customLogger.setLevel("debug", false);
|
||||
customLogger.debug("This should not log");
|
||||
expect(consoleSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should go silent when requested", () => {
|
||||
const customLogger = new Logger();
|
||||
customLogger.silent();
|
||||
customLogger.info("This should not log");
|
||||
customLogger.error("This should not log");
|
||||
expect(consoleSpy).not.toHaveBeenCalled();
|
||||
expect(consoleErrorSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Additional test cases for comprehensive coverage
|
||||
|
||||
test("should log warning messages", () => {
|
||||
logger.warning("Test warning");
|
||||
expect(consoleWarnSpy).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should log error messages", () => {
|
||||
logger.error("Test error");
|
||||
expect(consoleErrorSpy).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should log success messages", () => {
|
||||
logger.success("Test success");
|
||||
expect(consoleSpy).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should log debug messages", () => {
|
||||
logger.debug("Test debug");
|
||||
expect(consoleSpy).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should respect disabled warning level", () => {
|
||||
const customLogger = new Logger({ warning: false });
|
||||
customLogger.warning("This should not log");
|
||||
expect(consoleWarnSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should respect disabled error level", () => {
|
||||
const customLogger = new Logger({ error: false });
|
||||
customLogger.error("This should not log");
|
||||
expect(consoleErrorSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should respect disabled success level", () => {
|
||||
const customLogger = new Logger({ success: false });
|
||||
customLogger.success("This should not log");
|
||||
expect(consoleSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should respect disabled info level", () => {
|
||||
const customLogger = new Logger({ info: false });
|
||||
customLogger.info("This should not log");
|
||||
expect(consoleSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should test all convenience methods", () => {
|
||||
logger.step("Test step");
|
||||
logger.search("Test search");
|
||||
logger.ai("Test AI");
|
||||
logger.location("Test location");
|
||||
logger.file("Test file");
|
||||
expect(consoleSpy).toHaveBeenCalledTimes(5);
|
||||
});
|
||||
|
||||
test("should enable all levels with verbose method", () => {
|
||||
const customLogger = new Logger({ debug: false, info: false });
|
||||
customLogger.verbose();
|
||||
customLogger.debug("This should log");
|
||||
customLogger.info("This should log");
|
||||
expect(consoleSpy).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
test("should handle setLevel with invalid level gracefully", () => {
|
||||
const customLogger = new Logger();
|
||||
expect(() => {
|
||||
customLogger.setLevel("invalid", false);
|
||||
}).not.toThrow();
|
||||
});
|
||||
|
||||
test("should format messages with timestamps", () => {
|
||||
logger.info("Test message");
|
||||
const loggedMessage = consoleSpy.mock.calls[0][0];
|
||||
expect(loggedMessage).toMatch(/\[\d{1,2}:\d{2}:\d{2}\]/);
|
||||
});
|
||||
|
||||
test("should include level in formatted messages", () => {
|
||||
logger.info("Test message");
|
||||
const loggedMessage = consoleSpy.mock.calls[0][0];
|
||||
expect(loggedMessage).toContain("[INFO]");
|
||||
});
|
||||
|
||||
test("should disable colors when colors option is false", () => {
|
||||
const customLogger = new Logger({ colors: false });
|
||||
customLogger.info("Test message");
|
||||
const loggedMessage = consoleSpy.mock.calls[0][0];
|
||||
// Should not contain ANSI color codes
|
||||
expect(loggedMessage).not.toMatch(/\u001b\[/);
|
||||
});
|
||||
|
||||
test("should enable colors by default", () => {
|
||||
logger.info("Test message");
|
||||
const loggedMessage = consoleSpy.mock.calls[0][0];
|
||||
// Should contain ANSI color codes
|
||||
expect(loggedMessage).toMatch(/\u001b\[/);
|
||||
});
|
||||
|
||||
test("should handle multiple level configurations", () => {
|
||||
const customLogger = new Logger({
|
||||
debug: false,
|
||||
info: true,
|
||||
warning: false,
|
||||
error: true,
|
||||
success: false,
|
||||
});
|
||||
|
||||
customLogger.debug("Should not log");
|
||||
customLogger.info("Should log");
|
||||
customLogger.warning("Should not log");
|
||||
customLogger.error("Should log");
|
||||
customLogger.success("Should not log");
|
||||
|
||||
expect(consoleSpy).toHaveBeenCalledTimes(1);
|
||||
expect(consoleErrorSpy).toHaveBeenCalledTimes(1);
|
||||
expect(consoleWarnSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should handle empty or undefined messages", () => {
|
||||
expect(() => {
|
||||
logger.info("");
|
||||
logger.info(undefined);
|
||||
logger.info(null);
|
||||
}).not.toThrow();
|
||||
});
|
||||
|
||||
test("should handle complex message objects", () => {
|
||||
const testObj = { key: "value", nested: { data: "test" } };
|
||||
expect(() => {
|
||||
logger.info(testObj);
|
||||
}).not.toThrow();
|
||||
});
|
||||
});
|
||||
/**
|
||||
* Test file for logger functionality
|
||||
*/
|
||||
|
||||
const { Logger, logger } = require("../src/logger");
|
||||
|
||||
describe("Logger", () => {
|
||||
let consoleSpy;
|
||||
let consoleWarnSpy;
|
||||
let consoleErrorSpy;
|
||||
|
||||
beforeEach(() => {
|
||||
consoleSpy = jest.spyOn(console, "log").mockImplementation();
|
||||
consoleWarnSpy = jest.spyOn(console, "warn").mockImplementation();
|
||||
consoleErrorSpy = jest.spyOn(console, "error").mockImplementation();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
consoleSpy.mockRestore();
|
||||
consoleWarnSpy.mockRestore();
|
||||
consoleErrorSpy.mockRestore();
|
||||
});
|
||||
|
||||
test("should create default logger instance", () => {
|
||||
expect(logger).toBeDefined();
|
||||
expect(logger).toBeInstanceOf(Logger);
|
||||
});
|
||||
|
||||
test("should log info messages", () => {
|
||||
logger.info("Test message");
|
||||
expect(consoleSpy).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should create custom logger with disabled levels", () => {
|
||||
const customLogger = new Logger({ debug: false });
|
||||
customLogger.debug("This should not log");
|
||||
expect(consoleSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should use emoji prefixes for convenience methods", () => {
|
||||
logger.step("Test step");
|
||||
logger.ai("Test AI");
|
||||
logger.location("Test location");
|
||||
expect(consoleSpy).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
|
||||
test("should configure levels at runtime", () => {
|
||||
const customLogger = new Logger();
|
||||
customLogger.setLevel("debug", false);
|
||||
customLogger.debug("This should not log");
|
||||
expect(consoleSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should go silent when requested", () => {
|
||||
const customLogger = new Logger();
|
||||
customLogger.silent();
|
||||
customLogger.info("This should not log");
|
||||
customLogger.error("This should not log");
|
||||
expect(consoleSpy).not.toHaveBeenCalled();
|
||||
expect(consoleErrorSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Additional test cases for comprehensive coverage
|
||||
|
||||
test("should log warning messages", () => {
|
||||
logger.warning("Test warning");
|
||||
expect(consoleWarnSpy).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should log error messages", () => {
|
||||
logger.error("Test error");
|
||||
expect(consoleErrorSpy).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should log success messages", () => {
|
||||
logger.success("Test success");
|
||||
expect(consoleSpy).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should log debug messages", () => {
|
||||
logger.debug("Test debug");
|
||||
expect(consoleSpy).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should respect disabled warning level", () => {
|
||||
const customLogger = new Logger({ warning: false });
|
||||
customLogger.warning("This should not log");
|
||||
expect(consoleWarnSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should respect disabled error level", () => {
|
||||
const customLogger = new Logger({ error: false });
|
||||
customLogger.error("This should not log");
|
||||
expect(consoleErrorSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should respect disabled success level", () => {
|
||||
const customLogger = new Logger({ success: false });
|
||||
customLogger.success("This should not log");
|
||||
expect(consoleSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should respect disabled info level", () => {
|
||||
const customLogger = new Logger({ info: false });
|
||||
customLogger.info("This should not log");
|
||||
expect(consoleSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should test all convenience methods", () => {
|
||||
logger.step("Test step");
|
||||
logger.search("Test search");
|
||||
logger.ai("Test AI");
|
||||
logger.location("Test location");
|
||||
logger.file("Test file");
|
||||
expect(consoleSpy).toHaveBeenCalledTimes(5);
|
||||
});
|
||||
|
||||
test("should enable all levels with verbose method", () => {
|
||||
const customLogger = new Logger({ debug: false, info: false });
|
||||
customLogger.verbose();
|
||||
customLogger.debug("This should log");
|
||||
customLogger.info("This should log");
|
||||
expect(consoleSpy).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
test("should handle setLevel with invalid level gracefully", () => {
|
||||
const customLogger = new Logger();
|
||||
expect(() => {
|
||||
customLogger.setLevel("invalid", false);
|
||||
}).not.toThrow();
|
||||
});
|
||||
|
||||
test("should format messages with timestamps", () => {
|
||||
logger.info("Test message");
|
||||
const loggedMessage = consoleSpy.mock.calls[0][0];
|
||||
expect(loggedMessage).toMatch(/\[\d{1,2}:\d{2}:\d{2}\]/);
|
||||
});
|
||||
|
||||
test("should include level in formatted messages", () => {
|
||||
logger.info("Test message");
|
||||
const loggedMessage = consoleSpy.mock.calls[0][0];
|
||||
expect(loggedMessage).toContain("[INFO]");
|
||||
});
|
||||
|
||||
test("should disable colors when colors option is false", () => {
|
||||
const customLogger = new Logger({ colors: false });
|
||||
customLogger.info("Test message");
|
||||
const loggedMessage = consoleSpy.mock.calls[0][0];
|
||||
// Should not contain ANSI color codes
|
||||
expect(loggedMessage).not.toMatch(/\u001b\[/);
|
||||
});
|
||||
|
||||
test("should enable colors by default", () => {
|
||||
logger.info("Test message");
|
||||
const loggedMessage = consoleSpy.mock.calls[0][0];
|
||||
// Should contain ANSI color codes
|
||||
expect(loggedMessage).toMatch(/\u001b\[/);
|
||||
});
|
||||
|
||||
test("should handle multiple level configurations", () => {
|
||||
const customLogger = new Logger({
|
||||
debug: false,
|
||||
info: true,
|
||||
warning: false,
|
||||
error: true,
|
||||
success: false,
|
||||
});
|
||||
|
||||
customLogger.debug("Should not log");
|
||||
customLogger.info("Should log");
|
||||
customLogger.warning("Should not log");
|
||||
customLogger.error("Should log");
|
||||
customLogger.success("Should not log");
|
||||
|
||||
expect(consoleSpy).toHaveBeenCalledTimes(1);
|
||||
expect(consoleErrorSpy).toHaveBeenCalledTimes(1);
|
||||
expect(consoleWarnSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("should handle empty or undefined messages", () => {
|
||||
expect(() => {
|
||||
logger.info("");
|
||||
logger.info(undefined);
|
||||
logger.info(null);
|
||||
}).not.toThrow();
|
||||
});
|
||||
|
||||
test("should handle complex message objects", () => {
|
||||
const testObj = { key: "value", nested: { data: "test" } };
|
||||
expect(() => {
|
||||
logger.info(testObj);
|
||||
}).not.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
@ -1,94 +1,94 @@
|
||||
/**
|
||||
* Authentication Manager
|
||||
*
|
||||
* Handles login/authentication for different sites
|
||||
*/
|
||||
|
||||
/**
 * Authentication Manager
 *
 * Site-specific login flows driven through a CoreParser instance
 * (which supplies getPage() and navigateTo()).
 */
class AuthManager {
  /** @param {Object} coreParser - owning parser providing page access. */
  constructor(coreParser) {
    this.coreParser = coreParser;
  }

  /** Look up the login strategy registered for a site (case-insensitive). */
  _strategyFor(site) {
    const strategies = {
      linkedin: this.authenticateLinkedIn.bind(this),
      // Add more auth strategies as needed
    };
    return strategies[site.toLowerCase()];
  }

  /**
   * Run the login flow registered for `site` on the given page.
   * @throws {Error} when no strategy exists for the site.
   */
  async authenticate(site, credentials, pageId = "default") {
    const strategy = this._strategyFor(site);
    if (!strategy) {
      throw new Error(`No authentication strategy found for site: ${site}`);
    }
    return await strategy(credentials, pageId);
  }

  /**
   * LinkedIn login: fill the session form, submit, and wait for the nav
   * avatar, which only renders after a successful sign-in.
   * @throws {Error} on missing credentials, unknown page, or login failure.
   */
  async authenticateLinkedIn(credentials, pageId = "default") {
    const { username, password } = credentials;
    if (!username || !password) {
      throw new Error("LinkedIn authentication requires username and password");
    }

    const page = this.coreParser.getPage(pageId);
    if (!page) {
      throw new Error(`Page with ID '${pageId}' not found`);
    }

    try {
      await this.coreParser.navigateTo("https://www.linkedin.com/login", {
        pageId,
      });

      // Fill and submit the login form.
      await page.fill('input[name="session_key"]', username);
      await page.fill('input[name="session_password"]', password);
      await page.click('button[type="submit"]');

      // The global-nav avatar appears only once the session is live.
      await page.waitForSelector("img.global-nav__me-photo", {
        timeout: 15000,
      });

      return true;
    } catch (error) {
      throw new Error(`LinkedIn authentication failed: ${error.message}`);
    }
  }

  /**
   * Non-throwing probe: true when the site's logged-in marker shows up
   * on the page within a short timeout, false otherwise (including for
   * unknown sites or missing pages).
   */
  async isAuthenticated(site, pageId = "default") {
    const page = this.coreParser.getPage(pageId);
    if (!page) {
      return false;
    }

    const checkers = {
      linkedin: async () => {
        try {
          await page.waitForSelector("img.global-nav__me-photo", {
            timeout: 2000,
          });
          return true;
        } catch {
          return false;
        }
      },
    };

    const checker = checkers[site.toLowerCase()];
    return checker ? await checker() : false;
  }
}
|
||||
|
||||
// Export the class itself; callers construct it with their CoreParser.
module.exports = AuthManager;
|
||||
/**
|
||||
* Authentication Manager
|
||||
*
|
||||
* Handles login/authentication for different sites
|
||||
*/
|
||||
|
||||
/**
 * Authentication Manager
 *
 * Site-specific login flows driven through a CoreParser instance
 * (which supplies getPage() and navigateTo()).
 */
class AuthManager {
  /** @param {Object} coreParser - owning parser providing page access. */
  constructor(coreParser) {
    this.coreParser = coreParser;
  }

  /** Look up the login strategy registered for a site (case-insensitive). */
  _strategyFor(site) {
    const strategies = {
      linkedin: this.authenticateLinkedIn.bind(this),
      // Add more auth strategies as needed
    };
    return strategies[site.toLowerCase()];
  }

  /**
   * Run the login flow registered for `site` on the given page.
   * @throws {Error} when no strategy exists for the site.
   */
  async authenticate(site, credentials, pageId = "default") {
    const strategy = this._strategyFor(site);
    if (!strategy) {
      throw new Error(`No authentication strategy found for site: ${site}`);
    }
    return await strategy(credentials, pageId);
  }

  /**
   * LinkedIn login: fill the session form, submit, and wait for the nav
   * avatar, which only renders after a successful sign-in.
   * @throws {Error} on missing credentials, unknown page, or login failure.
   */
  async authenticateLinkedIn(credentials, pageId = "default") {
    const { username, password } = credentials;
    if (!username || !password) {
      throw new Error("LinkedIn authentication requires username and password");
    }

    const page = this.coreParser.getPage(pageId);
    if (!page) {
      throw new Error(`Page with ID '${pageId}' not found`);
    }

    try {
      await this.coreParser.navigateTo("https://www.linkedin.com/login", {
        pageId,
      });

      // Fill and submit the login form.
      await page.fill('input[name="session_key"]', username);
      await page.fill('input[name="session_password"]', password);
      await page.click('button[type="submit"]');

      // The global-nav avatar appears only once the session is live.
      await page.waitForSelector("img.global-nav__me-photo", {
        timeout: 15000,
      });

      return true;
    } catch (error) {
      throw new Error(`LinkedIn authentication failed: ${error.message}`);
    }
  }

  /**
   * Non-throwing probe: true when the site's logged-in marker shows up
   * on the page within a short timeout, false otherwise (including for
   * unknown sites or missing pages).
   */
  async isAuthenticated(site, pageId = "default") {
    const page = this.coreParser.getPage(pageId);
    if (!page) {
      return false;
    }

    const checkers = {
      linkedin: async () => {
        try {
          await page.waitForSelector("img.global-nav__me-photo", {
            timeout: 2000,
          });
          return true;
        } catch {
          return false;
        }
      },
    };

    const checker = checkers[site.toLowerCase()];
    return checker ? await checker() : false;
  }
}
|
||||
|
||||
// CommonJS export so consumers can `require` this manager.
module.exports = AuthManager;
|
||||
|
||||
63
core-parser/index.js
Normal file
63
core-parser/index.js
Normal file
@ -0,0 +1,63 @@
|
||||
const playwright = require('playwright');
|
||||
const AuthManager = require('./auth-manager');
|
||||
const NavigationManager = require('./navigation');
|
||||
|
||||
class CoreParser {
  /**
   * Core browser automation engine shared by all parsers.
   *
   * Owns the Playwright browser/context lifecycle and a registry of named
   * pages, and delegates authentication and navigation to dedicated managers.
   *
   * @param {object} [config] - Overrides merged over the defaults
   *   `{ headless: true, timeout: 60000 }`.
   */
  constructor(config = {}) {
    this.config = {
      headless: true,
      timeout: 60000, // Increased default timeout
      ...config,
    };
    this.browser = null;
    this.context = null;
    this.pages = {};
    this.authManager = new AuthManager(this);
    this.navigationManager = new NavigationManager(this);
  }

  /** Launch Chromium and create a fresh browser context. */
  async init() {
    this.browser = await playwright.chromium.launch({
      headless: this.config.headless,
    });
    this.context = await this.browser.newContext();
  }

  /**
   * Create and register a new page under the given ID.
   * Lazily launches the browser on first use.
   *
   * @param {string} id - Key used to retrieve the page later via getPage().
   * @returns {Promise<object>} The newly created Playwright page.
   */
  async createPage(id) {
    if (!this.browser) await this.init();
    const page = await this.context.newPage();
    this.pages[id] = page;
    return page;
  }

  /**
   * Look up a previously created page.
   * @param {string} id
   * @returns {object|undefined} The page, or undefined if not registered.
   */
  getPage(id) {
    return this.pages[id];
  }

  /** Delegate authentication to AuthManager (see auth-manager.js). */
  async authenticate(site, credentials, pageId) {
    return this.authManager.authenticate(site, credentials, pageId);
  }

  /**
   * Navigate a registered page to a URL, applying this class's defaults.
   *
   * BUG FIX: the defaults destructured here (waitUntil "networkidle" and the
   * config-based timeout) were previously dead code because the raw `options`
   * object was forwarded unchanged, so NavigationManager applied its own
   * "domcontentloaded" default instead. Forwarding the resolved values makes
   * the intended "networkidle" default actually take effect, while explicit
   * caller-supplied options still win.
   *
   * @param {string} url
   * @param {object} [options] - { pageId, waitUntil, retries, retryDelay, timeout }
   * @returns {Promise<boolean>} true when navigation succeeds.
   */
  async navigateTo(url, options = {}) {
    const {
      pageId = "default",
      waitUntil = "networkidle", // Changed default to networkidle
      retries = 1,
      retryDelay = 2000,
      timeout = this.config.timeout,
    } = options;

    return this.navigationManager.navigateTo(url, {
      pageId,
      waitUntil,
      retries,
      retryDelay,
      timeout,
    });
  }

  /** Close the browser and reset all state; safe to call when never started. */
  async cleanup() {
    if (this.browser) {
      await this.browser.close();
      this.browser = null;
      this.context = null;
      this.pages = {};
    }
  }
}
|
||||
|
||||
// CommonJS export: package entry point (see "main" in package.json).
module.exports = CoreParser;
|
||||
@ -1,131 +1,131 @@
|
||||
/**
 * Navigation Manager
 *
 * Handles page navigation with error handling, retries, and logging.
 */

class NavigationManager {
  /**
   * @param {object} coreParser - CoreParser instance providing `getPage` and
   *   `config.timeout`.
   */
  constructor(coreParser) {
    this.coreParser = coreParser;
  }

  /**
   * Navigate to URL with comprehensive error handling and retries.
   *
   * @param {string} url - Destination URL.
   * @param {object} [options]
   * @param {string} [options.pageId="default"] - Registered page to drive.
   * @param {string} [options.waitUntil="domcontentloaded"] - Playwright goto lifecycle event.
   * @param {number} [options.retries=1] - Extra attempts after the first failure.
   * @param {number} [options.retryDelay=2000] - Milliseconds between attempts.
   * @param {number} [options.timeout] - Per-attempt timeout (defaults to core config).
   * @returns {Promise<boolean>} true when navigation succeeds.
   * @throws {Error} When the page is unknown or every attempt fails.
   */
  async navigateTo(url, options = {}) {
    const {
      pageId = "default",
      waitUntil = "domcontentloaded",
      retries = 1,
      retryDelay = 2000,
      timeout = this.coreParser.config.timeout,
    } = options;

    const page = this.coreParser.getPage(pageId);
    if (!page) {
      throw new Error(`Page with ID '${pageId}' not found`);
    }

    let lastError;

    for (let attempt = 0; attempt <= retries; attempt++) {
      try {
        console.log(
          `🌐 Navigating to: ${url} (attempt ${attempt + 1}/${retries + 1})`
        );

        await page.goto(url, {
          waitUntil,
          timeout,
        });

        console.log(`✅ Navigation successful: ${url}`);
        return true;
      } catch (error) {
        lastError = error;
        console.warn(
          `⚠️ Navigation attempt ${attempt + 1} failed: ${error.message}`
        );

        // Delay only between attempts, not after the final failure.
        if (attempt < retries) {
          console.log(`🔄 Retrying in ${retryDelay}ms...`);
          await this.delay(retryDelay);
        }
      }
    }

    // All attempts failed. Guard lastError: with a pathological retries < 0
    // the loop body never runs, and dereferencing `lastError.message` would
    // throw an unrelated TypeError instead of a useful navigation error.
    const reason = lastError ? lastError.message : "no attempts were made";
    const errorMessage = `Navigation failed after ${retries + 1} attempts: ${reason}`;
    console.error(`❌ ${errorMessage}`);
    throw new Error(errorMessage);
  }

  /**
   * Navigate to a URL, then wait for a specific selector to appear.
   *
   * @param {string} url
   * @param {string} selector - CSS selector expected on the resulting page.
   * @param {object} [options] - Same options as navigateTo.
   * @returns {Promise<boolean>} true if the selector appeared; false on selector
   *   timeout (navigation failures still throw via navigateTo).
   */
  async navigateAndWaitFor(url, selector, options = {}) {
    await this.navigateTo(url, options);

    const { pageId = "default", timeout = this.coreParser.config.timeout } =
      options;
    const page = this.coreParser.getPage(pageId);

    try {
      await page.waitForSelector(selector, { timeout });
      console.log(`✅ Selector found: ${selector}`);
      return true;
    } catch (error) {
      console.warn(`⚠️ Selector not found: ${selector} - ${error.message}`);
      return false;
    }
  }

  /**
   * Check if the current page's visible text contains a string.
   *
   * @param {string} content - Text to look for in document.body.innerText.
   * @param {object} [options] - { pageId = "default", timeout = 5000 }
   * @returns {Promise<boolean>} true if found within the timeout.
   */
  async hasContent(content, options = {}) {
    const { pageId = "default", timeout = 5000 } = options;
    const page = this.coreParser.getPage(pageId);

    try {
      // The callback runs in the browser context, hence `document` access.
      await page.waitForFunction(
        (text) => document.body.innerText.includes(text),
        content,
        { timeout }
      );
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Utility delay function.
   * @param {number} ms - Milliseconds to wait.
   */
  async delay(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Get the current URL of a registered page.
   * @param {string} [pageId="default"]
   * @returns {string|null} URL, or null if the page is unknown.
   */
  getCurrentUrl(pageId = "default") {
    const page = this.coreParser.getPage(pageId);
    return page ? page.url() : null;
  }

  /**
   * Take a screenshot for debugging; silently does nothing for unknown pages.
   * @param {string} filepath - Destination path for the image.
   * @param {string} [pageId="default"]
   */
  async screenshot(filepath, pageId = "default") {
    const page = this.coreParser.getPage(pageId);
    if (page) {
      await page.screenshot({ path: filepath });
      console.log(`📸 Screenshot saved: ${filepath}`);
    }
  }
}
|
||||
|
||||
// CommonJS export so CoreParser can `require` this manager.
module.exports = NavigationManager;
|
||||
/**
 * Navigation Manager
 *
 * Handles page navigation with error handling, retries, and logging
 */

class NavigationManager {
  // coreParser supplies getPage() and config.timeout used throughout.
  constructor(coreParser) {
    this.coreParser = coreParser;
  }

  /**
   * Navigate to URL with comprehensive error handling
   *
   * @param {string} url - Destination URL.
   * @param {object} [options]
   * @param {string} [options.pageId="default"] - Registered page to drive.
   * @param {string} [options.waitUntil="domcontentloaded"] - goto lifecycle event.
   * @param {number} [options.retries=1] - Extra attempts after the first failure.
   * @param {number} [options.retryDelay=2000] - Milliseconds between attempts.
   * @param {number} [options.timeout] - Per-attempt timeout (defaults to core config).
   * @returns {Promise<boolean>} true when navigation succeeds.
   * @throws {Error} When the page is unknown or every attempt fails.
   */
  async navigateTo(url, options = {}) {
    const {
      pageId = "default",
      waitUntil = "domcontentloaded",
      retries = 1,
      retryDelay = 2000,
      timeout = this.coreParser.config.timeout,
    } = options;

    const page = this.coreParser.getPage(pageId);
    if (!page) {
      throw new Error(`Page with ID '${pageId}' not found`);
    }

    let lastError;

    // retries counts extra attempts, so the loop runs retries + 1 times.
    for (let attempt = 0; attempt <= retries; attempt++) {
      try {
        console.log(
          `🌐 Navigating to: ${url} (attempt ${attempt + 1}/${retries + 1})`
        );

        await page.goto(url, {
          waitUntil,
          timeout,
        });

        console.log(`✅ Navigation successful: ${url}`);
        return true;
      } catch (error) {
        lastError = error;
        console.warn(
          `⚠️ Navigation attempt ${attempt + 1} failed: ${error.message}`
        );

        // Delay only between attempts, not after the final failure.
        if (attempt < retries) {
          console.log(`🔄 Retrying in ${retryDelay}ms...`);
          await this.delay(retryDelay);
        }
      }
    }

    // All attempts failed
    const errorMessage = `Navigation failed after ${retries + 1} attempts: ${
      lastError.message
    }`;
    console.error(`❌ ${errorMessage}`);
    throw new Error(errorMessage);
  }

  /**
   * Navigate and wait for specific selector
   *
   * @param {string} url
   * @param {string} selector - CSS selector expected on the resulting page.
   * @param {object} [options] - Same options as navigateTo.
   * @returns {Promise<boolean>} true if the selector appeared; false on
   *   selector timeout (navigation failures still throw via navigateTo).
   */
  async navigateAndWaitFor(url, selector, options = {}) {
    await this.navigateTo(url, options);

    const { pageId = "default", timeout = this.coreParser.config.timeout } =
      options;
    const page = this.coreParser.getPage(pageId);

    try {
      await page.waitForSelector(selector, { timeout });
      console.log(`✅ Selector found: ${selector}`);
      return true;
    } catch (error) {
      console.warn(`⚠️ Selector not found: ${selector} - ${error.message}`);
      return false;
    }
  }

  /**
   * Check if current page has specific content
   *
   * @param {string} content - Text to look for in document.body.innerText.
   * @param {object} [options] - { pageId = "default", timeout = 5000 }
   * @returns {Promise<boolean>} true if found within the timeout.
   */
  async hasContent(content, options = {}) {
    const { pageId = "default", timeout = 5000 } = options;
    const page = this.coreParser.getPage(pageId);

    try {
      // The callback runs in the browser context, hence `document` access.
      await page.waitForFunction(
        (text) => document.body.innerText.includes(text),
        content,
        { timeout }
      );
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Utility delay function
   * @param {number} ms - Milliseconds to wait.
   */
  async delay(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Get current page URL
   * @param {string} [pageId="default"]
   * @returns {string|null} URL, or null if the page is unknown.
   */
  getCurrentUrl(pageId = "default") {
    const page = this.coreParser.getPage(pageId);
    return page ? page.url() : null;
  }

  /**
   * Take screenshot for debugging
   * Silently does nothing for unknown page IDs.
   * @param {string} filepath - Destination path for the image.
   * @param {string} [pageId="default"]
   */
  async screenshot(filepath, pageId = "default") {
    const page = this.coreParser.getPage(pageId);
    if (page) {
      await page.screenshot({ path: filepath });
      console.log(`📸 Screenshot saved: ${filepath}`);
    }
  }
}

// CommonJS export so CoreParser can `require` this manager.
module.exports = NavigationManager;
|
||||
|
||||
@ -1,27 +1,7 @@
|
||||
{
|
||||
"name": "core-parser",
|
||||
"version": "1.0.0",
|
||||
"description": "Core browser automation and parsing engine for all parsers",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "jest",
|
||||
"install:browsers": "npx playwright install chromium"
|
||||
},
|
||||
"keywords": [
|
||||
"parser",
|
||||
"playwright",
|
||||
"browser",
|
||||
"automation",
|
||||
"core"
|
||||
],
|
||||
"author": "Job Market Intelligence Team",
|
||||
"license": "ISC",
|
||||
"type": "commonjs",
|
||||
"dependencies": {
|
||||
"playwright": "^1.53.2",
|
||||
"dotenv": "^17.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"jest": "^29.7.0"
|
||||
}
|
||||
}
|
||||
{
|
||||
"name": "core-parser",
|
||||
"version": "1.0.0",
|
||||
"main": "index.js",
|
||||
"description": "Core parser utilities for browser management",
|
||||
"dependencies": {}
|
||||
}
|
||||
|
||||
@ -1,497 +1,497 @@
|
||||
# Job Search Parser - Job Market Intelligence
|
||||
|
||||
Specialized parser for job market intelligence, tracking job postings, market trends, and competitive analysis. Focuses on tech roles and industry insights.
|
||||
|
||||
## 🎯 Purpose
|
||||
|
||||
The Job Search Parser is designed to:
|
||||
|
||||
- **Track Job Market Trends**: Monitor demand for specific roles and skills
|
||||
- **Competitive Intelligence**: Analyze salary ranges and requirements
|
||||
- **Industry Insights**: Track hiring patterns across different sectors
|
||||
- **Skill Gap Analysis**: Identify in-demand technologies and frameworks
|
||||
- **Market Demand Forecasting**: Predict job market trends
|
||||
|
||||
## 🚀 Features
|
||||
|
||||
### Core Functionality
|
||||
|
||||
- **Multi-Source Aggregation**: Collect job data from multiple platforms
|
||||
- **Role-Specific Tracking**: Focus on tech roles and emerging positions
|
||||
- **Skill Analysis**: Extract and categorize required skills
|
||||
- **Salary Intelligence**: Track compensation ranges and trends
|
||||
- **Company Intelligence**: Monitor hiring companies and patterns
|
||||
|
||||
### Advanced Features
|
||||
|
||||
- **Market Trend Analysis**: Identify growing and declining job categories
|
||||
- **Geographic Distribution**: Track job distribution by location
|
||||
- **Experience Level Analysis**: Entry, mid, senior level tracking
|
||||
- **Remote Work Trends**: Monitor remote/hybrid work patterns
|
||||
- **Technology Stack Tracking**: Framework and tool popularity
|
||||
|
||||
## 🌐 Supported Job Sites
|
||||
|
||||
### ✅ Implemented Parsers
|
||||
|
||||
#### SkipTheDrive Parser
|
||||
|
||||
Remote job board specializing in work-from-home positions.
|
||||
|
||||
**Features:**
|
||||
|
||||
- Keyword-based job search with relevance sorting
|
||||
- Job type filtering (full-time, part-time, contract)
|
||||
- Multi-page result parsing with pagination
|
||||
- Featured/sponsored job identification
|
||||
- AI-powered job relevance analysis
|
||||
- Automatic duplicate detection
|
||||
|
||||
**Usage:**
|
||||
|
||||
```bash
|
||||
# Parse SkipTheDrive for QA automation jobs
|
||||
node index.js --sites=skipthedrive --keywords="automation qa,qa engineer"
|
||||
|
||||
# Filter by job type
|
||||
JOB_TYPES="full time,contract" node index.js --sites=skipthedrive
|
||||
|
||||
# Run demo with limited results
|
||||
node index.js --sites=skipthedrive --demo
|
||||
```
|
||||
|
||||
### 🚧 Planned Parsers
|
||||
|
||||
- **Indeed**: Comprehensive job aggregator
|
||||
- **Glassdoor**: Jobs with company reviews and salary data
|
||||
- **Monster**: Traditional job board
|
||||
- **SimplyHired**: Job aggregator with salary estimates
|
||||
- **LinkedIn Jobs**: Professional network job postings
|
||||
- **AngelList**: Startup and tech jobs
|
||||
- **Remote.co**: Dedicated remote work jobs
|
||||
- **FlexJobs**: Flexible and remote positions
|
||||
|
||||
## 📦 Installation
|
||||
|
||||
```bash
|
||||
# Install dependencies
|
||||
npm install
|
||||
|
||||
# Run tests
|
||||
npm test
|
||||
|
||||
# Run demo
|
||||
node demo.js
|
||||
```
|
||||
|
||||
## 🔧 Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
Create a `.env` file in the parser directory:
|
||||
|
||||
```env
|
||||
# Job Search Configuration
|
||||
SEARCH_SOURCES=linkedin,indeed,glassdoor
|
||||
TARGET_ROLES=software engineer,data scientist,product manager
|
||||
LOCATION_FILTER=Toronto,Vancouver,Calgary
|
||||
EXPERIENCE_LEVELS=entry,mid,senior
|
||||
REMOTE_PREFERENCE=remote,hybrid,onsite
|
||||
|
||||
# Analysis Configuration
|
||||
ENABLE_SALARY_ANALYSIS=true
|
||||
ENABLE_SKILL_ANALYSIS=true
|
||||
ENABLE_TREND_ANALYSIS=true
|
||||
MIN_SALARY=50000
|
||||
MAX_SALARY=200000
|
||||
|
||||
# Output Configuration
|
||||
OUTPUT_FORMAT=json,csv
|
||||
SAVE_RAW_DATA=true
|
||||
ANALYSIS_INTERVAL=daily
|
||||
```
|
||||
|
||||
### Command Line Options
|
||||
|
||||
```bash
|
||||
# Basic usage
|
||||
node index.js
|
||||
|
||||
# Specific roles
|
||||
node index.js --roles="frontend developer,backend developer"
|
||||
|
||||
# Geographic focus
|
||||
node index.js --locations="Toronto,Vancouver"
|
||||
|
||||
# Experience level
|
||||
node index.js --experience="senior"
|
||||
|
||||
# Output format
|
||||
node index.js --output=results/job-market-analysis.json
|
||||
```
|
||||
|
||||
**Available Options:**
|
||||
|
||||
- `--roles="role1,role2"`: Target job roles
|
||||
- `--locations="city1,city2"`: Geographic focus
|
||||
- `--experience="entry|mid|senior"`: Experience level
|
||||
- `--remote="remote|hybrid|onsite"`: Remote work preference
|
||||
- `--salary-min=NUMBER`: Minimum salary filter
|
||||
- `--salary-max=NUMBER`: Maximum salary filter
|
||||
- `--output=FILE`: Output filename
|
||||
- `--format=json|csv`: Output format
|
||||
- `--trends`: Enable trend analysis
|
||||
- `--skills`: Enable skill analysis
|
||||
|
||||
## 📊 Keywords
|
||||
|
||||
### Role-Specific Keywords
|
||||
|
||||
Place keyword CSV files in the `keywords/` directory:
|
||||
|
||||
```
|
||||
job-search-parser/
|
||||
├── keywords/
|
||||
│ ├── job-search-keywords.csv # General job search terms
|
||||
│ ├── tech-roles.csv # Technology roles
|
||||
│ ├── data-roles.csv # Data science roles
|
||||
│ ├── management-roles.csv # Management positions
|
||||
│ └── emerging-roles.csv # Emerging job categories
|
||||
└── index.js
|
||||
```
|
||||
|
||||
### Tech Roles Keywords
|
||||
|
||||
```csv
|
||||
keyword
|
||||
software engineer
|
||||
frontend developer
|
||||
backend developer
|
||||
full stack developer
|
||||
data scientist
|
||||
machine learning engineer
|
||||
devops engineer
|
||||
site reliability engineer
|
||||
cloud architect
|
||||
security engineer
|
||||
mobile developer
|
||||
iOS developer
|
||||
Android developer
|
||||
react developer
|
||||
vue developer
|
||||
angular developer
|
||||
node.js developer
|
||||
python developer
|
||||
java developer
|
||||
golang developer
|
||||
rust developer
|
||||
data engineer
|
||||
analytics engineer
|
||||
```
|
||||
|
||||
### Data Science Keywords
|
||||
|
||||
```csv
|
||||
keyword
|
||||
data scientist
|
||||
machine learning engineer
|
||||
data analyst
|
||||
business analyst
|
||||
data engineer
|
||||
analytics engineer
|
||||
ML engineer
|
||||
AI engineer
|
||||
statistician
|
||||
quantitative analyst
|
||||
research scientist
|
||||
data architect
|
||||
BI developer
|
||||
ETL developer
|
||||
```
|
||||
|
||||
## 📈 Usage Examples
|
||||
|
||||
### Basic Job Search
|
||||
|
||||
```bash
|
||||
# Standard job market analysis
|
||||
node index.js
|
||||
|
||||
# Specific tech roles
|
||||
node index.js --roles="software engineer,data scientist"
|
||||
|
||||
# Geographic focus
|
||||
node index.js --locations="Toronto,Vancouver,Calgary"
|
||||
```
|
||||
|
||||
### Advanced Analysis
|
||||
|
||||
```bash
|
||||
# Senior level positions
|
||||
node index.js --experience="senior" --salary-min=100000
|
||||
|
||||
# Remote work opportunities
|
||||
node index.js --remote="remote" --roles="frontend developer"
|
||||
|
||||
# Trend analysis
|
||||
node index.js --trends --skills --output=results/trends.json
|
||||
```
|
||||
|
||||
### Market Intelligence
|
||||
|
||||
```bash
|
||||
# Salary analysis
|
||||
node index.js --salary-min=80000 --salary-max=150000
|
||||
|
||||
# Skill gap analysis
|
||||
node index.js --skills --roles="machine learning engineer"
|
||||
|
||||
# Competitive intelligence
|
||||
node index.js --companies="Google,Microsoft,Amazon"
|
||||
```
|
||||
|
||||
## 📊 Output Format
|
||||
|
||||
### JSON Structure
|
||||
|
||||
```json
|
||||
{
|
||||
"metadata": {
|
||||
"timestamp": "2024-01-15T10:30:00Z",
|
||||
"search_parameters": {
|
||||
"roles": ["software engineer", "data scientist"],
|
||||
"locations": ["Toronto", "Vancouver"],
|
||||
"experience_levels": ["mid", "senior"],
|
||||
"remote_preference": ["remote", "hybrid"]
|
||||
},
|
||||
"total_jobs_found": 1250,
|
||||
"analysis_duration_seconds": 45
|
||||
},
|
||||
"market_overview": {
|
||||
"total_jobs": 1250,
|
||||
"average_salary": 95000,
|
||||
"salary_range": {
|
||||
"min": 65000,
|
||||
"max": 180000,
|
||||
"median": 92000
|
||||
},
|
||||
"remote_distribution": {
|
||||
"remote": 45,
|
||||
"hybrid": 35,
|
||||
"onsite": 20
|
||||
},
|
||||
"experience_distribution": {
|
||||
"entry": 15,
|
||||
"mid": 45,
|
||||
"senior": 40
|
||||
}
|
||||
},
|
||||
"trends": {
|
||||
"growing_skills": [
|
||||
{ "skill": "React", "growth_rate": 25 },
|
||||
{ "skill": "Python", "growth_rate": 18 },
|
||||
{ "skill": "AWS", "growth_rate": 22 }
|
||||
],
|
||||
"declining_skills": [
|
||||
{ "skill": "jQuery", "growth_rate": -12 },
|
||||
{ "skill": "PHP", "growth_rate": -8 }
|
||||
],
|
||||
"emerging_roles": ["AI Engineer", "DevSecOps Engineer", "Data Engineer"]
|
||||
},
|
||||
"jobs": [
|
||||
{
|
||||
"id": "job_1",
|
||||
"title": "Senior Software Engineer",
|
||||
"company": "TechCorp",
|
||||
"location": "Toronto, Ontario",
|
||||
"remote_type": "hybrid",
|
||||
"salary": {
|
||||
"min": 100000,
|
||||
"max": 140000,
|
||||
"currency": "CAD"
|
||||
},
|
||||
"required_skills": ["React", "Node.js", "TypeScript", "AWS"],
|
||||
"preferred_skills": ["GraphQL", "Docker", "Kubernetes"],
|
||||
"experience_level": "senior",
|
||||
"job_url": "https://example.com/job/1",
|
||||
"posted_date": "2024-01-10T09:00:00Z",
|
||||
"scraped_at": "2024-01-15T10:30:00Z"
|
||||
}
|
||||
],
|
||||
"analysis": {
|
||||
"skill_demand": {
|
||||
"React": { "count": 45, "avg_salary": 98000 },
|
||||
"Python": { "count": 38, "avg_salary": 102000 },
|
||||
"AWS": { "count": 32, "avg_salary": 105000 }
|
||||
},
|
||||
"company_insights": {
|
||||
"top_hirers": [
|
||||
{ "company": "TechCorp", "jobs": 25 },
|
||||
{ "company": "StartupXYZ", "jobs": 18 }
|
||||
],
|
||||
"salary_leaders": [
|
||||
{ "company": "BigTech", "avg_salary": 120000 },
|
||||
{ "company": "FinTech", "avg_salary": 115000 }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### CSV Output
|
||||
|
||||
The parser can also generate CSV files for easy analysis:
|
||||
|
||||
```csv
|
||||
job_id,title,company,location,remote_type,salary_min,salary_max,required_skills,experience_level,posted_date
|
||||
job_1,Senior Software Engineer,TechCorp,Toronto,hybrid,100000,140000,"React,Node.js,TypeScript",senior,2024-01-10
|
||||
job_2,Data Scientist,DataCorp,Vancouver,remote,90000,130000,"Python,SQL,ML",mid,2024-01-09
|
||||
```
|
||||
|
||||
## 🔒 Security & Best Practices
|
||||
|
||||
### Data Privacy
|
||||
|
||||
- Respect job site terms of service
|
||||
- Implement appropriate rate limiting
|
||||
- Store data securely and responsibly
|
||||
- Anonymize sensitive information
|
||||
|
||||
### Rate Limiting
|
||||
|
||||
- Implement delays between requests
|
||||
- Respect API rate limits
|
||||
- Use multiple data sources
|
||||
- Monitor for blocking/detection
|
||||
|
||||
### Legal Compliance
|
||||
|
||||
- Educational and research purposes only
|
||||
- Respect website terms of service
|
||||
- Implement data retention policies
|
||||
- Monitor for legal changes
|
||||
|
||||
## 🧪 Testing
|
||||
|
||||
### Run Tests
|
||||
|
||||
```bash
|
||||
# All tests
|
||||
npm test
|
||||
|
||||
# Specific test suites
|
||||
npm test -- --testNamePattern="JobSearch"
|
||||
npm test -- --testNamePattern="Analysis"
|
||||
npm test -- --testNamePattern="Trends"
|
||||
```
|
||||
|
||||
### Test Coverage
|
||||
|
||||
```bash
|
||||
npm run test:coverage
|
||||
```
|
||||
|
||||
## 🚀 Performance Optimization
|
||||
|
||||
### Recommended Settings
|
||||
|
||||
#### Fast Analysis
|
||||
|
||||
```bash
|
||||
node index.js --roles="software engineer" --locations="Toronto"
|
||||
```
|
||||
|
||||
#### Comprehensive Analysis
|
||||
|
||||
```bash
|
||||
node index.js --trends --skills --experience="all"
|
||||
```
|
||||
|
||||
#### Focused Intelligence
|
||||
|
||||
```bash
|
||||
node index.js --salary-min=80000 --remote="remote" --trends
|
||||
```
|
||||
|
||||
### Performance Tips
|
||||
|
||||
- Use specific role filters to reduce data volume
|
||||
- Implement caching for repeated searches
|
||||
- Use parallel processing for multiple sources
|
||||
- Optimize data storage and retrieval
|
||||
|
||||
## 🔧 Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
#### Rate Limiting
|
||||
|
||||
```bash
|
||||
# Reduce request frequency
|
||||
export REQUEST_DELAY=2000
|
||||
node index.js
|
||||
```
|
||||
|
||||
#### Data Source Issues
|
||||
|
||||
```bash
|
||||
# Use specific sources
|
||||
node index.js --sources="linkedin,indeed"
|
||||
|
||||
# Check source availability
|
||||
node index.js --test-sources
|
||||
```
|
||||
|
||||
#### Output Issues
|
||||
|
||||
```bash
|
||||
# Check output directory
|
||||
mkdir -p results
|
||||
node index.js --output=results/analysis.json
|
||||
|
||||
# Verify file permissions
|
||||
chmod 755 results/
|
||||
```
|
||||
|
||||
## 📈 Monitoring & Analytics
|
||||
|
||||
### Key Metrics
|
||||
|
||||
- **Job Volume**: Total jobs found per search
|
||||
- **Salary Trends**: Average and median salary changes
|
||||
- **Skill Demand**: Most requested skills
|
||||
- **Remote Adoption**: Remote work trend analysis
|
||||
- **Market Velocity**: Job posting frequency
|
||||
|
||||
### Dashboard Integration
|
||||
|
||||
- Real-time market monitoring
|
||||
- Trend visualization
|
||||
- Salary benchmarking
|
||||
- Skill gap analysis
|
||||
- Competitive intelligence
|
||||
|
||||
## 🤝 Contributing
|
||||
|
||||
### Development Setup
|
||||
|
||||
1. Fork the repository
|
||||
2. Create feature branch
|
||||
3. Add tests for new functionality
|
||||
4. Ensure all tests pass
|
||||
5. Submit pull request
|
||||
|
||||
### Code Standards
|
||||
|
||||
- Follow existing code style
|
||||
- Add JSDoc comments
|
||||
- Maintain test coverage
|
||||
- Update documentation
|
||||
|
||||
## 📄 License
|
||||
|
||||
This parser is part of the LinkedOut platform and follows the same licensing terms.
|
||||
|
||||
---
|
||||
|
||||
**Note**: This tool is designed for educational and research purposes. Always respect website terms of service and implement appropriate rate limiting and ethical usage practices.
|
||||
# Job Search Parser - Job Market Intelligence
|
||||
|
||||
Specialized parser for job market intelligence, tracking job postings, market trends, and competitive analysis. Focuses on tech roles and industry insights.
|
||||
|
||||
## 🎯 Purpose
|
||||
|
||||
The Job Search Parser is designed to:
|
||||
|
||||
- **Track Job Market Trends**: Monitor demand for specific roles and skills
|
||||
- **Competitive Intelligence**: Analyze salary ranges and requirements
|
||||
- **Industry Insights**: Track hiring patterns across different sectors
|
||||
- **Skill Gap Analysis**: Identify in-demand technologies and frameworks
|
||||
- **Market Demand Forecasting**: Predict job market trends
|
||||
|
||||
## 🚀 Features
|
||||
|
||||
### Core Functionality
|
||||
|
||||
- **Multi-Source Aggregation**: Collect job data from multiple platforms
|
||||
- **Role-Specific Tracking**: Focus on tech roles and emerging positions
|
||||
- **Skill Analysis**: Extract and categorize required skills
|
||||
- **Salary Intelligence**: Track compensation ranges and trends
|
||||
- **Company Intelligence**: Monitor hiring companies and patterns
|
||||
|
||||
### Advanced Features
|
||||
|
||||
- **Market Trend Analysis**: Identify growing and declining job categories
|
||||
- **Geographic Distribution**: Track job distribution by location
|
||||
- **Experience Level Analysis**: Entry, mid, senior level tracking
|
||||
- **Remote Work Trends**: Monitor remote/hybrid work patterns
|
||||
- **Technology Stack Tracking**: Framework and tool popularity
|
||||
|
||||
## 🌐 Supported Job Sites
|
||||
|
||||
### ✅ Implemented Parsers
|
||||
|
||||
#### SkipTheDrive Parser
|
||||
|
||||
Remote job board specializing in work-from-home positions.
|
||||
|
||||
**Features:**
|
||||
|
||||
- Keyword-based job search with relevance sorting
|
||||
- Job type filtering (full-time, part-time, contract)
|
||||
- Multi-page result parsing with pagination
|
||||
- Featured/sponsored job identification
|
||||
- AI-powered job relevance analysis
|
||||
- Automatic duplicate detection
|
||||
|
||||
**Usage:**
|
||||
|
||||
```bash
|
||||
# Parse SkipTheDrive for QA automation jobs
|
||||
node index.js --sites=skipthedrive --keywords="automation qa,qa engineer"
|
||||
|
||||
# Filter by job type
|
||||
JOB_TYPES="full time,contract" node index.js --sites=skipthedrive
|
||||
|
||||
# Run demo with limited results
|
||||
node index.js --sites=skipthedrive --demo
|
||||
```
|
||||
|
||||
### 🚧 Planned Parsers
|
||||
|
||||
- **Indeed**: Comprehensive job aggregator
|
||||
- **Glassdoor**: Jobs with company reviews and salary data
|
||||
- **Monster**: Traditional job board
|
||||
- **SimplyHired**: Job aggregator with salary estimates
|
||||
- **LinkedIn Jobs**: Professional network job postings
|
||||
- **AngelList**: Startup and tech jobs
|
||||
- **Remote.co**: Dedicated remote work jobs
|
||||
- **FlexJobs**: Flexible and remote positions
|
||||
|
||||
## 📦 Installation
|
||||
|
||||
```bash
|
||||
# Install dependencies
|
||||
npm install
|
||||
|
||||
# Run tests
|
||||
npm test
|
||||
|
||||
# Run demo
|
||||
node demo.js
|
||||
```
|
||||
|
||||
## 🔧 Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
Create a `.env` file in the parser directory:
|
||||
|
||||
```env
|
||||
# Job Search Configuration
|
||||
SEARCH_SOURCES=linkedin,indeed,glassdoor
|
||||
TARGET_ROLES=software engineer,data scientist,product manager
|
||||
LOCATION_FILTER=Toronto,Vancouver,Calgary
|
||||
EXPERIENCE_LEVELS=entry,mid,senior
|
||||
REMOTE_PREFERENCE=remote,hybrid,onsite
|
||||
|
||||
# Analysis Configuration
|
||||
ENABLE_SALARY_ANALYSIS=true
|
||||
ENABLE_SKILL_ANALYSIS=true
|
||||
ENABLE_TREND_ANALYSIS=true
|
||||
MIN_SALARY=50000
|
||||
MAX_SALARY=200000
|
||||
|
||||
# Output Configuration
|
||||
OUTPUT_FORMAT=json,csv
|
||||
SAVE_RAW_DATA=true
|
||||
ANALYSIS_INTERVAL=daily
|
||||
```
|
||||
|
||||
### Command Line Options
|
||||
|
||||
```bash
|
||||
# Basic usage
|
||||
node index.js
|
||||
|
||||
# Specific roles
|
||||
node index.js --roles="frontend developer,backend developer"
|
||||
|
||||
# Geographic focus
|
||||
node index.js --locations="Toronto,Vancouver"
|
||||
|
||||
# Experience level
|
||||
node index.js --experience="senior"
|
||||
|
||||
# Output format
|
||||
node index.js --output=results/job-market-analysis.json
|
||||
```
|
||||
|
||||
**Available Options:**
|
||||
|
||||
- `--roles="role1,role2"`: Target job roles
|
||||
- `--locations="city1,city2"`: Geographic focus
|
||||
- `--experience="entry|mid|senior"`: Experience level
|
||||
- `--remote="remote|hybrid|onsite"`: Remote work preference
|
||||
- `--salary-min=NUMBER`: Minimum salary filter
|
||||
- `--salary-max=NUMBER`: Maximum salary filter
|
||||
- `--output=FILE`: Output filename
|
||||
- `--format=json|csv`: Output format
|
||||
- `--trends`: Enable trend analysis
|
||||
- `--skills`: Enable skill analysis
|
||||
|
||||
## 📊 Keywords
|
||||
|
||||
### Role-Specific Keywords
|
||||
|
||||
Place keyword CSV files in the `keywords/` directory:
|
||||
|
||||
```
|
||||
job-search-parser/
|
||||
├── keywords/
|
||||
│ ├── job-search-keywords.csv # General job search terms
|
||||
│ ├── tech-roles.csv # Technology roles
|
||||
│ ├── data-roles.csv # Data science roles
|
||||
│ ├── management-roles.csv # Management positions
|
||||
│ └── emerging-roles.csv # Emerging job categories
|
||||
└── index.js
|
||||
```
|
||||
|
||||
### Tech Roles Keywords
|
||||
|
||||
```csv
|
||||
keyword
|
||||
software engineer
|
||||
frontend developer
|
||||
backend developer
|
||||
full stack developer
|
||||
data scientist
|
||||
machine learning engineer
|
||||
devops engineer
|
||||
site reliability engineer
|
||||
cloud architect
|
||||
security engineer
|
||||
mobile developer
|
||||
iOS developer
|
||||
Android developer
|
||||
react developer
|
||||
vue developer
|
||||
angular developer
|
||||
node.js developer
|
||||
python developer
|
||||
java developer
|
||||
golang developer
|
||||
rust developer
|
||||
data engineer
|
||||
analytics engineer
|
||||
```
|
||||
|
||||
### Data Science Keywords
|
||||
|
||||
```csv
|
||||
keyword
|
||||
data scientist
|
||||
machine learning engineer
|
||||
data analyst
|
||||
business analyst
|
||||
data engineer
|
||||
analytics engineer
|
||||
ML engineer
|
||||
AI engineer
|
||||
statistician
|
||||
quantitative analyst
|
||||
research scientist
|
||||
data architect
|
||||
BI developer
|
||||
ETL developer
|
||||
```
|
||||
|
||||
## 📈 Usage Examples
|
||||
|
||||
### Basic Job Search
|
||||
|
||||
```bash
|
||||
# Standard job market analysis
|
||||
node index.js
|
||||
|
||||
# Specific tech roles
|
||||
node index.js --roles="software engineer,data scientist"
|
||||
|
||||
# Geographic focus
|
||||
node index.js --locations="Toronto,Vancouver,Calgary"
|
||||
```
|
||||
|
||||
### Advanced Analysis
|
||||
|
||||
```bash
|
||||
# Senior level positions
|
||||
node index.js --experience="senior" --salary-min=100000
|
||||
|
||||
# Remote work opportunities
|
||||
node index.js --remote="remote" --roles="frontend developer"
|
||||
|
||||
# Trend analysis
|
||||
node index.js --trends --skills --output=results/trends.json
|
||||
```
|
||||
|
||||
### Market Intelligence
|
||||
|
||||
```bash
|
||||
# Salary analysis
|
||||
node index.js --salary-min=80000 --salary-max=150000
|
||||
|
||||
# Skill gap analysis
|
||||
node index.js --skills --roles="machine learning engineer"
|
||||
|
||||
# Competitive intelligence
|
||||
node index.js --companies="Google,Microsoft,Amazon"
|
||||
```
|
||||
|
||||
## 📊 Output Format
|
||||
|
||||
### JSON Structure
|
||||
|
||||
```json
|
||||
{
|
||||
"metadata": {
|
||||
"timestamp": "2024-01-15T10:30:00Z",
|
||||
"search_parameters": {
|
||||
"roles": ["software engineer", "data scientist"],
|
||||
"locations": ["Toronto", "Vancouver"],
|
||||
"experience_levels": ["mid", "senior"],
|
||||
"remote_preference": ["remote", "hybrid"]
|
||||
},
|
||||
"total_jobs_found": 1250,
|
||||
"analysis_duration_seconds": 45
|
||||
},
|
||||
"market_overview": {
|
||||
"total_jobs": 1250,
|
||||
"average_salary": 95000,
|
||||
"salary_range": {
|
||||
"min": 65000,
|
||||
"max": 180000,
|
||||
"median": 92000
|
||||
},
|
||||
"remote_distribution": {
|
||||
"remote": 45,
|
||||
"hybrid": 35,
|
||||
"onsite": 20
|
||||
},
|
||||
"experience_distribution": {
|
||||
"entry": 15,
|
||||
"mid": 45,
|
||||
"senior": 40
|
||||
}
|
||||
},
|
||||
"trends": {
|
||||
"growing_skills": [
|
||||
{ "skill": "React", "growth_rate": 25 },
|
||||
{ "skill": "Python", "growth_rate": 18 },
|
||||
{ "skill": "AWS", "growth_rate": 22 }
|
||||
],
|
||||
"declining_skills": [
|
||||
{ "skill": "jQuery", "growth_rate": -12 },
|
||||
{ "skill": "PHP", "growth_rate": -8 }
|
||||
],
|
||||
"emerging_roles": ["AI Engineer", "DevSecOps Engineer", "Data Engineer"]
|
||||
},
|
||||
"jobs": [
|
||||
{
|
||||
"id": "job_1",
|
||||
"title": "Senior Software Engineer",
|
||||
"company": "TechCorp",
|
||||
"location": "Toronto, Ontario",
|
||||
"remote_type": "hybrid",
|
||||
"salary": {
|
||||
"min": 100000,
|
||||
"max": 140000,
|
||||
"currency": "CAD"
|
||||
},
|
||||
"required_skills": ["React", "Node.js", "TypeScript", "AWS"],
|
||||
"preferred_skills": ["GraphQL", "Docker", "Kubernetes"],
|
||||
"experience_level": "senior",
|
||||
"job_url": "https://example.com/job/1",
|
||||
"posted_date": "2024-01-10T09:00:00Z",
|
||||
"scraped_at": "2024-01-15T10:30:00Z"
|
||||
}
|
||||
],
|
||||
"analysis": {
|
||||
"skill_demand": {
|
||||
"React": { "count": 45, "avg_salary": 98000 },
|
||||
"Python": { "count": 38, "avg_salary": 102000 },
|
||||
"AWS": { "count": 32, "avg_salary": 105000 }
|
||||
},
|
||||
"company_insights": {
|
||||
"top_hirers": [
|
||||
{ "company": "TechCorp", "jobs": 25 },
|
||||
{ "company": "StartupXYZ", "jobs": 18 }
|
||||
],
|
||||
"salary_leaders": [
|
||||
{ "company": "BigTech", "avg_salary": 120000 },
|
||||
{ "company": "FinTech", "avg_salary": 115000 }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### CSV Output
|
||||
|
||||
The parser can also generate CSV files for easy analysis:
|
||||
|
||||
```csv
|
||||
job_id,title,company,location,remote_type,salary_min,salary_max,required_skills,experience_level,posted_date
|
||||
job_1,Senior Software Engineer,TechCorp,Toronto,hybrid,100000,140000,"React,Node.js,TypeScript",senior,2024-01-10
|
||||
job_2,Data Scientist,DataCorp,Vancouver,remote,90000,130000,"Python,SQL,ML",mid,2024-01-09
|
||||
```
|
||||
|
||||
## 🔒 Security & Best Practices
|
||||
|
||||
### Data Privacy
|
||||
|
||||
- Respect job site terms of service
|
||||
- Implement appropriate rate limiting
|
||||
- Store data securely and responsibly
|
||||
- Anonymize sensitive information
|
||||
|
||||
### Rate Limiting
|
||||
|
||||
- Implement delays between requests
|
||||
- Respect API rate limits
|
||||
- Use multiple data sources
|
||||
- Monitor for blocking/detection
|
||||
|
||||
### Legal Compliance
|
||||
|
||||
- Educational and research purposes only
|
||||
- Respect website terms of service
|
||||
- Implement data retention policies
|
||||
- Monitor for legal changes
|
||||
|
||||
## 🧪 Testing
|
||||
|
||||
### Run Tests
|
||||
|
||||
```bash
|
||||
# All tests
|
||||
npm test
|
||||
|
||||
# Specific test suites
|
||||
npm test -- --testNamePattern="JobSearch"
|
||||
npm test -- --testNamePattern="Analysis"
|
||||
npm test -- --testNamePattern="Trends"
|
||||
```
|
||||
|
||||
### Test Coverage
|
||||
|
||||
```bash
|
||||
npm run test:coverage
|
||||
```
|
||||
|
||||
## 🚀 Performance Optimization
|
||||
|
||||
### Recommended Settings
|
||||
|
||||
#### Fast Analysis
|
||||
|
||||
```bash
|
||||
node index.js --roles="software engineer" --locations="Toronto"
|
||||
```
|
||||
|
||||
#### Comprehensive Analysis
|
||||
|
||||
```bash
|
||||
node index.js --trends --skills --experience="all"
|
||||
```
|
||||
|
||||
#### Focused Intelligence
|
||||
|
||||
```bash
|
||||
node index.js --salary-min=80000 --remote="remote" --trends
|
||||
```
|
||||
|
||||
### Performance Tips
|
||||
|
||||
- Use specific role filters to reduce data volume
|
||||
- Implement caching for repeated searches
|
||||
- Use parallel processing for multiple sources
|
||||
- Optimize data storage and retrieval
|
||||
|
||||
## 🔧 Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
#### Rate Limiting
|
||||
|
||||
```bash
|
||||
# Reduce request frequency
|
||||
export REQUEST_DELAY=2000
|
||||
node index.js
|
||||
```
|
||||
|
||||
#### Data Source Issues
|
||||
|
||||
```bash
|
||||
# Use specific sources
|
||||
node index.js --sources="linkedin,indeed"
|
||||
|
||||
# Check source availability
|
||||
node index.js --test-sources
|
||||
```
|
||||
|
||||
#### Output Issues
|
||||
|
||||
```bash
|
||||
# Check output directory
|
||||
mkdir -p results
|
||||
node index.js --output=results/analysis.json
|
||||
|
||||
# Verify file permissions
|
||||
chmod 755 results/
|
||||
```
|
||||
|
||||
## 📈 Monitoring & Analytics
|
||||
|
||||
### Key Metrics
|
||||
|
||||
- **Job Volume**: Total jobs found per search
|
||||
- **Salary Trends**: Average and median salary changes
|
||||
- **Skill Demand**: Most requested skills
|
||||
- **Remote Adoption**: Remote work trend analysis
|
||||
- **Market Velocity**: Job posting frequency
|
||||
|
||||
### Dashboard Integration
|
||||
|
||||
- Real-time market monitoring
|
||||
- Trend visualization
|
||||
- Salary benchmarking
|
||||
- Skill gap analysis
|
||||
- Competitive intelligence
|
||||
|
||||
## 🤝 Contributing
|
||||
|
||||
### Development Setup
|
||||
|
||||
1. Fork the repository
|
||||
2. Create feature branch
|
||||
3. Add tests for new functionality
|
||||
4. Ensure all tests pass
|
||||
5. Submit pull request
|
||||
|
||||
### Code Standards
|
||||
|
||||
- Follow existing code style
|
||||
- Add JSDoc comments
|
||||
- Maintain test coverage
|
||||
- Update documentation
|
||||
|
||||
## 📄 License
|
||||
|
||||
This parser is part of the LinkedOut platform and follows the same licensing terms.
|
||||
|
||||
---
|
||||
|
||||
**Note**: This tool is designed for educational and research purposes. Always respect website terms of service and implement appropriate rate limiting and ethical usage practices.
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,9 +1,9 @@
|
||||
keyword
|
||||
qa automation
|
||||
automation test
|
||||
sdet
|
||||
qa lead
|
||||
automation lead
|
||||
playwright
|
||||
cypress
|
||||
keyword
|
||||
qa automation
|
||||
automation test
|
||||
sdet
|
||||
qa lead
|
||||
automation lead
|
||||
playwright
|
||||
cypress
|
||||
quality assurance engineer
|
||||
|
@ -1,129 +1,129 @@
|
||||
#!/usr/bin/env node

/**
 * SkipTheDrive Parser Demo
 *
 * Demonstrates the SkipTheDrive job parser functionality.
 * Configuration is read from environment variables (via dotenv); results
 * are written to a timestamped JSON file and summarized on the console.
 */

const { parseSkipTheDrive } = require("./skipthedrive");
const fs = require("fs");
const path = require("path");
const { logger } = require("../../ai-analyzer");

// Load environment variables from the package-level .env file
require("dotenv").config({ path: path.join(__dirname, "..", ".env") });

async function runDemo() {
  logger.step("🚀 SkipTheDrive Parser Demo");

  // Demo configuration, assembled from environment variables with defaults
  const demoOptions = {
    // Search for QA automation jobs (from your example)
    keywords: process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
      "automation qa",
      "qa engineer",
      "test automation",
    ],

    // Job type filters - can be: "part time", "full time", "contract"
    jobTypes: process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],

    // Location filter (optional)
    locationFilter: process.env.LOCATION_FILTER || "",

    // Maximum pages to parse
    maxPages: parseInt(process.env.MAX_PAGES) || 3,

    // Browser headless mode
    headless: process.env.HEADLESS !== "false",

    // AI analysis
    enableAI: process.env.ENABLE_AI_ANALYSIS !== "false",
    aiContext: "remote QA and test automation job opportunities",
  };

  logger.info("Configuration:");
  logger.info(`- Keywords: ${demoOptions.keywords.join(", ")}`);
  const jobTypesLabel =
    demoOptions.jobTypes.length > 0
      ? demoOptions.jobTypes.join(", ")
      : "All types";
  logger.info(`- Job Types: ${jobTypesLabel}`);
  logger.info(`- Location Filter: ${demoOptions.locationFilter || "None"}`);
  logger.info(`- Max Pages: ${demoOptions.maxPages}`);
  logger.info(`- Headless: ${demoOptions.headless}`);
  logger.info(`- AI Analysis: ${demoOptions.enableAI}`);
  logger.info("\nStarting parser...");

  try {
    const startedAt = Date.now();
    const parserOutput = await parseSkipTheDrive(demoOptions);
    const duration = ((Date.now() - startedAt) / 1000).toFixed(2);

    // Save results under <package>/results/skipthedrive-results-<ts>.json
    const timestamp = new Date()
      .toISOString()
      .replace(/[:.]/g, "-")
      .slice(0, -5);
    const resultsDir = path.join(__dirname, "..", "results");
    if (!fs.existsSync(resultsDir)) {
      fs.mkdirSync(resultsDir, { recursive: true });
    }
    const resultsFile = path.join(
      resultsDir,
      `skipthedrive-results-${timestamp}.json`
    );
    fs.writeFileSync(resultsFile, JSON.stringify(parserOutput, null, 2));

    // Display summary
    logger.step("\n📊 Parsing Summary:");
    logger.info(`- Duration: ${duration} seconds`);
    logger.info(`- Jobs Found: ${parserOutput.results.length}`);
    logger.info(`- Jobs Rejected: ${parserOutput.rejectedResults.length}`);
    logger.file(`- Results saved to: ${resultsFile}`);

    // Show up to five sample jobs
    if (parserOutput.results.length > 0) {
      logger.info("\n🔍 Sample Jobs Found:");
      const samples = parserOutput.results.slice(0, 5);
      for (const [index, job] of samples.entries()) {
        logger.info(`\n${index + 1}. ${job.title}`);
        logger.info(`   Company: ${job.company}`);
        logger.info(`   Posted: ${job.daysAgo}`);
        logger.info(`   Featured: ${job.isFeatured ? "Yes" : "No"}`);
        logger.info(`   URL: ${job.jobUrl}`);
        if (job.aiAnalysis) {
          logger.ai(
            `   AI Relevant: ${job.aiAnalysis.isRelevant ? "Yes" : "No"} (${(
              job.aiAnalysis.confidence * 100
            ).toFixed(0)}% confidence)`
          );
        }
      }
    }

    // Aggregate and show rejection reasons
    if (parserOutput.rejectedResults.length > 0) {
      const rejectionReasons = {};
      for (const job of parserOutput.rejectedResults) {
        rejectionReasons[job.reason] = (rejectionReasons[job.reason] || 0) + 1;
      }

      logger.info("\n❌ Rejection Reasons:");
      for (const [reason, count] of Object.entries(rejectionReasons)) {
        logger.info(`  ${reason}: ${count}`);
      }
    }
  } catch (error) {
    logger.error("\n❌ Demo failed:", error.message);
    process.exit(1);
  }
}

// Run the demo
runDemo().catch((err) => {
  logger.error("Fatal error:", err);
  process.exit(1);
});
|
||||
#!/usr/bin/env node

/**
 * SkipTheDrive Parser Demo
 *
 * Demonstrates the SkipTheDrive job parser functionality.
 */

const { parseSkipTheDrive } = require("./skipthedrive");
const fs = require("fs");
const path = require("path");
const { logger } = require("../../ai-analyzer");

// Load environment variables
require("dotenv").config({ path: path.join(__dirname, "..", ".env") });

/** Log the effective demo configuration. */
function printConfiguration(config) {
  logger.info("Configuration:");
  logger.info(`- Keywords: ${config.keywords.join(", ")}`);
  logger.info(
    `- Job Types: ${
      config.jobTypes.length > 0 ? config.jobTypes.join(", ") : "All types"
    }`
  );
  logger.info(`- Location Filter: ${config.locationFilter || "None"}`);
  logger.info(`- Max Pages: ${config.maxPages}`);
  logger.info(`- Headless: ${config.headless}`);
  logger.info(`- AI Analysis: ${config.enableAI}`);
}

/** Persist parser output as pretty-printed JSON; returns the file path. */
function saveResults(parserOutput) {
  const timestamp = new Date()
    .toISOString()
    .replace(/[:.]/g, "-")
    .slice(0, -5);
  const resultsDir = path.join(__dirname, "..", "results");

  if (!fs.existsSync(resultsDir)) {
    fs.mkdirSync(resultsDir, { recursive: true });
  }

  const resultsFile = path.join(
    resultsDir,
    `skipthedrive-results-${timestamp}.json`
  );
  fs.writeFileSync(resultsFile, JSON.stringify(parserOutput, null, 2));
  return resultsFile;
}

/** Print up to five of the accepted jobs, with AI verdicts when present. */
function printSampleJobs(jobs) {
  if (jobs.length === 0) return;

  logger.info("\n🔍 Sample Jobs Found:");
  jobs.slice(0, 5).forEach((job, index) => {
    logger.info(`\n${index + 1}. ${job.title}`);
    logger.info(`   Company: ${job.company}`);
    logger.info(`   Posted: ${job.daysAgo}`);
    logger.info(`   Featured: ${job.isFeatured ? "Yes" : "No"}`);
    logger.info(`   URL: ${job.jobUrl}`);
    if (job.aiAnalysis) {
      logger.ai(
        `   AI Relevant: ${job.aiAnalysis.isRelevant ? "Yes" : "No"} (${(
          job.aiAnalysis.confidence * 100
        ).toFixed(0)}% confidence)`
      );
    }
  });
}

/** Print a histogram of why jobs were rejected. */
function printRejectionSummary(rejectedJobs) {
  if (rejectedJobs.length === 0) return;

  const rejectionReasons = {};
  rejectedJobs.forEach((job) => {
    rejectionReasons[job.reason] = (rejectionReasons[job.reason] || 0) + 1;
  });

  logger.info("\n❌ Rejection Reasons:");
  Object.entries(rejectionReasons).forEach(([reason, count]) => {
    logger.info(`  ${reason}: ${count}`);
  });
}

async function runDemo() {
  logger.step("🚀 SkipTheDrive Parser Demo");

  // Demo configuration
  const options = {
    // Search for QA automation jobs (from your example)
    keywords: process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
      "automation qa",
      "qa engineer",
      "test automation",
    ],

    // Job type filters - can be: "part time", "full time", "contract"
    jobTypes: process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],

    // Location filter (optional)
    locationFilter: process.env.LOCATION_FILTER || "",

    // Maximum pages to parse
    maxPages: parseInt(process.env.MAX_PAGES) || 3,

    // Browser headless mode
    headless: process.env.HEADLESS !== "false",

    // AI analysis
    enableAI: process.env.ENABLE_AI_ANALYSIS !== "false",
    aiContext: "remote QA and test automation job opportunities",
  };

  printConfiguration(options);
  logger.info("\nStarting parser...");

  try {
    const startTime = Date.now();
    const results = await parseSkipTheDrive(options);
    const duration = ((Date.now() - startTime) / 1000).toFixed(2);

    const resultsFile = saveResults(results);

    // Display summary
    logger.step("\n📊 Parsing Summary:");
    logger.info(`- Duration: ${duration} seconds`);
    logger.info(`- Jobs Found: ${results.results.length}`);
    logger.info(`- Jobs Rejected: ${results.rejectedResults.length}`);
    logger.file(`- Results saved to: ${resultsFile}`);

    printSampleJobs(results.results);
    printRejectionSummary(results.rejectedResults);
  } catch (error) {
    logger.error("\n❌ Demo failed:", error.message);
    process.exit(1);
  }
}

// Run the demo
runDemo().catch((err) => {
  logger.error("Fatal error:", err);
  process.exit(1);
});
|
||||
|
||||
@ -1,332 +1,332 @@
|
||||
/**
|
||||
* SkipTheDrive Job Parser
|
||||
*
|
||||
* Parses remote job listings from SkipTheDrive.com
|
||||
* Supports keyword search, job type filters, and pagination
|
||||
*/
|
||||
|
||||
const { chromium } = require("playwright");
|
||||
const path = require("path");
|
||||
|
||||
// Import from ai-analyzer core package
|
||||
const {
|
||||
logger,
|
||||
cleanText,
|
||||
containsAnyKeyword,
|
||||
parseLocationFilters,
|
||||
validateLocationAgainstFilters,
|
||||
extractLocationFromProfile,
|
||||
analyzeBatch,
|
||||
checkOllamaStatus,
|
||||
} = require("../../ai-analyzer");
|
||||
|
||||
/**
 * Build a SkipTheDrive search URL for one keyword.
 * @param {string} keyword - Search keyword (URL-encoded into the query)
 * @param {string} orderBy - Sort order (date, relevance); omitted when falsy
 * @param {Array<string>} jobTypes - Job types to filter (part time, full time, contract)
 * @returns {string} - Formatted search URL
 */
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
  // Collect query fragments, then join them with "&" in one pass.
  const parts = [
    `https://www.skipthedrive.com/?s=${encodeURIComponent(keyword)}`,
  ];

  if (orderBy) {
    parts.push(`orderby=${orderBy}`);
  }

  // One jobtype parameter per requested filter
  for (const jobType of jobTypes) {
    parts.push(`jobtype=${encodeURIComponent(jobType)}`);
  }

  return parts.join("&");
}
|
||||
|
||||
/**
 * Extract structured job data from a single job listing element.
 * @param {Element} article - Job listing DOM element (Playwright handle)
 * @returns {Object|null} - Extracted job record, or null if extraction failed
 */
async function extractJobData(article) {
  // Text content of the first element matching `selector`, or "" if absent.
  const textOf = async (selector) => {
    const el = await article.$(selector);
    return el ? await el.textContent() : "";
  };
  // Attribute value of the first element matching `selector`, or "" if absent.
  const attrOf = async (selector, attr) => {
    const el = await article.$(selector);
    return el ? await el.getAttribute(attr) : "";
  };
  // Drop the leading icon token (first non-space run plus surrounding space).
  const stripIcon = (text) => text.replace(/^\s*[^\s]+\s*/, "").trim();

  try {
    // Title link carries both the job title and the detail-page URL
    const title = await textOf("h2.post-title a");
    const jobUrl = await attrOf("h2.post-title a", "href");

    // Posting date (machine-readable attribute plus human-readable text)
    const datePosted = await attrOf("time.post-date", "datetime");
    const dateText = await textOf("time.post-date");

    // Company and "days ago" fields are prefixed with an icon glyph
    const company = stripIcon(
      await textOf(".custom_fields_company_name_display_search_results")
    );
    const daysAgo = stripIcon(
      await textOf(".custom_fields_job_date_display_search_results")
    );

    // Short description excerpt
    const description = await textOf(".excerpt_part");

    // Featured/sponsored listings carry a dedicated badge element
    const isFeatured = !!(await article.$(".custom_fields_sponsored_job"));

    // Numeric job ID is embedded in the article's "post-<id>" element ID
    const articleId = await article.getAttribute("id");
    const jobId = articleId ? articleId.replace("post-", "") : "";

    return {
      jobId,
      title: cleanText(title),
      company: cleanText(company),
      jobUrl,
      datePosted,
      dateText: cleanText(dateText),
      daysAgo: cleanText(daysAgo),
      description: cleanText(description),
      isFeatured,
      source: "skipthedrive",
      timestamp: new Date().toISOString(),
    };
  } catch (error) {
    logger.error(`Error extracting job data: ${error.message}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Parse SkipTheDrive job listings.
 *
 * Searches SkipTheDrive for each configured keyword, walks the paginated
 * results, extracts and validates each listing (keyword match, optional
 * location filter), and optionally runs AI relevance analysis on the
 * accepted jobs via the ai-analyzer package.
 *
 * @param {Object} options - Parser options
 * @param {Array<string>} [options.keywords] - Search keywords
 * @param {Array<string>} [options.jobTypes] - Job type filters (part time, full time, contract)
 * @param {string} [options.locationFilter] - Comma-separated location filter
 * @param {number} [options.maxPages] - Maximum result pages per keyword
 * @param {boolean} [options.headless] - Run the browser headless
 * @param {boolean} [options.enableAI] - Enable AI relevance analysis
 * @param {string} [options.aiContext] - Context string passed to the AI analyzer
 * @returns {Promise<Object>} - { results, rejectedResults, metadata }
 */
async function parseSkipTheDrive(options = {}) {
  const {
    keywords = process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
      "software engineer",
      "developer",
    ],
    jobTypes = process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],
    locationFilter = process.env.LOCATION_FILTER || "",
    // Explicit radix so values like "08" parse as expected
    maxPages = parseInt(process.env.MAX_PAGES, 10) || 5,
    headless = process.env.HEADLESS !== "false",
    enableAI = process.env.ENABLE_AI_ANALYSIS === "true",
    aiContext = process.env.AI_CONTEXT || "remote job opportunities analysis",
  } = options;

  logger.step("Starting SkipTheDrive parser...");
  logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
  logger.info(
    `📋 Job Types: ${jobTypes.length > 0 ? jobTypes.join(", ") : "All"}`
  );
  logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
  logger.info(`📄 Max Pages: ${maxPages}`);

  const browser = await chromium.launch({
    headless,
    args: [
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-dev-shm-usage",
    ],
  });

  const context = await browser.newContext({
    userAgent:
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
  });

  const results = [];
  const rejectedResults = [];
  // Job IDs already processed, deduplicated across keywords and pages
  const seenJobs = new Set();

  try {
    // Search for each keyword
    for (const keyword of keywords) {
      logger.info(`\n🔍 Searching for: ${keyword}`);

      const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
      const page = await context.newPage();

      try {
        logger.info(
          `Attempting navigation to: ${searchUrl} at ${new Date().toISOString()}`
        );
        // "networkidle" with a larger timeout: listings are injected after
        // the initial document load, so "domcontentloaded" can fire before
        // any results exist (matches the CoreParser navigation settings).
        await page.goto(searchUrl, {
          waitUntil: "networkidle",
          timeout: 60000,
        });
        logger.info(
          `Navigation completed successfully at ${new Date().toISOString()}`
        );

        // Wait for job listings; a missing selector just means no results
        logger.info("Waiting for selector #loops-wrapper");
        await page
          .waitForSelector("#loops-wrapper", { timeout: 5000 })
          .catch(() => {
            logger.warning(`No results found for keyword: ${keyword}`);
          });
        logger.info("Selector wait completed");

        let currentPage = 1;
        let hasNextPage = true;

        while (hasNextPage && currentPage <= maxPages) {
          logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);

          // Extract all job articles on current page
          const jobArticles = await page.$$("article[id^='post-']");
          logger.info(
            `Found ${jobArticles.length} job listings on page ${currentPage}`
          );

          for (const article of jobArticles) {
            const jobData = await extractJobData(article);

            // Skip extraction failures and jobs already seen via other keywords
            if (!jobData || seenJobs.has(jobData.jobId)) {
              continue;
            }

            seenJobs.add(jobData.jobId);

            // Add keyword that found this job
            jobData.searchKeyword = keyword;

            // Validate job against keywords
            const fullText = `${jobData.title} ${jobData.description} ${jobData.company}`;
            if (!containsAnyKeyword(fullText, keywords)) {
              rejectedResults.push({
                ...jobData,
                rejected: true,
                reason: "Keywords not found in job listing",
              });
              continue;
            }

            // Location validation (if enabled)
            if (locationFilter) {
              const locationFilters = parseLocationFilters(locationFilter);
              // For SkipTheDrive, most jobs are remote, but we can check the title/description
              const locationValid =
                fullText.toLowerCase().includes("remote") ||
                locationFilters.some((filter) =>
                  fullText.toLowerCase().includes(filter.toLowerCase())
                );

              if (!locationValid) {
                rejectedResults.push({
                  ...jobData,
                  rejected: true,
                  reason: "Location requirements not met",
                });
                continue;
              }

              jobData.locationValid = locationValid;
            }

            logger.success(`✅ Found: ${jobData.title} at ${jobData.company}`);
            results.push(jobData);
          }

          // Check for next page
          const nextPageLink = await page.$("a.nextp");
          if (nextPageLink && currentPage < maxPages) {
            logger.info("📄 Moving to next page...");
            await nextPageLink.click();
            await page.waitForLoadState("domcontentloaded");
            await page.waitForTimeout(2000); // Wait for content to load
            currentPage++;
          } else {
            hasNextPage = false;
          }
        }
      } catch (error) {
        // One failing keyword must not abort the remaining searches
        logger.error(`Error processing keyword "${keyword}": ${error.message}`);
      } finally {
        await page.close();
      }
    }

    logger.success(`\n✅ Parsing complete!`);
    logger.info(`📊 Total jobs found: ${results.length}`);
    logger.info(`❌ Rejected jobs: ${rejectedResults.length}`);

    // Run AI analysis if enabled
    let aiAnalysis = null;
    if (enableAI && results.length > 0) {
      logger.step("Running AI analysis on job listings...");

      const aiAvailable = await checkOllamaStatus();
      if (aiAvailable) {
        const analysisData = results.map((job) => ({
          text: `${job.title} at ${job.company}. ${job.description}`,
          metadata: {
            jobId: job.jobId,
            company: job.company,
            daysAgo: job.daysAgo,
          },
        }));

        aiAnalysis = await analyzeBatch(analysisData, aiContext);

        // Merge AI analysis with results (same index order as analysisData)
        results.forEach((job, index) => {
          if (aiAnalysis && aiAnalysis[index]) {
            job.aiAnalysis = {
              isRelevant: aiAnalysis[index].isRelevant,
              confidence: aiAnalysis[index].confidence,
              reasoning: aiAnalysis[index].reasoning,
            };
          }
        });

        logger.success("✅ AI analysis completed");
      } else {
        logger.warning("⚠️ AI not available - skipping analysis");
      }
    }

    return {
      results,
      rejectedResults,
      metadata: {
        source: "skipthedrive",
        totalJobs: results.length,
        rejectedJobs: rejectedResults.length,
        keywords: keywords,
        jobTypes: jobTypes,
        locationFilter: locationFilter,
        aiAnalysisEnabled: enableAI,
        aiAnalysisCompleted: !!aiAnalysis,
        timestamp: new Date().toISOString(),
      },
    };
  } catch (error) {
    logger.error(`Fatal error in SkipTheDrive parser: ${error.message}`);
    throw error;
  } finally {
    // Always release the browser, even on fatal errors
    await browser.close();
  }
}
|
||||
|
||||
// Export the parser
|
||||
module.exports = {
|
||||
parseSkipTheDrive,
|
||||
buildSearchUrl,
|
||||
extractJobData,
|
||||
};
|
||||
/**
|
||||
* SkipTheDrive Job Parser
|
||||
*
|
||||
* Parses remote job listings from SkipTheDrive.com
|
||||
* Supports keyword search, job type filters, and pagination
|
||||
*/
|
||||
|
||||
const { chromium } = require("playwright");
|
||||
const path = require("path");
|
||||
|
||||
// Import from ai-analyzer core package
|
||||
const {
|
||||
logger,
|
||||
cleanText,
|
||||
containsAnyKeyword,
|
||||
parseLocationFilters,
|
||||
validateLocationAgainstFilters,
|
||||
extractLocationFromProfile,
|
||||
analyzeBatch,
|
||||
checkOllamaStatus,
|
||||
} = require("../../ai-analyzer");
|
||||
|
||||
/**
 * Build a SkipTheDrive search URL.
 * @param {string} keyword - Search keyword (URL-encoded)
 * @param {string} orderBy - Sort order (date, relevance); skipped when falsy
 * @param {Array<string>} jobTypes - Job types to filter (part time, full time, contract)
 * @returns {string} - Formatted search URL
 */
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
  const base = "https://www.skipthedrive.com/";
  const queryParts = [`s=${encodeURIComponent(keyword)}`];

  if (orderBy) {
    queryParts.push(`orderby=${orderBy}`);
  }

  // Append one jobtype parameter per filter
  const typeParams = jobTypes.map(
    (jobType) => `jobtype=${encodeURIComponent(jobType)}`
  );

  return `${base}?${[...queryParts, ...typeParams].join("&")}`;
}
|
||||
|
||||
/**
 * Extract structured job data from a single job listing element.
 * @param {Element} article - Job listing DOM element (Playwright handle)
 * @returns {Object|null} - Extracted job record, or null if extraction failed
 */
async function extractJobData(article) {
  try {
    // Title link carries both the job title and the detail-page URL
    const titleLink = await article.$("h2.post-title a");
    const rawTitle = titleLink ? await titleLink.textContent() : "";
    const jobUrl = titleLink ? await titleLink.getAttribute("href") : "";

    // Posting date: machine-readable attribute plus human-readable text
    const postDate = await article.$("time.post-date");
    const datePosted = postDate ? await postDate.getAttribute("datetime") : "";
    const rawDateText = postDate ? await postDate.textContent() : "";

    // Company name is prefixed with an icon glyph; strip it
    const companyNode = await article.$(
      ".custom_fields_company_name_display_search_results"
    );
    let rawCompany = companyNode ? await companyNode.textContent() : "";
    rawCompany = rawCompany.replace(/^\s*[^\s]+\s*/, "").trim();

    // "Days ago" field carries the same icon prefix
    const jobDateNode = await article.$(
      ".custom_fields_job_date_display_search_results"
    );
    let rawDaysAgo = jobDateNode ? await jobDateNode.textContent() : "";
    rawDaysAgo = rawDaysAgo.replace(/^\s*[^\s]+\s*/, "").trim();

    // Short description excerpt
    const excerptNode = await article.$(".excerpt_part");
    const rawDescription = excerptNode ? await excerptNode.textContent() : "";

    // Featured/sponsored listings carry a dedicated badge element
    const sponsoredBadge = await article.$(".custom_fields_sponsored_job");

    // Numeric job ID is embedded in the article's "post-<id>" element ID
    const articleId = await article.getAttribute("id");

    return {
      jobId: articleId ? articleId.replace("post-", "") : "",
      title: cleanText(rawTitle),
      company: cleanText(rawCompany),
      jobUrl,
      datePosted,
      dateText: cleanText(rawDateText),
      daysAgo: cleanText(rawDaysAgo),
      description: cleanText(rawDescription),
      isFeatured: Boolean(sponsoredBadge),
      source: "skipthedrive",
      timestamp: new Date().toISOString(),
    };
  } catch (error) {
    logger.error(`Error extracting job data: ${error.message}`);
    return null;
  }
}
|
||||
|
||||
/**
|
||||
* Parse SkipTheDrive job listings
|
||||
* @param {Object} options - Parser options
|
||||
* @returns {Promise<Array>} - Array of parsed job listings
|
||||
*/
|
||||
async function parseSkipTheDrive(options = {}) {
  // All options fall back to environment variables, then to hard defaults.
  // NOTE(review): chromium, buildSearchUrl, extractJobData, cleanText,
  // containsAnyKeyword, parseLocationFilters, logger, checkOllamaStatus and
  // analyzeBatch are imported/defined elsewhere in this file — not visible here.
  const {
    keywords = process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
      "software engineer",
      "developer",
    ],
    jobTypes = process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],
    locationFilter = process.env.LOCATION_FILTER || "",
    maxPages = parseInt(process.env.MAX_PAGES) || 5,
    headless = process.env.HEADLESS !== "false",
    enableAI = process.env.ENABLE_AI_ANALYSIS === "true",
    aiContext = process.env.AI_CONTEXT || "remote job opportunities analysis",
  } = options;

  logger.step("Starting SkipTheDrive parser...");
  logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
  logger.info(
    `📋 Job Types: ${jobTypes.length > 0 ? jobTypes.join(", ") : "All"}`
  );
  logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
  logger.info(`📄 Max Pages: ${maxPages}`);

  // Launch a dedicated browser for this run; sandbox flags allow running
  // inside containers/CI where the Chromium sandbox is unavailable.
  const browser = await chromium.launch({
    headless,
    args: [
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-dev-shm-usage",
    ],
  });

  // Fixed desktop user agent to look like a regular browser session.
  const context = await browser.newContext({
    userAgent:
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
  });

  const results = [];          // accepted job listings
  const rejectedResults = [];  // listings filtered out, with a `reason`
  const seenJobs = new Set();  // jobIds already processed (dedupe across keywords)

  try {
    // Search for each keyword; each keyword gets its own page that is always
    // closed in the `finally` below, even when the keyword fails.
    for (const keyword of keywords) {
      logger.info(`\n🔍 Searching for: ${keyword}`);

      const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
      const page = await context.newPage();

      try {
        logger.info(
          `Attempting navigation to: ${searchUrl} at ${new Date().toISOString()}`
        );
        await page.goto(searchUrl, {
          waitUntil: "domcontentloaded",
          timeout: 30000,
        });
        logger.info(
          `Navigation completed successfully at ${new Date().toISOString()}`
        );

        // Wait for job listings to load. A timeout here is treated as
        // "no results" rather than a hard failure.
        logger.info("Waiting for selector #loops-wrapper");
        await page
          .waitForSelector("#loops-wrapper", { timeout: 5000 })
          .catch(() => {
            logger.warning(`No results found for keyword: ${keyword}`);
          });
        logger.info("Selector wait completed");

        let currentPage = 1;
        let hasNextPage = true;

        while (hasNextPage && currentPage <= maxPages) {
          logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);

          // Extract all job articles on current page (ids look like "post-123").
          const jobArticles = await page.$$("article[id^='post-']");
          logger.info(
            `Found ${jobArticles.length} job listings on page ${currentPage}`
          );

          for (const article of jobArticles) {
            const jobData = await extractJobData(article);

            // Skip extraction failures and duplicates.
            if (!jobData || seenJobs.has(jobData.jobId)) {
              continue;
            }

            seenJobs.add(jobData.jobId);

            // Add keyword that found this job
            jobData.searchKeyword = keyword;

            // Validate job against keywords: the combined title/description/
            // company text must mention at least one configured keyword.
            const fullText = `${jobData.title} ${jobData.description} ${jobData.company}`;
            if (!containsAnyKeyword(fullText, keywords)) {
              rejectedResults.push({
                ...jobData,
                rejected: true,
                reason: "Keywords not found in job listing",
              });
              continue;
            }

            // Location validation (if enabled)
            if (locationFilter) {
              const locationFilters = parseLocationFilters(locationFilter);
              // For SkipTheDrive, most jobs are remote, but we can check the title/description
              const locationValid =
                fullText.toLowerCase().includes("remote") ||
                locationFilters.some((filter) =>
                  fullText.toLowerCase().includes(filter.toLowerCase())
                );

              if (!locationValid) {
                rejectedResults.push({
                  ...jobData,
                  rejected: true,
                  reason: "Location requirements not met",
                });
                continue;
              }

              jobData.locationValid = locationValid;
            }

            logger.success(`✅ Found: ${jobData.title} at ${jobData.company}`);
            results.push(jobData);
          }

          // Check for next page; stop once maxPages is reached or no link exists.
          const nextPageLink = await page.$("a.nextp");
          if (nextPageLink && currentPage < maxPages) {
            logger.info("📄 Moving to next page...");
            await nextPageLink.click();
            await page.waitForLoadState("domcontentloaded");
            await page.waitForTimeout(2000); // Wait for content to load
            currentPage++;
          } else {
            hasNextPage = false;
          }
        }
      } catch (error) {
        // One failing keyword must not abort the remaining keywords.
        logger.error(`Error processing keyword "${keyword}": ${error.message}`);
      } finally {
        await page.close();
      }
    }

    logger.success(`\n✅ Parsing complete!`);
    logger.info(`📊 Total jobs found: ${results.length}`);
    logger.info(`❌ Rejected jobs: ${rejectedResults.length}`);

    // Run AI analysis if enabled; skipped silently when Ollama is unreachable.
    let aiAnalysis = null;
    if (enableAI && results.length > 0) {
      logger.step("Running AI analysis on job listings...");

      const aiAvailable = await checkOllamaStatus();
      if (aiAvailable) {
        const analysisData = results.map((job) => ({
          text: `${job.title} at ${job.company}. ${job.description}`,
          metadata: {
            jobId: job.jobId,
            company: job.company,
            daysAgo: job.daysAgo,
          },
        }));

        aiAnalysis = await analyzeBatch(analysisData, aiContext);

        // Merge AI analysis with results — assumes analyzeBatch preserves
        // input order (index-aligned with `results`); TODO confirm.
        results.forEach((job, index) => {
          if (aiAnalysis && aiAnalysis[index]) {
            job.aiAnalysis = {
              isRelevant: aiAnalysis[index].isRelevant,
              confidence: aiAnalysis[index].confidence,
              reasoning: aiAnalysis[index].reasoning,
            };
          }
        });

        logger.success("✅ AI analysis completed");
      } else {
        logger.warning("⚠️ AI not available - skipping analysis");
      }
    }

    // Result envelope: accepted + rejected listings plus run metadata.
    return {
      results,
      rejectedResults,
      metadata: {
        source: "skipthedrive",
        totalJobs: results.length,
        rejectedJobs: rejectedResults.length,
        keywords: keywords,
        jobTypes: jobTypes,
        locationFilter: locationFilter,
        aiAnalysisEnabled: enableAI,
        aiAnalysisCompleted: !!aiAnalysis,
        timestamp: new Date().toISOString(),
      },
    };
  } catch (error) {
    logger.error(`Fatal error in SkipTheDrive parser: ${error.message}`);
    throw error;
  } finally {
    // Always release the browser, success or failure.
    await browser.close();
  }
}
|
||||
|
||||
// Export the parser
|
||||
// Public API of the standalone SkipTheDrive parser module:
// the main entry point plus its unit-testable helpers.
module.exports = { parseSkipTheDrive, buildSearchUrl, extractJobData };
|
||||
|
||||
@ -1,302 +1,302 @@
|
||||
/**
|
||||
* SkipTheDrive Parsing Strategy
|
||||
*
|
||||
* Uses core-parser for browser management and ai-analyzer for utilities
|
||||
*/
|
||||
|
||||
const {
|
||||
logger,
|
||||
cleanText,
|
||||
containsAnyKeyword,
|
||||
validateLocationAgainstFilters,
|
||||
} = require("ai-analyzer");
|
||||
|
||||
/**
|
||||
* SkipTheDrive URL builder
|
||||
*/
|
||||
/**
 * Build a SkipTheDrive search URL for one keyword.
 *
 * @param {string} keyword - Search term, sent as the `s` query parameter.
 * @param {string} [orderBy="date"] - Result ordering (`orderby` parameter).
 * @param {string[]} [jobTypes=[]] - Optional job-type filters; joined with
 *   commas into a single `job_type` parameter when non-empty.
 * @returns {string} Fully-qualified, URL-encoded search URL.
 */
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
  const query = new URLSearchParams();
  query.set("s", keyword);
  query.set("orderby", orderBy);

  // Only attach the filter when at least one job type was requested.
  if (jobTypes && jobTypes.length > 0) {
    query.append("job_type", jobTypes.join(","));
  }

  return `https://www.skipthedrive.com/?${query.toString()}`;
}
|
||||
|
||||
/**
|
||||
* SkipTheDrive parsing strategy function
|
||||
*/
|
||||
async function skipthedriveStrategy(coreParser, options = {}) {
  // coreParser owns the browser; this strategy only drives pages through it.
  // NOTE(review): logger, buildSearchUrl, extractJobsFromPage,
  // validateLocationAgainstFilters, hasNextPageAvailable and
  // navigateToNextPage come from imports/siblings elsewhere in this file.
  const {
    keywords = ["software engineer", "developer", "programmer"],
    locationFilter = null,
    maxPages = 5,
    jobTypes = [],
  } = options;

  const results = [];          // accepted jobs
  const rejectedResults = [];  // jobs filtered out, with `rejectionReason`
  const seenJobs = new Set();  // jobIds already seen (dedupe across keywords)

  try {
    // Create main page; reused for every keyword via the "skipthedrive-main" id.
    const page = await coreParser.createPage("skipthedrive-main");

    logger.info("🚀 Starting SkipTheDrive parser...");
    logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
    logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
    logger.info(`📄 Max Pages: ${maxPages}`);

    // Search for each keyword
    for (const keyword of keywords) {
      logger.info(`\n🔍 Searching for: ${keyword}`);

      const searchUrl = buildSearchUrl(keyword, "date", jobTypes);

      try {
        // Navigate to search results
        await coreParser.navigateTo(searchUrl, {
          pageId: "skipthedrive-main",
          retries: 2,
          timeout: 30000,
        });

        // Wait for job listings to load. A timeout resolves to `false`
        // (handled below) instead of throwing.
        // NOTE(review): assumes coreParser.waitForSelector resolves truthy
        // on success — confirm against the CoreParser implementation.
        const hasResults = await coreParser
          .waitForSelector(
            "#loops-wrapper",
            {
              timeout: 5000,
            },
            "skipthedrive-main"
          )
          .catch(() => {
            logger.warning(`No results found for keyword: ${keyword}`);
            return false;
          });

        if (!hasResults) {
          continue;
        }

        // Process multiple pages
        let currentPage = 1;
        let hasNextPage = true;

        while (hasNextPage && currentPage <= maxPages) {
          logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);

          // Extract jobs from current page
          const pageJobs = await extractJobsFromPage(
            page,
            keyword,
            locationFilter
          );

          for (const job of pageJobs) {
            // Skip duplicates
            if (seenJobs.has(job.jobId)) continue;
            seenJobs.add(job.jobId);

            // Validate location if filtering enabled
            if (locationFilter) {
              const locationValid = validateLocationAgainstFilters(
                job.location,
                locationFilter
              );

              if (!locationValid) {
                rejectedResults.push({
                  ...job,
                  rejectionReason: "Location filter mismatch",
                });
                continue;
              }
            }

            results.push(job);
          }

          // Check for next page; stop when none exists or maxPages is reached.
          hasNextPage = await hasNextPageAvailable(page);
          if (hasNextPage && currentPage < maxPages) {
            await navigateToNextPage(page, currentPage + 1);
            currentPage++;

            // Wait for new page to load
            await page.waitForTimeout(2000);
          } else {
            hasNextPage = false;
          }
        }
      } catch (error) {
        // One failing keyword must not abort the remaining keywords.
        logger.error(`Error processing keyword "${keyword}": ${error.message}`);
      }
    }

    logger.info(
      `🎯 SkipTheDrive parsing completed: ${results.length} jobs found, ${rejectedResults.length} rejected`
    );

    // Result envelope consumed by the caller: jobs plus a run summary.
    return {
      results,
      rejectedResults,
      summary: {
        totalJobs: results.length,
        totalRejected: rejectedResults.length,
        keywords: keywords.join(", "),
        locationFilter,
        source: "skipthedrive",
      },
    };
  } catch (error) {
    logger.error(`❌ SkipTheDrive parsing failed: ${error.message}`);
    throw error;
  }
}
|
||||
|
||||
/**
|
||||
* Extract jobs from current page
|
||||
*/
|
||||
/**
 * Collect structured job records from every listing element on the page.
 *
 * A failure on a single listing is logged and skipped; a failure querying the
 * page itself is logged and yields an empty result. Never throws.
 *
 * @param {object} page - Page handle exposing `$$` for element queries.
 * @param {string} keyword - Keyword that produced this page (attached to jobs).
 * @param {*} locationFilter - Currently unused here; filtering happens upstream.
 * @returns {Promise<Array<object>>} Extracted job records (possibly empty).
 */
async function extractJobsFromPage(page, keyword, locationFilter) {
  const collected = [];

  try {
    const listings = await page.$$("article.job_listing");

    for (const listing of listings) {
      try {
        const record = await extractJobData(listing, keyword);
        if (record) {
          collected.push(record);
        }
      } catch (err) {
        // One bad listing should not abort the rest of the page.
        logger.warning(`Failed to extract job data: ${err.message}`);
      }
    }
  } catch (err) {
    logger.error(`Failed to extract jobs from page: ${err.message}`);
  }

  return collected;
}
|
||||
|
||||
/**
|
||||
* Extract data from individual job element
|
||||
*/
|
||||
/**
 * Extract a structured job record from a single listing element.
 *
 * Missing sub-elements degrade gracefully to empty strings / nulls; any
 * unexpected error is logged and `null` is returned so callers can skip
 * the listing.
 *
 * Fix: `parseInt` is now called with an explicit radix of 10 so relative
 * dates like "08 days ago" can never be misread.
 *
 * @param {object} jobElement - Listing element exposing `getAttribute` and `$`.
 * @param {string} keyword - Search keyword that surfaced this listing.
 * @returns {Promise<object|null>} Job record, or null on extraction failure.
 */
async function extractJobData(jobElement, keyword) {
  try {
    // Job ID comes from the article's DOM id, e.g. "post-12345" -> "12345".
    const articleId = (await jobElement.getAttribute("id")) || "";
    const jobId = articleId ? articleId.replace("post-", "") : "";

    // Title and job URL share the same anchor element.
    const titleElement = await jobElement.$(".job_listing-title a");
    const title = titleElement
      ? cleanText(await titleElement.textContent())
      : "";
    const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";

    // Extract company
    const companyElement = await jobElement.$(".company");
    const company = companyElement
      ? cleanText(await companyElement.textContent())
      : "";

    // Extract location
    const locationElement = await jobElement.$(".location");
    const location = locationElement
      ? cleanText(await locationElement.textContent())
      : "";

    // Extract date posted (relative text, e.g. "3 days ago")
    const dateElement = await jobElement.$(".job-date");
    const dateText = dateElement
      ? cleanText(await dateElement.textContent())
      : "";

    // Extract description
    const descElement = await jobElement.$(".job_listing-description");
    const description = descElement
      ? cleanText(await descElement.textContent())
      : "";

    // Featured/sponsored flag: presence of the .featured element.
    const featuredElement = await jobElement.$(".featured");
    const isFeatured = featuredElement !== null;

    // Derive an absolute posting date (YYYY-MM-DD) from "N day(s) ago" text.
    // Other phrasings (e.g. "today") are left as null.
    let datePosted = null;
    let daysAgo = null;

    if (dateText) {
      const match = dateText.match(/(\d+)\s+days?\s+ago/);
      if (match) {
        daysAgo = Number.parseInt(match[1], 10);
        const date = new Date();
        date.setDate(date.getDate() - daysAgo);
        datePosted = date.toISOString().split("T")[0];
      }
    }

    return {
      jobId,
      title,
      company,
      location,
      jobUrl,
      datePosted,
      dateText,
      daysAgo,
      description,
      isFeatured,
      keyword,
      extractedAt: new Date().toISOString(),
      source: "skipthedrive",
    };
  } catch (error) {
    logger.warning(`Error extracting job data: ${error.message}`);
    return null;
  }
}
|
||||
|
||||
/**
|
||||
* Check if next page is available
|
||||
*/
|
||||
/**
 * Report whether a "next page" pagination control exists on the page.
 * Any lookup failure is treated as "no further pages". Never throws.
 *
 * @param {object} page - Page handle exposing `$`.
 * @returns {Promise<boolean>} True when a `.next-page` element is present.
 */
async function hasNextPageAvailable(page) {
  try {
    return (await page.$(".next-page")) !== null;
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
|
||||
* Navigate to next page
|
||||
*/
|
||||
/**
 * Click the pagination control to advance to the next results page.
 * No-op when the control is absent; failures are logged, never thrown.
 *
 * @param {object} page - Page handle exposing `$`.
 * @param {number} pageNumber - Target page number (used in log output only).
 */
async function navigateToNextPage(page, pageNumber) {
  try {
    const control = await page.$(".next-page");
    await control?.click();
  } catch (error) {
    logger.warning(
      `Failed to navigate to page ${pageNumber}: ${error.message}`
    );
  }
}
|
||||
|
||||
module.exports = {
|
||||
skipthedriveStrategy,
|
||||
buildSearchUrl,
|
||||
extractJobsFromPage,
|
||||
extractJobData,
|
||||
};
|
||||
/**
|
||||
* SkipTheDrive Parsing Strategy
|
||||
*
|
||||
* Uses core-parser for browser management and ai-analyzer for utilities
|
||||
*/
|
||||
|
||||
const {
|
||||
logger,
|
||||
cleanText,
|
||||
containsAnyKeyword,
|
||||
validateLocationAgainstFilters,
|
||||
} = require("ai-analyzer");
|
||||
|
||||
/**
|
||||
* SkipTheDrive URL builder
|
||||
*/
|
||||
/**
 * Build a SkipTheDrive search URL for one keyword.
 *
 * @param {string} keyword - Search term, sent as the `s` query parameter.
 * @param {string} [orderBy="date"] - Result ordering (`orderby` parameter).
 * @param {string[]} [jobTypes=[]] - Optional job-type filters; joined with
 *   commas into a single `job_type` parameter when non-empty.
 * @returns {string} Fully-qualified, URL-encoded search URL.
 */
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
  const query = new URLSearchParams();
  query.set("s", keyword);
  query.set("orderby", orderBy);

  // Only attach the filter when at least one job type was requested.
  if (jobTypes && jobTypes.length > 0) {
    query.append("job_type", jobTypes.join(","));
  }

  return `https://www.skipthedrive.com/?${query.toString()}`;
}
|
||||
|
||||
/**
|
||||
* SkipTheDrive parsing strategy function
|
||||
*/
|
||||
async function skipthedriveStrategy(coreParser, options = {}) {
  // coreParser owns the browser; this strategy only drives pages through it.
  // NOTE(review): logger, buildSearchUrl, extractJobsFromPage,
  // validateLocationAgainstFilters, hasNextPageAvailable and
  // navigateToNextPage come from imports/siblings elsewhere in this file.
  const {
    keywords = ["software engineer", "developer", "programmer"],
    locationFilter = null,
    maxPages = 5,
    jobTypes = [],
  } = options;

  const results = [];          // accepted jobs
  const rejectedResults = [];  // jobs filtered out, with `rejectionReason`
  const seenJobs = new Set();  // jobIds already seen (dedupe across keywords)

  try {
    // Create main page; reused for every keyword via the "skipthedrive-main" id.
    const page = await coreParser.createPage("skipthedrive-main");

    logger.info("🚀 Starting SkipTheDrive parser...");
    logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
    logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
    logger.info(`📄 Max Pages: ${maxPages}`);

    // Search for each keyword
    for (const keyword of keywords) {
      logger.info(`\n🔍 Searching for: ${keyword}`);

      const searchUrl = buildSearchUrl(keyword, "date", jobTypes);

      try {
        // Navigate to search results
        await coreParser.navigateTo(searchUrl, {
          pageId: "skipthedrive-main",
          retries: 2,
          timeout: 30000,
        });

        // Wait for job listings to load. A timeout resolves to `false`
        // (handled below) instead of throwing.
        // NOTE(review): assumes coreParser.waitForSelector resolves truthy
        // on success — confirm against the CoreParser implementation.
        const hasResults = await coreParser
          .waitForSelector(
            "#loops-wrapper",
            {
              timeout: 5000,
            },
            "skipthedrive-main"
          )
          .catch(() => {
            logger.warning(`No results found for keyword: ${keyword}`);
            return false;
          });

        if (!hasResults) {
          continue;
        }

        // Process multiple pages
        let currentPage = 1;
        let hasNextPage = true;

        while (hasNextPage && currentPage <= maxPages) {
          logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);

          // Extract jobs from current page
          const pageJobs = await extractJobsFromPage(
            page,
            keyword,
            locationFilter
          );

          for (const job of pageJobs) {
            // Skip duplicates
            if (seenJobs.has(job.jobId)) continue;
            seenJobs.add(job.jobId);

            // Validate location if filtering enabled
            if (locationFilter) {
              const locationValid = validateLocationAgainstFilters(
                job.location,
                locationFilter
              );

              if (!locationValid) {
                rejectedResults.push({
                  ...job,
                  rejectionReason: "Location filter mismatch",
                });
                continue;
              }
            }

            results.push(job);
          }

          // Check for next page; stop when none exists or maxPages is reached.
          hasNextPage = await hasNextPageAvailable(page);
          if (hasNextPage && currentPage < maxPages) {
            await navigateToNextPage(page, currentPage + 1);
            currentPage++;

            // Wait for new page to load
            await page.waitForTimeout(2000);
          } else {
            hasNextPage = false;
          }
        }
      } catch (error) {
        // One failing keyword must not abort the remaining keywords.
        logger.error(`Error processing keyword "${keyword}": ${error.message}`);
      }
    }

    logger.info(
      `🎯 SkipTheDrive parsing completed: ${results.length} jobs found, ${rejectedResults.length} rejected`
    );

    // Result envelope consumed by the caller: jobs plus a run summary.
    return {
      results,
      rejectedResults,
      summary: {
        totalJobs: results.length,
        totalRejected: rejectedResults.length,
        keywords: keywords.join(", "),
        locationFilter,
        source: "skipthedrive",
      },
    };
  } catch (error) {
    logger.error(`❌ SkipTheDrive parsing failed: ${error.message}`);
    throw error;
  }
}
|
||||
|
||||
/**
|
||||
* Extract jobs from current page
|
||||
*/
|
||||
/**
 * Collect structured job records from every listing element on the page.
 *
 * A failure on a single listing is logged and skipped; a failure querying the
 * page itself is logged and yields an empty result. Never throws.
 *
 * @param {object} page - Page handle exposing `$$` for element queries.
 * @param {string} keyword - Keyword that produced this page (attached to jobs).
 * @param {*} locationFilter - Currently unused here; filtering happens upstream.
 * @returns {Promise<Array<object>>} Extracted job records (possibly empty).
 */
async function extractJobsFromPage(page, keyword, locationFilter) {
  const collected = [];

  try {
    const listings = await page.$$("article.job_listing");

    for (const listing of listings) {
      try {
        const record = await extractJobData(listing, keyword);
        if (record) {
          collected.push(record);
        }
      } catch (err) {
        // One bad listing should not abort the rest of the page.
        logger.warning(`Failed to extract job data: ${err.message}`);
      }
    }
  } catch (err) {
    logger.error(`Failed to extract jobs from page: ${err.message}`);
  }

  return collected;
}
|
||||
|
||||
/**
|
||||
* Extract data from individual job element
|
||||
*/
|
||||
/**
 * Extract a structured job record from a single listing element.
 *
 * Missing sub-elements degrade gracefully to empty strings / nulls; any
 * unexpected error is logged and `null` is returned so callers can skip
 * the listing.
 *
 * Fix: `parseInt` is now called with an explicit radix of 10 so relative
 * dates like "08 days ago" can never be misread.
 *
 * @param {object} jobElement - Listing element exposing `getAttribute` and `$`.
 * @param {string} keyword - Search keyword that surfaced this listing.
 * @returns {Promise<object|null>} Job record, or null on extraction failure.
 */
async function extractJobData(jobElement, keyword) {
  try {
    // Job ID comes from the article's DOM id, e.g. "post-12345" -> "12345".
    const articleId = (await jobElement.getAttribute("id")) || "";
    const jobId = articleId ? articleId.replace("post-", "") : "";

    // Title and job URL share the same anchor element.
    const titleElement = await jobElement.$(".job_listing-title a");
    const title = titleElement
      ? cleanText(await titleElement.textContent())
      : "";
    const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";

    // Extract company
    const companyElement = await jobElement.$(".company");
    const company = companyElement
      ? cleanText(await companyElement.textContent())
      : "";

    // Extract location
    const locationElement = await jobElement.$(".location");
    const location = locationElement
      ? cleanText(await locationElement.textContent())
      : "";

    // Extract date posted (relative text, e.g. "3 days ago")
    const dateElement = await jobElement.$(".job-date");
    const dateText = dateElement
      ? cleanText(await dateElement.textContent())
      : "";

    // Extract description
    const descElement = await jobElement.$(".job_listing-description");
    const description = descElement
      ? cleanText(await descElement.textContent())
      : "";

    // Featured/sponsored flag: presence of the .featured element.
    const featuredElement = await jobElement.$(".featured");
    const isFeatured = featuredElement !== null;

    // Derive an absolute posting date (YYYY-MM-DD) from "N day(s) ago" text.
    // Other phrasings (e.g. "today") are left as null.
    let datePosted = null;
    let daysAgo = null;

    if (dateText) {
      const match = dateText.match(/(\d+)\s+days?\s+ago/);
      if (match) {
        daysAgo = Number.parseInt(match[1], 10);
        const date = new Date();
        date.setDate(date.getDate() - daysAgo);
        datePosted = date.toISOString().split("T")[0];
      }
    }

    return {
      jobId,
      title,
      company,
      location,
      jobUrl,
      datePosted,
      dateText,
      daysAgo,
      description,
      isFeatured,
      keyword,
      extractedAt: new Date().toISOString(),
      source: "skipthedrive",
    };
  } catch (error) {
    logger.warning(`Error extracting job data: ${error.message}`);
    return null;
  }
}
|
||||
|
||||
/**
|
||||
* Check if next page is available
|
||||
*/
|
||||
/**
 * Report whether a "next page" pagination control exists on the page.
 * Any lookup failure is treated as "no further pages". Never throws.
 *
 * @param {object} page - Page handle exposing `$`.
 * @returns {Promise<boolean>} True when a `.next-page` element is present.
 */
async function hasNextPageAvailable(page) {
  try {
    return (await page.$(".next-page")) !== null;
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
|
||||
* Navigate to next page
|
||||
*/
|
||||
/**
 * Click the pagination control to advance to the next results page.
 * No-op when the control is absent; failures are logged, never thrown.
 *
 * @param {object} page - Page handle exposing `$`.
 * @param {number} pageNumber - Target page number (used in log output only).
 */
async function navigateToNextPage(page, pageNumber) {
  try {
    const control = await page.$(".next-page");
    await control?.click();
  } catch (error) {
    logger.warning(
      `Failed to navigate to page ${pageNumber}: ${error.message}`
    );
  }
}
|
||||
|
||||
module.exports = {
|
||||
skipthedriveStrategy,
|
||||
buildSearchUrl,
|
||||
extractJobsFromPage,
|
||||
extractJobData,
|
||||
};
|
||||
|
||||
@ -1,412 +1,412 @@
|
||||
/**
|
||||
* LinkedIn Parser Demo
|
||||
*
|
||||
* Demonstrates the LinkedIn Parser's capabilities for scraping LinkedIn content
|
||||
* with keyword-based searching, location filtering, and AI analysis.
|
||||
*
|
||||
* This demo uses simulated data for safety and demonstration purposes.
|
||||
*/
|
||||
|
||||
const { logger } = require("../ai-analyzer");
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
// Terminal colors for demo output
|
||||
// ANSI escape sequences used to colorize demo output in the terminal.
const colors = {
  reset: "\x1b[0m",   // clear all attributes
  bright: "\x1b[1m",  // bold/bright intensity
  cyan: "\x1b[36m",
  green: "\x1b[32m",
  yellow: "\x1b[33m",
  blue: "\x1b[34m",
  magenta: "\x1b[35m",
  red: "\x1b[31m",
};
|
||||
|
||||
// Console-printing helpers used throughout the demo. Each wraps console.log
// with a matching ANSI color (and an emoji prefix for status lines); `title`
// and `section` add a leading blank line and bold styling.
const demo = {
  title: (text) =>
    console.log(`\n${colors.bright}${colors.cyan}${text}${colors.reset}`),
  section: (text) =>
    console.log(`\n${colors.bright}${colors.magenta}${text}${colors.reset}`),
  success: (text) => console.log(`${colors.green}✅ ${text}${colors.reset}`),
  info: (text) => console.log(`${colors.blue}ℹ️  ${text}${colors.reset}`),
  warning: (text) => console.log(`${colors.yellow}⚠️  ${text}${colors.reset}`),
  error: (text) => console.log(`${colors.red}❌ ${text}${colors.reset}`),
  code: (text) => console.log(`${colors.cyan}${text}${colors.reset}`),
};
|
||||
|
||||
// Mock data for demonstration
|
||||
// Simulated LinkedIn posts used by the demo sections — no live scraping occurs.
// Each record mirrors the parser's real output shape: normalized `content`,
// raw `original_content` (hashtags/emoji intact), author profile, engagement
// counts, and scrape metadata (keyword that matched, location validation flag).
const mockPosts = [
  {
    id: "post_1",
    content:
      "Just got laid off from my software engineering role at TechCorp. Looking for new opportunities in Toronto. This is really tough but I'm staying positive!",
    original_content:
      "Just got #laidoff from my software engineering role at TechCorp! Looking for new opportunities in #Toronto. This is really tough but I'm staying positive! 🚀",
    author: {
      name: "John Doe",
      title: "Software Engineer",
      company: "TechCorp",
      location: "Toronto, Ontario, Canada",
      profile_url: "https://linkedin.com/in/johndoe",
    },
    engagement: { likes: 45, comments: 12, shares: 3 },
    metadata: {
      post_date: "2024-01-10T14:30:00Z",
      scraped_at: "2024-01-15T10:30:00Z",
      search_keyword: "layoff",
      location_validated: true,
    },
  },
  {
    id: "post_2",
    content:
      "Our company is downsizing and I'm affected. This is really tough news but I'm grateful for the time I had here.",
    original_content:
      "Our company is #downsizing and I'm affected. This is really tough news but I'm grateful for the time I had here. #RIF #layoff",
    author: {
      name: "Jane Smith",
      title: "Product Manager",
      company: "StartupXYZ",
      location: "Vancouver, British Columbia, Canada",
      profile_url: "https://linkedin.com/in/janesmith",
    },
    engagement: { likes: 23, comments: 8, shares: 1 },
    metadata: {
      post_date: "2024-01-09T16:45:00Z",
      scraped_at: "2024-01-15T10:30:00Z",
      search_keyword: "downsizing",
      location_validated: true,
    },
  },
  {
    id: "post_3",
    content:
      "Open to work! Looking for new opportunities in software development. I have 5 years of experience in React, Node.js, and cloud technologies.",
    original_content:
      "Open to work! Looking for new opportunities in software development. I have 5 years of experience in #React, #NodeJS, and #cloud technologies. #opentowork #jobsearch",
    author: {
      name: "Bob Wilson",
      title: "Full Stack Developer",
      company: "Freelance",
      location: "Calgary, Alberta, Canada",
      profile_url: "https://linkedin.com/in/bobwilson",
    },
    engagement: { likes: 67, comments: 15, shares: 8 },
    metadata: {
      post_date: "2024-01-08T11:20:00Z",
      scraped_at: "2024-01-15T10:30:00Z",
      search_keyword: "open to work",
      location_validated: true,
    },
  },
];
|
||||
|
||||
// Top-level demo driver: prints the intro, then walks through each demo
// section in order, pausing for the user's Enter key between them.
// NOTE(review): waitForEnter and the demonstrate* functions are defined
// elsewhere in this file (demonstrateAIAnalysis/demonstrateOutputGeneration
// are outside this chunk).
async function runDemo() {
  demo.title("=== LinkedIn Parser Demo ===");
  demo.info(
    "This demo showcases the LinkedIn Parser's capabilities for scraping LinkedIn content."
  );
  demo.info("All data shown is simulated for demonstration purposes.");
  demo.info("Press Enter to continue through each section...\n");

  await waitForEnter();

  // 1. Configuration Demo
  await demonstrateConfiguration();

  // 2. Keyword Loading Demo
  await demonstrateKeywordLoading();

  // 3. Search Process Demo
  await demonstrateSearchProcess();

  // 4. Location Filtering Demo
  await demonstrateLocationFiltering();

  // 5. AI Analysis Demo
  await demonstrateAIAnalysis();

  // 6. Output Generation Demo
  await demonstrateOutputGeneration();

  demo.title("=== Demo Complete ===");
  demo.success("LinkedIn Parser demo completed successfully!");
  demo.info("Check the README.md for detailed usage instructions.");
}
|
||||
|
||||
/**
 * Demo section 1: show how the parser is configured via environment
 * variables and command-line flags. Output is illustrative only — nothing
 * is read from or written to the real environment.
 */
async function demonstrateConfiguration() {
  demo.section("1. Configuration Setup");
  demo.info(
    "The LinkedIn Parser uses environment variables and command-line options for configuration."
  );

  demo.code("// Environment Variables (.env file)");
  const envExamples = [
    "LINKEDIN_USERNAME=your_email@example.com",
    "LINKEDIN_PASSWORD=your_password",
    "CITY=Toronto",
    "DATE_POSTED=past-week",
    "SORT_BY=date_posted",
    "WHEELS=5",
    "LOCATION_FILTER=Ontario,Manitoba",
    "ENABLE_LOCATION_CHECK=true",
    "ENABLE_LOCAL_AI=true",
    'AI_CONTEXT="job layoffs and workforce reduction"',
    "OLLAMA_MODEL=mistral",
  ];
  for (const line of envExamples) {
    demo.info(line);
  }

  demo.code("// Command Line Options");
  const cliExamples = [
    'node index.js --keyword="layoff,downsizing" --city="Vancouver"',
    "node index.js --no-location --no-ai",
    "node index.js --output=results/my-results.json",
    "node index.js --ai-after",
  ];
  for (const line of cliExamples) {
    demo.info(line);
  }

  await waitForEnter();
}
|
||||
|
||||
// Demo section 2: show where search keywords come from (CSV file or CLI
// override). The "loading" is simulated — the keyword list below is inline.
async function demonstrateKeywordLoading() {
  demo.section("2. Keyword Loading");
  demo.info(
    "Keywords can be loaded from CSV files or specified via command line."
  );

  // Simulate loading keywords from CSV
  demo.code("// Loading keywords from CSV file");
  logger.step("Loading keywords from keywords/linkedin-keywords.csv");

  // Inline stand-in for the CSV contents.
  const keywords = [
    "layoff",
    "downsizing",
    "reduction in force",
    "RIF",
    "termination",
    "job loss",
    "workforce reduction",
    "open to work",
    "actively seeking",
    "job search",
  ];

  demo.success(`Loaded ${keywords.length} keywords from CSV file`);
  // Only the first five keywords are shown, to keep the output short.
  demo.info("Keywords: " + keywords.slice(0, 5).join(", ") + "...");

  demo.code("// Command line keyword override");
  demo.info('node index.js --keyword="layoff,downsizing"');
  demo.info('node index.js --add-keyword="hiring freeze"');

  await waitForEnter();
}
|
||||
|
||||
// Demo section 3: simulate the per-keyword search loop. Post counts are
// random (10–59 "found", ~30% "accepted") purely for display; no network
// activity occurs. NOTE(review): simulateSearch is defined elsewhere in
// this file.
async function demonstrateSearchProcess() {
  demo.section("3. Search Process Simulation");
  demo.info(
    "The parser performs automated LinkedIn searches for each keyword."
  );

  const keywords = ["layoff", "downsizing", "open to work"];

  for (const keyword of keywords) {
    demo.code(`// Searching for keyword: "${keyword}"`);
    logger.search(`Searching for "${keyword}" in Toronto`);

    // Simulate search process
    await simulateSearch();

    // Fake result counts: 10–59 found, roughly 30% accepted.
    const foundCount = Math.floor(Math.random() * 50) + 10;
    const acceptedCount = Math.floor(foundCount * 0.3);

    logger.info(`Found ${foundCount} posts, checking profiles for location...`);
    logger.success(`Accepted ${acceptedCount} posts after location validation`);

    console.log();
  }

  await waitForEnter();
}
|
||||
|
||||
async function demonstrateLocationFiltering() {
  // Section 4: show how the geographic filter accepts or rejects authors.
  demo.section("4. Location Filtering");
  demo.info(
    "Posts are filtered based on author location using geographic validation."
  );

  demo.code("// Location filter configuration");
  demo.info("LOCATION_FILTER=Ontario,Manitoba");
  demo.info("ENABLE_LOCATION_CHECK=true");

  demo.code("// Location validation examples");
  // Expected outcomes given the Ontario/Manitoba filter above.
  const sampleChecks = [
    { location: "Toronto, Ontario, Canada", valid: true },
    { location: "Vancouver, British Columbia, Canada", valid: false },
    { location: "Calgary, Alberta, Canada", valid: false },
    { location: "Winnipeg, Manitoba, Canada", valid: true },
    { location: "New York, NY, USA", valid: false },
  ];

  for (const { location, valid } of sampleChecks) {
    logger.location(`Checking location: ${location}`);
    if (valid) {
      logger.success(`✅ Location valid - post accepted`);
    } else {
      logger.warning(`❌ Location invalid - post rejected`);
    }
  }

  await waitForEnter();
}
|
||||
|
||||
async function demonstrateAIAnalysis() {
  // Section 5: simulate relevance scoring of each mock post via a local model.
  demo.section("5. AI Analysis");
  demo.info(
    "Posts can be analyzed using local Ollama or OpenAI for relevance scoring."
  );

  demo.code("// AI analysis configuration");
  demo.info("ENABLE_LOCAL_AI=true");
  demo.info('AI_CONTEXT="job layoffs and workforce reduction"');
  demo.info("OLLAMA_MODEL=mistral");

  demo.code("// Analyzing posts with AI");
  logger.ai("Starting AI analysis of accepted posts...");

  for (const [index, post] of mockPosts.entries()) {
    logger.info(`Analyzing post ${index + 1}: ${post.content.substring(0, 50)}...`);

    // Pretend to run the model for a moment.
    await simulateProcessing();

    // Fabricated scores: relevance in [0.7, 1.0), confidence in [0.8, 1.0).
    const relevanceScore = 0.7 + Math.random() * 0.3;
    const confidence = 0.8 + Math.random() * 0.2;

    logger.success(
      `Relevance: ${relevanceScore.toFixed(2)}, Confidence: ${confidence.toFixed(2)}`
    );

    // Attach the fake analysis so the output-generation section can show it.
    post.ai_analysis = {
      relevance_score: relevanceScore,
      confidence: confidence,
      context_match: relevanceScore > 0.7,
      analysis_text: `This post discusses ${post.metadata.search_keyword} and is relevant to the search context.`,
    };
  }

  await waitForEnter();
}
|
||||
|
||||
async function demonstrateOutputGeneration() {
  // Section 6: assemble the result payload and write it to demo-results.json.
  demo.section("6. Output Generation");
  demo.info("Results are saved to JSON files with comprehensive metadata.");

  demo.code("// Generating output file");
  logger.file("Saving results to JSON file...");

  // Payload mirrors the real parser's output shape: run metadata + posts.
  const outputData = {
    metadata: {
      timestamp: new Date().toISOString(),
      keywords: ["layoff", "downsizing", "open to work"],
      city: "Toronto",
      date_posted: "past-week",
      sort_by: "date_posted",
      total_posts_found: 150,
      accepted_posts: mockPosts.length,
      rejected_posts: 147,
      processing_time_seconds: 180,
    },
    posts: mockPosts,
  };

  // Persist next to this script so the demo leaves an inspectable artifact.
  const outputPath = path.join(__dirname, "demo-results.json");
  fs.writeFileSync(outputPath, JSON.stringify(outputData, null, 2));

  demo.success(`Results saved to: ${outputPath}`);
  demo.info(`Total posts processed: ${outputData.metadata.total_posts_found}`);
  demo.info(`Posts accepted: ${outputData.metadata.accepted_posts}`);
  demo.info(`Posts rejected: ${outputData.metadata.rejected_posts}`);

  // Static tree sketch of the written file's layout.
  demo.code("// Output file structure");
  demo.info("📁 demo-results.json");
  demo.info(" ├── metadata");
  demo.info(" │ ├── timestamp");
  demo.info(" │ ├── keywords");
  demo.info(" │ ├── city");
  demo.info(" │ ├── total_posts_found");
  demo.info(" │ ├── accepted_posts");
  demo.info(" │ └── processing_time_seconds");
  demo.info(" └── posts[]");
  demo.info(" ├── id");
  demo.info(" ├── content");
  demo.info(" ├── author");
  demo.info(" ├── engagement");
  demo.info(" ├── ai_analysis");
  demo.info(" └── metadata");

  await waitForEnter();
}
|
||||
|
||||
// Helper functions

/**
 * Block until the user presses Enter on stdin.
 * @returns {Promise<void>} resolves once a line has been read.
 */
function waitForEnter() {
  // require() is cached, so hoisting it out of the executor changes nothing.
  const readline = require("readline");
  return new Promise((resolve) => {
    const rl = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
    });
    rl.question("\nPress Enter to continue...", () => {
      rl.close();
      resolve();
    });
  });
}
|
||||
|
||||
/**
 * Fake the browser-driven search flow by logging each step on an 800ms timer.
 * @returns {Promise<void>} resolves after all steps have been printed.
 */
async function simulateSearch() {
  const phases = [
    "Launching browser",
    "Logging in",
    "Navigating to search",
    "Loading results",
  ];
  return new Promise((resolve) => {
    let cursor = 0;
    const ticker = setInterval(() => {
      if (cursor >= phases.length) {
        clearInterval(ticker);
        resolve();
        return;
      }
      logger.info(phases[cursor]);
      cursor += 1;
    }, 800);
  });
}
|
||||
|
||||
/**
 * Show an animated "Processing..." spinner for 1.5s, then clear the line.
 * @returns {Promise<void>} resolves when the animation finishes.
 */
async function simulateProcessing() {
  return new Promise((resolve) => {
    const frames = [".", "..", "..."];
    let frame = 0;

    // Redraw the spinner in place twice a second.
    const spinner = setInterval(() => {
      process.stdout.write(`\rProcessing${frames[frame]}`);
      frame = (frame + 1) % frames.length;
    }, 500);

    // Stop after 1.5s and return the cursor to column 0.
    setTimeout(() => {
      clearInterval(spinner);
      process.stdout.write("\r");
      resolve();
    }, 1500);
  });
}
|
||||
|
||||
// Run the demo if this file is executed directly
if (require.main === module) {
  // Surface the failure and exit non-zero so shells/CI notice it.
  const onFailure = (error) => {
    demo.error(`Demo failed: ${error.message}`);
    process.exit(1);
  };
  runDemo().catch(onFailure);
}

module.exports = { runDemo };
|
||||
/**
|
||||
* LinkedIn Parser Demo
|
||||
*
|
||||
* Demonstrates the LinkedIn Parser's capabilities for scraping LinkedIn content
|
||||
* with keyword-based searching, location filtering, and AI analysis.
|
||||
*
|
||||
* This demo uses simulated data for safety and demonstration purposes.
|
||||
*/
|
||||
|
||||
const { logger } = require("../ai-analyzer");
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
// Terminal colors for demo output (raw ANSI escape sequences).
const colors = {
  reset: "\x1b[0m", // restore default attributes
  bright: "\x1b[1m", // bold/bright intensity
  cyan: "\x1b[36m",
  green: "\x1b[32m",
  yellow: "\x1b[33m",
  blue: "\x1b[34m",
  magenta: "\x1b[35m",
  red: "\x1b[31m",
};
|
||||
|
||||
// Console helpers for the demo narration; each prints one colored line.
const demo = {
  title: (msg) =>
    console.log(`\n${colors.bright}${colors.cyan}${msg}${colors.reset}`),
  section: (msg) =>
    console.log(`\n${colors.bright}${colors.magenta}${msg}${colors.reset}`),
  success: (msg) => console.log(`${colors.green}✅ ${msg}${colors.reset}`),
  info: (msg) => console.log(`${colors.blue}ℹ️  ${msg}${colors.reset}`),
  warning: (msg) => console.log(`${colors.yellow}⚠️  ${msg}${colors.reset}`),
  error: (msg) => console.log(`${colors.red}❌ ${msg}${colors.reset}`),
  code: (msg) => console.log(`${colors.cyan}${msg}${colors.reset}`),
};
|
||||
|
||||
// Mock data for demonstration
// Three hand-written posts that stand in for real scraped LinkedIn content.
// `content` is the cleaned text; `original_content` keeps hashtags/emoji.
const demoPostOne = {
  id: "post_1",
  content:
    "Just got laid off from my software engineering role at TechCorp. Looking for new opportunities in Toronto. This is really tough but I'm staying positive!",
  original_content:
    "Just got #laidoff from my software engineering role at TechCorp! Looking for new opportunities in #Toronto. This is really tough but I'm staying positive! 🚀",
  author: {
    name: "John Doe",
    title: "Software Engineer",
    company: "TechCorp",
    location: "Toronto, Ontario, Canada",
    profile_url: "https://linkedin.com/in/johndoe",
  },
  engagement: { likes: 45, comments: 12, shares: 3 },
  metadata: {
    post_date: "2024-01-10T14:30:00Z",
    scraped_at: "2024-01-15T10:30:00Z",
    search_keyword: "layoff",
    location_validated: true,
  },
};

const demoPostTwo = {
  id: "post_2",
  content:
    "Our company is downsizing and I'm affected. This is really tough news but I'm grateful for the time I had here.",
  original_content:
    "Our company is #downsizing and I'm affected. This is really tough news but I'm grateful for the time I had here. #RIF #layoff",
  author: {
    name: "Jane Smith",
    title: "Product Manager",
    company: "StartupXYZ",
    location: "Vancouver, British Columbia, Canada",
    profile_url: "https://linkedin.com/in/janesmith",
  },
  engagement: { likes: 23, comments: 8, shares: 1 },
  metadata: {
    post_date: "2024-01-09T16:45:00Z",
    scraped_at: "2024-01-15T10:30:00Z",
    search_keyword: "downsizing",
    location_validated: true,
  },
};

const demoPostThree = {
  id: "post_3",
  content:
    "Open to work! Looking for new opportunities in software development. I have 5 years of experience in React, Node.js, and cloud technologies.",
  original_content:
    "Open to work! Looking for new opportunities in software development. I have 5 years of experience in #React, #NodeJS, and #cloud technologies. #opentowork #jobsearch",
  author: {
    name: "Bob Wilson",
    title: "Full Stack Developer",
    company: "Freelance",
    location: "Calgary, Alberta, Canada",
    profile_url: "https://linkedin.com/in/bobwilson",
  },
  engagement: { likes: 67, comments: 15, shares: 8 },
  metadata: {
    post_date: "2024-01-08T11:20:00Z",
    scraped_at: "2024-01-15T10:30:00Z",
    search_keyword: "open to work",
    location_validated: true,
  },
};

const mockPosts = [demoPostOne, demoPostTwo, demoPostThree];
|
||||
|
||||
/**
 * Top-level demo driver: intro banner, six interactive sections, outro.
 * Each section pauses for an Enter keypress before continuing.
 */
async function runDemo() {
  demo.title("=== LinkedIn Parser Demo ===");
  demo.info(
    "This demo showcases the LinkedIn Parser's capabilities for scraping LinkedIn content."
  );
  demo.info("All data shown is simulated for demonstration purposes.");
  demo.info("Press Enter to continue through each section...\n");

  await waitForEnter();

  // The six numbered sections, in presentation order.
  const sections = [
    demonstrateConfiguration,
    demonstrateKeywordLoading,
    demonstrateSearchProcess,
    demonstrateLocationFiltering,
    demonstrateAIAnalysis,
    demonstrateOutputGeneration,
  ];
  for (const section of sections) {
    await section();
  }

  demo.title("=== Demo Complete ===");
  demo.success("LinkedIn Parser demo completed successfully!");
  demo.info("Check the README.md for detailed usage instructions.");
}
|
||||
|
||||
async function demonstrateConfiguration() {
  // Section 1: list the environment variables and CLI flags the parser reads.
  demo.section("1. Configuration Setup");
  demo.info(
    "The LinkedIn Parser uses environment variables and command-line options for configuration."
  );

  demo.code("// Environment Variables (.env file)");
  const envExamples = [
    "LINKEDIN_USERNAME=your_email@example.com",
    "LINKEDIN_PASSWORD=your_password",
    "CITY=Toronto",
    "DATE_POSTED=past-week",
    "SORT_BY=date_posted",
    "WHEELS=5",
    "LOCATION_FILTER=Ontario,Manitoba",
    "ENABLE_LOCATION_CHECK=true",
    "ENABLE_LOCAL_AI=true",
    'AI_CONTEXT="job layoffs and workforce reduction"',
    "OLLAMA_MODEL=mistral",
  ];
  for (const entry of envExamples) {
    demo.info(entry);
  }

  demo.code("// Command Line Options");
  const cliExamples = [
    'node index.js --keyword="layoff,downsizing" --city="Vancouver"',
    "node index.js --no-location --no-ai",
    "node index.js --output=results/my-results.json",
    "node index.js --ai-after",
  ];
  for (const entry of cliExamples) {
    demo.info(entry);
  }

  await waitForEnter();
}
|
||||
|
||||
async function demonstrateKeywordLoading() {
|
||||
demo.section("2. Keyword Loading");
|
||||
demo.info(
|
||||
"Keywords can be loaded from CSV files or specified via command line."
|
||||
);
|
||||
|
||||
// Simulate loading keywords from CSV
|
||||
demo.code("// Loading keywords from CSV file");
|
||||
logger.step("Loading keywords from keywords/linkedin-keywords.csv");
|
||||
|
||||
const keywords = [
|
||||
"layoff",
|
||||
"downsizing",
|
||||
"reduction in force",
|
||||
"RIF",
|
||||
"termination",
|
||||
"job loss",
|
||||
"workforce reduction",
|
||||
"open to work",
|
||||
"actively seeking",
|
||||
"job search",
|
||||
];
|
||||
|
||||
demo.success(`Loaded ${keywords.length} keywords from CSV file`);
|
||||
demo.info("Keywords: " + keywords.slice(0, 5).join(", ") + "...");
|
||||
|
||||
demo.code("// Command line keyword override");
|
||||
demo.info('node index.js --keyword="layoff,downsizing"');
|
||||
demo.info('node index.js --add-keyword="hiring freeze"');
|
||||
|
||||
await waitForEnter();
|
||||
}
|
||||
|
||||
async function demonstrateSearchProcess() {
|
||||
demo.section("3. Search Process Simulation");
|
||||
demo.info(
|
||||
"The parser performs automated LinkedIn searches for each keyword."
|
||||
);
|
||||
|
||||
const keywords = ["layoff", "downsizing", "open to work"];
|
||||
|
||||
for (const keyword of keywords) {
|
||||
demo.code(`// Searching for keyword: "${keyword}"`);
|
||||
logger.search(`Searching for "${keyword}" in Toronto`);
|
||||
|
||||
// Simulate search process
|
||||
await simulateSearch();
|
||||
|
||||
const foundCount = Math.floor(Math.random() * 50) + 10;
|
||||
const acceptedCount = Math.floor(foundCount * 0.3);
|
||||
|
||||
logger.info(`Found ${foundCount} posts, checking profiles for location...`);
|
||||
logger.success(`Accepted ${acceptedCount} posts after location validation`);
|
||||
|
||||
console.log();
|
||||
}
|
||||
|
||||
await waitForEnter();
|
||||
}
|
||||
|
||||
async function demonstrateLocationFiltering() {
|
||||
demo.section("4. Location Filtering");
|
||||
demo.info(
|
||||
"Posts are filtered based on author location using geographic validation."
|
||||
);
|
||||
|
||||
demo.code("// Location filter configuration");
|
||||
demo.info("LOCATION_FILTER=Ontario,Manitoba");
|
||||
demo.info("ENABLE_LOCATION_CHECK=true");
|
||||
|
||||
demo.code("// Location validation examples");
|
||||
const testLocations = [
|
||||
{ location: "Toronto, Ontario, Canada", valid: true },
|
||||
{ location: "Vancouver, British Columbia, Canada", valid: false },
|
||||
{ location: "Calgary, Alberta, Canada", valid: false },
|
||||
{ location: "Winnipeg, Manitoba, Canada", valid: true },
|
||||
{ location: "New York, NY, USA", valid: false },
|
||||
];
|
||||
|
||||
testLocations.forEach(({ location, valid }) => {
|
||||
logger.location(`Checking location: ${location}`);
|
||||
if (valid) {
|
||||
logger.success(`✅ Location valid - post accepted`);
|
||||
} else {
|
||||
logger.warning(`❌ Location invalid - post rejected`);
|
||||
}
|
||||
});
|
||||
|
||||
await waitForEnter();
|
||||
}
|
||||
|
||||
async function demonstrateAIAnalysis() {
|
||||
demo.section("5. AI Analysis");
|
||||
demo.info(
|
||||
"Posts can be analyzed using local Ollama or OpenAI for relevance scoring."
|
||||
);
|
||||
|
||||
demo.code("// AI analysis configuration");
|
||||
demo.info("ENABLE_LOCAL_AI=true");
|
||||
demo.info('AI_CONTEXT="job layoffs and workforce reduction"');
|
||||
demo.info("OLLAMA_MODEL=mistral");
|
||||
|
||||
demo.code("// Analyzing posts with AI");
|
||||
logger.ai("Starting AI analysis of accepted posts...");
|
||||
|
||||
for (let i = 0; i < mockPosts.length; i++) {
|
||||
const post = mockPosts[i];
|
||||
logger.info(`Analyzing post ${i + 1}: ${post.content.substring(0, 50)}...`);
|
||||
|
||||
// Simulate AI analysis
|
||||
await simulateProcessing();
|
||||
|
||||
const relevanceScore = 0.7 + Math.random() * 0.3;
|
||||
const confidence = 0.8 + Math.random() * 0.2;
|
||||
|
||||
logger.success(
|
||||
`Relevance: ${relevanceScore.toFixed(
|
||||
2
|
||||
)}, Confidence: ${confidence.toFixed(2)}`
|
||||
);
|
||||
|
||||
// Add AI analysis to post
|
||||
post.ai_analysis = {
|
||||
relevance_score: relevanceScore,
|
||||
confidence: confidence,
|
||||
context_match: relevanceScore > 0.7,
|
||||
analysis_text: `This post discusses ${post.metadata.search_keyword} and is relevant to the search context.`,
|
||||
};
|
||||
}
|
||||
|
||||
await waitForEnter();
|
||||
}
|
||||
|
||||
async function demonstrateOutputGeneration() {
|
||||
demo.section("6. Output Generation");
|
||||
demo.info("Results are saved to JSON files with comprehensive metadata.");
|
||||
|
||||
demo.code("// Generating output file");
|
||||
logger.file("Saving results to JSON file...");
|
||||
|
||||
const outputData = {
|
||||
metadata: {
|
||||
timestamp: new Date().toISOString(),
|
||||
keywords: ["layoff", "downsizing", "open to work"],
|
||||
city: "Toronto",
|
||||
date_posted: "past-week",
|
||||
sort_by: "date_posted",
|
||||
total_posts_found: 150,
|
||||
accepted_posts: mockPosts.length,
|
||||
rejected_posts: 147,
|
||||
processing_time_seconds: 180,
|
||||
},
|
||||
posts: mockPosts,
|
||||
};
|
||||
|
||||
// Save to demo file
|
||||
const outputPath = path.join(__dirname, "demo-results.json");
|
||||
fs.writeFileSync(outputPath, JSON.stringify(outputData, null, 2));
|
||||
|
||||
demo.success(`Results saved to: ${outputPath}`);
|
||||
demo.info(`Total posts processed: ${outputData.metadata.total_posts_found}`);
|
||||
demo.info(`Posts accepted: ${outputData.metadata.accepted_posts}`);
|
||||
demo.info(`Posts rejected: ${outputData.metadata.rejected_posts}`);
|
||||
|
||||
demo.code("// Output file structure");
|
||||
demo.info("📁 demo-results.json");
|
||||
demo.info(" ├── metadata");
|
||||
demo.info(" │ ├── timestamp");
|
||||
demo.info(" │ ├── keywords");
|
||||
demo.info(" │ ├── city");
|
||||
demo.info(" │ ├── total_posts_found");
|
||||
demo.info(" │ ├── accepted_posts");
|
||||
demo.info(" │ └── processing_time_seconds");
|
||||
demo.info(" └── posts[]");
|
||||
demo.info(" ├── id");
|
||||
demo.info(" ├── content");
|
||||
demo.info(" ├── author");
|
||||
demo.info(" ├── engagement");
|
||||
demo.info(" ├── ai_analysis");
|
||||
demo.info(" └── metadata");
|
||||
|
||||
await waitForEnter();
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
function waitForEnter() {
|
||||
return new Promise((resolve) => {
|
||||
const readline = require("readline");
|
||||
const rl = readline.createInterface({
|
||||
input: process.stdin,
|
||||
output: process.stdout,
|
||||
});
|
||||
|
||||
rl.question("\nPress Enter to continue...", () => {
|
||||
rl.close();
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
async function simulateSearch() {
|
||||
return new Promise((resolve) => {
|
||||
const steps = [
|
||||
"Launching browser",
|
||||
"Logging in",
|
||||
"Navigating to search",
|
||||
"Loading results",
|
||||
];
|
||||
let i = 0;
|
||||
const interval = setInterval(() => {
|
||||
if (i < steps.length) {
|
||||
logger.info(steps[i]);
|
||||
i++;
|
||||
} else {
|
||||
clearInterval(interval);
|
||||
resolve();
|
||||
}
|
||||
}, 800);
|
||||
});
|
||||
}
|
||||
|
||||
async function simulateProcessing() {
|
||||
return new Promise((resolve) => {
|
||||
const dots = [".", "..", "..."];
|
||||
let i = 0;
|
||||
const interval = setInterval(() => {
|
||||
process.stdout.write(`\rProcessing${dots[i]}`);
|
||||
i = (i + 1) % dots.length;
|
||||
}, 500);
|
||||
|
||||
setTimeout(() => {
|
||||
clearInterval(interval);
|
||||
process.stdout.write("\r");
|
||||
resolve();
|
||||
}, 1500);
|
||||
});
|
||||
}
|
||||
|
||||
// Run the demo if this file is executed directly
|
||||
if (require.main === module) {
|
||||
runDemo().catch((error) => {
|
||||
demo.error(`Demo failed: ${error.message}`);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
module.exports = { runDemo };
|
||||
|
||||
@ -1,51 +1,51 @@
|
||||
keyword
|
||||
acquisition
|
||||
actively seeking
|
||||
bankruptcy
|
||||
business realignment
|
||||
career transition
|
||||
company closure
|
||||
company reorganization
|
||||
cost cutting
|
||||
department closure
|
||||
downsizing
|
||||
furlough
|
||||
headcount reduction
|
||||
hiring
|
||||
hiring freeze
|
||||
involuntary separation
|
||||
job cuts
|
||||
job elimination
|
||||
job loss
|
||||
job opportunity
|
||||
job search
|
||||
layoff
|
||||
looking for opportunities
|
||||
mass layoff
|
||||
merger
|
||||
new position
|
||||
new role
|
||||
office closure
|
||||
open to work
|
||||
organizational change
|
||||
outplacement
|
||||
plant closure
|
||||
position elimination
|
||||
recruiting
|
||||
reduction in force
|
||||
redundancies
|
||||
redundancy
|
||||
restructuring
|
||||
rightsizing
|
||||
RIF
|
||||
role elimination
|
||||
separation
|
||||
site closure
|
||||
staff reduction
|
||||
terminated
|
||||
termination
|
||||
voluntary separation
|
||||
workforce adjustment
|
||||
workforce optimization
|
||||
workforce reduction
|
||||
workforce transition
|
||||
keyword
|
||||
acquisition
|
||||
actively seeking
|
||||
bankruptcy
|
||||
business realignment
|
||||
career transition
|
||||
company closure
|
||||
company reorganization
|
||||
cost cutting
|
||||
department closure
|
||||
downsizing
|
||||
furlough
|
||||
headcount reduction
|
||||
hiring
|
||||
hiring freeze
|
||||
involuntary separation
|
||||
job cuts
|
||||
job elimination
|
||||
job loss
|
||||
job opportunity
|
||||
job search
|
||||
layoff
|
||||
looking for opportunities
|
||||
mass layoff
|
||||
merger
|
||||
new position
|
||||
new role
|
||||
office closure
|
||||
open to work
|
||||
organizational change
|
||||
outplacement
|
||||
plant closure
|
||||
position elimination
|
||||
recruiting
|
||||
reduction in force
|
||||
redundancies
|
||||
redundancy
|
||||
restructuring
|
||||
rightsizing
|
||||
RIF
|
||||
role elimination
|
||||
separation
|
||||
site closure
|
||||
staff reduction
|
||||
terminated
|
||||
termination
|
||||
voluntary separation
|
||||
workforce adjustment
|
||||
workforce optimization
|
||||
workforce reduction
|
||||
workforce transition
|
||||
|
||||
|
@ -1,230 +1,230 @@
|
||||
/**
|
||||
* LinkedIn Parsing Strategy
|
||||
*
|
||||
* Uses core-parser for browser management and ai-analyzer for utilities
|
||||
*/
|
||||
|
||||
const {
|
||||
logger,
|
||||
cleanText,
|
||||
containsAnyKeyword,
|
||||
validateLocationAgainstFilters,
|
||||
extractLocationFromProfile,
|
||||
} = require("ai-analyzer");
|
||||
|
||||
/**
 * LinkedIn parsing strategy function.
 *
 * Drives an authenticated LinkedIn content search for each keyword, extracts
 * posts from the result page, de-duplicates them by post ID, and optionally
 * filters them by author location.
 *
 * @param {object} coreParser - core-parser instance providing createPage,
 *   authenticate, and navigationManager.navigateAndWaitFor.
 * @param {object} [options]
 * @param {string[]} [options.keywords] - search terms (layoff-related defaults).
 * @param {*} [options.locationFilter=null] - passed to
 *   validateLocationAgainstFilters; falsy disables location checking.
 * @param {number} [options.maxResults=50] - stop once this many posts accepted.
 * @param {object} [options.credentials={}] - LinkedIn credentials.
 * @returns {Promise<{results: object[], rejectedResults: object[], summary: object}>}
 * @throws rethrows any navigation/parsing error after logging it.
 */
async function linkedinStrategy(coreParser, options = {}) {
  const {
    keywords = ["layoff", "downsizing", "job cuts"],
    locationFilter = null,
    maxResults = 50,
    credentials = {},
  } = options;

  const results = [];
  const rejectedResults = [];
  // Post IDs already collected — de-duplicates across keywords.
  const seenPosts = new Set();

  try {
    // Create main page
    const page = await coreParser.createPage("linkedin-main");

    // Authenticate to LinkedIn
    logger.info("🔐 Authenticating to LinkedIn...");
    await coreParser.authenticate("linkedin", credentials, "linkedin-main");
    logger.info("✅ LinkedIn authentication successful");

    // Search for posts with each keyword
    for (const keyword of keywords) {
      logger.info(`🔍 Searching LinkedIn for: "${keyword}"`);

      const searchUrl = `https://www.linkedin.com/search/results/content/?keywords=${encodeURIComponent(
        keyword
      )}&sortBy=date_posted`;

      // Navigate once and wait for the results container. The previous code
      // called coreParser.navigateTo(searchUrl) and then
      // navigateAndWaitFor(searchUrl, ...), loading the same URL twice per
      // keyword — a wasted round trip that also raises rate-limit risk.
      const hasResults = await coreParser.navigationManager.navigateAndWaitFor(
        searchUrl,
        ".search-results-container",
        { pageId: "linkedin-main", timeout: 10000 }
      );

      if (!hasResults) {
        logger.warning(`No search results found for keyword: ${keyword}`);
        continue;
      }

      // Extract posts from current page
      const posts = await extractPostsFromPage(page, keyword);

      for (const post of posts) {
        // Skip duplicates
        if (seenPosts.has(post.postId)) continue;
        seenPosts.add(post.postId);

        // Validate location if filtering enabled
        if (locationFilter) {
          const locationValid = validateLocationAgainstFilters(
            post.location || post.profileLocation,
            locationFilter
          );

          if (!locationValid) {
            rejectedResults.push({
              ...post,
              rejectionReason: "Location filter mismatch",
            });
            continue;
          }
        }

        results.push(post);

        if (results.length >= maxResults) {
          logger.info(`📊 Reached maximum results limit: ${maxResults}`);
          break;
        }
      }

      if (results.length >= maxResults) break;
    }

    logger.info(
      `🎯 LinkedIn parsing completed: ${results.length} posts found, ${rejectedResults.length} rejected`
    );

    return {
      results,
      rejectedResults,
      summary: {
        totalPosts: results.length,
        totalRejected: rejectedResults.length,
        keywords: keywords.join(", "),
        locationFilter,
      },
    };
  } catch (error) {
    logger.error(`❌ LinkedIn parsing failed: ${error.message}`);
    throw error;
  }
}
|
||||
|
||||
/**
 * Extract posts from the current search results page.
 *
 * @param {object} page - browser page exposing `$$` for element queries.
 * @param {string} keyword - search term the page was loaded for.
 * @returns {Promise<object[]>} extracted post records; never rejects —
 *   per-post and page-level failures are logged and skipped.
 */
async function extractPostsFromPage(page, keyword) {
  const collected = [];

  try {
    // Each LinkedIn feed update is rendered as a .feed-shared-update-v2 node.
    const updateNodes = await page.$$(".feed-shared-update-v2");

    for (const node of updateNodes) {
      try {
        const record = await extractPostData(node, keyword);
        if (record) {
          collected.push(record);
        }
      } catch (error) {
        // One broken card should not abort the whole page.
        logger.warning(`Failed to extract post data: ${error.message}`);
      }
    }
  } catch (error) {
    logger.error(`Failed to extract posts from page: ${error.message}`);
  }

  return collected;
}
|
||||
|
||||
/**
 * Extract data from an individual post element.
 *
 * @param {object} postElement - element handle exposing `$`, `getAttribute`,
 *   and `textContent` on its children.
 * @param {string} keyword - search term; posts whose content does not contain
 *   it are dropped.
 * @returns {Promise<object|null>} the post record, or null when the post is
 *   irrelevant or extraction failed (failures are logged, not thrown).
 */
async function extractPostData(postElement, keyword) {
  try {
    // Resolve a child selector to cleaned text, with a fallback when absent.
    const textOf = async (selector, fallback) => {
      const el = await postElement.$(selector);
      return el ? cleanText(await el.textContent()) : fallback;
    };
    // Resolve a child selector to an attribute value, or "" when absent.
    const attrOf = async (selector, attr) => {
      const el = await postElement.$(selector);
      return el ? await el.getAttribute(attr) : "";
    };

    // LinkedIn stores the post's URN on the card element itself.
    const postId = (await postElement.getAttribute("data-urn")) || "";

    const authorName = await textOf(".feed-shared-actor__name", "");
    const authorUrl = await attrOf(".feed-shared-actor__name a", "href");
    const content = await textOf(".feed-shared-text", "");
    const timestamp = await attrOf(
      ".feed-shared-actor__sub-description time",
      "datetime"
    );
    const likesText = await textOf(".social-counts-reactions__count", "0");
    const commentsText = await textOf(".social-counts-comments__count", "0");

    // Drop posts that do not mention the search keyword.
    if (!containsAnyKeyword(content, [keyword])) {
      return null; // Skip irrelevant posts
    }

    return {
      postId: cleanText(postId),
      authorName,
      authorUrl,
      content,
      timestamp,
      keyword,
      likes: extractNumber(likesText),
      comments: extractNumber(commentsText),
      extractedAt: new Date().toISOString(),
      source: "linkedin",
    };
  } catch (error) {
    logger.warning(`Error extracting post data: ${error.message}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Extract the first integer found in a text snippet (e.g. "15 likes" -> 15).
 *
 * @param {string} text - text that may contain a run of digits.
 * @returns {number} the first digit run as a base-10 integer, or 0 when none.
 */
function extractNumber(text) {
  const match = text.match(/\d+/);
  // Explicit radix 10: parseInt without a radix is a classic footgun.
  return match ? Number.parseInt(match[0], 10) : 0;
}
|
||||
|
||||
module.exports = {
|
||||
linkedinStrategy,
|
||||
extractPostsFromPage,
|
||||
extractPostData,
|
||||
};
|
||||
/**
|
||||
* LinkedIn Parsing Strategy
|
||||
*
|
||||
* Uses core-parser for browser management and ai-analyzer for utilities
|
||||
*/
|
||||
|
||||
const {
|
||||
logger,
|
||||
cleanText,
|
||||
containsAnyKeyword,
|
||||
validateLocationAgainstFilters,
|
||||
extractLocationFromProfile,
|
||||
} = require("ai-analyzer");
|
||||
|
||||
/**
 * LinkedIn parsing strategy function.
 *
 * Drives an authenticated LinkedIn content search for each keyword, extracts
 * posts from the result page, de-duplicates them by post ID, and optionally
 * filters them by author location.
 *
 * @param {object} coreParser - core-parser instance providing createPage,
 *   authenticate, and navigationManager.navigateAndWaitFor.
 * @param {object} [options]
 * @param {string[]} [options.keywords] - search terms (layoff-related defaults).
 * @param {*} [options.locationFilter=null] - passed to
 *   validateLocationAgainstFilters; falsy disables location checking.
 * @param {number} [options.maxResults=50] - stop once this many posts accepted.
 * @param {object} [options.credentials={}] - LinkedIn credentials.
 * @returns {Promise<{results: object[], rejectedResults: object[], summary: object}>}
 * @throws rethrows any navigation/parsing error after logging it.
 */
async function linkedinStrategy(coreParser, options = {}) {
  const {
    keywords = ["layoff", "downsizing", "job cuts"],
    locationFilter = null,
    maxResults = 50,
    credentials = {},
  } = options;

  const results = [];
  const rejectedResults = [];
  // Post IDs already collected — de-duplicates across keywords.
  const seenPosts = new Set();

  try {
    // Create main page
    const page = await coreParser.createPage("linkedin-main");

    // Authenticate to LinkedIn
    logger.info("🔐 Authenticating to LinkedIn...");
    await coreParser.authenticate("linkedin", credentials, "linkedin-main");
    logger.info("✅ LinkedIn authentication successful");

    // Search for posts with each keyword
    for (const keyword of keywords) {
      logger.info(`🔍 Searching LinkedIn for: "${keyword}"`);

      const searchUrl = `https://www.linkedin.com/search/results/content/?keywords=${encodeURIComponent(
        keyword
      )}&sortBy=date_posted`;

      // Navigate once and wait for the results container. The previous code
      // called coreParser.navigateTo(searchUrl) and then
      // navigateAndWaitFor(searchUrl, ...), loading the same URL twice per
      // keyword — a wasted round trip that also raises rate-limit risk.
      const hasResults = await coreParser.navigationManager.navigateAndWaitFor(
        searchUrl,
        ".search-results-container",
        { pageId: "linkedin-main", timeout: 10000 }
      );

      if (!hasResults) {
        logger.warning(`No search results found for keyword: ${keyword}`);
        continue;
      }

      // Extract posts from current page
      const posts = await extractPostsFromPage(page, keyword);

      for (const post of posts) {
        // Skip duplicates
        if (seenPosts.has(post.postId)) continue;
        seenPosts.add(post.postId);

        // Validate location if filtering enabled
        if (locationFilter) {
          const locationValid = validateLocationAgainstFilters(
            post.location || post.profileLocation,
            locationFilter
          );

          if (!locationValid) {
            rejectedResults.push({
              ...post,
              rejectionReason: "Location filter mismatch",
            });
            continue;
          }
        }

        results.push(post);

        if (results.length >= maxResults) {
          logger.info(`📊 Reached maximum results limit: ${maxResults}`);
          break;
        }
      }

      if (results.length >= maxResults) break;
    }

    logger.info(
      `🎯 LinkedIn parsing completed: ${results.length} posts found, ${rejectedResults.length} rejected`
    );

    return {
      results,
      rejectedResults,
      summary: {
        totalPosts: results.length,
        totalRejected: rejectedResults.length,
        keywords: keywords.join(", "),
        locationFilter,
      },
    };
  } catch (error) {
    logger.error(`❌ LinkedIn parsing failed: ${error.message}`);
    throw error;
  }
}
|
||||
|
||||
/**
 * Extract posts from current search results page.
 *
 * Scans every `.feed-shared-update-v2` element on the page, delegating
 * per-post parsing to `extractPostData`. A failure on one post is logged
 * and skipped; a failure to query the page at all is logged and yields
 * whatever was collected so far (possibly an empty list).
 *
 * @param {object} page - Browser page handle exposing `$$(selector)`.
 * @param {string} keyword - Search keyword attached to each extracted post.
 * @returns {Promise<object[]>} Successfully parsed posts (never rejects).
 */
async function extractPostsFromPage(page, keyword) {
  const collected = [];

  try {
    const elements = await page.$$(".feed-shared-update-v2");

    for (const element of elements) {
      try {
        const parsed = await extractPostData(element, keyword);
        if (parsed) collected.push(parsed);
      } catch (error) {
        // One bad post must not abort the whole page scan.
        logger.warning(`Failed to extract post data: ${error.message}`);
      }
    }
  } catch (error) {
    logger.error(`Failed to extract posts from page: ${error.message}`);
  }

  return collected;
}
|
||||
|
||||
/**
 * Extract data from individual post element.
 *
 * Pulls author, content, timestamp, and engagement counts from a single
 * `.feed-shared-update-v2` element, then discards the post unless its
 * content contains the given keyword.
 *
 * @param {object} postElement - Element handle exposing `$`, `getAttribute`,
 *   and nested handles with `textContent`.
 * @param {string} keyword - Relevance keyword; also stored on the result.
 * @returns {Promise<object|null>} Post record, or null when the post is
 *   irrelevant or extraction fails (failures are logged, never thrown).
 */
async function extractPostData(postElement, keyword) {
  try {
    // Small local helpers for the repeated "optional element" pattern.
    const readText = async (handle, fallback) =>
      handle ? cleanText(await handle.textContent()) : fallback;
    const readAttr = async (handle, attr) =>
      handle ? await handle.getAttribute(attr) : "";

    // Post ID (falls back to empty string when the attribute is absent).
    const postId = (await postElement.getAttribute("data-urn")) || "";

    // Author info
    const authorName = await readText(
      await postElement.$(".feed-shared-actor__name"),
      ""
    );
    const authorUrl = await readAttr(
      await postElement.$(".feed-shared-actor__name a"),
      "href"
    );

    // Post content
    const content = await readText(await postElement.$(".feed-shared-text"), "");

    // Timestamp (ISO-ish datetime attribute when present)
    const timestamp = await readAttr(
      await postElement.$(".feed-shared-actor__sub-description time"),
      "datetime"
    );

    // Engagement metrics (textual counts; parsed later by extractNumber)
    const likesText = await readText(
      await postElement.$(".social-counts-reactions__count"),
      "0"
    );
    const commentsText = await readText(
      await postElement.$(".social-counts-comments__count"),
      "0"
    );

    // Drop posts whose content does not mention the keyword at all.
    if (!containsAnyKeyword(content, [keyword])) {
      return null; // Skip irrelevant posts
    }

    return {
      postId: cleanText(postId),
      authorName,
      authorUrl,
      content,
      timestamp,
      keyword,
      likes: extractNumber(likesText),
      comments: extractNumber(commentsText),
      extractedAt: new Date().toISOString(),
      source: "linkedin",
    };
  } catch (error) {
    logger.warning(`Error extracting post data: ${error.message}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Extract the first run of digits from text (e.g. "15 likes" -> 15).
 *
 * @param {string|*} text - Source text; non-string or nullish input is
 *   tolerated and treated as containing no digits.
 * @returns {number} First integer found, or 0 when none is present.
 */
function extractNumber(text) {
  // Coerce defensively: the original threw on null/undefined input.
  const match = String(text ?? "").match(/\d+/);
  // Always pass the radix — avoids legacy octal interpretation.
  return match ? Number.parseInt(match[0], 10) : 0;
}
|
||||
|
||||
// Public API: the strategy entry point plus the page/post extraction helpers
// (exported individually so they can be exercised in isolation).
module.exports = {
  linkedinStrategy,
  extractPostsFromPage,
  extractPostData,
};
|
||||
|
||||
@ -1,34 +1,34 @@
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"text": "Just got laid off from my software engineering role. Looking for new opportunities in the Toronto area.",
|
||||
"location": "Toronto, Ontario, Canada",
|
||||
"keyword": "layoff",
|
||||
"timestamp": "2024-01-15T10:30:00Z"
|
||||
},
|
||||
{
|
||||
"text": "Excited to share that I'm starting a new position as a Senior Developer at TechCorp!",
|
||||
"location": "Vancouver, BC, Canada",
|
||||
"keyword": "hiring",
|
||||
"timestamp": "2024-01-15T11:00:00Z"
|
||||
},
|
||||
{
|
||||
"text": "Our company is going through a restructuring and unfortunately had to let go of 50 employees.",
|
||||
"location": "Montreal, Quebec, Canada",
|
||||
"keyword": "layoff",
|
||||
"timestamp": "2024-01-15T11:30:00Z"
|
||||
},
|
||||
{
|
||||
"text": "Beautiful weather today! Perfect for a walk in the park.",
|
||||
"location": "Calgary, Alberta, Canada",
|
||||
"keyword": "weather",
|
||||
"timestamp": "2024-01-15T12:00:00Z"
|
||||
},
|
||||
{
|
||||
"text": "We're hiring! Looking for talented developers to join our growing team.",
|
||||
"location": "Ottawa, Ontario, Canada",
|
||||
"keyword": "hiring",
|
||||
"timestamp": "2024-01-15T12:30:00Z"
|
||||
}
|
||||
]
|
||||
}
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"text": "Just got laid off from my software engineering role. Looking for new opportunities in the Toronto area.",
|
||||
"location": "Toronto, Ontario, Canada",
|
||||
"keyword": "layoff",
|
||||
"timestamp": "2024-01-15T10:30:00Z"
|
||||
},
|
||||
{
|
||||
"text": "Excited to share that I'm starting a new position as a Senior Developer at TechCorp!",
|
||||
"location": "Vancouver, BC, Canada",
|
||||
"keyword": "hiring",
|
||||
"timestamp": "2024-01-15T11:00:00Z"
|
||||
},
|
||||
{
|
||||
"text": "Our company is going through a restructuring and unfortunately had to let go of 50 employees.",
|
||||
"location": "Montreal, Quebec, Canada",
|
||||
"keyword": "layoff",
|
||||
"timestamp": "2024-01-15T11:30:00Z"
|
||||
},
|
||||
{
|
||||
"text": "Beautiful weather today! Perfect for a walk in the park.",
|
||||
"location": "Calgary, Alberta, Canada",
|
||||
"keyword": "weather",
|
||||
"timestamp": "2024-01-15T12:00:00Z"
|
||||
},
|
||||
{
|
||||
"text": "We're hiring! Looking for talented developers to join our growing team.",
|
||||
"location": "Ottawa, Ontario, Canada",
|
||||
"keyword": "hiring",
|
||||
"timestamp": "2024-01-15T12:30:00Z"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user