Update CoreParser to increase the default timeout and change the navigation waitUntil option to networkidle

This commit is contained in:
ilia 2025-12-12 12:18:48 -05:00
parent ef9720abf2
commit 83ed86668e
24 changed files with 9137 additions and 9094 deletions

View File

@ -1,250 +1,250 @@
#!/usr/bin/env node
/**
* AI Analyzer CLI
*
* Command-line interface for the ai-analyzer package
* Can be used by any parser to analyze JSON files
*/
const fs = require("fs");
const path = require("path");
// Import AI utilities from this package
const {
logger,
analyzeBatch,
checkOllamaStatus,
findLatestResultsFile,
} = require("./index");
// Default configuration (overridable via environment variables)
const DEFAULT_CONTEXT =
  process.env.AI_CONTEXT || "job market analysis and trends";
const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "mistral";
const DEFAULT_RESULTS_DIR = "results";
/**
 * Extract the value part of a `--flag=value` CLI argument.
 * Fixes a bug in the previous `arg.split("=")[1]` approach, which silently
 * truncated values containing `=` (e.g. `--context=a=b` became "a").
 * @param {string} arg - Raw argument of the form `--flag=value`.
 * @returns {string} Everything after the first `=`.
 */
const argValue = (arg) => arg.slice(arg.indexOf("=") + 1);
// Parse command line arguments
const args = process.argv.slice(2);
let inputFile = null;
let outputFile = null;
let context = DEFAULT_CONTEXT;
let model = DEFAULT_MODEL;
let findLatest = false;
let resultsDir = DEFAULT_RESULTS_DIR;
for (const arg of args) {
  if (arg.startsWith("--input=")) {
    inputFile = argValue(arg);
  } else if (arg.startsWith("--output=")) {
    outputFile = argValue(arg);
  } else if (arg.startsWith("--context=")) {
    context = argValue(arg);
  } else if (arg.startsWith("--model=")) {
    model = argValue(arg);
  } else if (arg.startsWith("--dir=")) {
    resultsDir = argValue(arg);
  } else if (arg === "--latest") {
    findLatest = true;
  } else if (arg === "--help" || arg === "-h") {
    console.log(`
AI Analyzer CLI
Usage: node cli.js [options]
Options:
--input=FILE Input JSON file
--output=FILE Output file (default: ai-analysis-{timestamp}.json)
--context="description" Analysis context (default: "${DEFAULT_CONTEXT}")
--model=MODEL Ollama model (default: ${DEFAULT_MODEL})
--latest Use latest results file from results directory
--dir=PATH Directory to look for results (default: 'results')
--help, -h Show this help
Examples:
node cli.js --input=results.json
node cli.js --latest --dir=results
node cli.js --input=results.json --context="job trends" --model=mistral
Environment Variables:
AI_CONTEXT Default analysis context
OLLAMA_MODEL Default Ollama model
`);
    process.exit(0);
  }
}
/**
 * CLI entry point: load a results JSON file, run AI relevance analysis on
 * each item, merge the analysis back into the original structure, and write
 * the updated data to disk. Exits with code 1 on any failure.
 */
async function main() {
  try {
    // Determine input file
    if (findLatest) {
      try {
        inputFile = findLatestResultsFile(resultsDir);
        logger.info(`Found latest results file: ${inputFile}`);
      } catch (error) {
        logger.error(
          `❌ No results files found in '${resultsDir}': ${error.message}`
        );
        logger.info(`💡 To create results files:`);
        logger.info(
          ` 1. Run a parser first (e.g., npm start in linkedin-parser)`
        );
        logger.info(` 2. Or provide a specific file with --input=FILE`);
        logger.info(` 3. Or create a sample JSON file to test with`);
        process.exit(1);
      }
    }
    // If inputFile is a relative path and --dir is set, resolve it
    if (inputFile && !path.isAbsolute(inputFile) && !fs.existsSync(inputFile)) {
      const candidate = path.join(resultsDir, inputFile);
      if (fs.existsSync(candidate)) {
        inputFile = candidate;
      }
    }
    if (!inputFile) {
      logger.error("❌ Input file required. Use --input=FILE or --latest");
      logger.info(`💡 Examples:`);
      logger.info(` node cli.js --input=results.json`);
      logger.info(` node cli.js --latest --dir=results`);
      logger.info(` node cli.js --help`);
      process.exit(1);
    }
    // Load input file
    logger.step(`Loading input file: ${inputFile}`);
    if (!fs.existsSync(inputFile)) {
      throw new Error(`Input file not found: ${inputFile}`);
    }
    // NOTE: `let`, not `const` — `data` is reassigned below when the input is
    // a bare array and gets wrapped in a {metadata, results} structure. With
    // `const` that path threw "Assignment to constant variable" at runtime.
    let data = JSON.parse(fs.readFileSync(inputFile, "utf-8"));
    // Extract posts from different formats
    let posts = [];
    if (data.results && Array.isArray(data.results)) {
      posts = data.results;
      logger.info(`Found ${posts.length} items in results array`);
    } else if (Array.isArray(data)) {
      posts = data;
      logger.info(`Found ${posts.length} items in array`);
    } else {
      throw new Error("Invalid JSON format - need array or {results: [...]}");
    }
    if (posts.length === 0) {
      throw new Error("No items found to analyze");
    }
    // Check AI availability
    logger.step("Checking AI availability");
    const aiAvailable = await checkOllamaStatus(model);
    if (!aiAvailable) {
      throw new Error(
        `AI not available. Make sure Ollama is running and model '${model}' is installed.`
      );
    }
    // Check if results already have AI analysis
    const hasExistingAI = posts.some((post) => post.aiAnalysis);
    if (hasExistingAI) {
      logger.info(
        `📋 Results already contain AI analysis - will update with new context`
      );
    }
    // Prepare data for analysis (normalize field names across parser formats)
    const analysisData = posts.map((post) => ({
      text: post.text || post.content || post.post || "",
      location: post.location || "Unknown",
      keyword: post.keyword || "Unknown",
      timestamp: post.timestamp || new Date().toISOString(),
    }));
    // Run analysis
    logger.step(`Running AI analysis with context: "${context}"`);
    const analysis = await analyzeBatch(analysisData, context, model);
    // Integrate AI analysis back into the original results. Fall back to an
    // empty object so a short analysis array cannot crash the merge.
    const updatedPosts = posts.map((post, index) => {
      const aiResult = analysis[index] || {};
      return {
        ...post,
        aiAnalysis: {
          isRelevant: aiResult.isRelevant,
          confidence: aiResult.confidence,
          reasoning: aiResult.reasoning,
          context: context,
          model: model,
          analyzedAt: new Date().toISOString(),
        },
      };
    });
    // Update the original data structure
    if (data.results && Array.isArray(data.results)) {
      data.results = updatedPosts;
      // Update metadata
      data.metadata = data.metadata || {};
      data.metadata.aiAnalysisUpdated = new Date().toISOString();
      data.metadata.aiContext = context;
      data.metadata.aiModel = model;
    } else {
      // If it's a simple array, create a proper structure
      data = {
        metadata: {
          timestamp: new Date().toISOString(),
          totalItems: updatedPosts.length,
          aiContext: context,
          aiModel: model,
          analysisType: "cli",
        },
        results: updatedPosts,
      };
    }
    // Generate output filename if not provided
    if (!outputFile) {
      // Use the original filename with -ai suffix
      const originalName = path.basename(inputFile, path.extname(inputFile));
      outputFile = path.join(
        path.dirname(inputFile),
        `${originalName}-ai.json`
      );
    }
    // Save updated results back to file
    fs.writeFileSync(outputFile, JSON.stringify(data, null, 2));
    // Show summary
    const relevant = analysis.filter((a) => a.isRelevant).length;
    const irrelevant = analysis.filter((a) => !a.isRelevant).length;
    const avgConfidence =
      analysis.reduce((sum, a) => sum + a.confidence, 0) / analysis.length;
    logger.success("✅ AI analysis completed and integrated");
    logger.info(`📊 Context: "${context}"`);
    logger.info(`📈 Total items analyzed: ${analysis.length}`);
    logger.info(
      `✅ Relevant items: ${relevant} (${(
        (relevant / analysis.length) *
        100
      ).toFixed(1)}%)`
    );
    logger.info(
      `❌ Irrelevant items: ${irrelevant} (${(
        (irrelevant / analysis.length) *
        100
      ).toFixed(1)}%)`
    );
    logger.info(`🎯 Average confidence: ${avgConfidence.toFixed(2)}`);
    logger.file(`🧠 Updated results saved to: ${outputFile}`);
  } catch (error) {
    logger.error(`❌ Analysis failed: ${error.message}`);
    process.exit(1);
  }
}
// Run the CLI
main();
#!/usr/bin/env node
/**
* AI Analyzer CLI
*
* Command-line interface for the ai-analyzer package
* Can be used by any parser to analyze JSON files
*/
const fs = require("fs");
const path = require("path");
// Import AI utilities from this package
const {
logger,
analyzeBatch,
checkOllamaStatus,
findLatestResultsFile,
} = require("./index");
// Default configuration (overridable via environment variables)
const DEFAULT_CONTEXT =
  process.env.AI_CONTEXT || "job market analysis and trends";
const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "mistral";
const DEFAULT_RESULTS_DIR = "results";
/**
 * Extract the value part of a `--flag=value` CLI argument.
 * Fixes a bug in the previous `arg.split("=")[1]` approach, which silently
 * truncated values containing `=` (e.g. `--context=a=b` became "a").
 * @param {string} arg - Raw argument of the form `--flag=value`.
 * @returns {string} Everything after the first `=`.
 */
const argValue = (arg) => arg.slice(arg.indexOf("=") + 1);
// Parse command line arguments
const args = process.argv.slice(2);
let inputFile = null;
let outputFile = null;
let context = DEFAULT_CONTEXT;
let model = DEFAULT_MODEL;
let findLatest = false;
let resultsDir = DEFAULT_RESULTS_DIR;
for (const arg of args) {
  if (arg.startsWith("--input=")) {
    inputFile = argValue(arg);
  } else if (arg.startsWith("--output=")) {
    outputFile = argValue(arg);
  } else if (arg.startsWith("--context=")) {
    context = argValue(arg);
  } else if (arg.startsWith("--model=")) {
    model = argValue(arg);
  } else if (arg.startsWith("--dir=")) {
    resultsDir = argValue(arg);
  } else if (arg === "--latest") {
    findLatest = true;
  } else if (arg === "--help" || arg === "-h") {
    console.log(`
AI Analyzer CLI
Usage: node cli.js [options]
Options:
--input=FILE Input JSON file
--output=FILE Output file (default: ai-analysis-{timestamp}.json)
--context="description" Analysis context (default: "${DEFAULT_CONTEXT}")
--model=MODEL Ollama model (default: ${DEFAULT_MODEL})
--latest Use latest results file from results directory
--dir=PATH Directory to look for results (default: 'results')
--help, -h Show this help
Examples:
node cli.js --input=results.json
node cli.js --latest --dir=results
node cli.js --input=results.json --context="job trends" --model=mistral
Environment Variables:
AI_CONTEXT Default analysis context
OLLAMA_MODEL Default Ollama model
`);
    process.exit(0);
  }
}
/**
 * CLI entry point: load a results JSON file, run AI relevance analysis on
 * each item, merge the analysis back into the original structure, and write
 * the updated data to disk. Exits with code 1 on any failure.
 */
async function main() {
  try {
    // Determine input file
    if (findLatest) {
      try {
        inputFile = findLatestResultsFile(resultsDir);
        logger.info(`Found latest results file: ${inputFile}`);
      } catch (error) {
        logger.error(
          `❌ No results files found in '${resultsDir}': ${error.message}`
        );
        logger.info(`💡 To create results files:`);
        logger.info(
          ` 1. Run a parser first (e.g., npm start in linkedin-parser)`
        );
        logger.info(` 2. Or provide a specific file with --input=FILE`);
        logger.info(` 3. Or create a sample JSON file to test with`);
        process.exit(1);
      }
    }
    // If inputFile is a relative path and --dir is set, resolve it
    if (inputFile && !path.isAbsolute(inputFile) && !fs.existsSync(inputFile)) {
      const candidate = path.join(resultsDir, inputFile);
      if (fs.existsSync(candidate)) {
        inputFile = candidate;
      }
    }
    if (!inputFile) {
      logger.error("❌ Input file required. Use --input=FILE or --latest");
      logger.info(`💡 Examples:`);
      logger.info(` node cli.js --input=results.json`);
      logger.info(` node cli.js --latest --dir=results`);
      logger.info(` node cli.js --help`);
      process.exit(1);
    }
    // Load input file
    logger.step(`Loading input file: ${inputFile}`);
    if (!fs.existsSync(inputFile)) {
      throw new Error(`Input file not found: ${inputFile}`);
    }
    // NOTE: `let`, not `const` — `data` is reassigned below when the input is
    // a bare array and gets wrapped in a {metadata, results} structure. With
    // `const` that path threw "Assignment to constant variable" at runtime.
    let data = JSON.parse(fs.readFileSync(inputFile, "utf-8"));
    // Extract posts from different formats
    let posts = [];
    if (data.results && Array.isArray(data.results)) {
      posts = data.results;
      logger.info(`Found ${posts.length} items in results array`);
    } else if (Array.isArray(data)) {
      posts = data;
      logger.info(`Found ${posts.length} items in array`);
    } else {
      throw new Error("Invalid JSON format - need array or {results: [...]}");
    }
    if (posts.length === 0) {
      throw new Error("No items found to analyze");
    }
    // Check AI availability
    logger.step("Checking AI availability");
    const aiAvailable = await checkOllamaStatus(model);
    if (!aiAvailable) {
      throw new Error(
        `AI not available. Make sure Ollama is running and model '${model}' is installed.`
      );
    }
    // Check if results already have AI analysis
    const hasExistingAI = posts.some((post) => post.aiAnalysis);
    if (hasExistingAI) {
      logger.info(
        `📋 Results already contain AI analysis - will update with new context`
      );
    }
    // Prepare data for analysis (normalize field names across parser formats)
    const analysisData = posts.map((post) => ({
      text: post.text || post.content || post.post || "",
      location: post.location || "Unknown",
      keyword: post.keyword || "Unknown",
      timestamp: post.timestamp || new Date().toISOString(),
    }));
    // Run analysis
    logger.step(`Running AI analysis with context: "${context}"`);
    const analysis = await analyzeBatch(analysisData, context, model);
    // Integrate AI analysis back into the original results. Fall back to an
    // empty object so a short analysis array cannot crash the merge.
    const updatedPosts = posts.map((post, index) => {
      const aiResult = analysis[index] || {};
      return {
        ...post,
        aiAnalysis: {
          isRelevant: aiResult.isRelevant,
          confidence: aiResult.confidence,
          reasoning: aiResult.reasoning,
          context: context,
          model: model,
          analyzedAt: new Date().toISOString(),
        },
      };
    });
    // Update the original data structure
    if (data.results && Array.isArray(data.results)) {
      data.results = updatedPosts;
      // Update metadata
      data.metadata = data.metadata || {};
      data.metadata.aiAnalysisUpdated = new Date().toISOString();
      data.metadata.aiContext = context;
      data.metadata.aiModel = model;
    } else {
      // If it's a simple array, create a proper structure
      data = {
        metadata: {
          timestamp: new Date().toISOString(),
          totalItems: updatedPosts.length,
          aiContext: context,
          aiModel: model,
          analysisType: "cli",
        },
        results: updatedPosts,
      };
    }
    // Generate output filename if not provided
    if (!outputFile) {
      // Use the original filename with -ai suffix
      const originalName = path.basename(inputFile, path.extname(inputFile));
      outputFile = path.join(
        path.dirname(inputFile),
        `${originalName}-ai.json`
      );
    }
    // Save updated results back to file
    fs.writeFileSync(outputFile, JSON.stringify(data, null, 2));
    // Show summary
    const relevant = analysis.filter((a) => a.isRelevant).length;
    const irrelevant = analysis.filter((a) => !a.isRelevant).length;
    const avgConfidence =
      analysis.reduce((sum, a) => sum + a.confidence, 0) / analysis.length;
    logger.success("✅ AI analysis completed and integrated");
    logger.info(`📊 Context: "${context}"`);
    logger.info(`📈 Total items analyzed: ${analysis.length}`);
    logger.info(
      `✅ Relevant items: ${relevant} (${(
        (relevant / analysis.length) *
        100
      ).toFixed(1)}%)`
    );
    logger.info(
      `❌ Irrelevant items: ${irrelevant} (${(
        (irrelevant / analysis.length) *
        100
      ).toFixed(1)}%)`
    );
    logger.info(`🎯 Average confidence: ${avgConfidence.toFixed(2)}`);
    logger.file(`🧠 Updated results saved to: ${outputFile}`);
  } catch (error) {
    logger.error(`❌ Analysis failed: ${error.message}`);
    process.exit(1);
  }
}
// Run the CLI
main();

View File

@ -1,346 +1,346 @@
/**
* AI Analyzer Demo
*
* Demonstrates all the core utilities provided by the ai-analyzer package:
* - Logger functionality
* - Text processing utilities
* - Location validation
* - AI analysis capabilities
* - Test utilities
*/
const {
logger,
Logger,
cleanText,
containsAnyKeyword,
parseLocationFilters,
validateLocationAgainstFilters,
extractLocationFromProfile,
analyzeBatch,
} = require("./index");
// Terminal colors for demo output (ANSI escape codes)
const colors = {
  reset: "\x1b[0m",
  bright: "\x1b[1m",
  cyan: "\x1b[36m",
  green: "\x1b[32m",
  yellow: "\x1b[33m",
  blue: "\x1b[34m",
  magenta: "\x1b[35m",
  red: "\x1b[31m",
};
// Wrap text in one or more color codes, always terminating with reset.
const paint = (text, ...codes) => `${codes.join("")}${text}${colors.reset}`;
// Small console helper used throughout the demo: each method prints its
// text in a distinct color (titles/sections get a leading blank line).
const demo = {
  title: (text) => console.log(`\n${paint(text, colors.bright, colors.cyan)}`),
  section: (text) =>
    console.log(`\n${paint(text, colors.bright, colors.magenta)}`),
  success: (text) => console.log(paint(text, colors.green)),
  info: (text) => console.log(paint(` ${text}`, colors.blue)),
  warning: (text) => console.log(paint(`⚠️ ${text}`, colors.yellow)),
  error: (text) => console.log(paint(text, colors.red)),
  code: (text) => console.log(paint(text, colors.cyan)),
};
/**
 * Top-level demo driver: prints an intro, then walks through each feature
 * section in order, pausing for Enter between sections.
 */
async function runDemo() {
  demo.title("=== AI Analyzer Demo ===");
  demo.info(
    "This demo showcases all the core utilities provided by the ai-analyzer package."
  );
  demo.info("Press Enter to continue through each section...\n");
  await waitForEnter();
  // Sections run sequentially; the order matters for the narrative.
  const sections = [
    demonstrateLogger, // 1. Logger Demo
    demonstrateTextProcessing, // 2. Text Processing Demo
    demonstrateLocationValidation, // 3. Location Validation Demo
    demonstrateAIAnalysis, // 4. AI Analysis Demo
    demonstrateIntegration, // 5. Integration Demo
  ];
  for (const section of sections) {
    await section();
  }
  demo.title("=== Demo Complete ===");
  demo.success("All ai-analyzer utilities demonstrated successfully!");
  demo.info("Check the README.md for detailed API documentation.");
}
/**
 * Section 1: shows the shared logger's levels, the emoji convenience
 * methods, and per-instance configuration (debug off, silent/verbose).
 */
async function demonstrateLogger() {
  demo.section("1. Logger Utilities");
  demo.info(
    "The logger provides consistent logging across all parsers with configurable levels and color support."
  );
  demo.code("// Using default logger");
  logger.info("This is an info message");
  logger.warning("This is a warning message");
  logger.error("This is an error message");
  logger.success("This is a success message");
  logger.debug("This is a debug message (if enabled)");
  demo.code("// Convenience methods with emoji prefixes");
  // Data-driven: invoke each emoji-prefixed method in the documented order.
  const emojiCalls = [
    ["step", "Starting demo process"],
    ["search", "Searching for keywords"],
    ["ai", "Running AI analysis"],
    ["location", "Validating location"],
    ["file", "Saving results"],
  ];
  for (const [method, message] of emojiCalls) {
    logger[method](message);
  }
  demo.code("// Custom logger configuration");
  const quietLogger = new Logger({
    debug: false,
    colors: true,
  });
  quietLogger.info("Custom logger with debug disabled");
  quietLogger.debug("This won't show");
  demo.code("// Silent mode");
  const mutedLogger = new Logger();
  mutedLogger.silent();
  mutedLogger.info("This won't show");
  mutedLogger.verbose(); // Re-enable all levels
  await waitForEnter();
}
/**
 * Section 2: demonstrates text cleaning and keyword matching on a fixed
 * set of sample social-media posts.
 */
async function demonstrateTextProcessing() {
  demo.section("2. Text Processing Utilities");
  demo.info(
    "Text utilities provide content cleaning and keyword matching capabilities."
  );
  const sampleTexts = [
    "Check out this #awesome post! https://example.com 🚀",
    "Just got #laidoff from my job. Looking for new opportunities!",
    "Company is #downsizing and I'm affected. #RIF #layoff",
    "Great news! We're #hiring new developers! 🎉",
  ];
  demo.code("// Text cleaning examples:");
  // Fixed: the callback previously declared an unused `index` parameter.
  sampleTexts.forEach((text) => {
    const cleaned = cleanText(text);
    demo.info(`Original: ${text}`);
    demo.success(`Cleaned: ${cleaned}`);
    console.log();
  });
  demo.code("// Keyword matching:");
  const keywords = ["layoff", "downsizing", "RIF", "hiring"];
  sampleTexts.forEach((text, index) => {
    const hasMatch = containsAnyKeyword(text, keywords);
    // Recompute the matches locally (case-insensitive) for display only.
    const matchedKeywords = keywords.filter((keyword) =>
      text.toLowerCase().includes(keyword.toLowerCase())
    );
    demo.info(
      `Text ${index + 1}: ${hasMatch ? "✅" : "❌"} ${
        matchedKeywords.join(", ") || "No matches"
      }`
    );
  });
  await waitForEnter();
}
/**
 * Section 3: demonstrates location-filter parsing, validation of sample
 * locations against filters, and extraction of locations from profile text.
 */
async function demonstrateLocationValidation() {
  demo.section("3. Location Validation Utilities");
  demo.info(
    "Location utilities provide geographic filtering and validation capabilities."
  );
  demo.code("// Location filter parsing:");
  const filterStrings = [
    "Ontario,Manitoba",
    "Toronto,Vancouver",
    "British Columbia,Alberta",
    "Canada",
  ];
  for (const filterString of filterStrings) {
    const filters = parseLocationFilters(filterString);
    demo.info(`Filter: "${filterString}"`);
    demo.success(`Parsed: [${filters.join(", ")}]`);
    console.log();
  }
  demo.code("// Location validation examples:");
  const testLocations = [
    { location: "Toronto, Ontario, Canada", filters: ["Ontario"] },
    { location: "Vancouver, BC", filters: ["British Columbia"] },
    { location: "Calgary, Alberta", filters: ["Ontario"] },
    { location: "Montreal, Quebec", filters: ["Ontario", "Manitoba"] },
    { location: "New York, NY", filters: ["Ontario"] },
  ];
  for (const { location, filters } of testLocations) {
    const isValid = validateLocationAgainstFilters(location, filters);
    demo.info(`Location: "${location}"`);
    demo.info(`Filters: [${filters.join(", ")}]`);
    demo.success(`Valid: ${isValid ? "✅ Yes" : "❌ No"}`);
    console.log();
  }
  demo.code("// Profile location extraction:");
  const profileTexts = [
    "Software Engineer at Tech Corp • Toronto, Ontario",
    "Product Manager • Vancouver, BC",
    "Data Scientist • Remote",
    "CEO at Startup Inc • Montreal, Quebec, Canada",
  ];
  for (const profileText of profileTexts) {
    const location = extractLocationFromProfile(profileText);
    demo.info(`Profile: "${profileText}"`);
    demo.success(`Extracted: "${location || "No location found"}"`);
    console.log();
  }
  await waitForEnter();
}
/**
 * Section 4: simulates AI relevance analysis over a set of mock posts.
 * No real Ollama/OpenAI call is made here — output is scripted.
 */
async function demonstrateAIAnalysis() {
  demo.section("4. AI Analysis Utilities");
  demo.info(
    "AI utilities provide content analysis using OpenAI or local Ollama models."
  );
  // Mock posts for demo
  const mockPosts = [
    {
      id: "1",
      content:
        "Just got laid off from my software engineering role. Looking for new opportunities in Toronto.",
      author: "John Doe",
      location: "Toronto, Ontario",
    },
    {
      id: "2",
      content:
        "Our company is downsizing and I'm affected. This is really tough news.",
      author: "Jane Smith",
      location: "Vancouver, BC",
    },
    {
      id: "3",
      content:
        "We're hiring! Looking for talented developers to join our team.",
      author: "Bob Wilson",
      location: "Calgary, Alberta",
    },
  ];
  demo.code("// Mock AI analysis (simulated):");
  demo.info("In a real scenario, this would call Ollama or OpenAI API");
  for (const [index, post] of mockPosts.entries()) {
    demo.info(`Post ${index + 1}: ${post.content.substring(0, 50)}...`);
    demo.success(
      `Analysis: Relevant to job layoffs (confidence: 0.${85 + index * 5})`
    );
    console.log();
  }
  demo.code("// Batch analysis simulation:");
  demo.info("Processing batch of 3 posts...");
  await simulateProcessing();
  demo.success("Batch analysis completed!");
  await waitForEnter();
}
/**
 * Section 5: end-to-end pipeline — clean text, match keywords, validate
 * location, then (if both checks pass) simulate an AI analysis step.
 */
async function demonstrateIntegration() {
  demo.section("5. Integration Example");
  demo.info("Here's how all utilities work together in a real scenario:");
  const samplePost = {
    id: "demo-1",
    content:
      "Just got #laidoff from my job at TechCorp! Looking for new opportunities in #Toronto. This is really tough but I'm staying positive! 🚀",
    author: "Demo User",
    location: "Toronto, Ontario, Canada",
  };
  demo.code("// Processing pipeline:");
  // 1. Log the start
  logger.step("Processing new post");
  // 2. Clean the text
  const normalizedContent = cleanText(samplePost.content);
  logger.info(`Cleaned content: ${normalizedContent}`);
  // 3. Check for keywords
  const watchedKeywords = ["layoff", "downsizing", "RIF"];
  const keywordHit = containsAnyKeyword(normalizedContent, watchedKeywords);
  logger.search(`Keyword match: ${keywordHit ? "Found" : "Not found"}`);
  // 4. Validate location
  const allowedRegions = parseLocationFilters("Ontario,Manitoba");
  const locationOk = validateLocationAgainstFilters(
    samplePost.location,
    allowedRegions
  );
  logger.location(`Location valid: ${locationOk ? "Yes" : "No"}`);
  // 5. Simulate AI analysis
  if (keywordHit && locationOk) {
    logger.ai("Running AI analysis...");
    await simulateProcessing();
    logger.success("Post accepted and analyzed!");
  } else {
    logger.warning("Post rejected - doesn't meet criteria");
  }
  await waitForEnter();
}
// Helper functions
/**
 * Block until the user presses Enter on stdin.
 * @returns {Promise<void>} Resolves once a line has been read.
 */
function waitForEnter() {
  const readline = require("readline");
  return new Promise((resolve) => {
    const rl = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
    });
    rl.question("\nPress Enter to continue...", () => {
      rl.close();
      resolve();
    });
  });
}
/**
 * Show an animated "Processing..." indicator on stdout for ~2 seconds.
 * @returns {Promise<void>} Resolves when the animation finishes.
 */
async function simulateProcessing() {
  const FRAME_MS = 500; // animation frame interval
  const TOTAL_MS = 2000; // total duration of the simulation
  const frames = [".", "..", "..."];
  return new Promise((resolve) => {
    let frame = 0;
    const ticker = setInterval(() => {
      process.stdout.write(`\rProcessing${frames[frame]}`);
      frame = (frame + 1) % frames.length;
    }, FRAME_MS);
    setTimeout(() => {
      clearInterval(ticker);
      process.stdout.write("\r");
      resolve();
    }, TOTAL_MS);
  });
}
// Run the demo only when this file is executed directly (not when require()d)
if (require.main === module) {
  runDemo().catch((error) => {
    demo.error(`Demo failed: ${error.message}`);
    process.exit(1);
  });
}
// Expose runDemo so other scripts can drive the demo programmatically
module.exports = { runDemo };
/**
* AI Analyzer Demo
*
* Demonstrates all the core utilities provided by the ai-analyzer package:
* - Logger functionality
* - Text processing utilities
* - Location validation
* - AI analysis capabilities
* - Test utilities
*/
const {
logger,
Logger,
cleanText,
containsAnyKeyword,
parseLocationFilters,
validateLocationAgainstFilters,
extractLocationFromProfile,
analyzeBatch,
} = require("./index");
// Terminal colors for demo output (ANSI escape codes)
const colors = {
  reset: "\x1b[0m",
  bright: "\x1b[1m",
  cyan: "\x1b[36m",
  green: "\x1b[32m",
  yellow: "\x1b[33m",
  blue: "\x1b[34m",
  magenta: "\x1b[35m",
  red: "\x1b[31m",
};
// Wrap text in one or more color codes, always terminating with reset.
const paint = (text, ...codes) => `${codes.join("")}${text}${colors.reset}`;
// Small console helper used throughout the demo: each method prints its
// text in a distinct color (titles/sections get a leading blank line).
const demo = {
  title: (text) => console.log(`\n${paint(text, colors.bright, colors.cyan)}`),
  section: (text) =>
    console.log(`\n${paint(text, colors.bright, colors.magenta)}`),
  success: (text) => console.log(paint(text, colors.green)),
  info: (text) => console.log(paint(` ${text}`, colors.blue)),
  warning: (text) => console.log(paint(`⚠️ ${text}`, colors.yellow)),
  error: (text) => console.log(paint(text, colors.red)),
  code: (text) => console.log(paint(text, colors.cyan)),
};
/**
 * Top-level demo driver: prints an intro, then walks through each feature
 * section in order, pausing for Enter between sections.
 */
async function runDemo() {
  demo.title("=== AI Analyzer Demo ===");
  demo.info(
    "This demo showcases all the core utilities provided by the ai-analyzer package."
  );
  demo.info("Press Enter to continue through each section...\n");
  await waitForEnter();
  // Sections run sequentially; the order matters for the narrative.
  const sections = [
    demonstrateLogger, // 1. Logger Demo
    demonstrateTextProcessing, // 2. Text Processing Demo
    demonstrateLocationValidation, // 3. Location Validation Demo
    demonstrateAIAnalysis, // 4. AI Analysis Demo
    demonstrateIntegration, // 5. Integration Demo
  ];
  for (const section of sections) {
    await section();
  }
  demo.title("=== Demo Complete ===");
  demo.success("All ai-analyzer utilities demonstrated successfully!");
  demo.info("Check the README.md for detailed API documentation.");
}
/**
 * Section 1: shows the shared logger's levels, the emoji convenience
 * methods, and per-instance configuration (debug off, silent/verbose).
 */
async function demonstrateLogger() {
  demo.section("1. Logger Utilities");
  demo.info(
    "The logger provides consistent logging across all parsers with configurable levels and color support."
  );
  demo.code("// Using default logger");
  logger.info("This is an info message");
  logger.warning("This is a warning message");
  logger.error("This is an error message");
  logger.success("This is a success message");
  logger.debug("This is a debug message (if enabled)");
  demo.code("// Convenience methods with emoji prefixes");
  // Data-driven: invoke each emoji-prefixed method in the documented order.
  const emojiCalls = [
    ["step", "Starting demo process"],
    ["search", "Searching for keywords"],
    ["ai", "Running AI analysis"],
    ["location", "Validating location"],
    ["file", "Saving results"],
  ];
  for (const [method, message] of emojiCalls) {
    logger[method](message);
  }
  demo.code("// Custom logger configuration");
  const quietLogger = new Logger({
    debug: false,
    colors: true,
  });
  quietLogger.info("Custom logger with debug disabled");
  quietLogger.debug("This won't show");
  demo.code("// Silent mode");
  const mutedLogger = new Logger();
  mutedLogger.silent();
  mutedLogger.info("This won't show");
  mutedLogger.verbose(); // Re-enable all levels
  await waitForEnter();
}
/**
 * Section 2: demonstrates text cleaning and keyword matching on a fixed
 * set of sample social-media posts.
 */
async function demonstrateTextProcessing() {
  demo.section("2. Text Processing Utilities");
  demo.info(
    "Text utilities provide content cleaning and keyword matching capabilities."
  );
  const sampleTexts = [
    "Check out this #awesome post! https://example.com 🚀",
    "Just got #laidoff from my job. Looking for new opportunities!",
    "Company is #downsizing and I'm affected. #RIF #layoff",
    "Great news! We're #hiring new developers! 🎉",
  ];
  demo.code("// Text cleaning examples:");
  // Fixed: the callback previously declared an unused `index` parameter.
  sampleTexts.forEach((text) => {
    const cleaned = cleanText(text);
    demo.info(`Original: ${text}`);
    demo.success(`Cleaned: ${cleaned}`);
    console.log();
  });
  demo.code("// Keyword matching:");
  const keywords = ["layoff", "downsizing", "RIF", "hiring"];
  sampleTexts.forEach((text, index) => {
    const hasMatch = containsAnyKeyword(text, keywords);
    // Recompute the matches locally (case-insensitive) for display only.
    const matchedKeywords = keywords.filter((keyword) =>
      text.toLowerCase().includes(keyword.toLowerCase())
    );
    demo.info(
      `Text ${index + 1}: ${hasMatch ? "✅" : "❌"} ${
        matchedKeywords.join(", ") || "No matches"
      }`
    );
  });
  await waitForEnter();
}
/**
 * Section 3: demonstrates location-filter parsing, validation of sample
 * locations against filters, and extraction of locations from profile text.
 */
async function demonstrateLocationValidation() {
  demo.section("3. Location Validation Utilities");
  demo.info(
    "Location utilities provide geographic filtering and validation capabilities."
  );
  demo.code("// Location filter parsing:");
  const filterStrings = [
    "Ontario,Manitoba",
    "Toronto,Vancouver",
    "British Columbia,Alberta",
    "Canada",
  ];
  for (const filterString of filterStrings) {
    const filters = parseLocationFilters(filterString);
    demo.info(`Filter: "${filterString}"`);
    demo.success(`Parsed: [${filters.join(", ")}]`);
    console.log();
  }
  demo.code("// Location validation examples:");
  const testLocations = [
    { location: "Toronto, Ontario, Canada", filters: ["Ontario"] },
    { location: "Vancouver, BC", filters: ["British Columbia"] },
    { location: "Calgary, Alberta", filters: ["Ontario"] },
    { location: "Montreal, Quebec", filters: ["Ontario", "Manitoba"] },
    { location: "New York, NY", filters: ["Ontario"] },
  ];
  for (const { location, filters } of testLocations) {
    const isValid = validateLocationAgainstFilters(location, filters);
    demo.info(`Location: "${location}"`);
    demo.info(`Filters: [${filters.join(", ")}]`);
    demo.success(`Valid: ${isValid ? "✅ Yes" : "❌ No"}`);
    console.log();
  }
  demo.code("// Profile location extraction:");
  const profileTexts = [
    "Software Engineer at Tech Corp • Toronto, Ontario",
    "Product Manager • Vancouver, BC",
    "Data Scientist • Remote",
    "CEO at Startup Inc • Montreal, Quebec, Canada",
  ];
  for (const profileText of profileTexts) {
    const location = extractLocationFromProfile(profileText);
    demo.info(`Profile: "${profileText}"`);
    demo.success(`Extracted: "${location || "No location found"}"`);
    console.log();
  }
  await waitForEnter();
}
/**
 * Section 4: simulates AI relevance analysis over a set of mock posts.
 * No real Ollama/OpenAI call is made here — output is scripted.
 */
async function demonstrateAIAnalysis() {
  demo.section("4. AI Analysis Utilities");
  demo.info(
    "AI utilities provide content analysis using OpenAI or local Ollama models."
  );
  // Mock posts for demo
  const mockPosts = [
    {
      id: "1",
      content:
        "Just got laid off from my software engineering role. Looking for new opportunities in Toronto.",
      author: "John Doe",
      location: "Toronto, Ontario",
    },
    {
      id: "2",
      content:
        "Our company is downsizing and I'm affected. This is really tough news.",
      author: "Jane Smith",
      location: "Vancouver, BC",
    },
    {
      id: "3",
      content:
        "We're hiring! Looking for talented developers to join our team.",
      author: "Bob Wilson",
      location: "Calgary, Alberta",
    },
  ];
  demo.code("// Mock AI analysis (simulated):");
  demo.info("In a real scenario, this would call Ollama or OpenAI API");
  for (const [index, post] of mockPosts.entries()) {
    demo.info(`Post ${index + 1}: ${post.content.substring(0, 50)}...`);
    demo.success(
      `Analysis: Relevant to job layoffs (confidence: 0.${85 + index * 5})`
    );
    console.log();
  }
  demo.code("// Batch analysis simulation:");
  demo.info("Processing batch of 3 posts...");
  await simulateProcessing();
  demo.success("Batch analysis completed!");
  await waitForEnter();
}
/**
 * Section 5: end-to-end pipeline — clean text, match keywords, validate
 * location, then (if both checks pass) simulate an AI analysis step.
 */
async function demonstrateIntegration() {
  demo.section("5. Integration Example");
  demo.info("Here's how all utilities work together in a real scenario:");
  const samplePost = {
    id: "demo-1",
    content:
      "Just got #laidoff from my job at TechCorp! Looking for new opportunities in #Toronto. This is really tough but I'm staying positive! 🚀",
    author: "Demo User",
    location: "Toronto, Ontario, Canada",
  };
  demo.code("// Processing pipeline:");
  // 1. Log the start
  logger.step("Processing new post");
  // 2. Clean the text
  const normalizedContent = cleanText(samplePost.content);
  logger.info(`Cleaned content: ${normalizedContent}`);
  // 3. Check for keywords
  const watchedKeywords = ["layoff", "downsizing", "RIF"];
  const keywordHit = containsAnyKeyword(normalizedContent, watchedKeywords);
  logger.search(`Keyword match: ${keywordHit ? "Found" : "Not found"}`);
  // 4. Validate location
  const allowedRegions = parseLocationFilters("Ontario,Manitoba");
  const locationOk = validateLocationAgainstFilters(
    samplePost.location,
    allowedRegions
  );
  logger.location(`Location valid: ${locationOk ? "Yes" : "No"}`);
  // 5. Simulate AI analysis
  if (keywordHit && locationOk) {
    logger.ai("Running AI analysis...");
    await simulateProcessing();
    logger.success("Post accepted and analyzed!");
  } else {
    logger.warning("Post rejected - doesn't meet criteria");
  }
  await waitForEnter();
}
// Helper functions
/**
 * Block until the user presses Enter on stdin.
 * @returns {Promise<void>} Resolves once a line has been read.
 */
function waitForEnter() {
  const readline = require("readline");
  return new Promise((resolve) => {
    const rl = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
    });
    rl.question("\nPress Enter to continue...", () => {
      rl.close();
      resolve();
    });
  });
}
/**
 * Show an animated "Processing..." indicator on stdout for ~2 seconds.
 * @returns {Promise<void>} Resolves when the animation finishes.
 */
async function simulateProcessing() {
  const FRAME_MS = 500; // animation frame interval
  const TOTAL_MS = 2000; // total duration of the simulation
  const frames = [".", "..", "..."];
  return new Promise((resolve) => {
    let frame = 0;
    const ticker = setInterval(() => {
      process.stdout.write(`\rProcessing${frames[frame]}`);
      frame = (frame + 1) % frames.length;
    }, FRAME_MS);
    setTimeout(() => {
      clearInterval(ticker);
      process.stdout.write("\r");
      resolve();
    }, TOTAL_MS);
  });
}
// Run the demo only when this file is executed directly (not when require()d)
if (require.main === module) {
  runDemo().catch((error) => {
    demo.error(`Demo failed: ${error.message}`);
    process.exit(1);
  });
}
// Expose runDemo so other scripts can drive the demo programmatically
module.exports = { runDemo };

View File

@ -1,22 +1,22 @@
/**
* ai-analyzer - Core utilities for parsers
* Main entry point that exports all modules
*/
// Export all utilities with clean namespace
module.exports = {
// Logger utilities
...require("./src/logger"),
// AI analysis utilities
...require("./src/ai-utils"),
// Text processing utilities
...require("./src/text-utils"),
// Location validation utilities
...require("./src/location-utils"),
// Test utilities
...require("./src/test-utils"),
};
/**
 * ai-analyzer - Core utilities for parsers
 * Main entry point that exports all modules
 */
// Export all utilities with clean namespace
// NOTE(review): this assignment duplicates the export block above verbatim;
// the later assignment wins, with identical content.
module.exports = {
  // Logger utilities
  ...require("./src/logger"),
  // AI analysis utilities
  ...require("./src/ai-utils"),
  // Text processing utilities
  ...require("./src/text-utils"),
  // Location validation utilities
  ...require("./src/location-utils"),
  // Test utilities
  ...require("./src/test-utils"),
};

File diff suppressed because it is too large Load Diff

View File

@ -1,301 +1,301 @@
const { logger } = require("./logger");
/**
* AI Analysis utilities for post processing with Ollama
* Extracted from ai-analyzer-local.js for reuse across parsers
*/
/**
* Check if Ollama is running and the model is available
*/
/**
 * Verify that an Ollama server is reachable and that the requested model
 * has been pulled. Logs installation guidance on failure.
 *
 * @param {string} [model="mistral"] - model name to look for (prefix match)
 * @param {string} [ollamaHost="http://localhost:11434"] - server base URL
 * @returns {Promise<boolean>} true when the server is up and the model exists
 */
async function checkOllamaStatus(
  model = "mistral",
  ollamaHost = "http://localhost:11434"
) {
  try {
    // Probe the tags endpoint: it both confirms the server is up and
    // lists every installed model.
    const tagsResponse = await fetch(`${ollamaHost}/api/tags`);
    if (!tagsResponse.ok) {
      throw new Error(`Ollama not running on ${ollamaHost}`);
    }
    const tags = await tagsResponse.json();
    const installedModels = tags.models.map((entry) => entry.name);
    const shortNames = installedModels
      .map((name) => name.split(":")[0])
      .join(", ");
    logger.ai("Ollama is running");
    logger.info(`📦 Available models: ${shortNames}`);
    // Prefix match tolerates tag suffixes like "mistral:latest".
    const hasModel = installedModels.some((name) => name.startsWith(model));
    if (!hasModel) {
      logger.error(`Model "${model}" not found`);
      logger.error(`💡 Install it with: ollama pull ${model}`);
      logger.error(`💡 Or choose from: ${shortNames}`);
      return false;
    }
    logger.success(`Using model: ${model}`);
    return true;
  } catch (error) {
    logger.error(`Error connecting to Ollama: ${error.message}`);
    logger.error("💡 Make sure Ollama is installed and running:");
    logger.error("   1. Install: https://ollama.ai/");
    logger.error("   2. Start: ollama serve");
    logger.error(`   3. Install model: ollama pull ${model}`);
    return false;
  }
}
/**
* Analyze multiple posts using local Ollama
*/
/**
 * Analyze multiple posts using local Ollama in a single request.
 *
 * Sends all posts in one prompt and parses a "POST n: YES/NO | 0.X | reason"
 * line per post. On any failure the whole batch is marked relevant with low
 * confidence so downstream filtering can decide what to keep.
 *
 * @param {Array<{text: string}>} posts - posts to classify
 * @param {string} context - description of what counts as relevant
 * @param {string} [model="mistral"] - Ollama model name
 * @param {string} [ollamaHost="http://localhost:11434"] - Ollama base URL
 * @returns {Promise<Array<{postIndex: number, isRelevant: boolean, confidence: number, reasoning: string}>>}
 */
async function analyzeBatch(
  posts,
  context,
  model = "mistral",
  ollamaHost = "http://localhost:11434"
) {
  logger.ai(`Analyzing batch of ${posts.length} posts with ${model}...`);
  try {
    // Posts are truncated to 400 characters to keep the prompt compact.
    const prompt = `You are an expert at analyzing LinkedIn posts for relevance to specific contexts.
CONTEXT TO MATCH: "${context}"
Analyze these ${posts.length} LinkedIn posts and determine if each relates to the context above.
POSTS:
${posts
  .map(
    (post, i) => `
POST ${i + 1}:
"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}"
`
  )
  .join("")}
For each post, provide:
- Is it relevant to "${context}"? (YES/NO)
- Confidence level (0.0 to 1.0)
- Brief reasoning
Respond in this EXACT format for each post:
POST 1: YES/NO | 0.X | brief reason
POST 2: YES/NO | 0.X | brief reason
POST 3: YES/NO | 0.X | brief reason
Examples:
- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs
- For hiring context: "we're hiring developers" = YES | 0.8 | job posting
- Unrelated content = NO | 0.1 | not relevant to context`;
    const response = await fetch(`${ollamaHost}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: model,
        prompt: prompt,
        stream: false,
        options: {
          // Low temperature for more deterministic classification.
          temperature: 0.3,
          top_p: 0.9,
        },
      }),
    });
    if (!response.ok) {
      throw new Error(
        `Ollama API error: ${response.status} ${response.statusText}`
      );
    }
    const data = await response.json();
    const aiResponse = data.response.trim();
    // Parse one "POST n: ..." line per input post.
    const analyses = [];
    const lines = aiResponse.split("\n").filter((line) => line.trim());
    for (let i = 0; i < posts.length; i++) {
      const analysis = {
        postIndex: i + 1,
        isRelevant: false,
        confidence: 0.5,
        reasoning: "Could not parse AI response",
      };
      // Look for lines that match the "POST X:" pattern.
      const postPattern = new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i");
      for (const line of lines) {
        const match = line.match(postPattern);
        if (match) {
          const content = match[1].trim();
          // Expected shape: YES/NO | 0.X | reasoning
          const parts = content.split("|").map((p) => p.trim());
          if (parts.length >= 3) {
            analysis.isRelevant = parts[0].toUpperCase().includes("YES");
            // BUG FIX: `parseFloat(...) || 0.5` remapped an explicit 0.0
            // confidence to 0.5 (0 is falsy); only fall back when parsing fails.
            const parsedConfidence = Number.parseFloat(parts[1]);
            analysis.confidence = Math.max(
              0,
              Math.min(1, Number.isNaN(parsedConfidence) ? 0.5 : parsedConfidence)
            );
            analysis.reasoning = parts[2] || "No reasoning provided";
          } else {
            // Fallback parsing when the model ignored the pipe format.
            analysis.isRelevant =
              content.toUpperCase().includes("YES") ||
              content.toLowerCase().includes("relevant");
            analysis.confidence = 0.6;
            analysis.reasoning = content.substring(0, 100);
          }
          break;
        }
      }
      analyses.push(analysis);
    }
    // Pad with defaults if the model returned fewer lines than posts.
    while (analyses.length < posts.length) {
      analyses.push({
        postIndex: analyses.length + 1,
        isRelevant: false,
        confidence: 0.3,
        reasoning: "AI response parsing failed",
      });
    }
    return analyses;
  } catch (error) {
    logger.error(`Error in batch AI analysis: ${error.message}`);
    // Fallback: mark all as relevant with low confidence so nothing is lost.
    return posts.map((_, i) => ({
      postIndex: i + 1,
      isRelevant: true,
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    }));
  }
}
/**
* Analyze a single post using local Ollama (fallback)
*/
/**
 * Classify a single post's relevance to `context` with one Ollama call.
 * Used as a fallback when batch analysis is unavailable.
 *
 * @param {string} text - post body to classify
 * @param {string} context - description of what counts as relevant
 * @param {string} [model="mistral"] - Ollama model name
 * @param {string} [ollamaHost="http://localhost:11434"] - Ollama base URL
 * @returns {Promise<{isRelevant: boolean, confidence: number, reasoning: string}>}
 *   never rejects: errors yield isRelevant=true with confidence 0.3
 */
async function analyzeSinglePost(
  text,
  context,
  model = "mistral",
  ollamaHost = "http://localhost:11434"
) {
  const prompt = `Analyze this LinkedIn post for relevance to: "${context}"
Post: "${text}"
Is this post relevant to "${context}"? Provide:
1. YES or NO
2. Confidence (0.0 to 1.0)
3. Brief reason
Format: YES/NO | 0.X | reason`;
  try {
    const response = await fetch(`${ollamaHost}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: model,
        prompt: prompt,
        stream: false,
        options: {
          // Low temperature for more deterministic classification.
          temperature: 0.3,
        },
      }),
    });
    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.status}`);
    }
    const data = await response.json();
    const aiResponse = data.response.trim();
    // Expected shape: YES/NO | 0.X | reason
    const parts = aiResponse.split("|").map((p) => p.trim());
    if (parts.length >= 3) {
      // BUG FIX: previously `parseFloat(parts[1]) || 0.5` — a reported
      // confidence of exactly 0 is falsy and was silently bumped to 0.5.
      const parsedConfidence = Number.parseFloat(parts[1]);
      return {
        isRelevant: parts[0].toUpperCase().includes("YES"),
        confidence: Math.max(
          0,
          Math.min(1, Number.isNaN(parsedConfidence) ? 0.5 : parsedConfidence)
        ),
        reasoning: parts[2],
      };
    }
    // Fallback parsing when the model ignored the pipe format.
    return {
      isRelevant:
        aiResponse.toLowerCase().includes("yes") ||
        aiResponse.toLowerCase().includes("relevant"),
      confidence: 0.6,
      reasoning: aiResponse.substring(0, 100),
    };
  } catch (error) {
    // Default to include on error so downstream filtering can decide.
    return {
      isRelevant: true,
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    };
  }
}
/**
* Find the most recent results file if none specified
*/
/**
 * Locate the newest raw results JSON file in `resultsDir`.
 *
 * Considers only "results-*" / "linkedin-results-*" .json files and skips
 * AI-annotated outputs (names containing "-ai-"). "Newest" is the
 * lexicographically greatest filename.
 *
 * @param {string} [resultsDir="results"] - directory to scan
 * @returns {string} path to the newest matching file
 * @throws {Error} when the directory or any matching file is missing
 */
function findLatestResultsFile(resultsDir = "results") {
  const fs = require("fs");
  const path = require("path");
  if (!fs.existsSync(resultsDir)) {
    throw new Error("Results directory not found. Run the scraper first.");
  }
  const isRawResults = (name) =>
    (name.startsWith("results-") || name.startsWith("linkedin-results-")) &&
    name.endsWith(".json") &&
    !name.includes("-ai-");
  const candidates = fs.readdirSync(resultsDir).filter(isRawResults).sort();
  if (candidates.length === 0) {
    throw new Error("No results files found. Run the scraper first.");
  }
  return path.join(resultsDir, candidates[candidates.length - 1]);
}
// Public API of the ai-utils module.
module.exports = {
  checkOllamaStatus,
  analyzeBatch,
  analyzeSinglePost,
  findLatestResultsFile,
};
const { logger } = require("./logger");
/**
* AI Analysis utilities for post processing with Ollama
* Extracted from ai-analyzer-local.js for reuse across parsers
*/
/**
* Check if Ollama is running and the model is available
*/
async function checkOllamaStatus(
  model = "mistral",
  ollamaHost = "http://localhost:11434"
) {
  try {
    // Check if Ollama is running (the tags endpoint also lists models)
    const response = await fetch(`${ollamaHost}/api/tags`);
    if (!response.ok) {
      throw new Error(`Ollama not running on ${ollamaHost}`);
    }
    const data = await response.json();
    const availableModels = data.models.map((m) => m.name);
    logger.ai("Ollama is running");
    logger.info(
      `📦 Available models: ${availableModels
        .map((m) => m.split(":")[0])
        .join(", ")}`
    );
    // Check if requested model is available (prefix match tolerates ":tag")
    const modelExists = availableModels.some((m) => m.startsWith(model));
    if (!modelExists) {
      logger.error(`Model "${model}" not found`);
      logger.error(`💡 Install it with: ollama pull ${model}`);
      logger.error(
        `💡 Or choose from: ${availableModels
          .map((m) => m.split(":")[0])
          .join(", ")}`
      );
      return false;
    }
    logger.success(`Using model: ${model}`);
    return true;
  } catch (error) {
    // Connection failures and the explicit "not running" throw both land here.
    logger.error(`Error connecting to Ollama: ${error.message}`);
    logger.error("💡 Make sure Ollama is installed and running:");
    logger.error("   1. Install: https://ollama.ai/");
    logger.error("   2. Start: ollama serve");
    logger.error(`   3. Install model: ollama pull ${model}`);
    return false;
  }
}
/**
* Analyze multiple posts using local Ollama
*/
/**
 * Classify a batch of posts for relevance to `context` with one Ollama
 * request; returns one analysis object per post, in input order.
 * Never rejects: on error every post is marked relevant with confidence 0.3.
 */
async function analyzeBatch(
  posts,
  context,
  model = "mistral",
  ollamaHost = "http://localhost:11434"
) {
  logger.ai(`Analyzing batch of ${posts.length} posts with ${model}...`);
  try {
    // Posts are truncated to 400 characters to keep the prompt compact.
    const prompt = `You are an expert at analyzing LinkedIn posts for relevance to specific contexts.
CONTEXT TO MATCH: "${context}"
Analyze these ${
      posts.length
    } LinkedIn posts and determine if each relates to the context above.
POSTS:
${posts
  .map(
    (post, i) => `
POST ${i + 1}:
"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}"
`
  )
  .join("")}
For each post, provide:
- Is it relevant to "${context}"? (YES/NO)
- Confidence level (0.0 to 1.0)
- Brief reasoning
Respond in this EXACT format for each post:
POST 1: YES/NO | 0.X | brief reason
POST 2: YES/NO | 0.X | brief reason
POST 3: YES/NO | 0.X | brief reason
Examples:
- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs
- For hiring context: "we're hiring developers" = YES | 0.8 | job posting
- Unrelated content = NO | 0.1 | not relevant to context`;
    const response = await fetch(`${ollamaHost}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: model,
        prompt: prompt,
        stream: false,
        options: {
          // Low temperature for more deterministic classification.
          temperature: 0.3,
          top_p: 0.9,
        },
      }),
    });
    if (!response.ok) {
      throw new Error(
        `Ollama API error: ${response.status} ${response.statusText}`
      );
    }
    const data = await response.json();
    const aiResponse = data.response.trim();
    // Parse the response line by line.
    const analyses = [];
    const lines = aiResponse.split("\n").filter((line) => line.trim());
    for (let i = 0; i < posts.length; i++) {
      let analysis = {
        postIndex: i + 1,
        isRelevant: false,
        confidence: 0.5,
        reasoning: "Could not parse AI response",
      };
      // Look for lines that match "POST X:" pattern
      const postPattern = new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i");
      for (const line of lines) {
        const match = line.match(postPattern);
        if (match) {
          const content = match[1].trim();
          // Parse: YES/NO | 0.X | reasoning
          const parts = content.split("|").map((p) => p.trim());
          if (parts.length >= 3) {
            analysis.isRelevant = parts[0].toUpperCase().includes("YES");
            // NOTE(review): `parseFloat(...) || 0.5` maps an explicit 0.0
            // confidence to 0.5 because 0 is falsy — confirm intended.
            analysis.confidence = Math.max(
              0,
              Math.min(1, parseFloat(parts[1]) || 0.5)
            );
            analysis.reasoning = parts[2] || "No reasoning provided";
          } else {
            // Fallback parsing
            analysis.isRelevant =
              content.toUpperCase().includes("YES") ||
              content.toLowerCase().includes("relevant");
            analysis.confidence = 0.6;
            analysis.reasoning = content.substring(0, 100);
          }
          break;
        }
      }
      analyses.push(analysis);
    }
    // If we didn't get enough analyses, fill in defaults
    while (analyses.length < posts.length) {
      analyses.push({
        postIndex: analyses.length + 1,
        isRelevant: false,
        confidence: 0.3,
        reasoning: "AI response parsing failed",
      });
    }
    return analyses;
  } catch (error) {
    logger.error(`Error in batch AI analysis: ${error.message}`);
    // Fallback: mark all as relevant with low confidence
    return posts.map((_, i) => ({
      postIndex: i + 1,
      isRelevant: true,
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    }));
  }
}
/**
* Analyze a single post using local Ollama (fallback)
*/
async function analyzeSinglePost(
  text,
  context,
  model = "mistral",
  ollamaHost = "http://localhost:11434"
) {
  // Single-post prompt; the model must answer as "YES/NO | 0.X | reason".
  const prompt = `Analyze this LinkedIn post for relevance to: "${context}"
Post: "${text}"
Is this post relevant to "${context}"? Provide:
1. YES or NO
2. Confidence (0.0 to 1.0)
3. Brief reason
Format: YES/NO | 0.X | reason`;
  try {
    const response = await fetch(`${ollamaHost}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: model,
        prompt: prompt,
        stream: false,
        options: {
          // Low temperature for more deterministic classification.
          temperature: 0.3,
        },
      }),
    });
    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.status}`);
    }
    const data = await response.json();
    const aiResponse = data.response.trim();
    // Parse response
    const parts = aiResponse.split("|").map((p) => p.trim());
    if (parts.length >= 3) {
      return {
        isRelevant: parts[0].toUpperCase().includes("YES"),
        // NOTE(review): `parseFloat(...) || 0.5` turns an explicit 0.0
        // confidence into 0.5 (0 is falsy) — confirm this is intended.
        confidence: Math.max(0, Math.min(1, parseFloat(parts[1]) || 0.5)),
        reasoning: parts[2],
      };
    } else {
      // Fallback parsing
      return {
        isRelevant:
          aiResponse.toLowerCase().includes("yes") ||
          aiResponse.toLowerCase().includes("relevant"),
        confidence: 0.6,
        reasoning: aiResponse.substring(0, 100),
      };
    }
  } catch (error) {
    return {
      isRelevant: true, // Default to include on error
      confidence: 0.3,
      reasoning: `Analysis failed: ${error.message}`,
    };
  }
}
/**
* Find the most recent results file if none specified
*/
function findLatestResultsFile(resultsDir = "results") {
  const fs = require("fs");
  const path = require("path");
  if (!fs.existsSync(resultsDir)) {
    throw new Error("Results directory not found. Run the scraper first.");
  }
  // Keep raw scraper output only; "-ai-" files are already-analyzed results.
  const files = fs
    .readdirSync(resultsDir)
    .filter(
      (f) =>
        (f.startsWith("results-") || f.startsWith("linkedin-results-")) &&
        f.endsWith(".json") &&
        !f.includes("-ai-")
    )
    .sort()
    .reverse();
  if (files.length === 0) {
    throw new Error("No results files found. Run the scraper first.");
  }
  // Lexicographically greatest name wins — assumes filenames embed
  // sortable timestamps (TODO confirm against the scraper's naming).
  return path.join(resultsDir, files[0]);
}
// Public API of the ai-utils module.
module.exports = {
  checkOllamaStatus,
  analyzeBatch,
  analyzeSinglePost,
  findLatestResultsFile,
};

File diff suppressed because it is too large Load Diff

View File

@ -1,123 +1,123 @@
const chalk = require("chalk");
/**
* Configurable logger with color support and level controls
* Can enable/disable different log levels: debug, info, warning, error, success
*/
class Logger {
  /**
   * @param {object} [options] - per-level booleans plus `colors`; every
   *   flag defaults to enabled and is disabled only by an explicit `false`.
   */
  constructor(options = {}) {
    this.levels = {
      debug: options.debug !== false,
      info: options.info !== false,
      warning: options.warning !== false,
      error: options.error !== false,
      success: options.success !== false,
    };
    this.colors = options.colors !== false;
  }

  // Build "[time] [LEVEL] prefix+message", tinted per level when colors are on.
  _formatMessage(level, message, prefix = "") {
    const timestamp = new Date().toLocaleTimeString();
    const plain = `[${timestamp}] [${level.toUpperCase()}] ${prefix}${message}`;
    if (!this.colors) {
      return plain;
    }
    const tintFor = {
      debug: chalk.gray,
      info: chalk.blue,
      warning: chalk.yellow,
      error: chalk.red,
      success: chalk.green,
    };
    const tint = tintFor[level];
    // Unknown levels stay uncolored, matching the no-color format.
    return tint ? tint(plain) : plain;
  }

  debug(message) {
    if (!this.levels.debug) return;
    console.log(this._formatMessage("debug", message));
  }

  info(message) {
    if (!this.levels.info) return;
    console.log(this._formatMessage("info", message));
  }

  warning(message) {
    if (!this.levels.warning) return;
    console.warn(this._formatMessage("warning", message));
  }

  error(message) {
    if (!this.levels.error) return;
    console.error(this._formatMessage("error", message));
  }

  success(message) {
    if (!this.levels.success) return;
    console.log(this._formatMessage("success", message));
  }

  // Convenience wrappers with emoji prefixes for better UX.
  step(message) {
    this.info(`🚀 ${message}`);
  }

  search(message) {
    this.info(`🔍 ${message}`);
  }

  ai(message) {
    this.info(`🧠 ${message}`);
  }

  location(message) {
    this.info(`📍 ${message}`);
  }

  file(message) {
    this.info(`📄 ${message}`);
  }

  // Toggle a single known level at runtime; unknown names are ignored.
  setLevel(level, enabled) {
    if (Object.prototype.hasOwnProperty.call(this.levels, level)) {
      this.levels[level] = enabled;
    }
  }

  // Disable every level.
  silent() {
    for (const level of Object.keys(this.levels)) {
      this.levels[level] = false;
    }
  }

  // Enable every level.
  verbose() {
    for (const level of Object.keys(this.levels)) {
      this.levels[level] = true;
    }
  }
}
// Create default logger instance (shared singleton for the package)
const logger = new Logger();
// Export both the class and default instance
module.exports = {
  Logger,
  logger,
};
const chalk = require("chalk");
/**
* Configurable logger with color support and level controls
* Can enable/disable different log levels: debug, info, warning, error, success
*/
class Logger {
  /**
   * @param {object} [options] - per-level booleans plus `colors`; every
   *   flag defaults to enabled and is disabled only by an explicit `false`.
   */
  constructor(options = {}) {
    this.levels = {
      debug: options.debug !== false,
      info: options.info !== false,
      warning: options.warning !== false,
      error: options.error !== false,
      success: options.success !== false,
    };
    this.colors = options.colors !== false;
  }
  // Build "[time] [LEVEL] prefix+message", colorized per level via chalk.
  _formatMessage(level, message, prefix = "") {
    const timestamp = new Date().toLocaleTimeString();
    const fullMessage = `${prefix}${message}`;
    if (!this.colors) {
      return `[${timestamp}] [${level.toUpperCase()}] ${fullMessage}`;
    }
    switch (level) {
      case "debug":
        return chalk.gray(`[${timestamp}] [DEBUG] ${fullMessage}`);
      case "info":
        return chalk.blue(`[${timestamp}] [INFO] ${fullMessage}`);
      case "warning":
        return chalk.yellow(`[${timestamp}] [WARNING] ${fullMessage}`);
      case "error":
        return chalk.red(`[${timestamp}] [ERROR] ${fullMessage}`);
      case "success":
        return chalk.green(`[${timestamp}] [SUCCESS] ${fullMessage}`);
      default:
        // Unknown levels fall through uncolored.
        return `[${timestamp}] [${level.toUpperCase()}] ${fullMessage}`;
    }
  }
  debug(message) {
    if (this.levels.debug) {
      console.log(this._formatMessage("debug", message));
    }
  }
  info(message) {
    if (this.levels.info) {
      console.log(this._formatMessage("info", message));
    }
  }
  warning(message) {
    if (this.levels.warning) {
      console.warn(this._formatMessage("warning", message));
    }
  }
  error(message) {
    if (this.levels.error) {
      console.error(this._formatMessage("error", message));
    }
  }
  success(message) {
    if (this.levels.success) {
      console.log(this._formatMessage("success", message));
    }
  }
  // Convenience methods with emoji prefixes for better UX
  step(message) {
    this.info(`🚀 ${message}`);
  }
  search(message) {
    this.info(`🔍 ${message}`);
  }
  ai(message) {
    this.info(`🧠 ${message}`);
  }
  location(message) {
    this.info(`📍 ${message}`);
  }
  file(message) {
    this.info(`📄 ${message}`);
  }
  // Configure logger levels at runtime; unknown level names are ignored.
  setLevel(level, enabled) {
    if (this.levels.hasOwnProperty(level)) {
      this.levels[level] = enabled;
    }
  }
  // Disable all logging
  silent() {
    Object.keys(this.levels).forEach((level) => {
      this.levels[level] = false;
    });
  }
  // Enable all logging
  verbose() {
    Object.keys(this.levels).forEach((level) => {
      this.levels[level] = true;
    });
  }
}
// Create default logger instance (shared singleton for the package)
const logger = new Logger();
// Export both the class and default instance
module.exports = {
  Logger,
  logger,
};

View File

@ -1,124 +1,124 @@
/**
* Shared test utilities for parsers
* Common mocks, helpers, and test data
*/
/**
* Mock Playwright page object for testing
*/
function createMockPage() {
  // Every page API the parsers touch resolves immediately with a benign value.
  const resolvedWith = (value) => jest.fn().mockResolvedValue(value);
  return {
    goto: resolvedWith(undefined),
    waitForSelector: resolvedWith(undefined),
    $$: resolvedWith([]),
    $: resolvedWith(null),
    textContent: resolvedWith(""),
    close: resolvedWith(undefined),
  };
}
/**
* Mock fetch for AI API calls
*/
function createMockFetch(response = {}) {
  // A 200-style response whose json() yields `response`; own keys of
  // `response` can override ok/status/json.
  const resolvedResponse = {
    ok: true,
    status: 200,
    json: jest.fn().mockResolvedValue(response),
  };
  Object.assign(resolvedResponse, response);
  return jest.fn().mockResolvedValue(resolvedResponse);
}
/**
* Sample test data for posts
*/
// Representative scraped posts: one relevant to layoffs, one to hiring.
const samplePosts = [
  {
    text: "We are laying off 100 employees due to economic downturn.",
    keyword: "layoff",
    profileLink: "https://linkedin.com/in/test-user-1",
  },
  {
    text: "Exciting opportunity! We are hiring senior developers for our team.",
    keyword: "hiring",
    profileLink: "https://linkedin.com/in/test-user-2",
  },
];
/**
* Sample location test data
*/
// Canadian locations in several formats (full province name vs abbreviation).
const sampleLocations = [
  "Toronto, Ontario, Canada",
  "Vancouver, BC",
  "Calgary, Alberta",
  "Montreal, Quebec",
  "Halifax, Nova Scotia",
];
/**
* Common test assertions
*/
// Asserts the minimal shape of a scraped post: the three required
// fields exist and `text` is a non-empty string.
function expectValidPost(post) {
  ["text", "keyword", "profileLink"].forEach((field) => {
    expect(post).toHaveProperty(field);
  });
  expect(typeof post.text).toBe("string");
  expect(post.text.length).toBeGreaterThan(0);
}
// Asserts the shape of an AI analysis result: required fields present,
// `isRelevant` boolean, and `confidence` within [0, 1].
function expectValidAIAnalysis(analysis) {
  ["isRelevant", "confidence", "reasoning"].forEach((field) => {
    expect(analysis).toHaveProperty(field);
  });
  expect(typeof analysis.isRelevant).toBe("boolean");
  expect(analysis.confidence).toBeGreaterThanOrEqual(0);
  expect(analysis.confidence).toBeLessThanOrEqual(1);
}
// Asserts a location is a non-empty string.
function expectValidLocation(location) {
  expect(typeof location).toBe("string");
  expect(location.length).toBeGreaterThan(0);
}
/**
* Test environment setup
*/
function setupTestEnv() {
  // Deterministic environment variables for tests.
  process.env.NODE_ENV = "test";
  process.env.OLLAMA_HOST = "http://localhost:11434";
  process.env.AI_CONTEXT = "test context";
  // Silence console output while tests run.
  for (const method of ["log", "error", "warn"]) {
    jest.spyOn(console, method).mockImplementation(() => {});
  }
}
/**
* Clean up test environment
*/
function teardownTestEnv() {
  // Restore the console methods spied on by setupTestEnv.
  for (const method of ["log", "error", "warn"]) {
    console[method].mockRestore();
  }
  // Clear the environment variables set by setupTestEnv.
  for (const name of ["NODE_ENV", "OLLAMA_HOST", "AI_CONTEXT"]) {
    delete process.env[name];
  }
}
// Public API of the test-utils module.
module.exports = {
  createMockPage,
  createMockFetch,
  samplePosts,
  sampleLocations,
  expectValidPost,
  expectValidAIAnalysis,
  expectValidLocation,
  setupTestEnv,
  teardownTestEnv,
};
/**
 * Shared test utilities for parsers
 * Common mocks, helpers, and test data
 */
/**
 * Mock Playwright page object for testing
 * (each method is a jest mock resolving to a benign default)
 */
function createMockPage() {
  return {
    goto: jest.fn().mockResolvedValue(undefined),
    waitForSelector: jest.fn().mockResolvedValue(undefined),
    $$: jest.fn().mockResolvedValue([]),
    $: jest.fn().mockResolvedValue(null),
    textContent: jest.fn().mockResolvedValue(""),
    close: jest.fn().mockResolvedValue(undefined),
  };
}
/**
 * Mock fetch for AI API calls
 * Resolves to a 200-style response; keys in `response` can override defaults.
 */
function createMockFetch(response = {}) {
  return jest.fn().mockResolvedValue({
    ok: true,
    status: 200,
    json: jest.fn().mockResolvedValue(response),
    ...response,
  });
}
/**
 * Sample test data for posts
 */
const samplePosts = [
  {
    text: "We are laying off 100 employees due to economic downturn.",
    keyword: "layoff",
    profileLink: "https://linkedin.com/in/test-user-1",
  },
  {
    text: "Exciting opportunity! We are hiring senior developers for our team.",
    keyword: "hiring",
    profileLink: "https://linkedin.com/in/test-user-2",
  },
];
/**
 * Sample location test data
 */
const sampleLocations = [
  "Toronto, Ontario, Canada",
  "Vancouver, BC",
  "Calgary, Alberta",
  "Montreal, Quebec",
  "Halifax, Nova Scotia",
];
/**
 * Common test assertions
 */
// Asserts the minimal shape of a scraped post.
function expectValidPost(post) {
  expect(post).toHaveProperty("text");
  expect(post).toHaveProperty("keyword");
  expect(post).toHaveProperty("profileLink");
  expect(typeof post.text).toBe("string");
  expect(post.text.length).toBeGreaterThan(0);
}
// Asserts the shape of an AI analysis result, with confidence in [0, 1].
function expectValidAIAnalysis(analysis) {
  expect(analysis).toHaveProperty("isRelevant");
  expect(analysis).toHaveProperty("confidence");
  expect(analysis).toHaveProperty("reasoning");
  expect(typeof analysis.isRelevant).toBe("boolean");
  expect(analysis.confidence).toBeGreaterThanOrEqual(0);
  expect(analysis.confidence).toBeLessThanOrEqual(1);
}
// Asserts a location is a non-empty string.
function expectValidLocation(location) {
  expect(typeof location).toBe("string");
  expect(location.length).toBeGreaterThan(0);
}
/**
 * Test environment setup
 */
function setupTestEnv() {
  // Mock environment variables
  process.env.NODE_ENV = "test";
  process.env.OLLAMA_HOST = "http://localhost:11434";
  process.env.AI_CONTEXT = "test context";
  // Suppress console output during tests
  jest.spyOn(console, "log").mockImplementation(() => {});
  jest.spyOn(console, "error").mockImplementation(() => {});
  jest.spyOn(console, "warn").mockImplementation(() => {});
}
/**
 * Clean up test environment
 */
function teardownTestEnv() {
  // Restore console
  console.log.mockRestore();
  console.error.mockRestore();
  console.warn.mockRestore();
  // Clear environment
  delete process.env.NODE_ENV;
  delete process.env.OLLAMA_HOST;
  delete process.env.AI_CONTEXT;
}
// Public API of the test-utils module.
module.exports = {
  createMockPage,
  createMockFetch,
  samplePosts,
  sampleLocations,
  expectValidPost,
  expectValidAIAnalysis,
  expectValidLocation,
  setupTestEnv,
  teardownTestEnv,
};

View File

@ -1,107 +1,107 @@
/**
* Text processing utilities for cleaning and validating content
* Extracted from linkedout.js for reuse across parsers
*/
/**
* Clean text by removing hashtags, URLs, emojis, and normalizing whitespace
*/
/**
 * Clean text by stripping hashtags, "hashtag" mentions, URLs, and common
 * emoji, then collapsing whitespace to single spaces.
 *
 * @param {*} text - candidate text; non-string or falsy input yields ""
 * @returns {string} cleaned, trimmed text
 */
function cleanText(text) {
  if (!text || typeof text !== "string") {
    return "";
  }
  // Each pair is [pattern, replacement], applied in order.
  const substitutions = [
    [/#\w+/g, ""], // hashtags
    [/\bhashtag\b/gi, ""], // literal "hashtag" mentions
    [/hashtag-\w+/gi, ""],
    [/https?:\/\/[^\s]+/g, ""], // URLs
    // Common emoji Unicode ranges.
    [/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}]/gu, ""],
  ];
  const stripped = substitutions.reduce(
    (acc, [pattern, replacement]) => acc.replace(pattern, replacement),
    text
  );
  // Normalize whitespace last so removals don't leave double spaces.
  return stripped.replace(/\s+/g, " ").trim();
}
/**
* Check if text contains any of the specified keywords (case insensitive)
*/
/**
 * Check whether `text` contains any of the given keywords (case-insensitive
 * substring match).
 *
 * @param {string} text - text to search; falsy input yields false
 * @param {string[]} keywords - candidate keywords; non-array yields false
 * @returns {boolean}
 */
function containsAnyKeyword(text, keywords) {
  if (!text || !Array.isArray(keywords)) {
    return false;
  }
  const haystack = text.toLowerCase();
  for (const keyword of keywords) {
    if (haystack.includes(keyword.toLowerCase())) {
      return true;
    }
  }
  return false;
}
/**
* Validate if text meets basic quality criteria
*/
/**
 * Validate that text meets basic quality criteria: it is a non-empty
 * string, at least `minLength` characters, and contains at least one
 * ASCII alphanumeric character.
 *
 * @param {*} text - candidate text
 * @param {number} [minLength=30] - minimum acceptable length
 * @returns {boolean}
 */
function isValidText(text, minLength = 30) {
  if (typeof text !== "string" || text.length === 0) {
    return false;
  }
  return text.length >= minLength && /[a-zA-Z0-9]/.test(text);
}
/**
* Extract domain from URL
*/
/**
 * Extract the hostname from an absolute URL.
 *
 * @param {*} url - candidate URL string
 * @returns {?string} hostname, or null for non-strings / unparseable URLs
 */
function extractDomain(url) {
  if (typeof url !== "string" || url.length === 0) {
    return null;
  }
  try {
    return new URL(url).hostname;
  } catch {
    return null;
  }
}
/**
* Normalize URL by removing query parameters and fragments
*/
/**
 * Normalize a URL by dropping its query string and fragment.
 *
 * @param {*} url - candidate URL string
 * @returns {string} "protocol//host/path" for parseable URLs; "" for
 *   non-string/falsy input; the input unchanged when parsing fails
 */
function normalizeUrl(url) {
  if (typeof url !== "string" || url.length === 0) {
    return "";
  }
  try {
    const { protocol, hostname, pathname } = new URL(url);
    return `${protocol}//${hostname}${pathname}`;
  } catch {
    return url;
  }
}
// Public API of the text-utils module.
module.exports = {
  cleanText,
  containsAnyKeyword,
  isValidText,
  extractDomain,
  normalizeUrl,
};
/**
 * Text processing utilities for cleaning and validating content
 * Extracted from linkedout.js for reuse across parsers
 */
/**
 * Clean text by removing hashtags, URLs, emojis, and normalizing whitespace
 * Non-string or falsy input yields "".
 */
function cleanText(text) {
  if (!text || typeof text !== "string") {
    return "";
  }
  // Remove hashtags
  text = text.replace(/#\w+/g, "");
  // Remove hashtag mentions
  text = text.replace(/\bhashtag\b/gi, "");
  text = text.replace(/hashtag-\w+/gi, "");
  // Remove URLs
  text = text.replace(/https?:\/\/[^\s]+/g, "");
  // Remove emojis (Unicode ranges for common emoji)
  text = text.replace(
    /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}]/gu,
    ""
  );
  // Normalize whitespace
  text = text.replace(/\s+/g, " ").trim();
  return text;
}
/**
 * Check if text contains any of the specified keywords (case insensitive)
 * Returns false for falsy text or a non-array keyword list.
 */
function containsAnyKeyword(text, keywords) {
  if (!text || !Array.isArray(keywords)) {
    return false;
  }
  const lowerText = text.toLowerCase();
  return keywords.some((keyword) => lowerText.includes(keyword.toLowerCase()));
}
/**
 * Validate if text meets basic quality criteria
 * (string type, minimum length, at least one alphanumeric character)
 */
function isValidText(text, minLength = 30) {
  if (!text || typeof text !== "string") {
    return false;
  }
  // Check minimum length
  if (text.length < minLength) {
    return false;
  }
  // Check if text contains alphanumeric characters
  if (!/[a-zA-Z0-9]/.test(text)) {
    return false;
  }
  return true;
}
/**
 * Extract domain from URL
 * Returns null when the input is not a parseable absolute URL.
 */
function extractDomain(url) {
  if (!url || typeof url !== "string") {
    return null;
  }
  try {
    const urlObj = new URL(url);
    return urlObj.hostname;
  } catch (error) {
    return null;
  }
}
/**
 * Normalize URL by removing query parameters and fragments
 * Unparseable input is returned unchanged; falsy input yields "".
 */
function normalizeUrl(url) {
  if (!url || typeof url !== "string") {
    return "";
  }
  try {
    const urlObj = new URL(url);
    return `${urlObj.protocol}//${urlObj.hostname}${urlObj.pathname}`;
  } catch (error) {
    return url;
  }
}
// Public API of the text-utils module.
module.exports = {
  cleanText,
  containsAnyKeyword,
  isValidText,
  extractDomain,
  normalizeUrl,
};

View File

@ -1,194 +1,194 @@
/**
* Test file for logger functionality
*/
const { Logger, logger } = require("../src/logger");
describe("Logger", () => {
let consoleSpy;
let consoleWarnSpy;
let consoleErrorSpy;
beforeEach(() => {
consoleSpy = jest.spyOn(console, "log").mockImplementation();
consoleWarnSpy = jest.spyOn(console, "warn").mockImplementation();
consoleErrorSpy = jest.spyOn(console, "error").mockImplementation();
});
afterEach(() => {
consoleSpy.mockRestore();
consoleWarnSpy.mockRestore();
consoleErrorSpy.mockRestore();
});
test("should create default logger instance", () => {
expect(logger).toBeDefined();
expect(logger).toBeInstanceOf(Logger);
});
test("should log info messages", () => {
logger.info("Test message");
expect(consoleSpy).toHaveBeenCalled();
});
test("should create custom logger with disabled levels", () => {
const customLogger = new Logger({ debug: false });
customLogger.debug("This should not log");
expect(consoleSpy).not.toHaveBeenCalled();
});
test("should use emoji prefixes for convenience methods", () => {
logger.step("Test step");
logger.ai("Test AI");
logger.location("Test location");
expect(consoleSpy).toHaveBeenCalledTimes(3);
});
test("should configure levels at runtime", () => {
const customLogger = new Logger();
customLogger.setLevel("debug", false);
customLogger.debug("This should not log");
expect(consoleSpy).not.toHaveBeenCalled();
});
test("should go silent when requested", () => {
const customLogger = new Logger();
customLogger.silent();
customLogger.info("This should not log");
customLogger.error("This should not log");
expect(consoleSpy).not.toHaveBeenCalled();
expect(consoleErrorSpy).not.toHaveBeenCalled();
});
// Additional test cases for comprehensive coverage
test("should log warning messages", () => {
logger.warning("Test warning");
expect(consoleWarnSpy).toHaveBeenCalled();
});
test("should log error messages", () => {
logger.error("Test error");
expect(consoleErrorSpy).toHaveBeenCalled();
});
test("should log success messages", () => {
logger.success("Test success");
expect(consoleSpy).toHaveBeenCalled();
});
test("should log debug messages", () => {
logger.debug("Test debug");
expect(consoleSpy).toHaveBeenCalled();
});
test("should respect disabled warning level", () => {
const customLogger = new Logger({ warning: false });
customLogger.warning("This should not log");
expect(consoleWarnSpy).not.toHaveBeenCalled();
});
test("should respect disabled error level", () => {
const customLogger = new Logger({ error: false });
customLogger.error("This should not log");
expect(consoleErrorSpy).not.toHaveBeenCalled();
});
test("should respect disabled success level", () => {
const customLogger = new Logger({ success: false });
customLogger.success("This should not log");
expect(consoleSpy).not.toHaveBeenCalled();
});
test("should respect disabled info level", () => {
const customLogger = new Logger({ info: false });
customLogger.info("This should not log");
expect(consoleSpy).not.toHaveBeenCalled();
});
test("should test all convenience methods", () => {
logger.step("Test step");
logger.search("Test search");
logger.ai("Test AI");
logger.location("Test location");
logger.file("Test file");
expect(consoleSpy).toHaveBeenCalledTimes(5);
});
test("should enable all levels with verbose method", () => {
const customLogger = new Logger({ debug: false, info: false });
customLogger.verbose();
customLogger.debug("This should log");
customLogger.info("This should log");
expect(consoleSpy).toHaveBeenCalledTimes(2);
});
test("should handle setLevel with invalid level gracefully", () => {
const customLogger = new Logger();
expect(() => {
customLogger.setLevel("invalid", false);
}).not.toThrow();
});
test("should format messages with timestamps", () => {
logger.info("Test message");
const loggedMessage = consoleSpy.mock.calls[0][0];
expect(loggedMessage).toMatch(/\[\d{1,2}:\d{2}:\d{2}\]/);
});
test("should include level in formatted messages", () => {
logger.info("Test message");
const loggedMessage = consoleSpy.mock.calls[0][0];
expect(loggedMessage).toContain("[INFO]");
});
test("should disable colors when colors option is false", () => {
const customLogger = new Logger({ colors: false });
customLogger.info("Test message");
const loggedMessage = consoleSpy.mock.calls[0][0];
// Should not contain ANSI color codes
expect(loggedMessage).not.toMatch(/\u001b\[/);
});
test("should enable colors by default", () => {
logger.info("Test message");
const loggedMessage = consoleSpy.mock.calls[0][0];
// Should contain ANSI color codes
expect(loggedMessage).toMatch(/\u001b\[/);
});
test("should handle multiple level configurations", () => {
const customLogger = new Logger({
debug: false,
info: true,
warning: false,
error: true,
success: false,
});
customLogger.debug("Should not log");
customLogger.info("Should log");
customLogger.warning("Should not log");
customLogger.error("Should log");
customLogger.success("Should not log");
expect(consoleSpy).toHaveBeenCalledTimes(1);
expect(consoleErrorSpy).toHaveBeenCalledTimes(1);
expect(consoleWarnSpy).not.toHaveBeenCalled();
});
test("should handle empty or undefined messages", () => {
expect(() => {
logger.info("");
logger.info(undefined);
logger.info(null);
}).not.toThrow();
});
test("should handle complex message objects", () => {
const testObj = { key: "value", nested: { data: "test" } };
expect(() => {
logger.info(testObj);
}).not.toThrow();
});
});
/**
* Test file for logger functionality
*/
const { Logger, logger } = require("../src/logger");
describe("Logger", () => {
let consoleSpy;
let consoleWarnSpy;
let consoleErrorSpy;
beforeEach(() => {
consoleSpy = jest.spyOn(console, "log").mockImplementation();
consoleWarnSpy = jest.spyOn(console, "warn").mockImplementation();
consoleErrorSpy = jest.spyOn(console, "error").mockImplementation();
});
afterEach(() => {
consoleSpy.mockRestore();
consoleWarnSpy.mockRestore();
consoleErrorSpy.mockRestore();
});
test("should create default logger instance", () => {
expect(logger).toBeDefined();
expect(logger).toBeInstanceOf(Logger);
});
test("should log info messages", () => {
logger.info("Test message");
expect(consoleSpy).toHaveBeenCalled();
});
test("should create custom logger with disabled levels", () => {
const customLogger = new Logger({ debug: false });
customLogger.debug("This should not log");
expect(consoleSpy).not.toHaveBeenCalled();
});
test("should use emoji prefixes for convenience methods", () => {
logger.step("Test step");
logger.ai("Test AI");
logger.location("Test location");
expect(consoleSpy).toHaveBeenCalledTimes(3);
});
test("should configure levels at runtime", () => {
const customLogger = new Logger();
customLogger.setLevel("debug", false);
customLogger.debug("This should not log");
expect(consoleSpy).not.toHaveBeenCalled();
});
test("should go silent when requested", () => {
const customLogger = new Logger();
customLogger.silent();
customLogger.info("This should not log");
customLogger.error("This should not log");
expect(consoleSpy).not.toHaveBeenCalled();
expect(consoleErrorSpy).not.toHaveBeenCalled();
});
// Additional test cases for comprehensive coverage
test("should log warning messages", () => {
logger.warning("Test warning");
expect(consoleWarnSpy).toHaveBeenCalled();
});
test("should log error messages", () => {
logger.error("Test error");
expect(consoleErrorSpy).toHaveBeenCalled();
});
test("should log success messages", () => {
logger.success("Test success");
expect(consoleSpy).toHaveBeenCalled();
});
test("should log debug messages", () => {
logger.debug("Test debug");
expect(consoleSpy).toHaveBeenCalled();
});
test("should respect disabled warning level", () => {
const customLogger = new Logger({ warning: false });
customLogger.warning("This should not log");
expect(consoleWarnSpy).not.toHaveBeenCalled();
});
test("should respect disabled error level", () => {
const customLogger = new Logger({ error: false });
customLogger.error("This should not log");
expect(consoleErrorSpy).not.toHaveBeenCalled();
});
test("should respect disabled success level", () => {
const customLogger = new Logger({ success: false });
customLogger.success("This should not log");
expect(consoleSpy).not.toHaveBeenCalled();
});
test("should respect disabled info level", () => {
const customLogger = new Logger({ info: false });
customLogger.info("This should not log");
expect(consoleSpy).not.toHaveBeenCalled();
});
test("should test all convenience methods", () => {
logger.step("Test step");
logger.search("Test search");
logger.ai("Test AI");
logger.location("Test location");
logger.file("Test file");
expect(consoleSpy).toHaveBeenCalledTimes(5);
});
test("should enable all levels with verbose method", () => {
const customLogger = new Logger({ debug: false, info: false });
customLogger.verbose();
customLogger.debug("This should log");
customLogger.info("This should log");
expect(consoleSpy).toHaveBeenCalledTimes(2);
});
test("should handle setLevel with invalid level gracefully", () => {
const customLogger = new Logger();
expect(() => {
customLogger.setLevel("invalid", false);
}).not.toThrow();
});
test("should format messages with timestamps", () => {
logger.info("Test message");
const loggedMessage = consoleSpy.mock.calls[0][0];
expect(loggedMessage).toMatch(/\[\d{1,2}:\d{2}:\d{2}\]/);
});
test("should include level in formatted messages", () => {
logger.info("Test message");
const loggedMessage = consoleSpy.mock.calls[0][0];
expect(loggedMessage).toContain("[INFO]");
});
test("should disable colors when colors option is false", () => {
const customLogger = new Logger({ colors: false });
customLogger.info("Test message");
const loggedMessage = consoleSpy.mock.calls[0][0];
// Should not contain ANSI color codes
expect(loggedMessage).not.toMatch(/\u001b\[/);
});
test("should enable colors by default", () => {
logger.info("Test message");
const loggedMessage = consoleSpy.mock.calls[0][0];
// Should contain ANSI color codes
expect(loggedMessage).toMatch(/\u001b\[/);
});
test("should handle multiple level configurations", () => {
const customLogger = new Logger({
debug: false,
info: true,
warning: false,
error: true,
success: false,
});
customLogger.debug("Should not log");
customLogger.info("Should log");
customLogger.warning("Should not log");
customLogger.error("Should log");
customLogger.success("Should not log");
expect(consoleSpy).toHaveBeenCalledTimes(1);
expect(consoleErrorSpy).toHaveBeenCalledTimes(1);
expect(consoleWarnSpy).not.toHaveBeenCalled();
});
test("should handle empty or undefined messages", () => {
expect(() => {
logger.info("");
logger.info(undefined);
logger.info(null);
}).not.toThrow();
});
test("should handle complex message objects", () => {
const testObj = { key: "value", nested: { data: "test" } };
expect(() => {
logger.info(testObj);
}).not.toThrow();
});
});

View File

@ -1,94 +1,94 @@
/**
* Authentication Manager
*
* Handles login/authentication for different sites
*/
// Site-specific login orchestration. Each supported site contributes a
// strategy function; all browser work is delegated to the CoreParser's
// registered Playwright pages.
class AuthManager {
// @param coreParser - parser engine that owns the browser pages (must
//   expose getPage(pageId) and navigateTo(url, options)).
constructor(coreParser) {
this.coreParser = coreParser;
}
/**
* Authenticate to a specific site
*
* @param {string} site - Site key, matched case-insensitively (currently only "linkedin").
* @param {object} credentials - Site-specific credentials object.
* @param {string} [pageId="default"] - Id of the page to drive.
* @returns {Promise<boolean>} true on successful login.
* @throws {Error} if no strategy is registered for `site`, or the strategy fails.
*/
async authenticate(site, credentials, pageId = "default") {
const strategies = {
linkedin: this.authenticateLinkedIn.bind(this),
// Add more auth strategies as needed
};
const strategy = strategies[site.toLowerCase()];
if (!strategy) {
throw new Error(`No authentication strategy found for site: ${site}`);
}
return await strategy(credentials, pageId);
}
/**
* LinkedIn authentication strategy
*
* @param {{username: string, password: string}} credentials - Both fields required (falsy values rejected).
* @param {string} [pageId="default"] - Id of the page to drive.
* @returns {Promise<boolean>} true once the logged-in nav avatar appears.
* @throws {Error} on missing credentials, unknown pageId, or any navigation/login failure
*   (the original error message is wrapped).
*/
async authenticateLinkedIn(credentials, pageId = "default") {
const { username, password } = credentials;
if (!username || !password) {
throw new Error("LinkedIn authentication requires username and password");
}
const page = this.coreParser.getPage(pageId);
if (!page) {
throw new Error(`Page with ID '${pageId}' not found`);
}
try {
// Navigate to LinkedIn login
await this.coreParser.navigateTo("https://www.linkedin.com/login", {
pageId,
});
// Fill credentials
await page.fill('input[name="session_key"]', username);
await page.fill('input[name="session_password"]', password);
// Submit form
await page.click('button[type="submit"]');
// Wait for successful login (profile image appears)
// NOTE(review): selector assumes LinkedIn's current nav DOM — verify
// periodically, as site markup changes will break this silently.
await page.waitForSelector("img.global-nav__me-photo", {
timeout: 15000,
});
return true;
} catch (error) {
// Wrap so callers see a uniform "LinkedIn authentication failed" error.
throw new Error(`LinkedIn authentication failed: ${error.message}`);
}
}
/**
* Check if currently authenticated to a site
*
* Best-effort probe: returns false (never throws) for unknown pages,
* unsupported sites, or when the logged-in marker does not appear in time.
*
* @param {string} site - Site key, matched case-insensitively.
* @param {string} [pageId="default"] - Id of the page to probe.
* @returns {Promise<boolean>}
*/
async isAuthenticated(site, pageId = "default") {
const page = this.coreParser.getPage(pageId);
if (!page) {
return false;
}
const checkers = {
linkedin: async () => {
try {
// Short timeout: this is a quick presence probe, not a login wait.
await page.waitForSelector("img.global-nav__me-photo", {
timeout: 2000,
});
return true;
} catch {
return false;
}
},
};
const checker = checkers[site.toLowerCase()];
return checker ? await checker() : false;
}
}
module.exports = AuthManager;
/**
* Authentication Manager
*
* Handles login/authentication for different sites
*/
/**
 * Authentication Manager
 *
 * Coordinates per-site login flows. Browser interaction goes through the
 * CoreParser's registered pages; each supported site has its own strategy.
 */
class AuthManager {
  /** @param coreParser - engine exposing getPage(pageId) and navigateTo(url, opts). */
  constructor(coreParser) {
    this.coreParser = coreParser;
  }

  /**
   * Authenticate to a specific site.
   *
   * @param {string} site - Site key (case-insensitive); only "linkedin" today.
   * @param {object} credentials - Site-specific credentials.
   * @param {string} [pageId="default"] - Page to drive.
   * @returns {Promise<boolean>} true on success.
   * @throws {Error} when the site has no registered strategy or login fails.
   */
  async authenticate(site, credentials, pageId = "default") {
    const handlers = {
      linkedin: this.authenticateLinkedIn.bind(this),
      // Add more auth strategies as needed
    };
    const handler = handlers[site.toLowerCase()];
    if (!handler) {
      throw new Error(`No authentication strategy found for site: ${site}`);
    }
    return await handler(credentials, pageId);
  }

  /**
   * LinkedIn authentication strategy.
   *
   * @param {{username: string, password: string}} credentials - Both required.
   * @param {string} [pageId="default"] - Page to drive.
   * @returns {Promise<boolean>} true once the logged-in avatar is visible.
   * @throws {Error} on missing credentials, unknown pageId, or login failure.
   */
  async authenticateLinkedIn(credentials, pageId = "default") {
    const { username, password } = credentials;
    if (!username || !password) {
      throw new Error("LinkedIn authentication requires username and password");
    }

    const loginPage = this.coreParser.getPage(pageId);
    if (!loginPage) {
      throw new Error(`Page with ID '${pageId}' not found`);
    }

    try {
      // Open the login form, enter credentials, and submit.
      await this.coreParser.navigateTo("https://www.linkedin.com/login", {
        pageId,
      });
      await loginPage.fill('input[name="session_key"]', username);
      await loginPage.fill('input[name="session_password"]', password);
      await loginPage.click('button[type="submit"]');
      // Login is confirmed when the nav-bar profile photo renders.
      await loginPage.waitForSelector("img.global-nav__me-photo", {
        timeout: 15000,
      });
      return true;
    } catch (error) {
      throw new Error(`LinkedIn authentication failed: ${error.message}`);
    }
  }

  /**
   * Check if currently authenticated to a site.
   *
   * Best-effort: resolves false (never throws) for unknown pages,
   * unsupported sites, or when the marker element doesn't appear in time.
   *
   * @param {string} site - Site key (case-insensitive).
   * @param {string} [pageId="default"] - Page to probe.
   * @returns {Promise<boolean>}
   */
  async isAuthenticated(site, pageId = "default") {
    const page = this.coreParser.getPage(pageId);
    if (!page) return false;
    if (site.toLowerCase() !== "linkedin") return false;

    try {
      // Quick presence probe with a short timeout.
      await page.waitForSelector("img.global-nav__me-photo", {
        timeout: 2000,
      });
      return true;
    } catch {
      return false;
    }
  }
}
module.exports = AuthManager;

63
core-parser/index.js Normal file
View File

@ -0,0 +1,63 @@
const playwright = require('playwright');
const AuthManager = require('./auth-manager');
const NavigationManager = require('./navigation');
/**
 * Core browser automation engine shared by all parsers.
 *
 * Owns the Playwright browser/context lifecycle, keeps a registry of named
 * pages, and delegates authentication and navigation to dedicated managers.
 */
class CoreParser {
  /**
   * @param {object} [config] - Overrides for parser defaults.
   * @param {boolean} [config.headless=true] - Launch the browser headless.
   * @param {number} [config.timeout=60000] - Default wait timeout in ms.
   */
  constructor(config = {}) {
    this.config = {
      headless: true,
      timeout: 60000, // Increased default timeout
      ...config,
    };
    this.browser = null;
    this.context = null;
    this.pages = {};
    this.authManager = new AuthManager(this);
    this.navigationManager = new NavigationManager(this);
  }

  /** Launch Chromium and create a fresh browser context. */
  async init() {
    this.browser = await playwright.chromium.launch({
      headless: this.config.headless,
    });
    this.context = await this.browser.newContext();
  }

  /**
   * Create and register a new page under `id`, lazily initializing the
   * browser on first use. An existing page with the same id is replaced.
   */
  async createPage(id) {
    if (!this.browser) await this.init();
    const page = await this.context.newPage();
    this.pages[id] = page;
    return page;
  }

  /** Look up a registered page by id (undefined if unknown). */
  getPage(id) {
    return this.pages[id];
  }

  /** Delegate site authentication to the AuthManager. */
  async authenticate(site, credentials, pageId) {
    return this.authManager.authenticate(site, credentials, pageId);
  }

  /**
   * Navigate a registered page to a URL.
   *
   * Bug fix: the previous version destructured option defaults (including
   * the intended `waitUntil: "networkidle"` default) but then forwarded the
   * raw `options` object, so NavigationManager's own "domcontentloaded"
   * default still applied and the networkidle default never took effect.
   * We now merge the default in before delegating; explicit caller options
   * still win, and all other defaults (pageId, retries, retryDelay, timeout)
   * remain NavigationManager's responsibility, as before.
   *
   * @param {string} url - Destination URL.
   * @param {object} [options] - Forwarded to NavigationManager.navigateTo.
   * @returns {Promise<boolean>} resolves true on successful navigation.
   */
  async navigateTo(url, options = {}) {
    return this.navigationManager.navigateTo(url, {
      waitUntil: "networkidle", // Changed default to networkidle
      ...options,
    });
  }

  /** Close the browser (tearing down its context and pages) and reset state. */
  async cleanup() {
    if (this.browser) {
      await this.browser.close();
      this.browser = null;
      this.context = null;
      this.pages = {};
    }
  }
}
module.exports = CoreParser;

View File

@ -1,131 +1,131 @@
/**
* Navigation Manager
*
* Handles page navigation with error handling, retries, and logging
*/
// Drives page navigation for CoreParser: retries with delay, selector and
// content waits, plus small debugging helpers (URL, screenshot).
class NavigationManager {
// @param coreParser - engine exposing getPage(pageId) and config.timeout.
constructor(coreParser) {
this.coreParser = coreParser;
}
/**
* Navigate to URL with comprehensive error handling
*
* Attempts page.goto up to `retries + 1` times, sleeping `retryDelay` ms
* between attempts.
*
* @param {string} url - Destination URL.
* @param {object} [options]
* @param {string} [options.pageId="default"] - Registered page to drive.
* @param {string} [options.waitUntil="domcontentloaded"] - Playwright goto wait state.
* @param {number} [options.retries=1] - Extra attempts after the first failure.
* @param {number} [options.retryDelay=2000] - Delay between attempts (ms).
* @param {number} [options.timeout] - Per-attempt timeout; defaults to config.timeout.
* @returns {Promise<boolean>} true on success.
* @throws {Error} if the page id is unknown or every attempt fails.
*/
async navigateTo(url, options = {}) {
const {
pageId = "default",
waitUntil = "domcontentloaded",
retries = 1,
retryDelay = 2000,
timeout = this.coreParser.config.timeout,
} = options;
const page = this.coreParser.getPage(pageId);
if (!page) {
throw new Error(`Page with ID '${pageId}' not found`);
}
let lastError;
for (let attempt = 0; attempt <= retries; attempt++) {
try {
console.log(
`🌐 Navigating to: ${url} (attempt ${attempt + 1}/${retries + 1})`
);
await page.goto(url, {
waitUntil,
timeout,
});
console.log(`✅ Navigation successful: ${url}`);
return true;
} catch (error) {
lastError = error;
console.warn(
`⚠️ Navigation attempt ${attempt + 1} failed: ${error.message}`
);
// Only sleep if another attempt is coming.
if (attempt < retries) {
console.log(`🔄 Retrying in ${retryDelay}ms...`);
await this.delay(retryDelay);
}
}
}
// All attempts failed
const errorMessage = `Navigation failed after ${retries + 1} attempts: ${
lastError.message
}`;
console.error(`${errorMessage}`);
throw new Error(errorMessage);
}
/**
* Navigate and wait for specific selector
*
* @returns {Promise<boolean>} true if the selector appeared before timeout;
*   false if it did not. Navigation failure itself still throws (propagated
*   from navigateTo).
*/
async navigateAndWaitFor(url, selector, options = {}) {
await this.navigateTo(url, options);
const { pageId = "default", timeout = this.coreParser.config.timeout } =
options;
const page = this.coreParser.getPage(pageId);
try {
await page.waitForSelector(selector, { timeout });
console.log(`✅ Selector found: ${selector}`);
return true;
} catch (error) {
console.warn(`⚠️ Selector not found: ${selector} - ${error.message}`);
return false;
}
}
/**
* Check if current page has specific content
*
* Polls the page body text (in the browser context) for a substring.
*
* @returns {Promise<boolean>} true if found within `timeout` ms (default 5000).
*/
async hasContent(content, options = {}) {
const { pageId = "default", timeout = 5000 } = options;
const page = this.coreParser.getPage(pageId);
try {
await page.waitForFunction(
(text) => document.body.innerText.includes(text),
content,
{ timeout }
);
return true;
} catch {
return false;
}
}
/**
* Utility delay function
* @param {number} ms - Milliseconds to sleep.
*/
async delay(ms) {
return new Promise((resolve) => setTimeout(resolve, ms));
}
/**
* Get current page URL
* @returns {?string} URL, or null if the page id is unknown.
*/
getCurrentUrl(pageId = "default") {
const page = this.coreParser.getPage(pageId);
return page ? page.url() : null;
}
/**
* Take screenshot for debugging
* No-op when the page id is unknown.
*/
async screenshot(filepath, pageId = "default") {
const page = this.coreParser.getPage(pageId);
if (page) {
await page.screenshot({ path: filepath });
console.log(`📸 Screenshot saved: ${filepath}`);
}
}
}
module.exports = NavigationManager;
/**
 * Navigation Manager
 *
 * Page navigation for CoreParser: retrying goto with delays, selector and
 * content waits, and small debugging helpers (current URL, screenshots).
 */
class NavigationManager {
  /** @param coreParser - engine exposing getPage(pageId) and config.timeout. */
  constructor(coreParser) {
    this.coreParser = coreParser;
  }

  /**
   * Navigate to a URL, retrying on failure.
   *
   * Makes up to `retries + 1` attempts, sleeping `retryDelay` ms between
   * them. Throws when the page id is unknown or every attempt fails.
   *
   * @param {string} url - Destination URL.
   * @param {object} [options] - pageId, waitUntil, retries, retryDelay, timeout.
   * @returns {Promise<boolean>} true on success.
   */
  async navigateTo(url, options = {}) {
    const {
      pageId = "default",
      waitUntil = "domcontentloaded",
      retries = 1,
      retryDelay = 2000,
      timeout = this.coreParser.config.timeout,
    } = options;

    const page = this.coreParser.getPage(pageId);
    if (!page) {
      throw new Error(`Page with ID '${pageId}' not found`);
    }

    const totalAttempts = retries + 1;
    let failure;
    for (let attemptNo = 1; attemptNo <= totalAttempts; attemptNo++) {
      try {
        console.log(
          `🌐 Navigating to: ${url} (attempt ${attemptNo}/${totalAttempts})`
        );
        await page.goto(url, { waitUntil, timeout });
        console.log(`✅ Navigation successful: ${url}`);
        return true;
      } catch (err) {
        failure = err;
        console.warn(
          `⚠️ Navigation attempt ${attemptNo} failed: ${err.message}`
        );
        // Sleep only when another attempt remains.
        if (attemptNo < totalAttempts) {
          console.log(`🔄 Retrying in ${retryDelay}ms...`);
          await this.delay(retryDelay);
        }
      }
    }

    // Every attempt failed — surface the last error.
    const errorMessage = `Navigation failed after ${totalAttempts} attempts: ${failure.message}`;
    console.error(`${errorMessage}`);
    throw new Error(errorMessage);
  }

  /**
   * Navigate, then wait for a selector to appear.
   *
   * @returns {Promise<boolean>} true if the selector appeared in time,
   *   false otherwise. Navigation failure still propagates as a throw.
   */
  async navigateAndWaitFor(url, selector, options = {}) {
    await this.navigateTo(url, options);
    const { pageId = "default", timeout = this.coreParser.config.timeout } =
      options;
    const page = this.coreParser.getPage(pageId);
    try {
      await page.waitForSelector(selector, { timeout });
      console.log(`✅ Selector found: ${selector}`);
      return true;
    } catch (error) {
      console.warn(`⚠️ Selector not found: ${selector} - ${error.message}`);
      return false;
    }
  }

  /**
   * Check whether the current page body contains a text snippet,
   * polling in the browser context until `timeout` ms (default 5000).
   *
   * @returns {Promise<boolean>}
   */
  async hasContent(content, options = {}) {
    const { pageId = "default", timeout = 5000 } = options;
    const page = this.coreParser.getPage(pageId);
    try {
      await page.waitForFunction(
        (text) => document.body.innerText.includes(text),
        content,
        { timeout }
      );
      return true;
    } catch {
      return false;
    }
  }

  /** Sleep for `ms` milliseconds. */
  async delay(ms) {
    return new Promise((done) => setTimeout(done, ms));
  }

  /** Current URL of a registered page, or null if the id is unknown. */
  getCurrentUrl(pageId = "default") {
    const page = this.coreParser.getPage(pageId);
    if (!page) {
      return null;
    }
    return page.url();
  }

  /** Save a debugging screenshot; no-op when the page id is unknown. */
  async screenshot(filepath, pageId = "default") {
    const page = this.coreParser.getPage(pageId);
    if (!page) {
      return;
    }
    await page.screenshot({ path: filepath });
    console.log(`📸 Screenshot saved: ${filepath}`);
  }
}
module.exports = NavigationManager;

View File

@ -1,27 +1,7 @@
{
"name": "core-parser",
"version": "1.0.0",
"description": "Core browser automation and parsing engine for all parsers",
"main": "index.js",
"scripts": {
"test": "jest",
"install:browsers": "npx playwright install chromium"
},
"keywords": [
"parser",
"playwright",
"browser",
"automation",
"core"
],
"author": "Job Market Intelligence Team",
"license": "ISC",
"type": "commonjs",
"dependencies": {
"playwright": "^1.53.2",
"dotenv": "^17.0.0"
},
"devDependencies": {
"jest": "^29.7.0"
}
}
{
"name": "core-parser",
"version": "1.0.0",
"main": "index.js",
"description": "Core parser utilities for browser management",
"dependencies": {}
}

View File

@ -1,497 +1,497 @@
# Job Search Parser - Job Market Intelligence
Specialized parser for job market intelligence, tracking job postings, market trends, and competitive analysis. Focuses on tech roles and industry insights.
## 🎯 Purpose
The Job Search Parser is designed to:
- **Track Job Market Trends**: Monitor demand for specific roles and skills
- **Competitive Intelligence**: Analyze salary ranges and requirements
- **Industry Insights**: Track hiring patterns across different sectors
- **Skill Gap Analysis**: Identify in-demand technologies and frameworks
- **Market Demand Forecasting**: Predict job market trends
## 🚀 Features
### Core Functionality
- **Multi-Source Aggregation**: Collect job data from multiple platforms
- **Role-Specific Tracking**: Focus on tech roles and emerging positions
- **Skill Analysis**: Extract and categorize required skills
- **Salary Intelligence**: Track compensation ranges and trends
- **Company Intelligence**: Monitor hiring companies and patterns
### Advanced Features
- **Market Trend Analysis**: Identify growing and declining job categories
- **Geographic Distribution**: Track job distribution by location
- **Experience Level Analysis**: Entry, mid, senior level tracking
- **Remote Work Trends**: Monitor remote/hybrid work patterns
- **Technology Stack Tracking**: Framework and tool popularity
## 🌐 Supported Job Sites
### ✅ Implemented Parsers
#### SkipTheDrive Parser
Remote job board specializing in work-from-home positions.
**Features:**
- Keyword-based job search with relevance sorting
- Job type filtering (full-time, part-time, contract)
- Multi-page result parsing with pagination
- Featured/sponsored job identification
- AI-powered job relevance analysis
- Automatic duplicate detection
**Usage:**
```bash
# Parse SkipTheDrive for QA automation jobs
node index.js --sites=skipthedrive --keywords="automation qa,qa engineer"
# Filter by job type
JOB_TYPES="full time,contract" node index.js --sites=skipthedrive
# Run demo with limited results
node index.js --sites=skipthedrive --demo
```
### 🚧 Planned Parsers
- **Indeed**: Comprehensive job aggregator
- **Glassdoor**: Jobs with company reviews and salary data
- **Monster**: Traditional job board
- **SimplyHired**: Job aggregator with salary estimates
- **LinkedIn Jobs**: Professional network job postings
- **AngelList**: Startup and tech jobs
- **Remote.co**: Dedicated remote work jobs
- **FlexJobs**: Flexible and remote positions
## 📦 Installation
```bash
# Install dependencies
npm install
# Run tests
npm test
# Run demo
node demo.js
```
## 🔧 Configuration
### Environment Variables
Create a `.env` file in the parser directory:
```env
# Job Search Configuration
SEARCH_SOURCES=linkedin,indeed,glassdoor
TARGET_ROLES=software engineer,data scientist,product manager
LOCATION_FILTER=Toronto,Vancouver,Calgary
EXPERIENCE_LEVELS=entry,mid,senior
REMOTE_PREFERENCE=remote,hybrid,onsite
# Analysis Configuration
ENABLE_SALARY_ANALYSIS=true
ENABLE_SKILL_ANALYSIS=true
ENABLE_TREND_ANALYSIS=true
MIN_SALARY=50000
MAX_SALARY=200000
# Output Configuration
OUTPUT_FORMAT=json,csv
SAVE_RAW_DATA=true
ANALYSIS_INTERVAL=daily
```
### Command Line Options
```bash
# Basic usage
node index.js
# Specific roles
node index.js --roles="frontend developer,backend developer"
# Geographic focus
node index.js --locations="Toronto,Vancouver"
# Experience level
node index.js --experience="senior"
# Output format
node index.js --output=results/job-market-analysis.json
```
**Available Options:**
- `--roles="role1,role2"`: Target job roles
- `--locations="city1,city2"`: Geographic focus
- `--experience="entry|mid|senior"`: Experience level
- `--remote="remote|hybrid|onsite"`: Remote work preference
- `--salary-min=NUMBER`: Minimum salary filter
- `--salary-max=NUMBER`: Maximum salary filter
- `--output=FILE`: Output filename
- `--format=json|csv`: Output format
- `--trends`: Enable trend analysis
- `--skills`: Enable skill analysis
## 📊 Keywords
### Role-Specific Keywords
Place keyword CSV files in the `keywords/` directory:
```
job-search-parser/
├── keywords/
│ ├── job-search-keywords.csv # General job search terms
│ ├── tech-roles.csv # Technology roles
│ ├── data-roles.csv # Data science roles
│ ├── management-roles.csv # Management positions
│ └── emerging-roles.csv # Emerging job categories
└── index.js
```
### Tech Roles Keywords
```csv
keyword
software engineer
frontend developer
backend developer
full stack developer
data scientist
machine learning engineer
devops engineer
site reliability engineer
cloud architect
security engineer
mobile developer
iOS developer
Android developer
react developer
vue developer
angular developer
node.js developer
python developer
java developer
golang developer
rust developer
data engineer
analytics engineer
```
### Data Science Keywords
```csv
keyword
data scientist
machine learning engineer
data analyst
business analyst
data engineer
analytics engineer
ML engineer
AI engineer
statistician
quantitative analyst
research scientist
data architect
BI developer
ETL developer
```
## 📈 Usage Examples
### Basic Job Search
```bash
# Standard job market analysis
node index.js
# Specific tech roles
node index.js --roles="software engineer,data scientist"
# Geographic focus
node index.js --locations="Toronto,Vancouver,Calgary"
```
### Advanced Analysis
```bash
# Senior level positions
node index.js --experience="senior" --salary-min=100000
# Remote work opportunities
node index.js --remote="remote" --roles="frontend developer"
# Trend analysis
node index.js --trends --skills --output=results/trends.json
```
### Market Intelligence
```bash
# Salary analysis
node index.js --salary-min=80000 --salary-max=150000
# Skill gap analysis
node index.js --skills --roles="machine learning engineer"
# Competitive intelligence
node index.js --companies="Google,Microsoft,Amazon"
```
## 📊 Output Format
### JSON Structure
```json
{
"metadata": {
"timestamp": "2024-01-15T10:30:00Z",
"search_parameters": {
"roles": ["software engineer", "data scientist"],
"locations": ["Toronto", "Vancouver"],
"experience_levels": ["mid", "senior"],
"remote_preference": ["remote", "hybrid"]
},
"total_jobs_found": 1250,
"analysis_duration_seconds": 45
},
"market_overview": {
"total_jobs": 1250,
"average_salary": 95000,
"salary_range": {
"min": 65000,
"max": 180000,
"median": 92000
},
"remote_distribution": {
"remote": 45,
"hybrid": 35,
"onsite": 20
},
"experience_distribution": {
"entry": 15,
"mid": 45,
"senior": 40
}
},
"trends": {
"growing_skills": [
{ "skill": "React", "growth_rate": 25 },
{ "skill": "Python", "growth_rate": 18 },
{ "skill": "AWS", "growth_rate": 22 }
],
"declining_skills": [
{ "skill": "jQuery", "growth_rate": -12 },
{ "skill": "PHP", "growth_rate": -8 }
],
"emerging_roles": ["AI Engineer", "DevSecOps Engineer", "Data Engineer"]
},
"jobs": [
{
"id": "job_1",
"title": "Senior Software Engineer",
"company": "TechCorp",
"location": "Toronto, Ontario",
"remote_type": "hybrid",
"salary": {
"min": 100000,
"max": 140000,
"currency": "CAD"
},
"required_skills": ["React", "Node.js", "TypeScript", "AWS"],
"preferred_skills": ["GraphQL", "Docker", "Kubernetes"],
"experience_level": "senior",
"job_url": "https://example.com/job/1",
"posted_date": "2024-01-10T09:00:00Z",
"scraped_at": "2024-01-15T10:30:00Z"
}
],
"analysis": {
"skill_demand": {
"React": { "count": 45, "avg_salary": 98000 },
"Python": { "count": 38, "avg_salary": 102000 },
"AWS": { "count": 32, "avg_salary": 105000 }
},
"company_insights": {
"top_hirers": [
{ "company": "TechCorp", "jobs": 25 },
{ "company": "StartupXYZ", "jobs": 18 }
],
"salary_leaders": [
{ "company": "BigTech", "avg_salary": 120000 },
{ "company": "FinTech", "avg_salary": 115000 }
]
}
}
}
```
### CSV Output
The parser can also generate CSV files for easy analysis:
```csv
job_id,title,company,location,remote_type,salary_min,salary_max,required_skills,experience_level,posted_date
job_1,Senior Software Engineer,TechCorp,Toronto,hybrid,100000,140000,"React,Node.js,TypeScript",senior,2024-01-10
job_2,Data Scientist,DataCorp,Vancouver,remote,90000,130000,"Python,SQL,ML",mid,2024-01-09
```
## 🔒 Security & Best Practices
### Data Privacy
- Respect job site terms of service
- Implement appropriate rate limiting
- Store data securely and responsibly
- Anonymize sensitive information
### Rate Limiting
- Implement delays between requests
- Respect API rate limits
- Use multiple data sources
- Monitor for blocking/detection
### Legal Compliance
- Educational and research purposes only
- Respect website terms of service
- Implement data retention policies
- Monitor for legal changes
## 🧪 Testing
### Run Tests
```bash
# All tests
npm test
# Specific test suites
npm test -- --testNamePattern="JobSearch"
npm test -- --testNamePattern="Analysis"
npm test -- --testNamePattern="Trends"
```
### Test Coverage
```bash
npm run test:coverage
```
## 🚀 Performance Optimization
### Recommended Settings
#### Fast Analysis
```bash
node index.js --roles="software engineer" --locations="Toronto"
```
#### Comprehensive Analysis
```bash
node index.js --trends --skills --experience="all"
```
#### Focused Intelligence
```bash
node index.js --salary-min=80000 --remote="remote" --trends
```
### Performance Tips
- Use specific role filters to reduce data volume
- Implement caching for repeated searches
- Use parallel processing for multiple sources
- Optimize data storage and retrieval
## 🔧 Troubleshooting
### Common Issues
#### Rate Limiting
```bash
# Reduce request frequency
export REQUEST_DELAY=2000
node index.js
```
#### Data Source Issues
```bash
# Use specific sources
node index.js --sources="linkedin,indeed"
# Check source availability
node index.js --test-sources
```
#### Output Issues
```bash
# Check output directory
mkdir -p results
node index.js --output=results/analysis.json
# Verify file permissions
chmod 755 results/
```
## 📈 Monitoring & Analytics
### Key Metrics
- **Job Volume**: Total jobs found per search
- **Salary Trends**: Average and median salary changes
- **Skill Demand**: Most requested skills
- **Remote Adoption**: Remote work trend analysis
- **Market Velocity**: Job posting frequency
### Dashboard Integration
- Real-time market monitoring
- Trend visualization
- Salary benchmarking
- Skill gap analysis
- Competitive intelligence
## 🤝 Contributing
### Development Setup
1. Fork the repository
2. Create feature branch
3. Add tests for new functionality
4. Ensure all tests pass
5. Submit pull request
### Code Standards
- Follow existing code style
- Add JSDoc comments
- Maintain test coverage
- Update documentation
## 📄 License
This parser is part of the LinkedOut platform and follows the same licensing terms.
---
**Note**: This tool is designed for educational and research purposes. Always respect website terms of service and implement appropriate rate limiting and ethical usage practices.
# Job Search Parser - Job Market Intelligence
Specialized parser for job market intelligence, tracking job postings, market trends, and competitive analysis. Focuses on tech roles and industry insights.
## 🎯 Purpose
The Job Search Parser is designed to:
- **Track Job Market Trends**: Monitor demand for specific roles and skills
- **Competitive Intelligence**: Analyze salary ranges and requirements
- **Industry Insights**: Track hiring patterns across different sectors
- **Skill Gap Analysis**: Identify in-demand technologies and frameworks
- **Market Demand Forecasting**: Predict job market trends
## 🚀 Features
### Core Functionality
- **Multi-Source Aggregation**: Collect job data from multiple platforms
- **Role-Specific Tracking**: Focus on tech roles and emerging positions
- **Skill Analysis**: Extract and categorize required skills
- **Salary Intelligence**: Track compensation ranges and trends
- **Company Intelligence**: Monitor hiring companies and patterns
### Advanced Features
- **Market Trend Analysis**: Identify growing and declining job categories
- **Geographic Distribution**: Track job distribution by location
- **Experience Level Analysis**: Entry, mid, senior level tracking
- **Remote Work Trends**: Monitor remote/hybrid work patterns
- **Technology Stack Tracking**: Framework and tool popularity
## 🌐 Supported Job Sites
### ✅ Implemented Parsers
#### SkipTheDrive Parser
Remote job board specializing in work-from-home positions.
**Features:**
- Keyword-based job search with relevance sorting
- Job type filtering (full-time, part-time, contract)
- Multi-page result parsing with pagination
- Featured/sponsored job identification
- AI-powered job relevance analysis
- Automatic duplicate detection
**Usage:**
```bash
# Parse SkipTheDrive for QA automation jobs
node index.js --sites=skipthedrive --keywords="automation qa,qa engineer"
# Filter by job type
JOB_TYPES="full time,contract" node index.js --sites=skipthedrive
# Run demo with limited results
node index.js --sites=skipthedrive --demo
```
### 🚧 Planned Parsers
- **Indeed**: Comprehensive job aggregator
- **Glassdoor**: Jobs with company reviews and salary data
- **Monster**: Traditional job board
- **SimplyHired**: Job aggregator with salary estimates
- **LinkedIn Jobs**: Professional network job postings
- **AngelList**: Startup and tech jobs
- **Remote.co**: Dedicated remote work jobs
- **FlexJobs**: Flexible and remote positions
## 📦 Installation
```bash
# Install dependencies
npm install
# Run tests
npm test
# Run demo
node demo.js
```
## 🔧 Configuration
### Environment Variables
Create a `.env` file in the parser directory:
```env
# Job Search Configuration
SEARCH_SOURCES=linkedin,indeed,glassdoor
TARGET_ROLES=software engineer,data scientist,product manager
LOCATION_FILTER=Toronto,Vancouver,Calgary
EXPERIENCE_LEVELS=entry,mid,senior
REMOTE_PREFERENCE=remote,hybrid,onsite
# Analysis Configuration
ENABLE_SALARY_ANALYSIS=true
ENABLE_SKILL_ANALYSIS=true
ENABLE_TREND_ANALYSIS=true
MIN_SALARY=50000
MAX_SALARY=200000
# Output Configuration
OUTPUT_FORMAT=json,csv
SAVE_RAW_DATA=true
ANALYSIS_INTERVAL=daily
```
### Command Line Options
```bash
# Basic usage
node index.js
# Specific roles
node index.js --roles="frontend developer,backend developer"
# Geographic focus
node index.js --locations="Toronto,Vancouver"
# Experience level
node index.js --experience="senior"
# Output format
node index.js --output=results/job-market-analysis.json
```
**Available Options:**
- `--roles="role1,role2"`: Target job roles
- `--locations="city1,city2"`: Geographic focus
- `--experience="entry|mid|senior"`: Experience level
- `--remote="remote|hybrid|onsite"`: Remote work preference
- `--salary-min=NUMBER`: Minimum salary filter
- `--salary-max=NUMBER`: Maximum salary filter
- `--output=FILE`: Output filename
- `--format=json|csv`: Output format
- `--trends`: Enable trend analysis
- `--skills`: Enable skill analysis
## 📊 Keywords
### Role-Specific Keywords
Place keyword CSV files in the `keywords/` directory:
```
job-search-parser/
├── keywords/
│ ├── job-search-keywords.csv # General job search terms
│ ├── tech-roles.csv # Technology roles
│ ├── data-roles.csv # Data science roles
│ ├── management-roles.csv # Management positions
│ └── emerging-roles.csv # Emerging job categories
└── index.js
```
### Tech Roles Keywords
```csv
keyword
software engineer
frontend developer
backend developer
full stack developer
data scientist
machine learning engineer
devops engineer
site reliability engineer
cloud architect
security engineer
mobile developer
iOS developer
Android developer
react developer
vue developer
angular developer
node.js developer
python developer
java developer
golang developer
rust developer
data engineer
analytics engineer
```
### Data Science Keywords
```csv
keyword
data scientist
machine learning engineer
data analyst
business analyst
data engineer
analytics engineer
ML engineer
AI engineer
statistician
quantitative analyst
research scientist
data architect
BI developer
ETL developer
```
## 📈 Usage Examples
### Basic Job Search
```bash
# Standard job market analysis
node index.js
# Specific tech roles
node index.js --roles="software engineer,data scientist"
# Geographic focus
node index.js --locations="Toronto,Vancouver,Calgary"
```
### Advanced Analysis
```bash
# Senior level positions
node index.js --experience="senior" --salary-min=100000
# Remote work opportunities
node index.js --remote="remote" --roles="frontend developer"
# Trend analysis
node index.js --trends --skills --output=results/trends.json
```
### Market Intelligence
```bash
# Salary analysis
node index.js --salary-min=80000 --salary-max=150000
# Skill gap analysis
node index.js --skills --roles="machine learning engineer"
# Competitive intelligence
node index.js --companies="Google,Microsoft,Amazon"
```
## 📊 Output Format
### JSON Structure
```json
{
"metadata": {
"timestamp": "2024-01-15T10:30:00Z",
"search_parameters": {
"roles": ["software engineer", "data scientist"],
"locations": ["Toronto", "Vancouver"],
"experience_levels": ["mid", "senior"],
"remote_preference": ["remote", "hybrid"]
},
"total_jobs_found": 1250,
"analysis_duration_seconds": 45
},
"market_overview": {
"total_jobs": 1250,
"average_salary": 95000,
"salary_range": {
"min": 65000,
"max": 180000,
"median": 92000
},
"remote_distribution": {
"remote": 45,
"hybrid": 35,
"onsite": 20
},
"experience_distribution": {
"entry": 15,
"mid": 45,
"senior": 40
}
},
"trends": {
"growing_skills": [
{ "skill": "React", "growth_rate": 25 },
{ "skill": "Python", "growth_rate": 18 },
{ "skill": "AWS", "growth_rate": 22 }
],
"declining_skills": [
{ "skill": "jQuery", "growth_rate": -12 },
{ "skill": "PHP", "growth_rate": -8 }
],
"emerging_roles": ["AI Engineer", "DevSecOps Engineer", "Data Engineer"]
},
"jobs": [
{
"id": "job_1",
"title": "Senior Software Engineer",
"company": "TechCorp",
"location": "Toronto, Ontario",
"remote_type": "hybrid",
"salary": {
"min": 100000,
"max": 140000,
"currency": "CAD"
},
"required_skills": ["React", "Node.js", "TypeScript", "AWS"],
"preferred_skills": ["GraphQL", "Docker", "Kubernetes"],
"experience_level": "senior",
"job_url": "https://example.com/job/1",
"posted_date": "2024-01-10T09:00:00Z",
"scraped_at": "2024-01-15T10:30:00Z"
}
],
"analysis": {
"skill_demand": {
"React": { "count": 45, "avg_salary": 98000 },
"Python": { "count": 38, "avg_salary": 102000 },
"AWS": { "count": 32, "avg_salary": 105000 }
},
"company_insights": {
"top_hirers": [
{ "company": "TechCorp", "jobs": 25 },
{ "company": "StartupXYZ", "jobs": 18 }
],
"salary_leaders": [
{ "company": "BigTech", "avg_salary": 120000 },
{ "company": "FinTech", "avg_salary": 115000 }
]
}
}
}
```
### CSV Output
The parser can also generate CSV files for easy analysis:
```csv
job_id,title,company,location,remote_type,salary_min,salary_max,required_skills,experience_level,posted_date
job_1,Senior Software Engineer,TechCorp,Toronto,hybrid,100000,140000,"React,Node.js,TypeScript",senior,2024-01-10
job_2,Data Scientist,DataCorp,Vancouver,remote,90000,130000,"Python,SQL,ML",mid,2024-01-09
```
## 🔒 Security & Best Practices
### Data Privacy
- Respect job site terms of service
- Implement appropriate rate limiting
- Store data securely and responsibly
- Anonymize sensitive information
### Rate Limiting
- Implement delays between requests
- Respect API rate limits
- Use multiple data sources
- Monitor for blocking/detection
### Legal Compliance
- Educational and research purposes only
- Respect website terms of service
- Implement data retention policies
- Monitor for legal changes
## 🧪 Testing
### Run Tests
```bash
# All tests
npm test
# Specific test suites
npm test -- --testNamePattern="JobSearch"
npm test -- --testNamePattern="Analysis"
npm test -- --testNamePattern="Trends"
```
### Test Coverage
```bash
npm run test:coverage
```
## 🚀 Performance Optimization
### Recommended Settings
#### Fast Analysis
```bash
node index.js --roles="software engineer" --locations="Toronto"
```
#### Comprehensive Analysis
```bash
node index.js --trends --skills --experience="all"
```
#### Focused Intelligence
```bash
node index.js --salary-min=80000 --remote="remote" --trends
```
### Performance Tips
- Use specific role filters to reduce data volume
- Implement caching for repeated searches
- Use parallel processing for multiple sources
- Optimize data storage and retrieval
## 🔧 Troubleshooting
### Common Issues
#### Rate Limiting
```bash
# Reduce request frequency
export REQUEST_DELAY=2000
node index.js
```
#### Data Source Issues
```bash
# Use specific sources
node index.js --sources="linkedin,indeed"
# Check source availability
node index.js --test-sources
```
#### Output Issues
```bash
# Check output directory
mkdir -p results
node index.js --output=results/analysis.json
# Verify file permissions
chmod 755 results/
```
## 📈 Monitoring & Analytics
### Key Metrics
- **Job Volume**: Total jobs found per search
- **Salary Trends**: Average and median salary changes
- **Skill Demand**: Most requested skills
- **Remote Adoption**: Remote work trend analysis
- **Market Velocity**: Job posting frequency
### Dashboard Integration
- Real-time market monitoring
- Trend visualization
- Salary benchmarking
- Skill gap analysis
- Competitive intelligence
## 🤝 Contributing
### Development Setup
1. Fork the repository
2. Create feature branch
3. Add tests for new functionality
4. Ensure all tests pass
5. Submit pull request
### Code Standards
- Follow existing code style
- Add JSDoc comments
- Maintain test coverage
- Update documentation
## 📄 License
This parser is part of the LinkedOut platform and follows the same licensing terms.
---
**Note**: This tool is designed for educational and research purposes. Always respect website terms of service and implement appropriate rate limiting and ethical usage practices.

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,9 @@
keyword
qa automation
automation test
sdet
qa lead
automation lead
playwright
cypress
keyword
qa automation
automation test
sdet
qa lead
automation lead
playwright
cypress
quality assurance engineer
1 keyword
2 qa automation
3 automation test
4 sdet
5 qa lead
6 automation lead
7 playwright
8 cypress
9 quality assurance engineer

View File

@ -1,129 +1,129 @@
#!/usr/bin/env node
/**
* SkipTheDrive Parser Demo
*
* Demonstrates the SkipTheDrive job parser functionality
*/
const { parseSkipTheDrive } = require("./skipthedrive");
const fs = require("fs");
const path = require("path");
const { logger } = require("../../ai-analyzer");
// Load environment variables
require("dotenv").config({ path: path.join(__dirname, "..", ".env") });
/**
 * Run the end-to-end SkipTheDrive parser demo.
 *
 * Builds parser options from environment variables (with demo defaults),
 * runs the parser, saves the JSON results under ../results, and logs a
 * summary of accepted jobs, sample listings, and rejection reasons.
 * Exits the process with code 1 if parsing fails.
 *
 * @returns {Promise<void>}
 */
async function runDemo() {
  logger.step("🚀 SkipTheDrive Parser Demo");
  // Demo configuration
  const options = {
    // Search for QA automation jobs (from your example)
    keywords: process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
      "automation qa",
      "qa engineer",
      "test automation",
    ],
    // Job type filters - can be: "part time", "full time", "contract"
    jobTypes: process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],
    // Location filter (optional)
    locationFilter: process.env.LOCATION_FILTER || "",
    // Maximum pages to parse (explicit base-10 radix for the env override)
    maxPages: Number.parseInt(process.env.MAX_PAGES, 10) || 3,
    // Browser headless mode
    headless: process.env.HEADLESS !== "false",
    // AI analysis
    enableAI: process.env.ENABLE_AI_ANALYSIS !== "false",
    aiContext: "remote QA and test automation job opportunities",
  };
  logger.info("Configuration:");
  logger.info(`- Keywords: ${options.keywords.join(", ")}`);
  logger.info(
    `- Job Types: ${
      options.jobTypes.length > 0 ? options.jobTypes.join(", ") : "All types"
    }`
  );
  logger.info(`- Location Filter: ${options.locationFilter || "None"}`);
  logger.info(`- Max Pages: ${options.maxPages}`);
  logger.info(`- Headless: ${options.headless}`);
  logger.info(`- AI Analysis: ${options.enableAI}`);
  logger.info("\nStarting parser...");
  try {
    const startTime = Date.now();
    const results = await parseSkipTheDrive(options);
    const duration = ((Date.now() - startTime) / 1000).toFixed(2);
    // Save results under a filesystem-safe timestamped filename
    const timestamp = new Date()
      .toISOString()
      .replace(/[:.]/g, "-")
      .slice(0, -5);
    const resultsDir = path.join(__dirname, "..", "results");
    if (!fs.existsSync(resultsDir)) {
      fs.mkdirSync(resultsDir, { recursive: true });
    }
    const resultsFile = path.join(
      resultsDir,
      `skipthedrive-results-${timestamp}.json`
    );
    fs.writeFileSync(resultsFile, JSON.stringify(results, null, 2));
    // Display summary
    logger.step("\n📊 Parsing Summary:");
    logger.info(`- Duration: ${duration} seconds`);
    logger.info(`- Jobs Found: ${results.results.length}`);
    logger.info(`- Jobs Rejected: ${results.rejectedResults.length}`);
    logger.file(`- Results saved to: ${resultsFile}`);
    // Show sample results (first five accepted jobs)
    if (results.results.length > 0) {
      logger.info("\n🔍 Sample Jobs Found:");
      results.results.slice(0, 5).forEach((job, index) => {
        logger.info(`\n${index + 1}. ${job.title}`);
        logger.info(`   Company: ${job.company}`);
        logger.info(`   Posted: ${job.daysAgo}`);
        logger.info(`   Featured: ${job.isFeatured ? "Yes" : "No"}`);
        logger.info(`   URL: ${job.jobUrl}`);
        if (job.aiAnalysis) {
          logger.ai(
            `   AI Relevant: ${job.aiAnalysis.isRelevant ? "Yes" : "No"} (${(
              job.aiAnalysis.confidence * 100
            ).toFixed(0)}% confidence)`
          );
        }
      });
    }
    // Show rejection reasons, tallied per distinct reason string
    if (results.rejectedResults.length > 0) {
      const rejectionReasons = {};
      results.rejectedResults.forEach((job) => {
        rejectionReasons[job.reason] = (rejectionReasons[job.reason] || 0) + 1;
      });
      logger.info("\n❌ Rejection Reasons:");
      Object.entries(rejectionReasons).forEach(([reason, count]) => {
        logger.info(`  ${reason}: ${count}`);
      });
    }
  } catch (error) {
    logger.error("\n❌ Demo failed:", error.message);
    process.exit(1);
  }
}
// Run the demo
runDemo().catch((err) => {
  logger.error("Fatal error:", err);
  process.exit(1);
});
#!/usr/bin/env node
/**
* SkipTheDrive Parser Demo
*
* Demonstrates the SkipTheDrive job parser functionality
*/
const { parseSkipTheDrive } = require("./skipthedrive");
const fs = require("fs");
const path = require("path");
const { logger } = require("../../ai-analyzer");
// Load environment variables
require("dotenv").config({ path: path.join(__dirname, "..", ".env") });
/**
 * Run the end-to-end SkipTheDrive parser demo.
 *
 * Builds parser options from environment variables (with demo defaults),
 * runs the parser, saves the JSON results under ../results, and logs a
 * summary of accepted jobs, sample listings, and rejection reasons.
 * Exits the process with code 1 if parsing fails.
 *
 * @returns {Promise<void>}
 */
async function runDemo() {
  logger.step("🚀 SkipTheDrive Parser Demo");
  // Demo configuration
  const options = {
    // Search for QA automation jobs (from your example)
    keywords: process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
      "automation qa",
      "qa engineer",
      "test automation",
    ],
    // Job type filters - can be: "part time", "full time", "contract"
    jobTypes: process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],
    // Location filter (optional)
    locationFilter: process.env.LOCATION_FILTER || "",
    // Maximum pages to parse (explicit base-10 radix for the env override)
    maxPages: Number.parseInt(process.env.MAX_PAGES, 10) || 3,
    // Browser headless mode
    headless: process.env.HEADLESS !== "false",
    // AI analysis
    enableAI: process.env.ENABLE_AI_ANALYSIS !== "false",
    aiContext: "remote QA and test automation job opportunities",
  };
  logger.info("Configuration:");
  logger.info(`- Keywords: ${options.keywords.join(", ")}`);
  logger.info(
    `- Job Types: ${
      options.jobTypes.length > 0 ? options.jobTypes.join(", ") : "All types"
    }`
  );
  logger.info(`- Location Filter: ${options.locationFilter || "None"}`);
  logger.info(`- Max Pages: ${options.maxPages}`);
  logger.info(`- Headless: ${options.headless}`);
  logger.info(`- AI Analysis: ${options.enableAI}`);
  logger.info("\nStarting parser...");
  try {
    const startTime = Date.now();
    const results = await parseSkipTheDrive(options);
    const duration = ((Date.now() - startTime) / 1000).toFixed(2);
    // Save results under a filesystem-safe timestamped filename
    const timestamp = new Date()
      .toISOString()
      .replace(/[:.]/g, "-")
      .slice(0, -5);
    const resultsDir = path.join(__dirname, "..", "results");
    if (!fs.existsSync(resultsDir)) {
      fs.mkdirSync(resultsDir, { recursive: true });
    }
    const resultsFile = path.join(
      resultsDir,
      `skipthedrive-results-${timestamp}.json`
    );
    fs.writeFileSync(resultsFile, JSON.stringify(results, null, 2));
    // Display summary
    logger.step("\n📊 Parsing Summary:");
    logger.info(`- Duration: ${duration} seconds`);
    logger.info(`- Jobs Found: ${results.results.length}`);
    logger.info(`- Jobs Rejected: ${results.rejectedResults.length}`);
    logger.file(`- Results saved to: ${resultsFile}`);
    // Show sample results (first five accepted jobs)
    if (results.results.length > 0) {
      logger.info("\n🔍 Sample Jobs Found:");
      results.results.slice(0, 5).forEach((job, index) => {
        logger.info(`\n${index + 1}. ${job.title}`);
        logger.info(`   Company: ${job.company}`);
        logger.info(`   Posted: ${job.daysAgo}`);
        logger.info(`   Featured: ${job.isFeatured ? "Yes" : "No"}`);
        logger.info(`   URL: ${job.jobUrl}`);
        if (job.aiAnalysis) {
          logger.ai(
            `   AI Relevant: ${job.aiAnalysis.isRelevant ? "Yes" : "No"} (${(
              job.aiAnalysis.confidence * 100
            ).toFixed(0)}% confidence)`
          );
        }
      });
    }
    // Show rejection reasons, tallied per distinct reason string
    if (results.rejectedResults.length > 0) {
      const rejectionReasons = {};
      results.rejectedResults.forEach((job) => {
        rejectionReasons[job.reason] = (rejectionReasons[job.reason] || 0) + 1;
      });
      logger.info("\n❌ Rejection Reasons:");
      Object.entries(rejectionReasons).forEach(([reason, count]) => {
        logger.info(`  ${reason}: ${count}`);
      });
    }
  } catch (error) {
    logger.error("\n❌ Demo failed:", error.message);
    process.exit(1);
  }
}
// Run the demo
runDemo().catch((err) => {
  logger.error("Fatal error:", err);
  process.exit(1);
});

View File

@ -1,332 +1,332 @@
/**
* SkipTheDrive Job Parser
*
* Parses remote job listings from SkipTheDrive.com
* Supports keyword search, job type filters, and pagination
*/
const { chromium } = require("playwright");
const path = require("path");
// Import from ai-analyzer core package
const {
logger,
cleanText,
containsAnyKeyword,
parseLocationFilters,
validateLocationAgainstFilters,
extractLocationFromProfile,
analyzeBatch,
checkOllamaStatus,
} = require("../../ai-analyzer");
/**
 * Build a SkipTheDrive search URL for one keyword.
 *
 * @param {string} keyword - Search keyword (URL-encoded into the query)
 * @param {string} orderBy - Sort order appended as `orderby`; falsy skips it
 * @param {Array<string>} jobTypes - Job types appended as repeated `jobtype` params
 * @returns {string} - Formatted search URL
 */
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
  const segments = [
    `https://www.skipthedrive.com/?s=${encodeURIComponent(keyword)}`,
  ];
  if (orderBy) {
    segments.push(`orderby=${orderBy}`);
  }
  // Every requested job type becomes its own repeated query parameter.
  for (const jobType of jobTypes) {
    segments.push(`jobtype=${encodeURIComponent(jobType)}`);
  }
  return segments.join("&");
}
/**
 * Pull structured job fields out of a single SkipTheDrive listing element.
 *
 * @param {Element} article - Playwright handle for one job <article> node
 * @returns {Object} - Normalized job record, or null if extraction fails
 */
async function extractJobData(article) {
  try {
    // Tiny readers so every field extraction below stays on one line.
    const textOf = async (el) => (el ? await el.textContent() : "");
    const attrOf = async (el, name) => (el ? await el.getAttribute(name) : "");
    // Fields rendered with a leading icon token have it stripped off.
    const stripIcon = (value) => value.replace(/^\s*[^\s]+\s*/, "").trim();

    // Job title and URL come from the post heading link.
    const titleLink = await article.$("h2.post-title a");
    const title = await textOf(titleLink);
    const jobUrl = await attrOf(titleLink, "href");

    // Posting date: machine-readable attribute plus display text.
    const postDate = await article.$("time.post-date");
    const datePosted = await attrOf(postDate, "datetime");
    const dateText = await textOf(postDate);

    // Company name (icon prefix removed).
    const companyNode = await article.$(
      ".custom_fields_company_name_display_search_results"
    );
    const company = stripIcon(await textOf(companyNode));

    // "X days ago" label (icon prefix removed).
    const daysAgoNode = await article.$(
      ".custom_fields_job_date_display_search_results"
    );
    const daysAgo = stripIcon(await textOf(daysAgoNode));

    // Short description excerpt.
    const excerptNode = await article.$(".excerpt_part");
    const description = await textOf(excerptNode);

    // Sponsored/featured listings carry a dedicated marker element.
    const isFeatured = !!(await article.$(".custom_fields_sponsored_job"));

    // The numeric job id is embedded in the element id ("post-<id>").
    const articleId = await article.getAttribute("id");
    const jobId = articleId ? articleId.replace("post-", "") : "";

    return {
      jobId,
      title: cleanText(title),
      company: cleanText(company),
      jobUrl,
      datePosted,
      dateText: cleanText(dateText),
      daysAgo: cleanText(daysAgo),
      description: cleanText(description),
      isFeatured,
      source: "skipthedrive",
      timestamp: new Date().toISOString(),
    };
  } catch (error) {
    logger.error(`Error extracting job data: ${error.message}`);
    return null;
  }
}
/**
 * Parse SkipTheDrive job listings.
 *
 * Launches Chromium, runs one search per keyword, walks paginated result
 * pages, filters each listing by keyword match (and optional location),
 * deduplicates by job id, and optionally scores accepted jobs with AI
 * relevance analysis via Ollama.
 *
 * @param {Object} options - Parser options (each falls back to an env var)
 * @param {Array<string>} [options.keywords] - Search terms (SEARCH_KEYWORDS)
 * @param {Array<string>} [options.jobTypes] - Job type filters (JOB_TYPES)
 * @param {string} [options.locationFilter] - Location filter (LOCATION_FILTER)
 * @param {number} [options.maxPages] - Max result pages per keyword (MAX_PAGES)
 * @param {boolean} [options.headless] - Headless browser mode (HEADLESS)
 * @param {boolean} [options.enableAI] - Enable AI analysis (ENABLE_AI_ANALYSIS)
 * @param {string} [options.aiContext] - Context passed to the AI analyzer (AI_CONTEXT)
 * @returns {Promise<{results: Array<Object>, rejectedResults: Array<Object>, metadata: Object}>}
 *   Accepted jobs, rejected jobs (each carrying a `reason`), and run metadata.
 * @throws Re-throws fatal browser/navigation errors after logging them.
 */
async function parseSkipTheDrive(options = {}) {
  const {
    keywords = process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
      "software engineer",
      "developer",
    ],
    jobTypes = process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],
    locationFilter = process.env.LOCATION_FILTER || "",
    // Explicit base-10 radix when parsing the env override.
    maxPages = Number.parseInt(process.env.MAX_PAGES, 10) || 5,
    headless = process.env.HEADLESS !== "false",
    enableAI = process.env.ENABLE_AI_ANALYSIS === "true",
    aiContext = process.env.AI_CONTEXT || "remote job opportunities analysis",
  } = options;
  logger.step("Starting SkipTheDrive parser...");
  logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
  logger.info(
    `📋 Job Types: ${jobTypes.length > 0 ? jobTypes.join(", ") : "All"}`
  );
  logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
  logger.info(`📄 Max Pages: ${maxPages}`);
  const browser = await chromium.launch({
    headless,
    args: [
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-dev-shm-usage",
    ],
  });
  const context = await browser.newContext({
    userAgent:
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
  });
  const results = [];
  const rejectedResults = [];
  // Dedupe across keywords: the same posting can match several searches.
  const seenJobs = new Set();
  try {
    // Search for each keyword
    for (const keyword of keywords) {
      logger.info(`\n🔍 Searching for: ${keyword}`);
      const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
      const page = await context.newPage();
      try {
        logger.info(
          `Attempting navigation to: ${searchUrl} at ${new Date().toISOString()}`
        );
        await page.goto(searchUrl, {
          waitUntil: "domcontentloaded",
          timeout: 30000,
        });
        logger.info(
          `Navigation completed successfully at ${new Date().toISOString()}`
        );
        // Wait for job listings to load; absence just means zero results.
        logger.info("Waiting for selector #loops-wrapper");
        await page
          .waitForSelector("#loops-wrapper", { timeout: 5000 })
          .catch(() => {
            logger.warning(`No results found for keyword: ${keyword}`);
          });
        logger.info("Selector wait completed");
        let currentPage = 1;
        let hasNextPage = true;
        while (hasNextPage && currentPage <= maxPages) {
          logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);
          // Extract all job articles on current page
          const jobArticles = await page.$$("article[id^='post-']");
          logger.info(
            `Found ${jobArticles.length} job listings on page ${currentPage}`
          );
          for (const article of jobArticles) {
            const jobData = await extractJobData(article);
            // Skip failed extractions and previously seen job ids.
            if (!jobData || seenJobs.has(jobData.jobId)) {
              continue;
            }
            seenJobs.add(jobData.jobId);
            // Add keyword that found this job
            jobData.searchKeyword = keyword;
            // Validate job against keywords
            const fullText = `${jobData.title} ${jobData.description} ${jobData.company}`;
            if (!containsAnyKeyword(fullText, keywords)) {
              rejectedResults.push({
                ...jobData,
                rejected: true,
                reason: "Keywords not found in job listing",
              });
              continue;
            }
            // Location validation (if enabled)
            if (locationFilter) {
              const locationFilters = parseLocationFilters(locationFilter);
              // For SkipTheDrive, most jobs are remote, but we can check the title/description
              const locationValid =
                fullText.toLowerCase().includes("remote") ||
                locationFilters.some((filter) =>
                  fullText.toLowerCase().includes(filter.toLowerCase())
                );
              if (!locationValid) {
                rejectedResults.push({
                  ...jobData,
                  rejected: true,
                  reason: "Location requirements not met",
                });
                continue;
              }
              jobData.locationValid = locationValid;
            }
            logger.success(`✅ Found: ${jobData.title} at ${jobData.company}`);
            results.push(jobData);
          }
          // Check for next page
          const nextPageLink = await page.$("a.nextp");
          if (nextPageLink && currentPage < maxPages) {
            logger.info("📄 Moving to next page...");
            await nextPageLink.click();
            await page.waitForLoadState("domcontentloaded");
            await page.waitForTimeout(2000); // Wait for content to load
            currentPage++;
          } else {
            hasNextPage = false;
          }
        }
      } catch (error) {
        logger.error(`Error processing keyword "${keyword}": ${error.message}`);
      } finally {
        await page.close();
      }
    }
    logger.success(`\n✅ Parsing complete!`);
    logger.info(`📊 Total jobs found: ${results.length}`);
    logger.info(`❌ Rejected jobs: ${rejectedResults.length}`);
    // Run AI analysis if enabled and Ollama is reachable.
    let aiAnalysis = null;
    if (enableAI && results.length > 0) {
      logger.step("Running AI analysis on job listings...");
      const aiAvailable = await checkOllamaStatus();
      if (aiAvailable) {
        const analysisData = results.map((job) => ({
          text: `${job.title} at ${job.company}. ${job.description}`,
          metadata: {
            jobId: job.jobId,
            company: job.company,
            daysAgo: job.daysAgo,
          },
        }));
        aiAnalysis = await analyzeBatch(analysisData, aiContext);
        // Merge AI analysis with results (index-aligned with analysisData).
        results.forEach((job, index) => {
          if (aiAnalysis && aiAnalysis[index]) {
            job.aiAnalysis = {
              isRelevant: aiAnalysis[index].isRelevant,
              confidence: aiAnalysis[index].confidence,
              reasoning: aiAnalysis[index].reasoning,
            };
          }
        });
        logger.success("✅ AI analysis completed");
      } else {
        logger.warning("⚠️ AI not available - skipping analysis");
      }
    }
    return {
      results,
      rejectedResults,
      metadata: {
        source: "skipthedrive",
        totalJobs: results.length,
        rejectedJobs: rejectedResults.length,
        keywords: keywords,
        jobTypes: jobTypes,
        locationFilter: locationFilter,
        aiAnalysisEnabled: enableAI,
        aiAnalysisCompleted: !!aiAnalysis,
        timestamp: new Date().toISOString(),
      },
    };
  } catch (error) {
    logger.error(`Fatal error in SkipTheDrive parser: ${error.message}`);
    throw error;
  } finally {
    // Always release the browser, even on fatal errors.
    await browser.close();
  }
}
// Export the parser
// Public API: parseSkipTheDrive is the main entry point; buildSearchUrl and
// extractJobData are exported so they can be unit-tested in isolation.
module.exports = {
  parseSkipTheDrive,
  buildSearchUrl,
  extractJobData,
};
/**
* SkipTheDrive Job Parser
*
* Parses remote job listings from SkipTheDrive.com
* Supports keyword search, job type filters, and pagination
*/
const { chromium } = require("playwright");
const path = require("path");
// Import from ai-analyzer core package
const {
logger,
cleanText,
containsAnyKeyword,
parseLocationFilters,
validateLocationAgainstFilters,
extractLocationFromProfile,
analyzeBatch,
checkOllamaStatus,
} = require("../../ai-analyzer");
/**
 * Build a SkipTheDrive search URL for one keyword.
 *
 * @param {string} keyword - Search keyword (URL-encoded into the query)
 * @param {string} orderBy - Sort order appended as `orderby`; falsy skips it
 * @param {Array<string>} jobTypes - Job types appended as repeated `jobtype` params
 * @returns {string} - Formatted search URL
 */
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
  const base = `https://www.skipthedrive.com/?s=${encodeURIComponent(keyword)}`;
  const orderPart = orderBy ? `&orderby=${orderBy}` : "";
  // Each job type is emitted as its own repeated query parameter.
  const typeParts = jobTypes
    .map((type) => `&jobtype=${encodeURIComponent(type)}`)
    .join("");
  return base + orderPart + typeParts;
}
/**
 * Pull structured job fields out of a single SkipTheDrive listing element.
 *
 * @param {Element} article - Playwright handle for one job <article> node
 * @returns {Object} - Normalized job record, or null if extraction fails
 */
async function extractJobData(article) {
  try {
    // Tiny readers so every field extraction below stays on one line.
    const textOf = async (el) => (el ? await el.textContent() : "");
    const attrOf = async (el, name) => (el ? await el.getAttribute(name) : "");
    // Fields rendered with a leading icon token have it stripped off.
    const stripIcon = (value) => value.replace(/^\s*[^\s]+\s*/, "").trim();

    // Job title and URL come from the post heading link.
    const titleLink = await article.$("h2.post-title a");
    const title = await textOf(titleLink);
    const jobUrl = await attrOf(titleLink, "href");

    // Posting date: machine-readable attribute plus display text.
    const postDate = await article.$("time.post-date");
    const datePosted = await attrOf(postDate, "datetime");
    const dateText = await textOf(postDate);

    // Company name (icon prefix removed).
    const companyNode = await article.$(
      ".custom_fields_company_name_display_search_results"
    );
    const company = stripIcon(await textOf(companyNode));

    // "X days ago" label (icon prefix removed).
    const daysAgoNode = await article.$(
      ".custom_fields_job_date_display_search_results"
    );
    const daysAgo = stripIcon(await textOf(daysAgoNode));

    // Short description excerpt.
    const excerptNode = await article.$(".excerpt_part");
    const description = await textOf(excerptNode);

    // Sponsored/featured listings carry a dedicated marker element.
    const isFeatured = !!(await article.$(".custom_fields_sponsored_job"));

    // The numeric job id is embedded in the element id ("post-<id>").
    const articleId = await article.getAttribute("id");
    const jobId = articleId ? articleId.replace("post-", "") : "";

    return {
      jobId,
      title: cleanText(title),
      company: cleanText(company),
      jobUrl,
      datePosted,
      dateText: cleanText(dateText),
      daysAgo: cleanText(daysAgo),
      description: cleanText(description),
      isFeatured,
      source: "skipthedrive",
      timestamp: new Date().toISOString(),
    };
  } catch (error) {
    logger.error(`Error extracting job data: ${error.message}`);
    return null;
  }
}
/**
* Parse SkipTheDrive job listings
* @param {Object} options - Parser options
* @returns {Promise<Array>} - Array of parsed job listings
*/
async function parseSkipTheDrive(options = {}) {
  // Every option falls back to an environment variable, then a hard-coded
  // default, so the parser can run with no explicit configuration.
  const {
    keywords = process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
      "software engineer",
      "developer",
    ],
    jobTypes = process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],
    locationFilter = process.env.LOCATION_FILTER || "",
    maxPages = parseInt(process.env.MAX_PAGES) || 5,
    headless = process.env.HEADLESS !== "false",
    enableAI = process.env.ENABLE_AI_ANALYSIS === "true",
    aiContext = process.env.AI_CONTEXT || "remote job opportunities analysis",
  } = options;
  logger.step("Starting SkipTheDrive parser...");
  logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
  logger.info(
    `📋 Job Types: ${jobTypes.length > 0 ? jobTypes.join(", ") : "All"}`
  );
  logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
  logger.info(`📄 Max Pages: ${maxPages}`);
  // Sandbox/shm flags let Chromium run inside containers and CI runners.
  const browser = await chromium.launch({
    headless,
    args: [
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-dev-shm-usage",
    ],
  });
  // Fixed desktop-Chrome user agent string for all pages in this context.
  const context = await browser.newContext({
    userAgent:
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
  });
  const results = [];
  const rejectedResults = [];
  // Job IDs already processed — dedupes listings across keywords and pages.
  const seenJobs = new Set();
  try {
    // Search for each keyword
    for (const keyword of keywords) {
      logger.info(`\n🔍 Searching for: ${keyword}`);
      const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
      // A fresh page per keyword, closed in the finally below.
      const page = await context.newPage();
      try {
        logger.info(
          `Attempting navigation to: ${searchUrl} at ${new Date().toISOString()}`
        );
        await page.goto(searchUrl, {
          waitUntil: "domcontentloaded",
          timeout: 30000,
        });
        logger.info(
          `Navigation completed successfully at ${new Date().toISOString()}`
        );
        // Wait for job listings to load
        logger.info("Waiting for selector #loops-wrapper");
        // Timeout here is non-fatal: the page loop below simply finds 0 articles.
        await page
          .waitForSelector("#loops-wrapper", { timeout: 5000 })
          .catch(() => {
            logger.warning(`No results found for keyword: ${keyword}`);
          });
        logger.info("Selector wait completed");
        let currentPage = 1;
        let hasNextPage = true;
        while (hasNextPage && currentPage <= maxPages) {
          logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);
          // Extract all job articles on current page
          const jobArticles = await page.$$("article[id^='post-']");
          logger.info(
            `Found ${jobArticles.length} job listings on page ${currentPage}`
          );
          for (const article of jobArticles) {
            const jobData = await extractJobData(article);
            if (!jobData || seenJobs.has(jobData.jobId)) {
              continue;
            }
            seenJobs.add(jobData.jobId);
            // Add keyword that found this job
            jobData.searchKeyword = keyword;
            // Validate job against keywords
            const fullText = `${jobData.title} ${jobData.description} ${jobData.company}`;
            if (!containsAnyKeyword(fullText, keywords)) {
              rejectedResults.push({
                ...jobData,
                rejected: true,
                reason: "Keywords not found in job listing",
              });
              continue;
            }
            // Location validation (if enabled)
            if (locationFilter) {
              const locationFilters = parseLocationFilters(locationFilter);
              // For SkipTheDrive, most jobs are remote, but we can check the title/description
              const locationValid =
                fullText.toLowerCase().includes("remote") ||
                locationFilters.some((filter) =>
                  fullText.toLowerCase().includes(filter.toLowerCase())
                );
              if (!locationValid) {
                rejectedResults.push({
                  ...jobData,
                  rejected: true,
                  reason: "Location requirements not met",
                });
                continue;
              }
              jobData.locationValid = locationValid;
            }
            logger.success(`✅ Found: ${jobData.title} at ${jobData.company}`);
            results.push(jobData);
          }
          // Check for next page
          const nextPageLink = await page.$("a.nextp");
          if (nextPageLink && currentPage < maxPages) {
            logger.info("📄 Moving to next page...");
            await nextPageLink.click();
            await page.waitForLoadState("domcontentloaded");
            await page.waitForTimeout(2000); // Wait for content to load
            currentPage++;
          } else {
            hasNextPage = false;
          }
        }
      } catch (error) {
        // Per-keyword failure: log and move on to the next keyword.
        logger.error(`Error processing keyword "${keyword}": ${error.message}`);
      } finally {
        await page.close();
      }
    }
    logger.success(`\n✅ Parsing complete!`);
    logger.info(`📊 Total jobs found: ${results.length}`);
    logger.info(`❌ Rejected jobs: ${rejectedResults.length}`);
    // Run AI analysis if enabled
    let aiAnalysis = null;
    if (enableAI && results.length > 0) {
      logger.step("Running AI analysis on job listings...");
      const aiAvailable = await checkOllamaStatus();
      if (aiAvailable) {
        const analysisData = results.map((job) => ({
          text: `${job.title} at ${job.company}. ${job.description}`,
          metadata: {
            jobId: job.jobId,
            company: job.company,
            daysAgo: job.daysAgo,
          },
        }));
        aiAnalysis = await analyzeBatch(analysisData, aiContext);
        // Merge AI analysis with results
        // NOTE(review): assumes analyzeBatch returns one entry per input, in
        // the same order as analysisData — confirm against the ai-analyzer API.
        results.forEach((job, index) => {
          if (aiAnalysis && aiAnalysis[index]) {
            job.aiAnalysis = {
              isRelevant: aiAnalysis[index].isRelevant,
              confidence: aiAnalysis[index].confidence,
              reasoning: aiAnalysis[index].reasoning,
            };
          }
        });
        logger.success("✅ AI analysis completed");
      } else {
        logger.warning("⚠️ AI not available - skipping analysis");
      }
    }
    return {
      results,
      rejectedResults,
      metadata: {
        source: "skipthedrive",
        totalJobs: results.length,
        rejectedJobs: rejectedResults.length,
        keywords: keywords,
        jobTypes: jobTypes,
        locationFilter: locationFilter,
        aiAnalysisEnabled: enableAI,
        aiAnalysisCompleted: !!aiAnalysis,
        timestamp: new Date().toISOString(),
      },
    };
  } catch (error) {
    logger.error(`Fatal error in SkipTheDrive parser: ${error.message}`);
    throw error;
  } finally {
    // Always release the browser, even on fatal errors.
    await browser.close();
  }
}
// Public API: the parser entry point plus the URL/extraction helpers,
// exported individually so they can be unit-tested in isolation.
module.exports = {
  parseSkipTheDrive,
  buildSearchUrl,
  extractJobData,
};

View File

@ -1,302 +1,302 @@
/**
* SkipTheDrive Parsing Strategy
*
* Uses core-parser for browser management and ai-analyzer for utilities
*/
const {
logger,
cleanText,
containsAnyKeyword,
validateLocationAgainstFilters,
} = require("ai-analyzer");
/**
* SkipTheDrive URL builder
*/
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
  // Assemble the SkipTheDrive search URL with the WHATWG URL API so every
  // query value is encoded consistently by the platform.
  const url = new URL("https://www.skipthedrive.com/");
  url.searchParams.set("s", keyword);
  url.searchParams.set("orderby", orderBy);
  if (jobTypes && jobTypes.length > 0) {
    // Multiple job types travel as a single comma-separated parameter.
    url.searchParams.set("job_type", jobTypes.join(","));
  }
  return url.toString();
}
/**
* SkipTheDrive parsing strategy function
*/
async function skipthedriveStrategy(coreParser, options = {}) {
  // Options with conservative defaults; callers may override any subset.
  const {
    keywords = ["software engineer", "developer", "programmer"],
    locationFilter = null,
    maxPages = 5,
    jobTypes = [],
  } = options;
  const results = [];
  const rejectedResults = [];
  // Job IDs already seen — dedupes listings across keywords and pages.
  const seenJobs = new Set();
  try {
    // Create main page
    const page = await coreParser.createPage("skipthedrive-main");
    logger.info("🚀 Starting SkipTheDrive parser...");
    logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
    logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
    logger.info(`📄 Max Pages: ${maxPages}`);
    // Search for each keyword
    for (const keyword of keywords) {
      logger.info(`\n🔍 Searching for: ${keyword}`);
      const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
      try {
        // Navigate to search results
        await coreParser.navigateTo(searchUrl, {
          pageId: "skipthedrive-main",
          retries: 2,
          timeout: 30000,
        });
        // Wait for job listings to load
        const hasResults = await coreParser
          .waitForSelector(
            "#loops-wrapper",
            {
              timeout: 5000,
            },
            "skipthedrive-main"
          )
          .catch(() => {
            logger.warning(`No results found for keyword: ${keyword}`);
            return false;
          });
        // NOTE(review): assumes waitForSelector resolves to a truthy value on
        // success — confirm against the CoreParser API, otherwise valid result
        // pages would be skipped here.
        if (!hasResults) {
          continue;
        }
        // Process multiple pages
        let currentPage = 1;
        let hasNextPage = true;
        while (hasNextPage && currentPage <= maxPages) {
          logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);
          // Extract jobs from current page
          const pageJobs = await extractJobsFromPage(
            page,
            keyword,
            locationFilter
          );
          for (const job of pageJobs) {
            // Skip duplicates
            if (seenJobs.has(job.jobId)) continue;
            seenJobs.add(job.jobId);
            // Validate location if filtering enabled
            if (locationFilter) {
              const locationValid = validateLocationAgainstFilters(
                job.location,
                locationFilter
              );
              if (!locationValid) {
                // Keep rejected jobs so callers can audit filtering decisions.
                rejectedResults.push({
                  ...job,
                  rejectionReason: "Location filter mismatch",
                });
                continue;
              }
            }
            results.push(job);
          }
          // Check for next page
          hasNextPage = await hasNextPageAvailable(page);
          if (hasNextPage && currentPage < maxPages) {
            await navigateToNextPage(page, currentPage + 1);
            currentPage++;
            // Wait for new page to load
            await page.waitForTimeout(2000);
          } else {
            hasNextPage = false;
          }
        }
      } catch (error) {
        // Per-keyword failure: log and continue with the next keyword.
        logger.error(`Error processing keyword "${keyword}": ${error.message}`);
      }
    }
    logger.info(
      `🎯 SkipTheDrive parsing completed: ${results.length} jobs found, ${rejectedResults.length} rejected`
    );
    // Accepted/rejected job lists plus a flat summary for reporting.
    return {
      results,
      rejectedResults,
      summary: {
        totalJobs: results.length,
        totalRejected: rejectedResults.length,
        keywords: keywords.join(", "),
        locationFilter,
        source: "skipthedrive",
      },
    };
  } catch (error) {
    logger.error(`❌ SkipTheDrive parsing failed: ${error.message}`);
    throw error;
  }
}
/**
* Extract jobs from current page
*/
async function extractJobsFromPage(page, keyword, locationFilter) {
  // Collect every job card on the current results page and normalize each
  // one; per-card failures are logged and skipped so a single broken listing
  // cannot sink the whole page.
  const collected = [];
  let cards = [];
  try {
    cards = await page.$$("article.job_listing");
  } catch (error) {
    logger.error(`Failed to extract jobs from page: ${error.message}`);
    return collected;
  }
  for (const card of cards) {
    try {
      const record = await extractJobData(card, keyword);
      if (record) {
        collected.push(record);
      }
    } catch (error) {
      logger.warning(`Failed to extract job data: ${error.message}`);
    }
  }
  return collected;
}
/**
* Extract data from individual job element
*/
/**
 * Extract a normalized job record from a single SkipTheDrive listing element.
 *
 * @param {object} jobElement - Element handle for one job <article> card.
 * @param {string} keyword - Search keyword that surfaced this listing
 *   (recorded on the returned record).
 * @returns {Promise<object|null>} Job record, or null when extraction fails.
 */
async function extractJobData(jobElement, keyword) {
  try {
    // Job ID comes from the article's DOM id (e.g. "post-12345" -> "12345")
    const articleId = (await jobElement.getAttribute("id")) || "";
    const jobId = articleId ? articleId.replace("post-", "") : "";
    // Title and link to the full posting
    const titleElement = await jobElement.$(".job_listing-title a");
    const title = titleElement
      ? cleanText(await titleElement.textContent())
      : "";
    const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";
    // Extract company
    const companyElement = await jobElement.$(".company");
    const company = companyElement
      ? cleanText(await companyElement.textContent())
      : "";
    // Extract location
    const locationElement = await jobElement.$(".location");
    const location = locationElement
      ? cleanText(await locationElement.textContent())
      : "";
    // Human-readable posted-date text (e.g. "3 days ago")
    const dateElement = await jobElement.$(".job-date");
    const dateText = dateElement
      ? cleanText(await dateElement.textContent())
      : "";
    // Extract description
    const descElement = await jobElement.$(".job_listing-description");
    const description = descElement
      ? cleanText(await descElement.textContent())
      : "";
    // Featured/sponsored flag: presence of the marker element is enough
    const featuredElement = await jobElement.$(".featured");
    const isFeatured = featuredElement !== null;
    // Derive an absolute ISO date (YYYY-MM-DD) from relative "N days ago" text
    let datePosted = null;
    let daysAgo = null;
    if (dateText) {
      const match = dateText.match(/(\d+)\s+days?\s+ago/);
      if (match) {
        // Fixed: always pass a radix — the day count is base-10
        daysAgo = Number.parseInt(match[1], 10);
        const date = new Date();
        date.setDate(date.getDate() - daysAgo);
        datePosted = date.toISOString().split("T")[0];
      }
    }
    return {
      jobId,
      title,
      company,
      location,
      jobUrl,
      datePosted,
      dateText,
      daysAgo,
      description,
      isFeatured,
      keyword,
      extractedAt: new Date().toISOString(),
      source: "skipthedrive",
    };
  } catch (error) {
    // Extraction failures are logged; callers treat null as "skip this card"
    logger.warning(`Error extracting job data: ${error.message}`);
    return null;
  }
}
/**
* Check if next page is available
*/
async function hasNextPageAvailable(page) {
  // Probe for the pagination control; any lookup failure is deliberately
  // treated as "no further pages" rather than propagated.
  let nextButton = null;
  try {
    nextButton = await page.$(".next-page");
  } catch {
    // Swallowed on purpose: absence of the control simply ends paging.
  }
  return nextButton !== null;
}
/**
* Navigate to next page
*/
async function navigateToNextPage(page, pageNumber) {
  // Click the pagination control when present; a missing button is a no-op,
  // and any failure is logged (not thrown) so the caller's loop can finish.
  try {
    const pagerLink = await page.$(".next-page");
    if (!pagerLink) {
      return;
    }
    await pagerLink.click();
  } catch (error) {
    logger.warning(
      `Failed to navigate to page ${pageNumber}: ${error.message}`
    );
  }
}
// Public API: the strategy entry point plus its helpers, exported
// individually so they can be unit-tested in isolation.
module.exports = {
  skipthedriveStrategy,
  buildSearchUrl,
  extractJobsFromPage,
  extractJobData,
};
/**
* SkipTheDrive Parsing Strategy
*
* Uses core-parser for browser management and ai-analyzer for utilities
*/
const {
logger,
cleanText,
containsAnyKeyword,
validateLocationAgainstFilters,
} = require("ai-analyzer");
/**
* SkipTheDrive URL builder
*/
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
  // Assemble the SkipTheDrive search URL with the WHATWG URL API so every
  // query value is encoded consistently by the platform.
  const url = new URL("https://www.skipthedrive.com/");
  url.searchParams.set("s", keyword);
  url.searchParams.set("orderby", orderBy);
  if (jobTypes && jobTypes.length > 0) {
    // Multiple job types travel as a single comma-separated parameter.
    url.searchParams.set("job_type", jobTypes.join(","));
  }
  return url.toString();
}
/**
* SkipTheDrive parsing strategy function
*/
async function skipthedriveStrategy(coreParser, options = {}) {
  // Options with conservative defaults; callers may override any subset.
  const {
    keywords = ["software engineer", "developer", "programmer"],
    locationFilter = null,
    maxPages = 5,
    jobTypes = [],
  } = options;
  const results = [];
  const rejectedResults = [];
  // Job IDs already seen — dedupes listings across keywords and pages.
  const seenJobs = new Set();
  try {
    // Create main page
    const page = await coreParser.createPage("skipthedrive-main");
    logger.info("🚀 Starting SkipTheDrive parser...");
    logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
    logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
    logger.info(`📄 Max Pages: ${maxPages}`);
    // Search for each keyword
    for (const keyword of keywords) {
      logger.info(`\n🔍 Searching for: ${keyword}`);
      const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
      try {
        // Navigate to search results
        await coreParser.navigateTo(searchUrl, {
          pageId: "skipthedrive-main",
          retries: 2,
          timeout: 30000,
        });
        // Wait for job listings to load
        const hasResults = await coreParser
          .waitForSelector(
            "#loops-wrapper",
            {
              timeout: 5000,
            },
            "skipthedrive-main"
          )
          .catch(() => {
            logger.warning(`No results found for keyword: ${keyword}`);
            return false;
          });
        // NOTE(review): assumes waitForSelector resolves to a truthy value on
        // success — confirm against the CoreParser API, otherwise valid result
        // pages would be skipped here.
        if (!hasResults) {
          continue;
        }
        // Process multiple pages
        let currentPage = 1;
        let hasNextPage = true;
        while (hasNextPage && currentPage <= maxPages) {
          logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);
          // Extract jobs from current page
          const pageJobs = await extractJobsFromPage(
            page,
            keyword,
            locationFilter
          );
          for (const job of pageJobs) {
            // Skip duplicates
            if (seenJobs.has(job.jobId)) continue;
            seenJobs.add(job.jobId);
            // Validate location if filtering enabled
            if (locationFilter) {
              const locationValid = validateLocationAgainstFilters(
                job.location,
                locationFilter
              );
              if (!locationValid) {
                // Keep rejected jobs so callers can audit filtering decisions.
                rejectedResults.push({
                  ...job,
                  rejectionReason: "Location filter mismatch",
                });
                continue;
              }
            }
            results.push(job);
          }
          // Check for next page
          hasNextPage = await hasNextPageAvailable(page);
          if (hasNextPage && currentPage < maxPages) {
            await navigateToNextPage(page, currentPage + 1);
            currentPage++;
            // Wait for new page to load
            await page.waitForTimeout(2000);
          } else {
            hasNextPage = false;
          }
        }
      } catch (error) {
        // Per-keyword failure: log and continue with the next keyword.
        logger.error(`Error processing keyword "${keyword}": ${error.message}`);
      }
    }
    logger.info(
      `🎯 SkipTheDrive parsing completed: ${results.length} jobs found, ${rejectedResults.length} rejected`
    );
    // Accepted/rejected job lists plus a flat summary for reporting.
    return {
      results,
      rejectedResults,
      summary: {
        totalJobs: results.length,
        totalRejected: rejectedResults.length,
        keywords: keywords.join(", "),
        locationFilter,
        source: "skipthedrive",
      },
    };
  } catch (error) {
    logger.error(`❌ SkipTheDrive parsing failed: ${error.message}`);
    throw error;
  }
}
/**
* Extract jobs from current page
*/
async function extractJobsFromPage(page, keyword, locationFilter) {
  // Collect every job card on the current results page and normalize each
  // one; per-card failures are logged and skipped so a single broken listing
  // cannot sink the whole page.
  const collected = [];
  let cards = [];
  try {
    cards = await page.$$("article.job_listing");
  } catch (error) {
    logger.error(`Failed to extract jobs from page: ${error.message}`);
    return collected;
  }
  for (const card of cards) {
    try {
      const record = await extractJobData(card, keyword);
      if (record) {
        collected.push(record);
      }
    } catch (error) {
      logger.warning(`Failed to extract job data: ${error.message}`);
    }
  }
  return collected;
}
/**
* Extract data from individual job element
*/
/**
 * Extract a normalized job record from a single SkipTheDrive listing element.
 *
 * @param {object} jobElement - Element handle for one job <article> card.
 * @param {string} keyword - Search keyword that surfaced this listing
 *   (recorded on the returned record).
 * @returns {Promise<object|null>} Job record, or null when extraction fails.
 */
async function extractJobData(jobElement, keyword) {
  try {
    // Job ID comes from the article's DOM id (e.g. "post-12345" -> "12345")
    const articleId = (await jobElement.getAttribute("id")) || "";
    const jobId = articleId ? articleId.replace("post-", "") : "";
    // Title and link to the full posting
    const titleElement = await jobElement.$(".job_listing-title a");
    const title = titleElement
      ? cleanText(await titleElement.textContent())
      : "";
    const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";
    // Extract company
    const companyElement = await jobElement.$(".company");
    const company = companyElement
      ? cleanText(await companyElement.textContent())
      : "";
    // Extract location
    const locationElement = await jobElement.$(".location");
    const location = locationElement
      ? cleanText(await locationElement.textContent())
      : "";
    // Human-readable posted-date text (e.g. "3 days ago")
    const dateElement = await jobElement.$(".job-date");
    const dateText = dateElement
      ? cleanText(await dateElement.textContent())
      : "";
    // Extract description
    const descElement = await jobElement.$(".job_listing-description");
    const description = descElement
      ? cleanText(await descElement.textContent())
      : "";
    // Featured/sponsored flag: presence of the marker element is enough
    const featuredElement = await jobElement.$(".featured");
    const isFeatured = featuredElement !== null;
    // Derive an absolute ISO date (YYYY-MM-DD) from relative "N days ago" text
    let datePosted = null;
    let daysAgo = null;
    if (dateText) {
      const match = dateText.match(/(\d+)\s+days?\s+ago/);
      if (match) {
        // Fixed: always pass a radix — the day count is base-10
        daysAgo = Number.parseInt(match[1], 10);
        const date = new Date();
        date.setDate(date.getDate() - daysAgo);
        datePosted = date.toISOString().split("T")[0];
      }
    }
    return {
      jobId,
      title,
      company,
      location,
      jobUrl,
      datePosted,
      dateText,
      daysAgo,
      description,
      isFeatured,
      keyword,
      extractedAt: new Date().toISOString(),
      source: "skipthedrive",
    };
  } catch (error) {
    // Extraction failures are logged; callers treat null as "skip this card"
    logger.warning(`Error extracting job data: ${error.message}`);
    return null;
  }
}
/**
* Check if next page is available
*/
async function hasNextPageAvailable(page) {
  // Probe for the pagination control; any lookup failure is deliberately
  // treated as "no further pages" rather than propagated.
  let nextButton = null;
  try {
    nextButton = await page.$(".next-page");
  } catch {
    // Swallowed on purpose: absence of the control simply ends paging.
  }
  return nextButton !== null;
}
/**
* Navigate to next page
*/
async function navigateToNextPage(page, pageNumber) {
  // Click the pagination control when present; a missing button is a no-op,
  // and any failure is logged (not thrown) so the caller's loop can finish.
  try {
    const pagerLink = await page.$(".next-page");
    if (!pagerLink) {
      return;
    }
    await pagerLink.click();
  } catch (error) {
    logger.warning(
      `Failed to navigate to page ${pageNumber}: ${error.message}`
    );
  }
}
// Public API: the strategy entry point plus its helpers, exported
// individually so they can be unit-tested in isolation.
module.exports = {
  skipthedriveStrategy,
  buildSearchUrl,
  extractJobsFromPage,
  extractJobData,
};

View File

@ -1,412 +1,412 @@
/**
* LinkedIn Parser Demo
*
* Demonstrates the LinkedIn Parser's capabilities for scraping LinkedIn content
* with keyword-based searching, location filtering, and AI analysis.
*
* This demo uses simulated data for safety and demonstration purposes.
*/
const { logger } = require("../ai-analyzer");
const fs = require("fs");
const path = require("path");
// Terminal colors for demo output
const colors = {
  reset: "\x1b[0m",
  bright: "\x1b[1m",
  cyan: "\x1b[36m",
  green: "\x1b[32m",
  yellow: "\x1b[33m",
  blue: "\x1b[34m",
  magenta: "\x1b[35m",
  red: "\x1b[31m",
};
// Small console-logging facade: each method prints with a role-specific ANSI
// color/weight and resets terminal styling afterwards.
const demo = {
  title: (text) =>
    console.log(`\n${colors.bright}${colors.cyan}${text}${colors.reset}`),
  section: (text) =>
    console.log(`\n${colors.bright}${colors.magenta}${text}${colors.reset}`),
  success: (text) => console.log(`${colors.green}${text}${colors.reset}`),
  info: (text) => console.log(`${colors.blue} ${text}${colors.reset}`),
  warning: (text) => console.log(`${colors.yellow}⚠️ ${text}${colors.reset}`),
  error: (text) => console.log(`${colors.red}${text}${colors.reset}`),
  code: (text) => console.log(`${colors.cyan}${text}${colors.reset}`),
};
// Mock data for demonstration
const mockPosts = [
{
id: "post_1",
content:
"Just got laid off from my software engineering role at TechCorp. Looking for new opportunities in Toronto. This is really tough but I'm staying positive!",
original_content:
"Just got #laidoff from my software engineering role at TechCorp! Looking for new opportunities in #Toronto. This is really tough but I'm staying positive! 🚀",
author: {
name: "John Doe",
title: "Software Engineer",
company: "TechCorp",
location: "Toronto, Ontario, Canada",
profile_url: "https://linkedin.com/in/johndoe",
},
engagement: { likes: 45, comments: 12, shares: 3 },
metadata: {
post_date: "2024-01-10T14:30:00Z",
scraped_at: "2024-01-15T10:30:00Z",
search_keyword: "layoff",
location_validated: true,
},
},
{
id: "post_2",
content:
"Our company is downsizing and I'm affected. This is really tough news but I'm grateful for the time I had here.",
original_content:
"Our company is #downsizing and I'm affected. This is really tough news but I'm grateful for the time I had here. #RIF #layoff",
author: {
name: "Jane Smith",
title: "Product Manager",
company: "StartupXYZ",
location: "Vancouver, British Columbia, Canada",
profile_url: "https://linkedin.com/in/janesmith",
},
engagement: { likes: 23, comments: 8, shares: 1 },
metadata: {
post_date: "2024-01-09T16:45:00Z",
scraped_at: "2024-01-15T10:30:00Z",
search_keyword: "downsizing",
location_validated: true,
},
},
{
id: "post_3",
content:
"Open to work! Looking for new opportunities in software development. I have 5 years of experience in React, Node.js, and cloud technologies.",
original_content:
"Open to work! Looking for new opportunities in software development. I have 5 years of experience in #React, #NodeJS, and #cloud technologies. #opentowork #jobsearch",
author: {
name: "Bob Wilson",
title: "Full Stack Developer",
company: "Freelance",
location: "Calgary, Alberta, Canada",
profile_url: "https://linkedin.com/in/bobwilson",
},
engagement: { likes: 67, comments: 15, shares: 8 },
metadata: {
post_date: "2024-01-08T11:20:00Z",
scraped_at: "2024-01-15T10:30:00Z",
search_keyword: "open to work",
location_validated: true,
},
},
];
// Walk through every demo section in sequence, pausing for an Enter
// keypress between sections.
async function runDemo() {
  demo.title("=== LinkedIn Parser Demo ===");
  demo.info(
    "This demo showcases the LinkedIn Parser's capabilities for scraping LinkedIn content."
  );
  demo.info("All data shown is simulated for demonstration purposes.");
  demo.info("Press Enter to continue through each section...\n");
  await waitForEnter();
  // 1. Configuration Demo
  await demonstrateConfiguration();
  // 2. Keyword Loading Demo
  await demonstrateKeywordLoading();
  // 3. Search Process Demo
  await demonstrateSearchProcess();
  // 4. Location Filtering Demo
  await demonstrateLocationFiltering();
  // 5. AI Analysis Demo
  await demonstrateAIAnalysis();
  // 6. Output Generation Demo
  await demonstrateOutputGeneration();
  demo.title("=== Demo Complete ===");
  demo.success("LinkedIn Parser demo completed successfully!");
  demo.info("Check the README.md for detailed usage instructions.");
}
// Section 1: print example .env variables and CLI flags (display only —
// nothing is actually read or parsed here).
async function demonstrateConfiguration() {
  demo.section("1. Configuration Setup");
  demo.info(
    "The LinkedIn Parser uses environment variables and command-line options for configuration."
  );
  demo.code("// Environment Variables (.env file)");
  demo.info("LINKEDIN_USERNAME=your_email@example.com");
  demo.info("LINKEDIN_PASSWORD=your_password");
  demo.info("CITY=Toronto");
  demo.info("DATE_POSTED=past-week");
  demo.info("SORT_BY=date_posted");
  demo.info("WHEELS=5");
  demo.info("LOCATION_FILTER=Ontario,Manitoba");
  demo.info("ENABLE_LOCATION_CHECK=true");
  demo.info("ENABLE_LOCAL_AI=true");
  demo.info('AI_CONTEXT="job layoffs and workforce reduction"');
  demo.info("OLLAMA_MODEL=mistral");
  demo.code("// Command Line Options");
  demo.info('node index.js --keyword="layoff,downsizing" --city="Vancouver"');
  demo.info("node index.js --no-location --no-ai");
  demo.info("node index.js --output=results/my-results.json");
  demo.info("node index.js --ai-after");
  await waitForEnter();
}
// Section 2: show how keywords would be sourced. The list below is
// hard-coded for the demo — no CSV is actually read.
async function demonstrateKeywordLoading() {
  demo.section("2. Keyword Loading");
  demo.info(
    "Keywords can be loaded from CSV files or specified via command line."
  );
  // Simulate loading keywords from CSV
  demo.code("// Loading keywords from CSV file");
  logger.step("Loading keywords from keywords/linkedin-keywords.csv");
  const keywords = [
    "layoff",
    "downsizing",
    "reduction in force",
    "RIF",
    "termination",
    "job loss",
    "workforce reduction",
    "open to work",
    "actively seeking",
    "job search",
  ];
  demo.success(`Loaded ${keywords.length} keywords from CSV file`);
  demo.info("Keywords: " + keywords.slice(0, 5).join(", ") + "...");
  demo.code("// Command line keyword override");
  demo.info('node index.js --keyword="layoff,downsizing"');
  demo.info('node index.js --add-keyword="hiring freeze"');
  await waitForEnter();
}
// Section 3: simulate a per-keyword search run. Found/accepted counts are
// random numbers, not real scraping results.
async function demonstrateSearchProcess() {
  demo.section("3. Search Process Simulation");
  demo.info(
    "The parser performs automated LinkedIn searches for each keyword."
  );
  const keywords = ["layoff", "downsizing", "open to work"];
  for (const keyword of keywords) {
    demo.code(`// Searching for keyword: "${keyword}"`);
    logger.search(`Searching for "${keyword}" in Toronto`);
    // Simulate search process
    await simulateSearch();
    // Fake counts: 10-59 found, ~30% accepted.
    const foundCount = Math.floor(Math.random() * 50) + 10;
    const acceptedCount = Math.floor(foundCount * 0.3);
    logger.info(`Found ${foundCount} posts, checking profiles for location...`);
    logger.success(`Accepted ${acceptedCount} posts after location validation`);
    console.log();
  }
  await waitForEnter();
}
// Section 4: walk a fixed table of locations through the accept/reject
// display. The valid flags are pre-baked, not computed.
async function demonstrateLocationFiltering() {
  demo.section("4. Location Filtering");
  demo.info(
    "Posts are filtered based on author location using geographic validation."
  );
  demo.code("// Location filter configuration");
  demo.info("LOCATION_FILTER=Ontario,Manitoba");
  demo.info("ENABLE_LOCATION_CHECK=true");
  demo.code("// Location validation examples");
  const testLocations = [
    { location: "Toronto, Ontario, Canada", valid: true },
    { location: "Vancouver, British Columbia, Canada", valid: false },
    { location: "Calgary, Alberta, Canada", valid: false },
    { location: "Winnipeg, Manitoba, Canada", valid: true },
    { location: "New York, NY, USA", valid: false },
  ];
  testLocations.forEach(({ location, valid }) => {
    logger.location(`Checking location: ${location}`);
    if (valid) {
      logger.success(`✅ Location valid - post accepted`);
    } else {
      logger.warning(`❌ Location invalid - post rejected`);
    }
  });
  await waitForEnter();
}
// Section 5: simulate AI scoring of the mock posts. Scores are random and
// are written onto each mockPosts entry (mutates the shared fixture).
async function demonstrateAIAnalysis() {
  demo.section("5. AI Analysis");
  demo.info(
    "Posts can be analyzed using local Ollama or OpenAI for relevance scoring."
  );
  demo.code("// AI analysis configuration");
  demo.info("ENABLE_LOCAL_AI=true");
  demo.info('AI_CONTEXT="job layoffs and workforce reduction"');
  demo.info("OLLAMA_MODEL=mistral");
  demo.code("// Analyzing posts with AI");
  logger.ai("Starting AI analysis of accepted posts...");
  for (let i = 0; i < mockPosts.length; i++) {
    const post = mockPosts[i];
    logger.info(`Analyzing post ${i + 1}: ${post.content.substring(0, 50)}...`);
    // Simulate AI analysis
    await simulateProcessing();
    // Random scores in plausible ranges: relevance 0.7-1.0, confidence 0.8-1.0.
    const relevanceScore = 0.7 + Math.random() * 0.3;
    const confidence = 0.8 + Math.random() * 0.2;
    logger.success(
      `Relevance: ${relevanceScore.toFixed(
        2
      )}, Confidence: ${confidence.toFixed(2)}`
    );
    // Add AI analysis to post
    post.ai_analysis = {
      relevance_score: relevanceScore,
      confidence: confidence,
      context_match: relevanceScore > 0.7,
      analysis_text: `This post discusses ${post.metadata.search_keyword} and is relevant to the search context.`,
    };
  }
  await waitForEnter();
}
// Section 6: write the mock results to demo-results.json next to this file
// and print the shape of the output document.
async function demonstrateOutputGeneration() {
  demo.section("6. Output Generation");
  demo.info("Results are saved to JSON files with comprehensive metadata.");
  demo.code("// Generating output file");
  logger.file("Saving results to JSON file...");
  const outputData = {
    metadata: {
      timestamp: new Date().toISOString(),
      keywords: ["layoff", "downsizing", "open to work"],
      city: "Toronto",
      date_posted: "past-week",
      sort_by: "date_posted",
      total_posts_found: 150,
      accepted_posts: mockPosts.length,
      rejected_posts: 147,
      processing_time_seconds: 180,
    },
    posts: mockPosts,
  };
  // Save to demo file
  const outputPath = path.join(__dirname, "demo-results.json");
  fs.writeFileSync(outputPath, JSON.stringify(outputData, null, 2));
  demo.success(`Results saved to: ${outputPath}`);
  demo.info(`Total posts processed: ${outputData.metadata.total_posts_found}`);
  demo.info(`Posts accepted: ${outputData.metadata.accepted_posts}`);
  demo.info(`Posts rejected: ${outputData.metadata.rejected_posts}`);
  demo.code("// Output file structure");
  demo.info("📁 demo-results.json");
  demo.info(" ├── metadata");
  demo.info(" │ ├── timestamp");
  demo.info(" │ ├── keywords");
  demo.info(" │ ├── city");
  demo.info(" │ ├── total_posts_found");
  demo.info(" │ ├── accepted_posts");
  demo.info(" │ └── processing_time_seconds");
  demo.info(" └── posts[]");
  demo.info(" ├── id");
  demo.info(" ├── content");
  demo.info(" ├── author");
  demo.info(" ├── engagement");
  demo.info(" ├── ai_analysis");
  demo.info(" └── metadata");
  await waitForEnter();
}
// Helper functions
function waitForEnter() {
  // Pause execution until the user presses Enter, using a throwaway
  // readline interface that is closed as soon as input arrives.
  const { createInterface } = require("readline");
  const rl = createInterface({
    input: process.stdin,
    output: process.stdout,
  });
  return new Promise((resolve) =>
    rl.question("\nPress Enter to continue...", () => {
      rl.close();
      resolve();
    })
  );
}
async function simulateSearch() {
  // Emit one fake progress message every 800 ms, resolving on the tick
  // after the last step has been shown (same cadence as a real run).
  const steps = [
    "Launching browser",
    "Logging in",
    "Navigating to search",
    "Loading results",
  ];
  return new Promise((resolve) => {
    let next = 0;
    const ticker = setInterval(() => {
      if (next >= steps.length) {
        clearInterval(ticker);
        resolve();
        return;
      }
      logger.info(steps[next]);
      next += 1;
    }, 800);
  });
}
async function simulateProcessing() {
  // Animate a "Processing..." ticker on a single terminal line (via \r
  // rewrites) for 1.5 seconds, then clear the line and resolve.
  const frames = [".", "..", "..."];
  return new Promise((resolve) => {
    let frame = 0;
    const spinner = setInterval(() => {
      process.stdout.write(`\rProcessing${frames[frame]}`);
      frame = (frame + 1) % frames.length;
    }, 500);
    setTimeout(() => {
      clearInterval(spinner);
      process.stdout.write("\r");
      resolve();
    }, 1500);
  });
}
// Run the demo if this file is executed directly (not when required as a
// module); a non-zero exit code signals failure to the shell.
if (require.main === module) {
  runDemo().catch((error) => {
    demo.error(`Demo failed: ${error.message}`);
    process.exit(1);
  });
}
// Also export the entry point so the demo can be driven programmatically.
module.exports = { runDemo };
/**
* LinkedIn Parser Demo
*
* Demonstrates the LinkedIn Parser's capabilities for scraping LinkedIn content
* with keyword-based searching, location filtering, and AI analysis.
*
* This demo uses simulated data for safety and demonstration purposes.
*/
const { logger } = require("../ai-analyzer");
const fs = require("fs");
const path = require("path");
// Terminal colors for demo output
const colors = {
  reset: "\x1b[0m",
  bright: "\x1b[1m",
  cyan: "\x1b[36m",
  green: "\x1b[32m",
  yellow: "\x1b[33m",
  blue: "\x1b[34m",
  magenta: "\x1b[35m",
  red: "\x1b[31m",
};
// Small console-logging facade: each method prints with a role-specific ANSI
// color/weight and resets terminal styling afterwards.
const demo = {
  title: (text) =>
    console.log(`\n${colors.bright}${colors.cyan}${text}${colors.reset}`),
  section: (text) =>
    console.log(`\n${colors.bright}${colors.magenta}${text}${colors.reset}`),
  success: (text) => console.log(`${colors.green}${text}${colors.reset}`),
  info: (text) => console.log(`${colors.blue} ${text}${colors.reset}`),
  warning: (text) => console.log(`${colors.yellow}⚠️ ${text}${colors.reset}`),
  error: (text) => console.log(`${colors.red}${text}${colors.reset}`),
  code: (text) => console.log(`${colors.cyan}${text}${colors.reset}`),
};
// Mock data for demonstration
const mockPosts = [
{
id: "post_1",
content:
"Just got laid off from my software engineering role at TechCorp. Looking for new opportunities in Toronto. This is really tough but I'm staying positive!",
original_content:
"Just got #laidoff from my software engineering role at TechCorp! Looking for new opportunities in #Toronto. This is really tough but I'm staying positive! 🚀",
author: {
name: "John Doe",
title: "Software Engineer",
company: "TechCorp",
location: "Toronto, Ontario, Canada",
profile_url: "https://linkedin.com/in/johndoe",
},
engagement: { likes: 45, comments: 12, shares: 3 },
metadata: {
post_date: "2024-01-10T14:30:00Z",
scraped_at: "2024-01-15T10:30:00Z",
search_keyword: "layoff",
location_validated: true,
},
},
{
id: "post_2",
content:
"Our company is downsizing and I'm affected. This is really tough news but I'm grateful for the time I had here.",
original_content:
"Our company is #downsizing and I'm affected. This is really tough news but I'm grateful for the time I had here. #RIF #layoff",
author: {
name: "Jane Smith",
title: "Product Manager",
company: "StartupXYZ",
location: "Vancouver, British Columbia, Canada",
profile_url: "https://linkedin.com/in/janesmith",
},
engagement: { likes: 23, comments: 8, shares: 1 },
metadata: {
post_date: "2024-01-09T16:45:00Z",
scraped_at: "2024-01-15T10:30:00Z",
search_keyword: "downsizing",
location_validated: true,
},
},
{
id: "post_3",
content:
"Open to work! Looking for new opportunities in software development. I have 5 years of experience in React, Node.js, and cloud technologies.",
original_content:
"Open to work! Looking for new opportunities in software development. I have 5 years of experience in #React, #NodeJS, and #cloud technologies. #opentowork #jobsearch",
author: {
name: "Bob Wilson",
title: "Full Stack Developer",
company: "Freelance",
location: "Calgary, Alberta, Canada",
profile_url: "https://linkedin.com/in/bobwilson",
},
engagement: { likes: 67, comments: 15, shares: 8 },
metadata: {
post_date: "2024-01-08T11:20:00Z",
scraped_at: "2024-01-15T10:30:00Z",
search_keyword: "open to work",
location_validated: true,
},
},
];
// Walk through every demo section in sequence, pausing for an Enter
// keypress between sections.
async function runDemo() {
  demo.title("=== LinkedIn Parser Demo ===");
  demo.info(
    "This demo showcases the LinkedIn Parser's capabilities for scraping LinkedIn content."
  );
  demo.info("All data shown is simulated for demonstration purposes.");
  demo.info("Press Enter to continue through each section...\n");
  await waitForEnter();
  // 1. Configuration Demo
  await demonstrateConfiguration();
  // 2. Keyword Loading Demo
  await demonstrateKeywordLoading();
  // 3. Search Process Demo
  await demonstrateSearchProcess();
  // 4. Location Filtering Demo
  await demonstrateLocationFiltering();
  // 5. AI Analysis Demo
  await demonstrateAIAnalysis();
  // 6. Output Generation Demo
  await demonstrateOutputGeneration();
  demo.title("=== Demo Complete ===");
  demo.success("LinkedIn Parser demo completed successfully!");
  demo.info("Check the README.md for detailed usage instructions.");
}
/**
 * Section 1: prints the configuration surface of the parser —
 * the supported .env variables and command-line flags. Output only.
 */
async function demonstrateConfiguration() {
  demo.section("1. Configuration Setup");
  demo.info(
    "The LinkedIn Parser uses environment variables and command-line options for configuration."
  );

  demo.code("// Environment Variables (.env file)");
  const envVariables = [
    "LINKEDIN_USERNAME=your_email@example.com",
    "LINKEDIN_PASSWORD=your_password",
    "CITY=Toronto",
    "DATE_POSTED=past-week",
    "SORT_BY=date_posted",
    "WHEELS=5",
    "LOCATION_FILTER=Ontario,Manitoba",
    "ENABLE_LOCATION_CHECK=true",
    "ENABLE_LOCAL_AI=true",
    'AI_CONTEXT="job layoffs and workforce reduction"',
    "OLLAMA_MODEL=mistral",
  ];
  envVariables.forEach((line) => demo.info(line));

  demo.code("// Command Line Options");
  const cliExamples = [
    'node index.js --keyword="layoff,downsizing" --city="Vancouver"',
    "node index.js --no-location --no-ai",
    "node index.js --output=results/my-results.json",
    "node index.js --ai-after",
  ];
  cliExamples.forEach((line) => demo.info(line));

  await waitForEnter();
}
/**
 * Section 2: shows how search keywords are loaded from the bundled CSV
 * file and how they can be overridden from the command line.
 */
async function demonstrateKeywordLoading() {
  demo.section("2. Keyword Loading");
  demo.info(
    "Keywords can be loaded from CSV files or specified via command line."
  );

  // Simulated CSV load — the real parser reads this file from disk.
  demo.code("// Loading keywords from CSV file");
  logger.step("Loading keywords from keywords/linkedin-keywords.csv");
  const keywords = [
    "layoff",
    "downsizing",
    "reduction in force",
    "RIF",
    "termination",
    "job loss",
    "workforce reduction",
    "open to work",
    "actively seeking",
    "job search",
  ];
  demo.success(`Loaded ${keywords.length} keywords from CSV file`);
  demo.info(`Keywords: ${keywords.slice(0, 5).join(", ")}...`);

  demo.code("// Command line keyword override");
  demo.info('node index.js --keyword="layoff,downsizing"');
  demo.info('node index.js --add-keyword="hiring freeze"');
  await waitForEnter();
}
/**
 * Section 3: simulates the keyword search loop — one mock browser
 * search per keyword with randomized found/accepted counts.
 */
async function demonstrateSearchProcess() {
  demo.section("3. Search Process Simulation");
  demo.info(
    "The parser performs automated LinkedIn searches for each keyword."
  );
  for (const keyword of ["layoff", "downsizing", "open to work"]) {
    demo.code(`// Searching for keyword: "${keyword}"`);
    logger.search(`Searching for "${keyword}" in Toronto`);
    await simulateSearch();
    // Randomized demo numbers: 10-59 posts found, ~30% accepted.
    const found = Math.floor(Math.random() * 50) + 10;
    const accepted = Math.floor(found * 0.3);
    logger.info(`Found ${found} posts, checking profiles for location...`);
    logger.success(`Accepted ${accepted} posts after location validation`);
    console.log();
  }
  await waitForEnter();
}
/**
 * Section 4: walks through sample author locations and shows which
 * ones the Ontario/Manitoba filter would accept or reject.
 */
async function demonstrateLocationFiltering() {
  demo.section("4. Location Filtering");
  demo.info(
    "Posts are filtered based on author location using geographic validation."
  );
  demo.code("// Location filter configuration");
  demo.info("LOCATION_FILTER=Ontario,Manitoba");
  demo.info("ENABLE_LOCATION_CHECK=true");

  demo.code("// Location validation examples");
  // Each sample carries the expected outcome for the configured filter.
  const samples = [
    { location: "Toronto, Ontario, Canada", valid: true },
    { location: "Vancouver, British Columbia, Canada", valid: false },
    { location: "Calgary, Alberta, Canada", valid: false },
    { location: "Winnipeg, Manitoba, Canada", valid: true },
    { location: "New York, NY, USA", valid: false },
  ];
  for (const { location, valid } of samples) {
    logger.location(`Checking location: ${location}`);
    if (valid) {
      logger.success(`✅ Location valid - post accepted`);
    } else {
      logger.warning(`❌ Location invalid - post rejected`);
    }
  }
  await waitForEnter();
}
/**
 * Section 5: simulates AI relevance scoring of the mock posts and
 * attaches an `ai_analysis` object to each entry of mockPosts.
 */
async function demonstrateAIAnalysis() {
  demo.section("5. AI Analysis");
  demo.info(
    "Posts can be analyzed using local Ollama or OpenAI for relevance scoring."
  );
  demo.code("// AI analysis configuration");
  demo.info("ENABLE_LOCAL_AI=true");
  demo.info('AI_CONTEXT="job layoffs and workforce reduction"');
  demo.info("OLLAMA_MODEL=mistral");

  demo.code("// Analyzing posts with AI");
  logger.ai("Starting AI analysis of accepted posts...");
  for (const [index, post] of mockPosts.entries()) {
    logger.info(
      `Analyzing post ${index + 1}: ${post.content.substring(0, 50)}...`
    );
    // Simulated model latency.
    await simulateProcessing();
    // Randomized demo scores, biased high to look plausible.
    const relevanceScore = 0.7 + Math.random() * 0.3;
    const confidence = 0.8 + Math.random() * 0.2;
    logger.success(
      `Relevance: ${relevanceScore.toFixed(2)}, Confidence: ${confidence.toFixed(2)}`
    );
    // Attach the simulated analysis to the post (mutates mockPosts).
    post.ai_analysis = {
      relevance_score: relevanceScore,
      confidence: confidence,
      context_match: relevanceScore > 0.7,
      analysis_text: `This post discusses ${post.metadata.search_keyword} and is relevant to the search context.`,
    };
  }
  await waitForEnter();
}
/**
 * Section 6: builds the output payload (metadata + posts), writes it
 * to demo-results.json next to this script, and prints a summary plus
 * a sketch of the file structure.
 */
async function demonstrateOutputGeneration() {
  demo.section("6. Output Generation");
  demo.info("Results are saved to JSON files with comprehensive metadata.");
  demo.code("// Generating output file");
  logger.file("Saving results to JSON file...");

  // Counts other than accepted_posts are fixed demo values.
  const payload = {
    metadata: {
      timestamp: new Date().toISOString(),
      keywords: ["layoff", "downsizing", "open to work"],
      city: "Toronto",
      date_posted: "past-week",
      sort_by: "date_posted",
      total_posts_found: 150,
      accepted_posts: mockPosts.length,
      rejected_posts: 147,
      processing_time_seconds: 180,
    },
    posts: mockPosts,
  };

  // Persist next to this script so the demo is self-contained.
  const outputPath = path.join(__dirname, "demo-results.json");
  fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
  demo.success(`Results saved to: ${outputPath}`);
  demo.info(`Total posts processed: ${payload.metadata.total_posts_found}`);
  demo.info(`Posts accepted: ${payload.metadata.accepted_posts}`);
  demo.info(`Posts rejected: ${payload.metadata.rejected_posts}`);

  demo.code("// Output file structure");
  const structure = [
    "📁 demo-results.json",
    " ├── metadata",
    " │ ├── timestamp",
    " │ ├── keywords",
    " │ ├── city",
    " │ ├── total_posts_found",
    " │ ├── accepted_posts",
    " │ └── processing_time_seconds",
    " └── posts[]",
    " ├── id",
    " ├── content",
    " ├── author",
    " ├── engagement",
    " ├── ai_analysis",
    " └── metadata",
  ];
  structure.forEach((line) => demo.info(line));
  await waitForEnter();
}
// Helper functions
/**
 * Blocks until the user presses Enter on stdin.
 * @returns {Promise<void>} resolves after Enter is pressed
 */
function waitForEnter() {
  const readline = require("readline");
  return new Promise((resolve) => {
    const prompt = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
    });
    prompt.question("\nPress Enter to continue...", () => {
      prompt.close();
      resolve();
    });
  });
}
/**
 * Simulates the browser search flow: logs one step every 800ms, then
 * resolves one tick (800ms) after the final step — matching the cadence
 * of the original interval-based implementation.
 */
async function simulateSearch() {
  const steps = [
    "Launching browser",
    "Logging in",
    "Navigating to search",
    "Loading results",
  ];
  const tick = () => new Promise((resolve) => setTimeout(resolve, 800));
  for (const step of steps) {
    await tick();
    logger.info(step);
  }
  // One extra tick: the interval version resolved on the pass after the
  // last step was printed.
  await tick();
}
/**
 * Shows an animated "Processing..." indicator on stdout for ~1.5s.
 * The dot frame advances every 500ms; the line is cleared on finish.
 * @returns {Promise<void>} resolves when the animation completes
 */
function simulateProcessing() {
  const frames = [".", "..", "..."];
  let frameIndex = 0;
  return new Promise((resolve) => {
    const spinner = setInterval(() => {
      process.stdout.write(`\rProcessing${frames[frameIndex]}`);
      frameIndex = (frameIndex + 1) % frames.length;
    }, 500);
    setTimeout(() => {
      clearInterval(spinner);
      process.stdout.write("\r");
      resolve();
    }, 1500);
  });
}
// Run the demo if this file is executed directly
if (require.main === module) {
  // Surface any failure and exit non-zero so shell callers can detect
  // a broken demo run.
  runDemo().catch((error) => {
    demo.error(`Demo failed: ${error.message}`);
    process.exit(1);
  });
}
// Expose runDemo so the demo can also be driven programmatically.
module.exports = { runDemo };

View File

@ -1,51 +1,51 @@
keyword
acquisition
actively seeking
bankruptcy
business realignment
career transition
company closure
company reorganization
cost cutting
department closure
downsizing
furlough
headcount reduction
hiring
hiring freeze
involuntary separation
job cuts
job elimination
job loss
job opportunity
job search
layoff
looking for opportunities
mass layoff
merger
new position
new role
office closure
open to work
organizational change
outplacement
plant closure
position elimination
recruiting
reduction in force
redundancies
redundancy
restructuring
rightsizing
RIF
role elimination
separation
site closure
staff reduction
terminated
termination
voluntary separation
workforce adjustment
workforce optimization
workforce reduction
workforce transition
keyword
acquisition
actively seeking
bankruptcy
business realignment
career transition
company closure
company reorganization
cost cutting
department closure
downsizing
furlough
headcount reduction
hiring
hiring freeze
involuntary separation
job cuts
job elimination
job loss
job opportunity
job search
layoff
looking for opportunities
mass layoff
merger
new position
new role
office closure
open to work
organizational change
outplacement
plant closure
position elimination
recruiting
reduction in force
redundancies
redundancy
restructuring
rightsizing
RIF
role elimination
separation
site closure
staff reduction
terminated
termination
voluntary separation
workforce adjustment
workforce optimization
workforce reduction
workforce transition

1 keyword
2 acquisition
3 actively seeking
4 bankruptcy
5 business realignment
6 career transition
7 company closure
8 company reorganization
9 cost cutting
10 department closure
11 downsizing
12 furlough
13 headcount reduction
14 hiring
15 hiring freeze
16 involuntary separation
17 job cuts
18 job elimination
19 job loss
20 job opportunity
21 job search
22 layoff
23 looking for opportunities
24 mass layoff
25 merger
26 new position
27 new role
28 office closure
29 open to work
30 organizational change
31 outplacement
32 plant closure
33 position elimination
34 recruiting
35 reduction in force
36 redundancies
37 redundancy
38 restructuring
39 rightsizing
40 RIF
41 role elimination
42 separation
43 site closure
44 staff reduction
45 terminated
46 termination
47 voluntary separation
48 workforce adjustment
49 workforce optimization
50 workforce reduction
51 workforce transition

View File

@ -1,230 +1,230 @@
/**
* LinkedIn Parsing Strategy
*
* Uses core-parser for browser management and ai-analyzer for utilities
*/
const {
logger,
cleanText,
containsAnyKeyword,
validateLocationAgainstFilters,
extractLocationFromProfile,
} = require("ai-analyzer");
/**
 * LinkedIn parsing strategy function
 *
 * Runs one LinkedIn content search per keyword, extracts matching posts,
 * deduplicates them by post id, optionally rejects posts whose author
 * location fails the configured filters, and stops once `maxResults`
 * posts have been accepted.
 *
 * @param {object} coreParser - parser facade providing createPage,
 *   authenticate, navigateTo and navigationManager (see core-parser).
 * @param {object} [options={}]
 * @param {string[]} [options.keywords] - search terms (one search each)
 * @param {*} [options.locationFilter=null] - passed through to
 *   validateLocationAgainstFilters; falsy disables location checks
 * @param {number} [options.maxResults=50] - cap on accepted posts
 * @param {object} [options.credentials={}] - LinkedIn login credentials
 * @returns {Promise<{results: Array, rejectedResults: Array, summary: object}>}
 *   accepted posts, rejected posts (each with a rejectionReason), and a
 *   run summary
 * @throws rethrows any authentication/navigation failure after logging it
 */
async function linkedinStrategy(coreParser, options = {}) {
  const {
    keywords = ["layoff", "downsizing", "job cuts"],
    locationFilter = null,
    maxResults = 50,
    credentials = {},
  } = options;
  const results = [];
  const rejectedResults = [];
  // Post ids already handled — dedupes posts matched by several keywords.
  // (A `seenProfiles` set used to be declared here but was never read; removed.)
  const seenPosts = new Set();
  try {
    // Create main page
    const page = await coreParser.createPage("linkedin-main");
    // Authenticate to LinkedIn
    logger.info("🔐 Authenticating to LinkedIn...");
    await coreParser.authenticate("linkedin", credentials, "linkedin-main");
    logger.info("✅ LinkedIn authentication successful");
    // Search for posts with each keyword
    for (const keyword of keywords) {
      logger.info(`🔍 Searching LinkedIn for: "${keyword}"`);
      const searchUrl = `https://www.linkedin.com/search/results/content/?keywords=${encodeURIComponent(
        keyword
      )}&sortBy=date_posted`;
      await coreParser.navigateTo(searchUrl, {
        pageId: "linkedin-main",
        retries: 2,
      });
      // Wait for the results container to appear.
      // NOTE(review): this hits the same URL a second time via
      // navigateAndWaitFor — confirm whether the navigateTo above is
      // redundant or required for its retry behaviour.
      const hasResults = await coreParser.navigationManager.navigateAndWaitFor(
        searchUrl,
        ".search-results-container",
        { pageId: "linkedin-main", timeout: 10000 }
      );
      if (!hasResults) {
        logger.warning(`No search results found for keyword: ${keyword}`);
        continue;
      }
      // Extract posts from current page
      const posts = await extractPostsFromPage(page, keyword);
      for (const post of posts) {
        // Skip duplicates
        if (seenPosts.has(post.postId)) continue;
        seenPosts.add(post.postId);
        // Validate location if filtering enabled
        if (locationFilter) {
          const locationValid = validateLocationAgainstFilters(
            post.location || post.profileLocation,
            locationFilter
          );
          if (!locationValid) {
            rejectedResults.push({
              ...post,
              rejectionReason: "Location filter mismatch",
            });
            continue;
          }
        }
        results.push(post);
        if (results.length >= maxResults) {
          logger.info(`📊 Reached maximum results limit: ${maxResults}`);
          break;
        }
      }
      if (results.length >= maxResults) break;
    }
    logger.info(
      `🎯 LinkedIn parsing completed: ${results.length} posts found, ${rejectedResults.length} rejected`
    );
    return {
      results,
      rejectedResults,
      summary: {
        totalPosts: results.length,
        totalRejected: rejectedResults.length,
        keywords: keywords.join(", "),
        locationFilter,
      },
    };
  } catch (error) {
    logger.error(`❌ LinkedIn parsing failed: ${error.message}`);
    throw error;
  }
}
/**
 * Extract posts from current search results page
 *
 * Scans every LinkedIn feed-update element on the page and collects the
 * posts that extractPostData accepts. Failures on individual posts are
 * logged and skipped; a page-level failure yields an empty array.
 *
 * @param {object} page - browser page handle exposing $$()
 * @param {string} keyword - search keyword the results belong to
 * @returns {Promise<Array>} extracted post records (possibly empty)
 */
async function extractPostsFromPage(page, keyword) {
  const collected = [];
  try {
    const updates = await page.$$(".feed-shared-update-v2");
    for (const update of updates) {
      try {
        const parsed = await extractPostData(update, keyword);
        if (parsed) {
          collected.push(parsed);
        }
      } catch (error) {
        logger.warning(`Failed to extract post data: ${error.message}`);
      }
    }
  } catch (error) {
    logger.error(`Failed to extract posts from page: ${error.message}`);
  }
  return collected;
}
/**
 * Extract data from individual post element
 *
 * Pulls id, author, content, timestamp and engagement counts out of a
 * single feed-update element, then keeps the post only if its content
 * mentions the search keyword.
 *
 * @param {object} postElement - element handle for one feed update
 * @param {string} keyword - keyword the post must mention to be kept
 * @returns {Promise<object|null>} the post record, or null when the post
 *   is irrelevant or extraction fails
 */
async function extractPostData(postElement, keyword) {
  try {
    // Cleaned text of the first child matching `selector`, else `fallback`.
    const textFrom = async (selector, fallback) => {
      const el = await postElement.$(selector);
      return el ? cleanText(await el.textContent()) : fallback;
    };
    // Raw attribute of the first child matching `selector`, else "".
    const attrFrom = async (selector, attribute) => {
      const el = await postElement.$(selector);
      return el ? await el.getAttribute(attribute) : "";
    };

    const postId = (await postElement.getAttribute("data-urn")) || "";
    const authorName = await textFrom(".feed-shared-actor__name", "");
    const authorUrl = await attrFrom(".feed-shared-actor__name a", "href");
    const content = await textFrom(".feed-shared-text", "");
    const timestamp = await attrFrom(
      ".feed-shared-actor__sub-description time",
      "datetime"
    );
    const likesText = await textFrom(".social-counts-reactions__count", "0");
    const commentsText = await textFrom(".social-counts-comments__count", "0");

    // Only keep posts whose body actually mentions the search keyword.
    if (!containsAnyKeyword(content, [keyword])) {
      return null;
    }
    return {
      postId: cleanText(postId),
      authorName,
      authorUrl,
      content,
      timestamp,
      keyword,
      likes: extractNumber(likesText),
      comments: extractNumber(commentsText),
      extractedAt: new Date().toISOString(),
      source: "linkedin",
    };
  } catch (error) {
    logger.warning(`Error extracting post data: ${error.message}`);
    return null;
  }
}
/**
 * Extract numbers from text (e.g., "15 likes" -> 15)
 *
 * @param {string} text - text possibly containing a number
 * @returns {number} the first run of digits as a base-10 integer, or 0
 *   when the text contains no digits
 */
function extractNumber(text) {
  const match = text.match(/\d+/);
  // Always pass the radix so digit runs like "08" parse as decimal.
  return match ? Number.parseInt(match[0], 10) : 0;
}
// Public API: the strategy entry point plus the extraction helpers
// (exported separately so they can be unit-tested in isolation).
module.exports = {
  linkedinStrategy,
  extractPostsFromPage,
  extractPostData,
};
/**
* LinkedIn Parsing Strategy
*
* Uses core-parser for browser management and ai-analyzer for utilities
*/
const {
logger,
cleanText,
containsAnyKeyword,
validateLocationAgainstFilters,
extractLocationFromProfile,
} = require("ai-analyzer");
/**
 * LinkedIn parsing strategy function
 *
 * Runs one LinkedIn content search per keyword, extracts matching posts,
 * deduplicates them by post id, optionally rejects posts whose author
 * location fails the configured filters, and stops once `maxResults`
 * posts have been accepted.
 *
 * @param {object} coreParser - parser facade providing createPage,
 *   authenticate, navigateTo and navigationManager (see core-parser).
 * @param {object} [options={}]
 * @param {string[]} [options.keywords] - search terms (one search each)
 * @param {*} [options.locationFilter=null] - passed through to
 *   validateLocationAgainstFilters; falsy disables location checks
 * @param {number} [options.maxResults=50] - cap on accepted posts
 * @param {object} [options.credentials={}] - LinkedIn login credentials
 * @returns {Promise<{results: Array, rejectedResults: Array, summary: object}>}
 *   accepted posts, rejected posts (each with a rejectionReason), and a
 *   run summary
 * @throws rethrows any authentication/navigation failure after logging it
 */
async function linkedinStrategy(coreParser, options = {}) {
  const {
    keywords = ["layoff", "downsizing", "job cuts"],
    locationFilter = null,
    maxResults = 50,
    credentials = {},
  } = options;
  const results = [];
  const rejectedResults = [];
  // Post ids already handled — dedupes posts matched by several keywords.
  // (A `seenProfiles` set used to be declared here but was never read; removed.)
  const seenPosts = new Set();
  try {
    // Create main page
    const page = await coreParser.createPage("linkedin-main");
    // Authenticate to LinkedIn
    logger.info("🔐 Authenticating to LinkedIn...");
    await coreParser.authenticate("linkedin", credentials, "linkedin-main");
    logger.info("✅ LinkedIn authentication successful");
    // Search for posts with each keyword
    for (const keyword of keywords) {
      logger.info(`🔍 Searching LinkedIn for: "${keyword}"`);
      const searchUrl = `https://www.linkedin.com/search/results/content/?keywords=${encodeURIComponent(
        keyword
      )}&sortBy=date_posted`;
      await coreParser.navigateTo(searchUrl, {
        pageId: "linkedin-main",
        retries: 2,
      });
      // Wait for the results container to appear.
      // NOTE(review): this hits the same URL a second time via
      // navigateAndWaitFor — confirm whether the navigateTo above is
      // redundant or required for its retry behaviour.
      const hasResults = await coreParser.navigationManager.navigateAndWaitFor(
        searchUrl,
        ".search-results-container",
        { pageId: "linkedin-main", timeout: 10000 }
      );
      if (!hasResults) {
        logger.warning(`No search results found for keyword: ${keyword}`);
        continue;
      }
      // Extract posts from current page
      const posts = await extractPostsFromPage(page, keyword);
      for (const post of posts) {
        // Skip duplicates
        if (seenPosts.has(post.postId)) continue;
        seenPosts.add(post.postId);
        // Validate location if filtering enabled
        if (locationFilter) {
          const locationValid = validateLocationAgainstFilters(
            post.location || post.profileLocation,
            locationFilter
          );
          if (!locationValid) {
            rejectedResults.push({
              ...post,
              rejectionReason: "Location filter mismatch",
            });
            continue;
          }
        }
        results.push(post);
        if (results.length >= maxResults) {
          logger.info(`📊 Reached maximum results limit: ${maxResults}`);
          break;
        }
      }
      if (results.length >= maxResults) break;
    }
    logger.info(
      `🎯 LinkedIn parsing completed: ${results.length} posts found, ${rejectedResults.length} rejected`
    );
    return {
      results,
      rejectedResults,
      summary: {
        totalPosts: results.length,
        totalRejected: rejectedResults.length,
        keywords: keywords.join(", "),
        locationFilter,
      },
    };
  } catch (error) {
    logger.error(`❌ LinkedIn parsing failed: ${error.message}`);
    throw error;
  }
}
/**
 * Extract posts from current search results page
 *
 * Scans every LinkedIn feed-update element on the page and collects the
 * posts that extractPostData accepts. Failures on individual posts are
 * logged and skipped; a page-level failure yields an empty array.
 *
 * @param {object} page - browser page handle exposing $$()
 * @param {string} keyword - search keyword the results belong to
 * @returns {Promise<Array>} extracted post records (possibly empty)
 */
async function extractPostsFromPage(page, keyword) {
  const collected = [];
  try {
    const updates = await page.$$(".feed-shared-update-v2");
    for (const update of updates) {
      try {
        const parsed = await extractPostData(update, keyword);
        if (parsed) {
          collected.push(parsed);
        }
      } catch (error) {
        logger.warning(`Failed to extract post data: ${error.message}`);
      }
    }
  } catch (error) {
    logger.error(`Failed to extract posts from page: ${error.message}`);
  }
  return collected;
}
/**
 * Extract data from individual post element
 *
 * Pulls id, author, content, timestamp and engagement counts out of a
 * single feed-update element, then keeps the post only if its content
 * mentions the search keyword.
 *
 * @param {object} postElement - element handle for one feed update
 * @param {string} keyword - keyword the post must mention to be kept
 * @returns {Promise<object|null>} the post record, or null when the post
 *   is irrelevant or extraction fails
 */
async function extractPostData(postElement, keyword) {
  try {
    // Cleaned text of the first child matching `selector`, else `fallback`.
    const textFrom = async (selector, fallback) => {
      const el = await postElement.$(selector);
      return el ? cleanText(await el.textContent()) : fallback;
    };
    // Raw attribute of the first child matching `selector`, else "".
    const attrFrom = async (selector, attribute) => {
      const el = await postElement.$(selector);
      return el ? await el.getAttribute(attribute) : "";
    };

    const postId = (await postElement.getAttribute("data-urn")) || "";
    const authorName = await textFrom(".feed-shared-actor__name", "");
    const authorUrl = await attrFrom(".feed-shared-actor__name a", "href");
    const content = await textFrom(".feed-shared-text", "");
    const timestamp = await attrFrom(
      ".feed-shared-actor__sub-description time",
      "datetime"
    );
    const likesText = await textFrom(".social-counts-reactions__count", "0");
    const commentsText = await textFrom(".social-counts-comments__count", "0");

    // Only keep posts whose body actually mentions the search keyword.
    if (!containsAnyKeyword(content, [keyword])) {
      return null;
    }
    return {
      postId: cleanText(postId),
      authorName,
      authorUrl,
      content,
      timestamp,
      keyword,
      likes: extractNumber(likesText),
      comments: extractNumber(commentsText),
      extractedAt: new Date().toISOString(),
      source: "linkedin",
    };
  } catch (error) {
    logger.warning(`Error extracting post data: ${error.message}`);
    return null;
  }
}
/**
 * Extract numbers from text (e.g., "15 likes" -> 15)
 *
 * @param {string} text - text possibly containing a number
 * @returns {number} the first run of digits as a base-10 integer, or 0
 *   when the text contains no digits
 */
function extractNumber(text) {
  const match = text.match(/\d+/);
  // Always pass the radix so digit runs like "08" parse as decimal.
  return match ? Number.parseInt(match[0], 10) : 0;
}
// Public API: the strategy entry point plus the extraction helpers
// (exported separately so they can be unit-tested in isolation).
module.exports = {
  linkedinStrategy,
  extractPostsFromPage,
  extractPostData,
};

View File

@ -1,34 +1,34 @@
{
"results": [
{
"text": "Just got laid off from my software engineering role. Looking for new opportunities in the Toronto area.",
"location": "Toronto, Ontario, Canada",
"keyword": "layoff",
"timestamp": "2024-01-15T10:30:00Z"
},
{
"text": "Excited to share that I'm starting a new position as a Senior Developer at TechCorp!",
"location": "Vancouver, BC, Canada",
"keyword": "hiring",
"timestamp": "2024-01-15T11:00:00Z"
},
{
"text": "Our company is going through a restructuring and unfortunately had to let go of 50 employees.",
"location": "Montreal, Quebec, Canada",
"keyword": "layoff",
"timestamp": "2024-01-15T11:30:00Z"
},
{
"text": "Beautiful weather today! Perfect for a walk in the park.",
"location": "Calgary, Alberta, Canada",
"keyword": "weather",
"timestamp": "2024-01-15T12:00:00Z"
},
{
"text": "We're hiring! Looking for talented developers to join our growing team.",
"location": "Ottawa, Ontario, Canada",
"keyword": "hiring",
"timestamp": "2024-01-15T12:30:00Z"
}
]
}
{
"results": [
{
"text": "Just got laid off from my software engineering role. Looking for new opportunities in the Toronto area.",
"location": "Toronto, Ontario, Canada",
"keyword": "layoff",
"timestamp": "2024-01-15T10:30:00Z"
},
{
"text": "Excited to share that I'm starting a new position as a Senior Developer at TechCorp!",
"location": "Vancouver, BC, Canada",
"keyword": "hiring",
"timestamp": "2024-01-15T11:00:00Z"
},
{
"text": "Our company is going through a restructuring and unfortunately had to let go of 50 employees.",
"location": "Montreal, Quebec, Canada",
"keyword": "layoff",
"timestamp": "2024-01-15T11:30:00Z"
},
{
"text": "Beautiful weather today! Perfect for a walk in the park.",
"location": "Calgary, Alberta, Canada",
"keyword": "weather",
"timestamp": "2024-01-15T12:00:00Z"
},
{
"text": "We're hiring! Looking for talented developers to join our growing team.",
"location": "Ottawa, Ontario, Canada",
"keyword": "hiring",
"timestamp": "2024-01-15T12:30:00Z"
}
]
}