update CoreParser to increase default timeout and change navigation waitUntil option to networkidle
This commit is contained in:
parent
ef9720abf2
commit
83ed86668e
@ -1,250 +1,250 @@
|
|||||||
#!/usr/bin/env node
|
#!/usr/bin/env node
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* AI Analyzer CLI
|
* AI Analyzer CLI
|
||||||
*
|
*
|
||||||
* Command-line interface for the ai-analyzer package
|
* Command-line interface for the ai-analyzer package
|
||||||
* Can be used by any parser to analyze JSON files
|
* Can be used by any parser to analyze JSON files
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const fs = require("fs");
|
const fs = require("fs");
|
||||||
const path = require("path");
|
const path = require("path");
|
||||||
|
|
||||||
// Import AI utilities from this package
|
// Import AI utilities from this package
|
||||||
const {
|
const {
|
||||||
logger,
|
logger,
|
||||||
analyzeBatch,
|
analyzeBatch,
|
||||||
checkOllamaStatus,
|
checkOllamaStatus,
|
||||||
findLatestResultsFile,
|
findLatestResultsFile,
|
||||||
} = require("./index");
|
} = require("./index");
|
||||||
|
|
||||||
// Default configuration
|
// Default configuration
|
||||||
const DEFAULT_CONTEXT =
|
const DEFAULT_CONTEXT =
|
||||||
process.env.AI_CONTEXT || "job market analysis and trends";
|
process.env.AI_CONTEXT || "job market analysis and trends";
|
||||||
const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "mistral";
|
const DEFAULT_MODEL = process.env.OLLAMA_MODEL || "mistral";
|
||||||
const DEFAULT_RESULTS_DIR = "results";
|
const DEFAULT_RESULTS_DIR = "results";
|
||||||
|
|
||||||
// Parse command line arguments
|
// Parse command line arguments
|
||||||
const args = process.argv.slice(2);
|
const args = process.argv.slice(2);
|
||||||
let inputFile = null;
|
let inputFile = null;
|
||||||
let outputFile = null;
|
let outputFile = null;
|
||||||
let context = DEFAULT_CONTEXT;
|
let context = DEFAULT_CONTEXT;
|
||||||
let model = DEFAULT_MODEL;
|
let model = DEFAULT_MODEL;
|
||||||
let findLatest = false;
|
let findLatest = false;
|
||||||
let resultsDir = DEFAULT_RESULTS_DIR;
|
let resultsDir = DEFAULT_RESULTS_DIR;
|
||||||
|
|
||||||
for (const arg of args) {
|
for (const arg of args) {
|
||||||
if (arg.startsWith("--input=")) {
|
if (arg.startsWith("--input=")) {
|
||||||
inputFile = arg.split("=")[1];
|
inputFile = arg.split("=")[1];
|
||||||
} else if (arg.startsWith("--output=")) {
|
} else if (arg.startsWith("--output=")) {
|
||||||
outputFile = arg.split("=")[1];
|
outputFile = arg.split("=")[1];
|
||||||
} else if (arg.startsWith("--context=")) {
|
} else if (arg.startsWith("--context=")) {
|
||||||
context = arg.split("=")[1];
|
context = arg.split("=")[1];
|
||||||
} else if (arg.startsWith("--model=")) {
|
} else if (arg.startsWith("--model=")) {
|
||||||
model = arg.split("=")[1];
|
model = arg.split("=")[1];
|
||||||
} else if (arg.startsWith("--dir=")) {
|
} else if (arg.startsWith("--dir=")) {
|
||||||
resultsDir = arg.split("=")[1];
|
resultsDir = arg.split("=")[1];
|
||||||
} else if (arg === "--latest") {
|
} else if (arg === "--latest") {
|
||||||
findLatest = true;
|
findLatest = true;
|
||||||
} else if (arg === "--help" || arg === "-h") {
|
} else if (arg === "--help" || arg === "-h") {
|
||||||
console.log(`
|
console.log(`
|
||||||
AI Analyzer CLI
|
AI Analyzer CLI
|
||||||
|
|
||||||
Usage: node cli.js [options]
|
Usage: node cli.js [options]
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
--input=FILE Input JSON file
|
--input=FILE Input JSON file
|
||||||
--output=FILE Output file (default: ai-analysis-{timestamp}.json)
|
--output=FILE Output file (default: ai-analysis-{timestamp}.json)
|
||||||
--context="description" Analysis context (default: "${DEFAULT_CONTEXT}")
|
--context="description" Analysis context (default: "${DEFAULT_CONTEXT}")
|
||||||
--model=MODEL Ollama model (default: ${DEFAULT_MODEL})
|
--model=MODEL Ollama model (default: ${DEFAULT_MODEL})
|
||||||
--latest Use latest results file from results directory
|
--latest Use latest results file from results directory
|
||||||
--dir=PATH Directory to look for results (default: 'results')
|
--dir=PATH Directory to look for results (default: 'results')
|
||||||
--help, -h Show this help
|
--help, -h Show this help
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
node cli.js --input=results.json
|
node cli.js --input=results.json
|
||||||
node cli.js --latest --dir=results
|
node cli.js --latest --dir=results
|
||||||
node cli.js --input=results.json --context="job trends" --model=mistral
|
node cli.js --input=results.json --context="job trends" --model=mistral
|
||||||
|
|
||||||
Environment Variables:
|
Environment Variables:
|
||||||
AI_CONTEXT Default analysis context
|
AI_CONTEXT Default analysis context
|
||||||
OLLAMA_MODEL Default Ollama model
|
OLLAMA_MODEL Default Ollama model
|
||||||
`);
|
`);
|
||||||
process.exit(0);
|
process.exit(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
try {
|
try {
|
||||||
// Determine input file
|
// Determine input file
|
||||||
if (findLatest) {
|
if (findLatest) {
|
||||||
try {
|
try {
|
||||||
inputFile = findLatestResultsFile(resultsDir);
|
inputFile = findLatestResultsFile(resultsDir);
|
||||||
logger.info(`Found latest results file: ${inputFile}`);
|
logger.info(`Found latest results file: ${inputFile}`);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(
|
logger.error(
|
||||||
`❌ No results files found in '${resultsDir}': ${error.message}`
|
`❌ No results files found in '${resultsDir}': ${error.message}`
|
||||||
);
|
);
|
||||||
logger.info(`💡 To create results files:`);
|
logger.info(`💡 To create results files:`);
|
||||||
logger.info(
|
logger.info(
|
||||||
` 1. Run a parser first (e.g., npm start in linkedin-parser)`
|
` 1. Run a parser first (e.g., npm start in linkedin-parser)`
|
||||||
);
|
);
|
||||||
logger.info(` 2. Or provide a specific file with --input=FILE`);
|
logger.info(` 2. Or provide a specific file with --input=FILE`);
|
||||||
logger.info(` 3. Or create a sample JSON file to test with`);
|
logger.info(` 3. Or create a sample JSON file to test with`);
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If inputFile is a relative path and --dir is set, resolve it
|
// If inputFile is a relative path and --dir is set, resolve it
|
||||||
if (inputFile && !path.isAbsolute(inputFile) && !fs.existsSync(inputFile)) {
|
if (inputFile && !path.isAbsolute(inputFile) && !fs.existsSync(inputFile)) {
|
||||||
const candidate = path.join(resultsDir, inputFile);
|
const candidate = path.join(resultsDir, inputFile);
|
||||||
if (fs.existsSync(candidate)) {
|
if (fs.existsSync(candidate)) {
|
||||||
inputFile = candidate;
|
inputFile = candidate;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!inputFile) {
|
if (!inputFile) {
|
||||||
logger.error("❌ Input file required. Use --input=FILE or --latest");
|
logger.error("❌ Input file required. Use --input=FILE or --latest");
|
||||||
logger.info(`💡 Examples:`);
|
logger.info(`💡 Examples:`);
|
||||||
logger.info(` node cli.js --input=results.json`);
|
logger.info(` node cli.js --input=results.json`);
|
||||||
logger.info(` node cli.js --latest --dir=results`);
|
logger.info(` node cli.js --latest --dir=results`);
|
||||||
logger.info(` node cli.js --help`);
|
logger.info(` node cli.js --help`);
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load input file
|
// Load input file
|
||||||
logger.step(`Loading input file: ${inputFile}`);
|
logger.step(`Loading input file: ${inputFile}`);
|
||||||
|
|
||||||
if (!fs.existsSync(inputFile)) {
|
if (!fs.existsSync(inputFile)) {
|
||||||
throw new Error(`Input file not found: ${inputFile}`);
|
throw new Error(`Input file not found: ${inputFile}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const data = JSON.parse(fs.readFileSync(inputFile, "utf-8"));
|
const data = JSON.parse(fs.readFileSync(inputFile, "utf-8"));
|
||||||
|
|
||||||
// Extract posts from different formats
|
// Extract posts from different formats
|
||||||
let posts = [];
|
let posts = [];
|
||||||
if (data.results && Array.isArray(data.results)) {
|
if (data.results && Array.isArray(data.results)) {
|
||||||
posts = data.results;
|
posts = data.results;
|
||||||
logger.info(`Found ${posts.length} items in results array`);
|
logger.info(`Found ${posts.length} items in results array`);
|
||||||
} else if (Array.isArray(data)) {
|
} else if (Array.isArray(data)) {
|
||||||
posts = data;
|
posts = data;
|
||||||
logger.info(`Found ${posts.length} items in array`);
|
logger.info(`Found ${posts.length} items in array`);
|
||||||
} else {
|
} else {
|
||||||
throw new Error("Invalid JSON format - need array or {results: [...]}");
|
throw new Error("Invalid JSON format - need array or {results: [...]}");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (posts.length === 0) {
|
if (posts.length === 0) {
|
||||||
throw new Error("No items found to analyze");
|
throw new Error("No items found to analyze");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check AI availability
|
// Check AI availability
|
||||||
logger.step("Checking AI availability");
|
logger.step("Checking AI availability");
|
||||||
const aiAvailable = await checkOllamaStatus(model);
|
const aiAvailable = await checkOllamaStatus(model);
|
||||||
if (!aiAvailable) {
|
if (!aiAvailable) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`AI not available. Make sure Ollama is running and model '${model}' is installed.`
|
`AI not available. Make sure Ollama is running and model '${model}' is installed.`
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if results already have AI analysis
|
// Check if results already have AI analysis
|
||||||
const hasExistingAI = posts.some((post) => post.aiAnalysis);
|
const hasExistingAI = posts.some((post) => post.aiAnalysis);
|
||||||
if (hasExistingAI) {
|
if (hasExistingAI) {
|
||||||
logger.info(
|
logger.info(
|
||||||
`📋 Results already contain AI analysis - will update with new context`
|
`📋 Results already contain AI analysis - will update with new context`
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prepare data for analysis
|
// Prepare data for analysis
|
||||||
const analysisData = posts.map((post, i) => ({
|
const analysisData = posts.map((post, i) => ({
|
||||||
text: post.text || post.content || post.post || "",
|
text: post.text || post.content || post.post || "",
|
||||||
location: post.location || "Unknown",
|
location: post.location || "Unknown",
|
||||||
keyword: post.keyword || "Unknown",
|
keyword: post.keyword || "Unknown",
|
||||||
timestamp: post.timestamp || new Date().toISOString(),
|
timestamp: post.timestamp || new Date().toISOString(),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// Run analysis
|
// Run analysis
|
||||||
logger.step(`Running AI analysis with context: "${context}"`);
|
logger.step(`Running AI analysis with context: "${context}"`);
|
||||||
const analysis = await analyzeBatch(analysisData, context, model);
|
const analysis = await analyzeBatch(analysisData, context, model);
|
||||||
|
|
||||||
// Integrate AI analysis back into the original results
|
// Integrate AI analysis back into the original results
|
||||||
const updatedPosts = posts.map((post, index) => {
|
const updatedPosts = posts.map((post, index) => {
|
||||||
const aiResult = analysis[index];
|
const aiResult = analysis[index];
|
||||||
return {
|
return {
|
||||||
...post,
|
...post,
|
||||||
aiAnalysis: {
|
aiAnalysis: {
|
||||||
isRelevant: aiResult.isRelevant,
|
isRelevant: aiResult.isRelevant,
|
||||||
confidence: aiResult.confidence,
|
confidence: aiResult.confidence,
|
||||||
reasoning: aiResult.reasoning,
|
reasoning: aiResult.reasoning,
|
||||||
context: context,
|
context: context,
|
||||||
model: model,
|
model: model,
|
||||||
analyzedAt: new Date().toISOString(),
|
analyzedAt: new Date().toISOString(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
// Update the original data structure
|
// Update the original data structure
|
||||||
if (data.results && Array.isArray(data.results)) {
|
if (data.results && Array.isArray(data.results)) {
|
||||||
data.results = updatedPosts;
|
data.results = updatedPosts;
|
||||||
// Update metadata
|
// Update metadata
|
||||||
data.metadata = data.metadata || {};
|
data.metadata = data.metadata || {};
|
||||||
data.metadata.aiAnalysisUpdated = new Date().toISOString();
|
data.metadata.aiAnalysisUpdated = new Date().toISOString();
|
||||||
data.metadata.aiContext = context;
|
data.metadata.aiContext = context;
|
||||||
data.metadata.aiModel = model;
|
data.metadata.aiModel = model;
|
||||||
} else {
|
} else {
|
||||||
// If it's a simple array, create a proper structure
|
// If it's a simple array, create a proper structure
|
||||||
data = {
|
data = {
|
||||||
metadata: {
|
metadata: {
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
totalItems: updatedPosts.length,
|
totalItems: updatedPosts.length,
|
||||||
aiContext: context,
|
aiContext: context,
|
||||||
aiModel: model,
|
aiModel: model,
|
||||||
analysisType: "cli",
|
analysisType: "cli",
|
||||||
},
|
},
|
||||||
results: updatedPosts,
|
results: updatedPosts,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate output filename if not provided
|
// Generate output filename if not provided
|
||||||
if (!outputFile) {
|
if (!outputFile) {
|
||||||
// Use the original filename with -ai suffix
|
// Use the original filename with -ai suffix
|
||||||
const originalName = path.basename(inputFile, path.extname(inputFile));
|
const originalName = path.basename(inputFile, path.extname(inputFile));
|
||||||
outputFile = path.join(
|
outputFile = path.join(
|
||||||
path.dirname(inputFile),
|
path.dirname(inputFile),
|
||||||
`${originalName}-ai.json`
|
`${originalName}-ai.json`
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save updated results back to file
|
// Save updated results back to file
|
||||||
fs.writeFileSync(outputFile, JSON.stringify(data, null, 2));
|
fs.writeFileSync(outputFile, JSON.stringify(data, null, 2));
|
||||||
|
|
||||||
// Show summary
|
// Show summary
|
||||||
const relevant = analysis.filter((a) => a.isRelevant).length;
|
const relevant = analysis.filter((a) => a.isRelevant).length;
|
||||||
const irrelevant = analysis.filter((a) => !a.isRelevant).length;
|
const irrelevant = analysis.filter((a) => !a.isRelevant).length;
|
||||||
const avgConfidence =
|
const avgConfidence =
|
||||||
analysis.reduce((sum, a) => sum + a.confidence, 0) / analysis.length;
|
analysis.reduce((sum, a) => sum + a.confidence, 0) / analysis.length;
|
||||||
|
|
||||||
logger.success("✅ AI analysis completed and integrated");
|
logger.success("✅ AI analysis completed and integrated");
|
||||||
logger.info(`📊 Context: "${context}"`);
|
logger.info(`📊 Context: "${context}"`);
|
||||||
logger.info(`📈 Total items analyzed: ${analysis.length}`);
|
logger.info(`📈 Total items analyzed: ${analysis.length}`);
|
||||||
logger.info(
|
logger.info(
|
||||||
`✅ Relevant items: ${relevant} (${(
|
`✅ Relevant items: ${relevant} (${(
|
||||||
(relevant / analysis.length) *
|
(relevant / analysis.length) *
|
||||||
100
|
100
|
||||||
).toFixed(1)}%)`
|
).toFixed(1)}%)`
|
||||||
);
|
);
|
||||||
logger.info(
|
logger.info(
|
||||||
`❌ Irrelevant items: ${irrelevant} (${(
|
`❌ Irrelevant items: ${irrelevant} (${(
|
||||||
(irrelevant / analysis.length) *
|
(irrelevant / analysis.length) *
|
||||||
100
|
100
|
||||||
).toFixed(1)}%)`
|
).toFixed(1)}%)`
|
||||||
);
|
);
|
||||||
logger.info(`🎯 Average confidence: ${avgConfidence.toFixed(2)}`);
|
logger.info(`🎯 Average confidence: ${avgConfidence.toFixed(2)}`);
|
||||||
logger.file(`🧠 Updated results saved to: ${outputFile}`);
|
logger.file(`🧠 Updated results saved to: ${outputFile}`);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`❌ Analysis failed: ${error.message}`);
|
logger.error(`❌ Analysis failed: ${error.message}`);
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run the CLI
|
// Run the CLI
|
||||||
main();
|
main();
|
||||||
|
|||||||
@ -1,346 +1,346 @@
|
|||||||
/**
|
/**
|
||||||
* AI Analyzer Demo
|
* AI Analyzer Demo
|
||||||
*
|
*
|
||||||
* Demonstrates all the core utilities provided by the ai-analyzer package:
|
* Demonstrates all the core utilities provided by the ai-analyzer package:
|
||||||
* - Logger functionality
|
* - Logger functionality
|
||||||
* - Text processing utilities
|
* - Text processing utilities
|
||||||
* - Location validation
|
* - Location validation
|
||||||
* - AI analysis capabilities
|
* - AI analysis capabilities
|
||||||
* - Test utilities
|
* - Test utilities
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const {
|
const {
|
||||||
logger,
|
logger,
|
||||||
Logger,
|
Logger,
|
||||||
cleanText,
|
cleanText,
|
||||||
containsAnyKeyword,
|
containsAnyKeyword,
|
||||||
parseLocationFilters,
|
parseLocationFilters,
|
||||||
validateLocationAgainstFilters,
|
validateLocationAgainstFilters,
|
||||||
extractLocationFromProfile,
|
extractLocationFromProfile,
|
||||||
analyzeBatch,
|
analyzeBatch,
|
||||||
} = require("./index");
|
} = require("./index");
|
||||||
|
|
||||||
// Terminal colors for demo output
|
// Terminal colors for demo output
|
||||||
const colors = {
|
const colors = {
|
||||||
reset: "\x1b[0m",
|
reset: "\x1b[0m",
|
||||||
bright: "\x1b[1m",
|
bright: "\x1b[1m",
|
||||||
cyan: "\x1b[36m",
|
cyan: "\x1b[36m",
|
||||||
green: "\x1b[32m",
|
green: "\x1b[32m",
|
||||||
yellow: "\x1b[33m",
|
yellow: "\x1b[33m",
|
||||||
blue: "\x1b[34m",
|
blue: "\x1b[34m",
|
||||||
magenta: "\x1b[35m",
|
magenta: "\x1b[35m",
|
||||||
red: "\x1b[31m",
|
red: "\x1b[31m",
|
||||||
};
|
};
|
||||||
|
|
||||||
const demo = {
|
const demo = {
|
||||||
title: (text) =>
|
title: (text) =>
|
||||||
console.log(`\n${colors.bright}${colors.cyan}${text}${colors.reset}`),
|
console.log(`\n${colors.bright}${colors.cyan}${text}${colors.reset}`),
|
||||||
section: (text) =>
|
section: (text) =>
|
||||||
console.log(`\n${colors.bright}${colors.magenta}${text}${colors.reset}`),
|
console.log(`\n${colors.bright}${colors.magenta}${text}${colors.reset}`),
|
||||||
success: (text) => console.log(`${colors.green}✅ ${text}${colors.reset}`),
|
success: (text) => console.log(`${colors.green}✅ ${text}${colors.reset}`),
|
||||||
info: (text) => console.log(`${colors.blue}ℹ️ ${text}${colors.reset}`),
|
info: (text) => console.log(`${colors.blue}ℹ️ ${text}${colors.reset}`),
|
||||||
warning: (text) => console.log(`${colors.yellow}⚠️ ${text}${colors.reset}`),
|
warning: (text) => console.log(`${colors.yellow}⚠️ ${text}${colors.reset}`),
|
||||||
error: (text) => console.log(`${colors.red}❌ ${text}${colors.reset}`),
|
error: (text) => console.log(`${colors.red}❌ ${text}${colors.reset}`),
|
||||||
code: (text) => console.log(`${colors.cyan}${text}${colors.reset}`),
|
code: (text) => console.log(`${colors.cyan}${text}${colors.reset}`),
|
||||||
};
|
};
|
||||||
|
|
||||||
async function runDemo() {
|
async function runDemo() {
|
||||||
demo.title("=== AI Analyzer Demo ===");
|
demo.title("=== AI Analyzer Demo ===");
|
||||||
demo.info(
|
demo.info(
|
||||||
"This demo showcases all the core utilities provided by the ai-analyzer package."
|
"This demo showcases all the core utilities provided by the ai-analyzer package."
|
||||||
);
|
);
|
||||||
demo.info("Press Enter to continue through each section...\n");
|
demo.info("Press Enter to continue through each section...\n");
|
||||||
|
|
||||||
await waitForEnter();
|
await waitForEnter();
|
||||||
|
|
||||||
// 1. Logger Demo
|
// 1. Logger Demo
|
||||||
await demonstrateLogger();
|
await demonstrateLogger();
|
||||||
|
|
||||||
// 2. Text Processing Demo
|
// 2. Text Processing Demo
|
||||||
await demonstrateTextProcessing();
|
await demonstrateTextProcessing();
|
||||||
|
|
||||||
// 3. Location Validation Demo
|
// 3. Location Validation Demo
|
||||||
await demonstrateLocationValidation();
|
await demonstrateLocationValidation();
|
||||||
|
|
||||||
// 4. AI Analysis Demo
|
// 4. AI Analysis Demo
|
||||||
await demonstrateAIAnalysis();
|
await demonstrateAIAnalysis();
|
||||||
|
|
||||||
// 5. Integration Demo
|
// 5. Integration Demo
|
||||||
await demonstrateIntegration();
|
await demonstrateIntegration();
|
||||||
|
|
||||||
demo.title("=== Demo Complete ===");
|
demo.title("=== Demo Complete ===");
|
||||||
demo.success("All ai-analyzer utilities demonstrated successfully!");
|
demo.success("All ai-analyzer utilities demonstrated successfully!");
|
||||||
demo.info("Check the README.md for detailed API documentation.");
|
demo.info("Check the README.md for detailed API documentation.");
|
||||||
}
|
}
|
||||||
|
|
||||||
async function demonstrateLogger() {
|
async function demonstrateLogger() {
|
||||||
demo.section("1. Logger Utilities");
|
demo.section("1. Logger Utilities");
|
||||||
demo.info(
|
demo.info(
|
||||||
"The logger provides consistent logging across all parsers with configurable levels and color support."
|
"The logger provides consistent logging across all parsers with configurable levels and color support."
|
||||||
);
|
);
|
||||||
|
|
||||||
demo.code("// Using default logger");
|
demo.code("// Using default logger");
|
||||||
logger.info("This is an info message");
|
logger.info("This is an info message");
|
||||||
logger.warning("This is a warning message");
|
logger.warning("This is a warning message");
|
||||||
logger.error("This is an error message");
|
logger.error("This is an error message");
|
||||||
logger.success("This is a success message");
|
logger.success("This is a success message");
|
||||||
logger.debug("This is a debug message (if enabled)");
|
logger.debug("This is a debug message (if enabled)");
|
||||||
|
|
||||||
demo.code("// Convenience methods with emoji prefixes");
|
demo.code("// Convenience methods with emoji prefixes");
|
||||||
logger.step("Starting demo process");
|
logger.step("Starting demo process");
|
||||||
logger.search("Searching for keywords");
|
logger.search("Searching for keywords");
|
||||||
logger.ai("Running AI analysis");
|
logger.ai("Running AI analysis");
|
||||||
logger.location("Validating location");
|
logger.location("Validating location");
|
||||||
logger.file("Saving results");
|
logger.file("Saving results");
|
||||||
|
|
||||||
demo.code("// Custom logger configuration");
|
demo.code("// Custom logger configuration");
|
||||||
const customLogger = new Logger({
|
const customLogger = new Logger({
|
||||||
debug: false,
|
debug: false,
|
||||||
colors: true,
|
colors: true,
|
||||||
});
|
});
|
||||||
customLogger.info("Custom logger with debug disabled");
|
customLogger.info("Custom logger with debug disabled");
|
||||||
customLogger.debug("This won't show");
|
customLogger.debug("This won't show");
|
||||||
|
|
||||||
demo.code("// Silent mode");
|
demo.code("// Silent mode");
|
||||||
const silentLogger = new Logger();
|
const silentLogger = new Logger();
|
||||||
silentLogger.silent();
|
silentLogger.silent();
|
||||||
silentLogger.info("This won't show");
|
silentLogger.info("This won't show");
|
||||||
silentLogger.verbose(); // Re-enable all levels
|
silentLogger.verbose(); // Re-enable all levels
|
||||||
|
|
||||||
await waitForEnter();
|
await waitForEnter();
|
||||||
}
|
}
|
||||||
|
|
||||||
async function demonstrateTextProcessing() {
|
async function demonstrateTextProcessing() {
|
||||||
demo.section("2. Text Processing Utilities");
|
demo.section("2. Text Processing Utilities");
|
||||||
demo.info(
|
demo.info(
|
||||||
"Text utilities provide content cleaning and keyword matching capabilities."
|
"Text utilities provide content cleaning and keyword matching capabilities."
|
||||||
);
|
);
|
||||||
|
|
||||||
const sampleTexts = [
|
const sampleTexts = [
|
||||||
"Check out this #awesome post! https://example.com 🚀",
|
"Check out this #awesome post! https://example.com 🚀",
|
||||||
"Just got #laidoff from my job. Looking for new opportunities!",
|
"Just got #laidoff from my job. Looking for new opportunities!",
|
||||||
"Company is #downsizing and I'm affected. #RIF #layoff",
|
"Company is #downsizing and I'm affected. #RIF #layoff",
|
||||||
"Great news! We're #hiring new developers! 🎉",
|
"Great news! We're #hiring new developers! 🎉",
|
||||||
];
|
];
|
||||||
|
|
||||||
demo.code("// Text cleaning examples:");
|
demo.code("// Text cleaning examples:");
|
||||||
sampleTexts.forEach((text, index) => {
|
sampleTexts.forEach((text, index) => {
|
||||||
const cleaned = cleanText(text);
|
const cleaned = cleanText(text);
|
||||||
demo.info(`Original: ${text}`);
|
demo.info(`Original: ${text}`);
|
||||||
demo.success(`Cleaned: ${cleaned}`);
|
demo.success(`Cleaned: ${cleaned}`);
|
||||||
console.log();
|
console.log();
|
||||||
});
|
});
|
||||||
|
|
||||||
demo.code("// Keyword matching:");
|
demo.code("// Keyword matching:");
|
||||||
const keywords = ["layoff", "downsizing", "RIF", "hiring"];
|
const keywords = ["layoff", "downsizing", "RIF", "hiring"];
|
||||||
|
|
||||||
sampleTexts.forEach((text, index) => {
|
sampleTexts.forEach((text, index) => {
|
||||||
const hasMatch = containsAnyKeyword(text, keywords);
|
const hasMatch = containsAnyKeyword(text, keywords);
|
||||||
const matchedKeywords = keywords.filter((keyword) =>
|
const matchedKeywords = keywords.filter((keyword) =>
|
||||||
text.toLowerCase().includes(keyword.toLowerCase())
|
text.toLowerCase().includes(keyword.toLowerCase())
|
||||||
);
|
);
|
||||||
|
|
||||||
demo.info(
|
demo.info(
|
||||||
`Text ${index + 1}: ${hasMatch ? "✅" : "❌"} ${
|
`Text ${index + 1}: ${hasMatch ? "✅" : "❌"} ${
|
||||||
matchedKeywords.join(", ") || "No matches"
|
matchedKeywords.join(", ") || "No matches"
|
||||||
}`
|
}`
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
await waitForEnter();
|
await waitForEnter();
|
||||||
}
|
}
|
||||||
|
|
||||||
async function demonstrateLocationValidation() {
|
async function demonstrateLocationValidation() {
|
||||||
demo.section("3. Location Validation Utilities");
|
demo.section("3. Location Validation Utilities");
|
||||||
demo.info(
|
demo.info(
|
||||||
"Location utilities provide geographic filtering and validation capabilities."
|
"Location utilities provide geographic filtering and validation capabilities."
|
||||||
);
|
);
|
||||||
|
|
||||||
demo.code("// Location filter parsing:");
|
demo.code("// Location filter parsing:");
|
||||||
const filterStrings = [
|
const filterStrings = [
|
||||||
"Ontario,Manitoba",
|
"Ontario,Manitoba",
|
||||||
"Toronto,Vancouver",
|
"Toronto,Vancouver",
|
||||||
"British Columbia,Alberta",
|
"British Columbia,Alberta",
|
||||||
"Canada",
|
"Canada",
|
||||||
];
|
];
|
||||||
|
|
||||||
filterStrings.forEach((filterString) => {
|
filterStrings.forEach((filterString) => {
|
||||||
const filters = parseLocationFilters(filterString);
|
const filters = parseLocationFilters(filterString);
|
||||||
demo.info(`Filter: "${filterString}"`);
|
demo.info(`Filter: "${filterString}"`);
|
||||||
demo.success(`Parsed: [${filters.join(", ")}]`);
|
demo.success(`Parsed: [${filters.join(", ")}]`);
|
||||||
console.log();
|
console.log();
|
||||||
});
|
});
|
||||||
|
|
||||||
demo.code("// Location validation examples:");
|
demo.code("// Location validation examples:");
|
||||||
const testLocations = [
|
const testLocations = [
|
||||||
{ location: "Toronto, Ontario, Canada", filters: ["Ontario"] },
|
{ location: "Toronto, Ontario, Canada", filters: ["Ontario"] },
|
||||||
{ location: "Vancouver, BC", filters: ["British Columbia"] },
|
{ location: "Vancouver, BC", filters: ["British Columbia"] },
|
||||||
{ location: "Calgary, Alberta", filters: ["Ontario"] },
|
{ location: "Calgary, Alberta", filters: ["Ontario"] },
|
||||||
{ location: "Montreal, Quebec", filters: ["Ontario", "Manitoba"] },
|
{ location: "Montreal, Quebec", filters: ["Ontario", "Manitoba"] },
|
||||||
{ location: "New York, NY", filters: ["Ontario"] },
|
{ location: "New York, NY", filters: ["Ontario"] },
|
||||||
];
|
];
|
||||||
|
|
||||||
testLocations.forEach(({ location, filters }) => {
|
testLocations.forEach(({ location, filters }) => {
|
||||||
const isValid = validateLocationAgainstFilters(location, filters);
|
const isValid = validateLocationAgainstFilters(location, filters);
|
||||||
demo.info(`Location: "${location}"`);
|
demo.info(`Location: "${location}"`);
|
||||||
demo.info(`Filters: [${filters.join(", ")}]`);
|
demo.info(`Filters: [${filters.join(", ")}]`);
|
||||||
demo.success(`Valid: ${isValid ? "✅ Yes" : "❌ No"}`);
|
demo.success(`Valid: ${isValid ? "✅ Yes" : "❌ No"}`);
|
||||||
console.log();
|
console.log();
|
||||||
});
|
});
|
||||||
|
|
||||||
demo.code("// Profile location extraction:");
|
demo.code("// Profile location extraction:");
|
||||||
const profileTexts = [
|
const profileTexts = [
|
||||||
"Software Engineer at Tech Corp • Toronto, Ontario",
|
"Software Engineer at Tech Corp • Toronto, Ontario",
|
||||||
"Product Manager • Vancouver, BC",
|
"Product Manager • Vancouver, BC",
|
||||||
"Data Scientist • Remote",
|
"Data Scientist • Remote",
|
||||||
"CEO at Startup Inc • Montreal, Quebec, Canada",
|
"CEO at Startup Inc • Montreal, Quebec, Canada",
|
||||||
];
|
];
|
||||||
|
|
||||||
profileTexts.forEach((profileText) => {
|
profileTexts.forEach((profileText) => {
|
||||||
const location = extractLocationFromProfile(profileText);
|
const location = extractLocationFromProfile(profileText);
|
||||||
demo.info(`Profile: "${profileText}"`);
|
demo.info(`Profile: "${profileText}"`);
|
||||||
demo.success(`Extracted: "${location || "No location found"}"`);
|
demo.success(`Extracted: "${location || "No location found"}"`);
|
||||||
console.log();
|
console.log();
|
||||||
});
|
});
|
||||||
|
|
||||||
await waitForEnter();
|
await waitForEnter();
|
||||||
}
|
}
|
||||||
|
|
||||||
async function demonstrateAIAnalysis() {
|
async function demonstrateAIAnalysis() {
|
||||||
demo.section("4. AI Analysis Utilities");
|
demo.section("4. AI Analysis Utilities");
|
||||||
demo.info(
|
demo.info(
|
||||||
"AI utilities provide content analysis using OpenAI or local Ollama models."
|
"AI utilities provide content analysis using OpenAI or local Ollama models."
|
||||||
);
|
);
|
||||||
|
|
||||||
// Mock posts for demo
|
// Mock posts for demo
|
||||||
const mockPosts = [
|
const mockPosts = [
|
||||||
{
|
{
|
||||||
id: "1",
|
id: "1",
|
||||||
content:
|
content:
|
||||||
"Just got laid off from my software engineering role. Looking for new opportunities in Toronto.",
|
"Just got laid off from my software engineering role. Looking for new opportunities in Toronto.",
|
||||||
author: "John Doe",
|
author: "John Doe",
|
||||||
location: "Toronto, Ontario",
|
location: "Toronto, Ontario",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: "2",
|
id: "2",
|
||||||
content:
|
content:
|
||||||
"Our company is downsizing and I'm affected. This is really tough news.",
|
"Our company is downsizing and I'm affected. This is really tough news.",
|
||||||
author: "Jane Smith",
|
author: "Jane Smith",
|
||||||
location: "Vancouver, BC",
|
location: "Vancouver, BC",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: "3",
|
id: "3",
|
||||||
content:
|
content:
|
||||||
"We're hiring! Looking for talented developers to join our team.",
|
"We're hiring! Looking for talented developers to join our team.",
|
||||||
author: "Bob Wilson",
|
author: "Bob Wilson",
|
||||||
location: "Calgary, Alberta",
|
location: "Calgary, Alberta",
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
demo.code("// Mock AI analysis (simulated):");
|
demo.code("// Mock AI analysis (simulated):");
|
||||||
demo.info("In a real scenario, this would call Ollama or OpenAI API");
|
demo.info("In a real scenario, this would call Ollama or OpenAI API");
|
||||||
|
|
||||||
mockPosts.forEach((post, index) => {
|
mockPosts.forEach((post, index) => {
|
||||||
demo.info(`Post ${index + 1}: ${post.content.substring(0, 50)}...`);
|
demo.info(`Post ${index + 1}: ${post.content.substring(0, 50)}...`);
|
||||||
demo.success(
|
demo.success(
|
||||||
`Analysis: Relevant to job layoffs (confidence: 0.${85 + index * 5})`
|
`Analysis: Relevant to job layoffs (confidence: 0.${85 + index * 5})`
|
||||||
);
|
);
|
||||||
console.log();
|
console.log();
|
||||||
});
|
});
|
||||||
|
|
||||||
demo.code("// Batch analysis simulation:");
|
demo.code("// Batch analysis simulation:");
|
||||||
demo.info("Processing batch of 3 posts...");
|
demo.info("Processing batch of 3 posts...");
|
||||||
await simulateProcessing();
|
await simulateProcessing();
|
||||||
demo.success("Batch analysis completed!");
|
demo.success("Batch analysis completed!");
|
||||||
|
|
||||||
await waitForEnter();
|
await waitForEnter();
|
||||||
}
|
}
|
||||||
|
|
||||||
async function demonstrateIntegration() {
|
async function demonstrateIntegration() {
|
||||||
demo.section("5. Integration Example");
|
demo.section("5. Integration Example");
|
||||||
demo.info("Here's how all utilities work together in a real scenario:");
|
demo.info("Here's how all utilities work together in a real scenario:");
|
||||||
|
|
||||||
const samplePost = {
|
const samplePost = {
|
||||||
id: "demo-1",
|
id: "demo-1",
|
||||||
content:
|
content:
|
||||||
"Just got #laidoff from my job at TechCorp! Looking for new opportunities in #Toronto. This is really tough but I'm staying positive! 🚀",
|
"Just got #laidoff from my job at TechCorp! Looking for new opportunities in #Toronto. This is really tough but I'm staying positive! 🚀",
|
||||||
author: "Demo User",
|
author: "Demo User",
|
||||||
location: "Toronto, Ontario, Canada",
|
location: "Toronto, Ontario, Canada",
|
||||||
};
|
};
|
||||||
|
|
||||||
demo.code("// Processing pipeline:");
|
demo.code("// Processing pipeline:");
|
||||||
|
|
||||||
// 1. Log the start
|
// 1. Log the start
|
||||||
logger.step("Processing new post");
|
logger.step("Processing new post");
|
||||||
|
|
||||||
// 2. Clean the text
|
// 2. Clean the text
|
||||||
const cleanedContent = cleanText(samplePost.content);
|
const cleanedContent = cleanText(samplePost.content);
|
||||||
logger.info(`Cleaned content: ${cleanedContent}`);
|
logger.info(`Cleaned content: ${cleanedContent}`);
|
||||||
|
|
||||||
// 3. Check for keywords
|
// 3. Check for keywords
|
||||||
const keywords = ["layoff", "downsizing", "RIF"];
|
const keywords = ["layoff", "downsizing", "RIF"];
|
||||||
const hasKeywords = containsAnyKeyword(cleanedContent, keywords);
|
const hasKeywords = containsAnyKeyword(cleanedContent, keywords);
|
||||||
logger.search(`Keyword match: ${hasKeywords ? "Found" : "Not found"}`);
|
logger.search(`Keyword match: ${hasKeywords ? "Found" : "Not found"}`);
|
||||||
|
|
||||||
// 4. Validate location
|
// 4. Validate location
|
||||||
const locationFilters = parseLocationFilters("Ontario,Manitoba");
|
const locationFilters = parseLocationFilters("Ontario,Manitoba");
|
||||||
const isValidLocation = validateLocationAgainstFilters(
|
const isValidLocation = validateLocationAgainstFilters(
|
||||||
samplePost.location,
|
samplePost.location,
|
||||||
locationFilters
|
locationFilters
|
||||||
);
|
);
|
||||||
logger.location(`Location valid: ${isValidLocation ? "Yes" : "No"}`);
|
logger.location(`Location valid: ${isValidLocation ? "Yes" : "No"}`);
|
||||||
|
|
||||||
// 5. Simulate AI analysis
|
// 5. Simulate AI analysis
|
||||||
if (hasKeywords && isValidLocation) {
|
if (hasKeywords && isValidLocation) {
|
||||||
logger.ai("Running AI analysis...");
|
logger.ai("Running AI analysis...");
|
||||||
await simulateProcessing();
|
await simulateProcessing();
|
||||||
logger.success("Post accepted and analyzed!");
|
logger.success("Post accepted and analyzed!");
|
||||||
} else {
|
} else {
|
||||||
logger.warning("Post rejected - doesn't meet criteria");
|
logger.warning("Post rejected - doesn't meet criteria");
|
||||||
}
|
}
|
||||||
|
|
||||||
await waitForEnter();
|
await waitForEnter();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper functions
|
// Helper functions
|
||||||
function waitForEnter() {
|
function waitForEnter() {
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
const readline = require("readline");
|
const readline = require("readline");
|
||||||
const rl = readline.createInterface({
|
const rl = readline.createInterface({
|
||||||
input: process.stdin,
|
input: process.stdin,
|
||||||
output: process.stdout,
|
output: process.stdout,
|
||||||
});
|
});
|
||||||
|
|
||||||
rl.question("\nPress Enter to continue...", () => {
|
rl.question("\nPress Enter to continue...", () => {
|
||||||
rl.close();
|
rl.close();
|
||||||
resolve();
|
resolve();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function simulateProcessing() {
|
async function simulateProcessing() {
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
const dots = [".", "..", "..."];
|
const dots = [".", "..", "..."];
|
||||||
let i = 0;
|
let i = 0;
|
||||||
const interval = setInterval(() => {
|
const interval = setInterval(() => {
|
||||||
process.stdout.write(`\rProcessing${dots[i]}`);
|
process.stdout.write(`\rProcessing${dots[i]}`);
|
||||||
i = (i + 1) % dots.length;
|
i = (i + 1) % dots.length;
|
||||||
}, 500);
|
}, 500);
|
||||||
|
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
clearInterval(interval);
|
clearInterval(interval);
|
||||||
process.stdout.write("\r");
|
process.stdout.write("\r");
|
||||||
resolve();
|
resolve();
|
||||||
}, 2000);
|
}, 2000);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run the demo if this file is executed directly
|
// Run the demo if this file is executed directly
|
||||||
if (require.main === module) {
|
if (require.main === module) {
|
||||||
runDemo().catch((error) => {
|
runDemo().catch((error) => {
|
||||||
demo.error(`Demo failed: ${error.message}`);
|
demo.error(`Demo failed: ${error.message}`);
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = { runDemo };
|
module.exports = { runDemo };
|
||||||
|
|||||||
@ -1,22 +1,22 @@
|
|||||||
/**
|
/**
|
||||||
* ai-analyzer - Core utilities for parsers
|
* ai-analyzer - Core utilities for parsers
|
||||||
* Main entry point that exports all modules
|
* Main entry point that exports all modules
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Export all utilities with clean namespace
|
// Export all utilities with clean namespace
|
||||||
module.exports = {
|
module.exports = {
|
||||||
// Logger utilities
|
// Logger utilities
|
||||||
...require("./src/logger"),
|
...require("./src/logger"),
|
||||||
|
|
||||||
// AI analysis utilities
|
// AI analysis utilities
|
||||||
...require("./src/ai-utils"),
|
...require("./src/ai-utils"),
|
||||||
|
|
||||||
// Text processing utilities
|
// Text processing utilities
|
||||||
...require("./src/text-utils"),
|
...require("./src/text-utils"),
|
||||||
|
|
||||||
// Location validation utilities
|
// Location validation utilities
|
||||||
...require("./src/location-utils"),
|
...require("./src/location-utils"),
|
||||||
|
|
||||||
// Test utilities
|
// Test utilities
|
||||||
...require("./src/test-utils"),
|
...require("./src/test-utils"),
|
||||||
};
|
};
|
||||||
|
|||||||
7428
ai-analyzer/package-lock.json
generated
7428
ai-analyzer/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -1,301 +1,301 @@
|
|||||||
const { logger } = require("./logger");
|
const { logger } = require("./logger");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* AI Analysis utilities for post processing with Ollama
|
* AI Analysis utilities for post processing with Ollama
|
||||||
* Extracted from ai-analyzer-local.js for reuse across parsers
|
* Extracted from ai-analyzer-local.js for reuse across parsers
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if Ollama is running and the model is available
|
* Check if Ollama is running and the model is available
|
||||||
*/
|
*/
|
||||||
async function checkOllamaStatus(
|
async function checkOllamaStatus(
|
||||||
model = "mistral",
|
model = "mistral",
|
||||||
ollamaHost = "http://localhost:11434"
|
ollamaHost = "http://localhost:11434"
|
||||||
) {
|
) {
|
||||||
try {
|
try {
|
||||||
// Check if Ollama is running
|
// Check if Ollama is running
|
||||||
const response = await fetch(`${ollamaHost}/api/tags`);
|
const response = await fetch(`${ollamaHost}/api/tags`);
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
throw new Error(`Ollama not running on ${ollamaHost}`);
|
throw new Error(`Ollama not running on ${ollamaHost}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const data = await response.json();
|
const data = await response.json();
|
||||||
const availableModels = data.models.map((m) => m.name);
|
const availableModels = data.models.map((m) => m.name);
|
||||||
|
|
||||||
logger.ai("Ollama is running");
|
logger.ai("Ollama is running");
|
||||||
logger.info(
|
logger.info(
|
||||||
`📦 Available models: ${availableModels
|
`📦 Available models: ${availableModels
|
||||||
.map((m) => m.split(":")[0])
|
.map((m) => m.split(":")[0])
|
||||||
.join(", ")}`
|
.join(", ")}`
|
||||||
);
|
);
|
||||||
|
|
||||||
// Check if requested model is available
|
// Check if requested model is available
|
||||||
const modelExists = availableModels.some((m) => m.startsWith(model));
|
const modelExists = availableModels.some((m) => m.startsWith(model));
|
||||||
if (!modelExists) {
|
if (!modelExists) {
|
||||||
logger.error(`Model "${model}" not found`);
|
logger.error(`Model "${model}" not found`);
|
||||||
logger.error(`💡 Install it with: ollama pull ${model}`);
|
logger.error(`💡 Install it with: ollama pull ${model}`);
|
||||||
logger.error(
|
logger.error(
|
||||||
`💡 Or choose from: ${availableModels
|
`💡 Or choose from: ${availableModels
|
||||||
.map((m) => m.split(":")[0])
|
.map((m) => m.split(":")[0])
|
||||||
.join(", ")}`
|
.join(", ")}`
|
||||||
);
|
);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.success(`Using model: ${model}`);
|
logger.success(`Using model: ${model}`);
|
||||||
return true;
|
return true;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`Error connecting to Ollama: ${error.message}`);
|
logger.error(`Error connecting to Ollama: ${error.message}`);
|
||||||
logger.error("💡 Make sure Ollama is installed and running:");
|
logger.error("💡 Make sure Ollama is installed and running:");
|
||||||
logger.error(" 1. Install: https://ollama.ai/");
|
logger.error(" 1. Install: https://ollama.ai/");
|
||||||
logger.error(" 2. Start: ollama serve");
|
logger.error(" 2. Start: ollama serve");
|
||||||
logger.error(` 3. Install model: ollama pull ${model}`);
|
logger.error(` 3. Install model: ollama pull ${model}`);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Analyze multiple posts using local Ollama
|
* Analyze multiple posts using local Ollama
|
||||||
*/
|
*/
|
||||||
async function analyzeBatch(
|
async function analyzeBatch(
|
||||||
posts,
|
posts,
|
||||||
context,
|
context,
|
||||||
model = "mistral",
|
model = "mistral",
|
||||||
ollamaHost = "http://localhost:11434"
|
ollamaHost = "http://localhost:11434"
|
||||||
) {
|
) {
|
||||||
logger.ai(`Analyzing batch of ${posts.length} posts with ${model}...`);
|
logger.ai(`Analyzing batch of ${posts.length} posts with ${model}...`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const prompt = `You are an expert at analyzing LinkedIn posts for relevance to specific contexts.
|
const prompt = `You are an expert at analyzing LinkedIn posts for relevance to specific contexts.
|
||||||
|
|
||||||
CONTEXT TO MATCH: "${context}"
|
CONTEXT TO MATCH: "${context}"
|
||||||
|
|
||||||
Analyze these ${
|
Analyze these ${
|
||||||
posts.length
|
posts.length
|
||||||
} LinkedIn posts and determine if each relates to the context above.
|
} LinkedIn posts and determine if each relates to the context above.
|
||||||
|
|
||||||
POSTS:
|
POSTS:
|
||||||
${posts
|
${posts
|
||||||
.map(
|
.map(
|
||||||
(post, i) => `
|
(post, i) => `
|
||||||
POST ${i + 1}:
|
POST ${i + 1}:
|
||||||
"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}"
|
"${post.text.substring(0, 400)}${post.text.length > 400 ? "..." : ""}"
|
||||||
`
|
`
|
||||||
)
|
)
|
||||||
.join("")}
|
.join("")}
|
||||||
|
|
||||||
For each post, provide:
|
For each post, provide:
|
||||||
- Is it relevant to "${context}"? (YES/NO)
|
- Is it relevant to "${context}"? (YES/NO)
|
||||||
- Confidence level (0.0 to 1.0)
|
- Confidence level (0.0 to 1.0)
|
||||||
- Brief reasoning
|
- Brief reasoning
|
||||||
|
|
||||||
Respond in this EXACT format for each post:
|
Respond in this EXACT format for each post:
|
||||||
POST 1: YES/NO | 0.X | brief reason
|
POST 1: YES/NO | 0.X | brief reason
|
||||||
POST 2: YES/NO | 0.X | brief reason
|
POST 2: YES/NO | 0.X | brief reason
|
||||||
POST 3: YES/NO | 0.X | brief reason
|
POST 3: YES/NO | 0.X | brief reason
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs
|
- For layoff context: "laid off 50 employees" = YES | 0.9 | mentions layoffs
|
||||||
- For hiring context: "we're hiring developers" = YES | 0.8 | job posting
|
- For hiring context: "we're hiring developers" = YES | 0.8 | job posting
|
||||||
- Unrelated content = NO | 0.1 | not relevant to context`;
|
- Unrelated content = NO | 0.1 | not relevant to context`;
|
||||||
|
|
||||||
const response = await fetch(`${ollamaHost}/api/generate`, {
|
const response = await fetch(`${ollamaHost}/api/generate`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: {
|
headers: {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
},
|
},
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model: model,
|
model: model,
|
||||||
prompt: prompt,
|
prompt: prompt,
|
||||||
stream: false,
|
stream: false,
|
||||||
options: {
|
options: {
|
||||||
temperature: 0.3,
|
temperature: 0.3,
|
||||||
top_p: 0.9,
|
top_p: 0.9,
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`Ollama API error: ${response.status} ${response.statusText}`
|
`Ollama API error: ${response.status} ${response.statusText}`
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
const data = await response.json();
|
const data = await response.json();
|
||||||
const aiResponse = data.response.trim();
|
const aiResponse = data.response.trim();
|
||||||
|
|
||||||
// Parse the response
|
// Parse the response
|
||||||
const analyses = [];
|
const analyses = [];
|
||||||
const lines = aiResponse.split("\n").filter((line) => line.trim());
|
const lines = aiResponse.split("\n").filter((line) => line.trim());
|
||||||
|
|
||||||
for (let i = 0; i < posts.length; i++) {
|
for (let i = 0; i < posts.length; i++) {
|
||||||
let analysis = {
|
let analysis = {
|
||||||
postIndex: i + 1,
|
postIndex: i + 1,
|
||||||
isRelevant: false,
|
isRelevant: false,
|
||||||
confidence: 0.5,
|
confidence: 0.5,
|
||||||
reasoning: "Could not parse AI response",
|
reasoning: "Could not parse AI response",
|
||||||
};
|
};
|
||||||
|
|
||||||
// Look for lines that match "POST X:" pattern
|
// Look for lines that match "POST X:" pattern
|
||||||
const postPattern = new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i");
|
const postPattern = new RegExp(`POST\\s*${i + 1}:?\\s*(.+)`, "i");
|
||||||
|
|
||||||
for (const line of lines) {
|
for (const line of lines) {
|
||||||
const match = line.match(postPattern);
|
const match = line.match(postPattern);
|
||||||
if (match) {
|
if (match) {
|
||||||
const content = match[1].trim();
|
const content = match[1].trim();
|
||||||
|
|
||||||
// Parse: YES/NO | 0.X | reasoning
|
// Parse: YES/NO | 0.X | reasoning
|
||||||
const parts = content.split("|").map((p) => p.trim());
|
const parts = content.split("|").map((p) => p.trim());
|
||||||
|
|
||||||
if (parts.length >= 3) {
|
if (parts.length >= 3) {
|
||||||
analysis.isRelevant = parts[0].toUpperCase().includes("YES");
|
analysis.isRelevant = parts[0].toUpperCase().includes("YES");
|
||||||
analysis.confidence = Math.max(
|
analysis.confidence = Math.max(
|
||||||
0,
|
0,
|
||||||
Math.min(1, parseFloat(parts[1]) || 0.5)
|
Math.min(1, parseFloat(parts[1]) || 0.5)
|
||||||
);
|
);
|
||||||
analysis.reasoning = parts[2] || "No reasoning provided";
|
analysis.reasoning = parts[2] || "No reasoning provided";
|
||||||
} else {
|
} else {
|
||||||
// Fallback parsing
|
// Fallback parsing
|
||||||
analysis.isRelevant =
|
analysis.isRelevant =
|
||||||
content.toUpperCase().includes("YES") ||
|
content.toUpperCase().includes("YES") ||
|
||||||
content.toLowerCase().includes("relevant");
|
content.toLowerCase().includes("relevant");
|
||||||
analysis.confidence = 0.6;
|
analysis.confidence = 0.6;
|
||||||
analysis.reasoning = content.substring(0, 100);
|
analysis.reasoning = content.substring(0, 100);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
analyses.push(analysis);
|
analyses.push(analysis);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we didn't get enough analyses, fill in defaults
|
// If we didn't get enough analyses, fill in defaults
|
||||||
while (analyses.length < posts.length) {
|
while (analyses.length < posts.length) {
|
||||||
analyses.push({
|
analyses.push({
|
||||||
postIndex: analyses.length + 1,
|
postIndex: analyses.length + 1,
|
||||||
isRelevant: false,
|
isRelevant: false,
|
||||||
confidence: 0.3,
|
confidence: 0.3,
|
||||||
reasoning: "AI response parsing failed",
|
reasoning: "AI response parsing failed",
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return analyses;
|
return analyses;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`Error in batch AI analysis: ${error.message}`);
|
logger.error(`Error in batch AI analysis: ${error.message}`);
|
||||||
|
|
||||||
// Fallback: mark all as relevant with low confidence
|
// Fallback: mark all as relevant with low confidence
|
||||||
return posts.map((_, i) => ({
|
return posts.map((_, i) => ({
|
||||||
postIndex: i + 1,
|
postIndex: i + 1,
|
||||||
isRelevant: true,
|
isRelevant: true,
|
||||||
confidence: 0.3,
|
confidence: 0.3,
|
||||||
reasoning: `Analysis failed: ${error.message}`,
|
reasoning: `Analysis failed: ${error.message}`,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Analyze a single post using local Ollama (fallback)
|
* Analyze a single post using local Ollama (fallback)
|
||||||
*/
|
*/
|
||||||
async function analyzeSinglePost(
|
async function analyzeSinglePost(
|
||||||
text,
|
text,
|
||||||
context,
|
context,
|
||||||
model = "mistral",
|
model = "mistral",
|
||||||
ollamaHost = "http://localhost:11434"
|
ollamaHost = "http://localhost:11434"
|
||||||
) {
|
) {
|
||||||
const prompt = `Analyze this LinkedIn post for relevance to: "${context}"
|
const prompt = `Analyze this LinkedIn post for relevance to: "${context}"
|
||||||
|
|
||||||
Post: "${text}"
|
Post: "${text}"
|
||||||
|
|
||||||
Is this post relevant to "${context}"? Provide:
|
Is this post relevant to "${context}"? Provide:
|
||||||
1. YES or NO
|
1. YES or NO
|
||||||
2. Confidence (0.0 to 1.0)
|
2. Confidence (0.0 to 1.0)
|
||||||
3. Brief reason
|
3. Brief reason
|
||||||
|
|
||||||
Format: YES/NO | 0.X | reason`;
|
Format: YES/NO | 0.X | reason`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await fetch(`${ollamaHost}/api/generate`, {
|
const response = await fetch(`${ollamaHost}/api/generate`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: {
|
headers: {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
},
|
},
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model: model,
|
model: model,
|
||||||
prompt: prompt,
|
prompt: prompt,
|
||||||
stream: false,
|
stream: false,
|
||||||
options: {
|
options: {
|
||||||
temperature: 0.3,
|
temperature: 0.3,
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
throw new Error(`Ollama API error: ${response.status}`);
|
throw new Error(`Ollama API error: ${response.status}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const data = await response.json();
|
const data = await response.json();
|
||||||
const aiResponse = data.response.trim();
|
const aiResponse = data.response.trim();
|
||||||
|
|
||||||
// Parse response
|
// Parse response
|
||||||
const parts = aiResponse.split("|").map((p) => p.trim());
|
const parts = aiResponse.split("|").map((p) => p.trim());
|
||||||
|
|
||||||
if (parts.length >= 3) {
|
if (parts.length >= 3) {
|
||||||
return {
|
return {
|
||||||
isRelevant: parts[0].toUpperCase().includes("YES"),
|
isRelevant: parts[0].toUpperCase().includes("YES"),
|
||||||
confidence: Math.max(0, Math.min(1, parseFloat(parts[1]) || 0.5)),
|
confidence: Math.max(0, Math.min(1, parseFloat(parts[1]) || 0.5)),
|
||||||
reasoning: parts[2],
|
reasoning: parts[2],
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
// Fallback parsing
|
// Fallback parsing
|
||||||
return {
|
return {
|
||||||
isRelevant:
|
isRelevant:
|
||||||
aiResponse.toLowerCase().includes("yes") ||
|
aiResponse.toLowerCase().includes("yes") ||
|
||||||
aiResponse.toLowerCase().includes("relevant"),
|
aiResponse.toLowerCase().includes("relevant"),
|
||||||
confidence: 0.6,
|
confidence: 0.6,
|
||||||
reasoning: aiResponse.substring(0, 100),
|
reasoning: aiResponse.substring(0, 100),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
return {
|
return {
|
||||||
isRelevant: true, // Default to include on error
|
isRelevant: true, // Default to include on error
|
||||||
confidence: 0.3,
|
confidence: 0.3,
|
||||||
reasoning: `Analysis failed: ${error.message}`,
|
reasoning: `Analysis failed: ${error.message}`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find the most recent results file if none specified
|
* Find the most recent results file if none specified
|
||||||
*/
|
*/
|
||||||
function findLatestResultsFile(resultsDir = "results") {
|
function findLatestResultsFile(resultsDir = "results") {
|
||||||
const fs = require("fs");
|
const fs = require("fs");
|
||||||
const path = require("path");
|
const path = require("path");
|
||||||
|
|
||||||
if (!fs.existsSync(resultsDir)) {
|
if (!fs.existsSync(resultsDir)) {
|
||||||
throw new Error("Results directory not found. Run the scraper first.");
|
throw new Error("Results directory not found. Run the scraper first.");
|
||||||
}
|
}
|
||||||
|
|
||||||
const files = fs
|
const files = fs
|
||||||
.readdirSync(resultsDir)
|
.readdirSync(resultsDir)
|
||||||
.filter(
|
.filter(
|
||||||
(f) =>
|
(f) =>
|
||||||
(f.startsWith("results-") || f.startsWith("linkedin-results-")) &&
|
(f.startsWith("results-") || f.startsWith("linkedin-results-")) &&
|
||||||
f.endsWith(".json") &&
|
f.endsWith(".json") &&
|
||||||
!f.includes("-ai-")
|
!f.includes("-ai-")
|
||||||
)
|
)
|
||||||
.sort()
|
.sort()
|
||||||
.reverse();
|
.reverse();
|
||||||
|
|
||||||
if (files.length === 0) {
|
if (files.length === 0) {
|
||||||
throw new Error("No results files found. Run the scraper first.");
|
throw new Error("No results files found. Run the scraper first.");
|
||||||
}
|
}
|
||||||
|
|
||||||
return path.join(resultsDir, files[0]);
|
return path.join(resultsDir, files[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
checkOllamaStatus,
|
checkOllamaStatus,
|
||||||
analyzeBatch,
|
analyzeBatch,
|
||||||
analyzeSinglePost,
|
analyzeSinglePost,
|
||||||
findLatestResultsFile,
|
findLatestResultsFile,
|
||||||
};
|
};
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -1,123 +1,123 @@
|
|||||||
const chalk = require("chalk");
|
const chalk = require("chalk");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Configurable logger with color support and level controls
|
* Configurable logger with color support and level controls
|
||||||
* Can enable/disable different log levels: debug, info, warning, error, success
|
* Can enable/disable different log levels: debug, info, warning, error, success
|
||||||
*/
|
*/
|
||||||
class Logger {
|
class Logger {
|
||||||
constructor(options = {}) {
|
constructor(options = {}) {
|
||||||
this.levels = {
|
this.levels = {
|
||||||
debug: options.debug !== false,
|
debug: options.debug !== false,
|
||||||
info: options.info !== false,
|
info: options.info !== false,
|
||||||
warning: options.warning !== false,
|
warning: options.warning !== false,
|
||||||
error: options.error !== false,
|
error: options.error !== false,
|
||||||
success: options.success !== false,
|
success: options.success !== false,
|
||||||
};
|
};
|
||||||
this.colors = options.colors !== false;
|
this.colors = options.colors !== false;
|
||||||
}
|
}
|
||||||
|
|
||||||
_formatMessage(level, message, prefix = "") {
|
_formatMessage(level, message, prefix = "") {
|
||||||
const timestamp = new Date().toLocaleTimeString();
|
const timestamp = new Date().toLocaleTimeString();
|
||||||
const fullMessage = `${prefix}${message}`;
|
const fullMessage = `${prefix}${message}`;
|
||||||
|
|
||||||
if (!this.colors) {
|
if (!this.colors) {
|
||||||
return `[${timestamp}] [${level.toUpperCase()}] ${fullMessage}`;
|
return `[${timestamp}] [${level.toUpperCase()}] ${fullMessage}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (level) {
|
switch (level) {
|
||||||
case "debug":
|
case "debug":
|
||||||
return chalk.gray(`[${timestamp}] [DEBUG] ${fullMessage}`);
|
return chalk.gray(`[${timestamp}] [DEBUG] ${fullMessage}`);
|
||||||
case "info":
|
case "info":
|
||||||
return chalk.blue(`[${timestamp}] [INFO] ${fullMessage}`);
|
return chalk.blue(`[${timestamp}] [INFO] ${fullMessage}`);
|
||||||
case "warning":
|
case "warning":
|
||||||
return chalk.yellow(`[${timestamp}] [WARNING] ${fullMessage}`);
|
return chalk.yellow(`[${timestamp}] [WARNING] ${fullMessage}`);
|
||||||
case "error":
|
case "error":
|
||||||
return chalk.red(`[${timestamp}] [ERROR] ${fullMessage}`);
|
return chalk.red(`[${timestamp}] [ERROR] ${fullMessage}`);
|
||||||
case "success":
|
case "success":
|
||||||
return chalk.green(`[${timestamp}] [SUCCESS] ${fullMessage}`);
|
return chalk.green(`[${timestamp}] [SUCCESS] ${fullMessage}`);
|
||||||
default:
|
default:
|
||||||
return `[${timestamp}] [${level.toUpperCase()}] ${fullMessage}`;
|
return `[${timestamp}] [${level.toUpperCase()}] ${fullMessage}`;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
debug(message) {
|
debug(message) {
|
||||||
if (this.levels.debug) {
|
if (this.levels.debug) {
|
||||||
console.log(this._formatMessage("debug", message));
|
console.log(this._formatMessage("debug", message));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
info(message) {
|
info(message) {
|
||||||
if (this.levels.info) {
|
if (this.levels.info) {
|
||||||
console.log(this._formatMessage("info", message));
|
console.log(this._formatMessage("info", message));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
warning(message) {
|
warning(message) {
|
||||||
if (this.levels.warning) {
|
if (this.levels.warning) {
|
||||||
console.warn(this._formatMessage("warning", message));
|
console.warn(this._formatMessage("warning", message));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
error(message) {
|
error(message) {
|
||||||
if (this.levels.error) {
|
if (this.levels.error) {
|
||||||
console.error(this._formatMessage("error", message));
|
console.error(this._formatMessage("error", message));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
success(message) {
|
success(message) {
|
||||||
if (this.levels.success) {
|
if (this.levels.success) {
|
||||||
console.log(this._formatMessage("success", message));
|
console.log(this._formatMessage("success", message));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convenience methods with emoji prefixes for better UX
|
// Convenience methods with emoji prefixes for better UX
|
||||||
step(message) {
|
step(message) {
|
||||||
this.info(`🚀 ${message}`);
|
this.info(`🚀 ${message}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
search(message) {
|
search(message) {
|
||||||
this.info(`🔍 ${message}`);
|
this.info(`🔍 ${message}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
ai(message) {
|
ai(message) {
|
||||||
this.info(`🧠 ${message}`);
|
this.info(`🧠 ${message}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
location(message) {
|
location(message) {
|
||||||
this.info(`📍 ${message}`);
|
this.info(`📍 ${message}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
file(message) {
|
file(message) {
|
||||||
this.info(`📄 ${message}`);
|
this.info(`📄 ${message}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Configure logger levels at runtime
|
// Configure logger levels at runtime
|
||||||
setLevel(level, enabled) {
|
setLevel(level, enabled) {
|
||||||
if (this.levels.hasOwnProperty(level)) {
|
if (this.levels.hasOwnProperty(level)) {
|
||||||
this.levels[level] = enabled;
|
this.levels[level] = enabled;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Disable all logging
|
// Disable all logging
|
||||||
silent() {
|
silent() {
|
||||||
Object.keys(this.levels).forEach((level) => {
|
Object.keys(this.levels).forEach((level) => {
|
||||||
this.levels[level] = false;
|
this.levels[level] = false;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable all logging
|
// Enable all logging
|
||||||
verbose() {
|
verbose() {
|
||||||
Object.keys(this.levels).forEach((level) => {
|
Object.keys(this.levels).forEach((level) => {
|
||||||
this.levels[level] = true;
|
this.levels[level] = true;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create default logger instance
|
// Create default logger instance
|
||||||
const logger = new Logger();
|
const logger = new Logger();
|
||||||
|
|
||||||
// Export both the class and default instance
|
// Export both the class and default instance
|
||||||
module.exports = {
|
module.exports = {
|
||||||
Logger,
|
Logger,
|
||||||
logger,
|
logger,
|
||||||
};
|
};
|
||||||
|
|||||||
@ -1,124 +1,124 @@
|
|||||||
/**
|
/**
|
||||||
* Shared test utilities for parsers
|
* Shared test utilities for parsers
|
||||||
* Common mocks, helpers, and test data
|
* Common mocks, helpers, and test data
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Mock Playwright page object for testing
|
* Mock Playwright page object for testing
|
||||||
*/
|
*/
|
||||||
function createMockPage() {
|
function createMockPage() {
|
||||||
return {
|
return {
|
||||||
goto: jest.fn().mockResolvedValue(undefined),
|
goto: jest.fn().mockResolvedValue(undefined),
|
||||||
waitForSelector: jest.fn().mockResolvedValue(undefined),
|
waitForSelector: jest.fn().mockResolvedValue(undefined),
|
||||||
$$: jest.fn().mockResolvedValue([]),
|
$$: jest.fn().mockResolvedValue([]),
|
||||||
$: jest.fn().mockResolvedValue(null),
|
$: jest.fn().mockResolvedValue(null),
|
||||||
textContent: jest.fn().mockResolvedValue(""),
|
textContent: jest.fn().mockResolvedValue(""),
|
||||||
close: jest.fn().mockResolvedValue(undefined),
|
close: jest.fn().mockResolvedValue(undefined),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Mock fetch for AI API calls
|
* Mock fetch for AI API calls
|
||||||
*/
|
*/
|
||||||
function createMockFetch(response = {}) {
|
function createMockFetch(response = {}) {
|
||||||
return jest.fn().mockResolvedValue({
|
return jest.fn().mockResolvedValue({
|
||||||
ok: true,
|
ok: true,
|
||||||
status: 200,
|
status: 200,
|
||||||
json: jest.fn().mockResolvedValue(response),
|
json: jest.fn().mockResolvedValue(response),
|
||||||
...response,
|
...response,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sample test data for posts
|
* Sample test data for posts
|
||||||
*/
|
*/
|
||||||
const samplePosts = [
|
const samplePosts = [
|
||||||
{
|
{
|
||||||
text: "We are laying off 100 employees due to economic downturn.",
|
text: "We are laying off 100 employees due to economic downturn.",
|
||||||
keyword: "layoff",
|
keyword: "layoff",
|
||||||
profileLink: "https://linkedin.com/in/test-user-1",
|
profileLink: "https://linkedin.com/in/test-user-1",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
text: "Exciting opportunity! We are hiring senior developers for our team.",
|
text: "Exciting opportunity! We are hiring senior developers for our team.",
|
||||||
keyword: "hiring",
|
keyword: "hiring",
|
||||||
profileLink: "https://linkedin.com/in/test-user-2",
|
profileLink: "https://linkedin.com/in/test-user-2",
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sample location test data
|
* Sample location test data
|
||||||
*/
|
*/
|
||||||
/**
 * Sample Canadian location strings in the mixed formats the scraper
 * encounters (full "City, Province, Country" and abbreviated forms).
 */
const sampleLocations = [
  "Toronto, Ontario, Canada",
  "Vancouver, BC",
  "Calgary, Alberta",
  "Montreal, Quebec",
  "Halifax, Nova Scotia",
];
|
|
||||||
/**
|
/**
|
||||||
* Common test assertions
|
* Common test assertions
|
||||||
*/
|
*/
|
||||||
/**
 * Assert that `post` has the shape produced by the scraper:
 * the three required fields plus a non-empty string body.
 *
 * @param {object} post - Candidate post object under test.
 */
function expectValidPost(post) {
  for (const field of ["text", "keyword", "profileLink"]) {
    expect(post).toHaveProperty(field);
  }
  expect(typeof post.text).toBe("string");
  expect(post.text.length).toBeGreaterThan(0);
}
|
|
||||||
/**
 * Assert that an AI analysis result carries the expected contract:
 * a boolean relevance flag, a confidence in [0, 1], and a reasoning field.
 *
 * @param {object} analysis - Result object returned by the AI analyzer.
 */
function expectValidAIAnalysis(analysis) {
  ["isRelevant", "confidence", "reasoning"].forEach((field) => {
    expect(analysis).toHaveProperty(field);
  });
  expect(typeof analysis.isRelevant).toBe("boolean");
  expect(analysis.confidence).toBeGreaterThanOrEqual(0);
  expect(analysis.confidence).toBeLessThanOrEqual(1);
}
|
|
||||||
/**
 * Assert that `location` is a valid location value: simply a
 * non-empty string (no format validation beyond that).
 *
 * @param {string} location - Candidate location string under test.
 */
function expectValidLocation(location) {
  expect(typeof location).toBe("string");
  expect(location.length).toBeGreaterThan(0);
}
|
|
||||||
/**
|
/**
|
||||||
* Test environment setup
|
* Test environment setup
|
||||||
*/
|
*/
|
||||||
/**
 * Prepare the process for a test run: pin the environment variables the
 * code under test reads, and silence console output via jest spies.
 * Pair every call with `teardownTestEnv()` to undo both effects.
 */
function setupTestEnv() {
  // Pin environment variables the code under test reads.
  Object.assign(process.env, {
    NODE_ENV: "test",
    OLLAMA_HOST: "http://localhost:11434",
    AI_CONTEXT: "test context",
  });

  // Suppress console noise while tests run.
  for (const method of ["log", "error", "warn"]) {
    jest.spyOn(console, method).mockImplementation(() => {});
  }
}
|
|
||||||
/**
|
/**
|
||||||
* Clean up test environment
|
* Clean up test environment
|
||||||
*/
|
*/
|
||||||
/**
 * Undo everything `setupTestEnv()` did: restore the spied console methods
 * and remove the pinned environment variables.
 */
function teardownTestEnv() {
  // Restore the console spies installed by setupTestEnv().
  for (const method of ["log", "error", "warn"]) {
    console[method].mockRestore();
  }

  // Drop the environment variables set by setupTestEnv().
  for (const name of ["NODE_ENV", "OLLAMA_HOST", "AI_CONTEXT"]) {
    delete process.env[name];
  }
}
|
|
||||||
// Public surface of the shared test-helper module: page/fetch mocks,
// fixture data, reusable assertion helpers, and env setup/teardown.
module.exports = {
  createMockPage,
  createMockFetch,
  samplePosts,
  sampleLocations,
  expectValidPost,
  expectValidAIAnalysis,
  expectValidLocation,
  setupTestEnv,
  teardownTestEnv,
};
|
|||||||
@ -1,107 +1,107 @@
|
|||||||
/**
|
/**
|
||||||
* Text processing utilities for cleaning and validating content
|
* Text processing utilities for cleaning and validating content
|
||||||
* Extracted from linkedout.js for reuse across parsers
|
* Extracted from linkedout.js for reuse across parsers
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Clean text by removing hashtags, URLs, emojis, and normalizing whitespace
|
* Clean text by removing hashtags, URLs, emojis, and normalizing whitespace
|
||||||
*/
|
*/
|
||||||
/**
 * Clean raw post text: strip hashtags, "hashtag" UI artifacts, URLs, and
 * common emoji, then collapse runs of whitespace.
 *
 * @param {string} text - Raw text; non-string or falsy input yields "".
 * @returns {string} Cleaned, trimmed text.
 */
function cleanText(text) {
  if (!text || typeof text !== "string") {
    return "";
  }

  // Remove hashtags
  text = text.replace(/#\w+/g, "");

  // Remove hashtag mentions.
  // BUG FIX: strip "hashtag-xxx" BEFORE the bare "hashtag" word. The
  // previous order removed the "hashtag" prefix first, leaving a dangling
  // "-xxx" in the output and making the hyphenated replace unreachable.
  text = text.replace(/hashtag-\w+/gi, "");
  text = text.replace(/\bhashtag\b/gi, "");

  // Remove URLs
  text = text.replace(/https?:\/\/[^\s]+/g, "");

  // Remove emojis (Unicode ranges for common emoji)
  text = text.replace(
    /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}]/gu,
    ""
  );

  // Normalize whitespace
  text = text.replace(/\s+/g, " ").trim();

  return text;
}
|
|
||||||
/**
|
/**
|
||||||
* Check if text contains any of the specified keywords (case insensitive)
|
* Check if text contains any of the specified keywords (case insensitive)
|
||||||
*/
|
*/
|
||||||
/**
 * Check whether `text` contains any of the given keywords,
 * case-insensitively.
 *
 * @param {string} text - Text to search in.
 * @param {string[]} keywords - Keywords to look for.
 * @returns {boolean} True on the first match; false for falsy text or a
 *   non-array keyword list.
 */
function containsAnyKeyword(text, keywords) {
  if (!text || !Array.isArray(keywords)) {
    return false;
  }

  const haystack = text.toLowerCase();
  for (const keyword of keywords) {
    if (haystack.includes(keyword.toLowerCase())) {
      return true;
    }
  }
  return false;
}
|
|
||||||
/**
|
/**
|
||||||
* Validate if text meets basic quality criteria
|
* Validate if text meets basic quality criteria
|
||||||
*/
|
*/
|
||||||
/**
 * Validate that text meets basic quality criteria: it is a non-empty
 * string, at least `minLength` characters long, and contains at least one
 * alphanumeric character.
 *
 * @param {string} text - Candidate text.
 * @param {number} [minLength=30] - Minimum acceptable length.
 * @returns {boolean} True when all criteria hold.
 */
function isValidText(text, minLength = 30) {
  const isNonEmptyString = typeof text === "string" && text.length > 0;
  if (!isNonEmptyString) {
    return false;
  }

  if (text.length < minLength) {
    return false;
  }

  // Reject strings made only of punctuation/whitespace.
  return /[a-zA-Z0-9]/.test(text);
}
|
|
||||||
/**
|
/**
|
||||||
* Extract domain from URL
|
* Extract domain from URL
|
||||||
*/
|
*/
|
||||||
/**
 * Extract the hostname from a URL string.
 *
 * @param {string} url - Absolute URL.
 * @returns {?string} The hostname, or null for falsy/non-string/unparseable
 *   input.
 */
function extractDomain(url) {
  if (typeof url !== "string" || url.length === 0) {
    return null;
  }

  try {
    return new URL(url).hostname;
  } catch {
    // Not a parseable absolute URL.
    return null;
  }
}
|
|
||||||
/**
|
/**
|
||||||
* Normalize URL by removing query parameters and fragments
|
* Normalize URL by removing query parameters and fragments
|
||||||
*/
|
*/
|
||||||
/**
 * Normalize a URL by dropping its query string and fragment, keeping only
 * protocol, host, and path.
 *
 * @param {string} url - URL to normalize.
 * @returns {string} Normalized URL; "" for falsy/non-string input; the
 *   original string when it cannot be parsed as a URL.
 */
function normalizeUrl(url) {
  if (typeof url !== "string" || url.length === 0) {
    return "";
  }

  try {
    const { protocol, hostname, pathname } = new URL(url);
    return `${protocol}//${hostname}${pathname}`;
  } catch {
    // Unparseable input is passed through unchanged.
    return url;
  }
}
|
|
||||||
// Public surface of the text-utils module: cleaning, keyword matching,
// validation, and URL helpers shared across parsers.
module.exports = {
  cleanText,
  containsAnyKeyword,
  isValidText,
  extractDomain,
  normalizeUrl,
};
|
|||||||
@ -1,194 +1,194 @@
|
|||||||
/**
|
/**
|
||||||
* Test file for logger functionality
|
* Test file for logger functionality
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const { Logger, logger } = require("../src/logger");
|
const { Logger, logger } = require("../src/logger");
|
||||||
|
|
||||||
// Jest suite for the Logger: covers the default singleton, per-level
// enable/disable (via constructor options and setLevel), silent/verbose
// toggles, message formatting (timestamp, level tag, ANSI colors), and
// robustness against empty/object messages.
describe("Logger", () => {
  // Spies capture console output so tests can assert on it without noise.
  let consoleSpy;
  let consoleWarnSpy;
  let consoleErrorSpy;

  beforeEach(() => {
    consoleSpy = jest.spyOn(console, "log").mockImplementation();
    consoleWarnSpy = jest.spyOn(console, "warn").mockImplementation();
    consoleErrorSpy = jest.spyOn(console, "error").mockImplementation();
  });

  afterEach(() => {
    consoleSpy.mockRestore();
    consoleWarnSpy.mockRestore();
    consoleErrorSpy.mockRestore();
  });

  test("should create default logger instance", () => {
    expect(logger).toBeDefined();
    expect(logger).toBeInstanceOf(Logger);
  });

  test("should log info messages", () => {
    logger.info("Test message");
    expect(consoleSpy).toHaveBeenCalled();
  });

  test("should create custom logger with disabled levels", () => {
    const customLogger = new Logger({ debug: false });
    customLogger.debug("This should not log");
    expect(consoleSpy).not.toHaveBeenCalled();
  });

  test("should use emoji prefixes for convenience methods", () => {
    logger.step("Test step");
    logger.ai("Test AI");
    logger.location("Test location");
    expect(consoleSpy).toHaveBeenCalledTimes(3);
  });

  test("should configure levels at runtime", () => {
    const customLogger = new Logger();
    customLogger.setLevel("debug", false);
    customLogger.debug("This should not log");
    expect(consoleSpy).not.toHaveBeenCalled();
  });

  test("should go silent when requested", () => {
    const customLogger = new Logger();
    customLogger.silent();
    customLogger.info("This should not log");
    customLogger.error("This should not log");
    expect(consoleSpy).not.toHaveBeenCalled();
    expect(consoleErrorSpy).not.toHaveBeenCalled();
  });

  // Additional test cases for comprehensive coverage

  test("should log warning messages", () => {
    logger.warning("Test warning");
    expect(consoleWarnSpy).toHaveBeenCalled();
  });

  test("should log error messages", () => {
    logger.error("Test error");
    expect(consoleErrorSpy).toHaveBeenCalled();
  });

  test("should log success messages", () => {
    logger.success("Test success");
    expect(consoleSpy).toHaveBeenCalled();
  });

  test("should log debug messages", () => {
    logger.debug("Test debug");
    expect(consoleSpy).toHaveBeenCalled();
  });

  test("should respect disabled warning level", () => {
    const customLogger = new Logger({ warning: false });
    customLogger.warning("This should not log");
    expect(consoleWarnSpy).not.toHaveBeenCalled();
  });

  test("should respect disabled error level", () => {
    const customLogger = new Logger({ error: false });
    customLogger.error("This should not log");
    expect(consoleErrorSpy).not.toHaveBeenCalled();
  });

  test("should respect disabled success level", () => {
    const customLogger = new Logger({ success: false });
    customLogger.success("This should not log");
    expect(consoleSpy).not.toHaveBeenCalled();
  });

  test("should respect disabled info level", () => {
    const customLogger = new Logger({ info: false });
    customLogger.info("This should not log");
    expect(consoleSpy).not.toHaveBeenCalled();
  });

  test("should test all convenience methods", () => {
    // All five convenience methods route through console.log.
    logger.step("Test step");
    logger.search("Test search");
    logger.ai("Test AI");
    logger.location("Test location");
    logger.file("Test file");
    expect(consoleSpy).toHaveBeenCalledTimes(5);
  });

  test("should enable all levels with verbose method", () => {
    const customLogger = new Logger({ debug: false, info: false });
    customLogger.verbose();
    customLogger.debug("This should log");
    customLogger.info("This should log");
    expect(consoleSpy).toHaveBeenCalledTimes(2);
  });

  test("should handle setLevel with invalid level gracefully", () => {
    const customLogger = new Logger();
    expect(() => {
      customLogger.setLevel("invalid", false);
    }).not.toThrow();
  });

  test("should format messages with timestamps", () => {
    logger.info("Test message");
    const loggedMessage = consoleSpy.mock.calls[0][0];
    // e.g. "[9:05:12]" — hours may be one or two digits.
    expect(loggedMessage).toMatch(/\[\d{1,2}:\d{2}:\d{2}\]/);
  });

  test("should include level in formatted messages", () => {
    logger.info("Test message");
    const loggedMessage = consoleSpy.mock.calls[0][0];
    expect(loggedMessage).toContain("[INFO]");
  });

  test("should disable colors when colors option is false", () => {
    const customLogger = new Logger({ colors: false });
    customLogger.info("Test message");
    const loggedMessage = consoleSpy.mock.calls[0][0];
    // Should not contain ANSI color codes
    expect(loggedMessage).not.toMatch(/\u001b\[/);
  });

  test("should enable colors by default", () => {
    logger.info("Test message");
    const loggedMessage = consoleSpy.mock.calls[0][0];
    // Should contain ANSI color codes
    expect(loggedMessage).toMatch(/\u001b\[/);
  });

  test("should handle multiple level configurations", () => {
    const customLogger = new Logger({
      debug: false,
      info: true,
      warning: false,
      error: true,
      success: false,
    });

    customLogger.debug("Should not log");
    customLogger.info("Should log");
    customLogger.warning("Should not log");
    customLogger.error("Should log");
    customLogger.success("Should not log");

    expect(consoleSpy).toHaveBeenCalledTimes(1);
    expect(consoleErrorSpy).toHaveBeenCalledTimes(1);
    expect(consoleWarnSpy).not.toHaveBeenCalled();
  });

  test("should handle empty or undefined messages", () => {
    expect(() => {
      logger.info("");
      logger.info(undefined);
      logger.info(null);
    }).not.toThrow();
  });

  test("should handle complex message objects", () => {
    const testObj = { key: "value", nested: { data: "test" } };
    expect(() => {
      logger.info(testObj);
    }).not.toThrow();
  });
});
|
|||||||
@ -1,94 +1,94 @@
|
|||||||
/**
|
/**
|
||||||
* Authentication Manager
|
* Authentication Manager
|
||||||
*
|
*
|
||||||
* Handles login/authentication for different sites
|
* Handles login/authentication for different sites
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
 * Authentication Manager
 *
 * Dispatches login flows for different sites. Each supported site has a
 * strategy method; `authenticate` resolves the strategy by lower-cased
 * site name and runs it against a page owned by the CoreParser.
 */
class AuthManager {
  constructor(coreParser) {
    // The CoreParser owns the browser/pages; we only borrow them by pageId.
    this.coreParser = coreParser;
  }

  /**
   * Authenticate to a specific site.
   *
   * @param {string} site - Site name (case-insensitive).
   * @param {object} credentials - Site-specific credentials.
   * @param {string} [pageId="default"] - Page registered with the CoreParser.
   * @throws {Error} When no strategy is registered for `site`.
   */
  async authenticate(site, credentials, pageId = "default") {
    // Registry of per-site login flows, keyed by lower-case site name.
    const handlers = {
      linkedin: this.authenticateLinkedIn.bind(this),
      // Add more auth strategies as needed
    };

    const handler = handlers[site.toLowerCase()];
    if (handler === undefined) {
      throw new Error(`No authentication strategy found for site: ${site}`);
    }

    return await handler(credentials, pageId);
  }

  /**
   * LinkedIn authentication strategy: open the login form, submit the
   * credentials, and wait for the logged-in avatar to confirm success.
   *
   * @throws {Error} On missing credentials, missing page, or login failure.
   */
  async authenticateLinkedIn(credentials, pageId = "default") {
    const { username, password } = credentials;
    if (!username || !password) {
      throw new Error("LinkedIn authentication requires username and password");
    }

    const loginPage = this.coreParser.getPage(pageId);
    if (!loginPage) {
      throw new Error(`Page with ID '${pageId}' not found`);
    }

    try {
      // Navigate to LinkedIn login
      await this.coreParser.navigateTo("https://www.linkedin.com/login", {
        pageId,
      });

      // Fill credentials and submit the form.
      await loginPage.fill('input[name="session_key"]', username);
      await loginPage.fill('input[name="session_password"]', password);
      await loginPage.click('button[type="submit"]');

      // The nav-bar avatar only renders once login succeeded.
      await loginPage.waitForSelector("img.global-nav__me-photo", {
        timeout: 15000,
      });

      return true;
    } catch (error) {
      throw new Error(`LinkedIn authentication failed: ${error.message}`);
    }
  }

  /**
   * Check if the given page is currently authenticated to a site.
   * Unknown sites and missing pages report false rather than throwing.
   */
  async isAuthenticated(site, pageId = "default") {
    const page = this.coreParser.getPage(pageId);
    if (!page) {
      return false;
    }

    const checkers = {
      linkedin: async () => {
        // Short timeout: we only want a quick presence probe.
        try {
          await page.waitForSelector("img.global-nav__me-photo", {
            timeout: 2000,
          });
          return true;
        } catch {
          return false;
        }
      },
    };

    const checker = checkers[site.toLowerCase()];
    if (typeof checker !== "function") {
      return false;
    }
    return await checker();
  }
}
|
|
||||||
// Exported for CoreParser, which instantiates one AuthManager per parser.
module.exports = AuthManager;
|
|||||||
63
core-parser/index.js
Normal file
63
core-parser/index.js
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
const playwright = require('playwright');
|
||||||
|
const AuthManager = require('./auth-manager');
|
||||||
|
const NavigationManager = require('./navigation');
|
||||||
|
|
||||||
|
/**
 * CoreParser — owns the Playwright browser/context and a registry of named
 * pages, and delegates authentication and navigation to dedicated managers.
 */
class CoreParser {
  /**
   * @param {object} [config] - Overrides merged over the defaults
   *   (`headless: true`, `timeout: 60000` ms).
   */
  constructor(config = {}) {
    this.config = {
      headless: true,
      timeout: 60000, // Increased default timeout
      ...config,
    };
    this.browser = null;
    this.context = null;
    this.pages = {};
    this.authManager = new AuthManager(this);
    this.navigationManager = new NavigationManager(this);
  }

  /** Launch the Chromium browser and create a fresh browsing context. */
  async init() {
    this.browser = await playwright.chromium.launch({
      headless: this.config.headless,
    });
    this.context = await this.browser.newContext();
  }

  /**
   * Create and register a new page under `id`, lazily booting the browser
   * on first use.
   */
  async createPage(id) {
    if (!this.browser) await this.init();
    const page = await this.context.newPage();
    this.pages[id] = page;
    return page;
  }

  /** Look up a previously created page; undefined if `id` is unknown. */
  getPage(id) {
    return this.pages[id];
  }

  /** Delegate authentication to the AuthManager. */
  async authenticate(site, credentials, pageId) {
    return this.authManager.authenticate(site, credentials, pageId);
  }

  /**
   * Navigate a page with this parser's defaults applied.
   *
   * BUG FIX: the destructured defaults (notably `waitUntil: "networkidle"`
   * and `timeout: this.config.timeout`) were previously computed and then
   * discarded — the raw `options` object was forwarded, so the
   * NavigationManager silently fell back to its own "domcontentloaded"
   * default and the intended networkidle behavior never took effect.
   * The resolved values are now forwarded explicitly.
   */
  async navigateTo(url, options = {}) {
    const {
      pageId = "default",
      waitUntil = "networkidle", // Changed default to networkidle
      retries = 1,
      retryDelay = 2000,
      timeout = this.config.timeout,
    } = options;

    return this.navigationManager.navigateTo(url, {
      ...options,
      pageId,
      waitUntil,
      retries,
      retryDelay,
      timeout,
    });
  }

  /** Close the browser and reset all state so the parser can be re-inited. */
  async cleanup() {
    if (this.browser) {
      await this.browser.close();
      this.browser = null;
      this.context = null;
      this.pages = {};
    }
  }
}
||||||
|
|
||||||
|
// Exported so site-specific parsers can instantiate or extend the core.
module.exports = CoreParser;
@ -1,131 +1,131 @@
|
|||||||
/**
|
/**
|
||||||
* Navigation Manager
|
* Navigation Manager
|
||||||
*
|
*
|
||||||
* Handles page navigation with error handling, retries, and logging
|
* Handles page navigation with error handling, retries, and logging
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class NavigationManager {
|
class NavigationManager {
|
||||||
constructor(coreParser) {
  // Keep a reference to the owning CoreParser so navigation can look up
  // registered pages and read shared config (e.g. the default timeout).
  this.coreParser = coreParser;
}
|
|
||||||
/**
|
/**
|
||||||
* Navigate to URL with comprehensive error handling
|
* Navigate to URL with comprehensive error handling
|
||||||
*/
|
*/
|
||||||
/**
 * Navigate a registered page to `url`, retrying on failure.
 *
 * @param {string} url - Destination URL.
 * @param {object} [options]
 * @param {string} [options.pageId="default"] - Page registered with CoreParser.
 * @param {string} [options.waitUntil="domcontentloaded"] - Playwright goto
 *   lifecycle event to wait for.
 * @param {number} [options.retries=1] - Extra attempts after the first.
 * @param {number} [options.retryDelay=2000] - Delay between attempts (ms).
 * @param {number} [options.timeout] - Per-attempt timeout; defaults to the
 *   CoreParser's configured timeout.
 * @returns {Promise<boolean>} true on success.
 * @throws {Error} If the page is unknown, or every attempt fails.
 */
async navigateTo(url, options = {}) {
  const {
    pageId = "default",
    waitUntil = "domcontentloaded",
    retries = 1,
    retryDelay = 2000,
    timeout = this.coreParser.config.timeout,
  } = options;

  const page = this.coreParser.getPage(pageId);
  if (!page) {
    throw new Error(`Page with ID '${pageId}' not found`);
  }

  let lastError;

  // `retries` counts extra attempts, so the loop runs retries + 1 times.
  for (let attempt = 0; attempt <= retries; attempt++) {
    try {
      console.log(
        `🌐 Navigating to: ${url} (attempt ${attempt + 1}/${retries + 1})`
      );

      await page.goto(url, {
        waitUntil,
        timeout,
      });

      console.log(`✅ Navigation successful: ${url}`);
      return true;
    } catch (error) {
      lastError = error;
      console.warn(
        `⚠️ Navigation attempt ${attempt + 1} failed: ${error.message}`
      );

      if (attempt < retries) {
        console.log(`🔄 Retrying in ${retryDelay}ms...`);
        // NOTE(review): this.delay is presumably a sleep helper defined
        // elsewhere in this class — confirm it exists past this excerpt.
        await this.delay(retryDelay);
      }
    }
  }

  // All attempts failed
  const errorMessage = `Navigation failed after ${retries + 1} attempts: ${
    lastError.message
  }`;
  console.error(`❌ ${errorMessage}`);
  throw new Error(errorMessage);
}
|
|
||||||
/**
|
/**
|
||||||
* Navigate and wait for specific selector
|
* Navigate and wait for specific selector
|
||||||
*/
|
*/
|
||||||
async navigateAndWaitFor(url, selector, options = {}) {
|
async navigateAndWaitFor(url, selector, options = {}) {
|
||||||
await this.navigateTo(url, options);
|
await this.navigateTo(url, options);
|
||||||
|
|
||||||
const { pageId = "default", timeout = this.coreParser.config.timeout } =
|
const { pageId = "default", timeout = this.coreParser.config.timeout } =
|
||||||
options;
|
options;
|
||||||
const page = this.coreParser.getPage(pageId);
|
const page = this.coreParser.getPage(pageId);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await page.waitForSelector(selector, { timeout });
|
await page.waitForSelector(selector, { timeout });
|
||||||
console.log(`✅ Selector found: ${selector}`);
|
console.log(`✅ Selector found: ${selector}`);
|
||||||
return true;
|
return true;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.warn(`⚠️ Selector not found: ${selector} - ${error.message}`);
|
console.warn(`⚠️ Selector not found: ${selector} - ${error.message}`);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if current page has specific content
|
* Check if current page has specific content
|
||||||
*/
|
*/
|
||||||
async hasContent(content, options = {}) {
|
async hasContent(content, options = {}) {
|
||||||
const { pageId = "default", timeout = 5000 } = options;
|
const { pageId = "default", timeout = 5000 } = options;
|
||||||
const page = this.coreParser.getPage(pageId);
|
const page = this.coreParser.getPage(pageId);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await page.waitForFunction(
|
await page.waitForFunction(
|
||||||
(text) => document.body.innerText.includes(text),
|
(text) => document.body.innerText.includes(text),
|
||||||
content,
|
content,
|
||||||
{ timeout }
|
{ timeout }
|
||||||
);
|
);
|
||||||
return true;
|
return true;
|
||||||
} catch {
|
} catch {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility delay function
|
* Utility delay function
|
||||||
*/
|
*/
|
||||||
async delay(ms) {
|
async delay(ms) {
|
||||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get current page URL
|
* Get current page URL
|
||||||
*/
|
*/
|
||||||
getCurrentUrl(pageId = "default") {
|
getCurrentUrl(pageId = "default") {
|
||||||
const page = this.coreParser.getPage(pageId);
|
const page = this.coreParser.getPage(pageId);
|
||||||
return page ? page.url() : null;
|
return page ? page.url() : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Take screenshot for debugging
|
* Take screenshot for debugging
|
||||||
*/
|
*/
|
||||||
async screenshot(filepath, pageId = "default") {
|
async screenshot(filepath, pageId = "default") {
|
||||||
const page = this.coreParser.getPage(pageId);
|
const page = this.coreParser.getPage(pageId);
|
||||||
if (page) {
|
if (page) {
|
||||||
await page.screenshot({ path: filepath });
|
await page.screenshot({ path: filepath });
|
||||||
console.log(`📸 Screenshot saved: ${filepath}`);
|
console.log(`📸 Screenshot saved: ${filepath}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = NavigationManager;
|
module.exports = NavigationManager;
|
||||||
|
|||||||
@ -1,27 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "core-parser",
|
"name": "core-parser",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"description": "Core browser automation and parsing engine for all parsers",
|
"main": "index.js",
|
||||||
"main": "index.js",
|
"description": "Core parser utilities for browser management",
|
||||||
"scripts": {
|
"dependencies": {}
|
||||||
"test": "jest",
|
}
|
||||||
"install:browsers": "npx playwright install chromium"
|
|
||||||
},
|
|
||||||
"keywords": [
|
|
||||||
"parser",
|
|
||||||
"playwright",
|
|
||||||
"browser",
|
|
||||||
"automation",
|
|
||||||
"core"
|
|
||||||
],
|
|
||||||
"author": "Job Market Intelligence Team",
|
|
||||||
"license": "ISC",
|
|
||||||
"type": "commonjs",
|
|
||||||
"dependencies": {
|
|
||||||
"playwright": "^1.53.2",
|
|
||||||
"dotenv": "^17.0.0"
|
|
||||||
},
|
|
||||||
"devDependencies": {
|
|
||||||
"jest": "^29.7.0"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@ -1,497 +1,497 @@
|
|||||||
# Job Search Parser - Job Market Intelligence
|
# Job Search Parser - Job Market Intelligence
|
||||||
|
|
||||||
Specialized parser for job market intelligence, tracking job postings, market trends, and competitive analysis. Focuses on tech roles and industry insights.
|
Specialized parser for job market intelligence, tracking job postings, market trends, and competitive analysis. Focuses on tech roles and industry insights.
|
||||||
|
|
||||||
## 🎯 Purpose
|
## 🎯 Purpose
|
||||||
|
|
||||||
The Job Search Parser is designed to:
|
The Job Search Parser is designed to:
|
||||||
|
|
||||||
- **Track Job Market Trends**: Monitor demand for specific roles and skills
|
- **Track Job Market Trends**: Monitor demand for specific roles and skills
|
||||||
- **Competitive Intelligence**: Analyze salary ranges and requirements
|
- **Competitive Intelligence**: Analyze salary ranges and requirements
|
||||||
- **Industry Insights**: Track hiring patterns across different sectors
|
- **Industry Insights**: Track hiring patterns across different sectors
|
||||||
- **Skill Gap Analysis**: Identify in-demand technologies and frameworks
|
- **Skill Gap Analysis**: Identify in-demand technologies and frameworks
|
||||||
- **Market Demand Forecasting**: Predict job market trends
|
- **Market Demand Forecasting**: Predict job market trends
|
||||||
|
|
||||||
## 🚀 Features
|
## 🚀 Features
|
||||||
|
|
||||||
### Core Functionality
|
### Core Functionality
|
||||||
|
|
||||||
- **Multi-Source Aggregation**: Collect job data from multiple platforms
|
- **Multi-Source Aggregation**: Collect job data from multiple platforms
|
||||||
- **Role-Specific Tracking**: Focus on tech roles and emerging positions
|
- **Role-Specific Tracking**: Focus on tech roles and emerging positions
|
||||||
- **Skill Analysis**: Extract and categorize required skills
|
- **Skill Analysis**: Extract and categorize required skills
|
||||||
- **Salary Intelligence**: Track compensation ranges and trends
|
- **Salary Intelligence**: Track compensation ranges and trends
|
||||||
- **Company Intelligence**: Monitor hiring companies and patterns
|
- **Company Intelligence**: Monitor hiring companies and patterns
|
||||||
|
|
||||||
### Advanced Features
|
### Advanced Features
|
||||||
|
|
||||||
- **Market Trend Analysis**: Identify growing and declining job categories
|
- **Market Trend Analysis**: Identify growing and declining job categories
|
||||||
- **Geographic Distribution**: Track job distribution by location
|
- **Geographic Distribution**: Track job distribution by location
|
||||||
- **Experience Level Analysis**: Entry, mid, senior level tracking
|
- **Experience Level Analysis**: Entry, mid, senior level tracking
|
||||||
- **Remote Work Trends**: Monitor remote/hybrid work patterns
|
- **Remote Work Trends**: Monitor remote/hybrid work patterns
|
||||||
- **Technology Stack Tracking**: Framework and tool popularity
|
- **Technology Stack Tracking**: Framework and tool popularity
|
||||||
|
|
||||||
## 🌐 Supported Job Sites
|
## 🌐 Supported Job Sites
|
||||||
|
|
||||||
### ✅ Implemented Parsers
|
### ✅ Implemented Parsers
|
||||||
|
|
||||||
#### SkipTheDrive Parser
|
#### SkipTheDrive Parser
|
||||||
|
|
||||||
Remote job board specializing in work-from-home positions.
|
Remote job board specializing in work-from-home positions.
|
||||||
|
|
||||||
**Features:**
|
**Features:**
|
||||||
|
|
||||||
- Keyword-based job search with relevance sorting
|
- Keyword-based job search with relevance sorting
|
||||||
- Job type filtering (full-time, part-time, contract)
|
- Job type filtering (full-time, part-time, contract)
|
||||||
- Multi-page result parsing with pagination
|
- Multi-page result parsing with pagination
|
||||||
- Featured/sponsored job identification
|
- Featured/sponsored job identification
|
||||||
- AI-powered job relevance analysis
|
- AI-powered job relevance analysis
|
||||||
- Automatic duplicate detection
|
- Automatic duplicate detection
|
||||||
|
|
||||||
**Usage:**
|
**Usage:**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Parse SkipTheDrive for QA automation jobs
|
# Parse SkipTheDrive for QA automation jobs
|
||||||
node index.js --sites=skipthedrive --keywords="automation qa,qa engineer"
|
node index.js --sites=skipthedrive --keywords="automation qa,qa engineer"
|
||||||
|
|
||||||
# Filter by job type
|
# Filter by job type
|
||||||
JOB_TYPES="full time,contract" node index.js --sites=skipthedrive
|
JOB_TYPES="full time,contract" node index.js --sites=skipthedrive
|
||||||
|
|
||||||
# Run demo with limited results
|
# Run demo with limited results
|
||||||
node index.js --sites=skipthedrive --demo
|
node index.js --sites=skipthedrive --demo
|
||||||
```
|
```
|
||||||
|
|
||||||
### 🚧 Planned Parsers
|
### 🚧 Planned Parsers
|
||||||
|
|
||||||
- **Indeed**: Comprehensive job aggregator
|
- **Indeed**: Comprehensive job aggregator
|
||||||
- **Glassdoor**: Jobs with company reviews and salary data
|
- **Glassdoor**: Jobs with company reviews and salary data
|
||||||
- **Monster**: Traditional job board
|
- **Monster**: Traditional job board
|
||||||
- **SimplyHired**: Job aggregator with salary estimates
|
- **SimplyHired**: Job aggregator with salary estimates
|
||||||
- **LinkedIn Jobs**: Professional network job postings
|
- **LinkedIn Jobs**: Professional network job postings
|
||||||
- **AngelList**: Startup and tech jobs
|
- **AngelList**: Startup and tech jobs
|
||||||
- **Remote.co**: Dedicated remote work jobs
|
- **Remote.co**: Dedicated remote work jobs
|
||||||
- **FlexJobs**: Flexible and remote positions
|
- **FlexJobs**: Flexible and remote positions
|
||||||
|
|
||||||
## 📦 Installation
|
## 📦 Installation
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Install dependencies
|
# Install dependencies
|
||||||
npm install
|
npm install
|
||||||
|
|
||||||
# Run tests
|
# Run tests
|
||||||
npm test
|
npm test
|
||||||
|
|
||||||
# Run demo
|
# Run demo
|
||||||
node demo.js
|
node demo.js
|
||||||
```
|
```
|
||||||
|
|
||||||
## 🔧 Configuration
|
## 🔧 Configuration
|
||||||
|
|
||||||
### Environment Variables
|
### Environment Variables
|
||||||
|
|
||||||
Create a `.env` file in the parser directory:
|
Create a `.env` file in the parser directory:
|
||||||
|
|
||||||
```env
|
```env
|
||||||
# Job Search Configuration
|
# Job Search Configuration
|
||||||
SEARCH_SOURCES=linkedin,indeed,glassdoor
|
SEARCH_SOURCES=linkedin,indeed,glassdoor
|
||||||
TARGET_ROLES=software engineer,data scientist,product manager
|
TARGET_ROLES=software engineer,data scientist,product manager
|
||||||
LOCATION_FILTER=Toronto,Vancouver,Calgary
|
LOCATION_FILTER=Toronto,Vancouver,Calgary
|
||||||
EXPERIENCE_LEVELS=entry,mid,senior
|
EXPERIENCE_LEVELS=entry,mid,senior
|
||||||
REMOTE_PREFERENCE=remote,hybrid,onsite
|
REMOTE_PREFERENCE=remote,hybrid,onsite
|
||||||
|
|
||||||
# Analysis Configuration
|
# Analysis Configuration
|
||||||
ENABLE_SALARY_ANALYSIS=true
|
ENABLE_SALARY_ANALYSIS=true
|
||||||
ENABLE_SKILL_ANALYSIS=true
|
ENABLE_SKILL_ANALYSIS=true
|
||||||
ENABLE_TREND_ANALYSIS=true
|
ENABLE_TREND_ANALYSIS=true
|
||||||
MIN_SALARY=50000
|
MIN_SALARY=50000
|
||||||
MAX_SALARY=200000
|
MAX_SALARY=200000
|
||||||
|
|
||||||
# Output Configuration
|
# Output Configuration
|
||||||
OUTPUT_FORMAT=json,csv
|
OUTPUT_FORMAT=json,csv
|
||||||
SAVE_RAW_DATA=true
|
SAVE_RAW_DATA=true
|
||||||
ANALYSIS_INTERVAL=daily
|
ANALYSIS_INTERVAL=daily
|
||||||
```
|
```
|
||||||
|
|
||||||
### Command Line Options
|
### Command Line Options
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Basic usage
|
# Basic usage
|
||||||
node index.js
|
node index.js
|
||||||
|
|
||||||
# Specific roles
|
# Specific roles
|
||||||
node index.js --roles="frontend developer,backend developer"
|
node index.js --roles="frontend developer,backend developer"
|
||||||
|
|
||||||
# Geographic focus
|
# Geographic focus
|
||||||
node index.js --locations="Toronto,Vancouver"
|
node index.js --locations="Toronto,Vancouver"
|
||||||
|
|
||||||
# Experience level
|
# Experience level
|
||||||
node index.js --experience="senior"
|
node index.js --experience="senior"
|
||||||
|
|
||||||
# Output format
|
# Output format
|
||||||
node index.js --output=results/job-market-analysis.json
|
node index.js --output=results/job-market-analysis.json
|
||||||
```
|
```
|
||||||
|
|
||||||
**Available Options:**
|
**Available Options:**
|
||||||
|
|
||||||
- `--roles="role1,role2"`: Target job roles
|
- `--roles="role1,role2"`: Target job roles
|
||||||
- `--locations="city1,city2"`: Geographic focus
|
- `--locations="city1,city2"`: Geographic focus
|
||||||
- `--experience="entry|mid|senior"`: Experience level
|
- `--experience="entry|mid|senior"`: Experience level
|
||||||
- `--remote="remote|hybrid|onsite"`: Remote work preference
|
- `--remote="remote|hybrid|onsite"`: Remote work preference
|
||||||
- `--salary-min=NUMBER`: Minimum salary filter
|
- `--salary-min=NUMBER`: Minimum salary filter
|
||||||
- `--salary-max=NUMBER`: Maximum salary filter
|
- `--salary-max=NUMBER`: Maximum salary filter
|
||||||
- `--output=FILE`: Output filename
|
- `--output=FILE`: Output filename
|
||||||
- `--format=json|csv`: Output format
|
- `--format=json|csv`: Output format
|
||||||
- `--trends`: Enable trend analysis
|
- `--trends`: Enable trend analysis
|
||||||
- `--skills`: Enable skill analysis
|
- `--skills`: Enable skill analysis
|
||||||
|
|
||||||
## 📊 Keywords
|
## 📊 Keywords
|
||||||
|
|
||||||
### Role-Specific Keywords
|
### Role-Specific Keywords
|
||||||
|
|
||||||
Place keyword CSV files in the `keywords/` directory:
|
Place keyword CSV files in the `keywords/` directory:
|
||||||
|
|
||||||
```
|
```
|
||||||
job-search-parser/
|
job-search-parser/
|
||||||
├── keywords/
|
├── keywords/
|
||||||
│ ├── job-search-keywords.csv # General job search terms
|
│ ├── job-search-keywords.csv # General job search terms
|
||||||
│ ├── tech-roles.csv # Technology roles
|
│ ├── tech-roles.csv # Technology roles
|
||||||
│ ├── data-roles.csv # Data science roles
|
│ ├── data-roles.csv # Data science roles
|
||||||
│ ├── management-roles.csv # Management positions
|
│ ├── management-roles.csv # Management positions
|
||||||
│ └── emerging-roles.csv # Emerging job categories
|
│ └── emerging-roles.csv # Emerging job categories
|
||||||
└── index.js
|
└── index.js
|
||||||
```
|
```
|
||||||
|
|
||||||
### Tech Roles Keywords
|
### Tech Roles Keywords
|
||||||
|
|
||||||
```csv
|
```csv
|
||||||
keyword
|
keyword
|
||||||
software engineer
|
software engineer
|
||||||
frontend developer
|
frontend developer
|
||||||
backend developer
|
backend developer
|
||||||
full stack developer
|
full stack developer
|
||||||
data scientist
|
data scientist
|
||||||
machine learning engineer
|
machine learning engineer
|
||||||
devops engineer
|
devops engineer
|
||||||
site reliability engineer
|
site reliability engineer
|
||||||
cloud architect
|
cloud architect
|
||||||
security engineer
|
security engineer
|
||||||
mobile developer
|
mobile developer
|
||||||
iOS developer
|
iOS developer
|
||||||
Android developer
|
Android developer
|
||||||
react developer
|
react developer
|
||||||
vue developer
|
vue developer
|
||||||
angular developer
|
angular developer
|
||||||
node.js developer
|
node.js developer
|
||||||
python developer
|
python developer
|
||||||
java developer
|
java developer
|
||||||
golang developer
|
golang developer
|
||||||
rust developer
|
rust developer
|
||||||
data engineer
|
data engineer
|
||||||
analytics engineer
|
analytics engineer
|
||||||
```
|
```
|
||||||
|
|
||||||
### Data Science Keywords
|
### Data Science Keywords
|
||||||
|
|
||||||
```csv
|
```csv
|
||||||
keyword
|
keyword
|
||||||
data scientist
|
data scientist
|
||||||
machine learning engineer
|
machine learning engineer
|
||||||
data analyst
|
data analyst
|
||||||
business analyst
|
business analyst
|
||||||
data engineer
|
data engineer
|
||||||
analytics engineer
|
analytics engineer
|
||||||
ML engineer
|
ML engineer
|
||||||
AI engineer
|
AI engineer
|
||||||
statistician
|
statistician
|
||||||
quantitative analyst
|
quantitative analyst
|
||||||
research scientist
|
research scientist
|
||||||
data architect
|
data architect
|
||||||
BI developer
|
BI developer
|
||||||
ETL developer
|
ETL developer
|
||||||
```
|
```
|
||||||
|
|
||||||
## 📈 Usage Examples
|
## 📈 Usage Examples
|
||||||
|
|
||||||
### Basic Job Search
|
### Basic Job Search
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Standard job market analysis
|
# Standard job market analysis
|
||||||
node index.js
|
node index.js
|
||||||
|
|
||||||
# Specific tech roles
|
# Specific tech roles
|
||||||
node index.js --roles="software engineer,data scientist"
|
node index.js --roles="software engineer,data scientist"
|
||||||
|
|
||||||
# Geographic focus
|
# Geographic focus
|
||||||
node index.js --locations="Toronto,Vancouver,Calgary"
|
node index.js --locations="Toronto,Vancouver,Calgary"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Advanced Analysis
|
### Advanced Analysis
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Senior level positions
|
# Senior level positions
|
||||||
node index.js --experience="senior" --salary-min=100000
|
node index.js --experience="senior" --salary-min=100000
|
||||||
|
|
||||||
# Remote work opportunities
|
# Remote work opportunities
|
||||||
node index.js --remote="remote" --roles="frontend developer"
|
node index.js --remote="remote" --roles="frontend developer"
|
||||||
|
|
||||||
# Trend analysis
|
# Trend analysis
|
||||||
node index.js --trends --skills --output=results/trends.json
|
node index.js --trends --skills --output=results/trends.json
|
||||||
```
|
```
|
||||||
|
|
||||||
### Market Intelligence
|
### Market Intelligence
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Salary analysis
|
# Salary analysis
|
||||||
node index.js --salary-min=80000 --salary-max=150000
|
node index.js --salary-min=80000 --salary-max=150000
|
||||||
|
|
||||||
# Skill gap analysis
|
# Skill gap analysis
|
||||||
node index.js --skills --roles="machine learning engineer"
|
node index.js --skills --roles="machine learning engineer"
|
||||||
|
|
||||||
# Competitive intelligence
|
# Competitive intelligence
|
||||||
node index.js --companies="Google,Microsoft,Amazon"
|
node index.js --companies="Google,Microsoft,Amazon"
|
||||||
```
|
```
|
||||||
|
|
||||||
## 📊 Output Format
|
## 📊 Output Format
|
||||||
|
|
||||||
### JSON Structure
|
### JSON Structure
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"timestamp": "2024-01-15T10:30:00Z",
|
"timestamp": "2024-01-15T10:30:00Z",
|
||||||
"search_parameters": {
|
"search_parameters": {
|
||||||
"roles": ["software engineer", "data scientist"],
|
"roles": ["software engineer", "data scientist"],
|
||||||
"locations": ["Toronto", "Vancouver"],
|
"locations": ["Toronto", "Vancouver"],
|
||||||
"experience_levels": ["mid", "senior"],
|
"experience_levels": ["mid", "senior"],
|
||||||
"remote_preference": ["remote", "hybrid"]
|
"remote_preference": ["remote", "hybrid"]
|
||||||
},
|
},
|
||||||
"total_jobs_found": 1250,
|
"total_jobs_found": 1250,
|
||||||
"analysis_duration_seconds": 45
|
"analysis_duration_seconds": 45
|
||||||
},
|
},
|
||||||
"market_overview": {
|
"market_overview": {
|
||||||
"total_jobs": 1250,
|
"total_jobs": 1250,
|
||||||
"average_salary": 95000,
|
"average_salary": 95000,
|
||||||
"salary_range": {
|
"salary_range": {
|
||||||
"min": 65000,
|
"min": 65000,
|
||||||
"max": 180000,
|
"max": 180000,
|
||||||
"median": 92000
|
"median": 92000
|
||||||
},
|
},
|
||||||
"remote_distribution": {
|
"remote_distribution": {
|
||||||
"remote": 45,
|
"remote": 45,
|
||||||
"hybrid": 35,
|
"hybrid": 35,
|
||||||
"onsite": 20
|
"onsite": 20
|
||||||
},
|
},
|
||||||
"experience_distribution": {
|
"experience_distribution": {
|
||||||
"entry": 15,
|
"entry": 15,
|
||||||
"mid": 45,
|
"mid": 45,
|
||||||
"senior": 40
|
"senior": 40
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"trends": {
|
"trends": {
|
||||||
"growing_skills": [
|
"growing_skills": [
|
||||||
{ "skill": "React", "growth_rate": 25 },
|
{ "skill": "React", "growth_rate": 25 },
|
||||||
{ "skill": "Python", "growth_rate": 18 },
|
{ "skill": "Python", "growth_rate": 18 },
|
||||||
{ "skill": "AWS", "growth_rate": 22 }
|
{ "skill": "AWS", "growth_rate": 22 }
|
||||||
],
|
],
|
||||||
"declining_skills": [
|
"declining_skills": [
|
||||||
{ "skill": "jQuery", "growth_rate": -12 },
|
{ "skill": "jQuery", "growth_rate": -12 },
|
||||||
{ "skill": "PHP", "growth_rate": -8 }
|
{ "skill": "PHP", "growth_rate": -8 }
|
||||||
],
|
],
|
||||||
"emerging_roles": ["AI Engineer", "DevSecOps Engineer", "Data Engineer"]
|
"emerging_roles": ["AI Engineer", "DevSecOps Engineer", "Data Engineer"]
|
||||||
},
|
},
|
||||||
"jobs": [
|
"jobs": [
|
||||||
{
|
{
|
||||||
"id": "job_1",
|
"id": "job_1",
|
||||||
"title": "Senior Software Engineer",
|
"title": "Senior Software Engineer",
|
||||||
"company": "TechCorp",
|
"company": "TechCorp",
|
||||||
"location": "Toronto, Ontario",
|
"location": "Toronto, Ontario",
|
||||||
"remote_type": "hybrid",
|
"remote_type": "hybrid",
|
||||||
"salary": {
|
"salary": {
|
||||||
"min": 100000,
|
"min": 100000,
|
||||||
"max": 140000,
|
"max": 140000,
|
||||||
"currency": "CAD"
|
"currency": "CAD"
|
||||||
},
|
},
|
||||||
"required_skills": ["React", "Node.js", "TypeScript", "AWS"],
|
"required_skills": ["React", "Node.js", "TypeScript", "AWS"],
|
||||||
"preferred_skills": ["GraphQL", "Docker", "Kubernetes"],
|
"preferred_skills": ["GraphQL", "Docker", "Kubernetes"],
|
||||||
"experience_level": "senior",
|
"experience_level": "senior",
|
||||||
"job_url": "https://example.com/job/1",
|
"job_url": "https://example.com/job/1",
|
||||||
"posted_date": "2024-01-10T09:00:00Z",
|
"posted_date": "2024-01-10T09:00:00Z",
|
||||||
"scraped_at": "2024-01-15T10:30:00Z"
|
"scraped_at": "2024-01-15T10:30:00Z"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"analysis": {
|
"analysis": {
|
||||||
"skill_demand": {
|
"skill_demand": {
|
||||||
"React": { "count": 45, "avg_salary": 98000 },
|
"React": { "count": 45, "avg_salary": 98000 },
|
||||||
"Python": { "count": 38, "avg_salary": 102000 },
|
"Python": { "count": 38, "avg_salary": 102000 },
|
||||||
"AWS": { "count": 32, "avg_salary": 105000 }
|
"AWS": { "count": 32, "avg_salary": 105000 }
|
||||||
},
|
},
|
||||||
"company_insights": {
|
"company_insights": {
|
||||||
"top_hirers": [
|
"top_hirers": [
|
||||||
{ "company": "TechCorp", "jobs": 25 },
|
{ "company": "TechCorp", "jobs": 25 },
|
||||||
{ "company": "StartupXYZ", "jobs": 18 }
|
{ "company": "StartupXYZ", "jobs": 18 }
|
||||||
],
|
],
|
||||||
"salary_leaders": [
|
"salary_leaders": [
|
||||||
{ "company": "BigTech", "avg_salary": 120000 },
|
{ "company": "BigTech", "avg_salary": 120000 },
|
||||||
{ "company": "FinTech", "avg_salary": 115000 }
|
{ "company": "FinTech", "avg_salary": 115000 }
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
### CSV Output
|
### CSV Output
|
||||||
|
|
||||||
The parser can also generate CSV files for easy analysis:
|
The parser can also generate CSV files for easy analysis:
|
||||||
|
|
||||||
```csv
|
```csv
|
||||||
job_id,title,company,location,remote_type,salary_min,salary_max,required_skills,experience_level,posted_date
|
job_id,title,company,location,remote_type,salary_min,salary_max,required_skills,experience_level,posted_date
|
||||||
job_1,Senior Software Engineer,TechCorp,Toronto,hybrid,100000,140000,"React,Node.js,TypeScript",senior,2024-01-10
|
job_1,Senior Software Engineer,TechCorp,Toronto,hybrid,100000,140000,"React,Node.js,TypeScript",senior,2024-01-10
|
||||||
job_2,Data Scientist,DataCorp,Vancouver,remote,90000,130000,"Python,SQL,ML",mid,2024-01-09
|
job_2,Data Scientist,DataCorp,Vancouver,remote,90000,130000,"Python,SQL,ML",mid,2024-01-09
|
||||||
```
|
```
|
||||||
|
|
||||||
## 🔒 Security & Best Practices
|
## 🔒 Security & Best Practices
|
||||||
|
|
||||||
### Data Privacy
|
### Data Privacy
|
||||||
|
|
||||||
- Respect job site terms of service
|
- Respect job site terms of service
|
||||||
- Implement appropriate rate limiting
|
- Implement appropriate rate limiting
|
||||||
- Store data securely and responsibly
|
- Store data securely and responsibly
|
||||||
- Anonymize sensitive information
|
- Anonymize sensitive information
|
||||||
|
|
||||||
### Rate Limiting
|
### Rate Limiting
|
||||||
|
|
||||||
- Implement delays between requests
|
- Implement delays between requests
|
||||||
- Respect API rate limits
|
- Respect API rate limits
|
||||||
- Use multiple data sources
|
- Use multiple data sources
|
||||||
- Monitor for blocking/detection
|
- Monitor for blocking/detection
|
||||||
|
|
||||||
### Legal Compliance
|
### Legal Compliance
|
||||||
|
|
||||||
- Educational and research purposes only
|
- Educational and research purposes only
|
||||||
- Respect website terms of service
|
- Respect website terms of service
|
||||||
- Implement data retention policies
|
- Implement data retention policies
|
||||||
- Monitor for legal changes
|
- Monitor for legal changes
|
||||||
|
|
||||||
## 🧪 Testing
|
## 🧪 Testing
|
||||||
|
|
||||||
### Run Tests
|
### Run Tests
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# All tests
|
# All tests
|
||||||
npm test
|
npm test
|
||||||
|
|
||||||
# Specific test suites
|
# Specific test suites
|
||||||
npm test -- --testNamePattern="JobSearch"
|
npm test -- --testNamePattern="JobSearch"
|
||||||
npm test -- --testNamePattern="Analysis"
|
npm test -- --testNamePattern="Analysis"
|
||||||
npm test -- --testNamePattern="Trends"
|
npm test -- --testNamePattern="Trends"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Test Coverage
|
### Test Coverage
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
npm run test:coverage
|
npm run test:coverage
|
||||||
```
|
```
|
||||||
|
|
||||||
## 🚀 Performance Optimization
|
## 🚀 Performance Optimization
|
||||||
|
|
||||||
### Recommended Settings
|
### Recommended Settings
|
||||||
|
|
||||||
#### Fast Analysis
|
#### Fast Analysis
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
node index.js --roles="software engineer" --locations="Toronto"
|
node index.js --roles="software engineer" --locations="Toronto"
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Comprehensive Analysis
|
#### Comprehensive Analysis
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
node index.js --trends --skills --experience="all"
|
node index.js --trends --skills --experience="all"
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Focused Intelligence
|
#### Focused Intelligence
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
node index.js --salary-min=80000 --remote="remote" --trends
|
node index.js --salary-min=80000 --remote="remote" --trends
|
||||||
```
|
```
|
||||||
|
|
||||||
### Performance Tips
|
### Performance Tips
|
||||||
|
|
||||||
- Use specific role filters to reduce data volume
|
- Use specific role filters to reduce data volume
|
||||||
- Implement caching for repeated searches
|
- Implement caching for repeated searches
|
||||||
- Use parallel processing for multiple sources
|
- Use parallel processing for multiple sources
|
||||||
- Optimize data storage and retrieval
|
- Optimize data storage and retrieval
|
||||||
|
|
||||||
## 🔧 Troubleshooting
|
## 🔧 Troubleshooting
|
||||||
|
|
||||||
### Common Issues
|
### Common Issues
|
||||||
|
|
||||||
#### Rate Limiting
|
#### Rate Limiting
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Reduce request frequency
|
# Reduce request frequency
|
||||||
export REQUEST_DELAY=2000
|
export REQUEST_DELAY=2000
|
||||||
node index.js
|
node index.js
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Data Source Issues
|
#### Data Source Issues
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Use specific sources
|
# Use specific sources
|
||||||
node index.js --sources="linkedin,indeed"
|
node index.js --sources="linkedin,indeed"
|
||||||
|
|
||||||
# Check source availability
|
# Check source availability
|
||||||
node index.js --test-sources
|
node index.js --test-sources
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Output Issues
|
#### Output Issues
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Check output directory
|
# Check output directory
|
||||||
mkdir -p results
|
mkdir -p results
|
||||||
node index.js --output=results/analysis.json
|
node index.js --output=results/analysis.json
|
||||||
|
|
||||||
# Verify file permissions
|
# Verify file permissions
|
||||||
chmod 755 results/
|
chmod 755 results/
|
||||||
```
|
```
|
||||||
|
|
||||||
## 📈 Monitoring & Analytics
|
## 📈 Monitoring & Analytics
|
||||||
|
|
||||||
### Key Metrics
|
### Key Metrics
|
||||||
|
|
||||||
- **Job Volume**: Total jobs found per search
|
- **Job Volume**: Total jobs found per search
|
||||||
- **Salary Trends**: Average and median salary changes
|
- **Salary Trends**: Average and median salary changes
|
||||||
- **Skill Demand**: Most requested skills
|
- **Skill Demand**: Most requested skills
|
||||||
- **Remote Adoption**: Remote work trend analysis
|
- **Remote Adoption**: Remote work trend analysis
|
||||||
- **Market Velocity**: Job posting frequency
|
- **Market Velocity**: Job posting frequency
|
||||||
|
|
||||||
### Dashboard Integration
|
### Dashboard Integration
|
||||||
|
|
||||||
- Real-time market monitoring
|
- Real-time market monitoring
|
||||||
- Trend visualization
|
- Trend visualization
|
||||||
- Salary benchmarking
|
- Salary benchmarking
|
||||||
- Skill gap analysis
|
- Skill gap analysis
|
||||||
- Competitive intelligence
|
- Competitive intelligence
|
||||||
|
|
||||||
## 🤝 Contributing
|
## 🤝 Contributing
|
||||||
|
|
||||||
### Development Setup
|
### Development Setup
|
||||||
|
|
||||||
1. Fork the repository
|
1. Fork the repository
|
||||||
2. Create feature branch
|
2. Create feature branch
|
||||||
3. Add tests for new functionality
|
3. Add tests for new functionality
|
||||||
4. Ensure all tests pass
|
4. Ensure all tests pass
|
||||||
5. Submit pull request
|
5. Submit pull request
|
||||||
|
|
||||||
### Code Standards
|
### Code Standards
|
||||||
|
|
||||||
- Follow existing code style
|
- Follow existing code style
|
||||||
- Add JSDoc comments
|
- Add JSDoc comments
|
||||||
- Maintain test coverage
|
- Maintain test coverage
|
||||||
- Update documentation
|
- Update documentation
|
||||||
|
|
||||||
## 📄 License
|
## 📄 License
|
||||||
|
|
||||||
This parser is part of the LinkedOut platform and follows the same licensing terms.
|
This parser is part of the LinkedOut platform and follows the same licensing terms.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
**Note**: This tool is designed for educational and research purposes. Always respect website terms of service and implement appropriate rate limiting and ethical usage practices.
|
**Note**: This tool is designed for educational and research purposes. Always respect website terms of service and implement appropriate rate limiting and ethical usage practices.
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -1,9 +1,9 @@
|
|||||||
keyword
|
keyword
|
||||||
qa automation
|
qa automation
|
||||||
automation test
|
automation test
|
||||||
sdet
|
sdet
|
||||||
qa lead
|
qa lead
|
||||||
automation lead
|
automation lead
|
||||||
playwright
|
playwright
|
||||||
cypress
|
cypress
|
||||||
quality assurance engineer
|
quality assurance engineer
|
||||||
|
@ -1,129 +1,129 @@
|
|||||||
#!/usr/bin/env node
|
#!/usr/bin/env node
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* SkipTheDrive Parser Demo
|
* SkipTheDrive Parser Demo
|
||||||
*
|
*
|
||||||
* Demonstrates the SkipTheDrive job parser functionality
|
* Demonstrates the SkipTheDrive job parser functionality
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const { parseSkipTheDrive } = require("./skipthedrive");
|
const { parseSkipTheDrive } = require("./skipthedrive");
|
||||||
const fs = require("fs");
|
const fs = require("fs");
|
||||||
const path = require("path");
|
const path = require("path");
|
||||||
const { logger } = require("../../ai-analyzer");
|
const { logger } = require("../../ai-analyzer");
|
||||||
|
|
||||||
// Load environment variables
|
// Load environment variables
|
||||||
require("dotenv").config({ path: path.join(__dirname, "..", ".env") });
|
require("dotenv").config({ path: path.join(__dirname, "..", ".env") });
|
||||||
|
|
||||||
async function runDemo() {
|
async function runDemo() {
|
||||||
logger.step("🚀 SkipTheDrive Parser Demo");
|
logger.step("🚀 SkipTheDrive Parser Demo");
|
||||||
|
|
||||||
// Demo configuration
|
// Demo configuration
|
||||||
const options = {
|
const options = {
|
||||||
// Search for QA automation jobs (from your example)
|
// Search for QA automation jobs (from your example)
|
||||||
keywords: process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
|
keywords: process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
|
||||||
"automation qa",
|
"automation qa",
|
||||||
"qa engineer",
|
"qa engineer",
|
||||||
"test automation",
|
"test automation",
|
||||||
],
|
],
|
||||||
|
|
||||||
// Job type filters - can be: "part time", "full time", "contract"
|
// Job type filters - can be: "part time", "full time", "contract"
|
||||||
jobTypes: process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],
|
jobTypes: process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],
|
||||||
|
|
||||||
// Location filter (optional)
|
// Location filter (optional)
|
||||||
locationFilter: process.env.LOCATION_FILTER || "",
|
locationFilter: process.env.LOCATION_FILTER || "",
|
||||||
|
|
||||||
// Maximum pages to parse
|
// Maximum pages to parse
|
||||||
maxPages: parseInt(process.env.MAX_PAGES) || 3,
|
maxPages: parseInt(process.env.MAX_PAGES) || 3,
|
||||||
|
|
||||||
// Browser headless mode
|
// Browser headless mode
|
||||||
headless: process.env.HEADLESS !== "false",
|
headless: process.env.HEADLESS !== "false",
|
||||||
|
|
||||||
// AI analysis
|
// AI analysis
|
||||||
enableAI: process.env.ENABLE_AI_ANALYSIS !== "false",
|
enableAI: process.env.ENABLE_AI_ANALYSIS !== "false",
|
||||||
aiContext: "remote QA and test automation job opportunities",
|
aiContext: "remote QA and test automation job opportunities",
|
||||||
};
|
};
|
||||||
|
|
||||||
logger.info("Configuration:");
|
logger.info("Configuration:");
|
||||||
logger.info(`- Keywords: ${options.keywords.join(", ")}`);
|
logger.info(`- Keywords: ${options.keywords.join(", ")}`);
|
||||||
logger.info(
|
logger.info(
|
||||||
`- Job Types: ${
|
`- Job Types: ${
|
||||||
options.jobTypes.length > 0 ? options.jobTypes.join(", ") : "All types"
|
options.jobTypes.length > 0 ? options.jobTypes.join(", ") : "All types"
|
||||||
}`
|
}`
|
||||||
);
|
);
|
||||||
logger.info(`- Location Filter: ${options.locationFilter || "None"}`);
|
logger.info(`- Location Filter: ${options.locationFilter || "None"}`);
|
||||||
logger.info(`- Max Pages: ${options.maxPages}`);
|
logger.info(`- Max Pages: ${options.maxPages}`);
|
||||||
logger.info(`- Headless: ${options.headless}`);
|
logger.info(`- Headless: ${options.headless}`);
|
||||||
logger.info(`- AI Analysis: ${options.enableAI}`);
|
logger.info(`- AI Analysis: ${options.enableAI}`);
|
||||||
logger.info("\nStarting parser...");
|
logger.info("\nStarting parser...");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
const results = await parseSkipTheDrive(options);
|
const results = await parseSkipTheDrive(options);
|
||||||
const duration = ((Date.now() - startTime) / 1000).toFixed(2);
|
const duration = ((Date.now() - startTime) / 1000).toFixed(2);
|
||||||
|
|
||||||
// Save results
|
// Save results
|
||||||
const timestamp = new Date()
|
const timestamp = new Date()
|
||||||
.toISOString()
|
.toISOString()
|
||||||
.replace(/[:.]/g, "-")
|
.replace(/[:.]/g, "-")
|
||||||
.slice(0, -5);
|
.slice(0, -5);
|
||||||
const resultsDir = path.join(__dirname, "..", "results");
|
const resultsDir = path.join(__dirname, "..", "results");
|
||||||
|
|
||||||
if (!fs.existsSync(resultsDir)) {
|
if (!fs.existsSync(resultsDir)) {
|
||||||
fs.mkdirSync(resultsDir, { recursive: true });
|
fs.mkdirSync(resultsDir, { recursive: true });
|
||||||
}
|
}
|
||||||
|
|
||||||
const resultsFile = path.join(
|
const resultsFile = path.join(
|
||||||
resultsDir,
|
resultsDir,
|
||||||
`skipthedrive-results-${timestamp}.json`
|
`skipthedrive-results-${timestamp}.json`
|
||||||
);
|
);
|
||||||
fs.writeFileSync(resultsFile, JSON.stringify(results, null, 2));
|
fs.writeFileSync(resultsFile, JSON.stringify(results, null, 2));
|
||||||
|
|
||||||
// Display summary
|
// Display summary
|
||||||
logger.step("\n📊 Parsing Summary:");
|
logger.step("\n📊 Parsing Summary:");
|
||||||
logger.info(`- Duration: ${duration} seconds`);
|
logger.info(`- Duration: ${duration} seconds`);
|
||||||
logger.info(`- Jobs Found: ${results.results.length}`);
|
logger.info(`- Jobs Found: ${results.results.length}`);
|
||||||
logger.info(`- Jobs Rejected: ${results.rejectedResults.length}`);
|
logger.info(`- Jobs Rejected: ${results.rejectedResults.length}`);
|
||||||
logger.file(`- Results saved to: ${resultsFile}`);
|
logger.file(`- Results saved to: ${resultsFile}`);
|
||||||
|
|
||||||
// Show sample results
|
// Show sample results
|
||||||
if (results.results.length > 0) {
|
if (results.results.length > 0) {
|
||||||
logger.info("\n🔍 Sample Jobs Found:");
|
logger.info("\n🔍 Sample Jobs Found:");
|
||||||
results.results.slice(0, 5).forEach((job, index) => {
|
results.results.slice(0, 5).forEach((job, index) => {
|
||||||
logger.info(`\n${index + 1}. ${job.title}`);
|
logger.info(`\n${index + 1}. ${job.title}`);
|
||||||
logger.info(` Company: ${job.company}`);
|
logger.info(` Company: ${job.company}`);
|
||||||
logger.info(` Posted: ${job.daysAgo}`);
|
logger.info(` Posted: ${job.daysAgo}`);
|
||||||
logger.info(` Featured: ${job.isFeatured ? "Yes" : "No"}`);
|
logger.info(` Featured: ${job.isFeatured ? "Yes" : "No"}`);
|
||||||
logger.info(` URL: ${job.jobUrl}`);
|
logger.info(` URL: ${job.jobUrl}`);
|
||||||
if (job.aiAnalysis) {
|
if (job.aiAnalysis) {
|
||||||
logger.ai(
|
logger.ai(
|
||||||
` AI Relevant: ${job.aiAnalysis.isRelevant ? "Yes" : "No"} (${(
|
` AI Relevant: ${job.aiAnalysis.isRelevant ? "Yes" : "No"} (${(
|
||||||
job.aiAnalysis.confidence * 100
|
job.aiAnalysis.confidence * 100
|
||||||
).toFixed(0)}% confidence)`
|
).toFixed(0)}% confidence)`
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Show rejection reasons
|
// Show rejection reasons
|
||||||
if (results.rejectedResults.length > 0) {
|
if (results.rejectedResults.length > 0) {
|
||||||
const rejectionReasons = {};
|
const rejectionReasons = {};
|
||||||
results.rejectedResults.forEach((job) => {
|
results.rejectedResults.forEach((job) => {
|
||||||
rejectionReasons[job.reason] = (rejectionReasons[job.reason] || 0) + 1;
|
rejectionReasons[job.reason] = (rejectionReasons[job.reason] || 0) + 1;
|
||||||
});
|
});
|
||||||
|
|
||||||
logger.info("\n❌ Rejection Reasons:");
|
logger.info("\n❌ Rejection Reasons:");
|
||||||
Object.entries(rejectionReasons).forEach(([reason, count]) => {
|
Object.entries(rejectionReasons).forEach(([reason, count]) => {
|
||||||
logger.info(` ${reason}: ${count}`);
|
logger.info(` ${reason}: ${count}`);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error("\n❌ Demo failed:", error.message);
|
logger.error("\n❌ Demo failed:", error.message);
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run the demo
|
// Run the demo
|
||||||
runDemo().catch((err) => {
|
runDemo().catch((err) => {
|
||||||
logger.error("Fatal error:", err);
|
logger.error("Fatal error:", err);
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
});
|
});
|
||||||
|
|||||||
@ -1,332 +1,332 @@
|
|||||||
/**
|
/**
|
||||||
* SkipTheDrive Job Parser
|
* SkipTheDrive Job Parser
|
||||||
*
|
*
|
||||||
* Parses remote job listings from SkipTheDrive.com
|
* Parses remote job listings from SkipTheDrive.com
|
||||||
* Supports keyword search, job type filters, and pagination
|
* Supports keyword search, job type filters, and pagination
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const { chromium } = require("playwright");
|
const { chromium } = require("playwright");
|
||||||
const path = require("path");
|
const path = require("path");
|
||||||
|
|
||||||
// Import from ai-analyzer core package
|
// Import from ai-analyzer core package
|
||||||
const {
|
const {
|
||||||
logger,
|
logger,
|
||||||
cleanText,
|
cleanText,
|
||||||
containsAnyKeyword,
|
containsAnyKeyword,
|
||||||
parseLocationFilters,
|
parseLocationFilters,
|
||||||
validateLocationAgainstFilters,
|
validateLocationAgainstFilters,
|
||||||
extractLocationFromProfile,
|
extractLocationFromProfile,
|
||||||
analyzeBatch,
|
analyzeBatch,
|
||||||
checkOllamaStatus,
|
checkOllamaStatus,
|
||||||
} = require("../../ai-analyzer");
|
} = require("../../ai-analyzer");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build search URL for SkipTheDrive
|
* Build search URL for SkipTheDrive
|
||||||
* @param {string} keyword - Search keyword
|
* @param {string} keyword - Search keyword
|
||||||
* @param {string} orderBy - Sort order (date, relevance)
|
* @param {string} orderBy - Sort order (date, relevance)
|
||||||
* @param {Array<string>} jobTypes - Job types to filter (part time, full time, contract)
|
* @param {Array<string>} jobTypes - Job types to filter (part time, full time, contract)
|
||||||
* @returns {string} - Formatted search URL
|
* @returns {string} - Formatted search URL
|
||||||
*/
|
*/
|
||||||
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
|
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
|
||||||
let url = `https://www.skipthedrive.com/?s=${encodeURIComponent(keyword)}`;
|
let url = `https://www.skipthedrive.com/?s=${encodeURIComponent(keyword)}`;
|
||||||
|
|
||||||
if (orderBy) {
|
if (orderBy) {
|
||||||
url += `&orderby=${orderBy}`;
|
url += `&orderby=${orderBy}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add job type filters
|
// Add job type filters
|
||||||
jobTypes.forEach((type) => {
|
jobTypes.forEach((type) => {
|
||||||
url += `&jobtype=${encodeURIComponent(type)}`;
|
url += `&jobtype=${encodeURIComponent(type)}`;
|
||||||
});
|
});
|
||||||
|
|
||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract job data from a single job listing element
|
* Extract job data from a single job listing element
|
||||||
* @param {Element} article - Job listing DOM element
|
* @param {Element} article - Job listing DOM element
|
||||||
* @returns {Object} - Extracted job data
|
* @returns {Object} - Extracted job data
|
||||||
*/
|
*/
|
||||||
async function extractJobData(article) {
|
async function extractJobData(article) {
|
||||||
try {
|
try {
|
||||||
// Extract job title and URL
|
// Extract job title and URL
|
||||||
const titleElement = await article.$("h2.post-title a");
|
const titleElement = await article.$("h2.post-title a");
|
||||||
const title = titleElement ? await titleElement.textContent() : "";
|
const title = titleElement ? await titleElement.textContent() : "";
|
||||||
const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";
|
const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";
|
||||||
|
|
||||||
// Extract date
|
// Extract date
|
||||||
const dateElement = await article.$("time.post-date");
|
const dateElement = await article.$("time.post-date");
|
||||||
const datePosted = dateElement
|
const datePosted = dateElement
|
||||||
? await dateElement.getAttribute("datetime")
|
? await dateElement.getAttribute("datetime")
|
||||||
: "";
|
: "";
|
||||||
const dateText = dateElement ? await dateElement.textContent() : "";
|
const dateText = dateElement ? await dateElement.textContent() : "";
|
||||||
|
|
||||||
// Extract company name
|
// Extract company name
|
||||||
const companyElement = await article.$(
|
const companyElement = await article.$(
|
||||||
".custom_fields_company_name_display_search_results"
|
".custom_fields_company_name_display_search_results"
|
||||||
);
|
);
|
||||||
let company = companyElement ? await companyElement.textContent() : "";
|
let company = companyElement ? await companyElement.textContent() : "";
|
||||||
company = company.replace(/^\s*[^\s]+\s*/, "").trim(); // Remove icon
|
company = company.replace(/^\s*[^\s]+\s*/, "").trim(); // Remove icon
|
||||||
|
|
||||||
// Extract days ago
|
// Extract days ago
|
||||||
const daysAgoElement = await article.$(
|
const daysAgoElement = await article.$(
|
||||||
".custom_fields_job_date_display_search_results"
|
".custom_fields_job_date_display_search_results"
|
||||||
);
|
);
|
||||||
let daysAgo = daysAgoElement ? await daysAgoElement.textContent() : "";
|
let daysAgo = daysAgoElement ? await daysAgoElement.textContent() : "";
|
||||||
daysAgo = daysAgo.replace(/^\s*[^\s]+\s*/, "").trim(); // Remove icon
|
daysAgo = daysAgo.replace(/^\s*[^\s]+\s*/, "").trim(); // Remove icon
|
||||||
|
|
||||||
// Extract job description excerpt
|
// Extract job description excerpt
|
||||||
const excerptElement = await article.$(".excerpt_part");
|
const excerptElement = await article.$(".excerpt_part");
|
||||||
const description = excerptElement
|
const description = excerptElement
|
||||||
? await excerptElement.textContent()
|
? await excerptElement.textContent()
|
||||||
: "";
|
: "";
|
||||||
|
|
||||||
// Check if featured/sponsored
|
// Check if featured/sponsored
|
||||||
const featuredElement = await article.$(".custom_fields_sponsored_job");
|
const featuredElement = await article.$(".custom_fields_sponsored_job");
|
||||||
const isFeatured = !!featuredElement;
|
const isFeatured = !!featuredElement;
|
||||||
|
|
||||||
// Extract job ID from article ID
|
// Extract job ID from article ID
|
||||||
const articleId = await article.getAttribute("id");
|
const articleId = await article.getAttribute("id");
|
||||||
const jobId = articleId ? articleId.replace("post-", "") : "";
|
const jobId = articleId ? articleId.replace("post-", "") : "";
|
||||||
|
|
||||||
return {
|
return {
|
||||||
jobId,
|
jobId,
|
||||||
title: cleanText(title),
|
title: cleanText(title),
|
||||||
company: cleanText(company),
|
company: cleanText(company),
|
||||||
jobUrl,
|
jobUrl,
|
||||||
datePosted,
|
datePosted,
|
||||||
dateText: cleanText(dateText),
|
dateText: cleanText(dateText),
|
||||||
daysAgo: cleanText(daysAgo),
|
daysAgo: cleanText(daysAgo),
|
||||||
description: cleanText(description),
|
description: cleanText(description),
|
||||||
isFeatured,
|
isFeatured,
|
||||||
source: "skipthedrive",
|
source: "skipthedrive",
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`Error extracting job data: ${error.message}`);
|
logger.error(`Error extracting job data: ${error.message}`);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse SkipTheDrive job listings
|
* Parse SkipTheDrive job listings
|
||||||
* @param {Object} options - Parser options
|
* @param {Object} options - Parser options
|
||||||
* @returns {Promise<Array>} - Array of parsed job listings
|
* @returns {Promise<Array>} - Array of parsed job listings
|
||||||
*/
|
*/
|
||||||
async function parseSkipTheDrive(options = {}) {
|
async function parseSkipTheDrive(options = {}) {
|
||||||
const {
|
const {
|
||||||
keywords = process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
|
keywords = process.env.SEARCH_KEYWORDS?.split(",").map((k) => k.trim()) || [
|
||||||
"software engineer",
|
"software engineer",
|
||||||
"developer",
|
"developer",
|
||||||
],
|
],
|
||||||
jobTypes = process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],
|
jobTypes = process.env.JOB_TYPES?.split(",").map((t) => t.trim()) || [],
|
||||||
locationFilter = process.env.LOCATION_FILTER || "",
|
locationFilter = process.env.LOCATION_FILTER || "",
|
||||||
maxPages = parseInt(process.env.MAX_PAGES) || 5,
|
maxPages = parseInt(process.env.MAX_PAGES) || 5,
|
||||||
headless = process.env.HEADLESS !== "false",
|
headless = process.env.HEADLESS !== "false",
|
||||||
enableAI = process.env.ENABLE_AI_ANALYSIS === "true",
|
enableAI = process.env.ENABLE_AI_ANALYSIS === "true",
|
||||||
aiContext = process.env.AI_CONTEXT || "remote job opportunities analysis",
|
aiContext = process.env.AI_CONTEXT || "remote job opportunities analysis",
|
||||||
} = options;
|
} = options;
|
||||||
|
|
||||||
logger.step("Starting SkipTheDrive parser...");
|
logger.step("Starting SkipTheDrive parser...");
|
||||||
logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
|
logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
|
||||||
logger.info(
|
logger.info(
|
||||||
`📋 Job Types: ${jobTypes.length > 0 ? jobTypes.join(", ") : "All"}`
|
`📋 Job Types: ${jobTypes.length > 0 ? jobTypes.join(", ") : "All"}`
|
||||||
);
|
);
|
||||||
logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
|
logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
|
||||||
logger.info(`📄 Max Pages: ${maxPages}`);
|
logger.info(`📄 Max Pages: ${maxPages}`);
|
||||||
|
|
||||||
const browser = await chromium.launch({
|
const browser = await chromium.launch({
|
||||||
headless,
|
headless,
|
||||||
args: [
|
args: [
|
||||||
"--no-sandbox",
|
"--no-sandbox",
|
||||||
"--disable-setuid-sandbox",
|
"--disable-setuid-sandbox",
|
||||||
"--disable-dev-shm-usage",
|
"--disable-dev-shm-usage",
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
const context = await browser.newContext({
|
const context = await browser.newContext({
|
||||||
userAgent:
|
userAgent:
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
||||||
});
|
});
|
||||||
|
|
||||||
const results = [];
|
const results = [];
|
||||||
const rejectedResults = [];
|
const rejectedResults = [];
|
||||||
const seenJobs = new Set();
|
const seenJobs = new Set();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Search for each keyword
|
// Search for each keyword
|
||||||
for (const keyword of keywords) {
|
for (const keyword of keywords) {
|
||||||
logger.info(`\n🔍 Searching for: ${keyword}`);
|
logger.info(`\n🔍 Searching for: ${keyword}`);
|
||||||
|
|
||||||
const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
|
const searchUrl = buildSearchUrl(keyword, "date", jobTypes);
|
||||||
const page = await context.newPage();
|
const page = await context.newPage();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
logger.info(
|
logger.info(
|
||||||
`Attempting navigation to: ${searchUrl} at ${new Date().toISOString()}`
|
`Attempting navigation to: ${searchUrl} at ${new Date().toISOString()}`
|
||||||
);
|
);
|
||||||
await page.goto(searchUrl, {
|
await page.goto(searchUrl, {
|
||||||
waitUntil: "domcontentloaded",
|
waitUntil: "domcontentloaded",
|
||||||
timeout: 30000,
|
timeout: 30000,
|
||||||
});
|
});
|
||||||
logger.info(
|
logger.info(
|
||||||
`Navigation completed successfully at ${new Date().toISOString()}`
|
`Navigation completed successfully at ${new Date().toISOString()}`
|
||||||
);
|
);
|
||||||
|
|
||||||
// Wait for job listings to load
|
// Wait for job listings to load
|
||||||
logger.info("Waiting for selector #loops-wrapper");
|
logger.info("Waiting for selector #loops-wrapper");
|
||||||
await page
|
await page
|
||||||
.waitForSelector("#loops-wrapper", { timeout: 5000 })
|
.waitForSelector("#loops-wrapper", { timeout: 5000 })
|
||||||
.catch(() => {
|
.catch(() => {
|
||||||
logger.warning(`No results found for keyword: ${keyword}`);
|
logger.warning(`No results found for keyword: ${keyword}`);
|
||||||
});
|
});
|
||||||
logger.info("Selector wait completed");
|
logger.info("Selector wait completed");
|
||||||
|
|
||||||
let currentPage = 1;
|
let currentPage = 1;
|
||||||
let hasNextPage = true;
|
let hasNextPage = true;
|
||||||
|
|
||||||
while (hasNextPage && currentPage <= maxPages) {
|
while (hasNextPage && currentPage <= maxPages) {
|
||||||
logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);
|
logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);
|
||||||
|
|
||||||
// Extract all job articles on current page
|
// Extract all job articles on current page
|
||||||
const jobArticles = await page.$$("article[id^='post-']");
|
const jobArticles = await page.$$("article[id^='post-']");
|
||||||
logger.info(
|
logger.info(
|
||||||
`Found ${jobArticles.length} job listings on page ${currentPage}`
|
`Found ${jobArticles.length} job listings on page ${currentPage}`
|
||||||
);
|
);
|
||||||
|
|
||||||
for (const article of jobArticles) {
|
for (const article of jobArticles) {
|
||||||
const jobData = await extractJobData(article);
|
const jobData = await extractJobData(article);
|
||||||
|
|
||||||
if (!jobData || seenJobs.has(jobData.jobId)) {
|
if (!jobData || seenJobs.has(jobData.jobId)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
seenJobs.add(jobData.jobId);
|
seenJobs.add(jobData.jobId);
|
||||||
|
|
||||||
// Add keyword that found this job
|
// Add keyword that found this job
|
||||||
jobData.searchKeyword = keyword;
|
jobData.searchKeyword = keyword;
|
||||||
|
|
||||||
// Validate job against keywords
|
// Validate job against keywords
|
||||||
const fullText = `${jobData.title} ${jobData.description} ${jobData.company}`;
|
const fullText = `${jobData.title} ${jobData.description} ${jobData.company}`;
|
||||||
if (!containsAnyKeyword(fullText, keywords)) {
|
if (!containsAnyKeyword(fullText, keywords)) {
|
||||||
rejectedResults.push({
|
rejectedResults.push({
|
||||||
...jobData,
|
...jobData,
|
||||||
rejected: true,
|
rejected: true,
|
||||||
reason: "Keywords not found in job listing",
|
reason: "Keywords not found in job listing",
|
||||||
});
|
});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Location validation (if enabled)
|
// Location validation (if enabled)
|
||||||
if (locationFilter) {
|
if (locationFilter) {
|
||||||
const locationFilters = parseLocationFilters(locationFilter);
|
const locationFilters = parseLocationFilters(locationFilter);
|
||||||
// For SkipTheDrive, most jobs are remote, but we can check the title/description
|
// For SkipTheDrive, most jobs are remote, but we can check the title/description
|
||||||
const locationValid =
|
const locationValid =
|
||||||
fullText.toLowerCase().includes("remote") ||
|
fullText.toLowerCase().includes("remote") ||
|
||||||
locationFilters.some((filter) =>
|
locationFilters.some((filter) =>
|
||||||
fullText.toLowerCase().includes(filter.toLowerCase())
|
fullText.toLowerCase().includes(filter.toLowerCase())
|
||||||
);
|
);
|
||||||
|
|
||||||
if (!locationValid) {
|
if (!locationValid) {
|
||||||
rejectedResults.push({
|
rejectedResults.push({
|
||||||
...jobData,
|
...jobData,
|
||||||
rejected: true,
|
rejected: true,
|
||||||
reason: "Location requirements not met",
|
reason: "Location requirements not met",
|
||||||
});
|
});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
jobData.locationValid = locationValid;
|
jobData.locationValid = locationValid;
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.success(`✅ Found: ${jobData.title} at ${jobData.company}`);
|
logger.success(`✅ Found: ${jobData.title} at ${jobData.company}`);
|
||||||
results.push(jobData);
|
results.push(jobData);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for next page
|
// Check for next page
|
||||||
const nextPageLink = await page.$("a.nextp");
|
const nextPageLink = await page.$("a.nextp");
|
||||||
if (nextPageLink && currentPage < maxPages) {
|
if (nextPageLink && currentPage < maxPages) {
|
||||||
logger.info("📄 Moving to next page...");
|
logger.info("📄 Moving to next page...");
|
||||||
await nextPageLink.click();
|
await nextPageLink.click();
|
||||||
await page.waitForLoadState("domcontentloaded");
|
await page.waitForLoadState("domcontentloaded");
|
||||||
await page.waitForTimeout(2000); // Wait for content to load
|
await page.waitForTimeout(2000); // Wait for content to load
|
||||||
currentPage++;
|
currentPage++;
|
||||||
} else {
|
} else {
|
||||||
hasNextPage = false;
|
hasNextPage = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`Error processing keyword "${keyword}": ${error.message}`);
|
logger.error(`Error processing keyword "${keyword}": ${error.message}`);
|
||||||
} finally {
|
} finally {
|
||||||
await page.close();
|
await page.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.success(`\n✅ Parsing complete!`);
|
logger.success(`\n✅ Parsing complete!`);
|
||||||
logger.info(`📊 Total jobs found: ${results.length}`);
|
logger.info(`📊 Total jobs found: ${results.length}`);
|
||||||
logger.info(`❌ Rejected jobs: ${rejectedResults.length}`);
|
logger.info(`❌ Rejected jobs: ${rejectedResults.length}`);
|
||||||
|
|
||||||
// Run AI analysis if enabled
|
// Run AI analysis if enabled
|
||||||
let aiAnalysis = null;
|
let aiAnalysis = null;
|
||||||
if (enableAI && results.length > 0) {
|
if (enableAI && results.length > 0) {
|
||||||
logger.step("Running AI analysis on job listings...");
|
logger.step("Running AI analysis on job listings...");
|
||||||
|
|
||||||
const aiAvailable = await checkOllamaStatus();
|
const aiAvailable = await checkOllamaStatus();
|
||||||
if (aiAvailable) {
|
if (aiAvailable) {
|
||||||
const analysisData = results.map((job) => ({
|
const analysisData = results.map((job) => ({
|
||||||
text: `${job.title} at ${job.company}. ${job.description}`,
|
text: `${job.title} at ${job.company}. ${job.description}`,
|
||||||
metadata: {
|
metadata: {
|
||||||
jobId: job.jobId,
|
jobId: job.jobId,
|
||||||
company: job.company,
|
company: job.company,
|
||||||
daysAgo: job.daysAgo,
|
daysAgo: job.daysAgo,
|
||||||
},
|
},
|
||||||
}));
|
}));
|
||||||
|
|
||||||
aiAnalysis = await analyzeBatch(analysisData, aiContext);
|
aiAnalysis = await analyzeBatch(analysisData, aiContext);
|
||||||
|
|
||||||
// Merge AI analysis with results
|
// Merge AI analysis with results
|
||||||
results.forEach((job, index) => {
|
results.forEach((job, index) => {
|
||||||
if (aiAnalysis && aiAnalysis[index]) {
|
if (aiAnalysis && aiAnalysis[index]) {
|
||||||
job.aiAnalysis = {
|
job.aiAnalysis = {
|
||||||
isRelevant: aiAnalysis[index].isRelevant,
|
isRelevant: aiAnalysis[index].isRelevant,
|
||||||
confidence: aiAnalysis[index].confidence,
|
confidence: aiAnalysis[index].confidence,
|
||||||
reasoning: aiAnalysis[index].reasoning,
|
reasoning: aiAnalysis[index].reasoning,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
logger.success("✅ AI analysis completed");
|
logger.success("✅ AI analysis completed");
|
||||||
} else {
|
} else {
|
||||||
logger.warning("⚠️ AI not available - skipping analysis");
|
logger.warning("⚠️ AI not available - skipping analysis");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
results,
|
results,
|
||||||
rejectedResults,
|
rejectedResults,
|
||||||
metadata: {
|
metadata: {
|
||||||
source: "skipthedrive",
|
source: "skipthedrive",
|
||||||
totalJobs: results.length,
|
totalJobs: results.length,
|
||||||
rejectedJobs: rejectedResults.length,
|
rejectedJobs: rejectedResults.length,
|
||||||
keywords: keywords,
|
keywords: keywords,
|
||||||
jobTypes: jobTypes,
|
jobTypes: jobTypes,
|
||||||
locationFilter: locationFilter,
|
locationFilter: locationFilter,
|
||||||
aiAnalysisEnabled: enableAI,
|
aiAnalysisEnabled: enableAI,
|
||||||
aiAnalysisCompleted: !!aiAnalysis,
|
aiAnalysisCompleted: !!aiAnalysis,
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`Fatal error in SkipTheDrive parser: ${error.message}`);
|
logger.error(`Fatal error in SkipTheDrive parser: ${error.message}`);
|
||||||
throw error;
|
throw error;
|
||||||
} finally {
|
} finally {
|
||||||
await browser.close();
|
await browser.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Export the parser
|
// Export the parser
|
||||||
module.exports = {
|
module.exports = {
|
||||||
parseSkipTheDrive,
|
parseSkipTheDrive,
|
||||||
buildSearchUrl,
|
buildSearchUrl,
|
||||||
extractJobData,
|
extractJobData,
|
||||||
};
|
};
|
||||||
|
|||||||
@ -1,302 +1,302 @@
|
|||||||
/**
|
/**
|
||||||
* SkipTheDrive Parsing Strategy
|
* SkipTheDrive Parsing Strategy
|
||||||
*
|
*
|
||||||
* Uses core-parser for browser management and ai-analyzer for utilities
|
* Uses core-parser for browser management and ai-analyzer for utilities
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const {
|
const {
|
||||||
logger,
|
logger,
|
||||||
cleanText,
|
cleanText,
|
||||||
containsAnyKeyword,
|
containsAnyKeyword,
|
||||||
validateLocationAgainstFilters,
|
validateLocationAgainstFilters,
|
||||||
} = require("ai-analyzer");
|
} = require("ai-analyzer");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* SkipTheDrive URL builder
|
* SkipTheDrive URL builder
|
||||||
*/
|
*/
|
||||||
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
|
function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) {
|
||||||
const baseUrl = "https://www.skipthedrive.com/";
|
const baseUrl = "https://www.skipthedrive.com/";
|
||||||
const params = new URLSearchParams({
|
const params = new URLSearchParams({
|
||||||
s: keyword,
|
s: keyword,
|
||||||
orderby: orderBy,
|
orderby: orderBy,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (jobTypes && jobTypes.length > 0) {
|
if (jobTypes && jobTypes.length > 0) {
|
||||||
params.append("job_type", jobTypes.join(","));
|
params.append("job_type", jobTypes.join(","));
|
||||||
}
|
}
|
||||||
|
|
||||||
return `${baseUrl}?${params.toString()}`;
|
return `${baseUrl}?${params.toString()}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * SkipTheDrive parsing strategy function.
 *
 * Searches SkipTheDrive for each keyword, walks result pages up to
 * `maxPages`, de-duplicates jobs by jobId across all keywords, and
 * optionally rejects jobs whose location fails
 * `validateLocationAgainstFilters`.
 *
 * @param {object} coreParser - Browser-management facade; must provide
 *   createPage, navigateTo, and waitForSelector (contract defined in
 *   core-parser — not visible here; TODO confirm).
 * @param {object} [options]
 * @param {string[]} [options.keywords] - Search terms to run.
 * @param {*} [options.locationFilter=null] - Filter passed to
 *   validateLocationAgainstFilters; falsy disables location checking.
 * @param {number} [options.maxPages=5] - Maximum result pages per keyword.
 * @param {string[]} [options.jobTypes=[]] - Extra `job_type` query values.
 * @returns {Promise<{results: object[], rejectedResults: object[], summary: object}>}
 *   Accepted jobs, location-rejected jobs, and a run summary.
 * @throws Re-throws errors raised outside the per-keyword try/catch
 *   (e.g. page creation failures); per-keyword errors are only logged.
 */
async function skipthedriveStrategy(coreParser, options = {}) {
  const {
    keywords = ["software engineer", "developer", "programmer"],
    locationFilter = null,
    maxPages = 5,
    jobTypes = [],
  } = options;

  const results = [];
  const rejectedResults = [];
  // jobIds seen so far — shared across keywords so the same posting found
  // under two keywords is only reported once (first keyword wins).
  const seenJobs = new Set();

  try {
    // Create main page (single page reused for every keyword/page)
    const page = await coreParser.createPage("skipthedrive-main");

    logger.info("🚀 Starting SkipTheDrive parser...");
    logger.info(`🔍 Keywords: ${keywords.join(", ")}`);
    logger.info(`📍 Location Filter: ${locationFilter || "None"}`);
    logger.info(`📄 Max Pages: ${maxPages}`);

    // Search for each keyword
    for (const keyword of keywords) {
      logger.info(`\n🔍 Searching for: ${keyword}`);

      const searchUrl = buildSearchUrl(keyword, "date", jobTypes);

      try {
        // Navigate to search results
        await coreParser.navigateTo(searchUrl, {
          pageId: "skipthedrive-main",
          retries: 2,
          timeout: 30000,
        });

        // Wait for job listings to load; a timeout is treated as
        // "no results for this keyword", not an error.
        const hasResults = await coreParser
          .waitForSelector(
            "#loops-wrapper",
            {
              timeout: 5000,
            },
            "skipthedrive-main"
          )
          .catch(() => {
            logger.warning(`No results found for keyword: ${keyword}`);
            return false;
          });

        if (!hasResults) {
          continue;
        }

        // Process multiple pages
        let currentPage = 1;
        let hasNextPage = true;

        while (hasNextPage && currentPage <= maxPages) {
          logger.info(`📄 Processing page ${currentPage} for "${keyword}"`);

          // Extract jobs from current page
          const pageJobs = await extractJobsFromPage(
            page,
            keyword,
            locationFilter
          );

          for (const job of pageJobs) {
            // Skip duplicates
            if (seenJobs.has(job.jobId)) continue;
            seenJobs.add(job.jobId);

            // Validate location if filtering enabled
            if (locationFilter) {
              const locationValid = validateLocationAgainstFilters(
                job.location,
                locationFilter
              );

              if (!locationValid) {
                // Keep the rejected record (with reason) for reporting.
                rejectedResults.push({
                  ...job,
                  rejectionReason: "Location filter mismatch",
                });
                continue;
              }
            }

            results.push(job);
          }

          // Check for next page; only advance while under the page cap.
          hasNextPage = await hasNextPageAvailable(page);
          if (hasNextPage && currentPage < maxPages) {
            await navigateToNextPage(page, currentPage + 1);
            currentPage++;

            // Wait for new page to load (fixed delay after clicking "next")
            await page.waitForTimeout(2000);
          } else {
            hasNextPage = false;
          }
        }
      } catch (error) {
        // A failing keyword does not abort the run — log and move on.
        logger.error(`Error processing keyword "${keyword}": ${error.message}`);
      }
    }

    logger.info(
      `🎯 SkipTheDrive parsing completed: ${results.length} jobs found, ${rejectedResults.length} rejected`
    );

    return {
      results,
      rejectedResults,
      summary: {
        totalJobs: results.length,
        totalRejected: rejectedResults.length,
        keywords: keywords.join(", "),
        locationFilter,
        source: "skipthedrive",
      },
    };
  } catch (error) {
    logger.error(`❌ SkipTheDrive parsing failed: ${error.message}`);
    throw error;
  }
}
|
|
||||||
/**
 * Collect job records from every listing element on the current results page.
 *
 * @param {object} page - Browser page handle exposing `$$`.
 * @param {string} keyword - Search keyword forwarded to extractJobData.
 * @param {*} locationFilter - Accepted for interface parity; not used here
 *   (filtering happens in the caller).
 * @returns {Promise<object[]>} Successfully extracted job records; extraction
 *   failures are logged and skipped, never thrown.
 */
async function extractJobsFromPage(page, keyword, locationFilter) {
  const collected = [];

  try {
    // Each job posting is rendered as an <article class="job_listing">.
    const listings = await page.$$("article.job_listing");

    for (const listing of listings) {
      try {
        const record = await extractJobData(listing, keyword);
        if (record) {
          collected.push(record);
        }
      } catch (error) {
        // One bad listing should not abort the rest of the page.
        logger.warning(`Failed to extract job data: ${error.message}`);
      }
    }
  } catch (error) {
    logger.error(`Failed to extract jobs from page: ${error.message}`);
  }

  return collected;
}
|
|
||||||
/**
 * Extract structured job data from a single `article.job_listing` element.
 *
 * @param {object} jobElement - Element handle for one job listing article
 *   (must expose `getAttribute` and `$`).
 * @param {string} keyword - Search keyword that produced this listing;
 *   recorded on the returned object.
 * @returns {Promise<object|null>} Job record, or null when extraction fails
 *   (the error is logged, never thrown).
 */
async function extractJobData(jobElement, keyword) {
  try {
    // Extract job ID from the article's DOM id (e.g. "post-12345" -> "12345")
    const articleId = (await jobElement.getAttribute("id")) || "";
    const jobId = articleId ? articleId.replace("post-", "") : "";

    // Extract title and link to the posting
    const titleElement = await jobElement.$(".job_listing-title a");
    const title = titleElement
      ? cleanText(await titleElement.textContent())
      : "";
    const jobUrl = titleElement ? await titleElement.getAttribute("href") : "";

    // Extract company
    const companyElement = await jobElement.$(".company");
    const company = companyElement
      ? cleanText(await companyElement.textContent())
      : "";

    // Extract location
    const locationElement = await jobElement.$(".location");
    const location = locationElement
      ? cleanText(await locationElement.textContent())
      : "";

    // Extract date posted (relative text, e.g. "3 days ago")
    const dateElement = await jobElement.$(".job-date");
    const dateText = dateElement
      ? cleanText(await dateElement.textContent())
      : "";

    // Extract description
    const descElement = await jobElement.$(".job_listing-description");
    const description = descElement
      ? cleanText(await descElement.textContent())
      : "";

    // Check if featured (presence of a .featured element marks the listing)
    const featuredElement = await jobElement.$(".featured");
    const isFeatured = featuredElement !== null;

    // Parse the relative date text into an ISO date (YYYY-MM-DD).
    // Only "N day(s) ago" is recognised; other formats leave both fields null.
    let datePosted = null;
    let daysAgo = null;

    if (dateText) {
      const match = dateText.match(/(\d+)\s+days?\s+ago/);
      if (match) {
        // Fix: always pass an explicit radix to parseInt.
        daysAgo = Number.parseInt(match[1], 10);
        const date = new Date();
        date.setDate(date.getDate() - daysAgo);
        datePosted = date.toISOString().split("T")[0];
      }
    }

    return {
      jobId,
      title,
      company,
      location,
      jobUrl,
      datePosted,
      dateText,
      daysAgo,
      description,
      isFeatured,
      keyword,
      extractedAt: new Date().toISOString(),
      source: "skipthedrive",
    };
  } catch (error) {
    logger.warning(`Error extracting job data: ${error.message}`);
    return null;
  }
}
|
|
||||||
/**
 * Determine whether the current results page exposes a "next page" control.
 *
 * @param {object} page - Browser page handle exposing `$`.
 * @returns {Promise<boolean>} True when a `.next-page` element exists;
 *   false when absent or when the lookup throws.
 */
async function hasNextPageAvailable(page) {
  try {
    return (await page.$(".next-page")) !== null;
  } catch {
    // Treat any lookup failure as "no further pages".
    return false;
  }
}
|
|
||||||
/**
 * Advance to the next results page by clicking the "next page" control.
 *
 * @param {object} page - Browser page handle exposing `$`.
 * @param {number} pageNumber - Target page number (used only for logging).
 * @returns {Promise<void>} Never throws; failures are logged as warnings.
 */
async function navigateToNextPage(page, pageNumber) {
  try {
    const nextLink = await page.$(".next-page");
    if (nextLink) {
      await nextLink.click();
    }
  } catch (error) {
    logger.warning(
      `Failed to navigate to page ${pageNumber}: ${error.message}`
    );
  }
}
|
|
||||||
// Public API: the strategy entry point plus helpers exported for reuse/testing.
module.exports = {
  skipthedriveStrategy,
  buildSearchUrl,
  extractJobsFromPage,
  extractJobData,
};
|
|||||||
@ -1,412 +1,412 @@
|
|||||||
/**
 * LinkedIn Parser Demo
 *
 * Demonstrates the LinkedIn Parser's capabilities for scraping LinkedIn content
 * with keyword-based searching, location filtering, and AI analysis.
 *
 * This demo uses simulated data for safety and demonstration purposes.
 */
|
|
||||||
const { logger } = require("../ai-analyzer");
|
const { logger } = require("../ai-analyzer");
|
||||||
const fs = require("fs");
|
const fs = require("fs");
|
||||||
const path = require("path");
|
const path = require("path");
|
||||||
|
|
||||||
// Terminal colors for demo output
// ANSI escape sequences; append `reset` after any colored text to restore
// the terminal's default style.
const colors = {
  reset: "\x1b[0m",
  bright: "\x1b[1m",
  cyan: "\x1b[36m",
  green: "\x1b[32m",
  yellow: "\x1b[33m",
  blue: "\x1b[34m",
  magenta: "\x1b[35m",
  red: "\x1b[31m",
};
|
|
||||||
/**
 * Console helpers that wrap demo messages in ANSI colors and status icons.
 * Each method prints a single line via console.log.
 */
const demo = {
  title(msg) {
    console.log(`\n${colors.bright}${colors.cyan}${msg}${colors.reset}`);
  },
  section(msg) {
    console.log(`\n${colors.bright}${colors.magenta}${msg}${colors.reset}`);
  },
  success(msg) {
    console.log(`${colors.green}✅ ${msg}${colors.reset}`);
  },
  info(msg) {
    console.log(`${colors.blue}ℹ️  ${msg}${colors.reset}`);
  },
  warning(msg) {
    console.log(`${colors.yellow}⚠️  ${msg}${colors.reset}`);
  },
  error(msg) {
    console.log(`${colors.red}❌ ${msg}${colors.reset}`);
  },
  code(msg) {
    console.log(`${colors.cyan}${msg}${colors.reset}`);
  },
};
|
|
||||||
// Mock data for demonstration
// Three representative LinkedIn posts (layoff, downsizing, open-to-work).
// Each record carries cleaned `content` plus the raw `original_content`
// (hashtags/emoji intact), an author profile, engagement counts, and scrape
// metadata mirroring the real parser's output shape.
const mockPosts = [
  {
    id: "post_1",
    content:
      "Just got laid off from my software engineering role at TechCorp. Looking for new opportunities in Toronto. This is really tough but I'm staying positive!",
    original_content:
      "Just got #laidoff from my software engineering role at TechCorp! Looking for new opportunities in #Toronto. This is really tough but I'm staying positive! 🚀",
    author: {
      name: "John Doe",
      title: "Software Engineer",
      company: "TechCorp",
      location: "Toronto, Ontario, Canada",
      profile_url: "https://linkedin.com/in/johndoe",
    },
    engagement: { likes: 45, comments: 12, shares: 3 },
    metadata: {
      post_date: "2024-01-10T14:30:00Z",
      scraped_at: "2024-01-15T10:30:00Z",
      search_keyword: "layoff",
      location_validated: true,
    },
  },
  {
    id: "post_2",
    content:
      "Our company is downsizing and I'm affected. This is really tough news but I'm grateful for the time I had here.",
    original_content:
      "Our company is #downsizing and I'm affected. This is really tough news but I'm grateful for the time I had here. #RIF #layoff",
    author: {
      name: "Jane Smith",
      title: "Product Manager",
      company: "StartupXYZ",
      location: "Vancouver, British Columbia, Canada",
      profile_url: "https://linkedin.com/in/janesmith",
    },
    engagement: { likes: 23, comments: 8, shares: 1 },
    metadata: {
      post_date: "2024-01-09T16:45:00Z",
      scraped_at: "2024-01-15T10:30:00Z",
      search_keyword: "downsizing",
      location_validated: true,
    },
  },
  {
    id: "post_3",
    content:
      "Open to work! Looking for new opportunities in software development. I have 5 years of experience in React, Node.js, and cloud technologies.",
    original_content:
      "Open to work! Looking for new opportunities in software development. I have 5 years of experience in #React, #NodeJS, and #cloud technologies. #opentowork #jobsearch",
    author: {
      name: "Bob Wilson",
      title: "Full Stack Developer",
      company: "Freelance",
      location: "Calgary, Alberta, Canada",
      profile_url: "https://linkedin.com/in/bobwilson",
    },
    engagement: { likes: 67, comments: 15, shares: 8 },
    metadata: {
      post_date: "2024-01-08T11:20:00Z",
      scraped_at: "2024-01-15T10:30:00Z",
      search_keyword: "open to work",
      location_validated: true,
    },
  },
];
||||||
|
|
||||||
/**
 * Run the full interactive demo: intro banner, six walkthrough sections
 * in fixed order, then a closing summary. Pauses for Enter between parts.
 */
async function runDemo() {
  demo.title("=== LinkedIn Parser Demo ===");
  demo.info(
    "This demo showcases the LinkedIn Parser's capabilities for scraping LinkedIn content."
  );
  demo.info("All data shown is simulated for demonstration purposes.");
  demo.info("Press Enter to continue through each section...\n");

  await waitForEnter();

  // Walk through each demo section sequentially.
  const sections = [
    demonstrateConfiguration, // 1. Configuration Demo
    demonstrateKeywordLoading, // 2. Keyword Loading Demo
    demonstrateSearchProcess, // 3. Search Process Demo
    demonstrateLocationFiltering, // 4. Location Filtering Demo
    demonstrateAIAnalysis, // 5. AI Analysis Demo
    demonstrateOutputGeneration, // 6. Output Generation Demo
  ];
  for (const section of sections) {
    await section();
  }

  demo.title("=== Demo Complete ===");
  demo.success("LinkedIn Parser demo completed successfully!");
  demo.info("Check the README.md for detailed usage instructions.");
}
||||||
|
|
||||||
/**
 * Section 1: show the environment variables and CLI options the parser
 * understands, then wait for the user to continue.
 */
async function demonstrateConfiguration() {
  demo.section("1. Configuration Setup");
  demo.info(
    "The LinkedIn Parser uses environment variables and command-line options for configuration."
  );

  // Sample .env settings recognised by the parser.
  const envLines = [
    "LINKEDIN_USERNAME=your_email@example.com",
    "LINKEDIN_PASSWORD=your_password",
    "CITY=Toronto",
    "DATE_POSTED=past-week",
    "SORT_BY=date_posted",
    "WHEELS=5",
    "LOCATION_FILTER=Ontario,Manitoba",
    "ENABLE_LOCATION_CHECK=true",
    "ENABLE_LOCAL_AI=true",
    'AI_CONTEXT="job layoffs and workforce reduction"',
    "OLLAMA_MODEL=mistral",
  ];
  demo.code("// Environment Variables (.env file)");
  envLines.forEach((line) => demo.info(line));

  // Equivalent command-line invocations.
  const cliLines = [
    'node index.js --keyword="layoff,downsizing" --city="Vancouver"',
    "node index.js --no-location --no-ai",
    "node index.js --output=results/my-results.json",
    "node index.js --ai-after",
  ];
  demo.code("// Command Line Options");
  cliLines.forEach((line) => demo.info(line));

  await waitForEnter();
}
||||||
|
|
||||||
/**
 * Section 2: simulate loading search keywords from a CSV file and show
 * the command-line overrides, then wait for the user to continue.
 */
async function demonstrateKeywordLoading() {
  demo.section("2. Keyword Loading");
  demo.info(
    "Keywords can be loaded from CSV files or specified via command line."
  );

  // Simulate loading keywords from the bundled CSV file.
  demo.code("// Loading keywords from CSV file");
  logger.step("Loading keywords from keywords/linkedin-keywords.csv");

  const keywords = [
    "layoff",
    "downsizing",
    "reduction in force",
    "RIF",
    "termination",
    "job loss",
    "workforce reduction",
    "open to work",
    "actively seeking",
    "job search",
  ];

  demo.success(`Loaded ${keywords.length} keywords from CSV file`);
  // Only preview the first five keywords.
  demo.info(`Keywords: ${keywords.slice(0, 5).join(", ")}...`);

  demo.code("// Command line keyword override");
  demo.info('node index.js --keyword="layoff,downsizing"');
  demo.info('node index.js --add-keyword="hiring freeze"');

  await waitForEnter();
}
||||||
|
|
||||||
/**
 * Section 3: simulate a keyword-by-keyword LinkedIn search with randomized
 * found/accepted counts, then wait for the user to continue.
 */
async function demonstrateSearchProcess() {
  demo.section("3. Search Process Simulation");
  demo.info(
    "The parser performs automated LinkedIn searches for each keyword."
  );

  const searchTerms = ["layoff", "downsizing", "open to work"];

  for (const term of searchTerms) {
    demo.code(`// Searching for keyword: "${term}"`);
    logger.search(`Searching for "${term}" in Toronto`);

    // Simulate the search round-trip.
    await simulateSearch();

    // Randomized demo numbers: 10-59 posts found, ~30% accepted.
    const hits = Math.floor(Math.random() * 50) + 10;
    const kept = Math.floor(hits * 0.3);

    logger.info(`Found ${hits} posts, checking profiles for location...`);
    logger.success(`Accepted ${kept} posts after location validation`);

    console.log();
  }

  await waitForEnter();
}
||||||
|
|
||||||
/**
 * Section 4: show the location-filter configuration and run a few canned
 * accept/reject examples, then wait for the user to continue.
 */
async function demonstrateLocationFiltering() {
  demo.section("4. Location Filtering");
  demo.info(
    "Posts are filtered based on author location using geographic validation."
  );

  demo.code("// Location filter configuration");
  demo.info("LOCATION_FILTER=Ontario,Manitoba");
  demo.info("ENABLE_LOCATION_CHECK=true");

  demo.code("// Location validation examples");
  // Canned examples: `valid` reflects the Ontario/Manitoba filter above.
  const testLocations = [
    { location: "Toronto, Ontario, Canada", valid: true },
    { location: "Vancouver, British Columbia, Canada", valid: false },
    { location: "Calgary, Alberta, Canada", valid: false },
    { location: "Winnipeg, Manitoba, Canada", valid: true },
    { location: "New York, NY, USA", valid: false },
  ];

  for (const { location, valid } of testLocations) {
    logger.location(`Checking location: ${location}`);
    if (valid) {
      logger.success(`✅ Location valid - post accepted`);
    } else {
      logger.warning(`❌ Location invalid - post rejected`);
    }
  }

  await waitForEnter();
}
||||||
|
|
||||||
async function demonstrateAIAnalysis() {
|
async function demonstrateAIAnalysis() {
|
||||||
demo.section("5. AI Analysis");
|
demo.section("5. AI Analysis");
|
||||||
demo.info(
|
demo.info(
|
||||||
"Posts can be analyzed using local Ollama or OpenAI for relevance scoring."
|
"Posts can be analyzed using local Ollama or OpenAI for relevance scoring."
|
||||||
);
|
);
|
||||||
|
|
||||||
demo.code("// AI analysis configuration");
|
demo.code("// AI analysis configuration");
|
||||||
demo.info("ENABLE_LOCAL_AI=true");
|
demo.info("ENABLE_LOCAL_AI=true");
|
||||||
demo.info('AI_CONTEXT="job layoffs and workforce reduction"');
|
demo.info('AI_CONTEXT="job layoffs and workforce reduction"');
|
||||||
demo.info("OLLAMA_MODEL=mistral");
|
demo.info("OLLAMA_MODEL=mistral");
|
||||||
|
|
||||||
demo.code("// Analyzing posts with AI");
|
demo.code("// Analyzing posts with AI");
|
||||||
logger.ai("Starting AI analysis of accepted posts...");
|
logger.ai("Starting AI analysis of accepted posts...");
|
||||||
|
|
||||||
for (let i = 0; i < mockPosts.length; i++) {
|
for (let i = 0; i < mockPosts.length; i++) {
|
||||||
const post = mockPosts[i];
|
const post = mockPosts[i];
|
||||||
logger.info(`Analyzing post ${i + 1}: ${post.content.substring(0, 50)}...`);
|
logger.info(`Analyzing post ${i + 1}: ${post.content.substring(0, 50)}...`);
|
||||||
|
|
||||||
// Simulate AI analysis
|
// Simulate AI analysis
|
||||||
await simulateProcessing();
|
await simulateProcessing();
|
||||||
|
|
||||||
const relevanceScore = 0.7 + Math.random() * 0.3;
|
const relevanceScore = 0.7 + Math.random() * 0.3;
|
||||||
const confidence = 0.8 + Math.random() * 0.2;
|
const confidence = 0.8 + Math.random() * 0.2;
|
||||||
|
|
||||||
logger.success(
|
logger.success(
|
||||||
`Relevance: ${relevanceScore.toFixed(
|
`Relevance: ${relevanceScore.toFixed(
|
||||||
2
|
2
|
||||||
)}, Confidence: ${confidence.toFixed(2)}`
|
)}, Confidence: ${confidence.toFixed(2)}`
|
||||||
);
|
);
|
||||||
|
|
||||||
// Add AI analysis to post
|
// Add AI analysis to post
|
||||||
post.ai_analysis = {
|
post.ai_analysis = {
|
||||||
relevance_score: relevanceScore,
|
relevance_score: relevanceScore,
|
||||||
confidence: confidence,
|
confidence: confidence,
|
||||||
context_match: relevanceScore > 0.7,
|
context_match: relevanceScore > 0.7,
|
||||||
analysis_text: `This post discusses ${post.metadata.search_keyword} and is relevant to the search context.`,
|
analysis_text: `This post discusses ${post.metadata.search_keyword} and is relevant to the search context.`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
await waitForEnter();
|
await waitForEnter();
|
||||||
}
|
}
|
||||||
|
|
||||||
async function demonstrateOutputGeneration() {
|
async function demonstrateOutputGeneration() {
|
||||||
demo.section("6. Output Generation");
|
demo.section("6. Output Generation");
|
||||||
demo.info("Results are saved to JSON files with comprehensive metadata.");
|
demo.info("Results are saved to JSON files with comprehensive metadata.");
|
||||||
|
|
||||||
demo.code("// Generating output file");
|
demo.code("// Generating output file");
|
||||||
logger.file("Saving results to JSON file...");
|
logger.file("Saving results to JSON file...");
|
||||||
|
|
||||||
const outputData = {
|
const outputData = {
|
||||||
metadata: {
|
metadata: {
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
keywords: ["layoff", "downsizing", "open to work"],
|
keywords: ["layoff", "downsizing", "open to work"],
|
||||||
city: "Toronto",
|
city: "Toronto",
|
||||||
date_posted: "past-week",
|
date_posted: "past-week",
|
||||||
sort_by: "date_posted",
|
sort_by: "date_posted",
|
||||||
total_posts_found: 150,
|
total_posts_found: 150,
|
||||||
accepted_posts: mockPosts.length,
|
accepted_posts: mockPosts.length,
|
||||||
rejected_posts: 147,
|
rejected_posts: 147,
|
||||||
processing_time_seconds: 180,
|
processing_time_seconds: 180,
|
||||||
},
|
},
|
||||||
posts: mockPosts,
|
posts: mockPosts,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Save to demo file
|
// Save to demo file
|
||||||
const outputPath = path.join(__dirname, "demo-results.json");
|
const outputPath = path.join(__dirname, "demo-results.json");
|
||||||
fs.writeFileSync(outputPath, JSON.stringify(outputData, null, 2));
|
fs.writeFileSync(outputPath, JSON.stringify(outputData, null, 2));
|
||||||
|
|
||||||
demo.success(`Results saved to: ${outputPath}`);
|
demo.success(`Results saved to: ${outputPath}`);
|
||||||
demo.info(`Total posts processed: ${outputData.metadata.total_posts_found}`);
|
demo.info(`Total posts processed: ${outputData.metadata.total_posts_found}`);
|
||||||
demo.info(`Posts accepted: ${outputData.metadata.accepted_posts}`);
|
demo.info(`Posts accepted: ${outputData.metadata.accepted_posts}`);
|
||||||
demo.info(`Posts rejected: ${outputData.metadata.rejected_posts}`);
|
demo.info(`Posts rejected: ${outputData.metadata.rejected_posts}`);
|
||||||
|
|
||||||
demo.code("// Output file structure");
|
demo.code("// Output file structure");
|
||||||
demo.info("📁 demo-results.json");
|
demo.info("📁 demo-results.json");
|
||||||
demo.info(" ├── metadata");
|
demo.info(" ├── metadata");
|
||||||
demo.info(" │ ├── timestamp");
|
demo.info(" │ ├── timestamp");
|
||||||
demo.info(" │ ├── keywords");
|
demo.info(" │ ├── keywords");
|
||||||
demo.info(" │ ├── city");
|
demo.info(" │ ├── city");
|
||||||
demo.info(" │ ├── total_posts_found");
|
demo.info(" │ ├── total_posts_found");
|
||||||
demo.info(" │ ├── accepted_posts");
|
demo.info(" │ ├── accepted_posts");
|
||||||
demo.info(" │ └── processing_time_seconds");
|
demo.info(" │ └── processing_time_seconds");
|
||||||
demo.info(" └── posts[]");
|
demo.info(" └── posts[]");
|
||||||
demo.info(" ├── id");
|
demo.info(" ├── id");
|
||||||
demo.info(" ├── content");
|
demo.info(" ├── content");
|
||||||
demo.info(" ├── author");
|
demo.info(" ├── author");
|
||||||
demo.info(" ├── engagement");
|
demo.info(" ├── engagement");
|
||||||
demo.info(" ├── ai_analysis");
|
demo.info(" ├── ai_analysis");
|
||||||
demo.info(" └── metadata");
|
demo.info(" └── metadata");
|
||||||
|
|
||||||
await waitForEnter();
|
await waitForEnter();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper functions
|
// Helper functions
|
||||||
function waitForEnter() {
|
function waitForEnter() {
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
const readline = require("readline");
|
const readline = require("readline");
|
||||||
const rl = readline.createInterface({
|
const rl = readline.createInterface({
|
||||||
input: process.stdin,
|
input: process.stdin,
|
||||||
output: process.stdout,
|
output: process.stdout,
|
||||||
});
|
});
|
||||||
|
|
||||||
rl.question("\nPress Enter to continue...", () => {
|
rl.question("\nPress Enter to continue...", () => {
|
||||||
rl.close();
|
rl.close();
|
||||||
resolve();
|
resolve();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function simulateSearch() {
|
async function simulateSearch() {
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
const steps = [
|
const steps = [
|
||||||
"Launching browser",
|
"Launching browser",
|
||||||
"Logging in",
|
"Logging in",
|
||||||
"Navigating to search",
|
"Navigating to search",
|
||||||
"Loading results",
|
"Loading results",
|
||||||
];
|
];
|
||||||
let i = 0;
|
let i = 0;
|
||||||
const interval = setInterval(() => {
|
const interval = setInterval(() => {
|
||||||
if (i < steps.length) {
|
if (i < steps.length) {
|
||||||
logger.info(steps[i]);
|
logger.info(steps[i]);
|
||||||
i++;
|
i++;
|
||||||
} else {
|
} else {
|
||||||
clearInterval(interval);
|
clearInterval(interval);
|
||||||
resolve();
|
resolve();
|
||||||
}
|
}
|
||||||
}, 800);
|
}, 800);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function simulateProcessing() {
|
async function simulateProcessing() {
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
const dots = [".", "..", "..."];
|
const dots = [".", "..", "..."];
|
||||||
let i = 0;
|
let i = 0;
|
||||||
const interval = setInterval(() => {
|
const interval = setInterval(() => {
|
||||||
process.stdout.write(`\rProcessing${dots[i]}`);
|
process.stdout.write(`\rProcessing${dots[i]}`);
|
||||||
i = (i + 1) % dots.length;
|
i = (i + 1) % dots.length;
|
||||||
}, 500);
|
}, 500);
|
||||||
|
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
clearInterval(interval);
|
clearInterval(interval);
|
||||||
process.stdout.write("\r");
|
process.stdout.write("\r");
|
||||||
resolve();
|
resolve();
|
||||||
}, 1500);
|
}, 1500);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run the demo if this file is executed directly
|
// Run the demo if this file is executed directly
|
||||||
if (require.main === module) {
|
if (require.main === module) {
|
||||||
runDemo().catch((error) => {
|
runDemo().catch((error) => {
|
||||||
demo.error(`Demo failed: ${error.message}`);
|
demo.error(`Demo failed: ${error.message}`);
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = { runDemo };
|
module.exports = { runDemo };
|
||||||
|
|||||||
@ -1,51 +1,51 @@
|
|||||||
keyword
|
keyword
|
||||||
acquisition
|
acquisition
|
||||||
actively seeking
|
actively seeking
|
||||||
bankruptcy
|
bankruptcy
|
||||||
business realignment
|
business realignment
|
||||||
career transition
|
career transition
|
||||||
company closure
|
company closure
|
||||||
company reorganization
|
company reorganization
|
||||||
cost cutting
|
cost cutting
|
||||||
department closure
|
department closure
|
||||||
downsizing
|
downsizing
|
||||||
furlough
|
furlough
|
||||||
headcount reduction
|
headcount reduction
|
||||||
hiring
|
hiring
|
||||||
hiring freeze
|
hiring freeze
|
||||||
involuntary separation
|
involuntary separation
|
||||||
job cuts
|
job cuts
|
||||||
job elimination
|
job elimination
|
||||||
job loss
|
job loss
|
||||||
job opportunity
|
job opportunity
|
||||||
job search
|
job search
|
||||||
layoff
|
layoff
|
||||||
looking for opportunities
|
looking for opportunities
|
||||||
mass layoff
|
mass layoff
|
||||||
merger
|
merger
|
||||||
new position
|
new position
|
||||||
new role
|
new role
|
||||||
office closure
|
office closure
|
||||||
open to work
|
open to work
|
||||||
organizational change
|
organizational change
|
||||||
outplacement
|
outplacement
|
||||||
plant closure
|
plant closure
|
||||||
position elimination
|
position elimination
|
||||||
recruiting
|
recruiting
|
||||||
reduction in force
|
reduction in force
|
||||||
redundancies
|
redundancies
|
||||||
redundancy
|
redundancy
|
||||||
restructuring
|
restructuring
|
||||||
rightsizing
|
rightsizing
|
||||||
RIF
|
RIF
|
||||||
role elimination
|
role elimination
|
||||||
separation
|
separation
|
||||||
site closure
|
site closure
|
||||||
staff reduction
|
staff reduction
|
||||||
terminated
|
terminated
|
||||||
termination
|
termination
|
||||||
voluntary separation
|
voluntary separation
|
||||||
workforce adjustment
|
workforce adjustment
|
||||||
workforce optimization
|
workforce optimization
|
||||||
workforce reduction
|
workforce reduction
|
||||||
workforce transition
|
workforce transition
|
||||||
|
|||||||
|
@ -1,230 +1,230 @@
|
|||||||
/**
|
/**
|
||||||
* LinkedIn Parsing Strategy
|
* LinkedIn Parsing Strategy
|
||||||
*
|
*
|
||||||
* Uses core-parser for browser management and ai-analyzer for utilities
|
* Uses core-parser for browser management and ai-analyzer for utilities
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const {
|
const {
|
||||||
logger,
|
logger,
|
||||||
cleanText,
|
cleanText,
|
||||||
containsAnyKeyword,
|
containsAnyKeyword,
|
||||||
validateLocationAgainstFilters,
|
validateLocationAgainstFilters,
|
||||||
extractLocationFromProfile,
|
extractLocationFromProfile,
|
||||||
} = require("ai-analyzer");
|
} = require("ai-analyzer");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* LinkedIn parsing strategy function
|
* LinkedIn parsing strategy function
|
||||||
*/
|
*/
|
||||||
async function linkedinStrategy(coreParser, options = {}) {
|
async function linkedinStrategy(coreParser, options = {}) {
|
||||||
const {
|
const {
|
||||||
keywords = ["layoff", "downsizing", "job cuts"],
|
keywords = ["layoff", "downsizing", "job cuts"],
|
||||||
locationFilter = null,
|
locationFilter = null,
|
||||||
maxResults = 50,
|
maxResults = 50,
|
||||||
credentials = {},
|
credentials = {},
|
||||||
} = options;
|
} = options;
|
||||||
|
|
||||||
const results = [];
|
const results = [];
|
||||||
const rejectedResults = [];
|
const rejectedResults = [];
|
||||||
const seenPosts = new Set();
|
const seenPosts = new Set();
|
||||||
const seenProfiles = new Set();
|
const seenProfiles = new Set();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Create main page
|
// Create main page
|
||||||
const page = await coreParser.createPage("linkedin-main");
|
const page = await coreParser.createPage("linkedin-main");
|
||||||
|
|
||||||
// Authenticate to LinkedIn
|
// Authenticate to LinkedIn
|
||||||
logger.info("🔐 Authenticating to LinkedIn...");
|
logger.info("🔐 Authenticating to LinkedIn...");
|
||||||
await coreParser.authenticate("linkedin", credentials, "linkedin-main");
|
await coreParser.authenticate("linkedin", credentials, "linkedin-main");
|
||||||
logger.info("✅ LinkedIn authentication successful");
|
logger.info("✅ LinkedIn authentication successful");
|
||||||
|
|
||||||
// Search for posts with each keyword
|
// Search for posts with each keyword
|
||||||
for (const keyword of keywords) {
|
for (const keyword of keywords) {
|
||||||
logger.info(`🔍 Searching LinkedIn for: "${keyword}"`);
|
logger.info(`🔍 Searching LinkedIn for: "${keyword}"`);
|
||||||
|
|
||||||
const searchUrl = `https://www.linkedin.com/search/results/content/?keywords=${encodeURIComponent(
|
const searchUrl = `https://www.linkedin.com/search/results/content/?keywords=${encodeURIComponent(
|
||||||
keyword
|
keyword
|
||||||
)}&sortBy=date_posted`;
|
)}&sortBy=date_posted`;
|
||||||
|
|
||||||
await coreParser.navigateTo(searchUrl, {
|
await coreParser.navigateTo(searchUrl, {
|
||||||
pageId: "linkedin-main",
|
pageId: "linkedin-main",
|
||||||
retries: 2,
|
retries: 2,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Wait for search results
|
// Wait for search results
|
||||||
const hasResults = await coreParser.navigationManager.navigateAndWaitFor(
|
const hasResults = await coreParser.navigationManager.navigateAndWaitFor(
|
||||||
searchUrl,
|
searchUrl,
|
||||||
".search-results-container",
|
".search-results-container",
|
||||||
{ pageId: "linkedin-main", timeout: 10000 }
|
{ pageId: "linkedin-main", timeout: 10000 }
|
||||||
);
|
);
|
||||||
|
|
||||||
if (!hasResults) {
|
if (!hasResults) {
|
||||||
logger.warning(`No search results found for keyword: ${keyword}`);
|
logger.warning(`No search results found for keyword: ${keyword}`);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract posts from current page
|
// Extract posts from current page
|
||||||
const posts = await extractPostsFromPage(page, keyword);
|
const posts = await extractPostsFromPage(page, keyword);
|
||||||
|
|
||||||
for (const post of posts) {
|
for (const post of posts) {
|
||||||
// Skip duplicates
|
// Skip duplicates
|
||||||
if (seenPosts.has(post.postId)) continue;
|
if (seenPosts.has(post.postId)) continue;
|
||||||
seenPosts.add(post.postId);
|
seenPosts.add(post.postId);
|
||||||
|
|
||||||
// Validate location if filtering enabled
|
// Validate location if filtering enabled
|
||||||
if (locationFilter) {
|
if (locationFilter) {
|
||||||
const locationValid = validateLocationAgainstFilters(
|
const locationValid = validateLocationAgainstFilters(
|
||||||
post.location || post.profileLocation,
|
post.location || post.profileLocation,
|
||||||
locationFilter
|
locationFilter
|
||||||
);
|
);
|
||||||
|
|
||||||
if (!locationValid) {
|
if (!locationValid) {
|
||||||
rejectedResults.push({
|
rejectedResults.push({
|
||||||
...post,
|
...post,
|
||||||
rejectionReason: "Location filter mismatch",
|
rejectionReason: "Location filter mismatch",
|
||||||
});
|
});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
results.push(post);
|
results.push(post);
|
||||||
|
|
||||||
if (results.length >= maxResults) {
|
if (results.length >= maxResults) {
|
||||||
logger.info(`📊 Reached maximum results limit: ${maxResults}`);
|
logger.info(`📊 Reached maximum results limit: ${maxResults}`);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (results.length >= maxResults) break;
|
if (results.length >= maxResults) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
`🎯 LinkedIn parsing completed: ${results.length} posts found, ${rejectedResults.length} rejected`
|
`🎯 LinkedIn parsing completed: ${results.length} posts found, ${rejectedResults.length} rejected`
|
||||||
);
|
);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
results,
|
results,
|
||||||
rejectedResults,
|
rejectedResults,
|
||||||
summary: {
|
summary: {
|
||||||
totalPosts: results.length,
|
totalPosts: results.length,
|
||||||
totalRejected: rejectedResults.length,
|
totalRejected: rejectedResults.length,
|
||||||
keywords: keywords.join(", "),
|
keywords: keywords.join(", "),
|
||||||
locationFilter,
|
locationFilter,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`❌ LinkedIn parsing failed: ${error.message}`);
|
logger.error(`❌ LinkedIn parsing failed: ${error.message}`);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract posts from current search results page
|
* Extract posts from current search results page
|
||||||
*/
|
*/
|
||||||
async function extractPostsFromPage(page, keyword) {
|
async function extractPostsFromPage(page, keyword) {
|
||||||
const posts = [];
|
const posts = [];
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Get all post elements
|
// Get all post elements
|
||||||
const postElements = await page.$$(".feed-shared-update-v2");
|
const postElements = await page.$$(".feed-shared-update-v2");
|
||||||
|
|
||||||
for (const postElement of postElements) {
|
for (const postElement of postElements) {
|
||||||
try {
|
try {
|
||||||
const post = await extractPostData(postElement, keyword);
|
const post = await extractPostData(postElement, keyword);
|
||||||
if (post) {
|
if (post) {
|
||||||
posts.push(post);
|
posts.push(post);
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.warning(`Failed to extract post data: ${error.message}`);
|
logger.warning(`Failed to extract post data: ${error.message}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`Failed to extract posts from page: ${error.message}`);
|
logger.error(`Failed to extract posts from page: ${error.message}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
return posts;
|
return posts;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract data from individual post element
|
* Extract data from individual post element
|
||||||
*/
|
*/
|
||||||
async function extractPostData(postElement, keyword) {
|
async function extractPostData(postElement, keyword) {
|
||||||
try {
|
try {
|
||||||
// Extract post ID
|
// Extract post ID
|
||||||
const postId = (await postElement.getAttribute("data-urn")) || "";
|
const postId = (await postElement.getAttribute("data-urn")) || "";
|
||||||
|
|
||||||
// Extract author info
|
// Extract author info
|
||||||
const authorElement = await postElement.$(".feed-shared-actor__name");
|
const authorElement = await postElement.$(".feed-shared-actor__name");
|
||||||
const authorName = authorElement
|
const authorName = authorElement
|
||||||
? cleanText(await authorElement.textContent())
|
? cleanText(await authorElement.textContent())
|
||||||
: "";
|
: "";
|
||||||
|
|
||||||
const authorLinkElement = await postElement.$(".feed-shared-actor__name a");
|
const authorLinkElement = await postElement.$(".feed-shared-actor__name a");
|
||||||
const authorUrl = authorLinkElement
|
const authorUrl = authorLinkElement
|
||||||
? await authorLinkElement.getAttribute("href")
|
? await authorLinkElement.getAttribute("href")
|
||||||
: "";
|
: "";
|
||||||
|
|
||||||
// Extract post content
|
// Extract post content
|
||||||
const contentElement = await postElement.$(".feed-shared-text");
|
const contentElement = await postElement.$(".feed-shared-text");
|
||||||
const content = contentElement
|
const content = contentElement
|
||||||
? cleanText(await contentElement.textContent())
|
? cleanText(await contentElement.textContent())
|
||||||
: "";
|
: "";
|
||||||
|
|
||||||
// Extract timestamp
|
// Extract timestamp
|
||||||
const timeElement = await postElement.$(
|
const timeElement = await postElement.$(
|
||||||
".feed-shared-actor__sub-description time"
|
".feed-shared-actor__sub-description time"
|
||||||
);
|
);
|
||||||
const timestamp = timeElement
|
const timestamp = timeElement
|
||||||
? await timeElement.getAttribute("datetime")
|
? await timeElement.getAttribute("datetime")
|
||||||
: "";
|
: "";
|
||||||
|
|
||||||
// Extract engagement metrics
|
// Extract engagement metrics
|
||||||
const likesElement = await postElement.$(".social-counts-reactions__count");
|
const likesElement = await postElement.$(".social-counts-reactions__count");
|
||||||
const likesText = likesElement
|
const likesText = likesElement
|
||||||
? cleanText(await likesElement.textContent())
|
? cleanText(await likesElement.textContent())
|
||||||
: "0";
|
: "0";
|
||||||
|
|
||||||
const commentsElement = await postElement.$(
|
const commentsElement = await postElement.$(
|
||||||
".social-counts-comments__count"
|
".social-counts-comments__count"
|
||||||
);
|
);
|
||||||
const commentsText = commentsElement
|
const commentsText = commentsElement
|
||||||
? cleanText(await commentsElement.textContent())
|
? cleanText(await commentsElement.textContent())
|
||||||
: "0";
|
: "0";
|
||||||
|
|
||||||
// Check if post contains relevant keywords
|
// Check if post contains relevant keywords
|
||||||
const isRelevant = containsAnyKeyword(content, [keyword]);
|
const isRelevant = containsAnyKeyword(content, [keyword]);
|
||||||
|
|
||||||
if (!isRelevant) {
|
if (!isRelevant) {
|
||||||
return null; // Skip irrelevant posts
|
return null; // Skip irrelevant posts
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
postId: cleanText(postId),
|
postId: cleanText(postId),
|
||||||
authorName,
|
authorName,
|
||||||
authorUrl,
|
authorUrl,
|
||||||
content,
|
content,
|
||||||
timestamp,
|
timestamp,
|
||||||
keyword,
|
keyword,
|
||||||
likes: extractNumber(likesText),
|
likes: extractNumber(likesText),
|
||||||
comments: extractNumber(commentsText),
|
comments: extractNumber(commentsText),
|
||||||
extractedAt: new Date().toISOString(),
|
extractedAt: new Date().toISOString(),
|
||||||
source: "linkedin",
|
source: "linkedin",
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.warning(`Error extracting post data: ${error.message}`);
|
logger.warning(`Error extracting post data: ${error.message}`);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract numbers from text (e.g., "15 likes" -> 15)
|
* Extract numbers from text (e.g., "15 likes" -> 15)
|
||||||
*/
|
*/
|
||||||
function extractNumber(text) {
|
function extractNumber(text) {
|
||||||
const match = text.match(/\d+/);
|
const match = text.match(/\d+/);
|
||||||
return match ? parseInt(match[0]) : 0;
|
return match ? parseInt(match[0]) : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
linkedinStrategy,
|
linkedinStrategy,
|
||||||
extractPostsFromPage,
|
extractPostsFromPage,
|
||||||
extractPostData,
|
extractPostData,
|
||||||
};
|
};
|
||||||
|
|||||||
@ -1,34 +1,34 @@
|
|||||||
{
|
{
|
||||||
"results": [
|
"results": [
|
||||||
{
|
{
|
||||||
"text": "Just got laid off from my software engineering role. Looking for new opportunities in the Toronto area.",
|
"text": "Just got laid off from my software engineering role. Looking for new opportunities in the Toronto area.",
|
||||||
"location": "Toronto, Ontario, Canada",
|
"location": "Toronto, Ontario, Canada",
|
||||||
"keyword": "layoff",
|
"keyword": "layoff",
|
||||||
"timestamp": "2024-01-15T10:30:00Z"
|
"timestamp": "2024-01-15T10:30:00Z"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"text": "Excited to share that I'm starting a new position as a Senior Developer at TechCorp!",
|
"text": "Excited to share that I'm starting a new position as a Senior Developer at TechCorp!",
|
||||||
"location": "Vancouver, BC, Canada",
|
"location": "Vancouver, BC, Canada",
|
||||||
"keyword": "hiring",
|
"keyword": "hiring",
|
||||||
"timestamp": "2024-01-15T11:00:00Z"
|
"timestamp": "2024-01-15T11:00:00Z"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"text": "Our company is going through a restructuring and unfortunately had to let go of 50 employees.",
|
"text": "Our company is going through a restructuring and unfortunately had to let go of 50 employees.",
|
||||||
"location": "Montreal, Quebec, Canada",
|
"location": "Montreal, Quebec, Canada",
|
||||||
"keyword": "layoff",
|
"keyword": "layoff",
|
||||||
"timestamp": "2024-01-15T11:30:00Z"
|
"timestamp": "2024-01-15T11:30:00Z"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"text": "Beautiful weather today! Perfect for a walk in the park.",
|
"text": "Beautiful weather today! Perfect for a walk in the park.",
|
||||||
"location": "Calgary, Alberta, Canada",
|
"location": "Calgary, Alberta, Canada",
|
||||||
"keyword": "weather",
|
"keyword": "weather",
|
||||||
"timestamp": "2024-01-15T12:00:00Z"
|
"timestamp": "2024-01-15T12:00:00Z"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"text": "We're hiring! Looking for talented developers to join our growing team.",
|
"text": "We're hiring! Looking for talented developers to join our growing team.",
|
||||||
"location": "Ottawa, Ontario, Canada",
|
"location": "Ottawa, Ontario, Canada",
|
||||||
"keyword": "hiring",
|
"keyword": "hiring",
|
||||||
"timestamp": "2024-01-15T12:30:00Z"
|
"timestamp": "2024-01-15T12:30:00Z"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user