Enhance job search parser with CSV output support
- Added functionality to export job search results in CSV format alongside JSON. - Introduced command line options for specifying output format: "json", "csv", or "both". - Updated README to include usage instructions for CSV output and detailed CSV structure. - Created utility functions for converting job results to CSV format, ensuring proper field escaping.
This commit is contained in:
parent
8f526b3518
commit
00c4cf1b6f
@ -141,7 +141,7 @@ ENABLE_AI_ANALYSIS=false
|
|||||||
HEADLESS=true
|
HEADLESS=true
|
||||||
|
|
||||||
# Output Configuration
|
# Output Configuration
|
||||||
OUTPUT_FORMAT=json
|
OUTPUT_FORMAT=json # Options: "json", "csv", or "both"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Command Line Options
|
### Command Line Options
|
||||||
@ -150,31 +150,34 @@ OUTPUT_FORMAT=json
|
|||||||
# Basic usage
|
# Basic usage
|
||||||
node index.js
|
node index.js
|
||||||
|
|
||||||
# Specific roles
|
# Select sites to parse
|
||||||
node index.js --roles="frontend developer,backend developer"
|
node index.js --sites=linkedin,skipthedrive
|
||||||
|
|
||||||
# Geographic focus
|
# Search keywords
|
||||||
node index.js --locations="Toronto,Vancouver"
|
node index.js --keywords="software engineer,developer"
|
||||||
|
|
||||||
# Experience level
|
# Location filter
|
||||||
node index.js --experience="senior"
|
node index.js --location="Ontario"
|
||||||
|
|
||||||
# Output format
|
# Max pages to parse
|
||||||
node index.js --output=results/job-market-analysis.json
|
node index.js --max-pages=10
|
||||||
|
|
||||||
|
# Exclude rejected results
|
||||||
|
node index.js --no-rejected
|
||||||
|
|
||||||
|
# Output format (json, csv, or both)
|
||||||
|
node index.js --output=csv
|
||||||
|
node index.js --output=both
|
||||||
```
|
```
|
||||||
|
|
||||||
**Available Options:**
|
**Available Options:**
|
||||||
|
|
||||||
- `--roles="role1,role2"`: Target job roles
|
- `--sites="site1,site2"`: Job sites to parse (linkedin, skipthedrive)
|
||||||
- `--locations="city1,city2"`: Geographic focus
|
- `--keywords="keyword1,keyword2"`: Search keywords
|
||||||
- `--experience="entry|mid|senior"`: Experience level
|
- `--location="LOCATION"`: Location filter
|
||||||
- `--remote="remote|hybrid|onsite"`: Remote work preference
|
- `--max-pages=NUMBER`: Maximum pages to parse (0 or "all" for unlimited)
|
||||||
- `--salary-min=NUMBER`: Minimum salary filter
|
- `--no-rejected` or `--exclude-rejected`: Exclude rejected results from output
|
||||||
- `--salary-max=NUMBER`: Maximum salary filter
|
- `--output=FORMAT` or `--format=FORMAT`: Output format - "json", "csv", or "both" (default: "json")
|
||||||
- `--output=FILE`: Output filename
|
|
||||||
- `--format=json|csv`: Output format
|
|
||||||
- `--trends`: Enable trend analysis
|
|
||||||
- `--skills`: Enable skill analysis
|
|
||||||
|
|
||||||
## 📊 Keywords
|
## 📊 Keywords
|
||||||
|
|
||||||
@ -373,12 +376,46 @@ node index.js --companies="Google,Microsoft,Amazon"
|
|||||||
|
|
||||||
### CSV Output
|
### CSV Output
|
||||||
|
|
||||||
The parser can also generate CSV files for easy analysis:
|
The parser can generate CSV files for easy spreadsheet analysis. Use `--output=csv` or `OUTPUT_FORMAT=csv` to export results as CSV.
|
||||||
|
|
||||||
|
**CSV Columns:**
|
||||||
|
- `jobId`: Unique job identifier
|
||||||
|
- `title`: Job title
|
||||||
|
- `company`: Company name
|
||||||
|
- `location`: Job location
|
||||||
|
- `jobUrl`: Link to job posting
|
||||||
|
- `postedDate`: Date job was posted
|
||||||
|
- `description`: Job description
|
||||||
|
- `jobType`: Type of job (full-time, part-time, contract, etc.)
|
||||||
|
- `experienceLevel`: Required experience level
|
||||||
|
- `keyword`: Search keyword that matched
|
||||||
|
- `extractedAt`: Timestamp when job was extracted
|
||||||
|
- `source`: Source site (e.g., "linkedin-jobs", "skipthedrive")
|
||||||
|
- `aiRelevant`: AI analysis relevance (Yes/No)
|
||||||
|
- `aiConfidence`: AI confidence score (0-1)
|
||||||
|
- `aiReasoning`: AI reasoning for relevance
|
||||||
|
- `aiContext`: AI analysis context
|
||||||
|
- `aiModel`: AI model used for analysis
|
||||||
|
- `aiAnalyzedAt`: Timestamp of AI analysis
|
||||||
|
|
||||||
|
**Example CSV Output:**
|
||||||
|
|
||||||
```csv
|
```csv
|
||||||
job_id,title,company,location,remote_type,salary_min,salary_max,required_skills,experience_level,posted_date
|
jobId,title,company,location,jobUrl,postedDate,description,jobType,experienceLevel,keyword,extractedAt,source,aiRelevant,aiConfidence,aiReasoning,aiContext,aiModel,aiAnalyzedAt
|
||||||
job_1,Senior Software Engineer,TechCorp,Toronto,hybrid,100000,140000,"React,Node.js,TypeScript",senior,2024-01-10
|
4344137241,Web Applications Co-op/Intern,Nokia,Kanata ON (Hybrid),https://www.linkedin.com/jobs/view/4344137241,,"Web Applications Co-op/Intern",,,co-op,2025-12-17T04:50:05.600Z,linkedin-jobs,Yes,0.8,"The post mentions a co-op/intern position",co-op and internship opportunities for First year Math students,mistral,2025-12-17T04:58:33.479Z
|
||||||
job_2,Data Scientist,DataCorp,Vancouver,remote,90000,130000,"Python,SQL,ML",mid,2024-01-09
|
```
|
||||||
|
|
||||||
|
**Usage:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Export as CSV only
|
||||||
|
node index.js --output=csv
|
||||||
|
|
||||||
|
# Export both JSON and CSV
|
||||||
|
node index.js --output=both
|
||||||
|
|
||||||
|
# Using environment variable
|
||||||
|
OUTPUT_FORMAT=csv node index.js
|
||||||
```
|
```
|
||||||
|
|
||||||
## 🔒 Security & Best Practices
|
## 🔒 Security & Best Practices
|
||||||
|
|||||||
@ -12,6 +12,7 @@ const CoreParser = require("../core-parser");
|
|||||||
const { skipthedriveStrategy } = require("./strategies/skipthedrive-strategy");
|
const { skipthedriveStrategy } = require("./strategies/skipthedrive-strategy");
|
||||||
const { linkedinJobsStrategy } = require("./strategies/linkedin-jobs-strategy");
|
const { linkedinJobsStrategy } = require("./strategies/linkedin-jobs-strategy");
|
||||||
const { logger, analyzeBatch, checkOllamaStatus, DEFAULT_MODEL } = require("ai-analyzer");
|
const { logger, analyzeBatch, checkOllamaStatus, DEFAULT_MODEL } = require("ai-analyzer");
|
||||||
|
const { convertResultsToCsv } = require("./src/csv-utils");
|
||||||
|
|
||||||
// Load environment variables
|
// Load environment variables
|
||||||
require("dotenv").config({ path: path.join(__dirname, ".env") });
|
require("dotenv").config({ path: path.join(__dirname, ".env") });
|
||||||
@ -26,6 +27,7 @@ const AI_CONTEXT = process.env.AI_CONTEXT || "Job market analysis focusing on jo
|
|||||||
const OLLAMA_MODEL = process.env.OLLAMA_MODEL || DEFAULT_MODEL;
|
const OLLAMA_MODEL = process.env.OLLAMA_MODEL || DEFAULT_MODEL;
|
||||||
const MAX_PAGES = parseInt(process.env.MAX_PAGES) || 5;
|
const MAX_PAGES = parseInt(process.env.MAX_PAGES) || 5;
|
||||||
const EXCLUDE_REJECTED = process.env.EXCLUDE_REJECTED === "true";
|
const EXCLUDE_REJECTED = process.env.EXCLUDE_REJECTED === "true";
|
||||||
|
const OUTPUT_FORMAT = process.env.OUTPUT_FORMAT || "json"; // "json", "csv", or "both"
|
||||||
|
|
||||||
// Available site strategies
|
// Available site strategies
|
||||||
const SITE_STRATEGIES = {
|
const SITE_STRATEGIES = {
|
||||||
@ -47,6 +49,7 @@ function parseArguments() {
|
|||||||
locationFilter: null,
|
locationFilter: null,
|
||||||
maxPages: MAX_PAGES,
|
maxPages: MAX_PAGES,
|
||||||
excludeRejected: EXCLUDE_REJECTED,
|
excludeRejected: EXCLUDE_REJECTED,
|
||||||
|
outputFormat: OUTPUT_FORMAT,
|
||||||
};
|
};
|
||||||
|
|
||||||
args.forEach((arg) => {
|
args.forEach((arg) => {
|
||||||
@ -72,6 +75,13 @@ function parseArguments() {
|
|||||||
}
|
}
|
||||||
} else if (arg === "--no-rejected" || arg === "--exclude-rejected") {
|
} else if (arg === "--no-rejected" || arg === "--exclude-rejected") {
|
||||||
options.excludeRejected = true;
|
options.excludeRejected = true;
|
||||||
|
} else if (arg.startsWith("--output=") || arg.startsWith("--format=")) {
|
||||||
|
const format = arg.split("=")[1].toLowerCase();
|
||||||
|
if (["json", "csv", "both"].includes(format)) {
|
||||||
|
options.outputFormat = format;
|
||||||
|
} else {
|
||||||
|
logger.warning(`⚠️ Unknown output format: ${format}. Using default: json`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -296,16 +306,34 @@ async function startJobSearchParser(options = {}) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
||||||
const filename = `job-search-results-${timestamp}.json`;
|
const outputFormat = finalOptions.outputFormat || OUTPUT_FORMAT;
|
||||||
const filepath = path.join(resultsDir, filename);
|
const savedFiles = [];
|
||||||
|
|
||||||
fs.writeFileSync(filepath, JSON.stringify(outputData, null, 2));
|
// Save JSON if format is "json" or "both"
|
||||||
|
if (outputFormat === "json" || outputFormat === "both") {
|
||||||
|
const jsonFilename = `job-search-results-${timestamp}.json`;
|
||||||
|
const jsonFilepath = path.join(resultsDir, jsonFilename);
|
||||||
|
fs.writeFileSync(jsonFilepath, JSON.stringify(outputData, null, 2));
|
||||||
|
savedFiles.push(jsonFilepath);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save CSV if format is "csv" or "both"
|
||||||
|
if (outputFormat === "csv" || outputFormat === "both") {
|
||||||
|
const csvFilename = `job-search-results-${timestamp}.csv`;
|
||||||
|
const csvFilepath = path.join(resultsDir, csvFilename);
|
||||||
|
const csvContent = convertResultsToCsv(outputData);
|
||||||
|
fs.writeFileSync(csvFilepath, csvContent);
|
||||||
|
savedFiles.push(csvFilepath);
|
||||||
|
}
|
||||||
|
|
||||||
// Final summary
|
// Final summary
|
||||||
logger.step("\n📊 Job Search Parser Summary");
|
logger.step("\n📊 Job Search Parser Summary");
|
||||||
logger.success(`✅ Total jobs found: ${allResults.length}`);
|
logger.success(`✅ Total jobs found: ${allResults.length}`);
|
||||||
logger.info(`❌ Total rejected: ${allRejectedResults.length}`);
|
logger.info(`❌ Total rejected: ${allRejectedResults.length}`);
|
||||||
logger.info(`📁 Results saved to: ${filepath}`);
|
logger.info(`📁 Results saved to:`);
|
||||||
|
savedFiles.forEach(filepath => {
|
||||||
|
logger.info(` ${filepath}`);
|
||||||
|
});
|
||||||
|
|
||||||
logger.info("\n📈 Results by site:");
|
logger.info("\n📈 Results by site:");
|
||||||
for (const [site, stats] of Object.entries(siteResults)) {
|
for (const [site, stats] of Object.entries(siteResults)) {
|
||||||
|
|||||||
114
job-search-parser/src/csv-utils.js
Normal file
114
job-search-parser/src/csv-utils.js
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
/**
|
||||||
|
* CSV Utilities
|
||||||
|
*
|
||||||
|
* Functions for converting job search results to CSV format
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Escapes a single value so it can be placed in one CSV field.
 *
 * `null` and `undefined` become an empty field. Every other value is
 * stringified with `String()`. If the text contains a comma, a double quote,
 * a line feed, or a carriage return, the field is wrapped in double quotes
 * and each embedded double quote is doubled.
 *
 * @param {*} value - The value to escape
 * @returns {string} - The escaped field text (without a trailing separator)
 */
function escapeCsvField(value) {
  if (value === null || value === undefined) {
    return "";
  }

  const stringValue = String(value);

  // A bare "\r" is treated as a record break by many CSV readers, so it must
  // force quoting exactly like "\n" does.
  if (/[",\r\n]/.test(stringValue)) {
    return `"${stringValue.replace(/"/g, '""')}"`;
  }

  return stringValue;
}
|
||||||
|
|
||||||
|
/**
 * Converts job results to CSV format.
 *
 * The output is a header row followed by one row per job, joined with "\n".
 * Plain columns are read directly from each job object; the `ai*` columns are
 * read from `job.aiAnalysis` and are left empty when a job has no
 * `aiAnalysis`. Every cell goes through `escapeCsvField`, so missing
 * (`null`/`undefined`) values become empty cells while legitimate falsy
 * values such as a confidence of `0` are preserved.
 *
 * @param {Array} jobs - Array of job objects
 * @param {Object} metadata - Metadata object (optional; currently unused,
 *   accepted so existing callers keep working)
 * @returns {string} - CSV string, or "" when `jobs` is missing or empty
 */
function convertJobsToCsv(jobs, metadata = null) {
  if (!jobs || jobs.length === 0) {
    return "";
  }

  // Columns copied straight from the job object, in output order.
  const jobColumns = [
    "jobId",
    "title",
    "company",
    "location",
    "jobUrl",
    "postedDate",
    "description",
    "jobType",
    "experienceLevel",
    "keyword",
    "extractedAt",
    "source",
  ];

  // CSV column name -> how to read that value from job.aiAnalysis.
  // Key order here is the output order of the AI columns.
  const aiColumns = {
    aiRelevant: (ai) => (ai.isRelevant ? "Yes" : "No"),
    aiConfidence: (ai) => ai.confidence,
    aiReasoning: (ai) => ai.reasoning,
    aiContext: (ai) => ai.context,
    aiModel: (ai) => ai.model,
    aiAnalyzedAt: (ai) => ai.analyzedAt,
  };

  const columns = [...jobColumns, ...Object.keys(aiColumns)];

  // Create header row
  const headerRow = columns.map((col) => escapeCsvField(col)).join(",");

  // Create data rows
  const dataRows = jobs.map((job) => {
    const jobCells = jobColumns.map((col) => escapeCsvField(job[col]));
    const aiCells = Object.values(aiColumns).map((readField) =>
      // Jobs that were never analyzed get empty AI cells.
      job.aiAnalysis ? escapeCsvField(readField(job.aiAnalysis)) : ""
    );
    return [...jobCells, ...aiCells].join(",");
  });

  // Combine header and data rows
  return [headerRow, ...dataRows].join("\n");
}
|
||||||
|
|
||||||
|
/**
 * Builds the CSV export for a full results object.
 *
 * Delegates to `convertJobsToCsv`, passing along `resultsData.results` and
 * `resultsData.metadata`.
 *
 * @param {Object} resultsData - Full results object with metadata, results, etc.
 * @returns {string} - CSV string; "" when `resultsData` or its `results` is missing
 */
function convertResultsToCsv(resultsData) {
  const hasResults = Boolean(resultsData && resultsData.results);

  return hasResults
    ? convertJobsToCsv(resultsData.results, resultsData.metadata)
    : "";
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
convertJobsToCsv,
|
||||||
|
convertResultsToCsv,
|
||||||
|
escapeCsvField,
|
||||||
|
};
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user