diff --git a/job-search-parser/README.md b/job-search-parser/README.md index f5d4789..cb17ea8 100644 --- a/job-search-parser/README.md +++ b/job-search-parser/README.md @@ -141,7 +141,7 @@ ENABLE_AI_ANALYSIS=false HEADLESS=true # Output Configuration -OUTPUT_FORMAT=json +OUTPUT_FORMAT=json # Options: "json", "csv", or "both" ``` ### Command Line Options @@ -150,31 +150,34 @@ OUTPUT_FORMAT=json # Basic usage node index.js -# Specific roles -node index.js --roles="frontend developer,backend developer" +# Select sites to parse +node index.js --sites=linkedin,skipthedrive -# Geographic focus -node index.js --locations="Toronto,Vancouver" +# Search keywords +node index.js --keywords="software engineer,developer" -# Experience level -node index.js --experience="senior" +# Location filter +node index.js --location="Ontario" -# Output format -node index.js --output=results/job-market-analysis.json +# Max pages to parse +node index.js --max-pages=10 + +# Exclude rejected results +node index.js --no-rejected + +# Output format (json, csv, or both) +node index.js --output=csv +node index.js --output=both ``` **Available Options:** -- `--roles="role1,role2"`: Target job roles -- `--locations="city1,city2"`: Geographic focus -- `--experience="entry|mid|senior"`: Experience level -- `--remote="remote|hybrid|onsite"`: Remote work preference -- `--salary-min=NUMBER`: Minimum salary filter -- `--salary-max=NUMBER`: Maximum salary filter -- `--output=FILE`: Output filename -- `--format=json|csv`: Output format -- `--trends`: Enable trend analysis -- `--skills`: Enable skill analysis +- `--sites="site1,site2"`: Job sites to parse (linkedin, skipthedrive) +- `--keywords="keyword1,keyword2"`: Search keywords +- `--location="LOCATION"`: Location filter +- `--max-pages=NUMBER`: Maximum pages to parse (0 or "all" for unlimited) +- `--no-rejected` or `--exclude-rejected`: Exclude rejected results from output +- `--output=FORMAT` or `--format=FORMAT`: Output format - "json", "csv", or 
"both" (default: "json") ## 📊 Keywords @@ -373,12 +376,46 @@ node index.js --companies="Google,Microsoft,Amazon" ### CSV Output -The parser can also generate CSV files for easy analysis: +The parser can generate CSV files for easy spreadsheet analysis. Use `--output=csv` or `OUTPUT_FORMAT=csv` to export results as CSV. + +**CSV Columns:** +- `jobId`: Unique job identifier +- `title`: Job title +- `company`: Company name +- `location`: Job location +- `jobUrl`: Link to job posting +- `postedDate`: Date job was posted +- `description`: Job description +- `jobType`: Type of job (full-time, part-time, contract, etc.) +- `experienceLevel`: Required experience level +- `keyword`: Search keyword that matched +- `extractedAt`: Timestamp when job was extracted +- `source`: Source site (e.g., "linkedin-jobs", "skipthedrive") +- `aiRelevant`: AI analysis relevance (Yes/No) +- `aiConfidence`: AI confidence score (0-1) +- `aiReasoning`: AI reasoning for relevance +- `aiContext`: AI analysis context +- `aiModel`: AI model used for analysis +- `aiAnalyzedAt`: Timestamp of AI analysis + +**Example CSV Output:** ```csv -job_id,title,company,location,remote_type,salary_min,salary_max,required_skills,experience_level,posted_date -job_1,Senior Software Engineer,TechCorp,Toronto,hybrid,100000,140000,"React,Node.js,TypeScript",senior,2024-01-10 -job_2,Data Scientist,DataCorp,Vancouver,remote,90000,130000,"Python,SQL,ML",mid,2024-01-09 +jobId,title,company,location,jobUrl,postedDate,description,jobType,experienceLevel,keyword,extractedAt,source,aiRelevant,aiConfidence,aiReasoning,aiContext,aiModel,aiAnalyzedAt +4344137241,Web Applications Co-op/Intern,Nokia,Kanata ON (Hybrid),https://www.linkedin.com/jobs/view/4344137241,,"Web Applications Co-op/Intern",,,co-op,2025-12-17T04:50:05.600Z,linkedin-jobs,Yes,0.8,"The post mentions a co-op/intern position",co-op and internship opportunities for First year Math students,mistral,2025-12-17T04:58:33.479Z +``` + +**Usage:** + +```bash +# Export 
as CSV only +node index.js --output=csv + +# Export both JSON and CSV +node index.js --output=both + +# Using environment variable +OUTPUT_FORMAT=csv node index.js ``` ## šŸ”’ Security & Best Practices diff --git a/job-search-parser/index.js b/job-search-parser/index.js index 10f48ba..1be0efd 100644 --- a/job-search-parser/index.js +++ b/job-search-parser/index.js @@ -12,6 +12,7 @@ const CoreParser = require("../core-parser"); const { skipthedriveStrategy } = require("./strategies/skipthedrive-strategy"); const { linkedinJobsStrategy } = require("./strategies/linkedin-jobs-strategy"); const { logger, analyzeBatch, checkOllamaStatus, DEFAULT_MODEL } = require("ai-analyzer"); +const { convertResultsToCsv } = require("./src/csv-utils"); // Load environment variables require("dotenv").config({ path: path.join(__dirname, ".env") }); @@ -26,6 +27,7 @@ const AI_CONTEXT = process.env.AI_CONTEXT || "Job market analysis focusing on jo const OLLAMA_MODEL = process.env.OLLAMA_MODEL || DEFAULT_MODEL; const MAX_PAGES = parseInt(process.env.MAX_PAGES) || 5; const EXCLUDE_REJECTED = process.env.EXCLUDE_REJECTED === "true"; +const OUTPUT_FORMAT = process.env.OUTPUT_FORMAT || "json"; // "json", "csv", or "both" // Available site strategies const SITE_STRATEGIES = { @@ -47,6 +49,7 @@ function parseArguments() { locationFilter: null, maxPages: MAX_PAGES, excludeRejected: EXCLUDE_REJECTED, + outputFormat: OUTPUT_FORMAT, }; args.forEach((arg) => { @@ -72,6 +75,13 @@ function parseArguments() { } } else if (arg === "--no-rejected" || arg === "--exclude-rejected") { options.excludeRejected = true; + } else if (arg.startsWith("--output=") || arg.startsWith("--format=")) { + const format = arg.split("=")[1].toLowerCase(); + if (["json", "csv", "both"].includes(format)) { + options.outputFormat = format; + } else { + logger.warning(`āš ļø Unknown output format: ${format}. 
Using default: json`); + } } }); @@ -296,16 +306,34 @@ async function startJobSearchParser(options = {}) { } const timestamp = new Date().toISOString().replace(/[:.]/g, "-"); - const filename = `job-search-results-${timestamp}.json`; - const filepath = path.join(resultsDir, filename); + const outputFormat = finalOptions.outputFormat || OUTPUT_FORMAT; + const savedFiles = []; - fs.writeFileSync(filepath, JSON.stringify(outputData, null, 2)); + // Save JSON if format is "json" or "both" + if (outputFormat === "json" || outputFormat === "both") { + const jsonFilename = `job-search-results-${timestamp}.json`; + const jsonFilepath = path.join(resultsDir, jsonFilename); + fs.writeFileSync(jsonFilepath, JSON.stringify(outputData, null, 2)); + savedFiles.push(jsonFilepath); + } + + // Save CSV if format is "csv" or "both" + if (outputFormat === "csv" || outputFormat === "both") { + const csvFilename = `job-search-results-${timestamp}.csv`; + const csvFilepath = path.join(resultsDir, csvFilename); + const csvContent = convertResultsToCsv(outputData); + fs.writeFileSync(csvFilepath, csvContent); + savedFiles.push(csvFilepath); + } // Final summary logger.step("\nšŸ“Š Job Search Parser Summary"); logger.success(`āœ… Total jobs found: ${allResults.length}`); logger.info(`āŒ Total rejected: ${allRejectedResults.length}`); - logger.info(`šŸ“ Results saved to: ${filepath}`); + logger.info(`šŸ“ Results saved to:`); + savedFiles.forEach(filepath => { + logger.info(` ${filepath}`); + }); logger.info("\nšŸ“ˆ Results by site:"); for (const [site, stats] of Object.entries(siteResults)) { diff --git a/job-search-parser/src/csv-utils.js b/job-search-parser/src/csv-utils.js new file mode 100644 index 0000000..1689363 --- /dev/null +++ b/job-search-parser/src/csv-utils.js @@ -0,0 +1,114 @@ +/** + * CSV Utilities + * + * Functions for converting job search results to CSV format + */ + +/** + * Escapes a CSV field value + * @param {string} value - The value to escape + * @returns {string} 
- The escaped value + */ +function escapeCsvField(value) { + if (value === null || value === undefined) { + return ""; + } + + const stringValue = String(value); + + // If the value contains comma, newline, or double quote, wrap it in quotes and escape quotes + if (stringValue.includes(",") || stringValue.includes("\n") || stringValue.includes('"')) { + return `"${stringValue.replace(/"/g, '""')}"`; + } + + return stringValue; +} + +/** + * Converts job results to CSV format + * @param {Array} jobs - Array of job objects + * @param {Object} metadata - Metadata object (optional) + * @returns {string} - CSV string + */ +function convertJobsToCsv(jobs, metadata = null) { + if (!jobs || jobs.length === 0) { + return ""; + } + + // Define CSV columns based on job object structure + const columns = [ + "jobId", + "title", + "company", + "location", + "jobUrl", + "postedDate", + "description", + "jobType", + "experienceLevel", + "keyword", + "extractedAt", + "source", + "aiRelevant", + "aiConfidence", + "aiReasoning", + "aiContext", + "aiModel", + "aiAnalyzedAt" + ]; + + // Create header row + const headerRow = columns.map(col => escapeCsvField(col)).join(","); + + // Create data rows + const dataRows = jobs.map(job => { + const row = columns.map(col => { + if (col.startsWith("ai")) { + // Handle AI analysis fields + const aiField = col.substring(2).charAt(0).toLowerCase() + col.substring(3); + if (job.aiAnalysis) { + if (aiField === "relevant") { + return escapeCsvField(job.aiAnalysis.isRelevant ? 
"Yes" : "No"); + } else if (aiField === "confidence") { + return escapeCsvField(job.aiAnalysis.confidence || ""); + } else if (aiField === "reasoning") { + return escapeCsvField(job.aiAnalysis.reasoning || ""); + } else if (aiField === "context") { + return escapeCsvField(job.aiAnalysis.context || ""); + } else if (aiField === "model") { + return escapeCsvField(job.aiAnalysis.model || ""); + } else if (aiField === "analyzedAt") { + return escapeCsvField(job.aiAnalysis.analyzedAt || ""); + } + } + return ""; + } else { + return escapeCsvField(job[col] || ""); + } + }); + return row.join(","); + }); + + // Combine header and data rows + return [headerRow, ...dataRows].join("\n"); +} + +/** + * Converts full results object (with metadata) to CSV + * @param {Object} resultsData - Full results object with metadata, results, etc. + * @returns {string} - CSV string + */ +function convertResultsToCsv(resultsData) { + if (!resultsData || !resultsData.results) { + return ""; + } + + return convertJobsToCsv(resultsData.results, resultsData.metadata); +} + +module.exports = { + convertJobsToCsv, + convertResultsToCsv, + escapeCsvField, +}; +