/** * SkipTheDrive Parsing Strategy * * Uses core-parser for browser management and ai-analyzer for utilities */ const { logger, cleanText, containsAnyKeyword, validateLocationAgainstFilters, } = require("ai-analyzer"); /** * SkipTheDrive URL builder */ function buildSearchUrl(keyword, orderBy = "date", jobTypes = []) { const baseUrl = "https://www.skipthedrive.com/"; const params = new URLSearchParams({ s: keyword, orderby: orderBy, }); if (jobTypes && jobTypes.length > 0) { params.append("job_type", jobTypes.join(",")); } return `${baseUrl}?${params.toString()}`; } /** * SkipTheDrive parsing strategy function */ async function skipthedriveStrategy(coreParser, options = {}) { const { keywords = ["software engineer", "developer", "programmer"], locationFilter = null, maxPages = 5, jobTypes = [], } = options; const results = []; const rejectedResults = []; const seenJobs = new Set(); try { // Create main page const page = await coreParser.createPage("skipthedrive-main"); logger.info("šŸš€ Starting SkipTheDrive parser..."); logger.info(`šŸ” Keywords: ${keywords.join(", ")}`); logger.info(`šŸ“ Location Filter: ${locationFilter || "None"}`); logger.info(`šŸ“„ Max Pages: ${maxPages}`); // Search for each keyword for (const keyword of keywords) { logger.info(`\nšŸ” Searching for: ${keyword}`); const searchUrl = buildSearchUrl(keyword, "date", jobTypes); try { // Navigate to search results await coreParser.navigateTo(searchUrl, { pageId: "skipthedrive-main", retries: 2, timeout: 30000, }); // Wait for job listings to load const hasResults = await coreParser .waitForSelector( "#loops-wrapper", { timeout: 5000, }, "skipthedrive-main" ) .catch(() => { logger.warning(`No results found for keyword: ${keyword}`); return false; }); if (!hasResults) { continue; } // Process multiple pages let currentPage = 1; let hasNextPage = true; while (hasNextPage && currentPage <= maxPages) { logger.info(`šŸ“„ Processing page ${currentPage} for "${keyword}"`); // Extract jobs from current page const pageJobs = await extractJobsFromPage( page, keyword, locationFilter ); for (const job of pageJobs) { // Skip duplicates if (seenJobs.has(job.jobId)) continue; seenJobs.add(job.jobId); // Validate location if filtering enabled if (locationFilter) { const locationValid = validateLocationAgainstFilters( job.location, locationFilter ); if (!locationValid) { rejectedResults.push({ ...job, rejectionReason: "Location filter mismatch", }); continue; } } results.push(job); } // Check for next page hasNextPage = await hasNextPageAvailable(page); if (hasNextPage && currentPage < maxPages) { await navigateToNextPage(page, currentPage + 1); currentPage++; // Wait for new page to load await page.waitForTimeout(2000); } else { hasNextPage = false; } } } catch (error) { logger.error(`Error processing keyword "${keyword}": ${error.message}`); } } logger.info( `šŸŽÆ SkipTheDrive parsing completed: ${results.length} jobs found, ${rejectedResults.length} rejected` ); return { results, rejectedResults, summary: { totalJobs: results.length, totalRejected: rejectedResults.length, keywords: keywords.join(", "), locationFilter, source: "skipthedrive", }, }; } catch (error) { logger.error(`āŒ SkipTheDrive parsing failed: ${error.message}`); throw error; } } /** * Extract jobs from current page */ async function extractJobsFromPage(page, keyword, locationFilter) { const jobs = []; try { // Get all job article elements const jobElements = await page.$$("article.job_listing"); for (const jobElement of jobElements) { try { const job = await extractJobData(jobElement, keyword); if (job) { jobs.push(job); } } catch (error) { logger.warning(`Failed to extract job data: ${error.message}`); } } } catch (error) { logger.error(`Failed to extract jobs from page: ${error.message}`); } return jobs; } /** * Extract data from individual job element */ async function extractJobData(jobElement, keyword) { try { // Extract job ID const articleId = (await jobElement.getAttribute("id")) || ""; const jobId = articleId ? articleId.replace("post-", "") : ""; // Extract title const titleElement = await jobElement.$(".job_listing-title a"); const title = titleElement ? cleanText(await titleElement.textContent()) : ""; const jobUrl = titleElement ? await titleElement.getAttribute("href") : ""; // Extract company const companyElement = await jobElement.$(".company"); const company = companyElement ? cleanText(await companyElement.textContent()) : ""; // Extract location const locationElement = await jobElement.$(".location"); const location = locationElement ? cleanText(await locationElement.textContent()) : ""; // Extract date posted const dateElement = await jobElement.$(".job-date"); const dateText = dateElement ? cleanText(await dateElement.textContent()) : ""; // Extract description const descElement = await jobElement.$(".job_listing-description"); const description = descElement ? cleanText(await descElement.textContent()) : ""; // Check if featured const featuredElement = await jobElement.$(".featured"); const isFeatured = featuredElement !== null; // Parse date let datePosted = null; let daysAgo = null; if (dateText) { const match = dateText.match(/(\d+)\s+days?\s+ago/); if (match) { daysAgo = parseInt(match[1]); const date = new Date(); date.setDate(date.getDate() - daysAgo); datePosted = date.toISOString().split("T")[0]; } } return { jobId, title, company, location, jobUrl, datePosted, dateText, daysAgo, description, isFeatured, keyword, extractedAt: new Date().toISOString(), source: "skipthedrive", }; } catch (error) { logger.warning(`Error extracting job data: ${error.message}`); return null; } } /** * Check if next page is available */ async function hasNextPageAvailable(page) { try { const nextButton = await page.$(".next-page"); return nextButton !== null; } catch { return false; } } /** * Navigate to next page */ async function navigateToNextPage(page, pageNumber) { try { const nextButton = await page.$(".next-page"); if (nextButton) { await nextButton.click(); } } catch (error) { logger.warning( `Failed to navigate to page ${pageNumber}: ${error.message}` ); } } module.exports = { skipthedriveStrategy, buildSearchUrl, extractJobsFromPage, extractJobData, };