- Added LinkedIn jobs parsing strategy to support job extraction from LinkedIn. - Updated job search parser to include new site strategy and improved argument parsing for max pages and exclusion of rejected results. - Enhanced README documentation to reflect new features and usage examples. - Refactored existing strategies for consistency and improved error handling.
67 lines
1.5 KiB
JavaScript
67 lines
1.5 KiB
JavaScript
const playwright = require('playwright');
|
|
const AuthManager = require('./auth-manager');
|
|
const NavigationManager = require('./navigation');
|
|
|
|
/**
 * Thin wrapper around a Playwright Chromium browser that tracks named pages
 * and delegates authentication and navigation to helper managers.
 */
class CoreParser {
  /**
   * @param {object} [config] - Overrides merged on top of the defaults.
   * @param {boolean} [config.headless=true] - Passed to the browser launch call.
   * @param {number} [config.timeout=60000] - Default `timeout` option used by
   *   `navigateTo` when the caller does not supply one.
   */
  constructor(config = {}) {
    this.config = {
      headless: true,
      timeout: 60000,
      ...config,
    };
    this.browser = null;
    this.context = null;
    this.pages = {};
    // Both managers receive this parser instance.
    this.authManager = new AuthManager(this);
    this.navigationManager = new NavigationManager(this);
  }

  /**
   * Launches the browser and opens a browser context.
   *
   * Safe to call repeatedly: an already-initialised parser is left untouched,
   * so a second call no longer launches (and leaks) another browser. If a
   * browser exists without a context, only the context is created.
   *
   * @returns {Promise<void>}
   */
  async init() {
    if (this.browser && this.context) {
      return;
    }
    if (!this.browser) {
      this.browser = await playwright.chromium.launch({
        headless: this.config.headless,
      });
    }
    this.context = await this.browser.newContext();
  }

  /**
   * Opens a new page in the shared context and registers it under `id`.
   * Initialises the browser/context first when either is missing.
   * Registering under an existing `id` replaces the stored reference.
   *
   * @param {string} id - Key under which the page is stored.
   * @returns {Promise<object>} The newly created page.
   */
  async createPage(id) {
    // Check the context as well: a previous init() may have launched the
    // browser but failed before the context was created.
    if (!this.browser || !this.context) {
      await this.init();
    }
    const page = await this.context.newPage();
    this.pages[id] = page;
    return page;
  }

  /**
   * Looks up a page previously registered with `createPage`.
   *
   * @param {string} id - Key the page was stored under.
   * @returns {object|undefined} The stored page, or `undefined` when absent.
   */
  getPage(id) {
    return this.pages[id];
  }

  /**
   * Delegates to the auth manager's `authenticate` with the same arguments.
   *
   * @param {*} site
   * @param {*} credentials
   * @param {*} pageId
   * @returns {Promise<*>} Whatever the auth manager resolves with.
   */
  async authenticate(site, credentials, pageId) {
    return this.authManager.authenticate(site, credentials, pageId);
  }

  /**
   * Resolves navigation defaults and delegates to the navigation manager.
   *
   * The resolved values are forwarded, so the defaults listed here (including
   * the configured timeout) actually reach the navigation manager instead of
   * being computed and discarded. Any additional keys in `options` are passed
   * through unchanged.
   *
   * @param {string} url - Target URL.
   * @param {object} [options]
   * @param {string} [options.pageId="default"]
   * @param {string} [options.waitUntil="networkidle"]
   * @param {number} [options.retries=1]
   * @param {number} [options.retryDelay=2000]
   * @param {number} [options.timeout=this.config.timeout]
   * @returns {Promise<*>} Whatever the navigation manager resolves with.
   */
  async navigateTo(url, options = {}) {
    const {
      pageId = 'default',
      waitUntil = 'networkidle',
      retries = 1,
      retryDelay = 2000,
      timeout = this.config.timeout,
      ...extraOptions
    } = options;

    return this.navigationManager.navigateTo(url, {
      ...extraOptions,
      pageId,
      waitUntil,
      retries,
      retryDelay,
      timeout,
    });
  }

  /**
   * Closes the browser (if one was launched) and clears all tracked state.
   *
   * State is reset in `finally` so a failing `close()` does not leave stale
   * browser/context/page references behind; the error still propagates.
   *
   * @returns {Promise<void>}
   */
  async cleanup() {
    if (!this.browser) {
      return;
    }
    try {
      await this.browser.close();
    } finally {
      this.browser = null;
      this.context = null;
      this.pages = {};
    }
  }
}
|
|
|
|
// Expose the CoreParser class as this module's CommonJS export.
module.exports = CoreParser;
|
|
|
|
|
|
|