/** * LinkedIn Parsing Strategy * * Uses core-parser for browser management and ai-analyzer for utilities */ const { logger, cleanText, containsAnyKeyword, validateLocationAgainstFilters, extractLocationFromProfile, } = require("ai-analyzer"); /** * LinkedIn parsing strategy function */ async function linkedinStrategy(coreParser, options = {}) { const { keywords = ["layoff", "downsizing", "job cuts"], locationFilter = null, maxResults = 50, credentials = {}, } = options; const results = []; const rejectedResults = []; const seenPosts = new Set(); const seenProfiles = new Set(); try { // Create main page const page = await coreParser.createPage("linkedin-main"); // Authenticate to LinkedIn logger.info("🔐 Authenticating to LinkedIn..."); await coreParser.authenticate("linkedin", credentials, "linkedin-main"); logger.info("✅ LinkedIn authentication successful"); // Search for posts with each keyword for (const keyword of keywords) { logger.info(`🔍 Searching LinkedIn for: "${keyword}"`); const searchUrl = `https://www.linkedin.com/search/results/content/?keywords=${encodeURIComponent( keyword )}&sortBy=date_posted`; await coreParser.navigateTo(searchUrl, { pageId: "linkedin-main", retries: 2, }); // Wait for search results const hasResults = await coreParser.navigationManager.navigateAndWaitFor( searchUrl, ".search-results-container", { pageId: "linkedin-main", timeout: 10000 } ); if (!hasResults) { logger.warning(`No search results found for keyword: ${keyword}`); continue; } // Extract posts from current page const posts = await extractPostsFromPage(page, keyword); for (const post of posts) { // Skip duplicates if (seenPosts.has(post.postId)) continue; seenPosts.add(post.postId); // Validate location if filtering enabled if (locationFilter) { const locationValid = validateLocationAgainstFilters( post.location || post.profileLocation, locationFilter ); if (!locationValid) { rejectedResults.push({ ...post, rejectionReason: "Location filter mismatch", }); continue; } } results.push(post); if (results.length >= maxResults) { logger.info(`📊 Reached maximum results limit: ${maxResults}`); break; } } if (results.length >= maxResults) break; } logger.info( `🎯 LinkedIn parsing completed: ${results.length} posts found, ${rejectedResults.length} rejected` ); return { results, rejectedResults, summary: { totalPosts: results.length, totalRejected: rejectedResults.length, keywords: keywords.join(", "), locationFilter, }, }; } catch (error) { logger.error(`❌ LinkedIn parsing failed: ${error.message}`); throw error; } } /** * Extract posts from current search results page */ async function extractPostsFromPage(page, keyword) { const posts = []; try { // Get all post elements const postElements = await page.$$(".feed-shared-update-v2"); for (const postElement of postElements) { try { const post = await extractPostData(postElement, keyword); if (post) { posts.push(post); } } catch (error) { logger.warning(`Failed to extract post data: ${error.message}`); } } } catch (error) { logger.error(`Failed to extract posts from page: ${error.message}`); } return posts; } /** * Extract data from individual post element */ async function extractPostData(postElement, keyword) { try { // Extract post ID const postId = (await postElement.getAttribute("data-urn")) || ""; // Extract author info const authorElement = await postElement.$(".feed-shared-actor__name"); const authorName = authorElement ? cleanText(await authorElement.textContent()) : ""; const authorLinkElement = await postElement.$(".feed-shared-actor__name a"); const authorUrl = authorLinkElement ? await authorLinkElement.getAttribute("href") : ""; // Extract post content const contentElement = await postElement.$(".feed-shared-text"); const content = contentElement ? cleanText(await contentElement.textContent()) : ""; // Extract timestamp const timeElement = await postElement.$( ".feed-shared-actor__sub-description time" ); const timestamp = timeElement ? await timeElement.getAttribute("datetime") : ""; // Extract engagement metrics const likesElement = await postElement.$(".social-counts-reactions__count"); const likesText = likesElement ? cleanText(await likesElement.textContent()) : "0"; const commentsElement = await postElement.$( ".social-counts-comments__count" ); const commentsText = commentsElement ? cleanText(await commentsElement.textContent()) : "0"; // Check if post contains relevant keywords const isRelevant = containsAnyKeyword(content, [keyword]); if (!isRelevant) { return null; // Skip irrelevant posts } return { postId: cleanText(postId), authorName, authorUrl, content, timestamp, keyword, likes: extractNumber(likesText), comments: extractNumber(commentsText), extractedAt: new Date().toISOString(), source: "linkedin", }; } catch (error) { logger.warning(`Error extracting post data: ${error.message}`); return null; } } /** * Extract numbers from text (e.g., "15 likes" -> 15) */ function extractNumber(text) { const match = text.match(/\d+/); return match ? parseInt(match[0]) : 0; } module.exports = { linkedinStrategy, extractPostsFromPage, extractPostData, };