/**
 * LinkedIn Posts Scraper (linkedout)
 *
 * This script logs into LinkedIn using credentials stored in a .env file,
 * reads keywords from a CSV file (keywords.csv), and scrapes posts matching
 * those keywords from LinkedIn's content search.
 *
 * Usage:
 *   node linkedout.js [--headless=true|false] [--keyword=additional_keyword]
 *
 * Command-line Parameters:
 *   --headless: Override the headless mode (true or false). Defaults to value in .env (HEADLESS).
 *   --keyword: Append an additional keyword to the list of keywords from keywords.csv.
 *
 * Output:
 *   Saves results to a timestamped JSON file in the 'results' directory.
 *
 * Requirements:
 *   - Node.js environment (or use the compiled executable)
 *   - Playwright installed (or included in the binary)
 *   - dotenv package for environment variables
 *   - csv-parser package for reading CSV files
 *
 * Environment Variables (.env):
 *   LINKEDIN_USERNAME - Your LinkedIn username
 *   LINKEDIN_PASSWORD - Your LinkedIn password
 *   HEADLESS - Default headless mode (true or false)
 *
 * Example:
 *   node linkedout.js --headless=true --keyword=layoff
 */

// NOTE(review): assigned before playwright is required, as in the original
// ordering. Per Playwright's docs "0" appears to select the package-local
// ("hermetic") browser install -- confirm against the packaging setup.
process.env.PLAYWRIGHT_BROWSERS_PATH = "0";

const { chromium } = require("playwright");
const fs = require("fs");
const path = require("path");
require("dotenv").config();
const csv = require("csv-parser");

const DATE_POSTED = "past-week"; // "past-24h", "past-week", "past-month", or ""
const SORT_BY = "date_posted"; // "relevance", "date_posted"
const WHEELS = 5; // number of mouse-wheel scrolls performed per search page
const CITY = "Toronto"; // appended to every keyword in the search query

// CSS selector matching one post container on the search results page.
const POST_SELECTOR = ".feed-shared-update-v2";
// How long to wait for context.newPage() before giving up (milliseconds).
const NEW_PAGE_TIMEOUT_MS = 10000;

// Read credentials and headless mode from .env
const LINKEDIN_USERNAME = process.env.LINKEDIN_USERNAME;
const LINKEDIN_PASSWORD = process.env.LINKEDIN_PASSWORD;
// Default headless mode from .env
let HEADLESS = process.env.HEADLESS === "true";

// Parse command-line arguments
const HEADLESS_FLAG = "--headless=";
const KEYWORD_FLAG = "--keyword=";
const args = process.argv.slice(2);
let additionalKeyword = null;
for (const arg of args) {
  if (arg.startsWith(HEADLESS_FLAG)) {
    HEADLESS = arg.slice(HEADLESS_FLAG.length).toLowerCase() === "true";
  }
  if (arg.startsWith(KEYWORD_FLAG)) {
    // Slice off the flag prefix instead of splitting on "=" so that a keyword
    // which itself contains "=" is kept whole.
    additionalKeyword = arg.slice(KEYWORD_FLAG.length);
  }
}

if (!LINKEDIN_USERNAME || !LINKEDIN_PASSWORD) {
  throw new Error("Missing LinkedIn credentials in .env file.");
}

/**
 * Strips hashtags, the literal word "hashtag", URLs and a set of emoji ranges
 * from a post, then collapses all whitespace runs into single spaces.
 *
 * @param {string} text - Raw post text.
 * @returns {string} The cleaned, trimmed text.
 */
function cleanText(text) {
  return text
    .replace(/#\w+/g, "")
    .replace(/\bhashtag\b/gi, "")
    .replace(/hashtag-\w+/gi, "")
    .replace(/https?:\/\/[^\s]+/g, "")
    .replace(
      /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}]/gu,
      ""
    )
    .replace(/\s+/g, " ")
    .trim();
}

/**
 * Builds the LinkedIn content-search URL for "<keyword> <city>", applying the
 * DATE_POSTED and SORT_BY filters when they are non-empty.
 *
 * @param {string} keyword - Search keyword.
 * @param {string} city - City name appended to the keyword.
 * @returns {string} The search URL.
 */
function buildSearchUrl(keyword, city) {
  let url = `https://www.linkedin.com/search/results/content/?keywords=${encodeURIComponent(
    `${keyword} ${city}`
  )}`;
  // The filter values are sent wrapped in double quotes, then URL-encoded.
  if (DATE_POSTED) {
    url += `&datePosted=${encodeURIComponent(`"${DATE_POSTED}"`)}`;
  }
  if (SORT_BY) {
    url += `&sortBy=${encodeURIComponent(`"${SORT_BY}"`)}`;
  }
  url += `&origin=FACETED_SEARCH`;
  return url;
}

/**
 * Case-insensitive check that `text` contains at least one of `keywords`.
 *
 * @param {string} text - Text to search in.
 * @param {string[]} keywords - Candidate keywords.
 * @returns {boolean} True when any keyword occurs in the text.
 */
function containsAnyKeyword(text, keywords) {
  const haystack = text.toLowerCase();
  return keywords.some((k) => haystack.includes(k.toLowerCase()));
}

/**
 * Reads the `keyword` column of a CSV file.
 *
 * @param {string} csvPath - Path to the CSV file.
 * @returns {Promise<string[]>} Trimmed, non-empty keywords in file order.
 *   Rejects when the file cannot be read or parsed.
 */
function readKeywords(csvPath) {
  return new Promise((resolve, reject) => {
    const found = [];
    fs.createReadStream(csvPath)
      // pipe() does not forward errors, so the file stream and the parser
      // each need their own handler.
      .on("error", reject)
      .pipe(csv())
      .on("error", reject)
      .on("data", (row) => {
        if (!row.keyword) return;
        const keyword = row.keyword.trim();
        // A whitespace-only cell would become "", which every text "contains".
        if (keyword) found.push(keyword);
      })
      .on("end", () => resolve(found));
  });
}

/**
 * Opens a new page in `context`, rejecting with "newPage timeout" if the page
 * is not created within `timeoutMs`.
 *
 * @param {object} context - Playwright browser context.
 * @param {number} timeoutMs - Maximum time to wait, in milliseconds.
 * @returns {Promise<object>} The created page.
 */
async function createPage(context, timeoutMs) {
  let timer;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error("newPage timeout")), timeoutMs);
  });
  try {
    return await Promise.race([context.newPage(), timeout]);
  } finally {
    // Do not leave the timer pending once the race has settled.
    clearTimeout(timer);
  }
}

/**
 * Submits the LinkedIn login form and waits for the navigation avatar, which
 * the script uses as its signal that the login completed.
 *
 * @param {object} page - Playwright page.
 * @returns {Promise<void>}
 */
async function login(page) {
  await page.goto("https://www.linkedin.com/login");
  await page.fill('input[name="session_key"]', LINKEDIN_USERNAME);
  await page.fill('input[name="session_password"]', LINKEDIN_PASSWORD);
  await page.click('button[type="submit"]');
  await page.waitForSelector("img.global-nav__me-photo", {
    timeout: 10000,
  });
}

/**
 * Turns a profile href into an absolute URL without its query string.
 *
 * @param {string|null} href - Value of the anchor's href attribute.
 * @returns {string} Normalized URL, or "" when there is no href.
 */
function normalizeProfileLink(href) {
  if (!href) return "";
  const absolute = href.startsWith("http")
    ? href
    : `https://www.linkedin.com${href}`;
  return absolute.split("?")[0];
}

/**
 * Runs the search for one keyword, scrolls the results, and returns the posts
 * that pass the de-duplication and keyword filters.
 *
 * @param {object} page - Logged-in Playwright page.
 * @param {string} keyword - Keyword to search for.
 * @param {string[]} keywords - All keywords, used for the presence check.
 * @param {Set<string>} seenPosts - Post texts already processed (mutated).
 * @param {Set<string>} seenProfiles - Profile links already processed (mutated).
 * @returns {Promise<Array<{keyword: string, text: string, profileLink: string}>>}
 */
async function scrapeKeyword(page, keyword, keywords, seenPosts, seenProfiles) {
  const collected = [];

  await page.goto(buildSearchUrl(keyword, CITY), { waitUntil: "load" });
  try {
    await page.waitForSelector(POST_SELECTOR, { timeout: 3000 });
  } catch (error) {
    // Best effort: any failure to find a post is reported as "no posts".
    console.log(
      `---\nNo posts found for keyword: ${keyword}\nDate posted: ${DATE_POSTED}\nSort by: ${SORT_BY}`
    );
    return collected;
  }

  // Scroll the page WHEELS times, pausing one second after each scroll.
  for (let i = 0; i < WHEELS; i++) {
    await page.mouse.wheel(0, 1000);
    await page.waitForTimeout(1000);
  }

  const postContainers = await page.$$(POST_SELECTOR);
  for (const container of postContainers) {
    const textHandle = await container.$(
      "div.update-components-text, span.break-words"
    );
    const text = textHandle
      ? cleanText((await textHandle.textContent()) || "")
      : "";
    // Skip empty, duplicate, very short, or non-alphanumeric posts.
    if (
      !text ||
      seenPosts.has(text) ||
      text.length < 30 ||
      !/[a-zA-Z0-9]/.test(text)
    ) {
      continue;
    }
    seenPosts.add(text);

    const profileLinkElement = await container.$('a[href*="/in/"]');
    const profileLink = profileLinkElement
      ? normalizeProfileLink(await profileLinkElement.getAttribute("href"))
      : "";
    // Only the first post encountered for each profile is considered.
    if (!profileLink || seenProfiles.has(profileLink)) continue;
    seenProfiles.add(profileLink);

    // Double-check keyword presence
    if (!containsAnyKeyword(text, keywords)) continue;

    console.log("---");
    console.log("Keyword:", keyword);
    console.log("Post:", text);
    console.log("Profile:", profileLink);

    collected.push({ keyword, text, profileLink });
  }

  return collected;
}

/**
 * Formats a date as YYYY-MM-DD-HH-mm using the local time zone.
 *
 * @param {Date} date - Date to format.
 * @returns {string} The formatted timestamp.
 */
function formatTimestamp(date) {
  const pad = (n) => String(n).padStart(2, "0");
  return [
    date.getFullYear(),
    pad(date.getMonth() + 1), // getMonth() is zero-based
    pad(date.getDate()),
    pad(date.getHours()),
    pad(date.getMinutes()),
  ].join("-");
}

/**
 * Writes the results as pretty-printed JSON to results/results-<timestamp>.json,
 * creating the directory if needed.
 *
 * @param {Array<object>} results - Scraped posts.
 * @returns {void}
 */
function saveResults(results) {
  const resultsDir = "results";
  const resultsFile = `${resultsDir}/results-${formatTimestamp(
    new Date()
  )}.json`;
  // recursive: true makes this a no-op when the directory already exists.
  fs.mkdirSync(resultsDir, { recursive: true });
  fs.writeFileSync(resultsFile, JSON.stringify(results, null, 2), "utf-8");
  console.log(`Saved ${results.length} posts to ${resultsFile}`);
}

/**
 * Entry point: load keywords, log in, scrape every keyword, save the results.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Read keywords from CSV
  const csvPath = path.join(process.cwd(), "keywords.csv");
  let keywords;
  try {
    keywords = await readKeywords(csvPath);
  } catch (err) {
    console.error(`Failed to read keywords from ${csvPath}:`, err);
    process.exit(1);
  }

  if (keywords.length === 0) {
    console.error("No keywords found in keywords.csv");
    process.exit(1);
  }

  // Append additional keyword if provided
  if (additionalKeyword) {
    keywords.push(additionalKeyword);
    console.log(`Added additional keyword from CLI: ${additionalKeyword}`);
  }

  const browser = await chromium.launch({
    headless: HEADLESS,
    args: ["--no-sandbox", "--disable-setuid-sandbox"],
  });

  try {
    const context = await browser.newContext();

    let page;
    try {
      page = await createPage(context, NEW_PAGE_TIMEOUT_MS);
    } catch (err) {
      console.error("Failed to create new page:", err);
      // Exit immediately rather than awaiting browser.close(): if newPage
      // hung, closing the browser may hang as well.
      process.exit(1);
    }

    await login(page);

    const seenPosts = new Set();
    const seenProfiles = new Set();
    const results = [];
    for (const keyword of keywords) {
      const posts = await scrapeKeyword(
        page,
        keyword,
        keywords,
        seenPosts,
        seenProfiles
      );
      results.push(...posts);
    }

    saveResults(results);
  } catch (err) {
    console.error("Error:", err);
    // Report the failure to the caller instead of exiting with status 0.
    process.exitCode = 1;
  } finally {
    await browser.close();
  }
}

main().catch((err) => {
  // Covers failures outside main's own try block (e.g. browser launch).
  console.error("Error:", err);
  process.exitCode = 1;
});