This commit is contained in:
ilia 2025-07-17 16:55:21 -08:00
parent b3702fa151
commit ead5cdef15
3 changed files with 453 additions and 7 deletions

View File

@ -98,7 +98,7 @@ AI_BATCH_SIZE=3
#### Local AI Analysis (Ollama)
- `ENABLE_LOCAL_AI=true`: Enable local AI analysis
- `OLLAMA_MODEL`: Model to use (`mistral`, `llama2`, `codellama`)
- `OLLAMA_MODEL`: Model to use (auto-detects available models: `mistral`, `llama2`, `codellama`, etc.)
- `OLLAMA_HOST`: Ollama server URL (default: `http://localhost:11434`)
- `RUN_LOCAL_AI_AFTER_SCRAPING`: Run AI immediately after scraping (`true`/`false`)
- `AI_CONTEXT`: Context for analysis (e.g., `job layoffs`)
@ -107,6 +107,26 @@ AI_BATCH_SIZE=3
## Usage
### Demo Mode
For testing and demonstration purposes, you can run the interactive demo:
```bash
# Run interactive demo (simulates scraping with fake data)
npm run demo
# Or directly:
node demo.js
```
The demo mode:
- Uses fake, anonymized data for safety
- Walks through all configuration options interactively
- Shows available Ollama models for selection
- Demonstrates the complete workflow without actual LinkedIn scraping
- Is well suited to creating documentation, recording GIFs, or testing configurations
### Basic Commands
```bash
@ -170,34 +190,44 @@ node ai-analyzer-local.js --context="job layoffs"
# Analyze specific file
node ai-analyzer-local.js --input=results/results-2024-01-15.json --context="hiring"
# Use different model
# Use different model (auto-detects available models)
node ai-analyzer-local.js --model=llama2 --context="remote work"
# Change confidence and batch size
node ai-analyzer-local.js --context="job layoffs" --confidence=0.8 --batch-size=5
# Check available models
ollama list
```
## Workflow Examples
### 1. Quick Start (All Features)
### 1. First Time Setup (Demo Mode)
```bash
# Run interactive demo to test configuration
npm run demo
```
### 2. Quick Start (All Features)
```bash
node linkedout.js --ai-after
```
### 2. Fast Scraping Only
### 3. Fast Scraping Only
```bash
node linkedout.js --no-location --no-ai
```
### 3. Location-Only Filtering
### 4. Location-Only Filtering
```bash
node linkedout.js --no-ai
```
### 4. Test Different AI Contexts
### 5. Test Different AI Contexts
```bash
node linkedout.js --no-ai
@ -213,6 +243,7 @@ linkedout/
├── .env # Your configuration (create from template)
├── env-config.example # Configuration template
├── linkedout.js # Main scraper
├── demo.js # Interactive demo with fake data
├── ai-analyzer-local.js # Free local AI analyzer (Ollama)
├── location-utils.js # Enhanced location utilities
├── package.json # Dependencies

414
demo.js Normal file
View File

@ -0,0 +1,414 @@
const fs = require("fs");
const path = require("path");
const readline = require("readline");
// ANSI SGR escape sequences, keyed by a readable name, used to colorize terminal output.
// Each entry is built from its numeric SGR code: ESC [ <code> m.
const colors = Object.fromEntries(
  [
    ["reset", 0],
    ["bright", 1],
    ["dim", 2],
    ["red", 31],
    ["green", 32],
    ["yellow", 33],
    ["blue", 34],
    ["magenta", 35],
    ["cyan", 36],
    ["white", 37],
    ["bgRed", 41],
    ["bgGreen", 42],
    ["bgYellow", 43],
    ["bgBlue", 44],
  ].map(([name, code]) => [name, `\x1b[${code}m`])
);
// Colored console printers. Every method writes exactly one line to stdout via
// console.log, wrapping the text in the relevant ANSI codes and resetting afterwards.
const log = {
  // Section heading: bright cyan
  title(text) {
    const { bright, cyan, reset } = colors;
    console.log(`${bright}${cyan}${text}${reset}`);
  },
  // Positive outcome: green
  success(text) {
    const { green, reset } = colors;
    console.log(`${green}${text}${reset}`);
  },
  // Neutral information: blue
  info(text) {
    const { blue, reset } = colors;
    console.log(`${blue} ${text}${reset}`);
  },
  // Non-fatal problem: yellow
  warning(text) {
    const { yellow, reset } = colors;
    console.log(`${yellow}⚠️ ${text}${reset}`);
  },
  // Failure: red (still printed through console.log)
  error(text) {
    const { red, reset } = colors;
    console.log(`${red}${text}${reset}`);
  },
  // Emphasis: bright yellow
  highlight(text) {
    const { bright, yellow, reset } = colors;
    console.log(`${bright}${yellow}${text}${reset}`);
  },
  // Workflow step: bright magenta
  step(text) {
    const { bright, magenta, reset } = colors;
    console.log(`${bright}${magenta}🚀 ${text}${reset}`);
  },
  // File path: cyan
  file(text) {
    const { cyan, reset } = colors;
    console.log(`${cyan}📄 ${text}${reset}`);
  },
  // AI activity: bright blue
  ai(text) {
    const { bright, blue, reset } = colors;
    console.log(`${bright}${blue}🧠 ${text}${reset}`);
  },
  // Search activity: green
  search(text) {
    const { green, reset } = colors;
    console.log(`${green}🔍 ${text}${reset}`);
  },
};
// Shared readline interface for the interactive prompts: reads from the process's
// stdin, echoes to its stdout, and is explicitly treated as a terminal.
const rl = readline.createInterface({
  terminal: true,
  output: process.stdout,
  input: process.stdin,
});
/**
 * Ask a question on the shared readline interface and resolve with the answer.
 *
 * The answer is trimmed; an empty (or whitespace-only) answer resolves to
 * `defaultVal`. The "(default: …)" hint is only shown when there is a
 * non-empty default, so optional questions no longer render "(default: )".
 *
 * @param {string} question - Text shown to the user (without trailing colon).
 * @param {string} defaultVal - Value used when the user just presses Enter.
 * @returns {Promise<string>} The trimmed answer, or `defaultVal` when empty.
 */
function prompt(question, defaultVal) {
  const hasDefault = defaultVal != null && defaultVal !== "";
  const hint = hasDefault ? ` (default: ${defaultVal})` : "";
  return new Promise((resolve) => {
    rl.question(`${question}${hint}: `, (answer) => {
      resolve(answer.trim() || defaultVal);
    });
  });
}
/**
 * Pretend to ask Ollama which models are installed.
 *
 * Demo only: nothing is requested from Ollama. The function logs what it is
 * simulating, waits half a second, and resolves with a fixed list of three
 * model names.
 *
 * @returns {Promise<string[]>} The mocked model names.
 */
async function getAvailableModels() {
  log.info("Simulating Ollama model detection...");

  // Brief pause standing in for the round trip a real lookup would make
  await new Promise((resolve) => {
    setTimeout(resolve, 500);
  });

  const models = ["mistral", "llama2", "codellama"];
  log.success(`Found ${models.length} available models`);
  return models;
}
/**
 * Show the available models and let the user pick one by menu number or name.
 *
 * Resolution order for the typed answer:
 *   1. a whole number between 1 and the list length selects that entry;
 *   2. an exact model name selects it;
 *   3. a model name that differs only in letter case selects it;
 *   4. anything else falls back to the default, with a warning.
 * The default is "mistral" when present, otherwise the first listed model.
 *
 * @param {string[]} availableModels - Model names to choose from.
 * @returns {Promise<string|undefined>} The chosen model name; `undefined`
 *   only when `availableModels` is empty.
 */
async function selectModel(availableModels) {
  if (availableModels.length === 0) {
    // Nothing to choose from: prompting for "1-0" would be meaningless.
    log.warning("No Ollama models available");
    return undefined;
  }

  log.highlight("\n📦 Available Ollama models:");
  availableModels.forEach((model, index) => {
    console.log(
      ` ${colors.bright}${index + 1}.${colors.reset} ${colors.cyan}${model}${colors.reset}`
    );
  });

  const defaultModel = availableModels.includes("mistral")
    ? "mistral"
    : availableModels[0];

  const selection = await prompt(
    `${colors.bright}Choose model (1-${availableModels.length} or model name)${colors.reset}`,
    defaultModel
  );

  // Menu number: digits only, so inputs like "2abc" or "1.9" are not
  // silently truncated into a valid-looking choice.
  if (/^\d+$/.test(selection)) {
    const byNumber = availableModels[Number.parseInt(selection, 10) - 1];
    if (byNumber !== undefined) {
      log.success(`Selected model: ${byNumber}`);
      return byNumber;
    }
  }

  // Model name: prefer an exact match, then tolerate a difference in case.
  const wanted = String(selection).toLowerCase();
  const byName = availableModels.includes(selection)
    ? selection
    : availableModels.find((model) => model.toLowerCase() === wanted);
  if (byName !== undefined) {
    log.success(`Selected model: ${byName}`);
    return byName;
  }

  // Unrecognized input: say so instead of quietly swapping in the default.
  log.warning(`Unrecognized selection "${selection}"`);
  log.success(`Using default model: ${defaultModel}`);
  return defaultModel;
}
// Body text of each fake accepted post. Shared by the simulated scrape and the
// simulated AI pass so a verdict is always attached to the post it describes.
const DEMO_POST_TEXT = Object.freeze({
  insolvency:
    "Long considered a local success story, Calgary robotics company Attabotics is restructuring as it deals with insolvency. It has terminated 192 of its 203 employees, keeping a skeleton crew of only 11 as it navigates the road ahead.",
  reporter:
    "I'm working to report on the recent game industry layoffs and I'm hoping to connect with anyone connected to or impacted by the recent mass layoffs. Please feel free to contact me either here or anonymously.",
  homeDownsizing:
    "Thinking about downsizing your home in Alberta? It's not just a change of address—it's a smart financial move and a big step toward enjoying retirement! Here's what you need to know about tapping into home equity and saving on monthly bills.",
});

// Canned AI verdict for each fake post, looked up by the post's text rather
// than by its position in the results, which depends on keyword order.
const DEMO_AI_VERDICTS = new Map([
  [
    DEMO_POST_TEXT.insolvency,
    {
      isRelevant: true,
      confidence: 0.94,
      reasoning:
        "The post clearly states that a company has terminated 192 of its 203 employees as part of restructuring due to insolvency, which is directly related to job layoffs and workforce reduction.",
    },
  ],
  [
    DEMO_POST_TEXT.reporter,
    {
      isRelevant: true,
      confidence: 0.92,
      reasoning:
        "Post explicitly discusses game industry layoffs and mass layoffs, which directly relates to job layoffs and workforce reduction.",
    },
  ],
  [
    DEMO_POST_TEXT.homeDownsizing,
    {
      isRelevant: false,
      confidence: 0.25,
      reasoning:
        "The post discusses downsizing a home and financial considerations for retirement, which are not directly related to job layoffs or workforce reduction.",
    },
  ],
]);

// Simulated scrape counts per keyword. A Map avoids accidental hits on
// Object.prototype members for keywords such as "constructor".
const DEMO_KEYWORD_STATS = new Map([
  ["layoff", { found: 3, accepted: 2, rejected: 1 }],
  ["downsizing", { found: 2, accepted: 1, rejected: 1 }],
]);
const DEMO_DEFAULT_STATS = Object.freeze({ found: 2, accepted: 1, rejected: 1 });

/** Resolve after `ms` milliseconds; stands in for real work in the demo. */
function demoPause(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Split a comma-separated string into trimmed, non-empty entries. */
function splitCommaList(value) {
  return value
    .split(",")
    .map((entry) => entry.trim())
    .filter(Boolean);
}

/** Current time as "YYYY-MM-DD, HH:MM:SS" (UTC date, local en-CA 24h time). */
function demoTimestamp() {
  const now = new Date();
  const day = now.toISOString().split("T")[0];
  return `${day}, ${now.toLocaleTimeString("en-CA", { hour12: false })}`;
}

/** Build the `index`-th fake accepted post for `keyword`. */
function buildAcceptedPost(keyword, index, withLocation) {
  let location;
  if (withLocation) {
    location =
      index % 2 === 0 ? "Toronto, Ontario, Canada" : "Calgary, Alberta, Canada";
  }

  let text = DEMO_POST_TEXT.homeDownsizing;
  if (keyword === "layoff") {
    text = index === 0 ? DEMO_POST_TEXT.insolvency : DEMO_POST_TEXT.reporter;
  }

  let matchedFilter;
  if (location) {
    matchedFilter = location.includes("Ontario") ? "ontario" : "alberta";
  }

  return {
    keyword,
    text,
    profileLink: `https://www.linkedin.com/in/demo-user-${Math.random()
      .toString(36)
      .slice(2)}`,
    timestamp: demoTimestamp(),
    location,
    locationValid: location ? true : undefined,
    locationMatchedFilter: matchedFilter,
    locationReasoning: location
      ? `Direct match: "${matchedFilter}" found in "${location}"`
      : undefined,
    aiProcessed: false,
  };
}

/** Build a fake rejected post for `keyword`. */
function buildRejectedPost(keyword) {
  if (keyword === "layoff") {
    return {
      rejected: true,
      reason:
        'Location filter failed: Location "Vancouver, British Columbia, Canada" does not match any of: ontario, alberta',
      keyword: "layoff",
      text: "Sad to announce that our Vancouver tech startup is going through a difficult restructuring. We've had to make the tough decision to lay off 30% of our engineering team. These are incredibly talented people and I'm happy to provide recommendations.",
      profileLink: "https://www.linkedin.com/in/demo-vancouver-user",
      location: "Vancouver, British Columbia, Canada",
      locationReasoning:
        'Location "Vancouver, British Columbia, Canada" does not match any of: ontario, alberta',
      timestamp: new Date().toISOString(),
    };
  }
  return {
    rejected: true,
    reason: "No profile link",
    // Record the keyword that was actually searched, as accepted posts do.
    keyword,
    text: "The days of entering retirement mortgage-free are fading fast — even for older Canadians. A recent Royal LePage survey reveals nearly 1 in 3 Canadians retiring in the next 2 years will still carry a mortgage. Contact us and let's talk about planning smarter — whether you're 25 or 65.",
    profileLink: "",
    timestamp: new Date().toISOString(),
  };
}

/** Attach the canned AI verdict for `post` and flag it when below `threshold`. */
function analyzeDemoPost(post, model, threshold) {
  const verdict = DEMO_AI_VERDICTS.get(post.text);
  return {
    ...post,
    aiProcessed: true,
    aiRelevant: verdict.isRelevant,
    aiConfidence: Math.round(verdict.confidence * 100) / 100, // Round to 2 decimal places
    aiReasoning: verdict.reasoning,
    aiModel: model,
    aiAnalyzedAt: demoTimestamp(),
    aiType: "local-ollama",
    ...(verdict.confidence < threshold ? { lowConfidence: true } : {}),
  };
}

/**
 * Run the interactive demo end to end.
 *
 * Prompts for every setting, simulates a scrape with fake posts, writes the
 * accepted and rejected posts to JSON files under `results/`, optionally
 * writes a simulated AI analysis file, and prints a sample of each file.
 * The readline interface is closed when the demo finishes, whether it
 * succeeded or threw.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    log.title("=== LinkedOut Demo Workflow ===");
    log.info(
      "This is a simulated demo for creating a GIF. It uses fake data and anonymizes personal information."
    );
    log.highlight("Press Enter to accept defaults.\n");

    // Prompt for all possible settings based on linkedout.js configurations
    const headless = await prompt("Headless mode", "true");
    const keywordsSource = await prompt(
      "Keywords source (CSV file or comma-separated)",
      "keywords-layoff.csv"
    );
    const addKeywords = await prompt("Additional keywords (comma-separated)", "");
    const city = await prompt("City", "Toronto");
    const datePosted = await prompt(
      "Date posted (past-24h, past-week, past-month, or empty)",
      "past-week"
    );
    const sortBy = await prompt(
      "Sort by (date_posted or relevance)",
      "date_posted"
    );
    const wheels = await prompt("Number of scrolls", "5");
    const locationFilter = await prompt(
      "Location filter (e.g., Ontario,Manitoba)",
      "Ontario"
    );
    const enableLocation = await prompt("Enable location check", "true");
    const output = await prompt(
      "Output file (without extension)",
      "demo-results"
    );
    const enableAi = await prompt("Enable local AI", "true");
    const runAiAfter = await prompt("Run AI after scraping", "true");
    const aiContext = await prompt(
      "AI context",
      "job layoffs and workforce reduction"
    );

    // Get available models and let user choose
    const availableModels = await getAvailableModels();
    const ollamaModel = await selectModel(availableModels);
    const aiConfidence = await prompt("AI confidence threshold", "0.7");
    const aiBatchSize = await prompt("AI batch size", "3");

    // Simulate loading keywords (only the first 2 from the source for the demo).
    // Blank entries such as the middle of "a, ,b" are dropped before the cut.
    let keywords = ["layoff", "downsizing"];
    if (keywordsSource !== "keywords-layoff.csv") {
      keywords = splitCommaList(keywordsSource).slice(0, 2);
    }
    if (addKeywords) {
      keywords = [...keywords, ...splitCommaList(addKeywords)];
    }

    log.step(`Starting demo scrape with ${keywords.length} keywords...`);
    log.info(`🌍 City: ${city}, Date: ${datePosted}, Sort: ${sortBy}`);
    log.info(
      `🔄 Scrolls: ${wheels}, Location filter: ${locationFilter || "None"}`
    );

    // Simulate browser launch and login
    await demoPause(500);
    log.step(`Launching browser${headless === "true" ? " (headless)" : ""}`);
    await demoPause(500);
    log.step("Logging in to LinkedIn...");

    // Simulate scraping for each keyword
    const fakePosts = [];
    const rejectedPosts = [];
    const withLocation = enableLocation === "true";
    for (const keyword of keywords) {
      await demoPause(300);
      const stats = DEMO_KEYWORD_STATS.get(keyword) || DEMO_DEFAULT_STATS;
      log.search(`Searching for "${keyword}"...`);
      log.info(`Found ${stats.found} posts, checking profiles for location...`);

      for (let i = 0; i < stats.accepted; i++) {
        fakePosts.push(buildAcceptedPost(keyword, i, withLocation));
      }
      for (let i = 0; i < stats.rejected; i++) {
        rejectedPosts.push(buildRejectedPost(keyword));
      }

      log.success(
        `${stats.accepted} posts accepted, ❌ ${stats.rejected} posts rejected`
      );
    }
    log.success(`Found ${fakePosts.length} demo posts total`);

    // Simulate location validation if enabled
    if (withLocation && locationFilter) {
      await demoPause(500);
      log.step("Validating locations against filter...");
    }

    // Timestamp for fallback file names: YYYY-MM-DD-HH-MM-SS (UTC)
    const [datePart, timePart] = new Date().toISOString().split("T");
    const timestamp = `${datePart}-${timePart.split(".")[0].replace(/:/g, "-")}`;

    // Save main results file
    const resultsFile = output
      ? `results/${output}.json`
      : `results/demo-results-${timestamp}.json`;
    fs.mkdirSync(path.dirname(resultsFile), { recursive: true });
    fs.writeFileSync(resultsFile, JSON.stringify(fakePosts, null, 2));
    log.file(`Saved demo results to ${resultsFile}`);

    // Save rejected posts file
    const rejectedFile = output
      ? `results/${output}-rejected.json`
      : `results/demo-results-${timestamp}-rejected.json`;
    fs.writeFileSync(rejectedFile, JSON.stringify(rejectedPosts, null, 2));
    log.file(`Saved demo rejected posts to ${rejectedFile}`);

    const newFiles = [resultsFile, rejectedFile];

    // Simulate AI analysis if enabled and set to run after
    let aiResults = null;
    if (enableAi === "true" && runAiAfter === "true") {
      await demoPause(500);
      log.ai(`Running local AI analysis with model ${ollamaModel}...`);
      log.info(
        `Context: "${aiContext}", Confidence: ${aiConfidence}, Batch size: ${aiBatchSize}`
      );
      await demoPause(800);

      const threshold = Number.parseFloat(aiConfidence);
      aiResults = fakePosts.map((post) =>
        analyzeDemoPost(post, ollamaModel, threshold)
      );

      const aiFile = output
        ? `results/${output}-ai.json`
        : `results/demo-ai-${timestamp}.json`;
      fs.writeFileSync(aiFile, JSON.stringify(aiResults, null, 2));
      log.file(`Saved demo AI results to ${aiFile}`);
      newFiles.push(aiFile);
    }

    // List new files
    log.title("\n=== Demo Complete ===");
    log.highlight("New JSON files created:");
    newFiles.forEach((file) => log.file(file));
    log.info(
      "\nYou can right-click the file paths in your terminal or copy them to open in your IDE."
    );

    // Show examples of what each file contains
    log.title("\n=== File Contents Examples ===");
    log.highlight("\n📄 Main Results File (accepted posts):");
    log.info("Contains posts that passed all filters:");
    console.log(
      `${colors.dim}${JSON.stringify(fakePosts.slice(0, 1), null, 2)}${colors.reset}`
    );
    log.highlight("\n🚫 Rejected Posts File:");
    log.info("Contains posts that were filtered out:");
    console.log(
      `${colors.dim}${JSON.stringify(rejectedPosts.slice(0, 1), null, 2)}${colors.reset}`
    );

    if (aiResults) {
      log.highlight("\n🧠 AI Analysis File:");
      log.info("Contains posts with AI relevance analysis:");
      // Printed from memory; the JSON is identical to what was just written.
      console.log(
        `${colors.dim}${JSON.stringify(aiResults.slice(0, 1), null, 2)}${colors.reset}`
      );
      log.highlight("\nKey AI Features Demonstrated:");
      log.success("✅ aiRelevant: true/false based on context analysis");
      log.success("✅ aiConfidence: rounded to 2 decimal places (0.00-1.00)");
      log.success("✅ aiReasoning: detailed explanation of relevance decision");
      log.success(
        "✅ Location filtering: shows why posts were accepted/rejected"
      );
    }
  } finally {
    // Always release stdin so the process can exit, even after a failure.
    rl.close();
  }
}
// Run the demo. If it fails, release the terminal so the process can exit,
// report the error on stderr, and signal failure through the exit code.
main().catch((err) => {
  rl.close();
  console.error(err);
  process.exitCode = 1;
});

View File

@ -7,7 +7,8 @@
"test": "node test/all-tests.js",
"test:location-utils": "node test/location-utils.test.js",
"test:linkedout": "node test/linkedout.test.js",
"test:ai-analyzer": "node test/ai-analyzer.test.js"
"test:ai-analyzer": "node test/ai-analyzer.test.js",
"demo": "node demo.js"
},
"keywords": [],
"author": "",