Dockerization :)
This commit is contained in:
parent
d743aacd1a
commit
051c09d943
45
.dockerignore
Normal file
45
.dockerignore
Normal file
@ -0,0 +1,45 @@
|
||||
# Dependencies
|
||||
**/node_modules
|
||||
**/.venv
|
||||
**/__pycache__
|
||||
|
||||
# Build outputs
|
||||
**/dist
|
||||
**/*.pyc
|
||||
|
||||
# Data (mounted as volume)
|
||||
data/
|
||||
|
||||
# Environment files (passed via docker-compose)
|
||||
.env
|
||||
**/.env
|
||||
**/.env.local
|
||||
|
||||
# Git
|
||||
.git
|
||||
.gitignore
|
||||
|
||||
# IDE
|
||||
.idea
|
||||
.vscode
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
|
||||
# Test files
|
||||
**/*.test.ts
|
||||
**/*.spec.ts
|
||||
**/test/
|
||||
**/tests/
|
||||
**/__tests__/
|
||||
|
||||
# Documentation
|
||||
*.md
|
||||
!README.md
|
||||
25
.env.example
Normal file
25
.env.example
Normal file
@ -0,0 +1,25 @@
|
||||
# =============================================================================
|
||||
# Job Ops - Environment Variables
|
||||
# Copy this file to .env and fill in your values
|
||||
# =============================================================================
|
||||
|
||||
# OpenRouter API for AI scoring and summaries
|
||||
# Get your key at: https://openrouter.ai/keys
|
||||
OPENROUTER_API_KEY=your_openrouter_api_key_here
|
||||
MODEL=openai/gpt-4o-mini
|
||||
|
||||
# RXResume credentials for PDF generation
|
||||
# Create an account at: https://rxresu.me
|
||||
RXRESUME_EMAIL=your_email@example.com
|
||||
RXRESUME_PASSWORD=your_password_here
|
||||
|
||||
# Pipeline configuration
|
||||
PIPELINE_TOP_N=10
|
||||
PIPELINE_MIN_SCORE=50
|
||||
|
||||
# Optional: Notion integration for job tracking
|
||||
NOTION_API_KEY=
|
||||
NOTION_DATABASE_ID=
|
||||
|
||||
# Optional: Webhook secret for n8n automation
|
||||
WEBHOOK_SECRET=
|
||||
11
.gitignore
vendored
11
.gitignore
vendored
@ -1 +1,10 @@
|
||||
.DS_Store
|
||||
# Environment files
|
||||
.env
|
||||
*.env.local
|
||||
|
||||
# Data directory (bind mount in Docker)
|
||||
data/
|
||||
|
||||
# OS files
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
60
Dockerfile
Normal file
60
Dockerfile
Normal file
@ -0,0 +1,60 @@
|
||||
# =============================================================================
|
||||
# Job Ops - Unified Docker Image
|
||||
# Contains: Orchestrator (Node.js), Job Crawler, Resume Generator (Python/Playwright)
|
||||
# =============================================================================
|
||||
|
||||
FROM mcr.microsoft.com/playwright:v1.49.1-jammy
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Install Node.js 20.x, Python 3 (with pip), and pnpm
|
||||
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
|
||||
apt-get update && \
|
||||
apt-get install -y nodejs python3 python3-pip && \
|
||||
npm install -g pnpm
|
||||
|
||||
# Install Python dependencies for resume generator
|
||||
RUN pip3 install --no-cache-dir playwright
|
||||
|
||||
# Copy package files first for better caching
|
||||
COPY orchestrator/package*.json ./orchestrator/
|
||||
COPY job-extractor/package*.json ./job-extractor/
|
||||
|
||||
# Install Node.js dependencies
|
||||
WORKDIR /app/orchestrator
|
||||
RUN npm install --production=false
|
||||
|
||||
WORKDIR /app/job-extractor
|
||||
RUN npm install --production=false
|
||||
|
||||
# Copy source code
|
||||
WORKDIR /app
|
||||
COPY orchestrator ./orchestrator
|
||||
COPY job-extractor ./job-extractor
|
||||
COPY resume-generator ./resume-generator
|
||||
|
||||
# Build the orchestrator (client + server)
|
||||
WORKDIR /app/orchestrator
|
||||
RUN npm run build
|
||||
|
||||
# Create data directories
|
||||
RUN mkdir -p /app/data/pdfs
|
||||
|
||||
# Expose ports
|
||||
EXPOSE 3001
|
||||
|
||||
# Environment variables (can be overridden)
|
||||
ENV NODE_ENV=production
|
||||
ENV PORT=3001
|
||||
ENV PYTHON_PATH=/usr/bin/python3
|
||||
ENV DATA_DIR=/app/data
|
||||
ENV RESUME_GEN_DIR=/app/resume-generator
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD curl -f http://localhost:3001/health || exit 1
|
||||
|
||||
# Run migrations and start the server
|
||||
WORKDIR /app/orchestrator
|
||||
CMD ["sh", "-c", "npm run db:migrate && npm run start"]
|
||||
127
README.md
Normal file
127
README.md
Normal file
@ -0,0 +1,127 @@
|
||||
# Job Ops 🚀
|
||||
|
||||
Automated job discovery, scoring, and resume generation pipeline.
|
||||
|
||||
## Features
|
||||
|
||||
- **Job Crawler** - Discovers jobs from Gradcracker and other sources
|
||||
- **AI Scoring** - Ranks jobs by suitability using OpenRouter API
|
||||
- **Resume Generator** - Creates tailored PDFs via RXResume automation
|
||||
- **Dashboard UI** - React-based interface for reviewing and applying
|
||||
|
||||
## Quick Start with Docker
|
||||
|
||||
### 1. Configure Environment
|
||||
|
||||
```bash
|
||||
# Copy the example env file
|
||||
cp .env.example .env
|
||||
|
||||
# Edit with your credentials
|
||||
nano .env
|
||||
```
|
||||
|
||||
Required environment variables:
|
||||
- `OPENROUTER_API_KEY` - Get from [openrouter.ai/keys](https://openrouter.ai/keys)
|
||||
- `RXRESUME_EMAIL` - Your [rxresu.me](https://rxresu.me) account email
|
||||
- `RXRESUME_PASSWORD` - Your RXResume password
|
||||
|
||||
### 2. Add Your Base Resume
|
||||
|
||||
Place your resume JSON at `resume-generator/base.json`.
|
||||
You can export this from RXResume.
|
||||
|
||||
### 3. Run
|
||||
|
||||
```bash
|
||||
# Build and start
|
||||
docker compose up -d
|
||||
|
||||
# View logs
|
||||
docker compose logs -f
|
||||
|
||||
# Stop
|
||||
docker compose down
|
||||
```
|
||||
|
||||
### 4. Access
|
||||
|
||||
- **Dashboard**: http://localhost:3005
|
||||
- **API**: http://localhost:3005/api
|
||||
- **Health**: http://localhost:3005/health
|
||||
|
||||
## Data Persistence
|
||||
|
||||
All data is stored in the `./data` directory:
|
||||
- `data/jobs.db` - SQLite database
|
||||
- `data/pdfs/` - Generated resume PDFs
|
||||
|
||||
## Development
|
||||
|
||||
### Without Docker
|
||||
|
||||
```bash
|
||||
# Install dependencies
|
||||
cd orchestrator && npm install
|
||||
cd ../job-extractor && npm install
|
||||
|
||||
# Set up Python environment for resume generator
|
||||
cd ../resume-generator
|
||||
python3 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install playwright
|
||||
playwright install chromium
|
||||
|
||||
# Run orchestrator (from orchestrator folder)
|
||||
cd ../orchestrator
|
||||
cp .env.example .env # Configure your env
|
||||
npm run db:migrate
|
||||
npm run dev
|
||||
```
|
||||
|
||||
### Build Docker Image
|
||||
|
||||
```bash
|
||||
docker build -t job-ops:latest .
|
||||
```
|
||||
|
||||
### Push to Docker Hub
|
||||
|
||||
```bash
|
||||
docker tag job-ops:latest yourusername/job-ops:latest
|
||||
docker push yourusername/job-ops:latest
|
||||
```
|
||||
|
||||
## API Endpoints
|
||||
|
||||
| Method | Endpoint | Description |
|
||||
|--------|----------|-------------|
|
||||
| GET | `/api/jobs` | List all jobs |
|
||||
| GET | `/api/jobs/:id` | Get job details |
|
||||
| PATCH | `/api/jobs/:id` | Update job |
|
||||
| POST | `/api/jobs/:id/process` | Generate resume for job |
|
||||
| POST | `/api/jobs/:id/apply` | Mark as applied |
|
||||
| POST | `/api/jobs/:id/reject` | Skip job |
|
||||
| POST | `/api/jobs/process-discovered` | Process all discovered jobs |
|
||||
| GET | `/api/pipeline/status` | Pipeline status |
|
||||
| POST | `/api/pipeline/run` | Trigger pipeline |
|
||||
| GET | `/api/pipeline/progress` | SSE progress stream |
|
||||
| DELETE | `/api/database` | Clear all data |
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
job-ops/
|
||||
├── orchestrator/ # Node.js backend + React frontend
|
||||
│ ├── src/server/ # Express API, services, DB
|
||||
│ └── src/client/ # React dashboard
|
||||
├── job-extractor/ # Crawlee-based job crawler
|
||||
├── resume-generator/ # Python Playwright automation
|
||||
├── data/ # SQLite DB + generated PDFs
|
||||
├── Dockerfile
|
||||
└── docker-compose.yml
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
55
docker-compose.yml
Normal file
55
docker-compose.yml
Normal file
@ -0,0 +1,55 @@
|
||||
# =============================================================================
|
||||
# Job Ops - Docker Compose Configuration
|
||||
# =============================================================================
|
||||
|
||||
services:
|
||||
job-ops:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
image: job-ops:latest
|
||||
container_name: job-ops
|
||||
ports:
|
||||
- "3005:3001"
|
||||
volumes:
|
||||
# Persist database and generated PDFs
|
||||
- ./data:/app/data
|
||||
# Base resume JSON (read-only)
|
||||
- ./resume-generator/base.json:/app/resume-generator/base.json:ro
|
||||
environment:
|
||||
# Server config
|
||||
- NODE_ENV=production
|
||||
- PORT=3001
|
||||
|
||||
# OpenRouter API for AI scoring and summaries
|
||||
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
|
||||
- MODEL=${MODEL:-openai/gpt-4o-mini}
|
||||
|
||||
# RXResume credentials for PDF generation
|
||||
- RXRESUME_EMAIL=${RXRESUME_EMAIL}
|
||||
- RXRESUME_PASSWORD=${RXRESUME_PASSWORD}
|
||||
|
||||
# Pipeline configuration
|
||||
- PIPELINE_TOP_N=${PIPELINE_TOP_N:-10}
|
||||
- PIPELINE_MIN_SCORE=${PIPELINE_MIN_SCORE:-50}
|
||||
|
||||
# Optional: Notion integration
|
||||
- NOTION_API_KEY=${NOTION_API_KEY:-}
|
||||
- NOTION_DATABASE_ID=${NOTION_DATABASE_ID:-}
|
||||
|
||||
# Optional: Webhook secret for n8n
|
||||
- WEBHOOK_SECRET=${WEBHOOK_SECRET:-}
|
||||
|
||||
# Python path (uses system python in container)
|
||||
- PYTHON_PATH=/usr/bin/python3
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:3001/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
# Named volume (declared but not referenced by the service above, which persists data via the ./data bind mount)
|
||||
volumes:
|
||||
data:
|
||||
@ -53,7 +53,7 @@ const crawler = new PlaywrightCrawler({
|
||||
launchContext: {
|
||||
launcher: firefox,
|
||||
launchOptions: await launchOptions({
|
||||
headless: false,
|
||||
headless: true,
|
||||
humanize: true,
|
||||
geoip: true,
|
||||
}),
|
||||
|
||||
@ -7,7 +7,11 @@ import { join, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const DB_PATH = join(__dirname, '../../../data/jobs.db');
|
||||
|
||||
// Database path - can be overridden via env for Docker
|
||||
const DB_PATH = process.env.DATA_DIR
|
||||
? join(process.env.DATA_DIR, 'jobs.db')
|
||||
: join(__dirname, '../../../data/jobs.db');
|
||||
|
||||
/**
|
||||
* Clear all data from the database (keeps the schema intact).
|
||||
|
||||
@ -10,7 +10,11 @@ import { existsSync, mkdirSync } from 'fs';
|
||||
import * as schema from './schema.js';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const DB_PATH = join(__dirname, '../../../data/jobs.db');
|
||||
|
||||
// Database path - can be overridden via env for Docker
|
||||
const DB_PATH = process.env.DATA_DIR
|
||||
? join(process.env.DATA_DIR, 'jobs.db')
|
||||
: join(__dirname, '../../../data/jobs.db');
|
||||
|
||||
// Ensure data directory exists
|
||||
const dataDir = dirname(DB_PATH);
|
||||
|
||||
@ -8,7 +8,11 @@ import { fileURLToPath } from 'url';
|
||||
import { existsSync, mkdirSync } from 'fs';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const DB_PATH = join(__dirname, '../../../data/jobs.db');
|
||||
|
||||
// Database path - can be overridden via env for Docker
|
||||
const DB_PATH = process.env.DATA_DIR
|
||||
? join(process.env.DATA_DIR, 'jobs.db')
|
||||
: join(__dirname, '../../../data/jobs.db');
|
||||
|
||||
// Ensure data directory exists
|
||||
const dataDir = dirname(DB_PATH);
|
||||
@ -45,7 +49,7 @@ const migrations = [
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)`,
|
||||
|
||||
|
||||
`CREATE TABLE IF NOT EXISTS pipeline_runs (
|
||||
id TEXT PRIMARY KEY,
|
||||
started_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
@ -55,7 +59,7 @@ const migrations = [
|
||||
jobs_processed INTEGER NOT NULL DEFAULT 0,
|
||||
error_message TEXT
|
||||
)`,
|
||||
|
||||
|
||||
`CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_jobs_discovered_at ON jobs(discovered_at)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_pipeline_runs_started_at ON pipeline_runs(started_at)`,
|
||||
|
||||
@ -34,7 +34,9 @@ app.use((req, res, next) => {
|
||||
app.use('/api', apiRouter);
|
||||
|
||||
// Serve static files for generated PDFs
|
||||
const pdfDir = join(__dirname, '../../data/pdfs');
|
||||
const pdfDir = process.env.DATA_DIR
|
||||
? join(process.env.DATA_DIR, 'pdfs')
|
||||
: join(__dirname, '../../data/pdfs');
|
||||
app.use('/pdfs', express.static(pdfDir));
|
||||
|
||||
// Health check
|
||||
|
||||
@ -6,12 +6,16 @@
|
||||
import { spawn } from 'child_process';
|
||||
import { join, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { readFile, writeFile, copyFile, access, mkdir } from 'fs/promises';
|
||||
import { readFile, writeFile, mkdir, access } from 'fs/promises';
|
||||
import { existsSync } from 'fs';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const RESUME_GEN_DIR = join(__dirname, '../../../../resume-generator');
|
||||
const OUTPUT_DIR = join(__dirname, '../../../data/pdfs');
|
||||
|
||||
// Paths - can be overridden via env for Docker
|
||||
const RESUME_GEN_DIR = process.env.RESUME_GEN_DIR || join(__dirname, '../../../../resume-generator');
|
||||
const OUTPUT_DIR = process.env.DATA_DIR
|
||||
? join(process.env.DATA_DIR, 'pdfs')
|
||||
: join(__dirname, '../../../data/pdfs');
|
||||
|
||||
export interface PdfResult {
|
||||
success: boolean;
|
||||
@ -55,22 +59,11 @@ export async function generatePdf(
|
||||
const tempResumePath = join(RESUME_GEN_DIR, `temp_resume_${jobId}.json`);
|
||||
await writeFile(tempResumePath, JSON.stringify(baseResume, null, 2));
|
||||
|
||||
// Generate PDF using Python script
|
||||
// Generate PDF using Python script - output directly to our data folder
|
||||
const outputFilename = `resume_${jobId}.pdf`;
|
||||
const outputPath = join(OUTPUT_DIR, outputFilename);
|
||||
|
||||
await runPythonPdfGenerator(tempResumePath, outputFilename);
|
||||
|
||||
// Move generated PDF to our output directory
|
||||
const pythonOutputPath = join(RESUME_GEN_DIR, 'resumes', outputFilename);
|
||||
|
||||
try {
|
||||
await access(pythonOutputPath);
|
||||
await copyFile(pythonOutputPath, outputPath);
|
||||
} catch {
|
||||
// PDF might already be in the right place or script output different location
|
||||
console.warn('PDF not found at expected Python output location');
|
||||
}
|
||||
await runPythonPdfGenerator(tempResumePath, outputFilename, OUTPUT_DIR);
|
||||
|
||||
// Cleanup temp file
|
||||
try {
|
||||
@ -94,11 +87,12 @@ export async function generatePdf(
|
||||
*/
|
||||
async function runPythonPdfGenerator(
|
||||
jsonPath: string,
|
||||
outputFilename: string
|
||||
outputFilename: string,
|
||||
outputDir: string
|
||||
): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
// Use the virtual environment's Python
|
||||
const pythonPath = join(RESUME_GEN_DIR, '.venv', 'bin', 'python');
|
||||
// Use the virtual environment's Python (or system python in Docker)
|
||||
const pythonPath = process.env.PYTHON_PATH || join(RESUME_GEN_DIR, '.venv', 'bin', 'python');
|
||||
|
||||
const child = spawn(pythonPath, ['rxresume_automation.py'], {
|
||||
cwd: RESUME_GEN_DIR,
|
||||
@ -106,6 +100,7 @@ async function runPythonPdfGenerator(
|
||||
...process.env,
|
||||
RESUME_JSON_PATH: jsonPath,
|
||||
OUTPUT_FILENAME: outputFilename,
|
||||
OUTPUT_DIR: outputDir,
|
||||
},
|
||||
stdio: 'inherit',
|
||||
});
|
||||
|
||||
@ -3,8 +3,8 @@
|
||||
"compilerOptions": {
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"outDir": "./dist/server",
|
||||
"rootDir": "./src/server"
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src"
|
||||
},
|
||||
"include": ["src/server/**/*", "src/shared/**/*"]
|
||||
}
|
||||
|
||||
@ -21,7 +21,9 @@ RESUME_JSON_PATH = (
|
||||
_custom_output_filename = os.getenv("OUTPUT_FILENAME")
|
||||
OUTPUT_FILENAME = _custom_output_filename if _custom_output_filename else "resume.pdf"
|
||||
|
||||
OUTPUT_DIR = BASE_DIR / "resumes"
|
||||
# Output directory - can be overridden by orchestrator
|
||||
_custom_output_dir = os.getenv("OUTPUT_DIR")
|
||||
OUTPUT_DIR = Path(_custom_output_dir) if _custom_output_dir else BASE_DIR / "resumes"
|
||||
|
||||
|
||||
def login(page):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user