Dockerization :)

This commit is contained in:
DaKheera47 2025-12-11 23:56:07 +00:00
parent d743aacd1a
commit 051c09d943
14 changed files with 362 additions and 30 deletions

45
.dockerignore Normal file
View File

@ -0,0 +1,45 @@
# Patterns are matched relative to the build-context root. A bare pattern such
# as `*.log` only matches at the root, so anything that can appear inside the
# orchestrator/, job-extractor/ or resume-generator/ folders uses `**/`.
# Dependencies
**/node_modules
**/.venv
**/__pycache__
# Build outputs
**/dist
**/*.pyc
# Data (mounted as volume)
data/
# Personal resume data and locally generated PDFs.
# base.json is bind-mounted by docker-compose.yml at runtime, so it does not
# need to be (and should not be) baked into an image that may be pushed to a
# public registry.
resume-generator/base.json
resume-generator/resumes/
resume-generator/temp_resume_*.json
# Environment files (passed via docker-compose)
.env
**/.env
**/.env.local
# Git
.git
.gitignore
# IDE
**/.idea
**/.vscode
**/*.swp
**/*.swo
# OS
**/.DS_Store
**/Thumbs.db
# Logs
**/*.log
**/npm-debug.log*
# Test files
**/*.test.ts
**/*.spec.ts
**/test/
**/tests/
**/__tests__/
# Documentation (root-level markdown only; README.md is kept)
*.md
!README.md

25
.env.example Normal file
View File

@ -0,0 +1,25 @@
# =============================================================================
# Job Ops - Environment Variables
# Copy this file to .env and fill in your values
# =============================================================================
# OpenRouter API for AI scoring and summaries
# Get your key at: https://openrouter.ai/keys
# OPENROUTER_API_KEY is listed as required in the README.
OPENROUTER_API_KEY=your_openrouter_api_key_here
# docker-compose.yml falls back to openai/gpt-4o-mini when MODEL is unset.
MODEL=openai/gpt-4o-mini
# RXResume credentials for PDF generation
# Create an account at: https://rxresu.me
# Both values are listed as required in the README.
RXRESUME_EMAIL=your_email@example.com
RXRESUME_PASSWORD=your_password_here
# Pipeline configuration
# docker-compose.yml falls back to 10 and 50 respectively when these are unset.
PIPELINE_TOP_N=10
PIPELINE_MIN_SCORE=50
# Optional: Notion integration for job tracking
# docker-compose.yml passes an empty string for each when left blank.
NOTION_API_KEY=
NOTION_DATABASE_ID=
# Optional: Webhook secret for n8n automation
# docker-compose.yml passes an empty string when left blank.
WEBHOOK_SECRET=

9
.gitignore vendored
View File

@ -1 +1,10 @@
# Environment files
.env
*.env.local
# Data directory (bind mount in Docker)
data/
# OS files
.DS_Store
Thumbs.db

60
Dockerfile Normal file
View File

@ -0,0 +1,60 @@
# =============================================================================
# Job Ops - Unified Docker Image
# Contains: Orchestrator (Node.js), Job Crawler, Resume Generator (Python/Playwright)
# =============================================================================
# The base image ships the browser builds for Playwright 1.49.1; every
# Playwright client installed below must stay on that same version.
FROM mcr.microsoft.com/playwright:v1.49.1-jammy

# Make piped RUN steps fail when any stage of the pipe fails (e.g. a broken
# download in `curl ... | bash`), instead of only checking the last command.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Set working directory
WORKDIR /app

# Install Node.js 20.x and Python.
# The apt package lists are removed in the same layer so they do not end up
# in the image. pnpm is not used by any later step in this file; it is kept
# in case runtime scripts rely on it (NOTE(review): confirm and drop if unused).
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt-get update && \
    apt-get install -y --no-install-recommends nodejs python3 python3-pip && \
    rm -rf /var/lib/apt/lists/* && \
    npm install -g pnpm

# Install Python dependencies for resume generator.
# Pinned to the base image's Playwright version: an unpinned install pulls the
# latest client, which looks for browser builds this image does not contain.
RUN pip3 install --no-cache-dir playwright==1.49.1

# Copy package files first for better caching
COPY orchestrator/package*.json ./orchestrator/
COPY job-extractor/package*.json ./job-extractor/

# Install Node.js dependencies (dev dependencies included: the build step
# below needs them). `--include=dev` replaces the deprecated `--production=false`.
WORKDIR /app/orchestrator
RUN npm install --include=dev
WORKDIR /app/job-extractor
RUN npm install --include=dev

# Copy source code
WORKDIR /app
COPY orchestrator ./orchestrator
COPY job-extractor ./job-extractor
COPY resume-generator ./resume-generator

# Build the orchestrator (client + server)
WORKDIR /app/orchestrator
RUN npm run build

# Create data directories
RUN mkdir -p /app/data/pdfs

# Expose ports
EXPOSE 3001

# Environment variables (can be overridden)
ENV NODE_ENV=production \
    PORT=3001 \
    PYTHON_PATH=/usr/bin/python3 \
    DATA_DIR=/app/data \
    RESUME_GEN_DIR=/app/resume-generator

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:3001/health || exit 1

# NOTE(review): the container runs as root. Switching to a non-root user would
# require the bind-mounted ./data directory to be writable by that user.

# Run migrations and start the server.
# `exec` replaces the wrapper shell with the server's npm process so that
# signals from `docker stop` are not swallowed by an intermediate `sh`.
WORKDIR /app/orchestrator
CMD ["sh", "-c", "npm run db:migrate && exec npm run start"]

127
README.md Normal file
View File

@ -0,0 +1,127 @@
# Job Ops 🚀
Automated job discovery, scoring, and resume generation pipeline.
## Features
- **Job Crawler** - Discovers jobs from Gradcracker and other sources
- **AI Scoring** - Ranks jobs by suitability using OpenRouter API
- **Resume Generator** - Creates tailored PDFs via RXResume automation
- **Dashboard UI** - React-based interface for reviewing and applying
## Quick Start with Docker
### 1. Configure Environment
```bash
# Copy the example env file
cp .env.example .env
# Edit with your credentials
nano .env
```
Required environment variables:
- `OPENROUTER_API_KEY` - Get from [openrouter.ai/keys](https://openrouter.ai/keys)
- `RXRESUME_EMAIL` - Your [rxresu.me](https://rxresu.me) account email
- `RXRESUME_PASSWORD` - Your RXResume password
### 2. Add Your Base Resume
Place your resume JSON at `resume-generator/base.json`.
You can export this from RXResume.
### 3. Run
```bash
# Build and start
docker compose up -d
# View logs
docker compose logs -f
# Stop
docker compose down
```
### 4. Access
`docker-compose.yml` publishes the container's port 3001 on host port **3005**:
- **Dashboard**: http://localhost:3005
- **API**: http://localhost:3005/api
- **Health**: http://localhost:3005/health
## Data Persistence
All data is stored in the `./data` directory:
- `data/jobs.db` - SQLite database
- `data/pdfs/` - Generated resume PDFs
## Development
### Without Docker
```bash
# Install dependencies
cd orchestrator && npm install
cd ../job-extractor && npm install
# Set up Python environment for resume generator
cd ../resume-generator
python3 -m venv .venv
source .venv/bin/activate
pip install playwright
playwright install chromium
# Run orchestrator (from orchestrator folder)
cd ../orchestrator
cp .env.example .env # Configure your env
npm run db:migrate
npm run dev
```
### Build Docker Image
```bash
docker build -t job-ops:latest .
```
### Push to Docker Hub
```bash
docker tag job-ops:latest yourusername/job-ops:latest
docker push yourusername/job-ops:latest
```
## API Endpoints
| Method | Endpoint | Description |
|--------|----------|-------------|
| GET | `/api/jobs` | List all jobs |
| GET | `/api/jobs/:id` | Get job details |
| PATCH | `/api/jobs/:id` | Update job |
| POST | `/api/jobs/:id/process` | Generate resume for job |
| POST | `/api/jobs/:id/apply` | Mark as applied |
| POST | `/api/jobs/:id/reject` | Skip job |
| POST | `/api/jobs/process-discovered` | Process all discovered jobs |
| GET | `/api/pipeline/status` | Pipeline status |
| POST | `/api/pipeline/run` | Trigger pipeline |
| GET | `/api/pipeline/progress` | SSE progress stream |
| DELETE | `/api/database` | Clear all data |
## Architecture
```
job-ops/
├── orchestrator/ # Node.js backend + React frontend
│ ├── src/server/ # Express API, services, DB
│ └── src/client/ # React dashboard
├── job-extractor/ # Crawlee-based job crawler
├── resume-generator/ # Python Playwright automation
├── data/ # SQLite DB + generated PDFs
├── Dockerfile
└── docker-compose.yml
```
## License
MIT

55
docker-compose.yml Normal file
View File

@ -0,0 +1,55 @@
# =============================================================================
# Job Ops - Docker Compose Configuration
# =============================================================================
services:
  job-ops:
    build:
      context: .
      dockerfile: Dockerfile
    image: job-ops:latest
    container_name: job-ops
    ports:
      # host:container - the server listens on 3001 inside the container and
      # is reachable on port 3005 of the host.
      - "3005:3001"
    volumes:
      # Persist database and generated PDFs (bind mount of the local ./data folder)
      - ./data:/app/data
      # Base resume JSON (read-only). The file must exist on the host before
      # starting, otherwise Docker creates a directory with that name instead.
      - ./resume-generator/base.json:/app/resume-generator/base.json:ro
    environment:
      # Server config
      - NODE_ENV=production
      - PORT=3001
      # OpenRouter API for AI scoring and summaries
      - OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
      - MODEL=${MODEL:-openai/gpt-4o-mini}
      # RXResume credentials for PDF generation
      - RXRESUME_EMAIL=${RXRESUME_EMAIL}
      - RXRESUME_PASSWORD=${RXRESUME_PASSWORD}
      # Pipeline configuration
      - PIPELINE_TOP_N=${PIPELINE_TOP_N:-10}
      - PIPELINE_MIN_SCORE=${PIPELINE_MIN_SCORE:-50}
      # Optional: Notion integration
      - NOTION_API_KEY=${NOTION_API_KEY:-}
      - NOTION_DATABASE_ID=${NOTION_DATABASE_ID:-}
      # Optional: Webhook secret for n8n
      - WEBHOOK_SECRET=${WEBHOOK_SECRET:-}
      # Python path (uses system python in container)
      - PYTHON_PATH=/usr/bin/python3
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3001/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s
# No top-level `volumes:` section: data persistence uses the ./data bind mount
# above, so a named `data` volume would be declared but never used.

View File

@ -53,7 +53,7 @@ const crawler = new PlaywrightCrawler({
launchContext: {
launcher: firefox,
launchOptions: await launchOptions({
headless: false,
headless: true,
humanize: true,
geoip: true,
}),

View File

@ -7,7 +7,11 @@ import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
const __dirname = dirname(fileURLToPath(import.meta.url));
const DB_PATH = join(__dirname, '../../../data/jobs.db');
// Database path - can be overridden via env for Docker
const DB_PATH = process.env.DATA_DIR
? join(process.env.DATA_DIR, 'jobs.db')
: join(__dirname, '../../../data/jobs.db');
/**
* Clear all data from the database (keeps the schema intact).

View File

@ -10,7 +10,11 @@ import { existsSync, mkdirSync } from 'fs';
import * as schema from './schema.js';
const __dirname = dirname(fileURLToPath(import.meta.url));
const DB_PATH = join(__dirname, '../../../data/jobs.db');
// Database path - can be overridden via env for Docker
const DB_PATH = process.env.DATA_DIR
? join(process.env.DATA_DIR, 'jobs.db')
: join(__dirname, '../../../data/jobs.db');
// Ensure data directory exists
const dataDir = dirname(DB_PATH);

View File

@ -8,7 +8,11 @@ import { fileURLToPath } from 'url';
import { existsSync, mkdirSync } from 'fs';
const __dirname = dirname(fileURLToPath(import.meta.url));
const DB_PATH = join(__dirname, '../../../data/jobs.db');
// Database path - can be overridden via env for Docker
const DB_PATH = process.env.DATA_DIR
? join(process.env.DATA_DIR, 'jobs.db')
: join(__dirname, '../../../data/jobs.db');
// Ensure data directory exists
const dataDir = dirname(DB_PATH);

View File

@ -34,7 +34,9 @@ app.use((req, res, next) => {
app.use('/api', apiRouter);
// Serve static files for generated PDFs
const pdfDir = join(__dirname, '../../data/pdfs');
const pdfDir = process.env.DATA_DIR
? join(process.env.DATA_DIR, 'pdfs')
: join(__dirname, '../../data/pdfs');
app.use('/pdfs', express.static(pdfDir));
// Health check

View File

@ -6,12 +6,16 @@
import { spawn } from 'child_process';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import { readFile, writeFile, copyFile, access, mkdir } from 'fs/promises';
import { readFile, writeFile, mkdir, access } from 'fs/promises';
import { existsSync } from 'fs';
const __dirname = dirname(fileURLToPath(import.meta.url));
const RESUME_GEN_DIR = join(__dirname, '../../../../resume-generator');
const OUTPUT_DIR = join(__dirname, '../../../data/pdfs');
// Paths - can be overridden via env for Docker
const RESUME_GEN_DIR = process.env.RESUME_GEN_DIR || join(__dirname, '../../../../resume-generator');
const OUTPUT_DIR = process.env.DATA_DIR
? join(process.env.DATA_DIR, 'pdfs')
: join(__dirname, '../../../data/pdfs');
export interface PdfResult {
success: boolean;
@ -55,22 +59,11 @@ export async function generatePdf(
const tempResumePath = join(RESUME_GEN_DIR, `temp_resume_${jobId}.json`);
await writeFile(tempResumePath, JSON.stringify(baseResume, null, 2));
// Generate PDF using Python script
// Generate PDF using Python script - output directly to our data folder
const outputFilename = `resume_${jobId}.pdf`;
const outputPath = join(OUTPUT_DIR, outputFilename);
await runPythonPdfGenerator(tempResumePath, outputFilename);
// Move generated PDF to our output directory
const pythonOutputPath = join(RESUME_GEN_DIR, 'resumes', outputFilename);
try {
await access(pythonOutputPath);
await copyFile(pythonOutputPath, outputPath);
} catch {
// PDF might already be in the right place or script output different location
console.warn('PDF not found at expected Python output location');
}
await runPythonPdfGenerator(tempResumePath, outputFilename, OUTPUT_DIR);
// Cleanup temp file
try {
@ -94,11 +87,12 @@ export async function generatePdf(
*/
async function runPythonPdfGenerator(
jsonPath: string,
outputFilename: string
outputFilename: string,
outputDir: string
): Promise<void> {
return new Promise((resolve, reject) => {
// Use the virtual environment's Python
const pythonPath = join(RESUME_GEN_DIR, '.venv', 'bin', 'python');
// Use the virtual environment's Python (or system python in Docker)
const pythonPath = process.env.PYTHON_PATH || join(RESUME_GEN_DIR, '.venv', 'bin', 'python');
const child = spawn(pythonPath, ['rxresume_automation.py'], {
cwd: RESUME_GEN_DIR,
@ -106,6 +100,7 @@ async function runPythonPdfGenerator(
...process.env,
RESUME_JSON_PATH: jsonPath,
OUTPUT_FILENAME: outputFilename,
OUTPUT_DIR: outputDir,
},
stdio: 'inherit',
});

View File

@ -3,8 +3,8 @@
"compilerOptions": {
"module": "ESNext",
"moduleResolution": "bundler",
"outDir": "./dist/server",
"rootDir": "./src/server"
"outDir": "./dist",
"rootDir": "./src"
},
"include": ["src/server/**/*", "src/shared/**/*"]
}

View File

@ -21,7 +21,9 @@ RESUME_JSON_PATH = (
_custom_output_filename = os.getenv("OUTPUT_FILENAME")
OUTPUT_FILENAME = _custom_output_filename if _custom_output_filename else "resume.pdf"
OUTPUT_DIR = BASE_DIR / "resumes"
# Output directory - can be overridden by orchestrator
_custom_output_dir = os.getenv("OUTPUT_DIR")
OUTPUT_DIR = Path(_custom_output_dir) if _custom_output_dir else BASE_DIR / "resumes"
def login(page):