From 051c09d9436919b17fb6429b95a70cee2a9443ff Mon Sep 17 00:00:00 2001 From: DaKheera47 Date: Thu, 11 Dec 2025 23:56:07 +0000 Subject: [PATCH] Dockerization :) --- .dockerignore | 45 +++++++++ .env.example | 25 +++++ .gitignore | 11 +- Dockerfile | 60 +++++++++++ README.md | 127 ++++++++++++++++++++++++ docker-compose.yml | 55 ++++++++++ job-extractor/src/main.ts | 2 +- orchestrator/src/server/db/clear.ts | 6 +- orchestrator/src/server/db/index.ts | 6 +- orchestrator/src/server/db/migrate.ts | 10 +- orchestrator/src/server/index.ts | 4 +- orchestrator/src/server/services/pdf.ts | 33 +++--- orchestrator/tsconfig.server.json | 4 +- resume-generator/rxresume_automation.py | 4 +- 14 files changed, 362 insertions(+), 30 deletions(-) create mode 100644 .dockerignore create mode 100644 .env.example create mode 100644 Dockerfile create mode 100644 README.md create mode 100644 docker-compose.yml diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..676d295 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,45 @@ +# Dependencies +**/node_modules +**/.venv +**/__pycache__ + +# Build outputs +**/dist +**/*.pyc + +# Data (mounted as volume) +data/ + +# Environment files (passed via docker-compose) +.env +**/.env +**/.env.local + +# Git +.git +.gitignore + +# IDE +.idea +.vscode +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +# Logs +*.log +npm-debug.log* + +# Test files +**/*.test.ts +**/*.spec.ts +**/test/ +**/tests/ +**/__tests__/ + +# Documentation +*.md +!README.md diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..6f6df10 --- /dev/null +++ b/.env.example @@ -0,0 +1,25 @@ +# ============================================================================= +# Job Ops - Environment Variables +# Copy this file to .env and fill in your values +# ============================================================================= + +# OpenRouter API for AI scoring and summaries +# Get your key at: https://openrouter.ai/keys 
+OPENROUTER_API_KEY=your_openrouter_api_key_here +MODEL=openai/gpt-4o-mini + +# RXResume credentials for PDF generation +# Create an account at: https://rxresu.me +RXRESUME_EMAIL=your_email@example.com +RXRESUME_PASSWORD=your_password_here + +# Pipeline configuration +PIPELINE_TOP_N=10 +PIPELINE_MIN_SCORE=50 + +# Optional: Notion integration for job tracking +NOTION_API_KEY= +NOTION_DATABASE_ID= + +# Optional: Webhook secret for n8n automation +WEBHOOK_SECRET= diff --git a/.gitignore b/.gitignore index 496ee2c..b3c4c83 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,10 @@ -.DS_Store \ No newline at end of file +# Environment files +.env +*.env.local + +# Data directory (bind mount in Docker) +data/ + +# OS files +.DS_Store +Thumbs.db \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..391e17b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,60 @@ +# ============================================================================= +# Job Ops - Unified Docker Image +# Contains: Orchestrator (Node.js), Job Crawler, Resume Generator (Python/Playwright) +# ============================================================================= + +FROM mcr.microsoft.com/playwright:v1.49.1-jammy + +# Set working directory +WORKDIR /app + +# Install Node.js 20.x and Python +RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ + apt-get update && \ + apt-get install -y nodejs python3 python3-pip && \ + npm install -g pnpm + +# Install Python deps for resume generator (pin must match the base image's Playwright version so its bundled browsers are found) +RUN pip3 install --no-cache-dir playwright==1.49.1 + +# Copy package files first for better caching +COPY orchestrator/package*.json ./orchestrator/ +COPY job-extractor/package*.json ./job-extractor/ + +# Install Node.js dependencies +WORKDIR /app/orchestrator +RUN npm install --production=false + +WORKDIR /app/job-extractor +RUN npm install --production=false + +# Copy source code +WORKDIR /app +COPY orchestrator ./orchestrator +COPY job-extractor ./job-extractor 
+COPY resume-generator ./resume-generator + +# Build the orchestrator (client + server) +WORKDIR /app/orchestrator +RUN npm run build + +# Create data directories +RUN mkdir -p /app/data/pdfs + +# Expose ports +EXPOSE 3001 + +# Environment variables (can be overridden) +ENV NODE_ENV=production +ENV PORT=3001 +ENV PYTHON_PATH=/usr/bin/python3 +ENV DATA_DIR=/app/data +ENV RESUME_GEN_DIR=/app/resume-generator + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:3001/health || exit 1 + +# Run migrations and start the server +WORKDIR /app/orchestrator +CMD ["sh", "-c", "npm run db:migrate && npm run start"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..be9fb9a --- /dev/null +++ b/README.md @@ -0,0 +1,127 @@ +# Job Ops 🚀 + +Automated job discovery, scoring, and resume generation pipeline. + +## Features + +- **Job Crawler** - Discovers jobs from Gradcracker and other sources +- **AI Scoring** - Ranks jobs by suitability using OpenRouter API +- **Resume Generator** - Creates tailored PDFs via RXResume automation +- **Dashboard UI** - React-based interface for reviewing and applying + +## Quick Start with Docker + +### 1. Configure Environment + +```bash +# Copy the example env file +cp .env.example .env + +# Edit with your credentials +nano .env +``` + +Required environment variables: +- `OPENROUTER_API_KEY` - Get from [openrouter.ai/keys](https://openrouter.ai/keys) +- `RXRESUME_EMAIL` - Your [rxresu.me](https://rxresu.me) account email +- `RXRESUME_PASSWORD` - Your RXResume password + +### 2. Add Your Base Resume + +Place your resume JSON at `resume-generator/base.json`. +You can export this from RXResume. + +### 3. Run + +```bash +# Build and start +docker compose up -d + +# View logs +docker compose logs -f + +# Stop +docker compose down +``` + +### 4. 
Access + +- **Dashboard**: http://localhost:3005 (host port published by docker-compose.yml) +- **API**: http://localhost:3005/api +- **Health**: http://localhost:3005/health + +## Data Persistence + +All data is stored in the `./data` directory: +- `data/jobs.db` - SQLite database +- `data/pdfs/` - Generated resume PDFs + +## Development + +### Without Docker + +```bash +# Install dependencies +cd orchestrator && npm install +cd ../job-extractor && npm install + +# Set up Python environment for resume generator +cd ../resume-generator +python3 -m venv .venv +source .venv/bin/activate +pip install playwright +playwright install chromium + +# Run orchestrator (from orchestrator folder) +cd ../orchestrator +cp .env.example .env # Configure your env +npm run db:migrate +npm run dev +``` + +### Build Docker Image + +```bash +docker build -t job-ops:latest . +``` + +### Push to Docker Hub + +```bash +docker tag job-ops:latest yourusername/job-ops:latest +docker push yourusername/job-ops:latest +``` + +## API Endpoints + +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | `/api/jobs` | List all jobs | +| GET | `/api/jobs/:id` | Get job details | +| PATCH | `/api/jobs/:id` | Update job | +| POST | `/api/jobs/:id/process` | Generate resume for job | +| POST | `/api/jobs/:id/apply` | Mark as applied | +| POST | `/api/jobs/:id/reject` | Skip job | +| POST | `/api/jobs/process-discovered` | Process all discovered jobs | +| GET | `/api/pipeline/status` | Pipeline status | +| POST | `/api/pipeline/run` | Trigger pipeline | +| GET | `/api/pipeline/progress` | SSE progress stream | +| DELETE | `/api/database` | Clear all data | + +## Architecture + +``` +job-ops/ +├── orchestrator/ # Node.js backend + React frontend +│ ├── src/server/ # Express API, services, DB +│ └── src/client/ # React dashboard +├── job-extractor/ # Crawlee-based job crawler +├── resume-generator/ # Python Playwright automation +├── data/ # SQLite DB + generated PDFs +├── Dockerfile +└── docker-compose.yml +``` + +## 
License + +MIT diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..3b85d12 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,55 @@ +# ============================================================================= +# Job Ops - Docker Compose Configuration +# ============================================================================= + +services: + job-ops: + build: + context: . + dockerfile: Dockerfile + image: job-ops:latest + container_name: job-ops + ports: + - "3005:3001" + volumes: + # Persist database and generated PDFs + - ./data:/app/data + # Base resume JSON (read-only) + - ./resume-generator/base.json:/app/resume-generator/base.json:ro + environment: + # Server config + - NODE_ENV=production + - PORT=3001 + + # OpenRouter API for AI scoring and summaries + - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} + - MODEL=${MODEL:-openai/gpt-4o-mini} + + # RXResume credentials for PDF generation + - RXRESUME_EMAIL=${RXRESUME_EMAIL} + - RXRESUME_PASSWORD=${RXRESUME_PASSWORD} + + # Pipeline configuration + - PIPELINE_TOP_N=${PIPELINE_TOP_N:-10} + - PIPELINE_MIN_SCORE=${PIPELINE_MIN_SCORE:-50} + + # Optional: Notion integration + - NOTION_API_KEY=${NOTION_API_KEY:-} + - NOTION_DATABASE_ID=${NOTION_DATABASE_ID:-} + + # Optional: Webhook secret for n8n + - WEBHOOK_SECRET=${WEBHOOK_SECRET:-} + + # Python path (uses system python in container) + - PYTHON_PATH=/usr/bin/python3 + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3001/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + +# Volumes for data persistence +volumes: + data: diff --git a/job-extractor/src/main.ts b/job-extractor/src/main.ts index f452294..7a99c7c 100644 --- a/job-extractor/src/main.ts +++ b/job-extractor/src/main.ts @@ -53,7 +53,7 @@ const crawler = new PlaywrightCrawler({ launchContext: { launcher: firefox, launchOptions: await launchOptions({ - headless: false, + headless: true, humanize: true, geoip: 
true, }), diff --git a/orchestrator/src/server/db/clear.ts b/orchestrator/src/server/db/clear.ts index 712dbd2..be125c6 100644 --- a/orchestrator/src/server/db/clear.ts +++ b/orchestrator/src/server/db/clear.ts @@ -7,7 +7,11 @@ import { join, dirname } from 'path'; import { fileURLToPath } from 'url'; const __dirname = dirname(fileURLToPath(import.meta.url)); -const DB_PATH = join(__dirname, '../../../data/jobs.db'); + +// Database path - can be overridden via env for Docker +const DB_PATH = process.env.DATA_DIR + ? join(process.env.DATA_DIR, 'jobs.db') + : join(__dirname, '../../../data/jobs.db'); /** * Clear all data from the database (keeps the schema intact). diff --git a/orchestrator/src/server/db/index.ts b/orchestrator/src/server/db/index.ts index 24fcab9..b80d75d 100644 --- a/orchestrator/src/server/db/index.ts +++ b/orchestrator/src/server/db/index.ts @@ -10,7 +10,11 @@ import { existsSync, mkdirSync } from 'fs'; import * as schema from './schema.js'; const __dirname = dirname(fileURLToPath(import.meta.url)); -const DB_PATH = join(__dirname, '../../../data/jobs.db'); + +// Database path - can be overridden via env for Docker +const DB_PATH = process.env.DATA_DIR + ? join(process.env.DATA_DIR, 'jobs.db') + : join(__dirname, '../../../data/jobs.db'); // Ensure data directory exists const dataDir = dirname(DB_PATH); diff --git a/orchestrator/src/server/db/migrate.ts b/orchestrator/src/server/db/migrate.ts index 5487f22..a9b47c7 100644 --- a/orchestrator/src/server/db/migrate.ts +++ b/orchestrator/src/server/db/migrate.ts @@ -8,7 +8,11 @@ import { fileURLToPath } from 'url'; import { existsSync, mkdirSync } from 'fs'; const __dirname = dirname(fileURLToPath(import.meta.url)); -const DB_PATH = join(__dirname, '../../../data/jobs.db'); + +// Database path - can be overridden via env for Docker +const DB_PATH = process.env.DATA_DIR + ? 
join(process.env.DATA_DIR, 'jobs.db') + : join(__dirname, '../../../data/jobs.db'); // Ensure data directory exists const dataDir = dirname(DB_PATH); @@ -45,7 +49,7 @@ const migrations = [ created_at TEXT NOT NULL DEFAULT (datetime('now')), updated_at TEXT NOT NULL DEFAULT (datetime('now')) )`, - + `CREATE TABLE IF NOT EXISTS pipeline_runs ( id TEXT PRIMARY KEY, started_at TEXT NOT NULL DEFAULT (datetime('now')), @@ -55,7 +59,7 @@ const migrations = [ jobs_processed INTEGER NOT NULL DEFAULT 0, error_message TEXT )`, - + `CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status)`, `CREATE INDEX IF NOT EXISTS idx_jobs_discovered_at ON jobs(discovered_at)`, `CREATE INDEX IF NOT EXISTS idx_pipeline_runs_started_at ON pipeline_runs(started_at)`, diff --git a/orchestrator/src/server/index.ts b/orchestrator/src/server/index.ts index a564c41..d2b769d 100644 --- a/orchestrator/src/server/index.ts +++ b/orchestrator/src/server/index.ts @@ -34,7 +34,9 @@ app.use((req, res, next) => { app.use('/api', apiRouter); // Serve static files for generated PDFs -const pdfDir = join(__dirname, '../../data/pdfs'); +const pdfDir = process.env.DATA_DIR + ? 
join(process.env.DATA_DIR, 'pdfs') + : join(__dirname, '../../data/pdfs'); app.use('/pdfs', express.static(pdfDir)); // Health check diff --git a/orchestrator/src/server/services/pdf.ts b/orchestrator/src/server/services/pdf.ts index 8683ce7..92986f4 100644 --- a/orchestrator/src/server/services/pdf.ts +++ b/orchestrator/src/server/services/pdf.ts @@ -6,12 +6,16 @@ import { spawn } from 'child_process'; import { join, dirname } from 'path'; import { fileURLToPath } from 'url'; -import { readFile, writeFile, copyFile, access, mkdir } from 'fs/promises'; +import { readFile, writeFile, mkdir, access } from 'fs/promises'; import { existsSync } from 'fs'; const __dirname = dirname(fileURLToPath(import.meta.url)); -const RESUME_GEN_DIR = join(__dirname, '../../../../resume-generator'); -const OUTPUT_DIR = join(__dirname, '../../../data/pdfs'); + +// Paths - can be overridden via env for Docker +const RESUME_GEN_DIR = process.env.RESUME_GEN_DIR || join(__dirname, '../../../../resume-generator'); +const OUTPUT_DIR = process.env.DATA_DIR + ? 
join(process.env.DATA_DIR, 'pdfs') + : join(__dirname, '../../../data/pdfs'); export interface PdfResult { success: boolean; @@ -55,22 +59,11 @@ export async function generatePdf( const tempResumePath = join(RESUME_GEN_DIR, `temp_resume_${jobId}.json`); await writeFile(tempResumePath, JSON.stringify(baseResume, null, 2)); - // Generate PDF using Python script + // Generate PDF using Python script - output directly to our data folder const outputFilename = `resume_${jobId}.pdf`; const outputPath = join(OUTPUT_DIR, outputFilename); - await runPythonPdfGenerator(tempResumePath, outputFilename); - - // Move generated PDF to our output directory - const pythonOutputPath = join(RESUME_GEN_DIR, 'resumes', outputFilename); - - try { - await access(pythonOutputPath); - await copyFile(pythonOutputPath, outputPath); - } catch { - // PDF might already be in the right place or script output different location - console.warn('PDF not found at expected Python output location'); - } + await runPythonPdfGenerator(tempResumePath, outputFilename, OUTPUT_DIR); // Cleanup temp file try { @@ -94,11 +87,12 @@ export async function generatePdf( */ async function runPythonPdfGenerator( jsonPath: string, - outputFilename: string + outputFilename: string, + outputDir: string ): Promise { return new Promise((resolve, reject) => { - // Use the virtual environment's Python - const pythonPath = join(RESUME_GEN_DIR, '.venv', 'bin', 'python'); + // Use the virtual environment's Python (or system python in Docker) + const pythonPath = process.env.PYTHON_PATH || join(RESUME_GEN_DIR, '.venv', 'bin', 'python'); const child = spawn(pythonPath, ['rxresume_automation.py'], { cwd: RESUME_GEN_DIR, @@ -106,6 +100,7 @@ async function runPythonPdfGenerator( ...process.env, RESUME_JSON_PATH: jsonPath, OUTPUT_FILENAME: outputFilename, + OUTPUT_DIR: outputDir, }, stdio: 'inherit', }); diff --git a/orchestrator/tsconfig.server.json b/orchestrator/tsconfig.server.json index 0f600b8..64c78ec 100644 --- 
a/orchestrator/tsconfig.server.json +++ b/orchestrator/tsconfig.server.json @@ -3,8 +3,8 @@ "compilerOptions": { "module": "ESNext", "moduleResolution": "bundler", - "outDir": "./dist/server", - "rootDir": "./src/server" + "outDir": "./dist", + "rootDir": "./src" }, "include": ["src/server/**/*", "src/shared/**/*"] } diff --git a/resume-generator/rxresume_automation.py b/resume-generator/rxresume_automation.py index 45993a8..23a0284 100644 --- a/resume-generator/rxresume_automation.py +++ b/resume-generator/rxresume_automation.py @@ -21,7 +21,9 @@ RESUME_JSON_PATH = ( _custom_output_filename = os.getenv("OUTPUT_FILENAME") OUTPUT_FILENAME = _custom_output_filename if _custom_output_filename else "resume.pdf" -OUTPUT_DIR = BASE_DIR / "resumes" +# Output directory - can be overridden by orchestrator +_custom_output_dir = os.getenv("OUTPUT_DIR") +OUTPUT_DIR = Path(_custom_output_dir) if _custom_output_dir else BASE_DIR / "resumes" def login(page):