faster dockerfile, more caching and removing multi-stage

This commit is contained in:
DaKheera47 2026-01-27 15:53:32 +00:00
parent 33ab3595ae
commit 52ab5873ac

View File

@ -1,75 +1,91 @@
# syntax=docker/dockerfile:1.6
# =============================================================================
# Job Ops - Slim Docker Image
# Only includes Firefox (for Camoufox) - much smaller than full Playwright
# =============================================================================
# NOTE: the syntax directive must stay on the very first line of this file.
# BuildKit stops looking for parser directives once it has seen a comment,
# blank line, or instruction, and would then treat it as an ordinary comment.
FROM node:20-slim AS base
FROM node:20-slim AS builder
# Install system dependencies for browsers and Python
RUN apt-get update && apt-get install -y \
python3 \
python3-pip \
curl \
# Firefox dependencies
libgtk-3-0 \
libdbus-glib-1-2 \
libxt6 \
libx11-xcb1 \
libasound2 \
&& rm -rf /var/lib/apt/lists/*
ENV DEBIAN_FRONTEND=noninteractive
# Put Playwright browsers in a known cacheable location
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
# Builder-stage OS packages, one per line and sorted so future diffs stay small.
# The apt package index is removed in the same layer to keep the layer lean.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        pkg-config \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*
# Set working directory
WORKDIR /app
# Install Playwright and Firefox only (plus JobSpy for Indeed/LinkedIn scraping)
RUN pip3 install --no-cache-dir --break-system-packages playwright python-jobspy && \
npx playwright install firefox
# ---- Python deps (cached) ----
# pip's cache directory is backed by a BuildKit cache mount, so downloads and
# built wheels are reused across builds without being written into a layer.
# --no-cache-dir is intentionally NOT passed: it would disable the very cache
# that the mount exists to provide.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip3 install --break-system-packages playwright python-jobspy
# Copy package files first for better caching
# Download the Firefox build for the Python Playwright package installed above.
# PLAYWRIGHT_BROWSERS_PATH (set earlier in this stage) points at /ms-playwright.
# This RUN has no cache mount; reuse across builds relies on the layer cache only.
RUN python3 -m playwright install firefox
# ---- Node deps (copy lockfiles; cached) ----
COPY orchestrator/package*.json ./orchestrator/
COPY extractors/gradcracker/package*.json ./extractors/gradcracker/
COPY extractors/ukvisajobs/package*.json ./extractors/ukvisajobs/
# Install Node.js dependencies
WORKDIR /app/orchestrator
RUN npm install --production=false
RUN --mount=type=cache,target=/root/.npm \
npm ci --no-audit --no-fund --progress=false
WORKDIR /app/extractors/gradcracker
RUN npm install --production=false
RUN --mount=type=cache,target=/root/.npm \
npm ci --no-audit --no-fund --progress=false
# Install Camoufox browser (downloads its own Firefox fork)
RUN npx camoufox fetch
# Fetch the Camoufox browser using the gradcracker extractor's dependencies.
# Only the npm cache is mounted. /root/.cache must NOT be a cache mount here:
# anything written beneath a cache-mount target stays in the build cache and is
# not committed to the image layer, so a browser downloaded there would be
# missing from the resulting image.
# NOTE(review): Camoufox presumably downloads under ~/.cache/camoufox - confirm.
WORKDIR /app/extractors/gradcracker
RUN --mount=type=cache,target=/root/.npm \
    npx camoufox fetch
WORKDIR /app/extractors/ukvisajobs
RUN npm install --production=false
RUN --mount=type=cache,target=/root/.npm \
npm ci --no-audit --no-fund --progress=false
# Copy source code
# ---- Copy sources late (preserves dependency cache) ----
# Source trees are copied only after the dependency-install steps so that a
# source-only change does not invalidate the layers that installed dependencies.
# NOTE(review): these COPYs target directories that already hold node_modules;
# presumably a .dockerignore excludes host node_modules - confirm.
WORKDIR /app
COPY orchestrator ./orchestrator
COPY extractors/gradcracker ./extractors/gradcracker
COPY extractors/jobspy ./extractors/jobspy
COPY extractors/ukvisajobs ./extractors/ukvisajobs
# Build the orchestrator (client + server)
# Build orchestrator
WORKDIR /app/orchestrator
RUN npm run build
# Create data directories
RUN mkdir -p /app/data/pdfs
# Expose ports
EXPOSE 3001
# Environment variables (can be overridden)
FROM node:20-slim AS runtime
# Build-time only: keeps apt-get non-interactive for RUN steps in this stage
# without leaking DEBIAN_FRONTEND into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive
# Runtime configuration (each value can be overridden when the container starts).
# PLAYWRIGHT_BROWSERS_PATH names the directory the Playwright browsers live in.
ENV NODE_ENV=production \
    PORT=3001 \
    PYTHON_PATH=/usr/bin/python3 \
    DATA_DIR=/app/data \
    PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
# Health check
# Runtime-stage OS packages, one per line and sorted so future diffs stay small.
# The lib* entries are the shared libraries installed for Firefox.
# The apt package index is removed in the same layer to keep the layer lean.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libasound2 \
        libdbus-glib-1-2 \
        libgtk-3-0 \
        libx11-xcb1 \
        libxt6 \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Python runtime deps
# pip's cache directory is backed by a BuildKit cache mount, so downloads and
# built wheels are reused across builds without being written into a layer.
# --no-cache-dir is intentionally NOT passed: it would disable the very cache
# that the mount exists to provide.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip3 install --break-system-packages playwright python-jobspy
# Copy the Playwright browser directory populated in the builder stage, so this
# stage does not download browsers again.
# NOTE(review): only /ms-playwright and /app are carried over from the builder.
# If `npx camoufox fetch` stores its browser elsewhere (e.g. under the home
# directory), that download will not be present in this stage - confirm.
COPY --from=builder /ms-playwright /ms-playwright
# Copy the built orchestrator and the extractors, including the node_modules
# directories installed in the builder stage.
COPY --from=builder /app/orchestrator /app/orchestrator
COPY --from=builder /app/extractors /app/extractors
RUN mkdir -p /app/data/pdfs
EXPOSE 3001
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD curl -f http://localhost:3001/health || exit 1
CMD curl -f http://localhost:3001/health || exit 1
# Run database migrations, then start the server.
# `exec` replaces the wrapper shell with the start command, so no idle `sh`
# process is left sitting between the container runtime and the application
# for the lifetime of the container.
WORKDIR /app/orchestrator
CMD ["sh", "-c", "npm run db:migrate && exec npm run start"]