diff --git a/Dockerfile b/Dockerfile
index d9ba4ec..ed896fe 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,75 +1,96 @@
-# =============================================================================
-# Job Ops - Slim Docker Image
-# Only includes Firefox (for Camoufox) - much smaller than full Playwright
-# =============================================================================
+# syntax=docker/dockerfile:1.6
 
-FROM node:20-slim AS base
+FROM node:20-slim AS builder
 
-# Install system dependencies for browsers and Python
-RUN apt-get update && apt-get install -y \
-    python3 \
-    python3-pip \
-    curl \
-    # Firefox dependencies
-    libgtk-3-0 \
-    libdbus-glib-1-2 \
-    libxt6 \
-    libx11-xcb1 \
-    libasound2 \
-    && rm -rf /var/lib/apt/lists/*
+# Build-time only (ARG, not ENV): keeps apt non-interactive without leaking into the image env
+ARG DEBIAN_FRONTEND=noninteractive
+# Put Playwright browsers in a fixed path so the runtime stage can copy them
+ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3 python3-pip curl ca-certificates git \
+    build-essential pkg-config \
+    && rm -rf /var/lib/apt/lists/*
 
-# Set working directory
 WORKDIR /app
 
-# Install Playwright and Firefox only (plus JobSpy for Indeed/LinkedIn scraping)
-RUN pip3 install --no-cache-dir --break-system-packages playwright python-jobspy && \
-    npx playwright install firefox
+# ---- Python deps (no --no-cache-dir: pip's cache lives in the BuildKit mount, not the layer) ----
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip3 install --break-system-packages playwright python-jobspy
 
-# Copy package files first for better caching
+# Install Firefox for Python Playwright into PLAYWRIGHT_BROWSERS_PATH
+RUN python3 -m playwright install firefox
+
+# ---- Node deps (copy lockfiles; cached) ----
 COPY orchestrator/package*.json ./orchestrator/
 COPY extractors/gradcracker/package*.json ./extractors/gradcracker/
 COPY extractors/ukvisajobs/package*.json ./extractors/ukvisajobs/
 
-# Install Node.js dependencies
 WORKDIR /app/orchestrator
-RUN npm install --production=false
+RUN --mount=type=cache,target=/root/.npm \
+    npm ci --no-audit --no-fund --progress=false
 
 WORKDIR /app/extractors/gradcracker
-RUN npm install --production=false
+RUN --mount=type=cache,target=/root/.npm \
+    npm ci --no-audit --no-fund --progress=false
 
-# Install Camoufox browser (downloads its own Firefox fork)
-RUN npx camoufox fetch
+# Camoufox fetch. Do NOT cache-mount /root/.cache here: files written into a
+# cache mount are not stored in the image layer, so the browser would be lost.
+# NOTE(review): assumes camoufox downloads under ~/.cache - confirm.
+WORKDIR /app/extractors/gradcracker
+RUN --mount=type=cache,target=/root/.npm \
+    npx camoufox fetch \
+    && mkdir -p /root/.cache
 
 WORKDIR /app/extractors/ukvisajobs
-RUN npm install --production=false
+RUN --mount=type=cache,target=/root/.npm \
+    npm ci --no-audit --no-fund --progress=false
 
-# Copy source code
+# ---- Copy sources late (preserves dependency cache) ----
 WORKDIR /app
 COPY orchestrator ./orchestrator
 COPY extractors/gradcracker ./extractors/gradcracker
 COPY extractors/jobspy ./extractors/jobspy
 COPY extractors/ukvisajobs ./extractors/ukvisajobs
 
-# Build the orchestrator (client + server)
+# Build orchestrator
 WORKDIR /app/orchestrator
 RUN npm run build
 
-# Create data directories
-RUN mkdir -p /app/data/pdfs
 
-# Expose ports
-EXPOSE 3001
-
-# Environment variables (can be overridden)
+FROM node:20-slim AS runtime
+ARG DEBIAN_FRONTEND=noninteractive
 ENV NODE_ENV=production
 ENV PORT=3001
 ENV PYTHON_PATH=/usr/bin/python3
 ENV DATA_DIR=/app/data
+ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
 
-# Health check
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3 python3-pip curl ca-certificates \
+    libgtk-3-0 libdbus-glib-1-2 libxt6 libx11-xcb1 libasound2 \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Python runtime deps
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip3 install --break-system-packages playwright python-jobspy
+
+# Copy cached browsers from builder (fast; no redownload)
+COPY --from=builder /ms-playwright /ms-playwright
+# Copy whatever `npx camoufox fetch` stored in the builder's home cache
+COPY --from=builder /root/.cache /root/.cache
+
+# Copy built app + node_modules from builder (fast path)
+COPY --from=builder /app/orchestrator /app/orchestrator
+COPY --from=builder /app/extractors /app/extractors
+
+RUN mkdir -p /app/data/pdfs
+
+EXPOSE 3001
 HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
-    CMD curl -f http://localhost:3001/health || exit 1
+  CMD curl -f http://localhost:3001/health || exit 1
 
-# Run migrations and start the server
 WORKDIR /app/orchestrator
 CMD ["sh", "-c", "npm run db:migrate && npm run start"]