faster dockerfile, more caching and removing multi-stage
This commit is contained in:
parent
33ab3595ae
commit
52ab5873ac
94
Dockerfile
94
Dockerfile
@ -1,75 +1,91 @@
|
||||
# syntax=docker/dockerfile:1.6
# =============================================================================
# Job Ops - Slim Docker Image
# Only includes Firefox (for Camoufox) - much smaller than full Playwright
#
# The syntax directive above must remain the very first line: parser
# directives that follow any other comment are treated as plain comments.
# =============================================================================
|
||||
|
||||
FROM node:20-slim AS base
|
||||
FROM node:20-slim AS builder
|
||||
|
||||
# Install system dependencies for browsers and Python
|
||||
RUN apt-get update && apt-get install -y \
|
||||
python3 \
|
||||
python3-pip \
|
||||
curl \
|
||||
# Firefox dependencies
|
||||
libgtk-3-0 \
|
||||
libdbus-glib-1-2 \
|
||||
libxt6 \
|
||||
libx11-xcb1 \
|
||||
libasound2 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
# Put Playwright browsers in a known cacheable location
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3 python3-pip curl ca-certificates git \
|
||||
build-essential pkg-config \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Install Playwright and Firefox only (plus JobSpy for Indeed/LinkedIn scraping)
|
||||
RUN pip3 install --no-cache-dir --break-system-packages playwright python-jobspy && \
|
||||
npx playwright install firefox
|
||||
# ---- Python deps (cached) ----
|
||||
# Install the Python scraping dependencies. pip's cache directory is a BuildKit
# cache mount, so it is reused across builds and is never written into the
# image layer; --no-cache-dir is omitted because it would leave the mount unused.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip3 install --break-system-packages playwright python-jobspy
|
||||
|
||||
# Copy package files first for better caching
|
||||
# Install Firefox for Python Playwright (stored under PLAYWRIGHT_BROWSERS_PATH, so the download is cached as an image layer)
|
||||
RUN python3 -m playwright install firefox
|
||||
|
||||
# ---- Node deps (copy lockfiles; cached) ----
|
||||
COPY orchestrator/package*.json ./orchestrator/
|
||||
COPY extractors/gradcracker/package*.json ./extractors/gradcracker/
|
||||
COPY extractors/ukvisajobs/package*.json ./extractors/ukvisajobs/
|
||||
|
||||
# Install Node.js dependencies
|
||||
WORKDIR /app/orchestrator
|
||||
RUN npm install --production=false
|
||||
RUN --mount=type=cache,target=/root/.npm \
|
||||
npm ci --no-audit --no-fund --progress=false
|
||||
|
||||
WORKDIR /app/extractors/gradcracker
|
||||
RUN npm install --production=false
|
||||
RUN --mount=type=cache,target=/root/.npm \
|
||||
npm ci --no-audit --no-fund --progress=false
|
||||
|
||||
# Install Camoufox browser (downloads its own Firefox fork)
|
||||
RUN npx camoufox fetch
|
||||
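# Fetch the Camoufox browser, reusing npm's cache and /root/.cache across builds.
# NOTE(review): files written under a cache mount are not saved in the image layer;
# confirm Camoufox does not download its browser into /root/.cache.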
|
||||
WORKDIR /app/extractors/gradcracker
|
||||
RUN --mount=type=cache,target=/root/.npm \
|
||||
--mount=type=cache,target=/root/.cache \
|
||||
npx camoufox fetch
|
||||
|
||||
WORKDIR /app/extractors/ukvisajobs
|
||||
RUN npm install --production=false
|
||||
RUN --mount=type=cache,target=/root/.npm \
|
||||
npm ci --no-audit --no-fund --progress=false
|
||||
|
||||
# Copy source code
|
||||
# ---- Copy sources late (preserves dependency cache) ----
|
||||
WORKDIR /app
|
||||
COPY orchestrator ./orchestrator
|
||||
COPY extractors/gradcracker ./extractors/gradcracker
|
||||
COPY extractors/jobspy ./extractors/jobspy
|
||||
COPY extractors/ukvisajobs ./extractors/ukvisajobs
|
||||
|
||||
# Build the orchestrator (client + server)
|
||||
# Build orchestrator
|
||||
WORKDIR /app/orchestrator
|
||||
RUN npm run build
|
||||
|
||||
# Create data directories
|
||||
RUN mkdir -p /app/data/pdfs
|
||||
|
||||
# Expose ports
|
||||
EXPOSE 3001
|
||||
|
||||
# Environment variables (can be overridden)
|
||||
FROM node:20-slim AS runtime
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV NODE_ENV=production
|
||||
ENV PORT=3001
|
||||
ENV PYTHON_PATH=/usr/bin/python3
|
||||
ENV DATA_DIR=/app/data
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
||||
|
||||
# Health check
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3 python3-pip curl ca-certificates \
|
||||
libgtk-3-0 libdbus-glib-1-2 libxt6 libx11-xcb1 libasound2 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Python runtime deps
|
||||
# Install the Python packages needed at run time. pip's cache directory is a
# BuildKit cache mount (kept on the build host, not in the image), so pip is
# allowed to use it; --no-cache-dir would make the mount pointless.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip3 install --break-system-packages playwright python-jobspy
|
||||
|
||||
# Copy cached browsers from builder (fast; no redownload)
|
||||
COPY --from=builder /ms-playwright /ms-playwright
|
||||
|
||||
# Copy built app + node_modules from builder (fast path)
|
||||
COPY --from=builder /app/orchestrator /app/orchestrator
|
||||
COPY --from=builder /app/extractors /app/extractors
|
||||
|
||||
RUN mkdir -p /app/data/pdfs
|
||||
|
||||
EXPOSE 3001
|
||||
# Probe the server's /health endpoint with curl. The shell-form CMD is expanded
# inside the running container, so the probe follows an overridden PORT and
# falls back to 3001 when PORT is unset.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f "http://localhost:${PORT:-3001}/health" || exit 1
|
||||
|
||||
# Run migrations and start the server
|
||||
WORKDIR /app/orchestrator
|
||||
# Apply database migrations, then start the server. `exec` replaces the wrapper
# shell with the npm process, so stop signals (e.g. SIGTERM from `docker stop`)
# are delivered to it instead of to an intermediate `sh`.
CMD ["sh", "-c", "npm run db:migrate && exec npm run start"]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user