Jobber/Dockerfile

92 lines
2.9 KiB
Docker

# syntax=docker/dockerfile:1.6
FROM node:20-slim AS builder
ENV DEBIAN_FRONTEND=noninteractive
# Put Playwright browsers in a known cacheable location
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
RUN apt-get update && apt-get install -y --no-install-recommends \
python3 python3-pip curl ca-certificates git \
build-essential pkg-config \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
# ---- Python deps (cached) ----
RUN --mount=type=cache,target=/root/.cache/pip \
pip3 install --no-cache-dir --break-system-packages playwright python-jobspy
# Install Firefox for Python Playwright (cached via PLAYWRIGHT_BROWSERS_PATH layer + mount)
RUN python3 -m playwright install firefox
# ---- Node deps (copy lockfiles; cached) ----
COPY orchestrator/package*.json ./orchestrator/
COPY extractors/gradcracker/package*.json ./extractors/gradcracker/
COPY extractors/ukvisajobs/package*.json ./extractors/ukvisajobs/
WORKDIR /app/orchestrator
RUN --mount=type=cache,target=/root/.npm \
npm ci --no-audit --no-fund --progress=false
WORKDIR /app/extractors/gradcracker
RUN --mount=type=cache,target=/root/.npm \
npm ci --no-audit --no-fund --progress=false
# Camoufox fetch (cache npm + whatever it downloads to; if it uses HOME, this helps)
WORKDIR /app/extractors/gradcracker
RUN --mount=type=cache,target=/root/.npm \
--mount=type=cache,target=/root/.cache \
npx camoufox fetch
WORKDIR /app/extractors/ukvisajobs
RUN --mount=type=cache,target=/root/.npm \
npm ci --no-audit --no-fund --progress=false
# ---- Copy sources late (preserves dependency cache) ----
WORKDIR /app
COPY orchestrator ./orchestrator
COPY extractors/gradcracker ./extractors/gradcracker
COPY extractors/jobspy ./extractors/jobspy
COPY extractors/ukvisajobs ./extractors/ukvisajobs
# Build orchestrator
WORKDIR /app/orchestrator
RUN npm run build
FROM node:20-slim AS runtime
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_ENV=production
ENV PORT=3001
ENV PYTHON_PATH=/usr/bin/python3
ENV DATA_DIR=/app/data
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
RUN apt-get update && apt-get install -y --no-install-recommends \
python3 python3-pip curl ca-certificates \
libgtk-3-0 libdbus-glib-1-2 libxt6 libx11-xcb1 libasound2 \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Python runtime deps
RUN --mount=type=cache,target=/root/.cache/pip \
pip3 install --no-cache-dir --break-system-packages playwright python-jobspy
# Copy cached browsers from builder (fast; no redownload)
COPY --from=builder /ms-playwright /ms-playwright
# Copy built app + node_modules from builder (fast path)
COPY --from=builder /app/orchestrator /app/orchestrator
COPY --from=builder /app/extractors /app/extractors
RUN mkdir -p /app/data/pdfs
EXPOSE 3001
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD curl -f http://localhost:3001/health || exit 1
WORKDIR /app/orchestrator
CMD ["sh", "-c", "npm run db:migrate && npm run start"]