Jobber/Dockerfile
ilia 67508d56ea
Some checks failed
CI / Linting (Biome) (push) Failing after 35s
CI / Tests (push) Successful in 5m28s
CI / Type Check (adzuna-extractor) (push) Successful in 1m5s
CI / Type Check (gradcracker-extractor) (push) Successful in 1m9s
CI / Type Check (hiringcafe-extractor) (push) Successful in 1m4s
CI / Type Check (orchestrator) (push) Successful in 1m21s
CI / Type Check (startupjobs-extractor) (push) Successful in 1m4s
CI / Type Check (ukvisajobs-extractor) (push) Successful in 1m5s
CI / Documentation (push) Successful in 1m55s
fix(docker): copy full extractors tree into image for runtime manifests
The Dockerfile only copied a fixed list of extractor dirs; new sources
were listed in shared but their manifest.ts files were absent from the
container, so discovery logged missingManifest in production.

Copy extractors/ once before npm install in builder and production, and
skip redundant per-extractor COPY lines. Add extractors/*/storage/ to
.dockerignore to avoid baking local cache into the build context.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-12 20:36:27 -04:00

131 lines
4.4 KiB
Docker

# syntax=docker/dockerfile:1.6
# ============================================================================
# BUILD STAGE
# ============================================================================
# Pin the Debian release explicitly: the package list below names
# libpython3.11-minimal, and later steps in this file copy from
# /usr/local/lib/python3.11, both of which are specific to bookworm's Python 3.11.
FROM node:22-bookworm-slim AS builder
# Build-time only: keeps apt/debconf non-interactive for RUN steps in this stage
# without persisting the variable as image environment.
ARG DEBIAN_FRONTEND=noninteractive
ENV NODE_ENV=production \
    PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
# Install build dependencies (one package per line, sorted, for readable diffs).
# The GTK/X11/ALSA libraries are presumably needed by the Firefox build that
# Playwright installs below (TODO confirm).
# apt lists and archives are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        libasound2 \
        libdbus-glib-1-2 \
        libgtk-3-0 \
        libgtk-3-common \
        libpython3.11-minimal \
        libx11-xcb1 \
        libxt6 \
        pkg-config \
        python3 \
        python3-minimal \
        python3-pip \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
WORKDIR /app
# Install Python dependencies.
# The BuildKit cache mount keeps pip's cache on the build host (it is never part
# of an image layer), so pip's own cache must stay enabled: passing
# --no-cache-dir here would leave the mounted cache unused.
# --break-system-packages lets pip install into the Debian-managed interpreter.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip3 install --break-system-packages playwright python-jobspy
# Install Firefox for Python Playwright (browsers are stored under
# PLAYWRIGHT_BROWSERS_PATH). No pip cache mount: this step downloads a browser
# build, not Python packages, so the pip cache would not be consulted.
RUN python3 -m playwright install firefox
# Workspace manifests go in ahead of the sources so that the dependency-install
# layer below can be reused when only shared/, docs-site/ or orchestrator/
# source files change.
COPY package*.json ./
COPY shared/package*.json ./shared/
COPY orchestrator/package*.json ./orchestrator/
COPY docs-site/package*.json ./docs-site/
# extractors/* are npm workspaces; the full tree is copied (not just the
# package.json files) so each extractor's manifest is present in the image.
COPY extractors/ ./extractors/
# Install every workspace plus the root package. Dev dependencies are included
# because the build steps later in this stage need them. The npm cache lives in
# a BuildKit cache mount rather than in the layer.
RUN --mount=type=cache,target=/root/.npm \
    npm install \
        --workspaces \
        --include-workspace-root \
        --include=dev \
        --no-audit \
        --no-fund \
        --progress=false
# Download the Camoufox binaries now, ahead of the source COPY steps, so this
# layer stays cached when application source changes.
RUN npx camoufox-js fetch
# Bring in the application sources.
# (extractors/ is not repeated here: the full tree was copied before npm install.)
WORKDIR /app
COPY visa-sponsor-providers/ ./visa-sponsor-providers/
COPY shared/ ./shared/
COPY docs-site/ ./docs-site/
COPY orchestrator/ ./orchestrator/

# --- Documentation site bundle ---
WORKDIR /app/docs-site
RUN npm run build

# --- Client bundle ---
# Vite inlines VITE_* values at compile time; they are not read from a runtime .env.
# Default: skip the Reactive Resume steps in onboarding; PDFs can be configured
# via Settings or env on the server.
ARG VITE_SKIP_RXRESUME_ONBOARDING=true
ENV VITE_SKIP_RXRESUME_ONBOARDING=$VITE_SKIP_RXRESUME_ONBOARDING
WORKDIR /app/orchestrator
RUN npm run build:client
# ============================================================================
# PRODUCTION STAGE
# ============================================================================
# Pin the Debian release explicitly: libpython3.11-minimal below and the
# /usr/local/lib/python3.11 path used later in this stage are specific to
# bookworm's Python 3.11.
FROM node:22-bookworm-slim AS production
# Build-time only: apt needs this during the RUN below, but it should not be
# part of the running container's environment, so it is an ARG rather than ENV.
ARG DEBIAN_FRONTEND=noninteractive
# Runtime configuration defaults.
ENV NODE_ENV=production \
    PORT=3001 \
    PYTHON_PATH=/usr/bin/python3 \
    DATA_DIR=/app/data \
    PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
# Install only runtime dependencies (no compiler toolchain in this stage).
# curl is required by the HEALTHCHECK defined at the end of this stage.
# apt lists and archives are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libasound2 \
        libdbus-glib-1-2 \
        libgtk-3-0 \
        libgtk-3-common \
        libpython3.11-minimal \
        libx11-xcb1 \
        libxt6 \
        python3 \
        python3-minimal \
        python3-pip \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
WORKDIR /app
# Reuse what the builder stage already installed instead of installing again:
#   - the Python packages under the Python 3.11 dist-packages directory
#   - the Playwright browser directory (matches PLAYWRIGHT_BROWSERS_PATH)
COPY --from=builder /usr/local/lib/python3.11/dist-packages/ /usr/local/lib/python3.11/dist-packages/
COPY --from=builder /ms-playwright/ /ms-playwright/
# Workspace manifests (root, shared, orchestrator, docs-site) plus the complete
# extractors/ tree, copied ahead of the install step.
COPY package*.json ./
COPY shared/package*.json ./shared/
COPY orchestrator/package*.json ./orchestrator/
COPY docs-site/package*.json ./docs-site/
COPY extractors/ ./extractors/
# Runtime install: all workspaces and the root package, with dev dependencies
# omitted. The npm cache is kept in a BuildKit cache mount, not in the layer.
RUN --mount=type=cache,target=/root/.npm \
    npm install \
        --workspaces \
        --include-workspace-root \
        --omit=dev \
        --no-audit \
        --no-fund \
        --progress=false
# Copy application source from the build context first...
COPY shared ./shared
COPY orchestrator ./orchestrator
COPY visa-sponsor-providers ./visa-sponsor-providers
# extractors/ already copied before npm install
# ...then overlay the artifacts produced by the builder stage. Copying these
# AFTER the source trees guarantees that an orchestrator/dist directory which
# happens to be present in the build context cannot overwrite the freshly built
# client bundle or docs.
COPY --from=builder /app/orchestrator/dist ./orchestrator/dist
COPY --from=builder /app/docs-site/build ./orchestrator/dist/docs
# Reuse Camoufox binaries from builder instead of fetching again
COPY --from=builder /root/.cache/camoufox /root/.cache/camoufox
WORKDIR /app
# Create the data directory (including the pdfs subdirectory) at build time.
RUN mkdir -p /app/data/pdfs
# Documentation only: EXPOSE does not publish the port.
EXPOSE 3001
# Liveness probe against the /health endpoint; curl -f turns HTTP error
# statuses into a non-zero exit code, which marks the probe as failed.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:3001/health || exit 1
WORKDIR /app/orchestrator
# Run the database migration script, then start the server.
# `exec` replaces the wrapper shell with npm once the migration has succeeded,
# so the shell does not linger as PID 1 and signals from `docker stop` are
# delivered to npm directly. If the migration fails, `&&` prevents startup.
CMD ["sh", "-c", "npx tsx src/server/db/migrate.ts && exec npm run start"]