#!/usr/bin/env bash
# Run Jobber pipeline, wait until it finishes, send summary + job links to Telegram.
# Secrets: copy scripts/jobber-cron.env.example to /root/.jobber-cron.env (chmod 600).
#
# Environment (normally provided by the env file):
#   TELEGRAM_BOT_TOKEN, TELEGRAM_CHAT_ID       required
#   JOBOPS_URL                                 API base URL (default http://127.0.0.1:3005)
#   JOB_TELEGRAM_MAX_JOBS                      max job lines in the message (default 25)
#   BASIC_AUTH_USER, BASIC_AUTH_PASSWORD       optional HTTP basic auth for the API
#   JOBBER_CRON_SEARCH_CITIES                  optional settings override (string)
#   JOBBER_CRON_JOBSPY_COUNTRY                 optional settings override (string)
#   JOBBER_CRON_WORKPLACE_TYPES                optional, comma-separated
#   JOBBER_CRON_SEARCH_TERMS                   optional, pipe-separated
#   JOBBER_PIPELINE_SOURCES                    optional, comma-separated
#
# Exit status: 0 when a summary was sent (or a run was already in progress),
# non-zero on any failure or when the 6h wait expires.
set -euo pipefail

ENV_FILE="${JOBBER_CRON_ENV:-/root/.jobber-cron.env}"
if [[ ! -f "$ENV_FILE" ]]; then
  echo "Missing env file: $ENV_FILE (set JOBBER_CRON_ENV or create the default path)" >&2
  exit 1
fi
# shellcheck source=/dev/null
source "$ENV_FILE"

: "${TELEGRAM_BOT_TOKEN:?Set TELEGRAM_BOT_TOKEN in $ENV_FILE}"
: "${TELEGRAM_CHAT_ID:?Set TELEGRAM_CHAT_ID in $ENV_FILE}"

for _tool in curl jq; do
  if ! command -v "$_tool" >/dev/null 2>&1; then
    echo "Missing required tool: $_tool" >&2
    exit 1
  fi
done
unset _tool

BASE="${JOBOPS_URL:-http://127.0.0.1:3005}"
MAX_JOBS="${JOB_TELEGRAM_MAX_JOBS:-25}"
if [[ ! "$MAX_JOBS" =~ ^[0-9]+$ ]]; then
  # MAX_JOBS is handed to jq via --argjson, so it must be a plain number.
  echo "JOB_TELEGRAM_MAX_JOBS is not a non-negative integer; using 25" >&2
  MAX_JOBS=25
fi

AUTH=()
if [[ -n "${BASIC_AUTH_USER:-}" && -n "${BASIC_AUTH_PASSWORD:-}" ]]; then
  AUTH=(-u "${BASIC_AUTH_USER}:${BASIC_AUTH_PASSWORD}")
fi

# Succeeds when $1 is JSON whose top-level .ok is exactly true.
json_ok() {
  jq -e '.ok == true' >/dev/null 2>&1 <<<"$1"
}

# Prints $1 if it is a non-negative integer, otherwise $2 (default 0).
# Protects the arithmetic comparisons below from null/empty/non-numeric jq output.
as_uint() {
  if [[ "${1:-}" =~ ^[0-9]+$ ]]; then
    printf '%s' "$1"
  else
    printf '%s' "${2:-0}"
  fi
}

# Prints a bounded, single-line description of an API response body for use
# in an error message: compact JSON when parseable, the raw text otherwise.
describe_response() {
  local raw="$1" detail
  if [[ -z "$raw" ]]; then
    printf '%s' "(no response)"
    return 0
  fi
  detail="$(jq -c . 2>/dev/null <<<"$raw" || printf '%s' "$raw")"
  # Keep error bodies short so the Telegram message is not rejected for length.
  printf '%s' "${detail:0:1500}"
}

# Escapes text for Telegram HTML parse mode (& first, so entities are not double-escaped).
tg_html_escape() {
  printf '%s' "$1" | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# Escapes text for use inside a double-quoted href attribute.
tg_href_escape() {
  printf '%s' "$1" | sed -e 's/&/\&amp;/g' -e 's/"/\&quot;/g'
}

# Sends $1 to the configured chat using HTML parse mode.
# Returns non-zero when curl fails or when Telegram answers without ok:true
# (the rejection is printed to stderr so cron can surface it).
send_tg_html() {
  local msg="$1"
  local payload reply
  payload="$(jq -n \
    --arg c "$TELEGRAM_CHAT_ID" \
    --arg t "$msg" \
    '{chat_id: $c, text: $t, parse_mode: "HTML", disable_web_page_preview: true}')"
  reply="$(curl -sS -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
    -H "Content-Type: application/json" \
    -d "$payload")" || return
  if ! json_ok "$reply"; then
    printf 'Telegram sendMessage was rejected: %s\n' "$reply" >&2
    return 1
  fi
}

# curl wrapper for the Jobber API: adds optional basic auth and the JSON Accept header.
# The ${AUTH[@]+...} form keeps an empty array safe under `set -u` on older bash.
api_curl() {
  curl -sS --compressed ${AUTH[@]+"${AUTH[@]}"} -H "Accept: application/json" "$@"
}

fetch_status() {
  api_curl "${BASE}/api/pipeline/status"
}

# PATCHes /api/settings with whichever JOBBER_CRON_* overrides are set.
# Does nothing when none are set; notifies Telegram and exits 1 when the PATCH fails.
apply_cron_settings() {
  local patch resp
  patch="$(jq -nc \
    --arg cities "${JOBBER_CRON_SEARCH_CITIES:-}" \
    --arg country "${JOBBER_CRON_JOBSPY_COUNTRY:-}" \
    --arg types "${JOBBER_CRON_WORKPLACE_TYPES:-}" \
    --arg terms "${JOBBER_CRON_SEARCH_TERMS:-}" '
      def parts($sep): split($sep) | map(gsub("^\\s+|\\s+$";"")) | map(select(. != ""));
      {}
      + (if $cities  != "" then {searchCities: $cities} else {} end)
      + (if $country != "" then {jobspyCountryIndeed: $country} else {} end)
      + (if $types   != "" then {workplaceTypes: ($types | parts(","))} else {} end)
      + (if $terms   != "" then {searchTerms: ($terms | parts("|"))} else {} end)
    ')"
  if [[ "$patch" == "{}" ]]; then
    return 0
  fi
  # `|| resp=""` keeps a transport failure from tripping `set -e` before we can report it.
  resp="$(api_curl -X PATCH "${BASE}/api/settings" \
    -H "Content-Type: application/json" \
    -d "$patch")" || resp=""
  if ! json_ok "$resp"; then
    send_tg_html "Jobber: PATCH /api/settings failed before cron run: $(tg_html_escape "$(describe_response "$resp")")"
    exit 1
  fi
}

fetch_jobs_list() {
  api_curl "${BASE}/api/jobs?view=list"
}

fetch_jobs_revision() {
  api_curl "${BASE}/api/jobs/revision"
}

# After a run, the jobs list can briefly lag; also catches flaky proxies.
# $1: number of jobs the run reported as discovered.
# Polls up to 25 times (2s apart) until the list is ok and, when $1 > 0, non-empty.
# Always prints the last response received (possibly empty) to stdout.
fetch_jobs_list_when_ready() {
  local expected_discovered
  expected_discovered="$(as_uint "${1:-0}")"
  local resp="" n=0 attempt
  for ((attempt = 0; attempt < 25; attempt++)); do
    resp="$(fetch_jobs_list)" || resp=""
    if json_ok "$resp"; then
      n="$(jq -r '((.data // {}) | .jobs // []) | length' <<<"$resp" 2>/dev/null)" || n=0
      n="$(as_uint "$n")"
      if ((expected_discovered == 0 || n > 0)); then
        printf '%s\n' "$resp"
        return 0
      fi
    fi
    sleep 2
  done
  printf '%s\n' "$resp"
}

# Selects the jobs to report and prints one compact JSON object:
#   {total, usedFallback, lines: [{title, url, employer}, ...]}
# $1: /api/jobs list response   $2: run startedAt   $3: run completedAt
# $4: maximum number of lines to include
# Returns jq's exit status (non-zero when the payload cannot be processed).
build_job_lines_html() {
  local jobs_json="$1"
  local started="$2"
  local completed="$3"
  local max_n="$4"
  # Pipeline run times are ISO-8601 (…T…Z). Jobs often use SQLite datetime('now'): "YYYY-MM-DD HH:MM:SS".
  # Raw string compare treats space before the clock as sorting before "T", so every SQLite-style
  # discoveredAt incorrectly falls *before* the run window. Normalize for comparison only.
  jq -c --arg s "$started" --arg e "$completed" --argjson max "$max_n" '
    def pickurl:
      if (.jobUrl // "") != "" then .jobUrl
      elif (.applicationLink // "") != "" then .applicationLink
      else "" end;
    def normalizeTs:
      if . == null or . == "" then ""
      elif test("[Tt]") then .
      else sub(" "; "T") end;
    def pickrows($all):
      if ($s == "" or $s == null) then
        { rows: ($all | sort_by(.discoveredAt | normalizeTs) | reverse), usedFallback: true }
      else
        ($all | map(select(
          ($e != "" and ((.discoveredAt | normalizeTs) >= ($s | normalizeTs)) and ((.discoveredAt | normalizeTs) <= ($e | normalizeTs)))
        ))) as $win
        | if ($win | length) > 0 then
            { rows: $win, usedFallback: false }
          else
            ($all | map(select(((.discoveredAt | normalizeTs) >= ($s | normalizeTs))))) as $from
            | if ($from | length) > 0 then
                { rows: $from, usedFallback: false }
              else
                { rows: ($all | sort_by(.discoveredAt | normalizeTs) | reverse), usedFallback: true }
              end
          end
      end;
    (((.data // {}) | .jobs) // []) as $all
    | if ($all | length) == 0 then
        {total: 0, lines: [], usedFallback: false}
      else
        pickrows($all) as $picked
        | ($picked.rows | sort_by(.discoveredAt | normalizeTs) | reverse) as $sorted
        | ($sorted | length) as $total
        # $max is the --argjson limit; a bare `max` would call the jq builtin and fail the slice.
        | ($sorted | .[0:$max]) as $slice
        | {
            total: $total,
            usedFallback: $picked.usedFallback,
            lines: [ $slice[] | { title: (.title // "Untitled"), url: pickurl, employer: (.employer // "") } ]
          }
      end
  ' <<<"$jobs_json"
}

# Appends one HTML line per entry of $1's .lines array to the variable named by $2.
# Entries with an http(s) URL become links; entries with no URL are marked "(no URL)".
append_lines_from_json() {
  local sel="$1"
  local -n _out="$2"
  local item line title url emp label
  while IFS= read -r item; do
    [[ -z "$item" || "$item" == "null" ]] && continue
    title="$(jq -r '.title // "Untitled"' <<<"$item")"
    url="$(jq -r '.url // ""' <<<"$item")"
    emp="$(jq -r '.employer // ""' <<<"$item")"
    label="$(tg_html_escape "$title")"
    if [[ "$url" =~ ^https?:// ]]; then
      label="<a href=\"$(tg_href_escape "$url")\">${label}</a>"
    elif [[ -z "$url" || "$url" == "null" ]]; then
      label+=" (no URL)"
    fi
    if [[ -n "$emp" ]]; then
      line="$(tg_html_escape "$emp") — ${label}"
    else
      line="$label"
    fi
    _out+=$'\n'"${line}"
  done < <(jq -c '.lines[]? // empty' <<<"$sel")
}

# Builds and sends the end-of-run summary.
# $1: JSON value of .data.lastRun from /api/pipeline/status.
report_finished_run() {
  local lr="$1"
  local st disc proc err started completed msg
  st="$(jq -r '.status // "unknown"' <<<"$lr")"
  disc="$(as_uint "$(jq -r '.jobsDiscovered // 0' <<<"$lr")")"
  proc="$(as_uint "$(jq -r '.jobsProcessed // 0' <<<"$lr")")"
  err="$(jq -r '.errorMessage // empty' <<<"$lr")"
  started="$(jq -r '.startedAt // ""' <<<"$lr")"
  completed="$(jq -r '.completedAt // ""' <<<"$lr")"

  msg="Jobber pipeline: $(tg_html_escape "$st")"
  msg+=$'\n'"Discovered: ${disc}, processed: ${proc}."
  if [[ -n "$err" ]]; then
    msg+=$'\n'"Error: $(tg_html_escape "${err:0:1500}")"
  fi

  local jobs_resp sel list_n total shown used_fb jq_err rest
  jobs_resp="$(fetch_jobs_list_when_ready "$disc")" || jobs_resp=""
  if ! json_ok "$jobs_resp"; then
    msg+=$'\n\n'"Could not load GET /api/jobs for links."
    send_tg_html "$msg"
    return 0
  fi

  list_n="$(as_uint "$(jq -r '((.data // {}) | .jobs // []) | length' <<<"$jobs_resp" 2>/dev/null || true)")"
  if ! sel="$(build_job_lines_html "$jobs_resp" "$started" "$completed" "$MAX_JOBS")"; then
    sel='{"total":0,"lines":[],"usedFallback":false,"jqError":true}'
  fi
  total="$(as_uint "$(jq -r '.total // 0' <<<"$sel")")"
  shown="$(as_uint "$(jq -r '.lines | length' <<<"$sel")")"
  used_fb="$(jq -r '.usedFallback // false' <<<"$sel")"
  jq_err="$(jq -r '.jqError // false' <<<"$sel")"

  if ((total > 0)); then
    if [[ "$used_fb" == "true" ]]; then
      msg+=$'\n\n'"Recent jobs (showing ${shown} of ${total}; time window did not match — links may include older discoveries):"
    else
      msg+=$'\n\n'"Jobs in this run (showing ${shown} of ${total}):"
    fi
    append_lines_from_json "$sel" msg
    rest=$((total - shown))
    if ((rest > 0)); then
      msg+=$'\n\n'"…and ${rest} more not shown."
    fi
  else
    # Nothing to list: add diagnostics that help tell an empty DB from a broken list call.
    local rev_json rev_total
    rev_json="$(fetch_jobs_revision)" || rev_json=""
    msg+=$'\n\n'"No job lines to show (list payload: ${list_n} rows)."
    if [[ "$jq_err" == "true" ]]; then
      msg+=" JSON/jq error while filtering."
    fi
    if json_ok "$rev_json"; then
      rev_total="$(as_uint "$(jq -r '(.data.total // 0)' <<<"$rev_json" 2>/dev/null || true)")"
      msg+=$'\n'"GET /api/jobs/revision reports ${rev_total} jobs in DB."
      if ((rev_total > 0 && list_n == 0)); then
        msg+=$'\n'"List response empty but DB has jobs — check reverse-proxy body limits, or multiple instances with different data dirs."
      elif ((rev_total == 0 && disc > 0)); then
        msg+=$'\n'"Pipeline run reported ${disc} discovered but DB job count is 0 — wrong JOBOPS_URL (different server), or DB reset since the run."
      fi
    else
      msg+=$'\n'"Could not read /api/jobs/revision for diagnostics."
    fi
    msg+=$'\n'"Open the app: $(tg_html_escape "${BASE}")"
  fi

  send_tg_html "$msg"
}

main() {
  local body resp run_body prev_started
  local was_running=0 poll running lr started completed

  # `|| body=""` so an unreachable server is reported instead of killing the script via set -e.
  body="$(fetch_status)" || body=""
  if ! json_ok "$body"; then
    send_tg_html "Jobber: /api/pipeline/status failed (before run). Check container."
    exit 1
  fi
  if jq -e '.data.isRunning == true' >/dev/null 2>&1 <<<"$body"; then
    send_tg_html "Jobber: pipeline already running; skipping scheduled run."
    exit 0
  fi
  # Remember the previous run so a run that finishes between two polls is still noticed.
  prev_started="$(jq -r '.data.lastRun.startedAt // ""' <<<"$body" 2>/dev/null)" || prev_started=""

  apply_cron_settings

  # Optional: comma-separated sources (see JOBBER_PIPELINE_SOURCES in jobber-cron.env.example).
  # If unset, POST body is {} and the server uses its default source list.
  run_body='{}'
  if [[ -n "${JOBBER_PIPELINE_SOURCES:-}" ]]; then
    run_body="$(jq -n --arg s "$JOBBER_PIPELINE_SOURCES" \
      '$s | split(",") | map(gsub("^\\s+|\\s+$";"")) | map(select(. != "")) | {sources: .}')"
  fi

  resp="$(api_curl -X POST "${BASE}/api/pipeline/run" \
    -H "Content-Type: application/json" -d "$run_body")" || resp=""
  if ! json_ok "$resp"; then
    send_tg_html "Jobber: POST /api/pipeline/run failed: $(tg_html_escape "$(describe_response "$resp")")"
    exit 1
  fi

  # 720 polls x 30s = 6h.
  for ((poll = 0; poll < 720; poll++)); do
    sleep 30
    body="$(fetch_status)" || body=""
    if ! json_ok "$body"; then
      send_tg_html "Jobber: status check failed mid-run."
      exit 1
    fi
    running="$(jq -r '.data.isRunning' <<<"$body")"
    if [[ "$running" == "true" ]]; then
      was_running=1
      continue
    fi
    lr="$(jq -c '.data.lastRun' <<<"$body")"
    if ((was_running == 0)); then
      # Never observed the run in progress. Treat it as finished only when lastRun
      # is a different, completed run than the one seen before the POST.
      # NOTE(review): assumes lastRun.startedAt differs between runs — confirm against the API.
      started="$(jq -r '.startedAt // ""' <<<"$lr" 2>/dev/null)" || started=""
      completed="$(jq -r '.completedAt // ""' <<<"$lr" 2>/dev/null)" || completed=""
      if [[ -z "$started" || -z "$completed" || "$started" == "$prev_started" ]]; then
        continue
      fi
    fi
    report_finished_run "$lr"
    exit 0
  done

  send_tg_html "Jobber: timed out waiting for pipeline (6h). Check server."
  exit 1
}

main "$@"