Jobber/scripts/jobber-pipeline-telegram.sh
ilia 09ab32e1e8 chore: default pipeline includes Glassdoor; document DB copy and cron sources
- Add glassdoor to DEFAULT_CONFIG so POST /api/pipeline/run with {} runs JobSpy trio.
- jobber-pipeline-telegram.sh: optional JOBBER_PIPELINE_SOURCES for explicit source list.
- Deploy doc: WAL checkpoint + rsync jobs.db to VM for profiles/settings/jobs.

Made-with: Cursor
2026-04-05 19:49:01 -04:00

264 lines
9.6 KiB
Bash
Executable File

#!/usr/bin/env bash
# Run Jobber pipeline, wait until it finishes, send summary + job links to Telegram.
# Secrets: copy scripts/jobber-cron.env.example to /root/.jobber-cron.env (chmod 600).
set -euo pipefail

# Path of the env file holding secrets; JOBBER_CRON_ENV overrides the default.
ENV_FILE="${JOBBER_CRON_ENV:-/root/.jobber-cron.env}"
[[ -f "$ENV_FILE" ]] || {
  echo "Missing env file: $ENV_FILE (set JOBBER_CRON_ENV or create the default path)" >&2
  exit 1
}

# shellcheck source=/dev/null
source "$ENV_FILE"

# Both Telegram settings are mandatory; abort with a hint when either is unset or empty.
: "${TELEGRAM_BOT_TOKEN:?Set TELEGRAM_BOT_TOKEN in $ENV_FILE}"
: "${TELEGRAM_CHAT_ID:?Set TELEGRAM_CHAT_ID in $ENV_FILE}"

# Base URL of the Jobber HTTP API and the cap on job links per Telegram message.
BASE="${JOBOPS_URL:-http://127.0.0.1:3005}"
MAX_JOBS="${JOB_TELEGRAM_MAX_JOBS:-25}"

# Extra curl arguments for HTTP basic auth; stays empty unless both user and
# password are provided.
AUTH=()
if [[ -n "${BASIC_AUTH_USER:-}" && -n "${BASIC_AUTH_PASSWORD:-}" ]]; then
  AUTH+=(-u "${BASIC_AUTH_USER}:${BASIC_AUTH_PASSWORD}")
fi
# Escape text for use as Telegram HTML message content.
# Arguments: $1 - raw text
# Outputs:   $1 with &, < and > replaced by &amp;, &lt; and &gt; (stdout)
tg_html_escape() {
  local raw="$1"
  # The ampersand rule runs first so the entities emitted by the later rules
  # are not escaped a second time.
  printf '%s' "$raw" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g'
}
# Escape a URL for use inside a double-quoted href="..." attribute of a
# Telegram HTML message.
# Arguments: $1 - raw URL
# Outputs:   $1 with &, ", < and > replaced by their HTML entities (stdout)
tg_href_escape() {
  # & is handled first so the entities produced afterwards are not re-escaped.
  # < and > are escaped as well: Telegram's HTML mode requires every <, > and &
  # that is not part of a tag or entity to be written as an entity.
  printf '%s' "$1" \
    | sed -e 's/&/\&amp;/g' -e 's/"/\&quot;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}
# Send one HTML-formatted message to the configured Telegram chat.
# Globals:   TELEGRAM_BOT_TOKEN, TELEGRAM_CHAT_ID (read)
# Arguments: $1 - message text (Telegram HTML markup)
# Outputs:   nothing on stdout; a diagnostic on stderr when delivery fails
# Returns:   0 when Telegram answers with ok == true, 1 otherwise
send_tg_html() {
  local msg="$1"
  local payload reply

  # Build the JSON body with jq so the text is always correctly JSON-encoded.
  payload="$(jq -n \
    --arg c "$TELEGRAM_CHAT_ID" \
    --arg t "$msg" \
    '{chat_id: $c, text: $t, parse_mode: "HTML", disable_web_page_preview: true}')" || {
    echo "Telegram sendMessage: could not build JSON payload" >&2
    return 1
  }

  if ! reply="$(curl -sS -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
    -H "Content-Type: application/json" \
    -d "$payload")"; then
    echo "Telegram sendMessage: HTTP request failed" >&2
    return 1
  fi

  # Telegram reports rejections (message too long, malformed markup, wrong
  # chat id, ...) in the response body; without this check they would be lost.
  if ! jq -e '.ok == true' <<<"$reply" >/dev/null 2>&1; then
    printf 'Telegram sendMessage rejected: %s\n' "$reply" >&2
    return 1
  fi
}
# GET ${BASE}/api/pipeline/status and print the response body on stdout.
# Globals: BASE, AUTH (read). The exit status is curl's.
fetch_status() {
  local -a curl_opts=(-sS --compressed "${AUTH[@]}" -H "Accept: application/json")
  curl "${curl_opts[@]}" "${BASE}/api/pipeline/status"
}
# GET ${BASE}/api/jobs?view=list and print the response body on stdout.
# Globals: BASE, AUTH (read). The exit status is curl's.
fetch_jobs_list() {
  local -a curl_opts=(-sS --compressed "${AUTH[@]}" -H "Accept: application/json")
  curl "${curl_opts[@]}" "${BASE}/api/jobs?view=list"
}
# GET ${BASE}/api/jobs/revision and print the response body on stdout.
# Globals: BASE, AUTH (read). The exit status is curl's.
fetch_jobs_revision() {
  local -a curl_opts=(-sS --compressed "${AUTH[@]}" -H "Accept: application/json")
  curl "${curl_opts[@]}" "${BASE}/api/jobs/revision"
}
# Poll the jobs list until it looks usable, then print it on stdout.
# The list can briefly lag behind a finished run, and a flaky proxy may return
# a non-ok body, so up to 25 attempts are made with a 2 second pause after each
# unsuccessful one.
# Arguments: $1 - number of jobs the run reported as discovered
# Outputs:   the first response with ok == true that either contains jobs or
#            was fetched while $1 is 0; otherwise the last response received
fetch_jobs_list_when_ready() {
  local expected_discovered="$1"
  local payload=""
  local job_count=0
  local tries
  for ((tries = 0; tries < 25; tries++)); do
    payload="$(fetch_jobs_list)"
    if jq -e '.ok == true' <<<"$payload" >/dev/null 2>&1; then
      job_count="$(jq -r '((.data // {}) | .jobs // []) | length' <<<"$payload")"
      # An empty list is only acceptable when the run discovered nothing.
      if [[ "$expected_discovered" -eq 0 ]] || [[ "$job_count" -gt 0 ]]; then
        echo "$payload"
        return 0
      fi
    fi
    sleep 2
  done
  # Attempts exhausted: hand back whatever came last and let the caller judge it.
  echo "$payload"
}
# Select the jobs belonging to a pipeline run and reduce them to link data.
# Arguments: $1 - JSON body of the jobs list response (jobs read from .data.jobs)
#            $2 - run start timestamp (may be empty)
#            $3 - run completion timestamp (may be empty)
#            $4 - maximum number of jobs to return (must be a JSON number)
# Outputs:   one compact JSON object on stdout:
#              {total, usedFallback, lines: [{title, url, employer}, ...]}
#            total counts every selected job, lines holds at most $4 of them,
#            newest discoveredAt first. usedFallback is true when no job
#            matched the run window and the full list was used instead.
# Returns:   jq's exit status (non-zero on invalid input JSON or a bad $4)
build_job_lines_html() {
  local jobs_json="$1"
  local started="$2"
  local completed="$3"
  local max_n="$4"
  # Pipeline run times are ISO-8601 (…T…Z). Jobs often use SQLite datetime('now'): "YYYY-MM-DD HH:MM:SS".
  # Raw string compare treats space before the clock as sorting before "T", so every SQLite-style
  # discoveredAt incorrectly falls *before* the run window. Normalize for comparison only.
  #
  # Selection order: jobs inside [start, end]; else jobs at or after start;
  # else every job (fallback). The slice limit must be the variable $max:
  # a bare "max" is the jq builtin and makes the slice fail at runtime.
  echo "$jobs_json" | jq -c --arg s "$started" --arg e "$completed" --argjson max "$max_n" '
    def pickurl:
      if (.jobUrl // "") != "" then .jobUrl
      elif (.applicationLink // "") != "" then .applicationLink
      else "" end;
    def normalizeTs:
      if . == null or . == "" then ""
      elif test("[Tt]") then .
      else sub(" "; "T")
      end;
    def pickrows($all):
      if ($s == "" or $s == null) then
        { rows: ($all | sort_by(.discoveredAt | normalizeTs) | reverse), usedFallback: true }
      else
        ($all
          | map(select(
              ($e != "" and ((.discoveredAt | normalizeTs) >= ($s | normalizeTs)) and ((.discoveredAt | normalizeTs) <= ($e | normalizeTs)))
            ))) as $win |
        if ($win | length) > 0 then { rows: $win, usedFallback: false }
        else
          ($all | map(select(((.discoveredAt | normalizeTs) >= ($s | normalizeTs))))) as $from |
          if ($from | length) > 0 then { rows: $from, usedFallback: false }
          else
            { rows: ($all | sort_by(.discoveredAt | normalizeTs) | reverse), usedFallback: true }
          end
        end
      end;
    (((.data // {}) | .jobs) // []) as $all |
    if ($all | length) == 0 then
      {total: 0, lines: [], usedFallback: false}
    else
      pickrows($all) as $picked |
      ($picked.rows | sort_by(.discoveredAt | normalizeTs) | reverse) as $sorted |
      ($sorted | length) as $total |
      ($sorted | .[0:$max]) as $slice |
      {
        total: $total,
        usedFallback: $picked.usedFallback,
        lines: [
          $slice[] |
          {
            title: (.title // "Untitled"),
            url: pickurl,
            employer: (.employer // "")
          }
        ]
      }
    end
  '
}
# Append one Telegram-HTML line per job to a caller-owned string variable.
# Arguments: $1 - JSON object with a "lines" array of {title, url, employer}
#            $2 - NAME of the variable to append to (bound via nameref)
# Each appended line starts with a newline and reads
#   "<employer> — <a href="url">title</a>"   when a URL is present, or
#   "<employer> — title <i>(no URL)</i>"     when it is not.
# The "<employer> — " prefix is left out when the employer is empty.
append_lines_from_json() {
  local sel="$1"
  local -n _out="$2"
  local item line title url emp prefix
  while IFS= read -r item; do
    [[ -z "$item" || "$item" == "null" ]] && continue
    title="$(jq -r '.title // "Untitled"' <<<"$item")"
    url="$(jq -r '.url // ""' <<<"$item")"
    emp="$(jq -r '.employer // ""' <<<"$item")"
    # Build the employer prefix once so both branches separate employer and
    # title the same way, and no dangling dash appears without an employer.
    prefix=""
    if [[ -n "$emp" ]]; then
      prefix="$(tg_html_escape "$emp") — "
    fi
    if [[ -n "$url" && "$url" != "null" ]]; then
      line="${prefix}<a href=\"$(tg_href_escape "$url")\">$(tg_html_escape "$title")</a>"
    else
      line="${prefix}$(tg_html_escape "$title") <i>(no URL)</i>"
    fi
    _out+=$'\n'"${line}"
  done < <(jq -c '.lines[]? // empty' <<<"$sel")
}
# ---------------------------------------------------------------------------
# Main flow: check status, start a run, poll until it ends, report to Telegram.
# Every HTTP fetch below is followed by "|| <fallback>" so that a curl failure
# reaches the explicit error handling (and its Telegram message) instead of
# aborting the script through `set -e` before anything is sent.
# ---------------------------------------------------------------------------

# Pre-flight: the status endpoint must answer with ok == true and no run may be active.
body="$(fetch_status)" || body=""
if ! echo "$body" | jq -e '.ok == true' >/dev/null 2>&1; then
  send_tg_html "Jobber: /api/pipeline/status failed (before run). Check container."
  exit 1
fi
if echo "$body" | jq -e '.data.isRunning == true' >/dev/null 2>&1; then
  send_tg_html "Jobber: pipeline already running; skipping scheduled run."
  exit 0
fi

# Fingerprint of the last run as it looked BEFORE this run was triggered.
# It is compared while polling so that a run which starts and finishes between
# two polls (isRunning never observed as true) is still recognised as finished.
run_sig_filter='(.data.lastRun // {}) | [(.startedAt // ""), (.completedAt // "")] | map(tostring) | join("|")'
prev_run_sig="$(echo "$body" | jq -r "$run_sig_filter" 2>/dev/null)" || prev_run_sig=""

# Optional: comma-separated sources (see JOBBER_PIPELINE_SOURCES in jobber-cron.env.example).
# If unset, POST body is {} and the server uses its default source list.
run_body='{}'
if [[ -n "${JOBBER_PIPELINE_SOURCES:-}" ]]; then
  run_body="$(jq -n --arg s "$JOBBER_PIPELINE_SOURCES" \
    '$s | split(",") | map(gsub("^\\s+|\\s+$";"")) | map(select(. != "")) | {sources: .}')"
fi

# Trigger the run. On a transport failure the text below is not JSON, so it
# takes the failure branch and ends up verbatim in the Telegram message.
resp="$(curl -sS --compressed "${AUTH[@]}" -X POST "${BASE}/api/pipeline/run" \
  -H "Accept: application/json" -H "Content-Type: application/json" -d "$run_body")" \
  || resp="curl exited with status $? (no usable response)"
if ! echo "$resp" | jq -e '.ok == true' >/dev/null 2>&1; then
  _fail_json="$(echo "$resp" | jq -c . 2>/dev/null || echo "$resp")"
  send_tg_html "Jobber: POST /api/pipeline/run failed: $(tg_html_escape "$_fail_json")"
  exit 1
fi

# Poll every 30 s, at most 720 times (6 hours in total).
was_running=0
for ((poll = 0; poll < 720; poll++)); do
  sleep 30
  body="$(fetch_status)" || body=""
  if ! echo "$body" | jq -e '.ok == true' >/dev/null 2>&1; then
    send_tg_html "Jobber: status check failed mid-run."
    exit 1
  fi

  running="$(echo "$body" | jq -r '.data.isRunning')"
  if [[ "$running" == "true" ]]; then
    was_running=1
    continue
  fi

  if [[ "$was_running" -ne 1 ]]; then
    # The run was never seen as active. Treat it as finished only when lastRun
    # changed since the pre-run snapshot AND carries a completion time;
    # otherwise it has presumably not started yet, so keep waiting.
    cur_run_sig="$(echo "$body" | jq -r "$run_sig_filter" 2>/dev/null)" || cur_run_sig=""
    completed_now="$(echo "$body" | jq -r '.data.lastRun.completedAt // ""' 2>/dev/null)" || completed_now=""
    if [[ -z "$completed_now" || "$cur_run_sig" == "$prev_run_sig" ]]; then
      continue
    fi
  fi

  # ---- The run has finished: assemble the summary ----
  lr="$(echo "$body" | jq '.data.lastRun')"
  st="$(echo "$lr" | jq -r '.status // "unknown"')"
  disc="$(echo "$lr" | jq -r '.jobsDiscovered // 0')"
  proc="$(echo "$lr" | jq -r '.jobsProcessed // 0')"
  err="$(echo "$lr" | jq -r '.errorMessage // empty')"
  started="$(echo "$lr" | jq -r '.startedAt // ""')"
  completed="$(echo "$lr" | jq -r '.completedAt // ""')"

  msg="<b>Jobber</b> pipeline: <b>$(tg_html_escape "$st")</b>"
  msg+=$'\n'"Discovered: ${disc}, processed: ${proc}."
  if [[ -n "$err" ]]; then
    msg+=$'\n'"<b>Error:</b> $(tg_html_escape "$err")"
  fi

  jobs_resp="$(fetch_jobs_list_when_ready "$disc")" || true
  if echo "$jobs_resp" | jq -e '.ok == true' >/dev/null 2>&1; then
    list_n="$(echo "$jobs_resp" | jq -r '((.data // {}) | .jobs // []) | length')"
    # A jq failure while filtering is reported in the message rather than fatal.
    if ! sel="$(build_job_lines_html "$jobs_resp" "$started" "$completed" "$MAX_JOBS")"; then
      sel='{"total":0,"lines":[],"usedFallback":false,"jqError":true}'
    fi
    total="$(echo "$sel" | jq -r '.total // 0')"
    shown="$(echo "$sel" | jq -r '.lines | length')"
    used_fb="$(echo "$sel" | jq -r '.usedFallback // false')"
    jq_err="$(echo "$sel" | jq -r '.jqError // false')"

    if [[ "$total" -gt 0 ]]; then
      if [[ "$used_fb" == "true" ]]; then
        msg+=$'\n\n'"<b>Recent jobs</b> (showing ${shown} of ${total}; time window did not match — links may include older discoveries):"
      else
        msg+=$'\n\n'"<b>Jobs in this run</b> (showing ${shown} of ${total}):"
      fi
      append_lines_from_json "$sel" msg
      rest=$((total - shown))
      if [[ "$rest" -gt 0 ]]; then
        msg+=$'\n\n'"<i>…and ${rest} more not shown.</i>"
      fi
    else
      # Nothing to list: add diagnostics based on the job count the server reports.
      rev_json="$(fetch_jobs_revision)" || rev_json=""
      # A non-JSON reply makes jq fail; count that as "revision unavailable".
      rev_ok="$(echo "$rev_json" | jq -r 'if .ok == true then "1" else "0" end' 2>/dev/null)" || rev_ok="0"
      rev_total="-1"
      if [[ "$rev_ok" == "1" ]]; then
        rev_total="$(echo "$rev_json" | jq -r '(.data.total // 0)')"
      fi
      msg+=$'\n\n'"<i>No job lines to show (list payload: ${list_n} rows)."
      if [[ "$jq_err" == "true" ]]; then
        msg+=" JSON/jq error while filtering.</i>"
      else
        msg+="</i>"
      fi
      if [[ "$rev_ok" == "1" ]]; then
        msg+=$'\n'"<i>GET /api/jobs/revision reports <b>${rev_total}</b> jobs in DB.</i>"
        if [[ "$rev_total" -gt 0 && "$list_n" -eq 0 ]]; then
          msg+=$'\n'"<i>List response empty but DB has jobs — check reverse-proxy body limits, or multiple instances with different data dirs.</i>"
        elif [[ "$rev_total" -eq 0 && "$disc" -gt 0 ]]; then
          msg+=$'\n'"<i>Pipeline run reported ${disc} discovered but DB job count is 0 — wrong <code>JOBOPS_URL</code> (different server), or DB reset since the run.</i>"
        fi
      else
        msg+=$'\n'"<i>Could not read /api/jobs/revision for diagnostics.</i>"
      fi
      msg+=$'\n'"<i>Open the app: $(tg_html_escape "${BASE}")</i>"
    fi
  else
    msg+=$'\n\n'"<i>Could not load GET /api/jobs for links.</i>"
  fi

  send_tg_html "$msg"
  exit 0
done

send_tg_html "Jobber: timed out waiting for pipeline (6h). Check server."
exit 1