From f17a1a3bccf409789fbab74120b6375c8f4817c0 Mon Sep 17 00:00:00 2001 From: ilia Date: Sat, 23 May 2026 20:23:10 -0400 Subject: [PATCH] Add homelab SSO, maintenance cron, and inventory cleanup. Cal Authentik OIDC playbook/role (deferred until license), Vikunja OIDC docs and vault secrets, SSO matrix, mailcow LAN proxy fix, extended security audit docs, maintenance_cron role with group_vars split, and inventory updates (vikunja rename, identity/monitoring/cal host_vars). Co-authored-by: Cursor --- .gitignore | 1 + Makefile | 18 +- docs/guides/cal-authentik-oidc.md | 56 ++++ docs/guides/homelab-status-2026-05-22.md | 10 +- docs/guides/host-list.md | 2 +- docs/guides/levkin-selfhost-plan-2.md | 88 +++--- docs/guides/mailcow-lan-proxy-fix.md | 42 +++ docs/guides/monitoring-stack.md | 2 +- docs/guides/security-audit-report.md | 195 ++++++++++++- docs/guides/security-remediation-plan.md | 47 +++- docs/guides/sso-selfhosted-matrix.md | 51 ++++ docs/guides/vikunja-authentik-oidc.md | 51 ++++ .../group_vars/all/vault.example.yml | 11 + .../production/group_vars/all/vault.yml | 265 ++++++++++-------- .../production/group_vars/ansible/main.yml | 2 + .../group_vars/comms/maintenance_cron.yml | 2 + .../production/group_vars/proxmox/main.yml | 4 + .../group_vars/services/maintenance_cron.yml | 4 + .../group_vars/sites/maintenance_cron.yml | 2 + .../production/host_vars/ansibleVM.yml | 15 +- inventories/production/host_vars/cal.yml | 9 + inventories/production/host_vars/dev02.yml | 16 -- .../production/host_vars/git-ci-01.yml | 6 +- .../host_vars/giteaVM/maintenance_cron.yml | 7 + .../{giteaVM.yml => giteaVM/vault.yml} | 0 inventories/production/host_vars/identity.yml | 3 + inventories/production/host_vars/listmonk.yml | 9 +- .../production/host_vars/monitoring.yml | 3 + .../production/host_vars/vaultwardenVM.yml | 14 +- inventories/production/host_vars/vikunja.yml | 3 + inventories/production/hosts | 6 +- playbooks/cal-authentik-oidc.yml | 80 ++++++ playbooks/maintenance.yml | 
1 + roles/cal_sso/defaults/main.yml | 10 + roles/cal_sso/handlers/main.yml | 20 ++ roles/cal_sso/tasks/authentik.yml | 25 ++ roles/cal_sso/tasks/main.yml | 52 ++++ .../templates/authentik-cal-oidc.yaml.j2 | 38 +++ roles/cal_sso/templates/docker-compose.yml.j2 | 44 +++ roles/maintenance_cron/README.md | 23 ++ roles/maintenance_cron/defaults/main.yml | 18 ++ roles/maintenance_cron/tasks/main.yml | 27 ++ .../templates/docker-prune.sh.j2 | 8 + .../templates/gitea-archive-prune.sh.j2 | 19 ++ .../templates/system-maintenance.sh.j2 | 7 + scripts/kuma-add-monitors.sh | 70 +++++ scripts/security-audit-extended.sh | 71 +++++ 47 files changed, 1226 insertions(+), 231 deletions(-) create mode 100644 docs/guides/cal-authentik-oidc.md create mode 100644 docs/guides/mailcow-lan-proxy-fix.md create mode 100644 docs/guides/sso-selfhosted-matrix.md create mode 100644 docs/guides/vikunja-authentik-oidc.md create mode 100644 inventories/production/group_vars/ansible/main.yml create mode 100644 inventories/production/group_vars/comms/maintenance_cron.yml create mode 100644 inventories/production/group_vars/proxmox/main.yml create mode 100644 inventories/production/group_vars/services/maintenance_cron.yml create mode 100644 inventories/production/group_vars/sites/maintenance_cron.yml create mode 100644 inventories/production/host_vars/cal.yml delete mode 100644 inventories/production/host_vars/dev02.yml create mode 100644 inventories/production/host_vars/giteaVM/maintenance_cron.yml rename inventories/production/host_vars/{giteaVM.yml => giteaVM/vault.yml} (100%) create mode 100644 inventories/production/host_vars/identity.yml create mode 100644 inventories/production/host_vars/monitoring.yml create mode 100644 inventories/production/host_vars/vikunja.yml create mode 100644 playbooks/cal-authentik-oidc.yml create mode 100644 roles/cal_sso/defaults/main.yml create mode 100644 roles/cal_sso/handlers/main.yml create mode 100644 roles/cal_sso/tasks/authentik.yml create mode 100644 
roles/cal_sso/tasks/main.yml create mode 100644 roles/cal_sso/templates/authentik-cal-oidc.yaml.j2 create mode 100644 roles/cal_sso/templates/docker-compose.yml.j2 create mode 100644 roles/maintenance_cron/README.md create mode 100644 roles/maintenance_cron/defaults/main.yml create mode 100644 roles/maintenance_cron/tasks/main.yml create mode 100644 roles/maintenance_cron/templates/docker-prune.sh.j2 create mode 100644 roles/maintenance_cron/templates/gitea-archive-prune.sh.j2 create mode 100644 roles/maintenance_cron/templates/system-maintenance.sh.j2 create mode 100755 scripts/kuma-add-monitors.sh create mode 100755 scripts/security-audit-extended.sh diff --git a/.gitignore b/.gitignore index 9e95aa7..8797e27 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ # Temporary files *.tmp *.bak +*.vault-bak *~ vault.yml.bak.* diff --git a/Makefile b/Makefile index b9bbee7..b6e6f4d 100644 --- a/Makefile +++ b/Makefile @@ -288,6 +288,14 @@ caddy-levkin: require-ansible ## Ensure levkin.ca reverse proxy on Caddy VM @echo "$(YELLOW)Updating Caddy for levkin.ca...$(RESET)" $(ANSIBLE_PLAYBOOK) playbooks/caddy-levkin-site.yml $(ANSIBLE_ARGS) +cal-oidc: require-ansible ## Cal.com SAML DB + Authentik OIDC provider (usage: make cal-oidc) + @echo "$(YELLOW)Configuring Cal.com ↔ Authentik OIDC...$(RESET)" + $(ANSIBLE_PLAYBOOK) playbooks/cal-authentik-oidc.yml $(ANSIBLE_ARGS) + +cal-oidc-check: require-ansible ## Dry-run Cal.com ↔ Authentik OIDC + @echo "$(YELLOW)Checking Cal.com ↔ Authentik OIDC...$(RESET)" + $(ANSIBLE_PLAYBOOK) playbooks/cal-authentik-oidc.yml --check --diff $(ANSIBLE_ARGS) + workstations: ## Run workstation baseline (usage: make workstations [GROUP=dev] [HOST=dev01]) @echo "$(YELLOW)Applying workstation baseline...$(RESET)" @EXTRA=""; \ @@ -660,10 +668,10 @@ endif @BOOTSTRAP_USER="$(BOOTSTRAP_USER)" TARGET_USER="$(TARGET_USER)" \ scripts/bootstrap-root-ssh.sh "$(HOST)" -bootstrap-root-ssh-services: ## Bootstrap root SSH via ladmin (caddy, listmonk, 
vikanjans) +bootstrap-root-ssh-services: ## Bootstrap root SSH via ladmin (caddy, listmonk, vikunja) @chmod +x scripts/bootstrap-root-ssh.sh @failed=0; ok=0; \ - for host in caddy listmonk vikanjans; do \ + for host in caddy listmonk vikunja; do \ echo ""; echo "$(BLUE)==> $$host$(RESET)"; \ if BOOTSTRAP_USER="$(BOOTSTRAP_USER)" scripts/bootstrap-root-ssh.sh "$$host"; then \ ok=$$((ok+1)); \ @@ -695,6 +703,10 @@ vault-export-env: ## Write vault secrets into .env (keeps existing non-empty key @chmod +x scripts/vault-export-env.sh @./scripts/vault-export-env.sh "$(or $(ENV_FILE),.env)" +kuma-add-monitors: ## Add default Uptime Kuma monitors (needs KUMA_PASSWORD in .env) + @chmod +x scripts/kuma-add-monitors.sh + @./scripts/kuma-add-monitors.sh + vault-import-env: ## Merge .env secrets into Ansible vault (usage: make vault-import-env [ENV_FILE=.env]) @chmod +x scripts/vault-import-env.sh @ENV_FILE="$(or $(ENV_FILE),.env)" scripts/vault-import-env.sh "$(or $(ENV_FILE),.env)" @@ -702,7 +714,7 @@ vault-import-env: ## Merge .env secrets into Ansible vault (usage: make vault-im bootstrap-root-ssh-failed: ## Bootstrap root SSH on hosts that failed direct root copy-ssh-keys @chmod +x scripts/bootstrap-root-ssh.sh @failed=0; ok=0; \ - for host in caddy listmonk vikanjans n8n qBittorrent actual caseware auto mailcow; do \ + for host in caddy listmonk vikunja n8n qBittorrent actual caseware auto mailcow; do \ echo ""; echo "$(BLUE)==> $$host$(RESET)"; \ if BOOTSTRAP_USER="$(BOOTSTRAP_USER)" scripts/bootstrap-root-ssh.sh "$$host"; then \ ok=$$((ok+1)); \ diff --git a/docs/guides/cal-authentik-oidc.md b/docs/guides/cal-authentik-oidc.md new file mode 100644 index 0000000..246f20a --- /dev/null +++ b/docs/guides/cal-authentik-oidc.md @@ -0,0 +1,56 @@ +# Cal.com → Authentik OIDC + +**Status: deferred** — Cal.com self-hosted SSO is a **commercial (enterprise) feature**. Without a valid `CALCOM_LICENSE_KEY`, the UI at `/settings/security/sso` stays locked (*Contact sales*). 
+ +See **[sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)** for Phase 4 apps that do not need a Cal-style license. + +## Current state (2026-05-23) + +| Item | Status | +|------|--------| +| `calsaml` Postgres DB | ✅ Created | +| `SAML_DATABASE_URL`, `SAML_ADMINS` in `/opt/cal/.env` | ✅ Set | +| `docker-compose` passes license + SAML env | ✅ | +| Authentik app `cal-com` + provider `cal-com-oidc` | ✅ (ready when license exists) | +| `CALCOM_LICENSE_KEY` in `.env` | ❌ **Empty** — SSO UI blocked | +| Cal UI OIDC configuration | ⏳ **Blocked** until license | + +## When you have a license + +1. Add to `/opt/cal/.env`: + ```bash + CALCOM_LICENSE_KEY= + NEXT_PUBLIC_LICENSE_CONSENT=agree + ``` +2. Restart: `ssh cal` → `cd /opt/cal && docker compose up -d` +3. Confirm in container: `docker exec calcom printenv CALCOM_LICENSE_KEY` (non-empty) +4. Log in as **`idobkin@gmail.com`** → **https://cal.levkin.ca/settings/security/sso** +5. Configure OIDC: + +| Field | Value | +|-------|--------| +| Client ID | `cal-com` | +| Client Secret | from Authentik → Applications → Cal.com | +| Well Known URL | `https://auth.levkin.ca/application/o/cal-com/.well-known/openid-configuration` | + +6. Test SSO; keep local Cal password as break-glass. + +## Ansible (infra only) + +```bash +make cal-oidc # SAML DB + Authentik provider (safe to re-run) +make cal-oidc-check +``` + +Vault (optional): `vault_cal_oidc_client_secret` — see `vault.example.yml`. 
+ +## Redirect URI (Authentik) + +```text +https://cal.levkin.ca/api/auth/oidc +``` + +## Related + +- [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md) +- [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md) diff --git a/docs/guides/homelab-status-2026-05-22.md b/docs/guides/homelab-status-2026-05-22.md index 4bae4f7..9900250 100644 --- a/docs/guides/homelab-status-2026-05-22.md +++ b/docs/guides/homelab-status-2026-05-22.md @@ -1,6 +1,6 @@ -# Homelab status — 2026-05-22 +# Homelab status — 2026-05-23 -Quick checklist after monitoring / sites / git pass. +Quick checklist. **Master plan:** [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md) · **Cursor plan:** `~/.cursor/plans/levkin_selfhost_rollout_e75909ae.plan.md` ## Done (automation) @@ -29,7 +29,11 @@ Quick checklist after monitoring / sites / git pass. - [ ] **DNS `levkin.ca` + `www`** — A records → home IP (`142.180.237.136`); apex currently parked at AWS, not homelab - [ ] **Gitea deploy key (levkin LXC 220)** — add `deploy-levkin-levkin.ca` pubkey in repo settings (SSH pull); HTTPS clone works meanwhile - [ ] **UniFi DHCP reservations** — [unifi-static-dhcp.md](unifi-static-dhcp.md) @ https://192.168.2.1/ -- [ ] **Cal.com → Authentik OIDC** — first SSO (~1–2 h) — [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md) +- [ ] **Cal.com → Authentik OIDC** — **deferred** (no license key) — [cal-authentik-oidc.md](cal-authentik-oidc.md); Phase 4 → Vikunja — [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md) +- [x] **Portainer VM 109** — stopped and destroyed on pve10 (2026-05-23) +- [x] **Listmonk** — service was stopped; `listmonk.service` enabled on VM 113 (2026-05-23) +- [x] **Mailcow** — LAN TCP timeout fixed (netfilter `MAILCOW` drop rule) — [mailcow-lan-proxy-fix.md](mailcow-lan-proxy-fix.md) +- [ ] **DebianDesktop VM 100** — RAM lowered to 24 GB in Proxmox; **reboot guest** to apply balloon - [ ] **Nextcloud VM 201 retire** — remove Kuma monitor, Caddy `nextcloud.levkin.ca`, stop VM - [ ] 
**NAS.SP00 disk replace** — then start Jellyfin (VM 101) - [x] **Gitea deploy key (portfolio)** — `git pull` works on LXC 219; Gitea VM SSH fixed (`/home/git/.ssh/authorized_keys` + `sudo` to `gitea`) diff --git a/docs/guides/host-list.md b/docs/guides/host-list.md index 8cf3e54..cc98a30 100644 --- a/docs/guides/host-list.md +++ b/docs/guides/host-list.md @@ -62,7 +62,7 @@ Update this file whenever a guest is created, migrated, or re-IP’d. See [levki | 106 | caddy-debian | **edge** | `10.0.10.50/24` | `10.0.10.50/24` → **`.20`** (Phase 1.5) | ✅ **Static** (in-guest) | `BC:24:11:E0:49:B4` | `/etc/network/interfaces` static; Ansible `caddy` | | 107 | mattermost-ubuntu | comms | `10.0.10.107`? | TBD | ⏳ | `BC:24:11:66:6E:01` | Ping `.107` up; confirm not TrueNAS conflict — verify in guest | | 108 | actual-debian | business | `10.0.10.158/24` | `10.0.10.158/24` | ⏳ stable DHCP | `BC:24:11:10:7B:64` | Inventory `actual` | -| 109 | portainer-alpine | — | unknown | — | ⏳ | `BC:24:11:0F:40:4F` | Running; retire → Dockge on monitoring LXC | +| 109 | portainer-alpine | — | — | — | ✅ **Removed** | `BC:24:11:0F:40:4F` | Destroyed 2026-05-23; Dockge on monitoring LXC 218 | | 150 | pihole00-debian | — | link-local* | TBD | ⏳ | `BC:24:11:86:76:97` | Running | | 117 | hermes | services | `10.0.10.36/24` | `10.0.10.36/24` | ⏳ stable DHCP | `BC:24:11:51:1E:99` | On pve10; guest agent; inventory `hermes` | | 200 | PVE.BU.SVR | labs | `10.0.10.200/24` | `10.0.10.200/24` | ⏳ stable DHCP | `BC:24:11:DA:95:3B` | Running | diff --git a/docs/guides/levkin-selfhost-plan-2.md b/docs/guides/levkin-selfhost-plan-2.md index bc27440..86b67ee 100644 --- a/docs/guides/levkin-selfhost-plan-2.md +++ b/docs/guides/levkin-selfhost-plan-2.md @@ -15,16 +15,18 @@ Reference doc for the Proxmox homelab. 
Lives alongside the Cursor project that h | Area | Status | |------|--------| -| **Phase 0** Foundation | ✅ Mostly done — static IPs on pve10 LXCs; Caddy still on **VM 106** | -| **Phase 1** Identity (Authentik) | ✅ LXC **217** @ `10.0.10.21` | -| **Phase 2** Monitoring (Kuma, Dockge, Umami) | ✅ LXC **218** @ `10.0.10.22` | -| **Phase 3** Cal.com | ✅ LXC **210** — OIDC + auto site button still open | -| **Phase 4** SSO migration | ⏳ Not started (Cal → Authentik first) | -| **Phase 5–8** Immich, Crater, Outline, etc. | ⏳ Deferred | -| **Site consolidation** | ⏳ **Partial** — **levkin.ca** on LXC **220** @ `10.0.10.60` ✅; caseware/auto/portfolio on **215/216/219** ([site-lxc-git.md](site-lxc-git.md)); moving all static to Caddy VM is optional later | -| **dev-apps** (punim/pote/mirrormatch) | ⏳ **Not started** — punimTag **9101** still on **pve201** (active testing; do not migrate yet) | -| **Nextcloud retire** | ⏳ VM **201 is running again** on pve10 — finish decommission | -| **Portainer retire** | ⏳ VM **109 still running** (16 GB maxmem) on pve10 — stop after Dockge confirmed | +| **Phase 0** Foundation | ✅ Mostly done — pve10 LXCs static; site LXCs 215/216/219/220 static; Caddy still on **VM 106** @ `.50` | +| **Phase 1** Identity (Authentik) | ✅ LXC **217** @ `10.0.10.21` — admin + TOTP | +| **Phase 2** Monitoring | ✅ LXC **218** @ `10.0.10.22` — Kuma, Dockge, Umami, Kuma SMTP | +| **Phase 3** Cal.com | ✅ LXC **210** — booking + auto consult button; **OIDC deferred** (no enterprise license) | +| **Phase 4** SSO | ⏳ **Next:** Vikunja → Authentik — [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md) | +| **Phase 5–8** | ⏳ Immich, Crater, Outline, automation depth — after P0 backlog | +| **Comms health** | ✅ Mailcow + Listmonk restored 2026-05-23 — [mailcow-lan-proxy-fix.md](mailcow-lan-proxy-fix.md) | +| **Site consolidation** | ⏳ **Partial** — git LXCs + levkin.ca LXC 220; optional later: static on Caddy VM | +| **dev-apps** | ⏳ punimTag **9101** on pve201 
until testing done | +| **Nextcloud retire** | ⏳ VM **201** still running — **#1 RAM win on pve10** (~8 GiB) | +| **Portainer retire** | ✅ VM **109** destroyed 2026-05-23 (~16 GiB on pve10) | +| **Security pass** | 🟡 Partial — SSH keys + apt + cron 2026-05-23 — [security-remediation-plan.md](security-remediation-plan.md) | --- @@ -37,27 +39,25 @@ Use this before adding LXCs/VMs. Re-check with `pvesm status` and `free -h` on e | Resource | Total | Used | **Available** | Notes | |----------|-------|------|---------------|--------| | **local-lvm** (thin) | ~1.67 TiB | ~22% | **~1.30 TiB** | Plenty of disk for new LXCs | -| **RAM** (host) | 62 GiB | ~44 GiB | **~17 GiB** | Enough for **2–3 small LXCs** (2 GB each) as-is | +| **RAM** (host) | 62 GiB | ~28 GiB | **~33 GiB** | Portainer **109** removed 2026-05-23 | -**Realistic new capacity on pve10 (without stopping anything):** ~**4–6 GiB RAM** + **100–200 GiB disk** for one productivity/media LXC (Outline, Mealie, Immich-lite). +**Realistic new capacity on pve10 now:** ~**30+ GiB** headroom for Immich, Crater, Beszel, or **dev-apps** (6–8 GiB) after Nextcloud retires. -**If you free RAM first (recommended):** +**Still available to free:** | Stop / retire | Frees (maxmem) | |---------------|----------------| -| Portainer VM **109** | **16 GiB** | -| Nextcloud VM **201** | **8 GiB** | +| ~~Portainer VM **109**~~ | ✅ **16 GiB** freed | +| Nextcloud VM **201** | **8 GiB** ← do next | | Hermes VM **117** (if not needed) | **16 GiB** | -| Site LXCs 215/216 → Caddy static (future) | **~1 GiB** | - -After Portainer + Nextcloud off: **~41 GiB effective headroom** on pve10 — room for Immich, Crater, Beszel, or a **dev-apps** LXC (6–8 GiB). 
+| Site LXCs 215/216 → Caddy static (optional) | **~1 GiB** | ### pve201 (pve) — **do not add new services** | Resource | Total | Used | **Available** | Notes | |----------|-------|------|---------------|--------| | **local-lvm** | ~1.67 TiB | ~46% | **~922 GiB** | Disk OK | -| **RAM** | 125 GiB | ~122 GiB | **~3 GiB** | Saturated; GPU VM **104** (73 GB), punimTag **9101** (16 GB) | +| **RAM** | 125 GiB | ~114 GiB | **~10 GiB** | GPU VM **104** (64 GB), DebianDesktop **100** (24 GB set — **reboot guest**), punimTag **9101** (16 GB) | **Verdict:** New stacks belong on **pve10**. pve201 only benefits from **stopping/migrating** guests (punim after testing, GPU resize, old Kuma already stopped). @@ -80,7 +80,7 @@ After Portainer + Nextcloud off: **~41 GiB effective headroom** on pve10 — roo - **Jellyfin** (VM 101) — stopped - LXC **210, 215–218, 219** — static via `pct set`; **Caddy VM 106** — static in-guest `.50` - **Nextcloud VM 201** — export done; VM **still running** on pve10 — **retire next** (8 GB RAM reclaimed) -- **Portainer VM 109** — still **running** on pve10 (16 GB) — retire; Dockge on 218 replaces it +- ~~**Portainer VM 109**~~ — **removed** 2026-05-23 (~16 GiB RAM freed on pve10) - **Marketing sites** — LXC **220** (`levkin.ca`), **215/216/219** (git deploy), not yet on Caddy VM static roots - **punimTag dev** — pve201 LXC **9101** @ `10.0.10.121` (16 GB) — leave until testing done; then `dev-apps` on pve10 @@ -221,8 +221,8 @@ Steps: 2. ✅ `cal.levkin.ca` proxied via Caddy 3. ✅ Booking link live at `cal.levkin.ca/ilia/consult` with Jitsi location 4. ✅ Email working via `cal@levkine.ca` SMTP through Mailcow -5. ⏳ **Wire Cal.com to Authentik via OIDC** (first real SSO connection — do this after Phase 1) -6. ⏳ Update `auto.levkin.ca` button → `cal.levkin.ca/ilia/consult` (currently points to placeholder) +5. ⏳ **Cal.com OIDC** — **deferred** ([cal-authentik-oidc.md](cal-authentik-oidc.md)) — needs enterprise `CALCOM_LICENSE_KEY` +6. 
✅ `auto.levkin.ca` consult button → `cal.levkin.ca/ilia/consult` ### Phase 4 — SSO migration (~half a day, staged) Wire each to Authentik, least-risky first: @@ -319,14 +319,17 @@ pct reboot ## Backlog (priority order) -### P0 — next (Phase 1–2 largely ✅) -1. ~~Umami~~ ✅ -2. ~~Uptime Kuma~~ ✅ -3. ~~Dockge~~ ✅ -4. **Cal.com → Authentik OIDC** — first SSO -5. **Retire Nextcloud VM 201** + **Portainer VM 109** — frees **~24 GiB** on pve10 -6. **Beszel** — fits on monitoring LXC 218 or small agent LXCs -7. **Mealie** — new small LXC on pve10 (~2 GB) +### P0 — next (ordered) +1. ~~Umami / Kuma / Dockge~~ ✅ +2. ~~Portainer VM 109~~ ✅ (2026-05-23) +3. **Retire Nextcloud VM 201** — ~8 GiB on pve10; remove Caddy + Kuma monitor +4. **Vikunja → Authentik OIDC** — first real SSO ([sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)) +5. **UniFi DHCP reservations** — [unifi-static-dhcp.md](unifi-static-dhcp.md) +6. **DNS `levkin.ca` apex** → home IP (still parked at AWS) +7. **Beszel** on monitoring LXC 218 +8. ~~Cal.com OIDC~~ — deferred until `CALCOM_LICENSE_KEY`; Authentik app `cal-com` ready +9. **NAS.SP00** disk replace → Jellyfin VM 101 +10. 
**DebianDesktop VM 100** — reboot for 24 GB limit on pve201 ### P1 — when ready - **Outline** — wiki for client docs @@ -375,18 +378,21 @@ See **[homelab-status-2026-05-22.md](homelab-status-2026-05-22.md)** for automat | # | Task | Status | Effort | Frees / unlocks | |---|------|--------|--------|-----------------| | 1 | **Kuma SMTP** | ✅ done | — | — | -| 2 | **Cal.com → Authentik OIDC** | ⏳ **next** | 1–2 h | First SSO; test before Vikunja/Listmonk | -| 3 | **auto.levkin.ca** → Cal booking link | ⏳ | 15 min | Phase 3 item 6 | -| 4 | **Stop Portainer VM 109** | ⏳ | 10 min | **~16 GiB RAM** on pve10 | -| 5 | **Retire Nextcloud VM 201** | ⏳ | 30 min | **~8 GiB RAM**; remove Caddy + Kuma monitor | -| 6 | **UniFi DHCP reservations** | ⏳ | 20 min | [unifi-static-dhcp.md](unifi-static-dhcp.md) | -| 7 | **Beszel** on 218 or agents | ⏳ | 1 h | Capacity visibility before Immich | -| 8 | **NAS.SP00** disk → Jellyfin | ⏳ hardware | — | VM 101 | -| 9 | **Caddy → edge LXC `.20`** | ⏳ defer | ~30 min | Phase 1.5 | -| 10 | **dev-apps LXC** (pote, mirrormatch, then punim) | ⏳ defer | half day | pve201 RAM; punim **last** | -| 11 | **Static sites → Caddy VM** (optional) | ⏳ defer | 1 h | ~1 GiB; breaks git-on-LXC workflow unless you move deploy to Caddy | +| 2 | **Cal.com → Authentik OIDC** | ⏸ **deferred** | — | Needs `CALCOM_LICENSE_KEY`; infra ready — [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md) | +| 3 | **auto.levkin.ca** → Cal booking link | ✅ | — | Consult button live | +| 4 | **Stop Portainer VM 109** | ✅ | — | Removed 2026-05-23; **~16 GiB RAM** on pve10 | +| 5 | **Retire Nextcloud VM 201** | ⏳ **next** | 30 min | **~8 GiB RAM** on pve10 | +| 6 | **Vikunja → Authentik OIDC** | ⏳ | 1–2 h | Phase 4 kickoff | +| 7 | **UniFi DHCP reservations** | ⏳ | 20 min | [unifi-static-dhcp.md](unifi-static-dhcp.md) | +| 8 | **DNS levkin.ca apex** | ⏳ | 15 min | AWS parked → `142.180.237.136` | +| 9 | **Beszel** on 218 | ⏳ | 1 h | Capacity before Immich | +| 10 | **NAS.SP00** disk 
→ Jellyfin | ⏳ hardware | — | VM 101 | +| 11 | **DebianDesktop reboot** | ⏳ | 5 min | Apply 24 GB on pve201 | +| 12 | **Caddy → edge LXC `.20`** | ⏳ defer | ~30 min | Phase 1.5 | +| 13 | **dev-apps LXC** | ⏳ defer | half day | After punim testing | +| 14 | **Static sites → Caddy VM** | ⏳ optional | 1 h | Defer | -**Defer:** Immich, Crater, Outline, Plane, SSO for Vikunja/Listmonk/Mailcow until rows 2–5 done. +**Defer:** Immich, Crater, Outline; Listmonk/Mattermost/Mailcow SSO after Vikunja; Cal OIDC until license. ### Adding a new service — quick rule diff --git a/docs/guides/mailcow-lan-proxy-fix.md b/docs/guides/mailcow-lan-proxy-fix.md new file mode 100644 index 0000000..ca5eb80 --- /dev/null +++ b/docs/guides/mailcow-lan-proxy-fix.md @@ -0,0 +1,42 @@ +# Mailcow unreachable from Caddy / LAN (TCP timeout) + +## Symptom + +- Mailcow containers healthy inside VM `10.0.10.132` +- `curl https://10.0.10.132/` works **on the VM** +- From Caddy (`10.0.10.50`) or other LAN hosts: TCP **443/80 timeout** +- `tcpdump` on Proxmox shows SYN from client, **no SYN-ACK** + +## Cause (not RAM) + +`mailcowdockerized-netfilter-mailcow` adds an nftables rule in chain `MAILCOW`: + +```text +iifname != "br-mailcow" oifname "br-mailcow" tcp → DROP +``` + +That blocks forwarded HTTPS from the LAN to the nginx container, even when `DISABLE_NETFILTER_ISOLATION_RULE=y` is set (netfilter still recreates the drop on restart in some versions). + +## Fix on the mailcow VM + +```bash +nft flush chain ip filter MAILCOW +``` + +Persistent (installed 2026-05-23): systemd unit `mailcow-flush-isolation-drop.service` runs after Docker. 
+ +After netfilter container restart, verify: + +```bash +nft list chain ip filter MAILCOW # should be empty +nc -zv 10.0.10.132 443 # from Caddy host +``` + +## Related settings in `/opt/mailcow-dockerized/mailcow.conf` + +- `DISABLE_NETFILTER_ISOLATION_RULE=y` +- `SNAT_TO_SOURCE=10.0.10.132` (optional; helps some hairpin cases) + +## Reverse proxy + +Caddy on `10.0.10.50` → `https://10.0.10.132` with `Host: mail.levkine.ca` — see `playbooks/caddy-auth-authentik.yml` / Caddyfile on caddy VM. diff --git a/docs/guides/monitoring-stack.md b/docs/guides/monitoring-stack.md index f45152e..3e99004 100644 --- a/docs/guides/monitoring-stack.md +++ b/docs/guides/monitoring-stack.md @@ -188,7 +188,7 @@ To **manage** Authentik or Cal from Dockge long term, either move compose to 218 ### Step 3 — Retire Portainer -When comfortable: stop VM **109** (portainer) on pve10; use Dockge on 218 instead. +VM **109** (portainer) was removed from pve10 on 2026-05-23; use Dockge on 218 instead. --- diff --git a/docs/guides/security-audit-report.md b/docs/guides/security-audit-report.md index 64ca3e4..c0e349d 100644 --- a/docs/guides/security-audit-report.md +++ b/docs/guides/security-audit-report.md @@ -1,13 +1,166 @@ # Security Audit Report -**Date:** 2026-05-20 -**Auditor:** Automated read-only scan (`scripts/security-audit-*.sh`) -**Scope:** Proxmox nodes `pve201` (10.0.10.201) and `pve10` (10.0.10.10), all LXCs via `pct exec`, SSH deep-dive on hypervisors. - +**Last audit:** 2026-05-23 (re-run after SSH keys + `make maintenance`) +**Previous audit:** 2026-05-20 +**Auditor:** `scripts/security-audit-*.sh`, Ansible `maintenance` + `maintenance_cron` roles **Repo baseline** (`roles/ssh/defaults/main.yml`): `PermitRootLogin prohibit-password`, `PasswordAuthentication no`, UFW enabled. 
--- +## 2026-05-23 — Actions completed + +| Action | Status | +|--------|--------| +| SSH keys → caseware, auto, cal, vikunja, mailcow, listmonk | ✅ All six reachable as `root` | +| SSH keys → mailcow/listmonk VMs | ✅ Via brief VM shutdown + disk inject on pve201 (no guest agent) | +| Inventory rename `vikanjans` → `vikunja` | ✅ `hosts` + `proxmox_vmid=301` | +| `apt upgrade` fleet (skip reboot) | ✅ 14 hosts via Ansible; auto via `pct exec` on pve10 | +| Tier 1 cron (journal + apt) | ✅ `roles/maintenance_cron` on PVE, sites, comms, ansible, hermes, etc. | +| Tier 2 cron (docker prune) | ✅ identity, monitoring, vikunja; git-ci-01 keeps `docker-prune-ci` | +| VM 104 (GPU-Dev) RAM 72→64 GiB | ✅ pve201; host free RAM ~1.7→10 GiB | +| Fix broken `host_vars` (ansibleVM, listmonk) | ✅ Plain YAML; old blobs → `*.vault-bak` | +| Vault `vault_*_become_password` + maintenance vaultwardenVM | ✅ 2026-05-23 | +| caddy root SSH + maintenance | ✅ `bootstrap-root-ssh-caddy`; inventory `ansible_user=root` | +| ansibleVM maintenance | ✅ become password in vault | + +### Post-maintenance SSH reachability + +| Host | SSH | Notes | +|------|-----|-------| +| caseware | ✅ | | +| auto | ✅ | Was slow from laptop earlier; OK after upgrade | +| cal | ✅ | | +| vikunja | ✅ | LXC 301 @ 10.0.10.159 | +| mailcow | ✅ | ~1 min downtime for key inject | +| listmonk | ✅ | ~1 min downtime for key inject | + +### Maintenance playbook recap (`skip_reboot=true`) + +| Host | Result | +|------|--------| +| pve201, pve10, caseware, cal, vikunja, mailcow, listmonk, identity, monitoring, hermes, levkin, portfolio, git-ci-01, sonarqube-01 | ✅ upgraded | +| caddy | ✅ (as `root`; no `sudo` package on host) | +| ansibleVM | ✅ (`vault_ansiblevm_become_password`) | +| vaultwardenVM | ✅ (`vault_vaultwarden_become_password`) | + +### Open security gaps (unchanged until `make security`) + +| Control | Fleet status | Risk if fixed wrong | +|---------|--------------|---------------------| +| `PasswordAuthentication yes` 
| Most LXCs + both PVE | **Low break risk** if SSH keys tested first in a second session | +| `PermitRootLogin yes` | pve201, pve10, sonarqube-01 | Same — use `prohibit-password`, not `no`, if you need root+key | +| fail2ban | Off everywhere | Enabling is safe; may lock you out only if you brute-force yourself | +| UFW | Off (except one dev LXC) | **Medium risk** — wrong rules drop SSH/80/443; apply via Ansible `roles/ssh` after allowlist | +| unattended-upgrades | hermes, ansibleVM only | Safe; schedule reboots separately | +| Proxmox :8006 | Open on LAN | Restrict in PVE firewall — **won't break VMs** | +| Docker on `0.0.0.0` | identity, monitoring, vaultwarden, qBit | Bind to `127.0.0.1` — **can break access** if Caddy route missing; test URL after | +| Tailscale | **Deferred** | Off by choice; remote access via **UniFi VPN** to LAN | + +See [Risk explanations (2026-05-23)](#risk-explanations-2026-05-23) and [fail2ban vs password SSH](#fail2ban-vs-password-ssh) below. + +--- + +## GPU-Dev (pve201 VM 104) — Ollama / LLMs + +| Resource | Current | +|----------|---------| +| Host | pve201, VMID **104**, `GPU-Dev-Debian` | +| LAN IP | **10.0.10.122** (inventory `devGPU` @ 10.0.30.63 is a different network — use `.122` from LAN) | +| RAM | **64 GiB** guest (~60 GiB available when idle) | +| GPU | **RTX 4080 16 GiB** (PCI passthrough `hostpci0`) | +| Workload | **Ollama** already running (~3.6 GiB VRAM in sample) | + +### Getting the most from RAM + GPU + +1. **Right-size models to VRAM** — On a 16 GiB 4080, prefer quantised models that fit entirely in VRAM (e.g. 7B–14B Q4/Q5, or 32B Q2/Q3 if you accept quality trade-offs). If a model spills to CPU RAM, throughput drops sharply. +2. **One heavy model at a time** — Ollama loads models on demand; set `OLLAMA_MAX_LOADED_MODELS=1` (or keep only one client) so you do not fragment 64 GiB RAM + 16 GiB VRAM across several large weights. +3. 
**Parallel requests** — `OLLAMA_NUM_PARALLEL` defaults are conservative; raise only if VRAM headroom exists (watch `nvidia-smi` while under load). +4. **Keep guest RAM for KV cache** — With 64 GiB you can run larger context windows; set `OLLAMA_CONTEXT_LENGTH` / model `num_ctx` to what you need, not maximum “just because”. +5. **CPU offload only when needed** — `num_gpu` layers = all layers for speed; partial offload is for models that do not fit in VRAM, not for tuning. +6. **Disk** — Store models on fast local disk (not NFS); `ollama pull` once, prune old tags periodically (`ollama list` / remove unused). +7. **Proxmox** — Do not balloon GPU VM RAM; GPU passthrough already reserves most of the 64 GiB. Freeing pve201 meant lowering this VM from 72→64 GiB, not overcommitting other guests on 201. +8. **Optional** — [Open WebUI](https://github.com/open-webui/open-webui) on localhost + Caddy TLS; bind Ollama to `127.0.0.1:11434` only (LAN via VPN). + +**Not in Ansible yet:** add `devGPU` / `10.0.10.122` to inventory when you want playbooks (cron, hardening) on this box. + +--- + +## fail2ban vs password SSH + +**What fail2ban does:** After too many failed SSH logins from an IP, it adds a **temporary firewall ban** for that IP (typically 10–60 minutes). It does **not** disable password authentication globally. + +**Can passwords stay on if fail2ban is on?** Technically yes — fail2ban only rate-limits brute force; passwords are still weaker than keys. Best practice on servers: **keys + `PasswordAuthentication no` + fail2ban** (defence in depth). + +**Your Proxmox console fallback:** If you lock yourself out of SSH on a guest, you can still use **Proxmox → VM → Console** or `pct enter` / `qm guest exec` from pve201/pve10. That is a good break-glass path, but it is **not** a substitute for keys on hosts you manage daily — console is slow and easy to misconfigure under pressure. 
+ +**Recommendation:** Enable fail2ban via `make security` with `ignoreip` including `10.0.10.0/24` and your UniFi VPN client subnet. Then disable password SSH once keys work everywhere you care about. + +--- + +## Risk explanations (2026-05-23) + +### Password SSH (`PasswordAuthentication yes`) + +**How bad:** High on internet-facing IPs; medium on `10.0.10.0/24` only. Anyone who can reach :22 can try passwords indefinitely (no fail2ban). + +**Will fixing break things?** No, if you (1) confirm key login works, (2) set `PasswordAuthentication no`, (3) keep a second SSH session open, (4) reload sshd. Breakage happens only if keys are missing/wrong. + +### Root login (`PermitRootLogin yes` on hypervisors) + +**How bad:** High — root + password on PVE is full cluster compromise. + +**Will fixing break things?** Use `prohibit-password` (keys only), not `no`, unless you have another admin user with sudo. Ansible playbooks expect root on PVE today. + +### fail2ban off + +**How bad:** Medium — relies on LAN trust; SSH noise from scanners still fills logs. + +**Will fixing break things?** Rarely. Tune `ignoreip` to your admin IP/subnet so your own typos don't ban you. + +### UFW off + +**How bad:** Medium on segmented LAN; high if any host has a public IP. + +**Will fixing break things?** **Yes, if misconfigured** — default deny without allowing 22 from admin IP, 80/443 from Caddy, or Docker-published ports you still need. Use Ansible `roles/ssh` (UFW after SSH rules) and test. + +### unattended-upgrades off + +**How bad:** Medium — security patches lag until manual maintenance. + +**Will fixing break things?** Usually no. Kernel updates may require reboot; use `Unattended-Upgrade::Automatic-Reboot "false"` until you want reboot windows. + +### Proxmox UI :8006 exposed + +**How bad:** **Critical** on untrusted networks — API gives VM/storage control. + +**Will fixing break things?** Restricting to `10.0.10.0/24` does not break normal LAN admin access. 
+ +### HTTP services on all interfaces (8080, 3000, …) + +**How bad:** High without TLS/auth at the edge; medium behind Caddy + LAN only. + +**Will fixing break things?** **Yes** if you bind to `127.0.0.1` before Caddy `reverse_proxy` is updated. Order: Caddy route → test → then bind Docker to localhost. + +### Remote access (Tailscale deferred) + +**Decision:** Tailscale off; use **UniFi site-to-site / VPN** into `10.0.10.0/24` for admin and Ollama/GPU access. + +**Security:** Ensure VPN is required for SSH and Proxmox :8006 from outside; do not port-forward :22/:8006 on the router without IP allowlists. + +### pve201 RAM (was 97% used) + +**How bad:** **Critical** — OOM kills guests, swap thrashing. + +**Mitigation done:** VM 104 reduced 73728→65536 MiB (~8 GiB freed on hypervisor). Still tight; consider moving git-ci-01 or other workloads to pve10. + +--- + +## 2026-05-20 — Original audit + +**Scope:** Proxmox nodes `pve201` (10.0.10.201) and `pve10` (10.0.10.10), all LXCs via `pct exec`, SSH deep-dive on hypervisors. + +--- + ## Executive summary | Area | Critical | High | Medium | @@ -141,7 +294,7 @@ zpool status NAS.SP00 | 9101 | punimTagFE-dev | 10.0.10.121 | running | without-password | **yes** | **active** | no | **89** | **8000**, 111, 22 | | 9401 | mirrormatch-dev | 10.0.10.141 | **stopped** | — | — | — | — | — | — | -**Inventory mapping:** `vikanjans` → 159, `qBittorrent` → 91, `punimTag` app → 121. +**Inventory mapping:** `vikunja` → 159 (LXC 301), `qBittorrent` → 91, `punimTag` app → 121. 
### Common LXC issues (pve201) @@ -164,7 +317,7 @@ pct exec 301 -- systemctl reload ssh pct exec 303 -- bash -c 'apt update && apt upgrade -y' # Copy your SSH key (from Mac, once password/key works) -make copy-ssh-key HOST=vikanjans # 10.0.10.159 +make copy-ssh-key HOST=vikunja # 10.0.10.159 make copy-ssh-key HOST=qBittorrent # 10.0.10.91 ``` @@ -244,15 +397,39 @@ ssh root@10.0.10.10 'bash -s' < scripts/security-audit-lxc-via-pve.sh | Item | Owner | Status | |------|-------|--------| +| SSH keys caseware, auto, cal, vikunja, mailcow, listmonk | 2026-05-23 | ☑ | +| Fleet `apt upgrade` (no reboot) | 2026-05-23 | ☑ all previously failed hosts fixed | +| Tier 1 cron (journal + apt) | 2026-05-23 | ☑ PVE + most hosts via Ansible | +| Tier 2 cron (docker prune) | 2026-05-23 | ☑ identity, monitoring, vikunja, git-ci-01 | +| VM 104 RAM 72→64 GiB | 2026-05-23 | ☑ | +| Inventory `vikunja` rename | 2026-05-23 | ☑ | +| Fix `host_vars` ansibleVM / listmonk merge | 2026-05-23 | ☑ plain YAML (review `*.vault-bak`) | | SSH harden pve201 | | ☐ | | SSH harden pve10 | | ☐ | | Restrict 8006 on both nodes | | ☐ | | fail2ban on hypervisors | | ☐ | -| Patch pve201 / pve10 | | ☐ | +| `make security` on production groups | | ☐ | | Disable password SSH on all LXCs | | ☐ | -| `copy-ssh-keys` for inventory | | ☐ | -| TLS for :80/:8080 services | | ☐ | +| `copy-ssh-keys` remaining inventory | | ☐ partial | +| TLS / localhost bind for :8080 services | | ☐ | +| unattended-upgrades all production | | ☐ | +| Tailscale re-auth | | ⏸ deferred (UniFi VPN) | | Fix ZFS NAS.SP00 on pve10 | | ☐ | +| caddy Ansible as root | 2026-05-23 | ☑ | +| vaultwardenVM / ansibleVM become in vault | 2026-05-23 | ☑ | +| Add GPU-Dev `10.0.10.122` to inventory | | ☐ | +| Ollama bind localhost + optional Open WebUI | | ☐ | + +--- + +## Next steps (priority) + +1. **`make security`** on one site host (e.g. caseware) with a second SSH session open — disable password SSH, enable UFW + fail2ban (`ignoreip` = LAN + VPN pool). 
+2. **Restrict Proxmox :8006** to `10.0.10.0/24` + VPN subnet on pve201 and pve10. +3. **Bind internal Docker ports** on identity / monitoring / vaultwarden to `127.0.0.1` after confirming Caddy routes. +4. **GPU-Dev:** point clients at `http://10.0.10.122:11434` over VPN; tune Ollama env vars; add host to inventory when automating. +5. **unattended-upgrades** on production LXCs (reboot policy manual). +6. Review `host_vars/*.vault-bak` and merge any secrets still needed into vault + plain host_vars. --- diff --git a/docs/guides/security-remediation-plan.md b/docs/guides/security-remediation-plan.md index 81f8bc1..be01a00 100644 --- a/docs/guides/security-remediation-plan.md +++ b/docs/guides/security-remediation-plan.md @@ -1,8 +1,38 @@ # Security Remediation Plan -**Based on:** [security-audit-report.md](security-audit-report.md) (2026-05-20) +**Based on:** [security-audit-report.md](security-audit-report.md) (last re-run **2026-05-23**) **Goal:** Align hosts with `roles/ssh` (keys only, no password SSH) without locking yourself out. +**Homelab rollout:** [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md) — separate track; some overlap (SSH keys, patching). 
+ +--- + +## Progress summary (2026-05-23) + +| Phase | Status | Notes | +|-------|--------|--------| +| **0 Backup + prep** | 🟡 Partial | Fleet SSH keys + apt done; formal PVE snapshot checklist not fully ticked | +| **1 Critical** | 🟡 Partial | SSH keys on many hosts ✅; **Proxmox password SSH off** ⏳; **8006 restrict** ⏳; pve201 RAM improved (GPU VM 64 GiB; DebianDesktop 24 GiB pending reboot) | +| **2 High** | 🟡 Partial | fail2ban / full LXC password-off / port binding — mostly ⏳ | +| **3 Medium** | ⏳ | unattended-upgrades, `make security`, UFW | +| **4 Low** | ⏳ | rpcbind, naming, audit Makefile | + +### Completed since original audit (see [security-audit-report.md](security-audit-report.md)) + +- SSH keys → caseware, auto, cal, vikunja, mailcow, listmonk (root) +- Fleet `apt upgrade` (14 hosts, no reboot) +- Tier 1 cron (journal + apt) on PVE, sites, comms, etc. +- Tier 2 cron (docker prune) on identity, monitoring, vikunja, git-ci-01 +- GPU VM 104 RAM 72→64 GiB on pve201 +- Fixed `host_vars` ansibleVM / listmonk (plain YAML) + +### Recommended order (security, alongside homelab P0) + +1. **Phase 0** — PVE `sshd_config` backup + CT snapshots before any `PasswordAuthentication no` +2. **Phase 1** — pve201 + pve10 SSH keys-only; restrict 8006; finish keys on caddy/ansibleVM/vaultwardenVM if still pending +3. **Phase 2** — LXC password auth off, fail2ban, patch, reduce exposed app ports (qBit, searchXNG, punimTag) +4. **Phase 3–4** — unattended-upgrades, Ansible security plays, Mac hardening + --- ## How you should log in (not “ladmin → root” everywhere) @@ -402,11 +432,11 @@ Use `roles/ssh` UFW tasks or Proxmox guest firewall (`firewall=1` on `net0`). ## Tracking checklist -Copy into your issue tracker or tick in [security-audit-report.md](security-audit-report.md): +Also tracked in the remediation table of [security-audit-report.md](security-audit-report.md). 
**Backup (Phase 0 — before everything)** -- [ ] Git commit / branch for ansible repo +- [ ] Git commit / branch for ansible repo (pre-hardening baseline) - [ ] PVE `sshd_config` backup on 201 + 10 - [ ] Proxmox CT snapshots (or vzdump) on critical LXCs - [ ] Audit outputs saved locally (`security-hardening-backup-*`) @@ -414,17 +444,18 @@ Copy into your issue tracker or tick in [security-audit-report.md](security-audi ### Critical -- [ ] pve201 SSH: prohibit-password + no passwords +- [ ] pve201 SSH: `PermitRootLogin prohibit-password` + `PasswordAuthentication no` - [ ] pve10 SSH: same - [ ] 8006 restricted to admin subnet/IP -- [ ] SSH keys on all inventory hosts -- [ ] pve201 RAM relieved +- [x] SSH keys on most inventory hosts (2026-05-23 — see audit report) +- [ ] SSH keys on **caddy**, **ansibleVM**, **vaultwardenVM** (if still pending) +- [x] pve201 RAM partial relief — GPU 64 GiB; DebianDesktop 24 GiB (**reboot guest**) ### High -- [ ] All running LXCs: PasswordAuthentication no +- [ ] All running LXCs: `PasswordAuthentication no` (after keys verified) - [ ] fail2ban on pve201 + pve10 -- [ ] Patch pve201, pve10, LXCs with 40+ upgrades +- [x] Patch fleet — `apt upgrade` 2026-05-23 (reboots still pending where required) - [ ] qBit / searchXNG / punimTag / vaultwarden port exposure reduced - [ ] pve10 ZFS + PBS investigated diff --git a/docs/guides/sso-selfhosted-matrix.md b/docs/guides/sso-selfhosted-matrix.md new file mode 100644 index 0000000..1dd0d6c --- /dev/null +++ b/docs/guides/sso-selfhosted-matrix.md @@ -0,0 +1,51 @@ +# Self-hosted SSO readiness (Authentik) + +Which apps can use Authentik OIDC/SAML without a paid app license. + +## Cal.com — blocked (commercial) + +**Status:** Deferred until a valid **self-hosted enterprise license** is in place. + +The Cal UI at `/settings/security/sso` shows *"This is a commercial feature"* when `CALCOM_LICENSE_KEY` is missing or invalid. 
On LXC 210, the key in `/opt/cal/.env` is currently **empty** (length 0), so SSO cannot be configured in-app. + +**If you want native Cal OIDC later:** + +1. Purchase / obtain a self-hosted license from [Cal.com](https://cal.com) (sales or existing license). +2. Set in `/opt/cal/.env`: + ```bash + CALCOM_LICENSE_KEY=CHANGE_ME + NEXT_PUBLIC_LICENSE_CONSENT=agree + ``` +3. `cd /opt/cal && docker compose up -d` (compose already passes these vars). +4. Complete [cal-authentik-oidc.md](cal-authentik-oidc.md) — Authentik app `cal-com` is already provisioned. + +**Workaround without paying Cal:** use **local Cal password** for admin; public booking at `cal.levkin.ca/ilia/consult` stays open. Optional later: **Caddy + Authentik forward-auth** only on `/settings/*` and `/auth/*` (does not integrate Cal’s “Login with SSO” button; more ops complexity). Not recommended until license path is ruled out. + +**Infra already done (harmless to keep):** `calsaml` DB, `SAML_*` env vars, Authentik provider `cal-com-oidc`. + +--- + +## Phase 4 order (no Cal license required) + +Wire these first — typical OSS OIDC, no extra license: + +| App | OIDC/SAML | Notes | +|-----|-----------|--------| +| **Vikunja** | OIDC native | **Live** — [vikunja-authentik-oidc.md](vikunja-authentik-oidc.md); group `homelab-users` | +| **Listmonk** | OIDC native | Admin-only | +| **Mattermost** | OIDC or SAML | Moderate | +| **Mailcow** | OIDC | Last — mail-critical | +| **Umami** | — | Already LAN-only; no SSO needed | +| **Vaultwarden** | — | **Do not put behind OIDC** (break-glass) | +| **n8n** | OIDC (if enabled) | Check edition | +| **Immich** | OIDC | Phase 5; usually free in self-host | +| **Outline** | OIDC/SAML | Phase 8 | + +**The apps above are unlikely to need a commercial license** for homelab SSO; always check each app’s docs before assuming. 
+ +--- + +## Related + +- [cal-authentik-oidc.md](cal-authentik-oidc.md) +- [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md) diff --git a/docs/guides/vikunja-authentik-oidc.md b/docs/guides/vikunja-authentik-oidc.md new file mode 100644 index 0000000..0f1dd2f --- /dev/null +++ b/docs/guides/vikunja-authentik-oidc.md @@ -0,0 +1,51 @@ +# Vikunja ↔ Authentik OIDC + +**Status:** Live at `https://todo.levkin.ca` (host `vikunja`, `10.0.10.159`). + +## Authentik + +| Item | Value | +|------|--------| +| Application slug | `vikunja` | +| Redirect URI (strict) | `https://todo.levkin.ca/auth/openid/authentik` | +| Subject mode | **Based on the User's username** (`user_username`) | +| Access group | **`homelab-users`** (bind to app; policy engine **ANY**) | + +| Authentik user | Purpose | Email | +|----------------|---------|--------| +| **`admin`** | Authentik admin UI only | `admin@levkin.ca` | +| **`ilia`** | Homelab apps (Vikunja, etc.) | `idobkin@gmail.com` | + +**Do not use the same email on both users** — Authentik will pick the wrong account. + +`homelab-users` group = **`ilia`** only. Vikunja app binding: group `homelab-users` (policy engine **ANY**). + +Secrets: `vault_vikunja_oidc_client_id`, `vault_vikunja_oidc_client_secret` in Ansible vault. + +## Vikunja + +Config: `/opt/vikunja/config.yml` (mounted in `docker-compose.yml`). + +- `auth.openid.providers.authentik` → `authurl: https://auth.levkin.ca/application/o/vikunja/` +- `usernamefallback: true` + `emailfallback: true` → SSO links to existing local user **`ilia`** when Authentik username is `ilia`. + +Local auth stays enabled for break-glass. + +## Login + +1. Sign out: `https://auth.levkin.ca/if/user/logout/` +2. `https://todo.levkin.ca` → **Login with Authentik** +3. Sign in as **`ilia`** (username) or **`idobkin@gmail.com`** — **not** `admin` + +**My applications:** `admin` only sees apps allowed for superuser (e.g. Cal). **`ilia`** sees Vikunja after login. + +## Adding users + +1. 
**Directory → Users** — create user (username should match Vikunja local username if linking). +2. **Directory → Groups → homelab-users** — add user. +3. New Vikunja users: first OIDC login creates account; existing local users need matching username + fallbacks. + +## Related + +- [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md) +- [Authentik Vikunja integration](https://integrations.goauthentik.io/chat-communication-collaboration/vikunja/) diff --git a/inventories/production/group_vars/all/vault.example.yml b/inventories/production/group_vars/all/vault.example.yml index a2c941a..bae88dc 100644 --- a/inventories/production/group_vars/all/vault.example.yml +++ b/inventories/production/group_vars/all/vault.example.yml @@ -22,6 +22,10 @@ vault_ssh_public_key: "ssh-ed25519 AAAA... you@example" # LXC create bootstrap password (often required by Proxmox) vault_lxc_root_password: "CHANGE_ME" +# Ansible become (sudo) for VMs that use ladmin/master instead of root SSH +vault_vaultwarden_become_password: "{{ vault_lxc_root_password }}" +vault_ansiblevm_become_password: "{{ vault_lxc_root_password }}" + # Mailcow API — System → Configuration → Access → API (read/write) vault_mailcow_api_key: "CHANGE_ME" # Per-mailbox passwords (make mailcow-mailbox MAILBOX=) @@ -44,6 +48,13 @@ vault_kuma_smtp_to: "idobkin@gmail.com" vault_umami_db_password: "CHANGE_ME" vault_umami_app_secret: "CHANGE_ME" +# Cal.com ↔ Authentik OIDC (make cal-oidc) +vault_cal_oidc_client_secret: "CHANGE_ME" + +# Vikunja ↔ Authentik OIDC +vault_vikunja_oidc_client_id: "CHANGE_ME" +vault_vikunja_oidc_client_secret: "CHANGE_ME" + # Hermes Mattermost (not Telegram) vault_mattermost_url: "https://slack.levkin.ca" vault_mattermost_token: "CHANGE_ME" diff --git a/inventories/production/group_vars/all/vault.yml b/inventories/production/group_vars/all/vault.yml index adc1b49..1b0f419 100644 --- a/inventories/production/group_vars/all/vault.yml +++ b/inventories/production/group_vars/all/vault.yml @@ -1,125 +1,142 
@@ $ANSIBLE_VAULT;1.1;AES256 -38333966306139633330626334636166336434613661376233313731353237353562376237323166 -3736663161376133336431353334306533316337633662310a646533656261633333306433626564 -33626434353661343431316632643938663639356531336564653230353439316236343861643665 -6231393530333937340a333033366564393536613330373232373861666439316536336164306633 -31373433653531363636663262616535643137363039356534313462653232663663343464303938 -33633838373935333433653732656261633463653835393864353862346563303063656431343065 -61323331363032383365613734373165343530303230373237346162306361613461353939623934 -64303138383537386435653461356130356563653036343339333761303030393933393735616531 -33386462303037613263373036386332656563346539633131366636333163376162613231313337 -64336137373038636233346539616136343933343635353639633633616438333739303864376162 -36656639313966633234323738326435373935363166626664613561636637396166353961623262 -31333064306537376631656235636265313235643339353735373666316364616432336536303830 -39393136393864383035633462366637396438323838643337633361373132363365616333613431 -30326533366265303165653761333034656261363862353061383761363530666135373265623332 -33373538616433383835663139383065366433333939356366353635633834666362646465366130 -31636235613934313465646136623834343062353539653163373032326130303034653365653431 -63306635323431376562396236653966633833396262343664643562366235393961316564656565 -61356436313363376233376137303062656462363933643465616436353964373837383536306136 -35626163393638353261633030653164643063626463383133666137323333633463616138643931 -38346633653430303031643830363166363561346336646666343330303164336164333561386535 -66616661306133626164343166303362383262636331313465343434643262353862313438616462 -33383734626463616330666265636265623064326635633066656533306530376663653366613534 -36666337346238333137303931633631366236373236383932343763653637343434336462343662 -64313239313435353365383338376133386639326136636164386439306665663965353565333030 
-65363139636134656333616335643435643038373832383134636666303536663236303231313030 -61616664373264663763343334303437643264396435373230333561323036363764383730373461 -61636661316330373732363835303039346438313133393862306138613634333334356633346232 -63666132303939656465356665323435326435333135303735346332613134633736333338653066 -31616532616537343735326232613235323364386636396531383333316633666338306635656565 -63316338343032346261343863623163353934653434363336643836353431643937393261393339 -61363562373533396631623830613431663262643631663637396663626466663634323037666662 -65663132393863333135663831386132646533353535326430323864396132343762623464643461 -35306330666635343362316239386463633161623664653063356561356166613332363432393730 -33646439663039653037383630356166323733373963353239643231326338633838623033633339 -66666630306130336632333736396335666437383164633466373534333334356261383538353363 -30623461333365633536663236363661323835356361363331653437613131303732643134343038 -32663338356462343535396534646263656331366265356532616234663966626138633031323866 -33346662336534323037353835333032633965326163623365643230666339363566353938623931 -33316539396538333433373236656339396165313930613331396135666236326231336563343063 -34646233336137323166663635323266613635343363636334353865343931616665613462613764 -30323865623164303333333166393963613535616563316531383231313239666337343961333938 -34663931343535333830333036646463356132613064663037323237366563656239343665653263 -32343535653037633931653565663166623736306166623363316632316236663534383938656564 -32633734373336383630663436373863343136663337306364326432663763326561363961623464 -31326263623935343933333739373038373838616432646533316230613762336236306338616163 -34333266316537646439343937366261303833363665373734386632613733313435336438343534 -61393363396261666265396361313063636334623765613564393736616461313438613234333661 -64383764653464373131326332656435343163613561623762663532643130666338633736393931 
-34316535376235616533353831343537363533346331316332323439383837303631626261316564 -30383566363737643065356565346161376637646431633732636333373862653966323461356535 -64613964666135373038656364376334336631376261373338643737633266393761623837643730 -38316233626130383231623930346338306164653336643066656665356463313131343738316230 -66316133306134656330643532303538373661333161343133613266333465663534326231306461 -65653634373934323432303833353339356531313164346238623639373363393137336334306131 -39393463613032633533363236323730386133356135383030656261363761333765383831646238 -35386164353462646236306337393364323665316364626265363736316638353266626665393662 -38663137626361366334373033643864613664656631616532373935313031343633373631323533 -34656561396463336662313834653634306435616439336161323763313732313331663436633663 -35323961393133343566623937313064646532643638336163633538613465363138653161386238 -62333139336537656339333737363933346333633534396230356561303063626266666661366130 -38626338353336616161373334306165333930646563613436303233666563636462643435396233 -62323634393063616461653134353133323664346566663664383766313939653036303930633331 -66353762623338303530633463336533373634333734653430303139366637373130306561653264 -34333533666437343732363036356132313230323838373233636631336434313563336366316466 -63393633363461393164323063396238346262623136623639383963616662323137633139323766 -31303765323730303863376166386631643031306130396338376538373362323335643964303137 -62626131656262613437383036636438383262396533646163363365326134633834666236333335 -65633037626335376230303937366463376664363062366361663362373434656637636230623561 -39626634343761303030346365633333333039326364303762326461316361343231363932323336 -39623033303232316263323433366638393435336563636138343261636561356363366138653033 -62373731623461363135383037613065396264333966353436613466663931343033326363323138 -62306133613163633134626138663434356562633936346239373837336439653061613762626533 
-38623366313464393631666330353738393538366537313637613732613532663339653637616633 -65623637373230333738343136393332376364316438633164306539336233373065396339373562 -31383163316231356538626333323533663863383339643363303334323833353164356662326530 -34653630663330663330323864333965303236313266393636333839643863666236646665633137 -33353038626562663266386161393331326636353862643233326231623063623463313231373862 -31333639626232306339373435386562663035303633383333653066643361643139356134633264 -33363832353735633462363761343138323234356530656136636236623365353531356337393234 -37363133333763643863373338616532666464336238363631636131313261326164313430363434 -62363730623464343532653431353266336262363262373933646234653563663535363133343634 -64663535363231353738303663626166383831383531363130373466633532356635313530383432 -63636462656236303033376637643462616230626163373832666337636263333866313466616563 -62613162363633353235363039366365396662383335386165373233633539616530363264653266 -62643138333631353138336366646632386563353431343737363265353065373834326432623265 -30663630323361353635613363633032386465623139376630653038376536616462326134343363 -37643638323731313065653931663739306134323861313538313965636632653064393033376231 -36663666633836646636376166356361633961626466383030656162363362396566333832393439 -62306265386638333138363764646331643136636566343736613862343233303461633661643832 -35653839303039383233373532643632353964343365396131393933636537656334316466313531 -36633364643230336161316639313130316131663663393966333162373632386635393130313263 -64656439663135373265383732316435346135376563356630316662333664353564333038313730 -66346131396132366632306633656334376334653038646535383135636665396362343238346663 -36643132666434633730653431346265353662613265326230653333396239626633346633343231 -38303739303665343933633439623131333632383432343962653130396666373164633431653663 -35353264653833306163646164376234666364363766336564346332393831336537663936346433 
-37346438353835353736316530323336336334376133663834363161326563353966356534333830 -64656164356661343462646536366234323062323164636434333863346337303661366164646562 -64383666343339346332643832616266346439353863616138613965373764333261356331316466 -62643939643461363238386463346638373630333437633737636630666161323461616539306634 -64646666626461306563393830396661313636633332396132363961373038386566646230323739 -62373064323761316135613538663132316365633339356664316365383234303635663435363239 -34336236663435643563376130396535623137333466363536393031303139356565313766656432 -64313365383631383034313831393462666437663733633165643230663539613630643264376631 -66653861313639666235613034633935633836656638643764343639373931366332373837343765 -61313765326362303963666165373364663664313631373136623437343837396165313930636165 -39323030303839333036393432383731303030643430643766383662366335386230623163303733 -37303232346534333433626330343637313534363562653133383966356538396638663762326530 -35336166393763626466323863663137386531356436306530323738373365643635613231636564 -62333839336137353833353036323533333163663331663033633938633533626637653538613038 -38613539303534366437633135616631303261643135616436653664326132356636653931306564 -62616434353733303863376361356465613531306534376333613261323764303137306266636434 -64363238633736643361393730626666656664333233616361643834373239623230303533343935 -31343362333735386338643433613333613736323639646562323437313733303331396136383762 -31663137386431386630343666663139363736313731323930313539313939623832313864386637 -66316531343238303936323234653033303666333233323334623837653665353565666335323638 -37363466373363333362656563383066366434306262323363336533356531363861356162326162 -66316135653963323765343934306630633132353036346536613663386339393632393764303530 -62333330306136346265306237393435353430313635393339363038313137623663316331656539 -31396361623230326433393239626536636437623737363131653363646237656165346463643338 
-35306536376634336264643564346163373233666330393630633339346533653963346630396139 -36363430303866616334666631653732306230626238653463626132666638643938623030373538 -32353062626562396134393230386562346163643531376630616161646633333131383437386330 -34393665646530306663 +62616334383737633962313839313235653935663832623061333532616566343565626437376230 +3333393831623434663736656331303462626534626265380a356135653866666438373838663137 +61373962356364306365323933386262613837333364356564383163383638363430323230393430 +3032346238343264340a636539663735396335313135363330373536353562666537653764643637 +36663437366166616437303738646466656331313266653431303462366532616639323136346137 +66663932346561333535303438623734643864613330396331626161616265393731633365393930 +37326565363931386532623432343339656534393032663634353961306330303737313765333330 +65316436383030666564663537323937666634343966653562353434333537366338393838333666 +30356339353732623932393665663237343630303533363232336263323732376461353338663831 +62666365333330353361373732306436623637623932636235393434323339663266396631346237 +63393762643338346563643637666135336139336461333537373137626464613339373937383830 +39643039363234346134663062373130343230663839613234373838393434373532313732656332 +63373739616163666361666330393866396331616136383565383763303563323261323330313832 +64386661383838366336633335323431356133366162373464313533653734613366623537646636 +65323862376466343530303439396639616135373030613638363630313264623337653233636532 +38383664613337303565336136333434613638663239393234656534353264623166333837376436 +36633837613339613161363764383538303363323232346636313862393930343333633131383833 +65316166363062363330373734323232366136653030336439343932613337623662383236663834 +66303137353438373661633537633333633733666663393435383436396634393739383039383139 +32653438303134326663653164633039653435643766616637313433623463366531633962613434 +33396262333739643865346465363862303337356239663337356330363232383331346435393930 
+64306633363064656566346662363433313434376631663032343635656463313530626635623930 +65383434663064666535613561313265616436326533313336303836386635343134626361343566 +36653233656337613838323164376666656338383337633065393237373737623934626265343133 +38393763316132373234623735353731656261643736353562616361643033303064393962343239 +32623363653466363565323436643639643934663530646333356532363463363564363862373232 +38396535393034653565643236363733393032306335363934623462386639363961306265646636 +32383738653633613732313030626135353366626537646263303634323539343866363033643337 +66396235323461666131643030353164616265623635636438363738653233363435353761366531 +66623033656331386138623864363461333933653636653566303733616137303030663430643535 +64386534346463393638613764353966343837333235623262343164326564616138353731363663 +39666634323663373831326664326337656164323738326335663734373538303135653861393362 +65303865366235333538623330373032306661386436323530336631616639366636376135303537 +31373634636561356239366437623637363735653633316634353862666139303565393533643864 +64656335356236353232303135616265666266376634313437633236666461343233333732323832 +61313230393162383163336634303066613664376338633964643431346335616533396466393736 +31383862666365633665623766643665623361376565386531323234303236393162356331346535 +65353231316531326438343237633133393361336366353232623866393138376232643133326161 +63333236626237613536323964356435383933646264656137623632343665393530343463343230 +61343464383230366339616439343762626435303832393462666463363030383365343938666264 +33636437333266656130633365666162316366616262386436333861373533343433356633356630 +31643666626262386535626233653337303861666666653366643361363164353430643561613532 +32373239373038306533393464373365323638653630306630363931623931336663666339356464 +64646634356437326435656163306562346530363435336138353330356162333431353466313763 +64666538666332653762633064653664663531373638393530653034323864383938346631303165 
+62343163636366633161383464626639633638323363306139626632343836646135346332393235 +62376536316164636631626639656533323337366335616534356538386266343436613530653131 +36383733373637303864636334633237663331623663663562613261393736323137373130613537 +61353431396139663861366639616631613064323230366131373666373964393738623936393431 +62366530623938373836636265393233663661663664613430366237396637366561616433333463 +64623335303834376432383361396433373537633066333937663633663433333339343262363338 +38633532366334313164346236646665663363623065666331613961653639313563316563383231 +64343834373066316233336465366634306537303666383831306237396362366663343430353162 +35643638653234396134653638653663333765313236313764383835343431303134383537313237 +35626563376163643336623534633236313363383062373437666536306462383632626332643430 +39326661633134393465363333356136323361363831363961646230393561663838313935386432 +66653430613231343731623630313362366138613465373631653632303139636438656439633361 +31326262313431363536633434346431626336623139333235363338626435666439616433323931 +30386238663931393066353237616537366434363536306163613931306138653364623663666438 +63396331313438623662393532333834376237343462313263626139366133353131313164613861 +38313632386336646362313634633938383963306339383362633236653235353061626337353936 +32626464343166323438616637346661633861396264633365386638666538333932633530306139 +64393132613562353835323162346532313262353266366230393839323462626362353533323834 +35393261373039336537623339613463613335363362353438623837376631646233653362383636 +31613261323361623934653939613661333836666637383534643137346261353333303861363665 +62386237646661626536363034313833363965373562316334336232643164633436333261303730 +39386233646162323365393034663137636462316432333335313366363933633065323264646136 +64326338303766613230393539626430326263646631646536623436643734376237373031316466 +62306136373465633130653564633233356331313761366333623363646666313365623563346334 
+31666535346461336630626466616664363330626338333961386239333663326536316266346634 +33323064333161313232343239346439623633346161346465313532383061376137323839666365 +64376132306338663565623531623136663436333730366563313261626661373233393438646561 +61396562666533316635363432306139366430333837333866346436306135333862663734306164 +38313766666230393861323632316231343362656136366338336564623431373662333366323833 +33613232326230643530356137646635623030316663343466666666333734636230346263353365 +65363637356635666638613566613131383864316465666536336333393334653436666261393461 +64376639303632626165613361346636303064333532613064303032643562396262623632396539 +61623333643630616536393163623330353164383864623064383732633733353630323534663732 +36383133633066393263363533616334653933336235333938663132366334326234386264386531 +62373466393234666563613637313136663764376239666434383038613932376532653531613164 +34613834313532383165336634613536636437393638653964393831393533303630333933636464 +31613634346235396331386534303636313066396361393138393635633134353035613863656364 +63363030376662356636333566373063613433373330383139396530316163303633656438326333 +65353435613561303539326538613261393339616537373136313030656133323766396464646634 +34363061366166303465616133663835323232373763336634386231396230383965306164393731 +39633333353936666361656530363665383039626533333035373663326537373538633864626366 +64613435663834666137326335333736376466356236353637333262373834336131393733646138 +31363630626432643061663538626230356637373863643866326530373962393065393464663466 +37326165653235653166386561363339353331663164326639616135663736316363336531333439 +35363033343934323063613133326264313665613363386464303662633333646330373637636366 +37396562303164636261323633373538323835623235396161303964373735356538393431303031 +64663636326364386266306434343361353439616533303632363165376639313635663637623263 +37376233323233663364393439663137396265646230613631383039316230356539316130353062 
+33303732323063633738666636623737366631336164396637396533656364316333616536336632 +34303963623031353137626331623031326136373538633336633835623337303831616365323066 +35333931393136393965623135626363393335306363366639323034633064663035613566313037 +38616234666131343064633561326466326365643863653664623932333734643332383963356665 +64326435643333333435636665383165386364663134613564386639346566353831343239646239 +35376338386631646236303031303665336166643437316131626438646237663331306438666130 +32323539393431303039393964363161633461303136616430666539393162633464623436656638 +61363736363665633965656362643432376266393531333539633737343165313562616133366131 +34346266323931363137303463666363336163373839306533393831323262313861393333643336 +37626239366432393461613630366636366631353237396461663566333935343037336438626262 +33306264613065373638373634303262626338386236386533616563633131366665663738353837 +33333936306266633965613338393662656161613465396163653438306463303138656536366531 +66343634306332313561386531373663343535343232646162396361626666633034663133613364 +65323536346264636164616463626535353261396362633736376531666334346537666562363339 +39653430386565313731346230653632613830396165313561333865333234656532383339313065 +30663565393030343134383536336335616537333336396232333839373533353161623264626434 +61393334316331613739666434653839353933336332396536313937323939646264313133373863 +66643138656661336264646338376232396138616465373562393063333336343036326632306662 +66303836326636663264633334356533613066383935316635313236633631376633613535303830 +33646566346661346539633638363135343939653363623232313864613132393235643961633566 +34653464303430313466326463346563363964363666623665633265356138336133616261333839 +37343036363065613766366565343765306663623037383933323230646566333935306564343039 +30343730633135643338366262376365326561353538346433636336633866393565326334326431 +64613136353139316331343333643564343534643931313164323934373465386437376637613838 
+65333237386462666262326663316639393961363033656233356330666634366633373336326531 +33303535323036663837363537366436653930353637353962393464373361323166663031343532 +37383735613334316434356232343466373539666562326430656538653634323361363236313030 +34313537633433666333356661383838663861613765383564633835333437363330616163316432 +30323762656230323035663139323363346235633337346637663632383762393363396632613631 +35303161383263613164303535633063346432643563363436306665613738346338666336646530 +36383639353032636133353438396362333763623164376338653564616465303538646432353763 +66663262636661363465326463326639613431643065623966373630323161356565326362646635 +32633335303339633232396166393235643462356565323236356539653033363663333262386235 +36316432386165366530323737353862393263343063343138343334343966313838336639646463 +36303137323961626561343238323634373830323161303365306465373036323262663835376630 +34376662356238643939613536383432393464656530326530333262356162623531636364363662 +65613166383563333237376135656362306362366434346565366235626532623964303661626632 +61646462633533663830613436633937336364643562653362616464636130343264666233333932 +37323736316539636336633163643166333231376464656462666364303761313962366635663336 +31353738396532616137333033313362393830663434323236313031623863643735323838646561 +34633065623764333734353166323234633538363230633865353764333663613239306664386232 +65306661333939336634343535393261326335663163663431633630373936336465623634376362 +31393231313435306564333234633938353336366239646637366162343065366261303538613962 +36323065663362383538633536393161653332383035336236363364373133326366366130626135 +34346237366338663962643966613363336165633765663137653930323731393235616137613364 +37623462396333376263326364363166613831396161393933623532346637326262616434636265 +64323336626663303131323331376330393232666233626662363264616533646462323233333633 +3535 diff --git a/inventories/production/group_vars/ansible/main.yml 
b/inventories/production/group_vars/ansible/main.yml new file mode 100644 index 0000000..dda870c --- /dev/null +++ b/inventories/production/group_vars/ansible/main.yml @@ -0,0 +1,2 @@ +--- +maintenance_cron_enable_system: true diff --git a/inventories/production/group_vars/comms/maintenance_cron.yml b/inventories/production/group_vars/comms/maintenance_cron.yml new file mode 100644 index 0000000..dda870c --- /dev/null +++ b/inventories/production/group_vars/comms/maintenance_cron.yml @@ -0,0 +1,2 @@ +--- +maintenance_cron_enable_system: true diff --git a/inventories/production/group_vars/proxmox/main.yml b/inventories/production/group_vars/proxmox/main.yml new file mode 100644 index 0000000..90d30a4 --- /dev/null +++ b/inventories/production/group_vars/proxmox/main.yml @@ -0,0 +1,4 @@ +--- +# Tier 1 maintenance cron — hypervisors (journal + apt) +maintenance_cron_enable_system: true +maintenance_cron_enable_docker: false diff --git a/inventories/production/group_vars/services/maintenance_cron.yml b/inventories/production/group_vars/services/maintenance_cron.yml new file mode 100644 index 0000000..bde0b2d --- /dev/null +++ b/inventories/production/group_vars/services/maintenance_cron.yml @@ -0,0 +1,4 @@ +--- +# Tier 2 — Docker weekly prune (identity, monitoring, vaultwarden) +maintenance_cron_enable_system: true +maintenance_cron_enable_docker: true diff --git a/inventories/production/group_vars/sites/maintenance_cron.yml b/inventories/production/group_vars/sites/maintenance_cron.yml new file mode 100644 index 0000000..dda870c --- /dev/null +++ b/inventories/production/group_vars/sites/maintenance_cron.yml @@ -0,0 +1,2 @@ +--- +maintenance_cron_enable_system: true diff --git a/inventories/production/host_vars/ansibleVM.yml b/inventories/production/host_vars/ansibleVM.yml index ab159d5..87cd101 100644 --- a/inventories/production/host_vars/ansibleVM.yml +++ b/inventories/production/host_vars/ansibleVM.yml @@ -1,8 +1,9 @@ --- -$ANSIBLE_VAULT;1.1;AES256 
-31306264346663636630656534303766666564333866326139336137383339633338323834653266 -6132333337363566623265303037336266646238633036390a663432623861363562386561393264 -63303565633530383634643538323165383461656539613331386135336265653531336266613865 -3833376664366239650a313134653238323437633265373463326231346663366434323733663666 -38353061373437306431383132333233663639643134363464396163333962373033363661623666 -3430633863623962366430613962346264356461373539376263 +# ansibleVM (control @ 10.0.10.157) — plain vars; secrets in group_vars/all/vault.yml +# Previous fully-encrypted host_vars file moved to ansibleVM.yml.vault-bak (broken for Ansible merge). + +ansible_become: true +ansible_become_method: sudo +ansible_become_password: "{{ vault_ansiblevm_become_password }}" + +maintenance_cron_enable_system: true diff --git a/inventories/production/host_vars/cal.yml b/inventories/production/host_vars/cal.yml new file mode 100644 index 0000000..a063eee --- /dev/null +++ b/inventories/production/host_vars/cal.yml @@ -0,0 +1,9 @@ +--- +# Cal.com LXC 210 @ 10.0.10.228 — business / scheduling +cal_public_url: https://cal.levkin.ca +cal_saml_admins: idobkin@gmail.com +cal_saml_db_name: calsaml +cal_authentik_app_slug: cal-com +cal_authentik_provider_name: cal-com-oidc +cal_authentik_host: https://auth.levkin.ca +cal_oidc_client_id: cal-com diff --git a/inventories/production/host_vars/dev02.yml b/inventories/production/host_vars/dev02.yml deleted file mode 100644 index 8c3d9b4..0000000 --- a/inventories/production/host_vars/dev02.yml +++ /dev/null @@ -1,16 +0,0 @@ ---- -# Host variables for dev02 - -# Use ladmin user with sudo to become root -ansible_become: true -ansible_become_method: sudo -ansible_become_password: "{{ vault_dev02_become_password }}" - -# Configure shell for ladmin -shell_users: - - ladmin - -# Skip data science stack -install_conda: false -install_jupyter: false -install_r: false diff --git a/inventories/production/host_vars/git-ci-01.yml 
b/inventories/production/host_vars/git-ci-01.yml index c34b91c..a2e8aa7 100644 --- a/inventories/production/host_vars/git-ci-01.yml +++ b/inventories/production/host_vars/git-ci-01.yml @@ -8,7 +8,7 @@ ansible_become_method: sudo # Maintenance: /etc/cron.weekly/docker-prune-ci (docker system prune -af --filter until=168h) # # Capacity notes (2026-05-23): -# - pve201: ~3 GB RAM free (125 Gi total, heavily overcommitted — GPU VM 104 @ 72 Gi) +# - pve201: VM 104 reduced to 64 GiB (2026-05-23); still tight — consider runner on pve10 # - capacity 3 needs ~8–12 GB RAM on this VM → migrate runner to pve10 or add RAM after freeing pve201 # - 12 repos: capacity 2 on one runner is OK; second runner on pve10 if queues stack up @@ -16,3 +16,7 @@ git_ci_runner_capacity: 2 git_ci_disk_gb: 64 git_ci_proxmox_vmid: 115 git_ci_proxmox_node: pve201 + +maintenance_cron_enable_system: true +maintenance_cron_enable_docker: true +maintenance_cron_docker_script: /etc/cron.weekly/docker-prune-ci diff --git a/inventories/production/host_vars/giteaVM/maintenance_cron.yml b/inventories/production/host_vars/giteaVM/maintenance_cron.yml new file mode 100644 index 0000000..1e04993 --- /dev/null +++ b/inventories/production/host_vars/giteaVM/maintenance_cron.yml @@ -0,0 +1,7 @@ +--- +# giteaVM — Gitea on Alpine (Proxmox VM 102 @ 10.0.10.169) +# Alpine uses /etc/periodic/weekly (not cron.weekly); no apt for system-maintenance. 
+maintenance_cron_enable_system: false +maintenance_cron_enable_docker: false +maintenance_cron_enable_gitea_archive: true +maintenance_cron_gitea_archive_script: /etc/periodic/weekly/gitea-archive-prune diff --git a/inventories/production/host_vars/giteaVM.yml b/inventories/production/host_vars/giteaVM/vault.yml similarity index 100% rename from inventories/production/host_vars/giteaVM.yml rename to inventories/production/host_vars/giteaVM/vault.yml diff --git a/inventories/production/host_vars/identity.yml b/inventories/production/host_vars/identity.yml new file mode 100644 index 0000000..e5d78f5 --- /dev/null +++ b/inventories/production/host_vars/identity.yml @@ -0,0 +1,3 @@ +--- +maintenance_cron_enable_system: true +maintenance_cron_enable_docker: true diff --git a/inventories/production/host_vars/listmonk.yml b/inventories/production/host_vars/listmonk.yml index 3197c3b..ddc564a 100644 --- a/inventories/production/host_vars/listmonk.yml +++ b/inventories/production/host_vars/listmonk.yml @@ -1,8 +1,3 @@ --- -$ANSIBLE_VAULT;1.1;AES256 -31316663336338303832323464623866343366313261653536623233303466636630633235643638 -3666646431323061313836333233356162643462323763380a623666663062386337393439653134 -61616135353966333639323031643263646231636332613935353234363134356435646266343866 -3034653235393636350a626362333764313732646663653838313233326438646330393336346539 -30393364323237396633343133616439393563326161636366613965366161656364343939313334 -3430306634396361353238643735363430383433323431393230 +# listmonk VM on pve201 — plain vars; secrets in vault +# Previous fully-encrypted host_vars file moved to listmonk.yml.vault-bak (broken for Ansible merge). 
diff --git a/inventories/production/host_vars/monitoring.yml b/inventories/production/host_vars/monitoring.yml new file mode 100644 index 0000000..e5d78f5 --- /dev/null +++ b/inventories/production/host_vars/monitoring.yml @@ -0,0 +1,3 @@ +--- +maintenance_cron_enable_system: true +maintenance_cron_enable_docker: true diff --git a/inventories/production/host_vars/vaultwardenVM.yml b/inventories/production/host_vars/vaultwardenVM.yml index e6197e2..965bcc1 100644 --- a/inventories/production/host_vars/vaultwardenVM.yml +++ b/inventories/production/host_vars/vaultwardenVM.yml @@ -1,8 +1,8 @@ --- -$ANSIBLE_VAULT;1.1;AES256 -35633833353965363964376161393730613065663236326239376562356231316166656131366263 -6263363436373965316339623139353830643062393165370a643138356561613537616431316534 -63386635363838626465396439303664316635633239653639646338393130666164653262316135 -3937376464303935620a343530333030643830383130646532613533336435383334373831343261 -37653138613132616165636132623037623033343265663734626536366361373130353139383634 -6664346538653965343263376538636336393164356434646264 +# vaultwarden VM 104 on pve10 @ 10.0.10.142 (ladmin + sudo) +ansible_become: true +ansible_become_method: sudo +ansible_become_password: "{{ vault_vaultwarden_become_password }}" + +maintenance_cron_enable_system: true +maintenance_cron_enable_docker: true diff --git a/inventories/production/host_vars/vikunja.yml b/inventories/production/host_vars/vikunja.yml new file mode 100644 index 0000000..e5d78f5 --- /dev/null +++ b/inventories/production/host_vars/vikunja.yml @@ -0,0 +1,3 @@ +--- +maintenance_cron_enable_system: true +maintenance_cron_enable_docker: true diff --git a/inventories/production/hosts b/inventories/production/hosts index f4c6833..7669b0f 100644 --- a/inventories/production/hosts +++ b/inventories/production/hosts @@ -22,7 +22,7 @@ portfolio ansible_host=10.0.10.106 ansible_user=root url=https://iliadobkin.com dev01 ansible_host=10.0.30.105 ansible_user=ladmin bottom 
ansible_host=10.0.10.156 ansible_user=beast debianDesktopVM ansible_host=10.0.10.206 ansible_user=user skip_reboot=true -devGPU ansible_host=10.0.30.63 ansible_user=root +devGPU ansible_host=10.0.10.122 ansible_user=root proxmox_vmid=104 proxmox_node=pve201 # GPU-Dev-Debian, Ollama + RTX 4080 [qa] git-ci-01 ansible_host=10.0.10.223 ansible_user=ladmin @@ -41,7 +41,7 @@ listmonk ansible_host=10.0.10.148 ansible_user=root url=https://listmonk.levkin. [services] # VMID 117: on PVENAS (pve10) hermes ansible_host=10.0.10.36 ansible_user=ladmin url=https://hermes.levkin.ca proxmox_vmid=117 proxmox_node=PVENAS -caddy ansible_host=10.0.10.50 ansible_user=ladmin proxmox_vmid=106 proxmox_node=PVENAS +caddy ansible_host=10.0.10.50 ansible_user=root proxmox_vmid=106 proxmox_node=PVENAS cal ansible_host=10.0.10.228 ansible_user=root url=https://cal.levkin.ca proxmox_vmid=210 proxmox_node=PVENAS identity ansible_host=10.0.10.21 ansible_user=root url=https://auth.levkin.ca proxmox_vmid=217 proxmox_node=PVENAS monitoring ansible_host=10.0.10.22 ansible_user=root url=http://10.0.10.22:3001 proxmox_vmid=218 proxmox_node=PVENAS uptime_kuma_port=3001 dockge_port=5001 umami_port=3000 @@ -49,7 +49,7 @@ giteaVM ansible_host=10.0.10.169 ansible_user=root url=https://git.levkin.ca pro n8n ansible_host=10.0.10.154 ansible_user=root url=https://n8n.levkin.ca proxmox_vmid=103 proxmox_node=PVENAS vaultwardenVM ansible_host=10.0.10.142 ansible_user=ladmin url=https://vault.levkin.ca proxmox_vmid=104 proxmox_node=PVENAS actual ansible_host=10.0.10.158 ansible_user=root url=https://budget.levkin.ca proxmox_vmid=108 proxmox_node=PVENAS -vikanjans ansible_host=10.0.10.159 ansible_user=root url=https://todo.levkin.ca +vikunja ansible_host=10.0.10.159 ansible_user=root url=https://todo.levkin.ca proxmox_vmid=301 proxmox_node=pve201 qBittorrent ansible_host=10.0.10.91 ansible_user=root port=8080 jellyfin ansible_host=10.0.10.232 ansible_user=root url=https://jelly.levkin.ca proxmox_vmid=101 
proxmox_node=PVENAS # stopped until NAS pool healthy
diff --git a/playbooks/cal-authentik-oidc.yml b/playbooks/cal-authentik-oidc.yml
new file mode 100644
index 0000000..6f3f39f
--- /dev/null
+++ b/playbooks/cal-authentik-oidc.yml
@@ -0,0 +1,80 @@
+---
+# Playbook: cal-authentik-oidc
+# Purpose: Enable Cal.com SSO (SAML DB + license env) and Authentik OIDC provider
+# Targets: cal (LXC 210), identity (LXC 217)
+# Usage: make cal-oidc
+# Manual: https://cal.levkin.ca/settings/security/sso — enter Client ID, Secret, Well Known URL
+# Play 1: an unset OR empty vault secret falls back to a generated one — hence default(..., true).
+- name: Prepare OIDC client secret
+  hosts: localhost
+  gather_facts: false
+  tasks:
+    - name: Use vault OIDC secret or generate one for this run
+      ansible.builtin.set_fact:
+        cal_oidc_client_secret_effective: >-
+          {{ vault_cal_oidc_client_secret
+          | default(lookup('password', '/dev/null length=48 chars=ascii_letters,digits'), true) }}
+      no_log: true
+
+    - name: Remind to persist generated secret in vault
+      ansible.builtin.debug:
+        msg: >-
+          vault_cal_oidc_client_secret was not set — generated for this run only.
+          Add it to vault.yml and re-run so Authentik and Cal stay in sync.
+      when: vault_cal_oidc_client_secret is not defined or vault_cal_oidc_client_secret | length == 0
+
+- name: Cal.com — SAML database and compose SSO env
+  hosts: cal
+  become: true
+  vars:
+    vault_cal_oidc_client_secret: "{{ hostvars['localhost']['cal_oidc_client_secret_effective'] }}"
+  pre_tasks:
+    - name: Load Cal Postgres credentials from .env
+      ansible.builtin.shell: |
+        set -a
+        source {{ cal_compose_dir }}/.env
+        printf 'user=%s\npass=%s\n' "$POSTGRES_USER" "$POSTGRES_PASSWORD"
+      args:
+        executable: /bin/bash
+      register: cal_pg_creds
+      changed_when: false
+      no_log: true
+
+    - name: Set Cal database facts
+      ansible.builtin.set_fact:
+        cal_postgres_user: "{{ cal_pg_creds.stdout_lines[0] | regex_replace('^user=', '') }}"
+        cal_postgres_password: "{{ cal_pg_creds.stdout_lines[1] | regex_replace('^pass=', '') }}"
+        cal_saml_database_url: >-
+          postgresql://{{ cal_pg_creds.stdout_lines[0] | regex_replace('^user=', '') }}:{{
+          cal_pg_creds.stdout_lines[1] | regex_replace('^pass=', '') }}@db:5432/{{ cal_saml_db_name }}
+      no_log: true
+
+  roles:
+    - role: cal_sso
+
+- name: Authentik — Cal.com OIDC provider
+  hosts: identity
+  become: true
+  vars:
+    vault_cal_oidc_client_secret: "{{ hostvars['localhost']['cal_oidc_client_secret_effective'] }}"
+  tasks:
+    - name: Authentik OIDC for Cal.com
+      ansible.builtin.import_role:
+        name: cal_sso
+        tasks_from: authentik.yml
+
+- name: Cal.com OIDC — finish in UI
+  hosts: cal
+  gather_facts: false
+  tasks:
+    - name: Print Cal.com SSO configuration values
+      ansible.builtin.debug:
+        msg:
+          - "1. Log in to Cal as {{ cal_saml_admins }}"
+          - "2. Open {{ cal_public_url }}/settings/security/sso"
+          - "3. Configure OIDC:"
+          - " Client ID: {{ cal_oidc_client_id }}"
+          - " Client Secret: (vault_cal_oidc_client_secret — see vault)"
+          - " Well Known URL: {{ cal_authentik_host }}/application/o/{{ cal_authentik_app_slug }}/.well-known/openid-configuration"
+          - "4.
Test SSO login; keep local password as break-glass" + run_once: true diff --git a/playbooks/maintenance.yml b/playbooks/maintenance.yml index 37dda31..b5f795a 100644 --- a/playbooks/maintenance.yml +++ b/playbooks/maintenance.yml @@ -24,6 +24,7 @@ roles: - {role: maintenance, tags: ['maintenance']} + - {role: maintenance_cron, tags: ['maintenance', 'maintenance_cron']} post_tasks: - name: Display maintenance completion diff --git a/roles/cal_sso/defaults/main.yml b/roles/cal_sso/defaults/main.yml new file mode 100644 index 0000000..809d918 --- /dev/null +++ b/roles/cal_sso/defaults/main.yml @@ -0,0 +1,10 @@ +--- +cal_compose_dir: /opt/cal +cal_saml_db_name: calsaml +cal_saml_admins: idobkin@gmail.com +cal_public_url: https://cal.levkin.ca +cal_authentik_app_slug: cal-com +cal_authentik_provider_name: cal-com-oidc +cal_authentik_host: https://auth.levkin.ca +# Set in vault: vault_cal_oidc_client_secret (generated on first run if absent) +cal_oidc_client_id: "{{ cal_authentik_app_slug }}" diff --git a/roles/cal_sso/handlers/main.yml b/roles/cal_sso/handlers/main.yml new file mode 100644 index 0000000..f9f3d84 --- /dev/null +++ b/roles/cal_sso/handlers/main.yml @@ -0,0 +1,20 @@ +--- +- name: Recreate calcom stack + ansible.builtin.command: + cmd: docker compose up -d + chdir: "{{ cal_compose_dir }}" + changed_when: true + +- name: Recreate authentik server + ansible.builtin.command: + cmd: docker compose up -d server worker + chdir: /opt/authentik + changed_when: true + +- name: Apply authentik cal blueprint + ansible.builtin.command: + cmd: >- + docker compose exec -T server + ak apply_blueprint {{ cal_authentik_app_slug }}-oidc.yaml + chdir: /opt/authentik + changed_when: true diff --git a/roles/cal_sso/tasks/authentik.yml b/roles/cal_sso/tasks/authentik.yml new file mode 100644 index 0000000..9e71113 --- /dev/null +++ b/roles/cal_sso/tasks/authentik.yml @@ -0,0 +1,25 @@ +--- +- name: Ensure Authentik blueprints directory on host + ansible.builtin.file: + path: 
/opt/authentik/blueprints + state: directory + mode: "0755" + +- name: Add blueprints volume to Authentik server service + ansible.builtin.replace: + path: /opt/authentik/compose.yml + regexp: '(?ms)( server:.*? volumes:\n - \./data:/data\n)( - \./custom-templates:/templates)' + replace: '\1 - ./blueprints:/blueprints\n\2' + notify: + - Recreate authentik server + - Apply authentik cal blueprint + +- name: Deploy Cal.com OIDC blueprint + ansible.builtin.template: + src: authentik-cal-oidc.yaml.j2 + dest: "/opt/authentik/blueprints/{{ cal_authentik_app_slug }}-oidc.yaml" + mode: "0644" + notify: Apply authentik cal blueprint + +- name: Flush Authentik blueprint handler + ansible.builtin.meta: flush_handlers diff --git a/roles/cal_sso/tasks/main.yml b/roles/cal_sso/tasks/main.yml new file mode 100644 index 0000000..b00f816 --- /dev/null +++ b/roles/cal_sso/tasks/main.yml @@ -0,0 +1,52 @@ +--- +- name: Ensure SAML database exists on Cal Postgres + ansible.builtin.command: + cmd: >- + docker exec cal-db psql -U {{ cal_postgres_user }} -tc + "SELECT 1 FROM pg_database WHERE datname='{{ cal_saml_db_name }}'" + register: cal_saml_db_check + changed_when: false + failed_when: cal_saml_db_check.rc != 0 + +- name: Create SAML database + ansible.builtin.command: + cmd: >- + docker exec cal-db psql -U {{ cal_postgres_user }} -c + "CREATE DATABASE {{ cal_saml_db_name }}" + when: cal_saml_db_check.stdout | trim != "1" + changed_when: true + +- name: Deploy docker-compose with SSO environment + ansible.builtin.template: + src: docker-compose.yml.j2 + dest: "{{ cal_compose_dir }}/docker-compose.yml" + owner: root + group: root + mode: "0644" + notify: Recreate calcom stack + +- name: Ensure SAML env vars in Cal .env + ansible.builtin.lineinfile: + path: "{{ cal_compose_dir }}/.env" + regexp: "^{{ item.key }}=" + line: "{{ item.key }}={{ item.value }}" + create: false + no_log: true + loop: + - key: SAML_DATABASE_URL + value: "{{ cal_saml_database_url }}" + - key: SAML_ADMINS + 
value: "{{ cal_saml_admins }}" + notify: Recreate calcom stack + +- name: Flush handlers before OIDC UI step + ansible.builtin.meta: flush_handlers + +- name: Wait for Cal.com HTTP after stack recreate + ansible.builtin.uri: + url: "{{ cal_public_url }}/api/version" + status_code: [200, 404] + register: cal_http + retries: 12 + delay: 10 + until: cal_http.status in [200, 404] diff --git a/roles/cal_sso/templates/authentik-cal-oidc.yaml.j2 b/roles/cal_sso/templates/authentik-cal-oidc.yaml.j2 new file mode 100644 index 0000000..9012954 --- /dev/null +++ b/roles/cal_sso/templates/authentik-cal-oidc.yaml.j2 @@ -0,0 +1,38 @@ +# Cal.com OIDC provider + application (managed by Ansible) +version: 1 +metadata: + name: Cal.com OIDC + labels: + blueprints.goauthentik.io/instantiate: "true" +entries: + - model: authentik_providers_oauth2.oauth2provider + id: cal-oidc-provider + identifiers: + name: {{ cal_authentik_provider_name }} + attrs: + name: {{ cal_authentik_provider_name }} + authorization_flow: !Find [authentik_flows.flow, [slug, default-provider-authorization-implicit-consent]] + invalidation_flow: !Find [authentik_flows.flow, [slug, default-provider-invalidation-flow]] + client_type: confidential + client_id: {{ cal_oidc_client_id }} + client_secret: {{ vault_cal_oidc_client_secret }} + redirect_uris: + - matching_mode: strict + url: {{ cal_public_url }}/api/auth/oidc + signing_key: !Find [authentik_crypto.certificatekeypair, [name, authentik Self-signed Certificate]] + property_mappings: + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]] + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, email]] + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, profile]] + - model: authentik_core.application + id: cal-oidc-app + identifiers: + slug: {{ cal_authentik_app_slug }} + attrs: + name: Cal.com + slug: {{ cal_authentik_app_slug }} + group: "" + provider: !KeyOf cal-oidc-provider + policy_engine_mode: any + meta_launch_url: 
{{ cal_public_url }} + meta_icon: https://cal.com/favicon.ico diff --git a/roles/cal_sso/templates/docker-compose.yml.j2 b/roles/cal_sso/templates/docker-compose.yml.j2 new file mode 100644 index 0000000..8081329 --- /dev/null +++ b/roles/cal_sso/templates/docker-compose.yml.j2 @@ -0,0 +1,44 @@ +services: + db: + image: postgres:15 + container_name: cal-db + restart: unless-stopped + environment: + POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + POSTGRES_DB: ${POSTGRES_DB} + volumes: + - ./postgres-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}"] + interval: 10s + timeout: 5s + retries: 5 + + calcom: + image: calcom/cal.com:latest + container_name: calcom + restart: unless-stopped + depends_on: + db: + condition: service_healthy + environment: + DATABASE_URL: ${DATABASE_URL} + DATABASE_DIRECT_URL: ${DATABASE_DIRECT_URL} + NEXT_PUBLIC_WEBAPP_URL: ${NEXT_PUBLIC_WEBAPP_URL} + NEXT_PUBLIC_API_V2_URL: ${NEXT_PUBLIC_API_V2_URL} + NEXTAUTH_URL: ${NEXTAUTH_URL} + NEXTAUTH_SECRET: ${NEXTAUTH_SECRET} + CALENDSO_ENCRYPTION_KEY: ${CALENDSO_ENCRYPTION_KEY} + CALCOM_LICENSE_KEY: ${CALCOM_LICENSE_KEY} + NEXT_PUBLIC_LICENSE_CONSENT: ${NEXT_PUBLIC_LICENSE_CONSENT} + SAML_DATABASE_URL: ${SAML_DATABASE_URL} + SAML_ADMINS: ${SAML_ADMINS} + EMAIL_FROM: ${EMAIL_FROM} + EMAIL_SERVER_HOST: ${EMAIL_SERVER_HOST} + EMAIL_SERVER_PORT: ${EMAIL_SERVER_PORT} + EMAIL_SERVER_USER: ${EMAIL_SERVER_USER} + EMAIL_SERVER_PASSWORD: ${EMAIL_SERVER_PASSWORD} + CALCOM_TELEMETRY_DISABLED: ${CALCOM_TELEMETRY_DISABLED} + ports: + - "3000:3000" diff --git a/roles/maintenance_cron/README.md b/roles/maintenance_cron/README.md new file mode 100644 index 0000000..d0e0626 --- /dev/null +++ b/roles/maintenance_cron/README.md @@ -0,0 +1,23 @@ +# maintenance_cron + +Weekly cleanup jobs for production hosts. 
+ +## Scripts + +| Script | Schedule | Purpose | +|--------|----------|---------| +| `system-maintenance` | `/etc/cron.weekly/` | `journalctl --vacuum-size=500M`, `apt autoremove`, `apt autoclean` | +| `docker-prune` | `/etc/cron.weekly/` | `docker system prune -af --filter until=168h` | +| `gitea-archive-prune` | `/etc/cron.weekly/` by default; `/etc/periodic/weekly/` on Alpine (giteaVM) | Delete Gitea `repo-archive` files older than 7 days | + +## Variables + +See `defaults/main.yml`. Enable per host or group: + +```yaml +maintenance_cron_enable_system: true +maintenance_cron_enable_docker: true # Docker hosts only +maintenance_cron_enable_gitea_archive: true # giteaVM only +``` + +Applied via `playbooks/maintenance.yml` (tag `maintenance_cron`). diff --git a/roles/maintenance_cron/defaults/main.yml b/roles/maintenance_cron/defaults/main.yml new file mode 100644 index 0000000..fe51b32 --- /dev/null +++ b/roles/maintenance_cron/defaults/main.yml @@ -0,0 +1,18 @@ +--- +# Weekly system cleanup (journal + apt) +maintenance_cron_enable_system: true +maintenance_cron_journal_vacuum_size: 500M +maintenance_cron_system_script: /etc/cron.weekly/system-maintenance + +# Docker prune (CI / Docker hosts) +maintenance_cron_enable_docker: false +maintenance_cron_docker_prune_until: 168h +maintenance_cron_docker_script: /etc/cron.weekly/docker-prune +maintenance_cron_docker_log: /var/log/docker-prune.log + +# Gitea repo-archive cache (Alpine Gitea VM) +maintenance_cron_enable_gitea_archive: false +maintenance_cron_gitea_archive_dir: /var/lib/gitea/data/repo-archive +maintenance_cron_gitea_archive_max_age_days: 7 +maintenance_cron_gitea_archive_script: /etc/cron.weekly/gitea-archive-prune +maintenance_cron_gitea_archive_log: /var/log/gitea-archive-prune.log diff --git a/roles/maintenance_cron/tasks/main.yml b/roles/maintenance_cron/tasks/main.yml new file mode 100644 index 0000000..5f25447 --- /dev/null +++ b/roles/maintenance_cron/tasks/main.yml @@ -0,0 +1,27 @@ +--- +- name: Install weekly system maintenance script +
ansible.builtin.template:
+    src: system-maintenance.sh.j2
+    dest: "{{ maintenance_cron_system_script }}"
+    owner: root
+    group: root
+    mode: '0755'
+  when: maintenance_cron_enable_system | bool
+
+- name: Install weekly Docker prune script
+  ansible.builtin.template:
+    src: docker-prune.sh.j2
+    dest: "{{ maintenance_cron_docker_script }}"
+    owner: root
+    group: root
+    mode: '0755'
+  when: maintenance_cron_enable_docker | bool
+
+- name: Install weekly Gitea archive prune script
+  ansible.builtin.template:
+    src: gitea-archive-prune.sh.j2
+    dest: "{{ maintenance_cron_gitea_archive_script }}"
+    owner: root
+    group: root
+    mode: '0755'
+  when: maintenance_cron_enable_gitea_archive | bool
diff --git a/roles/maintenance_cron/templates/docker-prune.sh.j2 b/roles/maintenance_cron/templates/docker-prune.sh.j2
new file mode 100644
index 0000000..fd677c2
--- /dev/null
+++ b/roles/maintenance_cron/templates/docker-prune.sh.j2
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Ansible managed — weekly Docker image/container cleanup
+set -euo pipefail
+if ! command -v docker >/dev/null 2>&1; then
+  exit 0
+fi
+/usr/bin/docker system prune -af --filter "until={{ maintenance_cron_docker_prune_until }}" \
+  >> "{{ maintenance_cron_docker_log }}" 2>&1
diff --git a/roles/maintenance_cron/templates/gitea-archive-prune.sh.j2 b/roles/maintenance_cron/templates/gitea-archive-prune.sh.j2
new file mode 100644
index 0000000..8fd2e48
--- /dev/null
+++ b/roles/maintenance_cron/templates/gitea-archive-prune.sh.j2
@@ -0,0 +1,19 @@
+#!/bin/sh
+# Ansible managed — weekly Gitea repo-archive cache cleanup
+set -eu
+ARCHIVE_DIR="{{ maintenance_cron_gitea_archive_dir }}"
+LOG="{{ maintenance_cron_gitea_archive_log }}"
+MAX_AGE_DAYS="{{ maintenance_cron_gitea_archive_max_age_days }}"
+
+if [ ! -d "${ARCHIVE_DIR}" ]; then
+  exit 0  # nothing to prune on hosts without the archive cache
+fi
+
+{
+  echo "=== $(date -Iseconds) gitea-archive-prune ==="
+  echo "Before: $(du -sh "${ARCHIVE_DIR}" 2>/dev/null | awk '{print $1}')"
+  find "${ARCHIVE_DIR}" -type f -mtime "+${MAX_AGE_DAYS}" -delete
+  find "${ARCHIVE_DIR}" -mindepth 1 -type d -empty -delete  # -mindepth 1: never remove ARCHIVE_DIR itself
+  echo "After: $(du -sh "${ARCHIVE_DIR}" 2>/dev/null | awk '{print $1}')"
+  df -h /
+} >> "${LOG}" 2>&1
diff --git a/roles/maintenance_cron/templates/system-maintenance.sh.j2 b/roles/maintenance_cron/templates/system-maintenance.sh.j2
new file mode 100644
index 0000000..f551a15
--- /dev/null
+++ b/roles/maintenance_cron/templates/system-maintenance.sh.j2
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Ansible managed — weekly journal vacuum + apt cleanup
+set -euo pipefail
+journalctl --vacuum-size={{ maintenance_cron_journal_vacuum_size }} 2>/dev/null || true
+export DEBIAN_FRONTEND=noninteractive
+apt-get autoremove -y
+apt-get autoclean -y
diff --git a/scripts/kuma-add-monitors.sh b/scripts/kuma-add-monitors.sh
new file mode 100755
index 0000000..27c1257
--- /dev/null
+++ b/scripts/kuma-add-monitors.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+# Add missing Uptime Kuma HTTP monitors via API (existing monitors are not modified).
+# Usage:
+#   source <(./scripts/vault-export-env.sh)  # or export KUMA_* manually
+#   ./scripts/kuma-add-monitors.sh
+#
+# Monitors are idempotent: existing names are skipped.
+
+set -euo pipefail
+
+REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.."
&& pwd)" +KUMA_URL="${KUMA_URL:-http://10.0.10.22:3001}" +KUMA_USER="${KUMA_USER:-admin}" +KUMA_PASSWORD="${KUMA_PASSWORD:-}" + +if [[ -z "${KUMA_PASSWORD}" ]]; then + if [[ -f "${REPO_ROOT}/.env" ]]; then + # shellcheck disable=SC1091 + set -a + source "${REPO_ROOT}/.env" + set +a + KUMA_PASSWORD="${KUMA_PASSWORD:-}" + fi +fi + +if [[ -z "${KUMA_PASSWORD}" ]]; then + echo "Set KUMA_PASSWORD (or run vault-export-env.sh first)" >&2 + exit 1 +fi + +export KUMA_URL KUMA_USER KUMA_PASSWORD + +"${REPO_ROOT}/.venv/bin/python3" <<'PY' +import os +import sys + +try: + from uptime_kuma_api import UptimeKumaApi +except ImportError: + print("Run: .venv/bin/pip install uptime-kuma-api", file=sys.stderr) + sys.exit(1) + +MONITORS = [ + { + "type": "http", + "name": "Gitea", + "url": "https://git.levkin.ca/user/login", + "interval": 60, + "retryInterval": 60, + "maxretries": 3, + "accepted_statuscodes": ["200-299"], + }, +] + +url = os.environ["KUMA_URL"] +user = os.environ["KUMA_USER"] +password = os.environ["KUMA_PASSWORD"] + +with UptimeKumaApi(url) as api: + api.login(user, password) + existing = {m.get("name"): m for m in api.get_monitors()} + + for spec in MONITORS: + name = spec["name"] + if name in existing: + print(f"skip (exists): {name} id={existing[name].get('id')}") + continue + result = api.add_monitor(**spec) + print(f"added: {name} -> {result}") +PY diff --git a/scripts/security-audit-extended.sh b/scripts/security-audit-extended.sh new file mode 100755 index 0000000..76689f3 --- /dev/null +++ b/scripts/security-audit-extended.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# Extended read-only security + cleanup audit (run on target host). +set -u + +echo "=== identity ===" +hostname -f 2>/dev/null || hostname +if [ -f /etc/os-release ]; then . 
/etc/os-release; echo "os=${PRETTY_NAME:-unknown}"; fi +echo "kernel=$(uname -r)" +echo "uptime=$(uptime -p 2>/dev/null || uptime)" + +echo "=== disk ===" +df -h / /var 2>/dev/null | tail -n +2 | awk '{print $6" "$5" used "$4" free"}' + +echo "=== sshd (effective) ===" +if command -v sshd >/dev/null 2>&1; then + sshd -T 2>/dev/null | grep -E '^(permitrootlogin|passwordauthentication|pubkeyauthentication|permitemptypasswords|port|x11forwarding|maxauthtries) ' || true +else + grep -E '^(PermitRootLogin|PasswordAuthentication|PubkeyAuthentication|Port) ' /etc/ssh/sshd_config 2>/dev/null | grep -v '^#' || echo "sshd not found" +fi + +echo "=== firewall ===" +if command -v ufw >/dev/null 2>&1; then + ufw status verbose 2>/dev/null | head -5 +elif command -v firewall-cmd >/dev/null 2>&1; then + firewall-cmd --state 2>/dev/null || true +else + echo "no ufw/firewalld" +fi + +echo "=== fail2ban ===" +systemctl is-active fail2ban 2>/dev/null || echo "fail2ban: inactive or missing" + +echo "=== unattended-upgrades ===" +systemctl is-active unattended-upgrades 2>/dev/null || echo "unattended-upgrades: inactive or missing" + +echo "=== pending apt upgrades ===" +if command -v apt-get >/dev/null 2>&1; then + apt-get -s upgrade 2>/dev/null | grep -c '^Inst' || true # grep -c prints 0 itself (and exits 1) when nothing matches +else + echo "n/a" +fi + +echo "=== docker ===" +if command -v docker >/dev/null 2>&1; then + echo "docker=$(docker --version 2>/dev/null || true)" + echo "containers=$(docker ps -aq 2>/dev/null | wc -l | tr -d ' ') running=$(docker ps -q 2>/dev/null | wc -l | tr -d ' ')" + echo "images=$(docker images -q 2>/dev/null | wc -l | tr -d ' ')" + docker system df 2>/dev/null | tail -n +2 || true +else + echo "no docker" +fi + +echo "=== journal disk ===" +journalctl --disk-usage 2>/dev/null || echo "n/a" + +echo "=== apt cache ===" +du -sh /var/cache/apt/archives 2>/dev/null || echo "n/a" + +echo "=== existing cron (root) ===" +{ crontab -l 2>/dev/null || echo "no root crontab"; } | grep -v '^#' | grep -v '^$' | head -10 # fallback wraps crontab: the pipeline's own status is head's +ls 
/etc/cron.{daily,weekly,monthly}/* 2>/dev/null | xargs -I{} basename {} | head -15 || true + +echo "=== listening tcp (non-localhost) ===" +ss -tlnp 2>/dev/null | awk 'NR==1 || /LISTEN/ {print}' | grep -v '127.0.0.1:' | grep -v '\[::1\]:' | head -15 + +echo "=== uid 0 accounts ===" +awk -F: '$3==0 {print $1}' /etc/passwd | tr '\n' ' ' +echo + +echo "=== tailscale ===" +command -v tailscale >/dev/null 2>&1 && tailscale status --self 2>/dev/null | head -1 || echo "no tailscale"