Compare commits

..

No commits in common. "master" and "fix/inventory-host-ips@skipci" have entirely different histories.

91 changed files with 313 additions and 5994 deletions

View File

@ -1,37 +0,0 @@
# Copy to .env (gitignored): cp .env.example .env
#
# vault → .env: make vault-export-env
# .env → vault: make vault-import-env
# hosts → vault: make vault-pull-infra-secrets (SSH to monitoring/hermes, then import)
#
# Prefer vault for long-term storage; delete .env after export if you want.
# Mailcow (make mailcow-mailbox MAILBOX=alerts)
MAILCOW_API_KEY=
ALERTS_PASSWORD=
# Uptime Kuma @ 10.0.10.22:3001 (scripts/kuma-setup-smtp.sh)
KUMA_URL=http://10.0.10.22:3001
KUMA_USER=admin
KUMA_PASSWORD=
# Kuma SMTP notification (after alerts@ mailbox exists)
SMTP_HOST=mail.levkine.ca
SMTP_PORT=587
SMTP_USER=alerts@levkine.ca
SMTP_PASS=
SMTP_TO=idobkin@gmail.com
# Umami @ 10.0.10.22:3000 (admin UI password; DB pass is on LXC only)
UMAMI_ADMIN_PASSWORD=
# Hermes Mattermost (not Telegram)
MATTERMOST_URL=
MATTERMOST_TOKEN=
MATTERMOST_ALLOWED_USERS=
# Optional: same password on Proxmox / LXCs / caddy root (if you use one shared admin password)
# PROXMOX_PASSWORD=
# LXC_ROOT_PASSWORD=
# Per-mailbox: MAILBOX_notify_PASSWORD=

View File

@ -65,7 +65,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
if: needs.skip-ci-check.outputs.should-skip != '1' && (github.event_name == 'pull_request' || github.ref == 'refs/heads/master') if: needs.skip-ci-check.outputs.should-skip != '1' && (github.event_name == 'pull_request' || github.ref == 'refs/heads/master')
container: container:
image: node:20-bookworm image: node:20-bullseye
steps: steps:
- name: Check out code - name: Check out code
uses: actions/checkout@v4 uses: actions/checkout@v4
@ -84,26 +84,12 @@ jobs:
needs: skip-ci-check needs: skip-ci-check
runs-on: ubuntu-latest runs-on: ubuntu-latest
if: needs.skip-ci-check.outputs.should-skip != '1' && (github.event_name == 'pull_request' || github.ref == 'refs/heads/master') if: needs.skip-ci-check.outputs.should-skip != '1' && (github.event_name == 'pull_request' || github.ref == 'refs/heads/master')
env:
PIP_NO_CACHE_DIR: "1"
PIP_BREAK_SYSTEM_PACKAGES: "1"
container: container:
image: node:20-bookworm image: node:20-bullseye
steps: steps:
- name: Check out code - name: Check out code
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: Bootstrap pip (PEP 668 / bookworm)
run: |
python3 --version
if ! python3 -m pip --version >/dev/null 2>&1; then
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
python3 /tmp/get-pip.py --disable-pip-version-check --break-system-packages
fi
- name: Show disk space (runner may be full)
run: df -h / /tmp || true
- name: Configure CI Ansible (no vault, localhost inventory) - name: Configure CI Ansible (no vault, localhost inventory)
run: | run: |
set -e set -e
@ -112,13 +98,12 @@ jobs:
localhost ansible_connection=local localhost ansible_connection=local
EOF EOF
cat > /tmp/ci-ansible.cfg <<EOF cat > /tmp/ci-ansible.cfg <<'EOF'
[defaults] [defaults]
inventory = /tmp/ci-inventory.ini inventory = /tmp/ci-inventory.ini
roles_path = ${GITHUB_WORKSPACE}/roles roles_path = /workspace/ilia/ansible/roles
host_key_checking = False host_key_checking = False
stdout_callback = default stdout_callback = yaml
callback_result_format = yaml
bin_ansible_callbacks = True bin_ansible_callbacks = True
retry_files_enabled = False retry_files_enabled = False
interpreter_python = auto_silent interpreter_python = auto_silent
@ -130,29 +115,18 @@ jobs:
echo "ANSIBLE_INVENTORY=/tmp/ci-inventory.ini" >> "$GITHUB_ENV" echo "ANSIBLE_INVENTORY=/tmp/ci-inventory.ini" >> "$GITHUB_ENV"
- name: Install Ansible and linting tools - name: Install Ansible and linting tools
run: pip3 install --no-cache-dir ansible ansible-lint yamllint pyyaml
- name: Install Ansible collections
run: | run: |
python3 -m pip install --no-cache-dir ansible-core ansible-lint yamllint pyyaml
ansible-galaxy collection install -r collections/requirements.yml ansible-galaxy collection install -r collections/requirements.yml
rm -rf /root/.cache/pip /tmp/pip-* 2>/dev/null || true
- name: Validate YAML syntax - name: Validate YAML syntax
run: | run: |
echo "Checking YAML syntax..." echo "Checking YAML syntax..."
find . \( -name "*.yml" -o -name "*.yaml" \) \ find . -name "*.yml" -o -name "*.yaml" | grep -v ".git" | while read file; do
! -path "./.git/*" \ python3 -c "import yaml; yaml.safe_load(open('$file'))" || exit 1
! -path "./node_modules/*" \ done
! -path "./.venv/*" \
! -name "vault.yml" \
! -name "vault.yaml" \
! -name "vault_*.yml" \
! -name "vault_*.yaml" \
| while read -r file; do
if head -n 5 "$file" | grep -q '^\$ANSIBLE_VAULT'; then
echo "Skipping encrypted vault file: $file"
continue
fi
python3 -c "import yaml; yaml.safe_load(open('$file'))" || exit 1
done
- name: Run ansible-lint - name: Run ansible-lint
run: ansible-lint run: ansible-lint
@ -162,7 +136,7 @@ jobs:
if: needs.skip-ci-check.outputs.should-skip != '1' if: needs.skip-ci-check.outputs.should-skip != '1'
runs-on: ubuntu-latest runs-on: ubuntu-latest
container: container:
image: node:20-bookworm image: node:20-bullseye
steps: steps:
- name: Check out code - name: Check out code
uses: actions/checkout@v4 uses: actions/checkout@v4
@ -180,11 +154,8 @@ jobs:
needs: skip-ci-check needs: skip-ci-check
if: needs.skip-ci-check.outputs.should-skip != '1' if: needs.skip-ci-check.outputs.should-skip != '1'
runs-on: ubuntu-latest runs-on: ubuntu-latest
env:
PIP_NO_CACHE_DIR: "1"
PIP_BREAK_SYSTEM_PACKAGES: "1"
container: container:
image: node:20-bookworm image: node:20-bullseye
steps: steps:
- name: Check out code - name: Check out code
uses: actions/checkout@v4 uses: actions/checkout@v4
@ -202,12 +173,8 @@ jobs:
- name: Scan Python dependencies - name: Scan Python dependencies
run: | run: |
if [ -f requirements.txt ]; then if [ -f requirements.txt ]; then
if ! python3 -m pip --version >/dev/null 2>&1; then pip3 install --no-cache-dir pip-audit
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py pip-audit -r requirements.txt
python3 /tmp/get-pip.py --disable-pip-version-check --break-system-packages
fi
python3 -m pip install --no-cache-dir pip-audit
python3 -m pip-audit -r requirements.txt
else else
echo "No requirements.txt, skipping pip-audit" echo "No requirements.txt, skipping pip-audit"
fi fi
@ -217,25 +184,14 @@ jobs:
needs: skip-ci-check needs: skip-ci-check
if: needs.skip-ci-check.outputs.should-skip != '1' if: needs.skip-ci-check.outputs.should-skip != '1'
runs-on: ubuntu-latest runs-on: ubuntu-latest
env:
PIP_NO_CACHE_DIR: "1"
PIP_BREAK_SYSTEM_PACKAGES: "1"
container: container:
image: node:20-bookworm image: node:20-bullseye
steps: steps:
- name: Check out code - name: Check out code
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: Bootstrap pip (PEP 668 / bookworm)
run: |
python3 --version
if ! python3 -m pip --version >/dev/null 2>&1; then
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
python3 /tmp/get-pip.py --disable-pip-version-check --break-system-packages
fi
- name: Install Semgrep - name: Install Semgrep
run: python3 -m pip install --no-cache-dir semgrep run: pip3 install --no-cache-dir semgrep
- name: Run Semgrep scan - name: Run Semgrep scan
run: semgrep --config=auto --error run: semgrep --config=auto --error
@ -246,7 +202,7 @@ jobs:
if: needs.skip-ci-check.outputs.should-skip != '1' if: needs.skip-ci-check.outputs.should-skip != '1'
runs-on: ubuntu-latest runs-on: ubuntu-latest
container: container:
image: node:20-bookworm image: node:20-bullseye
steps: steps:
- name: Check out code - name: Check out code
uses: actions/checkout@v4 uses: actions/checkout@v4
@ -268,24 +224,14 @@ jobs:
needs: skip-ci-check needs: skip-ci-check
if: needs.skip-ci-check.outputs.should-skip != '1' if: needs.skip-ci-check.outputs.should-skip != '1'
runs-on: ubuntu-latest runs-on: ubuntu-latest
env:
PIP_NO_CACHE_DIR: "1"
PIP_BREAK_SYSTEM_PACKAGES: "1"
container: container:
image: node:20-bookworm image: node:20-bullseye
steps: steps:
- name: Check out code - name: Check out code
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: Bootstrap pip (PEP 668 / bookworm)
run: |
if ! python3 -m pip --version >/dev/null 2>&1; then
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
python3 /tmp/get-pip.py --disable-pip-version-check --break-system-packages
fi
- name: Install Ansible - name: Install Ansible
run: python3 -m pip install --no-cache-dir ansible-core run: pip3 install --no-cache-dir ansible
- name: Validate vault files are encrypted - name: Validate vault files are encrypted
run: | run: |
@ -322,22 +268,12 @@ jobs:
needs: skip-ci-check needs: skip-ci-check
if: needs.skip-ci-check.outputs.should-skip != '1' if: needs.skip-ci-check.outputs.should-skip != '1'
runs-on: ubuntu-latest runs-on: ubuntu-latest
env:
PIP_NO_CACHE_DIR: "1"
PIP_BREAK_SYSTEM_PACKAGES: "1"
container: container:
image: node:20-bookworm image: node:20-bullseye
steps: steps:
- name: Check out code - name: Check out code
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: Bootstrap pip (PEP 668 / bookworm)
run: |
if ! python3 -m pip --version >/dev/null 2>&1; then
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
python3 /tmp/get-pip.py --disable-pip-version-check --break-system-packages
fi
- name: Configure CI Ansible (no vault, localhost inventory) - name: Configure CI Ansible (no vault, localhost inventory)
run: | run: |
set -e set -e
@ -362,27 +298,14 @@ jobs:
[local] [local]
localhost ansible_connection=local localhost ansible_connection=local
[sites]
localhost ansible_connection=local
[comms]
localhost ansible_connection=local
[proxmox]
localhost ansible_connection=local
[caddy]
localhost ansible_connection=local
EOF EOF
cat > /tmp/ci-ansible.cfg <<EOF cat > /tmp/ci-ansible.cfg <<'EOF'
[defaults] [defaults]
inventory = /tmp/ci-inventory.ini inventory = /tmp/ci-inventory.ini
roles_path = ${GITHUB_WORKSPACE}/roles roles_path = /workspace/ilia/ansible/roles
host_key_checking = False host_key_checking = False
stdout_callback = default stdout_callback = yaml
callback_result_format = yaml
bin_ansible_callbacks = True bin_ansible_callbacks = True
retry_files_enabled = False retry_files_enabled = False
interpreter_python = auto_silent interpreter_python = auto_silent
@ -394,10 +317,11 @@ jobs:
echo "ANSIBLE_INVENTORY=/tmp/ci-inventory.ini" >> "$GITHUB_ENV" echo "ANSIBLE_INVENTORY=/tmp/ci-inventory.ini" >> "$GITHUB_ENV"
- name: Install Ansible - name: Install Ansible
run: pip3 install --no-cache-dir ansible
- name: Install Ansible collections
run: | run: |
python3 -m pip install --no-cache-dir ansible-core
ansible-galaxy collection install -r collections/requirements.yml ansible-galaxy collection install -r collections/requirements.yml
rm -rf /root/.cache/pip /tmp/pip-* 2>/dev/null || true
- name: Validate playbooks (CI inventory, no vault) - name: Validate playbooks (CI inventory, no vault)
run: | run: |
@ -428,13 +352,12 @@ jobs:
if: needs.skip-ci-check.outputs.should-skip != '1' if: needs.skip-ci-check.outputs.should-skip != '1'
runs-on: ubuntu-latest runs-on: ubuntu-latest
container: container:
image: node:20-bookworm image: node:20-bullseye
steps: steps:
- name: Check out code - name: Check out code
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: Install Trivy - name: Install Trivy
continue-on-error: true
run: | run: |
set -e set -e
# Use a fixed, known-good Trivy version to avoid URL/redirect issues # Use a fixed, known-good Trivy version to avoid URL/redirect issues
@ -492,13 +415,18 @@ jobs:
needs: skip-ci-check needs: skip-ci-check
if: needs.skip-ci-check.outputs.should-skip != '1' && (github.event_name == 'pull_request' || github.ref == 'refs/heads/master') if: needs.skip-ci-check.outputs.should-skip != '1' && (github.event_name == 'pull_request' || github.ref == 'refs/heads/master')
runs-on: ubuntu-latest runs-on: ubuntu-latest
continue-on-error: true
container: container:
image: sonarsource/sonar-scanner-cli:latest image: sonarsource/sonar-scanner-cli:5.0.1.3006
env: env:
SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }} SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
steps: steps:
- name: Install Node.js for checkout action
run: apk add --no-cache nodejs npm curl
- name: Check out code
uses: actions/checkout@v4
- name: Verify SonarQube connection - name: Verify SonarQube connection
run: | run: |
echo "Checking SonarQube connectivity..." echo "Checking SonarQube connectivity..."

11
.gitignore vendored
View File

@ -5,7 +5,6 @@
# Temporary files # Temporary files
*.tmp *.tmp
*.bak *.bak
*.vault-bak
*~ *~
vault.yml.bak.* vault.yml.bak.*
@ -18,9 +17,6 @@ id_rsa
id_ed25519 id_ed25519
id_ecdsa id_ecdsa
# Python venv (make bootstrap)
.venv/
# Python bytecode # Python bytecode
__pycache__/ __pycache__/
*.py[cod] *.py[cod]
@ -38,11 +34,4 @@ Thumbs.db
.ansible/facts/ .ansible/facts/
# Local data exports (Nextcloud, etc.)
exports/
# Local secrets (Mailcow API, Kuma passwords) — never commit
.env
.env.local
node_modules/ node_modules/

View File

@ -1,7 +1,7 @@
{ {
"default": true, "default": true,
"MD013": { "MD013": {
"line_length": 400, "line_length": 160,
"code_blocks": false, "code_blocks": false,
"tables": false "tables": false
}, },
@ -13,8 +13,6 @@
"MD034": false, "MD034": false,
"MD040": false, "MD040": false,
"MD047": false, "MD047": false,
"MD058": false, "MD058": false
"MD060": false,
"MD036": false
} }

249
Makefile
View File

@ -1,4 +1,4 @@
.PHONY: help bootstrap lint test check dev datascience inventory inventory-all local servers workstations clean status tailscale tailscale-check tailscale-dev tailscale-status create-vault create-vm monitoring copy-ssh-key copy-ssh-keys copy-ssh-keys-ansible copy-ssh-key-mailcow bootstrap-root-ssh bootstrap-root-ssh-services bootstrap-root-ssh-failed mailcow-mailbox mailcow-create-alerts vault-import-env .PHONY: help bootstrap lint test check dev datascience inventory inventory-all local servers workstations clean status tailscale tailscale-check tailscale-dev tailscale-status create-vault create-vm monitoring
.DEFAULT_GOAL := help .DEFAULT_GOAL := help
## Colors for output ## Colors for output
@ -28,27 +28,13 @@ PYTHON_REQ := requirements.txt
INVENTORY := inventories/production INVENTORY := inventories/production
INVENTORY_HOSTS := $(INVENTORY)/hosts INVENTORY_HOSTS := $(INVENTORY)/hosts
# Python venv (created by `make bootstrap`)
VENV := .venv
ifneq ($(wildcard $(VENV)/bin/ansible-playbook),)
export PATH := $(abspath $(VENV)/bin):$(PATH)
ANSIBLE_VAULT := $(abspath $(VENV))/bin/ansible-vault
else
ANSIBLE_VAULT := ansible-vault
endif
# Common ansible-playbook command with options # Common ansible-playbook command with options
ANSIBLE_PLAYBOOK := ansible-playbook -i $(INVENTORY) ANSIBLE_PLAYBOOK := ansible-playbook -i $(INVENTORY)
ANSIBLE_ARGS := --vault-password-file ~/.ansible-vault-pass ANSIBLE_ARGS := --vault-password-file ~/.ansible-vault-pass
# Note: sudo passwords are in vault files as ansible_become_password # Note: sudo passwords are in vault files as ansible_become_password
## Auto-detect current host to exclude from remote operations ## Auto-detect current host to exclude from remote operations
UNAME_S := $(shell uname -s) CURRENT_IP := $(shell hostname -I | awk '{print $$1}')
ifeq ($(UNAME_S),Darwin)
CURRENT_IP := $(shell ipconfig getifaddr en0 2>/dev/null || ipconfig getifaddr en1 2>/dev/null || echo "")
else
CURRENT_IP := $(shell hostname -I 2>/dev/null | awk '{print $$1}')
endif
# NOTE: inventory parsing may require vault secrets. Keep this best-effort and silent in CI. # NOTE: inventory parsing may require vault secrets. Keep this best-effort and silent in CI.
CURRENT_HOST := $(shell ansible-inventory --list --vault-password-file ~/.ansible-vault-pass 2>/dev/null | jq -r '._meta.hostvars | to_entries[] | select(.value.ansible_host == "$(CURRENT_IP)") | .key' 2>/dev/null | head -1) CURRENT_HOST := $(shell ansible-inventory --list --vault-password-file ~/.ansible-vault-pass 2>/dev/null | jq -r '._meta.hostvars | to_entries[] | select(.value.ansible_host == "$(CURRENT_IP)") | .key' 2>/dev/null | head -1)
EXCLUDE_CURRENT := $(if $(CURRENT_HOST),--limit '!$(CURRENT_HOST)',) EXCLUDE_CURRENT := $(if $(CURRENT_HOST),--limit '!$(CURRENT_HOST)',)
@ -73,36 +59,37 @@ help: ## Show this help message
@echo " make maintenance-verbose GROUP=dev # Verbose maintenance on dev group" @echo " make maintenance-verbose GROUP=dev # Verbose maintenance on dev group"
@echo "" @echo ""
require-ansible: ## Verify ansible is available (run make bootstrap if missing)
@command -v ansible-playbook >/dev/null 2>&1 && command -v ansible-vault >/dev/null 2>&1 || { \
echo "$(RED)ansible-playbook/ansible-vault not found$(RESET)"; \
echo "Run: $(BLUE)make bootstrap$(RESET)"; \
exit 1; \
}
bootstrap: ## Install all project dependencies from requirements files bootstrap: ## Install all project dependencies from requirements files
@echo "$(BOLD)Installing Project Dependencies$(RESET)" @echo "$(BOLD)Installing Project Dependencies$(RESET)"
@echo "" @echo ""
@echo "$(YELLOW)Python venv ($(VENV))/$(PYTHON_REQ):$(RESET)" @echo "$(YELLOW)Python Requirements ($(PYTHON_REQ)):$(RESET)"
@if [ ! -f "$(PYTHON_REQ)" ]; then \ @if [ -f "$(PYTHON_REQ)" ]; then \
if command -v pipx >/dev/null 2>&1; then \
printf " %-30s " "Installing with pipx"; \
if pipx install -r $(PYTHON_REQ) >/dev/null 2>&1; then \
echo "$(GREEN)✓ Installed$(RESET)"; \
else \
echo "$(YELLOW)⚠ Some packages may have failed$(RESET)"; \
fi; \
elif command -v pip3 >/dev/null 2>&1; then \
printf " %-30s " "Installing with pip3 --user"; \
if pip3 install --user -r $(PYTHON_REQ) >/dev/null 2>&1; then \
echo "$(GREEN)✓ Installed$(RESET)"; \
else \
printf " %-30s " "Trying with --break-system-packages"; \
if pip3 install --break-system-packages -r $(PYTHON_REQ) >/dev/null 2>&1; then \
echo "$(GREEN)✓ Installed$(RESET)"; \
else \
echo "$(RED)✗ Failed$(RESET)"; \
fi; \
fi; \
else \
printf " %-30s " "Python packages"; \
echo "$(YELLOW)⚠ Skipped (pip3/pipx not found)$(RESET)"; \
fi; \
else \
printf " %-30s " "$(PYTHON_REQ)"; \ printf " %-30s " "$(PYTHON_REQ)"; \
echo "$(RED)✗ File not found$(RESET)"; \ echo "$(RED)✗ File not found$(RESET)"; \
elif ! command -v python3 >/dev/null 2>&1; then \
printf " %-30s " "Python venv"; \
echo "$(RED)✗ python3 not found$(RESET)"; \
else \
if [ ! -d "$(VENV)" ]; then \
printf " %-30s " "Creating venv"; \
python3 -m venv "$(VENV)" && echo "$(GREEN)✓ Created$(RESET)" || { echo "$(RED)✗ Failed$(RESET)"; exit 1; }; \
fi; \
printf " %-30s " "Installing packages"; \
if "$(VENV)/bin/pip" install -r "$(PYTHON_REQ)" >/dev/null 2>&1; then \
echo "$(GREEN)✓ Installed$(RESET)"; \
echo " $(BLUE)Ansible:$(RESET) $(abspath $(VENV))/bin/ansible-playbook"; \
else \
echo "$(RED)✗ Failed$(RESET)"; \
exit 1; \
fi; \
fi fi
@echo "" @echo ""
@echo "$(YELLOW)Node.js Dependencies (package.json):$(RESET)" @echo "$(YELLOW)Node.js Dependencies (package.json):$(RESET)"
@ -120,9 +107,7 @@ bootstrap: ## Install all project dependencies from requirements files
@echo "" @echo ""
@echo "$(YELLOW)Ansible Collections ($(COLLECTIONS_REQ)):$(RESET)" @echo "$(YELLOW)Ansible Collections ($(COLLECTIONS_REQ)):$(RESET)"
@if [ -f "$(COLLECTIONS_REQ)" ]; then \ @if [ -f "$(COLLECTIONS_REQ)" ]; then \
GALAXY="$$(command -v ansible-galaxy)"; \ ansible-galaxy collection install -r $(COLLECTIONS_REQ) 2>&1 | grep -E "(Installing|Skipping|ERROR)" | while read line; do \
[ -x "$(VENV)/bin/ansible-galaxy" ] && GALAXY="$(abspath $(VENV))/bin/ansible-galaxy"; \
"$$GALAXY" collection install -r $(COLLECTIONS_REQ) 2>&1 | grep -E "(Installing|Skipping|ERROR)" | while read line; do \
if echo "$$line" | grep -q "Installing"; then \ if echo "$$line" | grep -q "Installing"; then \
collection=$$(echo "$$line" | awk '{print $$2}' | sed 's/:.*//'); \ collection=$$(echo "$$line" | awk '{print $$2}' | sed 's/:.*//'); \
printf " $(GREEN)✓ %-30s$(RESET) Installed\n" "$$collection"; \ printf " $(GREEN)✓ %-30s$(RESET) Installed\n" "$$collection"; \
@ -132,7 +117,7 @@ bootstrap: ## Install all project dependencies from requirements files
elif echo "$$line" | grep -q "ERROR"; then \ elif echo "$$line" | grep -q "ERROR"; then \
printf " $(RED)✗ Error: $$line$(RESET)\n"; \ printf " $(RED)✗ Error: $$line$(RESET)\n"; \
fi; \ fi; \
done || "$$GALAXY" collection install -r $(COLLECTIONS_REQ); \ done || ansible-galaxy collection install -r $(COLLECTIONS_REQ); \
else \ else \
printf " %-30s " "$(COLLECTIONS_REQ)"; \ printf " %-30s " "$(COLLECTIONS_REQ)"; \
echo "$(RED)✗ File not found$(RESET)"; \ echo "$(RED)✗ File not found$(RESET)"; \
@ -280,22 +265,6 @@ servers: ## Run baseline server playbook (usage: make servers [GROUP=services] [
$(ANSIBLE_PLAYBOOK) $(PLAYBOOK_SERVERS); \ $(ANSIBLE_PLAYBOOK) $(PLAYBOOK_SERVERS); \
fi fi
caddy-auth: require-ansible ## Ensure auth.levkin.ca reverse proxy on Caddy VM
@echo "$(YELLOW)Updating Caddy for Authentik...$(RESET)"
$(ANSIBLE_PLAYBOOK) playbooks/caddy-auth-authentik.yml $(ANSIBLE_ARGS)
caddy-levkin: require-ansible ## Ensure levkin.ca reverse proxy on Caddy VM
@echo "$(YELLOW)Updating Caddy for levkin.ca...$(RESET)"
$(ANSIBLE_PLAYBOOK) playbooks/caddy-levkin-site.yml $(ANSIBLE_ARGS)
cal-oidc: require-ansible ## Cal.com SAML DB + Authentik OIDC provider (usage: make cal-oidc)
@echo "$(YELLOW)Configuring Cal.com ↔ Authentik OIDC...$(RESET)"
$(ANSIBLE_PLAYBOOK) playbooks/cal-authentik-oidc.yml $(ANSIBLE_ARGS)
cal-oidc-check: require-ansible ## Dry-run Cal.com ↔ Authentik OIDC
@echo "$(YELLOW)Checking Cal.com ↔ Authentik OIDC...$(RESET)"
$(ANSIBLE_PLAYBOOK) playbooks/cal-authentik-oidc.yml --check --diff $(ANSIBLE_ARGS)
workstations: ## Run workstation baseline (usage: make workstations [GROUP=dev] [HOST=dev01]) workstations: ## Run workstation baseline (usage: make workstations [GROUP=dev] [HOST=dev01])
@echo "$(YELLOW)Applying workstation baseline...$(RESET)" @echo "$(YELLOW)Applying workstation baseline...$(RESET)"
@EXTRA=""; \ @EXTRA=""; \
@ -457,7 +426,7 @@ apps: ## Install applications only
$(ANSIBLE_PLAYBOOK) $(PLAYBOOK_WORKSTATIONS) --tags apps $(ANSIBLE_PLAYBOOK) $(PLAYBOOK_WORKSTATIONS) --tags apps
# Connectivity targets # Connectivity targets
ping: require-ansible auto-fallback ## Ping hosts with colored output (usage: make ping [GROUP=dev] [HOST=dev01]) ping: auto-fallback ## Ping hosts with colored output (usage: make ping [GROUP=dev] [HOST=dev01])
ifdef HOST ifdef HOST
@echo "$(YELLOW)Pinging host: $(HOST)$(RESET)" @echo "$(YELLOW)Pinging host: $(HOST)$(RESET)"
@ansible $(HOST) -m ping --one-line | while read line; do \ @ansible $(HOST) -m ping --one-line | while read line; do \
@ -574,25 +543,16 @@ tailscale-status: ## Check Tailscale status on all machines
done done
# Vault management # Vault management
edit-vault: require-ansible ## Edit encrypted host vars (usage: make edit-vault HOST=KrakenMint) edit-vault: ## Edit encrypted host vars (usage: make edit-vault HOST=dev01)
ifndef HOST ifndef HOST
@echo "$(RED)Error: HOST parameter required$(RESET)" @echo "$(RED)Error: HOST parameter required$(RESET)"
@echo "Usage: make edit-vault HOST=KrakenMint" @echo "Usage: make edit-vault HOST=dev01"
@exit 1 @exit 1
endif endif
@vault_file="$(INVENTORY)/host_vars/$(HOST)/vault.yml"; \ ansible-vault edit host_vars/$(HOST).yml
if [ ! -f "$$vault_file" ]; then vault_file="$(INVENTORY)/host_vars/$(HOST).yml"; fi; \
if [ ! -f "$$vault_file" ]; then \
echo "$(RED)No vault file for $(HOST):$(RESET)"; \
echo " $(INVENTORY)/host_vars/$(HOST)/vault.yml"; \
echo " $(INVENTORY)/host_vars/$(HOST).yml"; \
exit 1; \
fi; \
echo "$(BLUE)Editing $$vault_file$(RESET)"; \
$(ANSIBLE_VAULT) edit "$$vault_file"
edit-group-vault: require-ansible ## Edit encrypted group vars (usage: make edit-group-vault) edit-group-vault: ## Edit encrypted group vars (usage: make edit-group-vault)
$(ANSIBLE_VAULT) edit $(INVENTORY)/group_vars/all/vault.yml ansible-vault edit inventories/production/group_vars/all/vault.yml
copy-ssh-key: ## Copy SSH key to specific host (usage: make copy-ssh-key HOST=giteaVM) copy-ssh-key: ## Copy SSH key to specific host (usage: make copy-ssh-key HOST=giteaVM)
@ -602,144 +562,19 @@ ifndef HOST
@exit 1 @exit 1
endif endif
@echo "$(YELLOW)Copying SSH key to $(HOST)...$(RESET)" @echo "$(YELLOW)Copying SSH key to $(HOST)...$(RESET)"
@ip=$$(ansible-inventory -i $(INVENTORY) $(ANSIBLE_ARGS) --list 2>/dev/null | jq -r --arg h "$(HOST)" '._meta.hostvars[$$h].ansible_host // empty'); \ @ip=$$(ansible-inventory --list | jq -r "._meta.hostvars.$(HOST).ansible_host // empty" 2>/dev/null); \
user=$$(ansible-inventory -i $(INVENTORY) $(ANSIBLE_ARGS) --list 2>/dev/null | jq -r --arg h "$(HOST)" '._meta.hostvars[$$h].ansible_user // empty'); \ user=$$(ansible-inventory --list | jq -r "._meta.hostvars.$(HOST).ansible_user // empty" 2>/dev/null); \
if [ -z "$$ip" ] || [ "$$ip" = "null" ]; then \ if [ -n "$$ip" ] && [ "$$ip" != "null" ] && [ -n "$$user" ] && [ "$$user" != "null" ]; then \
ip=$$(awk -v h="$(HOST)" '$$1==h {print $$2}' $(INVENTORY_HOSTS) | sed 's/ansible_host=//'); \
fi; \
if [ -z "$$user" ] || [ "$$user" = "null" ]; then \
user=$$(awk -v h="$(HOST)" '$$1==h {for(i=2;i<=NF;i++) if($$i~/^ansible_user=/) {sub(/ansible_user=/,"",$$i); print $$i; exit}}' $(INVENTORY_HOSTS)); \
fi; \
if [ -n "$$ip" ] && [ -n "$$user" ]; then \
echo "Target: $$user@$$ip"; \ echo "Target: $$user@$$ip"; \
ssh-copy-id -i "$${SSH_PUBLIC_KEY:-$$HOME/.ssh/id_ed25519.pub}" "$$user@$$ip"; \ ssh-copy-id $$user@$$ip; \
else \ else \
echo "$(RED)Could not determine IP or user for $(HOST)$(RESET)"; \ echo "$(RED)Could not determine IP or user for $(HOST)$(RESET)"; \
echo "Check your inventory and host_vars"; \ echo "Check your inventory and host_vars"; \
exit 1; \
fi fi
copy-ssh-keys: ## Copy SSH key to all inventory hosts (usage: make copy-ssh-keys [GROUP=services]) create-vault: ## Create encrypted vault file for secrets (passwords, auth keys, etc.)
@echo "$(YELLOW)Copying SSH key to inventory hosts...$(RESET)"
@echo "Using key: $${SSH_PUBLIC_KEY:-$$HOME/.ssh/id_ed25519.pub}"
@echo "$(YELLOW)You will be prompted for each host's password (last time).$(RESET)"
@failed=0; ok=0; \
if [ -n "$(GROUP)" ]; then \
hosts=$$(ansible-inventory -i $(INVENTORY) $(ANSIBLE_ARGS) --list 2>/dev/null | jq -r ".\"$(GROUP)\".hosts[]? // empty"); \
else \
hosts=$$(ansible-inventory -i $(INVENTORY) $(ANSIBLE_ARGS) --list 2>/dev/null | jq -r '._meta.hostvars | keys[]' | grep -v '^localhost$$' | sort); \
fi; \
if [ -z "$$hosts" ]; then \
if [ -n "$(GROUP)" ]; then \
hosts=$$(awk -v g="$(GROUP)" 'BEGIN{ing=0} /^\[/ {ing=($$0=="["g"]"); next} ing && /^[a-zA-Z]/ {print $$1}' $(INVENTORY_HOSTS)); \
else \
hosts=$$(awk '/^\[/ {next} /^[a-zA-Z]/ && $$1!="localhost" {print $$1}' $(INVENTORY_HOSTS)); \
fi; \
fi; \
for host in $$hosts; do \
echo ""; echo "$(BLUE)==> $$host$(RESET)"; \
if $(MAKE) --no-print-directory copy-ssh-key HOST=$$host; then ok=$$((ok+1)); else failed=$$((failed+1)); fi; \
done; \
echo ""; \
echo "$(GREEN)Done: $$ok succeeded$(RESET), $(RED)$$failed failed$(RESET)"; \
[ $$failed -eq 0 ]
copy-ssh-keys-ansible: require-ansible ## Copy SSH key via Ansible (usage: make copy-ssh-keys-ansible [GROUP=services] [HOST=dev01])
@echo "$(YELLOW)Deploying SSH key with Ansible (may prompt for SSH password)...$(RESET)"
@limit="all:!local"; \
[ -n "$(GROUP)" ] && limit="$(GROUP)"; \
[ -n "$(HOST)" ] && limit="$(HOST)"; \
$(ANSIBLE_PLAYBOOK) playbooks/ssh-keys.yml $(ANSIBLE_ARGS) --limit "$$limit" --ask-pass
copy-ssh-key-mailcow: ## Copy SSH key to Mailcow VM (root@10.0.10.132 on pve201; prompts for root password once)
@$(MAKE) --no-print-directory copy-ssh-key HOST=mailcow
bootstrap-root-ssh-caddy: ## Bootstrap root on caddy via su + vault_lxc_root_password
@chmod +x scripts/bootstrap-root-ssh-su-password.sh scripts/load-vault-lxc-root-password.sh
@. scripts/load-vault-lxc-root-password.sh; ./scripts/bootstrap-root-ssh-su-password.sh caddy
bootstrap-root-ssh: ## SSH as ladmin, su to root, install root key (usage: make bootstrap-root-ssh HOST=listmonk)
ifndef HOST
@echo "$(RED)Error: HOST parameter required$(RESET)"
@echo "Usage: make bootstrap-root-ssh HOST=listmonk"
@exit 1
endif
@chmod +x scripts/bootstrap-root-ssh.sh
@BOOTSTRAP_USER="$(BOOTSTRAP_USER)" TARGET_USER="$(TARGET_USER)" \
scripts/bootstrap-root-ssh.sh "$(HOST)"
bootstrap-root-ssh-services: ## Bootstrap root SSH via ladmin (caddy, listmonk, vikunja)
@chmod +x scripts/bootstrap-root-ssh.sh
@failed=0; ok=0; \
for host in caddy listmonk vikunja; do \
echo ""; echo "$(BLUE)==> $$host$(RESET)"; \
if BOOTSTRAP_USER="$(BOOTSTRAP_USER)" scripts/bootstrap-root-ssh.sh "$$host"; then \
ok=$$((ok+1)); \
else \
failed=$$((failed+1)); \
fi; \
done; \
echo ""; echo "$(GREEN)Done: $$ok succeeded$(RESET), $(RED)$$failed failed$(RESET)"; \
[ $$failed -eq 0 ]
mailcow-mailbox: ## Create Mailcow mailbox (usage: make mailcow-mailbox MAILBOX=alerts)
ifndef MAILBOX
@echo "$(RED)Error: MAILBOX required$(RESET)"
@echo "Usage: make mailcow-mailbox MAILBOX=alerts"
@echo "Define mailboxes in inventories/production/group_vars/all/mailcow.yml"
@exit 1
endif
@chmod +x scripts/run-mailcow-mailbox.sh
@MAILBOX="$(MAILBOX)" ./scripts/run-mailcow-mailbox.sh
mailcow-create-alerts: ## Alias for make mailcow-mailbox MAILBOX=alerts
@$(MAKE) --no-print-directory mailcow-mailbox MAILBOX=alerts
vault-pull-infra-secrets: ## Pull Umami/Mattermost from hosts → .env → vault (not vault→.env)
@chmod +x scripts/vault-pull-infra-secrets.sh scripts/vault-import-env.sh
@./scripts/vault-pull-infra-secrets.sh
vault-export-env: ## Write vault secrets into .env (keeps existing non-empty keys)
@chmod +x scripts/vault-export-env.sh
@./scripts/vault-export-env.sh "$(or $(ENV_FILE),.env)"
kuma-add-monitors: ## Add default Uptime Kuma monitors (needs KUMA_PASSWORD in .env)
@chmod +x scripts/kuma-add-monitors.sh
@./scripts/kuma-add-monitors.sh
beszel-setup-smtp: ## Configure Beszel SMTP (needs BESZEL_EMAIL, BESZEL_PASSWORD, SMTP_PASS)
@chmod +x scripts/beszel-setup-smtp.sh
@./scripts/beszel-setup-smtp.sh
beszel-setup-alerts: ## Enable Beszel alerts on all systems (needs BESZEL_EMAIL, BESZEL_PASSWORD)
@chmod +x scripts/beszel-setup-alerts.sh
@./scripts/beszel-setup-alerts.sh
caddy-monitoring: require-ansible ## stats.levkin.ca + status.levkin.ca on Caddy VM
@echo "Ensuring monitoring public proxies on caddy..."
$(ANSIBLE_PLAYBOOK) playbooks/caddy-monitoring-sites.yml $(ANSIBLE_ARGS)
vault-import-env: ## Merge .env secrets into Ansible vault (usage: make vault-import-env [ENV_FILE=.env])
@chmod +x scripts/vault-import-env.sh
@ENV_FILE="$(or $(ENV_FILE),.env)" scripts/vault-import-env.sh "$(or $(ENV_FILE),.env)"
bootstrap-root-ssh-failed: ## Bootstrap root SSH on hosts that failed direct root copy-ssh-keys
@chmod +x scripts/bootstrap-root-ssh.sh
@failed=0; ok=0; \
for host in caddy listmonk vikunja n8n qBittorrent actual caseware auto mailcow; do \
echo ""; echo "$(BLUE)==> $$host$(RESET)"; \
if BOOTSTRAP_USER="$(BOOTSTRAP_USER)" scripts/bootstrap-root-ssh.sh "$$host"; then \
ok=$$((ok+1)); \
else \
failed=$$((failed+1)); \
fi; \
done; \
echo ""; echo "$(GREEN)Done: $$ok succeeded$(RESET), $(RED)$$failed failed$(RESET)"; \
[ $$failed -eq 0 ]
create-vault: require-ansible ## Create encrypted vault file for secrets (passwords, auth keys, etc.)
@echo "$(YELLOW)Creating vault file for storing secrets...$(RESET)" @echo "$(YELLOW)Creating vault file for storing secrets...$(RESET)"
$(ANSIBLE_VAULT) create $(INVENTORY)/group_vars/all/vault.yml ansible-vault create group_vars/all/vault.yml
@echo "$(GREEN)✓ Vault file created. Add your secrets here (e.g. vault_tailscale_auth_key)$(RESET)" @echo "$(GREEN)✓ Vault file created. Add your secrets here (e.g. vault_tailscale_auth_key)$(RESET)"
create-vm: ## Create Ansible controller VM on Proxmox create-vm: ## Create Ansible controller VM on Proxmox

View File

@ -2,8 +2,7 @@
inventory = inventories/production inventory = inventories/production
roles_path = roles roles_path = roles
host_key_checking = False host_key_checking = False
stdout_callback = default stdout_callback = yaml
callback_result_format = yaml
bin_ansible_callbacks = True bin_ansible_callbacks = True
retry_files_enabled = False retry_files_enabled = False
gathering = smart gathering = smart

View File

@ -4,7 +4,6 @@
HOSTS_FILE="inventories/production/hosts" HOSTS_FILE="inventories/production/hosts"
TIMEOUT=3 TIMEOUT=3
CHANGED=false CHANGED=false
UNAME_S="$(uname -s)"
# Colors # Colors
GREEN='\033[0;32m' GREEN='\033[0;32m'
@ -19,12 +18,10 @@ echo "=================================================================="
# Function to test IP connectivity # Function to test IP connectivity
test_ip() { test_ip() {
local ip="$1" local ip="$1"
if [[ "$UNAME_S" == "Darwin" ]]; then if ping -c 1 -W "$TIMEOUT" "$ip" >/dev/null 2>&1; then
# macOS: -W is wait time in milliseconds return 0
ping -c 1 -W $((TIMEOUT * 1000)) "$ip" >/dev/null 2>&1
else else
# Linux: -W is timeout in seconds return 1
ping -c 1 -W "$TIMEOUT" "$ip" >/dev/null 2>&1
fi fi
} }
@ -34,7 +31,7 @@ test_ssh() {
local ip="$2" local ip="$2"
local user="$3" local user="$3"
if ssh -o ConnectTimeout=3 -o BatchMode=yes "$user@$ip" exit >/dev/null 2>&1; then if timeout 5 ssh -o ConnectTimeout=3 -o BatchMode=yes "$user@$ip" exit >/dev/null 2>&1; then
return 0 return 0
else else
return 1 return 1
@ -49,14 +46,11 @@ switch_to_fallback() {
echo -e " ${YELLOW}→ Switching $hostname to fallback IP: $fallback_ip${NC}" echo -e " ${YELLOW}→ Switching $hostname to fallback IP: $fallback_ip${NC}"
# Use sed to replace the primary IP with fallback IP (BSD/GNU compatible) # Use sed to replace the primary IP with fallback IP
if [[ "$UNAME_S" == "Darwin" ]]; then sed -i "s/$hostname ansible_host=$primary_ip/$hostname ansible_host=$fallback_ip/" "$HOSTS_FILE"
sed -i '' "s/$hostname ansible_host=$primary_ip/$hostname ansible_host=$fallback_ip/" "$HOSTS_FILE"
sed -i '' "s/ ansible_host_fallback=$fallback_ip//" "$HOSTS_FILE" # Remove the fallback attribute since we're now using it as primary
else sed -i "s/ ansible_host_fallback=$fallback_ip//" "$HOSTS_FILE"
sed -i "s/$hostname ansible_host=$primary_ip/$hostname ansible_host=$fallback_ip/" "$HOSTS_FILE"
sed -i "s/ ansible_host_fallback=$fallback_ip//" "$HOSTS_FILE"
fi
CHANGED=true CHANGED=true
} }
@ -72,10 +66,9 @@ while IFS= read -r line; do
# Parse host entry # Parse host entry
if [[ "$line" =~ ansible_host= ]]; then if [[ "$line" =~ ansible_host= ]]; then
hostname=$(echo "$line" | awk '{print $1}') hostname=$(echo "$line" | awk '{print $1}')
primary_ip=$(echo "$line" | sed -n 's/.*ansible_host=\([^[:space:]]*\).*/\1/p') primary_ip=$(echo "$line" | grep -oP 'ansible_host=\K[^\s]+')
fallback_ip=$(echo "$line" | sed -n 's/.*ansible_host_fallback=\([^[:space:]]*\).*/\1/p') fallback_ip=$(echo "$line" | grep -oP 'ansible_host_fallback=\K[^\s]+' || echo "")
user=$(echo "$line" | sed -n 's/.*ansible_user=\([^[:space:]]*\).*/\1/p') user=$(echo "$line" | grep -oP 'ansible_user=\K[^\s]+' || echo "root")
[[ -z "$user" ]] && user="root"
echo -n "Testing $hostname ($primary_ip)... " echo -n "Testing $hostname ($primary_ip)... "

View File

@ -1,60 +0,0 @@
# Encrypted secrets in this project
Ansible Vault is the standard way to store and share secrets with this repo. Plain `.env` files are gitignored and meant only as a **temporary** import path on your machine.
## Recommended workflow
1. **Never commit** `.env`, API keys, or passwords.
2. Store secrets in `inventories/production/group_vars/all/vault.yml` (encrypted).
3. Edit with `make edit-group-vault` (uses `~/.ansible-vault-pass` on your workstation).
4. Teammates need the same vault password file out-of-band (password manager, not git).
## One-time import from `.env`
```bash
cp .env.example .env
# fill MAILCOW_API_KEY, ALERTS_PASSWORD, etc.
make vault-import-env
rm .env # optional after import
```
`make vault-import-env` merges supported keys into the vault and re-encrypts the file.
## Mailcow mailboxes (dynamic)
| File | Purpose |
|------|---------|
| `group_vars/all/mailcow.yml` | Mailbox names, local parts, quotas (no secrets) |
| `vault.yml` | `vault_mailcow_api_key`, `vault_mailcow_mailbox_passwords` |
```bash
make mailcow-mailbox MAILBOX=alerts
```
Add a new mailbox:
1. In `mailcow.yml` under `mailcow_mailboxes:` add e.g. `notify: { local_part: notify, name: Notify, quota: 512, vault_password_key: notify }`
2. In vault: `vault_mailcow_mailbox_passwords.notify: "..."` (via `make edit-group-vault`)
3. `make mailcow-mailbox MAILBOX=notify`
## Can `.env` itself be encrypted?
Yes, but Ansible projects usually skip that pattern:
| Approach | Use when |
|----------|----------|
| **Ansible Vault** (`vault.yml`) | Default for this repo — works with playbooks and `make` targets |
| **`ansible-vault encrypt .env`** | Produces `.env` vault blob; you must `ansible-vault view .env` or decrypt to a temp file before tools read it — awkward for shell scripts |
| **Password manager / 1Password CLI** | Personal machine only, not for CI/ansible runs |
| **SOPS / Mozilla SOPS** | Teams that want encrypted YAML/JSON in git with KMS/PGP — heavier setup |
**Sharing encrypted secrets with others:** share the **vault password** (or per-host vault pass) securely once; they clone the repo and use the same encrypted `vault.yml`. Do not email `.env` files.
## Encrypting a single value (without opening the whole file)
```bash
ansible-vault encrypt_string 'secret-value' --name 'vault_my_secret' \
--vault-password-file ~/.ansible-vault-pass
```
Paste the output into `vault.yml` inside the encrypted file, or into a vars file that is entirely vault-encrypted.

View File

@ -1,56 +0,0 @@
# Cal.com → Authentik OIDC
**Status: deferred** — Cal.com self-hosted SSO is a **commercial (enterprise) feature**. Without a valid `CALCOM_LICENSE_KEY`, the UI at `/settings/security/sso` stays locked (*Contact sales*).
See **[sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)** for Phase 4 apps that do not need a Cal-style license.
## Current state (2026-05-23)
| Item | Status |
|------|--------|
| `calsaml` Postgres DB | ✅ Created |
| `SAML_DATABASE_URL`, `SAML_ADMINS` in `/opt/cal/.env` | ✅ Set |
| `docker-compose` passes license + SAML env | ✅ |
| Authentik app `cal-com` + provider `cal-com-oidc` | ✅ (ready when license exists) |
| `CALCOM_LICENSE_KEY` in `.env` | ❌ **Empty** — SSO UI blocked |
| Cal UI OIDC configuration | ⏳ **Blocked** until license |
## When you have a license
1. Add to `/opt/cal/.env`:
```bash
CALCOM_LICENSE_KEY=<key-from-cal.com>
NEXT_PUBLIC_LICENSE_CONSENT=agree
```
2. Restart: `ssh cal` → `cd /opt/cal && docker compose up -d`
3. Confirm in container: `docker exec calcom printenv CALCOM_LICENSE_KEY` (non-empty)
4. Log in as **`idobkin@gmail.com`** → **https://cal.levkin.ca/settings/security/sso**
5. Configure OIDC:
| Field | Value |
|-------|--------|
| Client ID | `cal-com` |
| Client Secret | from Authentik → Applications → Cal.com |
| Well Known URL | `https://auth.levkin.ca/application/o/cal-com/.well-known/openid-configuration` |
Test SSO; keep local Cal password as break-glass.
## Ansible (infra only)
```bash
make cal-oidc # SAML DB + Authentik provider (safe to re-run)
make cal-oidc-check
```
Vault (optional): `vault_cal_oidc_client_secret` — see `vault.example.yml`.
## Redirect URI (Authentik)
```text
https://cal.levkin.ca/api/auth/oidc
```
## Related
- [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)
- [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md)

View File

@ -1,67 +0,0 @@
# CI runners and Ansible control hosts
**LAN:** `10.0.10.0/24` · **Inventory group:** `[qa]` and `[ansible]`
These hosts are **not** Proxmox guests on pve10; they live on pve201 or as standalone VMs. Use this doc with [host-list.md](host-list.md) and [unifi-static-dhcp.md](unifi-static-dhcp.md).
---
## Summary
| Inventory | IP | User | Role | Proxmox | Notes |
|-----------|-----|------|------|---------|-------|
| **git-ci-01** | `10.0.10.223` | `ladmin` | Gitea Actions (`act_runner`) | pve201 VM **115** | 2 cores, 4 GB RAM, 64 GB disk |
| **sonarqube-01** | `10.0.10.54` | `ladmin` | SonarQube analysis | pve201 (verify VMID) | QA static analysis |
| **ansibleVM** | `10.0.10.157` | `master` | Ansible control / automation | pve201 (verify VMID) | `become` via sudo; vault secrets in group_vars |
---
## git-ci-01 — Gitea Actions runner
- **Host vars:** `inventories/production/host_vars/git-ci-01.yml`
- **Runner config:** `/etc/act_runner/config.yaml` on the guest
- **Capacity:** 2 concurrent jobs (`git_ci_runner_capacity: 2`)
- **Maintenance:** weekly docker prune via `maintenance_cron` role
**When pve201 is tight:** consider a second runner LXC on pve10 after Nextcloud/Portainer retire (see plan-2 capacity table).
```bash
make ping HOST=git-ci-01
ssh ladmin@10.0.10.223
```
---
## sonarqube-01 — code quality
- **Inventory:** `[qa]` group
- **Login:** `ssh ladmin@10.0.10.54` (key only after hardening)
```bash
make ping HOST=sonarqube-01
```
Pin **MAC → `10.0.10.54`** in UniFi if DHCP drift is observed ([unifi-static-dhcp.md](unifi-static-dhcp.md)).
---
## ansibleVM — control node
- **Host vars:** `inventories/production/host_vars/ansibleVM.yml`
- **Secrets:** `vault_ansiblevm_become_password` in vault
- **Purpose:** run playbooks from the LAN when not using your Mac
```bash
make ping HOST=ansibleVM
ssh master@10.0.10.157
```
On your Mac, the repo at `~/Documents/code/ansible` with `~/.ansible-vault-pass` remains the primary control path (`make apply`, etc.).
---
## Related
- [host-list.md](host-list.md) — Proxmox guest IPs/MACs
- [security-remediation-plan.md](security-remediation-plan.md) — SSH keys on QA hosts
- [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md) — dev-apps / runner migration backlog

View File

@ -1,81 +0,0 @@
# Cursor MCP servers for this homelab / Ansible repo
**Global config:** `~/.cursor/mcp.json` (all projects)
**Project config:** `.cursor/mcp.json` (this repo only — optional override)
After editing, **restart Cursor** or use **Settings → MCP → Refresh**.
---
## Installed (global)
| MCP | Purpose | When the agent uses it |
|-----|---------|-------------------------|
| **playwright** | Browser automation (login flows, SSO smoke tests, UI screenshots) | Verify `auth.levkin.ca` → app OIDC; Kuma/Beszel/Listmonk admin clicks |
| **hermes** | Telegram/Discord/Slack/WhatsApp/Signal/Matrix via Hermes VM **117** | Notify you on deploy finish, alert failures, ask approval from phone |
### Playwright notes
- Official package: `@playwright/mcp@latest` via `npx`
- Homelab origins restricted in `args` (`*.levkin.ca`, `10.0.10.*`)
- For saved login state: add `--storage-state=~/path/auth-state.json` after manual login once
- Headless (no window): add `--headless` to `args`
**If MCP shows “errored”:** Settings → MCP → playwright → view log; restart Cursor.
**Browsers for `@playwright/mcp`:** the MCP package downloads Chromium on first run. You do **not** need `npm install @playwright/test` in this Ansible repo.
Optional local install (only if you run Playwright scripts in-repo):
```bash
npm install -D @playwright/test
npx playwright install chromium
```
### Hermes notes
- Runs over SSH to `ladmin@10.0.10.36` — requires VPN/LAN or Tailscale to Hermes VM
- Complements Ansible (infra) with **human notifications**, not provisioning
---
## Recommended additions (not installed yet)
| MCP | Why for Ansible / homelab | Install hint |
|-----|---------------------------|--------------|
| **GitHub** (`gh` / official) | PRs, CI failures, issue links from chat | Cursor MCP directory → GitHub |
| **Gitea** (custom or HTTP) | Your `git.levkin.ca` — same as GitHub MCP pattern | Community server or REST via script |
| **Filesystem** (built-in) | Already available in agent mode | — |
| **Postgres** | Query listmonk/cal DBs for debugging | `@modelcontextprotocol/server-postgres` + DSN in env |
| **Docker** | Inspect containers on monitoring/identity LXCs | SSH + `docker` often enough; MCP optional |
| **Grafana/Prometheus** | If you add observability later | Official or community MCP |
| **UniFi** | DHCP/client status without opening UI | Community UniFi MCP + `UNIFI_API_KEY` in env |
| **Proxmox** | VM/LXC state from chat | Community proxmox MCP or keep using `make` + SSH |
**Lower priority:** Notion, Linear, Sentry — only if you adopt those tools.
---
## What MCP does *not* replace
| Task | Use instead |
|------|-------------|
| Provision LXCs/VMs | Ansible playbooks + `make` |
| Secrets | Ansible Vault (`make edit-group-vault`) |
| Authentik providers/apps | Authentik API token or blueprints (`roles/cal_sso`) |
| Repeatable SSO | API/blueprints > Playwright (Playwright = verify UI) |
---
## Security
- Do **not** put vault passwords or API tokens in `mcp.json` unless the server supports env vars and you use OS keychain
- Rotate tokens if pasted in chat
- Hermes SSH key: same trust as any admin SSH to homelab
---
## Related
- [handoff-2026-05-24.md](handoff-2026-05-24.md)
- [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)

View File

@ -1,122 +0,0 @@
# Homelab sprint handoff — 2026-05-24 (archive)
**Branch:** `homelab/post-sprint-2026-05-24` (merge to `master`)
**Next work:** [handoff-next-steps.md](handoff-next-steps.md)
**Master plan:** [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md) · **MCPs:** [cursor-mcp-homelab.md](cursor-mcp-homelab.md)
## Phases 0 + 1 — complete ✅
| Phase | Done |
|-------|------|
| **0** | LXCs static, UniFi VM DHCP, DNS apex + auth, identity LXC provisioned |
| **1** | Authentik @ `.21`, `auth.levkin.ca`, admin + TOTP, Caddy passthrough |
---
## Done (P0 sprint)
### Monitoring (LXC 218 @ `10.0.10.22`)
| Item | State |
|------|--------|
| **Kuma** | 17 HTTP monitors, all email-linked; admin password in vault |
| **Kuma status page** | https://status.levkin.ca — slug `homelab`, 17 monitors |
| **Umami** | https://stats.levkin.ca → `:3000` (Caddy + DNS) |
| **Beszel** | 16 agents **up**; alerts Status/CPU/RAM/disk on all systems |
| **SMTP** | `alerts@levkine.ca` — Kuma + Beszel; `listmonk@` — user UI + vault |
### Caddy (VM 106 @ `10.0.10.50`)
- `stats.levkin.ca` → `10.0.10.22:3000`
- `status.levkin.ca` → `10.0.10.22:3001`
- Playbook: `playbooks/caddy-monitoring-sites.yml` · `make caddy-monitoring`
### Phase 0 — Foundation ✅
- All pve10 **LXCs** static via `pct set` (210, 215–221)
- **VM 106** Caddy static in-guest `.50`
- **UniFi DHCP** reservations applied for homelab VMs (API; key in vault)
- **DNS** `auth.levkin.ca` + `levkin.ca` apex → home IP
- **Identity LXC 217** @ `.21` provisioned (Phase 1 infra)
Caddy still on **VM 106** — edge LXC → **Phase 2 backlog** ([handoff-next-steps.md](handoff-next-steps.md)).
### Infra / migrations
- Nextcloud VM 201 retired; listmonk → LXC 221; pve201 VM 113 destroyed
- Vikunja OIDC verified (browser login as `ilia`)
- Listmonk upgraded **v2.4 → v6.1.0** + Authentik OIDC — [listmonk-authentik-oidc.md](listmonk-authentik-oidc.md)
- Gitea deploy key levkin LXC 220 — added + tested
- **DebianDesktop VM 100** (pve201) — rebooted; 24 GB RAM active
- UniFi API key in vault; DHCP fixed IPs applied for homelab VMs
### Vault keys added/updated
- `vault_uptime_kuma_password`, `vault_unifi_url`, `vault_unifi_api_key`, `vault_unifi_site`
- Export: `make vault-export-env`
---
## Capacity snapshot (2026-05-24)
| Node | RAM available | Disk (local-lvm) | Notes |
|------|---------------|------------------|--------|
| **pve10** | ~22 GiB / 62 GiB | ~1.30 TiB free (~22% used) | Primary for new LXCs |
| **pve201** | ~19 GiB / 125 GiB | ~922 GiB free (~46% used) | Do not add services |
---
## Hostname map (monitoring)
| DNS | Service | Backend | Public? |
|-----|---------|---------|---------|
| `stats.levkin.ca` | Umami | `:3000` | Yes (tracker + admin reachable) |
| `status.levkin.ca` | Kuma status page | `:3001` | Yes (status only via Kuma domain binding) |
| *(none)* | Beszel | `:8090` | **LAN/Tailscale only** |
| *(none)* | Kuma admin | `:3001` | LAN only — do not expose `/dashboard` |
| *(none)* | Dockge | `:5001` | LAN only |
**Hermes ≠ Mattermost:** VM **117** @ `.36` = Hermes agent; Mattermost = VM **107** @ `slack.levkin.ca`.
---
## Sprint also delivered (Phases 2–4)
- Monitoring stack, Beszel alerts, SSO (Vikunja, Listmonk, Mattermost, Mailcow)
**After merge:** see [handoff-next-steps.md](handoff-next-steps.md).
---
## Key commands
```bash
make vault-export-env
make caddy-monitoring # stats + status on Caddy
./scripts/kuma-add-monitors.sh # needs KUMA_PASSWORD in .env
./scripts/beszel-setup-smtp.sh # Beszel SMTP via alerts@
./scripts/beszel-setup-alerts.sh # Status + CPU/RAM/disk on all systems
ssh root@10.0.10.22 # monitoring LXC
ssh root@10.0.10.50 # Caddy VM
```
## Key files
- `playbooks/caddy-monitoring-sites.yml`
- `scripts/kuma-add-monitors.sh`, `scripts/beszel-setup-smtp.sh`, `scripts/beszel-setup-alerts.sh`
- `docs/guides/monitoring-stack.md`, `levkin-selfhost-plan-2.md`
- `inventories/production/group_vars/all/vault.yml` (encrypted)
## Beszel agent install quirk
Installer hangs on “Enable automatic daily updates?” — use `printf n | install.sh`. SSH sessions may exit 255 after success; verify with `systemctl is-active beszel-agent`.
## UniFi
- API: `https://192.168.2.1/proxy/network/api` + header `X-API-KEY`
- Network for homelab: **Administration** `10.0.10.0/24`
- n8n MAC in live UniFi: `bc:24:11:c9:f7:48` @ `.154` (inventory `61:de:7a` is a different guest @ `.35`)
---
*Last updated: 2026-05-24*

View File

@ -1,66 +0,0 @@
# Handoff — next steps (after post-sprint merge)
**Merged from:** `homelab/post-sprint-2026-05-24` → `master`
**Sprint snapshot:** [handoff-2026-05-24.md](handoff-2026-05-24.md)
**Master plan:** [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md)
---
## Phases complete
| Phase | Status |
|-------|--------|
| **0 Foundation** | ✅ Static IPs, DNS, UniFi DHCP, Caddy VM `.50` |
| **1 Identity** | ✅ Authentik LXC 217, `auth.levkin.ca`, admin + TOTP |
| **2 Monitoring** | ✅ (sprint) Kuma, Umami, Beszel, Dockge, `status`/`stats` |
| **3 Cal.com** | ✅ booking live; OIDC deferred (license) |
| **4 SSO** | ✅ Vikunja, Listmonk, Mattermost, Mailcow — **smoke-test in browser** |
**Not Phase 0/1:** Caddy → edge LXC `.20` moved to **Phase 2 backlog** (was Phase 1.5).
---
## Immediate (this week)
1. **SSO smoke tests** (Playwright MCP or manual) as `ilia`:
- https://todo.levkin.ca — Authentik
- https://listmonk.levkin.ca/admin — Authentik
- https://slack.levkin.ca — “GitLab” / Authentik button
- https://mail.levkine.ca — Generic-OIDC
2. **Rotate secrets** — Authentik API token, Beszel admin, OIDC client secrets (batch when stable)
3. **Mattermost users** — existing accounts: Profile → Switch to GitLab SSO
---
## Phase 2 backlog (infra + ops)
| Priority | Item | Effort |
|----------|------|--------|
| 1 | **Caddy → edge LXC** @ `10.0.10.20` | ~30 min + 24h watch |
| 2 | **Security remediation** — [security-remediation-plan.md](security-remediation-plan.md) | ongoing |
| 3 | **NAS disk** `W4J0L3PY` → Jellyfin VM 101 | hardware |
| 4 | **Cal OIDC** | blocked on `CALCOM_LICENSE_KEY` |
| 5 | **Phases 5–8** — Immich, Crater, Outline, etc. | when needed |
---
## Useful commands
```bash
make vault-export-env
make caddy-monitoring
make beszel-setup-alerts # BESZEL_EMAIL + BESZEL_PASSWORD
./scripts/kuma-add-monitors.sh
ssh root@10.0.10.237 # Mattermost (root key installed)
```
## Docs added this sprint
- [listmonk-authentik-oidc.md](listmonk-authentik-oidc.md)
- [mattermost-authentik-gitlab-oauth.md](mattermost-authentik-gitlab-oauth.md)
- [mailcow-authentik-oidc.md](mailcow-authentik-oidc.md)
- [cursor-mcp-homelab.md](cursor-mcp-homelab.md)
---
*2026-05-24*

View File

@ -1,157 +0,0 @@
# Host list — Proxmox guests (source of truth)
**Node:** PVENAS (`pve10` @ `10.0.10.10`)
**Audited:** 2026-05-24 (Phase 0 complete — LXCs static + UniFi VM DHCP)
**LAN:** `10.0.10.0/24`, gateway `10.0.10.1`
Update this file whenever a guest is created, migrated, or re-IPd. See [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md) for IP range policy.
---
## IP range plan (10.0.10.0/24)
| Range | Reserved for |
|-------|----------------|
| `.1–.9` | Network gear |
| `.10–.19` | Proxmox host(s) + PBS |
| `.20–.39` | Edge / identity / comms |
| `.40–.79` | Application LXCs / VMs |
| `.80–.99` | Media VMs |
| `.100–.199` | DHCP pool (clients) |
| `.200–.249` | Labs / heavy VMs |
| `.250–.254` | Reserved |
**Rollout reservations (free):** `.20` edge LXC
---
## Proxmox host
| VMID | Name | Role | Current IP | Target static IP | DHCP/Static | Notes |
|------|------|------|------------|------------------|-------------|-------|
| — | **pve10** | Proxmox (PVENAS) | `10.0.10.10/24` | `.10` | Static | This node |
---
## LXCs (pve10)
| VMID | Name | Plan group | Current IP | Target static IP | DHCP/Static | MAC | Notes |
|------|------|------------|------------|------------------|-------------|-----|-------|
| 210 | cal | business | `10.0.10.228/24` | `10.0.10.228/24` | ✅ **Static** | `BC:24:11:DD:F8:7C` | Cal.com — `pct set` applied; in Ansible `hosts` |
| 215 | caseware | **marketing site** | `10.0.10.105/24` | `10.0.10.105/24` | ✅ **Static** | `BC:24:11:72:04:53` | Static HTML `/var/www/caseware` → `caseware.levkin.ca` |
| 216 | auto | **marketing site** | `10.0.10.59/24` | `10.0.10.59/24` | ✅ **Static** | `BC:24:11:43:F0:86` | Static HTML `/var/www/auto` → `auto.levkin.ca` |
| 219 | portfolio | **marketing site** | `10.0.10.106/24` | `10.0.10.106/24` | ✅ **Static** | `BC:24:11:DF:94:32` | Static HTML `/var/www/portfolio` → `iliadobkin.com` (migrated from pve201 LXC 306) |
| 220 | levkin | **marketing site** | `10.0.10.60/24` | `10.0.10.60/24` | ✅ **Static** | `BC:24:11:C6:B2:E4` | Vite `www/` → `levkin.ca` (spec), `levkin.ca/folders` (stack) — [site-lxc-git.md](site-lxc-git.md) |
| 217 | identity | identity | `10.0.10.21/24` | `10.0.10.21/24` | ✅ **Static** | `BC:24:11:3C:85:45` | Authentik + Postgres + Redis; `auth.levkin.ca` via Caddy |
| 218 | monitoring | monitoring | `10.0.10.22/24` | `10.0.10.22/24` | ✅ **Static** | `BC:24:11:54:43:13` | Kuma `:3001`, Dockge `:5001`, Umami `:3000`, Beszel `:8090` (LAN) — [monitoring-stack.md](monitoring-stack.md) |
| 221 | listmonk | productivity | `10.0.10.148/24` | `10.0.10.148/24` | ✅ **Static** | `BC:24:11:18:0C:62` | Migrated from pve201 VM **113** 2026-05-23; Postgres 17 + native binary |
**pve201 (not pve10):** LXC **305** `kuma-debian` @ `10.0.10.197` — **stopped 2026-05-22** (replaced by monitoring LXC 218). `onboot` disabled. LXC **306** `portfolio` — **destroyed/purged 2026-05-22** (now pve10 LXC **219** @ `10.0.10.106`).
---
## VMs (pve10)
| VMID | Name | Plan group | Current IP | Target static IP | DHCP/Static | MAC | Notes |
|------|------|------------|------------|------------------|-------------|-----|-------|
| 100 | homepage-debian | — | — | — | — | — | **Stopped** |
| 101 | Jellyfin | media | `10.0.10.232` | `10.0.10.232/24` | ⏳ DHCP? | `BC:24:11:29:B8:84` | **Stopped** (turned off 2026-05-22); inventory `jellyfin` |
| 102 | gitea-alpine | — | `10.0.10.169/24` | `10.0.10.169/24` | ⏳ stable DHCP | `BC:24:11:E9:BD:E5` | Pin in-guest or router reservation |
| 103 | WRA | — | `10.0.10.154/24` | `10.0.10.154/24` | ⏳ stable DHCP | `BC:24:11:61:DE:7A` | Inventory `n8n`; pin when automating |
| 104 | vaultwarden-debian | identity | `10.0.10.142/24` | `10.0.10.142/24` | ⏳ stable DHCP | `BC:24:11:58:DB:DC` | Inventory `vaultwardenVM` |
| 105 | TrueNAS | — | `10.0.10.107/24` | `10.0.10.107/24` | ⏳ stable DHCP | `BC:24:11:14:DE:B5` | NAS UI; pool `NAS.SP00` degraded |
| 106 | caddy-debian | **edge** | `10.0.10.50/24` | `10.0.10.50/24` → **`.20`** (Phase 1.5) | ✅ **Static** (in-guest) | `BC:24:11:E0:49:B4` | `/etc/network/interfaces` static; Ansible `caddy` |
| 107 | mattermost-ubuntu | comms | `10.0.10.237/24` | `10.0.10.237/24` | ⏳ router DHCP | `BC:24:11:66:6E:01` | `slack.levkin.ca` → Caddy → `:8065` |
| 108 | actual-debian | business | `10.0.10.158/24` | `10.0.10.158/24` | ⏳ stable DHCP | `BC:24:11:10:7B:64` | Inventory `actual` |
| 109 | portainer-alpine | — | — | — | ✅ **Removed** | `BC:24:11:0F:40:4F` | Destroyed 2026-05-23; Dockge on monitoring LXC 218 |
| 150 | pihole00-debian | — | link-local* | TBD | ⏳ | `BC:24:11:86:76:97` | Running |
| 117 | hermes | services | `10.0.10.36/24` | `10.0.10.36/24` | ⏳ stable DHCP | `BC:24:11:51:1E:99` | On pve10; guest agent; inventory `hermes` |
| 200 | PVE.BU.SVR | labs | `10.0.10.200/24` | `10.0.10.200/24` | ⏳ stable DHCP | `BC:24:11:DA:95:3B` | Running |
| 201 | NextcloudAIO-debian | (decommission) | `10.0.10.24/24` | — | 🗑️ **Stopped** | `BC:24:11:14:D4:DE` | Retired 2026-05-23 — Caddy removed, `onboot 0`, ~8 GiB RAM freed |
| 300 | pihole-debian | — | — | — | — | — | **Stopped** |
\* ARP showed IPv6 link-local only at audit time — confirm IPv4 inside guest or install QEMU guest agent.
---
## Inventory cross-reference (Ansible `hosts`)
| Inventory name | IP in hosts | pve10 guest | Match |
|----------------|-------------|-------------|-------|
| caddy | `10.0.10.50` | VM 106 | ✅ |
| cal | `10.0.10.228` | LXC 210 | ✅ |
| caseware | `10.0.10.105` | LXC 215 | ✅ |
| auto | `10.0.10.59` | LXC 216 | ✅ |
| portfolio | `10.0.10.106` | LXC 219 | ✅ |
| levkin | `10.0.10.60` | LXC 220 | ✅ |
| identity | `10.0.10.21` | LXC 217 | ✅ |
| monitoring | `10.0.10.22` | LXC 218 | ✅ |
| vaultwardenVM | `10.0.10.142` | VM 104 | ✅ |
| giteaVM | `10.0.10.169` | VM 102 | ✅ |
| n8n | `10.0.10.154` | VM 103? | ⚠️ verify (WRA vs n8n) |
| listmonk | `10.0.10.148` | LXC **221** | ✅ migrated from pve201 VM 113 |
| mailcow | `10.0.10.132` | pve201 VM 106 | ✅ `[comms]` |
| hermes | `10.0.10.36` | VM 117 | ✅ on pve10 |
| jellyfin | `10.0.10.232` | VM 101 | ✅ (stopped until NAS healthy) |
| nextcloud | `10.0.10.24` | VM 201 | stopped / retired (commented in inventory) |
| portainerVM | — | VM 109 | removed (Dockge on monitoring) |
---
## Static IP conversion queue (pve10)
Priority order (plan-2):
1. ✅ **LXC 210** — done (`10.0.10.228/24`)
2. ✅ **LXC 215, 216** — pinned (`.105`, `.59`)
3. ✅ **LXC 217** (identity) — `10.0.10.21/24`, Authentik deployed
4. ✅ **VM 106** (caddy) — static in-guest `.50`
5. ✅ **LXC 218** (monitoring) — `.22`, Kuma/Dockge/Umami
6. ✅ **VMs** — UniFi DHCP reservations applied 2026-05-24 — [vm-static-ip-router-reservations.md](vm-static-ip-router-reservations.md); skip **201** (retired)
7. **New:** edge LXC @ **`.20`** (Phase 1.5)
Example:
```bash
# On pve10 (PVENAS)
pct set 215 -net0 name=eth0,bridge=vmbr0,ip=10.0.10.105/24,gw=10.0.10.1
pct set 216 -net0 name=eth0,bridge=vmbr0,ip=10.0.10.59/24,gw=10.0.10.1
```
---
## NAS / storage note
- ZFS pool **`NAS.SP00`** on this node: **DEGRADED** (disk `W4J0L3PY` failed). See [nas-sp00-drive-failure-report.md](nas-sp00-drive-failure-report.md), [nas-sp00-smart-audit-2026-05-21.md](nas-sp00-smart-audit-2026-05-21.md).
- VM **201** root disk on NAS — avoid heavy I/O until pool is healthy.
---
## QA / control (not pve10 LXCs)
See [ci-runners-and-control.md](ci-runners-and-control.md).
| Inventory | IP | Proxmox | Notes |
|-----------|-----|---------|-------|
| git-ci-01 | `10.0.10.223` | pve201 VM 115 | Gitea Actions runner |
| sonarqube-01 | `10.0.10.54` | pve201 | SonarQube |
| ansibleVM | `10.0.10.157` | pve201 | Ansible control (`master`) |
---
## Audit checklist
- [x] `pct list` / `qm list` on pve10
- [x] ARP / ping for running guests
- [ ] `pct exec` / guest agent for VMs missing IPv4
- [x] Initial `host-list.md` created
- [x] Pin 215/216 static
- [x] Identity LXC 217 @ `.21` (Authentik Phase 1 infra)
- [x] Monitoring LXC 218 @ `.22`
- [x] Caddy VM 106 static `.50`
- [x] LXC backups `backup-20260522` on 217, 218
- [x] Router DHCP reservations for VMs — UniFi API 2026-05-24
- [x] Retire VM 201 (Nextcloud) — stopped 2026-05-23
- [x] Listmonk → pve10 LXC 221 @ `.148` (static via `pct set`; no UniFi lease needed)
- [x] Phase 0 complete — all critical guests pinned
- [ ] Re-run after NAS disk replace

View File

@ -1,444 +0,0 @@
# Levkin self-hosted stack — plan & decisions
Reference doc for the Proxmox homelab. Lives alongside the Cursor project that has the Proxmox info.
**Conventions:**
- All groups run inside an LXC unless marked **VM**.
- Inside each LXC: one `docker-compose.yml`, managed by **Dockge** where applicable.
- Caddy on the `edge` LXC is the only thing exposed to the internet.
- Authentik on the `identity` LXC is the source of truth for who you are.
- Vaultwarden stays standalone (it's the break-glass path if Authentik dies).
---
## Progress summary (updated 2026-05-24)
| Area | Status |
|------|--------|
| **Phase 0** Foundation | ✅ **Done** — pve10 LXCs static; UniFi VM DHCP reservations; auth + apex DNS; Caddy on **VM 106** @ `.50` (edge LXC = Phase 1.5) |
| **Phase 1** Identity (Authentik) | ✅ LXC **217** @ `10.0.10.21` — admin + TOTP |
| **Phase 2** Monitoring | ✅ LXC **218** — Kuma (17 monitors), Dockge, Umami, Beszel (16 agents), SMTP |
| **Phase 3** Cal.com | ✅ LXC **210** — booking + auto consult button; **OIDC deferred** (no enterprise license) |
| **Phase 4** SSO | ✅ Vikunja, Listmonk, Mattermost, Mailcow — browser smoke tests remaining |
| **Phase 5–8** | ⏳ Immich, Crater, Outline, automation depth — after P0 backlog |
| **Comms health** | ✅ Mailcow + Listmonk restored 2026-05-23 — [mailcow-lan-proxy-fix.md](mailcow-lan-proxy-fix.md) |
| **Site consolidation** | ⏳ **Partial** — git LXCs + levkin.ca LXC 220; optional later: static on Caddy VM |
| **dev-apps** | ⏳ punimTag **9101** on pve201 until testing done |
| **Nextcloud retire** | ✅ VM **201** stopped, `onboot 0`, Caddy removed (~8 GiB RAM freed) |
| **Portainer retire** | ✅ VM **109** destroyed 2026-05-23 (~16 GiB on pve10) |
| **Security pass** | 🟡 Partial — SSH keys + apt + cron 2026-05-23 — [security-remediation-plan.md](security-remediation-plan.md) |
---
## Capacity headroom (live check 2026-05-24)
Use this before adding LXCs/VMs. Re-check with `pvesm status` and `free -h` on each node.
### pve10 (PVENAS) — **primary place for new homelab services**
| Resource | Total | Used | **Available** | Notes |
|----------|-------|------|---------------|--------|
| **local-lvm** (thin) | ~1.67 TiB | ~22% | **~1.30 TiB** | New guests on **local-lvm** only (NAS SP00 degraded) |
| **RAM** (host) | 62 GiB | ~40 GiB | **~22 GiB** | Portainer **109** + Nextcloud **201** freed |
**Running:** LXCs 210, 215–221; VMs 102–108, 117, 150, 200. **Stopped:** 101 Jellyfin, 201 Nextcloud.
**Headroom:** ~**20+ GiB RAM** for Immich, Crater, or dev-apps LXC.
**Still available to free:**
| Stop / retire | Frees (maxmem) |
|---------------|----------------|
| ~~Portainer VM **109**~~ | ✅ **16 GiB** freed |
| ~~Nextcloud VM **201**~~ | ✅ **8 GiB** freed |
| Hermes VM **117** (if not needed) | **16 GiB** |
| Site LXCs 215/216 → Caddy static (optional) | **~1 GiB** |
### pve201 (pve) — **do not add new homelab services**
| Resource | Total | Used | **Available** | Notes |
|----------|-------|------|---------------|--------|
| **local-lvm** | ~1.67 TiB | ~46% | **~922 GiB** | Disk OK |
| **RAM** | 125 GiB | ~105 GiB | **~19 GiB** | GPU **104** (64 GB), DebianDesktop **100** (24 GB ✅ rebooted), punim **9101** (16 GB) |
**Verdict:** New stacks on **pve10** only. pve201: stop/migrate punim after testing.
---
## Current state (May 2026)
**Already running:**
- Caddy reverse proxy — currently on a **VM** (should migrate to LXC, see "Caddy migration" section)
- Mailcow — VM, mail domain is `levkine.ca` (with e)
- Vaultwarden, Vikunja, n8n, Listmonk, Mattermost — across various LXCs/VMs
- **Cal.com** — LXC id `210`, `cal.levkin.ca`, Postgres included, admin user `ilia`, 15-min consult event live at `cal.levkin.ca/ilia/consult` with Jitsi link
- Caddy entries live for: `levkin.ca`, `caseware.levkin.ca`, `auto.levkin.ca`, `iliadobkin.com`, `cal.levkin.ca`, `listmonk.levkin.ca`, `pdf.levkin.ca`, `search.levkin.ca`, `auth.levkin.ca`, `stats.levkin.ca`, **`status.levkin.ca`**
- **Authentik** — LXC **217** @ `10.0.10.21`, `https://auth.levkin.ca`, admin + TOTP enrolled
- **Monitoring** — LXC **218** @ `10.0.10.22`: Uptime Kuma `:3001`, Dockge `:5001`, Umami `:3000` (LAN-only) — [monitoring-stack.md](monitoring-stack.md)
- **Umami** + **Authentik** admin/TOTP/backup codes — done
- **Uptime Kuma** — monitors live; email alerts via Mailcow — see [monitoring-stack.md](monitoring-stack.md)
- **Dockge** on 218 — manages local `/opt/monitoring` stack
- **Snapshots** `backup-20260522` on LXCs **217**, **218**
- **Jellyfin** (VM 101) — stopped
- LXC **210, 215–221** — static via `pct set`; **Caddy VM 106** — static in-guest `.50`
- **Nextcloud VM 201** — retired (stopped, `onboot 0`, Caddy removed)
- ~~**Portainer VM 109**~~ — **removed** 2026-05-23 (~16 GiB RAM freed on pve10)
- **Marketing sites** — LXC **220** (`levkin.ca`), **215/216/219** (git deploy), not yet on Caddy VM static roots
- **punimTag dev** — pve201 LXC **9101** @ `10.0.10.121` (16 GB) — leave until testing done; then `dev-apps` on pve10
**Decisions locked in:**
- Container manager: **Dockge** (not Portainer, not Coolify/Dokploy/CapRover)
- Chat: **Mattermost only** — no Matrix/Synapse
- Knowledge tool: **Outline** for client-facing, **SiYuan** if/when PhD work picks up (don't run Affine + Trilium too)
- Bookmark manager: **Linkwarden** (full-page archive is the killer feature)
- Authentik is the SSO target; Vaultwarden stays standalone
---
## LXC / VM grouping table
| Group | What's inside | Why grouped | LXC or VM |
|---|---|---|---|
| **edge** | Caddy reverse proxy, Crowdsec/Fail2ban | The front door — small, stable, restart rarely | LXC, 1 vCPU, 1GB RAM |
| **identity** | Authentik (+ Postgres + Redis), Vaultwarden | Auth-critical — touch rarely, back up religiously | LXC, 2 vCPU, 2GB RAM |
| **comms** | Mailcow | Mailcow's compose is huge (15+ containers) and self-contained — wants its own host | **VM**, 4GB RAM |
| **automation** | n8n, Windmill (later), Huginn (later) | Active workloads, frequent updates, you'll touch these a lot | LXC, 2–4 vCPU, 4GB RAM |
| **productivity** | Vikunja, Listmonk, Outline, Mealie, Linkwarden | Personal/team productivity, low-resource | LXC, 2 vCPU, 4GB RAM |
| **media** | Immich, Nextcloud, Paperless-ngx | Large storage, GPU passthrough useful for Immich ML | **VM** if GPU passthrough, else LXC. Lots of disk. |
| **business** | Cal.com ✅, Crater | Client-facing, financial — back up often | LXC, 2 vCPU, 2GB RAM |
| **monitoring** | Uptime Kuma ✅, Dockge ✅, Umami ✅, Beszel (later) | Ops stack on LXC **218** | LXC, 2 vCPU, 2GB RAM |
| **labs** | Anything experimental — Flowise, Trigger.dev | Things you're trying out, can be wiped | LXC, scratch space |
### Why this grouping (cheat sheet)
- One service goes bad → only its group restarts.
- Need a kernel upgrade for one stack → snapshot the LXC, upgrade, roll back if broken.
- Mailcow's huge surface area is isolated in its own VM.
- Edge LXC is tiny and stable → perfect for the layer everything depends on.
- Backup cadence per group (see Backups section).
- Resource limits per LXC mean a runaway container can't eat n8n's RAM.
---
## Subdomains
Only expose what actually needs to be public. Internal services use Tailscale/Wireguard for remote access.
### Expose publicly
| Subdomain | Service | Group | Why public | Status |
|---|---|---|---|---|
| `levkin.ca` | Company site (spec + `/folders`) | edge | Main brand | ✅ LXC 220 — **DNS must point to home IP** (was parked elsewhere) |
| `caseware.levkin.ca` | Static site | edge | Marketing | ✅ live |
| `auto.levkin.ca` | Static site | edge | Marketing | ✅ live |
| `iliadobkin.com` | Portfolio (SDET) | edge | Personal site | ✅ live (pve10 LXC 219) |
| `cal.levkin.ca` | Cal.com | business | Clients book on it | ✅ live |
| `listmonk.levkin.ca` | Listmonk | productivity | Unsubscribe URLs must resolve | ✅ live |
| `mail.levkine.ca` | Mailcow | comms | Mail server | ✅ live |
| `auth.levkin.ca` | Authentik | identity | OIDC redirect URLs need external resolution | ✅ live |
| `bill.levkin.ca` | Crater | business | Clients view invoices | ⏳ Phase 6 |
| `cloud.levkin.ca` | Nextcloud | media | **Retiring** — decommission VM 201 after cutover | 🗑️ |
| `photos.levkin.ca` | Immich | media | Mobile apps need public hostname | ⏳ Phase 5 |
| `vault.levkin.ca` | Vaultwarden | identity | Mobile clients need public hostname | ⏳ |
| `notes.levkin.ca` | Outline | productivity | Sharing docs with clients | ⏳ |
| `chat.levkin.ca` | Mattermost | comms | Only if inviting outside users | ⏳ optional |
### Keep internal only (no public DNS, no Caddy block)
Reachable only via local network or Tailscale/Wireguard:
| Service | Reason |
|---|---|
| Umami admin UI | Only you need the dashboard. Tracking endpoint can be public, dashboard isn't. |
| Uptime Kuma | Status dashboard is for you. Don't advertise infrastructure. |
| Beszel | Metrics are admin-only. |
| Dockge | Admin UI — local only. |
| n8n editor | UI shouldn't be exposed. Webhooks go on `hooks.levkin.ca` if needed. |
| Huginn / Windmill / Flowise | Admin tools. |
| Vikunja | Personal task manager. |
| Mealie | Family recipes. |
| Trigger.dev | Internal automation. |
| Paperless-ngx | Personal documents. Never expose. |
| SiYuan | Personal knowledge. |
| Linkwarden | Personal bookmarks. |
### Borderline (decide per service)
| Subdomain | Service | Notes |
|---|---|---|
| `stats.levkin.ca` | Umami | Public tracker script; admin UI prefer LAN `:3000` |
| `status.levkin.ca` | Uptime Kuma | **Public status page** only (not admin UI) |
| *(none)* | Beszel | **LAN/Tailscale** `10.0.10.22:8090` — host metrics, no public DNS |
---
## Phased rollout
### Phase 0 — Foundation ✅
1. ✅ Caddy running (on VM — migrate to LXC in Phase 1.5)
2. ✅ **Static IP audit** — all pve10 LXCs pinned via `pct set`; Caddy VM static `.50`; homelab VMs pinned via UniFi DHCP — see [host-list.md](host-list.md)
3. ✅ DNS for `auth.levkin.ca` + `levkin.ca` apex → home IP
4. ✅ `identity` LXC **217** @ `10.0.10.21` (2 vCPU, 2GB RAM, 20GB `local-lvm`, Debian 12 + Docker Compose)
### Phase 1 — Identity ✅
1. ✅ Deploy Authentik in `identity` LXC (Authentik + Postgres + Redis, official compose at `/opt/authentik`)
2. ✅ Caddy: `auth.levkin.ca` → `10.0.10.21:9000` (simple passthrough, no forward-auth)
3. ✅ Admin user (`admin`), TOTP enrolled
4. ✅ `authentik Admins` group (skip custom `users` group until more accounts)
5. ✅ Static backup codes; **don't OIDC other apps until Cal.com test**
### Phase 2 — Next infra (was Phase 1.5) — Caddy migration to LXC ⏳
Deferred until after sprint merge. Authentik + SSO are stable; edge migration is the next structural change.
Why Caddy belongs in an LXC, not a VM:
- ~50MB OS overhead vs ~512MB for a VM
- Boot/restart in 2-5s vs 20-40s (matters when reloading config)
- Snapshot/backup is faster
- Caddy is a Go binary doing reverse-proxy work — no need for kernel isolation
- Near-native network performance
Steps:
1. Create `edge` LXC: Debian 12, 1 vCPU, 512MB RAM, 8GB disk, **static IP from host list**
2. Install Caddy via official Debian repo:
```bash
apt install -y debian-keyring debian-archive-keyring apt-transport-https
curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg
curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | tee /etc/apt/sources.list.d/caddy-stable.list
apt update && apt install caddy
```
3. Copy `Caddyfile` + custom snippets (`(security-headers)` etc.) from the VM
4. Add a **test subdomain** (e.g. `test.levkin.ca`) pointing at the new LXC — verify TLS issues and routing works
5. Cut over: update router port-forward (80/443) to the new LXC IP. DNS A records don't need to change if they point to your home IP.
6. Watch Mailcow, Cal.com, Listmonk, the marketing sites for ~24h
7. Keep the old VM snapshot for a week, then delete
### Phase 2 — Quick wins ✅
1. ✅ **Umami** — tracking on levkin.ca, caseware, auto, and iliadobkin.com (portfolio)
2. ✅ **Uptime Kuma** — monitors in UI
3. ✅ **Dockge** — logged in; register `/opt/monitoring` stack (see [monitoring-stack.md](monitoring-stack.md))
4. ✅ **Kuma email alerts** — SMTP via Mailcow — [monitoring-stack.md](monitoring-stack.md)
### Phase 3 — Cal.com (mostly done) ✅
1. ✅ Cal.com deployed in `business` LXC (id 210, Postgres included)
2. ✅ `cal.levkin.ca` proxied via Caddy
3. ✅ Booking link live at `cal.levkin.ca/ilia/consult` with Jitsi location
4. ✅ Email working via `cal@levkine.ca` SMTP through Mailcow
5. ⏳ **Cal.com OIDC** — **deferred** ([cal-authentik-oidc.md](cal-authentik-oidc.md)) — needs enterprise `CALCOM_LICENSE_KEY`
6. ✅ `auto.levkin.ca` consult button → `cal.levkin.ca/ilia/consult`
### Phase 4 — SSO migration ✅
1. ✅ **Vikunja** — [vikunja-authentik-oidc.md](vikunja-authentik-oidc.md)
2. ~~**Nextcloud**~~ — skipped (VM 201 retired)
3. ✅ **Listmonk** — [listmonk-authentik-oidc.md](listmonk-authentik-oidc.md) (v6.1.0)
4. ✅ **Mattermost** — [mattermost-authentik-gitlab-oauth.md](mattermost-authentik-gitlab-oauth.md)
5. ✅ **Mailcow** — [mailcow-authentik-oidc.md](mailcow-authentik-oidc.md)
**Remaining:** browser smoke tests as `ilia`; rotate OIDC secrets when done.
For each: keep a local admin password as a break-glass account.
### Phase 5 — Family / personal wins (~1 evening)
1. **Immich** in `media` VM — install mobile apps for you and family, enable auto-upload. Face recognition runs in background; "my kids 2024" works within a couple days.
2. Skip PhotoPrism — Immich covers it.
### Phase 6 — Business / consulting (~1–2 evenings)
1. **Crater** in `business` LXC — tax rates, company info, Stripe integration if you want online payment
2. **Beszel** hub in `monitoring` LXC + agents on each LXC — one dashboard for resource usage
### Phase 7 — Automation depth (ongoing)
Only when you have a real use case:
1. **Huginn** in `automation` — first agent: competitor pages, kosher product availability, grant deadlines
2. **Windmill** in `automation` — first script: rewrite an n8n flow with too many code nodes
3. **Flowise** in `labs` — first flow: chat-with-docs against your consulting notes
### Phase 8 — Knowledge / research
1. **Outline** in `productivity` LXC — client-facing wiki + your notes
2. **Linkwarden** in `productivity` LXC — bookmarks with full-page archive
3. **Paperless-ngx** in `media` — scan and OCR the paper that's accumulating
4. **SiYuan** — only if/when PhD or long-form research becomes relevant
---
## Static IP audit
**Maintain a `host-list.md` file** (in this Cursor project, alongside this plan) with every LXC/VM, its current IP, its target static IP, and DHCP/static status. Cursor will use this as the source of truth when scripting changes.
Suggested format:
| LXC/VM ID | Name | Role | Current IP | Target static IP | DHCP/Static | Notes |
|---|---|---|---|---|---|---|
| 210 | cal | Cal.com | 10.0.10.228/24 (DHCP) | 10.0.10.228/24 | ⏳ static | Convert ASAP |
| ... | ... | ... | ... | ... | ... | ... |
### Recommended IP plan
Divide the `10.0.10.0/24` subnet (or whatever your LAN is) into role-based ranges so it's scannable:
| Range | Reserved for |
|---|---|
| `.1 - .9` | Network gear (router, switches, APs) |
| `.10 - .19` | Proxmox host(s) + PBS |
| `.20 - .39` | Edge / identity / comms (critical infra) |
| `.40 - .79` | Application LXCs (productivity, automation, business, monitoring) |
| `.80 - .99` | Media VM(s) |
| `.100 - .199` | DHCP pool (clients, phones, laptops) |
| `.200 - .249` | Labs / experimental |
| `.250 - .254` | Reserved |
### How to set static on a Proxmox LXC
Two methods — pick one and stick with it:
**Method A — Proxmox CLI (recommended, survives reboots cleanly):**
```bash
pct set <ID> -net0 name=eth0,bridge=vmbr0,ip=10.0.10.X/24,gw=10.0.10.1
pct reboot <ID>
```
**Method B — Router DHCP reservation:**
- Reserve the IP in your router's DHCP table by MAC address. LXC stays "DHCP" technically, but always gets the same IP.
- Easier if you have many hosts and one router.
- Risk: if the LXC's MAC changes (rebuild from snapshot to new ID), reservation breaks.
**Recommendation:** Method A (`pct set`) for everything critical (edge, identity, comms, business). Method B is fine for labs/experimental LXCs.
### Audit checklist
1. List every LXC: `pct list`
2. List every VM: `qm list`
3. For each, run `pct exec <ID> -- ip a` (or `qm guest exec <ID> -- ip a` for VMs) and check whether the IP came from DHCP
4. Fill in `host-list.md`
5. Pick target IPs from the range plan above
6. Convert one at a time, lowest-risk first (labs → productivity → business → comms → identity → edge)
7. **After each conversion**, verify the Caddy reverse-proxy entry still works (curl from outside)
8. Update `host-list.md` status column
### Hosts known to need conversion right now
- ~~**LXC 210 (cal)**~~ — static at `10.0.10.228`
- **Site LXCs 220, 215/216/219** — static; served via Caddy → nginx on each LXC (git deploy). Optional future: static files on Caddy VM only.
---
## Backlog (priority order)
### P0 — status (2026-05-24)
| # | Item | Status |
|---|------|--------|
| 1 | Umami / Kuma / Dockge | ✅ |
| 2 | Portainer VM 109 | ✅ removed |
| 3 | Nextcloud VM 201 | ✅ retired |
| 4 | Listmonk → LXC 221 | ✅ + SMTP + VM 113 destroyed |
| 5 | Beszel agents | ✅ **16 systems** |
| 6 | Kuma monitors + email | ✅ **17 monitors**, all alert-linked |
| 7 | DNS `levkin.ca` apex | ✅ |
| 8 | Vikunja OIDC infra | ✅ live — browser test as `ilia` still manual |
| 9 | UniFi DHCP listmonk MAC | ⏳ manual @ UniFi |
| 10 | NAS / Jellyfin / DebianDesktop | **deferred** |
| 11 | Cal OIDC | deferred (no license) |
### P1 — next
See **[handoff-next-steps.md](handoff-next-steps.md)** — SSO smoke tests, secret rotation.
### Phase 2 backlog (was P1 infra)
1. **Caddy → edge LXC** @ `10.0.10.20`
2. **Security remediation** — [security-remediation-plan.md](security-remediation-plan.md)
3. **NAS / Jellyfin** — disk `W4J0L3PY`
### P1 — when ready
- **Outline** — wiki for client docs
- **Linkwarden** — bookmarks with full-page archive
- **Plane** — Jira-lite project management (pair with Mattermost)
### P2 — when you have a real need
- **Crater** — invoicing (Phase 6)
- **Immich** — photos (Phase 5)
- **Paperless-ngx** — document scanning (Phase 8)
- **Huginn** — first when you have a monitoring use case
- **Windmill** — when n8n hits limits
- **Trigger.dev** — durable background jobs in code (better fit than Windmill for QA work)
- **PrivateBin** — encrypted paste for sharing secrets with contractors
- **Addy.io** — email aliases
- **SiYuan** — if PhD work picks up
- **Flowise** — labs only, when LLM workflow use case appears
### Skip / declined
- ~~PhotoPrism~~ — Immich covers it
- ~~Activepieces~~ — you already have n8n
- ~~Affine / Trilium~~ — picked Outline + SiYuan instead
- ~~Matrix/Synapse + Element~~ — staying on Mattermost
- ~~Coolify / Dokploy / CapRover~~ — Dockge is enough; revisit only if writing many custom apps
---
## Backup strategy
- **Proxmox Backup Server (PBS)** or `vzdump` to a NAS — snapshot each LXC/VM nightly
- **Critical groups** (`identity`, `comms`, `business`): 7 daily + 4 weekly + 12 monthly
- **Productivity/automation**: 7 daily + 4 weekly
- **Labs**: 3 daily, no long retention
- **Off-site copy** of `identity` and `business` LXCs — these contain auth and billing data. Encrypted copy to Wasabi or Backblaze B2.
The whole LXC gets snapshotted — much simpler than file-level container backup.
**Done on pve10 (2026-05-22):** `pct snapshot` **`backup-20260522`** on LXCs **217** (identity) and **218** (monitoring).
---
## Next steps (priority order)
See **[handoff-2026-05-24.md](handoff-2026-05-24.md)** for sprint status checklist.
| # | Task | Status | Effort | Frees / unlocks |
|---|------|--------|--------|-----------------|
| 1 | **Kuma SMTP** | ✅ done | — | — |
| 2 | **Cal.com → Authentik OIDC** | ⏸ **deferred** | — | Needs `CALCOM_LICENSE_KEY`; infra ready — [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md) |
| 3 | **auto.levkin.ca** → Cal booking link | ✅ | — | Consult button live |
| 4 | **Stop Portainer VM 109** | ✅ | — | Removed 2026-05-23; **~16 GiB RAM** on pve10 |
| 5 | **Retire Nextcloud VM 201** | ✅ | — | ~8 GiB RAM freed |
| 6 | **Vikunja → Authentik OIDC** | 🟡 infra OK | 15 min | Browser login as `ilia` |
| 7 | **UniFi DHCP reservations** | ⏳ | 20 min | [unifi-static-dhcp.md](unifi-static-dhcp.md) |
| 8 | **DNS levkin.ca apex** | ✅ | — | `142.180.237.136` |
| 9 | **Beszel + Kuma** | ✅ | — | 16 Beszel agents; 17 Kuma monitors |
| 10 | ~~**Listmonk SMTP**~~ | ✅ | — | UI + vault |
| 10 | **NAS.SP00** disk → Jellyfin | ⏳ hardware | — | VM 101 |
| 11 | **DebianDesktop reboot** | ✅ | — | VM 100 rebooted; 24 GB active on pve201 |
| 12 | **Caddy → edge LXC `.20`** | ⏳ defer | ~30 min | Phase 1.5 |
| 13 | **dev-apps LXC** | ⏳ defer | half day | After punim testing |
| 14 | **Static sites → Caddy VM** | ⏳ optional | 1 h | Defer |
**Defer:** Immich, Crater, Outline; Listmonk/Mattermost/Mailcow SSO after Vikunja; Cal OIDC until license.
### Adding a new service — quick rule
| Want to add… | Node | RAM budget | Prerequisite |
|--------------|------|------------|--------------|
| Small app (Mealie, Linkwarden) | pve10 | 2 GB LXC | ~22 GiB free on pve10 |
| Medium (Outline, Crater) | pve10 | 4 GB LXC | Portainer + Nextcloud already freed |
| Heavy (Immich + ML) | pve10 or pve201 GPU | 4–8 GB+ | NAS healthy; pve201 only after GPU/punim sized down |
| Dev sandbox | pve10 `dev-apps` | 6–8 GB | punim 9101 migration only after testing |
### Nextcloud decommission (VM 201)
1. Confirm export in `exports/nextcloud-2026-05-21/` is complete
2. Delete **Nextcloud** monitor in Kuma
3. Remove `nextcloud.levkin.ca` from Caddy VM
4. Stop VM 201; update [host-list.md](host-list.md)
5. After NAS healthy: optional `vzdump` archive then delete disk
---
## Important rules
1. **Never put Authentik behind itself.** `auth.levkin.ca` is a simple Caddy passthrough — no forward-auth, no fancy dependencies. If it were gated by its own forward-auth, an Authentik outage would lock you out of the very login you need to fix it.
2. **Vaultwarden stays standalone.** It's your break-glass path if Authentik dies. Don't OIDC it.
3. **Keep a local admin password on every SSO-wired app.** OIDC integrations break during upgrades — you need to log in to fix them.
4. **Local admin to Proxmox host.** Independent of Authentik and Vaultwarden. Written down somewhere physical.
5. **Don't expose admin UIs publicly.** Dockge, Beszel, Uptime Kuma admin, n8n editor — use Tailscale or Wireguard for remote access.
6. **Static IPs for every LXC.** DHCP will eventually move them and Caddy will break. Set via `pct set <id> -net0 ...ip=10.0.10.X/24,gw=...` or a router reservation.
7. **Cal.com LXC (210)** — static at `.228` ✅.
8. **Maintain `host-list.md`** as the single source of truth for IPs. Update it whenever a new LXC/VM is created or migrated.

View File

@ -1,50 +0,0 @@
# Listmonk ↔ Authentik OIDC
**Status:** Live at `https://listmonk.levkin.ca` (LXC **221**, `10.0.10.148`).
**Requires listmonk v5+** (OIDC). Upgraded from v2.4.0 → **v6.1.0** on 2026-05-24.
## Authentik
| Item | Value |
|------|--------|
| Application slug | `listmonk` |
| Provider name | `listmonk-oidc` |
| Client ID | `listmonk` |
| Redirect URI (strict) | `https://listmonk.levkin.ca/auth/oidc` |
| Subject mode | **user_username** |
| Signing key | `authentik Self-signed Certificate` |
| Access group | **`homelab-users`** (app binding) |
Client secret: `vault_listmonk_oidc_client_secret` in Ansible vault (rotate if exposed).
## Listmonk
Configured via **Settings → Security → OIDC** (stored in DB):
- **Provider URL:** `https://auth.levkin.ca/application/o/listmonk/`
- **Auto-create users:** enabled (Super Admin role id `1` for new SSO users)
Break-glass: local user `listmonk` (password login still enabled).
## Login
1. Sign out: `https://auth.levkin.ca/if/user/logout/`
2. `https://listmonk.levkin.ca/admin` → **Login with Authentik**
3. Sign in as **`ilia`** (must be in `homelab-users`)
## Upgrade (if needed)
```bash
ssh root@10.0.10.148
systemctl stop listmonk
curl -fsSL -o /tmp/lm.tgz https://github.com/knadh/listmonk/releases/download/v6.1.0/listmonk_6.1.0_linux_amd64.tar.gz
tar -xzf /tmp/lm.tgz -C /tmp && mv /tmp/listmonk /root/listmonk
/root/listmonk --config /etc/listmonk/config.toml --upgrade --yes
systemctl start listmonk
```
## Related
- [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)
- [Listmonk OIDC docs](https://listmonk.app/docs/oidc/)

View File

@ -1,52 +0,0 @@
# Mailcow ↔ Authentik OIDC
**Status:** Configured 2026-05-24 (Generic-OIDC in DB + Authentik app `mailcow`)
**Requires:** mailcow **2025-03+** (this host: `2025-10a`)
**URL:** https://mail.levkine.ca
---
## What OIDC means
**OIDC** = **OpenID Connect** — login with an identity provider (Authentik) instead of a separate password per app. You sign in once at `auth.levkin.ca`, apps trust that login.
---
## Authentik
| Item | Value |
|------|--------|
| Application slug | `mailcow` |
| Provider | `mailcow-oidc` |
| Client ID | `mailcow` |
| Redirect URI | `https://mail.levkine.ca` |
| Scope mapping | `mailcow_template` → `default` mailbox template |
| Access | `homelab-users` |
Secret: `vault_mailcow_oidc_client_secret` in Ansible vault.
---
## Mailcow (applied via MySQL `identity_provider`)
- **Identity Provider:** Generic-OIDC
- **Authorize / token / userinfo:** `https://auth.levkin.ca/application/o/{authorize,token,userinfo}/`
- **Redirect URL:** `https://mail.levkine.ca`
- **Scopes:** `openid profile email mailcow_template`
Mailbox users with SSO need matching email in Authentik. Admin UI may still use local admin for break-glass.
---
## Verify
Log out of Mailcow → login should offer external IdP. Test with user `ilia` in `homelab-users`.
---
## Related
- [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)
- [Authentik mailcow integration](https://integrations.goauthentik.io/chat-communication-collaboration/mailcow/)

View File

@ -1,42 +0,0 @@
# Mailcow unreachable from Caddy / LAN (TCP timeout)
## Symptom
- Mailcow containers healthy inside VM `10.0.10.132`
- `curl https://10.0.10.132/` works **on the VM**
- From Caddy (`10.0.10.50`) or other LAN hosts: TCP **443/80 timeout**
- `tcpdump` on Proxmox shows SYN from client, **no SYN-ACK**
## Cause (not RAM)
`mailcowdockerized-netfilter-mailcow` adds an nftables rule in chain `MAILCOW`:
```text
iifname != "br-mailcow" oifname "br-mailcow" tcp → DROP
```
That blocks forwarded HTTPS from the LAN to the nginx container, even when `DISABLE_NETFILTER_ISOLATION_RULE=y` is set (netfilter still recreates the drop on restart in some versions).
## Fix on the mailcow VM
```bash
nft flush chain ip filter MAILCOW
```
Persistent (installed 2026-05-23): systemd unit `mailcow-flush-isolation-drop.service` runs after Docker.
After netfilter container restart, verify:
```bash
nft list chain ip filter MAILCOW # should be empty
nc -zv 10.0.10.132 443 # from Caddy host
```
## Related settings in `/opt/mailcow-dockerized/mailcow.conf`
- `DISABLE_NETFILTER_ISOLATION_RULE=y`
- `SNAT_TO_SOURCE=10.0.10.132` (optional; helps some hairpin cases)
## Reverse proxy
Caddy on `10.0.10.50` → `https://10.0.10.132` with `Host: mail.levkine.ca` — see `playbooks/caddy-auth-authentik.yml` / Caddyfile on caddy VM.

View File

@ -1,74 +0,0 @@
# Mattermost ↔ Authentik (GitLab OAuth workaround)
**Status:** ✅ Live (config.json patched 2026-05-24; VM **107** @ `10.0.10.237`)
Team Edition has no generic OIDC UI — use **GitLab OAuth** endpoints pointed at Authentik.
**URL:** https://slack.levkin.ca · **Backend:** `10.0.10.237:8065` (VM **107** on pve10)
---
## Authentik (done 2026-05-24)
| Item | Value |
|------|--------|
| Application slug | `mattermost` |
| Provider | `mattermost-gitlab-oidc` |
| Client ID | `mattermost` |
| Redirect URI | `https://slack.levkin.ca/signup/gitlab/complete` |
| Scope mappings | `mattermost-username`, `mattermost-id` + default OpenID |
| Access | `homelab-users` group binding |
Client secret: store in vault as `vault_mattermost_oidc_client_secret` (rotate if exposed).
---
## Mattermost — apply on VM
SSH as root (or bootstrap key first: `make bootstrap-root-ssh` once password works):
```bash
ssh root@10.0.10.237
```
Edit `/opt/mattermost/config/config.json` (path may vary — `find / -name config.json -path '*mattermost*'`).
Set `GitLabSettings`:
```json
"GitLabSettings": {
"Enable": true,
"Secret": "<vault_mattermost_oidc_client_secret>",
"Id": "mattermost",
"Scope": "",
"AuthEndpoint": "https://auth.levkin.ca/application/o/authorize/",
"TokenEndpoint": "https://auth.levkin.ca/application/o/token/",
"UserAPIEndpoint": "https://auth.levkin.ca/application/o/userinfo/",
"DiscoveryEndpoint": "https://auth.levkin.ca/application/o/mattermost/.well-known/openid-configuration",
"ButtonText": "Log in with Authentik",
"ButtonColor": "#fd4b2d"
}
```
Then:
1. **System Console** → Authentication → Signup → **Enable Account Creation** = true
2. `systemctl restart mattermost` (or `docker compose restart` if containerized)
3. Log out → use **GitLab** button (actually Authentik)
4. Existing users: Profile → Security → **Switch to GitLab SSO** (see [Authentik integration](https://integrations.goauthentik.io/chat-communication-collaboration/mattermost-team-edition/))
---
## Verify
```bash
curl -sS https://auth.levkin.ca/application/o/mattermost/.well-known/openid-configuration | head
curl -sS -o /dev/null -w '%{http_code}\n' https://slack.levkin.ca/login
```
---
## Related
- [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)
- [cursor-mcp-homelab.md](cursor-mcp-homelab.md) — Playwright can smoke-test login after `config.json` is applied

View File

@ -1,382 +0,0 @@
# Monitoring stack (LXC 218)
**Host:** `monitoring` @ `10.0.10.22` (PVENAS pve10, VMID **218**)
**Compose:** `/opt/monitoring/compose.yml`
**Stacks dir (Dockge):** `/opt/stacks`
All admin UIs are **LAN-only** (no public Caddy blocks). Use Tailscale or local network.
| Service | URL | Port | Notes |
|---------|-----|------|-------|
| **Uptime Kuma** | http://10.0.10.22:3001 | 3001 | Admin + monitors configured ✅ (replaces pve201 LXC **305** @ `.197`, stopped) |
| **Dockge** | http://10.0.10.22:5001 | 5001 | Manage compose on **this LXC only** |
| **Umami** | http://10.0.10.22:3000 | 3000 | Password changed ✅; levkin.ca + caseware + auto + portfolio tracked |
| **Beszel** | http://10.0.10.22:8090 | 8090 | Hub (LAN). **Fresh install 2026-05-23** — create admin on first visit; SMTP below |
| Service | Public URL | Admin / internal | Role |
|---------|------------|------------------|------|
| **Umami** | `stats.levkin.ca` (script + optional admin) | `http://10.0.10.22:3000` | Page analytics |
| **Uptime Kuma** | **`status.levkin.ca`** (status page only — configure in Kuma) | `http://10.0.10.22:3001` | URL uptime + alerts |
| **Beszel** | **No public hostname** (by design) | `http://10.0.10.22:8090` | Host metrics (CPU/RAM/disk) |
| **Dockge** | — | `http://10.0.10.22:5001` | Compose UI on monitoring LXC |
Optional later: `metrics.levkin.ca` → Beszel **only if** you add Caddy with Tailscale/LAN restriction — not a public dashboard like `status.levkin.ca`.
---
## Beszel — admin + SMTP (Mailcow)
**URL:** http://10.0.10.22:8090
### 1. Create admin
Open in browser → **Create account** (first user = admin).
### 2. SMTP via `alerts@levkine.ca`
Same Mailcow mailbox as [Kuma email alerts](#uptime-kuma--email-alerts-mailcow).
**UI:** Settings → Notifications → SMTP (PocketBase mail settings)
| Field | Value |
|-------|--------|
| Host | `mail.levkine.ca` |
| Port | `587` |
| Username | `alerts@levkine.ca` |
| Password | alerts mailbox password |
| Enforce TLS | **OFF** (STARTTLS, like Kuma) |
| Sender | `alerts@levkine.ca` |
After save: `ssh root@10.0.10.22 'cd /opt/monitoring && docker compose restart beszel'`
**Script (after admin exists):**
```bash
export BESZEL_EMAIL='you@example.com' BESZEL_PASSWORD='...' SMTP_PASS='...'
./scripts/beszel-setup-smtp.sh
make beszel-setup-alerts # Status + CPU/RAM/disk on all systems
```
### 3. Local agent (monitoring LXC)
Hub → **Add System** → host `/beszel_socket/beszel.sock` → copy **TOKEN** and hub **KEY**.
In `/opt/monitoring/compose.yml`, set `TOKEN` and `KEY` **literally** in the `beszel-agent` environment block (do not rely on `${BESZEL_*}` — compose may expand them empty):
```yaml
beszel-agent:
environment:
LISTEN: /beszel_socket/beszel.sock
HUB_URL: http://127.0.0.1:8090
TOKEN: "<system-token-from-hub>"
KEY: "<hub-public-key-from-settings>"
```
Then: `cd /opt/monitoring && docker compose up -d beszel-agent`
**Hypervisor agents (pve10, pve201):** install script flags are `-k`, `-t`, `-url`, `-p` (not `-key`):
```bash
curl -fsSL https://get.beszel.dev | sh -s -- \
-k 'ssh-ed25519 AAAA...' \
-t '<system-token>' \
-url 'http://10.0.10.22:8090' \
-p 45876
```
Verify: `systemctl status beszel-agent` and hub shows **up**.
**Deployed 2026-05-24:** **16 systems, all up** — see table below.
### 4. Beszel agents (deployed)
| System | Host | Status |
|--------|------|--------|
| monitoring-218 | unix socket | ✅ |
| pve10 / pve201 | hypervisors | ✅ |
| identity-217 | `10.0.10.21` | ✅ |
| caddy-106 | `10.0.10.50` | ✅ |
| mailcow | `10.0.10.132` | ✅ |
| listmonk-221 | `10.0.10.148` | ✅ |
| cal-210 | `10.0.10.228` | ✅ |
| gitea-102 | `10.0.10.169` (Alpine/openrc) | ✅ |
| vikunja-301 | `10.0.10.159` | ✅ |
| vaultwarden-104 | `10.0.10.142` (`ladmin` + sudo) | ✅ |
| hermes-117 | `10.0.10.36` (Hermes agent VM; **not** Mattermost — `ladmin` + sudo) | ✅ |
| levkin-220 | `10.0.10.60` | ✅ |
| caseware-215 | `10.0.10.105` | ✅ |
| auto-216 | `10.0.10.59` | ✅ |
| portfolio-219 | `10.0.10.106` | ✅ |
**Install notes:** use `-k`, `-t`, `-url`, `-p` flags; pipe `printf n` to skip auto-update prompt. Gitea needs `apk add curl` first. Vaultwarden/hermes use `ladmin` + sudo.
#### Still optional (Beszel)
| System | Notes |
|--------|--------|
| **n8n** VM 103 | Automation |
| **TrueNAS** VM 105 | When NAS healthy |
#### Alerts to enable (UI — bell icon per system)
| Alert | Suggested threshold |
|-------|---------------------|
| **Status** | Down > 1 min |
| **CPU** | > 85% for 5 min |
| **Memory** | > 90% |
| **Disk** | > 85% on `/` or `local-lvm` |
| **Temperature** | > 75°C (if sensors available) |
**Optional later:** universal token (`/settings/tokens`) for bulk agent install — requires a regular Beszel user (superusers cannot use universal tokens).
**Not Beszel:** use **Kuma** for public URL uptime (`https://…`); use **Umami** for page analytics (no alerts).
See also [smtp-inventory.md](smtp-inventory.md) — Beszel shares `alerts@` with Kuma; Umami does not use SMTP.
---
## Backups (pve10)
| Guest | VMID | Snapshot | Date |
|-------|------|----------|------|
| identity | 217 | `backup-20260522` | 2026-05-22 |
| monitoring | 218 | `backup-20260522` | 2026-05-22 |
On pve10:
```bash
pct listsnapshot 217
pct listsnapshot 218
# Rollback if needed:
# pct rollback 217 backup-20260522
```
Optional off-node copy (when NAS healthy): `vzdump 217 218 --storage local --mode snapshot --compress zstd`
---
## Uptime Kuma — monitors
**URL:** http://10.0.10.22:3001
### Current (2026-05-24) — 17 monitors
| Name | URL | Email alert |
|------|-----|-------------|
| Authentic | https://auth.levkin.ca | ✅ |
| Cal.com | https://cal.levkin.ca | ✅ |
| Caseware Landing | https://caseware.levkin.ca | ✅ |
| Automation Landing | https://auto.levkin.ca | ✅ |
| Mailcow | https://mail.levkine.ca | ✅ |
| Listmonk | https://listmonk.levkin.ca | ✅ |
| Gitea | https://git.levkin.ca | ✅ |
| Todo (Vikunja) | https://todo.levkin.ca | ✅ |
| Vault | https://vault.levkin.ca | ✅ |
| PVENAS | https://10.0.10.10:8006 | ✅ |
| PVE201 | https://10.0.10.201:8006 | ✅ |
| **levkin.ca** | https://levkin.ca | ✅ added 2026-05-24 |
| **Portfolio** | https://iliadobkin.com | ✅ |
| **Search** | https://search.levkin.ca | ✅ |
| **PDF** | https://pdf.levkin.ca | ✅ |
| **Umami script** | https://stats.levkin.ca/script.js | ✅ |
| **Mattermost** | https://slack.levkin.ca | ✅ (VM **107** — not Hermes) |
SMTP notification **My Email Alert** → `alerts@levkine.ca` → `idobkin@gmail.com`. All monitors linked to email alerts.
**Public status page:** https://status.levkin.ca — slug `homelab`, domain set in Kuma (17 monitors). Caddy → `10.0.10.22:3001`. Admin UI stays at `http://10.0.10.22:3001` (LAN only).
Admin password in vault: `vault_uptime_kuma_password` (`admin` user).
### Add monitors (script)
```bash
make vault-export-env # sets KUMA_PASSWORD
./scripts/kuma-add-monitors.sh
```
**Removed / do not add:** Nextcloud (`cloud.levkin.ca`) — VM 201 retired.
**Not in Kuma (by design):** Beszel `:8090`, Dockge `:5001`, Umami admin `:3000` — LAN-only; use **Beszel** for host metrics.
---
## Uptime Kuma — email alerts (Mailcow)
Mail domain is **`levkine.ca`** (with **e**). Cal.com already sends via Mailcow as `cal@levkine.ca`.
### Which email to use
| Role | Address | Notes |
|------|---------|-------|
| **SMTP server** | `mail.levkine.ca` | Mailcow host |
| **SMTP port** | `587` | STARTTLS (not 465 unless you prefer SMTPS) |
| **From (sender)** | `alerts@levkine.ca` | Create mailbox in Mailcow if it does not exist |
| **To (you)** | `idobkin@gmail.com` or `ilia@levkine.ca` | Use whichever you read; Gmail is fine for alerts |
### 1. Create mailbox in Mailcow (if needed)
**Automated (needs Mailcow API key):**
```bash
# Define mailbox in group_vars/all/mailcow.yml, password in vault:
make mailcow-mailbox MAILBOX=alerts
# (alias: make mailcow-create-alerts)
# Import from .env into vault once, then delete .env:
cp .env.example .env # MAILCOW_API_KEY=... ALERTS_PASSWORD=...
make vault-import-env
make mailcow-mailbox MAILBOX=alerts
```
To add another mailbox tomorrow: edit `mailcow.yml` + `vault_mailcow_mailbox_passwords.<name>`, then `make mailcow-mailbox MAILBOX=<name>`.
**Manual UI:**
1. https://mail.levkine.ca → admin login
2. **Email → Mailboxes → Add** → `alerts@levkine.ca` (strong password → store in Vaultwarden)
3. Optional: alias `monitoring@levkine.ca` → same inbox
### 2. Add notification in Kuma
**Automated (from your Mac, after mailbox exists):**
```bash
cd /path/to/ansible
pip install uptime-kuma-api # or: .venv/bin/pip install uptime-kuma-api
export KUMA_URL=http://10.0.10.22:3001 KUMA_USER=admin KUMA_PASSWORD='...'
export SMTP_USER=alerts@levkine.ca SMTP_PASS='...' SMTP_TO=idobkin@gmail.com
./scripts/kuma-setup-smtp.sh
```
**Manual UI:**
1. http://10.0.10.22:3001 → **Settings** → **Notifications** → **Setup Notification**
2. Type: **Email (SMTP)**
3. Fill in:
| Field | Value |
|-------|--------|
| SMTP Host | `mail.levkine.ca` |
| SMTP Port | `587` |
| Security | TLS / STARTTLS |
| Username | `alerts@levkine.ca` |
| Password | mailbox password |
| From Email | `alerts@levkine.ca` |
| To Email | `idobkin@gmail.com` (or your `@levkine.ca`) |
4. **Test** → save
5. Edit each monitor (or default) → **Notifications** → enable this channel
**Alternative:** Mattermost webhook (`slack.levkin.ca`) if you prefer chat over email.
---
## Dockge — what to do after login
**On server today:**
| Path | Contents |
|------|----------|
| `/opt/monitoring/compose.yml` | **Live** stack (Docker project `monitoring`, 4 containers running) |
| `/opt/stacks/monitoring/compose.yaml` | Copy for Dockge (same services) |
| `/opt/stacks/authentik-ref/`, `cal-ref/` | README only — **no** compose file (ignore) |
**Why “Scan Stacks Folder” looks empty**
- Scan only picks up folders under **`/opt/stacks`** that contain `compose.yaml` / `compose.yml`.
- Your containers were started from **`/opt/monitoring`**, so Docker does not automatically link them to `/opt/stacks/monitoring` until you register that folder in Dockge.
**Fix (pick one):**
### Dockge UI note (your version)
**Settings → General** only has hostname — there is **no “Stacks directory” field**. That path is fixed at deploy time:
`DOCKGE_STACKS_DIR=/opt/stacks` (already set in `/opt/monitoring/compose.yml`).
Stacks are managed from the **home / dashboard** page, not Settings.
### Option 1 — Add stack manually (recommended)
1. http://10.0.10.22:5001 → **home** (logo / dashboard, not Settings)
2. **+ Create Stack** (or **Compose** → new stack)
3. Name: `monitoring`
4. Path: `/opt/stacks/monitoring` (must contain `compose.yaml`)
5. Open stack → review compose → **do not Start** until old project is stopped (below)
### Option 2 — Scan from dashboard menu
1. Stay on **dashboard** (not Settings)
2. Top-right **⋮** → **Scan Stacks Folder**
3. Pick **`monitoring`** if it appears (`authentik-ref` / `cal-ref` have no compose — ignore)
**Avoid duplicate containers**
Before starting from Dockge:
```bash
ssh root@10.0.10.22
cd /opt/monitoring && docker compose down
# Then start from Dockge UI on stack monitoring, OR:
cd /opt/stacks/monitoring && docker compose --env-file .env up -d
```
Until you do that, Kuma/Dockge/Umami keep running from `/opt/monitoring`; Dockge is optional for edits until cutover.
### Optional reference stacks (read-only)
Create empty stacks under `/opt/stacks/` only if you want a UI placeholder:
```bash
ssh root@10.0.10.22
mkdir -p /opt/stacks/authentik /opt/stacks/cal
# Copy compose for reference (does NOT control remote host):
scp root@10.0.10.21:/opt/authentik/compose.yml /opt/stacks/authentik/
```
To **manage** Authentik or Cal from Dockge long term, either move compose to 218 (not recommended) or install Dockge on each LXC later.
### Step 3 — Retire Portainer
VM **109** (portainer) was removed from pve10 on 2026-05-23; use Dockge on 218 instead.
---
## Umami
- ✅ Running at http://10.0.10.22:3000 (LAN / Tailscale only)
- ✅ **Public tracking** via `https://stats.levkin.ca/script.js` on **levkin.ca** (LXC 220), caseware, auto, and **iliadobkin.com** (portfolio LXC 219)
**Three choices (pick one later; none block the sites):**
| Option | Effort | Notes |
|--------|--------|--------|
| **A — Skip public analytics** | 0 | Use Umami dashboard on `:3000` when you care; no DNS/Caddy |
| **B — One DNS + Caddy block** | ~10 min | A record → home IP + Caddy `reverse_proxy 10.0.10.22:3000` on caddy VM |
| **C — Re-add script tags** | 2 min | After B works, insert script before `</head>` on 215/216 |
**Suggested public hostname (instead of `analytics`):** `stats.levkin.ca` (short, clear). Alternatives: `umami.levkin.ca`, `metrics.levkin.ca`.
```caddy
stats.levkin.ca {
import security-headers
encode gzip
reverse_proxy 10.0.10.22:3000
}
```
Script tag then: `https://stats.levkin.ca/script.js`
We are **not stuck** — marketing sites do not need Umami to render. Option A is fine for now.
---
## Maintenance
```bash
ssh root@10.0.10.22
cd /opt/monitoring
docker compose --env-file .env pull
docker compose --env-file .env up -d
docker compose ps
```

View File

@ -1,203 +0,0 @@
# NAS.SP00 drive failure — IT report
**Date:** 2026-05-21
**Host:** PVENAS (Proxmox VE) — `10.0.10.10`
**Pool:** ZFS `NAS.SP00` (~9 TB, ~862 GB used)
**Prepared for:** IT / hardware replacement
**SMART audit:** [nas-sp00-smart-audit-2026-05-21.md](nas-sp00-smart-audit-2026-05-21.md)
---
## Executive summary
One disk in a four-drive ZFS pool (two mirrored pairs) has **failed at the hardware level**. The pool is **DEGRADED** but **online** with **no known data errors** at this time. The failed drive must be **physically replaced** and the pool **resilvered**. Until then, **mirror-0 has no redundancy** — a second failure on the remaining disk in that mirror (`W4J0L0BA`) could cause data loss.
This issue also caused a **host-wide I/O wedge** (pool SUSPENDED → stuck `sync()`), which blocked LXC/VM operations unrelated to the pool (e.g. Cal.com on `local-lvm`). That was cleared by a forced node reboot; **replacing the drive remains required**.
---
## Pool layout
| Vdev | Role | Disk A | Disk B | Status |
|------|------|--------|--------|--------|
| mirror-0 | RAID1 pair | `W4J0L0BA` (sda, 5 TB) | `W4J0L3PY` (sdb) | **DEGRADED** — sdb UNAVAIL |
| mirror-1 | RAID1 pair | `W4J0LKCD` (sdd, 5 TB) | `W4J0K9V7` (sdc, 5 TB) | **ONLINE** |
Model family (healthy drives): Seagate **ST5000DM000-1FK178** (5 TB; SMART reports 5980 rpm on the healthy drives — see the SMART audit).
---
## Failed drive identification
| Field | Expected | Observed |
|-------|----------|----------|
| **Serial** | W4J0L3PY | W4J0L3PY |
| **Model** | ST5000DM000-1FK178 | ST5000DM000 (truncated reporting) |
| **WWN** | — | `5000c50082cc8bbb` |
| **Firmware** | — | CC48 |
| **Capacity** | ~5,000,981,078,016 bytes (**5.00 TB**) | **137,438,952,960 bytes (~137 GB)** |
| **Linux device** | `/dev/sdb` | `/dev/sdb` |
| **ZFS state** | ONLINE | **UNAVAIL** — label missing/invalid |
ZFS last known path:
`/dev/disk/by-id/ata-ST5000DM000-1FK178_W4J0L3PY-part1`
---
## Symptoms and evidence
### 1. Capacity collapse (primary indicator)
The drive is detected as **~137 GB** instead of **5 TB**. ZFS cannot use a partition label created for a 5 TB disk on a device that exposes only a tiny fraction of capacity. This pattern is typical of:
- **Failed HDD** (media/controller failure)
- **Bad SATA cable, backplane port, or HBA port**
- **USB/SATA bridge failure** (if applicable)
- **Severe firmware/HPA corruption** (less common)
### 2. SMART / SCSI errors
`smartctl` against `/dev/sdb`:
- **Read SMART Data failed:** scsi error aborted command
- **Overall health:** UNKNOWN (attributes unreadable)
- Multiple log read commands fail (Error Log, Self-test Log, GP Log, etc.)
Healthy sibling in same mirror (`/dev/sda`, W4J0L0BA): **SMART PASSED**, full 5 TB capacity.
### 3. Kernel log (`dmesg` at boot, 2026-05-21 ~21:27)
Repeated on **`sdb`**:
```
Buffer I/O error on dev sdb
Sense Key: Medium Error
Add. Sense: Unrecovered read error
critical medium error, dev sdb, sector N op 0x0:(READ)
```
Indicates the block device cannot reliably read media — **hardware or link layer**, not a ZFS configuration issue.
### 4. ZFS pool history
- Pool previously entered **SUSPENDED** state (I/O failures on faulted devices).
- After node reboot: pool **DEGRADED**, short **resilver** completed with **0 errors** (healing scan on remaining devices).
- Current: **No known data errors** in `zpool status`.
---
## Impact
### Storage / services on `NAS.SP00`
Proxmox guests with disks on this pool (non-exhaustive):
| VMID | Name | NAS-backed storage |
|------|------|-------------------|
| 101 | Jellyfin | 1 TB zvol |
| 105 | TrueNAS | 1 TB zvol |
| 108 | actual-debian | 10 GB |
| 200 | PVE.BU.SVR | 1 TB |
| 201 | NextcloudAIO-debian | 8 TB |
**Risk:** With mirror-0 degraded, blocks stored only on the surviving mirror-0 disk have **no redundancy** until the failed drive is replaced and resilver completes.
### Unrelated workloads
Guests on **`local-lvm`** (NVMe, e.g. Cal.com LXC 210, Caddy VM 106) are **not stored on NAS.SP00** but were affected when the pool suspended and blocked system-wide `sync()`.
### Backup target
Proxmox datastore **PVEBUVD00** (PBS @ `10.0.10.200:8007`) reports **unreachable** from this node — separate issue; verify PBS host/network.
---
## Diagnosis
| Question | Answer |
|----------|--------|
| Is this a ZFS misconfiguration? | **No** — config is consistent; three drives show correct 5 TB labels. |
| Is the pool lost? | **No** — degraded but importable; no known data errors currently. |
| Which disk to replace? | **Seagate W4J0L3PY** (`/dev/sdb`, mirror-0 failed leg). |
| Can we fix it in software? | **Unlikely** — capacity and SMART failures point to hardware. |
| Safe to reseat first? | **Optional trial** — power down or hot-swap per chassis policy; if capacity still reads ~137 GB, **replace disk**. |
---
## Recommended actions
### Immediate (IT / on-site)
1. **Identify physical slot** for serial **W4J0L3PY** (compare to inventory/asset tags).
2. **Reseat** SATA/SAS cable and backplane connection once (if hot-swap policy allows). Reboot or rescan SCSI bus.
3. If capacity is still wrong or SMART still fails → **replace with new 5 TB+ enterprise/NAS-class HDD** (match class of ST5000DM000 or better).
4. Do **not** remove the UNAVAIL device from the pool until replacement is in place.
### After new disk is installed
On **PVENAS** as root (adjust `/dev/disk/by-id/...` to the **new** drive's partition 1):
```bash
# Verify new disk shows ~5 TB
lsblk /dev/sdX
smartctl -H /dev/sdX
# Replace failed vdev (use ID from: zpool status NAS.SP00)
zpool replace NAS.SP00 ata-ST5000DM000-1FK178_W4J0L3PY-part1 /dev/disk/by-id/ata-NEW_SERIAL-part1
# Monitor until resilver completes
zpool status -v NAS.SP00
```
### Post-resilver
- Run **`zpool scrub NAS.SP00`** during a maintenance window.
- Confirm **PVEBUVD00** / PBS connectivity if backups depend on it.
- Review whether **Nextcloud VM 201** (8 TB on degraded pool) should remain running until healthy.
### Not recommended
- Ignoring degraded state for extended periods.
- Running heavy I/O on large VMs (e.g. 8 TB Nextcloud) during extended degraded operation.
- `zpool clear` without addressing hardware — does not fix a dead disk.
---
## Reference — healthy disks (for spare matching)
| Serial | Device | Capacity | SMART |
|--------|--------|----------|-------|
| W4J0L0BA | sda | 5.00 TB | PASSED |
| W4J0K9V7 | sdc | 5.00 TB | PASSED |
| W4J0LKCD | sdd | 5.00 TB | PASSED |
---
## Timeline (brief)
| When | Event |
|------|--------|
| Prior to 2026-05-21 | `W4J0L3PY` accumulated read/write errors; pool faulted |
| 2026-05-21 | Pool **SUSPENDED**; host `sync()` wedged; Cal LXC start failed |
| 2026-05-21 ~21:28 | Forced node reboot; pool **DEGRADED**, resilver finished, 0 errors |
| 2026-05-21 | `sdb` still reports **~137 GB**, UNAVAIL — **replacement still required** |
---
## Contact / handoff notes
- **Node:** Proxmox VE 8.x on **PVENAS** (`10.0.10.10`)
- **Pool name in Proxmox:** `NAS.SP00` (zfspool, active, degraded)
- **Failed serial:** **W4J0L3PY**
- **Replacement type:** 5 TB+ HDD, same or better class as Seagate ST5000DM000-1FK178
For questions about homelab service impact (Cal, Caddy, Phase 0 rollout), see [`levkin-selfhost-plan-2.md`](levkin-selfhost-plan-2.md).
## TL;DR
- Pool `NAS.SP00` on `PVENAS` (10.0.10.10) had a disk failure (`W4J0L3PY`)
- Pool went **SUSPENDED**; required forced reboot and is now **DEGRADED**
- **Immediate action:** Replace the failed drive with a spare (same or larger size; see the healthy-disk reference table above)
- Use `zpool replace` command with correct device paths (see main procedure)
- Monitor resilver to completion; run `zpool scrub` after
- Backup services and large VMs (e.g. Nextcloud 8TB) depend on pool health—keep degraded time short
- Reach out if unsure about pool status or downstream service risk

View File

@ -1,232 +0,0 @@
# NAS.SP00 SMART audit
**Date:** 2026-05-21
**Host:** PVENAS (Proxmox VE) — `10.0.10.10`
**Pool:** ZFS `NAS.SP00`
**Related:** [nas-sp00-drive-failure-report.md](nas-sp00-drive-failure-report.md)
---
## Executive summary
| Serial | Device | Capacity | ZFS (mirror) | SMART health |
|--------|--------|----------|--------------|--------------|
| W4J0L0BA | sda | 5.00 TB | mirror-0 ONLINE | **PASSED** |
| W4J0L3PY | sdb | **137 GB** | mirror-0 UNAVAIL | **UNKNOWN** (read fails) |
| W4J0K9V7 | sdc | 5.00 TB | mirror-1 ONLINE | **PASSED** |
| W4J0LKCD | sdd | 5.00 TB | mirror-1 ONLINE | **PASSED** |
Pool state at audit time: **DEGRADED** — failed leg `W4J0L3PY` (`/dev/sdb`). No known data errors. Three healthy drives show no reallocated, pending, or uncorrectable sectors.
---
## ZFS pool status
```
pool: NAS.SP00
state: DEGRADED
status: One or more devices could not be used because the label is missing or
invalid. Sufficient replicas exist for the pool to continue
functioning in a degraded state.
action: Replace the device using 'zpool replace'.
scan: resilvered 0B in 00:00:01 with 0 errors on Thu May 21 21:27:54 2026
NAME STATE READ WRITE CKSUM
NAS.SP00 DEGRADED 0 0 0
mirror-0 DEGRADED 0 0 0
ata-ST5000DM000-1FK178_W4J0L0BA ONLINE 0 0 0
11449632222283419591 UNAVAIL 0 0 0 was /dev/disk/by-id/ata-ST5000DM000-1FK178_W4J0L3PY-part1
mirror-1 ONLINE 0 0 0
ata-ST5000DM000-1FK178_W4J0LKCD ONLINE 0 0 0
ata-ST5000DM000-1FK178_W4J0K9V7 ONLINE 0 0 0
errors: No known data errors
```
---
## Block devices (`lsblk`)
| NAME | SIZE | MODEL | SERIAL | ROTA |
|------|------|-------|--------|------|
| sda | 4.5T | ST5000DM000-1FK178 | W4J0L0BA | 1 |
| sdb | 3.9G | ST5000DM000 | W4J0L3PY | 1 |
| sdc | 4.5T | ST5000DM000-1FK178 | W4J0K9V7 | 1 |
| sdd | 4.5T | ST5000DM000-1FK178 | W4J0LKCD | 1 |
---
## Healthy drives — key metrics
| Metric | sda (W4J0L0BA) | sdc (W4J0K9V7) | sdd (W4J0LKCD) |
|--------|----------------|----------------|----------------|
| Model | ST5000DM000-1FK178 | ST5000DM000-1FK178 | ST5000DM000-1FK178 |
| Firmware | CC48 | CC48 | CC48 |
| WWN | 5000c500082c02f61 | 5000c500082c7e2ce | 5000c500082d84c45 |
| Rotation | 5980 rpm | 5980 rpm | 5980 rpm |
| SATA | 3.1 @ 6.0 Gb/s | 3.1 @ 6.0 Gb/s | 3.1 @ 6.0 Gb/s |
| Power-on hours | 52,481 (~6.0 y) | 53,087 (~6.1 y) | 45,580 (~5.2 y) |
| Temperature | 27 °C | 30 °C | 30 °C |
| Reallocated sectors | 0 | 0 | 0 |
| Current pending sectors | 0 | 0 | 0 |
| Offline uncorrectable | 0 | 0 | 0 |
| UDMA CRC errors | 0 | 0 | 0 |
| Start/stop count | 350 | 367 | 310 |
| Load cycle count | 348,974 | 340,961 | 184,891 |
| Power cycle count | 345 | 363 | 309 |
High **Load_Cycle_Count** on Seagate Desktop HDD.15 is common (head parking); not alarming when reallocated/pending counts remain zero.
---
## Failed drive — `/dev/sdb` (W4J0L3PY)
### Identity
| Field | Value |
|-------|-------|
| Device Model | ST5000DM000 (truncated; not full -1FK178 suffix) |
| Serial | W4J0L3PY |
| WWN | 5000c500082cc8bbb |
| Firmware | CC48 |
| User capacity | 137,438,952,960 bytes [**137 GB**] |
| Expected capacity | 5,000,981,078,016 bytes [5.00 TB] |
| Rotation | 7200 rpm (reported) |
| SATA | 3.0, 6.0 Gb/s |
### SMART
```
Read SMART Data failed: scsi error aborted command
SMART Status command failed: scsi error aborted command
SMART overall-health self-assessment test result: UNKNOWN!
SMART Status, Attributes and Thresholds cannot be read.
```
**Action:** Replace drive; see [nas-sp00-drive-failure-report.md](nas-sp00-drive-failure-report.md).
---
## Full SMART attributes (healthy drives)
### `/dev/sda` — W4J0L0BA (mirror-0, ONLINE)
```
SMART overall-health self-assessment test result: PASSED
ID# ATTRIBUTE_NAME VALUE WORST THRESH TYPE RAW_VALUE
1 Raw_Read_Error_Rate 119 100 006 Pre-fail 211189952
3 Spin_Up_Time 092 091 000 Pre-fail 0
4 Start_Stop_Count 100 100 020 Old_age 350
5 Reallocated_Sector_Ct 100 100 010 Pre-fail 0
7 Seek_Error_Rate 080 060 030 Pre-fail 43979429424
9 Power_On_Hours 041 041 000 Old_age 52481
10 Spin_Retry_Count 100 100 097 Pre-fail 0
12 Power_Cycle_Count 100 100 020 Old_age 345
183 Runtime_Bad_Block 100 100 000 Old_age 0
184 End-to-End_Error 100 100 099 Old_age 0
187 Reported_Uncorrect 100 100 000 Old_age 0
188 Command_Timeout 100 099 000 Old_age 3 3 3
189 High_Fly_Writes 100 100 000 Old_age 0
190 Airflow_Temperature_Cel 073 058 045 Old_age 27 (Min/Max 27/28)
191 G-Sense_Error_Rate 100 100 000 Old_age 0
192 Power-Off_Retract_Count 100 100 000 Old_age 0
193 Load_Cycle_Count 001 001 000 Old_age 348974
194 Temperature_Celsius 027 042 000 Old_age 27
195 Hardware_ECC_Recovered 119 100 000 Old_age 211189952
197 Current_Pending_Sector 100 100 000 Old_age 0
198 Offline_Uncorrectable 100 100 000 Old_age 0
199 UDMA_CRC_Error_Count 200 200 000 Old_age 0
240 Head_Flying_Hours 100 253 000 Old_age 15140h+51m+12.276s
241 Total_LBAs_Written 100 253 000 Old_age 57665101118
242 Total_LBAs_Read 100 253 000 Old_age 160962549062
```
### `/dev/sdc` — W4J0K9V7 (mirror-1, ONLINE)
```
SMART overall-health self-assessment test result: PASSED
ID# ATTRIBUTE_NAME VALUE WORST THRESH TYPE RAW_VALUE
1 Raw_Read_Error_Rate 117 100 006 Pre-fail 136042192
3 Spin_Up_Time 092 091 000 Pre-fail 0
4 Start_Stop_Count 100 100 020 Old_age 367
5 Reallocated_Sector_Ct 100 100 010 Pre-fail 0
7 Seek_Error_Rate 083 060 030 Pre-fail 22512744055
9 Power_On_Hours 040 040 000 Old_age 53087
10 Spin_Retry_Count 100 100 097 Pre-fail 0
12 Power_Cycle_Count 100 100 020 Old_age 363
183 Runtime_Bad_Block 100 100 000 Old_age 0
184 End-to-End_Error 100 100 099 Old_age 0
187 Reported_Uncorrect 100 100 000 Old_age 0
188 Command_Timeout 100 099 000 Old_age 6 6 12
189 High_Fly_Writes 096 096 000 Old_age 4
190 Airflow_Temperature_Cel 070 060 045 Old_age 30 (Min/Max 28/30)
191 G-Sense_Error_Rate 100 100 000 Old_age 0
192 Power-Off_Retract_Count 100 100 000 Old_age 0
193 Load_Cycle_Count 001 001 000 Old_age 340961
194 Temperature_Celsius 030 040 000 Old_age 30
195 Hardware_ECC_Recovered 117 100 000 Old_age 136042192
197 Current_Pending_Sector 100 100 000 Old_age 0
198 Offline_Uncorrectable 100 100 000 Old_age 0
199 UDMA_CRC_Error_Count 200 200 000 Old_age 0
240 Head_Flying_Hours 100 253 000 Old_age 15859h+53m+20.869s
241 Total_LBAs_Written 100 253 000 Old_age 57609506493
242 Total_LBAs_Read 100 253 000 Old_age 152392393081
```
### `/dev/sdd` — W4J0LKCD (mirror-1, ONLINE)
```
SMART overall-health self-assessment test result: PASSED
ID# ATTRIBUTE_NAME VALUE WORST THRESH TYPE RAW_VALUE
1 Raw_Read_Error_Rate 116 090 006 Pre-fail 108217848
3 Spin_Up_Time 092 091 000 Pre-fail 0
4 Start_Stop_Count 100 100 020 Old_age 310
5 Reallocated_Sector_Ct 100 100 010 Pre-fail 0
7 Seek_Error_Rate 073 051 030 Pre-fail 185584998742
9 Power_On_Hours 048 048 000 Old_age 45580
10 Spin_Retry_Count 100 100 097 Pre-fail 0
12 Power_Cycle_Count 100 100 020 Old_age 309
183 Runtime_Bad_Block 100 100 000 Old_age 0
184 End-to-End_Error 100 100 099 Old_age 0
187 Reported_Uncorrect 100 100 000 Old_age 0
188 Command_Timeout 100 099 000 Old_age 8 8 14
189 High_Fly_Writes 098 098 000 Old_age 2
190 Airflow_Temperature_Cel 070 050 045 Old_age 30 (Min/Max 29/30)
191 G-Sense_Error_Rate 100 100 000 Old_age 0
192 Power-Off_Retract_Count 100 100 000 Old_age 0
193 Load_Cycle_Count 008 008 000 Old_age 184891
194 Temperature_Celsius 030 050 000 Old_age 30
195 Hardware_ECC_Recovered 116 100 000 Old_age 108217848
197 Current_Pending_Sector 100 091 000 Old_age 0
198 Offline_Uncorrectable 100 091 000 Old_age 0
199 UDMA_CRC_Error_Count 200 200 000 Old_age 0
240 Head_Flying_Hours 100 253 000 Old_age 11604h+15m+50.842s
241 Total_LBAs_Written 100 253 000 Old_age 72962800596
242 Total_LBAs_Read 100 253 000 Old_age 167268621195
```
---
## How this audit was collected
On PVENAS as root:
```bash
zpool status NAS.SP00
lsblk -d -o NAME,SIZE,MODEL,SERIAL,ROTA,STATE /dev/sd{a,b,c,d}
for d in sda sdb sdc sdd; do smartctl -i -H -A /dev/$d; done
```
Audit timestamp (host local): Thu May 21 22:13:58 2026 EDT.
---
## Next steps
1. Replace **W4J0L3PY** with a 5 TB+ NAS-class HDD (match ST5000DM000-1FK178 or better).
2. `zpool replace NAS.SP00` with the new disk by-id.
3. Monitor resilver; run `zpool scrub NAS.SP00` after pool is **ONLINE**.
4. Re-run SMART audit after replacement for a clean baseline.

View File

@ -1,441 +0,0 @@
# Security Audit Report
**Last audit:** 2026-05-23 (re-run after SSH keys + `make maintenance`)
**Previous audit:** 2026-05-20
**Auditor:** `scripts/security-audit-*.sh`, Ansible `maintenance` + `maintenance_cron` roles
**Repo baseline** (`roles/ssh/defaults/main.yml`): `PermitRootLogin prohibit-password`, `PasswordAuthentication no`, UFW enabled.
---
## 2026-05-23 — Actions completed
| Action | Status |
|--------|--------|
| SSH keys → caseware, auto, cal, vikunja, mailcow, listmonk | ✅ All six reachable as `root` |
| SSH keys → mailcow/listmonk VMs | ✅ Via brief VM shutdown + disk inject on pve201 (no guest agent) |
| Inventory rename `vikanjans` → `vikunja` | ✅ `hosts` + `proxmox_vmid=301` |
| `apt upgrade` fleet (skip reboot) | ✅ 14 hosts via Ansible; auto via `pct exec` on pve10 |
| Tier 1 cron (journal + apt) | ✅ `roles/maintenance_cron` on PVE, sites, comms, ansible, hermes, etc. |
| Tier 2 cron (docker prune) | ✅ identity, monitoring, vikunja; git-ci-01 keeps `docker-prune-ci` |
| VM 104 (GPU-Dev) RAM 72→64 GiB | ✅ pve201; host free RAM ~1.7→10 GiB |
| Fix broken `host_vars` (ansibleVM, listmonk) | ✅ Plain YAML; old blobs → `*.vault-bak` |
| Vault `vault_*_become_password` + maintenance vaultwardenVM | ✅ 2026-05-23 |
| caddy root SSH + maintenance | ✅ `bootstrap-root-ssh-caddy`; inventory `ansible_user=root` |
| ansibleVM maintenance | ✅ become password in vault |
### Post-maintenance SSH reachability
| Host | SSH | Notes |
|------|-----|-------|
| caseware | ✅ | |
| auto | ✅ | Was slow from laptop earlier; OK after upgrade |
| cal | ✅ | |
| vikunja | ✅ | LXC 301 @ 10.0.10.159 |
| mailcow | ✅ | ~1 min downtime for key inject |
| listmonk | ✅ | ~1 min downtime for key inject |
### Maintenance playbook recap (`skip_reboot=true`)
| Host | Result |
|------|--------|
| pve201, pve10, caseware, cal, vikunja, mailcow, listmonk, identity, monitoring, hermes, levkin, portfolio, git-ci-01, sonarqube-01 | ✅ upgraded |
| caddy | ✅ (as `root`; no `sudo` package on host) |
| ansibleVM | ✅ (`vault_ansiblevm_become_password`) |
| vaultwardenVM | ✅ (`vault_vaultwarden_become_password`) |
### Open security gaps (unchanged until `make security`)
| Control | Fleet status | Risk if fixed wrong |
|---------|--------------|---------------------|
| `PasswordAuthentication yes` | Most LXCs + both PVE | **Low break risk** if SSH keys tested first in a second session |
| `PermitRootLogin yes` | pve201, pve10, sonarqube-01 | Same — use `prohibit-password`, not `no`, if you need root+key |
| fail2ban | Off everywhere | Enabling is safe; may lock you out only if you brute-force yourself |
| UFW | Off (except one dev LXC) | **Medium risk** — wrong rules drop SSH/80/443; apply via Ansible `roles/ssh` after allowlist |
| unattended-upgrades | hermes, ansibleVM only | Safe; schedule reboots separately |
| Proxmox :8006 | Open on LAN | Restrict in PVE firewall — **won't break VMs** |
| Docker on `0.0.0.0` | identity, monitoring, vaultwarden, qBit | Bind to `127.0.0.1` — **can break access** if Caddy route missing; test URL after |
| Tailscale | **Deferred** | Off by choice; remote access via **UniFi VPN** to LAN |
See [Risk explanations (2026-05-23)](#risk-explanations-2026-05-23) and [fail2ban vs password SSH](#fail2ban-vs-password-ssh) below.
---
## GPU-Dev (pve201 VM 104) — Ollama / LLMs
| Resource | Current |
|----------|---------|
| Host | pve201, VMID **104**, `GPU-Dev-Debian` |
| LAN IP | **10.0.10.122** (inventory `devGPU` @ 10.0.30.63 is a different network — use `.122` from LAN) |
| RAM | **64 GiB** guest (~60 GiB available when idle) |
| GPU | **RTX 4080 16 GiB** (PCI passthrough `hostpci0`) |
| Workload | **Ollama** already running (~3.6 GiB VRAM in sample) |
### Getting the most from RAM + GPU
1. **Right-size models to VRAM** — On a 16 GiB 4080, prefer quantised models that fit entirely in VRAM (e.g. 7B–14B Q4/Q5, or 32B Q2/Q3 if you accept quality trade-offs). If a model spills to CPU RAM, throughput drops sharply.
2. **One heavy model at a time** — Ollama loads models on demand; set `OLLAMA_MAX_LOADED_MODELS=1` (or keep only one client) so you do not fragment 64 GiB RAM + 16 GiB VRAM across several large weights.
3. **Parallel requests** — `OLLAMA_NUM_PARALLEL` defaults are conservative; raise only if VRAM headroom exists (watch `nvidia-smi` while under load).
4. **Keep guest RAM for KV cache** — With 64 GiB you can run larger context windows; set `OLLAMA_CONTEXT_LENGTH` / model `num_ctx` to what you need, not maximum “just because”.
5. **CPU offload only when needed** — `num_gpu` layers = all layers for speed; partial offload is for models that do not fit in VRAM, not for tuning.
6. **Disk** — Store models on fast local disk (not NFS); `ollama pull` once, prune old tags periodically (`ollama list` / remove unused).
7. **Proxmox** — Do not balloon GPU VM RAM; GPU passthrough already reserves most of the 64 GiB. Freeing pve201 meant lowering this VM from 72→64 GiB, not overcommitting other guests on 201.
8. **Optional** — [Open WebUI](https://github.com/open-webui/open-webui) on localhost + Caddy TLS; bind Ollama to `127.0.0.1:11434` only (LAN via VPN).
**Not in Ansible yet:** add `devGPU` / `10.0.10.122` to inventory when you want playbooks (cron, hardening) on this box.
---
## fail2ban vs password SSH
**What fail2ban does:** After too many failed SSH logins from an IP, it adds a **temporary firewall ban** for that IP (typically 10–60 minutes). It does **not** disable password authentication globally.
**Can passwords stay on if fail2ban is on?** Technically yes — fail2ban only rate-limits brute force; passwords are still weaker than keys. Best practice on servers: **keys + `PasswordAuthentication no` + fail2ban** (defence in depth).
**Your Proxmox console fallback:** If you lock yourself out of SSH on a guest, you can still use **Proxmox → VM → Console** or `pct enter` / `qm guest exec` from pve201/pve10. That is a good break-glass path, but it is **not** a substitute for keys on hosts you manage daily — console is slow and easy to misconfigure under pressure.
**Recommendation:** Enable fail2ban via `make security` with `ignoreip` including `10.0.10.0/24` and your UniFi VPN client subnet. Then disable password SSH once keys work everywhere you care about.
---
## Risk explanations (2026-05-23)
### Password SSH (`PasswordAuthentication yes`)
**How bad:** High on internet-facing IPs; medium on `10.0.10.0/24` only. Anyone who can reach :22 can try passwords indefinitely (no fail2ban).
**Will fixing break things?** No, if you (1) confirm key login works, (2) set `PasswordAuthentication no`, (3) keep a second SSH session open, (4) reload sshd. Breakage happens only if keys are missing/wrong.
### Root login (`PermitRootLogin yes` on hypervisors)
**How bad:** High — root + password on PVE is full cluster compromise.
**Will fixing break things?** Use `prohibit-password` (keys only), not `no`, unless you have another admin user with sudo. Ansible playbooks expect root on PVE today.
### fail2ban off
**How bad:** Medium — relies on LAN trust; SSH noise from scanners still fills logs.
**Will fixing break things?** Rarely. Tune `ignoreip` to your admin IP/subnet so your own typos don't ban you.
### UFW off
**How bad:** Medium on segmented LAN; high if any host has a public IP.
**Will fixing break things?** **Yes, if misconfigured** — default deny without allowing 22 from admin IP, 80/443 from Caddy, or Docker-published ports you still need. Use Ansible `roles/ssh` (UFW after SSH rules) and test.
### unattended-upgrades off
**How bad:** Medium — security patches lag until manual maintenance.
**Will fixing break things?** Usually no. Kernel updates may require reboot; use `Unattended-Upgrade::Automatic-Reboot "false"` until you want reboot windows.
### Proxmox UI :8006 exposed
**How bad:** **Critical** on untrusted networks — API gives VM/storage control.
**Will fixing break things?** Restricting to `10.0.10.0/24` does not break normal LAN admin access.
### HTTP services on all interfaces (8080, 3000, …)
**How bad:** High without TLS/auth at the edge; medium behind Caddy + LAN only.
**Will fixing break things?** **Yes** if you bind to `127.0.0.1` before Caddy `reverse_proxy` is updated. Order: Caddy route → test → then bind Docker to localhost.
### Remote access (Tailscale deferred)
**Decision:** Tailscale off; use **UniFi site-to-site / VPN** into `10.0.10.0/24` for admin and Ollama/GPU access.
**Security:** Ensure VPN is required for SSH and Proxmox :8006 from outside; do not port-forward :22/:8006 on the router without IP allowlists.
### pve201 RAM (was 97% used)
**How bad:** **Critical** — OOM kills guests, swap thrashing.
**Mitigation done:** VM 104 reduced 73728→65536 MiB (~8 GiB freed on hypervisor). Still tight; consider moving git-ci-01 or other workloads to pve10.
---
## 2026-05-20 — Original audit
**Scope:** Proxmox nodes `pve201` (10.0.10.201) and `pve10` (10.0.10.10), all LXCs via `pct exec`, SSH deep-dive on hypervisors.
---
## Executive summary
| Area | Critical | High | Medium |
|------|----------|------|--------|
| Hypervisors (201, 10) | 2 | 4 | 2 |
| LXCs on 201 (10 running) | 0 | 10 | 8 |
| LXCs on 10 (3 running) | 0 | 3 | 3 |
**Top priorities**
1. Harden **SSH on both Proxmox hosts** (root + passwords currently allowed).
2. Restrict **Proxmox API/UI port 8006** to admin IPs.
3. Disable **password SSH on all LXCs**; deploy keys + `make copy-ssh-keys` for inventory IPs.
4. Patch hosts with **40–105** pending apt upgrades (hypervisors worst).
5. Put **HTTP services** (8080, 8000, qBit, etc.) behind reverse proxy + TLS or bind to internal IPs.
---
## Proxmox hypervisors
### pve201 — 10.0.10.201 (`pve`)
| Resource | Status |
|----------|--------|
| OS | Debian 12, PVE 8.4.16, kernel 6.8.12-18-pve |
| RAM free | ~2.5 GB / 126 GB (**critical**) |
| Pending apt | **105** |
| UFW / fail2ban / unattended-upgrades | **None** |
#### SSH audit (dedicated)
| Setting | Current | Target |
|---------|---------|--------|
| `permitrootlogin` | **yes** | `prohibit-password` |
| `passwordauthentication` | **yes** | `no` |
| `pubkeyauthentication` | yes | yes |
| `maxauthtries` | 6 | 3–4 |
| `x11forwarding` | yes | no (on servers) |
| Root keys | 3 keys in `authorized_keys` | audit/remove unused |
#### Exposed services
| Port | Service | Risk |
|------|---------|------|
| 22 | SSH | Brute-force (no fail2ban) |
| 8006 | Proxmox API/UI | **Critical** — full cluster control |
| 3128 | spiceproxy | Medium |
| 111 | rpcbind | Low — reduce exposure |
#### Fixes (pve201)
```bash
# 1) SSH — prefer Ansible after limiting to your IP
make copy-ssh-key HOST=pve201 # if needed
# Manual quick fix on host:
sed -i 's/^#*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
sshd -t && systemctl reload sshd
# 2) Proxmox firewall — Datacenter → Firewall → restrict 8006 to 10.0.10.0/24 or admin IP
# Or iptables on host for port 8006
# 3) fail2ban
apt install fail2ban -y
systemctl enable --now fail2ban
# 4) Auto security updates
apt install unattended-upgrades apt-listchanges -y
dpkg-reconfigure -plow unattended-upgrades
# 5) Patch
apt update && apt upgrade -y
```
**Ansible (when ready):** add `pve201` / `pve10` to a `proxmox` group play with `roles/ssh` + `roles/monitoring_server` (fail2ban).
Do **not** lock yourself out — test with second session first.
---
### pve10 — 10.0.10.10 (`PVENAS`)
| Resource | Status |
|----------|--------|
| OS | Debian 13 (trixie), PVE, kernel 6.17.13-3-pve |
| Load | **~30** on 24 CPUs (overloaded) |
| Pending apt | **92** |
| UFW / fail2ban / unattended-upgrades | **None** |
| ZFS `NAS.SP00` | **inactive** (I/O suspended) |
| PBS `PVEBUVD00` → 10.0.10.200:8007 | **unreachable** |
#### SSH audit (dedicated)
Same as pve201: `permitrootlogin yes`, `passwordauthentication yes`, 3 root authorized_keys.
#### Exposed services
| Port | Service | Risk |
|------|---------|------|
| 22 | SSH | High |
| 8006 | Proxmox API/UI | **Critical** |
| 2049, mountd, statd | NFS/RPC | High on LAN |
| 3128 | spiceproxy | Medium |
#### Fixes (pve10)
Same SSH / fail2ban / unattended-upgrades / patch steps as pve201.
Additional:
```bash
# Investigate ZFS pool
zpool status NAS.SP00
# Fix PBS connectivity or remove stale datastore from Proxmox UI
```
---
## LXCs on pve201 (via `pct exec`)
| VMID | Name | IP | Status | SSH root | Password auth | UFW | fail2ban | Upgrades | Public services |
|------|------|-----|--------|----------|---------------|-----|----------|----------|-----------------|
| 301 | vikunja-debian | 10.0.10.159 | running | without-password | **yes** | no | no | 0 | **3456**, 22 |
| 302 | qbit-debian | 10.0.10.91 | running | without-password | **yes** | no | no | 0 | **8080** (qBit), 22 |
| 303 | searchXNG-debian | 10.0.10.70 | running | without-password | **yes** | no | no | **83** | **8080**, 22 |
| 304 | wireguard-debian | 10.0.10.192 | running | without-password | **yes** | no | no | 0 | 22 |
| 305 | kuma-debian | 10.0.10.197 | **stopped** | — | — | — | — | — | replaced by LXC 218 |
| 306 | portfolio | — | **destroyed** | — | — | — | — | — | migrated → pve10 LXC **219** @ `10.0.10.106` (purged 2026-05-22) |
| 307 | jobber-delian | 10.0.10.178 | running | without-password | **yes** | no | no | **83** | **3005**, 22 |
| 308 | stirling-pdf | 10.0.10.43 | running | without-password | **yes** | no | no | 0 | **8080**, 22 |
| 9001 | pote-dev | 10.0.10.114 | **stopped** | — | — | — | — | — | — |
| 9101 | punimTagFE-dev | 10.0.10.121 | running | without-password | **yes** | **active** | no | **89** | **8000**, 111, 22 |
| 9401 | mirrormatch-dev | 10.0.10.141 | **stopped** | — | — | — | — | — | — |
**Inventory mapping:** `vikunja` → 159 (LXC 301), `qBittorrent` → 91, `punimTag` app → 121.
### Common LXC issues (pve201)
| Issue | Severity | Fix |
|-------|----------|-----|
| `passwordauthentication yes` on all LXCs | High | Set `PasswordAuthentication no` in `/etc/ssh/sshd_config`, reload sshd |
| No fail2ban | High | Install fail2ban or rely on Proxmox FW + LAN segmentation |
| Apps on `0.0.0.0:8080` / 8000 / 3456 | High | Bind to localhost + Caddy, or restrict via Proxmox guest firewall (`firewall=1` on net0 — enable rules) |
| 79–89 pending upgrades on several CTs | Medium | `pct exec <id> -- bash -c 'apt update && apt upgrade -y'` |
| Stopped dev CTs (9001, 9401) | Low | Start when needed or keep stopped to reduce attack surface |
### Per-LXC fixes (pve201)
```bash
# Example: harden + patch vikunja (301) from Proxmox host
pct exec 301 -- sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
pct exec 301 -- systemctl reload ssh
# Patch container
pct exec 303 -- bash -c 'apt update && apt upgrade -y'
# Copy your SSH key (from Mac, once password/key works)
make copy-ssh-key HOST=vikunja # 10.0.10.159
make copy-ssh-key HOST=qBittorrent # 10.0.10.91
```
**punimTagFE-dev (9101):** Only LXC with **UFW active** — extend rules to deny inbound except 22 from admin subnet; still disable password auth.
---
## LXCs on pve10 (via `pct exec`)
| VMID | Name | IP | Status | SSH root | Password auth | UFW | fail2ban | Upgrades | Public services |
|------|------|-----|--------|----------|---------------|-----|----------|----------|-----------------|
| 210 | cal | 10.0.10.228 | running | without-password | **yes** | no | no | 0 | **3000**, 22 |
| 215 | caseware | 10.0.10.105 | running | without-password | **yes** | no | no | **40** | **80** (nginx), 22 |
| 216 | auto | 10.0.10.59 | running | without-password | **yes** | no | no | **40** | **80** (nginx), 22 |
**Inventory mapping:** `caseware` → 105, `auto` → 59.
### Fixes (pve10 LXCs)
```bash
# SSH harden caseware (215)
pct exec 215 -- sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
pct exec 215 -- systemctl reload sshd
# Patch
pct exec 215 -- bash -c 'apt update && apt upgrade -y'
pct exec 216 -- bash -c 'apt update && apt upgrade -y'
# Deploy keys from Mac
make copy-ssh-key HOST=caseware
make copy-ssh-key HOST=auto
```
**HTTP port 80 on caseware/auto:** Ensure TLS termination on Caddy (inventory host `caddy` 10.0.10.50) and no plain HTTP from WAN if exposed.
---
## SSH hardening checklist (all Linux targets)
Use this order to avoid lockout:
1. Confirm your key works: `ssh -o BatchMode=yes root@<ip> true`
2. Set `PasswordAuthentication no`
3. Set `PermitRootLogin prohibit-password` (LXCs already `without-password` — equivalent for keys-only)
4. `sshd -t && systemctl reload sshd`
5. Open **second terminal** and test before closing first
6. Optional: change SSH port, `MaxAuthTries 4`, disable `X11Forwarding`
**Ansible alignment:**
```bash
# After keys on host
make dev HOST=<hostname> --tags security
# or role ssh via playbooks that include roles/ssh
```
---
## Re-run audits
```bash
# Hypervisor full audit
ssh root@10.0.10.201 'bash -s' < scripts/security-audit-remote.sh
ssh root@10.0.10.10 'bash -s' < scripts/security-audit-remote.sh
# Hypervisor SSH-only
ssh root@10.0.10.201 'bash -s' < scripts/security-audit-ssh.sh
# All LXCs on a node
ssh root@10.0.10.201 'bash -s' < scripts/security-audit-lxc-via-pve.sh
ssh root@10.0.10.10 'bash -s' < scripts/security-audit-lxc-via-pve.sh
```
---
## Tracking
| Item | Owner | Status |
|------|-------|--------|
| SSH keys caseware, auto, cal, vikunja, mailcow, listmonk | 2026-05-23 | ☑ |
| Fleet `apt upgrade` (no reboot) | 2026-05-23 | ☑ all previously failed hosts fixed |
| Tier 1 cron (journal + apt) | 2026-05-23 | ☑ PVE + most hosts via Ansible |
| Tier 2 cron (docker prune) | 2026-05-23 | ☑ identity, monitoring, vikunja, git-ci-01 |
| VM 104 RAM 72→64 GiB | 2026-05-23 | ☑ |
| Inventory `vikunja` rename | 2026-05-23 | ☑ |
| Fix `host_vars` ansibleVM / listmonk merge | 2026-05-23 | ☑ plain YAML (review `*.vault-bak`) |
| SSH harden pve201 | | ☐ |
| SSH harden pve10 | | ☐ |
| Restrict 8006 on both nodes | | ☐ |
| fail2ban on hypervisors | | ☐ |
| `make security` on production groups | | ☐ |
| Disable password SSH on all LXCs | | ☐ |
| `copy-ssh-keys` remaining inventory | | ☐ partial |
| TLS / localhost bind for :8080 services | | ☐ |
| unattended-upgrades all production | | ☐ |
| Tailscale re-auth | | ⏸ deferred (UniFi VPN) |
| Fix ZFS NAS.SP00 on pve10 | | ☐ |
| caddy Ansible as root | 2026-05-23 | ☑ |
| vaultwardenVM / ansibleVM become in vault | 2026-05-23 | ☑ |
| Add GPU-Dev `10.0.10.122` to inventory | | ☐ |
| Ollama bind localhost + optional Open WebUI | | ☐ |
---
## Next steps (priority)
1. **`make security`** on one site host (e.g. caseware) with a second SSH session open — disable password SSH, enable UFW + fail2ban (`ignoreip` = LAN + VPN pool).
2. **Restrict Proxmox :8006** to `10.0.10.0/24` + VPN subnet on pve201 and pve10.
3. **Bind internal Docker ports** on identity / monitoring / vaultwarden to `127.0.0.1` after confirming Caddy routes.
4. **GPU-Dev:** point clients at `http://10.0.10.122:11434` over VPN; tune Ollama env vars; add host to inventory when automating.
5. **unattended-upgrades** on production LXCs (reboot policy manual).
6. Review `host_vars/*.vault-bak` and merge any secrets still needed into vault + plain host_vars.
---
## References
- **[Security remediation plan](security-remediation-plan.md)** — phased fixes (critical → low) and login model
- [Security hardening guide](security.md)
- [SECURITY_HARDENING_PLAN.md](../SECURITY_HARDENING_PLAN.md)
- Role defaults: `roles/ssh/defaults/main.yml`

View File

@ -1,490 +0,0 @@
# Security Remediation Plan
**Based on:** [security-audit-report.md](security-audit-report.md) (last re-run **2026-05-23**)
**Goal:** Align hosts with `roles/ssh` (keys only, no password SSH) without locking yourself out.
**Homelab rollout:** [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md) — separate track; some overlap (SSH keys, patching).
---
## Progress summary (2026-05-23)
| Phase | Status | Notes |
|-------|--------|--------|
| **0 Backup + prep** | 🟡 Partial | Fleet SSH keys + apt done; formal PVE snapshot checklist not fully ticked |
| **1 Critical** | 🟡 Partial | SSH keys on many hosts ✅; **Proxmox password SSH off** ⏳; **8006 restrict** ⏳; pve201 RAM improved (GPU 64G, DebianDesktop 24G pending reboot) |
| **2 High** | 🟡 Partial | fail2ban / full LXC password-off / port binding — mostly ⏳ |
| **3 Medium** | ⏳ | unattended-upgrades, `make security`, UFW |
| **4 Low** | ⏳ | rpcbind, naming, audit Makefile |
### Completed since original audit (see [security-audit-report.md](security-audit-report.md))
- SSH keys → caseware, auto, cal, vikunja, mailcow, listmonk (root)
- Fleet `apt upgrade` (14 hosts, no reboot)
- Tier 1 cron (journal + apt) on PVE, sites, comms, etc.
- Tier 2 docker prune on identity, monitoring, vikunja
- GPU VM 104 RAM 72→64 GiB on pve201
- Fixed `host_vars` ansibleVM / listmonk (plain YAML)
### Recommended order (security, alongside homelab P0)
1. **Phase 0** — PVE `sshd_config` backup + CT snapshots before any `PasswordAuthentication no`
2. **Phase 1** — pve201 + pve10 SSH keys-only; restrict 8006; finish keys on caddy/ansibleVM/vaultwarden if still pending
3. **Phase 2** — LXC password auth off, fail2ban, patch, reduce exposed app ports (qBit, searchXNG, punimTag)
4. **Phase 3–4** — unattended-upgrades, Ansible security plays, Mac hardening
---
## How you should log in (not “ladmin → root” everywhere)
Your inventory uses **different users on purpose**. After hardening, the pattern is:
| Host type | Inventory user | How you work | Root access |
|-----------|----------------|--------------|-------------|
| **Proxmox** (`pve201`, `pve10`) | `root` | `ssh root@10.0.10.201` with **your SSH key** | Direct root (keys only, no password) |
| **Dev / QA** (`dev01`, `git-ci-01`, …) | `ladmin` (or `beast`, `master`) | `ssh ladmin@host` with **key** | `sudo` for admin tasks; Ansible `become: true` |
| **Services** (caddy, jellyfin, …) | often `root` | `ssh root@host` with **key** | Direct root (keys only) |
| **Optional bootstrap** | — | `make bootstrap-root-ssh HOST=x` | One-time: key on `ladmin` → `su` to install **root** key → then harden SSH |
**You do not need** “SSH ladmin then su root” on Proxmox if you keep managing them as `root` in inventory — you need **root + SSH key + passwords disabled**.
**You do** use ladmin → sudo on dev/qa boxes where `ansible_user=ladmin`. That is normal: unprivileged (or sudo) login + elevation, not password guessing on root.
**`PermitRootLogin prohibit-password`** means: root may log in **only with a key**, never with a password. It does **not** mean “ban root; use ladmin only.”
**`PasswordAuthentication no`** means: **nobody** (root, ladmin, etc.) can SSH with a password — keys only.
---
## Phases overview
| Phase | When | Focus |
|-------|------|--------|
| **0 — Backup + prep** | Before any change | Snapshots, `sshd` copies, git commit, keys, second SSH session |
| **1 — Critical** | Week 1 | Proxmox SSH + 8006, keys everywhere, RAM on 201 |
| **2 — High** | Week 1–2 | LXCs SSH, fail2ban, patching, app ports |
| **3 — Medium** | Week 2–4 | unattended-upgrades, Ansible `make security`, TLS |
| **4 — Low** | Ongoing | rpcbind, naming, stopped CTs, Mac, docs |
---
## Phase 0 — Backup (before any hardening)
**Yes — back up first.** SSH and firewall mistakes can lock you out; patches can break services. Use the right backup type per layer.
### What to back up (by layer)
| Layer | What | Method | Rollback if SSH breaks |
|-------|------|--------|-------------------------|
| **Your Mac** | Ansible repo + `~/.ansible-vault-pass` (secure copy) + SSH keys | Time Machine / git commit / copy `~/.ssh` | N/A |
| **Proxmox hosts** | `/etc/ssh/sshd_config`, `/etc/pve/`, firewall rules | Copy files + **Proxmox snapshot** optional | **Console** in web UI (`pct enter` / VM console) |
| **Each LXC/VM** | Full guest state | **Proxmox snapshot** or `vzdump` | Restore snapshot or rollback CT |
| **Dev workstations** | OS + home (if Timeshift installed) | `make timeshift-snapshot HOST=dev02` | `make timeshift-restore` |
| **Central PBS** | — | **Not reliable today** — `10.0.10.200` unreachable | Fix PBS later; don't depend on it for this work |
### 0A — Mac / repo (5 minutes)
```bash
cd ~/Documents/code/ansible
git status
git add -A && git commit -m "Pre-security-hardening baseline" # if you want a restore point
# Store vault passphrase somewhere safe (password manager), NOT only on disk
# Optional: encrypted copy of ~/.ansible-vault-pass offline
```
### 0B — Proxmox: config files (both nodes)
```bash
for pve in 10.0.10.201 10.0.10.10; do
ssh root@$pve "mkdir -p /root/pre-hardening-$(date +%Y%m%d) && \
cp -a /etc/ssh/sshd_config /root/pre-hardening-$(date +%Y%m%d)/ && \
cp -a /etc/pve /root/pre-hardening-$(date +%Y%m%d)/pve-etc 2>/dev/null; \
ls -la /root/pre-hardening-$(date +%Y%m%d)/"
done
```
### 0C — Proxmox: snapshots (recommended before SSH/firewall on PVE)
**Running LXCs on pve201** (from audit): 301–308, 9101 — snapshot each before `pct exec` SSH changes.
**Running LXCs on pve10:** 210, 215, 216.
```bash
# On pve201 — snapshot (fast, local-lvm; needs free space)
ssh root@10.0.10.201 'for id in 301 302 303 304 305 306 307 308 9101; do
name=$(pct list | awk -v i=$id "\$1==i {print \$NF}")
echo "Snapshot vmid=$id ($name)"
pct snapshot $id pre-ssh-hardening-$(date +%Y%m%d) || echo "FAILED $id"
done'
# On pve10
ssh root@10.0.10.10 'for id in 210 215 216; do
pct snapshot $id pre-ssh-hardening-$(date +%Y%m%d) || echo "FAILED $id"
done'
```
**Optional full backup** (slower, larger) — important CTs only if snapshots fail (low disk on 201):
```bash
vzdump <vmid> --storage local --mode snapshot --compress zstd
```
**Check space on pve201 first** (~2.5 GB RAM + disk — snapshot needs free space on `local-lvm`):
```bash
ssh root@10.0.10.201 'pvesm status; free -h'
```
If snapshots fail for lack of space: do **0B only** on PVE, then harden SSH using **Proxmox console** as safety net (no snapshot).
### 0D — Inventory VMs with Timeshift (`dev` group)
Only where Timeshift is already installed (e.g. `dev02`):
```bash
make timeshift-snapshot HOST=dev02
make timeshift-list HOST=dev02
```
Not used on Proxmox or most LXCs by default.
### 0E — Export current SSH settings (audit trail)
```bash
mkdir -p ~/security-hardening-backup-$(date +%Y%m%d)
ssh root@10.0.10.201 'bash -s' < scripts/security-audit-ssh.sh > ~/security-hardening-backup-$(date +%Y%m%d)/pve201-ssh.txt
ssh root@10.0.10.10 'bash -s' < scripts/security-audit-ssh.sh > ~/security-hardening-backup-$(date +%Y%m%d)/pve10-ssh.txt
ssh root@10.0.10.201 'bash -s' < scripts/security-audit-lxc-via-pve.sh > ~/security-hardening-backup-$(date +%Y%m%d)/pve201-lxc.txt
```
### Backup exit criteria (do not skip)
- [ ] Git commit (or branch) for ansible repo
- [ ] `sshd_config` (+ optional `/etc/pve`) copied on **both** PVE nodes
- [ ] Proxmox snapshots **or** documented reason skipped (disk/RAM)
- [ ] Second SSH session tested to `pve201` / `pve10`
- [ ] You know how to open **Proxmox → VM/CT → Console** if SSH fails
### Rollback quick reference
| Problem | Rollback |
|---------|----------|
| Bad `sshd_config` on PVE | Console → restore `/root/pre-hardening-*/sshd_config` → `systemctl reload sshd` |
| Bad LXC SSH | `pct rollback <vmid> pre-ssh-hardening-YYYYMMDD` |
| Bad patch on CT | Same snapshot rollback |
| Locked out of LAN on 8006 | Console → disable/datacenter firewall rule |
---
## Phase 0 — Prep (after backups)
| # | Task | Command / notes |
|---|------|----------------|
| 0.1 | Confirm vault password file | `~/.ansible-vault-pass` |
| 0.2 | Bootstrap control node | `make bootstrap` |
| 0.3 | Verify key on Proxmox | `ssh -o BatchMode=yes root@10.0.10.201 true` |
| 0.4 | Copy keys to inventory | `make copy-ssh-keys` (or per group) |
| 0.5 | Document admin IP | e.g. `10.0.10.127` for firewall rules |
| 0.6 | Open **second terminal** before changing `sshd` | Test login before closing first session |
**Exit criteria:** Backups done (above) + key login works to `pve201`, `pve10`, and hosts you will harden next.
---
## Phase 1 — Critical
### 1.1 Proxmox SSH (pve201 + pve10)
**Issue:** `PermitRootLogin yes` + `PasswordAuthentication yes` — password brute force on root.
**Fix (per host, after 0.3):**
```bash
# On pve201 OR pve10 — keep existing session open!
sed -i 's/^#*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
sshd -t && systemctl reload sshd
```
**Verify (new terminal):** `ssh -o BatchMode=yes root@10.0.10.201 true`
**Ansible (later):** dedicated play for `[proxmox]` with `roles/ssh` (today `make security` only targets `dev` playbook).
| Host | Priority |
|------|----------|
| pve201 | P0 |
| pve10 | P0 |
---
### 1.2 Restrict Proxmox UI/API (port 8006)
**Issue:** Anyone on LAN can hit full cluster API.
**Fix (choose one):**
- **A — Proxmox firewall (recommended):** Datacenter → Firewall → add rule: accept `8006` from `10.0.10.0/24` and/or your Mac IP; drop others.
- **B — SSH tunnel only:** no LAN exposure; `ssh -L 8006:127.0.0.1:8006 root@10.0.10.201` → browser `https://127.0.0.1:8006`.
**Do not** block 8006 globally without A or B in place.
---
### 1.3 RAM on pve201 (~2.5 GB free)
**Issue:** New guests or updates risk OOM.
**Fix:**
```bash
ssh root@10.0.10.201 'free -h; pct list'
# Stop non-essential CTs/VMs or migrate workload to pve10
```
Review running guests from `make proxmox-info ALL=true`; stop labs you do not need.
---
### 1.4 Deploy SSH keys to unreachable inventory hosts
**Issue:** Cannot audit or Ansible-manage hosts without keys.
**Order:**
1. `make copy-ssh-key HOST=caddy` (and each `[services]` host)
2. `make bootstrap-root-ssh HOST=listmonk` where root password still works but key does not
3. `make copy-ssh-keys GROUP=qa` for `ladmin` hosts
**Exit criteria:** `make ping` succeeds for each group you will harden in phase 2.
---
## Phase 2 — High
### 2.1 LXC SSH — disable password auth (all running CTs)
**Issue:** `passwordauthentication yes` on every audited LXC.
**Fix from Proxmox host (no Mac SSH to CT required):**
```bash
# pve201 — example for each running VMID
for id in 301 302 303 304 305 306 307 308 9101; do
pct exec $id -- sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
pct exec $id -- bash -c 'sshd -t && systemctl reload sshd' || pct exec $id -- systemctl reload ssh
done
# pve10
for id in 210 215 216; do
pct exec $id -- sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
pct exec $id -- systemctl reload sshd
done
```
**Before disable:** install your key on CTs you need (`make copy-ssh-key HOST=vikunja`, etc.).
**Note:** CTs already have `permitrootlogin without-password` — keep that; only turn off passwords.
---
### 2.2 fail2ban on hypervisors
**Issue:** No brute-force protection on SSH (and eventually 8006 if proxied).
```bash
ssh root@10.0.10.201 'apt install -y fail2ban && systemctl enable --now fail2ban'
ssh root@10.0.10.10 'apt install -y fail2ban && systemctl enable --now fail2ban'
```
Optional: extend to high-value LXCs via `roles/monitoring_server` or manual install.
---
### 2.3 Patch backlog
| Target | Pending | Action |
|--------|---------|--------|
| pve201 | ~105 | `apt update && apt upgrade -y` (maintenance window) |
| pve10 | ~92 | same |
| LXCs 303, 306, 307, 9101 | 79–89 | `pct exec <id> -- bash -c 'apt update && apt upgrade -y'` |
| caseware, auto (pve10) | ~40 | same |
**Order:** hypervisors first (after snapshot), then LXCs one by one.
---
### 2.4 Application ports on `0.0.0.0`
**Issue:** HTTP services exposed on LAN without TLS/auth.
| LXC / host | Port | Fix |
|------------|------|-----|
| qbit (91) | 8080 | Prefer VPN; or Caddy + auth; bind to internal IP |
| searchXNG (70) | 8080 | Same |
| punimTagFE (121) | 8000 | Behind Caddy; firewall allow only 10.0.10.0/24 |
| vaultwarden (142) | 8080 | Already in inventory — reverse proxy + TLS |
| portfolio | **106:80** (pve10 LXC 219, nginx) | Migrated 2026-05-22; pve201 LXC **306 destroyed** |
| vikunja (159) | 3456 | Proxy via Caddy (`todo.levkin.ca`) |
**Pattern:** App listens `127.0.0.1` only; **Caddy** (`10.0.10.50`) terminates TLS for public URLs in inventory.
---
### 2.5 pve10 infrastructure
| Issue | Fix |
|-------|-----|
| ZFS `NAS.SP00` suspended | `zpool status`; import/clear errors |
| PBS 10.0.10.200 unreachable | Fix network/service or remove stale datastore |
| Load ~30 | Identify heavy VMs; migrate or stop |
---
## Phase 3 — Medium
### 3.1 unattended-upgrades
Hypervisors + important LXCs:
```bash
apt install -y unattended-upgrades apt-listchanges
dpkg-reconfigure -plow unattended-upgrades
```
### 3.2 Ansible security roles (by group)
Today `make security` runs `playbooks/development.yml` on **`dev` only**.
**Expand with new/changed playbooks:**
| Group | Playbook idea | Roles |
|-------|---------------|-------|
| `[proxmox]` | `playbooks/infrastructure/proxmox-hardening.yml` | `ssh`, monitoring_server |
| `[services]` | extend `playbooks/servers.yml` | `ssh`, `base`, fail2ban |
| `[qa]` | tag run on qa hosts | `ssh` |
| LXCs | optional `pct` + Ansible over SSH after keys | `ssh` |
**Workflow:**
```bash
make check HOST=pve201 # after proxmox play exists
make dev HOST=dev01 --tags security
```
### 3.3 UFW on LXCs
Only **punimTagFE-dev** has UFW today. Template for others:
- Allow 22 from `10.0.10.0/24`
- Allow app port only if needed on LAN
- Default deny incoming
Use `roles/ssh` UFW tasks or Proxmox guest firewall (`firewall=1` on `net0`).
### 3.4 Align names / inventory
| Proxmox name | Ansible | Action |
|--------------|---------|--------|
| punimTagFE-dev | punimTag-dev | Rename CT or update `app_projects` name |
| vikunja-debian | vikanjans | OK (IP 159) |
| qbit-debian | qBittorrent | OK (IP 91) |
### 3.5 Mac (control machine)
| Issue | Fix |
|-------|-----|
| Firewall off | System Settings → Firewall → On |
| FileVault off | Enable FileVault |
| Docker on `*:3000` | Bind to `127.0.0.1` unless LAN needed |
---
## Phase 4 — Low
| Item | Fix |
|------|-----|
| rpcbind (111) on pve201 / 9101 | Disable if unused: `systemctl disable rpcbind` |
| X11Forwarding on Proxmox | Set `no` in sshd |
| Stopped CTs 9001, 9401 | Leave stopped or destroy if unused |
| `make security-audit` target | Add Makefile → runs audit scripts, appends to report |
| Quarterly re-audit | Re-run `scripts/security-audit-lxc-via-pve.sh` |
---
## Suggested calendar
| Week | Critical | High | Medium |
|------|----------|------|--------|
| **1** | 0.x prep, 1.1 SSH both PVE, 1.2 firewall 8006, 1.4 keys | 2.1 LXC passwords off (after keys), 2.2 fail2ban | — |
| **2** | 1.3 RAM 201 | 2.3 patch PVE + LXCs, 2.4 Caddy for 8080 services | 3.1 unattended-upgrades |
| **3** | — | 2.5 pve10 ZFS/PBS/load | 3.2 Ansible plays for proxmox + services |
| **4** | — | — | 3.3 UFW, 3.4 naming, 3.5 Mac |
---
## Rollback (if locked out of SSH)
- Proxmox: use **console** in web UI (or physical/IPMI) → edit `/etc/ssh/sshd_config` → set `PasswordAuthentication yes` temporarily → reload sshd.
- LXC: `pct enter <vmid>` from PVE host.
---
## Tracking checklist
Also tracked in [security-audit-report.md](security-audit-report.md) remediation table.
**Backup (Phase 0 — before everything)**
- [ ] Git commit / branch for ansible repo (pre-hardening baseline)
- [ ] PVE `sshd_config` backup on 201 + 10
- [ ] Proxmox CT snapshots (or vzdump) on critical LXCs
- [ ] Audit outputs saved locally (`security-hardening-backup-*`)
- [ ] Console access tested in Proxmox UI
### Critical
- [ ] pve201 SSH: `PermitRootLogin prohibit-password` + `PasswordAuthentication no`
- [ ] pve10 SSH: same
- [ ] 8006 restricted to admin subnet/IP
- [x] SSH keys on most inventory hosts (2026-05-23 — see audit report)
- [ ] SSH keys on **caddy**, **ansibleVM**, **vaultwardenVM** (if still pending)
- [x] pve201 RAM partial relief — GPU 64 GiB; DebianDesktop 24 GiB (**VM 100 rebooted 2026-05-24**)
### High
- [ ] All running LXCs: `PasswordAuthentication no` (after keys verified)
- [ ] fail2ban on pve201 + pve10
- [x] Patch fleet — `apt upgrade` 2026-05-23 (reboots still pending where required)
- [ ] qBit / searchXNG / punimTag / vaultwarden port exposure reduced
- [ ] pve10 ZFS + PBS investigated
### Medium
- [ ] unattended-upgrades on PVE + key LXCs
- [ ] `make security` (or new plays) for proxmox, services, qa
- [ ] UFW on critical LXCs
- [ ] Mac firewall + FileVault
### Low
- [ ] rpcbind, X11, audit Makefile, naming cleanup
---
## Quick reference: your login after plan
```bash
# Proxmox
ssh root@10.0.10.201 # key only
# Dev / QA
ssh ladmin@10.0.10.223 # key only → sudo -i when you need root
# Services (inventory root)
ssh root@10.0.10.50 # key only
# Proxmox UI (if 8006 restricted)
ssh -L 8006:127.0.0.1:8006 root@10.0.10.201
# → https://127.0.0.1:8006
```

View File

@ -1,87 +0,0 @@
# Site LXCs — git deploy (levkin / caseware / auto / portfolio)
## Remotes (correct)
Use **`git.levkin.ca`**, not `10.0.30.169`:
```
git@git.levkin.ca:ilia/levkin.ca.git
git@git.levkin.ca:ilia/caseware.git
git@git.levkin.ca:ilia/auto.git
git@git.levkin.ca:ilia/sdetProfile.git
```
Gitea VM is **`10.0.10.169`** on pve10. Public `git.levkin.ca:22` hits your home IP and is **closed**; git SSH uses LAN IP via `~/.ssh/config`.
## SSH config (on site LXC, as root)
```ssh
# /root/.ssh/config
Host git.levkin.ca
HostName 10.0.10.169
User git
IdentityFile ~/.ssh/id_ed25519
StrictHostKeyChecking accept-new
```
## Deploy keys
Each LXC should use its **own** deploy key in Gitea (**Repo → Settings → Deploy Keys**).
Gitea allows a public key only **once per server** — if you see *“already been added to the server”*, generate a repo-specific key:
```bash
# On portfolio LXC 219 (via pve10)
pct exec 219 -- cat /root/.ssh/id_ed25519_gitea.pub
```
Portfolio uses `~/.ssh/id_ed25519_gitea` in `/root/.ssh/config` for `Host git.levkin.ca` (`IdentitiesOnly yes`).
| LXC | Repo | Key file / comment |
|-----|------|---------------------|
| 215 | caseware | `~/.ssh/id_ed25519` — `root@caseware` |
| 216 | auto | `~/.ssh/id_ed25519` — `root@auto` |
| 219 | sdetProfile | `~/.ssh/id_ed25519_gitea` — `deploy-portfolio-sdetProfile` |
| 220 | levkin.ca | `~/.ssh/id_ed25519_gitea` — `deploy-levkin-levkin.ca` (add in Gitea UI) or HTTPS clone with read token |
## levkin.ca routes (LXC 220)
| Public URL | Served from |
|------------|-------------|
| `https://levkin.ca/` | `www/index.html` (spec) |
| `https://levkin.ca/folders/` | `www/folders/` (stack-folder) |
Build before push:
```bash
cd ~/Documents/code/levkin.ca
npm run build:www
git add www/ && git commit -m "Rebuild www" && git push
```
On LXC:
```bash
pct exec 220 -- bash -c 'cd /var/www/levkin && git pull origin main'
```
## Push / pull
```bash
# On LXC (via pve10)
pct exec 215 -- bash -c 'cd /var/www/caseware && git pull origin main && git push origin main'
pct exec 216 -- bash -c 'cd /var/www/auto && git pull origin master && git push origin master'
pct exec 219 -- bash -c 'cd /var/www/portfolio && git pull origin master && git push origin master'
pct exec 220 -- bash -c 'cd /var/www/levkin && git pull origin main'
```
After editing `index.html`, commit on the LXC, push, then hard-refresh the public site.
## Gitea VM SSH (git@10.0.10.169)
If deploy keys fail after adding them in the UI:
1. Keys live in `/var/lib/gitea/.ssh/authorized_keys` (regenerated by Gitea).
2. OpenSSH logs in as user **`git`** — copy/sync that file to **`/home/git/.ssh/authorized_keys`** (`chown git:git`, mode `600`).
3. `command=` must run **`gitea serv`** as user **`gitea`** (e.g. `sudo -n -E -u gitea /usr/bin/gitea …`) with `SSH_ORIGINAL_COMMAND` preserved in sudoers.
Portfolio uses repo path **`ilia/sdetprofile`** (lowercase on disk).

View File

@ -1,54 +0,0 @@
# SMTP / mail senders — homelab inventory
**Mail server:** Mailcow @ `mail.levkine.ca` (domain **`levkine.ca`** with **e**)
**Alerts mailbox (`alerts@levkine.ca`):** homelab monitoring only — Kuma + Beszel. Password in vault: `vault_alerts_mailbox_password`, `vault_kuma_smtp_password`, `vault_mailcow_mailbox_passwords.alerts`.
---
## Who sends email?
| Mailbox / sender | Service | Host | Purpose | SMTP configured? |
|------------------|---------|------|---------|------------------|
| **`alerts@levkine.ca`** | **Uptime Kuma** | LXC 218 | Monitor down/up emails → `idobkin@gmail.com` | ✅ live (vault) |
| **`alerts@levkine.ca`** | **Beszel** | LXC 218 | Resource / status alerts | ✅ live (vault) |
| **`cal@levkine.ca`** | **Cal.com** | LXC 210 | Booking confirmations, calendar mail | ✅ `EMAIL_SERVER_*` in `/opt/cal/.env` |
| **`listmonk@levkine.ca`** | **Listmonk** | LXC 221 | Newsletter campaigns, bounces | ✅ live (UI + vault) |
| — | **Umami** | LXC 218 | Analytics only; page views via `stats.levkin.ca` | **No SMTP** |
| — | **Authentik** | LXC 217 | OIDC / optional email flows; optional — not required for Vikunja SSO | ⏳ optional |
| — | **Mattermost** | VM **107** @ `slack.levkin.ca` | Team chat; separate from **Hermes** agent VM **117** @ `.36` | ⏳ optional |
| — | **Mailcow** | pve201 VM 106 | Inbound + relay; server itself — not an app client | N/A |
| — | **pote / MirrorMatch** | app LXCs (when deployed) | App-specific mail; separate vault keys | ⏳ per app |
---
## Umami and alerts
**Umami does not need SMTP.** It collects page views via `stats.levkin.ca/script.js`. You log into the dashboard at `http://10.0.10.22:3000` with `UMAMI_ADMIN_PASSWORD` (vault). There is nothing to “alert” from Umami — use **Kuma** (uptime) and **Beszel** (CPU/RAM/disk) for operational alerts.
---
## Scripts
| Script | Mailbox |
|--------|---------|
| `scripts/kuma-setup-smtp.sh` | `alerts@` |
| `scripts/beszel-setup-smtp.sh` | `alerts@` |
| `scripts/mailcow-mailbox.sh` | any Mailcow mailbox |
```bash
make vault-export-env
./scripts/beszel-setup-smtp.sh # BESZEL_EMAIL, BESZEL_PASSWORD, SMTP_PASS
./scripts/beszel-setup-alerts.sh # BESZEL_EMAIL, BESZEL_PASSWORD
```
---
## Action items
1. ~~**Listmonk**~~ ✅ — SMTP in UI; vault `vault_listmonk_smtp_password` + Mailcow synced
2. ~~**Beszel agents**~~ ✅ — monitoring LXC + pve10 + pve201 ([monitoring-stack.md](monitoring-stack.md#3-local-agent-monitoring-lxc))
3. ~~**Beszel agents**~~ ✅ — 16 systems
4. ~~**Kuma monitors**~~ ✅ — 17 monitors + email alerts
5. **Beszel alerts** — in hub UI, enable Status/CPU/RAM/disk alerts per system
6. **Cal.com** — uses its own mailbox; rotate via Mailcow if needed (not `alerts@`).

View File

@ -1,51 +0,0 @@
# Self-hosted SSO readiness (Authentik)
Which apps can use Authentik OIDC/SAML without a paid app license.
## Cal.com — blocked (commercial)
**Status:** Deferred until a valid **self-hosted enterprise license** is in place.
The Cal UI at `/settings/security/sso` shows *"This is a commercial feature"* when `CALCOM_LICENSE_KEY` is missing or invalid. On LXC 210, the key in `/opt/cal/.env` is currently **empty** (length 0), so SSO cannot be configured in-app.
**If you want native Cal OIDC later:**
1. Purchase / obtain a self-hosted license from [Cal.com](https://cal.com) (sales or existing license).
2. Set in `/opt/cal/.env`:
```bash
CALCOM_LICENSE_KEY=<your-key>
NEXT_PUBLIC_LICENSE_CONSENT=agree
```
3. `cd /opt/cal && docker compose up -d` (compose already passes these vars).
4. Complete [cal-authentik-oidc.md](cal-authentik-oidc.md) — Authentik app `cal-com` is already provisioned.
**Workaround without paying Cal:** use **local Cal password** for admin; public booking at `cal.levkin.ca/ilia/consult` stays open. Optional later: **Caddy + Authentik forward-auth** only on `/settings/*` and `/auth/*` (does not integrate with Cal's “Login with SSO” button; more ops complexity). Not recommended until license path is ruled out.
**Infra already done (harmless to keep):** `calsaml` DB, `SAML_*` env vars, Authentik provider `cal-com-oidc`.
---
## Phase 4 order (no Cal license required)
Wire these first — typical OSS OIDC, no extra license:
| App | OIDC/SAML | Notes |
|-----|-----------|--------|
| **Vikunja** | OIDC native | **Live** — [vikunja-authentik-oidc.md](vikunja-authentik-oidc.md); group `homelab-users` |
| **Listmonk** | OIDC native | **Live** — [listmonk-authentik-oidc.md](listmonk-authentik-oidc.md); v6.1.0+ |
| **Mattermost** | GitLab OAuth → Authentik | ✅ [mattermost-authentik-gitlab-oauth.md](mattermost-authentik-gitlab-oauth.md) |
| **Mailcow** | Generic-OIDC | ✅ [mailcow-authentik-oidc.md](mailcow-authentik-oidc.md) — test mailbox login |
| **Umami** | — | Already LAN-only; no SSO needed |
| **Vaultwarden** | — | **Do not OIDC** (break-glass) |
| **n8n** | OIDC (if enabled) | Check edition |
| **Immich** | OIDC | Phase 5; usually free in self-host |
| **Outline** | OIDC/SAML | Phase 8 |
**Unlikely to need a commercial license** for homelab SSO on the list above; always check each app's docs before assuming.
---
## Related
- [cal-authentik-oidc.md](cal-authentik-oidc.md)
- [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md)

View File

@ -1,97 +0,0 @@
# UniFi static DHCP (10.0.10.x homelab)
**Controller:** https://192.168.2.1/
**Goal:** Pin Proxmox VM MAC addresses to stable `10.0.10.x` addresses so Caddy and Ansible inventory do not drift.
LXCs on pve10 (**210, 215–220**) are already static via `pct set` — **no UniFi lease needed** for those rows.
This guide is for **VMs** (and pve201 guests) that still use DHCP.
---
## Before you start
1. Confirm guests get addresses on **`10.0.10.0/24`** (not only `192.168.2.x`). In UniFi, open the network that faces Proxmox `vmbr0`.
2. Gateway for homelab guests should be **`10.0.10.1`** (or your router's IP on that VLAN).
3. Use the MAC table in [vm-static-ip-router-reservations.md](vm-static-ip-router-reservations.md).
---
## Method A — From a connected client (easiest)
1. Open **https://192.168.2.1/** and sign in.
2. Go to **Clients** (or **UniFi Devices** → **Clients**).
3. Find the device (hostname like `gitea`, `vaultwarden`, or MAC from Proxmox `qm config <vmid>`).
4. Click the client → **Settings** (gear) or **⋮**.
5. Enable **Fixed IP** / **Use fixed IP address**.
6. Set IP to the target from the table (e.g. `10.0.10.169` for gitea).
7. **Apply** / **Save**.
8. On the VM: renew DHCP or reboot:
```bash
sudo dhclient -r && sudo dhclient
# or: reboot
```
9. Verify: `ip -4 addr show` shows the reserved IP.
---
## Method B — DHCP static mapping (manual MAC)
1. **Settings** → **Networks**.
2. Open the LAN/VLAN that serves **10.0.10.x** (name varies: `Default`, `Homelab`, `10.0.10`).
3. **DHCP** section → **DHCP Static IP** / **Static leases** → **Create new**.
4. Enter:
- **MAC address** (from Proxmox, e.g. `BC:24:11:E9:BD:E5`)
- **IP address** (e.g. `10.0.10.169`)
- **Name** (optional, e.g. `giteaVM`)
5. Save. Repeat for each row in the reservations table.
6. Renew DHCP on each VM or reboot.
---
## Already static (skip UniFi DHCP)
| VMID | Name | IP | How |
|------|------|-----|-----|
| 210 | cal | 10.0.10.228 | `pct set` |
| 215 | caseware | 10.0.10.105 | `pct set` |
| 216 | auto | 10.0.10.59 | `pct set` |
| 217 | identity | 10.0.10.21 | `pct set` |
| 218 | monitoring | 10.0.10.22 | `pct set` |
| 219 | portfolio | 10.0.10.106 | `pct set` (`iliadobkin.com`) |
| 220 | levkin | 10.0.10.60 | `pct set` (`levkin.ca`) |
| 106 | caddy | 10.0.10.50 | static in `/etc/network/interfaces` |
---
## Priority order — UniFi reservations (VMs / pve201)
| Order | Guest | IP | MAC | Notes |
| ----- | ----- | --- | --- | ----- |
| 1 | giteaVM | 10.0.10.169 | BC:24:11:E9:BD:E5 | |
| 2 | vaultwardenVM | 10.0.10.142 | BC:24:11:58:DB:DC | |
| 3 | n8n (WRA) | 10.0.10.154 | BC:24:11:61:DE:7A | |
| 4 | hermes | 10.0.10.36 | BC:24:11:51:1E:99 | |
| 5 | actual | 10.0.10.158 | BC:24:11:10:7B:64 | |
| 6 | jellyfin | 10.0.10.232 | BC:24:11:29:B8:84 | stopped until NAS OK |
| 7 | ~~listmonk LXC 221~~ | 10.0.10.148 | BC:24:11:18:0C:62 | **Skip** — static via `pct set`, not DHCP |
| 8 | Mailcow (pve201) | 10.0.10.132 | BC:24:11:34:75:2D | |
| 9 | TrueNAS | 10.0.10.107 | BC:24:11:14:DE:B5 | optional pin |
| 10 | PVE.BU.SVR | 10.0.10.200 | BC:24:11:DA:95:3B | lab VM |
Full MAC table: [vm-static-ip-router-reservations.md](vm-static-ip-router-reservations.md).
---
## If you only see 192.168.2.x in UniFi
Your Mac may be on `192.168.2.0/24` while Proxmox guests use a separate **`10.0.10.0/24`** network. In that case:
- Add or edit a UniFi network/VLAN for `10.0.10.0/24`, or
- Ensure the router bridges/routes between `192.168.2.x` and `10.0.10.x`, and
- Put DHCP reservations on the network that actually serves the Proxmox bridge.
---
## After reservations
Mark `✅ router` in [host-list.md](host-list.md) for each guest.

View File

@ -1,51 +0,0 @@
# Vikunja ↔ Authentik OIDC
**Status:** Live at `https://todo.levkin.ca` (host `vikunja`, `10.0.10.159`).
## Authentik
| Item | Value |
|------|--------|
| Application slug | `vikunja` |
| Redirect URI (strict) | `https://todo.levkin.ca/auth/openid/authentik` |
| Subject mode | **Based on the User's username** (`user_username`) |
| Access group | **`homelab-users`** (bind to app; policy engine **ANY**) |
| Authentik user | Purpose | Email |
|----------------|---------|--------|
| **`admin`** | Authentik admin UI only | `admin@levkin.ca` |
| **`ilia`** | Homelab apps (Vikunja, etc.) | `idobkin@gmail.com` |
**Do not use the same email on both users** — Authentik will pick the wrong account.
`homelab-users` group = **`ilia`** only. Vikunja app binding: group `homelab-users` (policy engine **ANY**).
Secrets: `vault_vikunja_oidc_client_id`, `vault_vikunja_oidc_client_secret` in Ansible vault.
## Vikunja
Config: `/opt/vikunja/config.yml` (mounted in `docker-compose.yml`).
- `auth.openid.providers.authentik` → `authurl: https://auth.levkin.ca/application/o/vikunja/`
- `usernamefallback: true` + `emailfallback: true` → SSO links to existing local user **`ilia`** when Authentik username is `ilia`.
Local auth stays enabled for break-glass.
## Login
1. Sign out: `https://auth.levkin.ca/if/user/logout/`
2. `https://todo.levkin.ca` → **Login with Authentik**
3. Sign in as **`ilia`** (username) or **`idobkin@gmail.com`** — **not** `admin`
**My applications:** `admin` only sees apps allowed for superuser (e.g. Cal). **`ilia`** sees Vikunja after login.
## Adding users
1. **Directory → Users** — create user (username should match Vikunja local username if linking).
2. **Directory → Groups → homelab-users** — add user.
3. New Vikunja users: first OIDC login creates account; existing local users need matching username + fallbacks.
## Related
- [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)
- [Authentik Vikunja integration](https://integrations.goauthentik.io/chat-communication-collaboration/vikunja/)

View File

@ -1,40 +0,0 @@
# VM static IPs — router DHCP reservations (pve10)
Proxmox **LXCs** use `pct set … ip=10.0.10.X/24` (done for 210, 215–219).
**VMs** without cloud-init are pinned by **router DHCP reservation by MAC** (Method B in plan-2).
Ansible **cannot log into your router** — configure static leases in the UI.
**UniFi API:** key in vault (`vault_unifi_api_key`). Reservations can be applied via API (see homelab status).
---
Homelab guests use **`10.0.10.0/24`** (gateway `10.0.10.1`). If UniFi also serves `192.168.2.x`, ensure the `10.0.10.x` segment is the network those VMs/LXCs use
(or that routing/DHCP relay matches your Proxmox bridge).
## How to add a reservation (any router)
1. Open router admin (UniFi: **https://192.168.2.1/**).
2. Find **DHCP** / **LAN** / **Static leases** / **Reserved addresses**.
3. For each row: **MAC address** → **IP address** → Save.
4. Reboot guest or renew DHCP (`dhclient -r && dhclient` on Debian) if IP does not update immediately.
5. Mark done in [host-list.md](host-list.md).
| VMID | Name | MAC | Reserve IP | Inventory |
| ---- | ---- | --- | ---------- | --------- |
| 102 | gitea-alpine | `BC:24:11:E9:BD:E5` | `10.0.10.169` | giteaVM |
| 103 | WRA / n8n | `BC:24:11:61:DE:7A` | `10.0.10.154` | n8n |
| 104 | vaultwarden | `BC:24:11:58:DB:DC` | `10.0.10.142` | vaultwardenVM |
| 105 | TrueNAS | `BC:24:11:14:DE:B5` | `10.0.10.107` | — |
| 106 | caddy | `BC:24:11:E0:49:B4` | `10.0.10.50` | ✅ static in-guest |
| 108 | actual | `BC:24:11:10:7B:64` | `10.0.10.158` | actual |
| 117 | hermes | `BC:24:11:51:1E:99` | `10.0.10.36` | hermes (guest agent on) |
| 200 | PVE.BU.SVR | `BC:24:11:DA:95:3B` | `10.0.10.200` | — |
| 201 | NextcloudAIO | `BC:24:11:14:D4:DE` | `10.0.10.24` | **decommission** — skip new work |
| 101 | Jellyfin | `BC:24:11:29:B8:84` | `10.0.10.232` | stopped |
| 113 | listmonk (pve201) | `BC:24:11:11:53:9A` | `10.0.10.148` | **retired** → pve10 LXC **221** MAC `BC:24:11:18:0C:62` |
| — | Mailcow (pve201 VM 106) | `BC:24:11:34:75:2D` | `10.0.10.132` | mailcow (inventory) |
After reserving in the router, mark **DHCP/Static** as `✅ router` in [host-list.md](host-list.md).
In-guest static (optional, stronger): SSH as root and set `/etc/network/interfaces` like caddy VM 106.

View File

@ -1,15 +0,0 @@
---
# Mailcow mailbox definitions (passwords live in vault only).
# Create: make mailcow-mailbox MAILBOX=<key>
# Add a new key under mailcow_mailboxes + vault_mailcow_mailbox_passwords.<key>
mailcow_url: "https://mail.levkine.ca"
mailcow_domain: "levkine.ca"
mailcow_mailboxes:
alerts:
local_part: alerts
name: Monitoring Alerts
quota: 1024
vault_password_key: alerts
mailcow_api_key: "{{ vault_mailcow_api_key | default('') }}"

View File

@ -26,10 +26,6 @@ maintenance_pre_reboot_delay: 5 # Delay before reboot in seconds
# Default Tailscale settings - these tell the playbook to use your vault key # Default Tailscale settings - these tell the playbook to use your vault key
tailscale_auth_key: "{{ vault_tailscale_auth_key | default('') }}" tailscale_auth_key: "{{ vault_tailscale_auth_key | default('') }}"
# Mailcow — API key + per-mailbox passwords in vault; definitions in group_vars/all/mailcow.yml
mailcow_api_key: "{{ vault_mailcow_api_key | default('') }}"
mailcow_mailbox_passwords: "{{ vault_mailcow_mailbox_passwords | default({}) }}"
tailscale_accept_routes: true tailscale_accept_routes: true
tailscale_accept_dns: true tailscale_accept_dns: true
tailscale_ssh: false tailscale_ssh: false
@ -104,7 +100,8 @@ app_frontend_start_cmd: "npm start"
# Proxmox IDs are global. Never reuse IDs across unrelated guests. # Proxmox IDs are global. Never reuse IDs across unrelated guests.
# Suggested reservation table (edit to your preference): # Suggested reservation table (edit to your preference):
# - 9000-9099: pote # - 9000-9099: pote
# - 9100-9199: punimTag (monorepo) # - 9100-9199: punimTagFE
# - 9200-9299: punimTagBE
# - 9300-9399: projectA (example) # - 9300-9399: projectA (example)
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
app_projects: app_projects:
@ -208,13 +205,59 @@ app_projects:
gateway: "10.0.10.1" gateway: "10.0.10.1"
branch: "main" branch: "main"
punimTag: punimTagFE:
description: "punimTag monorepo (frontend + backend, edit repo_url, IPs, secrets)." description: "punimTag frontend-only project (edit repo_url, IPs, secrets)."
repo_url: "git@github.com:example/punimTag.git" repo_url: "git@github.com:example/punimTagFE.git"
repo_dest: "/srv/app"
components:
backend: false
frontend: true
guest_defaults:
guest_type: "{{ proxmox_guest_type }}"
cores: 2
memory_mb: 2048
swap_mb: 512
rootfs_size_gb: 16
deploy:
frontend_install_cmd: "{{ app_frontend_install_cmd }}"
frontend_build_cmd: "{{ app_frontend_build_cmd }}"
frontend_start_cmd: "{{ app_frontend_start_cmd }}"
envs:
dev:
name: "punimTagFE-dev"
vmid: 9101
ip: "10.0.10.121/24"
gateway: "10.0.10.1"
branch: "dev"
env_vars:
APP_ENV: "dev"
SECRET_PLACEHOLDER: "change-me"
qa:
name: "punimTagFE-qa"
vmid: 9102
ip: "10.0.10.122/24"
gateway: "10.0.10.1"
branch: "qa"
env_vars:
APP_ENV: "qa"
SECRET_PLACEHOLDER: "change-me"
prod:
name: "punimTagFE-prod"
vmid: 9103
ip: "10.0.10.123/24"
gateway: "10.0.10.1"
branch: "main"
env_vars:
APP_ENV: "prod"
SECRET_PLACEHOLDER: "change-me"
punimTagBE:
description: "punimTag backend-only project (edit repo_url, IPs, secrets)."
repo_url: "git@github.com:example/punimTagBE.git"
repo_dest: "/srv/app" repo_dest: "/srv/app"
components: components:
backend: true backend: true
frontend: true frontend: false
guest_defaults: guest_defaults:
guest_type: "{{ proxmox_guest_type }}" guest_type: "{{ proxmox_guest_type }}"
cores: 2 cores: 2
@ -225,58 +268,34 @@ app_projects:
backend_install_cmd: "{{ app_backend_install_cmd }}" backend_install_cmd: "{{ app_backend_install_cmd }}"
backend_migrate_cmd: "{{ app_backend_migrate_cmd }}" backend_migrate_cmd: "{{ app_backend_migrate_cmd }}"
backend_start_cmd: "{{ app_backend_start_cmd }}" backend_start_cmd: "{{ app_backend_start_cmd }}"
frontend_install_cmd: "{{ app_frontend_install_cmd }}"
frontend_build_cmd: "{{ app_frontend_build_cmd }}"
frontend_start_cmd: "{{ app_frontend_start_cmd }}"
envs: envs:
dev: dev:
name: "punimTag-dev" name: "punimTagBE-dev"
vmid: 9101 vmid: 9201
ip: "10.0.10.121/24" ip: "10.0.10.131/24"
gateway: "10.0.10.1" gateway: "10.0.10.1"
branch: "dev" branch: "dev"
env_vars: env_vars:
APP_ENV: "dev" APP_ENV: "dev"
NODE_ENV: "production" SECRET_PLACEHOLDER: "change-me"
DATABASE_HOST: "10.0.10.181"
DATABASE_PORT: "5432"
DATABASE_URL: "{{ vault_punimtag_database_url_dev | default('postgresql://punimtag_dev_user:CHANGE_ME@10.0.10.181:5432/punimtag_dev') }}"
BACKEND_PORT: "{{ app_backend_port }}"
FRONTEND_PORT: "{{ app_frontend_port }}"
BACKEND_BASE_URL: "http://10.0.10.121:{{ app_backend_port }}"
FRONTEND_BASE_URL: "http://10.0.10.121:{{ app_frontend_port }}"
qa: qa:
name: "punimTag-qa" name: "punimTagBE-qa"
vmid: 9102 vmid: 9202
ip: "10.0.10.122/24" ip: "10.0.10.132/24"
gateway: "10.0.10.1" gateway: "10.0.10.1"
branch: "qa" branch: "qa"
env_vars: env_vars:
APP_ENV: "qa" APP_ENV: "qa"
NODE_ENV: "production" SECRET_PLACEHOLDER: "change-me"
DATABASE_HOST: "10.0.10.181"
DATABASE_PORT: "5432"
DATABASE_URL: "{{ vault_punimtag_database_url_qa | default('postgresql://punimtag_qa_user:CHANGE_ME@10.0.10.181:5432/punimtag_qa') }}"
BACKEND_PORT: "{{ app_backend_port }}"
FRONTEND_PORT: "{{ app_frontend_port }}"
BACKEND_BASE_URL: "http://10.0.10.122:{{ app_backend_port }}"
FRONTEND_BASE_URL: "http://10.0.10.122:{{ app_frontend_port }}"
prod: prod:
name: "punimTag-prod" name: "punimTagBE-prod"
vmid: 9103 vmid: 9203
ip: "10.0.10.123/24" ip: "10.0.10.133/24"
gateway: "10.0.10.1" gateway: "10.0.10.1"
branch: "main" branch: "main"
env_vars: env_vars:
APP_ENV: "prod" APP_ENV: "prod"
NODE_ENV: "production" SECRET_PLACEHOLDER: "change-me"
DATABASE_HOST: "10.0.10.181"
DATABASE_PORT: "5432"
DATABASE_URL: "{{ vault_punimtag_database_url_prod | default('postgresql://punimtag_prod_user:CHANGE_ME@10.0.10.181:5432/punimtag_prod') }}"
BACKEND_PORT: "{{ app_backend_port }}"
FRONTEND_PORT: "{{ app_frontend_port }}"
BACKEND_BASE_URL: "http://10.0.10.123:{{ app_backend_port }}"
FRONTEND_BASE_URL: "http://10.0.10.123:{{ app_frontend_port }}"
mirrormatch: mirrormatch:
description: "Mirrormatch Prisma/Node backend (dev/qa/prod)." description: "Mirrormatch Prisma/Node backend (dev/qa/prod)."

View File

@ -22,67 +22,6 @@ vault_ssh_public_key: "ssh-ed25519 AAAA... you@example"
# LXC create bootstrap password (often required by Proxmox) # LXC create bootstrap password (often required by Proxmox)
vault_lxc_root_password: "CHANGE_ME" vault_lxc_root_password: "CHANGE_ME"
# Ansible become (sudo) for VMs that use ladmin/master instead of root SSH
vault_vaultwarden_become_password: "{{ vault_lxc_root_password }}"
vault_ansiblevm_become_password: "{{ vault_lxc_root_password }}"
# Mailcow API — System → Configuration → Access → API (read/write)
vault_mailcow_api_key: "CHANGE_ME"
# Per-mailbox passwords (make mailcow-mailbox MAILBOX=<key>)
vault_mailcow_mailbox_passwords:
alerts: "CHANGE_ME"
# Legacy alias (optional)
vault_alerts_mailbox_password: "CHANGE_ME"
# Uptime Kuma + SMTP (monitoring LXC)
vault_uptime_kuma_url: "http://10.0.10.22:3001"
vault_uptime_kuma_user: "admin"
vault_uptime_kuma_password: "CHANGE_ME"
vault_kuma_smtp_host: "mail.levkine.ca"
vault_kuma_smtp_port: "587"
vault_kuma_smtp_user: "alerts@levkine.ca"
vault_kuma_smtp_password: "CHANGE_ME"
vault_kuma_smtp_to: "idobkin@gmail.com"
# UniFi Network (Integrations → API key)
vault_unifi_url: "https://192.168.2.1"
vault_unifi_site: "default"
vault_unifi_api_key: "CHANGE_ME"
# Umami (monitoring LXC /opt/monitoring/.env)
vault_umami_db_password: "CHANGE_ME"
vault_umami_app_secret: "CHANGE_ME"
# Umami admin UI password (monitoring LXC :3000)
vault_umami_admin_password: "CHANGE_ME"
# Cal.com ↔ Authentik OIDC (make cal-oidc)
vault_cal_oidc_client_secret: "CHANGE_ME"
# Vikunja ↔ Authentik OIDC
vault_vikunja_oidc_client_id: "CHANGE_ME"
vault_vikunja_oidc_client_secret: "CHANGE_ME"
# Listmonk ↔ Authentik OIDC
vault_listmonk_oidc_client_id: "listmonk"
vault_listmonk_oidc_client_secret: "CHANGE_ME"
# Authentik API (Integrations → Tokens) — for automation scripts
vault_authentik_url: "https://auth.levkin.ca"
vault_authentik_api_token: "CHANGE_ME"
# Beszel hub admin (monitoring LXC :8090)
vault_beszel_email: "CHANGE_ME"
vault_beszel_password: "CHANGE_ME"
# Mattermost Team Edition @ slack.levkin.ca (GitLab OAuth → Authentik)
vault_mattermost_oidc_client_id: "mattermost"
vault_mattermost_oidc_client_secret: "CHANGE_ME"
# Hermes bridge (VM 117 — not Mattermost server)
vault_mattermost_url: "https://slack.levkin.ca"
vault_mattermost_token: "CHANGE_ME"
vault_mattermost_allowed_users: "CHANGE_ME"
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# POTE (python/venv + cron) secrets # POTE (python/venv + cron) secrets
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
@ -131,18 +70,4 @@ vault_mirrormatch_smtp_user: "smtp-user"
vault_mirrormatch_smtp_password: "CHANGE_ME" vault_mirrormatch_smtp_password: "CHANGE_ME"
vault_mirrormatch_smtp_from: "MirrorMatch <noreply@mirrormatch.com>" vault_mirrormatch_smtp_from: "MirrorMatch <noreply@mirrormatch.com>"
# -----------------------------------------------------------------------------
# punimTag (monorepo) secrets
# -----------------------------------------------------------------------------
# Optional deploy key for private repo access
vault_punimtag_git_ssh_key: |
-----BEGIN OPENSSH PRIVATE KEY-----
CHANGE_ME
-----END OPENSSH PRIVATE KEY-----
# Per-environment database URLs (use external Postgres at 10.0.10.181:5432)
vault_punimtag_database_url_dev: "postgresql://punimtag_dev_user:CHANGE_ME@10.0.10.181:5432/punimtag_dev"
vault_punimtag_database_url_qa: "postgresql://punimtag_qa_user:CHANGE_ME@10.0.10.181:5432/punimtag_qa"
vault_punimtag_database_url_prod: "postgresql://punimtag_prod_user:CHANGE_ME@10.0.10.181:5432/punimtag_prod"

View File

@ -1,170 +1,100 @@
$ANSIBLE_VAULT;1.1;AES256 $ANSIBLE_VAULT;1.1;AES256
65613063646137663934353263656638653161613265383666626435656466646465396236386162 38316537376634623462313731323238666165383731656632373665653534623163386333303865
3237303634613436386234663239313939373238326663630a663963616465373836333837343935 3865383030316132663831303932376437346335323233630a643331663539383163306666393764
61356266646463313730383831656165663062623739303132313865336433626138363430663033 38313265656561343839616565343663353037663237663032366632373831363336306632626266
3266663236663730340a383335383938363930353635646365373536643335313564623238313032 3361643865333533340a356233663034343932323831323236356161396237346532323838373135
61383637613635643434306465623230343864663039363633346431306162643364383661616366 33393239313730363336613338373039663735323431323562613363343863326234633833663631
36376666343066316338343731643163626435333134356461343065366635666130383132363934 66343462623231663932633537373361313764393630356666393662653135356139663935613038
37656565346436643666363439323661333561613934303766646632666438623331663666333738 65383261363065633235343031346535373564373931373063386265343335623265653739613830
64626464343033366162333531646261623439346639626266646563633565626232333133313938 32656233393330633362623932316431383761306332393466313936396533333839313831663331
35633836626639376565363639666235643633343233663539613861363461313433333936616531 34353864356336303331663233653666363966376162303731626134313235306238323363303439
30356532323565373462373533663962343463613466373731643263393863396433313935363036 32333039653235326632303637303065386161616138356463623561366637376366326262303166
61663263303266313665656133366135616564363137623166306638613434613566666364613132 38323763393934666539373063323265333961666164613437316164633565393035626538353365
34613066626366313363383834376661313764663737383162343663333361313666386332376562 33386562336665383863636639643232623161643933313664396534383362303838663362653736
31616438333438346537366664333130646264636461346362666335343038643765353264396338 64393334616165336638306235363734653431646431616139373336656333623963386538646230
35633530613237646235353436303236313561363134356566356466376539393033663564343334 39663230363063386231343730663162313463666135323265613261626637626332353534396535
36336130313262343537333934383532386639386664346561346131663437343564623736643863 31623664363766646332396336396133613662643232366433323330373962633839613635333763
30336334396235326164333439656336613935653937636233643136653062643663643466366436 63306230623438346639323863353137363330316630316130326134323731326635643736373736
32336533336566626266613838343364626439376563663462366664356235326535643732303531 62336362656265633233623165376436373231656666303832373966353732313031623865316663
35643739373939626663633861343065346262623765373339643732323666396236626130646537 63356163636238346230623732326232646434623532633439646536656362393162613535613565
34336530633337633662306363656661303464623961333335353538623462656162323634326264 66616539316362376561386263373464623030636661663435383839643565393632616232663035
39386130323935303437346263646633356233326339663534363037326365666630356434356133 34653735383964653930633664346330386566343830336238306562343164366131643138643339
61316363306462383661666232383131353132646432656531626463666534626561616537653735 35313366356637643262636238366263353535306434633732623335643266396335666636666663
31663531333834363062633436626631383536633631343237373034633131326633353333303663 37333232393765306433326164663538663839623034373535653737633366303665633831303334
64336536653663323132386564306431336436323635346430613661353165376365363063633735 32303061363863386139613464326466336136396534663538643163343439343763383534306636
62313033353161373232653031306362376330343535306533663839373535643165613636313532 62353733613330376163386331626463656462336237656339356132643135363537343638303261
65393932623666613431313431373761313666313462633937646633656436363661353537313463 33366332653439313137613665386136666536356537346665333935366336623734393738346434
30313365346636326236366261316135663962663065323531643938343464376163353034313563 63326265346362636564366265373134336662626332653464646139656635313961656230336537
64326438343937326361303137373036373364643235346230646333633662643738363833383036 63666638326337643033363964643339666130386139363138656165666333356465643337396165
31336537376664343530333638613665376666393432643434373361646166623032333437376164 30336330633632353231613938646165383966613863366330646162646266346139343434393865
38336130393465353839383330393931336461326230306136323265653664643831366363633635 66346365663230626531643963383462636465363965393762336233366538393133313138616335
63343136336466356533633633653931616236656432636434343130653733306161623538666262 32353834313762363265643031343237633732393166343139363163326439666162396332353038
33653933663034376434353966633761666162353166616636376561306266653930386639613935 31306530626666343361313736313636613335376163383237303063393333386663333333336137
30343162396133633136373038663965633536363731613832636534303766663031333135386238 37346166316231623638386635613230663063653037643930333961316434643361633035633734
66333334356263383661363737373363636535363238363761656363373164613034386136396332 65643937636361653433383262643265373165613437336236633631323635613034663834646665
63663836623532653136623663396336613066303839643534363365363031653335396365643362 30373730373438613132633932333565376665333565383932356334653738646166393934626362
32663364616438656565656533313733653933656538616137336531643864393766333935626333 30666666303832613633316230623038343165396338343535663931383639623430643238656261
64343835303739303461333430653430386364336262656163313837363634366335666163366232 39623037333063306266323335303736346236636137633863353866343136346335353865303961
34323338386234393431643865666334333065393534306438386335626438393130333162623337 31346331333066376330306361396262333762393838303165383134303435353630366130303536
38393561353339336265613236366631376561356538303836663465346532306166363466393865 34386532356239326166386665623435646432636561363564656161646563306234333138333839
36343339373032643536386364343332663632313763646139303962646266663266633465343463 38316337656631313763393135396464643338386636336234346663653538353863643636323032
61303737353761656363663432643133643738623965353139336434646530323339356163303065 35326133623064363838386662653138613438386564316635373838366262656364666633636539
36623230396535386639313534393239343439613238656361616333656139316465323163653639 61306563666138656161336466323537626161313366616662623362643036636132663634313137
66653630666139393730313136326532636439666631313363656464663563326634613134356432 39653437306662646162613763343736636530356465346132646238633166373838353836326461
39613866633165306132643761646136646335663831656337343235643365376130636666316632 36326666323636353239303262623436643932353164323630326635653635653233363265316264
64656236333064353332396264316134633565323332396136356636353430313064396338613266 30653763643431626539356161376534396437636463303363663134373961616561363561333333
65656338303266333735386537616337643838663136663237373862363163623238303139303661 34306537326666383664336464656464623731656566653132613565336536323438666333366466
66636232346339323338653264643939643630616238623363636336666362343864663730306633 64613738653730333633383062653837366266316536653139643362373039383831363666333934
33326566363961633066373732393439346262323561643534353063633939633731336331623332 34383833336266356436666636323239336432386133303466636138643934356266326533643161
63623562313863383966656437643033656664623132613965613435643065336133643661386434 36393664313963393930383533623565383332613933396639613037323266663439313138326261
34366161343038306337393761613661353163623061376362653061383739376338323463623161 30353861303661303836343165353362663632306430626337356562343637653164396237333566
65393531643061653264323837313236333733393536653733656336666464353635653434383738 37656230363530323836373363646334356262646633313932383161303264613238373936353036
33656363643264383161383032333365633766316335636334346662623335396163613635363365 61376264633930356465626266623930333039383032316163633037323035346130343934616261
34383066376433663336633030616362366633613366633832333034303136656264393464366538 31666166393462366561303833353135326566356637376466613934376233303162323033623031
33366266613336373739376230393364653862666261356664323464643033336162386337653937 63656131333439353537623662363530383866326432306361316465383137633536666364623662
32666363376463383461343864343265393062636237633438643238386436616163323463663732 37353561633839623530333663643130326131333330626661396636343234666139336539653162
66613165333762633961663833376465643537323062613664306135663332323038353934383530 62383636663137626637303535333862366434626161353239393232313537343865646564626331
31383235393034376532386265346332303634346631653537303836383234386239343732386363 39366665363030643764663963316163343033326434373265343664393439316333346434363563
39363232346130633864343364636434646439663834326635666666376664366430303932613232 61346164396561343865626362616433306230333130653166656230353364316536626432373333
66643832363836336430383763383635336136313639313665366330353231333265663164323037 35383133363530666263316431396462383133363965336637386632363263656261353963313161
65636535633531393439386331383037326361366232613332653637373435333233643138306333 36383632326264373436383638383064346334336238656239393833653531656461356136303434
38343137386131613937346630353332306639616335646665613166326634636339346262356538 37663434663732306631656334306361663562303863386135623066633963373034373139666332
33386534366465633532323837663464306133343762623862363536643139353461663132313337 35393433646333363839666434663535363661616330386234366132303161383063663836626561
64336231363537623435663430623336326537386362326165306139343165353136346336643437 35393064343735303032313266643338623834383838633834636536363539656466663864613366
38626636613666336161666439343862363738366531343832303237393734653937336637636534 66636363623330326436363936313938333638323939323035616232366563316364343834376630
62313731303162653863306635373538333334303663663635363732353833323735353130393531 66656434336661643861613737616138396330383832386230383331646462323363373363393733
65316236333133396634373165393365653837653066343839333334366362636537653466383439 63363237636137373566363438663966396432613964336164326138623737393636396234646232
66363339366265353834663033653135333562373364653830663730386130353732393766653931 64343361363365356135666235623833396131626663303839653535663732313831633163643638
64333334363634356135666664633164366665383234623563333330306335303836333662316633 35396262373837343238343838663635353838373338663732626330613237623332336436643136
35343864363938383461336436366566323734306264393763396265663961313534323663613631 38653833383430393837383566643765653834306636356466326364303334653034626262356630
35346439666639613836613162303736323564653761363661383265356538663633393838613635 34333338333336373433356235386337346666343830303164363235303265313134323339653339
65323939663164613432643136363233336436633131356661313034376266643832386137653565 63316238346132653663653165313635336638646362356337643766366564383531633565303431
65356235616165643265346661396434626539336633353437316339626139383362343834646332 66616433663630343439336661346266336139613537653438653432326666326137306364376137
62393632313130323866336233396362303437653032653662613332616430666139376561663164 66333939643262633532363966623439373434393862353237613135646663623236646331643537
65393633356364313937326565616164373463653736336533616565616635366663336533363936 31353566653464313433636635393330646166613232633734346639326534373163383064353732
34636336626364363039383231363962363065316436336234346463323035303665383437383938 32373861303064346266643338316465653031646633633936373738663837383162643534623131
36366537393864363365393633363937373162323237306432636266386462346464393661643032 31633662356534343636313834386139656439663733333762323962323939623032396239356437
31616632383964646462316666393664366232633736356166323432333834323261393564656530 37633739613433613365313337383835623936623530363831383535663337343264356532616434
66316433333037313930646234666162336331333861643162633434306432653061613830613861 39393634396664636166346631313764343733666534613935393637363233373331303837656463
31363266303635613830373265383436363833653237386436653036666336326137386232646535 37363266363634353136316532333462396266373733333633356239653334363835326261323661
33353930373038376332343766393338383235653338316564646632643138373033663433613832 66323032346364356230613831643236316530356132343863393361343462373433383265336333
66336562333532643330343962323063383138633737633634386661623539353431313730313666 30343730316366366234333263343965633466333439653739663333643939303631353664316435
65633337613632646635643566393136353430366539613862323932633631656662376564623562 36396139623562656632666165666662626263643436396431326135633932393965656531633761
31623666303637393862353233653533616633643537626431373066616338326662313939343236 39303634643936366438336534613532303134343164326661626363656562383564623264636132
33633534646633633161306336663139623036303330613364373539626433333937633631346534 39656636303636393761653035303832386430646162343830343834316534636263373763643765
31636632393561333961623565663637663237636262636366336437326634306331313938653561 61366335643531666232303231656336643833396238336639333437363564636566636632303364
61353263643263383266333932373933353834303663316337303763643336376165656135323331 62623738336237393638363436396662656565653839643164356565313563663561666237383036
34613131373638303730383961383738613164386339646231663533393931343236333761376630 33626464663465643230376164653062663063636630613064643632643235643662653566333333
33393133313134323135393533653064626635363532323866623033336437653732336237616232 62353763643830363638323731303537633837393235656661333263323536363330356362643333
38353034306362656234663030313435306234353139376135383233363264613135303937663637 34346666656432626365383639326538643862346265316263326531623631383962383734316330
31353530333162613563626435316231303037353639653366353032393937306134373935356564 39333430613761663337306331623461643635653431343336663163343766373464366538313335
37386364333533626638383435303365356465336637383933386463623930356633326331316231 61643538643231333636643836663663313534356662386532633331346664653262353839643066
64353334353764383764326131326236353532666330303765336265356632356433303338323831 36393366653131316636646336313362656662666163333635633132323438353435373430643839
32336334373433623134373765393365613465316462646439623366626631353338613464643139 37623936393962333065663536306238653466363634386632366637363265303734356535333735
33313634626663623961653632633064393364336434373137306261316264663061313065396536 64623330303965393533326563643063303762646664666464643239386435343065326234306632
32326634643164623635666335363839356362633038636330626663326130636363616635333732 35346338373866303838613933653230373737396134653533376265356432333933356237636338
61306135643830613433343131623263626265373161646266666234383838376566356365653036 66656536393530316435323863373962636465333331653364626162326562393565313538633264
35303235353537333831623066356462633135316464663133633933356565323538373732313839 34613633393862333731336563636136666166613037613833333063303162373339663539646631
39636236343136363233613734366539656139636666303266323162316235343030633662343636 36303962356562306239616634376339356135666663303836353061663039343836356262373932
39353465333034303838663264323532663832373932306638323539353938313338373966346339 65346466373532633365383835323062313531623130396130376531626333653862393462643631
33653563386136623239303561616235623439616236336239353730326264353261633433393638 366330333666336262373364663864336633
64656465356662303266656235303361363462376430386137363330633339643961343332393266
37636364623766616265616430636666636130353765363636656361356632356436626232336663
31373336663136383738336436373539373962613161363738393761666636396330386238646566
31633831646366383439363431376335333464363234353632633062653432376339353863616539
64666463316662626233643964363237326638613335623766323535636263313033363236323963
65383935643035313062383135653361666331616232393262343562643435666362373564323263
38333039663831343236333231313964323734623532663336323636363230343530633432346662
31353931636336346462643365626635336639666339393331366665333161353566623963616236
38363430316339326536366165616564336630626662383737346433636539303463643863373863
66636231336238396265623139336630613933383166383734333837616639373536333265656237
63666262383265396261316564336635373262313164383061646364356131396331323135323266
33383030643732343938383637663465316463346432643064653035303865383433653333353437
63633864353235656662346462666165383432333062353433623738376233346639663234376461
35306137333963393537616635326536386339653734316138363165656431653232323963366635
63373263643133393966396163333932613233373637326639373635363837656437353533623862
66356336353163303638316364313339333533653265356138383763326633633761396238326336
64643966333832633431613532623738666535393361323631636335653133363636663562376535
64323539306434633165356134623566666564323437316535666235383162646163343761343331
62353036663033333932333238646464656261336363303639303163303636383639613265396632
64653137313330643133316664303764336536666534336238643833663165613231396366393739
32323133616334373062306530343462353563616663333536373665323532636561303435386362
38636539623036326432323465303833316436373135326335313061363130383139343933346661
31326638356262613866613131376438396662616434643638326561613365623134656535373335
62363435626431393632636338353632656430613564336166353339636232316234666430326431
38653966303736303964653737323032336533313332643534303334613261646530396630633833
35646137323766373536613339663333643134326335306635303834366330333132613230616439
31353066303733313338636561613931303931663263326530353138323863643466663261333965
39313363383030333130376331616533643437393830316661383766613834646663653435323364
30316536336236336262643434636538326137373139363832313962663638333863346432633863
37623039373763653339653235363866666639343039323636633864373034323761636365666263
61313235613764626335313666366663383663663532353738636635653861643161623736306635
65353030646331313565653362356463323265653962306236326263323561316661626463636633
38366133363034623630346235313237376332666537373762363861373465376534646233636230
34353636316464346662663530613437643436373931623966383566633739653834366662633538
30343766366430636333646463393238633963343932313536366432336266613166323939613533
62353533346130346235336262653561366436376362633966336439663838356431303039626335
61626331333737623133356432666137393463666661386664373630306666616337666432363031
33616330376630653538323636663930393033313965343830636566343234653361333135313766
64666365343862343333626666656333323431623061656534343765643338333434393061326538
36353331363131386334356136383439613731333766353833643364363665343937653636306464
36373562333164633637373162633430353266343465613635326536663962633335303063376663
61353235353034356638376532323530323138383465346234643639636136636532356666336236
61633936623937366131343836633261643238643663353136313963303161313936393838323963
63383361653834366136333936373463333634326635316132323632613533343933613434643031
34333962353831643933303362313033643736353163306464373231353633373261663361613065
34653165643636383935343162653034333438306437616263366130393664666661616465343339
39303065643037636639633334656430656334643038306561393330366365613731363336333638
35316137633262343833636631373063316361316535373536653561363463326437353464343161
31386237363766346434353032306561643436393935303836363339316134303330306239306166
64366161646664373535666333303763306431316639343262663762303239303339323063303462
38373235656137303832623865376131616365376538613766373331633831346665323962316364
33396337363235613162376162373733373366613462373432353330363762393062353436316539
65643839333461366564336461346565333638643836653063373464376365326433613939666463
37616235306632373262396435343264313338386364613132363537623762653530383537653236
31303466623362356366663731376465363661613434623834663432633133656132376131376332
62653766326137396665666330336632306263663464383263313732326338363937363566393733
61633862376162323536363366396434313732366262646535333831663032333137373639383066
35363235356430643965343665306566633630623863343866613530626662623063616434623037
38383938656465643364636333316461613833623464366635396365393766383163633439396236
62623930316339306235646431383866663038626135356337343262346562336166306532653833
36613136376136633463636261646566323564363862613163646433643266643231343337653865
38303866326164663030666635323738346536376462326631343439393364623636396665343937
63346566663832313536646435626437383335623232366234626665323663356431643264353630
32396130633337333164346662343539383432623265653066373665343532653435653835656561
63383431663563636437626435333632303765653264303435633862323466336533333765666161
34313931386236313338633265323330653233393835663035353061346533353736316633353666
30343934363834386234306332383737373938363236623163376565643235626165303738353563
35636264663937316532653934313730393934633833653130383461343763623636633836646632
61623135623561636430366661373565616463643765306562383963343736306135353434336166
3466

View File

@ -1,2 +0,0 @@
---
maintenance_cron_enable_system: true

View File

@ -1,2 +0,0 @@
---
maintenance_cron_enable_system: true

View File

@ -1,4 +0,0 @@
---
# Tier 1 maintenance cron — hypervisors (journal + apt)
maintenance_cron_enable_system: true
maintenance_cron_enable_docker: false

View File

@ -1,4 +0,0 @@
---
# Tier 2 — Docker weekly prune (identity, monitoring, vaultwarden)
maintenance_cron_enable_system: true
maintenance_cron_enable_docker: true

View File

@ -1,2 +0,0 @@
---
maintenance_cron_enable_system: true

View File

@ -1,9 +1,8 @@
--- ---
# ansibleVM (control @ 10.0.10.157) — plain vars; secrets in group_vars/all/vault.yml $ANSIBLE_VAULT;1.1;AES256
# Previous fully-encrypted host_vars file moved to ansibleVM.yml.vault-bak (broken for Ansible merge). 31306264346663636630656534303766666564333866326139336137383339633338323834653266
6132333337363566623265303037336266646238633036390a663432623861363562386561393264
ansible_become: true 63303565633530383634643538323165383461656539613331386135336265653531336266613865
ansible_become_method: sudo 3833376664366239650a313134653238323437633265373463326231346663366434323733663666
ansible_become_password: "{{ vault_ansiblevm_become_password }}" 38353061373437306431383132333233663639643134363464396163333962373033363661623666
3430633863623962366430613962346264356461373539376263
maintenance_cron_enable_system: true

View File

@ -1,3 +1,4 @@
---
$ANSIBLE_VAULT;1.1;AES256 $ANSIBLE_VAULT;1.1;AES256
66633265383239626163633134656233613638643862323562373330643363323036333334646566 66633265383239626163633134656233613638643862323562373330643363323036333334646566
3439646635343533353432323064643135623532333738380a353866643461636233376432396434 3439646635343533353432323064643135623532333738380a353866643461636233376432396434

View File

@ -1,9 +0,0 @@
---
# Cal.com LXC 210 @ 10.0.10.228 — business / scheduling
cal_public_url: https://cal.levkin.ca
cal_saml_admins: idobkin@gmail.com
cal_saml_db_name: calsaml
cal_authentik_app_slug: cal-com
cal_authentik_provider_name: cal-com-oidc
cal_authentik_host: https://auth.levkin.ca
cal_oidc_client_id: cal-com

View File

@ -0,0 +1,16 @@
---
# Host variables for dev02
# Use ladmin user with sudo to become root
ansible_become: true
ansible_become_method: sudo
ansible_become_password: "{{ vault_dev02_become_password }}"
# Configure shell for ladmin
shell_users:
- ladmin
# Skip data science stack
install_conda: false
install_jupyter: false
install_r: false

View File

@ -1,22 +1,8 @@
--- ---
# git-ci-01 — Gitea Actions runner (VM 115 on pve201 @ 10.0.10.223) # Configure sudo path for git-ci-01
# Sudo may not be in PATH for non-interactive shells
ansible_become_exe: /usr/bin/sudo ansible_become_exe: /usr/bin/sudo
ansible_become_method: sudo ansible_become_method: sudo
# Proxmox (manual / qm): VMID 115, 2 cores, 4096 MB RAM, 64 GB disk (scsi0) # Alternative: if sudo is in a different location, update this
# act_runner: /etc/act_runner/config.yaml — capacity 2, force_pull false # ansible_become_exe: /usr/local/bin/sudo
# Maintenance: /etc/cron.weekly/docker-prune-ci (docker system prune -af --filter until=168h)
#
# Capacity notes (2026-05-23):
# - pve201: VM 104 reduced to 64 GiB (2026-05-23); still tight — consider runner on pve10
# - capacity 3 needs ~8–12 GB RAM on this VM → migrate runner to pve10 or add RAM after freeing pve201
# - 12 repos: capacity 2 on one runner is OK; second runner on pve10 if queues stack up
git_ci_runner_capacity: 2
git_ci_disk_gb: 64
git_ci_proxmox_vmid: 115
git_ci_proxmox_node: pve201
maintenance_cron_enable_system: true
maintenance_cron_enable_docker: true
maintenance_cron_docker_script: /etc/cron.weekly/docker-prune-ci

View File

@ -1,7 +0,0 @@
---
# giteaVM — Gitea on Alpine (Proxmox VM 102 @ 10.0.10.169)
# Alpine uses /etc/periodic/weekly (not cron.weekly); no apt for system-maintenance.
maintenance_cron_enable_system: false
maintenance_cron_enable_docker: false
maintenance_cron_enable_gitea_archive: true
maintenance_cron_gitea_archive_script: /etc/periodic/weekly/gitea-archive-prune

View File

@ -1,4 +0,0 @@
---
# Hermes agent VM 117 @ 10.0.10.36 (user: hermes, admin: ladmin)
# Secrets: vault_hermes_telegram_bot_token, mattermost in /home/hermes/.hermes/secrets/
hermes_home: /home/hermes/.hermes

View File

@ -1,3 +0,0 @@
---
maintenance_cron_enable_system: true
maintenance_cron_enable_docker: true

View File

@ -1,5 +1,8 @@
--- ---
# listmonk LXC 221 on pve10 @ 10.0.10.148 — plain vars; secrets in vault $ANSIBLE_VAULT;1.1;AES256
# Migrated from pve201 VM 113 on 2026-05-23. 31316663336338303832323464623866343366313261653536623233303466636630633235643638
proxmox_vmid: 221 3666646431323061313836333233356162643462323763380a623666663062386337393439653134
proxmox_node: PVENAS 61616135353966333639323031643263646231636332613935353234363134356435646266343866
3034653235393636350a626362333764313732646663653838313233326438646330393336346539
30393364323237396633343133616439393563326161636366613965366161656364343939313334
3430306634396361353238643735363430383433323431393230

View File

@ -1,4 +0,0 @@
---
# Control node (runs playbooks with connection: local).
# Use project venv so API deps (proxmoxer, etc.) match `make bootstrap`.
ansible_python_interpreter: "{{ inventory_dir }}/../../.venv/bin/python3"

View File

@ -1,7 +0,0 @@
---
# Mailcow VM 106 on pve201 (Mailcow-debian)
# API/UI: https://mail.levkine.ca — domain levkine.ca (with e)
# SSH: root only (no ladmin). First access: make copy-ssh-key-mailcow
mailcow_url: "https://mail.levkine.ca"
mailcow_domain: "levkine.ca"
mailcow_alerts_user: "alerts"

View File

@ -1,3 +0,0 @@
---
maintenance_cron_enable_system: true
maintenance_cron_enable_docker: true

View File

@ -1,8 +1,8 @@
--- ---
# vaultwarden VM 104 on pve10 @ 10.0.10.142 (ladmin + sudo) $ANSIBLE_VAULT;1.1;AES256
ansible_become: true 35633833353965363964376161393730613065663236326239376562356231316166656131366263
ansible_become_method: sudo 6263363436373965316339623139353830643062393165370a643138356561613537616431316534
ansible_become_password: "{{ vault_vaultwarden_become_password }}" 63386635363838626465396439303664316635633239653639646338393130666164653262316135
3937376464303935620a343530333030643830383130646532613533336435383334373831343261
maintenance_cron_enable_system: true 37653138613132616165636132623037623033343265663734626536366361373130353139383634
maintenance_cron_enable_docker: true 6664346538653965343263376538636336393164356434646264

View File

@ -1,3 +0,0 @@
---
maintenance_cron_enable_system: true
maintenance_cron_enable_docker: true

View File

@ -2,27 +2,16 @@
# Primary IPs: Tailscale (100.x.x.x) for remote access # Primary IPs: Tailscale (100.x.x.x) for remote access
# Fallback IPs: Local network (10.0.x.x) when Tailscale is down # Fallback IPs: Local network (10.0.x.x) when Tailscale is down
# Usage: ansible_host_fallback is available for manual fallback # Usage: ansible_host_fallback is available for manual fallback
# Public URLs: levkin.ca DNS A records → Caddy (142.180.237.136), except home → 100.100.100.100
# #
# NOTE: Proxmox app projects (dev/qa/prod) are provisioned dynamically via # NOTE: Proxmox app projects (dev/qa/prod) are provisioned dynamically via
# `playbooks/app/site.yml` (it uses `add_host` based on `app_projects`). # `playbooks/app/site.yml` (it uses `add_host` based on `app_projects`).
# You generally do NOT need to add project hosts here. # You generally do NOT need to add project hosts here.
[proxmox]
pve201 ansible_host=10.0.10.201 ansible_user=root
pve10 ansible_host=10.0.10.10 ansible_user=root
[sites]
levkin ansible_host=10.0.10.60 ansible_user=root url=https://levkin.ca proxmox_vmid=220 proxmox_node=PVENAS
caseware ansible_host=10.0.10.105 ansible_user=root url=https://caseware.levkin.ca proxmox_vmid=215 proxmox_node=PVENAS
auto ansible_host=10.0.10.59 ansible_user=root url=https://auto.levkin.ca proxmox_vmid=216 proxmox_node=PVENAS
portfolio ansible_host=10.0.10.106 ansible_user=root url=https://iliadobkin.com proxmox_vmid=219 proxmox_node=PVENAS
[dev] [dev]
dev01 ansible_host=10.0.30.105 ansible_user=ladmin dev01 ansible_host=10.0.30.105 ansible_user=ladmin
bottom ansible_host=10.0.10.156 ansible_user=beast bottom ansible_host=10.0.10.156 ansible_user=beast
debianDesktopVM ansible_host=10.0.10.206 ansible_user=user skip_reboot=true debianDesktopVM ansible_host=10.0.10.206 ansible_user=user skip_reboot=true
devGPU ansible_host=10.0.10.122 ansible_user=root proxmox_vmid=104 proxmox_node=pve201 # GPU-Dev-Debian, Ollama + RTX 4080 devGPU ansible_host=10.0.30.63 ansible_user=root
[qa] [qa]
git-ci-01 ansible_host=10.0.10.223 ansible_user=ladmin git-ci-01 ansible_host=10.0.10.223 ansible_user=ladmin
@ -33,34 +22,25 @@ KrakenMint ansible_host=10.0.10.120 ansible_user=ladmin
[ansible] [ansible]
ansibleVM ansible_host=10.0.10.157 ansible_user=master ansibleVM ansible_host=10.0.10.157 ansible_user=master
[comms] [tailscale]
# pve201 — email + newsletters tailscaleVM ansible_host=100.66.218.53 ansible_user=ladmin
mailcow ansible_host=10.0.10.132 ansible_user=root url=https://mail.levkine.ca proxmox_vmid=106 proxmox_node=pve201
listmonk ansible_host=10.0.10.148 ansible_user=root url=https://listmonk.levkin.ca proxmox_vmid=221 proxmox_node=PVENAS
mattermost ansible_host=10.0.10.237 ansible_user=root url=https://slack.levkin.ca proxmox_vmid=107 proxmox_node=PVENAS
[services] [services]
# VMID 117: on PVENAS (pve10) caddy ansible_host=10.0.10.50 ansible_user=root
hermes ansible_host=10.0.10.36 ansible_user=ladmin url=https://hermes.levkin.ca proxmox_vmid=117 proxmox_node=PVENAS jellyfin ansible_host=10.0.10.232 ansible_user=root
caddy ansible_host=10.0.10.50 ansible_user=root proxmox_vmid=106 proxmox_node=PVENAS listmonk ansible_host=10.0.10.148 ansible_user=root
cal ansible_host=10.0.10.228 ansible_user=root url=https://cal.levkin.ca proxmox_vmid=210 proxmox_node=PVENAS nextcloud ansible_host=10.0.10.25 ansible_user=root
identity ansible_host=10.0.10.21 ansible_user=root url=https://auth.levkin.ca proxmox_vmid=217 proxmox_node=PVENAS actual ansible_host=10.0.10.158 ansible_user=root
monitoring ansible_host=10.0.10.22 ansible_user=root url=http://10.0.10.22:3001 proxmox_vmid=218 proxmox_node=PVENAS uptime_kuma_port=3001 dockge_port=5001 umami_port=3000 vikanjans ansible_host=10.0.10.159 ansible_user=root
giteaVM ansible_host=10.0.10.169 ansible_user=root url=https://git.levkin.ca proxmox_vmid=102 proxmox_node=PVENAS n8n ansible_host=10.0.10.154 ansible_user=root
n8n ansible_host=10.0.10.154 ansible_user=root url=https://n8n.levkin.ca proxmox_vmid=103 proxmox_node=PVENAS giteaVM ansible_host=10.0.10.169 ansible_user=root
vaultwardenVM ansible_host=10.0.10.142 ansible_user=ladmin url=https://vault.levkin.ca proxmox_vmid=104 proxmox_node=PVENAS portainerVM ansible_host=10.0.30.69 ansible_user=ladmin
actual ansible_host=10.0.10.158 ansible_user=root url=https://budget.levkin.ca proxmox_vmid=108 proxmox_node=PVENAS homepageVM ansible_host=10.0.30.12 ansible_user=homepage
vikunja ansible_host=10.0.10.159 ansible_user=root url=https://todo.levkin.ca proxmox_vmid=301 proxmox_node=pve201 vaultwardenVM ansible_host=10.0.10.142 ansible_user=ladmin
qBittorrent ansible_host=10.0.10.91 ansible_user=root port=8080 qBittorrent ansible_host=10.0.10.91 ansible_user=root port=8080
jellyfin ansible_host=10.0.10.232 ansible_user=root url=https://jelly.levkin.ca proxmox_vmid=101 proxmox_node=PVENAS # stopped until NAS pool healthy
# Retired / stopped — kept for reference; do not run playbooks against these without intent [desktop]
# nextcloud ansible_host=10.0.10.24 ansible_user=root url=https://nextcloud.levkin.ca # VM 201 decommission desktop-beast ansible_host=100.117.34.106 ansible_user=beast
# portainerVM ansible_host=10.0.30.69 ansible_user=ladmin # retired → Dockge on monitoring
# homepageVM ansible_host=10.0.30.12 ansible_user=homepage # VM 100 stopped on pve10
#[desktop]
#desktop-beast ansible_host=100.117.34.106 ansible_user=beast
[local] [local]
localhost ansible_connection=local localhost ansible_connection=local

View File

@ -1,53 +0,0 @@
---
# Playbook: caddy-auth-authentik
# Purpose: Add auth.levkin.ca reverse proxy to Caddy (Phase 1 Authentik)
# Targets: caddy
# Usage: make -f Makefile caddy-auth OR ansible-playbook playbooks/caddy-auth-authentik.yml
#
# Both tasks edit /etc/caddy/Caddyfile in place; the "Reload caddy" handler
# runs only when one of them reports a change.
- name: Add Authentik proxy block to Caddy
  hosts: caddy
  become: true
  become_method: ansible.builtin.su
  tasks:
    - name: Ensure auth.levkin.ca HTTPS block exists (after cal block)
      ansible.builtin.shell: |
        set -euo pipefail
        caddyfile=/etc/caddy/Caddyfile
        # Block already present: print nothing so the task reports "ok".
        if grep -q '^auth\.levkin\.ca {' "$caddyfile"; then
          exit 0
        fi
        staged="$(mktemp)"
        trap 'rm -f "$staged"' EXIT
        # Copy the file through and append the new site block right after the
        # closing brace of the cal.levkin.ca block; exit non-zero when that
        # anchor block does not exist instead of rewriting an unchanged file.
        awk '
          /^cal\.levkin\.ca \{/ { in_cal=1 }
          in_cal && /^}$/ && !done {
            print
            print ""
            print "auth.levkin.ca {"
            print " import security-headers"
            print " encode gzip"
            print " reverse_proxy 10.0.10.21:9000"
            print "}"
            done=1
            next
          }
          { print }
          END {
            if (!done) {
              print "cal.levkin.ca block not found in Caddyfile" > "/dev/stderr"
              exit 1
            }
          }
        ' "$caddyfile" > "$staged"
        # Overwrite in place so the Caddyfile keeps its owner and mode.
        cat "$staged" > "$caddyfile"
        echo inserted
      args:
        executable: /bin/bash
      register: auth_https_block
      # "inserted" is only printed when the file was actually rewritten.
      changed_when: "'inserted' in auth_https_block.stdout"
      notify: Reload caddy

    - name: Ensure auth.levkin.ca HTTP redirect in :80 block
      ansible.builtin.blockinfile:
        path: /etc/caddy/Caddyfile
        marker: "# {mark} ANSIBLE MANAGED auth.levkin.ca :80"
        insertafter: '@vault host vault.levkin.ca'
        block: |
          @auth host auth.levkin.ca
          redir @auth https://auth.levkin.ca{uri} permanent
      notify: Reload caddy

  handlers:
    - name: Reload caddy
      ansible.builtin.command: caddy reload --config /etc/caddy/Caddyfile
      changed_when: true

View File

@ -1,55 +0,0 @@
---
# Playbook: caddy-levkin-site
# Purpose: Add levkin.ca reverse proxy to Caddy (site LXC 220)
# Targets: caddy
# Usage: make caddy-levkin
#
# Override the proxied backend with levkin_site_upstream (default 10.0.10.60:80).
- name: Add levkin.ca proxy block to Caddy
  hosts: caddy
  become: true
  become_method: ansible.builtin.su
  tasks:
    - name: Ensure levkin.ca HTTPS block exists (after caseware block)
      ansible.builtin.shell: |
        set -euo pipefail
        caddyfile=/etc/caddy/Caddyfile
        # Block already present (either header form): report "ok", not "changed".
        if grep -q '^levkin\.ca,' "$caddyfile" || grep -q '^levkin\.ca {' "$caddyfile"; then
          exit 0
        fi
        staged="$(mktemp)"
        trap 'rm -f "$staged"' EXIT
        # Append the site block right after the closing brace of the
        # caseware.levkin.ca block; fail when that anchor block is missing.
        awk -v upstream="{{ levkin_site_upstream | default('10.0.10.60:80') }}" '
          /^caseware\.levkin\.ca \{/ { in_cw=1 }
          in_cw && /^}$/ && !done {
            print
            print ""
            print "levkin.ca, www.levkin.ca {"
            print " import security-headers"
            print " @www host www.levkin.ca"
            print " redir @www https://levkin.ca{uri} permanent"
            print " reverse_proxy " upstream
            print "}"
            done=1
            next
          }
          { print }
          END {
            if (!done) {
              print "caseware.levkin.ca block not found in Caddyfile" > "/dev/stderr"
              exit 1
            }
          }
        ' "$caddyfile" > "$staged"
        # Overwrite in place so the Caddyfile keeps its owner and mode.
        cat "$staged" > "$caddyfile"
        echo inserted
      args:
        executable: /bin/bash
      register: levkin_https_block
      # rc is 0 on the early-exit path too, so key "changed" off the marker.
      changed_when: "'inserted' in levkin_https_block.stdout"
      notify: Reload caddy

    - name: Ensure levkin.ca HTTP redirect in :80 block
      ansible.builtin.blockinfile:
        path: /etc/caddy/Caddyfile
        marker: "# {mark} ANSIBLE MANAGED levkin.ca :80"
        insertafter: '@vikunja host todo.levkin.ca'
        block: |
          @levkin host levkin.ca www.levkin.ca
          redir @levkin https://levkin.ca{uri} permanent
      notify: Reload caddy

  handlers:
    - name: Reload caddy
      ansible.builtin.command: caddy reload --config /etc/caddy/Caddyfile
      changed_when: true

View File

@ -1,72 +0,0 @@
---
# Playbook: caddy-monitoring-sites
# Purpose: stats.levkin.ca (Umami) + status.levkin.ca (Kuma status page) on Caddy VM 106
# Targets: caddy
# Usage: make caddy-monitoring
#
# Task order matters: the status block is anchored on the stats block that the
# first task inserts. Upstreams can be overridden with
# monitoring_umami_upstream / monitoring_kuma_upstream.
- name: Ensure monitoring public proxies on Caddy
  hosts: caddy
  become: true
  become_method: ansible.builtin.su
  tasks:
    - name: Ensure stats.levkin.ca block exists
      ansible.builtin.shell: |
        set -euo pipefail
        caddyfile=/etc/caddy/Caddyfile
        # Block already present: print nothing so the task reports "ok".
        if grep -q '^stats\.levkin\.ca {' "$caddyfile"; then
          exit 0
        fi
        staged="$(mktemp)"
        trap 'rm -f "$staged"' EXIT
        # Insert after the caseware.levkin.ca block; fail when it is missing.
        awk -v upstream="{{ monitoring_umami_upstream | default('10.0.10.22:3000') }}" '
          /^caseware\.levkin\.ca \{/ { in_cw=1 }
          in_cw && /^}$/ && !done {
            print
            print ""
            print "stats.levkin.ca {"
            print " import security-headers"
            print " encode gzip"
            print " reverse_proxy " upstream
            print "}"
            done=1
            next
          }
          { print }
          END {
            if (!done) {
              print "caseware.levkin.ca block not found in Caddyfile" > "/dev/stderr"
              exit 1
            }
          }
        ' "$caddyfile" > "$staged"
        # Overwrite in place so the Caddyfile keeps its owner and mode.
        cat "$staged" > "$caddyfile"
        echo inserted
      args:
        executable: /bin/bash
      register: stats_block
      # rc is 0 on the early-exit path too, so key "changed" off the marker.
      changed_when: "'inserted' in stats_block.stdout"
      notify: Reload caddy

    - name: Ensure status.levkin.ca block exists
      ansible.builtin.shell: |
        set -euo pipefail
        caddyfile=/etc/caddy/Caddyfile
        if grep -q '^status\.levkin\.ca {' "$caddyfile"; then
          exit 0
        fi
        staged="$(mktemp)"
        trap 'rm -f "$staged"' EXIT
        # Insert after the stats.levkin.ca block; fail when it is missing.
        awk -v upstream="{{ monitoring_kuma_upstream | default('10.0.10.22:3001') }}" '
          /^stats\.levkin\.ca \{/ { in_stats=1 }
          in_stats && /^}$/ && !done {
            print
            print ""
            print "status.levkin.ca {"
            print " import security-headers"
            print " encode gzip"
            print " reverse_proxy " upstream
            print "}"
            done=1
            next
          }
          { print }
          END {
            if (!done) {
              print "stats.levkin.ca block not found in Caddyfile" > "/dev/stderr"
              exit 1
            }
          }
        ' "$caddyfile" > "$staged"
        cat "$staged" > "$caddyfile"
        echo inserted
      args:
        executable: /bin/bash
      register: status_block
      changed_when: "'inserted' in status_block.stdout"
      notify: Reload caddy

  handlers:
    - name: Reload caddy
      ansible.builtin.command: caddy reload --config /etc/caddy/Caddyfile
      changed_when: true

View File

@ -1,79 +0,0 @@
---
# Playbook: cal-authentik-oidc
# Purpose: Enable Cal.com SSO (SAML DB + license env) and Authentik OIDC provider
# Targets: cal (LXC 210), identity (LXC 217)
# Usage: make cal-oidc
# Manual: https://cal.levkin.ca/settings/security/sso — enter Client ID, Secret, Well Known URL

# Play 1: resolve the OIDC client secret once on the controller so the cal and
# identity plays below both receive the same value.
- name: Prepare OIDC client secret
  hosts: localhost
  gather_facts: false
  tasks:
    - name: Use vault OIDC secret or generate one for this run
      ansible.builtin.set_fact:
        # default(..., true) also replaces an EMPTY vault value, matching the
        # "not defined or length == 0" condition of the reminder task below.
        cal_oidc_client_secret_effective: >-
          {{ vault_cal_oidc_client_secret
          | default(lookup('password', '/dev/null length=48 chars=ascii_letters,digits'), true) }}
      no_log: true

    - name: Remind to persist generated secret in vault
      ansible.builtin.debug:
        msg: >-
          vault_cal_oidc_client_secret was not set — generated for this run only.
          Add it to vault.yml and re-run so Authentik and Cal stay in sync.
      when: vault_cal_oidc_client_secret is not defined or vault_cal_oidc_client_secret | length == 0

# Play 2: Cal.com host — read the Postgres credentials from the compose .env,
# derive the SAML database URL, then apply the cal_sso role.
- name: Cal.com — SAML database and compose SSO env
  hosts: cal
  become: true
  vars:
    vault_cal_oidc_client_secret: "{{ hostvars['localhost']['cal_oidc_client_secret_effective'] }}"
  pre_tasks:
    - name: Load Cal Postgres credentials from .env
      ansible.builtin.shell: |
        set -a
        source {{ cal_compose_dir }}/.env
        printf 'user=%s\npass=%s\n' "$POSTGRES_USER" "$POSTGRES_PASSWORD"
      args:
        executable: /bin/bash
      register: cal_pg_creds
      changed_when: false
      no_log: true

    - name: Set Cal database facts
      ansible.builtin.set_fact:
        # stdout line 0 is "user=...", line 1 is "pass=..." (see printf above).
        cal_postgres_user: "{{ cal_pg_creds.stdout_lines[0] | regex_replace('^user=', '') }}"
        cal_postgres_password: "{{ cal_pg_creds.stdout_lines[1] | regex_replace('^pass=', '') }}"
        cal_saml_database_url: >-
          postgresql://{{ cal_pg_creds.stdout_lines[0] | regex_replace('^user=', '') }}:{{
          cal_pg_creds.stdout_lines[1] | regex_replace('^pass=', '') }}@db:5432/{{ cal_saml_db_name }}
      no_log: true
  roles:
    - role: cal_sso

# Play 3: Authentik host — only the authentik.yml task file of the role.
- name: Authentik — Cal.com OIDC provider
  hosts: identity
  become: true
  vars:
    vault_cal_oidc_client_secret: "{{ hostvars['localhost']['cal_oidc_client_secret_effective'] }}"
  tasks:
    - name: Authentik OIDC for Cal.com
      ansible.builtin.import_role:
        name: cal_sso
        tasks_from: authentik.yml

# Play 4: print the values that still have to be entered by hand in the Cal UI.
- name: Cal.com OIDC — finish in UI
  hosts: cal
  gather_facts: false
  tasks:
    - name: Print Cal.com SSO configuration values
      ansible.builtin.debug:
        msg:
          - "1. Log in to Cal as {{ cal_saml_admins }}"
          - "2. Open {{ cal_public_url }}/settings/security/sso"
          - "3. Configure OIDC:"
          - " Client ID: {{ cal_oidc_client_id }}"
          - " Client Secret: (vault_cal_oidc_client_secret — see vault)"
          - " Well Known URL: {{ cal_authentik_host }}/application/o/{{ cal_authentik_app_slug }}/.well-known/openid-configuration"
          - "4. Test SSO login; keep local password as break-glass"

View File

@ -24,7 +24,6 @@
roles: roles:
- {role: maintenance, tags: ['maintenance']} - {role: maintenance, tags: ['maintenance']}
- {role: maintenance_cron, tags: ['maintenance', 'maintenance_cron']}
post_tasks: post_tasks:
- name: Display maintenance completion - name: Display maintenance completion

View File

@ -1,20 +0,0 @@
---
# Playbook: ssh-keys
# Purpose: Install your workstation SSH public key on all inventory hosts
# Targets: every host outside the "local" group
# Usage: make copy-ssh-keys-ansible
#        make copy-ssh-keys-ansible GROUP=services
#        make copy-ssh-keys-ansible HOST=dev01
- name: Deploy workstation SSH public key
  hosts: all:!local
  gather_facts: false
  vars:
    # SSH_PUBLIC_KEY from the controller environment wins; an unset or empty
    # value falls back to $HOME/.ssh/id_ed25519.pub.
    ssh_public_key_file: "{{ lookup('env', 'SSH_PUBLIC_KEY') | default(lookup('env', 'HOME') + '/.ssh/id_ed25519.pub', true) }}"
  tasks:
    - name: Add SSH public key for ansible_user
      become: false
      ansible.posix.authorized_key:
        # The key file is read on the controller via the file lookup.
        key: "{{ lookup('file', ssh_public_key_file) }}"
        user: "{{ ansible_user | default(ansible_user_id) }}"
        state: present

View File

@ -1,10 +0,0 @@
---
# Defaults for the cal_sso role (Cal.com SSO backed by Authentik OIDC).
# Directory holding the Cal.com docker-compose.yml and its .env file.
cal_compose_dir: /opt/cal
# Name of the extra Postgres database created for SAML/SSO data.
cal_saml_db_name: calsaml
# Written to SAML_ADMINS in the Cal .env and shown in the final UI instructions.
cal_saml_admins: idobkin@gmail.com
# Public Cal.com base URL (health check, redirect URI, launch URL).
cal_public_url: https://cal.levkin.ca
# Authentik application slug; also the blueprint file name prefix.
cal_authentik_app_slug: cal-com
# Name of the OAuth2 provider object in the Authentik blueprint.
cal_authentik_provider_name: cal-com-oidc
# Authentik base URL used to build the OIDC well-known URL.
cal_authentik_host: https://auth.levkin.ca
# Set in vault: vault_cal_oidc_client_secret (generated on first run if absent)
# The OIDC client id simply reuses the application slug.
cal_oidc_client_id: "{{ cal_authentik_app_slug }}"

View File

@ -1,20 +0,0 @@
---
# Handlers for the cal_sso role. Each one shells out to docker compose in the
# relevant stack directory and always reports "changed".

# Cal.com stack lives in cal_compose_dir.
- name: Recreate calcom stack
  ansible.builtin.command: docker compose up -d
  args:
    chdir: "{{ cal_compose_dir }}"
  changed_when: true

# Only the Authentik server and worker services are brought up.
- name: Recreate authentik server
  ansible.builtin.command: docker compose up -d server worker
  args:
    chdir: /opt/authentik
  changed_when: true

# Applies the blueprint file named after the application slug.
- name: Apply authentik cal blueprint
  ansible.builtin.command: "docker compose exec -T server ak apply_blueprint {{ cal_authentik_app_slug }}-oidc.yaml"
  args:
    chdir: /opt/authentik
  changed_when: true

View File

@ -1,25 +0,0 @@
---
# Authentik side of the cal_sso role: make a host blueprints directory
# available to the Authentik server container and deploy the Cal.com OIDC
# blueprint into it.
- name: Ensure Authentik blueprints directory on host
ansible.builtin.file:
path: /opt/authentik/blueprints
state: directory
mode: "0755"
# Inserts a "./blueprints:/blueprints" volume entry between the ./data and
# ./custom-templates volume lines of the server service in compose.yml.
# NOTE(review): the pattern only matches while ./data is directly followed by
# ./custom-templates, so a second run should be a no-op — confirm against the
# real compose file (whitespace in the pattern is significant).
- name: Add blueprints volume to Authentik server service
ansible.builtin.replace:
path: /opt/authentik/compose.yml
regexp: '(?ms)( server:.*? volumes:\n - \./data:/data\n)( - \./custom-templates:/templates)'
replace: '\1 - ./blueprints:/blueprints\n\2'
notify:
- Recreate authentik server
- Apply authentik cal blueprint
# The rendered blueprint is named "<cal_authentik_app_slug>-oidc.yaml", the
# same file name the "Apply authentik cal blueprint" handler passes to ak.
- name: Deploy Cal.com OIDC blueprint
ansible.builtin.template:
src: authentik-cal-oidc.yaml.j2
dest: "/opt/authentik/blueprints/{{ cal_authentik_app_slug }}-oidc.yaml"
mode: "0644"
notify: Apply authentik cal blueprint
# Run the notified handlers now rather than at the end of the play.
- name: Flush Authentik blueprint handler
ansible.builtin.meta: flush_handlers

View File

@ -1,52 +0,0 @@
---
# Cal.com side of the cal_sso role: create the SAML database, deploy the
# compose file, write the SAML settings into .env, then wait for Cal to answer.

# psql prints "1" when the database already exists, nothing otherwise.
- name: Ensure SAML database exists on Cal Postgres
  ansible.builtin.command:
    argv:
      - docker
      - exec
      - cal-db
      - psql
      - -U
      - "{{ cal_postgres_user }}"
      - -tc
      - "SELECT 1 FROM pg_database WHERE datname='{{ cal_saml_db_name }}'"
  register: cal_saml_db_check
  failed_when: cal_saml_db_check.rc != 0
  changed_when: false

- name: Create SAML database
  ansible.builtin.command:
    argv:
      - docker
      - exec
      - cal-db
      - psql
      - -U
      - "{{ cal_postgres_user }}"
      - -c
      - "CREATE DATABASE {{ cal_saml_db_name }}"
  changed_when: true
  when: cal_saml_db_check.stdout | trim != "1"

- name: Deploy docker-compose with SSO environment
  ansible.builtin.template:
    src: docker-compose.yml.j2
    dest: "{{ cal_compose_dir }}/docker-compose.yml"
    mode: "0644"
    owner: root
    group: root
  notify: Recreate calcom stack

# Each KEY=value line is replaced when present and appended otherwise; the
# .env file itself must already exist (create: false).
- name: Ensure SAML env vars in Cal .env
  ansible.builtin.lineinfile:
    path: "{{ cal_compose_dir }}/.env"
    create: false
    regexp: "^{{ item.key }}="
    line: "{{ item.key }}={{ item.value }}"
  loop:
    - {key: SAML_DATABASE_URL, value: "{{ cal_saml_database_url }}"}
    - {key: SAML_ADMINS, value: "{{ cal_saml_admins }}"}
  no_log: true
  notify: Recreate calcom stack

# Recreate the stack now so the HTTP check below runs against the new config.
- name: Flush handlers before OIDC UI step
  ansible.builtin.meta: flush_handlers

# Up to 12 attempts, 10 s apart; both 200 and 404 count as "Cal is answering".
- name: Wait for Cal.com HTTP after stack recreate
  ansible.builtin.uri:
    url: "{{ cal_public_url }}/api/version"
    status_code: [200, 404]
  register: cal_http
  until: cal_http.status in [200, 404]
  retries: 12
  delay: 10

View File

@ -1,38 +0,0 @@
# Cal.com OIDC provider + application (managed by Ansible)
# Rendered by the cal_sso role; variables come from the role defaults and
# vault_cal_oidc_client_secret.
version: 1
metadata:
  name: Cal.com OIDC
  labels:
    blueprints.goauthentik.io/instantiate: "true"
entries:
  # OAuth2/OIDC provider that Cal.com authenticates against.
  - model: authentik_providers_oauth2.oauth2provider
    id: cal-oidc-provider
    identifiers:
      name: {{ cal_authentik_provider_name }}
    attrs:
      name: {{ cal_authentik_provider_name }}
      authorization_flow: !Find [authentik_flows.flow, [slug, default-provider-authorization-implicit-consent]]
      invalidation_flow: !Find [authentik_flows.flow, [slug, default-provider-invalidation-flow]]
      client_type: confidential
      client_id: {{ cal_oidc_client_id }}
      # to_json emits a double-quoted string, so a secret that is all digits or
      # contains YAML-significant characters (":", "#", quotes) stays a string.
      client_secret: {{ vault_cal_oidc_client_secret | to_json }}
      redirect_uris:
        - matching_mode: strict
          url: {{ cal_public_url }}/api/auth/oidc
      signing_key: !Find [authentik_crypto.certificatekeypair, [name, authentik Self-signed Certificate]]
      property_mappings:
        - !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]]
        - !Find [authentik_providers_oauth2.scopemapping, [scope_name, email]]
        - !Find [authentik_providers_oauth2.scopemapping, [scope_name, profile]]
  # Application entry bound to the provider above via !KeyOf.
  - model: authentik_core.application
    id: cal-oidc-app
    identifiers:
      slug: {{ cal_authentik_app_slug }}
    attrs:
      name: Cal.com
      slug: {{ cal_authentik_app_slug }}
      group: ""
      provider: !KeyOf cal-oidc-provider
      policy_engine_mode: any
      meta_launch_url: {{ cal_public_url }}
      meta_icon: https://cal.com/favicon.ico

View File

@ -1,44 +0,0 @@
# Cal.com stack: a Postgres container (cal-db) plus the Cal.com web app.
# Deployed by the cal_sso role to {{ '{{' }} cal_compose_dir {{ '}}' }}/docker-compose.yml; every
# ${...} value is expected to come from the .env file in that same directory
# (the role writes SAML_DATABASE_URL and SAML_ADMINS there).
services:
# Database container; the role runs psql inside it via "docker exec cal-db".
db:
image: postgres:15
container_name: cal-db
restart: unless-stopped
environment:
POSTGRES_USER: ${POSTGRES_USER}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
POSTGRES_DB: ${POSTGRES_DB}
volumes:
- ./postgres-data:/var/lib/postgresql/data
healthcheck:
test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}"]
interval: 10s
timeout: 5s
retries: 5
# Web application; starts only once the db healthcheck reports healthy.
calcom:
image: calcom/cal.com:latest
container_name: calcom
restart: unless-stopped
depends_on:
db:
condition: service_healthy
environment:
DATABASE_URL: ${DATABASE_URL}
DATABASE_DIRECT_URL: ${DATABASE_DIRECT_URL}
NEXT_PUBLIC_WEBAPP_URL: ${NEXT_PUBLIC_WEBAPP_URL}
NEXT_PUBLIC_API_V2_URL: ${NEXT_PUBLIC_API_V2_URL}
NEXTAUTH_URL: ${NEXTAUTH_URL}
NEXTAUTH_SECRET: ${NEXTAUTH_SECRET}
CALENDSO_ENCRYPTION_KEY: ${CALENDSO_ENCRYPTION_KEY}
CALCOM_LICENSE_KEY: ${CALCOM_LICENSE_KEY}
NEXT_PUBLIC_LICENSE_CONSENT: ${NEXT_PUBLIC_LICENSE_CONSENT}
# SSO settings added by the cal_sso role.
SAML_DATABASE_URL: ${SAML_DATABASE_URL}
SAML_ADMINS: ${SAML_ADMINS}
EMAIL_FROM: ${EMAIL_FROM}
EMAIL_SERVER_HOST: ${EMAIL_SERVER_HOST}
EMAIL_SERVER_PORT: ${EMAIL_SERVER_PORT}
EMAIL_SERVER_USER: ${EMAIL_SERVER_USER}
EMAIL_SERVER_PASSWORD: ${EMAIL_SERVER_PASSWORD}
CALCOM_TELEMETRY_DISABLED: ${CALCOM_TELEMETRY_DISABLED}
ports:
- "3000:3000"

View File

@ -1,23 +0,0 @@
# maintenance_cron
Weekly cleanup jobs for production hosts.
## Scripts
| Script | Schedule | Purpose |
|--------|----------|---------|
| `system-maintenance` | `/etc/cron.weekly/` | `journalctl --vacuum-size=500M`, `apt autoremove`, `apt autoclean` |
| `docker-prune` | `/etc/cron.weekly/` | `docker system prune -af --filter until=168h` |
| `gitea-archive-prune` | `/etc/cron.weekly/` | Delete Gitea `repo-archive` files older than 7 days |
## Variables
See `defaults/main.yml`. Enable per host or group:
```yaml
maintenance_cron_enable_system: true
maintenance_cron_enable_docker: true # Docker hosts only
maintenance_cron_enable_gitea_archive: true # giteaVM only
```
Applied via `playbooks/maintenance.yml` (tag `maintenance_cron`).

View File

@ -1,18 +0,0 @@
---
# Defaults for the maintenance_cron role. Each *_enable_* switch gates the
# installation of one script; the *_script values are the install paths.
# Weekly system cleanup (journal + apt)
maintenance_cron_enable_system: true
# Passed to "journalctl --vacuum-size=".
maintenance_cron_journal_vacuum_size: 500M
maintenance_cron_system_script: /etc/cron.weekly/system-maintenance
# Docker prune (CI / Docker hosts)
maintenance_cron_enable_docker: false
# Passed to "docker system prune --filter until=".
maintenance_cron_docker_prune_until: 168h
maintenance_cron_docker_script: /etc/cron.weekly/docker-prune
# File the prune output is appended to.
maintenance_cron_docker_log: /var/log/docker-prune.log
# Gitea repo-archive cache (Alpine Gitea VM)
maintenance_cron_enable_gitea_archive: false
maintenance_cron_gitea_archive_dir: /var/lib/gitea/data/repo-archive
# Files older than this many days (find -mtime) are deleted.
maintenance_cron_gitea_archive_max_age_days: 7
maintenance_cron_gitea_archive_script: /etc/cron.weekly/gitea-archive-prune
# File the before/after report is appended to.
maintenance_cron_gitea_archive_log: /var/log/gitea-archive-prune.log

View File

@ -1,27 +0,0 @@
---
# Installs the weekly maintenance scripts. Each script is rendered only when
# its maintenance_cron_enable_* switch is true; all of them are installed as
# root:root with mode 0755.
- name: Install weekly maintenance scripts
  module_defaults:
    ansible.builtin.template:
      owner: root
      group: root
      mode: '0755'
  block:
    - name: Install weekly system maintenance script
      when: maintenance_cron_enable_system | bool
      ansible.builtin.template:
        src: system-maintenance.sh.j2
        dest: "{{ maintenance_cron_system_script }}"

    - name: Install weekly Docker prune script
      when: maintenance_cron_enable_docker | bool
      ansible.builtin.template:
        src: docker-prune.sh.j2
        dest: "{{ maintenance_cron_docker_script }}"

    - name: Install weekly Gitea archive prune script
      when: maintenance_cron_enable_gitea_archive | bool
      ansible.builtin.template:
        src: gitea-archive-prune.sh.j2
        dest: "{{ maintenance_cron_gitea_archive_script }}"

View File

@ -1,8 +0,0 @@
#!/bin/bash
# Ansible managed — weekly Docker image/container cleanup
set -euo pipefail

# Hosts without a docker CLI have nothing to prune.
command -v docker >/dev/null 2>&1 || exit 0

prune_filter="until={{ maintenance_cron_docker_prune_until }}"
prune_log="{{ maintenance_cron_docker_log }}"

# Remove unused containers, networks, images and build cache matching the
# age filter; stdout and stderr are appended to the log.
/usr/bin/docker system prune -af --filter "${prune_filter}" >> "${prune_log}" 2>&1

View File

@ -1,19 +0,0 @@
#!/bin/sh
# Ansible managed — weekly Gitea repo-archive cache cleanup
# Deletes archive files older than MAX_AGE_DAYS, removes the directories this
# leaves empty, and appends a before/after size report to LOG.
set -eu

ARCHIVE_DIR="{{ maintenance_cron_gitea_archive_dir }}"
LOG="{{ maintenance_cron_gitea_archive_log }}"
MAX_AGE_DAYS="{{ maintenance_cron_gitea_archive_max_age_days }}"

# Nothing to do on hosts without the archive cache.
if [ ! -d "${ARCHIVE_DIR}" ]; then
  exit 0
fi

{
  echo "=== $(date -Iseconds) gitea-archive-prune ==="
  echo "Before: $(du -sh "${ARCHIVE_DIR}" 2>/dev/null | awk '{print $1}')"
  find "${ARCHIVE_DIR}" -type f -mtime "+${MAX_AGE_DAYS}" -delete
  # -mindepth 1 keeps ARCHIVE_DIR itself in place even when everything inside
  # it was pruned; only emptied subdirectories are removed.
  find "${ARCHIVE_DIR}" -mindepth 1 -type d -empty -delete
  echo "After: $(du -sh "${ARCHIVE_DIR}" 2>/dev/null | awk '{print $1}')"
  df -h /
} >> "${LOG}" 2>&1

View File

@ -1,7 +0,0 @@
#!/bin/bash
# Ansible managed — weekly journal vacuum + apt cleanup
set -euo pipefail

# Best effort: hosts without journald (or with nothing to vacuum) are fine.
journalctl --vacuum-size={{ maintenance_cron_journal_vacuum_size }} 2>/dev/null || true

# Skip the package cleanup on hosts that do not use apt instead of failing
# the weekly cron run (same guard style as the docker-prune script).
if ! command -v apt-get >/dev/null 2>&1; then
  exit 0
fi

export DEBIAN_FRONTEND=noninteractive
apt-get autoremove -y
apt-get autoclean -y

View File

@ -1,132 +0,0 @@
#!/usr/bin/env bash
# Enable Beszel alerts (Status, CPU, Memory, Disk) on all monitored systems.
#
# Prerequisite: admin account + SMTP configured (./scripts/beszel-setup-smtp.sh)
#
# Usage:
#   export BESZEL_URL=http://10.0.10.22:8090
#   export BESZEL_EMAIL=you@example.com
#   export BESZEL_PASSWORD='your-beszel-password'
#   ./scripts/beszel-setup-alerts.sh
#
# Optional thresholds (percent unless noted):
#   BESZEL_CPU_THRESHOLD=80
#   BESZEL_MEM_THRESHOLD=85
#   BESZEL_DISK_THRESHOLD=90
set -euo pipefail

script_dir="$(dirname "${BASH_SOURCE[0]}")"
REPO_ROOT="$(cd "${script_dir}/.." && pwd)"

# Fill in defaults for anything the caller did not provide.
: "${BESZEL_URL:=http://10.0.10.22:8090}"
: "${BESZEL_EMAIL:=}"
: "${BESZEL_PASSWORD:=}"
: "${BESZEL_CPU_THRESHOLD:=80}"
: "${BESZEL_MEM_THRESHOLD:=85}"
: "${BESZEL_DISK_THRESHOLD:=90}"

# True while either Beszel admin credential is still empty.
creds_missing() {
  [[ -z "${BESZEL_EMAIL}" || -z "${BESZEL_PASSWORD}" ]]
}

# Fall back to the repo-level .env when credentials were not exported.
# nounset is relaxed while sourcing; allexport marks every assignment made by
# the file for export.
if creds_missing && [[ -f "${REPO_ROOT}/.env" ]]; then
  # shellcheck disable=SC1091
  set +u
  set -a
  # shellcheck source=/dev/null
  source "${REPO_ROOT}/.env"
  set +a
  set -u
  : "${BESZEL_EMAIL:=}"
  : "${BESZEL_PASSWORD:=}"
fi

if creds_missing; then
  echo "Set BESZEL_EMAIL and BESZEL_PASSWORD (Beszel admin)" >&2
  exit 1
fi
export BESZEL_URL BESZEL_EMAIL BESZEL_PASSWORD
export BESZEL_CPU_THRESHOLD BESZEL_MEM_THRESHOLD BESZEL_DISK_THRESHOLD
"${REPO_ROOT}/.venv/bin/python3" <<'PY'
"""Log in to the Beszel hub and enable the standard alerts on every system.

All settings are read from the environment variables exported by the wrapper.
"""
import json
import os
import sys
import urllib.error
import urllib.parse
import urllib.request

base = os.environ["BESZEL_URL"].rstrip("/")
email = os.environ["BESZEL_EMAIL"]
password = os.environ["BESZEL_PASSWORD"]
cpu_threshold = float(os.environ["BESZEL_CPU_THRESHOLD"])
mem_threshold = float(os.environ["BESZEL_MEM_THRESHOLD"])
disk_threshold = float(os.environ["BESZEL_DISK_THRESHOLD"])

# (alert name, threshold); None means the alert is sent without a "value".
ALERTS = [
    ("Status", None),
    ("CPU", cpu_threshold),
    ("Memory", mem_threshold),
    ("Disk", disk_threshold),
]


def req(method, path, token=None, body=None):
    """Send one JSON request to the hub and return ``(status, parsed_json)``.

    ``body`` is JSON-encoded when given; ``token`` is sent verbatim as the
    Authorization header. An empty response body is returned as ``{}``.
    HTTP errors are logged to stderr and re-raised; an unreachable hub ends
    the script with a one-line message instead of a traceback.
    """
    url = f"{base}{path}"
    data = None
    headers = {"Content-Type": "application/json"}
    if body is not None:
        data = json.dumps(body).encode()
    if token:
        headers["Authorization"] = token
    request = urllib.request.Request(url, data=data, headers=headers, method=method)
    try:
        with urllib.request.urlopen(request, timeout=30) as resp:
            raw = resp.read().decode()
            return resp.status, json.loads(raw) if raw else {}
    except urllib.error.HTTPError as e:
        raw = e.read().decode()
        print(f"HTTP {e.code} {path}: {raw}", file=sys.stderr)
        raise
    except urllib.error.URLError as e:
        # HTTPError is a URLError subclass, so this only sees connection-level
        # failures (DNS, refused connection, timeout).
        print(f"Cannot reach {url}: {e.reason}", file=sys.stderr)
        sys.exit(1)


print(f"Login to Beszel at {base} as {email}...")
token = None
# Try the superuser collection first, then regular users.
for collection in ("_superusers", "users"):
    try:
        _, auth = req(
            "POST",
            f"/api/collections/{collection}/auth-with-password",
            body={"identity": email, "password": password},
        )
    except urllib.error.HTTPError:
        continue
    token = auth.get("token")
    if token:
        print(f"Login OK ({collection})")
        break
if not token:
    print("Login failed: check BESZEL_EMAIL and BESZEL_PASSWORD", file=sys.stderr)
    sys.exit(1)

query = urllib.parse.urlencode({"perPage": 500, "fields": "id,name"})
_, systems_resp = req("GET", f"/api/collections/systems/records?{query}", token=token)
systems = systems_resp.get("items", [])
if not systems:
    print("No systems found in Beszel hub", file=sys.stderr)
    sys.exit(1)

system_ids = [s["id"] for s in systems]
names = ", ".join(s.get("name", s["id"]) for s in systems)
print(f"Found {len(system_ids)} systems: {names}")

# One request per alert type, each covering every system id.
for alert_name, threshold in ALERTS:
    body = {"name": alert_name, "systems": system_ids}
    if threshold is not None:
        body["value"] = threshold
    req("POST", "/api/beszel/user-alerts", token=token, body=body)
    if threshold is None:
        print(f" ✅ {alert_name} (down detection)")
    else:
        print(f" ✅ {alert_name} > {threshold}%")

print("\nDone. Alerts apply to all systems. Verify in hub UI (bell icon) or send test:")
# Deliberately a placeholder: never echo any part of the session token.
print(f" curl -X POST {base}/api/beszel/test-notification -H 'Authorization: <token>'")
PY

View File

@ -1,140 +0,0 @@
#!/usr/bin/env bash
# Configure Beszel (PocketBase) SMTP for Mailcow alerts mailbox.
#
# Prerequisite: admin account created at http://10.0.10.22:8090
#
# Usage:
#   export BESZEL_URL=http://10.0.10.22:8090
#   export BESZEL_EMAIL=you@example.com
#   export BESZEL_PASSWORD='your-beszel-password'
#   export SMTP_PASS='alerts@ mailbox password' # or source .env
#   ./scripts/beszel-setup-smtp.sh
#
# Optional: SMTP_TO for test email (default idobkin@gmail.com)
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
BESZEL_URL="${BESZEL_URL:-http://10.0.10.22:8090}"
BESZEL_EMAIL="${BESZEL_EMAIL:-}"
BESZEL_PASSWORD="${BESZEL_PASSWORD:-}"
SMTP_HOST="${SMTP_HOST:-mail.levkine.ca}"
SMTP_PORT="${SMTP_PORT:-587}"
SMTP_USER="${SMTP_USER:-alerts@levkine.ca}"
SMTP_PASS="${SMTP_PASS:-}"
SMTP_TO="${SMTP_TO:-idobkin@gmail.com}"

# Load the repo-level .env when ANY required secret is missing (not only
# SMTP_PASS), so Beszel credentials kept in .env are found even when SMTP_PASS
# was exported by the caller.
if [[ -z "${SMTP_PASS}" || -z "${BESZEL_EMAIL}" || -z "${BESZEL_PASSWORD}" ]] \
  && [[ -f "${REPO_ROOT}/.env" ]]; then
  # Remember what the caller passed in: a blank "KEY=" line in .env must not
  # wipe out an explicitly exported value.
  caller_email="${BESZEL_EMAIL}"
  caller_password="${BESZEL_PASSWORD}"
  caller_smtp_pass="${SMTP_PASS}"
  # shellcheck disable=SC1091
  set +u
  set -a
  # shellcheck source=/dev/null
  source "${REPO_ROOT}/.env"
  set +a
  set -u
  BESZEL_EMAIL="${caller_email:-${BESZEL_EMAIL:-}}"
  BESZEL_PASSWORD="${caller_password:-${BESZEL_PASSWORD:-}}"
  # ALERTS_PASSWORD (the alerts@ mailbox password) is the fallback for SMTP_PASS.
  SMTP_PASS="${caller_smtp_pass:-${SMTP_PASS:-${ALERTS_PASSWORD:-}}}"
fi

if [[ -z "${BESZEL_EMAIL}" || -z "${BESZEL_PASSWORD}" ]]; then
  echo "Set BESZEL_EMAIL and BESZEL_PASSWORD (Beszel admin you just created)" >&2
  exit 1
fi
if [[ -z "${SMTP_PASS}" ]]; then
  echo "Set SMTP_PASS or ALERTS_PASSWORD (alerts@levkine.ca mailbox password)" >&2
  exit 1
fi
export BESZEL_URL BESZEL_EMAIL BESZEL_PASSWORD SMTP_HOST SMTP_PORT SMTP_USER SMTP_PASS SMTP_TO
"${REPO_ROOT}/.venv/bin/python3" <<'PY'
"""Log in to the Beszel hub, store the SMTP settings and request a test email.

All settings are read from the environment variables exported by the wrapper.
"""
import json
import os
import sys
import urllib.error
import urllib.request

env = os.environ
base = env["BESZEL_URL"].rstrip("/")
email = env["BESZEL_EMAIL"]
password = env["BESZEL_PASSWORD"]
smtp_host = env["SMTP_HOST"]
smtp_port = int(env["SMTP_PORT"])
smtp_user = env["SMTP_USER"]
smtp_pass = env["SMTP_PASS"]
smtp_to = env["SMTP_TO"]


def req(method, path, token=None, body=None):
    """Send one JSON request to the hub and return ``(status, parsed_json)``.

    An empty response body is returned as ``{}``. HTTP errors are logged to
    stderr together with the response body and then re-raised.
    """
    headers = {"Content-Type": "application/json"}
    payload = json.dumps(body).encode() if body is not None else None
    if token:
        headers["Authorization"] = token
    request = urllib.request.Request(
        f"{base}{path}", data=payload, headers=headers, method=method
    )
    try:
        with urllib.request.urlopen(request, timeout=30) as resp:
            text = resp.read().decode()
            parsed = json.loads(text) if text else {}
            return resp.status, parsed
    except urllib.error.HTTPError as err:
        detail = err.read().decode()
        print(f"HTTP {err.code} {path}: {detail}", file=sys.stderr)
        raise


def login():
    """Return a session token from the first collection that accepts the login.

    Superusers are tried before regular users; ``None`` means neither worked.
    """
    for collection in ("_superusers", "users"):
        try:
            _, auth = req(
                "POST",
                f"/api/collections/{collection}/auth-with-password",
                body={"identity": email, "password": password},
            )
        except urllib.error.HTTPError:
            continue
        session = auth.get("token")
        if session:
            print(f"Login OK ({collection})")
            return session
    return None


print(f"Login to Beszel at {base} as {email}...")
token = login()
if token is None:
    print("Login failed: check BESZEL_EMAIL and BESZEL_PASSWORD", file=sys.stderr)
    sys.exit(1)

print("Configuring SMTP (Mailcow STARTTLS)...")
smtp_settings = {
    "enabled": True,
    "host": smtp_host,
    "port": smtp_port,
    "username": smtp_user,
    "password": smtp_pass,
    "tls": False,
    "authMethod": "PLAIN",
    "localName": "monitoring.levkin.ca",
}
# Mail is sent from the same mailbox that authenticates against SMTP.
sender_meta = {"senderName": "Beszel", "senderAddress": smtp_user}
req("PATCH", "/api/settings", token=token, body={"smtp": smtp_settings, "meta": sender_meta})
print("SMTP settings saved")

print(f"Sending test email to {smtp_to}...")
test_request = {"email": smtp_to, "template": "verification", "collection": "_superusers"}
req("POST", "/api/settings/test/email", token=token, body=test_request)
print("Test email request accepted — check inbox (and restart beszel if alerts fail later)")

print("\nDone. Restart hub to avoid cached mail client issues:")
print(" ssh root@10.0.10.22 'cd /opt/monitoring && docker compose restart beszel'")
PY

View File

@ -1,60 +0,0 @@
#!/usr/bin/env bash
# Bootstrap root SSH when `su` needs a password (no sudo on host).
# Usage: BOOTSTRAP_SU_PASSWORD='...' ./scripts/bootstrap-root-ssh-su-password.sh HOST
#
# Logs in as BOOTSTRAP_USER, switches to root with `su -` (driven by expect),
# appends the public key to /root/.ssh/authorized_keys, sets
# "PermitRootLogin prohibit-password" and restarts sshd, then verifies that
# root key login works.
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
HOST="${1:-}"
BOOTSTRAP_USER="${BOOTSTRAP_USER:-ladmin}"
PUBKEY_FILE="${SSH_PUBLIC_KEY:-${HOME}/.ssh/id_ed25519.pub}"
SU_PASSWORD="${BOOTSTRAP_SU_PASSWORD:-}"

[[ -n "${HOST}" ]] || { echo "Usage: $0 HOST" >&2; exit 1; }
[[ -n "${SU_PASSWORD}" ]] || { echo "Set BOOTSTRAP_SU_PASSWORD" >&2; exit 1; }
[[ -f "${PUBKEY_FILE}" ]] || { echo "Missing ${PUBKEY_FILE}" >&2; exit 1; }

# Look up ansible_host=<ip> on the inventory line whose first field is HOST.
IP="$(awk -v h="${HOST}" '$1==h {for(i=2;i<=NF;i++) if($i~/^ansible_host=/) {sub(/ansible_host=/,"",$i); print $i; exit}}' \
  "${REPO_ROOT}/inventories/production/hosts")"
[[ -n "${IP}" ]] || { echo "No ansible_host for ${HOST}" >&2; exit 1; }

PUBKEY="$(cat "${PUBKEY_FILE}")"
# The expect script reads these through $env(...); the quoted heredoc keeps the
# shell from expanding anything inside it.
export IP BOOTSTRAP_USER SU_PASSWORD PUBKEY
/usr/bin/expect <<'EXPECT'
set timeout 60
spawn ssh -o StrictHostKeyChecking=accept-new $env(BOOTSTRAP_USER)@$env(IP)
expect {
  -re {[$#] $} { }
  timeout { exit 1 }
}
send "su -\r"
expect {
  "Password:" {
    send "$env(SU_PASSWORD)\r"
  }
  timeout { exit 1 }
}
# Wait for the shell prompt that follows the password. The previous pattern
# contained an empty alternative ("#||") that matched immediately, so the
# script carried on before su had finished; a rejected password now aborts.
expect {
  -re {Authentication failure|incorrect password} { exit 1 }
  -re {[$#] $} { }
  timeout { exit 1 }
}
# Plain bash without profile/rc files gives a predictable "# " root prompt.
send "bash --noprofile --norc\r"
expect {
  -re {# $} { }
  timeout { exit 1 }
}
send "mkdir -p /root/.ssh && chmod 700 /root/.ssh && touch /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys\r"
expect -re {# $}
send "grep -qF '$env(PUBKEY)' /root/.ssh/authorized_keys || echo '$env(PUBKEY)' >> /root/.ssh/authorized_keys\r"
expect -re {# $}
send "sed -i 's/^#*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config 2>/dev/null || echo PermitRootLogin prohibit-password >> /etc/ssh/sshd_config\r"
expect -re {# $}
send "systemctl restart ssh 2>/dev/null || systemctl restart sshd 2>/dev/null || true\r"
expect -re {# $}
send "exit\r"
expect eof
EXPECT

# Verify with the private key that belongs to PUBKEY_FILE (same path without
# ".pub"). When no such file exists, keep passing the public key, which ssh
# can pair with a key held by ssh-agent.
IDENTITY_FILE="${PUBKEY_FILE%.pub}"
[[ -f "${IDENTITY_FILE}" ]] || IDENTITY_FILE="${PUBKEY_FILE}"
ssh -o BatchMode=yes -i "${IDENTITY_FILE}" -o ConnectTimeout=10 \
  "root@${IP}" "echo OK: root@${IP}"
echo "Done: root key on ${HOST}"

View File

@ -1,103 +0,0 @@
#!/usr/bin/env bash
# Bootstrap root SSH key access via a normal user (default: ladmin).
# Usage: ./scripts/bootstrap-root-ssh.sh HOSTNAME
# BOOTSTRAP_USER=ladmin TARGET_USER=root SSH_PUBLIC_KEY=~/.ssh/id_ed25519.pub
#
# Steps: (1) ssh-copy-id the key for BOOTSTRAP_USER, (2) copy the key to
# /tmp on the host and append it to /root/.ssh/authorized_keys via `su - root`,
# (3) verify key login as TARGET_USER.
# NOTE(review): step 2 always writes to /root/.ssh regardless of TARGET_USER —
# confirm TARGET_USER is only ever "root".
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
# All of these can be overridden from the environment.
INVENTORY_HOSTS="${INVENTORY_HOSTS:-${REPO_ROOT}/inventories/production/hosts}"
PUBKEY_FILE="${SSH_PUBLIC_KEY:-${HOME}/.ssh/id_ed25519.pub}"
BOOTSTRAP_USER="${BOOTSTRAP_USER:-ladmin}"
TARGET_USER="${TARGET_USER:-root}"
HOST="${1:-}"
if [[ -z "${HOST}" ]]; then
echo "Usage: $0 HOST" >&2
exit 1
fi
if [[ ! -f "${PUBKEY_FILE}" ]]; then
echo "Public key not found: ${PUBKEY_FILE}" >&2
exit 1
fi
# Prints two lines for the inventory entry whose first field equals HOST:
# line 1 = ansible_host value, line 2 = ansible_user value (either may be empty).
resolve_from_inventory() {
awk -v host="${HOST}" '
$1 == host {
for (i = 2; i <= NF; i++) {
if ($i ~ /^ansible_host=/) {
sub(/ansible_host=/, "", $i)
ip = $i
}
if ($i ~ /^ansible_user=/) {
sub(/ansible_user=/, "", $i)
user = $i
}
}
}
END {
print ip
print user
}
' "${INVENTORY_HOSTS}"
}
# The inventory is parsed twice, once per output line.
IP="$(resolve_from_inventory | sed -n '1p')"
INV_USER="$(resolve_from_inventory | sed -n '2p')"
if [[ -z "${IP}" ]]; then
echo "Could not resolve ansible_host for ${HOST} in ${INVENTORY_HOSTS}" >&2
exit 1
fi
echo "==> ${HOST} (${BOOTSTRAP_USER}@${IP} -> ${TARGET_USER})"
# INV_USER is informational only; the connection uses BOOTSTRAP_USER.
echo " Inventory ansible_user: ${INV_USER:-<unset>}"
echo " Public key: ${PUBKEY_FILE}"
echo ""
echo "Step 1/3: Install key for ${BOOTSTRAP_USER} (password: ${BOOTSTRAP_USER})"
ssh-copy-id -i "${PUBKEY_FILE}" -o StrictHostKeyChecking=accept-new \
"${BOOTSTRAP_USER}@${IP}"
echo ""
echo "Step 2/3: Copy key and configure ${TARGET_USER} via su (password: root)"
REMOTE_KEY="/tmp/ansible-bootstrap.pub"
scp -o StrictHostKeyChecking=accept-new "${PUBKEY_FILE}" \
"${BOOTSTRAP_USER}@${IP}:${REMOTE_KEY}"
# Outer heredoc is unquoted: ${REMOTE_KEY} is expanded locally, while the
# backslash-escaped \$ forms are passed through for the remote shell.
# NOTE(review): stdin of this ssh is the heredoc, not a terminal — confirm that
# `ssh -t` still gives `su` a TTY for its password prompt (ssh(1) documents -tt
# to force allocation) and that su does not read the script text as a password.
ssh -t "${BOOTSTRAP_USER}@${IP}" bash -s <<REMOTE_SCRIPT
set -e
REMOTE_KEY="${REMOTE_KEY}"
su - root <<ROOT_SCRIPT
set -e
mkdir -p /root/.ssh
chmod 700 /root/.ssh
touch /root/.ssh/authorized_keys
chmod 600 /root/.ssh/authorized_keys
if ! grep -qF "\$(cat "\${REMOTE_KEY}")" /root/.ssh/authorized_keys 2>/dev/null; then
cat "\${REMOTE_KEY}" >> /root/.ssh/authorized_keys
fi
rm -f "\${REMOTE_KEY}"
if [ -f /etc/ssh/sshd_config ]; then
if grep -q '^PermitRootLogin' /etc/ssh/sshd_config; then
sed -i 's/^#*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
else
echo 'PermitRootLogin prohibit-password' >> /etc/ssh/sshd_config
fi
systemctl restart ssh 2>/dev/null \
|| systemctl restart sshd 2>/dev/null \
|| service ssh restart 2>/dev/null \
|| true
fi
echo "OK: root authorized_keys updated; PermitRootLogin prohibit-password"
ROOT_SCRIPT
REMOTE_SCRIPT
echo ""
echo "Step 3/3: Verify ${TARGET_USER} key login"
# BatchMode=yes makes ssh fail instead of prompting for a password.
# NOTE(review): -i is given the PUBLIC key file; presumably this relies on
# ssh-agent or a default identity holding the matching private key — confirm.
ssh -o BatchMode=yes -i "${PUBKEY_FILE}" -o StrictHostKeyChecking=accept-new \
"${TARGET_USER}@${IP}" "echo OK: ${TARGET_USER}@${IP} accepts your SSH key"
echo ""
echo "Done: ${HOST} — use: ssh -i ${PUBKEY_FILE} ${TARGET_USER}@${IP}"

View File

@ -1,89 +0,0 @@
#!/usr/bin/env bash
# Add or update Uptime Kuma HTTP monitors via API.
# Usage:
#   export KUMA_PASSWORD='...' # not in vault yet — set manually once
#   ./scripts/kuma-add-monitors.sh
#
# Monitors are idempotent: existing names are skipped.
# Links the default SMTP notification to any monitor that has none.
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
KUMA_URL="${KUMA_URL:-http://10.0.10.22:3001}"
KUMA_USER="${KUMA_USER:-admin}"
KUMA_PASSWORD="${KUMA_PASSWORD:-}"

# Fall back to the repo-level .env when the password was not exported.
if [[ -z "${KUMA_PASSWORD}" ]] && [[ -f "${REPO_ROOT}/.env" ]]; then
  # nounset is relaxed while sourcing (as in the beszel-setup-* scripts) so a
  # .env line that references an unset variable does not abort the run.
  # shellcheck disable=SC1091
  set +u
  set -a
  # shellcheck source=/dev/null
  source "${REPO_ROOT}/.env"
  set +a
  set -u
  KUMA_PASSWORD="${KUMA_PASSWORD:-}"
fi

if [[ -z "${KUMA_PASSWORD}" ]]; then
  echo "Set KUMA_PASSWORD (admin UI password; not stored in vault yet)" >&2
  exit 1
fi
export KUMA_URL KUMA_USER KUMA_PASSWORD
"${REPO_ROOT}/.venv/bin/python3" <<'PY'
import os
import sys
try:
from uptime_kuma_api import UptimeKumaApi
except ImportError:
print("Run: .venv/bin/pip install uptime-kuma-api", file=sys.stderr)
sys.exit(1)
# Public HTTPS endpoints worth watching (Beszel covers host metrics separately).
MONITORS = [
{"type": "http", "name": "levkin.ca", "url": "https://levkin.ca", "interval": 60, "retryInterval": 60, "maxretries": 3, "accepted_statuscodes": ["200-299"]},
{"type": "http", "name": "Portfolio", "url": "https://iliadobkin.com", "interval": 60, "retryInterval": 60, "maxretries": 3, "accepted_statuscodes": ["200-299"]},
{"type": "http", "name": "Search", "url": "https://search.levkin.ca", "interval": 120, "retryInterval": 60, "maxretries": 3, "accepted_statuscodes": ["200-299"]},
{"type": "http", "name": "PDF", "url": "https://pdf.levkin.ca", "interval": 120, "retryInterval": 60, "maxretries": 3, "accepted_statuscodes": ["200-299"]},
{"type": "http", "name": "Umami script", "url": "https://stats.levkin.ca/script.js", "interval": 300, "retryInterval": 120, "maxretries": 3, "accepted_statuscodes": ["200-299"]},
{"type": "http", "name": "Mattermost", "url": "https://slack.levkin.ca", "interval": 120, "retryInterval": 60, "maxretries": 3, "accepted_statuscodes": ["200-299"]},
]
url = os.environ["KUMA_URL"]
user = os.environ["KUMA_USER"]
password = os.environ["KUMA_PASSWORD"]
with UptimeKumaApi(url) as api:
api.login(user, password)
existing = {m.get("name"): m for m in api.get_monitors()}
notifs = [n for n in api.get_notifications() if n.get("isActive")]
smtp = next((n for n in notifs if n.get("isActive")), None)
if not smtp and notifs:
smtp = notifs[0]
if not smtp:
smtp = {"id": 1} # fallback: first notification in DB
for spec in MONITORS:
name = spec["name"]
if name in existing:
print(f"skip (exists): {name} id={existing[name].get('id')}")
continue
result = api.add_monitor(**spec)
print(f"added: {name} -> {result}")
existing[name] = result if isinstance(result, dict) else {"id": result}
if smtp:
nid = smtp["id"]
for m in api.get_monitors():
nlist = m.get("notificationIDList") or {}
if isinstance(nlist, dict) and nlist.get(str(nid)):
continue
if isinstance(nlist, dict):
nlist[str(nid)] = True
else:
nlist = {str(nid): True}
api.edit_monitor(m["id"], notificationIDList=nlist)
print(f"linked notification {nid} -> {m.get('name')}")
else:
print("warn: no SMTP notification found — create one in Kuma UI first", file=sys.stderr)
PY

View File

@ -1,66 +0,0 @@
#!/usr/bin/env bash
# Configure Uptime Kuma SMTP notification (Mailcow) via Socket.IO API.
# Run from machine with network access to Kuma:
#   export KUMA_URL=http://10.0.10.22:3001
#   export KUMA_USER=admin
#   export KUMA_PASSWORD='your-kuma-password'
#   export SMTP_USER=alerts@levkine.ca
#   export SMTP_PASS='mailbox-password'
#   export SMTP_TO=idobkin@gmail.com
#   pip install uptime-kuma-api
#   ./scripts/kuma-setup-smtp.sh
#
# Optional: SMTP_SECURE=true|false overrides the TLS mode. When unset it is
# derived from SMTP_PORT (implicit TLS only on 465; STARTTLS otherwise).
set -euo pipefail

KUMA_URL="${KUMA_URL:-http://10.0.10.22:3001}"
KUMA_USER="${KUMA_USER:-admin}"
KUMA_PASSWORD="${KUMA_PASSWORD:-}"
SMTP_HOST="${SMTP_HOST:-mail.levkine.ca}"
SMTP_PORT="${SMTP_PORT:-587}"
SMTP_USER="${SMTP_USER:-alerts@levkine.ca}"
SMTP_PASS="${SMTP_PASS:-}"
SMTP_TO="${SMTP_TO:-idobkin@gmail.com}"
SMTP_SECURE="${SMTP_SECURE:-}"

if [[ -z "${KUMA_PASSWORD}" || -z "${SMTP_PASS}" ]]; then
  echo "Set KUMA_PASSWORD and SMTP_PASS" >&2
  exit 1
fi

# The defaults above are plain shell variables; without this export the Python
# child would raise KeyError for every value the caller did not export itself.
export KUMA_URL KUMA_USER KUMA_PASSWORD
export SMTP_HOST SMTP_PORT SMTP_USER SMTP_PASS SMTP_TO SMTP_SECURE

python3 <<'PY'
import os
import sys

try:
    from uptime_kuma_api import UptimeKumaApi
except ImportError:
    print("pip install uptime-kuma-api", file=sys.stderr)
    sys.exit(1)

url = os.environ["KUMA_URL"]
user = os.environ["KUMA_USER"]
password = os.environ["KUMA_PASSWORD"]
smtp_host = os.environ["SMTP_HOST"]
smtp_port = int(os.environ["SMTP_PORT"])
smtp_user = os.environ["SMTP_USER"]
smtp_pass = os.environ["SMTP_PASS"]
smtp_to = os.environ["SMTP_TO"]

# "secure" means implicit TLS from the first byte (port 465). On 587/25 the
# session starts in plain text and upgrades via STARTTLS, so it must be False.
secure_override = os.environ.get("SMTP_SECURE", "").strip().lower()
if secure_override:
    smtp_secure = secure_override in ("1", "true", "yes", "on")
else:
    smtp_secure = smtp_port == 465

with UptimeKumaApi(url) as api:
    api.login(user, password)
    # Notification type name in Kuma 1.x is often 'smtp' / 'email'
    result = api.add_notification(
        name="Mailcow alerts",
        type="smtp",
        isDefault=True,
        applyExisting=True,
        smtpHost=smtp_host,
        smtpPort=smtp_port,
        smtpSecure=smtp_secure,
        smtpIgnoreTLS=False,
        smtpUsername=smtp_user,
        smtpPassword=smtp_pass,
        smtpFrom=smtp_user,
        smtpTo=smtp_to,
    )
    print(result)
PY

View File

@ -1,51 +0,0 @@
#!/usr/bin/env bash
# Export Mailcow API key + mailbox password from .env or Ansible vault.
# Usage: source scripts/load-mailcow-vault-env.sh [mailbox_local_part]
#
# Password lookup order in the environment / .env:
#   1. MAILBOX_PASSWORD
#   2. MAILBOX_<key>_PASSWORD (per-mailbox entry)
#   3. ALERTS_PASSWORD — only when the requested mailbox is "alerts" or unset,
#      so another mailbox never silently receives the alerts password.
# When MAILCOW_API_KEY or the password is still missing, both are read from
# the vault (if the vault file and its password file exist).
#
# NOTE: this file is sourced, so the `set` line below also changes the
# caller's shell options.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
VAULT_FILE="${REPO_ROOT}/inventories/production/group_vars/all/vault.yml"
VAULT_PASS="${HOME}/.ansible-vault-pass"
ANSIBLE_VAULT="${REPO_ROOT}/.venv/bin/ansible-vault"
MAILBOX_KEY="${1:-${MAILBOX:-${MAILBOX_LOCAL_PART:-}}}"

# Auto-export everything .env defines.
set -a
[ -f "${REPO_ROOT}/.env" ] && . "${REPO_ROOT}/.env"
set +a

_mailcow_pw="${MAILBOX_PASSWORD:-}"
if [[ -z "${_mailcow_pw}" && -n "${MAILBOX_KEY}" ]]; then
  _mailcow_var="MAILBOX_${MAILBOX_KEY}_PASSWORD"
  _mailcow_ident='^[A-Za-z_][A-Za-z0-9_]*$'
  # Only dereference valid shell identifiers (a key may contain '-' or '.').
  if [[ "${_mailcow_var}" =~ ${_mailcow_ident} ]]; then
    _mailcow_pw="${!_mailcow_var:-}"
  fi
  unset _mailcow_var _mailcow_ident
fi
if [[ -z "${_mailcow_pw}" ]] && [[ -z "${MAILBOX_KEY}" || "${MAILBOX_KEY}" == "alerts" ]]; then
  _mailcow_pw="${ALERTS_PASSWORD:-}"
fi

if [[ -n "${MAILCOW_API_KEY:-}" && -n "${_mailcow_pw}" ]]; then
  export MAILBOX_PASSWORD="${_mailcow_pw}"
  unset _mailcow_pw
  # `return` works when sourced; fall back to `exit` when executed directly.
  return 0 2>/dev/null || exit 0
fi
unset _mailcow_pw

if [[ ! -f "${VAULT_FILE}" ]] || [[ ! -f "${VAULT_PASS}" ]]; then
  return 0 2>/dev/null || exit 0
fi

# The Python helper prints shell-quoted `export` lines which are eval'ed here.
eval "$("${REPO_ROOT}/.venv/bin/python3" - "${VAULT_FILE}" "${VAULT_PASS}" "${ANSIBLE_VAULT}" "${MAILBOX_KEY}" <<'PY'
import os, subprocess, sys, yaml, shlex

vault_file, vault_pass, ansible_vault, mailbox_key = sys.argv[1:5]
text = subprocess.check_output(
    [ansible_vault, "view", vault_file, "--vault-password-file", vault_pass],
    text=True,
)
data = yaml.safe_load(text) or {}
out = []

api = data.get("vault_mailcow_api_key") or ""
if api:
    out.append("export MAILCOW_API_KEY=" + shlex.quote(str(api)))

passwords = data.get("vault_mailcow_mailbox_passwords") or {}
pw = ""
if mailbox_key and mailbox_key in passwords:
    pw = passwords[mailbox_key]
elif mailbox_key == "alerts":
    pw = data.get("vault_alerts_mailbox_password") or passwords.get("alerts", "")
if pw:
    out.append("export MAILBOX_PASSWORD=" + shlex.quote(str(pw)))
    out.append("export ALERTS_PASSWORD=" + shlex.quote(str(pw)))

print("\n".join(out))
PY
)"
return 0 2>/dev/null || exit 0

View File

@ -1,18 +0,0 @@
#!/usr/bin/env bash
# Export BOOTSTRAP_SU_PASSWORD, taken from vault_lxc_root_password in the
# production Ansible vault. The embedded Python prints one shell-quoted
# `export` line (or nothing when the key is empty), which is eval'ed here.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

eval "$("${REPO_ROOT}/.venv/bin/python3" - "${REPO_ROOT}" <<'PY'
import os, subprocess, sys, yaml, shlex

repo_root = sys.argv[1]

# Decrypt the vault with the repo-local ansible-vault and the user's password file.
view_cmd = [
    os.path.join(repo_root, ".venv/bin/ansible-vault"),
    "view",
    os.path.join(repo_root, "inventories/production/group_vars/all/vault.yml"),
    "--vault-password-file",
    os.path.expanduser("~/.ansible-vault-pass"),
]
decrypted = subprocess.check_output(view_cmd, text=True)

secrets = yaml.safe_load(decrypted) or {}
root_pw = secrets.get("vault_lxc_root_password", "")
if root_pw:
    print("export BOOTSTRAP_SU_PASSWORD=" + shlex.quote(str(root_pw)))
PY
)"

View File

@ -1,32 +0,0 @@
#!/usr/bin/env bash
# Resolve MAILBOX= key from inventories/production/group_vars/all/mailcow.yml
#
# Prints shell-quoted `export VAR=...` lines on stdout (MAILBOX_LOCAL_PART,
# MAILBOX_NAME, MAILBOX_QUOTA, MAILBOX_VAULT_KEY — only those present in the
# inventory entry). The caller is expected to eval the output, e.g.:
#   eval "$(MAILBOX=alerts <this script>)"
# Exits non-zero (message on stderr) when the key is unknown.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
MAILBOX="${MAILBOX:-}"
[[ -n "${MAILBOX}" ]] || { echo "MAILBOX required" >&2; exit 1; }

# Run the helper directly instead of eval'ing its output in this process:
# exports made here would die with the script, and stdout would stay empty.
"${REPO_ROOT}/.venv/bin/python3" - "${REPO_ROOT}" "${MAILBOX}" <<'PY'
import sys, yaml, shlex, os

repo, key = sys.argv[1], sys.argv[2]
path = os.path.join(repo, "inventories/production/group_vars/all/mailcow.yml")
with open(path) as f:
    data = yaml.safe_load(f) or {}

boxes = data.get("mailcow_mailboxes") or {}
if key not in boxes:
    raise SystemExit(f"Unknown MAILBOX={key!r}. Add it to mailcow_mailboxes in mailcow.yml")

# An entry declared with no fields loads as None; treat it as empty.
b = boxes[key] or {}
out = []
for k, env in [
    ("local_part", "MAILBOX_LOCAL_PART"),
    ("name", "MAILBOX_NAME"),
    ("quota", "MAILBOX_QUOTA"),
]:
    if k in b and b[k] is not None:
        out.append(f"export {env}={shlex.quote(str(b[k]))}")
if b.get("vault_password_key"):
    out.append(f"export MAILBOX_VAULT_KEY={shlex.quote(str(b['vault_password_key']))}")
print("\n".join(out))
PY

View File

@ -1,62 +0,0 @@
#!/usr/bin/env bash
# Create a Mailcow mailbox via API (POST /api/v1/add/mailbox).
# NOTE(review): only the "add" endpoint is called; an existing mailbox is not
# updated by this script.
#
# Usage:
#   make mailcow-mailbox MAILBOX=alerts
#   # or with env (after: source scripts/load-mailcow-vault-env.sh):
#   MAILBOX_LOCAL_PART=notify MAILBOX_NAME="Notify" MAILBOX_PASSWORD='...' ./scripts/mailcow-mailbox.sh
#
# Variables (env or make):
#   MAILBOX / MAILBOX_LOCAL_PART — local part (required)
#   MAILBOX_NAME — display name (default: title-case of local part)
#   MAILBOX_PASSWORD — if unset, loaded from vault_mailcow_mailbox_passwords[local_part]
#   MAILBOX_QUOTA — MiB (default 1024)
#   MAILCOW_URL, MAILCOW_DOMAIN, MAILCOW_API_KEY — see load-mailcow-vault-env.sh
#   MAILCOW_VERIFY_TLS — set to 1 to verify the server certificate
#                        (default 0 keeps the previous `curl -k` behavior)
set -euo pipefail

MAILCOW_URL="${MAILCOW_URL:-https://mail.levkine.ca}"
DOMAIN="${MAILCOW_DOMAIN:-levkine.ca}"
LOCAL_PART="${MAILBOX_LOCAL_PART:-${MAILBOX:-}}"
API_KEY="${MAILCOW_API_KEY:-}"
MAILBOX_PASSWORD="${MAILBOX_PASSWORD:-${ALERTS_PASSWORD:-}}"
QUOTA="${MAILBOX_QUOTA:-1024}"

if [[ -z "${LOCAL_PART}" ]]; then
  echo "Set MAILBOX=localpart or MAILBOX_LOCAL_PART" >&2
  exit 1
fi
if [[ -z "${API_KEY}" ]]; then
  echo "Set MAILCOW_API_KEY (make mailcow-mailbox loads vault/.env)" >&2
  exit 1
fi
if [[ -z "${MAILBOX_PASSWORD}" ]]; then
  echo "Set MAILBOX_PASSWORD or add vault_mailcow_mailbox_passwords.${LOCAL_PART} in vault" >&2
  exit 1
fi

# Default display name: local part with -/_ turned into spaces, title-cased.
DISPLAY_NAME="${MAILBOX_NAME:-$(echo "${LOCAL_PART}" | sed 's/[-_]/ /g' | awk '{for(i=1;i<=NF;i++) $i=toupper(substr($i,1,1)) tolower(substr($i,2)); print}')}"

# jq builds the JSON so every value is escaped correctly.
ATTR=$(jq -nc \
  --arg lp "${LOCAL_PART}" \
  --arg dom "${DOMAIN}" \
  --arg name "${DISPLAY_NAME}" \
  --arg pw "${MAILBOX_PASSWORD}" \
  --arg quota "${QUOTA}" \
  '{local_part:$lp,domain:$dom,name:$name,quota:$quota,password:$pw,password2:$pw,active:"1"}')

# -sS: stay quiet but still print curl's own error when the transfer fails.
CURL_OPTS=(-sS)
if [[ "${MAILCOW_VERIFY_TLS:-0}" != "1" ]]; then
  CURL_OPTS+=(-k)
fi

echo "Creating mailbox ${LOCAL_PART}@${DOMAIN} (${DISPLAY_NAME})..."
# --data-urlencode: the JSON travels as a form field, so '&', '+', '%' etc.
# (likely in passwords) must be percent-encoded or the field is corrupted.
RESP=$(curl "${CURL_OPTS[@]}" -w "\n%{http_code}" -X POST "${MAILCOW_URL}/api/v1/add/mailbox" \
  -H "X-API-Key: ${API_KEY}" \
  --data-urlencode "attr=${ATTR}")

# Last line is the status code appended by -w; everything before it is the body.
HTTP_CODE=$(echo "${RESP}" | tail -1)
BODY=$(echo "${RESP}" | sed '$d')
echo "${BODY}" | jq . 2>/dev/null || echo "${BODY}"

if [[ "${HTTP_CODE}" -lt 200 || "${HTTP_CODE}" -ge 300 ]]; then
  echo "Mailcow API HTTP ${HTTP_CODE}" >&2
  exit 1
fi
echo "Done: ${LOCAL_PART}@${DOMAIN}"

View File

@ -1,17 +0,0 @@
#!/usr/bin/env bash
# Wrapper for: make mailcow-mailbox MAILBOX=name
# Resolves the mailbox from the inventory, loads credentials from .env/vault,
# then hands over to scripts/mailcow-mailbox.sh.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
MAILBOX="${MAILBOX:?MAILBOX required}"
# The child scripts read MAILBOX from their environment.
export MAILBOX
cd "${REPO_ROOT}"

# Capture first: a plain assignment propagates the script's exit status to
# `set -e`, whereas `eval "$(script)"` would discard a failed lookup.
INVENTORY_ENV="$(./scripts/mailcow-mailbox-from-inventory.sh)"
eval "${INVENTORY_ENV}"

VAULT_KEY="${MAILBOX_VAULT_KEY:-${MAILBOX}}"
. ./scripts/load-mailcow-vault-env.sh "${VAULT_KEY}"

if [[ -z "${MAILCOW_API_KEY:-}" || -z "${MAILBOX_PASSWORD:-}" ]]; then
  echo "Missing vault_mailcow_api_key or vault_mailcow_mailbox_passwords.${VAULT_KEY}" >&2
  exit 1
fi

exec ./scripts/mailcow-mailbox.sh

View File

@ -1,71 +0,0 @@
#!/usr/bin/env bash
# Extended read-only security + cleanup audit (run on target host).
# Every section is best-effort: missing tools print a placeholder instead of
# aborting (only `set -u` is enabled, not -e).
set -u

echo "=== identity ==="
hostname -f 2>/dev/null || hostname
if [ -f /etc/os-release ]; then . /etc/os-release; echo "os=${PRETTY_NAME:-unknown}"; fi
echo "kernel=$(uname -r)"
echo "uptime=$(uptime -p 2>/dev/null || uptime)"

echo "=== disk ==="
df -h / /var 2>/dev/null | tail -n +2 | awk '{print $6" "$5" used "$4" free"}'

echo "=== sshd (effective) ==="
if command -v sshd >/dev/null 2>&1; then
  sshd -T 2>/dev/null | grep -E '^(permitrootlogin|passwordauthentication|pubkeyauthentication|permitemptypasswords|port|x11forwarding|maxauthtries) ' || true
else
  grep -E '^(PermitRootLogin|PasswordAuthentication|PubkeyAuthentication|Port) ' /etc/ssh/sshd_config 2>/dev/null | grep -v '^#' || echo "sshd not found"
fi

echo "=== firewall ==="
if command -v ufw >/dev/null 2>&1; then
  ufw status verbose 2>/dev/null | head -5
elif command -v firewall-cmd >/dev/null 2>&1; then
  firewall-cmd --state 2>/dev/null || true
else
  echo "no ufw/firewalld"
fi

echo "=== fail2ban ==="
systemctl is-active fail2ban 2>/dev/null || echo "fail2ban: inactive or missing"

echo "=== unattended-upgrades ==="
systemctl is-active unattended-upgrades 2>/dev/null || echo "unattended-upgrades: inactive or missing"

echo "=== pending apt upgrades ==="
if command -v apt >/dev/null 2>&1; then
  # grep -c prints "0" itself when nothing matches but exits 1, so an
  # `|| echo 0` fallback would print the zero twice.
  apt-get -s upgrade 2>/dev/null | grep -c '^Inst' || true
else
  echo "n/a"
fi

echo "=== docker ==="
if command -v docker >/dev/null 2>&1; then
  echo "docker=$(docker --version 2>/dev/null || true)"
  echo "containers=$(docker ps -aq 2>/dev/null | wc -l | tr -d ' ') running=$(docker ps -q 2>/dev/null | wc -l | tr -d ' ')"
  echo "images=$(docker images -q 2>/dev/null | wc -l | tr -d ' ')"
  docker system df 2>/dev/null | tail -n +2 || true
else
  echo "no docker"
fi

echo "=== journal disk ==="
journalctl --disk-usage 2>/dev/null || echo "n/a"

echo "=== apt cache ==="
du -sh /var/cache/apt/archives 2>/dev/null || echo "n/a"

echo "=== existing cron (root) ==="
# A pipeline's status is that of its last command (head), so test the captured
# output instead of relying on `||` to detect an empty/missing crontab.
cron_entries="$(crontab -l 2>/dev/null | grep -v '^#' | grep -v '^$' | head -10)"
if [ -n "${cron_entries}" ]; then
  echo "${cron_entries}"
else
  echo "no root crontab"
fi
ls /etc/cron.{daily,weekly,monthly}/* 2>/dev/null | xargs -I{} basename {} | head -15 || true

echo "=== listening tcp (non-localhost) ==="
ss -tlnp 2>/dev/null | awk 'NR==1 || /LISTEN/ {print}' | grep -v '127.0.0.1:' | grep -v '\[::1\]:' | head -15

echo "=== uid 0 accounts ==="
awk -F: '$3==0 {print $1}' /etc/passwd | tr '\n' ' '
echo

echo "=== tailscale ==="
if command -v tailscale >/dev/null 2>&1; then
  tailscale status --self 2>/dev/null | head -1
else
  echo "no tailscale"
fi

View File

@ -1,39 +0,0 @@
#!/usr/bin/env bash
# Audit LXCs on a Proxmox node via pct exec (run ON the PVE host as root).
# For every running container the AUDIT snippet below is executed with
# `bash -c`; stopped containers are listed and skipped.
set -u

# Runs inside each container. Kept in single quotes so nothing expands on the
# host; it therefore must not contain single quotes itself.
AUDIT='#!/bin/bash
echo "=== identity ==="
hostname -f 2>/dev/null || hostname
[ -f /etc/os-release ] && . /etc/os-release && echo "os=${PRETTY_NAME:-unknown}"
echo "ip=$(hostname -I 2>/dev/null | awk "{print \$1}")"
echo "=== sshd (effective) ==="
if command -v sshd >/dev/null 2>&1; then
  sshd -T 2>/dev/null | grep -E "^(permitrootlogin|passwordauthentication|pubkeyauthentication|permitemptypasswords|port) " || true
else
  grep -E "^(PermitRootLogin|PasswordAuthentication|PubkeyAuthentication|Port) " /etc/ssh/sshd_config 2>/dev/null | grep -v "^#" || echo "sshd not installed"
fi
echo "=== firewall ==="
if command -v ufw >/dev/null 2>&1; then
  ufw status 2>/dev/null | head -3
else
  echo "no ufw"
fi
echo "=== fail2ban ==="
systemctl is-active fail2ban 2>/dev/null || echo "inactive/missing"
echo "=== pending upgrades ==="
apt-get -s upgrade 2>/dev/null | grep -c "^Inst" || true
echo "=== public listeners ==="
ss -tlnp 2>/dev/null | grep LISTEN | grep -v "127.0.0.1:" | grep -v "\[::1\]:" | head -12
'

echo "PVE_NODE=$(hostname -f 2>/dev/null || hostname)"
echo "PVE_IP=$(hostname -I | awk '{print $1}')"

# Query the container list once and reuse it for every lookup.
PCT_LIST="$(pct list 2>/dev/null || true)"

for id in $(awk 'NR>1 {print $1}' <<<"${PCT_LIST}"); do
  # Columns are VMID, Status, Lock, Name, but Lock is usually blank, which
  # shifts Name to field 3 under whitespace splitting; the last field is the
  # name in both cases.
  name=$(awk -v id="$id" '$1==id {print $NF}' <<<"${PCT_LIST}")
  status=$(awk -v id="$id" '$1==id {print $2}' <<<"${PCT_LIST}")
  echo ""
  echo "######## LXC vmid=$id name=$name status=$status ########"
  if [ "$status" != "running" ]; then
    echo "SKIP: not running"
    continue
  fi
  pct exec "$id" -- bash -c "$AUDIT" 2>&1 || echo "ERROR: pct exec failed"
done

View File

@ -1,48 +0,0 @@
#!/usr/bin/env bash
# Quick read-only security snapshot (run on target host).
# Runs with -e and pipefail, so every probe that may legitimately fail
# (missing tool, not root) carries an explicit fallback to keep the report going.
set -euo pipefail

echo "=== identity ==="
hostname -f 2>/dev/null || hostname
if [ -f /etc/os-release ]; then . /etc/os-release; echo "os=${PRETTY_NAME:-unknown}"; fi
echo "kernel=$(uname -r)"
echo "uptime=$(uptime -p 2>/dev/null || uptime)"

echo "=== sshd (effective) ==="
if command -v sshd >/dev/null 2>&1; then
  sshd -T 2>/dev/null | grep -E '^(permitrootlogin|passwordauthentication|pubkeyauthentication|permitemptypasswords|port|x11forwarding|allowtcpforwarding) ' || true
else
  grep -E '^(PermitRootLogin|PasswordAuthentication|PubkeyAuthentication|Port) ' /etc/ssh/sshd_config 2>/dev/null | grep -v '^#' || echo "sshd not found"
fi

echo "=== firewall ==="
if command -v ufw >/dev/null 2>&1; then
  # Under pipefail a failing `ufw status` (e.g. not run as root) would abort
  # the whole snapshot without the fallback.
  ufw status verbose 2>/dev/null | head -8 || true
elif command -v firewall-cmd >/dev/null 2>&1; then
  firewall-cmd --state 2>/dev/null || true
else
  echo "no ufw/firewalld"
fi

echo "=== fail2ban ==="
systemctl is-active fail2ban 2>/dev/null || echo "fail2ban: inactive or missing"

echo "=== unattended-upgrades ==="
systemctl is-active unattended-upgrades 2>/dev/null || echo "unattended-upgrades: inactive or missing"

echo "=== pending apt upgrades ==="
if command -v apt >/dev/null 2>&1; then
  # grep -c prints "0" itself when nothing matches but exits 1, so an
  # `|| echo 0` fallback would print the zero twice.
  apt-get -s upgrade 2>/dev/null | grep -c '^Inst' || true
else
  echo "n/a"
fi

echo "=== listening tcp (public) ==="
ss -tlnp 2>/dev/null | awk 'NR==1 || /LISTEN/ {print}' | grep -v '127.0.0.1:' | grep -v '\[::1\]:' | head -20 || true

echo "=== uid 0 accounts ==="
awk -F: '$3==0 {print $1}' /etc/passwd | tr '\n' ' '
echo

echo "=== last logins (top 5) ==="
last -n 5 2>/dev/null | head -5 || true

View File

@ -1,27 +0,0 @@
#!/usr/bin/env bash
# SSH-focused audit (hypervisor or guest).
# Read-only: prints the effective sshd settings, the explicit sshd_config
# directives, root's authorized keys and recent authentication failures.
set -u

echo "=== host ==="
hostname -f 2>/dev/null || hostname

echo "=== sshd effective config ==="
if command -v sshd >/dev/null 2>&1; then
  sshd -T 2>/dev/null | grep -E '^(port|permitrootlogin|passwordauthentication|pubkeyauthentication|permitemptypasswords|maxauthtries|x11forwarding|allowtcpforwarding|gatewayports|permittunnel|usepam|kbdinteractiveauthentication) ' || true
else
  echo "sshd binary missing"
fi

echo "=== sshd_config (non-comment) ==="
grep -E '^(Port|PermitRootLogin|PasswordAuthentication|PubkeyAuthentication|PermitEmptyPasswords|MaxAuthTries|AllowUsers|AllowGroups|X11Forwarding) ' /etc/ssh/sshd_config 2>/dev/null || true

echo "=== authorized_keys (root) ==="
if [ -f /root/.ssh/authorized_keys ]; then
  wc -l /root/.ssh/authorized_keys
  # Last field of each line: the key comment when there is one.
  awk '{print $NF}' /root/.ssh/authorized_keys 2>/dev/null | sed 's/^/ key: /'
else
  echo "no /root/.ssh/authorized_keys"
fi

echo "=== recent ssh auth failures (today) ==="
# A pipeline's status is that of its last command (tail, always 0), so an
# `a | tail || b | tail || echo` chain never reaches its fallbacks. Decide on
# the captured output instead: journal first, then auth.log, then placeholder.
failures="$(journalctl -u ssh -u sshd --since today 2>/dev/null | grep -iE 'Failed|Invalid|refused' | tail -5)"
if [ -z "${failures}" ]; then
  failures="$(grep -iE 'Failed|Invalid' /var/log/auth.log 2>/dev/null | tail -5)"
fi
if [ -n "${failures}" ]; then
  echo "${failures}"
else
  echo "no logs"
fi

View File

@ -1,83 +0,0 @@
#!/usr/bin/env bash
# Write Ansible vault secrets into .env (for local scripts / reference).
# Does not print secret values. Does not overwrite non-empty .env keys.
# Values are written shell-quoted so the file stays safe to `source`.
set -euo pipefail

# New files are created owner-only from the start (the chmod at the end only
# tightens a file that already existed with wider permissions).
umask 077

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
ENV_FILE="${1:-${REPO_ROOT}/.env}"
VAULT_FILE="${REPO_ROOT}/inventories/production/group_vars/all/vault.yml"
VAULT_PASS="${HOME}/.ansible-vault-pass"
ANSIBLE_VAULT="${REPO_ROOT}/.venv/bin/ansible-vault"

[[ -f "${VAULT_PASS}" ]] || { echo "Missing ${VAULT_PASS}" >&2; exit 1; }

"${REPO_ROOT}/.venv/bin/python3" - "${ENV_FILE}" "${VAULT_FILE}" "${VAULT_PASS}" "${ANSIBLE_VAULT}" <<'PY'
import shlex, subprocess, sys, yaml
from pathlib import Path

env_file, vault_file, vault_pass, ansible_vault = sys.argv[1:5]

# vault key -> .env key
MAP = {
    "vault_mailcow_api_key": "MAILCOW_API_KEY",
    "vault_alerts_mailbox_password": "ALERTS_PASSWORD",
    "vault_uptime_kuma_password": "KUMA_PASSWORD",
    "vault_uptime_kuma_user": "KUMA_USER",
    "vault_uptime_kuma_url": "KUMA_URL",
    "vault_umami_admin_password": "UMAMI_ADMIN_PASSWORD",
    "vault_umami_db_password": "UMAMI_DB_PASS",
    "vault_umami_app_secret": "UMAMI_APP_SECRET",
    "vault_kuma_smtp_host": "SMTP_HOST",
    "vault_kuma_smtp_port": "SMTP_PORT",
    "vault_kuma_smtp_user": "SMTP_USER",
    "vault_kuma_smtp_password": "SMTP_PASS",
    "vault_kuma_smtp_to": "SMTP_TO",
    "vault_mattermost_url": "MATTERMOST_URL",
    "vault_mattermost_token": "MATTERMOST_TOKEN",
    "vault_mattermost_allowed_users": "MATTERMOST_ALLOWED_USERS",
    "vault_unifi_api_key": "UNIFI_API_KEY",
    "vault_unifi_url": "UNIFI_URL",
}


def unquote(raw):
    """Return the value of a KEY=value line with shell quoting removed.

    A value that parses as exactly one shell word is unquoted the way a
    shell would; anything else (spaces without quotes, an unbalanced quote)
    falls back to stripping surrounding quote characters.
    """
    raw = raw.strip()
    try:
        words = shlex.split(raw)
    except ValueError:
        words = []
    if len(words) == 1:
        return words[0]
    return raw.strip("'").strip('"')


def parse_env(text):
    """Parse KEY=value lines into a dict; comments and blank lines are skipped."""
    d = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        if line.startswith("export "):
            line = line[7:].strip()
        k, _, v = line.partition("=")
        d[k.strip()] = unquote(v)
    return d


text = subprocess.check_output(
    [ansible_vault, "view", vault_file, "--vault-password-file", vault_pass],
    text=True,
)
data = yaml.safe_load(text) or {}

existing = parse_env(Path(env_file).read_text()) if Path(env_file).exists() else {}
merged = dict(existing)

# Fill only the keys that are missing or empty in .env.
for vk, ek in MAP.items():
    val = data.get(vk)
    if val is None or val == "":
        continue
    if merged.get(ek):
        continue
    merged[ek] = str(val)

pw = data.get("vault_mailcow_mailbox_passwords") or {}
if pw.get("alerts") and not merged.get("ALERTS_PASSWORD"):
    merged["ALERTS_PASSWORD"] = str(pw["alerts"])

header = """# Merged from Ansible vault (make vault-export-env). Fill gaps manually.
# vault → .env: make vault-export-env
# .env → vault: make vault-import-env
# hosts → .env → vault: make vault-pull-infra-secrets
"""
# Quote on write: parsing strips quotes, so writing raw values back would turn
# KEY='a b' into KEY=a b and break every script that sources this file.
body = "\n".join(
    f"{k}={shlex.quote(v) if v else ''}" for k, v in sorted(merged.items())
) + "\n"
Path(env_file).write_text(header + body)
print(f"Wrote {len(merged)} keys to {env_file} (existing non-empty keys kept)")
PY

chmod 600 "${ENV_FILE}" 2>/dev/null || true

View File

@ -1,98 +0,0 @@
#!/usr/bin/env bash
# Merge .env into inventories/production/group_vars/all/vault.yml
# Usage: make vault-import-env [ENV_FILE=.env]
#
# The vault is decrypted, updated in memory, written to a private temp file and
# re-encrypted over the vault file. The plaintext temp file is always removed,
# also when encryption fails.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
ENV_FILE="${1:-${ENV_FILE:-${REPO_ROOT}/.env}}"
VAULT_FILE="${REPO_ROOT}/inventories/production/group_vars/all/vault.yml"
VAULT_PASS="${HOME}/.ansible-vault-pass"
ANSIBLE_VAULT="${REPO_ROOT}/.venv/bin/ansible-vault"

[[ -f "${ENV_FILE}" ]] || { echo "No env file: ${ENV_FILE}" >&2; exit 1; }
[[ -f "${VAULT_PASS}" ]] || { echo "Missing ${VAULT_PASS}" >&2; exit 1; }

"${REPO_ROOT}/.venv/bin/python3" - "${ENV_FILE}" "${VAULT_FILE}" "${VAULT_PASS}" "${ANSIBLE_VAULT}" <<'PY'
import os, re, shlex, subprocess, sys, tempfile, yaml

env_file, vault_file, vault_pass, ansible_vault = sys.argv[1:5]


def unquote(raw):
    """Return the value of a KEY=value line with shell quoting removed.

    A value that parses as exactly one shell word is unquoted the way a
    shell would; anything else (spaces without quotes, an unbalanced quote)
    falls back to stripping surrounding quote characters.
    """
    raw = raw.strip()
    try:
        words = shlex.split(raw)
    except ValueError:
        words = []
    if len(words) == 1:
        return words[0]
    return raw.strip("'").strip('"')


def load_env(path):
    """Read non-empty KEY=value pairs from *path* (optional `export ` prefix)."""
    out = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            if line.startswith("export "):
                line = line[7:].strip()
            if "=" not in line:
                continue
            k, _, v = line.partition("=")
            v = unquote(v)
            if v:
                out[k.strip()] = v
    return out


# .env key -> vault key (or vault_mailcow_mailbox_passwords.<name>)
MAP = {
    "MAILCOW_API_KEY": "vault_mailcow_api_key",
    "ALERTS_PASSWORD": ("vault_alerts_mailbox_password", "alerts"),
    "KUMA_PASSWORD": "vault_uptime_kuma_password",
    "KUMA_USER": "vault_uptime_kuma_user",
    "KUMA_URL": "vault_uptime_kuma_url",
    "UMAMI_ADMIN_PASSWORD": "vault_umami_admin_password",
    "UMAMI_DB_PASS": "vault_umami_db_password",
    "UMAMI_APP_SECRET": "vault_umami_app_secret",
    "SMTP_HOST": "vault_kuma_smtp_host",
    "SMTP_PORT": "vault_kuma_smtp_port",
    "SMTP_USER": "vault_kuma_smtp_user",
    "SMTP_PASS": "vault_kuma_smtp_password",
    "SMTP_TO": "vault_kuma_smtp_to",
    "MATTERMOST_URL": "vault_mattermost_url",
    "MATTERMOST_TOKEN": "vault_mattermost_token",
    "MATTERMOST_ALLOWED_USERS": "vault_mattermost_allowed_users",
    "UNIFI_API_KEY": "vault_unifi_api_key",
    "UNIFI_URL": "vault_unifi_url",
    "PROXMOX_PASSWORD": "vault_proxmox_password",
    "LXC_ROOT_PASSWORD": "vault_lxc_root_password",
}

env = load_env(env_file)
text = subprocess.check_output(
    [ansible_vault, "view", vault_file, "--vault-password-file", vault_pass],
    text=True,
)
data = yaml.safe_load(text) or {}
passwords = dict(data.get("vault_mailcow_mailbox_passwords") or {})

for k, v in env.items():
    # MAILBOX_<name>_PASSWORD goes into the per-mailbox password map.
    m = re.match(r"^MAILBOX_(.+)_PASSWORD$", k, re.I)
    if m:
        passwords[m.group(1).lower()] = v
        continue
    target = MAP.get(k)
    if not target:
        continue
    if isinstance(target, tuple):
        # Tuple targets are stored twice: top-level key + mailbox map entry.
        data[target[0]] = v
        passwords[target[1]] = v
    else:
        data[target] = v

if passwords:
    data["vault_mailcow_mailbox_passwords"] = passwords

# mkstemp creates the file readable by the owner only. It holds every secret
# in plain text, so remove it no matter how the encryption step ends.
fd, tmp = tempfile.mkstemp(suffix=".yml")
try:
    with os.fdopen(fd, "w") as f:
        yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
    subprocess.run(
        [ansible_vault, "encrypt", tmp, "--output", vault_file,
         "--vault-password-file", vault_pass, "--encrypt-vault-id", "default"],
        check=True,
    )
finally:
    if os.path.exists(tmp):
        os.remove(tmp)

print(f"Updated {vault_file} from {env_file} ({len(env)} values)")
PY

View File

@ -1,70 +0,0 @@
#!/usr/bin/env bash
# Pull secrets from live hosts into .env, then vault-import-env.
# Does not print secret values.
# Hosts that cannot be reached (or whose file is missing) are skipped with a
# note on stderr; keys already in .env are kept unless a host supplies them.
set -euo pipefail

# New files are created owner-only from the start (the chmod below only
# tightens a file that already existed with wider permissions).
umask 077

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
ENV_FILE="${REPO_ROOT}/.env"

python3 - "${ENV_FILE}" <<'PY'
import shlex, subprocess, sys
from pathlib import Path

out = Path(sys.argv[1])
pulled = {}

SSH_BASE = ["ssh", "-o", "BatchMode=yes", "-o", "ConnectTimeout=8"]


def remote_cat(target, remote_cmd):
    """Run *remote_cmd* on *target* over ssh and return its stripped stdout."""
    return subprocess.check_output(SSH_BASE + [target, remote_cmd], text=True).strip()


def parse_env(text):
    """Parse KEY=value lines, stripping surrounding quote characters only."""
    d = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        k, _, v = line.partition("=")
        d[k.strip()] = v.strip().strip("'").strip('"')
    return d


def parse_local_env(text):
    """Parse the local .env, which may hold shell-quoted values."""
    d = {}
    for k, v in parse_env(text).items():
        d[k] = v
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        k, _, raw = line.partition("=")
        try:
            words = shlex.split(raw.strip())
        except ValueError:
            words = []
        # Exactly one shell word: unquote the way a shell would. Otherwise
        # keep the plain quote-stripped value from parse_env above.
        if len(words) == 1:
            d[k.strip()] = words[0]
    return d


# monitoring LXC
try:
    raw = remote_cat("root@10.0.10.22", "cat /opt/monitoring/.env 2>/dev/null")
    m = parse_env(raw)
    for k in ("UMAMI_DB_PASS", "UMAMI_APP_SECRET"):
        if m.get(k):
            pulled[k] = m[k]
except (subprocess.CalledProcessError, OSError) as e:
    print(f"# skip monitoring: {e}", file=sys.stderr)

# hermes mattermost
try:
    raw = remote_cat(
        "ladmin@10.0.10.36",
        "sudo cat /home/hermes/.hermes/secrets/mattermost.env 2>/dev/null",
    )
    h = parse_env(raw)
    for k in ("MATTERMOST_URL", "MATTERMOST_TOKEN", "MATTERMOST_ALLOWED_USERS"):
        if h.get(k):
            pulled[k] = h[k]
except (subprocess.CalledProcessError, OSError) as e:
    print(f"# skip hermes: {e}", file=sys.stderr)

# merge with existing .env (preserve user-filled keys)
existing = {}
if out.exists():
    existing = parse_local_env(out.read_text())
merged = {**existing, **pulled}

header = """# Auto-merged by scripts/vault-pull-infra-secrets.sh + your edits
# Run: make vault-import-env
"""
# Quote on write: parsing strips quotes, so writing raw values back would turn
# KEY='a b' into KEY=a b and break every script that sources this file.
body = "\n".join(
    f"{k}={shlex.quote(v) if v else ''}" for k, v in sorted(merged.items())
) + "\n"
out.write_text(header + body)
print(f"Wrote {len(merged)} keys to {out}")
PY

chmod 600 "${ENV_FILE}" 2>/dev/null || true
"${REPO_ROOT}/scripts/vault-import-env.sh" "${ENV_FILE}"