Compare commits

..

No commits in common. "master" and "fix/inventory-host-ips@skipci" have entirely different histories.

81 changed files with 313 additions and 4879 deletions

View File

@ -1,37 +0,0 @@
# Copy to .env (gitignored): cp .env.example .env
#
# vault → .env: make vault-export-env
# .env → vault: make vault-import-env
# hosts → vault: make vault-pull-infra-secrets (SSH to monitoring/hermes, then import)
#
# Prefer vault for long-term storage; delete .env after export if you want.
# Mailcow (make mailcow-mailbox MAILBOX=alerts)
MAILCOW_API_KEY=
ALERTS_PASSWORD=
# Uptime Kuma @ 10.0.10.22:3001 (scripts/kuma-setup-smtp.sh)
KUMA_URL=http://10.0.10.22:3001
KUMA_USER=admin
KUMA_PASSWORD=
# Kuma SMTP notification (after alerts@ mailbox exists)
SMTP_HOST=mail.levkine.ca
SMTP_PORT=587
SMTP_USER=alerts@levkine.ca
SMTP_PASS=
SMTP_TO=idobkin@gmail.com
# Umami @ 10.0.10.22:3000 (admin UI password; DB pass is on LXC only)
UMAMI_ADMIN_PASSWORD=
# Hermes Mattermost (not Telegram)
MATTERMOST_URL=
MATTERMOST_TOKEN=
MATTERMOST_ALLOWED_USERS=
# Optional: same password on Proxmox / LXCs / caddy root (if you use one shared admin password)
# PROXMOX_PASSWORD=
# LXC_ROOT_PASSWORD=
# Per-mailbox: MAILBOX_notify_PASSWORD=

View File

@ -65,7 +65,7 @@ jobs:
runs-on: ubuntu-latest
if: needs.skip-ci-check.outputs.should-skip != '1' && (github.event_name == 'pull_request' || github.ref == 'refs/heads/master')
container:
image: node:20-bookworm
image: node:20-bullseye
steps:
- name: Check out code
uses: actions/checkout@v4
@ -84,26 +84,12 @@ jobs:
needs: skip-ci-check
runs-on: ubuntu-latest
if: needs.skip-ci-check.outputs.should-skip != '1' && (github.event_name == 'pull_request' || github.ref == 'refs/heads/master')
env:
PIP_NO_CACHE_DIR: "1"
PIP_BREAK_SYSTEM_PACKAGES: "1"
container:
image: node:20-bookworm
image: node:20-bullseye
steps:
- name: Check out code
uses: actions/checkout@v4
- name: Bootstrap pip (PEP 668 / bookworm)
run: |
python3 --version
if ! python3 -m pip --version >/dev/null 2>&1; then
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
python3 /tmp/get-pip.py --disable-pip-version-check --break-system-packages
fi
- name: Show disk space (runner may be full)
run: df -h / /tmp || true
- name: Configure CI Ansible (no vault, localhost inventory)
run: |
set -e
@ -112,13 +98,12 @@ jobs:
localhost ansible_connection=local
EOF
cat > /tmp/ci-ansible.cfg <<EOF
cat > /tmp/ci-ansible.cfg <<'EOF'
[defaults]
inventory = /tmp/ci-inventory.ini
roles_path = ${GITHUB_WORKSPACE}/roles
roles_path = /workspace/ilia/ansible/roles
host_key_checking = False
stdout_callback = default
callback_result_format = yaml
stdout_callback = yaml
bin_ansible_callbacks = True
retry_files_enabled = False
interpreter_python = auto_silent
@ -130,29 +115,18 @@ jobs:
echo "ANSIBLE_INVENTORY=/tmp/ci-inventory.ini" >> "$GITHUB_ENV"
- name: Install Ansible and linting tools
run: pip3 install --no-cache-dir ansible ansible-lint yamllint pyyaml
- name: Install Ansible collections
run: |
python3 -m pip install --no-cache-dir ansible-core ansible-lint yamllint pyyaml
ansible-galaxy collection install -r collections/requirements.yml
rm -rf /root/.cache/pip /tmp/pip-* 2>/dev/null || true
- name: Validate YAML syntax
run: |
echo "Checking YAML syntax..."
find . \( -name "*.yml" -o -name "*.yaml" \) \
! -path "./.git/*" \
! -path "./node_modules/*" \
! -path "./.venv/*" \
! -name "vault.yml" \
! -name "vault.yaml" \
! -name "vault_*.yml" \
! -name "vault_*.yaml" \
| while read -r file; do
if head -n 5 "$file" | grep -q '^\$ANSIBLE_VAULT'; then
echo "Skipping encrypted vault file: $file"
continue
fi
python3 -c "import yaml; yaml.safe_load(open('$file'))" || exit 1
done
find . -name "*.yml" -o -name "*.yaml" | grep -v ".git" | while read file; do
python3 -c "import yaml; yaml.safe_load(open('$file'))" || exit 1
done
- name: Run ansible-lint
run: ansible-lint
@ -162,7 +136,7 @@ jobs:
if: needs.skip-ci-check.outputs.should-skip != '1'
runs-on: ubuntu-latest
container:
image: node:20-bookworm
image: node:20-bullseye
steps:
- name: Check out code
uses: actions/checkout@v4
@ -180,11 +154,8 @@ jobs:
needs: skip-ci-check
if: needs.skip-ci-check.outputs.should-skip != '1'
runs-on: ubuntu-latest
env:
PIP_NO_CACHE_DIR: "1"
PIP_BREAK_SYSTEM_PACKAGES: "1"
container:
image: node:20-bookworm
image: node:20-bullseye
steps:
- name: Check out code
uses: actions/checkout@v4
@ -202,12 +173,8 @@ jobs:
- name: Scan Python dependencies
run: |
if [ -f requirements.txt ]; then
if ! python3 -m pip --version >/dev/null 2>&1; then
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
python3 /tmp/get-pip.py --disable-pip-version-check --break-system-packages
fi
python3 -m pip install --no-cache-dir pip-audit
python3 -m pip-audit -r requirements.txt
pip3 install --no-cache-dir pip-audit
pip-audit -r requirements.txt
else
echo "No requirements.txt, skipping pip-audit"
fi
@ -217,25 +184,14 @@ jobs:
needs: skip-ci-check
if: needs.skip-ci-check.outputs.should-skip != '1'
runs-on: ubuntu-latest
env:
PIP_NO_CACHE_DIR: "1"
PIP_BREAK_SYSTEM_PACKAGES: "1"
container:
image: node:20-bookworm
image: node:20-bullseye
steps:
- name: Check out code
uses: actions/checkout@v4
- name: Bootstrap pip (PEP 668 / bookworm)
run: |
python3 --version
if ! python3 -m pip --version >/dev/null 2>&1; then
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
python3 /tmp/get-pip.py --disable-pip-version-check --break-system-packages
fi
- name: Install Semgrep
run: python3 -m pip install --no-cache-dir semgrep
run: pip3 install --no-cache-dir semgrep
- name: Run Semgrep scan
run: semgrep --config=auto --error
@ -246,7 +202,7 @@ jobs:
if: needs.skip-ci-check.outputs.should-skip != '1'
runs-on: ubuntu-latest
container:
image: node:20-bookworm
image: node:20-bullseye
steps:
- name: Check out code
uses: actions/checkout@v4
@ -268,24 +224,14 @@ jobs:
needs: skip-ci-check
if: needs.skip-ci-check.outputs.should-skip != '1'
runs-on: ubuntu-latest
env:
PIP_NO_CACHE_DIR: "1"
PIP_BREAK_SYSTEM_PACKAGES: "1"
container:
image: node:20-bookworm
image: node:20-bullseye
steps:
- name: Check out code
uses: actions/checkout@v4
- name: Bootstrap pip (PEP 668 / bookworm)
run: |
if ! python3 -m pip --version >/dev/null 2>&1; then
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
python3 /tmp/get-pip.py --disable-pip-version-check --break-system-packages
fi
- name: Install Ansible
run: python3 -m pip install --no-cache-dir ansible-core
run: pip3 install --no-cache-dir ansible
- name: Validate vault files are encrypted
run: |
@ -322,22 +268,12 @@ jobs:
needs: skip-ci-check
if: needs.skip-ci-check.outputs.should-skip != '1'
runs-on: ubuntu-latest
env:
PIP_NO_CACHE_DIR: "1"
PIP_BREAK_SYSTEM_PACKAGES: "1"
container:
image: node:20-bookworm
image: node:20-bullseye
steps:
- name: Check out code
uses: actions/checkout@v4
- name: Bootstrap pip (PEP 668 / bookworm)
run: |
if ! python3 -m pip --version >/dev/null 2>&1; then
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
python3 /tmp/get-pip.py --disable-pip-version-check --break-system-packages
fi
- name: Configure CI Ansible (no vault, localhost inventory)
run: |
set -e
@ -362,27 +298,14 @@ jobs:
[local]
localhost ansible_connection=local
[sites]
localhost ansible_connection=local
[comms]
localhost ansible_connection=local
[proxmox]
localhost ansible_connection=local
[caddy]
localhost ansible_connection=local
EOF
cat > /tmp/ci-ansible.cfg <<EOF
cat > /tmp/ci-ansible.cfg <<'EOF'
[defaults]
inventory = /tmp/ci-inventory.ini
roles_path = ${GITHUB_WORKSPACE}/roles
roles_path = /workspace/ilia/ansible/roles
host_key_checking = False
stdout_callback = default
callback_result_format = yaml
stdout_callback = yaml
bin_ansible_callbacks = True
retry_files_enabled = False
interpreter_python = auto_silent
@ -394,10 +317,11 @@ jobs:
echo "ANSIBLE_INVENTORY=/tmp/ci-inventory.ini" >> "$GITHUB_ENV"
- name: Install Ansible
run: pip3 install --no-cache-dir ansible
- name: Install Ansible collections
run: |
python3 -m pip install --no-cache-dir ansible-core
ansible-galaxy collection install -r collections/requirements.yml
rm -rf /root/.cache/pip /tmp/pip-* 2>/dev/null || true
- name: Validate playbooks (CI inventory, no vault)
run: |
@ -428,13 +352,12 @@ jobs:
if: needs.skip-ci-check.outputs.should-skip != '1'
runs-on: ubuntu-latest
container:
image: node:20-bookworm
image: node:20-bullseye
steps:
- name: Check out code
uses: actions/checkout@v4
- name: Install Trivy
continue-on-error: true
run: |
set -e
# Use a fixed, known-good Trivy version to avoid URL/redirect issues
@ -492,13 +415,18 @@ jobs:
needs: skip-ci-check
if: needs.skip-ci-check.outputs.should-skip != '1' && (github.event_name == 'pull_request' || github.ref == 'refs/heads/master')
runs-on: ubuntu-latest
continue-on-error: true
container:
image: sonarsource/sonar-scanner-cli:latest
image: sonarsource/sonar-scanner-cli:5.0.1.3006
env:
SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
steps:
- name: Install Node.js for checkout action
run: apk add --no-cache nodejs npm curl
- name: Check out code
uses: actions/checkout@v4
- name: Verify SonarQube connection
run: |
echo "Checking SonarQube connectivity..."

11
.gitignore vendored
View File

@ -5,7 +5,6 @@
# Temporary files
*.tmp
*.bak
*.vault-bak
*~
vault.yml.bak.*
@ -18,9 +17,6 @@ id_rsa
id_ed25519
id_ecdsa
# Python venv (make bootstrap)
.venv/
# Python bytecode
__pycache__/
*.py[cod]
@ -38,11 +34,4 @@ Thumbs.db
.ansible/facts/
# Local data exports (Nextcloud, etc.)
exports/
# Local secrets (Mailcow API, Kuma passwords) — never commit
.env
.env.local
node_modules/

View File

@ -1,7 +1,7 @@
{
"default": true,
"MD013": {
"line_length": 400,
"line_length": 160,
"code_blocks": false,
"tables": false
},
@ -13,8 +13,6 @@
"MD034": false,
"MD040": false,
"MD047": false,
"MD058": false,
"MD060": false,
"MD036": false
"MD058": false
}

237
Makefile
View File

@ -1,4 +1,4 @@
.PHONY: help bootstrap lint test check dev datascience inventory inventory-all local servers workstations clean status tailscale tailscale-check tailscale-dev tailscale-status create-vault create-vm monitoring copy-ssh-key copy-ssh-keys copy-ssh-keys-ansible copy-ssh-key-mailcow bootstrap-root-ssh bootstrap-root-ssh-services bootstrap-root-ssh-failed mailcow-mailbox mailcow-create-alerts vault-import-env
.PHONY: help bootstrap lint test check dev datascience inventory inventory-all local servers workstations clean status tailscale tailscale-check tailscale-dev tailscale-status create-vault create-vm monitoring
.DEFAULT_GOAL := help
## Colors for output
@ -28,27 +28,13 @@ PYTHON_REQ := requirements.txt
INVENTORY := inventories/production
INVENTORY_HOSTS := $(INVENTORY)/hosts
# Python venv (created by `make bootstrap`)
VENV := .venv
ifneq ($(wildcard $(VENV)/bin/ansible-playbook),)
export PATH := $(abspath $(VENV)/bin):$(PATH)
ANSIBLE_VAULT := $(abspath $(VENV))/bin/ansible-vault
else
ANSIBLE_VAULT := ansible-vault
endif
# Common ansible-playbook command with options
ANSIBLE_PLAYBOOK := ansible-playbook -i $(INVENTORY)
ANSIBLE_ARGS := --vault-password-file ~/.ansible-vault-pass
# Note: sudo passwords are in vault files as ansible_become_password
## Auto-detect current host to exclude from remote operations
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
CURRENT_IP := $(shell ipconfig getifaddr en0 2>/dev/null || ipconfig getifaddr en1 2>/dev/null || echo "")
else
CURRENT_IP := $(shell hostname -I 2>/dev/null | awk '{print $$1}')
endif
CURRENT_IP := $(shell hostname -I | awk '{print $$1}')
# NOTE: inventory parsing may require vault secrets. Keep this best-effort and silent in CI.
CURRENT_HOST := $(shell ansible-inventory --list --vault-password-file ~/.ansible-vault-pass 2>/dev/null | jq -r '._meta.hostvars | to_entries[] | select(.value.ansible_host == "$(CURRENT_IP)") | .key' 2>/dev/null | head -1)
EXCLUDE_CURRENT := $(if $(CURRENT_HOST),--limit '!$(CURRENT_HOST)',)
@ -73,36 +59,37 @@ help: ## Show this help message
@echo " make maintenance-verbose GROUP=dev # Verbose maintenance on dev group"
@echo ""
require-ansible: ## Verify ansible is available (run make bootstrap if missing)
@command -v ansible-playbook >/dev/null 2>&1 && command -v ansible-vault >/dev/null 2>&1 || { \
echo "$(RED)ansible-playbook/ansible-vault not found$(RESET)"; \
echo "Run: $(BLUE)make bootstrap$(RESET)"; \
exit 1; \
}
bootstrap: ## Install all project dependencies from requirements files
@echo "$(BOLD)Installing Project Dependencies$(RESET)"
@echo ""
@echo "$(YELLOW)Python venv ($(VENV))/$(PYTHON_REQ):$(RESET)"
@if [ ! -f "$(PYTHON_REQ)" ]; then \
@echo "$(YELLOW)Python Requirements ($(PYTHON_REQ)):$(RESET)"
@if [ -f "$(PYTHON_REQ)" ]; then \
if command -v pipx >/dev/null 2>&1; then \
printf " %-30s " "Installing with pipx"; \
if pipx install -r $(PYTHON_REQ) >/dev/null 2>&1; then \
echo "$(GREEN)✓ Installed$(RESET)"; \
else \
echo "$(YELLOW)⚠ Some packages may have failed$(RESET)"; \
fi; \
elif command -v pip3 >/dev/null 2>&1; then \
printf " %-30s " "Installing with pip3 --user"; \
if pip3 install --user -r $(PYTHON_REQ) >/dev/null 2>&1; then \
echo "$(GREEN)✓ Installed$(RESET)"; \
else \
printf " %-30s " "Trying with --break-system-packages"; \
if pip3 install --break-system-packages -r $(PYTHON_REQ) >/dev/null 2>&1; then \
echo "$(GREEN)✓ Installed$(RESET)"; \
else \
echo "$(RED)✗ Failed$(RESET)"; \
fi; \
fi; \
else \
printf " %-30s " "Python packages"; \
echo "$(YELLOW)⚠ Skipped (pip3/pipx not found)$(RESET)"; \
fi; \
else \
printf " %-30s " "$(PYTHON_REQ)"; \
echo "$(RED)✗ File not found$(RESET)"; \
elif ! command -v python3 >/dev/null 2>&1; then \
printf " %-30s " "Python venv"; \
echo "$(RED)✗ python3 not found$(RESET)"; \
else \
if [ ! -d "$(VENV)" ]; then \
printf " %-30s " "Creating venv"; \
python3 -m venv "$(VENV)" && echo "$(GREEN)✓ Created$(RESET)" || { echo "$(RED)✗ Failed$(RESET)"; exit 1; }; \
fi; \
printf " %-30s " "Installing packages"; \
if "$(VENV)/bin/pip" install -r "$(PYTHON_REQ)" >/dev/null 2>&1; then \
echo "$(GREEN)✓ Installed$(RESET)"; \
echo " $(BLUE)Ansible:$(RESET) $(abspath $(VENV))/bin/ansible-playbook"; \
else \
echo "$(RED)✗ Failed$(RESET)"; \
exit 1; \
fi; \
fi
@echo ""
@echo "$(YELLOW)Node.js Dependencies (package.json):$(RESET)"
@ -120,9 +107,7 @@ bootstrap: ## Install all project dependencies from requirements files
@echo ""
@echo "$(YELLOW)Ansible Collections ($(COLLECTIONS_REQ)):$(RESET)"
@if [ -f "$(COLLECTIONS_REQ)" ]; then \
GALAXY="$$(command -v ansible-galaxy)"; \
[ -x "$(VENV)/bin/ansible-galaxy" ] && GALAXY="$(abspath $(VENV))/bin/ansible-galaxy"; \
"$$GALAXY" collection install -r $(COLLECTIONS_REQ) 2>&1 | grep -E "(Installing|Skipping|ERROR)" | while read line; do \
ansible-galaxy collection install -r $(COLLECTIONS_REQ) 2>&1 | grep -E "(Installing|Skipping|ERROR)" | while read line; do \
if echo "$$line" | grep -q "Installing"; then \
collection=$$(echo "$$line" | awk '{print $$2}' | sed 's/:.*//'); \
printf " $(GREEN)✓ %-30s$(RESET) Installed\n" "$$collection"; \
@ -132,7 +117,7 @@ bootstrap: ## Install all project dependencies from requirements files
elif echo "$$line" | grep -q "ERROR"; then \
printf " $(RED)✗ Error: $$line$(RESET)\n"; \
fi; \
done || "$$GALAXY" collection install -r $(COLLECTIONS_REQ); \
done || ansible-galaxy collection install -r $(COLLECTIONS_REQ); \
else \
printf " %-30s " "$(COLLECTIONS_REQ)"; \
echo "$(RED)✗ File not found$(RESET)"; \
@ -280,22 +265,6 @@ servers: ## Run baseline server playbook (usage: make servers [GROUP=services] [
$(ANSIBLE_PLAYBOOK) $(PLAYBOOK_SERVERS); \
fi
caddy-auth: require-ansible ## Ensure auth.levkin.ca reverse proxy on Caddy VM
@echo "$(YELLOW)Updating Caddy for Authentik...$(RESET)"
$(ANSIBLE_PLAYBOOK) playbooks/caddy-auth-authentik.yml $(ANSIBLE_ARGS)
caddy-levkin: require-ansible ## Ensure levkin.ca reverse proxy on Caddy VM
@echo "$(YELLOW)Updating Caddy for levkin.ca...$(RESET)"
$(ANSIBLE_PLAYBOOK) playbooks/caddy-levkin-site.yml $(ANSIBLE_ARGS)
cal-oidc: require-ansible ## Cal.com SAML DB + Authentik OIDC provider (usage: make cal-oidc)
@echo "$(YELLOW)Configuring Cal.com ↔ Authentik OIDC...$(RESET)"
$(ANSIBLE_PLAYBOOK) playbooks/cal-authentik-oidc.yml $(ANSIBLE_ARGS)
cal-oidc-check: require-ansible ## Dry-run Cal.com ↔ Authentik OIDC
@echo "$(YELLOW)Checking Cal.com ↔ Authentik OIDC...$(RESET)"
$(ANSIBLE_PLAYBOOK) playbooks/cal-authentik-oidc.yml --check --diff $(ANSIBLE_ARGS)
workstations: ## Run workstation baseline (usage: make workstations [GROUP=dev] [HOST=dev01])
@echo "$(YELLOW)Applying workstation baseline...$(RESET)"
@EXTRA=""; \
@ -457,7 +426,7 @@ apps: ## Install applications only
$(ANSIBLE_PLAYBOOK) $(PLAYBOOK_WORKSTATIONS) --tags apps
# Connectivity targets
ping: require-ansible auto-fallback ## Ping hosts with colored output (usage: make ping [GROUP=dev] [HOST=dev01])
ping: auto-fallback ## Ping hosts with colored output (usage: make ping [GROUP=dev] [HOST=dev01])
ifdef HOST
@echo "$(YELLOW)Pinging host: $(HOST)$(RESET)"
@ansible $(HOST) -m ping --one-line | while read line; do \
@ -574,25 +543,16 @@ tailscale-status: ## Check Tailscale status on all machines
done
# Vault management
edit-vault: require-ansible ## Edit encrypted host vars (usage: make edit-vault HOST=KrakenMint)
edit-vault: ## Edit encrypted host vars (usage: make edit-vault HOST=dev01)
ifndef HOST
@echo "$(RED)Error: HOST parameter required$(RESET)"
@echo "Usage: make edit-vault HOST=KrakenMint"
@echo "Usage: make edit-vault HOST=dev01"
@exit 1
endif
@vault_file="$(INVENTORY)/host_vars/$(HOST)/vault.yml"; \
if [ ! -f "$$vault_file" ]; then vault_file="$(INVENTORY)/host_vars/$(HOST).yml"; fi; \
if [ ! -f "$$vault_file" ]; then \
echo "$(RED)No vault file for $(HOST):$(RESET)"; \
echo " $(INVENTORY)/host_vars/$(HOST)/vault.yml"; \
echo " $(INVENTORY)/host_vars/$(HOST).yml"; \
exit 1; \
fi; \
echo "$(BLUE)Editing $$vault_file$(RESET)"; \
$(ANSIBLE_VAULT) edit "$$vault_file"
ansible-vault edit host_vars/$(HOST).yml
edit-group-vault: require-ansible ## Edit encrypted group vars (usage: make edit-group-vault)
$(ANSIBLE_VAULT) edit $(INVENTORY)/group_vars/all/vault.yml
edit-group-vault: ## Edit encrypted group vars (usage: make edit-group-vault)
ansible-vault edit inventories/production/group_vars/all/vault.yml
copy-ssh-key: ## Copy SSH key to specific host (usage: make copy-ssh-key HOST=giteaVM)
@ -602,132 +562,19 @@ ifndef HOST
@exit 1
endif
@echo "$(YELLOW)Copying SSH key to $(HOST)...$(RESET)"
@ip=$$(ansible-inventory -i $(INVENTORY) $(ANSIBLE_ARGS) --list 2>/dev/null | jq -r --arg h "$(HOST)" '._meta.hostvars[$$h].ansible_host // empty'); \
user=$$(ansible-inventory -i $(INVENTORY) $(ANSIBLE_ARGS) --list 2>/dev/null | jq -r --arg h "$(HOST)" '._meta.hostvars[$$h].ansible_user // empty'); \
if [ -z "$$ip" ] || [ "$$ip" = "null" ]; then \
ip=$$(awk -v h="$(HOST)" '$$1==h {print $$2}' $(INVENTORY_HOSTS) | sed 's/ansible_host=//'); \
fi; \
if [ -z "$$user" ] || [ "$$user" = "null" ]; then \
user=$$(awk -v h="$(HOST)" '$$1==h {for(i=2;i<=NF;i++) if($$i~/^ansible_user=/) {sub(/ansible_user=/,"",$$i); print $$i; exit}}' $(INVENTORY_HOSTS)); \
fi; \
if [ -n "$$ip" ] && [ -n "$$user" ]; then \
@ip=$$(ansible-inventory --list | jq -r "._meta.hostvars.$(HOST).ansible_host // empty" 2>/dev/null); \
user=$$(ansible-inventory --list | jq -r "._meta.hostvars.$(HOST).ansible_user // empty" 2>/dev/null); \
if [ -n "$$ip" ] && [ "$$ip" != "null" ] && [ -n "$$user" ] && [ "$$user" != "null" ]; then \
echo "Target: $$user@$$ip"; \
ssh-copy-id -i "$${SSH_PUBLIC_KEY:-$$HOME/.ssh/id_ed25519.pub}" "$$user@$$ip"; \
ssh-copy-id $$user@$$ip; \
else \
echo "$(RED)Could not determine IP or user for $(HOST)$(RESET)"; \
echo "Check your inventory and host_vars"; \
exit 1; \
fi
copy-ssh-keys: ## Copy SSH key to all inventory hosts (usage: make copy-ssh-keys [GROUP=services])
@echo "$(YELLOW)Copying SSH key to inventory hosts...$(RESET)"
@echo "Using key: $${SSH_PUBLIC_KEY:-$$HOME/.ssh/id_ed25519.pub}"
@echo "$(YELLOW)You will be prompted for each host's password (last time).$(RESET)"
@failed=0; ok=0; \
if [ -n "$(GROUP)" ]; then \
hosts=$$(ansible-inventory -i $(INVENTORY) $(ANSIBLE_ARGS) --list 2>/dev/null | jq -r ".\"$(GROUP)\".hosts[]? // empty"); \
else \
hosts=$$(ansible-inventory -i $(INVENTORY) $(ANSIBLE_ARGS) --list 2>/dev/null | jq -r '._meta.hostvars | keys[]' | grep -v '^localhost$$' | sort); \
fi; \
if [ -z "$$hosts" ]; then \
if [ -n "$(GROUP)" ]; then \
hosts=$$(awk -v g="$(GROUP)" 'BEGIN{ing=0} /^\[/ {ing=($$0=="["g"]"); next} ing && /^[a-zA-Z]/ {print $$1}' $(INVENTORY_HOSTS)); \
else \
hosts=$$(awk '/^\[/ {next} /^[a-zA-Z]/ && $$1!="localhost" {print $$1}' $(INVENTORY_HOSTS)); \
fi; \
fi; \
for host in $$hosts; do \
echo ""; echo "$(BLUE)==> $$host$(RESET)"; \
if $(MAKE) --no-print-directory copy-ssh-key HOST=$$host; then ok=$$((ok+1)); else failed=$$((failed+1)); fi; \
done; \
echo ""; \
echo "$(GREEN)Done: $$ok succeeded$(RESET), $(RED)$$failed failed$(RESET)"; \
[ $$failed -eq 0 ]
copy-ssh-keys-ansible: require-ansible ## Copy SSH key via Ansible (usage: make copy-ssh-keys-ansible [GROUP=services] [HOST=dev01])
@echo "$(YELLOW)Deploying SSH key with Ansible (may prompt for SSH password)...$(RESET)"
@limit="all:!local"; \
[ -n "$(GROUP)" ] && limit="$(GROUP)"; \
[ -n "$(HOST)" ] && limit="$(HOST)"; \
$(ANSIBLE_PLAYBOOK) playbooks/ssh-keys.yml $(ANSIBLE_ARGS) --limit "$$limit" --ask-pass
copy-ssh-key-mailcow: ## Copy SSH key to Mailcow VM (root@10.0.10.132 on pve201; prompts for root password once)
@$(MAKE) --no-print-directory copy-ssh-key HOST=mailcow
bootstrap-root-ssh-caddy: ## Bootstrap root on caddy via su + vault_lxc_root_password
@chmod +x scripts/bootstrap-root-ssh-su-password.sh scripts/load-vault-lxc-root-password.sh
@. scripts/load-vault-lxc-root-password.sh; ./scripts/bootstrap-root-ssh-su-password.sh caddy
bootstrap-root-ssh: ## SSH as ladmin, su to root, install root key (usage: make bootstrap-root-ssh HOST=listmonk)
ifndef HOST
@echo "$(RED)Error: HOST parameter required$(RESET)"
@echo "Usage: make bootstrap-root-ssh HOST=listmonk"
@exit 1
endif
@chmod +x scripts/bootstrap-root-ssh.sh
@BOOTSTRAP_USER="$(BOOTSTRAP_USER)" TARGET_USER="$(TARGET_USER)" \
scripts/bootstrap-root-ssh.sh "$(HOST)"
bootstrap-root-ssh-services: ## Bootstrap root SSH via ladmin (caddy, listmonk, vikunja)
@chmod +x scripts/bootstrap-root-ssh.sh
@failed=0; ok=0; \
for host in caddy listmonk vikunja; do \
echo ""; echo "$(BLUE)==> $$host$(RESET)"; \
if BOOTSTRAP_USER="$(BOOTSTRAP_USER)" scripts/bootstrap-root-ssh.sh "$$host"; then \
ok=$$((ok+1)); \
else \
failed=$$((failed+1)); \
fi; \
done; \
echo ""; echo "$(GREEN)Done: $$ok succeeded$(RESET), $(RED)$$failed failed$(RESET)"; \
[ $$failed -eq 0 ]
mailcow-mailbox: ## Create Mailcow mailbox (usage: make mailcow-mailbox MAILBOX=alerts)
ifndef MAILBOX
@echo "$(RED)Error: MAILBOX required$(RESET)"
@echo "Usage: make mailcow-mailbox MAILBOX=alerts"
@echo "Define mailboxes in inventories/production/group_vars/all/mailcow.yml"
@exit 1
endif
@chmod +x scripts/run-mailcow-mailbox.sh
@MAILBOX="$(MAILBOX)" ./scripts/run-mailcow-mailbox.sh
mailcow-create-alerts: ## Alias for make mailcow-mailbox MAILBOX=alerts
@$(MAKE) --no-print-directory mailcow-mailbox MAILBOX=alerts
vault-pull-infra-secrets: ## Pull Umami/Mattermost from hosts → .env → vault (not vault→.env)
@chmod +x scripts/vault-pull-infra-secrets.sh scripts/vault-import-env.sh
@./scripts/vault-pull-infra-secrets.sh
vault-export-env: ## Write vault secrets into .env (keeps existing non-empty keys)
@chmod +x scripts/vault-export-env.sh
@./scripts/vault-export-env.sh "$(or $(ENV_FILE),.env)"
kuma-add-monitors: ## Add default Uptime Kuma monitors (needs KUMA_PASSWORD in .env)
@chmod +x scripts/kuma-add-monitors.sh
@./scripts/kuma-add-monitors.sh
vault-import-env: ## Merge .env secrets into Ansible vault (usage: make vault-import-env [ENV_FILE=.env])
@chmod +x scripts/vault-import-env.sh
@ENV_FILE="$(or $(ENV_FILE),.env)" scripts/vault-import-env.sh "$(or $(ENV_FILE),.env)"
bootstrap-root-ssh-failed: ## Bootstrap root SSH on hosts that failed direct root copy-ssh-keys
@chmod +x scripts/bootstrap-root-ssh.sh
@failed=0; ok=0; \
for host in caddy listmonk vikunja n8n qBittorrent actual caseware auto mailcow; do \
echo ""; echo "$(BLUE)==> $$host$(RESET)"; \
if BOOTSTRAP_USER="$(BOOTSTRAP_USER)" scripts/bootstrap-root-ssh.sh "$$host"; then \
ok=$$((ok+1)); \
else \
failed=$$((failed+1)); \
fi; \
done; \
echo ""; echo "$(GREEN)Done: $$ok succeeded$(RESET), $(RED)$$failed failed$(RESET)"; \
[ $$failed -eq 0 ]
create-vault: require-ansible ## Create encrypted vault file for secrets (passwords, auth keys, etc.)
create-vault: ## Create encrypted vault file for secrets (passwords, auth keys, etc.)
@echo "$(YELLOW)Creating vault file for storing secrets...$(RESET)"
$(ANSIBLE_VAULT) create $(INVENTORY)/group_vars/all/vault.yml
ansible-vault create group_vars/all/vault.yml
@echo "$(GREEN)✓ Vault file created. Add your secrets here (e.g. vault_tailscale_auth_key)$(RESET)"
create-vm: ## Create Ansible controller VM on Proxmox

View File

@ -2,8 +2,7 @@
inventory = inventories/production
roles_path = roles
host_key_checking = False
stdout_callback = default
callback_result_format = yaml
stdout_callback = yaml
bin_ansible_callbacks = True
retry_files_enabled = False
gathering = smart

View File

@ -4,7 +4,6 @@
HOSTS_FILE="inventories/production/hosts"
TIMEOUT=3
CHANGED=false
UNAME_S="$(uname -s)"
# Colors
GREEN='\033[0;32m'
@ -19,12 +18,10 @@ echo "=================================================================="
# Function to test IP connectivity
test_ip() {
local ip="$1"
if [[ "$UNAME_S" == "Darwin" ]]; then
# macOS: -W is wait time in milliseconds
ping -c 1 -W $((TIMEOUT * 1000)) "$ip" >/dev/null 2>&1
if ping -c 1 -W "$TIMEOUT" "$ip" >/dev/null 2>&1; then
return 0
else
# Linux: -W is timeout in seconds
ping -c 1 -W "$TIMEOUT" "$ip" >/dev/null 2>&1
return 1
fi
}
@ -34,7 +31,7 @@ test_ssh() {
local ip="$2"
local user="$3"
if ssh -o ConnectTimeout=3 -o BatchMode=yes "$user@$ip" exit >/dev/null 2>&1; then
if timeout 5 ssh -o ConnectTimeout=3 -o BatchMode=yes "$user@$ip" exit >/dev/null 2>&1; then
return 0
else
return 1
@ -49,14 +46,11 @@ switch_to_fallback() {
echo -e " ${YELLOW}→ Switching $hostname to fallback IP: $fallback_ip${NC}"
# Use sed to replace the primary IP with fallback IP (BSD/GNU compatible)
if [[ "$UNAME_S" == "Darwin" ]]; then
sed -i '' "s/$hostname ansible_host=$primary_ip/$hostname ansible_host=$fallback_ip/" "$HOSTS_FILE"
sed -i '' "s/ ansible_host_fallback=$fallback_ip//" "$HOSTS_FILE"
else
sed -i "s/$hostname ansible_host=$primary_ip/$hostname ansible_host=$fallback_ip/" "$HOSTS_FILE"
sed -i "s/ ansible_host_fallback=$fallback_ip//" "$HOSTS_FILE"
fi
# Use sed to replace the primary IP with fallback IP
sed -i "s/$hostname ansible_host=$primary_ip/$hostname ansible_host=$fallback_ip/" "$HOSTS_FILE"
# Remove the fallback attribute since we're now using it as primary
sed -i "s/ ansible_host_fallback=$fallback_ip//" "$HOSTS_FILE"
CHANGED=true
}
@ -72,10 +66,9 @@ while IFS= read -r line; do
# Parse host entry
if [[ "$line" =~ ansible_host= ]]; then
hostname=$(echo "$line" | awk '{print $1}')
primary_ip=$(echo "$line" | sed -n 's/.*ansible_host=\([^[:space:]]*\).*/\1/p')
fallback_ip=$(echo "$line" | sed -n 's/.*ansible_host_fallback=\([^[:space:]]*\).*/\1/p')
user=$(echo "$line" | sed -n 's/.*ansible_user=\([^[:space:]]*\).*/\1/p')
[[ -z "$user" ]] && user="root"
primary_ip=$(echo "$line" | grep -oP 'ansible_host=\K[^\s]+')
fallback_ip=$(echo "$line" | grep -oP 'ansible_host_fallback=\K[^\s]+' || echo "")
user=$(echo "$line" | grep -oP 'ansible_user=\K[^\s]+' || echo "root")
echo -n "Testing $hostname ($primary_ip)... "

View File

@ -1,60 +0,0 @@
# Encrypted secrets in this project
Ansible Vault is the standard way to store and share secrets with this repo. Plain `.env` files are gitignored and meant only as a **temporary** import path on your machine.
## Recommended workflow
1. **Never commit** `.env`, API keys, or passwords.
2. Store secrets in `inventories/production/group_vars/all/vault.yml` (encrypted).
3. Edit with `make edit-group-vault` (uses `~/.ansible-vault-pass` on your workstation).
4. Teammates need the same vault password file out-of-band (password manager, not git).
## One-time import from `.env`
```bash
cp .env.example .env
# fill MAILCOW_API_KEY, ALERTS_PASSWORD, etc.
make vault-import-env
rm .env # optional after import
```
`make vault-import-env` merges supported keys into the vault and re-encrypts the file.
## Mailcow mailboxes (dynamic)
| File | Purpose |
|------|---------|
| `group_vars/all/mailcow.yml` | Mailbox names, local parts, quotas (no secrets) |
| `vault.yml` | `vault_mailcow_api_key`, `vault_mailcow_mailbox_passwords` |
```bash
make mailcow-mailbox MAILBOX=alerts
```
Add a new mailbox:
1. In `mailcow.yml` under `mailcow_mailboxes:` add e.g. `notify: { local_part: notify, name: Notify, quota: 512, vault_password_key: notify }`
2. In vault: `vault_mailcow_mailbox_passwords.notify: "..."` (via `make edit-group-vault`)
3. `make mailcow-mailbox MAILBOX=notify`
## Can `.env` itself be encrypted?
Yes, but Ansible projects usually skip that pattern:
| Approach | Use when |
|----------|----------|
| **Ansible Vault** (`vault.yml`) | Default for this repo — works with playbooks and `make` targets |
| **`ansible-vault encrypt .env`** | Produces `.env` vault blob; you must `ansible-vault view .env` or decrypt to a temp file before tools read it — awkward for shell scripts |
| **Password manager / 1Password CLI** | Personal machine only, not for CI/ansible runs |
| **SOPS / Mozilla SOPS** | Teams that want encrypted YAML/JSON in git with KMS/PGP — heavier setup |
**Sharing encrypted secrets with others:** share the **vault password** (or per-host vault pass) securely once; they clone the repo and use the same encrypted `vault.yml`. Do not email `.env` files.
## Encrypting a single value (without opening the whole file)
```bash
ansible-vault encrypt_string 'secret-value' --name 'vault_my_secret' \
--vault-password-file ~/.ansible-vault-pass
```
Paste the output into `vault.yml` inside the encrypted file, or into a vars file that is entirely vault-encrypted.

View File

@ -1,56 +0,0 @@
# Cal.com → Authentik OIDC
**Status: deferred** — Cal.com self-hosted SSO is a **commercial (enterprise) feature**. Without a valid `CALCOM_LICENSE_KEY`, the UI at `/settings/security/sso` stays locked (*Contact sales*).
See **[sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)** for Phase 4 apps that do not need a Cal-style license.
## Current state (2026-05-23)
| Item | Status |
|------|--------|
| `calsaml` Postgres DB | ✅ Created |
| `SAML_DATABASE_URL`, `SAML_ADMINS` in `/opt/cal/.env` | ✅ Set |
| `docker-compose` passes license + SAML env | ✅ |
| Authentik app `cal-com` + provider `cal-com-oidc` | ✅ (ready when license exists) |
| `CALCOM_LICENSE_KEY` in `.env` | ❌ **Empty** — SSO UI blocked |
| Cal UI OIDC configuration | ⏳ **Blocked** until license |
## When you have a license
1. Add to `/opt/cal/.env`:
```bash
CALCOM_LICENSE_KEY=<key-from-cal.com>
NEXT_PUBLIC_LICENSE_CONSENT=agree
```
2. Restart: `ssh cal`, then `cd /opt/cal && docker compose up -d`
3. Confirm in container: `docker exec calcom printenv CALCOM_LICENSE_KEY` (non-empty)
4. Log in as **`idobkin@gmail.com`** → **https://cal.levkin.ca/settings/security/sso**
5. Configure OIDC:
| Field | Value |
|-------|--------|
| Client ID | `cal-com` |
| Client Secret | from Authentik → Applications → Cal.com |
| Well Known URL | `https://auth.levkin.ca/application/o/cal-com/.well-known/openid-configuration` |
Test SSO; keep local Cal password as break-glass.
## Ansible (infra only)
```bash
make cal-oidc # SAML DB + Authentik provider (safe to re-run)
make cal-oidc-check
```
Vault (optional): `vault_cal_oidc_client_secret` — see `vault.example.yml`.
## Redirect URI (Authentik)
```text
https://cal.levkin.ca/api/auth/oidc
```
## Related
- [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)
- [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md)

View File

@ -1,70 +0,0 @@
# Homelab status — 2026-05-23
Quick checklist. **Master plan:** [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md) · **Cursor plan:** `~/.cursor/plans/levkin_selfhost_rollout_e75909ae.plan.md`
## Done (automation)
| Item | Notes |
|------|--------|
| Mailcow `alerts@levkine.ca` | Created via API |
| Kuma + Dockge + Umami | LXC 218 @ `10.0.10.22`; Dockge stack **monitoring** active |
| Old Kuma pve201 LXC 305 | Stopped, `onboot` off |
| `stats.levkin.ca` | Caddy → Umami `:3000` |
| Tracking scripts | levkin.ca + caseware + auto + portfolio (`iliadobkin.com`) |
| **levkin.ca** | LXC **220** @ `10.0.10.60`; Caddy → nginx; `/` = spec, `/folders/` = stack |
| Portfolio `iliadobkin.com` | Migrated pve201 LXC **306** → pve10 LXC **219** @ `10.0.10.106`; Caddy → nginx `:80` |
| Kuma SMTP | Working (user confirmed) |
| Git remote | `git@git.levkin.ca:ilia/...` (SSH → `10.0.10.169` via `~/.ssh/config` on site LXCs) |
| auto repo | Pushed/pulled on `git.levkin.ca` |
| caseware repo | Pushed to Gitea via bundle on server; LXCs pull via internal SSH |
| Vault | Mailcow, Umami, Mattermost in vault; `make vault-export-env` → `.env`; `make vault-pull-infra-secrets` = hosts → vault |
| Caddy root SSH | Works (`make bootstrap-root-ssh-caddy`) |
| Hermes Mattermost | `mattermost.env` on VM; Telegram optional/off |
## Your list — still to do
### You (UI / hardware / DNS)
- [x] **Kuma SMTP** — working
- [ ] **DNS `levkin.ca` + `www`** — A records → home IP (`142.180.237.136`); apex currently parked at AWS, not homelab
- [ ] **Gitea deploy key (levkin LXC 220)** — add `deploy-levkin-levkin.ca` pubkey in repo settings (SSH pull); HTTPS clone works meanwhile
- [ ] **UniFi DHCP reservations** — [unifi-static-dhcp.md](unifi-static-dhcp.md) @ https://192.168.2.1/
- [ ] **Cal.com → Authentik OIDC** — **deferred** (no license key) — [cal-authentik-oidc.md](cal-authentik-oidc.md); Phase 4 → Vikunja — [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)
- [x] **Portainer VM 109** — stopped and destroyed on pve10 (2026-05-23)
- [x] **Listmonk** — service was stopped; `listmonk.service` enabled on VM 113 (2026-05-23)
- [x] **Mailcow** — LAN TCP timeout fixed (netfilter `MAILCOW` drop rule) — [mailcow-lan-proxy-fix.md](mailcow-lan-proxy-fix.md)
- [ ] **DebianDesktop VM 100** — RAM lowered to 24 GB in Proxmox; **reboot guest** to apply balloon
- [ ] **Nextcloud VM 201 retire** — remove Kuma monitor, Caddy `nextcloud.levkin.ca`, stop VM
- [ ] **NAS.SP00 disk replace** — then start Jellyfin (VM 101)
- [x] **Gitea deploy key (portfolio)** — `git pull` works on LXC 219; Gitea VM SSH fixed (`/home/git/.ssh/authorized_keys` + `sudo` to `gitea`)
- [ ] **`.env`** — optional mirror: `make vault-export-env` (vault already has secrets)
- [ ] **Rotate** any secrets pasted in chat (Hermes token, etc.)
### Later / defer
- [ ] Caddy → edge LXC `.20`
- [ ] Immich, Crater, Beszel
- [ ] Public SSH for `git.levkin.ca:22` (optional Caddy `layer4` or DNS split)
## Site LXCs (marketing)
| VMID | Name | IP | Git remote |
|------|------|-----|------------|
| 220 | levkin | 10.0.10.60 | `git@git.levkin.ca:ilia/levkin.ca.git` |
| 215 | caseware | 10.0.10.105 | `git@git.levkin.ca:ilia/caseware.git` |
| 216 | auto | 10.0.10.59 | `git@git.levkin.ca:ilia/auto.git` |
| 219 | portfolio | 10.0.10.106 | `git@git.levkin.ca:ilia/sdetProfile.git` |
**Git SSH note:** `git.levkin.ca` in the URL; traffic goes to **10.0.10.169:22** (not `10.0.30.169`, not public `:22`).
```ssh
# On each site LXC /root/.ssh/config
Host git.levkin.ca
HostName 10.0.10.169
User git
IdentityFile ~/.ssh/id_ed25519
```
## Dockge
Stack **monitoring** in UI = correct. Compose at `/opt/stacks/monitoring/compose.yaml`. Live stack also at `/opt/monitoring` (same containers). Use Dockge for edits/restarts; avoid starting a second copy.

View File

@ -1,142 +0,0 @@
# Host list — Proxmox guests (source of truth)
**Node:** PVENAS (`pve10` @ `10.0.10.10`)
**Audited:** 2026-05-22 (Phase 0 IP pass + monitoring LXC 218 provisioned)
**LAN:** `10.0.10.0/24`, gateway `10.0.10.1`
Update this file whenever a guest is created, migrated, or re-IPd. See [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md) for IP range policy.
---
## IP range plan (10.0.10.0/24)
| Range | Reserved for |
|-------|----------------|
| `.1–.9` | Network gear |
| `.10–.19` | Proxmox host(s) + PBS |
| `.20–.39` | Edge / identity / comms |
| `.40–.79` | Application LXCs / VMs |
| `.80–.99` | Media VMs |
| `.100–.199` | DHCP pool (clients) |
| `.200–.249` | Labs / heavy VMs |
| `.250–.254` | Reserved |
**Rollout reservations (free):** `.20` edge LXC
---
## Proxmox host
| VMID | Name | Role | Current IP | Target static IP | DHCP/Static | Notes |
|------|------|------|------------|------------------|-------------|-------|
| — | **pve10** | Proxmox (PVENAS) | `10.0.10.10/24` | `.10` | Static | This node |
---
## LXCs (pve10)
| VMID | Name | Plan group | Current IP | Target static IP | DHCP/Static | MAC | Notes |
|------|------|------------|------------|------------------|-------------|-----|-------|
| 210 | cal | business | `10.0.10.228/24` | `10.0.10.228/24` | ✅ **Static** | `BC:24:11:DD:F8:7C` | Cal.com — `pct set` applied; in Ansible `hosts` |
| 215 | caseware | **marketing site** | `10.0.10.105/24` | `10.0.10.105/24` | ✅ **Static** | `BC:24:11:72:04:53` | Static HTML `/var/www/caseware` → `caseware.levkin.ca` |
| 216 | auto | **marketing site** | `10.0.10.59/24` | `10.0.10.59/24` | ✅ **Static** | `BC:24:11:43:F0:86` | Static HTML `/var/www/auto` → `auto.levkin.ca` |
| 219 | portfolio | **marketing site** | `10.0.10.106/24` | `10.0.10.106/24` | ✅ **Static** | `BC:24:11:DF:94:32` | Static HTML `/var/www/portfolio` → `iliadobkin.com` (migrated from pve201 LXC 306) |
| 220 | levkin | **marketing site** | `10.0.10.60/24` | `10.0.10.60/24` | ✅ **Static** | `BC:24:11:C6:B2:E4` | Vite `www/` → `levkin.ca` (spec), `levkin.ca/folders` (stack) — [site-lxc-git.md](site-lxc-git.md) |
| 217 | identity | identity | `10.0.10.21/24` | `10.0.10.21/24` | ✅ **Static** | `BC:24:11:3C:85:45` | Authentik + Postgres + Redis; `auth.levkin.ca` via Caddy |
| 218 | monitoring | monitoring | `10.0.10.22/24` | `10.0.10.22/24` | ✅ **Static** | `BC:24:11:54:43:13` | Uptime Kuma `:3001`, Dockge `:5001`, Umami `:3000` — see [monitoring-stack.md](monitoring-stack.md) |
**pve201 (not pve10):** LXC **305** `kuma-debian` @ `10.0.10.197` — **stopped 2026-05-22** (replaced by monitoring LXC 218). `onboot` disabled. LXC **306** `portfolio` — **destroyed/purged 2026-05-22** (now pve10 LXC **219** @ `10.0.10.106`).
---
## VMs (pve10)
| VMID | Name | Plan group | Current IP | Target static IP | DHCP/Static | MAC | Notes |
|------|------|------------|------------|------------------|-------------|-----|-------|
| 100 | homepage-debian | — | — | — | — | — | **Stopped** |
| 101 | Jellyfin | media | `10.0.10.232` | `10.0.10.232/24` | ⏳ DHCP? | `BC:24:11:29:B8:84` | **Stopped** (turned off 2026-05-22); inventory `jellyfin` |
| 102 | gitea-alpine | — | `10.0.10.169/24` | `10.0.10.169/24` | ⏳ stable DHCP | `BC:24:11:E9:BD:E5` | Pin in-guest or router reservation |
| 103 | WRA | — | `10.0.10.154/24` | `10.0.10.154/24` | ⏳ stable DHCP | `BC:24:11:61:DE:7A` | Inventory `n8n`; pin when automating |
| 104 | vaultwarden-debian | identity | `10.0.10.142/24` | `10.0.10.142/24` | ⏳ stable DHCP | `BC:24:11:58:DB:DC` | Inventory `vaultwardenVM` |
| 105 | TrueNAS | — | `10.0.10.107/24` | `10.0.10.107/24` | ⏳ stable DHCP | `BC:24:11:14:DE:B5` | NAS UI; pool `NAS.SP00` degraded |
| 106 | caddy-debian | **edge** | `10.0.10.50/24` | `10.0.10.50/24` → **`.20`** (Phase 1.5) | ✅ **Static** (in-guest) | `BC:24:11:E0:49:B4` | `/etc/network/interfaces` static; Ansible `caddy` |
| 107 | mattermost-ubuntu | comms | `10.0.10.107`? | TBD | ⏳ | `BC:24:11:66:6E:01` | Ping `.107` up; confirm not TrueNAS conflict — verify in guest |
| 108 | actual-debian | business | `10.0.10.158/24` | `10.0.10.158/24` | ⏳ stable DHCP | `BC:24:11:10:7B:64` | Inventory `actual` |
| 109 | portainer-alpine | — | — | — | ✅ **Removed** | `BC:24:11:0F:40:4F` | Destroyed 2026-05-23; Dockge on monitoring LXC 218 |
| 150 | pihole00-debian | — | link-local* | TBD | ⏳ | `BC:24:11:86:76:97` | Running |
| 117 | hermes | services | `10.0.10.36/24` | `10.0.10.36/24` | ⏳ stable DHCP | `BC:24:11:51:1E:99` | On pve10; guest agent; inventory `hermes` |
| 200 | PVE.BU.SVR | labs | `10.0.10.200/24` | `10.0.10.200/24` | ⏳ stable DHCP | `BC:24:11:DA:95:3B` | Running |
| 201 | NextcloudAIO-debian | (decommission) | `10.0.10.24/24` | — | 🗑️ **Retiring** | `BC:24:11:14:D4:DE` | Export done; remove Caddy + Kuma monitor, then stop VM |
| 300 | pihole-debian | — | — | — | — | — | **Stopped** |
\* ARP showed IPv6 link-local only at audit time — confirm IPv4 inside guest or install QEMU guest agent.
---
## Inventory cross-reference (Ansible `hosts`)
| Inventory name | IP in hosts | pve10 guest | Match |
|----------------|-------------|-------------|-------|
| caddy | `10.0.10.50` | VM 106 | ✅ |
| cal | `10.0.10.228` | LXC 210 | ✅ |
| caseware | `10.0.10.105` | LXC 215 | ✅ |
| auto | `10.0.10.59` | LXC 216 | ✅ |
| portfolio | `10.0.10.106` | LXC 219 | ✅ |
| levkin | `10.0.10.60` | LXC 220 | ✅ |
| identity | `10.0.10.21` | LXC 217 | ✅ |
| monitoring | `10.0.10.22` | LXC 218 | ✅ |
| vaultwardenVM | `10.0.10.142` | VM 104 | ✅ |
| giteaVM | `10.0.10.169` | VM 102 | ✅ |
| n8n | `10.0.10.154` | VM 103? | ⚠️ verify (WRA vs n8n) |
| listmonk | `10.0.10.148` | — | On **pve201** (`[comms]`) |
| mailcow | `10.0.10.132` | pve201 VM 106 | ✅ `[comms]` |
| hermes | `10.0.10.36` | VM 117 | ✅ on pve10 |
| jellyfin | `10.0.10.232` | VM 101 | ✅ (stopped until NAS healthy) |
| nextcloud | `10.0.10.24` | VM 201 | commented out (retiring) |
| portainerVM | — | VM 109 | removed (Dockge on monitoring) |
---
## Static IP conversion queue (pve10)
Priority order (plan-2):
1. ✅ **LXC 210** — done (`10.0.10.228/24`)
2. ✅ **LXC 215, 216** — pinned (`.105`, `.59`)
3. ✅ **LXC 217** (identity) — `10.0.10.21/24`, Authentik deployed
4. ✅ **VM 106** (caddy) — static in-guest `.50`
5. ✅ **LXC 218** (monitoring) — `.22`, Kuma/Dockge/Umami
6. **VMs** — use [vm-static-ip-router-reservations.md](vm-static-ip-router-reservations.md) (router MAC reservations); skip **201** (Nextcloud retire)
7. **New:** edge LXC @ **`.20`** (Phase 1.5)
Example:
```bash
# On pve10 (PVENAS)
pct set 215 -net0 name=eth0,bridge=vmbr0,ip=10.0.10.105/24,gw=10.0.10.1
pct set 216 -net0 name=eth0,bridge=vmbr0,ip=10.0.10.59/24,gw=10.0.10.1
```
---
## NAS / storage note
- ZFS pool **`NAS.SP00`** on this node: **DEGRADED** (disk `W4J0L3PY` failed). See [nas-sp00-drive-failure-report.md](nas-sp00-drive-failure-report.md), [nas-sp00-smart-audit-2026-05-21.md](nas-sp00-smart-audit-2026-05-21.md).
- VM **201** root disk on NAS — avoid heavy I/O until pool is healthy.
---
## Audit checklist
- [x] `pct list` / `qm list` on pve10
- [x] ARP / ping for running guests
- [ ] `pct exec` / guest agent for VMs missing IPv4
- [x] Initial `host-list.md` created
- [x] Pin 215/216 static
- [x] Identity LXC 217 @ `.21` (Authentik Phase 1 infra)
- [x] Monitoring LXC 218 @ `.22`
- [x] Caddy VM 106 static `.50`
- [x] LXC backups `backup-20260522` on 217, 218
- [ ] Router DHCP reservations for VMs — [vm-static-ip-router-reservations.md](vm-static-ip-router-reservations.md) (manual in router UI; table ready)
- [ ] Retire VM 201 (Nextcloud)
- [ ] Re-run after NAS disk replace

View File

@ -1,425 +0,0 @@
# Levkin self-hosted stack — plan & decisions
Reference doc for the Proxmox homelab. Lives alongside the Cursor project that has the Proxmox info.
**Conventions:**
- All groups run inside an LXC unless marked **VM**.
- Inside each LXC: one `docker-compose.yml`, managed by **Dockge** where applicable.
- Caddy on the `edge` LXC is the only thing exposed to the internet.
- Authentik on the `identity` LXC is the source of truth for who you are.
- Vaultwarden stays standalone (it's the break-glass path if Authentik dies).
---
## Progress summary (updated 2026-05-23)
| Area | Status |
|------|--------|
| **Phase 0** Foundation | ✅ Mostly done — pve10 LXCs static; site LXCs 215/216/219/220 static; Caddy still on **VM 106** @ `.50` |
| **Phase 1** Identity (Authentik) | ✅ LXC **217** @ `10.0.10.21` — admin + TOTP |
| **Phase 2** Monitoring | ✅ LXC **218** @ `10.0.10.22` — Kuma, Dockge, Umami, Kuma SMTP |
| **Phase 3** Cal.com | ✅ LXC **210** — booking + auto consult button; **OIDC deferred** (no enterprise license) |
| **Phase 4** SSO | ⏳ **Next:** Vikunja → Authentik — [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md) |
| **Phase 5–8** | ⏳ Immich, Crater, Outline, automation depth — after P0 backlog |
| **Comms health** | ✅ Mailcow + Listmonk restored 2026-05-23 — [mailcow-lan-proxy-fix.md](mailcow-lan-proxy-fix.md) |
| **Site consolidation** | ⏳ **Partial** — git LXCs + levkin.ca LXC 220; optional later: static on Caddy VM |
| **dev-apps** | ⏳ punimTag **9101** on pve201 until testing done |
| **Nextcloud retire** | ⏳ VM **201** still running — **#1 RAM win on pve10** (~8 GiB) |
| **Portainer retire** | ✅ VM **109** destroyed 2026-05-23 (~16 GiB on pve10) |
| **Security pass** | 🟡 Partial — SSH keys + apt + cron 2026-05-23 — [security-remediation-plan.md](security-remediation-plan.md) |
---
## Capacity headroom (live check 2026-05-23)
Use this before adding LXCs/VMs. Re-check with `pvesm status` and `free -h` on each node.
### pve10 (PVENAS) — **primary place for new homelab services**
| Resource | Total | Used | **Available** | Notes |
|----------|-------|------|---------------|--------|
| **local-lvm** (thin) | ~1.67 TiB | ~22% | **~1.30 TiB** | Plenty of disk for new LXCs |
| **RAM** (host) | 62 GiB | ~28 GiB | **~33 GiB** | Portainer **109** removed 2026-05-23 |
**Realistic new capacity on pve10 now:** ~**30+ GiB** headroom for Immich, Crater, Beszel, or **dev-apps** (6–8 GiB) after Nextcloud retires.
**Still available to free:**
| Stop / retire | Frees (maxmem) |
|---------------|----------------|
| ~~Portainer VM **109**~~ | ✅ **16 GiB** freed |
| Nextcloud VM **201** | **8 GiB** ← do next |
| Hermes VM **117** (if not needed) | **16 GiB** |
| Site LXCs 215/216 → Caddy static (optional) | **~1 GiB** |
### pve201 (pve) — **do not add new services**
| Resource | Total | Used | **Available** | Notes |
|----------|-------|------|---------------|--------|
| **local-lvm** | ~1.67 TiB | ~46% | **~922 GiB** | Disk OK |
| **RAM** | 125 GiB | ~114 GiB | **~10 GiB** | GPU VM **104** (64 GB), DebianDesktop **100** (24 GB set — **reboot guest**), punimTag **9101** (16 GB) |
**Verdict:** New stacks belong on **pve10**. pve201 only benefits from **stopping/migrating** guests (punim after testing, GPU resize, old Kuma already stopped).
---
## Current state (May 2026)
**Already running:**
- Caddy reverse proxy — currently on a **VM** (should migrate to LXC, see "Caddy migration" section)
- Mailcow — VM, mail domain is `levkine.ca` (with e)
- Vaultwarden, Vikunja, n8n, Listmonk, Mattermost, Nextcloud — across various LXCs
- **Cal.com** — LXC id `210`, `cal.levkin.ca`, Postgres included, admin user `ilia`, 15-min consult event live at `cal.levkin.ca/ilia/consult` with Jitsi link
- Caddy entries live for: `levkin.ca`, `caseware.levkin.ca`, `auto.levkin.ca`, `iliadobkin.com`, `cal.levkin.ca`, `listmonk.levkin.ca`, `pdf.levkin.ca`, `search.levkin.ca`, `auth.levkin.ca`, `stats.levkin.ca`
- **Authentik** — LXC **217** @ `10.0.10.21`, `https://auth.levkin.ca`, admin + TOTP enrolled
- **Monitoring** — LXC **218** @ `10.0.10.22`: Uptime Kuma `:3001`, Dockge `:5001`, Umami `:3000` (LAN-only) — [monitoring-stack.md](monitoring-stack.md)
- **Umami** + **Authentik** admin/TOTP/backup codes — done
- **Uptime Kuma** — monitors live; email alerts via Mailcow — see [monitoring-stack.md](monitoring-stack.md)
- **Dockge** on 218 — manages local `/opt/monitoring` stack
- **Snapshots** `backup-20260522` on LXCs **217**, **218**
- **Jellyfin** (VM 101) — stopped
- LXC **210, 215–218, 219** — static via `pct set`; **Caddy VM 106** — static in-guest `.50`
- **Nextcloud VM 201** — export done; VM **still running** on pve10 — **retire next** (reclaims ~8 GB RAM)
- ~~**Portainer VM 109**~~ — **removed** 2026-05-23 (~16 GiB RAM freed on pve10)
- **Marketing sites** — LXC **220** (`levkin.ca`), **215/216/219** (git deploy), not yet on Caddy VM static roots
- **punimTag dev** — pve201 LXC **9101** @ `10.0.10.121` (16 GB) — leave until testing done; then `dev-apps` on pve10
**Decisions locked in:**
- Container manager: **Dockge** (not Portainer, not Coolify/Dokploy/CapRover)
- Chat: **Mattermost only** — no Matrix/Synapse
- Knowledge tool: **Outline** for client-facing, **SiYuan** if/when PhD work picks up (don't run Affine + Trilium too)
- Bookmark manager: **Linkwarden** (full-page archive is the killer feature)
- Authentik is the SSO target; Vaultwarden stays standalone
---
## LXC / VM grouping table
| Group | What's inside | Why grouped | LXC or VM |
|---|---|---|---|
| **edge** | Caddy reverse proxy, Crowdsec/Fail2ban | The front door — small, stable, restart rarely | LXC, 1 vCPU, 1GB RAM |
| **identity** | Authentik (+ Postgres + Redis), Vaultwarden | Auth-critical — touch rarely, back up religiously | LXC, 2 vCPU, 2GB RAM |
| **comms** | Mailcow | Mailcow's compose is huge (15+ containers) and self-contained — wants its own host | **VM**, 4GB RAM |
| **automation** | n8n, Windmill (later), Huginn (later) | Active workloads, frequent updates, you'll touch these a lot | LXC, 2–4 vCPU, 4GB RAM |
| **productivity** | Vikunja, Listmonk, Outline, Mealie, Linkwarden | Personal/team productivity, low-resource | LXC, 2 vCPU, 4GB RAM |
| **media** | Immich, Nextcloud, Paperless-ngx | Large storage, GPU passthrough useful for Immich ML | **VM** if GPU passthrough, else LXC. Lots of disk. |
| **business** | Cal.com ✅, Crater | Client-facing, financial — back up often | LXC, 2 vCPU, 2GB RAM |
| **monitoring** | Uptime Kuma ✅, Dockge ✅, Umami ✅, Beszel (later) | Ops stack on LXC **218** | LXC, 2 vCPU, 2GB RAM |
| **labs** | Anything experimental — Flowise, Trigger.dev | Things you're trying out, can be wiped | LXC, scratch space |
### Why this grouping (cheat sheet)
- One service goes bad → only its group restarts.
- Need a kernel upgrade for one stack → snapshot the LXC, upgrade, roll back if broken.
- Mailcow's huge surface area is isolated in its own VM.
- Edge LXC is tiny and stable → perfect for the layer everything depends on.
- Backup cadence per group (see Backups section).
- Resource limits per LXC mean a runaway container can't eat n8n's RAM.
---
## Subdomains
Only expose what actually needs to be public. Internal services use Tailscale/Wireguard for remote access.
### Expose publicly
| Subdomain | Service | Group | Why public | Status |
|---|---|---|---|---|
| `levkin.ca` | Company site (spec + `/folders`) | edge | Main brand | ✅ LXC 220 — **DNS must point to home IP** (was parked elsewhere) |
| `caseware.levkin.ca` | Static site | edge | Marketing | ✅ live |
| `auto.levkin.ca` | Static site | edge | Marketing | ✅ live |
| `iliadobkin.com` | Portfolio (SDET) | edge | Personal site | ✅ live (pve10 LXC 219) |
| `cal.levkin.ca` | Cal.com | business | Clients book on it | ✅ live |
| `listmonk.levkin.ca` | Listmonk | productivity | Unsubscribe URLs must resolve | ✅ live |
| `mail.levkine.ca` | Mailcow | comms | Mail server | ✅ live |
| `auth.levkin.ca` | Authentik | identity | OIDC redirect URLs need external resolution | ✅ live |
| `bill.levkin.ca` | Crater | business | Clients view invoices | ⏳ Phase 6 |
| `cloud.levkin.ca` | Nextcloud | media | **Retiring** — decommission VM 201 after cutover | 🗑️ |
| `photos.levkin.ca` | Immich | media | Mobile apps need public hostname | ⏳ Phase 5 |
| `vault.levkin.ca` | Vaultwarden | identity | Mobile clients need public hostname | ⏳ |
| `notes.levkin.ca` | Outline | productivity | Sharing docs with clients | ⏳ |
| `chat.levkin.ca` | Mattermost | comms | Only if inviting outside users | ⏳ optional |
### Keep internal only (no public DNS, no Caddy block)
Reachable only via local network or Tailscale/Wireguard:
| Service | Reason |
|---|---|
| Umami admin UI | Only you need the dashboard. Tracking endpoint can be public, dashboard isn't. |
| Uptime Kuma | Status dashboard is for you. Don't advertise infrastructure. |
| Beszel | Metrics are admin-only. |
| Dockge | Admin UI — local only. |
| n8n editor | UI shouldn't be exposed. Webhooks go on `hooks.levkin.ca` if needed. |
| Huginn / Windmill / Flowise | Admin tools. |
| Vikunja | Personal task manager. |
| Mealie | Family recipes. |
| Trigger.dev | Internal automation. |
| Paperless-ngx | Personal documents. Never expose. |
| SiYuan | Personal knowledge. |
| Linkwarden | Personal bookmarks. |
### Borderline (decide per service)
| Subdomain | Service | Notes |
|---|---|---|
| `stats.levkin.ca` | Umami collector | Only the tracking script endpoint needs to be public; admin UI stays internal |
| `status.levkin.ca` | Uptime Kuma | Kuma supports a separate public status page URL — that one can be public |
---
## Phased rollout
### Phase 0 — Foundation
1. ✅ Caddy running (on VM — migrate to LXC in Phase 1.5)
2. ✅ **Static IP audit (partial)** — all LXCs on pve10 pinned; Caddy VM static `.50`; remaining VMs on stable DHCP — see [host-list.md](host-list.md)
3. ✅ DNS for `auth.levkin.ca` → home IP (verified 2026-05-22)
4. ✅ `identity` LXC **217** @ `10.0.10.21` (2 vCPU, 2GB RAM, 20GB `local-lvm`, Debian 12 + Docker Compose)
### Phase 1 — Identity ✅
1. ✅ Deploy Authentik in `identity` LXC (Authentik + Postgres + Redis, official compose at `/opt/authentik`)
2. ✅ Caddy: `auth.levkin.ca` → `10.0.10.21:9000` (simple passthrough, no forward-auth)
3. ✅ Admin user (`admin`), TOTP enrolled
4. ✅ `authentik Admins` group (skip custom `users` group until more accounts)
5. ✅ Static backup codes; **don't OIDC other apps until Cal.com test**
### Phase 1.5 — Caddy migration to LXC (~30 min)
Why now (after Phase 1, before bulk SSO work in Phase 4): Authentik is stable enough to absorb a small change, but you haven't yet built the dependency web of OIDC integrations that would make a Caddy reload risky.
Why Caddy belongs in an LXC, not a VM:
- ~50MB OS overhead vs ~512MB for a VM
- Boot/restart in 2-5s vs 20-40s (matters when reloading config)
- Snapshot/backup is faster
- Caddy is a Go binary doing reverse-proxy work — no need for kernel isolation
- Near-native network performance
Steps:
1. Create `edge` LXC: Debian 12, 1 vCPU, 512MB RAM, 8GB disk, **static IP from host list**
2. Install Caddy via official Debian repo:
```bash
apt install -y debian-keyring debian-archive-keyring apt-transport-https
curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg
curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | tee /etc/apt/sources.list.d/caddy-stable.list
apt update && apt install caddy
```
3. Copy `Caddyfile` + custom snippets (`(security-headers)` etc.) from the VM
4. Add a **test subdomain** (e.g. `test.levkin.ca`) pointing at the new LXC — verify TLS issues and routing works
5. Cut over: update router port-forward (80/443) to the new LXC IP. DNS A records don't need to change if they point to your home IP.
6. Watch Mailcow, Cal.com, Listmonk, the marketing sites for ~24h
7. Keep the old VM snapshot for a week, then delete
### Phase 2 — Quick wins ✅
1. ✅ **Umami** — tracking on levkin.ca, caseware, auto, and iliadobkin.com (portfolio)
2. ✅ **Uptime Kuma** — monitors in UI
3. ✅ **Dockge** — logged in; register `/opt/monitoring` stack (see [monitoring-stack.md](monitoring-stack.md))
4. ✅ **Kuma email alerts** — SMTP via Mailcow (see [homelab-status-2026-05-22.md](homelab-status-2026-05-22.md))
### Phase 3 — Cal.com (mostly done) ✅
1. ✅ Cal.com deployed in `business` LXC (id 210, Postgres included)
2. ✅ `cal.levkin.ca` proxied via Caddy
3. ✅ Booking link live at `cal.levkin.ca/ilia/consult` with Jitsi location
4. ✅ Email working via `cal@levkine.ca` SMTP through Mailcow
5. ⏳ **Cal.com OIDC** — **deferred** ([cal-authentik-oidc.md](cal-authentik-oidc.md)) — needs enterprise `CALCOM_LICENSE_KEY`
6. ✅ `auto.levkin.ca` consult button → `cal.levkin.ca/ilia/consult`
### Phase 4 — SSO migration (~half a day, staged)
Wire each to Authentik, least-risky first:
1. **Vikunja** (OIDC native) — easy, single-user impact
2. ~~**Nextcloud**~~ — **skipped** (VM 201 retiring)
3. **Listmonk** (OIDC native, admin only) — easy
4. **Mattermost** (SAML or OIDC native) — moderate
5. **Mailcow** (OIDC) — last, because mail-critical
For each: keep a local admin password as a break-glass account.
### Phase 5 — Family / personal wins (~1 evening)
1. **Immich** in `media` VM — install mobile apps for you and family, enable auto-upload. Face recognition runs in background; "my kids 2024" works within a couple days.
2. Skip PhotoPrism — Immich covers it.
### Phase 6 — Business / consulting (~1–2 evenings)
1. **Crater** in `business` LXC — tax rates, company info, Stripe integration if you want online payment
2. **Beszel** hub in `monitoring` LXC + agents on each LXC — one dashboard for resource usage
### Phase 7 — Automation depth (ongoing)
Only when you have a real use case:
1. **Huginn** in `automation` — first agent: competitor pages, kosher product availability, grant deadlines
2. **Windmill** in `automation` — first script: rewrite an n8n flow with too many code nodes
3. **Flowise** in `labs` — first flow: chat-with-docs against your consulting notes
### Phase 8 — Knowledge / research
1. **Outline** in `productivity` LXC — client-facing wiki + your notes
2. **Linkwarden** in `productivity` LXC — bookmarks with full-page archive
3. **Paperless-ngx** in `media` — scan and OCR the paper that's accumulating
4. **SiYuan** — only if/when PhD or long-form research becomes relevant
---
## Static IP audit
**Maintain a `host-list.md` file** (in this Cursor project, alongside this plan) with every LXC/VM, its current IP, its target static IP, and DHCP/static status. Cursor will use this as the source of truth when scripting changes.
Suggested format:
| LXC/VM ID | Name | Role | Current IP | Target static IP | DHCP/Static | Notes |
|---|---|---|---|---|---|---|
| 210 | cal | Cal.com | 10.0.10.228/24 (DHCP) | 10.0.10.228/24 | ⏳ static | Convert ASAP |
| ... | ... | ... | ... | ... | ... | ... |
### Recommended IP plan
Use role-based address ranges within `10.0.10.0/24` (or whatever your LAN is) so the layout is scannable:
| Range | Reserved for |
|---|---|
| `.1 - .9` | Network gear (router, switches, APs) |
| `.10 - .19` | Proxmox host(s) + PBS |
| `.20 - .39` | Edge / identity / comms (critical infra) |
| `.40 - .79` | Application LXCs (productivity, automation, business, monitoring) |
| `.80 - .99` | Media VM(s) |
| `.100 - .199` | DHCP pool (clients, phones, laptops) |
| `.200 - .249` | Labs / experimental |
| `.250 - .254` | Reserved |
### How to set static on a Proxmox LXC
Two methods — pick one and stick with it:
**Method A — Proxmox CLI (recommended, survives reboots cleanly):**
```bash
pct set <ID> -net0 name=eth0,bridge=vmbr0,ip=10.0.10.X/24,gw=10.0.10.1
pct reboot <ID>
```
**Method B — Router DHCP reservation:**
- Reserve the IP in your router's DHCP table by MAC address. LXC stays "DHCP" technically, but always gets the same IP.
- Easier if you have many hosts and one router.
- Risk: if the LXC's MAC changes (rebuild from snapshot to new ID), reservation breaks.
**Recommendation:** Method A (`pct set`) for everything critical (edge, identity, comms, business). Method B is fine for labs/experimental LXCs.
### Audit checklist
1. List every LXC: `pct list`
2. List every VM: `qm list`
3. For each, run `pct exec <ID> -- ip a` (or `qm guest exec <ID> -- ip a` for VMs) and check whether the IP came from DHCP
4. Fill in `host-list.md`
5. Pick target IPs from the range plan above
6. Convert one at a time, lowest-risk first (labs → productivity → business → comms → identity → edge)
7. **After each conversion**, verify the Caddy reverse-proxy entry still works (curl from outside)
8. Update `host-list.md` status column
### Hosts known to need conversion right now
- ~~**LXC 210 (cal)**~~ — static at `10.0.10.228`
- **Site LXCs 220, 215/216/219** — static; served via Caddy → nginx on each LXC (git deploy). Optional future: static files on Caddy VM only.
---
## Backlog (priority order)
### P0 — next (ordered)
1. ~~Umami / Kuma / Dockge~~
2. ~~Portainer VM 109~~ ✅ (2026-05-23)
3. **Retire Nextcloud VM 201** — ~8 GiB on pve10; remove Caddy + Kuma monitor
4. **Vikunja → Authentik OIDC** — first real SSO ([sso-selfhosted-matrix.md](sso-selfhosted-matrix.md))
5. **UniFi DHCP reservations** — [unifi-static-dhcp.md](unifi-static-dhcp.md)
6. **DNS `levkin.ca` apex** → home IP (still parked at AWS)
7. **Beszel** on monitoring LXC 218
8. ~~Cal.com OIDC~~ — deferred until `CALCOM_LICENSE_KEY`; Authentik app `cal-com` ready
9. **NAS.SP00** disk replace → Jellyfin VM 101
10. **DebianDesktop VM 100** — reboot for 24 GB limit on pve201
### P1 — when ready
- **Outline** — wiki for client docs
- **Linkwarden** — bookmarks with full-page archive
- **Plane** — Jira-lite project management (pair with Mattermost)
### P2 — when you have a real need
- **Crater** — invoicing (Phase 6)
- **Immich** — photos (Phase 5)
- **Paperless-ngx** — document scanning (Phase 8)
- **Huginn** — first when you have a monitoring use case
- **Windmill** — when n8n hits limits
- **Trigger.dev** — durable background jobs in code (better fit than Windmill for QA work)
- **PrivateBin** — encrypted paste for sharing secrets with contractors
- **Addy.io** — email aliases
- **SiYuan** — if PhD work picks up
- **Flowise** — labs only, when LLM workflow use case appears
### Skip / declined
- ~~PhotoPrism~~ — Immich covers it
- ~~Activepieces~~ — you already have n8n
- ~~Affine / Trilium~~ — picked Outline + SiYuan instead
- ~~Matrix/Synapse + Element~~ — staying on Mattermost
- ~~Coolify / Dokploy / CapRover~~ — Dockge is enough; revisit only if writing many custom apps
---
## Backup strategy
- **Proxmox Backup Server (PBS)** or `vzdump` to a NAS — snapshot each LXC/VM nightly
- **Critical groups** (`identity`, `comms`, `business`): 7 daily + 4 weekly + 12 monthly
- **Productivity/automation**: 7 daily + 4 weekly
- **Labs**: 3 daily, no long retention
- **Off-site copy** of `identity` and `business` LXCs — these contain auth and billing data. Encrypted copy to Wasabi or Backblaze B2.
The whole LXC gets snapshotted — much simpler than file-level container backup.
**Done on pve10 (2026-05-22):** `pct snapshot` **`backup-20260522`** on LXCs **217** (identity) and **218** (monitoring).
---
## Next steps (priority order)
See **[homelab-status-2026-05-22.md](homelab-status-2026-05-22.md)** for automation checklist.
| # | Task | Status | Effort | Frees / unlocks |
|---|------|--------|--------|-----------------|
| 1 | **Kuma SMTP** | ✅ done | — | — |
| 2 | **Cal.com → Authentik OIDC** | ⏸ **deferred** | — | Needs `CALCOM_LICENSE_KEY`; infra ready — [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md) |
| 3 | **auto.levkin.ca** → Cal booking link | ✅ | — | Consult button live |
| 4 | **Stop Portainer VM 109** | ✅ | — | Removed 2026-05-23; **~16 GiB RAM** on pve10 |
| 5 | **Retire Nextcloud VM 201** | ⏳ **next** | 30 min | **~8 GiB RAM** on pve10 |
| 6 | **Vikunja → Authentik OIDC** | ⏳ | 1–2 h | Phase 4 kickoff |
| 7 | **UniFi DHCP reservations** | ⏳ | 20 min | [unifi-static-dhcp.md](unifi-static-dhcp.md) |
| 8 | **DNS levkin.ca apex** | ⏳ | 15 min | AWS parked → `142.180.237.136` |
| 9 | **Beszel** on 218 | ⏳ | 1 h | Capacity before Immich |
| 10 | **NAS.SP00** disk → Jellyfin | ⏳ hardware | — | VM 101 |
| 11 | **DebianDesktop reboot** | ⏳ | 5 min | Apply 24 GB on pve201 |
| 12 | **Caddy → edge LXC `.20`** | ⏳ defer | ~30 min | Phase 1.5 |
| 13 | **dev-apps LXC** | ⏳ defer | half day | After punim testing |
| 14 | **Static sites → Caddy VM** | ⏳ optional | 1 h | Defer |
**Defer:** Immich, Crater, Outline; Listmonk/Mattermost/Mailcow SSO after Vikunja; Cal OIDC until license.
### Adding a new service — quick rule
| Want to add… | Node | RAM budget | Prerequisite |
|--------------|------|------------|--------------|
| Small app (Mealie, Linkwarden) | pve10 | 2 GB LXC | Stop 109 and/or 201 first if host feels tight |
| Medium (Outline, Crater) | pve10 | 4 GB LXC | Free **~24 GiB** via Portainer + Nextcloud retire |
| Heavy (Immich + ML) | pve10 or pve201 GPU | 4–8 GB+ | NAS healthy; pve201 only after GPU/punim sized down |
| Dev sandbox | pve10 `dev-apps` | 6–8 GB | punim 9101 migration only after testing |
### Nextcloud decommission (VM 201)
1. Confirm export in `exports/nextcloud-2026-05-21/` is complete
2. Delete **Nextcloud** monitor in Kuma
3. Remove `nextcloud.levkin.ca` from Caddy VM
4. Stop VM 201; update [host-list.md](host-list.md)
5. After NAS healthy: optional `vzdump` archive then delete disk
---
## Important rules
1. **Never put Authentik behind itself.** `auth.levkin.ca` is a simple Caddy passthrough — no forward-auth, no fancy dependencies. If it sat behind its own forward-auth, an Authentik outage would lock you out of the very login page you need to fix it.
2. **Vaultwarden stays standalone.** It's your break-glass path if Authentik dies. Don't OIDC it.
3. **Keep a local admin password on every SSO-wired app.** OIDC integrations break during upgrades — you need to log in to fix them.
4. **Local admin to Proxmox host.** Independent of Authentik and Vaultwarden. Written down somewhere physical.
5. **Don't expose admin UIs publicly.** Dockge, Beszel, Uptime Kuma admin, n8n editor — use Tailscale or Wireguard for remote access.
6. **Static IPs for every LXC.** DHCP will eventually move them and Caddy will break. Set via `pct set <id> -net0 ...ip=10.0.10.X/24,gw=...` or a router reservation.
7. **Cal.com LXC (210)** — static at `.228` ✅.
8. **Maintain `host-list.md`** as the single source of truth for IPs. Update it whenever a new LXC/VM is created or migrated.

View File

@ -1,42 +0,0 @@
# Mailcow unreachable from Caddy / LAN (TCP timeout)
## Symptom
- Mailcow containers healthy inside VM `10.0.10.132`
- `curl https://10.0.10.132/` works **on the VM**
- From Caddy (`10.0.10.50`) or other LAN hosts: TCP **443/80 timeout**
- `tcpdump` on Proxmox shows SYN from client, **no SYN-ACK**
## Cause (not RAM)
`mailcowdockerized-netfilter-mailcow` adds an nftables rule in chain `MAILCOW`:
```text
iifname != "br-mailcow" oifname "br-mailcow" tcp → DROP
```
That blocks forwarded HTTPS from the LAN to the nginx container, even when `DISABLE_NETFILTER_ISOLATION_RULE=y` is set (netfilter still recreates the drop on restart in some versions).
## Fix on the mailcow VM
```bash
nft flush chain ip filter MAILCOW
```
Persistent (installed 2026-05-23): systemd unit `mailcow-flush-isolation-drop.service` runs after Docker.
After netfilter container restart, verify:
```bash
nft list chain ip filter MAILCOW # should be empty
nc -zv 10.0.10.132 443 # from Caddy host
```
## Related settings in `/opt/mailcow-dockerized/mailcow.conf`
- `DISABLE_NETFILTER_ISOLATION_RULE=y`
- `SNAT_TO_SOURCE=10.0.10.132` (optional; helps some hairpin cases)
## Reverse proxy
Caddy on `10.0.10.50` → `https://10.0.10.132` with `Host: mail.levkine.ca` — see `playbooks/caddy-auth-authentik.yml` / Caddyfile on caddy VM.

View File

@ -1,232 +0,0 @@
# Monitoring stack (LXC 218)
**Host:** `monitoring` @ `10.0.10.22` (PVENAS pve10, VMID **218**)
**Compose:** `/opt/monitoring/compose.yml`
**Stacks dir (Dockge):** `/opt/stacks`
All admin UIs are **LAN-only** (no public Caddy blocks). Use Tailscale or local network.
| Service | URL | Port | Notes |
|---------|-----|------|-------|
| **Uptime Kuma** | http://10.0.10.22:3001 | 3001 | Admin + monitors configured ✅ (replaces pve201 LXC **305** @ `.197`, stopped) |
| **Dockge** | http://10.0.10.22:5001 | 5001 | Manage compose on **this LXC only** |
| **Umami** | http://10.0.10.22:3000 | 3000 | Password changed ✅; levkin.ca + caseware + auto + portfolio tracked |
Secrets: `/opt/monitoring/.env` on the LXC (mode 600). Not in git.
---
## Backups (pve10)
| Guest | VMID | Snapshot | Date |
|-------|------|----------|------|
| identity | 217 | `backup-20260522` | 2026-05-22 |
| monitoring | 218 | `backup-20260522` | 2026-05-22 |
On pve10:
```bash
pct listsnapshot 217
pct listsnapshot 218
# Rollback if needed:
# pct rollback 217 backup-20260522
```
Optional off-node copy (when NAS healthy): `vzdump 217 218 --storage local --mode snapshot --compress zstd`
---
## Uptime Kuma — monitors
Configured in UI (all green). **Remove** the Nextcloud monitor when VM 201 is retired.
| Name | URL |
|------|-----|
| Authentik | https://auth.levkin.ca |
| Cal.com | https://cal.levkin.ca |
| Caseware / Auto | marketing sites |
| Mailcow | https://mail.levkine.ca |
| Listmonk, Gitea, Vault, Todo, PVE nodes | per your dashboard |
---
## Uptime Kuma — email alerts (Mailcow)
Mail domain is **`levkine.ca`** (with **e**). Cal.com already sends via Mailcow as `cal@levkine.ca`.
### Which email to use
| Role | Address | Notes |
|------|---------|-------|
| **SMTP server** | `mail.levkine.ca` | Mailcow host |
| **SMTP port** | `587` | STARTTLS (not 465 unless you prefer SMTPS) |
| **From (sender)** | `alerts@levkine.ca` | Create mailbox in Mailcow if it does not exist |
| **To (you)** | `idobkin@gmail.com` or `ilia@levkine.ca` | Use whichever you read; Gmail is fine for alerts |
### 1. Create mailbox in Mailcow (if needed)
**Automated (needs Mailcow API key):**
```bash
# Define mailbox in group_vars/all/mailcow.yml, password in vault:
make mailcow-mailbox MAILBOX=alerts
# (alias: make mailcow-create-alerts)
# Import from .env into vault once, then delete .env:
cp .env.example .env # MAILCOW_API_KEY=... ALERTS_PASSWORD=...
make vault-import-env
make mailcow-mailbox MAILBOX=alerts
```
To add another mailbox later: edit `mailcow.yml` + `vault_mailcow_mailbox_passwords.<name>`, then run `make mailcow-mailbox MAILBOX=<name>`.
**Manual UI:**
1. https://mail.levkine.ca → admin login
2. **Email → Mailboxes → Add** → `alerts@levkine.ca` (strong password → store in Vaultwarden)
3. Optional: alias `monitoring@levkine.ca` → same inbox
### 2. Add notification in Kuma
**Automated (from your Mac, after mailbox exists):**
```bash
cd /path/to/ansible
pip install uptime-kuma-api # or: .venv/bin/pip install uptime-kuma-api
export KUMA_URL=http://10.0.10.22:3001 KUMA_USER=admin KUMA_PASSWORD='...'
export SMTP_USER=alerts@levkine.ca SMTP_PASS='...' SMTP_TO=idobkin@gmail.com
./scripts/kuma-setup-smtp.sh
```
**Manual UI:**
1. http://10.0.10.22:3001 → **Settings** → **Notifications** → **Setup Notification**
2. Type: **Email (SMTP)**
3. Fill in:
| Field | Value |
|-------|--------|
| SMTP Host | `mail.levkine.ca` |
| SMTP Port | `587` |
| Security | TLS / STARTTLS |
| Username | `alerts@levkine.ca` |
| Password | mailbox password |
| From Email | `alerts@levkine.ca` |
| To Email | `idobkin@gmail.com` (or your `@levkine.ca`) |
4. **Test** → save
5. Edit each monitor (or default) → **Notifications** → enable this channel
**Alternative:** Mattermost webhook (`slack.levkin.ca`) if you prefer chat over email.
---
## Dockge — what to do after login
**On server today:**
| Path | Contents |
|------|----------|
| `/opt/monitoring/compose.yml` | **Live** stack (Docker project `monitoring`, 4 containers running) |
| `/opt/stacks/monitoring/compose.yaml` | Copy for Dockge (same services) |
| `/opt/stacks/authentik-ref/`, `cal-ref/` | README only — **no** compose file (ignore) |
**Why “Scan Stacks Folder” looks empty**
- Scan only picks up folders under **`/opt/stacks`** that contain `compose.yaml` / `compose.yml`.
- Your containers were started from **`/opt/monitoring`**, so Docker does not automatically link them to `/opt/stacks/monitoring` until you register that folder in Dockge.
**Fix (pick one):**
### Dockge UI note (your version)
**Settings → General** only has hostname — there is **no “Stacks directory” field**. That path is fixed at deploy time:
`DOCKGE_STACKS_DIR=/opt/stacks` (already set in `/opt/monitoring/compose.yml`).
Stacks are managed from the **home / dashboard** page, not Settings.
### Option 1 — Add stack manually (recommended)
1. http://10.0.10.22:5001 → **home** (logo / dashboard, not Settings)
2. **+ Create Stack** (or **Compose** → new stack)
3. Name: `monitoring`
4. Path: `/opt/stacks/monitoring` (must contain `compose.yaml`)
5. Open stack → review compose → **do not Start** until old project is stopped (below)
### Option 2 — Scan from dashboard menu
1. Stay on **dashboard** (not Settings)
2. Top-right **⋮** → **Scan Stacks Folder**
3. Pick **`monitoring`** if it appears (`authentik-ref` / `cal-ref` have no compose — ignore)
**Avoid duplicate containers**
Before starting from Dockge:
```bash
ssh root@10.0.10.22
cd /opt/monitoring && docker compose down
# Then start from Dockge UI on stack monitoring, OR:
cd /opt/stacks/monitoring && docker compose --env-file .env up -d
```
Until you do that, Kuma/Dockge/Umami keep running from `/opt/monitoring`; Dockge is optional for edits until cutover.
### Optional reference stacks (read-only)
Create empty stacks under `/opt/stacks/` only if you want a UI placeholder:
```bash
ssh root@10.0.10.22
mkdir -p /opt/stacks/authentik /opt/stacks/cal
# Copy compose for reference (does NOT control remote host):
scp root@10.0.10.21:/opt/authentik/compose.yml /opt/stacks/authentik/
```
To **manage** Authentik or Cal from Dockge long term, either move compose to 218 (not recommended) or install Dockge on each LXC later.
### Step 3 — Retire Portainer
VM **109** (portainer) was removed from pve10 on 2026-05-23; use Dockge on 218 instead.
---
## Umami
- ✅ Running at http://10.0.10.22:3000 (LAN / Tailscale only)
- ✅ **Public tracking** via `https://stats.levkin.ca/script.js` on **levkin.ca** (LXC 220), caseware, auto, and **iliadobkin.com** (portfolio LXC 219)
**Three choices (pick one later; none block the sites):**
| Option | Effort | Notes |
|--------|--------|--------|
| **A — Skip public analytics** | 0 | Use Umami dashboard on `:3000` when you care; no DNS/Caddy |
| **B — One DNS + Caddy block** | ~10 min | A record → home IP + Caddy `reverse_proxy 10.0.10.22:3000` on caddy VM |
| **C — Re-add script tags** | 2 min | After B works, insert script before `</head>` on 215/216 |
**Suggested public hostname (instead of `analytics`):** `stats.levkin.ca` (short, clear). Alternatives: `umami.levkin.ca`, `metrics.levkin.ca`.
```caddy
stats.levkin.ca {
import security-headers
encode gzip
reverse_proxy 10.0.10.22:3000
}
```
Script tag then: `https://stats.levkin.ca/script.js`
We are **not stuck** — marketing sites do not need Umami to render. Option A is fine for now.
---
## Maintenance
```bash
ssh root@10.0.10.22
cd /opt/monitoring
docker compose --env-file .env pull
docker compose --env-file .env up -d
docker compose ps
```

View File

@ -1,203 +0,0 @@
# NAS.SP00 drive failure — IT report
**Date:** 2026-05-21
**Host:** PVENAS (Proxmox VE) — `10.0.10.10`
**Pool:** ZFS `NAS.SP00` (~9 TB, ~862 GB used)
**Prepared for:** IT / hardware replacement
**SMART audit:** [nas-sp00-smart-audit-2026-05-21.md](nas-sp00-smart-audit-2026-05-21.md)
---
## Executive summary
One disk in a four-drive ZFS pool (two mirrored pairs) has **failed at the hardware level**. The pool is **DEGRADED** but **online** with **no known data errors** at this time. The failed drive must be **physically replaced** and the pool **resilvered**. Until then, **mirror-0 has no redundancy** — a second failure on the remaining disk in that mirror (`W4J0L0BA`) could cause data loss.
This issue also caused a **host-wide I/O wedge** (pool SUSPENDED → stuck `sync()`), which blocked LXC/VM operations unrelated to the pool (e.g. Cal.com on `local-lvm`). That was cleared by a forced node reboot; **replacing the drive remains required**.
---
## Pool layout
| Vdev | Role | Disk A | Disk B | Status |
|------|------|--------|--------|--------|
| mirror-0 | RAID1 pair | `W4J0L0BA` (sda, 5 TB) | `W4J0L3PY` (sdb) | **DEGRADED** — sdb UNAVAIL |
| mirror-1 | RAID1 pair | `W4J0LKCD` (sdd, 5 TB) | `W4J0K9V7` (sdc, 5 TB) | **ONLINE** |
Model family (healthy drives): Seagate **ST5000DM000-1FK178** (5 TB; SMART reports 5980 rpm on the healthy drives).
---
## Failed drive identification
| Field | Expected | Observed |
|-------|----------|----------|
| **Serial** | W4J0L3PY | W4J0L3PY |
| **Model** | ST5000DM000-1FK178 | ST5000DM000 (truncated reporting) |
| **WWN** | — | `5000c50082cc8bbb` |
| **Firmware** | — | CC48 |
| **Capacity** | ~5,000,981,078,016 bytes (**5.00 TB**) | **137,438,952,960 bytes (~137 GB)** |
| **Linux device** | `/dev/sdb` | `/dev/sdb` |
| **ZFS state** | ONLINE | **UNAVAIL** — label missing/invalid |
ZFS last known path:
`/dev/disk/by-id/ata-ST5000DM000-1FK178_W4J0L3PY-part1`
---
## Symptoms and evidence
### 1. Capacity collapse (primary indicator)
The drive is detected as **~137 GB** instead of **5 TB**. ZFS cannot use a partition label created for a 5 TB disk on a device that exposes only a tiny fraction of capacity. This pattern is typical of:
- **Failed HDD** (media/controller failure)
- **Bad SATA cable, backplane port, or HBA port**
- **USB/SATA bridge failure** (if applicable)
- **Severe firmware/HPA corruption** (less common)
### 2. SMART / SCSI errors
`smartctl` against `/dev/sdb`:
- **Read SMART Data failed:** scsi error aborted command
- **Overall health:** UNKNOWN (attributes unreadable)
- Multiple log read commands fail (Error Log, Self-test Log, GP Log, etc.)
Healthy sibling in same mirror (`/dev/sda`, W4J0L0BA): **SMART PASSED**, full 5 TB capacity.
### 3. Kernel log (`dmesg` at boot, 2026-05-21 ~21:27)
Repeated on **`sdb`**:
```
Buffer I/O error on dev sdb
Sense Key: Medium Error
Add. Sense: Unrecovered read error
critical medium error, dev sdb, sector N op 0x0:(READ)
```
Indicates the block device cannot reliably read media — **hardware or link layer**, not a ZFS configuration issue.
### 4. ZFS pool history
- Pool previously entered **SUSPENDED** state (I/O failures on faulted devices).
- After node reboot: pool **DEGRADED**, short **resilver** completed with **0 errors** (healing scan on remaining devices).
- Current: **No known data errors** in `zpool status`.
---
## Impact
### Storage / services on `NAS.SP00`
Proxmox guests with disks on this pool (non-exhaustive):
| VMID | Name | NAS-backed storage |
|------|------|-------------------|
| 101 | Jellyfin | 1 TB zvol |
| 105 | TrueNAS | 1 TB zvol |
| 108 | actual-debian | 10 GB |
| 200 | PVE.BU.SVR | 1 TB |
| 201 | NextcloudAIO-debian | 8 TB |
**Risk:** With mirror-0 degraded, blocks stored only on the surviving mirror-0 disk have **no redundancy** until the failed drive is replaced and resilver completes.
### Unrelated workloads
Guests on **`local-lvm`** (NVMe, e.g. Cal.com LXC 210, Caddy VM 106) are **not stored on NAS.SP00** but were affected when the pool suspended and blocked system-wide `sync()`.
### Backup target
Proxmox datastore **PVEBUVD00** (PBS @ `10.0.10.200:8007`) reports **unreachable** from this node — separate issue; verify PBS host/network.
---
## Diagnosis
| Question | Answer |
|----------|--------|
| Is this a ZFS misconfiguration? | **No** — config is consistent; three drives show correct 5 TB labels. |
| Is the pool lost? | **No** — degraded but importable; no known data errors currently. |
| Which disk to replace? | **Seagate W4J0L3PY** (`/dev/sdb`, mirror-0 failed leg). |
| Can we fix it in software? | **Unlikely** — capacity and SMART failures point to hardware. |
| Safe to reseat first? | **Optional trial** — power down or hot-swap per chassis policy; if capacity still reads ~137 GB, **replace disk**. |
---
## Recommended actions
### Immediate (IT / on-site)
1. **Identify physical slot** for serial **W4J0L3PY** (compare to inventory/asset tags).
2. **Reseat** SATA/SAS cable and backplane connection once (if hot-swap policy allows). Reboot or rescan SCSI bus.
3. If capacity is still wrong or SMART still fails → **replace with new 5 TB+ enterprise/NAS-class HDD** (match class of ST5000DM000 or better).
4. Do **not** remove the UNAVAIL device from the pool until replacement is in place.
### After new disk is installed
On **PVENAS** as root (adjust `/dev/disk/by-id/...` to the **new** drive's partition 1):
```bash
# Verify new disk shows ~5 TB
lsblk /dev/sdX
smartctl -H /dev/sdX
# Replace failed vdev (use ID from: zpool status NAS.SP00)
zpool replace NAS.SP00 ata-ST5000DM000-1FK178_W4J0L3PY-part1 /dev/disk/by-id/ata-NEW_SERIAL-part1
# Monitor until resilver completes
zpool status -v NAS.SP00
```
### Post-resilver
- Run **`zpool scrub NAS.SP00`** during a maintenance window.
- Confirm **PVEBUVD00** / PBS connectivity if backups depend on it.
- Review whether **Nextcloud VM 201** (8 TB on degraded pool) should remain running until healthy.
### Not recommended
- Ignoring degraded state for extended periods.
- Running heavy I/O on large VMs (e.g. 8 TB Nextcloud) during extended degraded operation.
- `zpool clear` without addressing hardware — does not fix a dead disk.
---
## Reference — healthy disks (for spare matching)
| Serial | Device | Capacity | SMART |
|--------|--------|----------|-------|
| W4J0L0BA | sda | 5.00 TB | PASSED |
| W4J0K9V7 | sdc | 5.00 TB | PASSED |
| W4J0LKCD | sdd | 5.00 TB | PASSED |
---
## Timeline (brief)
| When | Event |
|------|--------|
| Prior to 2026-05-21 | `W4J0L3PY` accumulated read/write errors; pool faulted |
| 2026-05-21 | Pool **SUSPENDED**; host `sync()` wedged; Cal LXC start failed |
| 2026-05-21 ~21:28 | Forced node reboot; pool **DEGRADED**, resilver finished, 0 errors |
| 2026-05-21 | `sdb` still reports **~137 GB**, UNAVAIL — **replacement still required** |
---
## Contact / handoff notes
- **Node:** Proxmox VE 8.x on **PVENAS** (`10.0.10.10`)
- **Pool name in Proxmox:** `NAS.SP00` (zfspool, active, degraded)
- **Failed serial:** **W4J0L3PY**
- **Replacement type:** 5 TB+ HDD, same or better class as Seagate ST5000DM000-1FK178
For questions about homelab service impact (Cal, Caddy, Phase 0 rollout), see [`levkin-selfhost-plan-2.md`](levkin-selfhost-plan-2.md).
## TL;DR
- Pool `NAS.SP00` on `PVENAS` (10.0.10.10) had a disk failure (`W4J0L3PY`)
- Pool went **SUSPENDED**; required forced reboot and is now **DEGRADED**
- **Immediate action:** Replace the failed drive with a spare (same or larger size; see healthy serials in the reference table above)
- Use `zpool replace` command with correct device paths (see main procedure)
- Monitor resilver to completion; run `zpool scrub` after
- Backup services and large VMs (e.g. Nextcloud 8TB) depend on pool health—keep degraded time short
- Reach out if unsure about pool status or downstream service risk

View File

@ -1,232 +0,0 @@
# NAS.SP00 SMART audit
**Date:** 2026-05-21
**Host:** PVENAS (Proxmox VE) — `10.0.10.10`
**Pool:** ZFS `NAS.SP00`
**Related:** [nas-sp00-drive-failure-report.md](nas-sp00-drive-failure-report.md)
---
## Executive summary
| Serial | Device | Capacity | ZFS (mirror) | SMART health |
|--------|--------|----------|--------------|--------------|
| W4J0L0BA | sda | 5.00 TB | mirror-0 ONLINE | **PASSED** |
| W4J0L3PY | sdb | **137 GB** | mirror-0 UNAVAIL | **UNKNOWN** (read fails) |
| W4J0K9V7 | sdc | 5.00 TB | mirror-1 ONLINE | **PASSED** |
| W4J0LKCD | sdd | 5.00 TB | mirror-1 ONLINE | **PASSED** |
Pool state at audit time: **DEGRADED** — failed leg `W4J0L3PY` (`/dev/sdb`). No known data errors. Three healthy drives show no reallocated, pending, or uncorrectable sectors.
---
## ZFS pool status
```
pool: NAS.SP00
state: DEGRADED
status: One or more devices could not be used because the label is missing or
invalid. Sufficient replicas exist for the pool to continue
functioning in a degraded state.
action: Replace the device using 'zpool replace'.
scan: resilvered 0B in 00:00:01 with 0 errors on Thu May 21 21:27:54 2026
NAME STATE READ WRITE CKSUM
NAS.SP00 DEGRADED 0 0 0
mirror-0 DEGRADED 0 0 0
ata-ST5000DM000-1FK178_W4J0L0BA ONLINE 0 0 0
11449632222283419591 UNAVAIL 0 0 0 was /dev/disk/by-id/ata-ST5000DM000-1FK178_W4J0L3PY-part1
mirror-1 ONLINE 0 0 0
ata-ST5000DM000-1FK178_W4J0LKCD ONLINE 0 0 0
ata-ST5000DM000-1FK178_W4J0K9V7 ONLINE 0 0 0
errors: No known data errors
```
---
## Block devices (`lsblk`)
| NAME | SIZE | MODEL | SERIAL | ROTA |
|------|------|-------|--------|------|
| sda | 4.5T | ST5000DM000-1FK178 | W4J0L0BA | 1 |
| sdb | 3.9G | ST5000DM000 | W4J0L3PY | 1 |
| sdc | 4.5T | ST5000DM000-1FK178 | W4J0K9V7 | 1 |
| sdd | 4.5T | ST5000DM000-1FK178 | W4J0LKCD | 1 |
---
## Healthy drives — key metrics
| Metric | sda (W4J0L0BA) | sdc (W4J0K9V7) | sdd (W4J0LKCD) |
|--------|----------------|----------------|----------------|
| Model | ST5000DM000-1FK178 | ST5000DM000-1FK178 | ST5000DM000-1FK178 |
| Firmware | CC48 | CC48 | CC48 |
| WWN | 5000c500082c02f61 | 5000c500082c7e2ce | 5000c500082d84c45 |
| Rotation | 5980 rpm | 5980 rpm | 5980 rpm |
| SATA | 3.1 @ 6.0 Gb/s | 3.1 @ 6.0 Gb/s | 3.1 @ 6.0 Gb/s |
| Power-on hours | 52,481 (~6.0 y) | 53,087 (~6.1 y) | 45,580 (~5.2 y) |
| Temperature | 27 °C | 30 °C | 30 °C |
| Reallocated sectors | 0 | 0 | 0 |
| Current pending sectors | 0 | 0 | 0 |
| Offline uncorrectable | 0 | 0 | 0 |
| UDMA CRC errors | 0 | 0 | 0 |
| Start/stop count | 350 | 367 | 310 |
| Load cycle count | 348,974 | 340,961 | 184,891 |
| Power cycle count | 345 | 363 | 309 |
High **Load_Cycle_Count** on Seagate Desktop HDD.15 is common (head parking); not alarming when reallocated/pending counts remain zero.
---
## Failed drive — `/dev/sdb` (W4J0L3PY)
### Identity
| Field | Value |
|-------|-------|
| Device Model | ST5000DM000 (truncated; not full -1FK178 suffix) |
| Serial | W4J0L3PY |
| WWN | 5000c500082cc8bbb |
| Firmware | CC48 |
| User capacity | 137,438,952,960 bytes [**137 GB**] |
| Expected capacity | 5,000,981,078,016 bytes [5.00 TB] |
| Rotation | 7200 rpm (reported) |
| SATA | 3.0, 6.0 Gb/s |
### SMART
```
Read SMART Data failed: scsi error aborted command
SMART Status command failed: scsi error aborted command
SMART overall-health self-assessment test result: UNKNOWN!
SMART Status, Attributes and Thresholds cannot be read.
```
**Action:** Replace drive; see [nas-sp00-drive-failure-report.md](nas-sp00-drive-failure-report.md).
---
## Full SMART attributes (healthy drives)
### `/dev/sda` — W4J0L0BA (mirror-0, ONLINE)
```
SMART overall-health self-assessment test result: PASSED
ID# ATTRIBUTE_NAME VALUE WORST THRESH TYPE RAW_VALUE
1 Raw_Read_Error_Rate 119 100 006 Pre-fail 211189952
3 Spin_Up_Time 092 091 000 Pre-fail 0
4 Start_Stop_Count 100 100 020 Old_age 350
5 Reallocated_Sector_Ct 100 100 010 Pre-fail 0
7 Seek_Error_Rate 080 060 030 Pre-fail 43979429424
9 Power_On_Hours 041 041 000 Old_age 52481
10 Spin_Retry_Count 100 100 097 Pre-fail 0
12 Power_Cycle_Count 100 100 020 Old_age 345
183 Runtime_Bad_Block 100 100 000 Old_age 0
184 End-to-End_Error 100 100 099 Old_age 0
187 Reported_Uncorrect 100 100 000 Old_age 0
188 Command_Timeout 100 099 000 Old_age 3 3 3
189 High_Fly_Writes 100 100 000 Old_age 0
190 Airflow_Temperature_Cel 073 058 045 Old_age 27 (Min/Max 27/28)
191 G-Sense_Error_Rate 100 100 000 Old_age 0
192 Power-Off_Retract_Count 100 100 000 Old_age 0
193 Load_Cycle_Count 001 001 000 Old_age 348974
194 Temperature_Celsius 027 042 000 Old_age 27
195 Hardware_ECC_Recovered 119 100 000 Old_age 211189952
197 Current_Pending_Sector 100 100 000 Old_age 0
198 Offline_Uncorrectable 100 100 000 Old_age 0
199 UDMA_CRC_Error_Count 200 200 000 Old_age 0
240 Head_Flying_Hours 100 253 000 Old_age 15140h+51m+12.276s
241 Total_LBAs_Written 100 253 000 Old_age 57665101118
242 Total_LBAs_Read 100 253 000 Old_age 160962549062
```
### `/dev/sdc` — W4J0K9V7 (mirror-1, ONLINE)
```
SMART overall-health self-assessment test result: PASSED
ID# ATTRIBUTE_NAME VALUE WORST THRESH TYPE RAW_VALUE
1 Raw_Read_Error_Rate 117 100 006 Pre-fail 136042192
3 Spin_Up_Time 092 091 000 Pre-fail 0
4 Start_Stop_Count 100 100 020 Old_age 367
5 Reallocated_Sector_Ct 100 100 010 Pre-fail 0
7 Seek_Error_Rate 083 060 030 Pre-fail 22512744055
9 Power_On_Hours 040 040 000 Old_age 53087
10 Spin_Retry_Count 100 100 097 Pre-fail 0
12 Power_Cycle_Count 100 100 020 Old_age 363
183 Runtime_Bad_Block 100 100 000 Old_age 0
184 End-to-End_Error 100 100 099 Old_age 0
187 Reported_Uncorrect 100 100 000 Old_age 0
188 Command_Timeout 100 099 000 Old_age 6 6 12
189 High_Fly_Writes 096 096 000 Old_age 4
190 Airflow_Temperature_Cel 070 060 045 Old_age 30 (Min/Max 28/30)
191 G-Sense_Error_Rate 100 100 000 Old_age 0
192 Power-Off_Retract_Count 100 100 000 Old_age 0
193 Load_Cycle_Count 001 001 000 Old_age 340961
194 Temperature_Celsius 030 040 000 Old_age 30
195 Hardware_ECC_Recovered 117 100 000 Old_age 136042192
197 Current_Pending_Sector 100 100 000 Old_age 0
198 Offline_Uncorrectable 100 100 000 Old_age 0
199 UDMA_CRC_Error_Count 200 200 000 Old_age 0
240 Head_Flying_Hours 100 253 000 Old_age 15859h+53m+20.869s
241 Total_LBAs_Written 100 253 000 Old_age 57609506493
242 Total_LBAs_Read 100 253 000 Old_age 152392393081
```
### `/dev/sdd` — W4J0LKCD (mirror-1, ONLINE)
```
SMART overall-health self-assessment test result: PASSED
ID# ATTRIBUTE_NAME VALUE WORST THRESH TYPE RAW_VALUE
1 Raw_Read_Error_Rate 116 090 006 Pre-fail 108217848
3 Spin_Up_Time 092 091 000 Pre-fail 0
4 Start_Stop_Count 100 100 020 Old_age 310
5 Reallocated_Sector_Ct 100 100 010 Pre-fail 0
7 Seek_Error_Rate 073 051 030 Pre-fail 185584998742
9 Power_On_Hours 048 048 000 Old_age 45580
10 Spin_Retry_Count 100 100 097 Pre-fail 0
12 Power_Cycle_Count 100 100 020 Old_age 309
183 Runtime_Bad_Block 100 100 000 Old_age 0
184 End-to-End_Error 100 100 099 Old_age 0
187 Reported_Uncorrect 100 100 000 Old_age 0
188 Command_Timeout 100 099 000 Old_age 8 8 14
189 High_Fly_Writes 098 098 000 Old_age 2
190 Airflow_Temperature_Cel 070 050 045 Old_age 30 (Min/Max 29/30)
191 G-Sense_Error_Rate 100 100 000 Old_age 0
192 Power-Off_Retract_Count 100 100 000 Old_age 0
193 Load_Cycle_Count 008 008 000 Old_age 184891
194 Temperature_Celsius 030 050 000 Old_age 30
195 Hardware_ECC_Recovered 116 100 000 Old_age 108217848
197 Current_Pending_Sector 100 091 000 Old_age 0
198 Offline_Uncorrectable 100 091 000 Old_age 0
199 UDMA_CRC_Error_Count 200 200 000 Old_age 0
240 Head_Flying_Hours 100 253 000 Old_age 11604h+15m+50.842s
241 Total_LBAs_Written 100 253 000 Old_age 72962800596
242 Total_LBAs_Read 100 253 000 Old_age 167268621195
```
---
## How this audit was collected
On PVENAS as root:
```bash
zpool status NAS.SP00
lsblk -d -o NAME,SIZE,MODEL,SERIAL,ROTA,STATE /dev/sd{a,b,c,d}
for d in sda sdb sdc sdd; do smartctl -i -H -A /dev/$d; done
```
Audit timestamp (host local): Thu May 21 22:13:58 2026 EDT.
---
## Next steps
1. Replace **W4J0L3PY** with a 5 TB+ NAS-class HDD (match ST5000DM000-1FK178 or better).
2. `zpool replace NAS.SP00` with the new disk by-id.
3. Monitor resilver; run `zpool scrub NAS.SP00` after pool is **ONLINE**.
4. Re-run SMART audit after replacement for a clean baseline.

View File

@ -1,441 +0,0 @@
# Security Audit Report
**Last audit:** 2026-05-23 (re-run after SSH keys + `make maintenance`)
**Previous audit:** 2026-05-20
**Auditor:** `scripts/security-audit-*.sh`, Ansible `maintenance` + `maintenance_cron` roles
**Repo baseline** (`roles/ssh/defaults/main.yml`): `PermitRootLogin prohibit-password`, `PasswordAuthentication no`, UFW enabled.
---
## 2026-05-23 — Actions completed
| Action | Status |
|--------|--------|
| SSH keys → caseware, auto, cal, vikunja, mailcow, listmonk | ✅ All six reachable as `root` |
| SSH keys → mailcow/listmonk VMs | ✅ Via brief VM shutdown + disk inject on pve201 (no guest agent) |
| Inventory rename `vikanjans` → `vikunja` | ✅ `hosts` + `proxmox_vmid=301` |
| `apt upgrade` fleet (skip reboot) | ✅ 14 hosts via Ansible; auto via `pct exec` on pve10 |
| Tier 1 cron (journal + apt) | ✅ `roles/maintenance_cron` on PVE, sites, comms, ansible, hermes, etc. |
| Tier 2 cron (docker prune) | ✅ identity, monitoring, vikunja; git-ci-01 keeps `docker-prune-ci` |
| VM 104 (GPU-Dev) RAM 72→64 GiB | ✅ pve201; host free RAM ~1.7→10 GiB |
| Fix broken `host_vars` (ansibleVM, listmonk) | ✅ Plain YAML; old blobs → `*.vault-bak` |
| Vault `vault_*_become_password` + maintenance vaultwardenVM | ✅ 2026-05-23 |
| caddy root SSH + maintenance | ✅ `bootstrap-root-ssh-caddy`; inventory `ansible_user=root` |
| ansibleVM maintenance | ✅ become password in vault |
### Post-maintenance SSH reachability
| Host | SSH | Notes |
|------|-----|-------|
| caseware | ✅ | |
| auto | ✅ | Was slow from laptop earlier; OK after upgrade |
| cal | ✅ | |
| vikunja | ✅ | LXC 301 @ 10.0.10.159 |
| mailcow | ✅ | ~1 min downtime for key inject |
| listmonk | ✅ | ~1 min downtime for key inject |
### Maintenance playbook recap (`skip_reboot=true`)
| Host | Result |
|------|--------|
| pve201, pve10, caseware, cal, vikunja, mailcow, listmonk, identity, monitoring, hermes, levkin, portfolio, git-ci-01, sonarqube-01 | ✅ upgraded |
| caddy | ✅ (as `root`; no `sudo` package on host) |
| ansibleVM | ✅ (`vault_ansiblevm_become_password`) |
| vaultwardenVM | ✅ (`vault_vaultwarden_become_password`) |
### Open security gaps (unchanged until `make security`)
| Control | Fleet status | Risk if fixed wrong |
|---------|--------------|---------------------|
| `PasswordAuthentication yes` | Most LXCs + both PVE | **Low break risk** if SSH keys tested first in a second session |
| `PermitRootLogin yes` | pve201, pve10, sonarqube-01 | Same — use `prohibit-password`, not `no`, if you need root+key |
| fail2ban | Off everywhere | Enabling is safe; may lock you out only if you brute-force yourself |
| UFW | Off (except one dev LXC) | **Medium risk** — wrong rules drop SSH/80/443; apply via Ansible `roles/ssh` after allowlist |
| unattended-upgrades | hermes, ansibleVM only | Safe; schedule reboots separately |
| Proxmox :8006 | Open on LAN | Restrict in PVE firewall — **won't break VMs** |
| Docker on `0.0.0.0` | identity, monitoring, vaultwarden, qBit | Bind to `127.0.0.1` — **can break access** if Caddy route missing; test URL after |
| Tailscale | **Deferred** | Off by choice; remote access via **UniFi VPN** to LAN |
See [Risk explanations (2026-05-23)](#risk-explanations-2026-05-23) and [fail2ban vs password SSH](#fail2ban-vs-password-ssh) below.
---
## GPU-Dev (pve201 VM 104) — Ollama / LLMs
| Resource | Current |
|----------|---------|
| Host | pve201, VMID **104**, `GPU-Dev-Debian` |
| LAN IP | **10.0.10.122** (inventory `devGPU` @ 10.0.30.63 is a different network — use `.122` from LAN) |
| RAM | **64 GiB** guest (~60 GiB available when idle) |
| GPU | **RTX 4080 16 GiB** (PCI passthrough `hostpci0`) |
| Workload | **Ollama** already running (~3.6 GiB VRAM in sample) |
### Getting the most from RAM + GPU
1. **Right-size models to VRAM** — On a 16 GiB 4080, prefer quantised models that fit entirely in VRAM (e.g. 7B–14B Q4/Q5, or 32B Q2/Q3 if you accept quality trade-offs). If a model spills to CPU RAM, throughput drops sharply.
2. **One heavy model at a time** — Ollama loads models on demand; set `OLLAMA_MAX_LOADED_MODELS=1` (or keep only one client) so you do not fragment 64 GiB RAM + 16 GiB VRAM across several large weights.
3. **Parallel requests** — `OLLAMA_NUM_PARALLEL` defaults are conservative; raise only if VRAM headroom exists (watch `nvidia-smi` while under load).
4. **Keep guest RAM for KV cache** — With 64 GiB you can run larger context windows; set `OLLAMA_CONTEXT_LENGTH` / model `num_ctx` to what you need, not maximum “just because”.
5. **CPU offload only when needed** — `num_gpu` layers = all layers for speed; partial offload is for models that do not fit in VRAM, not for tuning.
6. **Disk** — Store models on fast local disk (not NFS); `ollama pull` once, prune old tags periodically (`ollama list` / remove unused).
7. **Proxmox** — Do not balloon GPU VM RAM; GPU passthrough already reserves most of the 64 GiB. Freeing pve201 meant lowering this VM from 72→64 GiB, not overcommitting other guests on 201.
8. **Optional** — [Open WebUI](https://github.com/open-webui/open-webui) on localhost + Caddy TLS; bind Ollama to `127.0.0.1:11434` only (LAN via VPN).
**Not in Ansible yet:** add `devGPU` / `10.0.10.122` to inventory when you want playbooks (cron, hardening) on this box.
---
## fail2ban vs password SSH
**What fail2ban does:** After too many failed SSH logins from an IP, it adds a **temporary firewall ban** for that IP (typically 10–60 minutes). It does **not** disable password authentication globally.
**Can passwords stay on if fail2ban is on?** Technically yes — fail2ban only rate-limits brute force; passwords are still weaker than keys. Best practice on servers: **keys + `PasswordAuthentication no` + fail2ban** (defence in depth).
**Your Proxmox console fallback:** If you lock yourself out of SSH on a guest, you can still use **Proxmox → VM → Console** or `pct enter` / `qm guest exec` from pve201/pve10. That is a good break-glass path, but it is **not** a substitute for keys on hosts you manage daily — console is slow and easy to misconfigure under pressure.
**Recommendation:** Enable fail2ban via `make security` with `ignoreip` including `10.0.10.0/24` and your UniFi VPN client subnet. Then disable password SSH once keys work everywhere you care about.
---
## Risk explanations (2026-05-23)
### Password SSH (`PasswordAuthentication yes`)
**How bad:** High on internet-facing IPs; medium on `10.0.10.0/24` only. Anyone who can reach :22 can try passwords indefinitely (no fail2ban).
**Will fixing break things?** No, if you (1) confirm key login works, (2) set `PasswordAuthentication no`, (3) keep a second SSH session open, (4) reload sshd. Breakage happens only if keys are missing/wrong.
### Root login (`PermitRootLogin yes` on hypervisors)
**How bad:** High — root + password on PVE is full cluster compromise.
**Will fixing break things?** Use `prohibit-password` (keys only), not `no`, unless you have another admin user with sudo. Ansible playbooks expect root on PVE today.
### fail2ban off
**How bad:** Medium — relies on LAN trust; SSH noise from scanners still fills logs.
**Will fixing break things?** Rarely. Tune `ignoreip` to your admin IP/subnet so your own typos don't ban you.
### UFW off
**How bad:** Medium on segmented LAN; high if any host has a public IP.
**Will fixing break things?** **Yes, if misconfigured** — default deny without allowing 22 from admin IP, 80/443 from Caddy, or Docker-published ports you still need. Use Ansible `roles/ssh` (UFW after SSH rules) and test.
### unattended-upgrades off
**How bad:** Medium — security patches lag until manual maintenance.
**Will fixing break things?** Usually no. Kernel updates may require reboot; use `Unattended-Upgrade::Automatic-Reboot "false"` until you want reboot windows.
### Proxmox UI :8006 exposed
**How bad:** **Critical** on untrusted networks — API gives VM/storage control.
**Will fixing break things?** Restricting to `10.0.10.0/24` does not break normal LAN admin access.
### HTTP services on all interfaces (8080, 3000, …)
**How bad:** High without TLS/auth at the edge; medium behind Caddy + LAN only.
**Will fixing break things?** **Yes** if you bind to `127.0.0.1` before Caddy `reverse_proxy` is updated. Order: Caddy route → test → then bind Docker to localhost.
### Remote access (Tailscale deferred)
**Decision:** Tailscale off; use **UniFi site-to-site / VPN** into `10.0.10.0/24` for admin and Ollama/GPU access.
**Security:** Ensure VPN is required for SSH and Proxmox :8006 from outside; do not port-forward :22/:8006 on the router without IP allowlists.
### pve201 RAM (was 97% used)
**How bad:** **Critical** — OOM kills guests, swap thrashing.
**Mitigation done:** VM 104 reduced 73728→65536 MiB (~8 GiB freed on hypervisor). Still tight; consider moving git-ci-01 or other workloads to pve10.
---
## 2026-05-20 — Original audit
**Scope:** Proxmox nodes `pve201` (10.0.10.201) and `pve10` (10.0.10.10), all LXCs via `pct exec`, SSH deep-dive on hypervisors.
---
## Executive summary
| Area | Critical | High | Medium |
|------|----------|------|--------|
| Hypervisors (201, 10) | 2 | 4 | 2 |
| LXCs on 201 (10 running) | 0 | 10 | 8 |
| LXCs on 10 (3 running) | 0 | 3 | 3 |
**Top priorities**
1. Harden **SSH on both Proxmox hosts** (root + passwords currently allowed).
2. Restrict **Proxmox API/UI port 8006** to admin IPs.
3. Disable **password SSH on all LXCs**; deploy keys + `make copy-ssh-keys` for inventory IPs.
4. Patch hosts with **40–105** pending apt upgrades (hypervisors worst).
5. Put **HTTP services** (8080, 8000, qBit, etc.) behind reverse proxy + TLS or bind to internal IPs.
---
## Proxmox hypervisors
### pve201 — 10.0.10.201 (`pve`)
| Resource | Status |
|----------|--------|
| OS | Debian 12, PVE 8.4.16, kernel 6.8.12-18-pve |
| RAM free | ~2.5 GB / 126 GB (**critical**) |
| Pending apt | **105** |
| UFW / fail2ban / unattended-upgrades | **None** |
#### SSH audit (dedicated)
| Setting | Current | Target |
|---------|---------|--------|
| `permitrootlogin` | **yes** | `prohibit-password` |
| `passwordauthentication` | **yes** | `no` |
| `pubkeyauthentication` | yes | yes |
| `maxauthtries` | 6 | 3–4 |
| `x11forwarding` | yes | no (on servers) |
| Root keys | 3 keys in `authorized_keys` | audit/remove unused |
#### Exposed services
| Port | Service | Risk |
|------|---------|------|
| 22 | SSH | Brute-force (no fail2ban) |
| 8006 | Proxmox API/UI | **Critical** — full cluster control |
| 3128 | spiceproxy | Medium |
| 111 | rpcbind | Low — reduce exposure |
#### Fixes (pve201)
```bash
# 1) SSH — prefer Ansible after limiting to your IP
make copy-ssh-key HOST=pve201 # if needed
# Manual quick fix on host:
sed -i 's/^#*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
sshd -t && systemctl reload sshd
# 2) Proxmox firewall — Datacenter → Firewall → restrict 8006 to 10.0.10.0/24 or admin IP
# Or iptables on host for port 8006
# 3) fail2ban
apt install fail2ban -y
systemctl enable --now fail2ban
# 4) Auto security updates
apt install unattended-upgrades apt-listchanges -y
dpkg-reconfigure -plow unattended-upgrades
# 5) Patch
apt update && apt upgrade -y
```
**Ansible (when ready):** add `pve201` / `pve10` to a `proxmox` group play with `roles/ssh` + `roles/monitoring_server` (fail2ban).
Do **not** lock yourself out — test with second session first.
---
### pve10 — 10.0.10.10 (`PVENAS`)
| Resource | Status |
|----------|--------|
| OS | Debian 13 (trixie), PVE, kernel 6.17.13-3-pve |
| Load | **~30** on 24 CPUs (overloaded) |
| Pending apt | **92** |
| UFW / fail2ban / unattended-upgrades | **None** |
| ZFS `NAS.SP00` | **inactive** (I/O suspended) |
| PBS `PVEBUVD00` → 10.0.10.200:8007 | **unreachable** |
#### SSH audit (dedicated)
Same as pve201: `permitrootlogin yes`, `passwordauthentication yes`, 3 root authorized_keys.
#### Exposed services
| Port | Service | Risk |
|------|---------|------|
| 22 | SSH | High |
| 8006 | Proxmox API/UI | **Critical** |
| 2049, mountd, statd | NFS/RPC | High on LAN |
| 3128 | spiceproxy | Medium |
#### Fixes (pve10)
Same SSH / fail2ban / unattended-upgrades / patch steps as pve201.
Additional:
```bash
# Investigate ZFS pool
zpool status NAS.SP00
# Fix PBS connectivity or remove stale datastore from Proxmox UI
```
---
## LXCs on pve201 (via `pct exec`)
| VMID | Name | IP | Status | SSH root | Password auth | UFW | fail2ban | Upgrades | Public services |
|------|------|-----|--------|----------|---------------|-----|----------|----------|-----------------|
| 301 | vikunja-debian | 10.0.10.159 | running | without-password | **yes** | no | no | 0 | **3456**, 22 |
| 302 | qbit-debian | 10.0.10.91 | running | without-password | **yes** | no | no | 0 | **8080** (qBit), 22 |
| 303 | searchXNG-debian | 10.0.10.70 | running | without-password | **yes** | no | no | **83** | **8080**, 22 |
| 304 | wireguard-debian | 10.0.10.192 | running | without-password | **yes** | no | no | 0 | 22 |
| 305 | kuma-debian | 10.0.10.197 | **stopped** | — | — | — | — | — | replaced by LXC 218 |
| 306 | portfolio | — | **destroyed** | — | — | — | — | — | migrated → pve10 LXC **219** @ `10.0.10.106` (purged 2026-05-22) |
| 307 | jobber-delian | 10.0.10.178 | running | without-password | **yes** | no | no | **83** | **3005**, 22 |
| 308 | stirling-pdf | 10.0.10.43 | running | without-password | **yes** | no | no | 0 | **8080**, 22 |
| 9001 | pote-dev | 10.0.10.114 | **stopped** | — | — | — | — | — | — |
| 9101 | punimTagFE-dev | 10.0.10.121 | running | without-password | **yes** | **active** | no | **89** | **8000**, 111, 22 |
| 9401 | mirrormatch-dev | 10.0.10.141 | **stopped** | — | — | — | — | — | — |
**Inventory mapping:** `vikunja` → 159 (LXC 301), `qBittorrent` → 91, `punimTag` app → 121.
### Common LXC issues (pve201)
| Issue | Severity | Fix |
|-------|----------|-----|
| `passwordauthentication yes` on all LXCs | High | Set `PasswordAuthentication no` in `/etc/ssh/sshd_config`, reload sshd |
| No fail2ban | High | Install fail2ban or rely on Proxmox FW + LAN segmentation |
| Apps on `0.0.0.0:8080` / 8000 / 3456 | High | Bind to localhost + Caddy, or restrict via Proxmox guest firewall (`firewall=1` on net0 — enable rules) |
| 79–89 pending upgrades on several CTs | Medium | `pct exec <id> -- bash -c 'apt update && apt upgrade -y'` |
| Stopped dev CTs (9001, 9401) | Low | Start when needed or keep stopped to reduce attack surface |
### Per-LXC fixes (pve201)
```bash
# Example: harden + patch vikunja (301) from Proxmox host
pct exec 301 -- sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
pct exec 301 -- systemctl reload ssh
# Patch container
pct exec 303 -- bash -c 'apt update && apt upgrade -y'
# Copy your SSH key (from Mac, once password/key works)
make copy-ssh-key HOST=vikunja # 10.0.10.159
make copy-ssh-key HOST=qBittorrent # 10.0.10.91
```
**punimTagFE-dev (9101):** Only LXC with **UFW active** — extend rules to deny inbound except 22 from admin subnet; still disable password auth.
---
## LXCs on pve10 (via `pct exec`)
| VMID | Name | IP | Status | SSH root | Password auth | UFW | fail2ban | Upgrades | Public services |
|------|------|-----|--------|----------|---------------|-----|----------|----------|-----------------|
| 210 | cal | 10.0.10.228 | running | without-password | **yes** | no | no | 0 | **3000**, 22 |
| 215 | caseware | 10.0.10.105 | running | without-password | **yes** | no | no | **40** | **80** (nginx), 22 |
| 216 | auto | 10.0.10.59 | running | without-password | **yes** | no | no | **40** | **80** (nginx), 22 |
**Inventory mapping:** `caseware` → 105, `auto` → 59.
### Fixes (pve10 LXCs)
```bash
# SSH harden caseware (215)
pct exec 215 -- sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
pct exec 215 -- systemctl reload sshd
# Patch
pct exec 215 -- bash -c 'apt update && apt upgrade -y'
pct exec 216 -- bash -c 'apt update && apt upgrade -y'
# Deploy keys from Mac
make copy-ssh-key HOST=caseware
make copy-ssh-key HOST=auto
```
**HTTP port 80 on caseware/auto:** Ensure TLS termination on Caddy (inventory host `caddy` 10.0.10.50) and no plain HTTP from WAN if exposed.
---
## SSH hardening checklist (all Linux targets)
Use this order to avoid lockout:
1. Confirm your key works: `ssh -o BatchMode=yes root@<ip> true`
2. Set `PasswordAuthentication no`
3. Set `PermitRootLogin prohibit-password` (LXCs already `without-password` — equivalent for keys-only)
4. `sshd -t && systemctl reload sshd`
5. Open **second terminal** and test before closing first
6. Optional: change SSH port, `MaxAuthTries 4`, disable `X11Forwarding`
**Ansible alignment:**
```bash
# After keys on host
make dev HOST=<hostname> --tags security
# or role ssh via playbooks that include roles/ssh
```
---
## Re-run audits
```bash
# Hypervisor full audit
ssh root@10.0.10.201 'bash -s' < scripts/security-audit-remote.sh
ssh root@10.0.10.10 'bash -s' < scripts/security-audit-remote.sh
# Hypervisor SSH-only
ssh root@10.0.10.201 'bash -s' < scripts/security-audit-ssh.sh
# All LXCs on a node
ssh root@10.0.10.201 'bash -s' < scripts/security-audit-lxc-via-pve.sh
ssh root@10.0.10.10 'bash -s' < scripts/security-audit-lxc-via-pve.sh
```
---
## Tracking
| Item | Owner | Status |
|------|-------|--------|
| SSH keys caseware, auto, cal, vikunja, mailcow, listmonk | 2026-05-23 | ☑ |
| Fleet `apt upgrade` (no reboot) | 2026-05-23 | ☑ all previously failed hosts fixed |
| Tier 1 cron (journal + apt) | 2026-05-23 | ☑ PVE + most hosts via Ansible |
| Tier 2 cron (docker prune) | 2026-05-23 | ☑ identity, monitoring, vikunja, git-ci-01 |
| VM 104 RAM 72→64 GiB | 2026-05-23 | ☑ |
| Inventory `vikunja` rename | 2026-05-23 | ☑ |
| Fix `host_vars` ansibleVM / listmonk merge | 2026-05-23 | ☑ plain YAML (review `*.vault-bak`) |
| SSH harden pve201 | | ☐ |
| SSH harden pve10 | | ☐ |
| Restrict 8006 on both nodes | | ☐ |
| fail2ban on hypervisors | | ☐ |
| `make security` on production groups | | ☐ |
| Disable password SSH on all LXCs | | ☐ |
| `copy-ssh-keys` remaining inventory | | ☐ partial |
| TLS / localhost bind for :8080 services | | ☐ |
| unattended-upgrades all production | | ☐ |
| Tailscale re-auth | | ⏸ deferred (UniFi VPN) |
| Fix ZFS NAS.SP00 on pve10 | | ☐ |
| caddy Ansible as root | 2026-05-23 | ☑ |
| vaultwardenVM / ansibleVM become in vault | 2026-05-23 | ☑ |
| Add GPU-Dev `10.0.10.122` to inventory | | ☐ |
| Ollama bind localhost + optional Open WebUI | | ☐ |
---
## Next steps (priority)
1. **`make security`** on one site host (e.g. caseware) with a second SSH session open — disable password SSH, enable UFW + fail2ban (`ignoreip` = LAN + VPN pool).
2. **Restrict Proxmox :8006** to `10.0.10.0/24` + VPN subnet on pve201 and pve10.
3. **Bind internal Docker ports** on identity / monitoring / vaultwarden to `127.0.0.1` after confirming Caddy routes.
4. **GPU-Dev:** point clients at `http://10.0.10.122:11434` over VPN; tune Ollama env vars; add host to inventory when automating.
5. **unattended-upgrades** on production LXCs (reboot policy manual).
6. Review `host_vars/*.vault-bak` and merge any secrets still needed into vault + plain host_vars.
---
## References
- **[Security remediation plan](security-remediation-plan.md)** — phased fixes (critical → low) and login model
- [Security hardening guide](security.md)
- [SECURITY_HARDENING_PLAN.md](../SECURITY_HARDENING_PLAN.md)
- Role defaults: `roles/ssh/defaults/main.yml`

View File

@ -1,490 +0,0 @@
# Security Remediation Plan
**Based on:** [security-audit-report.md](security-audit-report.md) (last re-run **2026-05-23**)
**Goal:** Align hosts with `roles/ssh` (keys only, no password SSH) without locking yourself out.
**Homelab rollout:** [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md) — separate track; some overlap (SSH keys, patching).
---
## Progress summary (2026-05-23)
| Phase | Status | Notes |
|-------|--------|--------|
| **0 Backup + prep** | 🟡 Partial | Fleet SSH keys + apt done; formal PVE snapshot checklist not fully ticked |
| **1 Critical** | 🟡 Partial | SSH keys on many hosts ✅; **Proxmox password SSH off** ⏳; **8006 restrict** ⏳; pve201 RAM improved (GPU 64G, DebianDesktop 24G pending reboot) |
| **2 High** | 🟡 Partial | fail2ban / full LXC password-off / port binding — mostly ⏳ |
| **3 Medium** | ⏳ | unattended-upgrades, `make security`, UFW |
| **4 Low** | ⏳ | rpcbind, naming, audit Makefile |
### Completed since original audit (see [security-audit-report.md](security-audit-report.md))
- SSH keys → caseware, auto, cal, vikunja, mailcow, listmonk (root)
- Fleet `apt upgrade` (14 hosts, no reboot)
- Tier 1 cron (journal + apt) on PVE, sites, comms, etc.
- Tier 2 docker prune on identity, monitoring, vikunja
- GPU VM 104 RAM 72→64 GiB on pve201
- Fixed `host_vars` ansibleVM / listmonk (plain YAML)
### Recommended order (security, alongside homelab P0)
1. **Phase 0** — PVE `sshd_config` backup + CT snapshots before any `PasswordAuthentication no`
2. **Phase 1** — pve201 + pve10 SSH keys-only; restrict 8006; finish keys on caddy/ansibleVM/vaultwarden if still pending
3. **Phase 2** — LXC password auth off, fail2ban, patch, reduce exposed app ports (qBit, searchXNG, punimTag)
4. **Phase 3–4** — unattended-upgrades, Ansible security plays, Mac hardening
---
## How you should log in (not “ladmin → root” everywhere)
Your inventory uses **different users on purpose**. After hardening, the pattern is:
| Host type | Inventory user | How you work | Root access |
|-----------|----------------|--------------|-------------|
| **Proxmox** (`pve201`, `pve10`) | `root` | `ssh root@10.0.10.201` with **your SSH key** | Direct root (keys only, no password) |
| **Dev / QA** (`dev01`, `git-ci-01`, …) | `ladmin` (or `beast`, `master`) | `ssh ladmin@host` with **key** | `sudo` for admin tasks; Ansible `become: true` |
| **Services** (caddy, jellyfin, …) | often `root` | `ssh root@host` with **key** | Direct root (keys only) |
| **Optional bootstrap** | — | `make bootstrap-root-ssh HOST=x` | One-time: key on `ladmin` → `su` to install **root** key → then harden SSH |
**You do not need** “SSH ladmin then su root” on Proxmox if you keep managing them as `root` in inventory — you need **root + SSH key + passwords disabled**.
**You do** use ladmin → sudo on dev/qa boxes where `ansible_user=ladmin`. That is normal: unprivileged (or sudo) login + elevation, not password guessing on root.
**`PermitRootLogin prohibit-password`** means: root may log in **only with a key**, never with a password. It does **not** mean “ban root; use ladmin only.”
**`PasswordAuthentication no`** means: **nobody** (root, ladmin, etc.) can SSH with a password — keys only.
---
## Phases overview
| Phase | When | Focus |
|-------|------|--------|
| **0 — Backup + prep** | Before any change | Snapshots, `sshd` copies, git commit, keys, second SSH session |
| **1 — Critical** | Week 1 | Proxmox SSH + 8006, keys everywhere, RAM on 201 |
| **2 — High** | Week 1–2 | LXCs SSH, fail2ban, patching, app ports |
| **3 — Medium** | Week 2–4 | unattended-upgrades, Ansible `make security`, TLS |
| **4 — Low** | Ongoing | rpcbind, naming, stopped CTs, Mac, docs |
---
## Phase 0 — Backup (before any hardening)
**Yes — back up first.** SSH and firewall mistakes can lock you out; patches can break services. Use the right backup type per layer.
### What to back up (by layer)
| Layer | What | Method | Rollback if SSH breaks |
|-------|------|--------|-------------------------|
| **Your Mac** | Ansible repo + `~/.ansible-vault-pass` (secure copy) + SSH keys | Time Machine / git commit / copy `~/.ssh` | N/A |
| **Proxmox hosts** | `/etc/ssh/sshd_config`, `/etc/pve/`, firewall rules | Copy files + **Proxmox snapshot** optional | **Console** in web UI (`pct enter` / VM console) |
| **Each LXC/VM** | Full guest state | **Proxmox snapshot** or `vzdump` | Restore snapshot or rollback CT |
| **Dev workstations** | OS + home (if Timeshift installed) | `make timeshift-snapshot HOST=dev02` | `make timeshift-restore` |
| **Central PBS** | — | **Not reliable today** — `10.0.10.200` unreachable | Fix PBS later; don't depend on it for this work |
### 0A — Mac / repo (5 minutes)
```bash
cd ~/Documents/code/ansible
git status
git add -A && git commit -m "Pre-security-hardening baseline" # if you want a restore point
# Store vault passphrase somewhere safe (password manager), NOT only on disk
# Optional: encrypted copy of ~/.ansible-vault-pass offline
```
### 0B — Proxmox: config files (both nodes)
```bash
for pve in 10.0.10.201 10.0.10.10; do
ssh root@$pve "mkdir -p /root/pre-hardening-$(date +%Y%m%d) && \
cp -a /etc/ssh/sshd_config /root/pre-hardening-$(date +%Y%m%d)/ && \
cp -a /etc/pve /root/pre-hardening-$(date +%Y%m%d)/pve-etc 2>/dev/null; \
ls -la /root/pre-hardening-$(date +%Y%m%d)/"
done
```
### 0C — Proxmox: snapshots (recommended before SSH/firewall on PVE)
**Running LXCs on pve201** (from audit): 301–308, 9101 — snapshot each before `pct exec` SSH changes.
**Running LXCs on pve10:** 210, 215, 216.
```bash
# On pve201 — snapshot (fast, local-lvm; needs free space)
ssh root@10.0.10.201 'for id in 301 302 303 304 305 306 307 308 9101; do
name=$(pct list | awk -v i=$id "\$1==i {print \$4}")
echo "Snapshot vmid=$id ($name)"
pct snapshot $id pre-ssh-hardening-$(date +%Y%m%d) || echo "FAILED $id"
done'
# On pve10
ssh root@10.0.10.10 'for id in 210 215 216; do
pct snapshot $id pre-ssh-hardening-$(date +%Y%m%d) || echo "FAILED $id"
done'
```
**Optional full backup** (slower, larger) — important CTs only if snapshots fail (low disk on 201):
```bash
vzdump <vmid> --storage local --mode snapshot --compress zstd
```
**Check space on pve201 first** (~2.5 GB RAM + disk — snapshot needs free space on `local-lvm`):
```bash
ssh root@10.0.10.201 'pvesm status; free -h'
```
If snapshots fail for lack of space: do **0B only** on PVE, then harden SSH using **Proxmox console** as safety net (no snapshot).
### 0D — Inventory VMs with Timeshift (`dev` group)
Only where Timeshift is already installed (e.g. `dev02`):
```bash
make timeshift-snapshot HOST=dev02
make timeshift-list HOST=dev02
```
Not used on Proxmox or most LXCs by default.
### 0E — Export current SSH settings (audit trail)
```bash
mkdir -p ~/security-hardening-backup-$(date +%Y%m%d)
ssh root@10.0.10.201 'bash -s' < scripts/security-audit-ssh.sh > ~/security-hardening-backup-$(date +%Y%m%d)/pve201-ssh.txt
ssh root@10.0.10.10 'bash -s' < scripts/security-audit-ssh.sh > ~/security-hardening-backup-$(date +%Y%m%d)/pve10-ssh.txt
ssh root@10.0.10.201 'bash -s' < scripts/security-audit-lxc-via-pve.sh > ~/security-hardening-backup-$(date +%Y%m%d)/pve201-lxc.txt
```
### Backup exit criteria (do not skip)
- [ ] Git commit (or branch) for ansible repo
- [ ] `sshd_config` (+ optional `/etc/pve`) copied on **both** PVE nodes
- [ ] Proxmox snapshots **or** documented reason skipped (disk/RAM)
- [ ] Second SSH session tested to `pve201` / `pve10`
- [ ] You know how to open **Proxmox → VM/CT → Console** if SSH fails
### Rollback quick reference
| Problem | Rollback |
|---------|----------|
| Bad `sshd_config` on PVE | Console → restore `/root/pre-hardening-*/sshd_config` → `systemctl reload sshd` |
| Bad LXC SSH | `pct rollback <vmid> pre-ssh-hardening-YYYYMMDD` |
| Bad patch on CT | Same snapshot rollback |
| Locked out of LAN on 8006 | Console → disable/datacenter firewall rule |
---
## Phase 0 — Prep (after backups)
| # | Task | Command / notes |
|---|------|----------------|
| 0.1 | Confirm vault password file | `~/.ansible-vault-pass` |
| 0.2 | Bootstrap control node | `make bootstrap` |
| 0.3 | Verify key on Proxmox | `ssh -o BatchMode=yes root@10.0.10.201 true` |
| 0.4 | Copy keys to inventory | `make copy-ssh-keys` (or per group) |
| 0.5 | Document admin IP | e.g. `10.0.10.127` for firewall rules |
| 0.6 | Open **second terminal** before changing `sshd` | Test login before closing first session |
**Exit criteria:** Backups done (above) + key login works to `pve201`, `pve10`, and hosts you will harden next.
---
## Phase 1 — Critical
### 1.1 Proxmox SSH (pve201 + pve10)
**Issue:** `PermitRootLogin yes` + `PasswordAuthentication yes` — password brute force on root.
**Fix (per host, after 0.3):**
```bash
# On pve201 OR pve10 — keep existing session open!
sed -i 's/^#*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
sshd -t && systemctl reload sshd
```
**Verify (new terminal):** `ssh -o BatchMode=yes root@10.0.10.201 true`
**Ansible (later):** dedicated play for `[proxmox]` with `roles/ssh` (today `make security` only targets `dev` playbook).
| Host | Priority |
|------|----------|
| pve201 | P0 |
| pve10 | P0 |
---
### 1.2 Restrict Proxmox UI/API (port 8006)
**Issue:** Anyone on LAN can hit full cluster API.
**Fix (choose one):**
- **A — Proxmox firewall (recommended):** Datacenter → Firewall → add rule: accept `8006` from `10.0.10.0/24` and/or your Mac IP; drop others.
- **B — SSH tunnel only:** no LAN exposure; `ssh -L 8006:127.0.0.1:8006 root@10.0.10.201` → browser `https://127.0.0.1:8006`.
**Do not** block 8006 globally without A or B in place.
---
### 1.3 RAM on pve201 (~2.5 GB free)
**Issue:** New guests or updates risk OOM.
**Fix:**
```bash
ssh root@10.0.10.201 'free -h; pct list'
# Stop non-essential CTs/VMs or migrate workload to pve10
```
Review running guests from `make proxmox-info ALL=true`; stop labs you do not need.
---
### 1.4 Deploy SSH keys to unreachable inventory hosts
**Issue:** Cannot audit or Ansible-manage hosts without keys.
**Order:**
1. `make copy-ssh-key HOST=caddy` (and each `[services]` host)
2. `make bootstrap-root-ssh HOST=listmonk` where root password still works but key does not
3. `make copy-ssh-keys GROUP=qa` for `ladmin` hosts
**Exit criteria:** `make ping` succeeds for each group you will harden in phase 2.
---
## Phase 2 — High
### 2.1 LXC SSH — disable password auth (all running CTs)
**Issue:** `passwordauthentication yes` on every audited LXC.
**Fix from Proxmox host (no Mac SSH to CT required):**
```bash
# pve201 — example for each running VMID
for id in 301 302 303 304 305 306 307 308 9101; do
pct exec $id -- sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
pct exec $id -- bash -c 'sshd -t && systemctl reload sshd' || pct exec $id -- systemctl reload ssh
done
# pve10
for id in 210 215 216; do
pct exec $id -- sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
pct exec $id -- systemctl reload sshd
done
```
**Before disable:** install your key on CTs you need (`make copy-ssh-key HOST=vikunja`, etc.).
**Note:** CTs already have `permitrootlogin without-password` — keep that; only turn off passwords.
---
### 2.2 fail2ban on hypervisors
**Issue:** No brute-force protection on SSH (and eventually 8006 if proxied).
```bash
ssh root@10.0.10.201 'apt install -y fail2ban && systemctl enable --now fail2ban'
ssh root@10.0.10.10 'apt install -y fail2ban && systemctl enable --now fail2ban'
```
Optional: extend to high-value LXCs via `roles/monitoring_server` or manual install.
---
### 2.3 Patch backlog
| Target | Pending | Action |
|--------|---------|--------|
| pve201 | ~105 | `apt update && apt upgrade -y` (maintenance window) |
| pve10 | ~92 | same |
| LXCs 303, 306, 307, 9101 | 79–89 | `pct exec <id> -- bash -c 'apt update && apt upgrade -y'` |
| caseware, auto (pve10) | ~40 | same |
**Order:** hypervisors first (after snapshot), then LXCs one by one.
---
### 2.4 Application ports on `0.0.0.0`
**Issue:** HTTP services exposed on LAN without TLS/auth.
| LXC / host | Port | Fix |
|------------|------|-----|
| qbit (91) | 8080 | Prefer VPN; or Caddy + auth; bind to internal IP |
| searchXNG (70) | 8080 | Same |
| punimTagFE (121) | 8000 | Behind Caddy; firewall allow only 10.0.10.0/24 |
| vaultwarden (142) | 8080 | Already in inventory — reverse proxy + TLS |
| portfolio | **106:80** (pve10 LXC 219, nginx) | Migrated 2026-05-22; pve201 LXC **306 destroyed** |
| vikunja (159) | 3456 | Proxy via Caddy (`todo.levkin.ca`) |
**Pattern:** App listens `127.0.0.1` only; **Caddy** (`10.0.10.50`) terminates TLS for public URLs in inventory.
---
### 2.5 pve10 infrastructure
| Issue | Fix |
|-------|-----|
| ZFS `NAS.SP00` suspended | `zpool status`; import/clear errors |
| PBS 10.0.10.200 unreachable | Fix network/service or remove stale datastore |
| Load ~30 | Identify heavy VMs; migrate or stop |
---
## Phase 3 — Medium
### 3.1 unattended-upgrades
Hypervisors + important LXCs:
```bash
apt install -y unattended-upgrades apt-listchanges
dpkg-reconfigure -plow unattended-upgrades
```
### 3.2 Ansible security roles (by group)
Today `make security` runs `playbooks/development.yml` on **`dev` only**.
**Expand with new/changed playbooks:**
| Group | Playbook idea | Roles |
|-------|---------------|-------|
| `[proxmox]` | `playbooks/infrastructure/proxmox-hardening.yml` | `ssh`, monitoring_server |
| `[services]` | extend `playbooks/servers.yml` | `ssh`, `base`, fail2ban |
| `[qa]` | tag run on qa hosts | `ssh` |
| LXCs | optional `pct` + Ansible over SSH after keys | `ssh` |
**Workflow:**
```bash
make check HOST=pve201 # after proxmox play exists
make dev HOST=dev01 --tags security
```
### 3.3 UFW on LXCs
Only **punimTagFE-dev** has UFW today. Template for others:
- Allow 22 from `10.0.10.0/24`
- Allow app port only if needed on LAN
- Default deny incoming
Use `roles/ssh` UFW tasks or Proxmox guest firewall (`firewall=1` on `net0`).
### 3.4 Align names / inventory
| Proxmox name | Ansible | Action |
|--------------|---------|--------|
| punimTagFE-dev | punimTag-dev | Rename CT or update `app_projects` name |
| vikunja-debian | vikanjans | OK (IP 159) |
| qbit-debian | qBittorrent | OK (IP 91) |
### 3.5 Mac (control machine)
| Issue | Fix |
|-------|-----|
| Firewall off | System Settings → Firewall → On |
| FileVault off | Enable FileVault |
| Docker on `*:3000` | Bind to `127.0.0.1` unless LAN needed |
---
## Phase 4 — Low
| Item | Fix |
|------|-----|
| rpcbind (111) on pve201 / 9101 | Disable if unused: `systemctl disable rpcbind` |
| X11Forwarding on Proxmox | Set `no` in sshd |
| Stopped CTs 9001, 9401 | Leave stopped or destroy if unused |
| `make security-audit` target | Add Makefile → runs audit scripts, appends to report |
| Quarterly re-audit | Re-run `scripts/security-audit-lxc-via-pve.sh` |
---
## Suggested calendar
| Week | Critical | High | Medium |
|------|----------|------|--------|
| **1** | 0.x prep, 1.1 SSH both PVE, 1.2 firewall 8006, 1.4 keys | 2.1 LXC passwords off (after keys), 2.2 fail2ban | — |
| **2** | 1.3 RAM 201 | 2.3 patch PVE + LXCs, 2.4 Caddy for 8080 services | 3.1 unattended-upgrades |
| **3** | — | 2.5 pve10 ZFS/PBS/load | 3.2 Ansible plays for proxmox + services |
| **4** | — | — | 3.3 UFW, 3.4 naming, 3.5 Mac |
---
## Rollback (if locked out of SSH)
- Proxmox: use **console** in web UI (or physical/IPMI) → edit `/etc/ssh/sshd_config` → set `PasswordAuthentication yes` temporarily → reload sshd.
- LXC: `pct enter <vmid>` from PVE host.
---
## Tracking checklist
Also tracked in [security-audit-report.md](security-audit-report.md) remediation table.
**Backup (Phase 0 — before everything)**
- [ ] Git commit / branch for ansible repo (pre-hardening baseline)
- [ ] PVE `sshd_config` backup on 201 + 10
- [ ] Proxmox CT snapshots (or vzdump) on critical LXCs
- [ ] Audit outputs saved locally (`security-hardening-backup-*`)
- [ ] Console access tested in Proxmox UI
### Critical
- [ ] pve201 SSH: `PermitRootLogin prohibit-password` + `PasswordAuthentication no`
- [ ] pve10 SSH: same
- [ ] 8006 restricted to admin subnet/IP
- [x] SSH keys on most inventory hosts (2026-05-23 — see audit report)
- [ ] SSH keys on **caddy**, **ansibleVM**, **vaultwardenVM** (if still pending)
- [x] pve201 RAM partial relief — GPU 64 GiB; DebianDesktop 24 GiB (**reboot guest**)
### High
- [ ] All running LXCs: `PasswordAuthentication no` (after keys verified)
- [ ] fail2ban on pve201 + pve10
- [x] Patch fleet — `apt upgrade` 2026-05-23 (reboots still pending where required)
- [ ] qBit / searchXNG / punimTag / vaultwarden port exposure reduced
- [ ] pve10 ZFS + PBS investigated
### Medium
- [ ] unattended-upgrades on PVE + key LXCs
- [ ] `make security` (or new plays) for proxmox, services, qa
- [ ] UFW on critical LXCs
- [ ] Mac firewall + FileVault
### Low
- [ ] rpcbind, X11, audit Makefile, naming cleanup
---
## Quick reference: your login after plan
```bash
# Proxmox
ssh root@10.0.10.201 # key only
# Dev / QA
ssh ladmin@10.0.10.223 # key only → sudo -i when you need root
# Services (inventory root)
ssh root@10.0.10.50 # key only
# Proxmox UI (if 8006 restricted)
ssh -L 8006:127.0.0.1:8006 root@10.0.10.201
# → https://127.0.0.1:8006
```

View File

@ -1,87 +0,0 @@
# Site LXCs — git deploy (levkin / caseware / auto / portfolio)
## Remotes (correct)
Use **`git.levkin.ca`**, not `10.0.30.169`:
```
git@git.levkin.ca:ilia/levkin.ca.git
git@git.levkin.ca:ilia/caseware.git
git@git.levkin.ca:ilia/auto.git
git@git.levkin.ca:ilia/sdetProfile.git
```
Gitea VM is **`10.0.10.169`** on pve10. Public `git.levkin.ca:22` hits your home IP and is **closed**; git SSH uses LAN IP via `~/.ssh/config`.
## SSH config (on site LXC, as root)
```ssh
# /root/.ssh/config
Host git.levkin.ca
HostName 10.0.10.169
User git
IdentityFile ~/.ssh/id_ed25519
StrictHostKeyChecking accept-new
```
## Deploy keys
Each LXC should use its **own** deploy key in Gitea (**Repo → Settings → Deploy Keys**).
Gitea allows a public key only **once per server** — if you see *“already been added to the server”*, generate a repo-specific key:
```bash
# On portfolio LXC 219 (via pve10)
pct exec 219 -- cat /root/.ssh/id_ed25519_gitea.pub
```
Portfolio uses `~/.ssh/id_ed25519_gitea` in `/root/.ssh/config` for `Host git.levkin.ca` (`IdentitiesOnly yes`).
| LXC | Repo | Key file / comment |
|-----|------|---------------------|
| 215 | caseware | `~/.ssh/id_ed25519` — `root@caseware` |
| 216 | auto | `~/.ssh/id_ed25519` — `root@auto` |
| 219 | sdetProfile | `~/.ssh/id_ed25519_gitea` — `deploy-portfolio-sdetProfile` |
| 220 | levkin.ca | `~/.ssh/id_ed25519_gitea` — `deploy-levkin-levkin.ca` (add in Gitea UI) or HTTPS clone with read token |
## levkin.ca routes (LXC 220)
| Public URL | Served from |
|------------|-------------|
| `https://levkin.ca/` | `www/index.html` (spec) |
| `https://levkin.ca/folders/` | `www/folders/` (stack-folder) |
Build before push:
```bash
cd ~/Documents/code/levkin.ca
npm run build:www
git add www/ && git commit -m "Rebuild www" && git push
```
On LXC:
```bash
pct exec 220 -- bash -c 'cd /var/www/levkin && git pull origin main'
```
## Push / pull
```bash
# On LXC (via pve10)
pct exec 215 -- bash -c 'cd /var/www/caseware && git pull origin main && git push origin main'
pct exec 216 -- bash -c 'cd /var/www/auto && git pull origin master && git push origin master'
pct exec 219 -- bash -c 'cd /var/www/portfolio && git pull origin master && git push origin master'
pct exec 220 -- bash -c 'cd /var/www/levkin && git pull origin main'
```
After editing `index.html`, commit on the LXC, push, then hard-refresh the public site.
## Gitea VM SSH (git@10.0.10.169)
If deploy keys fail after adding them in the UI:
1. Keys live in `/var/lib/gitea/.ssh/authorized_keys` (regenerated by Gitea).
2. OpenSSH logs in as user **`git`** — copy/sync that file to **`/home/git/.ssh/authorized_keys`** (`chown git:git`, mode `600`).
3. `command=` must run **`gitea serv`** as user **`gitea`** (e.g. `sudo -n -E -u gitea /usr/bin/gitea …`) with `SSH_ORIGINAL_COMMAND` preserved in sudoers.
Portfolio uses repo path **`ilia/sdetprofile`** (lowercase on disk).

View File

@ -1,51 +0,0 @@
# Self-hosted SSO readiness (Authentik)
Which apps can use Authentik OIDC/SAML without a paid app license.
## Cal.com — blocked (commercial)
**Status:** Deferred until a valid **self-hosted enterprise license** is in place.
The Cal UI at `/settings/security/sso` shows *"This is a commercial feature"* when `CALCOM_LICENSE_KEY` is missing or invalid. On LXC 210, the key in `/opt/cal/.env` is currently **empty** (length 0), so SSO cannot be configured in-app.
**If you want native Cal OIDC later:**
1. Purchase / obtain a self-hosted license from [Cal.com](https://cal.com) (sales or existing license).
2. Set in `/opt/cal/.env`:
```bash
CALCOM_LICENSE_KEY=<your-key>
NEXT_PUBLIC_LICENSE_CONSENT=agree
```
3. `cd /opt/cal && docker compose up -d` (compose already passes these vars).
4. Complete [cal-authentik-oidc.md](cal-authentik-oidc.md) — Authentik app `cal-com` is already provisioned.
**Workaround without paying Cal:** use **local Cal password** for admin; public booking at `cal.levkin.ca/ilia/consult` stays open. Optional later: **Caddy + Authentik forward-auth** only on `/settings/*` and `/auth/*` (does not integrate Cal's “Login with SSO” button; more ops complexity). Not recommended until license path is ruled out.
**Infra already done (harmless to keep):** `calsaml` DB, `SAML_*` env vars, Authentik provider `cal-com-oidc`.
---
## Phase 4 order (no Cal license required)
Wire these first — typical OSS OIDC, no extra license:
| App | OIDC/SAML | Notes |
|-----|-----------|--------|
| **Vikunja** | OIDC native | **Live** — [vikunja-authentik-oidc.md](vikunja-authentik-oidc.md); group `homelab-users` |
| **Listmonk** | OIDC native | Admin-only |
| **Mattermost** | OIDC or SAML | Moderate |
| **Mailcow** | OIDC | Last — mail-critical |
| **Umami** | — | Already LAN-only; no SSO needed |
| **Vaultwarden** | — | **Do not OIDC** (break-glass) |
| **n8n** | OIDC (if enabled) | Check edition |
| **Immich** | OIDC | Phase 5; usually free in self-host |
| **Outline** | OIDC/SAML | Phase 8 |
**Unlikely to need a commercial license** for homelab SSO on the list above; always check each app's docs before assuming.
---
## Related
- [cal-authentik-oidc.md](cal-authentik-oidc.md)
- [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md)

View File

@ -1,97 +0,0 @@
# UniFi static DHCP (10.0.10.x homelab)
**Controller:** https://192.168.2.1/
**Goal:** Pin Proxmox VM MAC addresses to stable `10.0.10.x` addresses so Caddy and Ansible inventory do not drift.
LXCs on pve10 (**210, 215–220**) are already static via `pct set` — **no UniFi lease needed** for those rows.
This guide is for **VMs** (and pve201 guests) that still use DHCP.
---
## Before you start
1. Confirm guests get addresses on **`10.0.10.0/24`** (not only `192.168.2.x`). In UniFi, open the network that faces Proxmox `vmbr0`.
2. Gateway for homelab guests should be **`10.0.10.1`** (or your router's IP on that VLAN).
3. Use the MAC table in [vm-static-ip-router-reservations.md](vm-static-ip-router-reservations.md).
---
## Method A — From a connected client (easiest)
1. Open **https://192.168.2.1/** and sign in.
2. Go to **Clients** (or **UniFi Devices** → **Clients**).
3. Find the device (hostname like `gitea`, `vaultwarden`, or MAC from Proxmox `qm config <vmid>`).
4. Click the client → **Settings** (gear) or **⋮**.
5. Enable **Fixed IP** / **Use fixed IP address**.
6. Set IP to the target from the table (e.g. `10.0.10.169` for gitea).
7. **Apply** / **Save**.
8. On the VM: renew DHCP or reboot:
```bash
sudo dhclient -r && sudo dhclient
# or: reboot
```
9. Verify: `ip -4 addr show` shows the reserved IP.
---
## Method B — DHCP static mapping (manual MAC)
1. **Settings** → **Networks**.
2. Open the LAN/VLAN that serves **10.0.10.x** (name varies: `Default`, `Homelab`, `10.0.10`).
3. **DHCP** section → **DHCP Static IP** / **Static leases** → **Create new**.
4. Enter:
- **MAC address** (from Proxmox, e.g. `BC:24:11:E9:BD:E5`)
- **IP address** (e.g. `10.0.10.169`)
- **Name** (optional, e.g. `giteaVM`)
5. Save. Repeat for each row in the reservations table.
6. Renew DHCP on each VM or reboot.
---
## Already static (skip UniFi DHCP)
| VMID | Name | IP | How |
|------|------|-----|-----|
| 210 | cal | 10.0.10.228 | `pct set` |
| 215 | caseware | 10.0.10.105 | `pct set` |
| 216 | auto | 10.0.10.59 | `pct set` |
| 217 | identity | 10.0.10.21 | `pct set` |
| 218 | monitoring | 10.0.10.22 | `pct set` |
| 219 | portfolio | 10.0.10.106 | `pct set` (`iliadobkin.com`) |
| 220 | levkin | 10.0.10.60 | `pct set` (`levkin.ca`) |
| 106 | caddy | 10.0.10.50 | static in `/etc/network/interfaces` |
---
## Priority order — UniFi reservations (VMs / pve201)
| Order | Guest | IP | MAC | Notes |
| ----- | ----- | --- | --- | ----- |
| 1 | giteaVM | 10.0.10.169 | BC:24:11:E9:BD:E5 | |
| 2 | vaultwardenVM | 10.0.10.142 | BC:24:11:58:DB:DC | |
| 3 | n8n (WRA) | 10.0.10.154 | BC:24:11:61:DE:7A | |
| 4 | hermes | 10.0.10.36 | BC:24:11:51:1E:99 | |
| 5 | actual | 10.0.10.158 | BC:24:11:10:7B:64 | |
| 6 | jellyfin | 10.0.10.232 | BC:24:11:29:B8:84 | stopped until NAS OK |
| 7 | listmonk (pve201 VM 113) | 10.0.10.148 | BC:24:11:11:53:9A | |
| 8 | Mailcow (pve201) | 10.0.10.132 | BC:24:11:34:75:2D | |
| 9 | TrueNAS | 10.0.10.107 | BC:24:11:14:DE:B5 | optional pin |
| 10 | PVE.BU.SVR | 10.0.10.200 | BC:24:11:DA:95:3B | lab VM |
Full MAC table: [vm-static-ip-router-reservations.md](vm-static-ip-router-reservations.md).
---
## If you only see 192.168.2.x in UniFi
Your Mac may be on `192.168.2.0/24` while Proxmox guests use a separate **`10.0.10.0/24`** network. In that case:
- Add or edit a UniFi network/VLAN for `10.0.10.0/24`, or
- Ensure the router bridges/routes between `192.168.2.x` and `10.0.10.x`, and
- Put DHCP reservations on the network that actually serves the Proxmox bridge.
---
## After reservations
Mark `✅ router` in [host-list.md](host-list.md) for each guest.

View File

@ -1,51 +0,0 @@
# Vikunja ↔ Authentik OIDC
**Status:** Live at `https://todo.levkin.ca` (host `vikunja`, `10.0.10.159`).
## Authentik
| Item | Value |
|------|--------|
| Application slug | `vikunja` |
| Redirect URI (strict) | `https://todo.levkin.ca/auth/openid/authentik` |
| Subject mode | **Based on the User's username** (`user_username`) |
| Access group | **`homelab-users`** (bind to app; policy engine **ANY**) |
| Authentik user | Purpose | Email |
|----------------|---------|--------|
| **`admin`** | Authentik admin UI only | `admin@levkin.ca` |
| **`ilia`** | Homelab apps (Vikunja, etc.) | `idobkin@gmail.com` |
**Do not use the same email on both users** — Authentik will pick the wrong account.
`homelab-users` group = **`ilia`** only. Vikunja app binding: group `homelab-users` (policy engine **ANY**).
Secrets: `vault_vikunja_oidc_client_id`, `vault_vikunja_oidc_client_secret` in Ansible vault.
## Vikunja
Config: `/opt/vikunja/config.yml` (mounted in `docker-compose.yml`).
- `auth.openid.providers.authentik` → `authurl: https://auth.levkin.ca/application/o/vikunja/`
- `usernamefallback: true` + `emailfallback: true` → SSO links to existing local user **`ilia`** when Authentik username is `ilia`.
Local auth stays enabled for break-glass.
## Login
1. Sign out: `https://auth.levkin.ca/if/user/logout/`
2. `https://todo.levkin.ca` → **Login with Authentik**
3. Sign in as **`ilia`** (username) or **`idobkin@gmail.com`** — **not** `admin`
**My applications:** `admin` only sees apps allowed for superuser (e.g. Cal). **`ilia`** sees Vikunja after login.
## Adding users
1. **Directory → Users** — create user (username should match Vikunja local username if linking).
2. **Directory → Groups → homelab-users** — add user.
3. New Vikunja users: first OIDC login creates account; existing local users need matching username + fallbacks.
## Related
- [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)
- [Authentik Vikunja integration](https://integrations.goauthentik.io/chat-communication-collaboration/vikunja/)

View File

@ -1,38 +0,0 @@
# VM static IPs — router DHCP reservations (pve10)
Proxmox **LXCs** use `pct set … ip=10.0.10.X/24` (done for 210, 215–219).
**VMs** without cloud-init are pinned by **router DHCP reservation by MAC** (Method B in plan-2).
Ansible **cannot log into your router** — configure static leases in the UI.
**Your UniFi:** https://192.168.2.1/ — step-by-step: [unifi-static-dhcp.md](unifi-static-dhcp.md).
Homelab guests use **`10.0.10.0/24`** (gateway `10.0.10.1`). If UniFi also serves `192.168.2.x`, ensure the `10.0.10.x` segment is the network those VMs/LXCs use
(or that routing/DHCP relay matches your Proxmox bridge).
## How to add a reservation (any router)
1. Open router admin (UniFi: **https://192.168.2.1/**).
2. Find **DHCP** / **LAN** / **Static leases** / **Reserved addresses**.
3. For each row: **MAC address** → **IP address** → Save.
4. Reboot guest or renew DHCP (`dhclient -r && dhclient` on Debian) if IP does not update immediately.
5. Mark done in [host-list.md](host-list.md).
| VMID | Name | MAC | Reserve IP | Inventory |
| ---- | ---- | --- | ---------- | --------- |
| 102 | gitea-alpine | `BC:24:11:E9:BD:E5` | `10.0.10.169` | giteaVM |
| 103 | WRA / n8n | `BC:24:11:61:DE:7A` | `10.0.10.154` | n8n |
| 104 | vaultwarden | `BC:24:11:58:DB:DC` | `10.0.10.142` | vaultwardenVM |
| 105 | TrueNAS | `BC:24:11:14:DE:B5` | `10.0.10.107` | — |
| 106 | caddy | `BC:24:11:E0:49:B4` | `10.0.10.50` | ✅ static in-guest |
| 108 | actual | `BC:24:11:10:7B:64` | `10.0.10.158` | actual |
| 117 | hermes | `BC:24:11:51:1E:99` | `10.0.10.36` | hermes (guest agent on) |
| 200 | PVE.BU.SVR | `BC:24:11:DA:95:3B` | `10.0.10.200` | — |
| 201 | NextcloudAIO | `BC:24:11:14:D4:DE` | `10.0.10.24` | **decommission** — skip new work |
| 101 | Jellyfin | `BC:24:11:29:B8:84` | `10.0.10.232` | stopped |
| 113 | listmonk (pve201) | `BC:24:11:11:53:9A` | `10.0.10.148` | listmonk |
| — | Mailcow (pve201 VM 106) | `BC:24:11:34:75:2D` | `10.0.10.132` | mailcow (inventory) |
After reserving in the router, mark **DHCP/Static** as `✅ router` in [host-list.md](host-list.md).
In-guest static (optional, stronger): SSH as root and set `/etc/network/interfaces` like caddy VM 106.

View File

@ -1,15 +0,0 @@
---
# Mailcow mailbox definitions (passwords live in vault only).
# Create: make mailcow-mailbox MAILBOX=<key>
# Add a new key under mailcow_mailboxes + vault_mailcow_mailbox_passwords.<key>
mailcow_url: "https://mail.levkine.ca"
mailcow_domain: "levkine.ca"
mailcow_mailboxes:
alerts:
local_part: alerts
name: Monitoring Alerts
quota: 1024
vault_password_key: alerts
mailcow_api_key: "{{ vault_mailcow_api_key | default('') }}"

View File

@ -26,10 +26,6 @@ maintenance_pre_reboot_delay: 5 # Delay before reboot in seconds
# Default Tailscale settings - these tell the playbook to use your vault key
tailscale_auth_key: "{{ vault_tailscale_auth_key | default('') }}"
# Mailcow — API key + per-mailbox passwords in vault; definitions in group_vars/all/mailcow.yml
mailcow_api_key: "{{ vault_mailcow_api_key | default('') }}"
mailcow_mailbox_passwords: "{{ vault_mailcow_mailbox_passwords | default({}) }}"
tailscale_accept_routes: true
tailscale_accept_dns: true
tailscale_ssh: false
@ -104,7 +100,8 @@ app_frontend_start_cmd: "npm start"
# Proxmox IDs are global. Never reuse IDs across unrelated guests.
# Suggested reservation table (edit to your preference):
# - 9000-9099: pote
# - 9100-9199: punimTag (monorepo)
# - 9100-9199: punimTagFE
# - 9200-9299: punimTagBE
# - 9300-9399: projectA (example)
# -----------------------------------------------------------------------------
app_projects:
@ -208,13 +205,59 @@ app_projects:
gateway: "10.0.10.1"
branch: "main"
punimTag:
description: "punimTag monorepo (frontend + backend, edit repo_url, IPs, secrets)."
repo_url: "git@github.com:example/punimTag.git"
punimTagFE:
description: "punimTag frontend-only project (edit repo_url, IPs, secrets)."
repo_url: "git@github.com:example/punimTagFE.git"
repo_dest: "/srv/app"
components:
backend: false
frontend: true
guest_defaults:
guest_type: "{{ proxmox_guest_type }}"
cores: 2
memory_mb: 2048
swap_mb: 512
rootfs_size_gb: 16
deploy:
frontend_install_cmd: "{{ app_frontend_install_cmd }}"
frontend_build_cmd: "{{ app_frontend_build_cmd }}"
frontend_start_cmd: "{{ app_frontend_start_cmd }}"
envs:
dev:
name: "punimTagFE-dev"
vmid: 9101
ip: "10.0.10.121/24"
gateway: "10.0.10.1"
branch: "dev"
env_vars:
APP_ENV: "dev"
SECRET_PLACEHOLDER: "change-me"
qa:
name: "punimTagFE-qa"
vmid: 9102
ip: "10.0.10.122/24"
gateway: "10.0.10.1"
branch: "qa"
env_vars:
APP_ENV: "qa"
SECRET_PLACEHOLDER: "change-me"
prod:
name: "punimTagFE-prod"
vmid: 9103
ip: "10.0.10.123/24"
gateway: "10.0.10.1"
branch: "main"
env_vars:
APP_ENV: "prod"
SECRET_PLACEHOLDER: "change-me"
punimTagBE:
description: "punimTag backend-only project (edit repo_url, IPs, secrets)."
repo_url: "git@github.com:example/punimTagBE.git"
repo_dest: "/srv/app"
components:
backend: true
frontend: true
frontend: false
guest_defaults:
guest_type: "{{ proxmox_guest_type }}"
cores: 2
@ -225,58 +268,34 @@ app_projects:
backend_install_cmd: "{{ app_backend_install_cmd }}"
backend_migrate_cmd: "{{ app_backend_migrate_cmd }}"
backend_start_cmd: "{{ app_backend_start_cmd }}"
frontend_install_cmd: "{{ app_frontend_install_cmd }}"
frontend_build_cmd: "{{ app_frontend_build_cmd }}"
frontend_start_cmd: "{{ app_frontend_start_cmd }}"
envs:
dev:
name: "punimTag-dev"
vmid: 9101
ip: "10.0.10.121/24"
name: "punimTagBE-dev"
vmid: 9201
ip: "10.0.10.131/24"
gateway: "10.0.10.1"
branch: "dev"
env_vars:
APP_ENV: "dev"
NODE_ENV: "production"
DATABASE_HOST: "10.0.10.181"
DATABASE_PORT: "5432"
DATABASE_URL: "{{ vault_punimtag_database_url_dev | default('postgresql://punimtag_dev_user:CHANGE_ME@10.0.10.181:5432/punimtag_dev') }}"
BACKEND_PORT: "{{ app_backend_port }}"
FRONTEND_PORT: "{{ app_frontend_port }}"
BACKEND_BASE_URL: "http://10.0.10.121:{{ app_backend_port }}"
FRONTEND_BASE_URL: "http://10.0.10.121:{{ app_frontend_port }}"
SECRET_PLACEHOLDER: "change-me"
qa:
name: "punimTag-qa"
vmid: 9102
ip: "10.0.10.122/24"
name: "punimTagBE-qa"
vmid: 9202
ip: "10.0.10.132/24"
gateway: "10.0.10.1"
branch: "qa"
env_vars:
APP_ENV: "qa"
NODE_ENV: "production"
DATABASE_HOST: "10.0.10.181"
DATABASE_PORT: "5432"
DATABASE_URL: "{{ vault_punimtag_database_url_qa | default('postgresql://punimtag_qa_user:CHANGE_ME@10.0.10.181:5432/punimtag_qa') }}"
BACKEND_PORT: "{{ app_backend_port }}"
FRONTEND_PORT: "{{ app_frontend_port }}"
BACKEND_BASE_URL: "http://10.0.10.122:{{ app_backend_port }}"
FRONTEND_BASE_URL: "http://10.0.10.122:{{ app_frontend_port }}"
SECRET_PLACEHOLDER: "change-me"
prod:
name: "punimTag-prod"
vmid: 9103
ip: "10.0.10.123/24"
name: "punimTagBE-prod"
vmid: 9203
ip: "10.0.10.133/24"
gateway: "10.0.10.1"
branch: "main"
env_vars:
APP_ENV: "prod"
NODE_ENV: "production"
DATABASE_HOST: "10.0.10.181"
DATABASE_PORT: "5432"
DATABASE_URL: "{{ vault_punimtag_database_url_prod | default('postgresql://punimtag_prod_user:CHANGE_ME@10.0.10.181:5432/punimtag_prod') }}"
BACKEND_PORT: "{{ app_backend_port }}"
FRONTEND_PORT: "{{ app_frontend_port }}"
BACKEND_BASE_URL: "http://10.0.10.123:{{ app_backend_port }}"
FRONTEND_BASE_URL: "http://10.0.10.123:{{ app_frontend_port }}"
SECRET_PLACEHOLDER: "change-me"
mirrormatch:
description: "Mirrormatch Prisma/Node backend (dev/qa/prod)."

View File

@ -22,44 +22,6 @@ vault_ssh_public_key: "ssh-ed25519 AAAA... you@example"
# LXC create bootstrap password (often required by Proxmox)
vault_lxc_root_password: "CHANGE_ME"
# Ansible become (sudo) for VMs that use ladmin/master instead of root SSH
vault_vaultwarden_become_password: "{{ vault_lxc_root_password }}"
vault_ansiblevm_become_password: "{{ vault_lxc_root_password }}"
# Mailcow API — System → Configuration → Access → API (read/write)
vault_mailcow_api_key: "CHANGE_ME"
# Per-mailbox passwords (make mailcow-mailbox MAILBOX=<key>)
vault_mailcow_mailbox_passwords:
alerts: "CHANGE_ME"
# Legacy alias (optional)
vault_alerts_mailbox_password: "CHANGE_ME"
# Uptime Kuma + SMTP (monitoring LXC)
vault_uptime_kuma_url: "http://10.0.10.22:3001"
vault_uptime_kuma_user: "admin"
vault_uptime_kuma_password: "CHANGE_ME"
vault_kuma_smtp_host: "mail.levkine.ca"
vault_kuma_smtp_port: "587"
vault_kuma_smtp_user: "alerts@levkine.ca"
vault_kuma_smtp_password: "CHANGE_ME"
vault_kuma_smtp_to: "idobkin@gmail.com"
# Umami (monitoring LXC /opt/monitoring/.env)
vault_umami_db_password: "CHANGE_ME"
vault_umami_app_secret: "CHANGE_ME"
# Cal.com ↔ Authentik OIDC (make cal-oidc)
vault_cal_oidc_client_secret: "CHANGE_ME"
# Vikunja ↔ Authentik OIDC
vault_vikunja_oidc_client_id: "CHANGE_ME"
vault_vikunja_oidc_client_secret: "CHANGE_ME"
# Hermes Mattermost (not Telegram)
vault_mattermost_url: "https://slack.levkin.ca"
vault_mattermost_token: "CHANGE_ME"
vault_mattermost_allowed_users: "CHANGE_ME"
# -----------------------------------------------------------------------------
# POTE (python/venv + cron) secrets
# -----------------------------------------------------------------------------
@ -108,18 +70,4 @@ vault_mirrormatch_smtp_user: "smtp-user"
vault_mirrormatch_smtp_password: "CHANGE_ME"
vault_mirrormatch_smtp_from: "MirrorMatch <noreply@mirrormatch.com>"
# -----------------------------------------------------------------------------
# punimTag (monorepo) secrets
# -----------------------------------------------------------------------------
# Optional deploy key for private repo access
vault_punimtag_git_ssh_key: |
-----BEGIN OPENSSH PRIVATE KEY-----
CHANGE_ME
-----END OPENSSH PRIVATE KEY-----
# Per-environment database URLs (use external Postgres at 10.0.10.181:5432)
vault_punimtag_database_url_dev: "postgresql://punimtag_dev_user:CHANGE_ME@10.0.10.181:5432/punimtag_dev"
vault_punimtag_database_url_qa: "postgresql://punimtag_qa_user:CHANGE_ME@10.0.10.181:5432/punimtag_qa"
vault_punimtag_database_url_prod: "postgresql://punimtag_prod_user:CHANGE_ME@10.0.10.181:5432/punimtag_prod"

View File

@ -1,142 +1,100 @@
$ANSIBLE_VAULT;1.1;AES256
62616334383737633962313839313235653935663832623061333532616566343565626437376230
3333393831623434663736656331303462626534626265380a356135653866666438373838663137
61373962356364306365323933386262613837333364356564383163383638363430323230393430
3032346238343264340a636539663735396335313135363330373536353562666537653764643637
36663437366166616437303738646466656331313266653431303462366532616639323136346137
66663932346561333535303438623734643864613330396331626161616265393731633365393930
37326565363931386532623432343339656534393032663634353961306330303737313765333330
65316436383030666564663537323937666634343966653562353434333537366338393838333666
30356339353732623932393665663237343630303533363232336263323732376461353338663831
62666365333330353361373732306436623637623932636235393434323339663266396631346237
63393762643338346563643637666135336139336461333537373137626464613339373937383830
39643039363234346134663062373130343230663839613234373838393434373532313732656332
63373739616163666361666330393866396331616136383565383763303563323261323330313832
64386661383838366336633335323431356133366162373464313533653734613366623537646636
65323862376466343530303439396639616135373030613638363630313264623337653233636532
38383664613337303565336136333434613638663239393234656534353264623166333837376436
36633837613339613161363764383538303363323232346636313862393930343333633131383833
65316166363062363330373734323232366136653030336439343932613337623662383236663834
66303137353438373661633537633333633733666663393435383436396634393739383039383139
32653438303134326663653164633039653435643766616637313433623463366531633962613434
33396262333739643865346465363862303337356239663337356330363232383331346435393930
64306633363064656566346662363433313434376631663032343635656463313530626635623930
65383434663064666535613561313265616436326533313336303836386635343134626361343566
36653233656337613838323164376666656338383337633065393237373737623934626265343133
38393763316132373234623735353731656261643736353562616361643033303064393962343239
32623363653466363565323436643639643934663530646333356532363463363564363862373232
38396535393034653565643236363733393032306335363934623462386639363961306265646636
32383738653633613732313030626135353366626537646263303634323539343866363033643337
66396235323461666131643030353164616265623635636438363738653233363435353761366531
66623033656331386138623864363461333933653636653566303733616137303030663430643535
64386534346463393638613764353966343837333235623262343164326564616138353731363663
39666634323663373831326664326337656164323738326335663734373538303135653861393362
65303865366235333538623330373032306661386436323530336631616639366636376135303537
31373634636561356239366437623637363735653633316634353862666139303565393533643864
64656335356236353232303135616265666266376634313437633236666461343233333732323832
61313230393162383163336634303066613664376338633964643431346335616533396466393736
31383862666365633665623766643665623361376565386531323234303236393162356331346535
65353231316531326438343237633133393361336366353232623866393138376232643133326161
63333236626237613536323964356435383933646264656137623632343665393530343463343230
61343464383230366339616439343762626435303832393462666463363030383365343938666264
33636437333266656130633365666162316366616262386436333861373533343433356633356630
31643666626262386535626233653337303861666666653366643361363164353430643561613532
32373239373038306533393464373365323638653630306630363931623931336663666339356464
64646634356437326435656163306562346530363435336138353330356162333431353466313763
64666538666332653762633064653664663531373638393530653034323864383938346631303165
62343163636366633161383464626639633638323363306139626632343836646135346332393235
62376536316164636631626639656533323337366335616534356538386266343436613530653131
36383733373637303864636334633237663331623663663562613261393736323137373130613537
61353431396139663861366639616631613064323230366131373666373964393738623936393431
62366530623938373836636265393233663661663664613430366237396637366561616433333463
64623335303834376432383361396433373537633066333937663633663433333339343262363338
38633532366334313164346236646665663363623065666331613961653639313563316563383231
64343834373066316233336465366634306537303666383831306237396362366663343430353162
35643638653234396134653638653663333765313236313764383835343431303134383537313237
35626563376163643336623534633236313363383062373437666536306462383632626332643430
39326661633134393465363333356136323361363831363961646230393561663838313935386432
66653430613231343731623630313362366138613465373631653632303139636438656439633361
31326262313431363536633434346431626336623139333235363338626435666439616433323931
30386238663931393066353237616537366434363536306163613931306138653364623663666438
63396331313438623662393532333834376237343462313263626139366133353131313164613861
38313632386336646362313634633938383963306339383362633236653235353061626337353936
32626464343166323438616637346661633861396264633365386638666538333932633530306139
64393132613562353835323162346532313262353266366230393839323462626362353533323834
35393261373039336537623339613463613335363362353438623837376631646233653362383636
31613261323361623934653939613661333836666637383534643137346261353333303861363665
62386237646661626536363034313833363965373562316334336232643164633436333261303730
39386233646162323365393034663137636462316432333335313366363933633065323264646136
64326338303766613230393539626430326263646631646536623436643734376237373031316466
62306136373465633130653564633233356331313761366333623363646666313365623563346334
31666535346461336630626466616664363330626338333961386239333663326536316266346634
33323064333161313232343239346439623633346161346465313532383061376137323839666365
64376132306338663565623531623136663436333730366563313261626661373233393438646561
61396562666533316635363432306139366430333837333866346436306135333862663734306164
38313766666230393861323632316231343362656136366338336564623431373662333366323833
33613232326230643530356137646635623030316663343466666666333734636230346263353365
65363637356635666638613566613131383864316465666536336333393334653436666261393461
64376639303632626165613361346636303064333532613064303032643562396262623632396539
61623333643630616536393163623330353164383864623064383732633733353630323534663732
36383133633066393263363533616334653933336235333938663132366334326234386264386531
62373466393234666563613637313136663764376239666434383038613932376532653531613164
34613834313532383165336634613536636437393638653964393831393533303630333933636464
31613634346235396331386534303636313066396361393138393635633134353035613863656364
63363030376662356636333566373063613433373330383139396530316163303633656438326333
65353435613561303539326538613261393339616537373136313030656133323766396464646634
34363061366166303465616133663835323232373763336634386231396230383965306164393731
39633333353936666361656530363665383039626533333035373663326537373538633864626366
64613435663834666137326335333736376466356236353637333262373834336131393733646138
31363630626432643061663538626230356637373863643866326530373962393065393464663466
37326165653235653166386561363339353331663164326639616135663736316363336531333439
35363033343934323063613133326264313665613363386464303662633333646330373637636366
37396562303164636261323633373538323835623235396161303964373735356538393431303031
64663636326364386266306434343361353439616533303632363165376639313635663637623263
37376233323233663364393439663137396265646230613631383039316230356539316130353062
33303732323063633738666636623737366631336164396637396533656364316333616536336632
34303963623031353137626331623031326136373538633336633835623337303831616365323066
35333931393136393965623135626363393335306363366639323034633064663035613566313037
38616234666131343064633561326466326365643863653664623932333734643332383963356665
64326435643333333435636665383165386364663134613564386639346566353831343239646239
35376338386631646236303031303665336166643437316131626438646237663331306438666130
32323539393431303039393964363161633461303136616430666539393162633464623436656638
61363736363665633965656362643432376266393531333539633737343165313562616133366131
34346266323931363137303463666363336163373839306533393831323262313861393333643336
37626239366432393461613630366636366631353237396461663566333935343037336438626262
33306264613065373638373634303262626338386236386533616563633131366665663738353837
33333936306266633965613338393662656161613465396163653438306463303138656536366531
66343634306332313561386531373663343535343232646162396361626666633034663133613364
65323536346264636164616463626535353261396362633736376531666334346537666562363339
39653430386565313731346230653632613830396165313561333865333234656532383339313065
30663565393030343134383536336335616537333336396232333839373533353161623264626434
61393334316331613739666434653839353933336332396536313937323939646264313133373863
66643138656661336264646338376232396138616465373562393063333336343036326632306662
66303836326636663264633334356533613066383935316635313236633631376633613535303830
33646566346661346539633638363135343939653363623232313864613132393235643961633566
34653464303430313466326463346563363964363666623665633265356138336133616261333839
37343036363065613766366565343765306663623037383933323230646566333935306564343039
30343730633135643338366262376365326561353538346433636336633866393565326334326431
64613136353139316331343333643564343534643931313164323934373465386437376637613838
65333237386462666262326663316639393961363033656233356330666634366633373336326531
33303535323036663837363537366436653930353637353962393464373361323166663031343532
37383735613334316434356232343466373539666562326430656538653634323361363236313030
34313537633433666333356661383838663861613765383564633835333437363330616163316432
30323762656230323035663139323363346235633337346637663632383762393363396632613631
35303161383263613164303535633063346432643563363436306665613738346338666336646530
36383639353032636133353438396362333763623164376338653564616465303538646432353763
66663262636661363465326463326639613431643065623966373630323161356565326362646635
32633335303339633232396166393235643462356565323236356539653033363663333262386235
36316432386165366530323737353862393263343063343138343334343966313838336639646463
36303137323961626561343238323634373830323161303365306465373036323262663835376630
34376662356238643939613536383432393464656530326530333262356162623531636364363662
65613166383563333237376135656362306362366434346565366235626532623964303661626632
61646462633533663830613436633937336364643562653362616464636130343264666233333932
37323736316539636336633163643166333231376464656462666364303761313962366635663336
31353738396532616137333033313362393830663434323236313031623863643735323838646561
34633065623764333734353166323234633538363230633865353764333663613239306664386232
65306661333939336634343535393261326335663163663431633630373936336465623634376362
31393231313435306564333234633938353336366239646637366162343065366261303538613962
36323065663362383538633536393161653332383035336236363364373133326366366130626135
34346237366338663962643966613363336165633765663137653930323731393235616137613364
37623462396333376263326364363166613831396161393933623532346637326262616434636265
64323336626663303131323331376330393232666233626662363264616533646462323233333633
3535
38316537376634623462313731323238666165383731656632373665653534623163386333303865
3865383030316132663831303932376437346335323233630a643331663539383163306666393764
38313265656561343839616565343663353037663237663032366632373831363336306632626266
3361643865333533340a356233663034343932323831323236356161396237346532323838373135
33393239313730363336613338373039663735323431323562613363343863326234633833663631
66343462623231663932633537373361313764393630356666393662653135356139663935613038
65383261363065633235343031346535373564373931373063386265343335623265653739613830
32656233393330633362623932316431383761306332393466313936396533333839313831663331
34353864356336303331663233653666363966376162303731626134313235306238323363303439
32333039653235326632303637303065386161616138356463623561366637376366326262303166
38323763393934666539373063323265333961666164613437316164633565393035626538353365
33386562336665383863636639643232623161643933313664396534383362303838663362653736
64393334616165336638306235363734653431646431616139373336656333623963386538646230
39663230363063386231343730663162313463666135323265613261626637626332353534396535
31623664363766646332396336396133613662643232366433323330373962633839613635333763
63306230623438346639323863353137363330316630316130326134323731326635643736373736
62336362656265633233623165376436373231656666303832373966353732313031623865316663
63356163636238346230623732326232646434623532633439646536656362393162613535613565
66616539316362376561386263373464623030636661663435383839643565393632616232663035
34653735383964653930633664346330386566343830336238306562343164366131643138643339
35313366356637643262636238366263353535306434633732623335643266396335666636666663
37333232393765306433326164663538663839623034373535653737633366303665633831303334
32303061363863386139613464326466336136396534663538643163343439343763383534306636
62353733613330376163386331626463656462336237656339356132643135363537343638303261
33366332653439313137613665386136666536356537346665333935366336623734393738346434
63326265346362636564366265373134336662626332653464646139656635313961656230336537
63666638326337643033363964643339666130386139363138656165666333356465643337396165
30336330633632353231613938646165383966613863366330646162646266346139343434393865
66346365663230626531643963383462636465363965393762336233366538393133313138616335
32353834313762363265643031343237633732393166343139363163326439666162396332353038
31306530626666343361313736313636613335376163383237303063393333386663333333336137
37346166316231623638386635613230663063653037643930333961316434643361633035633734
65643937636361653433383262643265373165613437336236633631323635613034663834646665
30373730373438613132633932333565376665333565383932356334653738646166393934626362
30666666303832613633316230623038343165396338343535663931383639623430643238656261
39623037333063306266323335303736346236636137633863353866343136346335353865303961
31346331333066376330306361396262333762393838303165383134303435353630366130303536
34386532356239326166386665623435646432636561363564656161646563306234333138333839
38316337656631313763393135396464643338386636336234346663653538353863643636323032
35326133623064363838386662653138613438386564316635373838366262656364666633636539
61306563666138656161336466323537626161313366616662623362643036636132663634313137
39653437306662646162613763343736636530356465346132646238633166373838353836326461
36326666323636353239303262623436643932353164323630326635653635653233363265316264
30653763643431626539356161376534396437636463303363663134373961616561363561333333
34306537326666383664336464656464623731656566653132613565336536323438666333366466
64613738653730333633383062653837366266316536653139643362373039383831363666333934
34383833336266356436666636323239336432386133303466636138643934356266326533643161
36393664313963393930383533623565383332613933396639613037323266663439313138326261
30353861303661303836343165353362663632306430626337356562343637653164396237333566
37656230363530323836373363646334356262646633313932383161303264613238373936353036
61376264633930356465626266623930333039383032316163633037323035346130343934616261
31666166393462366561303833353135326566356637376466613934376233303162323033623031
63656131333439353537623662363530383866326432306361316465383137633536666364623662
37353561633839623530333663643130326131333330626661396636343234666139336539653162
62383636663137626637303535333862366434626161353239393232313537343865646564626331
39366665363030643764663963316163343033326434373265343664393439316333346434363563
61346164396561343865626362616433306230333130653166656230353364316536626432373333
35383133363530666263316431396462383133363965336637386632363263656261353963313161
36383632326264373436383638383064346334336238656239393833653531656461356136303434
37663434663732306631656334306361663562303863386135623066633963373034373139666332
35393433646333363839666434663535363661616330386234366132303161383063663836626561
35393064343735303032313266643338623834383838633834636536363539656466663864613366
66636363623330326436363936313938333638323939323035616232366563316364343834376630
66656434336661643861613737616138396330383832386230383331646462323363373363393733
63363237636137373566363438663966396432613964336164326138623737393636396234646232
64343361363365356135666235623833396131626663303839653535663732313831633163643638
35396262373837343238343838663635353838373338663732626330613237623332336436643136
38653833383430393837383566643765653834306636356466326364303334653034626262356630
34333338333336373433356235386337346666343830303164363235303265313134323339653339
63316238346132653663653165313635336638646362356337643766366564383531633565303431
66616433663630343439336661346266336139613537653438653432326666326137306364376137
66333939643262633532363966623439373434393862353237613135646663623236646331643537
31353566653464313433636635393330646166613232633734346639326534373163383064353732
32373861303064346266643338316465653031646633633936373738663837383162643534623131
31633662356534343636313834386139656439663733333762323962323939623032396239356437
37633739613433613365313337383835623936623530363831383535663337343264356532616434
39393634396664636166346631313764343733666534613935393637363233373331303837656463
37363266363634353136316532333462396266373733333633356239653334363835326261323661
66323032346364356230613831643236316530356132343863393361343462373433383265336333
30343730316366366234333263343965633466333439653739663333643939303631353664316435
36396139623562656632666165666662626263643436396431326135633932393965656531633761
39303634643936366438336534613532303134343164326661626363656562383564623264636132
39656636303636393761653035303832386430646162343830343834316534636263373763643765
61366335643531666232303231656336643833396238336639333437363564636566636632303364
62623738336237393638363436396662656565653839643164356565313563663561666237383036
33626464663465643230376164653062663063636630613064643632643235643662653566333333
62353763643830363638323731303537633837393235656661333263323536363330356362643333
34346666656432626365383639326538643862346265316263326531623631383962383734316330
39333430613761663337306331623461643635653431343336663163343766373464366538313335
61643538643231333636643836663663313534356662386532633331346664653262353839643066
36393366653131316636646336313362656662666163333635633132323438353435373430643839
37623936393962333065663536306238653466363634386632366637363265303734356535333735
64623330303965393533326563643063303762646664666464643239386435343065326234306632
35346338373866303838613933653230373737396134653533376265356432333933356237636338
66656536393530316435323863373962636465333331653364626162326562393565313538633264
34613633393862333731336563636136666166613037613833333063303162373339663539646631
36303962356562306239616634376339356135666663303836353061663039343836356262373932
65346466373532633365383835323062313531623130396130376531626333653862393462643631
366330333666336262373364663864336633

View File

@ -1,2 +0,0 @@
---
maintenance_cron_enable_system: true

View File

@ -1,2 +0,0 @@
---
maintenance_cron_enable_system: true

View File

@ -1,4 +0,0 @@
---
# Tier 1 maintenance cron — hypervisors (journal + apt)
maintenance_cron_enable_system: true
maintenance_cron_enable_docker: false

View File

@ -1,4 +0,0 @@
---
# Tier 2 — Docker weekly prune (identity, monitoring, vaultwarden)
maintenance_cron_enable_system: true
maintenance_cron_enable_docker: true

View File

@ -1,2 +0,0 @@
---
maintenance_cron_enable_system: true

View File

@ -1,9 +1,8 @@
---
# ansibleVM (control @ 10.0.10.157) — plain vars; secrets in group_vars/all/vault.yml
# Previous fully-encrypted host_vars file moved to ansibleVM.yml.vault-bak (broken for Ansible merge).
ansible_become: true
ansible_become_method: sudo
ansible_become_password: "{{ vault_ansiblevm_become_password }}"
maintenance_cron_enable_system: true
$ANSIBLE_VAULT;1.1;AES256
31306264346663636630656534303766666564333866326139336137383339633338323834653266
6132333337363566623265303037336266646238633036390a663432623861363562386561393264
63303565633530383634643538323165383461656539613331386135336265653531336266613865
3833376664366239650a313134653238323437633265373463326231346663366434323733663666
38353061373437306431383132333233663639643134363464396163333962373033363661623666
3430633863623962366430613962346264356461373539376263

View File

@ -1,3 +1,4 @@
---
$ANSIBLE_VAULT;1.1;AES256
66633265383239626163633134656233613638643862323562373330643363323036333334646566
3439646635343533353432323064643135623532333738380a353866643461636233376432396434

View File

@ -1,9 +0,0 @@
---
# Cal.com LXC 210 @ 10.0.10.228 — business / scheduling
cal_public_url: https://cal.levkin.ca
cal_saml_admins: idobkin@gmail.com
cal_saml_db_name: calsaml
cal_authentik_app_slug: cal-com
cal_authentik_provider_name: cal-com-oidc
cal_authentik_host: https://auth.levkin.ca
cal_oidc_client_id: cal-com

View File

@ -0,0 +1,16 @@
---
# Host variables for dev02
# Use ladmin user with sudo to become root
ansible_become: true
ansible_become_method: sudo
ansible_become_password: "{{ vault_dev02_become_password }}"
# Configure shell for ladmin
shell_users:
- ladmin
# Skip data science stack
install_conda: false
install_jupyter: false
install_r: false

View File

@ -1,22 +1,8 @@
---
# git-ci-01 — Gitea Actions runner (VM 115 on pve201 @ 10.0.10.223)
# Configure sudo path for git-ci-01
# Sudo may not be in PATH for non-interactive shells
ansible_become_exe: /usr/bin/sudo
ansible_become_method: sudo
# Proxmox (manual / qm): VMID 115, 2 cores, 4096 MB RAM, 64 GB disk (scsi0)
# act_runner: /etc/act_runner/config.yaml — capacity 2, force_pull false
# Maintenance: /etc/cron.weekly/docker-prune-ci (docker system prune -af --filter until=168h)
#
# Capacity notes (2026-05-23):
# - pve201: VM 104 reduced to 64 GiB (2026-05-23); still tight — consider runner on pve10
# - capacity 3 needs ~812 GB RAM on this VM → migrate runner to pve10 or add RAM after freeing pve201
# - 12 repos: capacity 2 on one runner is OK; second runner on pve10 if queues stack up
git_ci_runner_capacity: 2
git_ci_disk_gb: 64
git_ci_proxmox_vmid: 115
git_ci_proxmox_node: pve201
maintenance_cron_enable_system: true
maintenance_cron_enable_docker: true
maintenance_cron_docker_script: /etc/cron.weekly/docker-prune-ci
# Alternative: if sudo is in a different location, update this
# ansible_become_exe: /usr/local/bin/sudo

View File

@ -1,7 +0,0 @@
---
# giteaVM — Gitea on Alpine (Proxmox VM 102 @ 10.0.10.169)
# Alpine uses /etc/periodic/weekly (not cron.weekly); no apt for system-maintenance.
maintenance_cron_enable_system: false
maintenance_cron_enable_docker: false
maintenance_cron_enable_gitea_archive: true
maintenance_cron_gitea_archive_script: /etc/periodic/weekly/gitea-archive-prune

View File

@ -1,4 +0,0 @@
---
# Hermes agent VM 117 @ 10.0.10.36 (user: hermes, admin: ladmin)
# Secrets: vault_hermes_telegram_bot_token, mattermost in /home/hermes/.hermes/secrets/
hermes_home: /home/hermes/.hermes

View File

@ -1,3 +0,0 @@
---
maintenance_cron_enable_system: true
maintenance_cron_enable_docker: true

View File

@ -1,3 +1,8 @@
---
# listmonk VM on pve201 — plain vars; secrets in vault
# Previous fully-encrypted host_vars file moved to listmonk.yml.vault-bak (broken for Ansible merge).
$ANSIBLE_VAULT;1.1;AES256
31316663336338303832323464623866343366313261653536623233303466636630633235643638
3666646431323061313836333233356162643462323763380a623666663062386337393439653134
61616135353966333639323031643263646231636332613935353234363134356435646266343866
3034653235393636350a626362333764313732646663653838313233326438646330393336346539
30393364323237396633343133616439393563326161636366613965366161656364343939313334
3430306634396361353238643735363430383433323431393230

View File

@ -1,4 +0,0 @@
---
# Control node (runs playbooks with connection: local).
# Use project venv so API deps (proxmoxer, etc.) match `make bootstrap`.
ansible_python_interpreter: "{{ inventory_dir }}/../../.venv/bin/python3"

View File

@ -1,7 +0,0 @@
---
# Mailcow VM 106 on pve201 (Mailcow-debian)
# API/UI: https://mail.levkine.ca — domain levkine.ca (with e)
# SSH: root only (no ladmin). First access: make copy-ssh-key-mailcow
mailcow_url: "https://mail.levkine.ca"
mailcow_domain: "levkine.ca"
mailcow_alerts_user: "alerts"

View File

@ -1,3 +0,0 @@
---
maintenance_cron_enable_system: true
maintenance_cron_enable_docker: true

View File

@ -1,8 +1,8 @@
---
# vaultwarden VM 104 on pve10 @ 10.0.10.142 (ladmin + sudo)
ansible_become: true
ansible_become_method: sudo
ansible_become_password: "{{ vault_vaultwarden_become_password }}"
maintenance_cron_enable_system: true
maintenance_cron_enable_docker: true
$ANSIBLE_VAULT;1.1;AES256
35633833353965363964376161393730613065663236326239376562356231316166656131366263
6263363436373965316339623139353830643062393165370a643138356561613537616431316534
63386635363838626465396439303664316635633239653639646338393130666164653262316135
3937376464303935620a343530333030643830383130646532613533336435383334373831343261
37653138613132616165636132623037623033343265663734626536366361373130353139383634
6664346538653965343263376538636336393164356434646264

View File

@ -1,3 +0,0 @@
---
maintenance_cron_enable_system: true
maintenance_cron_enable_docker: true

View File

@ -2,27 +2,16 @@
# Primary IPs: Tailscale (100.x.x.x) for remote access
# Fallback IPs: Local network (10.0.x.x) when Tailscale is down
# Usage: ansible_host_fallback is available for manual fallback
# Public URLs: levkin.ca DNS A records → Caddy (142.180.237.136), except home → 100.100.100.100
#
# NOTE: Proxmox app projects (dev/qa/prod) are provisioned dynamically via
# `playbooks/app/site.yml` (it uses `add_host` based on `app_projects`).
# You generally do NOT need to add project hosts here.
[proxmox]
pve201 ansible_host=10.0.10.201 ansible_user=root
pve10 ansible_host=10.0.10.10 ansible_user=root
[sites]
levkin ansible_host=10.0.10.60 ansible_user=root url=https://levkin.ca proxmox_vmid=220 proxmox_node=PVENAS
caseware ansible_host=10.0.10.105 ansible_user=root url=https://caseware.levkin.ca proxmox_vmid=215 proxmox_node=PVENAS
auto ansible_host=10.0.10.59 ansible_user=root url=https://auto.levkin.ca proxmox_vmid=216 proxmox_node=PVENAS
portfolio ansible_host=10.0.10.106 ansible_user=root url=https://iliadobkin.com proxmox_vmid=219 proxmox_node=PVENAS
[dev]
dev01 ansible_host=10.0.30.105 ansible_user=ladmin
bottom ansible_host=10.0.10.156 ansible_user=beast
debianDesktopVM ansible_host=10.0.10.206 ansible_user=user skip_reboot=true
devGPU ansible_host=10.0.10.122 ansible_user=root proxmox_vmid=104 proxmox_node=pve201 # GPU-Dev-Debian, Ollama + RTX 4080
devGPU ansible_host=10.0.30.63 ansible_user=root
[qa]
git-ci-01 ansible_host=10.0.10.223 ansible_user=ladmin
@ -33,33 +22,25 @@ KrakenMint ansible_host=10.0.10.120 ansible_user=ladmin
[ansible]
ansibleVM ansible_host=10.0.10.157 ansible_user=master
[comms]
# pve201 — email + newsletters
mailcow ansible_host=10.0.10.132 ansible_user=root url=https://mail.levkine.ca proxmox_vmid=106 proxmox_node=pve201
listmonk ansible_host=10.0.10.148 ansible_user=root url=https://listmonk.levkin.ca proxmox_node=pve201
[tailscale]
tailscaleVM ansible_host=100.66.218.53 ansible_user=ladmin
[services]
# VMID 117: on PVENAS (pve10)
hermes ansible_host=10.0.10.36 ansible_user=ladmin url=https://hermes.levkin.ca proxmox_vmid=117 proxmox_node=PVENAS
caddy ansible_host=10.0.10.50 ansible_user=root proxmox_vmid=106 proxmox_node=PVENAS
cal ansible_host=10.0.10.228 ansible_user=root url=https://cal.levkin.ca proxmox_vmid=210 proxmox_node=PVENAS
identity ansible_host=10.0.10.21 ansible_user=root url=https://auth.levkin.ca proxmox_vmid=217 proxmox_node=PVENAS
monitoring ansible_host=10.0.10.22 ansible_user=root url=http://10.0.10.22:3001 proxmox_vmid=218 proxmox_node=PVENAS uptime_kuma_port=3001 dockge_port=5001 umami_port=3000
giteaVM ansible_host=10.0.10.169 ansible_user=root url=https://git.levkin.ca proxmox_vmid=102 proxmox_node=PVENAS
n8n ansible_host=10.0.10.154 ansible_user=root url=https://n8n.levkin.ca proxmox_vmid=103 proxmox_node=PVENAS
vaultwardenVM ansible_host=10.0.10.142 ansible_user=ladmin url=https://vault.levkin.ca proxmox_vmid=104 proxmox_node=PVENAS
actual ansible_host=10.0.10.158 ansible_user=root url=https://budget.levkin.ca proxmox_vmid=108 proxmox_node=PVENAS
vikunja ansible_host=10.0.10.159 ansible_user=root url=https://todo.levkin.ca proxmox_vmid=301 proxmox_node=pve201
caddy ansible_host=10.0.10.50 ansible_user=root
jellyfin ansible_host=10.0.10.232 ansible_user=root
listmonk ansible_host=10.0.10.148 ansible_user=root
nextcloud ansible_host=10.0.10.25 ansible_user=root
actual ansible_host=10.0.10.158 ansible_user=root
vikanjans ansible_host=10.0.10.159 ansible_user=root
n8n ansible_host=10.0.10.154 ansible_user=root
giteaVM ansible_host=10.0.10.169 ansible_user=root
portainerVM ansible_host=10.0.30.69 ansible_user=ladmin
homepageVM ansible_host=10.0.30.12 ansible_user=homepage
vaultwardenVM ansible_host=10.0.10.142 ansible_user=ladmin
qBittorrent ansible_host=10.0.10.91 ansible_user=root port=8080
jellyfin ansible_host=10.0.10.232 ansible_user=root url=https://jelly.levkin.ca proxmox_vmid=101 proxmox_node=PVENAS # stopped until NAS pool healthy
# Retired / stopped — kept for reference; do not run playbooks against these without intent
# nextcloud ansible_host=10.0.10.24 ansible_user=root url=https://nextcloud.levkin.ca # VM 201 decommission
# portainerVM ansible_host=10.0.30.69 ansible_user=ladmin # retired → Dockge on monitoring
# homepageVM ansible_host=10.0.30.12 ansible_user=homepage # VM 100 stopped on pve10
#[desktop]
#desktop-beast ansible_host=100.117.34.106 ansible_user=beast
[desktop]
desktop-beast ansible_host=100.117.34.106 ansible_user=beast
[local]
localhost ansible_connection=local

View File

@ -1,53 +0,0 @@
---
# Playbook: caddy-auth-authentik
# Purpose: Add auth.levkin.ca reverse proxy to Caddy (Phase 1 Authentik)
# Targets: caddy
# Usage: make -f Makefile caddy-auth OR ansible-playbook playbooks/caddy-auth-authentik.yml
#
# Both tasks edit /etc/caddy/Caddyfile; the "Reload caddy" handler only runs
# when one of them reports a change.
- name: Add Authentik proxy block to Caddy
  hosts: caddy
  become: true
  become_method: ansible.builtin.su
  tasks:
    # Inserts the auth.levkin.ca site block directly after the closing brace of
    # the cal.levkin.ca block. The script prints CHANGED only after it has
    # rewritten the Caddyfile, so a re-run with the block already present
    # reports "ok" and does not trigger a reload.
    - name: Ensure auth.levkin.ca HTTPS block exists (after cal block)
      ansible.builtin.shell: |
        set -euo pipefail
        if grep -q '^auth\.levkin\.ca {' /etc/caddy/Caddyfile; then
          exit 0
        fi
        awk '
          /^cal\.levkin\.ca \{/ { in_cal=1 }
          in_cal && /^}$/ && !done {
            print
            print ""
            print "auth.levkin.ca {"
            print " import security-headers"
            print " encode gzip"
            print " reverse_proxy 10.0.10.21:9000"
            print "}"
            done=1
            next
          }
          { print }
        ' /etc/caddy/Caddyfile > /tmp/Caddyfile.new
        # awk copies the file unchanged when the cal.levkin.ca anchor block is
        # missing; fail instead of installing an unmodified file as "changed".
        if ! grep -q '^auth\.levkin\.ca {' /tmp/Caddyfile.new; then
          rm -f /tmp/Caddyfile.new
          echo "cal.levkin.ca block not found; auth.levkin.ca block was not inserted" >&2
          exit 1
        fi
        mv /tmp/Caddyfile.new /etc/caddy/Caddyfile
        echo CHANGED
      args:
        executable: /bin/bash
      register: auth_https_block
      changed_when: "'CHANGED' in auth_https_block.stdout"
      notify: Reload caddy

    # blockinfile is idempotent on its own: the marker lines let it detect an
    # existing managed block.
    - name: Ensure auth.levkin.ca HTTP redirect in :80 block
      ansible.builtin.blockinfile:
        path: /etc/caddy/Caddyfile
        marker: "# {mark} ANSIBLE MANAGED auth.levkin.ca :80"
        insertafter: '@vault host vault.levkin.ca'
        block: |
          @auth host auth.levkin.ca
          redir @auth https://auth.levkin.ca{uri} permanent
      notify: Reload caddy

  handlers:
    - name: Reload caddy
      ansible.builtin.command: caddy reload --config /etc/caddy/Caddyfile
      changed_when: true

View File

@ -1,55 +0,0 @@
---
# Playbook: caddy-levkin-site
# Purpose: Add levkin.ca reverse proxy to Caddy (site LXC 220)
# Targets: caddy
# Usage: make caddy-levkin
- name: Add levkin.ca proxy block to Caddy
  hosts: caddy
  become: true
  become_method: ansible.builtin.su
  tasks:
    # Inserts the levkin.ca site block right after the closing brace of the
    # caseware.levkin.ca block. The upstream defaults to 10.0.10.60:80 and can
    # be overridden with levkin_site_upstream. The script prints
    # CADDY_BLOCK_ADDED only when it rewrote the Caddyfile, so a rerun that
    # exits early is reported as "ok" and does not reload Caddy.
    - name: Ensure levkin.ca HTTPS block exists (after caseware block)
      ansible.builtin.shell: |
        set -euo pipefail
        caddyfile=/etc/caddy/Caddyfile
        if grep -q '^levkin\.ca,' "${caddyfile}" || grep -q '^levkin\.ca {' "${caddyfile}"; then
          exit 0
        fi
        staged="$(mktemp)"
        trap 'rm -f "${staged}"' EXIT
        awk -v upstream="{{ levkin_site_upstream | default('10.0.10.60:80') }}" '
          /^caseware\.levkin\.ca \{/ { in_cw=1 }
          in_cw && /^}$/ && !done {
            print
            print ""
            print "levkin.ca, www.levkin.ca {"
            print " import security-headers"
            print " @www host www.levkin.ca"
            print " redir @www https://levkin.ca{uri} permanent"
            print " reverse_proxy " upstream
            print "}"
            done=1
            next
          }
          { print }
        ' "${caddyfile}" > "${staged}"
        if ! grep -q '^levkin\.ca,' "${staged}"; then
          echo "caseware.levkin.ca block not found in ${caddyfile}; nothing inserted" >&2
          exit 1
        fi
        # Overwrite in place so the Caddyfile keeps its owner and mode.
        cat "${staged}" > "${caddyfile}"
        echo CADDY_BLOCK_ADDED
      args:
        executable: /bin/bash
      register: levkin_https_block
      changed_when: "'CADDY_BLOCK_ADDED' in levkin_https_block.stdout"
      notify: Reload caddy

    # blockinfile is idempotent on its own; the marker keeps this snippet
    # separate from other managed blocks in the same file.
    - name: Ensure levkin.ca HTTP redirect in :80 block
      ansible.builtin.blockinfile:
        path: /etc/caddy/Caddyfile
        marker: "# {mark} ANSIBLE MANAGED levkin.ca :80"
        insertafter: '@vikunja host todo.levkin.ca'
        block: |
          @levkin host levkin.ca www.levkin.ca
          redir @levkin https://levkin.ca{uri} permanent
      notify: Reload caddy

  handlers:
    - name: Reload caddy
      ansible.builtin.command: caddy reload --config /etc/caddy/Caddyfile
      changed_when: true

View File

@ -1,79 +0,0 @@
---
# Playbook: cal-authentik-oidc
# Purpose: Enable Cal.com SSO (SAML DB + license env) and Authentik OIDC provider
# Targets: cal (LXC 210), identity (LXC 217)
# Usage: make cal-oidc
# Manual: https://cal.levkin.ca/settings/security/sso — enter Client ID, Secret, Well Known URL

# Play 1: decide once, on the controller, which client secret both sides use.
- name: Prepare OIDC client secret
  hosts: localhost
  gather_facts: false
  tasks:
    # default(..., true) also replaces an EMPTY vault value, matching the
    # condition of the reminder task below; without the flag an empty secret
    # would be passed through to Authentik and Cal.
    - name: Use vault OIDC secret or generate one for this run
      ansible.builtin.set_fact:
        cal_oidc_client_secret_effective: >-
          {{ vault_cal_oidc_client_secret
          | default(lookup('password', '/dev/null length=48 chars=ascii_letters,digits'), true) }}
      no_log: true

    - name: Remind to persist generated secret in vault
      ansible.builtin.debug:
        msg: >-
          vault_cal_oidc_client_secret was not set — generated for this run only.
          Add it to vault.yml and re-run so Authentik and Cal stay in sync.
      when: vault_cal_oidc_client_secret is not defined or vault_cal_oidc_client_secret | length == 0

# Play 2: create the SAML database and roll out the SSO-enabled compose stack.
- name: Cal.com — SAML database and compose SSO env
  hosts: cal
  become: true
  vars:
    # Play-level override so the role sees the secret chosen in play 1.
    vault_cal_oidc_client_secret: "{{ hostvars['localhost']['cal_oidc_client_secret_effective'] }}"
  pre_tasks:
    # Sources the existing .env on the Cal host and prints the two Postgres
    # values on fixed lines ("user=..." then "pass=...") for the next task.
    - name: Load Cal Postgres credentials from .env
      ansible.builtin.shell: |
        set -a
        source {{ cal_compose_dir }}/.env
        printf 'user=%s\npass=%s\n' "$POSTGRES_USER" "$POSTGRES_PASSWORD"
      args:
        executable: /bin/bash
      register: cal_pg_creds
      changed_when: false
      no_log: true

    # NOTE(review): the password is placed into the URL verbatim; a password
    # containing URL-reserved characters may need encoding — confirm.
    - name: Set Cal database facts
      ansible.builtin.set_fact:
        cal_postgres_user: "{{ cal_pg_creds.stdout_lines[0] | regex_replace('^user=', '') }}"
        cal_postgres_password: "{{ cal_pg_creds.stdout_lines[1] | regex_replace('^pass=', '') }}"
        cal_saml_database_url: >-
          postgresql://{{ cal_pg_creds.stdout_lines[0] | regex_replace('^user=', '') }}:{{
          cal_pg_creds.stdout_lines[1] | regex_replace('^pass=', '') }}@db:5432/{{ cal_saml_db_name }}
      no_log: true
  roles:
    - role: cal_sso

# Play 3: register the OIDC provider/application in Authentik.
- name: Authentik — Cal.com OIDC provider
  hosts: identity
  become: true
  vars:
    vault_cal_oidc_client_secret: "{{ hostvars['localhost']['cal_oidc_client_secret_effective'] }}"
  tasks:
    - name: Authentik OIDC for Cal.com
      ansible.builtin.import_role:
        name: cal_sso
        tasks_from: authentik.yml

# Play 4: print the values that must be entered by hand in the Cal.com UI.
# NOTE(review): this play does not include the cal_sso role, so the cal_*
# variables used below presumably have to come from inventory/group_vars —
# verify they resolve here.
- name: Cal.com OIDC — finish in UI
  hosts: cal
  gather_facts: false
  tasks:
    - name: Print Cal.com SSO configuration values
      ansible.builtin.debug:
        msg:
          - "1. Log in to Cal as {{ cal_saml_admins }}"
          - "2. Open {{ cal_public_url }}/settings/security/sso"
          - "3. Configure OIDC:"
          - " Client ID: {{ cal_oidc_client_id }}"
          - " Client Secret: (vault_cal_oidc_client_secret — see vault)"
          - " Well Known URL: {{ cal_authentik_host }}/application/o/{{ cal_authentik_app_slug }}/.well-known/openid-configuration"
          - "4. Test SSO login; keep local password as break-glass"

View File

@ -24,7 +24,6 @@
roles:
- {role: maintenance, tags: ['maintenance']}
- {role: maintenance_cron, tags: ['maintenance', 'maintenance_cron']}
post_tasks:
- name: Display maintenance completion

View File

@ -1,20 +0,0 @@
---
# Playbook: ssh-keys
# Purpose: Install your workstation SSH public key on all inventory hosts
# Targets: all hosts except localhost
# Usage: make copy-ssh-keys-ansible
#        make copy-ssh-keys-ansible GROUP=services
#        make copy-ssh-keys-ansible HOST=dev01
- name: Deploy workstation SSH public key
  hosts: "all:!local"
  gather_facts: false
  vars:
    # SSH_PUBLIC_KEY from the environment wins; an unset or empty value falls
    # back to $HOME/.ssh/id_ed25519.pub.
    ssh_public_key_file: >-
      {{ lookup('env', 'SSH_PUBLIC_KEY')
      | default(lookup('env', 'HOME') + '/.ssh/id_ed25519.pub', true) }}
  tasks:
    # Runs unprivileged: the key is appended for the connecting user itself.
    - name: Add SSH public key for ansible_user
      become: false
      ansible.posix.authorized_key:
        key: "{{ lookup('file', ssh_public_key_file) }}"
        user: "{{ ansible_user | default(ansible_user_id) }}"
        state: present

View File

@ -1,10 +0,0 @@
---
# Cal.com host: compose project location and SAML database settings.
cal_compose_dir: "/opt/cal"
cal_saml_db_name: "calsaml"
cal_saml_admins: "idobkin@gmail.com"
cal_public_url: "https://cal.levkin.ca"

# Authentik side: public host plus application/provider naming.
cal_authentik_host: "https://auth.levkin.ca"
cal_authentik_app_slug: "cal-com"
cal_authentik_provider_name: "cal-com-oidc"

# The OIDC client id reuses the application slug.
# Set in vault: vault_cal_oidc_client_secret (generated on first run if absent)
cal_oidc_client_id: "{{ cal_authentik_app_slug }}"

View File

@ -1,20 +0,0 @@
---
# Handlers for the cal_sso role. Each one shells out to docker compose in the
# relevant project directory and always reports "changed".

- name: Recreate calcom stack
  ansible.builtin.command:
    chdir: "{{ cal_compose_dir }}"
    argv:
      - docker
      - compose
      - up
      - "-d"
  changed_when: true

- name: Recreate authentik server
  ansible.builtin.command:
    chdir: /opt/authentik
    argv:
      - docker
      - compose
      - up
      - "-d"
      - server
      - worker
  changed_when: true

# Applies the blueprint file named after the application slug inside the
# running Authentik server container.
- name: Apply authentik cal blueprint
  ansible.builtin.command:
    chdir: /opt/authentik
    argv:
      - docker
      - compose
      - exec
      - "-T"
      - server
      - ak
      - apply_blueprint
      - "{{ cal_authentik_app_slug }}-oidc.yaml"
  changed_when: true

View File

@ -1,25 +0,0 @@
---
# Authentik side of the Cal.com SSO setup: make a blueprints directory
# available, mount it into the Authentik server service, deploy the Cal.com
# OIDC blueprint and apply it before the play continues.
- name: Ensure Authentik blueprints directory on host
ansible.builtin.file:
path: /opt/authentik/blueprints
state: directory
mode: "0755"
# Inserts a "./blueprints:/blueprints" volume line between the existing
# "./data:/data" and "./custom-templates:/templates" entries of the server
# service. Once inserted, the two entries are no longer adjacent, so the
# pattern stops matching and the task becomes a no-op on later runs.
# NOTE(review): the regexp and replacement are whitespace-sensitive; confirm
# the spacing here matches the indentation used in compose.yml.
- name: Add blueprints volume to Authentik server service
ansible.builtin.replace:
path: /opt/authentik/compose.yml
regexp: '(?ms)( server:.*? volumes:\n - \./data:/data\n)( - \./custom-templates:/templates)'
replace: '\1 - ./blueprints:/blueprints\n\2'
notify:
- Recreate authentik server
- Apply authentik cal blueprint
# The blueprint file name is derived from the application slug and is the
# same name the "Apply authentik cal blueprint" handler passes to ak.
- name: Deploy Cal.com OIDC blueprint
ansible.builtin.template:
src: authentik-cal-oidc.yaml.j2
dest: "/opt/authentik/blueprints/{{ cal_authentik_app_slug }}-oidc.yaml"
mode: "0644"
notify: Apply authentik cal blueprint
# Run any notified handlers now rather than at the end of the play.
- name: Flush Authentik blueprint handler
ansible.builtin.meta: flush_handlers

View File

@ -1,52 +0,0 @@
---
# Cal.com side of the SSO setup: make sure the SAML database exists, deploy
# the compose file and .env entries, then wait for the app to answer again.

# psql -tc prints "1" when a database with that name exists, nothing otherwise.
- name: Ensure SAML database exists on Cal Postgres
  ansible.builtin.command:
    argv:
      - docker
      - exec
      - cal-db
      - psql
      - "-U"
      - "{{ cal_postgres_user }}"
      - "-tc"
      - "SELECT 1 FROM pg_database WHERE datname='{{ cal_saml_db_name }}'"
  register: cal_saml_db_check
  failed_when: cal_saml_db_check.rc != 0
  changed_when: false

- name: Create SAML database
  when: cal_saml_db_check.stdout | trim != "1"
  ansible.builtin.command:
    argv:
      - docker
      - exec
      - cal-db
      - psql
      - "-U"
      - "{{ cal_postgres_user }}"
      - "-c"
      - "CREATE DATABASE {{ cal_saml_db_name }}"
  changed_when: true

- name: Deploy docker-compose with SSO environment
  ansible.builtin.template:
    src: docker-compose.yml.j2
    dest: "{{ cal_compose_dir }}/docker-compose.yml"
    mode: "0644"
    owner: root
    group: root
  notify: Recreate calcom stack

# create: false — the .env file must already exist; only the two SAML keys
# are added or rewritten.
- name: Ensure SAML env vars in Cal .env
  ansible.builtin.lineinfile:
    path: "{{ cal_compose_dir }}/.env"
    create: false
    regexp: "^{{ item.key }}="
    line: "{{ item.key }}={{ item.value }}"
  loop:
    - {key: SAML_DATABASE_URL, value: "{{ cal_saml_database_url }}"}
    - {key: SAML_ADMINS, value: "{{ cal_saml_admins }}"}
  no_log: true
  notify: Recreate calcom stack

- name: Flush handlers before OIDC UI step
  ansible.builtin.meta: flush_handlers

# Polls up to 12 times, 10 seconds apart; both 200 and 404 count as "up".
- name: Wait for Cal.com HTTP after stack recreate
  ansible.builtin.uri:
    url: "{{ cal_public_url }}/api/version"
    status_code:
      - 200
      - 404
  register: cal_http
  until: cal_http.status in [200, 404]
  retries: 12
  delay: 10

View File

@ -1,38 +0,0 @@
# Cal.com OIDC provider + application (managed by Ansible)
#
# Every templated scalar is rendered through to_json so the result is always
# a double-quoted YAML string: an empty value, an all-digit secret or a value
# containing YAML-special characters can no longer change type or break the
# blueprint.
version: 1
metadata:
  name: Cal.com OIDC
  labels:
    blueprints.goauthentik.io/instantiate: "true"
entries:
  # OAuth2/OIDC provider that Cal.com authenticates against.
  - model: authentik_providers_oauth2.oauth2provider
    id: cal-oidc-provider
    identifiers:
      name: {{ cal_authentik_provider_name | to_json }}
    attrs:
      name: {{ cal_authentik_provider_name | to_json }}
      authorization_flow: !Find [authentik_flows.flow, [slug, default-provider-authorization-implicit-consent]]
      invalidation_flow: !Find [authentik_flows.flow, [slug, default-provider-invalidation-flow]]
      client_type: confidential
      client_id: {{ cal_oidc_client_id | to_json }}
      client_secret: {{ vault_cal_oidc_client_secret | to_json }}
      redirect_uris:
        - matching_mode: strict
          url: {{ (cal_public_url ~ '/api/auth/oidc') | to_json }}
      signing_key: !Find [authentik_crypto.certificatekeypair, [name, authentik Self-signed Certificate]]
      property_mappings:
        - !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]]
        - !Find [authentik_providers_oauth2.scopemapping, [scope_name, email]]
        - !Find [authentik_providers_oauth2.scopemapping, [scope_name, profile]]
  # Application entry bound to the provider above via !KeyOf.
  - model: authentik_core.application
    id: cal-oidc-app
    identifiers:
      slug: {{ cal_authentik_app_slug | to_json }}
    attrs:
      name: Cal.com
      slug: {{ cal_authentik_app_slug | to_json }}
      group: ""
      provider: !KeyOf cal-oidc-provider
      policy_engine_mode: any
      meta_launch_url: {{ cal_public_url | to_json }}
      meta_icon: https://cal.com/favicon.ico

View File

@ -1,44 +0,0 @@
# Compose stack for Cal.com: a Postgres database ("db") and the Cal.com web
# app ("calcom"). Every ${NAME} value is left for Docker Compose to
# substitute (presumably from the .env file next to this compose file).
services:
# Postgres 15; its data is kept in ./postgres-data beside the compose file.
db:
image: postgres:15
container_name: cal-db
restart: unless-stopped
environment:
POSTGRES_USER: ${POSTGRES_USER}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
POSTGRES_DB: ${POSTGRES_DB}
volumes:
- ./postgres-data:/var/lib/postgresql/data
healthcheck:
# "$$" is an escaped "$": the variables are expanded by the shell inside
# the container, not by Compose.
test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}"]
interval: 10s
timeout: 5s
retries: 5
# Cal.com application; started only once the db healthcheck passes.
calcom:
image: calcom/cal.com:latest
container_name: calcom
restart: unless-stopped
depends_on:
db:
condition: service_healthy
environment:
DATABASE_URL: ${DATABASE_URL}
DATABASE_DIRECT_URL: ${DATABASE_DIRECT_URL}
NEXT_PUBLIC_WEBAPP_URL: ${NEXT_PUBLIC_WEBAPP_URL}
NEXT_PUBLIC_API_V2_URL: ${NEXT_PUBLIC_API_V2_URL}
NEXTAUTH_URL: ${NEXTAUTH_URL}
NEXTAUTH_SECRET: ${NEXTAUTH_SECRET}
CALENDSO_ENCRYPTION_KEY: ${CALENDSO_ENCRYPTION_KEY}
CALCOM_LICENSE_KEY: ${CALCOM_LICENSE_KEY}
NEXT_PUBLIC_LICENSE_CONSENT: ${NEXT_PUBLIC_LICENSE_CONSENT}
# SSO settings; the cal_sso role writes these two keys into .env.
SAML_DATABASE_URL: ${SAML_DATABASE_URL}
SAML_ADMINS: ${SAML_ADMINS}
EMAIL_FROM: ${EMAIL_FROM}
EMAIL_SERVER_HOST: ${EMAIL_SERVER_HOST}
EMAIL_SERVER_PORT: ${EMAIL_SERVER_PORT}
EMAIL_SERVER_USER: ${EMAIL_SERVER_USER}
EMAIL_SERVER_PASSWORD: ${EMAIL_SERVER_PASSWORD}
CALCOM_TELEMETRY_DISABLED: ${CALCOM_TELEMETRY_DISABLED}
ports:
- "3000:3000"

View File

@ -1,23 +0,0 @@
# maintenance_cron
Weekly cleanup jobs for production hosts.
## Scripts
| Script | Schedule | Purpose |
|--------|----------|---------|
| `system-maintenance` | `/etc/cron.weekly/` | `journalctl --vacuum-size=500M`, `apt autoremove`, `apt autoclean` |
| `docker-prune` | `/etc/cron.weekly/` | `docker system prune -af --filter until=168h` |
| `gitea-archive-prune` | `/etc/cron.weekly/` | Delete Gitea `repo-archive` files older than 7 days |
## Variables
See `defaults/main.yml`. Enable per host or group:
```yaml
maintenance_cron_enable_system: true
maintenance_cron_enable_docker: true # Docker hosts only
maintenance_cron_enable_gitea_archive: true # giteaVM only
```
Applied via `playbooks/maintenance.yml` (tag `maintenance_cron`).

View File

@ -1,18 +0,0 @@
---
# Defaults for the maintenance_cron role. Each section has an enable switch
# plus the settings rendered into the corresponding /etc/cron.weekly script.

# --- System cleanup: journal vacuum + apt autoremove/autoclean ---
maintenance_cron_enable_system: true
maintenance_cron_system_script: "/etc/cron.weekly/system-maintenance"
maintenance_cron_journal_vacuum_size: "500M"

# --- Docker prune (CI / Docker hosts); off unless enabled per host/group ---
maintenance_cron_enable_docker: false
maintenance_cron_docker_script: "/etc/cron.weekly/docker-prune"
maintenance_cron_docker_log: "/var/log/docker-prune.log"
maintenance_cron_docker_prune_until: "168h"

# --- Gitea repo-archive cache (Alpine Gitea VM); off unless enabled ---
maintenance_cron_enable_gitea_archive: false
maintenance_cron_gitea_archive_script: "/etc/cron.weekly/gitea-archive-prune"
maintenance_cron_gitea_archive_log: "/var/log/gitea-archive-prune.log"
maintenance_cron_gitea_archive_dir: "/var/lib/gitea/data/repo-archive"
maintenance_cron_gitea_archive_max_age_days: 7

View File

@ -1,27 +0,0 @@
---
# Installs the weekly cleanup scripts. Each script is rendered only when its
# maintenance_cron_enable_* switch is true; all are root-owned and executable.

- name: Install weekly system maintenance script
  when: maintenance_cron_enable_system | bool
  ansible.builtin.template:
    dest: "{{ maintenance_cron_system_script }}"
    src: system-maintenance.sh.j2
    mode: "0755"
    owner: root
    group: root

- name: Install weekly Docker prune script
  when: maintenance_cron_enable_docker | bool
  ansible.builtin.template:
    dest: "{{ maintenance_cron_docker_script }}"
    src: docker-prune.sh.j2
    mode: "0755"
    owner: root
    group: root

- name: Install weekly Gitea archive prune script
  when: maintenance_cron_enable_gitea_archive | bool
  ansible.builtin.template:
    dest: "{{ maintenance_cron_gitea_archive_script }}"
    src: gitea-archive-prune.sh.j2
    mode: "0755"
    owner: root
    group: root

View File

@ -1,8 +0,0 @@
#!/bin/bash
# Ansible managed — weekly Docker image/container cleanup
# Prunes unused Docker data older than the configured age and appends the
# output to the log file. Does nothing on hosts without a docker binary.
set -euo pipefail

command -v docker >/dev/null 2>&1 || exit 0

/usr/bin/docker system prune -af \
  --filter "until={{ maintenance_cron_docker_prune_until }}" \
  >> "{{ maintenance_cron_docker_log }}" 2>&1

View File

@ -1,19 +0,0 @@
#!/bin/sh
# Ansible managed — weekly Gitea repo-archive cache cleanup
# Deletes archive files older than MAX_AGE_DAYS, removes sub-directories that
# became empty, and appends a before/after size report to LOG.
# Exits quietly when the archive directory does not exist.
set -eu

ARCHIVE_DIR="{{ maintenance_cron_gitea_archive_dir }}"
LOG="{{ maintenance_cron_gitea_archive_log }}"
MAX_AGE_DAYS="{{ maintenance_cron_gitea_archive_max_age_days }}"

if [ ! -d "${ARCHIVE_DIR}" ]; then
  exit 0
fi

{
  echo "=== $(date -Iseconds) gitea-archive-prune ==="
  echo "Before: $(du -sh "${ARCHIVE_DIR}" 2>/dev/null | awk '{print $1}')"
  find "${ARCHIVE_DIR}" -type f -mtime "+${MAX_AGE_DAYS}" -delete
  # -mindepth 1 keeps the archive root itself: without it a fully emptied
  # cache directory would be deleted too and the "After" size would be blank.
  find "${ARCHIVE_DIR}" -mindepth 1 -type d -empty -delete
  echo "After: $(du -sh "${ARCHIVE_DIR}" 2>/dev/null | awk '{print $1}')"
  df -h /
} >> "${LOG}" 2>&1

View File

@ -1,7 +0,0 @@
#!/bin/bash
# Ansible managed — weekly journal vacuum + apt cleanup
# The journal vacuum is best-effort (errors ignored); the apt steps abort the
# script on failure because of `set -e`.
set -euo pipefail

export DEBIAN_FRONTEND=noninteractive

journalctl --vacuum-size={{ maintenance_cron_journal_vacuum_size }} 2>/dev/null || true

for apt_action in autoremove autoclean; do
  apt-get "${apt_action}" -y
done

View File

@ -1,60 +0,0 @@
#!/usr/bin/env bash
# Bootstrap root SSH when `su` needs a password (no sudo on host).
# Usage: BOOTSTRAP_SU_PASSWORD='...' ./scripts/bootstrap-root-ssh-su-password.sh HOST
#
# Environment:
#   BOOTSTRAP_SU_PASSWORD  password typed at the `su -` prompt (required)
#   BOOTSTRAP_USER         unprivileged login user (default: ladmin)
#   SSH_PUBLIC_KEY         public key to authorize for root
#                          (default: ~/.ssh/id_ed25519.pub)
#
# HOST is looked up in inventories/production/hosts; its ansible_host value is
# the address used for every connection.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
HOST="${1:-}"
BOOTSTRAP_USER="${BOOTSTRAP_USER:-ladmin}"
PUBKEY_FILE="${SSH_PUBLIC_KEY:-${HOME}/.ssh/id_ed25519.pub}"
SU_PASSWORD="${BOOTSTRAP_SU_PASSWORD:-}"

[[ -n "${HOST}" ]] || { echo "Usage: $0 HOST" >&2; exit 1; }
[[ -n "${SU_PASSWORD}" ]] || { echo "Set BOOTSTRAP_SU_PASSWORD" >&2; exit 1; }
[[ -f "${PUBKEY_FILE}" ]] || { echo "Missing ${PUBKEY_FILE}" >&2; exit 1; }

# Identity for the final verification login: prefer the private key that sits
# next to the public key; fall back to the .pub path (which ssh can only use
# together with an agent holding the matching private key).
IDENTITY_FILE="${PUBKEY_FILE%.pub}"
[[ -f "${IDENTITY_FILE}" ]] || IDENTITY_FILE="${PUBKEY_FILE}"

IP="$(awk -v h="${HOST}" '$1==h {for(i=2;i<=NF;i++) if($i~/^ansible_host=/) {sub(/ansible_host=/,"",$i); print $i; exit}}' \
  "${REPO_ROOT}/inventories/production/hosts")"
[[ -n "${IP}" ]] || { echo "No ansible_host for ${HOST}" >&2; exit 1; }

PUBKEY="$(cat "${PUBKEY_FILE}")"
export IP BOOTSTRAP_USER SU_PASSWORD PUBKEY

# The expect script reads everything it needs from the exported environment;
# the heredoc is quoted so the shell does not expand anything inside it.
/usr/bin/expect <<'EXPECT'
set timeout 60
spawn ssh -o StrictHostKeyChecking=accept-new $env(BOOTSTRAP_USER)@$env(IP)
expect {
  -re {[$#] $} { }
  timeout { exit 1 }
}
send "su -\r"
expect {
  "Password:" {
    send "$env(SU_PASSWORD)\r"
  }
  timeout { exit 1 }
}
# Wait until a hash sign shows up after the password was sent. The previous
# pattern contained an empty alternative and therefore matched immediately,
# even when su had rejected the password.
expect {
  -re {#} { }
  timeout { exit 1 }
}
# Plain bash without profile or rc files gives a predictable prompt to match.
send "bash --noprofile --norc\r"
expect {
  -re {# $} { }
  timeout { exit 1 }
}
send "mkdir -p /root/.ssh && chmod 700 /root/.ssh && touch /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys\r"
expect -re {# $}
send "grep -qF '$env(PUBKEY)' /root/.ssh/authorized_keys || echo '$env(PUBKEY)' >> /root/.ssh/authorized_keys\r"
expect -re {# $}
# sed exits 0 even when nothing matched, so test for the directive first and
# append it only when it is really absent.
send "if grep -q '^#*PermitRootLogin' /etc/ssh/sshd_config; then sed -i 's/^#*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config; else echo 'PermitRootLogin prohibit-password' >> /etc/ssh/sshd_config; fi\r"
expect -re {# $}
send "systemctl restart ssh 2>/dev/null || systemctl restart sshd 2>/dev/null || true\r"
expect -re {# $}
send "exit\r"
expect eof
EXPECT

# Verify that key-based root login now works without any prompt.
ssh -o BatchMode=yes -i "${IDENTITY_FILE}" -o ConnectTimeout=10 \
  "root@${IP}" "echo OK: root@${IP}"
echo "Done: root key on ${HOST}"

View File

@ -1,103 +0,0 @@
#!/usr/bin/env bash
# Bootstrap root SSH key access via a normal user (default: ladmin).
# Usage: ./scripts/bootstrap-root-ssh.sh HOSTNAME
# BOOTSTRAP_USER=ladmin TARGET_USER=root SSH_PUBLIC_KEY=~/.ssh/id_ed25519.pub
#
# Steps: (1) ssh-copy-id the key for BOOTSTRAP_USER, (2) copy the key to the
# host and append it to /root/.ssh/authorized_keys through `su - root`,
# (3) verify key-based login as TARGET_USER.
# INVENTORY_HOSTS overrides the inventory file used to resolve HOSTNAME.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
INVENTORY_HOSTS="${INVENTORY_HOSTS:-${REPO_ROOT}/inventories/production/hosts}"
PUBKEY_FILE="${SSH_PUBLIC_KEY:-${HOME}/.ssh/id_ed25519.pub}"
BOOTSTRAP_USER="${BOOTSTRAP_USER:-ladmin}"
TARGET_USER="${TARGET_USER:-root}"
HOST="${1:-}"

if [[ -z "${HOST}" ]]; then
  echo "Usage: $0 HOST" >&2
  exit 1
fi
if [[ ! -f "${PUBKEY_FILE}" ]]; then
  echo "Public key not found: ${PUBKEY_FILE}" >&2
  exit 1
fi

# Identity for the verification login: prefer the private key next to the
# public key; fall back to the .pub path (usable only through an ssh-agent
# that holds the matching private key).
IDENTITY_FILE="${PUBKEY_FILE%.pub}"
[[ -f "${IDENTITY_FILE}" ]] || IDENTITY_FILE="${PUBKEY_FILE}"

# Prints two lines for HOST: its ansible_host value, then its ansible_user
# value (either line is empty when the inventory entry lacks that key).
resolve_from_inventory() {
  awk -v host="${HOST}" '
    $1 == host {
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^ansible_host=/) {
          sub(/ansible_host=/, "", $i)
          ip = $i
        }
        if ($i ~ /^ansible_user=/) {
          sub(/ansible_user=/, "", $i)
          user = $i
        }
      }
    }
    END {
      print ip
      print user
    }
  ' "${INVENTORY_HOSTS}"
}

# Parse the inventory once and split the two output lines.
INVENTORY_FIELDS="$(resolve_from_inventory)"
IP="$(sed -n '1p' <<<"${INVENTORY_FIELDS}")"
INV_USER="$(sed -n '2p' <<<"${INVENTORY_FIELDS}")"
if [[ -z "${IP}" ]]; then
  echo "Could not resolve ansible_host for ${HOST} in ${INVENTORY_HOSTS}" >&2
  exit 1
fi

echo "==> ${HOST} (${BOOTSTRAP_USER}@${IP} -> ${TARGET_USER})"
echo " Inventory ansible_user: ${INV_USER:-<unset>}"
echo " Public key: ${PUBKEY_FILE}"
echo ""
echo "Step 1/3: Install key for ${BOOTSTRAP_USER} (password: ${BOOTSTRAP_USER})"
ssh-copy-id -i "${PUBKEY_FILE}" -o StrictHostKeyChecking=accept-new \
  "${BOOTSTRAP_USER}@${IP}"

echo ""
echo "Step 2/3: Copy key and configure ${TARGET_USER} via su (password: root)"
REMOTE_KEY="/tmp/ansible-bootstrap.pub"
scp -o StrictHostKeyChecking=accept-new "${PUBKEY_FILE}" \
  "${BOOTSTRAP_USER}@${IP}:${REMOTE_KEY}"

# Both heredocs are unquoted on purpose: ${REMOTE_KEY} on the second line is
# expanded locally, while every \$ is passed through and expanded on the
# remote side by the bootstrap user's shell before su receives the script.
# NOTE(review): stdin of this ssh call is the heredoc, so su may not be able
# to prompt for a password interactively — confirm on a real host.
ssh -t "${BOOTSTRAP_USER}@${IP}" bash -s <<REMOTE_SCRIPT
set -e
REMOTE_KEY="${REMOTE_KEY}"
su - root <<ROOT_SCRIPT
set -e
mkdir -p /root/.ssh
chmod 700 /root/.ssh
touch /root/.ssh/authorized_keys
chmod 600 /root/.ssh/authorized_keys
if ! grep -qF "\$(cat "\${REMOTE_KEY}")" /root/.ssh/authorized_keys 2>/dev/null; then
cat "\${REMOTE_KEY}" >> /root/.ssh/authorized_keys
fi
rm -f "\${REMOTE_KEY}"
if [ -f /etc/ssh/sshd_config ]; then
if grep -q '^PermitRootLogin' /etc/ssh/sshd_config; then
sed -i 's/^#*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
else
echo 'PermitRootLogin prohibit-password' >> /etc/ssh/sshd_config
fi
systemctl restart ssh 2>/dev/null \
|| systemctl restart sshd 2>/dev/null \
|| service ssh restart 2>/dev/null \
|| true
fi
echo "OK: root authorized_keys updated; PermitRootLogin prohibit-password"
ROOT_SCRIPT
REMOTE_SCRIPT

echo ""
echo "Step 3/3: Verify ${TARGET_USER} key login"
ssh -o BatchMode=yes -i "${IDENTITY_FILE}" -o StrictHostKeyChecking=accept-new \
  "${TARGET_USER}@${IP}" "echo OK: ${TARGET_USER}@${IP} accepts your SSH key"
echo ""
echo "Done: ${HOST} — use: ssh -i ${IDENTITY_FILE} ${TARGET_USER}@${IP}"

View File

@ -1,70 +0,0 @@
#!/usr/bin/env bash
# Register Uptime Kuma HTTP monitors through the uptime-kuma-api client.
# Usage:
#   source <(./scripts/vault-export-env.sh)   # or export KUMA_* manually
#   ./scripts/kuma-add-monitors.sh
#
# A monitor whose name already exists in Kuma is skipped, so reruns only add
# what is missing. When KUMA_PASSWORD is not set, the repo's .env file (if
# present) is sourced as a fallback.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
KUMA_URL="${KUMA_URL:-http://10.0.10.22:3001}"
KUMA_USER="${KUMA_USER:-admin}"
KUMA_PASSWORD="${KUMA_PASSWORD:-}"

if [[ -z "${KUMA_PASSWORD}" && -f "${REPO_ROOT}/.env" ]]; then
  # shellcheck disable=SC1091
  set -a
  source "${REPO_ROOT}/.env"
  set +a
  KUMA_PASSWORD="${KUMA_PASSWORD:-}"
fi

if [[ -z "${KUMA_PASSWORD}" ]]; then
  echo "Set KUMA_PASSWORD (or run vault-export-env.sh first)" >&2
  exit 1
fi

export KUMA_URL KUMA_USER KUMA_PASSWORD

"${REPO_ROOT}/.venv/bin/python3" <<'PY'
import os
import sys

try:
    from uptime_kuma_api import UptimeKumaApi
except ImportError:
    print("Run: .venv/bin/pip install uptime-kuma-api", file=sys.stderr)
    sys.exit(1)

# Desired monitors; each dict is passed as keyword arguments to add_monitor.
MONITORS = [
    {
        "type": "http",
        "name": "Gitea",
        "url": "https://git.levkin.ca/user/login",
        "interval": 60,
        "retryInterval": 60,
        "maxretries": 3,
        "accepted_statuscodes": ["200-299"],
    },
]

kuma_url = os.environ["KUMA_URL"]
kuma_user = os.environ["KUMA_USER"]
kuma_password = os.environ["KUMA_PASSWORD"]

with UptimeKumaApi(kuma_url) as api:
    api.login(kuma_user, kuma_password)

    # Index the monitors already known to Kuma by name.
    known = {}
    for monitor in api.get_monitors():
        known[monitor.get("name")] = monitor

    for wanted in MONITORS:
        label = wanted["name"]
        if label not in known:
            outcome = api.add_monitor(**wanted)
            print(f"added: {label} -> {outcome}")
        else:
            print(f"skip (exists): {label} id={known[label].get('id')}")
PY

View File

@ -1,66 +0,0 @@
#!/usr/bin/env bash
# Configure Uptime Kuma SMTP notification (Mailcow) via Socket.IO API.
# Run from machine with network access to Kuma:
#   export KUMA_URL=http://10.0.10.22:3001
#   export KUMA_USER=admin
#   export KUMA_PASSWORD='your-kuma-password'
#   export SMTP_USER=alerts@levkine.ca
#   export SMTP_PASS='mailbox-password'
#   export SMTP_TO=idobkin@gmail.com
#   pip install uptime-kuma-api
#   ./scripts/kuma-setup-smtp.sh
#
# Optional:
#   SMTP_HOST / SMTP_PORT  mail server (default mail.levkine.ca:587)
#   SMTP_SECURE            true/false; when unset it is derived from the port
#                          (true only for 465)
set -euo pipefail

KUMA_URL="${KUMA_URL:-http://10.0.10.22:3001}"
KUMA_USER="${KUMA_USER:-admin}"
KUMA_PASSWORD="${KUMA_PASSWORD:-}"
SMTP_HOST="${SMTP_HOST:-mail.levkine.ca}"
SMTP_PORT="${SMTP_PORT:-587}"
SMTP_USER="${SMTP_USER:-alerts@levkine.ca}"
SMTP_PASS="${SMTP_PASS:-}"
SMTP_TO="${SMTP_TO:-idobkin@gmail.com}"
SMTP_SECURE="${SMTP_SECURE:-}"

if [[ -z "${KUMA_PASSWORD}" || -z "${SMTP_PASS}" ]]; then
  echo "Set KUMA_PASSWORD and SMTP_PASS" >&2
  exit 1
fi

# The Python part reads everything from os.environ. Variables that only got
# their default above are plain shell variables until exported, so export all
# of them; otherwise e.g. SMTP_HOST raises KeyError.
export KUMA_URL KUMA_USER KUMA_PASSWORD
export SMTP_HOST SMTP_PORT SMTP_USER SMTP_PASS SMTP_TO SMTP_SECURE

python3 <<'PY'
import os
import sys

try:
    from uptime_kuma_api import UptimeKumaApi
except ImportError:
    print("pip install uptime-kuma-api", file=sys.stderr)
    sys.exit(1)

url = os.environ["KUMA_URL"]
user = os.environ["KUMA_USER"]
password = os.environ["KUMA_PASSWORD"]
smtp_host = os.environ["SMTP_HOST"]
smtp_port = int(os.environ["SMTP_PORT"])
smtp_user = os.environ["SMTP_USER"]
smtp_pass = os.environ["SMTP_PASS"]
smtp_to = os.environ["SMTP_TO"]

# NOTE(review): smtpSecure is understood here as "implicit TLS from the first
# byte" (port 465); the default submission port 587 negotiates STARTTLS and
# needs smtpSecure=False. Set SMTP_SECURE explicitly to override.
secure_override = os.environ.get("SMTP_SECURE", "").strip().lower()
if secure_override:
    smtp_secure = secure_override in ("1", "true", "yes", "on")
else:
    smtp_secure = smtp_port == 465

with UptimeKumaApi(url) as api:
    api.login(user, password)
    # Notification type name in Kuma 1.x is often 'smtp' / 'email'
    result = api.add_notification(
        name="Mailcow alerts",
        type="smtp",
        isDefault=True,
        applyExisting=True,
        smtpHost=smtp_host,
        smtpPort=smtp_port,
        smtpSecure=smtp_secure,
        smtpIgnoreTLS=False,
        smtpUsername=smtp_user,
        smtpPassword=smtp_pass,
        smtpFrom=smtp_user,
        smtpTo=smtp_to,
    )
    print(result)
PY

View File

@ -1,51 +0,0 @@
#!/usr/bin/env bash
# Export Mailcow API + mailbox password from .env or Ansible vault.
# Usage: source scripts/load-mailcow-vault-env.sh [mailbox_local_part]
#
# Resolution order:
#   1. Values from the repo's .env file (sourced with auto-export).
#   2. Otherwise the decrypted Ansible vault: vault_mailcow_api_key and
#      vault_mailcow_mailbox_passwords[<key>]; for the key "alerts" the
#      vault_alerts_mailbox_password entry is consulted first.
# Sets MAILCOW_API_KEY, MAILBOX_PASSWORD and ALERTS_PASSWORD when found.
# Note: `set -euo pipefail` below also applies to the shell that sources this
# file.
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
VAULT_FILE="${REPO_ROOT}/inventories/production/group_vars/all/vault.yml"
VAULT_PASS="${HOME}/.ansible-vault-pass"
ANSIBLE_VAULT="${REPO_ROOT}/.venv/bin/ansible-vault"
# Mailbox key: first argument, else $MAILBOX, else $MAILBOX_LOCAL_PART.
MAILBOX_KEY="${1:-${MAILBOX:-${MAILBOX_LOCAL_PART:-}}}"
# set -a exports every variable assigned while .env is being read.
set -a
[ -f "${REPO_ROOT}/.env" ] && . "${REPO_ROOT}/.env"
set +a
# Short-circuit when .env (or the caller) already supplied both values.
# NOTE(review): ALERTS_PASSWORD is accepted here for any mailbox key, not
# only "alerts" — confirm that is intended.
if [[ -n "${MAILCOW_API_KEY:-}" && -n "${MAILBOX_PASSWORD:-${ALERTS_PASSWORD:-}}" ]]; then
export MAILBOX_PASSWORD="${MAILBOX_PASSWORD:-${ALERTS_PASSWORD:-}}"
# `return` works when sourced; `exit` is the fallback when executed directly.
return 0 2>/dev/null || exit 0
fi
# Without a vault file or vault password file there is nothing more to load.
if [[ ! -f "${VAULT_FILE}" ]] || [[ ! -f "${VAULT_PASS}" ]]; then
return 0 2>/dev/null || exit 0
fi
# The Python helper prints shell-quoted `export NAME=value` lines, which are
# then evaluated in the current shell.
eval "$("${REPO_ROOT}/.venv/bin/python3" - "${VAULT_FILE}" "${VAULT_PASS}" "${ANSIBLE_VAULT}" "${MAILBOX_KEY}" <<'PY'
import os, subprocess, sys, yaml, shlex
vault_file, vault_pass, ansible_vault, mailbox_key = sys.argv[1:5]
text = subprocess.check_output(
[ansible_vault, "view", vault_file, "--vault-password-file", vault_pass],
text=True,
)
data = yaml.safe_load(text) or {}
out = []
api = data.get("vault_mailcow_api_key") or ""
if api:
out.append("export MAILCOW_API_KEY=" + shlex.quote(str(api)))
passwords = data.get("vault_mailcow_mailbox_passwords") or {}
pw = ""
if mailbox_key and mailbox_key in passwords:
pw = passwords[mailbox_key]
elif mailbox_key == "alerts":
pw = data.get("vault_alerts_mailbox_password") or passwords.get("alerts", "")
if pw:
out.append("export MAILBOX_PASSWORD=" + shlex.quote(str(pw)))
out.append("export ALERTS_PASSWORD=" + shlex.quote(str(pw)))
print("\n".join(out))
PY
)"
return 0 2>/dev/null || exit 0

View File

@ -1,18 +0,0 @@
#!/usr/bin/env bash
# Export BOOTSTRAP_SU_PASSWORD from vault_lxc_root_password
# The embedded Python decrypts the production vault with ansible-vault and
# prints a shell-quoted export line, which is evaluated in this shell. When
# the vault has no such entry, nothing is exported.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

eval "$("${REPO_ROOT}/.venv/bin/python3" - "${REPO_ROOT}" <<'PY'
import os, subprocess, sys, yaml, shlex

repo_root = sys.argv[1]
vault_view_cmd = [
    os.path.join(repo_root, ".venv/bin/ansible-vault"),
    "view",
    os.path.join(repo_root, "inventories/production/group_vars/all/vault.yml"),
    "--vault-password-file",
    os.path.expanduser("~/.ansible-vault-pass"),
]
decrypted = subprocess.check_output(vault_view_cmd, text=True)
secrets = yaml.safe_load(decrypted) or {}
root_password = secrets.get("vault_lxc_root_password", "")
if root_password:
    print("export BOOTSTRAP_SU_PASSWORD=" + shlex.quote(str(root_password)))
PY
)"

View File

@ -1,32 +0,0 @@
#!/usr/bin/env bash
# Resolve MAILBOX= key from inventories/production/group_vars/all/mailcow.yml
#
# Prints shell-quoted `export NAME=value` lines on stdout for the caller to
# evaluate, e.g.:
#   eval "$(MAILBOX=alerts ./scripts/mailcow-mailbox-from-inventory.sh)"
# Emitted variables (only for keys present in the mailbox entry):
#   MAILBOX_LOCAL_PART, MAILBOX_NAME, MAILBOX_QUOTA, MAILBOX_VAULT_KEY
# Exits non-zero with a message when MAILBOX is not listed in
# mailcow_mailboxes.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
MAILBOX="${MAILBOX:-}"
[[ -n "${MAILBOX}" ]] || { echo "MAILBOX required" >&2; exit 1; }

# The export lines must reach stdout: evaluating them inside this script
# would only set variables in this process and hand nothing to the caller.
"${REPO_ROOT}/.venv/bin/python3" - "${REPO_ROOT}" "${MAILBOX}" <<'PY'
import sys, yaml, shlex, os

repo, key = sys.argv[1], sys.argv[2]
path = os.path.join(repo, "inventories/production/group_vars/all/mailcow.yml")
with open(path) as f:
    data = yaml.safe_load(f) or {}
boxes = data.get("mailcow_mailboxes") or {}
if key not in boxes:
    raise SystemExit(f"Unknown MAILBOX={key!r}. Add it to mailcow_mailboxes in mailcow.yml")
b = boxes[key]
out = []
for k, env in [
    ("local_part", "MAILBOX_LOCAL_PART"),
    ("name", "MAILBOX_NAME"),
    ("quota", "MAILBOX_QUOTA"),
]:
    if k in b and b[k] is not None:
        out.append(f"export {env}={shlex.quote(str(b[k]))}")
if b.get("vault_password_key"):
    out.append(f"export MAILBOX_VAULT_KEY={shlex.quote(str(b['vault_password_key']))}")
print("\n".join(out))
PY

View File

@ -1,62 +0,0 @@
#!/usr/bin/env bash
# Create a Mailcow mailbox via the API (POST /api/v1/add/mailbox).
#
# Usage:
#   make mailcow-mailbox MAILBOX=alerts
#   # or with env (after: source scripts/load-mailcow-vault-env.sh):
#   MAILBOX_LOCAL_PART=notify MAILBOX_NAME="Notify" MAILBOX_PASSWORD='...' ./scripts/mailcow-mailbox.sh
#
# Variables (env or make):
#   MAILBOX / MAILBOX_LOCAL_PART — local part (required)
#   MAILBOX_NAME — display name (default: title-case of local part)
#   MAILBOX_PASSWORD — required here; falls back to ALERTS_PASSWORD
#   MAILBOX_QUOTA — MiB (default 1024)
#   MAILCOW_URL, MAILCOW_DOMAIN, MAILCOW_API_KEY — see load-mailcow-vault-env.sh
#
# Exits 1 on missing input or when the API answers with a non-2xx status.
set -euo pipefail

MAILCOW_URL="${MAILCOW_URL:-https://mail.levkine.ca}"
DOMAIN="${MAILCOW_DOMAIN:-levkine.ca}"
LOCAL_PART="${MAILBOX_LOCAL_PART:-${MAILBOX:-}}"
API_KEY="${MAILCOW_API_KEY:-}"
MAILBOX_PASSWORD="${MAILBOX_PASSWORD:-${ALERTS_PASSWORD:-}}"
QUOTA="${MAILBOX_QUOTA:-1024}"

if [[ -z "${LOCAL_PART}" ]]; then
  echo "Set MAILBOX=localpart or MAILBOX_LOCAL_PART" >&2
  exit 1
fi
if [[ -z "${API_KEY}" ]]; then
  echo "Set MAILCOW_API_KEY (make mailcow-mailbox loads vault/.env)" >&2
  exit 1
fi
if [[ -z "${MAILBOX_PASSWORD}" ]]; then
  echo "Set MAILBOX_PASSWORD or add vault_mailcow_mailbox_passwords.${LOCAL_PART} in vault" >&2
  exit 1
fi

# Default display name: local part with -/_ turned into spaces, title-cased.
DISPLAY_NAME="${MAILBOX_NAME:-$(echo "${LOCAL_PART}" | sed 's/[-_]/ /g' | awk '{for(i=1;i<=NF;i++) $i=toupper(substr($i,1,1)) tolower(substr($i,2)); print}')}"

# jq builds the JSON so every value is escaped correctly.
ATTR=$(jq -nc \
  --arg lp "${LOCAL_PART}" \
  --arg dom "${DOMAIN}" \
  --arg name "${DISPLAY_NAME}" \
  --arg pw "${MAILBOX_PASSWORD}" \
  --arg quota "${QUOTA}" \
  '{local_part:$lp,domain:$dom,name:$name,quota:$quota,password:$pw,password2:$pw,active:"1"}')

echo "Creating mailbox ${LOCAL_PART}@${DOMAIN} (${DISPLAY_NAME})..."
# --data-urlencode: the JSON travels as a form field, so characters such as
# "&", "+" or "%" in the password must be percent-encoded or the server would
# see a truncated/altered value.
# NOTE(review): -k disables TLS certificate verification while the API key is
# being sent — drop it once the Mailcow certificate is trusted.
RESP=$(curl -sk -w "\n%{http_code}" -X POST "${MAILCOW_URL}/api/v1/add/mailbox" \
  -H "X-API-Key: ${API_KEY}" \
  --data-urlencode "attr=${ATTR}")

# curl appended the status code as the last line; everything before is body.
HTTP_CODE=$(echo "${RESP}" | tail -1)
BODY=$(echo "${RESP}" | sed '$d')
echo "${BODY}" | jq . 2>/dev/null || echo "${BODY}"

if [[ "${HTTP_CODE}" -lt 200 || "${HTTP_CODE}" -ge 300 ]]; then
  echo "Mailcow API HTTP ${HTTP_CODE}" >&2
  exit 1
fi
echo "Done: ${LOCAL_PART}@${DOMAIN}"

View File

@ -1,17 +0,0 @@
#!/usr/bin/env bash
# Wrapper for: make mailcow-mailbox MAILBOX=name
# Resolves the mailbox entry from the inventory, loads the API key and
# mailbox password (from .env or the vault), then hands over to
# scripts/mailcow-mailbox.sh.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
MAILBOX="${MAILBOX:?MAILBOX required}"
cd "${REPO_ROOT}"

eval "$(./scripts/mailcow-mailbox-from-inventory.sh)"
. ./scripts/load-mailcow-vault-env.sh "${MAILBOX_VAULT_KEY:-${MAILBOX}}"

# Both secrets present: replace this process with the real script.
if [[ -n "${MAILCOW_API_KEY:-}" && -n "${MAILBOX_PASSWORD:-}" ]]; then
  exec ./scripts/mailcow-mailbox.sh
fi

echo "Missing vault_mailcow_api_key or vault_mailcow_mailbox_passwords.${MAILBOX}" >&2
exit 1

View File

@ -1,71 +0,0 @@
#!/usr/bin/env bash
# Extended read-only security + cleanup audit (run on target host).
# Prints one "=== section ===" header per check. Only `set -u` is enabled, so
# a failing probe does not abort the remaining sections.
set -u

echo "=== identity ==="
hostname -f 2>/dev/null || hostname
if [ -f /etc/os-release ]; then . /etc/os-release; echo "os=${PRETTY_NAME:-unknown}"; fi
echo "kernel=$(uname -r)"
echo "uptime=$(uptime -p 2>/dev/null || uptime)"

echo "=== disk ==="
df -h / /var 2>/dev/null | tail -n +2 | awk '{print $6" "$5" used "$4" free"}'

echo "=== sshd (effective) ==="
# sshd -T dumps the effective configuration; fall back to grepping the config
# file when the sshd binary is not on PATH.
if command -v sshd >/dev/null 2>&1; then
  sshd -T 2>/dev/null | grep -E '^(permitrootlogin|passwordauthentication|pubkeyauthentication|permitemptypasswords|port|x11forwarding|maxauthtries) ' || true
else
  grep -E '^(PermitRootLogin|PasswordAuthentication|PubkeyAuthentication|Port) ' /etc/ssh/sshd_config 2>/dev/null | grep -v '^#' || echo "sshd not found"
fi

echo "=== firewall ==="
if command -v ufw >/dev/null 2>&1; then
  ufw status verbose 2>/dev/null | head -5
elif command -v firewall-cmd >/dev/null 2>&1; then
  firewall-cmd --state 2>/dev/null || true
else
  echo "no ufw/firewalld"
fi

echo "=== fail2ban ==="
systemctl is-active fail2ban 2>/dev/null || echo "fail2ban: inactive or missing"

echo "=== unattended-upgrades ==="
systemctl is-active unattended-upgrades 2>/dev/null || echo "unattended-upgrades: inactive or missing"

echo "=== pending apt upgrades ==="
if command -v apt >/dev/null 2>&1; then
  # grep -c prints "0" itself and exits 1 when nothing matches, so the exit
  # status is ignored instead of echoing a second 0.
  pending_upgrades="$(apt-get -s upgrade 2>/dev/null | grep -c '^Inst' || true)"
  echo "${pending_upgrades:-0}"
else
  echo "n/a"
fi

echo "=== docker ==="
if command -v docker >/dev/null 2>&1; then
  echo "docker=$(docker --version 2>/dev/null || true)"
  echo "containers=$(docker ps -aq 2>/dev/null | wc -l | tr -d ' ') running=$(docker ps -q 2>/dev/null | wc -l | tr -d ' ')"
  echo "images=$(docker images -q 2>/dev/null | wc -l | tr -d ' ')"
  docker system df 2>/dev/null | tail -n +2 || true
else
  echo "no docker"
fi

echo "=== journal disk ==="
journalctl --disk-usage 2>/dev/null || echo "n/a"

echo "=== apt cache ==="
du -sh /var/cache/apt/archives 2>/dev/null || echo "n/a"

echo "=== existing cron (root) ==="
# Capture first: the exit status of the pipeline is head's, so a trailing
# "|| echo" would never report an empty or missing crontab.
root_cron="$(crontab -l 2>/dev/null | grep -v '^#' | grep -v '^$' | head -10)"
if [ -n "${root_cron}" ]; then
  echo "${root_cron}"
else
  echo "no root crontab"
fi
ls /etc/cron.{daily,weekly,monthly}/* 2>/dev/null | xargs -I{} basename {} | head -15 || true

echo "=== listening tcp (non-localhost) ==="
ss -tlnp 2>/dev/null | awk 'NR==1 || /LISTEN/ {print}' | grep -v '127.0.0.1:' | grep -v '\[::1\]:' | head -15

echo "=== uid 0 accounts ==="
awk -F: '$3==0 {print $1}' /etc/passwd | tr '\n' ' '
echo

echo "=== tailscale ==="
command -v tailscale >/dev/null 2>&1 && tailscale status --self 2>/dev/null | head -1 || echo "no tailscale"

View File

@ -1,39 +0,0 @@
#!/usr/bin/env bash
# Audit LXCs on a Proxmox node via pct exec (run ON the PVE host as root).
#
# Prints the node identity, then for every container reported by `pct list`
# prints a banner and, when the container is running, executes the read-only
# AUDIT script inside it. Containers that are not running are skipped.
set -u

# Script executed inside each container through `bash -c`. It is single-quoted
# so nothing expands on the PVE host; only double quotes may be used inside it.
AUDIT='#!/bin/bash
echo "=== identity ==="
hostname -f 2>/dev/null || hostname
[ -f /etc/os-release ] && . /etc/os-release && echo "os=${PRETTY_NAME:-unknown}"
echo "ip=$(hostname -I 2>/dev/null | awk "{print \$1}")"
echo "=== sshd (effective) ==="
if command -v sshd >/dev/null 2>&1; then
  sshd -T 2>/dev/null | grep -E "^(permitrootlogin|passwordauthentication|pubkeyauthentication|permitemptypasswords|port) " || true
else
  grep -E "^(PermitRootLogin|PasswordAuthentication|PubkeyAuthentication|Port) " /etc/ssh/sshd_config 2>/dev/null | grep -v "^#" || echo "sshd not installed"
fi
echo "=== firewall ==="
# Probe for the binary explicitly: the status of a "ufw | head" pipeline is
# the status of head, so a trailing "|| echo" fallback would never run.
if command -v ufw >/dev/null 2>&1; then
  ufw status 2>/dev/null | head -3
else
  echo "no ufw"
fi
echo "=== fail2ban ==="
systemctl is-active fail2ban 2>/dev/null || echo "inactive/missing"
echo "=== pending upgrades ==="
# grep -c prints 0 itself (and exits 1) when nothing matches, so swallow the
# status instead of echoing a second 0.
apt-get -s upgrade 2>/dev/null | grep -c "^Inst" || true
echo "=== public listeners ==="
ss -tlnp 2>/dev/null | grep LISTEN | grep -v "127.0.0.1:" | grep -v "\[::1\]:" | head -12
'

echo "PVE_NODE=$(hostname -f 2>/dev/null || hostname)"
echo "PVE_IP=$(hostname -I | awk '{print $1}')"

# Take one snapshot of the container table so id, name and status all come
# from the same listing instead of three separate `pct list` calls.
pct_table=$(pct list 2>/dev/null)

for id in $(printf '%s\n' "$pct_table" | awk 'NR>1 {print $1}'); do
  # The Lock column is blank for unlocked containers, so the name is the LAST
  # whitespace-separated field, not always the fourth.
  name=$(printf '%s\n' "$pct_table" | awk -v id="$id" '$1==id {print $NF}')
  status=$(printf '%s\n' "$pct_table" | awk -v id="$id" '$1==id {print $2}')
  echo ""
  echo "######## LXC vmid=$id name=$name status=$status ########"
  if [ "$status" != "running" ]; then
    echo "SKIP: not running"
    continue
  fi
  pct exec "$id" -- bash -c "$AUDIT" 2>&1 || echo "ERROR: pct exec failed"
done

View File

@ -1,48 +0,0 @@
#!/usr/bin/env bash
# Quick read-only security snapshot (run on target host).
#
# Prints one "=== section ===" banner per check. Every probe is best-effort:
# because the script runs under `set -euo pipefail`, each pipeline whose
# failure is expected on some hosts (tool missing, not root, no matches) is
# explicitly guarded so one unavailable probe does not abort the whole report.
set -euo pipefail

echo "=== identity ==="
hostname -f 2>/dev/null || hostname
if [ -f /etc/os-release ]; then
  . /etc/os-release
  echo "os=${PRETTY_NAME:-unknown}"
fi
echo "kernel=$(uname -r)"
echo "uptime=$(uptime -p 2>/dev/null || uptime)"

echo "=== sshd (effective) ==="
if command -v sshd >/dev/null 2>&1; then
  sshd -T 2>/dev/null | grep -E '^(permitrootlogin|passwordauthentication|pubkeyauthentication|permitemptypasswords|port|x11forwarding|allowtcpforwarding) ' || true
else
  grep -E '^(PermitRootLogin|PasswordAuthentication|PubkeyAuthentication|Port) ' /etc/ssh/sshd_config 2>/dev/null | grep -v '^#' || echo "sshd not found"
fi

echo "=== firewall ==="
if command -v ufw >/dev/null 2>&1; then
  # Capture first: with pipefail, a failing `ufw` at the head of a pipeline
  # would otherwise terminate the script via errexit.
  if ufw_report=$(ufw status verbose 2>/dev/null); then
    printf '%s\n' "$ufw_report" | head -8 || true
  else
    echo "ufw: status unavailable"
  fi
elif command -v firewall-cmd >/dev/null 2>&1; then
  firewall-cmd --state 2>/dev/null || true
else
  echo "no ufw/firewalld"
fi

echo "=== fail2ban ==="
systemctl is-active fail2ban 2>/dev/null || echo "fail2ban: inactive or missing"

echo "=== unattended-upgrades ==="
systemctl is-active unattended-upgrades 2>/dev/null || echo "unattended-upgrades: inactive or missing"

echo "=== pending apt upgrades ==="
if command -v apt >/dev/null 2>&1; then
  # grep -c prints 0 itself (and exits 1) when nothing matches; `|| echo 0`
  # would print the count twice, so only neutralise the exit status.
  apt-get -s upgrade 2>/dev/null | grep -c '^Inst' || true
else
  echo "n/a"
fi

echo "=== listening tcp (public) ==="
# Guarded: if `ss` is absent the filters see no input and grep -v exits 1,
# which under errexit/pipefail would skip the remaining sections.
ss -tlnp 2>/dev/null | awk 'NR==1 || /LISTEN/ {print}' | grep -v '127.0.0.1:' | grep -v '\[::1\]:' | head -20 || true

echo "=== uid 0 accounts ==="
awk -F: '$3==0 {print $1}' /etc/passwd | tr '\n' ' '
echo

echo "=== last logins (top 5) ==="
last -n 5 2>/dev/null | head -5 || true

View File

@ -1,27 +0,0 @@
#!/usr/bin/env bash
# SSH-focused audit (hypervisor or guest).
#
# Read-only report: effective sshd settings, selected sshd_config directives,
# root's authorized_keys entries, and today's SSH authentication failures.
set -u

echo "=== host ==="
hostname -f 2>/dev/null || hostname

echo "=== sshd effective config ==="
if command -v sshd >/dev/null 2>&1; then
  sshd -T 2>/dev/null | grep -E '^(port|permitrootlogin|passwordauthentication|pubkeyauthentication|permitemptypasswords|maxauthtries|x11forwarding|allowtcpforwarding|gatewayports|permittunnel|usepam|kbdinteractiveauthentication) ' || true
else
  echo "sshd binary missing"
fi

echo "=== sshd_config (non-comment) ==="
grep -E '^(Port|PermitRootLogin|PasswordAuthentication|PubkeyAuthentication|PermitEmptyPasswords|MaxAuthTries|AllowUsers|AllowGroups|X11Forwarding) ' /etc/ssh/sshd_config 2>/dev/null || true

echo "=== authorized_keys (root) ==="
if [ -f /root/.ssh/authorized_keys ]; then
  wc -l /root/.ssh/authorized_keys
  # Print the last field of each entry (the key comment when one is present);
  # blank lines and '#' comment lines are not keys and are skipped.
  awk 'NF && $1 !~ /^#/ {print $NF}' /root/.ssh/authorized_keys 2>/dev/null | sed 's/^/ key: /'
else
  echo "no /root/.ssh/authorized_keys"
fi

echo "=== recent ssh auth failures (today) ==="
# A `... | tail -5 || fallback` chain never falls back because the pipeline
# status is that of tail (always 0). Capture the output and branch on whether
# anything was found instead.
auth_failures=$(journalctl -u ssh -u sshd --since today 2>/dev/null | grep -iE 'Failed|Invalid|refused' | tail -5)
if [ -z "$auth_failures" ]; then
  auth_failures=$(grep -iE 'Failed|Invalid' /var/log/auth.log 2>/dev/null | tail -5)
fi
if [ -n "$auth_failures" ]; then
  printf '%s\n' "$auth_failures"
else
  echo "no logs"
fi

View File

@ -1,81 +0,0 @@
#!/usr/bin/env bash
# Write Ansible vault secrets into .env (for local scripts / reference).
# Does not print secret values. Does not overwrite non-empty .env keys.
#
# Usage: <this script> [ENV_FILE]   (ENV_FILE defaults to <repo>/.env)
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
ENV_FILE="${1:-${REPO_ROOT}/.env}"
VAULT_FILE="${REPO_ROOT}/inventories/production/group_vars/all/vault.yml"
VAULT_PASS="${HOME}/.ansible-vault-pass"
ANSIBLE_VAULT="${REPO_ROOT}/.venv/bin/ansible-vault"

[[ -f "${VAULT_PASS}" ]] || { echo "Missing ${VAULT_PASS}" >&2; exit 1; }

# The env file receives plaintext secrets. Restrict permissions BEFORE it is
# written: a new file is created 0600 via the umask, and an existing file is
# tightened up front instead of only after the secrets have landed in it.
umask 077
if [[ -f "${ENV_FILE}" ]]; then
  chmod 600 "${ENV_FILE}" 2>/dev/null || true
fi

"${REPO_ROOT}/.venv/bin/python3" - "${ENV_FILE}" "${VAULT_FILE}" "${VAULT_PASS}" "${ANSIBLE_VAULT}" <<'PY'
import subprocess, sys, yaml
from pathlib import Path

env_file, vault_file, vault_pass, ansible_vault = sys.argv[1:5]

# vault key -> .env key
MAP = {
    "vault_mailcow_api_key": "MAILCOW_API_KEY",
    "vault_alerts_mailbox_password": "ALERTS_PASSWORD",
    "vault_uptime_kuma_password": "KUMA_PASSWORD",
    "vault_uptime_kuma_user": "KUMA_USER",
    "vault_uptime_kuma_url": "KUMA_URL",
    "vault_umami_admin_password": "UMAMI_ADMIN_PASSWORD",
    "vault_umami_db_password": "UMAMI_DB_PASS",
    "vault_umami_app_secret": "UMAMI_APP_SECRET",
    "vault_kuma_smtp_host": "SMTP_HOST",
    "vault_kuma_smtp_port": "SMTP_PORT",
    "vault_kuma_smtp_user": "SMTP_USER",
    "vault_kuma_smtp_password": "SMTP_PASS",
    "vault_kuma_smtp_to": "SMTP_TO",
    "vault_mattermost_url": "MATTERMOST_URL",
    "vault_mattermost_token": "MATTERMOST_TOKEN",
    "vault_mattermost_allowed_users": "MATTERMOST_ALLOWED_USERS",
}


def parse_env(text):
    """Parse KEY=VALUE lines into a dict.

    Blank lines, '#' comment lines and lines without '=' are ignored;
    surrounding single/double quotes are stripped from values.
    """
    d = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        k, _, v = line.partition("=")
        d[k.strip()] = v.strip().strip("'").strip('"')
    return d


# Decrypt the vault to memory only; the plaintext is never echoed.
text = subprocess.check_output(
    [ansible_vault, "view", vault_file, "--vault-password-file", vault_pass],
    text=True,
)
data = yaml.safe_load(text) or {}

existing = parse_env(Path(env_file).read_text()) if Path(env_file).exists() else {}
merged = dict(existing)

# Fill only keys that are missing or empty in .env; user-filled values win.
for vk, ek in MAP.items():
    val = data.get(vk)
    if val is None or val == "":
        continue
    if merged.get(ek):
        continue
    merged[ek] = str(val)

# Fallback source for ALERTS_PASSWORD: the per-mailbox password mapping.
pw = data.get("vault_mailcow_mailbox_passwords") or {}
if pw.get("alerts") and not merged.get("ALERTS_PASSWORD"):
    merged["ALERTS_PASSWORD"] = str(pw["alerts"])

header = """# Merged from Ansible vault (make vault-export-env). Fill gaps manually.
# vault → .env: make vault-export-env
# .env → vault: make vault-import-env
# hosts → .env → vault: make vault-pull-infra-secrets
"""
body = "\n".join(f"{k}={v}" for k, v in sorted(merged.items())) + "\n"
Path(env_file).write_text(header + body)
print(f"Wrote {len(merged)} keys to {env_file} (existing non-empty keys kept)")
PY

chmod 600 "${ENV_FILE}" 2>/dev/null || true

View File

@ -1,96 +0,0 @@
#!/usr/bin/env bash
# Merge .env into inventories/production/group_vars/all/vault.yml
# Usage: make vault-import-env [ENV_FILE=.env]
#
# The env file is taken from the first argument, else the ENV_FILE environment
# variable, else <repo>/.env. Only non-empty values are imported.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
ENV_FILE="${1:-${ENV_FILE:-${REPO_ROOT}/.env}}"
VAULT_FILE="${REPO_ROOT}/inventories/production/group_vars/all/vault.yml"
VAULT_PASS="${HOME}/.ansible-vault-pass"
ANSIBLE_VAULT="${REPO_ROOT}/.venv/bin/ansible-vault"

[[ -f "${ENV_FILE}" ]] || { echo "No env file: ${ENV_FILE}" >&2; exit 1; }
[[ -f "${VAULT_PASS}" ]] || { echo "Missing ${VAULT_PASS}" >&2; exit 1; }

"${REPO_ROOT}/.venv/bin/python3" - "${ENV_FILE}" "${VAULT_FILE}" "${VAULT_PASS}" "${ANSIBLE_VAULT}" <<'PY'
import os, re, subprocess, sys, tempfile, yaml

env_file, vault_file, vault_pass, ansible_vault = sys.argv[1:5]


def load_env(path):
    """Read KEY=VALUE pairs from path, keeping only non-empty values.

    Blank lines, '#' comments and lines without '=' are skipped; a leading
    'export ' is dropped and surrounding quotes are stripped from values.
    """
    out = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            if line.startswith("export "):
                line = line[7:].strip()
            if "=" not in line:
                continue
            k, _, v = line.partition("=")
            v = v.strip().strip("'").strip('"')
            if v:
                out[k.strip()] = v
    return out


# .env key -> vault key (or vault_mailcow_mailbox_passwords.<name>)
MAP = {
    "MAILCOW_API_KEY": "vault_mailcow_api_key",
    "ALERTS_PASSWORD": ("vault_alerts_mailbox_password", "alerts"),
    "KUMA_PASSWORD": "vault_uptime_kuma_password",
    "KUMA_USER": "vault_uptime_kuma_user",
    "KUMA_URL": "vault_uptime_kuma_url",
    "UMAMI_ADMIN_PASSWORD": "vault_umami_admin_password",
    "UMAMI_DB_PASS": "vault_umami_db_password",
    "UMAMI_APP_SECRET": "vault_umami_app_secret",
    "SMTP_HOST": "vault_kuma_smtp_host",
    "SMTP_PORT": "vault_kuma_smtp_port",
    "SMTP_USER": "vault_kuma_smtp_user",
    "SMTP_PASS": "vault_kuma_smtp_password",
    "SMTP_TO": "vault_kuma_smtp_to",
    "MATTERMOST_URL": "vault_mattermost_url",
    "MATTERMOST_TOKEN": "vault_mattermost_token",
    "MATTERMOST_ALLOWED_USERS": "vault_mattermost_allowed_users",
    "PROXMOX_PASSWORD": "vault_proxmox_password",
    "LXC_ROOT_PASSWORD": "vault_lxc_root_password",
}

env = load_env(env_file)

# Decrypt the current vault contents into memory.
text = subprocess.check_output(
    [ansible_vault, "view", vault_file, "--vault-password-file", vault_pass],
    text=True,
)
data = yaml.safe_load(text) or {}
passwords = dict(data.get("vault_mailcow_mailbox_passwords") or {})

for k, v in env.items():
    # MAILBOX_<name>_PASSWORD goes into the per-mailbox password mapping.
    m = re.match(r"^MAILBOX_(.+)_PASSWORD$", k, re.I)
    if m:
        passwords[m.group(1).lower()] = v
        continue
    target = MAP.get(k)
    if not target:
        continue
    if isinstance(target, tuple):
        # Tuple target: set the top-level vault key AND the mailbox entry.
        data[target[0]] = v
        passwords[target[1]] = v
    else:
        data[target] = v

if passwords:
    data["vault_mailcow_mailbox_passwords"] = passwords

# The temp file holds the vault in PLAINTEXT until ansible-vault has written
# the encrypted copy. Remove it in `finally` so a failed encrypt (check=True
# raises) cannot leave decrypted secrets behind on disk.
fd, tmp = tempfile.mkstemp(suffix=".yml")
try:
    with os.fdopen(fd, "w") as f:
        yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
    subprocess.run(
        [ansible_vault, "encrypt", tmp, "--output", vault_file,
         "--vault-password-file", vault_pass, "--encrypt-vault-id", "default"],
        check=True,
    )
finally:
    os.remove(tmp)

print(f"Updated {vault_file} from {env_file} ({len(env)} values)")
PY

View File

@ -1,70 +0,0 @@
#!/usr/bin/env bash
# Pull secrets from live hosts into .env, then vault-import-env.
# Does not print secret values.
#
# Each host is fetched over SSH on a best-effort basis: a host that cannot be
# reached or read is reported on stderr and skipped. Pulled values overwrite
# the same keys in .env; all other existing keys are preserved.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
ENV_FILE="${REPO_ROOT}/.env"

# .env receives plaintext secrets. Restrict permissions BEFORE writing: a new
# file is created 0600 via the umask, and an existing one is tightened first
# rather than only after the secrets have been written.
umask 077
if [[ -f "${ENV_FILE}" ]]; then
  chmod 600 "${ENV_FILE}" 2>/dev/null || true
fi

python3 - "${ENV_FILE}" <<'PY'
import subprocess, sys
from pathlib import Path

out = Path(sys.argv[1])
lines = []

SSH_OPTS = ["-o", "BatchMode=yes", "-o", "ConnectTimeout=8"]


def ssh(target, remote_cmd):
    """Run remote_cmd on target over SSH and return its stripped stdout.

    Uses an argv list (no local shell); raises CalledProcessError when ssh
    or the remote command exits non-zero.
    """
    return subprocess.check_output(
        ["ssh", *SSH_OPTS, target, remote_cmd], text=True
    ).strip()


def parse_env(text):
    """Parse KEY=VALUE lines into a dict, skipping blanks and '#' comments."""
    d = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        k, _, v = line.partition("=")
        d[k.strip()] = v.strip().strip("'").strip('"')
    return d


# monitoring LXC
try:
    raw = ssh("root@10.0.10.22", "cat /opt/monitoring/.env 2>/dev/null")
    m = parse_env(raw)
    if m.get("UMAMI_DB_PASS"):
        lines.append(f"UMAMI_DB_PASS={m['UMAMI_DB_PASS']}")
    if m.get("UMAMI_APP_SECRET"):
        lines.append(f"UMAMI_APP_SECRET={m['UMAMI_APP_SECRET']}")
except Exception as e:
    print(f"# skip monitoring: {e}", file=sys.stderr)

# hermes mattermost
try:
    raw = ssh(
        "ladmin@10.0.10.36",
        "sudo cat /home/hermes/.hermes/secrets/mattermost.env 2>/dev/null",
    )
    h = parse_env(raw)
    for k in ("MATTERMOST_URL", "MATTERMOST_TOKEN", "MATTERMOST_ALLOWED_USERS"):
        if h.get(k):
            lines.append(f"{k}={h[k]}")
except Exception as e:
    print(f"# skip hermes: {e}", file=sys.stderr)

# merge with existing .env (preserve user-filled keys)
existing = {}
if out.exists():
    existing = parse_env(out.read_text())
merged = {**existing}
for line in lines:
    k, _, v = line.partition("=")
    merged[k] = v

header = """# Auto-merged by scripts/vault-pull-infra-secrets.sh + your edits
# Run: make vault-import-env
"""
body = "\n".join(f"{k}={v}" for k, v in sorted(merged.items())) + "\n"
out.write_text(header + body)
print(f"Wrote {len(merged)} keys to {out}")
PY

chmod 600 "${ENV_FILE}" 2>/dev/null || true
"${REPO_ROOT}/scripts/vault-import-env.sh" "${ENV_FILE}"