Compare commits
22 Commits
fix/invent
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| d121663765 | |||
| e56bbd567d | |||
| f17a1a3bcc | |||
| 15a5ebadaf | |||
| f0ff00a8dc | |||
| 35d17ed527 | |||
| c72c94e983 | |||
| a80a98ff77 | |||
| 980423cd61 | |||
| 5874605467 | |||
| 798fef2b67 | |||
| 52de8740c9 | |||
| dfed055e7c | |||
| 08d5cb4073 | |||
| 70af1b1355 | |||
| 7224dbfd12 | |||
| 8a507eddee | |||
| de49b34cdc | |||
| 9281f12a65 | |||
| 659c6501bf | |||
| fda101c949 | |||
|
|
62a22812a3 |
37
.env.example
Normal file
37
.env.example
Normal file
@ -0,0 +1,37 @@
|
||||
# Copy to .env (gitignored): cp .env.example .env
|
||||
#
|
||||
# vault → .env: make vault-export-env
|
||||
# .env → vault: make vault-import-env
|
||||
# hosts → vault: make vault-pull-infra-secrets (SSH to monitoring/hermes, then import)
|
||||
#
|
||||
# Prefer vault for long-term storage; .env is only a temporary working copy, so you can delete it once its values are in the vault.
|
||||
|
||||
# Mailcow (make mailcow-mailbox MAILBOX=alerts)
|
||||
MAILCOW_API_KEY=
|
||||
ALERTS_PASSWORD=
|
||||
|
||||
# Uptime Kuma @ 10.0.10.22:3001 (scripts/kuma-setup-smtp.sh)
|
||||
KUMA_URL=http://10.0.10.22:3001
|
||||
KUMA_USER=admin
|
||||
KUMA_PASSWORD=
|
||||
|
||||
# Kuma SMTP notification (after alerts@ mailbox exists)
|
||||
SMTP_HOST=mail.levkine.ca
|
||||
SMTP_PORT=587
|
||||
SMTP_USER=alerts@levkine.ca
|
||||
SMTP_PASS=
|
||||
SMTP_TO=idobkin@gmail.com
|
||||
|
||||
# Umami @ 10.0.10.22:3000 (admin UI password; DB pass is on LXC only)
|
||||
UMAMI_ADMIN_PASSWORD=
|
||||
|
||||
# Hermes Mattermost (not Telegram)
|
||||
MATTERMOST_URL=
|
||||
MATTERMOST_TOKEN=
|
||||
MATTERMOST_ALLOWED_USERS=
|
||||
|
||||
# Optional: same password on Proxmox / LXCs / caddy root (if you use one shared admin password)
|
||||
# PROXMOX_PASSWORD=
|
||||
# LXC_ROOT_PASSWORD=
|
||||
|
||||
# Per-mailbox: MAILBOX_notify_PASSWORD=
|
||||
@ -65,7 +65,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
if: needs.skip-ci-check.outputs.should-skip != '1' && (github.event_name == 'pull_request' || github.ref == 'refs/heads/master')
|
||||
container:
|
||||
image: node:20-bullseye
|
||||
image: node:20-bookworm
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
@ -84,12 +84,26 @@ jobs:
|
||||
needs: skip-ci-check
|
||||
runs-on: ubuntu-latest
|
||||
if: needs.skip-ci-check.outputs.should-skip != '1' && (github.event_name == 'pull_request' || github.ref == 'refs/heads/master')
|
||||
env:
|
||||
PIP_NO_CACHE_DIR: "1"
|
||||
PIP_BREAK_SYSTEM_PACKAGES: "1"
|
||||
container:
|
||||
image: node:20-bullseye
|
||||
image: node:20-bookworm
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Bootstrap pip (PEP 668 / bookworm)
|
||||
run: |
|
||||
python3 --version
|
||||
if ! python3 -m pip --version >/dev/null 2>&1; then
|
||||
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
|
||||
python3 /tmp/get-pip.py --disable-pip-version-check --break-system-packages
|
||||
fi
|
||||
|
||||
- name: Show disk space (runner may be full)
|
||||
run: df -h / /tmp || true
|
||||
|
||||
- name: Configure CI Ansible (no vault, localhost inventory)
|
||||
run: |
|
||||
set -e
|
||||
@ -98,12 +112,13 @@ jobs:
|
||||
localhost ansible_connection=local
|
||||
EOF
|
||||
|
||||
cat > /tmp/ci-ansible.cfg <<'EOF'
|
||||
cat > /tmp/ci-ansible.cfg <<EOF
|
||||
[defaults]
|
||||
inventory = /tmp/ci-inventory.ini
|
||||
roles_path = /workspace/ilia/ansible/roles
|
||||
roles_path = ${GITHUB_WORKSPACE}/roles
|
||||
host_key_checking = False
|
||||
stdout_callback = yaml
|
||||
stdout_callback = default
|
||||
callback_result_format = yaml
|
||||
bin_ansible_callbacks = True
|
||||
retry_files_enabled = False
|
||||
interpreter_python = auto_silent
|
||||
@ -115,18 +130,29 @@ jobs:
|
||||
echo "ANSIBLE_INVENTORY=/tmp/ci-inventory.ini" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Install Ansible and linting tools
|
||||
run: pip3 install --no-cache-dir ansible ansible-lint yamllint pyyaml
|
||||
|
||||
- name: Install Ansible collections
|
||||
run: |
|
||||
python3 -m pip install --no-cache-dir ansible-core ansible-lint yamllint pyyaml
|
||||
ansible-galaxy collection install -r collections/requirements.yml
|
||||
rm -rf /root/.cache/pip /tmp/pip-* 2>/dev/null || true
|
||||
|
||||
- name: Validate YAML syntax
|
||||
run: |
|
||||
echo "Checking YAML syntax..."
|
||||
find . -name "*.yml" -o -name "*.yaml" | grep -v ".git" | while read file; do
|
||||
python3 -c "import yaml; yaml.safe_load(open('$file'))" || exit 1
|
||||
done
|
||||
find . \( -name "*.yml" -o -name "*.yaml" \) \
|
||||
! -path "./.git/*" \
|
||||
! -path "./node_modules/*" \
|
||||
! -path "./.venv/*" \
|
||||
! -name "vault.yml" \
|
||||
! -name "vault.yaml" \
|
||||
! -name "vault_*.yml" \
|
||||
! -name "vault_*.yaml" \
|
||||
| while read -r file; do
|
||||
if head -n 5 "$file" | grep -q '^\$ANSIBLE_VAULT'; then
|
||||
echo "Skipping encrypted vault file: $file"
|
||||
continue
|
||||
fi
|
||||
python3 -c "import yaml; yaml.safe_load(open('$file'))" || exit 1
|
||||
done
|
||||
|
||||
- name: Run ansible-lint
|
||||
run: ansible-lint
|
||||
@ -136,7 +162,7 @@ jobs:
|
||||
if: needs.skip-ci-check.outputs.should-skip != '1'
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: node:20-bullseye
|
||||
image: node:20-bookworm
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
@ -154,8 +180,11 @@ jobs:
|
||||
needs: skip-ci-check
|
||||
if: needs.skip-ci-check.outputs.should-skip != '1'
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
PIP_NO_CACHE_DIR: "1"
|
||||
PIP_BREAK_SYSTEM_PACKAGES: "1"
|
||||
container:
|
||||
image: node:20-bullseye
|
||||
image: node:20-bookworm
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
@ -173,8 +202,12 @@ jobs:
|
||||
- name: Scan Python dependencies
|
||||
run: |
|
||||
if [ -f requirements.txt ]; then
|
||||
pip3 install --no-cache-dir pip-audit
|
||||
pip-audit -r requirements.txt
|
||||
if ! python3 -m pip --version >/dev/null 2>&1; then
|
||||
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
|
||||
python3 /tmp/get-pip.py --disable-pip-version-check --break-system-packages
|
||||
fi
|
||||
python3 -m pip install --no-cache-dir pip-audit
|
||||
python3 -m pip-audit -r requirements.txt
|
||||
else
|
||||
echo "No requirements.txt, skipping pip-audit"
|
||||
fi
|
||||
@ -184,14 +217,25 @@ jobs:
|
||||
needs: skip-ci-check
|
||||
if: needs.skip-ci-check.outputs.should-skip != '1'
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
PIP_NO_CACHE_DIR: "1"
|
||||
PIP_BREAK_SYSTEM_PACKAGES: "1"
|
||||
container:
|
||||
image: node:20-bullseye
|
||||
image: node:20-bookworm
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Bootstrap pip (PEP 668 / bookworm)
|
||||
run: |
|
||||
python3 --version
|
||||
if ! python3 -m pip --version >/dev/null 2>&1; then
|
||||
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
|
||||
python3 /tmp/get-pip.py --disable-pip-version-check --break-system-packages
|
||||
fi
|
||||
|
||||
- name: Install Semgrep
|
||||
run: pip3 install --no-cache-dir semgrep
|
||||
run: python3 -m pip install --no-cache-dir semgrep
|
||||
|
||||
- name: Run Semgrep scan
|
||||
run: semgrep --config=auto --error
|
||||
@ -202,7 +246,7 @@ jobs:
|
||||
if: needs.skip-ci-check.outputs.should-skip != '1'
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: node:20-bullseye
|
||||
image: node:20-bookworm
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
@ -224,14 +268,24 @@ jobs:
|
||||
needs: skip-ci-check
|
||||
if: needs.skip-ci-check.outputs.should-skip != '1'
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
PIP_NO_CACHE_DIR: "1"
|
||||
PIP_BREAK_SYSTEM_PACKAGES: "1"
|
||||
container:
|
||||
image: node:20-bullseye
|
||||
image: node:20-bookworm
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Bootstrap pip (PEP 668 / bookworm)
|
||||
run: |
|
||||
if ! python3 -m pip --version >/dev/null 2>&1; then
|
||||
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
|
||||
python3 /tmp/get-pip.py --disable-pip-version-check --break-system-packages
|
||||
fi
|
||||
|
||||
- name: Install Ansible
|
||||
run: pip3 install --no-cache-dir ansible
|
||||
run: python3 -m pip install --no-cache-dir ansible-core
|
||||
|
||||
- name: Validate vault files are encrypted
|
||||
run: |
|
||||
@ -268,12 +322,22 @@ jobs:
|
||||
needs: skip-ci-check
|
||||
if: needs.skip-ci-check.outputs.should-skip != '1'
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
PIP_NO_CACHE_DIR: "1"
|
||||
PIP_BREAK_SYSTEM_PACKAGES: "1"
|
||||
container:
|
||||
image: node:20-bullseye
|
||||
image: node:20-bookworm
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Bootstrap pip (PEP 668 / bookworm)
|
||||
run: |
|
||||
if ! python3 -m pip --version >/dev/null 2>&1; then
|
||||
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
|
||||
python3 /tmp/get-pip.py --disable-pip-version-check --break-system-packages
|
||||
fi
|
||||
|
||||
- name: Configure CI Ansible (no vault, localhost inventory)
|
||||
run: |
|
||||
set -e
|
||||
@ -298,14 +362,27 @@ jobs:
|
||||
|
||||
[local]
|
||||
localhost ansible_connection=local
|
||||
|
||||
[sites]
|
||||
localhost ansible_connection=local
|
||||
|
||||
[comms]
|
||||
localhost ansible_connection=local
|
||||
|
||||
[proxmox]
|
||||
localhost ansible_connection=local
|
||||
|
||||
[caddy]
|
||||
localhost ansible_connection=local
|
||||
EOF
|
||||
|
||||
cat > /tmp/ci-ansible.cfg <<'EOF'
|
||||
cat > /tmp/ci-ansible.cfg <<EOF
|
||||
[defaults]
|
||||
inventory = /tmp/ci-inventory.ini
|
||||
roles_path = /workspace/ilia/ansible/roles
|
||||
roles_path = ${GITHUB_WORKSPACE}/roles
|
||||
host_key_checking = False
|
||||
stdout_callback = yaml
|
||||
stdout_callback = default
|
||||
callback_result_format = yaml
|
||||
bin_ansible_callbacks = True
|
||||
retry_files_enabled = False
|
||||
interpreter_python = auto_silent
|
||||
@ -317,11 +394,10 @@ jobs:
|
||||
echo "ANSIBLE_INVENTORY=/tmp/ci-inventory.ini" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Install Ansible
|
||||
run: pip3 install --no-cache-dir ansible
|
||||
|
||||
- name: Install Ansible collections
|
||||
run: |
|
||||
python3 -m pip install --no-cache-dir ansible-core
|
||||
ansible-galaxy collection install -r collections/requirements.yml
|
||||
rm -rf /root/.cache/pip /tmp/pip-* 2>/dev/null || true
|
||||
|
||||
- name: Validate playbooks (CI inventory, no vault)
|
||||
run: |
|
||||
@ -352,12 +428,13 @@ jobs:
|
||||
if: needs.skip-ci-check.outputs.should-skip != '1'
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: node:20-bullseye
|
||||
image: node:20-bookworm
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install Trivy
|
||||
continue-on-error: true
|
||||
run: |
|
||||
set -e
|
||||
# Use a fixed, known-good Trivy version to avoid URL/redirect issues
|
||||
@ -415,18 +492,13 @@ jobs:
|
||||
needs: skip-ci-check
|
||||
if: needs.skip-ci-check.outputs.should-skip != '1' && (github.event_name == 'pull_request' || github.ref == 'refs/heads/master')
|
||||
runs-on: ubuntu-latest
|
||||
continue-on-error: true
|
||||
container:
|
||||
image: sonarsource/sonar-scanner-cli:5.0.1.3006
|
||||
image: sonarsource/sonar-scanner-cli:latest
|
||||
env:
|
||||
SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
|
||||
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
|
||||
steps:
|
||||
- name: Install Node.js for checkout action
|
||||
run: apk add --no-cache nodejs npm curl
|
||||
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Verify SonarQube connection
|
||||
run: |
|
||||
echo "Checking SonarQube connectivity..."
|
||||
|
||||
11
.gitignore
vendored
11
.gitignore
vendored
@ -5,6 +5,7 @@
|
||||
# Temporary files
|
||||
*.tmp
|
||||
*.bak
|
||||
*.vault-bak
|
||||
*~
|
||||
vault.yml.bak.*
|
||||
|
||||
@ -17,6 +18,9 @@ id_rsa
|
||||
id_ed25519
|
||||
id_ecdsa
|
||||
|
||||
# Python venv (make bootstrap)
|
||||
.venv/
|
||||
|
||||
# Python bytecode
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
@ -34,4 +38,11 @@ Thumbs.db
|
||||
|
||||
.ansible/facts/
|
||||
|
||||
# Local data exports (Nextcloud, etc.)
|
||||
exports/
|
||||
|
||||
# Local secrets (Mailcow API, Kuma passwords) — never commit
|
||||
.env
|
||||
.env.local
|
||||
|
||||
node_modules/
|
||||
@ -1,7 +1,7 @@
|
||||
{
|
||||
"default": true,
|
||||
"MD013": {
|
||||
"line_length": 160,
|
||||
"line_length": 400,
|
||||
"code_blocks": false,
|
||||
"tables": false
|
||||
},
|
||||
@ -13,6 +13,8 @@
|
||||
"MD034": false,
|
||||
"MD040": false,
|
||||
"MD047": false,
|
||||
"MD058": false
|
||||
"MD058": false,
|
||||
"MD060": false,
|
||||
"MD036": false
|
||||
}
|
||||
|
||||
|
||||
237
Makefile
237
Makefile
@ -1,4 +1,4 @@
|
||||
.PHONY: help bootstrap lint test check dev datascience inventory inventory-all local servers workstations clean status tailscale tailscale-check tailscale-dev tailscale-status create-vault create-vm monitoring
|
||||
.PHONY: help bootstrap lint test check dev datascience inventory inventory-all local servers workstations clean status tailscale tailscale-check tailscale-dev tailscale-status create-vault create-vm monitoring copy-ssh-key copy-ssh-keys copy-ssh-keys-ansible copy-ssh-key-mailcow bootstrap-root-ssh bootstrap-root-ssh-services bootstrap-root-ssh-failed mailcow-mailbox mailcow-create-alerts vault-import-env
|
||||
.DEFAULT_GOAL := help
|
||||
|
||||
## Colors for output
|
||||
@ -28,13 +28,27 @@ PYTHON_REQ := requirements.txt
|
||||
INVENTORY := inventories/production
|
||||
INVENTORY_HOSTS := $(INVENTORY)/hosts
|
||||
|
||||
# Python venv (created by `make bootstrap`)
|
||||
VENV := .venv
|
||||
ifneq ($(wildcard $(VENV)/bin/ansible-playbook),)
|
||||
export PATH := $(abspath $(VENV)/bin):$(PATH)
|
||||
ANSIBLE_VAULT := $(abspath $(VENV))/bin/ansible-vault
|
||||
else
|
||||
ANSIBLE_VAULT := ansible-vault
|
||||
endif
|
||||
|
||||
# Common ansible-playbook command with options
|
||||
ANSIBLE_PLAYBOOK := ansible-playbook -i $(INVENTORY)
|
||||
ANSIBLE_ARGS := --vault-password-file ~/.ansible-vault-pass
|
||||
# Note: sudo passwords are in vault files as ansible_become_password
|
||||
|
||||
## Auto-detect current host to exclude from remote operations
|
||||
CURRENT_IP := $(shell hostname -I | awk '{print $$1}')
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CURRENT_IP := $(shell ipconfig getifaddr en0 2>/dev/null || ipconfig getifaddr en1 2>/dev/null || echo "")
|
||||
else
|
||||
CURRENT_IP := $(shell hostname -I 2>/dev/null | awk '{print $$1}')
|
||||
endif
|
||||
# NOTE: inventory parsing may require vault secrets. Keep this best-effort and silent in CI.
|
||||
CURRENT_HOST := $(shell ansible-inventory --list --vault-password-file ~/.ansible-vault-pass 2>/dev/null | jq -r '._meta.hostvars | to_entries[] | select(.value.ansible_host == "$(CURRENT_IP)") | .key' 2>/dev/null | head -1)
|
||||
EXCLUDE_CURRENT := $(if $(CURRENT_HOST),--limit '!$(CURRENT_HOST)',)
|
||||
@ -59,37 +73,36 @@ help: ## Show this help message
|
||||
@echo " make maintenance-verbose GROUP=dev # Verbose maintenance on dev group"
|
||||
@echo ""
|
||||
|
||||
require-ansible: ## Verify ansible is available (run make bootstrap if missing)
|
||||
@command -v ansible-playbook >/dev/null 2>&1 && command -v ansible-vault >/dev/null 2>&1 || { \
|
||||
echo "$(RED)ansible-playbook/ansible-vault not found$(RESET)"; \
|
||||
echo "Run: $(BLUE)make bootstrap$(RESET)"; \
|
||||
exit 1; \
|
||||
}
|
||||
|
||||
bootstrap: ## Install all project dependencies from requirements files
|
||||
@echo "$(BOLD)Installing Project Dependencies$(RESET)"
|
||||
@echo ""
|
||||
@echo "$(YELLOW)Python Requirements ($(PYTHON_REQ)):$(RESET)"
|
||||
@if [ -f "$(PYTHON_REQ)" ]; then \
|
||||
if command -v pipx >/dev/null 2>&1; then \
|
||||
printf " %-30s " "Installing with pipx"; \
|
||||
if pipx install -r $(PYTHON_REQ) >/dev/null 2>&1; then \
|
||||
echo "$(GREEN)✓ Installed$(RESET)"; \
|
||||
else \
|
||||
echo "$(YELLOW)⚠ Some packages may have failed$(RESET)"; \
|
||||
fi; \
|
||||
elif command -v pip3 >/dev/null 2>&1; then \
|
||||
printf " %-30s " "Installing with pip3 --user"; \
|
||||
if pip3 install --user -r $(PYTHON_REQ) >/dev/null 2>&1; then \
|
||||
echo "$(GREEN)✓ Installed$(RESET)"; \
|
||||
else \
|
||||
printf " %-30s " "Trying with --break-system-packages"; \
|
||||
if pip3 install --break-system-packages -r $(PYTHON_REQ) >/dev/null 2>&1; then \
|
||||
echo "$(GREEN)✓ Installed$(RESET)"; \
|
||||
else \
|
||||
echo "$(RED)✗ Failed$(RESET)"; \
|
||||
fi; \
|
||||
fi; \
|
||||
else \
|
||||
printf " %-30s " "Python packages"; \
|
||||
echo "$(YELLOW)⚠ Skipped (pip3/pipx not found)$(RESET)"; \
|
||||
fi; \
|
||||
else \
|
||||
@echo "$(YELLOW)Python venv ($(VENV))/$(PYTHON_REQ):$(RESET)"
|
||||
@if [ ! -f "$(PYTHON_REQ)" ]; then \
|
||||
printf " %-30s " "$(PYTHON_REQ)"; \
|
||||
echo "$(RED)✗ File not found$(RESET)"; \
|
||||
elif ! command -v python3 >/dev/null 2>&1; then \
|
||||
printf " %-30s " "Python venv"; \
|
||||
echo "$(RED)✗ python3 not found$(RESET)"; \
|
||||
else \
|
||||
if [ ! -d "$(VENV)" ]; then \
|
||||
printf " %-30s " "Creating venv"; \
|
||||
python3 -m venv "$(VENV)" && echo "$(GREEN)✓ Created$(RESET)" || { echo "$(RED)✗ Failed$(RESET)"; exit 1; }; \
|
||||
fi; \
|
||||
printf " %-30s " "Installing packages"; \
|
||||
if "$(VENV)/bin/pip" install -r "$(PYTHON_REQ)" >/dev/null 2>&1; then \
|
||||
echo "$(GREEN)✓ Installed$(RESET)"; \
|
||||
echo " $(BLUE)Ansible:$(RESET) $(abspath $(VENV))/bin/ansible-playbook"; \
|
||||
else \
|
||||
echo "$(RED)✗ Failed$(RESET)"; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
fi
|
||||
@echo ""
|
||||
@echo "$(YELLOW)Node.js Dependencies (package.json):$(RESET)"
|
||||
@ -107,7 +120,9 @@ bootstrap: ## Install all project dependencies from requirements files
|
||||
@echo ""
|
||||
@echo "$(YELLOW)Ansible Collections ($(COLLECTIONS_REQ)):$(RESET)"
|
||||
@if [ -f "$(COLLECTIONS_REQ)" ]; then \
|
||||
ansible-galaxy collection install -r $(COLLECTIONS_REQ) 2>&1 | grep -E "(Installing|Skipping|ERROR)" | while read line; do \
|
||||
GALAXY="$$(command -v ansible-galaxy)"; \
|
||||
[ -x "$(VENV)/bin/ansible-galaxy" ] && GALAXY="$(abspath $(VENV))/bin/ansible-galaxy"; \
|
||||
"$$GALAXY" collection install -r $(COLLECTIONS_REQ) 2>&1 | grep -E "(Installing|Skipping|ERROR)" | while read line; do \
|
||||
if echo "$$line" | grep -q "Installing"; then \
|
||||
collection=$$(echo "$$line" | awk '{print $$2}' | sed 's/:.*//'); \
|
||||
printf " $(GREEN)✓ %-30s$(RESET) Installed\n" "$$collection"; \
|
||||
@ -117,7 +132,7 @@ bootstrap: ## Install all project dependencies from requirements files
|
||||
elif echo "$$line" | grep -q "ERROR"; then \
|
||||
printf " $(RED)✗ Error: $$line$(RESET)\n"; \
|
||||
fi; \
|
||||
done || ansible-galaxy collection install -r $(COLLECTIONS_REQ); \
|
||||
done || "$$GALAXY" collection install -r $(COLLECTIONS_REQ); \
|
||||
else \
|
||||
printf " %-30s " "$(COLLECTIONS_REQ)"; \
|
||||
echo "$(RED)✗ File not found$(RESET)"; \
|
||||
@ -265,6 +280,22 @@ servers: ## Run baseline server playbook (usage: make servers [GROUP=services] [
|
||||
$(ANSIBLE_PLAYBOOK) $(PLAYBOOK_SERVERS); \
|
||||
fi
|
||||
|
||||
caddy-auth: require-ansible ## Ensure auth.levkin.ca reverse proxy on Caddy VM
|
||||
@echo "$(YELLOW)Updating Caddy for Authentik...$(RESET)"
|
||||
$(ANSIBLE_PLAYBOOK) playbooks/caddy-auth-authentik.yml $(ANSIBLE_ARGS)
|
||||
|
||||
caddy-levkin: require-ansible ## Ensure levkin.ca reverse proxy on Caddy VM
|
||||
@echo "$(YELLOW)Updating Caddy for levkin.ca...$(RESET)"
|
||||
$(ANSIBLE_PLAYBOOK) playbooks/caddy-levkin-site.yml $(ANSIBLE_ARGS)
|
||||
|
||||
cal-oidc: require-ansible ## Cal.com SAML DB + Authentik OIDC provider (usage: make cal-oidc)
|
||||
@echo "$(YELLOW)Configuring Cal.com ↔ Authentik OIDC...$(RESET)"
|
||||
$(ANSIBLE_PLAYBOOK) playbooks/cal-authentik-oidc.yml $(ANSIBLE_ARGS)
|
||||
|
||||
cal-oidc-check: require-ansible ## Dry-run Cal.com ↔ Authentik OIDC
|
||||
@echo "$(YELLOW)Checking Cal.com ↔ Authentik OIDC...$(RESET)"
|
||||
$(ANSIBLE_PLAYBOOK) playbooks/cal-authentik-oidc.yml --check --diff $(ANSIBLE_ARGS)
|
||||
|
||||
workstations: ## Run workstation baseline (usage: make workstations [GROUP=dev] [HOST=dev01])
|
||||
@echo "$(YELLOW)Applying workstation baseline...$(RESET)"
|
||||
@EXTRA=""; \
|
||||
@ -426,7 +457,7 @@ apps: ## Install applications only
|
||||
$(ANSIBLE_PLAYBOOK) $(PLAYBOOK_WORKSTATIONS) --tags apps
|
||||
|
||||
# Connectivity targets
|
||||
ping: auto-fallback ## Ping hosts with colored output (usage: make ping [GROUP=dev] [HOST=dev01])
|
||||
ping: require-ansible auto-fallback ## Ping hosts with colored output (usage: make ping [GROUP=dev] [HOST=dev01])
|
||||
ifdef HOST
|
||||
@echo "$(YELLOW)Pinging host: $(HOST)$(RESET)"
|
||||
@ansible $(HOST) -m ping --one-line | while read line; do \
|
||||
@ -543,16 +574,25 @@ tailscale-status: ## Check Tailscale status on all machines
|
||||
done
|
||||
|
||||
# Vault management
|
||||
edit-vault: ## Edit encrypted host vars (usage: make edit-vault HOST=dev01)
|
||||
edit-vault: require-ansible ## Edit encrypted host vars (usage: make edit-vault HOST=KrakenMint)
|
||||
ifndef HOST
|
||||
@echo "$(RED)Error: HOST parameter required$(RESET)"
|
||||
@echo "Usage: make edit-vault HOST=dev01"
|
||||
@echo "Usage: make edit-vault HOST=KrakenMint"
|
||||
@exit 1
|
||||
endif
|
||||
ansible-vault edit host_vars/$(HOST).yml
|
||||
@vault_file="$(INVENTORY)/host_vars/$(HOST)/vault.yml"; \
|
||||
if [ ! -f "$$vault_file" ]; then vault_file="$(INVENTORY)/host_vars/$(HOST).yml"; fi; \
|
||||
if [ ! -f "$$vault_file" ]; then \
|
||||
echo "$(RED)No vault file for $(HOST):$(RESET)"; \
|
||||
echo " $(INVENTORY)/host_vars/$(HOST)/vault.yml"; \
|
||||
echo " $(INVENTORY)/host_vars/$(HOST).yml"; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
echo "$(BLUE)Editing $$vault_file$(RESET)"; \
|
||||
$(ANSIBLE_VAULT) edit "$$vault_file"
|
||||
|
||||
edit-group-vault: ## Edit encrypted group vars (usage: make edit-group-vault)
|
||||
ansible-vault edit inventories/production/group_vars/all/vault.yml
|
||||
edit-group-vault: require-ansible ## Edit encrypted group vars (usage: make edit-group-vault)
|
||||
$(ANSIBLE_VAULT) edit $(INVENTORY)/group_vars/all/vault.yml
|
||||
|
||||
|
||||
copy-ssh-key: ## Copy SSH key to specific host (usage: make copy-ssh-key HOST=giteaVM)
|
||||
@ -562,19 +602,132 @@ ifndef HOST
|
||||
@exit 1
|
||||
endif
|
||||
@echo "$(YELLOW)Copying SSH key to $(HOST)...$(RESET)"
|
||||
@ip=$$(ansible-inventory --list | jq -r "._meta.hostvars.$(HOST).ansible_host // empty" 2>/dev/null); \
|
||||
user=$$(ansible-inventory --list | jq -r "._meta.hostvars.$(HOST).ansible_user // empty" 2>/dev/null); \
|
||||
if [ -n "$$ip" ] && [ "$$ip" != "null" ] && [ -n "$$user" ] && [ "$$user" != "null" ]; then \
|
||||
@ip=$$(ansible-inventory -i $(INVENTORY) $(ANSIBLE_ARGS) --list 2>/dev/null | jq -r --arg h "$(HOST)" '._meta.hostvars[$$h].ansible_host // empty'); \
|
||||
user=$$(ansible-inventory -i $(INVENTORY) $(ANSIBLE_ARGS) --list 2>/dev/null | jq -r --arg h "$(HOST)" '._meta.hostvars[$$h].ansible_user // empty'); \
|
||||
if [ -z "$$ip" ] || [ "$$ip" = "null" ]; then \
|
||||
ip=$$(awk -v h="$(HOST)" '$$1==h {print $$2}' $(INVENTORY_HOSTS) | sed 's/ansible_host=//'); \
|
||||
fi; \
|
||||
if [ -z "$$user" ] || [ "$$user" = "null" ]; then \
|
||||
user=$$(awk -v h="$(HOST)" '$$1==h {for(i=2;i<=NF;i++) if($$i~/^ansible_user=/) {sub(/ansible_user=/,"",$$i); print $$i; exit}}' $(INVENTORY_HOSTS)); \
|
||||
fi; \
|
||||
if [ -n "$$ip" ] && [ -n "$$user" ]; then \
|
||||
echo "Target: $$user@$$ip"; \
|
||||
ssh-copy-id $$user@$$ip; \
|
||||
ssh-copy-id -i "$${SSH_PUBLIC_KEY:-$$HOME/.ssh/id_ed25519.pub}" "$$user@$$ip"; \
|
||||
else \
|
||||
echo "$(RED)Could not determine IP or user for $(HOST)$(RESET)"; \
|
||||
echo "Check your inventory and host_vars"; \
|
||||
exit 1; \
|
||||
fi
|
||||
|
||||
create-vault: ## Create encrypted vault file for secrets (passwords, auth keys, etc.)
|
||||
copy-ssh-keys: ## Copy SSH key to all inventory hosts (usage: make copy-ssh-keys [GROUP=services])
|
||||
@echo "$(YELLOW)Copying SSH key to inventory hosts...$(RESET)"
|
||||
@echo "Using key: $${SSH_PUBLIC_KEY:-$$HOME/.ssh/id_ed25519.pub}"
|
||||
@echo "$(YELLOW)You will be prompted for each host's password (last time).$(RESET)"
|
||||
@failed=0; ok=0; \
|
||||
if [ -n "$(GROUP)" ]; then \
|
||||
hosts=$$(ansible-inventory -i $(INVENTORY) $(ANSIBLE_ARGS) --list 2>/dev/null | jq -r ".\"$(GROUP)\".hosts[]? // empty"); \
|
||||
else \
|
||||
hosts=$$(ansible-inventory -i $(INVENTORY) $(ANSIBLE_ARGS) --list 2>/dev/null | jq -r '._meta.hostvars | keys[]' | grep -v '^localhost$$' | sort); \
|
||||
fi; \
|
||||
if [ -z "$$hosts" ]; then \
|
||||
if [ -n "$(GROUP)" ]; then \
|
||||
hosts=$$(awk -v g="$(GROUP)" 'BEGIN{ing=0} /^\[/ {ing=($$0=="["g"]"); next} ing && /^[a-zA-Z]/ {print $$1}' $(INVENTORY_HOSTS)); \
|
||||
else \
|
||||
hosts=$$(awk '/^\[/ {next} /^[a-zA-Z]/ && $$1!="localhost" {print $$1}' $(INVENTORY_HOSTS)); \
|
||||
fi; \
|
||||
fi; \
|
||||
for host in $$hosts; do \
|
||||
echo ""; echo "$(BLUE)==> $$host$(RESET)"; \
|
||||
if $(MAKE) --no-print-directory copy-ssh-key HOST=$$host; then ok=$$((ok+1)); else failed=$$((failed+1)); fi; \
|
||||
done; \
|
||||
echo ""; \
|
||||
echo "$(GREEN)Done: $$ok succeeded$(RESET), $(RED)$$failed failed$(RESET)"; \
|
||||
[ $$failed -eq 0 ]
|
||||
|
||||
copy-ssh-keys-ansible: require-ansible ## Copy SSH key via Ansible (usage: make copy-ssh-keys-ansible [GROUP=services] [HOST=dev01])
|
||||
@echo "$(YELLOW)Deploying SSH key with Ansible (may prompt for SSH password)...$(RESET)"
|
||||
@limit="all:!local"; \
|
||||
[ -n "$(GROUP)" ] && limit="$(GROUP)"; \
|
||||
[ -n "$(HOST)" ] && limit="$(HOST)"; \
|
||||
$(ANSIBLE_PLAYBOOK) playbooks/ssh-keys.yml $(ANSIBLE_ARGS) --limit "$$limit" --ask-pass
|
||||
|
||||
copy-ssh-key-mailcow: ## Copy SSH key to Mailcow VM (root@10.0.10.132 on pve201; prompts for root password once)
|
||||
@$(MAKE) --no-print-directory copy-ssh-key HOST=mailcow
|
||||
|
||||
bootstrap-root-ssh-caddy: ## Bootstrap root on caddy via su + vault_lxc_root_password
|
||||
@chmod +x scripts/bootstrap-root-ssh-su-password.sh scripts/load-vault-lxc-root-password.sh
|
||||
@. scripts/load-vault-lxc-root-password.sh; ./scripts/bootstrap-root-ssh-su-password.sh caddy
|
||||
|
||||
bootstrap-root-ssh: ## SSH as ladmin, su to root, install root key (usage: make bootstrap-root-ssh HOST=listmonk)
|
||||
ifndef HOST
|
||||
@echo "$(RED)Error: HOST parameter required$(RESET)"
|
||||
@echo "Usage: make bootstrap-root-ssh HOST=listmonk"
|
||||
@exit 1
|
||||
endif
|
||||
@chmod +x scripts/bootstrap-root-ssh.sh
|
||||
@BOOTSTRAP_USER="$(BOOTSTRAP_USER)" TARGET_USER="$(TARGET_USER)" \
|
||||
scripts/bootstrap-root-ssh.sh "$(HOST)"
|
||||
|
||||
bootstrap-root-ssh-services: ## Bootstrap root SSH via ladmin (caddy, listmonk, vikunja)
|
||||
@chmod +x scripts/bootstrap-root-ssh.sh
|
||||
@failed=0; ok=0; \
|
||||
for host in caddy listmonk vikunja; do \
|
||||
echo ""; echo "$(BLUE)==> $$host$(RESET)"; \
|
||||
if BOOTSTRAP_USER="$(BOOTSTRAP_USER)" scripts/bootstrap-root-ssh.sh "$$host"; then \
|
||||
ok=$$((ok+1)); \
|
||||
else \
|
||||
failed=$$((failed+1)); \
|
||||
fi; \
|
||||
done; \
|
||||
echo ""; echo "$(GREEN)Done: $$ok succeeded$(RESET), $(RED)$$failed failed$(RESET)"; \
|
||||
[ $$failed -eq 0 ]
|
||||
|
||||
mailcow-mailbox: ## Create Mailcow mailbox (usage: make mailcow-mailbox MAILBOX=alerts)
|
||||
ifndef MAILBOX
|
||||
@echo "$(RED)Error: MAILBOX required$(RESET)"
|
||||
@echo "Usage: make mailcow-mailbox MAILBOX=alerts"
|
||||
@echo "Define mailboxes in inventories/production/group_vars/all/mailcow.yml"
|
||||
@exit 1
|
||||
endif
|
||||
@chmod +x scripts/run-mailcow-mailbox.sh
|
||||
@MAILBOX="$(MAILBOX)" ./scripts/run-mailcow-mailbox.sh
|
||||
|
||||
mailcow-create-alerts: ## Alias for make mailcow-mailbox MAILBOX=alerts
|
||||
@$(MAKE) --no-print-directory mailcow-mailbox MAILBOX=alerts
|
||||
|
||||
vault-pull-infra-secrets: ## Pull Umami/Mattermost from hosts → .env → vault (not vault→.env)
|
||||
@chmod +x scripts/vault-pull-infra-secrets.sh scripts/vault-import-env.sh
|
||||
@./scripts/vault-pull-infra-secrets.sh
|
||||
|
||||
vault-export-env: ## Write vault secrets into .env (keeps existing non-empty keys)
|
||||
@chmod +x scripts/vault-export-env.sh
|
||||
@./scripts/vault-export-env.sh "$(or $(ENV_FILE),.env)"
|
||||
|
||||
kuma-add-monitors: ## Add default Uptime Kuma monitors (needs KUMA_PASSWORD in .env)
|
||||
@chmod +x scripts/kuma-add-monitors.sh
|
||||
@./scripts/kuma-add-monitors.sh
|
||||
|
||||
vault-import-env: ## Merge .env secrets into Ansible vault (usage: make vault-import-env [ENV_FILE=.env])
|
||||
@chmod +x scripts/vault-import-env.sh
|
||||
@ENV_FILE="$(or $(ENV_FILE),.env)" scripts/vault-import-env.sh "$(or $(ENV_FILE),.env)"
|
||||
|
||||
bootstrap-root-ssh-failed: ## Bootstrap root SSH on hosts that failed direct root copy-ssh-keys
|
||||
@chmod +x scripts/bootstrap-root-ssh.sh
|
||||
@failed=0; ok=0; \
|
||||
for host in caddy listmonk vikunja n8n qBittorrent actual caseware auto mailcow; do \
|
||||
echo ""; echo "$(BLUE)==> $$host$(RESET)"; \
|
||||
if BOOTSTRAP_USER="$(BOOTSTRAP_USER)" scripts/bootstrap-root-ssh.sh "$$host"; then \
|
||||
ok=$$((ok+1)); \
|
||||
else \
|
||||
failed=$$((failed+1)); \
|
||||
fi; \
|
||||
done; \
|
||||
echo ""; echo "$(GREEN)Done: $$ok succeeded$(RESET), $(RED)$$failed failed$(RESET)"; \
|
||||
[ $$failed -eq 0 ]
|
||||
|
||||
create-vault: require-ansible ## Create encrypted vault file for secrets (passwords, auth keys, etc.)
|
||||
@echo "$(YELLOW)Creating vault file for storing secrets...$(RESET)"
|
||||
ansible-vault create group_vars/all/vault.yml
|
||||
$(ANSIBLE_VAULT) create $(INVENTORY)/group_vars/all/vault.yml
|
||||
@echo "$(GREEN)✓ Vault file created. Add your secrets here (e.g. vault_tailscale_auth_key)$(RESET)"
|
||||
|
||||
create-vm: ## Create Ansible controller VM on Proxmox
|
||||
|
||||
@ -2,7 +2,8 @@
|
||||
inventory = inventories/production
|
||||
roles_path = roles
|
||||
host_key_checking = False
|
||||
stdout_callback = yaml
|
||||
stdout_callback = default
|
||||
callback_result_format = yaml
|
||||
bin_ansible_callbacks = True
|
||||
retry_files_enabled = False
|
||||
gathering = smart
|
||||
|
||||
@ -4,6 +4,7 @@
|
||||
HOSTS_FILE="inventories/production/hosts"
|
||||
TIMEOUT=3
|
||||
CHANGED=false
|
||||
UNAME_S="$(uname -s)"
|
||||
|
||||
# Colors
|
||||
GREEN='\033[0;32m'
|
||||
@ -18,10 +19,12 @@ echo "=================================================================="
|
||||
# Function to test IP connectivity
|
||||
test_ip() {
|
||||
local ip="$1"
|
||||
if ping -c 1 -W "$TIMEOUT" "$ip" >/dev/null 2>&1; then
|
||||
return 0
|
||||
if [[ "$UNAME_S" == "Darwin" ]]; then
|
||||
# macOS: -W is wait time in milliseconds
|
||||
ping -c 1 -W $((TIMEOUT * 1000)) "$ip" >/dev/null 2>&1
|
||||
else
|
||||
return 1
|
||||
# Linux: -W is timeout in seconds
|
||||
ping -c 1 -W "$TIMEOUT" "$ip" >/dev/null 2>&1
|
||||
fi
|
||||
}
|
||||
|
||||
@ -31,7 +34,7 @@ test_ssh() {
|
||||
local ip="$2"
|
||||
local user="$3"
|
||||
|
||||
if timeout 5 ssh -o ConnectTimeout=3 -o BatchMode=yes "$user@$ip" exit >/dev/null 2>&1; then
|
||||
if ssh -o ConnectTimeout=3 -o BatchMode=yes "$user@$ip" exit >/dev/null 2>&1; then
|
||||
return 0
|
||||
else
|
||||
return 1
|
||||
@ -46,11 +49,14 @@ switch_to_fallback() {
|
||||
|
||||
echo -e " ${YELLOW}→ Switching $hostname to fallback IP: $fallback_ip${NC}"
|
||||
|
||||
# Use sed to replace the primary IP with fallback IP
|
||||
sed -i "s/$hostname ansible_host=$primary_ip/$hostname ansible_host=$fallback_ip/" "$HOSTS_FILE"
|
||||
|
||||
# Remove the fallback attribute since we're now using it as primary
|
||||
sed -i "s/ ansible_host_fallback=$fallback_ip//" "$HOSTS_FILE"
|
||||
# Use sed to replace the primary IP with fallback IP (BSD/GNU compatible)
|
||||
if [[ "$UNAME_S" == "Darwin" ]]; then
|
||||
sed -i '' "s/$hostname ansible_host=$primary_ip/$hostname ansible_host=$fallback_ip/" "$HOSTS_FILE"
|
||||
sed -i '' "s/ ansible_host_fallback=$fallback_ip//" "$HOSTS_FILE"
|
||||
else
|
||||
sed -i "s/$hostname ansible_host=$primary_ip/$hostname ansible_host=$fallback_ip/" "$HOSTS_FILE"
|
||||
sed -i "s/ ansible_host_fallback=$fallback_ip//" "$HOSTS_FILE"
|
||||
fi
|
||||
|
||||
CHANGED=true
|
||||
}
|
||||
@ -66,9 +72,10 @@ while IFS= read -r line; do
|
||||
# Parse host entry
|
||||
if [[ "$line" =~ ansible_host= ]]; then
|
||||
hostname=$(echo "$line" | awk '{print $1}')
|
||||
primary_ip=$(echo "$line" | grep -oP 'ansible_host=\K[^\s]+')
|
||||
fallback_ip=$(echo "$line" | grep -oP 'ansible_host_fallback=\K[^\s]+' || echo "")
|
||||
user=$(echo "$line" | grep -oP 'ansible_user=\K[^\s]+' || echo "root")
|
||||
primary_ip=$(echo "$line" | sed -n 's/.*ansible_host=\([^[:space:]]*\).*/\1/p')
|
||||
fallback_ip=$(echo "$line" | sed -n 's/.*ansible_host_fallback=\([^[:space:]]*\).*/\1/p')
|
||||
user=$(echo "$line" | sed -n 's/.*ansible_user=\([^[:space:]]*\).*/\1/p')
|
||||
[[ -z "$user" ]] && user="root"
|
||||
|
||||
echo -n "Testing $hostname ($primary_ip)... "
|
||||
|
||||
|
||||
60
docs/guides/ansible-vault-secrets.md
Normal file
60
docs/guides/ansible-vault-secrets.md
Normal file
@ -0,0 +1,60 @@
|
||||
# Encrypted secrets in this project
|
||||
|
||||
Ansible Vault is the standard way to store and share secrets with this repo. Plain `.env` files are gitignored and meant only as a **temporary** import path on your machine.
|
||||
|
||||
## Recommended workflow
|
||||
|
||||
1. **Never commit** `.env`, API keys, or passwords.
|
||||
2. Store secrets in `inventories/production/group_vars/all/vault.yml` (encrypted).
|
||||
3. Edit with `make edit-group-vault` (uses `~/.ansible-vault-pass` on your workstation).
|
||||
4. Teammates need the same vault password file, shared out-of-band (password manager, not git).
|
||||
|
||||
## One-time import from `.env`
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
# fill MAILCOW_API_KEY, ALERTS_PASSWORD, etc.
|
||||
make vault-import-env
|
||||
rm .env # optional after import
|
||||
```
|
||||
|
||||
`make vault-import-env` merges supported keys into the vault and re-encrypts the file.
|
||||
|
||||
## Mailcow mailboxes (dynamic)
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `group_vars/all/mailcow.yml` | Mailbox names, local parts, quotas (no secrets) |
|
||||
| `vault.yml` | `vault_mailcow_api_key`, `vault_mailcow_mailbox_passwords` |
|
||||
|
||||
```bash
|
||||
make mailcow-mailbox MAILBOX=alerts
|
||||
```
|
||||
|
||||
Add a new mailbox:
|
||||
|
||||
1. In `mailcow.yml` under `mailcow_mailboxes:` add e.g. `notify: { local_part: notify, name: Notify, quota: 512, vault_password_key: notify }`
|
||||
2. In vault: `vault_mailcow_mailbox_passwords.notify: "..."` (via `make edit-group-vault`)
|
||||
3. `make mailcow-mailbox MAILBOX=notify`
|
||||
|
||||
## Can `.env` itself be encrypted?
|
||||
|
||||
Yes, but Ansible projects usually skip that pattern:
|
||||
|
||||
| Approach | Use when |
|
||||
|----------|----------|
|
||||
| **Ansible Vault** (`vault.yml`) | Default for this repo — works with playbooks and `make` targets |
|
||||
| **`ansible-vault encrypt .env`** | Turns `.env` into a vault-encrypted blob; you must run `ansible-vault view .env` or decrypt it to a temp file before tools can read it — awkward for shell scripts |
|
||||
| **Password manager / 1Password CLI** | Personal machine only, not for CI/ansible runs |
|
||||
| **SOPS / Mozilla SOPS** | Teams that want encrypted YAML/JSON in git with KMS/PGP — heavier setup |
|
||||
|
||||
**Sharing encrypted secrets with others:** share the **vault password** (or per-host vault pass) securely once; they clone the repo and use the same encrypted `vault.yml`. Do not email `.env` files.
|
||||
|
||||
## Encrypting a single value (without opening the whole file)
|
||||
|
||||
```bash
|
||||
ansible-vault encrypt_string 'secret-value' --name 'vault_my_secret' \
|
||||
--vault-password-file ~/.ansible-vault-pass
|
||||
```
|
||||
|
||||
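Paste the output (an inline `!vault` value) into a plain-text vars file — that is the use case `encrypt_string` is designed for. It can also be pasted into `vault.yml` while editing the encrypted file, but the value is then encrypted twice and you still have to open the whole file.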
|
||||
56
docs/guides/cal-authentik-oidc.md
Normal file
56
docs/guides/cal-authentik-oidc.md
Normal file
@ -0,0 +1,56 @@
|
||||
# Cal.com → Authentik OIDC
|
||||
|
||||
**Status: deferred** — Cal.com self-hosted SSO is a **commercial (enterprise) feature**. Without a valid `CALCOM_LICENSE_KEY`, the UI at `/settings/security/sso` stays locked (*Contact sales*).
|
||||
|
||||
See **[sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)** for Phase 4 apps that do not need a Cal-style license.
|
||||
|
||||
## Current state (2026-05-23)
|
||||
|
||||
| Item | Status |
|
||||
|------|--------|
|
||||
| `calsaml` Postgres DB | ✅ Created |
|
||||
| `SAML_DATABASE_URL`, `SAML_ADMINS` in `/opt/cal/.env` | ✅ Set |
|
||||
| `docker-compose` passes license + SAML env | ✅ |
|
||||
| Authentik app `cal-com` + provider `cal-com-oidc` | ✅ (ready when license exists) |
|
||||
| `CALCOM_LICENSE_KEY` in `.env` | ❌ **Empty** — SSO UI blocked |
|
||||
| Cal UI OIDC configuration | ⏳ **Blocked** until license |
|
||||
|
||||
## When you have a license
|
||||
|
||||
1. Add to `/opt/cal/.env`:
|
||||
```bash
|
||||
CALCOM_LICENSE_KEY=<key-from-cal.com>
|
||||
NEXT_PUBLIC_LICENSE_CONSENT=agree
|
||||
```
|
||||
2. Restart: `ssh cal` → `cd /opt/cal && docker compose up -d`
|
||||
3. Confirm in container: `docker exec calcom printenv CALCOM_LICENSE_KEY` (non-empty)
|
||||
4. Log in as **`idobkin@gmail.com`** → **https://cal.levkin.ca/settings/security/sso**
|
||||
5. Configure OIDC:
|
||||
|
||||
| Field | Value |
|
||||
|-------|--------|
|
||||
| Client ID | `cal-com` |
|
||||
| Client Secret | from Authentik → Applications → Cal.com |
|
||||
| Well Known URL | `https://auth.levkin.ca/application/o/cal-com/.well-known/openid-configuration` |
|
||||
|
||||
Test SSO; keep local Cal password as break-glass.
|
||||
|
||||
## Ansible (infra only)
|
||||
|
||||
```bash
|
||||
make cal-oidc # SAML DB + Authentik provider (safe to re-run)
|
||||
make cal-oidc-check
|
||||
```
|
||||
|
||||
Vault (optional): `vault_cal_oidc_client_secret` — see `vault.example.yml`.
|
||||
|
||||
## Redirect URI (Authentik)
|
||||
|
||||
```text
|
||||
https://cal.levkin.ca/api/auth/oidc
|
||||
```
|
||||
|
||||
## Related
|
||||
|
||||
- [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)
|
||||
- [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md)
|
||||
70
docs/guides/homelab-status-2026-05-22.md
Normal file
70
docs/guides/homelab-status-2026-05-22.md
Normal file
@ -0,0 +1,70 @@
|
||||
# Homelab status — 2026-05-23
|
||||
|
||||
Quick checklist. **Master plan:** [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md) · **Cursor plan:** `~/.cursor/plans/levkin_selfhost_rollout_e75909ae.plan.md`
|
||||
|
||||
## Done (automation)
|
||||
|
||||
| Item | Notes |
|
||||
|------|--------|
|
||||
| Mailcow `alerts@levkine.ca` | Created via API |
|
||||
| Kuma + Dockge + Umami | LXC 218 @ `10.0.10.22`; Dockge stack **monitoring** active |
|
||||
| Old Kuma pve201 LXC 305 | Stopped, `onboot` off |
|
||||
| `stats.levkin.ca` | Caddy → Umami `:3000` |
|
||||
| Tracking scripts | levkin.ca + caseware + auto + portfolio (`iliadobkin.com`) |
|
||||
| **levkin.ca** | LXC **220** @ `10.0.10.60`; Caddy → nginx; `/` = spec, `/folders/` = stack |
|
||||
| Portfolio `iliadobkin.com` | Migrated pve201 LXC **306** → pve10 LXC **219** @ `10.0.10.106`; Caddy → nginx `:80` |
|
||||
| Kuma SMTP | Working (user confirmed) |
|
||||
| Git remote | `git@git.levkin.ca:ilia/...` (SSH → `10.0.10.169` via `~/.ssh/config` on site LXCs) |
|
||||
| auto repo | Pushed/pulled on `git.levkin.ca` |
|
||||
| caseware repo | Pushed to Gitea via bundle on server; LXCs pull via internal SSH |
|
||||
| Vault | Mailcow, Umami, Mattermost in vault; `make vault-export-env` → `.env`; `make vault-pull-infra-secrets` = hosts → vault |
|
||||
| Caddy root SSH | Works (`make bootstrap-root-ssh-caddy`) |
|
||||
| Hermes Mattermost | `mattermost.env` on VM; Telegram optional/off |
|
||||
|
||||
## Your list — still to do
|
||||
|
||||
### You (UI / hardware / DNS)
|
||||
|
||||
- [x] **Kuma SMTP** — working
|
||||
- [ ] **DNS `levkin.ca` + `www`** — A records → home IP (`142.180.237.136`); apex currently parked at AWS, not homelab
|
||||
- [ ] **Gitea deploy key (levkin LXC 220)** — add `deploy-levkin-levkin.ca` pubkey in repo settings (SSH pull); HTTPS clone works meanwhile
|
||||
- [ ] **UniFi DHCP reservations** — [unifi-static-dhcp.md](unifi-static-dhcp.md) @ https://192.168.2.1/
|
||||
- [ ] **Cal.com → Authentik OIDC** — **deferred** (no license key) — [cal-authentik-oidc.md](cal-authentik-oidc.md); Phase 4 → Vikunja — [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)
|
||||
- [x] **Portainer VM 109** — stopped and destroyed on pve10 (2026-05-23)
|
||||
- [x] **Listmonk** — service was stopped; `listmonk.service` enabled on VM 113 (2026-05-23)
|
||||
- [x] **Mailcow** — LAN TCP timeout fixed (netfilter `MAILCOW` drop rule) — [mailcow-lan-proxy-fix.md](mailcow-lan-proxy-fix.md)
|
||||
- [ ] **DebianDesktop VM 100** — RAM lowered to 24 GB in Proxmox; **reboot guest** to apply balloon
|
||||
- [ ] **Nextcloud VM 201 retire** — remove Kuma monitor, Caddy `nextcloud.levkin.ca`, stop VM
|
||||
- [ ] **NAS.SP00 disk replace** — then start Jellyfin (VM 101)
|
||||
- [x] **Gitea deploy key (portfolio)** — `git pull` works on LXC 219; Gitea VM SSH fixed (`/home/git/.ssh/authorized_keys` + `sudo` to `gitea`)
|
||||
- [ ] **`.env`** — optional mirror: `make vault-export-env` (vault already has secrets)
|
||||
- [ ] **Rotate** any secrets pasted in chat (Hermes token, etc.)
|
||||
|
||||
### Later / defer
|
||||
|
||||
- [ ] Caddy → edge LXC `.20`
|
||||
- [ ] Immich, Crater, Beszel
|
||||
- [ ] Public SSH for `git.levkin.ca:22` (optional Caddy `layer4` or DNS split)
|
||||
|
||||
## Site LXCs (marketing)
|
||||
|
||||
| VMID | Name | IP | Git remote |
|
||||
|------|------|-----|------------|
|
||||
| 220 | levkin | 10.0.10.60 | `git@git.levkin.ca:ilia/levkin.ca.git` |
|
||||
| 215 | caseware | 10.0.10.105 | `git@git.levkin.ca:ilia/caseware.git` |
|
||||
| 216 | auto | 10.0.10.59 | `git@git.levkin.ca:ilia/auto.git` |
|
||||
| 219 | portfolio | 10.0.10.106 | `git@git.levkin.ca:ilia/sdetProfile.git` |
|
||||
|
||||
**Git SSH note:** remotes keep `git.levkin.ca` in the URL, but the SSH config below sends the traffic to **10.0.10.169:22** (not `10.0.30.169`, not public `:22`).
|
||||
|
||||
```ssh
|
||||
# On each site LXC /root/.ssh/config
|
||||
Host git.levkin.ca
|
||||
HostName 10.0.10.169
|
||||
User git
|
||||
IdentityFile ~/.ssh/id_ed25519
|
||||
```
|
||||
|
||||
## Dockge
|
||||
|
||||
Stack **monitoring** in UI = correct. Compose at `/opt/stacks/monitoring/compose.yaml`. Live stack also at `/opt/monitoring` (same containers). Use Dockge for edits/restarts; avoid starting a second copy.
|
||||
142
docs/guides/host-list.md
Normal file
142
docs/guides/host-list.md
Normal file
@ -0,0 +1,142 @@
|
||||
# Host list — Proxmox guests (source of truth)
|
||||
|
||||
**Node:** PVENAS (`pve10` @ `10.0.10.10`)
|
||||
**Audited:** 2026-05-22 (Phase 0 IP pass + monitoring LXC 218 provisioned)
|
||||
**LAN:** `10.0.10.0/24`, gateway `10.0.10.1`
|
||||
|
||||
Update this file whenever a guest is created, migrated, or re-IP’d. See [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md) for IP range policy.
|
||||
|
||||
---
|
||||
|
||||
## IP range plan (10.0.10.0/24)
|
||||
|
||||
| Range | Reserved for |
|
||||
|-------|----------------|
|
||||
| `.1–.9` | Network gear |
|
||||
| `.10–.19` | Proxmox host(s) + PBS |
|
||||
| `.20–.39` | Edge / identity / comms |
|
||||
| `.40–.79` | Application LXCs / VMs |
|
||||
| `.80–.99` | Media VMs |
|
||||
| `.100–.199` | DHCP pool (clients) |
|
||||
| `.200–.249` | Labs / heavy VMs |
|
||||
| `.250–.254` | Reserved |
|
||||
|
||||
**Rollout reservations (free):** `.20` edge LXC
|
||||
|
||||
---
|
||||
|
||||
## Proxmox host
|
||||
|
||||
| VMID | Name | Role | Current IP | Target static IP | DHCP/Static | Notes |
|
||||
|------|------|------|------------|------------------|-------------|-------|
|
||||
| — | **pve10** | Proxmox (PVENAS) | `10.0.10.10/24` | `.10` | Static | This node |
|
||||
|
||||
---
|
||||
|
||||
## LXCs (pve10)
|
||||
|
||||
| VMID | Name | Plan group | Current IP | Target static IP | DHCP/Static | MAC | Notes |
|
||||
|------|------|------------|------------|------------------|-------------|-----|-------|
|
||||
| 210 | cal | business | `10.0.10.228/24` | `10.0.10.228/24` | ✅ **Static** | `BC:24:11:DD:F8:7C` | Cal.com — `pct set` applied; in Ansible `hosts` |
|
||||
| 215 | caseware | **marketing site** | `10.0.10.105/24` | `10.0.10.105/24` | ✅ **Static** | `BC:24:11:72:04:53` | Static HTML `/var/www/caseware` → `caseware.levkin.ca` |
|
||||
| 216 | auto | **marketing site** | `10.0.10.59/24` | `10.0.10.59/24` | ✅ **Static** | `BC:24:11:43:F0:86` | Static HTML `/var/www/auto` → `auto.levkin.ca` |
|
||||
| 219 | portfolio | **marketing site** | `10.0.10.106/24` | `10.0.10.106/24` | ✅ **Static** | `BC:24:11:DF:94:32` | Static HTML `/var/www/portfolio` → `iliadobkin.com` (migrated from pve201 LXC 306) |
|
||||
| 220 | levkin | **marketing site** | `10.0.10.60/24` | `10.0.10.60/24` | ✅ **Static** | `BC:24:11:C6:B2:E4` | Vite `www/` → `levkin.ca` (spec), `levkin.ca/folders` (stack) — [site-lxc-git.md](site-lxc-git.md) |
|
||||
| 217 | identity | identity | `10.0.10.21/24` | `10.0.10.21/24` | ✅ **Static** | `BC:24:11:3C:85:45` | Authentik + Postgres + Redis; `auth.levkin.ca` via Caddy |
|
||||
| 218 | monitoring | monitoring | `10.0.10.22/24` | `10.0.10.22/24` | ✅ **Static** | `BC:24:11:54:43:13` | Uptime Kuma `:3001`, Dockge `:5001`, Umami `:3000` — see [monitoring-stack.md](monitoring-stack.md) |
|
||||
|
||||
**pve201 (not pve10):** LXC **305** `kuma-debian` @ `10.0.10.197` — **stopped 2026-05-22** (replaced by monitoring LXC 218). `onboot` disabled. LXC **306** `portfolio` — **destroyed/purged 2026-05-22** (now pve10 LXC **219** @ `10.0.10.106`).
|
||||
|
||||
---
|
||||
|
||||
## VMs (pve10)
|
||||
|
||||
| VMID | Name | Plan group | Current IP | Target static IP | DHCP/Static | MAC | Notes |
|
||||
|------|------|------------|------------|------------------|-------------|-----|-------|
|
||||
| 100 | homepage-debian | — | — | — | — | — | **Stopped** |
|
||||
| 101 | Jellyfin | media | `10.0.10.232` | `10.0.10.232/24` | ⏳ DHCP? | `BC:24:11:29:B8:84` | **Stopped** (turned off 2026-05-22); inventory `jellyfin` |
|
||||
| 102 | gitea-alpine | — | `10.0.10.169/24` | `10.0.10.169/24` | ⏳ stable DHCP | `BC:24:11:E9:BD:E5` | Pin in-guest or router reservation |
|
||||
| 103 | WRA | — | `10.0.10.154/24` | `10.0.10.154/24` | ⏳ stable DHCP | `BC:24:11:61:DE:7A` | Inventory `n8n`; pin when automating |
|
||||
| 104 | vaultwarden-debian | identity | `10.0.10.142/24` | `10.0.10.142/24` | ⏳ stable DHCP | `BC:24:11:58:DB:DC` | Inventory `vaultwardenVM` |
|
||||
| 105 | TrueNAS | — | `10.0.10.107/24` | `10.0.10.107/24` | ⏳ stable DHCP | `BC:24:11:14:DE:B5` | NAS UI; pool `NAS.SP00` degraded |
|
||||
| 106 | caddy-debian | **edge** | `10.0.10.50/24` | `10.0.10.50/24` → **`.20`** (Phase 1.5) | ✅ **Static** (in-guest) | `BC:24:11:E0:49:B4` | `/etc/network/interfaces` static; Ansible `caddy` |
|
||||
| 107 | mattermost-ubuntu | comms | `10.0.10.107`? | TBD | ⏳ | `BC:24:11:66:6E:01` | Ping `.107` up; confirm not TrueNAS conflict — verify in guest |
|
||||
| 108 | actual-debian | business | `10.0.10.158/24` | `10.0.10.158/24` | ⏳ stable DHCP | `BC:24:11:10:7B:64` | Inventory `actual` |
|
||||
| 109 | portainer-alpine | — | — | — | ✅ **Removed** | `BC:24:11:0F:40:4F` | Destroyed 2026-05-23; Dockge on monitoring LXC 218 |
|
||||
| 150 | pihole00-debian | — | link-local* | TBD | ⏳ | `BC:24:11:86:76:97` | Running |
|
||||
| 117 | hermes | services | `10.0.10.36/24` | `10.0.10.36/24` | ⏳ stable DHCP | `BC:24:11:51:1E:99` | On pve10; guest agent; inventory `hermes` |
|
||||
| 200 | PVE.BU.SVR | labs | `10.0.10.200/24` | `10.0.10.200/24` | ⏳ stable DHCP | `BC:24:11:DA:95:3B` | Running |
|
||||
| 201 | NextcloudAIO-debian | (decommission) | `10.0.10.24/24` | — | 🗑️ **Retiring** | `BC:24:11:14:D4:DE` | Export done; remove Caddy + Kuma monitor, then stop VM |
|
||||
| 300 | pihole-debian | — | — | — | — | — | **Stopped** |
|
||||
|
||||
\* ARP showed IPv6 link-local only at audit time — confirm IPv4 inside guest or install QEMU guest agent.
|
||||
|
||||
---
|
||||
|
||||
## Inventory cross-reference (Ansible `hosts`)
|
||||
|
||||
| Inventory name | IP in hosts | pve10 guest | Match |
|
||||
|----------------|-------------|-------------|-------|
|
||||
| caddy | `10.0.10.50` | VM 106 | ✅ |
|
||||
| cal | `10.0.10.228` | LXC 210 | ✅ |
|
||||
| caseware | `10.0.10.105` | LXC 215 | ✅ |
|
||||
| auto | `10.0.10.59` | LXC 216 | ✅ |
|
||||
| portfolio | `10.0.10.106` | LXC 219 | ✅ |
|
||||
| levkin | `10.0.10.60` | LXC 220 | ✅ |
|
||||
| identity | `10.0.10.21` | LXC 217 | ✅ |
|
||||
| monitoring | `10.0.10.22` | LXC 218 | ✅ |
|
||||
| vaultwardenVM | `10.0.10.142` | VM 104 | ✅ |
|
||||
| giteaVM | `10.0.10.169` | VM 102 | ✅ |
|
||||
| n8n | `10.0.10.154` | VM 103? | ⚠️ verify (WRA vs n8n) |
|
||||
| listmonk | `10.0.10.148` | — | On **pve201** (`[comms]`) |
|
||||
| mailcow | `10.0.10.132` | pve201 VM 106 | ✅ `[comms]` |
|
||||
| hermes | `10.0.10.36` | VM 117 | ✅ on pve10 |
|
||||
| jellyfin | `10.0.10.232` | VM 101 | ✅ (stopped until NAS healthy) |
|
||||
| nextcloud | `10.0.10.24` | VM 201 | commented out (retiring) |
|
||||
| portainerVM | — | VM 109 | removed (Dockge on monitoring) |
|
||||
|
||||
---
|
||||
|
||||
## Static IP conversion queue (pve10)
|
||||
|
||||
Priority order (plan-2):
|
||||
|
||||
1. ✅ **LXC 210** — done (`10.0.10.228/24`)
|
||||
2. ✅ **LXC 215, 216** — pinned (`.105`, `.59`)
|
||||
3. ✅ **LXC 217** (identity) — `10.0.10.21/24`, Authentik deployed
|
||||
4. ✅ **VM 106** (caddy) — static in-guest `.50`
|
||||
5. ✅ **LXC 218** (monitoring) — `.22`, Kuma/Dockge/Umami
|
||||
6. **VMs** — use [vm-static-ip-router-reservations.md](vm-static-ip-router-reservations.md) (router MAC reservations); skip **201** (Nextcloud retire)
|
||||
7. **New:** edge LXC @ **`.20`** (Phase 1.5)
|
||||
|
||||
Example:
|
||||
|
||||
```bash
|
||||
# On pve10 (PVENAS)
|
||||
pct set 215 -net0 name=eth0,bridge=vmbr0,ip=10.0.10.105/24,gw=10.0.10.1
|
||||
pct set 216 -net0 name=eth0,bridge=vmbr0,ip=10.0.10.59/24,gw=10.0.10.1
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## NAS / storage note
|
||||
|
||||
- ZFS pool **`NAS.SP00`** on this node: **DEGRADED** (disk `W4J0L3PY` failed). See [nas-sp00-drive-failure-report.md](nas-sp00-drive-failure-report.md), [nas-sp00-smart-audit-2026-05-21.md](nas-sp00-smart-audit-2026-05-21.md).
|
||||
- VM **201** root disk on NAS — avoid heavy I/O until pool is healthy.
|
||||
|
||||
---
|
||||
|
||||
## Audit checklist
|
||||
|
||||
- [x] `pct list` / `qm list` on pve10
|
||||
- [x] ARP / ping for running guests
|
||||
- [ ] `pct exec` (LXCs) / QEMU guest agent (VMs) for guests missing IPv4
|
||||
- [x] Initial `host-list.md` created
|
||||
- [x] Pin 215/216 static
|
||||
- [x] Identity LXC 217 @ `.21` (Authentik Phase 1 infra)
|
||||
- [x] Monitoring LXC 218 @ `.22`
|
||||
- [x] Caddy VM 106 static `.50`
|
||||
- [x] LXC backups `backup-20260522` on 217, 218
|
||||
- [ ] Router DHCP reservations for VMs — [vm-static-ip-router-reservations.md](vm-static-ip-router-reservations.md) (manual in router UI; table ready)
|
||||
- [ ] Retire VM 201 (Nextcloud)
|
||||
- [ ] Re-run after NAS disk replace
|
||||
425
docs/guides/levkin-selfhost-plan-2.md
Normal file
425
docs/guides/levkin-selfhost-plan-2.md
Normal file
@ -0,0 +1,425 @@
|
||||
# Levkin self-hosted stack — plan & decisions
|
||||
|
||||
Reference doc for the Proxmox homelab. Lives alongside the Cursor project that has the Proxmox info.
|
||||
|
||||
**Conventions:**
|
||||
- All groups run inside an LXC unless marked **VM**.
|
||||
- Inside each LXC: one `docker-compose.yml`, managed by **Dockge** where applicable.
|
||||
- Caddy on the `edge` LXC is the only thing exposed to the internet.
|
||||
- Authentik on the `identity` LXC is the source of truth for who you are.
|
||||
- Vaultwarden stays standalone (it's the break-glass path if Authentik dies).
|
||||
|
||||
---
|
||||
|
||||
## Progress summary (updated 2026-05-23)
|
||||
|
||||
| Area | Status |
|
||||
|------|--------|
|
||||
| **Phase 0** Foundation | ✅ Mostly done — pve10 LXCs static; site LXCs 215/216/219/220 static; Caddy still on **VM 106** @ `.50` |
|
||||
| **Phase 1** Identity (Authentik) | ✅ LXC **217** @ `10.0.10.21` — admin + TOTP |
|
||||
| **Phase 2** Monitoring | ✅ LXC **218** @ `10.0.10.22` — Kuma, Dockge, Umami, Kuma SMTP |
|
||||
| **Phase 3** Cal.com | ✅ LXC **210** — booking + auto consult button; **OIDC deferred** (no enterprise license) |
|
||||
| **Phase 4** SSO | ⏳ **Next:** Vikunja → Authentik — [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md) |
|
||||
| **Phase 5–8** | ⏳ Immich, Crater, Outline, automation depth — after P0 backlog |
|
||||
| **Comms health** | ✅ Mailcow + Listmonk restored 2026-05-23 — [mailcow-lan-proxy-fix.md](mailcow-lan-proxy-fix.md) |
|
||||
| **Site consolidation** | ⏳ **Partial** — git LXCs + levkin.ca LXC 220; optional later: static on Caddy VM |
|
||||
| **dev-apps** | ⏳ punimTag **9101** on pve201 until testing done |
|
||||
| **Nextcloud retire** | ⏳ VM **201** still running — **#1 RAM win on pve10** (~8 GiB) |
|
||||
| **Portainer retire** | ✅ VM **109** destroyed 2026-05-23 (~16 GiB RAM freed on pve10) |
|
||||
| **Security pass** | 🟡 Partial — SSH keys + apt + cron 2026-05-23 — [security-remediation-plan.md](security-remediation-plan.md) |
|
||||
|
||||
---
|
||||
|
||||
## Capacity headroom (live check 2026-05-23)
|
||||
|
||||
Use this before adding LXCs/VMs. Re-check with `pvesm status` and `free -h` on each node.
|
||||
|
||||
### pve10 (PVENAS) — **primary place for new homelab services**
|
||||
|
||||
| Resource | Total | Used | **Available** | Notes |
|
||||
|----------|-------|------|---------------|--------|
|
||||
| **local-lvm** (thin) | ~1.67 TiB | ~22% | **~1.30 TiB** | Plenty of disk for new LXCs |
|
||||
| **RAM** (host) | 62 GiB | ~28 GiB | **~33 GiB** | Portainer **109** removed 2026-05-23 |
|
||||
|
||||
**Realistic new capacity on pve10 now:** ~**30+ GiB** headroom for Immich, Crater, Beszel, or **dev-apps** (6–8 GiB) after Nextcloud retires.
|
||||
|
||||
**Still available to free:**
|
||||
|
||||
| Stop / retire | Frees (maxmem) |
|
||||
|---------------|----------------|
|
||||
| ~~Portainer VM **109**~~ | ✅ **16 GiB** freed |
|
||||
| Nextcloud VM **201** | **8 GiB** ← do next |
|
||||
| Hermes VM **117** (if not needed) | **16 GiB** |
|
||||
| Site LXCs 215/216 → Caddy static (optional) | **~1 GiB** |
|
||||
|
||||
### pve201 (pve) — **do not add new services**
|
||||
|
||||
| Resource | Total | Used | **Available** | Notes |
|
||||
|----------|-------|------|---------------|--------|
|
||||
| **local-lvm** | ~1.67 TiB | ~46% | **~922 GiB** | Disk OK |
|
||||
| **RAM** | 125 GiB | ~114 GiB | **~10 GiB** | GPU VM **104** (64 GB), DebianDesktop **100** (24 GB set — **reboot guest**), punimTag **9101** (16 GB) |
|
||||
|
||||
**Verdict:** New stacks belong on **pve10**. pve201 only benefits from **stopping/migrating** guests (punim after testing, GPU resize, old Kuma already stopped).
|
||||
|
||||
---
|
||||
|
||||
## Current state (May 2026)
|
||||
|
||||
**Already running:**
|
||||
- Caddy reverse proxy — currently on a **VM** (should migrate to LXC, see "Caddy migration" section)
|
||||
- Mailcow — VM, mail domain is `levkine.ca` (with e)
|
||||
- Vaultwarden, Vikunja, n8n, Listmonk, Mattermost, Nextcloud — across various VMs and LXCs (see [host-list.md](host-list.md))
|
||||
- **Cal.com** — LXC id `210`, `cal.levkin.ca`, Postgres included, admin user `ilia`, 15-min consult event live at `cal.levkin.ca/ilia/consult` with Jitsi link
|
||||
- Caddy entries live for: `levkin.ca`, `caseware.levkin.ca`, `auto.levkin.ca`, `iliadobkin.com`, `cal.levkin.ca`, `listmonk.levkin.ca`, `pdf.levkin.ca`, `search.levkin.ca`, `auth.levkin.ca`, `stats.levkin.ca`
|
||||
- **Authentik** — LXC **217** @ `10.0.10.21`, `https://auth.levkin.ca`, admin + TOTP enrolled
|
||||
- **Monitoring** — LXC **218** @ `10.0.10.22`: Uptime Kuma `:3001`, Dockge `:5001`, Umami `:3000` (LAN-only) — [monitoring-stack.md](monitoring-stack.md)
|
||||
- **Umami** + **Authentik** admin/TOTP/backup codes — done
|
||||
- **Uptime Kuma** — monitors live; email alerts via Mailcow — see [monitoring-stack.md](monitoring-stack.md)
|
||||
- **Dockge** on 218 — manages local `/opt/monitoring` stack
|
||||
- **Snapshots** `backup-20260522` on LXCs **217**, **218**
|
||||
- **Jellyfin** (VM 101) — stopped
|
||||
- LXC **210, 215–218, 219** — static via `pct set`; **Caddy VM 106** — static in-guest `.50`
|
||||
- **Nextcloud VM 201** — export done; VM **still running** on pve10 — **retire next** (frees 8 GB RAM)
|
||||
- ~~**Portainer VM 109**~~ — **removed** 2026-05-23 (~16 GiB RAM freed on pve10)
|
||||
- **Marketing sites** — LXC **220** (`levkin.ca`), **215/216/219** (git deploy), not yet on Caddy VM static roots
|
||||
- **punimTag dev** — pve201 LXC **9101** @ `10.0.10.121` (16 GB) — leave until testing done; then `dev-apps` on pve10
|
||||
|
||||
**Decisions locked in:**
|
||||
- Container manager: **Dockge** (not Portainer, not Coolify/Dokploy/CapRover)
|
||||
- Chat: **Mattermost only** — no Matrix/Synapse
|
||||
- Knowledge tool: **Outline** for client-facing, **SiYuan** if/when PhD work picks up (don't run Affine + Trilium too)
|
||||
- Bookmark manager: **Linkwarden** (full-page archive is the killer feature)
|
||||
- Authentik is the SSO target; Vaultwarden stays standalone
|
||||
|
||||
---
|
||||
|
||||
## LXC / VM grouping table
|
||||
|
||||
| Group | What's inside | Why grouped | LXC or VM |
|
||||
|---|---|---|---|
|
||||
| **edge** | Caddy reverse proxy, Crowdsec/Fail2ban | The front door — small, stable, restart rarely | LXC, 1 vCPU, 1GB RAM |
|
||||
| **identity** | Authentik (+ Postgres + Redis), Vaultwarden | Auth-critical — touch rarely, back up religiously | LXC, 2 vCPU, 2GB RAM |
|
||||
| **comms** | Mailcow | Mailcow's compose is huge (15+ containers) and self-contained — wants its own host | **VM**, 4GB RAM |
|
||||
| **automation** | n8n, Windmill (later), Huginn (later) | Active workloads, frequent updates, you'll touch these a lot | LXC, 2–4 vCPU, 4GB RAM |
|
||||
| **productivity** | Vikunja, Listmonk, Outline, Mealie, Linkwarden | Personal/team productivity, low-resource | LXC, 2 vCPU, 4GB RAM |
|
||||
| **media** | Immich, Nextcloud, Paperless-ngx | Large storage, GPU passthrough useful for Immich ML | **VM** if GPU passthrough, else LXC. Lots of disk. |
|
||||
| **business** | Cal.com ✅, Crater | Client-facing, financial — back up often | LXC, 2 vCPU, 2GB RAM |
|
||||
| **monitoring** | Uptime Kuma ✅, Dockge ✅, Umami ✅, Beszel (later) | Ops stack on LXC **218** | LXC, 2 vCPU, 2GB RAM |
|
||||
| **labs** | Anything experimental — Flowise, Trigger.dev | Things you're trying out, can be wiped | LXC, scratch space |
|
||||
|
||||
### Why this grouping (cheat sheet)
|
||||
|
||||
- One service goes bad → only its group restarts.
|
||||
- Need a kernel upgrade for one stack → snapshot the LXC, upgrade, roll back if broken.
|
||||
- Mailcow's huge surface area is isolated in its own VM.
|
||||
- Edge LXC is tiny and stable → perfect for the layer everything depends on.
|
||||
- Backup cadence per group (see Backups section).
|
||||
- Resource limits per LXC mean a runaway container can't eat n8n's RAM.
|
||||
|
||||
---
|
||||
|
||||
## Subdomains
|
||||
|
||||
Only expose what actually needs to be public. Internal services use Tailscale/Wireguard for remote access.
|
||||
|
||||
### Expose publicly
|
||||
|
||||
| Subdomain | Service | Group | Why public | Status |
|
||||
|---|---|---|---|---|
|
||||
| `levkin.ca` | Company site (spec + `/folders`) | edge | Main brand | ✅ LXC 220 — **DNS must point to home IP** (was parked elsewhere) |
|
||||
| `caseware.levkin.ca` | Static site | edge | Marketing | ✅ live |
|
||||
| `auto.levkin.ca` | Static site | edge | Marketing | ✅ live |
|
||||
| `iliadobkin.com` | Portfolio (SDET) | edge | Personal site | ✅ live (pve10 LXC 219) |
|
||||
| `cal.levkin.ca` | Cal.com | business | Clients book on it | ✅ live |
|
||||
| `listmonk.levkin.ca` | Listmonk | productivity | Unsubscribe URLs must resolve | ✅ live |
|
||||
| `mail.levkine.ca` | Mailcow | comms | Mail server | ✅ live |
|
||||
| `auth.levkin.ca` | Authentik | identity | OIDC redirect URLs need external resolution | ✅ live |
|
||||
| `bill.levkin.ca` | Crater | business | Clients view invoices | ⏳ Phase 6 |
|
||||
| `cloud.levkin.ca` | Nextcloud | media | **Retiring** — decommission VM 201 after cutover | 🗑️ |
|
||||
| `photos.levkin.ca` | Immich | media | Mobile apps need public hostname | ⏳ Phase 5 |
|
||||
| `vault.levkin.ca` | Vaultwarden | identity | Mobile clients need public hostname | ⏳ |
|
||||
| `notes.levkin.ca` | Outline | productivity | Sharing docs with clients | ⏳ |
|
||||
| `chat.levkin.ca` | Mattermost | comms | Only if inviting outside users | ⏳ optional |
|
||||
|
||||
### Keep internal only (no public DNS, no Caddy block)
|
||||
|
||||
Reachable only via local network or Tailscale/Wireguard:
|
||||
|
||||
| Service | Reason |
|
||||
|---|---|
|
||||
| Umami admin UI | Only you need the dashboard. Tracking endpoint can be public, dashboard isn't. |
|
||||
| Uptime Kuma | Status dashboard is for you. Don't advertise infrastructure. |
|
||||
| Beszel | Metrics are admin-only. |
|
||||
| Dockge | Admin UI — local only. |
|
||||
| n8n editor | UI shouldn't be exposed. Webhooks go on `hooks.levkin.ca` if needed. |
|
||||
| Huginn / Windmill / Flowise | Admin tools. |
|
||||
| Vikunja | Personal task manager. |
|
||||
| Mealie | Family recipes. |
|
||||
| Trigger.dev | Internal automation. |
|
||||
| Paperless-ngx | Personal documents. Never expose. |
|
||||
| SiYuan | Personal knowledge. |
|
||||
| Linkwarden | Personal bookmarks. |
|
||||
|
||||
### Borderline (decide per service)
|
||||
|
||||
| Subdomain | Service | Notes |
|
||||
|---|---|---|
|
||||
| `stats.levkin.ca` | Umami collector | Only the tracking script endpoint needs to be public; admin UI stays internal |
|
||||
| `status.levkin.ca` | Uptime Kuma | Kuma supports a separate public status page URL — that one can be public |
|
||||
|
||||
---
|
||||
|
||||
## Phased rollout
|
||||
|
||||
### Phase 0 — Foundation
|
||||
1. ✅ Caddy running (on VM — migrate to LXC in Phase 1.5)
|
||||
2. ✅ **Static IP audit (partial)** — all LXCs on pve10 pinned; Caddy VM static `.50`; remaining VMs on stable DHCP — see [host-list.md](host-list.md)
|
||||
3. ✅ DNS for `auth.levkin.ca` → home IP (verified 2026-05-22)
|
||||
4. ✅ `identity` LXC **217** @ `10.0.10.21` (2 vCPU, 2GB RAM, 20GB `local-lvm`, Debian 12 + Docker Compose)
|
||||
|
||||
### Phase 1 — Identity ✅
|
||||
1. ✅ Deploy Authentik in `identity` LXC (Authentik + Postgres + Redis, official compose at `/opt/authentik`)
|
||||
2. ✅ Caddy: `auth.levkin.ca` → `10.0.10.21:9000` (simple passthrough, no forward-auth)
|
||||
3. ✅ Admin user (`admin`), TOTP enrolled
|
||||
4. ✅ `authentik Admins` group (skip custom `users` group until more accounts)
|
||||
5. ✅ Static backup codes; **don't OIDC other apps until Cal.com test**
|
||||
|
||||
### Phase 1.5 — Caddy migration to LXC (~30 min)
|
||||
|
||||
Why now (after Phase 1, before bulk SSO work in Phase 4): Authentik is stable enough to absorb a small change, but you haven't yet built the dependency web of OIDC integrations that would make a Caddy reload risky.
|
||||
|
||||
Why Caddy belongs in an LXC, not a VM:
|
||||
- ~50MB OS overhead vs ~512MB for a VM
|
||||
- Boot/restart in 2-5s vs 20-40s (matters when reloading config)
|
||||
- Snapshot/backup is faster
|
||||
- Caddy is a Go binary doing reverse-proxy work — no need for kernel isolation
|
||||
- Near-native network performance
|
||||
|
||||
Steps:
|
||||
1. Create `edge` LXC: Debian 12, 1 vCPU, 512MB RAM, 8GB disk, **static IP from host list**
|
||||
2. Install Caddy via official Debian repo:
|
||||
```bash
|
||||
apt install -y debian-keyring debian-archive-keyring apt-transport-https
|
||||
curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg
|
||||
curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' | tee /etc/apt/sources.list.d/caddy-stable.list
|
||||
apt update && apt install caddy
|
||||
```
|
||||
3. Copy `Caddyfile` + custom snippets (`(security-headers)` etc.) from the VM
|
||||
4. Add a **test subdomain** (e.g. `test.levkin.ca`) pointing at the new LXC — verify TLS issues and routing works
|
||||
5. Cut over: update router port-forward (80/443) to the new LXC IP. DNS A records don't need to change if they point to your home IP.
|
||||
6. Watch Mailcow, Cal.com, Listmonk, the marketing sites for ~24h
|
||||
7. Keep the old VM snapshot for a week, then delete
|
||||
|
||||
### Phase 2 — Quick wins ✅
|
||||
1. ✅ **Umami** — tracking on levkin.ca, caseware, auto, and iliadobkin.com (portfolio)
|
||||
2. ✅ **Uptime Kuma** — monitors in UI
|
||||
3. ✅ **Dockge** — logged in; register `/opt/monitoring` stack (see [monitoring-stack.md](monitoring-stack.md))
|
||||
4. ✅ **Kuma email alerts** — SMTP via Mailcow (see [homelab-status-2026-05-22.md](homelab-status-2026-05-22.md))
|
||||
|
||||
### Phase 3 — Cal.com (mostly done) ✅
|
||||
1. ✅ Cal.com deployed in `business` LXC (id 210, Postgres included)
|
||||
2. ✅ `cal.levkin.ca` proxied via Caddy
|
||||
3. ✅ Booking link live at `cal.levkin.ca/ilia/consult` with Jitsi location
|
||||
4. ✅ Email working via `cal@levkine.ca` SMTP through Mailcow
|
||||
5. ⏳ **Cal.com OIDC** — **deferred** ([cal-authentik-oidc.md](cal-authentik-oidc.md)) — needs enterprise `CALCOM_LICENSE_KEY`
|
||||
6. ✅ `auto.levkin.ca` consult button → `cal.levkin.ca/ilia/consult`
|
||||
|
||||
### Phase 4 — SSO migration (~half a day, staged)
|
||||
Wire each to Authentik, least-risky first:
|
||||
1. **Vikunja** (OIDC native) — easy, single-user impact
|
||||
2. ~~**Nextcloud**~~ — **skipped** (VM 201 retiring)
|
||||
3. **Listmonk** (OIDC native, admin only) — easy
|
||||
4. **Mattermost** (SAML or OIDC native) — moderate
|
||||
5. **Mailcow** (OIDC) — last, because mail-critical
|
||||
|
||||
For each: keep a local admin password as a break-glass account.
|
||||
|
||||
### Phase 5 — Family / personal wins (~1 evening)
|
||||
1. **Immich** in `media` VM — install mobile apps for you and family, enable auto-upload. Face recognition runs in background; "my kids 2024" works within a couple days.
|
||||
2. Skip PhotoPrism — Immich covers it.
|
||||
|
||||
### Phase 6 — Business / consulting (~1–2 evenings)
|
||||
1. **Crater** in `business` LXC — tax rates, company info, Stripe integration if you want online payment
|
||||
2. **Beszel** hub in `monitoring` LXC + agents on each LXC — one dashboard for resource usage
|
||||
|
||||
### Phase 7 — Automation depth (ongoing)
|
||||
Only when you have a real use case:
|
||||
1. **Huginn** in `automation` — first agent: competitor pages, kosher product availability, grant deadlines
|
||||
2. **Windmill** in `automation` — first script: rewrite an n8n flow with too many code nodes
|
||||
3. **Flowise** in `labs` — first flow: chat-with-docs against your consulting notes
|
||||
|
||||
### Phase 8 — Knowledge / research
|
||||
1. **Outline** in `productivity` LXC — client-facing wiki + your notes
|
||||
2. **Linkwarden** in `productivity` LXC — bookmarks with full-page archive
|
||||
3. **Paperless-ngx** in `media` — scan and OCR the paper that's accumulating
|
||||
4. **SiYuan** — only if/when PhD or long-form research becomes relevant
|
||||
|
||||
---
|
||||
|
||||
## Static IP audit
|
||||
|
||||
**Maintain a `host-list.md` file** (in this Cursor project, alongside this plan) with every LXC/VM, its current IP, its target static IP, and DHCP/static status. Cursor will use this as the source of truth when scripting changes.
|
||||
|
||||
Suggested format:
|
||||
|
||||
| LXC/VM ID | Name | Role | Current IP | Target static IP | DHCP/Static | Notes |
|
||||
|---|---|---|---|---|---|---|
|
||||
| 210 | cal | Cal.com | 10.0.10.228/24 (DHCP) | 10.0.10.228/24 | ⏳ static | Convert ASAP |
|
||||
| ... | ... | ... | ... | ... | ... | ... |
|
||||
|
||||
### Recommended IP plan
|
||||
|
||||
Use role-based host ranges within a single `/24` — `10.0.10.0/24` here (or whatever your LAN is) — so the address plan is scannable at a glance:
|
||||
|
||||
| Range | Reserved for |
|
||||
|---|---|
|
||||
| `.1 - .9` | Network gear (router, switches, APs) |
|
||||
| `.10 - .19` | Proxmox host(s) + PBS |
|
||||
| `.20 - .39` | Edge / identity / comms (critical infra) |
|
||||
| `.40 - .79` | Application LXCs (productivity, automation, business, monitoring) |
|
||||
| `.80 - .99` | Media VM(s) |
|
||||
| `.100 - .199` | DHCP pool (clients, phones, laptops) |
|
||||
| `.200 - .249` | Labs / experimental |
|
||||
| `.250 - .254` | Reserved |
|
||||
|
||||
### How to set static on a Proxmox LXC
|
||||
|
||||
Two methods — pick one and stick with it:
|
||||
|
||||
**Method A — Proxmox CLI (recommended, survives reboots cleanly):**
|
||||
```bash
|
||||
pct set <ID> -net0 name=eth0,bridge=vmbr0,ip=10.0.10.X/24,gw=10.0.10.1
|
||||
pct reboot <ID>
|
||||
```
|
||||
|
||||
**Method B — Router DHCP reservation:**
|
||||
- Reserve the IP in your router's DHCP table by MAC address. LXC stays "DHCP" technically, but always gets the same IP.
|
||||
- Easier if you have many hosts and one router.
|
||||
- Risk: if the LXC's MAC changes (rebuild from snapshot to new ID), reservation breaks.
|
||||
|
||||
**Recommendation:** Method A (`pct set`) for everything critical (edge, identity, comms, business). Method B is fine for labs/experimental LXCs.
|
||||
|
||||
### Audit checklist
|
||||
|
||||
1. List every LXC: `pct list`
|
||||
2. List every VM: `qm list`
|
||||
3. For each, run `pct exec <ID> -- ip a` (or `qm guest exec <ID> -- ip a` for VMs) and check whether the IP came from DHCP
|
||||
4. Fill in `host-list.md`
|
||||
5. Pick target IPs from the range plan above
|
||||
6. Convert one at a time, lowest-risk first (labs → productivity → business → comms → identity → edge)
|
||||
7. **After each conversion**, verify the Caddy reverse-proxy entry still works (curl from outside)
|
||||
8. Update `host-list.md` status column
|
||||
|
||||
### Hosts known to need conversion right now
|
||||
|
||||
- ~~**LXC 210 (cal)**~~ — static at `10.0.10.228` ✅
|
||||
- **Site LXCs 220, 215/216/219** — static; served via Caddy → nginx on each LXC (git deploy). Optional future: static files on Caddy VM only.
|
||||
|
||||
---
|
||||
|
||||
## Backlog (priority order)
|
||||
|
||||
### P0 — next (ordered)
|
||||
1. ~~Umami / Kuma / Dockge~~ ✅
|
||||
2. ~~Portainer VM 109~~ ✅ (2026-05-23)
|
||||
3. **Retire Nextcloud VM 201** — ~8 GiB on pve10; remove Caddy + Kuma monitor
|
||||
4. **Vikunja → Authentik OIDC** — first real SSO ([sso-selfhosted-matrix.md](sso-selfhosted-matrix.md))
|
||||
5. **UniFi DHCP reservations** — [unifi-static-dhcp.md](unifi-static-dhcp.md)
|
||||
6. **DNS `levkin.ca` apex** → home IP (still parked at AWS)
|
||||
7. **Beszel** on monitoring LXC 218
|
||||
8. ~~Cal.com OIDC~~ — deferred until `CALCOM_LICENSE_KEY`; Authentik app `cal-com` ready
|
||||
9. **NAS.SP00** disk replace → Jellyfin VM 101
|
||||
10. **DebianDesktop VM 100** — reboot for 24 GB limit on pve201
|
||||
|
||||
### P1 — when ready
|
||||
- **Outline** — wiki for client docs
|
||||
- **Linkwarden** — bookmarks with full-page archive
|
||||
- **Plane** — Jira-lite project management (pair with Mattermost)
|
||||
|
||||
### P2 — when you have a real need
|
||||
- **Crater** — invoicing (Phase 6)
|
||||
- **Immich** — photos (Phase 5)
|
||||
- **Paperless-ngx** — document scanning (Phase 8)
|
||||
- **Huginn** — first when you have a monitoring use case
|
||||
- **Windmill** — when n8n hits limits
|
||||
- **Trigger.dev** — durable background jobs in code (better fit than Windmill for QA work)
|
||||
- **PrivateBin** — encrypted paste for sharing secrets with contractors
|
||||
- **Addy.io** — email aliases
|
||||
- **SiYuan** — if PhD work picks up
|
||||
- **Flowise** — labs only, when LLM workflow use case appears
|
||||
|
||||
### Skip / declined
|
||||
- ~~PhotoPrism~~ — Immich covers it
|
||||
- ~~Activepieces~~ — you already have n8n
|
||||
- ~~Affine / Trilium~~ — picked Outline + SiYuan instead
|
||||
- ~~Matrix/Synapse + Element~~ — staying on Mattermost
|
||||
- ~~Coolify / Dokploy / CapRover~~ — Dockge is enough; revisit only if writing many custom apps
|
||||
|
||||
---
|
||||
|
||||
## Backup strategy
|
||||
|
||||
- **Proxmox Backup Server (PBS)** or `vzdump` to a NAS — snapshot each LXC/VM nightly
|
||||
- **Critical groups** (`identity`, `comms`, `business`): 7 daily + 4 weekly + 12 monthly
|
||||
- **Productivity/automation**: 7 daily + 4 weekly
|
||||
- **Labs**: 3 daily, no long retention
|
||||
- **Off-site copy** of `identity` and `business` LXCs — these contain auth and billing data. Encrypted copy to Wasabi or Backblaze B2.
|
||||
|
||||
The whole LXC gets snapshotted — much simpler than file-level container backup.
|
||||
|
||||
**Done on pve10 (2026-05-22):** `pct snapshot` **`backup-20260522`** on LXCs **217** (identity) and **218** (monitoring).
|
||||
|
||||
---
|
||||
|
||||
## Next steps (priority order)
|
||||
|
||||
See **[homelab-status-2026-05-22.md](homelab-status-2026-05-22.md)** for automation checklist.
|
||||
|
||||
| # | Task | Status | Effort | Frees / unlocks |
|
||||
|---|------|--------|--------|-----------------|
|
||||
| 1 | **Kuma SMTP** | ✅ done | — | — |
|
||||
| 2 | **Cal.com → Authentik OIDC** | ⏸ **deferred** | — | Needs `CALCOM_LICENSE_KEY`; infra ready — [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md) |
|
||||
| 3 | **auto.levkin.ca** → Cal booking link | ✅ | — | Consult button live |
|
||||
| 4 | **Stop Portainer VM 109** | ✅ | — | Removed 2026-05-23; **~16 GiB RAM** on pve10 |
|
||||
| 5 | **Retire Nextcloud VM 201** | ⏳ **next** | 30 min | **~8 GiB RAM** on pve10 |
|
||||
| 6 | **Vikunja → Authentik OIDC** | ⏳ | 1–2 h | Phase 4 kickoff |
|
||||
| 7 | **UniFi DHCP reservations** | ⏳ | 20 min | [unifi-static-dhcp.md](unifi-static-dhcp.md) |
|
||||
| 8 | **DNS levkin.ca apex** | ⏳ | 15 min | AWS parked → `142.180.237.136` |
|
||||
| 9 | **Beszel** on 218 | ⏳ | 1 h | Capacity before Immich |
|
||||
| 10 | **NAS.SP00** disk → Jellyfin | ⏳ hardware | — | VM 101 |
|
||||
| 11 | **DebianDesktop reboot** | ⏳ | 5 min | Apply 24 GB on pve201 |
|
||||
| 12 | **Caddy → edge LXC `.20`** | ⏳ defer | ~30 min | Phase 1.5 |
|
||||
| 13 | **dev-apps LXC** | ⏳ defer | half day | After punim testing |
|
||||
| 14 | **Static sites → Caddy VM** | ⏳ optional | 1 h | Defer |
|
||||
|
||||
**Defer:** Immich, Crater, Outline; Listmonk/Mattermost/Mailcow SSO after Vikunja; Cal OIDC until license.
|
||||
|
||||
### Adding a new service — quick rule
|
||||
|
||||
| Want to add… | Node | RAM budget | Prerequisite |
|
||||
|--------------|------|------------|--------------|
|
||||
| Small app (Mealie, Linkwarden) | pve10 | 2 GB LXC | Retire 201 first if host feels tight (109 already removed 2026-05-23) |
|
||||
| Medium (Outline, Crater) | pve10 | 4 GB LXC | Free **~24 GiB** via Portainer + Nextcloud retire |
|
||||
| Heavy (Immich + ML) | pve10 or pve201 GPU | 4–8 GB+ | NAS healthy; pve201 only after GPU/punim sized down |
|
||||
| Dev sandbox | pve10 `dev-apps` | 6–8 GB | punim 9101 migration only after testing |
|
||||
|
||||
### Nextcloud decommission (VM 201)
|
||||
|
||||
1. Confirm export in `exports/nextcloud-2026-05-21/` is complete
|
||||
2. Delete **Nextcloud** monitor in Kuma
|
||||
3. Remove `nextcloud.levkin.ca` from Caddy VM
|
||||
4. Stop VM 201; update [host-list.md](host-list.md)
|
||||
5. After NAS healthy: optional `vzdump` archive then delete disk
|
||||
|
||||
---
|
||||
|
||||
## Important rules
|
||||
|
||||
1. **Never put Authentik behind itself.** `auth.levkin.ca` is a simple Caddy passthrough — no forward-auth, no fancy dependencies. If it were gated by Authentik and Authentik went down, you would be locked out of the very login UI you need to fix it.
|
||||
2. **Vaultwarden stays standalone.** It's your break-glass path if Authentik dies. Don't OIDC it.
|
||||
3. **Keep a local admin password on every SSO-wired app.** OIDC integrations break during upgrades — you need to log in to fix them.
|
||||
4. **Local admin to Proxmox host.** Independent of Authentik and Vaultwarden. Written down somewhere physical.
|
||||
5. **Don't expose admin UIs publicly.** Dockge, Beszel, Uptime Kuma admin, n8n editor — use Tailscale or Wireguard for remote access.
|
||||
6. **Static IPs for every LXC.** DHCP will eventually move them and Caddy will break. Set via `pct set <id> -net0 ...ip=10.0.10.X/24,gw=...` or a router reservation.
|
||||
7. **Cal.com LXC (210)** — static at `.228` ✅.
|
||||
8. **Maintain `host-list.md`** as the single source of truth for IPs. Update it whenever a new LXC/VM is created or migrated.
|
||||
42
docs/guides/mailcow-lan-proxy-fix.md
Normal file
42
docs/guides/mailcow-lan-proxy-fix.md
Normal file
@ -0,0 +1,42 @@
|
||||
# Mailcow unreachable from Caddy / LAN (TCP timeout)
|
||||
|
||||
## Symptom
|
||||
|
||||
- Mailcow containers healthy inside VM `10.0.10.132`
|
||||
- `curl https://10.0.10.132/` works **on the VM**
|
||||
- From Caddy (`10.0.10.50`) or other LAN hosts: TCP **443/80 timeout**
|
||||
- `tcpdump` on Proxmox shows SYN from client, **no SYN-ACK**
|
||||
|
||||
## Cause (not RAM)
|
||||
|
||||
`mailcowdockerized-netfilter-mailcow` adds an nftables rule in chain `MAILCOW`:
|
||||
|
||||
```text
|
||||
iifname != "br-mailcow" oifname "br-mailcow" tcp → DROP
|
||||
```
|
||||
|
||||
That blocks forwarded HTTPS from the LAN to the nginx container, even when `DISABLE_NETFILTER_ISOLATION_RULE=y` is set (netfilter still recreates the drop on restart in some versions).
|
||||
|
||||
## Fix on the mailcow VM
|
||||
|
||||
```bash
|
||||
nft flush chain ip filter MAILCOW
|
||||
```
|
||||
|
||||
Persistent (installed 2026-05-23): systemd unit `mailcow-flush-isolation-drop.service` runs after Docker.
|
||||
|
||||
After netfilter container restart, verify:
|
||||
|
||||
```bash
|
||||
nft list chain ip filter MAILCOW # should be empty
|
||||
nc -zv 10.0.10.132 443 # from Caddy host
|
||||
```
|
||||
|
||||
## Related settings in `/opt/mailcow-dockerized/mailcow.conf`
|
||||
|
||||
- `DISABLE_NETFILTER_ISOLATION_RULE=y`
|
||||
- `SNAT_TO_SOURCE=10.0.10.132` (optional; helps some hairpin cases)
|
||||
|
||||
## Reverse proxy
|
||||
|
||||
Caddy on `10.0.10.50` → `https://10.0.10.132` with `Host: mail.levkine.ca` — see `playbooks/caddy-auth-authentik.yml` / Caddyfile on caddy VM.
|
||||
232
docs/guides/monitoring-stack.md
Normal file
232
docs/guides/monitoring-stack.md
Normal file
@ -0,0 +1,232 @@
|
||||
# Monitoring stack (LXC 218)
|
||||
|
||||
**Host:** `monitoring` @ `10.0.10.22` (PVENAS pve10, VMID **218**)
|
||||
**Compose:** `/opt/monitoring/compose.yml`
|
||||
**Stacks dir (Dockge):** `/opt/stacks`
|
||||
|
||||
All admin UIs are **LAN-only** (no public Caddy blocks). Use Tailscale or local network.
|
||||
|
||||
| Service | URL | Port | Notes |
|
||||
|---------|-----|------|-------|
|
||||
| **Uptime Kuma** | http://10.0.10.22:3001 | 3001 | Admin + monitors configured ✅ (replaces pve201 LXC **305** @ `.197`, stopped) |
|
||||
| **Dockge** | http://10.0.10.22:5001 | 5001 | Manage compose on **this LXC only** |
|
||||
| **Umami** | http://10.0.10.22:3000 | 3000 | Password changed ✅; levkin.ca + caseware + auto + portfolio tracked |
|
||||
|
||||
Secrets: `/opt/monitoring/.env` on the LXC (mode 600). Not in git.
|
||||
|
||||
---
|
||||
|
||||
## Backups (pve10)
|
||||
|
||||
| Guest | VMID | Snapshot | Date |
|
||||
|-------|------|----------|------|
|
||||
| identity | 217 | `backup-20260522` | 2026-05-22 |
|
||||
| monitoring | 218 | `backup-20260522` | 2026-05-22 |
|
||||
|
||||
On pve10:
|
||||
|
||||
```bash
|
||||
pct listsnapshot 217
|
||||
pct listsnapshot 218
|
||||
# Rollback if needed:
|
||||
# pct rollback 217 backup-20260522
|
||||
```
|
||||
|
||||
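Optional off-node copy (when NAS healthy): `vzdump 217 218 --storage local --mode snapshot --compress zstd` — note that `--storage local` writes the dump to pve10 itself; substitute a NAS- or PBS-backed storage ID for a true off-node copy.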
|
||||
|
||||
---
|
||||
|
||||
## Uptime Kuma — monitors
|
||||
|
||||
Configured in UI (all green). **Remove** the Nextcloud monitor when VM 201 is retired.
|
||||
|
||||
| Name | URL |
|
||||
|------|-----|
|
||||
| Authentik | https://auth.levkin.ca |
|
||||
| Cal.com | https://cal.levkin.ca |
|
||||
| Caseware / Auto | marketing sites |
|
||||
| Mailcow | https://mail.levkine.ca |
|
||||
| Listmonk, Gitea, Vault, Todo, PVE nodes | per your dashboard |
|
||||
|
||||
---
|
||||
|
||||
## Uptime Kuma — email alerts (Mailcow)
|
||||
|
||||
Mail domain is **`levkine.ca`** (with **e**). Cal.com already sends via Mailcow as `cal@levkine.ca`.
|
||||
|
||||
### Which email to use
|
||||
|
||||
| Role | Address | Notes |
|
||||
|------|---------|-------|
|
||||
| **SMTP server** | `mail.levkine.ca` | Mailcow host |
|
||||
| **SMTP port** | `587` | STARTTLS (not 465 unless you prefer SMTPS) |
|
||||
| **From (sender)** | `alerts@levkine.ca` | Create mailbox in Mailcow if it does not exist |
|
||||
| **To (you)** | `idobkin@gmail.com` or `ilia@levkine.ca` | Use whichever you read; Gmail is fine for alerts |
|
||||
|
||||
### 1. Create mailbox in Mailcow (if needed)
|
||||
|
||||
**Automated (needs Mailcow API key):**
|
||||
|
||||
```bash
|
||||
# Define mailbox in group_vars/all/mailcow.yml, password in vault:
|
||||
make mailcow-mailbox MAILBOX=alerts
|
||||
# (alias: make mailcow-create-alerts)
|
||||
|
||||
# Import from .env into vault once, then delete .env:
|
||||
cp .env.example .env # MAILCOW_API_KEY=... ALERTS_PASSWORD=...
|
||||
make vault-import-env
|
||||
make mailcow-mailbox MAILBOX=alerts
|
||||
```
|
||||
|
||||
To add another mailbox tomorrow: edit `mailcow.yml` + `vault_mailcow_mailbox_passwords.<name>`, then `make mailcow-mailbox MAILBOX=<name>`.
|
||||
|
||||
**Manual UI:**
|
||||
|
||||
1. https://mail.levkine.ca → admin login
|
||||
2. **Email → Mailboxes → Add** → `alerts@levkine.ca` (strong password → store in Vaultwarden)
|
||||
3. Optional: alias `monitoring@levkine.ca` → same inbox
|
||||
|
||||
### 2. Add notification in Kuma
|
||||
|
||||
**Automated (from your Mac, after mailbox exists):**
|
||||
|
||||
```bash
|
||||
cd /path/to/ansible
|
||||
pip install uptime-kuma-api # or: .venv/bin/pip install uptime-kuma-api
|
||||
export KUMA_URL=http://10.0.10.22:3001 KUMA_USER=admin KUMA_PASSWORD='...'
|
||||
export SMTP_USER=alerts@levkine.ca SMTP_PASS='...' SMTP_TO=idobkin@gmail.com
|
||||
./scripts/kuma-setup-smtp.sh
|
||||
```
|
||||
|
||||
**Manual UI:**
|
||||
|
||||
1. http://10.0.10.22:3001 → **Settings** → **Notifications** → **Setup Notification**
|
||||
2. Type: **Email (SMTP)**
|
||||
3. Fill in:
|
||||
|
||||
| Field | Value |
|
||||
|-------|--------|
|
||||
| SMTP Host | `mail.levkine.ca` |
|
||||
| SMTP Port | `587` |
|
||||
| Security | TLS / STARTTLS |
|
||||
| Username | `alerts@levkine.ca` |
|
||||
| Password | mailbox password |
|
||||
| From Email | `alerts@levkine.ca` |
|
||||
| To Email | `idobkin@gmail.com` (or your `@levkine.ca`) |
|
||||
|
||||
4. **Test** → save
|
||||
5. Edit each monitor (or default) → **Notifications** → enable this channel
|
||||
|
||||
**Alternative:** Mattermost webhook (`slack.levkin.ca`) if you prefer chat over email.
|
||||
|
||||
---
|
||||
|
||||
## Dockge — what to do after login
|
||||
|
||||
**On server today:**
|
||||
|
||||
| Path | Contents |
|
||||
|------|----------|
|
||||
| `/opt/monitoring/compose.yml` | **Live** stack (Docker project `monitoring`, 4 containers running) |
|
||||
| `/opt/stacks/monitoring/compose.yaml` | Copy for Dockge (same services) |
|
||||
| `/opt/stacks/authentik-ref/`, `cal-ref/` | README only — **no** compose file (ignore) |
|
||||
|
||||
**Why “Scan Stacks Folder” looks empty**
|
||||
|
||||
- Scan only picks up folders under **`/opt/stacks`** that contain `compose.yaml` / `compose.yml`.
|
||||
- Your containers were started from **`/opt/monitoring`**, so Docker does not automatically link them to `/opt/stacks/monitoring` until you register that folder in Dockge.
|
||||
|
||||
**Fix (pick one):**
|
||||
|
||||
### Dockge UI note (your version)
|
||||
|
||||
**Settings → General** only has hostname — there is **no “Stacks directory” field**. That path is fixed at deploy time:
|
||||
|
||||
`DOCKGE_STACKS_DIR=/opt/stacks` (already set in `/opt/monitoring/compose.yml`).
|
||||
|
||||
Stacks are managed from the **home / dashboard** page, not Settings.
|
||||
|
||||
### Option 1 — Add stack manually (recommended)
|
||||
|
||||
1. http://10.0.10.22:5001 → **home** (logo / dashboard, not Settings)
|
||||
2. **+ Create Stack** (or **Compose** → new stack)
|
||||
3. Name: `monitoring`
|
||||
4. Path: `/opt/stacks/monitoring` (must contain `compose.yaml`)
|
||||
5. Open stack → review compose → **do not Start** until old project is stopped (below)
|
||||
|
||||
### Option 2 — Scan from dashboard menu
|
||||
|
||||
1. Stay on **dashboard** (not Settings)
|
||||
2. Top-right **⋮** → **Scan Stacks Folder**
|
||||
3. Pick **`monitoring`** if it appears (`authentik-ref` / `cal-ref` have no compose — ignore)
|
||||
|
||||
**Avoid duplicate containers**
|
||||
|
||||
Before starting from Dockge:
|
||||
|
||||
```bash
|
||||
ssh root@10.0.10.22
|
||||
cd /opt/monitoring && docker compose down
|
||||
# Then start from Dockge UI on stack monitoring, OR:
|
||||
cd /opt/stacks/monitoring && docker compose --env-file .env up -d
|
||||
```
|
||||
|
||||
Until you do that, Kuma/Dockge/Umami keep running from `/opt/monitoring`; Dockge is optional for edits until cutover.
|
||||
|
||||
### Optional reference stacks (read-only)
|
||||
|
||||
Create empty stacks under `/opt/stacks/` only if you want a UI placeholder:
|
||||
|
||||
```bash
|
||||
ssh root@10.0.10.22
|
||||
mkdir -p /opt/stacks/authentik /opt/stacks/cal
|
||||
# Copy compose for reference (does NOT control remote host):
|
||||
scp root@10.0.10.21:/opt/authentik/compose.yml /opt/stacks/authentik/
|
||||
```
|
||||
|
||||
To **manage** Authentik or Cal from Dockge long term, either move compose to 218 (not recommended) or install Dockge on each LXC later.
|
||||
|
||||
### Step 3 — Retire Portainer
|
||||
|
||||
VM **109** (portainer) was removed from pve10 on 2026-05-23; use Dockge on 218 instead.
|
||||
|
||||
---
|
||||
|
||||
## Umami
|
||||
|
||||
- ✅ Running at http://10.0.10.22:3000 (LAN / Tailscale only)
|
||||
- ✅ **Public tracking** via `https://stats.levkin.ca/script.js` on **levkin.ca** (LXC 220), caseware, auto, and **iliadobkin.com** (portfolio LXC 219)
|
||||
|
||||
**Options considered (historical — public tracking is now live via `stats.levkin.ca`, i.e. B + C; none of these block the sites):**
|
||||
|
||||
| Option | Effort | Notes |
|
||||
|--------|--------|--------|
|
||||
| **A — Skip public analytics** | 0 | Use Umami dashboard on `:3000` when you care; no DNS/Caddy |
|
||||
| **B — One DNS + Caddy block** | ~10 min | A record → home IP + Caddy `reverse_proxy 10.0.10.22:3000` on caddy VM |
|
||||
| **C — Re-add script tags** | 2 min | After B works, insert script before `</head>` on 215/216 |
|
||||
|
||||
**Suggested public hostname (instead of `analytics`):** `stats.levkin.ca` (short, clear). Alternatives: `umami.levkin.ca`, `metrics.levkin.ca`.
|
||||
|
||||
```caddy
|
||||
stats.levkin.ca {
|
||||
import security-headers
|
||||
encode gzip
|
||||
reverse_proxy 10.0.10.22:3000
|
||||
}
|
||||
```
|
||||
|
||||
Script tag then: `https://stats.levkin.ca/script.js`
|
||||
|
||||
We are **not stuck** — marketing sites do not need Umami to render. Option A remains a safe fallback if public tracking ever has to be switched off.
|
||||
|
||||
---
|
||||
|
||||
## Maintenance
|
||||
|
||||
```bash
|
||||
ssh root@10.0.10.22
|
||||
cd /opt/monitoring
|
||||
docker compose --env-file .env pull
|
||||
docker compose --env-file .env up -d
|
||||
docker compose ps
|
||||
```
|
||||
203
docs/guides/nas-sp00-drive-failure-report.md
Normal file
203
docs/guides/nas-sp00-drive-failure-report.md
Normal file
@ -0,0 +1,203 @@
|
||||
# NAS.SP00 drive failure — IT report
|
||||
|
||||
**Date:** 2026-05-21
|
||||
**Host:** PVENAS (Proxmox VE) — `10.0.10.10`
|
||||
**Pool:** ZFS `NAS.SP00` (~9 TB, ~862 GB used)
|
||||
**Prepared for:** IT / hardware replacement
|
||||
**SMART audit:** [nas-sp00-smart-audit-2026-05-21.md](nas-sp00-smart-audit-2026-05-21.md)
|
||||
|
||||
---
|
||||
|
||||
## Executive summary
|
||||
|
||||
One disk in the four-drive ZFS pool (two mirrored pairs) has **failed at the hardware level**. The pool is **DEGRADED** but **online** with **no known data errors** at this time. The failed drive must be **physically replaced** and the pool **resilvered**. Until then, **mirror-0 has no redundancy** — a second failure on the remaining disk in that mirror (`W4J0L0BA`) could cause data loss.
|
||||
|
||||
This issue also caused a **host-wide I/O wedge** (pool SUSPENDED → stuck `sync()`), which blocked LXC/VM operations unrelated to the pool (e.g. Cal.com on `local-lvm`). That was cleared by a forced node reboot; **replacing the drive remains required**.
|
||||
|
||||
---
|
||||
|
||||
## Pool layout
|
||||
|
||||
| Vdev | Role | Disk A | Disk B | Status |
|
||||
|------|------|--------|--------|--------|
|
||||
| mirror-0 | RAID1 pair | `W4J0L0BA` (sda, 5 TB) | `W4J0L3PY` (sdb) | **DEGRADED** — sdb UNAVAIL |
|
||||
| mirror-1 | RAID1 pair | `W4J0LKCD` (sdd, 5 TB) | `W4J0K9V7` (sdc, 5 TB) | **ONLINE** |
|
||||
|
||||
Model family (healthy drives): Seagate **ST5000DM000-1FK178** (5 TB, 5900 RPM).
|
||||
|
||||
---
|
||||
|
||||
## Failed drive identification
|
||||
|
||||
| Field | Expected | Observed |
|
||||
|-------|----------|----------|
|
||||
| **Serial** | W4J0L3PY | W4J0L3PY |
|
||||
| **Model** | ST5000DM000-1FK178 | ST5000DM000 (truncated reporting) |
|
||||
| **WWN** | — | `5000c50082cc8bbb` |
|
||||
| **Firmware** | — | CC48 |
|
||||
| **Capacity** | ~5,000,981,078,016 bytes (**5.00 TB**) | **137,438,952,960 bytes (~137 GB)** |
|
||||
| **Linux device** | `/dev/sdb` | `/dev/sdb` |
|
||||
| **ZFS state** | ONLINE | **UNAVAIL** — label missing/invalid |
|
||||
|
||||
ZFS last known path:
|
||||
`/dev/disk/by-id/ata-ST5000DM000-1FK178_W4J0L3PY-part1`
|
||||
|
||||
---
|
||||
|
||||
## Symptoms and evidence
|
||||
|
||||
### 1. Capacity collapse (primary indicator)
|
||||
|
||||
The drive is detected as **~137 GB** instead of **5 TB**. ZFS cannot use a partition label created for a 5 TB disk on a device that exposes only a tiny fraction of capacity. This pattern is typical of:
|
||||
|
||||
- **Failed HDD** (media/controller failure)
|
||||
- **Bad SATA cable, backplane port, or HBA port**
|
||||
- **USB/SATA bridge failure** (if applicable)
|
||||
- **Severe firmware/HPA corruption** (less common)
|
||||
|
||||
### 2. SMART / SCSI errors
|
||||
|
||||
`smartctl` against `/dev/sdb`:
|
||||
|
||||
- **Read SMART Data failed:** scsi error aborted command
|
||||
- **Overall health:** UNKNOWN (attributes unreadable)
|
||||
- Multiple log read commands fail (Error Log, Self-test Log, GP Log, etc.)
|
||||
|
||||
Healthy sibling in same mirror (`/dev/sda`, W4J0L0BA): **SMART PASSED**, full 5 TB capacity.
|
||||
|
||||
### 3. Kernel log (`dmesg` at boot, 2026-05-21 ~21:27)
|
||||
|
||||
Repeated on **`sdb`**:
|
||||
|
||||
```
|
||||
Buffer I/O error on dev sdb
|
||||
Sense Key: Medium Error
|
||||
Add. Sense: Unrecovered read error
|
||||
critical medium error, dev sdb, sector N op 0x0:(READ)
|
||||
```
|
||||
|
||||
Indicates the block device cannot reliably read media — **hardware or link layer**, not a ZFS configuration issue.
|
||||
|
||||
### 4. ZFS pool history
|
||||
|
||||
- Pool previously entered **SUSPENDED** state (I/O failures on faulted devices).
|
||||
- After node reboot: pool **DEGRADED**, short **resilver** completed with **0 errors** (healing scan on remaining devices).
|
||||
- Current: **No known data errors** in `zpool status`.
|
||||
|
||||
---
|
||||
|
||||
## Impact
|
||||
|
||||
### Storage / services on `NAS.SP00`
|
||||
|
||||
Proxmox guests with disks on this pool (non-exhaustive):
|
||||
|
||||
| VMID | Name | NAS-backed storage |
|
||||
|------|------|-------------------|
|
||||
| 101 | Jellyfin | 1 TB zvol |
|
||||
| 105 | TrueNAS | 1 TB zvol |
|
||||
| 108 | actual-debian | 10 GB |
|
||||
| 200 | PVE.BU.SVR | 1 TB |
|
||||
| 201 | NextcloudAIO-debian | 8 TB |
|
||||
|
||||
**Risk:** With mirror-0 degraded, blocks stored only on the surviving mirror-0 disk have **no redundancy** until the failed drive is replaced and resilver completes.
|
||||
|
||||
### Unrelated workloads
|
||||
|
||||
Guests on **`local-lvm`** (NVMe, e.g. Cal.com LXC 210, Caddy VM 106) are **not stored on NAS.SP00** but were affected when the pool suspended and blocked system-wide `sync()`.
|
||||
|
||||
### Backup target
|
||||
|
||||
Proxmox datastore **PVEBUVD00** (PBS @ `10.0.10.200:8007`) reports **unreachable** from this node — separate issue; verify PBS host/network.
|
||||
|
||||
---
|
||||
|
||||
## Diagnosis
|
||||
|
||||
| Question | Answer |
|
||||
|----------|--------|
|
||||
| Is this a ZFS misconfiguration? | **No** — config is consistent; three drives show correct 5 TB labels. |
|
||||
| Is the pool lost? | **No** — degraded but importable; no known data errors currently. |
|
||||
| Which disk to replace? | **Seagate W4J0L3PY** (`/dev/sdb`, mirror-0 failed leg). |
|
||||
| Can we fix it in software? | **Unlikely** — capacity and SMART failures point to hardware. |
|
||||
| Safe to reseat first? | **Optional trial** — power down or hot-swap per chassis policy; if capacity still reads ~137 GB, **replace disk**. |
|
||||
|
||||
---
|
||||
|
||||
## Recommended actions
|
||||
|
||||
### Immediate (IT / on-site)
|
||||
|
||||
1. **Identify physical slot** for serial **W4J0L3PY** (compare to inventory/asset tags).
|
||||
2. **Reseat** SATA/SAS cable and backplane connection once (if hot-swap policy allows). Reboot or rescan SCSI bus.
|
||||
3. If capacity is still wrong or SMART still fails → **replace with new 5 TB+ enterprise/NAS-class HDD** (match class of ST5000DM000 or better).
|
||||
4. Do **not** remove the UNAVAIL device from the pool until replacement is in place.
|
||||
|
||||
### After new disk is installed
|
||||
|
||||
On **PVENAS** as root (adjust `/dev/disk/by-id/...` to the **new** drive’s partition 1):
|
||||
|
||||
```bash
|
||||
# Verify new disk shows ~5 TB
|
||||
lsblk /dev/sdX
|
||||
smartctl -H /dev/sdX
|
||||
|
||||
# Replace failed vdev (use the old device name or the numeric GUID shown for the UNAVAIL entry in: zpool status NAS.SP00)
|
||||
zpool replace NAS.SP00 ata-ST5000DM000-1FK178_W4J0L3PY-part1 /dev/disk/by-id/ata-NEW_SERIAL-part1
|
||||
|
||||
# Monitor until resilver completes
|
||||
zpool status -v NAS.SP00
|
||||
```
|
||||
|
||||
### Post-resilver
|
||||
|
||||
- Run **`zpool scrub NAS.SP00`** during a maintenance window.
|
||||
- Confirm **PVEBUVD00** / PBS connectivity if backups depend on it.
|
||||
- Review whether **Nextcloud VM 201** (8 TB on degraded pool) should remain running until healthy.
|
||||
|
||||
### Not recommended
|
||||
|
||||
- Ignoring degraded state for extended periods.
|
||||
- Running heavy I/O on large VMs (e.g. 8 TB Nextcloud) during extended degraded operation.
|
||||
- `zpool clear` without addressing hardware — does not fix a dead disk.
|
||||
|
||||
---
|
||||
|
||||
## Reference — healthy disks (for spare matching)
|
||||
|
||||
| Serial | Device | Capacity | SMART |
|
||||
|--------|--------|----------|-------|
|
||||
| W4J0L0BA | sda | 5.00 TB | PASSED |
|
||||
| W4J0K9V7 | sdc | 5.00 TB | PASSED |
|
||||
| W4J0LKCD | sdd | 5.00 TB | PASSED |
|
||||
|
||||
---
|
||||
|
||||
## Timeline (brief)
|
||||
|
||||
| When | Event |
|
||||
|------|--------|
|
||||
| Prior to 2026-05-21 | `W4J0L3PY` accumulated read/write errors; pool faulted |
|
||||
| 2026-05-21 | Pool **SUSPENDED**; host `sync()` wedged; Cal LXC start failed |
|
||||
| 2026-05-21 ~21:28 | Forced node reboot; pool **DEGRADED**, resilver finished, 0 errors |
|
||||
| 2026-05-21 | `sdb` still reports **~137 GB**, UNAVAIL — **replacement still required** |
|
||||
|
||||
---
|
||||
|
||||
## Contact / handoff notes
|
||||
|
||||
- **Node:** Proxmox VE 8.x on **PVENAS** (`10.0.10.10`)
|
||||
- **Pool name in Proxmox:** `NAS.SP00` (zfspool, active, degraded)
|
||||
- **Failed serial:** **W4J0L3PY**
|
||||
- **Replacement type:** 5 TB+ HDD, same or better class as Seagate ST5000DM000-1FK178
|
||||
|
||||
For questions about homelab service impact (Cal, Caddy, Phase 0 rollout), see [`levkin-selfhost-plan-2.md`](levkin-selfhost-plan-2.md).
|
||||
## TL;DR
|
||||
|
||||
- Pool `NAS.SP00` on `PVENAS` (10.0.10.10) had a disk failure (`W4J0L3PY`)
|
||||
- Pool went **SUSPENDED**; required forced reboot and is now **DEGRADED**
|
||||
- **Immediate action:** Replace the failed drive with a spare (same or larger size; see the healthy-disk reference table above for matching)
|
||||
- Use `zpool replace` command with correct device paths (see main procedure)
|
||||
- Monitor resilver to completion; run `zpool scrub` after
|
||||
- Backup services and large VMs (e.g. Nextcloud 8TB) depend on pool health—keep degraded time short
|
||||
- Reach out if unsure about pool status or downstream service risk
|
||||
232
docs/guides/nas-sp00-smart-audit-2026-05-21.md
Normal file
232
docs/guides/nas-sp00-smart-audit-2026-05-21.md
Normal file
@ -0,0 +1,232 @@
|
||||
# NAS.SP00 SMART audit
|
||||
|
||||
**Date:** 2026-05-21
|
||||
**Host:** PVENAS (Proxmox VE) — `10.0.10.10`
|
||||
**Pool:** ZFS `NAS.SP00`
|
||||
**Related:** [nas-sp00-drive-failure-report.md](nas-sp00-drive-failure-report.md)
|
||||
|
||||
---
|
||||
|
||||
## Executive summary
|
||||
|
||||
| Serial | Device | Capacity | ZFS (mirror) | SMART health |
|
||||
|--------|--------|----------|--------------|--------------|
|
||||
| W4J0L0BA | sda | 5.00 TB | mirror-0 ONLINE | **PASSED** |
|
||||
| W4J0L3PY | sdb | **137 GB** | mirror-0 UNAVAIL | **UNKNOWN** (read fails) |
|
||||
| W4J0K9V7 | sdc | 5.00 TB | mirror-1 ONLINE | **PASSED** |
|
||||
| W4J0LKCD | sdd | 5.00 TB | mirror-1 ONLINE | **PASSED** |
|
||||
|
||||
Pool state at audit time: **DEGRADED** — failed leg `W4J0L3PY` (`/dev/sdb`). No known data errors. Three healthy drives show no reallocated, pending, or uncorrectable sectors.
|
||||
|
||||
---
|
||||
|
||||
## ZFS pool status
|
||||
|
||||
```
|
||||
pool: NAS.SP00
|
||||
state: DEGRADED
|
||||
status: One or more devices could not be used because the label is missing or
|
||||
invalid. Sufficient replicas exist for the pool to continue
|
||||
functioning in a degraded state.
|
||||
action: Replace the device using 'zpool replace'.
|
||||
scan: resilvered 0B in 00:00:01 with 0 errors on Thu May 21 21:27:54 2026
|
||||
|
||||
NAME STATE READ WRITE CKSUM
|
||||
NAS.SP00 DEGRADED 0 0 0
|
||||
mirror-0 DEGRADED 0 0 0
|
||||
ata-ST5000DM000-1FK178_W4J0L0BA ONLINE 0 0 0
|
||||
11449632222283419591 UNAVAIL 0 0 0 was /dev/disk/by-id/ata-ST5000DM000-1FK178_W4J0L3PY-part1
|
||||
mirror-1 ONLINE 0 0 0
|
||||
ata-ST5000DM000-1FK178_W4J0LKCD ONLINE 0 0 0
|
||||
ata-ST5000DM000-1FK178_W4J0K9V7 ONLINE 0 0 0
|
||||
|
||||
errors: No known data errors
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Block devices (`lsblk`)
|
||||
|
||||
| NAME | SIZE | MODEL | SERIAL | ROTA |
|
||||
|------|------|-------|--------|------|
|
||||
| sda | 4.5T | ST5000DM000-1FK178 | W4J0L0BA | 1 |
|
||||
| sdb | 3.9G | ST5000DM000 | W4J0L3PY | 1 |
|
||||
| sdc | 4.5T | ST5000DM000-1FK178 | W4J0K9V7 | 1 |
|
||||
| sdd | 4.5T | ST5000DM000-1FK178 | W4J0LKCD | 1 |
|
||||
|
||||
---
|
||||
|
||||
## Healthy drives — key metrics
|
||||
|
||||
| Metric | sda (W4J0L0BA) | sdc (W4J0K9V7) | sdd (W4J0LKCD) |
|
||||
|--------|----------------|----------------|----------------|
|
||||
| Model | ST5000DM000-1FK178 | ST5000DM000-1FK178 | ST5000DM000-1FK178 |
|
||||
| Firmware | CC48 | CC48 | CC48 |
|
||||
| WWN | 5000c500082c02f61 | 5000c500082c7e2ce | 5000c500082d84c45 |
|
||||
| Rotation | 5980 rpm | 5980 rpm | 5980 rpm |
|
||||
| SATA | 3.1 @ 6.0 Gb/s | 3.1 @ 6.0 Gb/s | 3.1 @ 6.0 Gb/s |
|
||||
| Power-on hours | 52,481 (~6.0 y) | 53,087 (~6.1 y) | 45,580 (~5.2 y) |
|
||||
| Temperature | 27 °C | 30 °C | 30 °C |
|
||||
| Reallocated sectors | 0 | 0 | 0 |
|
||||
| Current pending sectors | 0 | 0 | 0 |
|
||||
| Offline uncorrectable | 0 | 0 | 0 |
|
||||
| UDMA CRC errors | 0 | 0 | 0 |
|
||||
| Start/stop count | 350 | 367 | 310 |
|
||||
| Load cycle count | 348,974 | 340,961 | 184,891 |
|
||||
| Power cycle count | 345 | 363 | 309 |
|
||||
|
||||
High **Load_Cycle_Count** on Seagate Desktop HDD.15 is common (head parking); not alarming when reallocated/pending counts remain zero.
|
||||
|
||||
---
|
||||
|
||||
## Failed drive — `/dev/sdb` (W4J0L3PY)
|
||||
|
||||
### Identity
|
||||
|
||||
| Field | Value |
|
||||
|-------|-------|
|
||||
| Device Model | ST5000DM000 (truncated; not full -1FK178 suffix) |
|
||||
| Serial | W4J0L3PY |
|
||||
| WWN | 5000c500082cc8bbb |
|
||||
| Firmware | CC48 |
|
||||
| User capacity | 137,438,952,960 bytes [**137 GB**] |
|
||||
| Expected capacity | 5,000,981,078,016 bytes [5.00 TB] |
|
||||
| Rotation | 7200 rpm (reported) |
|
||||
| SATA | 3.0, 6.0 Gb/s |
|
||||
|
||||
### SMART
|
||||
|
||||
```
|
||||
Read SMART Data failed: scsi error aborted command
|
||||
SMART Status command failed: scsi error aborted command
|
||||
SMART overall-health self-assessment test result: UNKNOWN!
|
||||
SMART Status, Attributes and Thresholds cannot be read.
|
||||
```
|
||||
|
||||
**Action:** Replace drive; see [nas-sp00-drive-failure-report.md](nas-sp00-drive-failure-report.md).
|
||||
|
||||
---
|
||||
|
||||
## Full SMART attributes (healthy drives)
|
||||
|
||||
### `/dev/sda` — W4J0L0BA (mirror-0, ONLINE)
|
||||
|
||||
```
|
||||
SMART overall-health self-assessment test result: PASSED
|
||||
|
||||
ID# ATTRIBUTE_NAME VALUE WORST THRESH TYPE RAW_VALUE
|
||||
1 Raw_Read_Error_Rate 119 100 006 Pre-fail 211189952
|
||||
3 Spin_Up_Time 092 091 000 Pre-fail 0
|
||||
4 Start_Stop_Count 100 100 020 Old_age 350
|
||||
5 Reallocated_Sector_Ct 100 100 010 Pre-fail 0
|
||||
7 Seek_Error_Rate 080 060 030 Pre-fail 43979429424
|
||||
9 Power_On_Hours 041 041 000 Old_age 52481
|
||||
10 Spin_Retry_Count 100 100 097 Pre-fail 0
|
||||
12 Power_Cycle_Count 100 100 020 Old_age 345
|
||||
183 Runtime_Bad_Block 100 100 000 Old_age 0
|
||||
184 End-to-End_Error 100 100 099 Old_age 0
|
||||
187 Reported_Uncorrect 100 100 000 Old_age 0
|
||||
188 Command_Timeout 100 099 000 Old_age 3 3 3
|
||||
189 High_Fly_Writes 100 100 000 Old_age 0
|
||||
190 Airflow_Temperature_Cel 073 058 045 Old_age 27 (Min/Max 27/28)
|
||||
191 G-Sense_Error_Rate 100 100 000 Old_age 0
|
||||
192 Power-Off_Retract_Count 100 100 000 Old_age 0
|
||||
193 Load_Cycle_Count 001 001 000 Old_age 348974
|
||||
194 Temperature_Celsius 027 042 000 Old_age 27
|
||||
195 Hardware_ECC_Recovered 119 100 000 Old_age 211189952
|
||||
197 Current_Pending_Sector 100 100 000 Old_age 0
|
||||
198 Offline_Uncorrectable 100 100 000 Old_age 0
|
||||
199 UDMA_CRC_Error_Count 200 200 000 Old_age 0
|
||||
240 Head_Flying_Hours 100 253 000 Old_age 15140h+51m+12.276s
|
||||
241 Total_LBAs_Written 100 253 000 Old_age 57665101118
|
||||
242 Total_LBAs_Read 100 253 000 Old_age 160962549062
|
||||
```
|
||||
|
||||
### `/dev/sdc` — W4J0K9V7 (mirror-1, ONLINE)
|
||||
|
||||
```
|
||||
SMART overall-health self-assessment test result: PASSED
|
||||
|
||||
ID# ATTRIBUTE_NAME VALUE WORST THRESH TYPE RAW_VALUE
|
||||
1 Raw_Read_Error_Rate 117 100 006 Pre-fail 136042192
|
||||
3 Spin_Up_Time 092 091 000 Pre-fail 0
|
||||
4 Start_Stop_Count 100 100 020 Old_age 367
|
||||
5 Reallocated_Sector_Ct 100 100 010 Pre-fail 0
|
||||
7 Seek_Error_Rate 083 060 030 Pre-fail 22512744055
|
||||
9 Power_On_Hours 040 040 000 Old_age 53087
|
||||
10 Spin_Retry_Count 100 100 097 Pre-fail 0
|
||||
12 Power_Cycle_Count 100 100 020 Old_age 363
|
||||
183 Runtime_Bad_Block 100 100 000 Old_age 0
|
||||
184 End-to-End_Error 100 100 099 Old_age 0
|
||||
187 Reported_Uncorrect 100 100 000 Old_age 0
|
||||
188 Command_Timeout 100 099 000 Old_age 6 6 12
|
||||
189 High_Fly_Writes 096 096 000 Old_age 4
|
||||
190 Airflow_Temperature_Cel 070 060 045 Old_age 30 (Min/Max 28/30)
|
||||
191 G-Sense_Error_Rate 100 100 000 Old_age 0
|
||||
192 Power-Off_Retract_Count 100 100 000 Old_age 0
|
||||
193 Load_Cycle_Count 001 001 000 Old_age 340961
|
||||
194 Temperature_Celsius 030 040 000 Old_age 30
|
||||
195 Hardware_ECC_Recovered 117 100 000 Old_age 136042192
|
||||
197 Current_Pending_Sector 100 100 000 Old_age 0
|
||||
198 Offline_Uncorrectable 100 100 000 Old_age 0
|
||||
199 UDMA_CRC_Error_Count 200 200 000 Old_age 0
|
||||
240 Head_Flying_Hours 100 253 000 Old_age 15859h+53m+20.869s
|
||||
241 Total_LBAs_Written 100 253 000 Old_age 57609506493
|
||||
242 Total_LBAs_Read 100 253 000 Old_age 152392393081
|
||||
```
|
||||
|
||||
### `/dev/sdd` — W4J0LKCD (mirror-1, ONLINE)
|
||||
|
||||
```
|
||||
SMART overall-health self-assessment test result: PASSED
|
||||
|
||||
ID# ATTRIBUTE_NAME VALUE WORST THRESH TYPE RAW_VALUE
|
||||
1 Raw_Read_Error_Rate 116 090 006 Pre-fail 108217848
|
||||
3 Spin_Up_Time 092 091 000 Pre-fail 0
|
||||
4 Start_Stop_Count 100 100 020 Old_age 310
|
||||
5 Reallocated_Sector_Ct 100 100 010 Pre-fail 0
|
||||
7 Seek_Error_Rate 073 051 030 Pre-fail 185584998742
|
||||
9 Power_On_Hours 048 048 000 Old_age 45580
|
||||
10 Spin_Retry_Count 100 100 097 Pre-fail 0
|
||||
12 Power_Cycle_Count 100 100 020 Old_age 309
|
||||
183 Runtime_Bad_Block 100 100 000 Old_age 0
|
||||
184 End-to-End_Error 100 100 099 Old_age 0
|
||||
187 Reported_Uncorrect 100 100 000 Old_age 0
|
||||
188 Command_Timeout 100 099 000 Old_age 8 8 14
|
||||
189 High_Fly_Writes 098 098 000 Old_age 2
|
||||
190 Airflow_Temperature_Cel 070 050 045 Old_age 30 (Min/Max 29/30)
|
||||
191 G-Sense_Error_Rate 100 100 000 Old_age 0
|
||||
192 Power-Off_Retract_Count 100 100 000 Old_age 0
|
||||
193 Load_Cycle_Count 008 008 000 Old_age 184891
|
||||
194 Temperature_Celsius 030 050 000 Old_age 30
|
||||
195 Hardware_ECC_Recovered 116 100 000 Old_age 108217848
|
||||
197 Current_Pending_Sector 100 091 000 Old_age 0
|
||||
198 Offline_Uncorrectable 100 091 000 Old_age 0
|
||||
199 UDMA_CRC_Error_Count 200 200 000 Old_age 0
|
||||
240 Head_Flying_Hours 100 253 000 Old_age 11604h+15m+50.842s
|
||||
241 Total_LBAs_Written 100 253 000 Old_age 72962800596
|
||||
242 Total_LBAs_Read 100 253 000 Old_age 167268621195
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## How this audit was collected
|
||||
|
||||
On PVENAS as root:
|
||||
|
||||
```bash
|
||||
zpool status NAS.SP00
|
||||
lsblk -d -o NAME,SIZE,MODEL,SERIAL,ROTA,STATE /dev/sd{a,b,c,d}
|
||||
for d in sda sdb sdc sdd; do smartctl -i -H -A /dev/$d; done
|
||||
```
|
||||
|
||||
Audit timestamp (host local): Thu May 21 22:13:58 2026 EDT.
|
||||
|
||||
---
|
||||
|
||||
## Next steps
|
||||
|
||||
1. Replace **W4J0L3PY** with a 5 TB+ NAS-class HDD (match ST5000DM000-1FK178 or better).
|
||||
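2. Run `zpool replace NAS.SP00 11449632222283419591 /dev/disk/by-id/<new-disk>` — the failed leg is referenced by the GUID shown in `zpool status`, the new disk by its by-id path.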
|
||||
3. Monitor resilver; run `zpool scrub NAS.SP00` after pool is **ONLINE**.
|
||||
4. Re-run SMART audit after replacement for a clean baseline.
|
||||
441
docs/guides/security-audit-report.md
Normal file
441
docs/guides/security-audit-report.md
Normal file
@ -0,0 +1,441 @@
|
||||
# Security Audit Report
|
||||
|
||||
**Last audit:** 2026-05-23 (re-run after SSH keys + `make maintenance`)
|
||||
**Previous audit:** 2026-05-20
|
||||
**Auditor:** `scripts/security-audit-*.sh`, Ansible `maintenance` + `maintenance_cron` roles
|
||||
**Repo baseline** (`roles/ssh/defaults/main.yml`): `PermitRootLogin prohibit-password`, `PasswordAuthentication no`, UFW enabled.
|
||||
|
||||
---
|
||||
|
||||
## 2026-05-23 — Actions completed
|
||||
|
||||
| Action | Status |
|
||||
|--------|--------|
|
||||
| SSH keys → caseware, auto, cal, vikunja, mailcow, listmonk | ✅ All six reachable as `root` |
|
||||
| SSH keys → mailcow/listmonk VMs | ✅ Via brief VM shutdown + disk inject on pve201 (no guest agent) |
|
||||
| Inventory rename `vikanjans` → `vikunja` | ✅ `hosts` + `proxmox_vmid=301` |
|
||||
| `apt upgrade` fleet (skip reboot) | ✅ 14 hosts via Ansible; auto via `pct exec` on pve10 |
|
||||
| Tier 1 cron (journal + apt) | ✅ `roles/maintenance_cron` on PVE, sites, comms, ansible, hermes, etc. |
|
||||
| Tier 2 cron (docker prune) | ✅ identity, monitoring, vikunja; git-ci-01 keeps `docker-prune-ci` |
|
||||
| VM 104 (GPU-Dev) RAM 72→64 GiB | ✅ pve201; host free RAM ~1.7→10 GiB |
|
||||
| Fix broken `host_vars` (ansibleVM, listmonk) | ✅ Plain YAML; old blobs → `*.vault-bak` |
|
||||
| Vault `vault_*_become_password` + maintenance vaultwardenVM | ✅ 2026-05-23 |
|
||||
| caddy root SSH + maintenance | ✅ `bootstrap-root-ssh-caddy`; inventory `ansible_user=root` |
|
||||
| ansibleVM maintenance | ✅ become password in vault |
|
||||
|
||||
### Post-maintenance SSH reachability
|
||||
|
||||
| Host | SSH | Notes |
|
||||
|------|-----|-------|
|
||||
| caseware | ✅ | |
|
||||
| auto | ✅ | Was slow from laptop earlier; OK after upgrade |
|
||||
| cal | ✅ | |
|
||||
| vikunja | ✅ | LXC 301 @ 10.0.10.159 |
|
||||
| mailcow | ✅ | ~1 min downtime for key inject |
|
||||
| listmonk | ✅ | ~1 min downtime for key inject |
|
||||
|
||||
### Maintenance playbook recap (`skip_reboot=true`)
|
||||
|
||||
| Host | Result |
|
||||
|------|--------|
|
||||
| pve201, pve10, caseware, cal, vikunja, mailcow, listmonk, identity, monitoring, hermes, levkin, portfolio, git-ci-01, sonarqube-01 | ✅ upgraded |
|
||||
| caddy | ✅ (as `root`; no `sudo` package on host) |
|
||||
| ansibleVM | ✅ (`vault_ansiblevm_become_password`) |
|
||||
| vaultwardenVM | ✅ (`vault_vaultwarden_become_password`) |
|
||||
|
||||
### Open security gaps (unchanged until `make security`)
|
||||
|
||||
| Control | Fleet status | Risk if fixed wrong |
|
||||
|---------|--------------|---------------------|
|
||||
| `PasswordAuthentication yes` | Most LXCs + both PVE | **Low break risk** if SSH keys tested first in a second session |
|
||||
| `PermitRootLogin yes` | pve201, pve10, sonarqube-01 | Same — use `prohibit-password`, not `no`, if you need root+key |
|
||||
| fail2ban | Off everywhere | Enabling is safe; may lock you out only if you brute-force yourself |
|
||||
| UFW | Off (except one dev LXC) | **Medium risk** — wrong rules drop SSH/80/443; apply via Ansible `roles/ssh` after allowlist |
|
||||
| unattended-upgrades | hermes, ansibleVM only | Safe; schedule reboots separately |
|
||||
| Proxmox :8006 | Open on LAN | Restrict in PVE firewall — **won't break VMs** |
|
||||
| Docker on `0.0.0.0` | identity, monitoring, vaultwarden, qBit | Bind to `127.0.0.1` — **can break access** if Caddy route missing; test URL after |
|
||||
| Tailscale | **Deferred** | Off by choice; remote access via **UniFi VPN** to LAN |
|
||||
|
||||
See [Risk explanations (2026-05-23)](#risk-explanations-2026-05-23) and [fail2ban vs password SSH](#fail2ban-vs-password-ssh) below.
|
||||
|
||||
---
|
||||
|
||||
## GPU-Dev (pve201 VM 104) — Ollama / LLMs
|
||||
|
||||
| Resource | Current |
|
||||
|----------|---------|
|
||||
| Host | pve201, VMID **104**, `GPU-Dev-Debian` |
|
||||
| LAN IP | **10.0.10.122** (inventory `devGPU` @ 10.0.30.63 is a different network — use `.122` from LAN) |
|
||||
| RAM | **64 GiB** guest (~60 GiB available when idle) |
|
||||
| GPU | **RTX 4080 16 GiB** (PCI passthrough `hostpci0`) |
|
||||
| Workload | **Ollama** already running (~3.6 GiB VRAM in sample) |
|
||||
|
||||
### Getting the most from RAM + GPU
|
||||
|
||||
1. **Right-size models to VRAM** — On a 16 GiB 4080, prefer quantised models that fit entirely in VRAM (e.g. 7B–14B Q4/Q5, or 32B Q2/Q3 if you accept quality trade-offs). If a model spills to CPU RAM, throughput drops sharply.
|
||||
2. **One heavy model at a time** — Ollama loads models on demand; set `OLLAMA_MAX_LOADED_MODELS=1` (or keep only one client) so you do not fragment 64 GiB RAM + 16 GiB VRAM across several large weights.
|
||||
3. **Parallel requests** — `OLLAMA_NUM_PARALLEL` defaults are conservative; raise only if VRAM headroom exists (watch `nvidia-smi` while under load).
|
||||
4. **Keep guest RAM for KV cache** — With 64 GiB you can run larger context windows; set `OLLAMA_CONTEXT_LENGTH` / model `num_ctx` to what you need, not maximum “just because”.
|
||||
5. **CPU offload only when needed** — set `num_gpu` so that all layers run on the GPU for speed; partial offload is for models that do not fit in VRAM, not for tuning.
|
||||
6. **Disk** — Store models on fast local disk (not NFS); `ollama pull` once, prune old tags periodically (`ollama list` / remove unused).
|
||||
7. **Proxmox** — Do not balloon GPU VM RAM; GPU passthrough already reserves most of the 64 GiB. Freeing pve201 meant lowering this VM from 72→64 GiB, not overcommitting other guests on 201.
|
||||
8. **Optional** — [Open WebUI](https://github.com/open-webui/open-webui) on localhost + Caddy TLS; bind Ollama to `127.0.0.1:11434` only (LAN via VPN).
|
||||
|
||||
**Not in Ansible yet:** add `devGPU` / `10.0.10.122` to inventory when you want playbooks (cron, hardening) on this box.
|
||||
|
||||
---
|
||||
|
||||
## fail2ban vs password SSH
|
||||
|
||||
**What fail2ban does:** After too many failed SSH logins from an IP, it adds a **temporary firewall ban** for that IP (typically 10–60 minutes). It does **not** disable password authentication globally.
|
||||
|
||||
**Can passwords stay on if fail2ban is on?** Technically yes — fail2ban only rate-limits brute force; passwords are still weaker than keys. Best practice on servers: **keys + `PasswordAuthentication no` + fail2ban** (defence in depth).
|
||||
|
||||
**Your Proxmox console fallback:** If you lock yourself out of SSH on a guest, you can still use **Proxmox → VM → Console** or `pct enter` / `qm guest exec` from pve201/pve10. That is a good break-glass path, but it is **not** a substitute for keys on hosts you manage daily — console is slow and easy to misconfigure under pressure.
|
||||
|
||||
**Recommendation:** Enable fail2ban via `make security` with `ignoreip` including `10.0.10.0/24` and your UniFi VPN client subnet. Then disable password SSH once keys work everywhere you care about.
|
||||
|
||||
---
|
||||
|
||||
## Risk explanations (2026-05-23)
|
||||
|
||||
### Password SSH (`PasswordAuthentication yes`)
|
||||
|
||||
**How bad:** High on internet-facing IPs; medium when reachable only from `10.0.10.0/24`. Anyone who can reach :22 can try passwords indefinitely (no fail2ban).
|
||||
|
||||
**Will fixing break things?** No, if you (1) confirm key login works, (2) set `PasswordAuthentication no`, (3) keep a second SSH session open, (4) reload sshd. Breakage happens only if keys are missing/wrong.
|
||||
|
||||
### Root login (`PermitRootLogin yes` on hypervisors)
|
||||
|
||||
**How bad:** High — root + password on PVE is full cluster compromise.
|
||||
|
||||
**Will fixing break things?** Use `prohibit-password` (keys only), not `no`, unless you have another admin user with sudo. Ansible playbooks expect root on PVE today.
|
||||
|
||||
### fail2ban off
|
||||
|
||||
**How bad:** Medium — relies on LAN trust; SSH noise from scanners still fills logs.
|
||||
|
||||
**Will fixing break things?** Rarely. Tune `ignoreip` to your admin IP/subnet so your own typos don't ban you.
|
||||
|
||||
### UFW off
|
||||
|
||||
**How bad:** Medium on segmented LAN; high if any host has a public IP.
|
||||
|
||||
**Will fixing break things?** **Yes, if misconfigured** — default deny without allowing 22 from admin IP, 80/443 from Caddy, or Docker-published ports you still need. Use Ansible `roles/ssh` (UFW after SSH rules) and test.
|
||||
|
||||
### unattended-upgrades off
|
||||
|
||||
**How bad:** Medium — security patches lag until manual maintenance.
|
||||
|
||||
**Will fixing break things?** Usually no. Kernel updates may require reboot; use `Unattended-Upgrade::Automatic-Reboot "false"` until you want reboot windows.
|
||||
|
||||
### Proxmox UI :8006 exposed
|
||||
|
||||
**How bad:** **Critical** on untrusted networks — API gives VM/storage control.
|
||||
|
||||
**Will fixing break things?** Restricting to `10.0.10.0/24` does not break normal LAN admin access.
|
||||
|
||||
### HTTP services on all interfaces (8080, 3000, …)
|
||||
|
||||
**How bad:** High without TLS/auth at the edge; medium behind Caddy + LAN only.
|
||||
|
||||
**Will fixing break things?** **Yes** if you bind to `127.0.0.1` before Caddy `reverse_proxy` is updated. Order: Caddy route → test → then bind Docker to localhost.
|
||||
|
||||
### Remote access (Tailscale deferred)
|
||||
|
||||
**Decision:** Tailscale off; use **UniFi site-to-site / VPN** into `10.0.10.0/24` for admin and Ollama/GPU access.
|
||||
|
||||
**Security:** Ensure VPN is required for SSH and Proxmox :8006 from outside; do not port-forward :22/:8006 on the router without IP allowlists.
|
||||
|
||||
### pve201 RAM (was 97% used)
|
||||
|
||||
**How bad:** **Critical** — OOM kills guests, swap thrashing.
|
||||
|
||||
**Mitigation done:** VM 104 reduced 73728→65536 MiB (~8 GiB freed on hypervisor). Still tight; consider moving git-ci-01 or other workloads to pve10.
|
||||
|
||||
---
|
||||
|
||||
## 2026-05-20 — Original audit
|
||||
|
||||
**Scope:** Proxmox nodes `pve201` (10.0.10.201) and `pve10` (10.0.10.10), all LXCs via `pct exec`, SSH deep-dive on hypervisors.
|
||||
|
||||
---
|
||||
|
||||
## Executive summary
|
||||
|
||||
| Area | Critical | High | Medium |
|
||||
|------|----------|------|--------|
|
||||
| Hypervisors (201, 10) | 2 | 4 | 2 |
|
||||
| LXCs on 201 (10 running) | 0 | 10 | 8 |
|
||||
| LXCs on 10 (3 running) | 0 | 3 | 3 |
|
||||
|
||||
**Top priorities**
|
||||
|
||||
1. Harden **SSH on both Proxmox hosts** (root + passwords currently allowed).
|
||||
2. Restrict **Proxmox API/UI port 8006** to admin IPs.
|
||||
3. Disable **password SSH on all LXCs**; deploy keys + `make copy-ssh-keys` for inventory IPs.
|
||||
4. Patch hosts with **40–105** pending apt upgrades (hypervisors worst).
|
||||
5. Put **HTTP services** (8080, 8000, qBit, etc.) behind reverse proxy + TLS or bind to internal IPs.
|
||||
|
||||
---
|
||||
|
||||
## Proxmox hypervisors
|
||||
|
||||
### pve201 — 10.0.10.201 (`pve`)
|
||||
|
||||
| Resource | Status |
|
||||
|----------|--------|
|
||||
| OS | Debian 12, PVE 8.4.16, kernel 6.8.12-18-pve |
|
||||
| RAM free | ~2.5 GB / 126 GB (**critical**) |
|
||||
| Pending apt | **105** |
|
||||
| UFW / fail2ban / unattended-upgrades | **None** |
|
||||
|
||||
#### SSH audit (dedicated)
|
||||
|
||||
| Setting | Current | Target |
|
||||
|---------|---------|--------|
|
||||
| `permitrootlogin` | **yes** | `prohibit-password` |
|
||||
| `passwordauthentication` | **yes** | `no` |
|
||||
| `pubkeyauthentication` | yes | yes |
|
||||
| `maxauthtries` | 6 | 3–4 |
|
||||
| `x11forwarding` | yes | no (on servers) |
|
||||
| Root keys | 3 keys in `authorized_keys` | audit/remove unused |
|
||||
|
||||
#### Exposed services
|
||||
|
||||
| Port | Service | Risk |
|
||||
|------|---------|------|
|
||||
| 22 | SSH | Brute-force (no fail2ban) |
|
||||
| 8006 | Proxmox API/UI | **Critical** — full cluster control |
|
||||
| 3128 | spiceproxy | Medium |
|
||||
| 111 | rpcbind | Low — reduce exposure |
|
||||
|
||||
#### Fixes (pve201)
|
||||
|
||||
```bash
|
||||
# 1) SSH — prefer Ansible after limiting to your IP
|
||||
make copy-ssh-key HOST=pve201 # if needed
|
||||
# Manual quick fix on host:
|
||||
sed -i 's/^#*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
|
||||
sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
|
||||
sshd -t && systemctl reload sshd
|
||||
|
||||
# 2) Proxmox firewall — Datacenter → Firewall → restrict 8006 to 10.0.10.0/24 or admin IP
|
||||
# Or iptables on host for port 8006
|
||||
|
||||
# 3) fail2ban
|
||||
apt install fail2ban -y
|
||||
systemctl enable --now fail2ban
|
||||
|
||||
# 4) Auto security updates
|
||||
apt install unattended-upgrades apt-listchanges -y
|
||||
dpkg-reconfigure -plow unattended-upgrades
|
||||
|
||||
# 5) Patch
|
||||
apt update && apt upgrade -y
|
||||
```
|
||||
|
||||
**Ansible (when ready):** add `pve201` / `pve10` to a `proxmox` group play with `roles/ssh` + `roles/monitoring_server` (fail2ban).
|
||||
Do **not** lock yourself out — test with a second session first.
|
||||
|
||||
---
|
||||
|
||||
### pve10 — 10.0.10.10 (`PVENAS`)
|
||||
|
||||
| Resource | Status |
|
||||
|----------|--------|
|
||||
| OS | Debian 13 (trixie), PVE, kernel 6.17.13-3-pve |
|
||||
| Load | **~30** on 24 CPUs (overloaded) |
|
||||
| Pending apt | **92** |
|
||||
| UFW / fail2ban / unattended-upgrades | **None** |
|
||||
| ZFS `NAS.SP00` | **inactive** (I/O suspended) |
|
||||
| PBS `PVEBUVD00` → 10.0.10.200:8007 | **unreachable** |
|
||||
|
||||
#### SSH audit (dedicated)
|
||||
|
||||
Same as pve201: `permitrootlogin yes`, `passwordauthentication yes`, 3 root authorized_keys.
|
||||
|
||||
#### Exposed services
|
||||
|
||||
| Port | Service | Risk |
|
||||
|------|---------|------|
|
||||
| 22 | SSH | High |
|
||||
| 8006 | Proxmox API/UI | **Critical** |
|
||||
| 2049, mountd, statd | NFS/RPC | High on LAN |
|
||||
| 3128 | spiceproxy | Medium |
|
||||
|
||||
#### Fixes (pve10)
|
||||
|
||||
Same SSH / fail2ban / unattended-upgrades / patch steps as pve201.
|
||||
|
||||
Additional:
|
||||
|
||||
```bash
|
||||
# Investigate ZFS pool
|
||||
zpool status NAS.SP00
|
||||
# Fix PBS connectivity or remove stale datastore from Proxmox UI
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## LXCs on pve201 (via `pct exec`)
|
||||
|
||||
| VMID | Name | IP | Status | SSH root | Password auth | UFW | fail2ban | Upgrades | Public services |
|
||||
|------|------|-----|--------|----------|---------------|-----|----------|----------|-----------------|
|
||||
| 301 | vikunja-debian | 10.0.10.159 | running | without-password | **yes** | no | no | 0 | **3456**, 22 |
|
||||
| 302 | qbit-debian | 10.0.10.91 | running | without-password | **yes** | no | no | 0 | **8080** (qBit), 22 |
|
||||
| 303 | searchXNG-debian | 10.0.10.70 | running | without-password | **yes** | no | no | **83** | **8080**, 22 |
|
||||
| 304 | wireguard-debian | 10.0.10.192 | running | without-password | **yes** | no | no | 0 | 22 |
|
||||
| 305 | kuma-debian | 10.0.10.197 | **stopped** | — | — | — | — | — | replaced by LXC 218 |
|
||||
| 306 | portfolio | — | **destroyed** | — | — | — | — | — | migrated → pve10 LXC **219** @ `10.0.10.106` (purged 2026-05-22) |
|
||||
| 307 | jobber-delian | 10.0.10.178 | running | without-password | **yes** | no | no | **83** | **3005**, 22 |
|
||||
| 308 | stirling-pdf | 10.0.10.43 | running | without-password | **yes** | no | no | 0 | **8080**, 22 |
|
||||
| 9001 | pote-dev | 10.0.10.114 | **stopped** | — | — | — | — | — | — |
|
||||
| 9101 | punimTagFE-dev | 10.0.10.121 | running | without-password | **yes** | **active** | no | **89** | **8000**, 111, 22 |
|
||||
| 9401 | mirrormatch-dev | 10.0.10.141 | **stopped** | — | — | — | — | — | — |
|
||||
|
||||
**Inventory mapping:** `vikunja` → 159 (LXC 301), `qBittorrent` → 91, `punimTag` app → 121.
|
||||
|
||||
### Common LXC issues (pve201)
|
||||
|
||||
| Issue | Severity | Fix |
|
||||
|-------|----------|-----|
|
||||
| `passwordauthentication yes` on all LXCs | High | Set `PasswordAuthentication no` in `/etc/ssh/sshd_config`, reload sshd |
|
||||
| No fail2ban | High | Install fail2ban or rely on Proxmox FW + LAN segmentation |
|
||||
| Apps on `0.0.0.0:8080` / 8000 / 3456 | High | Bind to localhost + Caddy, or restrict via Proxmox guest firewall (`firewall=1` on net0 — enable rules) |
|
||||
| 79–89 pending upgrades on several CTs | Medium | `pct exec <id> -- bash -c 'apt update && apt upgrade -y'` |
|
||||
| Stopped dev CTs (9001, 9401) | Low | Start when needed or keep stopped to reduce attack surface |
|
||||
|
||||
### Per-LXC fixes (pve201)
|
||||
|
||||
```bash
|
||||
# Example: harden + patch vikunja (301) from Proxmox host
|
||||
pct exec 301 -- sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
|
||||
pct exec 301 -- systemctl reload ssh
|
||||
|
||||
# Patch container
|
||||
pct exec 303 -- bash -c 'apt update && apt upgrade -y'
|
||||
|
||||
# Copy your SSH key (from Mac, once password/key works)
|
||||
make copy-ssh-key HOST=vikunja # 10.0.10.159
|
||||
make copy-ssh-key HOST=qBittorrent # 10.0.10.91
|
||||
```
|
||||
|
||||
**punimTagFE-dev (9101):** Only LXC with **UFW active** — extend rules to deny inbound except 22 from admin subnet; still disable password auth.
|
||||
|
||||
---
|
||||
|
||||
## LXCs on pve10 (via `pct exec`)
|
||||
|
||||
| VMID | Name | IP | Status | SSH root | Password auth | UFW | fail2ban | Upgrades | Public services |
|
||||
|------|------|-----|--------|----------|---------------|-----|----------|----------|-----------------|
|
||||
| 210 | cal | 10.0.10.228 | running | without-password | **yes** | no | no | 0 | **3000**, 22 |
|
||||
| 215 | caseware | 10.0.10.105 | running | without-password | **yes** | no | no | **40** | **80** (nginx), 22 |
|
||||
| 216 | auto | 10.0.10.59 | running | without-password | **yes** | no | no | **40** | **80** (nginx), 22 |
|
||||
|
||||
**Inventory mapping:** `caseware` → 105, `auto` → 59.
|
||||
|
||||
### Fixes (pve10 LXCs)
|
||||
|
||||
```bash
|
||||
# SSH harden caseware (215)
|
||||
pct exec 215 -- sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
|
||||
pct exec 215 -- systemctl reload sshd
|
||||
|
||||
# Patch
|
||||
pct exec 215 -- bash -c 'apt update && apt upgrade -y'
|
||||
pct exec 216 -- bash -c 'apt update && apt upgrade -y'
|
||||
|
||||
# Deploy keys from Mac
|
||||
make copy-ssh-key HOST=caseware
|
||||
make copy-ssh-key HOST=auto
|
||||
```
|
||||
|
||||
**HTTP port 80 on caseware/auto:** Ensure TLS termination on Caddy (inventory host `caddy` 10.0.10.50) and no plain HTTP from WAN if exposed.
|
||||
|
||||
---
|
||||
|
||||
## SSH hardening checklist (all Linux targets)
|
||||
|
||||
Use this order to avoid lockout:
|
||||
|
||||
1. Confirm your key works: `ssh -o BatchMode=yes root@<ip> true`
|
||||
2. Set `PasswordAuthentication no`
|
||||
3. Set `PermitRootLogin prohibit-password` (LXCs already `without-password` — equivalent for keys-only)
|
||||
4. `sshd -t && systemctl reload sshd`
|
||||
5. Open **second terminal** and test before closing first
|
||||
6. Optional: change SSH port, `MaxAuthTries 4`, disable `X11Forwarding`
|
||||
|
||||
**Ansible alignment:**
|
||||
|
||||
```bash
|
||||
# After keys on host
|
||||
make dev HOST=<hostname> --tags security
|
||||
# or role ssh via playbooks that include roles/ssh
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Re-run audits
|
||||
|
||||
```bash
|
||||
# Hypervisor full audit
|
||||
ssh root@10.0.10.201 'bash -s' < scripts/security-audit-remote.sh
|
||||
ssh root@10.0.10.10 'bash -s' < scripts/security-audit-remote.sh
|
||||
|
||||
# Hypervisor SSH-only
|
||||
ssh root@10.0.10.201 'bash -s' < scripts/security-audit-ssh.sh
|
||||
|
||||
# All LXCs on a node
|
||||
ssh root@10.0.10.201 'bash -s' < scripts/security-audit-lxc-via-pve.sh
|
||||
ssh root@10.0.10.10 'bash -s' < scripts/security-audit-lxc-via-pve.sh
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Tracking
|
||||
|
||||
| Item | Completed | Status |
|
||||
|------|-------|--------|
|
||||
| SSH keys caseware, auto, cal, vikunja, mailcow, listmonk | 2026-05-23 | ☑ |
|
||||
| Fleet `apt upgrade` (no reboot) | 2026-05-23 | ☑ all previously failed hosts fixed |
|
||||
| Tier 1 cron (journal + apt) | 2026-05-23 | ☑ PVE + most hosts via Ansible |
|
||||
| Tier 2 cron (docker prune) | 2026-05-23 | ☑ identity, monitoring, vikunja; git-ci-01 keeps its existing `docker-prune-ci` |
|
||||
| VM 104 RAM 72→64 GiB | 2026-05-23 | ☑ |
|
||||
| Inventory `vikunja` rename | 2026-05-23 | ☑ |
|
||||
| Fix `host_vars` ansibleVM / listmonk merge | 2026-05-23 | ☑ plain YAML (review `*.vault-bak`) |
|
||||
| SSH harden pve201 | | ☐ |
|
||||
| SSH harden pve10 | | ☐ |
|
||||
| Restrict 8006 on both nodes | | ☐ |
|
||||
| fail2ban on hypervisors | | ☐ |
|
||||
| `make security` on production groups | | ☐ |
|
||||
| Disable password SSH on all LXCs | | ☐ |
|
||||
| `copy-ssh-keys` remaining inventory | | ☐ partial |
|
||||
| TLS / localhost bind for :8080 services | | ☐ |
|
||||
| unattended-upgrades all production | | ☐ |
|
||||
| Tailscale re-auth | | ⏸ deferred (UniFi VPN) |
|
||||
| Fix ZFS NAS.SP00 on pve10 | | ☐ |
|
||||
| caddy Ansible as root | 2026-05-23 | ☑ |
|
||||
| vaultwardenVM / ansibleVM become in vault | 2026-05-23 | ☑ |
|
||||
| Add GPU-Dev `10.0.10.122` to inventory | | ☐ |
|
||||
| Ollama bind localhost + optional Open WebUI | | ☐ |
|
||||
|
||||
---
|
||||
|
||||
## Next steps (priority)
|
||||
|
||||
1. **`make security`** on one site host (e.g. caseware) with a second SSH session open — disable password SSH, enable UFW + fail2ban (`ignoreip` = LAN + VPN pool).
|
||||
2. **Restrict Proxmox :8006** to `10.0.10.0/24` + VPN subnet on pve201 and pve10.
|
||||
3. **Bind internal Docker ports** on identity / monitoring / vaultwarden to `127.0.0.1` after confirming Caddy routes.
|
||||
4. **GPU-Dev:** point clients at `http://10.0.10.122:11434` over VPN; tune Ollama env vars; add host to inventory when automating.
|
||||
5. **unattended-upgrades** on production LXCs (reboot policy manual).
|
||||
6. Review `host_vars/*.vault-bak` and merge any secrets still needed into vault + plain host_vars.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- **[Security remediation plan](security-remediation-plan.md)** — phased fixes (critical → low) and login model
|
||||
- [Security hardening guide](security.md)
|
||||
- [SECURITY_HARDENING_PLAN.md](../SECURITY_HARDENING_PLAN.md)
|
||||
- Role defaults: `roles/ssh/defaults/main.yml`
|
||||
490
docs/guides/security-remediation-plan.md
Normal file
490
docs/guides/security-remediation-plan.md
Normal file
@ -0,0 +1,490 @@
|
||||
# Security Remediation Plan
|
||||
|
||||
**Based on:** [security-audit-report.md](security-audit-report.md) (last re-run **2026-05-23**)
|
||||
**Goal:** Align hosts with `roles/ssh` (keys only, no password SSH) without locking yourself out.
|
||||
|
||||
**Homelab rollout:** [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md) — separate track; some overlap (SSH keys, patching).
|
||||
|
||||
---
|
||||
|
||||
## Progress summary (2026-05-23)
|
||||
|
||||
| Phase | Status | Notes |
|
||||
|-------|--------|--------|
|
||||
| **0 Backup + prep** | 🟡 Partial | Fleet SSH keys + apt done; formal PVE snapshot checklist not fully ticked |
|
||||
| **1 Critical** | 🟡 Partial | SSH keys on many hosts ✅; **Proxmox password SSH off** ⏳; **8006 restrict** ⏳; pve201 RAM improved (GPU 64G, DebianDesktop 24G pending reboot) |
|
||||
| **2 High** | 🟡 Partial | fail2ban / full LXC password-off / port binding — mostly ⏳ |
|
||||
| **3 Medium** | ⏳ | unattended-upgrades, `make security`, UFW |
|
||||
| **4 Low** | ⏳ | rpcbind, naming, audit Makefile |
|
||||
|
||||
### Completed since original audit (see [security-audit-report.md](security-audit-report.md))
|
||||
|
||||
- SSH keys → caseware, auto, cal, vikunja, mailcow, listmonk (root)
|
||||
- Fleet `apt upgrade` (14 hosts, no reboot)
|
||||
- Tier 1 cron (journal + apt) on PVE, sites, comms, etc.
|
||||
- Tier 2 docker prune on identity, monitoring, vikunja
|
||||
- GPU VM 104 RAM 72→64 GiB on pve201
|
||||
- Fixed `host_vars` ansibleVM / listmonk (plain YAML)
|
||||
|
||||
### Recommended order (security, alongside homelab P0)
|
||||
|
||||
1. **Phase 0** — PVE `sshd_config` backup + CT snapshots before any `PasswordAuthentication no`
|
||||
2. **Phase 1** — pve201 + pve10 SSH keys-only; restrict 8006; finish keys on caddy/ansibleVM/vaultwarden if still pending
|
||||
3. **Phase 2** — LXC password auth off, fail2ban, patch, reduce exposed app ports (qBit, searchXNG, punimTag)
|
||||
4. **Phase 3–4** — unattended-upgrades, Ansible security plays, Mac hardening
|
||||
|
||||
---
|
||||
|
||||
## How you should log in (not “ladmin → root” everywhere)
|
||||
|
||||
Your inventory uses **different users on purpose**. After hardening, the pattern is:
|
||||
|
||||
| Host type | Inventory user | How you work | Root access |
|
||||
|-----------|----------------|--------------|-------------|
|
||||
| **Proxmox** (`pve201`, `pve10`) | `root` | `ssh root@10.0.10.201` with **your SSH key** | Direct root (keys only, no password) |
|
||||
| **Dev / QA** (`dev01`, `git-ci-01`, …) | `ladmin` (or `beast`, `master`) | `ssh ladmin@host` with **key** | `sudo` for admin tasks; Ansible `become: true` |
|
||||
| **Services** (caddy, jellyfin, …) | often `root` | `ssh root@host` with **key** | Direct root (keys only) |
|
||||
| **Optional bootstrap** | — | `make bootstrap-root-ssh HOST=x` | One-time: key on `ladmin` → `su` to install **root** key → then harden SSH |
|
||||
|
||||
**You do not need** “SSH ladmin then su root” on Proxmox if you keep managing them as `root` in inventory — you need **root + SSH key + passwords disabled**.
|
||||
|
||||
**You do** use ladmin → sudo on dev/qa boxes where `ansible_user=ladmin`. That is normal: unprivileged (or sudo) login + elevation, not password guessing on root.
|
||||
|
||||
**`PermitRootLogin prohibit-password`** means: root may log in **only with a key**, never with a password. It does **not** mean “ban root; use ladmin only.”
|
||||
|
||||
**`PasswordAuthentication no`** means: **nobody** (root, ladmin, etc.) can SSH with a password — keys only.
|
||||
|
||||
---
|
||||
|
||||
## Phases overview
|
||||
|
||||
| Phase | When | Focus |
|
||||
|-------|------|--------|
|
||||
| **0 — Backup + prep** | Before any change | Snapshots, `sshd` copies, git commit, keys, second SSH session |
|
||||
| **1 — Critical** | Week 1 | Proxmox SSH + 8006, keys everywhere, RAM on 201 |
|
||||
| **2 — High** | Week 1–2 | LXCs SSH, fail2ban, patching, app ports |
|
||||
| **3 — Medium** | Week 2–4 | unattended-upgrades, Ansible `make security`, TLS |
|
||||
| **4 — Low** | Ongoing | rpcbind, naming, stopped CTs, Mac, docs |
|
||||
|
||||
---
|
||||
|
||||
## Phase 0 — Backup (before any hardening)
|
||||
|
||||
**Yes — back up first.** SSH and firewall mistakes can lock you out; patches can break services. Use the right backup type per layer.
|
||||
|
||||
### What to back up (by layer)
|
||||
|
||||
| Layer | What | Method | Rollback if SSH breaks |
|
||||
|-------|------|--------|-------------------------|
|
||||
| **Your Mac** | Ansible repo + `~/.ansible-vault-pass` (secure copy) + SSH keys | Time Machine / git commit / copy `~/.ssh` | N/A |
|
||||
| **Proxmox hosts** | `/etc/ssh/sshd_config`, `/etc/pve/`, firewall rules | Copy files + **Proxmox snapshot** optional | **Console** in web UI (`pct enter` / VM console) |
|
||||
| **Each LXC/VM** | Full guest state | **Proxmox snapshot** or `vzdump` | Restore snapshot or rollback CT |
|
||||
| **Dev workstations** | OS + home (if Timeshift installed) | `make timeshift-snapshot HOST=dev02` | `make timeshift-restore` |
|
||||
| **Central PBS** | — | **Not reliable today** — `10.0.10.200` unreachable | Fix PBS later; don’t depend on it for this work |
|
||||
|
||||
### 0A — Mac / repo (5 minutes)
|
||||
|
||||
```bash
|
||||
cd ~/Documents/code/ansible
|
||||
git status
|
||||
git add -A && git commit -m "Pre-security-hardening baseline" # if you want a restore point
|
||||
|
||||
# Store vault passphrase somewhere safe (password manager), NOT only on disk
|
||||
# Optional: encrypted copy of ~/.ansible-vault-pass offline
|
||||
```
|
||||
|
||||
### 0B — Proxmox: config files (both nodes)
|
||||
|
||||
```bash
|
||||
for pve in 10.0.10.201 10.0.10.10; do
|
||||
ssh root@$pve "mkdir -p /root/pre-hardening-$(date +%Y%m%d) && \
|
||||
cp -a /etc/ssh/sshd_config /root/pre-hardening-$(date +%Y%m%d)/ && \
|
||||
cp -a /etc/pve /root/pre-hardening-$(date +%Y%m%d)/pve-etc 2>/dev/null; \
|
||||
ls -la /root/pre-hardening-$(date +%Y%m%d)/"
|
||||
done
|
||||
```
|
||||
|
||||
### 0C — Proxmox: snapshots (recommended before SSH/firewall on PVE)
|
||||
|
||||
**Running LXCs on pve201** (from audit): 301–308, 9101 — snapshot each before `pct exec` SSH changes.
|
||||
|
||||
**Running LXCs on pve10:** 210, 215, 216.
|
||||
|
||||
```bash
|
||||
# On pve201 — snapshot (fast, local-lvm; needs free space)
|
||||
ssh root@10.0.10.201 'for id in 301 302 303 304 305 306 307 308 9101; do
|
||||
name=$(pct list | awk -v i="$id" "\$1==i {print \$NF}")
|
||||
echo "Snapshot vmid=$id ($name)"
|
||||
pct snapshot $id pre-ssh-hardening-$(date +%Y%m%d) || echo "FAILED $id"
|
||||
done'
|
||||
|
||||
# On pve10
|
||||
ssh root@10.0.10.10 'for id in 210 215 216; do
|
||||
pct snapshot $id pre-ssh-hardening-$(date +%Y%m%d) || echo "FAILED $id"
|
||||
done'
|
||||
```
|
||||
|
||||
**Optional full backup** (slower, larger) — important CTs only if snapshots fail (low disk on 201):
|
||||
|
||||
```bash
|
||||
vzdump <vmid> --storage local --mode snapshot --compress zstd
|
||||
```
|
||||
|
||||
**Check space on pve201 first** (only ~2.5 GB RAM is free, and each snapshot needs free disk space on `local-lvm`):
|
||||
|
||||
```bash
|
||||
ssh root@10.0.10.201 'pvesm status; free -h'
|
||||
```
|
||||
|
||||
If snapshots fail for lack of space: do **0B only** on PVE, then harden SSH using **Proxmox console** as safety net (no snapshot).
|
||||
|
||||
### 0D — Inventory VMs with Timeshift (`dev` group)
|
||||
|
||||
Only where Timeshift is already installed (e.g. `dev02`):
|
||||
|
||||
```bash
|
||||
make timeshift-snapshot HOST=dev02
|
||||
make timeshift-list HOST=dev02
|
||||
```
|
||||
|
||||
Not used on Proxmox or most LXCs by default.
|
||||
|
||||
### 0E — Export current SSH settings (audit trail)
|
||||
|
||||
```bash
|
||||
mkdir -p ~/security-hardening-backup-$(date +%Y%m%d)
|
||||
ssh root@10.0.10.201 'bash -s' < scripts/security-audit-ssh.sh > ~/security-hardening-backup-$(date +%Y%m%d)/pve201-ssh.txt
|
||||
ssh root@10.0.10.10 'bash -s' < scripts/security-audit-ssh.sh > ~/security-hardening-backup-$(date +%Y%m%d)/pve10-ssh.txt
|
||||
ssh root@10.0.10.201 'bash -s' < scripts/security-audit-lxc-via-pve.sh > ~/security-hardening-backup-$(date +%Y%m%d)/pve201-lxc.txt
|
||||
```
|
||||
|
||||
### Backup exit criteria (do not skip)
|
||||
|
||||
- [ ] Git commit (or branch) for ansible repo
|
||||
- [ ] `sshd_config` (+ optional `/etc/pve`) copied on **both** PVE nodes
|
||||
- [ ] Proxmox snapshots **or** documented reason skipped (disk/RAM)
|
||||
- [ ] Second SSH session tested to `pve201` / `pve10`
|
||||
- [ ] You know how to open **Proxmox → VM/CT → Console** if SSH fails
|
||||
|
||||
### Rollback quick reference
|
||||
|
||||
| Problem | Rollback |
|
||||
|---------|----------|
|
||||
| Bad `sshd_config` on PVE | Console → restore `/root/pre-hardening-*/sshd_config` → `systemctl reload sshd` |
|
||||
| Bad LXC SSH | `pct rollback <vmid> pre-ssh-hardening-YYYYMMDD` |
|
||||
| Bad patch on CT | Same snapshot rollback |
|
||||
| Locked out of 8006 from the LAN | Console → disable the offending datacenter firewall rule (or temporarily disable the datacenter firewall) |
|
||||
|
||||
---
|
||||
|
||||
## Phase 0 — Prep (after backups)
|
||||
|
||||
| # | Task | Command / notes |
|
||||
|---|------|----------------|
|
||||
| 0.1 | Confirm vault password file | `~/.ansible-vault-pass` |
|
||||
| 0.2 | Bootstrap control node | `make bootstrap` |
|
||||
| 0.3 | Verify key on Proxmox | `ssh -o BatchMode=yes root@10.0.10.201 true` |
|
||||
| 0.4 | Copy keys to inventory | `make copy-ssh-keys` (or per group) |
|
||||
| 0.5 | Document admin IP | e.g. `10.0.10.127` for firewall rules |
|
||||
| 0.6 | Open **second terminal** before changing `sshd` | Test login before closing first session |
|
||||
|
||||
**Exit criteria:** Backups done (above) + key login works to `pve201`, `pve10`, and hosts you will harden next.
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — Critical
|
||||
|
||||
### 1.1 Proxmox SSH (pve201 + pve10)
|
||||
|
||||
**Issue:** `PermitRootLogin yes` + `PasswordAuthentication yes` — password brute force on root.
|
||||
|
||||
**Fix (per host, after 0.3):**
|
||||
|
||||
```bash
|
||||
# On pve201 OR pve10 — keep existing session open!
|
||||
sed -i 's/^#*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
|
||||
sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
|
||||
sshd -t && systemctl reload sshd
|
||||
```
|
||||
|
||||
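**Verify (new terminal):** `ssh -o BatchMode=yes root@10.0.10.201 true` must succeed (key login works), and `ssh -o PubkeyAuthentication=no -o PreferredAuthentications=password root@10.0.10.201` must be refused (password login is off).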
|
||||
|
||||
**Ansible (later):** dedicated play for `[proxmox]` with `roles/ssh` (today `make security` only targets `dev` playbook).
|
||||
|
||||
| Host | Priority |
|
||||
|------|----------|
|
||||
| pve201 | P0 |
|
||||
| pve10 | P0 |
|
||||
|
||||
---
|
||||
|
||||
### 1.2 Restrict Proxmox UI/API (port 8006)
|
||||
|
||||
**Issue:** Anyone on LAN can hit full cluster API.
|
||||
|
||||
**Fix (choose one):**
|
||||
|
||||
- **A — Proxmox firewall (recommended):** Datacenter → Firewall → add rule: accept `8006` from `10.0.10.0/24` and/or your Mac IP; drop others.
|
||||
- **B — SSH tunnel only:** no LAN exposure; `ssh -L 8006:127.0.0.1:8006 root@10.0.10.201` → browser `https://127.0.0.1:8006`.
|
||||
|
||||
**Do not** block 8006 globally without A or B in place.
|
||||
|
||||
---
|
||||
|
||||
### 1.3 RAM on pve201 (~2.5 GB free)
|
||||
|
||||
**Issue:** New guests or updates risk OOM.
|
||||
|
||||
**Fix:**
|
||||
|
||||
```bash
|
||||
ssh root@10.0.10.201 'free -h; pct list; qm list'
|
||||
# Stop non-essential CTs/VMs or migrate workload to pve10
|
||||
```
|
||||
|
||||
Review running guests from `make proxmox-info ALL=true`; stop labs you do not need.
|
||||
|
||||
---
|
||||
|
||||
### 1.4 Deploy SSH keys to unreachable inventory hosts
|
||||
|
||||
**Issue:** Cannot audit or Ansible-manage hosts without keys.
|
||||
|
||||
**Order:**
|
||||
|
||||
1. `make copy-ssh-key HOST=caddy` (and each `[services]` host)
|
||||
2. `make bootstrap-root-ssh HOST=listmonk` where root password still works but key does not
|
||||
3. `make copy-ssh-keys GROUP=qa` for `ladmin` hosts
|
||||
|
||||
**Exit criteria:** `make ping` succeeds for each group you will harden in phase 2.
|
||||
|
||||
---
|
||||
|
||||
## Phase 2 — High
|
||||
|
||||
### 2.1 LXC SSH — disable password auth (all running CTs)
|
||||
|
||||
**Issue:** `passwordauthentication yes` on every audited LXC.
|
||||
|
||||
**Fix from Proxmox host (no Mac SSH to CT required):**
|
||||
|
||||
```bash
|
||||
# pve201 — example for each running VMID
|
||||
for id in 301 302 303 304 305 306 307 308 9101; do
|
||||
pct exec $id -- sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
|
||||
pct exec $id -- bash -c 'sshd -t && { systemctl reload sshd || systemctl reload ssh; }' || echo "FAILED $id"
|
||||
done
|
||||
|
||||
# pve10
|
||||
for id in 210 215 216; do
|
||||
pct exec $id -- sed -i 's/^#*PasswordAuthentication.*/PasswordAuthentication no/' /etc/ssh/sshd_config
|
||||
pct exec $id -- bash -c 'sshd -t && { systemctl reload sshd || systemctl reload ssh; }' || echo "FAILED $id"
|
||||
done
|
||||
```
|
||||
|
||||
**Before disable:** install your key on CTs you need (`make copy-ssh-key HOST=vikanjans`, etc.).
|
||||
|
||||
**Note:** CTs already have `permitrootlogin without-password` — keep that; only turn off passwords.
|
||||
|
||||
---
|
||||
|
||||
### 2.2 fail2ban on hypervisors
|
||||
|
||||
**Issue:** No brute-force protection on SSH (and eventually 8006 if proxied).
|
||||
|
||||
```bash
|
||||
ssh root@10.0.10.201 'apt install -y fail2ban && systemctl enable --now fail2ban'
|
||||
ssh root@10.0.10.10 'apt install -y fail2ban && systemctl enable --now fail2ban'
|
||||
```
|
||||
|
||||
Optional: extend to high-value LXCs via `roles/monitoring_server` or manual install.
|
||||
|
||||
---
|
||||
|
||||
### 2.3 Patch backlog
|
||||
|
||||
| Target | Pending | Action |
|
||||
|--------|---------|--------|
|
||||
| pve201 | ~105 | `apt update && apt full-upgrade -y` (maintenance window; Proxmox hosts need `full-upgrade`/`dist-upgrade`, not plain `upgrade`) |
|
||||
| pve10 | ~92 | same |
|
||||
| LXCs 303, 306, 307, 9101 | 79–89 | `pct exec <id> -- bash -c 'apt update && apt upgrade -y'` (the `bash -c` wrapper keeps both commands inside the CT) |
|
||||
| caseware, auto (pve10) | ~40 | same |
|
||||
|
||||
**Order:** hypervisors first (after snapshot), then LXCs one by one.
|
||||
|
||||
---
|
||||
|
||||
### 2.4 Application ports on `0.0.0.0`
|
||||
|
||||
**Issue:** HTTP services exposed on LAN without TLS/auth.
|
||||
|
||||
| LXC / host | Port | Fix |
|
||||
|------------|------|-----|
|
||||
| qbit (91) | 8080 | Prefer VPN; or Caddy + auth; bind to internal IP |
|
||||
| searchXNG (70) | 8080 | Same |
|
||||
| punimTagFE (121) | 8000 | Behind Caddy; firewall allow only 10.0.10.0/24 |
|
||||
| vaultwarden (142) | 8080 | Already in inventory — reverse proxy + TLS |
|
||||
| portfolio | **106:80** (pve10 LXC 219, nginx) | Migrated 2026-05-22; pve201 LXC **306 destroyed** |
|
||||
| vikunja (159) | 3456 | Proxy via Caddy (`todo.levkin.ca`) |
|
||||
|
||||
**Pattern:** App listens `127.0.0.1` only; **Caddy** (`10.0.10.50`) terminates TLS for public URLs in inventory.
|
||||
|
||||
---
|
||||
|
||||
### 2.5 pve10 infrastructure
|
||||
|
||||
| Issue | Fix |
|
||||
|-------|-----|
|
||||
| ZFS `NAS.SP00` suspended | `zpool status`; import/clear errors |
|
||||
| PBS 10.0.10.200 unreachable | Fix network/service or remove stale datastore |
|
||||
| Load ~30 | Identify heavy VMs; migrate or stop |
|
||||
|
||||
---
|
||||
|
||||
## Phase 3 — Medium
|
||||
|
||||
### 3.1 unattended-upgrades
|
||||
|
||||
Hypervisors + important LXCs:
|
||||
|
||||
```bash
|
||||
apt install -y unattended-upgrades apt-listchanges
|
||||
dpkg-reconfigure -plow unattended-upgrades
|
||||
```
|
||||
|
||||
### 3.2 Ansible security roles (by group)
|
||||
|
||||
Today `make security` runs `playbooks/development.yml` on **`dev` only**.
|
||||
|
||||
**Expand with new/changed playbooks:**
|
||||
|
||||
| Group | Playbook idea | Roles |
|
||||
|-------|---------------|-------|
|
||||
| `[proxmox]` | `playbooks/infrastructure/proxmox-hardening.yml` | `ssh`, monitoring_server |
|
||||
| `[services]` | extend `playbooks/servers.yml` | `ssh`, `base`, fail2ban |
|
||||
| `[qa]` | tag run on qa hosts | `ssh` |
|
||||
| LXCs | optional `pct` + Ansible over SSH after keys | `ssh` |
|
||||
|
||||
**Workflow:**
|
||||
|
||||
```bash
|
||||
make check HOST=pve201 # after proxmox play exists
|
||||
make dev HOST=dev01 --tags security
|
||||
```
|
||||
|
||||
### 3.3 UFW on LXCs
|
||||
|
||||
Only **punimTagFE-dev** has UFW today. Template for others:
|
||||
|
||||
- Allow 22 from `10.0.10.0/24`
|
||||
- Allow app port only if needed on LAN
|
||||
- Default deny incoming
|
||||
|
||||
Use `roles/ssh` UFW tasks or Proxmox guest firewall (`firewall=1` on `net0`).
|
||||
|
||||
### 3.4 Align names / inventory
|
||||
|
||||
| Proxmox name | Ansible | Action |
|
||||
|--------------|---------|--------|
|
||||
| punimTagFE-dev | punimTag-dev | Rename CT or update `app_projects` name |
|
||||
| vikunja-debian | vikanjans | OK (IP 159) |
|
||||
| qbit-debian | qBittorrent | OK (IP 91) |
|
||||
|
||||
### 3.5 Mac (control machine)
|
||||
|
||||
| Issue | Fix |
|
||||
|-------|-----|
|
||||
| Firewall off | System Settings → Firewall → On |
|
||||
| FileVault off | Enable FileVault |
|
||||
| Docker on `*:3000` | Bind to `127.0.0.1` unless LAN needed |
|
||||
|
||||
---
|
||||
|
||||
## Phase 4 — Low
|
||||
|
||||
| Item | Fix |
|
||||
|------|-----|
|
||||
| rpcbind (111) on pve201 / 9101 | Disable if unused: `systemctl disable --now rpcbind.service rpcbind.socket` |
|
||||
| X11Forwarding on Proxmox | Set `no` in sshd |
|
||||
| Stopped CTs 9001, 9401 | Leave stopped or destroy if unused |
|
||||
| `make security-audit` target | Add Makefile → runs audit scripts, appends to report |
|
||||
| Quarterly re-audit | Re-run `scripts/security-audit-lxc-via-pve.sh` |
|
||||
|
||||
---
|
||||
|
||||
## Suggested calendar
|
||||
|
||||
| Week | Critical | High | Medium |
|
||||
|------|----------|------|--------|
|
||||
| **1** | 0.x prep, 1.1 SSH both PVE, 1.2 firewall 8006, 1.4 keys | 2.1 LXC passwords off (after keys), 2.2 fail2ban | — |
|
||||
| **2** | 1.3 RAM 201 | 2.3 patch PVE + LXCs, 2.4 Caddy for 8080 services | 3.1 unattended-upgrades |
|
||||
| **3** | — | 2.5 pve10 ZFS/PBS/load | 3.2 Ansible plays for proxmox + services |
|
||||
| **4** | — | — | 3.3 UFW, 3.4 naming, 3.5 Mac |
|
||||
|
||||
---
|
||||
|
||||
## Rollback (if locked out of SSH)
|
||||
|
||||
- Proxmox: use **console** in web UI (or physical/IPMI) → edit `/etc/ssh/sshd_config` → `PasswordAuthentication yes` temporarily → reload sshd.
|
||||
- LXC: `pct enter <vmid>` from PVE host.
|
||||
|
||||
---
|
||||
|
||||
## Tracking checklist
|
||||
|
||||
Also tracked in [security-audit-report.md](security-audit-report.md) remediation table.
|
||||
|
||||
**Backup (Phase 0 — before everything)**
|
||||
|
||||
- [ ] Git commit / branch for ansible repo (pre-hardening baseline)
|
||||
- [ ] PVE `sshd_config` backup on 201 + 10
|
||||
- [ ] Proxmox CT snapshots (or vzdump) on critical LXCs
|
||||
- [ ] Audit outputs saved locally (`security-hardening-backup-*`)
|
||||
- [ ] Console access tested in Proxmox UI
|
||||
|
||||
### Critical
|
||||
|
||||
- [ ] pve201 SSH: `PermitRootLogin prohibit-password` + `PasswordAuthentication no`
|
||||
- [ ] pve10 SSH: same
|
||||
- [ ] 8006 restricted to admin subnet/IP
|
||||
- [x] SSH keys on most inventory hosts (2026-05-23 — see audit report)
|
||||
- [ ] SSH keys on **caddy**, **ansibleVM**, **vaultwardenVM** (if still pending)
|
||||
- [x] pve201 RAM partial relief — GPU 64 GiB; DebianDesktop 24 GiB (**reboot guest**)
|
||||
|
||||
### High
|
||||
|
||||
- [ ] All running LXCs: `PasswordAuthentication no` (after keys verified)
|
||||
- [ ] fail2ban on pve201 + pve10
|
||||
- [x] Patch fleet — `apt upgrade` 2026-05-23 (reboots still pending where required)
|
||||
- [ ] qBit / searchXNG / punimTag / vaultwarden port exposure reduced
|
||||
- [ ] pve10 ZFS + PBS investigated
|
||||
|
||||
### Medium
|
||||
|
||||
- [ ] unattended-upgrades on PVE + key LXCs
|
||||
- [ ] `make security` (or new plays) for proxmox, services, qa
|
||||
- [ ] UFW on critical LXCs
|
||||
- [ ] Mac firewall + FileVault
|
||||
|
||||
### Low
|
||||
|
||||
- [ ] rpcbind, X11, audit Makefile, naming cleanup
|
||||
|
||||
---
|
||||
|
||||
## Quick reference: your login after plan
|
||||
|
||||
```bash
|
||||
# Proxmox
|
||||
ssh root@10.0.10.201 # key only
|
||||
|
||||
# Dev / QA
|
||||
ssh ladmin@10.0.10.223 # key only → sudo -i when you need root
|
||||
|
||||
# Services (inventory root)
|
||||
ssh root@10.0.10.50 # key only
|
||||
|
||||
# Proxmox UI (if 8006 restricted)
|
||||
ssh -L 8006:127.0.0.1:8006 root@10.0.10.201
|
||||
# → https://127.0.0.1:8006
|
||||
```
|
||||
87
docs/guides/site-lxc-git.md
Normal file
87
docs/guides/site-lxc-git.md
Normal file
@ -0,0 +1,87 @@
|
||||
# Site LXCs — git deploy (levkin / caseware / auto / portfolio)
|
||||
|
||||
## Remotes (correct)
|
||||
|
||||
Use **`git.levkin.ca`**, not `10.0.30.169`:
|
||||
|
||||
```
|
||||
git@git.levkin.ca:ilia/levkin.ca.git
|
||||
git@git.levkin.ca:ilia/caseware.git
|
||||
git@git.levkin.ca:ilia/auto.git
|
||||
git@git.levkin.ca:ilia/sdetProfile.git
|
||||
```
|
||||
|
||||
Gitea VM is **`10.0.10.169`** on pve10. Public `git.levkin.ca:22` hits your home IP and is **closed**; git SSH uses LAN IP via `~/.ssh/config`.
|
||||
|
||||
## SSH config (on site LXC, as root)
|
||||
|
||||
```ssh
|
||||
# /root/.ssh/config
|
||||
Host git.levkin.ca
|
||||
HostName 10.0.10.169
|
||||
User git
|
||||
IdentityFile ~/.ssh/id_ed25519
|
||||
StrictHostKeyChecking accept-new
|
||||
```
|
||||
|
||||
## Deploy keys
|
||||
|
||||
Each LXC should use its **own** deploy key in Gitea (**Repo → Settings → Deploy Keys**).
|
||||
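Gitea allows a public key only **once per server** — if you see *“already been added to the server”*, generate a repo-specific key (e.g. `ssh-keygen -t ed25519 -f /root/.ssh/id_ed25519_gitea -C deploy-portfolio-sdetProfile`), then print its public half and add it as the deploy key: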
|
||||
|
||||
```bash
|
||||
# On portfolio LXC 219 (via pve10)
|
||||
pct exec 219 -- cat /root/.ssh/id_ed25519_gitea.pub
|
||||
```
|
||||
|
||||
Portfolio uses `~/.ssh/id_ed25519_gitea` in `/root/.ssh/config` for `Host git.levkin.ca` (`IdentitiesOnly yes`).
|
||||
|
||||
| LXC | Repo | Key file / comment |
|
||||
|-----|------|---------------------|
|
||||
| 215 | caseware | `~/.ssh/id_ed25519` → `root@caseware` |
|
||||
| 216 | auto | `~/.ssh/id_ed25519` → `root@auto` |
|
||||
| 219 | sdetProfile | `~/.ssh/id_ed25519_gitea` → `deploy-portfolio-sdetProfile` |
|
||||
| 220 | levkin.ca | `~/.ssh/id_ed25519_gitea` → `deploy-levkin-levkin.ca` (add in Gitea UI) or HTTPS clone with read token |
|
||||
|
||||
## levkin.ca routes (LXC 220)
|
||||
|
||||
| Public URL | Served from |
|
||||
|------------|-------------|
|
||||
| `https://levkin.ca/` | `www/index.html` (spec) |
|
||||
| `https://levkin.ca/folders/` | `www/folders/` (stack-folder) |
|
||||
|
||||
Build before push:
|
||||
|
||||
```bash
|
||||
cd ~/Documents/code/levkin.ca
|
||||
npm run build:www
|
||||
git add www/ && git commit -m "Rebuild www" && git push
|
||||
```
|
||||
|
||||
On LXC:
|
||||
|
||||
```bash
|
||||
pct exec 220 -- bash -c 'cd /var/www/levkin && git pull origin main'
|
||||
```
|
||||
|
||||
## Push / pull
|
||||
|
||||
```bash
|
||||
# On LXC (via pve10)
|
||||
pct exec 215 -- bash -c 'cd /var/www/caseware && git pull origin main && git push origin main'
|
||||
pct exec 216 -- bash -c 'cd /var/www/auto && git pull origin master && git push origin master'
|
||||
pct exec 219 -- bash -c 'cd /var/www/portfolio && git pull origin master && git push origin master'
|
||||
pct exec 220 -- bash -c 'cd /var/www/levkin && git pull origin main'
|
||||
```
|
||||
|
||||
After editing `index.html`, commit on the LXC, push, then hard-refresh the public site.
|
||||
|
||||
## Gitea VM SSH (git@10.0.10.169)
|
||||
|
||||
If deploy keys fail after adding them in the UI:
|
||||
|
||||
1. Keys live in `/var/lib/gitea/.ssh/authorized_keys` (regenerated by Gitea).
|
||||
2. OpenSSH logs in as user **`git`** — copy/sync that file to **`/home/git/.ssh/authorized_keys`** (`chown git:git`, mode `600`).
|
||||
3. `command=` must run **`gitea serv`** as user **`gitea`** (e.g. `sudo -n -E -u gitea /usr/bin/gitea …`) with `SSH_ORIGINAL_COMMAND` preserved in sudoers.
|
||||
|
||||
Portfolio uses repo path **`ilia/sdetprofile`** (lowercase on disk).
|
||||
51
docs/guides/sso-selfhosted-matrix.md
Normal file
51
docs/guides/sso-selfhosted-matrix.md
Normal file
@ -0,0 +1,51 @@
|
||||
# Self-hosted SSO readiness (Authentik)
|
||||
|
||||
Which apps can use Authentik OIDC/SAML without a paid app license.
|
||||
|
||||
## Cal.com — blocked (commercial)
|
||||
|
||||
**Status:** Deferred until a valid **self-hosted enterprise license** is in place.
|
||||
|
||||
The Cal UI at `/settings/security/sso` shows *"This is a commercial feature"* when `CALCOM_LICENSE_KEY` is missing or invalid. On LXC 210, the key in `/opt/cal/.env` is currently **empty** (length 0), so SSO cannot be configured in-app.
|
||||
|
||||
**If you want native Cal OIDC later:**
|
||||
|
||||
1. Purchase / obtain a self-hosted license from [Cal.com](https://cal.com) (sales or existing license).
|
||||
2. Set in `/opt/cal/.env`:
|
||||
```bash
|
||||
CALCOM_LICENSE_KEY=<your-key>
|
||||
NEXT_PUBLIC_LICENSE_CONSENT=agree
|
||||
```
|
||||
3. `cd /opt/cal && docker compose up -d` (compose already passes these vars).
|
||||
4. Complete [cal-authentik-oidc.md](cal-authentik-oidc.md) — Authentik app `cal-com` is already provisioned.
|
||||
|
||||
**Workaround without paying Cal:** use **local Cal password** for admin; public booking at `cal.levkin.ca/ilia/consult` stays open. Optional later: **Caddy + Authentik forward-auth** only on `/settings/*` and `/auth/*` (does not integrate Cal’s “Login with SSO” button; more ops complexity). Not recommended until license path is ruled out.
|
||||
|
||||
**Infra already done (harmless to keep):** `calsaml` DB, `SAML_*` env vars, Authentik provider `cal-com-oidc`.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4 order (no Cal license required)
|
||||
|
||||
Wire these first — typical OSS OIDC, no extra license:
|
||||
|
||||
| App | OIDC/SAML | Notes |
|
||||
|-----|-----------|--------|
|
||||
| **Vikunja** | OIDC native | **Live** — [vikunja-authentik-oidc.md](vikunja-authentik-oidc.md); group `homelab-users` |
|
||||
| **Listmonk** | OIDC native | Admin-only |
|
||||
| **Mattermost** | OIDC or SAML | Moderate |
|
||||
| **Mailcow** | OIDC | Last — mail-critical |
|
||||
| **Umami** | — | Already LAN-only; no SSO needed |
|
||||
| **Vaultwarden** | — | **Do not OIDC** (break-glass) |
|
||||
| **n8n** | OIDC (if enabled) | Check edition |
|
||||
| **Immich** | OIDC | Phase 5; usually free in self-host |
|
||||
| **Outline** | OIDC/SAML | Phase 8 |
|
||||
|
||||
The apps listed above are **unlikely to need a commercial license** for homelab SSO, but always check each app’s docs before assuming.
|
||||
|
||||
---
|
||||
|
||||
## Related
|
||||
|
||||
- [cal-authentik-oidc.md](cal-authentik-oidc.md)
|
||||
- [levkin-selfhost-plan-2.md](levkin-selfhost-plan-2.md)
|
||||
97
docs/guides/unifi-static-dhcp.md
Normal file
97
docs/guides/unifi-static-dhcp.md
Normal file
@ -0,0 +1,97 @@
|
||||
# UniFi static DHCP (10.0.10.x homelab)
|
||||
|
||||
**Controller:** https://192.168.2.1/
|
||||
**Goal:** Pin Proxmox VM MAC addresses to stable `10.0.10.x` addresses so Caddy and Ansible inventory do not drift.
|
||||
|
||||
LXCs on pve10 (**210, 215–220**) are already static via `pct set` — **no UniFi lease needed** for those rows.
|
||||
This guide is for **VMs** (and pve201 guests) that still use DHCP.
|
||||
|
||||
---
|
||||
|
||||
## Before you start
|
||||
|
||||
1. Confirm guests get addresses on **`10.0.10.0/24`** (not only `192.168.2.x`). In UniFi, open the network that faces Proxmox `vmbr0`.
|
||||
2. Gateway for homelab guests should be **`10.0.10.1`** (or your router’s IP on that VLAN).
|
||||
3. Use the MAC table in [vm-static-ip-router-reservations.md](vm-static-ip-router-reservations.md).
|
||||
|
||||
---
|
||||
|
||||
## Method A — From a connected client (easiest)
|
||||
|
||||
1. Open **https://192.168.2.1/** and sign in.
|
||||
2. Go to **Clients** (or **UniFi Devices** → **Clients**).
|
||||
3. Find the device (hostname like `gitea`, `vaultwarden`, or MAC from Proxmox `qm config <vmid>`).
|
||||
4. Click the client → **Settings** (gear) or **⋮**.
|
||||
5. Enable **Fixed IP** / **Use fixed IP address**.
|
||||
6. Set IP to the target from the table (e.g. `10.0.10.169` for gitea).
|
||||
7. **Apply** / **Save**.
|
||||
8. On the VM: renew DHCP or reboot:
|
||||
```bash
|
||||
sudo dhclient -r && sudo dhclient
|
||||
# or: reboot
|
||||
```
|
||||
9. Verify: `ip -4 addr show` shows the reserved IP.
|
||||
|
||||
---
|
||||
|
||||
## Method B — DHCP static mapping (manual MAC)
|
||||
|
||||
1. **Settings** → **Networks**.
|
||||
2. Open the LAN/VLAN that serves **10.0.10.x** (name varies: `Default`, `Homelab`, `10.0.10`).
|
||||
3. **DHCP** section → **DHCP Static IP** / **Static leases** → **Create new**.
|
||||
4. Enter:
|
||||
- **MAC address** (from Proxmox, e.g. `BC:24:11:E9:BD:E5`)
|
||||
- **IP address** (e.g. `10.0.10.169`)
|
||||
- **Name** (optional, e.g. `giteaVM`)
|
||||
5. Save. Repeat for each row in the reservations table.
|
||||
6. Renew DHCP on each VM or reboot.
|
||||
|
||||
---
|
||||
|
||||
## Already static (skip UniFi DHCP)
|
||||
|
||||
| VMID | Name | IP | How |
|
||||
|------|------|-----|-----|
|
||||
| 210 | cal | 10.0.10.228 | `pct set` |
|
||||
| 215 | caseware | 10.0.10.105 | `pct set` |
|
||||
| 216 | auto | 10.0.10.59 | `pct set` |
|
||||
| 217 | identity | 10.0.10.21 | `pct set` |
|
||||
| 218 | monitoring | 10.0.10.22 | `pct set` |
|
||||
| 219 | portfolio | 10.0.10.106 | `pct set` (`iliadobkin.com`) |
|
||||
| 220 | levkin | 10.0.10.60 | `pct set` (`levkin.ca`) |
|
||||
| 106 | caddy | 10.0.10.50 | static in `/etc/network/interfaces` |
|
||||
|
||||
---
|
||||
|
||||
## Priority order — UniFi reservations (VMs / pve201)
|
||||
|
||||
| Order | Guest | IP | MAC | Notes |
|
||||
| ----- | ----- | --- | --- | ----- |
|
||||
| 1 | giteaVM | 10.0.10.169 | BC:24:11:E9:BD:E5 | |
|
||||
| 2 | vaultwardenVM | 10.0.10.142 | BC:24:11:58:DB:DC | |
|
||||
| 3 | n8n (WRA) | 10.0.10.154 | BC:24:11:61:DE:7A | |
|
||||
| 4 | hermes | 10.0.10.36 | BC:24:11:51:1E:99 | |
|
||||
| 5 | actual | 10.0.10.158 | BC:24:11:10:7B:64 | |
|
||||
| 6 | jellyfin | 10.0.10.232 | BC:24:11:29:B8:84 | stopped until NAS OK |
|
||||
| 7 | listmonk (pve201 VM 113) | 10.0.10.148 | BC:24:11:11:53:9A | |
|
||||
| 8 | Mailcow (pve201) | 10.0.10.132 | BC:24:11:34:75:2D | |
|
||||
| 9 | TrueNAS | 10.0.10.107 | BC:24:11:14:DE:B5 | optional pin |
|
||||
| 10 | PVE.BU.SVR | 10.0.10.200 | BC:24:11:DA:95:3B | lab VM |
|
||||
|
||||
Full MAC table: [vm-static-ip-router-reservations.md](vm-static-ip-router-reservations.md).
|
||||
|
||||
---
|
||||
|
||||
## If you only see 192.168.2.x in UniFi
|
||||
|
||||
Your Mac may be on `192.168.2.0/24` while Proxmox guests use a separate **`10.0.10.0/24`** network. In that case:
|
||||
|
||||
- Add or edit a UniFi network/VLAN for `10.0.10.0/24`, or
|
||||
- Ensure the router bridges/routes between `192.168.2.x` and `10.0.10.x`, and
|
||||
- Put DHCP reservations on the network that actually serves the Proxmox bridge.
|
||||
|
||||
---
|
||||
|
||||
## After reservations
|
||||
|
||||
Mark `✅ router` in [host-list.md](host-list.md) for each guest.
|
||||
51
docs/guides/vikunja-authentik-oidc.md
Normal file
51
docs/guides/vikunja-authentik-oidc.md
Normal file
@ -0,0 +1,51 @@
|
||||
# Vikunja ↔ Authentik OIDC
|
||||
|
||||
**Status:** Live at `https://todo.levkin.ca` (host `vikunja`, `10.0.10.159`).
|
||||
|
||||
## Authentik
|
||||
|
||||
| Item | Value |
|
||||
|------|--------|
|
||||
| Application slug | `vikunja` |
|
||||
| Redirect URI (strict) | `https://todo.levkin.ca/auth/openid/authentik` |
|
||||
| Subject mode | **Based on the User's username** (`user_username`) |
|
||||
| Access group | **`homelab-users`** (bind to app; policy engine **ANY**) |
|
||||
|
||||
| Authentik user | Purpose | Email |
|
||||
|----------------|---------|--------|
|
||||
| **`admin`** | Authentik admin UI only | `admin@levkin.ca` |
|
||||
| **`ilia`** | Homelab apps (Vikunja, etc.) | `idobkin@gmail.com` |
|
||||
|
||||
**Do not use the same email on both users** — Authentik will pick the wrong account.
|
||||
|
||||
`homelab-users` group = **`ilia`** only. Vikunja app binding: group `homelab-users` (policy engine **ANY**).
|
||||
|
||||
Secrets: `vault_vikunja_oidc_client_id`, `vault_vikunja_oidc_client_secret` in Ansible vault.
|
||||
|
||||
## Vikunja
|
||||
|
||||
Config: `/opt/vikunja/config.yml` (mounted in `docker-compose.yml`).
|
||||
|
||||
- `auth.openid.providers.authentik` → `authurl: https://auth.levkin.ca/application/o/vikunja/`
|
||||
- `usernamefallback: true` + `emailfallback: true` → SSO links to existing local user **`ilia`** when Authentik username is `ilia`.
|
||||
|
||||
Local auth stays enabled for break-glass.
|
||||
|
||||
## Login
|
||||
|
||||
1. Sign out: `https://auth.levkin.ca/if/user/logout/`
|
||||
2. `https://todo.levkin.ca` → **Login with Authentik**
|
||||
3. Sign in as **`ilia`** (username) or **`idobkin@gmail.com`** — **not** `admin`
|
||||
|
||||
**My applications:** `admin` only sees apps allowed for superuser (e.g. Cal). **`ilia`** sees Vikunja after login.
|
||||
|
||||
## Adding users
|
||||
|
||||
1. **Directory → Users** — create the user (the username should match the existing Vikunja local username if you want SSO to link to that account).
|
||||
2. **Directory → Groups → homelab-users** — add user.
|
||||
3. New Vikunja users: the first OIDC login creates the account. Existing local users need a matching username plus `usernamefallback: true` and `emailfallback: true` in the Vikunja config.
|
||||
|
||||
## Related
|
||||
|
||||
- [sso-selfhosted-matrix.md](sso-selfhosted-matrix.md)
|
||||
- [Authentik Vikunja integration](https://integrations.goauthentik.io/chat-communication-collaboration/vikunja/)
|
||||
38
docs/guides/vm-static-ip-router-reservations.md
Normal file
38
docs/guides/vm-static-ip-router-reservations.md
Normal file
@ -0,0 +1,38 @@
|
||||
# VM static IPs — router DHCP reservations (pve10)
|
||||
|
||||
Proxmox **LXCs** use `pct set … ip=10.0.10.X/24` (done for 210, 215–219).
|
||||
|
||||
**VMs** without cloud-init are pinned by **router DHCP reservation by MAC** (Method B in plan-2).
|
||||
Ansible **cannot log into your router** — configure the static leases manually in the router's admin UI.
|
||||
|
||||
**Your UniFi:** https://192.168.2.1/ — step-by-step: [unifi-static-dhcp.md](unifi-static-dhcp.md).
|
||||
|
||||
Homelab guests use **`10.0.10.0/24`** (gateway `10.0.10.1`). If UniFi also serves `192.168.2.x`, ensure the `10.0.10.x` segment is the network those VMs/LXCs use
|
||||
(or that routing/DHCP relay matches your Proxmox bridge).
|
||||
|
||||
## How to add a reservation (any router)
|
||||
|
||||
1. Open router admin (UniFi: **https://192.168.2.1/**).
|
||||
2. Find **DHCP** / **LAN** / **Static leases** / **Reserved addresses**.
|
||||
3. For each row: **MAC address** → **IP address** → Save.
|
||||
4. Reboot guest or renew DHCP (`dhclient -r && dhclient` on Debian) if IP does not update immediately.
|
||||
5. Mark done in [host-list.md](host-list.md).
|
||||
|
||||
| VMID | Name | MAC | Reserve IP | Inventory |
|
||||
| ---- | ---- | --- | ---------- | --------- |
|
||||
| 102 | gitea-alpine | `BC:24:11:E9:BD:E5` | `10.0.10.169` | giteaVM |
|
||||
| 103 | WRA / n8n | `BC:24:11:61:DE:7A` | `10.0.10.154` | n8n |
|
||||
| 104 | vaultwarden | `BC:24:11:58:DB:DC` | `10.0.10.142` | vaultwardenVM |
|
||||
| 105 | TrueNAS | `BC:24:11:14:DE:B5` | `10.0.10.107` | — |
|
||||
| 106 | caddy | `BC:24:11:E0:49:B4` | `10.0.10.50` | ✅ static in-guest |
|
||||
| 108 | actual | `BC:24:11:10:7B:64` | `10.0.10.158` | actual |
|
||||
| 117 | hermes | `BC:24:11:51:1E:99` | `10.0.10.36` | hermes (guest agent on) |
|
||||
| 200 | PVE.BU.SVR | `BC:24:11:DA:95:3B` | `10.0.10.200` | — |
|
||||
| 201 | NextcloudAIO | `BC:24:11:14:D4:DE` | `10.0.10.24` | **decommission** — skip new work |
|
||||
| 101 | Jellyfin | `BC:24:11:29:B8:84` | `10.0.10.232` | stopped |
|
||||
| 113 | listmonk (pve201) | `BC:24:11:11:53:9A` | `10.0.10.148` | listmonk |
|
||||
| — | Mailcow (pve201 VM 106) | `BC:24:11:34:75:2D` | `10.0.10.132` | mailcow (inventory) |
|
||||
|
||||
After reserving in the router, mark **DHCP/Static** as `✅ router` in [host-list.md](host-list.md).
|
||||
|
||||
In-guest static IP (optional, stronger than a router reservation): SSH in as root and set the address in `/etc/network/interfaces`, as on caddy (VM 106).
|
||||
15
inventories/production/group_vars/all/mailcow.yml
Normal file
15
inventories/production/group_vars/all/mailcow.yml
Normal file
@ -0,0 +1,15 @@
|
||||
---
|
||||
# Mailcow mailbox definitions (passwords live in vault only).
|
||||
# Create: make mailcow-mailbox MAILBOX=<key>
|
||||
# To add a mailbox: add a new key under mailcow_mailboxes and its password under vault_mailcow_mailbox_passwords.<key>
|
||||
mailcow_url: "https://mail.levkine.ca"
|
||||
mailcow_domain: "levkine.ca"
|
||||
|
||||
mailcow_mailboxes:
|
||||
alerts:
|
||||
local_part: alerts
|
||||
name: Monitoring Alerts
|
||||
quota: 1024
|
||||
vault_password_key: alerts
|
||||
|
||||
mailcow_api_key: "{{ vault_mailcow_api_key | default('') }}"
|
||||
@ -26,6 +26,10 @@ maintenance_pre_reboot_delay: 5 # Delay before reboot in seconds
|
||||
|
||||
# Default Tailscale settings - these tell the playbook to use your vault key
|
||||
tailscale_auth_key: "{{ vault_tailscale_auth_key | default('') }}"
|
||||
|
||||
# Mailcow — API key + per-mailbox passwords in vault; definitions in group_vars/all/mailcow.yml
|
||||
mailcow_api_key: "{{ vault_mailcow_api_key | default('') }}"
|
||||
mailcow_mailbox_passwords: "{{ vault_mailcow_mailbox_passwords | default({}) }}"
|
||||
tailscale_accept_routes: true
|
||||
tailscale_accept_dns: true
|
||||
tailscale_ssh: false
|
||||
@ -100,8 +104,7 @@ app_frontend_start_cmd: "npm start"
|
||||
# Proxmox IDs are global. Never reuse IDs across unrelated guests.
|
||||
# Suggested reservation table (edit to your preference):
|
||||
# - 9000-9099: pote
|
||||
# - 9100-9199: punimTagFE
|
||||
# - 9200-9299: punimTagBE
|
||||
# - 9100-9199: punimTag (monorepo)
|
||||
# - 9300-9399: projectA (example)
|
||||
# -----------------------------------------------------------------------------
|
||||
app_projects:
|
||||
@ -205,59 +208,13 @@ app_projects:
|
||||
gateway: "10.0.10.1"
|
||||
branch: "main"
|
||||
|
||||
punimTagFE:
|
||||
description: "punimTag frontend-only project (edit repo_url, IPs, secrets)."
|
||||
repo_url: "git@github.com:example/punimTagFE.git"
|
||||
repo_dest: "/srv/app"
|
||||
components:
|
||||
backend: false
|
||||
frontend: true
|
||||
guest_defaults:
|
||||
guest_type: "{{ proxmox_guest_type }}"
|
||||
cores: 2
|
||||
memory_mb: 2048
|
||||
swap_mb: 512
|
||||
rootfs_size_gb: 16
|
||||
deploy:
|
||||
frontend_install_cmd: "{{ app_frontend_install_cmd }}"
|
||||
frontend_build_cmd: "{{ app_frontend_build_cmd }}"
|
||||
frontend_start_cmd: "{{ app_frontend_start_cmd }}"
|
||||
envs:
|
||||
dev:
|
||||
name: "punimTagFE-dev"
|
||||
vmid: 9101
|
||||
ip: "10.0.10.121/24"
|
||||
gateway: "10.0.10.1"
|
||||
branch: "dev"
|
||||
env_vars:
|
||||
APP_ENV: "dev"
|
||||
SECRET_PLACEHOLDER: "change-me"
|
||||
qa:
|
||||
name: "punimTagFE-qa"
|
||||
vmid: 9102
|
||||
ip: "10.0.10.122/24"
|
||||
gateway: "10.0.10.1"
|
||||
branch: "qa"
|
||||
env_vars:
|
||||
APP_ENV: "qa"
|
||||
SECRET_PLACEHOLDER: "change-me"
|
||||
prod:
|
||||
name: "punimTagFE-prod"
|
||||
vmid: 9103
|
||||
ip: "10.0.10.123/24"
|
||||
gateway: "10.0.10.1"
|
||||
branch: "main"
|
||||
env_vars:
|
||||
APP_ENV: "prod"
|
||||
SECRET_PLACEHOLDER: "change-me"
|
||||
|
||||
punimTagBE:
|
||||
description: "punimTag backend-only project (edit repo_url, IPs, secrets)."
|
||||
repo_url: "git@github.com:example/punimTagBE.git"
|
||||
punimTag:
|
||||
description: "punimTag monorepo (frontend + backend, edit repo_url, IPs, secrets)."
|
||||
repo_url: "git@github.com:example/punimTag.git"
|
||||
repo_dest: "/srv/app"
|
||||
components:
|
||||
backend: true
|
||||
frontend: false
|
||||
frontend: true
|
||||
guest_defaults:
|
||||
guest_type: "{{ proxmox_guest_type }}"
|
||||
cores: 2
|
||||
@ -268,34 +225,58 @@ app_projects:
|
||||
backend_install_cmd: "{{ app_backend_install_cmd }}"
|
||||
backend_migrate_cmd: "{{ app_backend_migrate_cmd }}"
|
||||
backend_start_cmd: "{{ app_backend_start_cmd }}"
|
||||
frontend_install_cmd: "{{ app_frontend_install_cmd }}"
|
||||
frontend_build_cmd: "{{ app_frontend_build_cmd }}"
|
||||
frontend_start_cmd: "{{ app_frontend_start_cmd }}"
|
||||
envs:
|
||||
dev:
|
||||
name: "punimTagBE-dev"
|
||||
vmid: 9201
|
||||
ip: "10.0.10.131/24"
|
||||
name: "punimTag-dev"
|
||||
vmid: 9101
|
||||
ip: "10.0.10.121/24"
|
||||
gateway: "10.0.10.1"
|
||||
branch: "dev"
|
||||
env_vars:
|
||||
APP_ENV: "dev"
|
||||
SECRET_PLACEHOLDER: "change-me"
|
||||
NODE_ENV: "production"
|
||||
DATABASE_HOST: "10.0.10.181"
|
||||
DATABASE_PORT: "5432"
|
||||
DATABASE_URL: "{{ vault_punimtag_database_url_dev | default('postgresql://punimtag_dev_user:CHANGE_ME@10.0.10.181:5432/punimtag_dev') }}"
|
||||
BACKEND_PORT: "{{ app_backend_port }}"
|
||||
FRONTEND_PORT: "{{ app_frontend_port }}"
|
||||
BACKEND_BASE_URL: "http://10.0.10.121:{{ app_backend_port }}"
|
||||
FRONTEND_BASE_URL: "http://10.0.10.121:{{ app_frontend_port }}"
|
||||
qa:
|
||||
name: "punimTagBE-qa"
|
||||
vmid: 9202
|
||||
ip: "10.0.10.132/24"
|
||||
name: "punimTag-qa"
|
||||
vmid: 9102
|
||||
ip: "10.0.10.122/24"
|
||||
gateway: "10.0.10.1"
|
||||
branch: "qa"
|
||||
env_vars:
|
||||
APP_ENV: "qa"
|
||||
SECRET_PLACEHOLDER: "change-me"
|
||||
NODE_ENV: "production"
|
||||
DATABASE_HOST: "10.0.10.181"
|
||||
DATABASE_PORT: "5432"
|
||||
DATABASE_URL: "{{ vault_punimtag_database_url_qa | default('postgresql://punimtag_qa_user:CHANGE_ME@10.0.10.181:5432/punimtag_qa') }}"
|
||||
BACKEND_PORT: "{{ app_backend_port }}"
|
||||
FRONTEND_PORT: "{{ app_frontend_port }}"
|
||||
BACKEND_BASE_URL: "http://10.0.10.122:{{ app_backend_port }}"
|
||||
FRONTEND_BASE_URL: "http://10.0.10.122:{{ app_frontend_port }}"
|
||||
prod:
|
||||
name: "punimTagBE-prod"
|
||||
vmid: 9203
|
||||
ip: "10.0.10.133/24"
|
||||
name: "punimTag-prod"
|
||||
vmid: 9103
|
||||
ip: "10.0.10.123/24"
|
||||
gateway: "10.0.10.1"
|
||||
branch: "main"
|
||||
env_vars:
|
||||
APP_ENV: "prod"
|
||||
SECRET_PLACEHOLDER: "change-me"
|
||||
NODE_ENV: "production"
|
||||
DATABASE_HOST: "10.0.10.181"
|
||||
DATABASE_PORT: "5432"
|
||||
DATABASE_URL: "{{ vault_punimtag_database_url_prod | default('postgresql://punimtag_prod_user:CHANGE_ME@10.0.10.181:5432/punimtag_prod') }}"
|
||||
BACKEND_PORT: "{{ app_backend_port }}"
|
||||
FRONTEND_PORT: "{{ app_frontend_port }}"
|
||||
BACKEND_BASE_URL: "http://10.0.10.123:{{ app_backend_port }}"
|
||||
FRONTEND_BASE_URL: "http://10.0.10.123:{{ app_frontend_port }}"
|
||||
|
||||
mirrormatch:
|
||||
description: "Mirrormatch Prisma/Node backend (dev/qa/prod)."
|
||||
|
||||
@ -22,6 +22,44 @@ vault_ssh_public_key: "ssh-ed25519 AAAA... you@example"
|
||||
# LXC create bootstrap password (often required by Proxmox)
|
||||
vault_lxc_root_password: "CHANGE_ME"
|
||||
|
||||
# Ansible become (sudo) for VMs that use ladmin/master instead of root SSH
|
||||
vault_vaultwarden_become_password: "{{ vault_lxc_root_password }}"
|
||||
vault_ansiblevm_become_password: "{{ vault_lxc_root_password }}"
|
||||
|
||||
# Mailcow API — System → Configuration → Access → API (read/write)
|
||||
vault_mailcow_api_key: "CHANGE_ME"
|
||||
# Per-mailbox passwords (make mailcow-mailbox MAILBOX=<key>)
|
||||
vault_mailcow_mailbox_passwords:
|
||||
alerts: "CHANGE_ME"
|
||||
# Legacy alias (optional)
|
||||
vault_alerts_mailbox_password: "CHANGE_ME"
|
||||
|
||||
# Uptime Kuma + SMTP (monitoring LXC)
|
||||
vault_uptime_kuma_url: "http://10.0.10.22:3001"
|
||||
vault_uptime_kuma_user: "admin"
|
||||
vault_uptime_kuma_password: "CHANGE_ME"
|
||||
vault_kuma_smtp_host: "mail.levkine.ca"
|
||||
vault_kuma_smtp_port: "587"
|
||||
vault_kuma_smtp_user: "alerts@levkine.ca"
|
||||
vault_kuma_smtp_password: "CHANGE_ME"
|
||||
vault_kuma_smtp_to: "idobkin@gmail.com"
|
||||
|
||||
# Umami (monitoring LXC /opt/monitoring/.env)
|
||||
vault_umami_db_password: "CHANGE_ME"
|
||||
vault_umami_app_secret: "CHANGE_ME"
|
||||
|
||||
# Cal.com ↔ Authentik OIDC (make cal-oidc)
|
||||
vault_cal_oidc_client_secret: "CHANGE_ME"
|
||||
|
||||
# Vikunja ↔ Authentik OIDC
|
||||
vault_vikunja_oidc_client_id: "CHANGE_ME"
|
||||
vault_vikunja_oidc_client_secret: "CHANGE_ME"
|
||||
|
||||
# Hermes Mattermost (not Telegram)
|
||||
vault_mattermost_url: "https://slack.levkin.ca"
|
||||
vault_mattermost_token: "CHANGE_ME"
|
||||
vault_mattermost_allowed_users: "CHANGE_ME"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# POTE (python/venv + cron) secrets
|
||||
# -----------------------------------------------------------------------------
|
||||
@ -70,4 +108,18 @@ vault_mirrormatch_smtp_user: "smtp-user"
|
||||
vault_mirrormatch_smtp_password: "CHANGE_ME"
|
||||
vault_mirrormatch_smtp_from: "MirrorMatch <noreply@mirrormatch.com>"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# punimTag (monorepo) secrets
|
||||
# -----------------------------------------------------------------------------
|
||||
# Optional deploy key for private repo access
|
||||
vault_punimtag_git_ssh_key: |
|
||||
-----BEGIN OPENSSH PRIVATE KEY-----
|
||||
CHANGE_ME
|
||||
-----END OPENSSH PRIVATE KEY-----
|
||||
|
||||
# Per-environment database URLs (use external Postgres at 10.0.10.181:5432)
|
||||
vault_punimtag_database_url_dev: "postgresql://punimtag_dev_user:CHANGE_ME@10.0.10.181:5432/punimtag_dev"
|
||||
vault_punimtag_database_url_qa: "postgresql://punimtag_qa_user:CHANGE_ME@10.0.10.181:5432/punimtag_qa"
|
||||
vault_punimtag_database_url_prod: "postgresql://punimtag_prod_user:CHANGE_ME@10.0.10.181:5432/punimtag_prod"
|
||||
|
||||
|
||||
|
||||
@ -1,100 +1,142 @@
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
38316537376634623462313731323238666165383731656632373665653534623163386333303865
|
||||
3865383030316132663831303932376437346335323233630a643331663539383163306666393764
|
||||
38313265656561343839616565343663353037663237663032366632373831363336306632626266
|
||||
3361643865333533340a356233663034343932323831323236356161396237346532323838373135
|
||||
33393239313730363336613338373039663735323431323562613363343863326234633833663631
|
||||
66343462623231663932633537373361313764393630356666393662653135356139663935613038
|
||||
65383261363065633235343031346535373564373931373063386265343335623265653739613830
|
||||
32656233393330633362623932316431383761306332393466313936396533333839313831663331
|
||||
34353864356336303331663233653666363966376162303731626134313235306238323363303439
|
||||
32333039653235326632303637303065386161616138356463623561366637376366326262303166
|
||||
38323763393934666539373063323265333961666164613437316164633565393035626538353365
|
||||
33386562336665383863636639643232623161643933313664396534383362303838663362653736
|
||||
64393334616165336638306235363734653431646431616139373336656333623963386538646230
|
||||
39663230363063386231343730663162313463666135323265613261626637626332353534396535
|
||||
31623664363766646332396336396133613662643232366433323330373962633839613635333763
|
||||
63306230623438346639323863353137363330316630316130326134323731326635643736373736
|
||||
62336362656265633233623165376436373231656666303832373966353732313031623865316663
|
||||
63356163636238346230623732326232646434623532633439646536656362393162613535613565
|
||||
66616539316362376561386263373464623030636661663435383839643565393632616232663035
|
||||
34653735383964653930633664346330386566343830336238306562343164366131643138643339
|
||||
35313366356637643262636238366263353535306434633732623335643266396335666636666663
|
||||
37333232393765306433326164663538663839623034373535653737633366303665633831303334
|
||||
32303061363863386139613464326466336136396534663538643163343439343763383534306636
|
||||
62353733613330376163386331626463656462336237656339356132643135363537343638303261
|
||||
33366332653439313137613665386136666536356537346665333935366336623734393738346434
|
||||
63326265346362636564366265373134336662626332653464646139656635313961656230336537
|
||||
63666638326337643033363964643339666130386139363138656165666333356465643337396165
|
||||
30336330633632353231613938646165383966613863366330646162646266346139343434393865
|
||||
66346365663230626531643963383462636465363965393762336233366538393133313138616335
|
||||
32353834313762363265643031343237633732393166343139363163326439666162396332353038
|
||||
31306530626666343361313736313636613335376163383237303063393333386663333333336137
|
||||
37346166316231623638386635613230663063653037643930333961316434643361633035633734
|
||||
65643937636361653433383262643265373165613437336236633631323635613034663834646665
|
||||
30373730373438613132633932333565376665333565383932356334653738646166393934626362
|
||||
30666666303832613633316230623038343165396338343535663931383639623430643238656261
|
||||
39623037333063306266323335303736346236636137633863353866343136346335353865303961
|
||||
31346331333066376330306361396262333762393838303165383134303435353630366130303536
|
||||
34386532356239326166386665623435646432636561363564656161646563306234333138333839
|
||||
38316337656631313763393135396464643338386636336234346663653538353863643636323032
|
||||
35326133623064363838386662653138613438386564316635373838366262656364666633636539
|
||||
61306563666138656161336466323537626161313366616662623362643036636132663634313137
|
||||
39653437306662646162613763343736636530356465346132646238633166373838353836326461
|
||||
36326666323636353239303262623436643932353164323630326635653635653233363265316264
|
||||
30653763643431626539356161376534396437636463303363663134373961616561363561333333
|
||||
34306537326666383664336464656464623731656566653132613565336536323438666333366466
|
||||
64613738653730333633383062653837366266316536653139643362373039383831363666333934
|
||||
34383833336266356436666636323239336432386133303466636138643934356266326533643161
|
||||
36393664313963393930383533623565383332613933396639613037323266663439313138326261
|
||||
30353861303661303836343165353362663632306430626337356562343637653164396237333566
|
||||
37656230363530323836373363646334356262646633313932383161303264613238373936353036
|
||||
61376264633930356465626266623930333039383032316163633037323035346130343934616261
|
||||
31666166393462366561303833353135326566356637376466613934376233303162323033623031
|
||||
63656131333439353537623662363530383866326432306361316465383137633536666364623662
|
||||
37353561633839623530333663643130326131333330626661396636343234666139336539653162
|
||||
62383636663137626637303535333862366434626161353239393232313537343865646564626331
|
||||
39366665363030643764663963316163343033326434373265343664393439316333346434363563
|
||||
61346164396561343865626362616433306230333130653166656230353364316536626432373333
|
||||
35383133363530666263316431396462383133363965336637386632363263656261353963313161
|
||||
36383632326264373436383638383064346334336238656239393833653531656461356136303434
|
||||
37663434663732306631656334306361663562303863386135623066633963373034373139666332
|
||||
35393433646333363839666434663535363661616330386234366132303161383063663836626561
|
||||
35393064343735303032313266643338623834383838633834636536363539656466663864613366
|
||||
66636363623330326436363936313938333638323939323035616232366563316364343834376630
|
||||
66656434336661643861613737616138396330383832386230383331646462323363373363393733
|
||||
63363237636137373566363438663966396432613964336164326138623737393636396234646232
|
||||
64343361363365356135666235623833396131626663303839653535663732313831633163643638
|
||||
35396262373837343238343838663635353838373338663732626330613237623332336436643136
|
||||
38653833383430393837383566643765653834306636356466326364303334653034626262356630
|
||||
34333338333336373433356235386337346666343830303164363235303265313134323339653339
|
||||
63316238346132653663653165313635336638646362356337643766366564383531633565303431
|
||||
66616433663630343439336661346266336139613537653438653432326666326137306364376137
|
||||
66333939643262633532363966623439373434393862353237613135646663623236646331643537
|
||||
31353566653464313433636635393330646166613232633734346639326534373163383064353732
|
||||
32373861303064346266643338316465653031646633633936373738663837383162643534623131
|
||||
31633662356534343636313834386139656439663733333762323962323939623032396239356437
|
||||
37633739613433613365313337383835623936623530363831383535663337343264356532616434
|
||||
39393634396664636166346631313764343733666534613935393637363233373331303837656463
|
||||
37363266363634353136316532333462396266373733333633356239653334363835326261323661
|
||||
66323032346364356230613831643236316530356132343863393361343462373433383265336333
|
||||
30343730316366366234333263343965633466333439653739663333643939303631353664316435
|
||||
36396139623562656632666165666662626263643436396431326135633932393965656531633761
|
||||
39303634643936366438336534613532303134343164326661626363656562383564623264636132
|
||||
39656636303636393761653035303832386430646162343830343834316534636263373763643765
|
||||
61366335643531666232303231656336643833396238336639333437363564636566636632303364
|
||||
62623738336237393638363436396662656565653839643164356565313563663561666237383036
|
||||
33626464663465643230376164653062663063636630613064643632643235643662653566333333
|
||||
62353763643830363638323731303537633837393235656661333263323536363330356362643333
|
||||
34346666656432626365383639326538643862346265316263326531623631383962383734316330
|
||||
39333430613761663337306331623461643635653431343336663163343766373464366538313335
|
||||
61643538643231333636643836663663313534356662386532633331346664653262353839643066
|
||||
36393366653131316636646336313362656662666163333635633132323438353435373430643839
|
||||
37623936393962333065663536306238653466363634386632366637363265303734356535333735
|
||||
64623330303965393533326563643063303762646664666464643239386435343065326234306632
|
||||
35346338373866303838613933653230373737396134653533376265356432333933356237636338
|
||||
66656536393530316435323863373962636465333331653364626162326562393565313538633264
|
||||
34613633393862333731336563636136666166613037613833333063303162373339663539646631
|
||||
36303962356562306239616634376339356135666663303836353061663039343836356262373932
|
||||
65346466373532633365383835323062313531623130396130376531626333653862393462643631
|
||||
366330333666336262373364663864336633
|
||||
62616334383737633962313839313235653935663832623061333532616566343565626437376230
|
||||
3333393831623434663736656331303462626534626265380a356135653866666438373838663137
|
||||
61373962356364306365323933386262613837333364356564383163383638363430323230393430
|
||||
3032346238343264340a636539663735396335313135363330373536353562666537653764643637
|
||||
36663437366166616437303738646466656331313266653431303462366532616639323136346137
|
||||
66663932346561333535303438623734643864613330396331626161616265393731633365393930
|
||||
37326565363931386532623432343339656534393032663634353961306330303737313765333330
|
||||
65316436383030666564663537323937666634343966653562353434333537366338393838333666
|
||||
30356339353732623932393665663237343630303533363232336263323732376461353338663831
|
||||
62666365333330353361373732306436623637623932636235393434323339663266396631346237
|
||||
63393762643338346563643637666135336139336461333537373137626464613339373937383830
|
||||
39643039363234346134663062373130343230663839613234373838393434373532313732656332
|
||||
63373739616163666361666330393866396331616136383565383763303563323261323330313832
|
||||
64386661383838366336633335323431356133366162373464313533653734613366623537646636
|
||||
65323862376466343530303439396639616135373030613638363630313264623337653233636532
|
||||
38383664613337303565336136333434613638663239393234656534353264623166333837376436
|
||||
36633837613339613161363764383538303363323232346636313862393930343333633131383833
|
||||
65316166363062363330373734323232366136653030336439343932613337623662383236663834
|
||||
66303137353438373661633537633333633733666663393435383436396634393739383039383139
|
||||
32653438303134326663653164633039653435643766616637313433623463366531633962613434
|
||||
33396262333739643865346465363862303337356239663337356330363232383331346435393930
|
||||
64306633363064656566346662363433313434376631663032343635656463313530626635623930
|
||||
65383434663064666535613561313265616436326533313336303836386635343134626361343566
|
||||
36653233656337613838323164376666656338383337633065393237373737623934626265343133
|
||||
38393763316132373234623735353731656261643736353562616361643033303064393962343239
|
||||
32623363653466363565323436643639643934663530646333356532363463363564363862373232
|
||||
38396535393034653565643236363733393032306335363934623462386639363961306265646636
|
||||
32383738653633613732313030626135353366626537646263303634323539343866363033643337
|
||||
66396235323461666131643030353164616265623635636438363738653233363435353761366531
|
||||
66623033656331386138623864363461333933653636653566303733616137303030663430643535
|
||||
64386534346463393638613764353966343837333235623262343164326564616138353731363663
|
||||
39666634323663373831326664326337656164323738326335663734373538303135653861393362
|
||||
65303865366235333538623330373032306661386436323530336631616639366636376135303537
|
||||
31373634636561356239366437623637363735653633316634353862666139303565393533643864
|
||||
64656335356236353232303135616265666266376634313437633236666461343233333732323832
|
||||
61313230393162383163336634303066613664376338633964643431346335616533396466393736
|
||||
31383862666365633665623766643665623361376565386531323234303236393162356331346535
|
||||
65353231316531326438343237633133393361336366353232623866393138376232643133326161
|
||||
63333236626237613536323964356435383933646264656137623632343665393530343463343230
|
||||
61343464383230366339616439343762626435303832393462666463363030383365343938666264
|
||||
33636437333266656130633365666162316366616262386436333861373533343433356633356630
|
||||
31643666626262386535626233653337303861666666653366643361363164353430643561613532
|
||||
32373239373038306533393464373365323638653630306630363931623931336663666339356464
|
||||
64646634356437326435656163306562346530363435336138353330356162333431353466313763
|
||||
64666538666332653762633064653664663531373638393530653034323864383938346631303165
|
||||
62343163636366633161383464626639633638323363306139626632343836646135346332393235
|
||||
62376536316164636631626639656533323337366335616534356538386266343436613530653131
|
||||
36383733373637303864636334633237663331623663663562613261393736323137373130613537
|
||||
61353431396139663861366639616631613064323230366131373666373964393738623936393431
|
||||
62366530623938373836636265393233663661663664613430366237396637366561616433333463
|
||||
64623335303834376432383361396433373537633066333937663633663433333339343262363338
|
||||
38633532366334313164346236646665663363623065666331613961653639313563316563383231
|
||||
64343834373066316233336465366634306537303666383831306237396362366663343430353162
|
||||
35643638653234396134653638653663333765313236313764383835343431303134383537313237
|
||||
35626563376163643336623534633236313363383062373437666536306462383632626332643430
|
||||
39326661633134393465363333356136323361363831363961646230393561663838313935386432
|
||||
66653430613231343731623630313362366138613465373631653632303139636438656439633361
|
||||
31326262313431363536633434346431626336623139333235363338626435666439616433323931
|
||||
30386238663931393066353237616537366434363536306163613931306138653364623663666438
|
||||
63396331313438623662393532333834376237343462313263626139366133353131313164613861
|
||||
38313632386336646362313634633938383963306339383362633236653235353061626337353936
|
||||
32626464343166323438616637346661633861396264633365386638666538333932633530306139
|
||||
64393132613562353835323162346532313262353266366230393839323462626362353533323834
|
||||
35393261373039336537623339613463613335363362353438623837376631646233653362383636
|
||||
31613261323361623934653939613661333836666637383534643137346261353333303861363665
|
||||
62386237646661626536363034313833363965373562316334336232643164633436333261303730
|
||||
39386233646162323365393034663137636462316432333335313366363933633065323264646136
|
||||
64326338303766613230393539626430326263646631646536623436643734376237373031316466
|
||||
62306136373465633130653564633233356331313761366333623363646666313365623563346334
|
||||
31666535346461336630626466616664363330626338333961386239333663326536316266346634
|
||||
33323064333161313232343239346439623633346161346465313532383061376137323839666365
|
||||
64376132306338663565623531623136663436333730366563313261626661373233393438646561
|
||||
61396562666533316635363432306139366430333837333866346436306135333862663734306164
|
||||
38313766666230393861323632316231343362656136366338336564623431373662333366323833
|
||||
33613232326230643530356137646635623030316663343466666666333734636230346263353365
|
||||
65363637356635666638613566613131383864316465666536336333393334653436666261393461
|
||||
64376639303632626165613361346636303064333532613064303032643562396262623632396539
|
||||
61623333643630616536393163623330353164383864623064383732633733353630323534663732
|
||||
36383133633066393263363533616334653933336235333938663132366334326234386264386531
|
||||
62373466393234666563613637313136663764376239666434383038613932376532653531613164
|
||||
34613834313532383165336634613536636437393638653964393831393533303630333933636464
|
||||
31613634346235396331386534303636313066396361393138393635633134353035613863656364
|
||||
63363030376662356636333566373063613433373330383139396530316163303633656438326333
|
||||
65353435613561303539326538613261393339616537373136313030656133323766396464646634
|
||||
34363061366166303465616133663835323232373763336634386231396230383965306164393731
|
||||
39633333353936666361656530363665383039626533333035373663326537373538633864626366
|
||||
64613435663834666137326335333736376466356236353637333262373834336131393733646138
|
||||
31363630626432643061663538626230356637373863643866326530373962393065393464663466
|
||||
37326165653235653166386561363339353331663164326639616135663736316363336531333439
|
||||
35363033343934323063613133326264313665613363386464303662633333646330373637636366
|
||||
37396562303164636261323633373538323835623235396161303964373735356538393431303031
|
||||
64663636326364386266306434343361353439616533303632363165376639313635663637623263
|
||||
37376233323233663364393439663137396265646230613631383039316230356539316130353062
|
||||
33303732323063633738666636623737366631336164396637396533656364316333616536336632
|
||||
34303963623031353137626331623031326136373538633336633835623337303831616365323066
|
||||
35333931393136393965623135626363393335306363366639323034633064663035613566313037
|
||||
38616234666131343064633561326466326365643863653664623932333734643332383963356665
|
||||
64326435643333333435636665383165386364663134613564386639346566353831343239646239
|
||||
35376338386631646236303031303665336166643437316131626438646237663331306438666130
|
||||
32323539393431303039393964363161633461303136616430666539393162633464623436656638
|
||||
61363736363665633965656362643432376266393531333539633737343165313562616133366131
|
||||
34346266323931363137303463666363336163373839306533393831323262313861393333643336
|
||||
37626239366432393461613630366636366631353237396461663566333935343037336438626262
|
||||
33306264613065373638373634303262626338386236386533616563633131366665663738353837
|
||||
33333936306266633965613338393662656161613465396163653438306463303138656536366531
|
||||
66343634306332313561386531373663343535343232646162396361626666633034663133613364
|
||||
65323536346264636164616463626535353261396362633736376531666334346537666562363339
|
||||
39653430386565313731346230653632613830396165313561333865333234656532383339313065
|
||||
30663565393030343134383536336335616537333336396232333839373533353161623264626434
|
||||
61393334316331613739666434653839353933336332396536313937323939646264313133373863
|
||||
66643138656661336264646338376232396138616465373562393063333336343036326632306662
|
||||
66303836326636663264633334356533613066383935316635313236633631376633613535303830
|
||||
33646566346661346539633638363135343939653363623232313864613132393235643961633566
|
||||
34653464303430313466326463346563363964363666623665633265356138336133616261333839
|
||||
37343036363065613766366565343765306663623037383933323230646566333935306564343039
|
||||
30343730633135643338366262376365326561353538346433636336633866393565326334326431
|
||||
64613136353139316331343333643564343534643931313164323934373465386437376637613838
|
||||
65333237386462666262326663316639393961363033656233356330666634366633373336326531
|
||||
33303535323036663837363537366436653930353637353962393464373361323166663031343532
|
||||
37383735613334316434356232343466373539666562326430656538653634323361363236313030
|
||||
34313537633433666333356661383838663861613765383564633835333437363330616163316432
|
||||
30323762656230323035663139323363346235633337346637663632383762393363396632613631
|
||||
35303161383263613164303535633063346432643563363436306665613738346338666336646530
|
||||
36383639353032636133353438396362333763623164376338653564616465303538646432353763
|
||||
66663262636661363465326463326639613431643065623966373630323161356565326362646635
|
||||
32633335303339633232396166393235643462356565323236356539653033363663333262386235
|
||||
36316432386165366530323737353862393263343063343138343334343966313838336639646463
|
||||
36303137323961626561343238323634373830323161303365306465373036323262663835376630
|
||||
34376662356238643939613536383432393464656530326530333262356162623531636364363662
|
||||
65613166383563333237376135656362306362366434346565366235626532623964303661626632
|
||||
61646462633533663830613436633937336364643562653362616464636130343264666233333932
|
||||
37323736316539636336633163643166333231376464656462666364303761313962366635663336
|
||||
31353738396532616137333033313362393830663434323236313031623863643735323838646561
|
||||
34633065623764333734353166323234633538363230633865353764333663613239306664386232
|
||||
65306661333939336634343535393261326335663163663431633630373936336465623634376362
|
||||
31393231313435306564333234633938353336366239646637366162343065366261303538613962
|
||||
36323065663362383538633536393161653332383035336236363364373133326366366130626135
|
||||
34346237366338663962643966613363336165633765663137653930323731393235616137613364
|
||||
37623462396333376263326364363166613831396161393933623532346637326262616434636265
|
||||
64323336626663303131323331376330393232666233626662363264616533646462323233333633
|
||||
3535
|
||||
|
||||
2
inventories/production/group_vars/ansible/main.yml
Normal file
2
inventories/production/group_vars/ansible/main.yml
Normal file
@ -0,0 +1,2 @@
|
||||
---
|
||||
maintenance_cron_enable_system: true
|
||||
@ -0,0 +1,2 @@
|
||||
---
|
||||
maintenance_cron_enable_system: true
|
||||
4
inventories/production/group_vars/proxmox/main.yml
Normal file
4
inventories/production/group_vars/proxmox/main.yml
Normal file
@ -0,0 +1,4 @@
|
||||
---
|
||||
# Tier 1 maintenance cron — hypervisors (journal + apt)
|
||||
maintenance_cron_enable_system: true
|
||||
maintenance_cron_enable_docker: false
|
||||
@ -0,0 +1,4 @@
|
||||
---
|
||||
# Tier 2 — Docker weekly prune (identity, monitoring, vaultwarden)
|
||||
maintenance_cron_enable_system: true
|
||||
maintenance_cron_enable_docker: true
|
||||
@ -0,0 +1,2 @@
|
||||
---
|
||||
maintenance_cron_enable_system: true
|
||||
@ -1,8 +1,9 @@
|
||||
---
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
31306264346663636630656534303766666564333866326139336137383339633338323834653266
|
||||
6132333337363566623265303037336266646238633036390a663432623861363562386561393264
|
||||
63303565633530383634643538323165383461656539613331386135336265653531336266613865
|
||||
3833376664366239650a313134653238323437633265373463326231346663366434323733663666
|
||||
38353061373437306431383132333233663639643134363464396163333962373033363661623666
|
||||
3430633863623962366430613962346264356461373539376263
|
||||
# ansibleVM (control @ 10.0.10.157) — plain vars; secrets in group_vars/all/vault.yml
|
||||
# Previous fully-encrypted host_vars file moved to ansibleVM.yml.vault-bak (broken for Ansible merge).
|
||||
|
||||
ansible_become: true
|
||||
ansible_become_method: sudo
|
||||
ansible_become_password: "{{ vault_ansiblevm_become_password }}"
|
||||
|
||||
maintenance_cron_enable_system: true
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
---
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
66633265383239626163633134656233613638643862323562373330643363323036333334646566
|
||||
3439646635343533353432323064643135623532333738380a353866643461636233376432396434
|
||||
|
||||
9
inventories/production/host_vars/cal.yml
Normal file
9
inventories/production/host_vars/cal.yml
Normal file
@ -0,0 +1,9 @@
|
||||
---
|
||||
# Cal.com LXC 210 @ 10.0.10.228 — business / scheduling
|
||||
cal_public_url: https://cal.levkin.ca
|
||||
cal_saml_admins: idobkin@gmail.com
|
||||
cal_saml_db_name: calsaml
|
||||
cal_authentik_app_slug: cal-com
|
||||
cal_authentik_provider_name: cal-com-oidc
|
||||
cal_authentik_host: https://auth.levkin.ca
|
||||
cal_oidc_client_id: cal-com
|
||||
@ -1,16 +0,0 @@
|
||||
---
|
||||
# Host variables for dev02
|
||||
|
||||
# Use ladmin user with sudo to become root
|
||||
ansible_become: true
|
||||
ansible_become_method: sudo
|
||||
ansible_become_password: "{{ vault_dev02_become_password }}"
|
||||
|
||||
# Configure shell for ladmin
|
||||
shell_users:
|
||||
- ladmin
|
||||
|
||||
# Skip data science stack
|
||||
install_conda: false
|
||||
install_jupyter: false
|
||||
install_r: false
|
||||
@ -1,8 +1,22 @@
|
||||
---
|
||||
# Configure sudo path for git-ci-01
|
||||
# Sudo may not be in PATH for non-interactive shells
|
||||
# git-ci-01 — Gitea Actions runner (VM 115 on pve201 @ 10.0.10.223)
|
||||
ansible_become_exe: /usr/bin/sudo
|
||||
ansible_become_method: sudo
|
||||
|
||||
# Alternative: if sudo is in a different location, update this
|
||||
# ansible_become_exe: /usr/local/bin/sudo
|
||||
# Proxmox (manual / qm): VMID 115, 2 cores, 4096 MB RAM, 64 GB disk (scsi0)
|
||||
# act_runner: /etc/act_runner/config.yaml — capacity 2, force_pull false
|
||||
# Maintenance: /etc/cron.weekly/docker-prune-ci (docker system prune -af --filter until=168h)
|
||||
#
|
||||
# Capacity notes (2026-05-23):
|
||||
# - pve201: VM 104 reduced to 64 GiB (2026-05-23); still tight — consider runner on pve10
|
||||
# - capacity 3 needs ~8–12 GB RAM on this VM → migrate runner to pve10 or add RAM after freeing pve201
|
||||
# - 12 repos: capacity 2 on one runner is OK; second runner on pve10 if queues stack up
|
||||
|
||||
git_ci_runner_capacity: 2
|
||||
git_ci_disk_gb: 64
|
||||
git_ci_proxmox_vmid: 115
|
||||
git_ci_proxmox_node: pve201
|
||||
|
||||
maintenance_cron_enable_system: true
|
||||
maintenance_cron_enable_docker: true
|
||||
maintenance_cron_docker_script: /etc/cron.weekly/docker-prune-ci
|
||||
|
||||
@ -0,0 +1,7 @@
|
||||
---
|
||||
# giteaVM — Gitea on Alpine (Proxmox VM 102 @ 10.0.10.169)
|
||||
# Alpine uses /etc/periodic/weekly (not cron.weekly); no apt for system-maintenance.
|
||||
maintenance_cron_enable_system: false
|
||||
maintenance_cron_enable_docker: false
|
||||
maintenance_cron_enable_gitea_archive: true
|
||||
maintenance_cron_gitea_archive_script: /etc/periodic/weekly/gitea-archive-prune
|
||||
4
inventories/production/host_vars/hermes.yml
Normal file
4
inventories/production/host_vars/hermes.yml
Normal file
@ -0,0 +1,4 @@
|
||||
---
|
||||
# Hermes agent VM 117 @ 10.0.10.36 (user: hermes, admin: ladmin)
|
||||
# Secrets: vault_hermes_telegram_bot_token, mattermost in /home/hermes/.hermes/secrets/
|
||||
hermes_home: /home/hermes/.hermes
|
||||
3
inventories/production/host_vars/identity.yml
Normal file
3
inventories/production/host_vars/identity.yml
Normal file
@ -0,0 +1,3 @@
|
||||
---
|
||||
maintenance_cron_enable_system: true
|
||||
maintenance_cron_enable_docker: true
|
||||
@ -1,8 +1,3 @@
|
||||
---
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
31316663336338303832323464623866343366313261653536623233303466636630633235643638
|
||||
3666646431323061313836333233356162643462323763380a623666663062386337393439653134
|
||||
61616135353966333639323031643263646231636332613935353234363134356435646266343866
|
||||
3034653235393636350a626362333764313732646663653838313233326438646330393336346539
|
||||
30393364323237396633343133616439393563326161636366613965366161656364343939313334
|
||||
3430306634396361353238643735363430383433323431393230
|
||||
# listmonk VM on pve201 — plain vars; secrets in vault
|
||||
# Previous fully-encrypted host_vars file moved to listmonk.yml.vault-bak (broken for Ansible merge).
|
||||
|
||||
4
inventories/production/host_vars/localhost.yml
Normal file
4
inventories/production/host_vars/localhost.yml
Normal file
@ -0,0 +1,4 @@
|
||||
---
|
||||
# Control node (runs playbooks with connection: local).
|
||||
# Use project venv so API deps (proxmoxer, etc.) match `make bootstrap`.
|
||||
ansible_python_interpreter: "{{ inventory_dir }}/../../.venv/bin/python3"
|
||||
7
inventories/production/host_vars/mailcow.yml
Normal file
7
inventories/production/host_vars/mailcow.yml
Normal file
@ -0,0 +1,7 @@
|
||||
---
|
||||
# Mailcow VM 106 on pve201 (Mailcow-debian)
|
||||
# API/UI: https://mail.levkine.ca — domain levkine.ca (with e)
|
||||
# SSH: root only (no ladmin). First access: make copy-ssh-key-mailcow
|
||||
mailcow_url: "https://mail.levkine.ca"
|
||||
mailcow_domain: "levkine.ca"
|
||||
mailcow_alerts_user: "alerts"
|
||||
3
inventories/production/host_vars/monitoring.yml
Normal file
3
inventories/production/host_vars/monitoring.yml
Normal file
@ -0,0 +1,3 @@
|
||||
---
|
||||
maintenance_cron_enable_system: true
|
||||
maintenance_cron_enable_docker: true
|
||||
@ -1,8 +1,8 @@
|
||||
---
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
35633833353965363964376161393730613065663236326239376562356231316166656131366263
|
||||
6263363436373965316339623139353830643062393165370a643138356561613537616431316534
|
||||
63386635363838626465396439303664316635633239653639646338393130666164653262316135
|
||||
3937376464303935620a343530333030643830383130646532613533336435383334373831343261
|
||||
37653138613132616165636132623037623033343265663734626536366361373130353139383634
|
||||
6664346538653965343263376538636336393164356434646264
|
||||
# vaultwarden VM 104 on pve10 @ 10.0.10.142 (ladmin + sudo)
|
||||
ansible_become: true
|
||||
ansible_become_method: sudo
|
||||
ansible_become_password: "{{ vault_vaultwarden_become_password }}"
|
||||
|
||||
maintenance_cron_enable_system: true
|
||||
maintenance_cron_enable_docker: true
|
||||
|
||||
3
inventories/production/host_vars/vikunja.yml
Normal file
3
inventories/production/host_vars/vikunja.yml
Normal file
@ -0,0 +1,3 @@
|
||||
---
|
||||
maintenance_cron_enable_system: true
|
||||
maintenance_cron_enable_docker: true
|
||||
@ -2,16 +2,27 @@
|
||||
# Primary IPs: Tailscale (100.x.x.x) for remote access
|
||||
# Fallback IPs: Local network (10.0.x.x) when Tailscale is down
|
||||
# Usage: ansible_host_fallback is available for manual fallback
|
||||
# Public URLs: levkin.ca DNS A records → Caddy (142.180.237.136), except home → 100.100.100.100
|
||||
#
|
||||
# NOTE: Proxmox app projects (dev/qa/prod) are provisioned dynamically via
|
||||
# `playbooks/app/site.yml` (it uses `add_host` based on `app_projects`).
|
||||
# You generally do NOT need to add project hosts here.
|
||||
|
||||
[proxmox]
|
||||
pve201 ansible_host=10.0.10.201 ansible_user=root
|
||||
pve10 ansible_host=10.0.10.10 ansible_user=root
|
||||
|
||||
[sites]
|
||||
levkin ansible_host=10.0.10.60 ansible_user=root url=https://levkin.ca proxmox_vmid=220 proxmox_node=PVENAS
|
||||
caseware ansible_host=10.0.10.105 ansible_user=root url=https://caseware.levkin.ca proxmox_vmid=215 proxmox_node=PVENAS
|
||||
auto ansible_host=10.0.10.59 ansible_user=root url=https://auto.levkin.ca proxmox_vmid=216 proxmox_node=PVENAS
|
||||
portfolio ansible_host=10.0.10.106 ansible_user=root url=https://iliadobkin.com proxmox_vmid=219 proxmox_node=PVENAS
|
||||
|
||||
[dev]
|
||||
dev01 ansible_host=10.0.30.105 ansible_user=ladmin
|
||||
bottom ansible_host=10.0.10.156 ansible_user=beast
|
||||
debianDesktopVM ansible_host=10.0.10.206 ansible_user=user skip_reboot=true
|
||||
devGPU ansible_host=10.0.30.63 ansible_user=root
|
||||
devGPU ansible_host=10.0.10.122 ansible_user=root proxmox_vmid=104 proxmox_node=pve201 # GPU-Dev-Debian, Ollama + RTX 4080
|
||||
|
||||
[qa]
|
||||
git-ci-01 ansible_host=10.0.10.223 ansible_user=ladmin
|
||||
@ -22,25 +33,33 @@ KrakenMint ansible_host=10.0.10.120 ansible_user=ladmin
|
||||
[ansible]
|
||||
ansibleVM ansible_host=10.0.10.157 ansible_user=master
|
||||
|
||||
[tailscale]
|
||||
tailscaleVM ansible_host=100.66.218.53 ansible_user=ladmin
|
||||
[comms]
|
||||
# pve201 — email + newsletters
|
||||
mailcow ansible_host=10.0.10.132 ansible_user=root url=https://mail.levkine.ca proxmox_vmid=106 proxmox_node=pve201
|
||||
listmonk ansible_host=10.0.10.148 ansible_user=root url=https://listmonk.levkin.ca proxmox_node=pve201
|
||||
|
||||
[services]
|
||||
caddy ansible_host=10.0.10.50 ansible_user=root
|
||||
jellyfin ansible_host=10.0.10.232 ansible_user=root
|
||||
listmonk ansible_host=10.0.10.148 ansible_user=root
|
||||
nextcloud ansible_host=10.0.10.25 ansible_user=root
|
||||
actual ansible_host=10.0.10.158 ansible_user=root
|
||||
vikanjans ansible_host=10.0.10.159 ansible_user=root
|
||||
n8n ansible_host=10.0.10.154 ansible_user=root
|
||||
giteaVM ansible_host=10.0.10.169 ansible_user=root
|
||||
portainerVM ansible_host=10.0.30.69 ansible_user=ladmin
|
||||
homepageVM ansible_host=10.0.30.12 ansible_user=homepage
|
||||
vaultwardenVM ansible_host=10.0.10.142 ansible_user=ladmin
|
||||
# VMID 117: on PVENAS (pve10)
|
||||
hermes ansible_host=10.0.10.36 ansible_user=ladmin url=https://hermes.levkin.ca proxmox_vmid=117 proxmox_node=PVENAS
|
||||
caddy ansible_host=10.0.10.50 ansible_user=root proxmox_vmid=106 proxmox_node=PVENAS
|
||||
cal ansible_host=10.0.10.228 ansible_user=root url=https://cal.levkin.ca proxmox_vmid=210 proxmox_node=PVENAS
|
||||
identity ansible_host=10.0.10.21 ansible_user=root url=https://auth.levkin.ca proxmox_vmid=217 proxmox_node=PVENAS
|
||||
monitoring ansible_host=10.0.10.22 ansible_user=root url=http://10.0.10.22:3001 proxmox_vmid=218 proxmox_node=PVENAS uptime_kuma_port=3001 dockge_port=5001 umami_port=3000
|
||||
giteaVM ansible_host=10.0.10.169 ansible_user=root url=https://git.levkin.ca proxmox_vmid=102 proxmox_node=PVENAS
|
||||
n8n ansible_host=10.0.10.154 ansible_user=root url=https://n8n.levkin.ca proxmox_vmid=103 proxmox_node=PVENAS
|
||||
vaultwardenVM ansible_host=10.0.10.142 ansible_user=ladmin url=https://vault.levkin.ca proxmox_vmid=104 proxmox_node=PVENAS
|
||||
actual ansible_host=10.0.10.158 ansible_user=root url=https://budget.levkin.ca proxmox_vmid=108 proxmox_node=PVENAS
|
||||
vikunja ansible_host=10.0.10.159 ansible_user=root url=https://todo.levkin.ca proxmox_vmid=301 proxmox_node=pve201
|
||||
qBittorrent ansible_host=10.0.10.91 ansible_user=root port=8080
|
||||
jellyfin ansible_host=10.0.10.232 ansible_user=root url=https://jelly.levkin.ca proxmox_vmid=101 proxmox_node=PVENAS # stopped until NAS pool healthy
|
||||
|
||||
[desktop]
|
||||
desktop-beast ansible_host=100.117.34.106 ansible_user=beast
|
||||
# Retired / stopped — kept for reference; do not run playbooks against these without intent
|
||||
# nextcloud ansible_host=10.0.10.24 ansible_user=root url=https://nextcloud.levkin.ca # VM 201 decommission
|
||||
# portainerVM ansible_host=10.0.30.69 ansible_user=ladmin # retired → Dockge on monitoring
|
||||
# homepageVM ansible_host=10.0.30.12 ansible_user=homepage # VM 100 stopped on pve10
|
||||
|
||||
#[desktop]
|
||||
#desktop-beast ansible_host=100.117.34.106 ansible_user=beast
|
||||
|
||||
[local]
|
||||
localhost ansible_connection=local
|
||||
|
||||
53
playbooks/caddy-auth-authentik.yml
Normal file
53
playbooks/caddy-auth-authentik.yml
Normal file
@ -0,0 +1,53 @@
|
||||
---
|
||||
# Playbook: caddy-auth-authentik
|
||||
# Purpose: Add auth.levkin.ca reverse proxy to Caddy (Phase 1 Authentik)
|
||||
# Targets: caddy
|
||||
# Usage: make -f Makefile caddy-auth OR ansible-playbook playbooks/caddy-auth-authentik.yml
|
||||
|
||||
- name: Add Authentik proxy block to Caddy
|
||||
hosts: caddy
|
||||
become: true
|
||||
become_method: ansible.builtin.su
|
||||
|
||||
tasks:
|
||||
- name: Ensure auth.levkin.ca HTTPS block exists (after cal block)
|
||||
ansible.builtin.shell: |
|
||||
set -euo pipefail
|
||||
if grep -q '^auth\.levkin\.ca {' /etc/caddy/Caddyfile; then
|
||||
exit 0
|
||||
fi
|
||||
awk '
|
||||
/^cal\.levkin\.ca \{/ { in_cal=1 }
|
||||
in_cal && /^}$/ && !done {
|
||||
print
|
||||
print ""
|
||||
print "auth.levkin.ca {"
|
||||
print " import security-headers"
|
||||
print " encode gzip"
|
||||
print " reverse_proxy 10.0.10.21:9000"
|
||||
print "}"
|
||||
done=1
|
||||
next
|
||||
}
|
||||
{ print }
|
||||
' /etc/caddy/Caddyfile > /tmp/Caddyfile.new
|
||||
mv /tmp/Caddyfile.new /etc/caddy/Caddyfile
|
||||
args:
|
||||
executable: /bin/bash
|
||||
changed_when: true
|
||||
notify: Reload caddy
|
||||
|
||||
- name: Ensure auth.levkin.ca HTTP redirect in :80 block
|
||||
ansible.builtin.blockinfile:
|
||||
path: /etc/caddy/Caddyfile
|
||||
marker: "# {mark} ANSIBLE MANAGED auth.levkin.ca :80"
|
||||
insertafter: '@vault host vault.levkin.ca'
|
||||
block: |
|
||||
@auth host auth.levkin.ca
|
||||
redir @auth https://auth.levkin.ca{uri} permanent
|
||||
notify: Reload caddy
|
||||
|
||||
handlers:
|
||||
- name: Reload caddy
|
||||
ansible.builtin.command: caddy reload --config /etc/caddy/Caddyfile
|
||||
changed_when: true
|
||||
55
playbooks/caddy-levkin-site.yml
Normal file
55
playbooks/caddy-levkin-site.yml
Normal file
@ -0,0 +1,55 @@
|
||||
---
|
||||
# Playbook: caddy-levkin-site
|
||||
# Purpose: Add levkin.ca reverse proxy to Caddy (site LXC 220)
|
||||
# Targets: caddy
|
||||
# Usage: make caddy-levkin
|
||||
|
||||
- name: Add levkin.ca proxy block to Caddy
|
||||
hosts: caddy
|
||||
become: true
|
||||
become_method: ansible.builtin.su
|
||||
|
||||
tasks:
|
||||
- name: Ensure levkin.ca HTTPS block exists (after caseware block)
|
||||
ansible.builtin.shell: |
|
||||
set -euo pipefail
|
||||
if grep -q '^levkin\.ca,' /etc/caddy/Caddyfile || grep -q '^levkin\.ca {' /etc/caddy/Caddyfile; then
|
||||
exit 0
|
||||
fi
|
||||
awk -v upstream="{{ levkin_site_upstream | default('10.0.10.60:80') }}" '
|
||||
/^caseware\.levkin\.ca \{/ { in_cw=1 }
|
||||
in_cw && /^}$/ && !done {
|
||||
print
|
||||
print ""
|
||||
print "levkin.ca, www.levkin.ca {"
|
||||
print " import security-headers"
|
||||
print " @www host www.levkin.ca"
|
||||
print " redir @www https://levkin.ca{uri} permanent"
|
||||
print " reverse_proxy " upstream
|
||||
print "}"
|
||||
done=1
|
||||
next
|
||||
}
|
||||
{ print }
|
||||
' /etc/caddy/Caddyfile > /tmp/Caddyfile.new
|
||||
mv /tmp/Caddyfile.new /etc/caddy/Caddyfile
|
||||
args:
|
||||
executable: /bin/bash
|
||||
register: levkin_https_block
|
||||
changed_when: levkin_https_block.rc == 0
|
||||
notify: Reload caddy
|
||||
|
||||
- name: Ensure levkin.ca HTTP redirect in :80 block
|
||||
ansible.builtin.blockinfile:
|
||||
path: /etc/caddy/Caddyfile
|
||||
marker: "# {mark} ANSIBLE MANAGED levkin.ca :80"
|
||||
insertafter: '@vikunja host todo.levkin.ca'
|
||||
block: |
|
||||
@levkin host levkin.ca www.levkin.ca
|
||||
redir @levkin https://levkin.ca{uri} permanent
|
||||
notify: Reload caddy
|
||||
|
||||
handlers:
|
||||
- name: Reload caddy
|
||||
ansible.builtin.command: caddy reload --config /etc/caddy/Caddyfile
|
||||
changed_when: true
|
||||
79
playbooks/cal-authentik-oidc.yml
Normal file
79
playbooks/cal-authentik-oidc.yml
Normal file
@ -0,0 +1,79 @@
|
||||
---
|
||||
# Playbook: cal-authentik-oidc
|
||||
# Purpose: Enable Cal.com SSO (SAML DB + license env) and Authentik OIDC provider
|
||||
# Targets: cal (LXC 210), identity (LXC 217)
|
||||
# Usage: make cal-oidc
|
||||
# Manual: https://cal.levkin.ca/settings/security/sso — enter Client ID, Secret, Well Known URL
|
||||
|
||||
- name: Prepare OIDC client secret
|
||||
hosts: localhost
|
||||
gather_facts: false
|
||||
tasks:
|
||||
- name: Use vault OIDC secret or generate one for this run
|
||||
ansible.builtin.set_fact:
|
||||
cal_oidc_client_secret_effective: >-
|
||||
{{ vault_cal_oidc_client_secret
|
||||
| default(lookup('password', '/dev/null length=48 chars=ascii_letters,digits')) }}
|
||||
no_log: true
|
||||
|
||||
- name: Remind to persist generated secret in vault
|
||||
ansible.builtin.debug:
|
||||
msg: >-
|
||||
vault_cal_oidc_client_secret was not set — generated for this run only.
|
||||
Add it to vault.yml and re-run so Authentik and Cal stay in sync.
|
||||
when: vault_cal_oidc_client_secret is not defined or vault_cal_oidc_client_secret | length == 0
|
||||
|
||||
- name: Cal.com — SAML database and compose SSO env
|
||||
hosts: cal
|
||||
become: true
|
||||
vars:
|
||||
vault_cal_oidc_client_secret: "{{ hostvars['localhost']['cal_oidc_client_secret_effective'] }}"
|
||||
pre_tasks:
|
||||
- name: Load Cal Postgres credentials from .env
|
||||
ansible.builtin.shell: |
|
||||
set -a
|
||||
source {{ cal_compose_dir }}/.env
|
||||
printf 'user=%s\npass=%s\n' "$POSTGRES_USER" "$POSTGRES_PASSWORD"
|
||||
args:
|
||||
executable: /bin/bash
|
||||
register: cal_pg_creds
|
||||
changed_when: false
|
||||
no_log: true
|
||||
|
||||
- name: Set Cal database facts
|
||||
ansible.builtin.set_fact:
|
||||
cal_postgres_user: "{{ cal_pg_creds.stdout_lines[0] | regex_replace('^user=', '') }}"
|
||||
cal_postgres_password: "{{ cal_pg_creds.stdout_lines[1] | regex_replace('^pass=', '') }}"
|
||||
cal_saml_database_url: >-
|
||||
postgresql://{{ cal_pg_creds.stdout_lines[0] | regex_replace('^user=', '') }}:{{
|
||||
cal_pg_creds.stdout_lines[1] | regex_replace('^pass=', '') }}@db:5432/{{ cal_saml_db_name }}
|
||||
no_log: true
|
||||
|
||||
roles:
|
||||
- role: cal_sso
|
||||
|
||||
- name: Authentik — Cal.com OIDC provider
|
||||
hosts: identity
|
||||
become: true
|
||||
vars:
|
||||
vault_cal_oidc_client_secret: "{{ hostvars['localhost']['cal_oidc_client_secret_effective'] }}"
|
||||
tasks:
|
||||
- name: Authentik OIDC for Cal.com
|
||||
ansible.builtin.import_role:
|
||||
name: cal_sso
|
||||
tasks_from: authentik.yml
|
||||
|
||||
- name: Cal.com OIDC — finish in UI
|
||||
hosts: cal
|
||||
gather_facts: false
|
||||
tasks:
|
||||
- name: Print Cal.com SSO configuration values
|
||||
ansible.builtin.debug:
|
||||
msg:
|
||||
- "1. Log in to Cal as {{ cal_saml_admins }}"
|
||||
- "2. Open {{ cal_public_url }}/settings/security/sso"
|
||||
- "3. Configure OIDC:"
|
||||
- " Client ID: {{ cal_oidc_client_id }}"
|
||||
- " Client Secret: (vault_cal_oidc_client_secret — see vault)"
|
||||
- " Well Known URL: {{ cal_authentik_host }}/application/o/{{ cal_authentik_app_slug }}/.well-known/openid-configuration"
|
||||
- "4. Test SSO login; keep local password as break-glass"
|
||||
@ -24,6 +24,7 @@
|
||||
|
||||
roles:
|
||||
- {role: maintenance, tags: ['maintenance']}
|
||||
- {role: maintenance_cron, tags: ['maintenance', 'maintenance_cron']}
|
||||
|
||||
post_tasks:
|
||||
- name: Display maintenance completion
|
||||
|
||||
20
playbooks/ssh-keys.yml
Normal file
20
playbooks/ssh-keys.yml
Normal file
@ -0,0 +1,20 @@
|
||||
---
|
||||
# Playbook: ssh-keys
|
||||
# Purpose: Install your workstation SSH public key on all inventory hosts
|
||||
# Targets: all hosts except localhost
|
||||
# Usage: make copy-ssh-keys-ansible
|
||||
# make copy-ssh-keys-ansible GROUP=services
|
||||
# make copy-ssh-keys-ansible HOST=dev01
|
||||
|
||||
- name: Deploy workstation SSH public key
|
||||
hosts: all:!local
|
||||
gather_facts: false
|
||||
vars:
|
||||
ssh_public_key_file: "{{ lookup('env', 'SSH_PUBLIC_KEY') | default(lookup('env', 'HOME') + '/.ssh/id_ed25519.pub', true) }}"
|
||||
tasks:
|
||||
- name: Add SSH public key for ansible_user
|
||||
ansible.posix.authorized_key:
|
||||
user: "{{ ansible_user | default(ansible_user_id) }}"
|
||||
state: present
|
||||
key: "{{ lookup('file', ssh_public_key_file) }}"
|
||||
become: false
|
||||
10
roles/cal_sso/defaults/main.yml
Normal file
10
roles/cal_sso/defaults/main.yml
Normal file
@ -0,0 +1,10 @@
|
||||
---
|
||||
cal_compose_dir: /opt/cal
|
||||
cal_saml_db_name: calsaml
|
||||
cal_saml_admins: idobkin@gmail.com
|
||||
cal_public_url: https://cal.levkin.ca
|
||||
cal_authentik_app_slug: cal-com
|
||||
cal_authentik_provider_name: cal-com-oidc
|
||||
cal_authentik_host: https://auth.levkin.ca
|
||||
# Set in vault: vault_cal_oidc_client_secret (if absent, playbooks/cal-authentik-oidc.yml generates a secret for that run only — persist it in vault)
|
||||
cal_oidc_client_id: "{{ cal_authentik_app_slug }}"
|
||||
20
roles/cal_sso/handlers/main.yml
Normal file
20
roles/cal_sso/handlers/main.yml
Normal file
@ -0,0 +1,20 @@
|
||||
---
|
||||
- name: Recreate calcom stack
|
||||
ansible.builtin.command:
|
||||
cmd: docker compose up -d
|
||||
chdir: "{{ cal_compose_dir }}"
|
||||
changed_when: true
|
||||
|
||||
- name: Recreate authentik server
|
||||
ansible.builtin.command:
|
||||
cmd: docker compose up -d server worker
|
||||
chdir: /opt/authentik
|
||||
changed_when: true
|
||||
|
||||
- name: Apply authentik cal blueprint
|
||||
ansible.builtin.command:
|
||||
cmd: >-
|
||||
docker compose exec -T server
|
||||
ak apply_blueprint {{ cal_authentik_app_slug }}-oidc.yaml
|
||||
chdir: /opt/authentik
|
||||
changed_when: true
|
||||
25
roles/cal_sso/tasks/authentik.yml
Normal file
25
roles/cal_sso/tasks/authentik.yml
Normal file
@ -0,0 +1,25 @@
|
||||
---
|
||||
- name: Ensure Authentik blueprints directory on host
|
||||
ansible.builtin.file:
|
||||
path: /opt/authentik/blueprints
|
||||
state: directory
|
||||
mode: "0755"
|
||||
|
||||
- name: Add blueprints volume to Authentik server service
|
||||
ansible.builtin.replace:
|
||||
path: /opt/authentik/compose.yml
|
||||
regexp: '(?ms)( server:.*? volumes:\n - \./data:/data\n)( - \./custom-templates:/templates)'
|
||||
replace: '\1 - ./blueprints:/blueprints\n\2'
|
||||
notify:
|
||||
- Recreate authentik server
|
||||
- Apply authentik cal blueprint
|
||||
|
||||
- name: Deploy Cal.com OIDC blueprint
|
||||
ansible.builtin.template:
|
||||
src: authentik-cal-oidc.yaml.j2
|
||||
dest: "/opt/authentik/blueprints/{{ cal_authentik_app_slug }}-oidc.yaml"
|
||||
mode: "0644"
|
||||
notify: Apply authentik cal blueprint
|
||||
|
||||
- name: Flush Authentik blueprint handler
|
||||
ansible.builtin.meta: flush_handlers
|
||||
52
roles/cal_sso/tasks/main.yml
Normal file
52
roles/cal_sso/tasks/main.yml
Normal file
@ -0,0 +1,52 @@
|
||||
---
|
||||
- name: Ensure SAML database exists on Cal Postgres
|
||||
ansible.builtin.command:
|
||||
cmd: >-
|
||||
docker exec cal-db psql -U {{ cal_postgres_user }} -tc
|
||||
"SELECT 1 FROM pg_database WHERE datname='{{ cal_saml_db_name }}'"
|
||||
register: cal_saml_db_check
|
||||
changed_when: false
|
||||
failed_when: cal_saml_db_check.rc != 0
|
||||
|
||||
- name: Create SAML database
|
||||
ansible.builtin.command:
|
||||
cmd: >-
|
||||
docker exec cal-db psql -U {{ cal_postgres_user }} -c
|
||||
"CREATE DATABASE {{ cal_saml_db_name }}"
|
||||
when: cal_saml_db_check.stdout | trim != "1"
|
||||
changed_when: true
|
||||
|
||||
- name: Deploy docker-compose with SSO environment
|
||||
ansible.builtin.template:
|
||||
src: docker-compose.yml.j2
|
||||
dest: "{{ cal_compose_dir }}/docker-compose.yml"
|
||||
owner: root
|
||||
group: root
|
||||
mode: "0644"
|
||||
notify: Recreate calcom stack
|
||||
|
||||
- name: Ensure SAML env vars in Cal .env
|
||||
ansible.builtin.lineinfile:
|
||||
path: "{{ cal_compose_dir }}/.env"
|
||||
regexp: "^{{ item.key }}="
|
||||
line: "{{ item.key }}={{ item.value }}"
|
||||
create: false
|
||||
no_log: true
|
||||
loop:
|
||||
- key: SAML_DATABASE_URL
|
||||
value: "{{ cal_saml_database_url }}"
|
||||
- key: SAML_ADMINS
|
||||
value: "{{ cal_saml_admins }}"
|
||||
notify: Recreate calcom stack
|
||||
|
||||
- name: Flush handlers before OIDC UI step
|
||||
ansible.builtin.meta: flush_handlers
|
||||
|
||||
- name: Wait for Cal.com HTTP after stack recreate
|
||||
ansible.builtin.uri:
|
||||
url: "{{ cal_public_url }}/api/version"
|
||||
status_code: [200, 404]
|
||||
register: cal_http
|
||||
retries: 12
|
||||
delay: 10
|
||||
until: cal_http.status in [200, 404]
|
||||
38
roles/cal_sso/templates/authentik-cal-oidc.yaml.j2
Normal file
38
roles/cal_sso/templates/authentik-cal-oidc.yaml.j2
Normal file
@ -0,0 +1,38 @@
|
||||
# Cal.com OIDC provider + application (managed by Ansible)
|
||||
version: 1
|
||||
metadata:
|
||||
name: Cal.com OIDC
|
||||
labels:
|
||||
blueprints.goauthentik.io/instantiate: "true"
|
||||
entries:
|
||||
- model: authentik_providers_oauth2.oauth2provider
|
||||
id: cal-oidc-provider
|
||||
identifiers:
|
||||
name: {{ cal_authentik_provider_name }}
|
||||
attrs:
|
||||
name: {{ cal_authentik_provider_name }}
|
||||
authorization_flow: !Find [authentik_flows.flow, [slug, default-provider-authorization-implicit-consent]]
|
||||
invalidation_flow: !Find [authentik_flows.flow, [slug, default-provider-invalidation-flow]]
|
||||
client_type: confidential
|
||||
client_id: {{ cal_oidc_client_id }}
|
||||
client_secret: {{ vault_cal_oidc_client_secret }}
|
||||
redirect_uris:
|
||||
- matching_mode: strict
|
||||
url: {{ cal_public_url }}/api/auth/oidc
|
||||
signing_key: !Find [authentik_crypto.certificatekeypair, [name, authentik Self-signed Certificate]]
|
||||
property_mappings:
|
||||
- !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]]
|
||||
- !Find [authentik_providers_oauth2.scopemapping, [scope_name, email]]
|
||||
- !Find [authentik_providers_oauth2.scopemapping, [scope_name, profile]]
|
||||
- model: authentik_core.application
|
||||
id: cal-oidc-app
|
||||
identifiers:
|
||||
slug: {{ cal_authentik_app_slug }}
|
||||
attrs:
|
||||
name: Cal.com
|
||||
slug: {{ cal_authentik_app_slug }}
|
||||
group: ""
|
||||
provider: !KeyOf cal-oidc-provider
|
||||
policy_engine_mode: any
|
||||
meta_launch_url: {{ cal_public_url }}
|
||||
meta_icon: https://cal.com/favicon.ico
|
||||
44
roles/cal_sso/templates/docker-compose.yml.j2
Normal file
44
roles/cal_sso/templates/docker-compose.yml.j2
Normal file
@ -0,0 +1,44 @@
|
||||
services:
|
||||
db:
|
||||
image: postgres:15
|
||||
container_name: cal-db
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
POSTGRES_DB: ${POSTGRES_DB}
|
||||
volumes:
|
||||
- ./postgres-data:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
calcom:
|
||||
image: calcom/cal.com:latest
|
||||
container_name: calcom
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
DATABASE_URL: ${DATABASE_URL}
|
||||
DATABASE_DIRECT_URL: ${DATABASE_DIRECT_URL}
|
||||
NEXT_PUBLIC_WEBAPP_URL: ${NEXT_PUBLIC_WEBAPP_URL}
|
||||
NEXT_PUBLIC_API_V2_URL: ${NEXT_PUBLIC_API_V2_URL}
|
||||
NEXTAUTH_URL: ${NEXTAUTH_URL}
|
||||
NEXTAUTH_SECRET: ${NEXTAUTH_SECRET}
|
||||
CALENDSO_ENCRYPTION_KEY: ${CALENDSO_ENCRYPTION_KEY}
|
||||
CALCOM_LICENSE_KEY: ${CALCOM_LICENSE_KEY}
|
||||
NEXT_PUBLIC_LICENSE_CONSENT: ${NEXT_PUBLIC_LICENSE_CONSENT}
|
||||
SAML_DATABASE_URL: ${SAML_DATABASE_URL}
|
||||
SAML_ADMINS: ${SAML_ADMINS}
|
||||
EMAIL_FROM: ${EMAIL_FROM}
|
||||
EMAIL_SERVER_HOST: ${EMAIL_SERVER_HOST}
|
||||
EMAIL_SERVER_PORT: ${EMAIL_SERVER_PORT}
|
||||
EMAIL_SERVER_USER: ${EMAIL_SERVER_USER}
|
||||
EMAIL_SERVER_PASSWORD: ${EMAIL_SERVER_PASSWORD}
|
||||
CALCOM_TELEMETRY_DISABLED: ${CALCOM_TELEMETRY_DISABLED}
|
||||
ports:
|
||||
- "3000:3000"
|
||||
23
roles/maintenance_cron/README.md
Normal file
23
roles/maintenance_cron/README.md
Normal file
@ -0,0 +1,23 @@
|
||||
# maintenance_cron
|
||||
|
||||
Weekly cleanup jobs for production hosts.
|
||||
|
||||
## Scripts
|
||||
|
||||
| Script | Schedule | Purpose |
|
||||
|--------|----------|---------|
|
||||
| `system-maintenance` | `/etc/cron.weekly/` | `journalctl --vacuum-size=500M`, `apt-get autoremove -y`, `apt-get autoclean -y` |
|
||||
| `docker-prune` | `/etc/cron.weekly/` | `docker system prune -af --filter until=168h` |
|
||||
| `gitea-archive-prune` | `/etc/cron.weekly/` (`/etc/periodic/weekly/` on Alpine giteaVM) | Delete Gitea `repo-archive` files older than 7 days |
|
||||
|
||||
## Variables
|
||||
|
||||
See `defaults/main.yml`. Enable per host or group:
|
||||
|
||||
```yaml
|
||||
maintenance_cron_enable_system: true
|
||||
maintenance_cron_enable_docker: true # Docker hosts only
|
||||
maintenance_cron_enable_gitea_archive: true # giteaVM only
|
||||
```
|
||||
|
||||
Applied via `playbooks/maintenance.yml` (tag `maintenance_cron`).
|
||||
18
roles/maintenance_cron/defaults/main.yml
Normal file
18
roles/maintenance_cron/defaults/main.yml
Normal file
@ -0,0 +1,18 @@
|
||||
---
|
||||
# Weekly system cleanup (journal + apt)
|
||||
maintenance_cron_enable_system: true
|
||||
maintenance_cron_journal_vacuum_size: 500M
|
||||
maintenance_cron_system_script: /etc/cron.weekly/system-maintenance
|
||||
|
||||
# Docker prune (CI / Docker hosts)
|
||||
maintenance_cron_enable_docker: false
|
||||
maintenance_cron_docker_prune_until: 168h
|
||||
maintenance_cron_docker_script: /etc/cron.weekly/docker-prune
|
||||
maintenance_cron_docker_log: /var/log/docker-prune.log
|
||||
|
||||
# Gitea repo-archive cache (Alpine Gitea VM)
|
||||
maintenance_cron_enable_gitea_archive: false
|
||||
maintenance_cron_gitea_archive_dir: /var/lib/gitea/data/repo-archive
|
||||
maintenance_cron_gitea_archive_max_age_days: 7
|
||||
maintenance_cron_gitea_archive_script: /etc/cron.weekly/gitea-archive-prune
|
||||
maintenance_cron_gitea_archive_log: /var/log/gitea-archive-prune.log
|
||||
27
roles/maintenance_cron/tasks/main.yml
Normal file
27
roles/maintenance_cron/tasks/main.yml
Normal file
@ -0,0 +1,27 @@
|
||||
---
|
||||
- name: Install weekly system maintenance script
|
||||
ansible.builtin.template:
|
||||
src: system-maintenance.sh.j2
|
||||
dest: "{{ maintenance_cron_system_script }}"
|
||||
owner: root
|
||||
group: root
|
||||
mode: '0755'
|
||||
when: maintenance_cron_enable_system | bool
|
||||
|
||||
- name: Install weekly Docker prune script
|
||||
ansible.builtin.template:
|
||||
src: docker-prune.sh.j2
|
||||
dest: "{{ maintenance_cron_docker_script }}"
|
||||
owner: root
|
||||
group: root
|
||||
mode: '0755'
|
||||
when: maintenance_cron_enable_docker | bool
|
||||
|
||||
- name: Install weekly Gitea archive prune script
|
||||
ansible.builtin.template:
|
||||
src: gitea-archive-prune.sh.j2
|
||||
dest: "{{ maintenance_cron_gitea_archive_script }}"
|
||||
owner: root
|
||||
group: root
|
||||
mode: '0755'
|
||||
when: maintenance_cron_enable_gitea_archive | bool
|
||||
8
roles/maintenance_cron/templates/docker-prune.sh.j2
Normal file
8
roles/maintenance_cron/templates/docker-prune.sh.j2
Normal file
@ -0,0 +1,8 @@
|
||||
#!/bin/bash
# Ansible managed — weekly Docker image/container cleanup
#
# Runs `docker system prune -af` restricted to objects older than
# maintenance_cron_docker_prune_until and appends all output to
# maintenance_cron_docker_log. Exits 0 without doing anything when no
# docker binary is on PATH.
set -euo pipefail

# Resolve the binary once so the presence check and the invocation agree
# (docker is not always installed under /usr/bin).
DOCKER_BIN="$(command -v docker 2>/dev/null)" || exit 0

{
  # Timestamp header so individual weekly runs can be told apart in the log.
  echo "=== $(date -Iseconds) docker-prune ==="
  "${DOCKER_BIN}" system prune -af --filter "until={{ maintenance_cron_docker_prune_until }}"
} >> "{{ maintenance_cron_docker_log }}" 2>&1
|
||||
19
roles/maintenance_cron/templates/gitea-archive-prune.sh.j2
Normal file
19
roles/maintenance_cron/templates/gitea-archive-prune.sh.j2
Normal file
@ -0,0 +1,19 @@
|
||||
#!/bin/sh
# Ansible managed — weekly Gitea repo-archive cache cleanup
#
# Deletes files older than MAX_AGE_DAYS below ARCHIVE_DIR, then removes the
# sub-directories that became empty. Before/after sizes and root filesystem
# usage are appended to LOG. Exits 0 when ARCHIVE_DIR does not exist.
set -eu

ARCHIVE_DIR="{{ maintenance_cron_gitea_archive_dir }}"
LOG="{{ maintenance_cron_gitea_archive_log }}"
MAX_AGE_DAYS="{{ maintenance_cron_gitea_archive_max_age_days }}"

if [ ! -d "${ARCHIVE_DIR}" ]; then
  exit 0
fi

{
  echo "=== $(date -Iseconds) gitea-archive-prune ==="
  echo "Before: $(du -sh "${ARCHIVE_DIR}" 2>/dev/null | awk '{print $1}')"
  find "${ARCHIVE_DIR}" -type f -mtime "+${MAX_AGE_DAYS}" -delete
  # -mindepth 1 keeps ARCHIVE_DIR itself: without it find would also delete
  # the cache root once every entry below it has been pruned.
  find "${ARCHIVE_DIR}" -mindepth 1 -type d -empty -delete
  echo "After: $(du -sh "${ARCHIVE_DIR}" 2>/dev/null | awk '{print $1}')"
  df -h /
} >> "${LOG}" 2>&1
|
||||
@ -0,0 +1,7 @@
|
||||
#!/bin/bash
# Ansible managed — weekly journal vacuum + apt cleanup
#
# Shrinks the systemd journal to maintenance_cron_journal_vacuum_size
# (best effort), then removes unneeded packages and stale package archives.
set -euo pipefail

# Best effort: a missing journalctl or a vacuum error must not stop the run.
journalctl --vacuum-size="{{ maintenance_cron_journal_vacuum_size }}" 2>/dev/null || true

# Hosts without apt have nothing further to clean; exit cleanly instead of
# failing the weekly cron job.
if ! command -v apt-get >/dev/null 2>&1; then
  exit 0
fi

export DEBIAN_FRONTEND=noninteractive
apt-get autoremove -y
apt-get autoclean -y
|
||||
60
scripts/bootstrap-root-ssh-su-password.sh
Executable file
60
scripts/bootstrap-root-ssh-su-password.sh
Executable file
@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env bash
# Bootstrap root SSH when `su` needs a password (no sudo on host).
# Usage: BOOTSTRAP_SU_PASSWORD='...' ./scripts/bootstrap-root-ssh-su-password.sh HOST
#
# Environment:
#   BOOTSTRAP_SU_PASSWORD  password typed at the `su -` prompt (required)
#   BOOTSTRAP_USER         login user for the first SSH hop (default: ladmin)
#   SSH_PUBLIC_KEY         public key to install (default: ~/.ssh/id_ed25519.pub)
#
# HOST is looked up in inventories/production/hosts; its ansible_host value is
# the address that is actually contacted.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
HOST="${1:-}"
BOOTSTRAP_USER="${BOOTSTRAP_USER:-ladmin}"
PUBKEY_FILE="${SSH_PUBLIC_KEY:-${HOME}/.ssh/id_ed25519.pub}"
SU_PASSWORD="${BOOTSTRAP_SU_PASSWORD:-}"

[[ -n "${HOST}" ]] || { echo "Usage: $0 HOST" >&2; exit 1; }
[[ -n "${SU_PASSWORD}" ]] || { echo "Set BOOTSTRAP_SU_PASSWORD" >&2; exit 1; }
[[ -f "${PUBKEY_FILE}" ]] || { echo "Missing ${PUBKEY_FILE}" >&2; exit 1; }

# First ansible_host= value on the inventory line whose first field is HOST.
IP="$(awk -v h="${HOST}" '$1==h {for(i=2;i<=NF;i++) if($i~/^ansible_host=/) {sub(/ansible_host=/,"",$i); print $i; exit}}' \
  "${REPO_ROOT}/inventories/production/hosts")"
[[ -n "${IP}" ]] || { echo "No ansible_host for ${HOST}" >&2; exit 1; }

PUBKEY="$(cat "${PUBKEY_FILE}")"
# The expect script reads these through Tcl's env array; the heredoc is quoted
# so the shell expands nothing inside it.
export IP BOOTSTRAP_USER SU_PASSWORD PUBKEY

/usr/bin/expect <<'EXPECT'
set timeout 60
spawn ssh -o StrictHostKeyChecking=accept-new $env(BOOTSTRAP_USER)@$env(IP)
expect {
  -re {[$#] $} { }
  timeout { exit 1 }
}
send "su -\r"
expect {
  "Password:" {
    send "$env(SU_PASSWORD)\r"
  }
  timeout { exit 1 }
}
expect {
  -re {root@caddy|#|❯|[$#] $} { }
  timeout { exit 1 }
}
# Start a plain bash so the following prompts are matched reliably.
send "bash --noprofile --norc\r"
expect {
  -re {# $} { }
  timeout { exit 1 }
}
send "mkdir -p /root/.ssh && chmod 700 /root/.ssh && touch /root/.ssh/authorized_keys && chmod 600 /root/.ssh/authorized_keys\r"
expect -re {# $}
send "grep -qF '$env(PUBKEY)' /root/.ssh/authorized_keys || echo '$env(PUBKEY)' >> /root/.ssh/authorized_keys\r"
expect -re {# $}
# sed exits 0 even when nothing matched, so test for the directive first and
# append it only when no (commented or active) PermitRootLogin line exists.
send "grep -qE '^#*PermitRootLogin' /etc/ssh/sshd_config && sed -i 's/^#*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config || echo 'PermitRootLogin prohibit-password' >> /etc/ssh/sshd_config\r"
expect -re {# $}
send "systemctl restart ssh 2>/dev/null || systemctl restart sshd 2>/dev/null || true\r"
expect -re {# $}
send "exit\r"
expect eof
EXPECT

# ssh -i expects the private key; derive it from the public key path and fall
# back to the .pub path (agent-held key) when no private file sits beside it.
IDENTITY_FILE="${PUBKEY_FILE%.pub}"
[[ -f "${IDENTITY_FILE}" ]] || IDENTITY_FILE="${PUBKEY_FILE}"

ssh -o BatchMode=yes -i "${IDENTITY_FILE}" -o ConnectTimeout=10 \
  "root@${IP}" "echo OK: root@${IP}"
echo "Done: root key on ${HOST}"
|
||||
103
scripts/bootstrap-root-ssh.sh
Executable file
103
scripts/bootstrap-root-ssh.sh
Executable file
@ -0,0 +1,103 @@
|
||||
#!/usr/bin/env bash
# Bootstrap root SSH key access via a normal user (default: ladmin).
# Usage: ./scripts/bootstrap-root-ssh.sh HOSTNAME
# BOOTSTRAP_USER=ladmin TARGET_USER=root SSH_PUBLIC_KEY=~/.ssh/id_ed25519.pub
#
# Steps: (1) ssh-copy-id the key for BOOTSTRAP_USER, (2) copy the key to the
# host and append it to /root/.ssh/authorized_keys through `su - root`, also
# setting PermitRootLogin prohibit-password, (3) verify key login as
# TARGET_USER.
#
# NOTE(review): step 2 feeds the remote script to ssh on stdin, so `su` has no
# terminal to prompt on — confirm this works on hosts where su asks for a
# password (bootstrap-root-ssh-su-password.sh drives the prompt with expect).

set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
INVENTORY_HOSTS="${INVENTORY_HOSTS:-${REPO_ROOT}/inventories/production/hosts}"
PUBKEY_FILE="${SSH_PUBLIC_KEY:-${HOME}/.ssh/id_ed25519.pub}"
BOOTSTRAP_USER="${BOOTSTRAP_USER:-ladmin}"
TARGET_USER="${TARGET_USER:-root}"
HOST="${1:-}"

if [[ -z "${HOST}" ]]; then
  echo "Usage: $0 HOST" >&2
  exit 1
fi

if [[ ! -f "${PUBKEY_FILE}" ]]; then
  echo "Public key not found: ${PUBKEY_FILE}" >&2
  exit 1
fi

# ssh -i expects the private key; derive it from the public key path and fall
# back to the .pub path (agent-held key) when no private file sits beside it.
IDENTITY_FILE="${PUBKEY_FILE%.pub}"
[[ -f "${IDENTITY_FILE}" ]] || IDENTITY_FILE="${PUBKEY_FILE}"

# Print two lines for HOST from the inventory: ansible_host, then ansible_user
# (either may be empty).
resolve_from_inventory() {
  awk -v host="${HOST}" '
    $1 == host {
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^ansible_host=/) {
          sub(/ansible_host=/, "", $i)
          ip = $i
        }
        if ($i ~ /^ansible_user=/) {
          sub(/ansible_user=/, "", $i)
          user = $i
        }
      }
    }
    END {
      print ip
      print user
    }
  ' "${INVENTORY_HOSTS}"
}

IP="$(resolve_from_inventory | sed -n '1p')"
INV_USER="$(resolve_from_inventory | sed -n '2p')"

if [[ -z "${IP}" ]]; then
  echo "Could not resolve ansible_host for ${HOST} in ${INVENTORY_HOSTS}" >&2
  exit 1
fi

echo "==> ${HOST} (${BOOTSTRAP_USER}@${IP} -> ${TARGET_USER})"
echo " Inventory ansible_user: ${INV_USER:-<unset>}"
echo " Public key: ${PUBKEY_FILE}"
echo ""

echo "Step 1/3: Install key for ${BOOTSTRAP_USER} (password: ${BOOTSTRAP_USER})"
ssh-copy-id -i "${PUBKEY_FILE}" -o StrictHostKeyChecking=accept-new \
  "${BOOTSTRAP_USER}@${IP}"

echo ""
echo "Step 2/3: Copy key and configure ${TARGET_USER} via su (password: root)"
REMOTE_KEY="/tmp/ansible-bootstrap.pub"
scp -o StrictHostKeyChecking=accept-new "${PUBKEY_FILE}" \
  "${BOOTSTRAP_USER}@${IP}:${REMOTE_KEY}"

# The outer heredoc is unquoted: ${REMOTE_KEY} is expanded locally, while the
# backslash-escaped expansions are left for the remote shell.
ssh -t "${BOOTSTRAP_USER}@${IP}" bash -s <<REMOTE_SCRIPT
set -e
REMOTE_KEY="${REMOTE_KEY}"
su - root <<ROOT_SCRIPT
set -e
mkdir -p /root/.ssh
chmod 700 /root/.ssh
touch /root/.ssh/authorized_keys
chmod 600 /root/.ssh/authorized_keys
if ! grep -qF "\$(cat "\${REMOTE_KEY}")" /root/.ssh/authorized_keys 2>/dev/null; then
  cat "\${REMOTE_KEY}" >> /root/.ssh/authorized_keys
fi
rm -f "\${REMOTE_KEY}"
if [ -f /etc/ssh/sshd_config ]; then
  if grep -q '^PermitRootLogin' /etc/ssh/sshd_config; then
    sed -i 's/^#*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
  else
    echo 'PermitRootLogin prohibit-password' >> /etc/ssh/sshd_config
  fi
  systemctl restart ssh 2>/dev/null \
    || systemctl restart sshd 2>/dev/null \
    || service ssh restart 2>/dev/null \
    || true
fi
echo "OK: root authorized_keys updated; PermitRootLogin prohibit-password"
ROOT_SCRIPT
REMOTE_SCRIPT

echo ""
echo "Step 3/3: Verify ${TARGET_USER} key login"
ssh -o BatchMode=yes -i "${IDENTITY_FILE}" -o StrictHostKeyChecking=accept-new \
  "${TARGET_USER}@${IP}" "echo OK: ${TARGET_USER}@${IP} accepts your SSH key"

echo ""
echo "Done: ${HOST} — use: ssh -i ${IDENTITY_FILE} ${TARGET_USER}@${IP}"
|
||||
70
scripts/kuma-add-monitors.sh
Executable file
70
scripts/kuma-add-monitors.sh
Executable file
@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env bash
# Add Uptime Kuma HTTP monitors via API.
# Usage:
#   export KUMA_URL=... KUMA_USER=... KUMA_PASSWORD=...   # or keep them in .env
#   ./scripts/kuma-add-monitors.sh
#
# When KUMA_PASSWORD is not set, the repository's .env file (if present) is
# sourced with auto-export enabled.
# Monitors are idempotent: existing names are skipped.

set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
KUMA_URL="${KUMA_URL:-http://10.0.10.22:3001}"
KUMA_USER="${KUMA_USER:-admin}"
KUMA_PASSWORD="${KUMA_PASSWORD:-}"

# Fall back to .env only when the password did not come from the environment.
if [[ -z "${KUMA_PASSWORD}" && -f "${REPO_ROOT}/.env" ]]; then
  set -a
  # shellcheck disable=SC1091
  source "${REPO_ROOT}/.env"
  set +a
  KUMA_PASSWORD="${KUMA_PASSWORD:-}"
fi

[[ -n "${KUMA_PASSWORD}" ]] || {
  echo "Set KUMA_PASSWORD (or run vault-export-env.sh first)" >&2
  exit 1
}

export KUMA_URL KUMA_USER KUMA_PASSWORD

"${REPO_ROOT}/.venv/bin/python3" <<'PY'
import os
import sys

try:
    from uptime_kuma_api import UptimeKumaApi
except ImportError:
    print("Run: .venv/bin/pip install uptime-kuma-api", file=sys.stderr)
    sys.exit(1)

# Desired monitors; each dict is passed to add_monitor() as keyword arguments.
MONITORS = [
    {
        "type": "http",
        "name": "Gitea",
        "url": "https://git.levkin.ca/user/login",
        "interval": 60,
        "retryInterval": 60,
        "maxretries": 3,
        "accepted_statuscodes": ["200-299"],
    },
]

kuma_url = os.environ["KUMA_URL"]
kuma_user = os.environ["KUMA_USER"]
kuma_password = os.environ["KUMA_PASSWORD"]

with UptimeKumaApi(kuma_url) as api:
    api.login(kuma_user, kuma_password)

    # Index the monitors already on the server by name.
    by_name = {}
    for monitor in api.get_monitors():
        by_name[monitor.get("name")] = monitor

    for spec in MONITORS:
        name = spec["name"]
        if name not in by_name:
            result = api.add_monitor(**spec)
            print(f"added: {name} -> {result}")
        else:
            print(f"skip (exists): {name} id={by_name[name].get('id')}")
PY
|
||||
66
scripts/kuma-setup-smtp.sh
Executable file
66
scripts/kuma-setup-smtp.sh
Executable file
@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env bash
# Configure Uptime Kuma SMTP notification (Mailcow) via Socket.IO API.
# Run from machine with network access to Kuma:
#   export KUMA_URL=http://10.0.10.22:3001
#   export KUMA_USER=admin
#   export KUMA_PASSWORD='your-kuma-password'
#   export SMTP_USER=alerts@levkine.ca
#   export SMTP_PASS='mailbox-password'
#   export SMTP_TO=idobkin@gmail.com
#   pip install uptime-kuma-api
#   ./scripts/kuma-setup-smtp.sh
#
# Optional: SMTP_HOST, SMTP_PORT, and SMTP_SECURE (true/false). When
# SMTP_SECURE is unset, implicit TLS is enabled only for port 465; other ports
# (e.g. 587) connect in plain text and upgrade with STARTTLS.
# Re-running is safe: an existing notification with the same name is skipped.

set -euo pipefail

KUMA_URL="${KUMA_URL:-http://10.0.10.22:3001}"
KUMA_USER="${KUMA_USER:-admin}"
KUMA_PASSWORD="${KUMA_PASSWORD:-}"
SMTP_HOST="${SMTP_HOST:-mail.levkine.ca}"
SMTP_PORT="${SMTP_PORT:-587}"
SMTP_USER="${SMTP_USER:-alerts@levkine.ca}"
SMTP_PASS="${SMTP_PASS:-}"
SMTP_TO="${SMTP_TO:-idobkin@gmail.com}"
SMTP_SECURE="${SMTP_SECURE:-}"

if [[ -z "${KUMA_PASSWORD}" || -z "${SMTP_PASS}" ]]; then
  echo "Set KUMA_PASSWORD and SMTP_PASS" >&2
  exit 1
fi

# The Python child reads everything from os.environ, so values that only came
# from the defaults above must be exported as well.
export KUMA_URL KUMA_USER KUMA_PASSWORD
export SMTP_HOST SMTP_PORT SMTP_USER SMTP_PASS SMTP_TO SMTP_SECURE

python3 <<'PY'
import os
import sys

try:
    from uptime_kuma_api import UptimeKumaApi
except ImportError:
    print("pip install uptime-kuma-api", file=sys.stderr)
    sys.exit(1)

NOTIFICATION_NAME = "Mailcow alerts"

url = os.environ["KUMA_URL"]
user = os.environ["KUMA_USER"]
password = os.environ["KUMA_PASSWORD"]
smtp_host = os.environ["SMTP_HOST"]
smtp_port = int(os.environ["SMTP_PORT"])
smtp_user = os.environ["SMTP_USER"]
smtp_pass = os.environ["SMTP_PASS"]
smtp_to = os.environ["SMTP_TO"]

# smtpSecure means "TLS from the first byte" (port 465). Submission on 587
# starts unencrypted and upgrades via STARTTLS, so it must be False there.
secure_override = os.environ.get("SMTP_SECURE", "").strip().lower()
if secure_override:
    smtp_secure = secure_override in ("1", "true", "yes", "on")
else:
    smtp_secure = smtp_port == 465

with UptimeKumaApi(url) as api:
    api.login(user, password)

    # Avoid stacking duplicate notifications when the script is run again.
    existing = [
        n for n in api.get_notifications() if n.get("name") == NOTIFICATION_NAME
    ]
    if existing:
        print(f"skip (exists): {NOTIFICATION_NAME} id={existing[0].get('id')}")
        sys.exit(0)

    # Notification type name in Kuma 1.x is often 'smtp' / 'email'
    result = api.add_notification(
        name=NOTIFICATION_NAME,
        type="smtp",
        isDefault=True,
        applyExisting=True,
        smtpHost=smtp_host,
        smtpPort=smtp_port,
        smtpSecure=smtp_secure,
        smtpIgnoreTLS=False,
        smtpUsername=smtp_user,
        smtpPassword=smtp_pass,
        smtpFrom=smtp_user,
        smtpTo=smtp_to,
    )
    print(result)
PY
|
||||
51
scripts/load-mailcow-vault-env.sh
Executable file
51
scripts/load-mailcow-vault-env.sh
Executable file
@ -0,0 +1,51 @@
|
||||
#!/usr/bin/env bash
# Export Mailcow API + mailbox password from .env or Ansible vault.
# Usage: source scripts/load-mailcow-vault-env.sh [mailbox_local_part]
#
# Resolution order:
#   1. .env in the repo root (sourced with auto-export). Used as-is when it
#      yields MAILCOW_API_KEY plus a password for the requested mailbox.
#   2. The Ansible vault (vault_mailcow_api_key,
#      vault_mailcow_mailbox_passwords.<key>, vault_alerts_mailbox_password).
# ALERTS_PASSWORD is treated as the password of the "alerts" mailbox only; it
# is also accepted when no mailbox key was given.
#
# NOTE: this file is meant to be sourced, so the shell options set below stay
# active in the calling shell.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
VAULT_FILE="${REPO_ROOT}/inventories/production/group_vars/all/vault.yml"
VAULT_PASS="${HOME}/.ansible-vault-pass"
ANSIBLE_VAULT="${REPO_ROOT}/.venv/bin/ansible-vault"
MAILBOX_KEY="${1:-${MAILBOX:-${MAILBOX_LOCAL_PART:-}}}"

set -a
[ -f "${REPO_ROOT}/.env" ] && . "${REPO_ROOT}/.env"
set +a

# Only the alerts mailbox (or an unspecified one) may borrow ALERTS_PASSWORD;
# any other mailbox must not silently receive the alerts secret.
if [[ -z "${MAILBOX_PASSWORD:-}" && -n "${ALERTS_PASSWORD:-}" ]] \
  && [[ -z "${MAILBOX_KEY}" || "${MAILBOX_KEY}" == "alerts" ]]; then
  MAILBOX_PASSWORD="${ALERTS_PASSWORD}"
fi

if [[ -n "${MAILCOW_API_KEY:-}" && -n "${MAILBOX_PASSWORD:-}" ]]; then
  export MAILBOX_PASSWORD
  return 0 2>/dev/null || exit 0
fi

# Without a vault file or its password file there is nothing more to load.
if [[ ! -f "${VAULT_FILE}" ]] || [[ ! -f "${VAULT_PASS}" ]]; then
  return 0 2>/dev/null || exit 0
fi

# The helper prints shell-quoted `export` lines, which are evaluated here.
eval "$("${REPO_ROOT}/.venv/bin/python3" - "${VAULT_FILE}" "${VAULT_PASS}" "${ANSIBLE_VAULT}" "${MAILBOX_KEY}" <<'PY'
import shlex
import subprocess
import sys

import yaml

vault_file, vault_pass, ansible_vault, mailbox_key = sys.argv[1:5]
text = subprocess.check_output(
    [ansible_vault, "view", vault_file, "--vault-password-file", vault_pass],
    text=True,
)
data = yaml.safe_load(text) or {}
out = []
api = data.get("vault_mailcow_api_key") or ""
if api:
    out.append("export MAILCOW_API_KEY=" + shlex.quote(str(api)))
passwords = data.get("vault_mailcow_mailbox_passwords") or {}
pw = ""
if mailbox_key and mailbox_key in passwords:
    pw = passwords[mailbox_key]
elif mailbox_key == "alerts":
    pw = data.get("vault_alerts_mailbox_password") or passwords.get("alerts", "")
if pw:
    out.append("export MAILBOX_PASSWORD=" + shlex.quote(str(pw)))
    # Keep ALERTS_PASSWORD tied to the alerts mailbox only.
    if mailbox_key == "alerts":
        out.append("export ALERTS_PASSWORD=" + shlex.quote(str(pw)))
print("\n".join(out))
PY
)"

return 0 2>/dev/null || exit 0
|
||||
18
scripts/load-vault-lxc-root-password.sh
Executable file
18
scripts/load-vault-lxc-root-password.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
# Export BOOTSTRAP_SU_PASSWORD from vault_lxc_root_password
#
# Decrypts inventories/production/group_vars/all/vault.yml with the repo's
# .venv ansible-vault and ~/.ansible-vault-pass, then evaluates the resulting
# `export` line in the current shell. Nothing is exported when the vault key
# is missing or empty.
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
eval "$("${REPO_ROOT}/.venv/bin/python3" - "${REPO_ROOT}" <<'PY'
import os
import shlex
import subprocess
import sys

import yaml

repo = sys.argv[1]
vault_cmd = [
    os.path.join(repo, ".venv/bin/ansible-vault"),
    "view",
    os.path.join(repo, "inventories/production/group_vars/all/vault.yml"),
    "--vault-password-file",
    os.path.expanduser("~/.ansible-vault-pass"),
]
decrypted = subprocess.check_output(vault_cmd, text=True)
secrets = yaml.safe_load(decrypted) or {}
root_pw = secrets.get("vault_lxc_root_password", "")
if root_pw:
    # shlex.quote keeps the value safe for the surrounding eval.
    print("export BOOTSTRAP_SU_PASSWORD=" + shlex.quote(str(root_pw)))
PY
)"
|
||||
32
scripts/mailcow-mailbox-from-inventory.sh
Executable file
32
scripts/mailcow-mailbox-from-inventory.sh
Executable file
@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env bash
# Resolve MAILBOX= key from inventories/production/group_vars/all/mailcow.yml
#
# Looks up mailcow_mailboxes[MAILBOX] and produces shell `export` lines for
# MAILBOX_LOCAL_PART, MAILBOX_NAME, MAILBOX_QUOTA and MAILBOX_VAULT_KEY (only
# for fields present in the inventory).
#
# Executed:  the export lines are printed, for `eval "$(this-script)"`.
# Sourced:   the export lines are evaluated in the calling shell.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
MAILBOX="${MAILBOX:-}"
[[ -n "${MAILBOX}" ]] || { echo "MAILBOX required" >&2; exit 1; }

# Capture the helper's output; a failure (e.g. unknown mailbox) aborts here
# with the helper's message on stderr.
MAILBOX_EXPORTS="$("${REPO_ROOT}/.venv/bin/python3" - "${REPO_ROOT}" "${MAILBOX}" <<'PY'
import os
import shlex
import sys

import yaml

repo, key = sys.argv[1], sys.argv[2]
path = os.path.join(repo, "inventories/production/group_vars/all/mailcow.yml")
with open(path) as f:
    data = yaml.safe_load(f) or {}
boxes = data.get("mailcow_mailboxes") or {}
if key not in boxes:
    raise SystemExit(f"Unknown MAILBOX={key!r}. Add it to mailcow_mailboxes in mailcow.yml")
b = boxes[key]
out = []
for k, env in [
    ("local_part", "MAILBOX_LOCAL_PART"),
    ("name", "MAILBOX_NAME"),
    ("quota", "MAILBOX_QUOTA"),
]:
    if k in b and b[k] is not None:
        out.append(f"export {env}={shlex.quote(str(b[k]))}")
if b.get("vault_password_key"):
    out.append(f"export MAILBOX_VAULT_KEY={shlex.quote(str(b['vault_password_key']))}")
print("\n".join(out))
PY
)"

if [[ "${BASH_SOURCE[0]}" != "$0" ]]; then
  # Sourced: apply the exports directly to the caller's shell.
  eval "${MAILBOX_EXPORTS}"
else
  # Executed: a child process cannot change its parent's environment, so hand
  # the export lines back on stdout for the caller to eval.
  printf '%s\n' "${MAILBOX_EXPORTS}"
fi
|
||||
62
scripts/mailcow-mailbox.sh
Executable file
62
scripts/mailcow-mailbox.sh
Executable file
@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env bash
# Create a Mailcow mailbox via API.
#
# Usage:
#   make mailcow-mailbox MAILBOX=alerts
#   # or with env (after: source scripts/load-mailcow-vault-env.sh):
#   MAILBOX_LOCAL_PART=notify MAILBOX_NAME="Notify" MAILBOX_PASSWORD='...' ./scripts/mailcow-mailbox.sh
#
# Variables (env or make):
#   MAILBOX / MAILBOX_LOCAL_PART — local part (required)
#   MAILBOX_NAME — display name (default: title-case of local part)
#   MAILBOX_PASSWORD — mailbox password (required; ALERTS_PASSWORD is accepted
#                      as a fallback for the "alerts" mailbox only)
#   MAILBOX_QUOTA — MiB (default 1024)
#   MAILCOW_URL, MAILCOW_DOMAIN, MAILCOW_API_KEY — see load-mailcow-vault-env.sh
#
# Exits non-zero on a non-2xx HTTP status or when the response body reports an
# error entry.

set -euo pipefail

MAILCOW_URL="${MAILCOW_URL:-https://mail.levkine.ca}"
DOMAIN="${MAILCOW_DOMAIN:-levkine.ca}"
LOCAL_PART="${MAILBOX_LOCAL_PART:-${MAILBOX:-}}"
API_KEY="${MAILCOW_API_KEY:-}"
MAILBOX_PASSWORD="${MAILBOX_PASSWORD:-}"
QUOTA="${MAILBOX_QUOTA:-1024}"

if [[ -z "${LOCAL_PART}" ]]; then
  echo "Set MAILBOX=localpart or MAILBOX_LOCAL_PART" >&2
  exit 1
fi

if [[ -z "${API_KEY}" ]]; then
  echo "Set MAILCOW_API_KEY (make mailcow-mailbox loads vault/.env)" >&2
  exit 1
fi

# ALERTS_PASSWORD belongs to the alerts mailbox; never reuse it for another one.
if [[ -z "${MAILBOX_PASSWORD}" && "${LOCAL_PART}" == "alerts" ]]; then
  MAILBOX_PASSWORD="${ALERTS_PASSWORD:-}"
fi

if [[ -z "${MAILBOX_PASSWORD}" ]]; then
  echo "Set MAILBOX_PASSWORD or add vault_mailcow_mailbox_passwords.${LOCAL_PART} in vault" >&2
  exit 1
fi

# Default display name: local part with -/_ turned into spaces, title-cased.
DISPLAY_NAME="${MAILBOX_NAME:-$(echo "${LOCAL_PART}" | sed 's/[-_]/ /g' | awk '{for(i=1;i<=NF;i++) $i=toupper(substr($i,1,1)) tolower(substr($i,2)); print}')}"

# jq builds the JSON so every value is escaped correctly.
ATTR=$(jq -nc \
  --arg lp "${LOCAL_PART}" \
  --arg dom "${DOMAIN}" \
  --arg name "${DISPLAY_NAME}" \
  --arg pw "${MAILBOX_PASSWORD}" \
  --arg quota "${QUOTA}" \
  '{local_part:$lp,domain:$dom,name:$name,quota:$quota,password:$pw,password2:$pw,active:"1"}')

echo "Creating mailbox ${LOCAL_PART}@${DOMAIN} (${DISPLAY_NAME})..."
# --data-urlencode: the JSON travels as a form field, so characters such as
# & + % = in the password must be percent-encoded or they arrive corrupted.
# -S keeps curl's own error message visible despite -s.
# NOTE(review): -k disables TLS certificate verification while the API key and
# the password are being sent — confirm it is still required.
RESP=$(curl -sSk -w "\n%{http_code}" -X POST "${MAILCOW_URL}/api/v1/add/mailbox" \
  -H "X-API-Key: ${API_KEY}" \
  --data-urlencode "attr=${ATTR}")
HTTP_CODE=$(echo "${RESP}" | tail -1)
BODY=$(echo "${RESP}" | sed '$d')
echo "${BODY}" | jq . 2>/dev/null || echo "${BODY}"
if [[ "${HTTP_CODE}" -lt 200 || "${HTTP_CODE}" -ge 300 ]]; then
  echo "Mailcow API HTTP ${HTTP_CODE}" >&2
  exit 1
fi

# A 2xx status alone is not proof of success: fail when any entry of the
# response carries type "danger" or "error".
if printf '%s\n' "${BODY}" | jq -e \
  '(if type == "array" then . else [.] end) | map(.type? // "") | any(. == "danger" or . == "error")' \
  >/dev/null 2>&1; then
  echo "Mailcow API reported an error for ${LOCAL_PART}@${DOMAIN} (see response above)" >&2
  exit 1
fi

echo "Done: ${LOCAL_PART}@${DOMAIN}"
|
||||
17
scripts/run-mailcow-mailbox.sh
Executable file
17
scripts/run-mailcow-mailbox.sh
Executable file
@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
# Wrapper for: make mailcow-mailbox MAILBOX=name
#
# 1. evaluates the output of mailcow-mailbox-from-inventory.sh,
# 2. sources load-mailcow-vault-env.sh with MAILBOX_VAULT_KEY (falling back to
#    MAILBOX) as the mailbox key,
# 3. hands over to mailcow-mailbox.sh once an API key and a password are set.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
MAILBOX="${MAILBOX:?MAILBOX required}"

cd "${REPO_ROOT}"
eval "$(./scripts/mailcow-mailbox-from-inventory.sh)"
source ./scripts/load-mailcow-vault-env.sh "${MAILBOX_VAULT_KEY:-${MAILBOX}}"

# Both secrets present: replace this process with the real worker script.
if [[ -n "${MAILCOW_API_KEY:-}" && -n "${MAILBOX_PASSWORD:-}" ]]; then
  exec ./scripts/mailcow-mailbox.sh
fi

echo "Missing vault_mailcow_api_key or vault_mailcow_mailbox_passwords.${MAILBOX}" >&2
exit 1
|
||||
71
scripts/security-audit-extended.sh
Executable file
71
scripts/security-audit-extended.sh
Executable file
@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env bash
# Extended read-only security + cleanup audit (run on target host).
#
# Prints one "=== section ===" block per topic. Only `set -u` is used so a
# missing tool or a failing probe never stops the remaining sections.
set -u

echo "=== identity ==="
hostname -f 2>/dev/null || hostname
if [ -f /etc/os-release ]; then . /etc/os-release; echo "os=${PRETTY_NAME:-unknown}"; fi
echo "kernel=$(uname -r)"
echo "uptime=$(uptime -p 2>/dev/null || uptime)"

echo "=== disk ==="
df -h / /var 2>/dev/null | tail -n +2 | awk '{print $6" "$5" used "$4" free"}'

echo "=== sshd (effective) ==="
if command -v sshd >/dev/null 2>&1; then
  sshd -T 2>/dev/null | grep -E '^(permitrootlogin|passwordauthentication|pubkeyauthentication|permitemptypasswords|port|x11forwarding|maxauthtries) ' || true
else
  grep -E '^(PermitRootLogin|PasswordAuthentication|PubkeyAuthentication|Port) ' /etc/ssh/sshd_config 2>/dev/null | grep -v '^#' || echo "sshd not found"
fi

echo "=== firewall ==="
if command -v ufw >/dev/null 2>&1; then
  ufw status verbose 2>/dev/null | head -5
elif command -v firewall-cmd >/dev/null 2>&1; then
  firewall-cmd --state 2>/dev/null || true
else
  echo "no ufw/firewalld"
fi

echo "=== fail2ban ==="
systemctl is-active fail2ban 2>/dev/null || echo "fail2ban: inactive or missing"

echo "=== unattended-upgrades ==="
systemctl is-active unattended-upgrades 2>/dev/null || echo "unattended-upgrades: inactive or missing"

echo "=== pending apt upgrades ==="
if command -v apt-get >/dev/null 2>&1; then
  # grep -c already prints "0" (and exits 1) when nothing matches, so do not
  # echo a second zero.
  apt-get -s upgrade 2>/dev/null | grep -c '^Inst' || true
else
  echo "n/a"
fi

echo "=== docker ==="
if command -v docker >/dev/null 2>&1; then
  echo "docker=$(docker --version 2>/dev/null || true)"
  echo "containers=$(docker ps -aq 2>/dev/null | wc -l | tr -d ' ') running=$(docker ps -q 2>/dev/null | wc -l | tr -d ' ')"
  echo "images=$(docker images -q 2>/dev/null | wc -l | tr -d ' ')"
  docker system df 2>/dev/null | tail -n +2 || true
else
  echo "no docker"
fi

echo "=== journal disk ==="
journalctl --disk-usage 2>/dev/null || echo "n/a"

echo "=== apt cache ==="
du -sh /var/cache/apt/archives 2>/dev/null || echo "n/a"

echo "=== existing cron (root) ==="
# Capture first: the exit status of a pipeline ending in head is always
# head's, so an `|| echo` fallback on the pipeline would never run.
root_cron="$(crontab -l 2>/dev/null | grep -v '^#' | grep -v '^$' | head -10)"
if [ -n "${root_cron}" ]; then
  printf '%s\n' "${root_cron}"
else
  echo "no root crontab"
fi
ls /etc/cron.{daily,weekly,monthly}/* 2>/dev/null | xargs -I{} basename {} | head -15 || true

echo "=== listening tcp (non-localhost) ==="
ss -tlnp 2>/dev/null | awk 'NR==1 || /LISTEN/ {print}' | grep -v '127.0.0.1:' | grep -v '\[::1\]:' | head -15

echo "=== uid 0 accounts ==="
awk -F: '$3==0 {print $1}' /etc/passwd | tr '\n' ' '
echo

echo "=== tailscale ==="
command -v tailscale >/dev/null 2>&1 && tailscale status --self 2>/dev/null | head -1 || echo "no tailscale"
|
||||
39
scripts/security-audit-lxc-via-pve.sh
Executable file
39
scripts/security-audit-lxc-via-pve.sh
Executable file
@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env bash
# Audit LXCs on a Proxmox node via pct exec (run ON the PVE host as root).
#
# Prints the node identity, then for every container listed by `pct list` a
# banner plus the output of the AUDIT snippet run inside it. Containers that
# are not running are reported and skipped.
set -u

# Snippet executed inside each container through `bash -c`. It is one
# single-quoted string, so it must not contain single quotes itself.
AUDIT='#!/bin/bash
echo "=== identity ==="
hostname -f 2>/dev/null || hostname
[ -f /etc/os-release ] && . /etc/os-release && echo "os=${PRETTY_NAME:-unknown}"
echo "ip=$(hostname -I 2>/dev/null | awk "{print \$1}")"
echo "=== sshd (effective) ==="
if command -v sshd >/dev/null 2>&1; then
  sshd -T 2>/dev/null | grep -E "^(permitrootlogin|passwordauthentication|pubkeyauthentication|permitemptypasswords|port) " || true
else
  grep -E "^(PermitRootLogin|PasswordAuthentication|PubkeyAuthentication|Port) " /etc/ssh/sshd_config 2>/dev/null | grep -v "^#" || echo "sshd not installed"
fi
echo "=== firewall ==="
if command -v ufw >/dev/null 2>&1; then
  ufw status 2>/dev/null | head -3
else
  echo "no ufw"
fi
echo "=== fail2ban ==="
systemctl is-active fail2ban 2>/dev/null || echo "inactive/missing"
echo "=== pending upgrades ==="
apt-get -s upgrade 2>/dev/null | grep -c "^Inst" || true
echo "=== public listeners ==="
ss -tlnp 2>/dev/null | grep LISTEN | grep -v "127.0.0.1:" | grep -v "\[::1\]:" | head -12
'

echo "PVE_NODE=$(hostname -f 2>/dev/null || hostname)"
echo "PVE_IP=$(hostname -I | awk '{print $1}')"

# Query the container table once and reuse it for id, status and name.
PCT_LIST="$(pct list 2>/dev/null)"

for id in $(printf '%s\n' "${PCT_LIST}" | awk 'NR>1 {print $1}'); do
  status=$(printf '%s\n' "${PCT_LIST}" | awk -v id="$id" '$1==id {print $2}')
  # The Lock column is usually blank, which shifts the name from field 4 to
  # field 3; the last field is the name in both layouts.
  name=$(printf '%s\n' "${PCT_LIST}" | awk -v id="$id" '$1==id && NF>=3 {print $NF}')
  echo ""
  echo "######## LXC vmid=$id name=$name status=$status ########"
  if [ "$status" != "running" ]; then
    echo "SKIP: not running"
    continue
  fi
  pct exec "$id" -- bash -c "$AUDIT" 2>&1 || echo "ERROR: pct exec failed"
done
|
||||
48
scripts/security-audit-remote.sh
Executable file
48
scripts/security-audit-remote.sh
Executable file
@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env bash
# Quick read-only security snapshot (run on target host).
#
# Only `set -u` is enabled, matching the other security-audit-* scripts: with
# -e/pipefail a missing tool (ss, ufw without privileges, ...) would abort the
# report before the remaining sections are printed.
set -u

echo "=== identity ==="
hostname -f 2>/dev/null || hostname
if [ -f /etc/os-release ]; then . /etc/os-release; echo "os=${PRETTY_NAME:-unknown}"; fi
echo "kernel=$(uname -r)"
echo "uptime=$(uptime -p 2>/dev/null || uptime)"

echo "=== sshd (effective) ==="
if command -v sshd >/dev/null 2>&1; then
  sshd -T 2>/dev/null | grep -E '^(permitrootlogin|passwordauthentication|pubkeyauthentication|permitemptypasswords|port|x11forwarding|allowtcpforwarding) ' || true
else
  grep -E '^(PermitRootLogin|PasswordAuthentication|PubkeyAuthentication|Port) ' /etc/ssh/sshd_config 2>/dev/null | grep -v '^#' || echo "sshd not found"
fi

echo "=== firewall ==="
if command -v ufw >/dev/null 2>&1; then
  ufw status verbose 2>/dev/null | head -8
elif command -v firewall-cmd >/dev/null 2>&1; then
  firewall-cmd --state 2>/dev/null || true
else
  echo "no ufw/firewalld"
fi

echo "=== fail2ban ==="
systemctl is-active fail2ban 2>/dev/null || echo "fail2ban: inactive or missing"

echo "=== unattended-upgrades ==="
systemctl is-active unattended-upgrades 2>/dev/null || echo "unattended-upgrades: inactive or missing"

echo "=== pending apt upgrades ==="
if command -v apt-get >/dev/null 2>&1; then
  # grep -c already prints "0" (and exits 1) when nothing matches, so do not
  # echo a second zero.
  apt-get -s upgrade 2>/dev/null | grep -c '^Inst' || true
else
  echo "n/a"
fi

echo "=== listening tcp (public) ==="
ss -tlnp 2>/dev/null | awk 'NR==1 || /LISTEN/ {print}' | grep -v '127.0.0.1:' | grep -v '\[::1\]:' | head -20

echo "=== uid 0 accounts ==="
awk -F: '$3==0 {print $1}' /etc/passwd | tr '\n' ' '
echo

echo "=== last logins (top 5) ==="
last -n 5 2>/dev/null | head -5 || true
|
||||
27
scripts/security-audit-ssh.sh
Executable file
27
scripts/security-audit-ssh.sh
Executable file
@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env bash
# SSH-focused audit (hypervisor or guest).
#
# Read-only: prints the effective sshd settings, selected sshd_config lines,
# the last field of each root authorized_keys entry and today's most recent
# authentication failures.
set -u

echo "=== host ==="
hostname -f 2>/dev/null || hostname

echo "=== sshd effective config ==="
if command -v sshd >/dev/null 2>&1; then
  sshd -T 2>/dev/null | grep -E '^(port|permitrootlogin|passwordauthentication|pubkeyauthentication|permitemptypasswords|maxauthtries|x11forwarding|allowtcpforwarding|gatewayports|permittunnel|usepam|kbdinteractiveauthentication) ' || true
else
  echo "sshd binary missing"
fi

echo "=== sshd_config (non-comment) ==="
grep -E '^(Port|PermitRootLogin|PasswordAuthentication|PubkeyAuthentication|PermitEmptyPasswords|MaxAuthTries|AllowUsers|AllowGroups|X11Forwarding) ' /etc/ssh/sshd_config 2>/dev/null || true

echo "=== authorized_keys (root) ==="
if [ -f /root/.ssh/authorized_keys ]; then
  wc -l /root/.ssh/authorized_keys
  # Last field of each entry (normally the key comment); blank and comment
  # lines are skipped.
  awk 'NF && $1 !~ /^#/ {print $NF}' /root/.ssh/authorized_keys 2>/dev/null | sed 's/^/ key: /'
else
  echo "no /root/.ssh/authorized_keys"
fi

echo "=== recent ssh auth failures (today) ==="
# Capture each source separately: a pipeline ending in tail always exits 0,
# so chaining the fallbacks with || would never reach them.
auth_failures="$(journalctl -u ssh -u sshd --since today 2>/dev/null | grep -iE 'Failed|Invalid|refused' | tail -5)"
if [ -z "${auth_failures}" ]; then
  auth_failures="$(grep -iE 'Failed|Invalid' /var/log/auth.log 2>/dev/null | tail -5)"
fi
echo "${auth_failures:-no logs}"
|
||||
81
scripts/vault-export-env.sh
Executable file
81
scripts/vault-export-env.sh
Executable file
@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env bash
# Write Ansible vault secrets into .env (for local scripts / reference).
# Does not print secret values. Does not overwrite non-empty .env keys.
#
# Optional first argument: target env file (default: <repo>/.env).
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
ENV_FILE="${1:-${REPO_ROOT}/.env}"
VAULT_FILE="${REPO_ROOT}/inventories/production/group_vars/all/vault.yml"
VAULT_PASS="${HOME}/.ansible-vault-pass"
ANSIBLE_VAULT="${REPO_ROOT}/.venv/bin/ansible-vault"

if [[ ! -f "${VAULT_PASS}" ]]; then
  echo "Missing ${VAULT_PASS}" >&2
  exit 1
fi

"${REPO_ROOT}/.venv/bin/python3" - "${ENV_FILE}" "${VAULT_FILE}" "${VAULT_PASS}" "${ANSIBLE_VAULT}" <<'PY'
import subprocess
import sys
from pathlib import Path

import yaml

env_file, vault_file, vault_pass, ansible_vault = sys.argv[1:5]

# vault key -> .env key
MAP = {
    "vault_mailcow_api_key": "MAILCOW_API_KEY",
    "vault_alerts_mailbox_password": "ALERTS_PASSWORD",
    "vault_uptime_kuma_password": "KUMA_PASSWORD",
    "vault_uptime_kuma_user": "KUMA_USER",
    "vault_uptime_kuma_url": "KUMA_URL",
    "vault_umami_admin_password": "UMAMI_ADMIN_PASSWORD",
    "vault_umami_db_password": "UMAMI_DB_PASS",
    "vault_umami_app_secret": "UMAMI_APP_SECRET",
    "vault_kuma_smtp_host": "SMTP_HOST",
    "vault_kuma_smtp_port": "SMTP_PORT",
    "vault_kuma_smtp_user": "SMTP_USER",
    "vault_kuma_smtp_password": "SMTP_PASS",
    "vault_kuma_smtp_to": "SMTP_TO",
    "vault_mattermost_url": "MATTERMOST_URL",
    "vault_mattermost_token": "MATTERMOST_TOKEN",
    "vault_mattermost_allowed_users": "MATTERMOST_ALLOWED_USERS",
}
|
||||
|
||||
def parse_env(text):
    """Parse dotenv-style text into a dict of ``KEY -> value`` strings.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored. A
    leading ``export `` is dropped from the key. Values keep empty strings.
    A value that starts with a quote character is unquoted with shell rules,
    so quote characters that belong to the value survive; anything else is
    returned after the legacy strip of surrounding quote characters.
    """
    import shlex

    def _unquote(raw):
        raw = raw.strip()
        if raw[:1] in ("'", '"'):
            try:
                parts = shlex.split(raw)
            except ValueError:
                # Unbalanced quotes: fall through to the legacy stripping.
                parts = []
            if len(parts) == 1:
                return parts[0]
        return raw.strip("'").strip('"')

    d = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        if line.startswith("export "):
            line = line[len("export "):].strip()
        k, _, v = line.partition("=")
        d[k.strip()] = _unquote(v)
    return d
|
||||
|
||||
import os
import shlex

# Decrypt the vault and merge its values into whatever .env already holds.
text = subprocess.check_output(
    [ansible_vault, "view", vault_file, "--vault-password-file", vault_pass],
    text=True,
)
data = yaml.safe_load(text) or {}
existing = parse_env(Path(env_file).read_text()) if Path(env_file).exists() else {}
merged = dict(existing)

for vk, ek in MAP.items():
    val = data.get(vk)
    if val is None or val == "":
        continue
    # Never replace a value the user already filled in.
    if merged.get(ek):
        continue
    merged[ek] = str(val)

pw = data.get("vault_mailcow_mailbox_passwords") or {}
if pw.get("alerts") and not merged.get("ALERTS_PASSWORD"):
    merged["ALERTS_PASSWORD"] = str(pw["alerts"])

header = """# Merged from Ansible vault (make vault-export-env). Fill gaps manually.
# vault → .env: make vault-export-env
# .env → vault: make vault-import-env
# hosts → .env → vault: make vault-pull-infra-secrets

"""


def _env_line(key, value):
    # The file is sourced by shell scripts, so values containing spaces, $, #
    # or quotes must be shell-quoted; empty values stay as a bare "KEY=".
    value = str(value)
    return f"{key}={shlex.quote(value) if value else ''}"


body = "\n".join(_env_line(k, v) for k, v in sorted(merged.items())) + "\n"

# Create the file owner-only from the start so secrets are never briefly
# readable by others (the mode only applies when the file is newly created).
fd = os.open(env_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(fd, "w") as fh:
    fh.write(header + body)
print(f"Wrote {len(merged)} keys to {env_file} (existing non-empty keys kept)")
PY

# Also tighten a pre-existing file that had looser permissions.
chmod 600 "${ENV_FILE}" 2>/dev/null || true
|
||||
96
scripts/vault-import-env.sh
Executable file
96
scripts/vault-import-env.sh
Executable file
@ -0,0 +1,96 @@
|
||||
#!/usr/bin/env bash
# Merge .env into inventories/production/group_vars/all/vault.yml
# Usage: make vault-import-env [ENV_FILE=.env]
#
# Every prerequisite is checked up front so a missing file or tool produces a
# one-line error instead of a Python traceback from the embedded script.
set -euo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Env file precedence: first positional argument, then $ENV_FILE, then <repo>/.env.
ENV_FILE="${1:-${ENV_FILE:-${REPO_ROOT}/.env}}"
VAULT_FILE="${REPO_ROOT}/inventories/production/group_vars/all/vault.yml"
VAULT_PASS="${HOME}/.ansible-vault-pass"
ANSIBLE_VAULT="${REPO_ROOT}/.venv/bin/ansible-vault"
PYTHON="${REPO_ROOT}/.venv/bin/python3"

[[ -f "${ENV_FILE}" ]] || { echo "No env file: ${ENV_FILE}" >&2; exit 1; }
[[ -f "${VAULT_PASS}" ]] || { echo "Missing ${VAULT_PASS}" >&2; exit 1; }
[[ -f "${VAULT_FILE}" ]] || { echo "Missing vault file: ${VAULT_FILE}" >&2; exit 1; }
[[ -x "${ANSIBLE_VAULT}" ]] || { echo "Missing ${ANSIBLE_VAULT}" >&2; exit 1; }
[[ -x "${PYTHON}" ]] || { echo "Missing ${PYTHON}" >&2; exit 1; }

"${PYTHON}" - "${ENV_FILE}" "${VAULT_FILE}" "${VAULT_PASS}" "${ANSIBLE_VAULT}" <<'PY'
|
||||
import os, re, subprocess, sys, tempfile, yaml
|
||||
|
||||
env_file, vault_file, vault_pass, ansible_vault = sys.argv[1:5]
|
||||
|
||||
def load_env(path):
    """Read a dotenv-style file and return its non-empty ``KEY=VALUE`` pairs.

    Blank lines, ``#`` comment lines, lines without ``=`` and lines with an
    empty key are skipped.  A leading ``export `` is accepted.  Only a
    *matching* pair of surrounding quotes is removed, so a secret that merely
    starts or ends with a quote character is imported unchanged.  Keys whose
    value is empty after unquoting are omitted, so blank placeholders never
    overwrite existing vault values.

    Args:
        path: Path of the env file to read.

    Returns:
        Mapping of key to non-empty value, both as ``str``.
    """
    out = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            if line.startswith("export "):
                line = line[7:].strip()
            k, sep, v = line.partition("=")
            k = k.strip()
            if not sep or not k:
                continue
            v = v.strip()
            # Strip quotes only when the value is wrapped in one matching
            # pair; chained .strip() calls would also eat unbalanced quotes.
            if len(v) >= 2 and v[0] == v[-1] and v[0] in "'\"":
                v = v[1:-1]
            if v:
                out[k] = v
    return out
|
||||
|
||||
# .env key -> vault key (or vault_mailcow_mailbox_passwords.<name>)
# A tuple value means: set the named top-level vault key AND the entry of that
# name inside vault_mailcow_mailbox_passwords.
MAP = {
    "MAILCOW_API_KEY": "vault_mailcow_api_key",
    "ALERTS_PASSWORD": ("vault_alerts_mailbox_password", "alerts"),
    "KUMA_PASSWORD": "vault_uptime_kuma_password",
    "KUMA_USER": "vault_uptime_kuma_user",
    "KUMA_URL": "vault_uptime_kuma_url",
    "UMAMI_ADMIN_PASSWORD": "vault_umami_admin_password",
    "UMAMI_DB_PASS": "vault_umami_db_password",
    "UMAMI_APP_SECRET": "vault_umami_app_secret",
    "SMTP_HOST": "vault_kuma_smtp_host",
    "SMTP_PORT": "vault_kuma_smtp_port",
    "SMTP_USER": "vault_kuma_smtp_user",
    "SMTP_PASS": "vault_kuma_smtp_password",
    "SMTP_TO": "vault_kuma_smtp_to",
    "MATTERMOST_URL": "vault_mattermost_url",
    "MATTERMOST_TOKEN": "vault_mattermost_token",
    "MATTERMOST_ALLOWED_USERS": "vault_mattermost_allowed_users",
    "PROXMOX_PASSWORD": "vault_proxmox_password",
    "LXC_ROOT_PASSWORD": "vault_lxc_root_password",
}

env = load_env(env_file)
# Decrypt the current vault with `ansible-vault view` and parse its YAML.
text = subprocess.check_output(
    [ansible_vault, "view", vault_file, "--vault-password-file", vault_pass],
    text=True,
)
data = yaml.safe_load(text) or {}
passwords = dict(data.get("vault_mailcow_mailbox_passwords") or {})

# MAILBOX_<name>_PASSWORD (case-insensitive) feeds the per-mailbox mapping
# under the lower-cased mailbox name; everything else goes through MAP.
# Env keys that match neither are ignored.
mailbox_key = re.compile(r"^MAILBOX_(.+)_PASSWORD$", re.I)
for k, v in env.items():
    m = mailbox_key.match(k)
    if m:
        passwords[m.group(1).lower()] = v
        continue
    target = MAP.get(k)
    if not target:
        continue
    if isinstance(target, tuple):
        data[target[0]] = v
        passwords[target[1]] = v
    else:
        data[target] = v

if passwords:
    data["vault_mailcow_mailbox_passwords"] = passwords

# The temp file holds the DECRYPTED vault contents.  Remove it in `finally`
# so a failing dump or `ansible-vault encrypt` never leaves plaintext secrets
# behind on disk.
fd, tmp = tempfile.mkstemp(suffix=".yml")
try:
    with os.fdopen(fd, "w") as f:
        yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
    subprocess.run(
        [ansible_vault, "encrypt", tmp, "--output", vault_file,
         "--vault-password-file", vault_pass, "--encrypt-vault-id", "default"],
        check=True,
    )
finally:
    os.remove(tmp)
print(f"Updated {vault_file} from {env_file} ({len(env)} values)")
|
||||
PY
|
||||
70
scripts/vault-pull-infra-secrets.sh
Executable file
70
scripts/vault-pull-infra-secrets.sh
Executable file
@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env bash
# Collect secrets from the live hosts into the repo's .env, then hand that
# file to vault-import-env.
# Secret values are never echoed by this script.
set -euo pipefail

# Resolve the repository root as the parent of this script's directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
ENV_FILE="${REPO_ROOT}/.env"

python3 - "${ENV_FILE}" <<'PY'
|
||||
import subprocess, sys
|
||||
from pathlib import Path
|
||||
|
||||
out = Path(sys.argv[1])
|
||||
lines = []
|
||||
|
||||
def sh(cmd):
    """Run *cmd* through the shell and return its stdout, whitespace-trimmed.

    Raises ``subprocess.CalledProcessError`` when the command exits non-zero.
    """
    captured = subprocess.check_output(cmd, shell=True, text=True)
    return captured.strip()
|
||||
|
||||
def parse_env(text):
    """Parse dotenv-style ``KEY=VALUE`` text into a dict.

    Blank lines, ``#`` comment lines, lines without ``=`` and lines with an
    empty key are ignored.  A leading ``export `` is accepted, matching the
    loader in scripts/vault-import-env.sh.  Only a *matching* pair of
    surrounding quotes is removed, so a secret that merely starts or ends
    with a quote character is returned intact.  Empty values are kept, and a
    later duplicate of a key overrides an earlier one.

    Args:
        text: Full contents of an env file (local or fetched from a host).

    Returns:
        Mapping of key to (unquoted) value, both as ``str``.
    """
    d = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("export "):
            line = line[len("export "):].lstrip()
        k, sep, v = line.partition("=")
        k = k.strip()
        if not sep or not k:
            continue
        v = v.strip()
        # Strip quotes only when the value is wrapped in one matching pair;
        # chained .strip() calls would also eat unbalanced quote characters.
        if len(v) >= 2 and v[0] == v[-1] and v[0] in "'\"":
            v = v[1:-1]
        d[k] = v
    return d
|
||||
|
||||
# monitoring LXC
# Best effort: any failure (host unreachable, file missing, ...) is reported
# on stderr and the remaining sources are still processed.
try:
    raw = sh("ssh -o BatchMode=yes -o ConnectTimeout=8 root@10.0.10.22 'cat /opt/monitoring/.env 2>/dev/null'")
    m = parse_env(raw)
    for k in ("UMAMI_DB_PASS", "UMAMI_APP_SECRET"):
        if m.get(k):
            lines.append(f"{k}={m[k]}")
except Exception as e:
    print(f"# skip monitoring: {e}", file=sys.stderr)

# hermes mattermost
try:
    raw = sh("ssh -o BatchMode=yes -o ConnectTimeout=8 ladmin@10.0.10.36 \"sudo cat /home/hermes/.hermes/secrets/mattermost.env 2>/dev/null\"")
    h = parse_env(raw)
    for k in ("MATTERMOST_URL", "MATTERMOST_TOKEN", "MATTERMOST_ALLOWED_USERS"):
        if h.get(k):
            lines.append(f"{k}={h[k]}")
except Exception as e:
    print(f"# skip hermes: {e}", file=sys.stderr)

# merge with existing .env (preserve user-filled keys)
# Keys pulled from the hosts override the same key in .env; all other keys
# already in the file are carried over unchanged.
existing = {}
if out.exists():
    existing = parse_env(out.read_text())

merged = {**existing}
for line in lines:
    k, _, v = line.partition("=")
    merged[k] = v

header = """# Auto-merged by scripts/vault-pull-infra-secrets.sh + your edits
# Run: make vault-import-env

"""
body = "\n".join(f"{k}={v}" for k, v in sorted(merged.items())) + "\n"

# Restrict permissions BEFORE the secrets are written, so the file is never
# readable by other users between creation and the shell's later chmod.
# Best effort, like the wrapper's `chmod ... || true`.
try:
    out.touch(mode=0o600, exist_ok=True)
    out.chmod(0o600)
except OSError as err:
    print(f"# could not restrict permissions on {out}: {err}", file=sys.stderr)
out.write_text(header + body)
print(f"Wrote {len(merged)} keys to {out}")
|
||||
PY
|
||||
|
||||
chmod 600 "${ENV_FILE}" 2>/dev/null || true
|
||||
"${REPO_ROOT}/scripts/vault-import-env.sh" "${ENV_FILE}"
|
||||
Loading…
x
Reference in New Issue
Block a user