From e05b3aa0d5d13ba5c23857169b39e8932ddbed1d Mon Sep 17 00:00:00 2001 From: ilia Date: Tue, 16 Sep 2025 23:00:32 -0400 Subject: [PATCH] Update ansible.cfg and auto-fallback script for improved connectivity handling - Modify ansible.cfg to increase SSH connection retries from 2 to 3 and add a connection timeout setting for better reliability. - Enhance auto-fallback.sh script to provide detailed feedback during IP connectivity tests, including clearer status messages for primary and fallback IP checks. - Update documentation to reflect changes in connectivity testing and fallback procedures. These updates improve the robustness of the connectivity testing process and ensure smoother operations during IP failover scenarios. --- Makefile | 65 ++-- ansible.cfg | 4 +- auto-fallback.sh | 111 +++++++ docs/connectivity-test.md | 175 ++++++++++ .../production/group_vars/all/vault.yml | 18 +- .../production/host_vars/ansibleVM.yml | 7 + inventories/production/host_vars/bottom.yml | 12 +- inventories/production/host_vars/caddy.yml | 7 + .../production/host_vars/debianDesktopVM.yml | 12 +- inventories/production/host_vars/dev01.yml | 12 +- inventories/production/host_vars/giteaVM.yml | 11 +- .../production/host_vars/homepageVM.yml | 11 +- inventories/production/host_vars/jellyfin.yml | 7 + inventories/production/host_vars/listmonk.yml | 7 + .../production/host_vars/portainerVM.yml | 11 +- inventories/production/host_vars/slack.yml | 7 + .../production/host_vars/vaultwardenVM.yml | 7 + inventories/production/hosts | 19 +- package-lock.json | 4 +- playbooks/shell.yml | 31 ++ roles/development/tasks/main.yml | 10 +- roles/docker/tasks/setup_gpg_key.yml | 3 +- roles/docker/tasks/setup_repo_debian.yml | 3 +- roles/docker/tasks/setup_repo_linux_mint.yml | 3 +- roles/docker/tasks/setup_repo_ubuntu.yml | 3 +- roles/tailscale/tasks/debian.yml | 10 +- test_connectivity.py | 311 ++++++++++++++++++ 27 files changed, 779 insertions(+), 102 deletions(-) create mode 100755 auto-fallback.sh 
create mode 100644 docs/connectivity-test.md create mode 100644 inventories/production/host_vars/ansibleVM.yml create mode 100644 inventories/production/host_vars/caddy.yml create mode 100644 inventories/production/host_vars/jellyfin.yml create mode 100644 inventories/production/host_vars/listmonk.yml create mode 100644 inventories/production/host_vars/slack.yml create mode 100644 inventories/production/host_vars/vaultwardenVM.yml create mode 100644 playbooks/shell.yml create mode 100644 test_connectivity.py diff --git a/Makefile b/Makefile index 8f0ba75..ad20b5c 100644 --- a/Makefile +++ b/Makefile @@ -218,7 +218,7 @@ test: ## Run all tests (lint + syntax check if available) fi @$(MAKE) test-syntax -check: ## Dry-run the development playbook (--check mode) +check: auto-fallback ## Dry-run the development playbook (--check mode) @echo "$(YELLOW)Running dry-run on development hosts...$(RESET)" $(ANSIBLE_PLAYBOOK) $(PLAYBOOK_DEV) --check --diff @@ -226,7 +226,7 @@ check-local: ## Dry-run the local playbook @echo "$(YELLOW)Running dry-run on localhost...$(RESET)" $(ANSIBLE_PLAYBOOK) $(PLAYBOOK_LOCAL) --check --diff -K -apply: ## Run the development playbook on all dev hosts +apply: auto-fallback ## Run the development playbook on all dev hosts @echo "$(YELLOW)Applying development playbook...$(RESET)" $(ANSIBLE_PLAYBOOK) $(PLAYBOOK_DEV) @@ -326,12 +326,16 @@ shell: ## Configure shell only @echo "$(YELLOW)Running shell configuration...$(RESET)" $(ANSIBLE_PLAYBOOK) $(PLAYBOOK_DEV) --tags shell +shell-all: ## Configure shell on all shell_hosts (usage: make shell-all) + @echo "$(YELLOW)Running shell configuration on all shell hosts...$(RESET)" + $(ANSIBLE_PLAYBOOK) playbooks/shell.yml $(ANSIBLE_ARGS) + apps: ## Install applications only @echo "$(YELLOW)Installing applications...$(RESET)" $(ANSIBLE_PLAYBOOK) $(PLAYBOOK_DEV) --tags apps # Connectivity targets -ping: ## Ping hosts with colored output (usage: make ping [GROUP=dev] [HOST=dev01]) +ping: auto-fallback ## Ping 
hosts with colored output (usage: make ping [GROUP=dev] [HOST=dev01]) ifdef HOST @echo "$(YELLOW)Pinging host: $(HOST)$(RESET)" @ansible $(HOST) -m ping --one-line | while read line; do \ @@ -459,43 +463,6 @@ endif edit-group-vault: ## Edit encrypted group vars (usage: make edit-group-vault) ansible-vault edit inventories/production/group_vars/all/vault.yml -test-connectivity: ## Test network connectivity and SSH access to all hosts - @echo "$(BOLD)Connectivity Test$(RESET)" - @if [ -n "$(CURRENT_HOST)" ]; then \ - echo "$(BLUE)Auto-excluding current host: $(CURRENT_HOST) ($(CURRENT_IP))$(RESET)"; \ - fi - @echo "" - @echo "$(YELLOW)Network Connectivity:$(RESET)" - @ansible-inventory --list | jq -r '._meta.hostvars | to_entries[] | select(.value.ansible_host) | "\(.key) \(.value.ansible_host)"' 2>/dev/null | while read host ip; do \ - if [ "$$host" != "$(CURRENT_HOST)" ]; then \ - printf " %-20s " "$$host ($$ip)"; \ - if ping -c 1 -W 2 $$ip >/dev/null 2>&1; then \ - echo "$(GREEN)✓ Network OK$(RESET)"; \ - else \ - echo "$(RED)✗ Network FAIL$(RESET)"; \ - fi; \ - fi; \ - done - @echo "" - @echo "$(YELLOW)SSH Connectivity:$(RESET)" - @ansible all -m ping --one-line $(EXCLUDE_CURRENT) 2>/dev/null | grep -E "(SUCCESS|UNREACHABLE)" | while read line; do \ - host=$$(echo "$$line" | cut -d' ' -f1); \ - if echo "$$line" | grep -q "SUCCESS"; then \ - printf " $(GREEN)✓ %-20s$(RESET) SSH OK\n" "$$host"; \ - elif echo "$$line" | grep -q "UNREACHABLE"; then \ - printf " $(RED)✗ %-20s$(RESET) SSH FAIL\n" "$$host"; \ - fi; \ - done - @echo "" - @echo "$(YELLOW)SSH Keys:$(RESET)" - @if [ -f ~/.ssh/id_ed25519.pub ]; then \ - echo " $(GREEN)✓ SSH key available$(RESET) (id_ed25519)"; \ - elif [ -f ~/.ssh/id_rsa.pub ]; then \ - echo " $(GREEN)✓ SSH key available$(RESET) (id_rsa)"; \ - else \ - echo " $(RED)✗ No SSH key found$(RESET)"; \ - echo " $(YELLOW) Run: ssh-keygen -t ed25519$(RESET)"; \ - fi copy-ssh-key: ## Copy SSH key to specific host (usage: make copy-ssh-key 
HOST=giteaVM) ifndef HOST @@ -528,3 +495,21 @@ monitoring: ## Install monitoring tools on all machines @echo "$(YELLOW)Installing monitoring tools...$(RESET)" $(ANSIBLE_PLAYBOOK) $(PLAYBOOK_DEV) --tags monitoring @echo "$(GREEN)✓ Monitoring installation complete$(RESET)" + +test-connectivity: ## Test host connectivity with detailed diagnostics and recommendations + @echo "$(YELLOW)Testing host connectivity...$(RESET)" + @if [ -f "test_connectivity.py" ]; then \ + python3 test_connectivity.py --hosts-file $(INVENTORY_HOSTS); \ + else \ + echo "$(RED)Error: test_connectivity.py not found$(RESET)"; \ + exit 1; \ + fi + +auto-fallback: ## Automatically switch to fallback IPs when primary IPs fail + @echo "$(YELLOW)Auto-fallback: Testing and switching to working IPs...$(RESET)" + @if [ -f "auto-fallback.sh" ]; then \ + chmod +x auto-fallback.sh && ./auto-fallback.sh; \ + else \ + echo "$(RED)Error: auto-fallback.sh not found$(RESET)"; \ + exit 1; \ + fi diff --git a/ansible.cfg b/ansible.cfg index 03586e3..7d31b58 100644 --- a/ansible.cfg +++ b/ansible.cfg @@ -19,6 +19,6 @@ vault_password_file = ~/.ansible-vault-pass ansible_managed = Ansible managed: {file} modified on %Y-%m-%d %H:%M:%S [ssh_connection] -ssh_args = -o ControlMaster=auto -o ControlPersist=60s -retries = 2 +ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o ConnectTimeout=5 +retries = 3 pipelining = True diff --git a/auto-fallback.sh b/auto-fallback.sh new file mode 100755 index 0000000..5078329 --- /dev/null +++ b/auto-fallback.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# Automatically switch to fallback IPs when primary IPs fail + +HOSTS_FILE="inventories/production/hosts" +TIMEOUT=3 +CHANGED=false + +# Colors +GREEN='\033[0;32m' +RED='\033[0;31m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +echo -e "${BLUE}Auto-fallback: Testing and switching to fallback IPs when needed...${NC}" +echo "==================================================================" + +# Function to test IP connectivity 
+test_ip() { + local ip="$1" + if ping -c 1 -W "$TIMEOUT" "$ip" >/dev/null 2>&1; then + return 0 + else + return 1 + fi +} + +# Function to test SSH connectivity +test_ssh() { + local host="$1" + local ip="$2" + local user="$3" + + if timeout 5 ssh -o ConnectTimeout=3 -o BatchMode=yes "$user@$ip" exit >/dev/null 2>&1; then + return 0 + else + return 1 + fi +} + +# Function to switch to fallback IP +switch_to_fallback() { + local hostname="$1" + local primary_ip="$2" + local fallback_ip="$3" + + echo -e " ${YELLOW}→ Switching $hostname to fallback IP: $fallback_ip${NC}" + + # Use sed to replace the primary IP with fallback IP + sed -i "s/$hostname ansible_host=$primary_ip/$hostname ansible_host=$fallback_ip/" "$HOSTS_FILE" + + # Remove the fallback attribute since we're now using it as primary + sed -i "s/ ansible_host_fallback=$fallback_ip//" "$HOSTS_FILE" + + CHANGED=true +} + +# Parse hosts file and test connectivity +while IFS= read -r line; do + # Skip empty lines and comments + [[ -z "$line" || "$line" =~ ^# ]] && continue + + # Skip group headers + [[ "$line" =~ ^\[.*\]$ ]] && continue + + # Parse host entry + if [[ "$line" =~ ansible_host= ]]; then + hostname=$(echo "$line" | awk '{print $1}') + primary_ip=$(echo "$line" | grep -oP 'ansible_host=\K[^\s]+') + fallback_ip=$(echo "$line" | grep -oP 'ansible_host_fallback=\K[^\s]+' || echo "") + user=$(echo "$line" | grep -oP 'ansible_user=\K[^\s]+' || echo "root") + + echo -n "Testing $hostname ($primary_ip)... 
" + + # Test primary IP + if test_ip "$primary_ip"; then + # Test SSH on primary IP + if test_ssh "$hostname" "$primary_ip" "$user"; then + echo -e "${GREEN}✓ OK${NC}" + else + echo -e "${YELLOW}⚠ Ping OK, SSH failed${NC}" + if [[ -n "$fallback_ip" ]]; then + echo -e " ${BLUE}→ Trying fallback IP...${NC}" + if test_ip "$fallback_ip" && test_ssh "$hostname" "$fallback_ip" "$user"; then + switch_to_fallback "$hostname" "$primary_ip" "$fallback_ip" + else + echo -e " ${RED}✗ Fallback also failed${NC}" + fi + fi + fi + else + echo -e "${RED}✗ Primary IP failed${NC}" + if [[ -n "$fallback_ip" ]]; then + echo -e " ${BLUE}→ Trying fallback IP...${NC}" + if test_ip "$fallback_ip" && test_ssh "$hostname" "$fallback_ip" "$user"; then + switch_to_fallback "$hostname" "$primary_ip" "$fallback_ip" + else + echo -e " ${RED}✗ Fallback also failed${NC}" + fi + fi + fi + fi +done < "$HOSTS_FILE" + +echo "" +if [[ "$CHANGED" == "true" ]]; then + echo -e "${GREEN}✓ Hosts file updated with working IPs!${NC}" + echo -e "${BLUE}You can now run your Ansible commands.${NC}" +else + echo -e "${GREEN}✓ All primary IPs are working - no changes needed.${NC}" +fi diff --git a/docs/connectivity-test.md b/docs/connectivity-test.md new file mode 100644 index 0000000..3b37f09 --- /dev/null +++ b/docs/connectivity-test.md @@ -0,0 +1,175 @@ +# Connectivity Test Documentation + +## Overview + +The `test_connectivity.py` script provides comprehensive connectivity testing for Ansible hosts with intelligent fallback IP detection and detailed diagnostics. 
+ +## Features + +- **Comprehensive Testing**: Tests both ping and SSH connectivity +- **Fallback Detection**: Identifies when fallback IPs should be used +- **Smart Diagnostics**: Provides specific error messages and recommendations +- **Multiple Output Formats**: Console, quiet mode, and JSON export +- **Actionable Recommendations**: Suggests specific commands to fix issues + +## Usage + +### Basic Usage + +```bash +# Test all hosts +make test-connectivity + +# Or run directly +python3 test_connectivity.py +``` + +### Advanced Options + +```bash +# Quiet mode (summary only) +python3 test_connectivity.py --quiet + +# Export results to JSON +python3 test_connectivity.py --json results.json + +# Custom hosts file +python3 test_connectivity.py --hosts-file inventories/staging/hosts + +# Custom timeout +python3 test_connectivity.py --timeout 5 +``` + +## Output Interpretation + +### Status Icons + +- ✅ **SUCCESS**: Host is fully accessible via primary IP +- 🔑 **SSH KEY**: SSH key authentication issue +- 🔧 **SSH SERVICE**: SSH service not running +- ⚠️ **SSH ERROR**: Other SSH-related errors +- 🔄 **USE FALLBACK**: Should switch to fallback IP +- ❌ **BOTH FAILED**: Both primary and fallback IPs failed +- 🚫 **NO FALLBACK**: Primary IP failed, no fallback available +- ❓ **UNKNOWN**: Unexpected connectivity state + +### Common Issues and Solutions + +#### SSH Key Issues +``` +🔑 Fix SSH key issues (2 hosts): + make copy-ssh-key HOST=dev01 + make copy-ssh-key HOST=debianDesktopVM +``` +**Solution**: Run the suggested `make copy-ssh-key` commands + +#### Fallback Recommendations +``` +🔄 Switch to fallback IPs (1 hosts): + sed -i 's/vaultwardenVM ansible_host=100.100.19.11/vaultwardenVM ansible_host=10.0.10.142/' inventories/production/hosts +``` +**Solution**: Run the suggested sed command or use `make auto-fallback` + +#### Critical Issues +``` +🚨 Critical issues (4 hosts): + bottom: ✗ bottom: Primary IP 10.0.10.156 failed, no fallback available +``` +**Solution**: Check 
network connectivity, host status, or add fallback IPs + +## Integration with Ansible Workflow + +### Before Running Ansible +```bash +# Test connectivity first +make test-connectivity + +# Fix any issues, then run Ansible +make apply +``` + +### Automated Fallback +```bash +# Automatically switch to working IPs +make auto-fallback + +# Then run your Ansible tasks +make apply +``` + +## Configuration + +### Hosts File Format +The script expects hosts with optional fallback IPs: +``` +vaultwardenVM ansible_host=100.100.19.11 ansible_host_fallback=10.0.10.142 ansible_user=ladmin +``` + +### Timeout Settings +- **Ping timeout**: 3 seconds (configurable with `--timeout`) +- **SSH timeout**: 5 seconds (hardcoded for reliability) + +## Troubleshooting + +### Common Problems + +1. **"Permission denied (publickey)"** + - Run: `make copy-ssh-key HOST=hostname` + +2. **"Connection refused"** + - Check if SSH service is running on target host + - Verify firewall settings + +3. **"Host key verification failed"** + - Add host to known_hosts: `ssh-keyscan hostname >> ~/.ssh/known_hosts` + +4. **"No route to host"** + - Check network connectivity + - Verify IP addresses are correct + +### Debug Mode +To rule out false ping failures on slow links, re-run with a longer ping timeout (this flag does not change verbosity): +```bash +python3 test_connectivity.py --timeout 10 +``` + +## JSON Output Format + +When using `--json`, the output includes detailed information: +```json +[ + { + "hostname": "vaultwardenVM", + "group": "vaultwarden", + "primary_ip": "100.100.19.11", + "fallback_ip": "10.0.10.142", + "user": "ladmin", + "primary_ping": true, + "primary_ssh": true, + "fallback_ping": true, + "fallback_ssh": true, + "status": "success", + "recommendation": "✓ vaultwardenVM is fully accessible via primary IP 100.100.19.11" + } +] +``` + +## Best Practices + +1. **Run before Ansible operations** to catch connectivity issues early +2. **Use quiet mode** in scripts: `python3 test_connectivity.py --quiet` +3. 
**Export JSON results** for logging and monitoring +4. **Fix SSH key issues** before running Ansible +5. **Use auto-fallback** for automated IP switching + +## Integration with CI/CD + +```bash +# In your CI pipeline +make test-connectivity +if [ $? -ne 0 ]; then + echo "Connectivity issues detected" + exit 1 +fi +make apply +``` diff --git a/inventories/production/group_vars/all/vault.yml b/inventories/production/group_vars/all/vault.yml index 8bb536b..922dceb 100644 --- a/inventories/production/group_vars/all/vault.yml +++ b/inventories/production/group_vars/all/vault.yml @@ -1,10 +1,10 @@ $ANSIBLE_VAULT;1.1;AES256 -36376666313464366432353635643532663733656664336632626633616465313834323563613965 -3261643434373638623932373531366333393736636165620a306437366133343435626639396361 -30373765343339626464336538303634336135316431653264363831643264373636303161616334 -6231326138306564310a343135303362326664653061666363366364376335343431393330643334 -32336263623437636266383730666562303234633438646330313163323232623961613665613031 -65643630623235356164303839663938343238336432663462363431363062623764326536396562 -32613331366135646133373165646634356337376463393530343264386531393837303263353033 -31646238393165613331623164613265613332623933343136623739316262646237323739666434 -6238 +36343265643238633236643162613137393331386164306133666537633336633036376433386161 +3135366566623235333264386539346364333435373065300a633231633731316633313166346161 +30363334613965666634633665363632323966396464633636346533616634393664386566333230 +3463666531323866660a666238383331383562313363386639646161653334313661393065343135 +33613762653361656633366465306264323935363032353737333935363165346639616330333939 +39336538643866366361313838636338643336376365373166376234383838656430623339313162 +37353461313263643263376232393138396233366234336333613535366234383661353938663032 +65383737343164343431363764333063326230623263323231366232626131306637353361343466 +6131 diff --git 
a/inventories/production/host_vars/ansibleVM.yml b/inventories/production/host_vars/ansibleVM.yml new file mode 100644 index 0000000..7c3ce25 --- /dev/null +++ b/inventories/production/host_vars/ansibleVM.yml @@ -0,0 +1,7 @@ +$ANSIBLE_VAULT;1.1;AES256 +31306264346663636630656534303766666564333866326139336137383339633338323834653266 +6132333337363566623265303037336266646238633036390a663432623861363562386561393264 +63303565633530383634643538323165383461656539613331386135336265653531336266613865 +3833376664366239650a313134653238323437633265373463326231346663366434323733663666 +38353061373437306431383132333233663639643134363464396163333962373033363661623666 +3430633863623962366430613962346264356461373539376263 diff --git a/inventories/production/host_vars/bottom.yml b/inventories/production/host_vars/bottom.yml index f38a32e..44050e7 100644 --- a/inventories/production/host_vars/bottom.yml +++ b/inventories/production/host_vars/bottom.yml @@ -1,7 +1,7 @@ $ANSIBLE_VAULT;1.1;AES256 -37646438353233376464643364306161636364356130316530366430306530323635616531346661 -3935613366376138313461633662353037623534353433620a613163333332346564666530653862 -31636565306234366537623763376161346139306131323366643138613730643761633335343330 -3634376334636363300a666432306663643632353233396666333336386238386438666133303432 -31356266353035303732353661343634653732626166626461333939663033346166393861396332 -6461326530623939336462346531363335383237303731303964 +63393735666433636632373036626634386363303637613162626365353866363434363539363530 +3464633237656336306633383339396132333533626433360a323838653030333533393531316631 +32646430303765313736356233653732666531323266326430646330616537626430323062653266 +3165303465303765630a373163353863373139333936346233343137373962326437633038653564 +35386131393432316261353338623032313136623836326432343939303035623665633462613535 +3137616135626239313337653134333765646362356261376432 diff --git a/inventories/production/host_vars/caddy.yml 
b/inventories/production/host_vars/caddy.yml new file mode 100644 index 0000000..94184ae --- /dev/null +++ b/inventories/production/host_vars/caddy.yml @@ -0,0 +1,7 @@ +$ANSIBLE_VAULT;1.1;AES256 +66633265383239626163633134656233613638643862323562373330643363323036333334646566 +3439646635343533353432323064643135623532333738380a353866643461636233376432396434 +35316639343131363536343264646666313934616166653535666634633230613537356663313663 +3430343234323963300a376133333935653965396561643431306138323636396635396338356266 +61636237643366646135333633383562656361356233383866616533336666383561303538393234 +3637383939376665356136363636626166323836376264373261 diff --git a/inventories/production/host_vars/debianDesktopVM.yml b/inventories/production/host_vars/debianDesktopVM.yml index 48cfbd8..78b9b5b 100644 --- a/inventories/production/host_vars/debianDesktopVM.yml +++ b/inventories/production/host_vars/debianDesktopVM.yml @@ -1,7 +1,7 @@ $ANSIBLE_VAULT;1.1;AES256 -65643434353032343662326530613038373164393531393865633934333838373232326265646262 -3866613232353361626333323666666563323634383064340a346434393261346461643061306563 -32323730393439663931663734326134633166333866393834353637666465313962363339323930 -6638646636366463360a633731383838656535353536366136616637393332613138313838623337 -62616462623734373932316635623030623335613939313334393632323363316635333839663964 -6235376161333834383438373164396537343438366239356134 +64383961333130663764633131623063373965383261613530313536396362366639393263396664 +3165636131346138613436613565336236313564613731360a623839653064346130333561666164 +66633636373163346538393938323634336463343431353630306330316661383836343634376334 +3666656436336536340a373633363965646164373031323533636563386266366238393539643463 +38386334396335303666633631373834633465636239666131633430303437343137333437366337 +6465303935396332323563613037376231326564363231343332 diff --git a/inventories/production/host_vars/dev01.yml b/inventories/production/host_vars/dev01.yml 
index a67c650..1005fc4 100644 --- a/inventories/production/host_vars/dev01.yml +++ b/inventories/production/host_vars/dev01.yml @@ -1,7 +1,7 @@ $ANSIBLE_VAULT;1.1;AES256 -31613237643164326433613265306534626435313661663530613134393739633734633130653439 -6265613832306138643133356333336539643636313732660a626262313139623534326137643230 -38346634623535343233336238646335666331393064616631643439353264633262326536363830 -3739663737323639300a396361346133363763356232316338646535383964653331613738616262 -31643031636334373266306266383730656161663566353532353239323130376539363433623362 -6664663838393235323664633038356533313833306432353861 +63663562363638643732663337623063313532373930646634336366316636313265633239373163 +3731653139383136656362366431666264376530636366340a316334393963336537646536336636 +35636261383632623865326631326261353735633233386232383862376466386131663566626636 +3234313733643735360a663230633532393932666163383934303934346466376631303130663635 +33383163306538373834303364333935636566633432623735363731396461343661623638313563 +6233356166666233303635356166323830306165393737353533 diff --git a/inventories/production/host_vars/giteaVM.yml b/inventories/production/host_vars/giteaVM.yml index 59e8e8c..4e4f3c4 100644 --- a/inventories/production/host_vars/giteaVM.yml +++ b/inventories/production/host_vars/giteaVM.yml @@ -1,6 +1,7 @@ $ANSIBLE_VAULT;1.1;AES256 -35613535653633616433383235306131326139313335323039393662313066613966633934333864 -6465656334383738393565613033653230323264363933370a623036393963393833376333383635 -62636466383165383439623736613831663761336662383138386666336365636166373338666232 -6164616262383764340a326530393662383632623538333535353962313138633639653933303564 -3939 +37346464666430303939303936623232303038616132376630653735363138393439353534613166 +3835346134643735666332643734633437393961666465380a393030393161316430626162343363 +31626230623830303537326466303330623065636661643835323064353963616639333364303966 
+3030663964373638330a623831306261386338343630376562323839303336346164623330646433 +30306139396638643332396331616461336530666366373533363365313731643033356161373739 +3339663736616238613131613539656535373331386137653630 diff --git a/inventories/production/host_vars/homepageVM.yml b/inventories/production/host_vars/homepageVM.yml index a037b56..a2cf5c8 100644 --- a/inventories/production/host_vars/homepageVM.yml +++ b/inventories/production/host_vars/homepageVM.yml @@ -1,6 +1,7 @@ $ANSIBLE_VAULT;1.1;AES256 -32353034343864393663363666306566396464626335363133316432633832616561336234323138 -6535373836623837323266376539633937326365393730300a303963663165353536656133636663 -63323966353039663531626434303939313137383734363538616564646638353030643130613632 -3131353132336261650a653361333235643130333330346366656637303332666361386461616331 -3132 +33663135313965613939386665326237383639323738383566653565656331303965376464306364 +6162363534306630623537623038363433313430666662340a306564373036366163366565323333 +32663836383133636661626631356636616638613231646537626530316634396539613732306664 +3335303836656236310a316561623562643339643264313635396131633963623764333839303132 +64303234626361633835343564346362336563333537653866393835643832643833616261376261 +3331343439313931383838363839656262616530306534313964 diff --git a/inventories/production/host_vars/jellyfin.yml b/inventories/production/host_vars/jellyfin.yml new file mode 100644 index 0000000..4e32751 --- /dev/null +++ b/inventories/production/host_vars/jellyfin.yml @@ -0,0 +1,7 @@ +$ANSIBLE_VAULT;1.1;AES256 +61623232353833613730343036663434633265346638366431383737623936616131356661616238 +3230346138373030396336663566353433396230346434630a313633633161303539373965343466 +38353131336330643566643232393638616531316430343932636663613437613565643666353863 +3963663866623238630a636532303537616238666165306339303661363732393365336138613236 +61336664393839306565303433646532646535656432633330343030396361623063653263313738 
+3834373161333737326432366630623262623730613337386163 diff --git a/inventories/production/host_vars/listmonk.yml b/inventories/production/host_vars/listmonk.yml new file mode 100644 index 0000000..b1eae66 --- /dev/null +++ b/inventories/production/host_vars/listmonk.yml @@ -0,0 +1,7 @@ +$ANSIBLE_VAULT;1.1;AES256 +31316663336338303832323464623866343366313261653536623233303466636630633235643638 +3666646431323061313836333233356162643462323763380a623666663062386337393439653134 +61616135353966333639323031643263646231636332613935353234363134356435646266343866 +3034653235393636350a626362333764313732646663653838313233326438646330393336346539 +30393364323237396633343133616439393563326161636366613965366161656364343939313334 +3430306634396361353238643735363430383433323431393230 diff --git a/inventories/production/host_vars/portainerVM.yml b/inventories/production/host_vars/portainerVM.yml index 138702e..8f6b1cc 100644 --- a/inventories/production/host_vars/portainerVM.yml +++ b/inventories/production/host_vars/portainerVM.yml @@ -1,6 +1,7 @@ $ANSIBLE_VAULT;1.1;AES256 -35386435346434313638656334393931363832396538626361633237653134303639323662353165 -3131653934353233626136386236363565363835373535320a373932343630303363656363346138 -33366161623833366666326161383964396463636633323361333066383066633838636438633364 -3131306263323038370a616432303966323065646466646430356365653334316564333364376535 -3364 +66333033636639396131653539393133383536313661386332646634656664326566663033383937 +3438303835303931616631623331656336366636393531620a363336333634373730623739376564 +32393130343430623938366232313266633531363866653631366632336535616261346437376562 +3331626636346234340a613833613463383430306639393661313333366464373963663964313438 +39623766633965303136363533306438383662306131373864333431346537643834316437653132 +3435316231366632353465336130336464633839346366313465 diff --git a/inventories/production/host_vars/slack.yml b/inventories/production/host_vars/slack.yml new file mode 100644 index 
0000000..01d5e9a --- /dev/null +++ b/inventories/production/host_vars/slack.yml @@ -0,0 +1,7 @@ +$ANSIBLE_VAULT;1.1;AES256 +62356361353835643235613335613661356230666539386533383536623432316333346431343462 +3265376632633731623430376333323234633962643766380a363033666334643930326636343963 +33666562356336663663633931383465303365303739663036613039396430646166646139653265 +6663366664623064650a386265333865386538633363613963383630643633356130616238653632 +36333436643563363933353664303765646435323161393938656530336331396130616235653335 +6638346636343339343465663730373134333663633265383763 diff --git a/inventories/production/host_vars/vaultwardenVM.yml b/inventories/production/host_vars/vaultwardenVM.yml new file mode 100644 index 0000000..a1cc814 --- /dev/null +++ b/inventories/production/host_vars/vaultwardenVM.yml @@ -0,0 +1,7 @@ +$ANSIBLE_VAULT;1.1;AES256 +35633833353965363964376161393730613065663236326239376562356231316166656131366263 +6263363436373965316339623139353830643062393165370a643138356561613537616431316534 +63386635363838626465396439303664316635633239653639646338393130666164653262316135 +3937376464303935620a343530333030643830383130646532613533336435383334373831343261 +37653138613132616165636132623037623033343265663734626536366361373130353139383634 +6664346538653965343263376538636336393164356434646264 diff --git a/inventories/production/hosts b/inventories/production/hosts index 532e9f6..2508a2b 100644 --- a/inventories/production/hosts +++ b/inventories/production/hosts @@ -1,3 +1,8 @@ +# Ansible Inventory +# Primary IPs: Tailscale (100.x.x.x) for remote access +# Fallback IPs: Local network (10.0.x.x) when Tailscale is down +# Usage: ansible_host_fallback is available for manual fallback + [gitea] giteaVM ansible_host=10.0.30.169 ansible_user=gitea @@ -8,18 +13,28 @@ portainerVM ansible_host=10.0.30.69 ansible_user=ladmin homepageVM ansible_host=10.0.30.12 ansible_user=homepage [vaultwarden] -vaultwardenVM ansible_host=100.100.19.11 ansible_user=ladmin 
+vaultwardenVM ansible_host=100.100.19.11 ansible_host_fallback=10.0.10.142 ansible_user=ladmin [dev] dev01 ansible_host=10.0.30.105 ansible_user=ladmin bottom ansible_host=10.0.10.156 ansible_user=beast debianDesktopVM ansible_host=10.0.10.206 ansible_user=user skip_reboot=true + [ansible] -ansible-controlVM ansible_host=10.0.10.157 ansible_user=master +ansibleVM ansible_host=10.0.10.157 ansible_user=master [tailscale] tailscaleVM ansible_host=100.66.218.53 ansible_user=ladmin +[services] +caddy ansible_host=100.117.106.18 ansible_host_fallback=10.0.10.50 ansible_user=ladmin +jellyfin ansible_host=100.104.109.45 ansible_host_fallback=10.0.10.232 ansible_user=user +listmonk ansible_host=100.73.190.115 ansible_host_fallback=10.0.10.149 ansible_user=ladmin +slack ansible_host=100.110.190.69 ansible_host_fallback=10.0.10.154 ansible_user=ladmin + +[desktop] +desktop-beast ansible_host=100.117.34.106 ansible_user=beast + [local] localhost ansible_connection=local diff --git a/package-lock.json b/package-lock.json index 64f4f80..67f60f1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,8 +13,8 @@ "markdownlint-cli2": "^0.18.1" }, "engines": { - "node": ">=18.0.0", - "npm": ">=9.0.0" + "node": ">=22.0.0", + "npm": ">=10.0.0" } }, "node_modules/@nodelib/fs.scandir": { diff --git a/playbooks/shell.yml b/playbooks/shell.yml new file mode 100644 index 0000000..8828524 --- /dev/null +++ b/playbooks/shell.yml @@ -0,0 +1,31 @@ +--- +# Playbook: shell.yml +# Purpose: Configure shell environment (zsh, oh-my-zsh, plugins) on all hosts +# Targets: all hosts +# Tags: shell +# Usage: make shell-all + +- name: Configure shell environment + hosts: all + become: true + strategy: free + + roles: + - {role: shell, tags: ['shell']} + + pre_tasks: + - name: Update apt cache + ansible.builtin.apt: + update_cache: true + ignore_errors: true + register: apt_update_result + + - name: Display apt update status + ansible.builtin.debug: + msg: "Apt cache update: {{ 'Success' if 
apt_update_result is succeeded else 'Failed - continuing anyway' }}" + when: ansible_debug_output | default(false) | bool + + tasks: + - name: Display completion message + ansible.builtin.debug: + msg: "Shell configuration completed successfully on {{ inventory_hostname }}!" diff --git a/roles/development/tasks/main.yml b/roles/development/tasks/main.yml index 95ff9f7..921f847 100644 --- a/roles/development/tasks/main.yml +++ b/roles/development/tasks/main.yml @@ -52,7 +52,7 @@ path: /etc/apt/sources.list.d/nodesource.list state: absent become: true - when: + when: - node_version_check.rc != 0 or not node_version_check.stdout.startswith('v2') - nodesource_repo_check.stdout == "wrong_config" @@ -61,7 +61,7 @@ path: /etc/apt/keyrings/nodesource.gpg state: absent become: true - when: + when: - node_version_check.rc != 0 or not node_version_check.stdout.startswith('v2') - nodesource_key_check.stdout == "wrong_key" @@ -71,7 +71,7 @@ state: directory mode: '0755' become: true - when: + when: - node_version_check.rc != 0 or not node_version_check.stdout.startswith('v2') - nodesource_key_check.stdout in ["not_exists", "wrong_key"] @@ -82,7 +82,7 @@ mode: '0644' force: true become: true - when: + when: - node_version_check.rc != 0 or not node_version_check.stdout.startswith('v2') - nodesource_key_check.stdout in ["not_exists", "wrong_key"] @@ -92,7 +92,7 @@ state: present update_cache: false become: true - when: + when: - node_version_check.rc != 0 or not node_version_check.stdout.startswith('v2') - nodesource_repo_check.stdout in ["not_exists", "wrong_config"] diff --git a/roles/docker/tasks/setup_gpg_key.yml b/roles/docker/tasks/setup_gpg_key.yml index 693c72d..20a3817 100644 --- a/roles/docker/tasks/setup_gpg_key.yml +++ b/roles/docker/tasks/setup_gpg_key.yml @@ -42,4 +42,5 @@ ansible.builtin.file: path: /tmp/docker.gpg state: absent - when: docker_key_check.stdout in ["not_exists", "wrong_key"] \ No newline at end of file + when: docker_key_check.stdout in 
["not_exists", "wrong_key"] + \ No newline at end of file diff --git a/roles/docker/tasks/setup_repo_debian.yml b/roles/docker/tasks/setup_repo_debian.yml index 438f05b..d83ba97 100644 --- a/roles/docker/tasks/setup_repo_debian.yml +++ b/roles/docker/tasks/setup_repo_debian.yml @@ -25,4 +25,5 @@ repo: "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian {{ ansible_distribution_release }} stable" state: present update_cache: true - when: docker_repo_check.stdout in ["not_exists", "wrong_config"] \ No newline at end of file + when: docker_repo_check.stdout in ["not_exists", "wrong_config"] + \ No newline at end of file diff --git a/roles/docker/tasks/setup_repo_linux_mint.yml b/roles/docker/tasks/setup_repo_linux_mint.yml index a13031b..f49292c 100644 --- a/roles/docker/tasks/setup_repo_linux_mint.yml +++ b/roles/docker/tasks/setup_repo_linux_mint.yml @@ -33,4 +33,5 @@ repo: "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu {{ docker_ubuntu_codename }} stable" state: present update_cache: true - when: docker_repo_check.stdout in ["not_exists", "wrong_config"] \ No newline at end of file + when: docker_repo_check.stdout in ["not_exists", "wrong_config"] + \ No newline at end of file diff --git a/roles/docker/tasks/setup_repo_ubuntu.yml b/roles/docker/tasks/setup_repo_ubuntu.yml index 0602dbf..1ea73dd 100644 --- a/roles/docker/tasks/setup_repo_ubuntu.yml +++ b/roles/docker/tasks/setup_repo_ubuntu.yml @@ -25,4 +25,5 @@ repo: "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" state: present update_cache: true - when: docker_repo_check.stdout in ["not_exists", "wrong_config"] \ No newline at end of file + when: docker_repo_check.stdout in ["not_exists", "wrong_config"] + \ No newline at end of file diff --git a/roles/tailscale/tasks/debian.yml b/roles/tailscale/tasks/debian.yml index 
415fa36..4b51cdf 100644 --- a/roles/tailscale/tasks/debian.yml +++ b/roles/tailscale/tasks/debian.yml @@ -40,7 +40,7 @@ path: /usr/share/keyrings/tailscale-archive-keyring.gpg state: absent become: true - when: + when: - tailscale_version_check.rc != 0 - tailscale_key_check.stdout == "wrong_key" @@ -49,7 +49,7 @@ path: /etc/apt/sources.list.d/tailscale.list state: absent become: true - when: + when: - tailscale_version_check.rc != 0 - tailscale_repo_check.stdout == "wrong_config" @@ -59,7 +59,7 @@ dest: /usr/share/keyrings/tailscale-archive-keyring.gpg mode: '0644' become: true - when: + when: - tailscale_version_check.rc != 0 - tailscale_key_check.stdout in ["not_exists", "wrong_key"] @@ -77,7 +77,7 @@ state: present filename: tailscale become: true - when: + when: - tailscale_version_check.rc != 0 - tailscale_repo_check.stdout in ["not_exists", "wrong_config"] @@ -101,4 +101,4 @@ enabled: true state: started become: true - when: tailscale_version_check.rc != 0 \ No newline at end of file + when: tailscale_version_check.rc != 0 diff --git a/test_connectivity.py b/test_connectivity.py new file mode 100644 index 0000000..feeb4f3 --- /dev/null +++ b/test_connectivity.py @@ -0,0 +1,311 @@ +#!/usr/bin/env python3 +""" +Advanced connectivity test for Ansible hosts with fallback IP support. +Tests both primary and fallback IPs, provides detailed diagnostics, and suggests fixes. 
+""" + +import subprocess +import sys +import argparse +import json +import re +from pathlib import Path +from typing import Dict, List, Tuple, Optional + +class ConnectivityTester: + def __init__(self, hosts_file: str, timeout: int = 3): + self.hosts_file = Path(hosts_file) + self.timeout = timeout + self.results = {} + + def test_ping(self, ip: str) -> bool: + """Test if host is reachable via ping.""" + try: + result = subprocess.run( + ['ping', '-c', '1', '-W', str(self.timeout), ip], + capture_output=True, + timeout=self.timeout + 1 + ) + return result.returncode == 0 + except (subprocess.TimeoutExpired, FileNotFoundError): + return False + + def test_ssh(self, hostname: str, ip: str, user: str) -> Tuple[bool, str]: + """Test SSH connectivity and return (success, error_message).""" + try: + result = subprocess.run( + ['ssh', '-o', 'ConnectTimeout=3', '-o', 'BatchMode=yes', + f'{user}@{ip}', 'exit'], + capture_output=True, + timeout=5 + ) + if result.returncode == 0: + return True, "" + else: + error = result.stderr.decode().strip() + return False, error + except (subprocess.TimeoutExpired, FileNotFoundError) as e: + return False, str(e) + + def parse_hosts_file(self) -> List[Dict]: + """Parse hosts file and return structured host data.""" + hosts = [] + current_group = None + + with open(self.hosts_file, 'r') as f: + for line in f: + line = line.strip() + + # Skip empty lines and comments + if not line or line.startswith('#'): + continue + + # Group header + if line.startswith('[') and line.endswith(']'): + current_group = line[1:-1] + continue + + # Host entry + if current_group and 'ansible_host=' in line: + host_data = self._parse_host_line(line, current_group) + if host_data: + hosts.append(host_data) + + return hosts + + def _parse_host_line(self, line: str, group: str) -> Optional[Dict]: + """Parse a single host line and return host data.""" + parts = line.split() + if not parts: + return None + + hostname = parts[0] + attrs = {} + + for part in 
parts[1:]: + if '=' in part: + key, value = part.split('=', 1) + attrs[key] = value + + return { + 'hostname': hostname, + 'group': group, + 'primary_ip': attrs.get('ansible_host', ''), + 'fallback_ip': attrs.get('ansible_host_fallback', ''), + 'user': attrs.get('ansible_user', 'root'), + 'original_line': line + } + + def test_host(self, host_data: Dict) -> Dict: + """Test connectivity for a single host.""" + hostname = host_data['hostname'] + primary_ip = host_data['primary_ip'] + fallback_ip = host_data['fallback_ip'] + user = host_data['user'] + + result = { + 'hostname': hostname, + 'group': host_data['group'], + 'primary_ip': primary_ip, + 'fallback_ip': fallback_ip, + 'user': user, + 'primary_ping': False, + 'primary_ssh': False, + 'fallback_ping': False, + 'fallback_ssh': False, + 'primary_ssh_error': '', + 'fallback_ssh_error': '', + 'recommendation': '', + 'status': 'unknown' + } + + # Test primary IP + if primary_ip: + result['primary_ping'] = self.test_ping(primary_ip) + if result['primary_ping']: + ssh_success, ssh_error = self.test_ssh(hostname, primary_ip, user) + result['primary_ssh'] = ssh_success + result['primary_ssh_error'] = ssh_error + + # Test fallback IP + if fallback_ip: + result['fallback_ping'] = self.test_ping(fallback_ip) + if result['fallback_ping']: + ssh_success, ssh_error = self.test_ssh(hostname, fallback_ip, user) + result['fallback_ssh'] = ssh_success + result['fallback_ssh_error'] = ssh_error + + # Determine status and recommendation + result['status'], result['recommendation'] = self._analyze_connectivity(result) + + return result + + def _analyze_connectivity(self, result: Dict) -> Tuple[str, str]: + """Analyze connectivity results and provide recommendations.""" + hostname = result['hostname'] + primary_ip = result['primary_ip'] + fallback_ip = result['fallback_ip'] + + # Primary IP works perfectly + if result['primary_ping'] and result['primary_ssh']: + return 'success', f"✓ {hostname} is fully accessible via primary IP 
{primary_ip}" + + # Primary ping works but SSH fails + if result['primary_ping'] and not result['primary_ssh']: + error = result['primary_ssh_error'] + if 'Permission denied' in error: + return 'ssh_key', f"⚠ {hostname}: SSH key issue on {primary_ip} - run: make copy-ssh-key HOST={hostname}" + elif 'Connection refused' in error: + return 'ssh_service', f"⚠ {hostname}: SSH service not running on {primary_ip}" + else: + return 'ssh_error', f"⚠ {hostname}: SSH error on {primary_ip} - {error}" + + # Primary IP fails, test fallback + if not result['primary_ping'] and fallback_ip: + if result['fallback_ping'] and result['fallback_ssh']: + return 'use_fallback', f"→ {hostname}: Switch to fallback IP {fallback_ip} (primary {primary_ip} failed)" + elif result['fallback_ping'] and not result['fallback_ssh']: + return 'fallback_ssh', f"⚠ {hostname}: Fallback IP {fallback_ip} reachable but SSH failed" + else: + return 'both_failed', f"✗ {hostname}: Both primary {primary_ip} and fallback {fallback_ip} failed" + + # No fallback IP and primary failed + if not result['primary_ping'] and not fallback_ip: + return 'no_fallback', f"✗ {hostname}: Primary IP {primary_ip} failed, no fallback available" + + return 'unknown', f"? 
{hostname}: Unknown connectivity state" + + def run_tests(self) -> List[Dict]: + """Run connectivity tests for all hosts.""" + hosts = self.parse_hosts_file() + results = [] + + print("🔍 Testing host connectivity...") + print("=" * 60) + + for host_data in hosts: + print(f"Testing {host_data['hostname']}...", end=' ') + result = self.test_host(host_data) + results.append(result) + + # Print immediate status + if result['status'] == 'success': + print("✅") + elif result['status'] in ['ssh_key', 'ssh_service', 'ssh_error']: + print("⚠️") + elif result['status'] == 'use_fallback': + print("🔄") + else: + print("❌") + + return results + + def print_summary(self, results: List[Dict]): + """Print detailed summary of connectivity test results.""" + print("\n📊 CONNECTIVITY SUMMARY") + print("=" * 60) + + # Group results by status + by_status = {} + for result in results: + status = result['status'] + if status not in by_status: + by_status[status] = [] + by_status[status].append(result) + + # Print each status group + status_icons = { + 'success': '✅', + 'ssh_key': '🔑', + 'ssh_service': '🔧', + 'ssh_error': '⚠️', + 'use_fallback': '🔄', + 'both_failed': '❌', + 'no_fallback': '🚫', + 'unknown': '❓' + } + + for status, hosts in by_status.items(): + icon = status_icons.get(status, '❓') + print(f"\n{icon} {status.upper().replace('_', ' ')} ({len(hosts)} hosts)") + print("-" * 40) + + for result in hosts: + print(f" {result['hostname']:<20} {result['recommendation']}") + + # Print actionable recommendations + self._print_recommendations(results) + + def _print_recommendations(self, results: List[Dict]): + """Print actionable recommendations based on test results.""" + print("\n🛠️ RECOMMENDATIONS") + print("=" * 60) + + # SSH key issues + ssh_key_issues = [r for r in results if r['status'] == 'ssh_key'] + if ssh_key_issues: + print(f"\n🔑 Fix SSH key issues ({len(ssh_key_issues)} hosts):") + for result in ssh_key_issues: + print(f" make copy-ssh-key HOST={result['hostname']}") + + # 
Fallback recommendations + fallback_needed = [r for r in results if r['status'] == 'use_fallback'] + if fallback_needed: + print(f"\n🔄 Switch to fallback IPs ({len(fallback_needed)} hosts):") + for result in fallback_needed: + print(f" sed -i 's/{result['hostname']} ansible_host={result['primary_ip']}/{result['hostname']} ansible_host={result['fallback_ip']}/' {self.hosts_file}") + + # Critical issues + critical_issues = [r for r in results if r['status'] in ['both_failed', 'no_fallback']] + if critical_issues: + print(f"\n🚨 Critical issues ({len(critical_issues)} hosts):") + for result in critical_issues: + print(f" {result['hostname']}: {result['recommendation']}") + + # Auto-fallback suggestion + if fallback_needed: + print(f"\n🤖 Or run auto-fallback to fix automatically:") + print(f" make auto-fallback") + + def export_json(self, results: List[Dict], output_file: str): + """Export results to JSON file.""" + with open(output_file, 'w') as f: + json.dump(results, f, indent=2) + print(f"\n📄 Results exported to: {output_file}") + +def main(): + parser = argparse.ArgumentParser(description='Advanced connectivity test for Ansible hosts') + parser.add_argument('--hosts-file', default='inventories/production/hosts', + help='Path to hosts file') + parser.add_argument('--timeout', type=int, default=3, + help='Ping timeout in seconds') + parser.add_argument('--json', help='Export results to JSON file') + parser.add_argument('--quiet', action='store_true', + help='Only show summary, not individual tests') + + args = parser.parse_args() + + if not Path(args.hosts_file).exists(): + print(f"❌ Error: Hosts file not found: {args.hosts_file}") + sys.exit(1) + + tester = ConnectivityTester(args.hosts_file, args.timeout) + + if args.quiet: + # Suppress individual test output + import os + with open(os.devnull, 'w') as devnull: + old_stdout = sys.stdout + sys.stdout = devnull + results = tester.run_tests() + sys.stdout = old_stdout + else: + results = tester.run_tests() + + 
tester.print_summary(results) + + if args.json: + tester.export_json(results, args.json) + +if __name__ == '__main__': + main()