Update ansible.cfg and auto-fallback script for improved connectivity handling

- Modify ansible.cfg to increase SSH connection retries from 2 to 3 and add a connection timeout setting for better reliability.
- Enhance auto-fallback.sh script to provide detailed feedback during IP connectivity tests, including clearer status messages for primary and fallback IP checks.
- Update documentation to reflect changes in connectivity testing and fallback procedures.

These updates improve the robustness of the connectivity testing process and ensure smoother operations during IP failover scenarios.
This commit is contained in:
ilia 2025-09-16 23:00:32 -04:00
parent b424e9b55b
commit e05b3aa0d5
27 changed files with 779 additions and 102 deletions

View File

@ -218,7 +218,7 @@ test: ## Run all tests (lint + syntax check if available)
fi fi
@$(MAKE) test-syntax @$(MAKE) test-syntax
check: ## Dry-run the development playbook (--check mode) check: auto-fallback ## Dry-run the development playbook (--check mode)
@echo "$(YELLOW)Running dry-run on development hosts...$(RESET)" @echo "$(YELLOW)Running dry-run on development hosts...$(RESET)"
$(ANSIBLE_PLAYBOOK) $(PLAYBOOK_DEV) --check --diff $(ANSIBLE_PLAYBOOK) $(PLAYBOOK_DEV) --check --diff
@ -226,7 +226,7 @@ check-local: ## Dry-run the local playbook
@echo "$(YELLOW)Running dry-run on localhost...$(RESET)" @echo "$(YELLOW)Running dry-run on localhost...$(RESET)"
$(ANSIBLE_PLAYBOOK) $(PLAYBOOK_LOCAL) --check --diff -K $(ANSIBLE_PLAYBOOK) $(PLAYBOOK_LOCAL) --check --diff -K
apply: ## Run the development playbook on all dev hosts apply: auto-fallback ## Run the development playbook on all dev hosts
@echo "$(YELLOW)Applying development playbook...$(RESET)" @echo "$(YELLOW)Applying development playbook...$(RESET)"
$(ANSIBLE_PLAYBOOK) $(PLAYBOOK_DEV) $(ANSIBLE_PLAYBOOK) $(PLAYBOOK_DEV)
@ -326,12 +326,16 @@ shell: ## Configure shell only
@echo "$(YELLOW)Running shell configuration...$(RESET)" @echo "$(YELLOW)Running shell configuration...$(RESET)"
$(ANSIBLE_PLAYBOOK) $(PLAYBOOK_DEV) --tags shell $(ANSIBLE_PLAYBOOK) $(PLAYBOOK_DEV) --tags shell
# Runs playbooks/shell.yml through $(ANSIBLE_PLAYBOOK); whatever ANSIBLE_ARGS
# expands to is appended to that command line (both are defined outside this target).
shell-all: ## Configure shell on all shell_hosts (usage: make shell-all)
@echo "$(YELLOW)Running shell configuration on all shell hosts...$(RESET)"
$(ANSIBLE_PLAYBOOK) playbooks/shell.yml $(ANSIBLE_ARGS)
apps: ## Install applications only apps: ## Install applications only
@echo "$(YELLOW)Installing applications...$(RESET)" @echo "$(YELLOW)Installing applications...$(RESET)"
$(ANSIBLE_PLAYBOOK) $(PLAYBOOK_DEV) --tags apps $(ANSIBLE_PLAYBOOK) $(PLAYBOOK_DEV) --tags apps
# Connectivity targets # Connectivity targets
ping: ## Ping hosts with colored output (usage: make ping [GROUP=dev] [HOST=dev01]) ping: auto-fallback ## Ping hosts with colored output (usage: make ping [GROUP=dev] [HOST=dev01])
ifdef HOST ifdef HOST
@echo "$(YELLOW)Pinging host: $(HOST)$(RESET)" @echo "$(YELLOW)Pinging host: $(HOST)$(RESET)"
@ansible $(HOST) -m ping --one-line | while read line; do \ @ansible $(HOST) -m ping --one-line | while read line; do \
@ -459,43 +463,6 @@ endif
edit-group-vault: ## Edit encrypted group vars (usage: make edit-group-vault) edit-group-vault: ## Edit encrypted group vars (usage: make edit-group-vault)
ansible-vault edit inventories/production/group_vars/all/vault.yml ansible-vault edit inventories/production/group_vars/all/vault.yml
test-connectivity: ## Test network connectivity and SSH access to all hosts
@echo "$(BOLD)Connectivity Test$(RESET)"
@if [ -n "$(CURRENT_HOST)" ]; then \
echo "$(BLUE)Auto-excluding current host: $(CURRENT_HOST) ($(CURRENT_IP))$(RESET)"; \
fi
@echo ""
@echo "$(YELLOW)Network Connectivity:$(RESET)"
@ansible-inventory --list | jq -r '._meta.hostvars | to_entries[] | select(.value.ansible_host) | "\(.key) \(.value.ansible_host)"' 2>/dev/null | while read host ip; do \
if [ "$$host" != "$(CURRENT_HOST)" ]; then \
printf " %-20s " "$$host ($$ip)"; \
if ping -c 1 -W 2 $$ip >/dev/null 2>&1; then \
echo "$(GREEN)✓ Network OK$(RESET)"; \
else \
echo "$(RED)✗ Network FAIL$(RESET)"; \
fi; \
fi; \
done
@echo ""
@echo "$(YELLOW)SSH Connectivity:$(RESET)"
@ansible all -m ping --one-line $(EXCLUDE_CURRENT) 2>/dev/null | grep -E "(SUCCESS|UNREACHABLE)" | while read line; do \
host=$$(echo "$$line" | cut -d' ' -f1); \
if echo "$$line" | grep -q "SUCCESS"; then \
printf " $(GREEN)✓ %-20s$(RESET) SSH OK\n" "$$host"; \
elif echo "$$line" | grep -q "UNREACHABLE"; then \
printf " $(RED)✗ %-20s$(RESET) SSH FAIL\n" "$$host"; \
fi; \
done
@echo ""
@echo "$(YELLOW)SSH Keys:$(RESET)"
@if [ -f ~/.ssh/id_ed25519.pub ]; then \
echo " $(GREEN)✓ SSH key available$(RESET) (id_ed25519)"; \
elif [ -f ~/.ssh/id_rsa.pub ]; then \
echo " $(GREEN)✓ SSH key available$(RESET) (id_rsa)"; \
else \
echo " $(RED)✗ No SSH key found$(RESET)"; \
echo " $(YELLOW) Run: ssh-keygen -t ed25519$(RESET)"; \
fi
copy-ssh-key: ## Copy SSH key to specific host (usage: make copy-ssh-key HOST=giteaVM) copy-ssh-key: ## Copy SSH key to specific host (usage: make copy-ssh-key HOST=giteaVM)
ifndef HOST ifndef HOST
@ -528,3 +495,21 @@ monitoring: ## Install monitoring tools on all machines
@echo "$(YELLOW)Installing monitoring tools...$(RESET)" @echo "$(YELLOW)Installing monitoring tools...$(RESET)"
$(ANSIBLE_PLAYBOOK) $(PLAYBOOK_DEV) --tags monitoring $(ANSIBLE_PLAYBOOK) $(PLAYBOOK_DEV) --tags monitoring
@echo "$(GREEN)✓ Monitoring installation complete$(RESET)" @echo "$(GREEN)✓ Monitoring installation complete$(RESET)"
# Runs test_connectivity.py against the inventory named by INVENTORY_HOSTS.
# The script is looked up relative to the directory make is invoked from; the
# target fails with exit status 1 when it is missing, otherwise the recipe's
# status is whatever the Python script returns.
test-connectivity: ## Test host connectivity with detailed diagnostics and recommendations
	@echo "$(YELLOW)Testing host connectivity...$(RESET)"
	@[ -f "test_connectivity.py" ] || { echo "$(RED)Error: test_connectivity.py not found$(RESET)"; exit 1; }
	@python3 test_connectivity.py --hosts-file $(INVENTORY_HOSTS)
# Runs auto-fallback.sh from the current directory. The script is marked
# executable before it is started; the target fails with exit status 1 when
# the script is missing.
auto-fallback: ## Automatically switch to fallback IPs when primary IPs fail
	@echo "$(YELLOW)Auto-fallback: Testing and switching to working IPs...$(RESET)"
	@[ -f "auto-fallback.sh" ] || { echo "$(RED)Error: auto-fallback.sh not found$(RESET)"; exit 1; }
	@chmod +x auto-fallback.sh && ./auto-fallback.sh

View File

@ -19,6 +19,6 @@ vault_password_file = ~/.ansible-vault-pass
ansible_managed = Ansible managed: {file} modified on %Y-%m-%d %H:%M:%S ansible_managed = Ansible managed: {file} modified on %Y-%m-%d %H:%M:%S
[ssh_connection] [ssh_connection]
ssh_args = -o ControlMaster=auto -o ControlPersist=60s ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o ConnectTimeout=5
retries = 2 retries = 3
pipelining = True pipelining = True

111
auto-fallback.sh Executable file
View File

@ -0,0 +1,111 @@
#!/bin/bash
# Automatically switch to fallback IPs when primary IPs fail
#
# Reads the inventory at HOSTS_FILE, probes every host entry with ping and SSH,
# and rewrites the inventory in place when only the fallback address answers.
#
# Inventory file that is read and rewritten. The path is relative, so the
# script has to be started from the directory that contains inventories/.
HOSTS_FILE="inventories/production/hosts"
# Value handed to `ping -W` by test_ip.
TIMEOUT=3
# Flipped to true by switch_to_fallback once the inventory has been edited.
CHANGED=false
# Colors (ANSI escape sequences; printed through `echo -e`)
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
# NC resets the terminal colour
NC='\033[0m'
# Banner
echo -e "${BLUE}Auto-fallback: Testing and switching to fallback IPs when needed...${NC}"
echo "=================================================================="
# Probe one address with a single ping.
# Arguments:
#   $1 - address to ping
# Globals:
#   TIMEOUT - passed to `ping -W`
# Returns:
#   0 when ping succeeds, 1 otherwise (ping's own output is discarded)
test_ip() {
    local target="$1"
    ping -c 1 -W "$TIMEOUT" "$target" >/dev/null 2>&1 && return 0
    return 1
}
# Check whether a non-interactive SSH login to user@ip works.
# Arguments:
#   $1 - inventory hostname (kept for call-site compatibility; the probe itself
#        only needs the address and the user)
#   $2 - address to connect to
#   $3 - remote user
# Returns:
#   0 when `ssh user@ip exit` succeeds within the time limits, 1 otherwise
test_ssh() {
    local ip="$2"
    local user="$3"
    # -n detaches ssh from stdin. This function is called from inside a
    # `while read ... done < "$HOSTS_FILE"` loop; without -n a successful ssh
    # session drains the remaining inventory lines and the loop ends early.
    # BatchMode=yes makes ssh fail instead of prompting for a password.
    if timeout 5 ssh -n -o ConnectTimeout=3 -o BatchMode=yes "$user@$ip" exit >/dev/null 2>&1; then
        return 0
    fi
    return 1
}
# Rewrite the inventory so that a host uses its fallback address as ansible_host.
# Arguments:
#   $1 - inventory hostname (first field of the host line)
#   $2 - current primary IP
#   $3 - fallback IP that should become the new ansible_host
# Globals:
#   HOSTS_FILE - inventory edited in place
#   CHANGED    - set to true
switch_to_fallback() {
    local hostname="$1"
    local primary_ip="$2"
    local fallback_ip="$3"
    # Dots are regex wildcards; escape them so that e.g. 10.0.10.14 cannot
    # match 10x0y10z14 or a different host.
    local host_re="${hostname//./\\.}"
    local primary_re="${primary_ip//./\\.}"
    local fallback_re="${fallback_ip//./\\.}"
    echo -e " ${YELLOW}→ Switching $hostname to fallback IP: $fallback_ip${NC}"
    # Both substitutions are restricted to the line that starts with this
    # hostname, and each value must be followed by whitespace or end of line so
    # that an address which is a prefix of another one (10.0.10.14 vs
    # 10.0.10.142) is never partially replaced.
    #   1st: swap the primary IP for the fallback IP
    #   2nd: drop the fallback attribute, since it is now the primary
    sed -i -E \
        -e "/^${host_re}[[:space:]]/s/([[:space:]]ansible_host=)${primary_re}([[:space:]]|\$)/\1${fallback_ip}\2/" \
        -e "/^${host_re}[[:space:]]/s/[[:space:]]ansible_host_fallback=${fallback_re}([[:space:]]|\$)/\1/" \
        "$HOSTS_FILE"
    CHANGED=true
}
# Try a host's fallback address and switch the inventory to it when it answers.
# Arguments:
#   $1 - hostname, $2 - primary IP, $3 - fallback IP (may be empty), $4 - user
# Returns:
#   0 when the inventory was switched to the fallback, 1 when the host stays
#   unreachable (no fallback configured, or the fallback failed as well)
try_fallback() {
    local hostname="$1" primary_ip="$2" fallback_ip="$3" user="$4"
    [[ -n "$fallback_ip" ]] || return 1
    echo -e " ${BLUE}→ Trying fallback IP...${NC}"
    if test_ip "$fallback_ip" && test_ssh "$hostname" "$fallback_ip" "$user"; then
        switch_to_fallback "$hostname" "$primary_ip" "$fallback_ip"
        return 0
    fi
    echo -e " ${RED}✗ Fallback also failed${NC}"
    return 1
}

# Number of hosts that could be reached neither on the primary nor on a fallback IP.
UNRESOLVED=0

# Parse hosts file and test connectivity.
# The inventory is read on file descriptor 3 so that commands started inside
# the loop (ssh in particular) cannot consume the remaining lines from stdin.
# `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline.
while IFS= read -r -u 3 line || [[ -n "$line" ]]; do
    # Skip empty lines and comments
    [[ -z "$line" || "$line" =~ ^# ]] && continue
    # Skip group headers
    [[ "$line" =~ ^\[.*\]$ ]] && continue
    # Only host entries that define ansible_host are probed
    [[ "$line" =~ ansible_host= ]] || continue

    hostname=$(echo "$line" | awk '{print $1}')
    primary_ip=$(echo "$line" | grep -oP 'ansible_host=\K[^\s]+')
    fallback_ip=$(echo "$line" | grep -oP 'ansible_host_fallback=\K[^\s]+' || echo "")
    user=$(echo "$line" | grep -oP 'ansible_user=\K[^\s]+' || echo "root")

    echo -n "Testing $hostname ($primary_ip)... "
    if test_ip "$primary_ip"; then
        if test_ssh "$hostname" "$primary_ip" "$user"; then
            echo -e "${GREEN}✓ OK${NC}"
            continue
        fi
        echo -e "${YELLOW}⚠ Ping OK, SSH failed${NC}"
    else
        echo -e "${RED}✗ Primary IP failed${NC}"
    fi

    # Primary address is unusable: fall back if possible, otherwise count the failure.
    try_fallback "$hostname" "$primary_ip" "$fallback_ip" "$user" || UNRESOLVED=$((UNRESOLVED + 1))
done 3< "$HOSTS_FILE"

# Summary. The exit status stays 0 in every case so that make targets which
# depend on this script keep running exactly as before.
echo ""
if [[ "$CHANGED" == "true" ]]; then
    echo -e "${GREEN}✓ Hosts file updated with working IPs!${NC}"
fi
if (( UNRESOLVED > 0 )); then
    # Previously this case was reported as "All primary IPs are working".
    echo -e "${YELLOW}⚠ $UNRESOLVED host(s) remain unreachable - see the results above.${NC}"
elif [[ "$CHANGED" == "true" ]]; then
    echo -e "${BLUE}You can now run your Ansible commands.${NC}"
else
    echo -e "${GREEN}✓ All primary IPs are working - no changes needed.${NC}"
fi

175
docs/connectivity-test.md Normal file
View File

@ -0,0 +1,175 @@
# Connectivity Test Documentation
## Overview
The `test_connectivity.py` script provides comprehensive connectivity testing for Ansible hosts with intelligent fallback IP detection and detailed diagnostics.
## Features
- **Comprehensive Testing**: Tests both ping and SSH connectivity
- **Fallback Detection**: Identifies when fallback IPs should be used
- **Smart Diagnostics**: Provides specific error messages and recommendations
- **Multiple Output Formats**: Console, quiet mode, and JSON export
- **Actionable Recommendations**: Suggests specific commands to fix issues
## Usage
### Basic Usage
```bash
# Test all hosts
make test-connectivity
# Or run directly
python3 test_connectivity.py
```
### Advanced Options
```bash
# Quiet mode (summary only)
python3 test_connectivity.py --quiet
# Export results to JSON
python3 test_connectivity.py --json results.json
# Custom hosts file
python3 test_connectivity.py --hosts-file inventories/staging/hosts
# Custom timeout
python3 test_connectivity.py --timeout 5
```
## Output Interpretation
### Status Icons
- ✅ **SUCCESS**: Host is fully accessible via primary IP
- 🔑 **SSH KEY**: SSH key authentication issue
- 🔧 **SSH SERVICE**: SSH service not running
- ⚠️ **SSH ERROR**: Other SSH-related errors
- 🔄 **USE FALLBACK**: Should switch to fallback IP
- ❌ **BOTH FAILED**: Both primary and fallback IPs failed
- 🚫 **NO FALLBACK**: Primary IP failed, no fallback available
- ❓ **UNKNOWN**: Unexpected connectivity state
### Common Issues and Solutions
#### SSH Key Issues
```
🔑 Fix SSH key issues (2 hosts):
make copy-ssh-key HOST=dev01
make copy-ssh-key HOST=debianDesktopVM
```
**Solution**: Run the suggested `make copy-ssh-key` commands
#### Fallback Recommendations
```
🔄 Switch to fallback IPs (1 hosts):
sed -i 's/vaultwardenVM ansible_host=100.100.19.11/vaultwardenVM ansible_host=10.0.10.142/' inventories/production/hosts
```
**Solution**: Run the suggested sed command or use `make auto-fallback`
#### Critical Issues
```
🚨 Critical issues (4 hosts):
bottom: ✗ bottom: Primary IP 10.0.10.156 failed, no fallback available
```
**Solution**: Check network connectivity, host status, or add fallback IPs
## Integration with Ansible Workflow
### Before Running Ansible
```bash
# Test connectivity first
make test-connectivity
# Fix any issues, then run Ansible
make apply
```
### Automated Fallback
```bash
# Automatically switch to working IPs
make auto-fallback
# Then run your Ansible tasks
make apply
```
## Configuration
### Hosts File Format
The script expects hosts with optional fallback IPs:
```
vaultwardenVM ansible_host=100.100.19.11 ansible_host_fallback=10.0.10.142 ansible_user=ladmin
```
### Timeout Settings
- **Ping timeout**: 3 seconds (configurable with `--timeout`)
- **SSH timeout**: 3-second connect timeout (`ConnectTimeout=3`) inside a 5-second overall limit (hardcoded for reliability)
## Troubleshooting
### Common Problems
1. **"Permission denied (publickey)"**
- Run: `make copy-ssh-key HOST=hostname`
2. **"Connection refused"**
- Check if SSH service is running on target host
- Verify firewall settings
3. **"Host key verification failed"**
- Add host to known_hosts: `ssh-keyscan hostname >> ~/.ssh/known_hosts`
4. **"No route to host"**
- Check network connectivity
- Verify IP addresses are correct
### Debug Mode
For detailed debugging, re-run with a longer timeout so that slow hosts are not reported as failures:
```bash
python3 test_connectivity.py --timeout 10
```
## JSON Output Format
When using `--json`, the output includes detailed information:
```json
[
{
"hostname": "vaultwardenVM",
"group": "vaultwarden",
"primary_ip": "100.100.19.11",
"fallback_ip": "10.0.10.142",
"user": "ladmin",
"primary_ping": true,
"primary_ssh": true,
"fallback_ping": true,
"fallback_ssh": true,
"status": "success",
"recommendation": "✓ vaultwardenVM is fully accessible via primary IP 100.100.19.11"
}
]
```
## Best Practices
1. **Run before Ansible operations** to catch connectivity issues early
2. **Use quiet mode** in scripts: `python3 test_connectivity.py --quiet`
3. **Export JSON results** for logging and monitoring
4. **Fix SSH key issues** before running Ansible
5. **Use auto-fallback** for automated IP switching
## Integration with CI/CD
```bash
# In your CI pipeline
make test-connectivity
if [ $? -ne 0 ]; then
echo "Connectivity issues detected"
exit 1
fi
make apply
```

View File

@ -1,10 +1,10 @@
$ANSIBLE_VAULT;1.1;AES256 $ANSIBLE_VAULT;1.1;AES256
36376666313464366432353635643532663733656664336632626633616465313834323563613965 36343265643238633236643162613137393331386164306133666537633336633036376433386161
3261643434373638623932373531366333393736636165620a306437366133343435626639396361 3135366566623235333264386539346364333435373065300a633231633731316633313166346161
30373765343339626464336538303634336135316431653264363831643264373636303161616334 30363334613965666634633665363632323966396464633636346533616634393664386566333230
6231326138306564310a343135303362326664653061666363366364376335343431393330643334 3463666531323866660a666238383331383562313363386639646161653334313661393065343135
32336263623437636266383730666562303234633438646330313163323232623961613665613031 33613762653361656633366465306264323935363032353737333935363165346639616330333939
65643630623235356164303839663938343238336432663462363431363062623764326536396562 39336538643866366361313838636338643336376365373166376234383838656430623339313162
32613331366135646133373165646634356337376463393530343264386531393837303263353033 37353461313263643263376232393138396233366234336333613535366234383661353938663032
31646238393165613331623164613265613332623933343136623739316262646237323739666434 65383737343164343431363764333063326230623263323231366232626131306637353361343466
6238 6131

View File

@ -0,0 +1,7 @@
$ANSIBLE_VAULT;1.1;AES256
31306264346663636630656534303766666564333866326139336137383339633338323834653266
6132333337363566623265303037336266646238633036390a663432623861363562386561393264
63303565633530383634643538323165383461656539613331386135336265653531336266613865
3833376664366239650a313134653238323437633265373463326231346663366434323733663666
38353061373437306431383132333233663639643134363464396163333962373033363661623666
3430633863623962366430613962346264356461373539376263

View File

@ -1,7 +1,7 @@
$ANSIBLE_VAULT;1.1;AES256 $ANSIBLE_VAULT;1.1;AES256
37646438353233376464643364306161636364356130316530366430306530323635616531346661 63393735666433636632373036626634386363303637613162626365353866363434363539363530
3935613366376138313461633662353037623534353433620a613163333332346564666530653862 3464633237656336306633383339396132333533626433360a323838653030333533393531316631
31636565306234366537623763376161346139306131323366643138613730643761633335343330 32646430303765313736356233653732666531323266326430646330616537626430323062653266
3634376334636363300a666432306663643632353233396666333336386238386438666133303432 3165303465303765630a373163353863373139333936346233343137373962326437633038653564
31356266353035303732353661343634653732626166626461333939663033346166393861396332 35386131393432316261353338623032313136623836326432343939303035623665633462613535
6461326530623939336462346531363335383237303731303964 3137616135626239313337653134333765646362356261376432

View File

@ -0,0 +1,7 @@
$ANSIBLE_VAULT;1.1;AES256
66633265383239626163633134656233613638643862323562373330643363323036333334646566
3439646635343533353432323064643135623532333738380a353866643461636233376432396434
35316639343131363536343264646666313934616166653535666634633230613537356663313663
3430343234323963300a376133333935653965396561643431306138323636396635396338356266
61636237643366646135333633383562656361356233383866616533336666383561303538393234
3637383939376665356136363636626166323836376264373261

View File

@ -1,7 +1,7 @@
$ANSIBLE_VAULT;1.1;AES256 $ANSIBLE_VAULT;1.1;AES256
65643434353032343662326530613038373164393531393865633934333838373232326265646262 64383961333130663764633131623063373965383261613530313536396362366639393263396664
3866613232353361626333323666666563323634383064340a346434393261346461643061306563 3165636131346138613436613565336236313564613731360a623839653064346130333561666164
32323730393439663931663734326134633166333866393834353637666465313962363339323930 66633636373163346538393938323634336463343431353630306330316661383836343634376334
6638646636366463360a633731383838656535353536366136616637393332613138313838623337 3666656436336536340a373633363965646164373031323533636563386266366238393539643463
62616462623734373932316635623030623335613939313334393632323363316635333839663964 38386334396335303666633631373834633465636239666131633430303437343137333437366337
6235376161333834383438373164396537343438366239356134 6465303935396332323563613037376231326564363231343332

View File

@ -1,7 +1,7 @@
$ANSIBLE_VAULT;1.1;AES256 $ANSIBLE_VAULT;1.1;AES256
31613237643164326433613265306534626435313661663530613134393739633734633130653439 63663562363638643732663337623063313532373930646634336366316636313265633239373163
6265613832306138643133356333336539643636313732660a626262313139623534326137643230 3731653139383136656362366431666264376530636366340a316334393963336537646536336636
38346634623535343233336238646335666331393064616631643439353264633262326536363830 35636261383632623865326631326261353735633233386232383862376466386131663566626636
3739663737323639300a396361346133363763356232316338646535383964653331613738616262 3234313733643735360a663230633532393932666163383934303934346466376631303130663635
31643031636334373266306266383730656161663566353532353239323130376539363433623362 33383163306538373834303364333935636566633432623735363731396461343661623638313563
6664663838393235323664633038356533313833306432353861 6233356166666233303635356166323830306165393737353533

View File

@ -1,6 +1,7 @@
$ANSIBLE_VAULT;1.1;AES256 $ANSIBLE_VAULT;1.1;AES256
35613535653633616433383235306131326139313335323039393662313066613966633934333864 37346464666430303939303936623232303038616132376630653735363138393439353534613166
6465656334383738393565613033653230323264363933370a623036393963393833376333383635 3835346134643735666332643734633437393961666465380a393030393161316430626162343363
62636466383165383439623736613831663761336662383138386666336365636166373338666232 31626230623830303537326466303330623065636661643835323064353963616639333364303966
6164616262383764340a326530393662383632623538333535353962313138633639653933303564 3030663964373638330a623831306261386338343630376562323839303336346164623330646433
3939 30306139396638643332396331616461336530666366373533363365313731643033356161373739
3339663736616238613131613539656535373331386137653630

View File

@ -1,6 +1,7 @@
$ANSIBLE_VAULT;1.1;AES256 $ANSIBLE_VAULT;1.1;AES256
32353034343864393663363666306566396464626335363133316432633832616561336234323138 33663135313965613939386665326237383639323738383566653565656331303965376464306364
6535373836623837323266376539633937326365393730300a303963663165353536656133636663 6162363534306630623537623038363433313430666662340a306564373036366163366565323333
63323966353039663531626434303939313137383734363538616564646638353030643130613632 32663836383133636661626631356636616638613231646537626530316634396539613732306664
3131353132336261650a653361333235643130333330346366656637303332666361386461616331 3335303836656236310a316561623562643339643264313635396131633963623764333839303132
3132 64303234626361633835343564346362336563333537653866393835643832643833616261376261
3331343439313931383838363839656262616530306534313964

View File

@ -0,0 +1,7 @@
$ANSIBLE_VAULT;1.1;AES256
61623232353833613730343036663434633265346638366431383737623936616131356661616238
3230346138373030396336663566353433396230346434630a313633633161303539373965343466
38353131336330643566643232393638616531316430343932636663613437613565643666353863
3963663866623238630a636532303537616238666165306339303661363732393365336138613236
61336664393839306565303433646532646535656432633330343030396361623063653263313738
3834373161333737326432366630623262623730613337386163

View File

@ -0,0 +1,7 @@
$ANSIBLE_VAULT;1.1;AES256
31316663336338303832323464623866343366313261653536623233303466636630633235643638
3666646431323061313836333233356162643462323763380a623666663062386337393439653134
61616135353966333639323031643263646231636332613935353234363134356435646266343866
3034653235393636350a626362333764313732646663653838313233326438646330393336346539
30393364323237396633343133616439393563326161636366613965366161656364343939313334
3430306634396361353238643735363430383433323431393230

View File

@ -1,6 +1,7 @@
$ANSIBLE_VAULT;1.1;AES256 $ANSIBLE_VAULT;1.1;AES256
35386435346434313638656334393931363832396538626361633237653134303639323662353165 66333033636639396131653539393133383536313661386332646634656664326566663033383937
3131653934353233626136386236363565363835373535320a373932343630303363656363346138 3438303835303931616631623331656336366636393531620a363336333634373730623739376564
33366161623833366666326161383964396463636633323361333066383066633838636438633364 32393130343430623938366232313266633531363866653631366632336535616261346437376562
3131306263323038370a616432303966323065646466646430356365653334316564333364376535 3331626636346234340a613833613463383430306639393661313333366464373963663964313438
3364 39623766633965303136363533306438383662306131373864333431346537643834316437653132
3435316231366632353465336130336464633839346366313465

View File

@ -0,0 +1,7 @@
$ANSIBLE_VAULT;1.1;AES256
62356361353835643235613335613661356230666539386533383536623432316333346431343462
3265376632633731623430376333323234633962643766380a363033666334643930326636343963
33666562356336663663633931383465303365303739663036613039396430646166646139653265
6663366664623064650a386265333865386538633363613963383630643633356130616238653632
36333436643563363933353664303765646435323161393938656530336331396130616235653335
6638346636343339343465663730373134333663633265383763

View File

@ -0,0 +1,7 @@
$ANSIBLE_VAULT;1.1;AES256
35633833353965363964376161393730613065663236326239376562356231316166656131366263
6263363436373965316339623139353830643062393165370a643138356561613537616431316534
63386635363838626465396439303664316635633239653639646338393130666164653262316135
3937376464303935620a343530333030643830383130646532613533336435383334373831343261
37653138613132616165636132623037623033343265663734626536366361373130353139383634
6664346538653965343263376538636336393164356434646264

View File

@ -1,3 +1,8 @@
# Ansible Inventory
# Primary IPs: Tailscale (100.x.x.x) for remote access
# Fallback IPs: Local network (10.0.x.x) when Tailscale is down
# Usage: ansible_host_fallback is read by `make auto-fallback` (auto-fallback.sh) and is also available for manual fallback
[gitea] [gitea]
giteaVM ansible_host=10.0.30.169 ansible_user=gitea giteaVM ansible_host=10.0.30.169 ansible_user=gitea
@ -8,18 +13,28 @@ portainerVM ansible_host=10.0.30.69 ansible_user=ladmin
homepageVM ansible_host=10.0.30.12 ansible_user=homepage homepageVM ansible_host=10.0.30.12 ansible_user=homepage
[vaultwarden] [vaultwarden]
vaultwardenVM ansible_host=100.100.19.11 ansible_user=ladmin vaultwardenVM ansible_host=100.100.19.11 ansible_host_fallback=10.0.10.142 ansible_user=ladmin
[dev] [dev]
dev01 ansible_host=10.0.30.105 ansible_user=ladmin dev01 ansible_host=10.0.30.105 ansible_user=ladmin
bottom ansible_host=10.0.10.156 ansible_user=beast bottom ansible_host=10.0.10.156 ansible_user=beast
debianDesktopVM ansible_host=10.0.10.206 ansible_user=user skip_reboot=true debianDesktopVM ansible_host=10.0.10.206 ansible_user=user skip_reboot=true
[ansible] [ansible]
ansible-controlVM ansible_host=10.0.10.157 ansible_user=master ansibleVM ansible_host=10.0.10.157 ansible_user=master
[tailscale] [tailscale]
tailscaleVM ansible_host=100.66.218.53 ansible_user=ladmin tailscaleVM ansible_host=100.66.218.53 ansible_user=ladmin
[services]
caddy ansible_host=100.117.106.18 ansible_host_fallback=10.0.10.50 ansible_user=ladmin
jellyfin ansible_host=100.104.109.45 ansible_host_fallback=10.0.10.232 ansible_user=user
listmonk ansible_host=100.73.190.115 ansible_host_fallback=10.0.10.149 ansible_user=ladmin
slack ansible_host=100.110.190.69 ansible_host_fallback=10.0.10.154 ansible_user=ladmin
[desktop]
desktop-beast ansible_host=100.117.34.106 ansible_user=beast
[local] [local]
localhost ansible_connection=local localhost ansible_connection=local

4
package-lock.json generated
View File

@ -13,8 +13,8 @@
"markdownlint-cli2": "^0.18.1" "markdownlint-cli2": "^0.18.1"
}, },
"engines": { "engines": {
"node": ">=18.0.0", "node": ">=22.0.0",
"npm": ">=9.0.0" "npm": ">=10.0.0"
} }
}, },
"node_modules/@nodelib/fs.scandir": { "node_modules/@nodelib/fs.scandir": {

31
playbooks/shell.yml Normal file
View File

@ -0,0 +1,31 @@
---
# Playbook: shell.yml
# Purpose: Configure shell environment (zsh, oh-my-zsh, plugins) on all hosts
# Targets: all hosts
# Tags: shell
# Usage: make shell-all

- name: Configure shell environment
  hosts: all
  become: true
  strategy: free

  # Sections are listed in the order Ansible executes them:
  # pre_tasks, then roles, then tasks.
  pre_tasks:
    # Best effort: a failed cache refresh is recorded but does not stop the play.
    - name: Update apt cache
      ansible.builtin.apt:
        update_cache: true
      register: apt_update_result
      ignore_errors: true

    # Only shown when ansible_debug_output is set to a true value.
    - name: Display apt update status
      ansible.builtin.debug:
        msg: "Apt cache update: {{ 'Success' if apt_update_result is succeeded else 'Failed - continuing anyway' }}"
      when: ansible_debug_output | default(false) | bool

  roles:
    - role: shell
      tags:
        - shell

  tasks:
    - name: Display completion message
      ansible.builtin.debug:
        msg: "Shell configuration completed successfully on {{ inventory_hostname }}!"

View File

@ -52,7 +52,7 @@
path: /etc/apt/sources.list.d/nodesource.list path: /etc/apt/sources.list.d/nodesource.list
state: absent state: absent
become: true become: true
when: when:
- node_version_check.rc != 0 or not node_version_check.stdout.startswith('v2') - node_version_check.rc != 0 or not node_version_check.stdout.startswith('v2')
- nodesource_repo_check.stdout == "wrong_config" - nodesource_repo_check.stdout == "wrong_config"
@ -61,7 +61,7 @@
path: /etc/apt/keyrings/nodesource.gpg path: /etc/apt/keyrings/nodesource.gpg
state: absent state: absent
become: true become: true
when: when:
- node_version_check.rc != 0 or not node_version_check.stdout.startswith('v2') - node_version_check.rc != 0 or not node_version_check.stdout.startswith('v2')
- nodesource_key_check.stdout == "wrong_key" - nodesource_key_check.stdout == "wrong_key"
@ -71,7 +71,7 @@
state: directory state: directory
mode: '0755' mode: '0755'
become: true become: true
when: when:
- node_version_check.rc != 0 or not node_version_check.stdout.startswith('v2') - node_version_check.rc != 0 or not node_version_check.stdout.startswith('v2')
- nodesource_key_check.stdout in ["not_exists", "wrong_key"] - nodesource_key_check.stdout in ["not_exists", "wrong_key"]
@ -82,7 +82,7 @@
mode: '0644' mode: '0644'
force: true force: true
become: true become: true
when: when:
- node_version_check.rc != 0 or not node_version_check.stdout.startswith('v2') - node_version_check.rc != 0 or not node_version_check.stdout.startswith('v2')
- nodesource_key_check.stdout in ["not_exists", "wrong_key"] - nodesource_key_check.stdout in ["not_exists", "wrong_key"]
@ -92,7 +92,7 @@
state: present state: present
update_cache: false update_cache: false
become: true become: true
when: when:
- node_version_check.rc != 0 or not node_version_check.stdout.startswith('v2') - node_version_check.rc != 0 or not node_version_check.stdout.startswith('v2')
- nodesource_repo_check.stdout in ["not_exists", "wrong_config"] - nodesource_repo_check.stdout in ["not_exists", "wrong_config"]

View File

@ -42,4 +42,5 @@
ansible.builtin.file: ansible.builtin.file:
path: /tmp/docker.gpg path: /tmp/docker.gpg
state: absent state: absent
when: docker_key_check.stdout in ["not_exists", "wrong_key"] when: docker_key_check.stdout in ["not_exists", "wrong_key"]

View File

@ -25,4 +25,5 @@
repo: "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian {{ ansible_distribution_release }} stable" repo: "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian {{ ansible_distribution_release }} stable"
state: present state: present
update_cache: true update_cache: true
when: docker_repo_check.stdout in ["not_exists", "wrong_config"] when: docker_repo_check.stdout in ["not_exists", "wrong_config"]

View File

@ -33,4 +33,5 @@
repo: "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu {{ docker_ubuntu_codename }} stable" repo: "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu {{ docker_ubuntu_codename }} stable"
state: present state: present
update_cache: true update_cache: true
when: docker_repo_check.stdout in ["not_exists", "wrong_config"] when: docker_repo_check.stdout in ["not_exists", "wrong_config"]

View File

@ -25,4 +25,5 @@
repo: "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" repo: "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable"
state: present state: present
update_cache: true update_cache: true
when: docker_repo_check.stdout in ["not_exists", "wrong_config"] when: docker_repo_check.stdout in ["not_exists", "wrong_config"]

View File

@ -40,7 +40,7 @@
path: /usr/share/keyrings/tailscale-archive-keyring.gpg path: /usr/share/keyrings/tailscale-archive-keyring.gpg
state: absent state: absent
become: true become: true
when: when:
- tailscale_version_check.rc != 0 - tailscale_version_check.rc != 0
- tailscale_key_check.stdout == "wrong_key" - tailscale_key_check.stdout == "wrong_key"
@ -49,7 +49,7 @@
path: /etc/apt/sources.list.d/tailscale.list path: /etc/apt/sources.list.d/tailscale.list
state: absent state: absent
become: true become: true
when: when:
- tailscale_version_check.rc != 0 - tailscale_version_check.rc != 0
- tailscale_repo_check.stdout == "wrong_config" - tailscale_repo_check.stdout == "wrong_config"
@ -59,7 +59,7 @@
dest: /usr/share/keyrings/tailscale-archive-keyring.gpg dest: /usr/share/keyrings/tailscale-archive-keyring.gpg
mode: '0644' mode: '0644'
become: true become: true
when: when:
- tailscale_version_check.rc != 0 - tailscale_version_check.rc != 0
- tailscale_key_check.stdout in ["not_exists", "wrong_key"] - tailscale_key_check.stdout in ["not_exists", "wrong_key"]
@ -77,7 +77,7 @@
state: present state: present
filename: tailscale filename: tailscale
become: true become: true
when: when:
- tailscale_version_check.rc != 0 - tailscale_version_check.rc != 0
- tailscale_repo_check.stdout in ["not_exists", "wrong_config"] - tailscale_repo_check.stdout in ["not_exists", "wrong_config"]
@ -101,4 +101,4 @@
enabled: true enabled: true
state: started state: started
become: true become: true
when: tailscale_version_check.rc != 0 when: tailscale_version_check.rc != 0

311
test_connectivity.py Normal file
View File

@ -0,0 +1,311 @@
#!/usr/bin/env python3
"""
Advanced connectivity test for Ansible hosts with fallback IP support.
Tests both primary and fallback IPs, provides detailed diagnostics, and suggests fixes.
"""
import subprocess
import sys
import argparse
import json
import re
from pathlib import Path
from typing import Dict, List, Tuple, Optional
class ConnectivityTester:
    """Probe the hosts of an Ansible INI inventory over ping and SSH.

    For every host entry that carries ``ansible_host=`` the tester pings the
    primary address (and ``ansible_host_fallback=`` when present), attempts a
    non-interactive SSH login on each address that answered the ping, and
    turns the outcome into a status code plus a human-readable recommendation.

    Args:
        hosts_file: Path to the inventory file to read.
        timeout: Seconds handed to ``ping -W`` and to ssh's ``ConnectTimeout``.
            The subprocess guards are derived from it (ping: ``timeout + 1``,
            ssh: ``timeout + 2``), so the default of 3 yields a 3 s SSH connect
            timeout and a 5 s overall SSH limit.
    """

    def __init__(self, hosts_file: str, timeout: int = 3):
        self.hosts_file = Path(hosts_file)
        self.timeout = timeout
        self.results = {}

    def test_ping(self, ip: str) -> bool:
        """Return True when a single ``ping`` to ``ip`` exits with status 0.

        A ping that exceeds the subprocess guard, or a missing ``ping``
        binary, counts as unreachable instead of raising.
        """
        try:
            result = subprocess.run(
                ['ping', '-c', '1', '-W', str(self.timeout), ip],
                capture_output=True,
                timeout=self.timeout + 1
            )
        except (subprocess.TimeoutExpired, FileNotFoundError):
            return False
        return result.returncode == 0

    def test_ssh(self, hostname: str, ip: str, user: str) -> Tuple[bool, str]:
        """Try a key-based SSH login and return ``(success, error_message)``.

        Args:
            hostname: Inventory name of the host; accepted for call-site
                symmetry, the connection itself is made to ``ip``.
            ip: Address to connect to.
            user: Remote login name.

        Returns:
            ``(True, "")`` when ``ssh user@ip exit`` returns 0, otherwise
            ``(False, <stderr or exception text>)``.
        """
        try:
            result = subprocess.run(
                # BatchMode=yes makes ssh fail instead of prompting, so the
                # probe can never hang waiting for a password.
                ['ssh', '-o', f'ConnectTimeout={self.timeout}', '-o', 'BatchMode=yes',
                 f'{user}@{ip}', 'exit'],
                capture_output=True,
                timeout=self.timeout + 2
            )
        except (subprocess.TimeoutExpired, FileNotFoundError) as e:
            return False, str(e)

        if result.returncode == 0:
            return True, ""
        # errors='replace': stderr is diagnostic text only; undecodable bytes
        # must not abort the whole test run.
        return False, result.stderr.decode(errors='replace').strip()

    def parse_hosts_file(self) -> List[Dict]:
        """Read the inventory and return one dict per host entry.

        Only lines that sit under a ``[group]`` header and contain
        ``ansible_host=`` are treated as hosts. Blank lines, ``#``/``;``
        comment lines and the contents of ``[group:vars]`` /
        ``[group:children]`` sections are ignored.
        """
        hosts = []
        current_group = None

        with open(self.hosts_file, 'r', encoding='utf-8') as f:
            for raw_line in f:
                line = raw_line.strip()

                # Skip empty lines and comments
                if not line or line.startswith(('#', ';')):
                    continue

                # Section header
                if line.startswith('[') and line.endswith(']'):
                    section = line[1:-1]
                    # ":vars" / ":children" sections list variables or group
                    # names, never hosts, so nothing below them is collected.
                    if section.endswith((':vars', ':children')):
                        current_group = None
                    else:
                        current_group = section
                    continue

                # Host entry
                if current_group and 'ansible_host=' in line:
                    host_data = self._parse_host_line(line, current_group)
                    if host_data:
                        hosts.append(host_data)

        return hosts

    def _parse_host_line(self, line: str, group: str) -> Optional[Dict]:
        """Split ``name key=value ...`` into a host dict, or None if empty.

        Missing ``ansible_host`` / ``ansible_host_fallback`` become ``''`` and
        a missing ``ansible_user`` defaults to ``'root'``.
        """
        parts = line.split()
        if not parts:
            return None

        # Later duplicates of a key win, exactly like repeated assignment.
        attrs = dict(part.split('=', 1) for part in parts[1:] if '=' in part)

        return {
            'hostname': parts[0],
            'group': group,
            'primary_ip': attrs.get('ansible_host', ''),
            'fallback_ip': attrs.get('ansible_host_fallback', ''),
            'user': attrs.get('ansible_user', 'root'),
            'original_line': line
        }

    def _probe(self, hostname: str, ip: str, user: str) -> Tuple[bool, bool, str]:
        """Ping ``ip`` and, only if it answers, try SSH.

        Returns ``(ping_ok, ssh_ok, ssh_error)``; SSH is not attempted (and
        reported as ``False, ''``) when the ping fails.
        """
        if not self.test_ping(ip):
            return False, False, ''
        ssh_ok, ssh_error = self.test_ssh(hostname, ip, user)
        return True, ssh_ok, ssh_error

    def test_host(self, host_data: Dict) -> Dict:
        """Probe the primary and fallback address of one parsed host.

        Args:
            host_data: A dict as produced by ``_parse_host_line``.

        Returns:
            A result dict with the probe flags, SSH error texts, and the
            ``status`` / ``recommendation`` from ``_analyze_connectivity``.
        """
        hostname = host_data['hostname']
        user = host_data['user']

        result = {
            'hostname': hostname,
            'group': host_data['group'],
            'primary_ip': host_data['primary_ip'],
            'fallback_ip': host_data['fallback_ip'],
            'user': user,
            'primary_ping': False,
            'primary_ssh': False,
            'fallback_ping': False,
            'fallback_ssh': False,
            'primary_ssh_error': '',
            'fallback_ssh_error': '',
            'recommendation': '',
            'status': 'unknown'
        }

        # Both addresses are always probed (when configured) so the exported
        # results describe the fallback even if the primary is healthy.
        for label in ('primary', 'fallback'):
            ip = result[f'{label}_ip']
            if not ip:
                continue
            ping_ok, ssh_ok, ssh_error = self._probe(hostname, ip, user)
            result[f'{label}_ping'] = ping_ok
            result[f'{label}_ssh'] = ssh_ok
            result[f'{label}_ssh_error'] = ssh_error

        # Determine status and recommendation
        result['status'], result['recommendation'] = self._analyze_connectivity(result)
        return result

    def _analyze_connectivity(self, result: Dict) -> Tuple[str, str]:
        """Map probe flags to ``(status, recommendation)``.

        Statuses: ``success``, ``ssh_key``, ``ssh_service``, ``ssh_error``
        (primary pings but SSH fails), ``use_fallback``, ``fallback_ssh``,
        ``both_failed`` (primary does not ping, fallback configured),
        ``no_fallback`` (primary does not ping, none configured).
        """
        hostname = result['hostname']
        primary_ip = result['primary_ip']
        fallback_ip = result['fallback_ip']

        if result['primary_ping']:
            # Primary IP works perfectly
            if result['primary_ssh']:
                return 'success', f"{hostname} is fully accessible via primary IP {primary_ip}"

            # Primary ping works but SSH fails: classify by ssh's stderr
            error = result['primary_ssh_error']
            if 'Permission denied' in error:
                return 'ssh_key', f"{hostname}: SSH key issue on {primary_ip} - run: make copy-ssh-key HOST={hostname}"
            if 'Connection refused' in error:
                return 'ssh_service', f"{hostname}: SSH service not running on {primary_ip}"
            return 'ssh_error', f"{hostname}: SSH error on {primary_ip} - {error}"

        # Primary IP failed and there is nothing to fall back to
        if not fallback_ip:
            return 'no_fallback', f"{hostname}: Primary IP {primary_ip} failed, no fallback available"

        # Primary IP failed, judge the fallback
        if result['fallback_ping'] and result['fallback_ssh']:
            return 'use_fallback', f"{hostname}: Switch to fallback IP {fallback_ip} (primary {primary_ip} failed)"
        if result['fallback_ping']:
            return 'fallback_ssh', f"{hostname}: Fallback IP {fallback_ip} reachable but SSH failed"
        return 'both_failed', f"{hostname}: Both primary {primary_ip} and fallback {fallback_ip} failed"

    def run_tests(self) -> List[Dict]:
        """Probe every host in the inventory, printing one progress line each.

        Returns:
            The list of per-host result dicts, in inventory order.
        """
        hosts = self.parse_hosts_file()
        results = []

        print("🔍 Testing host connectivity...")
        print("=" * 60)

        for host_data in hosts:
            # flush so the host name is visible while its probes are running,
            # not only once the status glyph completes the line.
            print(f"Testing {host_data['hostname']}...", end=' ', flush=True)
            result = self.test_host(host_data)
            results.append(result)

            # Print immediate status; success and failure must be
            # distinguishable at a glance.
            status = result['status']
            if status == 'success':
                print("✅")
            elif status in ('ssh_key', 'ssh_service', 'ssh_error'):
                print("⚠️")
            elif status == 'use_fallback':
                print("🔄")
            else:
                print("❌")

        return results

    def print_summary(self, results: List[Dict]):
        """Print the results grouped by status, then the recommendations."""
        print("\n📊 CONNECTIVITY SUMMARY")
        print("=" * 60)

        # Group results by status (first-seen order is kept)
        by_status = {}
        for result in results:
            by_status.setdefault(result['status'], []).append(result)

        status_icons = {
            'success': '✅',
            'ssh_key': '🔑',
            'ssh_service': '🔧',
            'ssh_error': '⚠️',
            'use_fallback': '🔄',
            'both_failed': '❌',
            'no_fallback': '🚫',
            'unknown': '❓'
        }

        for status, hosts in by_status.items():
            # Statuses without an entry above (e.g. fallback_ssh) get '❓'.
            icon = status_icons.get(status, '❓')
            print(f"\n{icon} {status.upper().replace('_', ' ')} ({len(hosts)} hosts)")
            print("-" * 40)
            for result in hosts:
                print(f" {result['hostname']:<20} {result['recommendation']}")

        # Print actionable recommendations
        self._print_recommendations(results)

    def _print_recommendations(self, results: List[Dict]):
        """Print copy-pasteable commands for the problems that were found."""
        print("\n🛠️ RECOMMENDATIONS")
        print("=" * 60)

        # SSH key issues
        ssh_key_issues = [r for r in results if r['status'] == 'ssh_key']
        if ssh_key_issues:
            print(f"\n🔑 Fix SSH key issues ({len(ssh_key_issues)} hosts):")
            for result in ssh_key_issues:
                print(f" make copy-ssh-key HOST={result['hostname']}")

        # Fallback recommendations
        fallback_needed = [r for r in results if r['status'] == 'use_fallback']
        if fallback_needed:
            print(f"\n🔄 Switch to fallback IPs ({len(fallback_needed)} hosts):")
            for result in fallback_needed:
                # Dots are regex wildcards in sed's match pattern; escape them
                # so the command only rewrites this exact host/address pair.
                pattern = f"{result['hostname']} ansible_host={result['primary_ip']}".replace('.', r'\.')
                replacement = f"{result['hostname']} ansible_host={result['fallback_ip']}"
                print(f" sed -i 's/{pattern}/{replacement}/' {self.hosts_file}")

        # Critical issues
        critical_issues = [r for r in results if r['status'] in ('both_failed', 'no_fallback')]
        if critical_issues:
            print(f"\n🚨 Critical issues ({len(critical_issues)} hosts):")
            for result in critical_issues:
                print(f" {result['hostname']}: {result['recommendation']}")

        # Auto-fallback suggestion
        if fallback_needed:
            print("\n🤖 Or run auto-fallback to fix automatically:")
            print(" make auto-fallback")

    def export_json(self, results: List[Dict], output_file: str):
        """Write ``results`` to ``output_file`` as indented JSON and report it."""
        with open(output_file, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"\n📄 Results exported to: {output_file}")
def main():
    """Command-line entry point: parse options, probe all hosts, report.

    Options:
        --hosts-file  Inventory to read (default ``inventories/production/hosts``).
        --timeout     Integer seconds passed to ``ConnectivityTester``.
        --json        If given, also export the results to this JSON file.
        --quiet       Hide the per-host progress output; the summary is
                      still printed.

    Exits with status 1 when the hosts file does not exist.
    """
    # Imported locally: only the --quiet path below needs these.
    import contextlib
    import os

    parser = argparse.ArgumentParser(description='Advanced connectivity test for Ansible hosts')
    parser.add_argument('--hosts-file', default='inventories/production/hosts',
                        help='Path to hosts file')
    parser.add_argument('--timeout', type=int, default=3,
                        help='Ping timeout in seconds')
    parser.add_argument('--json', help='Export results to JSON file')
    parser.add_argument('--quiet', action='store_true',
                        help='Only show summary, not individual tests')
    args = parser.parse_args()

    if not Path(args.hosts_file).exists():
        print(f"❌ Error: Hosts file not found: {args.hosts_file}")
        sys.exit(1)

    tester = ConnectivityTester(args.hosts_file, args.timeout)

    if args.quiet:
        # redirect_stdout restores sys.stdout even if run_tests() raises, so a
        # failure can never leave stdout pointing at a closed devnull handle.
        with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
            results = tester.run_tests()
    else:
        results = tester.run_tests()

    tester.print_summary(results)

    if args.json:
        tester.export_json(results, args.json)
# Run the CLI only when this file is executed as a script, not when imported.
if "__main__" == __name__:
    main()