- Modify ansible.cfg to increase SSH connection retries from 2 to 3 and add a connection timeout setting for better reliability. - Enhance auto-fallback.sh script to provide detailed feedback during IP connectivity tests, including clearer status messages for primary and fallback IP checks. - Update documentation to reflect changes in connectivity testing and fallback procedures. These updates improve the robustness of the connectivity testing process and ensure smoother operations during IP failover scenarios.
112 lines
3.5 KiB
Bash
Executable File
112 lines
3.5 KiB
Bash
Executable File
#!/bin/bash
# Automatically switch to fallback IPs when primary IPs fail.
#
# Every host entry in the inventory file is probed; when its primary address
# does not answer and a working fallback address is configured, the inventory
# entry is rewritten to use the fallback address.

# --- Settings ---------------------------------------------------------------
HOSTS_FILE="inventories/production/hosts" # inventory that is read and rewritten
TIMEOUT=3                                 # value handed to `ping -W`
CHANGED=false                             # set to true once an entry is rewritten

# --- Colors -----------------------------------------------------------------
# Stored as literal backslash sequences; they are expanded at print time
# by `echo -e` / `printf '%b'`.
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# --- Banner -----------------------------------------------------------------
printf '%b\n' "${BLUE}Auto-fallback: Testing and switching to fallback IPs when needed...${NC}"
printf '%s\n' "=================================================================="

#######################################
# Test IP connectivity with a single ping.
# Globals:   TIMEOUT (read) - passed to `ping -W` as the wait limit
# Arguments: $1 - address to ping
# Outputs:   nothing (ping output is discarded)
# Returns:   0 when ping succeeds, 1 otherwise
#######################################
test_ip() {
  local ip="$1"

  # Normalise every ping failure code to 1 so callers see a plain 0/1 result.
  ping -c 1 -W "$TIMEOUT" "$ip" >/dev/null 2>&1 || return 1
  return 0
}

#######################################
# Test SSH connectivity with a non-interactive login that runs `exit`.
# Arguments:
#   $1 - inventory host name (accepted for call-site compatibility; not used)
#   $2 - IP address to connect to
#   $3 - remote user name
# Outputs:   nothing (ssh output is discarded)
# Returns:   0 when the ssh command succeeds within the time limits, 1 otherwise
#######################################
test_ssh() {
  local ip="$2"
  local user="$3"

  # stdin comes from /dev/null: this function is called from inside a
  # `while read` loop fed by the hosts file, and ssh would otherwise read
  # (and thereby discard) the remaining inventory lines from that stream.
  if timeout 5 ssh -o ConnectTimeout=3 -o BatchMode=yes "$user@$ip" exit \
    </dev/null >/dev/null 2>&1; then
    return 0
  fi
  return 1
}

#######################################
# Switch one inventory entry to its fallback IP.
#
# On the line that starts with the given host name, the value of
# ansible_host= is replaced by the fallback IP and the now redundant
# ansible_host_fallback= attribute is removed. Other lines are left alone.
#
# Globals:
#   HOSTS_FILE (read; the file is edited with `sed -i`)
#   YELLOW, RED, NC (read)
#   CHANGED (set to true when the edit succeeds)
# Arguments:
#   $1 - inventory host name (first word of the entry)
#   $2 - primary IP currently stored in ansible_host=
#   $3 - fallback IP to promote
# Outputs:   progress message on stdout, error message on stderr
# Returns:   0 on success, 1 when sed fails
#######################################
switch_to_fallback() {
  local hostname="$1"
  local primary_ip="$2"
  local fallback_ip="$3"
  # sed program that backslash-escapes BRE metacharacters and the `/` delimiter.
  local bre_escape='s|[][\.*^$/]|\\&|g'
  local host_re primary_re fallback_re fallback_rep

  echo -e " ${YELLOW}→ Switching $hostname to fallback IP: $fallback_ip${NC}"

  # Escape the values so that e.g. the dots of an IP only match literal dots.
  host_re=$(printf '%s' "$hostname" | sed "$bre_escape")
  primary_re=$(printf '%s' "$primary_ip" | sed "$bre_escape")
  fallback_re=$(printf '%s' "$fallback_ip" | sed "$bre_escape")
  # In the replacement text only `\`, `&` and the delimiter are special.
  fallback_rep=$(printf '%s' "$fallback_ip" | sed 's|[\&/]|\\&|g')

  # Both substitutions are limited to this host's own line and require the
  # value to end at whitespace or end-of-line, so 10.0.0.2 can never match
  # the prefix of 10.0.0.20 on this or any other entry.
  if sed -i \
    -e "/^[[:space:]]*${host_re}[[:space:]]/{" \
    -e "s/\([[:space:]]ansible_host=\)${primary_re}\([[:space:]]\|\$\)/\1${fallback_rep}\2/" \
    -e "s/[[:space:]]\{1,\}ansible_host_fallback=${fallback_re}\([[:space:]]\|\$\)/\1/" \
    -e "}" \
    "$HOSTS_FILE"; then
    CHANGED=true
    return 0
  fi

  echo -e " ${RED}✗ Could not update $HOSTS_FILE${NC}" >&2
  return 1
}

# Parse hosts file and test connectivity.
#
# For every inventory line that carries an ansible_host= attribute:
#   1. ping the primary IP, then try an SSH login on it;
#   2. if either check fails and an ansible_host_fallback= is present,
#      run the same two checks against the fallback IP and, when both pass,
#      rewrite the entry via switch_to_fallback;
#   3. otherwise count the host as unreachable for the final summary.

# Without this guard a missing file would only produce a redirection error
# and the summary below would wrongly report that everything is fine.
if [[ ! -r "$HOSTS_FILE" ]]; then
  echo -e "${RED}✗ Cannot read hosts file: $HOSTS_FILE${NC}" >&2
  exit 1
fi

unreachable=0 # hosts with no working primary and no working fallback

# The inventory is read on fd 3 so that commands run inside the loop cannot
# consume the remaining lines from stdin. `|| [[ -n "$line" ]]` keeps a final
# line that lacks a trailing newline.
while IFS= read -r -u 3 line || [[ -n "$line" ]]; do
  # Skip empty lines and comments
  [[ -z "$line" || "$line" =~ ^# ]] && continue

  # Skip group headers
  [[ "$line" =~ ^\[.*\]$ ]] && continue

  # Only lines with an ansible_host= attribute are host entries
  [[ "$line" =~ ansible_host= ]] || continue

  # Parse host entry; user defaults to root, fallback defaults to empty
  hostname=$(awk '{print $1}' <<<"$line")
  primary_ip=$(grep -oP 'ansible_host=\K[^\s]+' <<<"$line")
  fallback_ip=$(grep -oP 'ansible_host_fallback=\K[^\s]+' <<<"$line" || echo "")
  user=$(grep -oP 'ansible_user=\K[^\s]+' <<<"$line" || echo "root")

  echo -n "Testing $hostname ($primary_ip)... "

  # Test primary IP: ping first, then SSH
  if test_ip "$primary_ip"; then
    if test_ssh "$hostname" "$primary_ip" "$user" </dev/null; then
      echo -e "${GREEN}✓ OK${NC}"
      continue
    fi
    echo -e "${YELLOW}⚠ Ping OK, SSH failed${NC}"
  else
    echo -e "${RED}✗ Primary IP failed${NC}"
  fi

  # Primary is unusable from here on; without a fallback nothing can be done
  if [[ -z "$fallback_ip" ]]; then
    unreachable=$((unreachable + 1))
    continue
  fi

  echo -e " ${BLUE}→ Trying fallback IP...${NC}"
  if test_ip "$fallback_ip" \
    && test_ssh "$hostname" "$fallback_ip" "$user" </dev/null; then
    switch_to_fallback "$hostname" "$primary_ip" "$fallback_ip"
  else
    echo -e " ${RED}✗ Fallback also failed${NC}"
    unreachable=$((unreachable + 1))
  fi
done 3<"$HOSTS_FILE"

# Summary: distinguish "file rewritten", "hosts still down" and "all good"
echo ""
if [[ "$CHANGED" == "true" ]]; then
  echo -e "${GREEN}✓ Hosts file updated with working IPs!${NC}"
  if ((unreachable > 0)); then
    echo -e "${YELLOW}⚠ $unreachable host(s) are still unreachable.${NC}"
  else
    echo -e "${BLUE}You can now run your Ansible commands.${NC}"
  fi
elif ((unreachable > 0)); then
  echo -e "${RED}✗ $unreachable host(s) unreachable and no working fallback - no changes made.${NC}"
else
  echo -e "${GREEN}✓ All primary IPs are working - no changes needed.${NC}"
fi