llm_council/scripts/diagnose_gpu_vm.sh

#!/bin/bash
# Quick diagnostic script for GPU VM connection

# Address of the GPU VM to probe. Resolution order: first positional
# argument, then a GPU_VM value inherited from the environment, then the
# built-in default. With no argument and no environment value this behaves
# exactly as the previous hard-coded assignment.
GPU_VM="${1:-${GPU_VM:-10.0.30.63}}"

echo "=== GPU VM Connection Diagnostics ==="
echo ""

# Step 1: ICMP reachability. Sends a single echo request (-c 1) and aborts the
# whole script with status 1 when it gets no reply, since the later HTTP
# probes are run against the same host.
# NOTE(review): -W 2 is a 2-second reply wait with Linux iputils ping; BSD/macOS
# ping interprets -W in milliseconds -- confirm the intended platform.
echo "1. Testing basic connectivity..."
# Quote the host so an empty or unusual value is passed as one argument.
if ping -c 1 -W 2 "$GPU_VM" > /dev/null 2>&1; then
    echo "   ✓ GPU VM is reachable"
else
    echo "   ✗ Cannot reach GPU VM - check network/firewall"
    exit 1
fi

echo ""
echo "2. Testing port 11434 (Ollama default)..."
# Step 2: probe the /api/tags endpoint on port 11434.
# The response body is fetched once and reused for the model listing, so the
# listing can no longer hang on a second, un-timed request. curl's own
# --max-time bounds the transfer, which avoids depending on the external
# `timeout` utility (when that utility is missing, the probe would fail and be
# reported as a dead port).
if tags_json=$(curl -s --max-time 3 "http://${GPU_VM}:11434/api/tags" 2>/dev/null); then
    echo "   ✓ Port 11434 is open and responding"
    echo "   Models available:"
    # Pretty-print the JSON so each field sits on its own line, then show at
    # most five name/model lines. If python3 is unavailable nothing is listed.
    printf '%s\n' "$tags_json" | python3 -m json.tool 2>/dev/null | grep -E '"name"|"model"' | head -5
else
    echo "   ✗ Port 11434 not responding"
    echo "   Error details:"
    # Re-run verbosely and keep only the lines that explain the failure.
    # "timed out" is included because that is how curl words a --max-time expiry.
    curl -v --max-time 3 "http://${GPU_VM}:11434/api/tags" 2>&1 | grep -E "Connection|timeout|timed out|refused" | head -3
fi

echo ""
echo "3. Testing port 8000 (alternative)..."
# Step 3: probe /v1/models on port 8000. Only success/failure of the request
# is reported; the response body is discarded.
# curl's --max-time replaces the external `timeout` wrapper so a host without
# that utility does not get a false "not responding" result.
if curl -s --max-time 3 "http://${GPU_VM}:8000/v1/models" > /dev/null 2>&1; then
    echo "   ✓ Port 8000 is open and responding"
else
    echo "   ✗ Port 8000 not responding"
fi

#######################################
# Print the value(s) assigned to a key in a dotenv-style file.
# Only real assignments are matched: the key must start the line (optionally
# after whitespace and/or an `export ` prefix), so commented-out lines and
# longer keys that merely contain the name are ignored. Everything after the
# first '=' is printed, so values that themselves contain '=' stay intact.
# Arguments: $1 - key name, $2 - path to the file
# Outputs:   one line per matching assignment on stdout (nothing if absent)
#######################################
env_value() {
    local key=$1 file=$2
    grep -E "^[[:space:]]*(export[[:space:]]+)?${key}[[:space:]]*=" -- "$file" | cut -d'=' -f2-
}

echo ""
echo "4. Checking your .env configuration..."
# Step 4: show the two settings read from ./.env (relative to the current
# working directory, not to the script's location).
if [ -f .env ]; then
    echo "   OPENAI_COMPAT_BASE_URL: $(env_value OPENAI_COMPAT_BASE_URL .env)"
    echo "   USE_LOCAL_OLLAMA: $(env_value USE_LOCAL_OLLAMA .env)"
else
    echo "   ✗ .env file not found"
fi

# Closing advice: static troubleshooting hints, with the probed host
# substituted into the ssh command and the example .env line.
cat <<EOF

=== Recommendations ===

If port 11434 is not working:
  1. SSH to GPU VM: ssh root@${GPU_VM}
  2. Check if Ollama is running: systemctl status ollama
  3. Check what port Ollama is listening on: netstat -tlnp | grep ollama
  4. If only listening on 127.0.0.1, configure it to listen on 0.0.0.0

If you need to use a different port, update .env:
  OPENAI_COMPAT_BASE_URL=http://${GPU_VM}:PORT
EOF