llm_council/scripts/test_model_timeout.py
Irina Levit 3546c04348
feat: Major UI/UX improvements and production readiness
## Features Added

### Document Reference System
- Implemented numbered document references (@1, @2, etc.) with autocomplete dropdown
- Added fuzzy filename matching for @filename references
- Document filtering now prioritizes numeric refs > filename refs > all documents (see the sketch after this list)
- Autocomplete dropdown appears when typing @, with keyboard navigation (Up/Down, Enter/Tab, Escape)
- Document numbers displayed in UI for easy reference
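
A minimal sketch of that resolution order, assuming documents are kept as an ordered list of filenames. The function name, regexes, and substring matching (standing in for the fuzzier matching described above) are illustrative, not the actual `docs_context.py` code:

```python
import re

def resolve_refs(message: str, documents: list[str]) -> list[str]:
    """Illustrative only: pick documents for a message, preferring
    numeric @N refs, then @filename refs, then all documents."""
    # @1, @2, ... are treated as 1-based indexes into the document list
    numeric = [int(n) for n in re.findall(r"@(\d+)", message)]
    if numeric:
        return [documents[n - 1] for n in numeric if 0 < n <= len(documents)]
    # @filename: case-insensitive substring match stands in for fuzzy matching
    names = re.findall(r"@([\w.\-]+)", message)
    if names:
        return [doc for doc in documents
                if any(name.lower() in doc.lower() for name in names)]
    return documents  # no refs at all: fall back to every document
```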

### Conversation Management
- Added conversation rename functionality with inline editing
- Implemented conversation search (by title and content)
- Search box always visible, even when no conversations exist
- Export reports now replace @N references with actual filenames
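
The export-time substitution is the inverse mapping. A sketch under the same 1-based assumption (`substitute_refs` is a made-up name, not the report generator's API):

```python
import re

def substitute_refs(text: str, documents: list[str]) -> str:
    """Sketch: replace each @N in a report with the N-th filename."""
    def repl(match: re.Match) -> str:
        n = int(match.group(1))
        # Leave out-of-range refs untouched rather than guessing
        return documents[n - 1] if 0 < n <= len(documents) else match.group(0)
    return re.sub(r"@(\d+)", repl, text)
```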

### UI/UX Improvements
- Removed debug toggle button
- Improved text contrast in dark mode
- Made input textarea expand to full available width
- Fixed file text color for better readability
- Enhanced document display with numbered badges

### Configuration & Timeouts
- Made HTTP client timeouts configurable (connect, write, pool), sketched below
- Added .env.example with all configuration options
- Updated timeout documentation
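
As a sketch, the granular limits map onto `httpx.Timeout` roughly like this. Apart from `LLM_TIMEOUT_SECONDS`, which this repo already uses, the env var names and defaults are placeholders; the real ones are listed in `.env.example`:

```python
import os
import httpx

# LLM_TIMEOUT_SECONDS is real; the *_TIMEOUT names and defaults below
# are placeholders for illustration.
timeout = httpx.Timeout(
    float(os.environ.get("LLM_TIMEOUT_SECONDS", "120")),        # default / read
    connect=float(os.environ.get("LLM_CONNECT_TIMEOUT", "10")),
    write=float(os.environ.get("LLM_WRITE_TIMEOUT", "30")),
    pool=float(os.environ.get("LLM_POOL_TIMEOUT", "10")),
)
client = httpx.AsyncClient(timeout=timeout)
```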

### Developer Experience
- Added `make test-setup` target for automated test conversation creation
- Test setup script supports TEST_MESSAGE and TEST_DOCS env vars (see the sketch after this list)
- Improved Makefile with dev and test-setup targets
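
The script reads these with plain environment lookups, along these lines (the defaults and the comma-separated TEST_DOCS format are assumptions; see `scripts/test_setup.py` for the real behavior):

```python
import os

message = os.environ.get("TEST_MESSAGE", "Hello from test setup")  # invented default
docs = [d for d in os.environ.get("TEST_DOCS", "").split(",") if d]
```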

### Documentation
- Updated ARCHITECTURE.md with all new features
- Created comprehensive deployment documentation
- Added GPU VM setup guides
- Removed unnecessary markdown files (CLAUDE.md, CONTRIBUTING.md, header.jpg)
- Organized documentation in docs/ directory

### GPU VM / Ollama (Stability + GPU Offload)
- Updated GPU VM docs to reflect the working systemd environment for remote Ollama
- Standardized remote Ollama port to 11434 (and added /v1/models verification, sketched after this list)
- Documented required env for GPU offload on this VM:
  - `OLLAMA_MODELS=/mnt/data/ollama`, `HOME=/mnt/data/ollama/home`
  - `OLLAMA_LLM_LIBRARY=cuda_v12` (not `cuda`)
  - `LD_LIBRARY_PATH=/usr/local/lib/ollama:/usr/local/lib/ollama/cuda_v12`
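
A quick verification from the council host, with the VM address left as a placeholder to fill in:

```python
import httpx

# Substitute the GPU VM's hostname or IP; 11434 is the standardized port.
resp = httpx.get("http://GPU_VM_HOST:11434/v1/models", timeout=10)
resp.raise_for_status()
print([m["id"] for m in resp.json().get("data", [])])
```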

## Technical Changes

### Backend
- Enhanced `docs_context.py` with reference parsing (numeric and filename)
- Added `update_conversation_title` to storage.py
- New endpoints: PATCH /api/conversations/{id}/title, GET /api/conversations/search (sketched below)
- Improved report generation with filename substitution
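
A hedged sketch of the two endpoints, assuming a FastAPI app in `main.py` (the request body shape, the search helper, and the return conventions are assumptions; only `update_conversation_title` is named in this commit):

```python
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

from backend import storage  # matches the backend.* imports used elsewhere

app = FastAPI()

class TitleUpdate(BaseModel):
    title: str

@app.patch("/api/conversations/{conversation_id}/title")
async def rename_conversation(conversation_id: str, body: TitleUpdate):
    # Assumes update_conversation_title returns None for an unknown id.
    if storage.update_conversation_title(conversation_id, body.title) is None:
        raise HTTPException(status_code=404, detail="Conversation not found")
    return {"id": conversation_id, "title": body.title}

@app.get("/api/conversations/search")
async def search_conversations(q: str):
    # search_conversations is a hypothetical helper; per this commit, the
    # search covers both titles and message content.
    return storage.search_conversations(q)
```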

### Frontend
- Removed debugMode state and related code
- Added autocomplete dropdown component
- Implemented search functionality in Sidebar
- Enhanced ChatInterface with autocomplete and improved textarea sizing
- Updated CSS for better contrast and responsive design

## Files Changed
- Backend: config.py, council.py, docs_context.py, main.py, storage.py
- Frontend: App.jsx, ChatInterface.jsx, Sidebar.jsx, and related CSS files
- Documentation: README.md, ARCHITECTURE.md, new docs/ directory
- Configuration: .env.example, Makefile
- Scripts: scripts/test_setup.py

## Breaking Changes
None; all changes are backward compatible

## Testing
- All existing tests pass
- New test-setup script validates conversation creation workflow
- Manual testing of autocomplete, search, and rename features

#!/usr/bin/env python3
"""Test script to diagnose model timeout issues."""
import asyncio
import time

import httpx

from backend.config import OPENAI_COMPAT_BASE_URL, LLM_TIMEOUT_SECONDS


async def test_model(model: str, max_tokens: int = 10):
    """Test a single model query."""
    print(f"\n{'='*60}")
    print(f"Testing model: {model}")
    print(f"Timeout: {LLM_TIMEOUT_SECONDS}s")
    print(f"Base URL: {OPENAI_COMPAT_BASE_URL}")
    print(f"{'='*60}")

    url = f"{OPENAI_COMPAT_BASE_URL}/v1/chat/completions"
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": "Say hello"}],
        "max_tokens": max_tokens,
    }

    start_time = time.time()
    try:
        async with httpx.AsyncClient(timeout=LLM_TIMEOUT_SECONDS) as client:
            print(f"[{time.time() - start_time:.1f}s] Sending request...")
            response = await client.post(url, json=payload)
            elapsed = time.time() - start_time
            print(f"[{elapsed:.1f}s] Response received: Status {response.status_code}")

            if response.status_code == 200:
                data = response.json()
                content = data.get("choices", [{}])[0].get("message", {}).get("content", "")
                print(f"✓ Success! Response: {content[:100]}")
                return True
            else:
                print(f"✗ Error: {response.status_code}")
                print(f"  Response: {response.text[:200]}")
                return False
    except httpx.TimeoutException:
        elapsed = time.time() - start_time
        print(f"✗ Timeout after {elapsed:.1f}s (limit was {LLM_TIMEOUT_SECONDS}s)")
        return False
    except Exception as e:
        elapsed = time.time() - start_time
        print(f"✗ Error after {elapsed:.1f}s: {type(e).__name__}: {e}")
        return False


async def main():
    models = ["llama3.2:1b", "qwen2.5:0.5b", "gemma2:2b"]
    print("Testing models sequentially to diagnose timeout issues...")
    print(f"Current timeout setting: {LLM_TIMEOUT_SECONDS}s")

    results = {}
    for model in models:
        results[model] = await test_model(model)
        # Small delay between tests
        await asyncio.sleep(1)

    print(f"\n{'='*60}")
    print("Summary:")
    for model, success in results.items():
        status = "✓ PASS" if success else "✗ FAIL"
        print(f"  {model}: {status}")
    print(f"{'='*60}")


if __name__ == "__main__":
    asyncio.run(main())