atlas/home-voice-agent/test_end_to_end.py

#!/usr/bin/env python3
"""
End-to-end test for Atlas voice agent system.

Tests the full flow: User query → Router → LLM → Tool Call → Response
"""

import sys
import time
import requests
from pathlib import Path

# Add this file's own directory to the import path so the sibling packages below resolve
sys.path.insert(0, str(Path(__file__).parent))

from routing.router import LLMRouter
from mcp_adapter.adapter import MCPAdapter
from conversation.session_manager import SessionManager
from memory.manager import MemoryManager
from monitoring.logger import get_llm_logger

MCP_SERVER_URL = "http://localhost:8000/mcp"


def test_full_conversation_flow():
    """Exercise the conversation pipeline components end to end.

    Progress for each step is printed to stdout:

    1. Construct the router, MCP adapter, session manager, memory manager
       and LLM logger.
    2. Probe the MCP server's ``/health`` endpoint.
    3. Discover tools through the adapter.
    4. Create a conversation session.
    5. Route a request for the ``family`` agent.
    6. Call the ``get_current_time`` tool directly. This stands in for the
       decision an LLM would make; no LLM is invoked here.
    7. Store a memory entry and read it back.
    8. Append a user and an assistant message to the session and reload it.

    Returns:
        bool: ``True`` if every step succeeded, ``False`` as soon as the
        health check, tool discovery, tool call, memory test or session
        storage step fails.

    Raises:
        Exception: Errors from component construction, session creation and
            request routing are not caught here and propagate to the caller.
    """
    # Local import keeps this block self-contained; urljoin is stdlib.
    from urllib.parse import urljoin

    print("=" * 60)
    print("End-to-End Conversation Flow Test")
    print("=" * 60)

    # Initialize components
    print("\n1. Initializing components...")
    router = LLMRouter()
    adapter = MCPAdapter(MCP_SERVER_URL)
    session_manager = SessionManager()
    memory_manager = MemoryManager()
    # The logger is not used below; it is requested only so that a failure
    # to construct it surfaces during this initialization step.
    get_llm_logger()

    print("   ✅ Router initialized")
    print("   ✅ MCP Adapter initialized")
    print("   ✅ Session Manager initialized")
    print("   ✅ Memory Manager initialized")
    print("   ✅ Logger initialized")

    # Check MCP server
    print("\n2. Checking MCP server...")
    # Derive the health endpoint from MCP_SERVER_URL so the probe always
    # targets the same host/port the adapter was configured with.
    health_url = urljoin(MCP_SERVER_URL, "/health")
    try:
        response = requests.get(health_url, timeout=2)
    except requests.exceptions.ConnectionError:
        print("   ❌ MCP server is not running!")
        print("      Start it with: cd mcp-server && ./run.sh")
        return False
    except requests.exceptions.RequestException as e:
        # Covers read timeouts (timeout=2 above) and any other request
        # failure, which would otherwise escape and abort the whole script.
        print(f"   ❌ MCP server health check failed: {e}")
        return False

    if response.status_code != 200:
        print("   ⚠️  MCP server returned non-200 status")
        return False
    print("   ✅ MCP server is running")

    # Discover tools
    print("\n3. Discovering available tools...")
    try:
        tools = adapter.discover_tools()
        print(f"   ✅ Found {len(tools)} tools")
        # Only the first five names are shown to keep the output short.
        tool_names = [t['name'] for t in tools[:5]]
        print(f"      Sample: {', '.join(tool_names)}...")
    except Exception as e:
        print(f"   ❌ Tool discovery failed: {e}")
        return False

    # Create session
    print("\n4. Creating conversation session...")
    session_id = session_manager.create_session("family", "test-user")
    print(f"   ✅ Session created: {session_id}")

    # Simulate user query
    print("\n5. Simulating user query: 'What time is it?'")
    user_message = "What time is it?"

    # Route request
    routing_decision = router.route_request(agent_type="family")
    print(f"   ✅ Routed to: {routing_decision.agent_type} agent")
    print(f"      URL: {routing_decision.config.base_url}")
    print(f"      Model: {routing_decision.config.model_name}")

    # In a real scenario, we would:
    # 1. Call LLM with user message + available tools
    # 2. LLM decides to call get_current_time tool
    # 3. Execute tool via MCP adapter
    # 4. Get response and format for user
    # 5. Store in session

    # For this test, let's directly test tool calling
    print("\n6. Testing tool call (simulating LLM decision)...")
    try:
        result = adapter.call_tool("get_current_time", {})
        print("   ✅ Tool called successfully")
        # Preview at most 100 characters of the first content item's text.
        print(f"      Result: {result.get('content', [{}])[0].get('text', 'No text')[:100]}")
    except Exception as e:
        print(f"   ❌ Tool call failed: {e}")
        return False

    # Test memory storage
    print("\n7. Testing memory storage...")
    try:
        memory_id = memory_manager.store_memory(
            category="preference",
            fact="favorite_coffee",
            value="dark roast",
            confidence=0.9,
            source="explicit"
        )
        print(f"   ✅ Memory stored: ID {memory_id}")

        # Retrieve it
        memory = memory_manager.get_memory(memory_id)
        print(f"   ✅ Memory retrieved: {memory.fact} = {memory.value}")
    except Exception as e:
        print(f"   ❌ Memory test failed: {e}")
        return False

    # Test session storage
    print("\n8. Testing session storage...")
    try:
        session_manager.add_message(session_id, "user", user_message)
        # The assistant reply is a canned string, not real tool output.
        session_manager.add_message(
            session_id,
            "assistant",
            "It's currently 3:45 PM EST.",
            tool_calls=[{"name": "get_current_time", "arguments": {}}]
        )

        session = session_manager.get_session(session_id)
        print(f"   ✅ Session has {len(session.messages)} messages")
    except Exception as e:
        print(f"   ❌ Session storage failed: {e}")
        return False

    print("\n" + "=" * 60)
    print("✅ End-to-end test complete!")
    print("=" * 60)
    print("\n📊 Summary:")
    print("   • All components initialized ✅")
    print("   • MCP server connected ✅")
    print("   • Tools discovered ✅")
    print("   • Session created ✅")
    print("   • Tool calling works ✅")
    print("   • Memory storage works ✅")
    print("   • Session storage works ✅")
    print("\n🎉 System is ready for full conversations!")

    return True


def test_tool_ecosystem():
    """Call a fixed set of MCP tools and report which calls succeed.

    Each tool is invoked through a fresh ``MCPAdapter``; a call counts as
    passed when it returns without raising and as failed otherwise. One
    line per tool and a final tally are printed to stdout.

    Returns:
        bool: ``True`` when no tool call raised, ``False`` otherwise.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("Tool Ecosystem Test")
    print(rule)

    client = MCPAdapter(MCP_SERVER_URL)

    # (tool name, arguments) pairs covering different tool categories
    cases = [
        ("get_current_time", {}),
        ("get_date", {}),
        ("list_tasks", {}),
        ("list_timers", {}),
        ("list_notes", {}),
        ("list_memory", {"category": "preference"}),
    ]

    print(f"\nTesting {len(cases)} tools...")

    # One boolean per tool, in call order: True = passed, False = failed.
    outcomes = []
    for name, arguments in cases:
        try:
            # Only success/failure matters; the returned payload is ignored.
            client.call_tool(name, arguments)
            print(f"   ✅ {name}")
            outcomes.append(True)
        except Exception as exc:
            print(f"   ❌ {name}: {exc}")
            outcomes.append(False)

    ok_count = sum(outcomes)
    bad_count = len(outcomes) - ok_count

    print(f"\n   Results: {ok_count} passed, {bad_count} failed")
    return bad_count == 0


if __name__ == "__main__":
    print("\n🚀 Starting End-to-End Tests...\n")

    # Both suites always run, in this order, so the tool ecosystem results
    # are reported even when the conversation flow test fails.
    outcomes = [
        test_full_conversation_flow(),  # Test 1: full conversation flow
        test_tool_ecosystem(),          # Test 2: tool ecosystem
    ]

    # Final summary; the process exit status is 0 only if both suites passed.
    print("\n" + "=" * 60)
    if all(outcomes):
        print("🎉 All end-to-end tests passed!")
        exit_status = 0
    else:
        print("⚠️  Some tests failed")
        exit_status = 1
    sys.exit(exit_status)