atlas/home-voice-agent/routing/router.py

"""
LLM Router - Routes requests to work or family agent based on identity/origin.
"""

import logging
import requests
from typing import Any, Dict, Optional
from dataclasses import dataclass

logger = logging.getLogger(__name__)


@dataclass
class LLMConfig:
    """Configuration for an LLM server."""
    base_url: str
    model_name: str
    api_key: Optional[str] = None
    timeout: int = 300


@dataclass
class RoutingDecision:
    """Result of routing decision."""
    agent_type: str  # "work" or "family"
    config: LLMConfig
    reason: str


class LLMRouter:
    """Routes LLM requests to appropriate servers."""

    def __init__(self):
        """Initialize router with server configurations."""
        import os
        from pathlib import Path

        # Load .env file from project root
        try:
            from dotenv import load_dotenv
            env_path = Path(__file__).parent.parent / ".env"
            load_dotenv(env_path)
        except ImportError:
            # python-dotenv not installed, use environment variables only
            pass

        # 4080 Work Agent (remote GPU VM or local for testing)
        # Load from .env file or environment variable
        work_host = os.getenv("OLLAMA_HOST", "localhost")
        work_port = int(os.getenv("OLLAMA_PORT", "11434"))

        # Model names - load from .env file or environment variables
        work_model = os.getenv("OLLAMA_WORK_MODEL", os.getenv("OLLAMA_MODEL", "llama3:latest"))
        family_model = os.getenv("OLLAMA_FAMILY_MODEL", os.getenv("OLLAMA_MODEL", "llama3:latest"))

        self.work_agent = LLMConfig(
            base_url=f"http://{work_host}:{work_port}",
            model_name=work_model,
            timeout=300
        )

        # 1050 Family Agent (uses same local Ollama for testing)
        self.family_agent = LLMConfig(
            base_url=f"http://{work_host}:{work_port}",  # Same host for testing
            model_name=family_model,
            timeout=60
        )

    def route_request(self,
                     user_id: Optional[str] = None,
                     origin: Optional[str] = None,
                     agent_type: Optional[str] = None,
                     client_type: Optional[str] = None) -> RoutingDecision:
        """
        Route a request to the appropriate LLM server.

        Args:
            user_id: User identifier (if available)
            origin: Request origin (IP, device, etc.)
            agent_type: Explicit agent type if specified ("work" or "family")
            client_type: Type of client making request

        Returns:
            RoutingDecision with agent type and config
        """
        # Explicit agent type takes precedence
        if agent_type:
            if agent_type == "work":
                return RoutingDecision(
                    agent_type="work",
                    config=self.work_agent,
                    reason=f"Explicit agent type: {agent_type}"
                )
            elif agent_type == "family":
                return RoutingDecision(
                    agent_type="family",
                    config=self.family_agent,
                    reason=f"Explicit agent type: {agent_type}"
                )

        # Route based on client type
        if client_type:
            if client_type in ["work", "desktop", "workstation"]:
                return RoutingDecision(
                    agent_type="work",
                    config=self.work_agent,
                    reason=f"Client type: {client_type}"
                )
            elif client_type in ["family", "phone", "tablet", "home"]:
                return RoutingDecision(
                    agent_type="family",
                    config=self.family_agent,
                    reason=f"Client type: {client_type}"
                )

        # Route based on origin/IP (if configured)
        # For now, default to family agent for safety
        # In production, you might check IP ranges, device names, etc.
        if origin:
            # Example: Check if origin is work network
            # if origin.startswith("10.0.1."):  # Work network
            #     return RoutingDecision("work", self.work_agent, f"Origin: {origin}")
            pass

        # Default: family agent (safer default)
        return RoutingDecision(
            agent_type="family",
            config=self.family_agent,
            reason="Default routing to family agent"
        )

    def make_request(self,
                    routing: RoutingDecision,
                    messages: list,
                    tools: Optional[list] = None,
                    temperature: float = 0.7,
                    stream: bool = False) -> Dict[str, Any]:
        """
        Make a request to the routed LLM server.

        Args:
            routing: Routing decision
            messages: Conversation messages
            tools: Optional tool definitions
            temperature: Sampling temperature
            stream: Whether to stream response

        Returns:
            LLM response
        """
        config = routing.config
        url = f"{config.base_url}/api/chat"

        payload = {
            "model": config.model_name,
            "messages": messages,
            "temperature": temperature,
            "stream": stream
        }

        if tools:
            payload["tools"] = tools

        try:
            logger.info(f"Making request to {routing.agent_type} agent at {url}")
            response = requests.post(url, json=payload, timeout=config.timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            logger.error(f"Request to {routing.agent_type} agent failed: {e}")
            raise Exception(f"LLM request failed: {e}")

    def health_check(self, agent_type: str) -> bool:
        """
        Check if an LLM server is healthy.

        Args:
            agent_type: "work" or "family"

        Returns:
            True if server is reachable
        """
        config = self.work_agent if agent_type == "work" else self.family_agent

        try:
            # Try to list models (lightweight check)
            response = requests.get(f"{config.base_url}/api/tags", timeout=5)
            return response.status_code == 200
        except Exception as e:
            logger.warning(f"Health check failed for {agent_type} agent: {e}")
            return False


# Global router instance
_router = LLMRouter()


def get_router() -> LLMRouter:
    """Get the global router instance."""
    return _router