"""
LLM Router - Routes requests to work or family agent based on identity/origin.
"""

import json
import logging
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Union

import requests

logger = logging.getLogger(__name__)

# Client types that map onto each agent when no explicit agent type is given.
_WORK_CLIENT_TYPES = ("work", "desktop", "workstation")
_FAMILY_CLIENT_TYPES = ("family", "phone", "tablet", "home")

# Message fields whose streamed fragments are concatenated into one string.
_STREAM_TEXT_FIELDS = ("content", "thinking")


@dataclass
class LLMConfig:
    """Configuration for an LLM server."""

    base_url: str
    model_name: str
    api_key: Optional[str] = None
    timeout: int = 300


@dataclass
class RoutingDecision:
    """Result of routing decision."""

    agent_type: str  # "work" or "family"
    config: LLMConfig
    reason: str


class LLMRouter:
    """Routes LLM requests to appropriate servers."""

    def __init__(self):
        """Initialize router with server configurations.

        Values come from the process environment, optionally pre-populated
        from a ``.env`` file located one directory above this module's
        directory when ``python-dotenv`` is installed.

        Raises:
            ValueError: If ``OLLAMA_PORT`` is set to a non-integer value.
        """
        # Load .env file from project root
        try:
            from dotenv import load_dotenv
        except ImportError:
            # python-dotenv not installed, use environment variables only
            pass
        else:
            load_dotenv(Path(__file__).parent.parent / ".env")

        host = os.getenv("OLLAMA_HOST", "localhost")
        port = int(os.getenv("OLLAMA_PORT", "11434"))
        base_url = f"http://{host}:{port}"

        # Per-agent model names fall back to the shared OLLAMA_MODEL setting.
        shared_model = os.getenv("OLLAMA_MODEL", "llama3:latest")
        work_model = os.getenv("OLLAMA_WORK_MODEL", shared_model)
        family_model = os.getenv("OLLAMA_FAMILY_MODEL", shared_model)

        # 4080 Work Agent (remote GPU VM or local for testing)
        self.work_agent = LLMConfig(
            base_url=base_url,
            model_name=work_model,
            timeout=300,
        )

        # 1050 Family Agent (uses same local Ollama for testing)
        self.family_agent = LLMConfig(
            base_url=base_url,  # Same host for testing
            model_name=family_model,
            timeout=60,
        )

    def route_request(
        self,
        user_id: Optional[str] = None,
        origin: Optional[str] = None,
        agent_type: Optional[str] = None,
        client_type: Optional[str] = None,
    ) -> RoutingDecision:
        """
        Route a request to the appropriate LLM server.

        Precedence: an explicit, recognised ``agent_type`` wins; otherwise a
        recognised ``client_type`` decides; otherwise the family agent is
        used. Unrecognised values are ignored rather than rejected.

        Args:
            user_id: User identifier (if available). Currently unused.
            origin: Request origin (IP, device, etc.). Currently unused.
            agent_type: Explicit agent type if specified ("work" or "family")
            client_type: Type of client making request

        Returns:
            RoutingDecision with agent type and config
        """
        # Explicit agent type takes precedence
        if agent_type == "work":
            return RoutingDecision(
                agent_type="work",
                config=self.work_agent,
                reason=f"Explicit agent type: {agent_type}",
            )
        if agent_type == "family":
            return RoutingDecision(
                agent_type="family",
                config=self.family_agent,
                reason=f"Explicit agent type: {agent_type}",
            )

        # Route based on client type
        if client_type:
            if client_type in _WORK_CLIENT_TYPES:
                return RoutingDecision(
                    agent_type="work",
                    config=self.work_agent,
                    reason=f"Client type: {client_type}",
                )
            if client_type in _FAMILY_CLIENT_TYPES:
                return RoutingDecision(
                    agent_type="family",
                    config=self.family_agent,
                    reason=f"Client type: {client_type}",
                )

        # Origin-based routing is not implemented yet. In production this is
        # where IP ranges, device names, etc. could be checked (for example,
        # sending a known work subnet to the work agent).

        # Default: family agent (safer default)
        return RoutingDecision(
            agent_type="family",
            config=self.family_agent,
            reason="Default routing to family agent",
        )

    @staticmethod
    def _build_payload(
        config: LLMConfig,
        messages: list,
        tools: Optional[list],
        temperature: float,
        stream: bool,
    ) -> Dict[str, Any]:
        """Build the JSON body for a chat request."""
        payload: Dict[str, Any] = {
            "model": config.model_name,
            "messages": messages,
            "stream": stream,
            # Ollama reads sampling parameters from "options"; a top-level
            # "temperature" key is not part of the /api/chat request schema.
            "options": {"temperature": temperature},
        }
        if tools:
            payload["tools"] = tools
        return payload

    @staticmethod
    def _merge_stream_chunks(
        lines: Iterable[Union[bytes, str]],
    ) -> Dict[str, Any]:
        """Merge newline-delimited JSON chunks into a single response dict.

        Top-level keys are merged with later chunks winning, so the flags and
        statistics of the final chunk are kept. Within ``message``, text
        fragments are concatenated, ``tool_calls`` lists are joined, and any
        other key takes its most recent value.

        Args:
            lines: Raw lines of the streamed body; blank lines are skipped.

        Returns:
            The merged response, or an empty dict if no chunk was received.

        Raises:
            ValueError: If a non-blank line is not valid JSON.
        """
        merged: Dict[str, Any] = {}
        message: Dict[str, Any] = {}
        text_parts: Dict[str, List[str]] = {}
        tool_calls: List[Any] = []

        for raw in lines:
            if not raw:
                continue
            chunk = json.loads(raw)
            if not isinstance(chunk, dict):
                continue
            merged.update(chunk)
            for key, value in (chunk.get("message") or {}).items():
                if key in _STREAM_TEXT_FIELDS and isinstance(value, str):
                    text_parts.setdefault(key, []).append(value)
                elif key == "tool_calls" and isinstance(value, list):
                    tool_calls.extend(value)
                else:
                    message[key] = value

        if not merged:
            return {}

        for key, parts in text_parts.items():
            message[key] = "".join(parts)
        if tool_calls:
            message["tool_calls"] = tool_calls
        if message:
            merged["message"] = message
        return merged

    @staticmethod
    def _post_streaming(
        url: str, payload: Dict[str, Any], timeout: int
    ) -> Dict[str, Any]:
        """POST a streaming chat request and return the merged response."""
        response = requests.post(url, json=payload, timeout=timeout, stream=True)
        try:
            response.raise_for_status()
            return LLMRouter._merge_stream_chunks(response.iter_lines())
        finally:
            # Release the connection even if the stream is cut short.
            response.close()

    def make_request(
        self,
        routing: RoutingDecision,
        messages: list,
        tools: Optional[list] = None,
        temperature: float = 0.7,
        stream: bool = False,
    ) -> Dict[str, Any]:
        """
        Make a request to the routed LLM server.

        Args:
            routing: Routing decision
            messages: Conversation messages
            tools: Optional tool definitions
            temperature: Sampling temperature
            stream: Whether to ask the server to stream its response. The
                streamed chunks are collected and merged, so the return value
                is a single dict either way.

        Returns:
            LLM response

        Raises:
            Exception: If the HTTP request fails or the body is not valid
                JSON. The underlying error is attached as ``__cause__``.
        """
        config = routing.config
        url = f"{config.base_url}/api/chat"
        payload = self._build_payload(config, messages, tools, temperature, stream)

        try:
            logger.info(
                "Making request to %s agent at %s", routing.agent_type, url
            )
            if stream:
                return self._post_streaming(url, payload, config.timeout)
            response = requests.post(url, json=payload, timeout=config.timeout)
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            logger.error(
                "Request to %s agent failed: %s", routing.agent_type, e
            )
            raise Exception(f"LLM request failed: {e}") from e

    def health_check(self, agent_type: str) -> bool:
        """
        Check if an LLM server is healthy.

        Args:
            agent_type: "work" selects the work agent; any other value
                selects the family agent.

        Returns:
            True if the server answered the model listing with HTTP 200,
            False on any other status or on any error.
        """
        config = self.work_agent if agent_type == "work" else self.family_agent

        try:
            # Try to list models (lightweight check)
            response = requests.get(f"{config.base_url}/api/tags", timeout=5)
            return response.status_code == 200
        except Exception as e:
            # Best-effort probe: any failure is reported as "unhealthy".
            logger.warning(
                "Health check failed for %s agent: %s", agent_type, e
            )
            return False


# Global router instance
_router = LLMRouter()


def get_router() -> LLMRouter:
    """Get the global router instance."""
    return _router