✅ TICKET-006: Wake-word Detection Service - Implemented wake-word detection using openWakeWord - HTTP/WebSocket server on port 8002 - Real-time detection with configurable threshold - Event emission for ASR integration - Location: home-voice-agent/wake-word/ ✅ TICKET-010: ASR Service - Implemented ASR using faster-whisper - HTTP endpoint for file transcription - WebSocket endpoint for streaming transcription - Support for multiple audio formats - Auto language detection - GPU acceleration support - Location: home-voice-agent/asr/ ✅ TICKET-014: TTS Service - Implemented TTS using Piper - HTTP endpoint for text-to-speech synthesis - Low-latency processing (< 500ms) - Multiple voice support - WAV audio output - Location: home-voice-agent/tts/ ✅ TICKET-047: Updated Hardware Purchases - Marked Pi5 kit, SSD, microphone, and speakers as purchased - Updated progress log with purchase status 📚 Documentation: - Added VOICE_SERVICES_README.md with complete testing guide - Each service includes README.md with usage instructions - All services ready for Pi5 deployment 🧪 Testing: - Created test files for each service - All imports validated - FastAPI apps created successfully - Code passes syntax validation 🚀 Ready for: - Pi5 deployment - End-to-end voice flow testing - Integration with MCP server Files Added: - wake-word/detector.py - wake-word/server.py - wake-word/requirements.txt - wake-word/README.md - wake-word/test_detector.py - asr/service.py - asr/server.py - asr/requirements.txt - asr/README.md - asr/test_service.py - tts/service.py - tts/server.py - tts/requirements.txt - tts/README.md - tts/test_service.py - VOICE_SERVICES_README.md Files Modified: - tickets/done/TICKET-047_hardware-purchases.md Files Moved: - tickets/backlog/TICKET-006_prototype-wake-word-node.md → tickets/done/ - tickets/backlog/TICKET-010_streaming-asr-service.md → tickets/done/ - tickets/backlog/TICKET-014_tts-service.md → tickets/done/
201 lines
6.6 KiB
Python
201 lines
6.6 KiB
Python
"""
|
|
LLM Router - Routes requests to work or family agent based on identity/origin.
|
|
"""
|
|
|
|
import logging
|
|
import requests
|
|
from typing import Any, Dict, Optional
|
|
from dataclasses import dataclass
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
|
|
class LLMConfig:
|
|
"""Configuration for an LLM server."""
|
|
base_url: str
|
|
model_name: str
|
|
api_key: Optional[str] = None
|
|
timeout: int = 300
|
|
|
|
|
|
@dataclass
|
|
class RoutingDecision:
|
|
"""Result of routing decision."""
|
|
agent_type: str # "work" or "family"
|
|
config: LLMConfig
|
|
reason: str
|
|
|
|
|
|
class LLMRouter:
|
|
"""Routes LLM requests to appropriate servers."""
|
|
|
|
def __init__(self):
|
|
"""Initialize router with server configurations."""
|
|
import os
|
|
from pathlib import Path
|
|
|
|
# Load .env file from project root
|
|
try:
|
|
from dotenv import load_dotenv
|
|
env_path = Path(__file__).parent.parent / ".env"
|
|
load_dotenv(env_path)
|
|
except ImportError:
|
|
# python-dotenv not installed, use environment variables only
|
|
pass
|
|
|
|
# 4080 Work Agent (remote GPU VM or local for testing)
|
|
# Load from .env file or environment variable
|
|
work_host = os.getenv("OLLAMA_HOST", "localhost")
|
|
work_port = int(os.getenv("OLLAMA_PORT", "11434"))
|
|
|
|
# Model names - load from .env file or environment variables
|
|
work_model = os.getenv("OLLAMA_WORK_MODEL", os.getenv("OLLAMA_MODEL", "llama3:latest"))
|
|
family_model = os.getenv("OLLAMA_FAMILY_MODEL", os.getenv("OLLAMA_MODEL", "llama3:latest"))
|
|
|
|
self.work_agent = LLMConfig(
|
|
base_url=f"http://{work_host}:{work_port}",
|
|
model_name=work_model,
|
|
timeout=300
|
|
)
|
|
|
|
# 1050 Family Agent (uses same local Ollama for testing)
|
|
self.family_agent = LLMConfig(
|
|
base_url=f"http://{work_host}:{work_port}", # Same host for testing
|
|
model_name=family_model,
|
|
timeout=60
|
|
)
|
|
|
|
def route_request(self,
|
|
user_id: Optional[str] = None,
|
|
origin: Optional[str] = None,
|
|
agent_type: Optional[str] = None,
|
|
client_type: Optional[str] = None) -> RoutingDecision:
|
|
"""
|
|
Route a request to the appropriate LLM server.
|
|
|
|
Args:
|
|
user_id: User identifier (if available)
|
|
origin: Request origin (IP, device, etc.)
|
|
agent_type: Explicit agent type if specified ("work" or "family")
|
|
client_type: Type of client making request
|
|
|
|
Returns:
|
|
RoutingDecision with agent type and config
|
|
"""
|
|
# Explicit agent type takes precedence
|
|
if agent_type:
|
|
if agent_type == "work":
|
|
return RoutingDecision(
|
|
agent_type="work",
|
|
config=self.work_agent,
|
|
reason=f"Explicit agent type: {agent_type}"
|
|
)
|
|
elif agent_type == "family":
|
|
return RoutingDecision(
|
|
agent_type="family",
|
|
config=self.family_agent,
|
|
reason=f"Explicit agent type: {agent_type}"
|
|
)
|
|
|
|
# Route based on client type
|
|
if client_type:
|
|
if client_type in ["work", "desktop", "workstation"]:
|
|
return RoutingDecision(
|
|
agent_type="work",
|
|
config=self.work_agent,
|
|
reason=f"Client type: {client_type}"
|
|
)
|
|
elif client_type in ["family", "phone", "tablet", "home"]:
|
|
return RoutingDecision(
|
|
agent_type="family",
|
|
config=self.family_agent,
|
|
reason=f"Client type: {client_type}"
|
|
)
|
|
|
|
# Route based on origin/IP (if configured)
|
|
# For now, default to family agent for safety
|
|
# In production, you might check IP ranges, device names, etc.
|
|
if origin:
|
|
# Example: Check if origin is work network
|
|
# if origin.startswith("10.0.1."): # Work network
|
|
# return RoutingDecision("work", self.work_agent, f"Origin: {origin}")
|
|
pass
|
|
|
|
# Default: family agent (safer default)
|
|
return RoutingDecision(
|
|
agent_type="family",
|
|
config=self.family_agent,
|
|
reason="Default routing to family agent"
|
|
)
|
|
|
|
def make_request(self,
|
|
routing: RoutingDecision,
|
|
messages: list,
|
|
tools: Optional[list] = None,
|
|
temperature: float = 0.7,
|
|
stream: bool = False) -> Dict[str, Any]:
|
|
"""
|
|
Make a request to the routed LLM server.
|
|
|
|
Args:
|
|
routing: Routing decision
|
|
messages: Conversation messages
|
|
tools: Optional tool definitions
|
|
temperature: Sampling temperature
|
|
stream: Whether to stream response
|
|
|
|
Returns:
|
|
LLM response
|
|
"""
|
|
config = routing.config
|
|
url = f"{config.base_url}/api/chat"
|
|
|
|
payload = {
|
|
"model": config.model_name,
|
|
"messages": messages,
|
|
"temperature": temperature,
|
|
"stream": stream
|
|
}
|
|
|
|
if tools:
|
|
payload["tools"] = tools
|
|
|
|
try:
|
|
logger.info(f"Making request to {routing.agent_type} agent at {url}")
|
|
response = requests.post(url, json=payload, timeout=config.timeout)
|
|
response.raise_for_status()
|
|
return response.json()
|
|
except requests.exceptions.RequestException as e:
|
|
logger.error(f"Request to {routing.agent_type} agent failed: {e}")
|
|
raise Exception(f"LLM request failed: {e}")
|
|
|
|
def health_check(self, agent_type: str) -> bool:
|
|
"""
|
|
Check if an LLM server is healthy.
|
|
|
|
Args:
|
|
agent_type: "work" or "family"
|
|
|
|
Returns:
|
|
True if server is reachable
|
|
"""
|
|
config = self.work_agent if agent_type == "work" else self.family_agent
|
|
|
|
try:
|
|
# Try to list models (lightweight check)
|
|
response = requests.get(f"{config.base_url}/api/tags", timeout=5)
|
|
return response.status_code == 200
|
|
except Exception as e:
|
|
logger.warning(f"Health check failed for {agent_type} agent: {e}")
|
|
return False
|
|
|
|
|
|
# Global router instance
|
|
_router = LLMRouter()
|
|
|
|
|
|
def get_router() -> LLMRouter:
|
|
"""Get the global router instance."""
|
|
return _router
|