#!/usr/bin/env python3
"""Configuration for the 4080 LLM Server (Work Agent).

This server runs on a remote GPU VM or locally for testing.
Settings come from the .env file at the project root, falling back to
plain environment variables (and then hard-coded defaults).
"""

import os
from pathlib import Path

# Bootstrap settings from the project-root .env (home-voice-agent/) when
# python-dotenv is available; otherwise environment variables still apply.
try:
    from dotenv import load_dotenv

    env_path = Path(__file__).parents[2] / ".env"
    load_dotenv(env_path)
except ImportError:
    # python-dotenv is optional — rely on the process environment alone.
    pass

# Ollama server endpoint (overridable via .env / environment; localhost default).
OLLAMA_HOST = os.environ.get("OLLAMA_HOST", "localhost")
OLLAMA_PORT = int(os.environ.get("OLLAMA_PORT", "11434"))
OLLAMA_BASE_URL = "http://{}:{}".format(OLLAMA_HOST, OLLAMA_PORT)

# Model selection and serving limits (model overridable via OLLAMA_MODEL).
MODEL_NAME = os.environ.get("OLLAMA_MODEL", "llama3:latest")
MODEL_CONTEXT_WINDOW = 8192  # 8K tokens practical limit
MAX_CONCURRENT_REQUESTS = 2

# REST endpoints exposed by the Ollama server.
API_CHAT = OLLAMA_BASE_URL + "/api/chat"
API_GENERATE = OLLAMA_BASE_URL + "/api/generate"
API_TAGS = OLLAMA_BASE_URL + "/api/tags"

# Timeout settings.
REQUEST_TIMEOUT = 300  # 5 minutes, sized for large requests