"""Configuration for the LLM Council.""" import os from dotenv import load_dotenv load_dotenv() # Helpers def _parse_int_env(name: str, default: int) -> int: raw = os.getenv(name) if raw is None or raw.strip() == "": return default try: return int(raw.strip()) except ValueError: return default def _parse_float_env(name: str, default: float) -> float: raw = os.getenv(name) if raw is None or raw.strip() == "": return default try: return float(raw.strip()) except ValueError: return default def _parse_list_env(name: str) -> list[str] | None: """ Parses a list from an env var. Supported formats: - Comma-separated: "a,b,c" - Newline-separated: "a\\nb\\nc" """ raw = os.getenv(name) if raw is None: return None raw = raw.strip() if raw == "": return [] # Allow either commas or newlines. parts = [] for chunk in raw.replace("\r\n", "\n").split("\n"): parts.extend(chunk.split(",")) return [p.strip() for p in parts if p.strip()] # Council members - list of model identifiers (Ollama model names) # Can be overridden via env var COUNCIL_MODELS (comma or newline separated). _DEFAULT_COUNCIL_MODELS = [ "llama3.2:3b", "qwen2.5:3b", "gemma2:2b", ] COUNCIL_MODELS = _parse_list_env("COUNCIL_MODELS") or _DEFAULT_COUNCIL_MODELS # Chairman model - synthesizes final response CHAIRMAN_MODEL = os.getenv("CHAIRMAN_MODEL") or "llama3.2:3b" # Maximum tokens per request # Default: 2048 tokens (reasonable for most responses) # Increase if you need longer responses MAX_TOKENS = _parse_int_env("MAX_TOKENS", 2048) # Request timeout configuration (in seconds) # Default timeout for general LLM queries (Stage 1: council responses) # Used by llm_client.py and passed to openai_compat.query_model() LLM_TIMEOUT_SECONDS = _parse_float_env("LLM_TIMEOUT_SECONDS", 120.0) # Timeout for chairman synthesis (may need longer for complex responses) CHAIRMAN_TIMEOUT_SECONDS = _parse_float_env("CHAIRMAN_TIMEOUT_SECONDS", 180.0) # Timeout for title generation (short responses) TITLE_GENERATION_TIMEOUT_SECONDS = _parse_float_env("TITLE_GENERATION_TIMEOUT_SECONDS", 120.0) # OpenAI-compatible provider tuning (Ollama / vLLM / TGI) # If USE_LOCAL_OLLAMA=true, automatically set base URL to localhost:11434 (convenience flag) if os.getenv("USE_LOCAL_OLLAMA", "").strip().lower() in ("true", "1", "yes"): _openai_compat_base_url = "http://localhost:11434" else: _openai_compat_base_url = os.getenv("OPENAI_COMPAT_BASE_URL") OPENAI_COMPAT_BASE_URL = _openai_compat_base_url # HTTP client timeout (fallback when timeout not explicitly passed to openai_compat functions) # Used by: list_models() and as fallback in query_model() if called directly without timeout # Should be >= LLM_TIMEOUT_SECONDS for safety, but list_models() is fast so can be lower OPENAI_COMPAT_TIMEOUT_SECONDS = _parse_float_env("OPENAI_COMPAT_TIMEOUT_SECONDS", 300.0) # HTTP client connection timeout (time to establish connection) OPENAI_COMPAT_CONNECT_TIMEOUT_SECONDS = _parse_float_env("OPENAI_COMPAT_CONNECT_TIMEOUT_SECONDS", 10.0) # HTTP client write timeout (time to send request) OPENAI_COMPAT_WRITE_TIMEOUT_SECONDS = _parse_float_env("OPENAI_COMPAT_WRITE_TIMEOUT_SECONDS", 10.0) # HTTP client pool timeout (time to get connection from pool) OPENAI_COMPAT_POOL_TIMEOUT_SECONDS = _parse_float_env("OPENAI_COMPAT_POOL_TIMEOUT_SECONDS", 10.0) # Number of retries for failed requests (retryable HTTP errors: 408, 409, 425, 429, 500, 502, 503, 504) OPENAI_COMPAT_RETRIES = _parse_int_env("OPENAI_COMPAT_RETRIES", 2) # Exponential backoff base delay between retries (seconds) - actual delay is backoff * 
# Debug mode - show debug logs in console (set DEBUG=true in .env)
DEBUG = os.getenv("DEBUG", "").strip().lower() in ("true", "1", "yes")

# Markdown uploads (per-conversation)
DOCS_DIR = os.getenv("DOCS_DIR") or "data/docs"
MAX_DOC_BYTES = _parse_int_env("MAX_DOC_BYTES", 1_000_000)  # 1 MB
MAX_DOC_PREVIEW_CHARS = _parse_int_env("MAX_DOC_PREVIEW_CHARS", 20_000)

# Data directory for conversation storage
DATA_DIR = "data/conversations"
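
# Example .env (illustrative values; every setting here is optional and
# falls back to the defaults above when unset):
#
#     USE_LOCAL_OLLAMA=true
#     COUNCIL_MODELS=llama3.2:3b,qwen2.5:3b,gemma2:2b
#     CHAIRMAN_MODEL=llama3.2:3b
#     MAX_TOKENS=4096
#     LLM_TIMEOUT_SECONDS=120
#     DEBUG=true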