Add Groq provider support and improve Ollama timeout handling

- Add Groq provider config for voice transcription support
- Pass Groq API key to Telegram channel for voice transcription
- Increase Ollama timeout settings (10min read timeout for slow GPU responses)
- Improve timeout handling in custom provider
This commit is contained in:
tanyar09 2026-03-03 13:11:06 -05:00
parent 7933245ec3
commit 63d1edba91
3 changed files with 20 additions and 4 deletions

View File

@@ -38,10 +38,12 @@ class ChannelManager:
if self.config.channels.telegram.enabled:
try:
from nanobot.channels.telegram import TelegramChannel
# Get groq API key if configured (optional, used for voice transcription)
groq_api_key = getattr(self.config.providers.groq, "api_key", "") or ""
self.channels["telegram"] = TelegramChannel(
self.config.channels.telegram,
self.bus,
groq_api_key=self.config.providers.groq.api_key,
groq_api_key=groq_api_key,
)
logger.info("Telegram channel enabled")
except ImportError as e:

View File

@@ -221,6 +221,7 @@ class ProvidersConfig(Base):
siliconflow: ProviderConfig = Field(default_factory=ProviderConfig) # SiliconFlow (硅基流动) API gateway
openai_codex: ProviderConfig = Field(default_factory=ProviderConfig) # OpenAI Codex (OAuth)
github_copilot: ProviderConfig = Field(default_factory=ProviderConfig) # Github Copilot (OAuth)
groq: ProviderConfig = Field(default_factory=ProviderConfig) # Groq (for voice transcription)
class GatewayConfig(Base):

View File

@@ -15,7 +15,20 @@ class CustomProvider(LLMProvider):
def __init__(self, api_key: str = "no-key", api_base: str = "http://localhost:8000/v1", default_model: str = "default"):
    """Create an OpenAI-compatible async client for a custom/local endpoint.

    Args:
        api_key: API key; local servers (e.g. Ollama) typically ignore it.
        api_base: Base URL of the OpenAI-compatible API.
        default_model: Model name used when a call does not specify one.
    """
    super().__init__(api_key, api_base)
    self.default_model = default_model
    # Fix: the previous revision built a plain AsyncOpenAI client and then
    # immediately built a second one, leaking the first. Build exactly one
    # client, with generous per-phase timeouts — Ollama can take minutes to
    # respond, especially while loading a model on the first request.
    from openai import Timeout  # re-exported httpx.Timeout
    self._client = AsyncOpenAI(
        api_key=api_key,
        base_url=api_base,
        timeout=Timeout(
            connect=60.0,  # connection establishment
            read=600.0,    # 10 min for slow Ollama generations
            write=60.0,    # request body write
            pool=60.0,     # waiting for a pooled connection
        ),
    )
async def chat(self, messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None,
model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7) -> LLMResponse:
@@ -28,10 +41,10 @@ class CustomProvider(LLMProvider):
# Add explicit timeout wrapper (longer for Ollama)
return self._parse(await asyncio.wait_for(
self._client.chat.completions.create(**kwargs),
timeout=130.0 # Slightly longer than client timeout (120s)
timeout=310.0 # NOTE(review): shorter than the client's new 600s read timeout — this wait_for fires first, capping requests at ~5 min; should likely be 610.0
))
except asyncio.TimeoutError:
return LLMResponse(content="Error: Request timed out after 130 seconds", finish_reason="error")
return LLMResponse(content="Error: Request timed out after 310 seconds", finish_reason="error")
except Exception as e:
return LLMResponse(content=f"Error: {e}", finish_reason="error")