From 63d1edba9110219daf1529af983bdbb1e6d9cd0c Mon Sep 17 00:00:00 2001 From: tanyar09 Date: Tue, 3 Mar 2026 13:11:06 -0500 Subject: [PATCH] Add Groq provider support and improve Ollama timeout handling - Add Groq provider config for voice transcription support - Pass Groq API key to Telegram channel for voice transcription - Increase Ollama timeout settings (10min read timeout for slow GPU responses) - Improve timeout handling in custom provider --- nanobot/channels/manager.py | 4 +++- nanobot/config/schema.py | 1 + nanobot/providers/custom_provider.py | 19 ++++++++++++++++--- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py index e860d26..1d6df68 100644 --- a/nanobot/channels/manager.py +++ b/nanobot/channels/manager.py @@ -38,10 +38,12 @@ class ChannelManager: if self.config.channels.telegram.enabled: try: from nanobot.channels.telegram import TelegramChannel + # Get groq API key if configured (optional, used for voice transcription) + groq_api_key = getattr(self.config.providers.groq, "api_key", "") or "" self.channels["telegram"] = TelegramChannel( self.config.channels.telegram, self.bus, - groq_api_key=self.config.providers.groq.api_key, + groq_api_key=groq_api_key, ) logger.info("Telegram channel enabled") except ImportError as e: diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index b9d8756..cc83d24 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -221,6 +221,7 @@ class ProvidersConfig(Base): siliconflow: ProviderConfig = Field(default_factory=ProviderConfig) # SiliconFlow (硅基流动) API gateway openai_codex: ProviderConfig = Field(default_factory=ProviderConfig) # OpenAI Codex (OAuth) github_copilot: ProviderConfig = Field(default_factory=ProviderConfig) # Github Copilot (OAuth) + groq: ProviderConfig = Field(default_factory=ProviderConfig) # Groq (for voice transcription) class GatewayConfig(Base): diff --git a/nanobot/providers/custom_provider.py 
b/nanobot/providers/custom_provider.py index f0ea835..dac563a 100644 --- a/nanobot/providers/custom_provider.py +++ b/nanobot/providers/custom_provider.py @@ -15,7 +15,20 @@ class CustomProvider(LLMProvider): def __init__(self, api_key: str = "no-key", api_base: str = "http://localhost:8000/v1", default_model: str = "default"): super().__init__(api_key, api_base) self.default_model = default_model - self._client = AsyncOpenAI(api_key=api_key, base_url=api_base) + # Set longer timeout for Ollama (especially with GPU, first load can be slow) + from openai import Timeout + # Set separate timeouts: connect, read, write, pool + # Ollama can be slow, especially on first request + self._client = AsyncOpenAI( + api_key=api_key, + base_url=api_base, + timeout=Timeout( + connect=60.0, # Connection timeout + read=600.0, # Read timeout (10 min for slow Ollama responses) + write=60.0, # Write timeout + pool=60.0 # Pool timeout + ) + ) async def chat(self, messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None, model: str | None = None, max_tokens: int = 4096, temperature: float = 0.7) -> LLMResponse: @@ -28,10 +41,10 @@ class CustomProvider(LLMProvider): # Add explicit timeout wrapper (longer for Ollama) return self._parse(await asyncio.wait_for( self._client.chat.completions.create(**kwargs), - timeout=130.0 # Slightly longer than client timeout (120s) + timeout=660.0 # Slightly longer than client read timeout (600s) )) except asyncio.TimeoutError: - return LLMResponse(content="Error: Request timed out after 130 seconds", finish_reason="error") + return LLMResponse(content="Error: Request timed out after 660 seconds", finish_reason="error") except Exception as e: return LLMResponse(content=f"Error: {e}", finish_reason="error")