From 096d76430b45022e5e55f28fb37699cc52876049 Mon Sep 17 00:00:00 2001
From: tanyar09 <tatiana.romlit@gmail.com>
Date: Tue, 3 Mar 2026 13:10:53 -0500
Subject: [PATCH] Improve agent reliability and error handling

- Add timeout protection (120s) for LLM provider calls
- Skip memory consolidation for CLI mode to avoid blocking
- Add timeout protection for memory consolidation (120s)
- Improve error handling with better logging
- Add parameter type coercion before validation
- Allow None values for optional parameters in validation
- Fix type coercion for memory updates (handle dict responses)
---
 nanobot/agent/loop.py           | 65 +++++++++++++++++++++++++--------
 nanobot/agent/tools/base.py     | 33 +++++++++++++++++
 nanobot/agent/tools/registry.py |  6 ++-
 3 files changed, 87 insertions(+), 17 deletions(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index b814ac3..3909f03 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -186,14 +186,26 @@ class AgentLoop:
 
         while iteration < self.max_iterations:
             iteration += 1
+            logger.debug(f"Agent loop iteration {iteration}/{self.max_iterations}, calling LLM provider...")
 
-            response = await self.provider.chat(
-                messages=messages,
-                tools=self.tools.get_definitions(),
-                model=self.model,
-                temperature=self.temperature,
-                max_tokens=self.max_tokens,
-            )
+            try:
+                response = await asyncio.wait_for(
+                    self.provider.chat(
+                        messages=messages,
+                        tools=self.tools.get_definitions(),
+                        model=self.model,
+                        temperature=self.temperature,
+                        max_tokens=self.max_tokens,
+                    ),
+                    timeout=120.0  # 2 minute timeout per LLM call
+                )
+                logger.debug(f"LLM provider returned response, has_tool_calls={response.has_tool_calls}")
+            except asyncio.TimeoutError:
+                logger.error(f"LLM provider call timed out after 120 seconds")
+                return "Error: Request timed out. The LLM provider may be slow or unresponsive.", tools_used
+            except Exception as e:
+                logger.error(f"LLM provider error: {e}")
+                return f"Error calling LLM: {str(e)}", tools_used
 
             if response.has_tool_calls:
                 if on_progress:
@@ -324,8 +336,21 @@ class AgentLoop:
             return OutboundMessage(channel=msg.channel, chat_id=msg.chat_id,
                                   content="🐈 nanobot commands:\n/new — Start a new conversation\n/help — Show available commands")
         
-        if len(session.messages) > self.memory_window:
-            asyncio.create_task(self._consolidate_memory(session))
+        # Skip memory consolidation for CLI mode to avoid blocking/hanging
+        # Memory consolidation can be slow and CLI users want fast responses
+        if len(session.messages) > self.memory_window and msg.channel != "cli":
+            # Start memory consolidation in background with timeout protection
+            async def _consolidate_with_timeout():
+                try:
+                    await asyncio.wait_for(
+                        self._consolidate_memory(session),
+                        timeout=120.0  # 2 minute timeout for memory consolidation
+                    )
+                except asyncio.TimeoutError:
+                    logger.warning(f"Memory consolidation timed out for session {session.key}")
+                except Exception as e:
+                    logger.error(f"Memory consolidation error: {e}")
+            asyncio.create_task(_consolidate_with_timeout())
 
         self._set_tool_context(msg.channel, msg.chat_id)
         initial_messages = self.context.build_messages(
@@ -460,12 +485,16 @@ class AgentLoop:
 Respond with ONLY valid JSON, no markdown fences."""
 
         try:
-            response = await self.provider.chat(
-                messages=[
-                    {"role": "system", "content": "You are a memory consolidation agent. Respond only with valid JSON."},
-                    {"role": "user", "content": prompt},
-                ],
-                model=self.model,
+            # Add timeout to memory consolidation LLM call
+            response = await asyncio.wait_for(
+                self.provider.chat(
+                    messages=[
+                        {"role": "system", "content": "You are a memory consolidation agent. Respond only with valid JSON."},
+                        {"role": "user", "content": prompt},
+                    ],
+                    model=self.model,
+                ),
+                timeout=120.0  # 2 minute timeout for consolidation LLM call
             )
             text = (response.content or "").strip()
             if not text:
@@ -479,8 +508,14 @@ Respond with ONLY valid JSON, no markdown fences."""
                 return
 
             if entry := result.get("history_entry"):
+                # Convert to string if LLM returned a non-string (e.g., dict)
+                if not isinstance(entry, str):
+                    entry = str(entry)
                 memory.append_history(entry)
             if update := result.get("memory_update"):
+                # Convert to string if LLM returned a non-string (e.g., dict)
+                if not isinstance(update, str):
+                    update = str(update)
                 if update != current_memory:
                     memory.write_long_term(update)
 
diff --git a/nanobot/agent/tools/base.py b/nanobot/agent/tools/base.py
index ca9bcc2..bf62143 100644
--- a/nanobot/agent/tools/base.py
+++ b/nanobot/agent/tools/base.py
@@ -52,6 +52,36 @@ class Tool(ABC):
         """
         pass
 
+    def coerce_params(self, params: dict[str, Any]) -> dict[str, Any]:
+        """Return a copy of ``params`` with string values coerced per the schema.
+
+        Strings are converted for ``integer``, ``number`` and ``boolean``
+        properties. Values that cannot be converted are left untouched so
+        that ``validate_params`` reports them instead of silently changing them.
+        """
+        schema = self.parameters or {}
+        if schema.get("type", "object") != "object":
+            return params
+
+        props = schema.get("properties", {})
+        truthy, falsy = ("true", "1", "yes", "on"), ("false", "0", "no", "off")
+        coerced = params.copy()
+        for key, value in params.items():
+            param_type = props.get(key, {}).get("type")
+            if not isinstance(value, str):
+                continue
+            try:
+                if param_type == "integer":
+                    coerced[key] = int(value)
+                elif param_type == "number":
+                    coerced[key] = float(value)
+                elif param_type == "boolean" and value.lower() in truthy + falsy:
+                    # Unrecognised strings stay as-is; validation rejects them.
+                    coerced[key] = value.lower() in truthy
+            except ValueError:
+                pass  # Not numeric: leave the string for validation to report.
+        return coerced
+
     def validate_params(self, params: dict[str, Any]) -> list[str]:
         """Validate tool parameters against JSON schema. Returns error list (empty if valid)."""
         schema = self.parameters or {}
@@ -61,6 +91,9 @@ class Tool(ABC):
 
     def _validate(self, val: Any, schema: dict[str, Any], path: str) -> list[str]:
         t, label = schema.get("type"), path or "parameter"
+        # None/null short-circuits validation of this value (no required-ness check is made here)
+        if val is None:
+            return []
         if t in self._TYPE_MAP and not isinstance(val, self._TYPE_MAP[t]):
             return [f"{label} should be {t}"]
         
diff --git a/nanobot/agent/tools/registry.py b/nanobot/agent/tools/registry.py
index d9b33ff..f948c0f 100644
--- a/nanobot/agent/tools/registry.py
+++ b/nanobot/agent/tools/registry.py
@@ -54,10 +54,12 @@ class ToolRegistry:
             return f"Error: Tool '{name}' not found"
 
         try:
-            errors = tool.validate_params(params)
+            # Coerce parameter types before validation
+            coerced_params = tool.coerce_params(params)
+            errors = tool.validate_params(coerced_params)
             if errors:
                 return f"Error: Invalid parameters for tool '{name}': " + "; ".join(errors)
-            return await tool.execute(**params)
+            return await tool.execute(**coerced_params)
         except Exception as e:
             return f"Error executing {name}: {str(e)}"