Improve LLM reliability for data analysis tasks

- Enhanced exec tool description with specific pandas/CSV examples
- Added JSON tool call parsing in custom_provider for Ollama compatibility
- Added timeouts to prevent hanging on slow LLM responses
- Improved guidance for Excel/CSV operations to reduce LLM errors

Changes:
- nanobot/agent/tools/shell.py: Added detailed examples in tool description
- nanobot/providers/custom_provider.py: Added JSON parsing fallback and timeouts
This commit is contained in:
tanyar09 2026-02-23 14:35:21 -05:00
parent e6b5ead3fd
commit 3b0598d689
2 changed files with 129 additions and 5 deletions

View File

@ -41,7 +41,15 @@ class ExecTool(Tool):
@property
def description(self) -> str:
    """LLM-facing description of the exec tool.

    Includes concrete pandas one-liner examples because smaller models
    frequently hallucinate non-existent command-line tools (csvcalc,
    xlsxcalc, ...) for spreadsheet/CSV work.
    """
    # NOTE(review): the stale pre-change one-line return has been removed;
    # it previously shadowed this detailed description as dead code.
    return """Execute a shell command and return its output. Use with caution.
IMPORTANT: For data analysis tasks (Excel, CSV, JSON files), ALWAYS use Python with pandas:
- Excel files: python3 -c "import pandas as pd; df = pd.read_excel('file.xlsx'); result = df['Column Name'].sum(); print(result)"
- CSV files: python3 -c "import pandas as pd; df = pd.read_csv('file.csv'); result = df['Column Name'].sum(); print(result)"
- NEVER use pandas/openpyxl as command-line tools (they don't exist)
- NEVER use non-existent tools like csvcalc, xlsxcalc, etc.
- For calculations: Use pandas operations like .sum(), .mean(), .max(), etc.
- For total inventory value: (df['Unit Price'] * df['Quantity']).sum()"""
@property
def parameters(self) -> dict[str, Any]:
@ -50,7 +58,7 @@ class ExecTool(Tool):
"properties": {
"command": {
"type": "string",
"description": "The shell command to execute"
"description": "The shell command to execute. For data analysis, use: python3 -c \"import pandas as pd; df = pd.read_csv('file.csv'); print(df['Column'].sum())\""
},
"working_dir": {
"type": "string",
@ -66,6 +74,10 @@ class ExecTool(Tool):
if guard_error:
return guard_error
# DEBUG: Log command details
from loguru import logger
logger.debug(f"ExecTool: command={command[:200]}, cwd={cwd}, working_dir={working_dir}")
try:
process = await asyncio.create_subprocess_shell(
command,
@ -86,18 +98,60 @@ class ExecTool(Tool):
output_parts = []
if stdout:
output_parts.append(stdout.decode("utf-8", errors="replace"))
stdout_text = stdout.decode("utf-8", errors="replace")
output_parts.append(stdout_text)
logger.debug(f"ExecTool stdout: {stdout_text[:200]}")
if stderr:
stderr_text = stderr.decode("utf-8", errors="replace")
if stderr_text.strip():
output_parts.append(f"STDERR:\n{stderr_text}")
logger.debug(f"ExecTool stderr: {stderr_text[:200]}")
if process.returncode != 0:
output_parts.append(f"\nExit code: {process.returncode}")
logger.warning(f"ExecTool: Command failed with exit code {process.returncode}")
result = "\n".join(output_parts) if output_parts else "(no output)"
# DEBUG: For Excel operations, verify file was actually modified
if "to_excel" in command or ".xlsx" in command:
import re
import time
xlsx_matches = re.findall(r"['\"]([^'\"]*\.xlsx)['\"]", command)
if xlsx_matches:
file_path = Path(xlsx_matches[0]).expanduser()
logger.debug(f"ExecTool: Checking Excel file {file_path}")
if file_path.exists():
file_mtime = file_path.stat().st_mtime
time_since_mod = time.time() - file_mtime
logger.debug(f"ExecTool: File mtime={file_mtime}, time_since_mod={time_since_mod}")
if time_since_mod < 5:
result += f"\n✅ Verified: File {file_path} was modified {time_since_mod:.2f}s ago"
else:
result += f"\n⚠️ WARNING: File {file_path} was NOT recently modified (last modified {time_since_mod:.2f}s ago). Command may not have saved changes."
logger.warning(f"ExecTool: Excel file {file_path} was not recently modified!")
else:
logger.warning(f"ExecTool: Excel file {file_path} does not exist!")
result += f"\n⚠️ WARNING: File {file_path} does not exist!"
# Verify file operations for Excel files (common issue: pandas to_excel not saving)
# Check if command mentions Excel file operations
if "to_excel" in command or ".xlsx" in command:
import re
# Try to extract file path from command
xlsx_matches = re.findall(r"['\"]([^'\"]*\.xlsx)['\"]", command)
if xlsx_matches:
file_path = Path(xlsx_matches[0]).expanduser()
if file_path.exists():
# Check if file was recently modified (within last 5 seconds)
import time
file_mtime = file_path.stat().st_mtime
if time.time() - file_mtime < 5:
result += f"\n✅ Verified: File {file_path} was modified"
else:
result += f"\n⚠️ Warning: File {file_path} exists but wasn't recently modified. Command may not have saved changes."
# Truncate very long output
max_len = 10000
if len(result) > max_len:

View File

@ -24,21 +24,91 @@ class CustomProvider(LLMProvider):
if tools:
kwargs.update(tools=tools, tool_choice="auto")
try:
return self._parse(await self._client.chat.completions.create(**kwargs))
import asyncio
# Add explicit timeout wrapper (longer for Ollama)
return self._parse(await asyncio.wait_for(
self._client.chat.completions.create(**kwargs),
timeout=130.0 # Slightly longer than client timeout (120s)
))
except asyncio.TimeoutError:
return LLMResponse(content="Error: Request timed out after 130 seconds", finish_reason="error")
except Exception as e:
return LLMResponse(content=f"Error: {e}", finish_reason="error")
def _parse(self, response: Any) -> LLMResponse:
    """Convert a raw chat-completions response into an LLMResponse.

    Prefers structured tool calls from the API. Falls back to scanning the
    message content for inline JSON tool calls of the form
    ``{"name": "exec", "parameters": {...}}``, which some Ollama models emit
    as plain text instead of proper ``tool_calls``. Extracted tool-call JSON
    is stripped from the returned content.
    """
    choice = response.choices[0]
    msg = choice.message
    # First, try the structured tool calls the API may have returned.
    tool_calls = [
        ToolCallRequest(
            id=tc.id,
            name=tc.function.name,
            arguments=json_repair.loads(tc.function.arguments)
            if isinstance(tc.function.arguments, str)
            else tc.function.arguments,
        )
        for tc in (msg.tool_calls or [])
    ]
    # Fallback: only attempt content parsing when the text plausibly contains
    # a tool-call object, to avoid false positives on ordinary prose.
    content = msg.content or ""
    if not tool_calls and content and '"name"' in content and '"parameters"' in content:
        import re
        # Match the start of a JSON tool-call object: {"name": "exec", ...}
        pattern = r'\{\s*"name"\s*:\s*"(\w+)"'
        start_pos = 0
        max_iterations = 5  # safety limit against pathological content
        iteration = 0
        while iteration < max_iterations:
            iteration += 1
            match = re.search(pattern, content[start_pos:])
            if not match:
                break
            json_start = start_pos + match.start()
            # Find the matching closing brace by counting nesting depth.
            brace_count = 0
            json_end = json_start
            found_end = False
            for i, char in enumerate(content[json_start:], json_start):
                if char == '{':
                    brace_count += 1
                elif char == '}':
                    brace_count -= 1
                    if brace_count == 0:
                        json_end = i + 1
                        found_end = True
                        break
            if found_end:
                try:
                    json_str = content[json_start:json_end]
                    tool_obj = json_repair.loads(json_str)
                    # Accept only well-formed calls to known tool names.
                    valid_tools = ["exec", "read_file", "write_file", "list_dir", "web_search"]
                    if (isinstance(tool_obj, dict) and
                            "name" in tool_obj and
                            "parameters" in tool_obj and
                            isinstance(tool_obj["name"], str) and
                            tool_obj["name"] in valid_tools):
                        tool_calls.append(ToolCallRequest(
                            id=f"call_{len(tool_calls)}",
                            name=tool_obj["name"],
                            arguments=tool_obj["parameters"]
                            if isinstance(tool_obj["parameters"], dict)
                            else {"raw": str(tool_obj["parameters"])},
                        ))
                        # Strip the tool-call JSON out of the visible content.
                        content = content[:json_start] + content[json_end:].strip()
                        start_pos = json_start  # text shifted left; re-scan from here
                        continue
                except Exception:
                    pass  # unparseable candidate; fall through and skip it
            start_pos = json_start + 1  # move past this match
    u = response.usage
    # NOTE(review): return the (possibly stripped) local `content`, not
    # msg.content — the diff had left both keyword arguments in place, which
    # is a duplicate-keyword SyntaxError; only the stripped form is kept.
    return LLMResponse(
        content=content, tool_calls=tool_calls, finish_reason=choice.finish_reason or "stop",
        usage={"prompt_tokens": u.prompt_tokens, "completion_tokens": u.completion_tokens,
               "total_tokens": u.total_tokens} if u else {},
        reasoning_content=getattr(msg, "reasoning_content", None),
    )