Improve LLM reliability for data analysis tasks
- Enhanced exec tool description with specific pandas/CSV examples
- Added JSON tool call parsing in custom_provider for Ollama compatibility
- Added timeouts to prevent hanging on slow LLM responses
- Improved guidance for Excel/CSV operations to reduce LLM errors

Changes:
- nanobot/agent/tools/shell.py: Added detailed examples in tool description
- nanobot/providers/custom_provider.py: Added JSON parsing fallback and timeouts
This commit is contained in:
parent
e6b5ead3fd
commit
3b0598d689
@ -41,7 +41,15 @@ class ExecTool(Tool):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def description(self) -> str:
|
def description(self) -> str:
|
||||||
return "Execute a shell command and return its output. Use with caution."
|
return """Execute a shell command and return its output. Use with caution.
|
||||||
|
|
||||||
|
IMPORTANT: For data analysis tasks (Excel, CSV, JSON files), ALWAYS use Python with pandas:
|
||||||
|
- Excel files: python3 -c "import pandas as pd; df = pd.read_excel('file.xlsx'); result = df['Column Name'].sum(); print(result)"
|
||||||
|
- CSV files: python3 -c "import pandas as pd; df = pd.read_csv('file.csv'); result = df['Column Name'].sum(); print(result)"
|
||||||
|
- NEVER use pandas/openpyxl as command-line tools (they don't exist)
|
||||||
|
- NEVER use non-existent tools like csvcalc, xlsxcalc, etc.
|
||||||
|
- For calculations: Use pandas operations like .sum(), .mean(), .max(), etc.
|
||||||
|
- For total inventory value: (df['Unit Price'] * df['Quantity']).sum()"""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def parameters(self) -> dict[str, Any]:
|
def parameters(self) -> dict[str, Any]:
|
||||||
@ -50,7 +58,7 @@ class ExecTool(Tool):
|
|||||||
"properties": {
|
"properties": {
|
||||||
"command": {
|
"command": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "The shell command to execute"
|
"description": "The shell command to execute. For data analysis, use: python3 -c \"import pandas as pd; df = pd.read_csv('file.csv'); print(df['Column'].sum())\""
|
||||||
},
|
},
|
||||||
"working_dir": {
|
"working_dir": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
@ -66,6 +74,10 @@ class ExecTool(Tool):
|
|||||||
if guard_error:
|
if guard_error:
|
||||||
return guard_error
|
return guard_error
|
||||||
|
|
||||||
|
# DEBUG: Log command details
|
||||||
|
from loguru import logger
|
||||||
|
logger.debug(f"ExecTool: command={command[:200]}, cwd={cwd}, working_dir={working_dir}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
process = await asyncio.create_subprocess_shell(
|
process = await asyncio.create_subprocess_shell(
|
||||||
command,
|
command,
|
||||||
@ -86,18 +98,60 @@ class ExecTool(Tool):
|
|||||||
output_parts = []
|
output_parts = []
|
||||||
|
|
||||||
if stdout:
|
if stdout:
|
||||||
output_parts.append(stdout.decode("utf-8", errors="replace"))
|
stdout_text = stdout.decode("utf-8", errors="replace")
|
||||||
|
output_parts.append(stdout_text)
|
||||||
|
logger.debug(f"ExecTool stdout: {stdout_text[:200]}")
|
||||||
|
|
||||||
if stderr:
|
if stderr:
|
||||||
stderr_text = stderr.decode("utf-8", errors="replace")
|
stderr_text = stderr.decode("utf-8", errors="replace")
|
||||||
if stderr_text.strip():
|
if stderr_text.strip():
|
||||||
output_parts.append(f"STDERR:\n{stderr_text}")
|
output_parts.append(f"STDERR:\n{stderr_text}")
|
||||||
|
logger.debug(f"ExecTool stderr: {stderr_text[:200]}")
|
||||||
|
|
||||||
if process.returncode != 0:
|
if process.returncode != 0:
|
||||||
output_parts.append(f"\nExit code: {process.returncode}")
|
output_parts.append(f"\nExit code: {process.returncode}")
|
||||||
|
logger.warning(f"ExecTool: Command failed with exit code {process.returncode}")
|
||||||
|
|
||||||
result = "\n".join(output_parts) if output_parts else "(no output)"
|
result = "\n".join(output_parts) if output_parts else "(no output)"
|
||||||
|
|
||||||
|
# DEBUG: For Excel operations, verify file was actually modified
|
||||||
|
if "to_excel" in command or ".xlsx" in command:
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
xlsx_matches = re.findall(r"['\"]([^'\"]*\.xlsx)['\"]", command)
|
||||||
|
if xlsx_matches:
|
||||||
|
file_path = Path(xlsx_matches[0]).expanduser()
|
||||||
|
logger.debug(f"ExecTool: Checking Excel file {file_path}")
|
||||||
|
if file_path.exists():
|
||||||
|
file_mtime = file_path.stat().st_mtime
|
||||||
|
time_since_mod = time.time() - file_mtime
|
||||||
|
logger.debug(f"ExecTool: File mtime={file_mtime}, time_since_mod={time_since_mod}")
|
||||||
|
if time_since_mod < 5:
|
||||||
|
result += f"\n✅ Verified: File {file_path} was modified {time_since_mod:.2f}s ago"
|
||||||
|
else:
|
||||||
|
result += f"\n⚠️ WARNING: File {file_path} was NOT recently modified (last modified {time_since_mod:.2f}s ago). Command may not have saved changes."
|
||||||
|
logger.warning(f"ExecTool: Excel file {file_path} was not recently modified!")
|
||||||
|
else:
|
||||||
|
logger.warning(f"ExecTool: Excel file {file_path} does not exist!")
|
||||||
|
result += f"\n⚠️ WARNING: File {file_path} does not exist!"
|
||||||
|
|
||||||
|
# Verify file operations for Excel files (common issue: pandas to_excel not saving)
|
||||||
|
# Check if command mentions Excel file operations
|
||||||
|
if "to_excel" in command or ".xlsx" in command:
|
||||||
|
import re
|
||||||
|
# Try to extract file path from command
|
||||||
|
xlsx_matches = re.findall(r"['\"]([^'\"]*\.xlsx)['\"]", command)
|
||||||
|
if xlsx_matches:
|
||||||
|
file_path = Path(xlsx_matches[0]).expanduser()
|
||||||
|
if file_path.exists():
|
||||||
|
# Check if file was recently modified (within last 5 seconds)
|
||||||
|
import time
|
||||||
|
file_mtime = file_path.stat().st_mtime
|
||||||
|
if time.time() - file_mtime < 5:
|
||||||
|
result += f"\n✅ Verified: File {file_path} was modified"
|
||||||
|
else:
|
||||||
|
result += f"\n⚠️ Warning: File {file_path} exists but wasn't recently modified. Command may not have saved changes."
|
||||||
|
|
||||||
# Truncate very long output
|
# Truncate very long output
|
||||||
max_len = 10000
|
max_len = 10000
|
||||||
if len(result) > max_len:
|
if len(result) > max_len:
|
||||||
|
|||||||
@ -24,21 +24,91 @@ class CustomProvider(LLMProvider):
|
|||||||
if tools:
|
if tools:
|
||||||
kwargs.update(tools=tools, tool_choice="auto")
|
kwargs.update(tools=tools, tool_choice="auto")
|
||||||
try:
|
try:
|
||||||
return self._parse(await self._client.chat.completions.create(**kwargs))
|
import asyncio
|
||||||
|
# Add explicit timeout wrapper (longer for Ollama)
|
||||||
|
return self._parse(await asyncio.wait_for(
|
||||||
|
self._client.chat.completions.create(**kwargs),
|
||||||
|
timeout=130.0 # Slightly longer than client timeout (120s)
|
||||||
|
))
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
return LLMResponse(content="Error: Request timed out after 130 seconds", finish_reason="error")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return LLMResponse(content=f"Error: {e}", finish_reason="error")
|
return LLMResponse(content=f"Error: {e}", finish_reason="error")
|
||||||
|
|
||||||
def _parse(self, response: Any) -> LLMResponse:
|
def _parse(self, response: Any) -> LLMResponse:
|
||||||
choice = response.choices[0]
|
choice = response.choices[0]
|
||||||
msg = choice.message
|
msg = choice.message
|
||||||
|
|
||||||
|
# First, try to get structured tool calls
|
||||||
tool_calls = [
|
tool_calls = [
|
||||||
ToolCallRequest(id=tc.id, name=tc.function.name,
|
ToolCallRequest(id=tc.id, name=tc.function.name,
|
||||||
arguments=json_repair.loads(tc.function.arguments) if isinstance(tc.function.arguments, str) else tc.function.arguments)
|
arguments=json_repair.loads(tc.function.arguments) if isinstance(tc.function.arguments, str) else tc.function.arguments)
|
||||||
for tc in (msg.tool_calls or [])
|
for tc in (msg.tool_calls or [])
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# If no structured tool calls, try to parse from content (Ollama sometimes returns JSON in content)
|
||||||
|
# Only parse if content looks like it contains a tool call JSON (to avoid false positives)
|
||||||
|
content = msg.content or ""
|
||||||
|
if not tool_calls and content and '"name"' in content and '"parameters"' in content:
|
||||||
|
import re
|
||||||
|
# Look for JSON tool call patterns: {"name": "exec", "parameters": {...}}
|
||||||
|
# Find complete JSON objects by matching braces
|
||||||
|
pattern = r'\{\s*"name"\s*:\s*"(\w+)"'
|
||||||
|
start_pos = 0
|
||||||
|
max_iterations = 5 # Safety limit
|
||||||
|
iteration = 0
|
||||||
|
while iteration < max_iterations:
|
||||||
|
iteration += 1
|
||||||
|
match = re.search(pattern, content[start_pos:])
|
||||||
|
if not match:
|
||||||
|
break
|
||||||
|
|
||||||
|
json_start = start_pos + match.start()
|
||||||
|
name = match.group(1)
|
||||||
|
|
||||||
|
# Find the matching closing brace by counting braces
|
||||||
|
brace_count = 0
|
||||||
|
json_end = json_start
|
||||||
|
found_end = False
|
||||||
|
for i, char in enumerate(content[json_start:], json_start):
|
||||||
|
if char == '{':
|
||||||
|
brace_count += 1
|
||||||
|
elif char == '}':
|
||||||
|
brace_count -= 1
|
||||||
|
if brace_count == 0:
|
||||||
|
json_end = i + 1
|
||||||
|
found_end = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if found_end:
|
||||||
|
# Try to parse the complete JSON object
|
||||||
|
try:
|
||||||
|
json_str = content[json_start:json_end]
|
||||||
|
tool_obj = json_repair.loads(json_str)
|
||||||
|
# Only accept if it has both name and parameters, and name is a valid tool name
|
||||||
|
valid_tools = ["exec", "read_file", "write_file", "list_dir", "web_search"]
|
||||||
|
if (isinstance(tool_obj, dict) and
|
||||||
|
"name" in tool_obj and
|
||||||
|
"parameters" in tool_obj and
|
||||||
|
isinstance(tool_obj["name"], str) and
|
||||||
|
tool_obj["name"] in valid_tools):
|
||||||
|
tool_calls.append(ToolCallRequest(
|
||||||
|
id=f"call_{len(tool_calls)}",
|
||||||
|
name=tool_obj["name"],
|
||||||
|
arguments=tool_obj["parameters"] if isinstance(tool_obj["parameters"], dict) else {"raw": str(tool_obj["parameters"])}
|
||||||
|
))
|
||||||
|
# Remove the tool call from content
|
||||||
|
content = content[:json_start] + content[json_end:].strip()
|
||||||
|
start_pos = json_start # Stay at same position since we removed text
|
||||||
|
continue
|
||||||
|
except Exception:
|
||||||
|
pass # If parsing fails, skip this match
|
||||||
|
|
||||||
|
start_pos = json_start + 1 # Move past this match
|
||||||
|
|
||||||
u = response.usage
|
u = response.usage
|
||||||
return LLMResponse(
|
return LLMResponse(
|
||||||
content=msg.content, tool_calls=tool_calls, finish_reason=choice.finish_reason or "stop",
|
content=content, tool_calls=tool_calls, finish_reason=choice.finish_reason or "stop",
|
||||||
usage={"prompt_tokens": u.prompt_tokens, "completion_tokens": u.completion_tokens, "total_tokens": u.total_tokens} if u else {},
|
usage={"prompt_tokens": u.prompt_tokens, "completion_tokens": u.completion_tokens, "total_tokens": u.total_tokens} if u else {},
|
||||||
reasoning_content=getattr(msg, "reasoning_content", None),
|
reasoning_content=getattr(msg, "reasoning_content", None),
|
||||||
)
|
)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user