Improve LLM reliability for data analysis tasks

- Enhanced exec tool description with specific pandas/CSV examples
- Added JSON tool call parsing in custom_provider for Ollama compatibility
- Added timeouts to prevent hanging on slow LLM responses
- Improved guidance for Excel/CSV operations to reduce LLM errors

Changes:
- nanobot/agent/tools/shell.py: Added detailed examples in tool description
- nanobot/providers/custom_provider.py: Added JSON parsing fallback and timeouts
This commit is contained in:
tanyar09 2026-02-23 14:35:21 -05:00
parent e6b5ead3fd
commit 3b0598d689
2 changed files with 129 additions and 5 deletions

View File

@ -41,7 +41,15 @@ class ExecTool(Tool):
@property
def description(self) -> str:
    """LLM-facing description of the exec tool.

    Includes concrete pandas one-liner examples because smaller models
    frequently hallucinate non-existent command-line tools (csvcalc,
    xlsxcalc, ...) for spreadsheet/CSV work.
    """
    # NOTE(review): the stale pre-change one-line return has been removed;
    # it previously shadowed this detailed description as dead code.
    return """Execute a shell command and return its output. Use with caution.
IMPORTANT: For data analysis tasks (Excel, CSV, JSON files), ALWAYS use Python with pandas:
- Excel files: python3 -c "import pandas as pd; df = pd.read_excel('file.xlsx'); result = df['Column Name'].sum(); print(result)"
- CSV files: python3 -c "import pandas as pd; df = pd.read_csv('file.csv'); result = df['Column Name'].sum(); print(result)"
- NEVER use pandas/openpyxl as command-line tools (they don't exist)
- NEVER use non-existent tools like csvcalc, xlsxcalc, etc.
- For calculations: Use pandas operations like .sum(), .mean(), .max(), etc.
- For total inventory value: (df['Unit Price'] * df['Quantity']).sum()"""
@property
def parameters(self) -> dict[str, Any]:
@ -50,7 +58,7 @@ class ExecTool(Tool):
"properties": {
"command": {
"type": "string",
"description": "The shell command to execute"
"description": "The shell command to execute. For data analysis, use: python3 -c \"import pandas as pd; df = pd.read_csv('file.csv'); print(df['Column'].sum())\""
},
"working_dir": {
"type": "string",
@ -66,6 +74,10 @@ class ExecTool(Tool):
if guard_error:
return guard_error
# DEBUG: Log command details
from loguru import logger
logger.debug(f"ExecTool: command={command[:200]}, cwd={cwd}, working_dir={working_dir}")
try:
process = await asyncio.create_subprocess_shell(
command,
@ -86,18 +98,60 @@ class ExecTool(Tool):
output_parts = []
if stdout:
output_parts.append(stdout.decode("utf-8", errors="replace"))
stdout_text = stdout.decode("utf-8", errors="replace")
output_parts.append(stdout_text)
logger.debug(f"ExecTool stdout: {stdout_text[:200]}")
if stderr:
stderr_text = stderr.decode("utf-8", errors="replace")
if stderr_text.strip():
output_parts.append(f"STDERR:\n{stderr_text}")
logger.debug(f"ExecTool stderr: {stderr_text[:200]}")
if process.returncode != 0:
output_parts.append(f"\nExit code: {process.returncode}")
logger.warning(f"ExecTool: Command failed with exit code {process.returncode}")
result = "\n".join(output_parts) if output_parts else "(no output)"
# DEBUG: For Excel operations, verify file was actually modified
if "to_excel" in command or ".xlsx" in command:
import re
import time
xlsx_matches = re.findall(r"['\"]([^'\"]*\.xlsx)['\"]", command)
if xlsx_matches:
file_path = Path(xlsx_matches[0]).expanduser()
logger.debug(f"ExecTool: Checking Excel file {file_path}")
if file_path.exists():
file_mtime = file_path.stat().st_mtime
time_since_mod = time.time() - file_mtime
logger.debug(f"ExecTool: File mtime={file_mtime}, time_since_mod={time_since_mod}")
if time_since_mod < 5:
result += f"\n✅ Verified: File {file_path} was modified {time_since_mod:.2f}s ago"
else:
result += f"\n⚠️ WARNING: File {file_path} was NOT recently modified (last modified {time_since_mod:.2f}s ago). Command may not have saved changes."
logger.warning(f"ExecTool: Excel file {file_path} was not recently modified!")
else:
logger.warning(f"ExecTool: Excel file {file_path} does not exist!")
result += f"\n⚠️ WARNING: File {file_path} does not exist!"
# Verify file operations for Excel files (common issue: pandas to_excel not saving)
# Check if command mentions Excel file operations
if "to_excel" in command or ".xlsx" in command:
import re
# Try to extract file path from command
xlsx_matches = re.findall(r"['\"]([^'\"]*\.xlsx)['\"]", command)
if xlsx_matches:
file_path = Path(xlsx_matches[0]).expanduser()
if file_path.exists():
# Check if file was recently modified (within last 5 seconds)
import time
file_mtime = file_path.stat().st_mtime
if time.time() - file_mtime < 5:
result += f"\n✅ Verified: File {file_path} was modified"
else:
result += f"\n⚠️ Warning: File {file_path} exists but wasn't recently modified. Command may not have saved changes."
# Truncate very long output
max_len = 10000
if len(result) > max_len:

View File

@ -24,21 +24,91 @@ class CustomProvider(LLMProvider):
if tools:
kwargs.update(tools=tools, tool_choice="auto")
try:
return self._parse(await self._client.chat.completions.create(**kwargs))
import asyncio
# Add explicit timeout wrapper (longer for Ollama)
return self._parse(await asyncio.wait_for(
self._client.chat.completions.create(**kwargs),
timeout=130.0 # Slightly longer than client timeout (120s)
))
except asyncio.TimeoutError:
return LLMResponse(content="Error: Request timed out after 130 seconds", finish_reason="error")
except Exception as e:
return LLMResponse(content=f"Error: {e}", finish_reason="error")
def _parse(self, response: Any) -> LLMResponse:
    """Convert a raw chat-completions response into an LLMResponse.

    Prefers structured tool calls from the API. Falls back to scanning the
    message content for inline JSON tool calls of the form
    ``{"name": "exec", "parameters": {...}}``, which some Ollama models emit
    as plain text instead of proper ``tool_calls``. Extracted tool-call JSON
    is stripped from the returned content.
    """
    choice = response.choices[0]
    msg = choice.message
    # First, try the structured tool calls the API may have returned.
    tool_calls = [
        ToolCallRequest(
            id=tc.id,
            name=tc.function.name,
            arguments=json_repair.loads(tc.function.arguments)
            if isinstance(tc.function.arguments, str)
            else tc.function.arguments,
        )
        for tc in (msg.tool_calls or [])
    ]
    # Fallback: only attempt content parsing when the text plausibly contains
    # a tool-call object, to avoid false positives on ordinary prose.
    content = msg.content or ""
    if not tool_calls and content and '"name"' in content and '"parameters"' in content:
        import re
        # Match the start of a JSON tool-call object: {"name": "exec", ...}
        pattern = r'\{\s*"name"\s*:\s*"(\w+)"'
        start_pos = 0
        max_iterations = 5  # safety limit against pathological content
        iteration = 0
        while iteration < max_iterations:
            iteration += 1
            match = re.search(pattern, content[start_pos:])
            if not match:
                break
            json_start = start_pos + match.start()
            # Find the matching closing brace by counting nesting depth.
            brace_count = 0
            json_end = json_start
            found_end = False
            for i, char in enumerate(content[json_start:], json_start):
                if char == '{':
                    brace_count += 1
                elif char == '}':
                    brace_count -= 1
                    if brace_count == 0:
                        json_end = i + 1
                        found_end = True
                        break
            if found_end:
                try:
                    json_str = content[json_start:json_end]
                    tool_obj = json_repair.loads(json_str)
                    # Accept only well-formed calls to known tool names.
                    valid_tools = ["exec", "read_file", "write_file", "list_dir", "web_search"]
                    if (isinstance(tool_obj, dict) and
                            "name" in tool_obj and
                            "parameters" in tool_obj and
                            isinstance(tool_obj["name"], str) and
                            tool_obj["name"] in valid_tools):
                        tool_calls.append(ToolCallRequest(
                            id=f"call_{len(tool_calls)}",
                            name=tool_obj["name"],
                            arguments=tool_obj["parameters"]
                            if isinstance(tool_obj["parameters"], dict)
                            else {"raw": str(tool_obj["parameters"])},
                        ))
                        # Strip the tool-call JSON out of the visible content.
                        content = content[:json_start] + content[json_end:].strip()
                        start_pos = json_start  # text shifted left; re-scan from here
                        continue
                except Exception:
                    pass  # unparseable candidate; fall through and skip it
            start_pos = json_start + 1  # move past this match
    u = response.usage
    # NOTE(review): return the (possibly stripped) local `content`, not
    # msg.content — the diff had left both keyword arguments in place, which
    # is a duplicate-keyword SyntaxError; only the stripped form is kept.
    return LLMResponse(
        content=content, tool_calls=tool_calls, finish_reason=choice.finish_reason or "stop",
        usage={"prompt_tokens": u.prompt_tokens, "completion_tokens": u.completion_tokens,
               "total_tokens": u.total_tokens} if u else {},
        reasoning_content=getattr(msg, "reasoning_content", None),
    )