Improve LLM reliability for data analysis tasks
- Enhanced exec tool description with specific pandas/CSV examples
- Added JSON tool call parsing in custom_provider for Ollama compatibility
- Added timeouts to prevent hanging on slow LLM responses
- Improved guidance for Excel/CSV operations to reduce LLM errors

Changes:
- nanobot/agent/tools/shell.py: Added detailed examples in tool description
- nanobot/providers/custom_provider.py: Added JSON parsing fallback and timeouts
This commit is contained in:
parent
e6b5ead3fd
commit
3b0598d689
@ -41,7 +41,15 @@ class ExecTool(Tool):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def description(self) -> str:
|
def description(self) -> str:
|
||||||
return "Execute a shell command and return its output. Use with caution."
|
return """Execute a shell command and return its output. Use with caution.
|
||||||
|
|
||||||
|
IMPORTANT: For data analysis tasks (Excel, CSV, JSON files), ALWAYS use Python with pandas:
|
||||||
|
- Excel files: python3 -c "import pandas as pd; df = pd.read_excel('file.xlsx'); result = df['Column Name'].sum(); print(result)"
|
||||||
|
- CSV files: python3 -c "import pandas as pd; df = pd.read_csv('file.csv'); result = df['Column Name'].sum(); print(result)"
|
||||||
|
- NEVER use pandas/openpyxl as command-line tools (they don't exist)
|
||||||
|
- NEVER use non-existent tools like csvcalc, xlsxcalc, etc.
|
||||||
|
- For calculations: Use pandas operations like .sum(), .mean(), .max(), etc.
|
||||||
|
- For total inventory value: (df['Unit Price'] * df['Quantity']).sum()"""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def parameters(self) -> dict[str, Any]:
|
def parameters(self) -> dict[str, Any]:
|
||||||
@ -50,7 +58,7 @@ class ExecTool(Tool):
|
|||||||
"properties": {
|
"properties": {
|
||||||
"command": {
|
"command": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "The shell command to execute"
|
"description": "The shell command to execute. For data analysis, use: python3 -c \"import pandas as pd; df = pd.read_csv('file.csv'); print(df['Column'].sum())\""
|
||||||
},
|
},
|
||||||
"working_dir": {
|
"working_dir": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
@ -66,6 +74,10 @@ class ExecTool(Tool):
|
|||||||
if guard_error:
|
if guard_error:
|
||||||
return guard_error
|
return guard_error
|
||||||
|
|
||||||
|
# DEBUG: Log command details
|
||||||
|
from loguru import logger
|
||||||
|
logger.debug(f"ExecTool: command={command[:200]}, cwd={cwd}, working_dir={working_dir}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
process = await asyncio.create_subprocess_shell(
|
process = await asyncio.create_subprocess_shell(
|
||||||
command,
|
command,
|
||||||
@ -86,18 +98,60 @@ class ExecTool(Tool):
|
|||||||
output_parts = []
|
output_parts = []
|
||||||
|
|
||||||
if stdout:
|
if stdout:
|
||||||
output_parts.append(stdout.decode("utf-8", errors="replace"))
|
stdout_text = stdout.decode("utf-8", errors="replace")
|
||||||
|
output_parts.append(stdout_text)
|
||||||
|
logger.debug(f"ExecTool stdout: {stdout_text[:200]}")
|
||||||
|
|
||||||
if stderr:
|
if stderr:
|
||||||
stderr_text = stderr.decode("utf-8", errors="replace")
|
stderr_text = stderr.decode("utf-8", errors="replace")
|
||||||
if stderr_text.strip():
|
if stderr_text.strip():
|
||||||
output_parts.append(f"STDERR:\n{stderr_text}")
|
output_parts.append(f"STDERR:\n{stderr_text}")
|
||||||
|
logger.debug(f"ExecTool stderr: {stderr_text[:200]}")
|
||||||
|
|
||||||
if process.returncode != 0:
|
if process.returncode != 0:
|
||||||
output_parts.append(f"\nExit code: {process.returncode}")
|
output_parts.append(f"\nExit code: {process.returncode}")
|
||||||
|
logger.warning(f"ExecTool: Command failed with exit code {process.returncode}")
|
||||||
|
|
||||||
result = "\n".join(output_parts) if output_parts else "(no output)"
|
result = "\n".join(output_parts) if output_parts else "(no output)"
|
||||||
|
|
||||||
|
# DEBUG: For Excel operations, verify file was actually modified
|
||||||
|
if "to_excel" in command or ".xlsx" in command:
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
xlsx_matches = re.findall(r"['\"]([^'\"]*\.xlsx)['\"]", command)
|
||||||
|
if xlsx_matches:
|
||||||
|
file_path = Path(xlsx_matches[0]).expanduser()
|
||||||
|
logger.debug(f"ExecTool: Checking Excel file {file_path}")
|
||||||
|
if file_path.exists():
|
||||||
|
file_mtime = file_path.stat().st_mtime
|
||||||
|
time_since_mod = time.time() - file_mtime
|
||||||
|
logger.debug(f"ExecTool: File mtime={file_mtime}, time_since_mod={time_since_mod}")
|
||||||
|
if time_since_mod < 5:
|
||||||
|
result += f"\n✅ Verified: File {file_path} was modified {time_since_mod:.2f}s ago"
|
||||||
|
else:
|
||||||
|
result += f"\n⚠️ WARNING: File {file_path} was NOT recently modified (last modified {time_since_mod:.2f}s ago). Command may not have saved changes."
|
||||||
|
logger.warning(f"ExecTool: Excel file {file_path} was not recently modified!")
|
||||||
|
else:
|
||||||
|
logger.warning(f"ExecTool: Excel file {file_path} does not exist!")
|
||||||
|
result += f"\n⚠️ WARNING: File {file_path} does not exist!"
|
||||||
|
|
||||||
|
# Verify file operations for Excel files (common issue: pandas to_excel not saving)
|
||||||
|
# Check if command mentions Excel file operations
|
||||||
|
if "to_excel" in command or ".xlsx" in command:
|
||||||
|
import re
|
||||||
|
# Try to extract file path from command
|
||||||
|
xlsx_matches = re.findall(r"['\"]([^'\"]*\.xlsx)['\"]", command)
|
||||||
|
if xlsx_matches:
|
||||||
|
file_path = Path(xlsx_matches[0]).expanduser()
|
||||||
|
if file_path.exists():
|
||||||
|
# Check if file was recently modified (within last 5 seconds)
|
||||||
|
import time
|
||||||
|
file_mtime = file_path.stat().st_mtime
|
||||||
|
if time.time() - file_mtime < 5:
|
||||||
|
result += f"\n✅ Verified: File {file_path} was modified"
|
||||||
|
else:
|
||||||
|
result += f"\n⚠️ Warning: File {file_path} exists but wasn't recently modified. Command may not have saved changes."
|
||||||
|
|
||||||
# Truncate very long output
|
# Truncate very long output
|
||||||
max_len = 10000
|
max_len = 10000
|
||||||
if len(result) > max_len:
|
if len(result) > max_len:
|
||||||
|
|||||||
@ -24,21 +24,91 @@ class CustomProvider(LLMProvider):
|
|||||||
if tools:
|
if tools:
|
||||||
kwargs.update(tools=tools, tool_choice="auto")
|
kwargs.update(tools=tools, tool_choice="auto")
|
||||||
try:
|
try:
|
||||||
return self._parse(await self._client.chat.completions.create(**kwargs))
|
import asyncio
|
||||||
|
# Add explicit timeout wrapper (longer for Ollama)
|
||||||
|
return self._parse(await asyncio.wait_for(
|
||||||
|
self._client.chat.completions.create(**kwargs),
|
||||||
|
timeout=130.0 # Slightly longer than client timeout (120s)
|
||||||
|
))
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
return LLMResponse(content="Error: Request timed out after 130 seconds", finish_reason="error")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return LLMResponse(content=f"Error: {e}", finish_reason="error")
|
return LLMResponse(content=f"Error: {e}", finish_reason="error")
|
||||||
|
|
||||||
def _parse(self, response: Any) -> LLMResponse:
|
def _parse(self, response: Any) -> LLMResponse:
|
||||||
choice = response.choices[0]
|
choice = response.choices[0]
|
||||||
msg = choice.message
|
msg = choice.message
|
||||||
|
|
||||||
|
# First, try to get structured tool calls
|
||||||
tool_calls = [
|
tool_calls = [
|
||||||
ToolCallRequest(id=tc.id, name=tc.function.name,
|
ToolCallRequest(id=tc.id, name=tc.function.name,
|
||||||
arguments=json_repair.loads(tc.function.arguments) if isinstance(tc.function.arguments, str) else tc.function.arguments)
|
arguments=json_repair.loads(tc.function.arguments) if isinstance(tc.function.arguments, str) else tc.function.arguments)
|
||||||
for tc in (msg.tool_calls or [])
|
for tc in (msg.tool_calls or [])
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# If no structured tool calls, try to parse from content (Ollama sometimes returns JSON in content)
|
||||||
|
# Only parse if content looks like it contains a tool call JSON (to avoid false positives)
|
||||||
|
content = msg.content or ""
|
||||||
|
if not tool_calls and content and '"name"' in content and '"parameters"' in content:
|
||||||
|
import re
|
||||||
|
# Look for JSON tool call patterns: {"name": "exec", "parameters": {...}}
|
||||||
|
# Find complete JSON objects by matching braces
|
||||||
|
pattern = r'\{\s*"name"\s*:\s*"(\w+)"'
|
||||||
|
start_pos = 0
|
||||||
|
max_iterations = 5 # Safety limit
|
||||||
|
iteration = 0
|
||||||
|
while iteration < max_iterations:
|
||||||
|
iteration += 1
|
||||||
|
match = re.search(pattern, content[start_pos:])
|
||||||
|
if not match:
|
||||||
|
break
|
||||||
|
|
||||||
|
json_start = start_pos + match.start()
|
||||||
|
name = match.group(1)
|
||||||
|
|
||||||
|
# Find the matching closing brace by counting braces
|
||||||
|
brace_count = 0
|
||||||
|
json_end = json_start
|
||||||
|
found_end = False
|
||||||
|
for i, char in enumerate(content[json_start:], json_start):
|
||||||
|
if char == '{':
|
||||||
|
brace_count += 1
|
||||||
|
elif char == '}':
|
||||||
|
brace_count -= 1
|
||||||
|
if brace_count == 0:
|
||||||
|
json_end = i + 1
|
||||||
|
found_end = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if found_end:
|
||||||
|
# Try to parse the complete JSON object
|
||||||
|
try:
|
||||||
|
json_str = content[json_start:json_end]
|
||||||
|
tool_obj = json_repair.loads(json_str)
|
||||||
|
# Only accept if it has both name and parameters, and name is a valid tool name
|
||||||
|
valid_tools = ["exec", "read_file", "write_file", "list_dir", "web_search"]
|
||||||
|
if (isinstance(tool_obj, dict) and
|
||||||
|
"name" in tool_obj and
|
||||||
|
"parameters" in tool_obj and
|
||||||
|
isinstance(tool_obj["name"], str) and
|
||||||
|
tool_obj["name"] in valid_tools):
|
||||||
|
tool_calls.append(ToolCallRequest(
|
||||||
|
id=f"call_{len(tool_calls)}",
|
||||||
|
name=tool_obj["name"],
|
||||||
|
arguments=tool_obj["parameters"] if isinstance(tool_obj["parameters"], dict) else {"raw": str(tool_obj["parameters"])}
|
||||||
|
))
|
||||||
|
# Remove the tool call from content
|
||||||
|
content = content[:json_start] + content[json_end:].strip()
|
||||||
|
start_pos = json_start # Stay at same position since we removed text
|
||||||
|
continue
|
||||||
|
except Exception:
|
||||||
|
pass # If parsing fails, skip this match
|
||||||
|
|
||||||
|
start_pos = json_start + 1 # Move past this match
|
||||||
|
|
||||||
u = response.usage
|
u = response.usage
|
||||||
return LLMResponse(
|
return LLMResponse(
|
||||||
content=msg.content, tool_calls=tool_calls, finish_reason=choice.finish_reason or "stop",
|
content=content, tool_calls=tool_calls, finish_reason=choice.finish_reason or "stop",
|
||||||
usage={"prompt_tokens": u.prompt_tokens, "completion_tokens": u.completion_tokens, "total_tokens": u.total_tokens} if u else {},
|
usage={"prompt_tokens": u.prompt_tokens, "completion_tokens": u.completion_tokens, "total_tokens": u.total_tokens} if u else {},
|
||||||
reasoning_content=getattr(msg, "reasoning_content", None),
|
reasoning_content=getattr(msg, "reasoning_content", None),
|
||||||
)
|
)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user