Merge branch 'main' into pr-18

This commit is contained in:
Re-bin 2026-02-03 07:01:05 +00:00
commit 5ea4025b01
14 changed files with 214 additions and 31 deletions

1
.gitignore vendored
View File

@ -3,6 +3,7 @@
*.pyc *.pyc
dist/ dist/
build/ build/
docs/
*.egg-info/ *.egg-info/
*.egg *.egg
*.pyc *.pyc

View File

@ -60,6 +60,12 @@
## 📦 Install ## 📦 Install
**Install with [uv](https://github.com/astral-sh/uv)** (recommended for speed)
```bash
uv tool install nanobot-ai
```
**Install from PyPi** **Install from PyPi**
```bash ```bash
@ -74,6 +80,14 @@ cd nanobot
pip install -e . pip install -e .
``` ```
**Install with uv**
```bash
uv venv
source .venv/bin/activate
uv pip install nanobot-ai
```
## 🚀 Quick Start ## 🚀 Quick Start
> [!TIP] > [!TIP]
@ -331,22 +345,29 @@ nanobot/
└── cli/ # 🖥️ Commands └── cli/ # 🖥️ Commands
``` ```
## 🗺️ Roadmap ## 🤝 Contribute & Roadmap
PRs welcome! The codebase is intentionally small and readable. 🤗
**Roadmap** — Pick an item and [open a PR](https://github.com/HKUDS/nanobot/pulls)!
- [x] **Voice Transcription** — Support for Groq Whisper (Issue #13)
- [ ] **Multi-modal** — See and hear (images, voice, video) - [ ] **Multi-modal** — See and hear (images, voice, video)
- [ ] **Long-term memory** — Never forget important context - [ ] **Long-term memory** — Never forget important context
- [ ] **Better reasoning** — Multi-step planning and reflection - [ ] **Better reasoning** — Multi-step planning and reflection
- [ ] **More integrations** — Discord, Slack, email, calendar - [ ] **More integrations** — Discord, Slack, email, calendar
- [ ] **Self-improvement** — Learn from feedback and mistakes - [ ] **Self-improvement** — Learn from feedback and mistakes
**Want to help?** Pick an item and [open a PR](https://github.com/HKUDS/nanobot/pulls)! ### Contributors
<a href="https://github.com/HKUDS/nanobot/graphs/contributors">
<img src="https://contrib.rocks/image?repo=HKUDS/nanobot" />
</a>
--- ---
## ⭐ Star History ## ⭐ Star History
*Community Growth Trajectory*
<div align="center"> <div align="center">
<a href="https://star-history.com/#HKUDS/nanobot&Date"> <a href="https://star-history.com/#HKUDS/nanobot&Date">
<picture> <picture>
@ -357,12 +378,6 @@ nanobot/
</a> </a>
</div> </div>
---
## 🤝 Contribute
PRs welcome! The codebase is intentionally small and readable. 🤗
<p align="center"> <p align="center">
<em> Thanks for visiting ✨ nanobot!</em><br><br> <em> Thanks for visiting ✨ nanobot!</em><br><br>
<img src="https://visitor-badge.laobi.icu/badge?page_id=HKUDS.nanobot&style=for-the-badge&color=00d4ff" alt="Views"> <img src="https://visitor-badge.laobi.icu/badge?page_id=HKUDS.nanobot&style=for-the-badge&color=00d4ff" alt="Views">

View File

@ -160,6 +160,11 @@ export class WhatsAppClient {
return `[Document] ${message.documentMessage.caption}`; return `[Document] ${message.documentMessage.caption}`;
} }
// Voice/Audio message
if (message.audioMessage) {
return `[Voice Message]`;
}
return null; return null;
} }

View File

@ -1,5 +1,7 @@
"""Context builder for assembling agent prompts.""" """Context builder for assembling agent prompts."""
import base64
import mimetypes
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@ -114,32 +116,53 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md"""
self, self,
history: list[dict[str, Any]], history: list[dict[str, Any]],
current_message: str, current_message: str,
skill_names: list[str] | None = None skill_names: list[str] | None = None,
media: list[str] | None = None,
) -> list[dict[str, Any]]: ) -> list[dict[str, Any]]:
""" """
Build the complete message list for an LLM call. Build the complete message list for an LLM call.
Args: Args:
history: Previous conversation messages. history: Previous conversation messages.
current_message: The new user message. current_message: The new user message.
skill_names: Optional skills to include. skill_names: Optional skills to include.
media: Optional list of local file paths for images/media.
Returns: Returns:
List of messages including system prompt. List of messages including system prompt.
""" """
messages = [] messages = []
# System prompt # System prompt
system_prompt = self.build_system_prompt(skill_names) system_prompt = self.build_system_prompt(skill_names)
messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "system", "content": system_prompt})
# History # History
messages.extend(history) messages.extend(history)
# Current message # Current message (with optional image attachments)
messages.append({"role": "user", "content": current_message}) user_content = self._build_user_content(current_message, media)
messages.append({"role": "user", "content": user_content})
return messages return messages
def _build_user_content(self, text: str, media: list[str] | None) -> str | list[dict[str, Any]]:
"""Build user message content with optional base64-encoded images."""
if not media:
return text
images = []
for path in media:
p = Path(path)
mime, _ = mimetypes.guess_type(path)
if not p.is_file() or not mime or not mime.startswith("image/"):
continue
b64 = base64.b64encode(p.read_bytes()).decode()
images.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})
if not images:
return text
return images + [{"type": "text", "text": text}]
def add_tool_result( def add_tool_result(
self, self,

View File

@ -152,7 +152,8 @@ class AgentLoop:
# Build initial messages (use get_history for LLM-formatted messages) # Build initial messages (use get_history for LLM-formatted messages)
messages = self.context.build_messages( messages = self.context.build_messages(
history=session.get_history(), history=session.get_history(),
current_message=msg.content current_message=msg.content,
media=msg.media if msg.media else None,
) )
# Agent loop # Agent loop
@ -189,7 +190,8 @@ class AgentLoop:
# Execute tools # Execute tools
for tool_call in response.tool_calls: for tool_call in response.tool_calls:
logger.debug(f"Executing tool: {tool_call.name}") args_str = json.dumps(tool_call.arguments)
logger.debug(f"Executing tool: {tool_call.name} with arguments: {args_str}")
result = await self.tools.execute(tool_call.name, tool_call.arguments) result = await self.tools.execute(tool_call.name, tool_call.arguments)
messages = self.context.add_tool_result( messages = self.context.add_tool_result(
messages, tool_call.id, tool_call.name, result messages, tool_call.id, tool_call.name, result
@ -281,7 +283,8 @@ class AgentLoop:
) )
for tool_call in response.tool_calls: for tool_call in response.tool_calls:
logger.debug(f"Executing tool: {tool_call.name}") args_str = json.dumps(tool_call.arguments)
logger.debug(f"Executing tool: {tool_call.name} with arguments: {args_str}")
result = await self.tools.execute(tool_call.name, tool_call.arguments) result = await self.tools.execute(tool_call.name, tool_call.arguments)
messages = self.context.add_tool_result( messages = self.context.add_tool_result(
messages, tool_call.id, tool_call.name, result messages, tool_call.id, tool_call.name, result

View File

@ -72,7 +72,14 @@ class BaseChannel(ABC):
if not allow_list: if not allow_list:
return True return True
return str(sender_id) in allow_list sender_str = str(sender_id)
if sender_str in allow_list:
return True
if "|" in sender_str:
for part in sender_str.split("|"):
if part and part in allow_list:
return True
return False
async def _handle_message( async def _handle_message(
self, self,

View File

@ -37,7 +37,9 @@ class ChannelManager:
try: try:
from nanobot.channels.telegram import TelegramChannel from nanobot.channels.telegram import TelegramChannel
self.channels["telegram"] = TelegramChannel( self.channels["telegram"] = TelegramChannel(
self.config.channels.telegram, self.bus self.config.channels.telegram,
self.bus,
groq_api_key=self.config.providers.groq.api_key,
) )
logger.info("Telegram channel enabled") logger.info("Telegram channel enabled")
except ImportError as e: except ImportError as e:

View File

@ -85,9 +85,10 @@ class TelegramChannel(BaseChannel):
name = "telegram" name = "telegram"
def __init__(self, config: TelegramConfig, bus: MessageBus): def __init__(self, config: TelegramConfig, bus: MessageBus, groq_api_key: str = ""):
super().__init__(config, bus) super().__init__(config, bus)
self.config: TelegramConfig = config self.config: TelegramConfig = config
self.groq_api_key = groq_api_key
self._app: Application | None = None self._app: Application | None = None
self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies
@ -199,8 +200,10 @@ class TelegramChannel(BaseChannel):
user = update.effective_user user = update.effective_user
chat_id = message.chat_id chat_id = message.chat_id
# Get sender identifier (prefer username, fallback to user_id) # Use stable numeric ID, but keep username for allowlist compatibility
sender_id = str(user.username or user.id) sender_id = str(user.id)
if user.username:
sender_id = f"{sender_id}|{user.username}"
# Store chat_id for replies # Store chat_id for replies
self._chat_ids[sender_id] = chat_id self._chat_ids[sender_id] = chat_id
@ -247,7 +250,20 @@ class TelegramChannel(BaseChannel):
await file.download_to_drive(str(file_path)) await file.download_to_drive(str(file_path))
media_paths.append(str(file_path)) media_paths.append(str(file_path))
content_parts.append(f"[{media_type}: {file_path}]")
# Handle voice transcription
if media_type == "voice" or media_type == "audio":
from nanobot.providers.transcription import GroqTranscriptionProvider
transcriber = GroqTranscriptionProvider(api_key=self.groq_api_key)
transcription = await transcriber.transcribe(file_path)
if transcription:
logger.info(f"Transcribed {media_type}: {transcription[:50]}...")
content_parts.append(f"[transcription: {transcription}]")
else:
content_parts.append(f"[{media_type}: {file_path}]")
else:
content_parts.append(f"[{media_type}: {file_path}]")
logger.debug(f"Downloaded {media_type} to {file_path}") logger.debug(f"Downloaded {media_type} to {file_path}")
except Exception as e: except Exception as e:
logger.error(f"Failed to download media: {e}") logger.error(f"Failed to download media: {e}")

View File

@ -107,6 +107,11 @@ class WhatsAppChannel(BaseChannel):
# Extract just the phone number as chat_id # Extract just the phone number as chat_id
chat_id = sender.split("@")[0] if "@" in sender else sender chat_id = sender.split("@")[0] if "@" in sender else sender
# Handle voice transcription if it's a voice message
if content == "[Voice Message]":
logger.info(f"Voice message received from {chat_id}, but direct download from bridge is not yet supported.")
content = "[Voice Message: Transcription not available for WhatsApp yet]"
await self._handle_message( await self._handle_message(
sender_id=chat_id, sender_id=chat_id,
chat_id=sender, # Use full JID for replies chat_id=sender, # Use full JID for replies

View File

@ -506,6 +506,7 @@ def cron_add(
at: str = typer.Option(None, "--at", help="Run once at time (ISO format)"), at: str = typer.Option(None, "--at", help="Run once at time (ISO format)"),
deliver: bool = typer.Option(False, "--deliver", "-d", help="Deliver response to channel"), deliver: bool = typer.Option(False, "--deliver", "-d", help="Deliver response to channel"),
to: str = typer.Option(None, "--to", help="Recipient for delivery"), to: str = typer.Option(None, "--to", help="Recipient for delivery"),
channel: str = typer.Option(None, "--channel", help="Channel for delivery (e.g. 'telegram', 'whatsapp')"),
): ):
"""Add a scheduled job.""" """Add a scheduled job."""
from nanobot.config.loader import get_data_dir from nanobot.config.loader import get_data_dir
@ -534,6 +535,7 @@ def cron_add(
message=message, message=message,
deliver=deliver, deliver=deliver,
to=to, to=to,
channel=channel,
) )
console.print(f"[green]✓[/green] Added job '{job.name}' ({job.id})") console.print(f"[green]✓[/green] Added job '{job.name}' ({job.id})")
@ -624,11 +626,13 @@ def status():
has_openrouter = bool(config.providers.openrouter.api_key) has_openrouter = bool(config.providers.openrouter.api_key)
has_anthropic = bool(config.providers.anthropic.api_key) has_anthropic = bool(config.providers.anthropic.api_key)
has_openai = bool(config.providers.openai.api_key) has_openai = bool(config.providers.openai.api_key)
has_gemini = bool(config.providers.gemini.api_key)
has_vllm = bool(config.providers.vllm.api_base) has_vllm = bool(config.providers.vllm.api_base)
console.print(f"OpenRouter API: {'[green]✓[/green]' if has_openrouter else '[dim]not set[/dim]'}") console.print(f"OpenRouter API: {'[green]✓[/green]' if has_openrouter else '[dim]not set[/dim]'}")
console.print(f"Anthropic API: {'[green]✓[/green]' if has_anthropic else '[dim]not set[/dim]'}") console.print(f"Anthropic API: {'[green]✓[/green]' if has_anthropic else '[dim]not set[/dim]'}")
console.print(f"OpenAI API: {'[green]✓[/green]' if has_openai else '[dim]not set[/dim]'}") console.print(f"OpenAI API: {'[green]✓[/green]' if has_openai else '[dim]not set[/dim]'}")
console.print(f"Gemini API: {'[green]✓[/green]' if has_gemini else '[dim]not set[/dim]'}")
vllm_status = f"[green]✓ {config.providers.vllm.api_base}[/green]" if has_vllm else "[dim]not set[/dim]" vllm_status = f"[green]✓ {config.providers.vllm.api_base}[/green]" if has_vllm else "[dim]not set[/dim]"
console.print(f"vLLM/Local: {vllm_status}") console.print(f"vLLM/Local: {vllm_status}")

View File

@ -50,7 +50,10 @@ class ProvidersConfig(BaseModel):
anthropic: ProviderConfig = Field(default_factory=ProviderConfig) anthropic: ProviderConfig = Field(default_factory=ProviderConfig)
openai: ProviderConfig = Field(default_factory=ProviderConfig) openai: ProviderConfig = Field(default_factory=ProviderConfig)
openrouter: ProviderConfig = Field(default_factory=ProviderConfig) openrouter: ProviderConfig = Field(default_factory=ProviderConfig)
groq: ProviderConfig = Field(default_factory=ProviderConfig)
zhipu: ProviderConfig = Field(default_factory=ProviderConfig)
vllm: ProviderConfig = Field(default_factory=ProviderConfig) vllm: ProviderConfig = Field(default_factory=ProviderConfig)
gemini: ProviderConfig = Field(default_factory=ProviderConfig)
class GatewayConfig(BaseModel): class GatewayConfig(BaseModel):
@ -89,19 +92,24 @@ class Config(BaseSettings):
return Path(self.agents.defaults.workspace).expanduser() return Path(self.agents.defaults.workspace).expanduser()
def get_api_key(self) -> str | None: def get_api_key(self) -> str | None:
"""Get API key in priority order: OpenRouter > Anthropic > OpenAI > vLLM.""" """Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > Zhipu > Groq > vLLM."""
return ( return (
self.providers.openrouter.api_key or self.providers.openrouter.api_key or
self.providers.anthropic.api_key or self.providers.anthropic.api_key or
self.providers.openai.api_key or self.providers.openai.api_key or
self.providers.gemini.api_key or
self.providers.zhipu.api_key or
self.providers.groq.api_key or
self.providers.vllm.api_key or self.providers.vllm.api_key or
None None
) )
def get_api_base(self) -> str | None: def get_api_base(self) -> str | None:
"""Get API base URL if using OpenRouter or vLLM.""" """Get API base URL if using OpenRouter, Zhipu or vLLM."""
if self.providers.openrouter.api_key: if self.providers.openrouter.api_key:
return self.providers.openrouter.api_base or "https://openrouter.ai/api/v1" return self.providers.openrouter.api_base or "https://openrouter.ai/api/v1"
if self.providers.zhipu.api_key:
return self.providers.zhipu.api_base
if self.providers.vllm.api_base: if self.providers.vllm.api_base:
return self.providers.vllm.api_base return self.providers.vllm.api_base
return None return None

View File

@ -13,7 +13,7 @@ class LiteLLMProvider(LLMProvider):
""" """
LLM provider using LiteLLM for multi-provider support. LLM provider using LiteLLM for multi-provider support.
Supports OpenRouter, Anthropic, OpenAI, and many other providers through Supports OpenRouter, Anthropic, OpenAI, Gemini, and many other providers through
a unified interface. a unified interface.
""" """
@ -47,6 +47,12 @@ class LiteLLMProvider(LLMProvider):
os.environ.setdefault("ANTHROPIC_API_KEY", api_key) os.environ.setdefault("ANTHROPIC_API_KEY", api_key)
elif "openai" in default_model or "gpt" in default_model: elif "openai" in default_model or "gpt" in default_model:
os.environ.setdefault("OPENAI_API_KEY", api_key) os.environ.setdefault("OPENAI_API_KEY", api_key)
elif "gemini" in default_model.lower():
os.environ.setdefault("GEMINI_API_KEY", api_key)
elif "zhipu" in default_model or "glm" in default_model or "zai" in default_model:
os.environ.setdefault("ZHIPUAI_API_KEY", api_key)
elif "groq" in default_model:
os.environ.setdefault("GROQ_API_KEY", api_key)
if api_base: if api_base:
litellm.api_base = api_base litellm.api_base = api_base
@ -81,11 +87,24 @@ class LiteLLMProvider(LLMProvider):
if self.is_openrouter and not model.startswith("openrouter/"): if self.is_openrouter and not model.startswith("openrouter/"):
model = f"openrouter/{model}" model = f"openrouter/{model}"
# For Zhipu/Z.ai, ensure prefix is present
# Handle cases like "glm-4.7-flash" -> "zhipu/glm-4.7-flash"
if ("glm" in model.lower() or "zhipu" in model.lower()) and not (
model.startswith("zhipu/") or
model.startswith("zai/") or
model.startswith("openrouter/")
):
model = f"zhipu/{model}"
# For vLLM, use hosted_vllm/ prefix per LiteLLM docs # For vLLM, use hosted_vllm/ prefix per LiteLLM docs
# Convert openai/ prefix to hosted_vllm/ if user specified it # Convert openai/ prefix to hosted_vllm/ if user specified it
if self.is_vllm: if self.is_vllm:
model = f"hosted_vllm/{model}" model = f"hosted_vllm/{model}"
# For Gemini, ensure gemini/ prefix if not already present
if "gemini" in model.lower() and not model.startswith("gemini/"):
model = f"gemini/{model}"
kwargs: dict[str, Any] = { kwargs: dict[str, Any] = {
"model": model, "model": model,
"messages": messages, "messages": messages,

View File

@ -0,0 +1,65 @@
"""Voice transcription provider using Groq."""
import os
from pathlib import Path
from typing import Any
import httpx
from loguru import logger
class GroqTranscriptionProvider:
"""
Voice transcription provider using Groq's Whisper API.
Groq offers extremely fast transcription with a generous free tier.
"""
def __init__(self, api_key: str | None = None):
self.api_key = api_key or os.environ.get("GROQ_API_KEY")
self.api_url = "https://api.groq.com/openai/v1/audio/transcriptions"
async def transcribe(self, file_path: str | Path) -> str:
"""
Transcribe an audio file using Groq.
Args:
file_path: Path to the audio file.
Returns:
Transcribed text.
"""
if not self.api_key:
logger.warning("Groq API key not configured for transcription")
return ""
path = Path(file_path)
if not path.exists():
logger.error(f"Audio file not found: {file_path}")
return ""
try:
async with httpx.AsyncClient() as client:
with open(path, "rb") as f:
files = {
"file": (path.name, f),
"model": (None, "whisper-large-v3"),
}
headers = {
"Authorization": f"Bearer {self.api_key}",
}
response = await client.post(
self.api_url,
headers=headers,
files=files,
timeout=60.0
)
response.raise_for_status()
data = response.json()
return data.get("text", "")
except Exception as e:
logger.error(f"Groq transcription error: {e}")
return ""

View File

@ -22,6 +22,16 @@ You have access to:
- Use `memory/` directory for daily notes - Use `memory/` directory for daily notes
- Use `MEMORY.md` for long-term information - Use `MEMORY.md` for long-term information
## Scheduled Reminders
When user asks for a reminder at a specific time, use `exec` to run:
```
nanobot cron add --name "reminder" --message "Your message" --at "YYYY-MM-DDTHH:MM:SS" --deliver --to "USER_ID" --channel "CHANNEL"
```
Get USER_ID and CHANNEL from the current session (e.g., `8281248569` and `telegram` from `telegram:8281248569`).
**Do NOT just write reminders to MEMORY.md** — that won't trigger actual notifications.
## Heartbeat Tasks ## Heartbeat Tasks
`HEARTBEAT.md` is checked every 30 minutes. You can manage periodic tasks by editing this file: `HEARTBEAT.md` is checked every 30 minutes. You can manage periodic tasks by editing this file: