Merge branch 'main' into pr-23

This commit is contained in:
Re-bin 2026-02-03 17:21:27 +00:00
commit a4026a8bf7
18 changed files with 404 additions and 72 deletions

13
.dockerignore Normal file
View File

@ -0,0 +1,13 @@
# Python build artifacts — never needed inside the image
__pycache__
*.pyc
*.pyo
*.pyd
*.egg-info
dist/
build/
# VCS history and local secrets — keep out of the build context
.git
.env
.assets
# Node build output and runtime data (rebuilt / mounted inside the container)
node_modules/
bridge/dist/
workspace/

1
.gitignore vendored
View File

@ -3,6 +3,7 @@
*.pyc *.pyc
dist/ dist/
build/ build/
docs/
*.egg-info/ *.egg-info/
*.egg *.egg
*.pyc *.pyc

40
Dockerfile Normal file
View File

@ -0,0 +1,40 @@
# syntax=docker/dockerfile:1
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim

# Install Node.js 20 for the WhatsApp bridge via the NodeSource repo.
# gnupg is needed only to dearmor the signing key, so it is purged again in
# the same layer; apt lists are removed there too to keep the layer small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl ca-certificates gnupg git && \
    mkdir -p /etc/apt/keyrings && \
    curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
    echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" > /etc/apt/sources.list.d/nodesource.list && \
    apt-get update && \
    apt-get install -y --no-install-recommends nodejs && \
    apt-get purge -y gnupg && \
    apt-get autoremove -y && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies first so this layer stays cached until the
# project metadata changes. Stub package dirs satisfy the build backend for
# the metadata-only install, then are removed before the real sources land.
COPY pyproject.toml README.md LICENSE ./
RUN mkdir -p nanobot bridge && touch nanobot/__init__.py && \
    uv pip install --system --no-cache . && \
    rm -rf nanobot bridge

# Copy the full source and install the real package.
COPY nanobot/ nanobot/
COPY bridge/ bridge/
RUN uv pip install --system --no-cache .

# Build the WhatsApp bridge; clean the npm cache in the same layer so the
# cache does not persist into the image.
WORKDIR /app/bridge
RUN npm install && npm run build && npm cache clean --force

WORKDIR /app

# Config/workspace directory. The docs mount the host's ~/.nanobot here, so
# the container intentionally runs as root with config under /root.
RUN mkdir -p /root/.nanobot

# Gateway default port (documentation only; publish with -p 18790:18790).
EXPOSE 18790

ENTRYPOINT ["nanobot"]
CMD ["status"]

View File

@ -8,6 +8,7 @@
<img src="https://img.shields.io/badge/license-MIT-green" alt="License"> <img src="https://img.shields.io/badge/license-MIT-green" alt="License">
<a href="./COMMUNICATION.md"><img src="https://img.shields.io/badge/Feishu-Group-E9DBFC?style=flat&logo=feishu&logoColor=white" alt="Feishu"></a> <a href="./COMMUNICATION.md"><img src="https://img.shields.io/badge/Feishu-Group-E9DBFC?style=flat&logo=feishu&logoColor=white" alt="Feishu"></a>
<a href="./COMMUNICATION.md"><img src="https://img.shields.io/badge/WeChat-Group-C5EAB4?style=flat&logo=wechat&logoColor=white" alt="WeChat"></a> <a href="./COMMUNICATION.md"><img src="https://img.shields.io/badge/WeChat-Group-C5EAB4?style=flat&logo=wechat&logoColor=white" alt="WeChat"></a>
<a href="https://discord.gg/MnCvHqpUGB"><img src="https://img.shields.io/badge/Discord-Community-5865F2?style=flat&logo=discord&logoColor=white" alt="Discord"></a>
</p> </p>
</div> </div>
@ -17,7 +18,7 @@
## 📢 News ## 📢 News
- **2025-02-01** 🎉 nanobot launched! Welcome to try 🐈 nanobot! - **2026-02-01** 🎉 nanobot launched! Welcome to try 🐈 nanobot!
## Key Features of nanobot: ## Key Features of nanobot:
@ -60,13 +61,7 @@
## 📦 Install ## 📦 Install
**Install from PyPi** **Install from source** (latest features, recommended for development)
```bash
pip install nanobot-ai
```
**Install from source** (recommended for development)
```bash ```bash
git clone https://github.com/HKUDS/nanobot.git git clone https://github.com/HKUDS/nanobot.git
@ -74,6 +69,18 @@ cd nanobot
pip install -e . pip install -e .
``` ```
**Install with [uv](https://github.com/astral-sh/uv)** (stable, fast)
```bash
uv tool install nanobot-ai
```
**Install from PyPI** (stable)
```bash
pip install nanobot-ai
```
## 🚀 Quick Start ## 🚀 Quick Start
> [!TIP] > [!TIP]
@ -233,6 +240,22 @@ nanobot gateway
## ⚙️ Configuration ## ⚙️ Configuration
Config file: `~/.nanobot/config.json`
### Providers
> [!NOTE]
> Groq provides free voice transcription via Whisper. If configured, Telegram voice messages will be automatically transcribed.
| Provider | Purpose | Get API Key |
|----------|---------|-------------|
| `openrouter` | LLM (recommended, access to all models) | [openrouter.ai](https://openrouter.ai) |
| `anthropic` | LLM (Claude direct) | [console.anthropic.com](https://console.anthropic.com) |
| `openai` | LLM (GPT direct) | [platform.openai.com](https://platform.openai.com) |
| `groq` | LLM + **Voice transcription** (Whisper) | [console.groq.com](https://console.groq.com) |
| `gemini` | LLM (Gemini direct) | [aistudio.google.com](https://aistudio.google.com) |
<details> <details>
<summary><b>Full config example</b></summary> <summary><b>Full config example</b></summary>
@ -246,6 +269,9 @@ nanobot gateway
"providers": { "providers": {
"openrouter": { "openrouter": {
"apiKey": "sk-or-v1-xxx" "apiKey": "sk-or-v1-xxx"
},
"groq": {
"apiKey": "gsk_xxx"
} }
}, },
"channels": { "channels": {
@ -299,6 +325,31 @@ nanobot cron remove <job_id>
</details> </details>
## 🐳 Docker
> [!TIP]
> The `-v ~/.nanobot:/root/.nanobot` flag mounts your local config directory into the container, so your config and workspace persist across container restarts.
Build and run nanobot in a container:
```bash
# Build the image
docker build -t nanobot .
# Initialize config (first time only)
docker run -v ~/.nanobot:/root/.nanobot --rm nanobot onboard
# Edit config on host to add API keys
vim ~/.nanobot/config.json
# Run gateway (connects to Telegram/WhatsApp)
docker run -v ~/.nanobot:/root/.nanobot -p 18790:18790 nanobot gateway
# Or run a single command
docker run -v ~/.nanobot:/root/.nanobot --rm nanobot agent -m "Hello!"
docker run -v ~/.nanobot:/root/.nanobot --rm nanobot status
```
## 📁 Project Structure ## 📁 Project Structure
``` ```
@ -321,22 +372,28 @@ nanobot/
└── cli/ # 🖥️ Commands └── cli/ # 🖥️ Commands
``` ```
## 🗺️ Roadmap ## 🤝 Contribute & Roadmap
PRs welcome! The codebase is intentionally small and readable. 🤗
**Roadmap** — Pick an item and [open a PR](https://github.com/HKUDS/nanobot/pulls)!
- [x] **Voice Transcription** — Support for Groq Whisper (Issue #13)
- [ ] **Multi-modal** — See and hear (images, voice, video) - [ ] **Multi-modal** — See and hear (images, voice, video)
- [ ] **Long-term memory** — Never forget important context - [ ] **Long-term memory** — Never forget important context
- [ ] **Better reasoning** — Multi-step planning and reflection - [ ] **Better reasoning** — Multi-step planning and reflection
- [ ] **More integrations** — Discord, Slack, email, calendar - [ ] **More integrations** — Discord, Slack, email, calendar
- [ ] **Self-improvement** — Learn from feedback and mistakes - [ ] **Self-improvement** — Learn from feedback and mistakes
**Want to help?** Pick an item and [open a PR](https://github.com/HKUDS/nanobot/pulls)! ### Contributors
<a href="https://github.com/HKUDS/nanobot/graphs/contributors">
<img src="https://contrib.rocks/image?repo=HKUDS/nanobot" />
</a>
---
## ⭐ Star History ## ⭐ Star History
*Community Growth Trajectory*
<div align="center"> <div align="center">
<a href="https://star-history.com/#HKUDS/nanobot&Date"> <a href="https://star-history.com/#HKUDS/nanobot&Date">
<picture> <picture>
@ -347,13 +404,12 @@ nanobot/
</a> </a>
</div> </div>
---
## 🤝 Contribute
PRs welcome! The codebase is intentionally small and readable. 🤗
<p align="center"> <p align="center">
<em> Thanks for visiting ✨ nanobot!</em><br><br> <em> Thanks for visiting ✨ nanobot!</em><br><br>
<img src="https://visitor-badge.laobi.icu/badge?page_id=HKUDS.nanobot&style=for-the-badge&color=00d4ff" alt="Views"> <img src="https://visitor-badge.laobi.icu/badge?page_id=HKUDS.nanobot&style=for-the-badge&color=00d4ff" alt="Views">
</p> </p>
<p align="center">
<sub>nanobot is for educational, research, and technical exchange purposes only</sub>
</p>

View File

@ -160,6 +160,11 @@ export class WhatsAppClient {
return `[Document] ${message.documentMessage.caption}`; return `[Document] ${message.documentMessage.caption}`;
} }
// Voice/Audio message
if (message.audioMessage) {
return `[Voice Message]`;
}
return null; return null;
} }

View File

@ -1,5 +1,7 @@
"""Context builder for assembling agent prompts.""" """Context builder for assembling agent prompts."""
import base64
import mimetypes
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@ -114,7 +116,8 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md"""
self, self,
history: list[dict[str, Any]], history: list[dict[str, Any]],
current_message: str, current_message: str,
skill_names: list[str] | None = None skill_names: list[str] | None = None,
media: list[str] | None = None,
) -> list[dict[str, Any]]: ) -> list[dict[str, Any]]:
""" """
Build the complete message list for an LLM call. Build the complete message list for an LLM call.
@ -123,6 +126,7 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md"""
history: Previous conversation messages. history: Previous conversation messages.
current_message: The new user message. current_message: The new user message.
skill_names: Optional skills to include. skill_names: Optional skills to include.
media: Optional list of local file paths for images/media.
Returns: Returns:
List of messages including system prompt. List of messages including system prompt.
@ -136,11 +140,30 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md"""
# History # History
messages.extend(history) messages.extend(history)
# Current message # Current message (with optional image attachments)
messages.append({"role": "user", "content": current_message}) user_content = self._build_user_content(current_message, media)
messages.append({"role": "user", "content": user_content})
return messages return messages
def _build_user_content(self, text: str, media: list[str] | None) -> str | list[dict[str, Any]]:
"""Build user message content with optional base64-encoded images."""
if not media:
return text
images = []
for path in media:
p = Path(path)
mime, _ = mimetypes.guess_type(path)
if not p.is_file() or not mime or not mime.startswith("image/"):
continue
b64 = base64.b64encode(p.read_bytes()).decode()
images.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})
if not images:
return text
return images + [{"type": "text", "text": text}]
def add_tool_result( def add_tool_result(
self, self,
messages: list[dict[str, Any]], messages: list[dict[str, Any]],

View File

@ -152,7 +152,8 @@ class AgentLoop:
# Build initial messages (use get_history for LLM-formatted messages) # Build initial messages (use get_history for LLM-formatted messages)
messages = self.context.build_messages( messages = self.context.build_messages(
history=session.get_history(), history=session.get_history(),
current_message=msg.content current_message=msg.content,
media=msg.media if msg.media else None,
) )
# Agent loop # Agent loop
@ -189,7 +190,8 @@ class AgentLoop:
# Execute tools # Execute tools
for tool_call in response.tool_calls: for tool_call in response.tool_calls:
logger.debug(f"Executing tool: {tool_call.name}") args_str = json.dumps(tool_call.arguments)
logger.debug(f"Executing tool: {tool_call.name} with arguments: {args_str}")
result = await self.tools.execute(tool_call.name, tool_call.arguments) result = await self.tools.execute(tool_call.name, tool_call.arguments)
messages = self.context.add_tool_result( messages = self.context.add_tool_result(
messages, tool_call.id, tool_call.name, result messages, tool_call.id, tool_call.name, result
@ -281,7 +283,8 @@ class AgentLoop:
) )
for tool_call in response.tool_calls: for tool_call in response.tool_calls:
logger.debug(f"Executing tool: {tool_call.name}") args_str = json.dumps(tool_call.arguments)
logger.debug(f"Executing tool: {tool_call.name} with arguments: {args_str}")
result = await self.tools.execute(tool_call.name, tool_call.arguments) result = await self.tools.execute(tool_call.name, tool_call.arguments)
messages = self.context.add_tool_result( messages = self.context.add_tool_result(
messages, tool_call.id, tool_call.name, result messages, tool_call.id, tool_call.name, result

View File

@ -31,30 +31,16 @@ def _normalize(text: str) -> str:
def _validate_url(url: str) -> tuple[bool, str]: def _validate_url(url: str) -> tuple[bool, str]:
""" """Validate URL: must be http(s) with valid domain."""
Validate URL for security.
Returns:
(is_valid, error_message): Tuple of validation result and error message if invalid.
"""
try: try:
parsed = urlparse(url) p = urlparse(url)
if p.scheme not in ('http', 'https'):
# Check if scheme exists return False, f"Only http/https allowed, got '{p.scheme or 'none'}'"
if not parsed.scheme: if not p.netloc:
return False, "URL must include a scheme (http:// or https://)" return False, "Missing domain"
# Only allow http and https schemes
if parsed.scheme.lower() not in ('http', 'https'):
return False, f"Invalid URL scheme '{parsed.scheme}'. Only http:// and https:// are allowed"
# Check if netloc (domain) exists
if not parsed.netloc:
return False, "URL must include a valid domain"
return True, "" return True, ""
except Exception as e: except Exception as e:
return False, f"Invalid URL format: {str(e)}" return False, str(e)
class WebSearchTool(Tool): class WebSearchTool(Tool):

View File

@ -72,7 +72,14 @@ class BaseChannel(ABC):
if not allow_list: if not allow_list:
return True return True
return str(sender_id) in allow_list sender_str = str(sender_id)
if sender_str in allow_list:
return True
if "|" in sender_str:
for part in sender_str.split("|"):
if part and part in allow_list:
return True
return False
async def _handle_message( async def _handle_message(
self, self,

View File

@ -37,7 +37,9 @@ class ChannelManager:
try: try:
from nanobot.channels.telegram import TelegramChannel from nanobot.channels.telegram import TelegramChannel
self.channels["telegram"] = TelegramChannel( self.channels["telegram"] = TelegramChannel(
self.config.channels.telegram, self.bus self.config.channels.telegram,
self.bus,
groq_api_key=self.config.providers.groq.api_key,
) )
logger.info("Telegram channel enabled") logger.info("Telegram channel enabled")
except ImportError as e: except ImportError as e:

View File

@ -85,9 +85,10 @@ class TelegramChannel(BaseChannel):
name = "telegram" name = "telegram"
def __init__(self, config: TelegramConfig, bus: MessageBus): def __init__(self, config: TelegramConfig, bus: MessageBus, groq_api_key: str = ""):
super().__init__(config, bus) super().__init__(config, bus)
self.config: TelegramConfig = config self.config: TelegramConfig = config
self.groq_api_key = groq_api_key
self._app: Application | None = None self._app: Application | None = None
self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies
@ -199,8 +200,10 @@ class TelegramChannel(BaseChannel):
user = update.effective_user user = update.effective_user
chat_id = message.chat_id chat_id = message.chat_id
# Get sender identifier (prefer username, fallback to user_id) # Use stable numeric ID, but keep username for allowlist compatibility
sender_id = str(user.username or user.id) sender_id = str(user.id)
if user.username:
sender_id = f"{sender_id}|{user.username}"
# Store chat_id for replies # Store chat_id for replies
self._chat_ids[sender_id] = chat_id self._chat_ids[sender_id] = chat_id
@ -247,7 +250,20 @@ class TelegramChannel(BaseChannel):
await file.download_to_drive(str(file_path)) await file.download_to_drive(str(file_path))
media_paths.append(str(file_path)) media_paths.append(str(file_path))
# Handle voice transcription
if media_type == "voice" or media_type == "audio":
from nanobot.providers.transcription import GroqTranscriptionProvider
transcriber = GroqTranscriptionProvider(api_key=self.groq_api_key)
transcription = await transcriber.transcribe(file_path)
if transcription:
logger.info(f"Transcribed {media_type}: {transcription[:50]}...")
content_parts.append(f"[transcription: {transcription}]")
else:
content_parts.append(f"[{media_type}: {file_path}]") content_parts.append(f"[{media_type}: {file_path}]")
else:
content_parts.append(f"[{media_type}: {file_path}]")
logger.debug(f"Downloaded {media_type} to {file_path}") logger.debug(f"Downloaded {media_type} to {file_path}")
except Exception as e: except Exception as e:
logger.error(f"Failed to download media: {e}") logger.error(f"Failed to download media: {e}")

View File

@ -107,6 +107,11 @@ class WhatsAppChannel(BaseChannel):
# Extract just the phone number as chat_id # Extract just the phone number as chat_id
chat_id = sender.split("@")[0] if "@" in sender else sender chat_id = sender.split("@")[0] if "@" in sender else sender
# Handle voice transcription if it's a voice message
if content == "[Voice Message]":
logger.info(f"Voice message received from {chat_id}, but direct download from bridge is not yet supported.")
content = "[Voice Message: Transcription not available for WhatsApp yet]"
await self._handle_message( await self._handle_message(
sender_id=chat_id, sender_id=chat_id,
chat_id=sender, # Use full JID for replies chat_id=sender, # Use full JID for replies

View File

@ -178,11 +178,13 @@ def gateway(
# Create components # Create components
bus = MessageBus() bus = MessageBus()
# Create provider (supports OpenRouter, Anthropic, OpenAI) # Create provider (supports OpenRouter, Anthropic, OpenAI, Bedrock)
api_key = config.get_api_key() api_key = config.get_api_key()
api_base = config.get_api_base() api_base = config.get_api_base()
model = config.agents.defaults.model
is_bedrock = model.startswith("bedrock/")
if not api_key: if not api_key and not is_bedrock:
console.print("[red]Error: No API key configured.[/red]") console.print("[red]Error: No API key configured.[/red]")
console.print("Set one in ~/.nanobot/config.json under providers.openrouter.apiKey") console.print("Set one in ~/.nanobot/config.json under providers.openrouter.apiKey")
raise typer.Exit(1) raise typer.Exit(1)
@ -289,8 +291,10 @@ def agent(
api_key = config.get_api_key() api_key = config.get_api_key()
api_base = config.get_api_base() api_base = config.get_api_base()
model = config.agents.defaults.model
is_bedrock = model.startswith("bedrock/")
if not api_key: if not api_key and not is_bedrock:
console.print("[red]Error: No API key configured.[/red]") console.print("[red]Error: No API key configured.[/red]")
raise typer.Exit(1) raise typer.Exit(1)
@ -354,8 +358,9 @@ def channels_status():
table = Table(title="Channel Status") table = Table(title="Channel Status")
table.add_column("Channel", style="cyan") table.add_column("Channel", style="cyan")
table.add_column("Enabled", style="green") table.add_column("Enabled", style="green")
table.add_column("Bridge URL", style="yellow") table.add_column("Configuration", style="yellow")
# WhatsApp
wa = config.channels.whatsapp wa = config.channels.whatsapp
table.add_row( table.add_row(
"WhatsApp", "WhatsApp",
@ -363,6 +368,15 @@ def channels_status():
wa.bridge_url wa.bridge_url
) )
# Telegram
tg = config.channels.telegram
tg_config = f"token: {tg.token[:10]}..." if tg.token else "[dim]not configured[/dim]"
table.add_row(
"Telegram",
"" if tg.enabled else "",
tg_config
)
console.print(table) console.print(table)
@ -506,6 +520,7 @@ def cron_add(
at: str = typer.Option(None, "--at", help="Run once at time (ISO format)"), at: str = typer.Option(None, "--at", help="Run once at time (ISO format)"),
deliver: bool = typer.Option(False, "--deliver", "-d", help="Deliver response to channel"), deliver: bool = typer.Option(False, "--deliver", "-d", help="Deliver response to channel"),
to: str = typer.Option(None, "--to", help="Recipient for delivery"), to: str = typer.Option(None, "--to", help="Recipient for delivery"),
channel: str = typer.Option(None, "--channel", help="Channel for delivery (e.g. 'telegram', 'whatsapp')"),
): ):
"""Add a scheduled job.""" """Add a scheduled job."""
from nanobot.config.loader import get_data_dir from nanobot.config.loader import get_data_dir
@ -534,6 +549,7 @@ def cron_add(
message=message, message=message,
deliver=deliver, deliver=deliver,
to=to, to=to,
channel=channel,
) )
console.print(f"[green]✓[/green] Added job '{job.name}' ({job.id})") console.print(f"[green]✓[/green] Added job '{job.name}' ({job.id})")
@ -624,11 +640,13 @@ def status():
has_openrouter = bool(config.providers.openrouter.api_key) has_openrouter = bool(config.providers.openrouter.api_key)
has_anthropic = bool(config.providers.anthropic.api_key) has_anthropic = bool(config.providers.anthropic.api_key)
has_openai = bool(config.providers.openai.api_key) has_openai = bool(config.providers.openai.api_key)
has_gemini = bool(config.providers.gemini.api_key)
has_vllm = bool(config.providers.vllm.api_base) has_vllm = bool(config.providers.vllm.api_base)
console.print(f"OpenRouter API: {'[green]✓[/green]' if has_openrouter else '[dim]not set[/dim]'}") console.print(f"OpenRouter API: {'[green]✓[/green]' if has_openrouter else '[dim]not set[/dim]'}")
console.print(f"Anthropic API: {'[green]✓[/green]' if has_anthropic else '[dim]not set[/dim]'}") console.print(f"Anthropic API: {'[green]✓[/green]' if has_anthropic else '[dim]not set[/dim]'}")
console.print(f"OpenAI API: {'[green]✓[/green]' if has_openai else '[dim]not set[/dim]'}") console.print(f"OpenAI API: {'[green]✓[/green]' if has_openai else '[dim]not set[/dim]'}")
console.print(f"Gemini API: {'[green]✓[/green]' if has_gemini else '[dim]not set[/dim]'}")
vllm_status = f"[green]✓ {config.providers.vllm.api_base}[/green]" if has_vllm else "[dim]not set[/dim]" vllm_status = f"[green]✓ {config.providers.vllm.api_base}[/green]" if has_vllm else "[dim]not set[/dim]"
console.print(f"vLLM/Local: {vllm_status}") console.print(f"vLLM/Local: {vllm_status}")

View File

@ -50,7 +50,10 @@ class ProvidersConfig(BaseModel):
anthropic: ProviderConfig = Field(default_factory=ProviderConfig) anthropic: ProviderConfig = Field(default_factory=ProviderConfig)
openai: ProviderConfig = Field(default_factory=ProviderConfig) openai: ProviderConfig = Field(default_factory=ProviderConfig)
openrouter: ProviderConfig = Field(default_factory=ProviderConfig) openrouter: ProviderConfig = Field(default_factory=ProviderConfig)
groq: ProviderConfig = Field(default_factory=ProviderConfig)
zhipu: ProviderConfig = Field(default_factory=ProviderConfig)
vllm: ProviderConfig = Field(default_factory=ProviderConfig) vllm: ProviderConfig = Field(default_factory=ProviderConfig)
gemini: ProviderConfig = Field(default_factory=ProviderConfig)
class GatewayConfig(BaseModel): class GatewayConfig(BaseModel):
@ -89,19 +92,24 @@ class Config(BaseSettings):
return Path(self.agents.defaults.workspace).expanduser() return Path(self.agents.defaults.workspace).expanduser()
def get_api_key(self) -> str | None: def get_api_key(self) -> str | None:
"""Get API key in priority order: OpenRouter > Anthropic > OpenAI > vLLM.""" """Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > Zhipu > Groq > vLLM."""
return ( return (
self.providers.openrouter.api_key or self.providers.openrouter.api_key or
self.providers.anthropic.api_key or self.providers.anthropic.api_key or
self.providers.openai.api_key or self.providers.openai.api_key or
self.providers.gemini.api_key or
self.providers.zhipu.api_key or
self.providers.groq.api_key or
self.providers.vllm.api_key or self.providers.vllm.api_key or
None None
) )
def get_api_base(self) -> str | None: def get_api_base(self) -> str | None:
"""Get API base URL if using OpenRouter or vLLM.""" """Get API base URL if using OpenRouter, Zhipu or vLLM."""
if self.providers.openrouter.api_key: if self.providers.openrouter.api_key:
return self.providers.openrouter.api_base or "https://openrouter.ai/api/v1" return self.providers.openrouter.api_base or "https://openrouter.ai/api/v1"
if self.providers.zhipu.api_key:
return self.providers.zhipu.api_base
if self.providers.vllm.api_base: if self.providers.vllm.api_base:
return self.providers.vllm.api_base return self.providers.vllm.api_base
return None return None

View File

@ -13,7 +13,7 @@ class LiteLLMProvider(LLMProvider):
""" """
LLM provider using LiteLLM for multi-provider support. LLM provider using LiteLLM for multi-provider support.
Supports OpenRouter, Anthropic, OpenAI, and many other providers through Supports OpenRouter, Anthropic, OpenAI, Gemini, and many other providers through
a unified interface. a unified interface.
""" """
@ -47,6 +47,12 @@ class LiteLLMProvider(LLMProvider):
os.environ.setdefault("ANTHROPIC_API_KEY", api_key) os.environ.setdefault("ANTHROPIC_API_KEY", api_key)
elif "openai" in default_model or "gpt" in default_model: elif "openai" in default_model or "gpt" in default_model:
os.environ.setdefault("OPENAI_API_KEY", api_key) os.environ.setdefault("OPENAI_API_KEY", api_key)
elif "gemini" in default_model.lower():
os.environ.setdefault("GEMINI_API_KEY", api_key)
elif "zhipu" in default_model or "glm" in default_model or "zai" in default_model:
os.environ.setdefault("ZHIPUAI_API_KEY", api_key)
elif "groq" in default_model:
os.environ.setdefault("GROQ_API_KEY", api_key)
if api_base: if api_base:
litellm.api_base = api_base litellm.api_base = api_base
@ -81,11 +87,24 @@ class LiteLLMProvider(LLMProvider):
if self.is_openrouter and not model.startswith("openrouter/"): if self.is_openrouter and not model.startswith("openrouter/"):
model = f"openrouter/{model}" model = f"openrouter/{model}"
# For Zhipu/Z.ai, ensure prefix is present
# Handle cases like "glm-4.7-flash" -> "zhipu/glm-4.7-flash"
if ("glm" in model.lower() or "zhipu" in model.lower()) and not (
model.startswith("zhipu/") or
model.startswith("zai/") or
model.startswith("openrouter/")
):
model = f"zhipu/{model}"
# For vLLM, use hosted_vllm/ prefix per LiteLLM docs # For vLLM, use hosted_vllm/ prefix per LiteLLM docs
# Convert openai/ prefix to hosted_vllm/ if user specified it # Convert openai/ prefix to hosted_vllm/ if user specified it
if self.is_vllm: if self.is_vllm:
model = f"hosted_vllm/{model}" model = f"hosted_vllm/{model}"
# For Gemini, ensure gemini/ prefix if not already present
if "gemini" in model.lower() and not model.startswith("gemini/"):
model = f"gemini/{model}"
kwargs: dict[str, Any] = { kwargs: dict[str, Any] = {
"model": model, "model": model,
"messages": messages, "messages": messages,

View File

@ -0,0 +1,65 @@
"""Voice transcription provider using Groq."""
import os
from pathlib import Path
from typing import Any
import httpx
from loguru import logger
class GroqTranscriptionProvider:
    """
    Voice transcription provider using Groq's Whisper API.

    Groq offers extremely fast transcription with a generous free tier.
    Errors are logged and swallowed: callers always get a string back
    (best-effort transcription, "" on any failure).
    """

    def __init__(self, api_key: str | None = None):
        # Fall back to the environment so the provider also works when no
        # explicit key is wired through the config.
        self.api_key = api_key or os.environ.get("GROQ_API_KEY")
        self.api_url = "https://api.groq.com/openai/v1/audio/transcriptions"

    async def transcribe(self, file_path: str | Path) -> str:
        """
        Transcribe an audio file using Groq.

        Args:
            file_path: Path to the audio file.

        Returns:
            Transcribed text, or "" if the API key is missing, the file does
            not exist, or the request fails.
        """
        if not self.api_key:
            logger.warning("Groq API key not configured for transcription")
            return ""

        path = Path(file_path)
        if not path.exists():
            logger.error(f"Audio file not found: {file_path}")
            return ""

        try:
            # Read the payload up front so no file handle is held open
            # across the awaited HTTP round-trip.
            audio_bytes = path.read_bytes()
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    self.api_url,
                    headers={"Authorization": f"Bearer {self.api_key}"},
                    # Plain form fields belong in `data`; httpx merges them
                    # with `files` into a single multipart/form-data body.
                    data={"model": "whisper-large-v3"},
                    files={"file": (path.name, audio_bytes)},
                    timeout=60.0,
                )
            response.raise_for_status()
            data = response.json()
            return data.get("text", "")
        except Exception as e:
            logger.error(f"Groq transcription error: {e}")
            return ""

55
test_docker.sh Executable file
View File

@ -0,0 +1,55 @@
#!/usr/bin/env bash
# Smoke test: build the nanobot Docker image, run onboarding, and verify
# that `nanobot status` prints the expected sections.
set -euo pipefail

IMAGE_NAME="nanobot-test"

# Remove the helper container/images. Registered as an EXIT trap so cleanup
# runs even when a docker command fails under `set -e` or a check exits 1
# (previously the trailing cleanup was skipped on any failure).
cleanup() {
    echo ""
    echo "=== Cleanup ==="
    docker rm -f nanobot-test-run 2>/dev/null || true
    docker rmi -f nanobot-test-onboarded 2>/dev/null || true
    docker rmi -f "$IMAGE_NAME" 2>/dev/null || true
    echo "Done."
}
trap cleanup EXIT

echo "=== Building Docker image ==="
docker build -t "$IMAGE_NAME" .

echo ""
echo "=== Running 'nanobot onboard' ==="
docker run --name nanobot-test-run "$IMAGE_NAME" onboard

echo ""
echo "=== Running 'nanobot status' ==="
# Commit the onboarded container so `status` sees the generated config.
STATUS_OUTPUT=$(docker commit nanobot-test-run nanobot-test-onboarded > /dev/null && \
docker run --rm nanobot-test-onboarded status 2>&1) || true
echo "$STATUS_OUTPUT"

echo ""
echo "=== Validating output ==="
PASS=true

# check <needle>: report PASS/FAIL depending on whether the captured status
# output contains the given string.
check() {
    if echo "$STATUS_OUTPUT" | grep -q "$1"; then
        echo "  PASS: found '$1'"
    else
        echo "  FAIL: missing '$1'"
        PASS=false
    fi
}

check "nanobot Status"
check "Config:"
check "Workspace:"
check "Model:"
check "OpenRouter API:"
check "Anthropic API:"
check "OpenAI API:"

echo ""
if $PASS; then
    echo "=== All checks passed ==="
else
    echo "=== Some checks FAILED ==="
    exit 1
fi

View File

@ -22,6 +22,16 @@ You have access to:
- Use `memory/` directory for daily notes - Use `memory/` directory for daily notes
- Use `MEMORY.md` for long-term information - Use `MEMORY.md` for long-term information
## Scheduled Reminders
When user asks for a reminder at a specific time, use `exec` to run:
```
nanobot cron add --name "reminder" --message "Your message" --at "YYYY-MM-DDTHH:MM:SS" --deliver --to "USER_ID" --channel "CHANNEL"
```
Get USER_ID and CHANNEL from the current session (e.g., `8281248569` and `telegram` from `telegram:8281248569`).
**Do NOT just write reminders to MEMORY.md** — that won't trigger actual notifications.
## Heartbeat Tasks ## Heartbeat Tasks
`HEARTBEAT.md` is checked every 30 minutes. You can manage periodic tasks by editing this file: `HEARTBEAT.md` is checked every 30 minutes. You can manage periodic tasks by editing this file: