Merge branch 'main' into feat/discord-support

This commit is contained in:
Anunay Aatipamula 2026-02-03 21:15:15 +05:30 committed by GitHub
commit 7d2bebcfa3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 342 additions and 41 deletions

13
.dockerignore Normal file
View File

@ -0,0 +1,13 @@
# Python bytecode and packaging artifacts.
# .dockerignore patterns are matched relative to the build-context root, so a
# leading **/ is needed to exclude these when they appear inside nanobot/ or
# bridge/ (both of which are copied into the image).
**/__pycache__
**/*.pyc
**/*.pyo
**/*.pyd
**/*.egg-info
dist/
build/

# VCS metadata, local secrets and repository assets
.git
**/.env
.assets

# Node.js artifacts: the bridge is installed and built inside the image,
# so host-side dependencies and build output must not leak into it.
**/node_modules
bridge/dist/

# Local agent workspace
workspace/

40
Dockerfile Normal file
View File

@ -0,0 +1,40 @@
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim

# Make every RUN step fail when any command of a pipeline fails. Without this,
# a failed `curl` in the NodeSource key download below would be masked by the
# exit status of `gpg --dearmor`.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install Node.js 20 for the WhatsApp bridge.
# gnupg is only needed to dearmor the repository key, so it is purged again in
# the same layer; the apt lists are removed to keep the layer small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates curl git gnupg && \
    mkdir -p /etc/apt/keyrings && \
    curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
    echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" > /etc/apt/sources.list.d/nodesource.list && \
    apt-get update && \
    apt-get install -y --no-install-recommends nodejs && \
    apt-get purge -y gnupg && \
    apt-get autoremove -y && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies first (cached layer).
# Only the project metadata is copied here; stub package directories let the
# install succeed so this layer is reused until pyproject.toml changes.
COPY pyproject.toml README.md LICENSE ./
RUN mkdir -p nanobot bridge && touch nanobot/__init__.py && \
    uv pip install --system --no-cache . && \
    rm -rf nanobot bridge

# Copy the full source and install the real package
COPY nanobot/ nanobot/
COPY bridge/ bridge/
RUN uv pip install --system --no-cache .

# Build the WhatsApp bridge
WORKDIR /app/bridge
RUN npm install && npm run build
WORKDIR /app

# Create config directory (the documented mount point for the host's ~/.nanobot)
RUN mkdir -p /root/.nanobot

# Gateway default port
EXPOSE 18790

# `nanobot` is the entry point; the default sub-command can be overridden at
# `docker run` time (e.g. `gateway`, `onboard`, `agent -m ...`).
ENTRYPOINT ["nanobot"]
CMD ["status"]

View File

@ -8,6 +8,7 @@
<img src="https://img.shields.io/badge/license-MIT-green" alt="License"> <img src="https://img.shields.io/badge/license-MIT-green" alt="License">
<a href="./COMMUNICATION.md"><img src="https://img.shields.io/badge/Feishu-Group-E9DBFC?style=flat&logo=feishu&logoColor=white" alt="Feishu"></a> <a href="./COMMUNICATION.md"><img src="https://img.shields.io/badge/Feishu-Group-E9DBFC?style=flat&logo=feishu&logoColor=white" alt="Feishu"></a>
<a href="./COMMUNICATION.md"><img src="https://img.shields.io/badge/WeChat-Group-C5EAB4?style=flat&logo=wechat&logoColor=white" alt="WeChat"></a> <a href="./COMMUNICATION.md"><img src="https://img.shields.io/badge/WeChat-Group-C5EAB4?style=flat&logo=wechat&logoColor=white" alt="WeChat"></a>
<a href="https://discord.gg/MnCvHqpUGB"><img src="https://img.shields.io/badge/Discord-Community-5865F2?style=flat&logo=discord&logoColor=white" alt="Discord"></a>
</p> </p>
</div> </div>
@ -17,7 +18,7 @@
## 📢 News ## 📢 News
- **2025-02-01** 🎉 nanobot launched! Welcome to try 🐈 nanobot! - **2026-02-01** 🎉 nanobot launched! Welcome to try 🐈 nanobot!
## Key Features of nanobot: ## Key Features of nanobot:
@ -60,13 +61,7 @@
## 📦 Install ## 📦 Install
**Install from PyPi** **Install from source** (latest features, recommended for development)
```bash
pip install nanobot-ai
```
**Install from source** (recommended for development)
```bash ```bash
git clone https://github.com/HKUDS/nanobot.git git clone https://github.com/HKUDS/nanobot.git
@ -74,6 +69,18 @@ cd nanobot
pip install -e . pip install -e .
``` ```
**Install with [uv](https://github.com/astral-sh/uv)** (stable, fast)
```bash
uv tool install nanobot-ai
```
**Install from PyPI** (stable)
```bash
pip install nanobot-ai
```
## 🚀 Quick Start ## 🚀 Quick Start
> [!TIP] > [!TIP]
@ -279,6 +286,22 @@ nanobot gateway
## ⚙️ Configuration ## ⚙️ Configuration
Config file: `~/.nanobot/config.json`
### Providers
> [!NOTE]
> Groq provides free voice transcription via Whisper. If configured, Telegram voice messages will be automatically transcribed.
| Provider | Purpose | Get API Key |
|----------|---------|-------------|
| `openrouter` | LLM (recommended, access to all models) | [openrouter.ai](https://openrouter.ai) |
| `anthropic` | LLM (Claude direct) | [console.anthropic.com](https://console.anthropic.com) |
| `openai` | LLM (GPT direct) | [platform.openai.com](https://platform.openai.com) |
| `groq` | LLM + **Voice transcription** (Whisper) | [console.groq.com](https://console.groq.com) |
| `gemini` | LLM (Gemini direct) | [aistudio.google.com](https://aistudio.google.com) |
<details> <details>
<summary><b>Full config example</b></summary> <summary><b>Full config example</b></summary>
@ -292,6 +315,9 @@ nanobot gateway
"providers": { "providers": {
"openrouter": { "openrouter": {
"apiKey": "sk-or-v1-xxx" "apiKey": "sk-or-v1-xxx"
},
"groq": {
"apiKey": "gsk_xxx"
} }
}, },
"channels": { "channels": {
@ -350,6 +376,31 @@ nanobot cron remove <job_id>
</details> </details>
## 🐳 Docker
> [!TIP]
> The `-v ~/.nanobot:/root/.nanobot` flag mounts your local config directory into the container, so your config and workspace persist across container restarts.
Build and run nanobot in a container:
```bash
# Build the image
docker build -t nanobot .
# Initialize config (first time only)
docker run -v ~/.nanobot:/root/.nanobot --rm nanobot onboard
# Edit config on host to add API keys
vim ~/.nanobot/config.json
# Run gateway (connects to Telegram/WhatsApp)
docker run -v ~/.nanobot:/root/.nanobot -p 18790:18790 nanobot gateway
# Or run a single command
docker run -v ~/.nanobot:/root/.nanobot --rm nanobot agent -m "Hello!"
docker run -v ~/.nanobot:/root/.nanobot --rm nanobot status
```
## 📁 Project Structure ## 📁 Project Structure
``` ```
@ -378,6 +429,7 @@ PRs welcome! The codebase is intentionally small and readable. 🤗
**Roadmap** — Pick an item and [open a PR](https://github.com/HKUDS/nanobot/pulls)! **Roadmap** — Pick an item and [open a PR](https://github.com/HKUDS/nanobot/pulls)!
- [x] **Voice Transcription** — Support for Groq Whisper (Issue #13)
- [ ] **Multi-modal** — See and hear (images, voice, video) - [ ] **Multi-modal** — See and hear (images, voice, video)
- [ ] **Long-term memory** — Never forget important context - [ ] **Long-term memory** — Never forget important context
- [ ] **Better reasoning** — Multi-step planning and reflection - [ ] **Better reasoning** — Multi-step planning and reflection
@ -390,7 +442,6 @@ PRs welcome! The codebase is intentionally small and readable. 🤗
<img src="https://contrib.rocks/image?repo=HKUDS/nanobot" /> <img src="https://contrib.rocks/image?repo=HKUDS/nanobot" />
</a> </a>
---
## ⭐ Star History ## ⭐ Star History
@ -408,3 +459,8 @@ PRs welcome! The codebase is intentionally small and readable. 🤗
<em> Thanks for visiting ✨ nanobot!</em><br><br> <em> Thanks for visiting ✨ nanobot!</em><br><br>
<img src="https://visitor-badge.laobi.icu/badge?page_id=HKUDS.nanobot&style=for-the-badge&color=00d4ff" alt="Views"> <img src="https://visitor-badge.laobi.icu/badge?page_id=HKUDS.nanobot&style=for-the-badge&color=00d4ff" alt="Views">
</p> </p>
<p align="center">
<sub>nanobot is for educational, research, and technical exchange purposes only</sub>
</p>

View File

@ -160,6 +160,11 @@ export class WhatsAppClient {
return `[Document] ${message.documentMessage.caption}`; return `[Document] ${message.documentMessage.caption}`;
} }
// Voice/Audio message
if (message.audioMessage) {
return `[Voice Message]`;
}
return null; return null;
} }

View File

@ -1,5 +1,7 @@
"""Context builder for assembling agent prompts.""" """Context builder for assembling agent prompts."""
import base64
import mimetypes
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@ -114,7 +116,8 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md"""
self, self,
history: list[dict[str, Any]], history: list[dict[str, Any]],
current_message: str, current_message: str,
skill_names: list[str] | None = None skill_names: list[str] | None = None,
media: list[str] | None = None,
) -> list[dict[str, Any]]: ) -> list[dict[str, Any]]:
""" """
Build the complete message list for an LLM call. Build the complete message list for an LLM call.
@ -123,6 +126,7 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md"""
history: Previous conversation messages. history: Previous conversation messages.
current_message: The new user message. current_message: The new user message.
skill_names: Optional skills to include. skill_names: Optional skills to include.
media: Optional list of local file paths for images/media.
Returns: Returns:
List of messages including system prompt. List of messages including system prompt.
@ -136,11 +140,30 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md"""
# History # History
messages.extend(history) messages.extend(history)
# Current message # Current message (with optional image attachments)
messages.append({"role": "user", "content": current_message}) user_content = self._build_user_content(current_message, media)
messages.append({"role": "user", "content": user_content})
return messages return messages
def _build_user_content(self, text: str, media: list[str] | None) -> str | list[dict[str, Any]]:
"""Build user message content with optional base64-encoded images."""
if not media:
return text
images = []
for path in media:
p = Path(path)
mime, _ = mimetypes.guess_type(path)
if not p.is_file() or not mime or not mime.startswith("image/"):
continue
b64 = base64.b64encode(p.read_bytes()).decode()
images.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})
if not images:
return text
return images + [{"type": "text", "text": text}]
def add_tool_result( def add_tool_result(
self, self,
messages: list[dict[str, Any]], messages: list[dict[str, Any]],

View File

@ -152,7 +152,8 @@ class AgentLoop:
# Build initial messages (use get_history for LLM-formatted messages) # Build initial messages (use get_history for LLM-formatted messages)
messages = self.context.build_messages( messages = self.context.build_messages(
history=session.get_history(), history=session.get_history(),
current_message=msg.content current_message=msg.content,
media=msg.media if msg.media else None,
) )
# Agent loop # Agent loop

View File

@ -37,7 +37,9 @@ class ChannelManager:
try: try:
from nanobot.channels.telegram import TelegramChannel from nanobot.channels.telegram import TelegramChannel
self.channels["telegram"] = TelegramChannel( self.channels["telegram"] = TelegramChannel(
self.config.channels.telegram, self.bus self.config.channels.telegram,
self.bus,
groq_api_key=self.config.providers.groq.api_key,
) )
logger.info("Telegram channel enabled") logger.info("Telegram channel enabled")
except ImportError as e: except ImportError as e:

View File

@ -85,9 +85,10 @@ class TelegramChannel(BaseChannel):
name = "telegram" name = "telegram"
def __init__(self, config: TelegramConfig, bus: MessageBus): def __init__(self, config: TelegramConfig, bus: MessageBus, groq_api_key: str = ""):
super().__init__(config, bus) super().__init__(config, bus)
self.config: TelegramConfig = config self.config: TelegramConfig = config
self.groq_api_key = groq_api_key
self._app: Application | None = None self._app: Application | None = None
self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies
@ -249,7 +250,20 @@ class TelegramChannel(BaseChannel):
await file.download_to_drive(str(file_path)) await file.download_to_drive(str(file_path))
media_paths.append(str(file_path)) media_paths.append(str(file_path))
content_parts.append(f"[{media_type}: {file_path}]")
# Handle voice transcription
if media_type == "voice" or media_type == "audio":
from nanobot.providers.transcription import GroqTranscriptionProvider
transcriber = GroqTranscriptionProvider(api_key=self.groq_api_key)
transcription = await transcriber.transcribe(file_path)
if transcription:
logger.info(f"Transcribed {media_type}: {transcription[:50]}...")
content_parts.append(f"[transcription: {transcription}]")
else:
content_parts.append(f"[{media_type}: {file_path}]")
else:
content_parts.append(f"[{media_type}: {file_path}]")
logger.debug(f"Downloaded {media_type} to {file_path}") logger.debug(f"Downloaded {media_type} to {file_path}")
except Exception as e: except Exception as e:
logger.error(f"Failed to download media: {e}") logger.error(f"Failed to download media: {e}")

View File

@ -107,6 +107,11 @@ class WhatsAppChannel(BaseChannel):
# Extract just the phone number as chat_id # Extract just the phone number as chat_id
chat_id = sender.split("@")[0] if "@" in sender else sender chat_id = sender.split("@")[0] if "@" in sender else sender
# Handle voice transcription if it's a voice message
if content == "[Voice Message]":
logger.info(f"Voice message received from {chat_id}, but direct download from bridge is not yet supported.")
content = "[Voice Message: Transcription not available for WhatsApp yet]"
await self._handle_message( await self._handle_message(
sender_id=chat_id, sender_id=chat_id,
chat_id=sender, # Use full JID for replies chat_id=sender, # Use full JID for replies

View File

@ -178,11 +178,13 @@ def gateway(
# Create components # Create components
bus = MessageBus() bus = MessageBus()
# Create provider (supports OpenRouter, Anthropic, OpenAI) # Create provider (supports OpenRouter, Anthropic, OpenAI, Bedrock)
api_key = config.get_api_key() api_key = config.get_api_key()
api_base = config.get_api_base() api_base = config.get_api_base()
model = config.agents.defaults.model
is_bedrock = model.startswith("bedrock/")
if not api_key: if not api_key and not is_bedrock:
console.print("[red]Error: No API key configured.[/red]") console.print("[red]Error: No API key configured.[/red]")
console.print("Set one in ~/.nanobot/config.json under providers.openrouter.apiKey") console.print("Set one in ~/.nanobot/config.json under providers.openrouter.apiKey")
raise typer.Exit(1) raise typer.Exit(1)
@ -289,8 +291,10 @@ def agent(
api_key = config.get_api_key() api_key = config.get_api_key()
api_base = config.get_api_base() api_base = config.get_api_base()
model = config.agents.defaults.model
is_bedrock = model.startswith("bedrock/")
if not api_key: if not api_key and not is_bedrock:
console.print("[red]Error: No API key configured.[/red]") console.print("[red]Error: No API key configured.[/red]")
raise typer.Exit(1) raise typer.Exit(1)
@ -354,8 +358,9 @@ def channels_status():
table = Table(title="Channel Status") table = Table(title="Channel Status")
table.add_column("Channel", style="cyan") table.add_column("Channel", style="cyan")
table.add_column("Enabled", style="green") table.add_column("Enabled", style="green")
table.add_column("Bridge URL", style="yellow") table.add_column("Configuration", style="yellow")
# WhatsApp
wa = config.channels.whatsapp wa = config.channels.whatsapp
table.add_row( table.add_row(
"WhatsApp", "WhatsApp",
@ -363,13 +368,6 @@ def channels_status():
wa.bridge_url wa.bridge_url
) )
tg = config.channels.telegram
table.add_row(
"Telegram",
"" if tg.enabled else "",
"polling"
)
dc = config.channels.discord dc = config.channels.discord
table.add_row( table.add_row(
"Discord", "Discord",
@ -377,6 +375,15 @@ def channels_status():
dc.gateway_url dc.gateway_url
) )
# Telegram
tg = config.channels.telegram
tg_config = f"token: {tg.token[:10]}..." if tg.token else "[dim]not configured[/dim]"
table.add_row(
"Telegram",
"" if tg.enabled else "",
tg_config
)
console.print(table) console.print(table)
@ -520,6 +527,7 @@ def cron_add(
at: str = typer.Option(None, "--at", help="Run once at time (ISO format)"), at: str = typer.Option(None, "--at", help="Run once at time (ISO format)"),
deliver: bool = typer.Option(False, "--deliver", "-d", help="Deliver response to channel"), deliver: bool = typer.Option(False, "--deliver", "-d", help="Deliver response to channel"),
to: str = typer.Option(None, "--to", help="Recipient for delivery"), to: str = typer.Option(None, "--to", help="Recipient for delivery"),
channel: str = typer.Option(None, "--channel", help="Channel for delivery (e.g. 'telegram', 'whatsapp')"),
): ):
"""Add a scheduled job.""" """Add a scheduled job."""
from nanobot.config.loader import get_data_dir from nanobot.config.loader import get_data_dir
@ -548,6 +556,7 @@ def cron_add(
message=message, message=message,
deliver=deliver, deliver=deliver,
to=to, to=to,
channel=channel,
) )
console.print(f"[green]✓[/green] Added job '{job.name}' ({job.id})") console.print(f"[green]✓[/green] Added job '{job.name}' ({job.id})")

View File

@ -60,6 +60,7 @@ class ProvidersConfig(BaseModel):
anthropic: ProviderConfig = Field(default_factory=ProviderConfig) anthropic: ProviderConfig = Field(default_factory=ProviderConfig)
openai: ProviderConfig = Field(default_factory=ProviderConfig) openai: ProviderConfig = Field(default_factory=ProviderConfig)
openrouter: ProviderConfig = Field(default_factory=ProviderConfig) openrouter: ProviderConfig = Field(default_factory=ProviderConfig)
groq: ProviderConfig = Field(default_factory=ProviderConfig)
zhipu: ProviderConfig = Field(default_factory=ProviderConfig) zhipu: ProviderConfig = Field(default_factory=ProviderConfig)
vllm: ProviderConfig = Field(default_factory=ProviderConfig) vllm: ProviderConfig = Field(default_factory=ProviderConfig)
gemini: ProviderConfig = Field(default_factory=ProviderConfig) gemini: ProviderConfig = Field(default_factory=ProviderConfig)
@ -101,14 +102,14 @@ class Config(BaseSettings):
return Path(self.agents.defaults.workspace).expanduser() return Path(self.agents.defaults.workspace).expanduser()
def get_api_key(self) -> str | None: def get_api_key(self) -> str | None:
"""Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > vLLM.""" """Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > Zhipu > Groq > vLLM."""
"""Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > Zhipu > vLLM."""
return ( return (
self.providers.openrouter.api_key or self.providers.openrouter.api_key or
self.providers.anthropic.api_key or self.providers.anthropic.api_key or
self.providers.openai.api_key or self.providers.openai.api_key or
self.providers.gemini.api_key or self.providers.gemini.api_key or
self.providers.zhipu.api_key or self.providers.zhipu.api_key or
self.providers.groq.api_key or
self.providers.vllm.api_key or self.providers.vllm.api_key or
None None
) )

View File

@ -51,6 +51,8 @@ class LiteLLMProvider(LLMProvider):
os.environ.setdefault("GEMINI_API_KEY", api_key) os.environ.setdefault("GEMINI_API_KEY", api_key)
elif "zhipu" in default_model or "glm" in default_model or "zai" in default_model: elif "zhipu" in default_model or "glm" in default_model or "zai" in default_model:
os.environ.setdefault("ZHIPUAI_API_KEY", api_key) os.environ.setdefault("ZHIPUAI_API_KEY", api_key)
elif "groq" in default_model:
os.environ.setdefault("GROQ_API_KEY", api_key)
if api_base: if api_base:
litellm.api_base = api_base litellm.api_base = api_base

View File

@ -0,0 +1,65 @@
"""Voice transcription provider using Groq."""
import os
from pathlib import Path
from typing import Any
import httpx
from loguru import logger
class GroqTranscriptionProvider:
"""
Voice transcription provider using Groq's Whisper API.
Groq offers extremely fast transcription with a generous free tier.
"""
def __init__(self, api_key: str | None = None):
self.api_key = api_key or os.environ.get("GROQ_API_KEY")
self.api_url = "https://api.groq.com/openai/v1/audio/transcriptions"
async def transcribe(self, file_path: str | Path) -> str:
"""
Transcribe an audio file using Groq.
Args:
file_path: Path to the audio file.
Returns:
Transcribed text.
"""
if not self.api_key:
logger.warning("Groq API key not configured for transcription")
return ""
path = Path(file_path)
if not path.exists():
logger.error(f"Audio file not found: {file_path}")
return ""
try:
async with httpx.AsyncClient() as client:
with open(path, "rb") as f:
files = {
"file": (path.name, f),
"model": (None, "whisper-large-v3"),
}
headers = {
"Authorization": f"Bearer {self.api_key}",
}
response = await client.post(
self.api_url,
headers=headers,
files=files,
timeout=60.0
)
response.raise_for_status()
data = response.json()
return data.get("text", "")
except Exception as e:
logger.error(f"Groq transcription error: {e}")
return ""

55
test_docker.sh Executable file
View File

@ -0,0 +1,55 @@
#!/usr/bin/env bash
# Smoke test for the Docker image: builds it, runs `nanobot onboard` in a
# container, snapshots that container, runs `nanobot status` from the snapshot
# and checks that the expected sections appear in the output.
set -euo pipefail

IMAGE_NAME="nanobot-test"
CONTAINER_NAME="nanobot-test-run"
ONBOARDED_IMAGE="nanobot-test-onboarded"

# Remove everything this script creates. Registered as an EXIT trap so it also
# runs when a step fails or validation exits non-zero; otherwise a leftover
# container would make the next `docker run --name` fail with a name conflict.
cleanup() {
    echo ""
    echo "=== Cleanup ==="
    docker rm -f "$CONTAINER_NAME" 2>/dev/null || true
    docker rmi -f "$ONBOARDED_IMAGE" 2>/dev/null || true
    docker rmi -f "$IMAGE_NAME" 2>/dev/null || true
    echo "Done."
}
trap cleanup EXIT

# Clear a stale container left behind by an interrupted earlier run.
docker rm -f "$CONTAINER_NAME" >/dev/null 2>&1 || true

echo "=== Building Docker image ==="
docker build -t "$IMAGE_NAME" .

echo ""
echo "=== Running 'nanobot onboard' ==="
docker run --name "$CONTAINER_NAME" "$IMAGE_NAME" onboard

echo ""
echo "=== Running 'nanobot status' ==="
# Commit the onboarded container so `status` sees the generated config.
# A failure here is tolerated; the validation below reports what is missing.
STATUS_OUTPUT=$(docker commit "$CONTAINER_NAME" "$ONBOARDED_IMAGE" > /dev/null && \
    docker run --rm "$ONBOARDED_IMAGE" status 2>&1) || true
echo "$STATUS_OUTPUT"

echo ""
echo "=== Validating output ==="
PASS=true

# check TEXT: record a failure unless TEXT occurs literally in STATUS_OUTPUT.
# A here-string is used instead of `echo | grep -q` because, under pipefail,
# grep exiting early can kill echo with SIGPIPE and cause a false FAIL.
check() {
    if grep -qF -- "$1" <<< "$STATUS_OUTPUT"; then
        echo " PASS: found '$1'"
    else
        echo " FAIL: missing '$1'"
        PASS=false
    fi
}

check "nanobot Status"
check "Config:"
check "Workspace:"
check "Model:"
check "OpenRouter API:"
check "Anthropic API:"
check "OpenAI API:"

echo ""
if [[ "$PASS" == true ]]; then
    echo "=== All checks passed ==="
else
    echo "=== Some checks FAILED ==="
    exit 1
fi

View File

@ -22,6 +22,16 @@ You have access to:
- Use `memory/` directory for daily notes - Use `memory/` directory for daily notes
- Use `MEMORY.md` for long-term information - Use `MEMORY.md` for long-term information
## Scheduled Reminders
When the user asks for a reminder at a specific time, use `exec` to run:
```
nanobot cron add --name "reminder" --message "Your message" --at "YYYY-MM-DDTHH:MM:SS" --deliver --to "USER_ID" --channel "CHANNEL"
```
Get USER_ID and CHANNEL from the current session (e.g., `8281248569` and `telegram` from `telegram:8281248569`).
**Do NOT just write reminders to MEMORY.md** — that won't trigger actual notifications.
## Heartbeat Tasks ## Heartbeat Tasks
`HEARTBEAT.md` is checked every 30 minutes. You can manage periodic tasks by editing this file: `HEARTBEAT.md` is checked every 30 minutes. You can manage periodic tasks by editing this file: