diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..020b9ec
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,13 @@
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+*.egg-info
+dist/
+build/
+.git
+.env
+.assets
+node_modules/
+bridge/dist/
+workspace/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..8132747
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,40 @@
+FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim
+
+# Install Node.js 20 for the WhatsApp bridge from the NodeSource apt repository; gnupg is only used to dearmor the repository key and is purged again in the same layer
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends curl ca-certificates gnupg git && \
+ mkdir -p /etc/apt/keyrings && \
+ curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
+ echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" > /etc/apt/sources.list.d/nodesource.list && \
+ apt-get update && \
+ apt-get install -y --no-install-recommends nodejs && \
+ apt-get purge -y gnupg && \
+ apt-get autoremove -y && \
+ rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Install Python dependencies first against placeholder nanobot/ and bridge/ directories, so this layer stays cached until pyproject.toml, README.md or LICENSE change
+COPY pyproject.toml README.md LICENSE ./
+RUN mkdir -p nanobot bridge && touch nanobot/__init__.py && \
+ uv pip install --system --no-cache . && \
+ rm -rf nanobot bridge
+
+# Copy the full source and install again so the real nanobot package replaces the placeholder
+COPY nanobot/ nanobot/
+COPY bridge/ bridge/
+RUN uv pip install --system --no-cache .
+
+# Build the WhatsApp bridge (npm install, then the package's build script) and return to /app afterwards
+WORKDIR /app/bridge
+RUN npm install && npm run build
+WORKDIR /app
+
+# Create config directory under /root (this file sets no USER instruction)
+RUN mkdir -p /root/.nanobot
+
+# Gateway default port; EXPOSE only documents it, and the default CMD below just runs `nanobot status` - other subcommands are passed as arguments to `docker run`
+EXPOSE 18790
+
+ENTRYPOINT ["nanobot"]
+CMD ["status"]
diff --git a/README.md b/README.md
index 358d23e..f4b1df2 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,7 @@
+
+ nanobot is for educational, research, and technical exchange purposes only +
diff --git a/bridge/src/whatsapp.ts b/bridge/src/whatsapp.ts index 4185632..a3a82fc 100644 --- a/bridge/src/whatsapp.ts +++ b/bridge/src/whatsapp.ts @@ -160,6 +160,11 @@ export class WhatsAppClient { return `[Document] ${message.documentMessage.caption}`; } + // Voice/Audio message + if (message.audioMessage) { + return `[Voice Message]`; + } + return null; } diff --git a/nanobot/agent/tools/web.py b/nanobot/agent/tools/web.py index c9d989c..9de1d3c 100644 --- a/nanobot/agent/tools/web.py +++ b/nanobot/agent/tools/web.py @@ -5,6 +5,7 @@ import json import os import re from typing import Any +from urllib.parse import urlparse import httpx @@ -12,6 +13,7 @@ from nanobot.agent.tools.base import Tool # Shared constants USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36" +MAX_REDIRECTS = 5 # Limit redirects to prevent DoS attacks def _strip_tags(text: str) -> str: @@ -28,6 +30,19 @@ def _normalize(text: str) -> str: return re.sub(r'\n{3,}', '\n\n', text).strip() +def _validate_url(url: str) -> tuple[bool, str]: + """Validate URL: must be http(s) with valid domain.""" + try: + p = urlparse(url) + if p.scheme not in ('http', 'https'): + return False, f"Only http/https allowed, got '{p.scheme or 'none'}'" + if not p.netloc: + return False, "Missing domain" + return True, "" + except Exception as e: + return False, str(e) + + class WebSearchTool(Tool): """Search the web using Brave Search API.""" @@ -95,12 +110,21 @@ class WebFetchTool(Tool): async def execute(self, url: str, extractMode: str = "markdown", maxChars: int | None = None, **kwargs: Any) -> str: from readability import Document - + max_chars = maxChars or self.max_chars - + + # Validate URL before fetching + is_valid, error_msg = _validate_url(url) + if not is_valid: + return json.dumps({"error": f"URL validation failed: {error_msg}", "url": url}) + try: - async with httpx.AsyncClient() as client: - r = await client.get(url, headers={"User-Agent": USER_AGENT}, follow_redirects=True, 
timeout=30.0) + async with httpx.AsyncClient( + follow_redirects=True, + max_redirects=MAX_REDIRECTS, + timeout=30.0 + ) as client: + r = await client.get(url, headers={"User-Agent": USER_AGENT}) r.raise_for_status() ctype = r.headers.get("content-type", "") diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py index 04abf5f..73c3334 100644 --- a/nanobot/channels/manager.py +++ b/nanobot/channels/manager.py @@ -37,7 +37,9 @@ class ChannelManager: try: from nanobot.channels.telegram import TelegramChannel self.channels["telegram"] = TelegramChannel( - self.config.channels.telegram, self.bus + self.config.channels.telegram, + self.bus, + groq_api_key=self.config.providers.groq.api_key, ) logger.info("Telegram channel enabled") except ImportError as e: diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py index ac2dba4..23e1de0 100644 --- a/nanobot/channels/telegram.py +++ b/nanobot/channels/telegram.py @@ -85,9 +85,10 @@ class TelegramChannel(BaseChannel): name = "telegram" - def __init__(self, config: TelegramConfig, bus: MessageBus): + def __init__(self, config: TelegramConfig, bus: MessageBus, groq_api_key: str = ""): super().__init__(config, bus) self.config: TelegramConfig = config + self.groq_api_key = groq_api_key self._app: Application | None = None self._chat_ids: dict[str, int] = {} # Map sender_id to chat_id for replies @@ -249,7 +250,20 @@ class TelegramChannel(BaseChannel): await file.download_to_drive(str(file_path)) media_paths.append(str(file_path)) - content_parts.append(f"[{media_type}: {file_path}]") + + # Handle voice transcription + if media_type == "voice" or media_type == "audio": + from nanobot.providers.transcription import GroqTranscriptionProvider + transcriber = GroqTranscriptionProvider(api_key=self.groq_api_key) + transcription = await transcriber.transcribe(file_path) + if transcription: + logger.info(f"Transcribed {media_type}: {transcription[:50]}...") + content_parts.append(f"[transcription: 
{transcription}]") + else: + content_parts.append(f"[{media_type}: {file_path}]") + else: + content_parts.append(f"[{media_type}: {file_path}]") + logger.debug(f"Downloaded {media_type} to {file_path}") except Exception as e: logger.error(f"Failed to download media: {e}") diff --git a/nanobot/channels/whatsapp.py b/nanobot/channels/whatsapp.py index efbd3e1..c14a6c3 100644 --- a/nanobot/channels/whatsapp.py +++ b/nanobot/channels/whatsapp.py @@ -107,6 +107,11 @@ class WhatsAppChannel(BaseChannel): # Extract just the phone number as chat_id chat_id = sender.split("@")[0] if "@" in sender else sender + # Handle voice transcription if it's a voice message + if content == "[Voice Message]": + logger.info(f"Voice message received from {chat_id}, but direct download from bridge is not yet supported.") + content = "[Voice Message: Transcription not available for WhatsApp yet]" + await self._handle_message( sender_id=chat_id, chat_id=sender, # Use full JID for replies diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 6caa0a7..5ecc31b 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -178,11 +178,13 @@ def gateway( # Create components bus = MessageBus() - # Create provider (supports OpenRouter, Anthropic, OpenAI) + # Create provider (supports OpenRouter, Anthropic, OpenAI, Bedrock) api_key = config.get_api_key() api_base = config.get_api_base() - - if not api_key: + model = config.agents.defaults.model + is_bedrock = model.startswith("bedrock/") + + if not api_key and not is_bedrock: console.print("[red]Error: No API key configured.[/red]") console.print("Set one in ~/.nanobot/config.json under providers.openrouter.apiKey") raise typer.Exit(1) @@ -289,11 +291,13 @@ def agent( api_key = config.get_api_key() api_base = config.get_api_base() - - if not api_key: + model = config.agents.defaults.model + is_bedrock = model.startswith("bedrock/") + + if not api_key and not is_bedrock: console.print("[red]Error: No API key configured.[/red]") 
raise typer.Exit(1) - + bus = MessageBus() provider = LiteLLMProvider( api_key=api_key, @@ -348,21 +352,31 @@ app.add_typer(channels_app, name="channels") def channels_status(): """Show channel status.""" from nanobot.config.loader import load_config - + config = load_config() - + table = Table(title="Channel Status") table.add_column("Channel", style="cyan") table.add_column("Enabled", style="green") - table.add_column("Bridge URL", style="yellow") - + table.add_column("Configuration", style="yellow") + + # WhatsApp wa = config.channels.whatsapp table.add_row( "WhatsApp", "✓" if wa.enabled else "✗", wa.bridge_url ) - + + # Telegram + tg = config.channels.telegram + tg_config = f"token: {tg.token[:10]}..." if tg.token else "[dim]not configured[/dim]" + table.add_row( + "Telegram", + "✓" if tg.enabled else "✗", + tg_config + ) + console.print(table) @@ -608,24 +622,17 @@ def cron_run( def status(): """Show nanobot status.""" from nanobot.config.loader import load_config, get_config_path - from nanobot.utils.helpers import get_workspace_path config_path = get_config_path() - - # Load config first to get the correct workspace path - if config_path.exists(): - config = load_config() - workspace = config.workspace_path - else: - config = None - workspace = get_workspace_path() + config = load_config() + workspace = config.workspace_path console.print(f"{__logo__} nanobot Status\n") console.print(f"Config: {config_path} {'[green]✓[/green]' if config_path.exists() else '[red]✗[/red]'}") console.print(f"Workspace: {workspace} {'[green]✓[/green]' if workspace.exists() else '[red]✗[/red]'}") - if config is not None: + if config_path.exists(): console.print(f"Model: {config.agents.defaults.model}") # Check API keys diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index c2109a1..71e3361 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -50,6 +50,7 @@ class ProvidersConfig(BaseModel): anthropic: ProviderConfig = 
Field(default_factory=ProviderConfig) openai: ProviderConfig = Field(default_factory=ProviderConfig) openrouter: ProviderConfig = Field(default_factory=ProviderConfig) + groq: ProviderConfig = Field(default_factory=ProviderConfig) zhipu: ProviderConfig = Field(default_factory=ProviderConfig) vllm: ProviderConfig = Field(default_factory=ProviderConfig) gemini: ProviderConfig = Field(default_factory=ProviderConfig) @@ -91,13 +92,14 @@ class Config(BaseSettings): return Path(self.agents.defaults.workspace).expanduser() def get_api_key(self) -> str | None: - """Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > Zhipu > vLLM.""" + """Get API key in priority order: OpenRouter > Anthropic > OpenAI > Gemini > Zhipu > Groq > vLLM.""" return ( self.providers.openrouter.api_key or self.providers.anthropic.api_key or self.providers.openai.api_key or self.providers.gemini.api_key or self.providers.zhipu.api_key or + self.providers.groq.api_key or self.providers.vllm.api_key or None ) diff --git a/nanobot/heartbeat/service.py b/nanobot/heartbeat/service.py index 4cb469e..221ed27 100644 --- a/nanobot/heartbeat/service.py +++ b/nanobot/heartbeat/service.py @@ -115,7 +115,7 @@ class HeartbeatService: response = await self.on_heartbeat(HEARTBEAT_PROMPT) # Check if agent said "nothing to do" - if HEARTBEAT_OK_TOKEN in response.upper().replace("_", ""): + if HEARTBEAT_OK_TOKEN.replace("_", "") in response.upper().replace("_", ""): logger.info("Heartbeat: OK (no action needed)") else: logger.info(f"Heartbeat: completed task") diff --git a/nanobot/providers/litellm_provider.py b/nanobot/providers/litellm_provider.py index 42b4bf5..547626d 100644 --- a/nanobot/providers/litellm_provider.py +++ b/nanobot/providers/litellm_provider.py @@ -51,6 +51,8 @@ class LiteLLMProvider(LLMProvider): os.environ.setdefault("GEMINI_API_KEY", api_key) elif "zhipu" in default_model or "glm" in default_model or "zai" in default_model: os.environ.setdefault("ZHIPUAI_API_KEY", api_key) 
+ elif "groq" in default_model: + os.environ.setdefault("GROQ_API_KEY", api_key) if api_base: litellm.api_base = api_base diff --git a/nanobot/providers/transcription.py b/nanobot/providers/transcription.py new file mode 100644 index 0000000..8ce909b --- /dev/null +++ b/nanobot/providers/transcription.py @@ -0,0 +1,65 @@ +"""Voice transcription provider using Groq.""" + +import os +from pathlib import Path +from typing import Any + +import httpx +from loguru import logger + + +class GroqTranscriptionProvider: + """ + Voice transcription provider using Groq's Whisper API. + + Groq offers extremely fast transcription with a generous free tier. + """ + + def __init__(self, api_key: str | None = None): + self.api_key = api_key or os.environ.get("GROQ_API_KEY") + self.api_url = "https://api.groq.com/openai/v1/audio/transcriptions" + + async def transcribe(self, file_path: str | Path) -> str: + """ + Transcribe an audio file using Groq. + + Args: + file_path: Path to the audio file. + + Returns: + Transcribed text. 
+ """ + if not self.api_key: + logger.warning("Groq API key not configured for transcription") + return "" + + path = Path(file_path) + if not path.exists(): + logger.error(f"Audio file not found: {file_path}") + return "" + + try: + async with httpx.AsyncClient() as client: + with open(path, "rb") as f: + files = { + "file": (path.name, f), + "model": (None, "whisper-large-v3"), + } + headers = { + "Authorization": f"Bearer {self.api_key}", + } + + response = await client.post( + self.api_url, + headers=headers, + files=files, + timeout=60.0 + ) + + response.raise_for_status() + data = response.json() + return data.get("text", "") + + except Exception as e: + logger.error(f"Groq transcription error: {e}") + return "" diff --git a/test_docker.sh b/test_docker.sh new file mode 100755 index 0000000..a90e080 --- /dev/null +++ b/test_docker.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +set -euo pipefail + +IMAGE_NAME="nanobot-test" + +echo "=== Building Docker image ===" +docker build -t "$IMAGE_NAME" . 
+
+# Remove the test container and images on every exit path: success, a failed check (exit 1), or a set -e abort.
+cleanup() {
+    echo ""
+    echo "=== Cleanup ==="
+    docker rm -f nanobot-test-run 2>/dev/null || true
+    docker rmi -f nanobot-test-onboarded 2>/dev/null || true
+    docker rmi -f "$IMAGE_NAME" 2>/dev/null || true
+    echo "Done."
+}
+trap cleanup EXIT
+
+echo ""
+echo "=== Running 'nanobot onboard' ==="
+docker run --name nanobot-test-run "$IMAGE_NAME" onboard
+
+echo ""
+echo "=== Running 'nanobot status' ==="
+STATUS_OUTPUT=$(docker commit nanobot-test-run nanobot-test-onboarded > /dev/null && \
+    docker run --rm nanobot-test-onboarded status 2>&1) || true
+
+echo "$STATUS_OUTPUT"
+
+echo ""
+echo "=== Validating output ==="
+PASS=true
+
+# check TEXT: report whether TEXT occurs literally in the status output (here-string, so pipefail cannot misreport a match).
+check() {
+    if grep -qF -- "$1" <<<"$STATUS_OUTPUT"; then
+        echo " PASS: found '$1'"
+    else
+        echo " FAIL: missing '$1'"
+        PASS=false
+    fi
+}
+
+for expected in "nanobot Status" "Config:" "Workspace:" "Model:" "OpenRouter API:" "Anthropic API:" "OpenAI API:"; do
+    check "$expected"
+done
+
+echo ""
+if $PASS; then
+    echo "=== All checks passed ==="
+else
+    echo "=== Some checks FAILED ==="
+    exit 1
+fi