fix: handle Telegram message length limit with smart splitting
This commit is contained in:
parent
48a14edbda
commit
db0e8aa61b
2
.gitignore
vendored
2
.gitignore
vendored
@ -19,4 +19,4 @@ __pycache__/
|
||||
poetry.lock
|
||||
.pytest_cache/
|
||||
botpy.log
|
||||
|
||||
tests/
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
⚡️ Delivers core agent functionality in just **~4,000** lines of code — **99% smaller** than Clawdbot's 430k+ lines.
|
||||
|
||||
📏 Real-time line count: **3,663 lines** (run `bash core_agent_lines.sh` to verify anytime)
|
||||
📏 Real-time line count: **3,668 lines** (run `bash core_agent_lines.sh` to verify anytime)
|
||||
|
||||
## 📢 News
|
||||
|
||||
|
||||
@ -82,17 +82,20 @@ def _split_message(content: str, max_len: int = 4000) -> list[str]:
|
||||
"""Split content into chunks within max_len, preferring line breaks."""
|
||||
if len(content) <= max_len:
|
||||
return [content]
|
||||
chunks = []
|
||||
while len(content) > max_len:
|
||||
chunk = content[:max_len]
|
||||
break_pos = chunk.rfind('\n')
|
||||
if break_pos == -1:
|
||||
break_pos = chunk.rfind(' ')
|
||||
if break_pos == -1:
|
||||
break_pos = max_len
|
||||
chunks.append(chunk[:break_pos])
|
||||
content = content[break_pos:].lstrip()
|
||||
return chunks + [content]
|
||||
chunks: list[str] = []
|
||||
while content:
|
||||
if len(content) <= max_len:
|
||||
chunks.append(content)
|
||||
break
|
||||
cut = content[:max_len]
|
||||
pos = cut.rfind('\n')
|
||||
if pos == -1:
|
||||
pos = cut.rfind(' ')
|
||||
if pos == -1:
|
||||
pos = max_len
|
||||
chunks.append(content[:pos])
|
||||
content = content[pos:].lstrip()
|
||||
return chunks
|
||||
|
||||
|
||||
class TelegramChannel(BaseChannel):
|
||||
@ -211,7 +214,8 @@ class TelegramChannel(BaseChannel):
|
||||
|
||||
for chunk in _split_message(msg.content):
|
||||
try:
|
||||
await self._app.bot.send_message(chat_id=chat_id, text=_markdown_to_telegram_html(chunk), parse_mode="HTML")
|
||||
html = _markdown_to_telegram_html(chunk)
|
||||
await self._app.bot.send_message(chat_id=chat_id, text=html, parse_mode="HTML")
|
||||
except Exception as e:
|
||||
logger.warning(f"HTML parse failed, falling back to plain text: {e}")
|
||||
try:
|
||||
|
||||
@ -1,416 +0,0 @@
|
||||
"""Tests for Telegram channel implementation."""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
from nanobot.bus.events import OutboundMessage
|
||||
from nanobot.bus.queue import MessageBus
|
||||
from nanobot.channels.telegram import TelegramChannel, _markdown_to_telegram_html
|
||||
from nanobot.config.schema import TelegramConfig
|
||||
|
||||
|
||||
def _make_config() -> TelegramConfig:
    """Build the Telegram configuration shared by the tests in this module.

    The channel is enabled with a placeholder token, an empty allow-list
    and no proxy.
    """
    settings = {
        "enabled": True,
        "token": "fake-token",
        "allow_from": [],
        "proxy": None,
    }
    return TelegramConfig(**settings)
|
||||
|
||||
|
||||
class TestMarkdownToTelegramHtml:
    """Tests for markdown to Telegram HTML conversion."""

    def test_empty_text(self) -> None:
        assert _markdown_to_telegram_html("") == ""

    def test_plain_text_passthrough(self) -> None:
        text = "Hello world"
        assert _markdown_to_telegram_html(text) == "Hello world"

    def test_bold_double_asterisks(self) -> None:
        text = "This is **bold** text"
        assert _markdown_to_telegram_html(text) == "This is <b>bold</b> text"

    def test_bold_double_underscore(self) -> None:
        text = "This is __bold__ text"
        assert _markdown_to_telegram_html(text) == "This is <b>bold</b> text"

    def test_italic_underscore(self) -> None:
        text = "This is _italic_ text"
        assert _markdown_to_telegram_html(text) == "This is <i>italic</i> text"

    def test_italic_not_inside_words(self) -> None:
        # Underscores inside an identifier must not be read as emphasis.
        text = "some_var_name"
        assert _markdown_to_telegram_html(text) == "some_var_name"

    def test_strikethrough(self) -> None:
        text = "This is ~~deleted~~ text"
        assert _markdown_to_telegram_html(text) == "This is <s>deleted</s> text"

    def test_inline_code(self) -> None:
        text = "Use `print()` function"
        result = _markdown_to_telegram_html(text)
        assert "<code>print()</code>" in result

    def test_inline_code_escapes_html(self) -> None:
        # Markup characters inside inline code must come out as entities,
        # otherwise the literal <div> would be sent to Telegram as a tag.
        text = "Use `<div>` tag"
        result = _markdown_to_telegram_html(text)
        assert "<code>&lt;div&gt;</code>" in result

    def test_code_block(self) -> None:
        text = (
            "Here is code:\n"
            "```python\n"
            "def hello():\n"
            '    return "world"\n'
            "```\n"
            "Done.\n"
        )
        result = _markdown_to_telegram_html(text)
        assert "<pre><code>" in result
        assert "def hello():" in result
        assert "</code></pre>" in result

    def test_code_block_escapes_html(self) -> None:
        # Same escaping requirement as inline code, for fenced blocks.
        text = "```\n<div>test</div>\n```"
        result = _markdown_to_telegram_html(text)
        assert "&lt;div&gt;test&lt;/div&gt;" in result

    def test_headers_stripped(self) -> None:
        text = "# Header 1\n## Header 2\n### Header 3"
        result = _markdown_to_telegram_html(text)
        assert "# Header 1" not in result
        assert "Header 1" in result
        assert "Header 2" in result
        assert "Header 3" in result

    def test_blockquotes_stripped(self) -> None:
        text = "> This is a quote\nMore text"
        result = _markdown_to_telegram_html(text)
        assert "> " not in result
        assert "This is a quote" in result

    def test_links_converted(self) -> None:
        text = "Check [this link](https://example.com) out"
        result = _markdown_to_telegram_html(text)
        assert '<a href="https://example.com">this link</a>' in result

    def test_bullet_list_converted(self) -> None:
        text = "- Item 1\n* Item 2"
        result = _markdown_to_telegram_html(text)
        assert "• Item 1" in result
        assert "• Item 2" in result

    def test_html_special_chars_escaped(self) -> None:
        # Bare comparison operators in prose must be entity-escaped.
        text = "5 < 10 and 10 > 5"
        result = _markdown_to_telegram_html(text)
        assert "5 &lt; 10" in result
        assert "10 &gt; 5" in result

    def test_complex_nested_formatting(self) -> None:
        text = "**Bold _and italic_** and `code`"
        result = _markdown_to_telegram_html(text)
        assert "<b>Bold <i>and italic</i></b>" in result
        assert "<code>code</code>" in result
|
||||
|
||||
|
||||
class TestTelegramChannelSend:
    """Tests for TelegramChannel.send() method.

    The bot is replaced by small in-memory fakes, so nothing touches the
    network. The helpers below hold the wiring that every test needs.
    """

    class _RecordingBot:
        """Bot stand-in that records send_message calls instead of sending."""

        def __init__(self) -> None:
            self.sent: list[dict] = []

        async def send_message(self, chat_id, text, parse_mode=None):
            self.sent.append({"chat_id": chat_id, "text": text, "parse_mode": parse_mode})

    @staticmethod
    def _channel_for(bot):
        """Return a TelegramChannel whose application exposes ``bot``."""
        fake_app = MagicMock()
        fake_app.bot = bot
        channel = TelegramChannel(_make_config(), MessageBus())
        channel._app = fake_app
        return channel

    @staticmethod
    def _message(content, chat_id="123456"):
        """Return an outbound Telegram message carrying ``content``."""
        return OutboundMessage(channel="telegram", chat_id=chat_id, content=content)

    @pytest.mark.asyncio
    async def test_send_short_message_single_chunk(self) -> None:
        """Short messages are sent as a single message."""
        bot = self._RecordingBot()

        await self._channel_for(bot).send(self._message("Hello world"))

        assert len(bot.sent) == 1
        assert bot.sent[0]["chat_id"] == 123456
        assert "Hello world" in bot.sent[0]["text"]
        assert bot.sent[0]["parse_mode"] == "HTML"

    @pytest.mark.asyncio
    async def test_send_long_message_split_into_chunks(self) -> None:
        """Long messages exceeding 4000 chars are split."""
        bot = self._RecordingBot()
        # Five 1000-character lines: 5004 characters in total.
        long_content = "\n".join(letter * 1000 for letter in "ABCDE")

        await self._channel_for(bot).send(self._message(long_content))

        assert len(bot.sent) == 2  # Should be split into 2 messages
        assert all(m["chat_id"] == 123456 for m in bot.sent)

    @pytest.mark.asyncio
    async def test_send_splits_at_newline_when_possible(self) -> None:
        """Message splitting prefers newline boundaries."""
        bot = self._RecordingBot()
        # Content with clear paragraph breaks.
        paragraphs = [f"Paragraph {i}: " + "x" * 100 for i in range(50)]
        content = "\n".join(paragraphs)

        await self._channel_for(bot).send(self._message(content))

        # No chunk may begin with leftover whitespace from the cut.
        for msg in bot.sent:
            text = msg["text"]
            assert text == text.lstrip()

    @pytest.mark.asyncio
    async def test_send_falls_back_to_space_boundary(self) -> None:
        """When no newline available, split at space boundary."""
        bot = self._RecordingBot()
        # Long content without newlines but with spaces (~10000 chars).
        content = "word " * 2000

        await self._channel_for(bot).send(self._message(content))

        assert len(bot.sent) >= 2

    @pytest.mark.asyncio
    async def test_send_forces_split_when_no_good_boundary(self) -> None:
        """When no newline or space, force split at max length."""
        bot = self._RecordingBot()
        # Long content without any spaces or newlines.
        content = "A" * 10000

        await self._channel_for(bot).send(self._message(content))

        assert len(bot.sent) >= 2
        # Verify all chunks combined equal original
        combined = "".join(m["text"] for m in bot.sent)
        assert combined == content

    @pytest.mark.asyncio
    async def test_send_invalid_chat_id_logs_error(self) -> None:
        """Invalid chat_id should log error and not send."""
        bot = self._RecordingBot()

        await self._channel_for(bot).send(self._message("Hello", chat_id="not-a-number"))

        assert len(bot.sent) == 0

    @pytest.mark.asyncio
    async def test_send_html_parse_error_falls_back_to_plain_text(self) -> None:
        """When HTML parsing fails, fall back to plain text."""
        sent_messages = []
        call_count = 0

        class FlakyBot:
            """Rejects the very first call if it uses HTML, then records."""

            async def send_message(self, chat_id, text, parse_mode=None):
                nonlocal call_count
                call_count += 1
                if parse_mode == "HTML" and call_count == 1:
                    raise Exception("Bad markup")
                sent_messages.append({"chat_id": chat_id, "text": text, "parse_mode": parse_mode})

        await self._channel_for(FlakyBot()).send(self._message("Hello **world**"))

        # Should have 2 calls: first HTML (fails), second plain text (succeeds)
        assert call_count == 2
        assert len(sent_messages) == 1
        assert sent_messages[0]["parse_mode"] is None  # Plain text
        assert "Hello **world**" in sent_messages[0]["text"]

    @pytest.mark.asyncio
    async def test_send_not_running_warns(self, monkeypatch) -> None:
        """If bot not running, log warning."""
        warning_logged = []

        def mock_warning(msg, *args):
            warning_logged.append(msg)

        monkeypatch.setattr("nanobot.channels.telegram.logger", MagicMock(warning=mock_warning))

        channel = TelegramChannel(_make_config(), MessageBus())
        channel._app = None  # Not running

        await channel.send(self._message("Hello"))

        assert any("not running" in str(m) for m in warning_logged)

    @pytest.mark.asyncio
    async def test_send_stops_typing_indicator(self) -> None:
        """Sending message should stop typing indicator."""
        stopped_chats = []
        channel = self._channel_for(self._RecordingBot())
        channel._stop_typing = lambda chat_id: stopped_chats.append(chat_id)

        await channel.send(self._message("Hello"))

        assert "123456" in stopped_chats
|
||||
|
||||
|
||||
class TestTelegramChannelTyping:
    """Tests for typing indicator functionality."""

    @pytest.mark.asyncio
    async def test_start_typing_creates_task(self) -> None:
        """_start_typing registers a still-pending task under the chat id."""
        chat = "123456"
        channel = TelegramChannel(_make_config(), MessageBus())
        # Replace the loop with a mock so no real typing loop is executed.
        channel._typing_loop = AsyncMock()

        channel._start_typing(chat)

        assert chat in channel._typing_tasks
        assert not channel._typing_tasks[chat].done()

        # Clean up
        channel._stop_typing(chat)

    def test_stop_typing_cancels_task(self) -> None:
        """_stop_typing cancels the registered task and forgets it."""
        chat = "123456"
        channel = TelegramChannel(_make_config(), MessageBus())
        pending = MagicMock()
        pending.done.return_value = False
        channel._typing_tasks[chat] = pending

        channel._stop_typing(chat)

        pending.cancel.assert_called_once()
        assert chat not in channel._typing_tasks
|
||||
|
||||
|
||||
class TestTelegramChannelMediaExtensions:
    """Tests for media file extension detection."""

    def test_get_extension_from_mime_type(self) -> None:
        """A known MIME type selects the extension."""
        channel = TelegramChannel(_make_config(), MessageBus())
        cases = [
            ("image", "image/jpeg", ".jpg"),
            ("image", "image/png", ".png"),
            ("image", "image/gif", ".gif"),
            ("audio", "audio/ogg", ".ogg"),
            ("audio", "audio/mpeg", ".mp3"),
        ]
        for media_type, mime_type, expected in cases:
            assert channel._get_extension(media_type, mime_type) == expected

    def test_get_extension_fallback_to_type(self) -> None:
        """Without a MIME type the media type alone selects the extension."""
        channel = TelegramChannel(_make_config(), MessageBus())
        cases = [
            ("image", ".jpg"),
            ("voice", ".ogg"),
            ("audio", ".mp3"),
        ]
        for media_type, expected in cases:
            assert channel._get_extension(media_type, None) == expected

    def test_get_extension_unknown_type(self) -> None:
        """An unrecognised media type with no MIME type gives an empty string."""
        channel = TelegramChannel(_make_config(), MessageBus())
        extension = channel._get_extension("unknown", None)
        assert extension == ""
|
||||
Loading…
x
Reference in New Issue
Block a user