From d888e51d1c4b5d728b99116154ef4f021d7366bf Mon Sep 17 00:00:00 2001 From: Re-bin Date: Sun, 1 Feb 2026 18:35:27 +0000 Subject: [PATCH] feat(telegram): markdown support --- nanobot/channels/telegram.py | 80 ++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 3 deletions(-) diff --git a/nanobot/channels/telegram.py b/nanobot/channels/telegram.py index 78a7e98..840c250 100644 --- a/nanobot/channels/telegram.py +++ b/nanobot/channels/telegram.py @@ -1,7 +1,7 @@ """Telegram channel implementation using python-telegram-bot.""" import asyncio -from typing import Any +import re from loguru import logger from telegram import Update @@ -13,6 +13,69 @@ from nanobot.channels.base import BaseChannel from nanobot.config.schema import TelegramConfig +def _markdown_to_telegram_html(text: str) -> str: + """ + Convert markdown to Telegram-safe HTML. + """ + if not text: + return "" + + # 1. Extract and protect code blocks (preserve content from other processing) + code_blocks: list[str] = [] + def save_code_block(m: re.Match) -> str: + code_blocks.append(m.group(1)) + return f"\x00CB{len(code_blocks) - 1}\x00" + + text = re.sub(r'```[\w]*\n?([\s\S]*?)```', save_code_block, text) + + # 2. Extract and protect inline code + inline_codes: list[str] = [] + def save_inline_code(m: re.Match) -> str: + inline_codes.append(m.group(1)) + return f"\x00IC{len(inline_codes) - 1}\x00" + + text = re.sub(r'`([^`]+)`', save_inline_code, text) + + # 3. Headers # Title -> just the title text + text = re.sub(r'^#{1,6}\s+(.+)$', r'\1', text, flags=re.MULTILINE) + + # 4. Blockquotes > text -> just the text (before HTML escaping) + text = re.sub(r'^>\s*(.*)$', r'\1', text, flags=re.MULTILINE) + + # 5. Escape HTML special characters + text = text.replace("&", "&").replace("<", "<").replace(">", ">") + + # 6. Links [text](url) - must be before bold/italic to handle nested cases + text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'\1', text) + + # 7. Bold **text** or __text__ + text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) + text = re.sub(r'__(.+?)__', r'\1', text) + + # 8. Italic _text_ (avoid matching inside words like some_var_name) + text = re.sub(r'(?\1', text) + + # 9. Strikethrough ~~text~~ + text = re.sub(r'~~(.+?)~~', r'\1', text) + + # 10. Bullet lists - item -> • item + text = re.sub(r'^[-*]\s+', '• ', text, flags=re.MULTILINE) + + # 11. Restore inline code with HTML tags + for i, code in enumerate(inline_codes): + # Escape HTML in code content + escaped = code.replace("&", "&").replace("<", "<").replace(">", ">") + text = text.replace(f"\x00IC{i}\x00", f"{escaped}") + + # 12. Restore code blocks with HTML tags + for i, code in enumerate(code_blocks): + # Escape HTML in code content + escaped = code.replace("&", "&").replace("<", "<").replace(">", ">") + text = text.replace(f"\x00CB{i}\x00", f"
{escaped}
") + + return text + + class TelegramChannel(BaseChannel): """ Telegram channel using long polling. @@ -96,14 +159,25 @@ class TelegramChannel(BaseChannel): try: # chat_id should be the Telegram chat ID (integer) chat_id = int(msg.chat_id) + # Convert markdown to Telegram HTML + html_content = _markdown_to_telegram_html(msg.content) await self._app.bot.send_message( chat_id=chat_id, - text=msg.content + text=html_content, + parse_mode="HTML" ) except ValueError: logger.error(f"Invalid chat_id: {msg.chat_id}") except Exception as e: - logger.error(f"Error sending Telegram message: {e}") + # Fallback to plain text if HTML parsing fails + logger.warning(f"HTML parse failed, falling back to plain text: {e}") + try: + await self._app.bot.send_message( + chat_id=int(msg.chat_id), + text=msg.content + ) + except Exception as e2: + logger.error(f"Error sending Telegram message: {e2}") async def _on_start(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: """Handle /start command."""