refactor: simplify Slack markdown-to-mrkdwn conversion
This commit is contained in:
parent
3706903978
commit
e8e7215d3e
@ -204,126 +204,46 @@ class SlackChannel(BaseChannel):
|
|||||||
return text
|
return text
|
||||||
return re.sub(rf"<@{re.escape(self._bot_user_id)}>\s*", "", text).strip()
|
return re.sub(rf"<@{re.escape(self._bot_user_id)}>\s*", "", text).strip()
|
||||||
|
|
||||||
|
# Markdown → Slack mrkdwn formatting rules (order matters: longest markers first)
|
||||||
|
_MD_TO_SLACK = (
|
||||||
|
(r'(?m)(^|[^\*])\*\*\*(.+?)\*\*\*([^\*]|$)', r'\1*_\2_*\3'), # ***bold italic***
|
||||||
|
(r'(?m)(^|[^_])___(.+?)___([^_]|$)', r'\1*_\2_*\3'), # ___bold italic___
|
||||||
|
(r'(?m)(^|[^\*])\*\*(.+?)\*\*([^\*]|$)', r'\1*\2*\3'), # **bold**
|
||||||
|
(r'(?m)(^|[^_])__(.+?)__([^_]|$)', r'\1*\2*\3'), # __bold__
|
||||||
|
(r'(?m)(^|[^\*])\*(.+?)\*([^\*]|$)', r'\1_\2_\3'), # *italic*
|
||||||
|
(r'(?m)(^|[^~])~~(.+?)~~([^~]|$)', r'\1~\2~\3'), # ~~strike~~
|
||||||
|
(r'(?m)(^|[^!])\[(.+?)\]\((http.+?)\)', r'\1<\3|\2>'), # [text](url)
|
||||||
|
(r'!\[.+?\]\((http.+?)(?:\s".*?")?\)', r'<\1>'), # ![alt](url "title")
|
||||||
|
)
|
||||||
|
_TABLE_RE = re.compile(r'(?m)^\|.*?\|$(?:\n(?:\|\:?-{3,}\:?)*?\|$)(?:\n\|.*?\|$)*')
|
||||||
|
|
||||||
def _convert_markdown(self, text: str) -> str:
|
def _convert_markdown(self, text: str) -> str:
|
||||||
|
"""Convert standard Markdown to Slack mrkdwn format."""
|
||||||
if not text:
|
if not text:
|
||||||
return text
|
return text
|
||||||
def convert_formatting(input: str) -> str:
|
for pattern, repl in self._MD_TO_SLACK:
|
||||||
# Convert italics
|
text = re.sub(pattern, repl, text)
|
||||||
# Step 1: *text* -> _text_
|
return self._TABLE_RE.sub(self._convert_table, text)
|
||||||
converted_text = re.sub(
|
|
||||||
r"(?m)(^|[^\*])\*([^\*].+?[^\*])\*([^\*]|$)", r"\1_\2_\3", input)
|
|
||||||
# Convert bold
|
|
||||||
# Step 2.a: **text** -> *text*
|
|
||||||
converted_text = re.sub(
|
|
||||||
r"(?m)(^|[^\*])\*\*([^\*].+?[^\*])\*\*([^\*]|$)", r"\1*\2*\3", converted_text)
|
|
||||||
# Step 2.b: __text__ -> *text*
|
|
||||||
converted_text = re.sub(
|
|
||||||
r"(?m)(^|[^_])__([^_].+?[^_])__([^_]|$)", r"\1*\2*\3", converted_text)
|
|
||||||
# convert bold italics
|
|
||||||
# Step 3.a: ***text*** -> *_text_*
|
|
||||||
converted_text = re.sub(
|
|
||||||
r"(?m)(^|[^\*])\*\*\*([^\*].+?[^\*])\*\*\*([^\*]|$)", r"\1*_\2_*\3", converted_text)
|
|
||||||
# Step 3.b - ___text___ -> *_text_*
|
|
||||||
converted_text = re.sub(
|
|
||||||
r"(?m)(^|[^_])___([^_].+?[^_])___([^_]|$)", r"\1*_\2_*\3", converted_text)
|
|
||||||
# Convert strikethrough
|
|
||||||
# Step 4: ~~text~~ -> ~text~
|
|
||||||
converted_text = re.sub(
|
|
||||||
r"(?m)(^|[^~])~~([^~].+?[^~])~~([^~]|$)", r"\1~\2~\3", converted_text)
|
|
||||||
# Convert URL formatting
|
|
||||||
# Step 6: [text](URL) -> <URL|text>
|
|
||||||
converted_text = re.sub(
|
|
||||||
r"(?m)(^|[^!])\[(.+?)\]\((http.+?)\)", r"\1<\3|\2>", converted_text)
|
|
||||||
# Convert image URL
|
|
||||||
# Step 6: ![alt text](URL "title") -> <URL>
|
|
||||||
converted_text = re.sub(
|
|
||||||
r"[!]\[.+?\]\((http.+?)(?: \".*?\")?\)", r"<\1>", converted_text)
|
|
||||||
return converted_text
|
|
||||||
def escape_mrkdwn(text: str) -> str:
|
|
||||||
return (text.replace('&', '&amp;')
|
|
||||||
.replace('<', '&lt;')
|
|
||||||
.replace('>', '&gt;'))
|
|
||||||
def convert_table(match: re.Match) -> str:
|
|
||||||
# Slack doesn't support Markdown tables
|
|
||||||
# Convert table to bulleted list with sections
|
|
||||||
# -- input_md:
|
|
||||||
# Some text before the table.
|
|
||||||
# | Col1 | Col2 | Col3 |
|
|
||||||
# |-----|----------|------|
|
|
||||||
# | Row1 - A | Row1 - B | Row1 - C |
|
|
||||||
# | Row2 - D | Row2 - E | Row2 - F |
|
|
||||||
#
|
|
||||||
# Some text after the table.
|
|
||||||
#
|
|
||||||
# -- will be converted to:
|
|
||||||
# Some text before the table.
|
|
||||||
# > *Col1* : Row1 - A
|
|
||||||
# • *Col2*: Row1 - B
|
|
||||||
# • *Col3*: Row1 - C
|
|
||||||
# > *Col1* : Row2 - D
|
|
||||||
# • *Col2*: Row2 - E
|
|
||||||
# • *Col3*: Row2 - F
|
|
||||||
#
|
|
||||||
# Some text after the table.
|
|
||||||
|
|
||||||
block = match.group(0).strip()
|
|
||||||
lines = [line.strip()
|
|
||||||
for line in block.split('\n') if line.strip()]
|
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _convert_table(match: re.Match) -> str:
|
||||||
|
"""Convert Markdown table to Slack quote + bullet format."""
|
||||||
|
lines = [l.strip() for l in match.group(0).strip().split('\n') if l.strip()]
|
||||||
if len(lines) < 2:
|
if len(lines) < 2:
|
||||||
return block
|
return match.group(0)
|
||||||
|
|
||||||
# 1. Parse Headers from the first line
|
headers = [h.strip() for h in lines[0].strip('|').split('|')]
|
||||||
# Split by pipe, filtering out empty start/end strings caused by outer pipes
|
start = 2 if not re.search(r'[^|\-\s:]', lines[1]) else 1
|
||||||
header_line = lines[0].strip('|')
|
|
||||||
headers = [escape_mrkdwn(h.strip())
|
|
||||||
for h in header_line.split('|')]
|
|
||||||
|
|
||||||
# 2. Identify Data Start (Skip Separator)
|
result: list[str] = []
|
||||||
data_start_idx = 1
|
for line in lines[start:]:
|
||||||
# If line 2 contains only separator chars (|-: ), skip it
|
cells = [c.strip() for c in line.strip('|').split('|')]
|
||||||
if len(lines) > 1 and not re.search(r'[^|\-\s:]', lines[1]):
|
cells = (cells + [''] * len(headers))[:len(headers)]
|
||||||
data_start_idx = 2
|
|
||||||
|
|
||||||
# 3. Process Data Rows
|
|
||||||
slack_lines = []
|
|
||||||
for line in lines[data_start_idx:]:
|
|
||||||
# Clean and split cells
|
|
||||||
clean_line = line.strip('|')
|
|
||||||
cells = [escape_mrkdwn(c.strip())
|
|
||||||
for c in clean_line.split('|')]
|
|
||||||
|
|
||||||
# Normalize cell count to match headers
|
|
||||||
if len(cells) < len(headers):
|
|
||||||
cells += [''] * (len(headers) - len(cells))
|
|
||||||
cells = cells[:len(headers)]
|
|
||||||
|
|
||||||
# Skip empty rows
|
|
||||||
if not any(cells):
|
if not any(cells):
|
||||||
continue
|
continue
|
||||||
|
result.append(f"> *{headers[0]}*: {cells[0] or '--'}")
|
||||||
# Key is the first column
|
|
||||||
key = cells[0]
|
|
||||||
label = headers[0]
|
|
||||||
slack_lines.append(
|
|
||||||
f"> *{label}* : {key}" if key else "> *{label}* : --")
|
|
||||||
|
|
||||||
# Sub-bullets for remaining columns
|
|
||||||
for i, cell in enumerate(cells[1:], 1):
|
for i, cell in enumerate(cells[1:], 1):
|
||||||
if cell:
|
if cell and i < len(headers):
|
||||||
label = headers[i] if i < len(headers) else "Col"
|
result.append(f" \u2022 *{headers[i]}*: {cell}")
|
||||||
slack_lines.append(f" • *{label}*: {cell}")
|
result.append("")
|
||||||
|
return '\n'.join(result).rstrip()
|
||||||
slack_lines.append("") # Spacer between items
|
|
||||||
|
|
||||||
return "\n".join(slack_lines).rstrip()
|
|
||||||
|
|
||||||
# (?m) : Multiline mode so ^ matches start of line and $ end of line
|
|
||||||
# ^\| : Start of line and a literal pipe
|
|
||||||
# .*?\|$ : Rest of the line and a pipe at the end
|
|
||||||
# (?:\n(?:\|\:?-{3,}\:?)*?\|$) : A heading line with at least three dashes in each column, pipes, and : e.g. |:---|----|:---:|
|
|
||||||
# (?:\n\|.*?\|$)* : Zero or more subsequent lines that ALSO start and end with a pipe
|
|
||||||
table_pattern = r'(?m)^\|.*?\|$(?:\n(?:\|\:?-{3,}\:?)*?\|$)(?:\n\|.*?\|$)*'
|
|
||||||
|
|
||||||
input_md = convert_formatting(text)
|
|
||||||
return re.sub(table_pattern, convert_table, input_md)
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user