diff --git a/.github/result.png b/.github/result.png
new file mode 100644
index 0000000..1dd9d39
Binary files /dev/null and b/.github/result.png differ
diff --git a/README.md b/README.md
index 150da4e..8a973db 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,30 @@ or if you use `pdm`:
pdm add telegramify-markdown
```
+## Supported Features
+
+- [x] Headings (1-6)
+- [x] Links [text](url)
+- [x] Images ![alt]
+- [x] Lists (Ordered, Unordered)
+- [x] Tables |-|-|
+- [x] Horizontal Rule ----
+- [x] *Text* **Styles**
+- [x] __Underline__
+- [x] Code Blocks
+- [x] `Inline Code`
+- [x] Block Quotes
+- [x] ~~Strikethrough~~
+- [ ] Task Lists
+- [ ] ~Strikethrough~
+- [ ] ||Spoiler||
+- [ ] Telegram custom emoji
+- [ ] Telegram user mentions
+
+> [!NOTE]
+> Since mistletoe doesn't parse task lists or spoilers, we can't support them.
+> `~Strikethrough~` is not supported, even though it appears in the official documentation; please use the `~~Strikethrough~~` format instead.
+
## Use case
````python3
@@ -34,17 +58,29 @@ from telegramify_markdown.customize import markdown_symbol
markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
md = """
-# 一级标题 `c!ode` # 一级标题 `code`
-[Link!AA](https://www.example.com)
-
-[key!]: https://www.google.com "a title!"
-
-[这是!链接2][asd!asd](https://www.example.com)
-[rttt]()
-![PIC](https://www.example.com/image.jpg)
-1. Order!ed
- 1. Order!ed sub
-- Unord*-.ered
+'\_', '\*', '\[', '\]', '\(', '\)', '\~', '\`', '\>', '\#', '\+', '\-', '\=', '\|', '\{', '\}', '\.', '\!'
+_ , * , [ , ] , ( , ) , ~ , ` , > , # , + , - , = , | , { , } , . , !
+**bold text**
+*bold text*
+_italic text_
+__underline__
+~no valid strikethrough~
+~~strikethrough~~
+||spoiler||
+*bold _italic bold ~~italic bold strikethrough ||italic bold strikethrough spoiler||~~ __underline italic bold___ bold*
+__underline italic bold__
+[link](https://www.google.com)
+- [ ] Uncompleted task list item
+- [x] Completed task list item
+> Quote
+```python
+print("Hello, World!")
+```
+This is `inline code`
+1. First ordered list item
+2. Another item
+ - Unordered sub-list.
+1. Actual numbers don't matter, just that it's a number
"""
converted = telegramify_markdown.convert(md)
print(converted)
@@ -52,19 +88,4 @@ print(converted)
output as follows:
-```markdown
-*📌 一级标题 `c\!ode` \# 一级标题 `code`*
-[Link\!AA](https://www\.example\.com)
-
-🔗[a title\!](https://www\.google\.com)
-
-\[这是\!链接2\][asd\!asd](https://www\.example\.com)
-[rttt]()
-🖼[PIC](https://www\.example\.com/image\.jpg)
-1\. Order\!ed
-1\. Order\!ed sub
-⦁ Unord\*\-\.ered
-```
-
-> Note: Telegram Server automatically processes the double of `\`(`\\`) again (even after escaping), which is beyond the
-> control of us.
\ No newline at end of file
+![Rendered output](.github/result.png)
diff --git a/pdm.lock b/pdm.lock
index 16507b5..85008b3 100644
--- a/pdm.lock
+++ b/pdm.lock
@@ -5,7 +5,7 @@
groups = ["default", "dev"]
strategy = ["cross_platform", "inherit_metadata"]
lock_version = "4.4.1"
-content_hash = "sha256:5dbe4a4926736575a5f5d2800d9a8b5a41ea23ecd7e12b28140ebe8fdf2c2524"
+content_hash = "sha256:a0723c63fba5731ca1c0d4ee5fa63e03889d1356546d79c91be6a58ccad70961"
[[package]]
name = "certifi"
diff --git a/playground/exp1.md b/playground/exp1.md
index 52967d3..9278c4a 100644
--- a/playground/exp1.md
+++ b/playground/exp1.md
@@ -2,11 +2,15 @@
key: value
---
-# 一级标题 `c!ode` # 一级标题 `code`
+\(c!ode\)
-## 二级标题
+\# Heading Level 1 `c!ode`
-### 三级标题
+# Heading Level 1 `c!ode`
+
+## Heading Level 2
+
+### Heading Level 3
Header
======
@@ -14,24 +18,24 @@ Header
included as literal
1231asdasd
-**这是粗!体文本**
-*这是斜!体文本*
-~~这是删!除线文本~~
+**Bold text**
+*Italic text*
+~~Strikethrough text~~
-> 这是引用!文本
+> Blockquote text
-`这是内联!代码\\\\`
+`Inline code`
-\\\/\111`sad`
+\\/\\111`sad`
```
-这是代码块!
+Code block
```
```python
-# 这是带有语言指定的代码块
+# Code block with specified language
print("Hello, World!")
```
@@ -42,29 +46,50 @@ print("Hello, World!")
1. numbered item
-[key!]: https://www.google.com "a title!"
-
-
some text
+[some text](https://www.example.com)
-[这是链!接](https://www.example.com)
-
-[这是!链接2][asd!asd](https://www.example.com)
+[some text2][asd!asd](https://www.example.com)
[rttt]()
-[这是链接3][asdasd2]
+[some text3][asdasd2]
-![这是图片](https://www.example.com/image.jpg)
+![Image](https://www.example.com/image.jpg)
---
-这是水平线
-
-内置的 **加粗** 和 *斜体* 文本
-
-| 表头 | 表头 |
-|-----|-----|
-| 单元格 | 单元格 |
-| 单元格 | 单元格 |
-
-- [ ] 这是未完成的任务列表项
-- [x] 这是已完成的任务列表项
+Horizontal Rule
+
+**Bold** and *Italic* text
+
+| Header | Header |
+|--------|--------|
+| Cell | Cell |
+| Cell | Cell |
+
+- [ ] Uncompleted task list item
+- [x] Completed task list item
+
+In all other places characters '_', '*', '[', ']', '(', ')', '~', '`', '>', '#', '+', '-', '=', '|', '{', '}', '.', '!' must be escaped with the preceding character '\'.
+In all other places characters '\_', '\*', '\[', '\]', '\(', '\)', '\~', '\`', '\>', '\#', '\+', '\-', '\=', '\|', '\{', '\}', '\.', '\!' must be escaped with the preceding character '\'.
+
+*bold \*text*
+_italic \*text_
+__underline__
+~strikethrough~
+||spoiler||
+*bold _italic bold ~italic bold strikethrough ||italic bold strikethrough spoiler||~ __underline italic bold___ bold*
+[inline URL](http://www.example.com/)
+[inline mention of a user](tg://user?id=123456789)
+![👍](tg://emoji?id=5368324170671202286)
+`inline fixed-width code`
+```
+pre-formatted fixed-width code block
+```
+```lua
+pre-formatted fixed-width code block written in the Python programming language
+```
+>Block quotation started
+>Block quotation continued
+>The last line of the block quotation**
+>The second block quotation started right after the previous\r
+>The third block quotation started right after the previous
\ No newline at end of file
diff --git a/playground/show_send.py b/playground/show_send.py
new file mode 100644
index 0000000..e1cf9ac
--- /dev/null
+++ b/playground/show_send.py
@@ -0,0 +1,43 @@
+import os
+
+from dotenv import load_dotenv
+from telebot import TeleBot
+
+import telegramify_markdown
+
+md = """
+'\_', '\*', '\[', '\]', '\(', '\)', '\~', '\`', '\>', '\#', '\+', '\-', '\=', '\|', '\{', '\}', '\.', '\!'
+_ , * , [ , ] , ( , ) , ~ , ` , > , # , + , - , = , | , { , } , . , !
+**bold text**
+*bold text*
+_italic text_
+__underline__
+~no valid strikethrough~
+~~strikethrough~~
+||spoiler||
+*bold _italic bold ~~italic bold strikethrough ||italic bold strikethrough spoiler||~~ __underline italic bold___ bold*
+__underline italic bold__
+[link](https://www.google.com)
+- [ ] Uncompleted task list item
+- [x] Completed task list item
+> Quote
+```python
+print("Hello, World!")
+```
+This is `inline code`
+1. First ordered list item
+2. Another item
+ - Unordered sub-list.
+1. Actual numbers don't matter, just that it's a number
+"""
+converted = telegramify_markdown.convert(md)
+print(converted)
+load_dotenv()
+telegram_bot_token = os.getenv("TELEGRAM_BOT_TOKEN", None)
+chat_id = os.getenv("TELEGRAM_CHAT_ID", None)
+bot = TeleBot(telegram_bot_token)
+bot.send_message(
+ chat_id,
+ converted,
+ parse_mode="MarkdownV2"
+)
diff --git a/playground/telegram_exp.py b/playground/telegram_exp.py
index 908c0d9..9a11e7c 100644
--- a/playground/telegram_exp.py
+++ b/playground/telegram_exp.py
@@ -2,6 +2,7 @@
def ignore(a):
+ print(a)
pass
@@ -29,3 +30,5 @@ def ignore(a):
""">Hello, World\!"""
ignore(formatting.escape_markdown("Hello, World!"))
"""Hello, World\!"""
+ignore(formatting.escape_markdown("\(Hello, World!)"))
+"""Hello, World\!"""
diff --git a/playground/use_case.py b/playground/use_case.py
index 3cbab14..2d210c4 100644
--- a/playground/use_case.py
+++ b/playground/use_case.py
@@ -4,17 +4,8 @@
markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol
markdown_symbol.link = "🔗" # If you want, Customizing the link symbol
md = """
-# 一级标题 `c!ode` # 一级标题 `code`
-[Link!AA](https://www.example.com)
-
-[key!]: https://www.google.com "a title!"
-
-[这是!链接2][asd!asd](https://www.example.com)
-[rttt]()
-![PIC](https://www.example.com/image.jpg)
-1. Order!ed
- 1. Order!ed sub
-- Unord*-.ered
+*bold _italic bold ~italic bold strikethrough ||italic bold strikethrough spoiler||~ __underline italic bold___ bold*
+~strikethrough~
"""
converted = telegramify_markdown.convert(md)
print(converted)
diff --git a/pyproject.toml b/pyproject.toml
index 2d600e1..4e9e56a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,12 +1,12 @@
[project]
name = "telegramify-markdown"
-version = "0.1.2"
+version = "0.1.3"
description = "Convert Markdown to a format usable by Telegram."
authors = [
{ name = "sudoskys", email = "coldlando@hotmail.com" },
]
dependencies = [
- "mistletoe>=1.3.0",
+ "mistletoe==1.3.0",
"pytelegrambotapi>=4.16.1",
"emoji>=2.10.1",
]
diff --git a/src/telegramify_markdown/__init__.py b/src/telegramify_markdown/__init__.py
index 4664f1a..ffe7526 100644
--- a/src/telegramify_markdown/__init__.py
+++ b/src/telegramify_markdown/__init__.py
@@ -1,4 +1,3 @@
-import os
from typing import Union
import mistletoe
@@ -10,16 +9,24 @@
from .render import TelegramMarkdownRenderer
+def markdownify(text: str):
+ # Characters that must be escaped in ordinary text:
+ # '_', '*', '[', ']', '(', ')', '~', '`', '>', '#', '+', '-', '=', '|', '{', '}', '.', '!'
+ # The escaping itself is delegated to formatting.escape_markdown.
+ return formatting.escape_markdown(text)
+
+
def _update_text(token: Union[SpanToken, BlockToken]):
"""Update the text contents of a span token and its children.
`InlineCode` tokens are left unchanged."""
if isinstance(token, ThematicBreak):
token.line = formatting.escape_markdown("————————")
+ pass
elif isinstance(token, LinkReferenceDefinition):
pass
else:
assert hasattr(token, "content"), f"Token {token} has no content attribute"
- token.content = formatting.escape_markdown(token.content)
+ token.content = markdownify(token.content)
def _update_block(token: BlockToken):
diff --git a/src/telegramify_markdown/render.py b/src/telegramify_markdown/render.py
index 26064fe..a27aa2e 100644
--- a/src/telegramify_markdown/render.py
+++ b/src/telegramify_markdown/render.py
@@ -3,6 +3,7 @@
from mistletoe import block_token, span_token
from mistletoe.markdown_renderer import MarkdownRenderer, LinkReferenceDefinition, Fragment
from telebot import formatting
+
from .customize import markdown_symbol
@@ -64,11 +65,14 @@ def render_setext_heading(
yield formatting.escape_markdown("——" * 5)
def render_emphasis(self, token: span_token.Emphasis) -> Iterable[Fragment]:
- token.delimiter = "_"
return super().render_emphasis(token)
def render_strong(self, token: span_token.Strong) -> Iterable[Fragment]:
- return self.embed_span(Fragment(token.delimiter * 1), token.children)
+ # Telegram renders *text* as bold and __text__ as underline, so the output depends on the delimiter.
+ if token.delimiter == "*":
+ return self.embed_span(Fragment(token.delimiter * 1), token.children)
+ # Any other delimiter ("_"): keep the doubled form (__text__), which Telegram treats as underline.
+ return self.embed_span(Fragment(token.delimiter * 2), token.children)
def render_strikethrough(
self, token: span_token.Strikethrough
@@ -129,6 +133,13 @@ def render_link_or_image(
def render_auto_link(self, token: span_token.AutoLink) -> Iterable[Fragment]:
yield Fragment(formatting.escape_markdown("<") + token.children[0].content + formatting.escape_markdown(">"))
+ def render_escape_sequence(
+ self, token: span_token.EscapeSequence
+ ) -> Iterable[Fragment]:
+ # Render an escape sequence.
+ # The token content has already been passed through escape_markdown, so it is emitted as-is here.
+ yield Fragment("" + token.children[0].content)
+
def render_table(
self, token: block_token.Table, max_line_length: int
) -> Iterable[str]:
diff --git a/tests/exp1.md b/tests/exp1.md
index e2be6fa..d789f3b 100644
--- a/tests/exp1.md
+++ b/tests/exp1.md
@@ -2,11 +2,15 @@
key: value
---
-# 一级标题 `c!ode` # 一级标题 `code`
+\(c!ode\)
-## 二级标题
+\# Heading Level 1 `c!ode`
-### 三级标题
+# Heading Level 1 `c!ode`
+
+## Heading Level 2
+
+### Heading Level 3
Header
======
@@ -14,55 +18,83 @@ Header
included as literal
1231asdasd
-**这是粗!体文本**
-*这是斜!体文本*
-~~这是删!除线文本~~
+**Bold text**
+*Italic text*
+~~Strikethrough text~~
-> 这是引用!文本
+> Blockquote text
-`这是内联!代码\\`
+`Inline code`
-\\\/\111`sad`
+\\/\\111`sad`
```
-这是代码块!
+Code block
```
```python
-# 这是带有语言指定的代码块
+# Code block with specified language
print("Hello, World!")
```
+```print("Hello, Inline Muti!")```
+
- item
- nested item
1. numbered item
-[key!]: https://www.google.com "a title!"
-
-some text
+[some text](https://www.example.com)
-[这是链!接](https://www.example.com)
-
-[这是!链接2][asd!asd](https://www.example.com)
+[some text2][asd!asd](https://www.example.com)
[rttt]()
-[这是链接3][asdasd2]
+[some text3][asdasd2]
-![这是图片](https://www.example.com/image.jpg)
+![Image](https://www.example.com/image.jpg)
---
-这是水平线
+Horizontal Rule
+
+**Bold** and *Italic* text
+
+| Header | Header |
+|--------|--------|
+| Cell | Cell |
+| Cell | Cell |
+
+- [ ] Uncompleted task list item
+- [x] Completed task list item
+
+In all other places
+characters '_', '*', '[', ']', '(', ')', '~', '`', '>', '#', '+', '-', '=', '|', '{', '}', '.', '!' must be escaped with the preceding character '\'.
+In all other places characters '\_', '\*', '\[', '\]', '\(', '\)', '\~', '\`', '\>', '\#', '\+', '\-', '\=', '\|', '\{', '\}', '\.', '\!'
+must be escaped with the preceding character '\'.
+
+*bold \*text*
+_italic \*text_
+__underline__
+~strikethrough~
+||spoiler||
+*bold _italic bold ~italic bold strikethrough ||italic bold strikethrough spoiler||~ __underline italic bold___ bold*
+[inline URL](http://www.example.com/)
+[inline mention of a user](tg://user?id=123456789)
+![👍](tg://emoji?id=5368324170671202286)
+`inline fixed-width code`
-内置的 **加粗** 和 *斜体* 文本
+```
+pre-formatted fixed-width code block
+```
-| 表头 | 表头 |
-|-----|-----|
-| 单元格 | 单元格 |
-| 单元格 | 单元格 |
+```lua
+pre-formatted fixed-width code block written in the Python programming language
+```
-- [ ] 这是未完成的任务列表项
-- [x] 这是已完成的任务列表项
+> Block quotation started
+> Block quotation continued
+> The last line of the block quotation**
+> The second block quotation started right after the previous\r
+> The third block quotation started right after the previous
\ No newline at end of file