diff --git a/.github/result.png b/.github/result.png new file mode 100644 index 0000000..1dd9d39 Binary files /dev/null and b/.github/result.png differ diff --git a/README.md b/README.md index 150da4e..8a973db 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,30 @@ or if you use `pdm`: pdm add telegramify-markdown ``` +## Supported Features + +- [x] Headings (1-6) +- [x] Links [text](url) +- [x] Images ![alt] +- [x] Lists (Ordered, Unordered) +- [x] Tables |-|-| +- [x] Horizontal Rule ---- +- [x] *Text* **Styles** +- [x] __Underline__ +- [x] Code Blocks +- [x] `Inline Code` +- [x] Block Quotes +- [x] ~~Strikethrough~~ +- [ ] Task Lists +- [ ] ~Strikethrough~ +- [ ] ||Spoiler|| +- [ ] Tg Emoji +- [ ] Tg User At + +> [!NOTE] +> Since mistletoe doesn't parse task lists or spoilers, they cannot be supported. +> `~Strikethrough~` is not recognized, even though it appears in the official Telegram documentation; please use the `~~Strikethrough~~` format instead. + ## Use case ````python3 @@ -34,17 +58,29 @@ from telegramify_markdown.customize import markdown_symbol markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol markdown_symbol.link = "🔗" # If you want, Customizing the link symbol md = """ -# 一级标题 `c!ode` # 一级标题 `code` -[Link!AA](https://www.example.com) - -[key!]: https://www.google.com "a title!" - -[这是!链接2][asd!asd](https://www.example.com) -[rttt]() -![PIC](https://www.example.com/image.jpg) -1. Order!ed - 1. Order!ed sub -- Unord*-.ered +'\_', '\*', '\[', '\]', '\(', '\)', '\~', '\`', '\>', '\#', '\+', '\-', '\=', '\|', '\{', '\}', '\.', '\!' +_ , * , [ , ] , ( , ) , ~ , ` , > , # , + , - , = , | , { , } , . , ! 
+**bold text** +*bold text* +_italic text_ +__underline__ +~no valid strikethrough~ +~~strikethrough~~ +||spoiler|| +*bold _italic bold ~~italic bold strikethrough ||italic bold strikethrough spoiler||~~ __underline italic bold___ bold* +__underline italic bold__ +[link](https://www.google.com) +- [ ] Uncompleted task list item +- [x] Completed task list item +> Quote +```python +print("Hello, World!") +``` +This is `inline code` +1. First ordered list item +2. Another item + - Unordered sub-list. +1. Actual numbers don't matter, just that it's a number """ converted = telegramify_markdown.convert(md) print(converted) @@ -52,19 +88,4 @@ print(converted) output as follows: -```markdown -*📌 一级标题 `c\!ode` \# 一级标题 `code`* -[Link\!AA](https://www\.example\.com) - -🔗[a title\!](https://www\.google\.com) - -\[这是\!链接2\][asd\!asd](https://www\.example\.com) -[rttt]() -🖼[PIC](https://www\.example\.com/image\.jpg) -1\. Order\!ed -1\. Order\!ed sub -⦁ Unord\*\-\.ered -``` - -> Note: Telegram Server automatically processes the double of `\`(`\\`) again (even after escaping), which is beyond the -> control of us. 
\ No newline at end of file +![.github/result.png](.github/result.png) diff --git a/pdm.lock b/pdm.lock index 16507b5..85008b3 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["cross_platform", "inherit_metadata"] lock_version = "4.4.1" -content_hash = "sha256:5dbe4a4926736575a5f5d2800d9a8b5a41ea23ecd7e12b28140ebe8fdf2c2524" +content_hash = "sha256:a0723c63fba5731ca1c0d4ee5fa63e03889d1356546d79c91be6a58ccad70961" [[package]] name = "certifi" diff --git a/playground/exp1.md b/playground/exp1.md index 52967d3..9278c4a 100644 --- a/playground/exp1.md +++ b/playground/exp1.md @@ -2,11 +2,15 @@ key: value --- -# 一级标题 `c!ode` # 一级标题 `code` +\(c!ode\) -## 二级标题 +\# Heading Level 1 `c!ode` -### 三级标题 +# Heading Level 1 `c!ode` + +## Heading Level 2 + +### Heading Level 3 Header ====== @@ -14,24 +18,24 @@ Header included as literal 1231asdasd -**这是粗!体文本** -*这是斜!体文本* -~~这是删!除线文本~~ +**Bold text** +*Italic text* +~~Strikethrough text~~ -> 这是引用!文本 +> Blockquote text -`这是内联!代码\\\\` +`Inline code` -\\\/\111`sad` +\\/\\111`sad` ``` -这是代码块! +Code block ``` ```python -# 这是带有语言指定的代码块 +# Code block with specified language print("Hello, World!") ``` @@ -42,29 +46,50 @@ print("Hello, World!") 1. numbered item -[key!]: https://www.google.com "a title!" - -

some text

+[some text](https://www.example.com) -[这是链!接](https://www.example.com) - -[这是!链接2][asd!asd](https://www.example.com) +[some text2][asd!asd](https://www.example.com) [rttt]() -[这是链接3][asdasd2] +[some text3][asdasd2] -![这是图片](https://www.example.com/image.jpg) +![Image](https://www.example.com/image.jpg) --- -这是水平线 - -内置的 **加粗** 和 *斜体* 文本 - -| 表头 | 表头 | -|-----|-----| -| 单元格 | 单元格 | -| 单元格 | 单元格 | - -- [ ] 这是未完成的任务列表项 -- [x] 这是已完成的任务列表项 +Horizontal Rule + +**Bold** and *Italic* text + +| Header | Header | +|--------|--------| +| Cell | Cell | +| Cell | Cell | + +- [ ] Uncompleted task list item +- [x] Completed task list item + +In all other places characters '_', '*', '[', ']', '(', ')', '~', '`', '>', '#', '+', '-', '=', '|', '{', '}', '.', '!' must be escaped with the preceding character '\'. +In all other places characters '\_', '\*', '\[', '\]', '\(', '\)', '\~', '\`', '\>', '\#', '\+', '\-', '\=', '\|', '\{', '\}', '\.', '\!' must be escaped with the preceding character '\'. + +*bold \*text* +_italic \*text_ +__underline__ +~strikethrough~ +||spoiler|| +*bold _italic bold ~italic bold strikethrough ||italic bold strikethrough spoiler||~ __underline italic bold___ bold* +[inline URL](http://www.example.com/) +[inline mention of a user](tg://user?id=123456789) +![👍](tg://emoji?id=5368324170671202286) +`inline fixed-width code` +``` +pre-formatted fixed-width code block +``` +```lua +pre-formatted fixed-width code block written in the Python programming language +``` +>Block quotation started +>Block quotation continued +>The last line of the block quotation** +>The second block quotation started right after the previous\r +>The third block quotation started right after the previous \ No newline at end of file diff --git a/playground/show_send.py b/playground/show_send.py new file mode 100644 index 0000000..e1cf9ac --- /dev/null +++ b/playground/show_send.py @@ -0,0 +1,43 @@ +import os + +from dotenv import load_dotenv +from telebot import TeleBot + +import 
telegramify_markdown + +md = """ +'\_', '\*', '\[', '\]', '\(', '\)', '\~', '\`', '\>', '\#', '\+', '\-', '\=', '\|', '\{', '\}', '\.', '\!' +_ , * , [ , ] , ( , ) , ~ , ` , > , # , + , - , = , | , { , } , . , ! +**bold text** +*bold text* +_italic text_ +__underline__ +~no valid strikethrough~ +~~strikethrough~~ +||spoiler|| +*bold _italic bold ~~italic bold strikethrough ||italic bold strikethrough spoiler||~~ __underline italic bold___ bold* +__underline italic bold__ +[link](https://www.google.com) +- [ ] Uncompleted task list item +- [x] Completed task list item +> Quote +```python +print("Hello, World!") +``` +This is `inline code` +1. First ordered list item +2. Another item + - Unordered sub-list. +1. Actual numbers don't matter, just that it's a number +""" +converted = telegramify_markdown.convert(md) +print(converted) +load_dotenv() +telegram_bot_token = os.getenv("TELEGRAM_BOT_TOKEN", None) +chat_id = os.getenv("TELEGRAM_CHAT_ID", None) +bot = TeleBot(telegram_bot_token) +bot.send_message( + chat_id, + converted, + parse_mode="MarkdownV2" +) diff --git a/playground/telegram_exp.py b/playground/telegram_exp.py index 908c0d9..9a11e7c 100644 --- a/playground/telegram_exp.py +++ b/playground/telegram_exp.py @@ -2,6 +2,7 @@ def ignore(a): + print(a) pass @@ -29,3 +30,5 @@ def ignore(a): """>Hello, World\!""" ignore(formatting.escape_markdown("Hello, World!")) """Hello, World\!""" +ignore(formatting.escape_markdown("\(Hello, World!)")) +"""Hello, World\!""" diff --git a/playground/use_case.py b/playground/use_case.py index 3cbab14..2d210c4 100644 --- a/playground/use_case.py +++ b/playground/use_case.py @@ -4,17 +4,8 @@ markdown_symbol.head_level_1 = "📌" # If you want, Customizing the head level 1 symbol markdown_symbol.link = "🔗" # If you want, Customizing the link symbol md = """ -# 一级标题 `c!ode` # 一级标题 `code` -[Link!AA](https://www.example.com) - -[key!]: https://www.google.com "a title!" 
- -[这是!链接2][asd!asd](https://www.example.com) -[rttt]() -![PIC](https://www.example.com/image.jpg) -1. Order!ed - 1. Order!ed sub -- Unord*-.ered +*bold _italic bold ~italic bold strikethrough ||italic bold strikethrough spoiler||~ __underline italic bold___ bold* +~strikethrough~ """ converted = telegramify_markdown.convert(md) print(converted) diff --git a/pyproject.toml b/pyproject.toml index 2d600e1..4e9e56a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,12 @@ [project] name = "telegramify-markdown" -version = "0.1.2" +version = "0.1.3" description = "Convert Markdown to a format usable by Telegram." authors = [ { name = "sudoskys", email = "coldlando@hotmail.com" }, ] dependencies = [ - "mistletoe>=1.3.0", + "mistletoe==1.3.0", "pytelegrambotapi>=4.16.1", "emoji>=2.10.1", ] diff --git a/src/telegramify_markdown/__init__.py b/src/telegramify_markdown/__init__.py index 4664f1a..ffe7526 100644 --- a/src/telegramify_markdown/__init__.py +++ b/src/telegramify_markdown/__init__.py @@ -1,4 +1,3 @@ -import os from typing import Union import mistletoe @@ -10,16 +9,24 @@ from .render import TelegramMarkdownRenderer +def markdownify(text: str): + # '_', '*', '[', ']', '(', ')', '~', '`', '>', '#', '+', '-', '=', '|', '{', '}', '.', '!' + # if text in ["_", "*", "[", "]", "(", ")", "~", "`", ">", "#", "+", "-", "=", "|", "{", "}", ".", "!"]: + # return text + return formatting.escape_markdown(text) + + def _update_text(token: Union[SpanToken, BlockToken]): """Update the text contents of a span token and its children. 
`InlineCode` tokens are left unchanged.""" if isinstance(token, ThematicBreak): token.line = formatting.escape_markdown("————————") + pass elif isinstance(token, LinkReferenceDefinition): pass else: assert hasattr(token, "content"), f"Token {token} has no content attribute" - token.content = formatting.escape_markdown(token.content) + token.content = markdownify(token.content) def _update_block(token: BlockToken): diff --git a/src/telegramify_markdown/render.py b/src/telegramify_markdown/render.py index 26064fe..a27aa2e 100644 --- a/src/telegramify_markdown/render.py +++ b/src/telegramify_markdown/render.py @@ -3,6 +3,7 @@ from mistletoe import block_token, span_token from mistletoe.markdown_renderer import MarkdownRenderer, LinkReferenceDefinition, Fragment from telebot import formatting + from .customize import markdown_symbol @@ -64,11 +65,14 @@ def render_setext_heading( yield formatting.escape_markdown("——" * 5) def render_emphasis(self, token: span_token.Emphasis) -> Iterable[Fragment]: - token.delimiter = "_" return super().render_emphasis(token) def render_strong(self, token: span_token.Strong) -> Iterable[Fragment]: - return self.embed_span(Fragment(token.delimiter * 1), token.children) + # Telegram strong: *text* but __text__ for emphasis, so we need to check the delimiter + if token.delimiter == "*": + return self.embed_span(Fragment(token.delimiter * 1), token.children) + # __ + return self.embed_span(Fragment(token.delimiter * 2), token.children) def render_strikethrough( self, token: span_token.Strikethrough @@ -129,6 +133,13 @@ def render_link_or_image( def render_auto_link(self, token: span_token.AutoLink) -> Iterable[Fragment]: yield Fragment(formatting.escape_markdown("<") + token.children[0].content + formatting.escape_markdown(">")) + def render_escape_sequence( + self, token: span_token.EscapeSequence + ) -> Iterable[Fragment]: + # 渲染转义字符 + # because the escape_markdown already happened in the parser, we can skip it here. 
+ yield Fragment("" + token.children[0].content) + def render_table( self, token: block_token.Table, max_line_length: int ) -> Iterable[str]: diff --git a/tests/exp1.md b/tests/exp1.md index e2be6fa..d789f3b 100644 --- a/tests/exp1.md +++ b/tests/exp1.md @@ -2,11 +2,15 @@ key: value --- -# 一级标题 `c!ode` # 一级标题 `code` +\(c!ode\) -## 二级标题 +\# Heading Level 1 `c!ode` -### 三级标题 +# Heading Level 1 `c!ode` + +## Heading Level 2 + +### Heading Level 3 Header ====== @@ -14,55 +18,83 @@ Header included as literal 1231asdasd -**这是粗!体文本** -*这是斜!体文本* -~~这是删!除线文本~~ +**Bold text** +*Italic text* +~~Strikethrough text~~ -> 这是引用!文本 +> Blockquote text -`这是内联!代码\\` +`Inline code` -\\\/\111`sad` +\\/\\111`sad` ``` -这是代码块! +Code block ``` ```python -# 这是带有语言指定的代码块 +# Code block with specified language print("Hello, World!") ``` +```print("Hello, Inline Muti!")``` + - item - nested item 1. numbered item -[key!]: https://www.google.com "a title!" - -

some text

+[some text](https://www.example.com) -[这是链!接](https://www.example.com) - -[这是!链接2][asd!asd](https://www.example.com) +[some text2][asd!asd](https://www.example.com) [rttt]() -[这是链接3][asdasd2] +[some text3][asdasd2] -![这是图片](https://www.example.com/image.jpg) +![Image](https://www.example.com/image.jpg) --- -这是水平线 +Horizontal Rule + +**Bold** and *Italic* text + +| Header | Header | +|--------|--------| +| Cell | Cell | +| Cell | Cell | + +- [ ] Uncompleted task list item +- [x] Completed task list item + +In all other places +characters '_', '*', '[', ']', '(', ')', '~', '`', '>', '#', '+', '-', '=', '|', '{', '}', '.', '!' must be escaped with the preceding character '\'. +In all other places characters '\_', '\*', '\[', '\]', '\(', '\)', '\~', '\`', '\>', '\#', '\+', '\-', '\=', '\|', '\{', '\}', '\.', '\!' +must be escaped with the preceding character '\'. + +*bold \*text* +_italic \*text_ +__underline__ +~strikethrough~ +||spoiler|| +*bold _italic bold ~italic bold strikethrough ||italic bold strikethrough spoiler||~ __underline italic bold___ bold* +[inline URL](http://www.example.com/) +[inline mention of a user](tg://user?id=123456789) +![👍](tg://emoji?id=5368324170671202286) +`inline fixed-width code` -内置的 **加粗** 和 *斜体* 文本 +``` +pre-formatted fixed-width code block +``` -| 表头 | 表头 | -|-----|-----| -| 单元格 | 单元格 | -| 单元格 | 单元格 | +```lua +pre-formatted fixed-width code block written in the Python programming language +``` -- [ ] 这是未完成的任务列表项 -- [x] 这是已完成的任务列表项 +> Block quotation started +> Block quotation continued +> The last line of the block quotation** +> The second block quotation started right after the previous\r +> The third block quotation started right after the previous \ No newline at end of file