Skip to content

Commit

Permalink
Make line breaks in some rare cases (paragraphs whose text is nested inside a child element)
Browse files Browse the repository at this point in the history
  • Loading branch information
mercuree committed Oct 23, 2020
1 parent c28566d commit 2443922
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 3 deletions.
5 changes: 4 additions & 1 deletion html_telegraph_poster/html_to_telegraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,10 @@ def preprocess_fragments(fragments):
body = fragments[0].getparent()

# remove paragraphs inside blockquote/aside/figure (Telegraph removes them anyway) and replace them with a line break
paras_inside_quote = body.xpath('.//*[self::blockquote|self::aside|self::figure]//p[text()][following-sibling::*[text()]]')
paras_inside_quote = body.xpath(
'.//*[self::blockquote|self::aside|self::figure]//'
'p[descendant-or-self::*[text()]][following-sibling::*[text()]]'
)
for para in paras_inside_quote:
para.tail = '\n'

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
long_description = fh.read()

setup(name='html_telegraph_poster',
version='0.2.21',
version='0.2.22',
description='Posts your html to telegra.ph blogging service',
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
10 changes: 9 additions & 1 deletion tests/test_htp.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,6 @@ def test_br_tags(self):
)

def test_iframe(self):
# multiple br tags should be replaced with one line break
html = '<iframe src="//www.youtube.com/embed/abcdef">legacy text</iframe>'
iframe_empty_src = '<iframe src=""></iframe>'
iframe_no_src = '<iframe></iframe>'
Expand Down Expand Up @@ -474,9 +473,14 @@ def test_blockquote(self):
def test_bad_para(self):
html = '<aside><p>text inside para</p><p>another para</p></aside>'
html2 = '<figure><figcaption><p>text inside para</p><p>another para</p></figcaption></figure>'
figcaption_para_with_link = '<figure><figcaption>' \
'<p><a href="https://telegram.org/">Telegram</a></p><p>Text after link</p>' \
'</figcaption></figure>'
html3 = '<blockquote><p></p><p>second para</p></blockquote>'
html4 = '<blockquote><p>first para</p><strong><em></em></strong><em>em text</em></blockquote>'
html5 = '<blockquote><p>first para</p><em></em><strong></strong><em></em></blockquote>'
# TODO: add an assertion for html6 (defined below but not yet checked by this test)
html6 = '''<blockquote class="cut">\n<p>text inside</p>\n</blockquote>'''
self.assertJson(
[{'children': ['text inside para\nanother para'], 'tag': 'aside'}],
convert_html_to_telegraph_format(html, clean_html=True)
Expand All @@ -497,6 +501,10 @@ def test_bad_para(self):
[{'children': [u'first para'], 'tag': 'blockquote'}],
convert_html_to_telegraph_format(html5, clean_html=True)
)
self.assertJson(
[{"tag": "figure", "children": [{"tag": "figcaption", "children": ["Telegram\nText after link"]}]}],
convert_html_to_telegraph_format(figcaption_para_with_link, clean_html=True)
)

def test_convert_without_clean(self):
# multiple br tags should be replaced with one line break
Expand Down

0 comments on commit 2443922

Please sign in to comment.