Skip to content

Commit

Permalink
Android fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
bohdanbobrowski committed Nov 14, 2024
1 parent 68a2c0e commit cf294e3
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions blog2epub/crawlers/article_factory/default.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,9 @@
from datetime import datetime
from typing import Optional

import bs4
import dateutil
import html5lib
from lxml.etree import tostring
from lxml.html.soupparser import fromstring
from strip_tags import strip_tags # type: ignore
Expand Down Expand Up @@ -125,6 +127,11 @@ def get_content(self) -> str:
content_html = re.sub(r'<a name=["\']more["\']/>', "", content_html)
content_html = re.sub(r"<div[^>]*>", "<p>", content_html)
content_html = content_html.replace("</div>", "")
self.interface.print(f"html5lib {html5lib.__version__}")
self.interface.print(f"BeautifulSoup {bs4.__version__}")
soup = bs4.BeautifulSoup(content_html, "html5lib", multi_valued_attributes=False)
if soup:
pass
content = strip_tags(
content_html,
minify=True,
Expand Down

0 comments on commit cf294e3

Please sign in to comment.