From dbe5543f28f21eab889984788b6fdf38c11baa43 Mon Sep 17 00:00:00 2001 From: Bohdan Bobrowski Date: Tue, 19 Nov 2024 08:43:28 +0100 Subject: [PATCH] Split integration tests to separate files and add test for salam pax's blog --- .../crawlers/article_factory/blogspot.py | 5 ++-- blog2epub/crawlers/article_factory/default.py | 8 ++++-- poetry.lock | 27 ++++++++++++++++++- pyproject.toml | 1 + 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/blog2epub/crawlers/article_factory/blogspot.py b/blog2epub/crawlers/article_factory/blogspot.py index 5be457b..a5e2ff3 100644 --- a/blog2epub/crawlers/article_factory/blogspot.py +++ b/blog2epub/crawlers/article_factory/blogspot.py @@ -9,8 +9,9 @@ class BlogspotArticleFactory(DefaultArticleFactory): def get_title(self) -> Optional[str]: title = super().get_title() - if self.blog_title is not None and title == self.blog_title: - title = "Pomidor" + date = self.get_date() + if self.blog_title is not None and title == self.blog_title and date is not None: + title = date.strftime("%A, %d %B %Y, %H:%M") return title def process(self) -> ArticleModel: diff --git a/blog2epub/crawlers/article_factory/default.py b/blog2epub/crawlers/article_factory/default.py index 3a32538..853f67f 100644 --- a/blog2epub/crawlers/article_factory/default.py +++ b/blog2epub/crawlers/article_factory/default.py @@ -22,9 +22,13 @@ def get_title(self) -> Optional[str]: if title_pattern.xpath: title = self.tree.xpath(title_pattern.xpath) if len(title) > 1: - title = title[0] - title = html.unescape(title.strip()) + title = html.unescape(title[0].strip()) break + while isinstance(title, list): + try: + title = title[0] + except IndexError: + title = None return title def get_date(self) -> Optional[datetime]: diff --git a/poetry.lock b/poetry.lock index 5578bea..c049f14 100644 --- a/poetry.lock +++ b/poetry.lock @@ -455,6 +455,20 @@ files = [ {file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"}, ] +[[package]] +name = "ftfy" +version = "6.3.1" +description = "Fixes mojibake and other problems with Unicode, after the fact" +optional = false +python-versions = ">=3.9" +files = [ + {file = "ftfy-6.3.1-py3-none-any.whl", hash = "sha256:7c70eb532015cd2f9adb53f101fb6c7945988d023a085d127d1573dc49dd0083"}, + {file = "ftfy-6.3.1.tar.gz", hash = "sha256:9b3c3d90f84fb267fe64d375a07b7f8912d817cf86009ae134aa03e1819506ec"}, +] + +[package.dependencies] +wcwidth = "*" + [[package]] name = "ghp-import" version = "2.1.0" @@ -2064,6 +2078,17 @@ files = [ [package.extras] watchmedo = ["PyYAML (>=3.10)"] +[[package]] +name = "wcwidth" +version = "0.2.13" +description = "Measures the displayed width of unicode strings in a terminal" +optional = false +python-versions = "*" +files = [ + {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, + {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, +] + [[package]] name = "webencodings" version = "0.5.1" @@ -2097,4 +2122,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.10, <3.13" -content-hash = "db9b0522bfb0aa734b35dee3b6c6efcfcb46dfe46b2f53438e06f851102003e3" +content-hash = "5c0b244392da2860cfb67984837cd3df79c40a6eebb470b66d668d92ecffa9bc" diff --git a/pyproject.toml b/pyproject.toml index 0517642..d8c1c60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ click = "^8.1.7" webencodings = "^0.5.1" soupsieve = "^2.6" pytz = "^2024.2" +ftfy = "^6.3.1" [tool.poetry.group.docs.dependencies] mkdocs = "^1.6.0"