From 0643dbf0964fc20d174fbf0d4023c83d12189239 Mon Sep 17 00:00:00 2001 From: Bohdan Bobrowski Date: Wed, 13 Nov 2024 21:24:27 +0100 Subject: [PATCH] Download canceling improvement --- blog2epub/blog2epub_cli.py | 4 ++-- blog2epub/crawlers/article_factory/abstract.py | 2 +- blog2epub/crawlers/default.py | 2 +- blog2epub/crawlers/zeissikonveb.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/blog2epub/blog2epub_cli.py b/blog2epub/blog2epub_cli.py index 07ce583..a5f115b 100755 --- a/blog2epub/blog2epub_cli.py +++ b/blog2epub/blog2epub_cli.py @@ -4,8 +4,8 @@ class CliInterface(EmptyInterface): - def print(self, text: str): - print(text) + def print(self, text: str, end: str = "\n"): + print(text, end=end) def exception(self, e): print(e) diff --git a/blog2epub/crawlers/article_factory/abstract.py b/blog2epub/crawlers/article_factory/abstract.py index 92eb20c..3aa0f54 100644 --- a/blog2epub/crawlers/article_factory/abstract.py +++ b/blog2epub/crawlers/article_factory/abstract.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Optional, Callable +from typing import Callable, Optional from lxml.html.soupparser import fromstring diff --git a/blog2epub/crawlers/default.py b/blog2epub/crawlers/default.py index 4de798c..1d609c3 100644 --- a/blog2epub/crawlers/default.py +++ b/blog2epub/crawlers/default.py @@ -269,7 +269,7 @@ def crawl(self): dirs=self.dirs, language=self.language, downloader=self.downloader, - download_callback=self._break_the_loop + download_callback=self._break_the_loop, ) art = art_factory.process() self.images = self.images + art.images diff --git a/blog2epub/crawlers/zeissikonveb.py b/blog2epub/crawlers/zeissikonveb.py index 2dc1069..46a461b 100644 --- a/blog2epub/crawlers/zeissikonveb.py +++ b/blog2epub/crawlers/zeissikonveb.py @@ -22,7 +22,7 @@ def get_title(self) -> Optional[str]: class ZeissIkonVEBCrawler(DefaultCrawler): - """TODO: https://zeissikonveb.de""" + """https://zeissikonveb.de""" def 
__init__(self, **kwargs): super().__init__(**kwargs) @@ -36,7 +36,7 @@ def __init__(self, **kwargs): Pattern(regex=r"font-size:[\s0-9]+px\;"), Pattern(regex=r"line-height:[\s0-9]+\;"), Pattern(regex=r"font-family:Arial, Helvetica, sans-serif"), - Pattern(regex=r""), + Pattern(regex=r'


<\/span><\/p>'), ] self.patterns.date = [ Pattern(regex=r"letzte Änderung[\s:]*([0-9]{1,2})[\.\s]*([A-Za-z]+)[\.\s]*([0-9]{4})"),