Skip to content

Commit

Permalink
Download progress
Browse files: browse the repository at this point in the history
  • Loading branch information
bohdanbobrowski committed Nov 13, 2024
1 parent 46c875a commit 4743371
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 13 deletions.
24 changes: 16 additions & 8 deletions blog2epub/blog2epub_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def __init__(self, **kwargs):
self._define_tab_about()
self.tabs.add_widget(self.tab_about)

self.interface = KivyInterface(self.console_output, self.console_clear)
self.interface = KivyInterface(self.console_output, self.console_clear, self.console_delete_last_line)
# DEBUG:
# self.interface.print(self.blog2epub_settings.data.dict())
# self.interface.print(str(pydantic.version.version_info()))
Expand Down Expand Up @@ -411,13 +411,17 @@ def _validate_email(self, input_widget, text):
input_widget.error = True

@mainthread
def console_output(self, text: str):
self.console.text = self.console.text + str(text) + "\n"
def console_output(self, text: str, end: str = "\n"):
self.console.text = self.console.text + str(text) + end

@mainthread
def console_clear(self):
self.console.text = ""

@mainthread
def console_delete_last_line(self):
self.console.text = "\n".join(self.console.text.split("\n")[:-1])

def _get_url(self):
if urllib.parse.urlparse(self.url_entry.text):
port, self.url_entry.text = prepare_port_and_url(self.url_entry.text)
Expand Down Expand Up @@ -546,7 +550,6 @@ def cancel_download(self, *args, **kwargs):

@mainthread
def _disable_download_button(self):
self.interface.print("Downloading...")
self.download_button_container.remove_widget(self.download_button)
self.download_button_container.add_widget(self.cancel_button)

Expand Down Expand Up @@ -641,13 +644,18 @@ def success(self, ebook: Book):


class KivyInterface(EmptyInterface):
def __init__(self, console_output, console_clear):
def __init__(self, console_output, console_clear, console_delete_last_line):
self.console_output = console_output
self.console_clear = console_clear
self.console_delete_last_line = console_delete_last_line

def print(self, text: str, end: str = "\n"):
if len(text) > 1:
logging.info(text)
self.console_output(text, end)

def print(self, text: str):
logging.info(text)
self.console_output(text)
def delete_line(self):
self.console_delete_last_line()

def exception(self, e):
logging.error("Exception: " + str(e))
Expand Down
6 changes: 5 additions & 1 deletion blog2epub/common/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def download_image(self, image_obj: ImageModel) -> bool:
image_obj.url = self._fix_image_url(image_obj.url)
img_hash = self.get_urlhash(image_obj.url)
img_type = os.path.splitext(image_obj.url)[1].lower()
img_type = img_type.split("?")[0]
if img_type not in [".jpeg", ".jpg", ".png", ".bmp", ".gif", ".webp"]:
return False
original_fn = os.path.join(self.dirs.originals, img_hash + "." + img_type)
Expand All @@ -179,6 +180,9 @@ def download_image(self, image_obj: ImageModel) -> bool:
picture.thumbnail(self.images_size, Image.LANCZOS) # type: ignore
converted_picture = picture.convert("L")
converted_picture.save(resized_fn, format="JPEG", quality=self.images_quality)
os.remove(original_fn)
try:
os.remove(original_fn)
except PermissionError:
pass
return True
return False
10 changes: 8 additions & 2 deletions blog2epub/common/interfaces.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import sys


class EmptyInterface:
"""Empty interface for script output."""

def print(self, text: str):
print(text)
def delete_line(self):
sys.stdout.write("\033[K")

def print(self, text: str, end: str = "\n"):
print(text, end=end)

def exception(self, **kwargs):
print(kwargs)
4 changes: 4 additions & 0 deletions blog2epub/crawlers/article_factory/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def _remove_images(self, images_html: list[bytes], images_list: list[ImageModel]
def get_images(self) -> list[ImageModel]:
self.images_list = []
images_html: list[bytes] = []
self.interface.print("Downloading", end="")
if self.patterns is not None:
for pattern in self.patterns.images:
if pattern.regex:
Expand All @@ -80,9 +81,12 @@ def get_images(self) -> list[ImageModel]:
image_obj = ImageModel(url=image_url, description=image_description)
if self.downloader.download_image(image_obj):
self.images_list.append(image_obj)
self.interface.print(".", end="")
images_html.append(tostring(image_element))
self._remove_images(images_html=images_html, images_list=self.images_list)
# images will be inserted back after cleaning the content
self.interface.delete_line()
self.interface.print("")
return self.images_list

def _insert_images(self, article_content: str, images_list: list[ImageModel]) -> str:
Expand Down
6 changes: 4 additions & 2 deletions blog2epub/crawlers/zeissikonveb.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@ def __init__(self, **kwargs):
self.patterns.content_cleanup = [
Pattern(xpath='//div[@data-kind="MENU"]'),
Pattern(regex=r"background-color:rgba(255,255,255,1);color:rgba(156,156,156,1);"),
Pattern(regex=r"font-size:[0-9]+px\;"),
Pattern(regex=r"line-height:[0-9]+\;"),
Pattern(regex=r"font-size:[\s0-9]+px\;"),
Pattern(regex=r"line-height:[\s0-9]+\;"),
Pattern(regex=r"font-family:Arial, Helvetica, sans-serif"),
Pattern(regex=r"<p/>"),
]
self.patterns.date = [
Pattern(regex=r"letzte Änderung[\s:]*([0-9]{1,2})[\.\s]*([A-Za-z]+)[\.\s]*([0-9]{4})"),
Pattern(regex=r"letzte Aktualisierung[\s:]*([0-9]{1,2})[\.\s]*([A-Za-z]+)[\.\s]*([0-9]{4})"),
Pattern(regex=r"last update[\s:]*([0-9]{1,2})[\.\s]*([A-Za-z]+)[\.\s]*([0-9]{4})"),
]
self.patterns.images = [
Pattern(xpath='//div[@data-kind="IMAGE"]//img'),
Expand Down

0 comments on commit 4743371

Please sign in to comment.