diff --git a/README.md b/README.md
index 04491fd..32a88c1 100755
--- a/README.md
+++ b/README.md
@@ -159,6 +159,8 @@ Example:
 - [X] image download and attachment bug solved (ex. modernistyczny-poznan.blogspot.com)
 - [X] improved resistance to http errors
 - [X] dedicated crawler class for zeissikonveb.de
+- [X] (on GUI) skip value is increased by the limit value (if one is set)
+- [X] download progress is much more verbose, and on the GUI the download can be cancelled at any time
 
 [» Complete Change Log here «](https://github.com/bohdanbobrowski/blog2epub/blob/master/CHANGELOG.md)
 
diff --git a/blog2epub/blog2epub_gui.py b/blog2epub/blog2epub_gui.py
index ffb27b6..dbc99b3 100644
--- a/blog2epub/blog2epub_gui.py
+++ b/blog2epub/blog2epub_gui.py
@@ -421,6 +421,15 @@ def console_clear(self):
     def console_delete_last_line(self):
         self.console.text = "\n".join(self.console.text.split("\n")[:-1])
 
+    @mainthread
+    def _update_skip_value(self):
+        if self.blog2epub_settings.data.limit and int(self.blog2epub_settings.data.limit) > 0:
+            skip = int(self.blog2epub_settings.data.limit)
+            if self.blog2epub_settings.data.skip and int(self.blog2epub_settings.data.skip) > 0:
+                skip += int(self.blog2epub_settings.data.skip)
+            self.skip_entry.text = str(skip)
+            self.save_settings()
+
     def _get_url(self):
         if urllib.parse.urlparse(self.url_entry.text):
             port, self.url_entry.text = prepare_port_and_url(self.url_entry.text)
@@ -440,6 +449,7 @@ def _download_ebook(self, blog2epub: Blog2Epub):
         self._update_tab_generate()
         if not blog2epub.crawler.cancelled:
             self.interface.print("Download completed.")
+            self._update_skip_value()
             if platform != "android":
                 notification.notify(
                     title="blog2epub - download completed",
diff --git a/blog2epub/crawlers/default.py b/blog2epub/crawlers/default.py
index c3270e4..5b3ea44 100644
--- a/blog2epub/crawlers/default.py
+++ b/blog2epub/crawlers/default.py
@@ -223,6 +223,7 @@ def _break_the_loop(self):
         return False
 
     def _get_sitemap_url(self) -> str:
+        self.interface.print("Analysing sitemaps", end="")
         robots_parser = robotparser.RobotFileParser()
         robots_parser.set_url(urljoin(self.url, "/robots.txt"))
         robots_parser.read()
@@ -296,6 +297,8 @@ def _get_pages_urls(self, sitemap_url: str) -> list[str] | None:
         sitemap = requests.get(sitemap_url)
         pages = None
         if sitemap.status_code == 404:
+            self.interface.print("")
+            self.interface.print("Sitemap not found!")
            pages = self._get_pages_from_blog_archive_widget()
         if sitemap.status_code == 200:
             sitemap_pages = []
@@ -310,7 +313,9 @@ def _get_pages_urls(self, sitemap_url: str) -> list[str] | None:
                     or re.search("wp-sitemap-posts-(post|page)-[0-9]+.xml$", sub_sitemap)
                     or re.search("(post|page)-sitemap[0-9-]*.xml$", sub_sitemap)
                 ):
+                    self.interface.print(".", end="")
                     pages += self._get_pages_from_sub_sitemap(sub_sitemap)
+            self.interface.print("")
         if pages is not None:
             self.interface.print(f"Found {len(pages)} articles to crawl.")
             try:
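
The `_update_skip_value` change means that, after a download completes successfully, the GUI's skip field is advanced by the configured limit, so the next run continues where the previous one stopped. A minimal standalone sketch of that arithmetic, assuming plain string inputs; `next_skip_value` is a hypothetical helper for illustration, not part of the blog2epub API, which works on the settings object and the Kivy text field instead:

```python
# Sketch of the skip-advancement logic introduced by _update_skip_value.
# next_skip_value() is a hypothetical helper, not the app's actual API.
def next_skip_value(limit: str, skip: str) -> str | None:
    """Return the new skip value after a completed run, or None if no limit is set."""
    if limit and int(limit) > 0:
        new_skip = int(limit)
        if skip and int(skip) > 0:
            new_skip += int(skip)  # continue from where the previous run stopped
        return str(new_skip)
    return None  # without a limit, the skip field is left untouched


if __name__ == "__main__":
    assert next_skip_value("10", "") == "10"    # first run: skip advances to the limit
    assert next_skip_value("10", "10") == "20"  # next run: limit is added on top of skip
    assert next_skip_value("", "5") is None     # no limit set: nothing changes
```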