Commit

Merge pull request #1275 from dipu-bd/dev
Version 2.29.7
dipu-bd authored Feb 13, 2022
2 parents 5cd79d2 + 6eefea1 commit 04143af
Showing 6 changed files with 107 additions and 31 deletions.
65 changes: 46 additions & 19 deletions README.md
@@ -27,6 +27,7 @@ An app to download novels from online sources and generate e-books.
- [Installation](#installation)
- [Standalone Bundle (Windows, Linux)](#standalone-bundle-windows-linux)
- [PIP (Windows, Mac, and Linux)](#pip-windows-mac-and-linux)
- [Docker](#docker)
- [Termux (Android)](#termux-android)
- [Chatbots](#chatbots)
- [Discord](#discord)
@@ -75,48 +76,74 @@ Without it, you will only get output in epub, text, and web formats.
To install this app or update an existing installation via `pip`, just run:

```bash
$ pip install --user -U lightnovel-crawler
$ pip install -U lightnovel-crawler
```

In some cases you may have to use `python3 -m pip`, `pip3`, or `python -m pip` instead. You also do not need the `--user` option if you are running as root.
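
For example, on systems where the plain `pip` command is missing or tied to a different Python installation, either of these equivalent invocations should work:

```bash
# Equivalent ways to install or upgrade when `pip` is unavailable or ambiguous
$ python3 -m pip install -U lightnovel-crawler
$ pip3 install -U lightnovel-crawler
```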

Next, open your terminal and enter:

```bash
$ lightnovel-crawler

# Or, a shortcut:
$ lncrawl
```

> To view extra logs, use: `lncrawl -lll`

### Docker

Docker is a convenient way to run the app anywhere.

- First, clone the project:

```bash
$ git clone https://github.com/dipu-bd/lightnovel-crawler
```

- Build the Docker image:

```bash
$ cd lightnovel-crawler
$ docker build -t lncrawl -f ./scripts/Dockerfile .
```

- Run commands using Docker:

```bash
$ mkdir ~/Lightnovels
$ docker run -v ~/Lightnovels:/app/Lightnovels -it lncrawl
```

> You can set up an _alias_ for the above command in your terminal's profile so it can be run with a single word, as sketched below.
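
A minimal sketch of such an alias, assuming the image was tagged `lncrawl` and the volume path used above; adjust both to your own setup:

```bash
# Hypothetical alias for ~/.bashrc or ~/.zshrc — mirrors the docker run command above
alias lncrawl='docker run -v ~/Lightnovels:/app/Lightnovels -it lncrawl'
```
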
### Termux (Android)

> There is no official support for running Python on mobile devices.
> It is not guaranteed that the app will run smoothly on all devices.
> It is recommended to use the bots on either Discord or Telegram if you are on mobile.
> Please read before proceeding:
> - It is not guaranteed that the app will run smoothly on all devices.
> - It may take a long time to install depending on your mobile processor.
> - It is recommended to use the bots on either Discord or Telegram if you are on mobile.

📱 Using Termux, you can run this app on your Android phone too. Follow these instructions:

- Install [Termux](https://play.google.com/store/apps/details?id=com.termux) from the Play Store.
- Open the app and run these commands one by one:
- `apt update && apt upgrade`
- `pkg upgrade`
- `pkg install python libxml2 libxslt libjpeg-turbo`
- `pip install -U pip wheel setuptools`
- `pip install lightnovel-crawler`
- `termux-setup-storage`
- `pkg install ndk-sysroot make python zlib clang`
- `pkg install libxml2 libxslt libiconv libcrypt libffi zlib libjpeg-turbo`
- `pip install -U lightnovel-crawler` to install the latest version of this app.
- Now exit the console and relaunch it.
- Type `cd ~/storage/downloads` to store novels there.
- Type `lncrawl` to start.
- You can navigate up using <kbd>Volume UP</kbd> + <kbd>W</kbd> and down using <kbd>Volume UP</kbd> + <kbd>S</kbd>.
- `cd ~/storage/downloads`
- `lncrawl`
- You can navigate up using <kbd>Vol UP</kbd> + <kbd>W</kbd> and down using <kbd>Vol UP</kbd> + <kbd>S</kbd>.

When there is a new update available, you can install it just by running `pip install -U lightnovel-crawler`. You will not have to run all the above commands again.
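
For convenience, here is a sketch that simply chains the Termux setup commands listed above into one copy-pasteable sequence (package names are taken verbatim from that list):

```bash
# One-shot Termux setup — chains the commands from the steps above
termux-setup-storage
pkg install ndk-sysroot make python zlib clang
pkg install libxml2 libxslt libiconv libcrypt libffi zlib libjpeg-turbo
pip install -U lightnovel-crawler
```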

**PyDroid**

You can also use PyDroid on Android phones. Check this discussion for a custom script to run the app: https://github.com/dipu-bd/lightnovel-crawler/discussions/1137

<!-- TODO -->
<!-- ### Google Colab -->

### Chatbots

#### Discord
@@ -150,7 +177,7 @@ $ git clone https://github.com/dipu-bd/lightnovel-crawler
- Open a command prompt inside the project folder and install the requirements:

```bash
$ pip install --user -r requirements.txt
$ pip install -r requirements.txt
```

- Run the program (use Python 3.6 or higher):
@@ -176,7 +203,7 @@ $ git clone https://github.com/dipu-bd/lightnovel-crawler
- Install the requirements:

```bash
$ pip3 install --user -r requirements.txt
$ pip3 install -r requirements.txt
```

- Copy the `.env.example` file to `.env`. Edit this file and provide your API credentials there, as in the hypothetical sketch below.
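
A purely hypothetical sketch of what `.env` might contain — the actual variable names are defined in the repository's `.env.example`, so copy them from there:

```bash
# Hypothetical .env contents — use the real variable names from .env.example
DISCORD_TOKEN=your-discord-bot-token
TELEGRAM_TOKEN=your-telegram-bot-token
```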
@@ -209,7 +236,7 @@ $ lncrawl -h
┃┃╱╭╋┫╭╮┃╭╮┃┃┃╭╮┫╭╮┃╰╯┃┃━┫┃╱┃┃╱╭┫╭┫╭╮┃╰╯╰╯┃┃┃┃━┫╭╯
┃╰━╯┃┃╰╯┃┃┃┃╰┫┃┃┃╰╯┣╮╭┫┃━┫╰╮┃╰━╯┃┃┃╭╮┣╮╭╮╭┫╰┫┃━┫┃
╰━━━┻┻━╮┣╯╰┻━┻╯╰┻━━╯╰╯╰━━┻━╯╰━━━┻╯╰╯╰╯╰╯╰╯╰━┻━━┻╯
╱╱╱╱╱╭━╯┃ v2.29.6
╱╱╱╱╱╭━╯┃ v2.29.7
╱╱╱╱╱╰━━╯ 🔗 https://github.com/dipu-bd/lightnovel-crawler
--------------------------------------------------------------------------------
usage: lncrawl [options...]
2 changes: 1 addition & 1 deletion lncrawl/VERSION
@@ -1 +1 @@
2.29.6
2.29.7
2 changes: 1 addition & 1 deletion lncrawl/bots/console/output_style.py
@@ -75,8 +75,8 @@ def force_replace_old(self):
'name': 'replace',
'message': 'What to do with existing folder?',
'choices': [
'Remove old folder and start fresh',
'Download remaining chapters only',
'Remove old folder and start fresh'
],
},
])
6 changes: 3 additions & 3 deletions lncrawl/core/app.py
@@ -23,16 +23,16 @@ class App:
'''Bots are based on top of an instance of this app'''

def __init__(self):
self.progress = 0
self.user_input = None
self.progress: float = 0
self.user_input: Optional[str] = None
self.crawler_links: List[str] = []
self.crawler: Optional[Crawler] = None
self.login_data: Optional[Tuple[str, str]] = None
self.search_results: List[Dict[str, Any]] = []
self.output_path = C.DEFAULT_OUTPUT_PATH
self.pack_by_volume = False
self.chapters: List[Dict[str, Any]] = []
self.book_cover = None
self.book_cover: Optional[str] = None
self.output_formats: Dict[str, bool] = {}
self.archived_outputs = None
self.good_file_name: str = ''
61 changes: 55 additions & 6 deletions lncrawl/core/downloader.py
@@ -10,6 +10,7 @@
import time
from io import BytesIO

import bs4
from PIL import Image
from tqdm import tqdm

@@ -19,7 +20,10 @@
logger = logging.getLogger(__name__)

def download_image(app, url) -> Image.Image:
'''Download image'''
from .app import App
assert isinstance(app, App)
assert app.crawler is not None

assert isinstance(url, str)
if len(url) > 1000 or url.startswith('data:'):
content = base64.b64decode(url.split('base64,')[-1])
@@ -30,6 +34,10 @@ def download_image(app, url) -> Image.Image:
# end def

def download_cover(app):
from .app import App
assert isinstance(app, App)
assert app.crawler is not None

filename = None
filename = os.path.join(app.output_path, 'cover.jpg')
if os.path.exists(filename):
@@ -67,6 +75,10 @@ def download_cover(app):


def download_chapter_body(app, chapter):
from .app import App
assert isinstance(app, App)
assert app.crawler is not None

result = None
chapter['body'] = read_chapter_body(app, chapter)

@@ -103,6 +115,9 @@ def download_chapter_body(app, chapter):


def get_chapter_filename(app, chapter):
from .app import App
assert isinstance(app, App)

dir_name = os.path.join(app.output_path, 'json')
if app.pack_by_volume:
vol_name = 'Volume ' + str(chapter['volume']).rjust(2, '0')
@@ -115,6 +130,9 @@ def get_chapter_filename(app, chapter):


def read_chapter_body(app, chapter):
from .app import App
assert isinstance(app, App)

file_name = get_chapter_filename(app, chapter)

chapter['body'] = ''
@@ -131,6 +149,9 @@ def read_chapter_body(app, chapter):


def save_chapter_body(app, chapter):
from .app import App
assert isinstance(app, App)

file_name = get_chapter_filename(app, chapter)

title = chapter['title'].replace('>', '&gt;').replace('<', '&lt;')
@@ -149,6 +170,9 @@ def save_chapter_body(app, chapter):


def download_content_image(app, url, filename):
from .app import App
assert isinstance(app, App)

image_folder = os.path.join(app.output_path, 'images')
image_file = os.path.join(image_folder, filename)
try:
@@ -174,6 +198,10 @@ def download_content_image(app, url, filename):
# end def

def download_chapters(app):
from .app import App
assert isinstance(app, App)
assert app.crawler is not None

app.progress = 0
bar = tqdm(desc='Downloading', total=len(app.chapters), unit='ch')
if os.getenv('debug_mode') == 'yes':
@@ -209,6 +237,10 @@ def download_chapters(app):


def download_chapter_images(app):
from .app import App
assert isinstance(app, App)
assert app.crawler is not None

app.progress = 0

# download or generate cover
@@ -226,8 +258,12 @@ def download_chapter_images(app):

soup = app.crawler.make_soup(chapter['body'])
for img in soup.select('img'):
if not isinstance(img, bs4.Tag) or not img.has_attr('src'):
continue
# end if

full_url = app.crawler.absolute_url(img['src'], page_url=chapter['url'])
filename = hashlib.md5(img['src'].encode()).hexdigest() + '.jpg'
filename = hashlib.md5(str(img['src']).encode()).hexdigest() + '.jpg'
future = app.crawler.executor.submit(download_content_image, app, full_url, filename)
futures_to_check.setdefault(chapter['id'], [])
futures_to_check[chapter['id']].append(future)
@@ -252,14 +288,25 @@ def download_chapter_images(app):

images = []
for future in futures_to_check[chapter['id']]:
images.append(future.result())
bar.update()
try:
images.append(future.result())
except KeyboardInterrupt as ex:
raise LNException('Cancelled by user')
except Exception as ex:
logger.warn('Failed to download image: %s', str(ex))
finally:
bar.update()
# end try
# end for
logger.debug(images)

soup = app.crawler.make_soup(chapter['body'])
for img in soup.select('img'):
filename = hashlib.md5(img['src'].encode()).hexdigest() + '.jpg'
if not isinstance(img, bs4.Tag) or not img.has_attr('src'):
img.extract()
continue
# end if
filename = hashlib.md5(str(img['src']).encode()).hexdigest() + '.jpg'
if filename in images:
img.attrs = {'src': 'images/%s' % filename, 'alt': filename}
# img['style'] = 'float: left; margin: 15px; width: 100%;'
@@ -268,7 +315,9 @@ def download_chapter_images(app):
# end if
# end for

chapter['body'] = ''.join([str(x) for x in soup.select_one('body').contents])
soup_body = soup.select_one('body')
assert isinstance(soup_body, bs4.Tag)
chapter['body'] = ''.join([str(x) for x in soup_body.contents])
save_chapter_body(app, chapter)
# end for

2 changes: 1 addition & 1 deletion sources/_index.json

Large diffs are not rendered by default.
