From 5b2c12ffe431a52cfd27414fb2c6e6b839e7eb27 Mon Sep 17 00:00:00 2001 From: XingZai <1416818143@qq.com> Date: Sat, 18 Jan 2025 22:30:56 +0800 Subject: [PATCH] update v1.0.6 --- README.md | 7 +++++++ novel_spider.py | 19 +++++++++++++------ requirements.txt | 6 +++--- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 3641e14..04f1af1 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,10 @@ ## 使用方法 +v1.0.6 + +已添加最新的打包好的.exe文件直接可以去release里面下,默认是10个线程,想要改多个线程需要自己安装python以及安装对应的库。 + 首先需要安装python,随便装个版本就行,3.10啊,3.11啊都是可以的 点击这个超链接就可以前往python的官网 [python官网](https://www.python.org/) @@ -112,6 +116,9 @@ python3 novel_spider.py ## 版本更新 +* v1.0.6 + 1. 更新了最新的url + 2. 新增.exe打包文件到release * v1.0.5 1. 新增封面的爬取,现在都有封面了 2. 修复了进度条bug diff --git a/novel_spider.py b/novel_spider.py index 38e3481..e58b8f6 100644 --- a/novel_spider.py +++ b/novel_spider.py @@ -10,6 +10,13 @@ # 多线程个数(速度倍数,默认为十倍) thread_count_global = 10 +# 填一个头 +header = { + "Accept": "*/*", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 " + "Safari/537.36" +} + # v1.0.4 加入多线程 class SpiderThread(threading.Thread): @@ -72,7 +79,7 @@ def get_novel(bk_id, write_type=1): book.set_title(novel_name) book.set_language('zh') book.add_author(author_name) - book.set_cover(file_name="cover.jpg",content=cover) + book.set_cover(file_name="cover.jpg", content=cover) spine = store_content(novel_name, url, length, write_type, book=book, thread_count=thread_count_global) book.add_item(epub.EpubNcx()) book.add_item(epub.EpubNav()) @@ -85,13 +92,13 @@ def get_novel(bk_id, write_type=1): # v1.0.4 获得书名 def get_book_name(bk_id, print_c=True): - url = "https://www.bqg70.com/book/" + str(bk_id) - text = requests.get(url=url).text + url = "https://www.biqu70.cc/book/" + str(bk_id) + text = requests.get(url=url, headers=header).text length = len(re.findall("
", text)) novel_name = re.findall(">.*", text)[0][1:-5] author_name = re.findall("作者[::]\\w*", text)[0][3:] cover_url = re.findall("src=\"\\S+", re.findall("", text)[0])[0][5:-1] - content = requests.get(url=cover_url).content + content = requests.get(url=cover_url, headers=header).content # with open("1.jpg", 'wb') as f: # f.write(content) @@ -104,7 +111,7 @@ def get_book_name(bk_id, print_c=True): def get_result_and_title(url): while True: try: - text = requests.get(url=url).text + text = requests.get(url=url, headers=header).text pattern = ">.*
" pattern2 = ">.*" content = re.findall(pattern, text)[0] @@ -115,7 +122,7 @@ def get_result_and_title(url): title = title[1:-5] return [result, title, epub_result] except: - # traceback.print_exc() + traceback.print_exc() continue diff --git a/requirements.txt b/requirements.txt index dfa5ca2..cb549ee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -EbookLib==0.18 -requests==2.31.0 -tqdm==4.65.0 \ No newline at end of file +EbookLib>=0.18 +requests>=2.31.0 +tqdm>=4.65.0 \ No newline at end of file