Skip to content

Commit

Permalink
update v1.0.6
Browse files Browse the repository at this point in the history
  • Loading branch information
XingzaiUnrivaled committed Jan 18, 2025
1 parent 9e3c0e3 commit 5b2c12f
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 9 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

## 使用方法

v1.0.6

已添加最新打包好的 .exe 文件,可以直接到 release 页面下载。默认使用 10 个线程;如果想修改线程数,需要自行安装 python 并安装对应的库。

首先需要安装 python,版本不限,3.10、3.11 等版本都可以。

点击这个超链接就可以前往python的官网 [python官网](https://www.python.org/)
Expand Down Expand Up @@ -112,6 +116,9 @@ python3 novel_spider.py

## 版本更新

* v1.0.6
1. 更新了最新的url
2. 新增.exe打包文件到release
* v1.0.5
1. 新增封面的爬取,现在都有封面了
2. 修复了进度条bug
Expand Down
19 changes: 13 additions & 6 deletions novel_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@
# Number of worker threads (speed multiplier; defaults to 10x).
thread_count_global = 10

# HTTP request headers passed to requests.get: accept any content type and
# present a desktop Chrome User-Agent string.
header = {
"Accept": "*/*",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 "
"Safari/537.36"
}


# v1.0.4 加入多线程
class SpiderThread(threading.Thread):
Expand Down Expand Up @@ -72,7 +79,7 @@ def get_novel(bk_id, write_type=1):
book.set_title(novel_name)
book.set_language('zh')
book.add_author(author_name)
book.set_cover(file_name="cover.jpg",content=cover)
book.set_cover(file_name="cover.jpg", content=cover)
spine = store_content(novel_name, url, length, write_type, book=book, thread_count=thread_count_global)
book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())
Expand All @@ -85,13 +92,13 @@ def get_novel(bk_id, write_type=1):

# v1.0.4 获得书名
def get_book_name(bk_id, print_c=True):
url = "https://www.bqg70.com/book/" + str(bk_id)
text = requests.get(url=url).text
url = "https://www.biqu70.cc/book/" + str(bk_id)
text = requests.get(url=url, headers=header).text
length = len(re.findall("<dd><a href =\"/book/" + str(bk_id) + "/.*</dd>", text))
novel_name = re.findall(">.*</h1>", text)[0][1:-5]
author_name = re.findall("作者[::]\\w*", text)[0][3:]
cover_url = re.findall("src=\"\\S+", re.findall("<img.*>", text)[0])[0][5:-1]
content = requests.get(url=cover_url).content
content = requests.get(url=cover_url, headers=header).content
# with open("1.jpg", 'wb') as f:
# f.write(content)

Expand All @@ -104,7 +111,7 @@ def get_book_name(bk_id, print_c=True):
def get_result_and_title(url):
while True:
try:
text = requests.get(url=url).text
text = requests.get(url=url, headers=header).text
pattern = ">.*<br ?/?>"
pattern2 = ">.*</h1>"
content = re.findall(pattern, text)[0]
Expand All @@ -115,7 +122,7 @@ def get_result_and_title(url):
title = title[1:-5]
return [result, title, epub_result]
except:
# traceback.print_exc()
traceback.print_exc()
continue


Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
EbookLib==0.18
requests==2.31.0
tqdm==4.65.0
EbookLib>=0.18
requests>=2.31.0
tqdm>=4.65.0

0 comments on commit 5b2c12f

Please sign in to comment.