diff --git a/PicImageSearch/baidu.py b/PicImageSearch/baidu.py index 71fdcfc0..33614f1a 100644 --- a/PicImageSearch/baidu.py +++ b/PicImageSearch/baidu.py @@ -1,3 +1,4 @@ +import re from json import loads as json_loads from pathlib import Path from typing import Any, Dict, Optional, Union @@ -25,8 +26,13 @@ async def search( ) else: raise ValueError("url or file is required") - resp_text, resp_url, _ = await self.post( + resp_text, _, _ = await self.post( "https://graph.baidu.com/upload", params=params, data=data ) - resp_text, resp_url, _ = await self.get((json_loads(resp_text))["data"]["url"]) - return BaiDuResponse(resp_text, resp_url) + next_url = (json_loads(resp_text))["data"]["url"] + resp_text, resp_url, _ = await self.get(next_url) + next_url = (re.search(r'"firstUrl":"([^"]+)"', resp_text)[1]).replace(r"\/", "/") # type: ignore + resp_text, _, _ = await self.get(next_url) + next_url = (json_loads(resp_text))["data"]["ajaxTextUrl"] + resp_text, _, _ = await self.get(next_url) + return BaiDuResponse(json_loads(resp_text), resp_url) diff --git a/PicImageSearch/model/baidu.py b/PicImageSearch/model/baidu.py index 979157f6..ed997ca0 100644 --- a/PicImageSearch/model/baidu.py +++ b/PicImageSearch/model/baidu.py @@ -1,45 +1,19 @@ -import json -import re -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List class BaiDuItem: def __init__(self, data: Dict[str, Any]): self.origin: Dict[str, Any] = data # 原始数据 - self.page_title: str = data["fromPageTitle"] # 页面标题 - self.title: str = data["title"][0] # 标题 - self.abstract: str = data["abstract"] # 说明文字 - self.image_src: str = data["image_src"] # 图片地址 - self.url: str = data["url"] # 图片所在网页地址 - self.img_list: List[str] = data.get("imgList", []) # 其他图片地址列表 + self.similarity: float = float(f"{float(data['simi']) * 100:.2f}") + self.title: str = data["fromPageTitle"] # 页面标题 + self.thumbnail: str = data["thumbUrl"] # 图片地址 + self.url: str = data["fromUrl"] # 图片所在网页地址 class BaiDuResponse: - def __init__(self, resp_text: str, resp_url: str): + def __init__(self, resp_json: Dict[str, Any], resp_url: str): self.url: str = resp_url # 搜索结果地址 self.similar: List[Dict[str, Any]] = [] # 相似结果返回值 - self.raw: List[BaiDuItem] = [] # 来源结果返回值 - # 原始数据 - self.origin: List[Dict[str, Any]] = json.loads( - re.search(r"cardData = (.+);window\.commonData", resp_text)[1] # type: ignore - ) - self.same: Optional[Dict[str, Any]] = {} - for i in self.origin: - setattr(self, i["cardName"], i) - if self.same: - self.raw = [BaiDuItem(x) for x in self.same["tplData"]["list"]] - info = self.same["extData"]["showInfo"] - del info["other_info"] - for y in info: - for z in info[y]: - try: - self.similar[info[y].index(z)][y] = z - except IndexError: - self.similar.append({y: z}) - # 获取所有卡片名 - self.item: List[str] = [ - attr - for attr in dir(self) - if not callable(getattr(self, attr)) - and not attr.startswith(("__", "origin", "raw", "same", "url")) - ] + self.origin: Dict[str, Any] = resp_json # 原始数据 + # 来源结果返回值 + self.raw: List[BaiDuItem] = [BaiDuItem(i) for i in resp_json["data"]["list"]] diff --git a/demo/demo_baidu.py b/demo/demo_baidu.py index d7aaa216..fbdaa4c5 100644 --- a/demo/demo_baidu.py +++ b/demo/demo_baidu.py @@ -31,17 +31,12 @@ def test_sync() -> None: def show_result(resp: BaiDuResponse) -> None: # logger.info(resp.origin) # 原始数据 - logger.info(resp.item) - if resp.same: # 存在来源结果 - # logger.info(resp.raw[0].origin) - logger.info(resp.raw[0].page_title) - logger.info(resp.raw[0].title) - logger.info(resp.raw[0].abstract) - logger.info(resp.raw[0].url) - logger.info(resp.raw[0].image_src) - logger.info(resp.raw[0].img_list) - else: - logger.info(resp.similar) + logger.info(resp.url) + # logger.info(resp.raw[0].origin) + logger.info(resp.raw[0].similarity) + logger.info(resp.raw[0].title) + logger.info(resp.raw[0].url) + logger.info(resp.raw[0].thumbnail) logger.info("-" * 50)