-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path main.py
135 lines (107 loc) · 5.03 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# -*- coding:utf-8 -*-
import os
import sys
import time
import pandas
import json
import datetime
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
from chardet import detect
BASE_URL = "https://deps.dev/"
CONFIG_FILE = "config.json"
def main():
    """Read the config, scrape deps.dev for every requirement, and write an Excel report.

    Output file is "vulnerability_check_<today>.xlsx" inside the configured
    output folder (falls back to ~/Downloads when not configured).
    """
    json_data = path_get()
    file_name = f"vulnerability_check_{datetime.date.today()}.xlsx"
    # .get(...) or ... : missing key, None, and "" all fall back to ~/Downloads
    # (the original only handled the explicit empty-string case and raised
    # KeyError when the key was absent).
    output_folder_path = json_data.get("output_folder_path") or os.path.expanduser('~/Downloads')
    # os.path.join is portable, unlike hand-built "folder/file" strings.
    export_full_path = os.path.join(output_folder_path, file_name)
    data_dict = txt_read()
    pd_data = scraping(data_dict)
    # Rename internal column names to the Japanese report headers.
    pd_data = pd_data.rename(columns={"library_name": "ライブラリ名",
                                      "search_value": "検索値",
                                      "library_ver": "バージョン",
                                      "security_advisories": "セキュリティアドバイス",
                                      "search_url": "URL"})
    pd_data.to_excel(export_full_path, index=False, header=True)
def path_get(config_path=None):
    """Load and return the JSON configuration as a dict.

    config_path: path to the JSON config file; defaults to the module-level
        CONFIG_FILE, preserving the original zero-argument behavior.
    """
    if config_path is None:
        config_path = CONFIG_FILE
    # Context manager guarantees the handle is closed even if json.load raises
    # (the original left the file open on a parse error).
    with open(config_path, 'r', encoding='utf-8') as json_open:
        return json.load(json_open)
# Scrape deps.dev for each library's security-advisory status.
def scraping(data_dict):
    """Visit the deps.dev PyPI page for every entry and record advisory info.

    data_dict: {index: {"library_name": str, "library_ver": str}} — each entry
        is updated in place with "search_value", "search_url", "library_ver"
        and "security_advisories" keys.
    Returns a pandas DataFrame with one row per library, columns in report order.
    """
    # XPaths on the deps.dev version page; loop-invariant, so hoisted out.
    # NOTE(review): absolute XPaths are brittle — they break whenever the site
    # layout changes.
    no_advisories_xpath = "/html/body/div/div[1]/div/div/div/div/div[1]/div[1]/div[1]/span"
    warning_xpath = "/html/body/div/div[1]/div/div/div/div/div[1]/div[1]/div[1]/div[2]/div/div/div[1]/a"
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        # Iterate the dict directly instead of assuming keys are 0..len-1.
        for key in data_dict:
            entry = data_dict[key]
            search_value = str(entry['library_name']).lower()
            library_ver = entry['library_ver']
            url = f"{BASE_URL}pypi/{search_value}/{library_ver}"
            driver.get(url)
            time.sleep(3)  # crude fixed wait for the page to render
            try:
                # "No advisories" text present → page loaded and library is clean.
                security_advisories = driver.find_element(by=By.XPATH, value=no_advisories_xpath).text
            except NoSuchElementException:
                try:
                    # Check whether a warning link is shown instead.
                    security_advisories = driver.find_element(by=By.XPATH, value=warning_xpath).text
                except NoSuchElementException:
                    # Neither element found — likely the version string is too
                    # short (e.g. "1.2" vs "1.2.0"); retry once with ".0" appended.
                    library_ver = library_ver + ".0"
                    url = f"{BASE_URL}pypi/{search_value}/{library_ver}"
                    time.sleep(3)
                    try:
                        driver.get(url)
                        time.sleep(3)
                        security_advisories = driver.find_element(by=By.XPATH, value=no_advisories_xpath).text
                    except NoSuchElementException:
                        # Still not found — version unknown on deps.dev; mark as error.
                        security_advisories = "error"
            # Single consolidated write-back (the original duplicated this
            # four-line block in every branch).
            entry["search_url"] = url
            entry["search_value"] = search_value
            entry["library_ver"] = library_ver
            entry["security_advisories"] = security_advisories
            time.sleep(3)
    finally:
        # quit() ends the whole browser/driver process; close() (the original)
        # only closes the window and leaks the chromedriver process.  The
        # finally block ensures cleanup even when an exception escapes the loop.
        driver.quit()
    # orient="index" is equivalent to from_dict(...).T for a dict of row-dicts.
    data = pandas.DataFrame.from_dict(data_dict, orient="index")
    data = data.reindex(columns=["library_name", "search_value", "library_ver", "security_advisories", "search_url"])
    return data
# Read a requirements.txt-style file into a dict of library entries.
def txt_read(path=None):
    """Parse "name==version" lines into {index: {"library_name", "library_ver"}}.

    path: file to read; defaults to the module-global requirements_path,
        preserving the original zero-argument behavior.
    The encoding is auto-detected with chardet from the raw bytes, so
    non-UTF-8 (e.g. Shift-JIS) files are handled.
    """
    if path is None:
        path = requirements_path
    data_dict = {}
    # First pass: read raw bytes to detect the encoding.
    with open(path, 'rb') as bf:
        encode_data = detect(bf.read())
    count = 0
    with open(path, mode='r', encoding=encode_data['encoding']) as f:
        for line in f:
            # Skip blank or malformed lines instead of crashing with
            # IndexError as the original did on any line without "==".
            if '==' not in line:
                continue
            parts = line.split('==')
            data_dict[count] = {
                "library_name": parts[0],
                "library_ver": parts[1].replace("\n", ""),
            }
            count += 1
    return data_dict
if __name__ == '__main__':
    # Requirements file path comes from argv[1]; default to ./requirements.txt.
    requirements_path = sys.argv[1] if len(sys.argv) > 1 else "./requirements.txt"
    main()