-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
39 lines (32 loc) · 1.18 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import urllib.request as ul
from bs4 import BeautifulSoup
import urllib.parse
import re
import os
# Root URL of the site being scraped; hrefs taken from its pages are
# resolved relative to it.
baseUrl = 'https://uva.onlinejudge.org/'
def downloadFile(fileUrl, direc):
    """Download the resource at *fileUrl* into the directory *direc*.

    The local file name is the last ``/``-separated segment of *fileUrl*;
    an existing file with that name is overwritten.

    Args:
        fileUrl: URL to fetch (anything ``urllib.request.urlopen`` accepts).
        direc: Directory in which the file is stored; it must already exist.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        OSError: If the destination file cannot be created or written.
    """
    import shutil  # local import: only this function needs it

    filename = fileUrl.split('/')[-1]
    target = os.path.join(direc, filename)
    # The URL is opened first, so a failed request leaves no empty file
    # behind; both handles are closed even if the transfer fails midway.
    with ul.urlopen(ul.Request(fileUrl)) as resp, open(target, 'wb') as out_file:
        # Stream in chunks instead of holding the whole payload in memory.
        shutil.copyfileobj(resp, out_file)
def _safe_dirname(text):
    """Turn link text into a single safe directory name ('' if unusable)."""
    name = text.strip()
    # Link text comes from a remote page: keep it from adding path levels.
    for sep in ('/', '\\'):
        name = name.replace(sep, '_')
    # '.' and '..' would alias the current or the parent directory.
    return '' if name in ('.', '..') else name


def scraper(url, direc):
    """Recursively mirror the page tree rooted at *url* into *direc*.

    A page containing an ``<iframe>`` is treated as a leaf: the first link
    whose href starts with ``external`` is downloaded into *direc* and the
    recursion stops there.  On any other page, every ``td > a`` link that
    has no ``class`` attribute and does not mention ``udebug`` gets a
    sub-directory named after its text, and is scraped in turn.

    Args:
        url: Address of the page to fetch.
        direc: Existing directory that receives downloads and sub-directories.

    Raises:
        urllib.error.URLError: If a page or file cannot be fetched.
        OSError: If a directory or file cannot be created.
    """
    # Close the HTTP response as soon as the page has been read.
    with ul.urlopen(url) as resp:
        data = resp.read()
    soup = BeautifulSoup(data, 'html.parser')

    if soup.find('iframe') is not None:
        candidates = soup.select('a[href^="external"]')
        if not candidates:
            # Nothing to download here; report it instead of crashing.
            print('no "external" file link found on ' + url)
            return
        fileUrl = urllib.parse.urljoin(baseUrl, candidates[0].get('href'))
        print(direc)
        downloadFile(fileUrl, direc)
        return

    site_host = urllib.parse.urlsplit(baseUrl).netloc
    for link in soup.select('td > a'):
        href = link.get('href')
        # Skip styled links, anchors without a target, and udebug links.
        if link.get('class') is not None or not href or 'udebug' in href:
            continue
        # .string is None when the anchor wraps nested tags; fall back to
        # the concatenated text so such links are not lost.
        name = _safe_dirname(link.string or link.get_text())
        if not name:
            continue
        # The previous code replaced '&' with itself (a no-op); presumably
        # the intent was to undo a doubly-escaped '&amp;' in the href.
        target = urllib.parse.urljoin(baseUrl, href.replace('&amp;', '&'))
        # urljoin keeps absolute hrefs intact, so stay on the scraped site.
        if urllib.parse.urlsplit(target).netloc != site_host:
            continue
        print(name)
        subdir = os.path.join(direc, name)
        # exist_ok avoids the check-then-create race of exists()+makedirs().
        os.makedirs(subdir, exist_ok=True)
        scraper(target, subdir)
if __name__ == "__main__":
    # Script entry point: start crawling at the category page below and
    # mirror everything beneath it into the current working directory.
    the_url = 'https://uva.onlinejudge.org/index.php?option=com_onlinejudge&Itemid=8&category=604'
    scraper(url=the_url, direc='.')