-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathget_movies_simple.py
39 lines (31 loc) · 1.02 KB
/
get_movies_simple.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# -*- encoding: utf-8 -*-
#########################################################
#
# Alejandro German
#
# https://github.com/seralexger/filmaffinity-scraper
#
#########################################################
import glob
import json
import random
import re
import time

from tqdm import tqdm

from proxy_manager.proxyManager import ProxyManager
from scraper.filmaffinity import Filmaffinity
PROXY_MANAGER = ProxyManager()
proxies_list = PROXY_MANAGER.generate_proxies()
indice_arr = json.loads(open('data/indice.json').read())
movies_safe = []
for path in glob.glob("data/movies/*"):
movies_safe.append(int(re.sub('[^0-9]','', path)))
scraper = Filmaffinity()
for item in tqdm(indice_arr):
if item["movId"] not in movies_safe:
try:
movie_info = scraper.scrap_movie_web(item["url"], "en", {"https": proxies_list[random.randint(0,len(proxies_list)-1)]})
if movie_info != None:
with open("data/movies/"+ str(item["movId"]) + '.json', 'w') as fp:
json.dump(movie_info, fp, indent = 4)
except Exception as e:
print(e)