-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbs_to.py
74 lines (57 loc) · 2.15 KB
/
bs_to.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# TYPES:
# episode {"id": String, "title": String, "hosts": [(name: String, url: String)]}
# video {"size": String, "format": String, "url": String}
def get_series_title(html):
    """Extract the series title from a series page.

    Parses ``html`` and returns the first child node of the ``<h2>`` found
    inside the ``<section class="serie">`` element, with surrounding
    whitespace stripped.
    """
    document = BeautifulSoup(html, "html.parser")
    heading = document.find("section", {"class": "serie"}).h2
    # Only the heading's first child is used; any later children are ignored.
    return heading.contents[0].strip()
def get_episodes(html):
    """Collect every episode listed in the page's episode table.

    Each ``<tr>`` of the ``<table class="episodes">`` element is turned into
    one dict with the keys ``"id"``, ``"title"`` and ``"hosts"``, where
    ``"hosts"`` is a list of ``(title, href)`` tuples taken from the row's
    host links.
    """
    document = BeautifulSoup(html, "html.parser")
    rows = document.find("table", {"class": "episodes"}).find_all("tr")

    def parse_row(row):
        # Text of the row's first link and of its first <strong> element.
        number = row.find("a").text.strip()
        name = row.find("strong").text.strip()
        # The host links are read from the row's child node at index 5.
        host_links = row.contents[5].find_all("a")
        return {
            "id": number,
            "title": name,
            "hosts": [(link["title"], link["href"]) for link in host_links],
        }

    return [parse_row(row) for row in rows]
def get_host_url(html):
    """Return the link target of the hoster player.

    Looks up the first ``<a>`` inside the ``<div class="hoster-player">``
    element of ``html`` and returns the value of its ``href`` attribute.
    """
    document = BeautifulSoup(html, "html.parser")
    link = document.find("div", {"class": "hoster-player"}).find("a")
    return link["href"]
############################################################################
def driver(driver, url):
    """Open a page in the browser and return the hoster link it reveals.

    Loads ``url``, clicks the ``hoster-player`` element twice, then blocks
    until a link shows up inside the player (the user may have to solve a
    CAPTCHA in the browser first) and returns that link's target.

    Args:
        driver: Selenium WebDriver instance controlling the browser.
        url: Address of the page to load.

    Returns:
        The ``href`` of the first link inside the ``hoster-player`` element,
        as extracted by ``get_host_url`` from the final page source.
    """
    driver.get(url)

    # Click Play
    # once for ad-tab, once for play
    for _ in range(2):
        # Always act on the most recently opened window/tab.
        # ``switch_to.window`` and ``find_element(By...)`` replace the
        # ``switch_to_window`` / ``find_element_by_class_name`` helpers that
        # were removed in Selenium 4; both forms also exist in older releases.
        driver.switch_to.window(driver.window_handles[-1])
        player = driver.find_element(By.CLASS_NAME, "hoster-player")
        player.click()

    # Solve CAPTCHA, if present
    wait = WebDriverWait(driver, 20)
    while True:
        try:
            print("Please solve the CAPTCHA in the browser if needed.")
            print("(You may reload the page)")
            wait.until(EC.presence_of_element_located(
                (By.CSS_SELECTOR, ".hoster-player > a")))
            print("CAPTCHA completed.")
            break
        except TimeoutException:
            # Deliberately retry without limit: the user may need more than
            # one 20-second window to finish the CAPTCHA.
            pass

    return get_host_url(driver.page_source)