-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscrape.py
32 lines (24 loc) · 943 Bytes
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import requests
import re
def scrape_signing_savvy(word, timeout=10):
    """Search Signing Savvy for *word* and return the first ``.mp4`` link.

    Requests ``https://www.signingsavvy.com/search/<word>`` and scans the
    returned HTML for the first ``href="..."`` attribute whose value ends
    in ``.mp4``. Progress and failures are also reported on stdout.

    Args:
        word: Search term; interpolated as-is into the search URL path.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.get``.

    Returns:
        The ``href`` value of the first ``.mp4`` link exactly as it appears
        in the page (it is not resolved against the base URL). On failure
        one of two sentinel strings is returned instead:
        ``"No .mp4 link found"`` when the page contains no such link, or
        ``"Failed to retrieve the webpage"`` when the request fails or the
        response status is not 200.
    """
    base_url = 'https://www.signingsavvy.com'
    search_url = f'{base_url}/search/{word}'

    # Log before the request so the URL is visible even if the call fails.
    print(f'Requesting URL: {search_url}')
    try:
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(search_url, timeout=timeout)
    except requests.RequestException as exc:
        # Transport-level failures (DNS, refused connection, timeout) are
        # reported through the same sentinel as a bad HTTP status.
        print(f'Failed to retrieve the webpage. Error: {exc}')
        return "Failed to retrieve the webpage"

    if response.status_code != 200:
        print(f'Failed to retrieve the webpage. Status code: {response.status_code}')
        return "Failed to retrieve the webpage"

    # Only the first href ending in .mp4 is of interest, hence re.search.
    match = re.search(r'href="([^"]+\.mp4)"', response.text)
    if not match:
        print('No .mp4 link found')
        return "No .mp4 link found"

    mp4_link = match.group(1)
    print(f'Found .mp4 link: {mp4_link}')
    return mp4_link