-
Notifications
You must be signed in to change notification settings - Fork 0
/
product_img_downloader.py
38 lines (31 loc) · 1.35 KB
/
product_img_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import pandas as pd
from utils import *
from seleniumbase import BaseCase
# NOTE(review): presumably lets this file be launched directly with `python`
# (SeleniumBase handing the run off to pytest) — confirm against the
# SeleniumBase documentation before moving or removing this call.
BaseCase.main(__name__, __file__)
class ScrapeAmazon(BaseCase):
    """Download the main product image for every link listed in a CSV file.

    Links are read from the ``link`` column of
    ``data/python_book_links.csv``. Each page is opened in the browser,
    parsed, and its image is stored under the ``img`` directory through the
    ``format_title`` / ``mkdir`` / ``save_img`` helpers (presumably provided
    by the star import from ``utils``).
    """

    @staticmethod
    def _strip_transform(img_url):
        """Return *img_url* without its ``._...`` transformation segment.

        Everything from the first ``._`` onwards is dropped and ``.jpg`` is
        appended. A URL that contains no ``._`` is returned unchanged, so an
        extension is never appended twice.
        """
        base_url, marker, _ = img_url.partition("._")
        if not marker:
            return img_url
        return base_url + ".jpg"

    def test_scrape(self):
        """Visit every product link and save its landing image.

        A page that lacks the title element, the image element, or the image
        URL attribute is reported on stdout and skipped, so that one bad page
        does not abort the remaining downloads.
        """
        filename = r'data/python_book_links.csv'
        # Read the CSV file and get links from the 'link' column; empty cells
        # are parsed as NaN and cannot be opened, so they are dropped.
        links_df = pd.read_csv(filename)
        links = links_df['link'].dropna().tolist()

        # Directory the images are saved to.
        path = "img"

        for link in links:
            self.open(link)
            self.wait_for_element("h1")
            soup = self.get_beautiful_soup()

            # Extract the product title
            title_tag = soup.find(id="title")
            if title_tag is None:
                print("Product title not found.")
                continue
            title = format_title(title_tag.get_text().strip())

            # Extract the product image link
            product_img = soup.find(id="landingImage")
            if not product_img:
                print("Product image not found.")
                continue

            # NOTE(review): the URL is read from the tag's ``data`` attribute;
            # confirm this is the attribute that actually carries it.
            product_img_url = product_img.get("data")
            if not product_img_url:
                print("Product image URL not found.")
                continue

            final_url = self._strip_transform(product_img_url)

            # Saving the image to a certain directory
            mkdir(path)
            save_img(path, title, final_url)