This repository has been archived by the owner on Jan 25, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
♻️ refact(multithread): Create copy of chromedriver for each thread (#…
…115) This fixes an issue on Docker with the "text file busy" error. I also added a Docker test that can be run from the command line:
```bash
docker build -t ecoindex-scrap:114 . && docker run -it --rm -v /tmp/ecoindex-cli:/tmp/ecoindex-cli ecoindex-scrap:114 python tests/docker.py
```
- Loading branch information
Showing
4 changed files
with
71 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,47 +1,33 @@ | ||
# NOTE(review): this is a rendered diff hunk without +/- markers. Removed and added lines are interleaved (see the two adjacent FROM lines and the two variants of the unzip step), so the listing does not represent a single version of the Dockerfile. | ||
# Build image | ||
FROM python:3.11-slim as requirements-stage | ||
FROM python:3.11-slim | ||
|
||
# Chrome major version is a build argument (default 111) re-exported as an environment variable; CHROMEDRIVER_PATH is the destination used by the mv/chmod step further down. | ||
ARG CHROME_VERSION_MAIN=111 | ||
ENV CHROME_VERSION_MAIN=${CHROME_VERSION_MAIN} | ||
ENV CHROMEDRIVER_PATH=/usr/bin/chromedriver | ||
|
||
WORKDIR /tmp | ||
COPY ./ ./ | ||
|
||
# Install required deps | ||
RUN apt update && apt install -y unzip wget | ||
RUN pip install poetry | ||
RUN poetry build | ||
|
||
# Build requirements.txt file | ||
COPY ./pyproject.toml ./poetry.lock /tmp/ | ||
RUN poetry export --output=requirements.txt --without-hashes | ||
|
||
# Download chromedriver and chrome | ||
# The LATEST_RELEASE_<major> response is saved to /tmp/chrome_version; the two downloads below read that file to build their URLs. | ||
RUN wget "https://chromedriver.storage.googleapis.com/LATEST_RELEASE_${CHROME_VERSION_MAIN}" -O /tmp/chrome_version | ||
# NOTE(review): two variants of the unzip continuation follow; presumably the one without a trailing backslash is the removed line - confirm against the real file. | ||
RUN wget "https://chromedriver.storage.googleapis.com/$(cat /tmp/chrome_version)/chromedriver_linux64.zip" \ | ||
&& unzip -o chromedriver_linux64.zip | ||
&& unzip -o chromedriver_linux64.zip && rm chromedriver_linux64.zip \ | ||
&& mv chromedriver ${CHROMEDRIVER_PATH} \ | ||
&& chmod +x ${CHROMEDRIVER_PATH} | ||
# Fetch the google-chrome-stable .deb for the version recorded in /tmp/chrome_version. | ||
RUN wget "https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_$(cat /tmp/chrome_version)-1_amd64.deb" \ | ||
-O google-chrome-stable.deb | ||
|
||
|
||
# Main image | ||
FROM python:3.11-slim | ||
|
||
ARG CHROME_VERSION_MAIN=111 | ||
ENV CHROME_VERSION_MAIN=${CHROME_VERSION_MAIN} | ||
ENV CHROMEDRIVER_PATH=/usr/bin/chromedriver | ||
# Install libpq-dev, gcc and the downloaded Chrome package, then delete the apt lists and the .deb within the same RUN. | ||
RUN apt update && \ | ||
apt -y install libpq-dev gcc /tmp/google-chrome-stable.deb && \ | ||
rm -rf /var/lib/apt/lists/* && \ | ||
rm /tmp/google-chrome-stable.deb | ||
|
||
WORKDIR /code | ||
ENV PYTHONPATH "/code" | ||
|
||
# Copy requirements.txt, chromedriver, chrome_version, google-chrome-stable.deb from requirements-stage | ||
COPY --from=requirements-stage /tmp/ /tmp/ | ||
COPY --from=requirements-stage /tmp/chromedriver /usr/bin/chromedriver | ||
COPY --from=requirements-stage /tmp/chrome_version /tmp/chrome_version | ||
COPY --from=requirements-stage /tmp/google-chrome-stable.deb /tmp/ | ||
COPY poetry.lock pyproject.toml ./ | ||
# Install poetry, export requirements.txt without hashes, then pip-install it. | ||
RUN pip install poetry && \ | ||
poetry export --output requirements.txt --without-hashes && \ | ||
pip install -r requirements.txt | ||
|
||
# Install google chrome and make chromedriver executable | ||
RUN apt update && apt -y install libpq-dev gcc /tmp/google-chrome-stable.deb | ||
RUN chmod +x /usr/bin/chromedriver | ||
|
||
COPY ./ /code/ | ||
|
||
RUN pip install -r /tmp/requirements.txt | ||
COPY ./ ./ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import asyncio | ||
from concurrent.futures import ThreadPoolExecutor, as_completed | ||
|
||
from ecoindex_scraper.scrap import EcoindexScraper | ||
|
||
|
||
def run_page_analysis(
    url,
    driver_executable_path="/usr/bin/chromedriver",
    chrome_version_main=114,
):
    """Run a single Ecoindex analysis of ``url`` and return its result.

    Builds an ``EcoindexScraper``, calls ``init_chromedriver()`` on it and
    drives the ``get_page_analysis()`` coroutine to completion with
    ``asyncio.run``, so the function can be called from a plain worker
    thread.

    Args:
        url: Address of the page to analyse.
        driver_executable_path: Path of the chromedriver binary handed to
            the scraper. Defaults to the previously hard-coded value.
        chrome_version_main: Chrome major version handed to the scraper.
            Defaults to the previously hard-coded value.

    Returns:
        Whatever ``get_page_analysis()`` resolves to, or ``None`` when any
        exception is raised along the way (the exception is printed, not
        propagated).
    """
    try:
        scraper = EcoindexScraper(
            url=url,
            driver_executable_path=driver_executable_path,
            chrome_version_main=chrome_version_main,
        )
        # asyncio.run creates a fresh event loop for this call.
        return asyncio.run(scraper.init_chromedriver().get_page_analysis())
    except Exception as e:
        # Best-effort: report the failure and let the caller see None, so
        # one failing analysis does not abort the other submitted runs.
        print(e)
        return None
|
||
|
||
# Docker smoke test: submit 10 analyses of the same page to a pool of
# 8 worker threads and print each result as soon as it is available.
with ThreadPoolExecutor(max_workers=8) as executor:
    url = "https://www.ecoindex.fr"

    # Map every submitted future back to the URL it analyses.
    future_to_analysis = {
        executor.submit(run_page_analysis, url): url for _ in range(10)
    }

    for future in as_completed(future_to_analysis):
        try:
            print(future.result())
        except Exception as e:
            # future.result() re-raises whatever the worker raised.
            print(e)