-
Notifications
You must be signed in to change notification settings - Fork 50
/
Dockerfile
86 lines (70 loc) · 2.84 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Stage 1: Build stage
FROM python:3.11-slim AS builder

# Build-time only: keeps apt/debconf non-interactive for the RUN steps of this
# stage without persisting the setting as an image environment variable.
ARG DEBIAN_FRONTEND=noninteractive

# HOME is also used as the working directory later in this stage.
# UV_SYSTEM_PYTHON=1 makes `uv pip install` target the system interpreter
# instead of requiring a virtual environment.
ENV HOME=/home/vectara \
    XDG_RUNTIME_DIR=/tmp \
    RAY_DEDUP_LOGS="0" \
    CUDA_VISIBLE_DEVICES="" \
    UV_SYSTEM_PYTHON=1
# Install build dependencies
# Compiler toolchain, headers and download tools for the builder stage.
# The apt package lists and /tmp are purged in the same layer.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        libopenblas-dev \
        python3-dev \
        wget \
 && rm -rf /var/lib/apt/lists/* /tmp/*
# Install Python packages
WORKDIR ${HOME}

# Install the pinned uv release first: this layer depends on no copied file,
# so it stays cached when the requirements files change.
RUN pip install --no-cache-dir uv==0.5.6

# Core dependencies. torch/torchvision are installed from the PyTorch CPU
# wheel index before requirements.txt is resolved.
COPY requirements.txt $HOME/
RUN uv pip install --no-cache-dir torch==2.4.1 torchvision==0.19.1 --index-url https://download.pytorch.org/whl/cpu \
 && uv pip install --no-cache-dir -r requirements.txt

# Optional dependencies, enabled with `--build-arg INSTALL_EXTRA=true`.
# requirements-extra.txt is copied only at this point so that editing it does
# not invalidate the cached core-dependency layer above.
COPY requirements-extra.txt $HOME/
ARG INSTALL_EXTRA=false
RUN if [ "$INSTALL_EXTRA" = "true" ]; then \
        uv pip install --no-cache-dir -r requirements-extra.txt && \
        python3 -m spacy download en_core_web_lg; \
    fi
# Clean up unnecessary files
# One pass over /usr/local (which contains site-packages) removes test
# directories and __pycache__ directories; a second pass removes `examples`
# directories from site-packages only; then stray bytecode files and the
# root cache / tmp contents are deleted.
# `-prune` stops find from descending into a directory that is queued for
# removal, so it never tries to walk a path that rm has already deleted.
RUN find /usr/local -type d \( -name test -o -name tests -o -name '__pycache__' \) -prune -exec rm -rf '{}' + \
 && find /usr/local/lib/python3.11/site-packages -type d -name 'examples' -prune -exec rm -rf '{}' + \
 && find /usr/local -type f \( -name '*.pyc' -o -name '*.pyo' \) -delete \
 && rm -rf /root/.cache/* /tmp/*
# Stage 2: Final image
FROM python:3.11-slim

# Build-time only: keeps apt/debconf non-interactive for the RUN steps of this
# stage without leaving DEBIAN_FRONTEND set in the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment. HOME is also used as the working directory and as the
# destination for the application code in this stage.
ENV HOME=/home/vectara \
    XDG_RUNTIME_DIR=/tmp \
    RAY_DEDUP_LOGS="0" \
    CUDA_VISIBLE_DEVICES=""
# Install runtime dependencies
# Disabled in the package list (kept here for reference): libopenblas-dev, xvfb
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        fonts-indic \
        fonts-noto-color-emoji \
        libfontconfig \
        libjpeg62-turbo \
        libmagic1 \
        poppler-utils \
        tesseract-ocr \
        unifont \
        unixodbc \
        xfonts-75dpi \
 && rm -rf /var/lib/apt/lists/*
# Bring over the Python environment from the builder stage: the executables
# in /usr/local/bin and the site-packages tree.
COPY --from=builder /usr/local/bin/ /usr/local/bin/
COPY --from=builder /usr/local/lib/python3.11/site-packages/ /usr/local/lib/python3.11/site-packages/

# Install Playwright browsers
# Only Firefox is installed; --with-deps also installs the OS packages the
# browser needs. pwdebug, the apt lists, /tmp and the root cache are removed
# in the same layer.
RUN playwright install --with-deps firefox \
 && rm -rf /usr/local/bin/pwdebug /var/lib/apt/lists/* /tmp/* /root/.cache/*
# Set working directory
WORKDIR ${HOME}

# Copy application code: top-level modules, the core package's modules,
# and the whole crawlers directory.
COPY *.py $HOME/
COPY core/*.py $HOME/core/
COPY crawlers/ $HOME/crawlers/

# Set entrypoint and command
# ENTRYPOINT is a bash login shell running one command string (-c), so CMD is
# a single string; bash expands $CONFIG and $PROFILE from the container
# environment at start-up. `exec` replaces the shell with the Python process
# so it receives signals (e.g. SIGTERM from `docker stop`) directly.
ENTRYPOINT ["/bin/bash", "-l", "-c"]
CMD ["exec python3 ingest.py $CONFIG $PROFILE"]