Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "dockerfile improvements" #54

Merged
merged 1 commit into from
Jun 13, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 55 additions & 18 deletions Dockerfile.ubi
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,15 @@
ARG BASE_UBI_IMAGE_TAG=9.4
ARG PYTHON_VERSION=3.11

ARG TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0+PTX"
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"


## Base Layer ##################################################################
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base
ARG PYTHON_VERSION

# Some utils for dev/build purposes - tar required for kubectl cp
RUN microdnf install -y \
python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \
which procps findutils tar git \
&& microdnf clean all

WORKDIR /workspace
Expand Down Expand Up @@ -40,21 +39,61 @@ RUN microdnf install -y \
## CUDA Base ###################################################################
# Stage `cuda-base`: extends the `python-install` stage with CUDA version
# variables, NVIDIA's RHEL 9 CUDA package repository, CUDA packages installed
# through microdnf, and CUDA_HOME-based PATH / LD_LIBRARY_PATH entries.
#
# NOTE(review): this span reads like a rendered diff whose +/- markers were
# stripped, so two variants sit side by side: CUDA_VERSION is assigned twice,
# the package list after `RUN microdnf install -y` carries two alternative
# endings, and CUDA_HOME is introduced both by a bare ENV line and by an
# ARG/ENV pair. Confirm which variant is live before building from this text.
FROM python-install as cuda-base

ENV CUDA_VERSION=12.5.0
# The Nvidia operator won't allow deploying on CUDA 12.0 hosts if
# this env var is set to 12.2.0, even though it's compatible
#ENV CUDA_VERSION=12.2.0 \
ENV CUDA_VERSION=12.0.0 \
NV_CUDA_LIB_VERSION=12.2.0-1 \
NVIDIA_VISIBLE_DEVICES=all \
NVIDIA_DRIVER_CAPABILITIES=compute,utility \
NV_CUDA_CUDART_VERSION=12.2.53-1 \
NV_CUDA_COMPAT_VERSION=535.104.12

# Download NVIDIA's CUDA repo definition for RHEL 9 (x86_64) into
# /etc/yum.repos.d; the cuda-* packages installed below presumably come from it.
RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo

# Install the CUDA 12.2 packages and clear the microdnf cache in the same RUN.
RUN microdnf install -y \
cuda-nvcc-12-2 cuda-nvtx-12-2 cuda-libraries-devel-12-2 && \
microdnf clean all
cuda-cudart-12-2-${NV_CUDA_CUDART_VERSION} \
cuda-compat-12-2-${NV_CUDA_COMPAT_VERSION} \
&& microdnf clean all


# Prepend the CUDA bin directory to PATH, and the CUDA lib64 and CUPTI lib64
# directories to LD_LIBRARY_PATH.
ENV CUDA_HOME="/usr/local/cuda" \
ARG CUDA_HOME="/usr/local/cuda"
ENV CUDA_HOME=${CUDA_HOME}\
PATH="${CUDA_HOME}/bin:${PATH}" \
LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"


## CUDA Development ############################################################
# Stage `cuda-devel`: layers version-pinned CUDA development packages on top
# of the `cuda-base` stage.
FROM cuda-base as cuda-devel

# Exact versions of the development packages installed by the RUN below.
ENV NV_CUDA_CUDART_DEV_VERSION=12.2.53-1 \
NV_NVML_DEV_VERSION=12.2.81-1 \
NV_LIBCUBLAS_DEV_VERSION=12.2.1.16-1 \
NV_LIBNPP_DEV_VERSION=12.1.1.14-1 \
NV_LIBNCCL_DEV_PACKAGE_VERSION=2.18.5-1+cuda12.2

# Install the pinned packages and clear the microdnf cache in the same RUN.
# NV_CUDA_LIB_VERSION is not set in this stage; it is inherited from the ENV
# of the `cuda-base` stage.
RUN microdnf install -y \
cuda-command-line-tools-12-2-${NV_CUDA_LIB_VERSION} \
cuda-libraries-devel-12-2-${NV_CUDA_LIB_VERSION} \
cuda-minimal-build-12-2-${NV_CUDA_LIB_VERSION} \
cuda-cudart-devel-12-2-${NV_CUDA_CUDART_DEV_VERSION} \
cuda-nvml-devel-12-2-${NV_NVML_DEV_VERSION} \
libcublas-devel-12-2-${NV_LIBCUBLAS_DEV_VERSION} \
libnpp-devel-12-2-${NV_LIBNPP_DEV_VERSION} \
libnccl-devel-${NV_LIBNCCL_DEV_PACKAGE_VERSION} \
&& microdnf clean all

# Point LIBRARY_PATH (consulted by GCC when linking) at the CUDA stubs directory.
ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig /usr/local/cuda-12.2/compat/

## Python cuda base #################################################################
FROM cuda-base AS python-cuda-base
FROM cuda-devel AS python-cuda-base

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
Expand All @@ -64,8 +103,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
--mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
pip install \
-r requirements-cuda.txt && \
find /opt/vllm/lib/ -name ".*\.so.*" -exec strip {} \;
-r requirements-cuda.txt

## Development #################################################################
FROM python-cuda-base AS dev
Expand All @@ -88,8 +126,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements-build.txt

# install compiler cache to speed up compilation leveraging local or remote caching
# git is required for the cutlass kernels
RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y git ccache && microdnf clean all
RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y ccache && microdnf clean all
# install build dependencies

# copy input files
Expand Down Expand Up @@ -123,12 +160,13 @@ COPY vllm vllm
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
--mount=type=cache,target=/root/.cache/pip \
env CFLAGS="-march=haswell" \
CXXFLAGS="$CFLAGS $CXXFLAGS" \
CMAKE_BUILD_TYPE=Release \
python3 setup.py bdist_wheel --dist-dir=dist
CMAKE_BUILD_TYPE=Release python3 setup.py bdist_wheel --dist-dir=dist

## Release #####################################################################
# Note from the non-UBI Dockerfile:
# We used the base CUDA image because PyTorch installs its own CUDA libraries.
# However, pynccl depends on the CUDA libraries, so we had to switch to the runtime image.
# In the future it would be nice to get a container with PyTorch and CUDA without duplicating CUDA.
FROM python-install AS vllm-openai

WORKDIR /workspace
Expand All @@ -143,8 +181,7 @@ RUN microdnf install -y gcc \
# install vllm wheel first, so that torch etc will be installed
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
--mount=type=cache,target=/root/.cache/pip \
pip install dist/*.whl --verbose && \
find /opt/vllm/lib/ -regex ".*\.so.*" -exec strip {} \;
pip install dist/*.whl --verbose

ENV HF_HUB_OFFLINE=1 \
PORT=8000 \
Expand Down
Loading