Skip to content

Commit

Permalink
Merge pull request #316 from aws-samples/bugfix/nccl_tests
Browse files Browse the repository at this point in the history
Fix aws ofi nccl version expansion
  • Loading branch information
verdimrc authored May 8, 2024
2 parents c1f106a + e9437be commit 8214ab7
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions micro-benchmarks/nccl-tests/nccl-tests.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT-0
# NOTE(review): this is a diff view without +/- markers. The hunk header
# (-1,6 +1,6) shows one line replaced: the next line is the REMOVED
# (pre-commit) base image.
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
# NOTE(review): the next line is the ADDED (post-commit) base image.
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04

# Build arguments with default version values; each default can be overridden
# at build time with --build-arg.
ARG GDRCOPY_VERSION=v2.4.1
ARG EFA_INSTALLER_VERSION=1.31.0
@@ -88,6 +88,8 @@ RUN git clone -b ${NCCL_VERSION} https://github.com/NVIDIA/nccl.git /opt/nccl \
###################################################
## Install AWS-OFI-NCCL plugin
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y libhwloc-dev
# Switch from sh to bash: the ${AWS_OFI_NCCL_VERSION//v} pattern substitution
# used in the following RUN (it strips the "v" from the version string) is a
# bash feature that is not part of POSIX sh.
SHELL ["/bin/bash", "-c"]
RUN curl -OL https://github.com/aws/aws-ofi-nccl/releases/download/${AWS_OFI_NCCL_VERSION}/aws-ofi-nccl-${AWS_OFI_NCCL_VERSION//v}.tar.gz \
&& tar -xf aws-ofi-nccl-${AWS_OFI_NCCL_VERSION//v}.tar.gz \
&& cd aws-ofi-nccl-${AWS_OFI_NCCL_VERSION//v} \
@@ -102,6 +104,8 @@ RUN curl -OL https://github.com/aws/aws-ofi-nccl/releases/download/${AWS_OFI_NCC
&& rm -rf aws-ofi-nccl-${AWS_OFI_NCCL_VERSION//v} \
&& rm aws-ofi-nccl-${AWS_OFI_NCCL_VERSION//v}.tar.gz

# Switch back to sh for the remaining instructions; bash was only needed for
# the ${AWS_OFI_NCCL_VERSION//v} substitutions in the plugin install step.
SHELL ["/bin/sh", "-c"]

###################################################
## Install NCCL-tests
RUN git clone -b ${NCCL_TESTS_VERSION} https://github.com/NVIDIA/nccl-tests.git /opt/nccl-tests \
@@ -120,7 +124,7 @@ ENV OMPI_MCA_pml=^cm,ucx \
OMPI_MCA_btl=tcp,self \
OMPI_MCA_btl_tcp_if_exclude=lo,docker0,veth_def_agent\
OPAL_PREFIX=/opt/amazon/openmpi \
NCCL_SOCKET_IFNAME=^docker,lo
NCCL_SOCKET_IFNAME=^docker,lo,veth_def_agent

## Set PMIX_MCA_gds=hash to turn off the PMIx error discussed in
## https://github.com/open-mpi/ompi/issues/7516
ENV PMIX_MCA_gds=hash
Expand Down

0 comments on commit 8214ab7

Please sign in to comment.