Skip to content

Commit

Permalink
add initial implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
alexeldeib committed Aug 3, 2022
1 parent 5235e6a commit 95944c4
Show file tree
Hide file tree
Showing 10 changed files with 311 additions and 8 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Builds the aks-gpu driver image and logs in to the Azure container registry.
# The push itself is still commented out at the bottom, so for now this workflow
# only proves that the image builds and that registry authentication works.
name: Building and Pushing to MCR
on: [workflow_dispatch]
# on:
#   push:
#     branches:
#       - main
#   workflow_dispatch: {}
permissions:
  id-token: write # lets azure/login request an OIDC token (no client secret is passed below)
  contents: read

jobs:
  publish:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
        with:
          # semantic-version derives the version from tags and commit history,
          # so the default shallow clone is not enough.
          fetch-depth: 0
      - uses: paulhatch/semantic-version@v5.0.0-alpha2
        with:
          bump_each_commit: true
        id: semver
      - name: 'Check version'
        # The context key is `outputs` (plural); `output` silently expands to an empty string.
        run: |
          echo "version is ${{ steps.semver.outputs.version }}"
      - name: 'Az CLI login'
        uses: azure/login@v1
        with:
          client-id: ${{ secrets.AZURE_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
      - name: 'Run Azure CLI commands'
        run: |
          docker build -f Dockerfile -t ${{ secrets.AZURE_REGISTRY_SERVER }}/public/aks/aks-gpu:${{ steps.semver.outputs.version }} .
          az acr login -n ${{ secrets.AZURE_REGISTRY_SERVER }}
          # docker push ${{ secrets.AZURE_REGISTRY_SERVER }}/public/aks/aks-gpu:${{ steps.semver.outputs.version }}
          # echo "acr push done"
18 changes: 18 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Stage "gpu": fetch the NVIDIA driver runfile, fabric manager and container
# runtime packages into /opt/gpu. This stage is discarded, so the download
# tooling installed here never reaches the final image.
FROM ubuntu:18.04 AS gpu
# apt-get rather than apt: apt's command-line interface is not stable for scripted use.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        gnupg2 \
        xz-utils

WORKDIR /opt/gpu
# download.sh sources config.sh from /opt/gpu, so both must be present before it runs.
COPY blacklist-nouveau.conf config.sh download.sh ./
RUN bash download.sh

# Final stage: only the cached artifacts plus the entrypoint and install action.
FROM ubuntu:18.04

COPY --from=gpu /opt/gpu /opt/gpu
COPY entrypoint.sh /entrypoint.sh
COPY install.sh /opt/actions/install.sh

RUN mkdir -p /mnt

# No USER directive on purpose: the entrypoint uses nsenter and is meant to run
# as a privileged container. It expects the action name (e.g. install.sh) as its
# first argument.
ENTRYPOINT ["/entrypoint.sh"]
32 changes: 24 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,30 @@
# Project
# Driver container image for AKS VHD

> This repo has been populated by an initial template to help get you started. Please
> make sure to update the content to build a great experience for community-building.
This repo provides steps to build a container image with all components required for
Kubernetes NVIDIA GPU integration. Run it as a privileged container in the host PID namespace.
It will enter the host mount namespace and install the NVIDIA drivers, container runtime,
and associated libraries on the host, validating their functionality.

As the maintainer of this project, please make a few updates:
## Build
```
docker build -f Dockerfile -t docker.io/alexeldeib/aks-gpu:latest .
docker push docker.io/alexeldeib/aks-gpu:latest
```

## Run
```bash
mkdir -p /opt/{actions,gpu}
ctr image pull docker.io/alexeldeib/aks-gpu:latest
ctr run --privileged --net-host --with-ns pid:/proc/1/ns/pid --mount type=bind,src=/opt/gpu,dst=/mnt/gpu,options=rbind --mount type=bind,src=/opt/actions,dst=/mnt/actions,options=rbind -t docker.io/alexeldeib/aks-gpu:latest /entrypoint.sh install.sh
```

or Docker (untested...)
```bash
docker run --privileged --pid=host -v /opt/gpu:/mnt/gpu -v /opt/actions:/mnt/actions docker.io/alexeldeib/aks-gpu:latest install.sh
```

Note the `--with-ns pid:/proc/1/ns/pid` and `--privileged` flags, as well as the bind mounts; these are key.

- Improving this README.MD file to provide a great experience
- Updating SUPPORT.MD with content about this project's support experience
- Understanding the security reporting process in SECURITY.MD
- Remove this section from the README

## Contributing

Expand Down
2 changes: 2 additions & 0 deletions blacklist-nouveau.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# modprobe configuration; install.sh copies this file to /etc/modprobe.d/ on the host.
# Stop the open-source nouveau driver from being loaded automatically via its aliases.
blacklist nouveau
# If nouveau is loaded anyway, pass modeset=0 so it does not take over kernel mode setting.
options nouveau modeset=0
5 changes: 5 additions & 0 deletions config.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Shared settings, sourced by both download.sh (image build) and install.sh (host install).

# Host directory that install.sh uses as the NVIDIA install prefix.
GPU_DEST='/usr/local/nvidia'

# NVIDIA driver release; also selects the matching fabric manager archive.
DRIVER_VERSION='470.57.02'

# Container runtime/toolkit package versions and the toolkit packages to cache.
NVIDIA_CONTAINER_TOOLKIT_VER='1.6.0'
NVIDIA_CONTAINER_RUNTIME_VERSION='3.6.0'
NVIDIA_PACKAGES='libnvidia-container1 libnvidia-container-tools nvidia-container-toolkit'
43 changes: 43 additions & 0 deletions download.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env bash
# Downloads everything the GPU install needs and caches it under /opt/gpu:
# the NVIDIA driver runfile, the fabric manager archive (extracted), and the
# NVIDIA container toolkit/runtime .deb packages.
# Expects to run as root (it writes to /etc/apt) with network access.
set -euo pipefail

# Provides VERSION_ID, used to pick the matching nvidia-docker apt list.
source /etc/os-release
# Provides DRIVER_VERSION, NVIDIA_CONTAINER_RUNTIME_VERSION,
# NVIDIA_CONTAINER_TOOLKIT_VER and NVIDIA_PACKAGES. They are deliberately not
# redefined here so this script cannot drift from config.sh.
source /opt/gpu/config.sh

workdir="$(mktemp -d)"
# Remove the scratch directory on every exit path, including failed downloads.
trap 'rm -rf "${workdir}"' EXIT
pushd "${workdir}" || exit

# download nvidia drivers, move to permanent cache
driver_runfile="NVIDIA-Linux-x86_64-${DRIVER_VERSION}.run"
curl -fsSLO "https://us.download.nvidia.com/tesla/${DRIVER_VERSION}/${driver_runfile}"
mv "${driver_runfile}" "/opt/gpu/${driver_runfile}"

# download fabricmanager for nvlink based systems, e.g. multi instance gpu vms.
fabricmanager_archive="fabricmanager-linux-x86_64-${DRIVER_VERSION}-archive"
curl -fsSLO "https://developer.download.nvidia.com/compute/cuda/redist/fabricmanager/linux-x86_64/${fabricmanager_archive}.tar.xz"
tar -xvf "${fabricmanager_archive}.tar.xz"
mv "${fabricmanager_archive}" "/opt/gpu/fabricmanager-linux-x86_64-${DRIVER_VERSION}"

# configure nvidia apt repo to cache packages
curl -fsSLO https://nvidia.github.io/nvidia-docker/gpgkey
gpg --dearmor -o aptnvidia.gpg gpgkey
mv aptnvidia.gpg /etc/apt/trusted.gpg.d/aptnvidia.gpg
curl -fsSL "https://nvidia.github.io/nvidia-docker/ubuntu${VERSION_ID}/nvidia-docker.list" -o /etc/apt/sources.list.d/nvidia-docker.list

apt-get update

# download nvidia debian packages for nvidia-container-runtime compat
# NVIDIA_PACKAGES is a space-separated list, so it is intentionally left unquoted.
for apt_package in $NVIDIA_PACKAGES; do
  # The version glob is quoted so apt-get receives it literally instead of the
  # shell trying to expand it against files in the working directory.
  apt-get download "${apt_package}=${NVIDIA_CONTAINER_TOOLKIT_VER}*"
  # move debs to permanent cache; here the trailing * must stay outside the quotes to glob
  mv "${apt_package}_${NVIDIA_CONTAINER_TOOLKIT_VER}"* /opt/gpu
done
apt-get download "nvidia-container-runtime=${NVIDIA_CONTAINER_RUNTIME_VERSION}*"
mv "nvidia-container-runtime_${NVIDIA_CONTAINER_RUNTIME_VERSION}"* /opt/gpu

popd || exit
48 changes: 48 additions & 0 deletions entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env bash
# Container entrypoint.
#
# Usage: /entrypoint.sh <action>
#   copy      copy the cached GPU artifacts from /opt/gpu to /mnt/gpu and exit
#   <file>    publish /opt/actions to /mnt/actions, copy the GPU cache to
#             /mnt/gpu, then run /opt/actions/<file> with bash inside the mount
#             namespace of PID 1
#
# Exits 0 on success and 1 on a missing argument, a missing action file, or a
# failing action.
# NOTE(review): PID 1 is only the host's init when the container shares the
# host PID namespace, and /mnt/actions and /mnt/gpu are presumably bind mounts
# of the host's /opt/actions and /opt/gpu, so the same paths resolve after
# nsenter. Confirm against the run command or manifest in use.
set -o errexit
set -o pipefail
set -o nounset

set -x

# ${1:-} rather than ${1}: with nounset a missing argument would otherwise abort
# with "unbound variable" before the message below is printed.
if [[ -z "${1:-}" ]]; then
  echo "Must provide a non-empty action as first argument"
  exit 1
fi

if [[ "${1}" == "copy" ]]; then
  echo "copying gpu cache files and exiting"
  cp -a /opt/gpu/. /mnt/gpu/
  echo "Completed successfully!"
  exit 0
fi

ACTION_FILE="/opt/actions/${1}"

if [[ ! -f "$ACTION_FILE" ]]; then
  echo "Expected to find action file '$ACTION_FILE', but did not exist"
  exit 1
fi

echo "Cleaning up stale actions"

rm -rf /mnt/actions/*

echo "Copying fresh actions"

cp -R /opt/actions/. /mnt/actions

echo "Executing nsenter"

cp -a /opt/gpu/. /mnt/gpu/

# Test nsenter directly in the `if`: under errexit a bare failing command ends
# the script on the spot, so a status check on the following line would never
# run and the failure message would never be printed.
if nsenter -t 1 -m bash "${ACTION_FILE}"; then
  # Success.
  rm -rf /mnt/actions/*
  echo "Completed successfully!"
else
  echo "Failed during nsenter command execution"
  exit 1
fi
68 changes: 68 additions & 0 deletions install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env bash
# Installs the NVIDIA driver and container runtime packages from the /opt/gpu
# cache, then validates the result with dkms, nvidia-modprobe and nvidia-smi.
# Expects to run as root with /opt/gpu populated (config.sh, the driver
# runfile, the cached .deb packages and blacklist-nouveau.conf).
set -euxo pipefail

# Provides DRIVER_VERSION, NVIDIA_PACKAGES and GPU_DEST.
source /opt/gpu/config.sh

KERNEL_NAME=$(uname -r)
LOG_FILE_NAME="/var/log/nvidia-installer-$(date +%s).log"

# host needs these tools to build and load kernel module, can remove ca-certificates, was only for testing
apt-get update
apt-get install -y --no-install-recommends kmod gcc make dkms initramfs-tools "linux-headers-${KERNEL_NAME}" ca-certificates

# install cached nvidia debian packages for container runtime compatibility
# NVIDIA_PACKAGES is a space-separated list, so it is intentionally left unquoted.
for apt_package in $NVIDIA_PACKAGES; do
  dpkg -i "/opt/gpu/${apt_package}"*
done
dpkg -i /opt/gpu/nvidia-container-runtime*

# blacklist nouveau driver, nvidia driver dependency
cp /opt/gpu/blacklist-nouveau.conf /etc/modprobe.d/blacklist-nouveau.conf
update-initramfs -u

# clean up lingering files from previous install
# (best effort: a previously extracted driver directory makes the runfile extraction below fail)
set +e
umount -l /usr/lib/x86_64-linux-gnu || true
umount -l /tmp/overlay || true
rm -r /tmp/overlay
rm -r "/opt/gpu/NVIDIA-Linux-x86_64-${DRIVER_VERSION}"
set -e

# set up overlayfs to change install location of nvidia libs from /usr/lib/x86_64-linux-gnu to /usr/local/nvidia
# add an extra layer of indirection via tmpfs because it's not possible to have an overlayfs on an overlayfs (i.e., inside a container)
mkdir /tmp/overlay
mount -t tmpfs tmpfs /tmp/overlay
mkdir /tmp/overlay/{workdir,lib64}
mkdir -p "${GPU_DEST}/lib64"
mount -t overlay overlay -o lowerdir=/usr/lib/x86_64-linux-gnu,upperdir=/tmp/overlay/lib64,workdir=/tmp/overlay/workdir /usr/lib/x86_64-linux-gnu

# extract runfile into /opt/gpu, takes some time, so do ahead of time
pushd /opt/gpu
sh "/opt/gpu/NVIDIA-Linux-x86_64-${DRIVER_VERSION}.run" -x
popd

# install nvidia drivers
"/opt/gpu/NVIDIA-Linux-x86_64-${DRIVER_VERSION}/nvidia-installer" -s -k="${KERNEL_NAME}" --log-file-name="${LOG_FILE_NAME}" -a --no-drm --dkms --utility-prefix="${GPU_DEST}" --opengl-prefix="${GPU_DEST}"

# move nvidia libs to correct location from temporary overlayfs
# Copy the *contents* (trailing /.): ${GPU_DEST}/lib64 already exists, so copying
# the directory itself would nest it as ${GPU_DEST}/lib64/lib64, which is not
# the path registered with ldconfig below.
cp -a /tmp/overlay/lib64/. "${GPU_DEST}/lib64/"

# move nvidia binaries to /usr/bin...because we like that?
cp -rvT "${GPU_DEST}/bin" /usr/bin

# configure system to know about nvidia lib paths
echo "${GPU_DEST}/lib64" > /etc/ld.so.conf.d/nvidia.conf
ldconfig

# unmount, cleanup
set +e
umount -l /usr/lib/x86_64-linux-gnu
umount /tmp/overlay
rm -r /tmp/overlay
set -e

# validate that nvidia driver is working
dkms status
nvidia-modprobe -u -c0
nvidia-smi
7 changes: 7 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Image reference shared by the build and push recipes.
image := "docker.io/alexeldeib/aks-gpu:latest"

# Running `just` with no arguments builds and pushes the image.
default: push

# Push the image to the registry; the image is built first.
push: containerize
    docker push {{image}}

# Build the image from the Dockerfile in the current directory.
containerize:
    docker build -f Dockerfile -t {{image}} .
61 changes: 61 additions & 0 deletions manifests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# DaemonSet that runs the aks-gpu image as a privileged pod in the host PID and
# network namespaces, invoking /entrypoint.sh with the install.sh action.
# The `&name` anchor keeps the object name, labels, selector and container name in sync.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: &name nsenter
  labels:
    app: *name
spec:
  selector:
    matchLabels:
      app: *name
  template:
    metadata:
      labels:
        app: *name
    spec:
      # affinity:
      #   nodeAffinity:
      #     requiredDuringSchedulingIgnoredDuringExecution:
      #       nodeSelectorTerms:
      #         - matchExpressions:
      #             - key: node.kubernetes.io/instance-type
      #               operator: In
      #               values:
      #                 - Standard_NP10s
      #                 - Standard_NP20s
      #                 - Standard_NP40s
      hostPID: true
      hostNetwork: true
      containers:
        - name: *name
          # requires an image with bash, curl, sleep, and nsenter (vanilla ubuntu works)
          image: docker.io/alexeldeib/aks-gpu:latest
          imagePullPolicy: Always
          command:
            - "/entrypoint.sh"
          # if you don't use my image or build one from Dockerfile, set this to "downloadandinstall"
          args:
            - "install.sh"
          securityContext:
            privileged: true
          resources:
            requests: {}
            limits: {}
          volumeMounts:
            # action scripts, supplied by the nsenter-actions ConfigMap
            - name: actions
              mountPath: "/opt/actions"
            # host /opt/actions
            - name: hostmount
              mountPath: "/mnt/actions"
            # host /opt/gpu
            - name: gpu
              mountPath: "/mnt/gpu"
      volumes:
        - name: gpu
          hostPath:
            path: /opt/gpu
            type: DirectoryOrCreate
        - name: hostmount
          hostPath:
            path: /opt/actions
            type: DirectoryOrCreate
        - name: actions
          configMap:
            name: nsenter-actions
---

0 comments on commit 95944c4

Please sign in to comment.