diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
index bcb68007..970ce32d 100644
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@@ -115,98 +115,98 @@ jobs:
           cache-from: type=gha,src=docker/env-cuda-12-1
           cache-to: type=gha,mode=max
 
-  # build_runner:
-  #   name: Docker Env Image (github-worker-12-1)
+  build_runner:
+    name: Docker Env Image (github-worker-12-1)
 
-  #   needs: build_env
-  #   runs-on: ubuntu-latest
-  #   permissions:
-  #     contents: read
-  #     packages: write
-  #     # This is used to complete the identity challenge
-  #     # with sigstore/fulcio when running outside of PRs.
-  #     id-token: write
-
-  #   steps:
-  #     # Get and log the free space
-  #     - name: Get system free space (Before reclaim)
-  #       run: |
-  #         echo "Free space:"
-  #         df -h
+    needs: build_env
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+      # This is used to complete the identity challenge
+      # with sigstore/fulcio when running outside of PRs.
+      id-token: write
+
+    steps:
+      # Get and log the free space
+      - name: Get system free space (Before reclaim)
+        run: |
+          echo "Free space:"
+          df -h
 
-  #     # Due to the docker image being > available space on the runner
-  #     # we need to do some optimization, to create more space.
-  #     # https://github.com/marketplace/actions/disk-space-reclaimer
-  #     # https://stackoverflow.com/questions/76294509/github-actions-docker-service-container-25gb-cannot-be-loaded
-  #     - name: Maximize build space
-  #       uses: insightsengineering/disk-space-reclaimer@v1
-  #       with:
-  #         # this might remove tools that are actually needed,
-  #         # if set to "true" but frees about 6 GB
-  #         tools-cache: true
-
-  #         # all of these default to true, but feel free to set to
-  #         # "false" if necessary for your workflow
-  #         android: true
-  #         dotnet: true
-  #         haskell: true
-  #         large-packages: true
-  #         swap-storage: true
-  #         docker-images: true
+      # Due to the docker image being > available space on the runner
+      # we need to do some optimization, to create more space.
+      # https://github.com/marketplace/actions/disk-space-reclaimer
+      # https://stackoverflow.com/questions/76294509/github-actions-docker-service-container-25gb-cannot-be-loaded
+      - name: Maximize build space
+        uses: insightsengineering/disk-space-reclaimer@v1
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tools-cache: true
+
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+          docker-images: true
 
-  #     # Get and log the free space
-  #     - name: Get system free space (After reclaim)
-  #       run: |
-  #         echo "Free space:"
-  #         df -h
-
-  #     - name: Checkout repository
-  #       uses: actions/checkout@v3
-
-  #     # Install the cosign tool except on PR
-  #     # https://github.com/sigstore/cosign-installer
-  #     - name: Install cosign
-  #       if: github.event_name != 'pull_request'
-  #       uses: sigstore/cosign-installer@v3.3.0
-  #       with:
-  #         cosign-release: 'v3.3.0'
-
-  #     # Workaround: https://github.com/docker/build-push-action/issues/461
-  #     - name: Setup Docker buildx
-  #       uses: docker/setup-buildx-action@v2
-
-  #     # Login against a Docker registry except on PR
-  #     # https://github.com/docker/login-action
-  #     - name: Log into registry ${{ env.REGISTRY }}
-  #       if: github.event_name != 'pull_request'
-  #       uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c
-  #       with:
-  #         registry: ${{ env.REGISTRY }}
-  #         username: ${{ github.actor }}
-  #         password: ${{ secrets.GITHUB_TOKEN }}
-
-  #     # Extract metadata (tags, labels) for Docker
-  #     # https://github.com/docker/metadata-action
-  #     - name: Extract Docker metadata
-  #       id: meta
-  #       uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
-  #       with:
-  #         images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+      # Get and log the free space
+      - name: Get system free space (After reclaim)
+        run: |
+          echo "Free space:"
+          df -h
+
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      # Install the cosign tool except on PR
+      # https://github.com/sigstore/cosign-installer
+      - name: Install cosign
+        if: github.event_name != 'pull_request'
+        uses: sigstore/cosign-installer@v3.3.0
+        with:
+          cosign-release: 'v2.2.2'
+
+      # Workaround: https://github.com/docker/build-push-action/issues/461
+      - name: Setup Docker buildx
+        uses: docker/setup-buildx-action@v2
+
+      # Login against a Docker registry except on PR
+      # https://github.com/docker/login-action
+      - name: Log into registry ${{ env.REGISTRY }}
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Extract metadata (tags, labels) for Docker
+      # https://github.com/docker/metadata-action
+      - name: Extract Docker metadata
+        id: meta
+        uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
 
-  #     - name: downcase IMAGE_NAME
-  #       run: |
-  #         echo "IMAGE_NAME_LC=${IMAGE_NAME,,}" >>${GITHUB_ENV}
+      - name: downcase IMAGE_NAME
+        run: |
+          echo "IMAGE_NAME_LC=${IMAGE_NAME,,}" >>${GITHUB_ENV}
 
-  #     # Build and push Docker image with Buildx (don't push on PR)
-  #     # https://github.com/docker/build-push-action
-  #     - name: Build and push Docker image (github-worker-cuda-12-1)
-  #       id: build-and-push
-  #       uses: docker/build-push-action@v4
-  #       with:
-  #         context: "{{defaultContext}}:docker/github-worker-cuda-12-1"
-  #         push: ${{ github.event_name != 'pull_request' }} # Don't push on PR
-  #         tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}:github-worker-cuda-12-1
-  #         # tags: ${{ steps.meta.outputs.tags }}
-  #         labels: ${{ steps.meta.outputs.labels }}
-  #         cache-from: type=gha,src=docker/github-worker-cuda-12-1
-  #         cache-to: type=gha,mode=max
+      # Build and push Docker image with Buildx (don't push on PR)
+      # https://github.com/docker/build-push-action
+      - name: Build and push Docker image (github-worker-cuda-12-1)
+        id: build-and-push
+        uses: docker/build-push-action@v4
+        with:
+          context: "{{defaultContext}}:docker/github-worker-cuda-12-1"
+          push: ${{ github.event_name != 'pull_request' }} # Don't push on PR
+          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME_LC }}:github-worker-cuda-12-1
+          # tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha,src=docker/github-worker-cuda-12-1
+          cache-to: type=gha,mode=max
diff --git a/docker/github-worker-cuda-12-1/Dockerfile b/docker/github-worker-cuda-12-1/Dockerfile
new file mode 100644
index 00000000..9204c588
--- /dev/null
+++ b/docker/github-worker-cuda-12-1/Dockerfile
@@ -0,0 +1,41 @@
+# Temporary, until the rwkv package is public
+FROM ghcr.io/rwkv/rwkv-lm-lora:env-cuda-12-1
+# FROM ghcr.io/rwkv/rwkv-infctx-trainer:env-cuda-12-1
+
+# Install the github runner
+RUN cd / && mkdir actions-runner && cd actions-runner && \
+    curl -o actions-runner-linux-x64-2.312.0.tar.gz -L \
+    https://github.com/actions/runner/releases/download/v2.312.0/actions-runner-linux-x64-2.312.0.tar.gz && \
+    tar xzf ./actions-runner-linux-x64-2.312.0.tar.gz && \
+    rm ./actions-runner-linux-x64-2.312.0.tar.gz
+
+# Clone the runner, for lane2 track
+RUN cd / && cp -r /actions-runner /actions-runner-lane2
+
+# Install dependencies
+RUN cd /actions-runner && ./bin/installdependencies.sh && \
+    cd /actions-runner-lane2 && ./bin/installdependencies.sh
+
+# Copy the entrypoint script, and set it up
+COPY entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+ENTRYPOINT ["/entrypoint.sh"]
+
+# Configure default env variables
+ENV RUNNER_LABELS=""
+ENV RUNNER_NAME=""
+ENV RUNNER_TOKEN=""
+ENV RUNNER_REPO_URL="https://github.com/RWKV"
+
+# Runner with lane2 track
+# ---
+# this helps setup dual runs on the same machine
+# to help ensure better utilization of GPUs.
+#
+# In general DS2/3_offload should be used.
+#
+# Tags should be adjusted to be half their original spec
+# to account for the fact that we are running two runners
+#
+# This is only useful for high GPU, and high ram count machines
+ENV RUNNER_LANE2="false"
diff --git a/docker/github-worker-cuda-12-1/entrypoint.sh b/docker/github-worker-cuda-12-1/entrypoint.sh
new file mode 100644
index 00000000..b0cf6cc8
--- /dev/null
+++ b/docker/github-worker-cuda-12-1/entrypoint.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+export RUNNER_ALLOW_RUNASROOT="1"
+cd /actions-runner || exit 1
+
+# CUDA version for label
+CUDA_VER="cuda-12-1"
+
+# Check the URL, token, and name of the runner from the container ENV vars
+# and if they are not set, provide default values
+if [[ -z "${RUNNER_NAME}" ]]; then
+    export RUNNER_NAME=$(hostname)
+fi
+if [[ -z "${RUNNER_TOKEN}" ]]; then
+    echo "# [WARNING] RUNNER_TOKEN is missing, skipping github runner setup"
+else
+    echo "# [INFO] lane1 starting up ... "
+
+    # Single-runner ("nolane") mode is used unless RUNNER_LANE2=true,
+    # in which case both the lane1 and lane2 runners are started
+    if [ "$RUNNER_LANE2" != true ]; then
+
+        # Configure unattended
+        ./config.sh \
+            --unattended \
+            --url "${RUNNER_REPO_URL}" \
+            --token "${RUNNER_TOKEN}" \
+            --name "${RUNNER_NAME}" \
+            --replace \
+            --labels "nolane,${CUDA_VER},${RUNNER_LABELS}"
+
+        # Run it in background (the final `wait` blocks on it)
+        ./run.sh &
+
+        echo "# [INFO] lane2 runner is disabled"
+    else
+        # Configure unattended
+        ./config.sh \
+            --unattended \
+            --url "${RUNNER_REPO_URL}" \
+            --token "${RUNNER_TOKEN}" \
+            --name "${RUNNER_NAME}-lane1" \
+            --replace \
+            --labels "lane1,${CUDA_VER},${RUNNER_LABELS}"
+
+        # Run it in background (the final `wait` blocks on it)
+        ./run.sh &
+
+        echo "# [INFO] lane2 starting up ... "
+
+        cd /actions-runner-lane2 || exit 1
+        ./config.sh \
+            --unattended \
+            --url "${RUNNER_REPO_URL}" \
+            --token "${RUNNER_TOKEN}" \
+            --name "${RUNNER_NAME}-lane2" \
+            --replace \
+            --labels "lane2,${CUDA_VER},${RUNNER_LABELS}"
+
+        # Run it in background (the final `wait` blocks on it)
+        ./run.sh &
+    fi
+fi
+
+# Follow up on any forwarded command args
+if [[ $# -gt 0 ]]; then
+    cd /root
+    exec "$@"
+fi
+
+# Wait for all background runners to exit
+# (no PID is tracked, so a bare `wait` covers every background job)
+wait