From 5d5f1eb80355006edb4a4c28c6ba676e2e53abc6 Mon Sep 17 00:00:00 2001 From: Daniel Dale Date: Thu, 7 Nov 2024 13:42:43 -0800 Subject: [PATCH] bump stable pytorch to 2.5.1, update docker image accordingly --- .azure-pipelines/gpu-tests.yml | 2 +- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- .github/workflows/release-docker.yml | 2 +- README.md | 2 +- dockers/base-cuda/Dockerfile | 2 +- dockers/docker_images_main.sh | 2 +- dockers/docker_images_release.sh | 2 +- dockers/fts-az-base/Dockerfile | 2 +- dockers/release-conda/Dockerfile | 2 +- dockers/release/Dockerfile | 2 +- requirements/pl_adjust_versions.py | 3 ++- src/fts_examples/patching/dep_patch_shim.py | 2 +- 12 files changed, 13 insertions(+), 12 deletions(-) diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index f68e317..e490b99 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -46,7 +46,7 @@ jobs: strategy: matrix: 'PyTorch | latest': - image: "speediedan/finetuning-scheduler:py3.12-pt2.5.0-pl2.5-azpl-init" + image: "speediedan/finetuning-scheduler:py3.12-pt2.5.1-pl2.5-azpl-init" scope: "" # how long to run the job before automatically cancelling timeoutInMinutes: "100" diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 6c16417..626cec1 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -41,7 +41,7 @@ You can also fill out the list below manually. - Fine-Tuning Scheduler Version (e.g., 2.5.0): - Lightning Version (e.g., 2.5.0): -- PyTorch Version (e.g., 2.5.0): +- PyTorch Version (e.g., 2.5.1): - Python version (e.g., 3.12): - OS (e.g., Linux): - CUDA/cuDNN version: diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index 6f6d08c..cc77c44 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -31,7 +31,7 @@ jobs: matrix: # initially building only the latest supported configuration python_version: ["3.12"] - pytorch_version: ["2.5.0"] + pytorch_version: ["2.5.1"] cust_base: ["cu12.4.0-"] pl_version: ["2.5"] steps: diff --git a/README.md b/README.md index bc0c36f..d9d4599 100644 --- a/README.md +++ b/README.md @@ -145,7 +145,7 @@ To ensure maximum stability, the latest Lightning patch release fully tested wit
Current build statuses for Fine-Tuning Scheduler -| System / (PyTorch/Python ver) | 2.2.2/3.9 | 2.5.0/3.9, 2.5.0/3.12 | +| System / (PyTorch/Python ver) | 2.2.2/3.9 | 2.5.1/3.9, 2.5.1/3.12 | | :---------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | | Linux \[GPUs\*\*\] | - | [![Build Status](https://dev.azure.com//speediedan/finetuning-scheduler/_apis/build/status/Multi-GPU%20&%20Example%20Tests?branchName=main)](https://dev.azure.com/speediedan/finetuning-scheduler/_build/latest?definitionId=1&branchName=main) | | Linux (Ubuntu 22.04) | [![Test](https://github.com/speediedan/finetuning-scheduler/actions/workflows/ci_test-full.yml/badge.svg?branch=main&event=push)](https://github.com/speediedan/finetuning-scheduler/actions/workflows/ci_test-full.yml) | [![Test](https://github.com/speediedan/finetuning-scheduler/actions/workflows/ci_test-full.yml/badge.svg?branch=main&event=push)](https://github.com/speediedan/finetuning-scheduler/actions/workflows/ci_test-full.yml) | diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index e6ceb5c..b07aa81 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -17,7 +17,7 @@ ARG OS_VER=ubuntu22.04 FROM nvidia/cuda:${CUDA_VERSION}-devel-${OS_VER} ARG PYTHON_VERSION=3.12 -ARG PYTORCH_VERSION=2.5.0 +ARG PYTORCH_VERSION=2.5.1 ARG CUST_BUILD=0 ARG MKL_THREADING_LAYER=GNU diff --git a/dockers/docker_images_main.sh b/dockers/docker_images_main.sh index d2f0922..fa2fa15 100755 --- a/dockers/docker_images_main.sh +++ b/dockers/docker_images_main.sh @@ -43,7 +43,7 @@ maybe_build(){ build_eval(){ # latest PyTorch image supported by release - declare -A iv=(["cuda"]="12.4.0" ["python"]="3.12" ["pytorch"]="2.5.0" ["lightning"]="2.5" ["cust_build"]="1") + declare -A iv=(["cuda"]="12.4.0" ["python"]="3.12" ["pytorch"]="2.5.1" ["lightning"]="2.5" ["cust_build"]="1") export latest_pt="base-cu${iv["cuda"]}-py${iv["python"]}-pt${iv["pytorch"]}-pl${iv["lightning"]}" export latest_azpl="py${iv["python"]}-pt${iv["pytorch"]}-pl${iv["lightning"]}-azpl-init" maybe_build iv "${latest_pt}" "${latest_azpl}" diff --git a/dockers/docker_images_release.sh b/dockers/docker_images_release.sh index 9d1afc4..4cf8353 100755 --- a/dockers/docker_images_release.sh +++ b/dockers/docker_images_release.sh @@ -44,7 +44,7 @@ maybe_build(){ build_eval(){ # latest PyTorch image supported by release - declare -A iv=(["cuda"]="12.4.0" ["python"]="3.12" ["pytorch"]="2.5.0" ["lightning"]="2.5" ["cust_build"]="0") + declare -A iv=(["cuda"]="12.4.0" ["python"]="3.12" ["pytorch"]="2.5.1" ["lightning"]="2.5" ["cust_build"]="0") export latest_pt="base-cu${iv["cuda"]}-py${iv["python"]}-pt${iv["pytorch"]}-pl${iv["lightning"]}" export latest_azpl="py${iv["python"]}-pt${iv["pytorch"]}-pl${iv["lightning"]}-azpl-init" maybe_build iv "${latest_pt}" "${latest_azpl}" diff --git a/dockers/fts-az-base/Dockerfile b/dockers/fts-az-base/Dockerfile index 6b0df64..a26cfc9 100644 --- a/dockers/fts-az-base/Dockerfile +++ b/dockers/fts-az-base/Dockerfile @@ -11,7 +11,7 @@ # limitations under the License. ARG PYTHON_VERSION=3.12 -ARG PYTORCH_VERSION=2.5.0 +ARG PYTORCH_VERSION=2.5.1 ARG LIGHTNING_VERSION=2.5 ARG CUST_BASE diff --git a/dockers/release-conda/Dockerfile b/dockers/release-conda/Dockerfile index 9b47720..d49b307 100644 --- a/dockers/release-conda/Dockerfile +++ b/dockers/release-conda/Dockerfile @@ -18,7 +18,7 @@ FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 ARG CUDATOOLKIT_VERSION=12.4 ARG PYTHON_VERSION=3.12 -ARG PYTORCH_VERSION=2.5.0 +ARG PYTORCH_VERSION=2.5.1 ARG CONDA_VERSION=4.13.0 SHELL ["/bin/bash", "-c"] diff --git a/dockers/release/Dockerfile b/dockers/release/Dockerfile index 61f125d..76cb2a1 100644 --- a/dockers/release/Dockerfile +++ b/dockers/release/Dockerfile @@ -11,7 +11,7 @@ # limitations under the License. ARG PYTHON_VERSION=3.12 -ARG PYTORCH_VERSION=2.5.0 +ARG PYTORCH_VERSION=2.5.1 ARG LIGHTNING_VERSION=2.5 ARG CUST_BASE diff --git a/requirements/pl_adjust_versions.py b/requirements/pl_adjust_versions.py index 176ea82..659b347 100644 --- a/requirements/pl_adjust_versions.py +++ b/requirements/pl_adjust_versions.py @@ -6,7 +6,8 @@ # IMPORTANT: this list needs to be sorted in reverse VERSIONS = [ dict(torch="2.6.0", torchvision="0.21.0"), # nightly - dict(torch="2.5.0", torchvision="0.20.0"), # stable + dict(torch="2.5.1", torchvision="0.20.1"), # stable + dict(torch="2.5.0", torchvision="0.20.0"), dict(torch="2.4.0", torchvision="0.19.0"), dict(torch="2.3.1", torchvision="0.18.1"), dict(torch="2.3.0", torchvision="0.18.0"), diff --git a/src/fts_examples/patching/dep_patch_shim.py b/src/fts_examples/patching/dep_patch_shim.py index e617ecd..e2d4f76 100644 --- a/src/fts_examples/patching/dep_patch_shim.py +++ b/src/fts_examples/patching/dep_patch_shim.py @@ -56,7 +56,7 @@ def _patch_triton(): # required for `torch==2.5.x`, TBD wrt subsequent versions einsum_strategies_patch = DependencyPatch( - condition=(lwt_compare_version("torch", operator.le, "2.5.1"), + condition=(lwt_compare_version("torch", operator.le, "2.5.2"), lwt_compare_version("torch", operator.ge, "2.5.0"),), env_flag=OSEnvToggle("ENABLE_FTS_EINSUM_STRATEGY_PATCH", default="0"), function=_patch_einsum_strategies, patched_package='torch',