initial 2.5.0dev version
speediedan committed Aug 16, 2024
1 parent 2e7be86 commit e1703d9
Showing 21 changed files with 65 additions and 69 deletions.
2 changes: 1 addition & 1 deletion .azure-pipelines/gpu-tests.yml
@@ -45,7 +45,7 @@ jobs:
strategy:
matrix:
'PyTorch | latest':
image: "speediedan/finetuning-scheduler:py3.12-pt2.4.0-pl2.4-azpl-init"
image: "speediedan/finetuning-scheduler:py3.12-pt2.5.0-pl2.5-azpl-init"
scope: ""
# how long to run the job before automatically cancelling
timeoutInMinutes: "100"
6 changes: 3 additions & 3 deletions .github/ISSUE_TEMPLATE/bug_report.md
@@ -39,9 +39,9 @@ python collect_env_details.py
You can also fill out the list below manually.
-->

-- Fine-Tuning Scheduler Version (e.g., 2.4.0):
-- Lightning Version (e.g., 2.4.0):
-- PyTorch Version (e.g., 2.4.0):
+- Fine-Tuning Scheduler Version (e.g., 2.5.0):
+- Lightning Version (e.g., 2.5.0):
+- PyTorch Version (e.g., 2.5.0):
- Python version (e.g., 3.12):
- OS (e.g., Linux):
- CUDA/cuDNN version:
4 changes: 2 additions & 2 deletions .github/workflows/release-docker.yml
@@ -31,9 +31,9 @@ jobs:
matrix:
# initially building only the latest supported configuration
python_version: ["3.12"]
pytorch_version: ["2.4.0"]
pytorch_version: ["2.5.0"]
cust_base: ["cu12.4.0-"]
pl_version: ["2.4"]
pl_version: ["2.5"]
steps:
- name: Checkout
uses: actions/checkout@v3
11 changes: 11 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

+## [2.5.0] - 2024-XX-XX
+
+### Added
+
+- Support for Lightning and PyTorch `2.5.0`
+
+### Deprecated
+
+- removed support for PyTorch `2.1`
+
+
## [2.4.0] - 2024-08-15

### Added
5 changes: 4 additions & 1 deletion CITATION.cff
@@ -6,7 +6,7 @@ date-released: 2022-02-04
authors:
- family-names: "Dale"
given-names: "Dan"
-version: 2.4.0
+version: 2.5.0
identifiers:
- description: "Fine-Tuning Scheduler (all versions)"
type: doi
@@ -119,6 +119,9 @@ identifiers:
- description: "Fine-Tuning Scheduler (v2.3.3)"
type: doi
value: 10.5281/zenodo.12701573
- description: "Fine-Tuning Scheduler (v2.4.0)"
type: doi
value: 10.5281/zenodo.13327792
license: "Apache-2.0"
url: "https://finetuning-scheduler.readthedocs.io/"
repository-code: "https://github.com/speediedan/finetuning-scheduler"
2 changes: 1 addition & 1 deletion README.md
@@ -145,7 +145,7 @@ To ensure maximum stability, the latest Lightning patch release fully tested wit
<details>
<summary>Current build statuses for Fine-Tuning Scheduler </summary>

-| System / (PyTorch/Python ver) | 2.1.2/3.9 | 2.4.0/3.9, 2.4.0/3.12 |
+| System / (PyTorch/Python ver) | 2.2.2/3.9 | 2.5.0/3.9, 2.5.0/3.12 |
| :---------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| Linux \[GPUs\*\*\] | - | [![Build Status](https://dev.azure.com//speediedan/finetuning-scheduler/_apis/build/status/Multi-GPU%20&%20Example%20Tests?branchName=main)](https://dev.azure.com/speediedan/finetuning-scheduler/_build/latest?definitionId=1&branchName=main) |
| Linux (Ubuntu 22.04) | [![Test](https://github.com/speediedan/finetuning-scheduler/actions/workflows/ci_test-full.yml/badge.svg?branch=main&event=push)](https://github.com/speediedan/finetuning-scheduler/actions/workflows/ci_test-full.yml) | [![Test](https://github.com/speediedan/finetuning-scheduler/actions/workflows/ci_test-full.yml/badge.svg?branch=main&event=push)](https://github.com/speediedan/finetuning-scheduler/actions/workflows/ci_test-full.yml) |
6 changes: 3 additions & 3 deletions dockers/base-cuda/Dockerfile
@@ -17,7 +17,7 @@ ARG OS_VER=ubuntu22.04
FROM nvidia/cuda:${CUDA_VERSION}-devel-${OS_VER}

ARG PYTHON_VERSION=3.12
-ARG PYTORCH_VERSION=2.4.0
+ARG PYTORCH_VERSION=2.5.0
ARG CUST_BUILD=0
ARG MKL_THREADING_LAYER=GNU

@@ -85,11 +85,11 @@ RUN \
else \
# or target a specific cuda build, by specifying a particular index url w/...
# ... default channel
-pip install torch torchvision --index-url https://download.pytorch.org/whl/cu124; \
+#pip install torch torchvision --index-url https://download.pytorch.org/whl/cu124; \
# ... pytorch patch version
# pip install torch==1.11.1+cu113 torchvision==0.11.3+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html; \
# ... pytorch nightly dev version
-#pip install --pre torch==2.4.0.dev20240601 torchvision==0.19.0.dev20240601 -f https://download.pytorch.org/whl/nightly/cu121/torch_nightly.html; \
+pip install --pre torch==2.5.0.dev20240814 torchvision==0.20.0.dev20240814 --index-url https://download.pytorch.org/whl/nightly/cu124; \
# ... test channel
#pip install --pre torch==2.4.0 torchvision --index-url https://download.pytorch.org/whl/test/cu124; \
fi && \
2 changes: 1 addition & 1 deletion dockers/docker_images_main.sh
@@ -43,7 +43,7 @@ maybe_build(){

build_eval(){
# latest PyTorch image supported by release
declare -A iv=(["cuda"]="12.4.0" ["python"]="3.12" ["pytorch"]="2.4.0" ["lightning"]="2.4" ["cust_build"]="1")
declare -A iv=(["cuda"]="12.4.0" ["python"]="3.12" ["pytorch"]="2.5.0" ["lightning"]="2.5" ["cust_build"]="1")
export latest_pt="base-cu${iv["cuda"]}-py${iv["python"]}-pt${iv["pytorch"]}-pl${iv["lightning"]}"
export latest_azpl="py${iv["python"]}-pt${iv["pytorch"]}-pl${iv["lightning"]}-azpl-init"
maybe_build iv "${latest_pt}" "${latest_azpl}"
2 changes: 1 addition & 1 deletion dockers/docker_images_release.sh
@@ -44,7 +44,7 @@ maybe_build(){

build_eval(){
# latest PyTorch image supported by release
-declare -A iv=(["cuda"]="12.4.0" ["python"]="3.12" ["pytorch"]="2.4.0" ["lightning"]="2.4" ["cust_build"]="0")
+declare -A iv=(["cuda"]="12.4.0" ["python"]="3.12" ["pytorch"]="2.5.0" ["lightning"]="2.5" ["cust_build"]="0")
export latest_pt="base-cu${iv["cuda"]}-py${iv["python"]}-pt${iv["pytorch"]}-pl${iv["lightning"]}"
export latest_azpl="py${iv["python"]}-pt${iv["pytorch"]}-pl${iv["lightning"]}-azpl-init"
maybe_build iv "${latest_pt}" "${latest_azpl}"
4 changes: 2 additions & 2 deletions dockers/fts-az-base/Dockerfile
@@ -11,8 +11,8 @@
# limitations under the License.

ARG PYTHON_VERSION=3.12
-ARG PYTORCH_VERSION=2.4.0
-ARG LIGHTNING_VERSION=2.4
+ARG PYTORCH_VERSION=2.5.0
+ARG LIGHTNING_VERSION=2.5
ARG CUST_BASE

FROM speediedan/finetuning-scheduler:base-${CUST_BASE}py${PYTHON_VERSION}-pt${PYTORCH_VERSION}-pl${LIGHTNING_VERSION}
2 changes: 1 addition & 1 deletion dockers/release-conda/Dockerfile
@@ -18,7 +18,7 @@ FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04

ARG CUDATOOLKIT_VERSION=12.4
ARG PYTHON_VERSION=3.12
-ARG PYTORCH_VERSION=2.4.0
+ARG PYTORCH_VERSION=2.5.0
ARG CONDA_VERSION=4.13.0

SHELL ["/bin/bash", "-c"]
4 changes: 2 additions & 2 deletions dockers/release/Dockerfile
@@ -11,8 +11,8 @@
# limitations under the License.

ARG PYTHON_VERSION=3.12
-ARG PYTORCH_VERSION=2.4.0
-ARG LIGHTNING_VERSION=2.4
+ARG PYTORCH_VERSION=2.5.0
+ARG LIGHTNING_VERSION=2.5
ARG CUST_BASE

FROM speediedan/finetuning-scheduler:base-${CUST_BASE}py${PYTHON_VERSION}-pt${PYTORCH_VERSION}-pl${LIGHTNING_VERSION}
25 changes: 8 additions & 17 deletions docs/source/advanced/fsdp_scheduled_fine_tuning.rst
@@ -31,7 +31,7 @@ Demonstration FTS FSDP training/profiling configurations and a DDP baseline for

Most of these FTS FSDP training examples have the same dependencies as the basic
:ref:`scheduled fine-tuning for SuperGLUE<scheduled-fine-tuning-superglue>` examples. Running the
-:ref:`basic example<basic-fsdp-fine-tuning-example>` requires PyTorch >= ``2.1.0``.
+:ref:`basic example<basic-fsdp-fine-tuning-example>`.

.. note::

@@ -50,7 +50,7 @@ The demo schedule configurations are composed with the basic FTS example's share
export TORCH_CPP_LOG_LEVEL=ERROR
-# Profiled demo of basic scheduled fine-tuning with FSDP (requires PyTorch >= 2.1.0)
+# Profiled demo of basic scheduled fine-tuning with FSDP
python fts_superglue.py fit --config config/advanced/fsdp/fts_fsdp_basic_profile.yaml
# Profiled demo of FSDP scheduled fine-tuning using the ``awp_overrides`` option:
@@ -68,8 +68,7 @@ The demo schedule configurations are composed with the basic FTS example's share
Basic Scheduled Fine-Tuning with FSDP
*************************************

-Beginning with PyTorch version ``2.1.0``, the effective constraints FSDP imposed on fine-tuning schedules were substantially relaxed. As you'll see below,
-scheduled fine-tuning with FSDP is pretty straightforward! All one need do:
+As you'll see below, scheduled fine-tuning with FSDP is pretty straightforward! All one need do:

1. Pass ``use_orig_params`` to the FSDP strategy configuration.
2. Provide a simple ``auto_wrap_policy`` configuration (not technically required but almost always desired).
@@ -112,7 +111,7 @@ We can just define an ``auto_wrap_policy`` for our DeBERTa-v3 module, directing
module_classes: !!set
? transformers.models.deberta_v2.modeling_deberta_v2.DebertaV2Layer
-That's it! Note that we set ``use_orig_params`` to ``True`` in line 5 as it allows for more flexible fine-tuning schedules with PyTorch >= ``2.1.0``.
+That's it! Note that we set ``use_orig_params`` to ``True`` in line 5 as it allows for more flexible fine-tuning schedules.

In the next section, we'll cover some of the more advanced configuration options available for customizing scheduled fine-tuning with FSDP.

@@ -133,8 +132,6 @@ There are a number of usage contexts that might motivate moving beyond the simpl
- :attr:`~finetuning_scheduler.strategy_adapters.FSDPStrategyAdapter.awp_overrides`
* - A desire to use FSDP in the default "use_orig_params=False" mode
- `See PyTorch documentation for possible issues <https://pytorch.org/docs/master/fsdp.html?highlight=use_orig_params>`_
-* - if using a version of PyTorch < ``2.1.0``
--

As with standard FSDP module wrapping, one can use an ``auto_wrap_policy`` to wrap a model for FSDP scheduled
fine-tuning. In the current FTS release, there is only one FTS-specific FSDP configuration enhancement to consider:
@@ -262,7 +259,7 @@ Additional FSDP Wrapping and Debugging Guidance

In order to support multi-phase scheduled fine-tuning with FSDP in ``use_orig_params=False`` mode, FTS's key precondition
is that the defined fine-tuning schedule phases have disjoint sets of FSDP-flattened parameters (a ``FlatParameter`` is created when wrapping a set of
-modules in a FSDP instance/unit). This constraint is derived from the fact that (for PyTorch < ``2.1.0`` or ``use_orig_params=False`` mode) the ``requires_grad`` attribute
+modules in a FSDP instance/unit). This constraint is derived from the fact that (if in ``use_orig_params=False`` mode) the ``requires_grad`` attribute
must be the same for all parameters flattened into the same ``FlatParameter``. [#]_

FTS will attempt to validate that the module is wrapped in a manner that aligns with the defined fine-tuning
@@ -279,7 +276,7 @@ FTS stops before beginning training and provides extensive context via this erro

.. code-block:: bash
"Fine-tuning schedule phases do not have disjoint FSDP-flattened parameter sets. Because the `requires_grad` attribute of FSDP-flattened parameters currently must be the same for all flattened parameters (for PyTorch < ``2.1.0`` or if in ``use_orig_params=False`` mode), fine-tuning schedules must avoid thawing parameters in the same FSDP-flattened parameter in different phases. Please ensure parameters associated with each phase are wrapped in separate phase-aligned FSDP instances.
"Fine-tuning schedule phases do not have disjoint FSDP-flattened parameter sets. Because the `requires_grad` attribute of FSDP-flattened parameters currently must be the same for all flattened parameters (if in ``use_orig_params=False`` mode), fine-tuning schedules must avoid thawing parameters in the same FSDP-flattened parameter in different phases. Please ensure parameters associated with each phase are wrapped in separate phase-aligned FSDP instances.
In this particular case, there are parameters not included in your fine-tuning schedule that span more than one fine-tuning phase. HINT: parameters associated with unwrapped modules will be included in the top-level (aka 'root') FSDP instance so ensuring all modules associated with fine-tuning scheduled parameters are wrapped separately from the top-level FSDP instance may avoid triggering this exception.
@@ -325,17 +322,11 @@ As always, if needed, one can alternatively override ``configure_model`` and man

.. tip::

-If FSDP training with PyTorch >= ``2.1.0`` and ``use_orig_params=True``, ``DEBUG`` level logging will provide
-parameter shard allocation diagnostic info where relevant.
+If FSDP training with ``use_orig_params=True``, ``DEBUG`` level logging will provide parameter shard allocation
+diagnostic info where relevant.

-.. tip::
-
-If you want to extend FTS to use a custom, currently unsupported strategy or override current FTS behavior with a
-given training strategy, subclassing :class:`~finetuning_scheduler.strategy_adapters.StrategyAdapter` is a way to do
-so.

Footnotes
*********

.. [#] As of PyTorch ``2.1.0``, ``FlatParameter`` s constructed in ``use_orig_params`` mode are allowed to contain
original params with non-uniform ``requires_grad``.
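
For orientation, the simplified FSDP setup described in the updated docs above (pass ``use_orig_params`` to the strategy and supply a minimal ``auto_wrap_policy``) might look roughly like the Python sketch below. It is illustrative only and not part of this commit; the ``nn.Linear`` wrap target, device count, and the ``my_ft_schedule.yaml`` path are hypothetical placeholders.

# Illustrative sketch only (not part of this commit): FSDP scheduled fine-tuning
# configured per the updated docs, i.e. ``use_orig_params=True`` plus a simple
# module-class ``auto_wrap_policy``. Wrapped class and schedule path are placeholders.
import torch.nn as nn
from lightning.pytorch import Trainer
from lightning.pytorch.strategies import FSDPStrategy
from finetuning_scheduler import FinetuningScheduler

strategy = FSDPStrategy(
    use_orig_params=True,          # allows more flexible fine-tuning schedules
    auto_wrap_policy={nn.Linear},  # analogous to the ``module_classes`` set in the docs' yaml
)

trainer = Trainer(
    strategy=strategy,
    accelerator="gpu",
    devices=2,
    callbacks=[FinetuningScheduler(ft_schedule="my_ft_schedule.yaml")],  # hypothetical schedule path
)
# trainer.fit(model, datamodule=dm)  # model/datamodule construction omitted for brevity
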
6 changes: 3 additions & 3 deletions requirements/base.txt
@@ -1,4 +1,4 @@
-#lightning>=2.4.0,<2.4.1
+#lightning>=2.5.0,<2.5.1
# the below is uncommented when master is targeting a specific pl dev master commit
-git+https://github.com/Lightning-AI/lightning.git@2064887b12dd934a5f9a2bf45897f29e3bfc74d1#egg=lightning
-torch>=2.1.0
+git+https://github.com/Lightning-AI/lightning.git@1551a16b94f5234a4a78801098f64d0732ef5cb5#egg=lightning
+torch>=2.2.0
6 changes: 3 additions & 3 deletions requirements/standalone_base.txt
@@ -1,4 +1,4 @@
-#pytorch-lightning>=2.4.0,<2.4.1
+#pytorch-lightning>=2.5.0,<2.5.1
# the below is uncommented when master is targeting a specific pl dev master commit
-git+https://github.com/Lightning-AI/pytorch-lightning.git@2064887b12dd934a5f9a2bf45897f29e3bfc74d1#egg=pytorch-lightning
-torch>=2.1.0
+git+https://github.com/Lightning-AI/pytorch-lightning.git@1551a16b94f5234a4a78801098f64d0732ef5cb5#egg=pytorch-lightning
+torch>=2.2.0
2 changes: 1 addition & 1 deletion setup.py
@@ -135,7 +135,7 @@ def _setup_args(standalone: bool = False) -> Dict[str, Any]:
_INSTALL_PATHS["require"],
file_name=base_reqs,
standalone=standalone,
pl_commit="2064887b12dd934a5f9a2bf45897f29e3bfc74d1",
pl_commit="1551a16b94f5234a4a78801098f64d0732ef5cb5",
)
base_setup["install_requires"] = install_requires
return base_setup
2 changes: 1 addition & 1 deletion src/finetuning_scheduler/__about__.py
@@ -1,7 +1,7 @@
import time

_this_year = time.strftime("%Y")
__version__ = "2.4.0.dev0"
__version__ = "2.5.0.dev0"
__author__ = "Dan Dale"
__author_email__ = "danny.dale@gmail.com"
__license__ = "Apache-2.0"
16 changes: 4 additions & 12 deletions src/finetuning_scheduler/strategy_adapters/fsdp.py
@@ -75,12 +75,12 @@ class FSDPStrategyAdapter(StrategyAdapter):
fine-tuning schedule phases have disjoint sets of FSDP-flattened parameters (i.e. ``FlatParameter`` s, which are
created when wrapping a set of modules in a FSDP instance/unit). This constraint is derived from the fact that the
``requires_grad`` attribute currently must be the same for all parameters flattened into the same ``FlatParameter``
-(for PyTorch < ``2.1.0`` or if in ``use_orig_params=False`` mode).
+(if in ``use_orig_params=False`` mode).
In order to support multi-phase scheduled fine-tuning with FSDP in ``use_orig_params=False`` mode, FTS's key
precondition is that the defined fine-tuning schedule phases have disjoint sets of FSDP-flattened parameters (i.e.
``FlatParameter`` s, which are created when wrapping a set of modules in a FSDP instance/unit). This constraint is
-derived from the fact that (for PyTorch < ``2.1.0`` or ``use_orig_params=False`` mode) the ``requires_grad``
+derived from the fact that (if in ``use_orig_params=False`` mode) the ``requires_grad``
attribute must be the same for all parameters flattened into the same ``FlatParameter``.
To facilitate module wrapping in alignment with fine-tuning schedule phases, FTS provides the
@@ -114,13 +114,6 @@ class FSDPStrategyAdapter(StrategyAdapter):
approach to auto-wrapping in alignment with a fine-tuning schedule. As always, if needed, one can override
``configure_model`` and manually wrap a given
:external+pl:class:`~lightning.pytorch.core.module.LightningModule` to align with a desired fine-tuning schedule.
-.. deprecated:: v2.1.0
-:class:`~finetuning_scheduler.strategy_adapters.FSDPStrategyAdapter` now uses the ``configure_model`` hook
-rather than the deprecated ``configure_sharded_model`` hook to apply the relevant model wrapping. See `this PR
-<https://github.com/Lightning-AI/lightning/pull/18004>`_ for more context regarding
-``configure_sharded_model`` deprecation.
"""

_fsdp_flat_to_unflat_mapping: Dict
@@ -519,8 +512,7 @@ def _validate_fsdp_phases_disjoint(self) -> Tuple:
feedback_nonerrors: List[str] = []
if self._allow_mixed_req_grad:
rank_zero_debug(
"Bypassing FSDP-specific phase disjointness validation because `use_orig_params` is "
"``True`` and PyTorch is >= `2.1.0`"
"Bypassing FSDP-specific phase disjointness validation because `use_orig_params` is ``True``"
)
assert self.pl_module._trainer is not None
# check only required for mixed-precision training with DEBUG level logging requested
@@ -644,7 +636,7 @@ def get_fsdp_owner(lp: str) -> str:
warn_msg = (
"\n\nFine-tuning schedule phases do not have disjoint FSDP-flattened parameter sets. Because the"
" `requires_grad` attribute of FSDP-flattened parameters currently must be the same for all flattened"
" parameters (for PyTorch < ``2.1.0`` or if in ``use_orig_params=False`` mode), fine-tuning schedules must"
" parameters (if in ``use_orig_params=False`` mode), fine-tuning schedules must"
" avoid thawing parameters in the same FSDP-flattened parameter in different phases. Please ensure"
" parameters associated with each phase are wrapped in separate phase-aligned FSDP instances.\n\n"
f"""{unsched_param_msg if unsched_msg else ''}\n\n"""
1 change: 1 addition & 0 deletions src/fts_examples/stable/fts_superglue.py
@@ -135,6 +135,7 @@ def __init__(
}
os.environ["TOKENIZERS_PARALLELISM"] = "true" if self.hparams.tokenizers_parallelism else "false"
self.tokenizer = AutoTokenizer.from_pretrained(self.hparams.model_name_or_path, use_fast=True,
+clean_up_tokenization_spaces=True,
local_files_only=False)

def prepare_data(self):
2 changes: 1 addition & 1 deletion src/fts_examples/stable/ipynb_src/fts_superglue_nb.py
@@ -262,7 +262,7 @@ def __init__(
self.save_hyperparameters()
os.environ["TOKENIZERS_PARALLELISM"] = "true" if self.hparams.tokenizers_parallelism else "false"
self.tokenizer = AutoTokenizer.from_pretrained(
-self.hparams.model_name_or_path, use_fast=True, local_files_only=False
+self.hparams.model_name_or_path, use_fast=True, local_files_only=False, clean_up_tokenization_spaces=True,
)

def prepare_data(self):
24 changes: 11 additions & 13 deletions tests/test_fsdp.py
@@ -18,7 +18,6 @@

import pytest
import torch
-from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_2_2
from lightning.pytorch import seed_everything, Trainer
from lightning.pytorch.plugins.precision.fsdp import FSDPPrecision
from lightning.pytorch.strategies import FSDPStrategy
@@ -73,6 +72,8 @@
"when logging on epoch level in distributed", # validating FTS handling in this scenario
"torch.cpu.amp.autocast", # required as of PT 2.4
"FSDP.state_dict_type", # temporarily required until Lightning uses new FSDP state dict API with PT 2.4
"of Tensor.pin_memory", # required as of PT 2.5 nightly for FSDP1 `_flat_param` internal usage
"Tensor.is_pinned", # required as of PT 2.5 nightly for FSDP1 `_flat_param` internal usage
]
EXPECTED_WARNS.extend(additional_fsdp_warns)
FSDP_BASE_WARNS = EXPECTED_WARNS
@@ -338,14 +339,11 @@ def configure_model(self) -> None:
self.layer[i] = wrap(layer)
self.layer = wrap(self.layer)

-if _TORCH_GREATER_EQUAL_2_2:
-# starting with https://github.com/pytorch/pytorch/pull/108033, FSDP no longer moves ignored parameters
-# (or buffers) to device. We need to manually move them to device in versions > 2.1.x (precise version TBD)
-for param in self.layer._ignored_params:
-with torch.no_grad():
-param.data = param.to(self.device)
-if param.grad is not None:
-param.grad.data = param.grad.to(self.device)
+for param in self.layer._ignored_params:
+with torch.no_grad():
+param.data = param.to(self.device)
+if param.grad is not None:
+param.grad.data = param.grad.to(self.device)

# verify activation checkpointing can be manually applied
check_fn = lambda submodule: isinstance(submodule, tuple([torch.nn.Linear])) # noqa E731
@@ -611,7 +609,7 @@ def policy(self):

# RunIf aliases
runif_map = {
"min2_2": {"min_torch": "2.2.0"},
#"min2_2": {"min_torch": "2.2.0"},
#"max3_12_min2_2": {"max_python": "3.12", "min_torch": "2.2.0"},
}

@@ -680,7 +678,7 @@ def policy(self):
),
"cust_awp_noprec_dynamo": (
(nond_loss_adam_model, cust_awp, False, 7, unwrap_7_dyn, None, epoch_t_only, max_epoch_4, None),
"min2_2",
None,
(path_default_orig_eo_dyn, *nones(3)),
),
"cust_awp_mwp_2_1_reinitlr_optim_no_use_orig": (
@@ -705,12 +703,12 @@ def policy(self):
),
"cust_awp_nop_ignore_m_no_ofld_no_use_orig": (
(cust_model, None, False, 0, unwrap_4_7, *nones(3), ignore_mod_cfg),
"min2_2",
None,
(path_8_14, *nones(3)),
), # TODO: once PyTorch deprecates ``ignored_modules``, check for the warning with this test
"cust_awp_nop_ignore_p_no_ofld": (
(cust_model, None, False, 0, unwrap_4_7, *nones(3), ignore_states_cfg),
"min2_2",
None,
(path_ignore_p_uo, *nones(3)),
),
"non_disjoint_params_allowed": (
