diff --git a/README.md b/README.md
index 9375cfc220..4a8c34d93b 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ A Unified Library for Parameter-Efficient and Modular Transfer Learning
 Paper

-![Tests](https://github.com/Adapter-Hub/adapters/workflows/Tests/badge.svg?branch=adapters)
+![Tests](https://github.com/Adapter-Hub/adapters/workflows/Tests/badge.svg)
 [![GitHub](https://img.shields.io/github/license/adapter-hub/adapters.svg?color=blue)](https://github.com/adapter-hub/adapters/blob/main/LICENSE)
 [![PyPI](https://img.shields.io/pypi/v/adapters)](https://pypi.org/project/adapters/)
@@ -45,7 +45,7 @@ _Adapters_ provides a unified interface for efficient fine-tuning and modular tr

 ## Installation

-`adapters` currently supports **Python 3.8+** and **PyTorch 1.10+**.
+`adapters` currently supports **Python 3.9+** and **PyTorch 2.0+**.
 After [installing PyTorch](https://pytorch.org/get-started/locally/), you can install `adapters` from PyPI ...

 ```
@@ -147,7 +147,7 @@ Currently, adapters integrates all architectures and methods listed below:

 | Method | Paper(s) | Quick Links |
 | --- | --- | --- |
-| Bottleneck adapters | [Houlsby et al. (2019)](https://arxiv.org/pdf/1902.00751.pdf)<br> [Bapna and Firat (2019)](https://arxiv.org/pdf/1909.08478.pdf) | [Quickstart](https://docs.adapterhub.ml/quickstart.html), [Notebook](https://colab.research.google.com/github/Adapter-Hub/adapters/blob/main/notebooks/01_Adapter_Training.ipynb) |
+| Bottleneck adapters | [Houlsby et al. (2019)](https://arxiv.org/pdf/1902.00751.pdf)<br> [Bapna and Firat (2019)](https://arxiv.org/pdf/1909.08478.pdf)<br> [Steitz and Roth (2024)](https://openaccess.thecvf.com/content/CVPR2024/papers/Steitz_Adapters_Strike_Back_CVPR_2024_paper.pdf) | [Quickstart](https://docs.adapterhub.ml/quickstart.html), [Notebook](https://colab.research.google.com/github/Adapter-Hub/adapters/blob/main/notebooks/01_Adapter_Training.ipynb) |
 | AdapterFusion | [Pfeiffer et al. (2021)](https://aclanthology.org/2021.eacl-main.39.pdf) | [Docs: Training](https://docs.adapterhub.ml/training.html#train-adapterfusion), [Notebook](https://colab.research.google.com/github/Adapter-Hub/adapters/blob/main/notebooks/03_Adapter_Fusion.ipynb) |
 | MAD-X,<br> Invertible adapters | [Pfeiffer et al. (2020)](https://aclanthology.org/2020.emnlp-main.617/) | [Notebook](https://colab.research.google.com/github/Adapter-Hub/adapters/blob/main/notebooks/04_Cross_Lingual_Transfer.ipynb) |
 | AdapterDrop | [Rücklé et al. (2021)](https://arxiv.org/pdf/2010.11918.pdf) | [Notebook](https://colab.research.google.com/github/Adapter-Hub/adapters/blob/main/notebooks/05_Adapter_Drop_Training.ipynb) |
diff --git a/docs/installation.md b/docs/installation.md
index c3b8468eb8..51a5eaa3b0 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -1,7 +1,7 @@
 # Installation

 The `adapters` package is designed as an add-on for Hugging Face's Transformers library.
-It currently supports Python 3.8+ and PyTorch 1.10+. You will have to [install PyTorch](https://pytorch.org/get-started/locally/) first.
+It currently supports Python 3.9+ and PyTorch 2.0+. You will have to [install PyTorch](https://pytorch.org/get-started/locally/) first.

 ```{eval-rst}
 .. important::
diff --git a/setup.py b/setup.py
index d7a15ef921..e3af210570 100644
--- a/setup.py
+++ b/setup.py
@@ -155,7 +155,7 @@ def deps_list(*pkgs):
     packages=find_packages("src"),
     zip_safe=False,
     extras_require=extras,
-    python_requires=">=3.8.0",
+    python_requires=">=3.9.0",
     install_requires=install_requires,
     classifiers=[
         "Development Status :: 5 - Production/Stable",
diff --git a/src/adapters/loading.py b/src/adapters/loading.py
index 55ba1db45b..154951c01a 100644
--- a/src/adapters/loading.py
+++ b/src/adapters/loading.py
@@ -160,10 +160,10 @@ def load_weights(
                 else:
                     logger.info(f"No safetensors file found in {save_directory}. Falling back to torch.load...")
                     weights_file = join(save_directory, self.weights_name)
-                    state_dict = torch.load(weights_file, map_location="cpu")
+                    state_dict = torch.load(weights_file, map_location="cpu", weights_only=True)
             else:
                 weights_file = join(save_directory, self.weights_name)
-                state_dict = torch.load(weights_file, map_location="cpu")
+                state_dict = torch.load(weights_file, map_location="cpu", weights_only=True)
         except Exception:
             raise OSError("Unable to load weights from pytorch checkpoint file. ")
         logger.info("Loading module weights from {}".format(weights_file))
diff --git a/src/adapters/model_mixin.py b/src/adapters/model_mixin.py
index 62de6178ac..ca4db8092c 100644
--- a/src/adapters/model_mixin.py
+++ b/src/adapters/model_mixin.py
@@ -257,7 +257,7 @@ def load_embeddings(self, path: str, name: str):
         embedding_path = os.path.join(path, EMBEDDING_FILE)
         if not os.path.isfile(embedding_path):
             raise FileNotFoundError("No embeddings found at {}".format(embedding_path))
-        weights = torch.load(embedding_path)
+        weights = torch.load(embedding_path, weights_only=True)
         self.loaded_embeddings[name] = nn.Embedding.from_pretrained(weights)
         self.set_active_embeddings(name)

diff --git a/src/adapters/trainer.py b/src/adapters/trainer.py
index 2896585bcf..ca7662d449 100644
--- a/src/adapters/trainer.py
+++ b/src/adapters/trainer.py
@@ -4,21 +4,28 @@

 import torch
 from torch import nn
-from torch.utils.data.dataset import Dataset
+from torch.utils.data.dataset import Dataset, IterableDataset

 from transformers import PreTrainedModel, Seq2SeqTrainer, Trainer, __version__
 from transformers.configuration_utils import PretrainedConfig
 from transformers.data.data_collator import DataCollator
+from transformers.feature_extraction_utils import FeatureExtractionMixin
+from transformers.image_processing_utils import BaseImageProcessor
 from transformers.modeling_utils import unwrap_model
+from transformers.processing_utils import ProcessorMixin
 from transformers.tokenization_utils_base import PreTrainedTokenizerBase
 from transformers.trainer_callback import TrainerCallback, TrainerControl, TrainerState
 from transformers.trainer_utils import EvalPrediction
 from transformers.training_args import TrainingArguments
-from transformers.utils import CONFIG_NAME, WEIGHTS_NAME, is_sagemaker_mp_enabled, logging
+from transformers.utils import CONFIG_NAME, WEIGHTS_NAME, is_datasets_available, is_sagemaker_mp_enabled, logging

 from .composition import AdapterCompositionBlock, Fuse


+if is_datasets_available():
+    import datasets
+
+
 if is_sagemaker_mp_enabled():
     import smdistributed.modelparallel.torch as smp

@@ -32,15 +39,19 @@ def __init__(
         model: Union[PreTrainedModel, nn.Module] = None,
         args: TrainingArguments = None,
         data_collator: Optional[DataCollator] = None,
-        train_dataset: Optional[Dataset] = None,
-        eval_dataset: Optional[Dataset] = None,
+        train_dataset: Optional[Union[Dataset, IterableDataset, "datasets.Dataset"]] = None,
+        eval_dataset: Optional[Union[Dataset, Dict[str, Dataset], "datasets.Dataset"]] = None,
         tokenizer: Optional[PreTrainedTokenizerBase] = None,
-        model_init: Callable[[], PreTrainedModel] = None,
+        processing_class: Optional[
+            Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin]
+        ] = None,
+        model_init: Optional[Callable[[], PreTrainedModel]] = None,
         compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
         callbacks: Optional[List[TrainerCallback]] = None,
+        optimizers: Tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]] = (None, None),
+        preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None,
         adapter_names: Optional[List[List[str]]] = None,
-        optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
-        preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None,
+        **kwargs,
     ):
         if model is not None:
             model_quantized = getattr(model, "is_quantized", False)
@@ -51,12 +62,13 @@ def __init__(
             data_collator,
             train_dataset,
             eval_dataset,
-            tokenizer=tokenizer,
+            processing_class=processing_class or tokenizer,
             model_init=model_init,
             compute_metrics=compute_metrics,
             callbacks=[AdapterTrainerCallback(self)] + callbacks if callbacks else [AdapterTrainerCallback(self)],
             optimizers=optimizers,
             preprocess_logits_for_metrics=preprocess_logits_for_metrics,
+            **kwargs,
         )
         if model is not None:
             model.is_quantized = model_quantized
diff --git a/tests/composition/test_parallel.py b/tests/composition/test_parallel.py
index 80e1ae8616..8a15a9f1c5 100644
--- a/tests/composition/test_parallel.py
+++ b/tests/composition/test_parallel.py
@@ -214,7 +214,7 @@ def run_parallel_training_test(self, adapter_config, filter_key):
             do_train=True,
             learning_rate=1.0,
             max_steps=20,
-            no_cuda=True,
+            use_cpu=True,
             remove_unused_columns=False,
         )

diff --git a/tests/extended/test_adapter_trainer_ext.py b/tests/extended/test_adapter_trainer_ext.py
index 6e14944654..8da0ea07c8 100644
--- a/tests/extended/test_adapter_trainer_ext.py
+++ b/tests/extended/test_adapter_trainer_ext.py
@@ -300,7 +300,7 @@ def run_trainer(
            --per_device_eval_batch_size 4
            --max_eval_samples 8
            --val_max_target_length 30,562
-           --evaluation_strategy steps
+           --eval_strategy steps
            --eval_steps {str(eval_steps)}
            --train_adapter
        """.split()
diff --git a/tests/methods/base.py b/tests/methods/base.py
index 0d20f32fef..0758a06aa0 100644
--- a/tests/methods/base.py
+++ b/tests/methods/base.py
@@ -192,11 +192,11 @@ def run_load_test(self, adapter_config):
         name = "dummy_adapter"
         model1.add_adapter(name, config=adapter_config)
         model1.set_active_adapters(name)
-        with tempfile.TemporaryDirectory() as temp_dir:
+        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
             model1.save_adapter(temp_dir, name)

             # Check that there are actually weights saved
-            weights = torch.load(os.path.join(temp_dir, WEIGHTS_NAME), map_location="cpu")
+            weights = torch.load(os.path.join(temp_dir, WEIGHTS_NAME), map_location="cpu", weights_only=True)
             self.assertTrue(len(weights) > 0)

             # also tests that set_active works
@@ -225,7 +225,7 @@ def run_full_model_load_test(self, adapter_config):
         name = "dummy"
         model1.add_adapter(name, config=adapter_config)

-        with tempfile.TemporaryDirectory() as temp_dir:
+        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
             model1.save_pretrained(temp_dir)

             model2, loading_info = load_model(temp_dir, self.model_class, output_loading_info=True)
@@ -256,7 +256,7 @@ def trainings_run(self, model, lr=1.0, steps=8):
             do_train=True,
             learning_rate=lr,
             max_steps=steps,
-            no_cuda=True,
+            use_cpu=True,
             per_device_train_batch_size=2,
             remove_unused_columns=False,
         )
diff --git a/tests/test_adapter_conversion.py b/tests/test_adapter_conversion.py
index 9653b3f340..067b1b9665 100644
--- a/tests/test_adapter_conversion.py
+++ b/tests/test_adapter_conversion.py
@@ -37,7 +37,7 @@ def run_test(self, static_model, input_shape=None, label_dict=None):
         ):
             self.skipTest("Skipping as base model classes are different.")

-        with tempfile.TemporaryDirectory() as temp_dir:
+        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
             static_model.save_head(temp_dir)

             loading_info = {}
@@ -193,7 +193,7 @@ def test_equivalent_language_generation(self):
         static_model.eval()
         flex_model.eval()

-        with tempfile.TemporaryDirectory() as temp_dir:
+        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
             static_model.save_adapter(temp_dir, "dummy")

             loading_info = {}
@@ -209,7 +209,7 @@
             model_gen = static_model.generate(**input_samples)
             flex_model_gen = flex_model.generate(**input_samples)

-            self.assertEquals(model_gen.shape, flex_model_gen.shape)
+            self.assertEqual(model_gen.shape, flex_model_gen.shape)
             self.assertTrue(torch.equal(model_gen, flex_model_gen))

     def test_full_model_conversion(self):
@@ -220,7 +220,7 @@
         adapters.init(static_head_model)
         static_head_model.eval()

-        with tempfile.TemporaryDirectory() as temp_dir:
+        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
             static_head_model.save_pretrained(temp_dir)

             flex_head_model, loading_info = AutoAdapterModel.from_pretrained(temp_dir, output_loading_info=True)
diff --git a/tests/test_adapter_embeddings.py b/tests/test_adapter_embeddings.py
index 160828c776..0284b7c384 100644
--- a/tests/test_adapter_embeddings.py
+++ b/tests/test_adapter_embeddings.py
@@ -105,7 +105,7 @@ def test_training_embedding(self):
             do_train=True,
             learning_rate=0.4,
             max_steps=15,
-            no_cuda=True,
+            use_cpu=True,
             per_device_train_batch_size=2,
             label_names=["labels"],
         )
diff --git a/tests/test_adapter_fusion_common.py b/tests/test_adapter_fusion_common.py
index 695808eb24..b8472483ee 100644
--- a/tests/test_adapter_fusion_common.py
+++ b/tests/test_adapter_fusion_common.py
@@ -126,7 +126,7 @@ def test_load_full_model_fusion(self):
         model1.add_adapter(name2)
         model1.add_adapter_fusion([name1, name2])
         # save & reload model
-        with tempfile.TemporaryDirectory() as temp_dir:
+        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
             model1.save_pretrained(temp_dir)

             model2 = load_model(temp_dir, self.model_class)
diff --git a/tests/test_adapter_heads.py b/tests/test_adapter_heads.py
index cb7ea7078c..df7a0ac7f8 100644
--- a/tests/test_adapter_heads.py
+++ b/tests/test_adapter_heads.py
@@ -315,7 +315,7 @@ def test_load_full_model(self):
         self.add_head(model, "dummy", layers=1)

         true_config = model.get_prediction_heads_config()
-        with tempfile.TemporaryDirectory() as temp_dir:
+        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
             # save
             model.save_pretrained(temp_dir)
             # reload
diff --git a/tests/test_adapter_hub.py b/tests/test_adapter_hub.py
index fa29d13b19..0dee5eb0a6 100644
--- a/tests/test_adapter_hub.py
+++ b/tests/test_adapter_hub.py
@@ -76,7 +76,7 @@ def test_load_task_adapter_from_hub(self):
             overwrite_cache=True,
         )
         eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev")
-        training_args = TrainingArguments(output_dir="./examples", no_cuda=True)
+        training_args = TrainingArguments(output_dir="./examples", use_cpu=True)

         # evaluate
         trainer = Trainer(
diff --git a/tests/test_adapter_trainer.py b/tests/test_adapter_trainer.py
index fd1647865b..8630a31479 100644
--- a/tests/test_adapter_trainer.py
+++ b/tests/test_adapter_trainer.py
@@ -237,7 +237,7 @@ def test_training_load_best_model_at_end_full_model(self):
                 save_steps=1,
                 remove_unused_columns=False,
                 load_best_model_at_end=True,
-                evaluation_strategy="epoch",
+                eval_strategy="epoch",
                 save_strategy="epoch",
                 num_train_epochs=2,
             )
@@ -273,7 +273,7 @@ def test_training_load_best_model_at_end_adapter(self):
                 save_steps=1,
                 remove_unused_columns=False,
                 load_best_model_at_end=True,
-                evaluation_strategy="epoch",
+                eval_strategy="epoch",
                 save_strategy="epoch",
                 num_train_epochs=2,
             )
@@ -309,7 +309,7 @@ def test_training_load_best_model_at_end_fusion(self):
                 save_steps=1,
                 remove_unused_columns=False,
                 load_best_model_at_end=True,
-                evaluation_strategy="epoch",
+                eval_strategy="epoch",
                 save_strategy="epoch",
                 num_train_epochs=2,
             )
@@ -600,7 +600,7 @@ def forward(self, x):
                 output_dir=tempdir,
                 per_device_train_batch_size=1,
                 per_device_eval_batch_size=1,
-                evaluation_strategy="steps",
+                eval_strategy="steps",
                 logging_steps=10,
                 max_steps=5,
                 lr_scheduler_type="constant",
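For orientation, a minimal usage sketch (not part of the patch) of how downstream code looks against the updated APIs touched above: `use_cpu`/`eval_strategy` in `TrainingArguments`, the `processing_class` argument accepted by `AdapterTrainer`, and `weights_only=True` when reading back saved adapter weights. The checkpoint name, adapter name, labels, and output paths are illustrative assumptions, not values taken from this diff.

# Illustrative sketch only: model name, adapter name, labels, and paths are assumptions.
import os
import tempfile

import torch
from datasets import Dataset
from transformers import AutoTokenizer, TrainingArguments

from adapters import AdapterTrainer, AutoAdapterModel

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoAdapterModel.from_pretrained("bert-base-uncased")
model.add_classification_head("dummy", num_labels=2)
model.add_adapter("dummy")
model.train_adapter("dummy")  # freeze the base model, activate the adapter

# Tiny in-memory dataset so the trainer can be constructed end to end.
enc = tokenizer(["a first example", "a second example"], truncation=True, padding=True)
enc["labels"] = [0, 1]
tiny_dataset = Dataset.from_dict(enc)

training_args = TrainingArguments(
    output_dir="./out",
    use_cpu=True,           # replaces the removed no_cuda=True
    eval_strategy="epoch",  # replaces the removed evaluation_strategy
    num_train_epochs=1,
)

trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=tiny_dataset,
    eval_dataset=tiny_dataset,
    processing_class=tokenizer,  # supersedes the deprecated tokenizer= argument
)

# Adapter checkpoints written by save_adapter() are read back with weights_only=True.
with tempfile.TemporaryDirectory() as tmp:
    model.save_adapter(tmp, "dummy")
    for fname in os.listdir(tmp):
        if fname.endswith(".bin"):  # torch-format weights file, if present
            state_dict = torch.load(os.path.join(tmp, fname), map_location="cpu", weights_only=True)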