diff --git a/README.md b/README.md
index 9375cfc220..4a8c34d93b 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ A Unified Library for Parameter-Efficient and Modular Transfer Learning
Paper
-![Tests](https://github.com/Adapter-Hub/adapters/workflows/Tests/badge.svg?branch=adapters)
+![Tests](https://github.com/Adapter-Hub/adapters/workflows/Tests/badge.svg)
[![GitHub](https://img.shields.io/github/license/adapter-hub/adapters.svg?color=blue)](https://github.com/adapter-hub/adapters/blob/main/LICENSE)
[![PyPI](https://img.shields.io/pypi/v/adapters)](https://pypi.org/project/adapters/)
@@ -45,7 +45,7 @@ _Adapters_ provides a unified interface for efficient fine-tuning and modular tr
## Installation
-`adapters` currently supports **Python 3.8+** and **PyTorch 1.10+**.
+`adapters` currently supports **Python 3.9+** and **PyTorch 2.0+**.
After [installing PyTorch](https://pytorch.org/get-started/locally/), you can install `adapters` from PyPI ...
```
@@ -147,7 +147,7 @@ Currently, adapters integrates all architectures and methods listed below:
| Method | Paper(s) | Quick Links |
| --- | --- | --- |
-| Bottleneck adapters | [Houlsby et al. (2019)](https://arxiv.org/pdf/1902.00751.pdf)<br> [Bapna and Firat (2019)](https://arxiv.org/pdf/1909.08478.pdf) | [Quickstart](https://docs.adapterhub.ml/quickstart.html), [Notebook](https://colab.research.google.com/github/Adapter-Hub/adapters/blob/main/notebooks/01_Adapter_Training.ipynb) |
+| Bottleneck adapters | [Houlsby et al. (2019)](https://arxiv.org/pdf/1902.00751.pdf)<br> [Bapna and Firat (2019)](https://arxiv.org/pdf/1909.08478.pdf)<br> [Steitz and Roth (2024)](https://openaccess.thecvf.com/content/CVPR2024/papers/Steitz_Adapters_Strike_Back_CVPR_2024_paper.pdf) | [Quickstart](https://docs.adapterhub.ml/quickstart.html), [Notebook](https://colab.research.google.com/github/Adapter-Hub/adapters/blob/main/notebooks/01_Adapter_Training.ipynb) |
| AdapterFusion | [Pfeiffer et al. (2021)](https://aclanthology.org/2021.eacl-main.39.pdf) | [Docs: Training](https://docs.adapterhub.ml/training.html#train-adapterfusion), [Notebook](https://colab.research.google.com/github/Adapter-Hub/adapters/blob/main/notebooks/03_Adapter_Fusion.ipynb) |
| MAD-X,<br> Invertible adapters | [Pfeiffer et al. (2020)](https://aclanthology.org/2020.emnlp-main.617/) | [Notebook](https://colab.research.google.com/github/Adapter-Hub/adapters/blob/main/notebooks/04_Cross_Lingual_Transfer.ipynb) |
| AdapterDrop | [Rücklé et al. (2021)](https://arxiv.org/pdf/2010.11918.pdf) | [Notebook](https://colab.research.google.com/github/Adapter-Hub/adapters/blob/main/notebooks/05_Adapter_Drop_Training.ipynb) |
diff --git a/docs/installation.md b/docs/installation.md
index c3b8468eb8..51a5eaa3b0 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -1,7 +1,7 @@
# Installation
The `adapters` package is designed as an add-on for Hugging Face's Transformers library.
-It currently supports Python 3.8+ and PyTorch 1.10+. You will have to [install PyTorch](https://pytorch.org/get-started/locally/) first.
+It currently supports Python 3.9+ and PyTorch 2.0+. You will have to [install PyTorch](https://pytorch.org/get-started/locally/) first.
```{eval-rst}
.. important::
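
A quick environment check matching the requirement bump above; this is purely an illustrative sketch, not part of the package, and the error messages are made up.

```python
import sys

import torch

# Illustrative guard for the new minimum versions (Python 3.9+, PyTorch 2.0+).
assert sys.version_info >= (3, 9), "adapters requires Python 3.9 or newer"
torch_major, torch_minor = (int(x) for x in torch.__version__.split(".")[:2])
assert (torch_major, torch_minor) >= (2, 0), "adapters requires PyTorch 2.0 or newer"
```
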
diff --git a/setup.py b/setup.py
index d7a15ef921..e3af210570 100644
--- a/setup.py
+++ b/setup.py
@@ -155,7 +155,7 @@ def deps_list(*pkgs):
packages=find_packages("src"),
zip_safe=False,
extras_require=extras,
- python_requires=">=3.8.0",
+ python_requires=">=3.9.0",
install_requires=install_requires,
classifiers=[
"Development Status :: 5 - Production/Stable",
diff --git a/src/adapters/loading.py b/src/adapters/loading.py
index 55ba1db45b..154951c01a 100644
--- a/src/adapters/loading.py
+++ b/src/adapters/loading.py
@@ -160,10 +160,10 @@ def load_weights(
else:
logger.info(f"No safetensors file found in {save_directory}. Falling back to torch.load...")
weights_file = join(save_directory, self.weights_name)
- state_dict = torch.load(weights_file, map_location="cpu")
+ state_dict = torch.load(weights_file, map_location="cpu", weights_only=True)
else:
weights_file = join(save_directory, self.weights_name)
- state_dict = torch.load(weights_file, map_location="cpu")
+ state_dict = torch.load(weights_file, map_location="cpu", weights_only=True)
except Exception:
raise OSError("Unable to load weights from pytorch checkpoint file. ")
logger.info("Loading module weights from {}".format(weights_file))
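
To make the effect of the `weights_only=True` switch above concrete, here is a minimal, self-contained sketch (the file names are made up): only tensors and basic containers are unpickled, while arbitrary objects are rejected instead of being executed during load.

```python
import torch

# A plain state dict of tensors loads fine under weights_only=True.
torch.save({"adapter.down.weight": torch.zeros(4, 4)}, "ok.bin")
print(torch.load("ok.bin", map_location="cpu", weights_only=True).keys())


class NotATensor:
    """Stand-in for an arbitrary pickled object inside a checkpoint."""


torch.save({"payload": NotATensor()}, "suspicious.bin")
try:
    torch.load("suspicious.bin", map_location="cpu", weights_only=True)
except Exception as err:  # typically an UnpicklingError
    print(f"rejected: {type(err).__name__}")
```
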
diff --git a/src/adapters/model_mixin.py b/src/adapters/model_mixin.py
index 62de6178ac..ca4db8092c 100644
--- a/src/adapters/model_mixin.py
+++ b/src/adapters/model_mixin.py
@@ -257,7 +257,7 @@ def load_embeddings(self, path: str, name: str):
embedding_path = os.path.join(path, EMBEDDING_FILE)
if not os.path.isfile(embedding_path):
raise FileNotFoundError("No embeddings found at {}".format(embedding_path))
- weights = torch.load(embedding_path)
+ weights = torch.load(embedding_path, weights_only=True)
self.loaded_embeddings[name] = nn.Embedding.from_pretrained(weights)
self.set_active_embeddings(name)
diff --git a/src/adapters/trainer.py b/src/adapters/trainer.py
index 2896585bcf..ca7662d449 100644
--- a/src/adapters/trainer.py
+++ b/src/adapters/trainer.py
@@ -4,21 +4,28 @@
import torch
from torch import nn
-from torch.utils.data.dataset import Dataset
+from torch.utils.data.dataset import Dataset, IterableDataset
from transformers import PreTrainedModel, Seq2SeqTrainer, Trainer, __version__
from transformers.configuration_utils import PretrainedConfig
from transformers.data.data_collator import DataCollator
+from transformers.feature_extraction_utils import FeatureExtractionMixin
+from transformers.image_processing_utils import BaseImageProcessor
from transformers.modeling_utils import unwrap_model
+from transformers.processing_utils import ProcessorMixin
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
from transformers.trainer_callback import TrainerCallback, TrainerControl, TrainerState
from transformers.trainer_utils import EvalPrediction
from transformers.training_args import TrainingArguments
-from transformers.utils import CONFIG_NAME, WEIGHTS_NAME, is_sagemaker_mp_enabled, logging
+from transformers.utils import CONFIG_NAME, WEIGHTS_NAME, is_datasets_available, is_sagemaker_mp_enabled, logging
from .composition import AdapterCompositionBlock, Fuse
+if is_datasets_available():
+ import datasets
+
+
if is_sagemaker_mp_enabled():
import smdistributed.modelparallel.torch as smp
@@ -32,15 +39,19 @@ def __init__(
model: Union[PreTrainedModel, nn.Module] = None,
args: TrainingArguments = None,
data_collator: Optional[DataCollator] = None,
- train_dataset: Optional[Dataset] = None,
- eval_dataset: Optional[Dataset] = None,
+ train_dataset: Optional[Union[Dataset, IterableDataset, "datasets.Dataset"]] = None,
+ eval_dataset: Optional[Union[Dataset, Dict[str, Dataset], "datasets.Dataset"]] = None,
tokenizer: Optional[PreTrainedTokenizerBase] = None,
- model_init: Callable[[], PreTrainedModel] = None,
+ processing_class: Optional[
+ Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin]
+ ] = None,
+ model_init: Optional[Callable[[], PreTrainedModel]] = None,
compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
callbacks: Optional[List[TrainerCallback]] = None,
+ optimizers: Tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]] = (None, None),
+ preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None,
adapter_names: Optional[List[List[str]]] = None,
- optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
- preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None,
+ **kwargs,
):
if model is not None:
model_quantized = getattr(model, "is_quantized", False)
@@ -51,12 +62,13 @@ def __init__(
data_collator,
train_dataset,
eval_dataset,
- tokenizer=tokenizer,
+ processing_class=processing_class or tokenizer,
model_init=model_init,
compute_metrics=compute_metrics,
callbacks=[AdapterTrainerCallback(self)] + callbacks if callbacks else [AdapterTrainerCallback(self)],
optimizers=optimizers,
preprocess_logits_for_metrics=preprocess_logits_for_metrics,
+ **kwargs,
)
if model is not None:
model.is_quantized = model_quantized
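
A rough usage sketch of the updated constructor, assuming a `transformers` release whose `Trainer` already accepts `processing_class`; the checkpoint name, adapter name, and tiny in-memory dataset are placeholders rather than part of the patch.

```python
import torch
from torch.utils.data import Dataset

from adapters import AdapterTrainer, AutoAdapterModel
from transformers import AutoTokenizer, TrainingArguments


class TinyDataset(Dataset):
    """A handful of pre-tokenized examples, just enough to build a trainer."""

    def __init__(self, tokenizer):
        enc = tokenizer(["hello world", "adapters are modular"], padding=True)
        self.examples = [
            {"input_ids": ids, "attention_mask": mask, "labels": 0}
            for ids, mask in zip(enc["input_ids"], enc["attention_mask"])
        ]

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, i):
        return self.examples[i]


model = AutoAdapterModel.from_pretrained("roberta-base")
model.add_adapter("demo")
model.add_classification_head("demo", num_labels=2)
model.train_adapter("demo")

tokenizer = AutoTokenizer.from_pretrained("roberta-base")
args = TrainingArguments(output_dir="./output", max_steps=5, use_cpu=True)

# With the signature change above, the tokenizer is passed via `processing_class`;
# the older `tokenizer=` keyword is still accepted and forwarded.
trainer = AdapterTrainer(
    model=model,
    args=args,
    train_dataset=TinyDataset(tokenizer),
    processing_class=tokenizer,
)
trainer.train()
```
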
diff --git a/tests/composition/test_parallel.py b/tests/composition/test_parallel.py
index 80e1ae8616..8a15a9f1c5 100644
--- a/tests/composition/test_parallel.py
+++ b/tests/composition/test_parallel.py
@@ -214,7 +214,7 @@ def run_parallel_training_test(self, adapter_config, filter_key):
do_train=True,
learning_rate=1.0,
max_steps=20,
- no_cuda=True,
+ use_cpu=True,
remove_unused_columns=False,
)
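
The `no_cuda` to `use_cpu` and `evaluation_strategy` to `eval_strategy` renames above recur throughout the remaining test changes; a minimal sketch of the updated keyword names (the output directory is a placeholder):

```python
from transformers import TrainingArguments

# Updated keyword names: `use_cpu` replaces the deprecated `no_cuda`, and
# `eval_strategy` replaces `evaluation_strategy`. Values mirror the tests above.
args = TrainingArguments(
    output_dir="./output",
    do_train=True,
    learning_rate=1.0,
    max_steps=20,
    use_cpu=True,
    eval_strategy="steps",
    eval_steps=10,
    remove_unused_columns=False,
)
print(args.use_cpu, args.eval_strategy)
```
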
diff --git a/tests/extended/test_adapter_trainer_ext.py b/tests/extended/test_adapter_trainer_ext.py
index 6e14944654..8da0ea07c8 100644
--- a/tests/extended/test_adapter_trainer_ext.py
+++ b/tests/extended/test_adapter_trainer_ext.py
@@ -300,7 +300,7 @@ def run_trainer(
--per_device_eval_batch_size 4
--max_eval_samples 8
--val_max_target_length {max_len}
- --evaluation_strategy steps
+ --eval_strategy steps
--eval_steps {str(eval_steps)}
--train_adapter
""".split()
diff --git a/tests/methods/base.py b/tests/methods/base.py
index 0d20f32fef..0758a06aa0 100644
--- a/tests/methods/base.py
+++ b/tests/methods/base.py
@@ -192,11 +192,11 @@ def run_load_test(self, adapter_config):
name = "dummy_adapter"
model1.add_adapter(name, config=adapter_config)
model1.set_active_adapters(name)
- with tempfile.TemporaryDirectory() as temp_dir:
+ with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
model1.save_adapter(temp_dir, name)
# Check that there are actually weights saved
- weights = torch.load(os.path.join(temp_dir, WEIGHTS_NAME), map_location="cpu")
+ weights = torch.load(os.path.join(temp_dir, WEIGHTS_NAME), map_location="cpu", weights_only=True)
self.assertTrue(len(weights) > 0)
# also tests that set_active works
@@ -225,7 +225,7 @@ def run_full_model_load_test(self, adapter_config):
name = "dummy"
model1.add_adapter(name, config=adapter_config)
- with tempfile.TemporaryDirectory() as temp_dir:
+ with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
model1.save_pretrained(temp_dir)
model2, loading_info = load_model(temp_dir, self.model_class, output_loading_info=True)
@@ -256,7 +256,7 @@ def trainings_run(self, model, lr=1.0, steps=8):
do_train=True,
learning_rate=lr,
max_steps=steps,
- no_cuda=True,
+ use_cpu=True,
per_device_train_batch_size=2,
remove_unused_columns=False,
)
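
The save-and-reload pattern the tests now follow, as a small standalone sketch; the file name and tensor contents are placeholders. `ignore_cleanup_errors=True` makes temporary-directory removal best-effort (useful when Windows still holds file locks), and the reload goes through the restricted `weights_only=True` path.

```python
import os
import tempfile

import torch

with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
    weights_file = os.path.join(temp_dir, "pytorch_adapter.bin")
    torch.save({"dummy.weight": torch.ones(2, 2)}, weights_file)

    # Reload with the restricted unpickler, mirroring run_load_test above.
    weights = torch.load(weights_file, map_location="cpu", weights_only=True)
    assert len(weights) > 0
```
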
diff --git a/tests/test_adapter_conversion.py b/tests/test_adapter_conversion.py
index 9653b3f340..067b1b9665 100644
--- a/tests/test_adapter_conversion.py
+++ b/tests/test_adapter_conversion.py
@@ -37,7 +37,7 @@ def run_test(self, static_model, input_shape=None, label_dict=None):
):
self.skipTest("Skipping as base model classes are different.")
- with tempfile.TemporaryDirectory() as temp_dir:
+ with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
static_model.save_head(temp_dir)
loading_info = {}
@@ -193,7 +193,7 @@ def test_equivalent_language_generation(self):
static_model.eval()
flex_model.eval()
- with tempfile.TemporaryDirectory() as temp_dir:
+ with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
static_model.save_adapter(temp_dir, "dummy")
loading_info = {}
@@ -209,7 +209,7 @@ def test_equivalent_language_generation(self):
model_gen = static_model.generate(**input_samples)
flex_model_gen = flex_model.generate(**input_samples)
- self.assertEquals(model_gen.shape, flex_model_gen.shape)
+ self.assertEqual(model_gen.shape, flex_model_gen.shape)
self.assertTrue(torch.equal(model_gen, flex_model_gen))
def test_full_model_conversion(self):
@@ -220,7 +220,7 @@ def test_full_model_conversion(self):
adapters.init(static_head_model)
static_head_model.eval()
- with tempfile.TemporaryDirectory() as temp_dir:
+ with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
static_head_model.save_pretrained(temp_dir)
flex_head_model, loading_info = AutoAdapterModel.from_pretrained(temp_dir, output_loading_info=True)
diff --git a/tests/test_adapter_embeddings.py b/tests/test_adapter_embeddings.py
index 160828c776..0284b7c384 100644
--- a/tests/test_adapter_embeddings.py
+++ b/tests/test_adapter_embeddings.py
@@ -105,7 +105,7 @@ def test_training_embedding(self):
do_train=True,
learning_rate=0.4,
max_steps=15,
- no_cuda=True,
+ use_cpu=True,
per_device_train_batch_size=2,
label_names=["labels"],
)
diff --git a/tests/test_adapter_fusion_common.py b/tests/test_adapter_fusion_common.py
index 695808eb24..b8472483ee 100644
--- a/tests/test_adapter_fusion_common.py
+++ b/tests/test_adapter_fusion_common.py
@@ -126,7 +126,7 @@ def test_load_full_model_fusion(self):
model1.add_adapter(name2)
model1.add_adapter_fusion([name1, name2])
# save & reload model
- with tempfile.TemporaryDirectory() as temp_dir:
+ with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
model1.save_pretrained(temp_dir)
model2 = load_model(temp_dir, self.model_class)
diff --git a/tests/test_adapter_heads.py b/tests/test_adapter_heads.py
index cb7ea7078c..df7a0ac7f8 100644
--- a/tests/test_adapter_heads.py
+++ b/tests/test_adapter_heads.py
@@ -315,7 +315,7 @@ def test_load_full_model(self):
self.add_head(model, "dummy", layers=1)
true_config = model.get_prediction_heads_config()
- with tempfile.TemporaryDirectory() as temp_dir:
+ with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
# save
model.save_pretrained(temp_dir)
# reload
diff --git a/tests/test_adapter_hub.py b/tests/test_adapter_hub.py
index fa29d13b19..0dee5eb0a6 100644
--- a/tests/test_adapter_hub.py
+++ b/tests/test_adapter_hub.py
@@ -76,7 +76,7 @@ def test_load_task_adapter_from_hub(self):
overwrite_cache=True,
)
eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev")
- training_args = TrainingArguments(output_dir="./examples", no_cuda=True)
+ training_args = TrainingArguments(output_dir="./examples", use_cpu=True)
# evaluate
trainer = Trainer(
diff --git a/tests/test_adapter_trainer.py b/tests/test_adapter_trainer.py
index fd1647865b..8630a31479 100644
--- a/tests/test_adapter_trainer.py
+++ b/tests/test_adapter_trainer.py
@@ -237,7 +237,7 @@ def test_training_load_best_model_at_end_full_model(self):
save_steps=1,
remove_unused_columns=False,
load_best_model_at_end=True,
- evaluation_strategy="epoch",
+ eval_strategy="epoch",
save_strategy="epoch",
num_train_epochs=2,
)
@@ -273,7 +273,7 @@ def test_training_load_best_model_at_end_adapter(self):
save_steps=1,
remove_unused_columns=False,
load_best_model_at_end=True,
- evaluation_strategy="epoch",
+ eval_strategy="epoch",
save_strategy="epoch",
num_train_epochs=2,
)
@@ -309,7 +309,7 @@ def test_training_load_best_model_at_end_fusion(self):
save_steps=1,
remove_unused_columns=False,
load_best_model_at_end=True,
- evaluation_strategy="epoch",
+ eval_strategy="epoch",
save_strategy="epoch",
num_train_epochs=2,
)
@@ -600,7 +600,7 @@ def forward(self, x):
output_dir=tempdir,
per_device_train_batch_size=1,
per_device_eval_batch_size=1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
logging_steps=10,
max_steps=5,
lr_scheduler_type="constant",