diff --git a/src/peft/tuners/adalora/bnb.py b/src/peft/tuners/adalora/bnb.py
index a37745569a..b5e2b8a1c4 100644
--- a/src/peft/tuners/adalora/bnb.py
+++ b/src/peft/tuners/adalora/bnb.py
@@ -70,7 +70,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
             if requires_conversion:
                 output = output.to(expected_dtype)
             output = output * scaling / ranknum
-            result += output
+            # inplace operation on view is forbidden for MatMul8bitLtBackward, so avoid it
+            result = result + output
         return result

     def __repr__(self) -> str:
@@ -127,7 +128,7 @@ def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
             requires_conversion = not torch.is_autocast_enabled()
             if requires_conversion:
                 expected_dtype = result.dtype
-                compute_dtype = lora_A.weight.dtype
+                compute_dtype = lora_A.dtype
                 if x.dtype != compute_dtype:
                     x = x.to(compute_dtype)

diff --git a/src/peft/tuners/adalora/model.py b/src/peft/tuners/adalora/model.py
index 71f2ed7579..7ccf13e8c9 100644
--- a/src/peft/tuners/adalora/model.py
+++ b/src/peft/tuners/adalora/model.py
@@ -236,7 +236,7 @@ def __getattr__(self, name: str):
     def forward(self, *args, **kwargs):
         outputs = self.model.forward(*args, **kwargs)

-        if getattr(outputs, "loss", None) is not None:
+        if (getattr(outputs, "loss", None) is not None) and isinstance(outputs.loss, torch.Tensor):
             # Calculate the orthogonal regularization
             orth_reg_weight = self.peft_config[self.trainable_adapter_name].orth_reg_weight

diff --git a/tests/test_gpu_examples.py b/tests/test_gpu_examples.py
index 103808e251..1af1919ad3 100644
--- a/tests/test_gpu_examples.py
+++ b/tests/test_gpu_examples.py
@@ -125,6 +125,14 @@ def tearDown(self):
         torch.cuda.empty_cache()
         gc.collect()

+    def _check_inference_finite(self, model, batch):
+        # try inference without Trainer class
+        training = model.training
+        model.eval()
+        output = model(**batch.to(model.device))
+        self.assertTrue(torch.isfinite(output.logits).all())
+        model.train(training)
+
     @pytest.mark.single_gpu_tests
     def test_causal_lm_training(self):
         r"""
@@ -335,6 +343,71 @@ def test_4bit_adalora_causalLM(self):

         data = load_dataset("ybelkada/english_quotes_copy")
         data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)
+        batch = tokenizer(data["train"][:3]["quote"], return_tensors="pt", padding=True)
+        self._check_inference_finite(model, batch)
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            trainer = Trainer(
+                model=model,
+                train_dataset=data["train"],
+                args=TrainingArguments(
+                    per_device_train_batch_size=4,
+                    gradient_accumulation_steps=4,
+                    warmup_steps=2,
+                    max_steps=3,
+                    learning_rate=2e-4,
+                    fp16=True,
+                    logging_steps=1,
+                    output_dir=tmp_dir,
+                ),
+                data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
+            )
+            model.config.use_cache = False
+            trainer.train()
+
+            model.cpu().save_pretrained(tmp_dir)
+
+            self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
+            self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
+
+            # assert loss is not None
+            self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
+
+    @pytest.mark.single_gpu_tests
+    @require_torch_gpu
+    def test_8bit_adalora_causalLM(self):
+        r"""
+        Tests the 8bit training with adalora
+        """
+        model_id = "facebook/opt-350m"
+
+        model = AutoModelForCausalLM.from_pretrained(model_id, load_in_8bit=True)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+        model.gradient_checkpointing_enable()
+        model = prepare_model_for_kbit_training(model)
+
+        peft_config = AdaLoraConfig(
+            init_r=6,
+            target_r=4,
+            tinit=50,
+            tfinal=100,
+            deltaT=5,
+            beta1=0.3,
+            beta2=0.3,
+            orth_reg_weight=0.2,
+            lora_alpha=32,
+            lora_dropout=0.05,
+            bias="none",
+            task_type="CAUSAL_LM",
+        )
+
+        model = get_peft_model(model, peft_config)
+
+        data = load_dataset("ybelkada/english_quotes_copy")
+        data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)
+        batch = tokenizer(data["train"][:3]["quote"], return_tensors="pt", padding=True)
+        self._check_inference_finite(model, batch)

         with tempfile.TemporaryDirectory() as tmp_dir:
             trainer = Trainer(
@@ -671,6 +744,14 @@ def tearDown(self):
         gc.collect()
         torch.cuda.empty_cache()

+    def _check_inference_finite(self, model, batch):
+        # try inference without Trainer class
+        training = model.training
+        model.eval()
+        output = model(**batch.to(model.device))
+        self.assertTrue(torch.isfinite(output.logits).all())
+        model.train(training)
+
     @pytest.mark.single_gpu_tests
     def test_causal_lm_training(self):
         r"""
@@ -738,6 +819,7 @@ def test_adalora_causalLM(self):
             quantization_config=self.quantization_config,
         )

+        tokenizer = AutoTokenizer.from_pretrained(self.causal_lm_model_id)
         model = prepare_model_for_kbit_training(model)

         peft_config = AdaLoraConfig(
@@ -759,6 +841,8 @@ def test_adalora_causalLM(self):

         data = load_dataset("ybelkada/english_quotes_copy")
         data = data.map(lambda samples: self.tokenizer(samples["quote"]), batched=True)
+        batch = tokenizer(data["train"][:3]["quote"], return_tensors="pt", padding=True)
+        self._check_inference_finite(model, batch)

         with tempfile.TemporaryDirectory() as tmp_dir:
             trainer = Trainer(
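
Note (illustration only, not part of the patch): the new comment in bnb.py refers to PyTorch's general rule that a tensor tracked by autograd as a view must not be modified in place. A minimal sketch of that behavior, assuming nothing about the bitsandbytes MatMul8bitLt internals:

import torch

# A view of a leaf tensor that requires grad cannot be modified in place;
# autograd rejects the in-place op outright.
base = torch.randn(4, 4, requires_grad=True)
view = base[0]

try:
    view += 1.0  # in-place add on the view -> RuntimeError
except RuntimeError as err:
    print(f"in-place add rejected: {err}")

# The out-of-place form, analogous to `result = result + output` above,
# allocates a new tensor and keeps the autograd graph valid.
out = view + 1.0
out.sum().backward()
print(base.grad[0])  # gradient flows back to the leaf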
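Note (illustration only, not part of the patch): the compute_dtype change reflects that AdaLoRA keeps lora_A in an nn.ParameterDict, so lora_A is a bare nn.Parameter (itself a tensor) rather than an nn.Linear module; it exposes .dtype directly and has no .weight attribute. A small sketch of the distinction:

import torch
from torch import nn

# An nn.Linear module wraps its weight, so the dtype lives on `.weight`;
# a bare nn.Parameter is itself a tensor and exposes `.dtype` directly.
linear = nn.Linear(16, 8).to(torch.float16)
param = nn.Parameter(torch.zeros(8, 16, dtype=torch.float16))

print(linear.weight.dtype)  # torch.float16
print(param.dtype)          # torch.float16
# param.weight  # would raise AttributeError: 'Parameter' object has no attribute 'weight'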