
Commit

Merge remote-tracking branch 'hf/main' into ci/bump-macos-12-to-13
nemo committed Nov 27, 2024
2 parents 3113b1f + d13d7a4 commit 47e4e72
Showing 2 changed files with 7 additions and 1 deletion.
6 changes: 6 additions & 0 deletions tests/test_decoder_models.py
@@ -538,6 +538,12 @@ def test_prompt_learning_with_gradient_checkpointing(self, test_name, model_id,
         # Test prompt learning methods with gradient checkpointing in a semi realistic setting.
         # Prefix tuning does not work if the model uses the new caching implementation. In that case, a helpful error
         # should be raised.
+
+        # skip if multi GPU, since this results in DataParallel usage by Trainer, which fails with "CUDA device
+        # assertion", breaking subsequent tests
+        if torch.cuda.device_count() > 1:
+            pytest.skip("Skip prompt_learning_with_gradient_checkpointing test on multi-GPU setups")
+
         peft_config = config_cls(
             base_model_name_or_path=model_id,
             **config_kwargs,
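
Note (not part of this commit): the added check is the standard runtime-skip pattern in pytest; the same condition could also be expressed as a skipif marker. A minimal sketch, assuming only torch and pytest are importable; the test name here is hypothetical:

import pytest
import torch

# Same guard expressed as a decorator; hypothetical illustration only.
# The Trainer wraps the model in torch.nn.DataParallel when it sees more than
# one GPU, which is what the runtime skip in the commit works around.
@pytest.mark.skipif(
    torch.cuda.device_count() > 1,
    reason="DataParallel usage by Trainer fails with a CUDA device assertion on multi-GPU",
)
def test_prompt_learning_with_gradient_checkpointing_example():
    ...
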
2 changes: 1 addition & 1 deletion tests/test_vision_models.py
@@ -56,7 +56,7 @@
 # Ensure that models like Llava that pass past_key_values automatically do not fail, see #1938
 class TestPastKV:
     def test_past_kv(self):
-        model_id = "trl-internal-testing/tiny-random-LlavaForConditionalGeneration"
+        model_id = "peft-internal-testing/tiny-LlavaForConditionalGeneration"
         prompt = "USER: <image>\nWhat are these?\nASSISTANT:"
 
         # prepare model and inputs
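
The remainder of test_past_kv ("prepare model and inputs" onward) is collapsed in this diff. For orientation, the usual pattern for exercising such a tiny Llava checkpoint with PEFT looks roughly like the following. This is a hypothetical sketch, not the actual test body; the dummy image, LoRA target modules, and generation settings are assumptions:

from PIL import Image
from peft import LoraConfig, get_peft_model
from transformers import AutoProcessor, LlavaForConditionalGeneration

# Hypothetical sketch; not the code hidden in this diff.
model_id = "peft-internal-testing/tiny-LlavaForConditionalGeneration"
model = LlavaForConditionalGeneration.from_pretrained(model_id)
processor = AutoProcessor.from_pretrained(model_id)

# Assumed LoRA settings; the real test's config may differ.
model = get_peft_model(model, LoraConfig(target_modules=["q_proj", "v_proj"]))

prompt = "USER: <image>\nWhat are these?\nASSISTANT:"
image = Image.new("RGB", (224, 224))  # dummy image stands in for a real one
inputs = processor(text=prompt, images=image, return_tensors="pt")

# generate() passes past_key_values internally; this must not error (see #1938)
output = model.generate(**inputs, max_new_tokens=2)
print(processor.batch_decode(output, skip_special_tokens=True))
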
