huggingface · ArthurZucker · Oct 24, 2024 · Oct 8, 2024 · Oct 8, 2024 · Oct 8, 2024
diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py
@@ -1440,6 +1440,8 @@ def _prepare_generated_length(
             and not self.config.is_encoder_decoder
         ):
             generation_config.max_length -= inputs_tensor.shape[1]
+        else:  # otherwise extend max_length by input_ids_length, so the limit applies to newly generated tokens
+            generation_config.max_length = generation_config.max_length + input_ids_length
 
         # same for min length
         if generation_config.min_new_tokens is not None:

diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py
@@ -881,18 +881,7 @@ def __init__(
                 # Take the first device used by `accelerate`.
                 device = next(iter(hf_device_map.values()))
             else:
-                device = -1
-                if (
-                    is_torch_mlu_available()
-                    or is_torch_cuda_available()
-                    or is_torch_npu_available()
-                    or is_torch_xpu_available(check_device=True)
-                    or is_torch_mps_available()
-                ):
-                    logger.warning(
-                        "Hardware accelerator e.g. GPU is available in the environment, but no `device` argument"
-                        " is passed to the `Pipeline` object. Model will be on CPU."
-                    )
+                device = 0
 
         if is_torch_available() and self.framework == "pt":
             if device == -1 and self.model.device is not None:
@@ -920,10 +909,12 @@ def __init__(
             elif is_torch_mps_available():
                 self.device = torch.device(f"mps:{device}")
             else:
-                raise ValueError(f"{device} unrecognized or not available.")
+                self.device = torch.device("cpu")
         else:
             self.device = device if device is not None else -1
 
+        logger.warning(f"Device set to use {self.device}")
+
         self.binary_output = binary_output
         # We shouldn't call `model.to()` for models loaded with accelerate as well as the case that model is already on device
         if (