Rewrite push_to_hub to use upload_files #18366

Merged · 3 commits · Aug 1, 2022
10 changes: 3 additions & 7 deletions docs/source/en/add_new_model.mdx
@@ -813,13 +813,9 @@ checkpoint and to get the required access rights to be able to upload the model
*brand_new_bert*. The `push_to_hub` method, present in all models in `transformers`, is a quick and efficient way to push your checkpoint to the hub. A little snippet is pasted below:

```python
-brand_new_bert.push_to_hub(
-    repo_path_or_name="brand_new_bert",
-    # Uncomment the following line to push to an organization
-    # organization="<ORGANIZATION>",
-    commit_message="Add model",
-    use_temp_dir=True,
-)
+brand_new_bert.push_to_hub("brand_new_bert")
+# Uncomment the following line to push to an organization.
+# brand_new_bert.push_to_hub("<organization>/brand_new_bert")
```

It is worth spending some time to create fitting model cards for each checkpoint. The model cards should highlight the
…
4 changes: 2 additions & 2 deletions docs/source/en/model_sharing.mdx
@@ -179,10 +179,10 @@ This creates a repository under your username with the model name `my-awesome-model`
>>> model = AutoModel.from_pretrained("your_username/my-awesome-model")
```

-If you belong to an organization and want to push your model under the organization name instead, add the `organization` parameter:
+If you belong to an organization and want to push your model under the organization name instead, just add it to the `repo_id`:

```py
->>> pt_model.push_to_hub("my-awesome-model", organization="my-awesome-org")
+>>> pt_model.push_to_hub("my-awesome-org/my-awesome-model")
```

Review comment (Member): Very welcome change

The `push_to_hub` function can also be used to add other files to a model repository. For example, add a tokenizer to a model repository:
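The example itself is cut off in this diff view; as a minimal sketch of the kind of snippet the docs show here (assuming a tokenizer loaded in the same session):

```py
>>> from transformers import AutoTokenizer

>>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
>>> tokenizer.push_to_hub("my-awesome-model")
```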
26 changes: 11 additions & 15 deletions src/transformers/configuration_utils.py
@@ -417,27 +417,22 @@ def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub:
            save_directory (`str` or `os.PathLike`):
                Directory where the configuration JSON file will be saved (will be created if it does not exist).
            push_to_hub (`bool`, *optional*, defaults to `False`):
-                Whether or not to push your model to the Hugging Face model hub after saving it.
-
-                <Tip warning={true}>
-
-                Using `push_to_hub=True` will synchronize the repository you are pushing to with `save_directory`,
-                which requires `save_directory` to be a local clone of the repo you are pushing to if it's an existing
-                folder. Pass along `temp_dir=True` to use a temporary directory instead.
-
-                </Tip>
-
+                Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
+                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
+                namespace).
            kwargs:
                Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
        """
        if os.path.isfile(save_directory):
            raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")

+        os.makedirs(save_directory, exist_ok=True)
+
        if push_to_hub:
            commit_message = kwargs.pop("commit_message", None)
-            repo = self._create_or_get_repo(save_directory, **kwargs)
-
-        os.makedirs(save_directory, exist_ok=True)
+            repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
+            repo_id, token = self._create_repo(repo_id, **kwargs)
+            files_timestamps = self._get_files_timestamps(save_directory)

        # If we have a custom config, we copy the file defining it in the folder and set the attributes so it can be
        # loaded from the Hub.
@@ -451,8 +446,9 @@ def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub:
logger.info(f"Configuration saved in {output_config_file}")

if push_to_hub:
url = self._push_to_hub(repo, commit_message=commit_message)
logger.info(f"Configuration pushed to the hub in this commit: {url}")
self._upload_modified_files(
save_directory, repo_id, files_timestamps, commit_message=commit_message, token=token
)

@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
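To make the new calling convention concrete, here is a sketch of what saving and pushing a config looks like after this change; the local folder name and the `repo_id` value are placeholders, and `repo_id` is consumed from `kwargs` exactly as shown in the diff above:

```python
from transformers import BertConfig

config = BertConfig()
# Saves config.json into ./my-bert-config, creates the Hub repo if needed,
# then uploads only the files whose timestamps changed after saving.
config.save_pretrained("my-bert-config", push_to_hub=True, repo_id="username/my-bert-config")
```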
25 changes: 11 additions & 14 deletions src/transformers/feature_extraction_utils.py
@@ -318,41 +318,38 @@ def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file will be saved (will be created if it does not exist).
            push_to_hub (`bool`, *optional*, defaults to `False`):
-                Whether or not to push your feature extractor to the Hugging Face model hub after saving it.
-
-                <Tip warning={true}>
-
-                Using `push_to_hub=True` will synchronize the repository you are pushing to with `save_directory`,
-                which requires `save_directory` to be a local clone of the repo you are pushing to if it's an existing
-                folder. Pass along `temp_dir=True` to use a temporary directory instead.
-
-                </Tip>
-
+                Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
+                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
+                namespace).
            kwargs:
                Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
        """
        if os.path.isfile(save_directory):
            raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")

+        os.makedirs(save_directory, exist_ok=True)
+
        if push_to_hub:
            commit_message = kwargs.pop("commit_message", None)
-            repo = self._create_or_get_repo(save_directory, **kwargs)
+            repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
+            repo_id, token = self._create_repo(repo_id, **kwargs)
+            files_timestamps = self._get_files_timestamps(save_directory)

        # If we have a custom config, we copy the file defining it in the folder and set the attributes so it can be
        # loaded from the Hub.
        if self._auto_class is not None:
            custom_object_save(self, save_directory, config=self)

-        os.makedirs(save_directory, exist_ok=True)
        # If we save using the predefined names, we can load using `from_pretrained`
        output_feature_extractor_file = os.path.join(save_directory, FEATURE_EXTRACTOR_NAME)

        self.to_json_file(output_feature_extractor_file)
        logger.info(f"Feature extractor saved in {output_feature_extractor_file}")

        if push_to_hub:
-            url = self._push_to_hub(repo, commit_message=commit_message)
-            logger.info(f"Feature extractor pushed to the hub in this commit: {url}")
+            self._upload_modified_files(
+                save_directory, repo_id, files_timestamps, commit_message=commit_message, token=token
+            )

        return [output_feature_extractor_file]

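The `_get_files_timestamps` / `_upload_modified_files` pair used above is the heart of the rewrite: instead of keeping a local git clone in sync, the mixin snapshots file timestamps before saving and then uploads only what changed. A minimal sketch of that idea (not the actual transformers helpers, just the mechanism):

```python
import os

def get_files_timestamps(working_dir):
    # Record the last-modified time of every file currently in the directory.
    return {f: os.path.getmtime(os.path.join(working_dir, f)) for f in os.listdir(working_dir)}

def list_modified_files(working_dir, files_timestamps):
    # A file needs uploading if it is new or its mtime changed since the snapshot.
    return [
        f
        for f in os.listdir(working_dir)
        if f not in files_timestamps or os.path.getmtime(os.path.join(working_dir, f)) > files_timestamps[f]
    ]
```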
26 changes: 11 additions & 15 deletions src/transformers/modeling_flax_utils.py
@@ -941,16 +941,9 @@ def save_pretrained(
            save_directory (`str` or `os.PathLike`):
                Directory to which to save. Will be created if it doesn't exist.
            push_to_hub (`bool`, *optional*, defaults to `False`):
-                Whether or not to push your model to the Hugging Face model hub after saving it.
-
-                <Tip warning={true}>
-
-                Using `push_to_hub=True` will synchronize the repository you are pushing to with `save_directory`,
-                which requires `save_directory` to be a local clone of the repo you are pushing to if it's an existing
-                folder. Pass along `temp_dir=True` to use a temporary directory instead.
-
-                </Tip>
-
+                Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
+                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
+                namespace).
            max_shard_size (`int` or `str`, *optional*, defaults to `"10GB"`):
                The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size
                lower than this size. If expressed as a string, needs to be digits followed by a unit (like `"5MB"`).
@@ -969,11 +962,13 @@ def save_pretrained(
logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
return

os.makedirs(save_directory, exist_ok=True)

if push_to_hub:
commit_message = kwargs.pop("commit_message", None)
repo = self._create_or_get_repo(save_directory, **kwargs)

os.makedirs(save_directory, exist_ok=True)
repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
repo_id, token = self._create_repo(repo_id, **kwargs)
files_timestamps = self._get_files_timestamps(save_directory)

# get abs dir
save_directory = os.path.abspath(save_directory)
@@ -1028,8 +1023,9 @@ def save_pretrained(
logger.info(f"Model weights saved in {output_model_file}")

if push_to_hub:
url = self._push_to_hub(repo, commit_message=commit_message)
logger.info(f"Model pushed to the hub in this commit: {url}")
self._upload_modified_files(
save_directory, repo_id, files_timestamps, commit_message=commit_message, token=token
)

@classmethod
def register_for_auto_class(cls, auto_class="FlaxAutoModel"):
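As a usage sketch of the parameters documented above (the checkpoint, local folder, and `repo_id` values are placeholders):

```python
from transformers import FlaxBertModel

model = FlaxBertModel.from_pretrained("bert-base-cased")
# Shard the weights into files of at most ~2GB, then push the modified
# files to the Hub repo given by repo_id.
model.save_pretrained(
    "brand-new-flax-model",
    max_shard_size="2GB",
    push_to_hub=True,
    repo_id="username/brand-new-flax-model",
)
```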
126 changes: 105 additions & 21 deletions src/transformers/modeling_tf_utils.py
@@ -24,6 +24,7 @@
import re
import warnings
from collections.abc import Mapping
+from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union

import h5py
@@ -58,14 +59,14 @@
    RepositoryNotFoundError,
    RevisionNotFoundError,
    cached_path,
-    copy_func,
    find_labels,
    has_file,
    hf_bucket_url,
    is_offline_mode,
    is_remote_url,
    logging,
    requires_backends,
+    working_or_temp_dir,
)


@@ -1919,6 +1920,7 @@ def save_pretrained(
        version=1,
        push_to_hub=False,
        max_shard_size: Union[int, str] = "10GB",
+        create_pr: bool = False,
        **kwargs
    ):
        """
@@ -1935,16 +1937,9 @@
                TensorFlow Serving as detailed in the official documentation
                https://www.tensorflow.org/tfx/serving/serving_basic
            push_to_hub (`bool`, *optional*, defaults to `False`):
-                Whether or not to push your model to the Hugging Face model hub after saving it.
-
-                <Tip warning={true}>
-
-                Using `push_to_hub=True` will synchronize the repository you are pushing to with `save_directory`,
-                which requires `save_directory` to be a local clone of the repo you are pushing to if it's an existing
-                folder. Pass along `temp_dir=True` to use a temporary directory instead.
-
-                </Tip>
-
+                Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
+                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
+                namespace).
            max_shard_size (`int` or `str`, *optional*, defaults to `"10GB"`):
                The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size
                lower than this size. If expressed as a string, needs to be digits followed by a unit (like `"5MB"`).
@@ -1956,18 +1951,23 @@

                </Tip>

+            create_pr (`bool`, *optional*, defaults to `False`):
+                Whether or not to create a PR with the uploaded files or directly commit.
+
            kwargs:
                Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
        """
        if os.path.isfile(save_directory):
            logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
            return

+        os.makedirs(save_directory, exist_ok=True)
+
        if push_to_hub:
            commit_message = kwargs.pop("commit_message", None)
-            repo = self._create_or_get_repo(save_directory, **kwargs)
-
-        os.makedirs(save_directory, exist_ok=True)
+            repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
+            repo_id, token = self._create_repo(repo_id, **kwargs)
+            files_timestamps = self._get_files_timestamps(save_directory)

        if saved_model:
            saved_model_dir = os.path.join(save_directory, "saved_model", str(version))
@@ -2030,8 +2030,9 @@ def save_pretrained(
                    param_dset[:] = layer.numpy()

        if push_to_hub:
-            url = self._push_to_hub(repo, commit_message=commit_message)
-            logger.info(f"Model pushed to the hub in this commit: {url}")
+            self._upload_modified_files(
+                save_directory, repo_id, files_timestamps, commit_message=commit_message, token=token
+            )

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
@@ -2475,12 +2476,95 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):

        return model

-# To update the docstring, we need to copy the method, otherwise we change the original docstring.
-TFPreTrainedModel.push_to_hub = copy_func(TFPreTrainedModel.push_to_hub)
-TFPreTrainedModel.push_to_hub.__doc__ = TFPreTrainedModel.push_to_hub.__doc__.format(
-    object="model", object_class="TFAutoModel", object_files="model checkpoint"
-)
+    def push_to_hub(
+        self,
+        repo_id: str,
+        use_temp_dir: Optional[bool] = None,
+        commit_message: Optional[str] = None,
+        private: Optional[bool] = None,
+        use_auth_token: Optional[Union[bool, str]] = None,
+        max_shard_size: Optional[Union[int, str]] = "10GB",
+        **model_card_kwargs
+    ) -> str:
+        """
+        Upload the model files to the 🤗 Model Hub while synchronizing a local clone of the repo in
+        `repo_path_or_name`.

Review thread on lines +2490 to 2491:
Contributor: Small detail: does it make sense to refer to `repo_id` instead of `repo_path_or_name`, since the latter seems to be deprecated?
Contributor: Sorry, I just realised it is a merged PR anyway 😬
Collaborator (author): I can still address ;-)
+        Parameters:
+            repo_id (`str`):
+                The name of the repository you want to push your model to. It should contain your organization name
+                when pushing to a given organization.
+            use_temp_dir (`bool`, *optional*):
+                Whether or not to use a temporary directory to store the files saved before they are pushed to the
+                Hub. Will default to `True` if there is no directory named like `repo_id`, `False` otherwise.
+            commit_message (`str`, *optional*):
+                Message to commit while pushing. Will default to `"Upload model"`.
+            private (`bool`, *optional*):
+                Whether or not the repository created should be private (requires a paying subscription).
+            use_auth_token (`bool` or `str`, *optional*):
+                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token
+                generated when running `transformers-cli login` (stored in `~/.huggingface`). Will default to `True`
+                if `repo_url` is not specified.
+            max_shard_size (`int` or `str`, *optional*, defaults to `"10GB"`):
+                Only applicable for models. The maximum size for a checkpoint before being sharded. Checkpoints
+                shard will then be each of size lower than this size. If expressed as a string, needs to be digits
+                followed by a unit (like `"5MB"`).
+            model_card_kwargs:
+                Additional keyword arguments passed along to the [`~TFPreTrainedModel.create_model_card`] method.
+
+        Examples:
+
+        ```python
+        from transformers import TFAutoModel
+
+        model = TFAutoModel.from_pretrained("bert-base-cased")
+
+        # Push the model to your namespace with the name "my-finetuned-bert".
+        model.push_to_hub("my-finetuned-bert")
+
+        # Push the model to an organization with the name "my-finetuned-bert".
+        model.push_to_hub("huggingface/my-finetuned-bert")
+        ```
+        """
if "repo_path_or_name" in model_card_kwargs:
warnings.warn(
"The `repo_path_or_name` argument is deprecated and will be removed in v5 of Transformers. Use "
"`repo_id` instead."
)
repo_id = model_card_kwargs.pop("repo_path_or_name")
# Deprecation warning will be sent after for repo_url and organization
repo_url = model_card_kwargs.pop("repo_url", None)
organization = model_card_kwargs.pop("organization", None)

if os.path.isdir(repo_id):
working_dir = repo_id
repo_id = repo_id.split(os.path.sep)[-1]
else:
working_dir = repo_id.split("/")[-1]

repo_id, token = self._create_repo(
repo_id, private=private, use_auth_token=use_auth_token, repo_url=repo_url, organization=organization
)

if use_temp_dir is None:
use_temp_dir = not os.path.isdir(working_dir)

with working_or_temp_dir(working_dir=working_dir, use_temp_dir=use_temp_dir) as work_dir:
files_timestamps = self._get_files_timestamps(work_dir)

# Save all files.
self.save_pretrained(work_dir, max_shard_size=max_shard_size)
if hasattr(self, "history") and hasattr(self, "create_model_card"):
# This is a Keras model and we might be able to fish out its History and make a model card out of it
base_model_card_args = {
"output_dir": work_dir,
"model_name": Path(repo_id).name,
}
base_model_card_args.update(model_card_kwargs)
self.create_model_card(**base_model_card_args)

self._upload_modified_files(
work_dir, repo_id, files_timestamps, commit_message=commit_message, token=token
)


class TFConv1D(tf.keras.layers.Layer):
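The `working_or_temp_dir` helper imported above decides whether `push_to_hub` stages files in the existing local directory or in a throwaway one. Its implementation is not part of this diff; a plausible minimal sketch of the behavior the method relies on:

```python
import contextlib
import tempfile

@contextlib.contextmanager
def working_or_temp_dir(working_dir, use_temp_dir: bool = False):
    # Yield a temporary directory when requested, otherwise the given one.
    if use_temp_dir:
        with tempfile.TemporaryDirectory() as tmp_dir:
            yield tmp_dir
    else:
        yield working_dir
```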