huggingface · sgugger · Mar 31, 2021 · Mar 30, 2021 · Mar 30, 2021 · Mar 30, 2021
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -221,7 +221,7 @@ jobs:
                   key: v0.4-torch-{{ checksum "setup.py" }}
                   paths:
                       - '~/.cache/pip'
-            - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_torch -m is_pipeline_test ./tests/ | tee tests_output.txt
+            - run: TRANSFORMERS_IS_CI=1 RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_torch -m is_pipeline_test ./tests/ | tee tests_output.txt
             - store_artifacts:
                   path: ~/transformers/tests_output.txt
             - store_artifacts:
@@ -247,7 +247,7 @@ jobs:
                   key: v0.4-tf-{{ checksum "setup.py" }}
                   paths:
                       - '~/.cache/pip'
-            - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_tf ./tests/ -m is_pipeline_test | tee tests_output.txt
+            - run: TRANSFORMERS_IS_CI=1 RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_tf ./tests/ -m is_pipeline_test | tee tests_output.txt
             - store_artifacts:
                   path: ~/transformers/tests_output.txt
             - store_artifacts:
@@ -272,7 +272,7 @@ jobs:
                   key: v0.4-custom_tokenizers-{{ checksum "setup.py" }}
                   paths:
                       - '~/.cache/pip'
-            - run: python -m pytest -s --make-reports=tests_custom_tokenizers ./tests/test_tokenization_bert_japanese.py | tee tests_output.txt
+            - run: TRANSFORMERS_IS_CI=1 python -m pytest -s --make-reports=tests_custom_tokenizers ./tests/test_tokenization_bert_japanese.py | tee tests_output.txt
             - store_artifacts:
                   path: ~/transformers/tests_output.txt
             - store_artifacts:
@@ -299,7 +299,7 @@ jobs:
                   key: v0.4-torch_examples-{{ checksum "setup.py" }}
                   paths:
                       - '~/.cache/pip'
-            - run: python -m pytest -n 8 --dist=loadfile -s --make-reports=examples_torch ./examples/ | tee examples_output.txt
+            - run: TRANSFORMERS_IS_CI=1 python -m pytest -n 8 --dist=loadfile -s --make-reports=examples_torch ./examples/ | tee examples_output.txt
             - store_artifacts:
                   path: ~/transformers/examples_output.txt
             - store_artifacts:
@@ -319,7 +319,7 @@ jobs:
                 git config --global user.name "ci"
             - run: pip install --upgrade pip
             - run: pip install .[testing]
-            - run: RUN_GIT_LFS_TESTS=1 python -m pytest -sv ./tests/test_hf_api.py -k "HfLargefilesTest"
+            - run: TRANSFORMERS_IS_CI=1 RUN_GIT_LFS_TESTS=1 python -m pytest -sv ./tests/test_hf_api.py -k "HfLargefilesTest"
 
     build_doc:
         working_directory: ~/transformers

diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml
@@ -45,7 +45,7 @@ jobs:
           MKL_NUM_THREADS: 8
           HF_HOME: /mnt/cache
         run: |
-          python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_gpu tests
+          TRANSFORMERS_IS_CI=1 python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_gpu tests
 
       - name: Failure short reports
         if: ${{ always() }}
@@ -89,7 +89,7 @@ jobs:
           TF_NUM_INTEROP_THREADS: 1
           HF_HOME: /mnt/cache
         run: |
-          python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_gpu tests
+          TRANSFORMERS_IS_CI=1 python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_gpu tests
 
       - name: Failure short reports
         if: ${{ always() }}
@@ -136,7 +136,7 @@ jobs:
           MKL_SERVICE_FORCE_INTEL: 1
           HF_HOME: /mnt/cache
         run: |
-          python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_multi_gpu tests
+          TRANSFORMERS_IS_CI=1 python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_multi_gpu tests
 
       - name: Failure short reports
         if: ${{ always() }}
@@ -180,7 +180,7 @@ jobs:
           TF_NUM_INTEROP_THREADS: 1
           HF_HOME: /mnt/cache
         run: |
-          python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_multi_gpu tests
+          TRANSFORMERS_IS_CI=1 python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_multi_gpu tests
 
       - name: Failure short reports
         if: ${{ always() }}

diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
@@ -42,7 +42,7 @@ jobs:
           RUN_SLOW: yes
           HF_HOME: /mnt/cache
         run: |
-          python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_gpu tests
+          TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_gpu tests
 
       - name: Failure short reports
         if: ${{ always() }}
@@ -72,7 +72,7 @@ jobs:
           RUN_PIPELINE_TESTS: yes
           HF_HOME: /mnt/cache
         run: |
-          python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_gpu tests
+          TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_gpu tests
 
       - name: Failure short reports
         if: ${{ always() }}
@@ -117,7 +117,7 @@ jobs:
           TF_NUM_INTRAOP_THREADS: 16
           MKL_NUM_THREADS: 16
         run: |
-          python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_gpu tests
+          TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_gpu tests
 
       - name: Failure short reports
         if: ${{ always() }}
@@ -134,7 +134,7 @@ jobs:
           TF_NUM_INTRAOP_THREADS: 16
           MKL_NUM_THREADS: 16
         run: |
-          python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_gpu tests
+          TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_gpu tests
 
       - name: Failure short reports
         if: ${{ always() }}
@@ -181,7 +181,7 @@ jobs:
           MKL_NUM_THREADS: 16
           MKL_SERVICE_FORCE_INTEL: 1
         run: |
-          python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_multi_gpu tests
+          TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_multi_gpu tests
 
       - name: Failure short reports
         if: ${{ always() }}
@@ -196,7 +196,7 @@ jobs:
           RUN_PIPELINE_TESTS: yes
           HF_HOME: /mnt/cache
         run: |
-          python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests
+          TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests
 
       - name: Failure short reports
         if: ${{ always() }}
@@ -241,7 +241,7 @@ jobs:
           TF_NUM_INTRAOP_THREADS: 16
           HF_HOME: /mnt/cache
         run: |
-          python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_multi_gpu tests
+          TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_multi_gpu tests
 
       - name: Failure short reports
         if: ${{ always() }}
@@ -258,7 +258,7 @@ jobs:
           TF_NUM_INTRAOP_THREADS: 16
           HF_HOME: /mnt/cache
         run: |
-          python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_multi_gpu tests
+          TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_multi_gpu tests
 
       - name: Failure short reports
         if: ${{ always() }}

diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py
@@ -420,6 +420,12 @@ def get_config_dict(
         use_auth_token = kwargs.pop("use_auth_token", None)
         local_files_only = kwargs.pop("local_files_only", False)
         revision = kwargs.pop("revision", None)
+        from_pipeline = kwargs.pop("_from_pipeline", None)
+        from_auto_class = kwargs.pop("_from_auto", False)
+
+        user_agent = {"file_type": "config", "from_auto_class": from_auto_class}
+        if from_pipeline is not None:
+            user_agent["using_pipeline"] = from_pipeline
 
         if is_offline_mode() and not local_files_only:
             logger.info("Offline mode: forcing local_files_only=True")
@@ -445,6 +451,7 @@ def get_config_dict(
                 resume_download=resume_download,
                 local_files_only=local_files_only,
                 use_auth_token=use_auth_token,
+                user_agent=user_agent,
             )
             # Load config dict
             config_dict = cls._dict_from_json_file(resolved_config_file)

diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py
@@ -212,7 +212,7 @@
 PYTORCH_TRANSFORMERS_CACHE = os.getenv("PYTORCH_TRANSFORMERS_CACHE", PYTORCH_PRETRAINED_BERT_CACHE)
 TRANSFORMERS_CACHE = os.getenv("TRANSFORMERS_CACHE", PYTORCH_TRANSFORMERS_CACHE)
 SESSION_ID = uuid4().hex
-DISABLE_TELEMETRY = os.getenv("DISABLE_TELEMETRY", False)
+DISABLE_TELEMETRY = os.getenv("DISABLE_TELEMETRY", "").upper() in ENV_VARS_TRUE_VALUES  # case-insensitive
 
 WEIGHTS_NAME = "pytorch_model.bin"
 TF2_WEIGHTS_NAME = "tf_model.h5"
@@ -367,7 +367,7 @@ def is_sagemaker_distributed_available():
 
 
 def is_training_run_on_sagemaker():
-    return "SAGEMAKER_JOB_NAME" in os.environ and not DISABLE_TELEMETRY
+    return "SAGEMAKER_JOB_NAME" in os.environ
 
 
 def is_soundfile_availble():
@@ -1227,13 +1227,18 @@ def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str:
     """
     Formats a user-agent string with basic info about a request.
     """
+    if DISABLE_TELEMETRY:
+        return "telemetry/off"
     ua = f"transformers/{__version__}; python/{sys.version.split()[0]}; session_id/{SESSION_ID}"
     if is_torch_available():
         ua += f"; torch/{_torch_version}"
     if is_tf_available():
         ua += f"; tensorflow/{_tf_version}"
     if is_training_run_on_sagemaker():
         ua += "; " + "; ".join(f"{k}/{v}" for k, v in define_sagemaker_information().items())
+    # CI will set this value to True
+    if os.environ.get("TRANSFORMERS_IS_CI", "").upper() in ENV_VARS_TRUE_VALUES:
+        ua += "; is_ci/true"
     if isinstance(user_agent, dict):
         ua += "; " + "; ".join(f"{k}/{v}" for k, v in user_agent.items())
     elif isinstance(user_agent, str):
@@ -1243,7 +1248,7 @@ def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str:
 
 def http_get(url: str, temp_file: BinaryIO, proxies=None, resume_size=0, headers: Optional[Dict[str, str]] = None):
     """
-    Donwload remote file. Do not gobble up errors.
+    Download remote file. Do not gobble up errors.
     """
     headers = copy.deepcopy(headers)
     if resume_size > 0:

diff --git a/src/transformers/modelcard.py b/src/transformers/modelcard.py
@@ -133,6 +133,11 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
         proxies = kwargs.pop("proxies", None)
         find_from_standard_name = kwargs.pop("find_from_standard_name", True)
         return_unused_kwargs = kwargs.pop("return_unused_kwargs", False)
+        from_pipeline = kwargs.pop("_from_pipeline", None)
+
+        user_agent = {"file_type": "model_card"}
+        if from_pipeline is not None:
+            user_agent["using_pipeline"] = from_pipeline
 
         if pretrained_model_name_or_path in ALL_PRETRAINED_CONFIG_ARCHIVE_MAP:
             # For simplicity we use the same pretrained url than the configuration files
@@ -152,7 +157,9 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
 
         try:
             # Load from URL or cache if already cached
-            resolved_model_card_file = cached_path(model_card_file, cache_dir=cache_dir, proxies=proxies)
+            resolved_model_card_file = cached_path(
+                model_card_file, cache_dir=cache_dir, proxies=proxies, user_agent=user_agent
+            )
             if resolved_model_card_file == model_card_file:
                 logger.info("loading model card file {}".format(model_card_file))
             else:

diff --git a/src/transformers/modeling_flax_utils.py b/src/transformers/modeling_flax_utils.py
@@ -228,6 +228,12 @@ def from_pretrained(
         local_files_only = kwargs.pop("local_files_only", False)
         use_auth_token = kwargs.pop("use_auth_token", None)
         revision = kwargs.pop("revision", None)
+        from_pipeline = kwargs.pop("_from_pipeline", None)
+        from_auto_class = kwargs.pop("_from_auto", False)
+
+        user_agent = {"file_type": "model", "framework": "flax", "from_auto_class": from_auto_class}
+        if from_pipeline is not None:
+            user_agent["using_pipeline"] = from_pipeline
 
         if is_offline_mode() and not local_files_only:
             logger.info("Offline mode: forcing local_files_only=True")
@@ -247,6 +253,8 @@ def from_pretrained(
                 local_files_only=local_files_only,
                 use_auth_token=use_auth_token,
                 revision=revision,
+                _from_auto=from_auto_class,
+                _from_pipeline=from_pipeline,
                 **kwargs,
             )
         else:
@@ -290,6 +298,7 @@ def from_pretrained(
                     resume_download=resume_download,
                     local_files_only=local_files_only,
                     use_auth_token=use_auth_token,
+                    user_agent=user_agent,
                 )
             except EnvironmentError as err:
                 logger.error(err)

diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py
@@ -1164,6 +1164,12 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
         revision = kwargs.pop("revision", None)
         mirror = kwargs.pop("mirror", None)
         load_weight_prefix = kwargs.pop("load_weight_prefix", None)
+        from_pipeline = kwargs.pop("_from_pipeline", None)
+        from_auto_class = kwargs.pop("_from_auto", False)
+
+        user_agent = {"file_type": "model", "framework": "tensorflow", "from_auto_class": from_auto_class}
+        if from_pipeline is not None:
+            user_agent["using_pipeline"] = from_pipeline
 
         if is_offline_mode() and not local_files_only:
             logger.info("Offline mode: forcing local_files_only=True")
@@ -1183,6 +1189,8 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
                 local_files_only=local_files_only,
                 use_auth_token=use_auth_token,
                 revision=revision,
+                _from_auto=from_auto_class,
+                _from_pipeline=from_pipeline,
                 **kwargs,
             )
         else:
@@ -1225,6 +1233,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
                     resume_download=resume_download,
                     local_files_only=local_files_only,
                     use_auth_token=use_auth_token,
+                    user_agent=user_agent,
                 )
             except EnvironmentError as err:
                 logger.error(err)

diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
@@ -964,6 +964,12 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
         use_auth_token = kwargs.pop("use_auth_token", None)
         revision = kwargs.pop("revision", None)
         mirror = kwargs.pop("mirror", None)
+        from_pipeline = kwargs.pop("_from_pipeline", None)
+        from_auto_class = kwargs.pop("_from_auto", False)
+
+        user_agent = {"file_type": "model", "framework": "pytorch", "from_auto_class": from_auto_class}
+        if from_pipeline is not None:
+            user_agent["using_pipeline"] = from_pipeline
 
         if is_offline_mode() and not local_files_only:
             logger.info("Offline mode: forcing local_files_only=True")
@@ -983,6 +989,8 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                 local_files_only=local_files_only,
                 use_auth_token=use_auth_token,
                 revision=revision,
+                _from_auto=from_auto_class,
+                _from_pipeline=from_pipeline,
                 **kwargs,
             )
         else:
@@ -1003,19 +1011,17 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                     archive_file = os.path.join(pretrained_model_name_or_path, WEIGHTS_NAME)
                 else:
                     raise EnvironmentError(
-                        "Error no file named {} found in directory {} or `from_tf` set to False".format(
-                            [WEIGHTS_NAME, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME + ".index"],
-                            pretrained_model_name_or_path,
-                        )
+                        f"Error no file named {[WEIGHTS_NAME, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME + '.index']} found in "
+                        f"directory {pretrained_model_name_or_path} or `from_tf` set to False."
                     )
             elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
                 archive_file = pretrained_model_name_or_path
             elif os.path.isfile(pretrained_model_name_or_path + ".index"):
-                assert (
-                    from_tf
-                ), "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint".format(
-                    pretrained_model_name_or_path + ".index"
-                )
+                if not from_tf:
+                    raise ValueError(
+                        f"We found a TensorFlow checkpoint at {pretrained_model_name_or_path + '.index'}, please set "
+                        "from_tf to True to load from this checkpoint."
+                    )
                 archive_file = pretrained_model_name_or_path + ".index"
             else:
                 archive_file = hf_bucket_url(
@@ -1035,6 +1041,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                     resume_download=resume_download,
                     local_files_only=local_files_only,
                     use_auth_token=use_auth_token,
+                    user_agent=user_agent,
                 )
             except EnvironmentError as err:
                 logger.error(err)

diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py
@@ -384,6 +384,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
             >>> config.unused_kwargs
             {'foo': False}
         """
+        kwargs["_from_auto"] = True
         config_dict, _ = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
         if "model_type" in config_dict:
             config_class = CONFIG_MAPPING[config_dict["model_type"]]