From 27f3666e78425d0d2e18049a5fb7998c44b106fb Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Wed, 31 Mar 2021 09:36:07 -0400 Subject: [PATCH] Add more metadata to the user agent (#10972) * Add more metadata to the user agent * Fix typo * Use DISABLE_TELEMETRY * Address review comments * Use global env * Add clean envs on circle CI --- .circleci/config.yml | 30 ++++++++++---- .github/workflows/self-push.yml | 19 +++------ .github/workflows/self-scheduled.yml | 41 ++++--------------- src/transformers/configuration_utils.py | 7 ++++ src/transformers/file_utils.py | 11 +++-- src/transformers/modelcard.py | 9 +++- src/transformers/modeling_flax_utils.py | 9 ++++ src/transformers/modeling_tf_utils.py | 9 ++++ src/transformers/modeling_utils.py | 25 +++++++---- .../models/auto/configuration_auto.py | 1 + src/transformers/models/auto/modeling_auto.py | 12 ++++++ .../models/auto/modeling_flax_auto.py | 4 +- .../models/auto/modeling_tf_auto.py | 12 +++++- .../models/auto/tokenization_auto.py | 1 + src/transformers/pipelines/__init__.py | 16 +++++--- src/transformers/pipelines/base.py | 12 +++--- src/transformers/tokenization_utils_base.py | 7 ++++ 17 files changed, 146 insertions(+), 79 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 342c538bc1b5d0..28b4f52abd3d97 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,7 +3,6 @@ orbs: gcp-gke: circleci/gcp-gke@1.0.4 go: circleci/go@1.3.0 - # TPU REFERENCES references: checkout_ml_testing: &checkout_ml_testing @@ -69,6 +68,8 @@ jobs: - image: circleci/python:3.6 environment: OMP_NUM_THREADS: 1 + RUN_PT_TF_CROSS_TESTS: yes + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -85,7 +86,7 @@ jobs: key: v0.4-{{ checksum "setup.py" }} paths: - '~/.cache/pip' - - run: RUN_PT_TF_CROSS_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_torch_and_tf ./tests/ -m is_pt_tf_cross_test --durations=0 | tee tests_output.txt + - run: python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_torch_and_tf ./tests/ -m is_pt_tf_cross_test --durations=0 | tee tests_output.txt - store_artifacts: path: ~/transformers/tests_output.txt - store_artifacts: @@ -97,6 +98,8 @@ jobs: - image: circleci/python:3.6 environment: OMP_NUM_THREADS: 1 + RUN_PT_FLAX_CROSS_TESTS: yes + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -113,7 +116,7 @@ jobs: key: v0.4-{{ checksum "setup.py" }} paths: - '~/.cache/pip' - - run: RUN_PT_FLAX_CROSS_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_torch_and_flax ./tests/ -m is_pt_flax_cross_test --durations=0 | tee tests_output.txt + - run: python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_torch_and_flax ./tests/ -m is_pt_flax_cross_test --durations=0 | tee tests_output.txt - store_artifacts: path: ~/transformers/tests_output.txt - store_artifacts: @@ -125,6 +128,7 @@ jobs: - image: circleci/python:3.7 environment: OMP_NUM_THREADS: 1 + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -153,6 +157,7 @@ jobs: - image: circleci/python:3.7 environment: OMP_NUM_THREADS: 1 + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -179,6 +184,7 @@ jobs: - image: circleci/python:3.7 environment: OMP_NUM_THREADS: 1 + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -205,6 +211,8 @@ jobs: - image: circleci/python:3.7 environment: OMP_NUM_THREADS: 1 + RUN_PIPELINE_TESTS: yes + TRANSFORMERS_IS_CI: yes 
resource_class: xlarge parallelism: 1 steps: @@ -221,7 +229,7 @@ jobs: key: v0.4-torch-{{ checksum "setup.py" }} paths: - '~/.cache/pip' - - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_torch -m is_pipeline_test ./tests/ | tee tests_output.txt + - run: python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_torch -m is_pipeline_test ./tests/ | tee tests_output.txt - store_artifacts: path: ~/transformers/tests_output.txt - store_artifacts: @@ -233,6 +241,8 @@ jobs: - image: circleci/python:3.7 environment: OMP_NUM_THREADS: 1 + RUN_PIPELINE_TESTS: yes + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -247,7 +257,7 @@ jobs: key: v0.4-tf-{{ checksum "setup.py" }} paths: - '~/.cache/pip' - - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_tf ./tests/ -m is_pipeline_test | tee tests_output.txt + - run: python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_tf ./tests/ -m is_pipeline_test | tee tests_output.txt - store_artifacts: path: ~/transformers/tests_output.txt - store_artifacts: @@ -259,6 +269,7 @@ jobs: - image: circleci/python:3.7 environment: RUN_CUSTOM_TOKENIZERS: yes + TRANSFORMERS_IS_CI: yes steps: - checkout - restore_cache: @@ -284,6 +295,7 @@ jobs: - image: circleci/python:3.6 environment: OMP_NUM_THREADS: 1 + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -299,7 +311,7 @@ jobs: key: v0.4-torch_examples-{{ checksum "setup.py" }} paths: - '~/.cache/pip' - - run: python -m pytest -n 8 --dist=loadfile -s --make-reports=examples_torch ./examples/ | tee examples_output.txt + - run: TRANSFORMERS_IS_CI=1 python -m pytest -n 8 --dist=loadfile -s --make-reports=examples_torch ./examples/ | tee examples_output.txt - store_artifacts: path: ~/transformers/examples_output.txt - store_artifacts: @@ -309,6 +321,9 @@ jobs: working_directory: ~/transformers docker: - image: circleci/python:3.7 + environment: + RUN_GIT_LFS_TESTS: yes + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -319,7 +334,7 @@ jobs: git config --global user.name "ci" - run: pip install --upgrade pip - run: pip install .[testing] - - run: RUN_GIT_LFS_TESTS=1 python -m pytest -sv ./tests/test_hf_api.py -k "HfLargefilesTest" + - run: python -m pytest -sv ./tests/test_hf_api.py -k "HfLargefilesTest" build_doc: working_directory: ~/transformers @@ -408,6 +423,7 @@ jobs: - image: circleci/python:3.6 environment: OMP_NUM_THREADS: 1 + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml index 8af6f8ea5c23f8..210076f14163c2 100644 --- a/.github/workflows/self-push.yml +++ b/.github/workflows/self-push.yml @@ -12,6 +12,12 @@ on: - "templates/**" repository_dispatch: +env: + HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes + OMP_NUM_THREADS: 8 + MKL_NUM_THREADS: 8 + jobs: run_tests_torch_gpu: runs-on: [self-hosted, docker-gpu, single-gpu] @@ -40,10 +46,6 @@ jobs: python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())" - name: Run all non-slow tests on GPU - env: - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 - HF_HOME: /mnt/cache run: | python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_gpu tests @@ -83,11 +85,8 @@ jobs: - name: Run all non-slow tests on GPU env: - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 TF_NUM_INTRAOP_THREADS: 8 TF_NUM_INTEROP_THREADS: 1 - HF_HOME: /mnt/cache run: | 
python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_gpu tests @@ -131,10 +130,7 @@ jobs: - name: Run all non-slow tests on GPU env: - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 MKL_SERVICE_FORCE_INTEL: 1 - HF_HOME: /mnt/cache run: | python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_multi_gpu tests @@ -174,11 +170,8 @@ jobs: - name: Run all non-slow tests on GPU env: - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 TF_NUM_INTRAOP_THREADS: 8 TF_NUM_INTEROP_THREADS: 1 - HF_HOME: /mnt/cache run: | python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_multi_gpu tests diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 50720411135101..3b72baea0d2b76 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -8,6 +8,13 @@ on: schedule: - cron: "0 0 * * *" +env: + HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes + RUN_SLOW: yes + OMP_NUM_THREADS: 16 + MKL_NUM_THREADS: 16 + jobs: run_all_tests_torch_gpu: runs-on: [self-hosted, docker-gpu, single-gpu] @@ -36,11 +43,6 @@ jobs: python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())" - name: Run all tests on GPU - env: - OMP_NUM_THREADS: 16 - MKL_NUM_THREADS: 16 - RUN_SLOW: yes - HF_HOME: /mnt/cache run: | python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_gpu tests @@ -55,6 +57,7 @@ jobs: MKL_NUM_THREADS: 16 RUN_SLOW: yes HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes run: | pip install -r examples/_tests_requirements.txt python -m pytest -n 1 --dist=loadfile --make-reports=examples_torch_gpu examples @@ -66,11 +69,7 @@ jobs: - name: Run all pipeline tests on GPU if: ${{ always() }} env: - OMP_NUM_THREADS: 16 - MKL_NUM_THREADS: 16 - RUN_SLOW: yes RUN_PIPELINE_TESTS: yes - HF_HOME: /mnt/cache run: | python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_gpu tests @@ -110,12 +109,8 @@ jobs: - name: Run all tests on GPU env: - RUN_SLOW: yes - HF_HOME: /mnt/cache - OMP_NUM_THREADS: 16 TF_NUM_INTEROP_THREADS: 1 TF_NUM_INTRAOP_THREADS: 16 - MKL_NUM_THREADS: 16 run: | python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_gpu tests @@ -126,13 +121,9 @@ jobs: - name: Run all pipeline tests on GPU if: ${{ always() }} env: - RUN_SLOW: yes - HF_HOME: /mnt/cache - OMP_NUM_THREADS: 16 RUN_PIPELINE_TESTS: yes TF_NUM_INTEROP_THREADS: 1 TF_NUM_INTRAOP_THREADS: 16 - MKL_NUM_THREADS: 16 run: | python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_gpu tests @@ -175,10 +166,6 @@ jobs: - name: Run all tests on GPU env: - RUN_SLOW: yes - HF_HOME: /mnt/cache - OMP_NUM_THREADS: 16 - MKL_NUM_THREADS: 16 MKL_SERVICE_FORCE_INTEL: 1 run: | python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_multi_gpu tests @@ -190,11 +177,7 @@ jobs: - name: Run all pipeline tests on GPU if: ${{ always() }} env: - OMP_NUM_THREADS: 16 - MKL_NUM_THREADS: 16 - RUN_SLOW: yes RUN_PIPELINE_TESTS: yes - HF_HOME: /mnt/cache run: | python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests @@ -234,12 +217,8 @@ jobs: - name: Run all tests on GPU env: - OMP_NUM_THREADS: 16 - RUN_SLOW: yes - MKL_NUM_THREADS: 16 TF_NUM_INTEROP_THREADS: 1 TF_NUM_INTRAOP_THREADS: 16 - HF_HOME: /mnt/cache run: | python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_multi_gpu tests @@ -250,13 +229,9 @@ jobs: - name: Run all pipeline tests on GPU if: ${{ always() }} env: - OMP_NUM_THREADS: 16 - RUN_SLOW: yes RUN_PIPELINE_TESTS: yes - 
MKL_NUM_THREADS: 16 TF_NUM_INTEROP_THREADS: 1 TF_NUM_INTRAOP_THREADS: 16 - HF_HOME: /mnt/cache run: | python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_multi_gpu tests diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index 1c428eae5cf463..d0631d3f1abc44 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -420,6 +420,12 @@ def get_config_dict( use_auth_token = kwargs.pop("use_auth_token", None) local_files_only = kwargs.pop("local_files_only", False) revision = kwargs.pop("revision", None) + from_pipeline = kwargs.pop("_from_pipeline", None) + from_auto_class = kwargs.pop("_from_auto", False) + + user_agent = {"file_type": "config", "from_auto_class": from_auto_class} + if from_pipeline is not None: + user_agent["using_pipeline"] = from_pipeline if is_offline_mode() and not local_files_only: logger.info("Offline mode: forcing local_files_only=True") @@ -445,6 +451,7 @@ def get_config_dict( resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, + user_agent=user_agent, ) # Load config dict config_dict = cls._dict_from_json_file(resolved_config_file) diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index 89585e30e309db..e795501ad34e4a 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -212,7 +212,7 @@ PYTORCH_TRANSFORMERS_CACHE = os.getenv("PYTORCH_TRANSFORMERS_CACHE", PYTORCH_PRETRAINED_BERT_CACHE) TRANSFORMERS_CACHE = os.getenv("TRANSFORMERS_CACHE", PYTORCH_TRANSFORMERS_CACHE) SESSION_ID = uuid4().hex -DISABLE_TELEMETRY = os.getenv("DISABLE_TELEMETRY", False) +DISABLE_TELEMETRY = os.getenv("DISABLE_TELEMETRY", False) in ENV_VARS_TRUE_VALUES WEIGHTS_NAME = "pytorch_model.bin" TF2_WEIGHTS_NAME = "tf_model.h5" @@ -367,7 +367,7 @@ def is_sagemaker_distributed_available(): def is_training_run_on_sagemaker(): - return "SAGEMAKER_JOB_NAME" in os.environ and not DISABLE_TELEMETRY + return "SAGEMAKER_JOB_NAME" in os.environ def is_soundfile_availble(): @@ -1232,8 +1232,13 @@ def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str: ua += f"; torch/{_torch_version}" if is_tf_available(): ua += f"; tensorflow/{_tf_version}" + if DISABLE_TELEMETRY: + return ua + "; telemetry/off" if is_training_run_on_sagemaker(): ua += "; " + "; ".join(f"{k}/{v}" for k, v in define_sagemaker_information().items()) + # CI will set this value to True + if os.environ.get("TRANSFORMERS_IS_CI", "").upper() in ENV_VARS_TRUE_VALUES: + ua += "; is_ci/true" if isinstance(user_agent, dict): ua += "; " + "; ".join(f"{k}/{v}" for k, v in user_agent.items()) elif isinstance(user_agent, str): @@ -1243,7 +1248,7 @@ def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str: def http_get(url: str, temp_file: BinaryIO, proxies=None, resume_size=0, headers: Optional[Dict[str, str]] = None): """ - Donwload remote file. Do not gobble up errors. + Download remote file. Do not gobble up errors. 
""" headers = copy.deepcopy(headers) if resume_size > 0: diff --git a/src/transformers/modelcard.py b/src/transformers/modelcard.py index 2daab84649bfc4..d5063eacf657ef 100644 --- a/src/transformers/modelcard.py +++ b/src/transformers/modelcard.py @@ -133,6 +133,11 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): proxies = kwargs.pop("proxies", None) find_from_standard_name = kwargs.pop("find_from_standard_name", True) return_unused_kwargs = kwargs.pop("return_unused_kwargs", False) + from_pipeline = kwargs.pop("_from_pipeline", None) + + user_agent = {"file_type": "model_card"} + if from_pipeline is not None: + user_agent["using_pipeline"] = from_pipeline if pretrained_model_name_or_path in ALL_PRETRAINED_CONFIG_ARCHIVE_MAP: # For simplicity we use the same pretrained url than the configuration files @@ -152,7 +157,9 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): try: # Load from URL or cache if already cached - resolved_model_card_file = cached_path(model_card_file, cache_dir=cache_dir, proxies=proxies) + resolved_model_card_file = cached_path( + model_card_file, cache_dir=cache_dir, proxies=proxies, user_agent=user_agent + ) if resolved_model_card_file == model_card_file: logger.info("loading model card file {}".format(model_card_file)) else: diff --git a/src/transformers/modeling_flax_utils.py b/src/transformers/modeling_flax_utils.py index 55d7e371434687..8815f700a29bcc 100644 --- a/src/transformers/modeling_flax_utils.py +++ b/src/transformers/modeling_flax_utils.py @@ -224,6 +224,12 @@ def from_pretrained( local_files_only = kwargs.pop("local_files_only", False) use_auth_token = kwargs.pop("use_auth_token", None) revision = kwargs.pop("revision", None) + from_pipeline = kwargs.pop("_from_pipeline", None) + from_auto_class = kwargs.pop("_from_auto", False) + + user_agent = {"file_type": "model", "framework": "flax", "from_auto_class": from_auto_class} + if from_pipeline is not None: + user_agent["using_pipeline"] = from_pipeline if is_offline_mode() and not local_files_only: logger.info("Offline mode: forcing local_files_only=True") @@ -243,6 +249,8 @@ def from_pretrained( local_files_only=local_files_only, use_auth_token=use_auth_token, revision=revision, + _from_auto=from_auto_class, + _from_pipeline=from_pipeline, **kwargs, ) else: @@ -286,6 +294,7 @@ def from_pretrained( resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, + user_agent=user_agent, ) except EnvironmentError as err: logger.error(err) diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index c97032676fa1d8..cf71b25a1a0cb2 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -1164,6 +1164,12 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): revision = kwargs.pop("revision", None) mirror = kwargs.pop("mirror", None) load_weight_prefix = kwargs.pop("load_weight_prefix", None) + from_pipeline = kwargs.pop("_from_pipeline", None) + from_auto_class = kwargs.pop("_from_auto", False) + + user_agent = {"file_type": "model", "framework": "tensorflow", "from_auto_class": from_auto_class} + if from_pipeline is not None: + user_agent["using_pipeline"] = from_pipeline if is_offline_mode() and not local_files_only: logger.info("Offline mode: forcing local_files_only=True") @@ -1183,6 +1189,8 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): local_files_only=local_files_only, 
use_auth_token=use_auth_token, revision=revision, + _from_auto=from_auto_class, + _from_pipeline=from_pipeline, **kwargs, ) else: @@ -1225,6 +1233,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, + user_agent=user_agent, ) except EnvironmentError as err: logger.error(err) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 9a4f421a0de136..3846f524a8be8a 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -964,6 +964,12 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P use_auth_token = kwargs.pop("use_auth_token", None) revision = kwargs.pop("revision", None) mirror = kwargs.pop("mirror", None) + from_pipeline = kwargs.pop("_from_pipeline", None) + from_auto_class = kwargs.pop("_from_auto", False) + + user_agent = {"file_type": "model", "framework": "pytorch", "from_auto_class": from_auto_class} + if from_pipeline is not None: + user_agent["using_pipeline"] = from_pipeline if is_offline_mode() and not local_files_only: logger.info("Offline mode: forcing local_files_only=True") @@ -983,6 +989,8 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P local_files_only=local_files_only, use_auth_token=use_auth_token, revision=revision, + _from_auto=from_auto_class, + _from_pipeline=from_pipeline, **kwargs, ) else: @@ -1003,19 +1011,17 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P archive_file = os.path.join(pretrained_model_name_or_path, WEIGHTS_NAME) else: raise EnvironmentError( - "Error no file named {} found in directory {} or `from_tf` set to False".format( - [WEIGHTS_NAME, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME + ".index"], - pretrained_model_name_or_path, - ) + f"Error no file named {[WEIGHTS_NAME, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME + '.index']} found in " + f"directory {pretrained_model_name_or_path} or `from_tf` set to False." ) elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path): archive_file = pretrained_model_name_or_path elif os.path.isfile(pretrained_model_name_or_path + ".index"): - assert ( - from_tf - ), "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint".format( - pretrained_model_name_or_path + ".index" - ) + if not from_tf: + raise ValueError( + f"We found a TensorFlow checkpoint at {pretrained_model_name_or_path + '.index'}, please set " + "from_tf to True to load from this checkpoint." 
+ ) archive_file = pretrained_model_name_or_path + ".index" else: archive_file = hf_bucket_url( @@ -1035,6 +1041,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, + user_agent=user_agent, ) except EnvironmentError as err: logger.error(err) diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py index f4d5bf4de9f09e..ac5f4731716806 100644 --- a/src/transformers/models/auto/configuration_auto.py +++ b/src/transformers/models/auto/configuration_auto.py @@ -392,6 +392,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): >>> config.unused_kwargs {'foo': False} """ + kwargs["_from_auto"] = True config_dict, _ = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs) if "model_type" in config_dict: config_class = CONFIG_MAPPING[config_dict["model_type"]] diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index 2dce0c1a7543e5..22b895309e8c56 100644 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -830,6 +830,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModel.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -924,6 +925,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForPreTraining.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1035,6 +1037,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): FutureWarning, ) config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1128,6 +1131,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForCausalLM.from_pretrained('./tf_model/gpt2_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1221,6 +1225,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForMaskedLM.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1317,6 +1322,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = 
AutoModelForSeq2SeqLM.from_pretrained('./tf_model/t5_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1415,6 +1421,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForSequenceClassification.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1512,6 +1519,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForQuestionAnswering.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1612,6 +1620,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForQuestionAnswering.from_pretrained('./tf_model/tapas_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1710,6 +1719,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForTokenClassification.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1810,6 +1820,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForMultipleChoice.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1910,6 +1921,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForNextSentencePrediction.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs diff --git a/src/transformers/models/auto/modeling_flax_auto.py b/src/transformers/models/auto/modeling_flax_auto.py index 0a65f332cd3ec7..f91cc496e6b681 100644 --- a/src/transformers/models/auto/modeling_flax_auto.py +++ b/src/transformers/models/auto/modeling_flax_auto.py @@ -158,7 +158,9 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): for config_class, model_class in FLAX_MODEL_MAPPING.items(): if 
isinstance(config, config_class): - return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs) + return model_class.from_pretrained( + pretrained_model_name_or_path, *model_args, config=config, _from_auto=True, **kwargs + ) raise ValueError( f"Unrecognized configuration class {config.__class__} " f"for this kind of FlaxAutoModel: {cls.__name__}.\n" diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py index f0bf137bd93d2c..ece15c0445b11e 100644 --- a/src/transformers/models/auto/modeling_tf_auto.py +++ b/src/transformers/models/auto/modeling_tf_auto.py @@ -622,6 +622,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModel.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -716,6 +717,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForPreTraining.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -827,7 +829,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): FutureWarning, ) config = kwargs.pop("config", None) - + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -921,6 +923,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForCausalLM.from_pretrained('./pt_model/gpt2_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1014,6 +1017,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForMaskedLM.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1110,6 +1114,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForSeq2SeqLM.from_pretrained('./pt_model/t5_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1208,6 +1213,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForSequenceClassification.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, 
PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1305,6 +1311,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForQuestionAnswering.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1401,6 +1408,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForTokenClassification.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1499,6 +1507,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForMultipleChoice.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1597,6 +1606,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForNextSentencePrediction.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py index 0c97ea165306c4..06985c129a68b6 100644 --- a/src/transformers/models/auto/tokenization_auto.py +++ b/src/transformers/models/auto/tokenization_auto.py @@ -385,6 +385,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs): """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs) diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py index 43b1549627cdb3..0e4d4a754d9f03 100755 --- a/src/transformers/pipelines/__init__.py +++ b/src/transformers/pipelines/__init__.py @@ -363,7 +363,7 @@ def pipeline( # Infer the framework form the model if framework is None: - framework, model = infer_framework_from_model(model, targeted_task, revision=revision) + framework, model = infer_framework_from_model(model, targeted_task, revision=revision, task=task) task_class, model_class = targeted_task["impl"], targeted_task[framework] @@ -373,18 +373,20 @@ def pipeline( # For tuple we have (tokenizer name, {kwargs}) use_fast = tokenizer[1].pop("use_fast", use_fast) tokenizer = AutoTokenizer.from_pretrained( - tokenizer[0], use_fast=use_fast, revision=revision, **tokenizer[1] + tokenizer[0], use_fast=use_fast, revision=revision, _from_pipeline=task, **tokenizer[1] ) else: - tokenizer = AutoTokenizer.from_pretrained(tokenizer, revision=revision, use_fast=use_fast) + tokenizer = 
AutoTokenizer.from_pretrained( + tokenizer, revision=revision, use_fast=use_fast, _from_pipeline=task + ) # Instantiate config if needed if isinstance(config, str): - config = AutoConfig.from_pretrained(config, revision=revision) + config = AutoConfig.from_pretrained(config, revision=revision, _from_pipeline=task) # Instantiate modelcard if needed if isinstance(modelcard, str): - modelcard = ModelCard.from_pretrained(modelcard, revision=revision) + modelcard = ModelCard.from_pretrained(modelcard, revision=revision, _from_pipeline=task) # Instantiate model if needed if isinstance(model, str): @@ -407,7 +409,9 @@ def pipeline( f"Pipeline using {framework} framework, but this framework is not supported by this pipeline." ) - model = model_class.from_pretrained(model, config=config, revision=revision, **model_kwargs) + model = model_class.from_pretrained( + model, config=config, revision=revision, _from_pipeline=task, **model_kwargs + ) if task == "translation" and model.config.task_specific_params: for key in model.config.task_specific_params: diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index f4369a96d46790..9f582db4b82f8f 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -47,7 +47,9 @@ logger = logging.get_logger(__name__) -def infer_framework_from_model(model, model_classes: Optional[Dict[str, type]] = None, revision: Optional[str] = None): +def infer_framework_from_model( + model, model_classes: Optional[Dict[str, type]] = None, revision: Optional[str] = None, task: Optional[str] = None +): """ Select framework (TensorFlow or PyTorch) to use from the :obj:`model` passed. Returns a tuple (framework, model). @@ -80,17 +82,17 @@ def infer_framework_from_model(model, model_classes: Optional[Dict[str, type]] = if isinstance(model, str): if is_torch_available() and not is_tf_available(): model_class = model_classes.get("pt", AutoModel) - model = model_class.from_pretrained(model, revision=revision) + model = model_class.from_pretrained(model, revision=revision, _from_pipeline=task) elif is_tf_available() and not is_torch_available(): model_class = model_classes.get("tf", TFAutoModel) - model = model_class.from_pretrained(model, revision=revision) + model = model_class.from_pretrained(model, revision=revision, _from_pipeline=task) else: try: model_class = model_classes.get("pt", AutoModel) - model = model_class.from_pretrained(model, revision=revision) + model = model_class.from_pretrained(model, revision=revision, _from_pipeline=task) except OSError: model_class = model_classes.get("tf", TFAutoModel) - model = model_class.from_pretrained(model, revision=revision) + model = model_class.from_pretrained(model, revision=revision, _from_pipeline=task) framework = "tf" if model.__class__.__name__.startswith("TF") else "pt" return framework, model diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 92614e154e1418..7d388d170b051c 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -1596,6 +1596,12 @@ def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], use_auth_token = kwargs.pop("use_auth_token", None) revision = kwargs.pop("revision", None) subfolder = kwargs.pop("subfolder", None) + from_pipeline = kwargs.pop("_from_pipeline", None) + from_auto_class = kwargs.pop("_from_auto", False) + + user_agent = {"file_type": "tokenizer", "from_auto_class": from_auto_class, "is_fast": 
"Fast" in cls.__name__} + if from_pipeline is not None: + user_agent["using_pipeline"] = from_pipeline if is_offline_mode() and not local_files_only: logger.info("Offline mode: forcing local_files_only=True") @@ -1663,6 +1669,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, + user_agent=user_agent, ) except FileNotFoundError as error: