From 99ee11b23f7f7b76d2d7e38169b391a2219649f7 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Tue, 30 Mar 2021 11:36:19 -0400 Subject: [PATCH 1/6] Add more metadata to the user agent --- .circleci/config.yml | 10 ++++---- .github/workflows/self-push.yml | 8 +++--- .github/workflows/self-scheduled.yml | 16 ++++++------ src/transformers/configuration_utils.py | 7 ++++++ src/transformers/file_utils.py | 5 +++- src/transformers/modelcard.py | 9 ++++++- src/transformers/modeling_flax_utils.py | 9 +++++++ src/transformers/modeling_tf_utils.py | 9 +++++++ src/transformers/modeling_utils.py | 25 ++++++++++++------- .../models/auto/configuration_auto.py | 1 + src/transformers/models/auto/modeling_auto.py | 12 +++++++++ .../models/auto/modeling_flax_auto.py | 4 ++- .../models/auto/modeling_tf_auto.py | 14 +++++++++-- .../models/auto/tokenization_auto.py | 1 + src/transformers/pipelines/__init__.py | 16 +++++++----- src/transformers/pipelines/base.py | 12 +++++---- src/transformers/tokenization_utils_base.py | 7 ++++++ 17 files changed, 123 insertions(+), 42 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 342c538bc1b5d0..9c999e26958e09 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -221,7 +221,7 @@ jobs: key: v0.4-torch-{{ checksum "setup.py" }} paths: - '~/.cache/pip' - - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_torch -m is_pipeline_test ./tests/ | tee tests_output.txt + - run: TRANSFORMERS_IS_CI=1 RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_torch -m is_pipeline_test ./tests/ | tee tests_output.txt - store_artifacts: path: ~/transformers/tests_output.txt - store_artifacts: @@ -247,7 +247,7 @@ jobs: key: v0.4-tf-{{ checksum "setup.py" }} paths: - '~/.cache/pip' - - run: RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_tf ./tests/ -m is_pipeline_test | tee tests_output.txt + - run: TRANSFORMERS_IS_CI=1 RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_tf ./tests/ -m is_pipeline_test | tee tests_output.txt - store_artifacts: path: ~/transformers/tests_output.txt - store_artifacts: @@ -272,7 +272,7 @@ jobs: key: v0.4-custom_tokenizers-{{ checksum "setup.py" }} paths: - '~/.cache/pip' - - run: python -m pytest -s --make-reports=tests_custom_tokenizers ./tests/test_tokenization_bert_japanese.py | tee tests_output.txt + - run: TRANSFORMERS_IS_CI=1 python -m pytest -s --make-reports=tests_custom_tokenizers ./tests/test_tokenization_bert_japanese.py | tee tests_output.txt - store_artifacts: path: ~/transformers/tests_output.txt - store_artifacts: @@ -299,7 +299,7 @@ jobs: key: v0.4-torch_examples-{{ checksum "setup.py" }} paths: - '~/.cache/pip' - - run: python -m pytest -n 8 --dist=loadfile -s --make-reports=examples_torch ./examples/ | tee examples_output.txt + - run: TRANSFORMERS_IS_CI=1 python -m pytest -n 8 --dist=loadfile -s --make-reports=examples_torch ./examples/ | tee examples_output.txt - store_artifacts: path: ~/transformers/examples_output.txt - store_artifacts: @@ -319,7 +319,7 @@ jobs: git config --global user.name "ci" - run: pip install --upgrade pip - run: pip install .[testing] - - run: RUN_GIT_LFS_TESTS=1 python -m pytest -sv ./tests/test_hf_api.py -k "HfLargefilesTest" + - run: TRANSFORMERS_IS_CI=1 RUN_GIT_LFS_TESTS=1 python -m pytest -sv ./tests/test_hf_api.py -k "HfLargefilesTest" build_doc: working_directory: ~/transformers diff 
--git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml index 8af6f8ea5c23f8..947787f16b1173 100644 --- a/.github/workflows/self-push.yml +++ b/.github/workflows/self-push.yml @@ -45,7 +45,7 @@ jobs: MKL_NUM_THREADS: 8 HF_HOME: /mnt/cache run: | - python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_gpu tests + TRANSFORMERS_IS_CI=1 python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_gpu tests - name: Failure short reports if: ${{ always() }} @@ -89,7 +89,7 @@ jobs: TF_NUM_INTEROP_THREADS: 1 HF_HOME: /mnt/cache run: | - python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_gpu tests + TRANSFORMERS_IS_CI=1 python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_gpu tests - name: Failure short reports if: ${{ always() }} @@ -136,7 +136,7 @@ jobs: MKL_SERVICE_FORCE_INTEL: 1 HF_HOME: /mnt/cache run: | - python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_multi_gpu tests + TRANSFORMERS_IS_CI=1 python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_multi_gpu tests - name: Failure short reports if: ${{ always() }} @@ -180,7 +180,7 @@ jobs: TF_NUM_INTEROP_THREADS: 1 HF_HOME: /mnt/cache run: | - python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_multi_gpu tests + TRANSFORMERS_IS_CI=1 python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_multi_gpu tests - name: Failure short reports if: ${{ always() }} diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 50720411135101..cefba52c3b57c0 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -42,7 +42,7 @@ jobs: RUN_SLOW: yes HF_HOME: /mnt/cache run: | - python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_gpu tests + TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_gpu tests - name: Failure short reports if: ${{ always() }} @@ -72,7 +72,7 @@ jobs: RUN_PIPELINE_TESTS: yes HF_HOME: /mnt/cache run: | - python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_gpu tests + TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_gpu tests - name: Failure short reports if: ${{ always() }} @@ -117,7 +117,7 @@ jobs: TF_NUM_INTRAOP_THREADS: 16 MKL_NUM_THREADS: 16 run: | - python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_gpu tests + TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_gpu tests - name: Failure short reports if: ${{ always() }} @@ -134,7 +134,7 @@ jobs: TF_NUM_INTRAOP_THREADS: 16 MKL_NUM_THREADS: 16 run: | - python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_gpu tests + TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_gpu tests - name: Failure short reports if: ${{ always() }} @@ -181,7 +181,7 @@ jobs: MKL_NUM_THREADS: 16 MKL_SERVICE_FORCE_INTEL: 1 run: | - python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_multi_gpu tests + TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_multi_gpu tests - name: Failure short reports if: ${{ always() }} @@ -196,7 +196,7 @@ jobs: RUN_PIPELINE_TESTS: yes HF_HOME: /mnt/cache run: | - python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests + TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile -m is_pipeline_test 
--make-reports=tests_torch_pipeline_multi_gpu tests - name: Failure short reports if: ${{ always() }} @@ -241,7 +241,7 @@ jobs: TF_NUM_INTRAOP_THREADS: 16 HF_HOME: /mnt/cache run: | - python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_multi_gpu tests + TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_multi_gpu tests - name: Failure short reports if: ${{ always() }} @@ -258,7 +258,7 @@ jobs: TF_NUM_INTRAOP_THREADS: 16 HF_HOME: /mnt/cache run: | - python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_multi_gpu tests + TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_multi_gpu tests - name: Failure short reports if: ${{ always() }} diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index 1c428eae5cf463..d0631d3f1abc44 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -420,6 +420,12 @@ def get_config_dict( use_auth_token = kwargs.pop("use_auth_token", None) local_files_only = kwargs.pop("local_files_only", False) revision = kwargs.pop("revision", None) + from_pipeline = kwargs.pop("_from_pipeline", None) + from_auto_class = kwargs.pop("_from_auto", False) + + user_agent = {"file_type": "config", "from_auto_class": from_auto_class} + if from_pipeline is not None: + user_agent["using_pipeline"] = from_pipeline if is_offline_mode() and not local_files_only: logger.info("Offline mode: forcing local_files_only=True") @@ -445,6 +451,7 @@ def get_config_dict( resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, + user_agent=user_agent, ) # Load config dict config_dict = cls._dict_from_json_file(resolved_config_file) diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index 89585e30e309db..25e7aba282f47e 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -1234,6 +1234,9 @@ def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str: ua += f"; tensorflow/{_tf_version}" if is_training_run_on_sagemaker(): ua += "; " + "; ".join(f"{k}/{v}" for k, v in define_sagemaker_information().items()) + # CI will set this value to True + if os.environ.get("TRANSFORMERS_IS_CI", "").upper() in ENV_VARS_TRUE_VALUES: + ua += "; is_ci/true" if isinstance(user_agent, dict): ua += "; " + "; ".join(f"{k}/{v}" for k, v in user_agent.items()) elif isinstance(user_agent, str): @@ -1243,7 +1246,7 @@ def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str: def http_get(url: str, temp_file: BinaryIO, proxies=None, resume_size=0, headers: Optional[Dict[str, str]] = None): """ - Donwload remote file. Do not gobble up errors. + Download remote file. Do not gobble up errors. 
""" headers = copy.deepcopy(headers) if resume_size > 0: diff --git a/src/transformers/modelcard.py b/src/transformers/modelcard.py index 2daab84649bfc4..d5063eacf657ef 100644 --- a/src/transformers/modelcard.py +++ b/src/transformers/modelcard.py @@ -133,6 +133,11 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): proxies = kwargs.pop("proxies", None) find_from_standard_name = kwargs.pop("find_from_standard_name", True) return_unused_kwargs = kwargs.pop("return_unused_kwargs", False) + from_pipeline = kwargs.pop("_from_pipeline", None) + + user_agent = {"file_type": "model_card"} + if from_pipeline is not None: + user_agent["using_pipeline"] = from_pipeline if pretrained_model_name_or_path in ALL_PRETRAINED_CONFIG_ARCHIVE_MAP: # For simplicity we use the same pretrained url than the configuration files @@ -152,7 +157,9 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): try: # Load from URL or cache if already cached - resolved_model_card_file = cached_path(model_card_file, cache_dir=cache_dir, proxies=proxies) + resolved_model_card_file = cached_path( + model_card_file, cache_dir=cache_dir, proxies=proxies, user_agent=user_agent + ) if resolved_model_card_file == model_card_file: logger.info("loading model card file {}".format(model_card_file)) else: diff --git a/src/transformers/modeling_flax_utils.py b/src/transformers/modeling_flax_utils.py index 8b245f6546d102..1ebb2021c499cd 100644 --- a/src/transformers/modeling_flax_utils.py +++ b/src/transformers/modeling_flax_utils.py @@ -228,6 +228,12 @@ def from_pretrained( local_files_only = kwargs.pop("local_files_only", False) use_auth_token = kwargs.pop("use_auth_token", None) revision = kwargs.pop("revision", None) + from_pipeline = kwargs.pop("_from_pipeline", None) + from_auto_class = kwargs.pop("_from_auto", False) + + user_agent = {"file_type": "model", "framework": "flax", "from_auto_class": from_auto_class} + if from_pipeline is not None: + user_agent["using_pipeline"] = from_pipeline if is_offline_mode() and not local_files_only: logger.info("Offline mode: forcing local_files_only=True") @@ -247,6 +253,8 @@ def from_pretrained( local_files_only=local_files_only, use_auth_token=use_auth_token, revision=revision, + _from_auto=from_auto_class, + _from_pipeline=from_pipeline, **kwargs, ) else: @@ -290,6 +298,7 @@ def from_pretrained( resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, + user_agent=user_agent, ) except EnvironmentError as err: logger.error(err) diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index c97032676fa1d8..cf71b25a1a0cb2 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -1164,6 +1164,12 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): revision = kwargs.pop("revision", None) mirror = kwargs.pop("mirror", None) load_weight_prefix = kwargs.pop("load_weight_prefix", None) + from_pipeline = kwargs.pop("_from_pipeline", None) + from_auto_class = kwargs.pop("_from_auto", False) + + user_agent = {"file_type": "model", "framework": "tensorflow", "from_auto_class": from_auto_class} + if from_pipeline is not None: + user_agent["using_pipeline"] = from_pipeline if is_offline_mode() and not local_files_only: logger.info("Offline mode: forcing local_files_only=True") @@ -1183,6 +1189,8 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): local_files_only=local_files_only, 
use_auth_token=use_auth_token, revision=revision, + _from_auto=from_auto_class, + _from_pipeline=from_pipeline, **kwargs, ) else: @@ -1225,6 +1233,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, + user_agent=user_agent, ) except EnvironmentError as err: logger.error(err) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 9a4f421a0de136..3846f524a8be8a 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -964,6 +964,12 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P use_auth_token = kwargs.pop("use_auth_token", None) revision = kwargs.pop("revision", None) mirror = kwargs.pop("mirror", None) + from_pipeline = kwargs.pop("_from_pipeline", None) + from_auto_class = kwargs.pop("_from_auto", False) + + user_agent = {"file_type": "model", "framework": "pytorch", "from_auto_class": from_auto_class} + if from_pipeline is not None: + user_agent["using_pipeline"] = from_pipeline if is_offline_mode() and not local_files_only: logger.info("Offline mode: forcing local_files_only=True") @@ -983,6 +989,8 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P local_files_only=local_files_only, use_auth_token=use_auth_token, revision=revision, + _from_auto=from_auto_class, + _from_pipeline=from_pipeline, **kwargs, ) else: @@ -1003,19 +1011,17 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P archive_file = os.path.join(pretrained_model_name_or_path, WEIGHTS_NAME) else: raise EnvironmentError( - "Error no file named {} found in directory {} or `from_tf` set to False".format( - [WEIGHTS_NAME, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME + ".index"], - pretrained_model_name_or_path, - ) + f"Error no file named {[WEIGHTS_NAME, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME + '.index']} found in " + f"directory {pretrained_model_name_or_path} or `from_tf` set to False." ) elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path): archive_file = pretrained_model_name_or_path elif os.path.isfile(pretrained_model_name_or_path + ".index"): - assert ( - from_tf - ), "We found a TensorFlow checkpoint at {}, please set from_tf to True to load from this checkpoint".format( - pretrained_model_name_or_path + ".index" - ) + if not from_tf: + raise ValueError( + f"We found a TensorFlow checkpoint at {pretrained_model_name_or_path + '.index'}, please set " + "from_tf to True to load from this checkpoint." 
+ ) archive_file = pretrained_model_name_or_path + ".index" else: archive_file = hf_bucket_url( @@ -1035,6 +1041,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, + user_agent=user_agent, ) except EnvironmentError as err: logger.error(err) diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py index c28d3190dce2ce..e615cf53ff1014 100644 --- a/src/transformers/models/auto/configuration_auto.py +++ b/src/transformers/models/auto/configuration_auto.py @@ -384,6 +384,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): >>> config.unused_kwargs {'foo': False} """ + kwargs["_from_auto"] = True config_dict, _ = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs) if "model_type" in config_dict: config_class = CONFIG_MAPPING[config_dict["model_type"]] diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index 4d11dbaa37b65f..9a685a5786bdc1 100644 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -805,6 +805,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModel.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -899,6 +900,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForPreTraining.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1010,6 +1012,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): FutureWarning, ) config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1103,6 +1106,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForCausalLM.from_pretrained('./tf_model/gpt2_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1196,6 +1200,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForMaskedLM.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1292,6 +1297,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = 
AutoModelForSeq2SeqLM.from_pretrained('./tf_model/t5_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1390,6 +1396,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForSequenceClassification.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1487,6 +1494,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForQuestionAnswering.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1587,6 +1595,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForQuestionAnswering.from_pretrained('./tf_model/tapas_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1685,6 +1694,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForTokenClassification.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1785,6 +1795,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForMultipleChoice.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1885,6 +1896,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = AutoModelForNextSentencePrediction.from_pretrained('./tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs diff --git a/src/transformers/models/auto/modeling_flax_auto.py b/src/transformers/models/auto/modeling_flax_auto.py index 0a65f332cd3ec7..f91cc496e6b681 100644 --- a/src/transformers/models/auto/modeling_flax_auto.py +++ b/src/transformers/models/auto/modeling_flax_auto.py @@ -158,7 +158,9 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): for config_class, model_class in FLAX_MODEL_MAPPING.items(): if 
isinstance(config, config_class): - return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs) + return model_class.from_pretrained( + pretrained_model_name_or_path, *model_args, config=config, _from_auto=True, **kwargs + ) raise ValueError( f"Unrecognized configuration class {config.__class__} " f"for this kind of FlaxAutoModel: {cls.__name__}.\n" diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py index f0bf137bd93d2c..21bd08d85110c0 100644 --- a/src/transformers/models/auto/modeling_tf_auto.py +++ b/src/transformers/models/auto/modeling_tf_auto.py @@ -622,6 +622,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModel.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -716,6 +717,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForPreTraining.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -827,7 +829,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): FutureWarning, ) config = kwargs.pop("config", None) - + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -921,6 +923,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForCausalLM.from_pretrained('./pt_model/gpt2_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1014,6 +1017,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForMaskedLM.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1110,9 +1114,10 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForSeq2SeqLM.from_pretrained('./pt_model/t5_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( - pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs + pretrained_model_name_or_path, return_unused_kwargs=True, _from_auto=True, **kwargs ) if type(config) in TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.keys(): @@ -1208,6 +1213,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = 
TFAutoModelForSequenceClassification.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1305,6 +1311,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForQuestionAnswering.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1401,6 +1408,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForTokenClassification.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1499,6 +1507,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForMultipleChoice.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs @@ -1597,6 +1606,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): >>> model = TFAutoModelForNextSentencePrediction.from_pretrained('./pt_model/bert_pytorch_model.bin', from_pt=True, config=config) """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py index bf58b75099d278..7c835abca628d9 100644 --- a/src/transformers/models/auto/tokenization_auto.py +++ b/src/transformers/models/auto/tokenization_auto.py @@ -379,6 +379,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs): """ config = kwargs.pop("config", None) + kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs) diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py index 43b1549627cdb3..0e4d4a754d9f03 100755 --- a/src/transformers/pipelines/__init__.py +++ b/src/transformers/pipelines/__init__.py @@ -363,7 +363,7 @@ def pipeline( # Infer the framework form the model if framework is None: - framework, model = infer_framework_from_model(model, targeted_task, revision=revision) + framework, model = infer_framework_from_model(model, targeted_task, revision=revision, task=task) task_class, model_class = targeted_task["impl"], targeted_task[framework] @@ -373,18 +373,20 @@ def pipeline( # For tuple we have (tokenizer name, {kwargs}) use_fast = tokenizer[1].pop("use_fast", use_fast) tokenizer = AutoTokenizer.from_pretrained( - tokenizer[0], use_fast=use_fast, revision=revision, 
**tokenizer[1] + tokenizer[0], use_fast=use_fast, revision=revision, _from_pipeline=task, **tokenizer[1] ) else: - tokenizer = AutoTokenizer.from_pretrained(tokenizer, revision=revision, use_fast=use_fast) + tokenizer = AutoTokenizer.from_pretrained( + tokenizer, revision=revision, use_fast=use_fast, _from_pipeline=task + ) # Instantiate config if needed if isinstance(config, str): - config = AutoConfig.from_pretrained(config, revision=revision) + config = AutoConfig.from_pretrained(config, revision=revision, _from_pipeline=task) # Instantiate modelcard if needed if isinstance(modelcard, str): - modelcard = ModelCard.from_pretrained(modelcard, revision=revision) + modelcard = ModelCard.from_pretrained(modelcard, revision=revision, _from_pipeline=task) # Instantiate model if needed if isinstance(model, str): @@ -407,7 +409,9 @@ def pipeline( f"Pipeline using {framework} framework, but this framework is not supported by this pipeline." ) - model = model_class.from_pretrained(model, config=config, revision=revision, **model_kwargs) + model = model_class.from_pretrained( + model, config=config, revision=revision, _from_pipeline=task, **model_kwargs + ) if task == "translation" and model.config.task_specific_params: for key in model.config.task_specific_params: diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 01d3699c6f656f..481cc027ced71f 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -47,7 +47,9 @@ logger = logging.get_logger(__name__) -def infer_framework_from_model(model, model_classes: Optional[Dict[str, type]] = None, revision: Optional[str] = None): +def infer_framework_from_model( + model, model_classes: Optional[Dict[str, type]] = None, revision: Optional[str] = None, task: Optional[str] = None +): """ Select framework (TensorFlow or PyTorch) to use from the :obj:`model` passed. Returns a tuple (framework, model). 
@@ -80,17 +82,17 @@ def infer_framework_from_model(model, model_classes: Optional[Dict[str, type]] = if isinstance(model, str): if is_torch_available() and not is_tf_available(): model_class = model_classes.get("pt", AutoModel) - model = model_class.from_pretrained(model, revision=revision) + model = model_class.from_pretrained(model, revision=revision, _from_pipeline=task) elif is_tf_available() and not is_torch_available(): model_class = model_classes.get("tf", TFAutoModel) - model = model_class.from_pretrained(model, revision=revision) + model = model_class.from_pretrained(model, revision=revision, _from_pipeline=task) else: try: model_class = model_classes.get("pt", AutoModel) - model = model_class.from_pretrained(model, revision=revision) + model = model_class.from_pretrained(model, revision=revision, _from_pipeline=task) except OSError: model_class = model_classes.get("tf", TFAutoModel) - model = model_class.from_pretrained(model, revision=revision) + model = model_class.from_pretrained(model, revision=revision, _from_pipeline=task) framework = "tf" if model.__class__.__name__.startswith("TF") else "pt" return framework, model diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 92614e154e1418..7d388d170b051c 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -1596,6 +1596,12 @@ def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], use_auth_token = kwargs.pop("use_auth_token", None) revision = kwargs.pop("revision", None) subfolder = kwargs.pop("subfolder", None) + from_pipeline = kwargs.pop("_from_pipeline", None) + from_auto_class = kwargs.pop("_from_auto", False) + + user_agent = {"file_type": "tokenizer", "from_auto_class": from_auto_class, "is_fast": "Fast" in cls.__name__} + if from_pipeline is not None: + user_agent["using_pipeline"] = from_pipeline if is_offline_mode() and not local_files_only: logger.info("Offline mode: forcing local_files_only=True") @@ -1663,6 +1669,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, + user_agent=user_agent, ) except FileNotFoundError as error: From 2dd4ed9b90960af8bc72779e3874ee9e92d9df72 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Tue, 30 Mar 2021 11:45:55 -0400 Subject: [PATCH 2/6] Fix typo --- src/transformers/models/auto/modeling_tf_auto.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py index 21bd08d85110c0..ece15c0445b11e 100644 --- a/src/transformers/models/auto/modeling_tf_auto.py +++ b/src/transformers/models/auto/modeling_tf_auto.py @@ -1117,7 +1117,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): kwargs["_from_auto"] = True if not isinstance(config, PretrainedConfig): config, kwargs = AutoConfig.from_pretrained( - pretrained_model_name_or_path, return_unused_kwargs=True, _from_auto=True, **kwargs + pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs ) if type(config) in TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.keys(): From 7bed4a1c8269af42d85c8708ff965dbb463820fe Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Tue, 30 Mar 2021 13:42:01 -0400 Subject: [PATCH 3/6] Use DISABLE_TELEMETRY --- src/transformers/file_utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) 
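Note: a minimal sketch (not part of this commit) of how the DISABLE_TELEMETRY parsing
introduced below behaves, assuming ENV_VARS_TRUE_VALUES is the set of upper-case truthy
strings defined elsewhere in file_utils.py (e.g. "1", "ON", "YES"):

    import os

    # Assumed definition for illustration; the real constant lives in
    # src/transformers/file_utils.py.
    ENV_VARS_TRUE_VALUES = {"1", "ON", "YES"}

    # Mirrors the patched line: the False default is never in the set, so telemetry
    # stays enabled unless the variable is explicitly set to a truthy value.
    # The lookup is case-sensitive because the value is not upper-cased here.
    DISABLE_TELEMETRY = os.getenv("DISABLE_TELEMETRY", False) in ENV_VARS_TRUE_VALUES

    # DISABLE_TELEMETRY=1 -> True; DISABLE_TELEMETRY=0, unset, or "true" -> False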
diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index 25e7aba282f47e..a5967b0b63c0fc 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -212,7 +212,7 @@ PYTORCH_TRANSFORMERS_CACHE = os.getenv("PYTORCH_TRANSFORMERS_CACHE", PYTORCH_PRETRAINED_BERT_CACHE) TRANSFORMERS_CACHE = os.getenv("TRANSFORMERS_CACHE", PYTORCH_TRANSFORMERS_CACHE) SESSION_ID = uuid4().hex -DISABLE_TELEMETRY = os.getenv("DISABLE_TELEMETRY", False) +DISABLE_TELEMETRY = os.getenv("DISABLE_TELEMETRY", False) in ENV_VARS_TRUE_VALUES WEIGHTS_NAME = "pytorch_model.bin" TF2_WEIGHTS_NAME = "tf_model.h5" @@ -367,7 +367,7 @@ def is_sagemaker_distributed_available(): def is_training_run_on_sagemaker(): - return "SAGEMAKER_JOB_NAME" in os.environ and not DISABLE_TELEMETRY + return "SAGEMAKER_JOB_NAME" in os.environ def is_soundfile_availble(): @@ -1227,6 +1227,8 @@ def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str: """ Formats a user-agent string with basic info about a request. """ + if DISABLE_TELEMETRY: + return "telemetry/off" ua = f"transformers/{__version__}; python/{sys.version.split()[0]}; session_id/{SESSION_ID}" if is_torch_available(): ua += f"; torch/{_torch_version}" From c6b6891376f044976dda3f47a94b4a51d91cc756 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Tue, 30 Mar 2021 14:01:40 -0400 Subject: [PATCH 4/6] Address review comments --- .github/workflows/self-push.yml | 12 ++++++++---- .github/workflows/self-scheduled.yml | 25 +++++++++++++++++-------- src/transformers/file_utils.py | 4 ++-- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml index 947787f16b1173..c5912095c04058 100644 --- a/.github/workflows/self-push.yml +++ b/.github/workflows/self-push.yml @@ -44,8 +44,9 @@ jobs: OMP_NUM_THREADS: 8 MKL_NUM_THREADS: 8 HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes run: | - TRANSFORMERS_IS_CI=1 python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_gpu tests + python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_gpu tests - name: Failure short reports if: ${{ always() }} @@ -88,8 +89,9 @@ jobs: TF_NUM_INTRAOP_THREADS: 8 TF_NUM_INTEROP_THREADS: 1 HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes run: | - TRANSFORMERS_IS_CI=1 python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_gpu tests + python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_gpu tests - name: Failure short reports if: ${{ always() }} @@ -135,8 +137,9 @@ jobs: MKL_NUM_THREADS: 8 MKL_SERVICE_FORCE_INTEL: 1 HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes run: | - TRANSFORMERS_IS_CI=1 python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_multi_gpu tests + python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_multi_gpu tests - name: Failure short reports if: ${{ always() }} @@ -179,8 +182,9 @@ jobs: TF_NUM_INTRAOP_THREADS: 8 TF_NUM_INTEROP_THREADS: 1 HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes run: | - TRANSFORMERS_IS_CI=1 python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_multi_gpu tests + python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_multi_gpu tests - name: Failure short reports if: ${{ always() }} diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index cefba52c3b57c0..5969fed0775541 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -41,8 +41,9 @@ jobs: MKL_NUM_THREADS: 16 RUN_SLOW: yes HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: 
yes run: | - TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_gpu tests + python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_gpu tests - name: Failure short reports if: ${{ always() }} @@ -55,6 +56,7 @@ jobs: MKL_NUM_THREADS: 16 RUN_SLOW: yes HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes run: | pip install -r examples/_tests_requirements.txt python -m pytest -n 1 --dist=loadfile --make-reports=examples_torch_gpu examples @@ -71,8 +73,9 @@ jobs: RUN_SLOW: yes RUN_PIPELINE_TESTS: yes HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes run: | - TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_gpu tests + python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_gpu tests - name: Failure short reports if: ${{ always() }} @@ -116,8 +119,9 @@ jobs: TF_NUM_INTEROP_THREADS: 1 TF_NUM_INTRAOP_THREADS: 16 MKL_NUM_THREADS: 16 + TRANSFORMERS_IS_CI: yes run: | - TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_gpu tests + python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_gpu tests - name: Failure short reports if: ${{ always() }} @@ -133,8 +137,9 @@ jobs: TF_NUM_INTEROP_THREADS: 1 TF_NUM_INTRAOP_THREADS: 16 MKL_NUM_THREADS: 16 + TRANSFORMERS_IS_CI: yes run: | - TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_gpu tests + python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_gpu tests - name: Failure short reports if: ${{ always() }} @@ -180,8 +185,9 @@ jobs: OMP_NUM_THREADS: 16 MKL_NUM_THREADS: 16 MKL_SERVICE_FORCE_INTEL: 1 + TRANSFORMERS_IS_CI: yes run: | - TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_multi_gpu tests + python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_multi_gpu tests - name: Failure short reports if: ${{ always() }} @@ -195,8 +201,9 @@ jobs: RUN_SLOW: yes RUN_PIPELINE_TESTS: yes HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes run: | - TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests + python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests - name: Failure short reports if: ${{ always() }} @@ -240,8 +247,9 @@ jobs: TF_NUM_INTEROP_THREADS: 1 TF_NUM_INTRAOP_THREADS: 16 HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes run: | - TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_multi_gpu tests + python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_multi_gpu tests - name: Failure short reports if: ${{ always() }} @@ -257,8 +265,9 @@ jobs: TF_NUM_INTEROP_THREADS: 1 TF_NUM_INTRAOP_THREADS: 16 HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes run: | - TRANSFORMERS_IS_CI=1 python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_multi_gpu tests + python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_multi_gpu tests - name: Failure short reports if: ${{ always() }} diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index a5967b0b63c0fc..e795501ad34e4a 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -1227,13 +1227,13 @@ def http_user_agent(user_agent: Union[Dict, str, None] = None) -> str: """ Formats a user-agent string with basic info about a request. 
""" - if DISABLE_TELEMETRY: - return "telemetry/off" ua = f"transformers/{__version__}; python/{sys.version.split()[0]}; session_id/{SESSION_ID}" if is_torch_available(): ua += f"; torch/{_torch_version}" if is_tf_available(): ua += f"; tensorflow/{_tf_version}" + if DISABLE_TELEMETRY: + return ua + "; telemetry/off" if is_training_run_on_sagemaker(): ua += "; " + "; ".join(f"{k}/{v}" for k, v in define_sagemaker_information().items()) # CI will set this value to True From e647d81adafd5df2d4f415a5e4952445d21c2bc5 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Tue, 30 Mar 2021 15:55:37 -0400 Subject: [PATCH 5/6] Use global env --- .github/workflows/self-push.yml | 23 ++++--------- .github/workflows/self-scheduled.yml | 48 ++++------------------------ 2 files changed, 13 insertions(+), 58 deletions(-) diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml index c5912095c04058..210076f14163c2 100644 --- a/.github/workflows/self-push.yml +++ b/.github/workflows/self-push.yml @@ -12,6 +12,12 @@ on: - "templates/**" repository_dispatch: +env: + HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes + OMP_NUM_THREADS: 8 + MKL_NUM_THREADS: 8 + jobs: run_tests_torch_gpu: runs-on: [self-hosted, docker-gpu, single-gpu] @@ -40,11 +46,6 @@ jobs: python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())" - name: Run all non-slow tests on GPU - env: - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes run: | python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_gpu tests @@ -84,12 +85,8 @@ jobs: - name: Run all non-slow tests on GPU env: - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 TF_NUM_INTRAOP_THREADS: 8 TF_NUM_INTEROP_THREADS: 1 - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes run: | python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_gpu tests @@ -133,11 +130,7 @@ jobs: - name: Run all non-slow tests on GPU env: - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 MKL_SERVICE_FORCE_INTEL: 1 - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes run: | python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_multi_gpu tests @@ -177,12 +170,8 @@ jobs: - name: Run all non-slow tests on GPU env: - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 TF_NUM_INTRAOP_THREADS: 8 TF_NUM_INTEROP_THREADS: 1 - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes run: | python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_multi_gpu tests diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 5969fed0775541..3b72baea0d2b76 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -8,6 +8,13 @@ on: schedule: - cron: "0 0 * * *" +env: + HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes + RUN_SLOW: yes + OMP_NUM_THREADS: 16 + MKL_NUM_THREADS: 16 + jobs: run_all_tests_torch_gpu: runs-on: [self-hosted, docker-gpu, single-gpu] @@ -36,12 +43,6 @@ jobs: python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())" - name: Run all tests on GPU - env: - OMP_NUM_THREADS: 16 - MKL_NUM_THREADS: 16 - RUN_SLOW: yes - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes run: | python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_gpu tests @@ -68,12 +69,7 @@ jobs: - name: Run all pipeline tests on GPU if: ${{ always() }} env: - OMP_NUM_THREADS: 16 - MKL_NUM_THREADS: 16 - RUN_SLOW: yes RUN_PIPELINE_TESTS: yes - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes run: | python -m pytest -n 1 --dist=loadfile -m is_pipeline_test 
--make-reports=tests_torch_pipeline_gpu tests @@ -113,13 +109,8 @@ jobs: - name: Run all tests on GPU env: - RUN_SLOW: yes - HF_HOME: /mnt/cache - OMP_NUM_THREADS: 16 TF_NUM_INTEROP_THREADS: 1 TF_NUM_INTRAOP_THREADS: 16 - MKL_NUM_THREADS: 16 - TRANSFORMERS_IS_CI: yes run: | python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_gpu tests @@ -130,14 +121,9 @@ jobs: - name: Run all pipeline tests on GPU if: ${{ always() }} env: - RUN_SLOW: yes - HF_HOME: /mnt/cache - OMP_NUM_THREADS: 16 RUN_PIPELINE_TESTS: yes TF_NUM_INTEROP_THREADS: 1 TF_NUM_INTRAOP_THREADS: 16 - MKL_NUM_THREADS: 16 - TRANSFORMERS_IS_CI: yes run: | python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_gpu tests @@ -180,12 +166,7 @@ jobs: - name: Run all tests on GPU env: - RUN_SLOW: yes - HF_HOME: /mnt/cache - OMP_NUM_THREADS: 16 - MKL_NUM_THREADS: 16 MKL_SERVICE_FORCE_INTEL: 1 - TRANSFORMERS_IS_CI: yes run: | python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_multi_gpu tests @@ -196,12 +177,7 @@ jobs: - name: Run all pipeline tests on GPU if: ${{ always() }} env: - OMP_NUM_THREADS: 16 - MKL_NUM_THREADS: 16 - RUN_SLOW: yes RUN_PIPELINE_TESTS: yes - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes run: | python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests @@ -241,13 +217,8 @@ jobs: - name: Run all tests on GPU env: - OMP_NUM_THREADS: 16 - RUN_SLOW: yes - MKL_NUM_THREADS: 16 TF_NUM_INTEROP_THREADS: 1 TF_NUM_INTRAOP_THREADS: 16 - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes run: | python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_multi_gpu tests @@ -258,14 +229,9 @@ jobs: - name: Run all pipeline tests on GPU if: ${{ always() }} env: - OMP_NUM_THREADS: 16 - RUN_SLOW: yes RUN_PIPELINE_TESTS: yes - MKL_NUM_THREADS: 16 TF_NUM_INTEROP_THREADS: 1 TF_NUM_INTRAOP_THREADS: 16 - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes run: | python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_multi_gpu tests From 9e0c18a2e997b18b3176fae59ba34618ec08b36d Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Wed, 31 Mar 2021 09:12:31 -0400 Subject: [PATCH 6/6] Add clean envs on circle CI --- .circleci/config.yml | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 9c999e26958e09..28b4f52abd3d97 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,7 +3,6 @@ orbs: gcp-gke: circleci/gcp-gke@1.0.4 go: circleci/go@1.3.0 - # TPU REFERENCES references: checkout_ml_testing: &checkout_ml_testing @@ -69,6 +68,8 @@ jobs: - image: circleci/python:3.6 environment: OMP_NUM_THREADS: 1 + RUN_PT_TF_CROSS_TESTS: yes + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -85,7 +86,7 @@ jobs: key: v0.4-{{ checksum "setup.py" }} paths: - '~/.cache/pip' - - run: RUN_PT_TF_CROSS_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_torch_and_tf ./tests/ -m is_pt_tf_cross_test --durations=0 | tee tests_output.txt + - run: python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_torch_and_tf ./tests/ -m is_pt_tf_cross_test --durations=0 | tee tests_output.txt - store_artifacts: path: ~/transformers/tests_output.txt - store_artifacts: @@ -97,6 +98,8 @@ jobs: - image: circleci/python:3.6 environment: OMP_NUM_THREADS: 1 + RUN_PT_FLAX_CROSS_TESTS: yes + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -113,7 +116,7 @@ jobs: key: v0.4-{{ 
checksum "setup.py" }} paths: - '~/.cache/pip' - - run: RUN_PT_FLAX_CROSS_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_torch_and_flax ./tests/ -m is_pt_flax_cross_test --durations=0 | tee tests_output.txt + - run: python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_torch_and_flax ./tests/ -m is_pt_flax_cross_test --durations=0 | tee tests_output.txt - store_artifacts: path: ~/transformers/tests_output.txt - store_artifacts: @@ -125,6 +128,7 @@ jobs: - image: circleci/python:3.7 environment: OMP_NUM_THREADS: 1 + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -153,6 +157,7 @@ jobs: - image: circleci/python:3.7 environment: OMP_NUM_THREADS: 1 + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -179,6 +184,7 @@ jobs: - image: circleci/python:3.7 environment: OMP_NUM_THREADS: 1 + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -205,6 +211,8 @@ jobs: - image: circleci/python:3.7 environment: OMP_NUM_THREADS: 1 + RUN_PIPELINE_TESTS: yes + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -221,7 +229,7 @@ jobs: key: v0.4-torch-{{ checksum "setup.py" }} paths: - '~/.cache/pip' - - run: TRANSFORMERS_IS_CI=1 RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_torch -m is_pipeline_test ./tests/ | tee tests_output.txt + - run: python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_torch -m is_pipeline_test ./tests/ | tee tests_output.txt - store_artifacts: path: ~/transformers/tests_output.txt - store_artifacts: @@ -233,6 +241,8 @@ jobs: - image: circleci/python:3.7 environment: OMP_NUM_THREADS: 1 + RUN_PIPELINE_TESTS: yes + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -247,7 +257,7 @@ jobs: key: v0.4-tf-{{ checksum "setup.py" }} paths: - '~/.cache/pip' - - run: TRANSFORMERS_IS_CI=1 RUN_PIPELINE_TESTS=1 python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_tf ./tests/ -m is_pipeline_test | tee tests_output.txt + - run: python -m pytest -n 8 --dist=loadfile -rA -s --make-reports=tests_pipelines_tf ./tests/ -m is_pipeline_test | tee tests_output.txt - store_artifacts: path: ~/transformers/tests_output.txt - store_artifacts: @@ -259,6 +269,7 @@ jobs: - image: circleci/python:3.7 environment: RUN_CUSTOM_TOKENIZERS: yes + TRANSFORMERS_IS_CI: yes steps: - checkout - restore_cache: @@ -272,7 +283,7 @@ jobs: key: v0.4-custom_tokenizers-{{ checksum "setup.py" }} paths: - '~/.cache/pip' - - run: TRANSFORMERS_IS_CI=1 python -m pytest -s --make-reports=tests_custom_tokenizers ./tests/test_tokenization_bert_japanese.py | tee tests_output.txt + - run: python -m pytest -s --make-reports=tests_custom_tokenizers ./tests/test_tokenization_bert_japanese.py | tee tests_output.txt - store_artifacts: path: ~/transformers/tests_output.txt - store_artifacts: @@ -284,6 +295,7 @@ jobs: - image: circleci/python:3.6 environment: OMP_NUM_THREADS: 1 + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -309,6 +321,9 @@ jobs: working_directory: ~/transformers docker: - image: circleci/python:3.7 + environment: + RUN_GIT_LFS_TESTS: yes + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps: @@ -319,7 +334,7 @@ jobs: git config --global user.name "ci" - run: pip install --upgrade pip - run: pip install .[testing] - - run: TRANSFORMERS_IS_CI=1 RUN_GIT_LFS_TESTS=1 python -m pytest -sv ./tests/test_hf_api.py -k "HfLargefilesTest" + - run: python -m pytest 
-sv ./tests/test_hf_api.py -k "HfLargefilesTest" build_doc: working_directory: ~/transformers @@ -408,6 +423,7 @@ jobs: - image: circleci/python:3.6 environment: OMP_NUM_THREADS: 1 + TRANSFORMERS_IS_CI: yes resource_class: xlarge parallelism: 1 steps:
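Note: taken together, these patches make each download report what kind of file is being
fetched, which framework and auto class requested it, whether a pipeline triggered the
call, and whether the run happens on CI. A simplified, self-contained sketch of the
resulting user-agent string (not the library implementation; the version numbers,
session id, and task name below are placeholder values):

    def http_user_agent_sketch(user_agent=None, is_ci=False, disable_telemetry=False):
        ua = "transformers/4.5.0.dev0; python/3.8.8; session_id/0123abcd; torch/1.8.1"
        if disable_telemetry:
            return ua + "; telemetry/off"
        if is_ci:
            ua += "; is_ci/true"
        if isinstance(user_agent, dict):
            ua += "; " + "; ".join(f"{k}/{v}" for k, v in user_agent.items())
        return ua

    # What a from_pretrained call issued by an auto class inside a pipeline might send:
    print(http_user_agent_sketch(
        {"file_type": "model", "framework": "pytorch", "from_auto_class": True,
         "using_pipeline": "fill-mask"},
        is_ci=True,
    ))
    # transformers/4.5.0.dev0; python/3.8.8; session_id/0123abcd; torch/1.8.1;
    # is_ci/true; file_type/model; framework/pytorch; from_auto_class/True;
    # using_pipeline/fill-mask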