From df5e4232f59e6fea08911eddd0adc965d1b59c15 Mon Sep 17 00:00:00 2001
From: Yassine
Date: Mon, 1 Aug 2022 21:32:12 +0200
Subject: [PATCH] fix: create a copy for tokenizer object (#18408)

---
 src/transformers/tokenization_utils_fast.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/transformers/tokenization_utils_fast.py b/src/transformers/tokenization_utils_fast.py
index cdb606e7c60d94..a061685b0bf1b7 100644
--- a/src/transformers/tokenization_utils_fast.py
+++ b/src/transformers/tokenization_utils_fast.py
@@ -16,6 +16,7 @@
 Tokenization classes for fast tokenizers (provided by HuggingFace's tokenizers library).
 For slow (python) tokenizers see tokenization_utils.py
 """
+import copy
 import json
 import os
 from collections import defaultdict
@@ -104,7 +105,7 @@ def __init__(self, *args, **kwargs):
             )

         if tokenizer_object is not None:
-            fast_tokenizer = tokenizer_object
+            fast_tokenizer = copy.deepcopy(tokenizer_object)
         elif fast_tokenizer_file is not None and not from_slow:
             # We have a serialization from tokenizers which let us directly build the backend
             fast_tokenizer = TokenizerFast.from_file(fast_tokenizer_file)