
Commit a9d3d07

Fix code style
joey12300 committed Nov 16, 2022
1 parent 430729c commit a9d3d07
Showing 2 changed files with 20 additions and 8 deletions.
12 changes: 12 additions & 0 deletions fast_tokenizer/README.md
@@ -70,6 +70,13 @@
 print("type_ids: ", output.type_ids)
 print("tokens: ", output.tokens)
 print("offsets: ", output.offsets)
 print("attention_mask: ", output.attention_mask)
+
+# 5. Example output
+# ids: [1, 75, 329, 12, 20, 2]
+# type_ids: [0, 0, 0, 0, 0, 0]
+# tokens: ['[CLS]', '我', '爱', '中', '国', '[SEP]']
+# offsets: [(0, 0), (0, 1), (1, 2), (2, 3), (3, 4), (0, 0)]
+# attention_mask: [1, 1, 1, 1, 1, 1]
 ```
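For readers skimming the diff: the added output comments document a snippet along these lines — a minimal sketch, assuming fast_tokenizer's `ErnieFastTokenizer` built from a WordPiece vocab (the loader name `models.WordPiece.read_file` and the vocab path are assumptions; only the five `print` calls appear in the hunk).

```python
from fast_tokenizer import ErnieFastTokenizer, models

# 1. Load a WordPiece vocab (path and loader name are assumptions)
vocab = models.WordPiece.read_file("ernie_vocab.txt")
# 2. Build the tokenizer
fast_tokenizer = ErnieFastTokenizer(vocab)
# 3. Encode one sentence
output = fast_tokenizer.encode("我爱中国")
# 4. Print the fields shown in the hunk above
print("ids: ", output.ids)
print("type_ids: ", output.type_ids)
print("tokens: ", output.tokens)
print("offsets: ", output.offsets)
print("attention_mask: ", output.attention_mask)
```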

### Example: Speeding up the PaddleNLP Tokenizer module with FastTokenizer

@@ -89,6 +96,11 @@
 text2 = fast_tokenizer('自然语言处理')

 print(text1)
 print(text2)
+
+# Example output
+# {'input_ids': [1, 67, 187, 405, 545, 239, 38, 2], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0]}
+# {'input_ids': [1, 67, 187, 405, 545, 239, 38, 2], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0]}
+
 ```

PaddleNLP currently provides Fast versions of four tokenizers — BERT, ERNIE, TinyBERT, and ERNIE-M; the tokenizers of other models do not yet have Fast versions.
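How `fast_tokenizer` was obtained sits above this hunk; a minimal sketch, assuming PaddleNLP's `AutoTokenizer` with `use_fast=True` and the `ernie-3.0-medium-zh` checkpoint (both assumptions — the diff does not show them):

```python
from paddlenlp.transformers import AutoTokenizer

# use_fast=True requests the C++ FastTokenizer backend when the model
# supports it (checkpoint name is an assumption)
fast_tokenizer = AutoTokenizer.from_pretrained("ernie-3.0-medium-zh", use_fast=True)

text1 = fast_tokenizer('自然语言处理')
text2 = fast_tokenizer('自然语言处理')
print(text1)
print(text2)
```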
16 changes: 8 additions & 8 deletions paddlenlp/transformers/tokenizer_utils_base.py
@@ -545,8 +545,8 @@ def token_to_chars(self,
         else:
             batch_index = 0
             token_index = batch_or_token_index
-        return CharSpan(*(
-            self._encodings[batch_index].token_to_chars(token_index)))
+        return CharSpan(
+            *(self._encodings[batch_index].token_to_chars(token_index)))

     def char_to_token(self,
                       batch_or_char_index: int,
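The change is purely a line-break move: both versions unpack the `(start, end)` pair returned by the encoding backend into a `CharSpan`. A self-contained toy illustration of that unpacking (the namedtuple stands in for the library's `CharSpan`):

```python
from collections import namedtuple

# Stand-in for the library's CharSpan: a (start, end) pair of character indices
CharSpan = namedtuple("CharSpan", ["start", "end"])

def token_to_chars_stub(token_index: int) -> tuple:
    # Pretend the backend reports token 2 spanning chars 1..2
    return (1, 2)

# The * operator spreads the returned 2-tuple into CharSpan's two fields
span = CharSpan(*token_to_chars_stub(2))
print(span)  # CharSpan(start=1, end=2)
```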
@@ -1713,7 +1713,7 @@ def save_pretrained(self,
         `tokenizer_config_file` indicating file (thus `tokenizer_config.json`),
         and resources would be saved into `resource_files_names` indicating files
         by using `self.save_resources(save_directory)`.
-
+
         The `save_directory` can be used in `from_pretrained` as argument value
         of `pretrained_model_name_or_path` to re-load the tokenizer.
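As this docstring states, the directory written by `save_pretrained` round-trips through `from_pretrained`; a minimal sketch (the checkpoint name is an assumption):

```python
from paddlenlp.transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ernie-3.0-medium-zh")  # assumed checkpoint
tokenizer.save_pretrained("./my_tokenizer")  # writes tokenizer_config.json + vocab resources
reloaded = AutoTokenizer.from_pretrained("./my_tokenizer")  # reload from the saved directory
```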
@@ -2116,7 +2116,7 @@ def __call__(self,
                 Decide the format for returned encoded batch inputs. Only works when
                 input is a batch of data.
                 ::
-                - If True, encoded inputs would be a dictionary like:
+                - If True, encoded inputs would be a dictionary like:
                     {'input_ids': [[1, 4444, 4385, 1545, 6712],[1, 4444, 4385]],
                     'token_type_ids': [[0, 0, 0, 0, 0], [0, 0, 0]]}
                 - If False, encoded inputs would be a list like:
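The flag documented here selects between one dict of batched lists and one dict per sample; a sketch assuming the flag is named `return_dict` (the parameter name sits above this hunk, so it is an assumption):

```python
from paddlenlp.transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ernie-3.0-medium-zh")  # assumed checkpoint
batch = ["自然语言处理", "深度学习"]  # hypothetical two-sentence batch

# Flag name is an assumption; behavior follows the docstring above
as_dict = tokenizer(batch, return_dict=True)   # {'input_ids': [[...], [...]], ...}
as_list = tokenizer(batch, return_dict=False)  # [{'input_ids': [...]}, {'input_ids': [...]}]
```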
@@ -2126,9 +2126,9 @@ def __call__(self,
                 Defaults to `True`.
             return_offsets_mapping (bool, optional):
-                Whether to include the list of pair preserving the index of start
+                Whether to include the list of pair preserving the index of start
                 and end char in original input for each token in the returned
-                dictionary. Would be automatically set to `True` when `stride` > 0.
+                dictionary. Would be automatically set to `True` when `stride` > 0.
                 Defaults to `False`.
             add_special_tokens (bool, optional):
                 Whether to add the special tokens associated with the corresponding model
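A short sketch of `return_offsets_mapping` in use; per the docstring, special tokens map to `(0, 0)`, matching the README output earlier in this commit (checkpoint name and `use_fast` flag assumed):

```python
from paddlenlp.transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ernie-3.0-medium-zh", use_fast=True)  # assumed
encoded = tokenizer("我爱中国", return_offsets_mapping=True)
# Each pair is (start_char, end_char) into the original string; (0, 0) marks [CLS]/[SEP]
print(encoded["offset_mapping"])  # e.g. [(0, 0), (0, 1), (1, 2), (2, 3), (3, 4), (0, 0)]
```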
@@ -2145,7 +2145,7 @@ def __call__(self,
                 Defaults to `None`.
             verbose (bool, optional):
                 Whether or not to print more information and warnings. Defaults to True.
-
+
         Returns:
             dict or list[dict] (for batch input):
                 The dict has the following optional items:
@@ -2171,7 +2171,7 @@ def __call__(self,
                     Included when `return_special_tokens_mask` is `True`.
                 - **offset_mapping** (list[int], optional): list of pair preserving the
                     index of start and end char in original input for each token.
-                    For a sqecial token, the index pair is `(0, 0)`. Included when
+                    For a sqecial token, the index pair is `(0, 0)`. Included when
                     `return_overflowing_tokens` is True or `stride` > 0.
                 - **overflow_to_sample** (int or list[int], optional): Index of example from which this
                     feature is generated. Included when `stride` works.
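`overflow_to_sample` exists because one long example can yield several overlapping features when `stride` is set; a hedged sketch (the length-cap name `max_length` and the list-of-dicts return shape are assumptions about this signature):

```python
from paddlenlp.transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ernie-3.0-medium-zh")  # assumed checkpoint
long_text = "自然语言处理" * 50  # hypothetical over-long input

# stride > 0 splits the text into overlapping windows and, per the docstring,
# auto-enables offset mapping; each feature records its source example index
features = tokenizer(long_text, max_length=32, stride=8)
for f in features:  # assumed: one dict per overflowing window
    print(f["overflow_to_sample"])  # 0: every window comes from this single example
```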
