From 85d75543861429a9b3ee74e4f95527c11cb56497 Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Wed, 22 Jul 2020 19:35:55 +0900
Subject: [PATCH 01/19] Bug fix: NER pipeline shouldn't group separate entities
 of same type

---
 src/transformers/pipelines.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py
index eb0e60a3a412..e82f4a957005 100755
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -1114,6 +1114,7 @@ def group_entities(self, entities: List[dict]) -> List[dict]:
             # The split is meant to account for the "B" and "I" suffixes
             if (
                 entity["entity"].split("-")[-1] == entity_group_disagg[-1]["entity"].split("-")[-1]
+                and entity["entity"].split("-")[0] != 'B'
                 and entity["index"] == entity_group_disagg[-1]["index"] + 1
             ):
                 entity_group_disagg += [entity]

From 31176c0bfad7e296bcb10827a2f5fc4a698c2000 Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Wed, 22 Jul 2020 20:26:07 +0900
Subject: [PATCH 02/19] style fix

---
 src/transformers/pipelines.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py
index e82f4a957005..f32fc76693b2 100755
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -1114,7 +1114,7 @@ def group_entities(self, entities: List[dict]) -> List[dict]:
             # The split is meant to account for the "B" and "I" suffixes
             if (
                 entity["entity"].split("-")[-1] == entity_group_disagg[-1]["entity"].split("-")[-1]
-                and entity["entity"].split("-")[0] != 'B'
+                and entity["entity"].split("-")[0] != "B"
                 and entity["index"] == entity_group_disagg[-1]["index"] + 1
             ):
                 entity_group_disagg += [entity]

From 590ed80bd6915d789bd618ea36344546be494aee Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Wed, 22 Jul 2020 21:43:00 +0900
Subject: [PATCH 03/19] [Bug Fix] Don't group entities that are both 'B',
 even if they are the same type: (B-type1 B-type1) != (B-type1 I-type1).
 [Bug Fix] Add an `ignore_subwords` option to ignore subsequent ##wordpieces
 in predictions, because some models train on only the first token of a word
 and not on the subsequent wordpieces (BERT NER default), so it makes sense
 to do the same thing at inference time. The simplest fix is to just group
 the subwords with the first wordpiece. [TODO] How to handle ignored scores?
 Just set them to 0 and calculate a zero-invariant mean? [TODO] Handle a
 different wordpiece prefix than ##? Possible approaches: get it from the
 tokenizer (but currently most tokenizers don't have a wordpiece_prefix
 property), or have an _is_subword(token). [Feature add] Added a
 `skip_special_tokens` option, because it was harder to remove them after
 grouping. [Additional Changes] Remove the B/I prefix on returned
 grouped_entities. [Feature Request/TODO] Return indexes? [Bug TODO] Can't
 use a fast tokenizer with grouped_entities ('BertTokenizerFast' object has
 no attribute 'convert_tokens_to_string').

---
 src/transformers/pipelines.py | 42 ++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py
index f32fc76693b2..c682af1e4251 100755
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -998,6 +998,8 @@ def __init__(
         ignore_labels=["O"],
         task: str = "",
         grouped_entities: bool = False,
+        skip_special_tokens: bool = False,
+        ignore_subwords: bool = False,
     ):
         super().__init__(
             model=model,
@@ -1019,6 +1021,8 @@ def __init__(
         self._basic_tokenizer = BasicTokenizer(do_lower_case=False)
         self.ignore_labels = ignore_labels
         self.grouped_entities = grouped_entities
+        self.skip_special_tokens = skip_special_tokens
+        self.ignore_subwords = ignore_subwords
 
     def __call__(self, *args, **kwargs):
         inputs = self._args_parser(*args, **kwargs)
@@ -1054,15 +1058,18 @@ def __call__(self, *args, **kwargs):
             ]
 
             for idx, label_idx in filtered_labels_idx:
+                word = self.tokenizer.convert_ids_to_tokens(
+                    [int(input_ids[idx])], skip_special_tokens=self.skip_special_tokens
+                )
+                if word:
+                    entity = {
+                        "word": word[0],
+                        "score": score[idx][label_idx].item(),
+                        "entity": self.model.config.id2label[label_idx],
+                        "index": idx,
+                    }
 
-                entity = {
-                    "word": self.tokenizer.convert_ids_to_tokens(int(input_ids[idx])),
-                    "score": score[idx][label_idx].item(),
-                    "entity": self.model.config.id2label[label_idx],
-                    "index": idx,
-                }
-
-                entities += [entity]
+                    entities += [entity]
 
             # Append grouped entities
             if self.grouped_entities:
@@ -1080,8 +1087,8 @@ def group_sub_entities(self, entities: List[dict]) -> dict:
         Returns grouped sub entities
         """
         # Get the first entity in the entity group
-        entity = entities[0]["entity"]
-        scores = np.mean([entity["score"] for entity in entities])
+        entity = entities[0]["entity"].split("-")[-1]
+        scores = np.nanmean([entity["score"] for entity in entities])
         tokens = [entity["word"] for entity in entities]
 
         entity_group = {
@@ -1096,6 +1103,11 @@ def group_entities(self, entities: List[dict]) -> List[dict]:
         Returns grouped entities
         """
 
+        def is_subword(token: str) -> bool:
+            if token.startswith("##"):
+                return True
+            return False
+
         entity_groups = []
         entity_group_disagg = []
 
@@ -1103,7 +1115,9 @@ def group_entities(self, entities: List[dict]) -> List[dict]:
             last_idx = entities[-1]["index"]
 
         for entity in entities:
+
             is_last_idx = entity["index"] == last_idx
+            is_subword = self.ignore_subwords and is_subword(entity["word"])
             if not entity_group_disagg:
                 entity_group_disagg += [entity]
                 if is_last_idx:
@@ -1112,11 +1126,17 @@ def group_entities(self, entities: List[dict]) -> List[dict]:
 
             # If the current entity is similar and adjacent to the previous entity, append it to the disaggregated entity group
             # The split is meant to account for the "B" and "I" suffixes
+            # Shouldn't merge if both entities are B-type
             if (
                 entity["entity"].split("-")[-1] == entity_group_disagg[-1]["entity"].split("-")[-1]
                 and entity["entity"].split("-")[0] != "B"
                 and entity["index"] == entity_group_disagg[-1]["index"] + 1
-            ):
+            ) or is_subword:
+                # Modify subword type to be previous_type
+                if is_subword:
+                    entity["entity"] = entity_group_disagg[-1]["entity"].split("-")[-1]
+                    entity["score"] = np.nan  # Handle ignored scores as 0/nan?
+                    # How to handle index?
                 entity_group_disagg += [entity]
                 # Group the entities at the last entity
                 if is_last_idx:

From 56860f7af35e78dceee0a61dc685e8f90b7a2ad6 Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Mon, 14 Sep 2020 19:43:17 +0900
Subject: [PATCH 04/19] use offset_mapping to fix [UNK] token problem

---
 src/transformers/pipelines.py | 73 ++++++++++++++++++++++++++---------
 1 file changed, 54 insertions(+), 19 deletions(-)

diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py
index c682af1e4251..11ec56971eb0 100755
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -998,7 +998,6 @@ def __init__(
         ignore_labels=["O"],
         task: str = "",
         grouped_entities: bool = False,
-        skip_special_tokens: bool = False,
         ignore_subwords: bool = False,
     ):
         super().__init__(
@@ -1021,20 +1020,30 @@ def __init__(
         self._basic_tokenizer = BasicTokenizer(do_lower_case=False)
         self.ignore_labels = ignore_labels
         self.grouped_entities = grouped_entities
-        self.skip_special_tokens = skip_special_tokens
         self.ignore_subwords = ignore_subwords
 
     def __call__(self, *args, **kwargs):
         inputs = self._args_parser(*args, **kwargs)
         answers = []
-        for sentence in inputs:
+
+        for i, sentence in enumerate(inputs):
+            if "offset_mapping" in kwargs:
+                offset_mapping = kwargs["offset_mapping"][i]
 
             # Manage correct placement of the tensors
             with self.device_placement():
 
                 tokens = self.tokenizer(
-                    sentence, return_attention_mask=False, return_tensors=self.framework, truncation=True,
+                    sentence,
+                    return_attention_mask=False,
+                    return_tensors=self.framework,
+                    truncation=True,
+                    return_special_tokens_mask=True,
+                    return_offsets_mapping=self.tokenizer.is_fast,
                 )
+                if "offset_mapping" in tokens:
+                    offset_mapping = tokens["offset_mapping"].cpu().numpy()
+                    del tokens["offset_mapping"]
 
                 # Forward
                 if self.framework == "tf":
@@ -1042,6 +1051,8 @@ def __call__(self, *args, **kwargs):
                     input_ids = tokens["input_ids"].numpy()[0]
                 else:
                     with torch.no_grad():
+                        special_tokens_mask = tokens["special_tokens_mask"].cpu().numpy()[0]
+                        del tokens["special_tokens_mask"]
                         tokens = self.ensure_tensor_on_device(**tokens)
                         entities = self.model(**tokens)[0][0].cpu().numpy()
                         input_ids = tokens["input_ids"].cpu().numpy()[0]
@@ -1058,12 +1069,19 @@ def __call__(self, *args, **kwargs):
             ]
 
             for idx, label_idx in filtered_labels_idx:
-                word = self.tokenizer.convert_ids_to_tokens(
-                    [int(input_ids[idx])], skip_special_tokens=self.skip_special_tokens
-                )
-                if word:
+
+                if not special_tokens_mask[idx]:
+                    if int(input_ids[idx]) == self.tokenizer.unk_token_id:
+                        if offset_mapping is not None:
+                            start_ind, end_ind = offset_mapping[idx]
+                            word = sentence[start_ind:end_ind]
+                        else:
+                            raise Exception("Use a fast tokenizer or provide offset_mapping parameter")
+                    else:
+                        word = self.tokenizer.convert_ids_to_tokens([int(input_ids[idx])])[0]
+
                     entity = {
-                        "word": word[0],
+                        "word": word,
                         "score": score[idx][label_idx].item(),
                         "entity": self.model.config.id2label[label_idx],
                         "index": idx,
@@ -1094,30 +1112,44 @@ def group_sub_entities(self, entities: List[dict]) -> dict:
         entity_group = {
             "entity_group": entity,
             "score": np.mean(scores),
-            "word": self.tokenizer.convert_tokens_to_string(tokens),
+            "word": self.convert_tokens_to_string(tokens),
         }
         return entity_group
 
+    def is_subword_fn(self, token: str) -> bool:
+        if token.startswith("##"):
+            return True
+        return False
+
+    def convert_tokens_to_string(self, tokens):
+        """ Converts a sequence of tokens (string) in a single string. """
+        if hasattr(self.tokenizer, "convert_tokens_to_string"):
+            # fast tokenizers dont have convert_tokens_to_string?!
+            return self.tokenizer.convert_tokens_to_string(tokens)
+        else:
+            out_string = " ".join(tokens).replace(" ##", "").strip()
+            return out_string
+
     def group_entities(self, entities: List[dict]) -> List[dict]:
         """
         Returns grouped entities
         """
 
-        def is_subword(token: str) -> bool:
-            if token.startswith("##"):
-                return True
-            return False
-
         entity_groups = []
         entity_group_disagg = []
 
+        if hasattr(self.tokenizer, "is_subword_fn"):
+            is_subword_fn = self.tokenizer.is_subword_fn
+        else:
+            is_subword_fn = self.is_subword_fn
+
         if entities:
             last_idx = entities[-1]["index"]
 
         for entity in entities:
 
             is_last_idx = entity["index"] == last_idx
-            is_subword = self.ignore_subwords and is_subword(entity["word"])
+            is_subword = self.ignore_subwords and is_subword_fn(entity["word"])
             if not entity_group_disagg:
                 entity_group_disagg += [entity]
                 if is_last_idx:
@@ -1128,14 +1160,17 @@ def is_subword(token: str) -> bool:
             # The split is meant to account for the "B" and "I" suffixes
             # Shouldn't merge if both entities are B-type
             if (
-                entity["entity"].split("-")[-1] == entity_group_disagg[-1]["entity"].split("-")[-1]
-                and entity["entity"].split("-")[0] != "B"
+                (
+                    (entity["entity"].split("-")[-1] == entity_group_disagg[-1]["entity"].split("-")[-1])
+                    and entity["entity"].split("-")[0] != "B"
+                )
                 and entity["index"] == entity_group_disagg[-1]["index"] + 1
             ) or is_subword:
                 # Modify subword type to be previous_type
                 if is_subword:
                     entity["entity"] = entity_group_disagg[-1]["entity"].split("-")[-1]
-                    entity["score"] = np.nan  # Handle ignored scores as 0/nan?
+                    entity["entity"] = None  # and use np.nanmean
+                    # How to handle scores? 0?
                     # How to handle index?
                 entity_group_disagg += [entity]
                 # Group the entities at the last entity

From 22d21cb7874e85365298fa1cca9e0160c5fa8e45 Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Mon, 14 Sep 2020 20:20:43 +0900
Subject: [PATCH 05/19] ignore score for subwords

---
 src/transformers/pipelines.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py
index 11ec56971eb0..7cfede6c7a08 100755
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -1169,7 +1169,7 @@ def group_entities(self, entities: List[dict]) -> List[dict]:
                 # Modify subword type to be previous_type
                 if is_subword:
                     entity["entity"] = entity_group_disagg[-1]["entity"].split("-")[-1]
-                    entity["entity"] = None  # and use np.nanmean
+                    entity["score"] = np.nan  # and use np.nanmean
                     # How to handle scores? 0?
                     # How to handle index?
                 entity_group_disagg += [entity]

From 77f93e173a894053fd0411d95c868b52f7c015f7 Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Wed, 16 Sep 2020 19:03:01 +0900
Subject: [PATCH 06/19] modify ner_pipeline test

---
 tests/test_pipelines.py | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index 7551350c43c5..2d0b377f8991 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -1,11 +1,10 @@
 import unittest
 from typing import Iterable, List, Optional
 
-from transformers import pipeline
+from transformers import pipeline, AutoTokenizer
 from transformers.pipelines import SUPPORTED_TASKS, Conversation, DefaultArgumentHandler, Pipeline
 from transformers.testing_utils import require_tf, require_torch, slow, torch_device
 
-
 DEFAULT_DEVICE_NUM = -1 if torch_device == "cpu" else 0
 VALID_INPUTS = ["A simple string", ["list of strings"]]
 
@@ -738,13 +737,13 @@ def _test_ner_pipeline(
         ]
         expected_grouped_ner_results = [
             [
-                {"entity_group": "B-PER", "score": 0.9710702640669686, "word": "Consuelo Araújo Noguera"},
-                {"entity_group": "B-PER", "score": 0.9997273534536362, "word": "Andrés Pastrana"},
-                {"entity_group": "B-ORG", "score": 0.8589080572128296, "word": "Farc"},
+                {"entity_group": "PER", "score": 0.9710702640669686, "word": "Consuelo Araújo Noguera"},
+                {"entity_group": "PER", "score": 0.9997273534536362, "word": "Andrés Pastrana"},
+                {"entity_group": "ORG", "score": 0.8589080572128296, "word": "Farc"},
             ],
             [
-                {"entity_group": "I-PER", "score": 0.9962901175022125, "word": "Enzo"},
-                {"entity_group": "I-ORG", "score": 0.9986497163772583, "word": "UN"},
+                {"entity_group": "PER", "score": 0.9962901175022125, "word": "Enzo"},
+                {"entity_group": "ORG", "score": 0.9986497163772583, "word": "UN"},
             ],
         ]
 
@@ -778,6 +777,7 @@ def _test_ner_pipeline(
     def test_torch_ner(self):
         mandatory_keys = {"entity", "word", "score"}
         for model_name in NER_FINETUNED_MODELS:
+            tokenizer = AutoTokenizer.from_pretrained(model_name, is_fast=True)
             nlp = pipeline(task="ner", model=model_name, tokenizer=model_name)
             self._test_ner_pipeline(nlp, mandatory_keys)
 
@@ -785,13 +785,17 @@ def test_torch_ner(self):
     def test_ner_grouped(self):
         mandatory_keys = {"entity_group", "word", "score"}
         for model_name in NER_FINETUNED_MODELS:
-            nlp = pipeline(task="ner", model=model_name, tokenizer=model_name, grouped_entities=True)
+            tokenizer = AutoTokenizer.from_pretrained(model_name, is_fast=True)
+            nlp = pipeline(
+                task="ner", model=model_name, tokenizer=tokenizer, grouped_entities=True, ignore_subwords=True
+            )
             self._test_ner_pipeline(nlp, mandatory_keys)
 
     @require_tf
     def test_tf_ner(self):
         mandatory_keys = {"entity", "word", "score"}
         for model_name in NER_FINETUNED_MODELS:
+            tokenizer = AutoTokenizer.from_pretrained(model_name, is_fast=True)
             nlp = pipeline(task="ner", model=model_name, tokenizer=model_name, framework="tf")
             self._test_ner_pipeline(nlp, mandatory_keys)
 
@@ -799,7 +803,15 @@ def test_tf_ner(self):
     def test_tf_ner_grouped(self):
         mandatory_keys = {"entity_group", "word", "score"}
         for model_name in NER_FINETUNED_MODELS:
-            nlp = pipeline(task="ner", model=model_name, tokenizer=model_name, framework="tf", grouped_entities=True)
+            tokenizer = AutoTokenizer.from_pretrained(model_name, is_fast=True)
+            nlp = pipeline(
+                task="ner",
+                model=model_name,
+                tokenizer=tokenizer,
+                framework="tf",
+                grouped_entities=True,
+                ignore_subwords=True,
+            )
             self._test_ner_pipeline(nlp, mandatory_keys)
 
 

From 87c327eb62a18dd377ef4cfb7cb34955f223910f Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Wed, 16 Sep 2020 19:23:55 +0900
Subject: [PATCH 07/19] modify ner_pipeline test

---
 tests/test_pipelines.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index 2d0b377f8991..dc85764cf015 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -1,10 +1,11 @@
 import unittest
 from typing import Iterable, List, Optional
 
-from transformers import pipeline, AutoTokenizer
+from transformers import pipeline
 from transformers.pipelines import SUPPORTED_TASKS, Conversation, DefaultArgumentHandler, Pipeline
 from transformers.testing_utils import require_tf, require_torch, slow, torch_device
 
+
 DEFAULT_DEVICE_NUM = -1 if torch_device == "cpu" else 0
 VALID_INPUTS = ["A simple string", ["list of strings"]]
 
@@ -737,13 +738,13 @@ def _test_ner_pipeline(
         ]
         expected_grouped_ner_results = [
             [
-                {"entity_group": "PER", "score": 0.9710702640669686, "word": "Consuelo Araújo Noguera"},
-                {"entity_group": "PER", "score": 0.9997273534536362, "word": "Andrés Pastrana"},
-                {"entity_group": "ORG", "score": 0.8589080572128296, "word": "Farc"},
+                {"entity_group": "B-PER", "score": 0.9710702640669686, "word": "Consuelo Araújo Noguera"},
+                {"entity_group": "B-PER", "score": 0.9997273534536362, "word": "Andrés Pastrana"},
+                {"entity_group": "B-ORG", "score": 0.8589080572128296, "word": "Farc"},
             ],
             [
-                {"entity_group": "PER", "score": 0.9962901175022125, "word": "Enzo"},
-                {"entity_group": "ORG", "score": 0.9986497163772583, "word": "UN"},
+                {"entity_group": "I-PER", "score": 0.9962901175022125, "word": "Enzo"},
+                {"entity_group": "I-ORG", "score": 0.9986497163772583, "word": "UN"},
             ],
         ]
 
@@ -777,7 +778,6 @@ def _test_ner_pipeline(
     def test_torch_ner(self):
         mandatory_keys = {"entity", "word", "score"}
         for model_name in NER_FINETUNED_MODELS:
-            tokenizer = AutoTokenizer.from_pretrained(model_name, is_fast=True)
             nlp = pipeline(task="ner", model=model_name, tokenizer=model_name)
             self._test_ner_pipeline(nlp, mandatory_keys)
 
@@ -785,9 +785,8 @@ def test_torch_ner(self):
     def test_ner_grouped(self):
         mandatory_keys = {"entity_group", "word", "score"}
         for model_name in NER_FINETUNED_MODELS:
-            tokenizer = AutoTokenizer.from_pretrained(model_name, is_fast=True)
             nlp = pipeline(
-                task="ner", model=model_name, tokenizer=tokenizer, grouped_entities=True, ignore_subwords=True
+                task="ner", model=model_name, tokenizer=model_name, grouped_entities=True, ignore_subwords=True
             )
             self._test_ner_pipeline(nlp, mandatory_keys)
 
@@ -795,7 +794,6 @@ def test_ner_grouped(self):
     def test_tf_ner(self):
         mandatory_keys = {"entity", "word", "score"}
         for model_name in NER_FINETUNED_MODELS:
-            tokenizer = AutoTokenizer.from_pretrained(model_name, is_fast=True)
             nlp = pipeline(task="ner", model=model_name, tokenizer=model_name, framework="tf")
             self._test_ner_pipeline(nlp, mandatory_keys)
 
@@ -803,11 +801,10 @@ def test_tf_ner(self):
     def test_tf_ner_grouped(self):
         mandatory_keys = {"entity_group", "word", "score"}
         for model_name in NER_FINETUNED_MODELS:
-            tokenizer = AutoTokenizer.from_pretrained(model_name, is_fast=True)
             nlp = pipeline(
                 task="ner",
                 model=model_name,
-                tokenizer=tokenizer,
+                tokenizer=model_name,
                 framework="tf",
                 grouped_entities=True,
                 ignore_subwords=True,

From 456451a1cb0b1ce11bd734d282daeff83146337c Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Wed, 16 Sep 2020 20:14:15 +0900
Subject: [PATCH 08/19] modify ner_pipeline test

---
 src/transformers/pipelines.py | 39 +++++++++++++++++------------------
 tests/test_pipelines.py       | 10 ++++-----
 2 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py
index 48c7d42f1842..3c36b4ad6b22 100755
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -1411,32 +1411,32 @@ def __call__(self, *args, **kwargs):
 
             entities = []
             # Filter to labels not in `self.ignore_labels`
+            # Filter special_tokens
             filtered_labels_idx = [
                 (idx, label_idx)
                 for idx, label_idx in enumerate(labels_idx)
-                if self.model.config.id2label[label_idx] not in self.ignore_labels
+                if (self.model.config.id2label[label_idx] not in self.ignore_labels) and not special_tokens_mask[idx]
             ]
 
             for idx, label_idx in filtered_labels_idx:
 
-                if not special_tokens_mask[idx]:
-                    if int(input_ids[idx]) == self.tokenizer.unk_token_id:
-                        if offset_mapping is not None:
-                            start_ind, end_ind = offset_mapping[idx]
-                            word = sentence[start_ind:end_ind]
-                        else:
-                            raise Exception("Use a fast tokenizer or provide offset_mapping parameter")
+                if int(input_ids[idx]) == self.tokenizer.unk_token_id:
+                    if offset_mapping is not None:
+                        start_ind, end_ind = offset_mapping[idx]
+                        word = sentence[start_ind:end_ind]
                     else:
-                        word = self.tokenizer.convert_ids_to_tokens([int(input_ids[idx])])[0]
+                        raise Exception("Use a fast tokenizer or provide offset_mapping parameter")
+                else:
+                    word = self.tokenizer.convert_ids_to_tokens([int(input_ids[idx])])[0]
 
-                    entity = {
-                        "word": word,
-                        "score": score[idx][label_idx].item(),
-                        "entity": self.model.config.id2label[label_idx],
-                        "index": idx,
-                    }
+                entity = {
+                    "word": word,
+                    "score": score[idx][label_idx].item(),
+                    "entity": self.model.config.id2label[label_idx],
+                    "index": idx,
+                }
 
-                    entities += [entity]
+                entities += [entity]
 
             # Append grouped entities
             if self.grouped_entities:
@@ -1516,7 +1516,7 @@ def group_entities(self, entities: List[dict]) -> List[dict]:
             # Shouldn't merge if both entities are B-type
             if (
                 (
-                    (entity["entity"].split("-")[-1] == entity_group_disagg[-1]["entity"].split("-")[-1])
+                    entity["entity"].split("-")[-1] == entity_group_disagg[-1]["entity"].split("-")[-1]
                     and entity["entity"].split("-")[0] != "B"
                 )
                 and entity["index"] == entity_group_disagg[-1]["index"] + 1
@@ -1524,9 +1524,8 @@ def group_entities(self, entities: List[dict]) -> List[dict]:
                 # Modify subword type to be previous_type
                 if is_subword:
                     entity["entity"] = entity_group_disagg[-1]["entity"].split("-")[-1]
-                    entity["score"] = np.nan  # and use np.nanmean
-                    # How to handle scores? 0?
-                    # How to handle index?
+                    entity["score"] = np.nan  # set ignored scores to nan and use np.nanmean
+
                 entity_group_disagg += [entity]
                 # Group the entities at the last entity
                 if is_last_idx:
diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index dc85764cf015..22301a539288 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -738,13 +738,13 @@ def _test_ner_pipeline(
         ]
         expected_grouped_ner_results = [
             [
-                {"entity_group": "B-PER", "score": 0.9710702640669686, "word": "Consuelo Araújo Noguera"},
-                {"entity_group": "B-PER", "score": 0.9997273534536362, "word": "Andrés Pastrana"},
-                {"entity_group": "B-ORG", "score": 0.8589080572128296, "word": "Farc"},
+                {"entity_group": "PER", "score": 0.999369223912557, "word": "Consuelo Araújo Noguera"},
+                {"entity_group": "PER", "score": 0.9997771680355072, "word": "Andrés Pastrana"},
+                {"entity_group": "ORG", "score": 0.9989739060401917, "word": "Farc"},
             ],
             [
-                {"entity_group": "I-PER", "score": 0.9962901175022125, "word": "Enzo"},
-                {"entity_group": "I-ORG", "score": 0.9986497163772583, "word": "UN"},
+                {"entity_group": "PER", "score": 0.9968166351318359, "word": "Enzo"},
+                {"entity_group": "ORG", "score": 0.9986497163772583, "word": "UN"},
             ],
         ]
 

From 99f7aadd9a069b5c4a5c964853db4a90dbe99d61 Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Wed, 16 Sep 2020 20:36:18 +0900
Subject: [PATCH 09/19] ner_pipeline change ignore_subwords default to true

---
 src/transformers/pipelines.py | 2 +-
 tests/test_pipelines.py       | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py
index 3c36b4ad6b22..745de139dc42 100755
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -1329,7 +1329,7 @@ def __init__(
         ignore_labels=["O"],
         task: str = "",
         grouped_entities: bool = False,
-        ignore_subwords: bool = False,
+        ignore_subwords: bool = True,
     ):
         super().__init__(
             model=model,
diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index 22301a539288..f35cc85f9632 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -736,6 +736,7 @@ def _test_ner_pipeline(
                 {"entity": "I-ORG", "index": 7, "score": 0.9986497163772583, "word": "UN"},
             ],
         ]
+
         expected_grouped_ner_results = [
             [
                 {"entity_group": "PER", "score": 0.999369223912557, "word": "Consuelo Araújo Noguera"},
@@ -770,9 +771,10 @@ def _test_ner_pipeline(
         for result in multi_result:
             for key in output_keys:
                 self.assertIn(key, result)
-
-        for ungrouped_input, grouped_result in zip(ungrouped_ner_inputs, expected_grouped_ner_results):
-            self.assertEqual(nlp.group_entities(ungrouped_input), grouped_result)
+                
+        if nlp.grouped_entities:
+            for ungrouped_input, grouped_result in zip(ungrouped_ner_inputs, expected_grouped_ner_results):
+                self.assertEqual(nlp.group_entities(ungrouped_input), grouped_result)
 
     @require_torch
     def test_torch_ner(self):

From 188fc0b9cbe1cb6dda3781e214a9e50cf44d0b09 Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Wed, 16 Sep 2020 21:14:42 +0900
Subject: [PATCH 10/19] add ner_pipeline ignore_subword=False test case

---
 tests/test_pipelines.py | 40 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index f35cc85f9632..579bd12e002f 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -749,6 +749,19 @@ def _test_ner_pipeline(
             ],
         ]
 
+        expected_grouped_ner_results_w_subword = [
+            [
+                {"entity_group": "PER", "score": 0.9994944930076599, "word": "Cons"},
+                {"entity_group": "PER", "score": 0.9663328925768534, "word": "##uelo Araújo Noguera"},
+                {"entity_group": "PER", "score": 0.9997273534536362, "word": "Andrés Pastrana"},
+                {"entity_group": "ORG", "score": 0.8589080572128296, "word": "Farc"},
+            ],
+            [
+                {"entity_group": "PER", "score": 0.9962901175022125, "word": "Enzo"},
+                {"entity_group": "ORG", "score": 0.9986497163772583, "word": "UN"},
+            ],
+        ]
+
         self.assertIsNotNone(nlp)
 
         mono_result = nlp(VALID_INPUTS[0])
@@ -771,10 +784,16 @@ def _test_ner_pipeline(
         for result in multi_result:
             for key in output_keys:
                 self.assertIn(key, result)
-                
+
         if nlp.grouped_entities:
-            for ungrouped_input, grouped_result in zip(ungrouped_ner_inputs, expected_grouped_ner_results):
-                self.assertEqual(nlp.group_entities(ungrouped_input), grouped_result)
+            if nlp.ignore_subwords:
+                for ungrouped_input, grouped_result in zip(ungrouped_ner_inputs, expected_grouped_ner_results):
+                    self.assertEqual(nlp.group_entities(ungrouped_input), grouped_result)
+            else:
+                for ungrouped_input, grouped_result in zip(
+                    ungrouped_ner_inputs, expected_grouped_ner_results_w_subword
+                ):
+                    self.assertEqual(nlp.group_entities(ungrouped_input), grouped_result)
 
     @require_torch
     def test_torch_ner(self):
@@ -791,6 +810,11 @@ def test_ner_grouped(self):
                 task="ner", model=model_name, tokenizer=model_name, grouped_entities=True, ignore_subwords=True
             )
             self._test_ner_pipeline(nlp, mandatory_keys)
+        for model_name in NER_FINETUNED_MODELS:
+            nlp = pipeline(
+                task="ner", model=model_name, tokenizer=model_name, grouped_entities=True, ignore_subwords=False
+            )
+            self._test_ner_pipeline(nlp, mandatory_keys)
 
     @require_tf
     def test_tf_ner(self):
@@ -812,6 +836,16 @@ def test_tf_ner_grouped(self):
                 ignore_subwords=True,
             )
             self._test_ner_pipeline(nlp, mandatory_keys)
+        for model_name in NER_FINETUNED_MODELS:
+            nlp = pipeline(
+                task="ner",
+                model=model_name,
+                tokenizer=model_name,
+                framework="tf",
+                grouped_entities=True,
+                ignore_subwords=False,
+            )
+            self._test_ner_pipeline(nlp, mandatory_keys)
 
 
 class PipelineCommonTests(unittest.TestCase):

From b8d4b99129c4c2d9a086bd7e5865ef11f0a27230 Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Mon, 21 Sep 2020 17:11:00 +0900
Subject: [PATCH 11/19] fix offset_mapping index

---
 src/transformers/pipelines.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py
index 745de139dc42..9492bb82aadb 100755
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -1391,17 +1391,17 @@ def __call__(self, *args, **kwargs):
                     return_offsets_mapping=self.tokenizer.is_fast,
                 )
                 if "offset_mapping" in tokens:
-                    offset_mapping = tokens["offset_mapping"].cpu().numpy()
+                    offset_mapping = tokens["offset_mapping"].cpu().numpy()[0]
                     del tokens["offset_mapping"]
-
+                special_tokens_mask = tokens["special_tokens_mask"].cpu().numpy()[0]
+                del tokens["special_tokens_mask"]
+                
                 # Forward
                 if self.framework == "tf":
                     entities = self.model(tokens.data)[0][0].numpy()
                     input_ids = tokens["input_ids"].numpy()[0]
                 else:
                     with torch.no_grad():
-                        special_tokens_mask = tokens["special_tokens_mask"].cpu().numpy()[0]
-                        del tokens["special_tokens_mask"]
                         tokens = self.ensure_tensor_on_device(**tokens)
                         entities = self.model(**tokens)[0][0].cpu().numpy()
                         input_ids = tokens["input_ids"].cpu().numpy()[0]

From bd1c9bb2d7259d68c5f10a6b11a54fe9a0c13386 Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Mon, 21 Sep 2020 17:21:57 +0900
Subject: [PATCH 12/19] fix style: remove trailing whitespace

---
 src/transformers/pipelines.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py
index 9492bb82aadb..a9d0aad6c7c9 100755
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -1395,7 +1395,7 @@ def __call__(self, *args, **kwargs):
                     del tokens["offset_mapping"]
                 special_tokens_mask = tokens["special_tokens_mask"].cpu().numpy()[0]
                 del tokens["special_tokens_mask"]
-                
+
                 # Forward
                 if self.framework == "tf":
                     entities = self.model(tokens.data)[0][0].numpy()

From 9221ca66e536dc42af152680043039160fcf319f Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Mon, 26 Oct 2020 17:26:10 +0900
Subject: [PATCH 13/19] change is_subword and convert_tokens_to_string logic

---
 src/transformers/pipelines.py | 80 ++++++++++++++++++++---------------
 tests/test_pipelines.py       | 32 +++++++-------
 2 files changed, 62 insertions(+), 50 deletions(-)

diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py
index 7b5701490b4d..49cefe0db5d7 100755
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -1299,6 +1299,29 @@ def __call__(self, *args, targets=None, **kwargs):
         return results
 
 
+class TokenClassificationArgumentHandler(ArgumentHandler):
+    """
+    Handles arguments for token classification.
+    """
+
+    def __call__(self, *args, **kwargs):
+
+        if args is not None and len(args) > 0:
+            if isinstance(args, str):
+                inputs = [args]
+            else:
+                inputs = args
+            batch_size = len(inputs)
+
+        offset_mapping = kwargs.get("offset_mapping", None)
+        if offset_mapping:
+            if isinstance(offset_mapping, list) and isinstance(offset_mapping[0], tuple):
+                offset_mapping = [offset_mapping]
+            if len(offset_mapping) != batch_size:
+                raise ("offset_mapping should have the same batch size as the input")
+        return inputs, offset_mapping
+
+
 @add_end_docstrings(
     PIPELINE_INIT_ARGS,
     r"""
@@ -1343,7 +1366,7 @@ def __init__(
             tokenizer=tokenizer,
             modelcard=modelcard,
             framework=framework,
-            args_parser=args_parser,
+            args_parser=TokenClassificationArgumentHandler(),
             device=device,
             binary_output=binary_output,
             task=task,
@@ -1379,12 +1402,11 @@ def __call__(self, *args, **kwargs):
             - **index** (:obj:`int`, only present when ``self.grouped_entities=False``) -- The index of the
               corresponding token in the sentence.
         """
-        inputs = self._args_parser(*args, **kwargs)
+
+        inputs, offset_mappings = self._args_parser(*args, **kwargs)
         answers = []
 
         for i, sentence in enumerate(inputs):
-            if "offset_mapping" in kwargs:
-                offset_mapping = kwargs["offset_mapping"][i]
 
             # Manage correct placement of the tensors
             with self.device_placement():
@@ -1397,9 +1419,13 @@ def __call__(self, *args, **kwargs):
                     return_special_tokens_mask=True,
                     return_offsets_mapping=self.tokenizer.is_fast,
                 )
-                if "offset_mapping" in tokens:
+                if self.tokenizer.is_fast:
                     offset_mapping = tokens["offset_mapping"].cpu().numpy()[0]
                     del tokens["offset_mapping"]
+                elif offset_mappings:
+                    offset_mapping = offset_mappings[i]
+                else:
+                    raise Exception("To decode [UNK] tokens use a fast tokenizer or provide offset_mapping parameter")
                 special_tokens_mask = tokens["special_tokens_mask"].cpu().numpy()[0]
                 del tokens["special_tokens_mask"]
 
@@ -1426,15 +1452,14 @@ def __call__(self, *args, **kwargs):
             ]
 
             for idx, label_idx in filtered_labels_idx:
+                start_ind, end_ind = offset_mapping[idx]
+                word_ref = sentence[start_ind:end_ind]
+                word = self.tokenizer.convert_ids_to_tokens([int(input_ids[idx])])[0]
+                is_subword = len(word_ref) != len(word)
 
                 if int(input_ids[idx]) == self.tokenizer.unk_token_id:
-                    if offset_mapping is not None:
-                        start_ind, end_ind = offset_mapping[idx]
-                        word = sentence[start_ind:end_ind]
-                    else:
-                        raise Exception("Use a fast tokenizer or provide offset_mapping parameter")
-                else:
-                    word = self.tokenizer.convert_ids_to_tokens([int(input_ids[idx])])[0]
+                    word = word_ref
+                    is_subword = False
 
                 entity = {
                     "word": word,
@@ -1443,6 +1468,9 @@ def __call__(self, *args, **kwargs):
                     "index": idx,
                 }
 
+                if self.grouped_entities and self.ignore_subwords:
+                    entity["is_subword"] = is_subword
+
                 entities += [entity]
 
             # Append grouped entities
@@ -1467,28 +1495,17 @@ def group_sub_entities(self, entities: List[dict]) -> dict:
         entity = entities[0]["entity"].split("-")[-1]
         scores = np.nanmean([entity["score"] for entity in entities])
         tokens = [entity["word"] for entity in entities]
-
+        if self.tokenizer.is_fast:
+            word = self.tokenizer.decode(self.tokenizer.convert_tokens_to_ids(tokens))
+        else:
+            word = self.tokenizer.convert_tokens_to_string(tokens)
         entity_group = {
             "entity_group": entity,
             "score": np.mean(scores),
-            "word": self.convert_tokens_to_string(tokens),
+            "word": word,
         }
         return entity_group
 
-    def is_subword_fn(self, token: str) -> bool:
-        if token.startswith("##"):
-            return True
-        return False
-
-    def convert_tokens_to_string(self, tokens):
-        """ Converts a sequence of tokens (string) in a single string. """
-        if hasattr(self.tokenizer, "convert_tokens_to_string"):
-            # fast tokenizers dont have convert_tokens_to_string?!
-            return self.tokenizer.convert_tokens_to_string(tokens)
-        else:
-            out_string = " ".join(tokens).replace(" ##", "").strip()
-            return out_string
-
     def group_entities(self, entities: List[dict]) -> List[dict]:
         """
         Find and group together the adjacent tokens with the same entity predicted.
@@ -1500,18 +1517,13 @@ def group_entities(self, entities: List[dict]) -> List[dict]:
         entity_groups = []
         entity_group_disagg = []
 
-        if hasattr(self.tokenizer, "is_subword_fn"):
-            is_subword_fn = self.tokenizer.is_subword_fn
-        else:
-            is_subword_fn = self.is_subword_fn
-
         if entities:
             last_idx = entities[-1]["index"]
 
         for entity in entities:
 
             is_last_idx = entity["index"] == last_idx
-            is_subword = self.ignore_subwords and is_subword_fn(entity["word"])
+            is_subword = self.ignore_subwords and entity["is_subword"]
             if not entity_group_disagg:
                 entity_group_disagg += [entity]
                 if is_last_idx:
diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index fab658c3bb23..b4373792f5db 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -718,24 +718,24 @@ def _test_ner_pipeline(
 
         ungrouped_ner_inputs = [
             [
-                {"entity": "B-PER", "index": 1, "score": 0.9994944930076599, "word": "Cons"},
-                {"entity": "B-PER", "index": 2, "score": 0.8025449514389038, "word": "##uelo"},
-                {"entity": "I-PER", "index": 3, "score": 0.9993102550506592, "word": "Ara"},
-                {"entity": "I-PER", "index": 4, "score": 0.9993743896484375, "word": "##új"},
-                {"entity": "I-PER", "index": 5, "score": 0.9992871880531311, "word": "##o"},
-                {"entity": "I-PER", "index": 6, "score": 0.9993029236793518, "word": "No"},
-                {"entity": "I-PER", "index": 7, "score": 0.9981776475906372, "word": "##guera"},
-                {"entity": "B-PER", "index": 15, "score": 0.9998136162757874, "word": "Andrés"},
-                {"entity": "I-PER", "index": 16, "score": 0.999740719795227, "word": "Pas"},
-                {"entity": "I-PER", "index": 17, "score": 0.9997414350509644, "word": "##tran"},
-                {"entity": "I-PER", "index": 18, "score": 0.9996136426925659, "word": "##a"},
-                {"entity": "B-ORG", "index": 28, "score": 0.9989739060401917, "word": "Far"},
-                {"entity": "I-ORG", "index": 29, "score": 0.7188422083854675, "word": "##c"},
+                {"entity": "B-PER", "index": 1, "score": 0.9994944930076599, "is_subword": False, "word": "Cons"},
+                {"entity": "B-PER", "index": 2, "score": 0.8025449514389038, "is_subword": True, "word": "##uelo"},
+                {"entity": "I-PER", "index": 3, "score": 0.9993102550506592, "is_subword": False, "word": "Ara"},
+                {"entity": "I-PER", "index": 4, "score": 0.9993743896484375, "is_subword": True, "word": "##új"},
+                {"entity": "I-PER", "index": 5, "score": 0.9992871880531311, "is_subword": True, "word": "##o"},
+                {"entity": "I-PER", "index": 6, "score": 0.9993029236793518, "is_subword": False, "word": "No"},
+                {"entity": "I-PER", "index": 7, "score": 0.9981776475906372, "is_subword": True, "word": "##guera"},
+                {"entity": "B-PER", "index": 15, "score": 0.9998136162757874, "is_subword": False, "word": "Andrés"},
+                {"entity": "I-PER", "index": 16, "score": 0.999740719795227, "is_subword": False, "word": "Pas"},
+                {"entity": "I-PER", "index": 17, "score": 0.9997414350509644, "is_subword": True, "word": "##tran"},
+                {"entity": "I-PER", "index": 18, "score": 0.9996136426925659, "is_subword": True, "word": "##a"},
+                {"entity": "B-ORG", "index": 28, "score": 0.9989739060401917, "is_subword": False, "word": "Far"},
+                {"entity": "I-ORG", "index": 29, "score": 0.7188422083854675, "is_subword": True, "word": "##c"},
             ],
             [
-                {"entity": "I-PER", "index": 1, "score": 0.9968166351318359, "word": "En"},
-                {"entity": "I-PER", "index": 2, "score": 0.9957635998725891, "word": "##zo"},
-                {"entity": "I-ORG", "index": 7, "score": 0.9986497163772583, "word": "UN"},
+                {"entity": "I-PER", "index": 1, "score": 0.9968166351318359, "is_subword": False, "word": "En"},
+                {"entity": "I-PER", "index": 2, "score": 0.9957635998725891, "is_subword": True, "word": "##zo"},
+                {"entity": "I-ORG", "index": 7, "score": 0.9986497163772583, "is_subword": False, "word": "UN"},
             ],
         ]
 

From 47797d1c38493289a756ae9d9d456b5b1b375181 Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Mon, 26 Oct 2020 18:12:11 +0900
Subject: [PATCH 14/19] merge tests with new test structure

---
 tests/test_pipelines_ner.py | 119 ++++++++++++++++++++++++++++--------
 1 file changed, 95 insertions(+), 24 deletions(-)

diff --git a/tests/test_pipelines_ner.py b/tests/test_pipelines_ner.py
index 4fb58d5e3c0c..89c630f8ae91 100644
--- a/tests/test_pipelines_ner.py
+++ b/tests/test_pipelines_ner.py
@@ -2,7 +2,7 @@
 
 from transformers import pipeline
 from transformers.pipelines import Pipeline
-from transformers.testing_utils import require_tf
+from transformers.testing_utils import require_tf, require_torch, slow
 
 from .test_pipelines_common import CustomInputPipelineCommonMixin
 
@@ -22,35 +22,49 @@ def _test_pipeline(self, nlp: Pipeline):
 
         ungrouped_ner_inputs = [
             [
-                {"entity": "B-PER", "index": 1, "score": 0.9994944930076599, "word": "Cons"},
-                {"entity": "B-PER", "index": 2, "score": 0.8025449514389038, "word": "##uelo"},
-                {"entity": "I-PER", "index": 3, "score": 0.9993102550506592, "word": "Ara"},
-                {"entity": "I-PER", "index": 4, "score": 0.9993743896484375, "word": "##új"},
-                {"entity": "I-PER", "index": 5, "score": 0.9992871880531311, "word": "##o"},
-                {"entity": "I-PER", "index": 6, "score": 0.9993029236793518, "word": "No"},
-                {"entity": "I-PER", "index": 7, "score": 0.9981776475906372, "word": "##guera"},
-                {"entity": "B-PER", "index": 15, "score": 0.9998136162757874, "word": "Andrés"},
-                {"entity": "I-PER", "index": 16, "score": 0.999740719795227, "word": "Pas"},
-                {"entity": "I-PER", "index": 17, "score": 0.9997414350509644, "word": "##tran"},
-                {"entity": "I-PER", "index": 18, "score": 0.9996136426925659, "word": "##a"},
-                {"entity": "B-ORG", "index": 28, "score": 0.9989739060401917, "word": "Far"},
-                {"entity": "I-ORG", "index": 29, "score": 0.7188422083854675, "word": "##c"},
+                {"entity": "B-PER", "index": 1, "score": 0.9994944930076599, "is_subword": False, "word": "Cons"},
+                {"entity": "B-PER", "index": 2, "score": 0.8025449514389038, "is_subword": True, "word": "##uelo"},
+                {"entity": "I-PER", "index": 3, "score": 0.9993102550506592, "is_subword": False, "word": "Ara"},
+                {"entity": "I-PER", "index": 4, "score": 0.9993743896484375, "is_subword": True, "word": "##új"},
+                {"entity": "I-PER", "index": 5, "score": 0.9992871880531311, "is_subword": True, "word": "##o"},
+                {"entity": "I-PER", "index": 6, "score": 0.9993029236793518, "is_subword": False, "word": "No"},
+                {"entity": "I-PER", "index": 7, "score": 0.9981776475906372, "is_subword": True, "word": "##guera"},
+                {"entity": "B-PER", "index": 15, "score": 0.9998136162757874, "is_subword": False, "word": "Andrés"},
+                {"entity": "I-PER", "index": 16, "score": 0.999740719795227, "is_subword": False, "word": "Pas"},
+                {"entity": "I-PER", "index": 17, "score": 0.9997414350509644, "is_subword": True, "word": "##tran"},
+                {"entity": "I-PER", "index": 18, "score": 0.9996136426925659, "is_subword": True, "word": "##a"},
+                {"entity": "B-ORG", "index": 28, "score": 0.9989739060401917, "is_subword": False, "word": "Far"},
+                {"entity": "I-ORG", "index": 29, "score": 0.7188422083854675, "is_subword": True, "word": "##c"},
             ],
             [
-                {"entity": "I-PER", "index": 1, "score": 0.9968166351318359, "word": "En"},
-                {"entity": "I-PER", "index": 2, "score": 0.9957635998725891, "word": "##zo"},
-                {"entity": "I-ORG", "index": 7, "score": 0.9986497163772583, "word": "UN"},
+                {"entity": "I-PER", "index": 1, "score": 0.9968166351318359, "is_subword": False, "word": "En"},
+                {"entity": "I-PER", "index": 2, "score": 0.9957635998725891, "is_subword": True, "word": "##zo"},
+                {"entity": "I-ORG", "index": 7, "score": 0.9986497163772583, "is_subword": False, "word": "UN"},
             ],
         ]
+
         expected_grouped_ner_results = [
             [
-                {"entity_group": "B-PER", "score": 0.9710702640669686, "word": "Consuelo Araújo Noguera"},
-                {"entity_group": "B-PER", "score": 0.9997273534536362, "word": "Andrés Pastrana"},
-                {"entity_group": "B-ORG", "score": 0.8589080572128296, "word": "Farc"},
+                {"entity_group": "PER", "score": 0.999369223912557, "word": "Consuelo Araújo Noguera"},
+                {"entity_group": "PER", "score": 0.9997771680355072, "word": "Andrés Pastrana"},
+                {"entity_group": "ORG", "score": 0.9989739060401917, "word": "Farc"},
             ],
             [
-                {"entity_group": "I-PER", "score": 0.9962901175022125, "word": "Enzo"},
-                {"entity_group": "I-ORG", "score": 0.9986497163772583, "word": "UN"},
+                {"entity_group": "PER", "score": 0.9968166351318359, "word": "Enzo"},
+                {"entity_group": "ORG", "score": 0.9986497163772583, "word": "UN"},
+            ],
+        ]
+
+        expected_grouped_ner_results_w_subword = [
+            [
+                {"entity_group": "PER", "score": 0.9994944930076599, "word": "Cons"},
+                {"entity_group": "PER", "score": 0.9663328925768534, "word": "##uelo Araújo Noguera"},
+                {"entity_group": "PER", "score": 0.9997273534536362, "word": "Andrés Pastrana"},
+                {"entity_group": "ORG", "score": 0.8589080572128296, "word": "Farc"},
+            ],
+            [
+                {"entity_group": "PER", "score": 0.9962901175022125, "word": "Enzo"},
+                {"entity_group": "ORG", "score": 0.9986497163772583, "word": "UN"},
             ],
         ]
 
@@ -77,8 +91,15 @@ def _test_pipeline(self, nlp: Pipeline):
             for key in output_keys:
                 self.assertIn(key, result)
 
-        for ungrouped_input, grouped_result in zip(ungrouped_ner_inputs, expected_grouped_ner_results):
-            self.assertEqual(nlp.group_entities(ungrouped_input), grouped_result)
+        if nlp.grouped_entities:
+            if nlp.ignore_subwords:
+                for ungrouped_input, grouped_result in zip(ungrouped_ner_inputs, expected_grouped_ner_results):
+                    self.assertEqual(nlp.group_entities(ungrouped_input), grouped_result)
+            else:
+                for ungrouped_input, grouped_result in zip(
+                    ungrouped_ner_inputs, expected_grouped_ner_results_w_subword
+                ):
+                    self.assertEqual(nlp.group_entities(ungrouped_input), grouped_result)
 
     @require_tf
     def test_tf_only(self):
@@ -86,3 +107,53 @@ def test_tf_only(self):
         # We test that if we don't specificy framework='tf', it gets detected automatically
         nlp = pipeline(task="ner", model=model_name, tokenizer=model_name)
         self._test_pipeline(nlp)
+        
+    @require_tf
+    def test_tf_ner(self):
+        for model_name in self.small_models:
+            nlp = pipeline(task="ner", model=model_name, tokenizer=model_name, framework="tf")
+        self._test_pipeline(nlp)
+
+    @require_tf
+    def test_tf_ner_grouped(self):
+        for model_name in self.small_models:
+            nlp = pipeline(
+                task="ner",
+                model=model_name,
+                tokenizer=model_name,
+                framework="tf",
+                grouped_entities=True,
+                ignore_subwords=True,
+            )
+            self._test_pipeline(nlp)
+        for model_name in self.small_models:
+            nlp = pipeline(
+                task="ner",
+                model=model_name,
+                tokenizer=model_name,
+                framework="tf",
+                grouped_entities=True,
+                ignore_subwords=False,
+            )
+            self._test_pipeline(nlp)
+            
+    @require_torch
+    def test_torch_ner(self):
+        for model_name in self.small_models:
+            nlp = pipeline(task="ner", model=model_name, tokenizer=model_name)
+            self._test_pipeline(nlp)
+
+    @require_torch
+    def test_ner_grouped(self):
+        for model_name in self.small_models:
+            nlp = pipeline(
+                task="ner", model=model_name, tokenizer=model_name, grouped_entities=True, ignore_subwords=True
+            )
+            self._test_pipeline(nlp)
+        for model_name in self.small_models:
+            nlp = pipeline(
+                task="ner", model=model_name, tokenizer=model_name, grouped_entities=True, ignore_subwords=False
+            )
+            self._test_pipeline(nlp)
+
+        

From 92115ee91b102f53931fe86be16657373221d5c1 Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Mon, 26 Oct 2020 18:27:19 +0900
Subject: [PATCH 15/19] change test names

---
 tests/test_pipelines_ner.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/tests/test_pipelines_ner.py b/tests/test_pipelines_ner.py
index 89c630f8ae91..a4bebee2bee2 100644
--- a/tests/test_pipelines_ner.py
+++ b/tests/test_pipelines_ner.py
@@ -107,15 +107,15 @@ def test_tf_only(self):
         # We test that if we don't specificy framework='tf', it gets detected automatically
         nlp = pipeline(task="ner", model=model_name, tokenizer=model_name)
         self._test_pipeline(nlp)
-        
+
     @require_tf
-    def test_tf_ner(self):
+    def test_tf_defaults(self):
         for model_name in self.small_models:
             nlp = pipeline(task="ner", model=model_name, tokenizer=model_name, framework="tf")
         self._test_pipeline(nlp)
 
     @require_tf
-    def test_tf_ner_grouped(self):
+    def test_tf_small(self):
         for model_name in self.small_models:
             nlp = pipeline(
                 task="ner",
@@ -136,15 +136,15 @@ def test_tf_ner_grouped(self):
                 ignore_subwords=False,
             )
             self._test_pipeline(nlp)
-            
+
     @require_torch
-    def test_torch_ner(self):
+    def test_pt_defaults(self):
         for model_name in self.small_models:
             nlp = pipeline(task="ner", model=model_name, tokenizer=model_name)
             self._test_pipeline(nlp)
 
     @require_torch
-    def test_ner_grouped(self):
+    def test_torch_small(self):
         for model_name in self.small_models:
             nlp = pipeline(
                 task="ner", model=model_name, tokenizer=model_name, grouped_entities=True, ignore_subwords=True
@@ -155,5 +155,3 @@ def test_ner_grouped(self):
                 task="ner", model=model_name, tokenizer=model_name, grouped_entities=True, ignore_subwords=False
             )
             self._test_pipeline(nlp)
-
-        

From 0cf0e738264d9d0434d796d19e48b2d16370b1aa Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Mon, 26 Oct 2020 18:29:39 +0900
Subject: [PATCH 16/19] remove old tests

---
 tests/test_pipelines.py | 870 ----------------------------------------
 1 file changed, 870 deletions(-)
 delete mode 100644 tests/test_pipelines.py

diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
deleted file mode 100644
index b4373792f5db..000000000000
--- a/tests/test_pipelines.py
+++ /dev/null
@@ -1,870 +0,0 @@
-import unittest
-from typing import Iterable, List, Optional
-
-from transformers import pipeline
-from transformers.pipelines import SUPPORTED_TASKS, Conversation, DefaultArgumentHandler, Pipeline
-from transformers.testing_utils import require_tf, require_torch, slow, torch_device
-
-
-DEFAULT_DEVICE_NUM = -1 if torch_device == "cpu" else 0
-VALID_INPUTS = ["A simple string", ["list of strings"]]
-
-NER_FINETUNED_MODELS = ["sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english"]
-
-# xlnet-base-cased disabled for now, since it crashes TF2
-FEATURE_EXTRACT_FINETUNED_MODELS = ["sshleifer/tiny-distilbert-base-cased"]
-TEXT_CLASSIF_FINETUNED_MODELS = ["sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english"]
-TEXT_GENERATION_FINETUNED_MODELS = ["sshleifer/tiny-ctrl"]
-
-FILL_MASK_FINETUNED_MODELS = ["sshleifer/tiny-distilroberta-base"]
-LARGE_FILL_MASK_FINETUNED_MODELS = ["distilroberta-base"]  # @slow
-
-SUMMARIZATION_FINETUNED_MODELS = ["sshleifer/bart-tiny-random", "patrickvonplaten/t5-tiny-random"]
-TF_SUMMARIZATION_FINETUNED_MODELS = ["patrickvonplaten/t5-tiny-random"]
-
-TRANSLATION_FINETUNED_MODELS = [
-    ("patrickvonplaten/t5-tiny-random", "translation_en_to_de"),
-    ("patrickvonplaten/t5-tiny-random", "translation_en_to_ro"),
-]
-TF_TRANSLATION_FINETUNED_MODELS = [("patrickvonplaten/t5-tiny-random", "translation_en_to_fr")]
-
-TEXT2TEXT_FINETUNED_MODELS = ["patrickvonplaten/t5-tiny-random"]
-TF_TEXT2TEXT_FINETUNED_MODELS = ["patrickvonplaten/t5-tiny-random"]
-
-DIALOGUE_FINETUNED_MODELS = ["microsoft/DialoGPT-medium"]  # @slow
-
-expected_fill_mask_result = [
-    [
-        {"sequence": "<s>My name is John</s>", "score": 0.00782308354973793, "token": 610, "token_str": "ĠJohn"},
-        {"sequence": "<s>My name is Chris</s>", "score": 0.007475061342120171, "token": 1573, "token_str": "ĠChris"},
-    ],
-    [
-        {"sequence": "<s>The largest city in France is Paris</s>", "score": 0.3185044229030609, "token": 2201},
-        {"sequence": "<s>The largest city in France is Lyon</s>", "score": 0.21112334728240967, "token": 12790},
-    ],
-]
-
-expected_fill_mask_target_result = [
-    [
-        {
-            "sequence": "<s>My name is Patrick</s>",
-            "score": 0.004992353264242411,
-            "token": 3499,
-            "token_str": "ĠPatrick",
-        },
-        {
-            "sequence": "<s>My name is Clara</s>",
-            "score": 0.00019297805556561798,
-            "token": 13606,
-            "token_str": "ĠClara",
-        },
-    ]
-]
-
-SUMMARIZATION_KWARGS = dict(num_beams=2, min_length=2, max_length=5)
-
-
-class DefaultArgumentHandlerTestCase(unittest.TestCase):
-    def setUp(self) -> None:
-        self.handler = DefaultArgumentHandler()
-
-    def test_kwargs_x(self):
-        mono_data = {"X": "This is a sample input"}
-        mono_args = self.handler(**mono_data)
-
-        self.assertTrue(isinstance(mono_args, list))
-        self.assertEqual(len(mono_args), 1)
-
-        multi_data = {"x": ["This is a sample input", "This is a second sample input"]}
-        multi_args = self.handler(**multi_data)
-
-        self.assertTrue(isinstance(multi_args, list))
-        self.assertEqual(len(multi_args), 2)
-
-    def test_kwargs_data(self):
-        mono_data = {"data": "This is a sample input"}
-        mono_args = self.handler(**mono_data)
-
-        self.assertTrue(isinstance(mono_args, list))
-        self.assertEqual(len(mono_args), 1)
-
-        multi_data = {"data": ["This is a sample input", "This is a second sample input"]}
-        multi_args = self.handler(**multi_data)
-
-        self.assertTrue(isinstance(multi_args, list))
-        self.assertEqual(len(multi_args), 2)
-
-    def test_multi_kwargs(self):
-        mono_data = {"data": "This is a sample input", "X": "This is a sample input 2"}
-        mono_args = self.handler(**mono_data)
-
-        self.assertTrue(isinstance(mono_args, list))
-        self.assertEqual(len(mono_args), 2)
-
-        multi_data = {
-            "data": ["This is a sample input", "This is a second sample input"],
-            "test": ["This is a sample input 2", "This is a second sample input 2"],
-        }
-        multi_args = self.handler(**multi_data)
-
-        self.assertTrue(isinstance(multi_args, list))
-        self.assertEqual(len(multi_args), 4)
-
-    def test_args(self):
-        mono_data = "This is a sample input"
-        mono_args = self.handler(mono_data)
-
-        self.assertTrue(isinstance(mono_args, list))
-        self.assertEqual(len(mono_args), 1)
-
-        mono_data = ["This is a sample input"]
-        mono_args = self.handler(mono_data)
-
-        self.assertTrue(isinstance(mono_args, list))
-        self.assertEqual(len(mono_args), 1)
-
-        multi_data = ["This is a sample input", "This is a second sample input"]
-        multi_args = self.handler(multi_data)
-
-        self.assertTrue(isinstance(multi_args, list))
-        self.assertEqual(len(multi_args), 2)
-
-        multi_data = ["This is a sample input", "This is a second sample input"]
-        multi_args = self.handler(*multi_data)
-
-        self.assertTrue(isinstance(multi_args, list))
-        self.assertEqual(len(multi_args), 2)
-
-
-class MonoColumnInputTestCase(unittest.TestCase):
-    def _test_mono_column_pipeline(
-        self,
-        nlp: Pipeline,
-        valid_inputs: List,
-        output_keys: Iterable[str],
-        invalid_inputs: List = [None],
-        expected_multi_result: Optional[List] = None,
-        expected_check_keys: Optional[List[str]] = None,
-        **kwargs,
-    ):
-        self.assertIsNotNone(nlp)
-
-        mono_result = nlp(valid_inputs[0], **kwargs)
-        self.assertIsInstance(mono_result, list)
-        self.assertIsInstance(mono_result[0], (dict, list))
-
-        if isinstance(mono_result[0], list):
-            mono_result = mono_result[0]
-
-        for key in output_keys:
-            self.assertIn(key, mono_result[0])
-
-        multi_result = [nlp(input, **kwargs) for input in valid_inputs]
-        self.assertIsInstance(multi_result, list)
-        self.assertIsInstance(multi_result[0], (dict, list))
-
-        if expected_multi_result is not None:
-            for result, expect in zip(multi_result, expected_multi_result):
-                for key in expected_check_keys or []:
-                    self.assertEqual(
-                        set([o[key] for o in result]),
-                        set([o[key] for o in expect]),
-                    )
-
-        if isinstance(multi_result[0], list):
-            multi_result = multi_result[0]
-
-        for result in multi_result:
-            for key in output_keys:
-                self.assertIn(key, result)
-
-        self.assertRaises(Exception, nlp, invalid_inputs)
-
-    @require_torch
-    def test_torch_sentiment_analysis(self):
-        mandatory_keys = {"label", "score"}
-        for model_name in TEXT_CLASSIF_FINETUNED_MODELS:
-            nlp = pipeline(task="sentiment-analysis", model=model_name, tokenizer=model_name)
-            self._test_mono_column_pipeline(nlp, VALID_INPUTS, mandatory_keys)
-
-    @require_tf
-    def test_tf_sentiment_analysis(self):
-        mandatory_keys = {"label", "score"}
-        for model_name in TEXT_CLASSIF_FINETUNED_MODELS:
-            nlp = pipeline(task="sentiment-analysis", model=model_name, tokenizer=model_name, framework="tf")
-            self._test_mono_column_pipeline(nlp, VALID_INPUTS, mandatory_keys)
-
-    @require_torch
-    def test_torch_feature_extraction(self):
-        for model_name in FEATURE_EXTRACT_FINETUNED_MODELS:
-            nlp = pipeline(task="feature-extraction", model=model_name, tokenizer=model_name)
-            self._test_mono_column_pipeline(nlp, VALID_INPUTS, {})
-
-    @require_tf
-    def test_tf_feature_extraction(self):
-        for model_name in FEATURE_EXTRACT_FINETUNED_MODELS:
-            nlp = pipeline(task="feature-extraction", model=model_name, tokenizer=model_name, framework="tf")
-            self._test_mono_column_pipeline(nlp, VALID_INPUTS, {})
-
-    @require_torch
-    def test_torch_fill_mask(self):
-        mandatory_keys = {"sequence", "score", "token"}
-        valid_inputs = [
-            "My name is <mask>",
-            "The largest city in France is <mask>",
-        ]
-        invalid_inputs = [
-            "This is <mask> <mask>"  # More than 1 mask_token in the input is not supported
-            "This is"  # No mask_token is not supported
-        ]
-        for model_name in FILL_MASK_FINETUNED_MODELS:
-            nlp = pipeline(
-                task="fill-mask",
-                model=model_name,
-                tokenizer=model_name,
-                framework="pt",
-                topk=2,
-            )
-            self._test_mono_column_pipeline(
-                nlp, valid_inputs, mandatory_keys, invalid_inputs, expected_check_keys=["sequence"]
-            )
-
-    @require_tf
-    def test_tf_fill_mask(self):
-        mandatory_keys = {"sequence", "score", "token"}
-        valid_inputs = [
-            "My name is <mask>",
-            "The largest city in France is <mask>",
-        ]
-        invalid_inputs = [
-            "This is <mask> <mask>"  # More than 1 mask_token in the input is not supported
-            "This is"  # No mask_token is not supported
-        ]
-        for model_name in FILL_MASK_FINETUNED_MODELS:
-            nlp = pipeline(
-                task="fill-mask",
-                model=model_name,
-                tokenizer=model_name,
-                framework="tf",
-                topk=2,
-            )
-            self._test_mono_column_pipeline(
-                nlp, valid_inputs, mandatory_keys, invalid_inputs, expected_check_keys=["sequence"]
-            )
-
-    @require_torch
-    def test_torch_fill_mask_with_targets(self):
-        valid_inputs = ["My name is <mask>"]
-        valid_targets = [[" Teven", " Patrick", " Clara"], [" Sam"]]
-        invalid_targets = [[], [""], ""]
-        for model_name in FILL_MASK_FINETUNED_MODELS:
-            nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="pt")
-            for targets in valid_targets:
-                outputs = nlp(valid_inputs, targets=targets)
-                self.assertIsInstance(outputs, list)
-                self.assertEqual(len(outputs), len(targets))
-            for targets in invalid_targets:
-                self.assertRaises(ValueError, nlp, valid_inputs, targets=targets)
-
-    @require_tf
-    def test_tf_fill_mask_with_targets(self):
-        valid_inputs = ["My name is <mask>"]
-        valid_targets = [[" Teven", " Patrick", " Clara"], [" Sam"]]
-        invalid_targets = [[], [""], ""]
-        for model_name in FILL_MASK_FINETUNED_MODELS:
-            nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="tf")
-            for targets in valid_targets:
-                outputs = nlp(valid_inputs, targets=targets)
-                self.assertIsInstance(outputs, list)
-                self.assertEqual(len(outputs), len(targets))
-            for targets in invalid_targets:
-                self.assertRaises(ValueError, nlp, valid_inputs, targets=targets)
-
-    @require_torch
-    @slow
-    def test_torch_fill_mask_results(self):
-        mandatory_keys = {"sequence", "score", "token"}
-        valid_inputs = [
-            "My name is <mask>",
-            "The largest city in France is <mask>",
-        ]
-        valid_targets = [" Patrick", " Clara"]
-        for model_name in LARGE_FILL_MASK_FINETUNED_MODELS:
-            nlp = pipeline(
-                task="fill-mask",
-                model=model_name,
-                tokenizer=model_name,
-                framework="pt",
-                topk=2,
-            )
-            self._test_mono_column_pipeline(
-                nlp,
-                valid_inputs,
-                mandatory_keys,
-                expected_multi_result=expected_fill_mask_result,
-                expected_check_keys=["sequence"],
-            )
-            self._test_mono_column_pipeline(
-                nlp,
-                valid_inputs[:1],
-                mandatory_keys,
-                expected_multi_result=expected_fill_mask_target_result,
-                expected_check_keys=["sequence"],
-                targets=valid_targets,
-            )
-
-    @require_tf
-    @slow
-    def test_tf_fill_mask_results(self):
-        mandatory_keys = {"sequence", "score", "token"}
-        valid_inputs = [
-            "My name is <mask>",
-            "The largest city in France is <mask>",
-        ]
-        valid_targets = [" Patrick", " Clara"]
-        for model_name in LARGE_FILL_MASK_FINETUNED_MODELS:
-            nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="tf", topk=2)
-            self._test_mono_column_pipeline(
-                nlp,
-                valid_inputs,
-                mandatory_keys,
-                expected_multi_result=expected_fill_mask_result,
-                expected_check_keys=["sequence"],
-            )
-            self._test_mono_column_pipeline(
-                nlp,
-                valid_inputs[:1],
-                mandatory_keys,
-                expected_multi_result=expected_fill_mask_target_result,
-                expected_check_keys=["sequence"],
-                targets=valid_targets,
-            )
-
-    @require_torch
-    def test_torch_summarization(self):
-        invalid_inputs = [4, "<mask>"]
-        mandatory_keys = ["summary_text"]
-        for model in SUMMARIZATION_FINETUNED_MODELS:
-            nlp = pipeline(task="summarization", model=model, tokenizer=model)
-            self._test_mono_column_pipeline(
-                nlp, VALID_INPUTS, mandatory_keys, invalid_inputs=invalid_inputs, **SUMMARIZATION_KWARGS
-            )
-
-    @require_torch
-    @slow
-    def test_integration_torch_summarization(self):
-        nlp = pipeline(task="summarization", device=DEFAULT_DEVICE_NUM)
-        cnn_article = ' (CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. 
"Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes. CNN\'s Vasco Cotovio, Kareem Khadder and Faith Karimi contributed to this report.'
-        expected_cnn_summary = " The Palestinian Authority becomes the 123rd member of the International Criminal Court . The move gives the court jurisdiction over alleged crimes in Palestinian territories . Israel and the United States opposed the Palestinians' efforts to join the court . Rights group Human Rights Watch welcomes the move, says governments seeking to penalize Palestine should end pressure ."
-        result = nlp(cnn_article)
-        self.assertEqual(result[0]["summary_text"], expected_cnn_summary)
-
-    @require_tf
-    @slow
-    def test_tf_summarization(self):
-        invalid_inputs = [4, "<mask>"]
-        mandatory_keys = ["summary_text"]
-        for model_name in TF_SUMMARIZATION_FINETUNED_MODELS:
-            nlp = pipeline(
-                task="summarization",
-                model=model_name,
-                tokenizer=model_name,
-                framework="tf",
-            )
-            self._test_mono_column_pipeline(
-                nlp, VALID_INPUTS, mandatory_keys, invalid_inputs=invalid_inputs, **SUMMARIZATION_KWARGS
-            )
-
-    @require_torch
-    def test_torch_translation(self):
-        invalid_inputs = [4, "<mask>"]
-        mandatory_keys = ["translation_text"]
-        for model_name, task in TRANSLATION_FINETUNED_MODELS:
-            nlp = pipeline(task=task, model=model_name, tokenizer=model_name)
-            self._test_mono_column_pipeline(
-                nlp,
-                VALID_INPUTS,
-                mandatory_keys,
-                invalid_inputs,
-            )
-
-    @require_tf
-    @slow
-    def test_tf_translation(self):
-        invalid_inputs = [4, "<mask>"]
-        mandatory_keys = ["translation_text"]
-        for model, task in TF_TRANSLATION_FINETUNED_MODELS:
-            nlp = pipeline(task=task, model=model, tokenizer=model, framework="tf")
-            self._test_mono_column_pipeline(nlp, VALID_INPUTS, mandatory_keys, invalid_inputs=invalid_inputs)
-
-    @require_torch
-    def test_torch_text2text(self):
-        invalid_inputs = [4, "<mask>"]
-        mandatory_keys = ["generated_text"]
-        for model_name in TEXT2TEXT_FINETUNED_MODELS:
-            nlp = pipeline(task="text2text-generation", model=model_name, tokenizer=model_name)
-            self._test_mono_column_pipeline(
-                nlp,
-                VALID_INPUTS,
-                mandatory_keys,
-                invalid_inputs,
-            )
-
-    @require_tf
-    @slow
-    def test_tf_text2text(self):
-        invalid_inputs = [4, "<mask>"]
-        mandatory_keys = ["generated_text"]
-        for model in TEXT2TEXT_FINETUNED_MODELS:
-            nlp = pipeline(task="text2text-generation", model=model, tokenizer=model, framework="tf")
-            self._test_mono_column_pipeline(nlp, VALID_INPUTS, mandatory_keys, invalid_inputs=invalid_inputs)
-
-    @require_torch
-    def test_torch_text_generation(self):
-        for model_name in TEXT_GENERATION_FINETUNED_MODELS:
-            nlp = pipeline(task="text-generation", model=model_name, tokenizer=model_name, framework="pt")
-            self._test_mono_column_pipeline(nlp, VALID_INPUTS, {})
-        self._test_mono_column_pipeline(nlp, VALID_INPUTS, {}, prefix="This is ")
-
-    @require_tf
-    def test_tf_text_generation(self):
-        for model_name in TEXT_GENERATION_FINETUNED_MODELS:
-            nlp = pipeline(task="text-generation", model=model_name, tokenizer=model_name, framework="tf")
-            self._test_mono_column_pipeline(nlp, VALID_INPUTS, {})
-        self._test_mono_column_pipeline(nlp, VALID_INPUTS, {}, prefix="This is ")
-
-    @require_torch
-    @slow
-    def test_integration_torch_conversation(self):
-        # When
-        nlp = pipeline(task="conversational", device=DEFAULT_DEVICE_NUM)
-        conversation_1 = Conversation("Going to the movies tonight - any suggestions?")
-        conversation_2 = Conversation("What's the last book you have read?")
-        # Then
-        self.assertEqual(len(conversation_1.past_user_inputs), 0)
-        self.assertEqual(len(conversation_2.past_user_inputs), 0)
-        # When
-        result = nlp([conversation_1, conversation_2], do_sample=False, max_length=1000)
-        # Then
-        self.assertEqual(result, [conversation_1, conversation_2])
-        self.assertEqual(len(result[0].past_user_inputs), 1)
-        self.assertEqual(len(result[1].past_user_inputs), 1)
-        self.assertEqual(len(result[0].generated_responses), 1)
-        self.assertEqual(len(result[1].generated_responses), 1)
-        self.assertEqual(result[0].past_user_inputs[0], "Going to the movies tonight - any suggestions?")
-        self.assertEqual(result[0].generated_responses[0], "The Big Lebowski")
-        self.assertEqual(result[1].past_user_inputs[0], "What's the last book you have read?")
-        self.assertEqual(result[1].generated_responses[0], "The Last Question")
-        # When
-        conversation_2.add_user_input("Why do you recommend it?")
-        result = nlp(conversation_2, do_sample=False, max_length=1000)
-        # Then
-        self.assertEqual(result, conversation_2)
-        self.assertEqual(len(result.past_user_inputs), 2)
-        self.assertEqual(len(result.generated_responses), 2)
-        self.assertEqual(result.past_user_inputs[1], "Why do you recommend it?")
-        self.assertEqual(result.generated_responses[1], "It's a good book.")
-
-    @require_torch
-    @slow
-    def test_integration_torch_conversation_truncated_history(self):
-        # When
-        nlp = pipeline(task="conversational", min_length_for_response=24, device=DEFAULT_DEVICE_NUM)
-        conversation_1 = Conversation("Going to the movies tonight - any suggestions?")
-        # Then
-        self.assertEqual(len(conversation_1.past_user_inputs), 0)
-        # When
-        result = nlp(conversation_1, do_sample=False, max_length=36)
-        # Then
-        self.assertEqual(result, conversation_1)
-        self.assertEqual(len(result.past_user_inputs), 1)
-        self.assertEqual(len(result.generated_responses), 1)
-        self.assertEqual(result.past_user_inputs[0], "Going to the movies tonight - any suggestions?")
-        self.assertEqual(result.generated_responses[0], "The Big Lebowski")
-        # When
-        conversation_1.add_user_input("Is it an action movie?")
-        result = nlp(conversation_1, do_sample=False, max_length=36)
-        # Then
-        self.assertEqual(result, conversation_1)
-        self.assertEqual(len(result.past_user_inputs), 2)
-        self.assertEqual(len(result.generated_responses), 2)
-        self.assertEqual(result.past_user_inputs[1], "Is it an action movie?")
-        self.assertEqual(result.generated_responses[1], "It's a comedy.")
-
-
-QA_FINETUNED_MODELS = ["sshleifer/tiny-distilbert-base-cased-distilled-squad"]
-
-
-class ZeroShotClassificationPipelineTests(unittest.TestCase):
-    def _test_scores_sum_to_one(self, result):
-        sum = 0.0
-        for score in result["scores"]:
-            sum += score
-        self.assertAlmostEqual(sum, 1.0)
-
-    def _test_zero_shot_pipeline(self, nlp):
-        output_keys = {"sequence", "labels", "scores"}
-        valid_mono_inputs = [
-            {"sequences": "Who are you voting for in 2020?", "candidate_labels": "politics"},
-            {"sequences": "Who are you voting for in 2020?", "candidate_labels": ["politics"]},
-            {"sequences": "Who are you voting for in 2020?", "candidate_labels": "politics, public health"},
-            {"sequences": "Who are you voting for in 2020?", "candidate_labels": ["politics", "public health"]},
-            {"sequences": ["Who are you voting for in 2020?"], "candidate_labels": "politics"},
-            {
-                "sequences": "Who are you voting for in 2020?",
-                "candidate_labels": "politics",
-                "hypothesis_template": "This text is about {}",
-            },
-        ]
-        valid_multi_input = {
-            "sequences": ["Who are you voting for in 2020?", "What is the capital of Spain?"],
-            "candidate_labels": "politics",
-        }
-        invalid_inputs = [
-            {"sequences": None, "candidate_labels": "politics"},
-            {"sequences": "", "candidate_labels": "politics"},
-            {"sequences": "Who are you voting for in 2020?", "candidate_labels": None},
-            {"sequences": "Who are you voting for in 2020?", "candidate_labels": ""},
-            {
-                "sequences": "Who are you voting for in 2020?",
-                "candidate_labels": "politics",
-                "hypothesis_template": None,
-            },
-            {
-                "sequences": "Who are you voting for in 2020?",
-                "candidate_labels": "politics",
-                "hypothesis_template": "",
-            },
-            {
-                "sequences": "Who are you voting for in 2020?",
-                "candidate_labels": "politics",
-                "hypothesis_template": "Template without formatting syntax.",
-            },
-        ]
-        self.assertIsNotNone(nlp)
-
-        for mono_input in valid_mono_inputs:
-            mono_result = nlp(**mono_input)
-            self.assertIsInstance(mono_result, dict)
-            if len(mono_result["labels"]) > 1:
-                self._test_scores_sum_to_one(mono_result)
-
-            for key in output_keys:
-                self.assertIn(key, mono_result)
-
-        multi_result = nlp(**valid_multi_input)
-        self.assertIsInstance(multi_result, list)
-        self.assertIsInstance(multi_result[0], dict)
-        self.assertEqual(len(multi_result), len(valid_multi_input["sequences"]))
-
-        for result in multi_result:
-            for key in output_keys:
-                self.assertIn(key, result)
-
-            if len(result["labels"]) > 1:
-                self._test_scores_sum_to_one(result)
-
-        for bad_input in invalid_inputs:
-            self.assertRaises(Exception, nlp, **bad_input)
-
-    def _test_zero_shot_pipeline_outputs(self, nlp):
-        inputs = [
-            {
-                "sequences": "Who are you voting for in 2020?",
-                "candidate_labels": ["politics", "public health", "science"],
-            },
-            {
-                "sequences": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.",
-                "candidate_labels": ["machine learning", "statistics", "translation", "vision"],
-                "multi_class": True,
-            },
-        ]
-
-        expected_outputs = [
-            {
-                "sequence": "Who are you voting for in 2020?",
-                "labels": ["politics", "public health", "science"],
-                "scores": [0.975, 0.015, 0.008],
-            },
-            {
-                "sequence": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.",
-                "labels": ["translation", "machine learning", "vision", "statistics"],
-                "scores": [0.817, 0.712, 0.018, 0.017],
-            },
-        ]
-
-        for input, expected_output in zip(inputs, expected_outputs):
-            output = nlp(**input)
-            for key in output:
-                if key == "scores":
-                    for output_score, expected_score in zip(output[key], expected_output[key]):
-                        self.assertAlmostEqual(output_score, expected_score, places=2)
-                else:
-                    self.assertEqual(output[key], expected_output[key])
-
-    @require_torch
-    def test_torch_zero_shot_classification(self):
-        for model_name in TEXT_CLASSIF_FINETUNED_MODELS:
-            nlp = pipeline(task="zero-shot-classification", model=model_name, tokenizer=model_name)
-            self._test_zero_shot_pipeline(nlp)
-
-    @require_tf
-    def test_tf_zero_shot_classification(self):
-        for model_name in TEXT_CLASSIF_FINETUNED_MODELS:
-            nlp = pipeline(task="zero-shot-classification", model=model_name, tokenizer=model_name, framework="tf")
-            self._test_zero_shot_pipeline(nlp)
-
-    @require_torch
-    @slow
-    def test_torch_zero_shot_outputs(self):
-        nlp = pipeline(task="zero-shot-classification", model="roberta-large-mnli")
-        self._test_zero_shot_pipeline_outputs(nlp)
-
-    @require_tf
-    @slow
-    def test_tf_zero_shot_outputs(self):
-        nlp = pipeline(task="zero-shot-classification", model="roberta-large-mnli", framework="tf")
-        self._test_zero_shot_pipeline_outputs(nlp)
-
-
-class DialoguePipelineTests(unittest.TestCase):
-    def _test_conversation_pipeline(self, nlp):
-        valid_inputs = [Conversation("Hi there!"), [Conversation("Hi there!"), Conversation("How are you?")]]
-        invalid_inputs = ["Hi there!", Conversation()]
-        self.assertIsNotNone(nlp)
-
-        mono_result = nlp(valid_inputs[0])
-        self.assertIsInstance(mono_result, Conversation)
-
-        multi_result = nlp(valid_inputs[1])
-        self.assertIsInstance(multi_result, list)
-        self.assertIsInstance(multi_result[0], Conversation)
-        # Inactive conversations passed to the pipeline raise a ValueError
-        self.assertRaises(ValueError, nlp, valid_inputs[1])
-
-        for bad_input in invalid_inputs:
-            self.assertRaises(Exception, nlp, bad_input)
-        self.assertRaises(Exception, nlp, invalid_inputs)
-
-    @require_torch
-    @slow
-    def test_torch_conversation(self):
-        for model_name in DIALOGUE_FINETUNED_MODELS:
-            nlp = pipeline(task="conversational", model=model_name, tokenizer=model_name)
-            self._test_conversation_pipeline(nlp)
-
-    @require_tf
-    @slow
-    def test_tf_conversation(self):
-        for model_name in DIALOGUE_FINETUNED_MODELS:
-            nlp = pipeline(task="conversational", model=model_name, tokenizer=model_name, framework="tf")
-            self._test_conversation_pipeline(nlp)
-
-
-class QAPipelineTests(unittest.TestCase):
-    def _test_qa_pipeline(self, nlp):
-        output_keys = {"score", "answer", "start", "end"}
-        valid_inputs = [
-            {"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
-            {
-                "question": "In what field is HuggingFace working ?",
-                "context": "HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.",
-            },
-        ]
-        invalid_inputs = [
-            {"question": "", "context": "This is a test to try empty question edge case"},
-            {"question": None, "context": "This is a test to try empty question edge case"},
-            {"question": "What is does with empty context ?", "context": ""},
-            {"question": "What is does with empty context ?", "context": None},
-        ]
-        self.assertIsNotNone(nlp)
-
-        mono_result = nlp(valid_inputs[0])
-        self.assertIsInstance(mono_result, dict)
-
-        for key in output_keys:
-            self.assertIn(key, mono_result)
-
-        multi_result = nlp(valid_inputs)
-        self.assertIsInstance(multi_result, list)
-        self.assertIsInstance(multi_result[0], dict)
-
-        for result in multi_result:
-            for key in output_keys:
-                self.assertIn(key, result)
-        for bad_input in invalid_inputs:
-            self.assertRaises(Exception, nlp, bad_input)
-        self.assertRaises(Exception, nlp, invalid_inputs)
-
-    @require_torch
-    def test_torch_question_answering(self):
-        for model_name in QA_FINETUNED_MODELS:
-            nlp = pipeline(task="question-answering", model=model_name, tokenizer=model_name)
-            self._test_qa_pipeline(nlp)
-
-    @require_tf
-    def test_tf_question_answering(self):
-        for model_name in QA_FINETUNED_MODELS:
-            nlp = pipeline(task="question-answering", model=model_name, tokenizer=model_name, framework="tf")
-            self._test_qa_pipeline(nlp)
-
-
-class NerPipelineTests(unittest.TestCase):
-    def _test_ner_pipeline(
-        self,
-        nlp: Pipeline,
-        output_keys: Iterable[str],
-    ):
-
-        ungrouped_ner_inputs = [
-            [
-                {"entity": "B-PER", "index": 1, "score": 0.9994944930076599, "is_subword": False, "word": "Cons"},
-                {"entity": "B-PER", "index": 2, "score": 0.8025449514389038, "is_subword": True, "word": "##uelo"},
-                {"entity": "I-PER", "index": 3, "score": 0.9993102550506592, "is_subword": False, "word": "Ara"},
-                {"entity": "I-PER", "index": 4, "score": 0.9993743896484375, "is_subword": True, "word": "##új"},
-                {"entity": "I-PER", "index": 5, "score": 0.9992871880531311, "is_subword": True, "word": "##o"},
-                {"entity": "I-PER", "index": 6, "score": 0.9993029236793518, "is_subword": False, "word": "No"},
-                {"entity": "I-PER", "index": 7, "score": 0.9981776475906372, "is_subword": True, "word": "##guera"},
-                {"entity": "B-PER", "index": 15, "score": 0.9998136162757874, "is_subword": False, "word": "Andrés"},
-                {"entity": "I-PER", "index": 16, "score": 0.999740719795227, "is_subword": False, "word": "Pas"},
-                {"entity": "I-PER", "index": 17, "score": 0.9997414350509644, "is_subword": True, "word": "##tran"},
-                {"entity": "I-PER", "index": 18, "score": 0.9996136426925659, "is_subword": True, "word": "##a"},
-                {"entity": "B-ORG", "index": 28, "score": 0.9989739060401917, "is_subword": False, "word": "Far"},
-                {"entity": "I-ORG", "index": 29, "score": 0.7188422083854675, "is_subword": True, "word": "##c"},
-            ],
-            [
-                {"entity": "I-PER", "index": 1, "score": 0.9968166351318359, "is_subword": False, "word": "En"},
-                {"entity": "I-PER", "index": 2, "score": 0.9957635998725891, "is_subword": True, "word": "##zo"},
-                {"entity": "I-ORG", "index": 7, "score": 0.9986497163772583, "is_subword": False, "word": "UN"},
-            ],
-        ]
-
-        expected_grouped_ner_results = [
-            [
-                {"entity_group": "PER", "score": 0.999369223912557, "word": "Consuelo Araújo Noguera"},
-                {"entity_group": "PER", "score": 0.9997771680355072, "word": "Andrés Pastrana"},
-                {"entity_group": "ORG", "score": 0.9989739060401917, "word": "Farc"},
-            ],
-            [
-                {"entity_group": "PER", "score": 0.9968166351318359, "word": "Enzo"},
-                {"entity_group": "ORG", "score": 0.9986497163772583, "word": "UN"},
-            ],
-        ]
-
-        expected_grouped_ner_results_w_subword = [
-            [
-                {"entity_group": "PER", "score": 0.9994944930076599, "word": "Cons"},
-                {"entity_group": "PER", "score": 0.9663328925768534, "word": "##uelo Araújo Noguera"},
-                {"entity_group": "PER", "score": 0.9997273534536362, "word": "Andrés Pastrana"},
-                {"entity_group": "ORG", "score": 0.8589080572128296, "word": "Farc"},
-            ],
-            [
-                {"entity_group": "PER", "score": 0.9962901175022125, "word": "Enzo"},
-                {"entity_group": "ORG", "score": 0.9986497163772583, "word": "UN"},
-            ],
-        ]
-
-        self.assertIsNotNone(nlp)
-
-        mono_result = nlp(VALID_INPUTS[0])
-        self.assertIsInstance(mono_result, list)
-        self.assertIsInstance(mono_result[0], (dict, list))
-
-        if isinstance(mono_result[0], list):
-            mono_result = mono_result[0]
-
-        for key in output_keys:
-            self.assertIn(key, mono_result[0])
-
-        multi_result = [nlp(input) for input in VALID_INPUTS]
-        self.assertIsInstance(multi_result, list)
-        self.assertIsInstance(multi_result[0], (dict, list))
-
-        if isinstance(multi_result[0], list):
-            multi_result = multi_result[0]
-
-        for result in multi_result:
-            for key in output_keys:
-                self.assertIn(key, result)
-
-        if nlp.grouped_entities:
-            if nlp.ignore_subwords:
-                for ungrouped_input, grouped_result in zip(ungrouped_ner_inputs, expected_grouped_ner_results):
-                    self.assertEqual(nlp.group_entities(ungrouped_input), grouped_result)
-            else:
-                for ungrouped_input, grouped_result in zip(
-                    ungrouped_ner_inputs, expected_grouped_ner_results_w_subword
-                ):
-                    self.assertEqual(nlp.group_entities(ungrouped_input), grouped_result)
-
-    @require_torch
-    def test_torch_ner(self):
-        mandatory_keys = {"entity", "word", "score"}
-        for model_name in NER_FINETUNED_MODELS:
-            nlp = pipeline(task="ner", model=model_name, tokenizer=model_name)
-            self._test_ner_pipeline(nlp, mandatory_keys)
-
-    @require_torch
-    def test_ner_grouped(self):
-        mandatory_keys = {"entity_group", "word", "score"}
-        for model_name in NER_FINETUNED_MODELS:
-            nlp = pipeline(
-                task="ner", model=model_name, tokenizer=model_name, grouped_entities=True, ignore_subwords=True
-            )
-            self._test_ner_pipeline(nlp, mandatory_keys)
-        for model_name in NER_FINETUNED_MODELS:
-            nlp = pipeline(
-                task="ner", model=model_name, tokenizer=model_name, grouped_entities=True, ignore_subwords=False
-            )
-            self._test_ner_pipeline(nlp, mandatory_keys)
-
-    @require_tf
-    def test_tf_ner(self):
-        mandatory_keys = {"entity", "word", "score"}
-        for model_name in NER_FINETUNED_MODELS:
-            nlp = pipeline(task="ner", model=model_name, tokenizer=model_name, framework="tf")
-            self._test_ner_pipeline(nlp, mandatory_keys)
-
-    @require_tf
-    def test_tf_ner_grouped(self):
-        mandatory_keys = {"entity_group", "word", "score"}
-        for model_name in NER_FINETUNED_MODELS:
-            nlp = pipeline(
-                task="ner",
-                model=model_name,
-                tokenizer=model_name,
-                framework="tf",
-                grouped_entities=True,
-                ignore_subwords=True,
-            )
-            self._test_ner_pipeline(nlp, mandatory_keys)
-        for model_name in NER_FINETUNED_MODELS:
-            nlp = pipeline(
-                task="ner",
-                model=model_name,
-                tokenizer=model_name,
-                framework="tf",
-                grouped_entities=True,
-                ignore_subwords=False,
-            )
-            self._test_ner_pipeline(nlp, mandatory_keys)
-
-
-class PipelineCommonTests(unittest.TestCase):
-    pipelines = SUPPORTED_TASKS.keys()
-
-    @require_tf
-    @slow
-    def test_tf_defaults(self):
-        # Test that pipelines can be correctly loaded without any argument
-        for task in self.pipelines:
-            with self.subTest(msg="Testing TF defaults with TF and {}".format(task)):
-                pipeline(task, framework="tf")
-
-    @require_torch
-    @slow
-    def test_pt_defaults(self):
-        # Test that pipelines can be correctly loaded without any argument
-        for task in self.pipelines:
-            with self.subTest(msg="Testing Torch defaults with PyTorch and {}".format(task)):
-                pipeline(task, framework="pt")

From 8e77d26c3cc30310469a8dc43fac9db67879c265 Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Mon, 26 Oct 2020 20:07:20 +0900
Subject: [PATCH 17/19] ner tests for fast tokenizer

---
 tests/test_pipelines_ner.py | 59 +++++++++++++++++++++++--------------
 1 file changed, 37 insertions(+), 22 deletions(-)

diff --git a/tests/test_pipelines_ner.py b/tests/test_pipelines_ner.py
index a4bebee2bee2..274cec92be7f 100644
--- a/tests/test_pipelines_ner.py
+++ b/tests/test_pipelines_ner.py
@@ -1,8 +1,8 @@
 import unittest
 
-from transformers import pipeline
+from transformers import AutoTokenizer, pipeline
 from transformers.pipelines import Pipeline
-from transformers.testing_utils import require_tf, require_torch, slow
+from transformers.testing_utils import require_tf, require_torch
 
 from .test_pipelines_common import CustomInputPipelineCommonMixin
 
@@ -19,6 +19,8 @@ class NerPipelineTests(CustomInputPipelineCommonMixin, unittest.TestCase):
 
     def _test_pipeline(self, nlp: Pipeline):
         output_keys = {"entity", "word", "score"}
+        if nlp.grouped_entities:
+            output_keys = {"entity_group", "word", "score"}
 
         ungrouped_ner_inputs = [
             [
@@ -105,53 +107,66 @@ def _test_pipeline(self, nlp: Pipeline):
     def test_tf_only(self):
         model_name = "Narsil/small"  # This model only has a TensorFlow version
         # We test that if we don't specificy framework='tf', it gets detected automatically
-        nlp = pipeline(task="ner", model=model_name, tokenizer=model_name)
+        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+        nlp = pipeline(task="ner", model=model_name, tokenizer=tokenizer)
         self._test_pipeline(nlp)
 
+    #         offset=tokenizer(VALID_INPUTS[0],return_offsets_mapping=True)['offset_mapping']
+    #         pipeline_running_kwargs = {"offset_mapping"}  # Additional kwargs to run the pipeline with
+
     @require_tf
     def test_tf_defaults(self):
         for model_name in self.small_models:
-            nlp = pipeline(task="ner", model=model_name, tokenizer=model_name, framework="tf")
+            tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+            nlp = pipeline(task="ner", model=model_name, tokenizer=tokenizer, framework="tf")
         self._test_pipeline(nlp)
 
     @require_tf
     def test_tf_small(self):
         for model_name in self.small_models:
+            tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
             nlp = pipeline(
                 task="ner",
                 model=model_name,
-                tokenizer=model_name,
+                tokenizer=tokenizer,
                 framework="tf",
                 grouped_entities=True,
                 ignore_subwords=True,
             )
             self._test_pipeline(nlp)
-        for model_name in self.small_models:
-            nlp = pipeline(
-                task="ner",
-                model=model_name,
-                tokenizer=model_name,
-                framework="tf",
-                grouped_entities=True,
-                ignore_subwords=False,
-            )
-            self._test_pipeline(nlp)
+
+    #         for model_name in self.small_models:
+    #             tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+    #             nlp = pipeline(
+    #                 task="ner",
+    #                 model=model_name,
+    #                 tokenizer=tokenizer,
+    #                 framework="tf",
+    #                 grouped_entities=True,
+    #                 ignore_subwords=False,
+    #             )
+    #             self._test_pipeline(nlp)
 
     @require_torch
     def test_pt_defaults(self):
         for model_name in self.small_models:
-            nlp = pipeline(task="ner", model=model_name, tokenizer=model_name)
+            tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+            nlp = pipeline(task="ner", model=model_name, tokenizer=tokenizer)
             self._test_pipeline(nlp)
 
     @require_torch
     def test_torch_small(self):
         for model_name in self.small_models:
+            tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
             nlp = pipeline(
-                task="ner", model=model_name, tokenizer=model_name, grouped_entities=True, ignore_subwords=True
-            )
-            self._test_pipeline(nlp)
-        for model_name in self.small_models:
-            nlp = pipeline(
-                task="ner", model=model_name, tokenizer=model_name, grouped_entities=True, ignore_subwords=False
+                task="ner", model=model_name, tokenizer=tokenizer, grouped_entities=True, ignore_subwords=True
             )
             self._test_pipeline(nlp)
+
+
+#         for model_name in self.small_models:
+#             tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+#             nlp = pipeline(
+#                 task="ner", model=model_name, tokenizer=tokenizer, grouped_entities=True, ignore_subwords=False
+#             )
+#             self._test_pipeline(nlp)

From 4b3d8eb4e3295679c3405d01fdc08c041e4ae313 Mon Sep 17 00:00:00 2001
From: Ceyda Cinarel <snu-ceyda@users.noreply.github.com>
Date: Mon, 26 Oct 2020 22:53:49 +0900
Subject: [PATCH 18/19] fast tokenizers have convert_tokens_to_string

---
 src/transformers/pipelines.py |  8 ++------
 tests/test_pipelines_ner.py   | 36 +++++++++++++++++------------------
 2 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py
index 1ab7fd7cf810..69e13e29d59e 100755
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -1532,7 +1532,6 @@ def __call__(self, *args, **kwargs):
 
                 entities += [entity]
 
-            # Append grouped entities
             if self.grouped_entities:
                 answers += [self.group_entities(entities)]
             # Append ungrouped entities
@@ -1554,14 +1553,11 @@ def group_sub_entities(self, entities: List[dict]) -> dict:
         entity = entities[0]["entity"].split("-")[-1]
         scores = np.nanmean([entity["score"] for entity in entities])
         tokens = [entity["word"] for entity in entities]
-        if self.tokenizer.is_fast:
-            word = self.tokenizer.decode(self.tokenizer.convert_tokens_to_ids(tokens))
-        else:
-            word = self.tokenizer.convert_tokens_to_string(tokens)
+
         entity_group = {
             "entity_group": entity,
             "score": np.mean(scores),
-            "word": word,
+            "word": self.tokenizer.convert_tokens_to_string(tokens),
         }
         return entity_group
 
diff --git a/tests/test_pipelines_ner.py b/tests/test_pipelines_ner.py
index 274cec92be7f..a4a240a8d908 100644
--- a/tests/test_pipelines_ner.py
+++ b/tests/test_pipelines_ner.py
@@ -124,6 +124,7 @@ def test_tf_defaults(self):
     @require_tf
     def test_tf_small(self):
         for model_name in self.small_models:
+            print(model_name)
             tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
             nlp = pipeline(
                 task="ner",
@@ -135,17 +136,17 @@ def test_tf_small(self):
             )
             self._test_pipeline(nlp)
 
-    #         for model_name in self.small_models:
-    #             tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
-    #             nlp = pipeline(
-    #                 task="ner",
-    #                 model=model_name,
-    #                 tokenizer=tokenizer,
-    #                 framework="tf",
-    #                 grouped_entities=True,
-    #                 ignore_subwords=False,
-    #             )
-    #             self._test_pipeline(nlp)
+            for model_name in self.small_models:
+                tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+                nlp = pipeline(
+                    task="ner",
+                    model=model_name,
+                    tokenizer=tokenizer,
+                    framework="tf",
+                    grouped_entities=True,
+                    ignore_subwords=False,
+                )
+                self._test_pipeline(nlp)
 
     @require_torch
     def test_pt_defaults(self):
@@ -163,10 +164,9 @@ def test_torch_small(self):
             )
             self._test_pipeline(nlp)
 
-
-#         for model_name in self.small_models:
-#             tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
-#             nlp = pipeline(
-#                 task="ner", model=model_name, tokenizer=tokenizer, grouped_entities=True, ignore_subwords=False
-#             )
-#             self._test_pipeline(nlp)
+        for model_name in self.small_models:
+            tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+            nlp = pipeline(
+                task="ner", model=model_name, tokenizer=tokenizer, grouped_entities=True, ignore_subwords=False
+            )
+            self._test_pipeline(nlp)

From 70a4dc52087cef5ff0fc27ee0984a8afaeeafdab Mon Sep 17 00:00:00 2001
From: Lysandre <lysandre.debut@reseau.eseo.fr>
Date: Tue, 3 Nov 2020 17:07:52 -0500
Subject: [PATCH 19/19] Fix the incorrect merge

---
 src/transformers/pipelines.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/transformers/pipelines.py b/src/transformers/pipelines.py
index 00109e1b1605..e37607c136b8 100755
--- a/src/transformers/pipelines.py
+++ b/src/transformers/pipelines.py
@@ -1430,9 +1430,9 @@ def __call__(self, inputs: Union[str, List[str]], **kwargs):
 
         if isinstance(inputs, str):
             inputs = [inputs]
-            
-        offset_mappings = kwargs["offset_mappings"]
-        
+
+        offset_mappings = kwargs.get("offset_mappings")
+
         answers = []
 
         for i, sentence in enumerate(inputs):