Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
Checklist update (#5438)
Browse files (browse the repository at this point in the history)
* add more default tests for textual entailment

* cleanup

* fix style
  • Loading branch information
AkshitaB authored Oct 22, 2021
1 parent ebd6b5b commit 0c79807
Show file tree
Hide file tree
Showing 5 changed files with 392 additions and 22 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- Added support to push models directly to the [Hugging Face Hub](https://huggingface.co/) with the command `allennlp push-to-hf`.
- Added more default tests for the `TextualEntailmentSuite`.

### Changed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from checklist.perturb import Perturb
from allennlp.confidence_checks.task_checklists.task_suite import TaskSuite
from allennlp.confidence_checks.task_checklists import utils
from allennlp.predictors import Predictor


def _crossproduct(template: CheckListTemplate):
Expand Down Expand Up @@ -43,7 +44,7 @@ def __init__(

super().__init__(suite, **kwargs)

def _prediction_and_confidence_scores(self, predictor):
def _prediction_and_confidence_scores(self, predictor: Predictor):
def preds_and_confs_fn(data):
data = [{self._context_key: pair[0], self._question_key: pair[1]} for pair in data]
predictions = predictor.predict_batch_json(data)
Expand Down Expand Up @@ -142,13 +143,13 @@ def _setup_editor(self):
self.editor.add_lexicon("comp_pairs", comp_pairs, overwrite=True)

@overrides
def _default_tests(self, data: Optional[Iterable[Tuple]], num_test_cases=100):
def _default_tests(self, data: Optional[Iterable[Tuple]], num_test_cases: int = 100):
super()._default_tests(data, num_test_cases)
self._setup_editor()
self._default_vocabulary_tests(data, num_test_cases)
self._default_taxonomy_tests(data, num_test_cases)

def _default_vocabulary_tests(self, data: Optional[Iterable[Tuple]], num_test_cases=100):
def _default_vocabulary_tests(self, data: Optional[Iterable[Tuple]], num_test_cases: int = 100):

template = self.editor.template(
[
Expand All @@ -175,7 +176,7 @@ def _default_vocabulary_tests(self, data: Optional[Iterable[Tuple]], num_test_ca
)
self.add_test(test)

def _default_taxonomy_tests(self, data: Optional[Iterable[Tuple]], num_test_cases=100):
def _default_taxonomy_tests(self, data: Optional[Iterable[Tuple]], num_test_cases: int = 100):
template = _crossproduct(
self.editor.template(
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from allennlp.confidence_checks.task_checklists.task_suite import TaskSuite
from allennlp.confidence_checks.task_checklists import utils
from allennlp.data.instance import Instance
from allennlp.predictors import Predictor


def _add_phrase_function(phrases: List[str], num_samples: int = 10):
Expand Down Expand Up @@ -47,7 +48,7 @@ def __init__(
super().__init__(suite, **kwargs)

@overrides
def _prediction_and_confidence_scores(self, predictor):
def _prediction_and_confidence_scores(self, predictor: Predictor):
def preds_and_confs_fn(data):
labels = []
confs = []
Expand Down Expand Up @@ -86,7 +87,7 @@ def _format_failing_examples(
return ret

@overrides
def _default_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
def _default_tests(self, data: Optional[Iterable[str]], num_test_cases: int = 100):
super()._default_tests(data, num_test_cases)
self._setup_editor()
self._default_vocabulary_tests(data, num_test_cases)
Expand Down Expand Up @@ -244,7 +245,7 @@ def _setup_editor(self):
self.monotonic_label = Expect.monotonic(increasing=True, tolerance=0.1)
self.monotonic_label_down = Expect.monotonic(increasing=False, tolerance=0.1)

def _default_vocabulary_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
def _default_vocabulary_tests(self, data: Optional[Iterable[str]], num_test_cases: int = 100):

positive_words = (
self.editor.lexicons["pos_adj"]
Expand Down Expand Up @@ -441,7 +442,7 @@ def _default_vocabulary_tests(self, data: Optional[Iterable[str]], num_test_case

self.add_test(test)

def _default_robustness_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
def _default_robustness_tests(self, data: Optional[Iterable[str]], num_test_cases: int = 100):

template = Perturb.perturb(data, utils.add_random_strings, nsamples=num_test_cases)
test = INV(
Expand All @@ -453,7 +454,7 @@ def _default_robustness_tests(self, data: Optional[Iterable[str]], num_test_case

self.add_test(test)

def _default_ner_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
def _default_ner_tests(self, data: Optional[Iterable[str]], num_test_cases: int = 100):
if data:
template = Perturb.perturb(
data, utils.spacy_wrap(Perturb.change_names, ner=True), nsamples=num_test_cases
Expand Down Expand Up @@ -488,7 +489,7 @@ def _default_ner_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
)
self.add_test(test)

def _default_temporal_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
def _default_temporal_tests(self, data: Optional[Iterable[str]], num_test_cases: int = 100):
self._setup_editor()

change = ["but", "even though", "although", ""]
Expand Down Expand Up @@ -590,7 +591,7 @@ def _default_temporal_tests(self, data: Optional[Iterable[str]], num_test_cases=

self.add_test(test)

def _default_fairness_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
def _default_fairness_tests(self, data: Optional[Iterable[str]], num_test_cases: int = 100):
protected = {
"race": ["a black", "a hispanic", "a white", "an asian"], # add more here.
"sexuality": self.editor.template("{a:sexual_adj}").data,
Expand Down Expand Up @@ -622,7 +623,7 @@ def _default_fairness_tests(self, data: Optional[Iterable[str]], num_test_cases=

self.add_test(test)

def _default_negation_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
def _default_negation_tests(self, data: Optional[Iterable[str]], num_test_cases: int = 100):
template = self.editor.template(
"{it} {noun} {nt} {pos_adj}.",
it=["This", "That", "The"],
Expand Down
3 changes: 2 additions & 1 deletion allennlp/confidence_checks/task_checklists/task_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,13 @@ def __init__(
suite: Optional[TestSuite] = None,
add_default_tests: bool = True,
data: Optional[List[Any]] = None,
num_test_cases: int = 100,
**kwargs,
):
self.suite = suite or TestSuite()

if add_default_tests:
self._default_tests(data, **kwargs)
self._default_tests(data, num_test_cases)

def _prediction_and_confidence_scores(self, predictor: Predictor) -> Callable:
"""
Expand Down
Loading

0 comments on commit 0c79807

Please sign in to comment.