From e9502f47413fd3aebfd24452245e460090fff867 Mon Sep 17 00:00:00 2001
From: VijayKalmath
Date: Wed, 27 Jul 2022 21:01:17 +0000
Subject: [PATCH 1/3] Migrate metric to Evaluate library in tf examples

Currently the TensorFlow examples use the `load_metric` function from the
Datasets library; this commit migrates each call to the `load` function
from the Evaluate library.

Fix for #18306
---
 examples/tensorflow/question-answering/requirements.txt | 1 +
 examples/tensorflow/question-answering/run_qa.py        | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/tensorflow/question-answering/requirements.txt b/examples/tensorflow/question-answering/requirements.txt
index 136ddf899b00c4..99aff2bb32b2bb 100644
--- a/examples/tensorflow/question-answering/requirements.txt
+++ b/examples/tensorflow/question-answering/requirements.txt
@@ -1,2 +1,3 @@
 datasets >= 1.4.0
 tensorflow >= 2.3.0
+evaluate >= 0.2.0
\ No newline at end of file
diff --git a/examples/tensorflow/question-answering/run_qa.py b/examples/tensorflow/question-answering/run_qa.py
index d5d869cfa20030..bd233f378a4dc9 100755
--- a/examples/tensorflow/question-answering/run_qa.py
+++ b/examples/tensorflow/question-answering/run_qa.py
@@ -26,8 +26,9 @@
 from typing import Optional

 import tensorflow as tf
-from datasets import load_dataset, load_metric
+from datasets import load_dataset

+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -600,7 +601,7 @@ def post_processing_function(examples, features, predictions, stage="eval"):
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)

-    metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")

     def compute_metrics(p: EvalPrediction):
         return metric.compute(predictions=p.predictions, references=p.label_ids)
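For context, `evaluate.load` is a drop-in replacement for the deprecated
`datasets.load_metric`. A minimal sketch of the migrated call, not part of
the patch; the id and answer records below are illustrative only:

    import evaluate

    # The metric now comes from the standalone Evaluate library.
    metric = evaluate.load("squad")

    predictions = [{"id": "0", "prediction_text": "Denver Broncos"}]
    references = [
        {"id": "0", "answers": {"text": ["Denver Broncos"], "answer_start": [177]}}
    ]
    print(metric.compute(predictions=predictions, references=references))
    # -> {'exact_match': 100.0, 'f1': 100.0}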
From cfc59fd5bd586400734a9461e6df5a2dca04828a Mon Sep 17 00:00:00 2001
From: VijayKalmath
Date: Thu, 28 Jul 2022 18:00:28 +0000
Subject: [PATCH 3/3] Migrate `metric` to Evaluate for all tf examples

Currently the TensorFlow examples use the `load_metric` function from the
Datasets library; this commit migrates each call to the `load` function
from the Evaluate library.
---
 examples/tensorflow/summarization/requirements.txt        | 3 +++
 examples/tensorflow/summarization/run_summarization.py    | 5 +++--
 examples/tensorflow/text-classification/requirements.txt  | 3 ++-
 examples/tensorflow/text-classification/run_glue.py       | 5 +++--
 examples/tensorflow/token-classification/requirements.txt | 3 +++
 examples/tensorflow/token-classification/run_ner.py       | 5 +++--
 examples/tensorflow/translation/requirements.txt          | 3 +++
 examples/tensorflow/translation/run_translation.py        | 5 +++--
 8 files changed, 23 insertions(+), 9 deletions(-)
 create mode 100644 examples/tensorflow/summarization/requirements.txt
 create mode 100644 examples/tensorflow/token-classification/requirements.txt
 create mode 100644 examples/tensorflow/translation/requirements.txt

diff --git a/examples/tensorflow/summarization/requirements.txt b/examples/tensorflow/summarization/requirements.txt
new file mode 100644
index 00000000000000..99aff2bb32b2bb
--- /dev/null
+++ b/examples/tensorflow/summarization/requirements.txt
@@ -0,0 +1,3 @@
+datasets >= 1.4.0
+tensorflow >= 2.3.0
+evaluate >= 0.2.0
\ No newline at end of file
diff --git a/examples/tensorflow/summarization/run_summarization.py b/examples/tensorflow/summarization/run_summarization.py
index b8b97c93eab5d5..a265b4f20fc523 100644
--- a/examples/tensorflow/summarization/run_summarization.py
+++ b/examples/tensorflow/summarization/run_summarization.py
@@ -29,9 +29,10 @@
 import nltk  # Here to have a nice missing dependency error message early on
 import numpy as np
 import tensorflow as tf
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from tqdm import tqdm

+import evaluate
 import transformers
 from filelock import FileLock
 from transformers import (
@@ -634,7 +635,7 @@ def masked_sparse_categorical_crossentropy(y_true, y_pred):
     # endregion

     # region Metric
-    metric = load_metric("rouge")
+    metric = evaluate.load("rouge")
     # endregion

     # region Training
diff --git a/examples/tensorflow/text-classification/requirements.txt b/examples/tensorflow/text-classification/requirements.txt
index 03d42cc5c89b98..494a82127ab06d 100644
--- a/examples/tensorflow/text-classification/requirements.txt
+++ b/examples/tensorflow/text-classification/requirements.txt
@@ -1,4 +1,5 @@
 datasets >= 1.1.3
 sentencepiece != 0.1.92
 protobuf
-tensorflow >= 2.3
\ No newline at end of file
+tensorflow >= 2.3
+evaluate >= 0.2.0
\ No newline at end of file
diff --git a/examples/tensorflow/text-classification/run_glue.py b/examples/tensorflow/text-classification/run_glue.py
index 5c312d87f6dda3..fe7ef66ece129c 100644
--- a/examples/tensorflow/text-classification/run_glue.py
+++ b/examples/tensorflow/text-classification/run_glue.py
@@ -24,8 +24,9 @@

 import numpy as np
 import tensorflow as tf
-from datasets import load_dataset, load_metric
+from datasets import load_dataset

+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -366,7 +367,7 @@ def preprocess_function(examples):
     # endregion

     # region Metric function
-    metric = load_metric("glue", data_args.task_name)
+    metric = evaluate.load("glue", data_args.task_name)

     def compute_metrics(preds, label_ids):
         preds = preds["logits"]
diff --git a/examples/tensorflow/token-classification/requirements.txt b/examples/tensorflow/token-classification/requirements.txt
new file mode 100644
index 00000000000000..99aff2bb32b2bb
--- /dev/null
+++ b/examples/tensorflow/token-classification/requirements.txt
@@ -0,0 +1,3 @@
+datasets >= 1.4.0
+tensorflow >= 2.3.0
+evaluate >= 0.2.0
\ No newline at end of file
diff --git a/examples/tensorflow/token-classification/run_ner.py b/examples/tensorflow/token-classification/run_ner.py
index 7eecf240cacd7a..cd4eea6feeb6dc 100644
--- a/examples/tensorflow/token-classification/run_ner.py
+++ b/examples/tensorflow/token-classification/run_ner.py
@@ -27,8 +27,9 @@
 import datasets
 import numpy as np
 import tensorflow as tf
-from datasets import ClassLabel, load_dataset, load_metric
+from datasets import ClassLabel, load_dataset

+import evaluate
 import transformers
 from transformers import (
     CONFIG_MAPPING,
@@ -478,7 +479,7 @@ def dummy_loss(y_true, y_pred):
     # endregion

     # Metrics
-    metric = load_metric("seqeval")
+    metric = evaluate.load("seqeval")

     def get_labels(y_pred, y_true):
         # Transform predictions and references tensos to numpy arrays
diff --git a/examples/tensorflow/translation/requirements.txt b/examples/tensorflow/translation/requirements.txt
new file mode 100644
index 00000000000000..99aff2bb32b2bb
--- /dev/null
+++ b/examples/tensorflow/translation/requirements.txt
@@ -0,0 +1,3 @@
+datasets >= 1.4.0
+tensorflow >= 2.3.0
+evaluate >= 0.2.0
\ No newline at end of file
diff --git a/examples/tensorflow/translation/run_translation.py b/examples/tensorflow/translation/run_translation.py
index 91f24034cd554d..5db26d20ecad95 100644
--- a/examples/tensorflow/translation/run_translation.py
+++ b/examples/tensorflow/translation/run_translation.py
@@ -28,9 +28,10 @@
 import datasets
 import numpy as np
 import tensorflow as tf
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from tqdm import tqdm

+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -590,7 +591,7 @@ def masked_sparse_categorical_crossentropy(y_true, y_pred):
     # endregion

     # region Metric and postprocessing
-    metric = load_metric("sacrebleu")
+    metric = evaluate.load("sacrebleu")

     def postprocess_text(preds, labels):
         preds = [pred.strip() for pred in preds]
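The same one-line substitution covers every metric touched in this patch. A
minimal sketch, not part of the patch; the glue config name and the token and
text examples below are illustrative only, and seqeval needs the `seqeval`
package installed:

    import evaluate

    rouge = evaluate.load("rouge")          # run_summarization.py
    glue = evaluate.load("glue", "mrpc")    # run_glue.py; task name comes from data_args
    seqeval = evaluate.load("seqeval")      # run_ner.py
    sacrebleu = evaluate.load("sacrebleu")  # run_translation.py

    # compute() keeps the same signature as the old Datasets metrics.
    print(seqeval.compute(
        predictions=[["B-PER", "O", "B-LOC"]],
        references=[["B-PER", "O", "B-LOC"]],
    ))
    print(sacrebleu.compute(
        predictions=["the cat sat on the mat"],
        references=[["the cat sat on the mat"]],
    ))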