Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate metric to Evaluate library for tensorflow examples #18327

Merged
merged 5 commits into from
Jul 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/tensorflow/question-answering/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
datasets >= 1.4.0
tensorflow >= 2.3.0
evaluate >= 0.2.0
5 changes: 3 additions & 2 deletions examples/tensorflow/question-answering/run_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@
from typing import Optional

import tensorflow as tf
from datasets import load_dataset, load_metric
from datasets import load_dataset

import evaluate
import transformers
from transformers import (
AutoConfig,
Expand Down Expand Up @@ -600,7 +601,7 @@ def post_processing_function(examples, features, predictions, stage="eval"):
references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
return EvalPrediction(predictions=formatted_predictions, label_ids=references)

metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")

def compute_metrics(p: EvalPrediction):
return metric.compute(predictions=p.predictions, references=p.label_ids)
Expand Down
3 changes: 3 additions & 0 deletions examples/tensorflow/summarization/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
datasets >= 1.4.0
tensorflow >= 2.3.0
evaluate >= 0.2.0
5 changes: 3 additions & 2 deletions examples/tensorflow/summarization/run_summarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@
import nltk # Here to have a nice missing dependency error message early on
import numpy as np
import tensorflow as tf
from datasets import load_dataset, load_metric
from datasets import load_dataset
from tqdm import tqdm

import evaluate
import transformers
from filelock import FileLock
from transformers import (
Expand Down Expand Up @@ -634,7 +635,7 @@ def masked_sparse_categorical_crossentropy(y_true, y_pred):
# endregion

# region Metric
metric = load_metric("rouge")
metric = evaluate.load("rouge")
# endregion

# region Training
Expand Down
3 changes: 2 additions & 1 deletion examples/tensorflow/text-classification/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
datasets >= 1.1.3
sentencepiece != 0.1.92
protobuf
tensorflow >= 2.3
tensorflow >= 2.3
evaluate >= 0.2.0
5 changes: 3 additions & 2 deletions examples/tensorflow/text-classification/run_glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@

import numpy as np
import tensorflow as tf
from datasets import load_dataset, load_metric
from datasets import load_dataset

import evaluate
import transformers
from transformers import (
AutoConfig,
Expand Down Expand Up @@ -366,7 +367,7 @@ def preprocess_function(examples):
# endregion

# region Metric function
metric = load_metric("glue", data_args.task_name)
metric = evaluate.load("glue", data_args.task_name)

def compute_metrics(preds, label_ids):
preds = preds["logits"]
Expand Down
3 changes: 3 additions & 0 deletions examples/tensorflow/token-classification/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
datasets >= 1.4.0
tensorflow >= 2.3.0
evaluate >= 0.2.0
5 changes: 3 additions & 2 deletions examples/tensorflow/token-classification/run_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@
import datasets
import numpy as np
import tensorflow as tf
from datasets import ClassLabel, load_dataset, load_metric
from datasets import ClassLabel, load_dataset

import evaluate
import transformers
from transformers import (
CONFIG_MAPPING,
Expand Down Expand Up @@ -478,7 +479,7 @@ def dummy_loss(y_true, y_pred):
# endregion

# Metrics
metric = load_metric("seqeval")
metric = evaluate.load("seqeval")

def get_labels(y_pred, y_true):
# Transform predictions and references tensors to numpy arrays
Expand Down
3 changes: 3 additions & 0 deletions examples/tensorflow/translation/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
datasets >= 1.4.0
tensorflow >= 2.3.0
evaluate >= 0.2.0
5 changes: 3 additions & 2 deletions examples/tensorflow/translation/run_translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@
import datasets
import numpy as np
import tensorflow as tf
from datasets import load_dataset, load_metric
from datasets import load_dataset
from tqdm import tqdm

import evaluate
import transformers
from transformers import (
AutoConfig,
Expand Down Expand Up @@ -590,7 +591,7 @@ def masked_sparse_categorical_crossentropy(y_true, y_pred):
# endregion

# region Metric and postprocessing
metric = load_metric("sacrebleu")
metric = evaluate.load("sacrebleu")

def postprocess_text(preds, labels):
preds = [pred.strip() for pred in preds]
Expand Down