Commit

Update convert_example in run_glue.py to use test data. (PaddlePaddle#91)

guoshengCS authored Mar 9, 2021
1 parent 4c723b0 commit 3ff8c4c
Showing 7 changed files with 17 additions and 19 deletions.
2 changes: 1 addition & 1 deletion benchmark/bert/run_glue.py
@@ -221,7 +221,7 @@ def convert_example(example,
         label = example['labels']
         label = np.array([label], dtype=label_dtype)
     # Convert raw text to feature
-    if len(example) == 2:
+    if (int(is_test) + len(example)) == 2:
         example = tokenizer(example['sentence'], max_seq_len=max_seq_length)
     else:
         example = tokenizer(

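This hunk is the heart of the commit: `convert_example` previously picked between the single-sentence and sentence-pair tokenizer calls with `len(example) == 2`, which only holds for labeled training examples. Test-split examples, as loaded here, carry no `labels` field, so a single-sentence test example has length 1 and would fall into the sentence-pair branch and fail on a missing key. Adding `int(is_test)` compensates for the absent label. A minimal, self-contained sketch of that dispatch (the `pick_branch` helper and the dummy dicts are illustrative, not part of the repository):

def pick_branch(example, is_test=False):
    # Train single sentence: {'sentence', 'labels'}   -> 2 + 0 == 2
    # Test single sentence:  {'sentence'}             -> 1 + 1 == 2
    # Sentence pairs keep two text keys, so they never sum to 2 and
    # take the sentence-pair branch in both splits.
    if (int(is_test) + len(example)) == 2:
        return "single-sentence"
    return "sentence-pair"

print(pick_branch({'sentence': 'a', 'labels': 1}))                      # single-sentence
print(pick_branch({'sentence': 'a'}, is_test=True))                     # single-sentence
print(pick_branch({'sentence1': 'a', 'sentence2': 'b', 'labels': 0}))   # sentence-pair
print(pick_branch({'sentence1': 'a', 'sentence2': 'b'}, is_test=True))  # sentence-pair
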
2 changes: 1 addition & 1 deletion examples/benchmark/glue/run_glue.py
@@ -219,7 +219,7 @@ def convert_example(example,
         label = example['labels']
         label = np.array([label], dtype=label_dtype)
     # Convert raw text to feature
-    if len(example) == 2:
+    if (int(is_test) + len(example)) == 2:
         example = tokenizer(example['sentence'], max_seq_len=max_seq_length)
     else:
         example = tokenizer(

24 changes: 11 additions & 13 deletions examples/language_model/bert/predict_glue.py
@@ -18,9 +18,10 @@

 import paddle
 from paddle import inference
+from paddlenlp.datasets import load_dataset
 from paddlenlp.data import Stack, Tuple, Pad

-from run_glue import convert_example, TASK_CLASSES, MODEL_CLASSES
+from run_glue import convert_example, METRIC_CLASSES, MODEL_CLASSES


 def parse_args():
@@ -33,7 +34,7 @@ def parse_args():
         type=str,
         required=True,
         help="The name of the task to perform predict, selected in the list: " +
-        ", ".join(TASK_CLASSES.keys()), )
+        ", ".join(METRIC_CLASSES.keys()), )
     parser.add_argument(
         "--model_type",
         default=None,
@@ -131,28 +132,25 @@ def main():
     predictor = Predictor.create_predictor(args)

     args.task_name = args.task_name.lower()
-    dataset_class, metric_class = TASK_CLASSES[args.task_name]
     args.model_type = args.model_type.lower()
     model_class, tokenizer_class = MODEL_CLASSES[args.model_type]

-    dataset = dataset_class.get_datasets("test")
-    tokenizer = tokenizer_class.from_pretrained(
-        os.path.dirname(args.model_path))
-    transform_fn = partial(
+    test_ds = load_dataset('glue', args.task_name, splits="test")
+    tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)
+
+    trans_func = partial(
         convert_example,
         tokenizer=tokenizer,
-        label_list=dataset.get_labels(),
+        label_list=test_ds.label_list,
         max_seq_length=args.max_seq_length,
         is_test=True)
+    test_ds = test_ds.map(trans_func)
     batchify_fn = lambda samples, fn=Tuple(
         Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input
         Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # segment
         Stack(),  # length
-    ): [data for i, data in enumerate(fn(samples)) if i != 2]
-    dataset = dataset.apply(transform_fn)
-
+    ): fn(samples)
     predictor.predict(
-        dataset, batch_size=args.batch_size, collate_fn=batchify_fn)
+        test_ds, batch_size=args.batch_size, collate_fn=batchify_fn)


 if __name__ == "__main__":

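In predict_glue.py the old TASK_CLASSES dataset wrappers give way to the load_dataset / map pipeline: the GLUE test split is loaded directly, transformed with the partially applied `convert_example`, and collated by the retained `batchify_fn`, which now passes every collated field straight through instead of dropping the third one. A rough sketch of what that collation does to a small batch, assuming paddlenlp is installed; the token ids and lengths below are made up for illustration:

from paddlenlp.data import Stack, Tuple, Pad

# Same shape as the batchify_fn kept in the diff: pad the first two fields
# to the longest sequence in the batch, stack the third.
batchify_fn = lambda samples, fn=Tuple(
    Pad(axis=0, pad_val=0),  # input ids
    Pad(axis=0, pad_val=0),  # segment (token type) ids
    Stack(),                 # sequence length
): fn(samples)

samples = [
    ([1, 5, 7, 2], [0, 0, 0, 0], 4),  # (input_ids, token_type_ids, length)
    ([1, 9, 2], [0, 0, 0], 3),
]
input_ids, segment_ids, lengths = batchify_fn(samples)
print(input_ids.shape, segment_ids.shape, lengths)  # (2, 4) (2, 4) [4 3]
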
2 changes: 1 addition & 1 deletion examples/language_model/bert/run_glue.py
@@ -235,7 +235,7 @@ def convert_example(example,
         label = example['labels']
         label = np.array([label], dtype=label_dtype)
     # Convert raw text to feature
-    if len(example) == 2:
+    if (int(is_test) + len(example)) == 2:
         example = tokenizer(example['sentence'], max_seq_len=max_seq_length)
     else:
         example = tokenizer(

2 changes: 1 addition & 1 deletion examples/language_model/electra/run_glue.py
@@ -219,7 +219,7 @@ def convert_example(example,
         label = example['labels']
         label = np.array([label], dtype=label_dtype)
     # Convert raw text to feature
-    if len(example) == 2:
+    if (int(is_test) + len(example)) == 2:
         example = tokenizer(example['sentence'], max_seq_len=max_seq_length)
     else:
         example = tokenizer(

2 changes: 1 addition & 1 deletion examples/language_model/xlnet/run_glue.py
@@ -125,7 +125,7 @@ def convert_example(example,
         label = example['labels']
         label = np.array([label], dtype=label_dtype)
     # Convert raw text to feature
-    if len(example) == 2:
+    if (int(is_test) + len(example)) == 2:
         example = tokenizer(
             example['sentence'],
             max_seq_len=max_seq_length,

2 changes: 1 addition & 1 deletion examples/model_compression/ofa/run_glue_ofa.py
@@ -251,7 +251,7 @@ def convert_example(example,
         label = example['labels']
         label = np.array([label], dtype=label_dtype)
     # Convert raw text to feature
-    if len(example) == 2:
+    if (int(is_test) + len(example)) == 2:
         example = tokenizer(example['sentence'], max_seq_len=max_seq_length)
     else:
         example = tokenizer(

