
support pp accuracy calculation #9379

Merged
merged 11 commits on Nov 29, 2024
39 changes: 38 additions & 1 deletion paddlenlp/trainer/trainer.py
@@ -42,6 +42,8 @@
from packaging import version
from paddle import framework

from paddlenlp.utils import infohub

try:
from paddle.base import core
except:
@@ -3172,7 +3174,13 @@ def evaluation_loop(

# Metrics!
if self.compute_metrics is not None and all_preds is not None and all_labels is not None:
metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
# all_labels may be a tuple when prediction_step outputs a label_mask
if isinstance(all_labels, (list, tuple)):
# compute_metrics in train.py
metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels[0]))
else:
# compute_metrics in modeling.py
metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
else:
metrics = {}
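
For reference, a minimal compute_metrics of the kind consumed here might look like the sketch below. This is a hypothetical example, not the actual function from train.py or modeling.py; it assumes predictions are token logits and that ignored positions carry the label -100.

import numpy as np

from paddlenlp.trainer import EvalPrediction


def compute_metrics(eval_preds: EvalPrediction):
    # Hypothetical accuracy metric: argmax over the vocab axis,
    # scoring only positions whose label is not the ignore index.
    preds = np.argmax(eval_preds.predictions, axis=-1)
    labels = eval_preds.label_ids
    mask = labels != -100
    return {"accuracy": float((preds[mask] == labels[mask]).mean())}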

@@ -3268,6 +3276,17 @@ def prediction_pipeline_step(
labels = None
inputs = inputs.pop("input_ids")

# Evaluation does not support drop_last, so set
# `accumulate_steps` to the actual eval batch size.
model_config_backup = model.accumulate_steps

Collaborator: Isn't this name a bit nonstandard? This clearly isn't a model config.

if isinstance(inputs, tuple):
input_ids = inputs[0]
else:
input_ids = inputs

model.accumulate_steps = input_ids.shape[0]

Contributor: Or else set model.micro_batch_size directly to 1.


with paddle.no_grad():
if has_labels:
with self.autocast_smart_context_manager():
@@ -3276,9 +3295,25 @@
loss = loss.mean().detach()
else:
raise ValueError("pipeline mode eval need label!")
# Restore the backed-up `accumulate_steps`.
model.accumulate_steps = model_config_backup

return (loss, None, labels)
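
One way to address the naming concern raised above (a hypothetical sketch, not part of this PR) is to wrap the save/override/restore of accumulate_steps in a context manager, which also guarantees restoration if the eval step raises:

from contextlib import contextmanager


@contextmanager
def override_accumulate_steps(model, steps):
    # Hypothetical helper: temporarily set `accumulate_steps` to the
    # actual eval batch size, always restoring the saved value.
    saved = model.accumulate_steps
    model.accumulate_steps = steps
    try:
        yield
    finally:
        model.accumulate_steps = saved

Inside prediction_pipeline_step this would read roughly as `with override_accumulate_steps(model, input_ids.shape[0]): ...` around the no_grad block.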

def prediction_pipeline_step_with_logits_acc(
self,
*args,
**kwargs,
):
loss, _, labels = self.prediction_pipeline_step(*args, **kwargs)
logits = None
if "pp_logits" in infohub:
logits = paddle.concat(infohub["pp_logits"], axis=0)
logits = logits._copy_to(paddle.framework._current_expected_place(), False)

Collaborator: Is the copy here because pp_logits lives in CPU memory or CUDA pinned memory?

Collaborator (Author): Yes. If the logits were not kept in CPU or pinned memory here, the concat would add a peak of twice the logits' size in GPU memory and cause an OOM.

infohub["pp_logits"] = []

return (loss, logits, labels)
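
The method above consumes logits that something upstream has stashed in infohub["pp_logits"]; per the author's reply, they are kept off the GPU until the final copy back. A hedged sketch of that producer side follows; the helper name and call site are assumptions, and only the pinned-memory pattern is taken from the discussion above.

import paddle

from paddlenlp.utils import infohub


def stash_pp_logits(micro_logits):
    # Hypothetical producer (e.g. called from the last pipeline stage's
    # criterion): park each micro-batch's logits in CUDA pinned host
    # memory so the GPU never holds all micro-batches at once and the
    # concat above runs off-device.
    if "pp_logits" not in infohub:
        infohub["pp_logits"] = []
    infohub["pp_logits"].append(micro_logits._copy_to(paddle.CUDAPinnedPlace(), False))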

def prediction_step(
self,
model: nn.Layer,
@@ -3312,6 +3347,8 @@ def prediction_step(
if self.args.pipeline_parallel_degree > 1:
# hack for pipeline mode
inputs = self._prepare_inputs(inputs)
if self.args.metric_for_best_model == "accuracy":

Contributor: Suggest not putting this in Trainer; placing it in SFTTrainer would be more reasonable.

return self.prediction_pipeline_step_with_logits_acc(model, inputs, prediction_loss_only, ignore_keys)
return self.prediction_pipeline_step(model, inputs, prediction_loss_only, ignore_keys)

has_labels = all(inputs.get(k) is not None for k in self.label_names)
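
To illustrate the reviewer's suggestion above, the accuracy routing could live in a subclass instead of the base Trainer. A hypothetical sketch (the SFTTrainer placement and exact override are assumptions, not code from this PR):

class SFTTrainer(Trainer):
    def prediction_step(self, model, inputs, prediction_loss_only, ignore_keys=None):
        # Hypothetical override: keep the logits-accumulating pipeline
        # path out of the base Trainer, as the reviewer suggests.
        if (
            self.args.pipeline_parallel_degree > 1
            and self.args.metric_for_best_model == "accuracy"
        ):
            inputs = self._prepare_inputs(inputs)
            return self.prediction_pipeline_step_with_logits_acc(
                model, inputs, prediction_loss_only, ignore_keys
            )
        return super().prediction_step(model, inputs, prediction_loss_only, ignore_keys)
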
1 change: 1 addition & 0 deletions tests/trainer/test_unified_checkpoint.py
@@ -81,6 +81,7 @@
"fp16_opt_level": "O2",
"max_grad_norm": 1.0,
"dataloader_num_workers": 0,
"metric_for_best_model": "accuracy",

Contributor: Follow up by adapting this for the open-source models as well.

"continue_training": 0,
"do_train": "true",
"do_eval": "false",