From 4830a0786213b0dc15053bb2f55c37fba1a953ce Mon Sep 17 00:00:00 2001
From: Anna Shors
Date: Tue, 10 Dec 2024 13:39:05 -0800
Subject: [PATCH 1/2] docs: add eval documentation (#428)

Signed-off-by: ashors1
---
 docs/user-guide/aligner-algo-header.rst       |  4 +++-
 docs/user-guide/evaluation.rst                | 39 +++++++++++++++++++
 .../nlp/data/sft/remove_long_dialogues.py     |  2 +-
 3 files changed, 43 insertions(+), 2 deletions(-)
 create mode 100644 docs/user-guide/evaluation.rst

diff --git a/docs/user-guide/aligner-algo-header.rst b/docs/user-guide/aligner-algo-header.rst
index 15114dc02..a9e029784 100644
--- a/docs/user-guide/aligner-algo-header.rst
+++ b/docs/user-guide/aligner-algo-header.rst
@@ -1,4 +1,6 @@
 .. important::
    Before starting this tutorial, be sure to review the :ref:`introduction ` for tips on setting up your NeMo-Aligner environment.
 
-   If you run into any problems, refer to NeMo's `Known Issues page `__. The page enumerates known issues and provides suggested workarounds where appropriate.
\ No newline at end of file
+   If you run into any problems, refer to NeMo's `Known Issues page `__. The page enumerates known issues and provides suggested workarounds where appropriate.
+
+   After completing this tutorial, refer to the :ref:`evaluation documentation <nemo-aligner-eval>` for tips on evaluating a trained model.
\ No newline at end of file
diff --git a/docs/user-guide/evaluation.rst b/docs/user-guide/evaluation.rst
new file mode 100644
index 000000000..0922905a8
--- /dev/null
+++ b/docs/user-guide/evaluation.rst
@@ -0,0 +1,39 @@
+.. include:: /content/nemo.rsts
+
+.. _nemo-aligner-eval:
+
+Evaluate a Trained Model
+@@@@@@@@@@@@@@@@@@@@@@@@
+
+After training a model, you may want to run evaluation to understand how the model performs on unseen tasks. You can use Eleuther AI's `Language Model Evaluation Harness <https://github.com/EleutherAI/lm-evaluation-harness>`_
+to quickly run a variety of popular benchmarks, including MMLU, SuperGLUE, HellaSwag, and WinoGrande.
+A full list of supported tasks can be found `here `_.
+
+Install the LM Evaluation Harness
+#################################
+
+Run the following commands inside a NeMo container to install the LM Evaluation Harness:
+
+.. code-block:: bash
+
+   git clone --depth 1 https://github.com/EleutherAI/lm-evaluation-harness
+   cd lm-evaluation-harness
+   pip install -e .
+
+
+Run Evaluations
+###############
+
+A detailed description of running evaluation with ``.nemo`` models can be found in Eleuther AI's `documentation `_.
+Single- and multi-GPU evaluation is supported. The following example runs evaluation on 8 GPUs with a ``.nemo`` file from NeMo-Aligner on the ``lambada_openai``, ``super-glue-lm-eval-v1``, and ``winogrande`` tasks.
+Note that unzipping your ``.nemo`` file before running evaluations, as shown below, is recommended but not required.
+
+.. code-block:: bash
+
+   mkdir unzipped_checkpoint
+   tar -xvf /path/to/model.nemo -C unzipped_checkpoint
+
+   torchrun --nproc-per-node=8 --no-python lm_eval --model nemo_lm \
+       --model_args path='unzipped_checkpoint',devices=8,tensor_model_parallel_size=8 \
+       --tasks lambada_openai,super-glue-lm-eval-v1,winogrande \
+       --batch_size 8
diff --git a/examples/nlp/data/sft/remove_long_dialogues.py b/examples/nlp/data/sft/remove_long_dialogues.py
index 680f91606..95208f440 100644
--- a/examples/nlp/data/sft/remove_long_dialogues.py
+++ b/examples/nlp/data/sft/remove_long_dialogues.py
@@ -25,7 +25,7 @@
 Usage:
   python3 remove_long_dialogues.py \
     --tokenizer_path \
-    --tokenizer_type sentencepiece
+    --tokenizer_type sentencepiece \
     --dataset_file \
     --output_file \
     --seq_len
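For a quick smoke test before committing 8 GPUs, the harness can also be run on a single GPU. The following is a minimal sketch, not part of the patch above: it assumes the same unzipped checkpoint layout produced by the ``tar`` step, keeps the documented ``lm_eval`` flags (``--model``, ``--model_args``, ``--tasks``, ``--batch_size``), and simply drops ``torchrun`` and the parallelism settings:

.. code-block:: bash

   # Hypothetical single-GPU run; assumes ./unzipped_checkpoint was created
   # with the tar command shown in the patch above.
   lm_eval --model nemo_lm \
       --model_args path='unzipped_checkpoint' \
       --tasks winogrande \
       --batch_size 8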
From 2ead6bf14d37f776f82c3b3204b3542cef2b226b Mon Sep 17 00:00:00 2001
From: Anna Shors
Date: Wed, 11 Dec 2024 10:28:37 -0800
Subject: [PATCH 2/2] fix: bug fix for KD + PP (#443)

Signed-off-by: ashors1
---
 .../models/nlp/gpt/megatron_gpt_knowledge_distillation.py | 6 ++++--
 tests/functional/kd.sh                                    | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/nemo_aligner/models/nlp/gpt/megatron_gpt_knowledge_distillation.py b/nemo_aligner/models/nlp/gpt/megatron_gpt_knowledge_distillation.py
index b67d858ed..db93d29ec 100644
--- a/nemo_aligner/models/nlp/gpt/megatron_gpt_knowledge_distillation.py
+++ b/nemo_aligner/models/nlp/gpt/megatron_gpt_knowledge_distillation.py
@@ -72,7 +72,7 @@ def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all
                 required_keys.update(("tokens", "position_ids"))
 
             if parallel_state.is_pipeline_last_stage():
-                required_keys.update(("labels", "loss_mask"))
+                required_keys.update(("labels", "loss_mask", "topk_logits", "topk_token_ids"))
 
         batch = {key: val.cuda(non_blocking=True) if key in required_keys else None for key, val in batch.items()}
 
@@ -83,7 +83,9 @@
         tokens = batch["tokens"]
         labels = batch["labels"]
-        loss_mask = batch["loss_mask"].clamp(min=0, max=1)
+        loss_mask = batch["loss_mask"]
+        if loss_mask is not None:
+            loss_mask = loss_mask.clamp(min=0, max=1)
         target_topk_logits = batch["topk_logits"]
         target_topk_token_ids = batch["topk_token_ids"]
 
         # Model forward pass
diff --git a/tests/functional/kd.sh b/tests/functional/kd.sh
index 83e472f52..fa7c2a4b9 100644
--- a/tests/functional/kd.sh
+++ b/tests/functional/kd.sh
@@ -83,7 +83,7 @@ torchrun --nproc-per-node 2 ${GPFS}/examples/nlp/gpt/train_gpt_knowledge_distill
     exp_manager.create_checkpoint_callback=False \
     model.data.num_workers=2 \
     ++model.tensor_model_parallel_size=1 \
-    ++model.pipeline_model_parallel_size=1 \
+    ++model.pipeline_model_parallel_size=2 \
     exp_manager.explicit_log_dir=${RESULTS_DIR} \
     ++model.activations_checkpoint_granularity=full \
     ++model.activations_checkpoint_method=uniform \
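The ``loss_mask`` guard in the second hunk is the core of the KD + PP fix: with pipeline parallelism enabled, the dict comprehension in ``fwd_output_and_loss_func`` maps every batch key outside ``required_keys`` to ``None``, so non-last pipeline stages receive ``batch["loss_mask"] is None`` and the old chained ``.clamp(min=0, max=1)`` raised an ``AttributeError`` (the first hunk similarly ensures the last stage actually keeps the ``topk_logits``/``topk_token_ids`` tensors the KD loss needs). A minimal, self-contained sketch of the failure mode and the guard, with illustrative values rather than NeMo-Aligner code:

.. code-block:: python

   import torch

   # A first pipeline stage only needs the input-side keys.
   required_keys = {"tokens", "position_ids"}
   raw_batch = {
       "tokens": torch.zeros(2, 4, dtype=torch.long),
       "position_ids": torch.zeros(2, 4, dtype=torch.long),
       "loss_mask": torch.ones(2, 4),
   }

   # Mirrors the dict comprehension in the patch: non-required keys become None.
   batch = {k: v if k in required_keys else None for k, v in raw_batch.items()}

   loss_mask = batch["loss_mask"]  # None on this stage
   # The old code called batch["loss_mask"].clamp(min=0, max=1) unconditionally,
   # which raises AttributeError whenever the value is None.
   if loss_mask is not None:
       loss_mask = loss_mask.clamp(min=0, max=1)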