From 9d80f84bc101282046707d55ed2b1ef490f31a80 Mon Sep 17 00:00:00 2001 From: Huiying Date: Fri, 22 Nov 2024 15:12:47 -0800 Subject: [PATCH] add metric calc (#11381) Signed-off-by: HuiyingLi --- .../llama-3/nemo2-sft-peft/nemo2-peft.ipynb | 25 +++++++++++++++++++ .../llama-3/nemo2-sft-peft/nemo2-sft.ipynb | 25 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/tutorials/llm/llama-3/nemo2-sft-peft/nemo2-peft.ipynb b/tutorials/llm/llama-3/nemo2-sft-peft/nemo2-peft.ipynb index cd3bae1cc627..aa463e2b84be 100644 --- a/tutorials/llm/llama-3/nemo2-sft-peft/nemo2-peft.ipynb +++ b/tutorials/llm/llama-3/nemo2-sft-peft/nemo2-peft.ipynb @@ -499,6 +499,31 @@ "```" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 5. Calculate Evaluation Metrics\n", + "\n", + "We can evaluate the model's predictions by calculating the Exact Match (EM) and F1 scores.\n", + "- Exact Match is a binary measure (0 or 1) checking whether the model output matches one of the\n", + "ground truth answers exactly.\n", + "- F1 score is the harmonic mean of precision and recall for the answer words.\n", + "\n", + "Below is a script that computes these metrics. The sample scores can be improved by training the model further and performing hyperparameter tuning. 
In this notebook, we only train for 20 steps.\n" + ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "!python /opt/NeMo/scripts/metric_calculation/peft_metric_calc.py --pred_file peft_prediction.jsonl --label_field \"original_answers\" --pred_field \"prediction\"" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/tutorials/llm/llama-3/nemo2-sft-peft/nemo2-sft.ipynb b/tutorials/llm/llama-3/nemo2-sft-peft/nemo2-sft.ipynb index 479d81928e98..e84ff916fc4e 100644 --- a/tutorials/llm/llama-3/nemo2-sft-peft/nemo2-sft.ipynb +++ b/tutorials/llm/llama-3/nemo2-sft-peft/nemo2-sft.ipynb @@ -606,6 +606,31 @@ "{\"input\": \"Muckle Water is a long, narrow fresh water loch on Ward Hill on Rousay, Orkney, Scotland. It is the biggest loch on the island and is popular for fishing. It can be reached by a track from the roadside. The Suso Burn on the north eastern shore drains the loch into the Sound of Rousay.\\n\\nWhere is Muckle Water?\", \"category\": \"closed_qa\", \"label\": \"Muckle water is located in Rousay, Orkney, Scotland.\", \"prediction\": \" Muckle Water is a long, narrow fresh water loch on Ward Hill on Rousay,\"}\n", "```" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 5. Calculate Evaluation Metrics\n", + "\n", + "We can evaluate the model's predictions by calculating the Exact Match (EM) and F1 scores.\n", + "- Exact Match is a binary measure (0 or 1) checking whether the model output matches one of the\n", + "ground truth answers exactly.\n", + "- F1 score is the harmonic mean of precision and recall for the answer words.\n", + "\n", + "Below is a script that computes these metrics. The sample scores can be improved by training the model further and performing hyperparameter tuning. In this notebook, we only train for 20 steps." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "!python /opt/NeMo/scripts/metric_calculation/peft_metric_calc.py --pred_file sft_prediction.jsonl --label_field \"label\" --pred_field \"prediction\"" + ] } ], "metadata": {