agentscope-ai · yanxi-chen · Jul 30, 2025 · Jul 30, 2025 · Jul 30, 2025
diff --git a/examples/grpo_math/README.md b/examples/grpo_math/README.md
@@ -1,6 +1,14 @@
-# Example: PPO on MATH dataset
+# Example: GRPO on MATH dataset
+
+This example shows how to use [RM-Gallery](https://github.com/modelscope/RM-Gallery/) by running GRPO on a MATH dataset. RM-Gallery must be installed before running this example.
+The dataset is in JSONL format, with one JSON object per line containing a `question` field and a `gt_answer` field:
+
+```jsonl
+
+{"question": "what is 2+2?", "gt_answer": 4}
+{"question": "what is 2+3?", "gt_answer": 5}
+```
 
-This example shows the usage of PPO on the MATH dataset, adapted from [simpleRL](https://github.com/hkust-nlp/simpleRL-reason/tree/v0).
 
 For more detailed information, please refer to the [documentation](../../docs/sphinx_doc/source/tutorial/example_reasoning_basic.md).
 

diff --git a/trinity/common/rewards/reward_fn.py b/trinity/common/rewards/reward_fn.py
@@ -69,7 +69,7 @@ def _build_sample_from_experience(
         ]
 
         sample = DataSample(
-            unique_id=experience.unique_id,
+            unique_id=experience.eid.uid,
             input=to_rm_gallery_messages(messages),
             output=output,
             metadata=experience.info,