Fix CI experimental tests TypeError for GRPOWithReplayBufferTrainer.update_with_replay_buffer (#4366)

albertvillanova · qgallouedec · web-flow · commit 50b96e25a867 · 2025-10-31T17:24:47.000+01:00
Co-authored-by: Quentin Gallouédec <gallouedec.quentin@gmail.com>
diff --git a/tests/experimental/test_grpo_with_replay_buffer_trainer.py b/tests/experimental/test_grpo_with_replay_buffer_trainer.py
@@ -140,7 +140,7 @@ def _make_inputs(self, group_advantages, with_pixels=False, with_logprobs=False)
             "prompt_mask": torch.ones(4, 2, dtype=torch.long),
             "completion_ids": torch.tensor([[9, 10], [11, 12], [13, 14], [15, 16]]),
             "completion_mask": torch.ones(4, 2, dtype=torch.long),
-            "prompt_inputs": {"pixel_values": torch.randn(4, 3, 224, 224)} if with_pixels else {},
+            "forward_kwargs": {"pixel_values": torch.randn(4, 3, 224, 224)} if with_pixels else {},
             "old_per_token_logps": torch.randn(4, 2) if with_logprobs else None,
         }
         inputs["group_std_rewards"] = group_advantages.std(dim=1).expand_as(group_advantages)
@@ -217,7 +217,7 @@ def test_update_with_inputs_different_seq_len(self):
                 ]
             ),
             "completion_mask": torch.tensor([[1, 1, 0], [1, 1, 1], [1, 1, 0], [1, 1, 1]], dtype=torch.long),
-            "prompt_inputs": {},
+            "forward_kwargs": {},
         }
         inputs["group_std_rewards"] = group_advantages.std(dim=1).expand_as(group_advantages)
 

Original file line number	Diff line number	Diff line change
`@@ -140,7 +140,7 @@ def _make_inputs(self, group_advantages, with_pixels=False, with_logprobs=False)`
`140`	`140`	`"prompt_mask": torch.ones(4, 2, dtype=torch.long),`
`141`	`141`	`"completion_ids": torch.tensor([[9, 10], [11, 12], [13, 14], [15, 16]]),`
`142`	`142`	`"completion_mask": torch.ones(4, 2, dtype=torch.long),`
`143`		`- "prompt_inputs": {"pixel_values": torch.randn(4, 3, 224, 224)} if with_pixels else {},`
	`143`	`+ "forward_kwargs": {"pixel_values": torch.randn(4, 3, 224, 224)} if with_pixels else {},`
`144`	`144`	`"old_per_token_logps": torch.randn(4, 2) if with_logprobs else None,`
`145`	`145`	`}`
`146`	`146`	`inputs["group_std_rewards"] = group_advantages.std(dim=1).expand_as(group_advantages)`
`@@ -217,7 +217,7 @@ def test_update_with_inputs_different_seq_len(self):`
`217`	`217`	`]`
`218`	`218`	`),`
`219`	`219`	`"completion_mask": torch.tensor([[1, 1, 0], [1, 1, 1], [1, 1, 0], [1, 1, 1]], dtype=torch.long),`
`220`		`- "prompt_inputs": {},`
	`220`	`+ "forward_kwargs": {},`
`221`	`221`	`}`
`222`	`222`	`inputs["group_std_rewards"] = group_advantages.std(dim=1).expand_as(group_advantages)`
`223`	`223`