From efda82e7b55c7c86bc0e8fcdcbb992b9d8a2f530 Mon Sep 17 00:00:00 2001 From: Mark Holmstrom Date: Tue, 10 Sep 2024 20:45:10 -0600 Subject: [PATCH 1/2] Update model_wrappers.py Add hybrid action space support to action noise wrapper. --- ding/model/wrapper/model_wrappers.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ding/model/wrapper/model_wrappers.py b/ding/model/wrapper/model_wrappers.py index e427587327..61169968a5 100644 --- a/ding/model/wrapper/model_wrappers.py +++ b/ding/model/wrapper/model_wrappers.py @@ -866,10 +866,13 @@ def forward(self, *args, **kwargs): assert isinstance(output, dict), "model output must be dict, but find {}".format(type(output)) if 'action' in output or 'action_args' in output: key = 'action' if 'action' in output else 'action_args' - action = output[key] + action = output[key]['action_args'] if isinstance(output[key], dict) else output[key] assert isinstance(action, torch.Tensor) action = self.add_noise(action) - output[key] = action + if isinstance(output[key], dict): + output[key]['action_args'] = action + else: + output[key] = action return output def add_noise(self, action: torch.Tensor) -> torch.Tensor: From 70839ba5ed100891a46b69a20fbe41c7ef6c557e Mon Sep 17 00:00:00 2001 From: Mark Holmstrom Date: Tue, 17 Sep 2024 23:47:26 -0600 Subject: [PATCH 2/2] Updated syntax and added comment --- ding/model/wrapper/model_wrappers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ding/model/wrapper/model_wrappers.py b/ding/model/wrapper/model_wrappers.py index 61169968a5..94f5b86ac4 100644 --- a/ding/model/wrapper/model_wrappers.py +++ b/ding/model/wrapper/model_wrappers.py @@ -866,7 +866,8 @@ def forward(self, *args, **kwargs): assert isinstance(output, dict), "model output must be dict, but find {}".format(type(output)) if 'action' in output or 'action_args' in output: key = 'action' if 'action' in output else 'action_args' - action = output[key]['action_args'] if isinstance(output[key], dict) else output[key] + # handle hybrid action space by adding noise to continuous part of model output + action = output[key]['action_args'] if isinstance(output[key], dict) else output[key] assert isinstance(action, torch.Tensor) action = self.add_noise(action) if isinstance(output[key], dict):