diff --git a/README.md b/README.md
index bf034e5..efc5905 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,15 @@ SERL provides a set of libraries, env wrappers, and examples to train RL policie
 - [Contribution](#contribution)
 - [Citation](#citation)
-## Major bug fix
+## Major updates
+#### June 24, 2024
+For tasks that involve controlling the gripper (e.g., picking up objects), we strongly recommend adding a small penalty to changes in the gripper action, as it greatly improves training speed.
+For details, please refer to [PR #65](https://github.com/rail-berkeley/serl/pull/65).
+
+
+We also recommend providing online interventions during training, in addition to loading offline demos. If you have a Franka robot and a SpaceMouse, this can be as easy as touching the SpaceMouse during training.
+
+#### April 25, 2024
 We fixed a major issue in the intervention action frame. See release [v0.1.1](https://github.com/rail-berkeley/serl/releases/tag/v0.1.1)
 Please update your code with the main branch.
 
 ## Installation
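For context, the penalty recommended above is applied only on steps where the gripper actually toggles, so a sparse task reward of 1 becomes 0.9 on such a step. A minimal sketch of that arithmetic follows; the names are hypothetical stand-ins, not SERL's API, and the real implementation is the `franka_env.py` change further down.

```python
# Minimal sketch of the gripper-change penalty (hypothetical names).
# Subtracting a small constant whenever the gripper toggles discourages
# rapid open/close chatter while leaving the sparse task reward intact.
GRIPPER_PENALTY = 0.1  # matches the default added to DefaultEnvConfig below

def penalized_reward(task_reward: float, gripper_toggled: bool) -> float:
    """Reduce the reward on steps where the gripper state actually changed."""
    return task_reward - GRIPPER_PENALTY if gripper_toggled else task_reward

assert penalized_reward(1.0, True) == 0.9   # success while toggling the gripper
assert penalized_reward(0.0, False) == 0.0  # ordinary step, no penalty
```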
""" + self._send_gripper_command(-1) self._update_currpos() self._send_pos_command(self.currpos) time.sleep(0.5) diff --git a/serl_robot_infra/franka_env/envs/franka_env.py b/serl_robot_infra/franka_env/envs/franka_env.py index e94683c..66d3f30 100644 --- a/serl_robot_infra/franka_env/envs/franka_env.py +++ b/serl_robot_infra/franka_env/envs/franka_env.py @@ -59,6 +59,9 @@ class DefaultEnvConfig: ABS_POSE_LIMIT_LOW = np.zeros((6,)) COMPLIANCE_PARAM: Dict[str, float] = {} PRECISION_PARAM: Dict[str, float] = {} + BINARY_GRIPPER_THREASHOLD: float = 0.5 + APPLY_GRIPPER_PENALTY: bool = True + GRIPPER_PENALTY: float = 0.1 ############################################################################## @@ -94,6 +97,7 @@ def __init__( self.currjacobian = np.zeros((6, 7)) self.curr_gripper_pos = 0 + self.gripper_binary_state = 0 # 0 for open, 1 for closed self.lastsent = time.time() self.randomreset = config.RANDOM_RESET self.random_xy_range = config.RANDOM_XY_RANGE @@ -201,7 +205,7 @@ def step(self, action: np.ndarray) -> tuple: gripper_action = action[6] * self.action_scale[2] - self._send_gripper_command(gripper_action) + gripper_action_effective = self._send_gripper_command(gripper_action) self._send_pos_command(self.clip_safety_box(self.nextpos)) self.curr_path_length += 1 @@ -210,11 +214,11 @@ def step(self, action: np.ndarray) -> tuple: self._update_currpos() ob = self._get_obs() - reward = self.compute_reward(ob) - done = self.curr_path_length >= self.max_episode_length or reward - return ob, int(reward), done, False, {} + reward = self.compute_reward(ob, gripper_action_effective) + done = self.curr_path_length >= self.max_episode_length or reward == 1 + return ob, reward, done, False, {} - def compute_reward(self, obs) -> bool: + def compute_reward(self, obs, gripper_action_effective) -> bool: """We are using a sparse reward function.""" current_pose = obs["state"]["tcp_pose"] # convert from quat to euler first @@ -223,10 +227,15 @@ def compute_reward(self, obs) -> bool: current_pose = np.hstack([current_pose[:3], euler_angles]) delta = np.abs(current_pose - self._TARGET_POSE) if np.all(delta < self._REWARD_THRESHOLD): - return True + reward = 1 else: # print(f'Goal not reached, the difference is {delta}, the desired threshold is {_REWARD_THRESHOLD}') - return False + reward = 0 + + if self.config.APPLY_GRIPPER_PENALTY and gripper_action_effective: + reward -= self.config.GRIPPER_PENALTY + + return reward def crop_image(self, name, image) -> np.ndarray: """Crop realsense images to be a square.""" @@ -379,12 +388,24 @@ def _send_pos_command(self, pos: np.ndarray): def _send_gripper_command(self, pos: float, mode="binary"): """Internal function to send gripper command to the robot.""" if mode == "binary": - if (pos >= -1) and (pos <= -0.9): # close gripper + if ( + pos <= -self.config.BINARY_GRIPPER_THREASHOLD + and self.gripper_binary_state == 0 + ): # close gripper requests.post(self.url + "close_gripper") - elif (pos >= 0.9) and (pos <= 1): # open gripper + time.sleep(0.6) + self.gripper_binary_state = 1 + return True + elif ( + pos >= self.config.BINARY_GRIPPER_THREASHOLD + and self.gripper_binary_state == 1 + ): # open gripper requests.post(self.url + "open_gripper") + time.sleep(0.6) + self.gripper_binary_state = 0 + return True else: # do nothing to the gripper - return + return False elif mode == "continuous": raise NotImplementedError("Continuous gripper control is optional") diff --git a/serl_robot_infra/franka_env/envs/pcb_env/config.py 
diff --git a/serl_robot_infra/franka_env/envs/pcb_env/config.py b/serl_robot_infra/franka_env/envs/pcb_env/config.py
index 8d87e5a..0255098 100644
--- a/serl_robot_infra/franka_env/envs/pcb_env/config.py
+++ b/serl_robot_infra/franka_env/envs/pcb_env/config.py
@@ -22,6 +22,7 @@ class PCBEnvConfig(DefaultEnvConfig):
     )
     RESET_POSE = TARGET_POSE + np.array([0.0, 0.0, 0.04, 0.0, 0.0, 0.0])
     REWARD_THRESHOLD = [0.003, 0.003, 0.001, 0.1, 0.1, 0.1]
+    APPLY_GRIPPER_PENALTY = False
     ACTION_SCALE = (0.02, 0.2, 1)
     RANDOM_RESET = True
     RANDOM_XY_RANGE = 0.05
diff --git a/serl_robot_infra/franka_env/envs/pcb_env/franka_pcb_insert.py b/serl_robot_infra/franka_env/envs/pcb_env/franka_pcb_insert.py
index 46aa11d..8e99700 100644
--- a/serl_robot_infra/franka_env/envs/pcb_env/franka_pcb_insert.py
+++ b/serl_robot_infra/franka_env/envs/pcb_env/franka_pcb_insert.py
@@ -27,6 +27,7 @@ def go_to_rest(self, joint_reset=False):
         Move to the rest position defined in base class.
         Add a small z offset before going to rest to avoid collision with object.
         """
+        self._send_gripper_command(-1)
         self._update_currpos()
         self._send_pos_command(self.currpos)
         time.sleep(0.5)
diff --git a/serl_robot_infra/franka_env/envs/peg_env/config.py b/serl_robot_infra/franka_env/envs/peg_env/config.py
index 37241e3..d2bcae9 100644
--- a/serl_robot_infra/franka_env/envs/peg_env/config.py
+++ b/serl_robot_infra/franka_env/envs/peg_env/config.py
@@ -22,6 +22,7 @@ class PegEnvConfig(DefaultEnvConfig):
     )
     RESET_POSE = TARGET_POSE + np.array([0.0, 0.0, 0.1, 0.0, 0.0, 0.0])
     REWARD_THRESHOLD: np.ndarray = np.array([0.01, 0.01, 0.01, 0.2, 0.2, 0.2])
+    APPLY_GRIPPER_PENALTY = False
     ACTION_SCALE = np.array([0.02, 0.1, 1])
     RANDOM_RESET = True
     RANDOM_XY_RANGE = 0.05
diff --git a/serl_robot_infra/franka_env/envs/peg_env/franka_peg_insert.py b/serl_robot_infra/franka_env/envs/peg_env/franka_peg_insert.py
index 07f98e4..25f602f 100644
--- a/serl_robot_infra/franka_env/envs/peg_env/franka_peg_insert.py
+++ b/serl_robot_infra/franka_env/envs/peg_env/franka_peg_insert.py
@@ -18,6 +18,7 @@ def go_to_rest(self, joint_reset=False):
         Move to the rest position defined in base class.
         Add a small z offset before going to rest to avoid collision with object.
         """
+        self._send_gripper_command(-1)
         self._update_currpos()
         self._send_pos_command(self.currpos)
         time.sleep(0.5)
diff --git a/serl_robot_infra/franka_env/envs/wrappers.py b/serl_robot_infra/franka_env/envs/wrappers.py
index d803218..af69605 100644
--- a/serl_robot_infra/franka_env/envs/wrappers.py
+++ b/serl_robot_infra/franka_env/envs/wrappers.py
@@ -49,8 +49,9 @@ def compute_reward(self, obs):
 
     def step(self, action):
         obs, rew, done, truncated, info = self.env.step(action)
-        rew = self.compute_reward(self.env.get_front_cam_obs())
-        done = done or rew
+        success = self.compute_reward(self.env.get_front_cam_obs())
+        rew += success
+        done = done or success
         return obs, rew, done, truncated, info
 
 
@@ -72,8 +73,9 @@ def compute_reward(self, obs):
 
     def step(self, action):
         obs, rew, done, truncated, info = self.env.step(action)
-        rew = self.compute_reward(self.env.get_front_cam_obs())
-        done = done or rew
+        success = self.compute_reward(self.env.get_front_cam_obs())
+        rew += success
+        done = done or success
         return obs, rew, done, truncated, info
 
 
@@ -94,8 +96,9 @@ def compute_reward(self, obs):
 
     def step(self, action):
         obs, rew, done, truncated, info = self.env.step(action)
-        rew = self.compute_reward(obs)
-        done = done or rew
+        success = self.compute_reward(obs)
+        rew += success
+        done = done or success
         return obs, rew, done, truncated, info
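One behavioral note on the wrapper hunks above: the classifier's success signal is now added to the base reward rather than overwriting it, so a gripper penalty issued by `franka_env.py` survives through the wrapper. A small sketch of that composition, with a hypothetical helper in place of the wrapper's `step`:

```python
# Sketch of how the wrapper now composes rewards (hypothetical helper).
# Previously `rew` was overwritten by the success classifier; now the
# success bonus is added on top of the (possibly penalized) base reward.
def composed_reward(base_reward: float, success: bool) -> float:
    return base_reward + int(success)

assert composed_reward(-0.1, True) == 0.9  # success on a gripper-toggle step
assert composed_reward(0.0, False) == 0.0  # ordinary step, unchanged
```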