Ros1 fixed rgbd hand gestures recognition #325

Merged (15 commits, Oct 11, 2022)
@@ -14,40 +14,44 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import argparse
+import os
+import cv2
+import numpy as np
+import torch
+
 import rospy
-import torch
-import numpy as np
 import message_filters
 from sensor_msgs.msg import Image as ROS_Image
-from opendr_bridge import ROSBridge
-import os
-from opendr.perception.multimodal_human_centric import RgbdHandGestureLearner
-from opendr.engine.data import Image
 from vision_msgs.msg import Classification2D
-import message_filters
-import cv2
+
+from opendr.engine.data import Image
+from opendr.perception.multimodal_human_centric import RgbdHandGestureLearner
+from opendr_bridge import ROSBridge
 
 
 class RgbdHandGestureNode:
 
-    def __init__(self, input_image_topic="/usb_cam/image_raw", input_depth_image_topic="/usb_cam/image_raw",
-                 gesture_annotations_topic="/opendr/gestures", device="cuda"):
+    def __init__(self, input_rgb_image_topic="/kinect2/qhd/image_color_rect",
+                 input_depth_image_topic="/kinect2/qhd/image_depth_rect",
+                 output_gestures_topic="/opendr/gestures", device="cuda"):
         """
-        Creates a ROS Node for gesture recognition from RGBD
-        :param input_image_topic: Topic from which we are reading the input image
-        :type input_image_topic: str
+        Creates a ROS Node for gesture recognition from RGBD. Assumes that the following drivers have been
+        installed: https://github.com/OpenKinect/libfreenect2 and https://github.com/code-iai/iai_kinect2.
+        :param input_rgb_image_topic: Topic from which we are reading the input RGB image
+        :type input_rgb_image_topic: str
         :param input_depth_image_topic: Topic from which we are reading the input depth image
        :type input_depth_image_topic: str
-        :param gesture_annotations_topic: Topic to which we are publishing the predicted gesture class
-        :type gesture_annotations_topic: str
+        :param output_gestures_topic: Topic to which we are publishing the predicted gesture class
+        :type output_gestures_topic: str
         :param device: device on which we are running inference ('cpu' or 'cuda')
         :type device: str
         """
 
-        self.gesture_publisher = rospy.Publisher(gesture_annotations_topic, Classification2D, queue_size=10)
+        self.gesture_publisher = rospy.Publisher(output_gestures_topic, Classification2D, queue_size=10)
 
-        image_sub = message_filters.Subscriber(input_image_topic, ROS_Image)
-        depth_sub = message_filters.Subscriber(input_depth_image_topic, ROS_Image)
+        image_sub = message_filters.Subscriber(input_rgb_image_topic, ROS_Image, queue_size=1, buff_size=10000000)
+        depth_sub = message_filters.Subscriber(input_depth_image_topic, ROS_Image, queue_size=1, buff_size=10000000)
         # synchronize image and depth data topics
         ts = message_filters.TimeSynchronizer([image_sub, depth_sub], 10)
         ts.registerCallback(self.callback)
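
A note on the synchronizer used above: message_filters.TimeSynchronizer only fires when the RGB and depth messages carry exactly equal header stamps. If a driver stamps the two streams independently, ApproximateTimeSynchronizer is the usual drop-in replacement. A minimal sketch, assuming the same image_sub and depth_sub subscribers as above (the slop value is illustrative):

import message_filters

# Match messages whose stamps differ by at most `slop` seconds,
# instead of requiring exactly equal stamps as TimeSynchronizer does.
ts = message_filters.ApproximateTimeSynchronizer([image_sub, depth_sub],
                                                 queue_size=10, slop=0.1)
ts.registerCallback(self.callback)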
@@ -73,20 +77,20 @@ def listen(self):
         rospy.loginfo("RGBD gesture recognition node started!")
         rospy.spin()
 
-    def callback(self, image_data, depth_data):
+    def callback(self, rgb_data, depth_data):
         """
         Callback that processes the input data and publishes to the corresponding topics
-        :param image_data: input image message
-        :type image_data: sensor_msgs.msg.Image
+        :param rgb_data: input RGB image message
+        :type rgb_data: sensor_msgs.msg.Image
         :param depth_data: input depth image message
         :type depth_data: sensor_msgs.msg.Image
         """
 
         # Convert sensor_msgs.msg.Image into OpenDR Image and preprocess
-        image = self.bridge.from_ros_image(image_data, encoding='bgr8')
+        rgb_image = self.bridge.from_ros_image(rgb_data, encoding='bgr8')
         depth_data.encoding = 'mono16'
         depth_image = self.bridge.from_ros_image_to_depth(depth_data, encoding='mono16')
-        img = self.preprocess(image, depth_image)
+        img = self.preprocess(rgb_image, depth_image)
 
         # Run gesture recognition
         gesture_class = self.gesture_learner.infer(img)
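
The encoding re-label above is what lets the bridge decode the depth stream correctly: iai_kinect2 publishes the rectified depth image as 16-bit data, so forcing encoding = 'mono16' makes each pixel read as a single uint16. A small self-contained sketch of that decoding (the 4x4 buffer is fabricated; the millimeter interpretation is the usual kinect2 convention, stated here as an assumption):

import numpy as np

# Fabricate a 4x4 buffer of 16-bit values, as a mono16 image message would carry.
height, width = 4, 4
raw = (np.arange(height * width, dtype=np.uint16) * 100).tobytes()

# mono16 decoding: one uint16 per pixel (kinect2 depth is typically millimeters).
depth = np.frombuffer(raw, dtype=np.uint16).reshape(height, width)
print(depth.max())  # 1500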
@@ -95,37 +99,58 @@ def callback(self, image_data, depth_data):
         ros_gesture = self.bridge.from_category_to_rosclass(gesture_class)
         self.gesture_publisher.publish(ros_gesture)
 
-    def preprocess(self, image, depth_img):
-        '''
-        Preprocess image, depth_image and concatenate them
-        :param image_data: input image
-        :type image_data: engine.data.Image
-        :param depth_data: input depth image
-        :type depth_data: engine.data.Image
-        '''
-        image = image.convert(format='channels_last') / (2**8 - 1)
-        depth_img = depth_img.convert(format='channels_last') / (2**16 - 1)
+    def preprocess(self, rgb_image, depth_image):
+        """
+        Preprocess rgb_image and depth_image and concatenate them
+        :param rgb_image: input RGB image
+        :type rgb_image: engine.data.Image
+        :param depth_image: input depth image
+        :type depth_image: engine.data.Image
+        """
+        rgb_image = rgb_image.convert(format='channels_last') / (2**8 - 1)
+        depth_image = depth_image.convert(format='channels_last') / (2**16 - 1)
 
         # resize the images to 224x224
-        image = cv2.resize(image, (224, 224))
-        depth_img = cv2.resize(depth_img, (224, 224))
+        rgb_image = cv2.resize(rgb_image, (224, 224))
+        depth_image = cv2.resize(depth_image, (224, 224))
 
         # concatenate and standardize
-        img = np.concatenate([image, np.expand_dims(depth_img, axis=-1)], axis=-1)
+        img = np.concatenate([rgb_image, np.expand_dims(depth_image, axis=-1)], axis=-1)
         img = (img - self.mean) / self.std
         img = Image(img, dtype=np.float32)
         return img
 
 
 if __name__ == '__main__':
+    # default topics are according to kinectv2 drivers at https://github.com/OpenKinect/libfreenect2
+    # and https://github.com/code-iai/iai_kinect2
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input_rgb_image_topic", help="Topic name for input rgb image",
+                        type=str, default="/kinect2/qhd/image_color_rect")
+    parser.add_argument("--input_depth_image_topic", help="Topic name for input depth image",
+                        type=str, default="/kinect2/qhd/image_depth_rect")
+    parser.add_argument("--output_gestures_topic", help="Topic name for predicted gesture class",
+                        type=str, default="/opendr/gestures")
+    parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda",
+                        choices=["cuda", "cpu"])
+
+    args = parser.parse_args()
+
     # Select the device for running
     try:
-        device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        if args.device == "cuda" and torch.cuda.is_available():
+            device = "cuda"
+        elif args.device == "cuda":
+            print("GPU not found. Using CPU instead.")
+            device = "cpu"
+        else:
+            print("Using CPU")
+            device = "cpu"
     except:
-        device = 'cpu'
+        print("Using CPU")
+        device = "cpu"
 
-    # default topics are according to kinectv2 drivers at https://github.com/OpenKinect/libfreenect2
-    # and https://github.com/code-iai/iai_kinect2
-    depth_topic = "/kinect2/qhd/image_depth_rect"
-    image_topic = "/kinect2/qhd/image_color_rect"
-    gesture_node = RgbdHandGestureNode(input_image_topic=image_topic, input_depth_image_topic=depth_topic, device=device)
+    gesture_node = RgbdHandGestureNode(input_rgb_image_topic=args.input_rgb_image_topic,
+                                       input_depth_image_topic=args.input_depth_image_topic,
+                                       output_gestures_topic=args.output_gestures_topic, device=device)
     gesture_node.listen()
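
For reference, preprocess() reduces to per-channel scaling plus standardization: RGB is scaled by 1/(2**8 - 1), depth by 1/(2**16 - 1), the two are stacked channels-last into (224, 224, 4), and the stack is standardized with the learner's mean and std. A self-contained sketch (the mean/std values here are placeholders, not the learner's real statistics):

import numpy as np

# Placeholder per-channel statistics for the 4-channel RGB-D input.
mean = np.asarray([0.485, 0.456, 0.406, 0.0303]).reshape(1, 1, 4)
std = np.asarray([0.229, 0.224, 0.225, 0.0353]).reshape(1, 1, 4)

rgb = np.random.randint(0, 2**8, (224, 224, 3)) / (2**8 - 1)    # scaled to [0, 1]
depth = np.random.randint(0, 2**16, (224, 224)) / (2**16 - 1)   # scaled to [0, 1]

# (224, 224, 3) + (224, 224, 1) -> (224, 224, 4), then standardize per channel.
img = np.concatenate([rgb, np.expand_dims(depth, axis=-1)], axis=-1)
img = ((img - mean) / std).astype(np.float32)
print(img.shape, img.dtype)  # (224, 224, 4) float32

With the merged argparse interface, the node can then be started with something like rosrun perception rgbd_hand_gesture_recognition.py --device cpu (the package and script names are assumptions based on the opendr_ws layout), overriding the topic defaults with the --input_*_topic and --output_gestures_topic flags shown above where needed.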