Skip to content

Commit

Permalink
fix onnxruntime wrapper for gpu inference (#123)
Browse files Browse the repository at this point in the history
* fix onnxruntime wrapper for ort-gpu

* resolve comment

* fix lint
  • Loading branch information
RunningLeon authored Feb 8, 2022
1 parent 51fa2ff commit 9f9670e
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions mmdeploy/backend/onnxruntime/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class ORTWrapper(BaseWrapper):
>>> import torch
>>>
>>> onnx_file = 'model.onnx'
>>> model = ORTWrapper(onnx_file, -1)
>>> model = ORTWrapper(onnx_file, 'cpu')
>>> inputs = dict(input=torch.randn(1, 3, 224, 224, device='cpu'))
>>> outputs = model(inputs)
>>> print(outputs)
Expand Down Expand Up @@ -79,7 +79,9 @@ def forward(self, inputs: Dict[str,
input_tensor = input_tensor.contiguous()
if not self.is_cuda_available:
input_tensor = input_tensor.cpu()
element_type = input_tensor.numpy().dtype
# Avoid unnecessary data transfer between host and device
element_type = input_tensor.new_zeros(
1, device='cpu').numpy().dtype
self.io_binding.bind_input(
name=name,
device_type=self.device_type,
Expand Down

0 comments on commit 9f9670e

Please sign in to comment.