Add testing for GPU tensor error handling
Tabrizian committed Jun 1, 2023
1 parent 3d8d517 commit 0a1b2a3
Showing 2 changed files with 70 additions and 0 deletions.
66 changes: 66 additions & 0 deletions qa/L0_backend_python/python_test.py
@@ -38,6 +38,7 @@
import os

from tritonclient.utils import *
import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
import tritonclient.http as httpclient


@@ -59,6 +60,13 @@ def _infer_help(self, model_name, shape, data_type):
output0 = result.as_numpy('OUTPUT0')
self.assertTrue(np.all(input_data_0 == output0))

def _create_cuda_region(self, client, size, name):
shm0_handle = cuda_shared_memory.create_shared_memory_region(
name, byte_size=size, device_id=0)
client.register_cuda_shared_memory(
name, cuda_shared_memory.get_raw_handle(shm0_handle), 0, size)
return shm0_handle

def _optional_input_infer(self, model_name, has_input0, has_input1):
with httpclient.InferenceServerClient("localhost:8000") as client:
shape = (1,)
@@ -144,6 +152,64 @@ def test_growth_error(self):
with self._shm_leak_detector.Probe() as shm_probe:
self._infer_help(model_name, shape, dtype)

    # CUDA shared memory is not supported on Jetson.
def test_gpu_tensor_error(self):
model_name = 'identity_bool'
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data = np.array([[True] * 1000], dtype=bool)
inputs = [
httpclient.InferInput("INPUT0", input_data.shape,
np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)

requested_outputs = [httpclient.InferRequestedOutput('OUTPUT0')]

            # Intentionally create a shared memory region that is too small.
            client.unregister_cuda_shared_memory()
            shm0_handle = self._create_cuda_region(client, 1,
                                                   'output0_data')

requested_outputs[0].set_shared_memory('output0_data', 1)
            with self.assertRaises(InferenceServerException) as ex:
                client.infer(model_name, inputs, outputs=requested_outputs)
            self.assertIn(
                "should be at least 1000 bytes to hold the results",
                str(ex.exception))
client.unregister_cuda_shared_memory()
cuda_shared_memory.destroy_shared_memory_region(shm0_handle)

def test_dlpack_tensor_error(self):
model_name = 'dlpack_identity'
with httpclient.InferenceServerClient("localhost:8000") as client:
input_data = np.array([[1] * 1000], dtype=np.float32)
inputs = [
httpclient.InferInput("INPUT0", input_data.shape,
np_to_triton_dtype(input_data.dtype))
]

requested_outputs = [httpclient.InferRequestedOutput('OUTPUT0')]
input_data_size = input_data.itemsize * input_data.size
client.unregister_cuda_shared_memory()
input_region = self._create_cuda_region(client, input_data_size,
'input0_data')
inputs[0].set_shared_memory('input0_data', input_data_size)
cuda_shared_memory.set_shared_memory_region(
input_region, [input_data])

# Intentionally create a small region to trigger an error
shm0_handle = self._create_cuda_region(client, 1,
'output0_data')
requested_outputs[0].set_shared_memory('output0_data', 1)

            with self.assertRaises(InferenceServerException) as ex:
                client.infer(model_name, inputs, outputs=requested_outputs)
            self.assertIn(
                "should be at least 4000 bytes to hold the results",
                str(ex.exception))
client.unregister_cuda_shared_memory()
cuda_shared_memory.destroy_shared_memory_region(shm0_handle)

def test_async_infer(self):
model_name = "identity_uint8"
request_parallelism = 4
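Note on the expected sizes: identity_bool echoes 1000 BOOL elements (1 byte each, hence "at least 1000 bytes"), while dlpack_identity echoes 1000 FP32 elements (4 bytes each, hence "at least 4000 bytes"). Both models are plain identity models served by the Triton Python backend; their model.py files live in qa/python_models and are not part of this diff, so the sketch below is an assumption about their structure using the public triton_python_backend_utils API, not the repository's actual implementation.

# Hypothetical sketch of an identity model for the Triton Python backend
# (e.g. identity_bool). The backend, not the model, copies OUTPUT0 into the
# client's registered CUDA shared memory region, which is where the
# "should be at least N bytes to hold the results" size check fires.
import triton_python_backend_utils as pb_utils


class TritonPythonModel:

    def execute(self, requests):
        responses = []
        for request in requests:
            # Echo INPUT0 back unchanged as OUTPUT0.
            input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            output_tensor = pb_utils.Tensor("OUTPUT0", input_tensor.as_numpy())
            responses.append(
                pb_utils.InferenceResponse(output_tensors=[output_tensor]))
        return responses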
4 changes: 4 additions & 0 deletions qa/L0_backend_python/test.sh
@@ -128,6 +128,10 @@ mkdir -p models/string_fixed/1/
cp ../python_models/string_fixed/model.py ./models/string_fixed/1/
cp ../python_models/string_fixed/config.pbtxt ./models/string_fixed

mkdir -p models/dlpack_identity/1/
cp ../python_models/dlpack_identity/model.py ./models/dlpack_identity/1/
cp ../python_models/dlpack_identity/config.pbtxt ./models/dlpack_identity

# Skip torch install on Jetson since it is already installed.
if [ "$TEST_JETSON" == "0" ]; then
pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
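The dlpack_identity model copied into the test model repository here is, as the name suggests, an identity model that passes tensors through via DLPack. A minimal sketch under that assumption follows; the actual qa/python_models/dlpack_identity/model.py is not shown in this commit and may differ.

# Hypothetical sketch of a DLPack-based identity model for the Python
# backend; the real dlpack_identity model may be implemented differently.
import triton_python_backend_utils as pb_utils


class TritonPythonModel:

    def execute(self, requests):
        responses = []
        for request in requests:
            input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            # Re-wrap the input's DLPack capsule as OUTPUT0 without copying.
            output_tensor = pb_utils.Tensor.from_dlpack(
                "OUTPUT0", input_tensor.to_dlpack())
            responses.append(
                pb_utils.InferenceResponse(output_tensors=[output_tensor]))
        return responses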
