Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix L0_backend_python on Jetson #5728

Merged
merged 4 commits into from
May 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions qa/L0_backend_python/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ fi
prev_num_pages=`get_shm_pages`
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
echo -e "\n***\n*** Failed to start $SERVER\n***"
exit 1
fi

Expand Down Expand Up @@ -176,8 +176,8 @@ prev_num_pages=`get_shm_pages`
# Triton non-graceful exit
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
echo -e "\n***\n*** Failed to start $SERVER\n***"
exit 1
fi

Expand Down Expand Up @@ -216,8 +216,8 @@ if [ "$TEST_JETSON" == "0" ]; then
prev_num_pages=`get_shm_pages`
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
echo -e "\n***\n*** Failed to start $SERVER\n***"
exit 1
fi

Expand Down Expand Up @@ -252,8 +252,8 @@ cp ../python_models/identity_fp32/config.pbtxt ./models/multi_file/
prev_num_pages=`get_shm_pages`
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
echo -e "\n***\n*** Failed to start $SERVER\n***"
exit 1
fi

Expand Down Expand Up @@ -286,9 +286,9 @@ export MY_ENV="MY_ENV"
prev_num_pages=`get_shm_pages`
run_server
if [ "$SERVER_PID" == "0" ]; then
cat $SERVER_LOG
echo -e "\n***\n*** Failed to start $SERVER\n***"
echo -e "\n***\n*** Environment variable test failed \n***"
cat $SERVER_LOG
exit 1
fi

Expand All @@ -315,8 +315,8 @@ SERVER_ARGS="$BASE_SERVER_ARGS --backend-config=python,shm-default-byte-size=$sh

run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
echo -e "\n***\n*** Failed to start $SERVER\n***"
exit 1
fi

Expand Down Expand Up @@ -355,6 +355,7 @@ else
if grep "$ERROR_MESSAGE" $SERVER_LOG; then
echo -e "Found \"$ERROR_MESSAGE\"" >> $CLIENT_LOG
else
echo $CLIENT_LOG
echo -e "Not found \"$ERROR_MESSAGE\"" >> $CLIENT_LOG
RET=1
fi
Expand Down Expand Up @@ -444,7 +445,6 @@ fi
if [ $RET -eq 0 ]; then
echo -e "\n***\n*** Test Passed\n***"
else
cat $SERVER_LOG
echo -e "\n***\n*** Test FAILED\n***"
fi

Expand Down
17 changes: 15 additions & 2 deletions qa/common/shm_util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -35,6 +35,7 @@
# By default, find tritonserver on "localhost", but can be overridden
# with TRITONSERVER_IPADDR envvar
_tritonserver_ipaddr = os.environ.get('TRITONSERVER_IPADDR', 'localhost')
_test_jetson = bool(int(os.environ.get('TEST_JETSON', 0)))


def _range_repr_dtype(dtype):
Expand Down Expand Up @@ -357,13 +358,17 @@ def __init__(self, shm_monitors):
self._shm_monitors = shm_monitors

def __enter__(self):
    """Record the free memory of every monitored region on entry.

    When ``_test_jetson`` is set, nothing is recorded. Otherwise the value
    of ``free_memory()`` for each monitor in ``self._shm_monitors`` is
    stored, in order, in ``self._shm_region_free_sizes``.

    Returns:
        This probe instance, so it can be bound in a ``with`` statement.
    """
    if not _test_jetson:
        # Bind the attribute before filling the list so the statement
        # order matches the original implementation.
        baseline = []
        self._shm_region_free_sizes = baseline
        for monitor in self._shm_monitors:
            baseline.append(monitor.free_memory())

    return self

def __exit__(self, type, value, traceback):
if _test_jetson:
return
current_shm_sizes = []
for shm_monitor in self._shm_monitors:
current_shm_sizes.append(shm_monitor.free_memory())
Expand All @@ -379,6 +384,8 @@ def __exit__(self, type, value, traceback):
assert not shm_leak_detected, "Shared memory leak detected."

def __init__(self, prefix='triton_python_backend_shm_region'):
if _test_jetson:
return
import triton_shm_monitor
self._shm_monitors = []
shm_regions = listdir('/dev/shm')
Expand All @@ -388,4 +395,10 @@ def __init__(self, prefix='triton_python_backend_shm_region'):
triton_shm_monitor.SharedMemoryManager(shm_region))

def Probe(self):
    """Create a ``ShmLeakProbe`` context manager for this detector.

    Returns:
        ``ShmLeakProbe(None)`` when ``_test_jetson`` is set, otherwise a
        ``ShmLeakProbe`` built from ``self._shm_monitors``.
    """
    # Jetson cleanup takes too long and results in false positives.
    # Do not use the shared memory check on Jetson.
    # [DLIS-4876] Investigate how to re-enable shared memory check on Jetson.
    if _test_jetson:
        # __init__ returns early on Jetson without assigning
        # self._shm_monitors, so the attribute must not be read here.
        return self.ShmLeakProbe(None)
    return self.ShmLeakProbe(self._shm_monitors)