From cee57386ac9de0d145fe1a14196b7af4b7b5cc81 Mon Sep 17 00:00:00 2001 From: Olga Andreeva Date: Wed, 5 Jul 2023 16:53:29 -0700 Subject: [PATCH] Revised according to comments, refactored test, added ensemble check --- docs/user_guide/trace.md | 1 + qa/L0_trace/opentelemetry_unittest.py | 89 +++++++++++++++++++++------ qa/L0_trace/test.sh | 58 +++++++---------- qa/L0_trace/trace_collector.py | 66 -------------------- src/tracer.cc | 19 +++--- src/tracer.h | 8 +-- 6 files changed, 110 insertions(+), 131 deletions(-) delete mode 100644 qa/L0_trace/trace_collector.py diff --git a/docs/user_guide/trace.md b/docs/user_guide/trace.md index 1188a10edf0..473ce8075c0 100644 --- a/docs/user_guide/trace.md +++ b/docs/user_guide/trace.md @@ -430,6 +430,7 @@ The meaning of the trace timestamps is: ## OpenTelemetry trace support Triton provides an option to generate and export traces +for standalone and ensemble models using [OpenTelemetry APIs and SDKs](https://opentelemetry.io/). To specify OpenTelemetry mode for tracing, specify the `--trace-config` diff --git a/qa/L0_trace/opentelemetry_unittest.py b/qa/L0_trace/opentelemetry_unittest.py index daaffe192e3..24901c006ca 100644 --- a/qa/L0_trace/opentelemetry_unittest.py +++ b/qa/L0_trace/opentelemetry_unittest.py @@ -27,28 +27,38 @@ import sys sys.path.append("../common") - import json import unittest - +import tritonclient.http as httpclient +import tritonclient.grpc as grpcclient +import numpy as np import test_util as tu +import time +EXPECTED_NUM_SPANS = 10 class OpenTelemetryTest(tu.TestResultCollector): def setUp(self): - with open('trace_collector.log', 'rt') as f: - data = f.read() - + while True: + with open('trace_collector.log', 'rt') as f: + data = f.read() + if data.count("resource_spans") != EXPECTED_NUM_SPANS: + time.sleep(5) + continue + else: + break + data = data.split('\n') - full_spans = [entry for entry in data if "resource_spans" in entry] + full_spans = [entry.split('POST')[0] for entry in data if "resource_spans" in entry] self.spans = [] for span in full_spans: span = json.loads(span) self.spans.append( span["resource_spans"][0]['scope_spans'][0]['spans'][0]) - self.model_name = "simple" + self.simple_model_name = "simple" + self.ensemble_model_name = "ensemble_add_sub_int32_int32_int32" self.root_span = "InferRequest" def _check_events(self, span_name, events): @@ -102,7 +112,7 @@ def _check_events(self, span_name, events): self.assertFalse( all(entry in events for entry in compute_events)) - elif span_name == self.model_name: + elif span_name == self.simple_model_name: # Check that all request related events (and only them) # are recorded in request span self.assertTrue(all(entry in events for entry in request_events)) @@ -131,14 +141,15 @@ def test_spans(self): parsed_spans.append(span_name) # There should be 6 spans in total: - # 3 for http request and 3 for grpc request. - self.assertEqual(len(self.spans), 6) - # We should have 2 compute spans - self.assertEqual(parsed_spans.count("compute"), 2) - # 2 request spans (named simple - same as our model name) - self.assertEqual(parsed_spans.count(self.model_name), 2) - # 2 root spans - self.assertEqual(parsed_spans.count(self.root_span), 2) + # 3 for http request, 3 for grpc request, 4 for ensemble + self.assertEqual(len(self.spans), 10) + # We should have 3 compute spans + self.assertEqual(parsed_spans.count("compute"), 3) + # 4 request spans (3 named simple - same as our model name, 1 ensemble) + self.assertEqual(parsed_spans.count(self.simple_model_name), 3) + self.assertEqual(parsed_spans.count(self.ensemble_model_name), 1) + # 3 root spans + self.assertEqual(parsed_spans.count(self.root_span), 3) def test_nested_spans(self): @@ -156,9 +167,9 @@ def test_nested_spans(self): self.spans[2], "root span has a parent_span_id specified") - # Last 3 spans in `self.spans` belong to GRPC request + # Next 3 spans in `self.spans` belong to GRPC request # Order of spans and their relationship described earlier - for child, parent in zip(self.spans[3:], self.spans[4:]): + for child, parent in zip(self.spans[3:6], self.spans[4:6]): self._check_parent(child, parent) # root_span should not have `parent_span_id` field @@ -167,6 +178,48 @@ def test_nested_spans(self): self.spans[5], "root span has a parent_span_id specified") + # Final 4 spans in `self.spans` belong to ensemble request + # Order of spans: compute span - request span - request span - root span + for child, parent in zip(self.spans[6:10], self.spans[7:10]): + self._check_parent(child, parent) + + # root_span should not have `parent_span_id` field + self.assertNotIn( + 'parent_span_id', + self.spans[9], + "root span has a parent_span_id specified") + +def prepare_data(client): + + inputs = [] + outputs = [] + input0_data = np.full(shape=(1, 16), fill_value=-1, dtype=np.int32) + input1_data = np.full(shape=(1, 16), fill_value=-1, dtype=np.int32) + + inputs.append(client.InferInput('INPUT0', [1, 16], "INT32")) + inputs.append(client.InferInput('INPUT1', [1, 16], "INT32")) + + # Initialize the data + inputs[0].set_data_from_numpy(input0_data) + inputs[1].set_data_from_numpy(input1_data) + + return inputs + +def prepare_traces(): + + triton_client_http = httpclient.InferenceServerClient("localhost:8000", + verbose=True) + triton_client_grpc = grpcclient.InferenceServerClient("localhost:8001", + verbose=True) + inputs = prepare_data(httpclient) + triton_client_http.infer("simple",inputs) + + inputs = prepare_data(grpcclient) + triton_client_grpc.infer("simple", inputs) + + inputs = prepare_data(httpclient) + triton_client_http.infer("ensemble_add_sub_int32_int32_int32", inputs) + if __name__ == '__main__': unittest.main() diff --git a/qa/L0_trace/test.sh b/qa/L0_trace/test.sh index 991b0aa250d..66f7b007eb0 100755 --- a/qa/L0_trace/test.sh +++ b/qa/L0_trace/test.sh @@ -66,14 +66,19 @@ rm -f *.log rm -fr $MODELSDIR && mkdir -p $MODELSDIR # set up simple and global_simple model using MODELBASE -rm -fr $MODELSDIR && mkdir -p $MODELSDIR && \ - cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \ +cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \ rm -r $MODELSDIR/simple/2 && rm -r $MODELSDIR/simple/3 && \ (cd $MODELSDIR/simple && \ sed -i "s/^name:.*/name: \"simple\"/" config.pbtxt) && \ cp -r $MODELSDIR/simple $MODELSDIR/global_simple && \ (cd $MODELSDIR/global_simple && \ sed -i "s/^name:.*/name: \"global_simple\"/" config.pbtxt) && \ + cp -r $ENSEMBLEDIR/simple_onnx_int32_int32_int32 $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \ + rm -r $MODELSDIR/ensemble_add_sub_int32_int32_int32/2 && \ + rm -r $MODELSDIR/ensemble_add_sub_int32_int32_int32/3 && \ + (cd $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \ + sed -i "s/^name:.*/name: \"ensemble_add_sub_int32_int32_int32\"/" config.pbtxt && \ + sed -i "s/model_name:.*/model_name: \"simple\"/" config.pbtxt) RET=0 @@ -661,11 +666,12 @@ wait $SERVER_PID OPENTELEMETRY_TEST=opentelemetry_unittest.py OPENTELEMETRY_LOG="opentelemetry_unittest.log" -TEST_RESULT_FILE="opentelemetry_results.txt" EXPECTED_NUM_TESTS="2" -SERVER_ARGS="--trace-config=triton,file=some_file.log --trace-config=level=TIMESTAMPS \ - --trace-config=rate=1 --trace-config=count=6 --trace-config=mode=opentelemetry --trace-config=opentelemetry,url=localhost:$OTLP_PORT --model-repository=$MODELSDIR" +SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=1 \ + --trace-config=count=100 --trace-config=mode=opentelemetry \ + --trace-config=opentelemetry,url=localhost:$OTLP_PORT \ + --model-repository=$MODELSDIR" SERVER_LOG="./inference_server_trace_config.log" run_server if [ "$SERVER_PID" == "0" ]; then @@ -674,37 +680,18 @@ if [ "$SERVER_PID" == "0" ]; then exit 1 fi -# This is a simple python code that opens port -python $TRACE_COLLECTOR $OTLP_PORT $TRACE_COLLECTOR_LOG & -COLLECTOR_PID=$! - -set +e - -# To make sure receiver is ready -sleep 3 - -# Send http request and collect trace -$SIMPLE_HTTP_CLIENT >> client_update.log 2>&1 -if [ $? -ne 0 ]; then - cat client_update.log - RET=1 -fi - -# Send grpc request and collect trace -$SIMPLE_GRPC_CLIENT >> client_update.log 2>&1 -if [ $? -ne 0 ]; then - cat client_update.log - RET=1 -fi -# To make sure log gets all data -sleep 3 - -kill $COLLECTOR_PID -wait $COLLECTOR_PID +# Using netcat as trace collector +apt-get update && apt-get install -y netcat +nc -l -k 127.0.0.1 $OTLP_PORT >> $TRACE_COLLECTOR_LOG 2>&1 & COLLECTOR_PID=$! +# Preparing traces for unittest. +# Note: need to run this separately, to speed up trace collection. +# Otherwise internal (opentelemetry_unittest.OpenTelemetryTest.setUp) check +# will slow down collection. +python -c 'import opentelemetry_unittest; opentelemetry_unittest.prepare_traces()' >>$CLIENT_LOG 2>&1 set +e - +# Unittest will not start untill expected number of spans is collected. python $OPENTELEMETRY_TEST >>$OPENTELEMETRY_LOG 2>&1 if [ $? -ne 0 ]; then cat $OPENTELEMETRY_LOG @@ -718,11 +705,12 @@ else fi fi +kill $COLLECTOR_PID +wait $COLLECTOR_PID + set -e kill $SERVER_PID wait $SERVER_PID -set +e - exit $RET diff --git a/qa/L0_trace/trace_collector.py b/qa/L0_trace/trace_collector.py deleted file mode 100644 index ddfdfa7853e..00000000000 --- a/qa/L0_trace/trace_collector.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/python - -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import socket -import sys - -if __name__ == "__main__": - """ - This script is intended to be a mock opentelemetry trace collector. - It sets up a “listening” socket on provided port and receives data. - It is intended to be used with small traces (under 4096 bytes). - After trace is received, it is printed into the log file. - - Port and log file path can be provided with command line arguments: - - python trace_collector.py 10000 my.log - - By default, port is set to 10000 and file_path to "trace_collector.log" - - NOTE: It does not support OpenTelemetry protocol and is not intended to - support OTLP, use for validating exported tests only. - """ - - port = 1000 if sys.argv[1] is None else int(sys.argv[1]) - file_path = "trace_collector.log" if sys.argv[2] is None else sys.argv[2] - - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_address = ('localhost', port) - sock.bind(server_address) - sock.listen(1) - - while True: - trace = '' - connection, client_address = sock.accept() - with connection: - with open(file_path, "a") as sys.stdout: - chunk = connection.recv(4096) - if not chunk: - break - connection.sendall(chunk) - trace = chunk.decode() - print(trace) diff --git a/src/tracer.cc b/src/tracer.cc index 73508684c75..ad23a4a0234 100644 --- a/src/tracer.cc +++ b/src/tracer.cc @@ -374,8 +374,8 @@ TraceManager::Trace::InitTracer( std::chrono::steady_clock::now().time_since_epoch()) .count(); auto root_span = - StartSpan("InferRequest", steady_timestamp_ns, true /*is_root_span*/); - // Initializing OTel context and storring "InferRequest" span as a root span + StartSpan("InferRequest", steady_timestamp_ns); + // Initializing OTel context and storing "InferRequest" span as a root span // to keep it alive for the duration of the request. otel_context_ = opentelemetry::context::Context({kRootSpan, root_span}); } @@ -391,6 +391,8 @@ TraceManager::Trace::StartSpan( // and can not be a parent of any sub-span. If parent_id==0, // then current model is either a standalone model, or an ensemble model. // In both cases, the parent of the new request sub-span is the kRootSpan. + // A request span with trace id = `trace_id` is a parent of a compute span, + // started in the same trace. // If parent_id > 0, then this is a child trace, spawned from // the ensamble's main request. For this instance, the parent // span is the ensembles's request span. @@ -399,7 +401,11 @@ TraceManager::Trace::StartSpan( TRITONSERVER_InferenceTraceParentId(trace, &parent_id), "getting trace parent id"); std::string parent_span_key = - (parent_id != 0) ? kRequestSpan + std::to_string(parent_id) : kRootSpan; + (parent_id == 0 && activity == TRITONSERVER_TRACE_REQUEST_START) + ? kRootSpan + : (activity == TRITONSERVER_TRACE_REQUEST_START) + ? kRequestSpan + std::to_string(parent_id) + : kRequestSpan + std::to_string(trace_id); std::string display_name = "compute"; const char* model_name; @@ -410,8 +416,7 @@ TraceManager::Trace::StartSpan( display_name = model_name; } - auto span = StartSpan( - display_name, timestamp_ns, false /*is_root_span*/, parent_span_key); + auto span = StartSpan(display_name, timestamp_ns, parent_span_key); if (activity == TRITONSERVER_TRACE_REQUEST_START) { int64_t model_version; @@ -430,7 +435,7 @@ TraceManager::Trace::StartSpan( opentelemetry::nostd::shared_ptr TraceManager::Trace::StartSpan( std::string display_name, const uint64_t& raw_timestamp_ns, - bool is_root_span, std::string parent_span_key) + std::string parent_span_key) { otel_trace_api::StartSpanOptions options; options.kind = otel_trace_api::SpanKind::kServer; @@ -441,7 +446,7 @@ TraceManager::Trace::StartSpan( // If the new span is a child span, we need to retrieve its parent from // the context and provide it through StartSpanOptions to the child span - if (!is_root_span && otel_context_.HasKey(parent_span_key)) { + if (!parent_span_key.empty() && otel_context_.HasKey(parent_span_key)) { auto parent_span = opentelemetry::nostd::get< opentelemetry::nostd::shared_ptr>( otel_context_.GetValue(parent_span_key)); diff --git a/src/tracer.h b/src/tracer.h index 15893417f28..55bf2b9800a 100644 --- a/src/tracer.h +++ b/src/tracer.h @@ -250,15 +250,13 @@ class TraceManager { /// OpenTelemetry SystemTimestamp to display span on a timeline, and /// OpenTelemetry SteadyTimestamp to calculate the duration on the span /// with better precision. - /// \param is_root_span If true, a root span will be started, - /// i.e. with no parent span specified. If false, a new child span will - /// be started. /// \param parent_span_key A span key, to find a parent span in the - /// OpenTelemetry context. + /// OpenTelemetry context. If empty, a root span will be started, + /// i.e. with no parent span specified. /// \return A shared pointer to a newly created OpenTelemetry span. opentelemetry::nostd::shared_ptr StartSpan( std::string display_name, const uint64_t& raw_timestamp_ns, - bool is_root_span, std::string parent_span_key = ""); + std::string parent_span_key = ""); /// Ends the provided span. ///