From 17890906f51f48911d58f2e8daa259f28345bd60 Mon Sep 17 00:00:00 2001 From: Daniel Bunting Date: Mon, 19 Jun 2023 18:30:13 +0100 Subject: [PATCH 1/6] Working but slower --- Dockerfile | 31 ++++++ models/.DS_Store | Bin 0 -> 6148 bytes .../1/model.savedmodel/fingerprint.pb | 1 + .../1/model.savedmodel/saved_model.pb | Bin 0 -> 13286 bytes .../variables/variables.data-00000-of-00001 | Bin 0 -> 93 bytes .../variables/variables.index | Bin 0 -> 144 bytes models/category_tensorflow_model/config.pbtxt | 43 +++++++++ models/test_bls/1/model.py | 77 +++++++++++++++ models/test_bls/config.pbtxt | 22 +++++ src/pb_stub.cc | 15 ++- src/pb_tensor.cc | 90 ++++++++++++++++++ src/pb_tensor.h | 4 + 12 files changed, 280 insertions(+), 3 deletions(-) create mode 100644 Dockerfile create mode 100644 models/.DS_Store create mode 100644 models/category_tensorflow_model/1/model.savedmodel/fingerprint.pb create mode 100644 models/category_tensorflow_model/1/model.savedmodel/saved_model.pb create mode 100644 models/category_tensorflow_model/1/model.savedmodel/variables/variables.data-00000-of-00001 create mode 100644 models/category_tensorflow_model/1/model.savedmodel/variables/variables.index create mode 100755 models/category_tensorflow_model/config.pbtxt create mode 100644 models/test_bls/1/model.py create mode 100644 models/test_bls/config.pbtxt diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..34dd6839 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,31 @@ +FROM nvcr.io/nvidia/tritonserver:23.05-tf2-python-py3 + +#RUN DEBIAN_FRONTEND="noninteractive" apt-get update && apt-get -y install tzdata + +RUN apt-get update \ + && apt-get install -y build-essential \ + gcc \ + g++ \ + gdb \ + clang \ + make \ + ninja-build \ + cmake \ + autoconf \ + automake \ + libtool \ + valgrind \ + locales-all \ + dos2unix \ + rsync \ + tar +RUN apt-get install -y python3-pip python3.10-dev +RUN apt-get install -y rapidjson-dev libarchive-dev zlib1g-dev +RUN apt-get install -y git +RUN pip3 install numpy +RUN rm -r /opt/tritonserver/backends/python +RUN git config --global --add safe.directory '*' +RUN apt-get install -y ssh + +RUN useradd -m user && yes password | passwd user +RUN apt-get install gdbserver \ No newline at end of file diff --git a/models/.DS_Store b/models/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..de05d4be4d0f41bf235093a6bce94a0695254860 GIT binary patch literal 6148 zcmeHKy-veG4EB{ugdd5HFmXFm2lk^1PtXS_q$y=cNku`3EwJ%mya5v%Z^CE$R2otk zB7_jKCHuSF*>~}GQXCVJ8{Fo7q8;yorr~W9 ze~|&cy8{~2Ic{u1tM@mdD_Y2XUQhwN!>!JMIsA%Z+8>U~*<4dP-fg$btW49aC^JMS zZ`+UaqrK;owmaoNwA1`@H8V*t5pEW+Bg(+VqMrTc3%~zopVi!6pB_STD7mfi`*`#3 z{Oa%JaiobcU W17L_)DPjg87b6`tyzpZ4>3>?yD9*|K-D>1;O6gk9ScmY6|d6FZw^H`yO-gF+Iv)1GeI zy&g{w-97$UBoq(=Vks!%Cx;xD9HK~E;D!JR1mcJQ2_ZPaJtq#F_<7Ym{WmjhkFzU6 zBwMnkyQ{nE)vNEl_tmQ>5&E}v_-T>8QH1M|UQ}&|4glGe)5}(Eh2iC%oPNf3R^&Xm zk7(N2YwG-tKW*Qg0BOZ)HEOD()AXuo)|oJ!f_ca+nho1w!;m#wPP6668O{BA7&6;x zy`|Hvqc?2Rl83-+@%n=cAYIY)hGRH;6d+?X__t(*4F(;N6&Yn~j}I-#UQ?ZGtvbyD zQSnXwyUM@FL#}C=TC2vtmnUG*vDAj$G;Li`Ls8=mtC5DuV36tv6e{_`C=tYgba8w_ zqVP=u^qOw1>tCoWZs@hmrfD=Bn@+%JEx1*hmcC}}>UJqd$AN0P?HCQ!G0aB0rIeE= zVOZ1ax}z&*T~jo}vI7Z-`~Zxq^}4yEY#Fv~G}e_hqppW9q`plEWFdXsywQYfkiMzb zHfaH}u6}KtbeUX_>n#uDIu%jUX);S9T9k(&t?nAO+s1@a3VuRh_@-*%GN8D+wuo}j zL43(tw=3d`(O@`^x3HD;3SP*gz#mhn8@4meSNhRuBA)=avEwPO-WpA>A!zW0Y|U(} z8S6A_I~Gon2{a`$$a_ywPo`tW+9qL0-_>g^$FvkaOtcXeVen>Mt?3)6Ox@}RUmR;V zddA*Ru@CQurK>`TAa=!@<_?F^ZW~T}98P$DhXd(k2#W&bD^MV#LN+4~ASj-K+>)-= 
zpE0%*zdjXyO<|-`;Gjxpiy3izg7;v;z!Jj5v=CPa|Ez}~#Lo!iJ$q3a>XwVB6ENZm zuc!@8vFk>SKV?P8dGFbktZQ(#T!Qvhd>Lj5hff`{%j))r9E~1zv`aV|qP^zOAsE0V z)?9d%fUApJDgV{EvXiV`gUqtKVo}#{&WEBbArf6w?31GOL5}7n!h&Z}yiY;!*z-F~ zm=&vNOBFP6cpipv6id^!XVKue!f76)Rn>N{aWw{9;8!VD7)HH!#5|~(a(ATcD>K{Av zM7+I+kb4SF@F^IzC4F7reYRDvqXVyPaHYE59#M?Q_nb=!WW-ogbsXJl_@)m3B#>W5 zcYzDE%z+=(EiOoARbMw6a^^}y(|2*@M7^Q;cO#JFjs|^k`$-biaQj!bRC_aUnCSl`9H!%837u8FZZvJf?rv%5I|_HY9W9gGuF(~4_Gm;r0iSqd z{VK*AG@`7lF*JPc>O-yNFi1x|{R9lydQ-Jj^eny>xu=Ei5D=GJTQLQmgrcSMu*n0J zfi{YsMNY36+HPD|V=%C(>rD@-LFkv|(8Eo@ScM18bS5he4CaRl!y`pHIzE9e$?dP( z^yG}KTibd$Te&b-xiDLq-1aWcTeZ3KbCvV!v)by#Pd}l}YEN98C-h&>!8b5a)i63K zHS~jqhNfZDXb>pRLP7Ds#ErCx&8K0i%IKi#-KzDs2A$aSB@(hR-ygdaIO3kcuUv6yc z2;qjhjuSUDo1&*GKh&l4A`DlD;(9&J>BTqjfZN5r9+A0stX!@6lfoOI)1p@vL!=RL=Ib^CR_$)raa3W>APuj2W z*KR<;#;9$%gwTyoG068NE~4DE2Z|Gc9|*~DB`M&r4jGuIyBKhCxj1lghXE%^^!tvL zvm7fAES-YMC{`3#^T?-AyUG*u@Ld5;Cd6B1j`pjWGR7cIj$PaNztzYiTqE+*893cb zBWRwSmMZh}^z|oUe;6*qX!JeT`BmmQ9|}EjiE!c?b%`ay84|`{cZY{}ufZ4wpqZ|S z)3(HZOiwIN`ld%0M$oQORrq({+u2vV5Xa*ji^;-H_Z+$EMGG|h91mqY2t3kd|6?#( zEm8rkI~qsI!_k;1OcQp)C#)WCjSCPCz`6#IAX`o2075m7GmmL`W$uvnO3==g?$J=(U--+-wcp7}ym&I+gZtr@l&!SW zGj5WGjmKmP$AWfZh9efQc@_uJn>ZS~3K`t!&g}WBPm(IehsJ#1MSd`{f80;29+=rdj+?zR!i4#2kmdASstx4=z`_2%j%sEW1Q@D z0wv9K6uV|}3a!-DJ>8n|vq#=9!z__IJ|BZcT$#UkM{owkXv%4oa(D1Hfs@@L_*+DO zugK)?cRcZ`?Tl7*{(W(+(;YiYOkJ?3lU$3W-GT&MFFD$>1`87Er5`!uQ=#*xML*FO zb~X)TNGmaMXtnnrc;o214jraLo0mDoTxwzD*wV`{z>vojeO6zE`GZ*ffo-o!Df$u| zd8Ytp;e_HwH@m6Vj5R~o-1H31(+F~^x6n>r{t&&NOX$5oV_^*bQiP2IX$(bL$&(U6 z+L1?PPI0y?p0;tzjd9zL%F0|RhkNzAq(_F6?Ii5pd+zC3bl5nlE+;y{v2eo=RJvt9 zUY(h_P-ZV5MA4Ibi#V@bJnC_Zp1FX#Vuz`aTjZz<;#|M7#(Iyj5Z*f|j=t5LafkRC zrfS={ul6rH)hi7W*a9oKB8B%OZ4rYfokF&~E1zc1@WR$&*@H+XA+}Cz*pu_1ajsLL zBqWaWVl*dqgy)oH zTbNQ7pTE5L!p$33uCF}3@Zx3V#=_Ohiz~`=OHbc?o|(8sbW_D#{Y^YX3G&YODol)1 z_`;;vpSj7nBhvOSV;!2V`ngsN)>A#y6sVh>Nsv8!Bjyo1m**Vj6gF@ZkBN?!ipeGn zOO{zzG`)#gKTI^eUO;IkDB65&YQ9EN3X@yt@F^dxWmN;CzU5zn0-IaH1LXoSSsT<({RwwiAPdhN zdOXmXZ0M0%VEY8dgF=^&dShb&3uXBq?QMQYqLn^j(NCPZJ`l*OnJ*0=^rqX#}lj(8xurf9Yl4OCY{$~hA62?nVT1Qu`TZl zOSnv|$i+mD(NN4L#dBEC36?*LXA^E=fd7ioFdY8M!}Ot~Ch)$+l3G&!D*?CkgZ_cQ zrRXU+oom+)g$L)=*%87Hs8Pa7*Vq{VeH;sqc@v}`CjG8wabM-c_81>qJBg86XD(?wBOd68|Zwc@#@?-HA1O|D9;nUXoG9J;;C%qD8 zdhs2x0{Pn!9|wioF^vM;marHE*SN6`+1qKi+HVMQ+gL@1MbNy;2@9g}5Z&6`ac?F- z()2a8Rma+;!Mmaa_y+&SB3ann;U!M)cj7x@iei?~dlY=ZU!*XoHX0_cjl{zUD!wnK z%2)1+&%%TI!fAQ{8n9^j@K^3cD~sU4u&xLz8i#^HB*Z-yMY&7yTVdhX3EGY5Eq)eN z669me$7@1xasEUW{z&?W~P`?aZk;bd?V_hwj|^!I)i<>!RGN$zshi+9Boe2u&>eic3wo6r$wc&TH0N1gGJ GCi!0puR%`$ literal 0 HcmV?d00001 diff --git a/models/category_tensorflow_model/1/model.savedmodel/variables/variables.data-00000-of-00001 b/models/category_tensorflow_model/1/model.savedmodel/variables/variables.data-00000-of-00001 new file mode 100644 index 0000000000000000000000000000000000000000..94b922e8b26ce0b6fe329e8f559948485e869984 GIT binary patch literal 93 zcma!;{iMo(OPhZWuZvy}{4i~fl literal 0 HcmV?d00001 diff --git a/models/category_tensorflow_model/config.pbtxt b/models/category_tensorflow_model/config.pbtxt new file mode 100755 index 00000000..8f8d8dbd --- /dev/null +++ b/models/category_tensorflow_model/config.pbtxt @@ -0,0 +1,43 @@ +name: "category_tensorflow_model" +platform: "tensorflow_savedmodel" +max_batch_size: 0 + +parameters: { + key: "TF_SIGNATURE_DEF" + value: { + string_value: "call" + } +} + +input [ + { + name: "candidates" + data_type: TYPE_FP32 + dims: [ -1 , -1] + + } +] +input [ + { + name: "user_history" + data_type: TYPE_FP32 + dims: [ -1 , -1] + + } +] +output [ + { + name: "scores" + 
data_type: TYPE_FP32 + dims: [ -1 ] + } +] + +instance_group [ + { + count: 2 + kind: KIND_CPU + } + ] + +dynamic_batching { } \ No newline at end of file diff --git a/models/test_bls/1/model.py b/models/test_bls/1/model.py new file mode 100644 index 00000000..ff5e255a --- /dev/null +++ b/models/test_bls/1/model.py @@ -0,0 +1,77 @@ +""" +Category model +""" +import time +from typing import cast + +import numpy as np + +try: + import triton_python_backend_utils as pb_utils +except ImportError: + import tests.stub.triton_python_backend_utils + + pb_utils: tests.stub.triton_python_backend_utils = cast( + tests.stub.triton_python_backend_utils, None + ) + + +def breakpoint(): + import pydevd_pycharm + + pydevd_pycharm.settrace( + 'host.docker.internal', port=5858, stdoutToServer=True, stderrToServer=True + ) + + +class TritonPythonModel: + def initialize(self, args): + import triton_python_backend_utils + self.shm = triton_python_backend_utils.shared_memory + + def execute_request(self, request): + candidates_cache = np.random.random((500000, 200)).astype(np.float32) + n = pb_utils.get_input_tensor_by_name(request, "n").as_numpy()[0] + candidates = np.random.randint(100000, size=int(n)) + + candidate_tensor: pb_utils.Tensor = pb_utils.new_shm_tensor("candidates", self.shm, (n, 200), np.float32) + arr = np.ndarray((n, 200), dtype=np.float32, buffer=candidate_tensor.memory_view()) + #arr[:] = np.random.random((int(n), 200)).astype(np.float32).data + + s1 = time.time() + np.take(candidates_cache, candidates, axis=0, out=arr) + s2 = time.time() + pb_utils.Logger.log_error(f"Take time - {s2-s1}") + + context_array = np.random.random((10, 200)).astype(np.float32) + #candidates_array = np.take(candidates_cache, candidates, axis=0) + + #candidate_tensor = pb_utils.Tensor( + # "candidates", + # candidates_array, + #) + + context_tensor = pb_utils.Tensor( + "user_history", + context_array, + ) + + inference_response = pb_utils.InferenceRequest( + model_name="category_tensorflow_model", + requested_output_names=["scores"], + inputs=[candidate_tensor, context_tensor], + ).exec() + + if inference_response.has_error(): + raise pb_utils.TritonModelException(inference_response.error().message()) + else: + scores = pb_utils.get_output_tensor_by_name(inference_response, "scores") + + out_scores = pb_utils.Tensor("scores", scores.as_numpy()[:400]) + + response = pb_utils.InferenceResponse(output_tensors=[out_scores]) + + return response + + def execute(self, requests): + return [self.execute_request(request) for request in requests] diff --git a/models/test_bls/config.pbtxt b/models/test_bls/config.pbtxt new file mode 100644 index 00000000..95b8e3ce --- /dev/null +++ b/models/test_bls/config.pbtxt @@ -0,0 +1,22 @@ +name: "test_bls" +backend: "python" + +input [ + { + name: "n" + data_type: TYPE_INT32 + dims: [ -1] + + } +] + +output [ + { + name: "scores" + data_type: TYPE_FP32 + dims: [ -1 ] + } +] + + +instance_group [{ kind: KIND_CPU }] diff --git a/src/pb_stub.cc b/src/pb_stub.cc index 9539a250..26b7daf9 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -431,8 +431,12 @@ Stub::StubSetup() py::setattr( python_backend_utils, "MetricFamily", c_python_backend_utils.attr("MetricFamily")); + py::setattr( + python_backend_utils, "new_shm_tensor", + c_python_backend_utils.attr("new_shm_tensor")); c_python_backend_utils.attr("shared_memory") = py::cast(shm_pool_.get()); + python_backend_utils.attr("shared_memory") = py::cast(shm_pool_.get()); deserialize_bytes_ = 
python_backend_utils.attr("deserialize_bytes_tensor"); serialize_bytes_ = python_backend_utils.attr("serialize_byte_tensor"); @@ -494,6 +498,7 @@ Stub::Initialize(bi::managed_external_buffer::handle_t map_handle) python_backend_utils, "InferenceResponse", c_python_backend_utils.attr("InferenceResponse")); c_python_backend_utils.attr("shared_memory") = py::cast(shm_pool_.get()); + python_backend_utils.attr("shared_memory") = py::cast(shm_pool_.get()); py::object TritonPythonModel = sys.attr("TritonPythonModel"); deserialize_bytes_ = python_backend_utils.attr("deserialize_bytes_tensor"); @@ -1516,7 +1521,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) .def("get_response_sender", &InferRequest::GetResponseSender); py::class_>(module, "Tensor") - .def(py::init(&PbTensor::FromNumpy)) + .def(py::init(&PbTensor::FromNumpy), py::arg("name"), py::arg("numpy_array")) .def("name", &PbTensor::Name) // The reference_internal is added to make sure that the NumPy object has // the same lifetime as the tensor object. This means even when the NumPy @@ -1531,8 +1536,10 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) .def("shape", &PbTensor::Dims) .def("from_dlpack", &PbTensor::FromDLPack) .def("__dlpack__", &PbTensor::DLPack, py::arg("stream") = py::none()) - .def("__dlpack_device__", &PbTensor::DLPackDevice); - + .def("__dlpack_device__", &PbTensor::DLPackDevice) + .def("memory_view", [](std::shared_ptr& t) { + return py::memoryview::from_memory(t->DataPtr(), t->ByteSize() * 8); + }); py::class_>( module, "InferenceResponse") .def( @@ -1603,6 +1610,8 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) py::register_exception( module, "TritonModelException"); + + module.def("new_shm_tensor", &PbTensor::CreateInSHM, "Creates a new Tensor directly into shared memory"); } extern "C" { diff --git a/src/pb_tensor.cc b/src/pb_tensor.cc index 20d5302f..7fecf451 100644 --- a/src/pb_tensor.cc +++ b/src/pb_tensor.cc @@ -35,6 +35,9 @@ namespace py = pybind11; #endif #include "pb_tensor.h" +#ifndef TRITON_PB_STUB +#include "triton/common/logging.h" +#endif namespace triton { namespace backend { namespace python { @@ -226,6 +229,67 @@ delete_unused_dltensor(PyObject* dlp) } } +std::shared_ptr +PbTensor::CreateInSHM(const std::string& name, SharedMemoryManager& shm_pool, std::vector dims, py::object data_type) +{ + + // Input params of tensor + //std::vector dims = std::vector({10, 10}); + TRITONSERVER_DataType dtype = numpy_to_triton_type(data_type); + + TRITONSERVER_MemoryType memory_type_ = TRITONSERVER_MEMORY_CPU; + uint64_t elements = 1; + for (size_t i = 0; i < dims.size(); i++) { + elements *= dims[i]; + } + py::module np = py::module::import("numpy"); + uint64_t byte_size_ = elements * np.attr("dtype")(data_type).attr("itemsize").cast(); + + uint64_t byte_size; + byte_size = sizeof(TensorShm) + sizeof(int64_t) * dims.size() + + PbString::ShmStructSize(name) + + PbMemory::ShmStructSize(memory_type_, byte_size_); + + // Do the allocation + AllocatedSharedMemory tensor_shm = shm_pool.Construct(byte_size); + + // Wrap the raw memory in TensorShm + auto* tensor_shm_ptr = reinterpret_cast(tensor_shm.data_.get()); + tensor_shm_ptr->dtype = dtype; + tensor_shm_ptr->dims_count = dims.size(); + auto shm_handle = tensor_shm.handle_; + + // Write the dimensions data to shared memory. 
+ auto* dims_shm_ptr_ = reinterpret_cast( + reinterpret_cast(tensor_shm_ptr) + sizeof(TensorShm)); + for (size_t i = 0; i < dims.size(); i++) { + dims_shm_ptr_[i] = dims[i]; + } + + // Write the name data to shared memory. + std::size_t name_offset = sizeof(TensorShm) + sizeof(int64_t) * dims.size(); + auto name_shm = PbString::Create(name, reinterpret_cast(tensor_shm_ptr) + name_offset, shm_handle + name_offset); + + int64_t memory_type_id_ = 0; // Maybe + + std::size_t pb_memory_offset = name_offset + PbString::ShmStructSize(name); + auto pb_memory = PbMemory::Create( + memory_type_, memory_type_id_, byte_size_, + nullptr, + reinterpret_cast(tensor_shm_ptr) + pb_memory_offset, + shm_handle + pb_memory_offset, false); + tensor_shm_ptr->memory = 0; + + LOG_INFO << "CreateInSHM written to: "; + LOG_INFO << "tensor_shm_ptr: " << tensor_shm_ptr ; + LOG_INFO << "name_offset: " << name_offset ; + LOG_INFO << "pb_memory_offset: " << pb_memory_offset ; + + return std::unique_ptr( + new PbTensor(tensor_shm, name_shm, pb_memory)); +} + + std::shared_ptr PbTensor::FromNumpy(const std::string& name, py::array& numpy_array) { @@ -531,7 +595,17 @@ void PbTensor::SaveToSharedMemory( std::unique_ptr& shm_pool, bool copy_gpu) { + LOG_INFO << "Save to memory PbTensor- " << name_; + LOG_INFO << "tensor_shm_.data_" << static_cast(tensor_shm_.data_.get()); + + auto mem = static_cast(memory_ptr_); + for ( unsigned int i = 0; i < 10; i++ ) { + LOG_INFO << mem[i] << " "; + } + if (!tensor_shm_.data_) { + LOG_INFO << "SaveToSharedMemory - tensor_shm_.data is empty" << name_; + uint64_t byte_size; if (!pb_memory_) { byte_size = sizeof(TensorShm) + sizeof(int64_t) * dims_.size() + @@ -602,6 +676,8 @@ PbTensor::LoadFromSharedMemory( pb_memory = PbMemory::LoadFromSharedMemory( shm_pool, tensor_shm_ptr->memory, open_cuda_handle); } + // + LOG_INFO << "Loaded from memory PbTensor- " << name_shm->String(); return std::unique_ptr( new PbTensor(tensor_shm, name_shm, pb_memory)); @@ -631,11 +707,15 @@ PbTensor::PbTensor( : tensor_shm_(std::move(tensor_shm)), name_shm_(std::move(name_shm)), pb_memory_(std::move(pb_memory)) { + + tensor_shm_ptr_ = reinterpret_cast(tensor_shm_.data_.get()); dims_shm_ptr_ = reinterpret_cast( reinterpret_cast(tensor_shm_ptr_) + sizeof(TensorShm)); name_ = name_shm_->String(); + LOG_INFO << "Creating PbTensor from SHM" << name_; + dims_ = std::vector( dims_shm_ptr_, dims_shm_ptr_ + tensor_shm_ptr_->dims_count); dtype_ = tensor_shm_ptr_->dtype; @@ -646,6 +726,16 @@ PbTensor::PbTensor( memory_type_id_ = pb_memory_->MemoryTypeId(); shm_handle_ = tensor_shm_.handle_; + + auto mem = static_cast(memory_ptr_); + for ( unsigned int i = 0; i < 10; i++ ) { + LOG_INFO << mem[i] << " "; + } + + LOG_INFO << " shm_handle_ -" << shm_handle_; + LOG_INFO << " memory_ptr_ -" << memory_ptr_; + LOG_INFO << " byte_size_ -" << byte_size_; + #ifdef TRITON_PB_STUB if (memory_type_ == TRITONSERVER_MEMORY_CPU || memory_type_ == TRITONSERVER_MEMORY_CPU_PINNED) { diff --git a/src/pb_tensor.h b/src/pb_tensor.h index 79adf500..570469da 100644 --- a/src/pb_tensor.h +++ b/src/pb_tensor.h @@ -130,6 +130,10 @@ class PbTensor { static std::shared_ptr FromNumpy( const std::string& name, py::array& numpy_array); + static std::shared_ptr CreateInSHM( + const std::string& name, SharedMemoryManager& shm_pool, std::vector dims, py::object data_type + ); + /// Get device type in DLPack format. 
DLDeviceType DeviceType(); From a76cc1e4b48f08b0312a7b8fb11c4c9a8a7b2b36 Mon Sep 17 00:00:00 2001 From: Daniel Bunting Date: Tue, 20 Jun 2023 09:52:23 +0000 Subject: [PATCH 2/6] VM updates --- Dockerfile | 2 +- models/test_bls/1/model.py | 26 ++++++++++++++------------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/Dockerfile b/Dockerfile index 34dd6839..30973e42 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/tritonserver:23.05-tf2-python-py3 +FROM asnpdsacr.azurecr.io/public/tritonserver:23.05-tf2-python-py3 #RUN DEBIAN_FRONTEND="noninteractive" apt-get update && apt-get -y install tzdata diff --git a/models/test_bls/1/model.py b/models/test_bls/1/model.py index ff5e255a..7c6094fc 100644 --- a/models/test_bls/1/model.py +++ b/models/test_bls/1/model.py @@ -27,29 +27,31 @@ def breakpoint(): class TritonPythonModel: def initialize(self, args): import triton_python_backend_utils - self.shm = triton_python_backend_utils.shared_memory + #self.shm = triton_python_backend_utils.shared_memory def execute_request(self, request): candidates_cache = np.random.random((500000, 200)).astype(np.float32) n = pb_utils.get_input_tensor_by_name(request, "n").as_numpy()[0] candidates = np.random.randint(100000, size=int(n)) - candidate_tensor: pb_utils.Tensor = pb_utils.new_shm_tensor("candidates", self.shm, (n, 200), np.float32) - arr = np.ndarray((n, 200), dtype=np.float32, buffer=candidate_tensor.memory_view()) + #candidate_tensor: pb_utils.Tensor = pb_utils.new_shm_tensor("candidates", self.shm, (n, 200), np.float32) + #arr = np.ndarray((n, 200), dtype=np.float32, buffer=candidate_tensor.memory_view()) #arr[:] = np.random.random((int(n), 200)).astype(np.float32).data - s1 = time.time() - np.take(candidates_cache, candidates, axis=0, out=arr) - s2 = time.time() - pb_utils.Logger.log_error(f"Take time - {s2-s1}") + #s1 = time.time() + + # Take time - 0.008064508438110352 + #np.take(candidates_cache, candidates, axis=0, out=arr) + #s2 = time.time() + #pb_utils.Logger.log_error(f"Take time - {s2-s1}") context_array = np.random.random((10, 200)).astype(np.float32) - #candidates_array = np.take(candidates_cache, candidates, axis=0) + candidates_array = np.take(candidates_cache, candidates, axis=0) - #candidate_tensor = pb_utils.Tensor( - # "candidates", - # candidates_array, - #) + candidate_tensor = pb_utils.Tensor( + "candidates", + candidates_array, + ) context_tensor = pb_utils.Tensor( "user_history", From 6ed6f52307a2fc1aaa36e8e1f403dbc87b64d7a5 Mon Sep 17 00:00:00 2001 From: Daniel Bunting Date: Fri, 23 Jun 2023 13:33:42 +0000 Subject: [PATCH 3/6] Working! 
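
What makes this version work: new_shm_tensor allocates the tensor's backing
store directly in Triton's shared-memory pool, and as_numpy() on the result is
a zero-copy view over that block, so a gather can write its rows in place
instead of staging them in a private array that later gets copied into shared
memory. A minimal sketch of the call pattern this enables, mirroring the
test_take model added below (the "candidatesss" name and the (n, 200) float32
shape are just this series' benchmark fixtures):

    import numpy as np
    import triton_python_backend_utils as pb_utils

    shm = pb_utils.shared_memory  # pool handle exported by the patched stub

    n = 100000
    candidates_cache = np.random.random((500000, 200)).astype(np.float32)
    candidates = np.random.randint(100000, size=n)

    # Backing store lives in the shm pool; as_numpy() views it in place.
    tensor = pb_utils.new_shm_tensor("candidatesss", shm, (n, 200), np.float32)
    np.take(candidates_cache, candidates, axis=0, out=tensor.as_numpy(), mode='clip')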
--- models/test_take/1/model.py | 56 +++++++++++++++++++++++++++++++++++ models/test_take/config.pbtxt | 22 ++++++++++++++ src/pb_stub.cc | 6 ++-- src/pb_tensor.cc | 40 ++++--------------------- src/test_tesnor.cpp | 3 ++ 5 files changed, 89 insertions(+), 38 deletions(-) create mode 100644 models/test_take/1/model.py create mode 100644 models/test_take/config.pbtxt create mode 100644 src/test_tesnor.cpp diff --git a/models/test_take/1/model.py b/models/test_take/1/model.py new file mode 100644 index 00000000..459b4bcd --- /dev/null +++ b/models/test_take/1/model.py @@ -0,0 +1,56 @@ +""" +Category model +""" +import time +from typing import cast +import timeit +import numpy as np + +try: + import triton_python_backend_utils as pb_utils +except ImportError: + import tests.stub.triton_python_backend_utils + + pb_utils: tests.stub.triton_python_backend_utils = cast( + tests.stub.triton_python_backend_utils, None + ) + + +def breakpoint(): + import pydevd_pycharm + + pydevd_pycharm.settrace( + 'host.docker.internal', port=5858, stdoutToServer=True, stderrToServer=True + ) + + +class TritonPythonModel: + def initialize(self, args): + import triton_python_backend_utils + shm = triton_python_backend_utils.shared_memory + n = 100000 + candidate_tensor = pb_utils.new_shm_tensor("candidatesss", shm, (n, 200), np.float32) # Offset is 68 + buffer = candidate_tensor.as_numpy() + + pb_utils.Logger.log_error(f"buffer - {buffer}, {buffer.dtype}, {buffer.shape}, {buffer.flags}, {buffer.base}") + candidates_cache = np.random.random((500000, 200)).astype(np.float32) + candidates = np.random.randint(100000, size=n) + np_out = np.empty((n, 200), dtype=np.float32) + + r1 = timeit.timeit("buffer[:] = np.take(candidates_cache, candidates, axis=0, mode='clip')", number=100, globals={"candidates_cache":candidates_cache, "candidates":candidates, "buffer": buffer, "np":np})*10 + r2 = timeit.timeit("np.take(candidates_cache, candidates, axis=0, mode='clip', out=buffer)", number=100, globals={"candidates_cache":candidates_cache, "candidates":candidates, "buffer": buffer, "np":np})*10 + r3 = timeit.timeit("r = np.take(candidates_cache, candidates, axis=0, mode='clip')", number=100, globals={"candidates_cache":candidates_cache, "candidates":candidates, "buffer": buffer, "np":np})*10 + r4 = timeit.timeit("np.take(candidates_cache, candidates, axis=0, mode='clip', out=np_out)", number=100, globals={"candidates_cache":candidates_cache, "candidates":candidates, "buffer": buffer, "np":np, "np_out":np_out})*10 + + pb_utils.Logger.log_error(f"Buffer - assignment - {r1}") + pb_utils.Logger.log_error(f"Buffer - output - {r2}") + pb_utils.Logger.log_error(f"Baseline - assignment - {r3}") + pb_utils.Logger.log_error(f"Baseline - np out - {r4}") + pb_utils.Logger.log_error(f"numpy version {np.__version__}") + + + def execute_request(self, request): + pass + + def execute(self, requests): + return [self.execute_request(request) for request in requests] diff --git a/models/test_take/config.pbtxt b/models/test_take/config.pbtxt new file mode 100644 index 00000000..5fa89919 --- /dev/null +++ b/models/test_take/config.pbtxt @@ -0,0 +1,22 @@ +name: "test_take" +backend: "python" + +input [ + { + name: "n" + data_type: TYPE_INT32 + dims: [ -1] + + } +] + +output [ + { + name: "scores" + data_type: TYPE_FP32 + dims: [ -1 ] + } +] + + +instance_group [{ kind: KIND_CPU }] diff --git a/src/pb_stub.cc b/src/pb_stub.cc index 26b7daf9..6b3cb983 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -1536,10 +1536,8 @@ 
PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
       .def("shape", &PbTensor::Dims)
       .def("from_dlpack", &PbTensor::FromDLPack)
       .def("__dlpack__", &PbTensor::DLPack, py::arg("stream") = py::none())
-      .def("__dlpack_device__", &PbTensor::DLPackDevice)
-      .def("memory_view", [](std::shared_ptr<PbTensor>& t) {
-        return py::memoryview::from_memory(t->DataPtr(), t->ByteSize() * 8);
-      });
+      .def("__dlpack_device__", &PbTensor::DLPackDevice);
+
   py::class_<InferResponse, std::shared_ptr<InferResponse>>(
       module, "InferenceResponse")
       .def(
diff --git a/src/pb_tensor.cc b/src/pb_tensor.cc
index 7fecf451..79ad131d 100644
--- a/src/pb_tensor.cc
+++ b/src/pb_tensor.cc
@@ -279,14 +279,9 @@ PbTensor::CreateInSHM(const std::string& name, SharedMemoryManager& shm_pool, st
       reinterpret_cast<char*>(tensor_shm_ptr) + pb_memory_offset,
       shm_handle + pb_memory_offset, false);
   tensor_shm_ptr->memory = 0;
-
-  LOG_INFO << "CreateInSHM written to: ";
-  LOG_INFO << "tensor_shm_ptr: " << tensor_shm_ptr ;
-  LOG_INFO << "name_offset: " << name_offset ;
-  LOG_INFO << "pb_memory_offset: " << pb_memory_offset ;
-
+  std::cout << "Offset is - " << pb_memory_offset<< "\n";
   return std::unique_ptr<PbTensor>(
-          new PbTensor(tensor_shm, name_shm, pb_memory));
+      new PbTensor(tensor_shm, name_shm, pb_memory));
 }
 
@@ -595,17 +590,7 @@ void
 PbTensor::SaveToSharedMemory(
     std::unique_ptr<SharedMemoryManager>& shm_pool, bool copy_gpu)
 {
-  LOG_INFO << "Save to memory PbTensor- " << name_;
-  LOG_INFO << "tensor_shm_.data_" << static_cast<void*>(tensor_shm_.data_.get());
-
-  auto mem = static_cast<float*>(memory_ptr_);
-  for ( unsigned int i = 0; i < 10; i++ ) {
-    LOG_INFO << mem[i] << " ";
-  }
-
   if (!tensor_shm_.data_) {
-    LOG_INFO << "SaveToSharedMemory - tensor_shm_.data is empty" << name_;
-
     uint64_t byte_size;
     if (!pb_memory_) {
       byte_size = sizeof(TensorShm) + sizeof(int64_t) * dims_.size() +
@@ -676,11 +661,8 @@ PbTensor::LoadFromSharedMemory(
     pb_memory = PbMemory::LoadFromSharedMemory(
         shm_pool, tensor_shm_ptr->memory, open_cuda_handle);
   }
-  //
-  LOG_INFO << "Loaded from memory PbTensor- " << name_shm->String();
-
   return std::unique_ptr<PbTensor>(
-          new PbTensor(tensor_shm, name_shm, pb_memory));
+      new PbTensor(tensor_shm, name_shm, pb_memory));
 }
@@ -714,7 +696,6 @@ PbTensor::PbTensor(
       reinterpret_cast<char*>(tensor_shm_ptr_) + sizeof(TensorShm));
 
   name_ = name_shm_->String();
-  LOG_INFO << "Creating PbTensor from SHM" << name_;
 
   dims_ = std::vector<int64_t>(
       dims_shm_ptr_, dims_shm_ptr_ + tensor_shm_ptr_->dims_count);
@@ -727,26 +708,17 @@ PbTensor::PbTensor(
     memory_type_id_ = pb_memory_->MemoryTypeId();
 
   shm_handle_ = tensor_shm_.handle_;
-
-  auto mem = static_cast<float*>(memory_ptr_);
-  for ( unsigned int i = 0; i < 10; i++ ) {
-    LOG_INFO << mem[i] << " ";
-  }
-
-  LOG_INFO << " shm_handle_ -" << shm_handle_;
-  LOG_INFO << " memory_ptr_ -" << memory_ptr_;
-  LOG_INFO << " byte_size_ -" << byte_size_;
-
 #ifdef TRITON_PB_STUB
   if (memory_type_ == TRITONSERVER_MEMORY_CPU ||
       memory_type_ == TRITONSERVER_MEMORY_CPU_PINNED) {
     if (dtype_ != TRITONSERVER_TYPE_BYTES) {
       py::object numpy_array =
-          py::array(triton_to_pybind_dtype(dtype_), dims_, (void*)memory_ptr_);
-      numpy_array_ = numpy_array.attr("view")(triton_to_numpy_type(dtype_));
+          py::array(triton_to_pybind_dtype(dtype_), dims_, (void*)memory_ptr_, py::none());
+      numpy_array_ = numpy_array.attr("view")(triton_to_numpy_type(dtype_));
     } else {
       py::object numpy_array = py::array(
           triton_to_pybind_dtype(TRITONSERVER_TYPE_UINT8), {byte_size_},
-          (void*)memory_ptr_);
+          (void*)memory_ptr_, py::none());
       py::module triton_pb_utils =
           py::module::import("triton_python_backend_utils");
       numpy_array_ =
diff --git a/src/test_tesnor.cpp b/src/test_tesnor.cpp
new file mode 100644
index 00000000..a6d569ac
--- /dev/null
+++ b/src/test_tesnor.cpp
@@ -0,0 +1,3 @@
+//
+// Created by azureuser on 6/22/23.
+//

From 9504358c4d330dec6d2fc62620c623ed741cdffa Mon Sep 17 00:00:00 2001
From: Daniel Bunting
Date: Fri, 23 Jun 2023 13:34:34 +0000
Subject: [PATCH 4/6] Clean up

---
 src/test_tesnor.cpp | 3 ---
 1 file changed, 3 deletions(-)
 delete mode 100644 src/test_tesnor.cpp

diff --git a/src/test_tesnor.cpp b/src/test_tesnor.cpp
deleted file mode 100644
index a6d569ac..00000000
--- a/src/test_tesnor.cpp
+++ /dev/null
@@ -1,3 +0,0 @@
-//
-// Created by azureuser on 6/22/23.
-//

From e407d0d3e9ac12c1e6a48484cb0873770d543604 Mon Sep 17 00:00:00 2001
From: Daniel Bunting
Date: Sat, 24 Jun 2023 09:48:13 +0000
Subject: [PATCH 5/6] Calculate the required padding

---
 src/pb_tensor.cc | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/pb_tensor.cc b/src/pb_tensor.cc
index 79ad131d..b405b661 100644
--- a/src/pb_tensor.cc
+++ b/src/pb_tensor.cc
@@ -243,8 +243,14 @@ PbTensor::CreateInSHM(const std::string& name, SharedMemoryManager& shm_pool, st
     elements *= dims[i];
   }
   py::module np = py::module::import("numpy");
-  uint64_t byte_size_ = elements * np.attr("dtype")(data_type).attr("itemsize").cast<uint64_t>();
+  uint64_t item_size = np.attr("dtype")(data_type).attr("itemsize").cast<uint64_t>();
+  uint64_t byte_size_ = elements * item_size;
 
+  // Calculate the offset of the data and add padding so the numpy array is memory aligned
+  std::size_t name_offset = sizeof(TensorShm) + sizeof(int64_t) * dims.size();
+  std::size_t pb_memory_offset = name_offset + PbString::ShmStructSize(name);
+  std::size_t padding = pb_memory_offset % item_size;
+  std::cout << "Required padding " << padding << "\n";
   uint64_t byte_size;
   byte_size = sizeof(TensorShm) + sizeof(int64_t) * dims.size() +
               PbString::ShmStructSize(name) +
@@ -252,12 +258,13 @@ PbTensor::CreateInSHM(const std::string& name, SharedMemoryManager& shm_pool, st
 
   // Do the allocation
   AllocatedSharedMemory<char> tensor_shm = shm_pool.Construct<char>(byte_size);
+  auto shm_handle = tensor_shm.handle_;
+  auto shm_data = tensor_shm.data_.get();
 
   // Wrap the raw memory in TensorShm
-  auto* tensor_shm_ptr = reinterpret_cast<TensorShm*>(tensor_shm.data_.get());
+  auto* tensor_shm_ptr = reinterpret_cast<TensorShm*>(shm_data);
   tensor_shm_ptr->dtype = dtype;
   tensor_shm_ptr->dims_count = dims.size();
-  auto shm_handle = tensor_shm.handle_;
 
   // Write the dimensions data to shared memory.
   auto* dims_shm_ptr_ = reinterpret_cast<int64_t*>(
       reinterpret_cast<char*>(tensor_shm_ptr) + sizeof(TensorShm));
   for (size_t i = 0; i < dims.size(); i++) {
     dims_shm_ptr_[i] = dims[i];
   }
 
   // Write the name data to shared memory.
- std::size_t name_offset = sizeof(TensorShm) + sizeof(int64_t) * dims.size(); auto name_shm = PbString::Create(name, reinterpret_cast(tensor_shm_ptr) + name_offset, shm_handle + name_offset); int64_t memory_type_id_ = 0; // Maybe - std::size_t pb_memory_offset = name_offset + PbString::ShmStructSize(name); auto pb_memory = PbMemory::Create( memory_type_, memory_type_id_, byte_size_, nullptr, @@ -280,6 +285,7 @@ PbTensor::CreateInSHM(const std::string& name, SharedMemoryManager& shm_pool, st shm_handle + pb_memory_offset, false); tensor_shm_ptr->memory = 0; std::cout << "Offset is - " << pb_memory_offset<< "\n"; + return std::unique_ptr( new PbTensor(tensor_shm, name_shm, pb_memory)); } From e609166b1b73f720a5f572dbdeee210b967e03c4 Mon Sep 17 00:00:00 2001 From: Daniel Bunting Date: Sat, 24 Jun 2023 09:49:45 +0000 Subject: [PATCH 6/6] Add model with candidatesss --- .../1/model.savedmodel/fingerprint.pb | 2 +- .../1/model.savedmodel/saved_model.pb | Bin 13286 -> 13327 bytes models/category_tensorflow_model/config.pbtxt | 2 +- models/test_bls/1/model.py | 28 ++------ models/test_bls_before/1/model.py | 68 ++++++++++++++++++ models/test_bls_before/config.pbtxt | 22 ++++++ 6 files changed, 98 insertions(+), 24 deletions(-) create mode 100644 models/test_bls_before/1/model.py create mode 100644 models/test_bls_before/config.pbtxt diff --git a/models/category_tensorflow_model/1/model.savedmodel/fingerprint.pb b/models/category_tensorflow_model/1/model.savedmodel/fingerprint.pb index 23a5dc10..9dd4a74c 100644 --- a/models/category_tensorflow_model/1/model.savedmodel/fingerprint.pb +++ b/models/category_tensorflow_model/1/model.savedmodel/fingerprint.pb @@ -1 +1 @@ -ەWл~߷ (2 \ No newline at end of file +֧Dٲٿ (2 \ No newline at end of file diff --git a/models/category_tensorflow_model/1/model.savedmodel/saved_model.pb b/models/category_tensorflow_model/1/model.savedmodel/saved_model.pb index 16dd18379ec5392afd82d5871dc3fca89f13ff0a..b91baea3534a842cca00fbaf26e503ce2c1a14c3 100644 GIT binary patch delta 774 zcmaEs-k-tB!6?*~v5|EjhxlXkBS{xL3_{998+ z5JPS=7neQ@`y^Yg3H)5{lO6cP_yo9kko0aoBiGNu*fcpt*^n1mym+!AzwzeN%BvWe z<2V>5$Lj=eBk7uaQ8j7vNzuC;8X#l2gyZ8g^U_j_QuC5ifer(@&eTMR8uo0ysCtQ+ zWeyux$mD!OWi+QxJ|m~X*bnkH538Y7o8N@x@d z8FLAv2XFD@hq?-r_vlVyTsqlbFD>dg6BjoJix7KBQ6kX021cuxum`{-Caxolo0tzU za&fzr=HxgQr3V+JCJWhfu_P9y7YmusVm9I8%R+HFuf8y2_Gu=k6F7`SR$MH^Q(Bx_ z6rYhTlsY#5BnEM#Hm=`fK0F&@4<|alidsIC_IE<+< G&I15Fdjwqo delta 695 zcmeCrc$Ut}!6@`FeIx5W4$;5vTrG@T{K<(qIq}Jfc`2DGi6yC%Z*lx%Tr!zgONAdz zW-}j`J_~z;Ems#mm+Rz*dcyoh7D9YnTyU+MGx+*h7#k-45HjROh!#&)=QrM*DZGl2 zHHL#hC}OgNs4=6;TK1y&B(<93Ou1pj9m6eMidJfatWe_YVqXXx(bu8 z=}uytHaSx-P4Fla7dHos5PL~cVsdJ{fzgV|5A?(hCNOawV%)^MkCBVptu!acu_!&b zAT?RYmWw5^D7{$7bQZG_7f%*am}%+@GqN0KaykOi?t_ph7UC%_PA!Vh$Sf|&FRGMa zQXZ(INX%PHFU diff --git a/models/category_tensorflow_model/config.pbtxt b/models/category_tensorflow_model/config.pbtxt index 8f8d8dbd..cdd72390 100755 --- a/models/category_tensorflow_model/config.pbtxt +++ b/models/category_tensorflow_model/config.pbtxt @@ -11,7 +11,7 @@ parameters: { input [ { - name: "candidates" + name: "candidatesss" data_type: TYPE_FP32 dims: [ -1 , -1] diff --git a/models/test_bls/1/model.py b/models/test_bls/1/model.py index 7c6094fc..76a63602 100644 --- a/models/test_bls/1/model.py +++ b/models/test_bls/1/model.py @@ -27,32 +27,16 @@ def breakpoint(): class TritonPythonModel: def initialize(self, args): import triton_python_backend_utils - #self.shm = triton_python_backend_utils.shared_memory + self.shm = triton_python_backend_utils.shared_memory 
+ self.candidates_cache = np.random.random((500000, 200)).astype(np.float32) def execute_request(self, request): - candidates_cache = np.random.random((500000, 200)).astype(np.float32) - n = pb_utils.get_input_tensor_by_name(request, "n").as_numpy()[0] - candidates = np.random.randint(100000, size=int(n)) - - #candidate_tensor: pb_utils.Tensor = pb_utils.new_shm_tensor("candidates", self.shm, (n, 200), np.float32) - #arr = np.ndarray((n, 200), dtype=np.float32, buffer=candidate_tensor.memory_view()) - #arr[:] = np.random.random((int(n), 200)).astype(np.float32).data - - #s1 = time.time() - - # Take time - 0.008064508438110352 - #np.take(candidates_cache, candidates, axis=0, out=arr) - #s2 = time.time() - #pb_utils.Logger.log_error(f"Take time - {s2-s1}") + n = int(pb_utils.get_input_tensor_by_name(request, "n").as_numpy()[0]) + candidates = np.random.randint(100000, size=n) + candidate_tensor: pb_utils.Tensor = pb_utils.new_shm_tensor("candidatesss", self.shm, (n, 200), np.float32) + np.take(self.candidates_cache, candidates, axis=0, out=candidate_tensor.as_numpy(), mode='clip') context_array = np.random.random((10, 200)).astype(np.float32) - candidates_array = np.take(candidates_cache, candidates, axis=0) - - candidate_tensor = pb_utils.Tensor( - "candidates", - candidates_array, - ) - context_tensor = pb_utils.Tensor( "user_history", context_array, diff --git a/models/test_bls_before/1/model.py b/models/test_bls_before/1/model.py new file mode 100644 index 00000000..c2f8ff33 --- /dev/null +++ b/models/test_bls_before/1/model.py @@ -0,0 +1,68 @@ +""" +Category model +""" +import time +from typing import cast + +import numpy as np + +try: + import triton_python_backend_utils as pb_utils +except ImportError: + import tests.stub.triton_python_backend_utils + + pb_utils: tests.stub.triton_python_backend_utils = cast( + tests.stub.triton_python_backend_utils, None + ) + + +def breakpoint(): + import pydevd_pycharm + + pydevd_pycharm.settrace( + 'host.docker.internal', port=5858, stdoutToServer=True, stderrToServer=True + ) + + +class TritonPythonModel: + def initialize(self, args): + import triton_python_backend_utils + self.shm = triton_python_backend_utils.shared_memory + self.candidates_cache = np.random.random((500000, 200)).astype(np.float32) + + def execute_request(self, request): + n = pb_utils.get_input_tensor_by_name(request, "n").as_numpy()[0] + candidates = np.random.randint(100000, size=int(n)) + + context_array = np.random.random((10, 200)).astype(np.float32) + candidates_array = np.take(self.candidates_cache, candidates, axis=0) + + candidate_tensor = pb_utils.Tensor( + "candidatesss", + candidates_array, + ) + + context_tensor = pb_utils.Tensor( + "user_history", + context_array, + ) + + inference_response = pb_utils.InferenceRequest( + model_name="category_tensorflow_model", + requested_output_names=["scores"], + inputs=[candidate_tensor, context_tensor], + ).exec() + + if inference_response.has_error(): + raise pb_utils.TritonModelException(inference_response.error().message()) + else: + scores = pb_utils.get_output_tensor_by_name(inference_response, "scores") + + out_scores = pb_utils.Tensor("scores", scores.as_numpy()[:400]) + + response = pb_utils.InferenceResponse(output_tensors=[out_scores]) + + return response + + def execute(self, requests): + return [self.execute_request(request) for request in requests] diff --git a/models/test_bls_before/config.pbtxt b/models/test_bls_before/config.pbtxt new file mode 100644 index 00000000..22171c53 --- /dev/null +++ 
b/models/test_bls_before/config.pbtxt @@ -0,0 +1,22 @@ +name: "test_bls_before" +backend: "python" + +input [ + { + name: "n" + data_type: TYPE_INT32 + dims: [ -1] + + } +] + +output [ + { + name: "scores" + data_type: TYPE_FP32 + dims: [ -1 ] + } +] + + +instance_group [{ kind: KIND_CPU }]
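
For a quick end-to-end check of the test_bls path, a plain HTTP client call is
enough. A minimal sketch, assuming a server built from this Dockerfile is
serving HTTP on localhost:8000 and the tritonclient package is installed (both
are assumptions, not part of this series):

    import numpy as np
    import tritonclient.http as httpclient

    client = httpclient.InferenceServerClient(url="localhost:8000")

    # "n" controls how many candidate rows the BLS model gathers.
    n = np.array([500], dtype=np.int32)
    inp = httpclient.InferInput("n", n.shape, "INT32")
    inp.set_data_from_numpy(n)

    result = client.infer(model_name="test_bls", inputs=[inp])
    scores = result.as_numpy("scores")
    print(scores.shape, scores.dtype)  # the model returns the first 400 scores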