From fc3d23d3f93a1f6fecc2efe5b00dd1928f3127dc Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:15:48 +0000 Subject: [PATCH 01/27] Removed OpCreateTensor in favour of manager memory ownership --- src/Manager.cpp | 11 +++ src/OpAlgoBase.cpp | 2 +- src/OpTensorCopy.cpp | 2 +- src/OpTensorCreate.cpp | 76 ------------------- src/OpTensorSyncDevice.cpp | 2 +- src/OpTensorSyncLocal.cpp | 2 +- src/Tensor.cpp | 10 ++- src/include/kompute/Manager.hpp | 45 +++++++++-- src/include/kompute/operations/OpBase.hpp | 8 +- .../kompute/operations/OpTensorCreate.hpp | 74 ------------------ 10 files changed, 64 insertions(+), 168 deletions(-) delete mode 100644 src/OpTensorCreate.cpp delete mode 100644 src/include/kompute/operations/OpTensorCreate.hpp diff --git a/src/Manager.cpp b/src/Manager.cpp index df9d64db..11d11a26 100755 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -58,6 +58,17 @@ Manager::~Manager() return; } + if (this->mManagedTensors.size()) { + SPDLOG_DEBUG("Kompute Manager explicitly freeing tensors"); + for (const std::shared_ptr& tensor : this->mManagedTensors) { + if (!tensor->isInit()) { + SPDLOG_ERROR("Kompute Manager attempted to free managed tensor but not tensor is not initialised"); + } + tensor->freeMemoryDestroyGPUResources(); + } + this->mManagedTensors.clear(); + } + if (this->mManagedSequences.size()) { SPDLOG_DEBUG("Kompute Manager explicitly running destructor for " "managed sequences"); diff --git a/src/OpAlgoBase.cpp b/src/OpAlgoBase.cpp index c6ecf316..ad4bbc17 100644 --- a/src/OpAlgoBase.cpp +++ b/src/OpAlgoBase.cpp @@ -14,7 +14,7 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr commandBuffer, std::vector>& tensors, KomputeWorkgroup komputeWorkgroup) - : OpBase(physicalDevice, device, commandBuffer, tensors, false) + : OpBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", tensors.size()); diff --git a/src/OpTensorCopy.cpp 
b/src/OpTensorCopy.cpp index 3df23aa5..3726c71e 100644 --- a/src/OpTensorCopy.cpp +++ b/src/OpTensorCopy.cpp @@ -12,7 +12,7 @@ OpTensorCopy::OpTensorCopy(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector> tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, false) + : OpBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpTensorCopy constructor with params"); } diff --git a/src/OpTensorCreate.cpp b/src/OpTensorCreate.cpp deleted file mode 100644 index 7918415e..00000000 --- a/src/OpTensorCreate.cpp +++ /dev/null @@ -1,76 +0,0 @@ - -#include "kompute/Tensor.hpp" - -#include "kompute/operations/OpTensorCreate.hpp" - -namespace kp { - -OpTensorCreate::OpTensorCreate() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate constructor base"); -} - -OpTensorCreate::OpTensorCreate( - std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, true) -{ - SPDLOG_DEBUG("Kompute OpTensorCreate constructor with params"); -} - -OpTensorCreate::~OpTensorCreate() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate destructor started"); -} - -void -OpTensorCreate::init() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate init called"); - - if (this->mTensors.size() < 1) { - throw std::runtime_error( - "Kompute OpTensorCreate called with less than 1 tensor"); - } - - for (std::shared_ptr tensor : this->mTensors) { - if (tensor->isInit()) { - throw std::runtime_error( - "Kompute OpTensorCreate: Tensor has already been initialized"); - } - if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { - tensor->init(this->mPhysicalDevice, this->mDevice); - - tensor->mapDataIntoHostMemory(); - } - } -} - -void -OpTensorCreate::record() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate record called"); - - for (size_t i = 0; i < this->mTensors.size(); i++) { - if (this->mTensors[i]->tensorType() == 
Tensor::TensorTypes::eDevice) { - this->mTensors[i]->recordCopyFromStagingToDevice( - this->mCommandBuffer, false); - } - } -} - -void -OpTensorCreate::preEval() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate preEval called"); -} - -void -OpTensorCreate::postEval() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate postEval called"); -} - -} diff --git a/src/OpTensorSyncDevice.cpp b/src/OpTensorSyncDevice.cpp index 340786eb..92bd7512 100644 --- a/src/OpTensorSyncDevice.cpp +++ b/src/OpTensorSyncDevice.cpp @@ -15,7 +15,7 @@ OpTensorSyncDevice::OpTensorSyncDevice( std::shared_ptr device, std::shared_ptr commandBuffer, std::vector> tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, false) + : OpBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpTensorSyncDevice constructor with params"); } diff --git a/src/OpTensorSyncLocal.cpp b/src/OpTensorSyncLocal.cpp index 09d966e1..c7a4fb63 100644 --- a/src/OpTensorSyncLocal.cpp +++ b/src/OpTensorSyncLocal.cpp @@ -15,7 +15,7 @@ OpTensorSyncLocal::OpTensorSyncLocal( std::shared_ptr device, std::shared_ptr commandBuffer, std::vector> tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, false) + : OpBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpTensorSyncLocal constructor with params"); } diff --git a/src/Tensor.cpp b/src/Tensor.cpp index f04165cf..7400dfff 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -229,8 +229,11 @@ Tensor::mapDataFromHostMemory() if (this->mTensorType == TensorTypes::eHost) { hostVisibleMemory = this->mPrimaryMemory; - } else { + } else if (this->mTensorType == TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; + } else { + SPDLOG_WARN("Kompute Tensor mapping data not supported on storage tensor"); + return; } vk::DeviceSize bufferSize = this->memorySize(); @@ -252,8 +255,11 @@ Tensor::mapDataIntoHostMemory() if (this->mTensorType == TensorTypes::eHost) { hostVisibleMemory = this->mPrimaryMemory; - } else { + } 
else if (this->mTensorType == TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; + } else { + SPDLOG_WARN("Kompute Tensor mapping data not supported on storage tensor"); + return; } vk::DeviceSize bufferSize = this->memorySize(); diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 8c689ba5..973a0039 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -1,13 +1,12 @@ #pragma once #include +#include #include "kompute/Core.hpp" #include "kompute/Sequence.hpp" -#include "kompute/operations/OpTensorCreate.hpp" - #define KP_DEFAULT_SESSION "DEFAULT" namespace kp { @@ -231,8 +230,8 @@ class Manager /** * Function that simplifies the common workflow of tensor creation and * initialization. It will take the constructor parameters for a Tensor - * and will will us it to create a new Tensor and then create it using - * the OpCreateTensor command. + * and will use it to create a new Tensor and then initialise it. The + * tensor memory will then be managed and owned by the manager. * * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize @@ -242,17 +241,49 @@ const std::vector& data, Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) { - SPDLOG_DEBUG("Kompute Manager createInitTensor triggered"); SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr"); std::shared_ptr tensor = std::make_shared(kp::Tensor(data, tensorType)); - this->evalOpDefault({ tensor }); + tensor->init(this->mPhysicalDevice, this->mDevice); + if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { + tensor->mapDataIntoHostMemory(); + } + this->mManagedTensors.insert(tensor); return tensor; } + /** + * Function that simplifies the common workflow of tensor rebuilding. It will take a vector of previously constructed Tensors and re-initialise their GPU memory.
The tensor memory will then be managed and owned by the manager. + * + * @param tensors Vector of already constructed tensors to rebuild + * Tensors that are already initialised will first have their + * GPU resources freed before being re-initialised. + */ + void rebuildTensors(std::vector> tensors) + { + SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); + for (std::shared_ptr tensor : tensors) { + + if (tensor->isInit()) { + tensor->freeMemoryDestroyGPUResources(); + } + + tensor->init(this->mPhysicalDevice, this->mDevice); + if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { + tensor->mapDataIntoHostMemory(); + } + + std::set>::iterator it = this->mManagedTensors.find(tensor); + if (it == this->mManagedTensors.end()) { + this->mManagedTensors.insert(tensor); + } + } + } + private: // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mInstance = nullptr; @@ -263,6 +294,8 @@ class Manager bool mFreeDevice = false; // -------------- ALWAYS OWNED RESOURCES + std::set> mManagedTensors; + std::unordered_map> mManagedSequences; diff --git a/src/include/kompute/operations/OpBase.hpp b/src/include/kompute/operations/OpBase.hpp index 6e35df99..a423abc2 100644 --- a/src/include/kompute/operations/OpBase.hpp +++ b/src/include/kompute/operations/OpBase.hpp @@ -31,13 +31,11 @@ class OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors */ OpBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors, - bool freeTensors) + std::vector>& tensors) { SPDLOG_DEBUG("Compute OpBase constructor with params"); @@ -45,14 +43,12 @@ this->mPhysicalDevice = physicalDevice; this->mDevice = device; this->mCommandBuffer = commandBuffer; this->mTensors = tensors; - this->mFreeTensors = freeTensors; } /** * Default destructor for
OpBase class. This OpBase destructor class should * always be called to destroy and free owned resources unless it is - * intended to destroy the resources in the parent class. This can be done - * by passing the mFreeTensors=false. + * intended to destroy the resources in the parent class. */ virtual ~OpBase() { diff --git a/src/include/kompute/operations/OpTensorCreate.hpp b/src/include/kompute/operations/OpTensorCreate.hpp deleted file mode 100644 index 4b8c784c..00000000 --- a/src/include/kompute/operations/OpTensorCreate.hpp +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once - -#include "kompute/Core.hpp" - -#include "kompute/Tensor.hpp" - -#include "kompute/operations/OpBase.hpp" - -namespace kp { - -/** - Operation that creates tensor and manages the memory of the components - created -*/ -class OpTensorCreate : public OpBase -{ - public: - OpTensorCreate(); - - /** - * Default constructor with parameters that provides the bare minimum - * requirements for the operations to be able to create and manage their - * sub-components. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that will be used to create in operation. - * @param freeTensors Whether operation manages the memory of the Tensors - */ - OpTensorCreate(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors); - - /** - * Default destructor which in this case expects the parent class to free - * the tensors - */ - ~OpTensorCreate() override; - - /** - * In charge of initialising the primary Tensor as well as the staging - * tensor as required. It will only initialise a staging tensor if the - * Primary tensor is of type Device. 
For staging tensors it performs a - * mapDataIntoHostMemory which would perform immediately as opposed to - * on sequence eval/submission. - */ - void init() override; - - /** - * Record runs the core actions to create the tensors. For device tensors - * it records a copyCommand to move the data from the staging tensor to the - * device tensor. The mapping for staging tensors happens in the init function - * not in the record function. - */ - void record() override; - - /** - * Does not perform any preEval commands. - */ - virtual void preEval() override; - - /** - * Performs a copy back into the main tensor to ensure that the data - * contained is the one that is now being stored in the GPU. - */ - virtual void postEval() override; - - - private: -}; - -} // End namespace kp From 4dedfadfef933b29ea87f8f70ed3a3c7c509792b Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:17:32 +0000 Subject: [PATCH 02/27] Updated tests to reflect manager tensor memory ownership --- test/TestAsyncOperations.cpp | 6 ++-- test/TestLogisticRegression.cpp | 16 +++------ test/TestManager.cpp | 28 ++++++++++----- test/TestMultipleAlgoExecutions.cpp | 22 +++++++++--- test/TestOpAlgoLoopsPassingData.cpp | 4 ++- test/TestOpShadersFromStringAndFile.cpp | 16 ++++++--- test/TestOpTensorCopy.cpp | 24 +++++++++---- test/TestOpTensorCreate.cpp | 46 +++++++++++++++++++------ test/TestOpTensorSync.cpp | 4 +-- test/TestTensor.cpp | 5 +-- 10 files changed, 117 insertions(+), 54 deletions(-) diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index 43bccf99..c43f5648 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -49,7 +49,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) inputsSyncB.push_back(std::make_shared(kp::Tensor(data))); } - mgr.evalOpDefault(inputsSyncB); + mgr.rebuildTensors(inputsSyncB); auto startSync = std::chrono::high_resolution_clock::now(); @@ -77,7 +77,7 @@ TEST(TestAsyncOperations, 
TestManagerParallelExecution) inputsAsyncB.push_back(std::make_shared(kp::Tensor(data))); } - mgrAsync.evalOpDefault(inputsAsyncB); + mgrAsync.rebuildTensors(inputsAsyncB); for (uint32_t i = 0; i < numParallel; i++) { mgrAsync.createManagedSequence("async" + std::to_string(i), i); @@ -149,7 +149,7 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution) mgr.createManagedSequence("asyncOne"); mgr.createManagedSequence("asyncTwo"); - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); mgr.evalOpAsync( { tensorA }, "asyncOne", std::vector(shader.begin(), shader.end())); diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index 7c3f1538..e25b1416 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -32,12 +32,8 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression) { kp::Manager mgr; - std::shared_ptr sqTensor = mgr.createManagedSequence(); - - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); + mgr.rebuildTensors(params); + mgr.evalOpDefault(params); std::shared_ptr sq = mgr.createManagedSequence(); @@ -122,12 +118,8 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) { kp::Manager mgr; - std::shared_ptr sqTensor = mgr.createManagedSequence(); - - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); + mgr.rebuildTensors(params); + mgr.evalOpDefault(params); std::shared_ptr sq = mgr.createManagedSequence(); diff --git a/test/TestManager.cpp b/test/TestManager.cpp index 198e617a..3e1db7b1 100644 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -8,14 +8,16 @@ TEST(TestManager, EndToEndOpMultFlow) kp::Manager mgr; std::shared_ptr tensorLHS{ new kp::Tensor({ 0, 1, 2 }) }; - mgr.evalOpDefault({ tensorLHS }); + mgr.rebuildTensors({ tensorLHS }); std::shared_ptr tensorRHS{ new kp::Tensor({ 2, 4, 6 }) }; - mgr.evalOpDefault({ tensorRHS }); + mgr.rebuildTensors({ 
tensorRHS }); std::shared_ptr tensorOutput{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorOutput }); + mgr.rebuildTensors({ tensorOutput }); + + mgr.evalOpDefault({ tensorLHS, tensorRHS, tensorOutput }); mgr.evalOpDefault({ tensorLHS, tensorRHS, tensorOutput }); @@ -36,14 +38,16 @@ TEST(TestManager, OpMultSequenceFlow) kp::Manager mgr; { + mgr.rebuildTensors({ tensorLHS, tensorRHS, tensorOutput }); + std::shared_ptr sq = mgr.getOrCreateManagedSequence("newSequence"); sq->begin(); - sq->record({ tensorLHS }); - sq->record({ tensorRHS }); - sq->record({ tensorOutput }); + sq->record({ tensorLHS }); + sq->record({ tensorRHS }); + sq->record({ tensorOutput }); sq->record({ tensorLHS, tensorRHS, tensorOutput }); @@ -93,14 +97,16 @@ TEST(TestManager, TestMultipleTensorsAtOnce) mgr.getOrCreateManagedSequence("newSequence"); { - sq->begin(); - - sq->record({ tensorLHS, tensorRHS, tensorOutput }); + mgr.rebuildTensors({ tensorLHS, tensorRHS, tensorOutput }); EXPECT_TRUE(tensorLHS->isInit()); EXPECT_TRUE(tensorRHS->isInit()); EXPECT_TRUE(tensorOutput->isInit()); + sq->begin(); + + sq->record({ tensorLHS, tensorRHS, tensorOutput }); + sq->record({ tensorLHS, tensorRHS, tensorOutput }); sq->record({ tensorOutput }); @@ -119,6 +125,10 @@ TEST(TestManager, TestCreateInitTensor) std::shared_ptr tensorA = mgr.buildTensor({ 0, 1, 2 }); std::shared_ptr tensorB = mgr.buildTensor({ 0, 0, 0 }); + mgr.rebuildTensors({ tensorA, tensorB }); + + mgr.evalOpDefault({ tensorA, tensorB }); + mgr.evalOpDefault({ tensorA, tensorB }); mgr.evalOpDefault({ tensorB }); diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index 11e94caa..9d696d55 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -19,13 +19,15 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) pa[index] = pa[index] + 1; })"); + mgr.rebuildTensors({ tensorA }); + std::shared_ptr sq = mgr.getOrCreateManagedSequence("newSequence"); { 
sq->begin(); - sq->record({ tensorA }); + sq->record({ tensorA }); sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); @@ -58,13 +60,15 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) pa[index] = pa[index] + 1; })"); + mgr.rebuildTensors({ tensorA }); + std::shared_ptr sqTensor = mgr.createManagedSequence(); std::shared_ptr sq = mgr.createManagedSequence(); // First create the tensor in a separate sequence sqTensor->begin(); - sqTensor->record({ tensorA }); + sqTensor->record({ tensorA }); sqTensor->end(); sqTensor->eval(); @@ -111,13 +115,15 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) pa[index] = pa[index] + 1; })"); + mgr.rebuildTensors({ tensorA }); + { std::shared_ptr sq = mgr.getOrCreateManagedSequence("newSequence"); sq->begin(); - sq->record({ tensorA }); + sq->record({ tensorA }); sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); @@ -183,13 +189,15 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) pa[index] = pa[index] + 1; })"); + mgr.rebuildTensors({ tensorA }); + { std::shared_ptr sq = mgr.getOrCreateManagedSequence("newSequence"); sq->begin(); - sq->record({ tensorA }); + sq->record({ tensorA }); sq->end(); sq->eval(); @@ -238,7 +246,9 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate) std::shared_ptr tensorInB{ new kp::Tensor({ 0.0, 1.0, 2.0 }) }; std::shared_ptr tensorOut{ new kp::Tensor({ 0.0, 0.0, 0.0 }) }; - mgr.evalOpDefault({ tensorInA, tensorInB, tensorOut }); + mgr.rebuildTensors({ tensorInA, tensorInB, tensorOut }); + + mgr.evalOpDefault({ tensorInA, tensorInB, tensorOut }); std::string shader(R"( // The version to use @@ -296,6 +306,8 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) } )"); + mgr.evalOpDefault({ tensorInA, tensorInB, tensorOut }); + mgr.evalOpDefault( { tensorInA, tensorInB, tensorOut }, std::vector(shader.begin(), shader.end())); diff --git a/test/TestOpAlgoLoopsPassingData.cpp b/test/TestOpAlgoLoopsPassingData.cpp 
index bd772779..b04ef383 100644 --- a/test/TestOpAlgoLoopsPassingData.cpp +++ b/test/TestOpAlgoLoopsPassingData.cpp @@ -30,13 +30,15 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) } )"); + mgr.rebuildTensors({ tensorA, tensorB }); + { std::shared_ptr sq = mgr.getOrCreateManagedSequence("default"); sq->begin(); - sq->record({ tensorA, tensorB }); + sq->record({ tensorA, tensorB }); sq->end(); diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp index 273421b2..7b0db3de 100644 --- a/test/TestOpShadersFromStringAndFile.cpp +++ b/test/TestOpShadersFromStringAndFile.cpp @@ -11,7 +11,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); std::string shader(R"( #version 450 @@ -28,6 +28,8 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor) } )"); + mgr.evalOpDefault({ tensorA, tensorB }); + mgr.evalOpDefault( { tensorA, tensorB }, std::vector(shader.begin(), shader.end())); @@ -43,7 +45,9 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); + + mgr.evalOpDefault({ tensorA, tensorB }); mgr.evalOpDefault( { tensorA, tensorB }, @@ -65,7 +69,9 @@ TEST(TestOpAlgoBase, ShaderRawDataFromFile) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); + + mgr.evalOpDefault({ tensorA, tensorB }); mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp"); @@ -82,7 +88,9 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromFile) std::shared_ptr tensorA{ new 
kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); + + mgr.evalOpDefault({ tensorA, tensorB }); mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv"); diff --git a/test/TestOpTensorCopy.cpp b/test/TestOpTensorCopy.cpp index 0e840cad..ae99bf17 100644 --- a/test/TestOpTensorCopy.cpp +++ b/test/TestOpTensorCopy.cpp @@ -14,7 +14,9 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); + + mgr.evalOpDefault({ tensorA, tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -41,7 +43,9 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; std::shared_ptr tensorC{ new kp::Tensor(testVecC) }; - mgr.evalOpDefault({ tensorA, tensorB, tensorC }); + mgr.rebuildTensors({ tensorA, tensorB, tensorC }); + + mgr.evalOpDefault({ tensorA, tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -70,7 +74,10 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor) std::shared_ptr tensorB{ new kp::Tensor( testVecB, kp::Tensor::TensorTypes::eHost) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); + + // Only calling sync on device type tensor + mgr.evalOpDefault({ tensorA }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -96,7 +103,10 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor) testVecA, kp::Tensor::TensorTypes::eHost) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); + + // Only calling sync on device type tensor + mgr.evalOpDefault({ tensorB }); EXPECT_TRUE(tensorA->isInit()); 
EXPECT_TRUE(tensorB->isInit()); @@ -123,7 +133,9 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor) std::shared_ptr tensorB{ new kp::Tensor( testVecB, kp::Tensor::TensorTypes::eHost) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); + + // Not calling OpTensorSyncDevice EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -147,7 +159,7 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail) std::shared_ptr tensorA{ new kp::Tensor( testVecA, kp::Tensor::TensorTypes::eHost) }; - mgr.evalOpDefault({ tensorA }); + mgr.rebuildTensors({ tensorA }); EXPECT_TRUE(tensorA->isInit()); diff --git a/test/TestOpTensorCreate.cpp b/test/TestOpTensorCreate.cpp index f0ba8743..a57142cf 100644 --- a/test/TestOpTensorCreate.cpp +++ b/test/TestOpTensorCreate.cpp @@ -12,7 +12,7 @@ TEST(TestOpTensorCreate, CreateSingleTensorSingleOp) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; - mgr.evalOpDefault({ tensorA }); + mgr.rebuildTensors({ tensorA }); EXPECT_TRUE(tensorA->isInit()); @@ -33,7 +33,7 @@ TEST(TestOpTensorCreate, CreateMultipleTensorSingleOp) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -53,8 +53,8 @@ TEST(TestOpTensorCreate, CreateMultipleTensorMultipleOp) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.evalOpDefault({ tensorA }); - mgr.evalOpDefault({ tensorB }); + mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -63,7 +63,7 @@ TEST(TestOpTensorCreate, CreateMultipleTensorMultipleOp) EXPECT_EQ(tensorB->data(), testVecB); } -TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed) +TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerDestroyed) { 
std::vector testVecA{ 9, 8, 7 }; @@ -74,8 +74,8 @@ TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed) { kp::Manager mgr; - mgr.evalOpDefault({ tensorA }); - mgr.evalOpDefault({ tensorB }); + mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -88,6 +88,32 @@ TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed) EXPECT_FALSE(tensorB->isInit()); } +TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerNOTDestroyed) +{ + + std::vector testVecA{ 9, 8, 7 }; + std::vector testVecB{ 6, 5, 4 }; + + std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; + std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; + + kp::Manager mgr; + + { + mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorB }); + + EXPECT_TRUE(tensorA->isInit()); + EXPECT_TRUE(tensorB->isInit()); + + EXPECT_EQ(tensorA->data(), testVecA); + EXPECT_EQ(tensorB->data(), testVecB); + } + + EXPECT_TRUE(tensorA->isInit()); + EXPECT_TRUE(tensorB->isInit()); +} + TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore) { @@ -99,8 +125,8 @@ TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore) kp::Manager mgr; - mgr.evalOpDefault({ tensorA }); - mgr.evalOpDefault({ tensorB }); + mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -123,7 +149,7 @@ TEST(TestOpTensorCreate, ExceptionOnZeroSizeTensor) kp::Manager mgr; try { - mgr.evalOpDefault({ tensorA }); + mgr.rebuildTensors({ tensorA }); } catch (const std::runtime_error& err) { // check exception ASSERT_TRUE(std::string(err.what()).find("zero-sized") != diff --git a/test/TestOpTensorSync.cpp b/test/TestOpTensorSync.cpp index 72f53ac5..3808941f 100644 --- a/test/TestOpTensorSync.cpp +++ b/test/TestOpTensorSync.cpp @@ -13,7 +13,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor) std::shared_ptr tensorA{ new kp::Tensor(testVecPreA) }; - mgr.evalOpDefault({ 
tensorA }); + mgr.rebuildTensors({ tensorA }); EXPECT_TRUE(tensorA->isInit()); @@ -37,7 +37,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; std::shared_ptr tensorC{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorA, tensorB, tensorC }); + mgr.rebuildTensors({ tensorA, tensorB, tensorC }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); diff --git a/test/TestTensor.cpp b/test/TestTensor.cpp index 5e54e858..764b5fa9 100644 --- a/test/TestTensor.cpp +++ b/test/TestTensor.cpp @@ -23,12 +23,13 @@ TEST(TestTensor, CopyFromHostData) kp::Manager mgr; + mgr.rebuildTensors({ tensorA, tensorB }); + mgr.evalOpDefault({ tensorA, tensorB }); + if (std::shared_ptr sq = mgr.getOrCreateManagedSequence("new")) { sq->begin(); - sq->record({ tensorA, tensorB }); - sq->record({ tensorA, tensorB }); sq->end(); From f356e646448f19658373320efec1dd762eb16860 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:17:40 +0000 Subject: [PATCH 03/27] Updated aggregate headers --- single_include/AggregateHeaders.cpp | 1 - single_include/kompute/Kompute.hpp | 154 ++++++++++++---------------- 2 files changed, 67 insertions(+), 88 deletions(-) diff --git a/single_include/AggregateHeaders.cpp b/single_include/AggregateHeaders.cpp index 9ce53e85..57ab728c 100644 --- a/single_include/AggregateHeaders.cpp +++ b/single_include/AggregateHeaders.cpp @@ -8,7 +8,6 @@ #include "kompute/operations/OpAlgoBase.hpp" #include "kompute/operations/OpAlgoLhsRhsOut.hpp" #include "kompute/operations/OpMult.hpp" -#include "kompute/operations/OpTensorCreate.hpp" #include "kompute/operations/OpTensorCopy.hpp" #include "kompute/operations/OpTensorSyncDevice.hpp" #include "kompute/operations/OpTensorSyncLocal.hpp" diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index d388fa24..62037c91 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp 
@@ -698,6 +698,7 @@ static const unsigned int shaders_glsl_logisticregression_comp_spv_len = 4920; #endif // define SHADEROP_SHADERLOGISTICREGRESSION_HPP #include +#include #define KP_MAX_DIM_SIZE 1 @@ -723,7 +724,7 @@ class Tensor enum class TensorTypes { eDevice = 0, ///< Type is device memory, source and destination - eHost = 1, ///< Type is host memory, source and destination + eHost = 1, ///< Type is host memory, source and destination eStorage = 2, ///< Type is Device memory (only) }; @@ -736,7 +737,8 @@ class Tensor * Default constructor with data provided which would be used to create the * respective vulkan buffer and memory. * - * @param data Non-zero-sized vector of data that will be used by the tensor + * @param data Non-zero-sized vector of data that will be used by the + * tensor * @param tensorType Type for the tensor which is of type TensorTypes */ Tensor(const std::vector& data, @@ -829,24 +831,30 @@ class Tensor bool createBarrier); /** - * Records a copy from the internal staging memory to the device memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice. + * Records a copy from the internal staging memory to the device memory + * using an optional barrier to wait for the operation. This function would + * only be relevant for kp::Tensors of type eDevice. * * @param commandBuffer Vulkan Command Buffer to record the commands into * @param createBarrier Whether to create a barrier that ensures the data is * copied before further operations. Default is true. */ - void recordCopyFromStagingToDevice(std::shared_ptr commandBuffer, - bool createBarrier); + void recordCopyFromStagingToDevice( + std::shared_ptr commandBuffer, + bool createBarrier); /** - * Records a copy from the internal device memory to the staging memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice. 
+ * Records a copy from the internal device memory to the staging memory + * using an optional barrier to wait for the operation. This function would + * only be relevant for kp::Tensors of type eDevice. * * @param commandBuffer Vulkan Command Buffer to record the commands into * @param createBarrier Whether to create a barrier that ensures the data is * copied before further operations. Default is true. */ - void recordCopyFromDeviceToStaging(std::shared_ptr commandBuffer, - bool createBarrier); + void recordCopyFromDeviceToStaging( + std::shared_ptr commandBuffer, + bool createBarrier); /** * Records the buffer memory barrier into the command buffer which @@ -908,9 +916,17 @@ class Tensor bool mIsInit = false; void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer - void createBuffer(std::shared_ptr buffer, vk::BufferUsageFlags bufferUsageFlags); - void allocateBindMemory(std::shared_ptr buffer, std::shared_ptr memory, vk::MemoryPropertyFlags memoryPropertyFlags); - void copyBuffer(std::shared_ptr commandBuffer, std::shared_ptr bufferFrom, std::shared_ptr bufferTo, vk::DeviceSize bufferSize, vk::BufferCopy copyRegion, bool createBarrier); + void createBuffer(std::shared_ptr buffer, + vk::BufferUsageFlags bufferUsageFlags); + void allocateBindMemory(std::shared_ptr buffer, + std::shared_ptr memory, + vk::MemoryPropertyFlags memoryPropertyFlags); + void copyBuffer(std::shared_ptr commandBuffer, + std::shared_ptr bufferFrom, + std::shared_ptr bufferTo, + vk::DeviceSize bufferSize, + vk::BufferCopy copyRegion, + bool createBarrier); // Private util functions vk::BufferUsageFlags getPrimaryBufferUsageFlags(); @@ -949,13 +965,11 @@ class OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors */ OpBase(std::shared_ptr physicalDevice, 
std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors, - bool freeTensors) + std::vector>& tensors) { SPDLOG_DEBUG("Compute OpBase constructor with params"); @@ -963,14 +977,12 @@ class OpBase this->mDevice = device; this->mCommandBuffer = commandBuffer; this->mTensors = tensors; - this->mFreeTensors = freeTensors; } /** * Default destructor for OpBase class. This OpBase destructor class should * always be called to destroy and free owned resources unless it is - * intended to destroy the resources in the parent class. This can be done - * by passing the mFreeTensors=false. + * intended to destroy the resources in the parent class. */ virtual ~OpBase() { @@ -1231,72 +1243,6 @@ class Sequence } // End namespace kp -namespace kp { - -/** - Operation that creates tensor and manages the memory of the components - created -*/ -class OpTensorCreate : public OpBase -{ - public: - OpTensorCreate(); - - /** - * Default constructor with parameters that provides the bare minimum - * requirements for the operations to be able to create and manage their - * sub-components. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that will be used to create in operation. - * @param freeTensors Whether operation manages the memory of the Tensors - */ - OpTensorCreate(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors); - - /** - * Default destructor which in this case expects the parent class to free - * the tensors - */ - ~OpTensorCreate() override; - - /** - * In charge of initialising the primary Tensor as well as the staging - * tensor as required. It will only initialise a staging tensor if the - * Primary tensor is of type Device. 
For staging tensors it performs a - * mapDataIntoHostMemory which would perform immediately as opposed to - * on sequence eval/submission. - */ - void init() override; - - /** - * Record runs the core actions to create the tensors. For device tensors - * it records a copyCommand to move the data from the staging tensor to the - * device tensor. The mapping for staging tensors happens in the init function - * not in the record function. - */ - void record() override; - - /** - * Does not perform any preEval commands. - */ - virtual void preEval() override; - - /** - * Performs a copy back into the main tensor to ensure that the data - * contained is the one that is now being stored in the GPU. - */ - virtual void postEval() override; - - private: -}; - -} // End namespace kp - #define KP_DEFAULT_SESSION "DEFAULT" namespace kp { @@ -1520,8 +1466,8 @@ class Manager /** * Function that simplifies the common workflow of tensor creation and * initialization. It will take the constructor parameters for a Tensor - * and will will us it to create a new Tensor and then create it using - * the OpCreateTensor command. + * and will will us it to create a new Tensor and then create it. The + * tensor memory will then be managed and owned by the manager. 
* * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize @@ -1531,17 +1477,49 @@ class Manager const std::vector& data, Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) { - SPDLOG_DEBUG("Kompute Manager createInitTensor triggered"); + SPDLOG_DEBUG("Kompute Manager buildTensor triggered"); SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr"); std::shared_ptr tensor = std::make_shared(kp::Tensor(data, tensorType)); - this->evalOpDefault({ tensor }); + tensor->init(this->mPhysicalDevice, this->mDevice); + if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { + tensor->mapDataIntoHostMemory(); + } + this->mManagedTensors.insert(tensor); return tensor; } + /** + * Function that simplifies the common workflow of tensor initialisation. It will take the constructor parameters for a Tensor and will will us it to create a new Tensor. The tensor memory will then be managed and owned by the manager. + * + * @param data The data to initialize the tensor with + * @param tensorType The type of tensor to initialize + * @returns Initialized Tensor with memory Syncd to GPU device + */ + void rebuildTensors(std::vector> tensors) + { + SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); + for (std::shared_ptr tensor : tensors) { + + if (tensor->isInit()) { + tensor->freeMemoryDestroyGPUResources(); + } + + tensor->init(this->mPhysicalDevice, this->mDevice); + if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { + tensor->mapDataIntoHostMemory(); + } + + std::set>::iterator it = this->mManagedTensors.find(tensor); + if (it == this->mManagedTensors.end()) { + this->mManagedTensors.insert(tensor); + } + } + } + private: // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mInstance = nullptr; @@ -1552,6 +1530,8 @@ class Manager bool mFreeDevice = false; // -------------- ALWAYS OWNED RESOURCES + std::set> mManagedTensors; + std::unordered_map> mManagedSequences; From 
984709a4e7f335e180b3c0b5cfdf5bb2bfac0a1a Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:17:52 +0000 Subject: [PATCH 04/27] Removd opcreatetensor from docs --- docs/overview/reference.rst | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/docs/overview/reference.rst b/docs/overview/reference.rst index 65721fb8..8b6160eb 100644 --- a/docs/overview/reference.rst +++ b/docs/overview/reference.rst @@ -86,16 +86,6 @@ The kp::OpMult operation is a sample implementation of the kp::OpAlgoBase class. .. doxygenclass:: kp::OpMult :members: -OpTensorCreate -------- - -The kp::OpTensorCreate is a tensor only operations which initialises a kp::Tensor by creating the respective vk::Buffer and vk::Memory, as well as transferring the local data into the GPU. - -.. image:: ../images/kompute-vulkan-architecture-opcreatetensor.jpg - :width: 100% - -.. doxygenclass:: kp::OpTensorCreate - :members: OpTensorCopy ------- From f62e353f4a5576aa98ac35405d27a2daa85087f5 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:39:58 +0000 Subject: [PATCH 05/27] Removed persistent anonymous sequences --- src/Manager.cpp | 3 +-- src/include/kompute/Manager.hpp | 9 +++------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/Manager.cpp b/src/Manager.cpp index 11d11a26..1db452d5 100755 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -143,8 +143,7 @@ Manager::createManagedSequence(std::string sequenceName, uint32_t queueIndex) if (sequenceName.empty()) { this->mCurrentSequenceIndex++; this->mManagedSequences.insert( - { KP_DEFAULT_SESSION + std::to_string(this->mCurrentSequenceIndex), - sq }); + { KP_DEFAULT_SESSION, sq }); } else { // TODO: Check if sequence doesn't already exist this->mManagedSequences.insert({ sequenceName, sq }); diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 973a0039..d7a08c49 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -126,8 
+126,7 @@ class Manager SPDLOG_DEBUG("Kompute Manager evalOp Default triggered"); this->mCurrentSequenceIndex++; this->evalOp(tensors, - KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), + KP_DEFAULT_SESSION, std::forward(params)...); } @@ -179,8 +178,7 @@ class Manager SPDLOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered"); this->mCurrentSequenceIndex++; this->evalOpAsync(tensors, - KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), + KP_DEFAULT_SESSION, std::forward(params)...); } @@ -222,8 +220,7 @@ class Manager void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX) { SPDLOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered"); - this->evalOpAwait(KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), + this->evalOpAwait(KP_DEFAULT_SESSION, waitFor); } From aa75fdae47e128395f6f6c0ae7220e2ca73d105c Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:43:50 +0000 Subject: [PATCH 06/27] format --- single_include/kompute/Kompute.hpp | 26 ++++++++++++-------------- src/Manager.cpp | 6 +++--- src/Tensor.cpp | 6 ++++-- src/include/kompute/Manager.hpp | 23 ++++++++++++----------- test/TestManager.cpp | 6 ++++-- test/TestMultipleAlgoExecutions.cpp | 6 ++++-- 6 files changed, 39 insertions(+), 34 deletions(-) mode change 100755 => 100644 src/Manager.cpp diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 62037c91..b63b766a 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -697,8 +697,8 @@ static const unsigned int shaders_glsl_logisticregression_comp_spv_len = 4920; } #endif // define SHADEROP_SHADERLOGISTICREGRESSION_HPP -#include #include +#include #define KP_MAX_DIM_SIZE 1 @@ -1361,10 +1361,8 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOp Default triggered"); this->mCurrentSequenceIndex++; - this->evalOp(tensors, - KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), - 
std::forward(params)...); + this->evalOp( + tensors, KP_DEFAULT_SESSION, std::forward(params)...); } /** @@ -1414,10 +1412,8 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered"); this->mCurrentSequenceIndex++; - this->evalOpAsync(tensors, - KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), - std::forward(params)...); + this->evalOpAsync( + tensors, KP_DEFAULT_SESSION, std::forward(params)...); } /** @@ -1458,9 +1454,7 @@ class Manager void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX) { SPDLOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered"); - this->evalOpAwait(KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), - waitFor); + this->evalOpAwait(KP_DEFAULT_SESSION, waitFor); } /** @@ -1493,7 +1487,10 @@ class Manager } /** - * Function that simplifies the common workflow of tensor initialisation. It will take the constructor parameters for a Tensor and will will us it to create a new Tensor. The tensor memory will then be managed and owned by the manager. + * Function that simplifies the common workflow of tensor initialisation. It + * will take the constructor parameters for a Tensor and will will us it to + * create a new Tensor. The tensor memory will then be managed and owned by + * the manager. 
* * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize @@ -1513,7 +1510,8 @@ class Manager tensor->mapDataIntoHostMemory(); } - std::set>::iterator it = this->mManagedTensors.find(tensor); + std::set>::iterator it = + this->mManagedTensors.find(tensor); if (it == this->mManagedTensors.end()) { this->mManagedTensors.insert(tensor); } diff --git a/src/Manager.cpp b/src/Manager.cpp old mode 100755 new mode 100644 index 1db452d5..98d07e49 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -62,7 +62,8 @@ Manager::~Manager() SPDLOG_DEBUG("Kompute Manager explicitly freeing tensors"); for (const std::shared_ptr& tensor : this->mManagedTensors) { if (!tensor->isInit()) { - SPDLOG_ERROR("Kompute Manager attempted to free managed tensor but not tensor is not initialised"); + SPDLOG_ERROR("Kompute Manager attempted to free managed tensor " + "but not tensor is not initialised"); } tensor->freeMemoryDestroyGPUResources(); } @@ -142,8 +143,7 @@ Manager::createManagedSequence(std::string sequenceName, uint32_t queueIndex) if (sequenceName.empty()) { this->mCurrentSequenceIndex++; - this->mManagedSequences.insert( - { KP_DEFAULT_SESSION, sq }); + this->mManagedSequences.insert({ KP_DEFAULT_SESSION, sq }); } else { // TODO: Check if sequence doesn't already exist this->mManagedSequences.insert({ sequenceName, sq }); diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 7400dfff..a1ba1544 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -232,7 +232,8 @@ Tensor::mapDataFromHostMemory() } else if (this->mTensorType == TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; } else { - SPDLOG_WARN("Kompute Tensor mapping data not supported on storage tensor"); + SPDLOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); return; } @@ -258,7 +259,8 @@ Tensor::mapDataIntoHostMemory() } else if (this->mTensorType == TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; } else { - SPDLOG_WARN("Kompute 
Tensor mapping data not supported on storage tensor"); + SPDLOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); return; } diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index d7a08c49..758206b9 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -1,7 +1,7 @@ #pragma once -#include #include +#include #include "kompute/Core.hpp" @@ -125,9 +125,8 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOp Default triggered"); this->mCurrentSequenceIndex++; - this->evalOp(tensors, - KP_DEFAULT_SESSION, - std::forward(params)...); + this->evalOp( + tensors, KP_DEFAULT_SESSION, std::forward(params)...); } /** @@ -177,9 +176,8 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered"); this->mCurrentSequenceIndex++; - this->evalOpAsync(tensors, - KP_DEFAULT_SESSION, - std::forward(params)...); + this->evalOpAsync( + tensors, KP_DEFAULT_SESSION, std::forward(params)...); } /** @@ -220,8 +218,7 @@ class Manager void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX) { SPDLOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered"); - this->evalOpAwait(KP_DEFAULT_SESSION, - waitFor); + this->evalOpAwait(KP_DEFAULT_SESSION, waitFor); } /** @@ -254,7 +251,10 @@ class Manager } /** - * Function that simplifies the common workflow of tensor initialisation. It will take the constructor parameters for a Tensor and will will us it to create a new Tensor. The tensor memory will then be managed and owned by the manager. + * Function that simplifies the common workflow of tensor initialisation. It + * will take the constructor parameters for a Tensor and will will us it to + * create a new Tensor. The tensor memory will then be managed and owned by + * the manager. 
* * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize @@ -274,7 +274,8 @@ class Manager tensor->mapDataIntoHostMemory(); } - std::set>::iterator it = this->mManagedTensors.find(tensor); + std::set>::iterator it = + this->mManagedTensors.find(tensor); if (it == this->mManagedTensors.end()) { this->mManagedTensors.insert(tensor); } diff --git a/test/TestManager.cpp b/test/TestManager.cpp index 3e1db7b1..d822a13d 100644 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -17,7 +17,8 @@ TEST(TestManager, EndToEndOpMultFlow) mgr.rebuildTensors({ tensorOutput }); - mgr.evalOpDefault({ tensorLHS, tensorRHS, tensorOutput }); + mgr.evalOpDefault( + { tensorLHS, tensorRHS, tensorOutput }); mgr.evalOpDefault({ tensorLHS, tensorRHS, tensorOutput }); @@ -105,7 +106,8 @@ TEST(TestManager, TestMultipleTensorsAtOnce) sq->begin(); - sq->record({ tensorLHS, tensorRHS, tensorOutput }); + sq->record( + { tensorLHS, tensorRHS, tensorOutput }); sq->record({ tensorLHS, tensorRHS, tensorOutput }); diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index 9d696d55..c98ba178 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -248,7 +248,8 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate) mgr.rebuildTensors({ tensorInA, tensorInB, tensorOut }); - mgr.evalOpDefault({ tensorInA, tensorInB, tensorOut }); + mgr.evalOpDefault( + { tensorInA, tensorInB, tensorOut }); std::string shader(R"( // The version to use @@ -306,7 +307,8 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) } )"); - mgr.evalOpDefault({ tensorInA, tensorInB, tensorOut }); + mgr.evalOpDefault( + { tensorInA, tensorInB, tensorOut }); mgr.evalOpDefault( { tensorInA, tensorInB, tensorOut }, From 65cb1b7582273ea01cca5a67d5fa85b1b78bcf20 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:44:07 +0000 Subject: [PATCH 07/27] Removed destroy 
tensor function to avoid error logs in test --- test/TestOpTensorCreate.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/test/TestOpTensorCreate.cpp b/test/TestOpTensorCreate.cpp index a57142cf..c1dd200f 100644 --- a/test/TestOpTensorCreate.cpp +++ b/test/TestOpTensorCreate.cpp @@ -5,20 +5,19 @@ TEST(TestOpTensorCreate, CreateSingleTensorSingleOp) { - - kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; - std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; - mgr.rebuildTensors({ tensorA }); + { + kp::Manager mgr; - EXPECT_TRUE(tensorA->isInit()); + mgr.rebuildTensors({ tensorA }); - EXPECT_EQ(tensorA->data(), testVecA); + EXPECT_TRUE(tensorA->isInit()); + + EXPECT_EQ(tensorA->data(), testVecA); + } - tensorA->freeMemoryDestroyGPUResources(); EXPECT_FALSE(tensorA->isInit()); } From aa25f980d6c8362f0a8c8ba4a12f1fa28b0c981a Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 21:41:48 +0000 Subject: [PATCH 08/27] Added OpTensorSyncDevice by default on manager buildtensor functions with ability to disable with parameter --- src/include/kompute/Manager.hpp | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 758206b9..b20fa310 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -7,6 +7,8 @@ #include "kompute/Sequence.hpp" +#include "kompute/operations/OpTensorSyncDevice.hpp" + #define KP_DEFAULT_SESSION "DEFAULT" namespace kp { @@ -229,11 +231,13 @@ class Manager * * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize + * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ std::shared_ptr buildTensor( const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + Tensor::TensorTypes tensorType = 
Tensor::TensorTypes::eDevice, + bool syncDataToGPU = true) { SPDLOG_DEBUG("Kompute Manager buildTensor triggered"); @@ -242,11 +246,13 @@ class Manager std::make_shared(kp::Tensor(data, tensorType)); tensor->init(this->mPhysicalDevice, this->mDevice); - if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { - tensor->mapDataIntoHostMemory(); + + if (syncDataToGPU) { + this->evalOpDefault({tensor}); } this->mManagedTensors.insert(tensor); + return tensor; } @@ -258,9 +264,10 @@ class Manager * * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize + * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - void rebuildTensors(std::vector> tensors) + void rebuildTensors(std::vector> tensors, bool syncDataToGPU = true) { SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); for (std::shared_ptr tensor : tensors) { @@ -270,9 +277,6 @@ class Manager } tensor->init(this->mPhysicalDevice, this->mDevice); - if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { - tensor->mapDataIntoHostMemory(); - } std::set>::iterator it = this->mManagedTensors.find(tensor); @@ -280,6 +284,10 @@ class Manager this->mManagedTensors.insert(tensor); } } + + if (syncDataToGPU) { + this->evalOpDefault(tensors); + } } private: From d7fe53eae6e5f9b37ec2c7e3a325bb634d1db5a1 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 21:42:02 +0000 Subject: [PATCH 09/27] Updated tests to align with manager parameters update --- test/TestLogisticRegression.cpp | 2 -- test/TestManager.cpp | 14 -------------- test/TestMultipleAlgoExecutions.cpp | 17 +++++------------ test/TestOpAlgoLoopsPassingData.cpp | 2 +- test/TestOpShadersFromStringAndFile.cpp | 8 -------- test/TestOpTensorCopy.cpp | 12 +++--------- test/TestOpTensorSync.cpp | 4 ++-- test/TestTensor.cpp | 1 - 8 files changed, 11 insertions(+), 49 deletions(-) diff --git 
a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index e25b1416..a2b69505 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -33,7 +33,6 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression) kp::Manager mgr; mgr.rebuildTensors(params); - mgr.evalOpDefault(params); std::shared_ptr sq = mgr.createManagedSequence(); @@ -119,7 +118,6 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) kp::Manager mgr; mgr.rebuildTensors(params); - mgr.evalOpDefault(params); std::shared_ptr sq = mgr.createManagedSequence(); diff --git a/test/TestManager.cpp b/test/TestManager.cpp index d822a13d..2bd7fc47 100644 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -17,9 +17,6 @@ TEST(TestManager, EndToEndOpMultFlow) mgr.rebuildTensors({ tensorOutput }); - mgr.evalOpDefault( - { tensorLHS, tensorRHS, tensorOutput }); - mgr.evalOpDefault({ tensorLHS, tensorRHS, tensorOutput }); mgr.evalOpDefault({ tensorOutput }); @@ -46,10 +43,6 @@ TEST(TestManager, OpMultSequenceFlow) sq->begin(); - sq->record({ tensorLHS }); - sq->record({ tensorRHS }); - sq->record({ tensorOutput }); - sq->record({ tensorLHS, tensorRHS, tensorOutput }); sq->record({ tensorOutput }); @@ -106,9 +99,6 @@ TEST(TestManager, TestMultipleTensorsAtOnce) sq->begin(); - sq->record( - { tensorLHS, tensorRHS, tensorOutput }); - sq->record({ tensorLHS, tensorRHS, tensorOutput }); sq->record({ tensorOutput }); @@ -127,10 +117,6 @@ TEST(TestManager, TestCreateInitTensor) std::shared_ptr tensorA = mgr.buildTensor({ 0, 1, 2 }); std::shared_ptr tensorB = mgr.buildTensor({ 0, 0, 0 }); - mgr.rebuildTensors({ tensorA, tensorB }); - - mgr.evalOpDefault({ tensorA, tensorB }); - mgr.evalOpDefault({ tensorA, tensorB }); mgr.evalOpDefault({ tensorB }); diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index c98ba178..208f1f9c 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ 
b/test/TestMultipleAlgoExecutions.cpp @@ -27,8 +27,6 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) { sq->begin(); - sq->record({ tensorA }); - sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); sq->record( @@ -60,7 +58,7 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) pa[index] = pa[index] + 1; })"); - mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorA }, false); std::shared_ptr sqTensor = mgr.createManagedSequence(); @@ -123,8 +121,6 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) sq->begin(); - sq->record({ tensorA }); - sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); @@ -189,7 +185,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) pa[index] = pa[index] + 1; })"); - mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorA }, false); { std::shared_ptr sq = @@ -248,9 +244,6 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate) mgr.rebuildTensors({ tensorInA, tensorInB, tensorOut }); - mgr.evalOpDefault( - { tensorInA, tensorInB, tensorOut }); - std::string shader(R"( // The version to use #version 450 @@ -284,9 +277,9 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) kp::Manager mgr; - auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 }); - auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 }); - auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 }); + auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 }, kp::Tensor::TensorTypes::eDevice, false); + auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 }, kp::Tensor::TensorTypes::eDevice, false); + auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 }, kp::Tensor::TensorTypes::eDevice, false); std::string shader(R"( // The version to use diff --git a/test/TestOpAlgoLoopsPassingData.cpp b/test/TestOpAlgoLoopsPassingData.cpp index b04ef383..63bab299 100644 --- a/test/TestOpAlgoLoopsPassingData.cpp +++ b/test/TestOpAlgoLoopsPassingData.cpp @@ -30,7 +30,7 @@ TEST(TestProcessingIterations, 
IterateThroughMultipleSumAndCopies) } )"); - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }, false); { std::shared_ptr sq = diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp index 7b0db3de..7d73bd7f 100644 --- a/test/TestOpShadersFromStringAndFile.cpp +++ b/test/TestOpShadersFromStringAndFile.cpp @@ -28,8 +28,6 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor) } )"); - mgr.evalOpDefault({ tensorA, tensorB }); - mgr.evalOpDefault( { tensorA, tensorB }, std::vector(shader.begin(), shader.end())); @@ -47,8 +45,6 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; mgr.rebuildTensors({ tensorA, tensorB }); - mgr.evalOpDefault({ tensorA, tensorB }); - mgr.evalOpDefault( { tensorA, tensorB }, std::vector( @@ -71,8 +67,6 @@ TEST(TestOpAlgoBase, ShaderRawDataFromFile) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; mgr.rebuildTensors({ tensorA, tensorB }); - mgr.evalOpDefault({ tensorA, tensorB }); - mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp"); @@ -90,8 +84,6 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromFile) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; mgr.rebuildTensors({ tensorA, tensorB }); - mgr.evalOpDefault({ tensorA, tensorB }); - mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv"); diff --git a/test/TestOpTensorCopy.cpp b/test/TestOpTensorCopy.cpp index ae99bf17..ca4539d6 100644 --- a/test/TestOpTensorCopy.cpp +++ b/test/TestOpTensorCopy.cpp @@ -16,8 +16,6 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) mgr.rebuildTensors({ tensorA, tensorB }); - mgr.evalOpDefault({ tensorA, tensorB }); - EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -45,8 +43,6 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) mgr.rebuildTensors({ tensorA, tensorB, tensorC }); - mgr.evalOpDefault({ tensorA, tensorB 
}); - EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); EXPECT_TRUE(tensorC->isInit()); @@ -74,7 +70,7 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor) std::shared_ptr tensorB{ new kp::Tensor( testVecB, kp::Tensor::TensorTypes::eHost) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }, false); // Only calling sync on device type tensor mgr.evalOpDefault({ tensorA }); @@ -103,7 +99,7 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor) testVecA, kp::Tensor::TensorTypes::eHost) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }, false); // Only calling sync on device type tensor mgr.evalOpDefault({ tensorB }); @@ -135,8 +131,6 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor) mgr.rebuildTensors({ tensorA, tensorB }); - // Not calling OpTensorSyncDevice - EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -159,7 +153,7 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail) std::shared_ptr tensorA{ new kp::Tensor( testVecA, kp::Tensor::TensorTypes::eHost) }; - mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorA }, false); EXPECT_TRUE(tensorA->isInit()); diff --git a/test/TestOpTensorSync.cpp b/test/TestOpTensorSync.cpp index 3808941f..9080194a 100644 --- a/test/TestOpTensorSync.cpp +++ b/test/TestOpTensorSync.cpp @@ -13,7 +13,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor) std::shared_ptr tensorA{ new kp::Tensor(testVecPreA) }; - mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorA }, false); EXPECT_TRUE(tensorA->isInit()); @@ -37,7 +37,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; std::shared_ptr tensorC{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.rebuildTensors({ tensorA, tensorB, tensorC }); + mgr.rebuildTensors({ tensorA, tensorB, tensorC }, false); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); diff 
--git a/test/TestTensor.cpp b/test/TestTensor.cpp index 764b5fa9..7ceea1ba 100644 --- a/test/TestTensor.cpp +++ b/test/TestTensor.cpp @@ -24,7 +24,6 @@ TEST(TestTensor, CopyFromHostData) kp::Manager mgr; mgr.rebuildTensors({ tensorA, tensorB }); - mgr.evalOpDefault({ tensorA, tensorB }); if (std::shared_ptr sq = mgr.getOrCreateManagedSequence("new")) { From 71cde2d5b2f68339e80aa87bf8fd09f05cc6f6b1 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 21:42:14 +0000 Subject: [PATCH 10/27] Updated single include header --- single_include/kompute/Kompute.hpp | 125 +++++++++++++++-------------- 1 file changed, 65 insertions(+), 60 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index b63b766a..722a625a 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1243,6 +1243,59 @@ class Sequence } // End namespace kp +namespace kp { + +/** + Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging. +*/ +class OpTensorSyncDevice : public OpBase +{ + public: + OpTensorSyncDevice(); + + /** + * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensos provided cannot be of type TensorTypes::eStorage. + * + * @param physicalDevice Vulkan physical device used to find device queues + * @param device Vulkan logical device for passing to Algorithm + * @param commandBuffer Vulkan Command Buffer to record commands into + * @param tensors Tensors that will be used to create in operation. 
+ */ + OpTensorSyncDevice(std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector> tensors); + + /** + * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. + */ + ~OpTensorSyncDevice() override; + + /** + * Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element. + */ + void init() override; + + /** + * For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory. + */ + void record() override; + + /** + * Does not perform any preEval commands. + */ + virtual void preEval() override; + + /** + * Does not perform any postEval commands. + */ + virtual void postEval() override; + + private: +}; + +} // End namespace kp + #define KP_DEFAULT_SESSION "DEFAULT" namespace kp { @@ -1465,11 +1518,13 @@ class Manager * * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize + * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ std::shared_ptr buildTensor( const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice, + bool syncDataToGPU = true) { SPDLOG_DEBUG("Kompute Manager buildTensor triggered"); @@ -1478,8 +1533,9 @@ class Manager std::make_shared(kp::Tensor(data, tensorType)); tensor->init(this->mPhysicalDevice, this->mDevice); - if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { - tensor->mapDataIntoHostMemory(); + + if (syncDataToGPU) { + this->evalOpDefault({tensor}); } this->mManagedTensors.insert(tensor); @@ -1494,9 +1550,10 @@ class Manager * * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize + * @param syncDataToGPU Whether to sync the data to GPU memory * @returns 
Initialized Tensor with memory Syncd to GPU device */ - void rebuildTensors(std::vector> tensors) + void rebuildTensors(std::vector> tensors, bool syncDataToGPU = true) { SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); for (std::shared_ptr tensor : tensors) { @@ -1506,9 +1563,6 @@ class Manager } tensor->init(this->mPhysicalDevice, this->mDevice); - if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { - tensor->mapDataIntoHostMemory(); - } std::set>::iterator it = this->mManagedTensors.find(tensor); @@ -1516,6 +1570,10 @@ class Manager this->mManagedTensors.insert(tensor); } } + + if (syncDataToGPU) { + this->evalOpDefault(tensors); + } } private: @@ -1977,59 +2035,6 @@ class OpTensorCopy : public OpBase namespace kp { -/** - Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging. -*/ -class OpTensorSyncDevice : public OpBase -{ - public: - OpTensorSyncDevice(); - - /** - * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensos provided cannot be of type TensorTypes::eStorage. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that will be used to create in operation. - */ - OpTensorSyncDevice(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors); - - /** - * Default destructor. 
This class does not manage memory so it won't be expecting the parent to perform a release. - */ - ~OpTensorSyncDevice() override; - - /** - * Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element. - */ - void init() override; - - /** - * For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory. - */ - void record() override; - - /** - * Does not perform any preEval commands. - */ - virtual void preEval() override; - - /** - * Does not perform any postEval commands. - */ - virtual void postEval() override; - - private: -}; - -} // End namespace kp - -namespace kp { - /** Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging. 
*/ From 3547810476755ab02b6dac3dca7a959410b12e14 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 21:42:32 +0000 Subject: [PATCH 11/27] reformat --- src/include/kompute/Manager.hpp | 6 +++--- test/TestMultipleAlgoExecutions.cpp | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index b20fa310..ad3facd9 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -248,11 +248,10 @@ class Manager tensor->init(this->mPhysicalDevice, this->mDevice); if (syncDataToGPU) { - this->evalOpDefault({tensor}); + this->evalOpDefault({ tensor }); } this->mManagedTensors.insert(tensor); - return tensor; } @@ -267,7 +266,8 @@ class Manager * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - void rebuildTensors(std::vector> tensors, bool syncDataToGPU = true) + void rebuildTensors(std::vector> tensors, + bool syncDataToGPU = true) { SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); for (std::shared_ptr tensor : tensors) { diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index 208f1f9c..4d2a44a9 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -277,9 +277,12 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) kp::Manager mgr; - auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 }, kp::Tensor::TensorTypes::eDevice, false); - auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 }, kp::Tensor::TensorTypes::eDevice, false); - auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 }, kp::Tensor::TensorTypes::eDevice, false); + auto tensorInA = mgr.buildTensor( + { 2.0, 4.0, 6.0 }, kp::Tensor::TensorTypes::eDevice, false); + auto tensorInB = mgr.buildTensor( + { 0.0, 1.0, 2.0 }, kp::Tensor::TensorTypes::eDevice, false); + auto tensorOut = mgr.buildTensor( + { 0.0, 0.0, 0.0 }, 
kp::Tensor::TensorTypes::eDevice, false); std::string shader(R"( // The version to use From 667841d1366d22f16694cc7952d9fcc097f19f40 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 19:17:38 +0000 Subject: [PATCH 12/27] Updated ccls to include python --- .ccls | 1 + 1 file changed, 1 insertion(+) diff --git a/.ccls b/.ccls index f215ea9d..ab626158 100644 --- a/.ccls +++ b/.ccls @@ -13,6 +13,7 @@ -DDEBUG=1 -DKOMPUTE_INCLUDE_FOR_SYNTAX +-I/usr/include/python3.6/ -I./python/pybind11/include/ -I./external/Vulkan-Headers/include/ -I./external/googletest/googletest/include/ From dead40c871206dacc7ae9aaffeee45cc844d2632 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 19:17:48 +0000 Subject: [PATCH 13/27] Added python target --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 2f0b5e2f..da1df2cb 100644 --- a/Makefile +++ b/Makefile @@ -156,6 +156,11 @@ vs_run_tests: vs_build_tests ./build/test/$(VS_BUILD_TYPE)/test_kompute.exe --gtest_filter=$(FILTER_TESTS) +#### PYTHONG #### + +test_python: + python -m pytest -s --log-cli-level=DEBUG -v python/test/ + ####### Run CI Commands ####### # This command uses act to replicate github action From 650975838c35e8786a05b02a62f33c77e0f0a4e0 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 19:19:09 +0000 Subject: [PATCH 14/27] Updated python to align with new structure --- python/src/main.cpp | 15 ++++----------- python/test/test_array_multiplication.py | 2 +- python/test/test_kompute.py | 20 ++++++++++++-------- python/test/test_logistic_regression.py | 2 +- 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index 6e795fad..74f010f0 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -105,8 +105,6 @@ PYBIND11_MODULE(kp, m) { .def("is_init", &kp::Sequence::isInit, "Checks if the Sequence has been initialized") // record - .def("record_tensor_create", 
&kp::Sequence::record, - "Records operation to create and initialise tensor GPU memory and buffer") .def("record_tensor_copy", &kp::Sequence::record, "Records operation to copy one tensor to one or many tensors") .def("record_tensor_sync_device", &kp::Sequence::record, @@ -161,7 +159,10 @@ PYBIND11_MODULE(kp, m) { .def("create_sequence", &kp::Manager::createManagedSequence, py::arg("name") = "", py::arg("queueIndex") = 0, "Create a sequence with specific name and specified index of available queues") .def("build_tensor", &kp::Manager::buildTensor, - py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice, + py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice, py::arg("syncDataToGPU") = true, + "Build and initialise tensor") + .def("rebuild_tensors", &kp::Manager::rebuildTensors, + py::arg("tensors"), py::arg("syncDataToGPU") = true, "Build and initialise tensor") // Await functions @@ -172,8 +173,6 @@ PYBIND11_MODULE(kp, m) { py::arg("waitFor") = UINT64_MAX, "Awaits for asynchronous operation on the last anonymous Sequence created") // eval default - .def("eval_tensor_create_def", &kp::Manager::evalOpDefault, - "Evaluates operation to create and initialise tensor GPU memory and buffer with new anonymous Sequence") .def("eval_tensor_copy_def", &kp::Manager::evalOpDefault, "Evaluates operation to copy one tensor to one or many tensors with new anonymous Sequence") .def("eval_tensor_sync_device_def", &kp::Manager::evalOpDefault, @@ -209,8 +208,6 @@ PYBIND11_MODULE(kp, m) { "Evaluates operation to run left right out operation with custom shader with new anonymous Sequence") // eval - .def("eval_tensor_create", &kp::Manager::evalOp, - "Evaluates operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence") .def("eval_tensor_copy", &kp::Manager::evalOp, "Evaluates operation to copy one tensor to one or many tensors with explicitly named Sequence") .def("eval_tensor_sync_device", 
&kp::Manager::evalOp, @@ -249,8 +246,6 @@ PYBIND11_MODULE(kp, m) { "Evaluates operation to run left right out operation with custom shader with explicitly named Sequence") // eval async default - .def("eval_async_tensor_create_def", &kp::Manager::evalOpAsyncDefault, - "Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with anonymous Sequence") .def("eval_async_tensor_copy_def", &kp::Manager::evalOpAsyncDefault, "Evaluates asynchronously operation to copy one tensor to one or many tensors with anonymous Sequence") .def("eval_async_tensor_sync_device_def", &kp::Manager::evalOpAsyncDefault, @@ -286,8 +281,6 @@ PYBIND11_MODULE(kp, m) { "Evaluates asynchronously operation to run left right out operation with custom shader with anonymous Sequence") // eval async - .def("eval_async_tensor_create", &kp::Manager::evalOpAsync, - "Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence") .def("eval_async_tensor_copy", &kp::Manager::evalOpAsync, "Evaluates asynchronously operation to copy one tensor to one or many tensors with explicitly named Sequence") .def("eval_async_tensor_sync_device", &kp::Manager::evalOpAsync, diff --git a/python/test/test_array_multiplication.py b/python/test/test_array_multiplication.py index 337c7a5d..fac2ed41 100644 --- a/python/test/test_array_multiplication.py +++ b/python/test/test_array_multiplication.py @@ -14,7 +14,7 @@ def test_array_multiplication(): tensor_out = kp.Tensor([0, 0, 0]) # 3. Initialise the Kompute Tensors in the GPU - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) # 4. 
Define the multiplication shader code to run on the GPU @ps.python2shader diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index 9dee9df9..bec4b40e 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -2,6 +2,7 @@ import kp import numpy as np +import logging DIRNAME = os.path.dirname(os.path.abspath(__file__)) @@ -16,7 +17,7 @@ def test_opmult(): mgr = kp.Manager() - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_mult_def([tensor_in_a, tensor_in_b, tensor_out]) @@ -52,7 +53,7 @@ def test_opalgobase_data(): } """ - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_str_def([tensor_in_a, tensor_in_b, tensor_out], shaderData) @@ -75,7 +76,7 @@ def test_opalgobase_file(): shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp") - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) @@ -93,7 +94,7 @@ def test_sequence(): tensor_in_b = kp.Tensor([1, 2, 3]) tensor_out = kp.Tensor([0, 0, 0]) - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp") mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) @@ -118,7 +119,8 @@ def test_workgroup(): tensor_a = kp.Tensor(np.zeros([16,8])) tensor_b = kp.Tensor(np.zeros([16,8])) - mgr.eval_tensor_create_def([tensor_a, tensor_b]) + + mgr.rebuild_tensors([tensor_a, tensor_b]) shader_src = """ #version 450 @@ -131,19 +133,21 @@ def test_workgroup(): void main() { uint index = gl_WorkGroupID.x*gl_NumWorkGroups.y + gl_WorkGroupID.y; - + toutx[index] = 
gl_GlobalInvocationID.x; touty[index] = gl_GlobalInvocationID.y; } """ shader_src = bytes(shader_src, encoding='utf8') - seq = mgr.create_sequence() + seq = mgr.create_sequence("new") seq.begin() seq.record_algo_data([tensor_a, tensor_b], shader_src, (16,8,1)) seq.end() seq.eval() - + mgr.eval_tensor_sync_local_def([tensor_a, tensor_b]) + assert np.all(tensor_a.numpy() == np.stack([np.arange(16)]*8, axis=1).ravel()) assert np.all(tensor_b.numpy() == np.stack([np.arange(8)]*16, axis=0).ravel()) + diff --git a/python/test/test_logistic_regression.py b/python/test/test_logistic_regression.py index f8737588..1fbcd5bc 100644 --- a/python/test/test_logistic_regression.py +++ b/python/test/test_logistic_regression.py @@ -66,7 +66,7 @@ def compute_shader( params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i, tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m] - mgr.eval_tensor_create_def(params) + mgr.rebuild_tensors(params) # Create a managed sequence sq = mgr.create_sequence() From b34984b7132301cd1e3827393879a6c0471f0d90 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 19:19:39 +0000 Subject: [PATCH 15/27] Updating sequence to have isInit until init run --- src/Manager.cpp | 20 ++++++++++---------- src/Sequence.cpp | 4 +++- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/Manager.cpp b/src/Manager.cpp index 98d07e49..7885c2eb 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -58,6 +58,16 @@ Manager::~Manager() return; } + if (this->mManagedSequences.size()) { + SPDLOG_DEBUG("Kompute Manager explicitly running destructor for " + "managed sequences"); + for (const std::pair>& sqPair : + this->mManagedSequences) { + sqPair.second->freeMemoryDestroyGPUResources(); + } + this->mManagedSequences.clear(); + } + if (this->mManagedTensors.size()) { SPDLOG_DEBUG("Kompute Manager explicitly freeing tensors"); for (const std::shared_ptr& tensor : this->mManagedTensors) { @@ -70,16 +80,6 @@ 
Manager::~Manager() this->mManagedTensors.clear(); } - if (this->mManagedSequences.size()) { - SPDLOG_DEBUG("Kompute Manager explicitly running destructor for " - "managed sequences"); - for (const std::pair>& sqPair : - this->mManagedSequences) { - sqPair.second->freeMemoryDestroyGPUResources(); - } - this->mManagedSequences.clear(); - } - if (this->mFreeDevice) { SPDLOG_INFO("Destroying device"); this->mDevice->destroy( diff --git a/src/Sequence.cpp b/src/Sequence.cpp index 50ef8b0f..3c3b7b10 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -20,7 +20,7 @@ Sequence::Sequence(std::shared_ptr physicalDevice, this->mDevice = device; this->mComputeQueue = computeQueue; this->mQueueIndex = queueIndex; - this->mIsInit = true; + this->mIsInit = false; } Sequence::~Sequence() @@ -203,6 +203,8 @@ Sequence::isInit() void Sequence::freeMemoryDestroyGPUResources() { + SPDLOG_DEBUG("Kompute Sequence freeMemoryDestroyGPUResources called"); + if (!this->mIsInit) { SPDLOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called " "but Sequence is not initialized so there's no relevant " From 39d02dd42829f16a6585205992c07117ed965fce Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 19:20:21 +0000 Subject: [PATCH 16/27] Added test that verifies memory violation sequence --- test/TestMultipleAlgoExecutions.cpp | 36 +++++++++++++++++++++++++++++ test/TestSequence.cpp | 1 + 2 files changed, 37 insertions(+) diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index 4d2a44a9..b6eaea54 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -314,3 +314,39 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) EXPECT_EQ(tensorOut->data(), std::vector({ 0.0, 4.0, 12.0 })); } + +TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope) +{ + std::shared_ptr tensorA{ new kp::Tensor({ 0, 0, 0 }) }; + + std::string shader(R"( + #version 450 + layout 
(local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { float pa[]; }; + void main() { + uint index = gl_GlobalInvocationID.x; + pa[index] = pa[index] + 1; + })"); + + { + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + mgr.rebuildTensors({ tensorA }); + + sq = mgr.createManagedSequence(); + + sq->begin(); + sq->record( + { tensorA }, std::vector(shader.begin(), shader.end())); + sq->end(); + + sq->eval(); + + mgr.evalOpDefault({ tensorA }); + } + } + EXPECT_EQ(tensorA->data(), std::vector({ 1, 1, 1 })); +} diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp index 882729dc..2d0a8a4b 100644 --- a/test/TestSequence.cpp +++ b/test/TestSequence.cpp @@ -39,3 +39,4 @@ TEST(TestSequence, SequenceDestructorViaManager) EXPECT_FALSE(sq->isInit()); } + From 91252201cebf120dbd277980a293d11ed7056139 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 19:20:30 +0000 Subject: [PATCH 17/27] updating single include --- single_include/kompute/Kompute.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 722a625a..f3ebd990 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1535,7 +1535,7 @@ class Manager tensor->init(this->mPhysicalDevice, this->mDevice); if (syncDataToGPU) { - this->evalOpDefault({tensor}); + this->evalOpDefault({ tensor }); } this->mManagedTensors.insert(tensor); @@ -1553,7 +1553,8 @@ class Manager * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - void rebuildTensors(std::vector> tensors, bool syncDataToGPU = true) + void rebuildTensors(std::vector> tensors, + bool syncDataToGPU = true) { SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); for (std::shared_ptr tensor : tensors) { From 0d9a9758dac2dac212164be5a3255ac5274c01cd Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 
21:29:24 +0000 Subject: [PATCH 18/27] Renamed tensor and rebuild functions --- src/Manager.cpp | 44 +++++++------------ src/include/kompute/Manager.hpp | 77 +++++++++++++++++++-------------- 2 files changed, 60 insertions(+), 61 deletions(-) diff --git a/src/Manager.cpp b/src/Manager.cpp index 7885c2eb..e7bb88f2 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -111,44 +111,32 @@ Manager::~Manager() } std::shared_ptr -Manager::getOrCreateManagedSequence(std::string sequenceName) +Manager::sequence(std::string sequenceName, uint32_t queueIndex) { - SPDLOG_DEBUG("Kompute Manager creating Sequence object"); + SPDLOG_DEBUG("Kompute Manager sequence() with sequenceName: {} " + "and queueIndex: {}", + sequenceName, + queueIndex); + + std::shared_ptr sq = nullptr; std::unordered_map>::iterator found = this->mManagedSequences.find(sequenceName); if (found == this->mManagedSequences.end()) { - return this->createManagedSequence(sequenceName); - } else { - return found->second; - } -} - -std::shared_ptr -Manager::createManagedSequence(std::string sequenceName, uint32_t queueIndex) -{ + std::shared_ptr sq = + std::make_shared(this->mPhysicalDevice, + this->mDevice, + this->mComputeQueues[queueIndex], + this->mComputeQueueFamilyIndices[queueIndex]); + sq->init(); - SPDLOG_DEBUG("Kompute Manager createManagedSequence with sequenceName: {} " - "and queueIndex: {}", - sequenceName, - queueIndex); - - std::shared_ptr sq = - std::make_shared(this->mPhysicalDevice, - this->mDevice, - this->mComputeQueues[queueIndex], - this->mComputeQueueFamilyIndices[queueIndex]); - sq->init(); + this->mManagedSequences.insert({ sequenceName, sq }); - if (sequenceName.empty()) { - this->mCurrentSequenceIndex++; - this->mManagedSequences.insert({ KP_DEFAULT_SESSION, sq }); + return sq; } else { - // TODO: Check if sequence doesn't already exist - this->mManagedSequences.insert({ sequenceName, sq }); + return found->second; } - return sq; } void diff --git a/src/include/kompute/Manager.hpp 
b/src/include/kompute/Manager.hpp index ad3facd9..5ef32ff6 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -64,23 +64,12 @@ class Manager * * @param sequenceName The name for the named sequence to be retrieved or * created - * @return Shared pointer to the manager owned sequence resource - */ - std::shared_ptr getOrCreateManagedSequence( - std::string sequenceName); - - /** - * Create a new managed Kompute sequence so it's available within the - * manager. - * - * @param sequenceName The name for the named sequence to be created, if - * empty then default indexed value is used * @param queueIndex The queue to use from the available queues - * @return Weak pointer to the manager owned sequence resource + * @return Shared pointer to the manager owned sequence resource */ - std::shared_ptr createManagedSequence( - std::string sequenceName = "", - uint32_t queueIndex = 0); + std::shared_ptr sequence( + std::string sequenceName = KP_DEFAULT_SESSION, + uint32_t queueIndex = 0); /** * Function that evaluates operation against named sequence. 
@@ -97,7 +86,7 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOp triggered"); std::shared_ptr sq = - this->getOrCreateManagedSequence(sequenceName); + this->sequence(sequenceName); SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); sq->begin(); @@ -147,7 +136,7 @@ class Manager SPDLOG_DEBUG("Kompute Manager evalOpAsync triggered"); std::shared_ptr sq = - this->getOrCreateManagedSequence(sequenceName); + this->sequence(sequenceName); SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN"); sq->begin(); @@ -234,12 +223,12 @@ class Manager * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - std::shared_ptr buildTensor( + std::shared_ptr tensor( const std::vector& data, Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice, bool syncDataToGPU = true) { - SPDLOG_DEBUG("Kompute Manager buildTensor triggered"); + SPDLOG_DEBUG("Kompute Manager tensor triggered"); SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr"); std::shared_ptr tensor = @@ -261,32 +250,54 @@ class Manager * create a new Tensor. The tensor memory will then be managed and owned by * the manager. 
* - * @param data The data to initialize the tensor with - * @param tensorType The type of tensor to initialize + * @param tensors Array of tensors to rebuild * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - void rebuildTensors(std::vector> tensors, + void rebuild(std::vector> tensors, bool syncDataToGPU = true) { - SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); + SPDLOG_DEBUG("Kompute Manager rebuild triggered"); for (std::shared_ptr tensor : tensors) { - if (tensor->isInit()) { - tensor->freeMemoryDestroyGPUResources(); - } + // False syncData to run all tensors at once instead one by one + this->rebuild(tensor, false); + } - tensor->init(this->mPhysicalDevice, this->mDevice); + if (syncDataToGPU) { + this->evalOpDefault(tensors); + } + } - std::set>::iterator it = - this->mManagedTensors.find(tensor); - if (it == this->mManagedTensors.end()) { - this->mManagedTensors.insert(tensor); - } + /** + * Function that simplifies the common workflow of tensor initialisation. It + * will take the constructor parameters for a Tensor and will will us it to + * create a new Tensor. The tensor memory will then be managed and owned by + * the manager. 
+ * + * @param tensors Single tensor to rebuild + * @param syncDataToGPU Whether to sync the data to GPU memory + * @returns Initialized Tensor with memory Syncd to GPU device + */ + void rebuild(std::shared_ptr tensor, + bool syncDataToGPU = true) + { + SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered"); + + if (tensor->isInit()) { + tensor->freeMemoryDestroyGPUResources(); + } + + tensor->init(this->mPhysicalDevice, this->mDevice); + + std::set>::iterator it = + this->mManagedTensors.find(tensor); + if (it == this->mManagedTensors.end()) { + this->mManagedTensors.insert(tensor); } if (syncDataToGPU) { - this->evalOpDefault(tensors); + this->evalOpDefault({ tensor }); } } From 4baba3368179f84ba8634e2104deb7f21b785b64 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 21:29:43 +0000 Subject: [PATCH 19/27] Updated tests to match new functions and added test to ensure seuqence is destroyed --- test/TestAsyncOperations.cpp | 12 ++++---- test/TestLogisticRegression.cpp | 8 ++--- test/TestManager.cpp | 28 ++++++++--------- test/TestMultipleAlgoExecutions.cpp | 40 ++++++++++++------------- test/TestOpAlgoLoopsPassingData.cpp | 8 ++--- test/TestOpShadersFromStringAndFile.cpp | 8 ++--- test/TestOpTensorCopy.cpp | 12 ++++---- test/TestOpTensorCreate.cpp | 22 +++++++------- test/TestOpTensorSync.cpp | 4 +-- test/TestSequence.cpp | 4 +-- test/TestTensor.cpp | 4 +-- 11 files changed, 75 insertions(+), 75 deletions(-) diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index c43f5648..ddbcb659 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -49,7 +49,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) inputsSyncB.push_back(std::make_shared(kp::Tensor(data))); } - mgr.rebuildTensors(inputsSyncB); + mgr.rebuild(inputsSyncB); auto startSync = std::chrono::high_resolution_clock::now(); @@ -77,10 +77,10 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) 
inputsAsyncB.push_back(std::make_shared(kp::Tensor(data))); } - mgrAsync.rebuildTensors(inputsAsyncB); + mgrAsync.rebuild(inputsAsyncB); for (uint32_t i = 0; i < numParallel; i++) { - mgrAsync.createManagedSequence("async" + std::to_string(i), i); + mgrAsync.sequence("async" + std::to_string(i), i); } auto startAsync = std::chrono::high_resolution_clock::now(); @@ -146,10 +146,10 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution) std::shared_ptr tensorA{ new kp::Tensor(data) }; std::shared_ptr tensorB{ new kp::Tensor(data) }; - mgr.createManagedSequence("asyncOne"); - mgr.createManagedSequence("asyncTwo"); + mgr.sequence("asyncOne"); + mgr.sequence("asyncTwo"); - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); mgr.evalOpAsync( { tensorA }, "asyncOne", std::vector(shader.begin(), shader.end())); diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index a2b69505..b974655a 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -32,9 +32,9 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression) { kp::Manager mgr; - mgr.rebuildTensors(params); + mgr.rebuild(params); - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); @@ -117,9 +117,9 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) { kp::Manager mgr; - mgr.rebuildTensors(params); + mgr.rebuild(params); - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); diff --git a/test/TestManager.cpp b/test/TestManager.cpp index 2bd7fc47..75494156 100644 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -8,14 +8,14 @@ TEST(TestManager, EndToEndOpMultFlow) kp::Manager mgr; std::shared_ptr tensorLHS{ new kp::Tensor({ 0, 1, 2 }) }; - mgr.rebuildTensors({ tensorLHS }); + mgr.rebuild({ tensorLHS }); std::shared_ptr tensorRHS{ new kp::Tensor({ 2, 4, 6 
}) }; - mgr.rebuildTensors({ tensorRHS }); + mgr.rebuild({ tensorRHS }); std::shared_ptr tensorOutput{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.rebuildTensors({ tensorOutput }); + mgr.rebuild({ tensorOutput }); mgr.evalOpDefault({ tensorLHS, tensorRHS, tensorOutput }); @@ -36,10 +36,10 @@ TEST(TestManager, OpMultSequenceFlow) kp::Manager mgr; { - mgr.rebuildTensors({ tensorLHS, tensorRHS, tensorOutput }); + mgr.rebuild({ tensorLHS, tensorRHS, tensorOutput }); std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); sq->begin(); @@ -59,16 +59,16 @@ TEST(TestManager, TestMultipleSequences) kp::Manager mgr; std::shared_ptr sqOne = - mgr.getOrCreateManagedSequence("sqOne"); + mgr.sequence("sqOne"); std::shared_ptr sqTwo = - mgr.getOrCreateManagedSequence("sqTwo"); + mgr.sequence("sqTwo"); std::shared_ptr sqOneRef = - mgr.getOrCreateManagedSequence("sqOne"); + mgr.sequence("sqOne"); std::shared_ptr sqTwoRef = - mgr.getOrCreateManagedSequence("sqTwo"); + mgr.sequence("sqTwo"); EXPECT_EQ(sqOne, sqOneRef); EXPECT_NE(sqTwo, sqOneRef); @@ -88,10 +88,10 @@ TEST(TestManager, TestMultipleTensorsAtOnce) kp::Manager mgr; std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); { - mgr.rebuildTensors({ tensorLHS, tensorRHS, tensorOutput }); + mgr.rebuild({ tensorLHS, tensorRHS, tensorOutput }); EXPECT_TRUE(tensorLHS->isInit()); EXPECT_TRUE(tensorRHS->isInit()); @@ -114,8 +114,8 @@ TEST(TestManager, TestCreateInitTensor) { kp::Manager mgr; - std::shared_ptr tensorA = mgr.buildTensor({ 0, 1, 2 }); - std::shared_ptr tensorB = mgr.buildTensor({ 0, 0, 0 }); + std::shared_ptr tensorA = mgr.tensor({ 0, 1, 2 }); + std::shared_ptr tensorB = mgr.tensor({ 0, 0, 0 }); mgr.evalOpDefault({ tensorA, tensorB }); @@ -124,7 +124,7 @@ TEST(TestManager, TestCreateInitTensor) EXPECT_EQ(tensorB->data(), std::vector({ 0, 1, 2 })); std::shared_ptr tensorC = - mgr.buildTensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eHost); + 
mgr.tensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eHost); mgr.evalOpDefault({ tensorA, tensorC }); diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index b6eaea54..63f9778b 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -19,10 +19,10 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) pa[index] = pa[index] + 1; })"); - mgr.rebuildTensors({ tensorA }); + mgr.rebuild({ tensorA }); std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); { sq->begin(); @@ -58,11 +58,11 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) pa[index] = pa[index] + 1; })"); - mgr.rebuildTensors({ tensorA }, false); + mgr.rebuild({ tensorA }, false); - std::shared_ptr sqTensor = mgr.createManagedSequence(); + std::shared_ptr sqTensor = mgr.sequence(); - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // First create the tensor in a separate sequence sqTensor->begin(); @@ -113,11 +113,11 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) pa[index] = pa[index] + 1; })"); - mgr.rebuildTensors({ tensorA }); + mgr.rebuild({ tensorA }); { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); sq->begin(); @@ -130,7 +130,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence2"); + mgr.sequence("newSequence2"); sq->begin(); @@ -143,7 +143,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence3"); + mgr.sequence("newSequence3"); sq->begin(); @@ -156,7 +156,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence5"); + mgr.sequence("newSequence5"); sq->begin(); @@ -185,11 +185,11 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) pa[index] = pa[index] + 
1; })"); - mgr.rebuildTensors({ tensorA }, false); + mgr.rebuild({ tensorA }, false); { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); sq->begin(); @@ -201,7 +201,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence2"); + mgr.sequence("newSequence2"); sq->begin(); @@ -217,7 +217,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence3"); + mgr.sequence("newSequence3"); sq->begin(); @@ -242,7 +242,7 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate) std::shared_ptr tensorInB{ new kp::Tensor({ 0.0, 1.0, 2.0 }) }; std::shared_ptr tensorOut{ new kp::Tensor({ 0.0, 0.0, 0.0 }) }; - mgr.rebuildTensors({ tensorInA, tensorInB, tensorOut }); + mgr.rebuild({ tensorInA, tensorInB, tensorOut }); std::string shader(R"( // The version to use @@ -277,11 +277,11 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) kp::Manager mgr; - auto tensorInA = mgr.buildTensor( + auto tensorInA = mgr.tensor( { 2.0, 4.0, 6.0 }, kp::Tensor::TensorTypes::eDevice, false); - auto tensorInB = mgr.buildTensor( + auto tensorInB = mgr.tensor( { 0.0, 1.0, 2.0 }, kp::Tensor::TensorTypes::eDevice, false); - auto tensorOut = mgr.buildTensor( + auto tensorOut = mgr.tensor( { 0.0, 0.0, 0.0 }, kp::Tensor::TensorTypes::eDevice, false); std::string shader(R"( @@ -334,9 +334,9 @@ TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope) { kp::Manager mgr; - mgr.rebuildTensors({ tensorA }); + mgr.rebuild({ tensorA }); - sq = mgr.createManagedSequence(); + sq = mgr.sequence(); sq->begin(); sq->record( diff --git a/test/TestOpAlgoLoopsPassingData.cpp b/test/TestOpAlgoLoopsPassingData.cpp index 63bab299..c2431bc4 100644 --- a/test/TestOpAlgoLoopsPassingData.cpp +++ b/test/TestOpAlgoLoopsPassingData.cpp @@ -30,11 +30,11 @@ TEST(TestProcessingIterations, 
IterateThroughMultipleSumAndCopies) } )"); - mgr.rebuildTensors({ tensorA, tensorB }, false); + mgr.rebuild({ tensorA, tensorB }, false); { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("default"); + mgr.sequence("default"); sq->begin(); @@ -47,7 +47,7 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("run"); + mgr.sequence("run"); sq->begin(); @@ -65,7 +65,7 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("export"); + mgr.sequence("export"); sq->begin(); diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp index 7d73bd7f..ae2bfce1 100644 --- a/test/TestOpShadersFromStringAndFile.cpp +++ b/test/TestOpShadersFromStringAndFile.cpp @@ -11,7 +11,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); std::string shader(R"( #version 450 @@ -43,7 +43,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); mgr.evalOpDefault( { tensorA, tensorB }, @@ -65,7 +65,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromFile) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp"); @@ -82,7 +82,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromFile) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.rebuildTensors({ 
tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv"); diff --git a/test/TestOpTensorCopy.cpp b/test/TestOpTensorCopy.cpp index ca4539d6..7b064107 100644 --- a/test/TestOpTensorCopy.cpp +++ b/test/TestOpTensorCopy.cpp @@ -14,7 +14,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -41,7 +41,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; std::shared_ptr tensorC{ new kp::Tensor(testVecC) }; - mgr.rebuildTensors({ tensorA, tensorB, tensorC }); + mgr.rebuild({ tensorA, tensorB, tensorC }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -70,7 +70,7 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor) std::shared_ptr tensorB{ new kp::Tensor( testVecB, kp::Tensor::TensorTypes::eHost) }; - mgr.rebuildTensors({ tensorA, tensorB }, false); + mgr.rebuild({ tensorA, tensorB }, false); // Only calling sync on device type tensor mgr.evalOpDefault({ tensorA }); @@ -99,7 +99,7 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor) testVecA, kp::Tensor::TensorTypes::eHost) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.rebuildTensors({ tensorA, tensorB }, false); + mgr.rebuild({ tensorA, tensorB }, false); // Only calling sync on device type tensor mgr.evalOpDefault({ tensorB }); @@ -129,7 +129,7 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor) std::shared_ptr tensorB{ new kp::Tensor( testVecB, kp::Tensor::TensorTypes::eHost) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -153,7 +153,7 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail) 
std::shared_ptr tensorA{ new kp::Tensor( testVecA, kp::Tensor::TensorTypes::eHost) }; - mgr.rebuildTensors({ tensorA }, false); + mgr.rebuild({ tensorA }, false); EXPECT_TRUE(tensorA->isInit()); diff --git a/test/TestOpTensorCreate.cpp b/test/TestOpTensorCreate.cpp index c1dd200f..ca347357 100644 --- a/test/TestOpTensorCreate.cpp +++ b/test/TestOpTensorCreate.cpp @@ -11,7 +11,7 @@ TEST(TestOpTensorCreate, CreateSingleTensorSingleOp) { kp::Manager mgr; - mgr.rebuildTensors({ tensorA }); + mgr.rebuild({ tensorA }); EXPECT_TRUE(tensorA->isInit()); @@ -32,7 +32,7 @@ TEST(TestOpTensorCreate, CreateMultipleTensorSingleOp) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -52,8 +52,8 @@ TEST(TestOpTensorCreate, CreateMultipleTensorMultipleOp) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.rebuildTensors({ tensorA }); - mgr.rebuildTensors({ tensorB }); + mgr.rebuild({ tensorA }); + mgr.rebuild({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -73,8 +73,8 @@ TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerDestroyed) { kp::Manager mgr; - mgr.rebuildTensors({ tensorA }); - mgr.rebuildTensors({ tensorB }); + mgr.rebuild({ tensorA }); + mgr.rebuild({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -99,8 +99,8 @@ TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerNOTDestroyed) kp::Manager mgr; { - mgr.rebuildTensors({ tensorA }); - mgr.rebuildTensors({ tensorB }); + mgr.rebuild({ tensorA }); + mgr.rebuild({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -124,8 +124,8 @@ TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore) kp::Manager mgr; - mgr.rebuildTensors({ tensorA }); - mgr.rebuildTensors({ tensorB }); 
+ mgr.rebuild({ tensorA }); + mgr.rebuild({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -148,7 +148,7 @@ TEST(TestOpTensorCreate, ExceptionOnZeroSizeTensor) kp::Manager mgr; try { - mgr.rebuildTensors({ tensorA }); + mgr.rebuild({ tensorA }); } catch (const std::runtime_error& err) { // check exception ASSERT_TRUE(std::string(err.what()).find("zero-sized") != diff --git a/test/TestOpTensorSync.cpp b/test/TestOpTensorSync.cpp index 9080194a..f992805f 100644 --- a/test/TestOpTensorSync.cpp +++ b/test/TestOpTensorSync.cpp @@ -13,7 +13,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor) std::shared_ptr tensorA{ new kp::Tensor(testVecPreA) }; - mgr.rebuildTensors({ tensorA }, false); + mgr.rebuild({ tensorA }, false); EXPECT_TRUE(tensorA->isInit()); @@ -37,7 +37,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; std::shared_ptr tensorC{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.rebuildTensors({ tensorA, tensorB, tensorC }, false); + mgr.rebuild({ tensorA, tensorB, tensorC }, false); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp index 2d0a8a4b..0dec484b 100644 --- a/test/TestSequence.cpp +++ b/test/TestSequence.cpp @@ -9,7 +9,7 @@ TEST(TestSequence, CmdBufSequenceBeginEnd) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); EXPECT_TRUE(sq->eval()); EXPECT_TRUE(!sq->isRecording()); @@ -32,7 +32,7 @@ TEST(TestSequence, SequenceDestructorViaManager) { kp::Manager mgr; - sq = mgr.getOrCreateManagedSequence("newSequence"); + sq = mgr.sequence("newSequence"); EXPECT_TRUE(sq->isInit()); } diff --git a/test/TestTensor.cpp b/test/TestTensor.cpp index 7ceea1ba..705c825f 100644 --- a/test/TestTensor.cpp +++ b/test/TestTensor.cpp @@ -23,10 +23,10 @@ TEST(TestTensor, CopyFromHostData) kp::Manager mgr; - mgr.rebuildTensors({ tensorA, tensorB }); + 
mgr.rebuild({ tensorA, tensorB }); if (std::shared_ptr sq = - mgr.getOrCreateManagedSequence("new")) { + mgr.sequence("new")) { sq->begin(); sq->record({ tensorA, tensorB }); From 3e91a7737e5f0fcf809b501194068b3e74463598 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 21:29:58 +0000 Subject: [PATCH 20/27] Updated docs to match functions --- docs/overview/advanced-examples.rst | 8 ++++---- docs/overview/async-parallel.rst | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/overview/advanced-examples.rst b/docs/overview/advanced-examples.rst index 5823c6df..665c4f0c 100644 --- a/docs/overview/advanced-examples.rst +++ b/docs/overview/advanced-examples.rst @@ -97,7 +97,7 @@ Record commands in a single submit by using a Sequence to send in batch to GPU. mgr.evalOpDefault({tensorLHS, tensorRHS, tensorOutput}); // Create a new sequence - std::weak_ptr sqWeakPtr = mgr.getOrCreateManagedSequence(); + std::weak_ptr sqWeakPtr = mgr.sequence(); if (std::shared_ptr sq = sqWeakPtr.lock()) { @@ -226,8 +226,8 @@ Back to `examples list <#simple-examples>`_. // We need to create explicit sequences with their respective queues // The second parameter is the index in the familyIndex array which is relative // to the vector we created the manager with. - mgr.createManagedSequence("queueOne", 0); - mgr.createManagedSequence("queueTwo", 1); + mgr.sequence("queueOne", 0); + mgr.sequence("queueTwo", 1); // Creates tensor an initializes GPU memory (below we show more granularity) auto tensorA = std::make_shared(kp::Tensor(std::vector(10, 0.0))); @@ -422,7 +422,7 @@ Now that we have the inputs and outputs we will be able to use them in the proce kp::Manager mgr; if (std::shared_ptr sq = - mgr.getOrCreateManagedSequence("createTensors").lock()) + mgr.sequence("createTensors").lock()) { // ... 
diff --git a/docs/overview/async-parallel.rst b/docs/overview/async-parallel.rst index 88df7ac6..8c983bc7 100644 --- a/docs/overview/async-parallel.rst +++ b/docs/overview/async-parallel.rst @@ -208,8 +208,8 @@ It's worth mentioning you can have multiple sequences referencing the same queue // We need to create explicit sequences with their respective queues // The second parameter is the index in the familyIndex array which is relative // to the vector we created the manager with. - mgr.createManagedSequence("queueOne", 0); - mgr.createManagedSequence("queueTwo", 1); + mgr.sequence("queueOne", 0); + mgr.sequence("queueTwo", 1); We create the tensors without modifications. From b243d432c13371ea7410e79ceb4179fbbbb9ab3c Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 21:30:22 +0000 Subject: [PATCH 21/27] Updated readme --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4b199fa6..3b96a4c6 100644 --- a/README.md +++ b/README.md @@ -54,9 +54,9 @@ int main() { kp::Manager mgr; // 2. Create and initialise Kompute Tensors through manager - auto tensorInA = mgr.buildTensor({ 2., 2., 2. }); - auto tensorInB = mgr.buildTensor({ 1., 2., 3. }); - auto tensorOut = mgr.buildTensor({ 0., 0., 0. }); + auto tensorInA = mgr.tensor({ 2., 2., 2. }); + auto tensorInB = mgr.tensor({ 1., 2., 3. }); + auto tensorOut = mgr.tensor({ 0., 0., 0. }); // 3. 
Specify "multiply shader" code (can also be raw string, spir-v bytes or file path) std::string shaderString = (R"( From 4e9888e7d6e399810cc664ede35974f84601fcc2 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 21:30:33 +0000 Subject: [PATCH 22/27] Updated examples --- .../android-simple/app/src/main/cpp/KomputeModelML.cpp | 4 ++-- examples/array_multiplication/src/Main.cpp | 6 +++--- .../custom_module/kompute_summator/KomputeSummatorNode.cpp | 6 +++--- .../godot_examples/gdnative_shared/src/KomputeSummator.cpp | 6 +++--- .../custom_module/kompute_model_ml/KomputeModelMLNode.cpp | 4 ++-- .../gdnative_shared/src/KomputeModelML.cpp | 4 ++-- examples/logistic_regression/src/Main.cpp | 4 ++-- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp index e22f2aa0..80c03951 100755 --- a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp +++ b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp @@ -44,14 +44,14 @@ void KomputeModelML::train(std::vector yData, std::vector xIData, { std::shared_ptr sqTensor = - mgr.createManagedSequence(); + mgr.sequence(); sqTensor->begin(); sqTensor->record(params); sqTensor->end(); sqTensor->eval(); - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); diff --git a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp index 14b58cba..161bb8bd 100755 --- a/examples/array_multiplication/src/Main.cpp +++ b/examples/array_multiplication/src/Main.cpp @@ -14,9 +14,9 @@ int main() kp::Manager mgr; - auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 }); - auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 }); - auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 }); + auto tensorInA = mgr.tensor({ 2.0, 4.0, 6.0 }); + auto tensorInB = mgr.tensor({ 
0.0, 1.0, 2.0 }); + auto tensorOut = mgr.tensor({ 0.0, 0.0, 0.0 }); #ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING std::string shader(R"( diff --git a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp index 3f17f088..05c800b2 100644 --- a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp +++ b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp @@ -29,9 +29,9 @@ float KomputeSummatorNode::get_total() const { void KomputeSummatorNode::_init() { std::cout << "CALLING INIT" << std::endl; - this->mPrimaryTensor = this->mManager.buildTensor({ 0.0 }); - this->mSecondaryTensor = this->mManager.buildTensor({ 0.0 }); - this->mSequence = this->mManager.getOrCreateManagedSequence("AdditionSeq"); + this->mPrimaryTensor = this->mManager.tensor({ 0.0 }); + this->mSecondaryTensor = this->mManager.tensor({ 0.0 }); + this->mSequence = this->mManager.sequence("AdditionSeq"); // We now record the steps in the sequence if (std::shared_ptr sq = this->mSequence.lock()) diff --git a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp index 788486e8..26a38181 100644 --- a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp +++ b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp @@ -28,9 +28,9 @@ float KomputeSummator::get_total() const { void KomputeSummator::_init() { std::cout << "CALLING INIT" << std::endl; - this->mPrimaryTensor = this->mManager.buildTensor({ 0.0 }); - this->mSecondaryTensor = this->mManager.buildTensor({ 0.0 }); - this->mSequence = this->mManager.getOrCreateManagedSequence("AdditionSeq"); + this->mPrimaryTensor = this->mManager.tensor({ 0.0 }); + this->mSecondaryTensor = this->mManager.tensor({ 0.0 }); + this->mSequence = this->mManager.sequence("AdditionSeq"); // We now record the steps in the sequence { 
diff --git a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp index f583d910..38dd6772 100644 --- a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp +++ b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp @@ -51,14 +51,14 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) { kp::Manager mgr; std::shared_ptr sqTensor = - mgr.createManagedSequence(); + mgr.sequence(); sqTensor->begin(); sqTensor->record(params); sqTensor->end(); sqTensor->eval(); - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); diff --git a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp index 4135e83e..f868f506 100644 --- a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp +++ b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp @@ -56,14 +56,14 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) { { std::shared_ptr sqTensor = - mgr.createManagedSequence(); + mgr.sequence(); sqTensor->begin(); sqTensor->record(params); sqTensor->end(); sqTensor->eval(); - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp index d3b8b355..1efbe83b 100755 --- a/examples/logistic_regression/src/Main.cpp +++ b/examples/logistic_regression/src/Main.cpp @@ -36,14 +36,14 @@ int main() kp::Manager mgr; std::shared_ptr sqTensor = - mgr.createManagedSequence(); + mgr.sequence(); sqTensor->begin(); sqTensor->record(params); sqTensor->end(); sqTensor->eval(); - std::shared_ptr sq = 
mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); From 1edcb425ce8c53a1ba4979dbab66d72f0f86bcca Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 21:30:52 +0000 Subject: [PATCH 23/27] Single include --- single_include/kompute/Kompute.hpp | 77 +++++++++++++++++------------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index f3ebd990..772397a2 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1351,23 +1351,12 @@ class Manager * * @param sequenceName The name for the named sequence to be retrieved or * created - * @return Shared pointer to the manager owned sequence resource - */ - std::shared_ptr getOrCreateManagedSequence( - std::string sequenceName); - - /** - * Create a new managed Kompute sequence so it's available within the - * manager. - * - * @param sequenceName The name for the named sequence to be created, if - * empty then default indexed value is used * @param queueIndex The queue to use from the available queues - * @return Weak pointer to the manager owned sequence resource + * @return Shared pointer to the manager owned sequence resource */ - std::shared_ptr createManagedSequence( - std::string sequenceName = "", - uint32_t queueIndex = 0); + std::shared_ptr sequence( + std::string sequenceName = KP_DEFAULT_SESSION, + uint32_t queueIndex = 0); /** * Function that evaluates operation against named sequence. 
@@ -1384,7 +1373,7 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOp triggered"); std::shared_ptr sq = - this->getOrCreateManagedSequence(sequenceName); + this->sequence(sequenceName); SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); sq->begin(); @@ -1434,7 +1423,7 @@ class Manager SPDLOG_DEBUG("Kompute Manager evalOpAsync triggered"); std::shared_ptr sq = - this->getOrCreateManagedSequence(sequenceName); + this->sequence(sequenceName); SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN"); sq->begin(); @@ -1521,12 +1510,12 @@ class Manager * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - std::shared_ptr buildTensor( + std::shared_ptr tensor( const std::vector& data, Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice, bool syncDataToGPU = true) { - SPDLOG_DEBUG("Kompute Manager buildTensor triggered"); + SPDLOG_DEBUG("Kompute Manager tensor triggered"); SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr"); std::shared_ptr tensor = @@ -1548,32 +1537,54 @@ class Manager * create a new Tensor. The tensor memory will then be managed and owned by * the manager. 
* - * @param data The data to initialize the tensor with - * @param tensorType The type of tensor to initialize + * @param tensors Array of tensors to rebuild * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - void rebuildTensors(std::vector> tensors, + void rebuild(std::vector> tensors, bool syncDataToGPU = true) { - SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); + SPDLOG_DEBUG("Kompute Manager rebuild triggered"); for (std::shared_ptr tensor : tensors) { - if (tensor->isInit()) { - tensor->freeMemoryDestroyGPUResources(); - } + // False syncData to run all tensors at once instead one by one + this->rebuild(tensor, false); + } - tensor->init(this->mPhysicalDevice, this->mDevice); + if (syncDataToGPU) { + this->evalOpDefault(tensors); + } + } - std::set>::iterator it = - this->mManagedTensors.find(tensor); - if (it == this->mManagedTensors.end()) { - this->mManagedTensors.insert(tensor); - } + /** + * Function that simplifies the common workflow of tensor initialisation. It + * will take the constructor parameters for a Tensor and will will us it to + * create a new Tensor. The tensor memory will then be managed and owned by + * the manager. 
+ * + * @param tensors Single tensor to rebuild + * @param syncDataToGPU Whether to sync the data to GPU memory + * @returns Initialized Tensor with memory Syncd to GPU device + */ + void rebuild(std::shared_ptr tensor, + bool syncDataToGPU = true) + { + SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered"); + + if (tensor->isInit()) { + tensor->freeMemoryDestroyGPUResources(); + } + + tensor->init(this->mPhysicalDevice, this->mDevice); + + std::set>::iterator it = + this->mManagedTensors.find(tensor); + if (it == this->mManagedTensors.end()) { + this->mManagedTensors.insert(tensor); } if (syncDataToGPU) { - this->evalOpDefault(tensors); + this->evalOpDefault({ tensor }); } } From d8041d696d68d768168f8a816f2112541b612912 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 21:47:40 +0000 Subject: [PATCH 24/27] Added python updated functions --- python/src/docstrings.hpp | 13 ++----------- python/src/main.cpp | 12 +++++++----- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp index 6b3a1dc7..79b864f8 100644 --- a/python/src/docstrings.hpp +++ b/python/src/docstrings.hpp @@ -119,7 +119,7 @@ integrate with the vulkan kompute use. @param device Vulkan logical device to use for all base resources @param physicalDeviceIndex Index for vulkan physical device used)doc"; -static const char *__doc_kp_Manager_buildTensor = +static const char *__doc_kp_Manager_tensor = R"doc(Function that simplifies the common workflow of tensor creation and initialization. It will take the constructor parameters for a Tensor and will will us it to create a new Tensor and then create it using @@ -133,15 +133,6 @@ static const char *__doc_kp_Manager_createDevice = R"doc()doc"; static const char *__doc_kp_Manager_createInstance = R"doc()doc"; -static const char *__doc_kp_Manager_createManagedSequence = -R"doc(Create a new managed Kompute sequence so it's available within the -manager. 
- -@param sequenceName The name for the named sequence to be created, if -empty then default indexed value is used @param queueIndex The queue -to use from the available queues @return Weak pointer to the manager -owned sequence resource)doc"; - static const char *__doc_kp_Manager_evalOp = R"doc(Function that evaluates operation against named sequence. @@ -187,7 +178,7 @@ R"doc(Function that evaluates operation against a newly created sequence. TArgs Template parameters that will be used to initialise Operation to allow for extensible configurations on initialisation)doc"; -static const char *__doc_kp_Manager_getOrCreateManagedSequence = +static const char *__doc_kp_Manager_sequence = R"doc(Get or create a managed Sequence that will be contained by this manager. If the named sequence does not currently exist, it would be created and initialised. diff --git a/python/src/main.cpp b/python/src/main.cpp index 74f010f0..889084c7 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -155,14 +155,16 @@ PYBIND11_MODULE(kp, m) { [](uint32_t physicalDeviceIndex, const std::vector& familyQueueIndices) { return std::unique_ptr(new kp::Manager(physicalDeviceIndex, familyQueueIndices)); }), "Manager initialiser can provide specified device and array of GPU queueFamilies to load.") - .def("get_create_sequence", &kp::Manager::getOrCreateManagedSequence, "Get a Sequence or create a new one with given name") - .def("create_sequence", &kp::Manager::createManagedSequence, - py::arg("name") = "", py::arg("queueIndex") = 0, "Create a sequence with specific name and specified index of available queues") - .def("build_tensor", &kp::Manager::buildTensor, + .def("sequence", &kp::Manager::sequence, + py::arg("name") = "", py::arg("queueIndex") = 0, "Get or create a sequence with specific name and specified index of available queues") + .def("tensor", &kp::Manager::tensor, py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice, py::arg("syncDataToGPU") = true, 
"Build and initialise tensor") - .def("rebuild_tensors", &kp::Manager::rebuildTensors, + .def("rebuild", py::overload_cast>, bool>(&kp::Manager::rebuild), py::arg("tensors"), py::arg("syncDataToGPU") = true, + "Build and initialise list of tensors") + .def("rebuild", py::overload_cast, bool>(&kp::Manager::rebuild), + py::arg("tensor"), py::arg("syncDataToGPU") = true, "Build and initialise tensor") // Await functions From a828bb9f79769163fb18bcb6be7fb5c493dacd19 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 21:48:23 +0000 Subject: [PATCH 25/27] Updated python tests --- python/test/test_array_multiplication.py | 2 +- python/test/test_kompute.py | 18 +++++++++--------- python/test/test_logistic_regression.py | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/python/test/test_array_multiplication.py b/python/test/test_array_multiplication.py index fac2ed41..bcad405a 100644 --- a/python/test/test_array_multiplication.py +++ b/python/test/test_array_multiplication.py @@ -14,7 +14,7 @@ def test_array_multiplication(): tensor_out = kp.Tensor([0, 0, 0]) # 3. Initialise the Kompute Tensors in the GPU - mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) # 4. 
Define the multiplication shader code to run on the GPU @ps.python2shader diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index bec4b40e..7050b9c2 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -17,7 +17,7 @@ def test_opmult(): mgr = kp.Manager() - mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_mult_def([tensor_in_a, tensor_in_b, tensor_out]) @@ -42,7 +42,7 @@ def test_opalgobase_data(): layout (local_size_x = 1) in; - // The input tensors bind index is relative to index in parameter passed + // The input rebuild bind index is relative to index in parameter passed layout(set = 0, binding = 0) buffer bina { float tina[]; }; layout(set = 0, binding = 1) buffer binb { float tinb[]; }; layout(set = 0, binding = 2) buffer bout { float tout[]; }; @@ -53,7 +53,7 @@ def test_opalgobase_data(): } """ - mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_str_def([tensor_in_a, tensor_in_b, tensor_out], shaderData) @@ -76,7 +76,7 @@ def test_opalgobase_file(): shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp") - mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) @@ -94,14 +94,14 @@ def test_sequence(): tensor_in_b = kp.Tensor([1, 2, 3]) tensor_out = kp.Tensor([0, 0, 0]) - mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp") mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) mgr.eval_await_def() - seq = mgr.create_sequence("op") + seq = mgr.sequence("op") seq.begin() seq.record_tensor_sync_local([tensor_in_a]) 
seq.record_tensor_sync_local([tensor_in_b]) @@ -120,14 +120,14 @@ def test_workgroup(): tensor_a = kp.Tensor(np.zeros([16,8])) tensor_b = kp.Tensor(np.zeros([16,8])) - mgr.rebuild_tensors([tensor_a, tensor_b]) + mgr.rebuild([tensor_a, tensor_b]) shader_src = """ #version 450 layout (local_size_x = 1) in; - // The input tensors bind index is relative to index in parameter passed + // The input rebuild bind index is relative to index in parameter passed layout(set = 0, binding = 0) writeonly buffer bout { float toutx[]; }; layout(set = 0, binding = 1) writeonly buffer bout2 { float touty[]; }; @@ -140,7 +140,7 @@ def test_workgroup(): """ shader_src = bytes(shader_src, encoding='utf8') - seq = mgr.create_sequence("new") + seq = mgr.sequence("new") seq.begin() seq.record_algo_data([tensor_a, tensor_b], shader_src, (16,8,1)) seq.end() diff --git a/python/test/test_logistic_regression.py b/python/test/test_logistic_regression.py index 1fbcd5bc..6783bbc8 100644 --- a/python/test/test_logistic_regression.py +++ b/python/test/test_logistic_regression.py @@ -66,10 +66,10 @@ def compute_shader( params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i, tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m] - mgr.rebuild_tensors(params) + mgr.rebuild(params) # Create a managed sequence - sq = mgr.create_sequence() + sq = mgr.sequence() # Clear previous operations and begin recording for new operations sq.begin() From 3c486ebf72e192a667cd66df25e1e284027cc9ef Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 22:01:26 +0000 Subject: [PATCH 26/27] Updated test to cover sequences --- test/TestOpTensorCopy.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/test/TestOpTensorCopy.cpp b/test/TestOpTensorCopy.cpp index 7b064107..3f2bc950 100644 --- a/test/TestOpTensorCopy.cpp +++ b/test/TestOpTensorCopy.cpp @@ -8,7 +8,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) kp::Manager mgr; - std::vector 
testVecA{ 9, 8, 7 }; + std::vector testVecA{ 1, 2, 3 }; std::vector testVecB{ 0, 0, 0 }; std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; @@ -33,7 +33,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 2, 3, 4 }; std::vector testVecB{ 0, 0, 0 }; std::vector testVecC{ 0, 0, 0 }; @@ -63,7 +63,7 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 3, 4, 5 }; std::vector testVecB{ 0, 0, 0 }; std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; @@ -92,7 +92,7 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 4, 5, 6 }; std::vector testVecB{ 0, 0, 0 }; std::shared_ptr tensorA{ new kp::Tensor( @@ -101,6 +101,9 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor) mgr.rebuild({ tensorA, tensorB }, false); + // Manually copy data into host memory of Tensor + tensorA->mapDataIntoHostMemory(); + // Only calling sync on device type tensor mgr.evalOpDefault({ tensorB }); @@ -121,7 +124,7 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 5, 6, 7 }; std::vector testVecB{ 0, 0, 0 }; std::shared_ptr tensorA{ new kp::Tensor( @@ -148,7 +151,7 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 6, 7, 8 }; std::shared_ptr tensorA{ new kp::Tensor( testVecA, kp::Tensor::TensorTypes::eHost) }; From 48f041d9f372941e490791b5ad2065501646f687 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 22:15:07 +0000 Subject: [PATCH 27/27] Updated the examples --- .../android-simple/app/src/main/cpp/KomputeModelML.cpp | 9 +-------- .../kompute_summator/KomputeSummatorNode.cpp | 6 +++--- .../kompute_model_ml/KomputeModelMLNode.cpp | 9 ++------- .../gdnative_shared/src/KomputeModelML.cpp | 8 +------- 
examples/logistic_regression/src/Main.cpp | 8 +------- 5 files changed, 8 insertions(+), 32 deletions(-) diff --git a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp index 80c03951..0337fa63 100755 --- a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp +++ b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp @@ -42,14 +42,7 @@ void KomputeModelML::train(std::vector yData, std::vector xIData, kp::Manager mgr; { - - std::shared_ptr sqTensor = - mgr.sequence(); - - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); + mgr.rebuild(params); std::shared_ptr sq = mgr.sequence(); diff --git a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp index 05c800b2..304416a0 100644 --- a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp +++ b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp @@ -12,7 +12,7 @@ void KomputeSummatorNode::add(float value) { // Set the new data in the local device this->mSecondaryTensor->setData({value}); // Execute recorded sequence - if (std::shared_ptr sq = this->mSequence.lock()) { + if (std::shared_ptr sq = this->mSequence) { sq->eval(); } else { @@ -34,7 +34,7 @@ void KomputeSummatorNode::_init() { this->mSequence = this->mManager.sequence("AdditionSeq"); // We now record the steps in the sequence - if (std::shared_ptr sq = this->mSequence.lock()) + if (std::shared_ptr sq = this->mSequence) { std::string shader(R"( @@ -59,7 +59,7 @@ void KomputeSummatorNode::_init() { { this->mSecondaryTensor }); // Then we run the operation with both tensors - sq->record>( + sq->record( { this->mPrimaryTensor, this->mSecondaryTensor }, std::vector(shader.begin(), shader.end())); diff --git 
a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp index 38dd6772..010a3164 100644 --- a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp +++ b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp @@ -50,14 +50,9 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) { { kp::Manager mgr; - std::shared_ptr sqTensor = - mgr.sequence(); - - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); + mgr.rebuild(params); + { std::shared_ptr sq = mgr.sequence(); // Record op algo base diff --git a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp index f868f506..e9a9c51b 100644 --- a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp +++ b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp @@ -55,13 +55,7 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) { kp::Manager mgr; { - std::shared_ptr sqTensor = - mgr.sequence(); - - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); + mgr.rebuild(params); std::shared_ptr sq = mgr.sequence(); diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp index 1efbe83b..14664a56 100755 --- a/examples/logistic_regression/src/Main.cpp +++ b/examples/logistic_regression/src/Main.cpp @@ -35,13 +35,7 @@ int main() kp::Manager mgr; - std::shared_ptr sqTensor = - mgr.sequence(); - - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); + mgr.rebuild(params); std::shared_ptr sq = mgr.sequence();