From fc3d23d3f93a1f6fecc2efe5b00dd1928f3127dc Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:15:48 +0000 Subject: [PATCH 01/27] Removed OpCreateTensor in favour of manager memory ownership --- src/Manager.cpp | 11 +++ src/OpAlgoBase.cpp | 2 +- src/OpTensorCopy.cpp | 2 +- src/OpTensorCreate.cpp | 76 ------------------- src/OpTensorSyncDevice.cpp | 2 +- src/OpTensorSyncLocal.cpp | 2 +- src/Tensor.cpp | 10 ++- src/include/kompute/Manager.hpp | 45 +++++++++-- src/include/kompute/operations/OpBase.hpp | 8 +- .../kompute/operations/OpTensorCreate.hpp | 74 ------------------ 10 files changed, 64 insertions(+), 168 deletions(-) delete mode 100644 src/OpTensorCreate.cpp delete mode 100644 src/include/kompute/operations/OpTensorCreate.hpp diff --git a/src/Manager.cpp b/src/Manager.cpp index df9d64db..11d11a26 100755 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -58,6 +58,17 @@ Manager::~Manager() return; } + if (this->mManagedTensors.size()) { + SPDLOG_DEBUG("Kompute Manager explicitly freeing tensors"); + for (const std::shared_ptr& tensor : this->mManagedTensors) { + if (!tensor->isInit()) { + SPDLOG_ERROR("Kompute Manager attempted to free managed tensor but not tensor is not initialised"); + } + tensor->freeMemoryDestroyGPUResources(); + } + this->mManagedTensors.clear(); + } + if (this->mManagedSequences.size()) { SPDLOG_DEBUG("Kompute Manager explicitly running destructor for " "managed sequences"); diff --git a/src/OpAlgoBase.cpp b/src/OpAlgoBase.cpp index c6ecf316..ad4bbc17 100644 --- a/src/OpAlgoBase.cpp +++ b/src/OpAlgoBase.cpp @@ -14,7 +14,7 @@ OpAlgoBase::OpAlgoBase(std::shared_ptr physicalDevice, std::shared_ptr commandBuffer, std::vector>& tensors, KomputeWorkgroup komputeWorkgroup) - : OpBase(physicalDevice, device, commandBuffer, tensors, false) + : OpBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpAlgoBase constructor with params numTensors: {}", tensors.size()); diff --git a/src/OpTensorCopy.cpp 
b/src/OpTensorCopy.cpp index 3df23aa5..3726c71e 100644 --- a/src/OpTensorCopy.cpp +++ b/src/OpTensorCopy.cpp @@ -12,7 +12,7 @@ OpTensorCopy::OpTensorCopy(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, std::vector> tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, false) + : OpBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpTensorCopy constructor with params"); } diff --git a/src/OpTensorCreate.cpp b/src/OpTensorCreate.cpp deleted file mode 100644 index 7918415e..00000000 --- a/src/OpTensorCreate.cpp +++ /dev/null @@ -1,76 +0,0 @@ - -#include "kompute/Tensor.hpp" - -#include "kompute/operations/OpTensorCreate.hpp" - -namespace kp { - -OpTensorCreate::OpTensorCreate() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate constructor base"); -} - -OpTensorCreate::OpTensorCreate( - std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, true) -{ - SPDLOG_DEBUG("Kompute OpTensorCreate constructor with params"); -} - -OpTensorCreate::~OpTensorCreate() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate destructor started"); -} - -void -OpTensorCreate::init() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate init called"); - - if (this->mTensors.size() < 1) { - throw std::runtime_error( - "Kompute OpTensorCreate called with less than 1 tensor"); - } - - for (std::shared_ptr tensor : this->mTensors) { - if (tensor->isInit()) { - throw std::runtime_error( - "Kompute OpTensorCreate: Tensor has already been initialized"); - } - if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { - tensor->init(this->mPhysicalDevice, this->mDevice); - - tensor->mapDataIntoHostMemory(); - } - } -} - -void -OpTensorCreate::record() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate record called"); - - for (size_t i = 0; i < this->mTensors.size(); i++) { - if (this->mTensors[i]->tensorType() == 
Tensor::TensorTypes::eDevice) { - this->mTensors[i]->recordCopyFromStagingToDevice( - this->mCommandBuffer, false); - } - } -} - -void -OpTensorCreate::preEval() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate preEval called"); -} - -void -OpTensorCreate::postEval() -{ - SPDLOG_DEBUG("Kompute OpTensorCreate postEval called"); -} - -} diff --git a/src/OpTensorSyncDevice.cpp b/src/OpTensorSyncDevice.cpp index 340786eb..92bd7512 100644 --- a/src/OpTensorSyncDevice.cpp +++ b/src/OpTensorSyncDevice.cpp @@ -15,7 +15,7 @@ OpTensorSyncDevice::OpTensorSyncDevice( std::shared_ptr device, std::shared_ptr commandBuffer, std::vector> tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, false) + : OpBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpTensorSyncDevice constructor with params"); } diff --git a/src/OpTensorSyncLocal.cpp b/src/OpTensorSyncLocal.cpp index 09d966e1..c7a4fb63 100644 --- a/src/OpTensorSyncLocal.cpp +++ b/src/OpTensorSyncLocal.cpp @@ -15,7 +15,7 @@ OpTensorSyncLocal::OpTensorSyncLocal( std::shared_ptr device, std::shared_ptr commandBuffer, std::vector> tensors) - : OpBase(physicalDevice, device, commandBuffer, tensors, false) + : OpBase(physicalDevice, device, commandBuffer, tensors) { SPDLOG_DEBUG("Kompute OpTensorSyncLocal constructor with params"); } diff --git a/src/Tensor.cpp b/src/Tensor.cpp index f04165cf..7400dfff 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -229,8 +229,11 @@ Tensor::mapDataFromHostMemory() if (this->mTensorType == TensorTypes::eHost) { hostVisibleMemory = this->mPrimaryMemory; - } else { + } else if (this->mTensorType == TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; + } else { + SPDLOG_WARN("Kompute Tensor mapping data not supported on storage tensor"); + return; } vk::DeviceSize bufferSize = this->memorySize(); @@ -252,8 +255,11 @@ Tensor::mapDataIntoHostMemory() if (this->mTensorType == TensorTypes::eHost) { hostVisibleMemory = this->mPrimaryMemory; - } else { + } 
else if (this->mTensorType == TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; + } else { + SPDLOG_WARN("Kompute Tensor mapping data not supported on storage tensor"); + return; } vk::DeviceSize bufferSize = this->memorySize(); diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 8c689ba5..973a0039 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -1,13 +1,12 @@ #pragma once #include +#include #include "kompute/Core.hpp" #include "kompute/Sequence.hpp" -#include "kompute/operations/OpTensorCreate.hpp" - #define KP_DEFAULT_SESSION "DEFAULT" namespace kp { @@ -231,8 +230,8 @@ class Manager /** * Function that simplifies the common workflow of tensor creation and * initialization. It will take the constructor parameters for a Tensor - * and will will us it to create a new Tensor and then create it using - * the OpCreateTensor command. + * and will use it to create a new Tensor and then initialise it. The + * tensor memory will then be managed and owned by the manager. * * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize @@ -242,17 +241,49 @@ const std::vector& data, Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) { - SPDLOG_DEBUG("Kompute Manager createInitTensor triggered"); SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr"); std::shared_ptr tensor = std::make_shared(kp::Tensor(data, tensorType)); - this->evalOpDefault({ tensor }); + tensor->init(this->mPhysicalDevice, this->mDevice); + if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { + tensor->mapDataIntoHostMemory(); + } + this->mManagedTensors.insert(tensor); return tensor; } + /** + * Function that simplifies the common workflow of tensor rebuilding. It will take a vector of previously constructed Tensors and re-initialise their GPU memory.
The tensor memory will then be managed and owned by the manager. + * + * @param tensors Vector of already constructed tensors to rebuild + * Tensors that are already initialised will first have their + * GPU resources freed before being re-initialised. + */ + void rebuildTensors(std::vector> tensors) + { + SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); + for (std::shared_ptr tensor : tensors) { + + if (tensor->isInit()) { + tensor->freeMemoryDestroyGPUResources(); + } + + tensor->init(this->mPhysicalDevice, this->mDevice); + if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { + tensor->mapDataIntoHostMemory(); + } + + std::set>::iterator it = this->mManagedTensors.find(tensor); + if (it == this->mManagedTensors.end()) { + this->mManagedTensors.insert(tensor); + } + } + } + private: // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mInstance = nullptr; @@ -263,6 +294,8 @@ class Manager bool mFreeDevice = false; // -------------- ALWAYS OWNED RESOURCES + std::set> mManagedTensors; + std::unordered_map> mManagedSequences; diff --git a/src/include/kompute/operations/OpBase.hpp b/src/include/kompute/operations/OpBase.hpp index 6e35df99..a423abc2 100644 --- a/src/include/kompute/operations/OpBase.hpp +++ b/src/include/kompute/operations/OpBase.hpp @@ -31,13 +31,11 @@ class OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors */ OpBase(std::shared_ptr physicalDevice, std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors, - bool freeTensors) + std::vector>& tensors) { SPDLOG_DEBUG("Compute OpBase constructor with params"); @@ -45,14 +43,12 @@ this->mPhysicalDevice = physicalDevice; this->mDevice = device; this->mCommandBuffer = commandBuffer; this->mTensors = tensors; - this->mFreeTensors = freeTensors; } /** * Default destructor for
OpBase class. This OpBase destructor class should * always be called to destroy and free owned resources unless it is - * intended to destroy the resources in the parent class. This can be done - * by passing the mFreeTensors=false. + * intended to destroy the resources in the parent class. */ virtual ~OpBase() { diff --git a/src/include/kompute/operations/OpTensorCreate.hpp b/src/include/kompute/operations/OpTensorCreate.hpp deleted file mode 100644 index 4b8c784c..00000000 --- a/src/include/kompute/operations/OpTensorCreate.hpp +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once - -#include "kompute/Core.hpp" - -#include "kompute/Tensor.hpp" - -#include "kompute/operations/OpBase.hpp" - -namespace kp { - -/** - Operation that creates tensor and manages the memory of the components - created -*/ -class OpTensorCreate : public OpBase -{ - public: - OpTensorCreate(); - - /** - * Default constructor with parameters that provides the bare minimum - * requirements for the operations to be able to create and manage their - * sub-components. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that will be used to create in operation. - * @param freeTensors Whether operation manages the memory of the Tensors - */ - OpTensorCreate(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors); - - /** - * Default destructor which in this case expects the parent class to free - * the tensors - */ - ~OpTensorCreate() override; - - /** - * In charge of initialising the primary Tensor as well as the staging - * tensor as required. It will only initialise a staging tensor if the - * Primary tensor is of type Device. 
For staging tensors it performs a - * mapDataIntoHostMemory which would perform immediately as opposed to - * on sequence eval/submission. - */ - void init() override; - - /** - * Record runs the core actions to create the tensors. For device tensors - * it records a copyCommand to move the data from the staging tensor to the - * device tensor. The mapping for staging tensors happens in the init function - * not in the record function. - */ - void record() override; - - /** - * Does not perform any preEval commands. - */ - virtual void preEval() override; - - /** - * Performs a copy back into the main tensor to ensure that the data - * contained is the one that is now being stored in the GPU. - */ - virtual void postEval() override; - - - private: -}; - -} // End namespace kp From 4dedfadfef933b29ea87f8f70ed3a3c7c509792b Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:17:32 +0000 Subject: [PATCH 02/27] Updated tests to reflect manager tensor memory ownership --- test/TestAsyncOperations.cpp | 6 ++-- test/TestLogisticRegression.cpp | 16 +++------ test/TestManager.cpp | 28 ++++++++++----- test/TestMultipleAlgoExecutions.cpp | 22 +++++++++--- test/TestOpAlgoLoopsPassingData.cpp | 4 ++- test/TestOpShadersFromStringAndFile.cpp | 16 ++++++--- test/TestOpTensorCopy.cpp | 24 +++++++++---- test/TestOpTensorCreate.cpp | 46 +++++++++++++++++++------ test/TestOpTensorSync.cpp | 4 +-- test/TestTensor.cpp | 5 +-- 10 files changed, 117 insertions(+), 54 deletions(-) diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index 43bccf99..c43f5648 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -49,7 +49,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) inputsSyncB.push_back(std::make_shared(kp::Tensor(data))); } - mgr.evalOpDefault(inputsSyncB); + mgr.rebuildTensors(inputsSyncB); auto startSync = std::chrono::high_resolution_clock::now(); @@ -77,7 +77,7 @@ TEST(TestAsyncOperations, 
TestManagerParallelExecution) inputsAsyncB.push_back(std::make_shared(kp::Tensor(data))); } - mgrAsync.evalOpDefault(inputsAsyncB); + mgrAsync.rebuildTensors(inputsAsyncB); for (uint32_t i = 0; i < numParallel; i++) { mgrAsync.createManagedSequence("async" + std::to_string(i), i); @@ -149,7 +149,7 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution) mgr.createManagedSequence("asyncOne"); mgr.createManagedSequence("asyncTwo"); - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); mgr.evalOpAsync( { tensorA }, "asyncOne", std::vector(shader.begin(), shader.end())); diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index 7c3f1538..e25b1416 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -32,12 +32,8 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression) { kp::Manager mgr; - std::shared_ptr sqTensor = mgr.createManagedSequence(); - - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); + mgr.rebuildTensors(params); + mgr.evalOpDefault(params); std::shared_ptr sq = mgr.createManagedSequence(); @@ -122,12 +118,8 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) { kp::Manager mgr; - std::shared_ptr sqTensor = mgr.createManagedSequence(); - - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); + mgr.rebuildTensors(params); + mgr.evalOpDefault(params); std::shared_ptr sq = mgr.createManagedSequence(); diff --git a/test/TestManager.cpp b/test/TestManager.cpp index 198e617a..3e1db7b1 100644 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -8,14 +8,16 @@ TEST(TestManager, EndToEndOpMultFlow) kp::Manager mgr; std::shared_ptr tensorLHS{ new kp::Tensor({ 0, 1, 2 }) }; - mgr.evalOpDefault({ tensorLHS }); + mgr.rebuildTensors({ tensorLHS }); std::shared_ptr tensorRHS{ new kp::Tensor({ 2, 4, 6 }) }; - mgr.evalOpDefault({ tensorRHS }); + mgr.rebuildTensors({ 
tensorRHS }); std::shared_ptr tensorOutput{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorOutput }); + mgr.rebuildTensors({ tensorOutput }); + + mgr.evalOpDefault({ tensorLHS, tensorRHS, tensorOutput }); mgr.evalOpDefault({ tensorLHS, tensorRHS, tensorOutput }); @@ -36,14 +38,16 @@ TEST(TestManager, OpMultSequenceFlow) kp::Manager mgr; { + mgr.rebuildTensors({ tensorLHS, tensorRHS, tensorOutput }); + std::shared_ptr sq = mgr.getOrCreateManagedSequence("newSequence"); sq->begin(); - sq->record({ tensorLHS }); - sq->record({ tensorRHS }); - sq->record({ tensorOutput }); + sq->record({ tensorLHS }); + sq->record({ tensorRHS }); + sq->record({ tensorOutput }); sq->record({ tensorLHS, tensorRHS, tensorOutput }); @@ -93,14 +97,16 @@ TEST(TestManager, TestMultipleTensorsAtOnce) mgr.getOrCreateManagedSequence("newSequence"); { - sq->begin(); - - sq->record({ tensorLHS, tensorRHS, tensorOutput }); + mgr.rebuildTensors({ tensorLHS, tensorRHS, tensorOutput }); EXPECT_TRUE(tensorLHS->isInit()); EXPECT_TRUE(tensorRHS->isInit()); EXPECT_TRUE(tensorOutput->isInit()); + sq->begin(); + + sq->record({ tensorLHS, tensorRHS, tensorOutput }); + sq->record({ tensorLHS, tensorRHS, tensorOutput }); sq->record({ tensorOutput }); @@ -119,6 +125,10 @@ TEST(TestManager, TestCreateInitTensor) std::shared_ptr tensorA = mgr.buildTensor({ 0, 1, 2 }); std::shared_ptr tensorB = mgr.buildTensor({ 0, 0, 0 }); + mgr.rebuildTensors({ tensorA, tensorB }); + + mgr.evalOpDefault({ tensorA, tensorB }); + mgr.evalOpDefault({ tensorA, tensorB }); mgr.evalOpDefault({ tensorB }); diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index 11e94caa..9d696d55 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -19,13 +19,15 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) pa[index] = pa[index] + 1; })"); + mgr.rebuildTensors({ tensorA }); + std::shared_ptr sq = mgr.getOrCreateManagedSequence("newSequence"); { 
sq->begin(); - sq->record({ tensorA }); + sq->record({ tensorA }); sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); @@ -58,13 +60,15 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) pa[index] = pa[index] + 1; })"); + mgr.rebuildTensors({ tensorA }); + std::shared_ptr sqTensor = mgr.createManagedSequence(); std::shared_ptr sq = mgr.createManagedSequence(); // First create the tensor in a separate sequence sqTensor->begin(); - sqTensor->record({ tensorA }); + sqTensor->record({ tensorA }); sqTensor->end(); sqTensor->eval(); @@ -111,13 +115,15 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) pa[index] = pa[index] + 1; })"); + mgr.rebuildTensors({ tensorA }); + { std::shared_ptr sq = mgr.getOrCreateManagedSequence("newSequence"); sq->begin(); - sq->record({ tensorA }); + sq->record({ tensorA }); sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); @@ -183,13 +189,15 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) pa[index] = pa[index] + 1; })"); + mgr.rebuildTensors({ tensorA }); + { std::shared_ptr sq = mgr.getOrCreateManagedSequence("newSequence"); sq->begin(); - sq->record({ tensorA }); + sq->record({ tensorA }); sq->end(); sq->eval(); @@ -238,7 +246,9 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate) std::shared_ptr tensorInB{ new kp::Tensor({ 0.0, 1.0, 2.0 }) }; std::shared_ptr tensorOut{ new kp::Tensor({ 0.0, 0.0, 0.0 }) }; - mgr.evalOpDefault({ tensorInA, tensorInB, tensorOut }); + mgr.rebuildTensors({ tensorInA, tensorInB, tensorOut }); + + mgr.evalOpDefault({ tensorInA, tensorInB, tensorOut }); std::string shader(R"( // The version to use @@ -296,6 +306,8 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) } )"); + mgr.evalOpDefault({ tensorInA, tensorInB, tensorOut }); + mgr.evalOpDefault( { tensorInA, tensorInB, tensorOut }, std::vector(shader.begin(), shader.end())); diff --git a/test/TestOpAlgoLoopsPassingData.cpp b/test/TestOpAlgoLoopsPassingData.cpp 
index bd772779..b04ef383 100644 --- a/test/TestOpAlgoLoopsPassingData.cpp +++ b/test/TestOpAlgoLoopsPassingData.cpp @@ -30,13 +30,15 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) } )"); + mgr.rebuildTensors({ tensorA, tensorB }); + { std::shared_ptr sq = mgr.getOrCreateManagedSequence("default"); sq->begin(); - sq->record({ tensorA, tensorB }); + sq->record({ tensorA, tensorB }); sq->end(); diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp index 273421b2..7b0db3de 100644 --- a/test/TestOpShadersFromStringAndFile.cpp +++ b/test/TestOpShadersFromStringAndFile.cpp @@ -11,7 +11,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); std::string shader(R"( #version 450 @@ -28,6 +28,8 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor) } )"); + mgr.evalOpDefault({ tensorA, tensorB }); + mgr.evalOpDefault( { tensorA, tensorB }, std::vector(shader.begin(), shader.end())); @@ -43,7 +45,9 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); + + mgr.evalOpDefault({ tensorA, tensorB }); mgr.evalOpDefault( { tensorA, tensorB }, @@ -65,7 +69,9 @@ TEST(TestOpAlgoBase, ShaderRawDataFromFile) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); + + mgr.evalOpDefault({ tensorA, tensorB }); mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp"); @@ -82,7 +88,9 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromFile) std::shared_ptr tensorA{ new 
kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); + + mgr.evalOpDefault({ tensorA, tensorB }); mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv"); diff --git a/test/TestOpTensorCopy.cpp b/test/TestOpTensorCopy.cpp index 0e840cad..ae99bf17 100644 --- a/test/TestOpTensorCopy.cpp +++ b/test/TestOpTensorCopy.cpp @@ -14,7 +14,9 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); + + mgr.evalOpDefault({ tensorA, tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -41,7 +43,9 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; std::shared_ptr tensorC{ new kp::Tensor(testVecC) }; - mgr.evalOpDefault({ tensorA, tensorB, tensorC }); + mgr.rebuildTensors({ tensorA, tensorB, tensorC }); + + mgr.evalOpDefault({ tensorA, tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -70,7 +74,10 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor) std::shared_ptr tensorB{ new kp::Tensor( testVecB, kp::Tensor::TensorTypes::eHost) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); + + // Only calling sync on device type tensor + mgr.evalOpDefault({ tensorA }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -96,7 +103,10 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor) testVecA, kp::Tensor::TensorTypes::eHost) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); + + // Only calling sync on device type tensor + mgr.evalOpDefault({ tensorB }); EXPECT_TRUE(tensorA->isInit()); 
EXPECT_TRUE(tensorB->isInit()); @@ -123,7 +133,9 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor) std::shared_ptr tensorB{ new kp::Tensor( testVecB, kp::Tensor::TensorTypes::eHost) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); + + // Not calling OpTensorSyncDevice EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -147,7 +159,7 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail) std::shared_ptr tensorA{ new kp::Tensor( testVecA, kp::Tensor::TensorTypes::eHost) }; - mgr.evalOpDefault({ tensorA }); + mgr.rebuildTensors({ tensorA }); EXPECT_TRUE(tensorA->isInit()); diff --git a/test/TestOpTensorCreate.cpp b/test/TestOpTensorCreate.cpp index f0ba8743..a57142cf 100644 --- a/test/TestOpTensorCreate.cpp +++ b/test/TestOpTensorCreate.cpp @@ -12,7 +12,7 @@ TEST(TestOpTensorCreate, CreateSingleTensorSingleOp) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; - mgr.evalOpDefault({ tensorA }); + mgr.rebuildTensors({ tensorA }); EXPECT_TRUE(tensorA->isInit()); @@ -33,7 +33,7 @@ TEST(TestOpTensorCreate, CreateMultipleTensorSingleOp) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.evalOpDefault({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -53,8 +53,8 @@ TEST(TestOpTensorCreate, CreateMultipleTensorMultipleOp) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.evalOpDefault({ tensorA }); - mgr.evalOpDefault({ tensorB }); + mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -63,7 +63,7 @@ TEST(TestOpTensorCreate, CreateMultipleTensorMultipleOp) EXPECT_EQ(tensorB->data(), testVecB); } -TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed) +TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerDestroyed) { 
std::vector testVecA{ 9, 8, 7 }; @@ -74,8 +74,8 @@ TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed) { kp::Manager mgr; - mgr.evalOpDefault({ tensorA }); - mgr.evalOpDefault({ tensorB }); + mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -88,6 +88,32 @@ TEST(TestOpTensorCreate, ManageTensorMemoryWhenOpTensorCreateDestroyed) EXPECT_FALSE(tensorB->isInit()); } +TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerNOTDestroyed) +{ + + std::vector testVecA{ 9, 8, 7 }; + std::vector testVecB{ 6, 5, 4 }; + + std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; + std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; + + kp::Manager mgr; + + { + mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorB }); + + EXPECT_TRUE(tensorA->isInit()); + EXPECT_TRUE(tensorB->isInit()); + + EXPECT_EQ(tensorA->data(), testVecA); + EXPECT_EQ(tensorB->data(), testVecB); + } + + EXPECT_TRUE(tensorA->isInit()); + EXPECT_TRUE(tensorB->isInit()); +} + TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore) { @@ -99,8 +125,8 @@ TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore) kp::Manager mgr; - mgr.evalOpDefault({ tensorA }); - mgr.evalOpDefault({ tensorB }); + mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -123,7 +149,7 @@ TEST(TestOpTensorCreate, ExceptionOnZeroSizeTensor) kp::Manager mgr; try { - mgr.evalOpDefault({ tensorA }); + mgr.rebuildTensors({ tensorA }); } catch (const std::runtime_error& err) { // check exception ASSERT_TRUE(std::string(err.what()).find("zero-sized") != diff --git a/test/TestOpTensorSync.cpp b/test/TestOpTensorSync.cpp index 72f53ac5..3808941f 100644 --- a/test/TestOpTensorSync.cpp +++ b/test/TestOpTensorSync.cpp @@ -13,7 +13,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor) std::shared_ptr tensorA{ new kp::Tensor(testVecPreA) }; - mgr.evalOpDefault({ 
tensorA }); + mgr.rebuildTensors({ tensorA }); EXPECT_TRUE(tensorA->isInit()); @@ -37,7 +37,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; std::shared_ptr tensorC{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.evalOpDefault({ tensorA, tensorB, tensorC }); + mgr.rebuildTensors({ tensorA, tensorB, tensorC }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); diff --git a/test/TestTensor.cpp b/test/TestTensor.cpp index 5e54e858..764b5fa9 100644 --- a/test/TestTensor.cpp +++ b/test/TestTensor.cpp @@ -23,12 +23,13 @@ TEST(TestTensor, CopyFromHostData) kp::Manager mgr; + mgr.rebuildTensors({ tensorA, tensorB }); + mgr.evalOpDefault({ tensorA, tensorB }); + if (std::shared_ptr sq = mgr.getOrCreateManagedSequence("new")) { sq->begin(); - sq->record({ tensorA, tensorB }); - sq->record({ tensorA, tensorB }); sq->end(); From f356e646448f19658373320efec1dd762eb16860 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:17:40 +0000 Subject: [PATCH 03/27] Updated aggregate headers --- single_include/AggregateHeaders.cpp | 1 - single_include/kompute/Kompute.hpp | 154 ++++++++++++---------------- 2 files changed, 67 insertions(+), 88 deletions(-) diff --git a/single_include/AggregateHeaders.cpp b/single_include/AggregateHeaders.cpp index 9ce53e85..57ab728c 100644 --- a/single_include/AggregateHeaders.cpp +++ b/single_include/AggregateHeaders.cpp @@ -8,7 +8,6 @@ #include "kompute/operations/OpAlgoBase.hpp" #include "kompute/operations/OpAlgoLhsRhsOut.hpp" #include "kompute/operations/OpMult.hpp" -#include "kompute/operations/OpTensorCreate.hpp" #include "kompute/operations/OpTensorCopy.hpp" #include "kompute/operations/OpTensorSyncDevice.hpp" #include "kompute/operations/OpTensorSyncLocal.hpp" diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index d388fa24..62037c91 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp 
@@ -698,6 +698,7 @@ static const unsigned int shaders_glsl_logisticregression_comp_spv_len = 4920; #endif // define SHADEROP_SHADERLOGISTICREGRESSION_HPP #include +#include #define KP_MAX_DIM_SIZE 1 @@ -723,7 +724,7 @@ class Tensor enum class TensorTypes { eDevice = 0, ///< Type is device memory, source and destination - eHost = 1, ///< Type is host memory, source and destination + eHost = 1, ///< Type is host memory, source and destination eStorage = 2, ///< Type is Device memory (only) }; @@ -736,7 +737,8 @@ class Tensor * Default constructor with data provided which would be used to create the * respective vulkan buffer and memory. * - * @param data Non-zero-sized vector of data that will be used by the tensor + * @param data Non-zero-sized vector of data that will be used by the + * tensor * @param tensorType Type for the tensor which is of type TensorTypes */ Tensor(const std::vector& data, @@ -829,24 +831,30 @@ class Tensor bool createBarrier); /** - * Records a copy from the internal staging memory to the device memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice. + * Records a copy from the internal staging memory to the device memory + * using an optional barrier to wait for the operation. This function would + * only be relevant for kp::Tensors of type eDevice. * * @param commandBuffer Vulkan Command Buffer to record the commands into * @param createBarrier Whether to create a barrier that ensures the data is * copied before further operations. Default is true. */ - void recordCopyFromStagingToDevice(std::shared_ptr commandBuffer, - bool createBarrier); + void recordCopyFromStagingToDevice( + std::shared_ptr commandBuffer, + bool createBarrier); /** - * Records a copy from the internal device memory to the staging memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice. 
+ * Records a copy from the internal device memory to the staging memory + * using an optional barrier to wait for the operation. This function would + * only be relevant for kp::Tensors of type eDevice. * * @param commandBuffer Vulkan Command Buffer to record the commands into * @param createBarrier Whether to create a barrier that ensures the data is * copied before further operations. Default is true. */ - void recordCopyFromDeviceToStaging(std::shared_ptr commandBuffer, - bool createBarrier); + void recordCopyFromDeviceToStaging( + std::shared_ptr commandBuffer, + bool createBarrier); /** * Records the buffer memory barrier into the command buffer which @@ -908,9 +916,17 @@ class Tensor bool mIsInit = false; void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer - void createBuffer(std::shared_ptr buffer, vk::BufferUsageFlags bufferUsageFlags); - void allocateBindMemory(std::shared_ptr buffer, std::shared_ptr memory, vk::MemoryPropertyFlags memoryPropertyFlags); - void copyBuffer(std::shared_ptr commandBuffer, std::shared_ptr bufferFrom, std::shared_ptr bufferTo, vk::DeviceSize bufferSize, vk::BufferCopy copyRegion, bool createBarrier); + void createBuffer(std::shared_ptr buffer, + vk::BufferUsageFlags bufferUsageFlags); + void allocateBindMemory(std::shared_ptr buffer, + std::shared_ptr memory, + vk::MemoryPropertyFlags memoryPropertyFlags); + void copyBuffer(std::shared_ptr commandBuffer, + std::shared_ptr bufferFrom, + std::shared_ptr bufferTo, + vk::DeviceSize bufferSize, + vk::BufferCopy copyRegion, + bool createBarrier); // Private util functions vk::BufferUsageFlags getPrimaryBufferUsageFlags(); @@ -949,13 +965,11 @@ class OpBase * @param device Vulkan logical device for passing to Algorithm * @param commandBuffer Vulkan Command Buffer to record commands into * @param tensors Tensors that are to be used in this operation - * @param freeTensors Whether operation manages the memory of the Tensors */ OpBase(std::shared_ptr physicalDevice, 
std::shared_ptr device, std::shared_ptr commandBuffer, - std::vector>& tensors, - bool freeTensors) + std::vector>& tensors) { SPDLOG_DEBUG("Compute OpBase constructor with params"); @@ -963,14 +977,12 @@ class OpBase this->mDevice = device; this->mCommandBuffer = commandBuffer; this->mTensors = tensors; - this->mFreeTensors = freeTensors; } /** * Default destructor for OpBase class. This OpBase destructor class should * always be called to destroy and free owned resources unless it is - * intended to destroy the resources in the parent class. This can be done - * by passing the mFreeTensors=false. + * intended to destroy the resources in the parent class. */ virtual ~OpBase() { @@ -1231,72 +1243,6 @@ class Sequence } // End namespace kp -namespace kp { - -/** - Operation that creates tensor and manages the memory of the components - created -*/ -class OpTensorCreate : public OpBase -{ - public: - OpTensorCreate(); - - /** - * Default constructor with parameters that provides the bare minimum - * requirements for the operations to be able to create and manage their - * sub-components. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that will be used to create in operation. - * @param freeTensors Whether operation manages the memory of the Tensors - */ - OpTensorCreate(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors); - - /** - * Default destructor which in this case expects the parent class to free - * the tensors - */ - ~OpTensorCreate() override; - - /** - * In charge of initialising the primary Tensor as well as the staging - * tensor as required. It will only initialise a staging tensor if the - * Primary tensor is of type Device. 
For staging tensors it performs a - * mapDataIntoHostMemory which would perform immediately as opposed to - * on sequence eval/submission. - */ - void init() override; - - /** - * Record runs the core actions to create the tensors. For device tensors - * it records a copyCommand to move the data from the staging tensor to the - * device tensor. The mapping for staging tensors happens in the init function - * not in the record function. - */ - void record() override; - - /** - * Does not perform any preEval commands. - */ - virtual void preEval() override; - - /** - * Performs a copy back into the main tensor to ensure that the data - * contained is the one that is now being stored in the GPU. - */ - virtual void postEval() override; - - private: -}; - -} // End namespace kp - #define KP_DEFAULT_SESSION "DEFAULT" namespace kp { @@ -1520,8 +1466,8 @@ class Manager /** * Function that simplifies the common workflow of tensor creation and * initialization. It will take the constructor parameters for a Tensor - * and will will us it to create a new Tensor and then create it using - * the OpCreateTensor command. + * and will will us it to create a new Tensor and then create it. The + * tensor memory will then be managed and owned by the manager. 
* * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize @@ -1531,17 +1477,49 @@ class Manager const std::vector& data, Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) { - SPDLOG_DEBUG("Kompute Manager createInitTensor triggered"); + SPDLOG_DEBUG("Kompute Manager buildTensor triggered"); SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr"); std::shared_ptr tensor = std::make_shared(kp::Tensor(data, tensorType)); - this->evalOpDefault({ tensor }); + tensor->init(this->mPhysicalDevice, this->mDevice); + if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { + tensor->mapDataIntoHostMemory(); + } + this->mManagedTensors.insert(tensor); return tensor; } + /** + * Function that simplifies the common workflow of tensor initialisation. It will take the constructor parameters for a Tensor and will will us it to create a new Tensor. The tensor memory will then be managed and owned by the manager. + * + * @param data The data to initialize the tensor with + * @param tensorType The type of tensor to initialize + * @returns Initialized Tensor with memory Syncd to GPU device + */ + void rebuildTensors(std::vector> tensors) + { + SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); + for (std::shared_ptr tensor : tensors) { + + if (tensor->isInit()) { + tensor->freeMemoryDestroyGPUResources(); + } + + tensor->init(this->mPhysicalDevice, this->mDevice); + if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { + tensor->mapDataIntoHostMemory(); + } + + std::set>::iterator it = this->mManagedTensors.find(tensor); + if (it == this->mManagedTensors.end()) { + this->mManagedTensors.insert(tensor); + } + } + } + private: // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mInstance = nullptr; @@ -1552,6 +1530,8 @@ class Manager bool mFreeDevice = false; // -------------- ALWAYS OWNED RESOURCES + std::set> mManagedTensors; + std::unordered_map> mManagedSequences; From 
984709a4e7f335e180b3c0b5cfdf5bb2bfac0a1a Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:17:52 +0000 Subject: [PATCH 04/27] Removd opcreatetensor from docs --- docs/overview/reference.rst | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/docs/overview/reference.rst b/docs/overview/reference.rst index 65721fb8..8b6160eb 100644 --- a/docs/overview/reference.rst +++ b/docs/overview/reference.rst @@ -86,16 +86,6 @@ The kp::OpMult operation is a sample implementation of the kp::OpAlgoBase class. .. doxygenclass:: kp::OpMult :members: -OpTensorCreate -------- - -The kp::OpTensorCreate is a tensor only operations which initialises a kp::Tensor by creating the respective vk::Buffer and vk::Memory, as well as transferring the local data into the GPU. - -.. image:: ../images/kompute-vulkan-architecture-opcreatetensor.jpg - :width: 100% - -.. doxygenclass:: kp::OpTensorCreate - :members: OpTensorCopy ------- From f62e353f4a5576aa98ac35405d27a2daa85087f5 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:39:58 +0000 Subject: [PATCH 05/27] Removed persistent anonymous sequences --- src/Manager.cpp | 3 +-- src/include/kompute/Manager.hpp | 9 +++------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/Manager.cpp b/src/Manager.cpp index 11d11a26..1db452d5 100755 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -143,8 +143,7 @@ Manager::createManagedSequence(std::string sequenceName, uint32_t queueIndex) if (sequenceName.empty()) { this->mCurrentSequenceIndex++; this->mManagedSequences.insert( - { KP_DEFAULT_SESSION + std::to_string(this->mCurrentSequenceIndex), - sq }); + { KP_DEFAULT_SESSION, sq }); } else { // TODO: Check if sequence doesn't already exist this->mManagedSequences.insert({ sequenceName, sq }); diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 973a0039..d7a08c49 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -126,8 
+126,7 @@ class Manager SPDLOG_DEBUG("Kompute Manager evalOp Default triggered"); this->mCurrentSequenceIndex++; this->evalOp(tensors, - KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), + KP_DEFAULT_SESSION, std::forward(params)...); } @@ -179,8 +178,7 @@ class Manager SPDLOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered"); this->mCurrentSequenceIndex++; this->evalOpAsync(tensors, - KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), + KP_DEFAULT_SESSION, std::forward(params)...); } @@ -222,8 +220,7 @@ class Manager void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX) { SPDLOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered"); - this->evalOpAwait(KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), + this->evalOpAwait(KP_DEFAULT_SESSION, waitFor); } From aa75fdae47e128395f6f6c0ae7220e2ca73d105c Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:43:50 +0000 Subject: [PATCH 06/27] format --- single_include/kompute/Kompute.hpp | 26 ++++++++++++-------------- src/Manager.cpp | 6 +++--- src/Tensor.cpp | 6 ++++-- src/include/kompute/Manager.hpp | 23 ++++++++++++----------- test/TestManager.cpp | 6 ++++-- test/TestMultipleAlgoExecutions.cpp | 6 ++++-- 6 files changed, 39 insertions(+), 34 deletions(-) mode change 100755 => 100644 src/Manager.cpp diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 62037c91..b63b766a 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -697,8 +697,8 @@ static const unsigned int shaders_glsl_logisticregression_comp_spv_len = 4920; } #endif // define SHADEROP_SHADERLOGISTICREGRESSION_HPP -#include #include +#include #define KP_MAX_DIM_SIZE 1 @@ -1361,10 +1361,8 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOp Default triggered"); this->mCurrentSequenceIndex++; - this->evalOp(tensors, - KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), - 
std::forward(params)...); + this->evalOp( + tensors, KP_DEFAULT_SESSION, std::forward(params)...); } /** @@ -1414,10 +1412,8 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered"); this->mCurrentSequenceIndex++; - this->evalOpAsync(tensors, - KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), - std::forward(params)...); + this->evalOpAsync( + tensors, KP_DEFAULT_SESSION, std::forward(params)...); } /** @@ -1458,9 +1454,7 @@ class Manager void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX) { SPDLOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered"); - this->evalOpAwait(KP_DEFAULT_SESSION + - std::to_string(this->mCurrentSequenceIndex), - waitFor); + this->evalOpAwait(KP_DEFAULT_SESSION, waitFor); } /** @@ -1493,7 +1487,10 @@ class Manager } /** - * Function that simplifies the common workflow of tensor initialisation. It will take the constructor parameters for a Tensor and will will us it to create a new Tensor. The tensor memory will then be managed and owned by the manager. + * Function that simplifies the common workflow of tensor initialisation. It + * will take the constructor parameters for a Tensor and will will us it to + * create a new Tensor. The tensor memory will then be managed and owned by + * the manager. 
* * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize @@ -1513,7 +1510,8 @@ class Manager tensor->mapDataIntoHostMemory(); } - std::set>::iterator it = this->mManagedTensors.find(tensor); + std::set>::iterator it = + this->mManagedTensors.find(tensor); if (it == this->mManagedTensors.end()) { this->mManagedTensors.insert(tensor); } diff --git a/src/Manager.cpp b/src/Manager.cpp old mode 100755 new mode 100644 index 1db452d5..98d07e49 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -62,7 +62,8 @@ Manager::~Manager() SPDLOG_DEBUG("Kompute Manager explicitly freeing tensors"); for (const std::shared_ptr& tensor : this->mManagedTensors) { if (!tensor->isInit()) { - SPDLOG_ERROR("Kompute Manager attempted to free managed tensor but not tensor is not initialised"); + SPDLOG_ERROR("Kompute Manager attempted to free managed tensor " + "but not tensor is not initialised"); } tensor->freeMemoryDestroyGPUResources(); } @@ -142,8 +143,7 @@ Manager::createManagedSequence(std::string sequenceName, uint32_t queueIndex) if (sequenceName.empty()) { this->mCurrentSequenceIndex++; - this->mManagedSequences.insert( - { KP_DEFAULT_SESSION, sq }); + this->mManagedSequences.insert({ KP_DEFAULT_SESSION, sq }); } else { // TODO: Check if sequence doesn't already exist this->mManagedSequences.insert({ sequenceName, sq }); diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 7400dfff..a1ba1544 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -232,7 +232,8 @@ Tensor::mapDataFromHostMemory() } else if (this->mTensorType == TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; } else { - SPDLOG_WARN("Kompute Tensor mapping data not supported on storage tensor"); + SPDLOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); return; } @@ -258,7 +259,8 @@ Tensor::mapDataIntoHostMemory() } else if (this->mTensorType == TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; } else { - SPDLOG_WARN("Kompute 
Tensor mapping data not supported on storage tensor"); + SPDLOG_WARN( + "Kompute Tensor mapping data not supported on storage tensor"); return; } diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index d7a08c49..758206b9 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -1,7 +1,7 @@ #pragma once -#include #include +#include #include "kompute/Core.hpp" @@ -125,9 +125,8 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOp Default triggered"); this->mCurrentSequenceIndex++; - this->evalOp(tensors, - KP_DEFAULT_SESSION, - std::forward(params)...); + this->evalOp( + tensors, KP_DEFAULT_SESSION, std::forward(params)...); } /** @@ -177,9 +176,8 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOpAsyncDefault triggered"); this->mCurrentSequenceIndex++; - this->evalOpAsync(tensors, - KP_DEFAULT_SESSION, - std::forward(params)...); + this->evalOpAsync( + tensors, KP_DEFAULT_SESSION, std::forward(params)...); } /** @@ -220,8 +218,7 @@ class Manager void evalOpAwaitDefault(uint64_t waitFor = UINT64_MAX) { SPDLOG_DEBUG("Kompute Manager evalOpAwaitDefault triggered"); - this->evalOpAwait(KP_DEFAULT_SESSION, - waitFor); + this->evalOpAwait(KP_DEFAULT_SESSION, waitFor); } /** @@ -254,7 +251,10 @@ class Manager } /** - * Function that simplifies the common workflow of tensor initialisation. It will take the constructor parameters for a Tensor and will will us it to create a new Tensor. The tensor memory will then be managed and owned by the manager. + * Function that simplifies the common workflow of tensor initialisation. It + * will take the constructor parameters for a Tensor and will will us it to + * create a new Tensor. The tensor memory will then be managed and owned by + * the manager. 
* * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize @@ -274,7 +274,8 @@ class Manager tensor->mapDataIntoHostMemory(); } - std::set>::iterator it = this->mManagedTensors.find(tensor); + std::set>::iterator it = + this->mManagedTensors.find(tensor); if (it == this->mManagedTensors.end()) { this->mManagedTensors.insert(tensor); } diff --git a/test/TestManager.cpp b/test/TestManager.cpp index 3e1db7b1..d822a13d 100644 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -17,7 +17,8 @@ TEST(TestManager, EndToEndOpMultFlow) mgr.rebuildTensors({ tensorOutput }); - mgr.evalOpDefault({ tensorLHS, tensorRHS, tensorOutput }); + mgr.evalOpDefault( + { tensorLHS, tensorRHS, tensorOutput }); mgr.evalOpDefault({ tensorLHS, tensorRHS, tensorOutput }); @@ -105,7 +106,8 @@ TEST(TestManager, TestMultipleTensorsAtOnce) sq->begin(); - sq->record({ tensorLHS, tensorRHS, tensorOutput }); + sq->record( + { tensorLHS, tensorRHS, tensorOutput }); sq->record({ tensorLHS, tensorRHS, tensorOutput }); diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index 9d696d55..c98ba178 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -248,7 +248,8 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate) mgr.rebuildTensors({ tensorInA, tensorInB, tensorOut }); - mgr.evalOpDefault({ tensorInA, tensorInB, tensorOut }); + mgr.evalOpDefault( + { tensorInA, tensorInB, tensorOut }); std::string shader(R"( // The version to use @@ -306,7 +307,8 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) } )"); - mgr.evalOpDefault({ tensorInA, tensorInB, tensorOut }); + mgr.evalOpDefault( + { tensorInA, tensorInB, tensorOut }); mgr.evalOpDefault( { tensorInA, tensorInB, tensorOut }, From 65cb1b7582273ea01cca5a67d5fa85b1b78bcf20 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:44:07 +0000 Subject: [PATCH 07/27] Removed destroy 
tensor function to avoid error logs in test --- test/TestOpTensorCreate.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/test/TestOpTensorCreate.cpp b/test/TestOpTensorCreate.cpp index a57142cf..c1dd200f 100644 --- a/test/TestOpTensorCreate.cpp +++ b/test/TestOpTensorCreate.cpp @@ -5,20 +5,19 @@ TEST(TestOpTensorCreate, CreateSingleTensorSingleOp) { - - kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; - std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; - mgr.rebuildTensors({ tensorA }); + { + kp::Manager mgr; - EXPECT_TRUE(tensorA->isInit()); + mgr.rebuildTensors({ tensorA }); - EXPECT_EQ(tensorA->data(), testVecA); + EXPECT_TRUE(tensorA->isInit()); + + EXPECT_EQ(tensorA->data(), testVecA); + } - tensorA->freeMemoryDestroyGPUResources(); EXPECT_FALSE(tensorA->isInit()); } From aa25f980d6c8362f0a8c8ba4a12f1fa28b0c981a Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 21:41:48 +0000 Subject: [PATCH 08/27] Added OpTensorSyncDevice by default on manager buildtensor functions with ability to disable with parameter --- src/include/kompute/Manager.hpp | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 758206b9..b20fa310 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -7,6 +7,8 @@ #include "kompute/Sequence.hpp" +#include "kompute/operations/OpTensorSyncDevice.hpp" + #define KP_DEFAULT_SESSION "DEFAULT" namespace kp { @@ -229,11 +231,13 @@ class Manager * * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize + * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ std::shared_ptr buildTensor( const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + Tensor::TensorTypes tensorType = 
Tensor::TensorTypes::eDevice, + bool syncDataToGPU = true) { SPDLOG_DEBUG("Kompute Manager buildTensor triggered"); @@ -242,11 +246,13 @@ class Manager std::make_shared(kp::Tensor(data, tensorType)); tensor->init(this->mPhysicalDevice, this->mDevice); - if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { - tensor->mapDataIntoHostMemory(); + + if (syncDataToGPU) { + this->evalOpDefault({tensor}); } this->mManagedTensors.insert(tensor); + return tensor; } @@ -258,9 +264,10 @@ class Manager * * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize + * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - void rebuildTensors(std::vector> tensors) + void rebuildTensors(std::vector> tensors, bool syncDataToGPU = true) { SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); for (std::shared_ptr tensor : tensors) { @@ -270,9 +277,6 @@ class Manager } tensor->init(this->mPhysicalDevice, this->mDevice); - if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { - tensor->mapDataIntoHostMemory(); - } std::set>::iterator it = this->mManagedTensors.find(tensor); @@ -280,6 +284,10 @@ class Manager this->mManagedTensors.insert(tensor); } } + + if (syncDataToGPU) { + this->evalOpDefault(tensors); + } } private: From d7fe53eae6e5f9b37ec2c7e3a325bb634d1db5a1 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 21:42:02 +0000 Subject: [PATCH 09/27] Updated tests to align with manager parameters update --- test/TestLogisticRegression.cpp | 2 -- test/TestManager.cpp | 14 -------------- test/TestMultipleAlgoExecutions.cpp | 17 +++++------------ test/TestOpAlgoLoopsPassingData.cpp | 2 +- test/TestOpShadersFromStringAndFile.cpp | 8 -------- test/TestOpTensorCopy.cpp | 12 +++--------- test/TestOpTensorSync.cpp | 4 ++-- test/TestTensor.cpp | 1 - 8 files changed, 11 insertions(+), 49 deletions(-) diff --git 
a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index e25b1416..a2b69505 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -33,7 +33,6 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression) kp::Manager mgr; mgr.rebuildTensors(params); - mgr.evalOpDefault(params); std::shared_ptr sq = mgr.createManagedSequence(); @@ -119,7 +118,6 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) kp::Manager mgr; mgr.rebuildTensors(params); - mgr.evalOpDefault(params); std::shared_ptr sq = mgr.createManagedSequence(); diff --git a/test/TestManager.cpp b/test/TestManager.cpp index d822a13d..2bd7fc47 100644 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -17,9 +17,6 @@ TEST(TestManager, EndToEndOpMultFlow) mgr.rebuildTensors({ tensorOutput }); - mgr.evalOpDefault( - { tensorLHS, tensorRHS, tensorOutput }); - mgr.evalOpDefault({ tensorLHS, tensorRHS, tensorOutput }); mgr.evalOpDefault({ tensorOutput }); @@ -46,10 +43,6 @@ TEST(TestManager, OpMultSequenceFlow) sq->begin(); - sq->record({ tensorLHS }); - sq->record({ tensorRHS }); - sq->record({ tensorOutput }); - sq->record({ tensorLHS, tensorRHS, tensorOutput }); sq->record({ tensorOutput }); @@ -106,9 +99,6 @@ TEST(TestManager, TestMultipleTensorsAtOnce) sq->begin(); - sq->record( - { tensorLHS, tensorRHS, tensorOutput }); - sq->record({ tensorLHS, tensorRHS, tensorOutput }); sq->record({ tensorOutput }); @@ -127,10 +117,6 @@ TEST(TestManager, TestCreateInitTensor) std::shared_ptr tensorA = mgr.buildTensor({ 0, 1, 2 }); std::shared_ptr tensorB = mgr.buildTensor({ 0, 0, 0 }); - mgr.rebuildTensors({ tensorA, tensorB }); - - mgr.evalOpDefault({ tensorA, tensorB }); - mgr.evalOpDefault({ tensorA, tensorB }); mgr.evalOpDefault({ tensorB }); diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index c98ba178..208f1f9c 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ 
b/test/TestMultipleAlgoExecutions.cpp @@ -27,8 +27,6 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) { sq->begin(); - sq->record({ tensorA }); - sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); sq->record( @@ -60,7 +58,7 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) pa[index] = pa[index] + 1; })"); - mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorA }, false); std::shared_ptr sqTensor = mgr.createManagedSequence(); @@ -123,8 +121,6 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) sq->begin(); - sq->record({ tensorA }); - sq->record( { tensorA }, std::vector(shader.begin(), shader.end())); @@ -189,7 +185,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) pa[index] = pa[index] + 1; })"); - mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorA }, false); { std::shared_ptr sq = @@ -248,9 +244,6 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate) mgr.rebuildTensors({ tensorInA, tensorInB, tensorOut }); - mgr.evalOpDefault( - { tensorInA, tensorInB, tensorOut }); - std::string shader(R"( // The version to use #version 450 @@ -284,9 +277,9 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) kp::Manager mgr; - auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 }); - auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 }); - auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 }); + auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 }, kp::Tensor::TensorTypes::eDevice, false); + auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 }, kp::Tensor::TensorTypes::eDevice, false); + auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 }, kp::Tensor::TensorTypes::eDevice, false); std::string shader(R"( // The version to use diff --git a/test/TestOpAlgoLoopsPassingData.cpp b/test/TestOpAlgoLoopsPassingData.cpp index b04ef383..63bab299 100644 --- a/test/TestOpAlgoLoopsPassingData.cpp +++ b/test/TestOpAlgoLoopsPassingData.cpp @@ -30,7 +30,7 @@ TEST(TestProcessingIterations, 
IterateThroughMultipleSumAndCopies) } )"); - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }, false); { std::shared_ptr sq = diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp index 7b0db3de..7d73bd7f 100644 --- a/test/TestOpShadersFromStringAndFile.cpp +++ b/test/TestOpShadersFromStringAndFile.cpp @@ -28,8 +28,6 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor) } )"); - mgr.evalOpDefault({ tensorA, tensorB }); - mgr.evalOpDefault( { tensorA, tensorB }, std::vector(shader.begin(), shader.end())); @@ -47,8 +45,6 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; mgr.rebuildTensors({ tensorA, tensorB }); - mgr.evalOpDefault({ tensorA, tensorB }); - mgr.evalOpDefault( { tensorA, tensorB }, std::vector( @@ -71,8 +67,6 @@ TEST(TestOpAlgoBase, ShaderRawDataFromFile) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; mgr.rebuildTensors({ tensorA, tensorB }); - mgr.evalOpDefault({ tensorA, tensorB }); - mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp"); @@ -90,8 +84,6 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromFile) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; mgr.rebuildTensors({ tensorA, tensorB }); - mgr.evalOpDefault({ tensorA, tensorB }); - mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv"); diff --git a/test/TestOpTensorCopy.cpp b/test/TestOpTensorCopy.cpp index ae99bf17..ca4539d6 100644 --- a/test/TestOpTensorCopy.cpp +++ b/test/TestOpTensorCopy.cpp @@ -16,8 +16,6 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) mgr.rebuildTensors({ tensorA, tensorB }); - mgr.evalOpDefault({ tensorA, tensorB }); - EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -45,8 +43,6 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) mgr.rebuildTensors({ tensorA, tensorB, tensorC }); - mgr.evalOpDefault({ tensorA, tensorB 
}); - EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); EXPECT_TRUE(tensorC->isInit()); @@ -74,7 +70,7 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor) std::shared_ptr tensorB{ new kp::Tensor( testVecB, kp::Tensor::TensorTypes::eHost) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }, false); // Only calling sync on device type tensor mgr.evalOpDefault({ tensorA }); @@ -103,7 +99,7 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor) testVecA, kp::Tensor::TensorTypes::eHost) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuildTensors({ tensorA, tensorB }, false); // Only calling sync on device type tensor mgr.evalOpDefault({ tensorB }); @@ -135,8 +131,6 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor) mgr.rebuildTensors({ tensorA, tensorB }); - // Not calling OpTensorSyncDevice - EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -159,7 +153,7 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail) std::shared_ptr tensorA{ new kp::Tensor( testVecA, kp::Tensor::TensorTypes::eHost) }; - mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorA }, false); EXPECT_TRUE(tensorA->isInit()); diff --git a/test/TestOpTensorSync.cpp b/test/TestOpTensorSync.cpp index 3808941f..9080194a 100644 --- a/test/TestOpTensorSync.cpp +++ b/test/TestOpTensorSync.cpp @@ -13,7 +13,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor) std::shared_ptr tensorA{ new kp::Tensor(testVecPreA) }; - mgr.rebuildTensors({ tensorA }); + mgr.rebuildTensors({ tensorA }, false); EXPECT_TRUE(tensorA->isInit()); @@ -37,7 +37,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; std::shared_ptr tensorC{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.rebuildTensors({ tensorA, tensorB, tensorC }); + mgr.rebuildTensors({ tensorA, tensorB, tensorC }, false); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); diff 
--git a/test/TestTensor.cpp b/test/TestTensor.cpp index 764b5fa9..7ceea1ba 100644 --- a/test/TestTensor.cpp +++ b/test/TestTensor.cpp @@ -24,7 +24,6 @@ TEST(TestTensor, CopyFromHostData) kp::Manager mgr; mgr.rebuildTensors({ tensorA, tensorB }); - mgr.evalOpDefault({ tensorA, tensorB }); if (std::shared_ptr sq = mgr.getOrCreateManagedSequence("new")) { From 71cde2d5b2f68339e80aa87bf8fd09f05cc6f6b1 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 21:42:14 +0000 Subject: [PATCH 10/27] Updated single include header --- single_include/kompute/Kompute.hpp | 125 +++++++++++++++-------------- 1 file changed, 65 insertions(+), 60 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index b63b766a..722a625a 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1243,6 +1243,59 @@ class Sequence } // End namespace kp +namespace kp { + +/** + Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging. +*/ +class OpTensorSyncDevice : public OpBase +{ + public: + OpTensorSyncDevice(); + + /** + * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensos provided cannot be of type TensorTypes::eStorage. + * + * @param physicalDevice Vulkan physical device used to find device queues + * @param device Vulkan logical device for passing to Algorithm + * @param commandBuffer Vulkan Command Buffer to record commands into + * @param tensors Tensors that will be used to create in operation. 
+ */ + OpTensorSyncDevice(std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr commandBuffer, + std::vector> tensors); + + /** + * Default destructor. This class does not manage memory so it won't be expecting the parent to perform a release. + */ + ~OpTensorSyncDevice() override; + + /** + * Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element. + */ + void init() override; + + /** + * For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory. + */ + void record() override; + + /** + * Does not perform any preEval commands. + */ + virtual void preEval() override; + + /** + * Does not perform any postEval commands. + */ + virtual void postEval() override; + + private: +}; + +} // End namespace kp + #define KP_DEFAULT_SESSION "DEFAULT" namespace kp { @@ -1465,11 +1518,13 @@ class Manager * * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize + * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ std::shared_ptr buildTensor( const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice, + bool syncDataToGPU = true) { SPDLOG_DEBUG("Kompute Manager buildTensor triggered"); @@ -1478,8 +1533,9 @@ class Manager std::make_shared(kp::Tensor(data, tensorType)); tensor->init(this->mPhysicalDevice, this->mDevice); - if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { - tensor->mapDataIntoHostMemory(); + + if (syncDataToGPU) { + this->evalOpDefault({tensor}); } this->mManagedTensors.insert(tensor); @@ -1494,9 +1550,10 @@ class Manager * * @param data The data to initialize the tensor with * @param tensorType The type of tensor to initialize + * @param syncDataToGPU Whether to sync the data to GPU memory * @returns 
Initialized Tensor with memory Syncd to GPU device */ - void rebuildTensors(std::vector> tensors) + void rebuildTensors(std::vector> tensors, bool syncDataToGPU = true) { SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); for (std::shared_ptr tensor : tensors) { @@ -1506,9 +1563,6 @@ class Manager } tensor->init(this->mPhysicalDevice, this->mDevice); - if (tensor->tensorType() != Tensor::TensorTypes::eStorage) { - tensor->mapDataIntoHostMemory(); - } std::set>::iterator it = this->mManagedTensors.find(tensor); @@ -1516,6 +1570,10 @@ class Manager this->mManagedTensors.insert(tensor); } } + + if (syncDataToGPU) { + this->evalOpDefault(tensors); + } } private: @@ -1977,59 +2035,6 @@ class OpTensorCopy : public OpBase namespace kp { -/** - Operation that syncs tensor's device by mapping local data into the device memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging. -*/ -class OpTensorSyncDevice : public OpBase -{ - public: - OpTensorSyncDevice(); - - /** - * Default constructor with parameters that provides the core vulkan resources and the tensors that will be used in the operation. The tensos provided cannot be of type TensorTypes::eStorage. - * - * @param physicalDevice Vulkan physical device used to find device queues - * @param device Vulkan logical device for passing to Algorithm - * @param commandBuffer Vulkan Command Buffer to record commands into - * @param tensors Tensors that will be used to create in operation. - */ - OpTensorSyncDevice(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr commandBuffer, - std::vector> tensors); - - /** - * Default destructor. 
This class does not manage memory so it won't be expecting the parent to perform a release. - */ - ~OpTensorSyncDevice() override; - - /** - * Performs basic checks such as ensuring that there is at least one tensor provided with min memory of 1 element. - */ - void init() override; - - /** - * For device tensors, it records the copy command for the tensor to copy the data from its staging to device memory. - */ - void record() override; - - /** - * Does not perform any preEval commands. - */ - virtual void preEval() override; - - /** - * Does not perform any postEval commands. - */ - virtual void postEval() override; - - private: -}; - -} // End namespace kp - -namespace kp { - /** Operation that syncs tensor's local memory by mapping device data into the local CPU memory. For TensorTypes::eDevice it will use a record operation for the memory to be syncd into GPU memory which means that the operation will be done in sync with GPU commands. For TensorTypes::eStaging it will only map the data into host memory which will happen during preEval before the recorded commands are dispatched. This operation won't have any effect on TensorTypes::eStaging. 
*/ From 3547810476755ab02b6dac3dca7a959410b12e14 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 21:42:32 +0000 Subject: [PATCH 11/27] reformat --- src/include/kompute/Manager.hpp | 6 +++--- test/TestMultipleAlgoExecutions.cpp | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index b20fa310..ad3facd9 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -248,11 +248,10 @@ class Manager tensor->init(this->mPhysicalDevice, this->mDevice); if (syncDataToGPU) { - this->evalOpDefault({tensor}); + this->evalOpDefault({ tensor }); } this->mManagedTensors.insert(tensor); - return tensor; } @@ -267,7 +266,8 @@ class Manager * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - void rebuildTensors(std::vector> tensors, bool syncDataToGPU = true) + void rebuildTensors(std::vector> tensors, + bool syncDataToGPU = true) { SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); for (std::shared_ptr tensor : tensors) { diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index 208f1f9c..4d2a44a9 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -277,9 +277,12 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) kp::Manager mgr; - auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 }, kp::Tensor::TensorTypes::eDevice, false); - auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 }, kp::Tensor::TensorTypes::eDevice, false); - auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 }, kp::Tensor::TensorTypes::eDevice, false); + auto tensorInA = mgr.buildTensor( + { 2.0, 4.0, 6.0 }, kp::Tensor::TensorTypes::eDevice, false); + auto tensorInB = mgr.buildTensor( + { 0.0, 1.0, 2.0 }, kp::Tensor::TensorTypes::eDevice, false); + auto tensorOut = mgr.buildTensor( + { 0.0, 0.0, 0.0 }, 
kp::Tensor::TensorTypes::eDevice, false); std::string shader(R"( // The version to use From 667841d1366d22f16694cc7952d9fcc097f19f40 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 19:17:38 +0000 Subject: [PATCH 12/27] Updated ccls to include python --- .ccls | 1 + 1 file changed, 1 insertion(+) diff --git a/.ccls b/.ccls index f215ea9d..ab626158 100644 --- a/.ccls +++ b/.ccls @@ -13,6 +13,7 @@ -DDEBUG=1 -DKOMPUTE_INCLUDE_FOR_SYNTAX +-I/usr/include/python3.6/ -I./python/pybind11/include/ -I./external/Vulkan-Headers/include/ -I./external/googletest/googletest/include/ From dead40c871206dacc7ae9aaffeee45cc844d2632 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 19:17:48 +0000 Subject: [PATCH 13/27] Added python target --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 2f0b5e2f..da1df2cb 100644 --- a/Makefile +++ b/Makefile @@ -156,6 +156,11 @@ vs_run_tests: vs_build_tests ./build/test/$(VS_BUILD_TYPE)/test_kompute.exe --gtest_filter=$(FILTER_TESTS) +#### PYTHONG #### + +test_python: + python -m pytest -s --log-cli-level=DEBUG -v python/test/ + ####### Run CI Commands ####### # This command uses act to replicate github action From 650975838c35e8786a05b02a62f33c77e0f0a4e0 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 19:19:09 +0000 Subject: [PATCH 14/27] Updated python to align with new structure --- python/src/main.cpp | 15 ++++----------- python/test/test_array_multiplication.py | 2 +- python/test/test_kompute.py | 20 ++++++++++++-------- python/test/test_logistic_regression.py | 2 +- 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index 6e795fad..74f010f0 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -105,8 +105,6 @@ PYBIND11_MODULE(kp, m) { .def("is_init", &kp::Sequence::isInit, "Checks if the Sequence has been initialized") // record - .def("record_tensor_create", 
&kp::Sequence::record, - "Records operation to create and initialise tensor GPU memory and buffer") .def("record_tensor_copy", &kp::Sequence::record, "Records operation to copy one tensor to one or many tensors") .def("record_tensor_sync_device", &kp::Sequence::record, @@ -161,7 +159,10 @@ PYBIND11_MODULE(kp, m) { .def("create_sequence", &kp::Manager::createManagedSequence, py::arg("name") = "", py::arg("queueIndex") = 0, "Create a sequence with specific name and specified index of available queues") .def("build_tensor", &kp::Manager::buildTensor, - py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice, + py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice, py::arg("syncDataToGPU") = true, + "Build and initialise tensor") + .def("rebuild_tensors", &kp::Manager::rebuildTensors, + py::arg("tensors"), py::arg("syncDataToGPU") = true, "Build and initialise tensor") // Await functions @@ -172,8 +173,6 @@ PYBIND11_MODULE(kp, m) { py::arg("waitFor") = UINT64_MAX, "Awaits for asynchronous operation on the last anonymous Sequence created") // eval default - .def("eval_tensor_create_def", &kp::Manager::evalOpDefault, - "Evaluates operation to create and initialise tensor GPU memory and buffer with new anonymous Sequence") .def("eval_tensor_copy_def", &kp::Manager::evalOpDefault, "Evaluates operation to copy one tensor to one or many tensors with new anonymous Sequence") .def("eval_tensor_sync_device_def", &kp::Manager::evalOpDefault, @@ -209,8 +208,6 @@ PYBIND11_MODULE(kp, m) { "Evaluates operation to run left right out operation with custom shader with new anonymous Sequence") // eval - .def("eval_tensor_create", &kp::Manager::evalOp, - "Evaluates operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence") .def("eval_tensor_copy", &kp::Manager::evalOp, "Evaluates operation to copy one tensor to one or many tensors with explicitly named Sequence") .def("eval_tensor_sync_device", 
&kp::Manager::evalOp, @@ -249,8 +246,6 @@ PYBIND11_MODULE(kp, m) { "Evaluates operation to run left right out operation with custom shader with explicitly named Sequence") // eval async default - .def("eval_async_tensor_create_def", &kp::Manager::evalOpAsyncDefault, - "Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with anonymous Sequence") .def("eval_async_tensor_copy_def", &kp::Manager::evalOpAsyncDefault, "Evaluates asynchronously operation to copy one tensor to one or many tensors with anonymous Sequence") .def("eval_async_tensor_sync_device_def", &kp::Manager::evalOpAsyncDefault, @@ -286,8 +281,6 @@ PYBIND11_MODULE(kp, m) { "Evaluates asynchronously operation to run left right out operation with custom shader with anonymous Sequence") // eval async - .def("eval_async_tensor_create", &kp::Manager::evalOpAsync, - "Evaluates asynchronously operation to create and initialise tensor GPU memory and buffer with explicitly named Sequence") .def("eval_async_tensor_copy", &kp::Manager::evalOpAsync, "Evaluates asynchronously operation to copy one tensor to one or many tensors with explicitly named Sequence") .def("eval_async_tensor_sync_device", &kp::Manager::evalOpAsync, diff --git a/python/test/test_array_multiplication.py b/python/test/test_array_multiplication.py index 337c7a5d..fac2ed41 100644 --- a/python/test/test_array_multiplication.py +++ b/python/test/test_array_multiplication.py @@ -14,7 +14,7 @@ def test_array_multiplication(): tensor_out = kp.Tensor([0, 0, 0]) # 3. Initialise the Kompute Tensors in the GPU - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) # 4. 
Define the multiplication shader code to run on the GPU @ps.python2shader diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index 9dee9df9..bec4b40e 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -2,6 +2,7 @@ import kp import numpy as np +import logging DIRNAME = os.path.dirname(os.path.abspath(__file__)) @@ -16,7 +17,7 @@ def test_opmult(): mgr = kp.Manager() - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_mult_def([tensor_in_a, tensor_in_b, tensor_out]) @@ -52,7 +53,7 @@ def test_opalgobase_data(): } """ - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_str_def([tensor_in_a, tensor_in_b, tensor_out], shaderData) @@ -75,7 +76,7 @@ def test_opalgobase_file(): shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp") - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) @@ -93,7 +94,7 @@ def test_sequence(): tensor_in_b = kp.Tensor([1, 2, 3]) tensor_out = kp.Tensor([0, 0, 0]) - mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp") mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) @@ -118,7 +119,8 @@ def test_workgroup(): tensor_a = kp.Tensor(np.zeros([16,8])) tensor_b = kp.Tensor(np.zeros([16,8])) - mgr.eval_tensor_create_def([tensor_a, tensor_b]) + + mgr.rebuild_tensors([tensor_a, tensor_b]) shader_src = """ #version 450 @@ -131,19 +133,21 @@ def test_workgroup(): void main() { uint index = gl_WorkGroupID.x*gl_NumWorkGroups.y + gl_WorkGroupID.y; - + toutx[index] = 
gl_GlobalInvocationID.x; touty[index] = gl_GlobalInvocationID.y; } """ shader_src = bytes(shader_src, encoding='utf8') - seq = mgr.create_sequence() + seq = mgr.create_sequence("new") seq.begin() seq.record_algo_data([tensor_a, tensor_b], shader_src, (16,8,1)) seq.end() seq.eval() - + mgr.eval_tensor_sync_local_def([tensor_a, tensor_b]) + assert np.all(tensor_a.numpy() == np.stack([np.arange(16)]*8, axis=1).ravel()) assert np.all(tensor_b.numpy() == np.stack([np.arange(8)]*16, axis=0).ravel()) + diff --git a/python/test/test_logistic_regression.py b/python/test/test_logistic_regression.py index f8737588..1fbcd5bc 100644 --- a/python/test/test_logistic_regression.py +++ b/python/test/test_logistic_regression.py @@ -66,7 +66,7 @@ def compute_shader( params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i, tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m] - mgr.eval_tensor_create_def(params) + mgr.rebuild_tensors(params) # Create a managed sequence sq = mgr.create_sequence() From b34984b7132301cd1e3827393879a6c0471f0d90 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 19:19:39 +0000 Subject: [PATCH 15/27] Updating sequence to have isInit until init run --- src/Manager.cpp | 20 ++++++++++---------- src/Sequence.cpp | 4 +++- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/Manager.cpp b/src/Manager.cpp index 98d07e49..7885c2eb 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -58,6 +58,16 @@ Manager::~Manager() return; } + if (this->mManagedSequences.size()) { + SPDLOG_DEBUG("Kompute Manager explicitly running destructor for " + "managed sequences"); + for (const std::pair>& sqPair : + this->mManagedSequences) { + sqPair.second->freeMemoryDestroyGPUResources(); + } + this->mManagedSequences.clear(); + } + if (this->mManagedTensors.size()) { SPDLOG_DEBUG("Kompute Manager explicitly freeing tensors"); for (const std::shared_ptr& tensor : this->mManagedTensors) { @@ -70,16 +80,6 @@ 
Manager::~Manager() this->mManagedTensors.clear(); } - if (this->mManagedSequences.size()) { - SPDLOG_DEBUG("Kompute Manager explicitly running destructor for " - "managed sequences"); - for (const std::pair>& sqPair : - this->mManagedSequences) { - sqPair.second->freeMemoryDestroyGPUResources(); - } - this->mManagedSequences.clear(); - } - if (this->mFreeDevice) { SPDLOG_INFO("Destroying device"); this->mDevice->destroy( diff --git a/src/Sequence.cpp b/src/Sequence.cpp index 50ef8b0f..3c3b7b10 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -20,7 +20,7 @@ Sequence::Sequence(std::shared_ptr physicalDevice, this->mDevice = device; this->mComputeQueue = computeQueue; this->mQueueIndex = queueIndex; - this->mIsInit = true; + this->mIsInit = false; } Sequence::~Sequence() @@ -203,6 +203,8 @@ Sequence::isInit() void Sequence::freeMemoryDestroyGPUResources() { + SPDLOG_DEBUG("Kompute Sequence freeMemoryDestroyGPUResources called"); + if (!this->mIsInit) { SPDLOG_ERROR("Kompute Sequence freeMemoryDestroyGPUResources called " "but Sequence is not initialized so there's no relevant " From 39d02dd42829f16a6585205992c07117ed965fce Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 19:20:21 +0000 Subject: [PATCH 16/27] Added test that verifies memory violation sequence --- test/TestMultipleAlgoExecutions.cpp | 36 +++++++++++++++++++++++++++++ test/TestSequence.cpp | 1 + 2 files changed, 37 insertions(+) diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index 4d2a44a9..b6eaea54 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -314,3 +314,39 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) EXPECT_EQ(tensorOut->data(), std::vector({ 0.0, 4.0, 12.0 })); } + +TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope) +{ + std::shared_ptr tensorA{ new kp::Tensor({ 0, 0, 0 }) }; + + std::string shader(R"( + #version 450 + layout 
(local_size_x = 1) in; + layout(set = 0, binding = 0) buffer a { float pa[]; }; + void main() { + uint index = gl_GlobalInvocationID.x; + pa[index] = pa[index] + 1; + })"); + + { + std::shared_ptr sq = nullptr; + + { + kp::Manager mgr; + + mgr.rebuildTensors({ tensorA }); + + sq = mgr.createManagedSequence(); + + sq->begin(); + sq->record( + { tensorA }, std::vector(shader.begin(), shader.end())); + sq->end(); + + sq->eval(); + + mgr.evalOpDefault({ tensorA }); + } + } + EXPECT_EQ(tensorA->data(), std::vector({ 1, 1, 1 })); +} diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp index 882729dc..2d0a8a4b 100644 --- a/test/TestSequence.cpp +++ b/test/TestSequence.cpp @@ -39,3 +39,4 @@ TEST(TestSequence, SequenceDestructorViaManager) EXPECT_FALSE(sq->isInit()); } + From 91252201cebf120dbd277980a293d11ed7056139 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 19:20:30 +0000 Subject: [PATCH 17/27] updating single include --- single_include/kompute/Kompute.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index 722a625a..f3ebd990 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1535,7 +1535,7 @@ class Manager tensor->init(this->mPhysicalDevice, this->mDevice); if (syncDataToGPU) { - this->evalOpDefault({tensor}); + this->evalOpDefault({ tensor }); } this->mManagedTensors.insert(tensor); @@ -1553,7 +1553,8 @@ class Manager * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - void rebuildTensors(std::vector> tensors, bool syncDataToGPU = true) + void rebuildTensors(std::vector> tensors, + bool syncDataToGPU = true) { SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); for (std::shared_ptr tensor : tensors) { From 0d9a9758dac2dac212164be5a3255ac5274c01cd Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 
21:29:24 +0000 Subject: [PATCH 18/27] Renamed tensor and rebuild functions --- src/Manager.cpp | 44 +++++++------------ src/include/kompute/Manager.hpp | 77 +++++++++++++++++++-------------- 2 files changed, 60 insertions(+), 61 deletions(-) diff --git a/src/Manager.cpp b/src/Manager.cpp index 7885c2eb..e7bb88f2 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -111,44 +111,32 @@ Manager::~Manager() } std::shared_ptr -Manager::getOrCreateManagedSequence(std::string sequenceName) +Manager::sequence(std::string sequenceName, uint32_t queueIndex) { - SPDLOG_DEBUG("Kompute Manager creating Sequence object"); + SPDLOG_DEBUG("Kompute Manager sequence() with sequenceName: {} " + "and queueIndex: {}", + sequenceName, + queueIndex); + + std::shared_ptr sq = nullptr; std::unordered_map>::iterator found = this->mManagedSequences.find(sequenceName); if (found == this->mManagedSequences.end()) { - return this->createManagedSequence(sequenceName); - } else { - return found->second; - } -} - -std::shared_ptr -Manager::createManagedSequence(std::string sequenceName, uint32_t queueIndex) -{ + std::shared_ptr sq = + std::make_shared(this->mPhysicalDevice, + this->mDevice, + this->mComputeQueues[queueIndex], + this->mComputeQueueFamilyIndices[queueIndex]); + sq->init(); - SPDLOG_DEBUG("Kompute Manager createManagedSequence with sequenceName: {} " - "and queueIndex: {}", - sequenceName, - queueIndex); - - std::shared_ptr sq = - std::make_shared(this->mPhysicalDevice, - this->mDevice, - this->mComputeQueues[queueIndex], - this->mComputeQueueFamilyIndices[queueIndex]); - sq->init(); + this->mManagedSequences.insert({ sequenceName, sq }); - if (sequenceName.empty()) { - this->mCurrentSequenceIndex++; - this->mManagedSequences.insert({ KP_DEFAULT_SESSION, sq }); + return sq; } else { - // TODO: Check if sequence doesn't already exist - this->mManagedSequences.insert({ sequenceName, sq }); + return found->second; } - return sq; } void diff --git a/src/include/kompute/Manager.hpp 
b/src/include/kompute/Manager.hpp index ad3facd9..5ef32ff6 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -64,23 +64,12 @@ class Manager * * @param sequenceName The name for the named sequence to be retrieved or * created - * @return Shared pointer to the manager owned sequence resource - */ - std::shared_ptr getOrCreateManagedSequence( - std::string sequenceName); - - /** - * Create a new managed Kompute sequence so it's available within the - * manager. - * - * @param sequenceName The name for the named sequence to be created, if - * empty then default indexed value is used * @param queueIndex The queue to use from the available queues - * @return Weak pointer to the manager owned sequence resource + * @return Shared pointer to the manager owned sequence resource */ - std::shared_ptr createManagedSequence( - std::string sequenceName = "", - uint32_t queueIndex = 0); + std::shared_ptr sequence( + std::string sequenceName = KP_DEFAULT_SESSION, + uint32_t queueIndex = 0); /** * Function that evaluates operation against named sequence. 
@@ -97,7 +86,7 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOp triggered"); std::shared_ptr sq = - this->getOrCreateManagedSequence(sequenceName); + this->sequence(sequenceName); SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); sq->begin(); @@ -147,7 +136,7 @@ class Manager SPDLOG_DEBUG("Kompute Manager evalOpAsync triggered"); std::shared_ptr sq = - this->getOrCreateManagedSequence(sequenceName); + this->sequence(sequenceName); SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN"); sq->begin(); @@ -234,12 +223,12 @@ class Manager * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - std::shared_ptr buildTensor( + std::shared_ptr tensor( const std::vector& data, Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice, bool syncDataToGPU = true) { - SPDLOG_DEBUG("Kompute Manager buildTensor triggered"); + SPDLOG_DEBUG("Kompute Manager tensor triggered"); SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr"); std::shared_ptr tensor = @@ -261,32 +250,54 @@ class Manager * create a new Tensor. The tensor memory will then be managed and owned by * the manager. 
* - * @param data The data to initialize the tensor with - * @param tensorType The type of tensor to initialize + * @param tensors Array of tensors to rebuild * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - void rebuildTensors(std::vector> tensors, + void rebuild(std::vector> tensors, bool syncDataToGPU = true) { - SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); + SPDLOG_DEBUG("Kompute Manager rebuild triggered"); for (std::shared_ptr tensor : tensors) { - if (tensor->isInit()) { - tensor->freeMemoryDestroyGPUResources(); - } + // False syncData to run all tensors at once instead one by one + this->rebuild(tensor, false); + } - tensor->init(this->mPhysicalDevice, this->mDevice); + if (syncDataToGPU) { + this->evalOpDefault(tensors); + } + } - std::set>::iterator it = - this->mManagedTensors.find(tensor); - if (it == this->mManagedTensors.end()) { - this->mManagedTensors.insert(tensor); - } + /** + * Function that simplifies the common workflow of tensor initialisation. It + * will take the constructor parameters for a Tensor and will will us it to + * create a new Tensor. The tensor memory will then be managed and owned by + * the manager. 
+ * + * @param tensors Single tensor to rebuild + * @param syncDataToGPU Whether to sync the data to GPU memory + * @returns Initialized Tensor with memory Syncd to GPU device + */ + void rebuild(std::shared_ptr tensor, + bool syncDataToGPU = true) + { + SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered"); + + if (tensor->isInit()) { + tensor->freeMemoryDestroyGPUResources(); + } + + tensor->init(this->mPhysicalDevice, this->mDevice); + + std::set>::iterator it = + this->mManagedTensors.find(tensor); + if (it == this->mManagedTensors.end()) { + this->mManagedTensors.insert(tensor); } if (syncDataToGPU) { - this->evalOpDefault(tensors); + this->evalOpDefault({ tensor }); } } From 4baba3368179f84ba8634e2104deb7f21b785b64 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 21:29:43 +0000 Subject: [PATCH 19/27] Updated tests to match new functions and added test to ensure seuqence is destroyed --- test/TestAsyncOperations.cpp | 12 ++++---- test/TestLogisticRegression.cpp | 8 ++--- test/TestManager.cpp | 28 ++++++++--------- test/TestMultipleAlgoExecutions.cpp | 40 ++++++++++++------------- test/TestOpAlgoLoopsPassingData.cpp | 8 ++--- test/TestOpShadersFromStringAndFile.cpp | 8 ++--- test/TestOpTensorCopy.cpp | 12 ++++---- test/TestOpTensorCreate.cpp | 22 +++++++------- test/TestOpTensorSync.cpp | 4 +-- test/TestSequence.cpp | 4 +-- test/TestTensor.cpp | 4 +-- 11 files changed, 75 insertions(+), 75 deletions(-) diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index c43f5648..ddbcb659 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -49,7 +49,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) inputsSyncB.push_back(std::make_shared(kp::Tensor(data))); } - mgr.rebuildTensors(inputsSyncB); + mgr.rebuild(inputsSyncB); auto startSync = std::chrono::high_resolution_clock::now(); @@ -77,10 +77,10 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) 
inputsAsyncB.push_back(std::make_shared(kp::Tensor(data))); } - mgrAsync.rebuildTensors(inputsAsyncB); + mgrAsync.rebuild(inputsAsyncB); for (uint32_t i = 0; i < numParallel; i++) { - mgrAsync.createManagedSequence("async" + std::to_string(i), i); + mgrAsync.sequence("async" + std::to_string(i), i); } auto startAsync = std::chrono::high_resolution_clock::now(); @@ -146,10 +146,10 @@ TEST(TestAsyncOperations, TestManagerAsyncExecution) std::shared_ptr tensorA{ new kp::Tensor(data) }; std::shared_ptr tensorB{ new kp::Tensor(data) }; - mgr.createManagedSequence("asyncOne"); - mgr.createManagedSequence("asyncTwo"); + mgr.sequence("asyncOne"); + mgr.sequence("asyncTwo"); - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); mgr.evalOpAsync( { tensorA }, "asyncOne", std::vector(shader.begin(), shader.end())); diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index a2b69505..b974655a 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -32,9 +32,9 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegression) { kp::Manager mgr; - mgr.rebuildTensors(params); + mgr.rebuild(params); - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); @@ -117,9 +117,9 @@ TEST(TestLogisticRegressionAlgorithm, TestMainLogisticRegressionManualCopy) { kp::Manager mgr; - mgr.rebuildTensors(params); + mgr.rebuild(params); - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); diff --git a/test/TestManager.cpp b/test/TestManager.cpp index 2bd7fc47..75494156 100644 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -8,14 +8,14 @@ TEST(TestManager, EndToEndOpMultFlow) kp::Manager mgr; std::shared_ptr tensorLHS{ new kp::Tensor({ 0, 1, 2 }) }; - mgr.rebuildTensors({ tensorLHS }); + mgr.rebuild({ tensorLHS }); std::shared_ptr tensorRHS{ new kp::Tensor({ 2, 4, 6 
}) }; - mgr.rebuildTensors({ tensorRHS }); + mgr.rebuild({ tensorRHS }); std::shared_ptr tensorOutput{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.rebuildTensors({ tensorOutput }); + mgr.rebuild({ tensorOutput }); mgr.evalOpDefault({ tensorLHS, tensorRHS, tensorOutput }); @@ -36,10 +36,10 @@ TEST(TestManager, OpMultSequenceFlow) kp::Manager mgr; { - mgr.rebuildTensors({ tensorLHS, tensorRHS, tensorOutput }); + mgr.rebuild({ tensorLHS, tensorRHS, tensorOutput }); std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); sq->begin(); @@ -59,16 +59,16 @@ TEST(TestManager, TestMultipleSequences) kp::Manager mgr; std::shared_ptr sqOne = - mgr.getOrCreateManagedSequence("sqOne"); + mgr.sequence("sqOne"); std::shared_ptr sqTwo = - mgr.getOrCreateManagedSequence("sqTwo"); + mgr.sequence("sqTwo"); std::shared_ptr sqOneRef = - mgr.getOrCreateManagedSequence("sqOne"); + mgr.sequence("sqOne"); std::shared_ptr sqTwoRef = - mgr.getOrCreateManagedSequence("sqTwo"); + mgr.sequence("sqTwo"); EXPECT_EQ(sqOne, sqOneRef); EXPECT_NE(sqTwo, sqOneRef); @@ -88,10 +88,10 @@ TEST(TestManager, TestMultipleTensorsAtOnce) kp::Manager mgr; std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); { - mgr.rebuildTensors({ tensorLHS, tensorRHS, tensorOutput }); + mgr.rebuild({ tensorLHS, tensorRHS, tensorOutput }); EXPECT_TRUE(tensorLHS->isInit()); EXPECT_TRUE(tensorRHS->isInit()); @@ -114,8 +114,8 @@ TEST(TestManager, TestCreateInitTensor) { kp::Manager mgr; - std::shared_ptr tensorA = mgr.buildTensor({ 0, 1, 2 }); - std::shared_ptr tensorB = mgr.buildTensor({ 0, 0, 0 }); + std::shared_ptr tensorA = mgr.tensor({ 0, 1, 2 }); + std::shared_ptr tensorB = mgr.tensor({ 0, 0, 0 }); mgr.evalOpDefault({ tensorA, tensorB }); @@ -124,7 +124,7 @@ TEST(TestManager, TestCreateInitTensor) EXPECT_EQ(tensorB->data(), std::vector({ 0, 1, 2 })); std::shared_ptr tensorC = - mgr.buildTensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eHost); + 
mgr.tensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eHost); mgr.evalOpDefault({ tensorA, tensorC }); diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index b6eaea54..63f9778b 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -19,10 +19,10 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) pa[index] = pa[index] + 1; })"); - mgr.rebuildTensors({ tensorA }); + mgr.rebuild({ tensorA }); std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); { sq->begin(); @@ -58,11 +58,11 @@ TEST(TestMultipleAlgoExecutions, MultipleCmdBufRecords) pa[index] = pa[index] + 1; })"); - mgr.rebuildTensors({ tensorA }, false); + mgr.rebuild({ tensorA }, false); - std::shared_ptr sqTensor = mgr.createManagedSequence(); + std::shared_ptr sqTensor = mgr.sequence(); - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // First create the tensor in a separate sequence sqTensor->begin(); @@ -113,11 +113,11 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) pa[index] = pa[index] + 1; })"); - mgr.rebuildTensors({ tensorA }); + mgr.rebuild({ tensorA }); { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); sq->begin(); @@ -130,7 +130,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence2"); + mgr.sequence("newSequence2"); sq->begin(); @@ -143,7 +143,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence3"); + mgr.sequence("newSequence3"); sq->begin(); @@ -156,7 +156,7 @@ TEST(TestMultipleAlgoExecutions, MultipleSequences) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence5"); + mgr.sequence("newSequence5"); sq->begin(); @@ -185,11 +185,11 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) pa[index] = pa[index] + 
1; })"); - mgr.rebuildTensors({ tensorA }, false); + mgr.rebuild({ tensorA }, false); { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); sq->begin(); @@ -201,7 +201,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence2"); + mgr.sequence("newSequence2"); sq->begin(); @@ -217,7 +217,7 @@ TEST(TestMultipleAlgoExecutions, SingleRecordMultipleEval) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence3"); + mgr.sequence("newSequence3"); sq->begin(); @@ -242,7 +242,7 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrOpCreate) std::shared_ptr tensorInB{ new kp::Tensor({ 0.0, 1.0, 2.0 }) }; std::shared_ptr tensorOut{ new kp::Tensor({ 0.0, 0.0, 0.0 }) }; - mgr.rebuildTensors({ tensorInA, tensorInB, tensorOut }); + mgr.rebuild({ tensorInA, tensorInB, tensorOut }); std::string shader(R"( // The version to use @@ -277,11 +277,11 @@ TEST(TestMultipleAlgoExecutions, ManagerEvalMultSourceStrMgrCreate) kp::Manager mgr; - auto tensorInA = mgr.buildTensor( + auto tensorInA = mgr.tensor( { 2.0, 4.0, 6.0 }, kp::Tensor::TensorTypes::eDevice, false); - auto tensorInB = mgr.buildTensor( + auto tensorInB = mgr.tensor( { 0.0, 1.0, 2.0 }, kp::Tensor::TensorTypes::eDevice, false); - auto tensorOut = mgr.buildTensor( + auto tensorOut = mgr.tensor( { 0.0, 0.0, 0.0 }, kp::Tensor::TensorTypes::eDevice, false); std::string shader(R"( @@ -334,9 +334,9 @@ TEST(TestMultipleAlgoExecutions, SequenceAlgoDestroyOutsideManagerScope) { kp::Manager mgr; - mgr.rebuildTensors({ tensorA }); + mgr.rebuild({ tensorA }); - sq = mgr.createManagedSequence(); + sq = mgr.sequence(); sq->begin(); sq->record( diff --git a/test/TestOpAlgoLoopsPassingData.cpp b/test/TestOpAlgoLoopsPassingData.cpp index 63bab299..c2431bc4 100644 --- a/test/TestOpAlgoLoopsPassingData.cpp +++ b/test/TestOpAlgoLoopsPassingData.cpp @@ -30,11 +30,11 @@ TEST(TestProcessingIterations, 
IterateThroughMultipleSumAndCopies) } )"); - mgr.rebuildTensors({ tensorA, tensorB }, false); + mgr.rebuild({ tensorA, tensorB }, false); { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("default"); + mgr.sequence("default"); sq->begin(); @@ -47,7 +47,7 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("run"); + mgr.sequence("run"); sq->begin(); @@ -65,7 +65,7 @@ TEST(TestProcessingIterations, IterateThroughMultipleSumAndCopies) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("export"); + mgr.sequence("export"); sq->begin(); diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp index 7d73bd7f..ae2bfce1 100644 --- a/test/TestOpShadersFromStringAndFile.cpp +++ b/test/TestOpShadersFromStringAndFile.cpp @@ -11,7 +11,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromConstructor) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); std::string shader(R"( #version 450 @@ -43,7 +43,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromConstructor) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); mgr.evalOpDefault( { tensorA, tensorB }, @@ -65,7 +65,7 @@ TEST(TestOpAlgoBase, ShaderRawDataFromFile) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp"); @@ -82,7 +82,7 @@ TEST(TestOpAlgoBase, ShaderCompiledDataFromFile) std::shared_ptr tensorA{ new kp::Tensor({ 3, 4, 5 }) }; std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.rebuildTensors({ 
tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); mgr.evalOpDefault( { tensorA, tensorB }, "test/shaders/glsl/test_op_custom_shader.comp.spv"); diff --git a/test/TestOpTensorCopy.cpp b/test/TestOpTensorCopy.cpp index ca4539d6..7b064107 100644 --- a/test/TestOpTensorCopy.cpp +++ b/test/TestOpTensorCopy.cpp @@ -14,7 +14,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -41,7 +41,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; std::shared_ptr tensorC{ new kp::Tensor(testVecC) }; - mgr.rebuildTensors({ tensorA, tensorB, tensorC }); + mgr.rebuild({ tensorA, tensorB, tensorC }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -70,7 +70,7 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor) std::shared_ptr tensorB{ new kp::Tensor( testVecB, kp::Tensor::TensorTypes::eHost) }; - mgr.rebuildTensors({ tensorA, tensorB }, false); + mgr.rebuild({ tensorA, tensorB }, false); // Only calling sync on device type tensor mgr.evalOpDefault({ tensorA }); @@ -99,7 +99,7 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor) testVecA, kp::Tensor::TensorTypes::eHost) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.rebuildTensors({ tensorA, tensorB }, false); + mgr.rebuild({ tensorA, tensorB }, false); // Only calling sync on device type tensor mgr.evalOpDefault({ tensorB }); @@ -129,7 +129,7 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor) std::shared_ptr tensorB{ new kp::Tensor( testVecB, kp::Tensor::TensorTypes::eHost) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -153,7 +153,7 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail) 
std::shared_ptr tensorA{ new kp::Tensor( testVecA, kp::Tensor::TensorTypes::eHost) }; - mgr.rebuildTensors({ tensorA }, false); + mgr.rebuild({ tensorA }, false); EXPECT_TRUE(tensorA->isInit()); diff --git a/test/TestOpTensorCreate.cpp b/test/TestOpTensorCreate.cpp index c1dd200f..ca347357 100644 --- a/test/TestOpTensorCreate.cpp +++ b/test/TestOpTensorCreate.cpp @@ -11,7 +11,7 @@ TEST(TestOpTensorCreate, CreateSingleTensorSingleOp) { kp::Manager mgr; - mgr.rebuildTensors({ tensorA }); + mgr.rebuild({ tensorA }); EXPECT_TRUE(tensorA->isInit()); @@ -32,7 +32,7 @@ TEST(TestOpTensorCreate, CreateMultipleTensorSingleOp) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.rebuildTensors({ tensorA, tensorB }); + mgr.rebuild({ tensorA, tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -52,8 +52,8 @@ TEST(TestOpTensorCreate, CreateMultipleTensorMultipleOp) std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; std::shared_ptr tensorB{ new kp::Tensor(testVecB) }; - mgr.rebuildTensors({ tensorA }); - mgr.rebuildTensors({ tensorB }); + mgr.rebuild({ tensorA }); + mgr.rebuild({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -73,8 +73,8 @@ TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerDestroyed) { kp::Manager mgr; - mgr.rebuildTensors({ tensorA }); - mgr.rebuildTensors({ tensorB }); + mgr.rebuild({ tensorA }); + mgr.rebuild({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -99,8 +99,8 @@ TEST(TestOpTensorCreate, TestTensorMemoryManagedByManagerNOTDestroyed) kp::Manager mgr; { - mgr.rebuildTensors({ tensorA }); - mgr.rebuildTensors({ tensorB }); + mgr.rebuild({ tensorA }); + mgr.rebuild({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -124,8 +124,8 @@ TEST(TestOpTensorCreate, NoErrorIfTensorFreedBefore) kp::Manager mgr; - mgr.rebuildTensors({ tensorA }); - mgr.rebuildTensors({ tensorB }); 
+ mgr.rebuild({ tensorA }); + mgr.rebuild({ tensorB }); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); @@ -148,7 +148,7 @@ TEST(TestOpTensorCreate, ExceptionOnZeroSizeTensor) kp::Manager mgr; try { - mgr.rebuildTensors({ tensorA }); + mgr.rebuild({ tensorA }); } catch (const std::runtime_error& err) { // check exception ASSERT_TRUE(std::string(err.what()).find("zero-sized") != diff --git a/test/TestOpTensorSync.cpp b/test/TestOpTensorSync.cpp index 9080194a..f992805f 100644 --- a/test/TestOpTensorSync.cpp +++ b/test/TestOpTensorSync.cpp @@ -13,7 +13,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemorySingleTensor) std::shared_ptr tensorA{ new kp::Tensor(testVecPreA) }; - mgr.rebuildTensors({ tensorA }, false); + mgr.rebuild({ tensorA }, false); EXPECT_TRUE(tensorA->isInit()); @@ -37,7 +37,7 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor) std::shared_ptr tensorB{ new kp::Tensor({ 0, 0, 0 }) }; std::shared_ptr tensorC{ new kp::Tensor({ 0, 0, 0 }) }; - mgr.rebuildTensors({ tensorA, tensorB, tensorC }, false); + mgr.rebuild({ tensorA, tensorB, tensorC }, false); EXPECT_TRUE(tensorA->isInit()); EXPECT_TRUE(tensorB->isInit()); diff --git a/test/TestSequence.cpp b/test/TestSequence.cpp index 2d0a8a4b..0dec484b 100644 --- a/test/TestSequence.cpp +++ b/test/TestSequence.cpp @@ -9,7 +9,7 @@ TEST(TestSequence, CmdBufSequenceBeginEnd) { std::shared_ptr sq = - mgr.getOrCreateManagedSequence("newSequence"); + mgr.sequence("newSequence"); EXPECT_TRUE(sq->eval()); EXPECT_TRUE(!sq->isRecording()); @@ -32,7 +32,7 @@ TEST(TestSequence, SequenceDestructorViaManager) { kp::Manager mgr; - sq = mgr.getOrCreateManagedSequence("newSequence"); + sq = mgr.sequence("newSequence"); EXPECT_TRUE(sq->isInit()); } diff --git a/test/TestTensor.cpp b/test/TestTensor.cpp index 7ceea1ba..705c825f 100644 --- a/test/TestTensor.cpp +++ b/test/TestTensor.cpp @@ -23,10 +23,10 @@ TEST(TestTensor, CopyFromHostData) kp::Manager mgr; - mgr.rebuildTensors({ tensorA, tensorB }); + 
mgr.rebuild({ tensorA, tensorB }); if (std::shared_ptr sq = - mgr.getOrCreateManagedSequence("new")) { + mgr.sequence("new")) { sq->begin(); sq->record({ tensorA, tensorB }); From 3e91a7737e5f0fcf809b501194068b3e74463598 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 21:29:58 +0000 Subject: [PATCH 20/27] Updated docs to match functions --- docs/overview/advanced-examples.rst | 8 ++++---- docs/overview/async-parallel.rst | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/overview/advanced-examples.rst b/docs/overview/advanced-examples.rst index 5823c6df..665c4f0c 100644 --- a/docs/overview/advanced-examples.rst +++ b/docs/overview/advanced-examples.rst @@ -97,7 +97,7 @@ Record commands in a single submit by using a Sequence to send in batch to GPU. mgr.evalOpDefault({tensorLHS, tensorRHS, tensorOutput}); // Create a new sequence - std::weak_ptr sqWeakPtr = mgr.getOrCreateManagedSequence(); + std::weak_ptr sqWeakPtr = mgr.sequence(); if (std::shared_ptr sq = sqWeakPtr.lock()) { @@ -226,8 +226,8 @@ Back to `examples list <#simple-examples>`_. // We need to create explicit sequences with their respective queues // The second parameter is the index in the familyIndex array which is relative // to the vector we created the manager with. - mgr.createManagedSequence("queueOne", 0); - mgr.createManagedSequence("queueTwo", 1); + mgr.sequence("queueOne", 0); + mgr.sequence("queueTwo", 1); // Creates tensor an initializes GPU memory (below we show more granularity) auto tensorA = std::make_shared(kp::Tensor(std::vector(10, 0.0))); @@ -422,7 +422,7 @@ Now that we have the inputs and outputs we will be able to use them in the proce kp::Manager mgr; if (std::shared_ptr sq = - mgr.getOrCreateManagedSequence("createTensors").lock()) + mgr.sequence("createTensors").lock()) { // ... 
diff --git a/docs/overview/async-parallel.rst b/docs/overview/async-parallel.rst index 88df7ac6..8c983bc7 100644 --- a/docs/overview/async-parallel.rst +++ b/docs/overview/async-parallel.rst @@ -208,8 +208,8 @@ It's worth mentioning you can have multiple sequences referencing the same queue // We need to create explicit sequences with their respective queues // The second parameter is the index in the familyIndex array which is relative // to the vector we created the manager with. - mgr.createManagedSequence("queueOne", 0); - mgr.createManagedSequence("queueTwo", 1); + mgr.sequence("queueOne", 0); + mgr.sequence("queueTwo", 1); We create the tensors without modifications. From b243d432c13371ea7410e79ceb4179fbbbb9ab3c Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 21:30:22 +0000 Subject: [PATCH 21/27] Updated readme --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4b199fa6..3b96a4c6 100644 --- a/README.md +++ b/README.md @@ -54,9 +54,9 @@ int main() { kp::Manager mgr; // 2. Create and initialise Kompute Tensors through manager - auto tensorInA = mgr.buildTensor({ 2., 2., 2. }); - auto tensorInB = mgr.buildTensor({ 1., 2., 3. }); - auto tensorOut = mgr.buildTensor({ 0., 0., 0. }); + auto tensorInA = mgr.tensor({ 2., 2., 2. }); + auto tensorInB = mgr.tensor({ 1., 2., 3. }); + auto tensorOut = mgr.tensor({ 0., 0., 0. }); // 3. 
Specify "multiply shader" code (can also be raw string, spir-v bytes or file path) std::string shaderString = (R"( From 4e9888e7d6e399810cc664ede35974f84601fcc2 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 21:30:33 +0000 Subject: [PATCH 22/27] Updated examples --- .../android-simple/app/src/main/cpp/KomputeModelML.cpp | 4 ++-- examples/array_multiplication/src/Main.cpp | 6 +++--- .../custom_module/kompute_summator/KomputeSummatorNode.cpp | 6 +++--- .../godot_examples/gdnative_shared/src/KomputeSummator.cpp | 6 +++--- .../custom_module/kompute_model_ml/KomputeModelMLNode.cpp | 4 ++-- .../gdnative_shared/src/KomputeModelML.cpp | 4 ++-- examples/logistic_regression/src/Main.cpp | 4 ++-- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp index e22f2aa0..80c03951 100755 --- a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp +++ b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp @@ -44,14 +44,14 @@ void KomputeModelML::train(std::vector yData, std::vector xIData, { std::shared_ptr sqTensor = - mgr.createManagedSequence(); + mgr.sequence(); sqTensor->begin(); sqTensor->record(params); sqTensor->end(); sqTensor->eval(); - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); diff --git a/examples/array_multiplication/src/Main.cpp b/examples/array_multiplication/src/Main.cpp index 14b58cba..161bb8bd 100755 --- a/examples/array_multiplication/src/Main.cpp +++ b/examples/array_multiplication/src/Main.cpp @@ -14,9 +14,9 @@ int main() kp::Manager mgr; - auto tensorInA = mgr.buildTensor({ 2.0, 4.0, 6.0 }); - auto tensorInB = mgr.buildTensor({ 0.0, 1.0, 2.0 }); - auto tensorOut = mgr.buildTensor({ 0.0, 0.0, 0.0 }); + auto tensorInA = mgr.tensor({ 2.0, 4.0, 6.0 }); + auto tensorInB = mgr.tensor({ 
0.0, 1.0, 2.0 }); + auto tensorOut = mgr.tensor({ 0.0, 0.0, 0.0 }); #ifdef KOMPUTE_ANDROID_SHADER_FROM_STRING std::string shader(R"( diff --git a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp index 3f17f088..05c800b2 100644 --- a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp +++ b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp @@ -29,9 +29,9 @@ float KomputeSummatorNode::get_total() const { void KomputeSummatorNode::_init() { std::cout << "CALLING INIT" << std::endl; - this->mPrimaryTensor = this->mManager.buildTensor({ 0.0 }); - this->mSecondaryTensor = this->mManager.buildTensor({ 0.0 }); - this->mSequence = this->mManager.getOrCreateManagedSequence("AdditionSeq"); + this->mPrimaryTensor = this->mManager.tensor({ 0.0 }); + this->mSecondaryTensor = this->mManager.tensor({ 0.0 }); + this->mSequence = this->mManager.sequence("AdditionSeq"); // We now record the steps in the sequence if (std::shared_ptr sq = this->mSequence.lock()) diff --git a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp index 788486e8..26a38181 100644 --- a/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp +++ b/examples/godot_examples/gdnative_shared/src/KomputeSummator.cpp @@ -28,9 +28,9 @@ float KomputeSummator::get_total() const { void KomputeSummator::_init() { std::cout << "CALLING INIT" << std::endl; - this->mPrimaryTensor = this->mManager.buildTensor({ 0.0 }); - this->mSecondaryTensor = this->mManager.buildTensor({ 0.0 }); - this->mSequence = this->mManager.getOrCreateManagedSequence("AdditionSeq"); + this->mPrimaryTensor = this->mManager.tensor({ 0.0 }); + this->mSecondaryTensor = this->mManager.tensor({ 0.0 }); + this->mSequence = this->mManager.sequence("AdditionSeq"); // We now record the steps in the sequence { 
diff --git a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp index f583d910..38dd6772 100644 --- a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp +++ b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp @@ -51,14 +51,14 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) { kp::Manager mgr; std::shared_ptr sqTensor = - mgr.createManagedSequence(); + mgr.sequence(); sqTensor->begin(); sqTensor->record(params); sqTensor->end(); sqTensor->eval(); - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); diff --git a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp index 4135e83e..f868f506 100644 --- a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp +++ b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp @@ -56,14 +56,14 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) { { std::shared_ptr sqTensor = - mgr.createManagedSequence(); + mgr.sequence(); sqTensor->begin(); sqTensor->record(params); sqTensor->end(); sqTensor->eval(); - std::shared_ptr sq = mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp index d3b8b355..1efbe83b 100755 --- a/examples/logistic_regression/src/Main.cpp +++ b/examples/logistic_regression/src/Main.cpp @@ -36,14 +36,14 @@ int main() kp::Manager mgr; std::shared_ptr sqTensor = - mgr.createManagedSequence(); + mgr.sequence(); sqTensor->begin(); sqTensor->record(params); sqTensor->end(); sqTensor->eval(); - std::shared_ptr sq = 
mgr.createManagedSequence(); + std::shared_ptr sq = mgr.sequence(); // Record op algo base sq->begin(); From 1edcb425ce8c53a1ba4979dbab66d72f0f86bcca Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 21:30:52 +0000 Subject: [PATCH 23/27] Single include --- single_include/kompute/Kompute.hpp | 77 +++++++++++++++++------------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp index f3ebd990..772397a2 100755 --- a/single_include/kompute/Kompute.hpp +++ b/single_include/kompute/Kompute.hpp @@ -1351,23 +1351,12 @@ class Manager * * @param sequenceName The name for the named sequence to be retrieved or * created - * @return Shared pointer to the manager owned sequence resource - */ - std::shared_ptr getOrCreateManagedSequence( - std::string sequenceName); - - /** - * Create a new managed Kompute sequence so it's available within the - * manager. - * - * @param sequenceName The name for the named sequence to be created, if - * empty then default indexed value is used * @param queueIndex The queue to use from the available queues - * @return Weak pointer to the manager owned sequence resource + * @return Shared pointer to the manager owned sequence resource */ - std::shared_ptr createManagedSequence( - std::string sequenceName = "", - uint32_t queueIndex = 0); + std::shared_ptr sequence( + std::string sequenceName = KP_DEFAULT_SESSION, + uint32_t queueIndex = 0); /** * Function that evaluates operation against named sequence. 
@@ -1384,7 +1373,7 @@ class Manager { SPDLOG_DEBUG("Kompute Manager evalOp triggered"); std::shared_ptr sq = - this->getOrCreateManagedSequence(sequenceName); + this->sequence(sequenceName); SPDLOG_DEBUG("Kompute Manager evalOp running sequence BEGIN"); sq->begin(); @@ -1434,7 +1423,7 @@ class Manager SPDLOG_DEBUG("Kompute Manager evalOpAsync triggered"); std::shared_ptr sq = - this->getOrCreateManagedSequence(sequenceName); + this->sequence(sequenceName); SPDLOG_DEBUG("Kompute Manager evalOpAsync running sequence BEGIN"); sq->begin(); @@ -1521,12 +1510,12 @@ class Manager * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - std::shared_ptr buildTensor( + std::shared_ptr tensor( const std::vector& data, Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice, bool syncDataToGPU = true) { - SPDLOG_DEBUG("Kompute Manager buildTensor triggered"); + SPDLOG_DEBUG("Kompute Manager tensor triggered"); SPDLOG_DEBUG("Kompute Manager creating new tensor shared ptr"); std::shared_ptr tensor = @@ -1548,32 +1537,54 @@ class Manager * create a new Tensor. The tensor memory will then be managed and owned by * the manager. 
* - * @param data The data to initialize the tensor with - * @param tensorType The type of tensor to initialize + * @param tensors Array of tensors to rebuild * @param syncDataToGPU Whether to sync the data to GPU memory * @returns Initialized Tensor with memory Syncd to GPU device */ - void rebuildTensors(std::vector> tensors, + void rebuild(std::vector> tensors, bool syncDataToGPU = true) { - SPDLOG_DEBUG("Kompute Manager rebuildTensors triggered"); + SPDLOG_DEBUG("Kompute Manager rebuild triggered"); for (std::shared_ptr tensor : tensors) { - if (tensor->isInit()) { - tensor->freeMemoryDestroyGPUResources(); - } + // False syncData to run all tensors at once instead one by one + this->rebuild(tensor, false); + } - tensor->init(this->mPhysicalDevice, this->mDevice); + if (syncDataToGPU) { + this->evalOpDefault(tensors); + } + } - std::set>::iterator it = - this->mManagedTensors.find(tensor); - if (it == this->mManagedTensors.end()) { - this->mManagedTensors.insert(tensor); - } + /** + * Function that simplifies the common workflow of tensor initialisation. It + * will take the constructor parameters for a Tensor and will will us it to + * create a new Tensor. The tensor memory will then be managed and owned by + * the manager. 
+ * + * @param tensors Single tensor to rebuild + * @param syncDataToGPU Whether to sync the data to GPU memory + * @returns Initialized Tensor with memory Syncd to GPU device + */ + void rebuild(std::shared_ptr tensor, + bool syncDataToGPU = true) + { + SPDLOG_DEBUG("Kompute Manager rebuild Tensor triggered"); + + if (tensor->isInit()) { + tensor->freeMemoryDestroyGPUResources(); + } + + tensor->init(this->mPhysicalDevice, this->mDevice); + + std::set>::iterator it = + this->mManagedTensors.find(tensor); + if (it == this->mManagedTensors.end()) { + this->mManagedTensors.insert(tensor); } if (syncDataToGPU) { - this->evalOpDefault(tensors); + this->evalOpDefault({ tensor }); } } From d8041d696d68d768168f8a816f2112541b612912 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 21:47:40 +0000 Subject: [PATCH 24/27] Added python updated functions --- python/src/docstrings.hpp | 13 ++----------- python/src/main.cpp | 12 +++++++----- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp index 6b3a1dc7..79b864f8 100644 --- a/python/src/docstrings.hpp +++ b/python/src/docstrings.hpp @@ -119,7 +119,7 @@ integrate with the vulkan kompute use. @param device Vulkan logical device to use for all base resources @param physicalDeviceIndex Index for vulkan physical device used)doc"; -static const char *__doc_kp_Manager_buildTensor = +static const char *__doc_kp_Manager_tensor = R"doc(Function that simplifies the common workflow of tensor creation and initialization. It will take the constructor parameters for a Tensor and will will us it to create a new Tensor and then create it using @@ -133,15 +133,6 @@ static const char *__doc_kp_Manager_createDevice = R"doc()doc"; static const char *__doc_kp_Manager_createInstance = R"doc()doc"; -static const char *__doc_kp_Manager_createManagedSequence = -R"doc(Create a new managed Kompute sequence so it's available within the -manager. 
- -@param sequenceName The name for the named sequence to be created, if -empty then default indexed value is used @param queueIndex The queue -to use from the available queues @return Weak pointer to the manager -owned sequence resource)doc"; - static const char *__doc_kp_Manager_evalOp = R"doc(Function that evaluates operation against named sequence. @@ -187,7 +178,7 @@ R"doc(Function that evaluates operation against a newly created sequence. TArgs Template parameters that will be used to initialise Operation to allow for extensible configurations on initialisation)doc"; -static const char *__doc_kp_Manager_getOrCreateManagedSequence = +static const char *__doc_kp_Manager_sequence = R"doc(Get or create a managed Sequence that will be contained by this manager. If the named sequence does not currently exist, it would be created and initialised. diff --git a/python/src/main.cpp b/python/src/main.cpp index 74f010f0..889084c7 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -155,14 +155,16 @@ PYBIND11_MODULE(kp, m) { [](uint32_t physicalDeviceIndex, const std::vector& familyQueueIndices) { return std::unique_ptr(new kp::Manager(physicalDeviceIndex, familyQueueIndices)); }), "Manager initialiser can provide specified device and array of GPU queueFamilies to load.") - .def("get_create_sequence", &kp::Manager::getOrCreateManagedSequence, "Get a Sequence or create a new one with given name") - .def("create_sequence", &kp::Manager::createManagedSequence, - py::arg("name") = "", py::arg("queueIndex") = 0, "Create a sequence with specific name and specified index of available queues") - .def("build_tensor", &kp::Manager::buildTensor, + .def("sequence", &kp::Manager::sequence, + py::arg("name") = "", py::arg("queueIndex") = 0, "Get or create a sequence with specific name and specified index of available queues") + .def("tensor", &kp::Manager::tensor, py::arg("data"), py::arg("tensorType") = kp::Tensor::TensorTypes::eDevice, py::arg("syncDataToGPU") = true, 
"Build and initialise tensor") - .def("rebuild_tensors", &kp::Manager::rebuildTensors, + .def("rebuild", py::overload_cast>, bool>(&kp::Manager::rebuild), py::arg("tensors"), py::arg("syncDataToGPU") = true, + "Build and initialise list of tensors") + .def("rebuild", py::overload_cast, bool>(&kp::Manager::rebuild), + py::arg("tensor"), py::arg("syncDataToGPU") = true, "Build and initialise tensor") // Await functions From a828bb9f79769163fb18bcb6be7fb5c493dacd19 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 21:48:23 +0000 Subject: [PATCH 25/27] Updated python tests --- python/test/test_array_multiplication.py | 2 +- python/test/test_kompute.py | 18 +++++++++--------- python/test/test_logistic_regression.py | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/python/test/test_array_multiplication.py b/python/test/test_array_multiplication.py index fac2ed41..bcad405a 100644 --- a/python/test/test_array_multiplication.py +++ b/python/test/test_array_multiplication.py @@ -14,7 +14,7 @@ def test_array_multiplication(): tensor_out = kp.Tensor([0, 0, 0]) # 3. Initialise the Kompute Tensors in the GPU - mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) # 4. 
Define the multiplication shader code to run on the GPU @ps.python2shader diff --git a/python/test/test_kompute.py b/python/test/test_kompute.py index bec4b40e..7050b9c2 100644 --- a/python/test/test_kompute.py +++ b/python/test/test_kompute.py @@ -17,7 +17,7 @@ def test_opmult(): mgr = kp.Manager() - mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_mult_def([tensor_in_a, tensor_in_b, tensor_out]) @@ -42,7 +42,7 @@ def test_opalgobase_data(): layout (local_size_x = 1) in; - // The input tensors bind index is relative to index in parameter passed + // The input rebuild bind index is relative to index in parameter passed layout(set = 0, binding = 0) buffer bina { float tina[]; }; layout(set = 0, binding = 1) buffer binb { float tinb[]; }; layout(set = 0, binding = 2) buffer bout { float tout[]; }; @@ -53,7 +53,7 @@ def test_opalgobase_data(): } """ - mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_str_def([tensor_in_a, tensor_in_b, tensor_out], shaderData) @@ -76,7 +76,7 @@ def test_opalgobase_file(): shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp") - mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) @@ -94,14 +94,14 @@ def test_sequence(): tensor_in_b = kp.Tensor([1, 2, 3]) tensor_out = kp.Tensor([0, 0, 0]) - mgr.rebuild_tensors([tensor_in_a, tensor_in_b, tensor_out]) + mgr.rebuild([tensor_in_a, tensor_in_b, tensor_out]) shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp") mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) mgr.eval_await_def() - seq = mgr.create_sequence("op") + seq = mgr.sequence("op") seq.begin() seq.record_tensor_sync_local([tensor_in_a]) 
seq.record_tensor_sync_local([tensor_in_b]) @@ -120,14 +120,14 @@ def test_workgroup(): tensor_a = kp.Tensor(np.zeros([16,8])) tensor_b = kp.Tensor(np.zeros([16,8])) - mgr.rebuild_tensors([tensor_a, tensor_b]) + mgr.rebuild([tensor_a, tensor_b]) shader_src = """ #version 450 layout (local_size_x = 1) in; - // The input tensors bind index is relative to index in parameter passed + // The input rebuild bind index is relative to index in parameter passed layout(set = 0, binding = 0) writeonly buffer bout { float toutx[]; }; layout(set = 0, binding = 1) writeonly buffer bout2 { float touty[]; }; @@ -140,7 +140,7 @@ def test_workgroup(): """ shader_src = bytes(shader_src, encoding='utf8') - seq = mgr.create_sequence("new") + seq = mgr.sequence("new") seq.begin() seq.record_algo_data([tensor_a, tensor_b], shader_src, (16,8,1)) seq.end() diff --git a/python/test/test_logistic_regression.py b/python/test/test_logistic_regression.py index 1fbcd5bc..6783bbc8 100644 --- a/python/test/test_logistic_regression.py +++ b/python/test/test_logistic_regression.py @@ -66,10 +66,10 @@ def compute_shader( params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i, tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m] - mgr.rebuild_tensors(params) + mgr.rebuild(params) # Create a managed sequence - sq = mgr.create_sequence() + sq = mgr.sequence() # Clear previous operations and begin recording for new operations sq.begin() From 3c486ebf72e192a667cd66df25e1e284027cc9ef Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 22:01:26 +0000 Subject: [PATCH 26/27] Updated test to cover sequences --- test/TestOpTensorCopy.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/test/TestOpTensorCopy.cpp b/test/TestOpTensorCopy.cpp index 7b064107..3f2bc950 100644 --- a/test/TestOpTensorCopy.cpp +++ b/test/TestOpTensorCopy.cpp @@ -8,7 +8,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensor) kp::Manager mgr; - std::vector 
testVecA{ 9, 8, 7 }; + std::vector testVecA{ 1, 2, 3 }; std::vector testVecB{ 0, 0, 0 }; std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; @@ -33,7 +33,7 @@ TEST(TestOpTensorCopy, CopyDeviceToDeviceTensorMulti) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 2, 3, 4 }; std::vector testVecB{ 0, 0, 0 }; std::vector testVecC{ 0, 0, 0 }; @@ -63,7 +63,7 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 3, 4, 5 }; std::vector testVecB{ 0, 0, 0 }; std::shared_ptr tensorA{ new kp::Tensor(testVecA) }; @@ -92,7 +92,7 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 4, 5, 6 }; std::vector testVecB{ 0, 0, 0 }; std::shared_ptr tensorA{ new kp::Tensor( @@ -101,6 +101,9 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor) mgr.rebuild({ tensorA, tensorB }, false); + // Manually copy data into host memory of Tensor + tensorA->mapDataIntoHostMemory(); + // Only calling sync on device type tensor mgr.evalOpDefault({ tensorB }); @@ -121,7 +124,7 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 5, 6, 7 }; std::vector testVecB{ 0, 0, 0 }; std::shared_ptr tensorA{ new kp::Tensor( @@ -148,7 +151,7 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail) kp::Manager mgr; - std::vector testVecA{ 9, 8, 7 }; + std::vector testVecA{ 6, 7, 8 }; std::shared_ptr tensorA{ new kp::Tensor( testVecA, kp::Tensor::TensorTypes::eHost) }; From 48f041d9f372941e490791b5ad2065501646f687 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Tue, 9 Feb 2021 22:15:07 +0000 Subject: [PATCH 27/27] Updated the examples --- .../android-simple/app/src/main/cpp/KomputeModelML.cpp | 9 +-------- .../kompute_summator/KomputeSummatorNode.cpp | 6 +++--- .../kompute_model_ml/KomputeModelMLNode.cpp | 9 ++------- .../gdnative_shared/src/KomputeModelML.cpp | 8 +------- 
examples/logistic_regression/src/Main.cpp | 8 +------- 5 files changed, 8 insertions(+), 32 deletions(-) diff --git a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp index 80c03951..0337fa63 100755 --- a/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp +++ b/examples/android/android-simple/app/src/main/cpp/KomputeModelML.cpp @@ -42,14 +42,7 @@ void KomputeModelML::train(std::vector yData, std::vector xIData, kp::Manager mgr; { - - std::shared_ptr sqTensor = - mgr.sequence(); - - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); + mgr.rebuild(params); std::shared_ptr sq = mgr.sequence(); diff --git a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp index 05c800b2..304416a0 100644 --- a/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp +++ b/examples/godot_examples/custom_module/kompute_summator/KomputeSummatorNode.cpp @@ -12,7 +12,7 @@ void KomputeSummatorNode::add(float value) { // Set the new data in the local device this->mSecondaryTensor->setData({value}); // Execute recorded sequence - if (std::shared_ptr sq = this->mSequence.lock()) { + if (std::shared_ptr sq = this->mSequence) { sq->eval(); } else { @@ -34,7 +34,7 @@ void KomputeSummatorNode::_init() { this->mSequence = this->mManager.sequence("AdditionSeq"); // We now record the steps in the sequence - if (std::shared_ptr sq = this->mSequence.lock()) + if (std::shared_ptr sq = this->mSequence) { std::string shader(R"( @@ -59,7 +59,7 @@ void KomputeSummatorNode::_init() { { this->mSecondaryTensor }); // Then we run the operation with both tensors - sq->record>( + sq->record( { this->mPrimaryTensor, this->mSecondaryTensor }, std::vector(shader.begin(), shader.end())); diff --git 
a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp index 38dd6772..010a3164 100644 --- a/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp +++ b/examples/godot_logistic_regression/custom_module/kompute_model_ml/KomputeModelMLNode.cpp @@ -50,14 +50,9 @@ void KomputeModelMLNode::train(Array yArr, Array xIArr, Array xJArr) { { kp::Manager mgr; - std::shared_ptr sqTensor = - mgr.sequence(); - - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); + mgr.rebuild(params); + { std::shared_ptr sq = mgr.sequence(); // Record op algo base diff --git a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp index f868f506..e9a9c51b 100644 --- a/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp +++ b/examples/godot_logistic_regression/gdnative_shared/src/KomputeModelML.cpp @@ -55,13 +55,7 @@ void KomputeModelML::train(Array yArr, Array xIArr, Array xJArr) { kp::Manager mgr; { - std::shared_ptr sqTensor = - mgr.sequence(); - - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); + mgr.rebuild(params); std::shared_ptr sq = mgr.sequence(); diff --git a/examples/logistic_regression/src/Main.cpp b/examples/logistic_regression/src/Main.cpp index 1efbe83b..14664a56 100755 --- a/examples/logistic_regression/src/Main.cpp +++ b/examples/logistic_regression/src/Main.cpp @@ -35,13 +35,7 @@ int main() kp::Manager mgr; - std::shared_ptr sqTensor = - mgr.sequence(); - - sqTensor->begin(); - sqTensor->record(params); - sqTensor->end(); - sqTensor->eval(); + mgr.rebuild(params); std::shared_ptr sq = mgr.sequence();