From 1088ee893e923d27a292e2569bdbe2a4b568a7c1 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sat, 13 Mar 2021 17:00:29 +0000
Subject: [PATCH 1/5] Moved all functions from hpp to cpp in tensor

---
 src/Tensor.cpp                 |  89 ++++++++++++++++++++++++
 src/include/kompute/Tensor.hpp | 119 +++++++++++++--------------------
 2 files changed, 137 insertions(+), 71 deletions(-)
diff --git a/src/Tensor.cpp b/src/Tensor.cpp
index c1d391fd..aa3584dc 100644
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@@ -70,6 +70,95 @@ Tensor::isInit()
            this->mRawData;
 }
 
+uint32_t
+Tensor::size()
+{
+    return this->mSize;
+}
+
+uint32_t
+Tensor::dataTypeMemorySize()
+{
+    return this->mDataTypeMemorySize;
+}
+
+uint32_t
+Tensor::memorySize()
+{
+    return this->mSize * this->mDataTypeMemorySize;
+}
+
+kp::Tensor::TensorDataTypes
+Tensor::dataType()
+{
+    return this->mDataType;
+}
+
+void*
+Tensor::rawData()
+{
+    return this->mRawData;
+}
+
+void
+Tensor::setRawData(const void* data)
+{
+    memcpy(this->mRawData, data, this->memorySize());
+}
+
+void
+Tensor::mapRawData()
+{
+
+    KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
+
+    std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
+
+    if (this->mTensorType == TensorTypes::eHost) {
+        hostVisibleMemory = this->mPrimaryMemory;
+    } else if (this->mTensorType == TensorTypes::eDevice) {
+        hostVisibleMemory = this->mStagingMemory;
+    } else {
+        KP_LOG_WARN(
+          "Kompute Tensor mapping data not supported on storage tensor");
+        return;
+    }
+
+    vk::DeviceSize bufferSize = this->memorySize();
+
+    // Given we request coherent host memory we don't need to invalidate /
+    // flush
+    this->mRawData = this->mDevice->mapMemory(
+      *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
+
+    vk::MappedMemoryRange mappedMemoryRange(
+      *hostVisibleMemory, 0, bufferSize);
+}
+
+void
+Tensor::unmapRawData()
+{
+    // Flushes the host visible memory range and then releases its mapping.
+    KP_LOG_DEBUG("Kompute Tensor unmapping data from host buffer");
+
+    std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
+
+    if (this->mTensorType == TensorTypes::eHost) {
+        hostVisibleMemory = this->mPrimaryMemory;
+    } else if (this->mTensorType == TensorTypes::eDevice) {
+        hostVisibleMemory = this->mStagingMemory;
+    } else {
+        KP_LOG_WARN(
+          "Kompute Tensor mapping data not supported on storage tensor");
+        return;
+    }
+
+    vk::DeviceSize bufferSize = this->memorySize();
+    vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
+    this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
+    this->mDevice->unmapMemory(*hostVisibleMemory);
+}
+
 void
 Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
                        std::shared_ptr<Tensor> copyFromTensor)
diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp
index 6cd75996..ebc9b43b 100644
--- a/src/include/kompute/Tensor.hpp
+++ b/src/include/kompute/Tensor.hpp
@@ -166,47 +166,72 @@ class Tensor
      *
      * @return Unsigned integer representing the total number of elements
      */
-    // TODO: move to cpp
-    uint32_t size() { return this->mSize; }
+    uint32_t size();
 
-    // TODO: move to cpp
-    uint32_t dataTypeMemorySize() { return this->mDataTypeMemorySize; }
+    /**
+     * Returns the total size of a single element of the respective data type
+     * that this tensor holds.
+     *
+     * @return Unsigned integer representing the memory of a single element of the
+     * respective data type.
+     */
+    uint32_t dataTypeMemorySize();
 
-    // TODO: move to cpp
-    uint32_t memorySize() { return this->mSize * this->mDataTypeMemorySize; }
+    /**
+     * Returns the total memory size of the data contained by the Tensor object which
+     * would equate to (this->size() * this->dataTypeMemorySize())
+     *
+     * @return Unsigned integer representing the memory of a single element of the
+     * respective data type.
+     */
+    uint32_t memorySize();
 
     /**
-     * Retrieve the underlying data type of the Tensor
+     * Retrieve the underlying data type of the elements held by the Tensor
      *
      * @return Data type of tensor of type kp::Tensor::TensorDataTypes
      */
-    TensorDataTypes dataType() { return this->mDataType; }
+    TensorDataTypes dataType();
 
-    void* rawData() { return this->mRawData; }
+    /**
+     * Retrieve the raw data via the pointer to the memory that contains the raw memory
+     * of this current tensor. This tensor gets changed to a nullptr when the Tensor is 
+     * removed.
+     *
+     * @return Pointer to raw memory containing raw bytes data of Tensor.
+     */
+    void* rawData();
 
-    // TODO: move to cpp
+    /**
+     * Sets / resets the data of the tensor which is directly done on the GPU host visible
+     * memory available by the tensor.
+     */
+    void setRawData(const void* data);
+
+    /**
+     * Template to return the pointer data converted by specific type, which would be
+     * any of the supported types including float, double, int32, uint32 and bool.
+     *
+     * @return Pointer to raw memory containing raw bytes data of Tensor.
+     */
     template<typename T>
     T* data()
     {
         return (T*)this->mRawData;
     }
 
+    /**
+     * Template to get the data of the current tensor as a vector of specific type, which would be
+     * any of the supported types including float, double, int32, uint32 and bool.
+     *
+     * @return Vector of type provided by template.
+     */
     template<typename T>
     std::vector<T> vector()
     {
         return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
     }
 
-    /**
-     * Sets / resets the vector data of the tensor. This function does not
-     * perform any copies into GPU memory and is only performed on the host.
-     */
-    void setRawData(const void* data)
-    {
-        // Copy data
-        memcpy(this->mRawData, data, this->memorySize());
-    }
-
   protected:
     // -------------- ALWAYS OWNED RESOURCES
     TensorTypes mTensorType;
@@ -216,56 +241,6 @@ class Tensor
     void* mRawData;
 
   private:
-    void mapRawData()
-    {
-
-        KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
-
-        std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
-
-        if (this->mTensorType == TensorTypes::eHost) {
-            hostVisibleMemory = this->mPrimaryMemory;
-        } else if (this->mTensorType == TensorTypes::eDevice) {
-            hostVisibleMemory = this->mStagingMemory;
-        } else {
-            KP_LOG_WARN(
-              "Kompute Tensor mapping data not supported on storage tensor");
-            return;
-        }
-
-        vk::DeviceSize bufferSize = this->memorySize();
-
-        // Given we request coherent host memory we don't need to invalidate /
-        // flush
-        this->mRawData = this->mDevice->mapMemory(
-          *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
-
-        vk::MappedMemoryRange mappedMemoryRange(
-          *hostVisibleMemory, 0, bufferSize);
-    }
-
-    void unmapRawData()
-    {
-
-        KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
-
-        std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
-
-        if (this->mTensorType == TensorTypes::eHost) {
-            hostVisibleMemory = this->mPrimaryMemory;
-        } else if (this->mTensorType == TensorTypes::eDevice) {
-            hostVisibleMemory = this->mStagingMemory;
-        } else {
-            KP_LOG_WARN(
-              "Kompute Tensor mapping data not supported on storage tensor");
-            return;
-        }
-
-        vk::DeviceSize bufferSize = this->memorySize();
-        vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
-        this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
-        this->mDevice->unmapMemory(*hostVisibleMemory);
-    }
 
     // -------------- NEVER OWNED RESOURCES
     std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
@@ -304,9 +279,11 @@ class Tensor
     vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
     vk::BufferUsageFlags getStagingBufferUsageFlags();
     vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
+
+    void mapRawData();
+    void unmapRawData();
 };
 
-// TODO: Limit T to be only float, bool, double, etc
 template<typename T>
 class TensorT : public Tensor
 {

From 53ae5c2d0a67d31b321a1728330b28ccc99fdb5c Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sat, 13 Mar 2021 17:02:06 +0000
Subject: [PATCH 2/5] Updated kompute

---
 single_include/kompute/Kompute.hpp | 119 ++++++++++++-----------------
 1 file changed, 48 insertions(+), 71 deletions(-)

diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index 17f6cb24..fa93d229 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -955,47 +955,72 @@ class Tensor
      *
      * @return Unsigned integer representing the total number of elements
      */
-    // TODO: move to cpp
-    uint32_t size() { return this->mSize; }
+    uint32_t size();
 
-    // TODO: move to cpp
-    uint32_t dataTypeMemorySize() { return this->mDataTypeMemorySize; }
+    /**
+     * Returns the total size of a single element of the respective data type
+     * that this tensor holds.
+     *
+     * @return Unsigned integer representing the memory of a single element of the
+     * respective data type.
+     */
+    uint32_t dataTypeMemorySize();
 
-    // TODO: move to cpp
-    uint32_t memorySize() { return this->mSize * this->mDataTypeMemorySize; }
+    /**
+     * Returns the total memory size of the data contained by the Tensor object which
+     * would equate to (this->size() * this->dataTypeMemorySize())
+     *
+     * @return Unsigned integer representing the memory of a single element of the
+     * respective data type.
+     */
+    uint32_t memorySize();
 
     /**
-     * Retrieve the underlying data type of the Tensor
+     * Retrieve the underlying data type of the elements held by the Tensor
      *
      * @return Data type of tensor of type kp::Tensor::TensorDataTypes
      */
-    TensorDataTypes dataType() { return this->mDataType; }
+    TensorDataTypes dataType();
 
-    void* rawData() { return this->mRawData; }
+    /**
+     * Retrieve the raw data via the pointer to the memory that contains the raw memory
+     * of this current tensor. This tensor gets changed to a nullptr when the Tensor is 
+     * removed.
+     *
+     * @return Pointer to raw memory containing raw bytes data of Tensor.
+     */
+    void* rawData();
 
-    // TODO: move to cpp
+    /**
+     * Sets / resets the data of the tensor which is directly done on the GPU host visible
+     * memory available by the tensor.
+     */
+    void setRawData(const void* data);
+
+    /**
+     * Template to return the pointer data converted by specific type, which would be
+     * any of the supported types including float, double, int32, uint32 and bool.
+     *
+     * @return Pointer to raw memory containing raw bytes data of Tensor.
+     */
     template<typename T>
     T* data()
     {
         return (T*)this->mRawData;
     }
 
+    /**
+     * Template to get the data of the current tensor as a vector of specific type, which would be
+     * any of the supported types including float, double, int32, uint32 and bool.
+     *
+     * @return Vector of type provided by template.
+     */
     template<typename T>
     std::vector<T> vector()
     {
         return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
     }
 
-    /**
-     * Sets / resets the vector data of the tensor. This function does not
-     * perform any copies into GPU memory and is only performed on the host.
-     */
-    void setRawData(const void* data)
-    {
-        // Copy data
-        memcpy(this->mRawData, data, this->memorySize());
-    }
-
   protected:
     // -------------- ALWAYS OWNED RESOURCES
     TensorTypes mTensorType;
@@ -1005,56 +1030,6 @@ class Tensor
     void* mRawData;
 
   private:
-    void mapRawData()
-    {
-
-        KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
-
-        std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
-
-        if (this->mTensorType == TensorTypes::eHost) {
-            hostVisibleMemory = this->mPrimaryMemory;
-        } else if (this->mTensorType == TensorTypes::eDevice) {
-            hostVisibleMemory = this->mStagingMemory;
-        } else {
-            KP_LOG_WARN(
-              "Kompute Tensor mapping data not supported on storage tensor");
-            return;
-        }
-
-        vk::DeviceSize bufferSize = this->memorySize();
-
-        // Given we request coherent host memory we don't need to invalidate /
-        // flush
-        this->mRawData = this->mDevice->mapMemory(
-          *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
-
-        vk::MappedMemoryRange mappedMemoryRange(
-          *hostVisibleMemory, 0, bufferSize);
-    }
-
-    void unmapRawData()
-    {
-
-        KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
-
-        std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
-
-        if (this->mTensorType == TensorTypes::eHost) {
-            hostVisibleMemory = this->mPrimaryMemory;
-        } else if (this->mTensorType == TensorTypes::eDevice) {
-            hostVisibleMemory = this->mStagingMemory;
-        } else {
-            KP_LOG_WARN(
-              "Kompute Tensor mapping data not supported on storage tensor");
-            return;
-        }
-
-        vk::DeviceSize bufferSize = this->memorySize();
-        vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
-        this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
-        this->mDevice->unmapMemory(*hostVisibleMemory);
-    }
 
     // -------------- NEVER OWNED RESOURCES
     std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
@@ -1093,9 +1068,11 @@ class Tensor
     vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
     vk::BufferUsageFlags getStagingBufferUsageFlags();
     vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
+
+    void mapRawData();
+    void unmapRawData();
 };
 
-// TODO: Limit T to be only float, bool, double, etc
 template<typename T>
 class TensorT : public Tensor
 {

From ea3a3039845c7b5c46d5cb290110feedccb82a57 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sat, 13 Mar 2021 17:02:23 +0000
Subject: [PATCH 3/5] Reformat

---
 src/OpMemoryBarrier.cpp        | 47 ++++++++++++------------
 src/OpTensorCopy.cpp           |  3 +-
 src/OpTensorSyncLocal.cpp      | 22 ++++++------
 src/Tensor.cpp                 | 39 ++++++++++----------
 src/include/kompute/Tensor.hpp | 66 ++++++++++++++++++----------------
 5 files changed, 89 insertions(+), 88 deletions(-)

diff --git a/src/OpMemoryBarrier.cpp b/src/OpMemoryBarrier.cpp
index 9e45d0c4..09a91f06 100644
--- a/src/OpMemoryBarrier.cpp
+++ b/src/OpMemoryBarrier.cpp
@@ -5,21 +5,20 @@
 namespace kp {
 
 OpMemoryBarrier::OpMemoryBarrier(
-            const std::vector<std::shared_ptr<Tensor>>& tensors,
-            const vk::AccessFlagBits& srcAccessMask,
-            const vk::AccessFlagBits& dstAccessMask,
-            const vk::PipelineStageFlagBits& srcStageMask,
-            const vk::PipelineStageFlagBits& dstStageMask,
-            bool barrierOnPrimary)
-    : mTensors(tensors),
-      mSrcAccessMask(srcAccessMask),
-      mDstAccessMask(dstAccessMask),
-      mSrcStageMask(srcStageMask),
-      mDstStageMask(dstStageMask),
-      mBarrierOnPrimary(barrierOnPrimary)
+  const std::vector<std::shared_ptr<Tensor>>& tensors,
+  const vk::AccessFlagBits& srcAccessMask,
+  const vk::AccessFlagBits& dstAccessMask,
+  const vk::PipelineStageFlagBits& srcStageMask,
+  const vk::PipelineStageFlagBits& dstStageMask,
+  bool barrierOnPrimary)
+  : mTensors(tensors)
+  , mSrcAccessMask(srcAccessMask)
+  , mDstAccessMask(dstAccessMask)
+  , mSrcStageMask(srcStageMask)
+  , mDstStageMask(dstStageMask)
+  , mBarrierOnPrimary(barrierOnPrimary)
 {
     KP_LOG_DEBUG("Kompute OpMemoryBarrier constructor");
-
 }
 
 OpMemoryBarrier::~OpMemoryBarrier()
@@ -35,21 +34,19 @@ OpMemoryBarrier::record(const vk::CommandBuffer& commandBuffer)
     // Barrier to ensure the data is finished writing to buffer memory
     if (this->mBarrierOnPrimary) {
         for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
-            tensor->recordPrimaryBufferMemoryBarrier(
-              commandBuffer,
-              this->mSrcAccessMask,
-              this->mDstAccessMask,
-              this->mSrcStageMask,
-              this->mDstStageMask);
+            tensor->recordPrimaryBufferMemoryBarrier(commandBuffer,
+                                                     this->mSrcAccessMask,
+                                                     this->mDstAccessMask,
+                                                     this->mSrcStageMask,
+                                                     this->mDstStageMask);
         }
     } else {
         for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
-            tensor->recordStagingBufferMemoryBarrier(
-              commandBuffer,
-              this->mSrcAccessMask,
-              this->mDstAccessMask,
-              this->mSrcStageMask,
-              this->mDstStageMask);
+            tensor->recordStagingBufferMemoryBarrier(commandBuffer,
+                                                     this->mSrcAccessMask,
+                                                     this->mDstAccessMask,
+                                                     this->mSrcStageMask,
+                                                     this->mDstStageMask);
         }
     }
 }
diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp
index 13e189a5..b78dd520 100644
--- a/src/OpTensorCopy.cpp
+++ b/src/OpTensorCopy.cpp
@@ -44,8 +44,7 @@ OpTensorCopy::record(const vk::CommandBuffer& commandBuffer)
 
     // We iterate from the second tensor onwards and record a copy to all
     for (size_t i = 1; i < this->mTensors.size(); i++) {
-        this->mTensors[i]->recordCopyFrom(
-          commandBuffer, this->mTensors[0]);
+        this->mTensors[i]->recordCopyFrom(commandBuffer, this->mTensors[0]);
     }
 }
 
diff --git a/src/OpTensorSyncLocal.cpp b/src/OpTensorSyncLocal.cpp
index 5e653154..fc3e0b93 100644
--- a/src/OpTensorSyncLocal.cpp
+++ b/src/OpTensorSyncLocal.cpp
@@ -31,19 +31,21 @@ OpTensorSyncLocal::record(const vk::CommandBuffer& commandBuffer)
     for (size_t i = 0; i < this->mTensors.size(); i++) {
         if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
 
-            this->mTensors[i]->recordPrimaryBufferMemoryBarrier(commandBuffer,
-                                        vk::AccessFlagBits::eShaderWrite,
-                                        vk::AccessFlagBits::eTransferRead,
-                                        vk::PipelineStageFlagBits::eComputeShader,
-                                        vk::PipelineStageFlagBits::eTransfer);
+            this->mTensors[i]->recordPrimaryBufferMemoryBarrier(
+              commandBuffer,
+              vk::AccessFlagBits::eShaderWrite,
+              vk::AccessFlagBits::eTransferRead,
+              vk::PipelineStageFlagBits::eComputeShader,
+              vk::PipelineStageFlagBits::eTransfer);
 
             this->mTensors[i]->recordCopyFromDeviceToStaging(commandBuffer);
 
-            this->mTensors[i]->recordPrimaryBufferMemoryBarrier(commandBuffer,
-                                        vk::AccessFlagBits::eTransferWrite,
-                                        vk::AccessFlagBits::eHostRead,
-                                        vk::PipelineStageFlagBits::eTransfer,
-                                        vk::PipelineStageFlagBits::eHost);
+            this->mTensors[i]->recordPrimaryBufferMemoryBarrier(
+              commandBuffer,
+              vk::AccessFlagBits::eTransferWrite,
+              vk::AccessFlagBits::eHostRead,
+              vk::PipelineStageFlagBits::eTransfer,
+              vk::PipelineStageFlagBits::eHost);
         }
     }
 }
diff --git a/src/Tensor.cpp b/src/Tensor.cpp
index aa3584dc..601d2f62 100644
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@@ -131,8 +131,7 @@ Tensor::mapRawData()
     this->mRawData = this->mDevice->mapMemory(
       *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
 
-    vk::MappedMemoryRange mappedMemoryRange(
-      *hostVisibleMemory, 0, bufferSize);
+    vk::MappedMemoryRange mappedMemoryRange(*hostVisibleMemory, 0, bufferSize);
 }
 
 void
@@ -219,36 +218,36 @@ Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer,
 
 void
 Tensor::recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
-                                  vk::AccessFlagBits srcAccessMask,
-                                  vk::AccessFlagBits dstAccessMask,
-                                  vk::PipelineStageFlagBits srcStageMask,
-                                  vk::PipelineStageFlagBits dstStageMask)
+                                         vk::AccessFlagBits srcAccessMask,
+                                         vk::AccessFlagBits dstAccessMask,
+                                         vk::PipelineStageFlagBits srcStageMask,
+                                         vk::PipelineStageFlagBits dstStageMask)
 {
     KP_LOG_DEBUG("Kompute Tensor recording PRIMARY buffer memory barrier");
 
     this->recordBufferMemoryBarrier(commandBuffer,
-            *this->mPrimaryBuffer,
-            srcAccessMask,
-            dstAccessMask,
-            srcStageMask,
-            dstStageMask);
+                                    *this->mPrimaryBuffer,
+                                    srcAccessMask,
+                                    dstAccessMask,
+                                    srcStageMask,
+                                    dstStageMask);
 }
 
 void
 Tensor::recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
-                                  vk::AccessFlagBits srcAccessMask,
-                                  vk::AccessFlagBits dstAccessMask,
-                                  vk::PipelineStageFlagBits srcStageMask,
-                                  vk::PipelineStageFlagBits dstStageMask)
+                                         vk::AccessFlagBits srcAccessMask,
+                                         vk::AccessFlagBits dstAccessMask,
+                                         vk::PipelineStageFlagBits srcStageMask,
+                                         vk::PipelineStageFlagBits dstStageMask)
 {
     KP_LOG_DEBUG("Kompute Tensor recording PRIMARY buffer memory barrier");
 
     this->recordBufferMemoryBarrier(commandBuffer,
-            *this->mStagingBuffer,
-            srcAccessMask,
-            dstAccessMask,
-            srcStageMask,
-            dstStageMask);
+                                    *this->mStagingBuffer,
+                                    srcAccessMask,
+                                    dstAccessMask,
+                                    srcStageMask,
+                                    dstStageMask);
 }
 
 void
diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp
index ebc9b43b..db274517 100644
--- a/src/include/kompute/Tensor.hpp
+++ b/src/include/kompute/Tensor.hpp
@@ -120,8 +120,9 @@ class Tensor
     void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer);
 
     /**
-     * Records the buffer memory barrier into the primary buffer and command buffer which
-     * ensures that relevant data transfers are carried out correctly.
+     * Records the buffer memory barrier into the primary buffer and command
+     * buffer which ensures that relevant data transfers are carried out
+     * correctly.
      *
      * @param commandBuffer Vulkan Command Buffer to record the commands into
      * @param srcAccessMask Access flags for source access mask
@@ -129,14 +130,16 @@ class Tensor
      * @param scrStageMask Pipeline stage flags for source stage mask
      * @param dstStageMask Pipeline stage flags for destination stage mask
      */
-    void recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
-                                          vk::AccessFlagBits srcAccessMask,
-                                          vk::AccessFlagBits dstAccessMask,
-                                          vk::PipelineStageFlagBits srcStageMask,
-                                          vk::PipelineStageFlagBits dstStageMask);
+    void recordPrimaryBufferMemoryBarrier(
+      const vk::CommandBuffer& commandBuffer,
+      vk::AccessFlagBits srcAccessMask,
+      vk::AccessFlagBits dstAccessMask,
+      vk::PipelineStageFlagBits srcStageMask,
+      vk::PipelineStageFlagBits dstStageMask);
     /**
-     * Records the buffer memory barrier into the staging buffer and command buffer which
-     * ensures that relevant data transfers are carried out correctly.
+     * Records the buffer memory barrier into the staging buffer and command
+     * buffer which ensures that relevant data transfers are carried out
+     * correctly.
      *
      * @param commandBuffer Vulkan Command Buffer to record the commands into
      * @param srcAccessMask Access flags for source access mask
@@ -144,12 +147,12 @@ class Tensor
      * @param scrStageMask Pipeline stage flags for source stage mask
      * @param dstStageMask Pipeline stage flags for destination stage mask
      */
-    void recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
-                                          vk::AccessFlagBits srcAccessMask,
-                                          vk::AccessFlagBits dstAccessMask,
-                                          vk::PipelineStageFlagBits srcStageMask,
-                                          vk::PipelineStageFlagBits dstStageMask);
-
+    void recordStagingBufferMemoryBarrier(
+      const vk::CommandBuffer& commandBuffer,
+      vk::AccessFlagBits srcAccessMask,
+      vk::AccessFlagBits dstAccessMask,
+      vk::PipelineStageFlagBits srcStageMask,
+      vk::PipelineStageFlagBits dstStageMask);
 
     /**
      * Constructs a vulkan descriptor buffer info which can be used to specify
@@ -172,17 +175,17 @@ class Tensor
      * Returns the total size of a single element of the respective data type
      * that this tensor holds.
      *
-     * @return Unsigned integer representing the memory of a single element of the
-     * respective data type.
+     * @return Unsigned integer representing the memory of a single element of
+     * the respective data type.
      */
     uint32_t dataTypeMemorySize();
 
     /**
-     * Returns the total memory size of the data contained by the Tensor object which
-     * would equate to (this->size() * this->dataTypeMemorySize())
+     * Returns the total memory size of the data contained by the Tensor object
+     * which would equate to (this->size() * this->dataTypeMemorySize())
      *
-     * @return Unsigned integer representing the memory of a single element of the
-     * respective data type.
+     * @return Unsigned integer representing the total memory size in bytes
+     * of the data held by the tensor.
      */
     uint32_t memorySize();
 
@@ -194,23 +197,24 @@ class Tensor
     TensorDataTypes dataType();
 
     /**
-     * Retrieve the raw data via the pointer to the memory that contains the raw memory
-     * of this current tensor. This tensor gets changed to a nullptr when the Tensor is 
-     * removed.
+     * Retrieve the raw data via the pointer to the memory that contains the raw
+     * memory of this current tensor. This pointer gets changed to a nullptr
+     * when the Tensor is removed.
      *
      * @return Pointer to raw memory containing raw bytes data of Tensor.
      */
     void* rawData();
 
     /**
-     * Sets / resets the data of the tensor which is directly done on the GPU host visible
-     * memory available by the tensor.
+     * Sets / resets the data of the tensor by copying memorySize() bytes from
+     * data directly into the GPU host visible memory mapped by the tensor.
      */
     void setRawData(const void* data);
 
     /**
-     * Template to return the pointer data converted by specific type, which would be
-     * any of the supported types including float, double, int32, uint32 and bool.
+     * Template to return the pointer data converted by specific type, which
+     * would be any of the supported types including float, double, int32,
+     * uint32 and bool.
      *
      * @return Pointer to raw memory containing raw bytes data of Tensor.
      */
@@ -221,8 +225,9 @@ class Tensor
     }
 
     /**
-     * Template to get the data of the current tensor as a vector of specific type, which would be
-     * any of the supported types including float, double, int32, uint32 and bool.
+     * Template to get the data of the current tensor as a vector of specific
+     * type, which would be any of the supported types including float, double,
+     * int32, uint32 and bool.
      *
      * @return Vector of type provided by template.
      */
@@ -241,7 +246,6 @@ class Tensor
     void* mRawData;
 
   private:
-
     // -------------- NEVER OWNED RESOURCES
     std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
     std::shared_ptr<vk::Device> mDevice;

From 156e3b4964ebd5b2968f564477a11697f2f96106 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sat, 13 Mar 2021 17:03:41 +0000
Subject: [PATCH 4/5] Reformat kompute

---
 single_include/kompute/Kompute.hpp | 65 ++++++++++++++++--------------
 1 file changed, 35 insertions(+), 30 deletions(-)

diff --git a/single_include/kompute/Kompute.hpp b/single_include/kompute/Kompute.hpp
index fa93d229..cd313e6e 100755
--- a/single_include/kompute/Kompute.hpp
+++ b/single_include/kompute/Kompute.hpp
@@ -910,8 +910,9 @@ class Tensor
     void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer);
 
     /**
-     * Records the buffer memory barrier into the primary buffer and command buffer which
-     * ensures that relevant data transfers are carried out correctly.
+     * Records the buffer memory barrier into the primary buffer and command
+     * buffer which ensures that relevant data transfers are carried out
+     * correctly.
      *
      * @param commandBuffer Vulkan Command Buffer to record the commands into
      * @param srcAccessMask Access flags for source access mask
@@ -919,14 +920,16 @@ class Tensor
      * @param scrStageMask Pipeline stage flags for source stage mask
      * @param dstStageMask Pipeline stage flags for destination stage mask
      */
-    void recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
-                                          vk::AccessFlagBits srcAccessMask,
-                                          vk::AccessFlagBits dstAccessMask,
-                                          vk::PipelineStageFlagBits srcStageMask,
-                                          vk::PipelineStageFlagBits dstStageMask);
+    void recordPrimaryBufferMemoryBarrier(
+      const vk::CommandBuffer& commandBuffer,
+      vk::AccessFlagBits srcAccessMask,
+      vk::AccessFlagBits dstAccessMask,
+      vk::PipelineStageFlagBits srcStageMask,
+      vk::PipelineStageFlagBits dstStageMask);
     /**
-     * Records the buffer memory barrier into the staging buffer and command buffer which
-     * ensures that relevant data transfers are carried out correctly.
+     * Records the buffer memory barrier into the staging buffer and command
+     * buffer which ensures that relevant data transfers are carried out
+     * correctly.
      *
      * @param commandBuffer Vulkan Command Buffer to record the commands into
      * @param srcAccessMask Access flags for source access mask
@@ -934,11 +937,12 @@ class Tensor
      * @param scrStageMask Pipeline stage flags for source stage mask
      * @param dstStageMask Pipeline stage flags for destination stage mask
      */
-    void recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
-                                          vk::AccessFlagBits srcAccessMask,
-                                          vk::AccessFlagBits dstAccessMask,
-                                          vk::PipelineStageFlagBits srcStageMask,
-                                          vk::PipelineStageFlagBits dstStageMask);
+    void recordStagingBufferMemoryBarrier(
+      const vk::CommandBuffer& commandBuffer,
+      vk::AccessFlagBits srcAccessMask,
+      vk::AccessFlagBits dstAccessMask,
+      vk::PipelineStageFlagBits srcStageMask,
+      vk::PipelineStageFlagBits dstStageMask);
 
     /**
      * Constructs a vulkan descriptor buffer info which can be used to specify
@@ -961,17 +965,17 @@ class Tensor
      * Returns the total size of a single element of the respective data type
      * that this tensor holds.
      *
-     * @return Unsigned integer representing the memory of a single element of the
-     * respective data type.
+     * @return Unsigned integer representing the memory of a single element of
+     * the respective data type.
      */
     uint32_t dataTypeMemorySize();
 
     /**
-     * Returns the total memory size of the data contained by the Tensor object which
-     * would equate to (this->size() * this->dataTypeMemorySize())
+     * Returns the total memory size of the data contained by the Tensor object
+     * which would equate to (this->size() * this->dataTypeMemorySize())
      *
-     * @return Unsigned integer representing the memory of a single element of the
-     * respective data type.
+     * @return Unsigned integer representing the total memory size of the data
+     * contained by the tensor.
      */
     uint32_t memorySize();
 
@@ -983,23 +987,24 @@ class Tensor
     TensorDataTypes dataType();
 
     /**
-     * Retrieve the raw data via the pointer to the memory that contains the raw memory
-     * of this current tensor. This tensor gets changed to a nullptr when the Tensor is 
-     * removed.
+     * Retrieve the raw data via the pointer to the memory that contains the raw
+     * memory of this current tensor. This pointer gets changed to a nullptr
+     * when the Tensor is removed.
      *
      * @return Pointer to raw memory containing raw bytes data of Tensor.
      */
     void* rawData();
 
     /**
-     * Sets / resets the data of the tensor which is directly done on the GPU host visible
-     * memory available by the tensor.
+     * Sets / resets the data of the tensor, which is done directly on the GPU
+     * host-visible memory available to the tensor.
      */
     void setRawData(const void* data);
 
     /**
-     * Template to return the pointer data converted by specific type, which would be
-     * any of the supported types including float, double, int32, uint32 and bool.
+     * Template to return the pointer data converted by specific type, which
+     * would be any of the supported types including float, double, int32,
+     * uint32 and bool.
      *
      * @return Pointer to raw memory containing raw bytes data of Tensor.
      */
@@ -1010,8 +1015,9 @@ class Tensor
     }
 
     /**
-     * Template to get the data of the current tensor as a vector of specific type, which would be
-     * any of the supported types including float, double, int32, uint32 and bool.
+     * Template to get the data of the current tensor as a vector of specific
+     * type, which would be any of the supported types including float, double,
+     * int32, uint32 and bool.
      *
      * @return Vector of type provided by template.
      */
@@ -1030,7 +1036,6 @@ class Tensor
     void* mRawData;
 
   private:
-
     // -------------- NEVER OWNED RESOURCES
     std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
     std::shared_ptr<vk::Device> mDevice;

From 7d2c7825ffe3e8eb7c1880e68e40622b094b2663 Mon Sep 17 00:00:00 2001
From: Alejandro Saucedo <axsauze@gmail.com>
Date: Sat, 13 Mar 2021 17:06:37 +0000
Subject: [PATCH 5/5] Removed unused code in optensorcopy

---
 src/OpTensorCopy.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp
index b78dd520..4438f8d6 100644
--- a/src/OpTensorCopy.cpp
+++ b/src/OpTensorCopy.cpp
@@ -59,10 +59,6 @@ OpTensorCopy::postEval(const vk::CommandBuffer& commandBuffer)
 {
     KP_LOG_DEBUG("Kompute OpTensorCopy postEval called");
 
-    // TODO: Simplify with a copyRawData
-    uint32_t size = this->mTensors[0]->size();
-    uint32_t dataTypeMemSize = this->mTensors[0]->dataTypeMemorySize();
-    uint32_t memSize = size * dataTypeMemSize;
     void* data = this->mTensors[0]->rawData();
 
     // Copy the data from the first tensor into all the tensors