Resolve moving all functions from tensor HPP to CPP #186

Merged
merged 5 commits into from Mar 13, 2021
154 changes: 68 additions & 86 deletions single_include/kompute/Kompute.hpp
@@ -910,35 +910,39 @@ class Tensor
void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer);

/**
* Records the buffer memory barrier into the primary buffer and command
* buffer which ensures that relevant data transfers are carried out
* correctly.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param srcAccessMask Access flags for source access mask
* @param dstAccessMask Access flags for destination access mask
* @param srcStageMask Pipeline stage flags for source stage mask
* @param dstStageMask Pipeline stage flags for destination stage mask
*/
void recordPrimaryBufferMemoryBarrier(
const vk::CommandBuffer& commandBuffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask);
/**
* Records the buffer memory barrier into the staging buffer and command
* buffer which ensures that relevant data transfers are carried out
* correctly.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param srcAccessMask Access flags for source access mask
* @param dstAccessMask Access flags for destination access mask
* @param srcStageMask Pipeline stage flags for source stage mask
* @param dstStageMask Pipeline stage flags for destination stage mask
*/
void recordStagingBufferMemoryBarrier(
const vk::CommandBuffer& commandBuffer,
vk::AccessFlagBits srcAccessMask,
vk::AccessFlagBits dstAccessMask,
vk::PipelineStageFlagBits srcStageMask,
vk::PipelineStageFlagBits dstStageMask);

/**
* Constructs a vulkan descriptor buffer info which can be used to specify
@@ -955,47 +959,74 @@ class Tensor
*
* @return Unsigned integer representing the total number of elements
*/
// TODO: move to cpp
uint32_t size() { return this->mSize; }
uint32_t size();

// TODO: move to cpp
uint32_t dataTypeMemorySize() { return this->mDataTypeMemorySize; }
/**
* Returns the total size of a single element of the respective data type
* that this tensor holds.
*
* @return Unsigned integer representing the memory of a single element of
* the respective data type.
*/
uint32_t dataTypeMemorySize();

// TODO: move to cpp
uint32_t memorySize() { return this->mSize * this->mDataTypeMemorySize; }
/**
* Returns the total memory size of the data contained by the Tensor object
* which would equate to (this->size() * this->dataTypeMemorySize())
*
* @return Unsigned integer representing the total memory size of the data
* contained by the Tensor object.
*/
uint32_t memorySize();

/**
* Retrieve the underlying data type of the Tensor
*
* @return Data type of tensor of type kp::Tensor::TensorDataTypes
*/
TensorDataTypes dataType() { return this->mDataType; }
TensorDataTypes dataType();

void* rawData() { return this->mRawData; }
/**
* Retrieve the raw data via the pointer to the memory that contains the raw
* memory of this current tensor. This pointer is set to nullptr when the
* Tensor is destroyed.
*
* @return Pointer to raw memory containing raw bytes data of Tensor.
*/
void* rawData();

// TODO: move to cpp
/**
* Sets / resets the data of the tensor, writing directly to the GPU
* host-visible memory held by the tensor.
*/
void setRawData(const void* data);

/**
* Template to return the pointer data cast to a specific type, which
* would be any of the supported types including float, double, int32,
* uint32 and bool.
*
* @return Pointer to raw memory containing raw bytes data of Tensor.
*/
template<typename T>
T* data()
{
return (T*)this->mRawData;
}

/**
* Template to get the data of the current tensor as a vector of specific
* type, which would be any of the supported types including float, double,
* int32, uint32 and bool.
*
* @return Vector of type provided by template.
*/
template<typename T>
std::vector<T> vector()
{
return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
}

/**
* Sets / resets the vector data of the tensor. This function does not
* perform any copies into GPU memory and is only performed on the host.
*/
void setRawData(const void* data)
{
// Copy data
memcpy(this->mRawData, data, this->memorySize());
}

protected:
// -------------- ALWAYS OWNED RESOURCES
TensorTypes mTensorType;
@@ -1005,57 +1036,6 @@
void* mRawData;

private:
void mapRawData()
{

KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");

std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;

if (this->mTensorType == TensorTypes::eHost) {
hostVisibleMemory = this->mPrimaryMemory;
} else if (this->mTensorType == TensorTypes::eDevice) {
hostVisibleMemory = this->mStagingMemory;
} else {
KP_LOG_WARN(
"Kompute Tensor mapping data not supported on storage tensor");
return;
}

vk::DeviceSize bufferSize = this->memorySize();

// Given we request coherent host memory we don't need to invalidate /
// flush
this->mRawData = this->mDevice->mapMemory(
*hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());

vk::MappedMemoryRange mappedMemoryRange(
*hostVisibleMemory, 0, bufferSize);
}

void unmapRawData()
{

KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");

std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;

if (this->mTensorType == TensorTypes::eHost) {
hostVisibleMemory = this->mPrimaryMemory;
} else if (this->mTensorType == TensorTypes::eDevice) {
hostVisibleMemory = this->mStagingMemory;
} else {
KP_LOG_WARN(
"Kompute Tensor mapping data not supported on storage tensor");
return;
}

vk::DeviceSize bufferSize = this->memorySize();
vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
this->mDevice->unmapMemory(*hostVisibleMemory);
}

// -------------- NEVER OWNED RESOURCES
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
std::shared_ptr<vk::Device> mDevice;
@@ -1093,9 +1073,11 @@ class Tensor
vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
vk::BufferUsageFlags getStagingBufferUsageFlags();
vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();

void mapRawData();
void unmapRawData();
};

// TODO: Limit T to be only float, bool, double, etc
template<typename T>
class TensorT : public Tensor
{
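The inline bodies deleted above map one-to-one onto new out-of-line definitions. src/Tensor.cpp itself is not part of this diff view, so the following is only a sketch reconstructed from the removed header code; the include path and formatting are assumptions:

// Sketch only: reconstructed from the inline bodies deleted from the
// header above. The real src/Tensor.cpp in this PR may differ.
#include <cstring> // for memcpy

#include "kompute/Tensor.hpp" // assumed include path

namespace kp {

uint32_t
Tensor::size()
{
    return this->mSize;
}

uint32_t
Tensor::dataTypeMemorySize()
{
    return this->mDataTypeMemorySize;
}

uint32_t
Tensor::memorySize()
{
    return this->mSize * this->mDataTypeMemorySize;
}

Tensor::TensorDataTypes
Tensor::dataType()
{
    return this->mDataType;
}

void*
Tensor::rawData()
{
    return this->mRawData;
}

void
Tensor::setRawData(const void* data)
{
    // Copies host data into the already-mapped host-visible memory;
    // the byte count comes from memorySize(), not a caller-supplied size.
    memcpy(this->mRawData, data, this->memorySize());
}

} // namespace kp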
47 changes: 22 additions & 25 deletions src/OpMemoryBarrier.cpp
@@ -5,21 +5,20 @@
namespace kp {

OpMemoryBarrier::OpMemoryBarrier(
const std::vector<std::shared_ptr<Tensor>>& tensors,
const vk::AccessFlagBits& srcAccessMask,
const vk::AccessFlagBits& dstAccessMask,
const vk::PipelineStageFlagBits& srcStageMask,
const vk::PipelineStageFlagBits& dstStageMask,
bool barrierOnPrimary)
: mTensors(tensors)
, mSrcAccessMask(srcAccessMask)
, mDstAccessMask(dstAccessMask)
, mSrcStageMask(srcStageMask)
, mDstStageMask(dstStageMask)
, mBarrierOnPrimary(barrierOnPrimary)
{
KP_LOG_DEBUG("Kompute OpMemoryBarrier constructor");

}

OpMemoryBarrier::~OpMemoryBarrier()
@@ -35,21 +34,19 @@ OpMemoryBarrier::record(const vk::CommandBuffer& commandBuffer)
// Barrier to ensure the data is finished writing to buffer memory
if (this->mBarrierOnPrimary) {
for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
tensor->recordPrimaryBufferMemoryBarrier(commandBuffer,
this->mSrcAccessMask,
this->mDstAccessMask,
this->mSrcStageMask,
this->mDstStageMask);
}
} else {
for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
tensor->recordStagingBufferMemoryBarrier(commandBuffer,
this->mSrcAccessMask,
this->mDstAccessMask,
this->mSrcStageMask,
this->mDstStageMask);
}
}
}
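For context on how this op is driven, a minimal usage sketch follows. The identifiers tensorA, tensorB, and commandBuffer are placeholders rather than part of this PR, and the chosen access and stage flags are purely illustrative:

// Usage sketch: records a compute-to-compute barrier on the primary
// buffers of two tensors. tensorA, tensorB, and commandBuffer are
// assumed to exist in the surrounding code.
std::vector<std::shared_ptr<kp::Tensor>> tensors = { tensorA, tensorB };

kp::OpMemoryBarrier opBarrier(tensors,
                              vk::AccessFlagBits::eShaderWrite,
                              vk::AccessFlagBits::eShaderRead,
                              vk::PipelineStageFlagBits::eComputeShader,
                              vk::PipelineStageFlagBits::eComputeShader,
                              true); // true = barrier on the primary buffer

opBarrier.record(commandBuffer);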
7 changes: 1 addition & 6 deletions src/OpTensorCopy.cpp
@@ -44,8 +44,7 @@ OpTensorCopy::record(const vk::CommandBuffer& commandBuffer)

// We iterate from the second tensor onwards and record a copy to all
for (size_t i = 1; i < this->mTensors.size(); i++) {
this->mTensors[i]->recordCopyFrom(commandBuffer, this->mTensors[0]);
}
}

@@ -60,10 +59,6 @@ OpTensorCopy::postEval(const vk::CommandBuffer& commandBuffer)
{
KP_LOG_DEBUG("Kompute OpTensorCopy postEval called");

// TODO: Simplify with a copyRawData
uint32_t size = this->mTensors[0]->size();
uint32_t dataTypeMemSize = this->mTensors[0]->dataTypeMemorySize();
uint32_t memSize = size * dataTypeMemSize;
void* data = this->mTensors[0]->rawData();

// Copy the data from the first tensor into all the tensors
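The remainder of postEval is collapsed in this view; with the manual size arithmetic removed, the copy presumably delegates to setRawData, which derives the byte count internally from memorySize(). A sketch of what the simplified loop likely looks like:

// Inferred sketch: the actual tail of postEval is hidden behind the
// collapsed region above. setRawData() performs the memcpy and computes
// the byte count itself, so no size/dataTypeMemorySize math is needed.
for (size_t i = 1; i < this->mTensors.size(); i++) {
    this->mTensors[i]->setRawData(data);
}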
22 changes: 12 additions & 10 deletions src/OpTensorSyncLocal.cpp
@@ -31,19 +31,21 @@ OpTensorSyncLocal::record(const vk::CommandBuffer& commandBuffer)
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {

this->mTensors[i]->recordPrimaryBufferMemoryBarrier(
commandBuffer,
vk::AccessFlagBits::eShaderWrite,
vk::AccessFlagBits::eTransferRead,
vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eTransfer);

this->mTensors[i]->recordCopyFromDeviceToStaging(commandBuffer);

this->mTensors[i]->recordPrimaryBufferMemoryBarrier(
commandBuffer,
vk::AccessFlagBits::eTransferWrite,
vk::AccessFlagBits::eHostRead,
vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eHost);
}
}
}
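Tensor::recordPrimaryBufferMemoryBarrier is now only declared in the header shown earlier, and its definition is not visible in this diff. A plausible shape for the moved definition is sketched below; the member name mPrimaryBuffer is an assumption, since only mPrimaryMemory and mStagingMemory appear in the diff:

// Hedged sketch of the moved definition. mPrimaryBuffer is assumed;
// the real member name and details may differ in src/Tensor.cpp.
void
Tensor::recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
                                         vk::AccessFlagBits srcAccessMask,
                                         vk::AccessFlagBits dstAccessMask,
                                         vk::PipelineStageFlagBits srcStageMask,
                                         vk::PipelineStageFlagBits dstStageMask)
{
    // Describe which bytes of the primary buffer the barrier covers.
    vk::BufferMemoryBarrier bufferMemoryBarrier;
    bufferMemoryBarrier.buffer = *this->mPrimaryBuffer;
    bufferMemoryBarrier.offset = 0;
    bufferMemoryBarrier.size = this->memorySize();
    bufferMemoryBarrier.srcAccessMask = srcAccessMask;
    bufferMemoryBarrier.dstAccessMask = dstAccessMask;
    bufferMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    bufferMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

    // Record the barrier between the given source and destination stages.
    commandBuffer.pipelineBarrier(srcStageMask,
                                  dstStageMask,
                                  vk::DependencyFlags(),
                                  nullptr,
                                  bufferMemoryBarrier,
                                  nullptr);
}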