eStorage tensor fix, eval clears operations in sequence, added one test and modified tests that were broken #304

Closed · wants to merge 14 commits
2 changes: 1 addition & 1 deletion .gitmodules
@@ -5,7 +5,7 @@
[submodule "external/Vulkan-Headers"]
path = external/Vulkan-Headers
url = https://github.com/KhronosGroup/Vulkan-Headers
branch = v1.2.158
branch = v1.3.227
[submodule "external/spdlog"]
path = external/spdlog
url = https://github.com/gabime/spdlog
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,4 +1,4 @@
FROM nvidia/vulkan:1.1.121
FROM nvidia/vulkan:1.3-470

RUN apt update -y
RUN apt install g++ -y
2 changes: 1 addition & 1 deletion docker-builders/VulkanSDK.Dockerfile
@@ -1,6 +1,6 @@
FROM amd64/ubuntu:20.04

ARG VULKAN_SDK_VERSION=1.2.154.0
ARG VULKAN_SDK_VERSION=1.3.227

# First install vulkan
RUN apt-get update
2 changes: 1 addition & 1 deletion docs/overview/build-system.rst
@@ -65,7 +65,7 @@ Compile Flags
- Sets the default api version to use for kompute api
* - -DKOMPUTE_VK_API_MAJOR_VERSION=1
- Major version to use for the Vulkan SDK
* - -DKOMPUTE_VK_API_MINOR_VERSION=1
* - -DKOMPUTE_VK_API_MINOR_VERSION=3
- Minor version to use for the Vulkan SDK
* - -DKOMPUTE_ENABLE_SPDLOG=1
- Enables the build with SPDLOG and FMT dependencies (must be installed)
2 changes: 2 additions & 0 deletions python/src/docstrings.hpp
@@ -765,6 +765,8 @@ static const char *__doc_kp_Tensor_getStagingBufferUsageFlags = R"doc()doc";

static const char *__doc_kp_Tensor_getStagingMemoryPropertyFlags = R"doc()doc";

static const char *__doc_kp_Tensor_isDeviceOnlyTensor = R"doc()doc";

static const char *__doc_kp_Tensor_isInit =
R"doc(Check whether tensor is initialized based on the created gpu
resources.
2 changes: 1 addition & 1 deletion python/src/main.cpp
@@ -114,6 +114,7 @@ PYBIND11_MODULE(kp, m) {
.def("tensor_type", &kp::Tensor::tensorType, DOC(kp, Tensor, tensorType))
.def("data_type", &kp::Tensor::dataType, DOC(kp, Tensor, dataType))
.def("is_init", &kp::Tensor::isInit, DOC(kp, Tensor, isInit))
.def("is_device_only_tensor", &kp::Tensor::isDeviceOnlyTensor, DOC(kp, Tensor, isDeviceOnlyTensor))
.def("destroy", &kp::Tensor::destroy, DOC(kp, Tensor, destroy));

py::class_<kp::Sequence, std::shared_ptr<kp::Sequence>>(m, "Sequence")
@@ -340,4 +341,3 @@ PYBIND11_MODULE(kp, m) {
m.attr("__version__") = "dev";
#endif
}

25 changes: 16 additions & 9 deletions python/test/test_logistic_regression.py
@@ -72,24 +72,31 @@ def compute_shader(
# Create a managed sequence
sq = mgr.sequence()

# Record operation to sync memory from local to GPU memory
sq.record(kp.OpTensorSyncDevice([tensor_w_in, tensor_b_in]))

# Record operation to execute GPU shader against all our parameters
sq.record(kp.OpAlgoDispatch(mgr.algorithm(params, compute_shader.to_spirv())))

# Record operation to sync memory from GPU to local memory
sq.record(kp.OpTensorSyncLocal([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out]))

ITERATIONS = 100
learning_rate = 0.1

# Perform machine learning training and inference across all input X and Y
for i_iter in range(ITERATIONS):

# Execute an iteration of the algorithm
# We have to call eval after each record because recorded commands
# are not guaranteed to run in order

# Also, eval now clears the recorded operations, so we have to record them again

# Record operation to sync memory from local to GPU memory
sq.record(kp.OpTensorSyncDevice([tensor_w_in, tensor_b_in]))
sq.eval()

# Record operation to execute GPU shader against all our parameters
sq.record(kp.OpAlgoDispatch(mgr.algorithm(params, compute_shader.to_spirv())))
sq.eval()

# Record operation to sync memory from GPU to local memory
sq.record(kp.OpTensorSyncLocal([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out]))
sq.eval()


# Calculate the parameters based on the respective derivatives calculated
for j_iter in range(tensor_b_out.size()):
tensor_w_in.data()[0] -= learning_rate * tensor_w_out_i.data()[j_iter]
2,639 changes: 1,332 additions & 1,307 deletions single_include/kompute/Kompute.hpp

Large diffs are not rendered by default.

13 changes: 9 additions & 4 deletions src/OpTensorCopy.cpp
@@ -60,11 +60,16 @@ OpTensorCopy::postEval(const vk::CommandBuffer& /*commandBuffer*/)
{
KP_LOG_DEBUG("Kompute OpTensorCopy postEval called");

void* data = this->mTensors[0]->rawData();
if (!this->mTensors[0]->isDeviceOnlyTensor()) {
KP_LOG_DEBUG("Copying raw data on host memory to another tensors");
void* data = this->mTensors[0]->rawData();

// Copy the data from the first tensor into all the tensors
for (size_t i = 1; i < this->mTensors.size(); i++) {
this->mTensors[i]->setRawData(data);
// Copy the data from the first tensor into all the tensors
for (auto tensor : this->mTensors) {
if (!tensor->isDeviceOnlyTensor()) {
tensor->setRawData(data);
}
}
}
}

13 changes: 12 additions & 1 deletion src/Sequence.cpp
@@ -49,7 +49,16 @@ Sequence::begin()
}

KP_LOG_INFO("Kompute Sequence command now started recording");
this->mCommandBuffer->begin(vk::CommandBufferBeginInfo());
const auto commandBufferBeginInfo = vk::CommandBufferBeginInfo
{
// This has to be set because otherwise chaining multiple evals in a row would
// submit the same work multiple times. For example:
// mgr.sequence()
// ->eval<kp::OpTensorSyncDevice>({tensor_a})
// ->eval<kp::OpTensorCopy>({tensor_a, tensor_b})
vk::CommandBufferUsageFlagBits::eOneTimeSubmit
};
this->mCommandBuffer->begin(commandBufferBeginInfo);
this->mRecording = true;

// latch the first timestamp before any commands are submitted
@@ -170,6 +179,8 @@ Sequence::evalAwait(uint64_t waitFor)
this->mOperations[i]->postEval(*this->mCommandBuffer);
}

this->mOperations.clear();

return shared_from_this();
}

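For context, a minimal sketch of the chained-eval usage that the comment above refers to; the tensor values are illustrative and the snippet only assumes the public kp::Manager / kp::Sequence API already used in this PR. Without eOneTimeSubmit, work recorded for an earlier eval in the chain could be submitted again by a later one.

```cpp
#include <kompute/Kompute.hpp>

int main()
{
    kp::Manager mgr;

    // Illustrative host-visible tensors
    auto tensorA = mgr.tensor({ 1.0f, 2.0f, 3.0f });
    auto tensorB = mgr.tensor({ 0.0f, 0.0f, 0.0f });

    // Each eval() records into the same command buffer of the same sequence,
    // which is why the buffer is now begun with eOneTimeSubmit
    mgr.sequence()
        ->eval<kp::OpTensorSyncDevice>({ tensorA })
        ->eval<kp::OpTensorCopy>({ tensorA, tensorB })
        ->eval<kp::OpTensorSyncLocal>({ tensorB });

    return 0;
}
```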
22 changes: 16 additions & 6 deletions src/Tensor.cpp
@@ -53,9 +53,11 @@ Tensor::rebuild(void* data,
}

this->allocateMemoryCreateGPUResources();
this->mapRawData();

memcpy(this->mRawData, data, this->memorySize());
if (!this->isDeviceOnlyTensor()) {
this->mapRawData();
this->setRawData(data);
}
}

Tensor::TensorTypes
@@ -64,6 +66,12 @@ Tensor::tensorType()
return this->mTensorType;
}

bool
Tensor::isDeviceOnlyTensor()
{
return this->mTensorType == TensorTypes::eStorage;
}

bool
Tensor::isInit()
{
@@ -121,7 +129,7 @@ Tensor::mapRawData()
hostVisibleMemory = this->mStagingMemory;
} else {
KP_LOG_WARN(
"Kompute Tensor mapping data not supported on storage tensor");
"Kompute Tensor mapping data not supported on {} tensor", this->mTensorType);
return;
}

@@ -131,7 +139,6 @@
// flush
this->mRawData = this->mDevice->mapMemory(
*hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());

}

void
@@ -148,7 +155,7 @@ Tensor::unmapRawData()
hostVisibleMemory = this->mStagingMemory;
} else {
KP_LOG_WARN(
"Kompute Tensor mapping data not supported on storage tensor");
"Kompute Tensor mapping data not supported on {} tensor", this->mTensorType);
return;
}

@@ -486,7 +493,10 @@ Tensor::destroy()
}

// Unmap the current memory data
this->unmapRawData();
if (!this->isDeviceOnlyTensor()) {
this->unmapRawData();
}

if (this->mFreePrimaryBuffer) {
if (!this->mPrimaryBuffer) {
2 changes: 1 addition & 1 deletion src/include/kompute/Core.hpp
@@ -25,7 +25,7 @@ typedef std::vector<float> Constants;
#define KOMPUTE_VK_API_MAJOR_VERSION 1
#endif // KOMPUTE_VK_API_MAJOR_VERSION
#ifndef KOMPUTE_VK_API_MINOR_VERSION
#define KOMPUTE_VK_API_MINOR_VERSION 1
#define KOMPUTE_VK_API_MINOR_VERSION 3
#endif // KOMPUTE_VK_API_MINOR_VERSION
#define KOMPUTE_VK_API_VERSION \
VK_MAKE_VERSION( \
10 changes: 10 additions & 0 deletions src/include/kompute/Sequence.hpp
@@ -90,6 +90,8 @@ class Sequence : public std::enable_shared_from_this<Sequence>
/**
* Eval sends all the recorded and stored operations in the vector of
* operations into the gpu as a submit job synchronously (with a barrier).
*
* It also clears the operations recorded in the sequence.
*
* @return shared_ptr<Sequence> of the Sequence class itself
*/
@@ -99,6 +101,8 @@ class Sequence : public std::enable_shared_from_this<Sequence>
* Resets all the recorded and stored operations, records the operation
* provided and submits into the gpu as a submit job synchronously (with a
* barrier).
*
* It also clears the operations recorded in the sequence.
*
* @return shared_ptr<Sequence> of the Sequence class itself
*/
@@ -107,6 +111,8 @@ class Sequence : public std::enable_shared_from_this<Sequence>
/**
* Eval sends all the recorded and stored operations in the vector of
* operations into the gpu as a submit job with a barrier.
*
* It also clears the operations recorded in the sequence.
*
* @param tensors Vector of tensors to use for the operation
* @param TArgs Template parameters that are used to initialise operation
@@ -123,6 +129,8 @@ class Sequence : public std::enable_shared_from_this<Sequence>
/**
* Eval sends all the recorded and stored operations in the vector of
* operations into the gpu as a submit job with a barrier.
*
* It also clears the operations recorded in the sequence.
*
* @param algorithm Algorithm to use for the record often used for OpAlgo
* operations
@@ -196,6 +204,8 @@ class Sequence : public std::enable_shared_from_this<Sequence>
/**
* Eval Await waits for the fence to finish processing and then once it
* finishes, it runs the postEval of all operations.
*
* It also clears the operations recorded in the sequence.
*
* @param waitFor Number of milliseconds to wait before timing out.
* @return shared_ptr<Sequence> of the Sequence class itself
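A hedged usage sketch of the documented behaviour: since eval() now clears the recorded operations, a sequence has to be re-recorded before it is evaluated again. The tensor names are illustrative; only the kp::Sequence record/eval API shown in this PR is assumed.

```cpp
#include <memory>

#include <kompute/Kompute.hpp>

int main()
{
    kp::Manager mgr;
    auto tensorA = mgr.tensor({ 1.0f, 2.0f, 3.0f });

    std::shared_ptr<kp::Sequence> sq = mgr.sequence();

    // First pass: record and submit
    sq->record<kp::OpTensorSyncDevice>({ tensorA });
    sq->eval(); // runs the recorded operation and clears it from the sequence

    // Second pass: the sequence is empty again, so the operation has to be
    // recorded once more before the next eval()
    sq->record<kp::OpTensorSyncDevice>({ tensorA });
    sq->eval();

    return 0;
}
```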
9 changes: 9 additions & 0 deletions src/include/kompute/Tensor.hpp
@@ -86,6 +86,15 @@ class Tensor
*/
bool isInit();

/**
* Tells whether the data of the tensor is located only on the device. Used for
* determining whether mapping and copying of raw data is needed in rebuild.
* For example, eStorage tensors are device only.
*
* @return Boolean stating whether the data in the tensor is device only
*/
bool isDeviceOnlyTensor();

/**
* Retrieve the tensor type of the Tensor
*
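A small sketch of how the new helper might be used to guard host-side access. The function name and the float element type are assumptions for illustration only; they are not part of this PR.

```cpp
#include <iostream>
#include <memory>

#include <kompute/Kompute.hpp>

// Hypothetical helper: only touch raw data when the tensor actually has
// host-visible memory (eStorage tensors are device only)
void printIfHostVisible(const std::shared_ptr<kp::Tensor>& tensor)
{
    if (tensor->isDeviceOnlyTensor()) {
        std::cout << "device-only tensor, nothing to read on the host\n";
        return;
    }

    // Assumes the tensor holds float elements
    const float* data = static_cast<const float*>(tensor->rawData());
    for (uint32_t i = 0; i < tensor->size(); i++) {
        std::cout << data[i] << " ";
    }
    std::cout << "\n";
}
```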
33 changes: 33 additions & 0 deletions test/TestOpTensorCopy.cpp
@@ -156,3 +156,36 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail)
EXPECT_THROW(mgr.sequence()->eval<kp::OpTensorCopy>({ tensorA }),
std::runtime_error);
}

TEST(TestOpTensorCopy, eStorageTensorCopy)
{
kp::Manager mgr;

const auto vec_in = std::vector<float>{ 0.0, 1.2, 3.2 };
const auto vec_out_with_wrong_values =
std::vector<float>(vec_in.size(), 0.0);

auto tensor_in = mgr.tensor(vec_in, kp::Tensor::TensorTypes::eDevice);
auto tensor_temp = mgr.tensor(nullptr,
vec_in.size(),
sizeof(decltype(vec_in.back())),
kp::Tensor::TensorDataTypes::eFloat,
kp::Tensor::TensorTypes::eStorage);
auto tensor_out = mgr.tensor(vec_out_with_wrong_values, kp::Tensor::TensorTypes::eDevice);

EXPECT_TRUE(tensor_in->isInit());

// For eStorage tensors isInit() will return false if a nullptr was provided as the data pointer
EXPECT_TRUE(!tensor_temp->isInit());

EXPECT_TRUE(tensor_out->isInit());

mgr.sequence()
->eval<kp::OpTensorSyncDevice>({tensor_in})
->eval<kp::OpTensorCopy>({tensor_in, tensor_temp})
->eval<kp::OpTensorCopy>({tensor_temp, tensor_out})
->eval<kp::OpTensorSyncLocal>({tensor_out});

EXPECT_EQ(tensor_in->vector(), tensor_out->vector());

}
6 changes: 4 additions & 2 deletions test/TestSequence.cpp
@@ -97,8 +97,10 @@ TEST(TestSequence, RerecordSequence)

algo->rebuild({ tensorOut, tensorA, tensorB }, spirv);

// Refresh and trigger a rerecord
sq->rerecord();
// Re-record manually (we cannot call rerecord() because the operations recorded in the sequence get cleared in eval())
sq->record<kp::OpAlgoDispatch>(algo)->record<kp::OpTensorSyncLocal>(
{ tensorA, tensorB, tensorOut });

sq->eval();

EXPECT_EQ(tensorB->vector(), std::vector<float>({ 2, 8, 18 }));