Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Hexagon] [runtime] VTCM Allocator #12947

Merged
merged 16 commits into from
Oct 3, 2022
39 changes: 14 additions & 25 deletions src/runtime/hexagon/hexagon_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
#include <string>
#include <utility>

#include "HAP_compute_res.h"
#include "hexagon_common.h"
#include "hexagon_device_api.h"

namespace tvm {
namespace runtime {
Expand Down Expand Up @@ -57,35 +57,24 @@ struct DDRAllocation : public Allocation {

// NOTE(review): this span is a rendered diff hunk, not a compilable definition. It appears to
// interleave the lines the PR removes (direct HAP_compute_res_* acquisition and the
// context_id_ member) with the lines it adds (allocation through HexagonVtcmPool), plus two
// lines of review-UI text. Confirm against the merged hexagon_buffer.cc before relying on it.
struct VTCMAllocation : public Allocation {
VTCMAllocation(size_t nbytes, size_t alignment) : Allocation(nbytes, alignment) {
// --- Direct-acquisition path (presumably the removed side of the diff; TODO confirm) ---
compute_res_attr_t res_info;
HEXAGON_SAFE_CALL(HAP_compute_res_attr_init(&res_info));

// allocate nbytes of vtcm on a single page
// NOTE(review): the call below passes b_single_page = 0, which does not obviously match the
// comment above; verify against the HAP_compute_res documentation.
HEXAGON_SAFE_CALL(HAP_compute_res_attr_set_vtcm_param(&res_info, /*vtcm_size = */ nbytes,
/*b_single_page = */ 0));

// TODO(HWE): Investigate why a non-zero timeout results in
// hanging, both in the simulator and on hardware.
context_id_ = HAP_compute_res_acquire(&res_info, /*timeout = */ 0);

if (context_id_) {
data_ = HAP_compute_res_attr_get_vtcm_ptr(&res_info);
if (!data_) {
// A null VTCM pointer is logged, the context is released, and construction returns
// with data_ left null.
LOG(ERROR) << "ERROR: HAP_compute_res_acquire returned nullptr when allocating VTCM.";
HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
return;
}
} else {
LOG(FATAL) << "FATAL: HAP_compute_res_acquire failed to acquire requested VTCM resource.";
throw std::runtime_error(
"HAP_compute_res_acquire failed to acquire requested VTCM resource.");
// --- Pool-based path (presumably the added side of the diff; TODO confirm) ---
// The base class is expected to have recorded the requested size unchanged.
CHECK(allocation_nbytes_ == nbytes);
// Alignments above 0x800 (2048) bytes are rejected.
CHECK(alignment <= 0x800) << "VTCMAllocation called for invalid alignment";
// When the size is not a multiple of 0x800 but the alignment's low 11 bits are zero
// (i.e. alignment is 0x800, or 0, given the CHECK above), round the size up to the next
// multiple of 0x800: clear the low 11 bits, then add one full 0x800 block.
if ((nbytes & 0x7FF) && ((alignment & 0x7FF) == 0)) {
janetsc marked this conversation as resolved.
Show resolved Hide resolved
nbytes = nbytes >> 11;
nbytes = nbytes << 11;
nbytes += 0x800;
DLOG(INFO) << "VTCMAllocation size adjusted for alignment " << allocation_nbytes_ << " to "
<< nbytes;
// Remember the adjusted size so the destructor frees exactly what was allocated.
allocation_nbytes_ = nbytes;
}
// Take the block from the pool owned by the global HexagonDeviceAPI.
data_ = HexagonDeviceAPI::Global()->VtcmPool()->Allocate(allocation_nbytes_);
DLOG(INFO) << "VTCMAllocation " << data_ << " " << allocation_nbytes_ << " " << alignment;
}
~VTCMAllocation() {
// Presumably the removed side of the diff: releases the per-allocation HAP context.
HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
DLOG(INFO) << "~VTCMAllocation " << data_ << " " << allocation_nbytes_;
// Return the block to the pool using the same (possibly adjusted) size passed to Allocate.
HexagonDeviceAPI::Global()->VtcmPool()->Free(data_, allocation_nbytes_);
data_ = nullptr;
}
// HAP compute-resource context id used only by the direct-acquisition path above.
unsigned int context_id_{0};
};

template <HexagonBuffer::StorageScope S>
Expand Down
1 change: 0 additions & 1 deletion src/runtime/hexagon/hexagon_device_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@

#include "../workspace_pool.h"
#include "hexagon_common.h"
#include "hexagon_user_dma.h"

namespace tvm {
namespace runtime {
Expand Down
23 changes: 16 additions & 7 deletions src/runtime/hexagon/hexagon_device_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "hexagon_buffer_manager.h"
#include "hexagon_thread_manager.h"
#include "hexagon_user_dma.h"
#include "hexagon_vtcm_pool.h"

namespace tvm {
namespace runtime {
Expand All @@ -54,37 +55,37 @@ class HexagonDeviceAPI final : public DeviceAPI {

//! \brief Creates the runtime-scoped resource managers: VTCM pool, buffer manager, thread
//! manager and user DMA, in that order. Each CHECK_EQ fails if the manager already exists.
// NOTE(review): this is a rendered diff hunk; the DLOG lines below may be lines the PR
// removes rather than part of the merged code. Confirm against the merged header.
void AcquireResources() {
// The VTCM pool is created first; ReleaseResources() resets it last.
CHECK_EQ(runtime_vtcm, nullptr);
runtime_vtcm = std::make_unique<HexagonVtcmPool>();

CHECK_EQ(runtime_hexbuffs, nullptr);
runtime_hexbuffs = std::make_unique<HexagonBufferManager>();
DLOG(INFO) << "runtime_hexbuffs created";
// Route buffer management through the runtime-scoped manager from here on.
mgr = runtime_hexbuffs.get();

CHECK_EQ(runtime_threads, nullptr);
runtime_threads = std::make_unique<HexagonThreadManager>(threads, stack_size, pipe_size);
DLOG(INFO) << "runtime_threads created";

CHECK_EQ(runtime_dma, nullptr);
runtime_dma = std::make_unique<HexagonUserDMA>();
DLOG(INFO) << "runtime_dma created";
}

//! \brief Destroys the runtime-scoped resource managers in the reverse of the order in which
//! AcquireResources() creates them: user DMA, thread manager, buffer manager, VTCM pool.
//! Each CHECK fails if the corresponding manager was never created.
// NOTE(review): this is a rendered diff hunk; the DLOG lines (including the DLOG/LOG pair with
// the same message) look like removed/added diff pairs. Confirm against the merged header.
void ReleaseResources() {
CHECK(runtime_dma) << "runtime_dma was not created in AcquireResources";
runtime_dma.reset();
DLOG(INFO) << "runtime_dma reset";

CHECK(runtime_threads) << "runtime_threads was not created in AcquireResources";
runtime_threads.reset();
DLOG(INFO) << "runtime_threads reset";

CHECK(runtime_hexbuffs) << "runtime_hexbuffs was not created in AcquireResources";
// Buffers still registered at this point are only reported, not treated as an error.
if (runtime_hexbuffs && !runtime_hexbuffs->empty()) {
DLOG(INFO) << "runtime_hexbuffs was not empty in ReleaseResources";
LOG(INFO) << "runtime_hexbuffs was not empty in ReleaseResources";
}
// Point mgr away from the runtime-scoped manager before that manager is destroyed.
mgr = &hexbuffs;
DLOG(INFO) << "runtime_hexbuffs reset";
runtime_hexbuffs.reset();

// The pool is reset only after the buffer manager, so it is still available while
// runtime_hexbuffs is being torn down.
CHECK(runtime_vtcm) << "runtime_vtcm was not created in AcquireResources";
runtime_vtcm.reset();
}

/*! \brief Currently unimplemented interface to specify the active
Expand Down Expand Up @@ -168,6 +169,11 @@ class HexagonDeviceAPI final : public DeviceAPI {
return runtime_dma.get();
}

//! \brief Non-owning access to the VTCM pool held in runtime_vtcm.
//! Fails the CHECK if the pool does not currently exist.
HexagonVtcmPool* VtcmPool() {
  CHECK(runtime_vtcm) << "runtime_vtcm has not been created";
  HexagonVtcmPool* const pool = runtime_vtcm.get();
  return pool;
}

protected:
//! Standard Device API interface to copy data from one storage to another.
void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset, size_t size,
Expand Down Expand Up @@ -202,6 +208,9 @@ class HexagonDeviceAPI final : public DeviceAPI {

//! \brief User DMA manager
std::unique_ptr<HexagonUserDMA> runtime_dma;

//! \brief VTCM memory manager
std::unique_ptr<HexagonVtcmPool> runtime_vtcm;
};
} // namespace hexagon
} // namespace runtime
Expand Down
160 changes: 160 additions & 0 deletions src/runtime/hexagon/hexagon_vtcm_pool.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "hexagon_vtcm_pool.h"

#include "HAP_compute_res.h"
#include "hexagon_common.h"

namespace tvm {
namespace runtime {
namespace hexagon {

/*!
 * \brief Acquire one VTCM region through the HAP compute-resource API and seed the free list.
 *
 * The request asks for max_size bytes and states min_size as the smallest acceptable amount.
 * The pointer and size actually granted are stored in vtcm_data_ / vtcm_size_, the context
 * handle in context_id_, and the whole region becomes the single initial entry of free_.
 */
HexagonVtcmPool::HexagonVtcmPool() {
  // TODO(HWE): get the max and min size programmatically
  constexpr unsigned int kOneMiB = 1024 * 1024;
  const unsigned int max_size = 4 * kOneMiB;
  const unsigned int min_size = kOneMiB;

  compute_res_attr_t res_info;
  HEXAGON_SAFE_CALL(HAP_compute_res_attr_init(&res_info));

  // Describe the VTCM request: preferred size, minimum page size, minimum acceptable size.
  HEXAGON_SAFE_CALL(HAP_compute_res_attr_set_vtcm_param_v2(&res_info,
                                                           /*vtcm_size = */ max_size,
                                                           /*min_page_size = */ 1,
                                                           /*min_vtcm_size = */ min_size));

  // TODO(HWE): Investigate why a non-zero timeout results in
  // hanging, both in the simulator and on hardware.
  context_id_ = HAP_compute_res_acquire(&res_info, /*timeout = */ 0);
  CHECK(context_id_) << "HAP_compute_res_acquire failed to acquire requested VTCM resource.";

  // Read back what was actually granted and validate it before publishing it as free space.
  HEXAGON_SAFE_CALL(HAP_compute_res_attr_get_vtcm_ptr_v2(&res_info, &vtcm_data_, &vtcm_size_));
  CHECK(vtcm_data_ != nullptr) << "HAP_compute_res_acquire returned nullptr when allocating VTCM.";
  CHECK(vtcm_size_ >= min_size)
      << "HAP_compute_res_acquire failed to allocate minimum amount of VTCM";

  free_.emplace_back(static_cast<char*>(vtcm_data_), vtcm_size_);
  // DebugDump();
}

//! \brief Release the compute-resource context stored in context_id_ by the constructor.
HexagonVtcmPool::~HexagonVtcmPool() {
  HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
}

/*!
 * \brief Carve a block of nbytes out of the pool and record it in allocations_.
 *
 * Two placement strategies are used, both under mutex_:
 *  - Sizes that are not a multiple of 2 KiB (0x800) are taken from the tail of the last free
 *    entry, to avoid fragmenting the front of the pool.
 *  - Sizes that are a multiple of 2 KiB use best fit: the smallest free entry that can hold
 *    the request (the first such entry on ties), allocated from its start.
 *
 * A free entry that is consumed completely is removed from free_, so free_ never holds
 * zero-sized entries.
 *
 * \param nbytes Number of bytes to allocate.
 * \return Pointer to the start of the allocated block.
 *
 * Fails a CHECK when the pool has no free entries or no suitable contiguous space exists.
 * NOTE(review): a zero-byte request is not rejected here; confirm whether callers can pass 0.
 */
void* HexagonVtcmPool::Allocate(size_t nbytes) {
  std::lock_guard<std::mutex> lock(mutex_);

  CHECK(!free_.empty()) << "No free VTCM";

  // If this is not aligned on a 2k block, allocate from the end to avoid fragmentation
  if (nbytes & size_t(0x7FF)) {
    DLOG(INFO) << "VTCM nbytes requested: " << nbytes << " allocate from the end";
    auto last_free_entry = free_.end();
    --last_free_entry;  // safe: free_ is non-empty (checked above)
    CHECK(last_free_entry->second >= nbytes)
        << "Not enough contiguous VTCM space at the end to allocate";
    char* ptr = last_free_entry->first + (last_free_entry->second - nbytes);
    allocations_.emplace_back(std::pair<char*, size_t>(ptr, nbytes));
    last_free_entry->second -= nbytes;
    // Drop the entry once it is fully consumed. A leftover zero-sized entry would share its
    // address with the block just handed out and make a later Free() of that block look like
    // a double free.
    if (last_free_entry->second == 0) {
      free_.erase(last_free_entry);
    }
    // DebugDump();
    return ptr;
  }

  // Best fit. Track the candidate by iterator: binding a reference to free_.front() and
  // assigning through it would overwrite the first free entry rather than select another one.
  // Starting from end() also lets the scan succeed when the first entry is too small.
  auto entry_to_allocate = free_.end();
  for (auto it = free_.begin(); it != free_.end(); ++it) {
    if (it->second < nbytes) {
      continue;  // too small to hold the request
    }
    if (entry_to_allocate == free_.end() || it->second < entry_to_allocate->second) {
      entry_to_allocate = it;
      if (it->second == nbytes) {
        break;  // exact fit, cannot do better
      }
    }
  }
  CHECK(entry_to_allocate != free_.end()) << "Not enough contiguous VTCM space to allocate";

  char* ptr = entry_to_allocate->first;
  allocations_.emplace_back(std::pair<char*, size_t>(ptr, nbytes));

  // Shrink the chosen free entry from the front, or remove it if it was an exact fit.
  if (entry_to_allocate->second == nbytes) {
    free_.erase(entry_to_allocate);
  } else {
    entry_to_allocate->first += nbytes;
    entry_to_allocate->second -= nbytes;
  }
  // DebugDump();
  return ptr;
}

/*!
 * \brief Return a block obtained from Allocate() to the pool.
 *
 * Under mutex_, the matching record is removed from allocations_, and the block is inserted
 * into free_ at its address-ordered position, merging with the following and/or preceding
 * free entry when they are directly adjacent.
 *
 * \param ptr Start of the block being returned.
 * \param nbytes Size of the block; must equal the size recorded when it was allocated.
 *
 * Fails a CHECK if ptr is not a live allocation, if nbytes differs from the recorded size, or
 * if the block coincides or overlaps with an existing free entry.
 */
void HexagonVtcmPool::Free(void* ptr, size_t nbytes) {
  std::lock_guard<std::mutex> lock(mutex_);
  char* const ptr_to_free = static_cast<char*>(ptr);

  // Retire the bookkeeping record for this block.
  auto it = std::find_if(allocations_.begin(), allocations_.end(),
                         [ptr_to_free](const auto& entry) { return entry.first == ptr_to_free; });
  CHECK(it != allocations_.end()) << "Attempted to free a pointer that had not been allocated";
  CHECK(it->second == nbytes) << "Attempted to free a different size than was allocated";
  allocations_.erase(it);

  // Walk free_ up to the first entry that starts above the freed block.
  it = free_.begin();
  while (it != free_.end()) {
    CHECK(ptr_to_free != it->first) << "Attempting to free a pointer that was already free";
    if (ptr_to_free < it->first) {
      break;
    }
    ++it;
  }

  if (it == free_.end()) {
    // Insert an entry at the end
    it = free_.emplace(it, ptr_to_free, nbytes);
  } else {
    CHECK(ptr_to_free + nbytes <= it->first)
        << "free_ is in an inconsistent state, freed block overlaps with next";
    if (ptr_to_free + nbytes == it->first) {
      // Directly adjacent to the next entry: grow that entry downwards.
      it->first = ptr_to_free;
      it->second += nbytes;
    } else {
      // A gap remains: insert a new entry before the next one.
      it = free_.emplace(it, ptr_to_free, nbytes);
    }
  }

  // `it` now refers to the entry that holds the freed block. Nothing precedes the first entry.
  if (it == free_.begin()) {
    // DebugDump();
    return;
  }

  // Check for overlap with the previous entry, and merge into it when directly adjacent.
  auto it_prev = it;
  --it_prev;
  CHECK(it_prev->first + it_prev->second <= ptr_to_free)
      << "free_ is in an inconsistent state, freed block overlaps with previous";
  if (it_prev->first + it_prev->second == ptr_to_free) {
    it_prev->second += it->second;
    free_.erase(it);
  }
  // DebugDump();
}

/*!
 * \brief Log every entry of allocations_ followed by every entry of free_ (address and size).
 *
 * This function does not lock mutex_ itself.
 */
void HexagonVtcmPool::DebugDump() {
  LOG(INFO) << "VTCM list state";
  for (auto it = allocations_.begin(); it != allocations_.end(); ++it) {
    LOG(INFO) << "VTCM alloc: " << static_cast<void*>(it->first) << " " << it->second;
  }
  for (auto it = free_.begin(); it != free_.end(); ++it) {
    LOG(INFO) << "VTCM free: " << static_cast<void*>(it->first) << " " << it->second;
  }
}

} // namespace hexagon
} // namespace runtime
} // namespace tvm
Loading