Skip to content

Commit

Permalink
[Hexagon] [runtime] VTCM Allocator (#12947)
Browse files Browse the repository at this point in the history
Adds a VTCM Memory Pool class, which allocates the largest contiguous buffer possible within 1 page upon construction.

Allocations and free space are maintained in two lists.  Buffers that align on 2k size boundaries will choose the smallest open buffer which will satisfy the request.  Non-aligned buffers will be allocated from the end of the free space.

HexagonBuffer will use this pool to service VTCM scope requests, replacing the individual calls that allocated the memory on separate pages.

The pool is created and destroyed in the device API Acquire/ReleaseResources.

Adds unit tests to exercise edge cases.
  • Loading branch information
janetsc authored Oct 3, 2022
1 parent fa17da2 commit f121e5e
Show file tree
Hide file tree
Showing 8 changed files with 420 additions and 35 deletions.
41 changes: 16 additions & 25 deletions src/runtime/hexagon/hexagon_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
#include <string>
#include <utility>

#include "HAP_compute_res.h"
#include "hexagon_common.h"
#include "hexagon_device_api.h"

namespace tvm {
namespace runtime {
Expand Down Expand Up @@ -57,35 +57,26 @@ struct DDRAllocation : public Allocation {

struct VTCMAllocation : public Allocation {
VTCMAllocation(size_t nbytes, size_t alignment) : Allocation(nbytes, alignment) {
compute_res_attr_t res_info;
HEXAGON_SAFE_CALL(HAP_compute_res_attr_init(&res_info));

// allocate nbytes of vtcm on a single page
HEXAGON_SAFE_CALL(HAP_compute_res_attr_set_vtcm_param(&res_info, /*vtcm_size = */ nbytes,
/*b_single_page = */ 0));

// TODO(HWE): Investigate why a non-zero timeout results in
// hanging, both in the simulator and on hardware.
context_id_ = HAP_compute_res_acquire(&res_info, /*timeout = */ 0);

if (context_id_) {
data_ = HAP_compute_res_attr_get_vtcm_ptr(&res_info);
if (!data_) {
LOG(ERROR) << "ERROR: HAP_compute_res_acquire returned nullptr when allocating VTCM.";
HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
return;
}
} else {
LOG(FATAL) << "FATAL: HAP_compute_res_acquire failed to acquire requested VTCM resource.";
throw std::runtime_error(
"HAP_compute_res_acquire failed to acquire requested VTCM resource.");
// TODO(HWE): Handle alignments greater than 2k
CHECK(alignment <= 0x800) << "VTCMAllocation called for invalid alignment";
if ((nbytes & 0x7FF) && ((alignment & 0x7FF) == 0)) {
// Caller has requested 2k alignment, but the size is not a multiple of 2k
// Adjust size to be a multiple of 2k so that we will allocate from the front of the pool
nbytes = nbytes >> 11;
nbytes = nbytes << 11;
nbytes += 0x800;
DLOG(INFO) << "VTCMAllocation size adjusted for alignment " << allocation_nbytes_ << " to "
<< nbytes;
allocation_nbytes_ = nbytes;
}
data_ = HexagonDeviceAPI::Global()->VtcmPool()->Allocate(allocation_nbytes_);
DLOG(INFO) << "VTCMAllocation " << data_ << " " << allocation_nbytes_ << " " << alignment;
}
~VTCMAllocation() {
HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
DLOG(INFO) << "~VTCMAllocation " << data_ << " " << allocation_nbytes_;
HexagonDeviceAPI::Global()->VtcmPool()->Free(data_, allocation_nbytes_);
data_ = nullptr;
}
unsigned int context_id_{0};
};

template <HexagonBuffer::StorageScope S>
Expand Down
1 change: 0 additions & 1 deletion src/runtime/hexagon/hexagon_device_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@

#include "../workspace_pool.h"
#include "hexagon_common.h"
#include "hexagon_user_dma.h"

namespace tvm {
namespace runtime {
Expand Down
23 changes: 16 additions & 7 deletions src/runtime/hexagon/hexagon_device_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "hexagon_buffer_manager.h"
#include "hexagon_thread_manager.h"
#include "hexagon_user_dma.h"
#include "hexagon_vtcm_pool.h"

namespace tvm {
namespace runtime {
Expand All @@ -54,37 +55,37 @@ class HexagonDeviceAPI final : public DeviceAPI {

//! \brief Ensures resource managers are in a good state for the runtime
void AcquireResources() {
// Creates the runtime-scoped managers in a fixed order: VTCM pool, buffer manager,
// thread manager, user DMA. Each CHECK_EQ requires the corresponding pointer to be
// null on entry, so calling this again before ReleaseResources() fails the check.
CHECK_EQ(runtime_vtcm, nullptr);
runtime_vtcm = std::make_unique<HexagonVtcmPool>();

CHECK_EQ(runtime_hexbuffs, nullptr);
runtime_hexbuffs = std::make_unique<HexagonBufferManager>();
DLOG(INFO) << "runtime_hexbuffs created";
// From here on, mgr refers to the runtime-scoped buffer manager created above.
mgr = runtime_hexbuffs.get();

CHECK_EQ(runtime_threads, nullptr);
runtime_threads = std::make_unique<HexagonThreadManager>(threads, stack_size, pipe_size);
DLOG(INFO) << "runtime_threads created";

CHECK_EQ(runtime_dma, nullptr);
runtime_dma = std::make_unique<HexagonUserDMA>();
DLOG(INFO) << "runtime_dma created";
}

//! \brief Ensures all runtime resources are freed
void ReleaseResources() {
// Destroys the managers in the reverse of the order AcquireResources() created them:
// user DMA, thread manager, buffer manager, and finally the VTCM pool. Each CHECK
// requires that the manager exists before it is reset.
CHECK(runtime_dma) << "runtime_dma was not created in AcquireResources";
runtime_dma.reset();
DLOG(INFO) << "runtime_dma reset";

CHECK(runtime_threads) << "runtime_threads was not created in AcquireResources";
runtime_threads.reset();
DLOG(INFO) << "runtime_threads reset";

CHECK(runtime_hexbuffs) << "runtime_hexbuffs was not created in AcquireResources";
// A non-empty buffer manager is only logged here; it is still reset below.
if (runtime_hexbuffs && !runtime_hexbuffs->empty()) {
DLOG(INFO) << "runtime_hexbuffs was not empty in ReleaseResources";
LOG(INFO) << "runtime_hexbuffs was not empty in ReleaseResources";
}
// Point mgr back at hexbuffs before the runtime-scoped manager is destroyed.
mgr = &hexbuffs;
DLOG(INFO) << "runtime_hexbuffs reset";
runtime_hexbuffs.reset();

// The VTCM pool goes last, after the buffer manager has been reset.
CHECK(runtime_vtcm) << "runtime_vtcm was not created in AcquireResources";
runtime_vtcm.reset();
}

/*! \brief Currently unimplemented interface to specify the active
Expand Down Expand Up @@ -168,6 +169,11 @@ class HexagonDeviceAPI final : public DeviceAPI {
return runtime_dma.get();
}

//! \brief Returns the VTCM pool held in runtime_vtcm; fails a CHECK if it has not been created.
HexagonVtcmPool* VtcmPool() {
CHECK(runtime_vtcm) << "runtime_vtcm has not been created";
return runtime_vtcm.get();
}

protected:
//! Standard Device API interface to copy data from one storage to another.
void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset, size_t size,
Expand Down Expand Up @@ -202,6 +208,9 @@ class HexagonDeviceAPI final : public DeviceAPI {

//! \brief User DMA manager
std::unique_ptr<HexagonUserDMA> runtime_dma;

//! \brief VTCM memory manager
std::unique_ptr<HexagonVtcmPool> runtime_vtcm;
};
} // namespace hexagon
} // namespace runtime
Expand Down
152 changes: 152 additions & 0 deletions src/runtime/hexagon/hexagon_vtcm_pool.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "hexagon_vtcm_pool.h"

#include "HAP_compute_res.h"
#include "hexagon_common.h"

namespace tvm {
namespace runtime {
namespace hexagon {

/*!
 * \brief Acquire one VTCM region through HAP_compute_res and seed the free list with it.
 *
 * Up to max_size bytes are requested and anything below min_size is rejected. On success
 * the whole region is recorded as a single free segment. Any failure trips a CHECK.
 */
HexagonVtcmPool::HexagonVtcmPool() {
  // TODO(HWE): get the max and min size programmatically
  const unsigned int min_size = 1024 * 1024;
  const unsigned int max_size = 4 * 1024 * 1024;

  compute_res_attr_t res_info;
  HEXAGON_SAFE_CALL(HAP_compute_res_attr_init(&res_info));

  // Ask for as much as max_size, but accept a smaller region down to min_size.
  HEXAGON_SAFE_CALL(HAP_compute_res_attr_set_vtcm_param_v2(&res_info,
                                                           /*vtcm_size = */ max_size,
                                                           /*min_page_size = */ 1,
                                                           /*min_vtcm_size = */ min_size));

  // TODO(HWE): Investigate why a non-zero timeout results in
  // hanging, both in the simulator and on hardware.
  context_id_ = HAP_compute_res_acquire(&res_info, /*timeout = */ 0);
  CHECK(context_id_) << "HAP_compute_res_acquire failed to acquire requested VTCM resource.";

  // Read back the base address and the size that was actually granted.
  HEXAGON_SAFE_CALL(HAP_compute_res_attr_get_vtcm_ptr_v2(&res_info, &vtcm_data_, &vtcm_size_));
  CHECK(vtcm_data_ != nullptr) << "HAP_compute_res_acquire returned nullptr when allocating VTCM.";
  CHECK(vtcm_size_ >= min_size)
      << "HAP_compute_res_acquire failed to allocate minimum amount of VTCM";

  // The entire region starts out as one free segment.
  free_.emplace_back(static_cast<char*>(vtcm_data_), vtcm_size_);
}

//! Releases the HAP compute-resource context stored in context_id_ by the constructor.
HexagonVtcmPool::~HexagonVtcmPool() {
  HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
}

/*!
 * \brief Carve nbytes out of the pool and record the allocation.
 *
 * Sizes that are a multiple of 2k are served best-fit: the smallest free segment that
 * is large enough is used, and the block is taken from its front. Any other size is
 * taken from the tail of the last free segment to limit fragmentation of the front.
 *
 * \param nbytes The number of bytes to allocate.
 * \return Pointer to the first byte of the allocated range.
 *
 * A CHECK fails if the pool has no free segment, or no suitable segment is large enough.
 */
void* HexagonVtcmPool::Allocate(size_t nbytes) {
  std::lock_guard<std::mutex> lock(mutex_);

  CHECK(!free_.empty()) << "No free VTCM";

  // If this is not aligned on a 2k block, allocate from the end to avoid fragmentation
  if (nbytes & size_t(0x7FF)) {
    DLOG(INFO) << "VTCM nbytes requested: " << nbytes << " allocate from the end";
    auto& last_free_entry = free_.back();
    CHECK(last_free_entry.second >= nbytes)
        << "Not enough contiguous VTCM space at the end to allocate";
    char* ptr = last_free_entry.first + (last_free_entry.second - nbytes);
    allocations_.emplace_back(ptr, nbytes);
    last_free_entry.second -= nbytes;
    if (last_free_entry.second == 0) {
      // Drop a fully consumed segment. Leaving a zero-byte entry at the tail would make
      // every later unaligned request fail against it, even when other segments have room.
      free_.pop_back();
    }
    return ptr;
  }

  // Best fit: pick the smallest free segment that can hold nbytes. The candidate starts
  // out as "none" so that a too-small first segment cannot mask larger ones behind it.
  auto entry_to_allocate = free_.end();
  for (auto it = free_.begin(); it != free_.end(); ++it) {
    if (it->second < nbytes) {
      continue;
    }
    if (entry_to_allocate == free_.end() || it->second < entry_to_allocate->second) {
      entry_to_allocate = it;
      if (it->second == nbytes) {
        // An exact fit cannot be improved on.
        break;
      }
    }
  }
  CHECK(entry_to_allocate != free_.end()) << "Not enough contiguous VTCM space to allocate";

  char* ptr = entry_to_allocate->first;
  allocations_.emplace_back(ptr, nbytes);

  if (entry_to_allocate->second == nbytes) {
    // The segment is used up entirely.
    free_.erase(entry_to_allocate);
  } else {
    // Shrink the segment from the front; its start address moves past the new block.
    entry_to_allocate->first += nbytes;
    entry_to_allocate->second -= nbytes;
  }
  return ptr;
}

void HexagonVtcmPool::Free(void* ptr, size_t nbytes) {
char* ptr_to_free = static_cast<char*>(ptr);
std::lock_guard<std::mutex> lock(mutex_);

auto it = std::find_if(allocations_.begin(), allocations_.end(),
[&](auto entry) { return entry.first == ptr_to_free; });
CHECK(it != allocations_.end()) << "Attempted to free a pointer that had not been allocated";
CHECK(it->second == nbytes) << "Attempted to free a different size than was allocated";
allocations_.erase(it);

it = std::lower_bound(free_.begin(), free_.end(), std::pair<char*, size_t>(ptr_to_free, nbytes),
[](auto p, auto q) { return p.first <= q.first; });
if (it == free_.end()) {
// Insert an entry at the end
it = free_.emplace(it, std::pair<char*, size_t>(ptr_to_free, nbytes));
} else {
CHECK(ptr_to_free != it->first) << "Attempting to free a pointer that was already free";
CHECK(ptr_to_free + nbytes <= it->first)
<< "free_ is in an inconsistent state, freed block overlaps with next";
if (ptr_to_free + nbytes == it->first) {
// Make this entry bigger
it->first = ptr_to_free;
it->second += nbytes;
} else {
// Insert an entry before this
it = free_.emplace(it, std::pair<char*, size_t>(ptr_to_free, nbytes));
}
}

// Check for overlap with the previous entry
if (it != free_.begin()) {
auto it_prev = it;
it_prev--;
CHECK(it_prev->first + it_prev->second <= ptr_to_free)
<< "free_ is in an inconsistent state, freed block overlaps with previous";
if (it_prev->first + it_prev->second == ptr_to_free) {
it_prev->second += it->second;
free_.erase(it);
}
}
// DebugDump();
}

//! Logs every recorded allocation, then every free segment, as "address size" pairs.
void HexagonVtcmPool::DebugDump() {
  LOG(INFO) << "VTCM list state";
  for (const std::pair<char*, size_t>& block : allocations_) {
    void* address = static_cast<void*>(block.first);
    LOG(INFO) << "VTCM alloc: " << address << " " << block.second;
  }
  for (const std::pair<char*, size_t>& segment : free_) {
    void* address = static_cast<void*>(segment.first);
    LOG(INFO) << "VTCM free: " << address << " " << segment.second;
  }
}

} // namespace hexagon
} // namespace runtime
} // namespace tvm
100 changes: 100 additions & 0 deletions src/runtime/hexagon/hexagon_vtcm_pool.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_VTCM_POOL_H_
#define TVM_RUNTIME_HEXAGON_HEXAGON_VTCM_POOL_H_

#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/device_api.h>
#include <tvm/runtime/logging.h>
#include <tvm/runtime/ndarray.h>
#include <tvm/runtime/packed_func.h>

#include <mutex>
#include <utility>
#include <vector>

namespace tvm {
namespace runtime {
namespace hexagon {

/*!
 * \brief Pool that hands out sub-ranges of a single VTCM region.
 *
 * The region is acquired in the constructor and released in the destructor. Allocate and
 * Free keep two lists, one of live allocations and one of free segments, and both take
 * mutex_ while touching them. The pool can be neither copied nor moved.
 */
class HexagonVtcmPool {
 public:
  //! \brief Acquires the underlying VTCM region and manages allocations from it
  HexagonVtcmPool();

  //! \brief Destruction deallocates the underlying VTCM allocation.
  ~HexagonVtcmPool();

  //! \brief Prevent copy construction of HexagonVtcmPool.
  HexagonVtcmPool(const HexagonVtcmPool&) = delete;

  //! \brief Prevent copy assignment with HexagonVtcmPool.
  HexagonVtcmPool& operator=(const HexagonVtcmPool&) = delete;

  //! \brief Prevent move construction.
  HexagonVtcmPool(HexagonVtcmPool&&) = delete;

  //! \brief Prevent move assignment.
  HexagonVtcmPool& operator=(HexagonVtcmPool&&) = delete;

  /*! \brief Allocate memory from the VTCM pool.
   *
   * \param nbytes The number of bytes to allocate.
   *
   * \return Pointer to the start of the allocated range.
   */
  void* Allocate(size_t nbytes);

  /*! \brief Return memory obtained from Allocate to the VTCM pool.
   *
   * \param ptr The pointer to the buffer to be freed.
   *
   * \param nbytes The number of bytes to be freed; must match the allocated size.
   */
  void Free(void* ptr, size_t nbytes);

  //! \brief Returns the total number of bytes in this pool
  // vtcm_size_ is an integer, so a value conversion (static_cast) is the correct cast;
  // reinterpret_cast between distinct integer types does not compile.
  size_t TotalBytes() const { return static_cast<size_t>(vtcm_size_); }

 private:
  //! \brief Size in bytes of the VTCM region reported by HAP_compute_res_*
  unsigned int vtcm_size_{0};

  //! \brief Base address of the VTCM region reported by HAP_compute_res_*
  void* vtcm_data_{nullptr};

  //! \brief Context for HAP_compute_res_*
  unsigned int context_id_{0};

  //! \brief List of allocations, as (start address, size in bytes)
  std::vector<std::pair<char*, size_t>> allocations_;

  //! \brief List of free segments, as (start address, size in bytes)
  std::vector<std::pair<char*, size_t>> free_;

  //! \brief Mutex to protect access to the lists
  std::mutex mutex_;

  //! \brief Debug only dump of the state of the lists
  void DebugDump();
};

} // namespace hexagon
} // namespace runtime
} // namespace tvm

#endif // TVM_RUNTIME_HEXAGON_HEXAGON_VTCM_POOL_H_
Loading

0 comments on commit f121e5e

Please sign in to comment.