1 change: 0 additions & 1 deletion apps/android_rpc/app/src/main/jni/tvm_runtime.h
@@ -66,7 +66,6 @@
#include "../src/runtime/opencl/opencl_device_api.cc"
#include "../src/runtime/opencl/opencl_module.cc"
#include "../src/runtime/opencl/opencl_wrapper/opencl_wrapper.cc"
#include "../src/runtime/opencl/texture_pool.cc"
#include "../src/runtime/source_utils.cc"
#endif

1 change: 1 addition & 0 deletions include/tvm/runtime/device_api.h
@@ -52,6 +52,7 @@ enum DeviceAttrKind : int {
kL2CacheSizeBytes = 13,
kTotalGlobalMemory = 14,
kAvailableGlobalMemory = 15,
kImagePitchAlignment = 16,
};

#ifdef TVM_KALLOC_ALIGNMENT
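
The new kImagePitchAlignment attribute lets callers ask a device for the row-pitch alignment its image (texture) allocations require. A minimal sketch of how an OpenCL backend might answer that query — the helper below is illustrative, not code from this PR:

#include <CL/cl.h>
#include <cstdint>

// Illustrative helper (assumed, not TVM's implementation): fetch the value
// that kImagePitchAlignment is meant to expose. CL_DEVICE_IMAGE_PITCH_ALIGNMENT
// is a standard OpenCL 2.0 device query returning the required row-pitch
// alignment, in pixels, for images created from a buffer.
int64_t QueryImagePitchAlignment(cl_device_id device) {
  cl_uint pitch_align = 0;
  clGetDeviceInfo(device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT,
                  sizeof(pitch_align), &pitch_align, nullptr);
  return static_cast<int64_t>(pitch_align);
}
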
34 changes: 33 additions & 1 deletion include/tvm/runtime/memory/memory_manager.h
@@ -87,7 +87,26 @@ class Allocator {
* \return A sized allocation in the form of a buffer.
*/
TVM_DLL virtual Buffer Alloc(Device dev, ShapeTuple shape, DLDataType type_hint,
const std::string& mem_scope = "") = 0;
const std::string& mem_scope = "");

/*! \brief Create a view for the buffer given a shape, type and scope.
* \param buffer The existing buffer upon which we need to create a view.
* \param shape The shape of the view.
* \param type_hint A type hint to the view.
* \param mem_scope A memory scope of the view.
* \return A device pointer to the created view.
*/
TVM_DLL virtual void* CreateView(const Buffer& buffer, ShapeTuple shape, DLDataType type_hint,
const std::string& mem_scope = "global") {
return buffer.data;
}

/*! \brief Release the view.
* \param dev The device where the view was created.
* \param data The view pointer to be freed.
*/
TVM_DLL virtual void FreeView(Device dev, void* data) {}

/*! \brief Free a buffer allocated by the allocator.
* \param buffer The buffer to free.
*/
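
These CreateView/FreeView hooks give allocators an optional aliasing mechanism: the base class just returns the buffer's existing pointer, while a texture-capable allocator can return an image view over the same storage. A hedged usage sketch — the "global.texture" scope and the call pattern are assumptions for illustration, not mandated by this header:

#include <tvm/runtime/memory/memory_manager.h>

// Assumed consumer of the new hooks: borrow a texture view over an existing
// buffer, use it, and release it. With the default base implementation the
// view is simply buf.data; an OpenCL allocator would return an image object.
void UseAsTexture(tvm::runtime::memory::Allocator* alloc,
                  const tvm::runtime::memory::Buffer& buf,
                  tvm::runtime::ShapeTuple shape, DLDataType dtype) {
  void* view = alloc->CreateView(buf, shape, dtype, "global.texture");
  // ... launch kernels that read or write through `view` ...
  alloc->FreeView(buf.device, view);
}
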
@@ -147,6 +166,13 @@ class StorageObj : public Object {
/*! \brief Allocate an NDArray from a given piece of storage. */
TVM_DLL NDArray AllocNDArray(int64_t offset, ShapeTuple shape, DLDataType dtype);

/*! \brief Allocate an NDArray with memory scope from a given piece of storage. */
TVM_DLL NDArray AllocNDArrayScoped(int64_t offset, ShapeTuple shape, DLDataType dtype,
String scope = "global");

/*! \brief The deleter for a scoped NDArray allocated from underlying storage. */
static void ScopedDeleter(Object* ptr);

/*! \brief The deleter for an NDArray when allocated from underlying storage. */
static void Deleter(Object* ptr);

@@ -170,6 +196,12 @@ class Storage : public ObjectRef {
};

} // namespace memory

using memory::Allocator;
using memory::AllocatorType;
using memory::MemoryManager;
using memory::StorageObj;

} // namespace runtime
} // namespace tvm
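
AllocNDArrayScoped extends AllocNDArray with a memory-scope argument, so a view carved out of pooled storage can carry a non-default scope and be cleaned up by the matching ScopedDeleter. A brief usage sketch; the scope string and arguments are illustrative:

#include <tvm/runtime/memory/memory_manager.h>

// Assumed usage (not from this PR): carve a texture-scoped NDArray out of an
// existing Storage object at byte offset 0.
tvm::runtime::NDArray MakeScopedView(tvm::runtime::memory::Storage storage,
                                     tvm::runtime::ShapeTuple shape,
                                     DLDataType dtype) {
  return storage->AllocNDArrayScoped(/*offset=*/0, shape, dtype,
                                     "global.texture");
}
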

33 changes: 20 additions & 13 deletions src/relay/backend/graph_plan_memory.cc
@@ -229,6 +229,16 @@ class StorageAllocator : public StorageAllocaBaseVisitor {
VLOG_CONTEXT << "StorageAllocator";
VLOG(1) << "planning:" << std::endl << PrettyPrint(func);
prototype_ = StorageAllocaInit(&arena_).GetInitTokenMap(func);
// Back up the virtual devices, as token reuse might lose the original memory scope
std::unordered_map<const ExprNode*, std::vector<VirtualDevice>> virtual_device_map_;
for (const auto& kv : prototype_) {
std::vector<VirtualDevice> virtual_devices;
virtual_devices.reserve(kv.second.size());
for (StorageToken* tok : kv.second) {
virtual_devices.push_back(tok->virtual_device);
}
virtual_device_map_.insert({kv.first, virtual_devices});
}
this->Run(func);

// The value of smap contains two integer arrays where the first array
@@ -252,9 +262,13 @@
}
num_nodes++;
storage_ids.push_back(tok->storage_id);
virtual_devices.push_back(tok->virtual_device);
sid_sizes_byte.push_back(allocator_.GetMemorySize(tok));
}
ICHECK(kv.second.size() == virtual_device_map_[kv.first].size())
<< "Mismatch of tokens and virtual devices";
for (auto vdev : virtual_device_map_[kv.first]) {
virtual_devices.push_back(vdev);
}
auto storage_info = backend::StorageInfo(std::move(storage_ids), std::move(virtual_devices),
std::move(sid_sizes_byte));
smap.Set(GetRef<Expr>(kv.first), storage_info);
@@ -356,34 +370,27 @@ class StorageAllocator : public StorageAllocaBaseVisitor {

class TokenAllocator {
public:
StorageToken* Alloc(StorageToken* proto) {
return Is2DStorage(proto) ? token_2d_.Alloc(proto, storage_ids_++)
: token_1d_.Alloc(proto, storage_ids_++);
}
StorageToken* Alloc(StorageToken* proto) { return token_mixed_.Alloc(proto, storage_ids_++); }
StorageToken* Request(StorageToken* proto) {
StorageToken* token =
Is2DStorage(proto) ? token_2d_.Request(proto) : token_1d_.Request(proto);
StorageToken* token = token_mixed_.Request(proto);
return token ? token : this->Alloc(proto);
}
void CheckForRelease(StorageToken* tok) {
return Is2DStorage(tok) ? token_2d_.CheckForRelease(tok) : token_1d_.CheckForRelease(tok);
}
void CheckForRelease(StorageToken* tok) { return token_mixed_.CheckForRelease(tok); }

size_t GetMemorySize(StorageToken* tok) {
// TODO(amalyshe): figure out who requires sizes and for what.
// A plain size is not enough for textures - we can return any value if it
// is assumed to be used for memory allocation, or we can return the real size
// if it is just for information.
return Is2DStorage(tok) ? 0 : token_1d_.GetMemorySize(tok);
return token_mixed_.GetMemorySize(tok);
}
static bool Is2DStorage(StorageToken* tok) {
return relay::Is2DStorage(tok->virtual_device->memory_scope);
}

private:
int64_t storage_ids_{0};
TokenAllocator1D token_1d_;
TokenAllocator2D token_2d_;
TokenAllocatorMixed token_mixed_;
};

private:
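
To see why the planner now snapshots virtual devices before running token reuse, consider this toy reduction (plain C++, no TVM types, purely illustrative): once two expressions share a pooled token, reading the scope back off the token reports the pool's scope rather than what each expression originally requested.

#include <cassert>
#include <string>
#include <vector>

struct Token { std::string scope; };  // stand-in for StorageToken

int main() {
  Token pooled{"global.texture"};  // the first request fixes the token's scope
  // A second request, asking for scope "global", reuses the same token:
  std::vector<Token*> per_expr = {&pooled, &pooled};
  std::vector<std::string> requested = {"global.texture", "global"};
  // Without a snapshot, expression 1's scope would be reported incorrectly:
  assert(per_expr[1]->scope != requested[1]);
  // The fix above records `requested` up front and emits it into StorageInfo.
  return 0;
}
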
184 changes: 69 additions & 115 deletions src/relay/backend/token_allocator.cc
@@ -31,22 +31,45 @@

namespace tvm {
namespace relay {
constexpr auto Is2DStorage = runtime::IsTextureStorage;

size_t TokenAllocator1D::GetMemorySize(StorageToken* prototype) {
/*
* Mixed-mode memory allocator
*/
size_t TokenAllocatorMixed::GetMemorySize(StorageToken* prototype) {
TensorType ttype = prototype->ttype;
ICHECK(ttype.defined());
size_t size = 1;
for (IndexExpr dim : ttype->shape) {
const int64_t* pval = tir::as_const_int(dim);
ICHECK(pval != nullptr) << "Cannot allocate memory for symbolic tensor shape " << ttype->shape;
ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape " << *pval;
size *= static_cast<size_t>(pval[0]);
if (relay::Is2DStorage(prototype->virtual_device->memory_scope)) {
size = GetSize2D(prototype);
} else {
for (IndexExpr dim : ttype->shape) {
const int64_t* pval = tir::as_const_int(dim);
ICHECK(pval != nullptr) << "Cannot allocate memory for symbolic tensor shape " << ttype->shape;
ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape " << *pval;
size *= static_cast<size_t>(pval[0]);
}
size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);
}
size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);
return size;
}

StorageToken* TokenAllocator1D::Request(StorageToken* prototype) {
String GetDeviceCompatibleToken(StorageToken* tok) {
Target null_tgt{nullptr};
if (null_tgt == tok->virtual_device->target) {
return tok->virtual_device->memory_scope;
}
std::string dev_kind = tok->virtual_device->target->kind->name;
auto* device_scope_handler = tvm::runtime::Registry::Get("DeviceScopeCompatibility." + dev_kind);
if (device_scope_handler) {
String dev_scope =
(*device_scope_handler)(tok->virtual_device->target, tok->virtual_device->memory_scope);
return dev_scope;
}
return tok->virtual_device->memory_scope;
}
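
GetDeviceCompatibleToken consults an optional per-target handler registered under "DeviceScopeCompatibility.<target kind>" to decide which memory scopes may share storage. A hedged sketch of how a backend could register such a handler — the collapsing rule below is an assumption for illustration, not necessarily what TVM's OpenCL backend installs:

#include <string>
#include <tvm/runtime/registry.h>
#include <tvm/target/target.h>

// Illustrative handler: treat every texture flavor as one reusable scope.
TVM_REGISTER_GLOBAL("DeviceScopeCompatibility.opencl")
    .set_body_typed([](tvm::Target target, tvm::runtime::String scope) {
      std::string s = scope;
      if (s.find("texture") != std::string::npos) {
        return tvm::runtime::String("global.texture");
      }
      return scope;
    });
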

StorageToken* TokenAllocatorMixed::Request(StorageToken* prototype) {
// calculate the size
size_t size = GetMemorySize(prototype);
// search memory block in [size / match_range_, size * match_range_)
@@ -59,142 +82,73 @@ StorageToken* TokenAllocator1D::Request(StorageToken* prototype) {
// search for memory blocks larger than requested
for (auto it = mid; it != end; ++it) {
StorageToken* tok = it->second;
if (!tok->is_compatible(*prototype)) continue;
ICHECK_EQ(tok->ref_counter, 0);
// Use exact matching strategy
tok->max_bytes = std::max(size, tok->max_bytes);
tok->ref_counter = prototype->ref_counter;
// find an exact match, erase from map and return
free_.erase(it);
return tok;
bool dev_compatible = (GetDeviceCompatibleToken(tok) == GetDeviceCompatibleToken(prototype));
if (tok->is_compatible(*prototype) || (dev_compatible)) {
ICHECK_EQ(tok->ref_counter, 0);
// Use exact matching strategy
if (size > tok->max_bytes) {
tok->max_bytes = size;
tok->ttype = prototype->ttype;
}
tok->ref_counter = prototype->ref_counter;
// find an exact match, erase from map and return
free_.erase(it);
return tok;
}
}
// then search for memory blocks smaller than requested space
for (auto it = mid; it != begin;) {
--it;
StorageToken* tok = it->second;
if (!tok->is_compatible(*prototype)) continue;
ICHECK_EQ(tok->ref_counter, 0);
// Use exact matching strategy
tok->max_bytes = std::max(size, tok->max_bytes);
tok->ref_counter = prototype->ref_counter;
// erase from map and return
free_.erase(it);
return tok;
bool dev_compatible = (GetDeviceCompatibleToken(tok) == GetDeviceCompatibleToken(prototype));
if (tok->is_compatible(*prototype) || (dev_compatible)) {
ICHECK_EQ(tok->ref_counter, 0);
// Use exact matching strategy
if (size > tok->max_bytes) {
tok->max_bytes = size;
tok->ttype = prototype->ttype;
}
tok->ref_counter = prototype->ref_counter;
// erase from map and return
free_.erase(it);
return tok;
}
}
return nullptr;
}

StorageToken* TokenAllocator1D::Alloc(StorageToken* prototype, int64_t storage_id) {
StorageToken* TokenAllocatorMixed::Alloc(StorageToken* prototype, int64_t storage_id) {
size_t size = GetMemorySize(prototype);
prototype->max_bytes = size;
prototype->storage_id = storage_id;
data_.push_back(prototype);
return prototype;
}

void TokenAllocator1D::CheckForRelease(StorageToken* tok) {
void TokenAllocatorMixed::CheckForRelease(StorageToken* tok) {
ICHECK_GE(tok->storage_id, 0);
ICHECK_GE(tok->ref_counter, 0);
if (tok->ref_counter == 0) {
free_.insert({tok->max_bytes, tok});
}
}

StorageToken* TokenAllocator2D::Request(StorageToken* prototype) {
auto shape = GetSize2D(prototype);
const int64_t max_ratio = 5;
int64_t min_added_size_x = std::numeric_limits<int64_t>::max();
int64_t min_added_size_y = std::numeric_limits<int64_t>::max();
int64_t min_wasted_size_x = std::numeric_limits<int64_t>::max();
int64_t min_wasted_size_y = std::numeric_limits<int64_t>::max();
int64_t best_storage_id = -1;
MemBlock new_mem;
for (int64_t free_id : free_list_) {
MemBlock& cached = blocks_[free_id];
// Can only reuse texture 2d blocks of the same type
if (cached.token_->ttype->dtype != prototype->ttype->dtype) {
continue;
}
// Can only reuse texture 2d blocks of the same scope
// Because reusing textures with different memory scope may lead to
// accuracy issues, because the data will be packed in a different way for
// different memory scopes.
if (cached.token_->virtual_device->memory_scope != prototype->virtual_device->memory_scope) {
continue;
}
// avoid reusing too small and too big textures
if (shape.width / cached.x_ > max_ratio || cached.x_ / shape.width > max_ratio ||
shape.height / cached.y_ > max_ratio || cached.y_ / shape.height > max_ratio) {
continue;
}
int64_t new_width = std::max(cached.x_, shape.width);
int64_t new_height = std::max(cached.y_, shape.height);
int64_t added_size_x = new_width - cached.x_;
int64_t added_size_y = new_height - cached.y_;
int64_t wasted_size_x = new_width - shape.width;
int64_t wasted_size_y = new_height - shape.height;
// Prioritize minimization of added size first, then minimize
// wasted size among blocks which would not require expansion
if ((min_added_size_x > 0 && added_size_x < min_added_size_x) ||
(min_added_size_y > 0 && added_size_y < min_added_size_y) ||
(min_added_size_x == added_size_x && wasted_size_x < min_wasted_size_x) ||
(min_added_size_y == added_size_y && wasted_size_y < min_wasted_size_y)) {
min_added_size_x = added_size_x;
min_added_size_y = added_size_y;
min_wasted_size_x = wasted_size_x;
min_wasted_size_y = wasted_size_y;
best_storage_id = free_id;
new_mem.x_ = new_width;
new_mem.y_ = new_height;
}
}

if (min_added_size_x == 0 && min_added_size_y == 0) {
// use existing block
free_list_.erase(best_storage_id);
blocks_[best_storage_id].token_->ref_counter += prototype->ref_counter;
return blocks_[best_storage_id].token_;
} else if (min_added_size_x <= shape.width || min_added_size_y <= shape.height) {
// Reset the reference counter of the now live token
free_list_.erase(best_storage_id);
new_mem.token_ = prototype;
new_mem.token_->ref_counter += 1;
new_mem.token_->storage_id = best_storage_id;
blocks_[best_storage_id] = new_mem;
return new_mem.token_;
}
return nullptr;
}

StorageToken* TokenAllocator2D::Alloc(StorageToken* prototype, int64_t storage_id) {
auto shape = GetSize2D(prototype);
MemBlock block;
block.x_ = shape.width;
block.y_ = shape.height;
prototype->storage_id = storage_id;
block.token_ = prototype;
blocks_[prototype->storage_id] = block;
return prototype;
}

void TokenAllocator2D::CheckForRelease(StorageToken* tok) {
ICHECK_GE(tok->storage_id, 0);
ICHECK_GE(tok->ref_counter, 0);
if (tok->ref_counter == 0) {
free_list_.insert(tok->storage_id);
}
}

runtime::Texture2DShape<int64_t> TokenAllocator2D::GetSize2D(StorageToken* prototype) {
size_t TokenAllocatorMixed::GetSize2D(StorageToken* prototype) {
TensorType ttype = prototype->ttype;
ICHECK(ttype.defined());
size_t axis = runtime::DefaultTextureLayoutSeparator(ttype->shape.size(),
prototype->virtual_device->memory_scope);
struct Shape {
const Array<PrimExpr>& shape;
int64_t operator[](size_t i) const { return *tir::as_const_int(shape[i]); }
int size() { return this->shape.size(); }
};
return runtime::ApplyTexture2DFlattening<int64_t>(Shape{ttype->shape}, ttype->shape.size(), axis);
auto shape = Shape{ttype->shape};
int image_row_align =
prototype->virtual_device->target->GetAttr<Integer>("image_base_address_alignment")
.value_or(Integer(64))
->value;
return runtime::GetTextureMemorySize<Shape>(shape, ttype->dtype.bits(), ttype->dtype.lanes(),
prototype->virtual_device->memory_scope,
image_row_align);
}

} // namespace relay
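
GetSize2D now delegates to runtime::GetTextureMemorySize, passing a row alignment read from the target's image_base_address_alignment attribute (default 64). The computation it relies on amounts to padding each image row up to that alignment before multiplying by the number of rows; a simplified model under that assumption (the real helper may account for more):

#include <cstddef>
#include <cstdint>

// Simplified texture-size model: each row of `width` texels (`lanes` channels
// of `bits` bits each) is padded so the next row starts on a `row_align`-byte
// boundary.
size_t TextureBytes(int64_t width, int64_t height, int bits, int lanes,
                    int64_t row_align) {
  int64_t row_bytes = width * lanes * ((bits + 7) / 8);
  int64_t row_pitch = (row_bytes + row_align - 1) / row_align * row_align;
  return static_cast<size_t>(row_pitch * height);
}
// Example: a 100x64 RGBA fp16 image with 64-byte alignment gives
// row_bytes = 100 * 4 * 2 = 800, row_pitch = 832, total = 832 * 64 = 53248.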