From baf0c6ab4a29f5c311dea94044a794b6794f3f69 Mon Sep 17 00:00:00 2001 From: Egor Churaev Date: Tue, 8 Nov 2022 14:45:33 +0300 Subject: [PATCH] [Adreno][Textures] Fix static memory planner (#13253) * [Adreno][Textures] Fix static memory planner Fix memory reusage in static memory planner. * Move token allocators to separate file * Add test on TokenAllocator2d * Apply comments and fix CI --- src/relay/backend/graph_plan_memory.cc | 243 +----------- src/relay/backend/token_allocator.cc | 201 ++++++++++ src/relay/backend/token_allocator.h | 161 ++++++++ .../relay/backend/graph_plan_token_alloc.cc | 351 ++++++++++++++++++ 4 files changed, 714 insertions(+), 242 deletions(-) create mode 100644 src/relay/backend/token_allocator.cc create mode 100644 src/relay/backend/token_allocator.h create mode 100644 tests/cpp/relay/backend/graph_plan_token_alloc.cc diff --git a/src/relay/backend/graph_plan_memory.cc b/src/relay/backend/graph_plan_memory.cc index f927bf633732..d85ffd78291c 100644 --- a/src/relay/backend/graph_plan_memory.cc +++ b/src/relay/backend/graph_plan_memory.cc @@ -37,6 +37,7 @@ #include "../op/call/call.h" #include "../op/memory/memory.h" #include "../transforms/device_aware_visitors.h" +#include "./token_allocator.h" #include "./utils.h" namespace tvm { @@ -50,33 +51,6 @@ using backend::StaticMemoryPlan; using backend::StorageInfo; using IntegerArray = Array; -/*! A representation of a block of memory required at runtime on some device. */ -struct StorageToken { - /*! \brief Reference counter */ - int ref_counter{0}; - /*! \brief number of bytes */ - size_t max_bytes{0}; - /*! \brief The corresponding tensor type. */ - TensorType ttype{nullptr}; - /*! \brief VirtualDevice on which the memory will reside. */ - VirtualDevice virtual_device = VirtualDevice::FullyUnconstrained(); - /*! \brief The storage id */ - int64_t storage_id{-1}; - - bool is_valid() const { return !virtual_device->IsFullyUnconstrained(); } - - bool is_compatible(const StorageToken& that) const { - return virtual_device == that.virtual_device; - } - - std::string ToString() const { - std::ostringstream os; - os << "{storage_id: " << storage_id << ", max_bytes: " << max_bytes - << ", ttype: " << PrettyPrint(ttype) << ", virtual_device: " << virtual_device << "}"; - return os.str(); - } -}; - class StorageAllocaBaseVisitor : public transform::DeviceAwareExprVisitor { public: StorageAllocaBaseVisitor() : transform::DeviceAwareExprVisitor(Optional()) {} @@ -380,221 +354,6 @@ class StorageAllocator : public StorageAllocaBaseVisitor { } } - /** - * @brief Memory manager for flattened 1d memory (buffers) - */ - class TokenAllocator1D { - public: - /*! - * \brief ceil(size/word_size) to get number of words. - * \param size The original size. - * \param word_size The element size. - */ - static size_t DivRoundUp(size_t size, size_t word_size) { - return (size + word_size - 1) / word_size; - } - - /*! - * \brief Get the memory requirement. - * \param prototype The prototype token. - * \return The required memory size. - * - * TODO(mbs): Gf GetMemorySizeBytes in aot_executor_codegen.cc, - * CalculateRelayExprSizeBytes in utils.cc - */ - size_t GetMemorySize(StorageToken* prototype) { - TensorType ttype = prototype->ttype; - ICHECK(ttype.defined()); - size_t size = 1; - for (IndexExpr dim : ttype->shape) { - const int64_t* pval = tir::as_const_int(dim); - ICHECK(pval != nullptr) << "Cannot allocate memory symbolic tensor shape " << ttype->shape; - ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape" << *pval; - size *= static_cast(pval[0]); - } - size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8); - return size; - } - /*! - * \brief Request a storage token for a given prototype. - * \param prototype. The prototype storage token. - * \return The result token. - */ - StorageToken* Request(StorageToken* prototype) { - // calculate the size; - size_t size = GetMemorySize(prototype); - // search memory block in [size / match_range_, size * match_range_) - if (match_range_ == 0) { - return nullptr; - } - auto begin = free_.lower_bound(size / match_range_); - auto mid = free_.lower_bound(size); - auto end = free_.upper_bound(size * match_range_); - // search for memory blocks larger than requested - for (auto it = mid; it != end; ++it) { - StorageToken* tok = it->second; - if (!tok->is_compatible(*prototype)) continue; - ICHECK_EQ(tok->ref_counter, 0); - // Use exect matching strategy - tok->max_bytes = std::max(size, tok->max_bytes); - tok->ref_counter = prototype->ref_counter; - // find a exact match, erase from map and return - free_.erase(it); - return tok; - } - // then search for memory blocks smaller than requested space - for (auto it = mid; it != begin;) { - --it; - StorageToken* tok = it->second; - if (!tok->is_compatible(*prototype)) continue; - ICHECK_EQ(tok->ref_counter, 0); - // Use exect matching strategy - tok->max_bytes = std::max(size, tok->max_bytes); - tok->ref_counter = prototype->ref_counter; - // erase from map and return - free_.erase(it); - return tok; - } - return nullptr; - } - /*! - * \brief Alloacte a storage token by consuming prototype - * \param prototype The prototype token. - * \param size The size of memory being requested. - */ - StorageToken* Alloc(StorageToken* prototype, int64_t storage_id) { - size_t size = GetMemorySize(prototype); - prototype->max_bytes = size; - prototype->storage_id = storage_id; - data_.push_back(prototype); - return prototype; - } - /*! - * \brief Check if we can release token. - * \param tok The token to be released. - */ - void CheckForRelease(StorageToken* tok) { - ICHECK_GE(tok->storage_id, 0); - ICHECK_GE(tok->ref_counter, 0); - if (tok->ref_counter == 0) { - free_.insert({tok->max_bytes, tok}); - } - } - - private: - // scale used for rough match - const size_t match_range_{16}; - // free list of storage entry - std::multimap free_; - // all the storage resources available - std::vector data_; - }; - - /** - * @brief Memory manager for 2d memory (textures) - */ - class TokenAllocator2D { - public: - /*! - * \brief Request a storage token for a given prototype. - * \param prototype. The prototype storage token. - * \return The result token. - */ - StorageToken* Request(StorageToken* prototype) { - auto shape = GetSize2D(prototype); - int64_t requested_size = shape.height * shape.width; - int64_t min_added_size = std::numeric_limits::max(); - int64_t min_wasted_size = std::numeric_limits::max(); - int64_t best_storage_id = -1; - MemBlock best_mem, new_mem; - for (int64_t free_id : free_list_) { - MemBlock& cached = blocks_[free_id]; - // Can only reuse texture 2d blocks of the same type - if (cached.token_->ttype->dtype != prototype->ttype->dtype) { - continue; - } - int64_t cached_size = cached.x_ * cached.y_; - new_mem.x_ = std::max(cached.x_, shape.width); - new_mem.y_ = std::max(cached.y_, shape.height); - int64_t expanded_size = new_mem.x_ * new_mem.y_; - int64_t added_size = expanded_size - cached_size; - int64_t wasted_size = expanded_size - requested_size; - // Prioritize minimization of added size first, then minimize - // wasted size among blocks which would not require expansion - if ((min_added_size > 0 && added_size < min_added_size) || - (min_added_size == 0 && wasted_size < min_wasted_size)) { - min_added_size = added_size; - min_wasted_size = wasted_size; - best_storage_id = free_id; - best_mem = new_mem; - } - } - - if (min_added_size <= requested_size) { - best_mem.token_ = blocks_[best_storage_id].token_; - // Reset the reference counter of the now live token - best_mem.token_->ref_counter = prototype->ref_counter; - blocks_[best_storage_id] = best_mem; - free_list_.erase(best_storage_id); - return best_mem.token_; - } - return nullptr; - } - /*! - * \brief Alloacte a storage token by consuming prototype - * \param prototype The prototype token. - * \param size The size of memory being requested. - */ - StorageToken* Alloc(StorageToken* prototype, int64_t storage_id) { - auto shape = GetSize2D(prototype); - MemBlock block; - block.x_ = shape.width; - block.y_ = shape.height; - prototype->storage_id = storage_id; - block.token_ = prototype; - blocks_[prototype->storage_id] = block; - return prototype; - } - /*! - * \brief Check if we can release token. - * \param tok The token to be released. - */ - void CheckForRelease(StorageToken* tok) { - ICHECK_GE(tok->storage_id, 0); - ICHECK_GE(tok->ref_counter, 0); - if (tok->ref_counter == 0) { - free_list_.insert(tok->storage_id); - } - } - /*! - * \brief Get the texture 2d size requirement - * \param prototype The prototype token. - * \return The required texture 2d memory size in (width, height, channel). - */ - Texture2DShape GetSize2D(StorageToken* prototype) { - TensorType ttype = prototype->ttype; - ICHECK(ttype.defined()); - size_t axis = runtime::DefaultTextureLayoutSeparator(ttype->shape.size(), - prototype->virtual_device->memory_scope); - struct Shape { - const Array& shape; - int64_t operator[](size_t i) const { return *tir::as_const_int(shape[i]); } - }; - return runtime::ApplyTexture2DFlattening(Shape{ttype->shape}, ttype->shape.size(), - axis); - } - - private: - struct MemBlock { - StorageToken* token_; - int64_t x_; - int64_t y_; - }; - - std::unordered_map blocks_; - std::unordered_set free_list_; - }; - class TokenAllocator { public: StorageToken* Alloc(StorageToken* proto) { diff --git a/src/relay/backend/token_allocator.cc b/src/relay/backend/token_allocator.cc new file mode 100644 index 000000000000..bdecba9afad7 --- /dev/null +++ b/src/relay/backend/token_allocator.cc @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file relay/backend/token_allocator.cc + * \brief Token allocation classes for backend + */ + +#include "token_allocator.h" + +#include + +#include +#include + +namespace tvm { +namespace relay { + +size_t TokenAllocator1D::GetMemorySize(StorageToken* prototype) { + TensorType ttype = prototype->ttype; + ICHECK(ttype.defined()); + size_t size = 1; + for (IndexExpr dim : ttype->shape) { + const int64_t* pval = tir::as_const_int(dim); + ICHECK(pval != nullptr) << "Cannot allocate memory symbolic tensor shape " << ttype->shape; + ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape" << *pval; + size *= static_cast(pval[0]); + } + size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8); + return size; +} + +StorageToken* TokenAllocator1D::Request(StorageToken* prototype) { + // calculate the size; + size_t size = GetMemorySize(prototype); + // search memory block in [size / match_range_, size * match_range_) + if (match_range_ == 0) { + return nullptr; + } + auto begin = free_.lower_bound(size / match_range_); + auto mid = free_.lower_bound(size); + auto end = free_.upper_bound(size * match_range_); + // search for memory blocks larger than requested + for (auto it = mid; it != end; ++it) { + StorageToken* tok = it->second; + if (!tok->is_compatible(*prototype)) continue; + ICHECK_EQ(tok->ref_counter, 0); + // Use exect matching strategy + tok->max_bytes = std::max(size, tok->max_bytes); + tok->ref_counter = prototype->ref_counter; + // find a exact match, erase from map and return + free_.erase(it); + return tok; + } + // then search for memory blocks smaller than requested space + for (auto it = mid; it != begin;) { + --it; + StorageToken* tok = it->second; + if (!tok->is_compatible(*prototype)) continue; + ICHECK_EQ(tok->ref_counter, 0); + // Use exect matching strategy + tok->max_bytes = std::max(size, tok->max_bytes); + tok->ref_counter = prototype->ref_counter; + // erase from map and return + free_.erase(it); + return tok; + } + return nullptr; +} + +StorageToken* TokenAllocator1D::Alloc(StorageToken* prototype, int64_t storage_id) { + size_t size = GetMemorySize(prototype); + prototype->max_bytes = size; + prototype->storage_id = storage_id; + data_.push_back(prototype); + return prototype; +} + +void TokenAllocator1D::CheckForRelease(StorageToken* tok) { + ICHECK_GE(tok->storage_id, 0); + ICHECK_GE(tok->ref_counter, 0); + if (tok->ref_counter == 0) { + free_.insert({tok->max_bytes, tok}); + } +} + +StorageToken* TokenAllocator2D::Request(StorageToken* prototype) { + auto shape = GetSize2D(prototype); + const int64_t max_ratio = 5; + int64_t min_added_size_x = std::numeric_limits::max(); + int64_t min_added_size_y = std::numeric_limits::max(); + int64_t min_wasted_size_x = std::numeric_limits::max(); + int64_t min_wasted_size_y = std::numeric_limits::max(); + int64_t best_storage_id = -1; + MemBlock new_mem; + for (int64_t free_id : free_list_) { + MemBlock& cached = blocks_[free_id]; + // Can only reuse texture 2d blocks of the same type + if (cached.token_->ttype->dtype != prototype->ttype->dtype) { + continue; + } + // Can only reuse texture 2d blocks of the same scope + // Because reusing textures with different memory scope may lead to + // accuracy issues, because the data will be packed in a different way for + // different memory scopes. + if (cached.token_->virtual_device->memory_scope != prototype->virtual_device->memory_scope) { + continue; + } + // avoid reusing too small and too big textures + if (shape.width / cached.x_ > max_ratio || cached.x_ / shape.width > max_ratio || + shape.height / cached.y_ > max_ratio || cached.y_ / shape.height > max_ratio) { + continue; + } + int64_t new_width = std::max(cached.x_, shape.width); + int64_t new_height = std::max(cached.y_, shape.height); + int64_t added_size_x = new_width - cached.x_; + int64_t added_size_y = new_height - cached.y_; + int64_t wasted_size_x = new_width - shape.width; + int64_t wasted_size_y = new_height - shape.height; + // Prioritize minimization of added size first, then minimize + // wasted size among blocks which would not require expansion + if ((min_added_size_x > 0 && added_size_x < min_added_size_x) || + (min_added_size_y > 0 && added_size_y < min_added_size_y) || + (min_added_size_x == added_size_x && wasted_size_x < min_wasted_size_x) || + (min_added_size_y == added_size_y && wasted_size_y < min_wasted_size_y)) { + min_added_size_x = added_size_x; + min_added_size_y = added_size_y; + min_wasted_size_x = wasted_size_x; + min_wasted_size_y = wasted_size_y; + best_storage_id = free_id; + new_mem.x_ = new_width; + new_mem.y_ = new_height; + } + } + + if (min_added_size_x == 0 && min_added_size_y == 0) { + // use existing block + free_list_.erase(best_storage_id); + blocks_[best_storage_id].token_->ref_counter += prototype->ref_counter; + return blocks_[best_storage_id].token_; + } else if (min_added_size_x <= shape.width || min_added_size_y <= shape.height) { + // Reset the reference counter of the now live token + free_list_.erase(best_storage_id); + new_mem.token_ = prototype; + new_mem.token_->ref_counter += 1; + new_mem.token_->storage_id = best_storage_id; + blocks_[best_storage_id] = new_mem; + return new_mem.token_; + } + return nullptr; +} + +StorageToken* TokenAllocator2D::Alloc(StorageToken* prototype, int64_t storage_id) { + auto shape = GetSize2D(prototype); + MemBlock block; + block.x_ = shape.width; + block.y_ = shape.height; + prototype->storage_id = storage_id; + block.token_ = prototype; + blocks_[prototype->storage_id] = block; + return prototype; +} + +void TokenAllocator2D::CheckForRelease(StorageToken* tok) { + ICHECK_GE(tok->storage_id, 0); + ICHECK_GE(tok->ref_counter, 0); + if (tok->ref_counter == 0) { + free_list_.insert(tok->storage_id); + } +} + +runtime::Texture2DShape TokenAllocator2D::GetSize2D(StorageToken* prototype) { + TensorType ttype = prototype->ttype; + ICHECK(ttype.defined()); + size_t axis = runtime::DefaultTextureLayoutSeparator(ttype->shape.size(), + prototype->virtual_device->memory_scope); + struct Shape { + const Array& shape; + int64_t operator[](size_t i) const { return *tir::as_const_int(shape[i]); } + }; + return runtime::ApplyTexture2DFlattening(Shape{ttype->shape}, ttype->shape.size(), axis); +} + +} // namespace relay +} // namespace tvm diff --git a/src/relay/backend/token_allocator.h b/src/relay/backend/token_allocator.h new file mode 100644 index 000000000000..3aebd71b6c2b --- /dev/null +++ b/src/relay/backend/token_allocator.h @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file relay/backend/token_allocator.h + * \brief Token allocation classes for backend + */ +#ifndef TVM_RELAY_BACKEND_TOKEN_ALLOCATOR_H_ +#define TVM_RELAY_BACKEND_TOKEN_ALLOCATOR_H_ + +#include +#include + +#include +#include +#include +#include +#include + +#include "../../runtime/texture.h" + +namespace tvm { +namespace relay { + +/*! A representation of a block of memory required at runtime on some device. */ +struct StorageToken { + /*! \brief Reference counter */ + int ref_counter{0}; + /*! \brief number of bytes */ + size_t max_bytes{0}; + /*! \brief The corresponding tensor type. */ + TensorType ttype{nullptr}; + /*! \brief VirtualDevice on which the memory will reside. */ + VirtualDevice virtual_device = VirtualDevice::FullyUnconstrained(); + /*! \brief The storage id */ + int64_t storage_id{-1}; + + bool is_valid() const { return !virtual_device->IsFullyUnconstrained(); } + + bool is_compatible(const StorageToken& that) const { + return virtual_device == that.virtual_device; + } + + std::string ToString() const { + std::ostringstream os; + os << "{storage_id: " << storage_id << ", max_bytes: " << max_bytes + << ", ttype: " << PrettyPrint(ttype) << ", virtual_device: " << virtual_device << "}"; + return os.str(); + } +}; + +/** + * @brief Memory manager for flattened 1d memory (buffers) + */ +class TokenAllocator1D { + public: + /*! + * \brief ceil(size/word_size) to get number of words. + * \param size The original size. + * \param word_size The element size. + */ + static size_t DivRoundUp(size_t size, size_t word_size) { + return (size + word_size - 1) / word_size; + } + + /*! + * \brief Get the memory requirement. + * \param prototype The prototype token. + * \return The required memory size. + * + * TODO(mbs): Gf GetMemorySizeBytes in aot_executor_codegen.cc, + * CalculateRelayExprSizeBytes in utils.cc + */ + size_t GetMemorySize(StorageToken* prototype); + /*! + * \brief Request a storage token for a given prototype. + * \param prototype. The prototype storage token. + * \return The result token. + */ + StorageToken* Request(StorageToken* prototype); + /*! + * \brief Alloacte a storage token by consuming prototype + * \param prototype The prototype token. + * \param size The size of memory being requested. + */ + StorageToken* Alloc(StorageToken* prototype, int64_t storage_id); + /*! + * \brief Check if we can release token. + * \param tok The token to be released. + */ + void CheckForRelease(StorageToken* tok); + + private: + // scale used for rough match + const size_t match_range_{16}; + // free list of storage entry + std::multimap free_; + // all the storage resources available + std::vector data_; +}; + +/** + * @brief Memory manager for 2d memory (textures) + */ +class TokenAllocator2D { + public: + /*! + * \brief Request a storage token for a given prototype. + * \param prototype. The prototype storage token. + * \return The result token. + */ + StorageToken* Request(StorageToken* prototype); + /*! + * \brief Alloacte a storage token by consuming prototype + * \param prototype The prototype token. + * \param size The size of memory being requested. + */ + StorageToken* Alloc(StorageToken* prototype, int64_t storage_id); + /*! + * \brief Check if we can release token. + * \param tok The token to be released. + */ + void CheckForRelease(StorageToken* tok); + /*! + * \brief Get the texture 2d size requirement + * \param prototype The prototype token. + * \return The required texture 2d memory size in (width, height, channel). + */ + runtime::Texture2DShape GetSize2D(StorageToken* prototype); + + protected: + struct MemBlock { + StorageToken* token_; + int64_t x_; + int64_t y_; + }; + + std::unordered_map blocks_; + std::unordered_set free_list_; +}; + +} // namespace relay +} // namespace tvm + +#endif // TVM_RELAY_BACKEND_TOKEN_ALLOCATOR_H_ diff --git a/tests/cpp/relay/backend/graph_plan_token_alloc.cc b/tests/cpp/relay/backend/graph_plan_token_alloc.cc new file mode 100644 index 000000000000..4641da2cb8b5 --- /dev/null +++ b/tests/cpp/relay/backend/graph_plan_token_alloc.cc @@ -0,0 +1,351 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include + +#include "../src/relay/backend/token_allocator.h" + +namespace tvm { +namespace relay { + +// TokenAllocator2d is necessary because in class TokenAllocator2D we don't +// have an access to its protected members. In this class we add new methods +// which allow us to get and check internal state of class TokenAllocator2D +class TokenAllocator2DWrapper : public TokenAllocator2D { + public: + inline size_t FreeListSize() const { return free_list_.size(); } + inline size_t BlockMapSize() const { return blocks_.size(); } +}; + +TEST(Token2DAlloc, OneToken) { + TokenAllocator2DWrapper alloc; + int storage_ids = 0; + EXPECT_EQ(alloc.BlockMapSize(), 0); + EXPECT_EQ(alloc.FreeListSize(), 0); + + TensorType tt1({1, 22, 20, 20, 4}, DataType(kDLFloat, 32, 1)); + VirtualDevice vd1(kDLOpenCL, 0, {}, MemoryScope("global.texture-nhwc")); + StorageToken tok1 = { + 1, // ref_counter + 0, // max bytes + tt1, // tensor type + vd1, // virtual device + -1 // storage_id + }; + auto size2d = alloc.GetSize2D(&tok1); + EXPECT_EQ(size2d.channel, 4); + EXPECT_EQ(size2d.height, 22); + EXPECT_EQ(size2d.width, 400); + EXPECT_EQ(alloc.Request(&tok1), nullptr); + + alloc.Alloc(&tok1, storage_ids++); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 0); + + tok1.ref_counter -= 1; + alloc.CheckForRelease(&tok1); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 1); +} + +TEST(Token2DAlloc, EqualSizeTokenReuse) { + TokenAllocator2DWrapper alloc; + int storage_ids = 0; + EXPECT_EQ(alloc.BlockMapSize(), 0); + EXPECT_EQ(alloc.FreeListSize(), 0); + + TensorType tt1({1, 22, 20, 20, 4}, DataType(kDLFloat, 32, 1)); + VirtualDevice vd1(kDLOpenCL, 0, {}, MemoryScope("global.texture-nhwc")); + StorageToken tok1 = { + 1, // ref_counter + 0, // max bytes + tt1, // tensor type + vd1, // virtual device + -1 // storage_id + }; + auto size2d = alloc.GetSize2D(&tok1); + EXPECT_EQ(size2d.channel, 4); + EXPECT_EQ(size2d.height, 22); + EXPECT_EQ(size2d.width, 400); + EXPECT_EQ(alloc.Request(&tok1), nullptr); + + alloc.Alloc(&tok1, storage_ids++); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 0); + + tok1.ref_counter -= 1; + alloc.CheckForRelease(&tok1); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 1); + + StorageToken tok2 = { + 1, // ref_counter + 0, // max bytes + tt1, // tensor type + vd1, // virtual device + -1 // storage_id + }; + auto req = alloc.Request(&tok2); + EXPECT_NE(req, nullptr); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 0); + EXPECT_EQ(req->storage_id, storage_ids - 1); + EXPECT_EQ(req->ref_counter, 1); + auto sizeReq = alloc.GetSize2D(req); + EXPECT_EQ(sizeReq.channel, 4); + EXPECT_EQ(sizeReq.height, 22); + EXPECT_EQ(sizeReq.width, 400); +} + +TEST(Token2DAlloc, EqualSizeDiffTypes) { + TokenAllocator2DWrapper alloc; + int storage_ids = 0; + EXPECT_EQ(alloc.BlockMapSize(), 0); + EXPECT_EQ(alloc.FreeListSize(), 0); + + TensorType tt1({1, 22, 20, 20, 4}, DataType(kDLFloat, 32, 1)); + VirtualDevice vd1(kDLOpenCL, 0, {}, MemoryScope("global.texture-nhwc")); + StorageToken tok1 = { + 1, // ref_counter + 0, // max bytes + tt1, // tensor type + vd1, // virtual device + -1 // storage_id + }; + auto size2d = alloc.GetSize2D(&tok1); + EXPECT_EQ(size2d.channel, 4); + EXPECT_EQ(size2d.height, 22); + EXPECT_EQ(size2d.width, 400); + EXPECT_EQ(alloc.Request(&tok1), nullptr); + + alloc.Alloc(&tok1, storage_ids++); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 0); + + tok1.ref_counter -= 1; + alloc.CheckForRelease(&tok1); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 1); + + TensorType tt2({1, 22, 20, 20, 4}, DataType(kDLFloat, 16, 1)); + StorageToken tok2 = { + 1, // ref_counter + 0, // max bytes + tt2, // tensor type + vd1, // virtual device + -1 // storage_id + }; + EXPECT_EQ(alloc.Request(&tok2), nullptr); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 1); + + alloc.Alloc(&tok2, storage_ids++); + EXPECT_EQ(alloc.BlockMapSize(), 2); + EXPECT_EQ(alloc.FreeListSize(), 1); + + tok2.ref_counter -= 1; + alloc.CheckForRelease(&tok2); + EXPECT_EQ(alloc.BlockMapSize(), 2); + EXPECT_EQ(alloc.FreeListSize(), 2); +} + +TEST(Token2DAlloc, DifferentSizesTokenReuse) { + TokenAllocator2DWrapper alloc; + int storage_ids = 0; + EXPECT_EQ(alloc.BlockMapSize(), 0); + EXPECT_EQ(alloc.FreeListSize(), 0); + + TensorType tt1({1, 22, 20, 20, 4}, DataType(kDLFloat, 32, 1)); + VirtualDevice vd1(kDLOpenCL, 0, {}, MemoryScope("global.texture-nhwc")); + StorageToken tok1 = { + 1, // ref_counter + 0, // max bytes + tt1, // tensor type + vd1, // virtual device + -1 // storage_id + }; + auto size2d = alloc.GetSize2D(&tok1); + EXPECT_EQ(size2d.channel, 4); + EXPECT_EQ(size2d.height, 22); + EXPECT_EQ(size2d.width, 400); + EXPECT_EQ(alloc.Request(&tok1), nullptr); + + alloc.Alloc(&tok1, storage_ids++); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 0); + + tok1.ref_counter -= 1; + alloc.CheckForRelease(&tok1); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 1); + + TensorType tt2({1, 40, 30, 30, 4}, DataType(kDLFloat, 32, 1)); + StorageToken tok2 = { + 1, // ref_counter + 0, // max bytes + tt2, // tensor type + vd1, // virtual device + -1 // storage_id + }; + auto req = alloc.Request(&tok2); + EXPECT_NE(req, nullptr); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 0); + EXPECT_EQ(req->storage_id, storage_ids - 1); + EXPECT_EQ(req->ref_counter, 2); + auto sizeReq = alloc.GetSize2D(req); + EXPECT_EQ(sizeReq.channel, 4); + EXPECT_EQ(sizeReq.height, 40); + EXPECT_EQ(sizeReq.width, 900); + + tok2.ref_counter -= 1; + req->ref_counter -= 1; + alloc.CheckForRelease(&tok1); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 1); + + TensorType tt3({1, 25, 30, 30, 4}, DataType(kDLFloat, 32, 1)); + StorageToken tok3 = { + 1, // ref_counter + 0, // max bytes + tt3, // tensor type + vd1, // virtual device + -1 // storage_id + }; + auto req2 = alloc.Request(&tok3); + EXPECT_NE(req2, nullptr); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 0); + EXPECT_EQ(req2->storage_id, storage_ids - 1); + EXPECT_EQ(req2->ref_counter, 1); + auto sizeReq2 = alloc.GetSize2D(req2); + EXPECT_EQ(sizeReq2.channel, 4); + EXPECT_EQ(sizeReq2.height, 40); + EXPECT_EQ(sizeReq2.width, 900); +} + +TEST(Token2DAlloc, DifferentSizesTokenReuse2) { + TokenAllocator2DWrapper alloc; + int storage_ids = 0; + EXPECT_EQ(alloc.BlockMapSize(), 0); + EXPECT_EQ(alloc.FreeListSize(), 0); + + TensorType tt1({1, 22, 20, 20, 4}, DataType(kDLFloat, 32, 1)); + VirtualDevice vd1(kDLOpenCL, 0, {}, MemoryScope("global.texture-nhwc")); + StorageToken tok1 = { + 1, // ref_counter + 0, // max bytes + tt1, // tensor type + vd1, // virtual device + -1 // storage_id + }; + auto size2d = alloc.GetSize2D(&tok1); + EXPECT_EQ(size2d.channel, 4); + EXPECT_EQ(size2d.height, 22); + EXPECT_EQ(size2d.width, 400); + EXPECT_EQ(alloc.Request(&tok1), nullptr); + + alloc.Alloc(&tok1, storage_ids++); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 0); + + tok1.ref_counter -= 1; + alloc.CheckForRelease(&tok1); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 1); + + TensorType tt2({1, 5, 30, 20, 4}, DataType(kDLFloat, 32, 1)); + StorageToken tok2 = { + 1, // ref_counter + 0, // max bytes + tt2, // tensor type + vd1, // virtual device + -1 // storage_id + }; + auto req = alloc.Request(&tok2); + EXPECT_NE(req, nullptr); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 0); + EXPECT_EQ(req->storage_id, storage_ids - 1); + EXPECT_EQ(req->ref_counter, 2); + auto sizeReq = alloc.GetSize2D(req); + EXPECT_EQ(sizeReq.channel, 4); + EXPECT_EQ(sizeReq.height, 5); + EXPECT_EQ(sizeReq.width, 600); +} + +TEST(Token2DAlloc, SameSizesButDiffMemoryScopes) { + TokenAllocator2DWrapper alloc; + int storage_ids = 0; + EXPECT_EQ(alloc.BlockMapSize(), 0); + EXPECT_EQ(alloc.FreeListSize(), 0); + + TensorType tt1({28, 676, 1, 1, 4}, DataType(kDLFloat, 32, 1)); + VirtualDevice vd1(kDLOpenCL, 0, {}, MemoryScope("global.texture-weight")); + StorageToken tok1 = { + 1, // ref_counter + 0, // max bytes + tt1, // tensor type + vd1, // virtual device + -1 // storage_id + }; + auto size2d = alloc.GetSize2D(&tok1); + EXPECT_EQ(size2d.channel, 4); + EXPECT_EQ(size2d.height, 28); + EXPECT_EQ(size2d.width, 676); + EXPECT_EQ(alloc.Request(&tok1), nullptr); + + alloc.Alloc(&tok1, storage_ids++); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 0); + + tok1.ref_counter -= 1; + alloc.CheckForRelease(&tok1); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 1); + + TensorType tt2({1, 28, 26, 26, 4}, DataType(kDLFloat, 32, 1)); + VirtualDevice vd2(kDLOpenCL, 0, {}, MemoryScope("global.texture-nhwc")); + StorageToken tok2 = { + 1, // ref_counter + 0, // max bytes + tt2, // tensor type + vd2, // virtual device + -1 // storage_id + }; + auto tok2Size = alloc.GetSize2D(&tok2); + EXPECT_EQ(tok2Size.channel, 4); + EXPECT_EQ(tok2Size.height, 28); + EXPECT_EQ(tok2Size.width, 676); + + EXPECT_EQ(alloc.Request(&tok2), nullptr); + EXPECT_EQ(alloc.BlockMapSize(), 1); + EXPECT_EQ(alloc.FreeListSize(), 1); + + alloc.Alloc(&tok2, storage_ids++); + EXPECT_EQ(alloc.BlockMapSize(), 2); + EXPECT_EQ(alloc.FreeListSize(), 1); + + tok2.ref_counter -= 1; + alloc.CheckForRelease(&tok2); + EXPECT_EQ(alloc.BlockMapSize(), 2); + EXPECT_EQ(alloc.FreeListSize(), 2); +} +} // namespace relay +} // namespace tvm