From baf0c6ab4a29f5c311dea94044a794b6794f3f69 Mon Sep 17 00:00:00 2001
From: Egor Churaev <egor.churaev@gmail.com>
Date: Tue, 8 Nov 2022 14:45:33 +0300
Subject: [PATCH] [Adreno][Textures] Fix static memory planner  (#13253)

* [Adreno][Textures] Fix static memory planner

Fix memory reuse in the static memory planner.

* Move token allocators to separate file

* Add test on TokenAllocator2d

* Apply comments and fix CI
---
 src/relay/backend/graph_plan_memory.cc        | 243 +-----------
 src/relay/backend/token_allocator.cc          | 201 ++++++++++
 src/relay/backend/token_allocator.h           | 161 ++++++++
 .../relay/backend/graph_plan_token_alloc.cc   | 351 ++++++++++++++++++
 4 files changed, 714 insertions(+), 242 deletions(-)
 create mode 100644 src/relay/backend/token_allocator.cc
 create mode 100644 src/relay/backend/token_allocator.h
 create mode 100644 tests/cpp/relay/backend/graph_plan_token_alloc.cc
diff --git a/src/relay/backend/graph_plan_memory.cc b/src/relay/backend/graph_plan_memory.cc
index f927bf633732..d85ffd78291c 100644
--- a/src/relay/backend/graph_plan_memory.cc
+++ b/src/relay/backend/graph_plan_memory.cc
@@ -37,6 +37,7 @@
 #include "../op/call/call.h"
 #include "../op/memory/memory.h"
 #include "../transforms/device_aware_visitors.h"
+#include "./token_allocator.h"
 #include "./utils.h"
 
 namespace tvm {
@@ -50,33 +51,6 @@ using backend::StaticMemoryPlan;
 using backend::StorageInfo;
 using IntegerArray = Array<Integer>;
 
-/*! A representation of a block of memory required at runtime on some device. */
-struct StorageToken {
-  /*! \brief Reference counter */
-  int ref_counter{0};
-  /*! \brief number of bytes */
-  size_t max_bytes{0};
-  /*! \brief The corresponding tensor type. */
-  TensorType ttype{nullptr};
-  /*! \brief VirtualDevice on which the memory will reside. */
-  VirtualDevice virtual_device = VirtualDevice::FullyUnconstrained();
-  /*! \brief The storage id */
-  int64_t storage_id{-1};
-
-  bool is_valid() const { return !virtual_device->IsFullyUnconstrained(); }
-
-  bool is_compatible(const StorageToken& that) const {
-    return virtual_device == that.virtual_device;
-  }
-
-  std::string ToString() const {
-    std::ostringstream os;
-    os << "{storage_id: " << storage_id << ", max_bytes: " << max_bytes
-       << ", ttype: " << PrettyPrint(ttype) << ", virtual_device: " << virtual_device << "}";
-    return os.str();
-  }
-};
-
 class StorageAllocaBaseVisitor : public transform::DeviceAwareExprVisitor {
  public:
   StorageAllocaBaseVisitor() : transform::DeviceAwareExprVisitor(Optional<IRModule>()) {}
@@ -380,221 +354,6 @@ class StorageAllocator : public StorageAllocaBaseVisitor {
     }
   }
 
-  /**
-   * @brief Memory manager for flattened 1d memory (buffers)
-   */
-  class TokenAllocator1D {
-   public:
-    /*!
-     * \brief ceil(size/word_size) to get number of words.
-     * \param size The original size.
-     * \param word_size The element size.
-     */
-    static size_t DivRoundUp(size_t size, size_t word_size) {
-      return (size + word_size - 1) / word_size;
-    }
-
-    /*!
-     * \brief Get the memory requirement.
-     * \param prototype The prototype token.
-     * \return The required memory size.
-     *
-     * TODO(mbs): Gf GetMemorySizeBytes in aot_executor_codegen.cc,
-     * CalculateRelayExprSizeBytes in utils.cc
-     */
-    size_t GetMemorySize(StorageToken* prototype) {
-      TensorType ttype = prototype->ttype;
-      ICHECK(ttype.defined());
-      size_t size = 1;
-      for (IndexExpr dim : ttype->shape) {
-        const int64_t* pval = tir::as_const_int(dim);
-        ICHECK(pval != nullptr) << "Cannot allocate memory symbolic tensor shape " << ttype->shape;
-        ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape" << *pval;
-        size *= static_cast<size_t>(pval[0]);
-      }
-      size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);
-      return size;
-    }
-    /*!
-     * \brief Request a storage token for a given prototype.
-     * \param prototype. The prototype storage token.
-     * \return The result token.
-     */
-    StorageToken* Request(StorageToken* prototype) {
-      // calculate the size;
-      size_t size = GetMemorySize(prototype);
-      // search memory block in [size / match_range_, size * match_range_)
-      if (match_range_ == 0) {
-        return nullptr;
-      }
-      auto begin = free_.lower_bound(size / match_range_);
-      auto mid = free_.lower_bound(size);
-      auto end = free_.upper_bound(size * match_range_);
-      // search for memory blocks larger than requested
-      for (auto it = mid; it != end; ++it) {
-        StorageToken* tok = it->second;
-        if (!tok->is_compatible(*prototype)) continue;
-        ICHECK_EQ(tok->ref_counter, 0);
-        // Use exect matching strategy
-        tok->max_bytes = std::max(size, tok->max_bytes);
-        tok->ref_counter = prototype->ref_counter;
-        // find a exact match, erase from map and return
-        free_.erase(it);
-        return tok;
-      }
-      // then search for memory blocks smaller than requested space
-      for (auto it = mid; it != begin;) {
-        --it;
-        StorageToken* tok = it->second;
-        if (!tok->is_compatible(*prototype)) continue;
-        ICHECK_EQ(tok->ref_counter, 0);
-        // Use exect matching strategy
-        tok->max_bytes = std::max(size, tok->max_bytes);
-        tok->ref_counter = prototype->ref_counter;
-        // erase from map and return
-        free_.erase(it);
-        return tok;
-      }
-      return nullptr;
-    }
-    /*!
-     * \brief Alloacte a storage token by consuming prototype
-     * \param prototype The prototype token.
-     * \param size The size of memory being requested.
-     */
-    StorageToken* Alloc(StorageToken* prototype, int64_t storage_id) {
-      size_t size = GetMemorySize(prototype);
-      prototype->max_bytes = size;
-      prototype->storage_id = storage_id;
-      data_.push_back(prototype);
-      return prototype;
-    }
-    /*!
-     * \brief Check if we can release token.
-     * \param tok The token to be released.
-     */
-    void CheckForRelease(StorageToken* tok) {
-      ICHECK_GE(tok->storage_id, 0);
-      ICHECK_GE(tok->ref_counter, 0);
-      if (tok->ref_counter == 0) {
-        free_.insert({tok->max_bytes, tok});
-      }
-    }
-
-   private:
-    // scale used for rough match
-    const size_t match_range_{16};
-    // free list of storage entry
-    std::multimap<size_t, StorageToken*> free_;
-    // all the storage resources available
-    std::vector<StorageToken*> data_;
-  };
-
-  /**
-   * @brief Memory manager for 2d memory (textures)
-   */
-  class TokenAllocator2D {
-   public:
-    /*!
-     * \brief Request a storage token for a given prototype.
-     * \param prototype. The prototype storage token.
-     * \return The result token.
-     */
-    StorageToken* Request(StorageToken* prototype) {
-      auto shape = GetSize2D(prototype);
-      int64_t requested_size = shape.height * shape.width;
-      int64_t min_added_size = std::numeric_limits<int64_t>::max();
-      int64_t min_wasted_size = std::numeric_limits<int64_t>::max();
-      int64_t best_storage_id = -1;
-      MemBlock best_mem, new_mem;
-      for (int64_t free_id : free_list_) {
-        MemBlock& cached = blocks_[free_id];
-        // Can only reuse texture 2d blocks of the same type
-        if (cached.token_->ttype->dtype != prototype->ttype->dtype) {
-          continue;
-        }
-        int64_t cached_size = cached.x_ * cached.y_;
-        new_mem.x_ = std::max(cached.x_, shape.width);
-        new_mem.y_ = std::max(cached.y_, shape.height);
-        int64_t expanded_size = new_mem.x_ * new_mem.y_;
-        int64_t added_size = expanded_size - cached_size;
-        int64_t wasted_size = expanded_size - requested_size;
-        // Prioritize minimization of added size first, then minimize
-        // wasted size among blocks which would not require expansion
-        if ((min_added_size > 0 && added_size < min_added_size) ||
-            (min_added_size == 0 && wasted_size < min_wasted_size)) {
-          min_added_size = added_size;
-          min_wasted_size = wasted_size;
-          best_storage_id = free_id;
-          best_mem = new_mem;
-        }
-      }
-
-      if (min_added_size <= requested_size) {
-        best_mem.token_ = blocks_[best_storage_id].token_;
-        // Reset the reference counter of the now live token
-        best_mem.token_->ref_counter = prototype->ref_counter;
-        blocks_[best_storage_id] = best_mem;
-        free_list_.erase(best_storage_id);
-        return best_mem.token_;
-      }
-      return nullptr;
-    }
-    /*!
-     * \brief Alloacte a storage token by consuming prototype
-     * \param prototype The prototype token.
-     * \param size The size of memory being requested.
-     */
-    StorageToken* Alloc(StorageToken* prototype, int64_t storage_id) {
-      auto shape = GetSize2D(prototype);
-      MemBlock block;
-      block.x_ = shape.width;
-      block.y_ = shape.height;
-      prototype->storage_id = storage_id;
-      block.token_ = prototype;
-      blocks_[prototype->storage_id] = block;
-      return prototype;
-    }
-    /*!
-     * \brief Check if we can release token.
-     * \param tok The token to be released.
-     */
-    void CheckForRelease(StorageToken* tok) {
-      ICHECK_GE(tok->storage_id, 0);
-      ICHECK_GE(tok->ref_counter, 0);
-      if (tok->ref_counter == 0) {
-        free_list_.insert(tok->storage_id);
-      }
-    }
-    /*!
-     * \brief Get the texture 2d size requirement
-     * \param prototype The prototype token.
-     * \return The required texture 2d memory size in (width, height, channel).
-     */
-    Texture2DShape GetSize2D(StorageToken* prototype) {
-      TensorType ttype = prototype->ttype;
-      ICHECK(ttype.defined());
-      size_t axis = runtime::DefaultTextureLayoutSeparator(ttype->shape.size(),
-                                                           prototype->virtual_device->memory_scope);
-      struct Shape {
-        const Array<PrimExpr>& shape;
-        int64_t operator[](size_t i) const { return *tir::as_const_int(shape[i]); }
-      };
-      return runtime::ApplyTexture2DFlattening<int64_t>(Shape{ttype->shape}, ttype->shape.size(),
-                                                        axis);
-    }
-
-   private:
-    struct MemBlock {
-      StorageToken* token_;
-      int64_t x_;
-      int64_t y_;
-    };
-
-    std::unordered_map<int64_t, MemBlock> blocks_;
-    std::unordered_set<int64_t> free_list_;
-  };
-
   class TokenAllocator {
    public:
     StorageToken* Alloc(StorageToken* proto) {
diff --git a/src/relay/backend/token_allocator.cc b/src/relay/backend/token_allocator.cc
new file mode 100644
index 000000000000..bdecba9afad7
--- /dev/null
+++ b/src/relay/backend/token_allocator.cc
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file relay/backend/token_allocator.cc
+ * \brief Token allocation classes for backend
+ */
+
+#include "token_allocator.h"
+
+#include <tvm/tir/op.h>
+
+#include <algorithm>
+#include <limits>
+
+namespace tvm {
+namespace relay {
+
+size_t TokenAllocator1D::GetMemorySize(StorageToken* prototype) {
+  TensorType ttype = prototype->ttype;
+  ICHECK(ttype.defined());
+  size_t size = 1;  // running product of the dimensions (element count)
+  for (IndexExpr dim : ttype->shape) {
+    const int64_t* pval = tir::as_const_int(dim);  // checked below: must be a constant
+    ICHECK(pval != nullptr) << "Cannot allocate memory symbolic tensor shape " << ttype->shape;
+    ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape" << *pval;
+    size *= static_cast<size_t>(pval[0]);
+  }
+  size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);  // bytes per element
+  return size;
+}
+
+StorageToken* TokenAllocator1D::Request(StorageToken* prototype) {
+  // Number of bytes the prototype's tensor needs.
+  size_t size = GetMemorySize(prototype);
+  // Candidates are free blocks with size in [size / match_range_, size * match_range_].
+  if (match_range_ == 0) {  // also keeps the division below well-defined
+    return nullptr;
+  }
+  auto begin = free_.lower_bound(size / match_range_);
+  auto mid = free_.lower_bound(size);
+  auto end = free_.upper_bound(size * match_range_);
+  // First pass: free blocks at least as large as the request, smallest first.
+  for (auto it = mid; it != end; ++it) {
+    StorageToken* tok = it->second;
+    if (!tok->is_compatible(*prototype)) continue;
+    ICHECK_EQ(tok->ref_counter, 0);
+    // Record the larger of the requested size and the block's current size.
+    tok->max_bytes = std::max(size, tok->max_bytes);
+    tok->ref_counter = prototype->ref_counter;
+    // The block is claimed: take it off the free map and hand it out.
+    free_.erase(it);
+    return tok;
+  }
+  // Second pass: smaller free blocks, walking down from just below `size`.
+  for (auto it = mid; it != begin;) {
+    --it;
+    StorageToken* tok = it->second;
+    if (!tok->is_compatible(*prototype)) continue;
+    ICHECK_EQ(tok->ref_counter, 0);
+    // Record the larger of the requested size and the block's current size.
+    tok->max_bytes = std::max(size, tok->max_bytes);
+    tok->ref_counter = prototype->ref_counter;
+    // Take the block off the free map and hand it out.
+    free_.erase(it);
+    return tok;
+  }
+  return nullptr;
+}
+
+StorageToken* TokenAllocator1D::Alloc(StorageToken* prototype, int64_t storage_id) {
+  size_t size = GetMemorySize(prototype);
+  prototype->max_bytes = size;  // the prototype itself becomes the storage token
+  prototype->storage_id = storage_id;
+  data_.push_back(prototype);  // remember every token created through Alloc
+  return prototype;
+}
+
+void TokenAllocator1D::CheckForRelease(StorageToken* tok) {
+  ICHECK_GE(tok->storage_id, 0);  // a storage id must already be assigned
+  ICHECK_GE(tok->ref_counter, 0);
+  if (tok->ref_counter == 0) {  // no remaining users: make the block reusable
+    free_.insert({tok->max_bytes, tok});  // keyed by size for the range search in Request
+  }
+}
+
+StorageToken* TokenAllocator2D::Request(StorageToken* prototype) {
+  auto shape = GetSize2D(prototype);
+  const int64_t max_ratio = 5;  // largest allowed per-axis size ratio (integer division)
+  int64_t min_added_size_x = std::numeric_limits<int64_t>::max();
+  int64_t min_added_size_y = std::numeric_limits<int64_t>::max();
+  int64_t min_wasted_size_x = std::numeric_limits<int64_t>::max();
+  int64_t min_wasted_size_y = std::numeric_limits<int64_t>::max();
+  int64_t best_storage_id = -1;  // stays -1 when no free block qualifies
+  MemBlock new_mem;              // extents the chosen block would have after reuse
+  for (int64_t free_id : free_list_) {
+    MemBlock& cached = blocks_[free_id];
+    // Can only reuse texture 2d blocks of the same type
+    if (cached.token_->ttype->dtype != prototype->ttype->dtype) {
+      continue;
+    }
+    // Can only reuse texture 2d blocks of the same memory scope: the data is
+    // packed differently for different scopes, so sharing a texture between
+    // scopes may lead to accuracy issues.
+    // (Only memory_scope is compared here, not the whole virtual device.)
+    if (cached.token_->virtual_device->memory_scope != prototype->virtual_device->memory_scope) {
+      continue;
+    }
+    // Skip blocks more than max_ratio times smaller or larger along either axis.
+    if (shape.width / cached.x_ > max_ratio || cached.x_ / shape.width > max_ratio ||
+        shape.height / cached.y_ > max_ratio || cached.y_ / shape.height > max_ratio) {
+      continue;  // NOTE(review): the divisions above assume non-zero extents
+    }
+    int64_t new_width = std::max(cached.x_, shape.width);
+    int64_t new_height = std::max(cached.y_, shape.height);
+    int64_t added_size_x = new_width - cached.x_;  // growth the cached block would need
+    int64_t added_size_y = new_height - cached.y_;
+    int64_t wasted_size_x = new_width - shape.width;  // slack left over for this request
+    int64_t wasted_size_y = new_height - shape.height;
+    // Take this block if added size shrinks on either axis, or wasted size shrinks at equal
+    // added size. NOTE(review): clauses are OR-ed, so one axis can win despite the other.
+    if ((min_added_size_x > 0 && added_size_x < min_added_size_x) ||
+        (min_added_size_y > 0 && added_size_y < min_added_size_y) ||
+        (min_added_size_x == added_size_x && wasted_size_x < min_wasted_size_x) ||
+        (min_added_size_y == added_size_y && wasted_size_y < min_wasted_size_y)) {
+      min_added_size_x = added_size_x;
+      min_added_size_y = added_size_y;
+      min_wasted_size_x = wasted_size_x;
+      min_wasted_size_y = wasted_size_y;
+      best_storage_id = free_id;
+      new_mem.x_ = new_width;
+      new_mem.y_ = new_height;
+    }
+  }
+
+  if (min_added_size_x == 0 && min_added_size_y == 0) {
+    // The best block already covers the request: hand back its existing token.
+    free_list_.erase(best_storage_id);
+    blocks_[best_storage_id].token_->ref_counter += prototype->ref_counter;  // added, not reset
+    return blocks_[best_storage_id].token_;
+  } else if (min_added_size_x <= shape.width || min_added_size_y <= shape.height) {
+    // Grow the block: the prototype replaces the block's token and takes over its storage id.
+    free_list_.erase(best_storage_id);
+    new_mem.token_ = prototype;
+    new_mem.token_->ref_counter += 1;  // one extra count on top of the prototype's own
+    new_mem.token_->storage_id = best_storage_id;
+    blocks_[best_storage_id] = new_mem;
+    return new_mem.token_;
+  }
+  return nullptr;
+}
+
+StorageToken* TokenAllocator2D::Alloc(StorageToken* prototype, int64_t storage_id) {
+  auto shape = GetSize2D(prototype);
+  MemBlock block;
+  block.x_ = shape.width;  // x_ holds the texture width, y_ the height
+  block.y_ = shape.height;
+  prototype->storage_id = storage_id;
+  block.token_ = prototype;
+  blocks_[prototype->storage_id] = block;  // overwrites any block already stored under this id
+  return prototype;
+}
+
+void TokenAllocator2D::CheckForRelease(StorageToken* tok) {
+  ICHECK_GE(tok->storage_id, 0);  // a storage id must already be assigned
+  ICHECK_GE(tok->ref_counter, 0);
+  if (tok->ref_counter == 0) {  // no remaining users: make the block reusable
+    free_list_.insert(tok->storage_id);  // the free list stores ids, not tokens
+  }
+}
+
+runtime::Texture2DShape<int64_t> TokenAllocator2D::GetSize2D(StorageToken* prototype) {
+  TensorType ttype = prototype->ttype;
+  ICHECK(ttype.defined());
+  size_t axis = runtime::DefaultTextureLayoutSeparator(ttype->shape.size(),
+                                                       prototype->virtual_device->memory_scope);
+  struct Shape {  // adapter: lets the flattening helper read dims as int64_t via operator[]
+    const Array<PrimExpr>& shape;  // NOTE(review): dims are dereferenced unchecked below
+    int64_t operator[](size_t i) const { return *tir::as_const_int(shape[i]); }
+  };
+  return runtime::ApplyTexture2DFlattening<int64_t>(Shape{ttype->shape}, ttype->shape.size(), axis);
+}
+
+}  // namespace relay
+}  // namespace tvm
diff --git a/src/relay/backend/token_allocator.h b/src/relay/backend/token_allocator.h
new file mode 100644
index 000000000000..3aebd71b6c2b
--- /dev/null
+++ b/src/relay/backend/token_allocator.h
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file relay/backend/token_allocator.h
+ * \brief Token allocation classes for backend
+ */
+#ifndef TVM_RELAY_BACKEND_TOKEN_ALLOCATOR_H_
+#define TVM_RELAY_BACKEND_TOKEN_ALLOCATOR_H_
+
+#include <tvm/relay/type.h>
+#include <tvm/target/virtual_device.h>
+
+#include <map>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "../../runtime/texture.h"
+
+namespace tvm {
+namespace relay {
+
+/*! \brief A representation of a block of memory required at runtime on some device. */
+struct StorageToken {
+  /*! \brief Number of outstanding users; the allocators treat 0 as releasable. */
+  int ref_counter{0};
+  /*! \brief Size of the block in bytes (filled in by TokenAllocator1D). */
+  size_t max_bytes{0};
+  /*! \brief The corresponding tensor type. */
+  TensorType ttype{nullptr};
+  /*! \brief VirtualDevice on which the memory will reside. */
+  VirtualDevice virtual_device = VirtualDevice::FullyUnconstrained();
+  /*! \brief The storage id; -1 until one is assigned. */
+  int64_t storage_id{-1};
+
+  bool is_valid() const { return !virtual_device->IsFullyUnconstrained(); }
+
+  bool is_compatible(const StorageToken& that) const {
+    return virtual_device == that.virtual_device;  // device only; size and dtype are ignored
+  }
+
+  std::string ToString() const {
+    std::ostringstream os;  // NOTE(review): <sstream> is not included directly by this header
+    os << "{storage_id: " << storage_id << ", max_bytes: " << max_bytes
+       << ", ttype: " << PrettyPrint(ttype) << ", virtual_device: " << virtual_device << "}";
+    return os.str();
+  }
+};
+
+/**
+ * @brief Memory manager for flattened 1d memory (buffers)
+ */
+class TokenAllocator1D {
+ public:
+  /*!
+   * \brief ceil(size/word_size) to get number of words.
+   * \param size The original size.
+   * \param word_size The element size; must be non-zero.
+   */
+  static size_t DivRoundUp(size_t size, size_t word_size) {
+    return (size + word_size - 1) / word_size;
+  }
+
+  /*!
+   * \brief Get the memory requirement of a token's tensor type.
+   * \param prototype The prototype token; its shape must be fully constant.
+   * \return The required memory size in bytes.
+   *
+   * TODO(mbs): Cf. GetMemorySizeBytes in aot_executor_codegen.cc,
+   * CalculateRelayExprSizeBytes in utils.cc
+   */
+  size_t GetMemorySize(StorageToken* prototype);
+  /*!
+   * \brief Request a previously released storage token for a given prototype.
+   * \param prototype The prototype storage token.
+   * \return The reused token, or nullptr if no compatible free block is in range.
+   */
+  StorageToken* Request(StorageToken* prototype);
+  /*!
+   * \brief Allocate a storage token by consuming the prototype, which is returned.
+   * \param prototype The prototype token.
+   * \param storage_id The storage id to assign to the token.
+   */
+  StorageToken* Alloc(StorageToken* prototype, int64_t storage_id);
+  /*!
+   * \brief Put the token on the free list if its reference counter is zero.
+   * \param tok The token to be released.
+   */
+  void CheckForRelease(StorageToken* tok);
+
+ private:
+  // scale factor bounding how much smaller/larger a reused block may be
+  const size_t match_range_{16};
+  // free blocks, keyed by their size in bytes
+  std::multimap<size_t, StorageToken*> free_;
+  // every token created through Alloc
+  std::vector<StorageToken*> data_;
+};
+
+/**
+ * @brief Memory manager for 2d memory (textures)
+ */
+class TokenAllocator2D {
+ public:
+  /*!
+   * \brief Request a previously released storage token for a given prototype.
+   * \param prototype The prototype storage token.
+   * \return The token now bound to the reused block, or nullptr if none qualifies.
+   */
+  StorageToken* Request(StorageToken* prototype);
+  /*!
+   * \brief Allocate a storage token by consuming the prototype, which is returned.
+   * \param prototype The prototype token.
+   * \param storage_id The storage id to assign to the token and its block.
+   */
+  StorageToken* Alloc(StorageToken* prototype, int64_t storage_id);
+  /*!
+   * \brief Put the token's storage id on the free list if its reference counter is zero.
+   * \param tok The token to be released.
+   */
+  void CheckForRelease(StorageToken* tok);
+  /*!
+   * \brief Get the texture 2d size requirement
+   * \param prototype The prototype token.
+   * \return The required texture 2d memory size in (width, height, channel).
+   */
+  runtime::Texture2DShape<int64_t> GetSize2D(StorageToken* prototype);
+
+ protected:  // not private: the unit-test wrapper subclasses this class to inspect state
+  struct MemBlock {
+    StorageToken* token_;  // token currently bound to this block
+    int64_t x_;            // width
+    int64_t y_;            // height
+  };
+
+  std::unordered_map<int64_t, MemBlock> blocks_;  // storage id -> block
+  std::unordered_set<int64_t> free_list_;         // storage ids of blocks available for reuse
+};
+
+}  // namespace relay
+}  // namespace tvm
+
+#endif  // TVM_RELAY_BACKEND_TOKEN_ALLOCATOR_H_
diff --git a/tests/cpp/relay/backend/graph_plan_token_alloc.cc b/tests/cpp/relay/backend/graph_plan_token_alloc.cc
new file mode 100644
index 000000000000..4641da2cb8b5
--- /dev/null
+++ b/tests/cpp/relay/backend/graph_plan_token_alloc.cc
@@ -0,0 +1,351 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "../src/relay/backend/token_allocator.h"
+
+namespace tvm {
+namespace relay {
+
+// TokenAllocator2DWrapper is necessary because the members of class
+// TokenAllocator2D are protected. The wrapper adds accessor methods that let
+// the tests read and check the internal state of TokenAllocator2D.
+class TokenAllocator2DWrapper : public TokenAllocator2D {
+ public:
+  inline size_t FreeListSize() const { return free_list_.size(); }
+  inline size_t BlockMapSize() const { return blocks_.size(); }
+};
+
+TEST(Token2DAlloc, OneToken) {  // lifecycle of a single token: allocate, then release
+  TokenAllocator2DWrapper alloc;
+  int storage_ids = 0;
+  EXPECT_EQ(alloc.BlockMapSize(), 0);
+  EXPECT_EQ(alloc.FreeListSize(), 0);
+
+  TensorType tt1({1, 22, 20, 20, 4}, DataType(kDLFloat, 32, 1));
+  VirtualDevice vd1(kDLOpenCL, 0, {}, MemoryScope("global.texture-nhwc"));
+  StorageToken tok1 = {
+      1,    // ref_counter
+      0,    // max bytes
+      tt1,  // tensor type
+      vd1,  // virtual device
+      -1    // storage_id
+  };
+  auto size2d = alloc.GetSize2D(&tok1);
+  EXPECT_EQ(size2d.channel, 4);
+  EXPECT_EQ(size2d.height, 22);
+  EXPECT_EQ(size2d.width, 400);
+  EXPECT_EQ(alloc.Request(&tok1), nullptr);  // nothing is on the free list yet
+
+  alloc.Alloc(&tok1, storage_ids++);  // registers the block; it is not free until released
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 0);
+
+  tok1.ref_counter -= 1;  // drop the only reference
+  alloc.CheckForRelease(&tok1);
+  EXPECT_EQ(alloc.BlockMapSize(), 1);  // the block stays registered ...
+  EXPECT_EQ(alloc.FreeListSize(), 1);  // ... and becomes reusable
+}
+
+TEST(Token2DAlloc, EqualSizeTokenReuse) {  // a released block serves an identical request
+  TokenAllocator2DWrapper alloc;
+  int storage_ids = 0;
+  EXPECT_EQ(alloc.BlockMapSize(), 0);
+  EXPECT_EQ(alloc.FreeListSize(), 0);
+
+  TensorType tt1({1, 22, 20, 20, 4}, DataType(kDLFloat, 32, 1));
+  VirtualDevice vd1(kDLOpenCL, 0, {}, MemoryScope("global.texture-nhwc"));
+  StorageToken tok1 = {
+      1,    // ref_counter
+      0,    // max bytes
+      tt1,  // tensor type
+      vd1,  // virtual device
+      -1    // storage_id
+  };
+  auto size2d = alloc.GetSize2D(&tok1);
+  EXPECT_EQ(size2d.channel, 4);
+  EXPECT_EQ(size2d.height, 22);
+  EXPECT_EQ(size2d.width, 400);
+  EXPECT_EQ(alloc.Request(&tok1), nullptr);  // nothing is on the free list yet
+
+  alloc.Alloc(&tok1, storage_ids++);
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 0);
+
+  tok1.ref_counter -= 1;  // drop the only reference so the block can be released
+  alloc.CheckForRelease(&tok1);
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 1);
+
+  StorageToken tok2 = {
+      1,    // ref_counter
+      0,    // max bytes
+      tt1,  // tensor type
+      vd1,  // virtual device
+      -1    // storage_id
+  };
+  auto req = alloc.Request(&tok2);  // same type, scope and size: no growth needed
+  EXPECT_NE(req, nullptr);
+  EXPECT_EQ(alloc.BlockMapSize(), 1);  // no new block was created
+  EXPECT_EQ(alloc.FreeListSize(), 0);  // the block left the free list
+  EXPECT_EQ(req->storage_id, storage_ids - 1);
+  EXPECT_EQ(req->ref_counter, 1);  // block token's 0 plus tok2's count of 1
+  auto sizeReq = alloc.GetSize2D(req);
+  EXPECT_EQ(sizeReq.channel, 4);
+  EXPECT_EQ(sizeReq.height, 22);
+  EXPECT_EQ(sizeReq.width, 400);
+}
+
+TEST(Token2DAlloc, EqualSizeDiffTypes) {  // a dtype mismatch must prevent reuse
+  TokenAllocator2DWrapper alloc;
+  int storage_ids = 0;
+  EXPECT_EQ(alloc.BlockMapSize(), 0);
+  EXPECT_EQ(alloc.FreeListSize(), 0);
+
+  TensorType tt1({1, 22, 20, 20, 4}, DataType(kDLFloat, 32, 1));
+  VirtualDevice vd1(kDLOpenCL, 0, {}, MemoryScope("global.texture-nhwc"));
+  StorageToken tok1 = {
+      1,    // ref_counter
+      0,    // max bytes
+      tt1,  // tensor type
+      vd1,  // virtual device
+      -1    // storage_id
+  };
+  auto size2d = alloc.GetSize2D(&tok1);
+  EXPECT_EQ(size2d.channel, 4);
+  EXPECT_EQ(size2d.height, 22);
+  EXPECT_EQ(size2d.width, 400);
+  EXPECT_EQ(alloc.Request(&tok1), nullptr);  // nothing is on the free list yet
+
+  alloc.Alloc(&tok1, storage_ids++);
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 0);
+
+  tok1.ref_counter -= 1;  // drop the only reference so the block can be released
+  alloc.CheckForRelease(&tok1);
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 1);
+
+  TensorType tt2({1, 22, 20, 20, 4}, DataType(kDLFloat, 16, 1));  // same shape, 16-bit float
+  StorageToken tok2 = {
+      1,    // ref_counter
+      0,    // max bytes
+      tt2,  // tensor type
+      vd1,  // virtual device
+      -1    // storage_id
+  };
+  EXPECT_EQ(alloc.Request(&tok2), nullptr);  // the free block has a different dtype
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 1);  // the rejected block is still free
+
+  alloc.Alloc(&tok2, storage_ids++);  // so a second block has to be allocated
+  EXPECT_EQ(alloc.BlockMapSize(), 2);
+  EXPECT_EQ(alloc.FreeListSize(), 1);
+
+  tok2.ref_counter -= 1;
+  alloc.CheckForRelease(&tok2);
+  EXPECT_EQ(alloc.BlockMapSize(), 2);
+  EXPECT_EQ(alloc.FreeListSize(), 2);  // both blocks are now free
+}
+
+TEST(Token2DAlloc, DifferentSizesTokenReuse) {  // grow a free block, then reuse it as-is
+  TokenAllocator2DWrapper alloc;
+  int storage_ids = 0;
+  EXPECT_EQ(alloc.BlockMapSize(), 0);
+  EXPECT_EQ(alloc.FreeListSize(), 0);
+
+  TensorType tt1({1, 22, 20, 20, 4}, DataType(kDLFloat, 32, 1));
+  VirtualDevice vd1(kDLOpenCL, 0, {}, MemoryScope("global.texture-nhwc"));
+  StorageToken tok1 = {
+      1,    // ref_counter
+      0,    // max bytes
+      tt1,  // tensor type
+      vd1,  // virtual device
+      -1    // storage_id
+  };
+  auto size2d = alloc.GetSize2D(&tok1);
+  EXPECT_EQ(size2d.channel, 4);
+  EXPECT_EQ(size2d.height, 22);
+  EXPECT_EQ(size2d.width, 400);
+  EXPECT_EQ(alloc.Request(&tok1), nullptr);  // nothing is on the free list yet
+
+  alloc.Alloc(&tok1, storage_ids++);
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 0);
+
+  tok1.ref_counter -= 1;  // drop the only reference so the block can be released
+  alloc.CheckForRelease(&tok1);
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 1);
+
+  TensorType tt2({1, 40, 30, 30, 4}, DataType(kDLFloat, 32, 1));
+  StorageToken tok2 = {
+      1,    // ref_counter
+      0,    // max bytes
+      tt2,  // tensor type
+      vd1,  // virtual device
+      -1    // storage_id
+  };
+  auto req = alloc.Request(&tok2);  // larger on both axes: the free block is grown
+  EXPECT_NE(req, nullptr);
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 0);
+  EXPECT_EQ(req->storage_id, storage_ids - 1);
+  EXPECT_EQ(req->ref_counter, 2);  // tok2's own 1 plus the 1 added when it takes the block
+  auto sizeReq = alloc.GetSize2D(req);  // req is &tok2, so this is tok2's own size
+  EXPECT_EQ(sizeReq.channel, 4);
+  EXPECT_EQ(sizeReq.height, 40);
+  EXPECT_EQ(sizeReq.width, 900);
+
+  tok2.ref_counter -= 1;
+  req->ref_counter -= 1;  // req aliases tok2: together these take the count from 2 to 0
+  alloc.CheckForRelease(&tok1);  // NOTE(review): passes tok1, not req; both have id 0, count 0
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 1);
+
+  TensorType tt3({1, 25, 30, 30, 4}, DataType(kDLFloat, 32, 1));
+  StorageToken tok3 = {
+      1,    // ref_counter
+      0,    // max bytes
+      tt3,  // tensor type
+      vd1,  // virtual device
+      -1    // storage_id
+  };
+  auto req2 = alloc.Request(&tok3);  // expected to reuse the grown block without growing it
+  EXPECT_NE(req2, nullptr);
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 0);
+  EXPECT_EQ(req2->storage_id, storage_ids - 1);
+  EXPECT_EQ(req2->ref_counter, 1);  // block token's 0 plus tok3's count of 1
+  auto sizeReq2 = alloc.GetSize2D(req2);  // size of the block's token (tok2), not of tok3
+  EXPECT_EQ(sizeReq2.channel, 4);
+  EXPECT_EQ(sizeReq2.height, 40);
+  EXPECT_EQ(sizeReq2.width, 900);
+}
+
+TEST(Token2DAlloc, DifferentSizesTokenReuse2) {  // request is wider but shorter than the block
+  TokenAllocator2DWrapper alloc;
+  int storage_ids = 0;
+  EXPECT_EQ(alloc.BlockMapSize(), 0);
+  EXPECT_EQ(alloc.FreeListSize(), 0);
+
+  TensorType tt1({1, 22, 20, 20, 4}, DataType(kDLFloat, 32, 1));
+  VirtualDevice vd1(kDLOpenCL, 0, {}, MemoryScope("global.texture-nhwc"));
+  StorageToken tok1 = {
+      1,    // ref_counter
+      0,    // max bytes
+      tt1,  // tensor type
+      vd1,  // virtual device
+      -1    // storage_id
+  };
+  auto size2d = alloc.GetSize2D(&tok1);
+  EXPECT_EQ(size2d.channel, 4);
+  EXPECT_EQ(size2d.height, 22);
+  EXPECT_EQ(size2d.width, 400);
+  EXPECT_EQ(alloc.Request(&tok1), nullptr);  // nothing is on the free list yet
+
+  alloc.Alloc(&tok1, storage_ids++);
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 0);
+
+  tok1.ref_counter -= 1;  // drop the only reference so the block can be released
+  alloc.CheckForRelease(&tok1);
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 1);
+
+  TensorType tt2({1, 5, 30, 20, 4}, DataType(kDLFloat, 32, 1));
+  StorageToken tok2 = {
+      1,    // ref_counter
+      0,    // max bytes
+      tt2,  // tensor type
+      vd1,  // virtual device
+      -1    // storage_id
+  };
+  auto req = alloc.Request(&tok2);  // the free block has to grow in width only
+  EXPECT_NE(req, nullptr);
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 0);
+  EXPECT_EQ(req->storage_id, storage_ids - 1);
+  EXPECT_EQ(req->ref_counter, 2);  // tok2's own 1 plus the 1 added when it takes the block
+  auto sizeReq = alloc.GetSize2D(req);  // req is &tok2: tok2's size, not the grown block's
+  EXPECT_EQ(sizeReq.channel, 4);
+  EXPECT_EQ(sizeReq.height, 5);
+  EXPECT_EQ(sizeReq.width, 600);
+}
+
+TEST(Token2DAlloc, SameSizesButDiffMemoryScopes) {  // a scope mismatch must prevent reuse
+  TokenAllocator2DWrapper alloc;
+  int storage_ids = 0;
+  EXPECT_EQ(alloc.BlockMapSize(), 0);
+  EXPECT_EQ(alloc.FreeListSize(), 0);
+
+  TensorType tt1({28, 676, 1, 1, 4}, DataType(kDLFloat, 32, 1));
+  VirtualDevice vd1(kDLOpenCL, 0, {}, MemoryScope("global.texture-weight"));
+  StorageToken tok1 = {
+      1,    // ref_counter
+      0,    // max bytes
+      tt1,  // tensor type
+      vd1,  // virtual device
+      -1    // storage_id
+  };
+  auto size2d = alloc.GetSize2D(&tok1);
+  EXPECT_EQ(size2d.channel, 4);
+  EXPECT_EQ(size2d.height, 28);
+  EXPECT_EQ(size2d.width, 676);
+  EXPECT_EQ(alloc.Request(&tok1), nullptr);  // nothing is on the free list yet
+
+  alloc.Alloc(&tok1, storage_ids++);
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 0);
+
+  tok1.ref_counter -= 1;  // drop the only reference so the block can be released
+  alloc.CheckForRelease(&tok1);
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 1);
+
+  TensorType tt2({1, 28, 26, 26, 4}, DataType(kDLFloat, 32, 1));
+  VirtualDevice vd2(kDLOpenCL, 0, {}, MemoryScope("global.texture-nhwc"));
+  StorageToken tok2 = {
+      1,    // ref_counter
+      0,    // max bytes
+      tt2,  // tensor type
+      vd2,  // virtual device
+      -1    // storage_id
+  };
+  auto tok2Size = alloc.GetSize2D(&tok2);  // different shape and scope, same 2D extents
+  EXPECT_EQ(tok2Size.channel, 4);
+  EXPECT_EQ(tok2Size.height, 28);
+  EXPECT_EQ(tok2Size.width, 676);
+
+  EXPECT_EQ(alloc.Request(&tok2), nullptr);  // the free block has a different memory scope
+  EXPECT_EQ(alloc.BlockMapSize(), 1);
+  EXPECT_EQ(alloc.FreeListSize(), 1);  // the rejected block is still free
+
+  alloc.Alloc(&tok2, storage_ids++);  // so a second block has to be allocated
+  EXPECT_EQ(alloc.BlockMapSize(), 2);
+  EXPECT_EQ(alloc.FreeListSize(), 1);
+
+  tok2.ref_counter -= 1;
+  alloc.CheckForRelease(&tok2);
+  EXPECT_EQ(alloc.BlockMapSize(), 2);
+  EXPECT_EQ(alloc.FreeListSize(), 2);  // both blocks are now free
+}
+}  // namespace relay
+}  // namespace tvm