Skip to content

Commit

Permalink
[Relay][VM] Memory planner (part 1) (apache#5144)
Browse files Browse the repository at this point in the history
* Start on memory planning

WIP

Move to test_memory_passes.py

Work on memory planning

Post-rebase and VM changes

Plumb through the offsets

Basic tests all pass, fix offset to data buffer.

Fix compile errors

Fix ws

Apply suggestions from code review

Co-Authored-By: Haichen Shen <shenhaichen@gmail.com>

Address CR

Update src/runtime/vm/vm.cc

Co-Authored-By: Haichen Shen <shenhaichen@gmail.com>

Fix another comment

Fix lint

Fix

Fix

Fix

Lint is done?

Fix

More fix

Trying to debug

No clue

Fix lint

* Fix docs

* Disable aggressive constant eval

* It works

* Fix lint

* Found issue with dynamic

* Fix the pass, but runtime segfaults

* fix scalar tensor, test_any_elemwise passes

* Fix split pass

* Fix 0-rank issues

* Fix

* debug

* apply Haichen's patch and clean up

* lint

* fix serializer and test_tyck_alloc_tensor test

* Fix the constant lift pass in presence of closures

* Restore old finder

* Fix rebase issues

* Fix

* Fix

* Fix issue coercing the shapes incorrectly from i64 to i32

* Fix linting

* Fix clang format

* Format memory.cc

* Fix 0-rank case

* Add fix for (0,) shape

* Ignore shapes for now

* Apply suggestions from code review

Co-authored-by: Zhi <5145158+zhiics@users.noreply.github.com>

* Update src/runtime/vm/executable.cc

Co-authored-by: Zhi <5145158+zhiics@users.noreply.github.com>

* Fix

* lint

Co-authored-by: Zhi Chen <chzhi@amazon.com>
Co-authored-by: Zhi <5145158+zhiics@users.noreply.github.com>
  • Loading branch information
3 people authored and Trevor Morris committed Jun 9, 2020
1 parent 1c7941c commit 7c4270b
Show file tree
Hide file tree
Showing 16 changed files with 649 additions and 126 deletions.
2 changes: 2 additions & 0 deletions include/tvm/runtime/ndarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#define TVM_RUNTIME_NDARRAY_H_

#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/data_type.h>
#include <tvm/runtime/object.h>
#include <tvm/runtime/serializer.h>

Expand Down Expand Up @@ -160,6 +161,7 @@ class NDArray : public ObjectRef {
TVMStreamHandle stream = nullptr);

TVM_DLL std::vector<int64_t> Shape() const;
TVM_DLL runtime::DataType DataType() const;
// internal namespace
struct Internal;

Expand Down
12 changes: 9 additions & 3 deletions include/tvm/runtime/vm.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ struct Instruction {
struct /* AllocTensor Operands */ {
/*! \brief The storage to allocate from. */
RegName storage;
/*! \brief The offset into the storage to allocate from. */
Index offset;
/*! \brief The number of dimensions. */
uint32_t ndim;
/*! \brief The shape of tensor. */
Expand All @@ -146,6 +148,8 @@ struct Instruction {
struct /* AllocTensorReg Operands */ {
/*! \brief The storage to allocate from. */
RegName storage;
/*! \brief The offset into the storage to allocate from. */
Index offset;
/*! \brief The register to read the shape out of. */
RegName shape_register;
/*! \brief The datatype of tensor to be allocated. */
Expand Down Expand Up @@ -267,23 +271,25 @@ struct Instruction {
/*!
* \brief Construct an allocate tensor instruction with constant shape.
* \param storage The storage to allocate out of.
* \param offset The offset to allocate at.
* \param shape The shape of the tensor.
* \param dtype The dtype of the tensor.
* \param dst The destination register.
* \return The allocate tensor instruction.
*/
static Instruction AllocTensor(RegName storage, const std::vector<int64_t>& shape,
static Instruction AllocTensor(RegName storage, Index offset, const std::vector<int64_t>& shape,
DLDataType dtype, RegName dst);
/*!
* \brief Construct an allocate tensor instruction with register.
* \param storage The storage to allocate out of.
* \param offset The offset into the storage to allocate from.
* \param shape_register The register containing the shape.
* \param dtype The dtype of the tensor.
* \param dst The destination register.
* \return The allocate tensor instruction.
*/
static Instruction AllocTensorReg(RegName storage, RegName shape_register, DLDataType dtype,
RegName dst);
static Instruction AllocTensorReg(RegName storage, Index offset, RegName shape_register,
DLDataType dtype, RegName dst);
/*!
* \brief Construct an allocate datatype instruction.
* \param tag The datatype tag.
Expand Down
6 changes: 6 additions & 0 deletions python/tvm/relay/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@
# Dialects
from . import qnn

from .scope_builder import ScopeBuilder

# Load Memory Passes
from .transform import memory_alloc
from .transform import memory_plan

# Required to traverse large programs
setrecursionlimit(10000)

Expand Down
1 change: 1 addition & 0 deletions python/tvm/relay/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,7 @@ def const(value, dtype=None):

if not isinstance(value, _nd.NDArray):
raise ValueError("value has to be scalar or NDArray")

return Constant(value)


Expand Down
7 changes: 5 additions & 2 deletions python/tvm/relay/op/memory/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,17 @@ def invoke_tvm_op(func, inputs, outputs):
"""
return _make.invoke_tvm_op(func, inputs, outputs)

def alloc_tensor(storage, shape, dtype='float32', assert_shape=None):
def alloc_tensor(storage, offset, shape, dtype='float32', assert_shape=None):
"""Allocate a tensor with the provided shape, and dtype.
Parameters
----------
storage : tvm.relay.Expr
The storage to allocate from.
offset : tvm.relay.Expr
The offset to allocate from.
shape : tvm.relay.Expr
The shape of the tensor to allocate.
Expand All @@ -61,7 +64,7 @@ def alloc_tensor(storage, shape, dtype='float32', assert_shape=None):
result : tvm.relay.Expr
The alloc_tensor expression.
"""
return _make.alloc_tensor(storage, shape, dtype, assert_shape)
return _make.alloc_tensor(storage, offset, shape, dtype, assert_shape)

def alloc_storage(size, alignment, ctx, dtype_hint='float32'):
"""Allocate a piece of tensor storage.
Expand Down
1 change: 0 additions & 1 deletion python/tvm/relay/transform/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,4 @@
"""The Relay IR namespace containing transformations."""
# transformation passes
from .transform import *

from . import memory_alloc
22 changes: 12 additions & 10 deletions python/tvm/relay/transform/memory_alloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,21 @@
from ..backend import compile_engine
from ..op.memory import flatten_tuple_type, from_tuple_type, to_tuple_type
from ...import cpu
from ..op.memory import alloc_storage

def alloc_tensor(storage, shape, dtype='float32', assert_shape=None):
    """Allocate a tensor at the start of `storage`.

    Thin wrapper over op.memory.alloc_tensor that fixes the storage
    offset to a constant 0 (int64); memory planning may later rewrite
    allocations to use non-zero offsets.
    """
    return op.memory.alloc_tensor(
        storage, expr.const(0, dtype="int64"), shape, dtype, assert_shape)

def is_primitive(call):
    """Return True iff `call` looks like a call to a primitive (fused) op.

    A node qualifies when it has an `op` attribute whose `attrs` carry a
    `Primitive` flag that converts to the integer 1. Each attribute is
    checked before access, so any non-call node simply yields False.
    """
    if not hasattr(call, 'op'):
        return False
    if not hasattr(call.op, 'attrs'):
        return False
    if not hasattr(call.op.attrs, 'Primitive'):
        return False
    return int(call.op.attrs.Primitive) == 1

class ManifestAllocPass(ExprMutator):
"""A pass for explictly manifesting all memory allocations in Relay."""
"""A pass for explicitly manifesting all memory allocations in Relay."""

def __init__(self, target_host):
self.invoke_tvm = op.memory.invoke_tvm_op
self.alloc_storage = op.memory.alloc_storage
self.alloc_tensor = op.memory.alloc_tensor
self.shape_func = op.memory.shape_func
self.scopes = [ScopeBuilder()]
self.target_host = target_host
Expand Down Expand Up @@ -94,17 +96,16 @@ def make_static_allocation(self, scope, tensor_type, i):
"""Allocate a tensor with a statically known shape."""
shape = [int(sh) for sh in tensor_type.shape]
if len(shape) == 0:
shape = expr.const(np.array([]).astype(
self.compute_dtype), dtype=self.compute_dtype)
shape = expr.const(np.empty((), dtype=self.compute_dtype), dtype=self.compute_dtype)
else:
shape = expr.const(np.array(shape), dtype=self.compute_dtype)
size = self.compute_storage(tensor_type)
alignment = self.compute_alignment(tensor_type.dtype)
dtype = tensor_type.dtype
sto = scope.let("storage_{0}".format(i), self.alloc_storage(
sto = scope.let("storage_{0}".format(i), alloc_storage(
size, alignment, self.default_context, dtype))
# TODO(@jroesch): There is a bug with typing based on the constant shape.
tensor = self.alloc_tensor(sto, shape, dtype, tensor_type.shape)
tensor = alloc_tensor(sto, shape, dtype, tensor_type.shape)
return scope.let("tensor_{0}".format(i), tensor)

def visit_let(self, let):
Expand Down Expand Up @@ -172,14 +173,14 @@ def dynamic_invoke(self, scope, func, ins, new_args, out_types, ret_type):
size = self.compute_storage_in_relay(
out_shape, out_type.dtype)
alignment = self.compute_alignment(out_type.dtype)
sto = scope.let("storage_{i}".format(i=i), self.alloc_storage(
sto = scope.let("storage_{i}".format(i=i), alloc_storage(
size, alignment, self.default_context, out_type.dtype))
storages.append(sto)

outs = []
sh_ty_storage = zip(out_shapes, out_types, storages)
for i, (out_shape, out_type, storage) in enumerate(sh_ty_storage):
alloc = self.alloc_tensor(
alloc = alloc_tensor(
storage,
out_shape,
out_type.dtype,
Expand All @@ -204,6 +205,7 @@ def visit_call(self, call):
# Because we are in ANF we do not need to visit the arguments.
scope = self.current_scope()
new_args = [self.visit(arg) for arg in call.args]

ins = expr.Tuple(new_args)
ret_type = call.checked_type
out_types = flatten_tuple_type(ret_type)
Expand Down Expand Up @@ -233,7 +235,7 @@ def __init__(self, target_host):
self.target_host = target_host

def transform_function(self, func, mod, _):
# TODO(@jroesch): Is there a way to do one shot initilization?
# TODO(@jroesch): Is there a way to do one shot initialization?
# can we have def pass_init?
mod.import_from_std("core.rly")
ea = ManifestAllocPass(self.target_host)
Expand Down
Loading

0 comments on commit 7c4270b

Please sign in to comment.