[NODE][REFLECTION] Support NDArray as field #1452

Merged · 2 commits · Jul 18, 2018
2 changes: 1 addition & 1 deletion HalideIR
Submodule HalideIR updated 1 file
+5 −0 src/tvm/node.h
143 changes: 138 additions & 5 deletions include/tvm/runtime/ndarray.h
@@ -10,6 +10,7 @@
#include <vector>
#include <utility>
#include "./c_runtime_api.h"
#include "./serializer.h"

namespace tvm {
namespace runtime {
@@ -103,8 +104,25 @@ class NDArray {
* \note The copy may happen asynchronously if it involves a GPU context.
* TVMSynchronize is necessary.
*/
inline void CopyTo(DLTensor* other);
inline void CopyTo(const NDArray& other);
inline void CopyTo(DLTensor* other) const;
inline void CopyTo(const NDArray& other) const;
/*!
* \brief Copy the data to another context.
* \param ctx The target context.
* \return The array under another context.
*/
inline NDArray CopyTo(const DLContext& ctx) const;
/*!
* \brief Load NDArray from stream
* \param stream The input data stream
* \return Whether load is successful
*/
inline bool Load(dmlc::Stream* stream);
/*!
* \brief Save NDArray to stream
* \param stream The output data stream
*/
inline void Save(dmlc::Stream* stream) const;
/*!
* \brief Create an NDArray that shares the data memory with the current one.
* \param shape The shape of the new array.
@@ -161,6 +179,13 @@ class NDArray {
friend class TVMArgsSetter;
};

/*!
* \brief Save a DLTensor to stream.
* \param strm The output stream.
* \param tensor The tensor to be saved.
* \return Whether the save succeeded.
*/
inline bool SaveDLTensor(dmlc::Stream* strm, DLTensor* tensor);

/*!
* \brief Reference counted Container object used to back NDArray.
*
@@ -260,17 +285,26 @@ inline void NDArray::CopyFrom(const NDArray& other) {
CopyFromTo(&(other.data_->dl_tensor), &(data_->dl_tensor));
}

inline void NDArray::CopyTo(DLTensor* other) {
inline void NDArray::CopyTo(DLTensor* other) const {
CHECK(data_ != nullptr);
CopyFromTo(&(data_->dl_tensor), other);
}

inline void NDArray::CopyTo(const NDArray& other) {
inline void NDArray::CopyTo(const NDArray& other) const {
CHECK(data_ != nullptr);
CHECK(other.data_ != nullptr);
CopyFromTo(&(data_->dl_tensor), &(other.data_->dl_tensor));
}

inline NDArray NDArray::CopyTo(const DLContext& ctx) const {
CHECK(data_ != nullptr);
const DLTensor* dptr = operator->();
NDArray ret = Empty(std::vector<int64_t>(dptr->shape, dptr->shape + dptr->ndim),
dptr->dtype, ctx);
this->CopyTo(ret);
return ret;
}

inline int NDArray::use_count() const {
if (data_ == nullptr) return 0;
return data_->ref_counter_.load(std::memory_order_relaxed);
@@ -280,7 +314,106 @@ inline const DLTensor* NDArray::operator->() const {
return &(data_->dl_tensor);
}

/*! \brief Magic number for NDArray file */
constexpr uint64_t kTVMNDArrayMagic = 0xDD5E40F096B4A13F;

inline bool SaveDLTensor(dmlc::Stream* strm,
DLTensor* tensor) {
uint64_t header = kTVMNDArrayMagic, reserved = 0;
strm->Write(header);
strm->Write(reserved);
// Always save the data with a CPU context.
//
// Parameters that get serialized should be on CPU by default.
// So even if the array's context is GPU, it will be stored as a CPU array.
// This prevents the case where another user loads the parameters back on a
// machine that does not have a GPU or the related context.
//
// We can always do array.CopyTo(target_ctx) to get a corresponding
// array in the target context.
DLContext cpu_ctx;
cpu_ctx.device_type = kDLCPU;
cpu_ctx.device_id = 0;
strm->Write(cpu_ctx);
strm->Write(tensor->ndim);
strm->Write(tensor->dtype);
int ndim = tensor->ndim;
strm->WriteArray(tensor->shape, ndim);
int type_bytes = tensor->dtype.bits / 8;
int64_t num_elems = 1;
for (int i = 0; i < ndim; ++i) {
num_elems *= tensor->shape[i];
}
int64_t data_byte_size = type_bytes * num_elems;
strm->Write(data_byte_size);

if (DMLC_IO_NO_ENDIAN_SWAP &&
tensor->ctx.device_type == kDLCPU &&
tensor->strides == nullptr &&
tensor->byte_offset == 0) {
// quick path
strm->Write(tensor->data, data_byte_size);
} else {
std::vector<uint8_t> bytes(data_byte_size);
CHECK_EQ(TVMArrayCopyToBytes(
tensor, dmlc::BeginPtr(bytes), data_byte_size), 0)
<< TVMGetLastError();
if (!DMLC_IO_NO_ENDIAN_SWAP) {
dmlc::ByteSwap(dmlc::BeginPtr(bytes), type_bytes, num_elems);
}
strm->Write(dmlc::BeginPtr(bytes), data_byte_size);
}
return true;
}

inline void NDArray::Save(dmlc::Stream* strm) const {
SaveDLTensor(strm, const_cast<DLTensor*>(operator->()));
}

inline bool NDArray::Load(dmlc::Stream* strm) {
uint64_t header, reserved;
CHECK(strm->Read(&header))
<< "Invalid DLTensor file format";
CHECK(strm->Read(&reserved))
<< "Invalid DLTensor file format";
CHECK(header == kTVMNDArrayMagic)
<< "Invalid DLTensor file format";
DLContext ctx;
int ndim;
DLDataType dtype;
CHECK(strm->Read(&ctx))
<< "Invalid DLTensor file format";
CHECK(strm->Read(&ndim))
<< "Invalid DLTensor file format";
CHECK(strm->Read(&dtype))
<< "Invalid DLTensor file format";
CHECK_EQ(ctx.device_type, kDLCPU)
<< "Invalid DLTensor context: can only save as CPU tensor";
std::vector<int64_t> shape(ndim);
if (ndim != 0) {
CHECK(strm->ReadArray(&shape[0], ndim))
<< "Invalid DLTensor file format";
}
NDArray ret = NDArray::Empty(shape, dtype, ctx);
int64_t num_elems = 1;
int elem_bytes = (ret->dtype.bits + 7) / 8;
for (int i = 0; i < ret->ndim; ++i) {
num_elems *= ret->shape[i];
}
int64_t data_byte_size;
CHECK(strm->Read(&data_byte_size))
<< "Invalid DLTensor file format";
CHECK(data_byte_size == num_elems * elem_bytes)
<< "Invalid DLTensor file format";
CHECK(strm->Read(ret->data, data_byte_size))
<< "Invalid DLTensor file format";
if (!DMLC_IO_NO_ENDIAN_SWAP) {
dmlc::ByteSwap(ret->data, elem_bytes, num_elems);
}
*this = ret;
return true;
}

} // namespace runtime
} // namespace tvm

#endif // TVM_RUNTIME_NDARRAY_H_
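Taken together, the header changes above give `NDArray` a self-contained binary round trip. Below is a minimal usage sketch, not part of the diff: it assumes a TVM build that includes this patch, and `RoundTrip` is just an illustrative wrapper. The layout comment is read off `SaveDLTensor` above.

```cpp
#include <string>
#include <dmlc/logging.h>
#include <dmlc/memory_io.h>
#include <tvm/runtime/ndarray.h>

// On-stream layout written by SaveDLTensor, as read off the code above:
//   uint64_t   header           (kTVMNDArrayMagic)
//   uint64_t   reserved         (0)
//   DLContext  ctx              (always {kDLCPU, 0} on save)
//   int        ndim
//   DLDataType dtype
//   int64_t    shape[ndim]
//   int64_t    data_byte_size
//   uint8_t    data[data_byte_size]  (byte-swapped on load if endianness differs)

// Illustrative helper, not part of the PR.
void RoundTrip() {
  DLContext cpu;
  cpu.device_type = kDLCPU;
  cpu.device_id = 0;
  tvm::runtime::NDArray a =
      tvm::runtime::NDArray::Empty({2, 3}, DLDataType{kDLFloat, 32, 1}, cpu);

  // Serialize into an in-memory stream; a file stream works the same way.
  std::string blob;
  dmlc::MemoryStringStream strm(&blob);
  a.Save(&strm);

  // Deserialize into a fresh CPU array.
  strm.Seek(0);
  tvm::runtime::NDArray b;
  CHECK(b.Load(&strm)) << "corrupt or truncated stream";

  // Parameters are always stored as CPU tensors; copy back to a device
  // explicitly when one is available, e.g.:
  //   tvm::runtime::NDArray on_gpu = b.CopyTo(DLContext{kDLGPU, 0});
}
```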
1 change: 1 addition & 0 deletions include/tvm/runtime/serializer.h
@@ -10,6 +10,7 @@
#include <dmlc/io.h>
#include <dmlc/serializer.h>
#include "./c_runtime_api.h"
#include "./ndarray.h"

namespace dmlc {
namespace serializer {
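This one-line include matters for the diff above: `serializer.h` is where the `dmlc::serializer::Handler` specializations for the DLPack POD types live (the truncated hunk hides them), and they are what make calls like `strm->Write(cpu_ctx)` and `strm->Read(&dtype)` in `ndarray.h` resolve. A small sketch of that mechanism in isolation, again not part of the diff:

```cpp
#include <string>
#include <dmlc/logging.h>
#include <dmlc/memory_io.h>
#include <tvm/runtime/serializer.h>

// Illustrative helper, not part of the PR.
void PodRoundTrip() {
  std::string buf;
  dmlc::MemoryStringStream strm(&buf);

  DLDataType dt{kDLFloat, 32, 1};
  strm.Write(dt);  // dispatched through dmlc::serializer::Handler<DLDataType>

  strm.Seek(0);
  DLDataType back;
  CHECK(strm.Read(&back));  // Read returns false on a truncated stream
}
```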
12 changes: 2 additions & 10 deletions nnvm/python/nnvm/compiler/param_dict.py
@@ -1,8 +1,6 @@
# pylint: disable=invalid-name
"""Helper utility to save parameter dict"""
import ctypes
import tvm
from tvm._ffi.runtime_ctypes import TVMArrayHandle

_save_param_dict = tvm.get_global_func("nnvm.compiler._save_param_dict")
_load_param_dict = tvm.get_global_func("nnvm.compiler._load_param_dict")
@@ -59,11 +57,5 @@ def load_param_dict(param_bytes):
"""
if isinstance(param_bytes, (bytes, str)):
param_bytes = bytearray(param_bytes)
load_mod = _load_param_dict(param_bytes)
size = load_mod(0)
param_dict = {}
for i in range(size):
key = load_mod(1, i)
dltensor_handle = ctypes.cast(load_mod(2, i), TVMArrayHandle)
param_dict[key] = tvm.nd.NDArray(dltensor_handle, False)
return param_dict
load_arr = _load_param_dict(param_bytes)
return {v.name: v.array for v in load_arr}
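For completeness, the registered function this Python helper wraps can also be reached from C++ through the global registry. A hedged sketch (assumes nnvm's compiler module is linked into the process; `LoadParams` is an illustrative name):

```cpp
#include <string>
#include <dmlc/logging.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>

// Illustrative helper, not part of the PR.
void LoadParams(const std::string& blob) {
  const tvm::runtime::PackedFunc* load =
      tvm::runtime::Registry::Get("nnvm.compiler._load_param_dict");
  CHECK(load != nullptr) << "nnvm must be linked into this process";

  TVMByteArray bytes;
  bytes.data = blob.data();
  bytes.size = blob.size();
  tvm::runtime::TVMRetValue rv = (*load)(bytes);
  // rv now holds the tvm::Array<NDArrayWrapper> built in graph_runtime.cc
  // below: each wrapper pairs a parameter name with its NDArray, which is
  // exactly what the dict comprehension above flattens into a Python dict.
}
```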
109 changes: 12 additions & 97 deletions nnvm/src/compiler/graph_runtime.cc
@@ -4,10 +4,6 @@
* \brief Interface code with TVM graph runtime.
*/
#include <dmlc/memory_io.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/registry.h>
#include <tvm/runtime/serializer.h>
#include "./graph_runtime.h"

namespace nnvm {
@@ -37,81 +33,6 @@ NNVM_REGISTER_OP(tvm_op)
return param.num_outputs;
});

bool SaveDLTensor(dmlc::Stream* strm, DLTensor* tensor) {
uint64_t header = kTVMNDArrayMagic, reserved = 0;
strm->Write(header);
strm->Write(reserved);
strm->Write(tensor->ctx);
strm->Write(tensor->ndim);
strm->Write(tensor->dtype);
int ndim = tensor->ndim;
strm->WriteArray(tensor->shape, ndim);

int type_bytes = tensor->dtype.bits / 8;
int64_t num_elems = 1;
for (int i = 0; i < ndim; ++i) {
num_elems *= tensor->shape[i];
}
int64_t data_byte_size = type_bytes * num_elems;
strm->Write(data_byte_size);
// handle endianness of data correctly.
if (DMLC_IO_NO_ENDIAN_SWAP) {
strm->Write(tensor->data, data_byte_size);
} else {
uint8_t* dptr = reinterpret_cast<uint8_t*>(tensor->data);
std::vector<uint8_t> bytes(dptr, dptr + data_byte_size);
dmlc::ByteSwap(dmlc::BeginPtr(bytes), type_bytes, num_elems);
strm->Write(dmlc::BeginPtr(bytes), data_byte_size);
}
return true;
}

DLTensor* LoadDLTensor(dmlc::Stream* strm) {
uint64_t header, reserved;
CHECK(strm->Read(&header))
<< "Invalid DLTensor file format";
CHECK(strm->Read(&reserved))
<< "Invalid DLTensor file format";
CHECK(header == kTVMNDArrayMagic)
<< "Invalid DLTensor file format";
DLTensor tensor;
CHECK(strm->Read(&(tensor.ctx)))
<< "Invalid DLTensor file format";
CHECK(strm->Read(&(tensor.ndim)))
<< "Invalid DLTensor file format";
CHECK(strm->Read(&(tensor.dtype)))
<< "Invalid DLTensor file format";
std::vector<int64_t> shape(tensor.ndim);
if (tensor.ndim != 0) {
CHECK(strm->ReadArray(&shape[0], tensor.ndim))
<< "Invalid DLTensor file format";
}
DLTensor* ret;
CHECK_EQ(TVMArrayAlloc(shape.data(),
tensor.ndim,
tensor.dtype.code,
tensor.dtype.bits,
tensor.dtype.lanes,
static_cast<int>(tensor.ctx.device_type),
tensor.ctx.device_id,
&ret), 0) << TVMGetLastError();
int64_t num_elems = 1;
int elem_bytes = (ret->dtype.bits + 7) / 8;
for (int i = 0; i < ret->ndim; ++i) {
num_elems *= ret->shape[i];
}
int64_t data_byte_size;
CHECK(strm->Read(&data_byte_size))
<< "Invalid DLTensor file format";
CHECK(data_byte_size == num_elems * elem_bytes)
<< "Invalid DLTensor file format";
CHECK(strm->Read(ret->data, data_byte_size))
<< "Invalid DLTensor file format";
if (!DMLC_IO_NO_ENDIAN_SWAP) {
dmlc::ByteSwap(ret->data, elem_bytes, num_elems);
}
return ret;
}

TVM_REGISTER_GLOBAL("nnvm.compiler._save_param_dict")
.set_body([](TVMArgs args, TVMRetValue *rv) {
@@ -136,7 +57,7 @@ TVM_REGISTER_GLOBAL("nnvm.compiler._save_param_dict")
uint64_t sz = static_cast<uint64_t>(arrays.size());
fo->Write(sz);
for (size_t i = 0; i < sz; ++i) {
SaveDLTensor(fo, arrays[i]);
tvm::runtime::SaveDLTensor(fo, arrays[i]);
}
}
TVMByteArray arr;
@@ -149,11 +70,9 @@ TVM_REGISTER_GLOBAL("nnvm.compiler._save_param_dict")
TVM_REGISTER_GLOBAL("nnvm.compiler._load_param_dict")
.set_body([](TVMArgs args, TVMRetValue *rv) {
std::string bytes = args[0];
std::vector<DLTensor*> data;
std::vector<std::string> names;
dmlc::MemoryStringStream memstrm(&bytes);
dmlc::Stream* strm = &memstrm;

uint64_t header, reserved;
CHECK(strm->Read(&header))
<< "Invalid parameters file format";
@@ -168,23 +87,19 @@ TVM_REGISTER_GLOBAL("nnvm.compiler._load_param_dict")
size_t size = static_cast<size_t>(sz);
CHECK(size == names.size())
<< "Invalid parameters file format";
tvm::Array<NDArrayWrapper> ret;
for (size_t i = 0; i < size; ++i) {
data.push_back(LoadDLTensor(strm));
tvm::runtime::NDArray temp;
temp.Load(strm);
std::shared_ptr<NDArrayWrapperNode> n
= std::make_shared<NDArrayWrapperNode>();
n->name = std::move(names[i]);
n->array = temp;
ret.push_back(NDArrayWrapper(n));
}
auto packed = [data, names](TVMArgs args, TVMRetValue* rv) {
int code = args[0];
if (code == 0) {
*rv = static_cast<int64_t>(data.size());
} else if (code == 1) {
int index = args[1];
*rv = names[index];
} else {
CHECK_EQ(code, 2);
int index = args[1];
*rv = static_cast<void*>(data[index]);
}
};
*rv = PackedFunc(packed);
*rv = ret;
});

TVM_REGISTER_NODE_TYPE(NDArrayWrapperNode);
} // namespace compiler
} // namespace nnvm
Review discussion on the NDArrayWrapper introduction:

Member: Are these wrappers just so we get a 2-tuple-like piece of data in Python?

Member (author): Yes, these wrappers give us 2-tuple pieces of data. We put them in nnvm so they can be deprecated in the future; they were used to test the features introduced here.