Support inference value2disk #119

Open · wants to merge 4 commits into master
6 changes: 6 additions & 0 deletions bindings/pymlir/pymlir.cpp
@@ -140,6 +140,11 @@ class py_module {
return getPyArray(std::move(tensor), shape);
}

py::str get_tempfile() {
auto filename = interpreter_->getTempFile();
return filename;
}

// Tip: not using copy in python, since independent mem
py::array get_fp32_tensor(std::string name) {
auto tensor = interpreter_->getTensor(name, true);
@@ -246,6 +251,7 @@ PYBIND11_MODULE(pymlir, m) {
.def("set_tensor", &py_module::set_tensor)
.def("set_tensor_from_int", &py_module::set_tensor_from_int)
.def("get_tensor", &py_module::get_tensor, "get one tensor data")
.def("get_tempfile", &py_module::get_tempfile, "get file in value to disk mode")
.def("get_fp32_tensor", &py_module::get_fp32_tensor, "get one fp32 tensor data")
.def("get_all_tensor", &py_module::getAllTensor, "dump all tensor data")
.def("invoke", &py_module::invoke)
5 changes: 4 additions & 1 deletion include/tpu_mlir/Support/ModuleInterpreter.h
@@ -55,6 +55,8 @@ class ModuleInterpreter {
float &scale, int &zp);
llvm::ArrayRef<int64_t> getTensorShape(const std::string &name);
bool is_no_mem_op(Operation *op);
std::string getTempFile();
void setTempFile(std::string filename);

private:
void allocate_part_tensor_in_mem();
@@ -73,7 +75,7 @@
std::vector<std::string>
all_tensor_names; // activation tensor, without weight
std::vector<std::string> all_weight_names; // weight tensor

std::string temp_file_name = "Default";
private:
ModuleOp module;
int64_t num_infer_op;
Expand All @@ -82,6 +84,7 @@ class ModuleInterpreter {
std::map<std::string, Value> value_map;
std::map<std::string, std::shared_ptr<InferenceParameter>> inference_map;
std::map<std::string, std::shared_ptr<std::vector<float>>> mem_map;
std::vector<size_t> store_disk_shape;
Collaborator:
Do we need this variable?

};

} // namespace tpu_mlir
56 changes: 44 additions & 12 deletions lib/Support/ModuleInterpreter.cpp
@@ -19,6 +19,8 @@
#include <functional>
#include <memory>
#include <numeric>
#include <fstream>
#include <llvm/Support/FileSystem.h>

#define DEBUG_TYPE "interpreter"

@@ -240,6 +242,7 @@ void ModuleInterpreter::allocate_all_tensor_in_disk() {
}
});
module::detachWeightFile(); // to free weight memory
func.walk([&](InferenceInterface infer_op) { num_infer_op++; });
}
}
void ModuleInterpreter::allocate_all_tensor_in_mem() {
@@ -361,6 +364,20 @@ void ModuleInterpreter::invoke(bool express_type) {
case mem_mode_t::PART_TENSOR_IN_MEM:
invoke_part_in_mem(express_type);
break;
case mem_mode_t::ALL_TENSOR_IN_DISK:
Collaborator:
Lines 37-47 cannot set mem_mode to ALL_TENSOR_IN_DISK, so this branch cannot be executed?

Author:
Yes, there is no corresponding case here. I set mem_mode = mem_mode_t::ALL_TENSOR_IN_DISK manually for testing; perhaps a threshold greater than 16 GB is needed to activate this case. I don't know what the threshold should be, so I hope you can set it. With that change, the test above passed.

llvm::Twine str1 = "inference_value_in_disk_%%%%.npz";
auto tempfile = llvm::sys::fs::TempFile::create(str1, 146);
if (tempfile) {
llvm::sys::fs::TempFile &tmp = tempfile.get();
invoke_to_disk(tmp.TmpName.c_str(), express_type);
setTempFile(tmp.TmpName.c_str());
if (tmp.keep()) {
llvm_unreachable("tmp.keep failed!");
}
} else {
llvm_unreachable("create tempfile failed!");
}
break;
default:
llvm_unreachable("Mem not enough, please use invoke_to_disk");
break;
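
The review thread above asks how mem_mode ever becomes ALL_TENSOR_IN_DISK. As a point of reference only, the following is a minimal sketch of the size-based dispatch the author hints at; the function name select_mem_mode, the total_activation_bytes parameter, and the 16 GiB constant are illustrative assumptions, not code from this repository.

// Hypothetical sketch: pick the disk mode when activations would not fit in memory.
// kDiskThreshold and select_mem_mode are assumed names, not part of the existing code.
void ModuleInterpreter::select_mem_mode(size_t total_activation_bytes) {
  constexpr size_t kDiskThreshold = 16ull << 30; // assumed 16 GiB cut-off
  if (total_activation_bytes >= kDiskThreshold) {
    mem_mode = mem_mode_t::ALL_TENSOR_IN_DISK; // spill activations to the temporary npz file
    allocate_all_tensor_in_disk();
  } else {
    mem_mode = mem_mode_t::ALL_TENSOR_IN_MEM;  // default in-memory path
    allocate_all_tensor_in_mem();
  }
}

Hooking a check like this into the existing allocation logic would make the branch above reachable without manually editing mem_mode for testing.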
@@ -450,17 +467,19 @@ void ModuleInterpreter::value_to_disk(const std::string &filename,
const std::string &name,
std::vector<float> &data,
bool express_type) {
// auto value = value_map.at(name);
// if (express_type && module::isState(module::State::TPU_LOWERED)) {
// if (module::isUniformQuantized(value)) {
// auto qtype = module::getUniformQuantizedType(value);
// for (auto &d : data) {
// d = (d - (float)qtype.getZeroPoint()) * (float)qtype.getScale();
// }
// }
// }
// cnpy::npz_save(filename, name, data, "a");
llvm_unreachable("Not Implemented");
auto value = value_map.at(name);
if (express_type && module::isState(module::State::TPU_LOWERED)) {
if (module::isUniformQuantized(value)) {
auto qtype = module::getUniformQuantizedType(value);
for (auto &d : data) {
d = (d - (float)qtype.getZeroPoint()) * (float)qtype.getScale();
}
}
}
if (store_disk_shape.cbegin() != store_disk_shape.cend())
store_disk_shape.clear();
store_disk_shape.push_back(data.size());
cnpy::npz_save(filename, name, &data[0], store_disk_shape, "a");
}

void ModuleInterpreter::invoke_to_disk(const std::string &filename,
@@ -487,7 +506,15 @@ void ModuleInterpreter::invoke_to_disk(const std::string &filename,
}
auto iter = mem_uses.find(name);
if (iter == mem_uses.end()) {
continue;
if (auto WeightOp = dyn_cast<top::WeightOp>(in.getDefiningOp())) {
int num_uses = std::distance(in.user_begin(), in.user_end());
if (num_uses == 1) {
to_free.push_back(name);
continue;
} else
mem_uses[name] = num_uses - 1;
} else
continue;
}
iter->second--;
if (iter->second == 0) {
@@ -861,4 +888,9 @@ ModuleInterpreter::getTensorShape(const std::string &name) {
return it->second.getType().cast<RankedTensorType>().getShape();
}

void ModuleInterpreter::setTempFile(std::string filename) {
temp_file_name = filename;
}

std::string ModuleInterpreter::getTempFile() { return temp_file_name; }
} // namespace tpu_mlir
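
For reference, value_to_disk appends each tensor to the temporary archive as a flat float32 array keyed by its tensor name, so the file written by invoke_to_disk can also be inspected directly with the bundled cnpy API. A minimal readback sketch, assuming the stock cnpy interface; the archive name and the tensor name "output" are placeholders:

// Standalone readback sketch; file and tensor names are placeholders.
#include "cnpy.h"
#include <iostream>

int main() {
  // npz_load with only a file name returns a map from tensor name to NpyArray.
  cnpy::npz_t archive = cnpy::npz_load("inference_value_in_disk_abcd.npz");
  cnpy::NpyArray arr = archive.at("output");
  const float *vals = arr.data<float>(); // value_to_disk stores every tensor as float32
  std::cout << "elements: " << arr.num_vals << ", first value: " << vals[0] << "\n";
  return 0;
}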
8 changes: 8 additions & 0 deletions python/tools/model_runner.py
@@ -224,6 +224,7 @@ def mlir_inference(inputs: dict, mlir_file: str, dump_all: bool = True, debug=No
if dump_all:
return tensors
outputs = dict()
temp_file_name = g_mlir_module.get_tempfile()
for name in g_mlir_module.output_names:
outputs[name] = tensors[name]
# assume output of op has the same name
@@ -232,6 +233,13 @@ def mlir_inference(inputs: dict, mlir_file: str, dump_all: bool = True, debug=No
pre_op = parser.get_pre_op_by_op_name(name)[0]
if pre_op in tensors:
outputs[pre_op] = tensors[pre_op]
else:
# if the temp file exists, read the tensor back for comparison
if temp_file_name != "Default" and os.path.isfile(temp_file_name):
x = np.load(temp_file_name)
outputs[pre_op] = x[pre_op]
if temp_file_name != "Default" and os.path.isfile(temp_file_name):
os.remove(temp_file_name)
return outputs


35 changes: 21 additions & 14 deletions third_party/cnpy/cnpy.cpp
@@ -331,22 +331,29 @@ void npz_save(std::string zipname, std::string fname,
if(mode == "a") fp = fopen(zipname.c_str(),"r+b");

if(fp) {
//zip file exists. we need to add a new npy file to it.
//first read the footer.
//this gives us the offset and size of the global header
//then read and store the global header.
//below, we will write the the new data at the start of the global
//header then append the global header and footer below it
size_t global_header_size;
parse_zip_footer(fp,nrecs,global_header_size,global_header_offset);
fseek(fp,global_header_offset,SEEK_SET);
global_header.resize(global_header_size);
size_t res = fread(&global_header[0],sizeof(char),global_header_size,fp);
if(res != global_header_size){
throw std::runtime_error("npz_save: "
fseek(fp, 0, SEEK_END);
long size = ftell(fp);
if (size != 0) {
// zip file exists. we need to add a new npy file to it.
// first read the footer.
// this gives us the offset and size of the global header
// then read and store the global header.
// below, we will write the the new data at the start of the global
// header then append the global header and footer below it
size_t global_header_size;
parse_zip_footer(fp, nrecs, global_header_size,
global_header_offset);
fseek(fp, global_header_offset, SEEK_SET);
global_header.resize(global_header_size);
size_t res =
fread(&global_header[0], sizeof(char), global_header_size, fp);
if (res != global_header_size) {
throw std::runtime_error(
"npz_save: "
"header read error while adding to existing zip");
}
fseek(fp, global_header_offset, SEEK_SET);
}
fseek(fp,global_header_offset,SEEK_SET);
}
else {
fp = fopen(zipname.c_str(),"wb");
Expand Down