Support inference value2disk #119

Open · wants to merge 4 commits into base: master
Changes from 2 commits
1 change: 1 addition & 0 deletions include/tpu_mlir/Support/ModuleInterpreter.h
@@ -82,6 +82,7 @@ class ModuleInterpreter {
std::map<std::string, Value> value_map;
std::map<std::string, std::shared_ptr<InferenceParameter>> inference_map;
std::map<std::string, std::shared_ptr<std::vector<float>>> mem_map;
std::vector<size_t> store_disk_shape;
Collaborator:
Do we need this variable?

};

} // namespace tpu_mlir
46 changes: 34 additions & 12 deletions lib/Support/ModuleInterpreter.cpp
@@ -19,6 +19,7 @@
#include <functional>
#include <memory>
#include <numeric>
#include <fstream>

#define DEBUG_TYPE "interpreter"

@@ -240,6 +241,9 @@ void ModuleInterpreter::allocate_all_tensor_in_disk() {
}
});
module::detachWeightFile(); // to free weight memory
  // count the inference ops in this function
  func.walk([&](InferenceInterface infer_op) { num_infer_op++; });
}
}
void ModuleInterpreter::allocate_all_tensor_in_mem() {
@@ -361,6 +365,13 @@ void ModuleInterpreter::invoke(bool express_type) {
case mem_mode_t::PART_TENSOR_IN_MEM:
invoke_part_in_mem(express_type);
break;
case mem_mode_t::ALL_TENSOR_IN_DISK:
Collaborator:
Lines 37-47 never set mem_mode to ALL_TENSOR_IN_DISK, so this branch cannot be executed?

Author:
Yes, there is no corresponding case here. For testing I set mem_mode = mem_mode_t::ALL_TENSOR_IN_DISK manually; perhaps a threshold greater than 16 GB is needed to activate this case. I don't know what the threshold should be, so I hope you can set it. With that change, the test above passed.
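For illustration only, a minimal sketch of what such a threshold-based selection could look like; the helper name, both thresholds, and the ALL_TENSOR_IN_MEM value are assumptions, not code from this PR:

```cpp
#include <cstddef>

// Stand-in for the interpreter's mem_mode_t (ALL_TENSOR_IN_DISK and
// PART_TENSOR_IN_MEM appear in the diff; ALL_TENSOR_IN_MEM is assumed).
enum class mem_mode_t { ALL_TENSOR_IN_MEM, PART_TENSOR_IN_MEM, ALL_TENSOR_IN_DISK };

constexpr std::size_t GiB = 1ull << 30;

// Hypothetical selection logic; the 16 GiB / 4 GiB thresholds are placeholders.
mem_mode_t pick_mem_mode(std::size_t total_tensor_bytes) {
  if (total_tensor_bytes > 16 * GiB)
    return mem_mode_t::ALL_TENSOR_IN_DISK; // spill all tensors to disk
  if (total_tensor_bytes > 4 * GiB)
    return mem_mode_t::PART_TENSOR_IN_MEM; // keep only part in memory
  return mem_mode_t::ALL_TENSOR_IN_MEM;
}
```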

  if (FILE *file = fopen("./value2disk.npz", "rb")) {
    // remove a stale dump from a previous run before appending
    fclose(file);
    std::remove("./value2disk.npz");
  }
  invoke_to_disk("./value2disk.npz", express_type);
Collaborator:
This hard-coded file name "value2disk.npz" could be improved using llvm TempFile or a related strategy.

Author:
In the new commit the llvm TempFile class is used, which turned out to be somewhat more complex: because the file name is randomly generated, an additional pybind interface was added so that the Python side can obtain it.
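A minimal sketch of the llvm::sys::fs::TempFile approach this reply describes; the function name and the %%%%%% model string are assumptions, not the PR's exact code:

```cpp
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include <string>

// Create a uniquely named npz dump file and keep it on disk so the Python
// side can read it back; the returned name would be exposed through pybind.
std::string createDumpFile() {
  auto tmp = llvm::sys::fs::TempFile::create("value2disk-%%%%%%.npz");
  if (!tmp) {
    llvm::consumeError(tmp.takeError());
    return {};
  }
  std::string name = tmp->TmpName;
  // keep() disarms auto-deletion; otherwise the file vanishes on destruction.
  if (llvm::Error err = tmp->keep()) {
    llvm::consumeError(std::move(err));
    return {};
  }
  return name;
}
```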

break;
default:
llvm_unreachable("Mem not enough, please use invoke_to_disk");
break;
@@ -450,17 +461,19 @@ void ModuleInterpreter::value_to_disk(const std::string &filename,
const std::string &name,
std::vector<float> &data,
bool express_type) {
// auto value = value_map.at(name);
// if (express_type && module::isState(module::State::TPU_LOWERED)) {
// if (module::isUniformQuantized(value)) {
// auto qtype = module::getUniformQuantizedType(value);
// for (auto &d : data) {
// d = (d - (float)qtype.getZeroPoint()) * (float)qtype.getScale();
// }
// }
// }
// cnpy::npz_save(filename, name, data, "a");
llvm_unreachable("Not Implemented");
auto value = value_map.at(name);
if (express_type && module::isState(module::State::TPU_LOWERED)) {
if (module::isUniformQuantized(value)) {
auto qtype = module::getUniformQuantizedType(value);
for (auto &d : data) {
d = (d - (float)qtype.getZeroPoint()) * (float)qtype.getScale();
}
}
}
  if (!store_disk_shape.empty())
    store_disk_shape.clear();
  store_disk_shape.push_back(data.size());
  cnpy::npz_save(filename, name, &data[0], store_disk_shape, "a");
}
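The express_type branch above applies the usual affine dequantization before appending the tensor to the npz archive (the "a" flag appends to an existing file). As a standalone per-element reference, a sketch rather than PR code:

```cpp
// real_value = (quantized_value - zero_point) * scale
inline float dequantize(float q, long zero_point, double scale) {
  return (q - static_cast<float>(zero_point)) * static_cast<float>(scale);
}
```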

void ModuleInterpreter::invoke_to_disk(const std::string &filename,
@@ -487,7 +500,16 @@
}
auto iter = mem_uses.find(name);
if (iter == mem_uses.end()) {
continue;
      if (auto weight_op = dyn_cast_or_null<top::WeightOp>(in.getDefiningOp())) {
        int num_uses = std::distance(in.user_begin(), in.user_end());
        if (num_uses == 1) { // sole use of this weight: free it right away
          to_free.push_back(name);
        } else { // first use seen: record the remaining use count
          mem_uses[name] = num_uses - 1;
        }
        continue; // iter is end() here, so skip the decrement below
      }
      continue;
}
iter->second--;
if (iter->second == 0) {
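The hunk above extends the reference-count bookkeeping to weights: a weight with a single user is freed immediately, while a first-seen weight with several users is entered into mem_uses with its remaining use count. A simplified, self-contained sketch of that pattern, with assumed helper names rather than the PR's code:

```cpp
#include <map>
#include <string>
#include <vector>

std::map<std::string, int> mem_uses; // remaining uses per tensor name
std::vector<std::string> to_free;    // names whose buffers can be released

// Called once per consumed operand; total_users is the operand's user count.
void note_use(const std::string &name, int total_users) {
  auto it = mem_uses.find(name);
  if (it == mem_uses.end()) {
    if (total_users == 1) {
      to_free.push_back(name); // only use: free immediately
    } else {
      mem_uses[name] = total_users - 1; // first use seen: record the rest
    }
    return;
  }
  if (--it->second == 0) {
    to_free.push_back(name); // last use just happened
    mem_uses.erase(it);
  }
}
```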
5 changes: 5 additions & 0 deletions python/tools/model_runner.py
@@ -232,6 +232,11 @@ def mlir_inference(inputs: dict, mlir_file: str, dump_all: bool = True, debug=No
pre_op = parser.get_pre_op_by_op_name(name)[0]
if pre_op in tensors:
outputs[pre_op] = tensors[pre_op]
else:
    # if the dump file exists, read the tensor back for comparison
    if os.path.isfile("./value2disk.npz"):
        x = np.load("./value2disk.npz")
        outputs[pre_op] = x[pre_op]
return outputs

