Support inference value2disk #119

Open · wants to merge 4 commits into master
6 changes: 6 additions & 0 deletions bindings/pymlir/pymlir.cpp
@@ -140,6 +140,11 @@ class py_module {
return getPyArray(std::move(tensor), shape);
}

py::str get_tempfile() {
auto filename = interpreter_->getTempFile();
return filename;
}

// Tip: not using copy in python, since independent mem
py::array get_fp32_tensor(std::string name) {
auto tensor = interpreter_->getTensor(name, true);
@@ -246,6 +251,7 @@ PYBIND11_MODULE(pymlir, m) {
.def("set_tensor", &py_module::set_tensor)
.def("set_tensor_from_int", &py_module::set_tensor_from_int)
.def("get_tensor", &py_module::get_tensor, "get one tensor data")
.def("get_tempfile", &py_module::get_tempfile, "get file in value to disk mode")
.def("get_fp32_tensor", &py_module::get_fp32_tensor, "get one fp32 tensor data")
.def("get_all_tensor", &py_module::getAllTensor, "dump all tensor data")
.def("invoke", &py_module::invoke)
5 changes: 4 additions & 1 deletion include/tpu_mlir/Support/ModuleInterpreter.h
@@ -55,6 +55,8 @@ class ModuleInterpreter {
float &scale, int &zp);
llvm::ArrayRef<int64_t> getTensorShape(const std::string &name);
bool is_no_mem_op(Operation *op);
std::string getTempFile();
void setTempFile(std::string filename);

private:
void allocate_part_tensor_in_mem();
@@ -73,7 +75,7 @@
std::vector<std::string>
all_tensor_names; // activation tensor, without weight
std::vector<std::string> all_weight_names; // weight tensor

std::string temp_file_name = "Default";
private:
ModuleOp module;
int64_t num_infer_op;
Expand All @@ -82,6 +84,7 @@ class ModuleInterpreter {
std::map<std::string, Value> value_map;
std::map<std::string, std::shared_ptr<InferenceParameter>> inference_map;
std::map<std::string, std::shared_ptr<std::vector<float>>> mem_map;
std::vector<size_t> store_disk_shape;
Collaborator:
Do we need this variable?

};

} // namespace tpu_mlir
56 changes: 44 additions & 12 deletions lib/Support/ModuleInterpreter.cpp
@@ -19,6 +19,8 @@
#include <functional>
#include <memory>
#include <numeric>
#include <fstream>
#include <llvm/Support/FileSystem.h>

#define DEBUG_TYPE "interpreter"

@@ -240,6 +242,7 @@ void ModuleInterpreter::allocate_all_tensor_in_disk() {
}
});
module::detachWeightFile(); // to free weight memory
func.walk([&](InferenceInterface infer_op) { num_infer_op++; });
}
}
void ModuleInterpreter::allocate_all_tensor_in_mem() {
@@ -361,6 +364,20 @@ void ModuleInterpreter::invoke(bool express_type) {
case mem_mode_t::PART_TENSOR_IN_MEM:
invoke_part_in_mem(express_type);
break;
case mem_mode_t::ALL_TENSOR_IN_DISK:
Collaborator:
Lines 37-47 cannot set mem_mode to ALL_TENSOR_IN_DISK, so this branch cannot be executed?

Author:
Yes, there is no corresponding case here. I set mem_mode = mem_mode_t::ALL_TENSOR_IN_DISK manually for testing; perhaps a threshold greater than 16 GB is needed to activate this case. I don't know what the threshold should be, so I hope you can set it. With that change, the test above passed.

llvm::Twine str1 = "inference_value_in_disk_%%%%.npz";
auto tempfile = llvm::sys::fs::TempFile::create(str1, 146);
if (tempfile) {
llvm::sys::fs::TempFile &tmp = tempfile.get();
invoke_to_disk(tmp.TmpName.c_str(), express_type);
setTempFile(tmp.TmpName.c_str());
if (tmp.keep()) {
llvm_unreachable("tmp.keep failed!");
}
} else {
llvm_unreachable("create tempfile failed!");
}
break;
default:
llvm_unreachable("Mem not enough, please use invoke_to_disk");
break;
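
The review thread above asks how mem_mode ever becomes ALL_TENSOR_IN_DISK. As a point of reference only, the following is a minimal sketch of the size-based dispatch the author hints at; the function name select_mem_mode, the total_activation_bytes parameter, and the 16 GiB constant are illustrative assumptions, not code from this repository.

// Hypothetical sketch: pick the disk mode when activations would not fit in memory.
// kDiskThreshold and select_mem_mode are assumed names, not part of the existing code.
void ModuleInterpreter::select_mem_mode(size_t total_activation_bytes) {
  constexpr size_t kDiskThreshold = 16ull << 30; // assumed 16 GiB cut-off
  if (total_activation_bytes >= kDiskThreshold) {
    mem_mode = mem_mode_t::ALL_TENSOR_IN_DISK; // spill activations to the temporary npz file
    allocate_all_tensor_in_disk();
  } else {
    mem_mode = mem_mode_t::ALL_TENSOR_IN_MEM;  // default in-memory path
    allocate_all_tensor_in_mem();
  }
}

Hooking a check like this into the existing allocation logic would make the branch above reachable without manually editing mem_mode for testing.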
@@ -450,17 +467,19 @@ void ModuleInterpreter::value_to_disk(const std::string &filename,
const std::string &name,
std::vector<float> &data,
bool express_type) {
// auto value = value_map.at(name);
// if (express_type && module::isState(module::State::TPU_LOWERED)) {
// if (module::isUniformQuantized(value)) {
// auto qtype = module::getUniformQuantizedType(value);
// for (auto &d : data) {
// d = (d - (float)qtype.getZeroPoint()) * (float)qtype.getScale();
// }
// }
// }
// cnpy::npz_save(filename, name, data, "a");
llvm_unreachable("Not Implemented");
auto value = value_map.at(name);
if (express_type && module::isState(module::State::TPU_LOWERED)) {
if (module::isUniformQuantized(value)) {
auto qtype = module::getUniformQuantizedType(value);
for (auto &d : data) {
d = (d - (float)qtype.getZeroPoint()) * (float)qtype.getScale();
}
}
}
if (store_disk_shape.cbegin() != store_disk_shape.cend())
store_disk_shape.clear();
store_disk_shape.push_back(data.size());
cnpy::npz_save(filename, name, &data[0], store_disk_shape, "a");
}

void ModuleInterpreter::invoke_to_disk(const std::string &filename,
@@ -487,7 +506,15 @@ void ModuleInterpreter::invoke_to_disk(const std::string &filename,
}
auto iter = mem_uses.find(name);
if (iter == mem_uses.end()) {
continue;
if (auto WeightOp = dyn_cast<top::WeightOp>(in.getDefiningOp())) {
int num_uses = std::distance(in.user_begin(), in.user_end());
if (num_uses == 1) {
to_free.push_back(name);
continue;
} else
mem_uses[name] = num_uses - 1;
} else
continue;
}
iter->second--;
if (iter->second == 0) {
@@ -861,4 +888,9 @@ ModuleInterpreter::getTensorShape(const std::string &name) {
return it->second.getType().cast<RankedTensorType>().getShape();
}

void ModuleInterpreter::setTempFile(std::string filename) {
temp_file_name = filename;
}

std::string ModuleInterpreter::getTempFile() { return temp_file_name; }
} // namespace tpu_mlir
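
For reference, value_to_disk appends each tensor to the temporary archive as a flat float32 array keyed by its tensor name, so the file written by invoke_to_disk can also be inspected directly with the bundled cnpy API. A minimal readback sketch, assuming the stock cnpy interface; the archive name and the tensor name "output" are placeholders:

// Standalone readback sketch; file and tensor names are placeholders.
#include "cnpy.h"
#include <iostream>

int main() {
  // npz_load with only a file name returns a map from tensor name to NpyArray.
  cnpy::npz_t archive = cnpy::npz_load("inference_value_in_disk_abcd.npz");
  cnpy::NpyArray arr = archive.at("output");
  const float *vals = arr.data<float>(); // value_to_disk stores every tensor as float32
  std::cout << "elements: " << arr.num_vals << ", first value: " << vals[0] << "\n";
  return 0;
}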
8 changes: 8 additions & 0 deletions python/tools/model_runner.py
@@ -224,6 +224,7 @@ def mlir_inference(inputs: dict, mlir_file: str, dump_all: bool = True, debug=No
if dump_all:
return tensors
outputs = dict()
temp_file_name = g_mlir_module.get_tempfile()
for name in g_mlir_module.output_names:
outputs[name] = tensors[name]
# assume output of op has the same name
@@ -232,6 +233,13 @@ def mlir_inference(inputs: dict, mlir_file: str, dump_all: bool = True, debug=No
pre_op = parser.get_pre_op_by_op_name(name)[0]
if pre_op in tensors:
outputs[pre_op] = tensors[pre_op]
else:
# if the temp file exists, read the tensor back for comparison
if temp_file_name != "Default" and os.path.isfile(temp_file_name):
x = np.load(temp_file_name)
outputs[pre_op] = x[pre_op]
if temp_file_name != "Default" and os.path.isfile(temp_file_name):
os.remove(temp_file_name)
return outputs


35 changes: 21 additions & 14 deletions third_party/cnpy/cnpy.cpp
@@ -331,22 +331,29 @@ void npz_save(std::string zipname, std::string fname,
if(mode == "a") fp = fopen(zipname.c_str(),"r+b");

if(fp) {
//zip file exists. we need to add a new npy file to it.
//first read the footer.
//this gives us the offset and size of the global header
//then read and store the global header.
//below, we will write the the new data at the start of the global
//header then append the global header and footer below it
size_t global_header_size;
parse_zip_footer(fp,nrecs,global_header_size,global_header_offset);
fseek(fp,global_header_offset,SEEK_SET);
global_header.resize(global_header_size);
size_t res = fread(&global_header[0],sizeof(char),global_header_size,fp);
if(res != global_header_size){
throw std::runtime_error("npz_save: "
fseek(fp, 0, SEEK_END);
long size = ftell(fp);
if (size != 0) {
// zip file exists. we need to add a new npy file to it.
// first read the footer.
// this gives us the offset and size of the global header
// then read and store the global header.
// below, we will write the the new data at the start of the global
// header then append the global header and footer below it
size_t global_header_size;
parse_zip_footer(fp, nrecs, global_header_size,
global_header_offset);
fseek(fp, global_header_offset, SEEK_SET);
global_header.resize(global_header_size);
size_t res =
fread(&global_header[0], sizeof(char), global_header_size, fp);
if (res != global_header_size) {
throw std::runtime_error(
"npz_save: "
"header read error while adding to existing zip");
}
fseek(fp, global_header_offset, SEEK_SET);
}
fseek(fp,global_header_offset,SEEK_SET);
}
else {
fp = fopen(zipname.c_str(),"wb");
Expand Down