Fix bundled program and plan_execute in pybindings #4595

Merged · 1 commit · Aug 9, 2024
205 changes: 126 additions & 79 deletions extension/pybindings/pybindings.cpp
@@ -95,6 +95,34 @@ void write_data_to_file(const std::string& path, void* buf, size_t size) {
}
}

void setup_output_storage(
executor::Method& method,
const std::vector<Span<uint8_t>>& output_storages) {
if (output_storages.size() != method.outputs_size()) {
THROW_IF_ERROR(
Error(),
"number of output storages %zu does not match number of outputs %zu",
output_storages.size(),
method.outputs_size());
}
for (size_t i = 0; i < output_storages.size(); ++i) {
if (output_storages[i].size() == 0) {
// Skip empty output storages; this can happen for non-tensor outputs.
continue;
}
Error output_status = method.set_output_data_ptr(
output_storages[i].data(), output_storages[i].size(), i);
// InvalidState can be the status if outputs are already memory planned.
// That's fine and we don't need to alert the user to that error.
if (output_status != Error::Ok && output_status != Error::InvalidState) {
ET_LOG(
Error,
"Cannot set_output_data_ptr(): 0x%" PRIx32,
static_cast<uint32_t>(output_status));
}
}
}

using util::BufferDataLoader;
using util::MallocMemoryAllocator;
using util::MmapDataLoader;
@@ -209,33 +237,19 @@ class Module final {
c10::autograd_dispatch_keyset);
#endif
if (output_storages) {
if (output_storages->size() != method->outputs_size()) {
THROW_IF_ERROR(
Error(),
"number of output storages %zu does not match number of outputs %zu",
output_storages->size(),
method->outputs_size());
}
for (size_t i = 0; i < output_storages->size(); ++i) {
Error output_status = method->set_output_data_ptr(
(*output_storages)[i].data(), (*output_storages)[i].size(), i);
// InvalidState can be the status if outputs are already memory planned.
// That's fine and we don't need to alert the user to that error.
if (output_status != Error::Ok &&
output_status != Error::InvalidState) {
ET_LOG(
Error,
"Cannot set_output_data_ptr(): 0x%" PRIx32,
static_cast<uint32_t>(output_status));
}
}
setup_output_storage(*method, *output_storages);
}
Error execute_status = method->execute();
THROW_IF_ERROR(
execute_status,
"method->execute() failed with error 0x%" PRIx32,
static_cast<uint32_t>(execute_status));
// process outputs
return get_outputs(method_name);
}

std::vector<EValue> get_outputs(const std::string& method_name) {
auto& method = methods_[method_name];
std::vector<EValue> result(method->outputs_size());

Error get_outputs_status =
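Note: with the storage setup factored into setup_output_storage, run_method's Python-visible behavior is unchanged; the caller-supplied output spans are wired up before execute() and the outputs are fetched afterward. A minimal usage sketch; the loader import and file name are illustrative assumptions, not taken from this diff:

    import torch
    # Illustrative entry point; the actual loader name depends on the build.
    from executorch.extension.pybindings.portable_lib import _load_for_executorch

    module = _load_for_executorch("model.pte")
    # Output storage is pre-allocated internally; the caller just gets a list.
    outputs = module.run_method("forward", (torch.ones(2, 2),))
    print(outputs[0])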
@@ -556,62 +570,17 @@ struct PyModule final {

const auto& method = module_->get_method(method_name);
const auto num_outputs = method.outputs_size();
// These output storages will not be used if the ExecuTorch program already
// pre-allocated output space. That is represented by an error from
// set_output_data_ptr.
std::vector<std::unique_ptr<uint8_t[]>> output_storages(num_outputs);
output_storages_ = make_output_storages(method);
std::vector<Span<uint8_t>> output_storage_spans(num_outputs);
for (size_t i = 0; i < num_outputs; ++i) {
const auto& output_tensor_meta =
method.method_meta().output_tensor_meta(i);
if (!output_tensor_meta.ok()) {
// If the output isn't a tensor it won't have a tensor meta.
ET_LOG(
Info,
"Tensor meta doesn't exist for output %zu, error is 0x%" PRIx32
", skipping allocating storage",
i,
static_cast<uint32_t>(output_tensor_meta.error()));
output_storage_spans[i] = Span<uint8_t>();
continue;
}
const size_t output_size = output_tensor_meta.get().nbytes();
std::unique_ptr<uint8_t[]> output(new uint8_t[output_size]);
output_storage_spans[i] = Span<uint8_t>(output.get(), output_size);
output_storages[i] = std::move(output);
for (size_t i = 0; i < output_storages_.size(); ++i) {
output_storage_spans[i] =
Span<uint8_t>(output_storages_[i].data(), output_storages_[i].size());
}
auto outputs =
module_->run_method(method_name, cpp_inputs, output_storage_spans);

// Retrieve outputs
const auto outputs_size = outputs.size();
py::list list(outputs_size);
for (size_t i = 0; i < outputs_size; ++i) {
auto& v = outputs[i];
if (Tag::None == v.tag) {
list[i] = py::none();
} else if (Tag::Int == v.tag) {
list[i] = py::cast(v.toInt());
} else if (Tag::Double == v.tag) {
list[i] = py::cast(v.toDouble());
} else if (Tag::Bool == v.tag) {
list[i] = py::cast(v.toBool());
} else if (Tag::String == v.tag) {
list[i] = py::cast(std::string(v.toString().data()));
} else if (Tag::Tensor == v.tag) {
#ifdef USE_ATEN_LIB
// Clone so the outputs in python do not share a lifetime with the
// module object
list[i] = py::cast(v.toTensor().clone());
#else
list[i] = py::cast(
torch::util::alias_attensor_to_etensor(v.toTensor()).clone());
#endif
} else {
ET_ASSERT_UNREACHABLE_MSG("Invalid model output type");
}
}
return list;
return get_outputs_as_py_list(outputs);
}

py::list forward(const py::sequence& inputs) {
@@ -670,35 +639,113 @@ struct PyModule final {
static_cast<uint32_t>(status));
}

void verify_result_with_bundled_expected_output(
py::list verify_result_with_bundled_expected_output(
PyBundledModule& m,
const string method_name,
size_t testset_idx,
double rtol = 1e-5,
double atol = 1e-8) {
const void* bundled_program_ptr = m.get_bundled_program_ptr();
Error status = bundled_program::VerifyResultWithBundledExpectedOutput(
module_->get_method(method_name),
bundled_program_ptr,
testset_idx,
rtol,
atol);
auto& method = module_->get_method(method_name);
Error status = bundled_program::LoadBundledInput(
method, bundled_program_ptr, testset_idx);
THROW_IF_ERROR(
status,
"LoadBundledInput failed with status %" PRIu32,
static_cast<uint32_t>(status));
py::list outputs = plan_execute(method_name);
status = bundled_program::VerifyResultWithBundledExpectedOutput(
method, bundled_program_ptr, testset_idx, rtol, atol);
THROW_IF_ERROR(
status,
"Result verification failed with status %" PRIu32,
static_cast<uint32_t>(status));
return outputs;
}
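Note: verification now runs the full sequence itself: LoadBundledInput stages the testset's inputs, plan_execute runs the method with pre-allocated output storage, and VerifyResultWithBundledExpectedOutput compares the results against the bundled expectations before the outputs are returned. A sketch of the call; module (ExecuTorchModule) and bundle (BundledModule) are assumed to have been created with the pybindings loaders, and the method name is illustrative:

    # Loads testset 0's inputs, executes, verifies within rtol/atol, and
    # returns the computed outputs; failures raise via THROW_IF_ERROR.
    outputs = module.verify_result_with_bundled_expected_output(bundle, "forward", 0)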

void plan_execute(const string method_name) {
auto status = module_->get_method(method_name).execute();
py::list plan_execute(const string method_name) {
auto& method = module_->get_method(method_name);
// Need to pre-allocate space for outputs just like in run_method.
const auto num_outputs = method.outputs_size();
output_storages_ = make_output_storages(method);
std::vector<Span<uint8_t>> output_storage_spans(num_outputs);
for (size_t i = 0; i < output_storages_.size(); ++i) {
output_storage_spans[i] =
Span<uint8_t>(output_storages_[i].data(), output_storages_[i].size());
}
setup_output_storage(method, output_storage_spans);
auto status = method.execute();
THROW_IF_ERROR(
status,
"executing execution plan for method '%s' failed with error: 0x%" PRIx32,
method_name.c_str(),
static_cast<uint32_t>(status));
const auto outputs = module_->get_outputs(method_name);
return get_outputs_as_py_list(outputs);
}
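Note: plan_execute previously called execute() without setting up output storage, which failed for methods whose outputs are not memory-planned; it now mirrors run_method's pre-allocation and converts the results for Python. A hedged sketch of the intended pairing with load_bundled_input; objects and names are assumed as in the sketch above:

    module.load_bundled_input(bundle, "forward", 0)  # stage testset 0's inputs
    # Pre-allocates output storage, executes the plan, and returns the
    # outputs converted by get_outputs_as_py_list.
    outputs = module.plan_execute("forward")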

py::list get_outputs_as_py_list(const std::vector<EValue>& outputs) {
const auto outputs_size = outputs.size();
py::list list(outputs_size);
for (size_t i = 0; i < outputs_size; ++i) {
auto& v = outputs[i];
if (Tag::None == v.tag) {
list[i] = py::none();
} else if (Tag::Int == v.tag) {
list[i] = py::cast(v.toInt());
} else if (Tag::Double == v.tag) {
list[i] = py::cast(v.toDouble());
} else if (Tag::Bool == v.tag) {
list[i] = py::cast(v.toBool());
} else if (Tag::String == v.tag) {
list[i] = py::cast(std::string(v.toString().data()));
} else if (Tag::Tensor == v.tag) {
#ifdef USE_ATEN_LIB
// Clone so the outputs in python do not share a lifetime with the
// module object
list[i] = py::cast(v.toTensor().clone());
#else
list[i] = py::cast(
torch::util::alias_attensor_to_etensor(v.toTensor()).clone());
#endif
} else {
ET_ASSERT_UNREACHABLE_MSG("Invalid model output type");
}
}
return list;
}
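Note: the tag dispatch above fixes the Python-side types a caller can observe. A small illustrative check, reusing the module from the earlier sketch:

    for v in module.run_method("forward", (torch.ones(2, 2),)):
        # Only None/Int/Double/Bool/String/Tensor tags are converted; tensor
        # outputs are cloned, so they do not alias module-owned memory.
        assert v is None or isinstance(v, (int, float, bool, str, torch.Tensor))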

private:
std::unique_ptr<Module> module_;
// Keep the output storages alive until they can be compared, which is
// needed for bundled programs.
std::vector<std::vector<uint8_t>> output_storages_;

std::vector<std::vector<uint8_t>> make_output_storages(
const executor::Method& method) {
const auto num_outputs = method.outputs_size();
// These output storages will not be used if the ExecuTorch program already
// pre-allocated output space. That is represented by an error from
// set_output_data_ptr.
std::vector<std::vector<uint8_t>> output_storages(num_outputs);
for (size_t i = 0; i < num_outputs; ++i) {
const auto& output_tensor_meta =
method.method_meta().output_tensor_meta(i);
if (!output_tensor_meta.ok()) {
// If the output isn't a tensor it won't have a tensor meta.
ET_LOG(
Error,
"Tensor meta doesn't exist for output %zu, error is 0x%" PRIx32
", skipping allocating storage",
i,
static_cast<uint32_t>(output_tensor_meta.error()));
output_storages[i] = std::vector<uint8_t>();
continue;
}
const size_t output_size = output_tensor_meta.get().nbytes();
output_storages[i] = std::vector<uint8_t>(output_size);
}
return output_storages;
}
};

void create_profile_block(const std::string& name) {
5 changes: 4 additions & 1 deletion extension/pybindings/pybindings.pyi
@@ -14,18 +14,21 @@ class ExecuTorchModule:
def run_method(self, method_name: str, inputs: Sequence[Any]) -> List[Any]: ...
# pyre-ignore[2, 3]: "Any" in parameter and return type annotations.
def forward(self, inputs: Sequence[Any]) -> List[Any]: ...
# pyre-ignore[3]: "Any" in return type annotations.
def plan_execute(self, method_name: str) -> List[Any]: ...
# Bundled program methods.
def load_bundled_input(
self, bundle: BundledModule, method_name: str, testset_idx: int
) -> None: ...
# pyre-ignore[3]: "Any" in return type annotations.
def verify_result_with_bundled_expected_output(
self,
bundle: BundledModule,
method_name: str,
testset_idx: int,
rtol: float = 1e-5,
atol: float = 1e-8,
) -> None: ...
) -> List[Any]: ...
def has_etdump(self) -> bool: ...
def write_etdump_result_to_file(
self, path: str, debug_buffer_path: Optional[str] = None
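Note: the stub change records that verify_result_with_bundled_expected_output (and the new plan_execute) now return the method's outputs rather than None, so callers can inspect results after verification. A short consumption sketch under the same assumptions as the earlier sketches:

    results = module.verify_result_with_bundled_expected_output(bundle, "forward", 0)
    print(len(results), [type(r) for r in results])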