diff --git a/extension/pybindings/pybindings.cpp b/extension/pybindings/pybindings.cpp
index 1f6d1075a1..0dba760329 100644
--- a/extension/pybindings/pybindings.cpp
+++ b/extension/pybindings/pybindings.cpp
@@ -95,6 +95,34 @@ void write_data_to_file(const std::string& path, void* buf, size_t size) {
   }
 }
 
+void setup_output_storage(
+    executor::Method& method,
+    const std::vector<Span<uint8_t>>& output_storages) {
+  if (output_storages.size() != method.outputs_size()) {
+    THROW_IF_ERROR(
+        Error(),
+        "number of output storages %zu does not match number of outputs %zu",
+        output_storages.size(),
+        method.outputs_size());
+  }
+  for (size_t i = 0; i < output_storages.size(); ++i) {
+    if (output_storages[i].size() == 0) {
+      // Skip empty output storages; this happens for non-tensor outputs.
+      continue;
+    }
+    Error output_status = method.set_output_data_ptr(
+        output_storages[i].data(), output_storages[i].size(), i);
+    // InvalidState can be the status if outputs are already memory planned.
+    // That's fine and we don't need to alert the user to that error.
+    if (output_status != Error::Ok && output_status != Error::InvalidState) {
+      ET_LOG(
+          Error,
+          "Cannot set_output_data_ptr(): 0x%" PRIx32,
+          static_cast<uint32_t>(output_status));
+    }
+  }
+}
+
 using util::BufferDataLoader;
 using util::MallocMemoryAllocator;
 using util::MmapDataLoader;
@@ -209,26 +237,7 @@ class Module final {
         c10::autograd_dispatch_keyset);
 #endif
     if (output_storages) {
-      if (output_storages->size() != method->outputs_size()) {
-        THROW_IF_ERROR(
-            Error(),
-            "number of output storages %zu does not match number of outputs %zu",
-            output_storages->size(),
-            method->outputs_size());
-      }
-      for (size_t i = 0; i < output_storages->size(); ++i) {
-        Error output_status = method->set_output_data_ptr(
-            (*output_storages)[i].data(), (*output_storages)[i].size(), i);
-        // InvalidState can be the status if outputs are already memory planned.
-        // That's fine and we don't need to alert the user to that error.
-        if (output_status != Error::Ok &&
-            output_status != Error::InvalidState) {
-          ET_LOG(
-              Error,
-              "Cannot set_output_data_ptr(): 0x%" PRIx32,
-              static_cast<uint32_t>(output_status));
-        }
-      }
+      setup_output_storage(*method, *output_storages);
     }
     Error execute_status = method->execute();
     THROW_IF_ERROR(
@@ -236,6 +245,11 @@ class Module final {
         "method->execute() failed with error 0x%" PRIx32,
         static_cast<uint32_t>(execute_status));
     // process outputs
+    return get_outputs(method_name);
+  }
+
+  std::vector<EValue> get_outputs(const std::string& method_name) {
+    auto& method = methods_[method_name];
     std::vector<EValue> result(method->outputs_size());
 
     Error get_outputs_status =
@@ -556,62 +570,17 @@ struct PyModule final {
     const auto& method = module_->get_method(method_name);
     const auto num_outputs = method.outputs_size();
 
-    // These output storages will not be used if the ExecuTorch program already
-    // pre-allocated output space. That is represented by an error from
-    // set_output_data_ptr.
-    std::vector<std::unique_ptr<uint8_t[]>> output_storages(num_outputs);
+    output_storages_ = make_output_storages(method);
     std::vector<Span<uint8_t>> output_storage_spans(num_outputs);
-    for (size_t i = 0; i < num_outputs; ++i) {
-      const auto& output_tensor_meta =
-          method.method_meta().output_tensor_meta(i);
-      if (!output_tensor_meta.ok()) {
-        // If the output isn't a tensor it won't have a tensor meta.
-        ET_LOG(
-            Info,
-            "Tensor meta doesn't exist for output %zu, error is 0x%" PRIx32
-            ", skipping allocating storage",
-            i,
-            static_cast<uint32_t>(output_tensor_meta.error()));
-        output_storage_spans[i] = Span<uint8_t>();
-        continue;
-      }
-      const size_t output_size = output_tensor_meta.get().nbytes();
-      std::unique_ptr<uint8_t[]> output(new uint8_t[output_size]);
-      output_storage_spans[i] = Span<uint8_t>(output.get(), output_size);
-      output_storages[i] = std::move(output);
+    for (size_t i = 0; i < output_storages_.size(); ++i) {
+      output_storage_spans[i] =
+          Span<uint8_t>(output_storages_[i].data(), output_storages_[i].size());
     }
     auto outputs =
         module_->run_method(method_name, cpp_inputs, output_storage_spans);
 
     // Retrieve outputs
-    const auto outputs_size = outputs.size();
-    py::list list(outputs_size);
-    for (size_t i = 0; i < outputs_size; ++i) {
-      auto& v = outputs[i];
-      if (Tag::None == v.tag) {
-        list[i] = py::none();
-      } else if (Tag::Int == v.tag) {
-        list[i] = py::cast(v.toInt());
-      } else if (Tag::Double == v.tag) {
-        list[i] = py::cast(v.toDouble());
-      } else if (Tag::Bool == v.tag) {
-        list[i] = py::cast(v.toBool());
-      } else if (Tag::String == v.tag) {
-        list[i] = py::cast(std::string(v.toString().data()));
-      } else if (Tag::Tensor == v.tag) {
-#ifdef USE_ATEN_LIB
-        // Clone so the outputs in python do not share a lifetime with the
-        // module object
-        list[i] = py::cast(v.toTensor().clone());
-#else
-        list[i] = py::cast(
-            torch::util::alias_attensor_to_etensor(v.toTensor()).clone());
-#endif
-      } else {
-        ET_ASSERT_UNREACHABLE_MSG("Invalid model output type");
-      }
-    }
-    return list;
+    return get_outputs_as_py_list(outputs);
   }
 
   py::list forward(const py::sequence& inputs) {
@@ -670,35 +639,113 @@ struct PyModule final {
         static_cast<uint32_t>(status));
   }
 
-  void verify_result_with_bundled_expected_output(
+  py::list verify_result_with_bundled_expected_output(
       PyBundledModule& m,
       const string method_name,
       size_t testset_idx,
       double rtol = 1e-5,
       double atol = 1e-8) {
     const void* bundled_program_ptr = m.get_bundled_program_ptr();
-    Error status = bundled_program::VerifyResultWithBundledExpectedOutput(
-        module_->get_method(method_name),
-        bundled_program_ptr,
-        testset_idx,
-        rtol,
-        atol);
+    auto& method = module_->get_method(method_name);
+    Error status = bundled_program::LoadBundledInput(
+        method, bundled_program_ptr, testset_idx);
+    THROW_IF_ERROR(
+        status,
+        "LoadBundledInput failed with status %" PRIu32,
+        static_cast<uint32_t>(status));
+    py::list outputs = plan_execute(method_name);
+    status = bundled_program::VerifyResultWithBundledExpectedOutput(
+        method, bundled_program_ptr, testset_idx, rtol, atol);
     THROW_IF_ERROR(
         status,
         "Result verification failed with status %" PRIu32,
         static_cast<uint32_t>(status));
+    return outputs;
   }
 
-  void plan_execute(const string method_name) {
-    auto status = module_->get_method(method_name).execute();
+  py::list plan_execute(const string method_name) {
+    auto& method = module_->get_method(method_name);
+    // Need to pre-allocate space for outputs, just as in run_method.
+    const auto num_outputs = method.outputs_size();
+    output_storages_ = make_output_storages(method);
+    std::vector<Span<uint8_t>> output_storage_spans(num_outputs);
+    for (size_t i = 0; i < output_storages_.size(); ++i) {
+      output_storage_spans[i] =
+          Span<uint8_t>(output_storages_[i].data(), output_storages_[i].size());
+    }
+    setup_output_storage(method, output_storage_spans);
+    auto status = method.execute();
     THROW_IF_ERROR(
         status,
         "executing execution plan for method 'forward' failed with error: 0x%" PRIx32,
         static_cast<uint32_t>(status));
+    const auto outputs = module_->get_outputs(method_name);
+    return get_outputs_as_py_list(outputs);
+  }
+
+  py::list get_outputs_as_py_list(const std::vector<EValue>& outputs) {
+    const auto outputs_size = outputs.size();
+    py::list list(outputs_size);
+    for (size_t i = 0; i < outputs_size; ++i) {
+      auto& v = outputs[i];
+      if (Tag::None == v.tag) {
+        list[i] = py::none();
+      } else if (Tag::Int == v.tag) {
+        list[i] = py::cast(v.toInt());
+      } else if (Tag::Double == v.tag) {
+        list[i] = py::cast(v.toDouble());
+      } else if (Tag::Bool == v.tag) {
+        list[i] = py::cast(v.toBool());
+      } else if (Tag::String == v.tag) {
+        list[i] = py::cast(std::string(v.toString().data()));
+      } else if (Tag::Tensor == v.tag) {
+#ifdef USE_ATEN_LIB
+        // Clone so the outputs in python do not share a lifetime with the
+        // module object
+        list[i] = py::cast(v.toTensor().clone());
+#else
+        list[i] = py::cast(
+            torch::util::alias_attensor_to_etensor(v.toTensor()).clone());
+#endif
+      } else {
+        ET_ASSERT_UNREACHABLE_MSG("Invalid model output type");
+      }
+    }
+    return list;
   }
 
  private:
   std::unique_ptr<Module> module_;
+  // Need to keep the output storages alive until they can be compared, in
+  // the case of bundled programs.
+  std::vector<std::vector<uint8_t>> output_storages_;
+
+  std::vector<std::vector<uint8_t>> make_output_storages(
+      const executor::Method& method) {
+    const auto num_outputs = method.outputs_size();
+    // These output storages will not be used if the ExecuTorch program already
+    // pre-allocated output space. That is represented by an error from
+    // set_output_data_ptr.
+    std::vector<std::vector<uint8_t>> output_storages(num_outputs);
+    for (size_t i = 0; i < num_outputs; ++i) {
+      const auto& output_tensor_meta =
+          method.method_meta().output_tensor_meta(i);
+      if (!output_tensor_meta.ok()) {
+        // If the output isn't a tensor it won't have a tensor meta.
+        ET_LOG(
+            Error,
+            "Tensor meta doesn't exist for output %zu, error is 0x%" PRIx32
+            ", skipping allocating storage",
+            i,
+            static_cast<uint32_t>(output_tensor_meta.error()));
+        output_storages[i] = std::vector<uint8_t>();
+        continue;
+      }
+      const size_t output_size = output_tensor_meta.get().nbytes();
+      output_storages[i] = std::vector<uint8_t>(output_size);
+    }
+    return output_storages;
+  }
 };
 
 void create_profile_block(const std::string& name) {
diff --git a/extension/pybindings/pybindings.pyi b/extension/pybindings/pybindings.pyi
index a085911672..e02ae0046f 100644
--- a/extension/pybindings/pybindings.pyi
+++ b/extension/pybindings/pybindings.pyi
@@ -14,10 +14,13 @@ class ExecuTorchModule:
     def run_method(self, method_name: str, inputs: Sequence[Any]) -> List[Any]: ...
     # pyre-ignore[2, 3]: "Any" in parameter and return type annotations.
     def forward(self, inputs: Sequence[Any]) -> List[Any]: ...
+    # pyre-ignore[3]: "Any" in return type annotations.
+    def plan_execute(self) -> List[Any]: ...
     # Bundled program methods.
     def load_bundled_input(
         self, bundle: BundledModule, method_name: str, testset_idx: int
     ) -> None: ...
+    # pyre-ignore[3]: "Any" in return type annotations.
     def verify_result_with_bundled_expected_output(
         self,
         bundle: BundledModule,
@@ -25,7 +28,7 @@ class ExecuTorchModule:
         testset_idx: int,
         rtol: float = 1e-5,
         atol: float = 1e-8,
-    ) -> None: ...
+    ) -> List[Any]: ...
     def has_etdump(self) -> bool: ...
     def write_etdump_result_to_file(
         self, path: str, debug_buffer_path: Optional[str] = None
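
As a test-plan style sketch for reviewers, the changed return types could be exercised from Python roughly as follows. The loader helper name, module path, file name, and example input are assumptions for illustration; only the `plan_execute` and `verify_result_with_bundled_expected_output` signatures come from this diff.

```python
# Rough usage sketch; the loader helper, "model.pte", and the input tensor are
# assumptions for illustration, not part of this diff.
import torch
from executorch.extension.pybindings.portable_lib import (
    _load_for_executorch_from_buffer,  # assumed loader from this extension
)

with open("model.pte", "rb") as f:  # hypothetical exported program
    module = _load_for_executorch_from_buffer(f.read())

# run_method already returned the outputs as a Python list ...
outputs = module.run_method("forward", (torch.ones(2, 2),))

# ... and after this change, plan_execute returns the outputs too, instead of
# None, using the same pre-allocated output storage path as run_method.
planned_outputs = module.plan_execute()
assert len(planned_outputs) == len(outputs)
```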