-
Notifications
You must be signed in to change notification settings - Fork 3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enabling L2+ Optimizations for EPs #23517
base: main
Are you sure you want to change the base?
Changes from all commits
1d5ca89
e9119d5
b7a0b79
3b28ffc
309341e
d0cbc65
b239db0
a83dd11
372342c
39fa897
627a00a
06ca086
a965ffb
4c2697c
2b81789
0c10cd4
3360dfd
5f7da9f
e610bc8
e95f2c3
bad19b9
d4968cb
df5aca9
60d9599
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,8 +2,10 @@ | |
// Licensed under the MIT License. | ||
|
||
#pragma once | ||
#include <functional> | ||
#include "core/common/common.h" | ||
#include "core/graph/indexed_sub_graph.h" | ||
#include "core/graph/graph.h" | ||
|
||
namespace onnxruntime { | ||
// A structure encodes a subgraph and the method to run it. | ||
|
@@ -21,5 +23,22 @@ | |
|
||
ComputeCapability(std::unique_ptr<IndexedSubGraph> t_sub_graph) | ||
: sub_graph(std::move(t_sub_graph)) {} | ||
|
||
// Optional function to optimize this ComputeCapability. | ||
// This will be called by ORT once the ComputeCapability is assigned to the EP | ||
// Optimization: std::function<Status(Graph& graph, const ComputeCapability& this_optimization, ComputeCapability& cc_to_update)> | ||
std::function<Status(Graph&, const ComputeCapability&, ComputeCapability&)> optimization_func; | ||
|
||
// Optional key/value strings to configure an optimizer | ||
std::unordered_map<std::string, std::string> optimization_configs; | ||
Check warning on line 33 in onnxruntime/core/framework/compute_capability.h
|
||
Comment on lines
+32
to
+33
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These config values may be needed during selection, so this may not be the best place for them. If we provide them to the selection function, it is free to pass them through to the optimization function via a lambda with capture if needed. |
||
|
||
// optional ComputeCapability instances for sets of nodes within this ComputeCapability that should be optimized. | ||
// when an optimization is applied, ORT will update this ComputeCapability to reflect the changes made. | ||
// IndexedSubGraph.nodes: | ||
// - update based on RemovedNode/AddNode calls | ||
// IndexedSubGraph.MetaDef (if present): | ||
// - inputs and outputs will be unchanged | ||
// - constant_initializers MAY change if we constant fold an initializer during optimization | ||
std::vector<std::unique_ptr<ComputeCapability>> nodes_to_optimize; | ||
Check warning on line 42 in onnxruntime/core/framework/compute_capability.h
|
||
}; | ||
} // namespace onnxruntime |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -280,11 +280,14 @@ static Node* PlaceNode(Graph& graph, const IndexedSubGraph& capability, | |
IExecutionProvider::FusionStyle fusion_style, | ||
const std::string& provider_type, | ||
GraphPartitioner::Mode mode, | ||
int& fused_node_unique_id) { | ||
int& fused_node_unique_id, | ||
bool* subgraph_assigned_to_ep) { | ||
Node* result = nullptr; | ||
*subgraph_assigned_to_ep = false; | ||
|
||
if (nullptr == capability.GetMetaDef()) { | ||
TryAssignSingleNode(graph, capability, provider_type); | ||
*subgraph_assigned_to_ep = true; | ||
} else { | ||
// The <provider> can run a fused <sub_graph> in the <graph>. | ||
|
||
|
@@ -347,6 +350,7 @@ static Node* PlaceNode(Graph& graph, const IndexedSubGraph& capability, | |
} | ||
} | ||
} | ||
*subgraph_assigned_to_ep = true; | ||
} | ||
} | ||
|
||
|
@@ -426,7 +430,20 @@ static Status PartitionOnnxFormatModelImpl(Graph& graph, FuncManager& func_mgr, | |
entry->sub_graph->GetMetaDef() != nullptr; | ||
})); | ||
for (auto& capability : capabilities) { | ||
Node* n = PlaceNode(graph, *capability->sub_graph, fusion_style, type, mode, fused_node_unique_id); | ||
bool subgraph_assigned_to_ep = false; | ||
Node* n = PlaceNode(graph, *capability->sub_graph, fusion_style, type, mode, fused_node_unique_id, &subgraph_assigned_to_ep); | ||
|
||
// If the subgraph is assigned to the EP and the ComputeCapability has nodes_to_optimize, | ||
// run EP related optimizations and update ComputeCapability. | ||
if (subgraph_assigned_to_ep && !capability->nodes_to_optimize.empty()) { | ||
for (auto& optimization_cc : capability->nodes_to_optimize) { | ||
if (optimization_cc->optimization_func) { | ||
optimization_cc->optimization_func(graph, *optimization_cc, *capability); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's add |
||
// #TODO: Handle nested optimization ComputeCapability | ||
} | ||
} | ||
} | ||
|
||
if (n != nullptr) { | ||
// searching in kernel registries, if no kernel registered for the fused_node, use compile approach | ||
if (!KernelRegistryManager::HasImplementationOf(kernel_registry_mgr, *n, type, logger)) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,6 +28,19 @@ ConstantFolding::ConstantFolding(const IExecutionProvider& execution_provider, | |
execution_provider_(execution_provider) { | ||
} | ||
|
||
ConstantFolding::ConstantFolding(const std::string& name, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: The original ctor can call this ctor (as a delegating constructor) instead of duplicating the implementation. |
||
const IExecutionProvider& execution_provider, | ||
bool skip_dequantize_linear, | ||
const ConfigOptions& config_options, | ||
const InlinedHashSet<std::string_view>& compatible_execution_providers, | ||
const InlinedHashSet<std::string>& excluded_initializers) noexcept | ||
: GraphTransformer(name, compatible_execution_providers), | ||
skip_dequantize_linear_(skip_dequantize_linear), | ||
config_options_(config_options), | ||
excluded_initializers_(excluded_initializers), | ||
execution_provider_(execution_provider) { | ||
} | ||
|
||
// We need to handle a Shape node separately as the input doesn't need to be a constant initializer for | ||
// Shape to be able to be constant folded. | ||
static bool ConstantFoldShapeNode(Graph& graph, Node& node) { | ||
|
@@ -144,7 +157,7 @@ Status ConstantFolding::ApplyImpl(Graph& graph, bool& modified, int graph_level, | |
|
||
for (NodeIndex i : order) { | ||
auto* node = graph.GetNode(i); | ||
if (!node) { | ||
if (!node || !AllowConstantFolding(*node)) { | ||
continue; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
|
||
// Licensed under the MIT License. | ||
|
||
#include "core/optimizer/graph_optimizer_registry.h" | ||
#include "core/optimizer/graph_transformer_utils.h" | ||
#include "core/optimizer/selection_and_optimization_func.h" | ||
#include "core/optimizer/qdq_transformer/constant_folding_dq_node.h" | ||
|
||
using namespace onnxruntime; | ||
Check warning on line 9 in onnxruntime/core/optimizer/graph_optimizer_registry.cc
|
||
using namespace ::onnxruntime::common; | ||
Check warning on line 10 in onnxruntime/core/optimizer/graph_optimizer_registry.cc
|
||
|
||
namespace onnxruntime { | ||
|
||
// Constructs an empty registry that logs through the default logger.
// The CPU EP and session-options pointers start out null; they are supplied later via
// AddCpuEpReference / AddSessionOptionsReference. Initializing them here means the getters
// never hand back an indeterminate pointer.
GraphOptimizerRegistry::GraphOptimizerRegistry()
    : logger_(&logging::LoggingManager::DefaultLogger()),
      cpu_ep_(nullptr),
      session_options_(nullptr) {
}
|
||
// Records the names of the predefined optimizers that an EP may request later.
// Only the name is stored (mapped to nullptr); the transformer is instantiated later, when an
// EP requests it. For kCONSTANT_FOLDING_DQ the matching selection function is registered too.
//
// A name that is already present is skipped with a warning and the remaining names are still
// processed (previously the first duplicate aborted the whole loop). Always returns Status::OK().
common::Status GraphOptimizerRegistry::AddPredefinedOptimizerNames(std::vector<std::string>& optimizer_names) {
  for (const auto& name : optimizer_names) {
    if (name_to_transformer_map_.find(name) != name_to_transformer_map_.end()) {
      LOGS(*logger_, WARNING) << "This transformer name is already added " << name;
      continue;
    }
    name_to_transformer_map_[name] = nullptr;  // The transformer will be instantiated only when EP requests it

    if (name == kCONSTANT_FOLDING_DQ) {
      transformer_name_to_selection_func_[name] = ConstantFoldingDQ_selection;
    }
  }
  return Status::OK();
}
|
||
// Creates the predefined optimizer called `name` and registers it with the singleton registry.
// Only kCONSTANT_FOLDING_DQ is handled; any other name logs a warning and returns Status::OK()
// without creating anything.
//
// `key_value_configs` is part of the interface but is not consumed by any optimizer yet.
// Returns a FAIL status if the CPU EP or the session options have not been provided, otherwise
// the status reported by Register().
common::Status GraphOptimizerRegistry::CreateOptimizer(std::string& name,
                                                       std::unordered_map<std::string, std::string>& key_value_configs) {
  static_cast<void>(key_value_configs);  // not used yet; silences unused-parameter warnings

  if (name == kCONSTANT_FOLDING_DQ) {
    // ConstantFoldingDQ is built from the CPU EP and the session's config options, so both
    // must be present before they are dereferenced below.
    if (cpu_ep_ == nullptr || session_options_ == nullptr) {
      return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
                             "CPU EP and session options must be set before creating optimizer " + name);
    }

    const InlinedHashSet<NodeIndex> node_index_set = {};
    auto transformer = std::make_unique<ConstantFoldingDQ>(*cpu_ep_, false /*skip_dequantize_linear*/,
                                                           session_options_->config_options, node_index_set);
    // Propagate the registration result instead of discarding it.
    return Get()->Register(std::move(transformer));
  }

  LOGS(*logger_, WARNING) << "Can't create optimizer for " << name << ". It's not in the predefined optimizer list.";
  return Status::OK();
}
|
||
// Takes ownership of `transformer` and makes it discoverable by its Name().
// If a non-null transformer is already mapped to that name, the new one is dropped with a
// warning. Returns Status::OK() in both cases.
common::Status GraphOptimizerRegistry::Register(std::unique_ptr<GraphTransformer> transformer) {
  const auto& transformer_name = transformer->Name();

  const auto existing = name_to_transformer_map_.find(transformer_name);
  const bool already_created = existing != name_to_transformer_map_.end() && existing->second;
  if (already_created) {
    LOGS(*logger_, WARNING) << "This optimizer is already created and registered " << transformer_name;
    return Status::OK();
  }

  // Map the name to the raw pointer first, then hand ownership to the list.
  name_to_transformer_map_[transformer_name] = transformer.get();
  transformer_list_.push_back(std::move(transformer));
  return Status::OK();
}
|
||
std::optional<std::function<std::vector<std::unique_ptr<ComputeCapability>>(const GraphViewer&)>> GraphOptimizerRegistry::GetSelectionFunc(std::string& name) const { | ||
auto lookup = transformer_name_to_selection_func_.find(name); | ||
if (lookup != transformer_name_to_selection_func_.end()) { | ||
return transformer_name_to_selection_func_.at(name); | ||
} | ||
LOGS(*logger_, WARNING) << "Can't find selection function of " << name; | ||
return std::nullopt; | ||
} | ||
|
||
// Returns the transformer pointer mapped to `name`, or nullptr when the name is unknown.
// A known name may still map to nullptr if only the name has been added so far.
GraphTransformer* GraphOptimizerRegistry::GetTransformerByName(std::string& name) const {
  const auto entry = name_to_transformer_map_.find(name);
  return entry == name_to_transformer_map_.end() ? nullptr : entry->second;
}
|
||
// Builds every predefined EP transformer via optimizer_utils::GenerateTransformersForEP and
// registers each one with the singleton registry. Stops at, and returns, the first
// registration error; otherwise returns Status::OK().
common::Status GraphOptimizerRegistry::AddPredefinedOptimizers(
    const onnxruntime::SessionOptions& sess_options,
    const onnxruntime::IExecutionProvider& cpu_ep,
    const logging::Logger& logger) {
  // TODO: Apply optimization level here if we later decide to do so
  auto ep_transformers = optimizer_utils::GenerateTransformersForEP(sess_options, cpu_ep, logger);

  for (auto& ep_transformer : ep_transformers) {
    ORT_RETURN_IF_ERROR(Get()->Register(std::move(ep_transformer)));
  }
  return Status::OK();
}
|
||
// Runs the transformer registered under `name` on `graph`.
// Returns a FAIL status when no transformer instance is available for that name, the
// transformer's own error if Apply() fails, and Status::OK() otherwise.
common::Status GraphOptimizerRegistry::ApplyTransformer(Graph& graph, std::string& name,
                                                        const logging::Logger& logger) const {
  GraphTransformer* const registered = GetTransformerByName(name);
  if (registered == nullptr) {
    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "This transformer is not registered " + name);
  }

  // Apply() reports whether it changed the graph; that flag is not surfaced to the caller.
  bool graph_changed = false;
  ORT_RETURN_IF_ERROR(registered->Apply(graph, graph_changed, logger));

  return Status::OK();
}
|
||
// Stores the caller-supplied CPU EP pointer for later use by optimizers created here.
// The pointer is kept as-is (no copy is made). Always returns Status::OK().
common::Status GraphOptimizerRegistry::AddCpuEpReference(onnxruntime::IExecutionProvider* cpu_ep) {
  this->cpu_ep_ = cpu_ep;

  return Status::OK();
}
|
||
// Stores the caller-supplied session options pointer for later use by optimizers created here.
// The pointer is kept as-is (no copy is made). Always returns Status::OK().
common::Status GraphOptimizerRegistry::AddSessionOptionsReference(onnxruntime::SessionOptions* session_options) {
  this->session_options_ = session_options;

  return Status::OK();
}
|
||
// Initialize static members | ||
std::shared_ptr<GraphOptimizerRegistry> onnxruntime::GraphOptimizerRegistry::graph_optimizer_registry = nullptr; | ||
Check warning on line 116 in onnxruntime/core/optimizer/graph_optimizer_registry.cc
|
||
std::mutex GraphOptimizerRegistry::registry_mutex; | ||
} // namespace onnxruntime |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,105 @@ | ||||||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||||||
|
||||||
// Licensed under the MIT License. | ||||||
|
||||||
#pragma once | ||||||
|
||||||
#include "core/common/inlined_containers.h" | ||||||
#include "core/common/logging/logging.h" | ||||||
#include "core/optimizer/graph_transformer.h" | ||||||
#include "core/framework/execution_providers.h" | ||||||
#include "core/framework/compute_capability.h" | ||||||
|
||||||
namespace onnxruntime { | ||||||
/** | ||||||
* A registration/lookup class for re-usable optimizers for EPs. | ||||||
*/ | ||||||
class GraphOptimizerRegistry { | ||||||
public: | ||||||
explicit GraphOptimizerRegistry(); | ||||||
GraphOptimizerRegistry(const GraphOptimizerRegistry&) = delete; | ||||||
|
||||||
/** | ||||||
* Get GraphOptimizerRegistry instance as a singleton. | ||||||
*/ | ||||||
static std::shared_ptr<GraphOptimizerRegistry> Get() { | ||||||
if (!graph_optimizer_registry) { // First Check (without locking) | ||||||
std::lock_guard<std::mutex> lock(registry_mutex); | ||||||
if (!graph_optimizer_registry) { // Second Check (with locking) | ||||||
graph_optimizer_registry = std::make_shared<GraphOptimizerRegistry>(); | ||||||
} | ||||||
} | ||||||
return graph_optimizer_registry; | ||||||
} | ||||||
|
||||||
/** | ||||||
* Register all the predefined optimizer names, only name not the optimizer instance. | ||||||
* | ||||||
* The optimizer will later be instantizted only when EP requests it by calling GetOptimizerByName in provider bridge. | ||||||
*/ | ||||||
common::Status GraphOptimizerRegistry::AddPredefinedOptimizerNames(std::vector<std::string>& optimizer_names); | ||||||
|
||||||
/** | ||||||
* Create and register all predefined optimizers. | ||||||
*/ | ||||||
common::Status AddPredefinedOptimizers(const onnxruntime::SessionOptions& sess_options, | ||||||
const onnxruntime::IExecutionProvider& cpu_ep, | ||||||
const logging::Logger& logger); | ||||||
Comment on lines
+34
to
+46
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be simpler to have a That also means functions to get the CPU EP and session options can return a reference so the caller doesn't need to check for nullptr returns. InferenceSession can call the Create which also allows the session logger to be used (always preferred over the default logger). |
||||||
|
||||||
/** | ||||||
* Create and register optimizer. | ||||||
*/ | ||||||
common::Status GraphOptimizerRegistry::CreateOptimizer(std::string& name, std::unordered_map<std::string, std::string>& key_value_configs); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
/** | ||||||
* Get optimizer by name. | ||||||
*/ | ||||||
GraphTransformer* GraphOptimizerRegistry::GetTransformerByName(std::string& name) const; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this need to directly provide a GraphTransformer instance? The EP can get a selection function by name. The internals of the selection function define the optimization function and populate the ComputeCapability with that. The implementation of the optimization function can create the transformer instance. That allows all the ORT implementation to be opaque to the EP, and we should only need to query the registry for the selection function. If there's a concern about creating a transformer each time the optimization function is called, a static local could be used, or if we need to have one on a per-GetCapability basis, the selection function could create the transformer instance and provide it in a capture to a lambda for the optimization function.
Suggested change
|
||||||
|
||||||
/** | ||||||
* Run the optimizer. | ||||||
*/ | ||||||
common::Status ApplyTransformer(Graph& graph, std::string& name, | ||||||
const logging::Logger& logger) const; | ||||||
|
||||||
/** | ||||||
* Register optimizer and its optimization selection function. | ||||||
*/ | ||||||
common::Status Register(std::unique_ptr<GraphTransformer> transformer); | ||||||
|
||||||
/** | ||||||
* Get optimizer selection function. If the optimizer name can't be found, return nullopt. | ||||||
*/ | ||||||
std::optional<std::function<std::vector<std::unique_ptr<ComputeCapability>>(const GraphViewer&)>> GraphOptimizerRegistry::GetSelectionFunc(std::string& name) const; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we pass in the unordered_map<string, string> key/value config for the optimizer as an arg to the selection func? It might also be slightly more readable to have an alias for the selection function signature. |
||||||
|
||||||
/** | ||||||
* Add CPU EP reference from InferenceSession as it's needed for some optimizers, ex: ConstantFoldingDQ. | ||||||
*/ | ||||||
common::Status AddCpuEpReference(onnxruntime::IExecutionProvider* cpu_ep); | ||||||
|
||||||
/** | ||||||
* Get CPU EP reference. | ||||||
*/ | ||||||
onnxruntime::IExecutionProvider* GetCpuEpReference() const { return cpu_ep_; } | ||||||
|
||||||
/** | ||||||
* Add session options reference from InferenceSession as it's needed for some optimizers, ex: ConstantFoldingDQ. | ||||||
*/ | ||||||
common::Status AddSessionOptionsReference(onnxruntime::SessionOptions* session_options); | ||||||
|
||||||
/** | ||||||
* Get Session Options reference. | ||||||
*/ | ||||||
onnxruntime::SessionOptions* GetSessionOptionsReference() const { return session_options_; } | ||||||
|
||||||
private: | ||||||
InlinedVector<std::unique_ptr<GraphTransformer>> transformer_list_; | ||||||
InlinedHashMap<std::string, GraphTransformer*> name_to_transformer_map_; | ||||||
InlinedHashMap<std::string, std::function<std::vector<std::unique_ptr<ComputeCapability>>(const GraphViewer&)>> transformer_name_to_selection_func_; | ||||||
const logging::Logger* logger_; | ||||||
onnxruntime::IExecutionProvider* cpu_ep_; | ||||||
onnxruntime::SessionOptions* session_options_; | ||||||
|
||||||
static std::shared_ptr<GraphOptimizerRegistry> graph_optimizer_registry; | ||||||
static std::mutex registry_mutex; | ||||||
}; | ||||||
} // namespace onnxruntime |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Method providing mutable access to class internals should not be const.