Update banks to accept Const and stub addTensor initial API
smirnov-alexey committed Sep 16, 2024
1 parent bedea83 · commit 2c8e4fa
Showing 9 changed files with 73 additions and 19 deletions.
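
For orientation before the per-file diffs: the core API change is that the weights bank now accepts ov::op::v0::Constant directly instead of a generic ov::Node, so callers downcast at the call site. A minimal, self-contained sketch of that calling pattern follows; Node, Constant and Bank here are simplified stand-ins, not the real OpenVINO types.

#include <memory>

struct Node { virtual ~Node() = default; };   // stand-in for ov::Node
struct Constant : Node {};                    // stand-in for ov::op::v0::Constant

struct Bank {
    // After this commit, update() accepts the concrete Constant type.
    void update(const std::shared_ptr<Constant>& node) { /* register the weight */ }
};

void register_closure_input(Bank& bank, const std::shared_ptr<Node>& input_node) {
    // The downcast now happens at the call site (see the partitioning changes below);
    // the null check is added here only for the sake of the example.
    if (auto cnst = std::dynamic_pointer_cast<Constant>(input_node)) {
        bank.update(cnst);
    }
}

int main() {
    Bank bank;
    register_closure_input(bank, std::make_shared<Constant>());
}
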
@@ -490,6 +490,7 @@ void ov::npuw::JustInferRequest::unpack_closure(std::size_t idx, RqPtr request)
// Remember where the unpack is required
closure_unpack_required.push_back(cidx);
} else if (comp_model_desc.update_required[cidx]) {
// FIXME: remove once closures are reworked with weights bank
if (needs_copy(idx)) {
// Remember where copy is required
closure_copy_required.push_back(cidx);
5 changes: 5 additions & 0 deletions src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
@@ -0,0 +1,5 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "lazy_tensor.hpp"
53 changes: 53 additions & 0 deletions src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
@@ -0,0 +1,53 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <list>
#include <memory>
#include <mutex>
#include <tuple>
#include <unordered_map>
#include <variant>

#include "openvino/runtime/make_tensor.hpp"
#include "openvino/runtime/tensor.hpp"

namespace ov {
namespace npuw {
namespace weights {

enum class TransformType {
ORIG,
PERMUTE,
CONVERT,
CONCAT // TODO: support
};

class LazyTensor {
public:
using Transform = std::variant<ov::Tensor, std::vector<std::size_t>, std::monostate>;

class Hash {
public:
std::size_t operator()(const LazyTensor& lt) {
// FIXME: implement
return 0;
}
};

explicit LazyTensor() = default;

bool operator==(const LazyTensor& other) {
// FIXME: implement
return false;
}

private:
std::list<std::pair<TransformType, Transform>> m_transforms;
};

} // namespace weights
} // namespace npuw
} // namespace ov
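
The Hash functor and operator== above are intentionally left as FIXME stubs. If LazyTensor is to serve as an unordered_map key, both would typically be const-qualified and fold every (TransformType, Transform) step into the result. A self-contained sketch of one plausible direction, with the ov::Tensor alternative of the variant omitted so the example compiles on its own:

#include <cstddef>
#include <list>
#include <utility>
#include <variant>
#include <vector>

enum class TransformType { ORIG, PERMUTE, CONVERT, CONCAT };
// Simplified Transform: the real variant also carries an ov::Tensor.
using Transform = std::variant<std::monostate, std::vector<std::size_t>>;

struct LazyTensorSketch {
    std::list<std::pair<TransformType, Transform>> transforms;
};

inline void hash_combine(std::size_t& seed, std::size_t v) {
    seed ^= v + 0x9e3779b9 + (seed << 6) + (seed >> 2);  // boost-style combiner
}

struct LazyTensorHash {
    std::size_t operator()(const LazyTensorSketch& lt) const {
        std::size_t seed = 0;
        for (const auto& step : lt.transforms) {
            hash_combine(seed, static_cast<std::size_t>(step.first));
            if (const auto* axes = std::get_if<std::vector<std::size_t>>(&step.second)) {
                for (std::size_t a : *axes) {
                    hash_combine(seed, a);
                }
            }
        }
        return seed;
    }
};

inline bool operator==(const LazyTensorSketch& a, const LazyTensorSketch& b) {
    return a.transforms == b.transforms;  // element-wise comparison of the transform steps
}
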
@@ -1457,7 +1457,8 @@ void Partitioner::createFunction(FunctionPipeline& func_ggg) {
new_param_idx++;

LOG_DEBUG("Register " << prod_output << " in the function closure");
funcall._closure.push_back(bank->update(input_node)); // (n)/1/i/c
funcall._closure.push_back(
bank->update(std::dynamic_pointer_cast<ov::op::v0::Constant>(input_node))); // (n)/1/i/c
} else if (ov::op::util::is_parameter(input_node)) {
LOG_DEBUG("Handling a Parameter input " << prod_output);
LOG_BLOCK();
@@ -1550,7 +1551,8 @@ void Partitioner::matchRepeatedSubgraphs(const std::string& func_name) {
std::make_pair(proto_layer_name, input_desc.get_index())); // (t)/1/b
LOG_DEBUG("Register " << prod_output << " in the function closure[" << param_idx
<< "] (via prototype " << proto_layer_name << ")");
funcall._closure[param_idx - function._param_offset] = bank->update(input_node); // (t)/1/c
funcall._closure[param_idx - function._param_offset] =
bank->update(std::dynamic_pointer_cast<ov::op::v0::Constant>(input_node)); // (t)/1/c
}
} // for (inputs)
} // for(nodes)
@@ -102,7 +102,7 @@ ClosureRemap build_remap(const Function& fbody,
auto zerop_iter = params_to.zerops.find(param);
if (zerop_iter != params_to.zerops.end()) {
LOG_DEBUG("This parameter requires zero point: " << zerop_iter->second);
m.zero_points.push_back(bank->update(zerop_iter->second));
m.zero_points.push_back(bank->update(std::dynamic_pointer_cast<ov::op::v0::Constant>(zerop_iter->second)));
} else {
m.zero_points.push_back(ov::Tensor());
}
9 changes: 2 additions & 7 deletions src/plugins/intel_npu/src/plugin/npuw/util.cpp
@@ -14,7 +14,6 @@
#include <sstream>

#include "logging.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/transpose.hpp"
#include "openvino/op/util/op_types.hpp"

@@ -39,12 +38,8 @@ bool ov::npuw::util::is_set(const std::size_t sub_idx, const std::string& opt) {
return false;
}

ov::Tensor ov::npuw::util::tensor_from_const(const std::shared_ptr<ov::Node>& node) {
NPUW_ASSERT(ov::op::util::is_constant(node));
NPUW_ASSERT(node->outputs().size() == 1);
const auto port = node->output(0);
auto cnst_node = std::dynamic_pointer_cast<ov::op::v0::Constant>(node);
return ov::Tensor(port.get_element_type(), port.get_shape(), const_cast<void*>(cnst_node->get_data_ptr()));
ov::Tensor ov::npuw::util::tensor_from_const(const std::shared_ptr<ov::op::v0::Constant>& node) {
return ov::Tensor(node->get_element_type(), node->get_shape(), const_cast<void*>(node->get_data_ptr()));
}

bool ov::npuw::util::starts_with(const std::string& str, const std::string& prefix) {
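
Note that tensor_from_const(), both before and after this change, wraps the constant's existing buffer rather than copying it, so the returned ov::Tensor is only valid while the Constant is alive. A small self-contained illustration of that aliasing behaviour, using stand-in types rather than the real OpenVINO classes:

#include <cstddef>
#include <vector>

struct ConstantSketch {                       // stand-in for ov::op::v0::Constant
    std::vector<float> values;
    const void* get_data_ptr() const { return values.data(); }
};

struct TensorView {                           // stand-in for the returned ov::Tensor
    void* data;
    std::size_t byte_size;
};

// Mirrors the shape of the new tensor_from_const(): no copy, just a view over
// the constant's buffer (hence the const_cast in the real code).
inline TensorView tensor_from_const_sketch(const ConstantSketch& c) {
    return TensorView{const_cast<void*>(c.get_data_ptr()), c.values.size() * sizeof(float)};
}
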
3 changes: 2 additions & 1 deletion src/plugins/intel_npu/src/plugin/npuw/util.hpp
@@ -7,6 +7,7 @@
#include <string>

#include "logging.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/runtime/itensor.hpp"
#include "openvino/runtime/so_ptr.hpp"

@@ -18,7 +19,7 @@ bool is_set(const std::size_t sub_idx, const std::string& opt);

// Every great project has its own string class...
// NB: Newer C++ standards would allow to use string views or smt
ov::Tensor tensor_from_const(const std::shared_ptr<ov::Node>& node);
ov::Tensor tensor_from_const(const std::shared_ptr<ov::op::v0::Constant>& node);

bool starts_with(const std::string& str, const std::string& prefix);

7 changes: 1 addition & 6 deletions src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
@@ -30,17 +30,12 @@ class BankManager {
std::mutex m_mutex;
};

ov::Tensor Bank::update(const std::shared_ptr<ov::Node>& node) {
if (!node) {
OPENVINO_THROW("Uninitialized ov::Node in weights bank allocation!");
}

ov::Tensor Bank::update(const std::shared_ptr<ov::op::v0::Constant>& node) {
std::lock_guard<std::mutex> guard(m_mutex);

auto tensor = ov::npuw::util::tensor_from_const(node);

if (m_bank.find(tensor.data()) == m_bank.end()) {
// need to allocate first
m_bank[tensor.data()] = node;
}

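
The update() body shown above keys the CPU bank on the constant's raw data pointer, so a weight registered from several subgraphs is stored (and kept alive) only once. A self-contained sketch of that dedup logic, with FakeConstant standing in for ov::op::v0::Constant:

#include <memory>
#include <mutex>
#include <unordered_map>
#include <vector>

struct FakeConstant {                         // stand-in for ov::op::v0::Constant
    std::vector<float> values;
    const void* data() const { return values.data(); }
};

class BankSketch {
public:
    // Returns the key under which the constant is (now) registered.
    const void* update(const std::shared_ptr<FakeConstant>& node) {
        std::lock_guard<std::mutex> guard(m_mutex);
        const void* key = node->data();
        if (m_bank.find(key) == m_bank.end()) {
            m_bank[key] = node;               // first time this buffer is seen
        }
        return key;
    }

private:
    std::unordered_map<const void*, std::shared_ptr<FakeConstant>> m_bank;
    std::mutex m_mutex;
};
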
6 changes: 4 additions & 2 deletions src/plugins/intel_npu/src/plugin/npuw/weights_bank.hpp
@@ -9,6 +9,8 @@
#include <tuple>
#include <unordered_map>

#include "lazy_tensor.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/runtime/iplugin.hpp"
#include "openvino/runtime/iremote_context.hpp"
#include "openvino/runtime/make_tensor.hpp"
@@ -23,7 +25,7 @@ class Bank {
explicit Bank(const std::shared_ptr<const ov::ICore>& core) : m_core(core) {}

// Capture CPU version of the tensor
ov::Tensor update(const std::shared_ptr<ov::Node>& tensor);
ov::Tensor update(const std::shared_ptr<ov::op::v0::Constant>& node);

// Based on previously captured tensor allocate a new tensor (if needed) on a specified device
ov::Tensor get(const ov::Tensor& tensor, const std::string& device);
@@ -33,7 +35,7 @@
void drop(const ov::Tensor& tensor);

// Default CPU bank. Filled by update()
std::unordered_map<void*, std::shared_ptr<ov::Node>> m_bank;
std::unordered_map<void*, std::shared_ptr<ov::op::v0::Constant>> m_bank;
// Bank for specified device and their allocated memory
std::unordered_map<std::string, std::unordered_map<void*, ov::Tensor>> m_device_bank;
std::mutex m_mutex;
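
The two maps declared above split responsibilities: m_bank deduplicates the original CPU-side constants, while m_device_bank caches per-device tensors produced by get(). A hedged, self-contained sketch of how get() might consult the device bank; the actual allocation and copy to the device (and any CPU shortcut) are omitted, and DeviceTensor is a stand-in type.

#include <string>
#include <unordered_map>

struct DeviceTensor {};                       // stand-in for an ov::Tensor allocated on a device

class DeviceBankSketch {
public:
    DeviceTensor get(void* cpu_data, const std::string& device) {
        auto& per_device = m_device_bank[device];
        auto it = per_device.find(cpu_data);
        if (it == per_device.end()) {
            // Not allocated on this device yet: allocate and copy from the CPU
            // tensor here (omitted), then cache the result.
            it = per_device.emplace(cpu_data, DeviceTensor{}).first;
        }
        return it->second;
    }

private:
    std::unordered_map<std::string, std::unordered_map<void*, DeviceTensor>> m_device_bank;
};
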
