Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix trie serialize #1388

Merged
merged 15 commits into from
Nov 9, 2022
2 changes: 1 addition & 1 deletion cmake/Hunter/config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ hunter_config(

hunter_config(
libp2p
VERSION 0.1.7
VERSION 0.1.8
KEEP_PACKAGE_SOURCES
)

Expand Down
9 changes: 3 additions & 6 deletions core/api/service/child_state/impl/child_state_api_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "api/service/child_state/impl/child_state_api_impl.hpp"

#include <boost/algorithm/string/predicate.hpp>
#include <unordered_map>
#include <utility>

Expand Down Expand Up @@ -64,9 +65,7 @@ namespace kagome::api {
BOOST_ASSERT(key.has_value());

// make sure our key begins with prefix
auto min_size = std::min(prefix.size(), key->size());
if (not std::equal(
prefix.begin(), prefix.begin() + min_size, key.value().begin())) {
if (!boost::starts_with(key.value(), prefix)) {
break;
}
result.push_back(cursor->key().value());
Expand Down Expand Up @@ -114,9 +113,7 @@ namespace kagome::api {
BOOST_ASSERT(key.has_value());

// make sure our key begins with prefix
auto min_size = std::min(prefix.size(), key->size());
if (not std::equal(
prefix.begin(), prefix.begin() + min_size, key.value().begin())) {
if (!boost::starts_with(key.value(), prefix)) {
break;
}
result.push_back(cursor->key().value());
Expand Down
5 changes: 2 additions & 3 deletions core/api/service/state/impl/state_api_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "api/service/state/impl/state_api_impl.hpp"

#include <boost/algorithm/string/predicate.hpp>
#include <unordered_map>
#include <utility>

Expand Down Expand Up @@ -102,9 +103,7 @@ namespace kagome::api {
BOOST_ASSERT(key.has_value());

// make sure our key begins with prefix
auto min_size = std::min<ssize_t>(prefix.size(), key->size());
if (not std::equal(
prefix.begin(), prefix.begin() + min_size, key.value().begin())) {
if (!boost::starts_with(key.value(), prefix)) {
break;
}
result.push_back(cursor->key().value());
Expand Down
8 changes: 3 additions & 5 deletions core/network/impl/state_protocol_observer_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
*/

#include "network/impl/state_protocol_observer_impl.hpp"
#include <boost/algorithm/string/predicate.hpp>
#include <boost/bind/storage.hpp>
#include <libp2p/outcome/outcome.hpp>

Expand Down Expand Up @@ -100,8 +101,7 @@ namespace kagome::network {
if (request.start.size() == 2) {
const auto &parent_key = request.start[0];
const auto &child_prefix = storage::kChildStorageDefaultPrefix;
if (parent_key.size() < child_prefix.size()
|| parent_key.subbuffer(0, child_prefix.size()) != child_prefix) {
if (!boost::starts_with(parent_key, child_prefix)) {
return Error::INVALID_CHILD_ROOTHASH;
}
if (auto value_res = batch->tryGet(parent_key);
Expand Down Expand Up @@ -134,9 +134,7 @@ namespace kagome::network {
size +=
entry.entries.back().key.size() + entry.entries.back().value.size();
// if key is child state storage hash iterate child storage keys
if (cursor->key().value().size() > child_prefix.size()
&& cursor->key().value().subbuffer(0, child_prefix.size())
== child_prefix) {
if (boost::starts_with(cursor->key().value(), child_prefix)) {
OUTCOME_TRY(hash,
storage::trie::RootHash::fromSpan(
value_res.value().value().get()));
Expand Down
30 changes: 28 additions & 2 deletions core/storage/trie/codec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@
#include "storage/trie/node.hpp"

namespace kagome::storage::trie {
/**
* @brief Stores encoded children node for later retrieval
*/
struct StoreChildren {
turuslan marked this conversation as resolved.
Show resolved Hide resolved
virtual ~StoreChildren() = default;
virtual outcome::result<void> store(const common::BufferView &hash,
common::Buffer &&encoded) {
return outcome::success();
}
};

/**
* @brief Internal codec for nodes in the Trie. Eth and substrate have
Expand All @@ -20,13 +30,24 @@ namespace kagome::storage::trie {
public:
virtual ~Codec() = default;

/**
* @brief Encode node to byte representation and store children
* @param node node in the trie
* @param store_children chidren storer
* @return encoded representation of a {@param node}
*/
virtual outcome::result<common::Buffer> encodeNodeAndStoreChildren(
const Node &node, StoreChildren &store_children) const = 0;

/**
* @brief Encode node to byte representation
* @param node node in the trie
* @return encoded representation of a {@param node}
*/
virtual outcome::result<common::Buffer> encodeNode(
const Node &node) const = 0;
outcome::result<common::Buffer> encodeNode(const Node &node) const {
StoreChildren store_children;
return encodeNodeAndStoreChildren(node, store_children);
}

/**
* @brief Decode node from bytes
Expand All @@ -43,6 +64,11 @@ namespace kagome::storage::trie {
*/
virtual common::Buffer merkleValue(const common::BufferView &buf) const = 0;

/**
* @brief is this a hash of value, or value itself
*/
virtual bool isMerkleHash(const common::BufferView &buf) const = 0;

/**
* @brief Get the hash of a node
* @param buf byte representation of the node
Expand Down
11 changes: 6 additions & 5 deletions core/storage/trie/impl/topper_trie_batch_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

#include "storage/trie/impl/topper_trie_batch_impl.hpp"

#include <boost/algorithm/string/predicate.hpp>

#include "common/buffer.hpp"
#include "storage/trie/polkadot_trie/polkadot_trie_cursor.hpp"
#include "storage/trie/polkadot_trie/trie_error.hpp"
Expand Down Expand Up @@ -108,7 +110,7 @@ namespace kagome::storage::trie {
outcome::result<std::tuple<bool, uint32_t>> TopperTrieBatchImpl::clearPrefix(
const BufferView &prefix, std::optional<uint64_t>) {
for (auto it = cache_.lower_bound(prefix);
it != cache_.end() && it->first.subbuffer(0, prefix.size()) == prefix;
it != cache_.end() && boost::starts_with(it->first, prefix);
++it)
it->second = std::nullopt;

Expand Down Expand Up @@ -138,10 +140,9 @@ namespace kagome::storage::trie {

bool TopperTrieBatchImpl::wasClearedByPrefix(const BufferView &key) const {
for (const auto &prefix : cleared_prefixes_) {
auto key_end = key.begin();
std::advance(key_end, std::min<size_t>(key.size(), prefix.size()) - 1);
auto is_cleared = std::equal(key.begin(), key_end, prefix.begin());
if (is_cleared) return true;
if (boost::starts_with(key, prefix)) {
return true;
}
turuslan marked this conversation as resolved.
Show resolved Hide resolved
}
return false;
}
Expand Down
26 changes: 19 additions & 7 deletions core/storage/trie/serialization/polkadot_codec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ namespace kagome::storage::trie {
return Buffer{hash256(buf)};
}

bool PolkadotCodec::isMerkleHash(const common::BufferView &buf) const {
const auto size = static_cast<size_t>(buf.size());
assert(size <= common::Hash256::size());
turuslan marked this conversation as resolved.
Show resolved Hide resolved
return size == common::Hash256::size();
}

common::Hash256 PolkadotCodec::hash256(const common::BufferView &buf) const {
common::Hash256 out;

Expand All @@ -61,21 +67,23 @@ namespace kagome::storage::trie {
return out;
}

outcome::result<common::Buffer> PolkadotCodec::encodeNode(
const Node &node) const {
outcome::result<common::Buffer> PolkadotCodec::encodeNodeAndStoreChildren(
const Node &node, StoreChildren &store_children) const {
switch (static_cast<TrieNode::Type>(node.getType())) {
case TrieNode::Type::Leaf:
return encodeLeaf(dynamic_cast<const LeafNode &>(node));

case TrieNode::Type::BranchEmptyValue:
case TrieNode::Type::BranchWithValue:
return encodeBranch(dynamic_cast<const BranchNode &>(node));
return encodeBranch(dynamic_cast<const BranchNode &>(node),
turuslan marked this conversation as resolved.
Show resolved Hide resolved
store_children);

case TrieNode::Type::LeafContainingHashes:
return encodeLeaf(dynamic_cast<const LeafNode &>(node));

case TrieNode::Type::BranchContainingHashes:
return encodeBranch(dynamic_cast<const BranchNode &>(node));
return encodeBranch(dynamic_cast<const BranchNode &>(node),
store_children);

case TrieNode::Type::Empty:
return std::errc::invalid_argument;
Expand Down Expand Up @@ -160,7 +168,7 @@ namespace kagome::storage::trie {
}

outcome::result<common::Buffer> PolkadotCodec::encodeBranch(
const BranchNode &node) const {
const BranchNode &node, StoreChildren &store_children) const {
// node header
OUTCOME_TRY(encoding, encodeHeader(node));

Expand All @@ -185,8 +193,12 @@ namespace kagome::storage::trie {
OUTCOME_TRY(scale_enc, scale::encode(std::move(merkle_value)));
encoding.put(scale_enc);
} else {
OUTCOME_TRY(enc, encodeNode(*child));
OUTCOME_TRY(scale_enc, scale::encode(merkleValue(enc)));
OUTCOME_TRY(enc, encodeNodeAndStoreChildren(*child, store_children));
auto merkle = merkleValue(enc);
if (isMerkleHash(merkle)) {
OUTCOME_TRY(store_children.store(merkle, std::move(enc)));
}
OUTCOME_TRY(scale_enc, scale::encode(merkle));
encoding.put(scale_enc);
}
}
Expand Down
8 changes: 6 additions & 2 deletions core/storage/trie/serialization/polkadot_codec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,16 @@ namespace kagome::storage::trie {

~PolkadotCodec() override = default;

outcome::result<Buffer> encodeNode(const Node &node) const override;
outcome::result<Buffer> encodeNodeAndStoreChildren(
const Node &node, StoreChildren &store_children) const override;

outcome::result<std::shared_ptr<Node>> decodeNode(
gsl::span<const uint8_t> encoded_data) const override;

common::Buffer merkleValue(const BufferView &buf) const override;

bool isMerkleHash(const common::BufferView &buf) const override;

common::Hash256 hash256(const BufferView &buf) const override;

/**
Expand All @@ -47,7 +50,8 @@ namespace kagome::storage::trie {
outcome::result<Buffer> encodeHeader(const TrieNode &node) const;

private:
outcome::result<Buffer> encodeBranch(const BranchNode &node) const;
outcome::result<Buffer> encodeBranch(const BranchNode &node,
StoreChildren &store_children) const;
outcome::result<Buffer> encodeLeaf(const LeafNode &node) const;

outcome::result<std::pair<TrieNode::Type, size_t>> decodeHeader(
Expand Down
62 changes: 19 additions & 43 deletions core/storage/trie/serialization/trie_serializer_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@
#include "storage/trie/trie_storage_backend.hpp"

namespace kagome::storage::trie {
struct StoreChildrenToBatch : StoreChildren {
StoreChildrenToBatch(storage::BufferBatch &batch) : batch{batch} {}

outcome::result<void> store(const common::BufferView &hash,
common::Buffer &&encoded) override {
return batch.put(hash, std::move(encoded));
}

storage::BufferBatch &batch;
};

TrieSerializerImpl::TrieSerializerImpl(
std::shared_ptr<PolkadotTrieFactory> factory,
Expand Down Expand Up @@ -54,56 +64,16 @@ namespace kagome::storage::trie {

outcome::result<RootHash> TrieSerializerImpl::storeRootNode(TrieNode &node) {
auto batch = backend_->batch();
using T = TrieNode::Type;

// if node is a branch node, its children must be stored to the storage
// before it, as their hashes, which are used as database keys, are a part
// of its encoded representation required to save it to the storage
if (node.getTrieType() == T::BranchEmptyValue
|| node.getTrieType() == T::BranchWithValue) {
auto &branch = dynamic_cast<BranchNode &>(node);
OUTCOME_TRY(storeChildren(branch, *batch));
}

OUTCOME_TRY(enc, codec_->encodeNode(node));
StoreChildrenToBatch store_children{*batch};
OUTCOME_TRY(enc, codec_->encodeNodeAndStoreChildren(node, store_children));
auto key = codec_->hash256(enc);
OUTCOME_TRY(batch->put(key, enc));
OUTCOME_TRY(batch->commit());

return key;
}

outcome::result<common::Buffer> TrieSerializerImpl::storeNode(
TrieNode &node, BufferBatch &batch) {
using T = TrieNode::Type;

// if node is a branch node, its children must be stored to the storage
// before it, as their hashes, which are used as database keys, are a part
// of its encoded representation required to save it to the storage
if (node.getTrieType() == T::BranchEmptyValue
|| node.getTrieType() == T::BranchWithValue) {
auto &branch = dynamic_cast<BranchNode &>(node);
OUTCOME_TRY(storeChildren(branch, batch));
}
OUTCOME_TRY(enc, codec_->encodeNode(node));
auto key = Buffer{codec_->merkleValue(enc)};
OUTCOME_TRY(batch.put(key, enc));
return key;
}

outcome::result<void> TrieSerializerImpl::storeChildren(BranchNode &branch,
BufferBatch &batch) {
for (auto &child : branch.children) {
if (auto c = std::dynamic_pointer_cast<TrieNode>(child); c != nullptr) {
OUTCOME_TRY(hash, storeNode(*c, batch));
// when a node is written to the storage, it is replaced with a dummy
// node to avoid memory waste
child = std::make_shared<DummyNode>(hash);
}
}
return outcome::success();
}

outcome::result<PolkadotTrie::NodePtr> TrieSerializerImpl::retrieveNode(
const std::shared_ptr<OpaqueTrieNode> &parent) const {
if (auto p = std::dynamic_pointer_cast<DummyNode>(parent); p != nullptr) {
Expand All @@ -118,7 +88,13 @@ namespace kagome::storage::trie {
if (db_key.empty() or db_key == getEmptyRootHash()) {
return nullptr;
}
OUTCOME_TRY(enc, backend_->load(db_key));
Buffer enc;
if (codec_->isMerkleHash(db_key)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here it's better to comment that a db key is actually the merkle value of the node, or better wrap Buffer in some MerkleValue type to ensure this contract on type level.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not clear from this comment why it is this way. I think it's better clarify it, or it may become a source of confusion.

OUTCOME_TRY(db, backend_->load(db_key));
enc = std::move(db);
} else {
enc = db_key;
}
OUTCOME_TRY(n, codec_->decodeNode(enc));
return std::dynamic_pointer_cast<TrieNode>(n);
}
Expand Down
3 changes: 0 additions & 3 deletions core/storage/trie/serialization/trie_serializer_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ namespace kagome::storage::trie {
* avoid memory waste
*/
outcome::result<RootHash> storeRootNode(TrieNode &node);
outcome::result<common::Buffer> storeNode(TrieNode &node,
BufferBatch &batch);
outcome::result<void> storeChildren(BranchNode &branch, BufferBatch &batch);
/**
* Fetches a node from the storage. A nullptr is returned in case that there
* is no entry for provided key. Mind that a branch node will have dummy
Expand Down
6 changes: 3 additions & 3 deletions core/utils/kagome_db_editor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#undef TRUE
#undef FALSE

#include <boost/algorithm/string/predicate.hpp>
#include <boost/di.hpp>
#include <soralog/impl/configurator_from_yaml.hpp>

Expand Down Expand Up @@ -143,8 +144,7 @@ void child_storage_root_hashes(
auto res = cursor->seekUpperBound(child_prefix);
if (res.has_value()) {
auto key = cursor->key();
while (key.has_value() && key.value().size() >= child_prefix.size()
&& key.value().subbuffer(0, child_prefix.size()) == child_prefix) {
while (key.has_value() && boost::starts_with(key.value(), child_prefix)) {
if (auto value_res = batch->tryGet(key.value());
value_res.has_value() && value_res.value().has_value()) {
auto &value_opt = value_res.value();
Expand Down Expand Up @@ -363,7 +363,7 @@ int main(int argc, char *argv[]) {
{
TicToc t2("Process DB.", log);
while (db_cursor->isValid() && db_cursor->key().has_value()
&& db_cursor->key().value()[0] == prefix[0]) {
&& boost::starts_with(db_cursor->key().value(), prefix)) {
auto res2 = check(db_batch->remove(db_cursor->key().value()));
count++;
if (not(count % 10000000)) {
Expand Down
Loading