Skip to content

Commit

Permalink
Fix trie serialize (#1388)
Browse files Browse the repository at this point in the history
Signed-off-by: turuslan <turuslan.devbox@gmail.com>
  • Loading branch information
turuslan authored Nov 9, 2022
1 parent eab73e2 commit ed531b0
Show file tree
Hide file tree
Showing 12 changed files with 82 additions and 84 deletions.
2 changes: 1 addition & 1 deletion cmake/Hunter/config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ hunter_config(

hunter_config(
libp2p
VERSION 0.1.7
VERSION 0.1.8
KEEP_PACKAGE_SOURCES
)

Expand Down
9 changes: 3 additions & 6 deletions core/api/service/child_state/impl/child_state_api_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "api/service/child_state/impl/child_state_api_impl.hpp"

#include <boost/algorithm/string/predicate.hpp>
#include <unordered_map>
#include <utility>

Expand Down Expand Up @@ -64,9 +65,7 @@ namespace kagome::api {
BOOST_ASSERT(key.has_value());

// make sure our key begins with prefix
auto min_size = std::min(prefix.size(), key->size());
if (not std::equal(
prefix.begin(), prefix.begin() + min_size, key.value().begin())) {
if (!boost::starts_with(key.value(), prefix)) {
break;
}
result.push_back(cursor->key().value());
Expand Down Expand Up @@ -114,9 +113,7 @@ namespace kagome::api {
BOOST_ASSERT(key.has_value());

// make sure our key begins with prefix
auto min_size = std::min(prefix.size(), key->size());
if (not std::equal(
prefix.begin(), prefix.begin() + min_size, key.value().begin())) {
if (!boost::starts_with(key.value(), prefix)) {
break;
}
result.push_back(cursor->key().value());
Expand Down
5 changes: 2 additions & 3 deletions core/api/service/state/impl/state_api_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "api/service/state/impl/state_api_impl.hpp"

#include <boost/algorithm/string/predicate.hpp>
#include <unordered_map>
#include <utility>

Expand Down Expand Up @@ -102,9 +103,7 @@ namespace kagome::api {
BOOST_ASSERT(key.has_value());

// make sure our key begins with prefix
auto min_size = std::min<ssize_t>(prefix.size(), key->size());
if (not std::equal(
prefix.begin(), prefix.begin() + min_size, key.value().begin())) {
if (!boost::starts_with(key.value(), prefix)) {
break;
}
result.push_back(cursor->key().value());
Expand Down
8 changes: 3 additions & 5 deletions core/network/impl/state_protocol_observer_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
*/

#include "network/impl/state_protocol_observer_impl.hpp"
#include <boost/algorithm/string/predicate.hpp>
#include <boost/bind/storage.hpp>
#include <libp2p/outcome/outcome.hpp>

Expand Down Expand Up @@ -100,8 +101,7 @@ namespace kagome::network {
if (request.start.size() == 2) {
const auto &parent_key = request.start[0];
const auto &child_prefix = storage::kChildStorageDefaultPrefix;
if (parent_key.size() < child_prefix.size()
|| parent_key.subbuffer(0, child_prefix.size()) != child_prefix) {
if (!boost::starts_with(parent_key, child_prefix)) {
return Error::INVALID_CHILD_ROOTHASH;
}
if (auto value_res = batch->tryGet(parent_key);
Expand Down Expand Up @@ -134,9 +134,7 @@ namespace kagome::network {
size +=
entry.entries.back().key.size() + entry.entries.back().value.size();
// if key is child state storage hash iterate child storage keys
if (cursor->key().value().size() > child_prefix.size()
&& cursor->key().value().subbuffer(0, child_prefix.size())
== child_prefix) {
if (boost::starts_with(cursor->key().value(), child_prefix)) {
OUTCOME_TRY(hash,
storage::trie::RootHash::fromSpan(
value_res.value().value().get()));
Expand Down
26 changes: 23 additions & 3 deletions core/storage/trie/codec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,37 @@
#include "storage/trie/node.hpp"

namespace kagome::storage::trie {

/**
* @brief Internal codec for nodes in the Trie. Eth and substrate have
* different codecs, but rest of the code should be same.
*/
class Codec {
public:
using StoreChildren = std::function<outcome::result<void>(
common::BufferView, common::Buffer &&)>;

virtual ~Codec() = default;

/**
* @brief Encode node to byte representation and store children
* @param node node in the trie
* @param store_children chidren storer
* @return encoded representation of a {@param node}
*/
virtual outcome::result<common::Buffer> encodeNodeAndStoreChildren(
const Node &node, const StoreChildren &store_children) const = 0;

/**
* @brief Encode node to byte representation
* @param node node in the trie
* @return encoded representation of a {@param node}
*/
virtual outcome::result<common::Buffer> encodeNode(
const Node &node) const = 0;
outcome::result<common::Buffer> encodeNode(const Node &node) const {
return encodeNodeAndStoreChildren(
node, [](common::BufferView, common::Buffer &&) {
return outcome::success();
});
}

/**
* @brief Decode node from bytes
Expand All @@ -43,6 +58,11 @@ namespace kagome::storage::trie {
*/
virtual common::Buffer merkleValue(const common::BufferView &buf) const = 0;

/**
* @brief is this a hash of value, or value itself
*/
virtual bool isMerkleHash(const common::BufferView &buf) const = 0;

/**
* @brief Get the hash of a node
* @param buf byte representation of the node
Expand Down
11 changes: 6 additions & 5 deletions core/storage/trie/impl/topper_trie_batch_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

#include "storage/trie/impl/topper_trie_batch_impl.hpp"

#include <boost/algorithm/string/predicate.hpp>

#include "common/buffer.hpp"
#include "storage/trie/polkadot_trie/polkadot_trie_cursor.hpp"
#include "storage/trie/polkadot_trie/trie_error.hpp"
Expand Down Expand Up @@ -108,7 +110,7 @@ namespace kagome::storage::trie {
outcome::result<std::tuple<bool, uint32_t>> TopperTrieBatchImpl::clearPrefix(
const BufferView &prefix, std::optional<uint64_t>) {
for (auto it = cache_.lower_bound(prefix);
it != cache_.end() && it->first.subbuffer(0, prefix.size()) == prefix;
it != cache_.end() && boost::starts_with(it->first, prefix);
++it)
it->second = std::nullopt;

Expand Down Expand Up @@ -138,10 +140,9 @@ namespace kagome::storage::trie {

bool TopperTrieBatchImpl::wasClearedByPrefix(const BufferView &key) const {
for (const auto &prefix : cleared_prefixes_) {
auto key_end = key.begin();
std::advance(key_end, std::min<size_t>(key.size(), prefix.size()) - 1);
auto is_cleared = std::equal(key.begin(), key_end, prefix.begin());
if (is_cleared) return true;
if (boost::starts_with(key, prefix)) {
return true;
}
}
return false;
}
Expand Down
26 changes: 19 additions & 7 deletions core/storage/trie/serialization/polkadot_codec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ namespace kagome::storage::trie {
return Buffer{hash256(buf)};
}

bool PolkadotCodec::isMerkleHash(const common::BufferView &buf) const {
const auto size = static_cast<size_t>(buf.size());
BOOST_ASSERT(size <= common::Hash256::size());
return size == common::Hash256::size();
}

common::Hash256 PolkadotCodec::hash256(const common::BufferView &buf) const {
common::Hash256 out;

Expand All @@ -61,21 +67,23 @@ namespace kagome::storage::trie {
return out;
}

outcome::result<common::Buffer> PolkadotCodec::encodeNode(
const Node &node) const {
outcome::result<common::Buffer> PolkadotCodec::encodeNodeAndStoreChildren(
const Node &node, const StoreChildren &store_children) const {
switch (static_cast<TrieNode::Type>(node.getType())) {
case TrieNode::Type::Leaf:
return encodeLeaf(dynamic_cast<const LeafNode &>(node));

case TrieNode::Type::BranchEmptyValue:
case TrieNode::Type::BranchWithValue:
return encodeBranch(dynamic_cast<const BranchNode &>(node));
return encodeBranch(dynamic_cast<const BranchNode &>(node),
store_children);

case TrieNode::Type::LeafContainingHashes:
return encodeLeaf(dynamic_cast<const LeafNode &>(node));

case TrieNode::Type::BranchContainingHashes:
return encodeBranch(dynamic_cast<const BranchNode &>(node));
return encodeBranch(dynamic_cast<const BranchNode &>(node),
store_children);

case TrieNode::Type::Empty:
return std::errc::invalid_argument;
Expand Down Expand Up @@ -160,7 +168,7 @@ namespace kagome::storage::trie {
}

outcome::result<common::Buffer> PolkadotCodec::encodeBranch(
const BranchNode &node) const {
const BranchNode &node, const StoreChildren &store_children) const {
// node header
OUTCOME_TRY(encoding, encodeHeader(node));

Expand All @@ -185,8 +193,12 @@ namespace kagome::storage::trie {
OUTCOME_TRY(scale_enc, scale::encode(std::move(merkle_value)));
encoding.put(scale_enc);
} else {
OUTCOME_TRY(enc, encodeNode(*child));
OUTCOME_TRY(scale_enc, scale::encode(merkleValue(enc)));
OUTCOME_TRY(enc, encodeNodeAndStoreChildren(*child, store_children));
auto merkle = merkleValue(enc);
if (isMerkleHash(merkle)) {
OUTCOME_TRY(store_children(merkle, std::move(enc)));
}
OUTCOME_TRY(scale_enc, scale::encode(merkle));
encoding.put(scale_enc);
}
}
Expand Down
8 changes: 6 additions & 2 deletions core/storage/trie/serialization/polkadot_codec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,16 @@ namespace kagome::storage::trie {

~PolkadotCodec() override = default;

outcome::result<Buffer> encodeNode(const Node &node) const override;
outcome::result<Buffer> encodeNodeAndStoreChildren(
const Node &node, const StoreChildren &store_children) const override;

outcome::result<std::shared_ptr<Node>> decodeNode(
gsl::span<const uint8_t> encoded_data) const override;

common::Buffer merkleValue(const BufferView &buf) const override;

bool isMerkleHash(const common::BufferView &buf) const override;

common::Hash256 hash256(const BufferView &buf) const override;

/**
Expand All @@ -47,7 +50,8 @@ namespace kagome::storage::trie {
outcome::result<Buffer> encodeHeader(const TrieNode &node) const;

private:
outcome::result<Buffer> encodeBranch(const BranchNode &node) const;
outcome::result<Buffer> encodeBranch(
const BranchNode &node, const StoreChildren &store_children) const;
outcome::result<Buffer> encodeLeaf(const LeafNode &node) const;

outcome::result<std::pair<TrieNode::Type, size_t>> decodeHeader(
Expand Down
58 changes: 14 additions & 44 deletions core/storage/trie/serialization/trie_serializer_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include "storage/trie/trie_storage_backend.hpp"

namespace kagome::storage::trie {

TrieSerializerImpl::TrieSerializerImpl(
std::shared_ptr<PolkadotTrieFactory> factory,
std::shared_ptr<Codec> codec,
Expand Down Expand Up @@ -54,56 +53,20 @@ namespace kagome::storage::trie {

outcome::result<RootHash> TrieSerializerImpl::storeRootNode(TrieNode &node) {
auto batch = backend_->batch();
using T = TrieNode::Type;

// if node is a branch node, its children must be stored to the storage
// before it, as their hashes, which are used as database keys, are a part
// of its encoded representation required to save it to the storage
if (node.getTrieType() == T::BranchEmptyValue
|| node.getTrieType() == T::BranchWithValue) {
auto &branch = dynamic_cast<BranchNode &>(node);
OUTCOME_TRY(storeChildren(branch, *batch));
}

OUTCOME_TRY(enc, codec_->encodeNode(node));
OUTCOME_TRY(
enc,
codec_->encodeNodeAndStoreChildren(
node, [&](common::BufferView hash, common::Buffer &&encoded) {
return batch->put(hash, std::move(encoded));
}));
auto key = codec_->hash256(enc);
OUTCOME_TRY(batch->put(key, enc));
OUTCOME_TRY(batch->commit());

return key;
}

outcome::result<common::Buffer> TrieSerializerImpl::storeNode(
TrieNode &node, BufferBatch &batch) {
using T = TrieNode::Type;

// if node is a branch node, its children must be stored to the storage
// before it, as their hashes, which are used as database keys, are a part
// of its encoded representation required to save it to the storage
if (node.getTrieType() == T::BranchEmptyValue
|| node.getTrieType() == T::BranchWithValue) {
auto &branch = dynamic_cast<BranchNode &>(node);
OUTCOME_TRY(storeChildren(branch, batch));
}
OUTCOME_TRY(enc, codec_->encodeNode(node));
auto key = Buffer{codec_->merkleValue(enc)};
OUTCOME_TRY(batch.put(key, enc));
return key;
}

outcome::result<void> TrieSerializerImpl::storeChildren(BranchNode &branch,
BufferBatch &batch) {
for (auto &child : branch.children) {
if (auto c = std::dynamic_pointer_cast<TrieNode>(child); c != nullptr) {
OUTCOME_TRY(hash, storeNode(*c, batch));
// when a node is written to the storage, it is replaced with a dummy
// node to avoid memory waste
child = std::make_shared<DummyNode>(hash);
}
}
return outcome::success();
}

outcome::result<PolkadotTrie::NodePtr> TrieSerializerImpl::retrieveNode(
const std::shared_ptr<OpaqueTrieNode> &parent) const {
if (auto p = std::dynamic_pointer_cast<DummyNode>(parent); p != nullptr) {
Expand All @@ -118,7 +81,14 @@ namespace kagome::storage::trie {
if (db_key.empty() or db_key == getEmptyRootHash()) {
return nullptr;
}
OUTCOME_TRY(enc, backend_->load(db_key));
Buffer enc;
if (codec_->isMerkleHash(db_key)) {
OUTCOME_TRY(db, backend_->load(db_key));
enc = std::move(db);
} else {
// `isMerkleHash(db_key) == false` means `db_key` is value itself
enc = db_key;
}
OUTCOME_TRY(n, codec_->decodeNode(enc));
return std::dynamic_pointer_cast<TrieNode>(n);
}
Expand Down
3 changes: 0 additions & 3 deletions core/storage/trie/serialization/trie_serializer_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ namespace kagome::storage::trie {
* avoid memory waste
*/
outcome::result<RootHash> storeRootNode(TrieNode &node);
outcome::result<common::Buffer> storeNode(TrieNode &node,
BufferBatch &batch);
outcome::result<void> storeChildren(BranchNode &branch, BufferBatch &batch);
/**
* Fetches a node from the storage. A nullptr is returned in case that there
* is no entry for provided key. Mind that a branch node will have dummy
Expand Down
6 changes: 3 additions & 3 deletions core/utils/kagome_db_editor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#undef TRUE
#undef FALSE

#include <boost/algorithm/string/predicate.hpp>
#include <boost/di.hpp>
#include <soralog/impl/configurator_from_yaml.hpp>

Expand Down Expand Up @@ -143,8 +144,7 @@ void child_storage_root_hashes(
auto res = cursor->seekUpperBound(child_prefix);
if (res.has_value()) {
auto key = cursor->key();
while (key.has_value() && key.value().size() >= child_prefix.size()
&& key.value().subbuffer(0, child_prefix.size()) == child_prefix) {
while (key.has_value() && boost::starts_with(key.value(), child_prefix)) {
if (auto value_res = batch->tryGet(key.value());
value_res.has_value() && value_res.value().has_value()) {
auto &value_opt = value_res.value();
Expand Down Expand Up @@ -363,7 +363,7 @@ int main(int argc, char *argv[]) {
{
TicToc t2("Process DB.", log);
while (db_cursor->isValid() && db_cursor->key().has_value()
&& db_cursor->key().value()[0] == prefix[0]) {
&& boost::starts_with(db_cursor->key().value(), prefix)) {
auto res2 = check(db_batch->remove(db_cursor->key().value()));
count++;
if (not(count % 10000000)) {
Expand Down
Loading

0 comments on commit ed531b0

Please sign in to comment.