Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix fast sync on Kusama #1367

Merged
merged 91 commits into master from feature/kusama-fast-sync-fix
Nov 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
91 commits
Select commit Hold shift + click to select a range
5fb66f8
add `--sync` program option (#1164)
Apr 14, 2022
6b67c60
implement block appender for fast sync (#1164)
Apr 15, 2022
82464b9
apply sync state and start validating (#1164)
Apr 19, 2022
2eb5893
recursive fast sync call (#1164)
Apr 25, 2022
ebf7fef
add calculation of authorities from origin (#1164)
Apr 27, 2022
f677f08
fastsync working on rococo (#1164)
May 28, 2022
b2c7628
implement child state request (#1164)
Jun 3, 2022
6c535b2
implement state response (#1164)
Jun 10, 2022
6589f95
fastsync working on polkadot (#1164)
Jun 18, 2022
0214220
clang-14 fixes
Jul 27, 2022
760fef3
updates for PR
Jul 27, 2022
c6bb6e4
git: Merge branch 'master' into feature/fast-sync-update
xDimon Aug 4, 2022
bff1ee2
fix: processing of epoch digests during fast syncing
xDimon Aug 4, 2022
e911f09
git: Merge branch 'master' into feature/fast-sync-update
xDimon Aug 4, 2022
1e3db6d
fix: tests
xDimon Aug 5, 2022
f0e62b2
Merge branch 'master' into feature/fast-sync-update
xDimon Aug 8, 2022
91e2f4f
Build fix for external blob (#1306)
iceseer Aug 8, 2022
bf1d618
Feature: change logging level over RPC in real time #1305
xDimon Aug 9, 2022
9516996
feature: change logging level over RPC
xDimon Aug 5, 2022
9481e76
feature: accelerate hasDirectChain check for case of finalized chain
xDimon Aug 9, 2022
70b5877
refactor: optimize prepareFromGenesis of AuthorityManager
xDimon Aug 9, 2022
db4d41c
refactor: preload a bit more blocks while fast syncing
xDimon Aug 9, 2022
af65850
fix: logging some outcome error
xDimon Aug 10, 2022
d02c0bf
feature: smart processing of duplicating blocks during fast sync
xDimon Aug 10, 2022
bd15e37
git: Merge branch 'master' into feature/fast-sync-update
xDimon Aug 10, 2022
8f2c6d7
feature: indicate average speed of appending while fast syncing
xDimon Aug 10, 2022
908f4ae
feature: down rating of peer at disconnection
xDimon Aug 10, 2022
c7e40b4
tune: preload more headers while fast syncing
xDimon Aug 10, 2022
bef9c70
clean: remove commented piece of code
xDimon Aug 10, 2022
a984c52
fix: test
xDimon Aug 10, 2022
8a4ef62
fix: allow recovery to block without state
xDimon Aug 11, 2022
b233deb
fix: fallback way to process justification with votes for future blocks
xDimon Aug 11, 2022
12420f5
git: Merge branch 'master' into feature/fast-sync-update
xDimon Aug 12, 2022
21e1267
fix: typo
xDimon Aug 12, 2022
bb80b7f
refactor: assert
xDimon Aug 12, 2022
c55804e
feature: additional state of babe for syncing
xDimon Aug 12, 2022
84f362b
clean: unused code
xDimon Aug 12, 2022
b94a602
fix: test
xDimon Aug 14, 2022
eebfd61
Merge with master
Harrm Aug 15, 2022
3107ffc
Fix mocks
Harrm Aug 15, 2022
2a53eea
fix: block data flags
xDimon Aug 15, 2022
989ab07
fix: assert
xDimon Aug 15, 2022
ce570b2
temp: way to save state of authmngr
xDimon Aug 15, 2022
b82f40b
fix: remove useless assert
xDimon Aug 15, 2022
18f82dd
fix: minor issues
xDimon Aug 15, 2022
990b7c1
fix: sync state
xDimon Aug 15, 2022
dda6011
fix: sync state
xDimon Aug 16, 2022
1a55181
refactor: level log messages of state protocol
xDimon Aug 16, 2022
d63e548
fix: remove non finalized block before state syncing start
xDimon Aug 16, 2022
5987010
fix: add log to view last finalized before rolling back his descendants
xDimon Aug 16, 2022
28dd797
Fixes in authority manager
Harrm Aug 18, 2022
782f0ce
Add todo for schedule change processing
Harrm Jul 14, 2022
89d15b5
Remove debug code from verify()
Harrm Jul 14, 2022
83eaa6c
Remove commented code
Harrm Jul 14, 2022
2a91f55
Fixes from review
Harrm Jul 15, 2022
5a0bd7c
Fix recovery logging
Harrm Jul 15, 2022
9ca0c4a
Make NoAction alias Empty
Harrm Jul 15, 2022
e1cc1b4
Fix authority manager tests
Harrm Jul 15, 2022
ef97ba2
Fix alleged stack-use-after-free
Harrm Jul 16, 2022
0627676
Fix force change recovery problem
Harrm Jul 17, 2022
5f50a15
Fix authority manager tests' ASAN problem
Harrm Jul 17, 2022
e3de787
Fixing authority manager
Harrm Aug 15, 2022
8d9b7ac
Fix authority manager on synced chain
Harrm Aug 25, 2022
224b413
Fix bad merge of test.yml
Harrm Aug 25, 2022
b30f506
Fix bad merge of test.yml
Harrm Aug 25, 2022
8a78055
Self review
Harrm Aug 26, 2022
80834eb
Merge with master
Harrm Sep 2, 2022
6e840f4
Improve voting round error logging
Harrm Sep 8, 2022
7894fc8
Fixes for fast sync in kusama
Harrm Oct 3, 2022
465fb12
Cleaning up
Harrm Oct 6, 2022
0559729
Merge with master
Harrm Oct 11, 2022
bf70c8f
Merge branch 'master' into feature/kusama-fast-sync-fix
Harrm Oct 11, 2022
9355c20
Fix CI
Harrm Oct 12, 2022
79663aa
Merge branch 'feature/kusama-fast-sync-fix' of github.com:soramitsu/k…
Harrm Oct 12, 2022
74bceb3
Merge with master
Harrm Oct 12, 2022
e9108f0
Merge branch 'master' into feature/kusama-fast-sync-fix
Harrm Oct 13, 2022
8dd155d
Fix merge
Harrm Oct 13, 2022
25fa778
Self review
Harrm Oct 13, 2022
80502b9
Self review
Harrm Oct 13, 2022
979abe1
Enable VoteCryptoProviderImpl::verify
Harrm Oct 18, 2022
eee9861
Merge branch 'master' into feature/kusama-fast-sync-fix
Harrm Oct 19, 2022
1a81fb5
Fixes from review
Harrm Oct 19, 2022
5194cc0
Merge remote-tracking branch 'origin/feature/kusama-fast-sync-fix' in…
Harrm Oct 19, 2022
a4f411c
Merge branch 'master' of github.com:soramitsu/kagome into feature/kus…
Harrm Oct 25, 2022
78360d5
Fixes from review
Harrm Oct 26, 2022
d63e344
Merge branch 'master' into feature/kusama-fast-sync-fix
Harrm Oct 26, 2022
aa1c4d5
Revert offchain extension test
Harrm Oct 26, 2022
0c357b4
Reformat
Harrm Oct 27, 2022
192afd3
Merge branch 'master' into feature/kusama-fast-sync-fix
kamilsa Nov 1, 2022
2dd826a
Merge branch 'master' into feature/kusama-fast-sync-fix
xDimon Nov 2, 2022
715f0c9
Merge branch 'master' into feature/kusama-fast-sync-fix
xDimon Nov 2, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
config/__pycahe__
**/__pycache__/*
__pycache__/*
build/*
build-*/*
**/build/*
**/build-*/*
*.DS_Store
*.vscode/
*.swp
Expand Down
9 changes: 1 addition & 8 deletions core/consensus/authority/authority_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

namespace kagome::storage::trie {
class TrieStorage;
class TrieBatch;
}

namespace kagome::crypto {
Expand All @@ -24,14 +25,6 @@ namespace kagome::authority {

using IsBlockFinalized = Tagged<bool, struct IsBlockFinalizedTag>;

/**
* Obtain the current authority set id from the runtime storage
*/
outcome::result<std::optional<primitives::AuthoritySetId>>
fetchSetIdFromTrieStorage(storage::trie::TrieStorage const &trie_storage,
crypto::Hasher const &hasher,
storage::trie::RootHash const &state);

class AuthorityManager {
public:
virtual ~AuthorityManager() = default;
Expand Down
2 changes: 2 additions & 0 deletions core/consensus/authority/authority_manager_error.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ OUTCOME_CPP_DEFINE_CATEGORY(kagome::authority, AuthorityManagerError, e) {
return "Can not save state";
case E::CANT_RECALCULATE_ON_PRUNED_STATE:
return "Can't recalculate authority set ids on a pruned database";
case E::FAILED_TO_INITIALIZE_SET_ID:
return "Failed to initialize the current authority set id on startup";
}
return "unknown error (invalid AuthorityManagerError)";
}
1 change: 1 addition & 0 deletions core/consensus/authority/authority_manager_error.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ namespace kagome::authority {
ORPHAN_BLOCK_OR_ALREADY_FINALIZED,
CAN_NOT_SAVE_STATE,
CANT_RECALCULATE_ON_PRUNED_STATE,
FAILED_TO_INITIALIZE_SET_ID,
};
}

Expand Down
108 changes: 69 additions & 39 deletions core/consensus/authority/impl/authority_manager_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@
#include "application/app_state_manager.hpp"
#include "blockchain/block_header_repository.hpp"
#include "blockchain/block_tree.hpp"
#include "blockchain/block_tree_error.hpp"
#include "common/visitor.hpp"
#include "consensus/authority/authority_manager_error.hpp"
#include "consensus/authority/authority_update_observer_error.hpp"
#include "consensus/authority/impl/schedule_node.hpp"
#include "crypto/hasher.hpp"
#include "log/profiling_logger.hpp"
#include "runtime/runtime_api/grandpa_api.hpp"
#include "storage/database_error.hpp"
#include "storage/trie/trie_storage.hpp"

using kagome::common::Buffer;
Expand Down Expand Up @@ -100,11 +102,9 @@ namespace kagome::authority {
}

outcome::result<std::optional<AuthoritySetId>> fetchSetIdFromTrieStorage(
storage::trie::TrieStorage const &trie_storage,
storage::trie::TrieBatch const &trie_batch,
crypto::Hasher const &hasher,
storage::trie::RootHash const &state) {
OUTCOME_TRY(batch, trie_storage.getEphemeralBatchAt(state));

std::optional<AuthoritySetId> set_id_opt;
auto current_set_id_keypart =
hasher.twox_128(Buffer::fromString("CurrentSetId"));
Expand All @@ -113,7 +113,7 @@ namespace kagome::authority {
auto set_id_key =
Buffer().put(prefix_key_part).put(current_set_id_keypart);

OUTCOME_TRY(val_opt, batch->tryGet(set_id_key));
OUTCOME_TRY(val_opt, trie_batch.tryGet(set_id_key));
if (val_opt.has_value()) {
auto &val = val_opt.value();
set_id_opt.emplace(scale::decode<AuthoritySetId>(val.get()).value());
Expand All @@ -129,11 +129,10 @@ namespace kagome::authority {
outcome::result<std::optional<std::unique_ptr<ScheduleNode>>>
fetchScheduleGraphRoot(storage::BufferStorage const &storage) {
OUTCOME_TRY(opt_root, storage.tryLoad(kScheduleGraphRootKey));
if (not opt_root) return std::nullopt;
if (!opt_root) return std::nullopt;
auto &encoded_root = opt_root.value();
OUTCOME_TRY(root,
scale::decode<std::unique_ptr<ScheduleNode>>(encoded_root));
return std::make_optional(std::move(root));
OUTCOME_TRY(root, scale::decode<ScheduleNode>(encoded_root));
return std::make_unique<ScheduleNode>(std::move(root));
}

outcome::result<void> storeScheduleGraphRoot(storage::BufferStorage &storage,
Expand Down Expand Up @@ -170,7 +169,7 @@ namespace kagome::authority {

for (auto hash = finalized_block.hash; !found_set_change;) {
auto header_res = block_tree.getBlockHeader(hash);
if (header_res.has_error()) {
if (!header_res) {
SL_ERROR(
log, "Failed to obtain the last finalized block header {}", hash);
}
Expand Down Expand Up @@ -244,10 +243,12 @@ namespace kagome::authority {
BOOST_UNREACHABLE_RETURN({})
}

AuthorityManagerImpl::~AuthorityManagerImpl() {}

bool AuthorityManagerImpl::prepare() {
const auto finalized_block = block_tree_->getLastFinalized();
auto res = initializeAt(finalized_block);
if (res.has_error()) {
if (!res) {
SL_ERROR(log_,
"Error initializing authority manager: {}",
res.error().message());
Expand All @@ -268,27 +269,31 @@ namespace kagome::authority {
block_tree_->getBlockHeader(graph_root_block.hash));

auto set_id_from_runtime_res = readSetIdFromRuntime(root_header);
auto set_id_from_runtime_opt = set_id_from_runtime_res.has_value()
? set_id_from_runtime_res.value()
: std::nullopt;

OUTCOME_TRY(opt_root, fetchScheduleGraphRoot(*persistent_storage_));
auto last_finalized_block = block_tree_->getLastFinalized();

if (opt_root
&& opt_root.value()->current_block.number
<= last_finalized_block.number) {
// TODO(Harrm): #1334
// Correction to bypass the bug where after finishing syncing
// and restarting the node we get a set id off by one
if (set_id_from_runtime_res.has_value()
if (set_id_from_runtime_opt.has_value()
&& opt_root.value()->current_authorities->id
== set_id_from_runtime_res.value() - 1) {
== set_id_from_runtime_opt.value() - 1) {
auto &authority_list =
opt_root.value()->current_authorities->authorities;
opt_root.value()->current_authorities =
std::make_shared<primitives::AuthoritySet>(
set_id_from_runtime_res.value(), authority_list);
set_id_from_runtime_opt.value(), authority_list);
}

root_ = std::move(opt_root.value());
SL_TRACE(log_,
SL_DEBUG(log_,
"Fetched authority set graph root from database with id {}",
root_->current_authorities->id);

Expand All @@ -299,7 +304,8 @@ namespace kagome::authority {
std::make_shared<primitives::AuthoritySet>(
0, std::move(initial_authorities)),
{0, genesis_hash});
} else if (set_id_from_runtime_res.has_value()){
} else if (set_id_from_runtime_res.has_value()
&& set_id_from_runtime_opt.has_value()) {
SL_WARN(
log_,
"Storage does not contain valid info about the root authority set; "
Expand All @@ -309,7 +315,7 @@ namespace kagome::authority {
grandpa_api_->authorities(graph_root_block.hash));

auto authority_set = std::make_shared<primitives::AuthoritySet>(
set_id_from_runtime_res.value(), std::move(authorities));
set_id_from_runtime_opt.value(), std::move(authorities));
root_ = authority::ScheduleNode::createAsRoot(authority_set,
graph_root_block);

Expand All @@ -319,7 +325,9 @@ namespace kagome::authority {
"storage",
root_->current_authorities->id);
} else {
SL_ERROR(log_, "Failed to initialize authority manager; Try running recovery mode");
SL_ERROR(
log_,
"Failed to initialize authority manager; Try running recovery mode");
return set_id_from_runtime_res.as_failure();
}

Expand Down Expand Up @@ -349,18 +357,39 @@ namespace kagome::authority {
return outcome::success();
}

outcome::result<AuthoritySetId> AuthorityManagerImpl::readSetIdFromRuntime(
outcome::result<std::optional<AuthoritySetId>>
AuthorityManagerImpl::readSetIdFromRuntime(
primitives::BlockHeader const &header) const {
AuthoritySetId set_id{};

auto batch_res = trie_storage_->getEphemeralBatchAt(header.state_root);
if (batch_res.has_error()) {
if (batch_res.error() == storage::DatabaseError::NOT_FOUND) {
SL_DEBUG(log_,
"Failed to fetch set id from trie storage: state {} is not in "
"the storage",
header.state_root);
return std::nullopt;
}
return batch_res.as_failure();
}

OUTCOME_TRY(hash, primitives::calculateBlockHash(header, *hasher_));
auto set_id_res = grandpa_api_->current_set_id(hash);
if (set_id_res) {
set_id = set_id_res.value();
} else {
OUTCOME_TRY(set_id_,
fetchSetIdFromTrieStorage(
*trie_storage_, *hasher_, header.state_root));
set_id = set_id_.value();
auto &batch = batch_res.value();

OUTCOME_TRY(
set_id_opt,
fetchSetIdFromTrieStorage(*batch, *hasher_, header.state_root));
if (set_id_opt) return set_id_opt.value();

SL_DEBUG(log_,
"Failed to read authority set id from runtime (attempted both "
"GrandpaApi_current_set_id and trie storage)");
return std::nullopt;
}
return set_id;
}
Expand All @@ -370,6 +399,7 @@ namespace kagome::authority {
auto genesis_hash = block_tree_->getGenesisBlockHash();

OUTCOME_TRY(initial_authorities, grandpa_api_->authorities(genesis_hash));
primitives::BlockInfo genesis_info{0, block_tree_->getGenesisBlockHash()};

root_ = ScheduleNode::createAsRoot(
std::make_shared<primitives::AuthoritySet>(0, initial_authorities),
Expand All @@ -379,19 +409,15 @@ namespace kagome::authority {
// if state is pruned
if (header_repo_->getBlockHeader(1).has_error()) {
SL_WARN(log_,
"Can't recalculate authority set id on a prune state, fall back "
"to fetching from runtime");
"Can't recalculate authority set id on a prune state, fall"
" back to fetching from runtime");
return clearScheduleGraphRoot(*persistent_storage_);
}

auto start = std::chrono::steady_clock::now();
for (primitives::BlockNumber number = 0; number <= last_finalized_number;
number++) {
auto header_res = header_repo_->getBlockHeader(number);
if (!header_res)
continue; // Temporary workaround about the justification pruning bug
auto &header = header_res.value();

OUTCOME_TRY(header, header_repo_->getBlockHeader(number));
OUTCOME_TRY(hash, header_repo_->getHashByNumber(number));
primitives::BlockInfo info{number, hash};

Expand Down Expand Up @@ -451,6 +477,9 @@ namespace kagome::authority {
"Pick authority set with id {} for block {}",
adjusted_node->current_authorities->id,
target_block);
for (auto &authority : adjusted_node->current_authorities->authorities) {
SL_TRACE(log_, "Authority {}: {}", authority.id.id, authority.weight);
}
return adjusted_node->current_authorities;
}

Expand Down Expand Up @@ -501,7 +530,7 @@ namespace kagome::authority {

size_t index = 0;
for (auto &authority : *new_authorities) {
SL_DEBUG(log_,
SL_TRACE(log_,
"New authority ({}/{}): id={} weight={}",
++index,
new_authorities->authorities.size(),
Expand Down Expand Up @@ -555,8 +584,7 @@ namespace kagome::authority {
OUTCOME_TRY(schedule_change(ancestor_node));
} else {
KAGOME_PROFILE_START(make_descendant)
auto new_node =
ancestor_node->makeDescendant(block, is_ancestor_node_finalized);
auto new_node = ancestor_node->makeDescendant(block, true);
KAGOME_PROFILE_END(make_descendant)

if (maybe_set.has_value()) {
Expand Down Expand Up @@ -633,7 +661,7 @@ namespace kagome::authority {

size_t index = 0;
for (auto &authority : *new_authorities) {
SL_DEBUG(log_,
SL_TRACE(log_,
"New authority ({}/{}): id={} weight={}",
++index,
new_authorities->authorities.size(),
Expand Down Expand Up @@ -794,7 +822,7 @@ namespace kagome::authority {
message.consensus_engine_id.toString());

OUTCOME_TRY(decoded, message.decode());
auto res = visit_in_place(
return visit_in_place(
decoded.asGrandpaDigest(),
[this, &block](
const primitives::ScheduledChange &msg) -> outcome::result<void> {
Expand All @@ -806,19 +834,20 @@ namespace kagome::authority {
block, msg.authorities, msg.delay_start, msg.subchain_length);
},
[this, &block](const primitives::OnDisabled &msg) {
SL_DEBUG(log_, "OnDisabled {}", msg.authority_index);
return applyOnDisabled(block, msg.authority_index);
},
[this, &block](const primitives::Pause &msg) {
SL_DEBUG(log_, "Pause {}", msg.subchain_length);
return applyPause(block, block.number + msg.subchain_length);
},
[this, &block](const primitives::Resume &msg) {
SL_DEBUG(log_, "Resume {}", msg.subchain_length);
return applyResume(block, block.number + msg.subchain_length);
},
[](auto &) {
return AuthorityUpdateObserverError::UNSUPPORTED_MESSAGE_TYPE;
});
return res;

} else if (message.consensus_engine_id == primitives::kBabeEngineId
or message.consensus_engine_id
== primitives::kUnsupportedEngineId_BEEF
Expand Down Expand Up @@ -886,8 +915,10 @@ namespace kagome::authority {
while (ancestor->current_block != block) {
bool goto_next_generation = false;
for (const auto &node : ancestor->descendants) {
if (node->current_block == block
|| directChainExists(node->current_block, block)) {
if (node->current_block == block) {
return node;
}
if (directChainExists(node->current_block, block)) {
ancestor = node;
goto_next_generation = true;
break;
Expand Down Expand Up @@ -987,5 +1018,4 @@ namespace kagome::authority {
ancestor->descendants.erase(it);
}
}

} // namespace kagome::authority
Loading