From 5b2244bcc0c5e35c57a929c6f1e03fbe6c5cc2a9 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Wed, 15 Jan 2020 16:16:06 +0000 Subject: [PATCH 01/42] use status messages to send append entries and deserialise/store them in replica --- src/consensus/consensustypes.h | 1 + src/consensus/pbft/libbyz/LedgerWriter.cpp | 2 +- src/consensus/pbft/libbyz/LedgerWriter.h | 4 +- src/consensus/pbft/libbyz/Principal.h | 14 ++ src/consensus/pbft/libbyz/Replica.cpp | 228 ++++++++++++------ src/consensus/pbft/libbyz/Replica.h | 10 +- src/consensus/pbft/libbyz/State.cpp | 1 + src/consensus/pbft/libbyz/Status.cpp | 11 +- src/consensus/pbft/libbyz/Status.h | 27 ++- src/consensus/pbft/libbyz/libbyz.cpp | 2 +- src/consensus/pbft/libbyz/libbyz.h | 2 +- .../pbft/libbyz/receive_message_base.h | 4 +- .../pbft/libbyz/test/replica_main.cpp | 8 +- .../pbft/libbyz/test/replica_test.cpp | 8 +- .../pbft/libbyz/test/test_ledger_replay.cpp | 22 +- src/consensus/pbft/libbyz/types.h | 13 +- src/consensus/pbft/pbft.h | 109 ++++++++- src/consensus/pbft/pbftconfig.h | 9 +- src/consensus/pbft/pbftrequests.h | 32 ++- src/consensus/pbft/pbfttypes.h | 44 +++- src/enclave/enclavetypes.h | 5 + src/enclave/rpchandler.h | 3 +- src/host/nodeconnections.h | 48 ++-- src/kv/kv.h | 128 +++++++--- src/kv/kvtypes.h | 1 + src/node/history.h | 16 +- src/node/nodestate.h | 10 +- src/node/rpc/frontend.h | 21 +- 28 files changed, 591 insertions(+), 192 deletions(-) diff --git a/src/consensus/consensustypes.h b/src/consensus/consensustypes.h index e9ccfc39fe9e..4ae22ac052f3 100644 --- a/src/consensus/consensustypes.h +++ b/src/consensus/consensustypes.h @@ -7,4 +7,5 @@ namespace ccf { using ObjectId = uint64_t; using NodeId = ObjectId; + using Index = int64_t; } \ No newline at end of file diff --git a/src/consensus/pbft/libbyz/LedgerWriter.cpp b/src/consensus/pbft/libbyz/LedgerWriter.cpp index 1c8bd0c27fb3..c1515de2a9dd 100644 --- a/src/consensus/pbft/libbyz/LedgerWriter.cpp +++ b/src/consensus/pbft/libbyz/LedgerWriter.cpp @@ -5,7 +5,7 @@ #include "Request.h" LedgerWriter::LedgerWriter( - pbft::Store& store_, pbft::PrePreparesMap& pbft_pre_prepares_map_) : + pbft::PbftStore& store_, pbft::PrePreparesMap& pbft_pre_prepares_map_) : store(store_), pbft_pre_prepares_map(pbft_pre_prepares_map_) {} diff --git a/src/consensus/pbft/libbyz/LedgerWriter.h b/src/consensus/pbft/libbyz/LedgerWriter.h index f2c0445c546e..ad66544fccc3 100644 --- a/src/consensus/pbft/libbyz/LedgerWriter.h +++ b/src/consensus/pbft/libbyz/LedgerWriter.h @@ -14,12 +14,12 @@ class LedgerWriter { private: - pbft::Store& store; + pbft::PbftStore& store; pbft::PrePreparesMap& pbft_pre_prepares_map; public: LedgerWriter( - pbft::Store& store_, pbft::PrePreparesMap& pbft_pre_prepares_map_); + pbft::PbftStore& store_, pbft::PrePreparesMap& pbft_pre_prepares_map_); virtual ~LedgerWriter() = default; void write_prepare(const Prepared_cert& prepared_cert, Seqno seqno); void write_pre_prepare(Pre_prepare* pp); diff --git a/src/consensus/pbft/libbyz/Principal.h b/src/consensus/pbft/libbyz/Principal.h index 54c84d7007ac..e80317fea235 100644 --- a/src/consensus/pbft/libbyz/Principal.h +++ b/src/consensus/pbft/libbyz/Principal.h @@ -71,6 +71,8 @@ class Principal : public IPrincipal void set_certificate(const std::vector& cert_); bool has_certificate_set(); + Index get_last_ae_sent() const; + void set_last_ae_sent(Index ae); private: int id; @@ -85,6 +87,8 @@ class Principal : public IPrincipal ULong tstamp; // last timestamp in a new-key message from this principal Time my_tstamp; // my time when message was accepted + Index last_ae_sent = 0; // last append entry index sent to this principal + Request_id last_fetch; // Last request_id in a fetch message from this principal bool has_received_network_open_msg; @@ -140,4 +144,14 @@ inline Request_id Principal::last_fetch_rid() const inline void Principal::set_last_fetch_rid(Request_id r) { last_fetch = r; +} + +inline Index Principal::get_last_ae_sent() const +{ + return last_ae_sent; +} + +inline void Principal::set_last_ae_sent(Index ae) +{ + last_ae_sent = ae; } \ No newline at end of file diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index fefb6722e669..8f8f273885c4 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -70,7 +70,7 @@ Replica::Replica( INetwork* network, pbft::RequestsMap& pbft_requests_map_, pbft::PrePreparesMap& pbft_pre_prepares_map_, - pbft::Store& store) : + pbft::PbftStore& store) : Node(node_info), rqueue(), plog(max_out), @@ -237,16 +237,6 @@ bool Replica::compare_execution_results( { auto& pp_root = pre_prepare->get_full_state_merkle_root(); auto& r_pp_root = pre_prepare->get_replicated_state_merkle_root(); - if (!std::equal( - std::begin(pp_root), - std::end(pp_root), - std::begin(info.full_state_merkle_root))) - { - LOG_FAIL << "Full state merkle root between execution and the pre_prepare " - "message does not match, seqno:" - << pre_prepare->seqno() << std::endl; - return false; - } if (!std::equal( std::begin(r_pp_root), @@ -266,15 +256,30 @@ bool Replica::compare_execution_results( "does not match, seqno:" << pre_prepare->seqno() << ", tx_ctx:" << tx_ctx << ", info.ctx:" << info.ctx << std::endl; + // return false; + } + + if (!std::equal( + std::begin(pp_root), + std::end(pp_root), + std::begin(info.full_state_merkle_root))) + { + LOG_FAIL << "Full state merkle root between execution and the pre_prepare " + "message does not match, seqno:" + << pre_prepare->seqno() << std::endl; return false; } + return true; } void Replica::playback_request(const pbft::Request& request) { + LOG_INFO_FMT( + "Playback request for request with size {}", request.pbft_raw.size()); auto req = - create_message(request.raw.data(), request.raw.size()).release(); + create_message(request.pbft_raw.data(), request.pbft_raw.size()) + .release(); if (!brt.add_request(req)) { rqueue.append(req); @@ -283,12 +288,13 @@ void Replica::playback_request(const pbft::Request& request) void Replica::playback_pre_prepare(const pbft::PrePrepare& pre_prepare) { + LOG_INFO_FMT("playback pre-prepare {}", pre_prepare.seqno); auto executable_pp = create_message( pre_prepare.contents.data(), pre_prepare.contents.size()); auto seqno = executable_pp->seqno(); ByzInfo info; - if (execute_tentative(executable_pp.get(), info)) + if (execute_tentative(executable_pp.get(), info, true)) { if (!compare_execution_results(info, executable_pp.get())) { @@ -317,6 +323,8 @@ void Replica::playback_pre_prepare(const pbft::PrePrepare& pre_prepare) } else { + LOG_INFO_FMT( + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALALALLAND"); LOG_DEBUG << "Received entries could not be processed. Received seqno: " << seqno << ". Truncating ledger to last executed: " << last_executed @@ -529,8 +537,10 @@ void Replica::handle(Request* m) Digest rd = m->digest(); LOG_TRACE << "Received request with rid: " << m->request_id() - << " id:" << id() << " primary:" << primary() - << " with cid: " << m->client_id() + << " replier: " << m->replier() << " is_signed: " << m->is_signed() + << " is read only: " << m->is_read_only() + << " contents size: " << m->contents_size() << " id:" << id() + << " primary:" << primary() << " with cid: " << m->client_id() << " current seqno: " << next_pp_seqno << " last executed: " << last_executed << " digest: " << rd.hash() << std::endl; @@ -1083,7 +1093,28 @@ void Replica::emit_signature_on_next_pp(int64_t version) signed_version = version; } -void Replica::activate_pbft_local_hooks() +void Replica::activate_playback_local_hooks() +{ + pbft_requests_map.set_deserialise_hook( + [this](const pbft::RequestsMap::Write& w) { + for (auto& [key, value] : w) + { + append_entries_index++; + playback_request(value.value); + } + }); + + pbft_pre_prepares_map.set_deserialise_hook( + [this](const pbft::PrePreparesMap::Write& w) { + for (auto& [key, value] : w) + { + append_entries_index++; + playback_pre_prepare(value.value); + } + }); +} + +void Replica::activate_local_hooks() { pbft_requests_map.set_local_hook([this]( kv::Version version, @@ -1091,7 +1122,7 @@ void Replica::activate_pbft_local_hooks() const pbft::RequestsMap::Write& w) { for (auto& [key, value] : w) { - playback_request(value.value); + append_entries_index++; } }); @@ -1101,17 +1132,11 @@ void Replica::activate_pbft_local_hooks() const pbft::PrePreparesMap::Write& w) { for (auto& [key, value] : w) { - playback_pre_prepare(value.value); + append_entries_index++; } }); } -void Replica::deactivate_pbft_local_hooks() -{ - pbft_requests_map.set_local_hook(nullptr); - pbft_pre_prepares_map.set_local_hook(nullptr); -} - View Replica::view() const { return Node::view(); @@ -1169,6 +1194,35 @@ void Replica::handle(Status* m) return; } + LOG_INFO_FMT( + "Received status message, replica {} is at state {} ", + m->id(), + m->to_ae_index()); + auto principal = get_principal(m->id()); + principal->set_last_ae_sent(m->to_ae_index()); + if (m->to_ae_index() < append_entries_index) + { + LOG_INFO_FMT( + "I WOULD NEED TO SEND AE HERE: mine {} theirs {}", + append_entries_index, + m->to_ae_index()); + LOG_INFO_FMT( + "Sending status message with from {} to ae index {}", + m->to_ae_index(), + append_entries_index); + Status s( + v, + last_stable, + last_executed, + m->to_ae_index(), + append_entries_index, + has_complete_new_view(), + vi.has_nv_message(v)); + + s.authenticate(); + send(&s, m->id()); + } + return; // Retransmit messages that the sender is missing. if (last_stable > m->last_stable() + max_out) { @@ -1788,8 +1842,8 @@ bool Replica::execute_read_only(Request* request) int request_id = request->request_id(); std::shared_ptr cp = get_principal(client_id); ByzInfo info; - int error = - exec_command(&inb, outb, 0, client_id, request_id, true, 0, info); + int error = exec_command( + &inb, outb, 0, client_id, request_id, true, nullptr, 0, 0, info, false); right_pad_contents(outb); if (!error) @@ -1909,7 +1963,7 @@ void Replica::execute_prepared(bool committed) } } -bool Replica::execute_tentative(Pre_prepare* pp, ByzInfo& info) +bool Replica::execute_tentative(Pre_prepare* pp, ByzInfo& info, bool playback) { LOG_DEBUG << "in execute tentative: " << pp->seqno() << std::endl; if ( @@ -1970,8 +2024,11 @@ bool Replica::execute_tentative(Pre_prepare* pp, ByzInfo& info) client_id, rid, false, + (uint8_t*)request.contents(), + request.contents_size(), replies.total_requests_processed(), - info); + info, + playback); right_pad_contents(outb); // Finish constructing the reply. LOG_DEBUG << "Executed from tentative exec: " << pp->seqno() @@ -2429,23 +2486,27 @@ void Replica::mark_stable(Seqno n, bool have_state) void Replica::handle(Data* m) { - state.handle(m); + LOG_INFO_FMT("HANDLE DATA"); + // state.handle(m); } void Replica::handle(Meta_data* m) { - state.handle(m); + LOG_INFO_FMT("HANDLE META DATA"); + // state.handle(m); } void Replica::handle(Meta_data_d* m) { - state.handle(m); + LOG_INFO_FMT("HANDLE META DATA D"); + // state.handle(m); } void Replica::handle(Fetch* m) { - int mid = m->id(); - state.handle(m, last_stable); + LOG_INFO_FMT("HANDLE FETCH"); + // int mid = m->id(); + // state.handle(m, last_stable); } void Replica::send_status(bool send_now) @@ -2479,49 +2540,70 @@ void Replica::send_status(bool send_now) return; } - Status s( - v, - last_stable, - last_executed, - has_complete_new_view(), - vi.has_nv_message(v)); - - if (has_complete_new_view()) + LOG_INFO_FMT( + "Sending status message with ae index {}", append_entries_index); + for (auto& [id, principal] : *get_principals()) { - // Set prepared and committed bitmaps correctly - Seqno max = last_stable + max_out; - Seqno min = std::max(last_executed, last_stable) + 1; - for (Seqno n = min; n <= max; n++) + if (id == node_id) { - Prepared_cert& pc = plog.fetch(n); - if (pc.is_complete() || state.in_check_state()) - { - s.mark_prepared(n); - if (clog.fetch(n).is_complete() || state.in_check_state()) - { - s.mark_committed(n); - } - } - else - { - // Ask for missing big requests - if ( - !pc.is_pp_complete() && pc.pre_prepare() && pc.num_correct() >= f()) - { - s.add_breqs(n, pc.missing_reqs()); - } - } + continue; } + LOG_INFO_FMT( + "Sending to {} from {} to {}", + id, + principal->get_last_ae_sent(), + append_entries_index); + + Status s( + v, + last_stable, + last_executed, + principal->get_last_ae_sent(), + append_entries_index, + has_complete_new_view(), + vi.has_nv_message(v)); + + s.authenticate(); + send(&s, id); } - else - { - vi.set_received_vcs(&s); - vi.set_missing_pps(&s); - } - - // Multicast status to all replicas. - s.authenticate(); - send(&s, All_replicas); + return; + // if (has_complete_new_view()) + // { + // // Set prepared and committed bitmaps correctly + // Seqno max = last_stable + max_out; + // Seqno min = std::max(last_executed, last_stable) + 1; + // for (Seqno n = min; n <= max; n++) + // { + // Prepared_cert& pc = plog.fetch(n); + // if (pc.is_complete() || state.in_check_state()) + // { + // s.mark_prepared(n); + // if (clog.fetch(n).is_complete() || state.in_check_state()) + // { + // s.mark_committed(n); + // } + // } + // else + // { + // // Ask for missing big requests + // if ( + // !pc.is_pp_complete() && pc.pre_prepare() && pc.num_correct() >= + // f()) + // { + // s.add_breqs(n, pc.missing_reqs()); + // } + // } + // } + // } + // else + // { + // vi.set_received_vcs(&s); + // vi.set_missing_pps(&s); + // } + + // // Multicast status to all replicas. + // s.authenticate(); + // send(&s, All_replicas); } } @@ -3069,4 +3151,4 @@ void Replica::try_end_recovery() } int Replica::min_pre_prepare_batch_size = - Replica::min_min_pre_prepare_batch_size; + Replica::min_min_pre_prepare_batch_size; \ No newline at end of file diff --git a/src/consensus/pbft/libbyz/Replica.h b/src/consensus/pbft/libbyz/Replica.h index 0872ee6ff1af..3a8476da8223 100644 --- a/src/consensus/pbft/libbyz/Replica.h +++ b/src/consensus/pbft/libbyz/Replica.h @@ -62,7 +62,7 @@ class Replica : public Node, public IMessageReceiveBase INetwork* network, pbft::RequestsMap& pbft_requests_map_, pbft::PrePreparesMap& pbft_pre_prepares_map_, - pbft::Store& store_); + pbft::PbftStore& store_); // Requires: "mem" is vm page aligned and nbytes is a multiple of the // vm page size. // Effects: Create a new server replica using the information in @@ -134,8 +134,8 @@ class Replica : public Node, public IMessageReceiveBase size_t f() const; void set_f(ccf::NodeId f); void emit_signature_on_next_pp(int64_t version); - void activate_pbft_local_hooks(); - void deactivate_pbft_local_hooks(); + void activate_local_hooks(); + void activate_playback_local_hooks(); View view() const; bool is_primary() const; int primary() const; @@ -287,7 +287,7 @@ class Replica : public Node, public IMessageReceiveBase // Effects: Sends back replies that have been executed tentatively // to the client. The replies are tentative unless "committed" is true. - bool execute_tentative(Pre_prepare* pp, ByzInfo& info); + bool execute_tentative(Pre_prepare* pp, ByzInfo& info, bool playback = false); // Effects: Tentatively executes as many commands as possible. It // extracts requests to execute commands from a message "m"; calls // exec_command for each command; and sends back replies to the @@ -392,6 +392,8 @@ class Replica : public Node, public IMessageReceiveBase static constexpr auto num_look_back_to_set_batch_size = 10; static constexpr auto max_pre_prepare_request_batch_wait_ms = 2; + pbft::Index append_entries_index = 0; + // Logging variables used to measure average batch size int nbreqs; // The number of requests executed in current interval int nbrounds; // The number of rounds of BFT executed in current interval diff --git a/src/consensus/pbft/libbyz/State.cpp b/src/consensus/pbft/libbyz/State.cpp index 964018b93f96..825df9e5c393 100644 --- a/src/consensus/pbft/libbyz/State.cpp +++ b/src/consensus/pbft/libbyz/State.cpp @@ -661,6 +661,7 @@ void State::start_fetch(Seqno le, Seqno c, Digest* cd, bool stable) void State::send_fetch(bool change_replier) { + LOG_INFO_FMT("Sending FETCH!"); START_CC(fetch_cycles); last_fetch_t = ITimer::current_time(); diff --git a/src/consensus/pbft/libbyz/Status.cpp b/src/consensus/pbft/libbyz/Status.cpp index 28984969addc..4c59106ee022 100644 --- a/src/consensus/pbft/libbyz/Status.cpp +++ b/src/consensus/pbft/libbyz/Status.cpp @@ -12,7 +12,14 @@ #include -Status::Status(View v, Seqno ls, Seqno le, bool hnvi, bool hnvm) : +Status::Status( + View v, + Seqno ls, + Seqno le, + Index from_ae_index, + Index to_ae_index, + bool hnvi, + bool hnvm) : Message(Status_tag, Max_message_size) { rep().extra = (hnvi) ? 1 : 0; @@ -20,6 +27,8 @@ Status::Status(View v, Seqno ls, Seqno le, bool hnvi, bool hnvm) : rep().v = v; rep().ls = ls; rep().le = le; + rep().to_ae_index = to_ae_index; + rep().from_ae_index = from_ae_index; rep().id = pbft::GlobalState::get_node().id(); rep().brsz = 0; diff --git a/src/consensus/pbft/libbyz/Status.h b/src/consensus/pbft/libbyz/Status.h index 5fb7f2027f07..c62b6cf61c4b 100644 --- a/src/consensus/pbft/libbyz/Status.h +++ b/src/consensus/pbft/libbyz/Status.h @@ -38,6 +38,8 @@ struct Status_rep : public Message_rep View v; // Replica's current view Seqno ls; // seqno of last stable checkpoint Seqno le; // seqno of last request executed + Index to_ae_index; // index of ledger append entries + Index from_ae_index; // index of ledger append entries int id; // id of the replica that generated the message. short sz; // size of prepared and committed or pps short brsz; // size of breqs @@ -73,7 +75,14 @@ class Status : public Message // Status messages // public: - Status(View v, Seqno ls, Seqno le, bool hnvi, bool hnvm); + Status( + View v, + Seqno ls, + Seqno le, + Index from_ae_index, + Index to_ae_index, + bool hnvi, + bool hnvm); // Effects: Creates a new unauthenticated Status message. "v" // should be the sending replica's current view, "ls" should be the // sequence number of the last stable checkpoint, "le" the sequence @@ -138,6 +147,12 @@ class Status : public Message Seqno last_executed() const; // Effects: Returns seqno of last request executed by principal id(). + Index to_ae_index() const; + // Effects: Returns the index of the latest entry appended into the ledger + + Index from_ae_index() const; + // Effects: Returns the index of the latest entry appended into the ledger + // // Observers when has_nv_info() // @@ -341,6 +356,16 @@ inline Seqno Status::last_executed() const return rep().le; } +inline Index Status::to_ae_index() const +{ + return rep().to_ae_index; +} + +inline Index Status::from_ae_index() const +{ + return rep().from_ae_index; +} + inline bool Status::is_prepared(Seqno n) { PBFT_ASSERT(has_nv_info(), "Invalid state"); diff --git a/src/consensus/pbft/libbyz/libbyz.cpp b/src/consensus/pbft/libbyz/libbyz.cpp index f9efe7287b24..b983bc88facf 100644 --- a/src/consensus/pbft/libbyz/libbyz.cpp +++ b/src/consensus/pbft/libbyz/libbyz.cpp @@ -115,7 +115,7 @@ int Byz_init_replica( INetwork* network, pbft::RequestsMap& pbft_requests_map, pbft::PrePreparesMap& pbft_pre_prepares_map, - pbft::Store& store, + pbft::PbftStore& store, IMessageReceiveBase** message_receiver) { // Initialize random number generator diff --git a/src/consensus/pbft/libbyz/libbyz.h b/src/consensus/pbft/libbyz/libbyz.h index 8a0281f0e83f..24e976230f46 100644 --- a/src/consensus/pbft/libbyz/libbyz.h +++ b/src/consensus/pbft/libbyz/libbyz.h @@ -104,7 +104,7 @@ int Byz_init_replica( INetwork* network, pbft::RequestsMap& pbft_requests_map, pbft::PrePreparesMap& pbft_pre_prepares_map, - pbft::Store& store_, + pbft::PbftStore& store_, IMessageReceiveBase** message_receiver = nullptr); /* Requires: "mem" is vm page aligned and "size" is a multiple of the vm page size. diff --git a/src/consensus/pbft/libbyz/receive_message_base.h b/src/consensus/pbft/libbyz/receive_message_base.h index 1a35ac1e061e..a277e740f542 100644 --- a/src/consensus/pbft/libbyz/receive_message_base.h +++ b/src/consensus/pbft/libbyz/receive_message_base.h @@ -31,8 +31,8 @@ class IMessageReceiveBase virtual Seqno get_last_executed() const = 0; virtual int my_id() const = 0; virtual void emit_signature_on_next_pp(int64_t version) = 0; - virtual void activate_pbft_local_hooks() = 0; - virtual void deactivate_pbft_local_hooks() = 0; + virtual void activate_playback_local_hooks() = 0; + virtual void activate_local_hooks() = 0; virtual char* create_response_message( int client_id, Request_id rid, uint32_t size) = 0; }; diff --git a/src/consensus/pbft/libbyz/test/replica_main.cpp b/src/consensus/pbft/libbyz/test/replica_main.cpp index 378232cfc17d..58db10452519 100644 --- a/src/consensus/pbft/libbyz/test/replica_main.cpp +++ b/src/consensus/pbft/libbyz/test/replica_main.cpp @@ -62,8 +62,11 @@ ExecCommand exec_command = []( int client, Request_id rid, bool ro, + uint8_t* req_start, + size_t req_size, Seqno n, - ByzInfo& info) { + ByzInfo& info, + bool playback) { outb.contents = message_receiver->create_response_message(client, rid, 8); Long& counter = *(Long*)service_mem; @@ -218,7 +221,8 @@ int main(int argc, char** argv) pbft::Tables::PBFT_REQUESTS, kv::SecurityDomain::PUBLIC); auto& pbft_pre_prepares_map = store->create( pbft::Tables::PBFT_PRE_PREPARES, kv::SecurityDomain::PUBLIC); - auto replica_store = std::make_unique>(store); + auto replica_store = + std::make_unique>(store); int used_bytes = Byz_init_replica( node_info, diff --git a/src/consensus/pbft/libbyz/test/replica_test.cpp b/src/consensus/pbft/libbyz/test/replica_test.cpp index 3e969ccded52..504ab7295469 100644 --- a/src/consensus/pbft/libbyz/test/replica_test.cpp +++ b/src/consensus/pbft/libbyz/test/replica_test.cpp @@ -192,8 +192,11 @@ ExecCommand exec_command = []( int client, Request_id rid, bool ro, + uint8_t* req_start, + size_t req_size, Seqno total_requests_executed, - ByzInfo& info) { + ByzInfo& info, + bool playback) { outb.contents = message_receive_base->create_response_message(client, rid, 8); Long& counter = *(Long*)service_mem; @@ -396,7 +399,8 @@ int main(int argc, char** argv) pbft::Tables::PBFT_REQUESTS, kv::SecurityDomain::PUBLIC); auto& pbft_pre_prepares_map = store->create( pbft::Tables::PBFT_PRE_PREPARES, kv::SecurityDomain::PUBLIC); - auto replica_store = std::make_unique>(store); + auto replica_store = + std::make_unique>(store); int used_bytes = Byz_init_replica( node_info, diff --git a/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp b/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp index 57f1702f0cf8..11a34bd48dbe 100644 --- a/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp +++ b/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp @@ -40,8 +40,11 @@ class ExecutionMock int client, Request_id rid, bool ro, + uint8_t* req_start, + size_t req_size, Seqno total_requests_executed, - ByzInfo& info) { + ByzInfo& info, + bool playback) { // increase total number of commands executed to compare with fake_req command_counter++; @@ -81,7 +84,7 @@ NodeInfo get_node_info() void create_replica( std::vector& service_mem, - pbft::Store& store, + pbft::PbftStore& store, pbft::RequestsMap& pbft_requests_map, pbft::PrePreparesMap& pbft_pre_prepares_map) { @@ -125,7 +128,8 @@ TEST_CASE("Test Ledger Replay") "derived_map", kv::SecurityDomain::PUBLIC); auto write_pbft_store = - std::make_unique>(write_store); + std::make_unique>( + write_store); int mem_size = 400 * 8192; std::vector service_mem(mem_size, 0); @@ -155,11 +159,16 @@ TEST_CASE("Test Ledger Replay") ccf::Store::Tx tx; auto req_view = tx.get_view(write_pbft_requests_map); + + int command_size; + auto command_start = request->command(command_size); + req_view->put( 0, {0, 0, {}, + {command_start, command_start + command_size}, {(const uint8_t*)request->contents(), (const uint8_t*)request->contents() + request->size()}}); @@ -187,7 +196,10 @@ TEST_CASE("Test Ledger Replay") pbft::Tables::PBFT_PRE_PREPARES, kv::SecurityDomain::PUBLIC); auto& derived_map = store->create( "derived_map", kv::SecurityDomain::PUBLIC); - auto replica_store = std::make_unique>(store); + + auto replica_store = + std::make_unique>( + store); int mem_size = 400 * 8192; std::vector service_mem(mem_size, 0); @@ -196,7 +208,7 @@ TEST_CASE("Test Ledger Replay") create_replica( service_mem, *replica_store, pbft_requests_map, pbft_pre_prepares_map); pbft::GlobalState::get_replica().register_exec(exec_mock.exec_command); - pbft::GlobalState::get_replica().activate_pbft_local_hooks(); + pbft::GlobalState::get_replica().activate_playback_local_hooks(); // ledgerenclave work std::vector> entries; while (true) diff --git a/src/consensus/pbft/libbyz/types.h b/src/consensus/pbft/libbyz/types.h index 9c9fea45f7d0..e1bce784430e 100644 --- a/src/consensus/pbft/libbyz/types.h +++ b/src/consensus/pbft/libbyz/types.h @@ -18,6 +18,7 @@ using Long = int64_t; using ULong = uint64_t; +using Index = Long; using Seqno = Long; using View = Long; using Request_id = ULong; @@ -51,4 +52,14 @@ struct ByzInfo }; using ExecCommand = std::function; \ No newline at end of file + Byz_req*, + Byz_rep&, + Byz_buffer*, + int, + Request_id, + bool, + uint8_t* req_start, + size_t req_size, + Seqno, + ByzInfo&, + bool)>; \ No newline at end of file diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index aec170c47a5c..29ca96718b75 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -5,6 +5,7 @@ #include "consensus/ledgerenclave.h" #include "consensus/pbft/libbyz/Big_req_table.h" #include "consensus/pbft/libbyz/Client_proxy.h" +#include "consensus/pbft/libbyz/Status.h" #include "consensus/pbft/libbyz/libbyz.h" #include "consensus/pbft/libbyz/network.h" #include "consensus/pbft/libbyz/receive_message_base.h" @@ -73,6 +74,21 @@ namespace pbft n2n_channels->send_authenticated( ccf::NodeMsgType::consensus_msg, to, serialized_msg); + if (msg->tag() == 7) + { + Status* status; + Status::convert(msg, status); + + AppendEntries ae = { + pbft_append_entries, + id, + status->to_ae_index(), + status->from_ae_index(), + }; + n2n_channels->send_authenticated( + ccf::NodeMsgType::consensus_msg, to, ae); + return msg->size(); + } return msg->size(); } @@ -106,9 +122,13 @@ namespace pbft std::shared_ptr rpcsessions; SeqNo global_commit_seqno; View last_commit_view; - std::unique_ptr store; + std::unique_ptr store; std::unique_ptr ledger; + // When this is set, only public domain is deserialised when receving append + // entries + bool public_only = false; + struct view_change_info { view_change_info(View view_, SeqNo min_global_commit_) : @@ -124,7 +144,7 @@ namespace pbft struct register_global_commit_info { - pbft::Store* store; + pbft::PbftStore* store; SeqNo* global_commit_seqno; View* last_commit_view; std::vector* view_change_list; @@ -132,7 +152,7 @@ namespace pbft public: Pbft( - std::unique_ptr store_, + std::unique_ptr store_, std::shared_ptr channels_, NodeId id, size_t sig_max_tx, @@ -197,6 +217,9 @@ namespace pbft &message_receiver_base); LOG_INFO_FMT("PBFT setup for local_id: {}", local_id); + message_receiver_base->activate_local_hooks(); + message_receiver_base->activate_playback_local_hooks(); + pbft_config->set_service_mem(mem + used_bytes); pbft_config->set_receiver(message_receiver_base); pbft_network->set_receiver(message_receiver_base); @@ -243,7 +266,7 @@ namespace pbft bool on_request(const kv::TxHistory::RequestCallbackArgs& args) override { pbft::Request request = { - args.actor, args.caller_id, args.caller_cert, args.request}; + args.actor, args.caller_id, args.caller_cert, args.request, {}}; auto serialized_req = request.serialise(); auto rep_cb = [&]( @@ -370,11 +393,85 @@ namespace pbft switch (serialized::peek(data, size)) { case pbft_message: + { serialized::skip(data, size, sizeof(PbftHeader)); message_receiver_base->receive_message(data, size); break; - default: - {} + } + case pbft_append_entries: + { + AppendEntries r; + message_receiver_base->activate_playback_local_hooks(); + + try + { + r = + channels->template recv_authenticated(data, size); + } + catch (const std::logic_error& err) + { + LOG_FAIL_FMT(err.what()); + return; + } + + // TODO maybe check with our AE where we are and if we want to take + // the message could potentially include a term history just like raft + // and also send the view at least skip the entries that we have? + + for (Index i = r.prev_idx + 1; i <= r.idx; i++) + { + LOG_INFO_FMT( + "RECORDING ENTRY FOR INDEX {} FOR DATA WITH SIZE {}", i, size); + // if (i <= last_idx) + // { + // // If the current entry has already been deserialised, skip the + // // payload for that entry + // ledger->skip_entry(data, size); + // continue; + // } + Index last_idx = i; + auto ret = ledger->record_entry(data, size); + + if (!ret.second) + { + // NB: This will currently never be triggered. + // This should only fail if there is malformed data. Truncate + // the log and reply false. + LOG_FAIL_FMT( + "Recv append entries to {} from {} but the data is malformed", + local_id, + r.from_node); + + last_idx = r.prev_idx; + ledger->truncate(r.prev_idx); + return; + } + + auto deserialise_success = + store->deserialise(ret.first, public_only, false); + + switch (deserialise_success) + { + case kv::DeserialiseSuccess::FAILED: + { + throw std::logic_error( + "Replica failed to apply log entry " + std::to_string(i)); + break; + } + case kv::DeserialiseSuccess::PASS: + { + break; + } + case kv::DeserialiseSuccess::PASS_SIGNATURE: + { + throw std::logic_error( + "Received a history signature while running with PBFT!"); + break; + } + } + } + break; + } } } diff --git a/src/consensus/pbft/pbftconfig.h b/src/consensus/pbft/pbftconfig.h index bae655843006..403208f24840 100644 --- a/src/consensus/pbft/pbftconfig.h +++ b/src/consensus/pbft/pbftconfig.h @@ -53,8 +53,11 @@ namespace pbft int client, Request_id rid, bool ro, + uint8_t* req_start, + size_t req_size, Seqno total_requests_executed, - ByzInfo& info) { + ByzInfo& info, + bool playback) { pbft::Request request; request.deserialise({inb->contents, inb->contents + inb->size}); @@ -75,7 +78,9 @@ namespace pbft const auto n = ctx.method.find_last_of('/'); ctx.method = ctx.method.substr(n + 1, ctx.method.size()); - auto rep = frontend->process_pbft(ctx); + ctx.pbft_raw = {req_start, req_start + req_size}; + + auto rep = frontend->process_pbft(ctx, playback); static_assert( sizeof(info.full_state_merkle_root) == sizeof(crypto::Sha256Hash)); diff --git a/src/consensus/pbft/pbftrequests.h b/src/consensus/pbft/pbftrequests.h index e59225ce088d..a876ac74f2fd 100644 --- a/src/consensus/pbft/pbftrequests.h +++ b/src/consensus/pbft/pbftrequests.h @@ -16,14 +16,15 @@ namespace pbft uint64_t caller_id; std::vector caller_cert; std::vector raw; + std::vector pbft_raw; - MSGPACK_DEFINE(actor, caller_id, caller_cert, raw); + MSGPACK_DEFINE(actor, caller_id, caller_cert, raw, pbft_raw); std::vector serialise() { bool include_caller = false; - size_t size = - sizeof(actor) + sizeof(caller_id) + sizeof(bool) + raw.size(); + size_t size = sizeof(actor) + sizeof(caller_id) + sizeof(bool) + + sizeof(size_t) + raw.size() + sizeof(size_t) + pbft_raw.size(); if (!caller_cert.empty()) { size += sizeof(size_t) + caller_cert.size(); @@ -41,8 +42,13 @@ namespace pbft serialized::write(data_, size_, caller_cert.size()); serialized::write(data_, size_, caller_cert.data(), caller_cert.size()); } + serialized::write(data_, size_, raw.size()); serialized::write(data_, size_, raw.data(), raw.size()); - + serialized::write(data_, size_, pbft_raw.size()); + if (!pbft_raw.empty()) + { + serialized::write(data_, size_, pbft_raw.data(), pbft_raw.size()); + } return serialized_req; } @@ -59,12 +65,26 @@ namespace pbft auto caller_size = serialized::read(data_, size_); caller_cert = serialized::read(data_, size_, caller_size); } - raw = serialized::read(data_, size_, size_); + auto raw_size = serialized::read(data_, size_); + raw = serialized::read(data_, size_, raw_size); + LOG_INFO_FMT("getting pbft raw size"); + auto pbft_raw_size = serialized::read(data_, size_); + LOG_INFO_FMT("pbft raw size {}", pbft_raw_size); + if (pbft_raw_size > 0) + { + LOG_INFO_FMT("hmmmm"); + pbft_raw = serialized::read(data_, size_, pbft_raw_size); + } + else + { + pbft_raw = {}; + } } }; DECLARE_JSON_TYPE(Request); - DECLARE_JSON_REQUIRED_FIELDS(Request, actor, caller_id, caller_cert, raw); + DECLARE_JSON_REQUIRED_FIELDS( + Request, actor, caller_id, caller_cert, raw, pbft_raw); // size_t is used as the key of the table. This key will always be 0 since we // don't want to store the requests in the kv over time, we just want to get diff --git a/src/consensus/pbft/pbfttypes.h b/src/consensus/pbft/pbfttypes.h index a266df85c829..e34322746a93 100644 --- a/src/consensus/pbft/pbfttypes.h +++ b/src/consensus/pbft/pbfttypes.h @@ -13,10 +13,12 @@ namespace pbft using NodeId = uint64_t; using Node2NodeMsg = uint64_t; using CallerId = uint64_t; + using RequestId = uint64_t; enum PbftMsgType : Node2NodeMsg { pbft_message = 1000, + pbft_append_entries }; #pragma pack(push, 1) @@ -25,22 +27,34 @@ namespace pbft PbftMsgType msg; NodeId from_node; }; + + struct AppendEntries : PbftHeader + { + Index idx; + Index prev_idx; + }; + #pragma pack(pop) + template class Store { public: virtual ~Store() {} + virtual S deserialise( + const std::vector& data, + bool public_only = false, + bool commit = true, + Term* term = nullptr) = 0; virtual void compact(Index v) = 0; - virtual void rollback(Index v) = 0; virtual kv::Version current_version() = 0; virtual void commit_pre_prepare( const pbft::PrePrepare& pp, pbft::PrePreparesMap& pbft_pre_prepares_map) = 0; }; - template - class Adaptor : public pbft::Store + template + class Adaptor : public pbft::Store { private: std::weak_ptr x; @@ -48,6 +62,19 @@ namespace pbft public: Adaptor(std::shared_ptr x) : x(x) {} + S deserialise( + const std::vector& data, + bool public_only = false, + bool commit = true, + Term* term = nullptr) + { + auto p = x.lock(); + if (p) + return p->deserialise(data, public_only, term); + + return S::FAILED; + } + void commit_pre_prepare( const pbft::PrePrepare& pp, pbft::PrePreparesMap& pbft_pre_prepares_map) { @@ -84,15 +111,6 @@ namespace pbft } } - void rollback(Index v) - { - auto p = x.lock(); - if (p) - { - p->rollback(v); - } - } - kv::Version current_version() { auto p = x.lock(); @@ -103,4 +121,6 @@ namespace pbft return kv::NoVersion; } }; + + using PbftStore = pbft::Store; } \ No newline at end of file diff --git a/src/enclave/enclavetypes.h b/src/enclave/enclavetypes.h index 0d7c1fff15eb..4ed393379b8e 100644 --- a/src/enclave/enclavetypes.h +++ b/src/enclave/enclavetypes.h @@ -60,6 +60,11 @@ namespace enclave // TODO: Avoid unnecessary copies std::vector raw = {}; + // raw pbft Request + // TODO see if you can get raw from pbft_raw and if raw is used in raft or + // not + std::vector pbft_raw = {}; + nlohmann::json unpacked_rpc = {}; std::optional signed_request = std::nullopt; diff --git a/src/enclave/rpchandler.h b/src/enclave/rpchandler.h index cc169b5c9a56..b7abd00d6c07 100644 --- a/src/enclave/rpchandler.h +++ b/src/enclave/rpchandler.h @@ -37,6 +37,7 @@ namespace enclave kv::Version version; }; - virtual ProcessPbftResp process_pbft(RPCContext& ctx) = 0; + virtual ProcessPbftResp process_pbft( + RPCContext& ctx, bool playback = false) = 0; }; } diff --git a/src/host/nodeconnections.h b/src/host/nodeconnections.h index e5f6938f702b..7743318303ae 100644 --- a/src/host/nodeconnections.h +++ b/src/host/nodeconnections.h @@ -2,6 +2,8 @@ // Licensed under the Apache 2.0 License. #pragma once +#include "consensus/consensustypes.h" +#include "consensus/pbft/pbfttypes.h" #include "consensus/raft/rafttypes.h" #include "ledger.h" #include "node/nodetypes.h" @@ -207,44 +209,60 @@ namespace asynchost auto to = serialized::read(data, size); auto node = find(to, true); + LOG_INFO_FMT("NNNAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); + if (!node) return; auto data_to_send = data; auto size_to_send = size; - // If the message is a raft append entries message, affix the + // If the message is a consensus append entries message, affix the // corresponding ledger entries if ( serialized::read(data, size) == - ccf::NodeMsgType::consensus_msg && - serialized::peek(data, size) == - raft::raft_append_entries) + ccf::NodeMsgType::consensus_msg && + serialized::peek(data, size) == + raft::raft_append_entries || + serialized::peek(data, size) == + pbft::pbft_append_entries) { // Parse the indices to be sent to the recipient. auto p = data; auto psize = size; - const auto& ae = serialized::overlay(p, psize); + + ccf::Index idx; + ccf::Index prev_idx; + if ( + serialized::peek(data, size) == + raft::raft_append_entries) + { + const auto& ae = + serialized::overlay(p, psize); + idx = ae.idx; + prev_idx = ae.prev_idx; + } + else + { + const auto& ae = + serialized::overlay(p, psize); + idx = ae.idx; + prev_idx = ae.prev_idx; + } // Find the total frame size, and write it along with the header. - auto count = ae.idx - ae.prev_idx; + auto count = idx - prev_idx; uint32_t frame = (uint32_t)( - size_to_send + - ledger.framed_entries_size(ae.prev_idx + 1, ae.idx)); + size_to_send + ledger.framed_entries_size(prev_idx + 1, idx)); LOG_DEBUG_FMT( - "raft send AE to {} [{}]: {}, {}", - to, - frame, - ae.idx, - ae.prev_idx); + "send AE to {} [{}]: {}, {}", to, frame, idx, prev_idx); // TODO(#performance): writev node.value()->write(sizeof(uint32_t), (uint8_t*)&frame); node.value()->write(size_to_send, data_to_send); - auto framed_entries = - ledger.read_framed_entries(ae.prev_idx + 1, ae.idx); + auto framed_entries = ledger.read_framed_entries(prev_idx + 1, idx); frame = (uint32_t)framed_entries.size(); node.value()->write(frame, framed_entries.data()); } diff --git a/src/kv/kv.h b/src/kv/kv.h index cbc991bc0552..8f318fc8c65b 100644 --- a/src/kv/kv.h +++ b/src/kv/kv.h @@ -80,6 +80,8 @@ namespace kv using Write = std::unordered_map; /// Signature for transaction commit handlers using CommitHook = std::function; + /// Signature for transaction deserialisation handlers + using DeserialiseHook = std::function; private: using This = Map; @@ -98,6 +100,7 @@ namespace kv std::unique_ptr roll; CommitHook local_hook; CommitHook global_hook; + DeserialiseHook deserialise_hook; LocalCommits commit_deltas; SpinLock sl; const SecurityDomain security_domain; @@ -109,7 +112,8 @@ namespace kv SecurityDomain security_domain_, bool replicated_, CommitHook local_hook_, - CommitHook global_hook_) : + CommitHook global_hook_, + DeserialiseHook deserialise_hook_) : store(store_), name(name_), roll(std::make_unique()), @@ -117,7 +121,8 @@ namespace kv security_domain(security_domain_), replicated(replicated_), local_hook(local_hook_), - global_hook(global_hook_) + global_hook(global_hook_), + deserialise_hook(deserialise_hook_) { roll->push_back({0, State(), Write()}); } @@ -133,7 +138,7 @@ namespace kv throw std::logic_error("Failed to cast store in Map clone"); return new Map( - store_, name, security_domain, replicated, nullptr, nullptr); + store_, name, security_domain, replicated, nullptr, nullptr, nullptr); } /** Get the name of the map @@ -174,6 +179,16 @@ namespace kv global_hook = hook; } + /** Set handler to be called on map deserialisation + * + * @param hook function to be called on map deserialisation + */ + void set_deserialise_hook(DeserialiseHook hook) + { + std::lock_guard guard(sl); + deserialise_hook = hook; + } + /** Get security domain of a Map * * @return Security domain of the map (affects serialisation) @@ -695,6 +710,13 @@ namespace kv writes[r] = {NoVersion, V()}; } + // add deserialise hook + if (map.deserialise_hook) + { + LOG_INFO_FMT("Calling deserialise hook!"); + map.deserialise_hook(writes); + } + return true; } @@ -1376,10 +1398,11 @@ namespace kv std::string name, SecurityDomain security_domain = kv::SecurityDomain::PRIVATE, typename Map::CommitHook local_hook = nullptr, - typename Map::CommitHook global_hook = nullptr) + typename Map::CommitHook global_hook = nullptr, + typename Map::DeserialiseHook deserialise_hook = nullptr) { return create>( - name, security_domain, local_hook, global_hook); + name, security_domain, local_hook, global_hook, deserialise_hook); } /** Create a Map @@ -1397,7 +1420,8 @@ namespace kv std::string name, SecurityDomain security_domain = kv::SecurityDomain::PRIVATE, typename M::CommitHook local_hook = nullptr, - typename M::CommitHook global_hook = nullptr) + typename M::CommitHook global_hook = nullptr, + typename M::DeserialiseHook deserialise_hook = nullptr) { std::lock_guard mguard(maps_lock); @@ -1417,8 +1441,14 @@ namespace kv } } - auto result = - new M(this, name, security_domain, replicated, local_hook, global_hook); + auto result = new M( + this, + name, + security_domain, + replicated, + local_hook, + global_hook, + deserialise_hook); maps[name] = std::unique_ptr>(result); return *result; } @@ -1491,6 +1521,7 @@ namespace kv DeserialiseSuccess deserialise( const std::vector& data, bool public_only = false, + bool commit = true, Term* term = nullptr) override { // This will return FAILED if the serialised transaction is being @@ -1620,45 +1651,68 @@ namespace kv } } - auto c = Tx::commit(views, [v]() { return v; }); - if (!c.has_value()) - { - LOG_FAIL_FMT("Failed to commit deserialised Tx at version {}", v); - return DeserialiseSuccess::FAILED; - } - - { - std::lock_guard vguard(version_lock); - version = v; - last_replicated = v; - } - auto success = DeserialiseSuccess::PASS; - auto h = get_history(); - if (h) + if (commit) { - // TODO: the reference to the entity should be in the history - auto search = views.find("ccf.signatures"); - if (search != views.end()) + auto c = Tx::commit(views, [v]() { return v; }); + if (!c.has_value()) + { + LOG_FAIL_FMT("Failed to commit deserialised Tx at version {}", v); + return DeserialiseSuccess::FAILED; + } { - // Transactions containing a signature must only contain - // a signature and must be verified - if (views.size() > 1) + std::lock_guard vguard(version_lock); + version = v; + last_replicated = v; + } + + auto h = get_history(); + if (h) + { + // TODO: the reference to the entity should be in the history + auto search = views.find("ccf.signatures"); + if (search != views.end()) { - LOG_FAIL_FMT("Unexpected contents in signature transaction {}", v); - return DeserialiseSuccess::FAILED; + // Transactions containing a signature must only contain + // a signature and must be verified + if (views.size() > 1) + { + LOG_FAIL_FMT( + "Unexpected contents in signature transaction {}", v); + return DeserialiseSuccess::FAILED; + } + + if (!h->verify(term)) + { + LOG_FAIL_FMT("Signature in transaction {} failed to verify", v); + return DeserialiseSuccess::FAILED; + } + success = DeserialiseSuccess::PASS_SIGNATURE; } + auto rep = frame::replicated(data.data()); + h->append(rep.p, rep.n, data.data(), data.size()); + } + } + // else + // { + // mguard.~lock_guard(); + // rollback(version); + // } - if (!h->verify(term)) + else + { + auto search = views.find("ccf.pbft.requests"); + if (search != views.end()) + { + if (search->second.view->has_writes()) { - LOG_FAIL_FMT("Signature in transaction {} failed to verify", v); - return DeserialiseSuccess::FAILED; + // auto ws = search->second.view->get(0); + // search->second.view->lock(); + // search->second.view->post_commit(); + // search->second.map->unlock(); } - success = DeserialiseSuccess::PASS_SIGNATURE; } - auto rep = frame::replicated(data.data()); - h->append(rep.p, rep.n, data.data(), data.size()); } return success; diff --git a/src/kv/kvtypes.h b/src/kv/kvtypes.h index 287db5018e1e..1c1aea710512 100644 --- a/src/kv/kvtypes.h +++ b/src/kv/kvtypes.h @@ -334,6 +334,7 @@ namespace kv virtual DeserialiseSuccess deserialise( const std::vector& data, bool public_only = false, + bool commit = true, Term* term = nullptr) = 0; virtual void compact(Version v) = 0; virtual void rollback(Version v) = 0; diff --git a/src/node/history.h b/src/node/history.h index 9f94b25198c0..d402f3244469 100644 --- a/src/node/history.h +++ b/src/node/history.h @@ -460,9 +460,9 @@ namespace ccf const uint8_t* all_data, size_t all_data_size) override { - crypto::Sha256Hash h({{all_data, all_data_size}}); - log_hash(h, APPEND); - full_state_tree.append(h); + // crypto::Sha256Hash h({{all_data, all_data_size}}); + // log_hash(h, APPEND); + // full_state_tree.append(h); if (is_replicated_tree_enabled()) { @@ -502,8 +502,8 @@ namespace ccf void rollback(kv::Version v) override { - full_state_tree.retract(v); - log_hash(full_state_tree.get_root(), ROLLBACK); + // full_state_tree.retract(v); + // log_hash(full_state_tree.get_root(), ROLLBACK); if (is_replicated_tree_enabled()) { @@ -514,9 +514,9 @@ namespace ccf void compact(kv::Version v) override { - if (v > MAX_HISTORY_LEN) - full_state_tree.flush(v - MAX_HISTORY_LEN); - log_hash(full_state_tree.get_root(), COMPACT); + // if (v > MAX_HISTORY_LEN) + // full_state_tree.flush(v - MAX_HISTORY_LEN); + // log_hash(full_state_tree.get_root(), COMPACT); if (is_replicated_tree_enabled()) { diff --git a/src/node/nodestate.h b/src/node/nodestate.h index d96d9d30f1e2..e6370dabafdf 100644 --- a/src/node/nodestate.h +++ b/src/node/nodestate.h @@ -1419,8 +1419,16 @@ namespace ccf void setup_pbft(const CCFConfig& config) { setup_n2n_channels(); + + network.pbft_requests_map.set_deserialise_hook(nullptr); + network.pbft_pre_prepares_map.set_deserialise_hook(nullptr); + + network.pbft_requests_map.set_local_hook(nullptr); + network.pbft_pre_prepares_map.set_local_hook(nullptr); + consensus = std::make_shared( - std::make_unique>(network.tables), + std::make_unique>( + network.tables), n2n_channels, self, config.signature_intervals.sig_max_tx, diff --git a/src/node/rpc/frontend.h b/src/node/rpc/frontend.h index e07d010bd590..17cbda23fb80 100644 --- a/src/node/rpc/frontend.h +++ b/src/node/rpc/frontend.h @@ -779,7 +779,8 @@ namespace ccf * * @param ctx Context for this RPC */ - ProcessPbftResp process_pbft(enclave::RPCContext& ctx) override + ProcessPbftResp process_pbft( + enclave::RPCContext& ctx, bool playback = false) override { // TODO(#PBFT): Refactor this with process_forwarded(). Store::Tx tx; @@ -813,13 +814,17 @@ namespace ccf history->register_on_result(cb); - auto req_view = tx.get_view(*pbft_requests_map); - req_view->put( - 0, - {ctx.actor, - ctx.session.fwd.value().caller_id, - ctx.session.caller_cert, - ctx.raw}); + if (!playback) + { + auto req_view = tx.get_view(*pbft_requests_map); + req_view->put( + 0, + {ctx.actor, + ctx.session.fwd.value().caller_id, + ctx.session.caller_cert, + ctx.raw, + ctx.pbft_raw}); + } auto rep = process_json(ctx, tx, ctx.session.fwd->caller_id); From 20f8283b57e5ce6fb530dbabaf671c56d7e5f47b Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Thu, 16 Jan 2020 13:31:39 +0000 Subject: [PATCH 02/42] deserialise but don't commit --- src/consensus/pbft/libbyz/Replica.cpp | 102 +++++++---- src/consensus/pbft/libbyz/Replica.h | 16 +- .../pbft/libbyz/receive_message_base.h | 2 +- .../pbft/libbyz/test/test_ledger_replay.cpp | 1 - src/consensus/pbft/pbft.h | 9 +- src/consensus/pbft/pbfttypes.h | 12 +- src/enclave/rpchandler.h | 2 + src/kv/kv.h | 163 ++++++++---------- src/kv/kvtypes.h | 1 - src/node/nodestate.h | 3 - src/node/rpc/frontend.h | 8 +- 11 files changed, 176 insertions(+), 143 deletions(-) diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index 8f8f273885c4..cc30c25efcbd 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -273,6 +273,32 @@ bool Replica::compare_execution_results( return true; } +void Replica::playback_transaction(ccf::Store::Tx& tx) +{ + { + auto view = tx.get_view(pbft_requests_map); + auto req = view->get(0); + if (req.has_value()) + { + const pbft::Request request = req.value(); + playback_request(request); + return; + } + } + { + auto view = tx.get_view(pbft_pre_prepares_map); + auto pp = view->get(0); + if (pp.has_value()) + { + const pbft::PrePrepare pre_prepare = pp.value(); + playback_pre_prepare(pre_prepare); + return; + } + } + throw std::logic_error( + "Playback transaction doesn't contain request nor pre prepare data."); +} + void Replica::playback_request(const pbft::Request& request) { LOG_INFO_FMT( @@ -280,10 +306,47 @@ void Replica::playback_request(const pbft::Request& request) auto req = create_message(request.pbft_raw.data(), request.pbft_raw.size()) .release(); - if (!brt.add_request(req)) - { - rqueue.append(req); - } + + // TODO refactor with execute tentative method + + last_tentative_execute = last_tentative_execute + 1; + LOG_TRACE << "in execute tentative with last_tentative_execute: " + << last_tentative_execute << " and last_executed: " << last_executed + << std::endl; + + int client_id = req->client_id(); + + // Obtain "in" and "out" buffers to call exec_command + Byz_req inb; + Byz_rep outb; + Byz_buffer non_det; + inb.contents = req->command(inb.size); + + // TODO do we need this? + // non_det.contents = pp->choices(non_det.size); + Request_id rid = req->request_id(); + // Execute command in a regular request. + replies.count_request(); + + exec_command( + &inb, + outb, + &non_det, + client_id, + rid, + false, + (uint8_t*)req->contents(), + req->contents_size(), + replies.total_requests_processed(), + playback_byz_info, + true); + right_pad_contents(outb); + // Finish constructing the reply. + LOG_DEBUG << "Executed from tentative exec: " + << " from client: " << client_id << " rid: " << rid + << " commit_id: " << playback_byz_info.ctx << std::endl; + + replies.end_reply(client_id, rid, last_tentative_execute, outb.size); } void Replica::playback_pre_prepare(const pbft::PrePrepare& pre_prepare) @@ -293,14 +356,8 @@ void Replica::playback_pre_prepare(const pbft::PrePrepare& pre_prepare) pre_prepare.contents.data(), pre_prepare.contents.size()); auto seqno = executable_pp->seqno(); - ByzInfo info; - if (execute_tentative(executable_pp.get(), info, true)) + if (compare_execution_results(playback_byz_info, executable_pp.get())) { - if (!compare_execution_results(info, executable_pp.get())) - { - return; - } - next_pp_seqno = seqno; if (seqno > last_prepared) @@ -320,6 +377,8 @@ void Replica::playback_pre_prepare(const pbft::PrePrepare& pre_prepare) { mark_stable(last_executed, true); } + + ledger_writer->write_pre_prepare(executable_pp.get()); } else { @@ -1093,27 +1152,6 @@ void Replica::emit_signature_on_next_pp(int64_t version) signed_version = version; } -void Replica::activate_playback_local_hooks() -{ - pbft_requests_map.set_deserialise_hook( - [this](const pbft::RequestsMap::Write& w) { - for (auto& [key, value] : w) - { - append_entries_index++; - playback_request(value.value); - } - }); - - pbft_pre_prepares_map.set_deserialise_hook( - [this](const pbft::PrePreparesMap::Write& w) { - for (auto& [key, value] : w) - { - append_entries_index++; - playback_pre_prepare(value.value); - } - }); -} - void Replica::activate_local_hooks() { pbft_requests_map.set_local_hook([this]( diff --git a/src/consensus/pbft/libbyz/Replica.h b/src/consensus/pbft/libbyz/Replica.h index 3a8476da8223..7c4ceb9e400e 100644 --- a/src/consensus/pbft/libbyz/Replica.h +++ b/src/consensus/pbft/libbyz/Replica.h @@ -135,7 +135,6 @@ class Replica : public Node, public IMessageReceiveBase void set_f(ccf::NodeId f); void emit_signature_on_next_pp(int64_t version); void activate_local_hooks(); - void activate_playback_local_hooks(); View view() const; bool is_primary() const; int primary() const; @@ -193,11 +192,7 @@ class Replica : public Node, public IMessageReceiveBase // Compare the merkle root and batch ctx between the pre-prepare and the // the corresponding fields in info after execution - void playback_request(const pbft::Request& request); - // Effects: Requests are stored in queue - void playback_pre_prepare(const pbft::PrePrepare& pre_prepare); - // Effects: pre-prepares are executed if they - // are able to If not any requests are cleared from the request queues + void playback_transaction(ccf::Store::Tx& tx); void init_state(); void recv_start(); @@ -245,6 +240,13 @@ class Replica : public Node, public IMessageReceiveBase #endif // Effects: Handle timeouts of corresponding timers. + // Playback methods + void playback_request(const pbft::Request& request); + // Effects: Requests are executed + void playback_pre_prepare(const pbft::PrePrepare& pre_prepare); + // Effects: pre-prepare is verified, if merkle roots match + // we update the pre-prepare related meta-data, if not we rollback + // // Auxiliary methods used by primary to send messages to the replica // group: @@ -429,6 +431,8 @@ class Replica : public Node, public IMessageReceiveBase Rep_info replies; #endif + ByzInfo playback_byz_info; + // used to register a callback for a client proxy to collect replies sent to // this replica reply_handler_cb rep_cb; diff --git a/src/consensus/pbft/libbyz/receive_message_base.h b/src/consensus/pbft/libbyz/receive_message_base.h index a277e740f542..361578456c3a 100644 --- a/src/consensus/pbft/libbyz/receive_message_base.h +++ b/src/consensus/pbft/libbyz/receive_message_base.h @@ -31,8 +31,8 @@ class IMessageReceiveBase virtual Seqno get_last_executed() const = 0; virtual int my_id() const = 0; virtual void emit_signature_on_next_pp(int64_t version) = 0; - virtual void activate_playback_local_hooks() = 0; virtual void activate_local_hooks() = 0; + virtual void playback_transaction(ccf::Store::Tx& tx) = 0; virtual char* create_response_message( int client_id, Request_id rid, uint32_t size) = 0; }; diff --git a/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp b/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp index 11a34bd48dbe..0cf8d6a9e476 100644 --- a/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp +++ b/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp @@ -208,7 +208,6 @@ TEST_CASE("Test Ledger Replay") create_replica( service_mem, *replica_store, pbft_requests_map, pbft_pre_prepares_map); pbft::GlobalState::get_replica().register_exec(exec_mock.exec_command); - pbft::GlobalState::get_replica().activate_playback_local_hooks(); // ledgerenclave work std::vector> entries; while (true) diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index 29ca96718b75..840bbaa3bd24 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -218,7 +218,6 @@ namespace pbft LOG_INFO_FMT("PBFT setup for local_id: {}", local_id); message_receiver_base->activate_local_hooks(); - message_receiver_base->activate_playback_local_hooks(); pbft_config->set_service_mem(mem + used_bytes); pbft_config->set_receiver(message_receiver_base); @@ -401,7 +400,6 @@ namespace pbft case pbft_append_entries: { AppendEntries r; - message_receiver_base->activate_playback_local_hooks(); try { @@ -430,6 +428,7 @@ namespace pbft // continue; // } Index last_idx = i; + // TODO DO NOT RECORD ENTRY HERE! auto ret = ledger->record_entry(data, size); if (!ret.second) @@ -447,8 +446,9 @@ namespace pbft return; } - auto deserialise_success = - store->deserialise(ret.first, public_only, false); + ccf::Store::Tx tx; + auto deserialise_success = store->deserialise_views( + ret.first, public_only, false, nullptr, &tx); switch (deserialise_success) { @@ -460,6 +460,7 @@ namespace pbft } case kv::DeserialiseSuccess::PASS: { + message_receiver_base->playback_transaction(tx); break; } case kv::DeserialiseSuccess::PASS_SIGNATURE: diff --git a/src/consensus/pbft/pbfttypes.h b/src/consensus/pbft/pbfttypes.h index e34322746a93..13019958c2fc 100644 --- a/src/consensus/pbft/pbfttypes.h +++ b/src/consensus/pbft/pbfttypes.h @@ -41,11 +41,12 @@ namespace pbft { public: virtual ~Store() {} - virtual S deserialise( + virtual S deserialise_views( const std::vector& data, bool public_only = false, bool commit = true, - Term* term = nullptr) = 0; + Term* term = nullptr, + ccf::Store::Tx* tx = nullptr) = 0; virtual void compact(Index v) = 0; virtual kv::Version current_version() = 0; virtual void commit_pre_prepare( @@ -62,15 +63,16 @@ namespace pbft public: Adaptor(std::shared_ptr x) : x(x) {} - S deserialise( + S deserialise_views( const std::vector& data, bool public_only = false, bool commit = true, - Term* term = nullptr) + Term* term = nullptr, + ccf::Store::Tx* tx = nullptr) { auto p = x.lock(); if (p) - return p->deserialise(data, public_only, term); + return p->deserialise_views(data, public_only, commit, term, tx); return S::FAILED; } diff --git a/src/enclave/rpchandler.h b/src/enclave/rpchandler.h index b7abd00d6c07..43ed9e960159 100644 --- a/src/enclave/rpchandler.h +++ b/src/enclave/rpchandler.h @@ -39,5 +39,7 @@ namespace enclave virtual ProcessPbftResp process_pbft( RPCContext& ctx, bool playback = false) = 0; + virtual ProcessPbftResp process_pbft( + enclave::RPCContext& ctx, ccf::Store::Tx& tx, bool playback) = 0; }; } diff --git a/src/kv/kv.h b/src/kv/kv.h index 8f318fc8c65b..3fd8b12009be 100644 --- a/src/kv/kv.h +++ b/src/kv/kv.h @@ -80,8 +80,6 @@ namespace kv using Write = std::unordered_map; /// Signature for transaction commit handlers using CommitHook = std::function; - /// Signature for transaction deserialisation handlers - using DeserialiseHook = std::function; private: using This = Map; @@ -100,7 +98,6 @@ namespace kv std::unique_ptr roll; CommitHook local_hook; CommitHook global_hook; - DeserialiseHook deserialise_hook; LocalCommits commit_deltas; SpinLock sl; const SecurityDomain security_domain; @@ -112,8 +109,7 @@ namespace kv SecurityDomain security_domain_, bool replicated_, CommitHook local_hook_, - CommitHook global_hook_, - DeserialiseHook deserialise_hook_) : + CommitHook global_hook_) : store(store_), name(name_), roll(std::make_unique()), @@ -121,8 +117,7 @@ namespace kv security_domain(security_domain_), replicated(replicated_), local_hook(local_hook_), - global_hook(global_hook_), - deserialise_hook(deserialise_hook_) + global_hook(global_hook_) { roll->push_back({0, State(), Write()}); } @@ -138,7 +133,7 @@ namespace kv throw std::logic_error("Failed to cast store in Map clone"); return new Map( - store_, name, security_domain, replicated, nullptr, nullptr, nullptr); + store_, name, security_domain, replicated, nullptr, nullptr); } /** Get the name of the map @@ -179,16 +174,6 @@ namespace kv global_hook = hook; } - /** Set handler to be called on map deserialisation - * - * @param hook function to be called on map deserialisation - */ - void set_deserialise_hook(DeserialiseHook hook) - { - std::lock_guard guard(sl); - deserialise_hook = hook; - } - /** Get security domain of a Map * * @return Security domain of the map (affects serialisation) @@ -710,13 +695,6 @@ namespace kv writes[r] = {NoVersion, V()}; } - // add deserialise hook - if (map.deserialise_hook) - { - LOG_INFO_FMT("Calling deserialise hook!"); - map.deserialise_hook(writes); - } - return true; } @@ -956,6 +934,13 @@ namespace kv Tx(const Tx& that) = delete; + void set_view_list(OrderedViews& view_list_) + { + // if view list is not empty then any coinciding keys will not be + // overwritten + view_list.merge(view_list_); + } + void set_req_id(const kv::TxHistory::RequestID& req_id_) { req_id = req_id_; @@ -1293,6 +1278,23 @@ namespace kv return grouped_maps; } + DeserialiseSuccess commit_deserialised( + OrderedViews& views, Version& v) + { + auto c = Tx::commit(views, [v]() { return v; }); + if (!c.has_value()) + { + LOG_FAIL_FMT("Failed to commit deserialised Tx at version {}", v); + return DeserialiseSuccess::FAILED; + } + { + std::lock_guard vguard(version_lock); + version = v; + last_replicated = version; + } + return DeserialiseSuccess::PASS; + } + public: // TODO(#api): This (along with other parts of the API) should be // hidden @@ -1398,11 +1400,10 @@ namespace kv std::string name, SecurityDomain security_domain = kv::SecurityDomain::PRIVATE, typename Map::CommitHook local_hook = nullptr, - typename Map::CommitHook global_hook = nullptr, - typename Map::DeserialiseHook deserialise_hook = nullptr) + typename Map::CommitHook global_hook = nullptr) { return create>( - name, security_domain, local_hook, global_hook, deserialise_hook); + name, security_domain, local_hook, global_hook); } /** Create a Map @@ -1420,8 +1421,7 @@ namespace kv std::string name, SecurityDomain security_domain = kv::SecurityDomain::PRIVATE, typename M::CommitHook local_hook = nullptr, - typename M::CommitHook global_hook = nullptr, - typename M::DeserialiseHook deserialise_hook = nullptr) + typename M::CommitHook global_hook = nullptr) { std::lock_guard mguard(maps_lock); @@ -1441,14 +1441,8 @@ namespace kv } } - auto result = new M( - this, - name, - security_domain, - replicated, - local_hook, - global_hook, - deserialise_hook); + auto result = + new M(this, name, security_domain, replicated, local_hook, global_hook); maps[name] = std::unique_ptr>(result); return *result; } @@ -1518,11 +1512,12 @@ namespace kv h->rollback(v); } - DeserialiseSuccess deserialise( + DeserialiseSuccess deserialise_views( const std::vector& data, bool public_only = false, bool commit = true, - Term* term = nullptr) override + Term* term = nullptr, + Tx* tx = nullptr) { // This will return FAILED if the serialised transaction is being // applied out of order. @@ -1630,12 +1625,16 @@ namespace kv } auto view = search->second->create_view(v); - if (!view->deserialise(*d, v)) + // if we are not committing now then use NoVersion to deserialise + // otherwise the view will be considered as having a committed + // version + auto deserialise_version = (commit ? v : NoVersion); + if (!view->deserialise(*d, deserialise_version)) { LOG_FAIL_FMT( "Could not deserialise Tx for map {} at version {}", map_name, - v); + deserialise_version); return DeserialiseSuccess::FAILED; } @@ -1655,69 +1654,55 @@ namespace kv if (commit) { - auto c = Tx::commit(views, [v]() { return v; }); - if (!c.has_value()) - { - LOG_FAIL_FMT("Failed to commit deserialised Tx at version {}", v); - return DeserialiseSuccess::FAILED; - } + success = commit_deserialised(views, v); + if (success == DeserialiseSuccess::FAILED) { - std::lock_guard vguard(version_lock); - version = v; - last_replicated = v; + return success; } + } - auto h = get_history(); - if (h) + auto h = get_history(); + if (h) + { + // TODO: the reference to the entity should be in the history + auto search = views.find("ccf.signatures"); + if (search != views.end()) { - // TODO: the reference to the entity should be in the history - auto search = views.find("ccf.signatures"); - if (search != views.end()) + // Transactions containing a signature must only contain + // a signature and must be verified + if (views.size() > 1) { - // Transactions containing a signature must only contain - // a signature and must be verified - if (views.size() > 1) - { - LOG_FAIL_FMT( - "Unexpected contents in signature transaction {}", v); - return DeserialiseSuccess::FAILED; - } + LOG_FAIL_FMT("Unexpected contents in signature transaction {}", v); + return DeserialiseSuccess::FAILED; + } - if (!h->verify(term)) - { - LOG_FAIL_FMT("Signature in transaction {} failed to verify", v); - return DeserialiseSuccess::FAILED; - } - success = DeserialiseSuccess::PASS_SIGNATURE; + if (!h->verify(term)) + { + LOG_FAIL_FMT("Signature in transaction {} failed to verify", v); + return DeserialiseSuccess::FAILED; } - auto rep = frame::replicated(data.data()); - h->append(rep.p, rep.n, data.data(), data.size()); + success = DeserialiseSuccess::PASS_SIGNATURE; } + auto rep = frame::replicated(data.data()); + h->append(rep.p, rep.n, data.data(), data.size()); } - // else - // { - // mguard.~lock_guard(); - // rollback(version); - // } - else + if (tx) { - auto search = views.find("ccf.pbft.requests"); - if (search != views.end()) - { - if (search->second.view->has_writes()) - { - // auto ws = search->second.view->get(0); - // search->second.view->lock(); - // search->second.view->post_commit(); - // search->second.map->unlock(); - } - } + tx->set_view_list(views); } return success; } + DeserialiseSuccess deserialise( + const std::vector& data, + bool public_only = false, + Term* term = nullptr) override + { + return deserialise_views(data, public_only, true, term); + } + bool operator==(const Store& that) const { // Only used for debugging, not thread safe. diff --git a/src/kv/kvtypes.h b/src/kv/kvtypes.h index 1c1aea710512..287db5018e1e 100644 --- a/src/kv/kvtypes.h +++ b/src/kv/kvtypes.h @@ -334,7 +334,6 @@ namespace kv virtual DeserialiseSuccess deserialise( const std::vector& data, bool public_only = false, - bool commit = true, Term* term = nullptr) = 0; virtual void compact(Version v) = 0; virtual void rollback(Version v) = 0; diff --git a/src/node/nodestate.h b/src/node/nodestate.h index e6370dabafdf..d0a7de02d929 100644 --- a/src/node/nodestate.h +++ b/src/node/nodestate.h @@ -1420,9 +1420,6 @@ namespace ccf { setup_n2n_channels(); - network.pbft_requests_map.set_deserialise_hook(nullptr); - network.pbft_pre_prepares_map.set_deserialise_hook(nullptr); - network.pbft_requests_map.set_local_hook(nullptr); network.pbft_pre_prepares_map.set_local_hook(nullptr); diff --git a/src/node/rpc/frontend.h b/src/node/rpc/frontend.h index 17cbda23fb80..0b9752157998 100644 --- a/src/node/rpc/frontend.h +++ b/src/node/rpc/frontend.h @@ -782,8 +782,14 @@ namespace ccf ProcessPbftResp process_pbft( enclave::RPCContext& ctx, bool playback = false) override { - // TODO(#PBFT): Refactor this with process_forwarded(). Store::Tx tx; + return process_pbft(ctx, tx, playback); + } + + ProcessPbftResp process_pbft( + enclave::RPCContext& ctx, Store::Tx& tx, bool playback) override + { + // TODO(#PBFT): Refactor this with process_forwarded(). crypto::Sha256Hash full_state_merkle_root; crypto::Sha256Hash replicated_state_merkle_root; kv::Version version = kv::NoVersion; From 07ddead11dbceb006e6608cac3e17e3c89c7545a Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Thu, 16 Jan 2020 15:10:48 +0000 Subject: [PATCH 03/42] playback request on the same tx --- src/consensus/pbft/libbyz/Replica.cpp | 31 ++++++++++---- src/consensus/pbft/libbyz/Replica.h | 4 +- .../pbft/libbyz/test/replica_main.cpp | 3 +- .../pbft/libbyz/test/replica_test.cpp | 3 +- .../pbft/libbyz/test/test_ledger_replay.cpp | 12 +++++- src/consensus/pbft/libbyz/types.h | 4 +- src/consensus/pbft/pbftconfig.h | 13 +++++- src/enclave/rpchandler.h | 3 +- src/kv/kv.h | 42 +++++++++---------- src/node/history.h | 22 +++++----- src/node/rpc/frontend.h | 5 +-- 11 files changed, 89 insertions(+), 53 deletions(-) diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index cc30c25efcbd..43bfd9915a31 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -281,7 +281,7 @@ void Replica::playback_transaction(ccf::Store::Tx& tx) if (req.has_value()) { const pbft::Request request = req.value(); - playback_request(request); + playback_request(request, tx); return; } } @@ -290,6 +290,7 @@ void Replica::playback_transaction(ccf::Store::Tx& tx) auto pp = view->get(0); if (pp.has_value()) { + // TODO what to do with pps transaction const pbft::PrePrepare pre_prepare = pp.value(); playback_pre_prepare(pre_prepare); return; @@ -299,13 +300,12 @@ void Replica::playback_transaction(ccf::Store::Tx& tx) "Playback transaction doesn't contain request nor pre prepare data."); } -void Replica::playback_request(const pbft::Request& request) +void Replica::playback_request(const pbft::Request& request, ccf::Store::Tx& tx) { LOG_INFO_FMT( "Playback request for request with size {}", request.pbft_raw.size()); auto req = - create_message(request.pbft_raw.data(), request.pbft_raw.size()) - .release(); + create_message(request.pbft_raw.data(), request.pbft_raw.size()); // TODO refactor with execute tentative method @@ -339,7 +339,8 @@ void Replica::playback_request(const pbft::Request& request) req->contents_size(), replies.total_requests_processed(), playback_byz_info, - true); + true, + &tx); right_pad_contents(outb); // Finish constructing the reply. LOG_DEBUG << "Executed from tentative exec: " @@ -349,6 +350,8 @@ void Replica::playback_request(const pbft::Request& request) replies.end_reply(client_id, rid, last_tentative_execute, outb.size); } +// TODO have pre prepare commit it's pre prepare without creating anew +// transaction void Replica::playback_pre_prepare(const pbft::PrePrepare& pre_prepare) { LOG_INFO_FMT("playback pre-prepare {}", pre_prepare.seqno); @@ -1881,7 +1884,18 @@ bool Replica::execute_read_only(Request* request) std::shared_ptr cp = get_principal(client_id); ByzInfo info; int error = exec_command( - &inb, outb, 0, client_id, request_id, true, nullptr, 0, 0, info, false); + &inb, + outb, + 0, + client_id, + request_id, + true, + nullptr, + 0, + 0, + info, + false, + nullptr); right_pad_contents(outb); if (!error) @@ -2001,7 +2015,7 @@ void Replica::execute_prepared(bool committed) } } -bool Replica::execute_tentative(Pre_prepare* pp, ByzInfo& info, bool playback) +bool Replica::execute_tentative(Pre_prepare* pp, ByzInfo& info) { LOG_DEBUG << "in execute tentative: " << pp->seqno() << std::endl; if ( @@ -2066,7 +2080,8 @@ bool Replica::execute_tentative(Pre_prepare* pp, ByzInfo& info, bool playback) request.contents_size(), replies.total_requests_processed(), info, - playback); + false, + nullptr); right_pad_contents(outb); // Finish constructing the reply. LOG_DEBUG << "Executed from tentative exec: " << pp->seqno() diff --git a/src/consensus/pbft/libbyz/Replica.h b/src/consensus/pbft/libbyz/Replica.h index 7c4ceb9e400e..c0e8151180a7 100644 --- a/src/consensus/pbft/libbyz/Replica.h +++ b/src/consensus/pbft/libbyz/Replica.h @@ -241,7 +241,7 @@ class Replica : public Node, public IMessageReceiveBase // Effects: Handle timeouts of corresponding timers. // Playback methods - void playback_request(const pbft::Request& request); + void playback_request(const pbft::Request& request, ccf::Store::Tx& tx); // Effects: Requests are executed void playback_pre_prepare(const pbft::PrePrepare& pre_prepare); // Effects: pre-prepare is verified, if merkle roots match @@ -289,7 +289,7 @@ class Replica : public Node, public IMessageReceiveBase // Effects: Sends back replies that have been executed tentatively // to the client. The replies are tentative unless "committed" is true. - bool execute_tentative(Pre_prepare* pp, ByzInfo& info, bool playback = false); + bool execute_tentative(Pre_prepare* pp, ByzInfo& info); // Effects: Tentatively executes as many commands as possible. It // extracts requests to execute commands from a message "m"; calls // exec_command for each command; and sends back replies to the diff --git a/src/consensus/pbft/libbyz/test/replica_main.cpp b/src/consensus/pbft/libbyz/test/replica_main.cpp index 58db10452519..d6741acd36e3 100644 --- a/src/consensus/pbft/libbyz/test/replica_main.cpp +++ b/src/consensus/pbft/libbyz/test/replica_main.cpp @@ -66,7 +66,8 @@ ExecCommand exec_command = []( size_t req_size, Seqno n, ByzInfo& info, - bool playback) { + bool playback = false, + ccf::Store::Tx* tx = nullptr) { outb.contents = message_receiver->create_response_message(client, rid, 8); Long& counter = *(Long*)service_mem; diff --git a/src/consensus/pbft/libbyz/test/replica_test.cpp b/src/consensus/pbft/libbyz/test/replica_test.cpp index 504ab7295469..6e0ccff5020f 100644 --- a/src/consensus/pbft/libbyz/test/replica_test.cpp +++ b/src/consensus/pbft/libbyz/test/replica_test.cpp @@ -196,7 +196,8 @@ ExecCommand exec_command = []( size_t req_size, Seqno total_requests_executed, ByzInfo& info, - bool playback) { + bool playback = false, + ccf::Store::Tx* tx = nullptr) { outb.contents = message_receive_base->create_response_message(client, rid, 8); Long& counter = *(Long*)service_mem; diff --git a/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp b/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp index 0cf8d6a9e476..fabe0f3c763f 100644 --- a/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp +++ b/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp @@ -44,7 +44,8 @@ class ExecutionMock size_t req_size, Seqno total_requests_executed, ByzInfo& info, - bool playback) { + bool playback = false, + ccf::Store::Tx* tx = nullptr) { // increase total number of commands executed to compare with fake_req command_counter++; @@ -241,7 +242,14 @@ TEST_CASE("Test Ledger Replay") // apply all of the data in order for (const auto& entry : entries) { - REQUIRE(store->deserialise(entry) == kv::DeserialiseSuccess::PASS); + { + ccf::Store::Tx tx; + REQUIRE( + store->deserialise_views(entry, false, false, nullptr, &tx) == + kv::DeserialiseSuccess::PASS); + pbft::GlobalState::get_replica().playback_transaction(tx); + REQUIRE(tx.commit() == kv::CommitSuccess::OK); + } ccf::Store::Tx tx; if (iterations % 2) { diff --git a/src/consensus/pbft/libbyz/types.h b/src/consensus/pbft/libbyz/types.h index e1bce784430e..adafa3325cbd 100644 --- a/src/consensus/pbft/libbyz/types.h +++ b/src/consensus/pbft/libbyz/types.h @@ -9,6 +9,7 @@ * Definitions of various types. */ +#include "consensus/pbft/pbfttypes.h" #include "parameters.h" #include @@ -62,4 +63,5 @@ using ExecCommand = std::function; \ No newline at end of file + bool, + ccf::Store::Tx*)>; \ No newline at end of file diff --git a/src/consensus/pbft/pbftconfig.h b/src/consensus/pbft/pbftconfig.h index 403208f24840..7adfb074a456 100644 --- a/src/consensus/pbft/pbftconfig.h +++ b/src/consensus/pbft/pbftconfig.h @@ -57,7 +57,8 @@ namespace pbft size_t req_size, Seqno total_requests_executed, ByzInfo& info, - bool playback) { + bool playback = false, + ccf::Store::Tx* tx = nullptr) { pbft::Request request; request.deserialise({inb->contents, inb->contents + inb->size}); @@ -80,7 +81,15 @@ namespace pbft ctx.pbft_raw = {req_start, req_start + req_size}; - auto rep = frontend->process_pbft(ctx, playback); + ccf::MemberRpcFrontend::ProcessPbftResp rep; + if (playback && tx) + { + rep = frontend->process_pbft(ctx, *tx, playback); + } + else + { + rep = frontend->process_pbft(ctx); + } static_assert( sizeof(info.full_state_merkle_root) == sizeof(crypto::Sha256Hash)); diff --git a/src/enclave/rpchandler.h b/src/enclave/rpchandler.h index 43ed9e960159..88ef3b14cff2 100644 --- a/src/enclave/rpchandler.h +++ b/src/enclave/rpchandler.h @@ -37,8 +37,7 @@ namespace enclave kv::Version version; }; - virtual ProcessPbftResp process_pbft( - RPCContext& ctx, bool playback = false) = 0; + virtual ProcessPbftResp process_pbft(RPCContext& ctx) = 0; virtual ProcessPbftResp process_pbft( enclave::RPCContext& ctx, ccf::Store::Tx& tx, bool playback) = 0; }; diff --git a/src/kv/kv.h b/src/kv/kv.h index 3fd8b12009be..34f257dd19bf 100644 --- a/src/kv/kv.h +++ b/src/kv/kv.h @@ -1659,32 +1659,32 @@ namespace kv { return success; } - } - - auto h = get_history(); - if (h) - { - // TODO: the reference to the entity should be in the history - auto search = views.find("ccf.signatures"); - if (search != views.end()) + auto h = get_history(); + if (h) { - // Transactions containing a signature must only contain - // a signature and must be verified - if (views.size() > 1) + // TODO: the reference to the entity should be in the history + auto search = views.find("ccf.signatures"); + if (search != views.end()) { - LOG_FAIL_FMT("Unexpected contents in signature transaction {}", v); - return DeserialiseSuccess::FAILED; - } + // Transactions containing a signature must only contain + // a signature and must be verified + if (views.size() > 1) + { + LOG_FAIL_FMT( + "Unexpected contents in signature transaction {}", v); + return DeserialiseSuccess::FAILED; + } - if (!h->verify(term)) - { - LOG_FAIL_FMT("Signature in transaction {} failed to verify", v); - return DeserialiseSuccess::FAILED; + if (!h->verify(term)) + { + LOG_FAIL_FMT("Signature in transaction {} failed to verify", v); + return DeserialiseSuccess::FAILED; + } + success = DeserialiseSuccess::PASS_SIGNATURE; } - success = DeserialiseSuccess::PASS_SIGNATURE; + auto rep = frame::replicated(data.data()); + h->append(rep.p, rep.n, data.data(), data.size()); } - auto rep = frame::replicated(data.data()); - h->append(rep.p, rep.n, data.data(), data.size()); } if (tx) diff --git a/src/node/history.h b/src/node/history.h index d402f3244469..9d3fd80ba0e2 100644 --- a/src/node/history.h +++ b/src/node/history.h @@ -460,9 +460,9 @@ namespace ccf const uint8_t* all_data, size_t all_data_size) override { - // crypto::Sha256Hash h({{all_data, all_data_size}}); - // log_hash(h, APPEND); - // full_state_tree.append(h); + crypto::Sha256Hash h({{all_data, all_data_size}}); + log_hash(h, APPEND); + full_state_tree.append(h); if (is_replicated_tree_enabled()) { @@ -502,8 +502,8 @@ namespace ccf void rollback(kv::Version v) override { - // full_state_tree.retract(v); - // log_hash(full_state_tree.get_root(), ROLLBACK); + full_state_tree.retract(v); + log_hash(full_state_tree.get_root(), ROLLBACK); if (is_replicated_tree_enabled()) { @@ -514,9 +514,9 @@ namespace ccf void compact(kv::Version v) override { - // if (v > MAX_HISTORY_LEN) - // full_state_tree.flush(v - MAX_HISTORY_LEN); - // log_hash(full_state_tree.get_root(), COMPACT); + if (v > MAX_HISTORY_LEN) + full_state_tree.flush(v - MAX_HISTORY_LEN); + log_hash(full_state_tree.get_root(), COMPACT); if (is_replicated_tree_enabled()) { @@ -606,9 +606,10 @@ namespace ccf auto root = get_full_state_root(); LOG_DEBUG_FMT("HISTORY: add_result {0} {1} {2}", id, version, root); #ifdef PBFT + auto replicated_root = get_replicated_state_root(); results[id] = {version, root}; if (on_result.has_value()) - on_result.value()({id, version, root}); + on_result.value()({id, version, root, replicated_root}); #endif } @@ -617,9 +618,10 @@ namespace ccf auto root = get_full_state_root(); LOG_DEBUG_FMT("HISTORY: add_result {0} {1} {2}", id, version, root); #ifdef PBFT + auto replicated_root = get_replicated_state_root(); results[id] = {version, root}; if (on_result.has_value()) - on_result.value()({id, version, root}); + on_result.value()({id, version, root, replicated_root}); #endif } diff --git a/src/node/rpc/frontend.h b/src/node/rpc/frontend.h index 0b9752157998..3a6872de7fe2 100644 --- a/src/node/rpc/frontend.h +++ b/src/node/rpc/frontend.h @@ -779,11 +779,10 @@ namespace ccf * * @param ctx Context for this RPC */ - ProcessPbftResp process_pbft( - enclave::RPCContext& ctx, bool playback = false) override + ProcessPbftResp process_pbft(enclave::RPCContext& ctx) override { Store::Tx tx; - return process_pbft(ctx, tx, playback); + return process_pbft(ctx, tx, false); } ProcessPbftResp process_pbft( From a99332e30354d74f29add6057971ed3dbe332d9d Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Thu, 16 Jan 2020 16:11:38 +0000 Subject: [PATCH 04/42] remove local hooks, ae tracked by Pbft --- src/consensus/pbft/libbyz/Principal.h | 12 -- src/consensus/pbft/libbyz/Replica.cpp | 152 +++++------------- src/consensus/pbft/libbyz/Replica.h | 3 - src/consensus/pbft/libbyz/Status.cpp | 4 - src/consensus/pbft/libbyz/Status.h | 20 --- .../pbft/libbyz/receive_message_base.h | 1 - src/consensus/pbft/pbft.h | 12 +- src/consensus/pbft/pbfttypes.h | 2 +- 8 files changed, 44 insertions(+), 162 deletions(-) diff --git a/src/consensus/pbft/libbyz/Principal.h b/src/consensus/pbft/libbyz/Principal.h index e80317fea235..10105705f5f3 100644 --- a/src/consensus/pbft/libbyz/Principal.h +++ b/src/consensus/pbft/libbyz/Principal.h @@ -87,8 +87,6 @@ class Principal : public IPrincipal ULong tstamp; // last timestamp in a new-key message from this principal Time my_tstamp; // my time when message was accepted - Index last_ae_sent = 0; // last append entry index sent to this principal - Request_id last_fetch; // Last request_id in a fetch message from this principal bool has_received_network_open_msg; @@ -144,14 +142,4 @@ inline Request_id Principal::last_fetch_rid() const inline void Principal::set_last_fetch_rid(Request_id r) { last_fetch = r; -} - -inline Index Principal::get_last_ae_sent() const -{ - return last_ae_sent; -} - -inline void Principal::set_last_ae_sent(Index ae) -{ - last_ae_sent = ae; } \ No newline at end of file diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index 43bfd9915a31..187ac9305a25 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -1155,29 +1155,6 @@ void Replica::emit_signature_on_next_pp(int64_t version) signed_version = version; } -void Replica::activate_local_hooks() -{ - pbft_requests_map.set_local_hook([this]( - kv::Version version, - const pbft::RequestsMap::State& s, - const pbft::RequestsMap::Write& w) { - for (auto& [key, value] : w) - { - append_entries_index++; - } - }); - - pbft_pre_prepares_map.set_local_hook([this]( - kv::Version version, - const pbft::PrePreparesMap::State& s, - const pbft::PrePreparesMap::Write& w) { - for (auto& [key, value] : w) - { - append_entries_index++; - } - }); -} - View Replica::view() const { return Node::view(); @@ -1234,36 +1211,6 @@ void Replica::handle(Status* m) { return; } - - LOG_INFO_FMT( - "Received status message, replica {} is at state {} ", - m->id(), - m->to_ae_index()); - auto principal = get_principal(m->id()); - principal->set_last_ae_sent(m->to_ae_index()); - if (m->to_ae_index() < append_entries_index) - { - LOG_INFO_FMT( - "I WOULD NEED TO SEND AE HERE: mine {} theirs {}", - append_entries_index, - m->to_ae_index()); - LOG_INFO_FMT( - "Sending status message with from {} to ae index {}", - m->to_ae_index(), - append_entries_index); - Status s( - v, - last_stable, - last_executed, - m->to_ae_index(), - append_entries_index, - has_complete_new_view(), - vi.has_nv_message(v)); - - s.authenticate(); - send(&s, m->id()); - } - return; // Retransmit messages that the sender is missing. if (last_stable > m->last_stable() + max_out) { @@ -2593,70 +2540,49 @@ void Replica::send_status(bool send_now) return; } - LOG_INFO_FMT( - "Sending status message with ae index {}", append_entries_index); - for (auto& [id, principal] : *get_principals()) + Status s( + v, + last_stable, + last_executed, + has_complete_new_view(), + vi.has_nv_message(v)); + + if (has_complete_new_view()) { - if (id == node_id) + // Set prepared and committed bitmaps correctly + Seqno max = last_stable + max_out; + Seqno min = std::max(last_executed, last_stable) + 1; + for (Seqno n = min; n <= max; n++) { - continue; + Prepared_cert& pc = plog.fetch(n); + if (pc.is_complete() || state.in_check_state()) + { + s.mark_prepared(n); + if (clog.fetch(n).is_complete() || state.in_check_state()) + { + s.mark_committed(n); + } + } + else + { + // Ask for missing big requests + if ( + !pc.is_pp_complete() && pc.pre_prepare() && pc.num_correct() >= f()) + { + s.add_breqs(n, pc.missing_reqs()); + } + } } - LOG_INFO_FMT( - "Sending to {} from {} to {}", - id, - principal->get_last_ae_sent(), - append_entries_index); - - Status s( - v, - last_stable, - last_executed, - principal->get_last_ae_sent(), - append_entries_index, - has_complete_new_view(), - vi.has_nv_message(v)); - - s.authenticate(); - send(&s, id); } - return; - // if (has_complete_new_view()) - // { - // // Set prepared and committed bitmaps correctly - // Seqno max = last_stable + max_out; - // Seqno min = std::max(last_executed, last_stable) + 1; - // for (Seqno n = min; n <= max; n++) - // { - // Prepared_cert& pc = plog.fetch(n); - // if (pc.is_complete() || state.in_check_state()) - // { - // s.mark_prepared(n); - // if (clog.fetch(n).is_complete() || state.in_check_state()) - // { - // s.mark_committed(n); - // } - // } - // else - // { - // // Ask for missing big requests - // if ( - // !pc.is_pp_complete() && pc.pre_prepare() && pc.num_correct() >= - // f()) - // { - // s.add_breqs(n, pc.missing_reqs()); - // } - // } - // } - // } - // else - // { - // vi.set_received_vcs(&s); - // vi.set_missing_pps(&s); - // } - - // // Multicast status to all replicas. - // s.authenticate(); - // send(&s, All_replicas); + else + { + vi.set_received_vcs(&s); + vi.set_missing_pps(&s); + } + + // Multicast status to all replicas. + s.authenticate(); + send(&s, All_replicas); } } diff --git a/src/consensus/pbft/libbyz/Replica.h b/src/consensus/pbft/libbyz/Replica.h index c0e8151180a7..cd62846798bf 100644 --- a/src/consensus/pbft/libbyz/Replica.h +++ b/src/consensus/pbft/libbyz/Replica.h @@ -134,7 +134,6 @@ class Replica : public Node, public IMessageReceiveBase size_t f() const; void set_f(ccf::NodeId f); void emit_signature_on_next_pp(int64_t version); - void activate_local_hooks(); View view() const; bool is_primary() const; int primary() const; @@ -394,8 +393,6 @@ class Replica : public Node, public IMessageReceiveBase static constexpr auto num_look_back_to_set_batch_size = 10; static constexpr auto max_pre_prepare_request_batch_wait_ms = 2; - pbft::Index append_entries_index = 0; - // Logging variables used to measure average batch size int nbreqs; // The number of requests executed in current interval int nbrounds; // The number of rounds of BFT executed in current interval diff --git a/src/consensus/pbft/libbyz/Status.cpp b/src/consensus/pbft/libbyz/Status.cpp index 4c59106ee022..078b6b642657 100644 --- a/src/consensus/pbft/libbyz/Status.cpp +++ b/src/consensus/pbft/libbyz/Status.cpp @@ -16,8 +16,6 @@ Status::Status( View v, Seqno ls, Seqno le, - Index from_ae_index, - Index to_ae_index, bool hnvi, bool hnvm) : Message(Status_tag, Max_message_size) @@ -27,8 +25,6 @@ Status::Status( rep().v = v; rep().ls = ls; rep().le = le; - rep().to_ae_index = to_ae_index; - rep().from_ae_index = from_ae_index; rep().id = pbft::GlobalState::get_node().id(); rep().brsz = 0; diff --git a/src/consensus/pbft/libbyz/Status.h b/src/consensus/pbft/libbyz/Status.h index c62b6cf61c4b..c60ba2f5857d 100644 --- a/src/consensus/pbft/libbyz/Status.h +++ b/src/consensus/pbft/libbyz/Status.h @@ -38,8 +38,6 @@ struct Status_rep : public Message_rep View v; // Replica's current view Seqno ls; // seqno of last stable checkpoint Seqno le; // seqno of last request executed - Index to_ae_index; // index of ledger append entries - Index from_ae_index; // index of ledger append entries int id; // id of the replica that generated the message. short sz; // size of prepared and committed or pps short brsz; // size of breqs @@ -79,8 +77,6 @@ class Status : public Message View v, Seqno ls, Seqno le, - Index from_ae_index, - Index to_ae_index, bool hnvi, bool hnvm); // Effects: Creates a new unauthenticated Status message. "v" @@ -147,12 +143,6 @@ class Status : public Message Seqno last_executed() const; // Effects: Returns seqno of last request executed by principal id(). - Index to_ae_index() const; - // Effects: Returns the index of the latest entry appended into the ledger - - Index from_ae_index() const; - // Effects: Returns the index of the latest entry appended into the ledger - // // Observers when has_nv_info() // @@ -356,16 +346,6 @@ inline Seqno Status::last_executed() const return rep().le; } -inline Index Status::to_ae_index() const -{ - return rep().to_ae_index; -} - -inline Index Status::from_ae_index() const -{ - return rep().from_ae_index; -} - inline bool Status::is_prepared(Seqno n) { PBFT_ASSERT(has_nv_info(), "Invalid state"); diff --git a/src/consensus/pbft/libbyz/receive_message_base.h b/src/consensus/pbft/libbyz/receive_message_base.h index 361578456c3a..2644cd8f3d79 100644 --- a/src/consensus/pbft/libbyz/receive_message_base.h +++ b/src/consensus/pbft/libbyz/receive_message_base.h @@ -31,7 +31,6 @@ class IMessageReceiveBase virtual Seqno get_last_executed() const = 0; virtual int my_id() const = 0; virtual void emit_signature_on_next_pp(int64_t version) = 0; - virtual void activate_local_hooks() = 0; virtual void playback_transaction(ccf::Store::Tx& tx) = 0; virtual char* create_response_message( int client_id, Request_id rid, uint32_t size) = 0; diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index 840bbaa3bd24..69f80fc559cc 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -79,12 +79,7 @@ namespace pbft Status* status; Status::convert(msg, status); - AppendEntries ae = { - pbft_append_entries, - id, - status->to_ae_index(), - status->from_ae_index(), - }; + AppendEntries ae = {pbft_append_entries, id, 0, 0}; n2n_channels->send_authenticated( ccf::NodeMsgType::consensus_msg, to, ae); return msg->size(); @@ -124,6 +119,7 @@ namespace pbft View last_commit_view; std::unique_ptr store; std::unique_ptr ledger; + Index append_entries_index = 0; // When this is set, only public domain is deserialised when receving append // entries @@ -217,8 +213,6 @@ namespace pbft &message_receiver_base); LOG_INFO_FMT("PBFT setup for local_id: {}", local_id); - message_receiver_base->activate_local_hooks(); - pbft_config->set_service_mem(mem + used_bytes); pbft_config->set_receiver(message_receiver_base); pbft_network->set_receiver(message_receiver_base); @@ -373,6 +367,7 @@ namespace pbft { for (auto& [index, data, globally_committable] : entries) { + append_entries_index++; write_to_ledger(data); } return true; @@ -382,6 +377,7 @@ namespace pbft { for (auto& [index, data, globally_committable] : entries) { + append_entries_index++; write_to_ledger(data); } return true; diff --git a/src/consensus/pbft/pbfttypes.h b/src/consensus/pbft/pbfttypes.h index 13019958c2fc..ce8ccb72d18e 100644 --- a/src/consensus/pbft/pbfttypes.h +++ b/src/consensus/pbft/pbfttypes.h @@ -8,7 +8,7 @@ namespace pbft { - using Index = int64_t; + using Index = uint64_t; using Term = uint64_t; using NodeId = uint64_t; using Node2NodeMsg = uint64_t; From ea450c00f25c82d44d28755cc4f6878fb62d14e7 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Thu, 16 Jan 2020 16:43:36 +0000 Subject: [PATCH 05/42] cleanup --- src/consensus/pbft/libbyz/Status.cpp | 7 +------ src/consensus/pbft/libbyz/Status.h | 7 +------ src/consensus/pbft/pbft.h | 17 ++--------------- tests/node_suspension.py | 5 +++-- 4 files changed, 7 insertions(+), 29 deletions(-) diff --git a/src/consensus/pbft/libbyz/Status.cpp b/src/consensus/pbft/libbyz/Status.cpp index 078b6b642657..28984969addc 100644 --- a/src/consensus/pbft/libbyz/Status.cpp +++ b/src/consensus/pbft/libbyz/Status.cpp @@ -12,12 +12,7 @@ #include -Status::Status( - View v, - Seqno ls, - Seqno le, - bool hnvi, - bool hnvm) : +Status::Status(View v, Seqno ls, Seqno le, bool hnvi, bool hnvm) : Message(Status_tag, Max_message_size) { rep().extra = (hnvi) ? 1 : 0; diff --git a/src/consensus/pbft/libbyz/Status.h b/src/consensus/pbft/libbyz/Status.h index c60ba2f5857d..5fb7f2027f07 100644 --- a/src/consensus/pbft/libbyz/Status.h +++ b/src/consensus/pbft/libbyz/Status.h @@ -73,12 +73,7 @@ class Status : public Message // Status messages // public: - Status( - View v, - Seqno ls, - Seqno le, - bool hnvi, - bool hnvm); + Status(View v, Seqno ls, Seqno le, bool hnvi, bool hnvm); // Effects: Creates a new unauthenticated Status message. "v" // should be the sending replica's current view, "ls" should be the // sequence number of the last stable checkpoint, "le" the sequence diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index 69f80fc559cc..7c7e66e8b546 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -74,16 +74,7 @@ namespace pbft n2n_channels->send_authenticated( ccf::NodeMsgType::consensus_msg, to, serialized_msg); - if (msg->tag() == 7) - { - Status* status; - Status::convert(msg, status); - AppendEntries ae = {pbft_append_entries, id, 0, 0}; - n2n_channels->send_authenticated( - ccf::NodeMsgType::consensus_msg, to, ae); - return msg->size(); - } return msg->size(); } @@ -408,10 +399,6 @@ namespace pbft return; } - // TODO maybe check with our AE where we are and if we want to take - // the message could potentially include a term history just like raft - // and also send the view at least skip the entries that we have? - for (Index i = r.prev_idx + 1; i <= r.idx; i++) { LOG_INFO_FMT( @@ -424,8 +411,8 @@ namespace pbft // continue; // } Index last_idx = i; - // TODO DO NOT RECORD ENTRY HERE! - auto ret = ledger->record_entry(data, size); + // TODO record entry here? + // auto ret = ledger->record_entry(data, size); if (!ret.second) { diff --git a/tests/node_suspension.py b/tests/node_suspension.py index 9a4731d7953b..045e641a5399 100644 --- a/tests/node_suspension.py +++ b/tests/node_suspension.py @@ -40,7 +40,8 @@ def run(args): with infra.ccf.network( hosts, args.build_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb ) as network: - first_node, (backups) = network.start_and_join(args) + network.start_and_join(args) + first_node, backups = network.find_nodes() term_info = {} long_msg = "X" * (2 ** 14) @@ -100,7 +101,7 @@ def run(args): assert new_node # give new_node a second to catch up - time.sleep(1) + time.sleep(5) with new_node.user_client(format="json") as c: check( From cbf19f28290b6facfc66882d25dd9938074789b8 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Thu, 16 Jan 2020 16:48:38 +0000 Subject: [PATCH 06/42] cleanup --- src/consensus/pbft/libbyz/Replica.cpp | 16 +++++----------- src/consensus/pbft/pbft.h | 11 ++++++++++- src/host/nodeconnections.h | 2 -- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index 187ac9305a25..72cc2e611247 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -385,8 +385,6 @@ void Replica::playback_pre_prepare(const pbft::PrePrepare& pre_prepare) } else { - LOG_INFO_FMT( - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALALALLAND"); LOG_DEBUG << "Received entries could not be processed. Received seqno: " << seqno << ". Truncating ledger to last executed: " << last_executed @@ -2486,27 +2484,23 @@ void Replica::mark_stable(Seqno n, bool have_state) void Replica::handle(Data* m) { - LOG_INFO_FMT("HANDLE DATA"); - // state.handle(m); + state.handle(m); } void Replica::handle(Meta_data* m) { - LOG_INFO_FMT("HANDLE META DATA"); - // state.handle(m); + state.handle(m); } void Replica::handle(Meta_data_d* m) { - LOG_INFO_FMT("HANDLE META DATA D"); - // state.handle(m); + state.handle(m); } void Replica::handle(Fetch* m) { - LOG_INFO_FMT("HANDLE FETCH"); - // int mid = m->id(); - // state.handle(m, last_stable); + int mid = m->id(); + state.handle(m, last_stable); } void Replica::send_status(bool send_now) diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index 7c7e66e8b546..49f9888e3c39 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -74,7 +74,16 @@ namespace pbft n2n_channels->send_authenticated( ccf::NodeMsgType::consensus_msg, to, serialized_msg); - + // if (msg->tag() == 7) + // { + // Status* status; + // Status::convert(msg, status); + + // AppendEntries ae = {pbft_append_entries, id, 0, 0}; + // n2n_channels->send_authenticated( + // ccf::NodeMsgType::consensus_msg, to, ae); + // return msg->size(); + // } return msg->size(); } diff --git a/src/host/nodeconnections.h b/src/host/nodeconnections.h index 7743318303ae..2196963f2c8c 100644 --- a/src/host/nodeconnections.h +++ b/src/host/nodeconnections.h @@ -209,8 +209,6 @@ namespace asynchost auto to = serialized::read(data, size); auto node = find(to, true); - LOG_INFO_FMT("NNNAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); - if (!node) return; From 70a4bbb0c796ecf10a4c086234773dc72b52b81b Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 17 Jan 2020 09:32:26 +0000 Subject: [PATCH 07/42] more tidying up --- src/consensus/pbft/libbyz/Replica.cpp | 2 +- src/consensus/pbft/pbft.h | 2 +- src/node/nodestate.h | 3 --- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index 72cc2e611247..4e3d27559542 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -256,7 +256,7 @@ bool Replica::compare_execution_results( "does not match, seqno:" << pre_prepare->seqno() << ", tx_ctx:" << tx_ctx << ", info.ctx:" << info.ctx << std::endl; - // return false; + return false; } if (!std::equal( diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index 49f9888e3c39..b8f90291a24f 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -421,7 +421,7 @@ namespace pbft // } Index last_idx = i; // TODO record entry here? - // auto ret = ledger->record_entry(data, size); + auto ret = ledger->record_entry(data, size); if (!ret.second) { diff --git a/src/node/nodestate.h b/src/node/nodestate.h index d0a7de02d929..831514e06782 100644 --- a/src/node/nodestate.h +++ b/src/node/nodestate.h @@ -1420,9 +1420,6 @@ namespace ccf { setup_n2n_channels(); - network.pbft_requests_map.set_local_hook(nullptr); - network.pbft_pre_prepares_map.set_local_hook(nullptr); - consensus = std::make_shared( std::make_unique>( network.tables), From f55529342103d97e18d49bcd345cd25d9585215d Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Wed, 22 Jan 2020 12:52:09 +0000 Subject: [PATCH 08/42] fixing node_suspension, sending append entries message from status only when needed --- cmake/pbft.cmake | 1 + src/consensus/ledgerenclave.h | 11 + src/consensus/pbft/libbyz/Append_entries.cpp | 36 +++ src/consensus/pbft/libbyz/Append_entries.h | 30 +++ src/consensus/pbft/libbyz/Certificate.h | 10 + src/consensus/pbft/libbyz/Message_tags.h | 3 +- src/consensus/pbft/libbyz/Pre_prepare.cpp | 6 + src/consensus/pbft/libbyz/Prepared_cert.h | 10 +- src/consensus/pbft/libbyz/Replica.cpp | 201 ++++++++++----- src/consensus/pbft/libbyz/Status.cpp | 3 + src/consensus/pbft/libbyz/Status.h | 30 +++ src/consensus/pbft/pbft.h | 244 ++++++++++++++++--- src/consensus/pbft/pbfttypes.h | 8 +- src/kv/kv.h | 18 +- src/node/rpc/frontend.h | 1 + src/node/rpc/memberfrontend.h | 3 + tests/infra/node.py | 6 + tests/node_suspension.py | 102 ++++++-- 18 files changed, 594 insertions(+), 129 deletions(-) create mode 100644 src/consensus/pbft/libbyz/Append_entries.cpp create mode 100644 src/consensus/pbft/libbyz/Append_entries.h diff --git a/cmake/pbft.cmake b/cmake/pbft.cmake index 8aaff64c2f26..676273c34f7d 100644 --- a/cmake/pbft.cmake +++ b/cmake/pbft.cmake @@ -47,6 +47,7 @@ set(PBFT_SRC ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/request_id_gen.cpp ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/New_principal.cpp ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Network_open.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Append_entries.cpp ) if("sgx" IN_LIST TARGET) diff --git a/src/consensus/ledgerenclave.h b/src/consensus/ledgerenclave.h index 3b0ca3385a4a..7a175e409f35 100644 --- a/src/consensus/ledgerenclave.h +++ b/src/consensus/ledgerenclave.h @@ -83,6 +83,17 @@ namespace consensus serialized::skip(data, size, entry_len); } + std::pair, bool> get_entry( + const uint8_t*& data, size_t& size) + { + auto entry_len = serialized::read(data, size); + std::vector entry(data, data + entry_len); + + serialized::skip(data, size, entry_len); + + return std::make_pair(std::move(entry), true); + } + /** * Truncate the ledger at a given index. * diff --git a/src/consensus/pbft/libbyz/Append_entries.cpp b/src/consensus/pbft/libbyz/Append_entries.cpp new file mode 100644 index 000000000000..606dcb44ce13 --- /dev/null +++ b/src/consensus/pbft/libbyz/Append_entries.cpp @@ -0,0 +1,36 @@ +// Copyright (c) Microsoft Corporation. +// Copyright (c) 1999 Miguel Castro, Barbara Liskov. +// Copyright (c) 2000, 2001 Miguel Castro, Rodrigo Rodrigues, Barbara Liskov. +// Licensed under the MIT license. +#include "Append_entries.h" + +#include "Message_tags.h" +#include "ds/logger.h" +#include "pbft_assert.h" + +Append_entries::Append_entries(): + Message(Append_entries_tag, sizeof(Append_entries_rep)) +{} + +bool Append_entries::verify() +{ + return true; +} + +bool Append_entries::convert(Message* m1, Append_entries*& m2) +{ + if (!m1->has_tag(Append_entries_tag, sizeof(Append_entries_rep))) + { + return false; + } + + m1->trim(); + m2 = (Append_entries*)m1; + return true; +} + +Append_entries_rep& Append_entries::rep() const +{ + PBFT_ASSERT(ALIGNED(msg), "Improperly aligned pointer"); + return *((Append_entries_rep*)msg); +} \ No newline at end of file diff --git a/src/consensus/pbft/libbyz/Append_entries.h b/src/consensus/pbft/libbyz/Append_entries.h new file mode 100644 index 000000000000..0d2e010c07cf --- /dev/null +++ b/src/consensus/pbft/libbyz/Append_entries.h @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. +// Copyright (c) 1999 Miguel Castro, Barbara Liskov. +// Copyright (c) 2000, 2001 Miguel Castro, Rodrigo Rodrigues, Barbara Liskov. +// Licensed under the MIT license. +#pragma once + +#include "Message.h" +#include "nodeinfo.h" + +// +// New principal messages have the following format: +// +#pragma pack(push) +#pragma pack(1) +struct Append_entries_rep : public Message_rep +{}; +#pragma pack(pop) + +class Append_entries : public Message +{ +public: + Append_entries(); + + bool verify(); + + static bool convert(Message* m1, Append_entries*& m2); + +private: + Append_entries_rep& rep() const; +}; diff --git a/src/consensus/pbft/libbyz/Certificate.h b/src/consensus/pbft/libbyz/Certificate.h index 931dfc88b3cf..625c5ba1d428 100644 --- a/src/consensus/pbft/libbyz/Certificate.h +++ b/src/consensus/pbft/libbyz/Certificate.h @@ -347,6 +347,16 @@ void Certificate::reset_f() template bool Certificate::add(T* m) { + // auto principal = pbft::GlobalState::get_node().get_principal(m->id()); + // if (!principal) + // { + // LOG_TRACE_FMT( + // "Principal with id {} has not been configured yet, rejecting the message", + // m->id()); + // delete m; + // return false; + // } + if (bmap.none() && f != pbft::GlobalState::get_node().f()) { reset_f(); diff --git a/src/consensus/pbft/libbyz/Message_tags.h b/src/consensus/pbft/libbyz/Message_tags.h index e5b8a17ddc45..67aec5d56b72 100644 --- a/src/consensus/pbft/libbyz/Message_tags.h +++ b/src/consensus/pbft/libbyz/Message_tags.h @@ -29,7 +29,8 @@ const short Fetch_tag = 15; const short Query_stable_tag = 16; const short Reply_stable_tag = 17; const short Network_open_tag = 18; -const short Max_message_tag = 19; +const short Append_entries_tag = 19; +const short Max_message_tag = 20; // Message used for testing are in the 100+ range const short New_principal_tag = 100; diff --git a/src/consensus/pbft/libbyz/Pre_prepare.cpp b/src/consensus/pbft/libbyz/Pre_prepare.cpp index 62ee95fc59d1..6f4b47f22e7e 100644 --- a/src/consensus/pbft/libbyz/Pre_prepare.cpp +++ b/src/consensus/pbft/libbyz/Pre_prepare.cpp @@ -289,6 +289,12 @@ bool Pre_prepare::pre_verify() { auto sender_principal = pbft::GlobalState::get_node().get_principal(sender); + if (!sender_principal) + { + LOG_TRACE_FMT( + "Sender principal has not been configured yet {}", sender); + return false; + } if ( !sender_principal->has_certificate_set() && diff --git a/src/consensus/pbft/libbyz/Prepared_cert.h b/src/consensus/pbft/libbyz/Prepared_cert.h index ba100a2d0d3b..2a6d13018f63 100644 --- a/src/consensus/pbft/libbyz/Prepared_cert.h +++ b/src/consensus/pbft/libbyz/Prepared_cert.h @@ -140,8 +140,14 @@ class Prepared_cert inline bool Prepared_cert::add(Prepare* m) { -#ifdef SIGN_BATCH int id = m->id(); + auto principal = pbft::GlobalState::get_node().get_principal(id); + if (!principal) + { + return false; + } + +#ifdef SIGN_BATCH PbftSignature& digest_sig = m->digest_sig(); PrePrepareProof proof; std::copy( @@ -153,7 +159,7 @@ inline bool Prepared_cert::add(Prepare* m) #ifdef SIGN_BATCH if (result) { - proof.cert = pbft::GlobalState::get_node().get_principal(id)->get_cert(); + proof.cert = principal->get_cert(); pre_prepare_proof.insert({id, proof}); } #endif diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index 4e3d27559542..8983fd77646a 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -21,6 +21,7 @@ # include #endif +#include "Append_entries.h" #include "Checkpoint.h" #include "Commit.h" #include "Data.h" @@ -275,25 +276,36 @@ bool Replica::compare_execution_results( void Replica::playback_transaction(ccf::Store::Tx& tx) { + if (vtimer->get_state() == ITimer::State::running) + { + vtimer->restart(); + } { auto view = tx.get_view(pbft_requests_map); - auto req = view->get(0); - if (req.has_value()) + if (view->has_writes()) { - const pbft::Request request = req.value(); - playback_request(request, tx); - return; + // hasn't been committed yet + auto req = view->get(0); + if (req.has_value()) + { + pbft::Request request = req.value(); + playback_request(request, tx); + return; + } } } { auto view = tx.get_view(pbft_pre_prepares_map); - auto pp = view->get(0); - if (pp.has_value()) + if (view->has_writes()) { - // TODO what to do with pps transaction - const pbft::PrePrepare pre_prepare = pp.value(); - playback_pre_prepare(pre_prepare); - return; + auto pp = view->get(0); + if (pp.has_value()) + { + // TODO what to do with pps transaction + pbft::PrePrepare pre_prepare = pp.value(); + playback_pre_prepare(pre_prepare); + return; + } } } throw std::logic_error( @@ -348,6 +360,14 @@ void Replica::playback_request(const pbft::Request& request, ccf::Store::Tx& tx) << " commit_id: " << playback_byz_info.ctx << std::endl; replies.end_reply(client_id, rid, last_tentative_execute, outb.size); + + // Remove the request from rqueue if present. + if (rqueue.remove(client_id, req->request_id())) + { + LOG_INFO_FMT( + "Removed request with cid {} and rid {}", client_id, req->request_id()); + vtimer->stop(); + } } // TODO have pre prepare commit it's pre prepare without creating anew @@ -368,6 +388,8 @@ void Replica::playback_pre_prepare(const pbft::PrePrepare& pre_prepare) last_prepared = seqno; } + ledger_writer->write_pre_prepare(executable_pp.get()); + if (global_commit_cb != nullptr) { global_commit_cb( @@ -380,11 +402,10 @@ void Replica::playback_pre_prepare(const pbft::PrePrepare& pre_prepare) { mark_stable(last_executed, true); } - - ledger_writer->write_pre_prepare(executable_pp.get()); } else { + throw std::logic_error("Entries don't match playback"); LOG_DEBUG << "Received entries could not be processed. Received seqno: " << seqno << ". Truncating ledger to last executed: " << last_executed @@ -867,6 +888,7 @@ void Replica::send_prepare(Seqno seqno, std::optional byz_info) // https://github.com/microsoft/CCF/issues/357 if (!compare_execution_results(info, pp)) { + throw std::logic_error("Execution results don't match send pp"); break; } @@ -999,6 +1021,14 @@ void Replica::handle(Checkpoint* m) return; } + if (ms > last_executed || ms > last_tentative_execute) + { + LOG_TRACE_FMT( + "Received Checkpoint out of order from {} with seqno {}", m->id(), ms); + delete m; + return; + } + if (ms <= last_stable + max_out) { // Checkpoint is within my window. @@ -1209,12 +1239,22 @@ void Replica::handle(Status* m) { return; } + + LOG_INFO_FMT("RECEIVED STATUS MESSAGE FROM: {}", m->id()); // Retransmit messages that the sender is missing. + if (last_stable > m->last_stable() + max_out) { + LOG_INFO_FMT("Sending append entries"); // Node is so out-of-date that it will not accept any // pre-prepare/prepare/commmit messages in my log. // Send a stable checkpoint message for my stable checkpoint. + if (m->id() != id()) + { + Append_entries ae; + send(&ae, m->id()); + } + Checkpoint* c = elog.fetch(last_stable).mine(t_sent); if (c != 0 && c->stable()) { @@ -1240,6 +1280,15 @@ void Replica::handle(Status* m) } } + LOG_INFO_FMT( + "my last stable {}, m->laststable {}, last executed {}, m->last_executed " + "{}, max_out {}", + last_stable, + m->last_stable(), + last_executed, + m->last_executed(), + max_out); + if (m->view() < v) { // Retransmit my latest view-change message @@ -1257,67 +1306,80 @@ void Replica::handle(Status* m) if (m->has_nv_info()) { min = std::max(last_stable + 1, m->last_executed() + 1); - for (Seqno n = min; n <= max; n++) + LOG_INFO_FMT("Rentransmitting from min {} to max {}", min, max); + if ( + last_stable > m->last_stable() && + last_executed > m->last_executed() + 2) + { + LOG_INFO_FMT( + "Sending append entries to {} since we are way off", m->id()); + Append_entries ae; + send(&ae, m->id()); + } + else { - if (m->is_committed(n)) + for (Seqno n = min; n <= max; n++) { - // No need for retransmission of commit or pre-prepare/prepare - // message. - continue; - } + if (m->is_committed(n)) + { + // No need for retransmission of commit or pre-prepare/prepare + // message. + continue; + } - Commit* c = clog.fetch(n).mine(t_sent); - if (c != 0) - { - retransmit(c, current, t_sent, p.get()); - } + Commit* c = clog.fetch(n).mine(t_sent); + if (c != 0) + { + retransmit(c, current, t_sent, p.get()); + } - if (m->is_prepared(n)) - { - // No need for retransmission of pre-prepare/prepare message. - continue; - } + if (m->is_prepared(n)) + { + // No need for retransmission of pre-prepare/prepare message. + continue; + } - // If I have a pre-prepare/prepare send it, provided I have sent - // a pre-prepare/prepare for view v. - if (primary() == node_id) - { - Pre_prepare* pp = plog.fetch(n).my_pre_prepare(t_sent); - if (pp != 0) + // If I have a pre-prepare/prepare send it, provided I have sent + // a pre-prepare/prepare for view v. + if (primary() == node_id) { - retransmit(pp, current, t_sent, p.get()); + Pre_prepare* pp = plog.fetch(n).my_pre_prepare(t_sent); + if (pp != 0) + { + retransmit(pp, current, t_sent, p.get()); + } } - } - else - { - Prepare* pr = plog.fetch(n).my_prepare(t_sent); - if (pr != 0) + else { - retransmit(pr, current, t_sent, p.get()); + Prepare* pr = plog.fetch(n).my_prepare(t_sent); + if (pr != 0) + { + retransmit(pr, current, t_sent, p.get()); + } } } - } - - if (id() == primary()) - { - // For now only primary retransmits big requests. - Status::BRS_iter gen(m); - int count = 0; - Seqno ppn; - BR_map mrmap; - while (gen.get(ppn, mrmap) && count <= max_ret_bytes) + if (id() == primary()) { - if (plog.within_range(ppn)) + // For now only primary retransmits big requests. + Status::BRS_iter gen(m); + + int count = 0; + Seqno ppn; + BR_map mrmap; + while (gen.get(ppn, mrmap) && count <= max_ret_bytes) { - Pre_prepare_info::BRS_iter gen( - plog.fetch(ppn).prep_info(), mrmap); - Request* r; - while (gen.get(r)) + if (plog.within_range(ppn)) { - INCR_OP(message_counts_retransmitted[m->tag()]); - send(r, m->id()); - count += r->size(); + Pre_prepare_info::BRS_iter gen( + plog.fetch(ppn).prep_info(), mrmap); + Request* r; + while (gen.get(r)) + { + INCR_OP(message_counts_retransmitted[m->tag()]); + send(r, m->id()); + count += r->size(); + } } } } @@ -1325,6 +1387,7 @@ void Replica::handle(Status* m) } else { + LOG_INFO_FMT("HAS NV INFO FALSE"); if (!m->has_vc(node_id)) { // p does not have my view-change: send it. @@ -2101,6 +2164,12 @@ void Replica::execute_committed(bool was_f_0) { ByzInfo info; auto executed_ok = execute_tentative(pp, info); + if (!compare_execution_results(info, pp)) + { + throw std::logic_error( + "Execution results don't match handle commit"); + } + ledger_writer->write_pre_prepare(pp); PBFT_ASSERT( executed_ok, "tentative execution while executing committed failed"); @@ -2330,6 +2399,7 @@ void Replica::new_state(Seqno c) mark_stable(c, true); } + LOG_INFO_FMT("Calling execute committed from new state"); // Execute any committed requests execute_committed(); @@ -2400,6 +2470,8 @@ void Replica::mark_stable(Seqno n, bool have_state) state.discard_checkpoints(last_stable, last_executed); brt.mark_stable(last_stable); + // mark_stable_callback() + if (have_state) { // Re-authenticate my checkpoint message to mark it as stable or @@ -2540,6 +2612,14 @@ void Replica::send_status(bool send_now) last_executed, has_complete_new_view(), vi.has_nv_message(v)); + LOG_INFO_FMT( + "Sending status message with v {}, last_stable {}, last_executed {}, " + "has_c_new_view {}, vi.has_nv_msg {}", + v, + last_stable, + last_executed, + has_complete_new_view(), + vi.has_nv_message(v)); if (has_complete_new_view()) { @@ -2570,6 +2650,7 @@ void Replica::send_status(bool send_now) } else { + LOG_INFO_FMT("set received cvs and set missing pps"); vi.set_received_vcs(&s); vi.set_missing_pps(&s); } diff --git a/src/consensus/pbft/libbyz/Status.cpp b/src/consensus/pbft/libbyz/Status.cpp index 28984969addc..5ecb705fdefa 100644 --- a/src/consensus/pbft/libbyz/Status.cpp +++ b/src/consensus/pbft/libbyz/Status.cpp @@ -6,6 +6,7 @@ #include "Status.h" #include "Message_tags.h" +#include "Append_entries.h" #include "Node.h" #include "Principal.h" #include "pbft_assert.h" @@ -20,6 +21,8 @@ Status::Status(View v, Seqno ls, Seqno le, bool hnvi, bool hnvm) : rep().v = v; rep().ls = ls; rep().le = le; + rep().send_ae = false; + rep().ae_index = 0; rep().id = pbft::GlobalState::get_node().id(); rep().brsz = 0; diff --git a/src/consensus/pbft/libbyz/Status.h b/src/consensus/pbft/libbyz/Status.h index 5fb7f2027f07..846a53027757 100644 --- a/src/consensus/pbft/libbyz/Status.h +++ b/src/consensus/pbft/libbyz/Status.h @@ -38,6 +38,8 @@ struct Status_rep : public Message_rep View v; // Replica's current view Seqno ls; // seqno of last stable checkpoint Seqno le; // seqno of last request executed + bool send_ae; + Index ae_index; int id; // id of the replica that generated the message. short sz; // size of prepared and committed or pps short brsz; // size of breqs @@ -120,6 +122,14 @@ class Status : public Message int id() const; // Effects: Fetches the identifier of the replica from the message. + bool send_ae() const; + + void set_send_ae(); + + void set_ae_index(Index ae); + + Index get_ae_index() const; + View view() const; // Effects: Returns the principal id()'s view in the message. @@ -316,6 +326,26 @@ inline int Status::id() const return rep().id; } +inline bool Status::send_ae() const +{ + return rep().send_ae; +} + +inline void Status::set_send_ae() +{ + rep().send_ae = true; +} + +inline void Status::set_ae_index(Index ae) +{ + rep().ae_index = ae; +} + +inline Index Status::get_ae_index() const +{ + return rep().ae_index; +} + inline View Status::view() const { return rep().v; diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index b8f90291a24f..bd8f02aca4b2 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -3,9 +3,10 @@ #pragma once #include "consensus/ledgerenclave.h" +#include "consensus/pbft/libbyz/Append_entries.h" #include "consensus/pbft/libbyz/Big_req_table.h" #include "consensus/pbft/libbyz/Client_proxy.h" -#include "consensus/pbft/libbyz/Status.h" +#include "consensus/pbft/libbyz/Message_tags.h" #include "consensus/pbft/libbyz/libbyz.h" #include "consensus/pbft/libbyz/network.h" #include "consensus/pbft/libbyz/receive_message_base.h" @@ -26,13 +27,28 @@ namespace pbft { + struct NodeState + { + // the highest matching index that we send + Index match_idx; + }; + using NodesMap = std::unordered_map; class PbftEnclaveNetwork : public INetwork { public: PbftEnclaveNetwork( - pbft::NodeId id, std::shared_ptr n2n_channels) : + pbft::NodeId id, + std::shared_ptr n2n_channels, + NodesMap& nodes_, + pbft::Index& append_entries_index_, + pbft::Index& latest_stable_ae_index_, + SpinLock& lock_) : n2n_channels(n2n_channels), - id(id) + id(id), + nodes(nodes_), + append_entries_index(append_entries_index_), + latest_stable_ae_index(latest_stable_ae_index_), + lock(lock_) {} virtual ~PbftEnclaveNetwork() = default; @@ -49,7 +65,7 @@ namespace pbft int Send(Message* msg, IPrincipal& principal) override { - NodeId to = principal.pid(); + pbft::NodeId to = principal.pid(); if (to == id) { // If a replica sends a message to itself (e.g. if f == 0), handle @@ -72,21 +88,97 @@ namespace pbft reinterpret_cast(msg->contents()), msg->size()); + if (msg->tag() == Append_entries_tag) + { + // // status messages are intercepted to add the append entries info + // Append_entries* ae; + // Append_entries::convert(msg, ae); + // // set my append entries index to the status message + + auto node = nodes.find(to); + + pbft::Index match_idx = 0; + // match_idx = nodes.at(to).match_idx; + if (node != nodes.end()) + { + match_idx = node->second.match_idx; + } + + if (match_idx < append_entries_index) + { + send_append_entries(to, match_idx + 1); + } + return msg->size(); + } + if (msg->tag() == Status_tag) + { + LOG_INFO_FMT("SENDING SM TO {} WITH AE {}", to, append_entries_index); + + StatusMessage sm = {pbft_status_message, id, append_entries_index}; + + n2n_channels->send_authenticated( + ccf::NodeMsgType::consensus_msg, to, sm); + } n2n_channels->send_authenticated( ccf::NodeMsgType::consensus_msg, to, serialized_msg); - // if (msg->tag() == 7) - // { - // Status* status; - // Status::convert(msg, status); - - // AppendEntries ae = {pbft_append_entries, id, 0, 0}; - // n2n_channels->send_authenticated( - // ccf::NodeMsgType::consensus_msg, to, ae); - // return msg->size(); - // } return msg->size(); } + void send_append_entries(pbft::NodeId to, pbft::Index start_idx) + { + std::lock_guard guard(lock); + size_t entries_batch_size = 10; + + pbft::Index end_idx = (append_entries_index == 0) ? + 0 : + std::min(start_idx + entries_batch_size, append_entries_index); + + for (pbft::Index i = end_idx; i < append_entries_index; + i += entries_batch_size) + { + send_append_entries_range(to, start_idx, i); + start_idx = std::min(i + 1, append_entries_index); + } + + if (append_entries_index == 0 || end_idx <= append_entries_index) + { + send_append_entries_range(to, start_idx, append_entries_index); + } + } + + void send_append_entries_range( + pbft::NodeId to, pbft::Index start_idx, pbft::Index end_idx) + { + const auto prev_idx = start_idx - 1; + + LOG_INFO_FMT( + "Send append entried from {} to {}: {} to {}", + id, + to, + start_idx, + end_idx); + + AppendEntries ae = {pbft_append_entries, id, end_idx, prev_idx}; + + auto node = nodes.find(to); + if (node != nodes.end()) + { + node->second.match_idx = end_idx; + } + else + { + nodes[to] = {end_idx}; + } + // auto& node = nodes.at(to); + + // Record the most recent index we have sent to this node. + // node.match_idx = end_idx; + + // The host will append log entries to this message when it is + // sent to the destination node. + n2n_channels->send_authenticated(ccf::NodeMsgType::consensus_msg, to, ae); + } + virtual Message* GetNextMessage() override { assert("Should not be called"); @@ -102,12 +194,18 @@ namespace pbft std::shared_ptr n2n_channels; IMessageReceiveBase* message_receiver_base = nullptr; NodeId id; + NodesMap& nodes; + pbft::Index& append_entries_index; + pbft::Index& latest_stable_ae_index; + SpinLock& lock; }; template class Pbft : public kv::Consensus { private: + NodesMap nodes; + std::shared_ptr channels; IMessageReceiveBase* message_receiver_base = nullptr; char* mem; @@ -120,6 +218,9 @@ namespace pbft std::unique_ptr store; std::unique_ptr ledger; Index append_entries_index = 0; + Index latest_stable_ae_index = 0; + SpinLock lock; + SpinLock playback_lock; // When this is set, only public domain is deserialised when receving append // entries @@ -196,7 +297,13 @@ namespace pbft mem = (char*)malloc(mem_size); bzero(mem, mem_size); - pbft_network = std::make_unique(local_id, channels); + pbft_network = std::make_unique( + local_id, + channels, + nodes, + append_entries_index, + latest_stable_ae_index, + lock); pbft_config = std::make_unique(rpc_map); auto used_bytes = Byz_init_replica( @@ -341,6 +448,10 @@ namespace pbft info.is_replica = true; Byz_add_principal(info); LOG_INFO_FMT("PBFT added node, id: {}", info.id); + + nodes[node_conf.node_id] = {0}; + + // pbft_network->send_append_entries(node_conf.node_id, 1); } void periodic(std::chrono::milliseconds elapsed) override @@ -367,7 +478,9 @@ namespace pbft { for (auto& [index, data, globally_committable] : entries) { + std::lock_guard guard(lock); append_entries_index++; + LOG_INFO_FMT("Increasing my ae index to {}", append_entries_index); write_to_ledger(data); } return true; @@ -377,7 +490,9 @@ namespace pbft { for (auto& [index, data, globally_committable] : entries) { + std::lock_guard guard(lock); append_entries_index++; + LOG_INFO_FMT("Increasing my ae index to {}", append_entries_index); write_to_ledger(data); } return true; @@ -393,8 +508,54 @@ namespace pbft message_receiver_base->receive_message(data, size); break; } + case pbft_status_message: + { + StatusMessage sm; + + try + { + sm = + channels->template recv_authenticated(data, size); + } + catch (const std::logic_error& err) + { + LOG_FAIL_FMT(err.what()); + return; + } + + // update node's index + auto node = nodes.find(sm.from_node); + if (node != nodes.end()) + { + node->second.match_idx = sm.idx; + } + else + { + nodes[sm.from_node] = {sm.idx}; + } + LOG_INFO_FMT( + "Received message from: {} with idx {} and my ae is {}", + sm.from_node, + sm.idx, + append_entries_index); + if (sm.idx < append_entries_index) + { + LOG_INFO_FMT( + "Received append entries response from node {} and it is behind. " + "Node is at {} and I am at {}", + sm.from_node, + sm.idx, + append_entries_index); + // pbft_network->send_append_entries(sm.from_node, sm.idx + 1); + } + break; + } case pbft_append_entries: { + std::lock_guard guard(playback_lock); + LOG_INFO_FMT( + "New append entries message, my ae index is {}", + append_entries_index); AppendEntries r; try @@ -408,20 +569,44 @@ namespace pbft return; } + auto node = nodes.find(r.from_node); + if (node != nodes.end()) + { + node->second.match_idx = r.idx; + } + else + { + nodes[r.from_node] = {r.idx}; + } + + if (r.idx <= append_entries_index) + { + LOG_INFO_FMT( + "Skipping INDEX {} as we are at index {}", + r.idx, + append_entries_index); + break; + } + for (Index i = r.prev_idx + 1; i <= r.idx; i++) { LOG_INFO_FMT( "RECORDING ENTRY FOR INDEX {} FOR DATA WITH SIZE {}", i, size); - // if (i <= last_idx) - // { - // // If the current entry has already been deserialised, skip the - // // payload for that entry - // ledger->skip_entry(data, size); - // continue; - // } - Index last_idx = i; - // TODO record entry here? - auto ret = ledger->record_entry(data, size); + pbft::Index aei; + { + std::lock_guard guard(lock); + aei = append_entries_index; + } + if (i <= aei) + { + // If the current entry has already been deserialised, skip the + // payload for that entry + LOG_INFO_FMT("Skipping INDEX {} as we are at index {}", i, aei); + ledger->skip_entry(data, size); + continue; + } + + auto ret = ledger->get_entry(data, size); if (!ret.second) { @@ -432,8 +617,10 @@ namespace pbft "Recv append entries to {} from {} but the data is malformed", local_id, r.from_node); - - last_idx = r.prev_idx; + { + std::lock_guard guard(lock); + append_entries_index = r.prev_idx; + } ledger->truncate(r.prev_idx); return; } @@ -446,8 +633,7 @@ namespace pbft { case kv::DeserialiseSuccess::FAILED: { - throw std::logic_error( - "Replica failed to apply log entry " + std::to_string(i)); + LOG_FAIL_FMT("Replica failed to apply log entry {}", i); break; } case kv::DeserialiseSuccess::PASS: diff --git a/src/consensus/pbft/pbfttypes.h b/src/consensus/pbft/pbfttypes.h index ce8ccb72d18e..9952a8f5cca7 100644 --- a/src/consensus/pbft/pbfttypes.h +++ b/src/consensus/pbft/pbfttypes.h @@ -18,7 +18,8 @@ namespace pbft enum PbftMsgType : Node2NodeMsg { pbft_message = 1000, - pbft_append_entries + pbft_append_entries, + pbft_status_message }; #pragma pack(push, 1) @@ -34,6 +35,11 @@ namespace pbft Index prev_idx; }; + struct StatusMessage : PbftHeader + { + Index idx; + }; + #pragma pack(pop) template diff --git a/src/kv/kv.h b/src/kv/kv.h index 34f257dd19bf..54b48577ece8 100644 --- a/src/kv/kv.h +++ b/src/kv/kv.h @@ -493,12 +493,12 @@ namespace kv return map.is_replicated(); } - private: virtual bool has_writes() { return committed_writes || !writes.empty(); } + private: virtual bool has_changes() { return changes; @@ -899,16 +899,14 @@ namespace kv throw std::logic_error( "Transaction must be over maps in the same store"); } - else + + if (read_version == NoVersion) { - if (read_version == NoVersion) - { - // Grab opacity version that all Maps should be queried at. - if (read_globally_committed) - read_version = m.get_store()->commit_version(); - else - read_version = m.get_store()->current_version(); - } + // Grab opacity version that all Maps should be queried at. + if (read_globally_committed) + read_version = m.get_store()->commit_version(); + else + read_version = m.get_store()->current_version(); } typename M::TxView* view = m.create_view(read_version); diff --git a/src/node/rpc/frontend.h b/src/node/rpc/frontend.h index 3a6872de7fe2..a6cdae969b4a 100644 --- a/src/node/rpc/frontend.h +++ b/src/node/rpc/frontend.h @@ -430,6 +430,7 @@ namespace ccf } } + LOG_INFO_FMT("TX PRIMARY UNKNOWN"); return jsonrpc::error( jsonrpc::CCFErrorCodes::TX_PRIMARY_UNKNOWN, "Primary unknown."); }; diff --git a/src/node/rpc/memberfrontend.h b/src/node/rpc/memberfrontend.h index 847cc02623ae..b99fbe0d27e5 100644 --- a/src/node/rpc/memberfrontend.h +++ b/src/node/rpc/memberfrontend.h @@ -443,11 +443,14 @@ namespace ccf // node for the genesis transaction to initialise the service if (g.is_service_created()) { + LOG_INFO_FMT("Service is already created"); return jsonrpc::error( jsonrpc::StandardErrorCodes::INTERNAL_ERROR, "Service is already created"); } + LOG_INFO_FMT("Creating service"); + g.init_values(); for (auto& cert : in.member_cert) { diff --git a/tests/infra/node.py b/tests/infra/node.py index 46f2e18ad2de..70b7e7f4b74a 100644 --- a/tests/infra/node.py +++ b/tests/infra/node.py @@ -227,6 +227,12 @@ def member_client(self, format="msgpack", member_id=1, **kwargs): **kwargs, ) + def suspend(self): + self.remote.suspend() + + def resume(self): + self.remote.resume() + @contextmanager def node(node_id, host, build_directory, debug=False, perf=False, pdb=False): diff --git a/tests/node_suspension.py b/tests/node_suspension.py index 045e641a5399..a34bcaeb049f 100644 --- a/tests/node_suspension.py +++ b/tests/node_suspension.py @@ -19,7 +19,7 @@ from loguru import logger as LOG # 256 is the number of most recent messages that PBFT keeps in memory before needing to replay the ledger -TOTAL_REQUESTS = 256 +TOTAL_REQUESTS = 56 def timeout(node, suspend, election_timeout): @@ -47,18 +47,19 @@ def run(args): long_msg = "X" * (2 ** 14) # first timer determines after how many seconds each node will be suspended - timeouts = [] - t = random.uniform(1, 10) - LOG.info(f"Initial timer for node {first_node.node_id} is {t} seconds...") - timeouts.append((t, first_node)) - for backup in backups: + if not args.skip_suspension: + timeouts = [] t = random.uniform(1, 10) - LOG.info(f"Initial timer for node {backup.node_id} is {t} seconds...") - timeouts.append((t, backup)) + LOG.info(f"Initial timer for node {first_node.node_id} is {t} seconds...") + timeouts.append((t, first_node)) + for backup in backups: + t = random.uniform(1, 10) + LOG.info(f"Initial timer for node {backup.node_id} is {t} seconds...") + timeouts.append((t, backup)) - for t, node in timeouts: - tm = Timer(t, timeout, args=[node, True, args.election_timeout / 1000],) - tm.start() + for t, node in timeouts: + tm = Timer(t, timeout, args=[node, True, args.election_timeout / 1000],) + tm.start() with first_node.node_client() as mc: check_commit = infra.checker.Checker(mc) @@ -72,6 +73,13 @@ def run(args): clients.append(es.enter_context(backup.user_client(format="json"))) node_id = 0 for id in range(1, TOTAL_REQUESTS): + if id == 5: + LOG.error("Adding another node after f = 0 but before any node has checkpointed") + # check that a new node can catch up naturally + new_node = network.create_and_trust_node( + lib_name=args.package, host="localhost", args=args, + ) + assert new_node node_id += 1 c = clients[node_id % len(clients)] try: @@ -95,30 +103,72 @@ def run(args): ) # check that a new node can catch up after all the requests - new_node = network.create_and_trust_node( + last_node = network.create_and_trust_node( lib_name=args.package, host="localhost", args=args, ) - assert new_node + assert last_node - # give new_node a second to catch up - time.sleep(5) + # # give new_node a second to catch up + # time.sleep(1) - with new_node.user_client(format="json") as c: - check( - c.rpc("LOG_get", {"id": 1000}), result={"msg": final_msg}, - ) + ## send more messages I want to check that the new node will catchup and then start processing pre prepares, etc + clients = [] + with contextlib.ExitStack() as es: + LOG.info("Write messages to nodes using round robin") + clients.append(es.enter_context(first_node.user_client(format="json"))) + for backup in backups: + clients.append(es.enter_context(backup.user_client(format="json"))) + node_id = 0 + for id in range(1001, (1001 + TOTAL_REQUESTS)): + node_id += 1 + c = clients[node_id % len(clients)] + try: + resp = c.rpc("LOG_record", {"id": id, "msg": long_msg}) + except Exception: + LOG.info("Trying to access a suspended node") + try: + cur_primary, cur_term = network.find_primary() + term_info[cur_term] = cur_primary.node_id + except Exception: + LOG.info("Trying to access a suspended node") + id += 1 + + ## send final final message + # wait for the last request to commit + final_msg2 = "Hello world Hello!" + check_commit( + c.rpc("LOG_record", {"id": 2000, "msg": final_msg2}), result=True, + ) + check( + c.rpc("LOG_get", {"id": 2000}), result={"msg": final_msg2}, + ) + + with last_node.user_client(format="json") as c: + check( + c.rpc("LOG_get", {"id": 1000}), result={"msg": final_msg}, + ) + with last_node.user_client(format="json") as c: + check( + c.rpc("LOG_get", {"id": 2000}), result={"msg": final_msg2}, + ) - # assert that view changes actually did occur - assert len(term_info) > 1 + if not args.skip_suspension: + # assert that view changes actually did occur + assert len(term_info) > 1 - LOG.success("----------- terms and primaries recorded -----------") - for term, primary in term_info.items(): - LOG.success(f"term {term} - primary {primary}") + LOG.success("----------- terms and primaries recorded -----------") + for term, primary in term_info.items(): + LOG.success(f"term {term} - primary {primary}") if __name__ == "__main__": - - args = e2e_args.cli_args() + def add(parser): + parser.add_argument( + "--skip-suspension", + help="Don't suspend any nodes (i.e. just do late join)", + action="store_true" + ) + args = e2e_args.cli_args(add) args.package = args.app_script and "libluagenericenc" or "libloggingenc" notify_server_host = "localhost" From 53a0b1f4ff0253ce81005949dbb4af87e48406d4 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 24 Jan 2020 11:14:13 +0000 Subject: [PATCH 09/42] send append entries until last-stable seqno --- src/consensus/pbft/libbyz/Prepared_cert.h | 3 + src/consensus/pbft/libbyz/Replica.cpp | 109 +++++++++++++----- src/consensus/pbft/libbyz/Replica.h | 22 +++- .../pbft/libbyz/receive_message_base.h | 2 + src/consensus/pbft/pbft.h | 41 +++++-- tests/node_suspension.py | 10 ++ 6 files changed, 143 insertions(+), 44 deletions(-) diff --git a/src/consensus/pbft/libbyz/Prepared_cert.h b/src/consensus/pbft/libbyz/Prepared_cert.h index 2a6d13018f63..ce9ac9cf44a2 100644 --- a/src/consensus/pbft/libbyz/Prepared_cert.h +++ b/src/consensus/pbft/libbyz/Prepared_cert.h @@ -144,6 +144,9 @@ inline bool Prepared_cert::add(Prepare* m) auto principal = pbft::GlobalState::get_node().get_principal(id); if (!principal) { + LOG_INFO_FMT( + "Returning false from prepared cert, probably need to delete this"); + delete m; return false; } diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index 8983fd77646a..033cd8649e42 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -318,13 +318,26 @@ void Replica::playback_request(const pbft::Request& request, ccf::Store::Tx& tx) "Playback request for request with size {}", request.pbft_raw.size()); auto req = create_message(request.pbft_raw.data(), request.pbft_raw.size()); - // TODO refactor with execute tentative method - last_tentative_execute = last_tentative_execute + 1; - LOG_TRACE << "in execute tentative with last_tentative_execute: " - << last_tentative_execute << " and last_executed: " << last_executed - << std::endl; + if (!waiting_for_playback_pp) + { + // only increment last tentative execute once per pre-prepare (a pre-prepare + // could have batched requests but we can't increment last_tentative_execute + // for each one individually) + // TODO remove this waiting check if we refactor to use execute tentative + // method + last_tentative_execute = last_tentative_execute + 1; + LOG_TRACE_FMT( + "in playback execute tentative with lte {}, le {}, for rid {} with cid " + "{}", + last_tentative_execute, + last_executed, + req->request_id(), + req->client_id()); + } + + waiting_for_playback_pp = true; int client_id = req->client_id(); @@ -360,14 +373,6 @@ void Replica::playback_request(const pbft::Request& request, ccf::Store::Tx& tx) << " commit_id: " << playback_byz_info.ctx << std::endl; replies.end_reply(client_id, rid, last_tentative_execute, outb.size); - - // Remove the request from rqueue if present. - if (rqueue.remove(client_id, req->request_id())) - { - LOG_INFO_FMT( - "Removed request with cid {} and rid {}", client_id, req->request_id()); - vtimer->stop(); - } } // TODO have pre prepare commit it's pre prepare without creating anew @@ -378,6 +383,8 @@ void Replica::playback_pre_prepare(const pbft::PrePrepare& pre_prepare) auto executable_pp = create_message( pre_prepare.contents.data(), pre_prepare.contents.size()); auto seqno = executable_pp->seqno(); + playback_pp_seqno = seqno; + waiting_for_playback_pp = false; if (compare_execution_results(playback_byz_info, executable_pp.get())) { @@ -390,7 +397,7 @@ void Replica::playback_pre_prepare(const pbft::PrePrepare& pre_prepare) ledger_writer->write_pre_prepare(executable_pp.get()); - if (global_commit_cb != nullptr) + if (global_commit_cb != nullptr && executable_pp->is_signed()) { global_commit_cb( executable_pp->get_ctx(), executable_pp->view(), global_commit_ctx); @@ -406,12 +413,6 @@ void Replica::playback_pre_prepare(const pbft::PrePrepare& pre_prepare) else { throw std::logic_error("Entries don't match playback"); - LOG_DEBUG << "Received entries could not be processed. Received seqno: " - << seqno - << ". Truncating ledger to last executed: " << last_executed - << std::endl; - rqueue.clear(); - brt.clear(); } } @@ -826,6 +827,13 @@ bool Replica::in_wv(T* m) void Replica::handle(Pre_prepare* m) { + if (playback_pp_seqno >= m->seqno() || waiting_for_playback_pp) + { + LOG_TRACE_FMT("Reject pre prepare with seqno {}", m->seqno()); + delete m; + return; + } + const Seqno ms = m->seqno(); Byz_buffer b; @@ -955,6 +963,13 @@ void Replica::send_commit(Seqno s, bool send_only_to_self) void Replica::handle(Prepare* m) { + if (playback_pp_seqno >= m->seqno() || waiting_for_playback_pp) + { + LOG_TRACE_FMT("Reject prepare with seqno {}", m->seqno()); + delete m; + return; + } + const Seqno ms = m->seqno(); // Only accept prepare messages that are not sent by the primary for // current view. @@ -962,7 +977,6 @@ void Replica::handle(Prepare* m) in_wv(m) && ms > low_bound && primary() != m->id() && has_complete_new_view()) { - LOG_TRACE << "handle prepare for seqno: " << ms << std::endl; Prepared_cert& ps = plog.fetch(ms); if (ps.add(m) && ps.is_complete()) { @@ -990,6 +1004,12 @@ void Replica::handle(Prepare* m) void Replica::handle(Commit* m) { + if (playback_pp_seqno >= m->seqno() || waiting_for_playback_pp) + { + LOG_TRACE_FMT("Reject commit with seqno {}", m->seqno()); + delete m; + return; + } const Seqno ms = m->seqno(); // Only accept messages with the current view. TODO: change to @@ -1130,6 +1150,12 @@ void Replica::register_global_commit(global_commit_handler_cb cb, void* ctx) global_commit_ctx = ctx; } +void Replica::register_mark_stable(mark_stable_handler_cb cb, void* ctx) +{ + mark_stable_cb = cb; + mark_stable_ctx = ctx; +} + template std::unique_ptr Replica::create_message( const uint8_t* message_data, size_t data_size) @@ -1376,6 +1402,10 @@ void Replica::handle(Status* m) Request* r; while (gen.get(r)) { + LOG_TRACE_FMT( + "Retransmitting request with id {} and cid {}", + r->request_id(), + r->client_id()); INCR_OP(message_counts_retransmitted[m->tag()]); send(r, m->id()); count += r->size(); @@ -1785,21 +1815,26 @@ void Replica::process_new_view(Seqno min, Digest d, Seqno max, Seqno ms) { ByzInfo info; pc.add_mine(pp); - if (ledger_writer) + if (execute_tentative(pp, info)) { - ledger_writer->write_pre_prepare(pp); + if (ledger_writer) + { + ledger_writer->write_pre_prepare(pp); + } } - execute_tentative(pp, info); } else { ByzInfo info; pc.add_old(pp); - if (ledger_writer) + + if (execute_tentative(pp, info)) { - ledger_writer->write_pre_prepare(pp); + if (ledger_writer) + { + ledger_writer->write_pre_prepare(pp); + } } - execute_tentative(pp, info); Prepare* p = new Prepare(v, i, d, nullptr, pp->is_signed()); pc.add_mine(p); @@ -2025,7 +2060,10 @@ void Replica::execute_prepared(bool committed) bool Replica::execute_tentative(Pre_prepare* pp, ByzInfo& info) { - LOG_DEBUG << "in execute tentative: " << pp->seqno() << std::endl; + LOG_DEBUG_FMT( + "in execute tentative for seqno {} and last_tentnative_execute {}", + pp->seqno(), + last_tentative_execute); if ( pp->seqno() == last_tentative_execute + 1 && !state.in_fetch_state() && !state.in_check_state() && has_complete_new_view()) @@ -2069,14 +2107,18 @@ bool Replica::execute_tentative(Pre_prepare* pp, ByzInfo& info) #ifdef ENFORCE_EXACTLY_ONCE outb.contents = replies.new_reply(client_id); -#else #endif non_det.contents = pp->choices(non_det.size); Request_id rid = request.request_id(); // Execute command in a regular request. replies.count_request(); - LOG_TRACE << "before exec command with seqno: " << pp->seqno() - << std::endl; + LOG_TRACE_FMT( + "before exec command with seqno: {} rid {} cid {} rid digest {}", + pp->seqno(), + rid, + request.client_id(), + request.digest().hash()); + exec_command( &inb, outb, @@ -2470,7 +2512,10 @@ void Replica::mark_stable(Seqno n, bool have_state) state.discard_checkpoints(last_stable, last_executed); brt.mark_stable(last_stable); - // mark_stable_callback() + if (mark_stable_cb != nullptr) + { + mark_stable_cb(mark_stable_ctx); + } if (have_state) { diff --git a/src/consensus/pbft/libbyz/Replica.h b/src/consensus/pbft/libbyz/Replica.h index cd62846798bf..72ba98a06502 100644 --- a/src/consensus/pbft/libbyz/Replica.h +++ b/src/consensus/pbft/libbyz/Replica.h @@ -126,6 +126,8 @@ class Replica : public Node, public IMessageReceiveBase void register_global_commit(global_commit_handler_cb cb, void* ctx); // Effects:: Registers a handler that is called when a batch is committed + void register_mark_stable(mark_stable_handler_cb cb, void* ctx); + template std::unique_ptr create_message( const uint8_t* message_data, size_t data_size); @@ -429,11 +431,19 @@ class Replica : public Node, public IMessageReceiveBase #endif ByzInfo playback_byz_info; + // Latest byz info when we are in playback mode. Used to compare the latest + // execution mt roots and version with the ones in the pre prepare we will get + // while we are at playback mode + Seqno playback_pp_seqno = 0; + // seqno of latest pre prepare executed in playback mode + bool waiting_for_playback_pp = false; + // indicates if we are in append entries playback mode and have executed a + // request but haven't gotten the pre prepare yet - // used to register a callback for a client proxy to collect replies sent to - // this replica reply_handler_cb rep_cb; void* rep_cb_ctx; + // used to register a callback for a client proxy to collect replies sent to + // this replica pbft::RequestsMap& pbft_requests_map; pbft::PrePreparesMap& pbft_pre_prepares_map; @@ -442,6 +452,14 @@ class Replica : public Node, public IMessageReceiveBase global_commit_handler_cb global_commit_cb; void* global_commit_ctx; + mark_stable_handler_cb mark_stable_cb = nullptr; + void* mark_stable_ctx; + // callback when we call mark_stable + // Used to not the append_entries_index of the stable seqno + // We don't want to send append entries further than the latest stable seqno + // since the replicas store enough messages in that case so that the late + // joiner can catch up by the usual execution route + std::unique_ptr ledger_writer; // State abstraction manages state checkpointing and digesting diff --git a/src/consensus/pbft/libbyz/receive_message_base.h b/src/consensus/pbft/libbyz/receive_message_base.h index 2644cd8f3d79..db73d55faabc 100644 --- a/src/consensus/pbft/libbyz/receive_message_base.h +++ b/src/consensus/pbft/libbyz/receive_message_base.h @@ -18,8 +18,10 @@ class IMessageReceiveBase virtual void register_reply_handler(reply_handler_cb cb, void* ctx) = 0; typedef void (*global_commit_handler_cb)( int64_t tx_ctx, View view, void* cb_ctx); + typedef void (*mark_stable_handler_cb)(void* bc_ctx); virtual void register_global_commit( global_commit_handler_cb cb, void* ctx) = 0; + virtual void register_mark_stable(mark_stable_handler_cb cb, void* ctx) = 0; virtual size_t num_correct_replicas() const = 0; virtual size_t f() const = 0; virtual void set_f(ccf::NodeId f) = 0; diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index bd8f02aca4b2..9cc2fd164fd5 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -104,7 +104,7 @@ namespace pbft match_idx = node->second.match_idx; } - if (match_idx < append_entries_index) + if (match_idx < latest_stable_ae_index) { send_append_entries(to, match_idx + 1); } @@ -129,20 +129,20 @@ namespace pbft std::lock_guard guard(lock); size_t entries_batch_size = 10; - pbft::Index end_idx = (append_entries_index == 0) ? + pbft::Index end_idx = (latest_stable_ae_index == 0) ? 0 : - std::min(start_idx + entries_batch_size, append_entries_index); + std::min(start_idx + entries_batch_size, latest_stable_ae_index); - for (pbft::Index i = end_idx; i < append_entries_index; + for (pbft::Index i = end_idx; i < latest_stable_ae_index; i += entries_batch_size) { send_append_entries_range(to, start_idx, i); - start_idx = std::min(i + 1, append_entries_index); + start_idx = std::min(i + 1, latest_stable_ae_index); } - if (append_entries_index == 0 || end_idx <= append_entries_index) + if (latest_stable_ae_index == 0 || end_idx <= latest_stable_ae_index) { - send_append_entries_range(to, start_idx, append_entries_index); + send_append_entries_range(to, start_idx, latest_stable_ae_index); } } @@ -247,6 +247,12 @@ namespace pbft std::vector* view_change_list; } register_global_commit_ctx; + struct register_mark_stable_info + { + pbft::Index* append_entries_idx; + pbft::Index* latest_stable_ae_idx; + } register_mark_stable_ctx; + public: Pbft( std::unique_ptr store_, @@ -339,6 +345,19 @@ namespace pbft message_receiver_base->register_reply_handler( reply_handler_cb, client_proxy.get()); + auto mark_stable_cb = [](void* ctx) { + auto ms_ctx = static_cast(ctx); + *ms_ctx->latest_stable_ae_idx = *ms_ctx->append_entries_idx; + LOG_INFO_FMT( + "latest_stable_ae_index is set to {}", *ms_ctx->latest_stable_ae_idx); + }; + + register_mark_stable_ctx.append_entries_idx = &append_entries_index; + register_mark_stable_ctx.latest_stable_ae_idx = &latest_stable_ae_index; + + message_receiver_base->register_mark_stable( + mark_stable_cb, ®ister_mark_stable_ctx); + auto global_commit_cb = [](kv::Version version, ::View view, void* ctx) { auto p = static_cast(ctx); if (version == kv::NoVersion || version < *p->global_commit_seqno) @@ -553,9 +572,6 @@ namespace pbft case pbft_append_entries: { std::lock_guard guard(playback_lock); - LOG_INFO_FMT( - "New append entries message, my ae index is {}", - append_entries_index); AppendEntries r; try @@ -569,6 +585,11 @@ namespace pbft return; } + LOG_INFO_FMT( + "New append entries message from {}, my ae index is {}", + r.from_node, + append_entries_index); + auto node = nodes.find(r.from_node); if (node != nodes.end()) { diff --git a/tests/node_suspension.py b/tests/node_suspension.py index a34bcaeb049f..59aab5b527eb 100644 --- a/tests/node_suspension.py +++ b/tests/node_suspension.py @@ -102,6 +102,14 @@ def run(args): c.rpc("LOG_get", {"id": 1000}), result={"msg": final_msg}, ) + # check that new node has caught up ok + with new_node.user_client(format="json") as c: + check( + c.rpc("LOG_get", {"id": 1000}), result={"msg": final_msg}, + ) + # add new node to backups list + backups.append(new_node) + # check that a new node can catch up after all the requests last_node = network.create_and_trust_node( lib_name=args.package, host="localhost", args=args, @@ -151,6 +159,8 @@ def run(args): check( c.rpc("LOG_get", {"id": 2000}), result={"msg": final_msg2}, ) + + # all the nodes should be caught up by now if not args.skip_suspension: # assert that view changes actually did occur From a9c5a9bd451b5ffdaaab4fa8fac1edbe70fdc011 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 24 Jan 2020 11:55:08 +0000 Subject: [PATCH 10/42] refactor execute tentative request for playback and normal execution --- src/consensus/pbft/libbyz/Replica.cpp | 195 +++++++++++--------------- src/consensus/pbft/libbyz/Replica.h | 14 ++ 2 files changed, 99 insertions(+), 110 deletions(-) diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index 033cd8649e42..06d56e8c71cf 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -318,15 +318,12 @@ void Replica::playback_request(const pbft::Request& request, ccf::Store::Tx& tx) "Playback request for request with size {}", request.pbft_raw.size()); auto req = create_message(request.pbft_raw.data(), request.pbft_raw.size()); - // TODO refactor with execute tentative method if (!waiting_for_playback_pp) { // only increment last tentative execute once per pre-prepare (a pre-prepare // could have batched requests but we can't increment last_tentative_execute // for each one individually) - // TODO remove this waiting check if we refactor to use execute tentative - // method last_tentative_execute = last_tentative_execute + 1; LOG_TRACE_FMT( "in playback execute tentative with lte {}, le {}, for rid {} with cid " @@ -338,56 +335,30 @@ void Replica::playback_request(const pbft::Request& request, ccf::Store::Tx& tx) } waiting_for_playback_pp = true; - - int client_id = req->client_id(); - - // Obtain "in" and "out" buffers to call exec_command - Byz_req inb; - Byz_rep outb; Byz_buffer non_det; - inb.contents = req->command(inb.size); - - // TODO do we need this? - // non_det.contents = pp->choices(non_det.size); - Request_id rid = req->request_id(); - // Execute command in a regular request. - replies.count_request(); - - exec_command( - &inb, - outb, - &non_det, - client_id, - rid, - false, - (uint8_t*)req->contents(), - req->contents_size(), - replies.total_requests_processed(), - playback_byz_info, - true, - &tx); - right_pad_contents(outb); - // Finish constructing the reply. - LOG_DEBUG << "Executed from tentative exec: " - << " from client: " << client_id << " rid: " << rid - << " commit_id: " << playback_byz_info.ctx << std::endl; - - replies.end_reply(client_id, rid, last_tentative_execute, outb.size); + execute_tentative_request( + *req, playback_byz_info, playback_max_local_commit_value, non_det); } -// TODO have pre prepare commit it's pre prepare without creating anew -// transaction void Replica::playback_pre_prepare(const pbft::PrePrepare& pre_prepare) { - LOG_INFO_FMT("playback pre-prepare {}", pre_prepare.seqno); + LOG_TRACE_FMT("playback pre-prepare {}", pre_prepare.seqno); auto executable_pp = create_message( pre_prepare.contents.data(), pre_prepare.contents.size()); auto seqno = executable_pp->seqno(); playback_pp_seqno = seqno; waiting_for_playback_pp = false; + playback_max_local_commit_value = INT64_MIN; if (compare_execution_results(playback_byz_info, executable_pp.get())) { + // we are done executing the pre-prepare batch we need to check if we need + // to checkpoint + if (last_tentative_execute % checkpoint_interval == 0) + { + state.checkpoint(last_tentative_execute); + } + next_pp_seqno = seqno; if (seqno > last_prepared) @@ -2058,6 +2029,68 @@ void Replica::execute_prepared(bool committed) } } +void Replica::execute_tentative_request( + Request& request, + ByzInfo& info, + int64_t& max_local_commit_value, + Byz_buffer& non_det, + char* nondet_choices, + Seqno seqno) +{ + int client_id = request.client_id(); + + // Obtain "in" and "out" buffers to call exec_command + Byz_req inb; + Byz_rep outb; + inb.contents = request.command(inb.size); + + if (non_det_choices) + { + non_det.contents = nondet_choices; + } + + Request_id rid = request.request_id(); + // Execute command in a regular request. + replies.count_request(); + LOG_TRACE_FMT( + "before exec command with seqno: {} rid {} cid {} rid digest {}", + seqno, + rid, + request.client_id(), + request.digest().hash()); + + exec_command( + &inb, + outb, + &non_det, + client_id, + rid, + false, + (uint8_t*)request.contents(), + request.contents_size(), + replies.total_requests_processed(), + info, + false, + nullptr); + right_pad_contents(outb); + // Finish constructing the reply. + LOG_DEBUG_FMT( + "Executed from tentative exec: {} from client: {} rid {} commit_id {}", + seqno, + client_id, + rid, + info.ctx); + + if (info.ctx > max_local_commit_value) + { + max_local_commit_value = info.ctx; + } + + info.ctx = max_local_commit_value; + + replies.end_reply(client_id, rid, last_tentative_execute, outb.size); +} + bool Replica::execute_tentative(Pre_prepare* pp, ByzInfo& info) { LOG_DEBUG_FMT( @@ -2081,78 +2114,20 @@ bool Replica::execute_tentative(Pre_prepare* pp, ByzInfo& info) while (iter.get(request)) { - int client_id = request.client_id(); - -#ifdef ENFORCE_EXACTLY_ONCE - if (replies.req_id(client_id) >= request.request_id()) - { - // Request has already been executed and we have the reply to - // the request. Resend reply and don't execute request - // to ensure idempotence. - INCR_OP(message_counts_retransmitted[Reply_tag]); - replies.send_reply( - client_id, view(), id(), !replies.is_committed(client_id)); - LOG_DEBUG << "Sending from tentative exec: " << pp->seqno() - << " from client: " << client_id - << " rid: " << request.request_id() << std::endl; - continue; - } -#endif - - // Obtain "in" and "out" buffers to call exec_command - Byz_req inb; - Byz_rep outb; Byz_buffer non_det; - inb.contents = request.command(inb.size); - -#ifdef ENFORCE_EXACTLY_ONCE - outb.contents = replies.new_reply(client_id); -#endif - non_det.contents = pp->choices(non_det.size); - Request_id rid = request.request_id(); - // Execute command in a regular request. - replies.count_request(); - LOG_TRACE_FMT( - "before exec command with seqno: {} rid {} cid {} rid digest {}", - pp->seqno(), - rid, - request.client_id(), - request.digest().hash()); - - exec_command( - &inb, - outb, - &non_det, - client_id, - rid, - false, - (uint8_t*)request.contents(), - request.contents_size(), - replies.total_requests_processed(), + execute_tentative_request( + request, info, - false, - nullptr); - right_pad_contents(outb); - // Finish constructing the reply. - LOG_DEBUG << "Executed from tentative exec: " << pp->seqno() - << " from client: " << client_id << " rid: " << rid - << " commit_id: " << info.ctx << std::endl; - - if (info.ctx > max_local_commit_value) - { - max_local_commit_value = info.ctx; - } - - info.ctx = max_local_commit_value; -#ifdef ENFORCE_EXACTLY_ONCE - replies.end_reply(client_id, rid, outb.size); -#else - replies.end_reply(client_id, rid, last_tentative_execute, outb.size); -#endif - } - LOG_DEBUG << "Executed from tentative exec: " << pp->seqno() - << " rid: " << request.request_id() << " commit_id: " << info.ctx - << std::endl; + max_local_commit_value, + non_det, + pp->choices(non_det.size), + pp->seqno()); + } + LOG_DEBUG_FMT( + "Executed from tentative exec: {} rid {} commit_id {}", + pp->seqno(), + request.request_id(), + info.ctx); if (last_tentative_execute % checkpoint_interval == 0) { diff --git a/src/consensus/pbft/libbyz/Replica.h b/src/consensus/pbft/libbyz/Replica.h index 72ba98a06502..b806979e1c08 100644 --- a/src/consensus/pbft/libbyz/Replica.h +++ b/src/consensus/pbft/libbyz/Replica.h @@ -296,6 +296,16 @@ class Replica : public Node, public IMessageReceiveBase // exec_command for each command; and sends back replies to the // client. The replies are tentative unless "committed" is true. + void execute_tentative_request( + Request& request, + ByzInfo& info, + int64_t& max_local_commit_value, + Byz_buffer& non_det, + char* nondet_choices = nullptr, + Seqno seqno = -1); + // Effects: called by execute_tentative or playback_request to execute the + // request. seqno == -1 means we are running it from playback + void create_recovery_reply( int client_id, int last_tentative_execute, Byz_rep& outb); // Handle recovery requests, i.e., requests from replicas, @@ -439,6 +449,10 @@ class Replica : public Node, public IMessageReceiveBase bool waiting_for_playback_pp = false; // indicates if we are in append entries playback mode and have executed a // request but haven't gotten the pre prepare yet + int64_t playback_max_local_commit_value = INT64_MIN; + // playback max local commit value used for when we are playing back batched + // requests when playback pre-prepare is called it will reset it since the + // batch for that pre-prepare has executed reply_handler_cb rep_cb; void* rep_cb_ctx; From 65074ef0ced62bbdfe751814365c7ed989020c95 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 24 Jan 2020 13:03:00 +0000 Subject: [PATCH 11/42] re-using transaction in playback pre prepare --- src/consensus/pbft/libbyz/LedgerWriter.cpp | 9 ++++++++- src/consensus/pbft/libbyz/LedgerWriter.h | 1 + src/consensus/pbft/libbyz/Replica.cpp | 6 +++--- src/consensus/pbft/libbyz/Replica.h | 3 ++- src/consensus/pbft/pbfttypes.h | 9 ++++++--- src/kv/kv.h | 5 +++++ 6 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/consensus/pbft/libbyz/LedgerWriter.cpp b/src/consensus/pbft/libbyz/LedgerWriter.cpp index c1515de2a9dd..c8db8e0985db 100644 --- a/src/consensus/pbft/libbyz/LedgerWriter.cpp +++ b/src/consensus/pbft/libbyz/LedgerWriter.cpp @@ -35,6 +35,12 @@ void LedgerWriter::write_prepare( } void LedgerWriter::write_pre_prepare(Pre_prepare* pp) +{ + ccf::Store::Tx tx; + write_pre_prepare(pp, tx); +} + +void LedgerWriter::write_pre_prepare(Pre_prepare* pp, ccf::Store::Tx& tx) { store.commit_pre_prepare( {pp->seqno(), @@ -42,7 +48,8 @@ void LedgerWriter::write_pre_prepare(Pre_prepare* pp) pp->get_digest_sig(), {(const uint8_t*)pp->contents(), (const uint8_t*)pp->contents() + pp->size()}}, - pbft_pre_prepares_map); + pbft_pre_prepares_map, + tx); } void LedgerWriter::write_view_change(View_change* vc) diff --git a/src/consensus/pbft/libbyz/LedgerWriter.h b/src/consensus/pbft/libbyz/LedgerWriter.h index ad66544fccc3..b0dbf6f24700 100644 --- a/src/consensus/pbft/libbyz/LedgerWriter.h +++ b/src/consensus/pbft/libbyz/LedgerWriter.h @@ -23,5 +23,6 @@ class LedgerWriter virtual ~LedgerWriter() = default; void write_prepare(const Prepared_cert& prepared_cert, Seqno seqno); void write_pre_prepare(Pre_prepare* pp); + void write_pre_prepare(Pre_prepare* pp, ccf::Store::Tx& tx); void write_view_change(View_change* vc); }; diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index 06d56e8c71cf..b6ae58dd6cdb 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -301,9 +301,8 @@ void Replica::playback_transaction(ccf::Store::Tx& tx) auto pp = view->get(0); if (pp.has_value()) { - // TODO what to do with pps transaction pbft::PrePrepare pre_prepare = pp.value(); - playback_pre_prepare(pre_prepare); + playback_pre_prepare(pre_prepare, tx); return; } } @@ -340,7 +339,8 @@ void Replica::playback_request(const pbft::Request& request, ccf::Store::Tx& tx) *req, playback_byz_info, playback_max_local_commit_value, non_det); } -void Replica::playback_pre_prepare(const pbft::PrePrepare& pre_prepare) +void Replica::playback_pre_prepare( + const pbft::PrePrepare& pre_prepare, ccf::Store::Tx& tx) { LOG_TRACE_FMT("playback pre-prepare {}", pre_prepare.seqno); auto executable_pp = create_message( diff --git a/src/consensus/pbft/libbyz/Replica.h b/src/consensus/pbft/libbyz/Replica.h index b806979e1c08..37e388a472fe 100644 --- a/src/consensus/pbft/libbyz/Replica.h +++ b/src/consensus/pbft/libbyz/Replica.h @@ -244,7 +244,8 @@ class Replica : public Node, public IMessageReceiveBase // Playback methods void playback_request(const pbft::Request& request, ccf::Store::Tx& tx); // Effects: Requests are executed - void playback_pre_prepare(const pbft::PrePrepare& pre_prepare); + void playback_pre_prepare( + const pbft::PrePrepare& pre_prepare, ccf::Store::Tx& tx); // Effects: pre-prepare is verified, if merkle roots match // we update the pre-prepare related meta-data, if not we rollback diff --git a/src/consensus/pbft/pbfttypes.h b/src/consensus/pbft/pbfttypes.h index 9952a8f5cca7..81159c511169 100644 --- a/src/consensus/pbft/pbfttypes.h +++ b/src/consensus/pbft/pbfttypes.h @@ -57,7 +57,8 @@ namespace pbft virtual kv::Version current_version() = 0; virtual void commit_pre_prepare( const pbft::PrePrepare& pp, - pbft::PrePreparesMap& pbft_pre_prepares_map) = 0; + pbft::PrePreparesMap& pbft_pre_prepares_map, + ccf::Store::Tx& tx) = 0; }; template @@ -84,7 +85,9 @@ namespace pbft } void commit_pre_prepare( - const pbft::PrePrepare& pp, pbft::PrePreparesMap& pbft_pre_prepares_map) + const pbft::PrePrepare& pp, + pbft::PrePreparesMap& pbft_pre_prepares_map, + ccf::Store::Tx& tx) { while (true) { @@ -96,7 +99,7 @@ namespace pbft auto success = p->commit( version, [&]() { - ccf::Store::Tx tx(version); + tx.set_reserved_version(version); auto pp_view = tx.get_view(pbft_pre_prepares_map); pp_view->put(0, pp); return tx.commit_reserved(); diff --git a/src/kv/kv.h b/src/kv/kv.h index 54b48577ece8..2581a4d2b02a 100644 --- a/src/kv/kv.h +++ b/src/kv/kv.h @@ -939,6 +939,11 @@ namespace kv view_list.merge(view_list_); } + void set_reserved_version(Version reserved) + { + version = reserved; + } + void set_req_id(const kv::TxHistory::RequestID& req_id_) { req_id = req_id_; From 3c6c306e0c005f8aca0e04fb7ab5453c836dc5df Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 24 Jan 2020 13:09:50 +0000 Subject: [PATCH 12/42] added view change trace logging --- src/consensus/pbft/libbyz/Replica.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index b6ae58dd6cdb..d60619755ef7 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -1292,6 +1292,8 @@ void Replica::handle(Status* m) View_change* vc = vi.my_view_change(t_sent); if (vc != 0) { + LOG_TRACE_FMT( + "Re transmitting view change with digest: {}", vc->digest().hash()); retransmit(vc, current, t_sent, p.get()); } delete m; @@ -1388,12 +1390,13 @@ void Replica::handle(Status* m) } else { - LOG_INFO_FMT("HAS NV INFO FALSE"); if (!m->has_vc(node_id)) { // p does not have my view-change: send it. View_change* vc = vi.my_view_change(t_sent); PBFT_ASSERT(vc != 0, "Invalid state"); + LOG_TRACE_FMT( + "Re transmitting view change with digest: {}", vc->digest().hash()); retransmit(vc, current, t_sent, p.get()); } @@ -1513,8 +1516,12 @@ void Replica::handle(Status* m) void Replica::handle(View_change* m) { - LOG_INFO << "Received view change for " << m->view() << " from " << m->id() - << ", v:" << v << std::endl; + LOG_INFO_FMT( + "Received view change for {} from {} with digest {}, v: {}", + m->view(), + m->id(), + m->digest().hash(), + v); if (m->id() == primary() && m->view() > v) { @@ -1677,6 +1684,11 @@ void Replica::write_view_change_to_ledger() continue; } + LOG_TRACE_FMT( + "Writing view for {} with digest {} to ledger", + vc->view(), + vc->digest().hash()); + ledger_writer->write_view_change(vc); } } From 233777990dbd6ed48b4c1ad5b92efa0765b265db Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 24 Jan 2020 13:50:44 +0000 Subject: [PATCH 13/42] cleanup --- src/consensus/pbft/pbft.h | 81 ++++------------------------------ src/consensus/pbft/pbfttypes.h | 8 +--- src/kv/kv.h | 1 + 3 files changed, 10 insertions(+), 80 deletions(-) diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index 9cc2fd164fd5..e30f949bb528 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -27,12 +27,8 @@ namespace pbft { - struct NodeState - { - // the highest matching index that we send - Index match_idx; - }; - using NodesMap = std::unordered_map; + // maps node to last sent index to that node + using NodesMap = std::unordered_map; class PbftEnclaveNetwork : public INetwork { public: @@ -90,18 +86,12 @@ namespace pbft if (msg->tag() == Append_entries_tag) { - // // status messages are intercepted to add the append entries info - // Append_entries* ae; - // Append_entries::convert(msg, ae); - // // set my append entries index to the status message - auto node = nodes.find(to); pbft::Index match_idx = 0; - // match_idx = nodes.at(to).match_idx; if (node != nodes.end()) { - match_idx = node->second.match_idx; + match_idx = node->second; } if (match_idx < latest_stable_ae_index) @@ -110,15 +100,6 @@ namespace pbft } return msg->size(); } - if (msg->tag() == Status_tag) - { - LOG_INFO_FMT("SENDING SM TO {} WITH AE {}", to, append_entries_index); - - StatusMessage sm = {pbft_status_message, id, append_entries_index}; - - n2n_channels->send_authenticated( - ccf::NodeMsgType::consensus_msg, to, sm); - } n2n_channels->send_authenticated( ccf::NodeMsgType::consensus_msg, to, serialized_msg); return msg->size(); @@ -163,16 +144,12 @@ namespace pbft auto node = nodes.find(to); if (node != nodes.end()) { - node->second.match_idx = end_idx; + node->second = end_idx; } else { - nodes[to] = {end_idx}; + nodes[to] = end_idx; } - // auto& node = nodes.at(to); - - // Record the most recent index we have sent to this node. - // node.match_idx = end_idx; // The host will append log entries to this message when it is // sent to the destination node. @@ -468,7 +445,7 @@ namespace pbft Byz_add_principal(info); LOG_INFO_FMT("PBFT added node, id: {}", info.id); - nodes[node_conf.node_id] = {0}; + nodes[node_conf.node_id] = 0; // pbft_network->send_append_entries(node_conf.node_id, 1); } @@ -527,48 +504,6 @@ namespace pbft message_receiver_base->receive_message(data, size); break; } - case pbft_status_message: - { - StatusMessage sm; - - try - { - sm = - channels->template recv_authenticated(data, size); - } - catch (const std::logic_error& err) - { - LOG_FAIL_FMT(err.what()); - return; - } - - // update node's index - auto node = nodes.find(sm.from_node); - if (node != nodes.end()) - { - node->second.match_idx = sm.idx; - } - else - { - nodes[sm.from_node] = {sm.idx}; - } - LOG_INFO_FMT( - "Received message from: {} with idx {} and my ae is {}", - sm.from_node, - sm.idx, - append_entries_index); - if (sm.idx < append_entries_index) - { - LOG_INFO_FMT( - "Received append entries response from node {} and it is behind. " - "Node is at {} and I am at {}", - sm.from_node, - sm.idx, - append_entries_index); - // pbft_network->send_append_entries(sm.from_node, sm.idx + 1); - } - break; - } case pbft_append_entries: { std::lock_guard guard(playback_lock); @@ -593,11 +528,11 @@ namespace pbft auto node = nodes.find(r.from_node); if (node != nodes.end()) { - node->second.match_idx = r.idx; + node->second = r.idx; } else { - nodes[r.from_node] = {r.idx}; + nodes[r.from_node] = r.idx; } if (r.idx <= append_entries_index) diff --git a/src/consensus/pbft/pbfttypes.h b/src/consensus/pbft/pbfttypes.h index 81159c511169..658118de5e38 100644 --- a/src/consensus/pbft/pbfttypes.h +++ b/src/consensus/pbft/pbfttypes.h @@ -18,8 +18,7 @@ namespace pbft enum PbftMsgType : Node2NodeMsg { pbft_message = 1000, - pbft_append_entries, - pbft_status_message + pbft_append_entries }; #pragma pack(push, 1) @@ -35,11 +34,6 @@ namespace pbft Index prev_idx; }; - struct StatusMessage : PbftHeader - { - Index idx; - }; - #pragma pack(pop) template diff --git a/src/kv/kv.h b/src/kv/kv.h index 2581a4d2b02a..151b31cbc283 100644 --- a/src/kv/kv.h +++ b/src/kv/kv.h @@ -942,6 +942,7 @@ namespace kv void set_reserved_version(Version reserved) { version = reserved; + read_version = reserved; } void set_req_id(const kv::TxHistory::RequestID& req_id_) From 6c44b2fbea199958f1227a7a6218a48517c49adf Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 24 Jan 2020 14:07:50 +0000 Subject: [PATCH 14/42] removing un-needed locks --- src/consensus/pbft/pbft.h | 35 +++++++++++------------------------ 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index e30f949bb528..cec4edcd9196 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -37,14 +37,12 @@ namespace pbft std::shared_ptr n2n_channels, NodesMap& nodes_, pbft::Index& append_entries_index_, - pbft::Index& latest_stable_ae_index_, - SpinLock& lock_) : + pbft::Index& latest_stable_ae_index_) : n2n_channels(n2n_channels), id(id), nodes(nodes_), append_entries_index(append_entries_index_), - latest_stable_ae_index(latest_stable_ae_index_), - lock(lock_) + latest_stable_ae_index(latest_stable_ae_index_) {} virtual ~PbftEnclaveNetwork() = default; @@ -107,7 +105,6 @@ namespace pbft void send_append_entries(pbft::NodeId to, pbft::Index start_idx) { - std::lock_guard guard(lock); size_t entries_batch_size = 10; pbft::Index end_idx = (latest_stable_ae_index == 0) ? @@ -174,7 +171,6 @@ namespace pbft NodesMap& nodes; pbft::Index& append_entries_index; pbft::Index& latest_stable_ae_index; - SpinLock& lock; }; template @@ -196,8 +192,6 @@ namespace pbft std::unique_ptr ledger; Index append_entries_index = 0; Index latest_stable_ae_index = 0; - SpinLock lock; - SpinLock playback_lock; // When this is set, only public domain is deserialised when receving append // entries @@ -285,8 +279,7 @@ namespace pbft channels, nodes, append_entries_index, - latest_stable_ae_index, - lock); + latest_stable_ae_index); pbft_config = std::make_unique(rpc_map); auto used_bytes = Byz_init_replica( @@ -474,9 +467,8 @@ namespace pbft { for (auto& [index, data, globally_committable] : entries) { - std::lock_guard guard(lock); append_entries_index++; - LOG_INFO_FMT("Increasing my ae index to {}", append_entries_index); + LOG_TRACE_FMT("Increasing my ae index to {}", append_entries_index); write_to_ledger(data); } return true; @@ -486,9 +478,8 @@ namespace pbft { for (auto& [index, data, globally_committable] : entries) { - std::lock_guard guard(lock); append_entries_index++; - LOG_INFO_FMT("Increasing my ae index to {}", append_entries_index); + LOG_TRACE_FMT("Increasing my ae index to {}", append_entries_index); write_to_ledger(data); } return true; @@ -506,7 +497,6 @@ namespace pbft } case pbft_append_entries: { - std::lock_guard guard(playback_lock); AppendEntries r; try @@ -520,8 +510,8 @@ namespace pbft return; } - LOG_INFO_FMT( - "New append entries message from {}, my ae index is {}", + LOG_TRACE_FMT( + "Append entries message from {}, my ae index is {}", r.from_node, append_entries_index); @@ -537,8 +527,8 @@ namespace pbft if (r.idx <= append_entries_index) { - LOG_INFO_FMT( - "Skipping INDEX {} as we are at index {}", + LOG_TRACE_FMT( + "Skipping append entries msg for index {} as we are at index {}", r.idx, append_entries_index); break; @@ -546,18 +536,16 @@ namespace pbft for (Index i = r.prev_idx + 1; i <= r.idx; i++) { - LOG_INFO_FMT( - "RECORDING ENTRY FOR INDEX {} FOR DATA WITH SIZE {}", i, size); + LOG_TRACE_FMT("Recording entry for index {}", i); pbft::Index aei; { - std::lock_guard guard(lock); aei = append_entries_index; } if (i <= aei) { // If the current entry has already been deserialised, skip the // payload for that entry - LOG_INFO_FMT("Skipping INDEX {} as we are at index {}", i, aei); + LOG_INFO_FMT("Skipping index {} as we are at index {}", i, aei); ledger->skip_entry(data, size); continue; } @@ -574,7 +562,6 @@ namespace pbft local_id, r.from_node); { - std::lock_guard guard(lock); append_entries_index = r.prev_idx; } ledger->truncate(r.prev_idx); From 9695ec2e48889befab4594b7050d0330453b4ad4 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 24 Jan 2020 14:14:43 +0000 Subject: [PATCH 15/42] check principal is configured before accepting certificate --- src/consensus/pbft/libbyz/Append_entries.cpp | 4 ++-- src/consensus/pbft/libbyz/Certificate.h | 18 +++++++++--------- src/consensus/pbft/libbyz/Status.cpp | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/consensus/pbft/libbyz/Append_entries.cpp b/src/consensus/pbft/libbyz/Append_entries.cpp index 606dcb44ce13..c576278fca63 100644 --- a/src/consensus/pbft/libbyz/Append_entries.cpp +++ b/src/consensus/pbft/libbyz/Append_entries.cpp @@ -8,13 +8,13 @@ #include "ds/logger.h" #include "pbft_assert.h" -Append_entries::Append_entries(): +Append_entries::Append_entries() : Message(Append_entries_tag, sizeof(Append_entries_rep)) {} bool Append_entries::verify() { - return true; + return true; } bool Append_entries::convert(Message* m1, Append_entries*& m2) diff --git a/src/consensus/pbft/libbyz/Certificate.h b/src/consensus/pbft/libbyz/Certificate.h index 625c5ba1d428..472ccf0c851e 100644 --- a/src/consensus/pbft/libbyz/Certificate.h +++ b/src/consensus/pbft/libbyz/Certificate.h @@ -347,15 +347,15 @@ void Certificate::reset_f() template bool Certificate::add(T* m) { - // auto principal = pbft::GlobalState::get_node().get_principal(m->id()); - // if (!principal) - // { - // LOG_TRACE_FMT( - // "Principal with id {} has not been configured yet, rejecting the message", - // m->id()); - // delete m; - // return false; - // } + auto principal = pbft::GlobalState::get_node().get_principal(m->id()); + if (!principal) + { + LOG_TRACE_FMT( + "Principal with id {} has not been configured yet, rejecting the message", + m->id()); + delete m; + return false; + } if (bmap.none() && f != pbft::GlobalState::get_node().f()) { diff --git a/src/consensus/pbft/libbyz/Status.cpp b/src/consensus/pbft/libbyz/Status.cpp index 5ecb705fdefa..979928029570 100644 --- a/src/consensus/pbft/libbyz/Status.cpp +++ b/src/consensus/pbft/libbyz/Status.cpp @@ -5,8 +5,8 @@ #include "Status.h" -#include "Message_tags.h" #include "Append_entries.h" +#include "Message_tags.h" #include "Node.h" #include "Principal.h" #include "pbft_assert.h" From 0ed2bd13c46c6a1e583cad462fa5e2f81a1b3c44 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 24 Jan 2020 14:49:35 +0000 Subject: [PATCH 16/42] killing backups and making sure new nodes still make progress --- tests/infra/node.py | 2 +- tests/node_suspension.py | 62 ++++++++++++++++++++++++++++------------ 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/tests/infra/node.py b/tests/infra/node.py index 70b7e7f4b74a..de2df8951906 100644 --- a/tests/infra/node.py +++ b/tests/infra/node.py @@ -229,7 +229,7 @@ def member_client(self, format="msgpack", member_id=1, **kwargs): def suspend(self): self.remote.suspend() - + def resume(self): self.remote.resume() diff --git a/tests/node_suspension.py b/tests/node_suspension.py index 59aab5b527eb..9f6525d5f012 100644 --- a/tests/node_suspension.py +++ b/tests/node_suspension.py @@ -46,6 +46,11 @@ def run(args): term_info = {} long_msg = "X" * (2 ** 14) + nodes_to_kill = [] + for b in backups: + nodes_to_kill.append(b) + nodes_to_keep = [first_node] + # first timer determines after how many seconds each node will be suspended if not args.skip_suspension: timeouts = [] @@ -73,13 +78,16 @@ def run(args): clients.append(es.enter_context(backup.user_client(format="json"))) node_id = 0 for id in range(1, TOTAL_REQUESTS): - if id == 5: - LOG.error("Adding another node after f = 0 but before any node has checkpointed") + if id == 1: + LOG.info( + "Adding another node after f = 0 but before we need to send append entries" + ) # check that a new node can catch up naturally new_node = network.create_and_trust_node( lib_name=args.package, host="localhost", args=args, ) assert new_node + nodes_to_keep.append(new_node) node_id += 1 c = clients[node_id % len(clients)] try: @@ -94,18 +102,20 @@ def run(args): id += 1 # wait for the last request to commit - final_msg = "Hello world!" + first_catchup_msg = "Hello world!" check_commit( - c.rpc("LOG_record", {"id": 1000, "msg": final_msg}), result=True, + c.rpc("LOG_record", {"id": 1000, "msg": first_catchup_msg}), + result=True, ) check( - c.rpc("LOG_get", {"id": 1000}), result={"msg": final_msg}, + c.rpc("LOG_get", {"id": 1000}), result={"msg": first_catchup_msg}, ) # check that new node has caught up ok with new_node.user_client(format="json") as c: check( - c.rpc("LOG_get", {"id": 1000}), result={"msg": final_msg}, + c.rpc("LOG_get", {"id": 1000}), + result={"msg": first_catchup_msg}, ) # add new node to backups list backups.append(new_node) @@ -115,9 +125,7 @@ def run(args): lib_name=args.package, host="localhost", args=args, ) assert last_node - - # # give new_node a second to catch up - # time.sleep(1) + nodes_to_keep.append(last_node) ## send more messages I want to check that the new node will catchup and then start processing pre prepares, etc clients = [] @@ -141,26 +149,42 @@ def run(args): LOG.info("Trying to access a suspended node") id += 1 - ## send final final message # wait for the last request to commit - final_msg2 = "Hello world Hello!" + second_catchup_msg = "Hello world Hello!" check_commit( - c.rpc("LOG_record", {"id": 2000, "msg": final_msg2}), result=True, + c.rpc("LOG_record", {"id": 2000, "msg": second_catchup_msg}), + result=True, ) check( - c.rpc("LOG_get", {"id": 2000}), result={"msg": final_msg2}, + c.rpc("LOG_get", {"id": 2000}), result={"msg": second_catchup_msg}, ) with last_node.user_client(format="json") as c: check( - c.rpc("LOG_get", {"id": 1000}), result={"msg": final_msg}, + c.rpc("LOG_get", {"id": 1000}), result={"msg": first_catchup_msg}, ) with last_node.user_client(format="json") as c: check( - c.rpc("LOG_get", {"id": 2000}), result={"msg": final_msg2}, + c.rpc("LOG_get", {"id": 2000}), result={"msg": second_catchup_msg}, ) - - # all the nodes should be caught up by now + + # replace the 2 backups with the 2 new nodes, kill the old ones and ensure we are still making progress + for node in nodes_to_kill: + LOG.info(f"Stopping node {node.node_id}") + node.stop() + + for i, node in enumerate(nodes_to_keep): + with node.user_client(format="json") as c: + final_msg = "Goodby world!" + check_commit( + c.rpc("LOG_record", {"id": 3000 + i, "msg": final_msg}), + result=True, + ) + check( + c.rpc("LOG_get", {"id": 3000 + i}), result={"msg": final_msg}, + ) + + # all the nodes should be caught up by now if not args.skip_suspension: # assert that view changes actually did occur @@ -172,12 +196,14 @@ def run(args): if __name__ == "__main__": + def add(parser): parser.add_argument( "--skip-suspension", help="Don't suspend any nodes (i.e. just do late join)", - action="store_true" + action="store_true", ) + args = e2e_args.cli_args(add) args.package = args.app_script and "libluagenericenc" or "libloggingenc" From 8e18dcf51509f08dd4e90591de44829d97ea77b9 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 24 Jan 2020 14:56:44 +0000 Subject: [PATCH 17/42] rename node_suspension to late_joiners and add to CI --- cmake/pbft.cmake | 6 ++++++ tests/{node_suspension.py => late_joiners.py} | 0 2 files changed, 6 insertions(+) rename tests/{node_suspension.py => late_joiners.py} (100%) diff --git a/cmake/pbft.cmake b/cmake/pbft.cmake index 676273c34f7d..1dd0e1bcf36a 100644 --- a/cmake/pbft.cmake +++ b/cmake/pbft.cmake @@ -78,6 +78,12 @@ if (NOT HTTP) NAME end_to_end_pbft PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_logging_pbft.py ) + + add_e2e_test( + NAME late_joiners + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/late_joiners.py + ADDITIONAL_ARGS --skip-suspension + ) endif() if("virtual" IN_LIST TARGET) diff --git a/tests/node_suspension.py b/tests/late_joiners.py similarity index 100% rename from tests/node_suspension.py rename to tests/late_joiners.py From 99aab087fa4637b954a116ac16db1f094d9f16ef Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Mon, 27 Jan 2020 11:02:09 +0000 Subject: [PATCH 18/42] cleanup --- cmake/pbft.cmake | 1 + src/consensus/pbft/libbyz/Append_entries.h | 2 +- src/consensus/pbft/libbyz/Principal.h | 2 - src/consensus/pbft/libbyz/Replica.cpp | 2 +- src/consensus/pbft/pbft.h | 31 ++-- src/consensus/pbft/pbftrequests.h | 3 - src/enclave/enclavetypes.h | 158 --------------------- src/node/rpc/memberfrontend.h | 2 - 8 files changed, 18 insertions(+), 183 deletions(-) delete mode 100644 src/enclave/enclavetypes.h diff --git a/cmake/pbft.cmake b/cmake/pbft.cmake index 8cc1ca1cc3de..0a51db9858ec 100644 --- a/cmake/pbft.cmake +++ b/cmake/pbft.cmake @@ -118,6 +118,7 @@ if("virtual" IN_LIST TARGET) ${EVERCRYPT_INC} ) + add_dependencies(libcommontest.mock flatbuffers) target_compile_options(libcommontest.mock PRIVATE -stdlib=libc++) function(use_libbyz name) diff --git a/src/consensus/pbft/libbyz/Append_entries.h b/src/consensus/pbft/libbyz/Append_entries.h index 0d2e010c07cf..64e5f386c818 100644 --- a/src/consensus/pbft/libbyz/Append_entries.h +++ b/src/consensus/pbft/libbyz/Append_entries.h @@ -8,7 +8,7 @@ #include "nodeinfo.h" // -// New principal messages have the following format: +// Append entries messages have the following format: // #pragma pack(push) #pragma pack(1) diff --git a/src/consensus/pbft/libbyz/Principal.h b/src/consensus/pbft/libbyz/Principal.h index 10105705f5f3..54c84d7007ac 100644 --- a/src/consensus/pbft/libbyz/Principal.h +++ b/src/consensus/pbft/libbyz/Principal.h @@ -71,8 +71,6 @@ class Principal : public IPrincipal void set_certificate(const std::vector& cert_); bool has_certificate_set(); - Index get_last_ae_sent() const; - void set_last_ae_sent(Index ae); private: int id; diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index 683d9e0eb891..30ac42505699 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -313,7 +313,7 @@ void Replica::playback_transaction(ccf::Store::Tx& tx) void Replica::playback_request(const pbft::Request& request, ccf::Store::Tx& tx) { - LOG_INFO_FMT( + LOG_TRACE_FMT( "Playback request for request with size {}", request.pbft_raw.size()); auto req = create_message(request.pbft_raw.data(), request.pbft_raw.size()); diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index b4427c8564b5..7ab869a34207 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -28,16 +28,16 @@ namespace pbft { // maps node to last sent index to that node - using NodesMap = std::unordered_map; + using NodesMap = std::unordered_map; class PbftEnclaveNetwork : public INetwork { public: PbftEnclaveNetwork( - pbft::NodeId id, + NodeId id, std::shared_ptr n2n_channels, NodesMap& nodes_, - pbft::Index& append_entries_index_, - pbft::Index& latest_stable_ae_index_) : + Index& append_entries_index_, + Index& latest_stable_ae_index_) : n2n_channels(n2n_channels), id(id), nodes(nodes_), @@ -59,7 +59,7 @@ namespace pbft int Send(Message* msg, IPrincipal& principal) override { - pbft::NodeId to = principal.pid(); + NodeId to = principal.pid(); if (to == id) { // If a replica sends a message to itself (e.g. if f == 0), handle @@ -86,7 +86,7 @@ namespace pbft { auto node = nodes.find(to); - pbft::Index match_idx = 0; + Index match_idx = 0; if (node != nodes.end()) { match_idx = node->second; @@ -103,15 +103,15 @@ namespace pbft return msg->size(); } - void send_append_entries(pbft::NodeId to, pbft::Index start_idx) + void send_append_entries(NodeId to, Index start_idx) { size_t entries_batch_size = 10; - pbft::Index end_idx = (latest_stable_ae_index == 0) ? + Index end_idx = (latest_stable_ae_index == 0) ? 0 : std::min(start_idx + entries_batch_size, latest_stable_ae_index); - for (pbft::Index i = end_idx; i < latest_stable_ae_index; + for (Index i = end_idx; i < latest_stable_ae_index; i += entries_batch_size) { send_append_entries_range(to, start_idx, i); @@ -124,8 +124,7 @@ namespace pbft } } - void send_append_entries_range( - pbft::NodeId to, pbft::Index start_idx, pbft::Index end_idx) + void send_append_entries_range(NodeId to, Index start_idx, Index end_idx) { const auto prev_idx = start_idx - 1; @@ -169,8 +168,8 @@ namespace pbft IMessageReceiveBase* message_receiver_base = nullptr; NodeId id; NodesMap& nodes; - pbft::Index& append_entries_index; - pbft::Index& latest_stable_ae_index; + Index& append_entries_index; + Index& latest_stable_ae_index; }; template @@ -220,8 +219,8 @@ namespace pbft struct register_mark_stable_info { - pbft::Index* append_entries_idx; - pbft::Index* latest_stable_ae_idx; + Index* append_entries_idx; + Index* latest_stable_ae_idx; } register_mark_stable_ctx; public: @@ -535,7 +534,7 @@ namespace pbft for (Index i = r.prev_idx + 1; i <= r.idx; i++) { LOG_TRACE_FMT("Recording entry for index {}", i); - pbft::Index aei; + Index aei; { aei = append_entries_index; } diff --git a/src/consensus/pbft/pbftrequests.h b/src/consensus/pbft/pbftrequests.h index a876ac74f2fd..ff42cda23c9d 100644 --- a/src/consensus/pbft/pbftrequests.h +++ b/src/consensus/pbft/pbftrequests.h @@ -67,12 +67,9 @@ namespace pbft } auto raw_size = serialized::read(data_, size_); raw = serialized::read(data_, size_, raw_size); - LOG_INFO_FMT("getting pbft raw size"); auto pbft_raw_size = serialized::read(data_, size_); - LOG_INFO_FMT("pbft raw size {}", pbft_raw_size); if (pbft_raw_size > 0) { - LOG_INFO_FMT("hmmmm"); pbft_raw = serialized::read(data_, size_, pbft_raw_size); } else diff --git a/src/enclave/enclavetypes.h b/src/enclave/enclavetypes.h deleted file mode 100644 index 4ed393379b8e..000000000000 --- a/src/enclave/enclavetypes.h +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the Apache 2.0 License. -#pragma once - -#include "node/clientsignatures.h" -#include "node/entities.h" -#include "node/rpc/jsonrpc.h" - -#include - -namespace enclave -{ - static constexpr size_t InvalidSessionId = std::numeric_limits::max(); - - struct SessionContext - { - size_t client_session_id = InvalidSessionId; - std::vector caller_cert = {}; - - // - // Only set in the case of a forwarded RPC - // - struct Forwarded - { - // Initialised when forwarded context is created - const size_t client_session_id; - const ccf::CallerId caller_id; - - Forwarded(size_t client_session_id_, ccf::CallerId caller_id_) : - client_session_id(client_session_id_), - caller_id(caller_id_) - {} - }; - std::optional fwd = std::nullopt; - - // Constructor used for non-forwarded RPC - SessionContext( - size_t client_session_id_, const std::vector& caller_cert_) : - client_session_id(client_session_id_), - caller_cert(caller_cert_) - {} - - // Constructor used for forwarded and PBFT RPC - SessionContext( - size_t fwd_session_id_, - ccf::CallerId caller_id_, - const std::vector& caller_cert_ = {}) : - fwd(std::make_optional(fwd_session_id_, caller_id_)), - caller_cert(caller_cert_) - {} - }; - - struct RPCContext - { - SessionContext session; - - // Packing format of original request, should be used to pack response - std::optional pack = std::nullopt; - - // TODO: Avoid unnecessary copies - std::vector raw = {}; - - // raw pbft Request - // TODO see if you can get raw from pbft_raw and if raw is used in raft or - // not - std::vector pbft_raw = {}; - - nlohmann::json unpacked_rpc = {}; - - std::optional signed_request = std::nullopt; - - // Actor type to dispatch to appropriate frontend - ccf::ActorsType actor = ccf::ActorsType::unknown; - - // Method indicates specific handler for this request - std::string method = {}; - - uint64_t seq_no = {}; - - nlohmann::json params = {}; - - bool is_create_request = false; - - RPCContext(const SessionContext& s) : session(s) {} - }; - - inline void parse_rpc_context(RPCContext& rpc_ctx, const nlohmann::json& rpc) - { - const auto sig_it = rpc.find(jsonrpc::SIG); - if (sig_it != rpc.end()) - { - rpc_ctx.unpacked_rpc = rpc.at(jsonrpc::REQ); - ccf::SignedReq signed_req; - signed_req.sig = sig_it->get(); - signed_req.req = nlohmann::json::to_msgpack(rpc_ctx.unpacked_rpc); - rpc_ctx.signed_request = signed_req; - } - else - { - rpc_ctx.unpacked_rpc = rpc; - rpc_ctx.signed_request = std::nullopt; - } - - const auto method_it = rpc_ctx.unpacked_rpc.find(jsonrpc::METHOD); - if (method_it != rpc_ctx.unpacked_rpc.end()) - { - rpc_ctx.method = method_it->get(); - } - - const auto seq_it = rpc_ctx.unpacked_rpc.find(jsonrpc::ID); - if (seq_it != rpc_ctx.unpacked_rpc.end()) - { - rpc_ctx.seq_no = seq_it->get(); - } - - const auto params_it = rpc_ctx.unpacked_rpc.find(jsonrpc::PARAMS); - if (params_it != rpc_ctx.unpacked_rpc.end()) - { - rpc_ctx.params = *params_it; - } - } - - inline RPCContext make_rpc_context( - const SessionContext& s, const std::vector& packed) - { - RPCContext rpc_ctx(s); - - auto [success, rpc] = jsonrpc::unpack_rpc(packed, rpc_ctx.pack); - if (!success) - { - throw std::logic_error(fmt::format("Failed to unpack: {}", rpc.dump())); - } - - parse_rpc_context(rpc_ctx, rpc); - rpc_ctx.raw = packed; - - return rpc_ctx; - } - - class AbstractRPCResponder - { - public: - virtual ~AbstractRPCResponder() {} - virtual bool reply_async(size_t id, const std::vector& data) = 0; - }; - - class AbstractForwarder - { - public: - virtual ~AbstractForwarder() {} - - virtual bool forward_command( - const enclave::RPCContext& rpc_ctx, - ccf::NodeId to, - ccf::CallerId caller_id, - const std::vector& caller_cert) = 0; - }; -} \ No newline at end of file diff --git a/src/node/rpc/memberfrontend.h b/src/node/rpc/memberfrontend.h index 0153c5431553..feb44ed7ba20 100644 --- a/src/node/rpc/memberfrontend.h +++ b/src/node/rpc/memberfrontend.h @@ -521,8 +521,6 @@ namespace ccf return; } - LOG_INFO_FMT("Creating service"); - g.init_values(); for (auto& cert : in.member_cert) { From 973be035c593ffedf98a1d52d040653f145d16b4 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Mon, 27 Jan 2020 12:06:38 +0000 Subject: [PATCH 19/42] remove un-used type --- src/consensus/pbft/pbfttypes.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/consensus/pbft/pbfttypes.h b/src/consensus/pbft/pbfttypes.h index 658118de5e38..9ea98452e6f3 100644 --- a/src/consensus/pbft/pbfttypes.h +++ b/src/consensus/pbft/pbfttypes.h @@ -13,7 +13,6 @@ namespace pbft using NodeId = uint64_t; using Node2NodeMsg = uint64_t; using CallerId = uint64_t; - using RequestId = uint64_t; enum PbftMsgType : Node2NodeMsg { From ef822daaf5e4d06ca071e4f4594046e3a89d5b60 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Tue, 28 Jan 2020 17:33:58 +0000 Subject: [PATCH 20/42] actually use the transaction on playback --- src/consensus/pbft/libbyz/Prepared_cert.h | 2 +- src/consensus/pbft/libbyz/Replica.cpp | 18 ++++++++++++++---- src/consensus/pbft/libbyz/Replica.h | 2 ++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/consensus/pbft/libbyz/Prepared_cert.h b/src/consensus/pbft/libbyz/Prepared_cert.h index ce9ac9cf44a2..0212f1b2a69d 100644 --- a/src/consensus/pbft/libbyz/Prepared_cert.h +++ b/src/consensus/pbft/libbyz/Prepared_cert.h @@ -145,7 +145,7 @@ inline bool Prepared_cert::add(Prepare* m) if (!principal) { LOG_INFO_FMT( - "Returning false from prepared cert, probably need to delete this"); + "Principal with id {} has not been configured yet, rejecting prepare", id); delete m; return false; } diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index a7b8ad4006b0..a90e761f0c8c 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -411,7 +411,13 @@ void Replica::playback_request(const pbft::Request& request, ccf::Store::Tx& tx) waiting_for_playback_pp = true; Byz_buffer non_det; execute_tentative_request( - *req, playback_byz_info, playback_max_local_commit_value, non_det); + *req, + playback_byz_info, + playback_max_local_commit_value, + non_det, + nullptr, + &tx, + true); } void Replica::playback_pre_prepare( @@ -441,7 +447,7 @@ void Replica::playback_pre_prepare( last_prepared = seqno; } - ledger_writer->write_pre_prepare(executable_pp.get()); + ledger_writer->write_pre_prepare(executable_pp.get(), tx); if (global_commit_cb != nullptr && executable_pp->is_signed()) { @@ -2119,6 +2125,8 @@ void Replica::execute_tentative_request( int64_t& max_local_commit_value, Byz_buffer& non_det, char* nondet_choices, + ccf::Store::Tx* tx, + bool playback, Seqno seqno) { int client_id = request.client_id(); @@ -2154,8 +2162,8 @@ void Replica::execute_tentative_request( request.contents_size(), replies.total_requests_processed(), info, - false, - nullptr); + playback, + tx); right_pad_contents(outb); // Finish constructing the reply. LOG_DEBUG_FMT( @@ -2205,6 +2213,8 @@ bool Replica::execute_tentative(Pre_prepare* pp, ByzInfo& info) max_local_commit_value, non_det, pp->choices(non_det.size), + nullptr, + false, pp->seqno()); } LOG_DEBUG_FMT( diff --git a/src/consensus/pbft/libbyz/Replica.h b/src/consensus/pbft/libbyz/Replica.h index 25a0c45c6aa6..f52adedfec3a 100644 --- a/src/consensus/pbft/libbyz/Replica.h +++ b/src/consensus/pbft/libbyz/Replica.h @@ -308,6 +308,8 @@ class Replica : public Node, public IMessageReceiveBase int64_t& max_local_commit_value, Byz_buffer& non_det, char* nondet_choices = nullptr, + ccf::Store::Tx* tx = nullptr, + bool playback = false, Seqno seqno = -1); // Effects: called by execute_tentative or playback_request to execute the // request. seqno == -1 means we are running it from playback From 1451a10fe2ff941d6ce5648a054e2b0f6d57c8d1 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Tue, 28 Jan 2020 17:43:43 +0000 Subject: [PATCH 21/42] replacing append_entries_index with store->current_version() as they are equivalent --- src/consensus/pbft/pbft.h | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index 7ab869a34207..a3bd7da238f8 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -36,12 +36,10 @@ namespace pbft NodeId id, std::shared_ptr n2n_channels, NodesMap& nodes_, - Index& append_entries_index_, Index& latest_stable_ae_index_) : n2n_channels(n2n_channels), id(id), nodes(nodes_), - append_entries_index(append_entries_index_), latest_stable_ae_index(latest_stable_ae_index_) {} @@ -168,7 +166,6 @@ namespace pbft IMessageReceiveBase* message_receiver_base = nullptr; NodeId id; NodesMap& nodes; - Index& append_entries_index; Index& latest_stable_ae_index; }; @@ -189,7 +186,6 @@ namespace pbft View last_commit_view; std::unique_ptr store; std::unique_ptr ledger; - Index append_entries_index = 0; Index latest_stable_ae_index = 0; // When this is set, only public domain is deserialised when receving append @@ -219,7 +215,7 @@ namespace pbft struct register_mark_stable_info { - Index* append_entries_idx; + pbft::PbftStore* store; Index* latest_stable_ae_idx; } register_mark_stable_ctx; @@ -274,11 +270,7 @@ namespace pbft bzero(mem, mem_size); pbft_network = std::make_unique( - local_id, - channels, - nodes, - append_entries_index, - latest_stable_ae_index); + local_id, channels, nodes, latest_stable_ae_index); pbft_config = std::make_unique(rpc_map); auto used_bytes = Byz_init_replica( @@ -316,12 +308,12 @@ namespace pbft auto mark_stable_cb = [](void* ctx) { auto ms_ctx = static_cast(ctx); - *ms_ctx->latest_stable_ae_idx = *ms_ctx->append_entries_idx; - LOG_INFO_FMT( + *ms_ctx->latest_stable_ae_idx = ms_ctx->store->current_version(); + LOG_TRACE_FMT( "latest_stable_ae_index is set to {}", *ms_ctx->latest_stable_ae_idx); }; - register_mark_stable_ctx.append_entries_idx = &append_entries_index; + register_mark_stable_ctx.store = store.get(); register_mark_stable_ctx.latest_stable_ae_idx = &latest_stable_ae_index; message_receiver_base->register_mark_stable( @@ -464,8 +456,6 @@ namespace pbft { for (auto& [index, data, globally_committable] : entries) { - append_entries_index++; - LOG_TRACE_FMT("Increasing my ae index to {}", append_entries_index); write_to_ledger(data); } return true; @@ -475,8 +465,6 @@ namespace pbft { for (auto& [index, data, globally_committable] : entries) { - append_entries_index++; - LOG_TRACE_FMT("Increasing my ae index to {}", append_entries_index); write_to_ledger(data); } return true; @@ -496,6 +484,8 @@ namespace pbft { AppendEntries r; + auto append_entries_index = store->current_version(); + try { r = @@ -533,16 +523,17 @@ namespace pbft for (Index i = r.prev_idx + 1; i <= r.idx; i++) { + append_entries_index = store->current_version(); LOG_TRACE_FMT("Recording entry for index {}", i); - Index aei; - { - aei = append_entries_index; - } - if (i <= aei) + + if (i <= append_entries_index) { // If the current entry has already been deserialised, skip the // payload for that entry - LOG_INFO_FMT("Skipping index {} as we are at index {}", i, aei); + LOG_INFO_FMT( + "Skipping index {} as we are at index {}", + i, + append_entries_index); ledger->skip_entry(data, size); continue; } From a724888eb8643f2d139c138b458074db062ebbe6 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Tue, 28 Jan 2020 17:44:47 +0000 Subject: [PATCH 22/42] remove un-used --- src/consensus/pbft/pbft.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index a3bd7da238f8..2194eaa8bce5 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -549,9 +549,6 @@ namespace pbft "Recv append entries to {} from {} but the data is malformed", local_id, r.from_node); - { - append_entries_index = r.prev_idx; - } ledger->truncate(r.prev_idx); return; } From 91d54f8665d56b077f192dc835b1196b041fb189 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Wed, 29 Jan 2020 13:28:38 +0000 Subject: [PATCH 23/42] addressing some PR comments --- cmake/pbft.cmake | 5 +++++ tests/late_joiners.py | 24 +++++++++++------------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/cmake/pbft.cmake b/cmake/pbft.cmake index aee2c2fa63be..d1cb606732b0 100644 --- a/cmake/pbft.cmake +++ b/cmake/pbft.cmake @@ -5,6 +5,11 @@ add_definitions(-DSIGN_BATCH) set(SIGN_BATCH ON) +if(SAN) + add_definitions(-DUSE_STD_MALLOC) + set(USE_STD_MALLOC ON) +endif() + set(PBFT_SRC ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/globalstate.cpp ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Client.cpp diff --git a/tests/late_joiners.py b/tests/late_joiners.py index 9f6525d5f012..8dbde99a77f1 100644 --- a/tests/late_joiners.py +++ b/tests/late_joiners.py @@ -46,7 +46,7 @@ def run(args): term_info = {} long_msg = "X" * (2 ** 14) - nodes_to_kill = [] + nodes_to_kill = [backups[0]] for b in backups: nodes_to_kill.append(b) nodes_to_keep = [first_node] @@ -66,6 +66,14 @@ def run(args): tm = Timer(t, timeout, args=[node, True, args.election_timeout / 1000],) tm.start() + LOG.info("Adding another node after f = 0 but before we need to send append entries") + # check that a new node can catch up naturally + new_node = network.create_and_trust_node( + lib_name=args.package, host="localhost", args=args, + ) + assert new_node + nodes_to_keep.append(new_node) + with first_node.node_client() as mc: check_commit = infra.checker.Checker(mc) check = infra.checker.Checker() @@ -78,16 +86,6 @@ def run(args): clients.append(es.enter_context(backup.user_client(format="json"))) node_id = 0 for id in range(1, TOTAL_REQUESTS): - if id == 1: - LOG.info( - "Adding another node after f = 0 but before we need to send append entries" - ) - # check that a new node can catch up naturally - new_node = network.create_and_trust_node( - lib_name=args.package, host="localhost", args=args, - ) - assert new_node - nodes_to_keep.append(new_node) node_id += 1 c = clients[node_id % len(clients)] try: @@ -175,7 +173,7 @@ def run(args): for i, node in enumerate(nodes_to_keep): with node.user_client(format="json") as c: - final_msg = "Goodby world!" + final_msg = "Goodbye world!" check_commit( c.rpc("LOG_record", {"id": 3000 + i, "msg": final_msg}), result=True, @@ -184,7 +182,7 @@ def run(args): c.rpc("LOG_get", {"id": 3000 + i}), result={"msg": final_msg}, ) - # all the nodes should be caught up by now + # we have asserted that all nodes are caught up if not args.skip_suspension: # assert that view changes actually did occur From 6c7b48dd59d5e2dd61eb7dc68f178df138fcef95 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Wed, 29 Jan 2020 13:29:57 +0000 Subject: [PATCH 24/42] trying to strip out an AppendEntriesIndex for both AppendEntries --- src/consensus/consensustypes.h | 12 ++++++++++++ src/consensus/pbft/pbft.h | 2 +- src/consensus/pbft/pbfttypes.h | 9 +++------ src/consensus/raft/raft.h | 2 +- src/consensus/raft/rafttypes.h | 5 ++--- src/host/nodeconnections.h | 31 ++++++++----------------------- 6 files changed, 27 insertions(+), 34 deletions(-) diff --git a/src/consensus/consensustypes.h b/src/consensus/consensustypes.h index 4ae22ac052f3..5890de0eabd9 100644 --- a/src/consensus/consensustypes.h +++ b/src/consensus/consensustypes.h @@ -8,4 +8,16 @@ namespace ccf using ObjectId = uint64_t; using NodeId = ObjectId; using Index = int64_t; +} + +namespace consensus +{ +#pragma pack(push, 1) + struct AppendEntriesIndex + { + ccf::Index idx; + ccf::Index prev_idx; + }; + +#pragma pack(pop) } \ No newline at end of file diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index 2194eaa8bce5..1732873ed7c9 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -103,7 +103,7 @@ namespace pbft void send_append_entries(NodeId to, Index start_idx) { - size_t entries_batch_size = 10; + Index entries_batch_size = 10; Index end_idx = (latest_stable_ae_index == 0) ? 0 : diff --git a/src/consensus/pbft/pbfttypes.h b/src/consensus/pbft/pbfttypes.h index 9ea98452e6f3..cec76141a69a 100644 --- a/src/consensus/pbft/pbfttypes.h +++ b/src/consensus/pbft/pbfttypes.h @@ -8,7 +8,7 @@ namespace pbft { - using Index = uint64_t; + using Index = int64_t; using Term = uint64_t; using NodeId = uint64_t; using Node2NodeMsg = uint64_t; @@ -27,11 +27,8 @@ namespace pbft NodeId from_node; }; - struct AppendEntries : PbftHeader - { - Index idx; - Index prev_idx; - }; + struct AppendEntries : PbftHeader, consensus::AppendEntriesIndex + {}; #pragma pack(pop) diff --git a/src/consensus/raft/raft.h b/src/consensus/raft/raft.h index a290363137cc..fd8323ea08a0 100644 --- a/src/consensus/raft/raft.h +++ b/src/consensus/raft/raft.h @@ -478,8 +478,8 @@ namespace raft AppendEntries ae = {raft_append_entries, local_id, end_idx, - current_term, prev_idx, + current_term, prev_term, commit_idx, term_of_idx}; diff --git a/src/consensus/raft/rafttypes.h b/src/consensus/raft/rafttypes.h index e4ea6e002de3..5bd0b77dd722 100644 --- a/src/consensus/raft/rafttypes.h +++ b/src/consensus/raft/rafttypes.h @@ -2,6 +2,7 @@ // Licensed under the Apache 2.0 License. #pragma once +#include "consensus/consensustypes.h" #include "ds/ringbuffer_types.h" #include @@ -89,11 +90,9 @@ namespace raft NodeId from_node; }; - struct AppendEntries : RaftHeader + struct AppendEntries : RaftHeader, consensus::AppendEntriesIndex { - Index idx; Term term; - Index prev_idx; Term prev_term; Index leader_commit_idx; Term term_of_idx; diff --git a/src/host/nodeconnections.h b/src/host/nodeconnections.h index 2196963f2c8c..3a17a9600ad1 100644 --- a/src/host/nodeconnections.h +++ b/src/host/nodeconnections.h @@ -229,38 +229,23 @@ namespace asynchost auto p = data; auto psize = size; - ccf::Index idx; - ccf::Index prev_idx; - if ( - serialized::peek(data, size) == - raft::raft_append_entries) - { - const auto& ae = - serialized::overlay(p, psize); - idx = ae.idx; - prev_idx = ae.prev_idx; - } - else - { - const auto& ae = - serialized::overlay(p, psize); - idx = ae.idx; - prev_idx = ae.prev_idx; - } - + const auto& ae = + serialized::overlay(p, psize); // Find the total frame size, and write it along with the header. - auto count = idx - prev_idx; + auto count = ae.idx - ae.prev_idx; uint32_t frame = (uint32_t)( - size_to_send + ledger.framed_entries_size(prev_idx + 1, idx)); + size_to_send + + ledger.framed_entries_size(ae.prev_idx + 1, ae.idx)); LOG_DEBUG_FMT( - "send AE to {} [{}]: {}, {}", to, frame, idx, prev_idx); + "send AE to {} [{}]: {}, {}", to, frame, ae.idx, ae.prev_idx); // TODO(#performance): writev node.value()->write(sizeof(uint32_t), (uint8_t*)&frame); node.value()->write(size_to_send, data_to_send); - auto framed_entries = ledger.read_framed_entries(prev_idx + 1, idx); + auto framed_entries = + ledger.read_framed_entries(ae.prev_idx + 1, ae.idx); frame = (uint32_t)framed_entries.size(); node.value()->write(frame, framed_entries.data()); } From aea1bc21202cf701988df02976fb20048e7e1aaf Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Wed, 29 Jan 2020 13:41:06 +0000 Subject: [PATCH 25/42] cleanup and formatting --- src/consensus/pbft/libbyz/Prepared_cert.h | 3 ++- src/consensus/pbft/pbft.h | 2 +- src/consensus/pbft/pbfttypes.h | 2 +- tests/late_joiners.py | 15 +++++++-------- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/consensus/pbft/libbyz/Prepared_cert.h b/src/consensus/pbft/libbyz/Prepared_cert.h index 0212f1b2a69d..1d194b2d65ae 100644 --- a/src/consensus/pbft/libbyz/Prepared_cert.h +++ b/src/consensus/pbft/libbyz/Prepared_cert.h @@ -145,7 +145,8 @@ inline bool Prepared_cert::add(Prepare* m) if (!principal) { LOG_INFO_FMT( - "Principal with id {} has not been configured yet, rejecting prepare", id); + "Principal with id {} has not been configured yet, rejecting prepare", + id); delete m; return false; } diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index 2194eaa8bce5..1732873ed7c9 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -103,7 +103,7 @@ namespace pbft void send_append_entries(NodeId to, Index start_idx) { - size_t entries_batch_size = 10; + Index entries_batch_size = 10; Index end_idx = (latest_stable_ae_index == 0) ? 0 : diff --git a/src/consensus/pbft/pbfttypes.h b/src/consensus/pbft/pbfttypes.h index 9ea98452e6f3..5e4159a1fd42 100644 --- a/src/consensus/pbft/pbfttypes.h +++ b/src/consensus/pbft/pbfttypes.h @@ -8,7 +8,7 @@ namespace pbft { - using Index = uint64_t; + using Index = int64_t; using Term = uint64_t; using NodeId = uint64_t; using Node2NodeMsg = uint64_t; diff --git a/tests/late_joiners.py b/tests/late_joiners.py index 8dbde99a77f1..3b5cbea1a321 100644 --- a/tests/late_joiners.py +++ b/tests/late_joiners.py @@ -54,26 +54,25 @@ def run(args): # first timer determines after how many seconds each node will be suspended if not args.skip_suspension: timeouts = [] - t = random.uniform(1, 10) - LOG.info(f"Initial timer for node {first_node.node_id} is {t} seconds...") - timeouts.append((t, first_node)) - for backup in backups: + for node in network.get_joined_nodes(): t = random.uniform(1, 10) - LOG.info(f"Initial timer for node {backup.node_id} is {t} seconds...") - timeouts.append((t, backup)) + LOG.info(f"Initial timer for node {node.node_id} is {t} seconds...") + timeouts.append((t, node)) for t, node in timeouts: tm = Timer(t, timeout, args=[node, True, args.election_timeout / 1000],) tm.start() - LOG.info("Adding another node after f = 0 but before we need to send append entries") + LOG.info( + "Adding another node after f = 0 but before we need to send append entries" + ) # check that a new node can catch up naturally new_node = network.create_and_trust_node( lib_name=args.package, host="localhost", args=args, ) assert new_node nodes_to_keep.append(new_node) - + with first_node.node_client() as mc: check_commit = infra.checker.Checker(mc) check = infra.checker.Checker() From 20cc7f1a0099ae05979029e0500a6a157281758e Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Wed, 29 Jan 2020 15:21:54 +0000 Subject: [PATCH 26/42] refactoring python test --- src/consensus/pbft/libbyz/Replica.cpp | 5 +- tests/late_joiners.py | 206 ++++++++++++-------------- 2 files changed, 95 insertions(+), 116 deletions(-) diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index a90e761f0c8c..ac80c8fd1d72 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -351,10 +351,6 @@ bool Replica::compare_execution_results( void Replica::playback_transaction(ccf::Store::Tx& tx) { - if (vtimer->get_state() == ITimer::State::running) - { - vtimer->restart(); - } { auto view = tx.get_view(pbft_requests_map); if (view->has_writes()) @@ -461,6 +457,7 @@ void Replica::playback_pre_prepare( { mark_stable(last_executed, true); } + rqueue.clear(); } else { diff --git a/tests/late_joiners.py b/tests/late_joiners.py index 3b5cbea1a321..7c9a4f816a6e 100644 --- a/tests/late_joiners.py +++ b/tests/late_joiners.py @@ -34,6 +34,50 @@ def timeout(node, suspend, election_timeout): node.resume() +def assert_node_up_to_date(check, node, final_msg, final_msg_id): + with node.user_client(format="json") as c: + check( + c.rpc("LOG_get", {"id": final_msg_id}), result={"msg": final_msg}, + ) + + +def run_requests(network, nodes, total_requests, start_id, final_msg, final_msg_id): + term_info = {} + with nodes[0].node_client() as mc: + check_commit = infra.checker.Checker(mc) + check = infra.checker.Checker() + clients = [] + with contextlib.ExitStack() as es: + for node in nodes: + clients.append(es.enter_context(node.user_client(format="json"))) + node_id = 0 + long_msg = "X" * (2 ** 14) + for id in range(start_id, total_requests): + node_id += 1 + c = clients[node_id % len(clients)] + try: + c.rpc("LOG_record", {"id": id, "msg": long_msg}) + except Exception: + LOG.info("Trying to access a suspended node") + # track if there is a new primary + try: + cur_primary, cur_term = network.find_primary() + term_info[cur_term] = cur_primary.node_id + except Exception: + LOG.info("Trying to access a suspended node") + id += 1 + with node.user_client(format="json") as c: + check_commit( + c.rpc("LOG_record", {"id": final_msg_id, "msg": final_msg}), + result=True, + ) + + # assert all nodes are caught up + for node in nodes: + assert_node_up_to_date(check, node, final_msg, final_msg_id) + return term_info + + def run(args): hosts = ["localhost", "localhost", "localhost"] @@ -42,19 +86,20 @@ def run(args): ) as network: network.start_and_join(args) first_node, backups = network.find_nodes() + all_nodes = network.get_joined_nodes() term_info = {} - long_msg = "X" * (2 ** 14) + fisrt_msg = "Hello, world!" + second_msg = "Hello, world hello!" + final_msg = "Goodbye, world!" nodes_to_kill = [backups[0]] - for b in backups: - nodes_to_kill.append(b) - nodes_to_keep = [first_node] + nodes_to_keep = [first_node, backups[1]] # first timer determines after how many seconds each node will be suspended if not args.skip_suspension: timeouts = [] - for node in network.get_joined_nodes(): + for node in all_nodes: t = random.uniform(1, 10) LOG.info(f"Initial timer for node {node.node_id} is {t} seconds...") timeouts.append((t, node)) @@ -77,119 +122,56 @@ def run(args): check_commit = infra.checker.Checker(mc) check = infra.checker.Checker() - clients = [] - with contextlib.ExitStack() as es: - LOG.info("Write messages to nodes using round robin") - clients.append(es.enter_context(first_node.user_client(format="json"))) - for backup in backups: - clients.append(es.enter_context(backup.user_client(format="json"))) - node_id = 0 - for id in range(1, TOTAL_REQUESTS): - node_id += 1 - c = clients[node_id % len(clients)] - try: - resp = c.rpc("LOG_record", {"id": id, "msg": long_msg}) - except Exception: - LOG.info("Trying to access a suspended node") - try: - cur_primary, cur_term = network.find_primary() - term_info[cur_term] = cur_primary.node_id - except Exception: - LOG.info("Trying to access a suspended node") - id += 1 - - # wait for the last request to commit - first_catchup_msg = "Hello world!" - check_commit( - c.rpc("LOG_record", {"id": 1000, "msg": first_catchup_msg}), - result=True, - ) - check( - c.rpc("LOG_get", {"id": 1000}), result={"msg": first_catchup_msg}, - ) + term_info.update( + run_requests(network, all_nodes, TOTAL_REQUESTS, 0, fisrt_msg, 1000) + ) - # check that new node has caught up ok - with new_node.user_client(format="json") as c: - check( - c.rpc("LOG_get", {"id": 1000}), - result={"msg": first_catchup_msg}, - ) - # add new node to backups list - backups.append(new_node) - - # check that a new node can catch up after all the requests - last_node = network.create_and_trust_node( - lib_name=args.package, host="localhost", args=args, - ) - assert last_node - nodes_to_keep.append(last_node) - - ## send more messages I want to check that the new node will catchup and then start processing pre prepares, etc - clients = [] - with contextlib.ExitStack() as es: - LOG.info("Write messages to nodes using round robin") - clients.append(es.enter_context(first_node.user_client(format="json"))) - for backup in backups: - clients.append(es.enter_context(backup.user_client(format="json"))) - node_id = 0 - for id in range(1001, (1001 + TOTAL_REQUESTS)): - node_id += 1 - c = clients[node_id % len(clients)] - try: - resp = c.rpc("LOG_record", {"id": id, "msg": long_msg}) - except Exception: - LOG.info("Trying to access a suspended node") - try: - cur_primary, cur_term = network.find_primary() - term_info[cur_term] = cur_primary.node_id - except Exception: - LOG.info("Trying to access a suspended node") - id += 1 - - # wait for the last request to commit - second_catchup_msg = "Hello world Hello!" + # check that new node has caught up ok + assert_node_up_to_date(check, new_node, fisrt_msg, 1000) + # add new node to backups list + all_nodes.append(new_node) + + # check that a new node can catch up after all the requests + LOG.info("Adding a very late joiner") + last_node = network.create_and_trust_node( + lib_name=args.package, host="localhost", args=args, + ) + assert last_node + nodes_to_keep.append(last_node) + + term_info.update( + run_requests(network, all_nodes, TOTAL_REQUESTS, 1001, second_msg, 2000) + ) + + # give new joiner a few seconds to catch up + time.sleep(10) + + assert_node_up_to_date(check, last_node, fisrt_msg, 1000) + assert_node_up_to_date(check, last_node, second_msg, 2000) + + # replace the 2 backups with the 2 new nodes, kill the old ones and ensure we are still making progress + for node in nodes_to_kill: + LOG.info(f"Stopping node {node.node_id}") + node.stop() + + for i, node in enumerate(nodes_to_keep): + with node.user_client(format="json") as c: + final_msg = "Goodbye world!" check_commit( - c.rpc("LOG_record", {"id": 2000, "msg": second_catchup_msg}), + c.rpc("LOG_record", {"id": 3000 + i, "msg": final_msg}), result=True, ) - check( - c.rpc("LOG_get", {"id": 2000}), result={"msg": second_catchup_msg}, - ) + assert_node_up_to_date(node, final_msg, 3000 + i) - with last_node.user_client(format="json") as c: - check( - c.rpc("LOG_get", {"id": 1000}), result={"msg": first_catchup_msg}, - ) - with last_node.user_client(format="json") as c: - check( - c.rpc("LOG_get", {"id": 2000}), result={"msg": second_catchup_msg}, - ) + # we have asserted that all nodes are caught up + + if not args.skip_suspension: + # assert that view changes actually did occur + assert len(term_info) > 1 - # replace the 2 backups with the 2 new nodes, kill the old ones and ensure we are still making progress - for node in nodes_to_kill: - LOG.info(f"Stopping node {node.node_id}") - node.stop() - - for i, node in enumerate(nodes_to_keep): - with node.user_client(format="json") as c: - final_msg = "Goodbye world!" - check_commit( - c.rpc("LOG_record", {"id": 3000 + i, "msg": final_msg}), - result=True, - ) - check( - c.rpc("LOG_get", {"id": 3000 + i}), result={"msg": final_msg}, - ) - - # we have asserted that all nodes are caught up - - if not args.skip_suspension: - # assert that view changes actually did occur - assert len(term_info) > 1 - - LOG.success("----------- terms and primaries recorded -----------") - for term, primary in term_info.items(): - LOG.success(f"term {term} - primary {primary}") + LOG.success("----------- terms and primaries recorded -----------") + for term, primary in term_info.items(): + LOG.success(f"term {term} - primary {primary}") if __name__ == "__main__": From cc079ebe3c7c71b7a3c40960f1c4ac83af4a632f Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Wed, 29 Jan 2020 15:38:41 +0000 Subject: [PATCH 27/42] separating append entries index --- src/consensus/consensustypes.h | 9 ++++++++- src/consensus/pbft/pbfttypes.h | 3 ++- src/consensus/raft/rafttypes.h | 3 ++- src/host/nodeconnections.h | 3 +++ 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/consensus/consensustypes.h b/src/consensus/consensustypes.h index 5890de0eabd9..614b82b82aca 100644 --- a/src/consensus/consensustypes.h +++ b/src/consensus/consensustypes.h @@ -8,16 +8,23 @@ namespace ccf using ObjectId = uint64_t; using NodeId = ObjectId; using Index = int64_t; + using Node2NodeMsg = uint64_t; } namespace consensus { #pragma pack(push, 1) + template + struct ConsensusHeader + { + T msg; + ccf::NodeId from_node; + }; + struct AppendEntriesIndex { ccf::Index idx; ccf::Index prev_idx; }; - #pragma pack(pop) } \ No newline at end of file diff --git a/src/consensus/pbft/pbfttypes.h b/src/consensus/pbft/pbfttypes.h index cec76141a69a..12ffc4ab792e 100644 --- a/src/consensus/pbft/pbfttypes.h +++ b/src/consensus/pbft/pbfttypes.h @@ -27,7 +27,8 @@ namespace pbft NodeId from_node; }; - struct AppendEntries : PbftHeader, consensus::AppendEntriesIndex + struct AppendEntries : consensus::ConsensusHeader, + consensus::AppendEntriesIndex {}; #pragma pack(pop) diff --git a/src/consensus/raft/rafttypes.h b/src/consensus/raft/rafttypes.h index 5bd0b77dd722..d1e4fbf0cb5c 100644 --- a/src/consensus/raft/rafttypes.h +++ b/src/consensus/raft/rafttypes.h @@ -90,7 +90,8 @@ namespace raft NodeId from_node; }; - struct AppendEntries : RaftHeader, consensus::AppendEntriesIndex + struct AppendEntries : consensus::ConsensusHeader, + consensus::AppendEntriesIndex { Term term; Term prev_term; diff --git a/src/host/nodeconnections.h b/src/host/nodeconnections.h index 3a17a9600ad1..59fec72ec9fb 100644 --- a/src/host/nodeconnections.h +++ b/src/host/nodeconnections.h @@ -229,6 +229,9 @@ namespace asynchost auto p = data; auto psize = size; + serialized::overlay>( + p, psize); + const auto& ae = serialized::overlay(p, psize); // Find the total frame size, and write it along with the header. From 4b764f4f88238895316d82d5514b0c5cb3fe5954 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Wed, 29 Jan 2020 18:24:14 +0000 Subject: [PATCH 28/42] cleanup late_joiners.py --- tests/late_joiners.py | 101 +++++++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 45 deletions(-) diff --git a/tests/late_joiners.py b/tests/late_joiners.py index 7c9a4f816a6e..85c418b8d698 100644 --- a/tests/late_joiners.py +++ b/tests/late_joiners.py @@ -22,6 +22,29 @@ TOTAL_REQUESTS = 56 +def timeout(node, suspend, election_timeout): + if suspend: + # We want to suspend the nodes' process so we need to initiate a new timer to wake it up eventually + node.suspend() + next_timeout = random.uniform(2 * election_timeout, 3 * election_timeout) + LOG.info(f"New timer set for node {node.node_id} is {next_timeout} seconds") + t = Timer(next_timeout, timeout, args=[node, False, 0]) + t.start() + else: + node.resume() + + +def find_primary(network): + # track if there is a new primary + term_info = {} + try: + cur_primary, cur_term = network.find_primary() + term_info[cur_term] = cur_primary.node_id + except TimeoutError: + LOG.info("Trying to access a suspended node") + return term_info + + def timeout(node, suspend, election_timeout): if suspend: # We want to suspend the nodes' process so we need to initiate a new timer to wake it up eventually @@ -41,8 +64,23 @@ def assert_node_up_to_date(check, node, final_msg, final_msg_id): ) -def run_requests(network, nodes, total_requests, start_id, final_msg, final_msg_id): - term_info = {} +def wait_for_nodes(nodes, final_msg, final_msg_id): + with nodes[0].node_client() as mc: + check_commit = infra.checker.Checker(mc) + check = infra.checker.Checker() + for i, node in enumerate(nodes): + with node.user_client(format="json") as c: + check_commit( + c.rpc("LOG_record", {"id": final_msg_id + i, "msg": final_msg}), + result=True, + ) + + # assert all nodes are caught up + for node in nodes: + assert_node_up_to_date(check, node, final_msg, final_msg_id) + + +def run_requests(nodes, total_requests, start_id, final_msg, final_msg_id): with nodes[0].node_client() as mc: check_commit = infra.checker.Checker(mc) check = infra.checker.Checker() @@ -52,30 +90,15 @@ def run_requests(network, nodes, total_requests, start_id, final_msg, final_msg_ clients.append(es.enter_context(node.user_client(format="json"))) node_id = 0 long_msg = "X" * (2 ** 14) - for id in range(start_id, total_requests): + for id in range(start_id, (start_id + total_requests)): node_id += 1 c = clients[node_id % len(clients)] try: c.rpc("LOG_record", {"id": id, "msg": long_msg}) - except Exception: - LOG.info("Trying to access a suspended node") - # track if there is a new primary - try: - cur_primary, cur_term = network.find_primary() - term_info[cur_term] = cur_primary.node_id - except Exception: + except TimeoutError: LOG.info("Trying to access a suspended node") id += 1 - with node.user_client(format="json") as c: - check_commit( - c.rpc("LOG_record", {"id": final_msg_id, "msg": final_msg}), - result=True, - ) - - # assert all nodes are caught up - for node in nodes: - assert_node_up_to_date(check, node, final_msg, final_msg_id) - return term_info + wait_for_nodes(nodes, final_msg, final_msg_id) def run(args): @@ -88,7 +111,7 @@ def run(args): first_node, backups = network.find_nodes() all_nodes = network.get_joined_nodes() - term_info = {} + term_info = find_primary(network) fisrt_msg = "Hello, world!" second_msg = "Hello, world hello!" final_msg = "Goodbye, world!" @@ -122,9 +145,8 @@ def run(args): check_commit = infra.checker.Checker(mc) check = infra.checker.Checker() - term_info.update( - run_requests(network, all_nodes, TOTAL_REQUESTS, 0, fisrt_msg, 1000) - ) + run_requests(all_nodes, TOTAL_REQUESTS, 0, fisrt_msg, 1000) + term_info = find_primary(network) # check that new node has caught up ok assert_node_up_to_date(check, new_node, fisrt_msg, 1000) @@ -139,12 +161,8 @@ def run(args): assert last_node nodes_to_keep.append(last_node) - term_info.update( - run_requests(network, all_nodes, TOTAL_REQUESTS, 1001, second_msg, 2000) - ) - - # give new joiner a few seconds to catch up - time.sleep(10) + run_requests(all_nodes, TOTAL_REQUESTS, 1001, second_msg, 2000) + term_info = find_primary(network) assert_node_up_to_date(check, last_node, fisrt_msg, 1000) assert_node_up_to_date(check, last_node, second_msg, 2000) @@ -154,24 +172,17 @@ def run(args): LOG.info(f"Stopping node {node.node_id}") node.stop() - for i, node in enumerate(nodes_to_keep): - with node.user_client(format="json") as c: - final_msg = "Goodbye world!" - check_commit( - c.rpc("LOG_record", {"id": 3000 + i, "msg": final_msg}), - result=True, - ) - assert_node_up_to_date(node, final_msg, 3000 + i) + wait_for_nodes(nodes_to_keep, final_msg, 4000) - # we have asserted that all nodes are caught up + # we have asserted that all nodes are caught up - if not args.skip_suspension: - # assert that view changes actually did occur - assert len(term_info) > 1 + if not args.skip_suspension: + # assert that view changes actually did occur + assert len(term_info) > 1 - LOG.success("----------- terms and primaries recorded -----------") - for term, primary in term_info.items(): - LOG.success(f"term {term} - primary {primary}") + LOG.success("----------- terms and primaries recorded -----------") + for term, primary in term_info.items(): + LOG.success(f"term {term} - primary {primary}") if __name__ == "__main__": From 72d91c52e026e4c6ef93f05e52609f594a3f2877 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Wed, 29 Jan 2020 18:30:45 +0000 Subject: [PATCH 29/42] indentation --- tests/late_joiners.py | 58 +++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/tests/late_joiners.py b/tests/late_joiners.py index 85c418b8d698..22c25fc5df02 100644 --- a/tests/late_joiners.py +++ b/tests/late_joiners.py @@ -145,44 +145,44 @@ def run(args): check_commit = infra.checker.Checker(mc) check = infra.checker.Checker() - run_requests(all_nodes, TOTAL_REQUESTS, 0, fisrt_msg, 1000) - term_info = find_primary(network) + run_requests(all_nodes, TOTAL_REQUESTS, 0, fisrt_msg, 1000) + term_info = find_primary(network) - # check that new node has caught up ok - assert_node_up_to_date(check, new_node, fisrt_msg, 1000) - # add new node to backups list - all_nodes.append(new_node) + # check that new node has caught up ok + assert_node_up_to_date(check, new_node, fisrt_msg, 1000) + # add new node to backups list + all_nodes.append(new_node) - # check that a new node can catch up after all the requests - LOG.info("Adding a very late joiner") - last_node = network.create_and_trust_node( - lib_name=args.package, host="localhost", args=args, - ) - assert last_node - nodes_to_keep.append(last_node) + # check that a new node can catch up after all the requests + LOG.info("Adding a very late joiner") + last_node = network.create_and_trust_node( + lib_name=args.package, host="localhost", args=args, + ) + assert last_node + nodes_to_keep.append(last_node) - run_requests(all_nodes, TOTAL_REQUESTS, 1001, second_msg, 2000) - term_info = find_primary(network) + run_requests(all_nodes, TOTAL_REQUESTS, 1001, second_msg, 2000) + term_info = find_primary(network) - assert_node_up_to_date(check, last_node, fisrt_msg, 1000) - assert_node_up_to_date(check, last_node, second_msg, 2000) + assert_node_up_to_date(check, last_node, fisrt_msg, 1000) + assert_node_up_to_date(check, last_node, second_msg, 2000) - # replace the 2 backups with the 2 new nodes, kill the old ones and ensure we are still making progress - for node in nodes_to_kill: - LOG.info(f"Stopping node {node.node_id}") - node.stop() + # replace the 2 backups with the 2 new nodes, kill the old ones and ensure we are still making progress + for node in nodes_to_kill: + LOG.info(f"Stopping node {node.node_id}") + node.stop() - wait_for_nodes(nodes_to_keep, final_msg, 4000) + wait_for_nodes(nodes_to_keep, final_msg, 4000) - # we have asserted that all nodes are caught up + # we have asserted that all nodes are caught up - if not args.skip_suspension: - # assert that view changes actually did occur - assert len(term_info) > 1 + if not args.skip_suspension: + # assert that view changes actually did occur + assert len(term_info) > 1 - LOG.success("----------- terms and primaries recorded -----------") - for term, primary in term_info.items(): - LOG.success(f"term {term} - primary {primary}") + LOG.success("----------- terms and primaries recorded -----------") + for term, primary in term_info.items(): + LOG.success(f"term {term} - primary {primary}") if __name__ == "__main__": From 305edfc0fb1464bba073caa8c8cb61d4a906c991 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Thu, 30 Jan 2020 10:50:52 +0000 Subject: [PATCH 30/42] cleanup --- CMakeLists.txt | 6 ++++++ cmake/pbft.cmake | 6 ------ src/consensus/pbft/libbyz/Replica.cpp | 2 +- src/node/rpc/frontend.h | 3 ++- tests/late_joiners.py | 24 ++++++++++++++++-------- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 902091e89263..cf2483a0f7cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -601,6 +601,12 @@ if(BUILD_TESTS) ADDITIONAL_ARGS --election-timeout 2000 ) + add_e2e_test( + NAME late_joiners + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/late_joiners.py + ADDITIONAL_ARGS --skip-suspension + ) + if(NOT PBFT) # Logging scenario perf test add_perf_test( diff --git a/cmake/pbft.cmake b/cmake/pbft.cmake index f22cfe80db72..8f350dec0120 100644 --- a/cmake/pbft.cmake +++ b/cmake/pbft.cmake @@ -78,12 +78,6 @@ endif() set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -add_e2e_test( - NAME late_joiners - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/late_joiners.py - ADDITIONAL_ARGS --skip-suspension -) - if("virtual" IN_LIST TARGET) add_library(libbyz.host STATIC ${PBFT_SRC}) diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index 407dd9c2b03c..f3a7ec92fbb9 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -1387,7 +1387,7 @@ void Replica::handle(Status* m) LOG_TRACE_FMT("Rentransmitting from min {} to max {}", min, max); if ( last_stable > m->last_stable() && - last_executed > m->last_executed() + 2) + last_executed > m->last_executed() + 1) { LOG_TRACE_FMT( "Sending append entries to {} since we are way off", m->id()); diff --git a/src/node/rpc/frontend.h b/src/node/rpc/frontend.h index 7c8a73db2425..3815bd23caf2 100644 --- a/src/node/rpc/frontend.h +++ b/src/node/rpc/frontend.h @@ -344,7 +344,8 @@ namespace ccf ProcessPbftResp process_pbft( std::shared_ptr ctx, Store::Tx& tx, - bool playback, bool include_merkle_roots) override + bool playback, + bool include_merkle_roots) override { // TODO(#PBFT): Refactor this with process_forwarded(). crypto::Sha256Hash full_state_merkle_root; diff --git a/tests/late_joiners.py b/tests/late_joiners.py index 22c25fc5df02..102ca8082664 100644 --- a/tests/late_joiners.py +++ b/tests/late_joiners.py @@ -11,7 +11,7 @@ import infra.proc import infra.notification import infra.net -import e2e_args +import infra.e2e_args from threading import Timer import random import contextlib @@ -59,9 +59,12 @@ def timeout(node, suspend, election_timeout): def assert_node_up_to_date(check, node, final_msg, final_msg_id): with node.user_client(format="json") as c: - check( - c.rpc("LOG_get", {"id": final_msg_id}), result={"msg": final_msg}, - ) + try: + check( + c.rpc("LOG_get", {"id": final_msg_id}), result={"msg": final_msg}, + ) + except TimeoutError: + LOG.error(f"Timeout error for LOG_get on node {node.node_id}") def wait_for_nodes(nodes, final_msg, final_msg_id): @@ -146,7 +149,7 @@ def run(args): check = infra.checker.Checker() run_requests(all_nodes, TOTAL_REQUESTS, 0, fisrt_msg, 1000) - term_info = find_primary(network) + term_info.update(find_primary(network)) # check that new node has caught up ok assert_node_up_to_date(check, new_node, fisrt_msg, 1000) @@ -162,7 +165,7 @@ def run(args): nodes_to_keep.append(last_node) run_requests(all_nodes, TOTAL_REQUESTS, 1001, second_msg, 2000) - term_info = find_primary(network) + term_info.update(find_primary(network)) assert_node_up_to_date(check, last_node, fisrt_msg, 1000) assert_node_up_to_date(check, last_node, second_msg, 2000) @@ -194,8 +197,13 @@ def add(parser): action="store_true", ) - args = e2e_args.cli_args(add) - args.package = args.app_script and "libluagenericenc" or "libloggingenc" + args = infra.e2e_args.cli_args(add) + if args.js_app_script: + args.package = "libjsgeneric" + elif args.app_script: + args.package = "libluageneric" + else: + args.package = "liblogging" notify_server_host = "localhost" args.notify_server = ( From a7dc790e66ec7ef350a34f7a183332fabe0a4101 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Thu, 30 Jan 2020 11:42:37 +0000 Subject: [PATCH 31/42] addressing PR comments --- src/consensus/consensustypes.h | 2 +- src/consensus/pbft/libbyz/Certificate.h | 2 +- src/consensus/pbft/libbyz/Pre_prepare.cpp | 3 +-- src/consensus/pbft/libbyz/Replica.cpp | 8 ++++---- src/consensus/pbft/pbft.h | 5 +++-- src/consensus/pbft/pbftconfig.h | 4 ++-- src/enclave/rpccontext.h | 23 ++++++++++++++++++----- 7 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/consensus/consensustypes.h b/src/consensus/consensustypes.h index 614b82b82aca..8340f0f2feb7 100644 --- a/src/consensus/consensustypes.h +++ b/src/consensus/consensustypes.h @@ -14,7 +14,7 @@ namespace ccf namespace consensus { #pragma pack(push, 1) - template + template struct ConsensusHeader { T msg; diff --git a/src/consensus/pbft/libbyz/Certificate.h b/src/consensus/pbft/libbyz/Certificate.h index 472ccf0c851e..31d4c4f71e42 100644 --- a/src/consensus/pbft/libbyz/Certificate.h +++ b/src/consensus/pbft/libbyz/Certificate.h @@ -350,7 +350,7 @@ bool Certificate::add(T* m) auto principal = pbft::GlobalState::get_node().get_principal(m->id()); if (!principal) { - LOG_TRACE_FMT( + LOG_INFO_FMT( "Principal with id {} has not been configured yet, rejecting the message", m->id()); delete m; diff --git a/src/consensus/pbft/libbyz/Pre_prepare.cpp b/src/consensus/pbft/libbyz/Pre_prepare.cpp index 673527888c29..c9e0be1eb010 100644 --- a/src/consensus/pbft/libbyz/Pre_prepare.cpp +++ b/src/consensus/pbft/libbyz/Pre_prepare.cpp @@ -291,8 +291,7 @@ bool Pre_prepare::pre_verify() pbft::GlobalState::get_node().get_principal(sender); if (!sender_principal) { - LOG_TRACE_FMT( - "Sender principal has not been configured yet {}", sender); + LOG_INFO_FMT("Sender principal has not been configured yet {}", sender); return false; } diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index f3a7ec92fbb9..9a58ae8db052 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -465,7 +465,7 @@ void Replica::playback_pre_prepare( } else { - throw std::logic_error("Entries don't match playback"); + PBFT_ASSERT(false, "Merkle roots don't match in playback pre-prepare"); } } @@ -949,7 +949,7 @@ void Replica::send_prepare(Seqno seqno, std::optional byz_info) // https://github.com/microsoft/CCF/issues/357 if (!compare_execution_results(info, pp)) { - throw std::logic_error("Execution results don't match send pp"); + PBFT_ASSERT(false, "Merkle roots don't match in send_prepare"); break; } @@ -2279,8 +2279,8 @@ void Replica::execute_committed(bool was_f_0) auto executed_ok = execute_tentative(pp, info); if (!compare_execution_results(info, pp)) { - throw std::logic_error( - "Execution results don't match handle commit"); + PBFT_ASSERT(false, "Merkle roots don't match execute committed"); + return; } ledger_writer->write_pre_prepare(pp); PBFT_ASSERT( diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index c8a14a58f8a1..aef3ba222014 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -29,6 +29,9 @@ namespace pbft { // maps node to last sent index to that node using NodesMap = std::unordered_map; + // TODO remove hard coded value (https://github.com/microsoft/CCF/issues/753) + static constexpr Index entries_batch_size = 10; + class PbftEnclaveNetwork : public INetwork { public: @@ -108,8 +111,6 @@ namespace pbft void send_append_entries(NodeId to, Index start_idx) { - Index entries_batch_size = 10; - Index end_idx = (latest_stable_ae_index == 0) ? 0 : std::min(start_idx + entries_batch_size, latest_stable_ae_index); diff --git a/src/consensus/pbft/pbftconfig.h b/src/consensus/pbft/pbftconfig.h index 5a15674c9ddd..9ef944a9b675 100644 --- a/src/consensus/pbft/pbftconfig.h +++ b/src/consensus/pbft/pbftconfig.h @@ -75,12 +75,12 @@ namespace pbft // TODO: Should serialise context directly, rather than reconstructing const enclave::SessionContext session( enclave::InvalidSessionId, request.caller_id, request.caller_cert); - auto ctx = enclave::make_rpc_context(session, request.raw); + auto ctx = enclave::make_rpc_context( + session, request.raw, {req_start, req_start + req_size}); ctx->actor = (ccf::ActorsType)request.actor; const auto n = ctx->method.find_last_of('/'); ctx->method = ctx->method.substr(n + 1, ctx->method.size()); - ctx->pbft_raw = {req_start, req_start + req_size}; // TODO: HTTP signatures are not handled by PBFT // https://github.com/microsoft/CCF/issues/720 #ifdef HTTP diff --git a/src/enclave/rpccontext.h b/src/enclave/rpccontext.h index cc601c2ac00d..1b08523eea1a 100644 --- a/src/enclave/rpccontext.h +++ b/src/enclave/rpccontext.h @@ -94,6 +94,16 @@ namespace enclave bool is_create_request = false; RpcContext(const SessionContext& s) : session(s) {} + + RpcContext( + const SessionContext& s, + const std::vector& raw_, + const std::vector pbft_raw_) : + session(s), + raw(raw_), + pbft_raw(pbft_raw_) + {} + virtual ~RpcContext() {} void set_request_index(size_t ri) @@ -213,8 +223,10 @@ namespace enclave std::optional pack_format = std::nullopt; JsonRpcContext( - const SessionContext& s, const std::vector& packed) : - RpcContext(s) + const SessionContext& s, + const std::vector& packed, + const std::vector& pbft_raw = {}) : + RpcContext(s, packed, pbft_raw) { std::optional p; @@ -225,7 +237,6 @@ namespace enclave } init(p.value(), rpc); - raw = packed; } JsonRpcContext( @@ -285,8 +296,10 @@ namespace enclave }; inline std::shared_ptr make_rpc_context( - const SessionContext& s, const std::vector& packed) + const SessionContext& s, + const std::vector& packed, + const std::vector& raw_pbft = {}) { - return std::make_shared(s, packed); + return std::make_shared(s, packed, raw_pbft); } } From f294d21d05188acbf3be3c04a37c715eb01a4e07 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Thu, 30 Jan 2020 11:59:20 +0000 Subject: [PATCH 32/42] cmake formatting --- cmake/pbft.cmake | 84 ++++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/cmake/pbft.cmake b/cmake/pbft.cmake index 8f350dec0120..b8d406a94a3c 100644 --- a/cmake/pbft.cmake +++ b/cmake/pbft.cmake @@ -11,48 +11,48 @@ if(SAN) endif() set(PBFT_SRC - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/globalstate.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Client.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Replica.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Commit.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Message.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Reply.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Digest.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Node.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Request.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Checkpoint.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Pre_prepare.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Req_queue.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Prepare.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Status.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Prepared_cert.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Principal.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Log_allocator.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Meta_data.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Data.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Fetch.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Meta_data_cert.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/State.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/libbyz.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/View_change.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/New_view.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/View_change_ack.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/View_info.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/NV_info.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Rep_info.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Rep_info_exactly_once.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Meta_data_d.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Query_stable.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Reply_stable.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Stable_estimator.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Big_req_table.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Pre_prepare_info.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/LedgerWriter.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/key_format.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/request_id_gen.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/New_principal.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Network_open.cpp - ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Append_entries.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/globalstate.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Client.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Replica.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Commit.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Message.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Reply.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Digest.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Node.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Request.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Checkpoint.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Pre_prepare.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Req_queue.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Prepare.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Status.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Prepared_cert.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Principal.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Log_allocator.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Meta_data.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Data.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Fetch.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Meta_data_cert.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/State.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/libbyz.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/View_change.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/New_view.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/View_change_ack.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/View_info.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/NV_info.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Rep_info.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Rep_info_exactly_once.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Meta_data_d.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Query_stable.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Reply_stable.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Stable_estimator.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Big_req_table.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Pre_prepare_info.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/LedgerWriter.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/key_format.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/request_id_gen.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/New_principal.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Network_open.cpp + ${CMAKE_SOURCE_DIR}/src/consensus/pbft/libbyz/Append_entries.cpp ) if("sgx" IN_LIST TARGET) From 75b77f2ec87100621fc71ccd1cbdf9221701891a Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Thu, 30 Jan 2020 12:59:08 +0000 Subject: [PATCH 33/42] fix ledger replay test --- src/consensus/pbft/libbyz/test/test_ledger_replay.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp b/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp index e2b2eb5d1f92..a9fe009310a1 100644 --- a/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp +++ b/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp @@ -252,7 +252,11 @@ TEST_CASE("Test Ledger Replay") store->deserialise_views(entry, false, false, nullptr, &tx) == kv::DeserialiseSuccess::PASS); pbft::GlobalState::get_replica().playback_transaction(tx); - REQUIRE(tx.commit() == kv::CommitSuccess::OK); + if (!(iterations % 2)) + { + // pre-prepares are committed in playback_pre_prepare + REQUIRE(tx.commit() == kv::CommitSuccess::OK); + } } ccf::Store::Tx tx; if (iterations % 2) From 7d0ae50a41e4e66f6686546816132d50a5701e70 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Thu, 30 Jan 2020 13:10:09 +0000 Subject: [PATCH 34/42] print seed --- tests/late_joiners.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/late_joiners.py b/tests/late_joiners.py index 102ca8082664..fd7cefb43a0f 100644 --- a/tests/late_joiners.py +++ b/tests/late_joiners.py @@ -20,6 +20,9 @@ # 256 is the number of most recent messages that PBFT keeps in memory before needing to replay the ledger TOTAL_REQUESTS = 56 +s = random.randint(1, 10) +LOG.info(f"setting seed to {s}") +random.seed(s) def timeout(node, suspend, election_timeout): From fba4b960bf926109388b1c2bfec2b3b2d3bb2dfa Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Thu, 30 Jan 2020 16:58:23 +0000 Subject: [PATCH 35/42] error handling --- tests/late_joiners.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/late_joiners.py b/tests/late_joiners.py index fd7cefb43a0f..5fb4f5ac712f 100644 --- a/tests/late_joiners.py +++ b/tests/late_joiners.py @@ -15,6 +15,7 @@ from threading import Timer import random import contextlib +import requests from loguru import logger as LOG @@ -101,7 +102,7 @@ def run_requests(nodes, total_requests, start_id, final_msg, final_msg_id): c = clients[node_id % len(clients)] try: c.rpc("LOG_record", {"id": id, "msg": long_msg}) - except TimeoutError: + except (TimeoutError, requests.exceptions.ReadTimeout,) as e: LOG.info("Trying to access a suspended node") id += 1 wait_for_nodes(nodes, final_msg, final_msg_id) From d069f079968281590284317dac6bc55720c91e8a Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 31 Jan 2020 14:09:09 +0000 Subject: [PATCH 36/42] remove set_reserved_version and address minor PR comments --- cmake/pbft.cmake | 1 - src/consensus/pbft/libbyz/LedgerWriter.cpp | 10 ++++----- src/consensus/pbft/libbyz/LedgerWriter.h | 2 +- src/consensus/pbft/libbyz/Replica.cpp | 4 +++- src/consensus/pbft/libbyz/types.h | 5 +++++ src/consensus/pbft/pbfttypes.h | 26 +++++++++++++++++----- src/enclave/rpccontext.h | 2 +- src/host/nodeconnections.h | 4 ++-- src/kv/kv.h | 6 ----- 9 files changed, 36 insertions(+), 24 deletions(-) diff --git a/cmake/pbft.cmake b/cmake/pbft.cmake index b8d406a94a3c..b221977c3bd7 100644 --- a/cmake/pbft.cmake +++ b/cmake/pbft.cmake @@ -7,7 +7,6 @@ set(SIGN_BATCH ON) if(SAN) add_definitions(-DUSE_STD_MALLOC) - set(USE_STD_MALLOC ON) endif() set(PBFT_SRC diff --git a/src/consensus/pbft/libbyz/LedgerWriter.cpp b/src/consensus/pbft/libbyz/LedgerWriter.cpp index c8db8e0985db..d44308792729 100644 --- a/src/consensus/pbft/libbyz/LedgerWriter.cpp +++ b/src/consensus/pbft/libbyz/LedgerWriter.cpp @@ -34,13 +34,12 @@ void LedgerWriter::write_prepare( } } -void LedgerWriter::write_pre_prepare(Pre_prepare* pp) +void LedgerWriter::write_pre_prepare(ccf::Store::Tx& tx) { - ccf::Store::Tx tx; - write_pre_prepare(pp, tx); + store.commit_tx(tx); } -void LedgerWriter::write_pre_prepare(Pre_prepare* pp, ccf::Store::Tx& tx) +void LedgerWriter::write_pre_prepare(Pre_prepare* pp) { store.commit_pre_prepare( {pp->seqno(), @@ -48,8 +47,7 @@ void LedgerWriter::write_pre_prepare(Pre_prepare* pp, ccf::Store::Tx& tx) pp->get_digest_sig(), {(const uint8_t*)pp->contents(), (const uint8_t*)pp->contents() + pp->size()}}, - pbft_pre_prepares_map, - tx); + pbft_pre_prepares_map); } void LedgerWriter::write_view_change(View_change* vc) diff --git a/src/consensus/pbft/libbyz/LedgerWriter.h b/src/consensus/pbft/libbyz/LedgerWriter.h index b0dbf6f24700..1239660a4c0a 100644 --- a/src/consensus/pbft/libbyz/LedgerWriter.h +++ b/src/consensus/pbft/libbyz/LedgerWriter.h @@ -23,6 +23,6 @@ class LedgerWriter virtual ~LedgerWriter() = default; void write_prepare(const Prepared_cert& prepared_cert, Seqno seqno); void write_pre_prepare(Pre_prepare* pp); - void write_pre_prepare(Pre_prepare* pp, ccf::Store::Tx& tx); + void write_pre_prepare(ccf::Store::Tx& tx); void write_view_change(View_change* vc); }; diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index 9a58ae8db052..ba45835b415a 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -447,7 +447,9 @@ void Replica::playback_pre_prepare( last_prepared = seqno; } - ledger_writer->write_pre_prepare(executable_pp.get(), tx); + LOG_TRACE_FMT("Storing pre prepare at seqno {}", seqno); + + ledger_writer->write_pre_prepare(tx); if (global_commit_cb != nullptr && executable_pp->is_signed()) { diff --git a/src/consensus/pbft/libbyz/types.h b/src/consensus/pbft/libbyz/types.h index d29d1060cd4f..52f81b5c377c 100644 --- a/src/consensus/pbft/libbyz/types.h +++ b/src/consensus/pbft/libbyz/types.h @@ -57,12 +57,17 @@ using ExecCommand = std::function; \ No newline at end of file diff --git a/src/consensus/pbft/pbfttypes.h b/src/consensus/pbft/pbfttypes.h index 12ffc4ab792e..16351f674e9c 100644 --- a/src/consensus/pbft/pbfttypes.h +++ b/src/consensus/pbft/pbfttypes.h @@ -48,8 +48,8 @@ namespace pbft virtual kv::Version current_version() = 0; virtual void commit_pre_prepare( const pbft::PrePrepare& pp, - pbft::PrePreparesMap& pbft_pre_prepares_map, - ccf::Store::Tx& tx) = 0; + pbft::PrePreparesMap& pbft_pre_prepares_map) = 0; + virtual void commit_tx(ccf::Store::Tx& tx) = 0; }; template @@ -76,9 +76,7 @@ namespace pbft } void commit_pre_prepare( - const pbft::PrePrepare& pp, - pbft::PrePreparesMap& pbft_pre_prepares_map, - ccf::Store::Tx& tx) + const pbft::PrePrepare& pp, pbft::PrePreparesMap& pbft_pre_prepares_map) { while (true) { @@ -90,7 +88,7 @@ namespace pbft auto success = p->commit( version, [&]() { - tx.set_reserved_version(version); + ccf::Store::Tx tx(version); auto pp_view = tx.get_view(pbft_pre_prepares_map); pp_view->put(0, pp); return tx.commit_reserved(); @@ -104,6 +102,22 @@ namespace pbft } } + void commit_tx(ccf::Store::Tx& tx) + { + while (true) + { + auto p = x.lock(); + if (p) + { + auto success = tx.commit(); + if (success == kv::CommitSuccess::OK) + { + break; + } + } + } + } + void compact(Index v) { auto p = x.lock(); diff --git a/src/enclave/rpccontext.h b/src/enclave/rpccontext.h index 1b08523eea1a..df316fbf1157 100644 --- a/src/enclave/rpccontext.h +++ b/src/enclave/rpccontext.h @@ -98,7 +98,7 @@ namespace enclave RpcContext( const SessionContext& s, const std::vector& raw_, - const std::vector pbft_raw_) : + const std::vector& pbft_raw_) : session(s), raw(raw_), pbft_raw(pbft_raw_) diff --git a/src/host/nodeconnections.h b/src/host/nodeconnections.h index 59fec72ec9fb..fbd13970b2ba 100644 --- a/src/host/nodeconnections.h +++ b/src/host/nodeconnections.h @@ -217,9 +217,9 @@ namespace asynchost // If the message is a consensus append entries message, affix the // corresponding ledger entries + auto msg_type = serialized::read(data, size); if ( - serialized::read(data, size) == - ccf::NodeMsgType::consensus_msg && + msg_type == ccf::NodeMsgType::consensus_msg && serialized::peek(data, size) == raft::raft_append_entries || serialized::peek(data, size) == diff --git a/src/kv/kv.h b/src/kv/kv.h index c9fce9d7ca09..affbf043cd98 100644 --- a/src/kv/kv.h +++ b/src/kv/kv.h @@ -939,12 +939,6 @@ namespace kv view_list.merge(view_list_); } - void set_reserved_version(Version reserved) - { - version = reserved; - read_version = reserved; - } - void set_req_id(const kv::TxHistory::RequestID& req_id_) { req_id = req_id_; From ea1b7b31903e976ca2b16854ef589e4b3f4166d1 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 31 Jan 2020 15:02:11 +0000 Subject: [PATCH 37/42] remove commit bool from deserialise_views --- src/consensus/pbft/libbyz/test/test_ledger_replay.cpp | 2 +- src/consensus/pbft/pbft.h | 2 +- src/consensus/pbft/pbfttypes.h | 4 +--- src/kv/kv.h | 7 +++++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp b/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp index a9fe009310a1..cff89a5e4aae 100644 --- a/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp +++ b/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp @@ -249,7 +249,7 @@ TEST_CASE("Test Ledger Replay") { ccf::Store::Tx tx; REQUIRE( - store->deserialise_views(entry, false, false, nullptr, &tx) == + store->deserialise_views(entry, false, nullptr, &tx) == kv::DeserialiseSuccess::PASS); pbft::GlobalState::get_replica().playback_transaction(tx); if (!(iterations % 2)) diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index 271b3d35d9f2..19f9deb738c3 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -558,7 +558,7 @@ namespace pbft ccf::Store::Tx tx; auto deserialise_success = store->deserialise_views( - ret.first, public_only, false, nullptr, &tx); + ret.first, public_only, nullptr, &tx); switch (deserialise_success) { diff --git a/src/consensus/pbft/pbfttypes.h b/src/consensus/pbft/pbfttypes.h index 16351f674e9c..c627aa94310e 100644 --- a/src/consensus/pbft/pbfttypes.h +++ b/src/consensus/pbft/pbfttypes.h @@ -41,7 +41,6 @@ namespace pbft virtual S deserialise_views( const std::vector& data, bool public_only = false, - bool commit = true, Term* term = nullptr, ccf::Store::Tx* tx = nullptr) = 0; virtual void compact(Index v) = 0; @@ -64,13 +63,12 @@ namespace pbft S deserialise_views( const std::vector& data, bool public_only = false, - bool commit = true, Term* term = nullptr, ccf::Store::Tx* tx = nullptr) { auto p = x.lock(); if (p) - return p->deserialise_views(data, public_only, commit, term, tx); + return p->deserialise_views(data, public_only, term, tx); return S::FAILED; } diff --git a/src/kv/kv.h b/src/kv/kv.h index affbf043cd98..421e8be55d7a 100644 --- a/src/kv/kv.h +++ b/src/kv/kv.h @@ -1520,10 +1520,13 @@ namespace kv DeserialiseSuccess deserialise_views( const std::vector& data, bool public_only = false, - bool commit = true, Term* term = nullptr, Tx* tx = nullptr) { + // if we pass in a transaction we don't want to commit, just deserialise + // and put the views into that transaction + bool commit = (tx == nullptr); + // This will return FAILED if the serialised transaction is being // applied out of order. // Processing transactions locally and also deserialising to the @@ -1705,7 +1708,7 @@ namespace kv bool public_only = false, Term* term = nullptr) override { - return deserialise_views(data, public_only, true, term); + return deserialise_views(data, public_only, term); } bool operator==(const Store& that) const From c6b0f17352a7f702d5024428e445a3c811dd264a Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 31 Jan 2020 15:29:48 +0000 Subject: [PATCH 38/42] removing playback bool from exec_command and execute_tentative_request, typo and comment addition --- src/consensus/pbft/libbyz/Log_allocator.cpp | 4 ++++ src/consensus/pbft/libbyz/Replica.cpp | 16 +--------------- src/consensus/pbft/libbyz/Replica.h | 1 - src/consensus/pbft/libbyz/test/replica_main.cpp | 1 - src/consensus/pbft/libbyz/test/replica_test.cpp | 1 - .../pbft/libbyz/test/test_ledger_replay.cpp | 1 - src/consensus/pbft/libbyz/types.h | 4 ++-- src/consensus/pbft/pbftconfig.h | 6 ++---- src/kv/kv.h | 2 +- tests/late_joiners.py | 13 ++++++++----- 10 files changed, 18 insertions(+), 31 deletions(-) diff --git a/src/consensus/pbft/libbyz/Log_allocator.cpp b/src/consensus/pbft/libbyz/Log_allocator.cpp index 9155b65f0e4d..62ab7f9faef2 100644 --- a/src/consensus/pbft/libbyz/Log_allocator.cpp +++ b/src/consensus/pbft/libbyz/Log_allocator.cpp @@ -21,7 +21,9 @@ Log_allocator::Log_allocator(int csz, int nc) num_chunks = nc; free_chunks = 0; chunks = 0; +#ifndef USE_STD_MALLOC cur = alloc_chunk(); +#endif } void Log_allocator::should_use_malloc(bool _use_malloc) @@ -39,7 +41,9 @@ bool Log_allocator::use_malloc = Log_allocator::~Log_allocator() { +#ifndef USE_STD_MALLOC ::free(chunks); +#endif } Log_allocator::Chunk* Log_allocator::alloc_chunk() diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index ba45835b415a..938c637c992b 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -1991,18 +1991,7 @@ bool Replica::execute_read_only(Request* request) std::shared_ptr cp = get_principal(client_id); ByzInfo info; int error = exec_command( - &inb, - outb, - 0, - client_id, - request_id, - true, - nullptr, - 0, - 0, - info, - false, - nullptr); + &inb, outb, 0, client_id, request_id, true, nullptr, 0, 0, info, nullptr); right_pad_contents(outb); if (!error) @@ -2129,7 +2118,6 @@ void Replica::execute_tentative_request( Byz_buffer& non_det, char* nondet_choices, ccf::Store::Tx* tx, - bool playback, Seqno seqno) { int client_id = request.client_id(); @@ -2165,7 +2153,6 @@ void Replica::execute_tentative_request( request.contents_size(), replies.total_requests_processed(), info, - playback, tx); right_pad_contents(outb); // Finish constructing the reply. @@ -2218,7 +2205,6 @@ bool Replica::execute_tentative(Pre_prepare* pp, ByzInfo& info) non_det, pp->choices(non_det.size), nullptr, - false, pp->seqno()); } LOG_DEBUG_FMT( diff --git a/src/consensus/pbft/libbyz/Replica.h b/src/consensus/pbft/libbyz/Replica.h index f52adedfec3a..cb6862ae1ea9 100644 --- a/src/consensus/pbft/libbyz/Replica.h +++ b/src/consensus/pbft/libbyz/Replica.h @@ -309,7 +309,6 @@ class Replica : public Node, public IMessageReceiveBase Byz_buffer& non_det, char* nondet_choices = nullptr, ccf::Store::Tx* tx = nullptr, - bool playback = false, Seqno seqno = -1); // Effects: called by execute_tentative or playback_request to execute the // request. seqno == -1 means we are running it from playback diff --git a/src/consensus/pbft/libbyz/test/replica_main.cpp b/src/consensus/pbft/libbyz/test/replica_main.cpp index 9c3157c57c7b..1c1efec46cd6 100644 --- a/src/consensus/pbft/libbyz/test/replica_main.cpp +++ b/src/consensus/pbft/libbyz/test/replica_main.cpp @@ -70,7 +70,6 @@ ExecCommand exec_command = []( size_t req_size, Seqno n, ByzInfo& info, - bool playback = false, ccf::Store::Tx* tx = nullptr) { outb.contents = message_receiver->create_response_message(client, rid, 8); diff --git a/src/consensus/pbft/libbyz/test/replica_test.cpp b/src/consensus/pbft/libbyz/test/replica_test.cpp index 2bb7f8c83c53..71ade15a72eb 100644 --- a/src/consensus/pbft/libbyz/test/replica_test.cpp +++ b/src/consensus/pbft/libbyz/test/replica_test.cpp @@ -200,7 +200,6 @@ ExecCommand exec_command = []( size_t req_size, Seqno total_requests_executed, ByzInfo& info, - bool playback = false, ccf::Store::Tx* tx = nullptr) { outb.contents = message_receive_base->create_response_message(client, rid, 8); diff --git a/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp b/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp index cff89a5e4aae..577e89294556 100644 --- a/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp +++ b/src/consensus/pbft/libbyz/test/test_ledger_replay.cpp @@ -47,7 +47,6 @@ class ExecutionMock size_t req_size, Seqno total_requests_executed, ByzInfo& info, - bool playback = false, ccf::Store::Tx* tx = nullptr) { // increase total number of commands executed to compare with fake_req command_counter++; diff --git a/src/consensus/pbft/libbyz/types.h b/src/consensus/pbft/libbyz/types.h index 52f81b5c377c..016cd89961fa 100644 --- a/src/consensus/pbft/libbyz/types.h +++ b/src/consensus/pbft/libbyz/types.h @@ -68,6 +68,6 @@ using ExecCommand = std::function; \ No newline at end of file diff --git a/src/consensus/pbft/pbftconfig.h b/src/consensus/pbft/pbftconfig.h index ae3eca51f1b3..7d090a670e17 100644 --- a/src/consensus/pbft/pbftconfig.h +++ b/src/consensus/pbft/pbftconfig.h @@ -58,7 +58,6 @@ namespace pbft size_t req_size, Seqno total_requests_executed, ByzInfo& info, - bool playback = false, ccf::Store::Tx* tx = nullptr) { pbft::Request request; request.deserialise({inb->contents, inb->contents + inb->size}); @@ -88,10 +87,9 @@ namespace pbft #endif enclave::RpcHandler::ProcessPbftResp rep; - if (playback && tx) + if (tx != nullptr) { - rep = - frontend->process_pbft(ctx, *tx, playback, info.include_merkle_roots); + rep = frontend->process_pbft(ctx, *tx, true, info.include_merkle_roots); } else { diff --git a/src/kv/kv.h b/src/kv/kv.h index 421e8be55d7a..cee870f1e27e 100644 --- a/src/kv/kv.h +++ b/src/kv/kv.h @@ -1525,7 +1525,7 @@ namespace kv { // if we pass in a transaction we don't want to commit, just deserialise // and put the views into that transaction - bool commit = (tx == nullptr); + auto commit = (tx == nullptr); // This will return FAILED if the serialised transaction is being // applied out of order. diff --git a/tests/late_joiners.py b/tests/late_joiners.py index 5fb4f5ac712f..4442725960aa 100644 --- a/tests/late_joiners.py +++ b/tests/late_joiners.py @@ -20,7 +20,10 @@ from loguru import logger as LOG # 256 is the number of most recent messages that PBFT keeps in memory before needing to replay the ledger -TOTAL_REQUESTS = 56 +# the rpc requests that are issued for spinning up the network and checking that all nodes have joined, +# along with TOTAL_REQUESTS, are enough to exceed this limit +TOTAL_REQUESTS = 60 + s = random.randint(1, 10) LOG.info(f"setting seed to {s}") random.seed(s) @@ -119,7 +122,7 @@ def run(args): all_nodes = network.get_joined_nodes() term_info = find_primary(network) - fisrt_msg = "Hello, world!" + first_msg = "Hello, world!" second_msg = "Hello, world hello!" final_msg = "Goodbye, world!" @@ -152,11 +155,11 @@ def run(args): check_commit = infra.checker.Checker(mc) check = infra.checker.Checker() - run_requests(all_nodes, TOTAL_REQUESTS, 0, fisrt_msg, 1000) + run_requests(all_nodes, TOTAL_REQUESTS, 0, first_msg, 1000) term_info.update(find_primary(network)) # check that new node has caught up ok - assert_node_up_to_date(check, new_node, fisrt_msg, 1000) + assert_node_up_to_date(check, new_node, first_msg, 1000) # add new node to backups list all_nodes.append(new_node) @@ -171,7 +174,7 @@ def run(args): run_requests(all_nodes, TOTAL_REQUESTS, 1001, second_msg, 2000) term_info.update(find_primary(network)) - assert_node_up_to_date(check, last_node, fisrt_msg, 1000) + assert_node_up_to_date(check, last_node, first_msg, 1000) assert_node_up_to_date(check, last_node, second_msg, 2000) # replace the 2 backups with the 2 new nodes, kill the old ones and ensure we are still making progress From 9869c901190d97d0a96cdd47d8fe194a12a71e8a Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 31 Jan 2020 16:27:26 +0000 Subject: [PATCH 39/42] formatting --- src/consensus/pbft/pbft.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index 19f9deb738c3..6a7432028d23 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -557,8 +557,8 @@ namespace pbft } ccf::Store::Tx tx; - auto deserialise_success = store->deserialise_views( - ret.first, public_only, nullptr, &tx); + auto deserialise_success = + store->deserialise_views(ret.first, public_only, nullptr, &tx); switch (deserialise_success) { From 1c2468eac21c98fd1718e22aeb093f809d86503c Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 31 Jan 2020 18:31:18 +0000 Subject: [PATCH 40/42] Replace void* ctx in global_commit_cb and mark_stable_cb --- src/consensus/pbft/libbyz/Log_allocator.cpp | 4 - src/consensus/pbft/libbyz/Replica.cpp | 21 +++-- src/consensus/pbft/libbyz/Replica.h | 10 ++- .../pbft/libbyz/receive_message_base.h | 15 +++- src/consensus/pbft/pbft.h | 85 ++++++++++--------- 5 files changed, 72 insertions(+), 63 deletions(-) diff --git a/src/consensus/pbft/libbyz/Log_allocator.cpp b/src/consensus/pbft/libbyz/Log_allocator.cpp index 62ab7f9faef2..9155b65f0e4d 100644 --- a/src/consensus/pbft/libbyz/Log_allocator.cpp +++ b/src/consensus/pbft/libbyz/Log_allocator.cpp @@ -21,9 +21,7 @@ Log_allocator::Log_allocator(int csz, int nc) num_chunks = nc; free_chunks = 0; chunks = 0; -#ifndef USE_STD_MALLOC cur = alloc_chunk(); -#endif } void Log_allocator::should_use_malloc(bool _use_malloc) @@ -41,9 +39,7 @@ bool Log_allocator::use_malloc = Log_allocator::~Log_allocator() { -#ifndef USE_STD_MALLOC ::free(chunks); -#endif } Log_allocator::Chunk* Log_allocator::alloc_chunk() diff --git a/src/consensus/pbft/libbyz/Replica.cpp b/src/consensus/pbft/libbyz/Replica.cpp index 938c637c992b..459797a3ee0c 100644 --- a/src/consensus/pbft/libbyz/Replica.cpp +++ b/src/consensus/pbft/libbyz/Replica.cpp @@ -454,7 +454,7 @@ void Replica::playback_pre_prepare( if (global_commit_cb != nullptr && executable_pp->is_signed()) { global_commit_cb( - executable_pp->get_ctx(), executable_pp->view(), global_commit_ctx); + executable_pp->get_ctx(), executable_pp->view(), global_commit_info); } last_executed++; @@ -1199,16 +1199,18 @@ void Replica::register_reply_handler(reply_handler_cb cb, void* ctx) rep_cb_ctx = ctx; } -void Replica::register_global_commit(global_commit_handler_cb cb, void* ctx) +void Replica::register_global_commit( + global_commit_handler_cb cb, pbft::GlobalCommitInfo* gb_info) { global_commit_cb = cb; - global_commit_ctx = ctx; + global_commit_info = gb_info; } -void Replica::register_mark_stable(mark_stable_handler_cb cb, void* ctx) +void Replica::register_mark_stable( + mark_stable_handler_cb cb, pbft::MarkStableInfo* ms_info) { mark_stable_cb = cb; - mark_stable_ctx = ctx; + mark_stable_info = ms_info; } template @@ -2100,12 +2102,9 @@ void Replica::execute_prepared(bool committed) if (global_commit_cb != nullptr && pp->is_signed()) { - LOG_TRACE_FMT( - "Global_commit: {}, signed_version: {}", - pp->get_ctx(), - global_commit_ctx); + LOG_TRACE_FMT("Global_commit: {}", pp->get_ctx()); - global_commit_cb(pp->get_ctx(), pp->view(), global_commit_ctx); + global_commit_cb(pp->get_ctx(), pp->view(), global_commit_info); signed_version = 0; } } @@ -2572,7 +2571,7 @@ void Replica::mark_stable(Seqno n, bool have_state) if (mark_stable_cb != nullptr) { - mark_stable_cb(mark_stable_ctx); + mark_stable_cb(mark_stable_info); } if (have_state) diff --git a/src/consensus/pbft/libbyz/Replica.h b/src/consensus/pbft/libbyz/Replica.h index cb6862ae1ea9..5000b254f5bf 100644 --- a/src/consensus/pbft/libbyz/Replica.h +++ b/src/consensus/pbft/libbyz/Replica.h @@ -123,10 +123,12 @@ class Replica : public Node, public IMessageReceiveBase void register_reply_handler(reply_handler_cb cb, void* ctx); // Effects: Registers a handler that takes reply messages - void register_global_commit(global_commit_handler_cb cb, void* ctx); + void register_global_commit( + global_commit_handler_cb cb, pbft::GlobalCommitInfo* ctx); // Effects:: Registers a handler that is called when a batch is committed - void register_mark_stable(mark_stable_handler_cb cb, void* ctx); + void register_mark_stable( + mark_stable_handler_cb cb, pbft::MarkStableInfo* ctx); template std::unique_ptr create_message( @@ -466,10 +468,10 @@ class Replica : public Node, public IMessageReceiveBase // used to callback when we have committed a batch global_commit_handler_cb global_commit_cb; - void* global_commit_ctx; + pbft::GlobalCommitInfo* global_commit_info; mark_stable_handler_cb mark_stable_cb = nullptr; - void* mark_stable_ctx; + pbft::MarkStableInfo* mark_stable_info; // callback when we call mark_stable // Used to not the append_entries_index of the stable seqno // We don't want to send append entries further than the latest stable seqno diff --git a/src/consensus/pbft/libbyz/receive_message_base.h b/src/consensus/pbft/libbyz/receive_message_base.h index db73d55faabc..300843698675 100644 --- a/src/consensus/pbft/libbyz/receive_message_base.h +++ b/src/consensus/pbft/libbyz/receive_message_base.h @@ -8,6 +8,12 @@ #include "Reply.h" #include "Request.h" +namespace pbft +{ + struct MarkStableInfo; + struct GlobalCommitInfo; +} + class IMessageReceiveBase { public: @@ -17,11 +23,12 @@ class IMessageReceiveBase typedef void (*reply_handler_cb)(Reply* m, void* ctx); virtual void register_reply_handler(reply_handler_cb cb, void* ctx) = 0; typedef void (*global_commit_handler_cb)( - int64_t tx_ctx, View view, void* cb_ctx); - typedef void (*mark_stable_handler_cb)(void* bc_ctx); + int64_t tx_ctx, View view, pbft::GlobalCommitInfo* cb_ctx); + typedef void (*mark_stable_handler_cb)(pbft::MarkStableInfo* ms_info); virtual void register_global_commit( - global_commit_handler_cb cb, void* ctx) = 0; - virtual void register_mark_stable(mark_stable_handler_cb cb, void* ctx) = 0; + global_commit_handler_cb cb, pbft::GlobalCommitInfo* gb_info) = 0; + virtual void register_mark_stable( + mark_stable_handler_cb cb, pbft::MarkStableInfo* ms_info) = 0; virtual size_t num_correct_replicas() const = 0; virtual size_t f() const = 0; virtual void set_f(ccf::NodeId f) = 0; diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index 6a7432028d23..3b418361c4b7 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -27,6 +27,35 @@ namespace pbft { + using SeqNo = kv::Consensus::SeqNo; + using View = kv::Consensus::View; + + struct ViewChangeInfo + { + ViewChangeInfo( + View view_, SeqNo min_global_commit_) : + min_global_commit(min_global_commit_), + view(view_) + {} + + SeqNo min_global_commit; + View view; + }; + + struct MarkStableInfo + { + pbft::PbftStore* store; + Index* latest_stable_ae_idx; + } register_mark_stable_ctx; + + struct GlobalCommitInfo + { + pbft::PbftStore* store; + SeqNo* global_commit_seqno; + View* last_commit_view; + std::vector* view_change_list; + } register_global_commit_ctx; + // maps node to last sent index to that node using NodesMap = std::unordered_map; // TODO remove hard coded value (https://github.com/microsoft/CCF/issues/753) @@ -194,33 +223,7 @@ namespace pbft // When this is set, only public domain is deserialised when receving append // entries bool public_only = false; - - struct view_change_info - { - view_change_info(View view_, SeqNo min_global_commit_) : - min_global_commit(min_global_commit_), - view(view_) - {} - - SeqNo min_global_commit; - View view; - }; - - std::vector view_change_list; - - struct register_global_commit_info - { - pbft::PbftStore* store; - SeqNo* global_commit_seqno; - View* last_commit_view; - std::vector* view_change_list; - } register_global_commit_ctx; - - struct register_mark_stable_info - { - pbft::PbftStore* store; - Index* latest_stable_ae_idx; - } register_mark_stable_ctx; + std::vector view_change_list; public: Pbft( @@ -242,7 +245,7 @@ namespace pbft global_commit_seqno(1), last_commit_view(0), store(std::move(store_)), - view_change_list(1, view_change_info(0, 0)) + view_change_list(1, ViewChangeInfo(0, 0)) { // configure replica GeneralInfo general_info; @@ -309,11 +312,11 @@ namespace pbft message_receiver_base->register_reply_handler( reply_handler_cb, client_proxy.get()); - auto mark_stable_cb = [](void* ctx) { - auto ms_ctx = static_cast(ctx); - *ms_ctx->latest_stable_ae_idx = ms_ctx->store->current_version(); + auto mark_stable_cb = [](MarkStableInfo* ms_info) { + *ms_info->latest_stable_ae_idx = ms_info->store->current_version(); LOG_TRACE_FMT( - "latest_stable_ae_index is set to {}", *ms_ctx->latest_stable_ae_idx); + "latest_stable_ae_index is set to {}", + *ms_info->latest_stable_ae_idx); }; register_mark_stable_ctx.store = store.get(); @@ -322,19 +325,21 @@ namespace pbft message_receiver_base->register_mark_stable( mark_stable_cb, ®ister_mark_stable_ctx); - auto global_commit_cb = [](kv::Version version, ::View view, void* ctx) { - auto p = static_cast(ctx); - if (version == kv::NoVersion || version < *p->global_commit_seqno) + auto global_commit_cb = []( + kv::Version version, + ::View view, + GlobalCommitInfo* gb_info) { + if (version == kv::NoVersion || version < *gb_info->global_commit_seqno) { return; } - *p->global_commit_seqno = version; + *gb_info->global_commit_seqno = version; - if (*p->last_commit_view < view) + if (*gb_info->last_commit_view < view) { - p->view_change_list->emplace_back(view, version); + gb_info->view_change_list->emplace_back(view, version); } - p->store->compact(version); + gb_info->store->compact(version); }; register_global_commit_ctx.store = store.get(); @@ -383,7 +388,7 @@ namespace pbft for (auto rit = view_change_list.rbegin(); rit != view_change_list.rend(); ++rit) { - view_change_info& info = *rit; + ViewChangeInfo& info = *rit; if (info.min_global_commit <= seqno) { return info.view + 2; From 375db5528f0cb6f285368e9667cc6c6d4cabe520 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Fri, 31 Jan 2020 18:35:22 +0000 Subject: [PATCH 41/42] formatting --- src/consensus/pbft/pbft.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/consensus/pbft/pbft.h b/src/consensus/pbft/pbft.h index 3b418361c4b7..84dd0fdcf94c 100644 --- a/src/consensus/pbft/pbft.h +++ b/src/consensus/pbft/pbft.h @@ -32,8 +32,7 @@ namespace pbft struct ViewChangeInfo { - ViewChangeInfo( - View view_, SeqNo min_global_commit_) : + ViewChangeInfo(View view_, SeqNo min_global_commit_) : min_global_commit(min_global_commit_), view(view_) {} From 10163d821a7caebef960b79aa1d336d85a065210 Mon Sep 17 00:00:00 2001 From: Olga Vrousgou Date: Mon, 3 Feb 2020 11:09:36 +0000 Subject: [PATCH 42/42] disabing late joiners test for a stable CI - will re-enable with rollback implementation --- CMakeLists.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2abaaf9f4565..8f6cc2225dbd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -601,12 +601,6 @@ if(BUILD_TESTS) ADDITIONAL_ARGS --election-timeout 2000 ) - add_e2e_test( - NAME late_joiners - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/late_joiners.py - ADDITIONAL_ARGS --skip-suspension - ) - if(NOT PBFT) # Logging scenario perf test add_perf_test(