From b1f14f47c6ecbe40322b27fe6f40180e1a55edc0 Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 4 Jan 2021 13:25:35 +1000 Subject: [PATCH] Rewrite GetData handling to match the zcashd implementation (#1518) * Rewrite GetData handling to match the zcashd implementation `zcashd` silently ignores missing blocks, but sends found transactions followed by a `NotFound` message: https://github.com/zcash/zcash/blob/e7b425298f6d9a54810cb7183f00be547e4d9415/src/main.cpp#L5497 This is significantly different to the behaviour expected by the old Zebra connection state machine, which expected `NotFound` for blocks. Also change Zebra's GetData responses to peer request so they ignore missing blocks. * Stop hanging on incomplete transaction or block responses Instead, if the peer sends an unexpected block, unexpected transaction, or NotFound message: 1. end the request, and return a partial response containing any items that were successfully received 2. if none of the expected blocks or transactions were received, return an error, and close the connection --- zebra-network/src/peer/connection.rs | 206 +++++++++++++----- .../src/protocol/external/message.rs | 15 ++ .../src/protocol/internal/request.rs | 4 +- zebrad/src/components/inbound.rs | 13 +- 4 files changed, 182 insertions(+), 56 deletions(-) diff --git a/zebra-network/src/peer/connection.rs b/zebra-network/src/peer/connection.rs index 24322335a64..78838c57041 100644 --- a/zebra-network/src/peer/connection.rs +++ b/zebra-network/src/peer/connection.rs @@ -1,18 +1,11 @@ -// NOT A PLACE OF HONOR -// -// NO ESTEEMED DEED IS COMMEMORATED HERE -// -// NOTHING VALUED IS HERE -// -// What is here was dangerous and repulsive to us. This message is a warning -// about danger. -// -// The danger is in a particular module... it increases towards a center... the -// center of danger is pub async fn Connection::run the danger is still present, -// in your time, as it was in ours. -// -// The danger is to the mind. The danger is unleashed only if you substantially -// disturb this code. This code is best shunned and left encapsulated. +//! Zcash peer connection protocol handing for Zebra. +//! +//! Maps the external Zcash/Bitcoin protocol to Zebra's internal request/response +//! protocol. +//! +//! This module contains a lot of undocumented state, assumptions and invariants. +//! And it's unclear if these assumptions match the `zcashd` implementation. +//! It should be refactored into a cleaner set of request/response pairs (#1515). use std::collections::HashSet; use std::sync::Arc; @@ -91,6 +84,10 @@ impl Handler { } } (Handler::Peers, Message::Addr(addrs)) => Handler::Finished(Ok(Response::Peers(addrs))), + // `zcashd` returns requested transactions in a single batch of messages. + // Other transaction or non-transaction messages can come before or after the batch. + // After the transaction batch, `zcashd` sends `NotFound` if any transactions are missing: + // https://github.com/zcash/zcash/blob/e7b425298f6d9a54810cb7183f00be547e4d9415/src/main.cpp#L5617 ( Handler::TransactionsByHash { mut hashes, @@ -98,20 +95,52 @@ impl Handler { }, Message::Tx(transaction), ) => { + // assumptions: + // - the transaction messages are sent in a single continous batch + // - missing transaction hashes are included in a `NotFound` message if hashes.remove(&transaction.hash()) { + // we are in the middle of the continous transaction messages transactions.push(transaction); + if hashes.is_empty() { + Handler::Finished(Ok(Response::Transactions(transactions))) + } else { + Handler::TransactionsByHash { + hashes, + transactions, + } + } } else { + // We got a transaction we didn't ask for. If the caller doesn't know any of the + // transactions, they should have sent a `NotFound` with all the hashes, rather + // than an unsolicited transaction. + // + // So either: + // 1. The peer implements the protocol badly, skipping `NotFound`. + // We should cancel the request, so we don't hang waiting for transactions + // that will never arrive. + // 2. The peer sent an unsolicited transaction. + // We should ignore the transaction, and wait for the actual response. + // + // We end the request, so we don't hang on bad peers (case 1). But we keep the + // connection open, so the inbound service can process transactions from good + // peers (case 2). ignored_msg = Some(Message::Tx(transaction)); - } - if hashes.is_empty() { - Handler::Finished(Ok(Response::Transactions(transactions))) - } else { - Handler::TransactionsByHash { - hashes, - transactions, + if !transactions.is_empty() { + // if our peers start sending mixed solicited and unsolicited transactions, + // we should update this code to handle those responses + error!("unexpected transaction from peer: transaction responses should be sent in a continuous batch, followed by notfound. Using partial received transactions as the peer response"); + // TODO: does the caller need a list of missing transactions? (#1515) + Handler::Finished(Ok(Response::Transactions(transactions))) + } else { + // TODO: is it really an error if we ask for a transaction hash, but the peer + // doesn't know it? Should we close the connection on that kind of error? + // Should we fake a NotFound response here? (#1515) + let items = hashes.iter().map(|h| InventoryHash::Tx(*h)).collect(); + Handler::Finished(Err(PeerError::NotFound(items))) } } } + // `zcashd` peers actually return this response ( Handler::TransactionsByHash { hashes, @@ -119,23 +148,46 @@ impl Handler { }, Message::NotFound(items), ) => { - let hash_in_hashes = |item: &InventoryHash| { - if let InventoryHash::Tx(hash) = item { - hashes.contains(hash) - } else { - false - } - }; - if items.iter().all(hash_in_hashes) { - Handler::Finished(Err(PeerError::NotFound(items))) + // assumptions: + // - the peer eventually returns a transaction or a `NotFound` entry + // for each hash + // - all `NotFound` entries are contained in a single message + // - the `NotFound` message comes after the transaction messages + // + // If we're in sync with the peer, then the `NotFound` should contain the remaining + // hashes from the handler. If we're not in sync with the peer, we should return + // what we got so far, and log an error. + let missing_transactions: HashSet<_> = items + .iter() + .filter_map(|inv| match &inv { + InventoryHash::Tx(tx) => Some(tx), + _ => None, + }) + .cloned() + .collect(); + if missing_transactions != hashes { + trace!(?items, ?missing_transactions, ?hashes); + // if these errors are noisy, we should replace them with debugs + error!("unexpected notfound message from peer: all remaining transaction hashes should be listed in the notfound. Using partial received transactions as the peer response"); + } + if missing_transactions.len() != items.len() { + trace!(?items, ?missing_transactions, ?hashes); + error!("unexpected notfound message from peer: notfound contains duplicate hashes or non-transaction hashes. Using partial received transactions as the peer response"); + } + + if !transactions.is_empty() { + // TODO: does the caller need a list of missing transactions? (#1515) + Handler::Finished(Ok(Response::Transactions(transactions))) } else { - ignored_msg = Some(Message::NotFound(items)); - Handler::TransactionsByHash { - hashes, - transactions, - } + // TODO: is it really an error if we ask for a transaction hash, but the peer + // doesn't know it? Should we close the connection on that kind of error? (#1515) + Handler::Finished(Err(PeerError::NotFound(items))) } } + // `zcashd` returns requested blocks in a single batch of messages. + // Other blocks or non-blocks messages can come before or after the batch. + // `zcashd` silently skips missing blocks, rather than sending a final `NotFound` message. + // https://github.com/zcash/zcash/blob/e7b425298f6d9a54810cb7183f00be547e4d9415/src/main.cpp#L5523 ( Handler::BlocksByHash { mut hashes, @@ -143,30 +195,80 @@ impl Handler { }, Message::Block(block), ) => { + // assumptions: + // - the block messages are sent in a single continuous batch + // - missing blocks are silently skipped + // (there is no `NotFound` message at the end of the batch) if hashes.remove(&block.hash()) { + // we are in the middle of the continuous block messages blocks.push(block); + if hashes.is_empty() { + Handler::Finished(Ok(Response::Blocks(blocks))) + } else { + Handler::BlocksByHash { hashes, blocks } + } } else { + // We got a block we didn't ask for. + // + // So either: + // 1. The peer doesn't know any of the blocks we asked for. + // We should cancel the request, so we don't hang waiting for blocks that + // will never arrive. + // 2. The peer sent an unsolicited block. + // We should ignore that block, and wait for the actual response. + // + // We end the request, so we don't hang on forked or lagging peers (case 1). + // But we keep the connection open, so the inbound service can process blocks + // from good peers (case 2). ignored_msg = Some(Message::Block(block)); - } - if hashes.is_empty() { - Handler::Finished(Ok(Response::Blocks(blocks))) - } else { - Handler::BlocksByHash { hashes, blocks } + if !blocks.is_empty() { + // TODO: does the caller need a list of missing blocks? (#1515) + Handler::Finished(Ok(Response::Blocks(blocks))) + } else { + // TODO: is it really an error if we ask for a block hash, but the peer + // doesn't know it? Should we close the connection on that kind of error? + // Should we fake a NotFound response here? (#1515) + let items = hashes.iter().map(|h| InventoryHash::Block(*h)).collect(); + Handler::Finished(Err(PeerError::NotFound(items))) + } } } + // peers are allowed to return this response, but `zcashd` never does (Handler::BlocksByHash { hashes, blocks }, Message::NotFound(items)) => { - let hash_in_hashes = |item: &InventoryHash| { - if let InventoryHash::Block(hash) = item { - hashes.contains(hash) - } else { - false - } - }; - if items.iter().all(hash_in_hashes) { - Handler::Finished(Err(PeerError::NotFound(items))) + // assumptions: + // - the peer eventually returns a block or a `NotFound` entry + // for each hash + // - all `NotFound` entries are contained in a single message + // - the `NotFound` message comes after the block messages + // + // If we're in sync with the peer, then the `NotFound` should contain the remaining + // hashes from the handler. If we're not in sync with the peer, we should return + // what we got so far, and log an error. + let missing_blocks: HashSet<_> = items + .iter() + .filter_map(|inv| match &inv { + InventoryHash::Block(b) => Some(b), + _ => None, + }) + .cloned() + .collect(); + if missing_blocks != hashes { + trace!(?items, ?missing_blocks, ?hashes); + // if these errors are noisy, we should replace them with debugs + error!("unexpected notfound message from peer: all remaining block hashes should be listed in the notfound. Using partial received blocks as the peer response"); + } + if missing_blocks.len() != items.len() { + trace!(?items, ?missing_blocks, ?hashes); + error!("unexpected notfound message from peer: notfound contains duplicate hashes or non-block hashes. Using partial received blocks as the peer response"); + } + + if !blocks.is_empty() { + // TODO: does the caller need a list of missing blocks? (#1515) + Handler::Finished(Ok(Response::Blocks(blocks))) } else { - ignored_msg = Some(Message::NotFound(items)); - Handler::BlocksByHash { hashes, blocks } + // TODO: is it really an error if we ask for a block hash, but the peer + // doesn't know it? Should we close the connection on that kind of error? (#1515) + Handler::Finished(Err(PeerError::NotFound(items))) } } (Handler::FindBlocks, Message::Inv(items)) diff --git a/zebra-network/src/protocol/external/message.rs b/zebra-network/src/protocol/external/message.rs index 930faed81ef..60d4ae1d423 100644 --- a/zebra-network/src/protocol/external/message.rs +++ b/zebra-network/src/protocol/external/message.rs @@ -193,7 +193,13 @@ pub enum Message { /// content of a specific object, and is usually sent after /// receiving an `inv` packet, after filtering known elements. /// + /// `zcashd` returns requested items in a single batch of messages. + /// Missing blocks are silently skipped. Missing transaction hashes are + /// included in a single `NotFound` message following the transactions. + /// Other item or non-item messages can come before or after the batch. + /// /// [Bitcoin reference](https://en.bitcoin.it/wiki/Protocol_documentation#getdata) + /// [zcashd code](https://github.com/zcash/zcash/blob/e7b425298f6d9a54810cb7183f00be547e4d9415/src/main.cpp#L5523) GetData(Vec), /// A `block` message. @@ -208,7 +214,16 @@ pub enum Message { /// A `notfound` message. /// + /// When a peer requests a list of transaction hashes, `zcashd` returns: + /// - a batch of messages containing found transactions, then + /// - a `NotFound` message containing a list of transaction hashes that + /// aren't available in its mempool or state. + /// + /// But when a peer requests blocks or headers, any missing items are + /// silently skipped, without any `NotFound` messages. + /// /// [Bitcoin reference](https://en.bitcoin.it/wiki/Protocol_documentation#notfound) + /// [zcashd code](https://github.com/zcash/zcash/blob/e7b425298f6d9a54810cb7183f00be547e4d9415/src/main.cpp#L5632) // See note above on `Inventory`. NotFound(Vec), diff --git a/zebra-network/src/protocol/internal/request.rs b/zebra-network/src/protocol/internal/request.rs index b5b18e9cd98..364f0a0d543 100644 --- a/zebra-network/src/protocol/internal/request.rs +++ b/zebra-network/src/protocol/internal/request.rs @@ -78,7 +78,7 @@ pub enum Request { /// Returns [`Response::Transactions`](super::Response::Transactions). TransactionsByHash(HashSet), - /// Request block hashes of subsequent blocks in the chain, giving hashes of + /// Request block hashes of subsequent blocks in the chain, given hashes of /// known blocks. /// /// # Returns @@ -104,7 +104,7 @@ pub enum Request { stop: Option, }, - /// Request headers of subsequent blocks in the chain, giving hashes of + /// Request headers of subsequent blocks in the chain, given hashes of /// known blocks. /// /// # Returns diff --git a/zebrad/src/components/inbound.rs b/zebrad/src/components/inbound.rs index 416d56c3243..59bacabef00 100644 --- a/zebrad/src/components/inbound.rs +++ b/zebrad/src/components/inbound.rs @@ -167,8 +167,10 @@ impl Service for Inbound { .try_filter_map(|rsp| { futures::future::ready(match rsp { zs::Response::Block(Some(block)) => Ok(Some(block)), - // XXX: check how zcashd handles missing blocks? - zs::Response::Block(None) => Err("missing block".into()), + // `zcashd` ignores missing blocks in GetData responses, + // rather than including them in a trailing `NotFound` + // message + zs::Response::Block(None) => Ok(None), _ => unreachable!("wrong response from state"), }) }) @@ -177,6 +179,13 @@ impl Service for Inbound { .boxed() } zn::Request::TransactionsByHash(_transactions) => { + // `zcashd` returns a list of found transactions, followed by a + // `NotFound` message if any transactions are missing. `zcashd` + // says that Simplified Payment Verification (SPV) clients rely on + // this behaviour - are there any of them on the Zcash network? + // https://github.com/zcash/zcash/blob/e7b425298f6d9a54810cb7183f00be547e4d9415/src/main.cpp#L5632 + // We'll implement this request once we have a mempool: + // https://en.bitcoin.it/wiki/Protocol_documentation#getdata debug!("ignoring unimplemented request"); async { Ok(zn::Response::Nil) }.boxed() }