Skip to content
This repository has been archived by the owner on Aug 16, 2021. It is now read-only.

Fixed peer that were stuck #3296

11 changes: 6 additions & 5 deletions src/Stratis.Bitcoin/Connection/ConnectionManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,10 @@ private void AddComponentStats(StringBuilder builder)
{
var chainHeadersBehavior = peer.Behavior<ConsensusManagerBehavior>();

string peerHeights = $"(r/s):{(chainHeadersBehavior.BestReceivedTip != null ? chainHeadersBehavior.BestReceivedTip.Height.ToString() : peer.PeerVersion?.StartHeight + "*" ?? "-")}";
peerHeights += $"/{(chainHeadersBehavior.BestSentHeader != null ? chainHeadersBehavior.BestSentHeader.Height.ToString() : peer.PeerVersion?.StartHeight + "*" ?? "-")}";
string peerHeights = $"(r/s/c):" +
$"{(chainHeadersBehavior.BestReceivedTip != null ? chainHeadersBehavior.BestReceivedTip.Height.ToString() : peer.PeerVersion != null ? peer.PeerVersion.StartHeight + "*" : "-")}" +
$"/{(chainHeadersBehavior.BestSentHeader != null ? chainHeadersBehavior.BestSentHeader.Height.ToString() : peer.PeerVersion != null ? peer.PeerVersion.StartHeight + "*" : "-")}" +
$"/{chainHeadersBehavior.GetCachedItemsCount()}";

// TODO: Need a snapshot cache so that not only currently connected peers are summed
string peerTraffic = $"R/S MB: {peer.Counter.ReadBytes.BytesToMegaBytes()}/{peer.Counter.WrittenBytes.BytesToMegaBytes()}";
Expand All @@ -244,9 +246,8 @@ private void AddComponentStats(StringBuilder builder)

string agent = peer.PeerVersion != null ? peer.PeerVersion.UserAgent : "[Unknown]";
peerBuilder.AppendLine(
"Peer:" + (peer.RemoteSocketEndpoint + ", ").PadRight(LoggingConfiguration.ColumnLength + 15) +
(" connected:" + (peer.Inbound ? "inbound" : "outbound") + ",").PadRight(LoggingConfiguration.ColumnLength + 7)
+ peerHeights.PadRight(LoggingConfiguration.ColumnLength + 7)
(peer.Inbound ? "IN " : "OUT ") + "Peer:" + (peer.RemoteSocketEndpoint + ", ").PadRight(LoggingConfiguration.ColumnLength + 15)
+ peerHeights.PadRight(LoggingConfiguration.ColumnLength + 14)
+ peerTraffic.PadRight(LoggingConfiguration.ColumnLength + 7)
+ " agent:" + agent);
}
Expand Down
32 changes: 31 additions & 1 deletion src/Stratis.Bitcoin/Consensus/ChainedHeaderTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -654,8 +654,14 @@ public ConnectNewHeadersResult ConnectNewHeaders(int networkPeerId, List<BlockHe

if (insufficientInfo)
{
if (this.TryFindLastValidatedHeader(headers, out ChainedHeader lastAlreadyValidatedHeader))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks correct, gj

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thats the main fix indeed.

{
MithrilMan marked this conversation as resolved.
Show resolved Hide resolved
this.logger.LogTrace("Some headers were already validated.");
this.AddOrReplacePeerTip(networkPeerId, lastAlreadyValidatedHeader.Header.GetHash());
}

this.logger.LogTrace("(-)[INSUFF_INFO]");
return new ConnectNewHeadersResult() { Consumed = null };
return new ConnectNewHeadersResult() { Consumed = lastAlreadyValidatedHeader };
}

if (newChainedHeaders == null)
Expand Down Expand Up @@ -1103,6 +1109,30 @@ private bool TryFindNewHeaderIndex(List<BlockHeader> headers, out int newHeaderI
return false;
}

/// <summary>
/// Find the last validated <see cref="ChainedHeader"/> header in the given list of <see cref="headers"/>.
/// </summary>
/// <remarks>Headers are supposed to be consecutive and sorted by height</remarks>
private bool TryFindLastValidatedHeader(List<BlockHeader> headers, out ChainedHeader lastValidatedHeader)
{
lastValidatedHeader = null;
foreach (BlockHeader header in headers)
{
uint256 currentBlockHash = header.GetHash();
if (this.chainedHeadersByHash.TryGetValue(currentBlockHash, out ChainedHeader currentChainedHeader))
{
lastValidatedHeader = currentChainedHeader;
}
else
{
// Stop at the first header not found.
break;
}
}

return lastValidatedHeader != null;
}

/// <summary>
/// Checks if switching to specified <paramref name="chainedHeader"/> would require rewinding consensus behind the finalized block height.
/// </summary>
Expand Down
12 changes: 6 additions & 6 deletions src/Stratis.Bitcoin/Consensus/ConsensusManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,12 @@ public ConnectNewHeadersResult HeadersPresented(INetworkPeer peer, List<BlockHea

connectNewHeadersResult = this.chainedHeaderTree.ConnectNewHeaders(peerId, headers);

if (!this.peersByPeerId.ContainsKey(peerId))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The position in the locked code segment that this is placed seems a bit arbitrary (both doing it last and moving it higher up). What was the reasoning behind putting it after ConnectNewHeaders in particular instead of, say, just before it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dangershony didn't wanted to put it before because in case ConnectNewHeaders throws the peer will be disconnected, so no need to att to the peersByPeerId dictionary.

Probably it's right because we may incur in a deadlock if we put it before ConnectNewHeaders because it causes peer to disconnect.

Anyway there are more things to investigate about peersByPeerId because it seems this gets populated only for peers supporting PH (after last checkpoint) and this may cause problems

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dangershony @MithrilMan I'm also sketchy on why this has been moved up? Care to elaborate?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Its a bit long but here goes:

The bellow code is valid scenarios of the flow

From CM.HeadersPresented method

                if (connectNewHeadersResult == null)
                {
                    this.logger.LogTrace("(-)[NO_HEADERS_CONNECTED]:null");
                    return null;
                }

                if (connectNewHeadersResult.Consumed == null)
                {
                    this.logger.LogTrace("(-)[NOTHING_CONSUMED]");
                    return connectNewHeadersResult;
                }

If both the above conditions are true this means insufficient info to validate a proven header.
This will often happen when the first header is inefficient then connectNewHeadersResult.Consumed will be null (all though @MithrilMan also fixed another bug where we would always report it as null, see the changes to CHT)

If Consumed == null causes CMB to do this (basically headers are added to the cache)

                if (result.Consumed == null)
                {
                    this.cachedHeaders.AddRange(headers);
                    this.logger.LogDebug("All {0} items were not consumed and added to cache.", headers.Count);

                    return;
                }

But this issue is actually reflected later in the method NotifyBehaviorsOnConsensusTipChangedAsync this code:

     lock (this.peerLock)
            {
                foreach (INetworkPeer peer in this.peersByPeerId.Values)
                    behaviors.Add(peer.Behavior<ConsensusManagerBehavior>());
            }

That method will trigger when consensus tip changed and should notify the peer to send the next batch of headers (or actually its proven headers) that are waiting in cache to be processed (because we could not connect them earlier due to missing UTXOs). but the peer was never added to this list this.peersByPeerId

So the bug that this change fixes (because the fix to CHT will fix the scenario where any of the headers had insufficient info) is that if we connected to a peer and the first headers it presents could not be connected the peer would be stuck because we would not notify the peer's behaviour to try presenting those headers again when consensus advances.

{
this.peersByPeerId.Add(peerId, peer);
this.logger.LogTrace("New peer with ID {0} was added.", peerId);
}

if (connectNewHeadersResult == null)
{
this.logger.LogTrace("(-)[NO_HEADERS_CONNECTED]:null");
Expand All @@ -272,12 +278,6 @@ public ConnectNewHeadersResult HeadersPresented(INetworkPeer peer, List<BlockHea
this.chainState.IsAtBestChainTip = this.IsConsensusConsideredToBeSyncedLocked();

this.blockPuller.NewPeerTipClaimed(peer, connectNewHeadersResult.Consumed);

if (!this.peersByPeerId.ContainsKey(peerId))
{
this.peersByPeerId.Add(peerId, peer);
this.logger.LogTrace("New peer with ID {0} was added.", peerId);
}
}

if (triggerDownload && (connectNewHeadersResult.DownloadTo != null))
Expand Down
32 changes: 30 additions & 2 deletions src/Stratis.Bitcoin/Consensus/ConsensusManagerBehavior.cs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ public async Task<ConnectNewHeadersResult> ConsensusTipChangedAsync()
this.BestReceivedTip = result.Consumed;
this.UpdateBestSentHeader(this.BestReceivedTip);

int consumedCount = this.cachedHeaders.IndexOf(result.Consumed.Header) + 1;
int consumedCount = this.GetConsumedHeadersCount(this.cachedHeaders, result.Consumed.Header);

this.cachedHeaders.RemoveRange(0, consumedCount);
int cacheSize = this.cachedHeaders.Count;

Expand Down Expand Up @@ -379,7 +380,7 @@ protected virtual async Task ProcessHeadersAsync(INetworkPeer peer, List<BlockHe
if (result.Consumed.HashBlock != headers.Last().GetHash())
{
// Some headers were not consumed, add to cache.
int consumedCount = headers.IndexOf(result.Consumed.Header) + 1;
int consumedCount = this.GetConsumedHeadersCount(headers, result.Consumed.Header);
this.cachedHeaders.AddRange(headers.Skip(consumedCount));

this.logger.LogDebug("{0} out of {1} items were not consumed and added to cache.", headers.Count - consumedCount, headers.Count);
Expand Down Expand Up @@ -659,5 +660,32 @@ public override object Clone()
{
return new ConsensusManagerBehavior(this.chain, this.initialBlockDownloadState, this.consensusManager, this.peerBanning, this.loggerFactory);
}

internal int GetCachedItemsCount()
{
return this.cachedHeaders.Count;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm what about a lock?

Maybe for taking the count it snot a big deal.

}

/// <summary>
/// Gets the count of consumed headers in a <paramref name="headers"/> list, giving a <paramref name="consumedHeader"/> reference.
/// All items up to <paramref name="consumedHeader"/> are considered consumed.
/// </summary>
/// <param name="headers">List of headers to use to get the consumed count.</param>
/// <param name="consumedHeader">The consumed header reference.</param>
/// <returns>The number of consumed cached items.</returns>
private int GetConsumedHeadersCount(List<BlockHeader> headers, BlockHeader consumedHeader)
{
uint256 consumedHeaderHash = consumedHeader.GetHash();

for (int i = 0; i < headers.Count; i++)
{
if (headers[i].GetHash() == consumedHeaderHash)
{
return i + 1;
}
}

return 0;
}
}
}