-
Notifications
You must be signed in to change notification settings - Fork 180
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BFT] Epoch Recovery integration test #6823
base: feature/efm-recovery
Are you sure you want to change the base?
Changes from all commits
97c4177
0b4410a
a2d9891
a4ee5a1
eea3846
cbed983
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -2,7 +2,6 @@ package cohort2 | |||||||
|
||||||||
import ( | ||||||||
"fmt" | ||||||||
|
||||||||
"strings" | ||||||||
"testing" | ||||||||
"time" | ||||||||
|
@@ -15,11 +14,13 @@ import ( | |||||||
sdk "github.com/onflow/flow-go-sdk" | ||||||||
|
||||||||
"github.com/onflow/flow-go/cmd/bootstrap/run" | ||||||||
"github.com/onflow/flow-go/cmd/util/cmd/common" | ||||||||
"github.com/onflow/flow-go/integration/tests/epochs" | ||||||||
"github.com/onflow/flow-go/integration/utils" | ||||||||
"github.com/onflow/flow-go/model/bootstrap" | ||||||||
"github.com/onflow/flow-go/model/flow" | ||||||||
"github.com/onflow/flow-go/model/flow/filter" | ||||||||
"github.com/onflow/flow-go/utils/unittest" | ||||||||
) | ||||||||
|
||||||||
func TestRecoverEpoch(t *testing.T) { | ||||||||
|
@@ -39,9 +40,9 @@ func (s *RecoverEpochSuite) SetupTest() { | |||||||
s.EpochLen = 150 | ||||||||
s.FinalizationSafetyThreshold = 20 | ||||||||
s.NumOfCollectionClusters = 1 | ||||||||
// we need to use 3 consensus nodes to be able to eject a single node from the consensus committee | ||||||||
// and still have a Random Beacon committee which meets the protocol.RandomBeaconSafetyThreshold | ||||||||
s.NumOfConsensusNodes = 3 | ||||||||
// we need to use 4 consensus nodes to be able to eject a single node and still have a super-majority and | ||||||||
// have a Random Beacon committee which meets the protocol.RandomBeaconSafetyThreshold. | ||||||||
s.NumOfConsensusNodes = 4 | ||||||||
|
||||||||
// run the generic setup, which starts up the network | ||||||||
s.BaseSuite.SetupTest() | ||||||||
|
@@ -290,3 +291,120 @@ func (s *RecoverEpochSuite) TestRecoverEpochNodeEjected() { | |||||||
|
||||||||
s.AssertInEpoch(s.Ctx, 1) | ||||||||
} | ||||||||
|
||||||||
// TestRecoverEpochEjectNodeDifferentDKG ensures that the recover epoch governance transaction flow works as expected, and a network that | ||||||||
// enters Epoch Fallback Mode can successfully recover. | ||||||||
// For this specific scenario, we are testing the case where the symmetric difference between the consensus committee and the Random Beacon committee has | ||||||||
// cardinality 2. In other words, there is a node which is part of the consensus committee but not part of the Random Beacon committee and | ||||||||
// another node which is part of the Random Beacon committee but not part of the consensus committee. | ||||||||
// This test will do the following: | ||||||||
// 1. Triggers EFM by turning off the sole collection node before the end of the DKG, forcing the DKG to fail. | ||||||||
// 2. Generates epoch recover transaction args using the epoch efm-recover-tx-args. | ||||||||
// 3. Eject consensus node by modifying the snapshot before generating the recover epoch transaction args. | ||||||||
// 4. Eject consensus node from the Random Beacon committee by modifying the snapshot before generating the recover epoch transaction args. | ||||||||
// 5. Submit recover epoch transaction. | ||||||||
// 6. Ensure expected EpochRecover event is emitted. | ||||||||
// 7. Ensure the network transitions into the recovery epoch and finalizes the first view of the recovery epoch. | ||||||||
func (s *RecoverEpochSuite) TestRecoverEpochEjectNodeDifferentDKG() { | ||||||||
// 1. Manually trigger EFM | ||||||||
|
||||||||
// pause the collection node to trigger EFM by failing DKG | ||||||||
ln := s.GetContainersByRole(flow.RoleCollection)[0] | ||||||||
require.NoError(s.T(), ln.Pause()) | ||||||||
s.AwaitFinalizedView(s.Ctx, s.GetDKGEndView(), 2*time.Minute, 500*time.Millisecond) | ||||||||
// start the paused collection node now that we are in EFM | ||||||||
require.NoError(s.T(), ln.Start()) | ||||||||
|
||||||||
// get final view from the latest snapshot | ||||||||
epoch1FinalView, err := s.Net.BootstrapSnapshot.Epochs().Current().FinalView() | ||||||||
require.NoError(s.T(), err) | ||||||||
|
||||||||
// Wait for at least the first view past the current epoch's original FinalView to be finalized. | ||||||||
s.TimedLogf("waiting for epoch transition (finalized view %d)", epoch1FinalView+1) | ||||||||
s.AwaitFinalizedView(s.Ctx, epoch1FinalView+1, 2*time.Minute, 500*time.Millisecond) | ||||||||
s.TimedLogf("observed finalized view %d", epoch1FinalView+1) | ||||||||
|
||||||||
// assert that we are in EFM | ||||||||
snapshot, err := s.Client.GetLatestProtocolSnapshot(s.Ctx) | ||||||||
require.NoError(s.T(), err) | ||||||||
epochPhase, err := snapshot.EpochPhase() | ||||||||
require.NoError(s.T(), err) | ||||||||
require.Equal(s.T(), flow.EpochPhaseFallback, epochPhase, "network must enter EFM by this point") | ||||||||
|
||||||||
// 2. Generate transaction arguments for epoch recover transaction. | ||||||||
collectionClusters := s.NumOfCollectionClusters | ||||||||
recoveryEpochCounter := uint64(1) | ||||||||
|
||||||||
// read internal node info from one of the consensus nodes | ||||||||
internalNodePrivInfoDir, nodeConfigJson := s.getNodeInfoDirs(flow.RoleConsensus) | ||||||||
internalNodes, err := common.ReadFullInternalNodeInfos(unittest.Logger(), internalNodePrivInfoDir, nodeConfigJson) | ||||||||
require.NoError(s.T(), err) | ||||||||
// 3. Eject consensus node by modifying the snapshot before generating the recover epoch transaction args. | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
// By ejecting a node from the consensus committee but keeping it in the Random Beacon committee, we ensure that there is a node | ||||||||
// which is not part of the consensus committee but is part of the Random Beacon committee. | ||||||||
currentIdentityTable := snapshot.Encodable().SealingSegment.LatestProtocolStateEntry().EpochEntry.CurrentEpochIdentityTable | ||||||||
ejectedIdentity := currentIdentityTable.Filter(filter.HasRole[flow.Identity](flow.RoleConsensus))[0] | ||||||||
ejectedIdentity.EpochParticipationStatus = flow.EpochParticipationStatusEjected | ||||||||
|
||||||||
// 4. Modify DKG data by removing the last node of the consensus committee from DKG committee. We can do this | ||||||||
// by altering the DKG index map and DKG public key shares. | ||||||||
// This way we ensure that consensus committee has a node which is not part of the Random Beacon committee. | ||||||||
randomBeaconParticipants := currentIdentityTable.Filter(filter.HasRole[flow.Identity](flow.RoleConsensus)) | ||||||||
nConsensusNodes := len(randomBeaconParticipants) - 1 | ||||||||
|
||||||||
dkgIndexMap := make(flow.DKGIndexMap, nConsensusNodes) | ||||||||
for i, participant := range randomBeaconParticipants[:nConsensusNodes] { | ||||||||
dkgIndexMap[participant.NodeID] = i | ||||||||
} | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Putting this next to the corresponding index map construction logic |
||||||||
|
||||||||
epochProtocolState, err := snapshot.EpochProtocolState() | ||||||||
require.NoError(s.T(), err) | ||||||||
dkg, err := epochProtocolState.DKG() | ||||||||
require.NoError(s.T(), err) | ||||||||
|
||||||||
// At this point we have a node which is part of the consensus committee but not part of the Random Beacon committee and | ||||||||
// another node which is part of the Random Beacon committee but not part of the consensus committee. | ||||||||
txArgs, err := run.GenerateRecoverTxArgsWithDKG( | ||||||||
s.Log, | ||||||||
internalNodes, | ||||||||
collectionClusters, | ||||||||
recoveryEpochCounter, | ||||||||
flow.Localnet, | ||||||||
s.StakingAuctionLen, | ||||||||
s.EpochLen, | ||||||||
3000, | ||||||||
false, | ||||||||
dkgIndexMap, | ||||||||
dkg.KeyShares()[:nConsensusNodes], | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
dkg.GroupKey(), | ||||||||
snapshot, | ||||||||
) | ||||||||
require.NoError(s.T(), err) | ||||||||
|
||||||||
// 5. Submit recover epoch transaction to the network. | ||||||||
env := utils.LocalnetEnv() | ||||||||
result := s.recoverEpoch(env, txArgs) | ||||||||
require.NoError(s.T(), result.Error) | ||||||||
require.Equal(s.T(), result.Status, sdk.TransactionStatusSealed) | ||||||||
|
||||||||
// 6. Ensure expected EpochRecover event is emitted. | ||||||||
eventType := "" | ||||||||
for _, evt := range result.Events { | ||||||||
if strings.Contains(evt.Type, "FlowEpoch.EpochRecover") { | ||||||||
eventType = evt.Type | ||||||||
break | ||||||||
} | ||||||||
} | ||||||||
require.NotEmpty(s.T(), eventType, "expected FlowEpoch.EpochRecover event type") | ||||||||
events, err := s.Client.GetEventsForBlockIDs(s.Ctx, eventType, []sdk.Identifier{result.BlockID}) | ||||||||
require.NoError(s.T(), err) | ||||||||
require.Equal(s.T(), events[0].Events[0].Type, eventType) | ||||||||
|
||||||||
// 7. Ensure the network transitions into the recovery epoch and finalizes the first view of the recovery epoch. | ||||||||
startViewOfNextEpoch := uint64(txArgs[1].(cadence.UInt64)) | ||||||||
s.TimedLogf("waiting to transition into recovery epoch (finalized view %d)", startViewOfNextEpoch) | ||||||||
s.AwaitFinalizedView(s.Ctx, startViewOfNextEpoch, 2*time.Minute, 500*time.Millisecond) | ||||||||
s.TimedLogf("observed finalized first view of recovery epoch %d", startViewOfNextEpoch) | ||||||||
|
||||||||
s.AssertInEpoch(s.Ctx, 1) | ||||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.