diff --git a/cabal.project b/cabal.project index 76af3b5684..f8c5c57e4b 100644 --- a/cabal.project +++ b/cabal.project @@ -47,3 +47,13 @@ if(os(windows)) -- https://github.com/ulidtko/cabal-doctest/issues/85 constraints: Cabal < 3.13 + +source-repository-package + type: git + location: https://github.com/IntersectMBO/ouroboros-network + tag: bb0a7d0ff41e265a8ec47bc94377cb4d65e0b498 + --sha256: sha256-P7m+nsjtogNQsdpXQnaH1kWxYibEWa0UC6iNGg0+bH4= + subdir: + ouroboros-network + ouroboros-network-api + ouroboros-network-protocols diff --git a/ouroboros-consensus-diffusion/changelog.d/20240807_100458_alexander.esgen_milestone_1.md b/ouroboros-consensus-diffusion/changelog.d/20240807_100458_alexander.esgen_milestone_1.md new file mode 100644 index 0000000000..05f1db55a7 --- /dev/null +++ b/ouroboros-consensus-diffusion/changelog.d/20240807_100458_alexander.esgen_milestone_1.md @@ -0,0 +1,3 @@ +### Breaking + +- Adapted to Genesis-related changes in `ouroboros-consensus` ([#1179](https://github.com/IntersectMBO/ouroboros-consensus/pull/1179)). diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs index d86bc6a4ec..ba08a02408 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs @@ -1,24 +1,32 @@ +{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE NumericUnderscores #-} +{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} module Ouroboros.Consensus.Node.Genesis ( -- * 'GenesisConfig' GenesisConfig (..) + , GenesisConfigFlags (..) , LoEAndGDDConfig (..) + , defaultGenesisConfigFlags , disableGenesisConfig , enableGenesisConfigDefault + , mkGenesisConfig -- * NodeKernel helpers , GenesisNodeKernelArgs (..) + , LoEAndGDDNodeKernelArgs (..) , mkGenesisNodeKernelArgs , setGetLoEFragment ) where import Control.Monad (join) +import Data.Maybe (fromMaybe) import Data.Traversable (for) +import GHC.Generics (Generic) import Ouroboros.Consensus.Block import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (CSJConfig (..), CSJEnabledConfig (..), @@ -34,57 +42,143 @@ import Ouroboros.Consensus.Util.Args import Ouroboros.Consensus.Util.IOLike import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF +import Ouroboros.Network.BlockFetch + (GenesisBlockFetchConfiguration (..)) -- | Whether to en-/disable the Limit on Eagerness and the Genesis Density -- Disconnector. data LoEAndGDDConfig a = LoEAndGDDEnabled !a | LoEAndGDDDisabled - deriving stock (Show, Functor, Foldable, Traversable) + deriving stock (Eq, Generic, Show, Functor, Foldable, Traversable) -- | Aggregating the various configs for Genesis-related subcomponents. -data GenesisConfig = GenesisConfig { - gcChainSyncLoPBucketConfig :: !ChainSyncLoPBucketConfig +-- +-- Usually, 'enableGenesisConfigDefault' or 'disableGenesisConfig' can be used. +-- See the haddocks of the types of the individual fields for details. 
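-- Editor's note (illustrative sketch, not part of this diff): the intended
-- call pattern for the flags API introduced below, using only names this
-- patch defines ('mkGenesisConfig', 'defaultGenesisConfigFlags',
-- 'gcfCSJJumpSize'); the bindings shown are hypothetical examples.
--
-- > -- Enable Genesis with all default parameters (this is exactly what
-- > -- 'enableGenesisConfigDefault' does):
-- > cfgAllDefaults :: GenesisConfig
-- > cfgAllDefaults = mkGenesisConfig (Just defaultGenesisConfigFlags)
-- >
-- > -- Enable Genesis but override a single low-level parameter, e.g. the
-- > -- larger Shelley forecast range mentioned in the comments below:
-- > cfgCustomJump :: GenesisConfig
-- > cfgCustomJump = mkGenesisConfig $ Just defaultGenesisConfigFlags
-- >   { gcfCSJJumpSize = Just (3 * 2160 * 20) }
-- >
-- > -- Disable all Genesis components, i.e. Praos behaviour:
-- > cfgDisabled :: GenesisConfig
-- > cfgDisabled = mkGenesisConfig Nothing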
+data GenesisConfig = GenesisConfig + { gcBlockFetchConfig :: !GenesisBlockFetchConfiguration + , gcChainSyncLoPBucketConfig :: !ChainSyncLoPBucketConfig , gcCSJConfig :: !CSJConfig - , gcLoEAndGDDConfig :: !(LoEAndGDDConfig ()) + , gcLoEAndGDDConfig :: !(LoEAndGDDConfig LoEAndGDDParams) , gcHistoricityCutoff :: !(Maybe HistoricityCutoff) + } deriving stock (Eq, Generic, Show) + +-- | Genesis configuration flags and low-level args, as parsed from config file or CLI +data GenesisConfigFlags = GenesisConfigFlags + { gcfEnableCSJ :: Bool + , gcfEnableLoEAndGDD :: Bool + , gcfEnableLoP :: Bool + , gcfBlockFetchGracePeriod :: Maybe Integer + , gcfBucketCapacity :: Maybe Integer + , gcfBucketRate :: Maybe Integer + , gcfCSJJumpSize :: Maybe Integer + , gcfGDDRateLimit :: Maybe DiffTime + } deriving stock (Eq, Generic, Show) + +defaultGenesisConfigFlags :: GenesisConfigFlags +defaultGenesisConfigFlags = GenesisConfigFlags + { gcfEnableCSJ = True + , gcfEnableLoEAndGDD = True + , gcfEnableLoP = True + , gcfBlockFetchGracePeriod = Nothing + , gcfBucketCapacity = Nothing + , gcfBucketRate = Nothing + , gcfCSJJumpSize = Nothing + , gcfGDDRateLimit = Nothing } --- TODO justification/derivation from other parameters enableGenesisConfigDefault :: GenesisConfig -enableGenesisConfigDefault = GenesisConfig { - gcChainSyncLoPBucketConfig = ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig { - csbcCapacity = 100_000 -- number of tokens - , csbcRate = 500 -- tokens per second leaking, 1/2ms - } - , gcCSJConfig = CSJEnabled CSJEnabledConfig { - csjcJumpSize = 3 * 2160 * 20 -- mainnet forecast range - } - , gcLoEAndGDDConfig = LoEAndGDDEnabled () - -- Duration in seconds of one Cardano mainnet Shelley stability window - -- (3k/f slots times one second per slot) plus one extra hour as a - -- safety margin. - , gcHistoricityCutoff = Just $ HistoricityCutoff $ 3 * 2160 * 20 + 3600 - } +enableGenesisConfigDefault = mkGenesisConfig $ Just defaultGenesisConfigFlags -- | Disable all Genesis components, yielding Praos behavior. disableGenesisConfig :: GenesisConfig -disableGenesisConfig = GenesisConfig { - gcChainSyncLoPBucketConfig = ChainSyncLoPBucketDisabled +disableGenesisConfig = mkGenesisConfig Nothing + +mkGenesisConfig :: Maybe GenesisConfigFlags -> GenesisConfig +mkGenesisConfig Nothing = -- disable Genesis + GenesisConfig + { gcBlockFetchConfig = GenesisBlockFetchConfiguration + { gbfcGracePeriod = 0 -- no grace period when Genesis is disabled + } + , gcChainSyncLoPBucketConfig = ChainSyncLoPBucketDisabled , gcCSJConfig = CSJDisabled , gcLoEAndGDDConfig = LoEAndGDDDisabled , gcHistoricityCutoff = Nothing } +mkGenesisConfig (Just GenesisConfigFlags{..}) = + GenesisConfig + { gcBlockFetchConfig = GenesisBlockFetchConfiguration + { gbfcGracePeriod + } + , gcChainSyncLoPBucketConfig = if gcfEnableLoP + then ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig + { csbcCapacity + , csbcRate + } + else ChainSyncLoPBucketDisabled + , gcCSJConfig = if gcfEnableCSJ + then CSJEnabled CSJEnabledConfig + { csjcJumpSize + } + else CSJDisabled + , gcLoEAndGDDConfig = if gcfEnableLoEAndGDD + then LoEAndGDDEnabled LoEAndGDDParams{lgpGDDRateLimit} + else LoEAndGDDDisabled + , -- Duration in seconds of one Cardano mainnet Shelley stability window + -- (3k/f slots times one second per slot) plus one extra hour as a + -- safety margin. 
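-- Editor's note (worked arithmetic, not part of this diff): with the Cardano
-- mainnet parameters k = 2160 and active slot coefficient f = 1/20, the
-- stability window 3k/f is 3 * 2160 * 20 = 129600 slots, i.e. 129600 seconds
-- (36 hours) at one second per slot. Adding the one-hour safety margin gives
-- 129600 + 3600 = 133200 seconds (37 hours), the value constructed just below.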
+ gcHistoricityCutoff = Just $ HistoricityCutoff $ 3 * 2160 * 20 + 3600 + } + where + -- The minimum amount of time during which the Genesis BlockFetch logic will + -- download blocks from a specific peer (even if it is not performing well + -- during that period). + defaultBlockFetchGracePeriod = 10 -- seconds + + -- LoP parameters. Empirically, it takes less than 1ms to validate a header, + -- so leaking one token per 2ms is conservative. The capacity of 100_000 + -- tokens corresponds to 200s, which is definitely enough to handle long GC + -- pauses; we could even make this more conservative. + defaultCapacity = 100_000 -- number of tokens + defaultRate = 500 -- tokens per second leaking, 1/2ms + + -- The larger Shelley forecast range (3 * 2160 * 20) works in more recent + -- ranges of slots, but causes syncing to block in Byron. A future + -- improvement would be to make this era-dynamic, such that we can use the + -- larger (and hence more efficient) larger CSJ jump size in Shelley-based + -- eras. + defaultCSJJumpSize = 2 * 2160 -- Byron forecast range + + -- Limiting the performance impact of the GDD. + defaultGDDRateLimit = 1.0 -- seconds + + gbfcGracePeriod = fromInteger $ fromMaybe defaultBlockFetchGracePeriod gcfBlockFetchGracePeriod + csbcCapacity = fromInteger $ fromMaybe defaultCapacity gcfBucketCapacity + csbcRate = fromInteger $ fromMaybe defaultRate gcfBucketRate + csjcJumpSize = fromInteger $ fromMaybe defaultCSJJumpSize gcfCSJJumpSize + lgpGDDRateLimit = fromMaybe defaultGDDRateLimit gcfGDDRateLimit + +newtype LoEAndGDDParams = LoEAndGDDParams + { -- | How often to evaluate GDD. 0 means as soon as possible. + -- Otherwise, no faster than once every T seconds, where T is the + -- value of the field. + lgpGDDRateLimit :: DiffTime + } deriving stock (Eq, Generic, Show) -- | Genesis-related arguments needed by the NodeKernel initialization logic. data GenesisNodeKernelArgs m blk = GenesisNodeKernelArgs { + gnkaLoEAndGDDArgs :: !(LoEAndGDDConfig (LoEAndGDDNodeKernelArgs m blk)) + } + +data LoEAndGDDNodeKernelArgs m blk = LoEAndGDDNodeKernelArgs { -- | A TVar containing an action that returns the 'ChainDB.GetLoEFragment' -- action. We use this extra indirection to update this action after we -- opened the ChainDB (which happens before we initialize the NodeKernel). -- After that, this TVar will not be modified again. - gnkaGetLoEFragment :: !(LoEAndGDDConfig (StrictTVar m (ChainDB.GetLoEFragment m blk))) + lgnkaLoEFragmentTVar :: !(StrictTVar m (ChainDB.GetLoEFragment m blk)) + , lgnkaGDDRateLimit :: DiffTime } - -- | Create the initial 'GenesisNodeKernelArgs" (with a temporary -- 'ChainDB.GetLoEFragment' that will be replaced via 'setGetLoEFragment') and a -- function to update the 'ChainDbArgs' accordingly. @@ -95,20 +189,24 @@ mkGenesisNodeKernelArgs :: , Complete ChainDbArgs m blk -> Complete ChainDbArgs m blk ) mkGenesisNodeKernelArgs gcfg = do - gnkaGetLoEFragment <- for (gcLoEAndGDDConfig gcfg) $ \() -> - newTVarIO $ pure $ + gnkaLoEAndGDDArgs <- for (gcLoEAndGDDConfig gcfg) $ \p -> do + loeFragmentTVar <- newTVarIO $ pure $ -- Use the most conservative LoE fragment until 'setGetLoEFragment' -- is called. 
ChainDB.LoEEnabled $ AF.Empty AF.AnchorGenesis - let updateChainDbArgs = case gnkaGetLoEFragment of + pure LoEAndGDDNodeKernelArgs + { lgnkaLoEFragmentTVar = loeFragmentTVar + , lgnkaGDDRateLimit = lgpGDDRateLimit p + } + let updateChainDbArgs = case gnkaLoEAndGDDArgs of LoEAndGDDDisabled -> id - LoEAndGDDEnabled varGetLoEFragment -> \cfg -> + LoEAndGDDEnabled lgnkArgs -> \cfg -> cfg { ChainDB.cdbsArgs = (ChainDB.cdbsArgs cfg) { ChainDB.cdbsLoE = getLoEFragment } } where - getLoEFragment = join $ readTVarIO varGetLoEFragment - pure (GenesisNodeKernelArgs {gnkaGetLoEFragment}, updateChainDbArgs) + getLoEFragment = join $ readTVarIO $ lgnkaLoEFragmentTVar lgnkArgs + pure (GenesisNodeKernelArgs{gnkaLoEAndGDDArgs}, updateChainDbArgs) -- | Set 'gnkaGetLoEFragment' to the actual logic for determining the current -- LoE fragment. @@ -124,9 +222,10 @@ setGetLoEFragment readGsmState readLoEFragment varGetLoEFragment = where getLoEFragment :: ChainDB.GetLoEFragment m blk getLoEFragment = atomically $ readGsmState >>= \case - -- When the Honest Availability Assumption cannot currently be guaranteed, we should not select - -- any blocks that would cause our immutable tip to advance, so we - -- return the most conservative LoE fragment. + -- When the Honest Availability Assumption cannot currently be + -- guaranteed, we should not select any blocks that would cause our + -- immutable tip to advance, so we return the most conservative LoE + -- fragment. GSM.PreSyncing -> pure $ ChainDB.LoEEnabled $ AF.Empty AF.AnchorGenesis -- When we are syncing, return the current LoE fragment. diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs index e56e7924f9..d1383c2559 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs @@ -32,14 +32,17 @@ import Ouroboros.Consensus.MiniProtocol.BlockFetch.Server (TraceBlockFetchServerEvent) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (TraceChainSyncClientEvent) +import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping as CSJumping import Ouroboros.Consensus.MiniProtocol.ChainSync.Server (TraceChainSyncServerEvent) import Ouroboros.Consensus.MiniProtocol.LocalTxSubmission.Server (TraceLocalTxSubmissionServerEvent (..)) import Ouroboros.Consensus.Node.GSM (TraceGsmEvent) import Ouroboros.Network.Block (Tip) -import Ouroboros.Network.BlockFetch (FetchDecision, - TraceFetchClientState, TraceLabelPeer) +import Ouroboros.Network.BlockFetch (TraceFetchClientState, + TraceLabelPeer) +import Ouroboros.Network.BlockFetch.Decision.Trace + (TraceDecisionEvent) import Ouroboros.Network.KeepAlive (TraceKeepAliveClient) import Ouroboros.Network.TxSubmission.Inbound (TraceTxSubmissionInbound) @@ -54,7 +57,7 @@ data Tracers' remotePeer localPeer blk f = Tracers { chainSyncClientTracer :: f (TraceLabelPeer remotePeer (TraceChainSyncClientEvent blk)) , chainSyncServerHeaderTracer :: f (TraceLabelPeer remotePeer (TraceChainSyncServerEvent blk)) , chainSyncServerBlockTracer :: f (TraceChainSyncServerEvent blk) - , blockFetchDecisionTracer :: f [TraceLabelPeer remotePeer (FetchDecision [Point (Header blk)])] + , blockFetchDecisionTracer :: f (TraceDecisionEvent remotePeer (Header blk)) , blockFetchClientTracer :: f (TraceLabelPeer remotePeer 
(TraceFetchClientState (Header blk))) , blockFetchServerTracer :: f (TraceLabelPeer remotePeer (TraceBlockFetchServerEvent blk)) , txInboundTracer :: f (TraceLabelPeer remotePeer (TraceTxSubmissionInbound (GenTxId blk) (GenTx blk))) @@ -69,6 +72,7 @@ data Tracers' remotePeer localPeer blk f = Tracers , consensusErrorTracer :: f SomeException , gsmTracer :: f (TraceGsmEvent (Tip blk)) , gddTracer :: f (TraceGDDEvent remotePeer blk) + , csjTracer :: f (CSJumping.TraceEvent remotePeer) } instance (forall a. Semigroup (f a)) @@ -92,6 +96,7 @@ instance (forall a. Semigroup (f a)) , consensusErrorTracer = f consensusErrorTracer , gsmTracer = f gsmTracer , gddTracer = f gddTracer + , csjTracer = f csjTracer } where f :: forall a. Semigroup a @@ -123,6 +128,7 @@ nullTracers = Tracers , consensusErrorTracer = nullTracer , gsmTracer = nullTracer , gddTracer = nullTracer + , csjTracer = nullTracer } showTracers :: ( Show blk @@ -157,6 +163,7 @@ showTracers tr = Tracers , consensusErrorTracer = showTracing tr , gsmTracer = showTracing tr , gddTracer = showTracing tr + , csjTracer = showTracing tr } {------------------------------------------------------------------------------- diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index 224cba0d08..c65159737c 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -42,7 +42,6 @@ import Data.Function (on) import Data.Functor ((<&>)) import Data.Hashable (Hashable) import Data.List.NonEmpty (NonEmpty) -import Data.Map.Strict (Map) import Data.Maybe (isJust, mapMaybe) import Data.Proxy import qualified Data.Text as Text @@ -62,14 +61,16 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol import Ouroboros.Consensus.Mempool import qualified Ouroboros.Consensus.MiniProtocol.BlockFetch.ClientInterface as BlockFetchClientInterface import Ouroboros.Consensus.MiniProtocol.ChainSync.Client - (ChainSyncClientHandle (..), ChainSyncState (..), - viewChainSyncState) + (ChainSyncClientHandle (..), + ChainSyncClientHandleCollection (..), ChainSyncState (..), + newChainSyncClientHandleCollection) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.HistoricityCheck (HistoricityCheck) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck (SomeHeaderInFutureCheck) import Ouroboros.Consensus.Node.Genesis (GenesisNodeKernelArgs (..), - LoEAndGDDConfig (..), setGetLoEFragment) + LoEAndGDDConfig (..), LoEAndGDDNodeKernelArgs (..), + setGetLoEFragment) import Ouroboros.Consensus.Node.GSM (GsmNodeKernelArgs (..)) import qualified Ouroboros.Consensus.Node.GSM as GSM import Ouroboros.Consensus.Node.Run @@ -94,6 +95,7 @@ import Ouroboros.Network.AnchoredFragment (AnchoredFragment, import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (castTip, tipFromHeader) import Ouroboros.Network.BlockFetch +import Ouroboros.Network.ConsensusMode (ConsensusMode (..)) import Ouroboros.Network.Diffusion (PublicPeerSelectionState) import Ouroboros.Network.NodeToNode (ConnectionId, MiniProtocolParameters (..)) @@ -143,7 +145,7 @@ data NodeKernel m addrNTN addrNTC blk = NodeKernel { , getGsmState :: STM m GSM.GsmState -- | The kill handle and exposed state for each ChainSync client. 
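-- Editor's note (illustrative sketch, not part of this diff): the field just
-- below changes from a plain @StrictTVar m (Map peer (ChainSyncClientHandle m blk))@
-- to the new 'ChainSyncClientHandleCollection' abstraction. Judging from the
-- call sites updated in this patch, reading the handles now goes through
-- 'cschcMap' instead of 'readTVar'; the signature shown is an assumption
-- inferred from those call sites, not taken from library documentation.
--
-- > -- assumed: cschcMap :: ChainSyncClientHandleCollection peer m blk
-- > --                   -> STM m (Map peer (ChainSyncClientHandle m blk))
-- > handles <- cschcMap varChainSyncHandles  -- was: readTVar varChainSyncHandles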
- , getChainSyncHandles :: StrictTVar m (Map (ConnectionId addrNTN) (ChainSyncClientHandle m blk)) + , getChainSyncHandles :: ChainSyncClientHandleCollection (ConnectionId addrNTN) m blk -- | Read the current peer sharing registry, used for interacting with -- the PeerSharing protocol @@ -252,7 +254,7 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers <&> \wd (_headers, lst) -> GSM.getDurationUntilTooOld wd (getTipSlot lst) , GSM.equivalent = (==) `on` (AF.headPoint . fst) - , GSM.getChainSyncStates = fmap cschState <$> readTVar varChainSyncHandles + , GSM.getChainSyncStates = fmap cschState <$> cschcMap varChainSyncHandles , GSM.getCurrentSelection = do headers <- ChainDB.getCurrentChain chainDB extLedgerState <- ChainDB.getCurrentLedger chainDB @@ -264,7 +266,7 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers , GSM.writeGsmState = \gsmState -> atomicallyWithMonotonicTime $ \time -> do writeTVar varGsmState gsmState - handles <- readTVar varChainSyncHandles + handles <- cschcMap varChainSyncHandles traverse_ (($ time) . ($ gsmState) . cschOnGsmStateChanged) handles , GSM.isHaaSatisfied = do readTVar varOutboundConnectionsState <&> \case @@ -283,23 +285,24 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers ps_POLICY_PEER_SHARE_STICKY_TIME ps_POLICY_PEER_SHARE_MAX_PEERS - case gnkaGetLoEFragment genesisArgs of - LoEAndGDDDisabled -> pure () - LoEAndGDDEnabled varGetLoEFragment -> do + case gnkaLoEAndGDDArgs genesisArgs of + LoEAndGDDDisabled -> pure () + LoEAndGDDEnabled lgArgs -> do varLoEFragment <- newTVarIO $ AF.Empty AF.AnchorGenesis setGetLoEFragment (readTVar varGsmState) (readTVar varLoEFragment) - varGetLoEFragment + (lgnkaLoEFragmentTVar lgArgs) void $ forkLinkedWatcher registry "NodeKernel.GDD" $ gddWatcher cfg (gddTracer tracers) chainDB + (lgnkaGDDRateLimit lgArgs) (readTVar varGsmState) -- TODO GDD should only consider (big) ledger peers - (readTVar varChainSyncHandles) + (cschcMap varChainSyncHandles) varLoEFragment void $ forkLinkedThread registry "NodeKernel.blockForging" $ @@ -356,7 +359,7 @@ data InternalState m addrNTN addrNTC blk = IS { , chainDB :: ChainDB m blk , blockFetchInterface :: BlockFetchConsensusInterface (ConnectionId addrNTN) (Header blk) blk m , fetchClientRegistry :: FetchClientRegistry (ConnectionId addrNTN) (Header blk) blk m - , varChainSyncHandles :: StrictTVar m (Map (ConnectionId addrNTN) (ChainSyncClientHandle m blk)) + , varChainSyncHandles :: ChainSyncClientHandleCollection (ConnectionId addrNTN) m blk , varGsmState :: StrictTVar m GSM.GsmState , mempool :: Mempool m blk , peerSharingRegistry :: PeerSharingRegistry addrNTN m @@ -376,6 +379,7 @@ initInternalState NodeKernelArgs { tracers, chainDB, registry, cfg , mempoolCapacityOverride , gsmArgs, getUseBootstrapPeers , getDiffusionPipeliningSupport + , genesisArgs } = do varGsmState <- do let GsmNodeKernelArgs {..} = gsmArgs @@ -385,7 +389,7 @@ initInternalState NodeKernelArgs { tracers, chainDB, registry, cfg gsmMarkerFileView newTVarIO gsmState - varChainSyncHandles <- newTVarIO mempty + varChainSyncHandles <- atomically newChainSyncClientHandleCollection mempool <- openMempool registry (chainDBLedgerInterface chainDB) (configLedger cfg) @@ -394,20 +398,19 @@ initInternalState NodeKernelArgs { tracers, chainDB, registry, cfg fetchClientRegistry <- newFetchClientRegistry - let getCandidates :: STM m (Map (ConnectionId addrNTN) (AnchoredFragment (Header blk))) - getCandidates = viewChainSyncState varChainSyncHandles csCandidate - slotForgeTimeOracle <- 
BlockFetchClientInterface.initSlotForgeTimeOracle cfg chainDB let readFetchMode = BlockFetchClientInterface.readFetchModeDefault + (toConsensusMode $ gnkaLoEAndGDDArgs genesisArgs) btime (ChainDB.getCurrentChain chainDB) getUseBootstrapPeers (GSM.gsmStateToLedgerJudgement <$> readTVar varGsmState) blockFetchInterface :: BlockFetchConsensusInterface (ConnectionId addrNTN) (Header blk) blk m blockFetchInterface = BlockFetchClientInterface.mkBlockFetchConsensusInterface + (csjTracer tracers) (configBlock cfg) (BlockFetchClientInterface.defaultChainDbView chainDB) - getCandidates + varChainSyncHandles blockFetchSize slotForgeTimeOracle readFetchMode @@ -416,6 +419,11 @@ initInternalState NodeKernelArgs { tracers, chainDB, registry, cfg peerSharingRegistry <- newPeerSharingRegistry return IS {..} + where + toConsensusMode :: forall a. LoEAndGDDConfig a -> ConsensusMode + toConsensusMode = \case + LoEAndGDDDisabled -> PraosMode + LoEAndGDDEnabled _ -> GenesisMode forkBlockForging :: forall m addrNTN addrNTC blk. diff --git a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs index eb58519102..d7dd9e75cc 100644 --- a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs +++ b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs @@ -1016,10 +1016,11 @@ runThreadNetwork systemTime ThreadNetworkArgs bfcMaxConcurrencyBulkSync = 1 , bfcMaxConcurrencyDeadline = 2 , bfcMaxRequestsInflight = 10 - , bfcDecisionLoopInterval = 0.0 -- Mock testsuite can use sub-second slot - -- interval which doesn't play nice with - -- blockfetch descision interval. + , bfcDecisionLoopIntervalPraos = 0.0 -- Mock testsuite can use sub-second slot + , bfcDecisionLoopIntervalGenesis = 0.0 -- interval which doesn't play nice with + -- blockfetch descision interval. 
, bfcSalt = 0 + , bfcGenesisBFConfig = gcBlockFetchConfig enableGenesisConfigDefault } , gsmArgs = GSM.GsmNodeKernelArgs { gsmAntiThunderingHerd = kaRng @@ -1034,7 +1035,7 @@ runThreadNetwork systemTime ThreadNetworkArgs , getUseBootstrapPeers = pure DontUseBootstrapPeers , publicPeerSelectionStateVar , genesisArgs = GenesisNodeKernelArgs { - gnkaGetLoEFragment = LoEAndGDDDisabled + gnkaLoEAndGDDArgs = LoEAndGDDDisabled } , getDiffusionPipeliningSupport = DiffusionPipeliningOn } diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs index 95b2169865..112e147d5d 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs @@ -19,7 +19,7 @@ import Control.Monad.IOSim (IOSim, runSimStrictShutdown) import Control.Tracer (debugTracer, traceWith) import Data.Maybe (mapMaybe) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client - (ChainSyncClientException (DensityTooLow, EmptyBucket)) + (ChainSyncClientException (..)) import Ouroboros.Consensus.Util.Condense import Ouroboros.Consensus.Util.IOLike (Exception, fromException) import Ouroboros.Network.Driver.Limits @@ -126,6 +126,7 @@ forAllGenesisTest generator schedulerConfig shrinker mkProperty = | Just DensityTooLow <- e = true | Just (ExceededTimeLimit _) <- e = true | Just AsyncCancelled <- e = true + | Just CandidateTooSparse{} <- e = true | otherwise = counterexample ("Encountered unexpected exception: " ++ show exn) False diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs index 28cbe2430b..e6ec79b721 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs @@ -2,7 +2,6 @@ {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE ExistentialQuantification #-} {-# LANGUAGE NamedFieldPuns #-} -{-# LANGUAGE NumericUnderscores #-} {-# LANGUAGE ScopedTypeVariables #-} module Test.Consensus.Genesis.Setup.GenChains ( @@ -11,13 +10,12 @@ module Test.Consensus.Genesis.Setup.GenChains ( , genChainsWithExtraHonestPeers ) where -import Cardano.Slotting.Time (SlotLength, getSlotLength, - slotLengthFromSec) +import Cardano.Slotting.Time (slotLengthFromSec) import Control.Monad (replicateM) import qualified Control.Monad.Except as Exn import Data.List as List (foldl') import Data.Proxy (Proxy (..)) -import Data.Time.Clock (DiffTime, secondsToDiffTime) +import Data.Time.Clock (DiffTime) import qualified Data.Vector.Unboxed as Vector import Data.Word (Word8) import Ouroboros.Consensus.Block.Abstract hiding (Header) @@ -111,7 +109,7 @@ genChains = genChainsWithExtraHonestPeers (pure 0) -- However, in the future it could also be used to generate "short forks" near the tip of the trunk. 
genChainsWithExtraHonestPeers :: QC.Gen Word -> QC.Gen Word -> QC.Gen (GenesisTest TestBlock ()) genChainsWithExtraHonestPeers genNumExtraHonest genNumForks = do - (asc, honestRecipe, someHonestChainSchema) <- genHonestChainSchema + (_, honestRecipe, someHonestChainSchema) <- genHonestChainSchema H.SomeHonestChainSchema _ _ honestChainSchema <- pure someHonestChainSchema let ChainSchema _ vH = honestChainSchema @@ -129,13 +127,14 @@ genChainsWithExtraHonestPeers genNumExtraHonest genNumForks = do gtGenesisWindow = GenesisWindow (fromIntegral scg), gtForecastRange = ForecastRange (fromIntegral scg), -- REVIEW: Do we want to generate those randomly? gtDelay = delta, - gtSlotLength, - gtChainSyncTimeouts = chainSyncTimeouts gtSlotLength asc, + gtSlotLength = slotLengthFromSec 20, + gtChainSyncTimeouts = chainSyncTimeouts, gtBlockFetchTimeouts = blockFetchTimeouts, - gtLoPBucketParams = LoPBucketParams { lbpCapacity = 100_000, lbpRate = 1_000 }, - -- ^ REVIEW: Do we want to generate those randomly? For now, the chosen - -- values carry no special meaning. Someone needs to think about what values - -- would make for interesting tests. + gtLoPBucketParams = LoPBucketParams { lbpCapacity = 50, lbpRate = 10 }, + -- These values give little enough leeway (5s) so that some adversaries get disconnected + -- by the LoP during the stalling attack test. Maybe we should design a way to override + -- those values for individual tests? + -- Also, we might want to generate these randomly. gtCSJParams = CSJParams $ fromIntegral scg, gtBlockTree = List.foldl' (flip BT.addBranch') (BT.mkTrunk goodChain) $ zipWith (genAdversarialFragment goodBlocks) [1..] alternativeChainSchemas, gtExtraHonestPeers, @@ -143,8 +142,6 @@ genChainsWithExtraHonestPeers genNumExtraHonest genNumForks = do } where - gtSlotLength = slotLengthFromSec 20 - genAdversarialFragment :: [TestBlock] -> Int -> (Int, [S]) -> AnchoredFragment TestBlock genAdversarialFragment goodBlocks forkNo (prefixCount, slotsA) = mkTestFragment (mkTestBlocks prefix slotsA forkNo) @@ -169,11 +166,8 @@ genChainsWithExtraHonestPeers genNumExtraHonest genNumForks = do incSlot :: SlotNo -> TestBlock -> TestBlock incSlot n b = b { tbSlot = tbSlot b + n } -chainSyncTimeouts :: - SlotLength -> - Asc -> - ChainSyncTimeout -chainSyncTimeouts t f = +chainSyncTimeouts :: ChainSyncTimeout +chainSyncTimeouts = ChainSyncTimeout { canAwaitTimeout, intersectTimeout, @@ -186,21 +180,22 @@ chainSyncTimeouts t f = intersectTimeout :: Maybe DiffTime intersectTimeout = shortWait idleTimeout :: Maybe DiffTime - idleTimeout = Just 3673 -- taken from Ouroboros.Consensus.Node.stdChainSyncTimeout - -- | The following timeout is derived from the average length of a streak of - -- empty slots. If the probability of the election of a leader is @f@ and - -- @Y@ is a probability, then a streak of empty slots will be shorter than - -- @log (1 - Y) / log (1 - f)@ with probability @Y@. Main net nodes pick a - -- random value for @Y@ between 99.9% and 99.999%. For our use case, we - -- choose the tightest bound of 99.9%. + -- | The default from 'Ouroboros.Consensus.Node.stdChainSyncTimeout' is + -- 3673s, which is virtually infinite, so let us make it actually infinite + -- for our test environment. 
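-- Editor's note (worked example, not part of this diff): the parametric
-- 'mustReplyTimeout' removed further below evaluated the streak bound
-- @slotLength * log (1 - Y) / log (1 - f)@ with Y = 0.999. For instance,
-- with an assumed active slot coefficient f = 0.05 and the 20 s slot length
-- used in these tests, that is 20 * log 0.001 / log 0.95, roughly
-- 20 * 134.7, about 2694 seconds. Both timeouts are now set to 'Nothing'
-- because the test chains are finite, as the surrounding comments explain.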
+ idleTimeout = Nothing + -- | The 'mustReplyTimeout' must be disabled in our context, because the + -- chains are finite, and therefore an honest peer can only serve it all, + -- then send 'MsgAwaitReply' (therefore entering 'StMustReply'), and then + -- stall forever, and it must not be killed for it. + -- + -- Note that this allows the adversaries to stall us forever in that same + -- situation. However, that peer is only allowed to send 'MsgAwaitReply' + -- when they have served their tip, which leaves them fully vulnerable to + -- the Genesis Density Disconnection (GDD) logic. A bug related to this + -- disabled timeout is in fact either a bug in the GDD or in the tests. mustReplyTimeout :: Maybe DiffTime - mustReplyTimeout = - Just $ - secondsToDiffTime $ - round $ - realToFrac (getSlotLength t) - * log (1 - 0.999) - / log (1 - ascVal f) + mustReplyTimeout = Nothing blockFetchTimeouts :: BlockFetchTimeout blockFetchTimeouts = diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs index 3dd65b6d55..1610e05399 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs @@ -6,12 +6,15 @@ module Test.Consensus.Genesis.Tests.CSJ (tests) where import Data.List (nub) import qualified Data.Map.Strict as Map import Data.Maybe (mapMaybe) -import Ouroboros.Consensus.Block (Header, blockSlot, succWithOrigin) +import Ouroboros.Consensus.Block (Header, blockSlot, succWithOrigin, + unSlotNo) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (TraceChainSyncClientEvent (..)) import Ouroboros.Consensus.Util.Condense (PaddingDirection (..), condenseListWithPadding) import qualified Ouroboros.Network.AnchoredFragment as AF +import Ouroboros.Network.Protocol.ChainSync.Codec + (ChainSyncTimeout (mustReplyTimeout), idleTimeout) import Test.Consensus.BlockTree (BlockTree (..)) import Test.Consensus.Genesis.Setup import Test.Consensus.Genesis.Tests.Uniform (genUniformSchedulePoints) @@ -28,10 +31,12 @@ import Test.Tasty.QuickCheck import Test.Util.Orphans.IOLike () import Test.Util.PartialAccessors import Test.Util.TestBlock (TestBlock) -import Test.Util.TestEnv (adjustQuickCheckMaxSize) +import Test.Util.TestEnv (adjustQuickCheckMaxSize, + adjustQuickCheckTests) tests :: TestTree tests = + adjustQuickCheckTests (* 10) $ adjustQuickCheckMaxSize (`div` 5) $ testGroup "CSJ" @@ -49,6 +54,7 @@ tests = -- | A flag to indicate if properties are tested with adversarial peers data WithAdversariesFlag = NoAdversaries | WithAdversaries + deriving Eq -- | A flag to indicate if properties are tested using the same schedule for the -- honest peers, or if each peer should used its own schedule. 
@@ -81,7 +87,7 @@ prop_CSJ adversariesFlag numHonestSchedules = do NoAdversaries -> pure 0 WithAdversaries -> choose (2, 4) forAllGenesisTest - ( case numHonestSchedules of + ( disableBoringTimeouts <$> case numHonestSchedules of OneScheduleForAllPeers -> genChains genForks `enrichedWith` genDuplicatedHonestSchedule @@ -93,6 +99,13 @@ prop_CSJ adversariesFlag numHonestSchedules = do { scEnableCSJ = True , scEnableLoE = True , scEnableLoP = True + , scEnableChainSelStarvation = adversariesFlag == NoAdversaries + -- ^ NOTE: When there are adversaries and the ChainSel + -- starvation detection of BlockFetch is enabled, then our property does + -- not actually hold, because peer simulator-based tests have virtually + -- infinite CPU, and therefore ChainSel gets starved at every tick, which + -- makes us cycle the dynamos, which can lead to some extra headers being + -- downloaded. } ) shrinkPeerSchedules @@ -111,8 +124,16 @@ prop_CSJ adversariesFlag numHonestSchedules = do _ -> Nothing ) svTrace + -- We receive headers at most once from honest peer. The only + -- exception is when an honest peer gets to be the objector, until an + -- adversary dies, and then the dynamo. In that specific case, we + -- might re-download jumpSize blocks. TODO: If we ever choose to + -- promote objectors to dynamo to reuse their state, then we could + -- make this bound tighter. receivedHeadersAtMostOnceFromHonestPeers = - length (nub $ snd <$> headerHonestDownloadEvents) == length headerHonestDownloadEvents + length headerHonestDownloadEvents <= + length (nub $ snd <$> headerHonestDownloadEvents) + + (fromIntegral $ unSlotNo $ csjpJumpSize $ gtCSJParams gt) in tabulate "" [ if headerHonestDownloadEvents == [] @@ -152,3 +173,12 @@ prop_CSJ adversariesFlag numHonestSchedules = do in -- Sanity check: add @1 +@ after @>@ and watch the World burn. 
hdrSlot + jumpSize >= succWithOrigin tipSlot + + disableBoringTimeouts gt = + gt + { gtChainSyncTimeouts = + (gtChainSyncTimeouts gt) + { mustReplyTimeout = Nothing, + idleTimeout = Nothing + } + } diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs index e08b57e1f7..bb87438891 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs @@ -31,8 +31,7 @@ import Ouroboros.Consensus.Config.SecurityParam import Ouroboros.Consensus.Genesis.Governor (DensityBounds, densityDisconnect, sharedCandidatePrefix) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client - (ChainSyncClientException (DensityTooLow), - ChainSyncState (..)) + (ChainSyncClientException (..), ChainSyncState (..)) import Ouroboros.Consensus.Util.Condense (condense) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF @@ -68,7 +67,7 @@ import Test.Util.TestEnv (adjustQuickCheckMaxSize, tests :: TestTree tests = - adjustQuickCheckTests (* 4) $ + adjustQuickCheckTests (* 10) $ adjustQuickCheckMaxSize (`div` 5) $ testGroup "gdd" [ testProperty "basic" prop_densityDisconnectStatic, @@ -474,9 +473,10 @@ prop_densityDisconnectTriggersChainSel = let othersCount = Map.size (adversarialPeers $ psSchedule gtSchedule) exnCorrect = case exceptionsByComponent ChainSyncClient stateView of - [fromException -> Just DensityTooLow] -> True - [] | othersCount == 0 -> True - _ -> False + [fromException -> Just DensityTooLow] -> True + [fromException -> Just CandidateTooSparse{}] -> True + [] | othersCount == 0 -> True + _ -> False tipPointCorrect = Just (getTrunkTip gtBlockTree) == svTipBlock in counterexample "Unexpected exceptions" exnCorrect .&&. @@ -499,7 +499,8 @@ prop_densityDisconnectTriggersChainSel = (AF.Empty _) -> Origin (_ AF.:> tipBlock) -> At tipBlock advTip = getOnlyBranchTip tree - in mkPointSchedule $ peers' + in PointSchedule { + psSchedule = peers' -- Eagerly serve the honest tree, but after the adversary has -- advertised its chain up to the intersection. 
[[(Time 0, scheduleTipPoint trunkTip), @@ -514,4 +515,7 @@ prop_densityDisconnectTriggersChainSel = (Time 0, ScheduleBlockPoint intersect), (Time 1, scheduleHeaderPoint advTip), (Time 1, scheduleBlockPoint advTip) - ]] + ]], + psStartOrder = [], + psMinEndTime = Time 0 + } diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs index 9a52188592..8fb3957b9a 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs @@ -26,21 +26,30 @@ import Test.Tasty import Test.Tasty.QuickCheck import Test.Util.Orphans.IOLike () import Test.Util.PartialAccessors -import Test.Util.TestEnv (adjustQuickCheckTests) +import Test.Util.TestEnv (adjustQuickCheckMaxSize, + adjustQuickCheckTests) tests :: TestTree tests = + adjustQuickCheckTests (* 10) $ testGroup "LoE" [ - adjustQuickCheckTests (`div` 5) $ + adjustQuickCheckMaxSize (`div` 5) $ testProperty "adversary does not hit timeouts" (prop_adversaryHitsTimeouts False), - adjustQuickCheckTests (`div` 5) $ + adjustQuickCheckMaxSize (`div` 5) $ testProperty "adversary hits timeouts" (prop_adversaryHitsTimeouts True) ] -- | Tests that the selection advances in presence of the LoE when a peer is --- killed by something that is not LoE-aware, eg. the timeouts. +-- killed by something that is not LoE-aware, eg. the timeouts. This test +-- features an honest peer behaving normally and an adversarial peer behaving +-- such that it will get killed by timeouts. We check that, after the adversary +-- gets disconnected, the LoE gets updated to stop taking it into account. There +-- are two variants of the test: one with timeouts enabled, and one without. In +-- the case where timeouts are disabled, we check that we do in fact remain +-- stuck at the intersection between trunk and other chain. +-- -- NOTE: Same as 'LoP.prop_delayAttack' with timeouts instead of LoP. 
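-- Editor's note (illustrative sketch, not part of this diff): several tests in
-- this patch stop using 'mkPointSchedule' and build 'PointSchedule' records
-- explicitly, because the record now also carries a 'psStartOrder' field next
-- to 'psSchedule' and 'psMinEndTime'. The shape used throughout is:
--
-- > PointSchedule
-- >   { psSchedule   = peersOnlyHonest [(Time 0, scheduleTipPoint tipBlock)]
-- >   , psStartOrder = []      -- no explicit start order imposed
-- >   , psMinEndTime = Time 11 -- keep the test alive until at least t = 11 s
-- >   }
--
-- Here @tipBlock@ is schematic, and the reading of an empty 'psStartOrder' as
-- "no explicit order" is an assumption based on the call sites in this patch
-- (only 'prop_blockFetchLeashingAttack' shuffles a non-empty order).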
prop_adversaryHitsTimeouts :: Bool -> Property prop_adversaryHitsTimeouts timeoutsEnabled = @@ -115,4 +124,4 @@ prop_adversaryHitsTimeouts timeoutsEnabled = ] -- We want to wait more than the short wait timeout psMinEndTime = Time 11 - in PointSchedule {psSchedule, psMinEndTime} + in PointSchedule {psSchedule, psStartOrder = [], psMinEndTime} diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs index f3f4a7a6fa..56049d2a1e 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs @@ -22,7 +22,8 @@ import Test.Consensus.PeerSimulator.Run (SchedulerConfig (..), defaultSchedulerConfig) import Test.Consensus.PeerSimulator.StateView import Test.Consensus.PointSchedule -import Test.Consensus.PointSchedule.Peers (peers', peersOnlyHonest) +import Test.Consensus.PointSchedule.Peers (peers', peersOnlyAdversary, + peersOnlyHonest) import Test.Consensus.PointSchedule.Shrinking (shrinkPeerSchedules) import Test.Consensus.PointSchedule.SinglePeer (scheduleBlockPoint, scheduleHeaderPoint, scheduleTipPoint) @@ -30,10 +31,12 @@ import Test.Tasty import Test.Tasty.QuickCheck import Test.Util.Orphans.IOLike () import Test.Util.PartialAccessors -import Test.Util.TestEnv (adjustQuickCheckTests) +import Test.Util.TestEnv (adjustQuickCheckMaxSize, + adjustQuickCheckTests) tests :: TestTree tests = + adjustQuickCheckTests (* 10) $ testGroup "LoP" [ -- \| NOTE: Running the test that must _not_ timeout (@prop_smoke False@) takes @@ -41,19 +44,28 @@ tests = -- does all the computation (serving the headers, validating them, serving the -- block, validating them) while the former does nothing, because it timeouts -- before reaching the last tick of the point schedule. - adjustQuickCheckTests (`div` 10) $ + adjustQuickCheckMaxSize (`div` 5) $ testProperty "wait just enough" (prop_wait False), testProperty "wait too much" (prop_wait True), + adjustQuickCheckMaxSize (`div` 5) $ testProperty "wait behind forecast horizon" prop_waitBehindForecastHorizon, - adjustQuickCheckTests (`div` 5) $ + adjustQuickCheckMaxSize (`div` 5) $ testProperty "serve just fast enough" (prop_serve False), + adjustQuickCheckMaxSize (`div` 5) $ testProperty "serve too slow" (prop_serve True), - adjustQuickCheckTests (`div` 5) $ + adjustQuickCheckMaxSize (`div` 5) $ testProperty "delaying attack succeeds without LoP" (prop_delayAttack False), - adjustQuickCheckTests (`div` 5) $ + adjustQuickCheckMaxSize (`div` 5) $ testProperty "delaying attack fails with LoP" (prop_delayAttack True) ] +-- | Simple test in which we connect to only one peer, who advertises the tip of +-- the block tree trunk and then does nothing. If the given boolean, +-- @mustTimeout@, if @True@, then we wait just long enough for the LoP bucket to +-- empty; we expect to observe an 'EmptyBucket' exception in the ChainSync +-- client. If @mustTimeout@ is @False@, then we wait not quite as long, so the +-- LoP bucket should not be empty at the end of the test and we should observe +-- no exception in the ChainSync client. 
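-- Editor's note (worked arithmetic, not part of this diff): the chain
-- generator in Setup.GenChains now fixes the LoP bucket parameters to
-- lbpCapacity = 50 tokens and lbpRate = 10 tokens per second, so a bucket
-- that is never refilled drains in 50 / 10 = 5 seconds. Assuming the
-- @timeout@ passed to @dullSchedule@ below is that drain time (as the
-- haddock above suggests), the schedule ends about one second after the
-- bucket empties when @mustTimeout@ holds and one second before it
-- otherwise, via the @offset@ of +/- 1 in the code below.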
prop_wait :: Bool -> Property prop_wait mustTimeout = forAllGenesisTest @@ -78,10 +90,20 @@ prop_wait mustTimeout = dullSchedule timeout (_ AF.:> tipBlock) = let offset :: DiffTime = if mustTimeout then 1 else -1 in PointSchedule - { psSchedule = peersOnlyHonest [(Time 0, scheduleTipPoint tipBlock)] + { psSchedule = + (if mustTimeout then peersOnlyAdversary else peersOnlyHonest) + [(Time 0, scheduleTipPoint tipBlock)] + , psStartOrder = [] , psMinEndTime = Time $ timeout + offset } +-- | Simple test in which we connect to only one peer, who advertises the tip of +-- the block tree trunk, serves all of its headers, and then does nothing. +-- Because the peer does not send its blocks, then the ChainSync client will end +-- up stuck, waiting behind the forecast horizon. We expect that the LoP will +-- then be disabled and that, therefore, one could wait forever in this state. +-- We disable the timeouts and check that, indeed, the ChainSync client observes +-- no exception. prop_waitBehindForecastHorizon :: Property prop_waitBehindForecastHorizon = forAllGenesisTest @@ -108,6 +130,7 @@ prop_waitBehindForecastHorizon = [ (Time 0, scheduleTipPoint tipBlock) , (Time 0, scheduleHeaderPoint tipBlock) ] + , psStartOrder = [] , psMinEndTime = Time 11 } @@ -166,13 +189,18 @@ prop_serve mustTimeout = makeSchedule :: (HasHeader blk) => AnchoredFragment blk -> PointSchedule blk makeSchedule (AF.Empty _) = error "fragment must have at least one block" makeSchedule fragment@(_ AF.:> tipBlock) = - mkPointSchedule $ peersOnlyHonest $ + PointSchedule { + psSchedule = + (if mustTimeout then peersOnlyAdversary else peersOnlyHonest) $ (Time 0, scheduleTipPoint tipBlock) : ( flip concatMap (zip [1 ..] (AF.toOldestFirst fragment)) $ \(i, block) -> [ (Time (secondsRationalToDiffTime (i * timeBetweenBlocks)), scheduleHeaderPoint block), (Time (secondsRationalToDiffTime (i * timeBetweenBlocks)), scheduleBlockPoint block) ] - ) + ), + psStartOrder = [], + psMinEndTime = Time 0 + } -- NOTE: Same as 'LoE.prop_adversaryHitsTimeouts' with LoP instead of timeouts. prop_delayAttack :: Bool -> Property @@ -249,4 +277,4 @@ prop_delayAttack lopEnabled = ] -- Wait for LoP bucket to empty psMinEndTime = Time 11 - in PointSchedule {psSchedule, psMinEndTime} + in PointSchedule {psSchedule, psStartOrder = [], psMinEndTime} diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LongRangeAttack.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LongRangeAttack.hs index 135dd6892e..31ba9c078f 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LongRangeAttack.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LongRangeAttack.hs @@ -34,6 +34,12 @@ tests = testProperty "one adversary" prop_longRangeAttack ] +-- | This test case features a long-range attack with one adversary. The honest +-- peer serves the block tree trunk, while the adversary serves its own chain, +-- forking off the trunk by at least @k@ blocks, but less good than the trunk. +-- The adversary serves the chain more rapidly than the honest peer. We check at +-- the end that the selection is honest. This property does not hold with Praos, +-- but should hold with Genesis. 
prop_longRangeAttack :: Property prop_longRangeAttack = -- NOTE: `shrinkPeerSchedules` only makes sense for tests that expect the diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs index 81b7a24bc0..8370ba9c44 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs @@ -18,11 +18,12 @@ module Test.Consensus.Genesis.Tests.Uniform ( import Cardano.Slotting.Slot (SlotNo (SlotNo), WithOrigin (..)) import Control.Monad (replicateM) -import Control.Monad.Class.MonadTime.SI (Time, addTime) -import Data.List (intercalate, sort) +import Control.Monad.Class.MonadTime.SI (Time (..), addTime) +import Data.List (intercalate, sort, uncons) import qualified Data.List.NonEmpty as NE import qualified Data.Map.Strict as Map import Data.Maybe (fromMaybe, mapMaybe) +import qualified Data.Set as Set import Data.Word (Word64) import GHC.Stack (HasCallStack) import Ouroboros.Consensus.Block.Abstract (WithOrigin (NotOrigin)) @@ -40,7 +41,8 @@ import Test.Consensus.PeerSimulator.Run (SchedulerConfig (..), defaultSchedulerConfig) import Test.Consensus.PeerSimulator.StateView import Test.Consensus.PointSchedule -import Test.Consensus.PointSchedule.Peers (Peers (..), isHonestPeerId) +import Test.Consensus.PointSchedule.Peers (Peers (..), getPeerIds, + isHonestPeerId, peers') import Test.Consensus.PointSchedule.Shrinking (shrinkByRemovingAdversaries, shrinkPeerSchedules) import Test.Consensus.PointSchedule.SinglePeer @@ -72,9 +74,15 @@ tests = -- because this test writes the immutable chain to disk and `instance Binary TestBlock` -- chokes on long chains. adjustQuickCheckMaxSize (const 10) $ - testProperty "the node is shut down and restarted after some time" prop_downtime + testProperty "the node is shut down and restarted after some time" prop_downtime, + testProperty "block fetch leashing attack" prop_blockFetchLeashingAttack ] +-- | The conjunction of +-- +-- * no honest peer has been disconnected, +-- * the immutable tip is on the best chain, and +-- * the immutable tip is no older than s + d + 1 slots theProperty :: GenesisTestFull TestBlock -> StateView TestBlock -> @@ -89,8 +97,8 @@ theProperty genesisTest stateView@StateView{svSelectedChain} = -- to the governor that the density is too low. 
longerThanGenesisWindow ==> conjoin [ - counterexample "An honest peer was disconnected" (not $ any isHonestPeerId disconnected), - counterexample ("The immutable tip is not honest: " ++ show immutableTip) $ + counterexample "Honest peers shouldn't be disconnected" (not $ any isHonestPeerId disconnected), + counterexample ("The immutable tip should be honest: " ++ show immutableTip) $ property (isHonest immutableTipHash), immutableTipIsRecent ] @@ -98,7 +106,7 @@ theProperty genesisTest stateView@StateView{svSelectedChain} = advCount = Map.size (adversarialPeers (psSchedule $ gtSchedule genesisTest)) immutableTipIsRecent = - counterexample ("Age of the immutable tip: " ++ show immutableTipAge) $ + counterexample ("The immutable tip is too old: " ++ show immutableTipAge) $ immutableTipAge `le` s + fromIntegral d + 1 SlotNo immutableTipAge = case (honestTipSlot, immutableTipSlot) of @@ -203,13 +211,14 @@ prop_leashingAttackStalling :: Property prop_leashingAttackStalling = forAllGenesisTest - (disableBoringTimeouts <$> genChains (QC.choose (1, 4)) `enrichedWith` genLeashingSchedule) + (genChains (QC.choose (1, 4)) `enrichedWith` genLeashingSchedule) defaultSchedulerConfig { scTrace = False , scEnableLoE = True , scEnableLoP = True , scEnableCSJ = True + , scEnableBlockFetchTimeouts = False } shrinkPeerSchedules @@ -228,47 +237,37 @@ prop_leashingAttackStalling = advs <- mapM dropRandomPoints $ adversarialPeers sch pure $ ps {psSchedule = sch {adversarialPeers = advs}} - disableBoringTimeouts gt = - gt { gtChainSyncTimeouts = (gtChainSyncTimeouts gt) - { mustReplyTimeout = Nothing - , idleTimeout = Nothing - } - } - - dropRandomPoints :: [(Time, SchedulePoint blk)] -> QC.Gen [(Time, SchedulePoint blk)] - dropRandomPoints ps = do +dropRandomPoints :: [(Time, SchedulePoint blk)] -> QC.Gen [(Time, SchedulePoint blk)] +dropRandomPoints ps = do let lenps = length ps - dropCount <- QC.choose (0, max 1 $ div lenps 5) + dropsMax = max 1 $ lenps - 1 + dropCount <- QC.choose (div dropsMax 2, dropsMax) let dedup = map NE.head . NE.group is <- fmap (dedup . sort) $ replicateM dropCount $ QC.choose (0, lenps - 1) pure $ dropElemsAt ps is - + where dropElemsAt :: [a] -> [Int] -> [a] - dropElemsAt xs [] = xs - dropElemsAt xs (i:is) = - let (ys, zs) = splitAt i xs - in ys ++ dropElemsAt (drop 1 zs) is + dropElemsAt xs is' = + let is = Set.fromList is' + in map fst $ filter (\(_, i) -> not $ i `Set.member` is) (zip xs [0..]) -- | Test that the leashing attacks do not delay the immutable tip after. The -- immutable tip needs to be advanced enough when the honest peer has offered -- all of its ticks. -- --- This test is expected to fail because we don't test a genesis implementation --- yet. 
--- -- See Note [Leashing attacks] prop_leashingAttackTimeLimited :: Property prop_leashingAttackTimeLimited = forAllGenesisTest - (disableBoringTimeouts <$> genChains (QC.choose (1, 4)) `enrichedWith` genTimeLimitedSchedule) + (genChains (QC.choose (1, 4)) `enrichedWith` genTimeLimitedSchedule) defaultSchedulerConfig { scTrace = False , scEnableLoE = True , scEnableLoP = True - , scEnableBlockFetchTimeouts = False , scEnableCSJ = True + , scEnableBlockFetchTimeouts = False } shrinkPeerSchedules @@ -285,22 +284,16 @@ prop_leashingAttackTimeLimited = (gtLoPBucketParams genesisTest) (getHonestPeer honests) (Map.elems advs0) - advs = fmap (takePointsUntil timeLimit) advs0 + advs1 = fmap (takePointsUntil timeLimit) advs0 + advs <- mapM dropRandomPoints advs1 pure $ PointSchedule { psSchedule = Peers honests advs - , psMinEndTime = timeLimit + , psStartOrder = [] + , psMinEndTime = addGracePeriodDelay (length advs) timeLimit } takePointsUntil limit = takeWhile ((<= limit) . fst) - disableBoringTimeouts gt = - gt { gtChainSyncTimeouts = (gtChainSyncTimeouts gt) - { canAwaitTimeout = Nothing - , mustReplyTimeout = Nothing - , idleTimeout = Nothing - } - } - estimateTimeBound :: AF.HasHeader blk => ChainSyncTimeout @@ -346,11 +339,8 @@ headCallStack = \case x:_ -> x _ -> error "headCallStack: empty list" --- | Test that enabling the LoE using the updater that sets the LoE fragment to --- the shared prefix (as used by the GDDG) causes the selection to remain at +-- | Test that enabling the LoE causes the selection to remain at -- the first fork intersection (keeping the immutable tip honest). --- --- This is pretty slow since it relies on timeouts to terminate the test. prop_loeStalling :: Property prop_loeStalling = forAllGenesisTest @@ -363,7 +353,8 @@ prop_loeStalling = defaultSchedulerConfig { scEnableLoE = True, - scEnableCSJ = True + scEnableCSJ = True, + scEnableBlockFetchTimeouts = False } shrinkPeerSchedules @@ -404,14 +395,78 @@ prop_downtime = forAllGenesisTest , scEnableLoP = True , scDowntime = Just 11 , scEnableCSJ = True + , scEnableBlockFetchTimeouts = False } shrinkPeerSchedules - theProperty + (\genesisTest stateView -> + counterexample (unlines + [ "TODO: Shutting down the node inserts delays in the simulation that" + , "are not reflected in the point schedule table. Reporting these delays" + , "correctly is still to be done." + ]) $ + theProperty genesisTest stateView + ) where pointsGeneratorParams gt = PointsGeneratorParams { pgpExtraHonestPeers = fromIntegral (gtExtraHonestPeers gt) , pgpDowntime = DowntimeWithSecurityParam (gtSecurityParam gt) } + +-- | Test that the block fetch leashing attack does not delay the immutable tip. +-- This leashing attack consists in having adversarial peers that behave +-- honestly when it comes to ChainSync but refuse to send blocks. A proper node +-- under test should detect those behaviours as adversarial and find a way to +-- make progress. +prop_blockFetchLeashingAttack :: Property +prop_blockFetchLeashingAttack = + forAllGenesisTest + (genChains (pure 0) `enrichedWith` genBlockFetchLeashingSchedule) + defaultSchedulerConfig + { scEnableLoE = True, + scEnableLoP = True, + scEnableCSJ = True, + scEnableBlockFetchTimeouts = False + } + shrinkPeerSchedules + theProperty + where + genBlockFetchLeashingSchedule :: GenesisTest TestBlock () -> QC.Gen (PointSchedule TestBlock) + genBlockFetchLeashingSchedule genesisTest = do + -- A schedule with several honest peers and no adversaries. 
We will then + -- keep one of those as honest and remove the block points from the + -- others, hence producing one honest peer and several adversaries. + PointSchedule {psSchedule} <- + stToGen $ + uniformPoints + (PointsGeneratorParams {pgpExtraHonestPeers = 1, pgpDowntime = NoDowntime}) + (gtBlockTree genesisTest) + peers <- QC.shuffle $ Map.elems $ honestPeers psSchedule + let (honest, adversaries) = fromMaybe (error "blockFetchLeashingAttack") $ uncons peers + adversaries' = map (filter (not . isBlockPoint . snd)) adversaries + psSchedule' = peers' [honest] adversaries' + -- Important to shuffle the order in which the peers start, otherwise the + -- honest peer starts first and systematically becomes dynamo. + psStartOrder <- shuffle $ getPeerIds psSchedule' + let maxTime = addGracePeriodDelay (length adversaries') $ maximum $ + Time 0 : [ pt | s <- honest : adversaries', (pt, _) <- take 1 (reverse s) ] + pure $ PointSchedule { + psSchedule = psSchedule', + psStartOrder, + -- Allow to run the blockfetch decision logic after the last tick + -- 11 is the grace period for unresponsive peers that should send + -- blocks + psMinEndTime = addTime 11 maxTime + } + + isBlockPoint :: SchedulePoint blk -> Bool + isBlockPoint (ScheduleBlockPoint _) = True + isBlockPoint _ = False + +-- | Add a delay at the end of tests to account for retention of blocks +-- by adversarial peers in blockfetch. This delay is 10 seconds per +-- adversarial peer. +addGracePeriodDelay :: Int -> Time -> Time +addGracePeriodDelay adversaryCount = addTime (fromIntegral adversaryCount * 10) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs index def5645104..b7319b8eaf 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs @@ -6,8 +6,6 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeFamilies #-} -{-# OPTIONS_GHC -Wno-missing-export-lists #-} - -- | Functions that call to the BlockFetch API to start clients and servers module Test.Consensus.PeerSimulator.BlockFetch ( blockFetchNoTimeouts @@ -17,32 +15,34 @@ module Test.Consensus.PeerSimulator.BlockFetch ( , startKeepAliveThread ) where -import Control.Exception (SomeException) import Control.Monad (void) import Control.Monad.Class.MonadTime import Control.Monad.Class.MonadTimer.SI (MonadTimer) import Control.ResourceRegistry import Control.Tracer (Tracer, nullTracer, traceWith) import Data.Functor.Contravariant ((>$<)) -import Data.Map.Strict (Map) import Network.TypedProtocol.Codec (ActiveState, AnyMessage, StateToken, notActiveState) import Ouroboros.Consensus.Block (HasHeader) import Ouroboros.Consensus.Block.Abstract (Header, Point (..)) import Ouroboros.Consensus.Config import qualified Ouroboros.Consensus.MiniProtocol.BlockFetch.ClientInterface as BlockFetchClientInterface +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client + (ChainSyncClientHandleCollection) +import Ouroboros.Consensus.Node.Genesis (GenesisConfig (..), + enableGenesisConfigDefault) import Ouroboros.Consensus.Node.ProtocolInfo (NumCoreNodes (NumCoreNodes)) import Ouroboros.Consensus.Storage.ChainDB.API import Ouroboros.Consensus.Util (ShowProxy) -import Ouroboros.Consensus.Util.IOLike (DiffTime, - Exception (fromException), IOLike, STM, atomically, retry, - try) -import 
Ouroboros.Network.AnchoredFragment (AnchoredFragment) +import Ouroboros.Consensus.Util.IOLike import Ouroboros.Network.BlockFetch (BlockFetchConfiguration (..), - FetchClientRegistry, FetchMode (..), blockFetchLogic, - bracketFetchClient, bracketKeepAliveClient) + FetchClientRegistry, GenesisBlockFetchConfiguration (..), + blockFetchLogic, bracketFetchClient, + bracketKeepAliveClient) import Ouroboros.Network.BlockFetch.Client (blockFetchClient) +import Ouroboros.Network.BlockFetch.ConsensusInterface + (FetchMode (..)) import Ouroboros.Network.Channel (Channel) import Ouroboros.Network.ControlMessage (ControlMessageSTM) import Ouroboros.Network.Driver (runPeer) @@ -73,50 +73,52 @@ import Test.Util.Time (dawnOfTime) startBlockFetchLogic :: forall m. - (IOLike m) - => ResourceRegistry m + (IOLike m, MonadTimer m) + => Bool -- ^ Whether to enable chain selection starvation + -> ResourceRegistry m -> Tracer m (TraceEvent TestBlock) -> ChainDB m TestBlock -> FetchClientRegistry PeerId (Header TestBlock) TestBlock m - -> STM m (Map PeerId (AnchoredFragment (Header TestBlock))) + -> ChainSyncClientHandleCollection PeerId m TestBlock -> m () -startBlockFetchLogic registry tracer chainDb fetchClientRegistry getCandidates = do +startBlockFetchLogic enableChainSelStarvation registry tracer chainDb fetchClientRegistry csHandlesCol = do let slotForgeTime :: BlockFetchClientInterface.SlotForgeTimeOracle m blk slotForgeTime _ = pure dawnOfTime blockFetchConsensusInterface = BlockFetchClientInterface.mkBlockFetchConsensusInterface + nullTracer -- FIXME (TestBlockConfig $ NumCoreNodes 0) -- Only needed when minting blocks (BlockFetchClientInterface.defaultChainDbView chainDb) - getCandidates + csHandlesCol -- The size of headers in bytes is irrelevant because our tests -- do not serialize the blocks. (\_hdr -> 1000) slotForgeTime - -- Initially, we tried FetchModeBulkSync, but adversaries had the - -- opportunity to delay syncing by not responding to block requests. - -- The BlockFetch logic would then wait for the timeout to expire - -- before trying to download the block from another peer. - (pure FetchModeDeadline) + -- This is a syncing test, so we use 'FetchModeGenesis'. + (pure FetchModeGenesis) DiffusionPipeliningOn + bfcGenesisBFConfig = if enableChainSelStarvation + then GenesisBlockFetchConfiguration + { gbfcGracePeriod = + if enableChainSelStarvation then + 10 -- default value for cardano-node at the time of writing + else + 1000000 -- (more than 11 days) + } + else gcBlockFetchConfig enableGenesisConfigDefault + -- Values taken from -- ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs blockFetchCfg = BlockFetchConfiguration - { -- We set a higher value here to allow downloading blocks from all - -- peers. - -- - -- If the value is too low, block downloads from a peer may prevent - -- blocks from being downloaded from other peers. This can be - -- problematic, since the batch download of a simulated BlockFetch - -- server can last serveral ticks if the block pointer is not - -- advanced to allow completion of the batch. 
- -- - bfcMaxConcurrencyBulkSync = 50 - , bfcMaxConcurrencyDeadline = 50 + { bfcMaxConcurrencyBulkSync = 50 + , bfcMaxConcurrencyDeadline = 50 -- unused because of @pure FetchModeGenesis@ above , bfcMaxRequestsInflight = 10 - , bfcDecisionLoopInterval = 0 + , bfcDecisionLoopIntervalPraos = 0 + , bfcDecisionLoopIntervalGenesis = 0 , bfcSalt = 0 + , bfcGenesisBFConfig } void $ forkLinkedThread registry "BlockFetchLogic" $ diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/CSJInvariants.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/CSJInvariants.hs index 3caac268e5..97e8b50f29 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/CSJInvariants.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/CSJInvariants.hs @@ -19,7 +19,7 @@ import Data.Typeable (Typeable) import Ouroboros.Consensus.Block (Point, StandardHash, castPoint) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State as CSState import Ouroboros.Consensus.Util.IOLike (Exception, MonadSTM (STM), - MonadThrow (throwIO), StrictTVar, readTVar) + MonadThrow (throwIO), readTVar) import Ouroboros.Consensus.Util.STM (Watcher (..)) -------------------------------------------------------------------------------- @@ -109,10 +109,10 @@ readAndView :: forall m peer blk. ( MonadSTM m ) => - StrictTVar m (Map peer (CSState.ChainSyncClientHandle m blk)) -> + STM m (Map peer (CSState.ChainSyncClientHandle m blk)) -> STM m (View peer blk) -readAndView handles = - traverse (fmap idealiseState . readTVar . CSState.cschJumping) =<< readTVar handles +readAndView readHandles = + traverse (fmap idealiseState . readTVar . CSState.cschJumping) =<< readHandles where -- Idealise the state of a ChainSync peer with respect to ChainSync jumping. 
-- In particular, we get rid of non-comparable information such as the TVars @@ -170,7 +170,7 @@ watcher :: Typeable blk, StandardHash blk ) => - StrictTVar m (Map peer (CSState.ChainSyncClientHandle m blk)) -> + STM m (Map peer (CSState.ChainSyncClientHandle m blk)) -> Watcher m (View peer blk) (View peer blk) watcher handles = Watcher diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs index 3083b07399..d0b63d10b8 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs @@ -13,7 +13,6 @@ module Test.Consensus.PeerSimulator.ChainSync ( import Control.Exception (SomeException) import Control.Monad.Class.MonadTimer.SI (MonadTimer) import Control.Tracer (Tracer (Tracer), nullTracer, traceWith) -import Data.Map.Strict (Map) import Data.Proxy (Proxy (..)) import Network.TypedProtocol.Codec (AnyMessage) import Ouroboros.Consensus.Block (Header, Point) @@ -23,16 +22,17 @@ import Ouroboros.Consensus.Config (DiffusionPipeliningSupport (..), import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client - (CSJConfig (..), ChainDbView, ChainSyncClientHandle, - ChainSyncLoPBucketConfig, ChainSyncStateView (..), - Consensus, bracketChainSyncClient, chainSyncClient) + (CSJConfig (..), ChainDbView, + ChainSyncClientHandleCollection, ChainSyncLoPBucketConfig, + ChainSyncStateView (..), Consensus, bracketChainSyncClient, + chainSyncClient) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.HistoricityCheck as HistoricityCheck import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck as InFutureCheck import Ouroboros.Consensus.Node.GsmState (GsmState (Syncing)) import Ouroboros.Consensus.Util (ShowProxy) import Ouroboros.Consensus.Util.IOLike (Exception (fromException), - IOLike, MonadCatch (try), StrictTVar) + IOLike, MonadCatch (try)) import Ouroboros.Network.Block (Tip) import Ouroboros.Network.Channel (Channel) import Ouroboros.Network.ControlMessage (ControlMessage (..)) @@ -134,7 +134,7 @@ runChainSyncClient :: -- ^ Configuration for ChainSync Jumping StateViewTracers blk m -> -- ^ Tracers used to record information for the future 'StateView'. - StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> + ChainSyncClientHandleCollection PeerId m blk -> -- ^ A TVar containing a map of states for each peer. This -- function will (via 'bracketChainSyncClient') register and de-register a -- TVar for the state of the peer. @@ -165,7 +165,7 @@ runChainSyncClient res <- try $ runPipelinedPeerWithLimits - nullTracer + (Tracer $ traceWith tracer . 
TraceChainSyncSendRecvEvent peerId "Client") codecChainSyncId chainSyncNoSizeLimits (timeLimitsChainSync chainSyncTimeouts) @@ -218,8 +218,8 @@ runChainSyncServer :: ChainSyncServer (Header blk) (Point blk) (Tip blk) m () -> Channel m (AnyMessage (ChainSync (Header blk) (Point blk) (Tip blk))) -> m () -runChainSyncServer _tracer peerId StateViewTracers {svtPeerSimulatorResultsTracer} server channel = - (try $ runPeer nullTracer codecChainSyncId channel (chainSyncServerPeer server)) >>= \case +runChainSyncServer tracer peerId StateViewTracers {svtPeerSimulatorResultsTracer} server channel = + (try $ runPeer sendRecvTracer codecChainSyncId channel (chainSyncServerPeer server)) >>= \case Right ((), msgRes) -> traceWith svtPeerSimulatorResultsTracer $ PeerSimulatorResult peerId $ SomeChainSyncServerResult $ Right msgRes Left exn -> do @@ -228,3 +228,5 @@ runChainSyncServer _tracer peerId StateViewTracers {svtPeerSimulatorResultsTrace -- NOTE: here we are able to trace exceptions, as what is done in `runChainSyncClient` case fromException exn of (_ :: Maybe SomeException) -> pure () + where + sendRecvTracer = Tracer $ traceWith tracer . TraceChainSyncSendRecvEvent peerId "Server" diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs index 60f7476286..d8566722dc 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs @@ -20,6 +20,8 @@ import Data.Set (Set) import qualified Data.Set as Set import Ouroboros.Consensus.Block import Ouroboros.Consensus.Config (TopLevelConfig (..)) +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client + (ChainSyncClientHandleCollection (..)) import Ouroboros.Consensus.Storage.ChainDB.API import qualified Ouroboros.Consensus.Storage.ChainDB.API as ChainDB import qualified Ouroboros.Consensus.Storage.ChainDB.Impl as ChainDB @@ -204,7 +206,7 @@ lifecycleStop resources LiveNode {lnStateViewTracers, lnCopyToImmDb, lnPeers} = releaseAll lrRegistry -- Reset the resources in TVars that were allocated by the simulator atomically $ do - modifyTVar psrHandles (const mempty) + cschcRemoveAllHandles psrHandles case lrLoEVar of LoEEnabled var -> modifyTVar var (const (AF.Empty AF.AnchorGenesis)) LoEDisabled -> pure () diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs index c4fe394a60..a594d9059c 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs @@ -25,7 +25,8 @@ import Data.Traversable (for) import Ouroboros.Consensus.Block (WithOrigin (Origin)) import Ouroboros.Consensus.Block.Abstract (Header, Point (..)) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client - (ChainSyncClientHandle) + (ChainSyncClientHandleCollection, + newChainSyncClientHandleCollection) import Ouroboros.Consensus.Util.IOLike (IOLike, MonadSTM (STM), StrictTVar, readTVar, uncheckedNewTVarM, writeTVar) import qualified Ouroboros.Network.AnchoredFragment as AF @@ -115,7 +116,7 @@ data PeerSimulatorResources m blk = -- | Handles to interact with the ChainSync client of each peer. 
-- See 'ChainSyncClientHandle' for more details. - psrHandles :: StrictTVar m (Map PeerId (ChainSyncClientHandle m TestBlock)) + psrHandles :: ChainSyncClientHandleCollection PeerId m TestBlock } -- | Create 'ChainSyncServerHandlers' for our default implementation using 'NodeState'. @@ -233,5 +234,5 @@ makePeerSimulatorResources tracer blockTree peers = do resources <- for peers $ \ peerId -> do peerResources <- makePeerResources tracer blockTree peerId pure (peerId, peerResources) - psrHandles <- uncheckedNewTVarM mempty + psrHandles <- atomically newChainSyncClientHandleCollection pure PeerSimulatorResources {psrPeers = Map.fromList $ toList resources, psrHandles} diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs index 1010c7eda3..df63faadf7 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs @@ -10,13 +10,14 @@ module Test.Consensus.PeerSimulator.Run ( , runPointSchedule ) where -import Control.Monad (foldM, forM, void) +import Control.Monad (foldM, forM, void, when) import Control.Monad.Class.MonadTime (MonadTime) import Control.Monad.Class.MonadTimer.SI (MonadTimer) import Control.ResourceRegistry import Control.Tracer (Tracer (..), nullTracer, traceWith) import Data.Coerce (coerce) import Data.Foldable (for_) +import Data.List (sort) import qualified Data.List.NonEmpty as NonEmpty import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map @@ -27,7 +28,9 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (CSJConfig (..), CSJEnabledConfig (..), ChainDbView, - ChainSyncClientHandle, ChainSyncLoPBucketConfig (..), + ChainSyncClientHandle, + ChainSyncClientHandleCollection (..), + ChainSyncLoPBucketConfig (..), ChainSyncLoPBucketEnabledConfig (..), viewChainSyncState) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient import qualified Ouroboros.Consensus.Node.GsmState as GSM @@ -102,6 +105,11 @@ data SchedulerConfig = -- duration to trigger it. , scDowntime :: Maybe DiffTime + -- | Enable the use of ChainSel starvation information in the block fetch + -- decision logic. It is never actually disabled, but rather the grace + -- period is made virtually infinite. + , scEnableChainSelStarvation :: Bool + -- | Whether to enable ChainSync Jumping. The parameters come from -- 'GenesisTest'. 
, scEnableCSJ :: Bool @@ -119,6 +127,7 @@ defaultSchedulerConfig = scEnableLoE = False, scEnableLoP = False, scDowntime = Nothing, + scEnableChainSelStarvation = True, scEnableCSJ = False } @@ -147,7 +156,7 @@ startChainSyncConnectionThread :: ChainSyncLoPBucketConfig -> CSJConfig -> StateViewTracers blk m -> - StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> + ChainSyncClientHandleCollection PeerId m blk -> m (Thread m (), Thread m ()) startChainSyncConnectionThread registry @@ -212,8 +221,8 @@ smartDelay :: LiveNode blk m -> DiffTime -> m (LiveNode blk m) -smartDelay NodeLifecycle {nlMinDuration, nlStart, nlShutdown} node duration - | Just minInterval <- nlMinDuration, duration > minInterval = do +smartDelay lifecycle@NodeLifecycle {nlStart, nlShutdown} node duration + | itIsTimeToRestartTheNode lifecycle duration = do results <- nlShutdown node threadDelay duration nlStart results @@ -221,6 +230,12 @@ smartDelay _ node duration = do threadDelay duration pure node +itIsTimeToRestartTheNode :: NodeLifecycle blk m -> DiffTime -> Bool +itIsTimeToRestartTheNode NodeLifecycle {nlMinDuration} duration = + case nlMinDuration of + Just minInterval -> duration > minInterval + Nothing -> False + -- | The 'Tick' contains a state update for a specific peer. -- If the peer has not terminated by protocol rules, this will update its TMVar -- with the new state, thereby unblocking the handler that's currently waiting @@ -230,7 +245,7 @@ smartDelay _ node duration = do dispatchTick :: forall m blk. IOLike m => Tracer m (TraceSchedulerEvent blk) -> - StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> + STM m (Map PeerId (ChainSyncClientHandle m blk)) -> Map PeerId (PeerResources m blk) -> NodeLifecycle blk m -> LiveNode blk m -> @@ -250,7 +265,7 @@ dispatchTick tracer varHandles peers lifecycle node (number, (duration, Peer pid traceNewTick = do currentChain <- atomically $ ChainDB.getCurrentChain (lnChainDb node) (csState, jumpingStates) <- atomically $ do - m <- readTVar varHandles + m <- varHandles csState <- traverse (readTVar . CSClient.cschState) (m Map.!? 
pid) jumpingStates <- forM (Map.toList m) $ \(peer, h) -> do st <- readTVar (CSClient.cschJumping h) @@ -272,7 +287,7 @@ runScheduler :: IOLike m => Tracer m (TraceSchedulerEvent blk) -> - StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> + STM m (Map PeerId (ChainSyncClientHandle m blk)) -> PointSchedule blk -> Map PeerId (PeerResources m blk) -> NodeLifecycle blk m -> @@ -287,7 +302,16 @@ runScheduler tracer varHandles ps@PointSchedule{psMinEndTime} peers lifecycle@No else Nothing _ -> Just $ coerce psMinEndTime LiveNode{lnChainDb, lnStateViewTracers} <- - maybe (pure nodeEnd) (smartDelay lifecycle nodeEnd) extraDelay + case extraDelay of + Just duration -> do + nodeEnd' <- smartDelay lifecycle nodeEnd duration + -- Give the node an opportunity to finish whatever it was doing at + -- shutdown + when (itIsTimeToRestartTheNode lifecycle duration) $ + threadDelay $ coerce psMinEndTime + pure nodeEnd' + Nothing -> + pure nodeEnd traceWith tracer TraceEndOfTime pure (lnChainDb, lnStateViewTracers) where @@ -314,7 +338,7 @@ mkStateTracer :: m (Tracer m ()) mkStateTracer schedulerConfig GenesisTest {gtBlockTree} PeerSimulatorResources {psrHandles, psrPeers} chainDb | scTraceState schedulerConfig - , let getCandidates = viewChainSyncState psrHandles CSClient.csCandidate + , let getCandidates = viewChainSyncState (cschcMap psrHandles) CSClient.csCandidate getCurrentChain = ChainDB.getCurrentChain chainDb getPoints = traverse readTVar (srCurrentState . prShared <$> psrPeers) = peerSimStateDiagramSTMTracerDebug gtBlockTree getCurrentChain getCandidates getPoints @@ -333,13 +357,18 @@ startNode :: LiveInterval TestBlock m -> m () startNode schedulerConfig genesisTest interval = do - let - handles = psrHandles lrPeerSim - getCandidates = viewChainSyncState handles CSClient.csCandidate + let handles = psrHandles lrPeerSim fetchClientRegistry <- newFetchClientRegistry let chainDbView = CSClient.defaultChainDbView lnChainDb - activePeers = Map.restrictKeys (psrPeers lrPeerSim) (lirActive liveResult) - for_ activePeers $ \PeerResources {prShared, prChainSync, prBlockFetch} -> do + activePeers = Map.toList $ Map.restrictKeys (psrPeers lrPeerSim) (lirActive liveResult) + peersStartOrder = psStartOrder ++ sort [pid | (pid, _) <- activePeers, pid `notElem` psStartOrder] + activePeersOrdered = [ + peerResources + | pid <- peersStartOrder + , (pid', peerResources) <- activePeers + , pid == pid' + ] + for_ activePeersOrdered $ \PeerResources {prShared, prChainSync, prBlockFetch} -> do let pid = srPeerId prShared forkLinkedThread lrRegistry ("Peer overview " ++ show pid) $ -- The peerRegistry helps ensuring that if any thread fails, then @@ -375,7 +404,13 @@ startNode schedulerConfig genesisTest interval = do -- The block fetch logic needs to be started after the block fetch clients -- otherwise, an internal assertion fails because getCandidates yields more -- peer fragments than registered clients. 
- BlockFetch.startBlockFetchLogic lrRegistry lrTracer lnChainDb fetchClientRegistry getCandidates + BlockFetch.startBlockFetchLogic + (scEnableChainSelStarvation schedulerConfig) + lrRegistry + lrTracer + lnChainDb + fetchClientRegistry + handles for_ lrLoEVar $ \ var -> do forkLinkedWatcher lrRegistry "LoE updater background" $ @@ -383,11 +418,16 @@ startNode schedulerConfig genesisTest interval = do lrConfig (mkGDDTracerTestBlock lrTracer) lnChainDb + 0.0 -- A rate limit of zero makes it simpler to calculate how long tests + -- should run while still producing interesting interleavings. + -- It is similar to the setting of bfcDecisionLoopInterval in + -- Test.Consensus.PeerSimulator.BlockFetch (pure GSM.Syncing) -- TODO actually run GSM - (readTVar handles) + (cschcMap handles) var - void $ forkLinkedWatcher lrRegistry "CSJ invariants watcher" $ CSJInvariants.watcher handles + void $ forkLinkedWatcher lrRegistry "CSJ invariants watcher" $ + CSJInvariants.watcher (cschcMap handles) where LiveResources {lrRegistry, lrTracer, lrConfig, lrPeerSim, lrLoEVar} = resources @@ -402,6 +442,7 @@ startNode schedulerConfig genesisTest interval = do , gtBlockFetchTimeouts , gtLoPBucketParams = LoPBucketParams { lbpCapacity, lbpRate } , gtCSJParams = CSJParams { csjpJumpSize } + , gtSchedule = PointSchedule {psStartOrder} } = genesisTest StateViewTracers{svtTraceTracer} = lnStateViewTracers @@ -483,7 +524,7 @@ runPointSchedule schedulerConfig genesisTest tracer0 = lifecycle <- nodeLifecycle schedulerConfig genesisTest tracer registry peerSim (chainDb, stateViewTracers) <- runScheduler (Tracer $ traceWith tracer . TraceSchedulerEvent) - (psrHandles peerSim) + (cschcMap (psrHandles peerSim)) gtSchedule (psrPeers peerSim) lifecycle diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs index d36d846850..c5c2cad189 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs @@ -9,15 +9,16 @@ module Test.Consensus.PeerSimulator.Tests.LinkedThreads (tests) where import Control.Monad.Class.MonadAsync (AsyncCancelled (..)) import Control.Monad.Class.MonadTime.SI (Time (Time)) import Data.Functor (($>)) -import Data.Maybe (fromJust) -import Ouroboros.Consensus.Util.IOLike (DiffTime, fromException) +import Ouroboros.Consensus.Util.IOLike (fromException) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Driver.Limits (ProtocolLimitFailure (ExceededTimeLimit)) import Ouroboros.Network.Protocol.ChainSync.Codec (mustReplyTimeout) import Test.Consensus.BlockTree (BlockTree (..)) import Test.Consensus.Genesis.Setup -import Test.Consensus.PeerSimulator.Run (defaultSchedulerConfig) +import Test.Consensus.PeerSimulator.Run + (SchedulerConfig (scEnableChainSyncTimeouts), + defaultSchedulerConfig) import Test.Consensus.PeerSimulator.StateView import Test.Consensus.PointSchedule import Test.Consensus.PointSchedule.Peers (peersOnlyHonest) @@ -39,13 +40,15 @@ tests = testProperty "ChainSync kills BlockFetch" prop_chainSyncKillsBlockFetch prop_chainSyncKillsBlockFetch :: Property prop_chainSyncKillsBlockFetch = do forAllGenesisTest - (do gt@GenesisTest{gtChainSyncTimeouts} <- genChains (pure 0) - let schedule = dullSchedule gt (fromJust $ mustReplyTimeout 
gtChainSyncTimeouts) - pure $ gt $> schedule + (do gt@GenesisTest{gtBlockTree} <- genChains (pure 0) + pure $ enableMustReplyTimeout $ gt $> dullSchedule (btTrunk gtBlockTree) ) - defaultSchedulerConfig + + defaultSchedulerConfig {scEnableChainSyncTimeouts = True} + -- No shrinking because the schedule is tiny and hand-crafted (\_ _ -> []) + ( \_ stateView@StateView {svTipBlock} -> svTipBlock == Nothing && case exceptionsByComponent ChainSyncClient stateView of @@ -62,9 +65,11 @@ prop_chainSyncKillsBlockFetch = do _ -> False ) where - dullSchedule :: GenesisTest blk () -> DiffTime -> PointSchedule blk - dullSchedule GenesisTest {gtBlockTree} timeout = - let (firstBlock, secondBlock) = case AF.toOldestFirst $ btTrunk gtBlockTree of + timeout = 10 + + dullSchedule :: AF.AnchoredFragment blk -> PointSchedule blk + dullSchedule trunk = + let (firstBlock, secondBlock) = case AF.toOldestFirst trunk of b1 : b2 : _ -> (b1, b2) _ -> error "block tree must have two blocks" psSchedule = peersOnlyHonest $ @@ -72,4 +77,7 @@ prop_chainSyncKillsBlockFetch = do (Time 0, scheduleHeaderPoint firstBlock) ] psMinEndTime = Time $ timeout + 1 - in PointSchedule {psSchedule, psMinEndTime} + in PointSchedule {psSchedule, psStartOrder = [], psMinEndTime} + + enableMustReplyTimeout :: GenesisTest blk schedule -> GenesisTest blk schedule + enableMustReplyTimeout gt = gt { gtChainSyncTimeouts = (gtChainSyncTimeouts gt) { mustReplyTimeout = Just timeout } } diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Rollback.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Rollback.hs index f260bc6683..b45ef7447f 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Rollback.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Rollback.hs @@ -95,7 +95,11 @@ rollbackSchedule n blockTree = , banalSchedulePoints trunkSuffix , banalSchedulePoints (btbSuffix branch) ] - in mkPointSchedule $ peersOnlyHonest $ zip (map (Time . (/30)) [0..]) schedulePoints + in PointSchedule { + psSchedule = peersOnlyHonest $ zip (map (Time . (/30)) [0..]) schedulePoints, + psStartOrder = [], + psMinEndTime = Time 0 + } where banalSchedulePoints :: AnchoredFragment blk -> [SchedulePoint blk] banalSchedulePoints = concatMap banalSchedulePoints' . 
toOldestFirst diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs index 74625bf04a..5d45137f09 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs @@ -5,7 +5,6 @@ module Test.Consensus.PeerSimulator.Tests.Timeouts (tests) where import Data.Functor (($>)) -import Data.Maybe (fromJust) import Ouroboros.Consensus.Util.Condense import Ouroboros.Consensus.Util.IOLike (DiffTime, Time (Time), fromException) @@ -15,10 +14,13 @@ import Ouroboros.Network.Driver.Limits import Ouroboros.Network.Protocol.ChainSync.Codec (mustReplyTimeout) import Test.Consensus.BlockTree (btTrunk) import Test.Consensus.Genesis.Setup -import Test.Consensus.PeerSimulator.Run (defaultSchedulerConfig) +import Test.Consensus.PeerSimulator.Run + (SchedulerConfig (scEnableChainSyncTimeouts), + defaultSchedulerConfig) import Test.Consensus.PeerSimulator.StateView import Test.Consensus.PointSchedule -import Test.Consensus.PointSchedule.Peers (peersOnlyHonest) +import Test.Consensus.PointSchedule.Peers (peersOnlyAdversary, + peersOnlyHonest) import Test.Consensus.PointSchedule.SinglePeer (scheduleBlockPoint, scheduleHeaderPoint, scheduleTipPoint) import Test.QuickCheck @@ -37,12 +39,11 @@ prop_timeouts :: Bool -> Property prop_timeouts mustTimeout = do forAllGenesisTest - (do gt@GenesisTest{gtChainSyncTimeouts, gtBlockTree} <- genChains (pure 0) - let schedule = dullSchedule (fromJust $ mustReplyTimeout gtChainSyncTimeouts) (btTrunk gtBlockTree) - pure $ gt $> schedule + (do gt@GenesisTest{gtBlockTree} <- genChains (pure 0) + pure $ enableMustReplyTimeout $ gt $> dullSchedule (btTrunk gtBlockTree) ) - -- Timeouts are enabled by default - defaultSchedulerConfig + + defaultSchedulerConfig {scEnableChainSyncTimeouts = True} -- Here we can't shrink because we exploit the properties of the point schedule to wait -- at the end of the test for the adversaries to get disconnected, by adding an extra point. @@ -59,15 +60,20 @@ prop_timeouts mustTimeout = do ) where - dullSchedule :: AF.HasHeader blk => DiffTime -> AF.AnchoredFragment blk -> PointSchedule blk - dullSchedule _ (AF.Empty _) = error "requires a non-empty block tree" - dullSchedule timeout (_ AF.:> tipBlock) = + timeout = 10 + + dullSchedule :: AF.HasHeader blk => AF.AnchoredFragment blk -> PointSchedule blk + dullSchedule (AF.Empty _) = error "requires a non-empty block tree" + dullSchedule (_ AF.:> tipBlock) = let offset :: DiffTime = if mustTimeout then 1 else -1 - psSchedule = peersOnlyHonest $ [ + psSchedule = (if mustTimeout then peersOnlyAdversary else peersOnlyHonest) $ [ (Time 0, scheduleTipPoint tipBlock), (Time 0, scheduleHeaderPoint tipBlock), (Time 0, scheduleBlockPoint tipBlock) ] -- This keeps the test running long enough to pass the timeout by 'offset'. 
psMinEndTime = Time $ timeout + offset - in PointSchedule {psSchedule, psMinEndTime} + in PointSchedule {psSchedule, psStartOrder = [], psMinEndTime} + + enableMustReplyTimeout :: GenesisTest blk schedule -> GenesisTest blk schedule + enableMustReplyTimeout gt = gt { gtChainSyncTimeouts = (gtChainSyncTimeouts gt) { mustReplyTimeout = Just timeout } } diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs index a1885f3064..0868e243f5 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs @@ -25,6 +25,7 @@ import Data.Bifunctor (second) import Data.List (intersperse) import qualified Data.List.NonEmpty as NE import Data.Time.Clock (DiffTime, diffTimeToPicoseconds) +import Network.TypedProtocol.Codec (AnyMessage (..)) import Ouroboros.Consensus.Block (GenesisWindow (..), Header, Point, WithOrigin (NotOrigin, Origin), succWithOrigin) import Ouroboros.Consensus.Genesis.Governor (DensityBounds (..), @@ -42,6 +43,7 @@ import qualified Ouroboros.Consensus.Storage.ChainDB.Impl as ChainDB import Ouroboros.Consensus.Storage.ChainDB.Impl.Types (TraceAddBlockEvent (..)) import Ouroboros.Consensus.Util.Condense (condense) +import Ouroboros.Consensus.Util.Enclose import Ouroboros.Consensus.Util.IOLike (IOLike, MonadMonotonicTime, Time (Time), atomically, getMonotonicTime, readTVarIO, uncheckedNewTVarM, writeTVar) @@ -49,6 +51,9 @@ import Ouroboros.Network.AnchoredFragment (AnchoredFragment, headPoint) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (SlotNo (SlotNo), Tip, castPoint) +import Ouroboros.Network.Driver.Simple (TraceSendRecv (..)) +import Ouroboros.Network.Protocol.ChainSync.Type (ChainSync, + Message (..)) import Test.Consensus.PointSchedule.NodeState (NodeState) import Test.Consensus.PointSchedule.Peers (Peer (Peer), PeerId) import Test.Util.TersePrinting (terseAnchor, terseBlock, @@ -130,6 +135,7 @@ data TraceEvent blk | TraceChainSyncClientTerminationEvent PeerId TraceChainSyncClientTerminationEvent | TraceBlockFetchClientTerminationEvent PeerId TraceBlockFetchClientTerminationEvent | TraceGenesisDDEvent (TraceGDDEvent PeerId blk) + | TraceChainSyncSendRecvEvent PeerId String (TraceSendRecv (ChainSync (Header blk) (Point blk) (Tip blk))) | TraceOther String -- * 'TestBlock'-specific tracers for the peer simulator @@ -182,6 +188,7 @@ traceEventTestBlockWith setTickTime tracer0 tracer = \case TraceChainSyncClientTerminationEvent peerId traceEvent -> traceChainSyncClientTerminationEventTestBlockWith peerId tracer traceEvent TraceBlockFetchClientTerminationEvent peerId traceEvent -> traceBlockFetchClientTerminationEventTestBlockWith peerId tracer traceEvent TraceGenesisDDEvent gddEvent -> traceWith tracer (terseGDDEvent gddEvent) + TraceChainSyncSendRecvEvent peerId peerType traceEvent -> traceChainSyncSendRecvEventTestBlockWith peerId peerType tracer traceEvent TraceOther msg -> traceWith tracer msg traceSchedulerEventTestBlockWith :: @@ -191,7 +198,7 @@ traceSchedulerEventTestBlockWith :: Tracer m String -> TraceSchedulerEvent TestBlock -> m () -traceSchedulerEventTestBlockWith setTickTime tracer0 _tracer = \case +traceSchedulerEventTestBlockWith setTickTime tracer0 tracer = \case TraceBeginningOfTime -> traceWith tracer0 "Running point schedule ..." 
TraceEndOfTime -> @@ -222,13 +229,13 @@ traceSchedulerEventTestBlockWith setTickTime tracer0 _tracer = \case " jumping states:\n" ++ traceJumpingStates jumpingStates ] TraceNodeShutdownStart immTip -> - traceWith tracer0 (" Initiating node shutdown with immutable tip at slot " ++ condense immTip) + traceWith tracer (" Initiating node shutdown with immutable tip at slot " ++ condense immTip) TraceNodeShutdownComplete -> - traceWith tracer0 " Node shutdown complete" + traceWith tracer " Node shutdown complete" TraceNodeStartupStart -> - traceWith tracer0 " Initiating node startup" + traceWith tracer " Initiating node startup" TraceNodeStartupComplete selection -> - traceWith tracer0 (" Node startup complete with selection " ++ terseHFragment selection) + traceWith tracer (" Node startup complete with selection " ++ terseHFragment selection) where traceJumpingStates :: [(PeerId, ChainSyncJumpingState m TestBlock)] -> String @@ -238,7 +245,7 @@ traceSchedulerEventTestBlockWith setTickTime tracer0 _tracer = \case traceJumpingState = \case Dynamo initState lastJump -> let showInitState = case initState of - DynamoStarting ji -> terseJumpInfo ji + DynamoStarting ji -> "(DynamoStarting " ++ terseJumpInfo ji ++ ")" DynamoStarted -> "DynamoStarted" in unwords ["Dynamo", showInitState, terseWithOrigin show lastJump] Objector initState goodJumpInfo badPoint -> unwords @@ -370,6 +377,10 @@ traceChainDBEventTestBlockWith tracer = \case AddedReprocessLoEBlocksToQueue -> trace $ "Requested ChainSel run" _ -> pure () + ChainDB.TraceChainSelStarvationEvent (ChainDB.ChainSelStarvation RisingEdge) -> + trace "ChainSel starvation started" + ChainDB.TraceChainSelStarvationEvent (ChainDB.ChainSelStarvation (FallingEdgeWith pt)) -> + trace $ "ChainSel starvation ended thanks to " ++ terseRealPoint pt _ -> pure () where trace = traceUnitWith tracer "ChainDB" @@ -415,6 +426,8 @@ traceChainSyncClientEventTestBlockWith pid tracer = \case trace "Waiting for next instruction from the jumping governor" TraceJumpingInstructionIs instr -> trace $ "Received instruction: " ++ showInstr instr + TraceDrainingThePipe n -> + trace $ "Draining the pipe, remaining messages: " ++ show n where trace = traceUnitWith tracer ("ChainSyncClient " ++ condense pid) @@ -458,6 +471,33 @@ traceBlockFetchClientTerminationEventTestBlockWith pid tracer = \case where trace = traceUnitWith tracer ("BlockFetchClient " ++ condense pid) +-- | Trace all the SendRecv events of the ChainSync mini-protocol. +traceChainSyncSendRecvEventTestBlockWith :: + Applicative m => + PeerId -> + String -> + Tracer m String -> + TraceSendRecv (ChainSync (Header TestBlock) (Point TestBlock) (Tip TestBlock)) -> + m () +traceChainSyncSendRecvEventTestBlockWith pid ptp tracer = \case + TraceSendMsg amsg -> traceMsg "send" amsg + TraceRecvMsg amsg -> traceMsg "recv" amsg + where + -- This can be very verbose and is only useful in rare situations, so it + -- does nothing by default. + -- trace = traceUnitWith tracer ("ChainSync " ++ condense pid) . 
((ptp ++ " ") ++) + trace = (\_ _ _ -> const (pure ())) pid ptp tracer + traceMsg kd amsg = trace $ kd ++ " " ++ case amsg of + AnyMessage msg -> case msg of + MsgRequestNext -> "MsgRequestNext" + MsgAwaitReply -> "MsgAwaitReply" + MsgRollForward header tip -> "MsgRollForward " ++ terseHeader header ++ " " ++ terseTip tip + MsgRollBackward point tip -> "MsgRollBackward " ++ tersePoint point ++ " " ++ terseTip tip + MsgFindIntersect points -> "MsgFindIntersect [" ++ unwords (map tersePoint points) ++ "]" + MsgIntersectFound point tip -> "MsgIntersectFound " ++ tersePoint point ++ " " ++ terseTip tip + MsgIntersectNotFound tip -> "MsgIntersectNotFound " ++ terseTip tip + MsgDone -> "MsgDone" + prettyDensityBounds :: [(PeerId, DensityBounds TestBlock)] -> [String] prettyDensityBounds bounds = showPeers (second showBounds <$> bounds) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs index 9651884ae3..179efa1f3b 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs @@ -37,7 +37,6 @@ module Test.Consensus.PointSchedule ( , ensureScheduleDuration , genesisNodeState , longRangeAttack - , mkPointSchedule , peerSchedulesBlocks , peerStates , peersStates @@ -53,6 +52,7 @@ import Control.Monad (replicateM) import Control.Monad.Class.MonadTime.SI (Time (Time), addTime, diffTime) import Control.Monad.ST (ST) +import Data.Bifunctor (first) import Data.Functor (($>)) import Data.List (mapAccumL, partition, scanl') import qualified Data.Map.Strict as Map @@ -77,8 +77,8 @@ import Test.Consensus.BlockTree (BlockTree (..), BlockTreeBranch (..), import Test.Consensus.PeerSimulator.StateView (StateView) import Test.Consensus.PointSchedule.NodeState (NodeState (..), genesisNodeState) -import Test.Consensus.PointSchedule.Peers (Peer (..), Peers (..), - peers', peersList) +import Test.Consensus.PointSchedule.Peers (Peer (..), PeerId, + Peers (..), getPeerIds, peers', peersList) import Test.Consensus.PointSchedule.SinglePeer (IsTrunk (IsBranch, IsTrunk), PeerScheduleParams (..), SchedulePoint (..), defaultPeerScheduleParams, mergeOn, @@ -97,21 +97,24 @@ prettyPointSchedule :: (CondenseList (NodeState blk)) => PointSchedule blk -> [String] -prettyPointSchedule peers = - [ "honest peers: " ++ show (Map.size (honestPeers $ psSchedule peers)) - , "adversaries: " ++ show (Map.size (adversarialPeers $ psSchedule peers)) - , "minimal duration: " ++ show (psMinEndTime peers) - ] ++ - zipWith3 - (\number time peerState -> - number ++ ": " ++ peerState ++ " @ " ++ time - ) - (condenseListWithPadding PadLeft $ fst <$> numberedPeersStates) - (showDT . fst . snd <$> numberedPeersStates) - (condenseList $ (snd . snd) <$> numberedPeersStates) +prettyPointSchedule ps@PointSchedule {psStartOrder, psMinEndTime} = + [] + ++ [ "psSchedule =" + ] + ++ ( zipWith3 + ( \number time peerState -> + " " ++ number ++ ": " ++ peerState ++ " @ " ++ time + ) + (condenseListWithPadding PadLeft $ fst <$> numberedPeersStates) + (showDT . fst . snd <$> numberedPeersStates) + (condenseList $ (snd . snd) <$> numberedPeersStates) + ) + ++ [ "psStartOrder = " ++ show psStartOrder, + "psMinEndTime = " ++ show psMinEndTime + ] where numberedPeersStates :: [(Int, (Time, Peer (NodeState blk)))] - numberedPeersStates = zip [0..] (peersStates peers) + numberedPeersStates = zip [0 ..] 
(peersStates ps) showDT :: Time -> String showDT (Time dt) = printf "%.6f" (realToFrac dt :: Double) @@ -125,12 +128,6 @@ prettyPointSchedule peers = -- Accumulates the new points in each tick into the previous state, starting with a set of all -- 'Origin' points. -- --- Also shifts all tick start times so that the first tip point is announced at the very beginning --- of the test, keeping the relative delays of the schedule intact. --- This is a preliminary measure to make the long range attack test work, since that relies on the --- honest node sending headers later than the adversary, which is not possible if the adversary's --- first tip point is delayed by 20 or more seconds due to being in a later slot. --- -- Finally, drops the first state, since all points being 'Origin' (in particular the tip) has no -- useful effects in the simulator, but it could set the tip in the GDD governor to 'Origin', which -- causes slow nodes to be disconnected right away. @@ -138,14 +135,8 @@ prettyPointSchedule peers = -- TODO Remove dropping the first state in favor of better GDD logic peerStates :: Peer (PeerSchedule blk) -> [(Time, Peer (NodeState blk))] peerStates Peer {name, value = schedulePoints} = - drop 1 (zip (Time 0 : (map shiftTime times)) (Peer name <$> scanl' modPoint genesisNodeState points)) + drop 1 (zip (Time 0 : times) (Peer name <$> scanl' modPoint genesisNodeState points)) where - shiftTime :: Time -> Time - shiftTime t = addTime (- firstTipOffset) t - - firstTipOffset :: DiffTime - firstTipOffset = case times of [] -> 0; (Time dt : _) -> dt - modPoint z = \case ScheduleTipPoint nsTip -> z {nsTip} ScheduleHeaderPoint nsHeader -> z {nsHeader} @@ -177,15 +168,17 @@ peerScheduleBlocks = mapMaybe (withOriginToMaybe . schedulePointToBlock . snd) data PointSchedule blk = PointSchedule { -- | The actual point schedule psSchedule :: Peers (PeerSchedule blk), + -- | The order in which the peers start and connect to the node under test. + -- The peers that are absent from 'psSchedule' are ignored; the peers from + -- 'psSchedule' that are absent from 'psStartOrder' are started at the end in + -- the order of 'PeerId'. + psStartOrder :: [PeerId], -- | Minimum duration for the simulation of this point schedule. -- If no point in the schedule is larger than 'psMinEndTime', -- the simulation will still run until this time is reached. psMinEndTime :: Time } -mkPointSchedule :: Peers (PeerSchedule blk) -> PointSchedule blk -mkPointSchedule sch = PointSchedule sch $ Time 0 - -- | List of all blocks appearing in the schedules. peerSchedulesBlocks :: Peers (PeerSchedule blk) -> [blk] peerSchedulesBlocks = concatMap (peerScheduleBlocks . value) . 
peersList @@ -208,7 +201,11 @@ longRangeAttack :: longRangeAttack BlockTree {btTrunk, btBranches = [branch]} g = do honest <- peerScheduleFromTipPoints g honParams [(IsTrunk, [AF.length btTrunk - 1])] btTrunk [] adv <- peerScheduleFromTipPoints g advParams [(IsBranch, [AF.length (btbFull branch) - 1])] btTrunk [btbFull branch] - pure $ mkPointSchedule $ peers' [honest] [adv] + pure $ shiftPointSchedule $ PointSchedule { + psSchedule = peers' [honest] [adv], + psStartOrder = [], + psMinEndTime = Time 0 + } where honParams = defaultPeerScheduleParams {pspHeaderDelayInterval = (0.3, 0.4)} advParams = defaultPeerScheduleParams {pspTipDelayInterval = (0, 0.1)} @@ -229,9 +226,33 @@ uniformPoints :: BlockTree blk -> g -> m (PointSchedule blk) -uniformPoints PointsGeneratorParams {pgpExtraHonestPeers, pgpDowntime} = case pgpDowntime of - NoDowntime -> uniformPointsWithExtraHonestPeers pgpExtraHonestPeers - DowntimeWithSecurityParam k -> uniformPointsWithExtraHonestPeersAndDowntime pgpExtraHonestPeers k +uniformPoints PointsGeneratorParams {pgpExtraHonestPeers, pgpDowntime} bt = + fmap shiftPointSchedule . case pgpDowntime of + NoDowntime -> + uniformPointsWithExtraHonestPeers pgpExtraHonestPeers bt + DowntimeWithSecurityParam k -> + uniformPointsWithExtraHonestPeersAndDowntime pgpExtraHonestPeers k bt + +-- | Shifts all tick start times so that the first tip point is announced at +-- the very beginning of the test, keeping the relative delays of the schedule +-- intact. +-- +-- This is a measure to make the long range attack test work, since that +-- relies on the honest node sending headers later than the adversary, which +-- is not possible if the adversary's first tip point is delayed by 20 or +-- more seconds due to being in a later slot. +shiftPointSchedule :: PointSchedule blk -> PointSchedule blk +shiftPointSchedule s = s {psSchedule = shiftPeerSchedule <$> psSchedule s} + where + shiftPeerSchedule :: PeerSchedule blk -> PeerSchedule blk + shiftPeerSchedule times = map (first shiftTime) times + where + shiftTime :: Time -> Time + shiftTime t = addTime (- firstTipOffset) t + + firstTipOffset :: DiffTime + firstTipOffset = case times of [] -> 0; ((Time dt, _) : _) -> dt + -- | Generate a schedule in which the trunk is served by @pgpExtraHonestPeers + 1@ peers, -- and extra branches are served by one peer each, using a single tip point, @@ -240,6 +261,7 @@ uniformPoints PointsGeneratorParams {pgpExtraHonestPeers, pgpDowntime} = case pg -- Include rollbacks in a percentage of adversaries, in which case that peer uses two branchs. -- uniformPointsWithExtraHonestPeers :: + forall g m blk. (StatefulGen g m, AF.HasHeader blk) => Int -> BlockTree blk -> @@ -254,7 +276,9 @@ uniformPointsWithExtraHonestPeers honests <- replicateM (extraHonestPeers + 1) $ mkSchedule [(IsTrunk, [honestTip0 .. AF.length btTrunk - 1])] [] advs <- takeBranches btBranches - pure $ mkPointSchedule $ peers' honests advs + let psSchedule = peers' honests advs + psStartOrder <- shuffle (getPeerIds psSchedule) + pure $ PointSchedule {psSchedule, psStartOrder, psMinEndTime = Time 0} where takeBranches = \case [] -> pure [] @@ -305,6 +329,15 @@ uniformPointsWithExtraHonestPeers rollbackProb = 0.2 + -- Inefficient implementation, but sufficient for small lists. + shuffle :: [a] -> m [a] + shuffle [] = pure [] + shuffle xs = do + i <- Random.uniformRM (0, length xs - 1) g + let x = xs !! 
i + xs' = take i xs ++ drop (i+1) xs + (x :) <$> shuffle xs' + minusClamp :: (Ord a, Num a) => a -> a -> a minusClamp a b | a <= b = 0 | otherwise = a - b @@ -361,6 +394,7 @@ syncTips honests advs = -- -- Includes rollbacks in some schedules. uniformPointsWithExtraHonestPeersAndDowntime :: + forall g m blk. (StatefulGen g m, AF.HasHeader blk) => Int -> SecurityParam -> @@ -383,7 +417,9 @@ uniformPointsWithExtraHonestPeersAndDowntime mkSchedule [(IsTrunk, [honestTip0, minusClamp (AF.length btTrunk) 1])] [] advs <- takeBranches pauseSlot btBranches let (honests', advs') = syncTips honests advs - pure $ mkPointSchedule $ peers' honests' advs' + psSchedule = peers' honests' advs' + psStartOrder <- shuffle $ getPeerIds psSchedule + pure $ PointSchedule {psSchedule, psStartOrder, psMinEndTime = Time 0} where takeBranches pause = \case [] -> pure [] @@ -438,6 +474,15 @@ uniformPointsWithExtraHonestPeersAndDowntime rollbackProb = 0.2 + -- Inefficient implementation, but sufficient for small lists. + shuffle :: [a] -> m [a] + shuffle [] = pure [] + shuffle xs = do + i <- Random.uniformRM (0, length xs - 1) g + let x = xs !! i + xs' = take i xs ++ drop (i+1) xs + (x :) <$> shuffle xs' + newtype ForecastRange = ForecastRange { unForecastRange :: Word64 } deriving (Show) @@ -545,19 +590,22 @@ stToGen gen = do pure (runSTGen_ seed gen) ensureScheduleDuration :: GenesisTest blk a -> PointSchedule blk -> PointSchedule blk -ensureScheduleDuration gt PointSchedule{psSchedule, psMinEndTime} = +ensureScheduleDuration gt PointSchedule{psSchedule, psStartOrder, psMinEndTime} = PointSchedule { psSchedule + , psStartOrder , psMinEndTime = max psMinEndTime (Time endingDelay) } where endingDelay = let cst = gtChainSyncTimeouts gt bft = gtBlockFetchTimeouts gt - in 1 + fromIntegral peerCount * maximum (0 : catMaybes + bfGracePeriodDelay = fromIntegral adversaryCount * 10 + in 1 + bfGracePeriodDelay + fromIntegral peerCount * maximum (0 : catMaybes [ canAwaitTimeout cst , intersectTimeout cst , busyTimeout bft , streamingTimeout bft ]) peerCount = length (peersList psSchedule) + adversaryCount = Map.size (adversarialPeers psSchedule) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs index 26e2342492..d8a31e8125 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs @@ -32,6 +32,7 @@ module Test.Consensus.PointSchedule.Peers ( , peersFromPeerIdList' , peersFromPeerList , peersList + , peersOnlyAdversary , peersOnlyHonest , toMap , toMap' @@ -147,6 +148,13 @@ peersOnlyHonest value = adversarialPeers = Map.empty } +peersOnlyAdversary :: a -> Peers a +peersOnlyAdversary value = + Peers + { adversarialPeers = Map.singleton 1 value, + honestPeers = Map.empty + } + -- | Extract all 'PeerId's. 
getPeerIds :: Peers a -> [PeerId] getPeerIds Peers {honestPeers, adversarialPeers} = diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs index 89d337dd06..7443b0a50b 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs @@ -41,7 +41,7 @@ shrinkPeerSchedules :: StateView TestBlock -> [GenesisTestFull TestBlock] shrinkPeerSchedules genesisTest@GenesisTest{gtBlockTree, gtSchedule} _stateView = - let PointSchedule {psSchedule} = gtSchedule + let PointSchedule {psSchedule, psStartOrder} = gtSchedule simulationDuration = duration gtSchedule trimmedBlockTree sch = trimBlockTree' sch gtBlockTree shrunkAdversarialPeers = @@ -50,6 +50,7 @@ shrinkPeerSchedules genesisTest@GenesisTest{gtBlockTree, gtSchedule} _stateView genesisTest { gtSchedule = PointSchedule { psSchedule = shrunkSchedule + , psStartOrder , psMinEndTime = simulationDuration } , gtBlockTree = trimmedBlockTree shrunkSchedule @@ -61,6 +62,7 @@ shrinkPeerSchedules genesisTest@GenesisTest{gtBlockTree, gtSchedule} _stateView <&> \shrunkSchedule -> genesisTest { gtSchedule = PointSchedule { psSchedule = shrunkSchedule + , psStartOrder , psMinEndTime = simulationDuration } } @@ -81,6 +83,7 @@ shrinkByRemovingAdversaries genesisTest@GenesisTest{gtSchedule, gtBlockTree} _st in genesisTest { gtSchedule = PointSchedule { psSchedule = shrunkSchedule + , psStartOrder = psStartOrder gtSchedule , psMinEndTime = simulationDuration } , gtBlockTree = trimmedBlockTree diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs index b3ce2a7ac1..8b07cf4d63 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs @@ -82,14 +82,14 @@ checkShrinkProperty :: (Peers (PeerSchedule TestBlock) -> Peers (PeerSchedule Te checkShrinkProperty prop = forAllBlind (genChains (choose (1, 4)) >>= genUniformSchedulePoints) - (\sch@PointSchedule{psSchedule, psMinEndTime} -> + (\sch@PointSchedule{psSchedule, psStartOrder, psMinEndTime} -> conjoin $ map (\shrunk -> counterexample ( "Original schedule:\n" ++ unlines (map (" " ++) $ prettyPointSchedule sch) ++ "\nShrunk schedule:\n" - ++ unlines (map (" " ++) $ prettyPointSchedule $ PointSchedule shrunk psMinEndTime) + ++ unlines (map (" " ++) $ prettyPointSchedule $ PointSchedule {psSchedule = shrunk, psStartOrder, psMinEndTime}) ) (prop psSchedule shrunk) ) diff --git a/ouroboros-consensus/changelog.d/20240807_095933_alexander.esgen_milestone_1.md b/ouroboros-consensus/changelog.d/20240807_095933_alexander.esgen_milestone_1.md new file mode 100644 index 0000000000..bca96bf61b --- /dev/null +++ b/ouroboros-consensus/changelog.d/20240807_095933_alexander.esgen_milestone_1.md @@ -0,0 +1,11 @@ +### Breaking + +- Integrated new bulk sync BlockFetch logic. + +- CSJ: implemented rotation of dynamos. 
+ +- ChainDB: let the BlockFetch client add blocks asynchronously + +- GDD: added rate limit + +- Tweaked certain edge cases in the GDD and ChainSync client ([#1179](https://github.com/IntersectMBO/ouroboros-consensus/pull/1179)) diff --git a/ouroboros-consensus/ouroboros-consensus.cabal b/ouroboros-consensus/ouroboros-consensus.cabal index 2a5d2e6cda..360c4265f1 100644 --- a/ouroboros-consensus/ouroboros-consensus.cabal +++ b/ouroboros-consensus/ouroboros-consensus.cabal @@ -292,6 +292,7 @@ library io-classes ^>=1.5, measures, mtl, + multiset ^>=0.3, nothunks ^>=0.2, ouroboros-network-api ^>=0.11, ouroboros-network-mock ^>=0.1, diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs index a1cd17a3d1..c6c47b2e46 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs @@ -43,7 +43,7 @@ import Data.List.NonEmpty (NonEmpty) import qualified Data.List.NonEmpty as NE import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map -import Data.Maybe (mapMaybe, maybeToList) +import Data.Maybe (maybeToList) import Data.Maybe.Strict (StrictMaybe) import Data.Word (Word64) import Ouroboros.Consensus.Block @@ -86,6 +86,9 @@ gddWatcher :: => TopLevelConfig blk -> Tracer m (TraceGDDEvent peer blk) -> ChainDB m blk + -> DiffTime -- ^ How often to evaluate GDD. 0 means as soon as possible. + -- Otherwise, no faster than once every T seconds, where T is + -- the provided value. -> STM m GsmState -> STM m (Map peer (ChainSyncClientHandle m blk)) -- ^ The ChainSync handles. We trigger the GDD whenever our 'GsmState' @@ -98,7 +101,7 @@ gddWatcher :: -> Watcher m (GsmState, GDDStateView m blk peer) (Map peer (StrictMaybe (WithOrigin SlotNo), Bool)) -gddWatcher cfg tracer chainDb getGsmState getHandles varLoEFrag = +gddWatcher cfg tracer chainDb rateLimit getGsmState getHandles varLoEFrag = Watcher { wInitial = Nothing , wReader = (,) <$> getGsmState <*> getGDDStateView @@ -140,12 +143,17 @@ gddWatcher cfg tracer chainDb getGsmState getHandles varLoEFrag = wNotify :: (GsmState, GDDStateView m blk peer) -> m () wNotify (_gsmState, stateView) = do + t0 <- getMonotonicTime loeFrag <- evaluateGDD cfg tracer stateView oldLoEFrag <- atomically $ swapTVar varLoEFrag loeFrag -- The chain selection only depends on the LoE tip, so there -- is no point in retriggering it if the LoE tip hasn't changed. when (AF.headHash oldLoEFrag /= AF.headHash loeFrag) $ void $ ChainDB.triggerChainSelectionAsync chainDb + tf <- getMonotonicTime + -- We limit the rate at which GDD is evaluated, otherwise it would + -- be called every time a new header is validated. + threadDelay $ rateLimit - diffTime tf t0 -- | Pure snapshot of the dynamic data the GDD operates on. data GDDStateView m blk peer = GDDStateView { @@ -247,16 +255,41 @@ sharedCandidatePrefix curChain candidates = immutableTip = AF.anchorPoint curChain splitAfterImmutableTip (peer, frag) = - (,) peer . snd <$> AF.splitAfterPoint frag immutableTip + case AF.splitAfterPoint frag immutableTip of + -- When there is no intersection, we assume the candidate fragment is + -- empty and anchored at the immutable tip. + -- See Note [CSJ truncates the candidate fragments]. 
+ Nothing -> (peer, AF.takeOldest 0 curChain) + Just (_, suffix) -> (peer, suffix) immutableTipSuffixes = - -- If a ChainSync client's candidate forks off before the - -- immutable tip, then this transaction is currently winning an - -- innocuous race versus the thread that will fatally raise - -- 'InvalidIntersection' within that ChainSync client, so it's - -- sound to pre-emptively discard their candidate from this - -- 'Map' via 'mapMaybe'. - mapMaybe splitAfterImmutableTip candidates + map splitAfterImmutableTip candidates + +-- Note [CSJ truncates the candidate fragments] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- Before CSJ, only rollback could cause truncation of a candidate fragment. +-- Truncation is a serious business to GDD because the LoE might have allowed +-- the selection to advance, based on the tips of the candidate fragments. +-- +-- Truncating a candidate fragment risks moving the LoE back, which could be +-- earlier than the anchor of the latest selection. When rollbacks were the +-- only mechanism to truncate, it was fine to ignore candidate fragments that +-- don't intersect with the current selection. This could only happen if the +-- peer is rolling back more than k blocks, which is dishonest behavior. +-- +-- With CSJ, however, the candidate fragments can recede without a rollback. +-- A former objector might be asked to jump back when it becomes a jumper again. +-- The jump point might still be a descendant of the immutable tip. But by the +-- time the jump is accepted, the immutable tip might have advanced, and the +-- candidate fragment of the otherwise honest peer might be ignored by GDD. +-- +-- Therefore, at the moment, when there is no intersection with the current +-- selection, the GDD assumes that the candidate fragment is empty and anchored +-- at the immutable tip. It is the job of the ChainSync client to update the +-- candidate fragment so it intersects with the selection or to disconnect the +-- peer if no such fragment can be established. +-- data DensityBounds blk = DensityBounds { @@ -357,11 +390,7 @@ densityDisconnect (GenesisWindow sgen) (SecurityParam k) states candidateSuffixe , upperBound = ub0 , hasBlockAfter = hasBlockAfter0 , idling = idling0 - }) -> - -- If the density is 0, the peer should be disconnected. This affects - -- ChainSync jumping, where genesis windows with no headers prevent jumps - -- from happening. - if ub0 == 0 then pure peer0 else do + }) -> do (_peer1, DensityBounds {clippedFragment = frag1, offersMoreThanK, lowerBound = lb1 }) <- densityBounds -- Don't disconnect peer0 if it sent no headers after the intersection yet -- -- See Note [Chain disagreement] -- - -- Note: hasBlockAfter0 is False if frag0 is empty and ub0>0. - -- But we leave it here as a reminder that we care about it. 
guard $ idling0 || not (AF.null frag0) || hasBlockAfter0 -- ensure that the two peer fragments don't share any -- headers after the LoE diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs index b0d5f1cbb1..90a3ffe876 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs @@ -14,6 +14,7 @@ module Ouroboros.Consensus.MiniProtocol.BlockFetch.ClientInterface ( ) where import Control.Monad +import Control.Tracer (Tracer) import Data.Map.Strict (Map) import Data.Time.Clock (UTCTime) import GHC.Stack (HasCallStack) @@ -26,7 +27,12 @@ import qualified Ouroboros.Consensus.HardFork.Abstract as History import qualified Ouroboros.Consensus.HardFork.History as History import Ouroboros.Consensus.Ledger.Abstract import Ouroboros.Consensus.Ledger.Extended -import Ouroboros.Consensus.Storage.ChainDB.API (ChainDB) +import Ouroboros.Consensus.Ledger.SupportsProtocol + (LedgerSupportsProtocol) +import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient +import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping as CSJumping +import Ouroboros.Consensus.Storage.ChainDB.API (AddBlockPromise, + ChainDB) import qualified Ouroboros.Consensus.Storage.ChainDB.API as ChainDB import Ouroboros.Consensus.Storage.ChainDB.API.Types.InvalidBlockPunishment (InvalidBlockPunishment) @@ -38,8 +44,10 @@ import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (MaxSlotNo) import Ouroboros.Network.BlockFetch.ConsensusInterface - (BlockFetchConsensusInterface (..), FetchMode (..), - FromConsensus (..)) + (BlockFetchConsensusInterface (..), + ChainSelStarvation (..), FetchMode (..), + FromConsensus (..), PraosFetchMode (..), mkReadFetchMode) +import Ouroboros.Network.ConsensusMode (ConsensusMode) import Ouroboros.Network.PeerSelection.Bootstrap (UseBootstrapPeers, requiresBootstrapPeers) import Ouroboros.Network.PeerSelection.LedgerPeers.Type @@ -51,15 +59,17 @@ data ChainDbView m blk = ChainDbView { getCurrentChain :: STM m (AnchoredFragment (Header blk)) , getIsFetched :: STM m (Point blk -> Bool) , getMaxSlotNo :: STM m MaxSlotNo - , addBlockWaitWrittenToDisk :: InvalidBlockPunishment m -> blk -> m Bool + , addBlockAsync :: InvalidBlockPunishment m -> blk -> m (AddBlockPromise m blk) + , getChainSelStarvation :: STM m ChainSelStarvation } -defaultChainDbView :: IOLike m => ChainDB m blk -> ChainDbView m blk +defaultChainDbView :: ChainDB m blk -> ChainDbView m blk defaultChainDbView chainDB = ChainDbView { getCurrentChain = ChainDB.getCurrentChain chainDB , getIsFetched = ChainDB.getIsFetched chainDB , getMaxSlotNo = ChainDB.getMaxSlotNo chainDB - , addBlockWaitWrittenToDisk = ChainDB.addBlockWaitWrittenToDisk chainDB + , addBlockAsync = ChainDB.addBlockAsync chainDB + , getChainSelStarvation = ChainDB.getChainSelStarvation chainDB } -- | How to get the wall-clock time of a slot. 
Note that this is a very @@ -133,47 +143,53 @@ initSlotForgeTimeOracle cfg chainDB = do readFetchModeDefault :: (MonadSTM m, HasHeader blk) - => BlockchainTime m + => ConsensusMode + -> BlockchainTime m -> STM m (AnchoredFragment blk) -> STM m UseBootstrapPeers -> STM m LedgerStateJudgement -> STM m FetchMode -readFetchModeDefault btime getCurrentChain - getUseBootstrapPeers getLedgerStateJudgement = do - mCurSlot <- getCurrentSlot btime - usingBootstrapPeers <- requiresBootstrapPeers <$> getUseBootstrapPeers - <*> getLedgerStateJudgement +readFetchModeDefault consensusMode btime getCurrentChain + getUseBootstrapPeers getLedgerStateJudgement = + mkReadFetchMode consensusMode getLedgerStateJudgement praosFetchMode + where + praosFetchMode = do + mCurSlot <- getCurrentSlot btime + usingBootstrapPeers <- requiresBootstrapPeers <$> getUseBootstrapPeers + <*> getLedgerStateJudgement - -- This logic means that when the node is using bootstrap peers and is in - -- TooOld state it will always return BulkSync. Otherwise if the node - -- isn't using bootstrap peers (i.e. has them disabled it will use the old - -- logic of returning BulkSync if behind 1000 slots - case (usingBootstrapPeers, mCurSlot) of - (True, _) -> return FetchModeBulkSync - (False, CurrentSlotUnknown) -> return FetchModeBulkSync - (False, CurrentSlot curSlot) -> do - curChainSlot <- AF.headSlot <$> getCurrentChain - let slotsBehind = case curChainSlot of - -- There's nothing in the chain. If the current slot is 0, then - -- we're 1 slot behind. - Origin -> unSlotNo curSlot + 1 - NotOrigin slot -> unSlotNo curSlot - unSlotNo slot - maxSlotsBehind = 1000 - return $ if slotsBehind < maxSlotsBehind - -- When the current chain is near to "now", use deadline mode, - -- when it is far away, use bulk sync mode. - then FetchModeDeadline - else FetchModeBulkSync + -- This logic means that when the node is using bootstrap peers and is in + -- TooOld state it will always return BulkSync. Otherwise if the node + -- isn't using bootstrap peers (i.e. has them disabled it will use the old + -- logic of returning BulkSync if behind 1000 slots + case (usingBootstrapPeers, mCurSlot) of + (True, _) -> return FetchModeBulkSync + (False, CurrentSlotUnknown) -> return FetchModeBulkSync + (False, CurrentSlot curSlot) -> do + curChainSlot <- AF.headSlot <$> getCurrentChain + let slotsBehind = case curChainSlot of + -- There's nothing in the chain. If the current slot is 0, then + -- we're 1 slot behind. + Origin -> unSlotNo curSlot + 1 + NotOrigin slot -> unSlotNo curSlot - unSlotNo slot + maxSlotsBehind = 1000 + return $ if slotsBehind < maxSlotsBehind + -- When the current chain is near to "now", use deadline mode, + -- when it is far away, use bulk sync mode. + then FetchModeDeadline + else FetchModeBulkSync mkBlockFetchConsensusInterface :: forall m peer blk. ( IOLike m , BlockSupportsDiffusionPipelining blk - , BlockSupportsProtocol blk + , Ord peer + , LedgerSupportsProtocol blk ) - => BlockConfig blk + => Tracer m (CSJumping.TraceEvent peer) + -> BlockConfig blk -> ChainDbView m blk - -> STM m (Map peer (AnchoredFragment (Header blk))) + -> CSClient.ChainSyncClientHandleCollection peer m blk -> (Header blk -> SizeInBytes) -> SlotForgeTimeOracle m blk -- ^ Slot forge time, see 'headerForgeUTCTime' and 'blockForgeUTCTime'. 
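-- Editor's note: the block below is an illustrative, self-contained sketch
-- (not part of this patch) of the slots-behind calculation performed by
-- 'readFetchModeDefault' above. 'SimpleFetchMode' and 'praosFetchModeFor' are
-- made-up stand-ins for the real types; only the arithmetic and the
-- 1000-slot threshold are taken from the code above, and the bootstrap-peers
-- short-circuit (always bulk sync) is omitted.
module FetchModeSketch (SimpleFetchMode (..), praosFetchModeFor) where

-- Simplified stand-in for the Praos fetch modes.
data SimpleFetchMode = Deadline | BulkSync
  deriving (Eq, Show)

-- | Decide the fetch mode from the wall-clock slot and the head slot of the
-- current chain ('Nothing' models an empty chain, i.e. 'Origin').
praosFetchModeFor :: Word -> Maybe Word -> SimpleFetchMode
praosFetchModeFor curSlot chainHeadSlot
    | slotsBehind < maxSlotsBehind = Deadline -- the chain is near "now"
    | otherwise                    = BulkSync -- the chain is far behind
  where
    slotsBehind = case chainHeadSlot of
      -- Nothing in the chain: if the current slot is 0, we are 1 slot behind.
      Nothing   -> curSlot + 1
      Just slot -> curSlot - slot
    maxSlotsBehind = 1000

-- For example, praosFetchModeFor 5000 (Just 4500) yields Deadline, whereas
-- praosFetchModeFor 5000 (Just 1000) yields BulkSync.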
@@ -182,9 +198,12 @@ mkBlockFetchConsensusInterface :: -> DiffusionPipeliningSupport -> BlockFetchConsensusInterface peer (Header blk) blk m mkBlockFetchConsensusInterface - bcfg chainDB getCandidates blockFetchSize slotForgeTime readFetchMode pipelining = + csjTracer bcfg chainDB csHandlesCol blockFetchSize slotForgeTime readFetchMode pipelining = BlockFetchConsensusInterface {..} where + getCandidates :: STM m (Map peer (AnchoredFragment (Header blk))) + getCandidates = CSClient.viewChainSyncState (CSClient.cschcMap csHandlesCol) CSClient.csCandidate + blockMatchesHeader :: Header blk -> blk -> Bool blockMatchesHeader = Block.blockMatchesHeader @@ -204,8 +223,8 @@ mkBlockFetchConsensusInterface pipeliningPunishment <- InvalidBlockPunishment.mkForDiffusionPipelining pure $ mkAddFetchedBlock_ pipeliningPunishment pipelining - -- Waits until the block has been written to disk, but not until chain - -- selection has processed the block. + -- Hand over the block to the ChainDB, but don't wait until it has been + -- written to disk or processed. mkAddFetchedBlock_ :: ( BlockConfig blk -> Header blk @@ -249,7 +268,7 @@ mkBlockFetchConsensusInterface DiffusionPipeliningOff -> disconnect DiffusionPipeliningOn -> pipeliningPunishment bcfg (getHeader blk) disconnect - addBlockWaitWrittenToDisk + addBlockAsync chainDB punishment blk @@ -329,3 +348,8 @@ mkBlockFetchConsensusInterface headerForgeUTCTime = slotForgeTime . headerRealPoint . unFromConsensus blockForgeUTCTime = slotForgeTime . blockRealPoint . unFromConsensus + + readChainSelStarvation = getChainSelStarvation chainDB + + demoteChainSyncJumpingDynamo :: peer -> m () + demoteChainSyncJumpingDynamo = CSJumping.rotateDynamo csjTracer csHandlesCol diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs index ae0edd3420..d9a70817d9 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs @@ -63,10 +63,12 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client ( , TraceChainSyncClientEvent (..) -- * State shared with other components , ChainSyncClientHandle (..) + , ChainSyncClientHandleCollection (..) , ChainSyncState (..) , ChainSyncStateView (..) , Jumping.noJumping , chainSyncStateFor + , newChainSyncClientHandleCollection , noIdling , noLoPBucket , viewChainSyncState @@ -116,7 +118,8 @@ import Ouroboros.Consensus.Protocol.Abstract import Ouroboros.Consensus.Storage.ChainDB (ChainDB) import qualified Ouroboros.Consensus.Storage.ChainDB as ChainDB import Ouroboros.Consensus.Util -import Ouroboros.Consensus.Util.AnchoredFragment (cross) +import Ouroboros.Consensus.Util.AnchoredFragment (cross, + preferAnchoredCandidate) import Ouroboros.Consensus.Util.Assert (assertWithMsg) import Ouroboros.Consensus.Util.EarlyExit (WithEarlyExit, exitEarly) import qualified Ouroboros.Consensus.Util.EarlyExit as EarlyExit @@ -165,7 +168,7 @@ data ChainSyncLoPBucketEnabledConfig = ChainSyncLoPBucketEnabledConfig { csbcCapacity :: Integer, -- | The rate of the bucket (think tokens per second). csbcRate :: Rational - } + } deriving stock (Eq, Generic, Show) -- | Configuration of the leaky bucket. data ChainSyncLoPBucketConfig @@ -176,6 +179,7 @@ data ChainSyncLoPBucketConfig | -- | Enable the leaky bucket. 
ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig + deriving stock (Eq, Generic, Show) -- | Configuration of ChainSync Jumping data CSJConfig @@ -186,6 +190,7 @@ | -- | Enable ChainSync Jumping CSJEnabled CSJEnabledConfig + deriving stock (Eq, Generic, Show) newtype CSJEnabledConfig = CSJEnabledConfig { -- | The _ideal_ size for ChainSync jumps. Note that the algorithm @@ -205,7 +210,7 @@ newtype CSJEnabledConfig = CSJEnabledConfig { -- window has a higher chance that dishonest peers can delay syncing by a -- small margin (around 2 minutes per dishonest peer with mainnet parameters). csjcJumpSize :: SlotNo -} +} deriving stock (Eq, Generic, Show) defaultChainDbView :: (IOLike m, LedgerSupportsProtocol blk) @@ -231,11 +236,11 @@ newtype Our a = Our { unOur :: a } -- data from 'ChainSyncState'. viewChainSyncState :: IOLike m => - StrictTVar m (Map peer (ChainSyncClientHandle m blk)) -> + STM m (Map peer (ChainSyncClientHandle m blk)) -> (ChainSyncState blk -> a) -> STM m (Map peer a) -viewChainSyncState varHandles f = - Map.map f <$> (traverse (readTVar . cschState) =<< readTVar varHandles) +viewChainSyncState readHandles f = + Map.map f <$> (traverse (readTVar . cschState) =<< readHandles) -- | Convenience function for reading the 'ChainSyncState' for a single peer -- from a nested set of TVars. @@ -329,7 +334,7 @@ bracketChainSyncClient :: ) => Tracer m (TraceChainSyncClientEvent blk) -> ChainDbView m blk - -> StrictTVar m (Map peer (ChainSyncClientHandle m blk)) + -> ChainSyncClientHandleCollection peer m blk -- ^ The kill handle and states for each peer, we need the whole map because we -- (de)register nodes (@peer@). -> STM m GsmState @@ -404,8 +409,8 @@ bracketChainSyncClient insertHandle = atomicallyWithMonotonicTime $ \time -> do initialGsmState <- getGsmState updateLopBucketConfig lopBucket initialGsmState time - modifyTVar varHandles $ Map.insert peer handle - deleteHandle = atomically $ modifyTVar varHandles $ Map.delete peer + cschcAddHandle varHandles peer handle + deleteHandle = atomically $ cschcRemoveHandle varHandles peer bracket_ insertHandle deleteHandle $ f Jumping.noJumping withCSJCallbacks lopBucket csHandleState (CSJEnabled csjEnabledConfig) f = @@ -912,7 +917,9 @@ chainSyncClient cfgEnv dynEnv = Nat n' -> s -> m (Consensus (ClientPipelinedStIdle n') blk m) - go n s = case n of + go n s = do + traceWith tracer $ TraceDrainingThePipe n + case n of Zero -> continueWithState s m Succ n' -> return $ CollectResponse Nothing $ ClientStNext { recvMsgRollForward = \_hdr _tip -> go n' s @@ -1637,7 +1644,8 @@ checkKnownInvalid cfgEnv dynEnv intEnv hdr = case scrutinee of -- Finally, the client will block on the intersection a second time, if -- necessary, since it's possible for a ledger state to determine the slot's -- onset's timestamp without also determining the slot's 'LedgerView'. During --- this pause, the LoP bucket is paused. +-- this pause, the LoP bucket is paused. If we need to block and their fragment +-- is not preferable to ours, we disconnect. checkTime :: forall m blk arrival judgment.
( IOLike m @@ -1746,10 +1754,43 @@ checkTime cfgEnv dynEnv intEnv = ) $ getPastLedger mostRecentIntersection case prj lst of - Nothing -> retry + Nothing -> do + checkPreferTheirsOverOurs kis' + retry Just ledgerView -> return $ return $ Intersects kis' ledgerView + -- Note [Candidate comparing beyond the forecast horizon] + -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + -- + -- When a header is beyond the forecast horizon and their fragment is not + -- preferable to our selection (ourFrag), then we disconnect, as we will + -- never end up selecting it. + -- + -- In the context of Genesis, one can think of the candidate losing a + -- density comparison against the selection. See the Genesis documentation + -- for why this check is necessary. + -- + -- In particular, this means that we will disconnect from peers who offer us + -- a chain containing a slot gap larger than a forecast window. + checkPreferTheirsOverOurs :: KnownIntersectionState blk -> STM m () + checkPreferTheirsOverOurs kis + | -- Precondition is fulfilled as ourFrag and theirFrag intersect by + -- construction. + preferAnchoredCandidate (configBlock cfg) ourFrag theirFrag + = pure () + | otherwise + = throwSTM $ CandidateTooSparse + mostRecentIntersection + (ourTipFromChain ourFrag) + (theirTipFromChain theirFrag) + where + KnownIntersectionState { + mostRecentIntersection + , ourFrag + , theirFrag + } = kis + -- Returns 'Nothing' if the ledger state cannot forecast the ledger view -- that far into the future. projectLedgerView :: @@ -1934,6 +1975,12 @@ ourTipFromChain :: -> Our (Tip blk) ourTipFromChain = Our . AF.anchorToTip . AF.headAnchor +theirTipFromChain :: + HasHeader (Header blk) + => AnchoredFragment (Header blk) + -> Their (Tip blk) +theirTipFromChain = Their . AF.anchorToTip . AF.headAnchor + -- | A type-legos auxiliary function used in 'readLedgerState'. castM :: Monad m => m (WithEarlyExit m x) -> WithEarlyExit m x castM = join . EarlyExit.lift @@ -2157,6 +2204,14 @@ data ChainSyncClientException = -- different from the previous argument. (ExtValidationError blk) -- ^ The upstream node's chain contained a block that we know is invalid. + | + forall blk. BlockSupportsProtocol blk => + CandidateTooSparse + (Point blk) -- ^ Intersection + (Our (Tip blk)) + (Their (Tip blk)) + -- ^ The upstream node's chain was so sparse that it was worse than our + -- selection despite being blocked on the forecast horizon. | InFutureHeaderExceedsClockSkew !InFutureCheck.HeaderArrivalException -- ^ A header arrived from the far future. @@ -2192,6 +2247,12 @@ instance Eq ChainSyncClientException where | Just Refl <- eqT @blk @blk' = (a, b, c) == (a', b', c') + (==) + (CandidateTooSparse (a :: Point blk ) b c ) + (CandidateTooSparse (a' :: Point blk') b' c') + | Just Refl <- eqT @blk @blk' + = (a, b, c) == (a', b', c') + (==) (InFutureHeaderExceedsClockSkew a ) (InFutureHeaderExceedsClockSkew a') @@ -2215,6 +2276,7 @@ instance Eq ChainSyncClientException where HeaderError{} == _ = False InvalidIntersection{} == _ = False InvalidBlock{} == _ = False + CandidateTooSparse{} == _ = False InFutureHeaderExceedsClockSkew{} == _ = False HistoricityError{} == _ = False EmptyBucket == _ = False @@ -2274,12 +2336,8 @@ data TraceChainSyncClientEvent blk = | TraceJumpingInstructionIs (Jumping.Instruction blk) -- ^ ChainSync Jumping -- the ChainSync client got its next instruction. - -deriving instance - ( BlockSupportsProtocol blk - , Eq (Header blk) - ) - => Eq (TraceChainSyncClientEvent blk) + | + forall n.
TraceDrainingThePipe (Nat n) deriving instance ( BlockSupportsProtocol blk diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs index 4fe5f24a47..f38656498d 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs @@ -74,6 +74,21 @@ -- when the client should pause, download headers, or ask about agreement with -- a given point (jumping). See the 'Jumping' type for more details. -- +-- CSJ depends on the ChainSync client to disconnect dynamos that have an empty +-- genesis window after their intersection with the selection. This is necessary +-- because otherwise there are no points to jump to, and CSJ could would get +-- stuck when the dynamo blocks on the forecast horizon. See +-- Note [Candidate comparing beyond the forecast horizon] in +-- "Ouroboros.Consensus.MiniProtocol.ChainSync.Client". +-- +-- Interactions with the BlockFetch logic +-- -------------------------------------- +-- +-- When syncing, the BlockFetch logic might request to change the dynamo with +-- a call to 'rotateDynamo'. This is because the choice of dynamo influences +-- which peer is selected to download blocks. See the note "Interactions with +-- ChainSync Jumping" in "Ouroboros.Network.BlockFetch.Decision.BulkSync". +-- -- Interactions with the Limit on Patience -- --------------------------------------- -- @@ -100,27 +115,32 @@ -- -- > j ╔════════╗ -- > ╭────────── ║ Dynamo ║ ◀─────────╮ --- > │ ╚════════╝ │f --- > ▼ ▲ │ --- > ┌────────────┐ │ k ┌──────────┐ --- > │ Disengaged │ ◀───────────│────────── │ Objector │ --- > └────────────┘ ╭─────│────────── └──────────┘ --- > │ │ ▲ ▲ │ --- > g│ │e b │ │ │ --- > │ │ ╭─────╯ i│ │c --- > ╭╌╌╌╌╌╌╌▼╌╌╌╌╌╌╌╌╌╌╌╌╌│╌╌╌╌╌╌╌╌╌╌│╌▼╌╌╌╮ --- > ┆ ╔═══════╗ a ┌──────┐ d ┌─────┐ | --- > ┆ ║ Happy ║ ───▶ │ LFI* │ ───▶ │ FI* │ | --- > ┆ ╚═══════╝ ◀─╮ └──────┘ └─────┘ | --- > ┆ Jumper ╰─────┴────────────╯h | +-- > │ ╭─ ╚════════╝ │f +-- > ▼ │ ▲ │ +-- > ┌────────────┐ │ │ k ┌──────────┐ +-- > │ Disengaged │ ◀─│─────────│────────── │ Objector │ +-- > └────────────┘ │ ╭─────│────────── └──────────┘ +-- > │ │ │ ▲ ▲ │ +-- > l│ g│ │e b │ │ │ +-- > │ │ │ ╭─────╯ i│ │c +-- > ╭╌╌╌▼╌╌╌▼╌╌╌╌╌╌╌╌╌╌╌╌╌│╌╌╌╌╌╌╌╌╌╌│╌▼╌╌╌╮ +-- > ┆ ╔═══════╗ a ┌──────┐ d ┌─────┐ ┆ +-- > ┆ ║ Happy ║ ───▶ │ LFI* │ ───▶ │ FI* │ ┆ +-- > ┆ ╚═══════╝ ◀─╮ └──────┘ └─────┘ ┆ +-- > ┆ Jumper ╰─────┴────────────╯h ┆ -- > ╰╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╯ -- -- *: LookingForIntersection and FoundIntersection, abbreviated for this -- drawing only; this abbreviation will not be used elsewhere. -- +-- In the following walk-through, we will point to transitions in the drawing +-- between parentheses, like so: (a) (b+c) (e|f). We will use `+` to express +-- that both transitions happen simultaneously (for different peers) and `|` to +-- express a choice. +-- -- A new peer starts as the dynamo if there is no other peer or as a Happy -- jumper otherwise. The dynamo periodically requests jumps from happy --- jumpers who, in the ideal case, accept them. +-- jumpers who, in the ideal case, accept them and remain happy jumpers. -- -- In the event that a jumper rejects a jump, it goes from Happy to LFI* (a). 
-- From there starts a back-and-forth of intersection search messages until @@ -128,24 +148,34 @@ -- -- Once the exact point of disagreement is found, and if there is no objector -- yet, the jumper becomes the objector (b). If there is an objector, then we --- compare the intersections of the objector and the jumper. If the jumper's --- intersection is strictly older, then the jumper replaces the objector (b+c). +-- compare the intersection of the objector with the dynamo and the intersection +-- of the jumper with the dynamo. If the jumper's intersection is strictly +-- older, then the jumper replaces the objector, who is marked as FI* (b+c). -- Otherwise, the jumper is marked as FI* (d). -- -- If the dynamo disconnects or is disengaged, one peer is elected as the new --- dynamo (e|f) and all other peers revert to being happy jumpers (g+h). +-- dynamo (e|f) and all the other peers revert to being happy jumpers (g+h). -- -- If the objector disconnects or is disengaged, and there are FI* jumpers, then -- the one with the oldest intersection with the dynamo gets elected (i). +-- Otherwise, we are left with no objector. -- -- If the dynamo rolls back to a point older than the last jump it requested, it --- is disengaged (j) and a new dynamo is elected (e|f). +-- is disengaged (j), a new dynamo is elected (e|f), and all the other peers +-- revert to being happy jumpers (g+h). -- -- If the objector agrees with the dynamo, it is disengaged (k). If there are -- FI* jumpers, then one of them gets elected as the new objector (i). +-- Otherwise, we are left with no objector. -- --- If dynamo or objector claim to have no more headers, they are disengaged --- (j|k). +-- If the dynamo or the objector claim to have no more headers, they are +-- disengaged (j|k), triggering the same chain of effects as described in the two +-- previous points. +-- +-- The BlockFetch logic can ask to change the dynamo if it is not serving blocks +-- fast enough. If there are other non-disengaged peers, the dynamo, the +-- objector (if there is one), and all the other peers are demoted to happy +-- jumpers (l+g+h) and a new dynamo is elected (e). -- module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping ( Context @@ -154,20 +184,26 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping ( , JumpInstruction (..) , JumpResult (..) , Jumping (..) + , TraceEvent (..)
+ , getDynamo , makeContext , mkJumping , noJumping , registerClient + , rotateDynamo , unregisterClient ) where import Cardano.Slotting.Slot (SlotNo (..), WithOrigin (..)) -import Control.Monad (forM, forM_, when) +import Control.Monad (forM, forM_, void, when) +import Control.Tracer (Tracer, traceWith) +import Data.Foldable (toList, traverse_) import Data.List (sortOn) -import Data.Map.Strict (Map) -import qualified Data.Map.Strict as Map +import qualified Data.Map as Map import Data.Maybe (catMaybes, fromMaybe) import Data.Maybe.Strict (StrictMaybe (..)) +import Data.Sequence.Strict (StrictSeq) +import qualified Data.Sequence.Strict as Seq import GHC.Generics (Generic) import Ouroboros.Consensus.Block (HasHeader (getHeaderFields), Header, Point (..), castPoint, pointSlot, succWithOrigin) @@ -175,6 +211,7 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State (ChainSyncClientHandle (..), + ChainSyncClientHandleCollection (..), ChainSyncJumpingJumperState (..), ChainSyncJumpingState (..), ChainSyncState (..), DisengagedInitState (..), DynamoInitState (..), @@ -257,16 +294,16 @@ mkJumping peerContext = Jumping -- -- Invariants: -- --- - If 'handlesVar' is not empty, then there is exactly one dynamo in it. --- - There is at most one objector in 'handlesVar'. --- - If there exist 'FoundIntersection' jumpers in 'handlesVar', then there +-- - If 'handlesCol' is not empty, then there is exactly one dynamo in it. +-- - There is at most one objector in 'handlesCol'. +-- - If there exist 'FoundIntersection' jumpers in 'handlesCol', then there -- is an objector and the intersection of the objector with the dynamo is -- at least as old as the oldest intersection of the `FoundIntersection` jumpers -- with the dynamo. data ContextWith peerField handleField m peer blk = Context { peer :: !peerField, handle :: !handleField, - handlesVar :: !(StrictTVar m (Map peer (ChainSyncClientHandle m blk))), + handlesCol :: !(ChainSyncClientHandleCollection peer m blk), jumpSize :: !SlotNo } @@ -276,12 +313,12 @@ type Context = ContextWith () () -- | A peer-specific context for ChainSync jumping. This is a 'ContextWith' -- pointing on the handler of the peer in question. -- --- Invariant: The binding from 'peer' to 'handle' is present in 'handlesVar'. +-- Invariant: The binding from 'peer' to 'handle' is present in 'handlesCol'. type PeerContext m peer blk = ContextWith peer (ChainSyncClientHandle m blk) m peer blk makeContext :: MonadSTM m => - StrictTVar m (Map peer (ChainSyncClientHandle m blk)) -> + ChainSyncClientHandleCollection peer m blk -> SlotNo -> -- ^ The size of jumps, in number of slots. STM m (Context m peer blk) @@ -427,8 +464,8 @@ onRollForward context point = setJumps (Just jumpInfo) = do writeTVar (cschJumping (handle context)) $ Dynamo DynamoStarted $ pointSlot $ AF.headPoint $ jTheirFragment jumpInfo - handles <- readTVar (handlesVar context) - forM_ (Map.elems handles) $ \h -> + handles <- cschcSeq (handlesCol context) + forM_ handles $ \(_, h) -> readTVar (cschJumping h) >>= \case Jumper nextJumpVar Happy{} -> writeTVar nextJumpVar (Just jumpInfo) _ -> pure () @@ -459,7 +496,7 @@ onRollBackward context slot = Dynamo _ lastJumpSlot | slot < lastJumpSlot -> do disengage (handle context) - electNewDynamo (stripContext context) + void $ electNewDynamo (stripContext context) | otherwise -> pure () -- | This function is called when we receive a 'MsgAwaitReply' message. 
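-- Editor's note: the block below is an illustrative, self-contained model
-- (not part of this patch) of the election logic used by 'registerClient' and
-- 'electNewDynamo' above: peers live in a queue, a fresh peer becomes the
-- dynamo only if there is none yet, and a new dynamo is the first
-- non-disengaged peer in queue order, with every other engaged peer demoted
-- to a happy jumper. 'PeerRole' and 'PeerQueue' are made-up stand-ins for
-- 'ChainSyncJumpingState' and the handle collection; objectors are not
-- modelled separately.
module JumpingElectionSketch (PeerRole (..), PeerQueue, registerPeer, electDynamo) where

import Data.List (find)

data PeerRole = Dynamo | Jumper | Disengaged
  deriving (Eq, Show)

type PeerQueue peer = [(peer, PeerRole)]

-- | Mirrors 'registerClient': a new peer becomes the dynamo if there is none
-- yet, and a (happy) jumper otherwise; it is appended at the end of the queue.
registerPeer :: peer -> PeerQueue peer -> PeerQueue peer
registerPeer p queue
  | any ((== Dynamo) . snd) queue = queue ++ [(p, Jumper)]
  | otherwise                     = queue ++ [(p, Dynamo)]

-- | Mirrors 'electNewDynamo': promote the first non-disengaged peer in queue
-- order and demote every other engaged peer to a jumper.
electDynamo :: Eq peer => PeerQueue peer -> PeerQueue peer
electDynamo queue =
  case find ((/= Disengaged) . snd) queue of
    Nothing          -> queue -- no engaged peer left, nothing to elect
    Just (winner, _) -> [ (p, newRole winner role p) | (p, role) <- queue ]
  where
    newRole winner role p
      | p == winner        = Dynamo
      | role == Disengaged = Disengaged
      | otherwise          = Jumper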
@@ -477,7 +514,7 @@ onAwaitReply context = readTVar (cschJumping (handle context)) >>= \case Dynamo{} -> do disengage (handle context) - electNewDynamo (stripContext context) + void $ electNewDynamo (stripContext context) Objector{} -> do disengage (handle context) electNewObjector (stripContext context) @@ -510,7 +547,7 @@ processJumpResult context jumpResult = updateChainSyncState (handle context) jumpInfo RejectedJump JumpToGoodPoint{} -> do startDisengaging (handle context) - electNewDynamo (stripContext context) + void $ electNewDynamo (stripContext context) -- Not interesting in the dynamo state AcceptedJump JumpTo{} -> pure () @@ -660,11 +697,11 @@ updateJumpInfo context jumpInfo = -- of the dynamo, or 'Nothing' if there is none. getDynamo :: (MonadSTM m) => - StrictTVar m (Map peer (ChainSyncClientHandle m blk)) -> - STM m (Maybe (ChainSyncClientHandle m blk)) -getDynamo handlesVar = do - handles <- Map.elems <$> readTVar handlesVar - findM (\handle -> isDynamo <$> readTVar (cschJumping handle)) handles + ChainSyncClientHandleCollection peer m blk -> + STM m (Maybe (peer, ChainSyncClientHandle m blk)) +getDynamo handlesCol = do + handles <- cschcSeq handlesCol + findM (\(_, handle) -> isDynamo <$> readTVar (cschJumping handle)) handles where isDynamo Dynamo{} = True isDynamo _ = False @@ -705,8 +742,7 @@ newJumper jumpInfo jumperState = do -- that peer. If there is no dynamo, the peer starts as dynamo; otherwise, it -- starts as a jumper. registerClient :: - ( Ord peer, - LedgerSupportsProtocol blk, + ( LedgerSupportsProtocol blk, IOLike m ) => Context m peer blk -> @@ -716,16 +752,16 @@ registerClient :: (StrictTVar m (ChainSyncJumpingState m blk) -> ChainSyncClientHandle m blk) -> STM m (PeerContext m peer blk) registerClient context peer csState mkHandle = do - csjState <- getDynamo (handlesVar context) >>= \case + csjState <- getDynamo (handlesCol context) >>= \case Nothing -> do fragment <- csCandidate <$> readTVar csState pure $ Dynamo DynamoStarted $ pointSlot $ AF.anchorPoint fragment - Just handle -> do + Just (_, handle) -> do mJustInfo <- readTVar (cschJumpInfo handle) newJumper mJustInfo (Happy FreshJumper Nothing) cschJumping <- newTVar csjState let handle = mkHandle cschJumping - modifyTVar (handlesVar context) $ Map.insert peer handle + cschcAddHandle (handlesCol context) peer handle pure $ context {peer, handle} -- | Unregister a client from a 'PeerContext'; this might trigger the election @@ -738,13 +774,60 @@ unregisterClient :: PeerContext m peer blk -> STM m () unregisterClient context = do - modifyTVar (handlesVar context) $ Map.delete (peer context) + cschcRemoveHandle (handlesCol context) (peer context) let context' = stripContext context readTVar (cschJumping (handle context)) >>= \case Disengaged{} -> pure () Jumper{} -> pure () Objector{} -> electNewObjector context' - Dynamo{} -> electNewDynamo context' + Dynamo{} -> void $ electNewDynamo context' + +-- | Elects a new dynamo by demoting the given dynamo (and the objector if there +-- is one) to a jumper, moving the peer to the end of the queue of chain sync +-- handles and electing a new dynamo. +-- +-- It does nothing if there is no other engaged peer to elect or if the given +-- peer is not the dynamo. +rotateDynamo :: + ( Ord peer, + LedgerSupportsProtocol blk, + MonadSTM m + ) => + Tracer m (TraceEvent peer) -> + ChainSyncClientHandleCollection peer m blk -> + peer -> + m () +rotateDynamo tracer handlesCol peer = do + traceEvent <- atomically $ do + handles <- cschcMap handlesCol + case handles Map.!? 
peer of + Nothing -> + -- Do not re-elect a dynamo if the peer has been disconnected. + pure Nothing + Just oldDynHandle -> + readTVar (cschJumping oldDynHandle) >>= \case + Dynamo{} -> do + cschcRotateHandle handlesCol peer + peerStates <- cschcSeq handlesCol + mEngaged <- findNonDisengaged peerStates + case mEngaged of + Nothing -> + -- There are no engaged peers. This case cannot happen, as the + -- dynamo is always engaged. + error "rotateDynamo: no engaged peer found" + Just (newDynamoId, newDynHandle) + | newDynamoId == peer -> + -- The old dynamo is the only engaged peer left. + pure Nothing + | otherwise -> do + newJumper Nothing (Happy FreshJumper Nothing) + >>= writeTVar (cschJumping oldDynHandle) + promoteToDynamo peerStates newDynamoId newDynHandle + pure $ Just $ RotatedDynamo peer newDynamoId + _ -> + -- Do not re-elect a dynamo if the peer is not the dynamo. + pure Nothing + traverse_ (traceWith tracer) traceEvent -- | Choose an unspecified new non-idling dynamo and demote all other peers to -- jumpers. @@ -754,49 +837,68 @@ electNewDynamo :: LedgerSupportsProtocol blk ) => Context m peer blk -> - STM m () + STM m (Maybe (peer, ChainSyncClientHandle m blk)) electNewDynamo context = do - peerStates <- Map.toList <$> readTVar (handlesVar context) + peerStates <- cschcSeq (handlesCol context) mDynamo <- findNonDisengaged peerStates case mDynamo of - Nothing -> pure () + Nothing -> pure Nothing Just (dynId, dynamo) -> do - fragment <- csCandidate <$> readTVar (cschState dynamo) - mJumpInfo <- readTVar (cschJumpInfo dynamo) - -- If there is no jump info, the dynamo must be just starting and - -- there is no need to set the intersection of the ChainSync server. - let dynamoInitState = maybe DynamoStarted DynamoStarting mJumpInfo - writeTVar (cschJumping dynamo) $ - Dynamo dynamoInitState $ pointSlot $ AF.headPoint fragment - -- Demote all other peers to jumpers - forM_ peerStates $ \(peer, st) -> - when (peer /= dynId) $ do - jumpingState <- readTVar (cschJumping st) - when (not (isDisengaged jumpingState)) $ - newJumper mJumpInfo (Happy FreshJumper Nothing) - >>= writeTVar (cschJumping st) - where - findNonDisengaged = - findM $ \(_, st) -> not . isDisengaged <$> readTVar (cschJumping st) - isDisengaged Disengaged{} = True - isDisengaged _ = False + promoteToDynamo peerStates dynId dynamo + pure $ Just (dynId, dynamo) + +-- | Promote the given peer to dynamo and demote all other peers to jumpers. +promoteToDynamo :: + ( MonadSTM m, + Eq peer, + LedgerSupportsProtocol blk + ) => + StrictSeq (peer, ChainSyncClientHandle m blk) -> + peer -> + ChainSyncClientHandle m blk -> + STM m () +promoteToDynamo peerStates dynId dynamo = do + fragment <- csCandidate <$> readTVar (cschState dynamo) + mJumpInfo <- readTVar (cschJumpInfo dynamo) + -- If there is no jump info, the dynamo must be just starting and + -- there is no need to set the intersection of the ChainSync server. 
+ let dynamoInitState = maybe DynamoStarted DynamoStarting mJumpInfo + writeTVar (cschJumping dynamo) $ + Dynamo dynamoInitState $ pointSlot $ AF.headPoint fragment + -- Demote all other peers to jumpers + forM_ peerStates $ \(peer, st) -> + when (peer /= dynId) $ do + jumpingState <- readTVar (cschJumping st) + when (not (isDisengaged jumpingState)) $ + newJumper mJumpInfo (Happy FreshJumper Nothing) + >>= writeTVar (cschJumping st) + +-- | Find a non-disengaged peer in the given sequence +findNonDisengaged :: + (MonadSTM m) => + StrictSeq (peer, ChainSyncClientHandle m blk) -> + STM m (Maybe (peer, ChainSyncClientHandle m blk)) +findNonDisengaged = + findM $ \(_, st) -> not . isDisengaged <$> readTVar (cschJumping st) + +isDisengaged :: ChainSyncJumpingState m blk -> Bool +isDisengaged Disengaged{} = True +isDisengaged _ = False -findM :: Monad m => (a -> m Bool) -> [a] -> m (Maybe a) -findM _ [] = pure Nothing -findM p (x : xs) = p x >>= \case - True -> pure (Just x) - False -> findM p xs +findM :: (Foldable f, Monad m) => (a -> m Bool) -> f a -> m (Maybe a) +findM p = + foldr (\x mb -> p x >>= \case True -> pure (Just x); False -> mb) (pure Nothing) -- | Find the objector in a context, if there is one. findObjector :: (MonadSTM m) => Context m peer blk -> STM m (Maybe (ObjectorInitState, JumpInfo blk, Point (Header blk), ChainSyncClientHandle m blk)) -findObjector context = do - readTVar (handlesVar context) >>= go . Map.toList +findObjector context = + cschcSeq (handlesCol context) >>= go where - go [] = pure Nothing - go ((_, handle):xs) = + go Seq.Empty = pure Nothing + go ((_, handle) Seq.:<| xs) = readTVar (cschJumping handle) >>= \case Objector initState goodJump badPoint -> pure $ Just (initState, goodJump, badPoint, handle) @@ -809,7 +911,7 @@ electNewObjector :: Context m peer blk -> STM m () electNewObjector context = do - peerStates <- Map.toList <$> readTVar (handlesVar context) + peerStates <- toList <$> cschcSeq (handlesCol context) dissentingJumpers <- collectDissentingJumpers peerStates let sortedJumpers = sortOn (pointSlot . fst) dissentingJumpers case sortedJumpers of @@ -826,3 +928,7 @@ electNewObjector context = do pure $ Just (badPoint, (initState, goodJumpInfo, handle)) _ -> pure Nothing + +data TraceEvent peer + = RotatedDynamo peer peer + deriving (Show) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs index f850ccdf89..6faef18cff 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs @@ -9,6 +9,7 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State ( ChainSyncClientHandle (..) + , ChainSyncClientHandleCollection (..) , ChainSyncJumpingJumperState (..) , ChainSyncJumpingState (..) , ChainSyncState (..) @@ -17,11 +18,16 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State ( , JumpInfo (..) , JumperInitState (..) , ObjectorInitState (..) 
+ , newChainSyncClientHandleCollection ) where import Cardano.Slotting.Slot (SlotNo, WithOrigin) import Data.Function (on) +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map import Data.Maybe.Strict (StrictMaybe (..)) +import Data.Sequence.Strict (StrictSeq) +import qualified Data.Sequence.Strict as Seq import Data.Typeable (Proxy (..), typeRep) import GHC.Generics (Generic) import Ouroboros.Consensus.Block (HasHeader, Header, Point) @@ -30,7 +36,7 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import Ouroboros.Consensus.Node.GsmState (GsmState) import Ouroboros.Consensus.Util.IOLike (IOLike, NoThunks (..), STM, - StrictTVar, Time) + StrictTVar, Time, modifyTVar, newTVar, readTVar) import Ouroboros.Network.AnchoredFragment (AnchoredFragment, headPoint) @@ -96,9 +102,74 @@ deriving anyclass instance ( NoThunks (Header blk) ) => NoThunks (ChainSyncClientHandle m blk) +-- | A collection of ChainSync client handles for the peers of this node. +-- +-- Sometimes we want to see the collection as a Map, and sometimes as a sequence. +-- The implementation keeps both views in sync. +data ChainSyncClientHandleCollection peer m blk = ChainSyncClientHandleCollection { + -- | A map containing the handles for the peers in the collection + cschcMap :: !(STM m (Map peer (ChainSyncClientHandle m blk))) + -- | A sequence containing the handles for the peers in the collection + , cschcSeq :: !(STM m (StrictSeq (peer, ChainSyncClientHandle m blk))) + -- | Add the handle for the given peer to the collection + -- PRECONDITION: The peer is not already in the collection + , cschcAddHandle :: !(peer -> ChainSyncClientHandle m blk -> STM m ()) + -- | Remove the handle for the given peer from the collection + , cschcRemoveHandle :: !(peer -> STM m ()) + -- | Moves the handle for the given peer to the end of the sequence + , cschcRotateHandle :: !(peer -> STM m ()) + -- | Remove all the handles from the collection + , cschcRemoveAllHandles :: !(STM m ()) + } + deriving stock (Generic) + +deriving anyclass instance ( + IOLike m, + HasHeader blk, + LedgerSupportsProtocol blk, + NoThunks (STM m ()), + NoThunks (Header blk), + NoThunks (STM m (Map peer (ChainSyncClientHandle m blk))), + NoThunks (STM m (StrictSeq (peer, ChainSyncClientHandle m blk))) + ) => NoThunks (ChainSyncClientHandleCollection peer m blk) + +newChainSyncClientHandleCollection :: + ( Ord peer, + IOLike m, + LedgerSupportsProtocol blk, + NoThunks peer + ) + => STM m (ChainSyncClientHandleCollection peer m blk) +newChainSyncClientHandleCollection = do + handlesMap <- newTVar mempty + handlesSeq <- newTVar mempty + + return ChainSyncClientHandleCollection { + cschcMap = readTVar handlesMap + , cschcSeq = readTVar handlesSeq + , cschcAddHandle = \peer handle -> do + modifyTVar handlesMap (Map.insert peer handle) + modifyTVar handlesSeq (Seq.|> (peer, handle)) + , cschcRemoveHandle = \peer -> do + modifyTVar handlesMap (Map.delete peer) + modifyTVar handlesSeq $ \s -> + let (xs, ys) = Seq.spanl ((/= peer) . fst) s + in xs Seq.>< Seq.drop 1 ys + , cschcRotateHandle = \peer -> + modifyTVar handlesSeq $ \s -> + let (xs, ys) = Seq.spanl ((/= peer) . fst) s + in xs Seq.>< Seq.drop 1 ys Seq.>< Seq.take 1 ys + , cschcRemoveAllHandles = do + modifyTVar handlesMap (const mempty) + modifyTVar handlesSeq (const mempty) + } + data DynamoInitState blk - = -- | The dynamo has not yet started jumping and we first need to jump to the - -- given jump info to set the intersection of the ChainSync server. 
+ = -- | The dynamo still has to set the intersection of the ChainSync server + -- before it can resume downloading headers. This is because + -- the message pipeline might be drained to do jumps, and this causes + -- the intersection on the ChainSync server to diverge from the tip of + -- the candidate fragment. DynamoStarting !(JumpInfo blk) | DynamoStarted deriving (Generic) @@ -111,7 +182,10 @@ deriving anyclass instance data ObjectorInitState = -- | The objector still needs to set the intersection of the ChainSync - -- server before resuming retrieval of headers. + -- server before resuming retrieval of headers. This is mainly because + -- the message pipeline might be drained to do jumps, and this causes + -- the intersection on the ChainSync server to diverge from the tip of + -- the candidate fragment. Starting | Started deriving (Generic, Show, NoThunks) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs index d905d6b240..dfc656e4c3 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs @@ -91,6 +91,8 @@ import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (ChainUpdate, MaxSlotNo, Serialised (..)) import qualified Ouroboros.Network.Block as Network +import Ouroboros.Network.BlockFetch.ConsensusInterface + (ChainSelStarvation (..)) import Ouroboros.Network.Mock.Chain (Chain (..)) import qualified Ouroboros.Network.Mock.Chain as Chain import System.FS.API.Types (FsError) @@ -210,7 +212,9 @@ data ChainDB m blk = ChainDB { , getBlockComponent :: forall b. BlockComponent blk b -> RealPoint blk -> m (Maybe b) - -- | Return membership check function for recent blocks + -- | Return membership check function for recent blocks. This includes + -- blocks in the VolatileDB and blocks that are currently being processed + -- or are waiting in a queue to be processed. -- -- This check is only reliable for blocks up to @k@ away from the tip. -- For blocks older than that the results should be regarded as @@ -236,7 +240,8 @@ data ChainDB m blk = ChainDB { -- are part of a shorter fork. , getIsValid :: STM m (RealPoint blk -> Maybe Bool) - -- | Get the highest slot number stored in the ChainDB. + -- | Get the highest slot number stored in the ChainDB (this includes + -- blocks that are waiting in the background queue to be processed). -- -- Note that the corresponding block doesn't have to be part of the -- current chain, it could be part of some fork, or even be a @@ -347,6 +352,10 @@ data ChainDB m blk = ChainDB { -- invalid block is detected. These blocks are likely to be valid. , getIsInvalidBlock :: STM m (WithFingerprint (HeaderHash blk -> Maybe (ExtValidationError blk))) + -- | Whether ChainSel is currently starved, or when was last time it + -- stopped being starved. + , getChainSelStarvation :: STM m ChainSelStarvation + , closeDB :: m () -- | Return 'True' when the database is open. 
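-- Editor's note: the block below is a minimal, self-contained sketch (not part
-- of this patch) of why 'getIsFetched' and 'getMaxSlotNo' now also consult the
-- blocks queued for chain selection, as the updated haddocks above describe.
-- 'BlockStore', 'Point' and the Set-based bookkeeping are made-up stand-ins
-- for 'ChainSelQueue' and its multiset of points; the motivation stated in the
-- comments (avoiding re-downloads of still-queued blocks) is an assumption.
module QueuedBlocksSketch (BlockStore (..), isFetched, maxSlotNo) where

import           Data.Set (Set)
import qualified Data.Set as Set

type SlotNo = Word
type Hash   = String

-- | A point identifies a block by slot number and hash.
type Point = (SlotNo, Hash)

data BlockStore = BlockStore
  { volatileDb :: Set Point -- ^ blocks already written to the VolatileDB
  , queued     :: Set Point -- ^ blocks handed over asynchronously, not yet processed
  }

-- | A block counts as "fetched" as soon as it has been handed over to the
-- ChainDB, even if chain selection has not written it to disk yet; otherwise
-- the BlockFetch logic could decide to download the same block again while it
-- is still sitting in the queue.
isFetched :: Point -> BlockStore -> Bool
isFetched pt store = pt `Set.member` volatileDb store || pt `Set.member` queued store

-- | The highest known slot number has to take queued blocks into account too.
maxSlotNo :: BlockStore -> Maybe SlotNo
maxSlotNo store = maximumMaybe slots
  where
    slots = [ slot | (slot, _) <- Set.toList (volatileDb store) ++ Set.toList (queued store) ]
    maximumMaybe [] = Nothing
    maximumMaybe xs = Just (maximum xs)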
diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs index 87dca9f1f4..4df6eaf832 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs @@ -16,6 +16,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl ( , LgrDB.TraceReplayEvent , SelectionChangedInfo (..) , TraceAddBlockEvent (..) + , TraceChainSelStarvationEvent (..) , TraceCopyToImmutableDBEvent (..) , TraceEvent (..) , TraceFollowerEvent (..) @@ -69,6 +70,8 @@ import Ouroboros.Consensus.Util.IOLike import Ouroboros.Consensus.Util.STM (Fingerprint (..), WithFingerprint (..)) import qualified Ouroboros.Network.AnchoredFragment as AF +import Ouroboros.Network.BlockFetch.ConsensusInterface + (ChainSelStarvation (..)) {------------------------------------------------------------------------------- Initialization @@ -174,6 +177,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do copyFuse <- newFuse "copy to immutable db" chainSelFuse <- newFuse "chain selection" chainSelQueue <- newChainSelQueue (Args.cdbsBlocksToAddSize cdbSpecificArgs) + varChainSelStarvation <- newTVarIO ChainSelStarvationOngoing let env = CDB { cdbImmutableDB = immutableDB , cdbVolatileDB = volatileDB @@ -196,6 +200,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do , cdbKillBgThreads = varKillBgThreads , cdbChainSelQueue = chainSelQueue , cdbLoE = Args.cdbsLoE cdbSpecificArgs + , cdbChainSelStarvation = varChainSelStarvation } h <- fmap CDBHandle $ newTVarIO $ ChainDbOpen env let chainDB = API.ChainDB @@ -214,6 +219,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do , stream = Iterator.stream h , newFollower = Follower.newFollower h , getIsInvalidBlock = getEnvSTM h Query.getIsInvalidBlock + , getChainSelStarvation = getEnvSTM h Query.getChainSelStarvation , closeDB = closeDB h , isOpen = isOpen h } diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs index aab651ccb1..9d7c31af8a 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs @@ -522,7 +522,7 @@ addBlockRunner fuse cdb@CDB{..} = forever $ do -- exception (or it errored), notify the blocked thread withFuse fuse $ bracketOnError - (lift $ getChainSelMessage cdbChainSelQueue) + (lift $ getChainSelMessage starvationTracer cdbChainSelStarvation cdbChainSelQueue) (\message -> lift $ atomically $ do case message of ChainSelReprocessLoEBlocks varProcessed -> @@ -541,4 +541,7 @@ addBlockRunner fuse cdb@CDB{..} = forever $ do ChainSelAddBlock BlockToAdd{blockToAdd} -> trace $ PoppedBlockFromQueue $ FallingEdgeWith $ blockRealPoint blockToAdd - chainSelSync cdb message) + chainSelSync cdb message + lift $ atomically $ processedChainSelMessage cdbChainSelQueue message) + where + starvationTracer = Tracer $ traceWith cdbTracer . 
TraceChainSelStarvationEvent diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs index 5bea8cd37c..9d03c5f5a0 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs @@ -22,6 +22,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Query ( , getAnyBlockComponent , getAnyKnownBlock , getAnyKnownBlockComponent + , getChainSelStarvation ) where import qualified Data.Map.Strict as Map @@ -50,6 +51,8 @@ import Ouroboros.Consensus.Util.STM (WithFingerprint (..)) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (MaxSlotNo, maxSlotNoFromWithOrigin) +import Ouroboros.Network.BlockFetch.ConsensusInterface + (ChainSelStarvation (..)) -- | Return the last @k@ headers. -- @@ -161,18 +164,15 @@ getBlockComponent :: getBlockComponent CDB{..} = getAnyBlockComponent cdbImmutableDB cdbVolatileDB getIsFetched :: - forall m blk. IOLike m + forall m blk. (IOLike m, HasHeader blk) => ChainDbEnv m blk -> STM m (Point blk -> Bool) -getIsFetched CDB{..} = basedOnHash <$> VolatileDB.getIsMember cdbVolatileDB - where - -- The volatile DB indexes by hash only, not by points. However, it should - -- not be possible to have two points with the same hash but different - -- slot numbers. - basedOnHash :: (HeaderHash blk -> Bool) -> Point blk -> Bool - basedOnHash f p = - case pointHash p of - BlockHash hash -> f hash - GenesisHash -> False +getIsFetched CDB{..} = do + checkQueue <- memberChainSelQueue cdbChainSelQueue + checkVolDb <- VolatileDB.getIsMember cdbVolatileDB + return $ \pt -> + case pointToWithOriginRealPoint pt of + Origin -> False + NotOrigin pt' -> checkQueue pt' || checkVolDb (realPointHash pt') getIsInvalidBlock :: forall m blk. (IOLike m, HasHeader blk) @@ -181,6 +181,12 @@ getIsInvalidBlock :: getIsInvalidBlock CDB{..} = fmap (fmap (fmap invalidBlockReason) . flip Map.lookup) <$> readTVar cdbInvalid +getChainSelStarvation :: + forall m blk. IOLike m + => ChainDbEnv m blk + -> STM m ChainSelStarvation +getChainSelStarvation CDB {..} = readTVar cdbChainSelStarvation + getIsValid :: forall m blk. (IOLike m, HasHeader blk) => ChainDbEnv m blk @@ -209,10 +215,13 @@ getMaxSlotNo CDB{..} = do -- contains block 9'. The ImmutableDB contains blocks 1-10. The max slot -- of the current chain will be 10 (being the anchor point of the empty -- current chain), while the max slot of the VolatileDB will be 9. + -- + -- Moreover, we have to look in 'ChainSelQueue' too. curChainMaxSlotNo <- maxSlotNoFromWithOrigin . 
AF.headSlot <$> readTVar cdbChain - volatileDbMaxSlotNo <- VolatileDB.getMaxSlotNo cdbVolatileDB - return $ curChainMaxSlotNo `max` volatileDbMaxSlotNo + volatileDbMaxSlotNo <- VolatileDB.getMaxSlotNo cdbVolatileDB + queuedMaxSlotNo <- getMaxSlotNoChainSelQueue cdbChainSelQueue + return $ curChainMaxSlotNo `max` volatileDbMaxSlotNo `max` queuedMaxSlotNo {------------------------------------------------------------------------------- Unifying interface over the immutable DB and volatile DB, but independent diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index 2eaaad37aa..03e880f16a 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -1,4 +1,5 @@ {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BlockArguments #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveGeneric #-} @@ -42,15 +43,19 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( -- * Blocks to add , BlockToAdd (..) , ChainSelMessage (..) - , ChainSelQueue + , ChainSelQueue -- opaque , addBlockToAdd , addReprocessLoEBlocks , closeChainSelQueue , getChainSelMessage + , getMaxSlotNoChainSelQueue + , memberChainSelQueue , newChainSelQueue + , processedChainSelMessage -- * Trace types , SelectionChangedInfo (..) , TraceAddBlockEvent (..) + , TraceChainSelStarvationEvent (..) , TraceCopyToImmutableDBEvent (..) , TraceEvent (..) , TraceFollowerEvent (..) @@ -62,12 +67,15 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( , TraceValidationEvent (..) ) where +import Control.Monad (when) import Control.ResourceRegistry import Control.Tracer import Data.Foldable (traverse_) import Data.Map.Strict (Map) import Data.Maybe (mapMaybe) import Data.Maybe.Strict (StrictMaybe (..)) +import Data.MultiSet (MultiSet) +import qualified Data.MultiSet as MultiSet import Data.Set (Set) import Data.Typeable import Data.Void (Void) @@ -104,7 +112,9 @@ import Ouroboros.Consensus.Util.Enclose (Enclosing, Enclosing' (..)) import Ouroboros.Consensus.Util.IOLike import Ouroboros.Consensus.Util.STM (WithFingerprint) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) -import Ouroboros.Network.Block (MaxSlotNo) +import Ouroboros.Network.Block (MaxSlotNo (..)) +import Ouroboros.Network.BlockFetch.ConsensusInterface + (ChainSelStarvation (..)) -- | All the serialisation related constraints needed by the ChainDB. class ( ImmutableDbSerialiseConstraints blk @@ -254,6 +264,9 @@ data ChainDbEnv m blk = CDB -- switch back to a chain containing it. The fragment is usually anchored at -- a recent immutable tip; if it does not, it will conservatively be treated -- as the empty fragment anchored in the current immutable tip. + , cdbChainSelStarvation :: !(StrictTVar m ChainSelStarvation) + -- ^ Information on the last starvation of ChainSel, whether ongoing or + -- ended recently. } deriving (Generic) -- | We include @blk@ in 'showTypeOf' because it helps resolving type families @@ -411,7 +424,19 @@ data InvalidBlockInfo blk = InvalidBlockInfo -- | FIFO queue used to add blocks asynchronously to the ChainDB. Blocks are -- read from this queue by a background thread, which processes the blocks -- synchronously. 
-newtype ChainSelQueue m blk = ChainSelQueue (TBQueue m (ChainSelMessage m blk)) +-- +-- We also maintain a multiset of the points of all of the blocks in the queue, +-- plus potentially the one block for which chain selection is currently in +-- progress. It is used to account for queued blocks in eg 'getIsFetched' and +-- 'getMaxSlotNo'. +-- +-- INVARIANT: Counted with multiplicity, @varChainSelPoints@ contains exactly +-- the same hashes or at most one additional hash compared to the hashes of +-- blocks in @varChainSelQueue@. +data ChainSelQueue m blk = ChainSelQueue { + varChainSelQueue :: TBQueue m (ChainSelMessage m blk) + , varChainSelPoints :: StrictTVar m (MultiSet (RealPoint blk)) + } deriving NoThunks via OnlyCheckWhnfNamed "ChainSelQueue" (ChainSelQueue m blk) -- | Entry in the 'ChainSelQueue' queue: a block together with the 'TMVar's used @@ -437,9 +462,14 @@ data ChainSelMessage m blk -- ^ Used for 'ChainSelectionPromise'. -- | Create a new 'ChainSelQueue' with the given size. -newChainSelQueue :: IOLike m => Word -> m (ChainSelQueue m blk) -newChainSelQueue queueSize = ChainSelQueue <$> - atomically (newTBQueue (fromIntegral queueSize)) +newChainSelQueue :: (IOLike m, StandardHash blk, Typeable blk) => Word -> m (ChainSelQueue m blk) +newChainSelQueue chainSelQueueCapacity = do + varChainSelQueue <- newTBQueueIO (fromIntegral chainSelQueueCapacity) + varChainSelPoints <- newTVarIO MultiSet.empty + pure ChainSelQueue { + varChainSelQueue + , varChainSelPoints + } -- | Add a block to the 'ChainSelQueue' queue. Can block when the queue is full. addBlockToAdd :: @@ -449,7 +479,7 @@ addBlockToAdd :: -> InvalidBlockPunishment m -> blk -> m (AddBlockPromise m blk) -addBlockToAdd tracer (ChainSelQueue queue) punish blk = do +addBlockToAdd tracer (ChainSelQueue {varChainSelQueue, varChainSelPoints}) punish blk = do varBlockWrittenToDisk <- newEmptyTMVarIO varBlockProcessed <- newEmptyTMVarIO let !toAdd = BlockToAdd @@ -458,10 +488,12 @@ addBlockToAdd tracer (ChainSelQueue queue) punish blk = do , varBlockWrittenToDisk , varBlockProcessed } - traceWith tracer $ AddedBlockToQueue (blockRealPoint blk) RisingEdge + pt = blockRealPoint blk + traceWith tracer $ AddedBlockToQueue pt RisingEdge queueSize <- atomically $ do - writeTBQueue queue (ChainSelAddBlock toAdd) - lengthTBQueue queue + writeTBQueue varChainSelQueue (ChainSelAddBlock toAdd) + modifyTVar varChainSelPoints $ MultiSet.insert pt + lengthTBQueue varChainSelQueue traceWith tracer $ AddedBlockToQueue (blockRealPoint blk) (FallingEdgeWith (fromIntegral queueSize)) return AddBlockPromise @@ -475,22 +507,59 @@ addReprocessLoEBlocks => Tracer m (TraceAddBlockEvent blk) -> ChainSelQueue m blk -> m (ChainSelectionPromise m) -addReprocessLoEBlocks tracer (ChainSelQueue queue) = do +addReprocessLoEBlocks tracer ChainSelQueue {varChainSelQueue} = do varProcessed <- newEmptyTMVarIO let waitUntilRan = atomically $ readTMVar varProcessed traceWith tracer $ AddedReprocessLoEBlocksToQueue - atomically $ writeTBQueue queue $ ChainSelReprocessLoEBlocks varProcessed + atomically $ writeTBQueue varChainSelQueue $ + ChainSelReprocessLoEBlocks varProcessed return $ ChainSelectionPromise waitUntilRan -- | Get the oldest message from the 'ChainSelQueue' queue. Can block when the --- queue is empty. 
-getChainSelMessage :: IOLike m => ChainSelQueue m blk -> m (ChainSelMessage m blk) -getChainSelMessage (ChainSelQueue queue) = atomically $ readTBQueue queue +-- queue is empty; in that case, reports the starvation (and its end) via the +-- given tracer. +getChainSelMessage + :: forall m blk. (HasHeader blk, IOLike m) + => Tracer m (TraceChainSelStarvationEvent blk) + -> StrictTVar m ChainSelStarvation + -> ChainSelQueue m blk + -> m (ChainSelMessage m blk) +getChainSelMessage starvationTracer starvationVar chainSelQueue = + atomically (tryReadTBQueue' queue) >>= \case + Just msg -> pure msg + Nothing -> do + startStarvationMeasure + msg <- atomically $ readTBQueue queue + terminateStarvationMeasure msg + pure msg + where + ChainSelQueue { + varChainSelQueue = queue + } = chainSelQueue + + startStarvationMeasure :: m () + startStarvationMeasure = do + prevStarvation <- atomically $ swapTVar starvationVar ChainSelStarvationOngoing + when (prevStarvation /= ChainSelStarvationOngoing) $ + traceWith starvationTracer $ ChainSelStarvation RisingEdge + + terminateStarvationMeasure :: ChainSelMessage m blk -> m () + terminateStarvationMeasure = \case + ChainSelAddBlock BlockToAdd{blockToAdd=block} -> do + let pt = blockRealPoint block + traceWith starvationTracer $ ChainSelStarvation (FallingEdgeWith pt) + atomically . writeTVar starvationVar . ChainSelStarvationEndedAt =<< getMonotonicTime + ChainSelReprocessLoEBlocks{} -> pure () + +-- TODO Can't use tryReadTBQueue from io-classes because it is broken for IOSim +-- (but not for IO). https://github.com/input-output-hk/io-sim/issues/195 +tryReadTBQueue' :: MonadSTM m => TBQueue m a -> STM m (Maybe a) +tryReadTBQueue' q = (Just <$> readTBQueue q) `orElse` pure Nothing -- | Flush the 'ChainSelQueue' queue and notify the waiting threads. -- closeChainSelQueue :: IOLike m => ChainSelQueue m blk -> STM m () -closeChainSelQueue (ChainSelQueue queue) = do +closeChainSelQueue ChainSelQueue{varChainSelQueue = queue} = do as <- mapMaybe blockAdd <$> flushTBQueue queue traverse_ (\a -> tryPutTMVar (varBlockProcessed a) (FailedToAddBlock "Queue flushed")) @@ -500,6 +569,41 @@ closeChainSelQueue (ChainSelQueue queue) = do ChainSelAddBlock ab -> Just ab ChainSelReprocessLoEBlocks _ -> Nothing +-- | To invoke when the given 'ChainSelMessage' has been processed by ChainSel. +-- This is used to remove the respective point from the multiset of points in +-- the 'ChainSelQueue' (as the block has now been written to disk by ChainSel). +processedChainSelMessage :: + (IOLike m, HasHeader blk) + => ChainSelQueue m blk + -> ChainSelMessage m blk + -> STM m () +processedChainSelMessage ChainSelQueue {varChainSelPoints} = \case + ChainSelAddBlock BlockToAdd{blockToAdd = blk} -> + modifyTVar varChainSelPoints $ MultiSet.delete (blockRealPoint blk) + ChainSelReprocessLoEBlocks{} -> + pure () + +-- | Return a function to test the membership +memberChainSelQueue :: + (IOLike m, HasHeader blk) + => ChainSelQueue m blk + -> STM m (RealPoint blk -> Bool) +memberChainSelQueue ChainSelQueue {varChainSelPoints} = + flip MultiSet.member <$> readTVar varChainSelPoints + +getMaxSlotNoChainSelQueue :: + IOLike m + => ChainSelQueue m blk + -> STM m MaxSlotNo +getMaxSlotNoChainSelQueue ChainSelQueue {varChainSelPoints} = + aux <$> readTVar varChainSelPoints + where + -- | The 'Ord' instance of 'RealPoint' orders by 'SlotNo' first, so the + -- maximal key of the map has the greatest 'SlotNo'. 
+ aux :: MultiSet (RealPoint blk) -> MaxSlotNo + aux pts = case MultiSet.maxView pts of + Nothing -> NoMaxSlotNo + Just (RealPoint s _, _) -> MaxSlotNo s {------------------------------------------------------------------------------- Trace types @@ -519,6 +623,7 @@ data TraceEvent blk | TraceImmutableDBEvent (ImmutableDB.TraceEvent blk) | TraceVolatileDBEvent (VolatileDB.TraceEvent blk) | TraceLastShutdownUnclean + | TraceChainSelStarvationEvent(TraceChainSelStarvationEvent blk) deriving (Generic) @@ -827,3 +932,16 @@ data TraceIteratorEvent blk -- next block we're looking for. | SwitchBackToVolatileDB deriving (Generic, Eq, Show) + +-- | Chain selection is /starved/ when the background thread runs out of work. +-- This is the usual case and innocent while caught-up; but while syncing, it +-- means that we are downloading blocks at a smaller rate than we can validate +-- them, even though we generally expect to be CPU-bound. +-- +-- TODO: Investigate why it happens regularly during syncing for very short +-- times. +-- +-- The point in the trace is the block that finished the starvation. +newtype TraceChainSelStarvationEvent blk = + ChainSelStarvation (Enclosing' (RealPoint blk)) + deriving (Generic, Eq, Show) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Util/Orphans.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Util/Orphans.hs index 06c2757d52..0ba537b87b 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Util/Orphans.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Util/Orphans.hs @@ -23,6 +23,8 @@ import Data.Bimap (Bimap) import qualified Data.Bimap as Bimap import Data.IntPSQ (IntPSQ) import qualified Data.IntPSQ as PSQ +import Data.MultiSet (MultiSet) +import qualified Data.MultiSet as MultiSet import Data.SOP.BasicFunctors import NoThunks.Class (InspectHeap (..), InspectHeapNamed (..), NoThunks (..), OnlyCheckWhnfNamed (..), allNoThunks, @@ -75,6 +77,10 @@ instance NoThunks a => NoThunks (K a b) where showTypeOf _ = showTypeOf (Proxy @a) wNoThunks ctxt (K a) = wNoThunks ("K":ctxt) a +instance NoThunks a => NoThunks (MultiSet a) where + showTypeOf _ = "MultiSet" + wNoThunks ctxt = wNoThunks ctxt . MultiSet.toMap + {------------------------------------------------------------------------------- fs-api -------------------------------------------------------------------------------} diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs index c03639a6bb..caa58f3bfc 100644 --- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs +++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs @@ -8,6 +8,7 @@ {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE TypeApplications #-} -- | A test for the consensus-specific parts of the BlockFetch client. 
-- @@ -51,11 +52,13 @@ import Ouroboros.Consensus.Util.STM (blockUntilJust, import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.BlockFetch (BlockFetchConfiguration (..), - BlockFetchConsensusInterface, FetchMode (..), - blockFetchLogic, bracketFetchClient, - bracketKeepAliveClient, bracketSyncWithFetchClient, - newFetchClientRegistry) + BlockFetchConsensusInterface (..), FetchMode (..), + GenesisBlockFetchConfiguration (..), blockFetchLogic, + bracketFetchClient, bracketKeepAliveClient, + bracketSyncWithFetchClient, newFetchClientRegistry) import Ouroboros.Network.BlockFetch.Client (blockFetchClient) +import Ouroboros.Network.BlockFetch.ConsensusInterface + (PraosFetchMode (..)) import Ouroboros.Network.ControlMessage (ControlMessage (..)) import Ouroboros.Network.Mock.Chain (Chain) import qualified Ouroboros.Network.Mock.Chain as Chain @@ -95,7 +98,10 @@ prop_blockFetch bfcts@BlockFetchClientTestSetup{..} = ] <> [ Map.keysSet bfcoBlockFetchResults === Map.keysSet peerUpdates , counterexample ("Fetched blocks per peer: " <> condense bfcoFetchedBlocks) $ - property $ all (> 0) bfcoFetchedBlocks + property $ case blockFetchMode of + PraosFetchMode FetchModeDeadline -> all (> 0) bfcoFetchedBlocks + PraosFetchMode FetchModeBulkSync -> all (> 0) bfcoFetchedBlocks + FetchModeGenesis -> any (> 0) bfcoFetchedBlocks ] where BlockFetchClientOutcome{..} = runSimOrThrow $ runBlockFetchTest bfcts @@ -254,10 +260,11 @@ runBlockFetchTest BlockFetchClientTestSetup{..} = withRegistry \registry -> do let -- Always return the empty chain such that the BlockFetch logic -- downloads all chains. - getCurrentChain = pure $ AF.Empty AF.AnchorGenesis - getIsFetched = ChainDB.getIsFetched chainDB - getMaxSlotNo = ChainDB.getMaxSlotNo chainDB - addBlockWaitWrittenToDisk = ChainDB.addBlockWaitWrittenToDisk chainDB + getCurrentChain = pure $ AF.Empty AF.AnchorGenesis + getIsFetched = ChainDB.getIsFetched chainDB + getMaxSlotNo = ChainDB.getMaxSlotNo chainDB + addBlockAsync = ChainDB.addBlockAsync chainDB + getChainSelStarvation = ChainDB.getChainSelStarvation chainDB pure BlockFetchClientInterface.ChainDbView {..} where -- Needs to be larger than any chain length in this test, to ensure that @@ -276,14 +283,18 @@ runBlockFetchTest BlockFetchClientTestSetup{..} = withRegistry \registry -> do -> BlockFetchClientInterface.ChainDbView m TestBlock -> BlockFetchConsensusInterface PeerId (Header TestBlock) TestBlock m mkTestBlockFetchConsensusInterface getCandidates chainDbView = - BlockFetchClientInterface.mkBlockFetchConsensusInterface + (BlockFetchClientInterface.mkBlockFetchConsensusInterface @m @PeerId + nullTracer (TestBlockConfig numCoreNodes) chainDbView - getCandidates + (error "ChainSyncClientHandleCollection not provided to mkBlockFetchConsensusInterface") (\_hdr -> 1000) -- header size, only used for peer prioritization slotForgeTime (pure blockFetchMode) - blockFetchPipelining + blockFetchPipelining) + { readCandidateChains = getCandidates + , demoteChainSyncJumpingDynamo = const (pure ()) + } where -- Bogus implementation; this is fine as this is only used for -- enriching tracing information ATM. @@ -353,7 +364,11 @@ instance Arbitrary BlockFetchClientTestSetup where peerUpdates <- Map.fromList . 
zip peerIds <$> replicateM numPeers (genUpdateSchedule blockFetchPipelining) - blockFetchMode <- elements [FetchModeBulkSync, FetchModeDeadline] + blockFetchMode <- elements + [ PraosFetchMode FetchModeBulkSync + , PraosFetchMode FetchModeDeadline + , FetchModeGenesis + ] blockFetchCfg <- do let -- ensure that we can download blocks from all peers bfcMaxConcurrencyBulkSync = fromIntegral numPeers @@ -361,9 +376,12 @@ instance Arbitrary BlockFetchClientTestSetup where -- This is used to introduce a minimal delay between BlockFetch -- logic iterations in case the monitored state vars change too -- fast, which we don't have to worry about in this test. - bfcDecisionLoopInterval = 0 + bfcDecisionLoopIntervalGenesis = 0 + bfcDecisionLoopIntervalPraos = 0 bfcMaxRequestsInflight <- chooseEnum (2, 10) bfcSalt <- arbitrary + gbfcGracePeriod <- fromIntegral <$> chooseInteger (5, 60) + let bfcGenesisBFConfig = GenesisBlockFetchConfiguration {..} pure BlockFetchConfiguration {..} pure BlockFetchClientTestSetup {..} where diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs index 60c5ebdc31..63e810a572 100644 --- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs +++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs @@ -81,12 +81,14 @@ import Ouroboros.Consensus.Ledger.Abstract import Ouroboros.Consensus.Ledger.Extended hiding (ledgerState) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (CSJConfig (..), ChainDbView (..), - ChainSyncClientException, ChainSyncClientResult (..), - ChainSyncLoPBucketConfig (..), ChainSyncState (..), - ChainSyncStateView (..), ConfigEnv (..), Consensus, - DynamicEnv (..), Our (..), Their (..), - TraceChainSyncClientEvent (..), bracketChainSyncClient, - chainSyncClient, chainSyncStateFor, viewChainSyncState) + ChainSyncClientException, + ChainSyncClientHandleCollection (..), + ChainSyncClientResult (..), ChainSyncLoPBucketConfig (..), + ChainSyncState (..), ChainSyncStateView (..), + ConfigEnv (..), Consensus, DynamicEnv (..), Our (..), + Their (..), TraceChainSyncClientEvent (..), + bracketChainSyncClient, chainSyncClient, chainSyncStateFor, + newChainSyncClientHandleCollection, viewChainSyncState) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.HistoricityCheck (HistoricityCheck, HistoricityCutoff (..)) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.HistoricityCheck as HistoricityCheck @@ -353,7 +355,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates) -- separate map too, one that isn't emptied. We can use this map to look -- at the final state of each candidate. varFinalCandidates <- uncheckedNewTVarM Map.empty - varHandles <- uncheckedNewTVarM Map.empty + cschCol <- atomically newChainSyncClientHandleCollection (tracer, getTrace) <- do (tracer', getTrace) <- recordingTracerTVar @@ -506,7 +508,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates) bracketChainSyncClient chainSyncTracer chainDbView - varHandles + cschCol -- 'Syncing' only ever impacts the LoP, which is disabled in -- this test, so any value would do. (pure Syncing) @@ -517,7 +519,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates) diffusionPipelining $ \csState -> do atomically $ do - handles <- readTVar varHandles + handles <- cschcMap cschCol modifyTVar varFinalCandidates $ Map.insert serverId (handles Map.! 
serverId) (result, _) <- runPipelinedPeer protocolTracer codecChainSyncId clientChannel $ @@ -538,7 +540,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates) let checkTipTime :: m () checkTipTime = do now <- systemTimeCurrent clientSystemTime - candidates <- atomically $ viewChainSyncState varHandles csCandidate + candidates <- atomically $ viewChainSyncState (cschcMap cschCol) csCandidate forM_ candidates $ \candidate -> do let p = castPoint $ AF.headPoint candidate :: Point TestBlock case pointSlot p of diff --git a/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs b/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs index eed9f661ea..6d4e4cc0f6 100644 --- a/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs +++ b/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs @@ -1241,6 +1241,8 @@ deriving instance SOP.Generic (ImmutableDB.TraceEvent blk) deriving instance SOP.HasDatatypeInfo (ImmutableDB.TraceEvent blk) deriving instance SOP.Generic (VolatileDB.TraceEvent blk) deriving instance SOP.HasDatatypeInfo (VolatileDB.TraceEvent blk) +deriving anyclass instance SOP.Generic (TraceChainSelStarvationEvent blk) +deriving anyclass instance SOP.HasDatatypeInfo (TraceChainSelStarvationEvent blk) data Tag = TagGetIsValidJust @@ -1635,6 +1637,7 @@ traceEventName = \case TraceImmutableDBEvent ev -> "ImmutableDB." <> constrName ev TraceVolatileDBEvent ev -> "VolatileDB." <> constrName ev TraceLastShutdownUnclean -> "LastShutdownUnclean" + TraceChainSelStarvationEvent ev -> "ChainSelStarvation." <> constrName ev mkArgs :: IOLike m => TopLevelConfig Blk
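
The starvation bookkeeping that getChainSelMessage gains above is edge-triggered: trace a rising edge the first time the queue is found empty, block until a message arrives, then trace a falling edge tagged with the block that ended the starvation. Below is a minimal standalone sketch of that pattern using plain stm and IO instead of the io-classes and Tracer machinery of the patch; Starvation, popWithStarvation and the String tracer are illustrative stand-ins, not code from this PR.

{-# LANGUAGE LambdaCase #-}
module StarvationSketch where

import Control.Concurrent.STM
import Control.Monad (when)
import Data.Time (UTCTime, getCurrentTime)

-- Simplified analogue of ChainSelStarvation: either a starvation is
-- ongoing, or we remember when the last one ended.
data Starvation
  = StarvationOngoing
  | StarvationEndedAt UTCTime
  deriving (Eq, Show)

-- Pop the next element of the queue. If the queue is empty, report the
-- rising edge of a starvation, block until an element arrives, and
-- report the falling edge tagged with that element.
popWithStarvation
  :: Show a
  => (String -> IO ())    -- stand-in for a Tracer
  -> TVar Starvation
  -> TBQueue a
  -> IO a
popWithStarvation trace starvationVar queue =
  atomically ((Just <$> readTBQueue queue) `orElse` pure Nothing) >>= \case
    Just x  -> pure x
    Nothing -> do
      -- Rising edge: only trace it if we were not already starved.
      prev <- atomically $ swapTVar starvationVar StarvationOngoing
      when (prev /= StarvationOngoing) $ trace "starvation started"
      x <- atomically $ readTBQueue queue
      -- Falling edge: the element that ended the starvation.
      trace ("starvation ended by " ++ show x)
      now <- getCurrentTime
      atomically $ writeTVar starvationVar (StarvationEndedAt now)
      pure x

The guard on the previous value of the variable matters in the real code because a ChainSelReprocessLoEBlocks message ends the blocking read without recording a falling edge, so the variable can still read as ongoing when the queue next runs empty; the guard then avoids tracing a duplicate rising edge.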
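
The new varChainSelPoints bookkeeping queried by memberChainSelQueue, processedChainSelMessage and getMaxSlotNoChainSelQueue leans on the fact, noted in the comment above, that the Ord instance of RealPoint orders by SlotNo first, so the maximal element of the multiset carries the greatest slot. A small self-contained sketch of the same idea, with Point, SlotNo, MaxSlot and the helper names as illustrative stand-ins for the real consensus types:

module QueuedPointsSketch where

import Data.MultiSet (MultiSet)
import qualified Data.MultiSet as MultiSet
import Data.Word (Word64)

-- Stand-ins for SlotNo, RealPoint and MaxSlotNo; as with the real
-- RealPoint, the derived Ord compares the slot number first.
newtype SlotNo = SlotNo Word64
  deriving (Eq, Ord, Show)

data Point = Point SlotNo String    -- slot plus a stand-in for the hash
  deriving (Eq, Ord, Show)

data MaxSlot = NoMaxSlot | MaxSlot SlotNo
  deriving (Eq, Show)

-- Is this point still queued for chain selection?
memberQueued :: MultiSet Point -> Point -> Bool
memberQueued = flip MultiSet.member

-- Because Ord orders by slot first, the maximal element of the multiset
-- carries the greatest slot number.
maxSlotQueued :: MultiSet Point -> MaxSlot
maxSlotQueued pts = case MultiSet.maxView pts of
  Nothing                -> NoMaxSlot
  Just (Point slot _, _) -> MaxSlot slot

-- Once chain selection has processed a block, remove one occurrence of
-- its point from the multiset.
processed :: Point -> MultiSet Point -> MultiSet Point
processed = MultiSet.delete

A MultiSet rather than a Set is used here, presumably so that a point enqueued more than once is only forgotten after it has been processed the same number of times.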