Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Full rework of the BlockFetch logic for bulk sync mode #1179

Merged
merged 34 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
f596da3
Introduce a collection of chainsync handles that synchronizes a map a…
facundominguez May 27, 2024
76217aa
Implement a call to rotate dynamos in CSJ
facundominguez May 27, 2024
e1120d6
Specify the order in which to start the peers
Niols Jun 19, 2024
b4adc16
Add a BlockFetch leashing attack test
Niols Jun 18, 2024
5253563
Accomodate for changes to BlockFetch
Niols Jun 26, 2024
e1e6f56
Track the last time the ChainDB thread was starved
Niols Jun 20, 2024
4e91897
Add explicit tracing events for CSJ
Niols Jul 15, 2024
318e9a5
ChainDB: let the BlockFetch client add blocks asynchronously
facundominguez Jul 18, 2024
d07aba2
Update Genesis configuration
nbacquey Jul 16, 2024
adbc26b
Set the jump size to smaller size for byron
facundominguez Jul 23, 2024
31b1fa5
Limit the rate at which GDD is evaluated
facundominguez Jul 24, 2024
75c0642
Documentation edits for CSJ
facundominguez Jun 26, 2024
b8dd484
ChainSync client: disconnect if stuck and not better than selection
amesgen Jul 31, 2024
906f397
Update tests
Niols Jul 11, 2024
bcff78b
Don't let GDD drop candidates that do not intersect with the selection
facundominguez Aug 1, 2024
36fa7e9
Introduce `peersOnlyAdversary` and classify abnormal test peers as ad…
nbacquey Aug 6, 2024
7163d06
Document all tests that did not have documentation
Niols Aug 6, 2024
11964a5
Depend on the ouroboros-network fork with the latest blockfetch
facundominguez Jul 11, 2024
3c2c434
Add changelog fragments
amesgen Aug 7, 2024
7d10a4e
Fix `dropElemsAt` implementation
nbacquey Aug 7, 2024
689783e
Adjust stalling test to have more kills by LoP
facundominguez Aug 7, 2024
d88a0e2
Document prop_blockFetchLeashingAttack
Niols Aug 8, 2024
a98dd8d
Disable blockfetch timeouts in uniform tests
facundominguez Aug 8, 2024
b36e927
Groom comments and counterexample messages.
facundominguez Aug 8, 2024
aa35539
Drop random points from adversarial schedules in the time limited lea…
facundominguez Aug 8, 2024
fac6489
Update configuration after recovering BulkSync in ouroboros-network
facundominguez Aug 20, 2024
c6c2e18
Improve and clarify CSJ documentation
Niols Dec 18, 2024
59fb73f
Add a `TraceDrainingThePipe` event
Niols Dec 19, 2024
544ebeb
Make the `DynamoStarting` trace more explicit
Niols Dec 19, 2024
ff745eb
Log ChainSync mini-protocol events if need be
Niols Dec 19, 2024
0b23940
Rework default ChainSyncTimeouts in peer simulator
Niols Dec 18, 2024
d53d14e
Fix tests that relied on default timeouts
Niols Dec 19, 2024
3a1e4c3
Enrich comment about disabled `mustReplyTimeout`
Niols Dec 19, 2024
6333df0
Make `ChainSelStarvation` carry an `Enclosed`
jasagredo Dec 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions cabal.project
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,13 @@ if(os(windows))

-- https://github.com/ulidtko/cabal-doctest/issues/85
constraints: Cabal < 3.13

source-repository-package
type: git
location: https://github.com/IntersectMBO/ouroboros-network
tag: bb0a7d0ff41e265a8ec47bc94377cb4d65e0b498
--sha256: sha256-P7m+nsjtogNQsdpXQnaH1kWxYibEWa0UC6iNGg0+bH4=
subdir:
ouroboros-network
ouroboros-network-api
ouroboros-network-protocols
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
### Breaking

- Adapted to Genesis-related changes in `ouroboros-consensus` ([#1179](https://github.com/IntersectMBO/ouroboros-consensus/pull/1179)).
Original file line number Diff line number Diff line change
@@ -1,24 +1,32 @@
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}

module Ouroboros.Consensus.Node.Genesis (
-- * 'GenesisConfig'
GenesisConfig (..)
, GenesisConfigFlags (..)
, LoEAndGDDConfig (..)
, defaultGenesisConfigFlags
, disableGenesisConfig
, enableGenesisConfigDefault
, mkGenesisConfig
-- * NodeKernel helpers
, GenesisNodeKernelArgs (..)
, LoEAndGDDNodeKernelArgs (..)
, mkGenesisNodeKernelArgs
, setGetLoEFragment
) where

import Control.Monad (join)
import Data.Maybe (fromMaybe)
import Data.Traversable (for)
import GHC.Generics (Generic)
import Ouroboros.Consensus.Block
import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
(CSJConfig (..), CSJEnabledConfig (..),
Expand All @@ -34,57 +42,143 @@ import Ouroboros.Consensus.Util.Args
import Ouroboros.Consensus.Util.IOLike
import Ouroboros.Network.AnchoredFragment (AnchoredFragment)
import qualified Ouroboros.Network.AnchoredFragment as AF
import Ouroboros.Network.BlockFetch
(GenesisBlockFetchConfiguration (..))

-- | Whether to en-/disable the Limit on Eagerness and the Genesis Density
-- Disconnector.
data LoEAndGDDConfig a =
LoEAndGDDEnabled !a
| LoEAndGDDDisabled
deriving stock (Show, Functor, Foldable, Traversable)
deriving stock (Eq, Generic, Show, Functor, Foldable, Traversable)

-- | Aggregating the various configs for Genesis-related subcomponents.
data GenesisConfig = GenesisConfig {
gcChainSyncLoPBucketConfig :: !ChainSyncLoPBucketConfig
--
-- Usually, 'enableGenesisConfigDefault' or 'disableGenesisConfig' can be used.
-- See the haddocks of the types of the individual fields for details.
data GenesisConfig = GenesisConfig
{ gcBlockFetchConfig :: !GenesisBlockFetchConfiguration
, gcChainSyncLoPBucketConfig :: !ChainSyncLoPBucketConfig
, gcCSJConfig :: !CSJConfig
, gcLoEAndGDDConfig :: !(LoEAndGDDConfig ())
, gcLoEAndGDDConfig :: !(LoEAndGDDConfig LoEAndGDDParams)
, gcHistoricityCutoff :: !(Maybe HistoricityCutoff)
} deriving stock (Eq, Generic, Show)

-- | Genesis configuration flags and low-level args, as parsed from config file or CLI
amesgen marked this conversation as resolved.
Show resolved Hide resolved
data GenesisConfigFlags = GenesisConfigFlags
{ gcfEnableCSJ :: Bool
-- ^ Whether 'mkGenesisConfig' produces 'CSJEnabled' or 'CSJDisabled'.
, gcfEnableLoEAndGDD :: Bool
-- ^ Whether 'mkGenesisConfig' produces 'LoEAndGDDEnabled' or
-- 'LoEAndGDDDisabled'.
, gcfEnableLoP :: Bool
-- ^ Whether 'mkGenesisConfig' produces 'ChainSyncLoPBucketEnabled' or
-- 'ChainSyncLoPBucketDisabled'.
, gcfBlockFetchGracePeriod :: Maybe Integer
-- ^ Override for the BlockFetch grace period ('gbfcGracePeriod').
-- 'Nothing' selects the default chosen by 'mkGenesisConfig'.
, gcfBucketCapacity :: Maybe Integer
-- ^ Override for the LoP bucket capacity ('csbcCapacity'). Only used when
-- 'gcfEnableLoP' is set. 'Nothing' selects the default chosen by
-- 'mkGenesisConfig'.
, gcfBucketRate :: Maybe Integer
-- ^ Override for the LoP bucket rate ('csbcRate'). Only used when
-- 'gcfEnableLoP' is set. 'Nothing' selects the default chosen by
-- 'mkGenesisConfig'.
, gcfCSJJumpSize :: Maybe Integer
-- ^ Override for the CSJ jump size ('csjcJumpSize'). Only used when
-- 'gcfEnableCSJ' is set. 'Nothing' selects the default chosen by
-- 'mkGenesisConfig'.
, gcfGDDRateLimit :: Maybe DiffTime
-- ^ Override for the GDD rate limit ('lgpGDDRateLimit'). Only used when
-- 'gcfEnableLoEAndGDD' is set. 'Nothing' selects the default chosen by
-- 'mkGenesisConfig'.
} deriving stock (Eq, Generic, Show)

-- | Flags that switch on all three toggles (CSJ, LoE and GDD, LoP) and leave
-- every tunable parameter at 'Nothing', so that 'mkGenesisConfig' falls back
-- to its built-in default for each of them.
defaultGenesisConfigFlags :: GenesisConfigFlags
defaultGenesisConfigFlags = GenesisConfigFlags
{ gcfEnableCSJ = True
, gcfEnableLoEAndGDD = True
, gcfEnableLoP = True
, gcfBlockFetchGracePeriod = Nothing
, gcfBucketCapacity = Nothing
, gcfBucketRate = Nothing
, gcfCSJJumpSize = Nothing
, gcfGDDRateLimit = Nothing
}

-- TODO justification/derivation from other parameters
enableGenesisConfigDefault :: GenesisConfig
enableGenesisConfigDefault = GenesisConfig {
gcChainSyncLoPBucketConfig = ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig {
csbcCapacity = 100_000 -- number of tokens
, csbcRate = 500 -- tokens per second leaking, 1/2ms
}
, gcCSJConfig = CSJEnabled CSJEnabledConfig {
csjcJumpSize = 3 * 2160 * 20 -- mainnet forecast range
}
, gcLoEAndGDDConfig = LoEAndGDDEnabled ()
-- Duration in seconds of one Cardano mainnet Shelley stability window
-- (3k/f slots times one second per slot) plus one extra hour as a
-- safety margin.
, gcHistoricityCutoff = Just $ HistoricityCutoff $ 3 * 2160 * 20 + 3600
}
enableGenesisConfigDefault = mkGenesisConfig $ Just defaultGenesisConfigFlags

-- | Disable all Genesis components, yielding Praos behavior.
disableGenesisConfig :: GenesisConfig
disableGenesisConfig = GenesisConfig {
gcChainSyncLoPBucketConfig = ChainSyncLoPBucketDisabled
disableGenesisConfig = mkGenesisConfig Nothing

-- | Build a 'GenesisConfig' from optional flags.
--
-- * 'Nothing' disables every Genesis component: LoP bucket, CSJ, and LoE/GDD
--   are all set to their @Disabled@ constructors, the BlockFetch grace period
--   is 0, and there is no historicity cutoff.
--
-- * @'Just' flags@ enables or disables the LoP bucket, CSJ, and LoE/GDD
--   individually according to the @gcfEnable*@ fields. Each tunable parameter
--   is taken from the corresponding 'Maybe' field, falling back to the
--   defaults defined in the @where@ clause when the field is 'Nothing'.
mkGenesisConfig :: Maybe GenesisConfigFlags -> GenesisConfig
mkGenesisConfig Nothing = -- disable Genesis
GenesisConfig
{ gcBlockFetchConfig = GenesisBlockFetchConfiguration
{ gbfcGracePeriod = 0 -- no grace period when Genesis is disabled
}
, gcChainSyncLoPBucketConfig = ChainSyncLoPBucketDisabled
, gcCSJConfig = CSJDisabled
, gcLoEAndGDDConfig = LoEAndGDDDisabled
, gcHistoricityCutoff = Nothing
}
mkGenesisConfig (Just GenesisConfigFlags{..}) =
GenesisConfig
{ gcBlockFetchConfig = GenesisBlockFetchConfiguration
{ gbfcGracePeriod
}
, gcChainSyncLoPBucketConfig = if gcfEnableLoP
then ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig
{ csbcCapacity
, csbcRate
}
else ChainSyncLoPBucketDisabled
, gcCSJConfig = if gcfEnableCSJ
then CSJEnabled CSJEnabledConfig
{ csjcJumpSize
}
else CSJDisabled
, gcLoEAndGDDConfig = if gcfEnableLoEAndGDD
then LoEAndGDDEnabled LoEAndGDDParams{lgpGDDRateLimit}
else LoEAndGDDDisabled
, -- Duration in seconds of one Cardano mainnet Shelley stability window
-- (3k/f slots times one second per slot) plus one extra hour as a
-- safety margin.
--
-- Set unconditionally in this branch: none of the @gcfEnable*@ flags
-- affects it.
gcHistoricityCutoff = Just $ HistoricityCutoff $ 3 * 2160 * 20 + 3600
}
where
-- The minimum amount of time during which the Genesis BlockFetch logic will
-- download blocks from a specific peer (even if it is not performing well
-- during that period).
defaultBlockFetchGracePeriod = 10 -- seconds

-- LoP parameters. Empirically, it takes less than 1ms to validate a header,
-- so leaking one token per 2ms is conservative. The capacity of 100_000
-- tokens corresponds to 200s, which is definitely enough to handle long GC
-- pauses; we could even make this more conservative.
defaultCapacity = 100_000 -- number of tokens
defaultRate = 500 -- tokens per second leaking, 1/2ms

-- The larger Shelley forecast range (3 * 2160 * 20) works in more recent
-- ranges of slots, but causes syncing to block in Byron. A future
-- improvement would be to make this era-dynamic, such that we can use the
-- larger (and hence more efficient) CSJ jump size in Shelley-based
-- eras.
defaultCSJJumpSize = 2 * 2160 -- Byron forecast range

-- Limiting the performance impact of the GDD.
defaultGDDRateLimit = 1.0 -- seconds

-- Resolve each optional flag against its default. The names match the
-- record fields they populate so that they can be used as field puns above.
-- Integer-valued flags are converted to the target field's numeric type
-- with 'fromInteger'; the GDD rate limit is already a 'DiffTime'.
gbfcGracePeriod = fromInteger $ fromMaybe defaultBlockFetchGracePeriod gcfBlockFetchGracePeriod
csbcCapacity = fromInteger $ fromMaybe defaultCapacity gcfBucketCapacity
csbcRate = fromInteger $ fromMaybe defaultRate gcfBucketRate
csjcJumpSize = fromInteger $ fromMaybe defaultCSJJumpSize gcfCSJJumpSize
lgpGDDRateLimit = fromMaybe defaultGDDRateLimit gcfGDDRateLimit

-- | Parameters that apply when the Limit on Eagerness and the Genesis Density
-- Disconnector are enabled; carried by the 'LoEAndGDDEnabled' constructor in
-- 'gcLoEAndGDDConfig'.
newtype LoEAndGDDParams = LoEAndGDDParams
{ -- | How often to evaluate GDD. 0 means as soon as possible.
-- Otherwise, no faster than once every T seconds, where T is the
-- value of the field.
lgpGDDRateLimit :: DiffTime
} deriving stock (Eq, Generic, Show)

-- | Genesis-related arguments needed by the NodeKernel initialization logic.
data GenesisNodeKernelArgs m blk = GenesisNodeKernelArgs {
gnkaLoEAndGDDArgs :: !(LoEAndGDDConfig (LoEAndGDDNodeKernelArgs m blk))
-- ^ Mirrors 'gcLoEAndGDDConfig': 'LoEAndGDDDisabled' when the LoE and GDD
-- are turned off, otherwise the arguments built by
-- 'mkGenesisNodeKernelArgs'.
}

data LoEAndGDDNodeKernelArgs m blk = LoEAndGDDNodeKernelArgs {
-- | A TVar containing an action that returns the 'ChainDB.GetLoEFragment'
-- action. We use this extra indirection to update this action after we
-- opened the ChainDB (which happens before we initialize the NodeKernel).
-- After that, this TVar will not be modified again.
gnkaGetLoEFragment :: !(LoEAndGDDConfig (StrictTVar m (ChainDB.GetLoEFragment m blk)))
lgnkaLoEFragmentTVar :: !(StrictTVar m (ChainDB.GetLoEFragment m blk))
, lgnkaGDDRateLimit :: DiffTime
}

-- | Create the initial 'GenesisNodeKernelArgs' (with a temporary
-- 'ChainDB.GetLoEFragment' that will be replaced via 'setGetLoEFragment') and a
-- function to update the 'ChainDbArgs' accordingly.
Expand All @@ -95,20 +189,24 @@ mkGenesisNodeKernelArgs ::
, Complete ChainDbArgs m blk -> Complete ChainDbArgs m blk
)
mkGenesisNodeKernelArgs gcfg = do
gnkaGetLoEFragment <- for (gcLoEAndGDDConfig gcfg) $ \() ->
newTVarIO $ pure $
gnkaLoEAndGDDArgs <- for (gcLoEAndGDDConfig gcfg) $ \p -> do
loeFragmentTVar <- newTVarIO $ pure $
-- Use the most conservative LoE fragment until 'setGetLoEFragment'
-- is called.
ChainDB.LoEEnabled $ AF.Empty AF.AnchorGenesis
let updateChainDbArgs = case gnkaGetLoEFragment of
pure LoEAndGDDNodeKernelArgs
{ lgnkaLoEFragmentTVar = loeFragmentTVar
, lgnkaGDDRateLimit = lgpGDDRateLimit p
}
let updateChainDbArgs = case gnkaLoEAndGDDArgs of
LoEAndGDDDisabled -> id
LoEAndGDDEnabled varGetLoEFragment -> \cfg ->
LoEAndGDDEnabled lgnkArgs -> \cfg ->
cfg { ChainDB.cdbsArgs =
(ChainDB.cdbsArgs cfg) { ChainDB.cdbsLoE = getLoEFragment }
}
where
getLoEFragment = join $ readTVarIO varGetLoEFragment
pure (GenesisNodeKernelArgs {gnkaGetLoEFragment}, updateChainDbArgs)
getLoEFragment = join $ readTVarIO $ lgnkaLoEFragmentTVar lgnkArgs
pure (GenesisNodeKernelArgs{gnkaLoEAndGDDArgs}, updateChainDbArgs)

-- | Set 'lgnkaLoEFragmentTVar' to the actual logic for determining the current
-- LoE fragment.
Expand All @@ -124,9 +222,10 @@ setGetLoEFragment readGsmState readLoEFragment varGetLoEFragment =
where
getLoEFragment :: ChainDB.GetLoEFragment m blk
getLoEFragment = atomically $ readGsmState >>= \case
-- When the Honest Availability Assumption cannot currently be guaranteed, we should not select
-- any blocks that would cause our immutable tip to advance, so we
-- return the most conservative LoE fragment.
-- When the Honest Availability Assumption cannot currently be
-- guaranteed, we should not select any blocks that would cause our
-- immutable tip to advance, so we return the most conservative LoE
-- fragment.
GSM.PreSyncing ->
pure $ ChainDB.LoEEnabled $ AF.Empty AF.AnchorGenesis
-- When we are syncing, return the current LoE fragment.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,17 @@ import Ouroboros.Consensus.MiniProtocol.BlockFetch.Server
(TraceBlockFetchServerEvent)
import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
(TraceChainSyncClientEvent)
import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping as CSJumping
import Ouroboros.Consensus.MiniProtocol.ChainSync.Server
(TraceChainSyncServerEvent)
import Ouroboros.Consensus.MiniProtocol.LocalTxSubmission.Server
(TraceLocalTxSubmissionServerEvent (..))
import Ouroboros.Consensus.Node.GSM (TraceGsmEvent)
import Ouroboros.Network.Block (Tip)
import Ouroboros.Network.BlockFetch (FetchDecision,
TraceFetchClientState, TraceLabelPeer)
import Ouroboros.Network.BlockFetch (TraceFetchClientState,
TraceLabelPeer)
import Ouroboros.Network.BlockFetch.Decision.Trace
(TraceDecisionEvent)
import Ouroboros.Network.KeepAlive (TraceKeepAliveClient)
import Ouroboros.Network.TxSubmission.Inbound
(TraceTxSubmissionInbound)
Expand All @@ -54,7 +57,7 @@ data Tracers' remotePeer localPeer blk f = Tracers
{ chainSyncClientTracer :: f (TraceLabelPeer remotePeer (TraceChainSyncClientEvent blk))
, chainSyncServerHeaderTracer :: f (TraceLabelPeer remotePeer (TraceChainSyncServerEvent blk))
, chainSyncServerBlockTracer :: f (TraceChainSyncServerEvent blk)
, blockFetchDecisionTracer :: f [TraceLabelPeer remotePeer (FetchDecision [Point (Header blk)])]
, blockFetchDecisionTracer :: f (TraceDecisionEvent remotePeer (Header blk))
, blockFetchClientTracer :: f (TraceLabelPeer remotePeer (TraceFetchClientState (Header blk)))
, blockFetchServerTracer :: f (TraceLabelPeer remotePeer (TraceBlockFetchServerEvent blk))
, txInboundTracer :: f (TraceLabelPeer remotePeer (TraceTxSubmissionInbound (GenTxId blk) (GenTx blk)))
Expand All @@ -69,6 +72,7 @@ data Tracers' remotePeer localPeer blk f = Tracers
, consensusErrorTracer :: f SomeException
, gsmTracer :: f (TraceGsmEvent (Tip blk))
, gddTracer :: f (TraceGDDEvent remotePeer blk)
, csjTracer :: f (CSJumping.TraceEvent remotePeer)
}

instance (forall a. Semigroup (f a))
Expand All @@ -92,6 +96,7 @@ instance (forall a. Semigroup (f a))
, consensusErrorTracer = f consensusErrorTracer
, gsmTracer = f gsmTracer
, gddTracer = f gddTracer
, csjTracer = f csjTracer
}
where
f :: forall a. Semigroup a
Expand Down Expand Up @@ -123,6 +128,7 @@ nullTracers = Tracers
, consensusErrorTracer = nullTracer
, gsmTracer = nullTracer
, gddTracer = nullTracer
, csjTracer = nullTracer
}

showTracers :: ( Show blk
Expand Down Expand Up @@ -157,6 +163,7 @@ showTracers tr = Tracers
, consensusErrorTracer = showTracing tr
, gsmTracer = showTracing tr
, gddTracer = showTracing tr
, csjTracer = showTracing tr
}

{-------------------------------------------------------------------------------
Expand Down
Loading
Loading