Fix: don't modify clean state on startup
jasagredo committed Dec 2, 2021
1 parent ca13a6a commit 1a72165
Showing 2 changed files with 124 additions and 88 deletions.
124 changes: 55 additions & 69 deletions ouroboros-consensus/src/Ouroboros/Consensus/Node.hs
@@ -52,7 +52,6 @@ module Ouroboros.Consensus.Node (
) where

import Codec.Serialise (DeserialiseFailure)
import Control.Monad (when)
import Control.Tracer (Tracer, contramap)
import Data.ByteString.Lazy (ByteString)
import Data.Hashable (Hashable)
@@ -183,8 +182,14 @@ data LowLevelRunNodeArgs m addrNTN addrNTC versionDataNTN versionDataNTC blk
(p2p :: Diffusion.P2P) =
LowLevelRunNodeArgs {

-- | How to manage the clean-shutdown marker on disk
llrnWithCheckedDB :: forall a. (LastShutDownWasClean -> m a) -> m a
-- | An action that will receive a marker indicating whether the previous
-- shutdown was considered clean and a wrapper for installing a handler to
-- create the clean-shutdown marker file on exit if needed. See
-- 'Ouroboros.Consensus.Node.Recovery.runWithCheckedDB'.
llrnWithCheckedDB :: forall a. ( LastShutDownWasClean
-> (ChainDB m blk -> m a -> m a)
-> m a)
-> m a

-- | The " static " ChainDB arguments
, llrnChainDbArgsDefaults :: ChainDbArgs Defaults m blk
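
For intuition about the new shape of llrnWithCheckedDB: the body now receives the clean-shutdown flag plus a wrapper to apply once the ChainDB has been initialised. A minimal, self-contained sketch of that pattern in plain IO follows; every name in it (withCheckedDB, the ChainDB stub, the hard-coded flag) is a stand-in for illustration only, not the library API.

{-# LANGUAGE ScopedTypeVariables #-}
module WithCheckedDBSketch (main) where

newtype LastShutDownWasClean = LastShutDownWasClean Bool

-- Stand-in for the real ChainDB handle.
data ChainDB = ChainDB

-- Same shape as the new field: the body receives the flag and a wrapper
-- that should be applied once the ChainDB has been initialised.
withCheckedDB :: forall a.
     (LastShutDownWasClean -> (ChainDB -> IO a -> IO a) -> IO a)
  -> IO a
withCheckedDB body =
  body (LastShutDownWasClean True)  -- the real code reads the on-disk marker
       (\_cdb act -> act)           -- the real code installs the clean-shutdown handler here

main :: IO ()
main = withCheckedDB $ \(LastShutDownWasClean clean) wrapper -> do
  let chainDB = ChainDB             -- the real code opens the ChainDB first
  putStrLn ("last shutdown was clean: " ++ show clean)
  wrapper chainDB $
    putStrLn "the rest of node start-up runs inside the wrapper"
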
@@ -277,9 +282,8 @@ runWith :: forall m addrNTN addrNTC versionDataNTN versionDataNTC blk p2p.
-> m ()
runWith RunNodeArgs{..} LowLevelRunNodeArgs{..} =

llrnWithCheckedDB $ \(LastShutDownWasClean lastShutDownWasClean) ->
llrnWithCheckedDB $ \(LastShutDownWasClean lastShutDownWasClean) wrapper ->
withRegistry $ \registry -> do

let systemStart :: SystemStart
systemStart = getSystemStart (configBlock cfg)

@@ -311,48 +315,49 @@ runWith RunNodeArgs{..} LowLevelRunNodeArgs{..} =
chainDB <- openChainDB registry inFuture cfg initLedger
llrnChainDbArgsDefaults customiseChainDbArgs'

btime <-
hardForkBlockchainTime $
llrnCustomiseHardForkBlockchainTimeArgs $
HardForkBlockchainTimeArgs
{ hfbtBackoffDelay = pure $ BackoffDelay 60
, hfbtGetLedgerState =
ledgerState <$> ChainDB.getCurrentLedger chainDB
, hfbtLedgerConfig = configLedger cfg
, hfbtRegistry = registry
, hfbtSystemTime = systemTime
, hfbtTracer =
contramap (fmap (fromRelativeTime systemStart))
(blockchainTimeTracer rnTraceConsensus)
, hfbtMaxClockRewind = secondsToNominalDiffTime 20
}

nodeKernelArgs <-
fmap (nodeKernelArgsEnforceInvariants . llrnCustomiseNodeKernelArgs) $
mkNodeKernelArgs
registry
llrnBfcSalt
llrnKeepAliveRng
cfg
blockForging
rnTraceConsensus
btime
chainDB
nodeKernel <- initNodeKernel nodeKernelArgs
rnNodeKernelHook registry nodeKernel

peerMetrics <- newPeerMetric
let ntnApps = mkNodeToNodeApps nodeKernelArgs nodeKernel peerMetrics
ntcApps = mkNodeToClientApps nodeKernelArgs nodeKernel
(apps, appsExtra) = mkDiffusionApplications
rnEnableP2P
(miniProtocolParameters nodeKernelArgs)
ntnApps
ntcApps
nodeKernel
peerMetrics

llrnRunDataDiffusion registry apps appsExtra
wrapper chainDB $ do
btime <-
hardForkBlockchainTime $
llrnCustomiseHardForkBlockchainTimeArgs $
HardForkBlockchainTimeArgs
{ hfbtBackoffDelay = pure $ BackoffDelay 60
, hfbtGetLedgerState =
ledgerState <$> ChainDB.getCurrentLedger chainDB
, hfbtLedgerConfig = configLedger cfg
, hfbtRegistry = registry
, hfbtSystemTime = systemTime
, hfbtTracer =
contramap (fmap (fromRelativeTime systemStart))
(blockchainTimeTracer rnTraceConsensus)
, hfbtMaxClockRewind = secondsToNominalDiffTime 20
}

nodeKernelArgs <-
fmap (nodeKernelArgsEnforceInvariants . llrnCustomiseNodeKernelArgs) $
mkNodeKernelArgs
registry
llrnBfcSalt
llrnKeepAliveRng
cfg
blockForging
rnTraceConsensus
btime
chainDB
nodeKernel <- initNodeKernel nodeKernelArgs
rnNodeKernelHook registry nodeKernel

peerMetrics <- newPeerMetric
let ntnApps = mkNodeToNodeApps nodeKernelArgs nodeKernel peerMetrics
ntcApps = mkNodeToClientApps nodeKernelArgs nodeKernel
(apps, appsExtra) = mkDiffusionApplications
rnEnableP2P
(miniProtocolParameters nodeKernelArgs)
ntnApps
ntcApps
nodeKernel
peerMetrics

llrnRunDataDiffusion registry apps appsExtra
where
ProtocolInfo
{ pInfoConfig = cfg
@@ -488,20 +493,16 @@ runWith RunNodeArgs{..} LowLevelRunNodeArgs{..} =
localRethrowPolicy :: RethrowPolicy
localRethrowPolicy = mempty

-- | Did the ChainDB already have an existing clean-shutdown marker on disk?
newtype LastShutDownWasClean = LastShutDownWasClean Bool
deriving (Eq, Show)

-- | Check the DB marker, lock the DB and look for the clean shutdown marker.
--
-- Run the body action with the DB locked, and if the last shutdown was clean.
-- Run the body action with the DB locked.
--
stdWithCheckedDB ::
forall blk a. (StandardHash blk, Typeable blk)
=> Proxy blk
-> FilePath
-> NetworkMagic
-> (LastShutDownWasClean -> IO a) -- ^ Body action with last shutdown was clean.
-> (LastShutDownWasClean -> (ChainDB IO blk -> IO a -> IO a) -> IO a) -- ^ Body action, which receives the clean-shutdown flag and the wrapper.
-> IO a
stdWithCheckedDB pb databasePath networkMagic body = do

@@ -513,22 +514,7 @@ stdWithCheckedDB pb databasePath networkMagic body = do
networkMagic

-- Then create the lock file.
withLockDB mountPoint $ do

-- When we shut down cleanly, we create a marker file so that the next
-- time we start, we know we don't have to validate the contents of the
-- whole ChainDB. When we shut down with an exception indicating
-- corruption or something going wrong with the file system, we don't
-- create this marker file so that the next time we start, we do a full
-- validation.
lastShutDownWasClean <- hasCleanShutdownMarker hasFS
when lastShutDownWasClean $ removeCleanShutdownMarker hasFS

-- On a clean shutdown, create a marker in the database folder so that
-- next time we start up, we know we don't have to validate the whole
-- database.
createMarkerOnCleanShutdown pb hasFS $
body (LastShutDownWasClean lastShutDownWasClean)
withLockDB mountPoint $ runWithCheckedDB pb hasFS body
where
mountPoint = MountPoint databasePath
hasFS = ioHasFS mountPoint
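
The body above runs with the database lock held: withLockDB creates the lock file and runs the rest of the start-up under it, and only then is runWithCheckedDB handed the body. As a rough, self-contained illustration of such a bracket shape only (the real withLockDB is not shown here, and the lock-file name below is made up):

import Control.Exception (bracket_)
import System.Directory (removeFile)

-- Hold a lock marker file for the duration of the body; it is removed again
-- afterwards, even if the body throws.
withDbLockSketch :: FilePath -> IO a -> IO a
withDbLockSketch dbDir body =
  bracket_ (writeFile lockFile "")  -- acquire: create the lock file
           (removeFile lockFile)    -- release: remove it again
           body
  where
    lockFile = dbDir ++ "/lock"

main :: IO ()
main = withDbLockSketch "." (putStrLn "body runs while the lock file exists")
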
88 changes: 69 additions & 19 deletions ouroboros-consensus/src/Ouroboros/Consensus/Node/Recovery.hs
@@ -2,10 +2,11 @@
{-# LANGUAGE ScopedTypeVariables #-}

module Ouroboros.Consensus.Node.Recovery (
createCleanShutdownMarker
, createMarkerOnCleanShutdown
LastShutDownWasClean (..)
, createCleanShutdownMarker
, hasCleanShutdownMarker
, removeCleanShutdownMarker
, runWithCheckedDB
) where

import Control.Monad (unless, when)
@@ -16,6 +17,7 @@ import Ouroboros.Consensus.Block (StandardHash)
import Ouroboros.Consensus.Node.Exit (ExitReason (..), toExitReason)
import Ouroboros.Consensus.Util.IOLike

import Ouroboros.Consensus.Storage.ChainDB
import Ouroboros.Consensus.Storage.FS.API (HasFS, doesFileExist,
removeFile, withFile)
import Ouroboros.Consensus.Storage.FS.API.Types (AllowExisting (..),
@@ -25,23 +27,9 @@ import Ouroboros.Consensus.Storage.FS.API.Types (AllowExisting (..),
cleanShutdownMarkerFile :: FsPath
cleanShutdownMarkerFile = mkFsPath ["clean"]

-- | When the given action terminates with a /clean/ exception, create the
-- /clean shutdown marker file/.
--
-- NOTE: we assume the action (i.e., the node itself) never terminates without
-- an exception.
--
-- A /clean/ exception is an exception for which 'exceptionRequiresRecovery'
-- returns 'False'.
createMarkerOnCleanShutdown ::
(IOLike m, StandardHash blk, Typeable blk)
=> Proxy blk
-> HasFS m h
-> m a -- ^ Action to run
-> m a
createMarkerOnCleanShutdown pb mp = onExceptionIf
(not . exceptionRequiresRecovery pb)
(createCleanShutdownMarker mp)
-- | Did the ChainDB already have an existing clean-shutdown marker on disk?
newtype LastShutDownWasClean = LastShutDownWasClean Bool
deriving (Eq, Show)

-- | Return 'True' when 'cleanShutdownMarkerFile' exists.
hasCleanShutdownMarker
@@ -83,6 +71,68 @@ exceptionRequiresRecovery pb e = case toExitReason pb e of
DatabaseCorruption -> True
_ -> False

-- | A bracket function that manages the clean-shutdown marker on disk.
--
-- - If the marker is missing on startup, then ChainDB initialization will
-- revalidate the database contents.
--
-- - If the OS kills the node, then we don't have the opportunity to write out
-- the marker file, which is fine, since we want the next startup to do
-- revalidation.
--
-- - If initialization was cleanly interrupted (e.g. SIGINT), then we leave the
-- marker in the same state as it was at the beginning of said initialization.
--
-- - At the end of a successful initialization, we remove the marker and install
-- a shutdown handler that writes the marker, except for certain exceptions
-- (see 'exceptionRequiresRecovery') that indicate corruption, for which we
-- want the next startup to do revalidation.
runWithCheckedDB
:: forall a m h blk. (IOLike m, StandardHash blk, Typeable blk)
=> Proxy blk
-> HasFS m h
-> (LastShutDownWasClean -> (ChainDB m blk -> m a -> m a) -> m a)
-> m a
runWithCheckedDB pb hasFS body = do
-- When we shut down cleanly, we create a marker file so that the next
-- time we start, we know we don't have to validate the contents of the
-- whole ChainDB. When we shut down with an exception indicating
-- corruption or something going wrong with the file system, we don't
-- create this marker file so that the next time we start, we do a full
-- validation.
wasClean <- hasCleanShutdownMarker hasFS

removeMarkerOnUncleanShutdown wasClean
$ body
(LastShutDownWasClean wasClean)
(\_cdb runWithInitializedChainDB -> createMarkerOnCleanShutdown $ do
-- ChainDB initialization has finished by the time we reach this
-- point. We remove the marker so that a SIGKILL will cause an unclean
-- shutdown.
when wasClean $ removeCleanShutdownMarker hasFS
runWithInitializedChainDB
)
where
-- | If there is an unclean exception during ChainDB initialization, we want
-- to remove the marker file, so we install this handler.
--
-- It is OK to also wrap this handler around code that runs after ChainDB
-- initialization, because the condition on this handler is the opposite of
-- the condition in the @createMarkerOnCleanShutdown@ handler.
removeMarkerOnUncleanShutdown wasClean = if not wasClean then id else onExceptionIf
(exceptionRequiresRecovery pb)
(removeCleanShutdownMarker hasFS)

-- | If a clean exception terminates the running node after ChainDB
-- initialization, we want to create the marker file.
--
-- NOTE: we assume the action (i.e., the node itself) never terminates without
-- an exception.
createMarkerOnCleanShutdown = onExceptionIf
(not . exceptionRequiresRecovery pb)
(createCleanShutdownMarker hasFS)

{-------------------------------------------------------------------------------
Auxiliary
-------------------------------------------------------------------------------}
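
The two exception handlers combined in runWithCheckedDB (remove the marker on exceptions that require recovery, create it on all other exceptions) follow a small, reusable pattern. The sketch below models just that pattern in plain IO; this onExceptionIf is a local stand-in, written under the assumption that, like the helper used above, it runs the clean-up action only when an escaping exception matches the predicate and then rethrows. It deliberately ignores the wasClean guard and the fact that the create-marker handler only wraps the post-initialisation part.

import Control.Exception (SomeException, catch, throwIO)
import Control.Monad (when)

-- Run the clean-up action only if an escaping exception satisfies the
-- predicate, then rethrow the original exception.
onExceptionIf :: (SomeException -> Bool) -> IO () -> IO a -> IO a
onExceptionIf predicate cleanup action =
  action `catch` \e -> do
    when (predicate e) cleanup
    throwIO e

-- Model of the marker handling: remove the marker on exceptions that require
-- recovery, create it on all other ("clean") exceptions.
withMarkerHandling :: (SomeException -> Bool) -> IO () -> IO () -> IO a -> IO a
withMarkerHandling requiresRecovery removeMarker createMarker body =
    onExceptionIf requiresRecovery removeMarker
  $ onExceptionIf (not . requiresRecovery) createMarker
  $ body

main :: IO ()
main =
  withMarkerHandling (const False)
                     (putStrLn "marker removed")
                     (putStrLn "marker created")
                     (putStrLn "body runs; no exception, so no handler fires")
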
