Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Calculate and compare CRC when writing and reading ledger snapshots #1319

Merged
merged 1 commit into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions ouroboros-consensus-cardano/app/DBAnalyser/Parsers.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{-# LANGUAGE ApplicativeDo #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ApplicativeDo #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE PatternSynonyms #-}

module DBAnalyser.Parsers (
BlockType (..)
Expand All @@ -21,6 +22,7 @@ import Options.Applicative
import Ouroboros.Consensus.Block
import Ouroboros.Consensus.Byron.Node (PBftSignatureThreshold (..))
import Ouroboros.Consensus.Shelley.Node (Nonce (..))
import Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy (pattern DoDiskSnapshotChecksum, pattern NoDoDiskSnapshotChecksum)

{-------------------------------------------------------------------------------
Parsing
Expand All @@ -44,6 +46,10 @@ parseDBAnalyserConfig = DBAnalyserConfig
<*> parseValidationPolicy
<*> parseAnalysis
<*> parseLimit
<*> flag DoDiskSnapshotChecksum NoDoDiskSnapshotChecksum (mconcat [
long "no-snapshot-checksum-on-read"
, help "Don't check the '.checksum' file when reading a ledger snapshot"
])

parseSelectDB :: Parser SelectDB
parseSelectDB =
Expand Down Expand Up @@ -130,7 +136,14 @@ storeLedgerParser = do
<> "This is much slower than block reapplication (the default)."
)
)
pure $ StoreLedgerStateAt slot ledgerValidation
doChecksum <- flag DoDiskSnapshotChecksum NoDoDiskSnapshotChecksum
(mconcat [ long "no-snapshot-checksum-on-write"
, help (unlines [ "Don't calculate the checksum and"
, "write the '.checksum' file"
, "when taking a ledger snapshot"
])
])
pure $ StoreLedgerStateAt slot ledgerValidation doChecksum

checkNoThunksParser :: Parser AnalysisName
checkNoThunksParser = CheckNoThunksEvery <$> option auto
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ import qualified Ouroboros.Consensus.Storage.ImmutableDB as ImmutableDB
import Ouroboros.Consensus.Storage.LedgerDB (DiskSnapshot (..),
writeSnapshot)
import Ouroboros.Consensus.Storage.Serialisation (encodeDisk)
import Ouroboros.Consensus.Util ((..:))
import Ouroboros.Consensus.Util (Flag (..), (..:))
import qualified Ouroboros.Consensus.Util.IOLike as IOLike
import Ouroboros.Network.SizeInBytes
import System.FS.API (SomeHasFS (..))
Expand Down Expand Up @@ -102,19 +102,19 @@ runAnalysis analysisName = case go analysisName of
pure result
where
go :: AnalysisName -> SomeAnalysis blk
go ShowSlotBlockNo = mkAnalysis $ showSlotBlockNo
go CountTxOutputs = mkAnalysis $ countTxOutputs
go ShowBlockHeaderSize = mkAnalysis $ showHeaderSize
go ShowBlockTxsSize = mkAnalysis $ showBlockTxsSize
go ShowEBBs = mkAnalysis $ showEBBs
go OnlyValidation = mkAnalysis @StartFromPoint $ \_ -> pure Nothing
go (StoreLedgerStateAt slotNo lgrAppMode) = mkAnalysis $ storeLedgerStateAt slotNo lgrAppMode
go CountBlocks = mkAnalysis $ countBlocks
go (CheckNoThunksEvery nBks) = mkAnalysis $ checkNoThunksEvery nBks
go TraceLedgerProcessing = mkAnalysis $ traceLedgerProcessing
go (ReproMempoolAndForge nBks) = mkAnalysis $ reproMempoolForge nBks
go (BenchmarkLedgerOps mOutfile lgrAppMode) = mkAnalysis $ benchmarkLedgerOps mOutfile lgrAppMode
go (GetBlockApplicationMetrics nrBlocks mOutfile) = mkAnalysis $ getBlockApplicationMetrics nrBlocks mOutfile
go ShowSlotBlockNo = mkAnalysis $ showSlotBlockNo
go CountTxOutputs = mkAnalysis $ countTxOutputs
go ShowBlockHeaderSize = mkAnalysis $ showHeaderSize
go ShowBlockTxsSize = mkAnalysis $ showBlockTxsSize
go ShowEBBs = mkAnalysis $ showEBBs
go OnlyValidation = mkAnalysis @StartFromPoint $ \_ -> pure Nothing
go (StoreLedgerStateAt slotNo lgrAppMode doChecksum) = mkAnalysis $ storeLedgerStateAt slotNo lgrAppMode doChecksum
go CountBlocks = mkAnalysis $ countBlocks
go (CheckNoThunksEvery nBks) = mkAnalysis $ checkNoThunksEvery nBks
go TraceLedgerProcessing = mkAnalysis $ traceLedgerProcessing
go (ReproMempoolAndForge nBks) = mkAnalysis $ reproMempoolForge nBks
go (BenchmarkLedgerOps mOutfile lgrAppMode) = mkAnalysis $ benchmarkLedgerOps mOutfile lgrAppMode
go (GetBlockApplicationMetrics nrBlocks mOutfile) = mkAnalysis $ getBlockApplicationMetrics nrBlocks mOutfile

mkAnalysis ::
forall startFrom. SingI startFrom
Expand Down Expand Up @@ -382,8 +382,9 @@ storeLedgerStateAt ::
)
=> SlotNo
-> LedgerApplicationMode
-> Flag "DoDiskSnapshotChecksum"
-> Analysis blk StartFromLedgerState
storeLedgerStateAt slotNo ledgerAppMode env = do
storeLedgerStateAt slotNo ledgerAppMode doChecksum env = do
void $ processAllUntil db registry GetBlock startFrom limit initLedger process
pure Nothing
where
Expand Down Expand Up @@ -422,7 +423,7 @@ storeLedgerStateAt slotNo ledgerAppMode env = do
storeLedgerState ledgerState = case pointSlot pt of
NotOrigin slot -> do
let snapshot = DiskSnapshot (unSlotNo slot) (Just "db-analyser")
writeSnapshot ledgerDbFS encLedger snapshot ledgerState
writeSnapshot ledgerDbFS doChecksum encLedger snapshot ledgerState
traceWith tracer $ SnapshotStoredEvent slot
Origin -> pure ()
where
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ analyse ::
=> DBAnalyserConfig
-> Args blk
-> IO (Maybe AnalysisResult)
analyse DBAnalyserConfig{analysis, confLimit, dbDir, selectDB, validation, verbose} args =
analyse DBAnalyserConfig{analysis, confLimit, dbDir, selectDB, validation, verbose, diskSnapshotChecksumOnRead} args =
withRegistry $ \registry -> do
lock <- newMVar ()
chainDBTracer <- mkTracer lock verbose
Expand Down Expand Up @@ -92,6 +92,7 @@ analyse DBAnalyserConfig{analysis, confLimit, dbDir, selectDB, validation, verbo
ledgerDbFS
(decodeDiskExtLedgerState $ configCodec cfg)
decode
diskSnapshotChecksumOnRead
(DiskSnapshot slot (Just "db-analyser"))
-- TODO @readSnapshot@ has type @ExceptT ReadIncrementalErr m
-- (ExtLedgerState blk)@ but it also throws exceptions! This makes
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}

module Cardano.Tools.DBAnalyser.Types (module Cardano.Tools.DBAnalyser.Types) where

import Data.Word
import Ouroboros.Consensus.Block
import Ouroboros.Consensus.Util (Flag)

-- | Selector for the database to analyse; the only constructor picks the
-- ImmutableDB.
--
-- NOTE(review): the 'WithOrigin' 'SlotNo' payload is presumably the slot the
-- analysis starts from — confirm against the code that consumes it.
data SelectDB =
SelectImmutableDB (WithOrigin SlotNo)

data DBAnalyserConfig = DBAnalyserConfig {
dbDir :: FilePath
, verbose :: Bool
, selectDB :: SelectDB
, validation :: Maybe ValidateBlocks
, analysis :: AnalysisName
, confLimit :: Limit
dbDir :: FilePath
, verbose :: Bool
, selectDB :: SelectDB
, validation :: Maybe ValidateBlocks
, analysis :: AnalysisName
, confLimit :: Limit
, diskSnapshotChecksumOnRead :: Flag "DoDiskSnapshotChecksum"
}

data AnalysisName =
Expand All @@ -24,7 +27,7 @@ data AnalysisName =
| ShowBlockTxsSize
| ShowEBBs
| OnlyValidation
| StoreLedgerStateAt SlotNo LedgerApplicationMode
| StoreLedgerStateAt SlotNo LedgerApplicationMode (Flag "DoDiskSnapshotChecksum")
| CountBlocks
| CheckNoThunksEvery Word64
| TraceLedgerProcessing
Expand Down
17 changes: 11 additions & 6 deletions ouroboros-consensus-cardano/test/tools-test/Main.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
{-# LANGUAGE PatternSynonyms #-}

module Main (main) where

import qualified Cardano.Tools.DBAnalyser.Block.Cardano as Cardano
Expand All @@ -8,6 +10,8 @@ import qualified Cardano.Tools.DBSynthesizer.Run as DBSynthesizer
import Cardano.Tools.DBSynthesizer.Types
import Ouroboros.Consensus.Block
import Ouroboros.Consensus.Cardano.Block
import Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy
(pattern NoDoDiskSnapshotChecksum)
import qualified Test.Cardano.Tools.Headers
import Test.Tasty
import Test.Tasty.HUnit
Expand Down Expand Up @@ -62,12 +66,13 @@ testImmutaliserConfig =
testAnalyserConfig :: DBAnalyserConfig
testAnalyserConfig =
DBAnalyserConfig {
dbDir = chainDB
, verbose = False
, selectDB = SelectImmutableDB Origin
, validation = Just ValidateAllBlocks
, analysis = CountBlocks
, confLimit = Unlimited
dbDir = chainDB
, verbose = False
, selectDB = SelectImmutableDB Origin
, validation = Just ValidateAllBlocks
, analysis = CountBlocks
, confLimit = Unlimited
, diskSnapshotChecksumOnRead = NoDoDiskSnapshotChecksum
}

testBlockArgs :: Cardano.Args (CardanoBlock StandardCrypto)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MonadComprehensions #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
Expand Down Expand Up @@ -51,6 +52,8 @@ module Ouroboros.Consensus.Node (
, RunNodeArgs (..)
, Tracers
, Tracers' (..)
, pattern DoDiskSnapshotChecksum
, pattern NoDoDiskSnapshotChecksum
-- * Internal helpers
, mkNodeKernelArgs
, nodeKernelArgsEnforceInvariants
Expand Down Expand Up @@ -107,7 +110,8 @@ import Ouroboros.Consensus.Storage.ChainDB (ChainDB, ChainDbArgs,
import qualified Ouroboros.Consensus.Storage.ChainDB as ChainDB
import qualified Ouroboros.Consensus.Storage.ChainDB.Impl.Args as ChainDB
import Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy
(DiskPolicyArgs (..))
(DiskPolicyArgs (..), pattern DoDiskSnapshotChecksum,
pattern NoDoDiskSnapshotChecksum)
import Ouroboros.Consensus.Util.Args
import Ouroboros.Consensus.Util.IOLike
import Ouroboros.Consensus.Util.Orphans ()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
### Breaking

- When writing a ledger state snapshot to disk, calculate the state's CRC32 checksum and write it to a separate file, which is named the same as the snapshot file, plus the `.checksum` extension.
- When reading a snapshot file in `readSnapshot`, calculate its checksum and compare it to the value in the corresponding `.checksum` file. Return an error if the checksum is different or invalid. Issue a warning if the checksum file does not exist, but still initialise the ledger DB.
- To support the previous item, change the error type of `readSnapshot` from `ReadIncrementalErr` to the extended `ReadSnapshotErr`.
- Checksumming the snapshots is controlled via the `doChecksum :: Flag "DoDiskSnapshotChecksum"` parameter of `initFromSnapshot`. Ultimately, this parameter comes from the Node's configuration file via the `DiskPolicy` data type.
- Extend the `DiskPolicyArgs` data type to enable the node to pass `Flag "DoDiskSnapshotChecksum"` to Consensus.

### Non-breaking

- Make `Ouroboros.Consensus.Util.CBOR.readIncremental` optionally compute the checksum of the data as it is read.
- Introduce an explicit `Ord` instance for `DiskSnapshot` that compares the values on `dsNumber`.
- Introduce a new utility newtype `Flag` to represent type-safe boolean flags. See ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Util.hs.
- Use `Flag "DoDiskSnapshotChecksum"` to control the check of the snapshot checksum file in `takeSnapshot`, `readSnapshot` and `writeSnapshot`.
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,11 @@ initFromDisk LgrDbArgs { lgrHasFS = hasFS, .. }
lgrConfig
lgrGenesis
(streamAPI immutableDB)
doDiskSnapshotChecksum
return (db, replayed)
where
ccfg = configCodec $ getExtLedgerCfg $ LedgerDB.ledgerDbCfg lgrConfig
LedgerDB.DiskPolicyArgs _ _ doDiskSnapshotChecksum = lgrDiskPolicyArgs

-- | For testing purposes
mkLgrDB :: StrictTVar m (LedgerDB' blk)
Expand Down Expand Up @@ -280,11 +282,12 @@ takeSnapshot ::
, IsLedger (LedgerState blk)
)
=> LgrDB m blk -> m (Maybe (LedgerDB.DiskSnapshot, RealPoint blk))
takeSnapshot lgrDB@LgrDB{ cfg, tracer, hasFS } = wrapFailure (Proxy @blk) $ do
takeSnapshot lgrDB@LgrDB{ cfg, tracer, hasFS, diskPolicy } = wrapFailure (Proxy @blk) $ do
ledgerDB <- LedgerDB.ledgerDbAnchor <$> atomically (getCurrent lgrDB)
LedgerDB.takeSnapshot
tracer
hasFS
(LedgerDB.onDiskShouldChecksumSnapshots diskPolicy)
(encodeDiskExtLedgerState ccfg)
ledgerDB
where
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
{-# LANGUAGE PatternSynonyms #-}

-- | The Ledger DB is responsible for the following tasks:
--
-- - __Maintaining the in-memory ledger state at the tip__: When we try to
Expand Down Expand Up @@ -134,6 +136,8 @@ module Ouroboros.Consensus.Storage.LedgerDB (
, SnapshotFailure (..)
, diskSnapshotIsTemporary
, listSnapshots
, pattern DoDiskSnapshotChecksum
, pattern NoDoDiskSnapshotChecksum
, readSnapshot
-- ** Write to disk
, takeSnapshot
Expand All @@ -160,7 +164,9 @@ module Ouroboros.Consensus.Storage.LedgerDB (
import Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy
(DiskPolicy (..), DiskPolicyArgs (..),
NumOfDiskSnapshots (..), SnapshotInterval (..),
TimeSinceLast (..), defaultDiskPolicyArgs, mkDiskPolicy)
TimeSinceLast (..), defaultDiskPolicyArgs, mkDiskPolicy,
pattern DoDiskSnapshotChecksum,
pattern NoDoDiskSnapshotChecksum)
import Ouroboros.Consensus.Storage.LedgerDB.Init (InitLog (..),
ReplayGoal (..), ReplayStart (..), TraceReplayEvent (..),
decorateReplayTracerWithGoal,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE RecordWildCards #-}

module Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy (
Expand All @@ -13,6 +14,10 @@ module Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy (
, TimeSinceLast (..)
, defaultDiskPolicyArgs
, mkDiskPolicy
, pattern DoDiskSnapshotChecksum
, pattern NoDoDiskSnapshotChecksum
-- * Re-exports
, Flag (..)
) where

import Control.Monad.Class.MonadTime.SI
Expand All @@ -21,27 +26,36 @@ import Data.Word
import GHC.Generics
import NoThunks.Class (NoThunks, OnlyCheckWhnf (..))
import Ouroboros.Consensus.Config.SecurityParam
import Ouroboros.Consensus.Util (Flag (..))

-- | Length of time, requested by the user, that has to pass after which
-- a snapshot is taken. It can be:
--
-- 1. either explicitly provided by user in seconds
-- 2. or default value can be requested - the specific DiskPolicy determines
-- 2. or default value can be requested - the specific @'DiskPolicy'@ determines
-- what that is exactly, see `mkDiskPolicy` as an example
data SnapshotInterval =
-- | No explicit interval was requested; the @'DiskPolicy'@ in use decides
-- the actual value (see 'mkDiskPolicy').
DefaultSnapshotInterval
-- | Interval explicitly provided by the user, in seconds.
| RequestedSnapshotInterval DiffTime
deriving stock (Eq, Generic, Show)

-- | Number of snapshots to be stored on disk. This is either the default value
-- as determined by the DiskPolicy, or it is provided by the user. See the
-- `DiskPolicy` documentation for more information.
-- as determined by the @'DiskPolicy'@, or it is provided by the user. See the
-- @'DiskPolicy'@ documentation for more information.
data NumOfDiskSnapshots =
-- | Store the default number of snapshots, as determined by the
-- @'DiskPolicy'@.
DefaultNumOfDiskSnapshots
-- | Store the number of snapshots provided by the user.
| RequestedNumOfDiskSnapshots Word
deriving stock (Eq, Generic, Show)

data DiskPolicyArgs = DiskPolicyArgs SnapshotInterval NumOfDiskSnapshots
-- | Type-safe flag to regulate the checksum policy of the ledger state snapshots.
--
-- These patterns are exposed to cardano-node and will be passed as part of @'DiskPolicy'@.
--
-- Both are bidirectional synonyms over the 'Flag' constructor, so they can be
-- used to build a flag value as well as to match on one.
--
-- NOTE(review): no @COMPLETE@ pragma is visible next to these patterns; if
-- none exists elsewhere, a @case@ covering both may still be reported as
-- non-exhaustive — confirm.
pattern DoDiskSnapshotChecksum, NoDoDiskSnapshotChecksum :: Flag "DoDiskSnapshotChecksum"
-- Checksumming enabled: stands for @Flag True@.
pattern DoDiskSnapshotChecksum = Flag True
-- Checksumming disabled: stands for @Flag False@.
pattern NoDoDiskSnapshotChecksum = Flag False

-- | The components used by cardano-node to construct a @'DiskPolicy'@.
data DiskPolicyArgs = DiskPolicyArgs SnapshotInterval NumOfDiskSnapshots (Flag "DoDiskSnapshotChecksum")
jasagredo marked this conversation as resolved.
Show resolved Hide resolved

-- | On-disk policy
--
Expand All @@ -67,7 +81,7 @@ data DiskPolicy = DiskPolicy {
-- the next snapshot, we delete the oldest one, leaving the middle
-- one available in case of truncation of the write. This is
-- probably a sane value in most circumstances.
onDiskNumSnapshots :: Word
onDiskNumSnapshots :: Word

-- | Should we write a snapshot of the ledger state to disk?
--
Expand All @@ -87,7 +101,11 @@ data DiskPolicy = DiskPolicy {
-- blocks had to be replayed.
--
-- See also 'mkDiskPolicy'
, onDiskShouldTakeSnapshot :: TimeSinceLast DiffTime -> Word64 -> Bool
, onDiskShouldTakeSnapshot :: TimeSinceLast DiffTime -> Word64 -> Bool

-- | Whether or not to checksum the ledger snapshots to detect data corruption on disk.
-- "yes" if @'DoDiskSnapshotChecksum'@; "no" if @'NoDoDiskSnapshotChecksum'@.
, onDiskShouldChecksumSnapshots :: Flag "DoDiskSnapshotChecksum"
}
deriving NoThunks via OnlyCheckWhnf DiskPolicy

Expand All @@ -97,10 +115,10 @@ data TimeSinceLast time = NoSnapshotTakenYet | TimeSinceLast time
-- | Default on-disk policy arguments suitable to use with cardano-node
--
defaultDiskPolicyArgs :: DiskPolicyArgs
defaultDiskPolicyArgs = DiskPolicyArgs DefaultSnapshotInterval DefaultNumOfDiskSnapshots
defaultDiskPolicyArgs = DiskPolicyArgs DefaultSnapshotInterval DefaultNumOfDiskSnapshots DoDiskSnapshotChecksum

mkDiskPolicy :: SecurityParam -> DiskPolicyArgs -> DiskPolicy
mkDiskPolicy (SecurityParam k) (DiskPolicyArgs reqInterval reqNumOfSnapshots) =
mkDiskPolicy (SecurityParam k) (DiskPolicyArgs reqInterval reqNumOfSnapshots onDiskShouldChecksumSnapshots) =
DiskPolicy {..}
where
onDiskNumSnapshots :: Word
Expand Down
Loading
Loading