From 44356eaffa786697a86872e2d812a1adc1f9356f Mon Sep 17 00:00:00 2001 From: Georgy Lukyanov Date: Wed, 4 Dec 2024 11:10:20 +0100 Subject: [PATCH 1/3] Integrate ledger snapshot checksum - Categorise `LedgerDB.SnapshotMissingChecksum` trace as `Warning` - Expose snapshot checksum switch in config file --- cabal.project | 13 ++++++++++++ .../src/Cardano/Node/Configuration/POM.hs | 20 ++++++++++++++----- cardano-node/src/Cardano/Node/Orphans.hs | 10 ++++++++-- cardano-node/src/Cardano/Node/Parsers.hs | 1 + cardano-node/src/Cardano/Node/Run.hs | 4 +++- .../Cardano/Node/Tracing/Tracers/ChainDB.hs | 11 ++++++++++ .../Tracing/OrphanInstances/Consensus.hs | 7 +++++++ cardano-node/test/Test/Cardano/Node/POM.hs | 6 +++++- 8 files changed, 63 insertions(+), 9 deletions(-) diff --git a/cabal.project b/cabal.project index 1a927bf6eaf..089c7e82eb1 100644 --- a/cabal.project +++ b/cabal.project @@ -68,3 +68,16 @@ allow-newer: -- IMPORTANT -- Do NOT add more source-repository-package stanzas here unless they are strictly -- temporary! Please read the section in CONTRIBUTING about updating dependencies. +source-repository-package + type: git + location: https://github.com/IntersectMBO/ouroboros-consensus + -- use branch geo2a/issue-892-checksum-snaphot-file-release-ouroboros-consensus-0.21.0.0-backport + tag: 0ff4c0445 + --sha256: xWBEq9kq2eUTlOnMBkSftH8NUUGbpwpamu1G1TgGETU= + subdir: + ouroboros-consensus + ouroboros-consensus-cardano + ouroboros-consensus-diffusion + ouroboros-consensus-protocol + sop-extras + strict-sop-core diff --git a/cardano-node/src/Cardano/Node/Configuration/POM.hs b/cardano-node/src/Cardano/Node/Configuration/POM.hs index 3ce3341e431..e5c1fab0f3e 100644 --- a/cardano-node/src/Cardano/Node/Configuration/POM.hs +++ b/cardano-node/src/Cardano/Node/Configuration/POM.hs @@ -1,7 +1,9 @@ +{-# LANGUAGE DataKinds #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -35,8 +37,8 @@ import Ouroboros.Consensus.Ledger.SupportsMempool import Ouroboros.Consensus.Mempool (MempoolCapacityBytesOverride (..)) import Ouroboros.Consensus.Node (NodeDatabasePaths (..)) import qualified Ouroboros.Consensus.Node as Consensus (NetworkP2PMode (..)) -import Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy (NumOfDiskSnapshots (..), - SnapshotInterval (..)) +import Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy (Flag (..), + NumOfDiskSnapshots (..), SnapshotInterval (..), pattern DoDiskSnapshotChecksum) import Ouroboros.Network.NodeToNode (AcceptedConnectionsLimit (..), DiffusionMode (..)) import Ouroboros.Network.PeerSelection.PeerSharing (PeerSharing (..)) @@ -103,9 +105,10 @@ data NodeConfiguration , ncProtocolConfig :: !NodeProtocolConfiguration -- Node parameters, not protocol-specific: - , ncDiffusionMode :: !DiffusionMode - , ncNumOfDiskSnapshots :: !NumOfDiskSnapshots - , ncSnapshotInterval :: !SnapshotInterval + , ncDiffusionMode :: !DiffusionMode + , ncNumOfDiskSnapshots :: !NumOfDiskSnapshots + , ncSnapshotInterval :: !SnapshotInterval + , ncDoDiskSnapshotChecksum :: !(Flag "DoDiskSnapshotChecksum") -- | During the development and integration of new network protocols -- (node-to-node and node-to-client) we wish to be able to test them @@ -188,6 +191,7 @@ data PartialNodeConfiguration , pncDiffusionMode :: !(Last DiffusionMode ) , pncNumOfDiskSnapshots :: !(Last NumOfDiskSnapshots) , pncSnapshotInterval :: !(Last SnapshotInterval) + , pncDoDiskSnapshotChecksum :: !(Last (Flag "DoDiskSnapshotChecksum")) , pncExperimentalProtocolsEnabled :: !(Last Bool) -- BlockFetch configuration @@ -250,6 +254,8 @@ instance FromJSON PartialNodeConfiguration where <- Last . fmap RequestedNumOfDiskSnapshots <$> v .:? "NumOfDiskSnapshots" pncSnapshotInterval <- Last . fmap RequestedSnapshotInterval <$> v .:? "SnapshotInterval" + pncDoDiskSnapshotChecksum + <- Last <$> v .:? "DoDiskSnapshotChecksum" pncExperimentalProtocolsEnabled <- fmap Last $ do mValue <- v .:? "ExperimentalProtocolsEnabled" @@ -329,6 +335,7 @@ instance FromJSON PartialNodeConfiguration where , pncDiffusionMode , pncNumOfDiskSnapshots , pncSnapshotInterval + , pncDoDiskSnapshotChecksum , pncExperimentalProtocolsEnabled , pncMaxConcurrencyBulkSync , pncMaxConcurrencyDeadline @@ -500,6 +507,7 @@ defaultPartialNodeConfiguration = , pncDiffusionMode = Last $ Just InitiatorAndResponderDiffusionMode , pncNumOfDiskSnapshots = Last $ Just DefaultNumOfDiskSnapshots , pncSnapshotInterval = Last $ Just DefaultSnapshotInterval + , pncDoDiskSnapshotChecksum = Last $ Just DoDiskSnapshotChecksum , pncExperimentalProtocolsEnabled = Last $ Just False , pncTopologyFile = Last . Just $ TopologyFile "configuration/cardano/mainnet-topology.json" , pncProtocolFiles = mempty @@ -552,6 +560,7 @@ makeNodeConfiguration pnc = do diffusionMode <- lastToEither "Missing DiffusionMode" $ pncDiffusionMode pnc numOfDiskSnapshots <- lastToEither "Missing NumOfDiskSnapshots" $ pncNumOfDiskSnapshots pnc snapshotInterval <- lastToEither "Missing SnapshotInterval" $ pncSnapshotInterval pnc + doDiskSnapshotChecksum <- lastToEither "Missing DoDiskSnapshotChecksum" $ pncDoDiskSnapshotChecksum pnc shutdownConfig <- lastToEither "Missing ShutdownConfig" $ pncShutdownConfig pnc socketConfig <- lastToEither "Missing SocketConfig" $ pncSocketConfig pnc @@ -621,6 +630,7 @@ makeNodeConfiguration pnc = do , ncDiffusionMode = diffusionMode , ncNumOfDiskSnapshots = numOfDiskSnapshots , ncSnapshotInterval = snapshotInterval + , ncDoDiskSnapshotChecksum = doDiskSnapshotChecksum , ncExperimentalProtocolsEnabled = experimentalProtocols , ncMaxConcurrencyBulkSync = getLast $ pncMaxConcurrencyBulkSync pnc , ncMaxConcurrencyDeadline = getLast $ pncMaxConcurrencyDeadline pnc diff --git a/cardano-node/src/Cardano/Node/Orphans.hs b/cardano-node/src/Cardano/Node/Orphans.hs index b0246e0f6e7..eefa6e7b84b 100644 --- a/cardano-node/src/Cardano/Node/Orphans.hs +++ b/cardano-node/src/Cardano/Node/Orphans.hs @@ -1,4 +1,6 @@ {-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE StandaloneDeriving #-} @@ -8,6 +10,7 @@ module Cardano.Node.Orphans () where import Cardano.Api () +import Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy (Flag(..)) import Ouroboros.Consensus.Node import qualified Data.Text as Text import Ouroboros.Network.NodeToNode (AcceptedConnectionsLimit (..)) @@ -46,11 +49,14 @@ instance FromJSON AcceptedConnectionsLimit where <*> v .: "delay" instance FromJSON NodeDatabasePaths where - parseJSON o@(Object{})= - withObject "NodeDatabasePaths" + parseJSON o@(Object{})= + withObject "NodeDatabasePaths" (\v -> MultipleDbPaths <$> v .: "ImmutableDbPath" <*> v .: "VolatileDbPath" ) o parseJSON (String s) = return . OnePathForAllDbs $ Text.unpack s parseJSON _ = fail "NodeDatabasePaths must be an object or a string" + +deriving newtype instance FromJSON (Flag symbol) +deriving newtype instance ToJSON (Flag symbol) diff --git a/cardano-node/src/Cardano/Node/Parsers.hs b/cardano-node/src/Cardano/Node/Parsers.hs index d77bb991e28..92dbbcea06e 100644 --- a/cardano-node/src/Cardano/Node/Parsers.hs +++ b/cardano-node/src/Cardano/Node/Parsers.hs @@ -92,6 +92,7 @@ nodeRunParser = do , pncDiffusionMode = mempty , pncNumOfDiskSnapshots = numOfDiskSnapshots , pncSnapshotInterval = snapshotInterval + , pncDoDiskSnapshotChecksum = mempty , pncExperimentalProtocolsEnabled = mempty , pncProtocolFiles = Last $ Just ProtocolFilepaths { byronCertFile diff --git a/cardano-node/src/Cardano/Node/Run.hs b/cardano-node/src/Cardano/Node/Run.hs index 3b88274a99e..7e4f5f2e7ce 100644 --- a/cardano-node/src/Cardano/Node/Run.hs +++ b/cardano-node/src/Cardano/Node/Run.hs @@ -1,4 +1,5 @@ {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE CPP #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE ExplicitNamespaces #-} @@ -63,7 +64,7 @@ import Cardano.Tracing.Config (TraceOptions (..), TraceSelection (..)) import Cardano.Tracing.Tracers import qualified Ouroboros.Consensus.Config as Consensus import Ouroboros.Consensus.Config.SupportsNode (ConfigSupportsNode (..)) -import Ouroboros.Consensus.Node (DiskPolicyArgs (..), NetworkP2PMode (..), +import Ouroboros.Consensus.Node (DiskPolicyArgs (..), pattern DoDiskSnapshotChecksum, pattern NoDoDiskSnapshotChecksum, NetworkP2PMode (..), NodeDatabasePaths (..), RunNodeArgs (..), StdRunNodeArgs (..)) import qualified Ouroboros.Consensus.Node as Node (NodeDatabasePaths (..), getChainDB, run) import Ouroboros.Consensus.Node.Genesis @@ -650,6 +651,7 @@ handleSimpleNode blockType runP p2pMode tracers nc onKernel = do DiskPolicyArgs (ncSnapshotInterval nc) (ncNumOfDiskSnapshots nc) + (ncDoDiskSnapshotChecksum nc) -------------------------------------------------------------------------------- -- SIGHUP Handlers diff --git a/cardano-node/src/Cardano/Node/Tracing/Tracers/ChainDB.hs b/cardano-node/src/Cardano/Node/Tracing/Tracers/ChainDB.hs index a0ecf536704..c6920498fdb 100644 --- a/cardano-node/src/Cardano/Node/Tracing/Tracers/ChainDB.hs +++ b/cardano-node/src/Cardano/Node/Tracing/Tracers/ChainDB.hs @@ -1522,6 +1522,8 @@ instance ( StandardHash blk " This is most likely an expected change in the serialization format," <> " which currently requires a chain replay" _ -> "" + forHuman (LedgerDB.SnapshotMissingChecksum snap) = + "Checksum file is missing for snapshot " <> showT snap forMachine dtals (LedgerDB.TookSnapshot snap pt enclosedTiming) = mconcat [ "kind" .= String "TookSnapshot" @@ -1535,15 +1537,21 @@ instance ( StandardHash blk mconcat [ "kind" .= String "InvalidSnapshot" , "snapshot" .= forMachine dtals snap , "failure" .= show failure ] + forMachine dtals (LedgerDB.SnapshotMissingChecksum snap) = + mconcat [ "kind" .= String "SnapshotMissingChecksum" + , "snapshot" .= forMachine dtals snap + ] instance MetaTrace (LedgerDB.TraceSnapshotEvent blk) where namespaceFor LedgerDB.TookSnapshot {} = Namespace [] ["TookSnapshot"] namespaceFor LedgerDB.DeletedSnapshot {} = Namespace [] ["DeletedSnapshot"] namespaceFor LedgerDB.InvalidSnapshot {} = Namespace [] ["InvalidSnapshot"] + namespaceFor LedgerDB.SnapshotMissingChecksum {} = Namespace [] ["SnapshotMissingChecksum"] severityFor (Namespace _ ["TookSnapshot"]) _ = Just Info severityFor (Namespace _ ["DeletedSnapshot"]) _ = Just Debug severityFor (Namespace _ ["InvalidSnapshot"]) _ = Just Error + severityFor (Namespace _ ["SnapshotMissingChecksum"]) _ = Just Warning severityFor _ _ = Nothing documentFor (Namespace _ ["TookSnapshot"]) = Just $ mconcat @@ -1555,12 +1563,15 @@ instance MetaTrace (LedgerDB.TraceSnapshotEvent blk) where "A snapshot was deleted from the disk." documentFor (Namespace _ ["InvalidSnapshot"]) = Just "An on disk snapshot was invalid. Unless it was suffixed, it will be deleted" + documentFor (Namespace _ ["SnapshotMissingChecksum"]) = Just + "Checksum file was missing for snapshot." documentFor _ = Nothing allNamespaces = [ Namespace [] ["TookSnapshot"] , Namespace [] ["DeletedSnapshot"] , Namespace [] ["InvalidSnapshot"] + , Namespace [] ["SnapshotMissingChecksum"] ] diff --git a/cardano-node/src/Cardano/Tracing/OrphanInstances/Consensus.hs b/cardano-node/src/Cardano/Tracing/OrphanInstances/Consensus.hs index 2a3c60f141b..f9ccdfeeab9 100644 --- a/cardano-node/src/Cardano/Tracing/OrphanInstances/Consensus.hs +++ b/cardano-node/src/Cardano/Tracing/OrphanInstances/Consensus.hs @@ -175,6 +175,7 @@ instance HasSeverityAnnotation (ChainDB.TraceEvent blk) where LedgerDB.TookSnapshot {} -> Info LedgerDB.DeletedSnapshot {} -> Debug LedgerDB.InvalidSnapshot {} -> Error + LedgerDB.SnapshotMissingChecksum {} -> Warning getSeverityAnnotation (ChainDB.TraceCopyToImmutableDBEvent ev) = case ev of ChainDB.CopiedBlockToImmutableDB {} -> Debug @@ -615,6 +616,8 @@ instance ( ConvertRawHash blk " This is most likely an expected change in the serialization format," <> " which currently requires a chain replay" _ -> "" + LedgerDB.SnapshotMissingChecksum snap -> + "Checksum file is missing for snapshot " <> showT snap LedgerDB.TookSnapshot snap pt RisingEdge -> "Taking ledger snapshot " <> showT snap <> @@ -1101,6 +1104,10 @@ instance ( ConvertRawHash blk mconcat [ "kind" .= String "TraceSnapshotEvent.InvalidSnapshot" , "snapshot" .= toObject verb snap , "failure" .= show failure ] + LedgerDB.SnapshotMissingChecksum snap -> + mconcat [ "kind" .= String "TraceSnapshotEvent.SnapshotMissingChecksum" + , "snapshot" .= toObject verb snap + ] toObject verb (ChainDB.TraceCopyToImmutableDBEvent ev) = case ev of ChainDB.CopiedBlockToImmutableDB pt -> diff --git a/cardano-node/test/Test/Cardano/Node/POM.hs b/cardano-node/test/Test/Cardano/Node/POM.hs index 2b2c416d704..877dbd344b0 100644 --- a/cardano-node/test/Test/Cardano/Node/POM.hs +++ b/cardano-node/test/Test/Cardano/Node/POM.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE TemplateHaskell #-} module Test.Cardano.Node.POM @@ -15,7 +16,7 @@ import Cardano.Tracing.Config (PartialTraceOptions (..), defaultPartia import Ouroboros.Consensus.Node (NodeDatabasePaths (..)) import qualified Ouroboros.Consensus.Node as Consensus (NetworkP2PMode (..)) import Ouroboros.Consensus.Storage.LedgerDB.DiskPolicy (NumOfDiskSnapshots (..), - SnapshotInterval (..)) + SnapshotInterval (..), pattern DoDiskSnapshotChecksum) import Ouroboros.Network.Block (SlotNo (..)) import Ouroboros.Network.NodeToNode (AcceptedConnectionsLimit (..), DiffusionMode (InitiatorAndResponderDiffusionMode)) @@ -119,6 +120,7 @@ testPartialYamlConfig = , pncDiffusionMode = Last Nothing , pncNumOfDiskSnapshots = Last Nothing , pncSnapshotInterval = mempty + , pncDoDiskSnapshotChecksum = Last . Just $ DoDiskSnapshotChecksum , pncExperimentalProtocolsEnabled = Last Nothing , pncMaxConcurrencyBulkSync = Last Nothing , pncMaxConcurrencyDeadline = Last Nothing @@ -161,6 +163,7 @@ testPartialCliConfig = , pncDiffusionMode = mempty , pncNumOfDiskSnapshots = Last Nothing , pncSnapshotInterval = Last . Just . RequestedSnapshotInterval $ secondsToDiffTime 100 + , pncDoDiskSnapshotChecksum = Last . Just $ DoDiskSnapshotChecksum , pncExperimentalProtocolsEnabled = Last $ Just True , pncProtocolFiles = Last . Just $ ProtocolFilepaths Nothing Nothing Nothing Nothing Nothing Nothing , pncValidateDB = Last $ Just True @@ -205,6 +208,7 @@ eExpectedConfig = do , ncDiffusionMode = InitiatorAndResponderDiffusionMode , ncNumOfDiskSnapshots = DefaultNumOfDiskSnapshots , ncSnapshotInterval = RequestedSnapshotInterval $ secondsToDiffTime 100 + , ncDoDiskSnapshotChecksum = DoDiskSnapshotChecksum , ncExperimentalProtocolsEnabled = True , ncMaxConcurrencyBulkSync = Nothing , ncMaxConcurrencyDeadline = Nothing From 0417d03d2f7cdcaf6b4147d3c590ac6644a4e86a Mon Sep 17 00:00:00 2001 From: Georgy Lukyanov Date: Mon, 9 Dec 2024 16:27:04 +0100 Subject: [PATCH 2/3] Clean-up snapshot tracing display code Remember that `Text.unwords` exists --- .../Cardano/Node/Tracing/Tracers/ChainDB.hs | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/cardano-node/src/Cardano/Node/Tracing/Tracers/ChainDB.hs b/cardano-node/src/Cardano/Node/Tracing/Tracers/ChainDB.hs index c6920498fdb..9e49ebd000f 100644 --- a/cardano-node/src/Cardano/Node/Tracing/Tracers/ChainDB.hs +++ b/cardano-node/src/Cardano/Node/Tracing/Tracers/ChainDB.hs @@ -1507,15 +1507,27 @@ instance ( StandardHash blk , ConvertRawHash blk) => LogFormatting (LedgerDB.TraceSnapshotEvent blk) where forHuman (LedgerDB.TookSnapshot snap pt RisingEdge) = - "Taking ledger snapshot " <> showT snap <> - " at " <> renderRealPointAsPhrase pt + Text.unwords [ "Taking ledger snapshot" + , showT snap + , "at" + , renderRealPointAsPhrase pt + ] forHuman (LedgerDB.TookSnapshot snap pt (FallingEdgeWith t)) = - "Took ledger snapshot " <> showT snap <> - " at " <> renderRealPointAsPhrase pt <> ", duration: " <> showT t + Text.unwords [ "Took ledger snapshot" + , showT snap + , "at" + , renderRealPointAsPhrase pt + , ", duration:" + , showT t + ] forHuman (LedgerDB.DeletedSnapshot snap) = - "Deleted old snapshot " <> showT snap + Text.unwords ["Deleted old snapshot", showT snap] forHuman (LedgerDB.InvalidSnapshot snap failure) = - "Invalid snapshot " <> showT snap <> showT failure <> context + Text.unwords [ "Invalid snapshot" + , showT snap + , showT failure + , context + ] where context = case failure of LedgerDB.InitFailureRead{} -> From 58c5f1f1edb7735c6012e062ae56edc66f1f905b Mon Sep 17 00:00:00 2001 From: Georgy Lukyanov Date: Thu, 5 Dec 2024 17:43:08 +0100 Subject: [PATCH 3/3] Update s-r-p stanza --- cabal.project | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cabal.project b/cabal.project index 089c7e82eb1..20f4520330a 100644 --- a/cabal.project +++ b/cabal.project @@ -72,8 +72,8 @@ source-repository-package type: git location: https://github.com/IntersectMBO/ouroboros-consensus -- use branch geo2a/issue-892-checksum-snaphot-file-release-ouroboros-consensus-0.21.0.0-backport - tag: 0ff4c0445 - --sha256: xWBEq9kq2eUTlOnMBkSftH8NUUGbpwpamu1G1TgGETU= + tag: bc9f10c0a + --sha256: JGuQlFgW46mck545klipsiajhkr9A51JYXVRvmr0KYI= subdir: ouroboros-consensus ouroboros-consensus-cardano