Skip to content

Commit

Permalink
feat(e2e): keep minimal state for non-archive nodes (#1612)
Browse files Browse the repository at this point in the history
Archive nodes: Keep everything.
Anything else: Prune "everything", keep the bare minimum (Cosmos
enforces the necessary limits based on snapshots, state needed for
Byzantine behaviour detection, etc.).

This PR also adds archive nodes to our e2e test networks, in order to
support the monitor.

issue: #1546
  • Loading branch information
arajasek authored Jul 30, 2024
1 parent 17afe0f commit 1a1e406
Show file tree
Hide file tree
Showing 18 changed files with 80 additions and 33 deletions.
7 changes: 5 additions & 2 deletions e2e/app/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@ import (

func LogMetrics(ctx context.Context, def Definition) error {
extNetwork := networkFromDef(def)
archiveNode, ok := def.Testnet.ArchiveNode()
if !ok {
return errors.New("monitor must use archive node, no archive node found")
}

// Pick a random node to monitor.
if err := MonitorCProvider(ctx, def.Testnet.BroadcastNode(), extNetwork); err != nil {
if err := MonitorCProvider(ctx, archiveNode, extNetwork); err != nil {
return errors.Wrap(err, "monitoring cchain provider")
}

Expand Down
21 changes: 13 additions & 8 deletions e2e/app/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,10 @@ func Setup(ctx context.Context, def Definition, depCfg DeployConfig) error {
}

logCfg := logConfig(def)
if err := writeMonitorConfig(ctx, def, logCfg, valPrivKeys); err != nil {
return err
if def.Testnet.HasArchiveNode() {
if err := writeMonitorConfig(ctx, def, logCfg, valPrivKeys); err != nil {
return err
}
}

if err := writeRelayerConfig(ctx, def, logCfg); err != nil {
Expand Down Expand Up @@ -406,15 +408,13 @@ func writeHaloConfig(
cfg := halocfg.DefaultConfig()

switch mode {
case e2e.ModeValidator, e2e.ModeFull:
case types.ModeArchive:
cfg.PruningOption = "nothing"
// Setting this to 0 retains all blocks
cfg.MinRetainBlocks = 0
case e2e.ModeSeed, e2e.ModeLight:
cfg.PruningOption = "everything"
cfg.MinRetainBlocks = 1
default:
cfg.PruningOption = "default"
cfg.MinRetainBlocks = 0
cfg.MinRetainBlocks = 1
}

cfg.Network = network
Expand Down Expand Up @@ -546,10 +546,15 @@ func writeMonitorConfig(ctx context.Context, def Definition, logCfg log.Config,
}
}

archiveNode, ok := def.Testnet.ArchiveNode()
if !ok {
return errors.New("monitor must use archive node, no archive node found")
}

cfg := monapp.DefaultConfig()
cfg.PrivateKey = privKeyFile
cfg.Network = def.Testnet.Network
cfg.HaloURL = def.Testnet.BroadcastNode().AddressRPC()
cfg.HaloURL = archiveNode.AddressRPC()
cfg.LoadGen.ValidatorKeysGlob = validatorKeyGlob
cfg.RPCEndpoints = endpoints

Expand Down
5 changes: 4 additions & 1 deletion e2e/docker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,10 @@ func additionalServices(testnet types.Testnet) []string {
resp = append(resp, "prometheus")
}

resp = append(resp, "monitor")
// Monitor must connect to an archive node.
if testnet.HasArchiveNode() {
resp = append(resp, "monitor")
}

// In monitor only mode, we only start monitor and prometheus.
if testnet.OnlyMonitor {
Expand Down
3 changes: 3 additions & 0 deletions e2e/manifests/ci.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ full01 = 100 # Add full01 as validator by depositing 100 ether $OMNI (the minimu
validator02_evm = ["stopstart"]
relayer = ["restart"]
validator03 = ["rollback"]

[node.fullnode02]
mode = "archive"
3 changes: 3 additions & 0 deletions e2e/manifests/devnet1.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ prometheus = true

[node.validator01]
[node.validator02]

[node.fullnode01]
mode="archive"
4 changes: 4 additions & 0 deletions e2e/manifests/fuzzyhead.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,7 @@ pingpong_n = 6 # Increased ping pong to span forks

[perturb]
mock_l1 = ["fuzzyhead_dropmsgs","fuzzyhead_dropblocks","fuzzyhead_attroot","fuzzyhead_moremsgs"]


[node.fullnode01]
mode = "archive"
7 changes: 7 additions & 0 deletions e2e/test/attestations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"testing"

"github.com/omni-network/omni/e2e/types"
"github.com/omni-network/omni/lib/cchain"
"github.com/omni-network/omni/lib/cchain/provider"
"github.com/omni-network/omni/lib/k1util"
Expand All @@ -21,6 +22,12 @@ func TestApprovedAttestations(t *testing.T) {
t.Parallel()
testNode(t, func(t *testing.T, network netconf.Network, node *e2e.Node, portals []Portal) {
t.Helper()

// Only archive nodes have the necessary state to fetch all attestations
if node.Mode != types.ModeArchive {
return
}

client, err := node.Client()
require.NoError(t, err)
cprov := provider.NewABCIProvider(client, network.ID, netconf.ChainVersionNamer(netconf.Simnet))
Expand Down
23 changes: 23 additions & 0 deletions e2e/types/testnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,29 @@ func (t Testnet) BroadcastNode() *e2e.Node {
return t.Nodes[0]
}

// HasArchiveNode reports whether at least one node of the Testnet is
// configured with ModeArchive. Nodes are inspected in t.Nodes order and
// the scan stops at the first match.
func (t Testnet) HasArchiveNode() bool {
	found := false
	for i := 0; i < len(t.Nodes) && !found; i++ {
		found = t.Nodes[i].Mode == ModeArchive
	}

	return found
}

// ArchiveNode looks up the first node, in t.Nodes order, whose mode is ModeArchive.
// The boolean result is true when such a node exists; otherwise the node is nil
// and the boolean is false.
// Note that this is different from the CometBFT Testnet.ArchiveNodes() method.
func (t Testnet) ArchiveNode() (*e2e.Node, bool) {
	for idx := range t.Nodes {
		if candidate := t.Nodes[idx]; candidate.Mode == ModeArchive {
			return candidate, true
		}
	}

	return nil, false
}

// HasPerturbations returns whether the network has any perturbations.
func (t Testnet) HasPerturbations() bool {
if len(t.Perturb) > 0 {
Expand Down
3 changes: 0 additions & 3 deletions halo/app/app_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,6 @@ const (
genesisVoteExtLimit = 256
genesisTrimLag = 1 // Delete attestations state after each epoch, only storing the very latest attestations.
genesisCTrimLag = 72_000 // Delete consensus attestations state after +-1 day (given a period of 1.2s).

defaultPruningKeep = 72_000 // Keep 1 day's of application state by default (given period of 1.2s).
defaultPruningInterval = 300 // Prune every 5 minutes or so.
)

// init initializes the Cosmos SDK configuration.
Expand Down
5 changes: 2 additions & 3 deletions halo/app/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,9 +258,8 @@ func makeBaseAppOpts(cfg Config) ([]func(*baseapp.BaseApp), error) {

pruneOpts := pruningtypes.NewPruningOptionsFromString(cfg.PruningOption)
if cfg.PruningOption == pruningtypes.PruningOptionDefault {
// Override the default cosmosSDK pruning values with much more aggressive defaults
// since historical state isn't very important for most use-cases.
pruneOpts = pruningtypes.NewCustomPruningOptions(defaultPruningKeep, defaultPruningInterval)
// We interpret "default" to be PruningEverything, since historical state isn't very important.
pruneOpts = pruningtypes.NewPruningOptions(pruningtypes.PruningEverything)
}

return []func(*baseapp.BaseApp){
Expand Down
4 changes: 2 additions & 2 deletions halo/cmd/testdata/TestCLIReference_rollback.golden
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ Flags:
--log-color string Log color (only applicable to console format); auto, force, disable (default "auto")
--log-format string Log format; console, json (default "console")
--log-level string Log level; debug, info, warn, error (default "info")
--min-retain-blocks uint Minimum block height offset during ABCI commit to prune CometBFT blocks
--min-retain-blocks uint Minimum block height offset during ABCI commit to prune CometBFT blocks (default 1)
--network string Omni network to participate in: mainnet, testnet, devnet
--pruning string Pruning strategy (default|nothing|everything) (default "nothing")
--pruning string Pruning strategy (default|nothing|everything) (default "default")
--snapshot-interval uint State sync snapshot interval (default 1000)
--snapshot-keep-recent uint State sync snapshot to keep (default 2)
--tracing-endpoint string Tracing OTLP endpoint
Expand Down
4 changes: 2 additions & 2 deletions halo/cmd/testdata/TestCLIReference_run.golden
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ Flags:
--log-color string Log color (only applicable to console format); auto, force, disable (default "auto")
--log-format string Log format; console, json (default "console")
--log-level string Log level; debug, info, warn, error (default "info")
--min-retain-blocks uint Minimum block height offset during ABCI commit to prune CometBFT blocks
--min-retain-blocks uint Minimum block height offset during ABCI commit to prune CometBFT blocks (default 1)
--network string Omni network to participate in: mainnet, testnet, devnet
--pruning string Pruning strategy (default|nothing|everything) (default "nothing")
--pruning string Pruning strategy (default|nothing|everything) (default "default")
--snapshot-interval uint State sync snapshot interval (default 1000)
--snapshot-keep-recent uint State sync snapshot to keep (default 2)
--tracing-endpoint string Tracing OTLP endpoint
Expand Down
4 changes: 2 additions & 2 deletions halo/cmd/testdata/TestRunCmd_defaults.golden
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
"SnapshotInterval": 1000,
"SnapshotKeepRecent": 2,
"BackendType": "goleveldb",
"MinRetainBlocks": 0,
"PruningOption": "nothing",
"MinRetainBlocks": 1,
"PruningOption": "default",
"EVMBuildDelay": 600000000,
"EVMBuildOptimistic": true,
"Tracer": {
Expand Down
4 changes: 2 additions & 2 deletions halo/cmd/testdata/TestRunCmd_flags.golden
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
"SnapshotInterval": 1000,
"SnapshotKeepRecent": 2,
"BackendType": "goleveldb",
"MinRetainBlocks": 0,
"PruningOption": "nothing",
"MinRetainBlocks": 1,
"PruningOption": "default",
"EVMBuildDelay": 600000000,
"EVMBuildOptimistic": true,
"Tracer": {
Expand Down
4 changes: 2 additions & 2 deletions halo/cmd/testdata/TestRunCmd_json_files.golden
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
"SnapshotInterval": 123,
"SnapshotKeepRecent": 2,
"BackendType": "goleveldb",
"MinRetainBlocks": 0,
"PruningOption": "nothing",
"MinRetainBlocks": 1,
"PruningOption": "default",
"EVMBuildDelay": 600000000,
"EVMBuildOptimistic": true,
"Tracer": {
Expand Down
4 changes: 2 additions & 2 deletions halo/cmd/testdata/TestRunCmd_toml_files.golden
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
"SnapshotInterval": 999,
"SnapshotKeepRecent": 2,
"BackendType": "goleveldb",
"MinRetainBlocks": 0,
"PruningOption": "nothing",
"MinRetainBlocks": 1,
"PruningOption": "default",
"EVMBuildDelay": 600000000,
"EVMBuildOptimistic": true,
"Tracer": {
Expand Down
4 changes: 2 additions & 2 deletions halo/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ const (
DefaultHomeDir = "./halo" // Defaults to "halo" in current directory
defaultSnapshotInterval = 1000 // Roughly once an hour (given 3s blocks)
defaultSnapshotKeepRecent = 2
defaultMinRetainBlocks = 0 // Retain all blocks
defaultMinRetainBlocks = 1 // Prune all blocks by default, Cosmos will still respect other needs like snapshots

defaultPruningOption = pruningtypes.PruningOptionNothing // Prune nothing
defaultPruningOption = pruningtypes.PruningOptionDefault // Note that Halo interprets this to be PruningEverything
defaultDBBackend = db.GoLevelDBBackend
defaultEVMBuildDelay = time.Millisecond * 600 // 100ms longer than geth's --miner.recommit=500ms.
defaultEVMBuildOptimistic = true
Expand Down
4 changes: 2 additions & 2 deletions halo/config/testdata/default_halo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,12 @@ snapshot-keep-recent = 2
# with the unbonding (safety threshold) period, state pruning and state sync
# snapshot parameters to determine the correct minimum value of
# ResponseCommit.RetainHeight.
min-retain-blocks = 0
min-retain-blocks = 1

# default: Halo interprets this as "everything" (the Cosmos SDK default of keeping the last 362880 states, pruning at 10 block intervals, does not apply)
# nothing: all historic states will be saved, nothing will be deleted (i.e. archiving node)
# everything: 2 latest states will be kept; pruning at 10 block intervals.
pruning = "nothing"
pruning = "default"

# AppDBBackend defines the database backend type to use for the application and snapshots DBs.
# An empty string indicates that a fallback will be used.
Expand Down

0 comments on commit 1a1e406

Please sign in to comment.