diff --git a/core/chains/evm/chain.go b/core/chains/evm/chain.go index b4986ad0c25..2b6ee55474a 100644 --- a/core/chains/evm/chain.go +++ b/core/chains/evm/chain.go @@ -484,7 +484,7 @@ func newEthClientFromChain(cfg evmconfig.NodePool, noNewHeadsThreshold time.Dura primaries = append(primaries, primary) } } - return evmclient.NewClientWithNodes(lggr, cfg.SelectionMode(), noNewHeadsThreshold, primaries, sendonlys, chainID, chainType) + return evmclient.NewClientWithNodes(lggr, cfg.SelectionMode(), cfg.LeaseDuration(), noNewHeadsThreshold, primaries, sendonlys, chainID, chainType) } func newPrimary(cfg evmconfig.NodePool, noNewHeadsThreshold time.Duration, lggr logger.Logger, n *toml.Node, id int32, chainID *big.Int) (evmclient.Node, error) { diff --git a/core/chains/evm/client/client.go b/core/chains/evm/client/client.go index 339542f4ce1..3a3b8b23a92 100644 --- a/core/chains/evm/client/client.go +++ b/core/chains/evm/client/client.go @@ -108,8 +108,8 @@ var _ htrktypes.Client[*evmtypes.Head, ethereum.Subscription, *big.Int, common.H // NewClientWithNodes instantiates a client from a list of nodes // Currently only supports one primary -func NewClientWithNodes(logger logger.Logger, selectionMode string, noNewHeadsThreshold time.Duration, primaryNodes []Node, sendOnlyNodes []SendOnlyNode, chainID *big.Int, chainType config.ChainType) (*client, error) { - pool := NewPool(logger, selectionMode, noNewHeadsThreshold, primaryNodes, sendOnlyNodes, chainID, chainType) +func NewClientWithNodes(logger logger.Logger, selectionMode string, leaseDuration time.Duration, noNewHeadsThreshold time.Duration, primaryNodes []Node, sendOnlyNodes []SendOnlyNode, chainID *big.Int, chainType config.ChainType) (*client, error) { + pool := NewPool(logger, selectionMode, leaseDuration, noNewHeadsThreshold, primaryNodes, sendOnlyNodes, chainID, chainType) return &client{ logger: logger, pool: pool, diff --git a/core/chains/evm/client/erroring_node.go b/core/chains/evm/client/erroring_node.go 
index 152b52a7acb..21c4d269ea4 100644 --- a/core/chains/evm/client/erroring_node.go +++ b/core/chains/evm/client/erroring_node.go @@ -20,6 +20,12 @@ type erroringNode struct { errMsg string } +func (e *erroringNode) UnsubscribeAllExceptAliveLoop() {} + +func (e *erroringNode) SubscribersCount() int32 { + return 0 +} + func (e *erroringNode) ChainID() (chainID *big.Int) { return nil } func (e *erroringNode) Start(ctx context.Context) error { return errors.New(e.errMsg) } diff --git a/core/chains/evm/client/helpers_test.go b/core/chains/evm/client/helpers_test.go index 8a660eb38db..342a9143432 100644 --- a/core/chains/evm/client/helpers_test.go +++ b/core/chains/evm/client/helpers_test.go @@ -19,12 +19,16 @@ type TestNodePoolConfig struct { NodePollInterval time.Duration NodeSelectionMode string NodeSyncThreshold uint32 + NodeLeaseDuration time.Duration } func (tc TestNodePoolConfig) PollFailureThreshold() uint32 { return tc.NodePollFailureThreshold } func (tc TestNodePoolConfig) PollInterval() time.Duration { return tc.NodePollInterval } func (tc TestNodePoolConfig) SelectionMode() string { return tc.NodeSelectionMode } func (tc TestNodePoolConfig) SyncThreshold() uint32 { return tc.NodeSyncThreshold } +func (tc TestNodePoolConfig) LeaseDuration() time.Duration { + return tc.NodeLeaseDuration +} func NewClientWithTestNode(t *testing.T, nodePoolCfg config.NodePool, noNewHeadsThreshold time.Duration, rpcUrl string, rpcHTTPURL *url.URL, sendonlyRPCURLs []url.URL, id int32, chainID *big.Int) (*client, error) { parsed, err := url.ParseRequestURI(rpcUrl) @@ -50,7 +54,7 @@ func NewClientWithTestNode(t *testing.T, nodePoolCfg config.NodePool, noNewHeads sendonlys = append(sendonlys, s) } - pool := NewPool(lggr, nodePoolCfg.SelectionMode(), noNewHeadsThreshold, primaries, sendonlys, chainID, "") + pool := NewPool(lggr, nodePoolCfg.SelectionMode(), nodePoolCfg.LeaseDuration(), noNewHeadsThreshold, primaries, sendonlys, chainID, "") c := &client{logger: lggr, pool: pool} 
t.Cleanup(c.Close) return c, nil diff --git a/core/chains/evm/client/node.go b/core/chains/evm/client/node.go index 84344a5a076..4f7132a6cc4 100644 --- a/core/chains/evm/client/node.go +++ b/core/chains/evm/client/node.go @@ -94,6 +94,8 @@ type Node interface { Name() string ChainID() *big.Int Order() int32 + SubscribersCount() int32 + UnsubscribeAllExceptAliveLoop() CallContext(ctx context.Context, result interface{}, method string, args ...interface{}) error BatchCallContext(ctx context.Context, b []rpc.BatchElem) error @@ -153,6 +155,9 @@ type node struct { // close the underlying subscription subs []ethereum.Subscription + // Need to track the aliveLoop subscription, so we do not cancel it when checking lease + aliveLoopSub ethereum.Subscription + // chStopInFlight can be closed to immediately cancel all in-flight requests on // this node. Closing and replacing should be serialized through // stateMu since it can happen on state transitions as well as node Close. @@ -380,6 +385,26 @@ func (n *node) disconnectAll() { n.unsubscribeAll() } +// SubscribersCount returns the number of subscriptions currently held in n.subs, read under the n.stateMu read lock +func (n *node) SubscribersCount() int32 { + n.stateMu.RLock() + defer n.stateMu.RUnlock() + return int32(len(n.subs)) +} + +// UnsubscribeAllExceptAliveLoop calls Unsubscribe on every subscription in n.subs except n.aliveLoopSub +// while holding the n.stateMu lock. Entries are not removed from n.subs here. NOTE(review): confirm n.aliveLoopSub is assigned where the alive loop subscribes; if it is left nil, every subscription is cancelled. +func (n *node) UnsubscribeAllExceptAliveLoop() { + n.stateMu.Lock() + defer n.stateMu.Unlock() + + for _, s := range n.subs { + if s != n.aliveLoopSub { + s.Unsubscribe() + } + } +} + // cancelInflightRequests closes and replaces the chStopInFlight // WARNING: NOT THREAD-SAFE // This must be called from within the n.stateMu lock diff --git a/core/chains/evm/client/pool.go b/core/chains/evm/client/pool.go index f9dca7e9cf8..7e4667623de 100644 --- a/core/chains/evm/client/pool.go +++ b/core/chains/evm/client/pool.go @@ -51,6 +51,7 @@ type NodeSelector interface { type PoolConfig interface { 
NodeSelectionMode() string NodeNoNewHeadsThreshold() time.Duration + LeaseDuration() time.Duration } // Pool represents an abstraction over one or more primary nodes @@ -65,6 +66,8 @@ type Pool struct { selectionMode string noNewHeadsThreshold time.Duration nodeSelector NodeSelector + leaseDuration time.Duration + leaseTicker *time.Ticker activeMu sync.RWMutex activeNode Node @@ -73,7 +76,7 @@ type Pool struct { wg sync.WaitGroup } -func NewPool(logger logger.Logger, selectionMode string, noNewHeadsTreshold time.Duration, nodes []Node, sendonlys []SendOnlyNode, chainID *big.Int, chainType config.ChainType) *Pool { +func NewPool(logger logger.Logger, selectionMode string, leaseDuration time.Duration, noNewHeadsTreshold time.Duration, nodes []Node, sendonlys []SendOnlyNode, chainID *big.Int, chainType config.ChainType) *Pool { if chainID == nil { panic("chainID is required") } @@ -105,6 +108,7 @@ func NewPool(logger logger.Logger, selectionMode string, noNewHeadsTreshold time noNewHeadsThreshold: noNewHeadsTreshold, nodeSelector: nodeSelector, chStop: make(chan struct{}), + leaseDuration: leaseDuration, } p.logger.Debugf("The pool is configured to use NodeSelectionMode: %s", selectionMode) @@ -150,6 +154,16 @@ func (p *Pool) Dial(ctx context.Context) error { p.wg.Add(1) go p.runLoop() + if p.leaseDuration.Seconds() > 0 && p.selectionMode != NodeSelectionMode_RoundRobin { + p.logger.Infof("The pool will switch to best node every %s", p.leaseDuration.String()) + // Create the ticker before starting checkLeaseLoop so the goroutine never writes p.leaseTicker while selectNode reads it. + p.leaseTicker = time.NewTicker(p.leaseDuration) + p.wg.Add(1) + go p.checkLeaseLoop() + } else { + p.logger.Info("Best node switching is disabled") + } + return nil }) } @@ -172,6 +186,44 @@ func (p *Pool) nLiveNodes() (nLiveNodes int, blockNumber int64, totalDifficulty return } +// checkLease selects the best node, cancels client subscriptions on every other alive node +// and makes the best node the active node. +func (p *Pool) checkLease() { + bestNode := p.nodeSelector.Select() + for _, n := range p.nodes { + // Terminate client subscriptions. Services are responsible for reconnecting, which will be routed to the new + // best node. 
Only terminate connections with more than 1 subscription to account for the aliveLoop subscription + // Skip when no best node was selected: there is nothing to switch to and bestNode.String() would panic. + if bestNode != nil && n.State() == NodeStateAlive && n != bestNode && n.SubscribersCount() > 1 { + p.logger.Infof("Switching to best node from %q to %q", n.String(), bestNode.String()) + n.UnsubscribeAllExceptAliveLoop() + } + } + + // activeNode is guarded by activeMu, so compare and assign under the same lock. + p.activeMu.Lock() + if bestNode != p.activeNode { + p.activeNode = bestNode + } + p.activeMu.Unlock() +} + +// checkLeaseLoop calls checkLease on every tick of p.leaseTicker and returns once p.chStop is signalled. +// Dial creates p.leaseTicker before starting this goroutine. +func (p *Pool) checkLeaseLoop() { + defer p.wg.Done() + defer p.leaseTicker.Stop() + + for { + select { + case <-p.leaseTicker.C: + p.checkLease() + case <-p.chStop: + return + } + } +} + func (p *Pool) runLoop() { defer p.wg.Done() @@ -271,6 +323,9 @@ func (p *Pool) selectNode() (node Node) { return &erroringNode{errMsg: errmsg.Error()} } + if p.leaseTicker != nil { + p.leaseTicker.Reset(p.leaseDuration) + } return p.activeNode } @@ -317,7 +372,7 @@ func (p *Pool) BatchCallContextAll(ctx context.Context, b []rpc.BatchElem) error return main.BatchCallContext(ctx, b) } -// Wrapped Geth client methods +// SendTransaction wrapped Geth client methods func (p *Pool) SendTransaction(ctx context.Context, tx *types.Transaction) error { main := p.selectNode() var all []SendOnlyNode diff --git a/core/chains/evm/client/pool_test.go b/core/chains/evm/client/pool_test.go index 00c42597c36..15a6484756d 100644 --- a/core/chains/evm/client/pool_test.go +++ b/core/chains/evm/client/pool_test.go @@ -5,6 +5,7 @@ import ( "math/big" "net/http/httptest" "net/url" + "sync" "testing" "time" @@ -27,6 +28,7 @@ import ( type poolConfig struct { selectionMode string noNewHeadsThreshold time.Duration + leaseDuration time.Duration } func (c poolConfig) NodeSelectionMode() string { @@ -37,9 +39,14 @@ func (c poolConfig) NodeNoNewHeadsThreshold() time.Duration { return c.noNewHeadsThreshold } +func (c poolConfig) LeaseDuration() time.Duration { + return c.leaseDuration +} + var defaultConfig evmclient.PoolConfig = &poolConfig{ 
selectionMode: evmclient.NodeSelectionMode_RoundRobin, noNewHeadsThreshold: 0, + leaseDuration: time.Second * 0, } func TestPool_Dial(t *testing.T) { @@ -157,7 +164,7 @@ func TestPool_Dial(t *testing.T) { for i, n := range test.sendNodes { sendNodes[i] = n.newSendOnlyNode(t, test.sendNodeChainID) } - p := evmclient.NewPool(logger.TestLogger(t), defaultConfig.NodeSelectionMode(), time.Second*0, nodes, sendNodes, test.poolChainID, "") + p := evmclient.NewPool(logger.TestLogger(t), defaultConfig.NodeSelectionMode(), defaultConfig.LeaseDuration(), time.Second*0, nodes, sendNodes, test.poolChainID, "") err := p.Dial(ctx) if err == nil { t.Cleanup(func() { assert.NoError(t, p.Close()) }) @@ -250,7 +257,7 @@ func TestUnit_Pool_RunLoop(t *testing.T) { nodes := []evmclient.Node{n1, n2, n3} lggr, observedLogs := logger.TestLoggerObserved(t, zap.ErrorLevel) - p := evmclient.NewPool(lggr, defaultConfig.NodeSelectionMode(), time.Second*0, nodes, []evmclient.SendOnlyNode{}, &cltest.FixtureChainID, "") + p := evmclient.NewPool(lggr, defaultConfig.NodeSelectionMode(), defaultConfig.LeaseDuration(), time.Second*0, nodes, []evmclient.SendOnlyNode{}, &cltest.FixtureChainID, "") n1.On("String").Maybe().Return("n1") n2.On("String").Maybe().Return("n2") @@ -324,9 +331,66 @@ func TestUnit_Pool_BatchCallContextAll(t *testing.T) { sendonlys = append(sendonlys, s) } - p := evmclient.NewPool(logger.TestLogger(t), defaultConfig.NodeSelectionMode(), time.Second*0, nodes, sendonlys, &cltest.FixtureChainID, "") + p := evmclient.NewPool(logger.TestLogger(t), defaultConfig.NodeSelectionMode(), defaultConfig.LeaseDuration(), time.Second*0, nodes, sendonlys, &cltest.FixtureChainID, "") assert.True(t, p.ChainType().IsValid()) assert.False(t, p.ChainType().IsL2()) require.NoError(t, p.BatchCallContextAll(ctx, b)) } + +func TestUnit_Pool_LeaseDuration(t *testing.T) { + t.Parallel() + + n1 := evmmocks.NewNode(t) + n2 := evmmocks.NewNode(t) + nodes := []evmclient.Node{n1, n2} + type nodeStateSwitch 
struct { + isAlive bool + mu sync.RWMutex + } + + nodeSwitch := nodeStateSwitch{ + isAlive: true, + mu: sync.RWMutex{}, + } + + n1.On("String").Maybe().Return("n1") + n2.On("String").Maybe().Return("n2") + n1.On("Close").Maybe().Return(nil) + n2.On("Close").Maybe().Return(nil) + n2.On("UnsubscribeAllExceptAliveLoop").Return() + n2.On("SubscribersCount").Return(int32(2)) + + n1.On("Start", mock.Anything).Return(nil).Once() + n1.On("State").Return(func() evmclient.NodeState { + nodeSwitch.mu.RLock() + defer nodeSwitch.mu.RUnlock() + if nodeSwitch.isAlive { + return evmclient.NodeStateAlive + } + return evmclient.NodeStateOutOfSync + }) + n1.On("Order").Return(int32(1)) + n1.On("ChainID").Return(testutils.FixtureChainID).Once() + + n2.On("Start", mock.Anything).Return(nil).Once() + n2.On("State").Return(evmclient.NodeStateAlive) + n2.On("Order").Return(int32(2)) + n2.On("ChainID").Return(testutils.FixtureChainID).Once() + + lggr, observedLogs := logger.TestLoggerObserved(t, zap.InfoLevel) + p := evmclient.NewPool(lggr, "PriorityLevel", time.Second*2, time.Second*0, nodes, []evmclient.SendOnlyNode{}, &cltest.FixtureChainID, "") + require.NoError(t, p.Dial(testutils.Context(t))) + t.Cleanup(func() { assert.NoError(t, p.Close()) }) + + testutils.WaitForLogMessage(t, observedLogs, "The pool will switch to best node every 2s") + nodeSwitch.mu.Lock() + nodeSwitch.isAlive = false + nodeSwitch.mu.Unlock() + testutils.WaitForLogMessage(t, observedLogs, "At least one EVM primary node is dead") + nodeSwitch.mu.Lock() + nodeSwitch.isAlive = true + nodeSwitch.mu.Unlock() + testutils.WaitForLogMessage(t, observedLogs, `Switching to best node from "n2" to "n1"`) + +} diff --git a/core/chains/evm/config/chain_scoped_node_pool.go b/core/chains/evm/config/chain_scoped_node_pool.go index 2f26aaab0c7..8244d620a53 100644 --- a/core/chains/evm/config/chain_scoped_node_pool.go +++ b/core/chains/evm/config/chain_scoped_node_pool.go @@ -25,3 +25,7 @@ func (n *nodePoolConfig) SelectionMode() 
string { func (n *nodePoolConfig) SyncThreshold() uint32 { return *n.c.SyncThreshold } + +func (n *nodePoolConfig) LeaseDuration() time.Duration { + return n.c.LeaseDuration.Duration() +} diff --git a/core/chains/evm/config/config.go b/core/chains/evm/config/config.go index 18c075dc24a..f8ec030969e 100644 --- a/core/chains/evm/config/config.go +++ b/core/chains/evm/config/config.go @@ -125,6 +125,7 @@ type NodePool interface { PollInterval() time.Duration SelectionMode() string SyncThreshold() uint32 + LeaseDuration() time.Duration } // TODO BCF-2509 does the chainscopedconfig really need the entire app config? diff --git a/core/chains/evm/config/toml/config.go b/core/chains/evm/config/toml/config.go index a62c554a21e..8097f752dc5 100644 --- a/core/chains/evm/config/toml/config.go +++ b/core/chains/evm/config/toml/config.go @@ -689,6 +689,7 @@ type NodePool struct { PollInterval *models.Duration SelectionMode *string SyncThreshold *uint32 + LeaseDuration *models.Duration } func (p *NodePool) setFrom(f *NodePool) { @@ -704,6 +705,9 @@ func (p *NodePool) setFrom(f *NodePool) { if v := f.SyncThreshold; v != nil { p.SyncThreshold = v } + if v := f.LeaseDuration; v != nil { + p.LeaseDuration = v + } } type OCR struct { diff --git a/core/chains/evm/config/toml/defaults/fallback.toml b/core/chains/evm/config/toml/defaults/fallback.toml index a0c4f9b4c6b..a75cfa0bf3b 100644 --- a/core/chains/evm/config/toml/defaults/fallback.toml +++ b/core/chains/evm/config/toml/defaults/fallback.toml @@ -58,6 +58,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 diff --git a/core/chains/evm/mocks/node.go b/core/chains/evm/mocks/node.go index 9470bd39387..f993ff6e8f1 100644 --- a/core/chains/evm/mocks/node.go +++ b/core/chains/evm/mocks/node.go @@ -591,6 +591,20 @@ func (_m *Node) SubscribeFilterLogs(ctx context.Context, q ethereum.FilterQuery, return r0, r1 } +// SubscribersCount 
provides a mock function with given fields: +func (_m *Node) SubscribersCount() int32 { + ret := _m.Called() + + var r0 int32 + if rf, ok := ret.Get(0).(func() int32); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(int32) + } + + return r0 +} + // SuggestGasPrice provides a mock function with given fields: ctx func (_m *Node) SuggestGasPrice(ctx context.Context) (*big.Int, error) { ret := _m.Called(ctx) @@ -695,6 +709,11 @@ func (_m *Node) TransactionReceipt(ctx context.Context, txHash common.Hash) (*ty return r0, r1 } +// UnsubscribeAllExceptAliveLoop provides a mock function with given fields: +func (_m *Node) UnsubscribeAllExceptAliveLoop() { + _m.Called() +} + type mockConstructorTestingTNewNode interface { mock.TestingT Cleanup(func()) diff --git a/core/config/docs/chains-evm.toml b/core/config/docs/chains-evm.toml index 7517eff61be..c8b5395d6d7 100644 --- a/core/config/docs/chains-evm.toml +++ b/core/config/docs/chains-evm.toml @@ -311,6 +311,7 @@ PollInterval = '10s' # Default # SelectionMode controls node selection strategy: # - HighestHead: use the node with the highest head number # - RoundRobin: rotate through nodes, per-request +# - PriorityLevel: use the node with the smallest order number # - TotalDifficulty: use the node with the greatest total difficulty SelectionMode = 'HighestHead' # Default # SyncThreshold controls how far a node may lag behind the best node before being marked out-of-sync. @@ -318,6 +319,13 @@ SelectionMode = 'HighestHead' # Default # # Set to 0 to disable this check. SyncThreshold = 5 # Default +# LeaseDuration is the minimum duration that the selected "best" node (as defined by SelectionMode) will be used, +# before switching to a better one if available. It also controls how often the lease check is done. +# Setting this to a low value (under 1m) might cause RPC to switch too aggressively. 
+# Recommended value is over 5m +# +# Set to '0s' to disable +LeaseDuration = '0s' # Default [EVM.OCR] # ContractConfirmations sets `OCR.ContractConfirmations` for this EVM chain. diff --git a/core/services/chainlink/config_test.go b/core/services/chainlink/config_test.go index 480d06b5806..46b7b97cedd 100644 --- a/core/services/chainlink/config_test.go +++ b/core/services/chainlink/config_test.go @@ -191,6 +191,7 @@ var ( ) func TestConfig_Marshal(t *testing.T) { + zeroSeconds := models.MustMakeDuration(time.Second * 0) second := models.MustMakeDuration(time.Second) minute := models.MustMakeDuration(time.Minute) hour := models.MustMakeDuration(time.Hour) @@ -528,6 +529,7 @@ func TestConfig_Marshal(t *testing.T) { PollInterval: &minute, SelectionMode: &selectionMode, SyncThreshold: ptr[uint32](13), + LeaseDuration: &zeroSeconds, }, OCR: evmcfg.OCR{ ContractConfirmations: ptr[uint16](11), @@ -926,6 +928,7 @@ PollFailureThreshold = 5 PollInterval = '1m0s' SelectionMode = 'HighestHead' SyncThreshold = 13 +LeaseDuration = '0s' [EVM.OCR] ContractConfirmations = 11 diff --git a/core/services/chainlink/testdata/config-full.toml b/core/services/chainlink/testdata/config-full.toml index 92d0b553d6e..c919d766ead 100644 --- a/core/services/chainlink/testdata/config-full.toml +++ b/core/services/chainlink/testdata/config-full.toml @@ -290,6 +290,7 @@ PollFailureThreshold = 5 PollInterval = '1m0s' SelectionMode = 'HighestHead' SyncThreshold = 13 +LeaseDuration = '0s' [EVM.OCR] ContractConfirmations = 11 diff --git a/core/services/chainlink/testdata/config-multi-chain-effective.toml b/core/services/chainlink/testdata/config-multi-chain-effective.toml index 665de9be8cb..63a37101305 100644 --- a/core/services/chainlink/testdata/config-multi-chain-effective.toml +++ b/core/services/chainlink/testdata/config-multi-chain-effective.toml @@ -271,6 +271,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [EVM.OCR] 
ContractConfirmations = 4 @@ -355,6 +356,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [EVM.OCR] ContractConfirmations = 4 @@ -433,6 +435,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [EVM.OCR] ContractConfirmations = 4 diff --git a/core/web/resolver/testdata/config-full.toml b/core/web/resolver/testdata/config-full.toml index ff7eb832c9c..35d338224bd 100644 --- a/core/web/resolver/testdata/config-full.toml +++ b/core/web/resolver/testdata/config-full.toml @@ -289,6 +289,7 @@ PollFailureThreshold = 5 PollInterval = '1m0s' SelectionMode = 'HighestHead' SyncThreshold = 13 +LeaseDuration = '0s' [EVM.OCR] ContractConfirmations = 11 diff --git a/core/web/resolver/testdata/config-multi-chain-effective.toml b/core/web/resolver/testdata/config-multi-chain-effective.toml index 665de9be8cb..63a37101305 100644 --- a/core/web/resolver/testdata/config-multi-chain-effective.toml +++ b/core/web/resolver/testdata/config-multi-chain-effective.toml @@ -271,6 +271,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [EVM.OCR] ContractConfirmations = 4 @@ -355,6 +356,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [EVM.OCR] ContractConfirmations = 4 @@ -433,6 +435,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [EVM.OCR] ContractConfirmations = 4 diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 64bace19935..724c95af6da 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Simple password use in production builds is now disallowed - nodes with this configuration will not boot and will not pass config validation. 
- Helper migrations function for injecting env vars into goose migrations. This was done to inject chainID into evm chain id not null in specs migrations. - OCR2 jobs now support querying the state contract for configurations if it has been deployed. This can help on chains such as BSC which "manage" state bloat by arbitrarily deleting logs older than a certain date. In this case, if logs are missing we will query the contract directly and retrieve the latest config from chain state. Chainlink will perform no extra RPC calls unless the job spec has this feature explicitly enabled. On chains that require this, nops may see an increase in RPC calls. This can be enabled for OCR2 jobs by specifying `ConfigContractAddress` in the relay config TOML. +- Added new configuration field named `LeaseDuration` for `EVM.NodePool` that will periodically check if internal subscriptions are connected to the "best" (as defined by the `SelectionMode`) node and switch to it if necessary. Setting this value to `0s` will disable this feature. 
### Removed diff --git a/docs/CONFIG.md b/docs/CONFIG.md index ceb3d3dfe08..0105a8c0e80 100644 --- a/docs/CONFIG.md +++ b/docs/CONFIG.md @@ -1490,6 +1490,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -1568,6 +1569,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -1646,6 +1648,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -1724,6 +1727,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -1803,6 +1807,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -1881,6 +1886,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -1959,6 +1965,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -2038,6 +2045,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -2116,6 +2124,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -2193,6 +2202,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -2270,6 +2280,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] 
ContractConfirmations = 4 @@ -2348,6 +2359,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -2427,6 +2439,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -2505,6 +2518,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -2583,6 +2597,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -2661,6 +2676,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -2740,6 +2756,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -2818,6 +2835,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -2895,6 +2913,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -2973,6 +2992,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -3050,6 +3070,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -3128,6 +3149,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -3205,6 +3227,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' 
SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -3283,6 +3306,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -3362,6 +3386,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -3440,6 +3465,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -3518,6 +3544,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -3596,6 +3623,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -3674,6 +3702,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -3752,6 +3781,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -3830,6 +3860,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -3909,6 +3940,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -3988,6 +4020,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 10 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -4065,6 +4098,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -4142,6 +4176,7 @@ PollFailureThreshold = 5 
PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 1 @@ -4220,6 +4255,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -4298,6 +4334,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -4376,6 +4413,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [OCR] ContractConfirmations = 4 @@ -4996,6 +5034,7 @@ PollFailureThreshold = 5 # Default PollInterval = '10s' # Default SelectionMode = 'HighestHead' # Default SyncThreshold = 5 # Default +LeaseDuration = '0s' # Default ``` The node pool manages multiple RPC endpoints. @@ -5024,6 +5063,7 @@ SelectionMode = 'HighestHead' # Default SelectionMode controls node selection strategy: - HighestHead: use the node with the highest head number - RoundRobin: rotate through nodes, per-request +- PriorityLevel: use the node with the smallest order number - TotalDifficulty: use the node with the greatest total difficulty ### SyncThreshold @@ -5035,6 +5075,17 @@ Depending on `SelectionMode`, this represents a difference in the number of bloc Set to 0 to disable this check. +### LeaseDuration +```toml +LeaseDuration = '0s' # Default +``` +LeaseDuration is the minimum duration that the selected "best" node (as defined by SelectionMode) will be used, +before switching to a better one if available. It also controls how often the lease check is done. +Setting this to a low value (under 1m) might cause RPC to switch too aggressively. 
+Recommended value is over 5m + +Set to '0s' to disable + ## EVM.OCR ```toml [EVM.OCR] diff --git a/testdata/scripts/node/validate/disk-based-logging-disabled.txtar b/testdata/scripts/node/validate/disk-based-logging-disabled.txtar index 5f02793ff57..beb0f341c48 100644 --- a/testdata/scripts/node/validate/disk-based-logging-disabled.txtar +++ b/testdata/scripts/node/validate/disk-based-logging-disabled.txtar @@ -327,6 +327,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [EVM.OCR] ContractConfirmations = 4 diff --git a/testdata/scripts/node/validate/disk-based-logging-no-dir.txtar b/testdata/scripts/node/validate/disk-based-logging-no-dir.txtar index 527a739f7ca..32007bb53b8 100644 --- a/testdata/scripts/node/validate/disk-based-logging-no-dir.txtar +++ b/testdata/scripts/node/validate/disk-based-logging-no-dir.txtar @@ -327,6 +327,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [EVM.OCR] ContractConfirmations = 4 diff --git a/testdata/scripts/node/validate/disk-based-logging.txtar b/testdata/scripts/node/validate/disk-based-logging.txtar index 791a8aad076..4750e37ee3c 100644 --- a/testdata/scripts/node/validate/disk-based-logging.txtar +++ b/testdata/scripts/node/validate/disk-based-logging.txtar @@ -327,6 +327,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [EVM.OCR] ContractConfirmations = 4 diff --git a/testdata/scripts/node/validate/invalid.txtar b/testdata/scripts/node/validate/invalid.txtar index e9db92fb8f7..b523b1a2cf1 100644 --- a/testdata/scripts/node/validate/invalid.txtar +++ b/testdata/scripts/node/validate/invalid.txtar @@ -317,6 +317,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [EVM.OCR] ContractConfirmations = 4 diff --git 
a/testdata/scripts/node/validate/valid.txtar b/testdata/scripts/node/validate/valid.txtar index f48fa1926d8..69a46dfb7a0 100644 --- a/testdata/scripts/node/validate/valid.txtar +++ b/testdata/scripts/node/validate/valid.txtar @@ -324,6 +324,7 @@ PollFailureThreshold = 5 PollInterval = '10s' SelectionMode = 'HighestHead' SyncThreshold = 5 +LeaseDuration = '0s' [EVM.OCR] ContractConfirmations = 4