From 83b18422e8d6a2d9889eebf0bac41b0a405b6c81 Mon Sep 17 00:00:00 2001 From: Gustavo Gama Date: Tue, 17 Dec 2024 22:56:16 -0300 Subject: [PATCH] feat(job-distributor): periodically sync node info with job distributors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There’s a behavior that we’ve observed for some time on the NOP side where they will add/update a chain configuration of the Job Distributor panel but the change is not reflected on the service itself. This leads to inefficiencies as NOPs are unaware of this and thus need to be notified so that they may "reapply" the configuration. After some investigation, we suspect that this is due to connectivity issues between the nodes and the job distributor instance, which causes the message with the update to be lost. As a fix, Brendon suggested periodically resending the latest chain updates to all connected job distributors. This PR implements this solution by creating a goroutine upon job distributor service start, which sets up a `time.Tick` that calls the `SyncNodeInfo` method according to a parameterized interval. The default interval is set to 1 hour, as suggested by the operations team. Ticket Number: DPA-1371 --- .changeset/neat-penguins-report.md | 5 +++ core/config/app_config.go | 1 + core/config/docs/core.toml | 2 + core/config/toml/types.go | 18 +++++--- core/services/chainlink/config_general.go | 4 ++ core/services/chainlink/config_test.go | 8 ++-- .../chainlink/mocks/general_config.go | 45 +++++++++++++++++++ .../testdata/config-empty-effective.toml | 1 + .../chainlink/testdata/config-full.toml | 1 + .../config-multi-chain-effective.toml | 1 + core/services/feeds/config.go | 1 + core/services/feeds/service.go | 29 ++++++++++++ .../testdata/config-empty-effective.toml | 1 + core/web/resolver/testdata/config-full.toml | 1 + .../config-multi-chain-effective.toml | 1 + docs/CONFIG.md | 7 +++ .../scripts/config/merge_raw_configs.txtar | 1 + testdata/scripts/node/validate/default.txtar | 1 + .../node/validate/defaults-override.txtar | 1 + .../disk-based-logging-disabled.txtar | 1 + .../validate/disk-based-logging-no-dir.txtar | 1 + .../node/validate/disk-based-logging.txtar | 1 + .../node/validate/invalid-ocr-p2p.txtar | 1 + testdata/scripts/node/validate/invalid.txtar | 1 + testdata/scripts/node/validate/valid.txtar | 1 + testdata/scripts/node/validate/warnings.txtar | 1 + 26 files changed, 127 insertions(+), 9 deletions(-) create mode 100644 .changeset/neat-penguins-report.md diff --git a/.changeset/neat-penguins-report.md b/.changeset/neat-penguins-report.md new file mode 100644 index 00000000000..45f3cb28ad6 --- /dev/null +++ b/.changeset/neat-penguins-report.md @@ -0,0 +1,5 @@ +--- +"chainlink": patch +--- + +#added periodically sync node info with job distributors diff --git a/core/config/app_config.go b/core/config/app_config.go index 3f2a5472b24..0a14e898723 100644 --- a/core/config/app_config.go +++ b/core/config/app_config.go @@ -18,6 +18,7 @@ type AppConfig interface { AppID() uuid.UUID RootDir() string ShutdownGracePeriod() time.Duration + FeedsManagerSyncInterval() time.Duration InsecureFastScrypt() bool EVMEnabled() bool EVMRPCEnabled() bool diff --git a/core/config/docs/core.toml b/core/config/docs/core.toml index 20c519e81a1..dfee55ebac7 100644 --- a/core/config/docs/core.toml +++ b/core/config/docs/core.toml @@ -5,6 +5,8 @@ InsecureFastScrypt = false # Default RootDir = '~/.chainlink' # Default # ShutdownGracePeriod is the maximum time allowed to shut down gracefully. If exceeded, the node will terminate immediately to avoid being SIGKILLed. ShutdownGracePeriod = '5s' # Default +# FeedsManagerSyncInterval is the interval between calls to the feeds manager instance to synchronize the chain config +FeedsManagerSyncInterval = '1h' # Default [Feature] # FeedsManager enables the feeds manager service. diff --git a/core/config/toml/types.go b/core/config/toml/types.go index 620f7d96eee..f1d82f7cff6 100644 --- a/core/config/toml/types.go +++ b/core/config/toml/types.go @@ -34,10 +34,11 @@ var ErrUnsupported = errors.New("unsupported with config v2") // Core holds the core configuration. See chainlink.Config for more information. type Core struct { // General/misc - AppID uuid.UUID `toml:"-"` // random or test - InsecureFastScrypt *bool - RootDir *string - ShutdownGracePeriod *commonconfig.Duration + AppID uuid.UUID `toml:"-"` // random or test + InsecureFastScrypt *bool + RootDir *string + ShutdownGracePeriod *commonconfig.Duration + FeedsManagerSyncInterval *commonconfig.Duration Feature Feature `toml:",omitempty"` Database Database `toml:",omitempty"` @@ -72,6 +73,9 @@ func (c *Core) SetFrom(f *Core) { if v := f.ShutdownGracePeriod; v != nil { c.ShutdownGracePeriod = v } + if v := f.FeedsManagerSyncInterval; v != nil { + c.FeedsManagerSyncInterval = v + } c.Feature.setFrom(&f.Feature) c.Database.setFrom(&f.Database) @@ -410,8 +414,10 @@ func (l *DatabaseLock) Mode() string { func (l *DatabaseLock) ValidateConfig() (err error) { if l.LeaseRefreshInterval.Duration() > l.LeaseDuration.Duration()/2 { - err = multierr.Append(err, configutils.ErrInvalid{Name: "LeaseRefreshInterval", Value: l.LeaseRefreshInterval.String(), - Msg: fmt.Sprintf("must be less than or equal to half of LeaseDuration (%s)", l.LeaseDuration.String())}) + err = multierr.Append(err, configutils.ErrInvalid{ + Name: "LeaseRefreshInterval", Value: l.LeaseRefreshInterval.String(), + Msg: fmt.Sprintf("must be less than or equal to half of LeaseDuration (%s)", l.LeaseDuration.String()), + }) } return } diff --git a/core/services/chainlink/config_general.go b/core/services/chainlink/config_general.go index dd0dc87b59a..4d3670154f8 100644 --- a/core/services/chainlink/config_general.go +++ b/core/services/chainlink/config_general.go @@ -422,6 +422,10 @@ func (g *generalConfig) ShutdownGracePeriod() time.Duration { return g.c.ShutdownGracePeriod.Duration() } +func (g *generalConfig) FeedsManagerSyncInterval() time.Duration { + return g.c.FeedsManagerSyncInterval.Duration() +} + func (g *generalConfig) FluxMonitor() config.FluxMonitor { return &fluxMonitorConfig{c: g.c.FluxMonitor} } diff --git a/core/services/chainlink/config_test.go b/core/services/chainlink/config_test.go index 769005feb72..941c7479252 100644 --- a/core/services/chainlink/config_test.go +++ b/core/services/chainlink/config_test.go @@ -262,9 +262,10 @@ func TestConfig_Marshal(t *testing.T) { global := Config{ Core: toml.Core{ - InsecureFastScrypt: ptr(true), - RootDir: ptr("test/root/dir"), - ShutdownGracePeriod: commoncfg.MustNewDuration(10 * time.Second), + InsecureFastScrypt: ptr(true), + RootDir: ptr("test/root/dir"), + ShutdownGracePeriod: commoncfg.MustNewDuration(10 * time.Second), + FeedsManagerSyncInterval: commoncfg.MustNewDuration(15 * time.Minute), Insecure: toml.Insecure{ DevWebServer: ptr(false), OCRDevelopmentMode: ptr(false), @@ -857,6 +858,7 @@ func TestConfig_Marshal(t *testing.T) { {"global", global, `InsecureFastScrypt = true RootDir = 'test/root/dir' ShutdownGracePeriod = '10s' +FeedsManagerSyncInterval = '15m0s' [Insecure] DevWebServer = false diff --git a/core/services/chainlink/mocks/general_config.go b/core/services/chainlink/mocks/general_config.go index db1efb3d86f..dce955a48f9 100644 --- a/core/services/chainlink/mocks/general_config.go +++ b/core/services/chainlink/mocks/general_config.go @@ -694,6 +694,51 @@ func (_c *GeneralConfig_Feature_Call) RunAndReturn(run func() config.Feature) *G return _c } +// FeedsManagerSyncInterval provides a mock function with given fields: +func (_m *GeneralConfig) FeedsManagerSyncInterval() time.Duration { + ret := _m.Called() + + if len(ret) == 0 { + panic("no return value specified for FeedsManagerSyncInterval") + } + + var r0 time.Duration + if rf, ok := ret.Get(0).(func() time.Duration); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(time.Duration) + } + + return r0 +} + +// GeneralConfig_FeedsManagerSyncInterval_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'FeedsManagerSyncInterval' +type GeneralConfig_FeedsManagerSyncInterval_Call struct { + *mock.Call +} + +// FeedsManagerSyncInterval is a helper method to define mock.On call +func (_e *GeneralConfig_Expecter) FeedsManagerSyncInterval() *GeneralConfig_FeedsManagerSyncInterval_Call { + return &GeneralConfig_FeedsManagerSyncInterval_Call{Call: _e.mock.On("FeedsManagerSyncInterval")} +} + +func (_c *GeneralConfig_FeedsManagerSyncInterval_Call) Run(run func()) *GeneralConfig_FeedsManagerSyncInterval_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *GeneralConfig_FeedsManagerSyncInterval_Call) Return(_a0 time.Duration) *GeneralConfig_FeedsManagerSyncInterval_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *GeneralConfig_FeedsManagerSyncInterval_Call) RunAndReturn(run func() time.Duration) *GeneralConfig_FeedsManagerSyncInterval_Call { + _c.Call.Return(run) + return _c +} + // FluxMonitor provides a mock function with given fields: func (_m *GeneralConfig) FluxMonitor() config.FluxMonitor { ret := _m.Called() diff --git a/core/services/chainlink/testdata/config-empty-effective.toml b/core/services/chainlink/testdata/config-empty-effective.toml index a2052c04a8e..4f2ea1ad86a 100644 --- a/core/services/chainlink/testdata/config-empty-effective.toml +++ b/core/services/chainlink/testdata/config-empty-effective.toml @@ -1,6 +1,7 @@ InsecureFastScrypt = false RootDir = '~/.chainlink' ShutdownGracePeriod = '5s' +FeedsManagerSyncInterval = '1h0m0s' [Feature] FeedsManager = true diff --git a/core/services/chainlink/testdata/config-full.toml b/core/services/chainlink/testdata/config-full.toml index 47193f80184..41df441e24d 100644 --- a/core/services/chainlink/testdata/config-full.toml +++ b/core/services/chainlink/testdata/config-full.toml @@ -1,6 +1,7 @@ InsecureFastScrypt = true RootDir = 'test/root/dir' ShutdownGracePeriod = '10s' +FeedsManagerSyncInterval = '15m0s' [Feature] FeedsManager = true diff --git a/core/services/chainlink/testdata/config-multi-chain-effective.toml b/core/services/chainlink/testdata/config-multi-chain-effective.toml index 7e658b170db..a14d8d48c23 100644 --- a/core/services/chainlink/testdata/config-multi-chain-effective.toml +++ b/core/services/chainlink/testdata/config-multi-chain-effective.toml @@ -1,6 +1,7 @@ InsecureFastScrypt = false RootDir = 'my/root/dir' ShutdownGracePeriod = '5s' +FeedsManagerSyncInterval = '1h0m0s' [Feature] FeedsManager = true diff --git a/core/services/feeds/config.go b/core/services/feeds/config.go index 626dc862e94..a874f6f5658 100644 --- a/core/services/feeds/config.go +++ b/core/services/feeds/config.go @@ -10,6 +10,7 @@ import ( type GeneralConfig interface { OCR() coreconfig.OCR Insecure() coreconfig.Insecure + FeedsManagerSyncInterval() time.Duration } type FeatureConfig interface { diff --git a/core/services/feeds/service.go b/core/services/feeds/service.go index 04ab747c9ab..b638bb30f61 100644 --- a/core/services/feeds/service.go +++ b/core/services/feeds/service.go @@ -5,6 +5,7 @@ import ( "database/sql" "encoding/hex" "fmt" + "time" "github.com/ethereum/go-ethereum/common" "github.com/google/uuid" @@ -1135,6 +1136,8 @@ func (s *service) Start(ctx context.Context) error { s.lggr.Error("failed to observe job proposal count when starting service", err) } + go s.periodicallySyncNodeInfo(ctx) + return nil }) } @@ -1550,6 +1553,32 @@ func (s *service) isRevokable(propStatus JobProposalStatus, specStatus SpecStatu return propStatus != JobProposalStatusDeleted && (specStatus == SpecStatusPending || specStatus == SpecStatusCancelled) } +func (s *service) periodicallySyncNodeInfo(ctx context.Context) { + s.lggr.Info("starting periodic sync node info goroutine") + + timer := time.NewTicker(s.gCfg.FeedsManagerSyncInterval()) + for { + select { + case <-timer.C: + managers, err := s.ListManagers(ctx) + if err != nil { + s.lggr.Errorw("failed to list managers", "err", err) + } + + for _, manager := range managers { + s.lggr.Infow("synchronizing node info", "managerID", manager.ID) + err := s.SyncNodeInfo(ctx, manager.ID) + if err != nil { + s.lggr.Errorw("failed to sync node info", "id", manager.ID, "err", err) + } + } + case <-ctx.Done(): + s.lggr.Debugw("context done; exiting periodic sync node info goroutine") + return + } + } +} + var _ Service = &NullService{} // NullService defines an implementation of the Feeds Service that is used diff --git a/core/web/resolver/testdata/config-empty-effective.toml b/core/web/resolver/testdata/config-empty-effective.toml index a2052c04a8e..4f2ea1ad86a 100644 --- a/core/web/resolver/testdata/config-empty-effective.toml +++ b/core/web/resolver/testdata/config-empty-effective.toml @@ -1,6 +1,7 @@ InsecureFastScrypt = false RootDir = '~/.chainlink' ShutdownGracePeriod = '5s' +FeedsManagerSyncInterval = '1h0m0s' [Feature] FeedsManager = true diff --git a/core/web/resolver/testdata/config-full.toml b/core/web/resolver/testdata/config-full.toml index ef26bfea75a..5721ed4a753 100644 --- a/core/web/resolver/testdata/config-full.toml +++ b/core/web/resolver/testdata/config-full.toml @@ -1,6 +1,7 @@ InsecureFastScrypt = true RootDir = 'test/root/dir' ShutdownGracePeriod = '10s' +FeedsManagerSyncInterval = '15m0s' [Feature] FeedsManager = true diff --git a/core/web/resolver/testdata/config-multi-chain-effective.toml b/core/web/resolver/testdata/config-multi-chain-effective.toml index 7bdf50b9080..3f5b0ce98c9 100644 --- a/core/web/resolver/testdata/config-multi-chain-effective.toml +++ b/core/web/resolver/testdata/config-multi-chain-effective.toml @@ -1,6 +1,7 @@ InsecureFastScrypt = false RootDir = 'my/root/dir' ShutdownGracePeriod = '5s' +FeedsManagerSyncInterval = '1h0m0s' [Feature] FeedsManager = true diff --git a/docs/CONFIG.md b/docs/CONFIG.md index 946703695eb..b45b6d2f3e5 100644 --- a/docs/CONFIG.md +++ b/docs/CONFIG.md @@ -23,6 +23,7 @@ HTTPURL = 'https://foo.bar' # Required InsecureFastScrypt = false # Default RootDir = '~/.chainlink' # Default ShutdownGracePeriod = '5s' # Default +FeedsManagerSyncInterval = '1h' # Default ``` @@ -45,6 +46,12 @@ ShutdownGracePeriod = '5s' # Default ``` ShutdownGracePeriod is the maximum time allowed to shut down gracefully. If exceeded, the node will terminate immediately to avoid being SIGKILLed. +### FeedsManagerSyncInterval +```toml +FeedsManagerSyncInterval = '1h' # Default +``` +FeedsManagerSyncInterval is the interval between calls to the feeds manager instance to synchronize the chain config + ## Feature ```toml [Feature] diff --git a/testdata/scripts/config/merge_raw_configs.txtar b/testdata/scripts/config/merge_raw_configs.txtar index efac49f8ef8..ab6c1e5c7cc 100644 --- a/testdata/scripts/config/merge_raw_configs.txtar +++ b/testdata/scripts/config/merge_raw_configs.txtar @@ -148,6 +148,7 @@ Publickey = 'abcdef' InsecureFastScrypt = false RootDir = '~/.chainlink' ShutdownGracePeriod = '5s' +FeedsManagerSyncInterval = '1h0m0s' [Feature] FeedsManager = true diff --git a/testdata/scripts/node/validate/default.txtar b/testdata/scripts/node/validate/default.txtar index d4e4a188d2a..9240613aed3 100644 --- a/testdata/scripts/node/validate/default.txtar +++ b/testdata/scripts/node/validate/default.txtar @@ -13,6 +13,7 @@ AllowSimplePasswords = false InsecureFastScrypt = false RootDir = '~/.chainlink' ShutdownGracePeriod = '5s' +FeedsManagerSyncInterval = '1h0m0s' [Feature] FeedsManager = true diff --git a/testdata/scripts/node/validate/defaults-override.txtar b/testdata/scripts/node/validate/defaults-override.txtar index 336f170bd1b..6128074003d 100644 --- a/testdata/scripts/node/validate/defaults-override.txtar +++ b/testdata/scripts/node/validate/defaults-override.txtar @@ -74,6 +74,7 @@ HTTPURL = 'https://foo.bar' InsecureFastScrypt = false RootDir = '~/.chainlink' ShutdownGracePeriod = '5s' +FeedsManagerSyncInterval = '1h0m0s' [Feature] FeedsManager = true diff --git a/testdata/scripts/node/validate/disk-based-logging-disabled.txtar b/testdata/scripts/node/validate/disk-based-logging-disabled.txtar index 677058e1c08..74160737770 100644 --- a/testdata/scripts/node/validate/disk-based-logging-disabled.txtar +++ b/testdata/scripts/node/validate/disk-based-logging-disabled.txtar @@ -57,6 +57,7 @@ HTTPURL = 'https://foo.bar' InsecureFastScrypt = false RootDir = '~/.chainlink' ShutdownGracePeriod = '5s' +FeedsManagerSyncInterval = '1h0m0s' [Feature] FeedsManager = true diff --git a/testdata/scripts/node/validate/disk-based-logging-no-dir.txtar b/testdata/scripts/node/validate/disk-based-logging-no-dir.txtar index 0e5a78f4a39..3d6f5d90186 100644 --- a/testdata/scripts/node/validate/disk-based-logging-no-dir.txtar +++ b/testdata/scripts/node/validate/disk-based-logging-no-dir.txtar @@ -57,6 +57,7 @@ HTTPURL = 'https://foo.bar' InsecureFastScrypt = false RootDir = '~/.chainlink' ShutdownGracePeriod = '5s' +FeedsManagerSyncInterval = '1h0m0s' [Feature] FeedsManager = true diff --git a/testdata/scripts/node/validate/disk-based-logging.txtar b/testdata/scripts/node/validate/disk-based-logging.txtar index 7fc05533a47..a54dd9ae2f0 100644 --- a/testdata/scripts/node/validate/disk-based-logging.txtar +++ b/testdata/scripts/node/validate/disk-based-logging.txtar @@ -57,6 +57,7 @@ HTTPURL = 'https://foo.bar' InsecureFastScrypt = false RootDir = '~/.chainlink' ShutdownGracePeriod = '5s' +FeedsManagerSyncInterval = '1h0m0s' [Feature] FeedsManager = true diff --git a/testdata/scripts/node/validate/invalid-ocr-p2p.txtar b/testdata/scripts/node/validate/invalid-ocr-p2p.txtar index 2cc7b7afe0e..d1e206c1fc2 100644 --- a/testdata/scripts/node/validate/invalid-ocr-p2p.txtar +++ b/testdata/scripts/node/validate/invalid-ocr-p2p.txtar @@ -42,6 +42,7 @@ Enabled = false InsecureFastScrypt = false RootDir = '~/.chainlink' ShutdownGracePeriod = '5s' +FeedsManagerSyncInterval = '1h0m0s' [Feature] FeedsManager = true diff --git a/testdata/scripts/node/validate/invalid.txtar b/testdata/scripts/node/validate/invalid.txtar index b048af38a3b..9195eb2924c 100644 --- a/testdata/scripts/node/validate/invalid.txtar +++ b/testdata/scripts/node/validate/invalid.txtar @@ -47,6 +47,7 @@ HTTPURL = 'https://foo.bar' InsecureFastScrypt = false RootDir = '~/.chainlink' ShutdownGracePeriod = '5s' +FeedsManagerSyncInterval = '1h0m0s' [Feature] FeedsManager = true diff --git a/testdata/scripts/node/validate/valid.txtar b/testdata/scripts/node/validate/valid.txtar index bc84a9b2a37..7d2fc5827a3 100644 --- a/testdata/scripts/node/validate/valid.txtar +++ b/testdata/scripts/node/validate/valid.txtar @@ -54,6 +54,7 @@ HTTPURL = 'https://foo.bar' InsecureFastScrypt = false RootDir = '~/.chainlink' ShutdownGracePeriod = '5s' +FeedsManagerSyncInterval = '1h0m0s' [Feature] FeedsManager = true diff --git a/testdata/scripts/node/validate/warnings.txtar b/testdata/scripts/node/validate/warnings.txtar index 85b7bc6a253..34a702a19d0 100644 --- a/testdata/scripts/node/validate/warnings.txtar +++ b/testdata/scripts/node/validate/warnings.txtar @@ -36,6 +36,7 @@ TLSCertPath = 'something' InsecureFastScrypt = false RootDir = '~/.chainlink' ShutdownGracePeriod = '5s' +FeedsManagerSyncInterval = '1h0m0s' [Feature] FeedsManager = true