Skip to content

Commit

Permalink
feat(job-distributor): periodically sync node info with job distributors
Browse files Browse the repository at this point in the history
For some time we have observed that when NOPs add or update a chain
configuration in the Job Distributor panel, the change is not always
reflected on the service itself. This leads to inefficiencies, because
NOPs are unaware of the failure and must be notified so that they can
"reapply" the configuration.

After some investigation, we suspect that this is due to connectivity
issues between the nodes and the job distributor instance, which causes
the message with the update to be lost. As a fix, Brendon suggested
periodically resending the latest chain updates to all connected job
distributors.

This PR implements this solution by starting a goroutine when the job
distributor service starts. The goroutine sets up a `time.Ticker` and, on
each tick, calls the `SyncNodeInfo` method for every registered manager.
The interval is configurable and defaults to 1 hour, as suggested by the
operations team.

Ticket Number: DPA-1371
  • Loading branch information
gustavogama-cll committed Dec 18, 2024
1 parent a744ed6 commit 301972c
Show file tree
Hide file tree
Showing 22 changed files with 117 additions and 9 deletions.
5 changes: 5 additions & 0 deletions .changeset/neat-penguins-report.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"chainlink": patch
---

#added periodic syncing of node info with job distributors
1 change: 1 addition & 0 deletions core/config/app_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ type AppConfig interface {
AppID() uuid.UUID
RootDir() string
ShutdownGracePeriod() time.Duration
FeedsManagerSyncInterval() time.Duration
InsecureFastScrypt() bool
EVMEnabled() bool
EVMRPCEnabled() bool
Expand Down
2 changes: 2 additions & 0 deletions core/config/docs/core.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ InsecureFastScrypt = false # Default
RootDir = '~/.chainlink' # Default
# ShutdownGracePeriod is the maximum time allowed to shut down gracefully. If exceeded, the node will terminate immediately to avoid being SIGKILLed.
ShutdownGracePeriod = '5s' # Default
# FeedsManagerSyncInterval is the interval at which the node re-sends its node info (including chain configs) to each connected feeds manager (job distributor).
FeedsManagerSyncInterval = '1h' # Default

[Feature]
# FeedsManager enables the feeds manager service.
Expand Down
18 changes: 12 additions & 6 deletions core/config/toml/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,11 @@ var ErrUnsupported = errors.New("unsupported with config v2")
// Core holds the core configuration. See chainlink.Config for more information.
type Core struct {
// General/misc
AppID uuid.UUID `toml:"-"` // random or test
InsecureFastScrypt *bool
RootDir *string
ShutdownGracePeriod *commonconfig.Duration
AppID uuid.UUID `toml:"-"` // random or test
InsecureFastScrypt *bool
RootDir *string
ShutdownGracePeriod *commonconfig.Duration
FeedsManagerSyncInterval *commonconfig.Duration

Feature Feature `toml:",omitempty"`
Database Database `toml:",omitempty"`
Expand Down Expand Up @@ -72,6 +73,9 @@ func (c *Core) SetFrom(f *Core) {
if v := f.ShutdownGracePeriod; v != nil {
c.ShutdownGracePeriod = v
}
if v := f.FeedsManagerSyncInterval; v != nil {
c.FeedsManagerSyncInterval = v
}

c.Feature.setFrom(&f.Feature)
c.Database.setFrom(&f.Database)
Expand Down Expand Up @@ -410,8 +414,10 @@ func (l *DatabaseLock) Mode() string {

// ValidateConfig appends an ErrInvalid for LeaseRefreshInterval to the returned
// error when LeaseRefreshInterval is greater than half of LeaseDuration.
// Otherwise the returned error is nil.
//
// NOTE(review): the two multierr.Append statements below look like the removed
// and added sides of a formatting-only diff hunk rather than two intended
// calls — confirm against the actual file before relying on this text.
func (l *DatabaseLock) ValidateConfig() (err error) {
if l.LeaseRefreshInterval.Duration() > l.LeaseDuration.Duration()/2 {
err = multierr.Append(err, configutils.ErrInvalid{Name: "LeaseRefreshInterval", Value: l.LeaseRefreshInterval.String(),
Msg: fmt.Sprintf("must be less than or equal to half of LeaseDuration (%s)", l.LeaseDuration.String())})
err = multierr.Append(err, configutils.ErrInvalid{
Name: "LeaseRefreshInterval", Value: l.LeaseRefreshInterval.String(),
Msg: fmt.Sprintf("must be less than or equal to half of LeaseDuration (%s)", l.LeaseDuration.String()),
})
}
return
}
Expand Down
4 changes: 4 additions & 0 deletions core/services/chainlink/config_general.go
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,10 @@ func (g *generalConfig) ShutdownGracePeriod() time.Duration {
return g.c.ShutdownGracePeriod.Duration()
}

// FeedsManagerSyncInterval returns the configured FeedsManagerSyncInterval
// value converted to a time.Duration.
func (g *generalConfig) FeedsManagerSyncInterval() time.Duration {
	interval := g.c.FeedsManagerSyncInterval
	return interval.Duration()
}

// FluxMonitor returns a config.FluxMonitor backed by the FluxMonitor section
// of the underlying configuration.
func (g *generalConfig) FluxMonitor() config.FluxMonitor {
	fm := fluxMonitorConfig{c: g.c.FluxMonitor}
	return &fm
}
Expand Down
8 changes: 5 additions & 3 deletions core/services/chainlink/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,10 @@ func TestConfig_Marshal(t *testing.T) {

global := Config{
Core: toml.Core{
InsecureFastScrypt: ptr(true),
RootDir: ptr("test/root/dir"),
ShutdownGracePeriod: commoncfg.MustNewDuration(10 * time.Second),
InsecureFastScrypt: ptr(true),
RootDir: ptr("test/root/dir"),
ShutdownGracePeriod: commoncfg.MustNewDuration(10 * time.Second),
FeedsManagerSyncInterval: commoncfg.MustNewDuration(15 * time.Minute),
Insecure: toml.Insecure{
DevWebServer: ptr(false),
OCRDevelopmentMode: ptr(false),
Expand Down Expand Up @@ -857,6 +858,7 @@ func TestConfig_Marshal(t *testing.T) {
{"global", global, `InsecureFastScrypt = true
RootDir = 'test/root/dir'
ShutdownGracePeriod = '10s'
FeedsManagerSyncInterval = '15m0s'
[Insecure]
DevWebServer = false
Expand Down
45 changes: 45 additions & 0 deletions core/services/chainlink/mocks/general_config.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
InsecureFastScrypt = false
RootDir = '~/.chainlink'
ShutdownGracePeriod = '5s'
FeedsManagerSyncInterval = '1h0m0s'

[Feature]
FeedsManager = true
Expand Down
1 change: 1 addition & 0 deletions core/services/chainlink/testdata/config-full.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
InsecureFastScrypt = true
RootDir = 'test/root/dir'
ShutdownGracePeriod = '10s'
FeedsManagerSyncInterval = '15m0s'

[Feature]
FeedsManager = true
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
InsecureFastScrypt = false
RootDir = 'my/root/dir'
ShutdownGracePeriod = '5s'
FeedsManagerSyncInterval = '1h0m0s'

[Feature]
FeedsManager = true
Expand Down
1 change: 1 addition & 0 deletions core/services/feeds/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
// GeneralConfig is the subset of the node's general configuration that the
// feeds package reads.
type GeneralConfig interface {
// OCR returns the OCR configuration section.
OCR() coreconfig.OCR
// Insecure returns the Insecure configuration section.
Insecure() coreconfig.Insecure
// FeedsManagerSyncInterval returns the interval used by the feeds service
// between periodic node-info syncs with the registered feeds managers.
FeedsManagerSyncInterval() time.Duration
}

type FeatureConfig interface {
Expand Down
29 changes: 29 additions & 0 deletions core/services/feeds/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"database/sql"
"encoding/hex"
"fmt"
"time"

"github.com/ethereum/go-ethereum/common"
"github.com/google/uuid"
Expand Down Expand Up @@ -1135,6 +1136,8 @@ func (s *service) Start(ctx context.Context) error {
s.lggr.Error("failed to observe job proposal count when starting service", err)
}

go s.periodicallySyncNodeInfo(ctx)

return nil
})
}
Expand Down Expand Up @@ -1550,6 +1553,32 @@ func (s *service) isRevokable(propStatus JobProposalStatus, specStatus SpecStatu
return propStatus != JobProposalStatusDeleted && (specStatus == SpecStatusPending || specStatus == SpecStatusCancelled)
}

// periodicallySyncNodeInfo re-sends this node's info to every registered
// feeds manager each time the interval returned by FeedsManagerSyncInterval
// elapses. It blocks until ctx is done, so it is meant to run in its own
// goroutine.
//
// A non-positive interval disables the periodic sync: time.NewTicker panics
// when given one, which would otherwise take the whole process down. Failures
// to list managers, or to sync with an individual manager, are logged and the
// work is attempted again on the next tick.
//
// NOTE(review): the loop lives only as long as ctx. Confirm that the context
// handed to Start is not cancelled as soon as startup completes; otherwise
// this goroutine exits before its first tick.
func (s *service) periodicallySyncNodeInfo(ctx context.Context) {
	interval := s.gCfg.FeedsManagerSyncInterval()
	if interval <= 0 {
		s.lggr.Infow("periodic sync node info disabled; interval must be positive", "interval", interval)
		return
	}

	s.lggr.Info("starting periodic sync node info goroutine")

	ticker := time.NewTicker(interval)
	// Release the ticker's resources once the goroutine exits.
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			managers, err := s.ListManagers(ctx)
			if err != nil {
				s.lggr.Errorw("failed to list managers", "err", err)
				// Nothing trustworthy to iterate over; wait for the next tick.
				continue
			}

			for _, manager := range managers {
				s.lggr.Infow("synchronizing node info", "managerID", manager.ID)
				if err := s.SyncNodeInfo(ctx, manager.ID); err != nil {
					// Keep going so one failing manager does not block the others.
					s.lggr.Errorw("failed to sync node info", "id", manager.ID, "err", err)
				}
			}
		case <-ctx.Done():
			s.lggr.Debugw("context done; exiting periodic sync node info goroutine")
			return
		}
	}
}

var _ Service = &NullService{}

// NullService defines an implementation of the Feeds Service that is used
Expand Down
1 change: 1 addition & 0 deletions testdata/scripts/config/merge_raw_configs.txtar
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ Publickey = 'abcdef'
InsecureFastScrypt = false
RootDir = '~/.chainlink'
ShutdownGracePeriod = '5s'
FeedsManagerSyncInterval = '1h0m0s'

[Feature]
FeedsManager = true
Expand Down
1 change: 1 addition & 0 deletions testdata/scripts/node/validate/default.txtar
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ AllowSimplePasswords = false
InsecureFastScrypt = false
RootDir = '~/.chainlink'
ShutdownGracePeriod = '5s'
FeedsManagerSyncInterval = '1h0m0s'

[Feature]
FeedsManager = true
Expand Down
1 change: 1 addition & 0 deletions testdata/scripts/node/validate/defaults-override.txtar
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ HTTPURL = 'https://foo.bar'
InsecureFastScrypt = false
RootDir = '~/.chainlink'
ShutdownGracePeriod = '5s'
FeedsManagerSyncInterval = '1h0m0s'

[Feature]
FeedsManager = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ HTTPURL = 'https://foo.bar'
InsecureFastScrypt = false
RootDir = '~/.chainlink'
ShutdownGracePeriod = '5s'
FeedsManagerSyncInterval = '1h0m0s'

[Feature]
FeedsManager = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ HTTPURL = 'https://foo.bar'
InsecureFastScrypt = false
RootDir = '~/.chainlink'
ShutdownGracePeriod = '5s'
FeedsManagerSyncInterval = '1h0m0s'

[Feature]
FeedsManager = true
Expand Down
1 change: 1 addition & 0 deletions testdata/scripts/node/validate/disk-based-logging.txtar
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ HTTPURL = 'https://foo.bar'
InsecureFastScrypt = false
RootDir = '~/.chainlink'
ShutdownGracePeriod = '5s'
FeedsManagerSyncInterval = '1h0m0s'

[Feature]
FeedsManager = true
Expand Down
1 change: 1 addition & 0 deletions testdata/scripts/node/validate/invalid-ocr-p2p.txtar
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ Enabled = false
InsecureFastScrypt = false
RootDir = '~/.chainlink'
ShutdownGracePeriod = '5s'
FeedsManagerSyncInterval = '1h0m0s'

[Feature]
FeedsManager = true
Expand Down
1 change: 1 addition & 0 deletions testdata/scripts/node/validate/invalid.txtar
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ HTTPURL = 'https://foo.bar'
InsecureFastScrypt = false
RootDir = '~/.chainlink'
ShutdownGracePeriod = '5s'
FeedsManagerSyncInterval = '1h0m0s'

[Feature]
FeedsManager = true
Expand Down
1 change: 1 addition & 0 deletions testdata/scripts/node/validate/valid.txtar
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ HTTPURL = 'https://foo.bar'
InsecureFastScrypt = false
RootDir = '~/.chainlink'
ShutdownGracePeriod = '5s'
FeedsManagerSyncInterval = '1h0m0s'

[Feature]
FeedsManager = true
Expand Down
1 change: 1 addition & 0 deletions testdata/scripts/node/validate/warnings.txtar
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ TLSCertPath = 'something'
InsecureFastScrypt = false
RootDir = '~/.chainlink'
ShutdownGracePeriod = '5s'
FeedsManagerSyncInterval = '1h0m0s'

[Feature]
FeedsManager = true
Expand Down

0 comments on commit 301972c

Please sign in to comment.