From 412426687c3e69075bfd4b88b9eef04a85e0f008 Mon Sep 17 00:00:00 2001 From: asoliman Date: Thu, 27 Jun 2024 23:57:22 +0400 Subject: [PATCH 1/4] Fix race condition Add .gitignore --- .gitignore | 95 ++++++++++++++++++++++++++++++ internal/reader/home_chain.go | 31 ++++++++-- internal/reader/home_chain_test.go | 67 +++++++++++---------- 3 files changed, 158 insertions(+), 35 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..103e91287 --- /dev/null +++ b/.gitignore @@ -0,0 +1,95 @@ +# Ignore DevSpace cache and log folder +.devspace/ + +/core/scripts/ccip/json/credentials +/core/scripts/ccip/revert-reason/bin/ccip-revert-reason + +# dependencies generated after running `go mod vendor` +vendor/ +go.work* + +# This sometimes shows up for some reason +tools/flakeytests/coverage.txt + +# Fuzz tests can create these files +**/testdata/fuzz/* + +# Runtime test configuration that might contain secrets +override*.toml + +# Pythin venv +.venv/ + +ocr_soak_report.csv +-- VISUAL LINE -- +tmp-manifest-*.yaml +ztarrepo.tar.gz +**/test-ledger/* +__debug_bin* +.test_summary/ +.run.id +integration-tests/**/traces/ +benchmark_report.csv +benchmark_summary.json + +# goreleaser builds +cosign.* +dist/ +MacOSX* + +cache +core/services/ocr2/plugins/ccip/transactions.rlp +lcov.info +!core/services/ocr2/plugins/ccip/internal/cache/ + + +core/scripts/ccip/json/credentials +core/scripts/ccip/json/deployments +core/scripts/ccip/csv/node-wallets + +# Test & linter reports +*report.xml +*report.json +*.out + +contracts/yarn.lock + +# Ignore DevSpace cache and log folder +.devspace/ + +/core/scripts/ccip/json/credentials +/core/scripts/ccip/revert-reason/bin/ccip-revert-reason + +# dependencies generated after running `go mod vendor` +vendor/ +go.work* + +# This sometimes shows up for some reason +tools/flakeytests/coverage.txt + +# Fuzz tests can create these files +**/testdata/fuzz/* + +# Runtime test configuration that might contain secrets +override*.toml + +# Pythin venv +.venv/ + +ocr_soak_report.csv + +# misc +.DS_Store +.envrc +.env* +.dbenv +!crib/.env.example +!.github/actions/setup-postgres/.env +.direnv +.idea +.vscode/ +*.iml +debug.env +*.txt +operator_ui/install + diff --git a/internal/reader/home_chain.go b/internal/reader/home_chain.go index ff2409250..05fb0174a 100644 --- a/internal/reader/home_chain.go +++ b/internal/reader/home_chain.go @@ -7,6 +7,7 @@ import ( "time" mapset "github.com/deckarep/golang-set/v2" + "github.com/smartcontractkit/chainlink-common/pkg/logger" "github.com/smartcontractkit/chainlink-common/pkg/services" "github.com/smartcontractkit/chainlink-common/pkg/types" @@ -40,6 +41,7 @@ type state struct { } type homeChainPoller struct { + wg sync.WaitGroup stopCh services.StopChan sync services.StateMachine homeChainReader types.ContractReader @@ -77,12 +79,14 @@ func (r *homeChainPoller) Start(ctx context.Context) error { } r.lggr.Infow("Start Polling ChainConfig") return r.sync.StartOnce(r.Name(), func() error { + r.wg.Add(1) go r.poll() return nil }) } func (r *homeChainPoller) poll() { + defer r.wg.Done() ctx, cancel := r.stopCh.NewCtx() defer cancel() ticker := time.NewTicker(r.pollingDuration) @@ -99,7 +103,7 @@ func (r *homeChainPoller) poll() { r.mutex.Lock() r.failedPolls++ r.mutex.Unlock() - r.lggr.Errorw("fetching and setting configs failed", "failedPolls", r.failedPolls, "err", err) + //r.lggr.Errorw("failed to fetch chain configs", "err", err) } } } @@ -111,7 +115,6 @@ func (r *homeChainPoller) fetchAndSetConfigs(ctx context.Context) error { ctx, "CCIPCapabilityConfiguration", "getAllChainConfigs", nil, &chainConfigInfos, ) if err != nil { - r.lggr.Errorw("fetching on-chain configs failed", "err", err) return err } if len(chainConfigInfos) == 0 { @@ -124,7 +127,6 @@ func (r *homeChainPoller) fetchAndSetConfigs(ctx context.Context) error { r.lggr.Errorw("error converting OnChainConfigs to ChainConfig", "err", err) return err } - r.lggr.Infow("Setting ChainConfig") r.setState(homeChainConfigs) return nil } @@ -201,10 +203,31 @@ func (r *homeChainPoller) GetOCRConfigs( } func (r *homeChainPoller) Close() error { - return r.sync.StopOnce(r.Name(), func() error { + err := r.sync.StopOnce(r.Name(), func() error { + defer r.wg.Wait() close(r.stopCh) return nil }) + if err != nil { + return fmt.Errorf("failed to stop %s: %w", r.Name(), err) + } + // give it twice the polling duration to ensure the poller is caught up and stopped + //ticker := time.NewTicker(r.pollingDuration * 10) + //defer ticker.Stop() + timeout := time.After(r.pollingDuration * 10) + for { + // Make sure it's closed gracefully (Ready returns an error once it's not ready) + state := r.sync.State() + isStopped := state == "Stopped" + if isStopped { + return nil + } + select { + case <-timeout: + return fmt.Errorf("HomeChainReader did not close gracefully") + default: + } + } } func (r *homeChainPoller) Ready() error { diff --git a/internal/reader/home_chain_test.go b/internal/reader/home_chain_test.go index 7252fb472..41227f345 100644 --- a/internal/reader/home_chain_test.go +++ b/internal/reader/home_chain_test.go @@ -7,6 +7,7 @@ import ( "time" mapset "github.com/deckarep/golang-set/v2" + libocrtypes "github.com/smartcontractkit/libocr/ragep2p/types" "github.com/smartcontractkit/chainlink-ccip/internal/mocks" @@ -15,7 +16,6 @@ import ( cciptypes "github.com/smartcontractkit/chainlink-common/pkg/types/ccipocr3" "github.com/smartcontractkit/libocr/commontypes" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) @@ -43,31 +43,26 @@ func TestHomeChainConfigPoller_HealthReport(t *testing.T) { mock.Anything).Return(fmt.Errorf("error")) var ( - tickTime = 10 * time.Millisecond - totalSleepTime = 11 * tickTime + tickTime = 1 * time.Millisecond + totalSleepTime = 20 * tickTime // to allow it to fail 10 times at least ) - + lggr, err := logger.New() + require.NoError(t, err) configPoller := NewHomeChainConfigPoller( homeChainReader, - logger.Test(t), + lggr, tickTime, ) - _ = configPoller.Start(context.Background()) - + require.NoError(t, configPoller.Start(context.Background())) // Initially it's healthy healthy := configPoller.HealthReport() - assert.Equal(t, map[string]error{configPoller.Name(): error(nil)}, healthy) - + require.Equal(t, map[string]error{configPoller.Name(): error(nil)}, healthy) // After one second it will try polling 10 times and fail time.Sleep(totalSleepTime) - errors := configPoller.HealthReport() - - err := configPoller.Close() - time.Sleep(tickTime) - assert.NoError(t, err) - assert.Equal(t, 1, len(errors)) - assert.Errorf(t, errors[configPoller.Name()], "polling failed %d times in a row", MaxFailedPolls) + require.Equal(t, 1, len(errors)) + require.Errorf(t, errors[configPoller.Name()], "polling failed %d times in a row", MaxFailedPolls) + require.NoError(t, configPoller.Close()) } func Test_PollingWorking(t *testing.T) { @@ -130,31 +125,39 @@ func Test_PollingWorking(t *testing.T) { *arg = onChainConfigs }).Return(nil) + defer homeChainReader.AssertExpectations(t) + var ( - tickTime = 20 * time.Millisecond - totalSleepTime = (tickTime * 2) + (10 * time.Millisecond) - expNumCalls = int(totalSleepTime/tickTime) + 1 // +1 for the initial call + tickTime = 2 * time.Millisecond + totalSleepTime = tickTime * 4 ) + lggr, err := logger.New() + require.NoError(t, err) configPoller := NewHomeChainConfigPoller( homeChainReader, - logger.Test(t), + lggr, tickTime, ) - ctx := context.Background() - err := configPoller.Start(ctx) - assert.NoError(t, err) + require.NoError(t, configPoller.Start(context.Background())) + // sleep to allow polling to happen time.Sleep(totalSleepTime) - err = configPoller.Close() - assert.NoError(t, err) - - // called 3 times, once when it's started, and 2 times when it's polling - homeChainReader.AssertNumberOfCalls(t, "GetLatestValue", expNumCalls) + require.NoError(t, configPoller.Close()) + + calls := homeChainReader.Calls + callCount := 0 + for _, call := range calls { + if call.Method == "GetLatestValue" { + callCount++ + } + } + //called at least 2 times, one for start and one for the first tick + require.GreaterOrEqual(t, callCount, 2) configs, err := configPoller.GetAllChainConfigs() - assert.NoError(t, err) - assert.Equal(t, homeChainConfig, configs) + require.NoError(t, err) + require.Equal(t, homeChainConfig, configs) } func Test_HomeChainPoller_GetOCRConfig(t *testing.T) { @@ -185,9 +188,11 @@ func Test_HomeChainPoller_GetOCRConfig(t *testing.T) { }) defer homeChainReader.AssertExpectations(t) + lggr, err := logger.New() + require.NoError(t, err) configPoller := NewHomeChainConfigPoller( homeChainReader, - logger.Test(t), + lggr, 10*time.Millisecond, ) From 78128dd1c1ab68cbcffe3838fa1ee9fc0fee58d7 Mon Sep 17 00:00:00 2001 From: asoliman Date: Fri, 28 Jun 2024 16:37:52 +0400 Subject: [PATCH 2/4] Fix comments --- internal/reader/home_chain.go | 23 +---------------------- internal/reader/home_chain_test.go | 12 +++--------- 2 files changed, 4 insertions(+), 31 deletions(-) diff --git a/internal/reader/home_chain.go b/internal/reader/home_chain.go index 05fb0174a..1d2324c1a 100644 --- a/internal/reader/home_chain.go +++ b/internal/reader/home_chain.go @@ -103,7 +103,6 @@ func (r *homeChainPoller) poll() { r.mutex.Lock() r.failedPolls++ r.mutex.Unlock() - //r.lggr.Errorw("failed to fetch chain configs", "err", err) } } } @@ -203,31 +202,11 @@ func (r *homeChainPoller) GetOCRConfigs( } func (r *homeChainPoller) Close() error { - err := r.sync.StopOnce(r.Name(), func() error { + return r.sync.StopOnce(r.Name(), func() error { defer r.wg.Wait() close(r.stopCh) return nil }) - if err != nil { - return fmt.Errorf("failed to stop %s: %w", r.Name(), err) - } - // give it twice the polling duration to ensure the poller is caught up and stopped - //ticker := time.NewTicker(r.pollingDuration * 10) - //defer ticker.Stop() - timeout := time.After(r.pollingDuration * 10) - for { - // Make sure it's closed gracefully (Ready returns an error once it's not ready) - state := r.sync.State() - isStopped := state == "Stopped" - if isStopped { - return nil - } - select { - case <-timeout: - return fmt.Errorf("HomeChainReader did not close gracefully") - default: - } - } } func (r *homeChainPoller) Ready() error { diff --git a/internal/reader/home_chain_test.go b/internal/reader/home_chain_test.go index 41227f345..669aa4008 100644 --- a/internal/reader/home_chain_test.go +++ b/internal/reader/home_chain_test.go @@ -46,11 +46,9 @@ func TestHomeChainConfigPoller_HealthReport(t *testing.T) { tickTime = 1 * time.Millisecond totalSleepTime = 20 * tickTime // to allow it to fail 10 times at least ) - lggr, err := logger.New() - require.NoError(t, err) configPoller := NewHomeChainConfigPoller( homeChainReader, - lggr, + logger.Test(t), tickTime, ) require.NoError(t, configPoller.Start(context.Background())) @@ -132,11 +130,9 @@ func Test_PollingWorking(t *testing.T) { totalSleepTime = tickTime * 4 ) - lggr, err := logger.New() - require.NoError(t, err) configPoller := NewHomeChainConfigPoller( homeChainReader, - lggr, + logger.Test(t), tickTime, ) @@ -188,11 +184,9 @@ func Test_HomeChainPoller_GetOCRConfig(t *testing.T) { }) defer homeChainReader.AssertExpectations(t) - lggr, err := logger.New() - require.NoError(t, err) configPoller := NewHomeChainConfigPoller( homeChainReader, - lggr, + logger.Test(t), 10*time.Millisecond, ) From 382a727d4c43aae8736dd224f0fcbbefd5457355 Mon Sep 17 00:00:00 2001 From: asoliman Date: Fri, 28 Jun 2024 16:47:21 +0400 Subject: [PATCH 3/4] remove .gitignore --- .gitignore | 95 ------------------------------------------------------ 1 file changed, 95 deletions(-) delete mode 100644 .gitignore diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 103e91287..000000000 --- a/.gitignore +++ /dev/null @@ -1,95 +0,0 @@ -# Ignore DevSpace cache and log folder -.devspace/ - -/core/scripts/ccip/json/credentials -/core/scripts/ccip/revert-reason/bin/ccip-revert-reason - -# dependencies generated after running `go mod vendor` -vendor/ -go.work* - -# This sometimes shows up for some reason -tools/flakeytests/coverage.txt - -# Fuzz tests can create these files -**/testdata/fuzz/* - -# Runtime test configuration that might contain secrets -override*.toml - -# Pythin venv -.venv/ - -ocr_soak_report.csv --- VISUAL LINE -- -tmp-manifest-*.yaml -ztarrepo.tar.gz -**/test-ledger/* -__debug_bin* -.test_summary/ -.run.id -integration-tests/**/traces/ -benchmark_report.csv -benchmark_summary.json - -# goreleaser builds -cosign.* -dist/ -MacOSX* - -cache -core/services/ocr2/plugins/ccip/transactions.rlp -lcov.info -!core/services/ocr2/plugins/ccip/internal/cache/ - - -core/scripts/ccip/json/credentials -core/scripts/ccip/json/deployments -core/scripts/ccip/csv/node-wallets - -# Test & linter reports -*report.xml -*report.json -*.out - -contracts/yarn.lock - -# Ignore DevSpace cache and log folder -.devspace/ - -/core/scripts/ccip/json/credentials -/core/scripts/ccip/revert-reason/bin/ccip-revert-reason - -# dependencies generated after running `go mod vendor` -vendor/ -go.work* - -# This sometimes shows up for some reason -tools/flakeytests/coverage.txt - -# Fuzz tests can create these files -**/testdata/fuzz/* - -# Runtime test configuration that might contain secrets -override*.toml - -# Pythin venv -.venv/ - -ocr_soak_report.csv - -# misc -.DS_Store -.envrc -.env* -.dbenv -!crib/.env.example -!.github/actions/setup-postgres/.env -.direnv -.idea -.vscode/ -*.iml -debug.env -*.txt -operator_ui/install - From f4dc97a2e0600b2757ea130376cd8fd8fb21c28d Mon Sep 17 00:00:00 2001 From: asoliman Date: Fri, 28 Jun 2024 17:57:00 +0400 Subject: [PATCH 4/4] Move initial fetch inside poll function --- internal/reader/home_chain.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/internal/reader/home_chain.go b/internal/reader/home_chain.go index 1d2324c1a..9c11319bb 100644 --- a/internal/reader/home_chain.go +++ b/internal/reader/home_chain.go @@ -72,11 +72,6 @@ func NewHomeChainConfigPoller( } func (r *homeChainPoller) Start(ctx context.Context) error { - err := r.fetchAndSetConfigs(ctx) - if err != nil { - // Just log, don't return error as we want to keep polling - r.lggr.Errorw("Initial fetch of on-chain configs failed", "err", err) - } r.lggr.Infow("Start Polling ChainConfig") return r.sync.StartOnce(r.Name(), func() error { r.wg.Add(1) @@ -86,9 +81,16 @@ func (r *homeChainPoller) Start(ctx context.Context) error { } func (r *homeChainPoller) poll() { + defer r.wg.Done() ctx, cancel := r.stopCh.NewCtx() defer cancel() + // Initial fetch once poll is called before any ticks + if err := r.fetchAndSetConfigs(ctx); err != nil { + // Just log, don't return error as we want to keep polling + r.lggr.Errorw("Initial fetch of on-chain configs failed", "err", err) + } + ticker := time.NewTicker(r.pollingDuration) defer ticker.Stop() for {