From 7ff9cf9916ae1648ccc3475c86e45c9df6ab8d1f Mon Sep 17 00:00:00 2001 From: pavel-raykov <165708424+pavel-raykov@users.noreply.github.com> Date: Thu, 14 Nov 2024 15:42:20 +0100 Subject: [PATCH 01/13] Remove unused deprecated key interfaces. (#15210) * Remove unused deprecated key interfaces. * Minor * Minor * Minor * Minor * Minor * Minor --- .changeset/healthy-shirts-remain.md | 5 + core/services/keystore/keys/csakey/key.go | 65 --------- .../services/keystore/keys/csakey/key_test.go | 58 -------- core/services/keystore/keys/p2pkey/key.go | 125 ------------------ .../services/keystore/keys/p2pkey/key_test.go | 109 --------------- .../keystore/keys/p2pkey/key_v2_test.go | 8 +- core/web/presenters/csa_key_test.go | 12 +- 7 files changed, 13 insertions(+), 369 deletions(-) create mode 100644 .changeset/healthy-shirts-remain.md delete mode 100644 core/services/keystore/keys/csakey/key.go delete mode 100644 core/services/keystore/keys/csakey/key_test.go delete mode 100644 core/services/keystore/keys/p2pkey/key.go delete mode 100644 core/services/keystore/keys/p2pkey/key_test.go diff --git a/.changeset/healthy-shirts-remain.md b/.changeset/healthy-shirts-remain.md new file mode 100644 index 00000000000..0ce310e1ce3 --- /dev/null +++ b/.changeset/healthy-shirts-remain.md @@ -0,0 +1,5 @@ +--- +"chainlink": minor +--- + +#removed Remove unused deprecated key interfaces. diff --git a/core/services/keystore/keys/csakey/key.go b/core/services/keystore/keys/csakey/key.go deleted file mode 100644 index 054994f93ea..00000000000 --- a/core/services/keystore/keys/csakey/key.go +++ /dev/null @@ -1,65 +0,0 @@ -package csakey - -import ( - "crypto/ed25519" - "errors" - "time" - - "github.com/smartcontractkit/chainlink/v2/core/utils" - "github.com/smartcontractkit/chainlink/v2/core/utils/crypto" -) - -type Key struct { - ID uint - PublicKey crypto.PublicKey - privateKey []byte - EncryptedPrivateKey crypto.EncryptedPrivateKey - CreatedAt time.Time - UpdatedAt time.Time -} - -// New creates a new CSA key consisting of an ed25519 key. It encrypts the -// Key with the passphrase. 
-func New(passphrase string, scryptParams utils.ScryptParams) (*Key, error) { - pubkey, privkey, err := ed25519.GenerateKey(nil) - if err != nil { - return nil, err - } - - encPrivkey, err := crypto.NewEncryptedPrivateKey(privkey, passphrase, scryptParams) - if err != nil { - return nil, err - } - - return &Key{ - PublicKey: crypto.PublicKey(pubkey), - privateKey: privkey, - EncryptedPrivateKey: *encPrivkey, - }, nil -} - -func (k *Key) Unlock(password string) error { - pk, err := k.EncryptedPrivateKey.Decrypt(password) - if err != nil { - return err - } - k.privateKey = pk - return nil -} - -func (k *Key) Unsafe_GetPrivateKey() ([]byte, error) { - if k.privateKey == nil { - return nil, errors.New("key has not been unlocked") - } - - return k.privateKey, nil -} - -func (k Key) ToV2() KeyV2 { - pk := ed25519.PrivateKey(k.privateKey) - return KeyV2{ - privateKey: &pk, - PublicKey: ed25519.PublicKey(k.PublicKey), - Version: 1, - } -} diff --git a/core/services/keystore/keys/csakey/key_test.go b/core/services/keystore/keys/csakey/key_test.go deleted file mode 100644 index 8ac05f74cf5..00000000000 --- a/core/services/keystore/keys/csakey/key_test.go +++ /dev/null @@ -1,58 +0,0 @@ -package csakey - -import ( - "crypto/ed25519" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/smartcontractkit/chainlink/v2/core/utils" -) - -func Test_New(t *testing.T) { - passphrase := "passphrase" - key, err := New(passphrase, utils.FastScryptParams) - require.NoError(t, err) - - rawprivkey, err := key.EncryptedPrivateKey.Decrypt("passphrase") - require.NoError(t, err) - - privkey := ed25519.PrivateKey(rawprivkey) - assert.Equal(t, ed25519.PublicKey(key.PublicKey), privkey.Public()) -} - -func Test_Unlock(t *testing.T) { - passphrase := "passphrase" - key, err := New(passphrase, utils.FastScryptParams) - require.NoError(t, err) - - err = key.Unlock(passphrase) - require.NoError(t, err) - - expected, err := key.EncryptedPrivateKey.Decrypt(passphrase) - require.NoError(t, err) - - assert.Equal(t, expected, key.privateKey) -} - -func Test_GetPrivateKey(t *testing.T) { - passphrase := "passphrase" - key, err := New(passphrase, utils.FastScryptParams) - require.NoError(t, err) - - privkey, err := key.Unsafe_GetPrivateKey() - require.NoError(t, err) - assert.Equal(t, key.privateKey, privkey) -} - -func TestKey_ToV2(t *testing.T) { - passphrase := "passphrase" - key, err := New(passphrase, utils.FastScryptParams) - require.NoError(t, err) - - v2Key := key.ToV2() - - assert.Equal(t, key.PublicKey.String(), v2Key.PublicKeyString()) - assert.Equal(t, ed25519.PrivateKey(key.privateKey), *v2Key.privateKey) -} diff --git a/core/services/keystore/keys/p2pkey/key.go b/core/services/keystore/keys/p2pkey/key.go deleted file mode 100644 index abf4f70294c..00000000000 --- a/core/services/keystore/keys/p2pkey/key.go +++ /dev/null @@ -1,125 +0,0 @@ -package p2pkey - -import ( - "crypto/ed25519" - "database/sql/driver" - "encoding/hex" - "encoding/json" - "strconv" - "time" - - "github.com/ethereum/go-ethereum/accounts/keystore" - "github.com/pkg/errors" - - ragep2ptypes "github.com/smartcontractkit/libocr/ragep2p/types" -) - -// Key represents a p2p private key -type Key struct { - PrivKey ed25519.PrivateKey -} - -func (k Key) ToV2() KeyV2 { - return KeyV2{ - PrivKey: k.PrivKey, - peerID: k.PeerID(), - } -} - -// PublicKeyBytes is a [ed25519.PublicKey] -type PublicKeyBytes []byte - -func (pkb PublicKeyBytes) String() string { - return hex.EncodeToString(pkb) -} - -func (pkb 
PublicKeyBytes) MarshalJSON() ([]byte, error) { - return json.Marshal(hex.EncodeToString(pkb)) -} - -func (pkb *PublicKeyBytes) UnmarshalJSON(input []byte) error { - var hexString string - if err := json.Unmarshal(input, &hexString); err != nil { - return err - } - - result, err := hex.DecodeString(hexString) - if err != nil { - return err - } - - *pkb = result - return nil -} - -func (pkb *PublicKeyBytes) Scan(value interface{}) error { - switch v := value.(type) { - case []byte: - *pkb = v - return nil - default: - return errors.Errorf("invalid public key bytes got %T wanted []byte", v) - } -} - -func (pkb PublicKeyBytes) Value() (driver.Value, error) { - return []byte(pkb), nil -} - -func (k Key) GetPeerID() (PeerID, error) { - peerID, err := ragep2ptypes.PeerIDFromPrivateKey(k.PrivKey) - if err != nil { - return PeerID{}, errors.WithStack(err) - } - return PeerID(peerID), err -} - -func (k Key) PeerID() PeerID { - peerID, err := k.GetPeerID() - if err != nil { - panic(err) - } - return peerID -} - -type EncryptedP2PKey struct { - ID int32 - PeerID PeerID - PubKey PublicKeyBytes - EncryptedPrivKey []byte - CreatedAt time.Time - UpdatedAt time.Time - DeletedAt *time.Time -} - -func (ep2pk *EncryptedP2PKey) SetID(value string) error { - result, err := strconv.ParseInt(value, 10, 32) - - if err != nil { - return err - } - - ep2pk.ID = int32(result) - return nil -} - -// Decrypt returns the PrivateKey in e, decrypted via auth, or an error -func (ep2pk EncryptedP2PKey) Decrypt(auth string) (k Key, err error) { - var cryptoJSON keystore.CryptoJSON - err = json.Unmarshal(ep2pk.EncryptedPrivKey, &cryptoJSON) - if err != nil { - return k, errors.Wrapf(err, "invalid JSON for P2P key %s (0x%x)", ep2pk.PeerID.String(), ep2pk.PubKey) - } - marshalledPrivK, err := keystore.DecryptDataV3(cryptoJSON, adulteratedPassword(auth)) - if err != nil { - return k, errors.Wrapf(err, "could not decrypt P2P key %s (0x%x)", ep2pk.PeerID.String(), ep2pk.PubKey) - } - - privK, err := UnmarshalPrivateKey(marshalledPrivK) - if err != nil { - return k, errors.Wrapf(err, "could not unmarshal P2P private key for %s (0x%x)", ep2pk.PeerID.String(), ep2pk.PubKey) - } - return Key{ - privK, - }, nil -} diff --git a/core/services/keystore/keys/p2pkey/key_test.go b/core/services/keystore/keys/p2pkey/key_test.go deleted file mode 100644 index 57490483e86..00000000000 --- a/core/services/keystore/keys/p2pkey/key_test.go +++ /dev/null @@ -1,109 +0,0 @@ -package p2pkey - -import ( - "crypto/ed25519" - "crypto/rand" - "encoding/hex" - "encoding/json" - "testing" - - "github.com/ethereum/go-ethereum/accounts/keystore" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/smartcontractkit/chainlink/v2/core/utils" -) - -func TestP2PKeys_KeyStruct(t *testing.T) { - _, pk, err := ed25519.GenerateKey(rand.Reader) - require.NoError(t, err) - - k := Key{PrivKey: pk} - - t.Run("converts into V2 key", func(t *testing.T) { - k2 := k.ToV2() - - assert.Equal(t, k.PrivKey, k2.PrivKey) - assert.Equal(t, k.PeerID(), k2.peerID) - }) - - t.Run("returns PeerID", func(t *testing.T) { - pid, err := k.GetPeerID() - require.NoError(t, err) - pid2 := k.PeerID() - - assert.Equal(t, pid, pid2) - }) -} - -func TestP2PKeys_PublicKeyBytes(t *testing.T) { - pk, _, err := ed25519.GenerateKey(rand.Reader) - require.NoError(t, err) - - pkb := PublicKeyBytes(pk) - assert.Equal(t, hex.EncodeToString(pkb), pkb.String()) - - b, err := pkb.MarshalJSON() - require.NoError(t, err) - assert.NotEmpty(t, b) - - err = 
pkb.UnmarshalJSON(b) - assert.NoError(t, err) - - err = pkb.UnmarshalJSON([]byte("")) - assert.Error(t, err) - - err = pkb.Scan([]byte(pk)) - assert.NoError(t, err) - - err = pkb.Scan("invalid-type") - assert.Error(t, err) - - sv, err := pkb.Value() - assert.NoError(t, err) - assert.NotEmpty(t, sv) -} - -func TestP2PKeys_EncryptedP2PKey(t *testing.T) { - _, privk, err := ed25519.GenerateKey(rand.Reader) - require.NoError(t, err) - - k := Key{PrivKey: privk} - - pubkr := k.PrivKey.Public().(ed25519.PublicKey) - - var marshalledPrivK []byte - marshalledPrivK, err = MarshalPrivateKey(k.PrivKey) - require.NoError(t, err) - cryptoJSON, err := keystore.EncryptDataV3(marshalledPrivK, []byte(adulteratedPassword("password")), utils.FastScryptParams.N, utils.FastScryptParams.P) - require.NoError(t, err) - encryptedPrivKey, err := json.Marshal(&cryptoJSON) - require.NoError(t, err) - - p2pk := EncryptedP2PKey{ - ID: 1, - PeerID: k.PeerID(), - PubKey: []byte(pubkr), - EncryptedPrivKey: encryptedPrivKey, - } - - t.Run("sets a different ID", func(t *testing.T) { - err := p2pk.SetID("12") - require.NoError(t, err) - - assert.Equal(t, int32(12), p2pk.ID) - - err = p2pk.SetID("invalid") - assert.Error(t, err) - }) - - t.Run("decrypts key", func(t *testing.T) { - k, err := p2pk.Decrypt("invalid-pass") - assert.Empty(t, k) - assert.Error(t, err) - - k, err = p2pk.Decrypt("password") - require.NoError(t, err) - assert.NotEmpty(t, k) - }) -} diff --git a/core/services/keystore/keys/p2pkey/key_v2_test.go b/core/services/keystore/keys/p2pkey/key_v2_test.go index d93678b8f2d..56a93e4db1a 100644 --- a/core/services/keystore/keys/p2pkey/key_v2_test.go +++ b/core/services/keystore/keys/p2pkey/key_v2_test.go @@ -7,6 +7,7 @@ import ( "testing" ragep2ptypes "github.com/smartcontractkit/libocr/ragep2p/types" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -22,15 +23,12 @@ func TestP2PKeys_Raw(t *testing.T) { } func TestP2PKeys_KeyV2(t *testing.T) { - _, pk, err := ed25519.GenerateKey(rand.Reader) + kv2, err := NewV2() require.NoError(t, err) - k := Key{PrivKey: pk} - kv2 := k.ToV2() - pkv2 := kv2.PrivKey.Public().(ed25519.PublicKey) assert.Equal(t, kv2.String(), kv2.GoString()) - assert.Equal(t, ragep2ptypes.PeerID(k.PeerID()).String(), kv2.ID()) + assert.Equal(t, ragep2ptypes.PeerID(kv2.PeerID()).String(), kv2.ID()) assert.Equal(t, hex.EncodeToString(pkv2), kv2.PublicKeyHex()) } diff --git a/core/web/presenters/csa_key_test.go b/core/web/presenters/csa_key_test.go index 06f84db7dd5..d514519fafd 100644 --- a/core/web/presenters/csa_key_test.go +++ b/core/web/presenters/csa_key_test.go @@ -9,15 +9,13 @@ import ( "github.com/stretchr/testify/require" "github.com/smartcontractkit/chainlink/v2/core/services/keystore/keys/csakey" - "github.com/smartcontractkit/chainlink/v2/core/utils" ) func TestCSAKeyResource(t *testing.T) { - key, err := csakey.New("passphrase", utils.FastScryptParams) + keyV2, err := csakey.NewV2() require.NoError(t, err) - key.ID = 1 - r := NewCSAKeyResource(key.ToV2()) + r := NewCSAKeyResource(keyV2) b, err := jsonapi.Marshal(r) require.NoError(t, err) @@ -25,13 +23,13 @@ func TestCSAKeyResource(t *testing.T) { { "data":{ "type":"csaKeys", - "id":"%s", + "id":"%[1]s", "attributes":{ - "publicKey": "csa_%s", + "publicKey": "csa_%[1]s", "version": 1 } } - }`, key.PublicKey.String(), key.PublicKey.String()) + }`, keyV2.PublicKeyString()) assert.JSONEq(t, expected, string(b)) } From 2fcfb81bb5c72e516d838097e8eece11c35f66a4 Mon Sep 17 00:00:00 2001 From: Lukasz 
<120112546+lukaszcl@users.noreply.github.com> Date: Thu, 14 Nov 2024 15:55:33 +0100 Subject: [PATCH 02/13] TT-1831 Extend flaky test detector to run all core repository tests nightly (#15222) * Move runWithRace to extraArgs * Update flaky test workflow to support running all tests * TO REVERT: Remove most of deployment unit test * update workflow * fix * fix * fix * debug * fix * change workflow name * fix * Update Github Summary * TO REVERT: Remove one unit test * Fix flakeguard binary path * Fix * Update runner counts * Add nightly workflow for flaky test detector * Fix * Upload all test results as short lived artifact * Fix total repeat count * Revert "TO REVERT: Remove one unit test" This reverts commit 5e0f921079ee80f0bc02a22d41b4ab6987e72ee0. * Fix * Add all_tests_runner arg * Fix default all tests runner * TO REVERT: fail test * Update go project pretty name * Fix * Revert "TO REVERT: Remove most of deployment unit test" This reverts commit 0c8418d4d8ad44e14011bc657f8204b0b20e52a1. * Update summary * Fix * Reapply "TO REVERT: Remove most of deployment unit test" This reverts commit 7252d78b27c8a0455585ccb2ac33beb0ac4d2d2e. * Update workflow names * Do not print failed tests to console by default * Update reporting * Update job name * Fix * Bump flakeguard * Fix workflow condition * Remove debug step * fix split-packages step * fix 2 * Show pass ratio percentage * Update nightly detector workflow * Revert removing unit tests * Fix ci-core.yml * TEST: update unit test * TEST: FAil deployment test * Update slack notification * TEST: fail test * bump flakeguard * Update name * Revert "TEST: fail test" This reverts commit 8c272b837b97db6f7d2402a5e141a138010f5585. * Revert "TEST: FAil deployment test" This reverts commit e08d2b6d3c1ed9ba81a5bb4e42fe78792662944a. * Fix test * Bump flakeguard * Use min pass ratio to not report tests as flaky if they failed in every run * Change flaky threshold to 95% * Bump flakeguard * Print failed tests in github logs for PRs * Run tests 3 times in PRs * TEST: always fail test * Bump flakeguard * Fix * Fix test * add test that randomly passes or fails * TO REVERT: repeat core tests 5 times * Update min pass ratio and threshold * Revert "add test that randomly passes or fails" This reverts commit 43e7c0b08ffda1b4628b27163fad2bfc8ae4c5a6. * Remove test-results artifacts after 1 day * Bump flakeguard and do not fail it when test fail but not flaky * Fix * TO REVERT: always fail test * Update * TO REVERT: add randomly failing test * Revert "TO REVERT: add randomly failing test" This reverts commit 5254cde896dd3e1e35b467bc579a0a21460aaa24. 
* Fix test * Bump * Bump all_tests_runner_count for nightly * Set min_pass_ratio=0 for nightly * Set timeout for job * Update --- .github/workflows/ci-core.yml | 12 +- .github/workflows/find-new-flaky-tests.yml | 239 ++++++++++++------ .../workflows/run-find-new-flaky-tests.yml | 16 +- .../run-nightly-flaky-test-detector.yml | 21 ++ 4 files changed, 200 insertions(+), 88 deletions(-) create mode 100644 .github/workflows/run-nightly-flaky-test-detector.yml diff --git a/.github/workflows/ci-core.yml b/.github/workflows/ci-core.yml index 23f4382f62a..48977cee35e 100644 --- a/.github/workflows/ci-core.yml +++ b/.github/workflows/ci-core.yml @@ -463,7 +463,7 @@ jobs: SONAR_SCANNER_OPTS: "-Xms6g -Xmx8g" trigger-flaky-test-detection-for-root-project: - name: Find New Flaky Tests In Root Project + name: Find New Flaky Tests In Chainlink Project uses: ./.github/workflows/find-new-flaky-tests.yml if: ${{ github.event_name == 'pull_request' }} with: @@ -471,12 +471,11 @@ jobs: projectPath: '.' baseRef: ${{ github.base_ref }} headRef: ${{ github.head_ref }} - runThreshold: '1' - runWithRace: true + runThreshold: '0.99' findByTestFilesDiff: true findByAffectedPackages: false slackNotificationAfterTestsChannelId: 'C07TRF65CNS' #flaky-test-detector-notifications - extraArgs: '{ "skipped_tests": "TestChainComponents" }' + extraArgs: '{ "skipped_tests": "TestChainComponents", "run_with_race": "true", "print_failed_tests": "true", "test_repeat_count": "3", "min_pass_ratio": "0.01" }' secrets: SLACK_BOT_TOKEN: ${{ secrets.QA_SLACK_API_KEY }} @@ -490,12 +489,11 @@ jobs: projectPath: 'deployment' baseRef: ${{ github.base_ref }} headRef: ${{ github.head_ref }} - runThreshold: '1' - runWithRace: true + runThreshold: '0.99' findByTestFilesDiff: true findByAffectedPackages: false slackNotificationAfterTestsChannelId: 'C07TRF65CNS' #flaky-test-detector-notifications - extraArgs: '{ "skipped_tests": "TestAddLane" }' + extraArgs: '{ "skipped_tests": "TestAddLane", "run_with_race": "true", "print_failed_tests": "true", "test_repeat_count": "3", "min_pass_ratio": "0.01" }' secrets: SLACK_BOT_TOKEN: ${{ secrets.QA_SLACK_API_KEY }} diff --git a/.github/workflows/find-new-flaky-tests.yml b/.github/workflows/find-new-flaky-tests.yml index fb3676b30c8..ee27ac37562 100644 --- a/.github/workflows/find-new-flaky-tests.yml +++ b/.github/workflows/find-new-flaky-tests.yml @@ -1,4 +1,4 @@ -name: Find New Flaky Tests +name: Find Flaky Tests on: workflow_call: @@ -19,17 +19,17 @@ on: headRef: required: false type: string - description: 'The head reference or branch to compare changes for detecting flaky tests. Default is the current branch.' + description: 'The head reference or branch to compare changes for detecting flaky tests. Default is the current branch.' + runAllTests: + required: false + type: boolean + description: 'Run all tests in the project.' + default: false runThreshold: required: false type: string description: 'The threshold for the number of times a test can fail before being considered flaky.' - default: '0.8' - runWithRace: - required: false - type: boolean - description: 'Run tests with -race flag.' - default: true + default: '0.9' findByTestFilesDiff: required: false type: boolean @@ -56,18 +56,25 @@ on: env: GIT_HEAD_REF: ${{ inputs.headRef || github.ref }} SKIPPED_TESTS: ${{ fromJson(inputs.extraArgs)['skipped_tests'] || '' }} # Comma separated list of test names to skip running in the flaky detector. 
Related issue: TT-1823 - MAX_GROUP_SIZE: ${{ fromJson(inputs.extraArgs)['max_group_size'] || '8' }} # The maximum number of jobs to run in parallel when running tests. - RUN_COUNT: ${{ fromJson(inputs.extraArgs)['run_count'] || '5' }} # The number of times to run the tests to detect flaky tests. + DEFAULT_MAX_RUNNER_COUNT: ${{ fromJson(inputs.extraArgs)['default_max_runner_count'] || '8' }} # The default maximum number of GitHub runners to use for parallel test execution. + ALL_TESTS_RUNNER_COUNT: ${{ fromJson(inputs.extraArgs)['all_tests_runner_count'] || '2' }} # The number of GitHub runners to use when running all tests `runAllTests=true`. + TEST_REPEAT_COUNT: ${{ fromJson(inputs.extraArgs)['test_repeat_count'] || '5' }} # The number of times each runner should run a test to detect flaky tests. + RUN_WITH_RACE: ${{ fromJson(inputs.extraArgs)['run_with_race'] || 'true' }} # Whether to run tests with -race flag. + ALL_TESTS_RUNNER: ${{ fromJson(inputs.extraArgs)['all_tests_runner'] || 'ubuntu22.04-32cores-128GB' }} # The runner to use for running all tests. + DEFAULT_RUNNER: 'ubuntu-latest' # The default runner to use for running tests. + UPLOAD_ALL_TEST_RESULTS: ${{ fromJson(inputs.extraArgs)['upload_all_test_results'] || 'false' }} # Whether to upload all test results as artifacts. + PRINT_FAILED_TESTS: ${{ fromJson(inputs.extraArgs)['print_failed_tests'] || 'false' }} # Whether to print failed tests in the GitHub console. + MIN_PASS_RATIO: ${{ fromJson(inputs.extraArgs)['min_pass_ratio'] || '0.001' }} # The minimum pass ratio for a test to be considered as flaky. Used to distinguish between tests that are truly flaky (with inconsistent results) and those that are consistently failing. Set to 0 if you want to consider all failed tests as flaky. jobs: - find-tests: - name: Find Tests To Run + get-tests: + name: Get Tests To Run runs-on: ubuntu-latest outputs: matrix: ${{ steps.split-packages.outputs.matrix }} workflow_id: ${{ steps.gen_id.outputs.workflow_id }} changed_test_files: ${{ steps.find-changed-test-files.outputs.test_files }} - affected_test_packages: ${{ steps.find-tests.outputs.packages }} + affected_test_packages: ${{ steps.get-tests.outputs.packages }} git_head_sha: ${{ steps.get_commit_sha.outputs.git_head_sha }} git_head_short_sha: ${{ steps.get_commit_sha.outputs.git_head_short_sha }} steps: @@ -93,10 +100,11 @@ jobs: - name: Install flakeguard shell: bash - run: go install github.com/smartcontractkit/chainlink-testing-framework/tools/flakeguard@cb4c307f6f0a79a20097129cda7c151d8c5b5d28 + run: go install github.com/smartcontractkit/chainlink-testing-framework/tools/flakeguard@897bca304fc9f0e68b87579558750c4a3e83adec - name: Find new or updated test packages - id: find-tests + if: ${{ inputs.runAllTests == false }} + id: get-tests shell: bash env: # Needed to run go test -list @@ -110,6 +118,7 @@ jobs: echo "packages=$PACKAGES" >> $GITHUB_OUTPUT - name: Find changed test files + if: ${{ inputs.runAllTests == false }} id: find-changed-test-files shell: bash env: @@ -125,11 +134,25 @@ jobs: - name: Split test packages into groups id: split-packages - if: steps.find-tests.outputs.packages != '' shell: bash run: | - PACKAGES=(${{ steps.find-tests.outputs.packages }}) - DESIRED_GROUP_COUNT=$((${{ env.MAX_GROUP_SIZE }})) + if [[ "${{ inputs.runAllTests }}" == "true" ]]; then + # Use ALL_TESTS_RUNNER for a specified number of groups, each with "./..." 
to run all tests + ALL_TESTS_RUNNER_COUNT=${{ env.ALL_TESTS_RUNNER_COUNT }} + + # Create the JSON array dynamically based on ALL_TESTS_RUNNER_COUNT + json_groups=$(jq -nc --argjson count "$ALL_TESTS_RUNNER_COUNT" \ + '[range(0; $count) | { "testPackages": "./...", "runs_on": "'"${{ env.ALL_TESTS_RUNNER }}"'" }]') + + echo "$json_groups" + echo "matrix<> $GITHUB_OUTPUT + echo "$json_groups" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + exit 0 + fi + + PACKAGES=(${{ steps.get-tests.outputs.packages }}) + DESIRED_GROUP_COUNT=$((${{ env.DEFAULT_MAX_RUNNER_COUNT }})) TOTAL_PACKAGES=${#PACKAGES[@]} # Number of groups should be no more than the number of packages @@ -150,15 +173,17 @@ jobs: # Extract the packages for the current group if [[ $group_size -gt 0 ]]; then group=("${PACKAGES[@]:current_index:group_size}") - groups+=("$(IFS=,; echo "${group[*]}")") + groups+=("{\"testPackages\":\"$(IFS=,; echo "${group[*]}")\", \"runs_on\":\"${{ env.DEFAULT_RUNNER }}\"}") current_index=$(($current_index + $group_size)) fi done # Convert groups array into a JSON array - json_groups=$(printf '%s\n' "${groups[@]}" | jq -R . | jq -cs .) - echo $json_groups - echo "matrix=$json_groups" >> $GITHUB_OUTPUT + json_groups=$(printf '%s\n' "${groups[@]}" | jq -s .) + echo "$json_groups" + echo "matrix<> $GITHUB_OUTPUT + echo "$json_groups" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT - name: Generate random workflow id id: gen_id @@ -167,13 +192,14 @@ jobs: run-tests: name: Run Tests - needs: find-tests - runs-on: ubuntu-latest - if: ${{ needs.find-tests.outputs.matrix != '' }} + needs: get-tests + runs-on: ${{ matrix.runs_on }} + if: ${{ needs.get-tests.outputs.matrix != '' && needs.get-tests.outputs.matrix != '[]' }} + timeout-minutes: 90 strategy: fail-fast: false matrix: - testPackages: ${{ fromJson(needs.find-tests.outputs.matrix) }} + include: ${{ fromJson(needs.get-tests.outputs.matrix) }} env: DB_URL: postgresql://postgres:postgres@localhost:5432/chainlink_test?sslmode=disable steps: @@ -233,11 +259,11 @@ jobs: - name: Install flakeguard shell: bash - run: go install github.com/smartcontractkit/chainlink-testing-framework/tools/flakeguard@cb4c307f6f0a79a20097129cda7c151d8c5b5d28 + run: go install github.com/smartcontractkit/chainlink-testing-framework/tools/flakeguard@897bca304fc9f0e68b87579558750c4a3e83adec - name: Run tests with flakeguard shell: bash - run: flakeguard run --project-path=${{ inputs.projectPath }} --test-packages=${{ matrix.testPackages }} --run-count=${{ env.RUN_COUNT }} --threshold=${{ inputs.runThreshold }} --race=${{ inputs.runWithRace }} --skip-tests=${{ env.SKIPPED_TESTS }} --output-json=test-result.json + run: flakeguard run --project-path=${{ inputs.projectPath }} --test-packages=${{ matrix.testPackages }} --run-count=${{ env.TEST_REPEAT_COUNT }} --min-pass-ratio=${{ env.MIN_PASS_RATIO }} --threshold=${{ inputs.runThreshold }} --race=${{ env.RUN_WITH_RACE }} --skip-tests=${{ env.SKIPPED_TESTS }} --print-failed-tests=${{ env.PRINT_FAILED_TESTS }} --output-json=test-result.json env: CL_DATABASE_URL: ${{ env.DB_URL }} @@ -245,12 +271,12 @@ jobs: if: always() uses: actions/upload-artifact@v4.4.3 with: - name: test-result-${{ needs.find-tests.outputs.workflow_id }}-${{ steps.gen_id.outputs.id }} + name: test-result-${{ needs.get-tests.outputs.workflow_id }}-${{ steps.gen_id.outputs.id }} path: test-result.json - retention-days: 7 + retention-days: 1 report: - needs: [find-tests, run-tests] + needs: [get-tests, run-tests] if: always() name: Report runs-on: ubuntu-latest @@ 
-261,9 +287,9 @@ jobs: id: set_project_path_pretty run: | if [ "${{ inputs.projectPath }}" = "." ]; then - echo "path=./go.mod" >> $GITHUB_OUTPUT + echo "path=github.com/${{ github.repository }}" >> $GITHUB_OUTPUT else - echo "path=${{ inputs.projectPath }}/go.mod" >> $GITHUB_OUTPUT + echo "path=github.com/${{ github.repository }}/${{ inputs.projectPath }}" >> $GITHUB_OUTPUT fi - name: Download all test result artifacts @@ -271,8 +297,12 @@ jobs: with: path: test_results pattern: - test-result-${{ needs.find-tests.outputs.workflow_id }}-* - + test-result-${{ needs.get-tests.outputs.workflow_id }}-* + + - name: Install flakeguard + shell: bash + run: go install github.com/smartcontractkit/chainlink-testing-framework/tools/flakeguard@897bca304fc9f0e68b87579558750c4a3e83adec + - name: Set combined test results id: set_test_results shell: bash @@ -281,12 +311,28 @@ jobs: if [ -d "test_results" ]; then cd test_results ls -R . - find . -name '*.json' -exec cat {} + | jq -s 'add | sort_by(.PassRatio)' > all_tests.json - ALL_TESTS_COUNT=$(jq 'length' all_tests.json) + + # Fix flakeguard binary path + PATH=$PATH:$(go env GOPATH)/bin + export PATH + + # Use flakeguard aggregate-all to aggregate test results + flakeguard aggregate-all --results-path . --output-results ../all_tests.json + + # Count all tests + ALL_TESTS_COUNT=$(jq 'length' ../all_tests.json) echo "All tests count: $ALL_TESTS_COUNT" echo "all_tests_count=$ALL_TESTS_COUNT" >> "$GITHUB_OUTPUT" - jq -c 'map(select(.PassRatio < ($runThreshold | tonumber) and .Skipped != true)) | map(.PassRatio |= (. * 100 | tostring + "%"))' all_tests.json --arg runThreshold '${{ inputs.runThreshold }}' > failed_tests.json - FAILED_TESTS_COUNT=$(jq 'length' failed_tests.json) + + # Use flakeguard aggregate-failed to filter and output failed tests based on PassRatio threshold + flakeguard aggregate-failed --threshold "${{ inputs.runThreshold }}" --min-pass-ratio=${{ env.MIN_PASS_RATIO }} --results-path . 
--output-results ../failed_tests.json --output-logs ../failed_test_logs.json + + # Count failed tests + if [ -f "../failed_tests.json" ]; then + FAILED_TESTS_COUNT=$(jq 'length' ../failed_tests.json) + else + FAILED_TESTS_COUNT=0 + fi echo "Failed tests count: $FAILED_TESTS_COUNT" echo "failed_tests_count=$FAILED_TESTS_COUNT" >> "$GITHUB_OUTPUT" else @@ -305,47 +351,73 @@ jobs: if: ${{ fromJson(steps.set_test_results.outputs.failed_tests_count) > 0 }} uses: actions/upload-artifact@v4.4.3 with: - name: failed_tests.json - path: test_results/failed_tests.json + path: failed_tests.json + name: failed-test-results.json retention-days: 7 + - name: Upload Failed Test Logs as Artifact + if: ${{ fromJson(steps.set_test_results.outputs.failed_tests_count) > 0 }} + uses: actions/upload-artifact@v4.4.3 + with: + path: failed_test_logs.json + name: failed-test-logs.json + retention-days: 7 + + - name: Upload All Test Results as Artifact + if: ${{ fromJson(steps.set_test_results.outputs.all_tests_count) > 0 && env.UPLOAD_ALL_TEST_RESULTS == 'true' }} + uses: actions/upload-artifact@v4.4.3 + with: + path: all_tests.json + name: all-test-results.json + retention-days: 7 + - name: Create ASCII table with failed test results if: ${{ fromJson(steps.set_test_results.outputs.failed_tests_count) > 0 }} shell: bash run: | - jq -r '["TestPackage", "TestName", "PassRatio", "RunCount", "Skipped"], ["---------", "---------", "---------", "---------", "---------"], (.[] | [.TestPackage, .TestName, .PassRatio, .Runs, .Skipped]) | @tsv' test_results/failed_tests.json | column -t -s$'\t' > test_results/failed_tests_ascii.txt - cat test_results/failed_tests_ascii.txt + jq -r '["TestPackage", "TestName", "PassRatio", "RunCount", "Skipped"], ["---------", "---------", "---------", "---------", "---------"], (.[] | [.TestPackage, .TestName, .PassRatioPercentage, .Runs, .Skipped]) | @tsv' failed_tests.json | column -t -s$'\t' > failed_tests_ascii.txt + cat failed_tests_ascii.txt - name: Create ASCII table with all test results if: ${{ fromJson(steps.set_test_results.outputs.all_tests_count) > 0 }} shell: bash run: | - jq -r '["TestPackage", "TestName", "PassRatio", "RunCount", "Skipped"], ["---------", "---------", "---------", "---------", "---------"], (.[] | [.TestPackage, .TestName, .PassRatio, .Runs, .Skipped]) | @tsv' test_results/all_tests.json | column -t -s$'\t' > test_results/all_tests_ascii.txt - cat test_results/all_tests_ascii.txt + jq -r '["TestPackage", "TestName", "PassRatio", "RunCount", "Skipped"], ["---------", "---------", "---------", "---------", "---------"], (.[] | [.TestPackage, .TestName, .PassRatioPercentage, .Runs, .Skipped]) | @tsv' all_tests.json | column -t -s$'\t' > all_tests_ascii.txt + cat all_tests_ascii.txt - - name: Create GitHub Summary + - name: Create GitHub Summary (General) + run: | + echo "## Flaky Test Detection Report for ${{ steps.set_project_path_pretty.outputs.path }} Project" >> $GITHUB_STEP_SUMMARY + + - name: Create GitHub Summary (Comparative Test Analysis) + if: ${{ inputs.runAllTests == false }} run: | - echo "## Flaky Test Detection Summary" >> $GITHUB_STEP_SUMMARY echo "### Comparative Test Analysis" >> $GITHUB_STEP_SUMMARY - echo "Checked changes between \`${{ inputs.baseRef }}\` and \`${{ env.GIT_HEAD_REF }}\` for ${{ steps.set_project_path_pretty.outputs.path }} project. See all changes [here](${{ inputs.repoUrl }}/compare/${{ inputs.baseRef }}...${{ needs.find-tests.outputs.git_head_sha }}#files_bucket)." 
>> $GITHUB_STEP_SUMMARY + echo "Checked changes between \`${{ inputs.baseRef }}\` and \`${{ env.GIT_HEAD_REF }}\`. See all changes [here](${{ inputs.repoUrl }}/compare/${{ inputs.baseRef }}...${{ needs.get-tests.outputs.git_head_sha }}#files_bucket)." >> $GITHUB_STEP_SUMMARY + + - name: Create GitHub Summary (All Tests) + if: ${{ inputs.runAllTests == 'true' }} + run: | + echo "### Running All Tests" >> $GITHUB_STEP_SUMMARY + echo "All tests are being executed as \`runAllTests\` is set to true." >> $GITHUB_STEP_SUMMARY - name: Append Changed Test Files to GitHub Summary - if: ${{ needs.find-tests.outputs.changed_test_files != '' && inputs.findByTestFilesDiff && !inputs.findByAffectedPackages }} + if: ${{ needs.get-tests.outputs.changed_test_files != '' && inputs.findByTestFilesDiff && !inputs.findByAffectedPackages }} run: | echo "### Changed Test Files" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY - IFS=' ' read -ra ADDR <<< "${{ needs.find-tests.outputs.changed_test_files }}" + IFS=' ' read -ra ADDR <<< "${{ needs.get-tests.outputs.changed_test_files }}" for file in "${ADDR[@]}"; do echo "$file" >> $GITHUB_STEP_SUMMARY done echo '```' >> $GITHUB_STEP_SUMMARY - name: Append Affected Test Packages to GitHub Summary - if: ${{ needs.find-tests.outputs.affected_test_packages != '' }} + if: ${{ needs.get-tests.outputs.affected_test_packages != '' }} run: | echo "### Affected Test Packages" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY - IFS=' ' read -ra ADDR <<< "${{ needs.find-tests.outputs.affected_test_packages }}" + IFS=' ' read -ra ADDR <<< "${{ needs.get-tests.outputs.affected_test_packages }}" for package in "${ADDR[@]}"; do echo "$package" >> $GITHUB_STEP_SUMMARY done @@ -355,55 +427,76 @@ jobs: if: ${{ fromJson(steps.set_test_results.outputs.failed_tests_count) > 0 }} id: read_failed_tests run: | - file_content=$(cat test_results/failed_tests_ascii.txt) + file_content=$(cat failed_tests_ascii.txt) echo "failed_tests_content<> $GITHUB_OUTPUT echo "$file_content" >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT - - name: Append Failed Tests to GitHub Summary + - name: Calculate Test Repeat Count + id: calculate_test_repeat_count + shell: bash + run: | + # Convert environment variables to integers + ALL_TESTS_RUNNER_COUNT=${{ env.ALL_TESTS_RUNNER_COUNT }} + TEST_REPEAT_COUNT=${{ env.TEST_REPEAT_COUNT }} + + # If runAllTests input is true, multiply the number of runners by the test repeat count as each runner runs all tests + # Otherwise, use the test repeat count as each runner runs unique tests + if [[ "${{ inputs.runAllTests }}" == "true" ]]; then + test_repeat_count=$(( ALL_TESTS_RUNNER_COUNT * TEST_REPEAT_COUNT )) + else + test_repeat_count=$TEST_REPEAT_COUNT + fi + echo "test_repeat_count=$test_repeat_count" >> $GITHUB_OUTPUT + + - name: Append Flaky Tests to GitHub Summary if: ${{ fromJson(steps.set_test_results.outputs.failed_tests_count) > 0 }} run: | - threshold_percentage=$(echo "${{ inputs.runThreshold }}" | awk '{printf "%.0f", $1 * 100}') - echo "### Failed Tests :x:" >> $GITHUB_STEP_SUMMARY - echo "Ran \`${{ steps.set_test_results.outputs.all_tests_count }}\` tests in total for all affected test packages. 
Below are the tests identified as flaky, with a pass ratio lower than the \`${threshold_percentage}%\` threshold:" >> $GITHUB_STEP_SUMMARY + threshold_percentage=$(echo "${{ inputs.runThreshold }}" | awk '{printf "%.2f", $1 * 100}') + min_pass_ratio_percentage=$(echo "${{ env.MIN_PASS_RATIO }}" | awk '{printf "%.2f", $1 * 100}') + echo "### Flaky Tests :x:" >> $GITHUB_STEP_SUMMARY + echo "Ran ${{ steps.set_test_results.outputs.all_tests_count }} unique tests ${{ steps.calculate_test_repeat_count.outputs.test_repeat_count }} times. Below are the tests identified as flaky, with a pass ratio lower than the ${threshold_percentage}% threshold:" >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY - cat test_results/failed_tests_ascii.txt >> $GITHUB_STEP_SUMMARY + cat failed_tests_ascii.txt >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY - echo "For detailed logs of the failed tests, please refer to the 'failed_tests.json' file in the Artifacts section at the bottom of the page." >> $GITHUB_STEP_SUMMARY + echo "For detailed logs of the failed tests, please refer to the failed-test-results.json and failed-test-logs.json files in the Artifacts section at the bottom of the page. failed-test-logs.json contains all outputs from failed tests." >> $GITHUB_STEP_SUMMARY - - name: Append Success Note if All Tests Passed + - name: Append Success Note if No Flaky Tests Found if: ${{ fromJson(steps.set_test_results.outputs.all_tests_count) > 0 && fromJson(steps.set_test_results.outputs.failed_tests_count) == 0 }} run: | - echo "### All Tests Passed! :white_check_mark:" >> $GITHUB_STEP_SUMMARY - echo "Ran \`${{ steps.set_test_results.outputs.all_tests_count }}\` tests in total and found no flakes." >> $GITHUB_STEP_SUMMARY + echo "### No Flaky Tests Found! :white_check_mark:" >> $GITHUB_STEP_SUMMARY + echo "Ran \`${{ steps.set_test_results.outputs.all_tests_count }}\` unique tests ${{ steps.calculate_test_repeat_count.outputs.test_repeat_count }} times and found no flakes." 
>> $GITHUB_STEP_SUMMARY - name: Append Additional Info to GitHub Summary if: ${{ fromJson(steps.set_test_results.outputs.all_tests_count) > 0 }} run: | echo "### Settings" >> $GITHUB_STEP_SUMMARY - threshold_percentage=$(echo "${{ inputs.runThreshold }}" | awk '{printf "%.0f", $1 * 100}') + threshold_percentage=$(echo "${{ inputs.runThreshold }}" | awk '{printf "%.2f", $1 * 100}') + min_pass_ratio_percentage=$(echo "${{ env.MIN_PASS_RATIO }}" | awk '{printf "%.2f", $1 * 100}') echo "| **Setting** | **Value** |" >> $GITHUB_STEP_SUMMARY echo "|-------------------------|------------|" >> $GITHUB_STEP_SUMMARY + echo "| Go Project | ${{ steps.set_project_path_pretty.outputs.path }} |" >> $GITHUB_STEP_SUMMARY + echo "| Minimum Pass Ratio | ${min_pass_ratio_percentage}% |" >> $GITHUB_STEP_SUMMARY echo "| Flakiness Threshold | ${threshold_percentage}% |" >> $GITHUB_STEP_SUMMARY - echo "| Test Run Count | ${{ env.RUN_COUNT }} |" >> $GITHUB_STEP_SUMMARY - echo "| Race Detection | ${{ inputs.runWithRace }} |" >> $GITHUB_STEP_SUMMARY + echo "| Test Run Count | ${{ steps.calculate_test_repeat_count.outputs.test_repeat_count }} |" >> $GITHUB_STEP_SUMMARY + echo "| Race Detection | ${{ env.RUN_WITH_RACE }} |" >> $GITHUB_STEP_SUMMARY echo "| Excluded Tests | ${{ env.SKIPPED_TESTS }} |" >> $GITHUB_STEP_SUMMARY - name: Append No Tests Found Message to GitHub Summary if: ${{ fromJson(steps.set_test_results.outputs.all_tests_count) == 0 }} run: | echo "### No Tests To Execute" >> $GITHUB_STEP_SUMMARY - echo "No updated or new tests found for \`${{ steps.set_project_path_pretty.outputs.path }}\` project. The flaky detector will not run." >> $GITHUB_STEP_SUMMARY + echo "No updated or new Go tests found for ${{ steps.set_project_path_pretty.outputs.path }} project. The flaky detector will not run." >> $GITHUB_STEP_SUMMARY - name: Post comment on PR if flaky tests found if: ${{ fromJson(steps.set_test_results.outputs.failed_tests_count) > 0 && github.event_name == 'pull_request' }} uses: actions/github-script@v7 env: MESSAGE_BODY_1: '### Flaky Test Detector for `${{ steps.set_project_path_pretty.outputs.path }}` project has failed :x:' - MESSAGE_BODY_2: 'Ran new or updated tests between `${{ inputs.baseRef }}` and ${{ needs.find-tests.outputs.git_head_sha }} (`${{ env.GIT_HEAD_REF }}`).' - MESSAGE_BODY_3: ${{ format('[View Flaky Detector Details]({0}/{1}/actions/runs/{2}) | [Compare Changes]({3}/compare/{4}...{5}#files_bucket)', github.server_url, github.repository, github.run_id, inputs.repoUrl, github.base_ref, needs.find-tests.outputs.git_head_sha) }} - MESSAGE_BODY_4: '#### Failed Tests' - MESSAGE_BODY_5: 'Ran ${{ steps.set_test_results.outputs.all_tests_count }} tests in total for all affected test packages. Below are the tests identified as flaky, with a pass ratio lower than the ${{ steps.calculate_threshold.outputs.threshold_percentage }}% threshold:' + MESSAGE_BODY_2: 'Ran new or updated tests between `${{ inputs.baseRef }}` and ${{ needs.get-tests.outputs.git_head_sha }} (`${{ env.GIT_HEAD_REF }}`).' + MESSAGE_BODY_3: ${{ format('[View Flaky Detector Details]({0}/{1}/actions/runs/{2}) | [Compare Changes]({3}/compare/{4}...{5}#files_bucket)', github.server_url, github.repository, github.run_id, inputs.repoUrl, github.base_ref, needs.get-tests.outputs.git_head_sha) }} + MESSAGE_BODY_4: '#### Flaky Tests' + MESSAGE_BODY_5: 'Ran ${{ steps.set_test_results.outputs.all_tests_count }} unique tests. 
Below are the tests identified as flaky, with a pass ratio lower than the ${{ steps.calculate_threshold.outputs.threshold_percentage }}% threshold:' MESSAGE_BODY_6: '```' MESSAGE_BODY_7: '${{ steps.read_failed_tests.outputs.failed_tests_content }}' MESSAGE_BODY_8: '```' @@ -450,21 +543,21 @@ jobs: "type": "section", "text": { "type": "mrkdwn", - "text": "Flaky Test Detector for ${{ steps.set_project_path_pretty.outputs.path }} project - ${{ contains(join(needs.*.result, ','), 'failure') && 'Failed :x:' || contains(join(needs.*.result, ','), 'cancelled') && 'Was cancelled :warning:' || 'Passed :white_check_mark:' }}" + "text": "Flaky Test Detector for `${{ steps.set_project_path_pretty.outputs.path }}` project - ${{ contains(join(needs.*.result, ','), 'failure') && 'Failed :x:' || contains(join(needs.*.result, ','), 'cancelled') && 'Was cancelled :warning:' || 'Passed :white_check_mark:' }}" } }, { "type": "section", "text": { "type": "mrkdwn", - "text": "Ran changed tests between `${{ inputs.baseRef }}` and `${{ needs.find-tests.outputs.git_head_short_sha }}` (`${{ env.GIT_HEAD_REF }}`)." + "text": "Ran changed tests between `${{ inputs.baseRef }}` and `${{ needs.get-tests.outputs.git_head_short_sha }}` (`${{ env.GIT_HEAD_REF }}`)." } }, { "type": "section", "text": { "type": "mrkdwn", - "text": "${{ format('<{0}/{1}/actions/runs/{2}|View Flaky Detector Details> | <{3}/compare/{4}...{5}#files_bucket|Compare Changes>{6}', github.server_url, github.repository, github.run_id, inputs.repoUrl, inputs.baseRef, needs.find-tests.outputs.git_head_sha, github.event_name == 'pull_request' && format(' | <{0}|View PR>', github.event.pull_request.html_url) || '') }}" + "text": "${{ format('<{0}/{1}/actions/runs/{2}|View Flaky Detector Details> | <{3}/compare/{4}...{5}#files_bucket|Compare Changes>{6}', github.server_url, github.repository, github.run_id, inputs.repoUrl, inputs.baseRef, needs.get-tests.outputs.git_head_sha, github.event_name == 'pull_request' && format(' | <{0}|View PR>', github.event.pull_request.html_url) || '') }}" } } ] diff --git a/.github/workflows/run-find-new-flaky-tests.yml b/.github/workflows/run-find-new-flaky-tests.yml index 238da78df2b..d1318719349 100644 --- a/.github/workflows/run-find-new-flaky-tests.yml +++ b/.github/workflows/run-find-new-flaky-tests.yml @@ -1,4 +1,4 @@ -name: Find New Flaky Tests +name: Find Flaky Tests on: workflow_dispatch: @@ -22,16 +22,16 @@ on: required: false type: string description: 'The head reference or branch to compare changes for detecting flaky tests. Default is the current branch.' + runAllTests: + required: false + type: boolean + description: 'Run all tests in the project.' + default: false runThreshold: required: false type: string description: 'The threshold for the number of times a test can fail before being considered flaky.' default: '0.8' - runWithRace: - required: false - type: boolean - description: 'Run tests with -race flag.' 
- default: true findByTestFilesDiff: required: false type: boolean @@ -54,7 +54,7 @@ on: jobs: trigger-flaky-test-detection: - name: Find New Flaky Tests + name: Find Flaky Tests uses: ./.github/workflows/find-new-flaky-tests.yml with: repoUrl: ${{ inputs.repoUrl }} @@ -62,7 +62,7 @@ jobs: projectPath: ${{ inputs.projectPath }} headRef: ${{ inputs.headRef }} runThreshold: ${{ inputs.runThreshold }} - runWithRace: ${{ inputs.runWithRace }} + runAllTests: ${{ inputs.runAllTests }} findByTestFilesDiff: ${{ inputs.findByTestFilesDiff }} findByAffectedPackages: ${{ inputs.findByAffectedPackages }} slackNotificationAfterTestsChannelId: ${{ inputs.slack_notification_after_tests_channel_id }} diff --git a/.github/workflows/run-nightly-flaky-test-detector.yml b/.github/workflows/run-nightly-flaky-test-detector.yml new file mode 100644 index 00000000000..615233a6106 --- /dev/null +++ b/.github/workflows/run-nightly-flaky-test-detector.yml @@ -0,0 +1,21 @@ +name: Run Nightly Flaky Test Detector + +on: + schedule: + # Run every night at 3:00 AM UTC + - cron: '0 3 * * *' + +jobs: + trigger-flaky-test-detection: + name: Find Flaky Tests + uses: ./.github/workflows/find-new-flaky-tests.yml + with: + repoUrl: 'https://github.com/smartcontractkit/chainlink' + baseRef: 'origin/develop' + projectPath: '.' + runThreshold: '1' + runAllTests: 'true' + extraArgs: '{ "skipped_tests": "TestChainComponents", "test_repeat_count": "5", "all_tests_runner": "ubuntu22.04-32cores-128GB", "all_tests_runner_count": "3", "min_pass_ratio": "0" }' + secrets: + SLACK_BOT_TOKEN: ${{ secrets.QA_SLACK_API_KEY }} + \ No newline at end of file From 99ab8b436b1c0c39861b3954967984f5dcbd258b Mon Sep 17 00:00:00 2001 From: Vyzaldy Sanchez Date: Thu, 14 Nov 2024 12:34:25 -0400 Subject: [PATCH 03/13] Refactor `DeleteJob` to improve performance (#15228) * Refactors `DeleteJob` to receive the job type * Fixes CI --- core/services/job/job_orm_test.go | 24 +++---- core/services/job/kv_orm_test.go | 2 +- core/services/job/mocks/orm.go | 21 +++--- core/services/job/orm.go | 75 +++++++------------- core/services/job/orm_test.go | 1 + core/services/job/runner_integration_test.go | 6 +- core/services/job/spawner.go | 2 +- 7 files changed, 56 insertions(+), 75 deletions(-) diff --git a/core/services/job/job_orm_test.go b/core/services/job/job_orm_test.go index bc6ec0d6ea2..0aa79f2f0b2 100644 --- a/core/services/job/job_orm_test.go +++ b/core/services/job/job_orm_test.go @@ -155,7 +155,7 @@ func TestORM(t *testing.T) { require.NoError(t, err) require.Len(t, dbSpecs, 3) - err = orm.DeleteJob(testutils.Context(t), jb.ID) + err = orm.DeleteJob(testutils.Context(t), jb.ID, jb.Type) require.NoError(t, err) dbSpecs = []job.Job{} @@ -312,7 +312,7 @@ func TestORM(t *testing.T) { require.Equal(t, bhsJob.BlockhashStoreSpec.RunTimeout, savedJob.BlockhashStoreSpec.RunTimeout) require.Equal(t, bhsJob.BlockhashStoreSpec.EVMChainID, savedJob.BlockhashStoreSpec.EVMChainID) require.Equal(t, bhsJob.BlockhashStoreSpec.FromAddresses, savedJob.BlockhashStoreSpec.FromAddresses) - err = orm.DeleteJob(ctx, bhsJob.ID) + err = orm.DeleteJob(ctx, bhsJob.ID, bhsJob.Type) require.NoError(t, err) _, err = orm.FindJob(testutils.Context(t), bhsJob.ID) require.Error(t, err) @@ -344,7 +344,7 @@ func TestORM(t *testing.T) { require.Equal(t, bhsJob.BlockHeaderFeederSpec.FromAddresses, savedJob.BlockHeaderFeederSpec.FromAddresses) require.Equal(t, bhsJob.BlockHeaderFeederSpec.GetBlockhashesBatchSize, savedJob.BlockHeaderFeederSpec.GetBlockhashesBatchSize) require.Equal(t, 
bhsJob.BlockHeaderFeederSpec.StoreBlockhashesBatchSize, savedJob.BlockHeaderFeederSpec.StoreBlockhashesBatchSize) - err = orm.DeleteJob(ctx, bhsJob.ID) + err = orm.DeleteJob(ctx, bhsJob.ID, bhsJob.Type) require.NoError(t, err) _, err = orm.FindJob(testutils.Context(t), bhsJob.ID) require.Error(t, err) @@ -387,7 +387,7 @@ func TestORM_DeleteJob_DeletesAssociatedRecords(t *testing.T) { cltest.AssertCount(t, db, "ocr_oracle_specs", 1) cltest.AssertCount(t, db, "pipeline_specs", 1) - err = jobORM.DeleteJob(ctx, jb.ID) + err = jobORM.DeleteJob(ctx, jb.ID, jb.Type) require.NoError(t, err) cltest.AssertCount(t, db, "ocr_oracle_specs", 0) cltest.AssertCount(t, db, "pipeline_specs", 0) @@ -403,7 +403,7 @@ func TestORM_DeleteJob_DeletesAssociatedRecords(t *testing.T) { cltest.AssertCount(t, db, "keeper_registries", 1) cltest.AssertCount(t, db, "upkeep_registrations", 1) - err := jobORM.DeleteJob(ctx, keeperJob.ID) + err := jobORM.DeleteJob(ctx, keeperJob.ID, keeperJob.Type) require.NoError(t, err) cltest.AssertCount(t, db, "keeper_specs", 0) cltest.AssertCount(t, db, "keeper_registries", 0) @@ -423,7 +423,7 @@ func TestORM_DeleteJob_DeletesAssociatedRecords(t *testing.T) { require.NoError(t, err) cltest.AssertCount(t, db, "vrf_specs", 1) cltest.AssertCount(t, db, "jobs", 1) - err = jobORM.DeleteJob(ctx, jb.ID) + err = jobORM.DeleteJob(ctx, jb.ID, jb.Type) require.NoError(t, err) cltest.AssertCount(t, db, "vrf_specs", 0) cltest.AssertCount(t, db, "jobs", 0) @@ -436,7 +436,7 @@ func TestORM_DeleteJob_DeletesAssociatedRecords(t *testing.T) { _, err := db.Exec(`INSERT INTO external_initiator_webhook_specs (external_initiator_id, webhook_spec_id, spec) VALUES ($1,$2,$3)`, ei.ID, webhookSpec.ID, `{"ei": "foo", "name": "webhookSpecTwoEIs"}`) require.NoError(t, err) - err = jobORM.DeleteJob(ctx, jb.ID) + err = jobORM.DeleteJob(ctx, jb.ID, jb.Type) require.NoError(t, err) cltest.AssertCount(t, db, "webhook_specs", 0) cltest.AssertCount(t, db, "external_initiator_webhook_specs", 0) @@ -523,7 +523,7 @@ func TestORM_CreateJob_VRFV2(t *testing.T) { var vrfOwnerAddress evmtypes.EIP55Address require.NoError(t, db.Get(&vrfOwnerAddress, `SELECT vrf_owner_address FROM vrf_specs LIMIT 1`)) require.Equal(t, "0x32891BD79647DC9136Fc0a59AAB48c7825eb624c", vrfOwnerAddress.Address().String()) - require.NoError(t, jobORM.DeleteJob(ctx, jb.ID)) + require.NoError(t, jobORM.DeleteJob(ctx, jb.ID, jb.Type)) cltest.AssertCount(t, db, "vrf_specs", 0) cltest.AssertCount(t, db, "jobs", 0) @@ -536,7 +536,7 @@ func TestORM_CreateJob_VRFV2(t *testing.T) { require.Equal(t, int64(0), requestedConfsDelay) require.NoError(t, db.Get(&requestTimeout, `SELECT request_timeout FROM vrf_specs LIMIT 1`)) require.Equal(t, 1*time.Hour, requestTimeout) - require.NoError(t, jobORM.DeleteJob(ctx, jb.ID)) + require.NoError(t, jobORM.DeleteJob(ctx, jb.ID, jb.Type)) cltest.AssertCount(t, db, "vrf_specs", 0) cltest.AssertCount(t, db, "jobs", 0) } @@ -607,7 +607,7 @@ func TestORM_CreateJob_VRFV2Plus(t *testing.T) { require.ElementsMatch(t, fromAddresses, actual) var vrfOwnerAddress evmtypes.EIP55Address require.Error(t, db.Get(&vrfOwnerAddress, `SELECT vrf_owner_address FROM vrf_specs LIMIT 1`)) - require.NoError(t, jobORM.DeleteJob(ctx, jb.ID)) + require.NoError(t, jobORM.DeleteJob(ctx, jb.ID, jb.Type)) cltest.AssertCount(t, db, "vrf_specs", 0) cltest.AssertCount(t, db, "jobs", 0) @@ -624,7 +624,7 @@ func TestORM_CreateJob_VRFV2Plus(t *testing.T) { require.Equal(t, int64(0), requestedConfsDelay) require.NoError(t, db.Get(&requestTimeout, `SELECT 
request_timeout FROM vrf_specs LIMIT 1`)) require.Equal(t, 1*time.Hour, requestTimeout) - require.NoError(t, jobORM.DeleteJob(ctx, jb.ID)) + require.NoError(t, jobORM.DeleteJob(ctx, jb.ID, jb.Type)) cltest.AssertCount(t, db, "vrf_specs", 0) cltest.AssertCount(t, db, "jobs", 0) } @@ -652,7 +652,7 @@ func TestORM_CreateJob_OCRBootstrap(t *testing.T) { require.NoError(t, db.Get(&relay, `SELECT relay FROM bootstrap_specs LIMIT 1`)) require.Equal(t, "evm", relay) - require.NoError(t, jobORM.DeleteJob(ctx, jb.ID)) + require.NoError(t, jobORM.DeleteJob(ctx, jb.ID, jb.Type)) cltest.AssertCount(t, db, "bootstrap_specs", 0) cltest.AssertCount(t, db, "jobs", 0) } diff --git a/core/services/job/kv_orm_test.go b/core/services/job/kv_orm_test.go index 0f229f09d88..f943a4bb6b4 100644 --- a/core/services/job/kv_orm_test.go +++ b/core/services/job/kv_orm_test.go @@ -73,5 +73,5 @@ func TestJobKVStore(t *testing.T) { require.NoError(t, err) require.Equal(t, td2, fetchedBytes) - require.NoError(t, jobORM.DeleteJob(ctx, jobID)) + require.NoError(t, jobORM.DeleteJob(ctx, jobID, jb.Type)) } diff --git a/core/services/job/mocks/orm.go b/core/services/job/mocks/orm.go index 38bf08ab3fd..89426b55a21 100644 --- a/core/services/job/mocks/orm.go +++ b/core/services/job/mocks/orm.go @@ -277,17 +277,17 @@ func (_c *ORM_DataSource_Call) RunAndReturn(run func() sqlutil.DataSource) *ORM_ return _c } -// DeleteJob provides a mock function with given fields: ctx, id -func (_m *ORM) DeleteJob(ctx context.Context, id int32) error { - ret := _m.Called(ctx, id) +// DeleteJob provides a mock function with given fields: ctx, id, jobType +func (_m *ORM) DeleteJob(ctx context.Context, id int32, jobType job.Type) error { + ret := _m.Called(ctx, id, jobType) if len(ret) == 0 { panic("no return value specified for DeleteJob") } var r0 error - if rf, ok := ret.Get(0).(func(context.Context, int32) error); ok { - r0 = rf(ctx, id) + if rf, ok := ret.Get(0).(func(context.Context, int32, job.Type) error); ok { + r0 = rf(ctx, id, jobType) } else { r0 = ret.Error(0) } @@ -303,13 +303,14 @@ type ORM_DeleteJob_Call struct { // DeleteJob is a helper method to define mock.On call // - ctx context.Context // - id int32 -func (_e *ORM_Expecter) DeleteJob(ctx interface{}, id interface{}) *ORM_DeleteJob_Call { - return &ORM_DeleteJob_Call{Call: _e.mock.On("DeleteJob", ctx, id)} +// - jobType job.Type +func (_e *ORM_Expecter) DeleteJob(ctx interface{}, id interface{}, jobType interface{}) *ORM_DeleteJob_Call { + return &ORM_DeleteJob_Call{Call: _e.mock.On("DeleteJob", ctx, id, jobType)} } -func (_c *ORM_DeleteJob_Call) Run(run func(ctx context.Context, id int32)) *ORM_DeleteJob_Call { +func (_c *ORM_DeleteJob_Call) Run(run func(ctx context.Context, id int32, jobType job.Type)) *ORM_DeleteJob_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].(int32)) + run(args[0].(context.Context), args[1].(int32), args[2].(job.Type)) }) return _c } @@ -319,7 +320,7 @@ func (_c *ORM_DeleteJob_Call) Return(_a0 error) *ORM_DeleteJob_Call { return _c } -func (_c *ORM_DeleteJob_Call) RunAndReturn(run func(context.Context, int32) error) *ORM_DeleteJob_Call { +func (_c *ORM_DeleteJob_Call) RunAndReturn(run func(context.Context, int32, job.Type) error) *ORM_DeleteJob_Call { _c.Call.Return(run) return _c } diff --git a/core/services/job/orm.go b/core/services/job/orm.go index a86da5f7111..1e1d8a98700 100644 --- a/core/services/job/orm.go +++ b/core/services/job/orm.go @@ -52,7 +52,7 @@ type ORM interface { FindJobIDByAddress(ctx 
context.Context, address evmtypes.EIP55Address, evmChainID *big.Big) (int32, error) FindOCR2JobIDByAddress(ctx context.Context, contractID string, feedID *common.Hash) (int32, error) FindJobIDsWithBridge(ctx context.Context, name string) ([]int32, error) - DeleteJob(ctx context.Context, id int32) error + DeleteJob(ctx context.Context, id int32, jobType Type) error RecordError(ctx context.Context, jobID int32, description string) error // TryRecordError is a helper which calls RecordError and logs the returned error if present. TryRecordError(ctx context.Context, jobID int32, description string) @@ -709,14 +709,35 @@ func (o *orm) InsertJob(ctx context.Context, job *Job) error { } // DeleteJob removes a job -func (o *orm) DeleteJob(ctx context.Context, id int32) error { +func (o *orm) DeleteJob(ctx context.Context, id int32, jobType Type) error { o.lggr.Debugw("Deleting job", "jobID", id) + queries := map[Type]string{ + DirectRequest: `DELETE FROM direct_request_specs WHERE id IN (SELECT direct_request_spec_id FROM deleted_jobs)`, + FluxMonitor: `DELETE FROM flux_monitor_specs WHERE id IN (SELECT flux_monitor_spec_id FROM deleted_jobs)`, + OffchainReporting: `DELETE FROM ocr_oracle_specs WHERE id IN (SELECT ocr_oracle_spec_id FROM deleted_jobs)`, + OffchainReporting2: `DELETE FROM ocr2_oracle_specs WHERE id IN (SELECT ocr2_oracle_spec_id FROM deleted_jobs)`, + Keeper: `DELETE FROM keeper_specs WHERE id IN (SELECT keeper_spec_id FROM deleted_jobs)`, + Cron: `DELETE FROM cron_specs WHERE id IN (SELECT cron_spec_id FROM deleted_jobs)`, + VRF: `DELETE FROM vrf_specs WHERE id IN (SELECT vrf_spec_id FROM deleted_jobs)`, + Webhook: `DELETE FROM webhook_specs WHERE id IN (SELECT webhook_spec_id FROM deleted_jobs)`, + BlockhashStore: `DELETE FROM blockhash_store_specs WHERE id IN (SELECT blockhash_store_spec_id FROM deleted_jobs)`, + Bootstrap: `DELETE FROM bootstrap_specs WHERE id IN (SELECT bootstrap_spec_id FROM deleted_jobs)`, + BlockHeaderFeeder: `DELETE FROM block_header_feeder_specs WHERE id IN (SELECT block_header_feeder_spec_id FROM deleted_jobs)`, + Gateway: `DELETE FROM gateway_specs WHERE id IN (SELECT gateway_spec_id FROM deleted_jobs)`, + Workflow: `DELETE FROM workflow_specs WHERE id in (SELECT workflow_spec_id FROM deleted_jobs)`, + StandardCapabilities: `DELETE FROM standardcapabilities_specs WHERE id in (SELECT standard_capabilities_spec_id FROM deleted_jobs)`, + CCIP: `DELETE FROM ccip_specs WHERE id in (SELECT ccip_spec_id FROM deleted_jobs)`, + } + q, ok := queries[jobType] + if !ok { + return errors.Errorf("job type %s not supported", jobType) + } // Added a 1-minute timeout to this query since this can take a long time as data increases. // This was added specifically due to an issue with a database that had a million of pipeline_runs and pipeline_task_runs // and this query was taking ~40secs. 
ctx, cancel := context.WithTimeout(sqlutil.WithoutDefaultTimeout(ctx), time.Minute) defer cancel() - query := ` + query := fmt.Sprintf(` WITH deleted_jobs AS ( DELETE FROM jobs WHERE id = $1 RETURNING id, @@ -736,55 +757,13 @@ func (o *orm) DeleteJob(ctx context.Context, id int32) error { standard_capabilities_spec_id, ccip_spec_id ), - deleted_oracle_specs AS ( - DELETE FROM ocr_oracle_specs WHERE id IN (SELECT ocr_oracle_spec_id FROM deleted_jobs) - ), - deleted_oracle2_specs AS ( - DELETE FROM ocr2_oracle_specs WHERE id IN (SELECT ocr2_oracle_spec_id FROM deleted_jobs) - ), - deleted_keeper_specs AS ( - DELETE FROM keeper_specs WHERE id IN (SELECT keeper_spec_id FROM deleted_jobs) - ), - deleted_cron_specs AS ( - DELETE FROM cron_specs WHERE id IN (SELECT cron_spec_id FROM deleted_jobs) - ), - deleted_fm_specs AS ( - DELETE FROM flux_monitor_specs WHERE id IN (SELECT flux_monitor_spec_id FROM deleted_jobs) - ), - deleted_vrf_specs AS ( - DELETE FROM vrf_specs WHERE id IN (SELECT vrf_spec_id FROM deleted_jobs) - ), - deleted_webhook_specs AS ( - DELETE FROM webhook_specs WHERE id IN (SELECT webhook_spec_id FROM deleted_jobs) - ), - deleted_dr_specs AS ( - DELETE FROM direct_request_specs WHERE id IN (SELECT direct_request_spec_id FROM deleted_jobs) - ), - deleted_blockhash_store_specs AS ( - DELETE FROM blockhash_store_specs WHERE id IN (SELECT blockhash_store_spec_id FROM deleted_jobs) - ), - deleted_bootstrap_specs AS ( - DELETE FROM bootstrap_specs WHERE id IN (SELECT bootstrap_spec_id FROM deleted_jobs) - ), - deleted_block_header_feeder_specs AS ( - DELETE FROM block_header_feeder_specs WHERE id IN (SELECT block_header_feeder_spec_id FROM deleted_jobs) - ), - deleted_gateway_specs AS ( - DELETE FROM gateway_specs WHERE id IN (SELECT gateway_spec_id FROM deleted_jobs) - ), - deleted_workflow_specs AS ( - DELETE FROM workflow_specs WHERE id in (SELECT workflow_spec_id FROM deleted_jobs) - ), - deleted_standardcapabilities_specs AS ( - DELETE FROM standardcapabilities_specs WHERE id in (SELECT standard_capabilities_spec_id FROM deleted_jobs) - ), - deleted_ccip_specs AS ( - DELETE FROM ccip_specs WHERE id in (SELECT ccip_spec_id FROM deleted_jobs) + deleted_specific_specs AS ( + %s ), deleted_job_pipeline_specs AS ( DELETE FROM job_pipeline_specs WHERE job_id IN (SELECT id FROM deleted_jobs) RETURNING pipeline_spec_id ) - DELETE FROM pipeline_specs WHERE id IN (SELECT pipeline_spec_id FROM deleted_job_pipeline_specs)` + DELETE FROM pipeline_specs WHERE id IN (SELECT pipeline_spec_id FROM deleted_job_pipeline_specs)`, q) res, err := o.ds.ExecContext(ctx, query, id) if err != nil { return errors.Wrap(err, "DeleteJob failed to delete job") diff --git a/core/services/job/orm_test.go b/core/services/job/orm_test.go index 11f3e94f2d4..6bfe141ad59 100644 --- a/core/services/job/orm_test.go +++ b/core/services/job/orm_test.go @@ -7,6 +7,7 @@ import ( "github.com/stretchr/testify/require" "github.com/smartcontractkit/chainlink-common/pkg/sqlutil" + "github.com/smartcontractkit/chainlink/v2/core/bridges" "github.com/smartcontractkit/chainlink/v2/core/internal/testutils/configtest" "github.com/smartcontractkit/chainlink/v2/core/internal/testutils/evmtest" diff --git a/core/services/job/runner_integration_test.go b/core/services/job/runner_integration_test.go index af3a1f5698d..7856c4f151f 100644 --- a/core/services/job/runner_integration_test.go +++ b/core/services/job/runner_integration_test.go @@ -185,7 +185,7 @@ func TestRunner(t *testing.T) { require.Equal(t, 1, len(jids)) // But if we delete 
the job, then we can.
-	require.NoError(t, jobORM.DeleteJob(ctx, jb.ID))
+	require.NoError(t, jobORM.DeleteJob(ctx, jb.ID, jb.Type))
 	jids, err = jobORM.FindJobIDsWithBridge(ctx, bridge.Name.String())
 	require.NoError(t, err)
 	require.Equal(t, 0, len(jids))
@@ -660,7 +660,7 @@ answer1 [type=median index=0];
 	}
 
 	// Ensure we can delete an errored job
-	err = jobORM.DeleteJob(ctx, jb.ID)
+	err = jobORM.DeleteJob(ctx, jb.ID, jb.Type)
 	require.NoError(t, err)
 	se = []job.SpecError{}
 	err = db.Select(&se, `SELECT * FROM job_spec_errors`)
@@ -745,7 +745,7 @@ answer1 [type=median index=0];
 	assert.Equal(t, "4242", results.Values[0].(decimal.Decimal).String())
 
 	// Delete the job
-	err = jobORM.DeleteJob(ctx, jb.ID)
+	err = jobORM.DeleteJob(ctx, jb.ID, jb.Type)
 	require.NoError(t, err)
 
 	// Create another run, it should fail
diff --git a/core/services/job/spawner.go b/core/services/job/spawner.go
index 16889cbe10b..cc45320de10 100644
--- a/core/services/job/spawner.go
+++ b/core/services/job/spawner.go
@@ -315,7 +315,7 @@ func (js *spawner) DeleteJob(ctx context.Context, ds sqlutil.DataSource, jobID i
 		lggr.Debugw("Callback: BeforeDeleteJob done")
 
 		err := sqlutil.Transact(ctx, js.orm.WithDataSource, ds, nil, func(tx ORM) error {
-			err := tx.DeleteJob(ctx, jobID)
+			err := tx.DeleteJob(ctx, jobID, aj.spec.Type)
 			if err != nil {
 				js.lggr.Errorw("Error deleting job", "jobID", jobID, "err", err)
 				return err

From b805b82557de20a1c360b704d6c056799f4b38dd Mon Sep 17 00:00:00 2001
From: Patrick
Date: Thu, 14 Nov 2024 12:21:56 -0500
Subject: [PATCH 04/13] adding custom message w/ workflow execution ID when
 duration exceeds 15 minutes (#15241)

* adding custom message w/ workflow execution ID when duration exceeds 15 minutes

* adding logging for internal events

* fixing Info --> Infof
---
 core/services/workflows/engine.go | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/core/services/workflows/engine.go b/core/services/workflows/engine.go
index e20af85540d..69c36c1c174 100644
--- a/core/services/workflows/engine.go
+++ b/core/services/workflows/engine.go
@@ -27,6 +27,8 @@ import (
 	"github.com/smartcontractkit/chainlink/v2/core/services/workflows/store"
 )
 
+const fifteenMinutesMs = 15 * 60 * 1000
+
 type stepRequest struct {
 	stepRef string
 	state   store.WorkflowExecution
@@ -622,7 +624,7 @@ func (e *Engine) handleStepUpdate(ctx context.Context, stepUpdate store.Workflow
 		// the async nature of the workflow engine would provide no guarantees.
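		// In condensed form, the check this patch adds below in finishExecution
		// (a sketch for orientation; executionDuration is assumed to be measured
		// in milliseconds, as the Ms suffix of fifteenMinutesMs suggests):
		//
		//	if executionDuration > fifteenMinutesMs {
		//		logCustMsg(ctx, cma, fmt.Sprintf("execution duration exceeded 15 minutes: %d", executionDuration), l)
		//		l.Warnf("execution duration exceeded 15 minutes: %d", executionDuration)
		//	}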
} logCustMsg(ctx, cma, "execution status: "+status, l) - return e.finishExecution(ctx, state.ExecutionID, status) + return e.finishExecution(ctx, cma, state.ExecutionID, status) } // Finally, since the workflow hasn't timed out or completed, let's @@ -669,9 +671,12 @@ func (e *Engine) queueIfReady(state store.WorkflowExecution, step *step) { } } -func (e *Engine) finishExecution(ctx context.Context, executionID string, status string) error { - e.logger.With(platform.KeyWorkflowExecutionID, executionID, "status", status).Info("finishing execution") +func (e *Engine) finishExecution(ctx context.Context, cma custmsg.MessageEmitter, executionID string, status string) error { + l := e.logger.With(platform.KeyWorkflowExecutionID, executionID, "status", status) metrics := e.metrics.with("status", status) + + l.Info("finishing execution") + err := e.executionStates.UpdateStatus(ctx, executionID, status) if err != nil { return err @@ -687,6 +692,13 @@ func (e *Engine) finishExecution(ctx context.Context, executionID string, status e.stepUpdatesChMap.remove(executionID) metrics.updateTotalWorkflowsGauge(ctx, e.stepUpdatesChMap.len()) metrics.updateWorkflowExecutionLatencyGauge(ctx, executionDuration) + + if executionDuration > fifteenMinutesMs { + logCustMsg(ctx, cma, fmt.Sprintf("execution duration exceeded 15 minutes: %d", executionDuration), l) + l.Warnf("execution duration exceeded 15 minutes: %d", executionDuration) + } + logCustMsg(ctx, cma, fmt.Sprintf("execution duration: %d", executionDuration), l) + l.Infof("execution duration: %d", executionDuration) e.onExecutionFinished(executionID) return nil } From c015da815f61533f63a0e7138d444293be9d4e9d Mon Sep 17 00:00:00 2001 From: Matthew Pendrey Date: Thu, 14 Nov 2024 21:26:59 +0000 Subject: [PATCH 05/13] Action Remote Shims Register/Unregister (#15232) * execute capability shims * shims * common bump * review comment * common bump * step ref * build --- core/capabilities/launcher.go | 58 ++- core/capabilities/launcher_test.go | 7 +- .../remote/{target => executable}/client.go | 147 ++++--- .../remote/executable/client_test.go | 383 ++++++++++++++++++ .../{target => executable}/endtoend_test.go | 263 +++++++++--- .../request/client_request.go | 120 ++++-- .../request/client_request_test.go | 189 ++++++++- .../request/server_request.go | 104 +++-- .../request/server_request_test.go | 155 +++++-- .../remote/{target => executable}/server.go | 40 +- .../{target => executable}/server_test.go | 166 +++++++- .../capabilities/remote/target/client_test.go | 316 --------------- core/capabilities/remote/types/types.go | 10 +- .../local_target_capability_test.go | 8 +- .../capabilities/transmission/transmission.go | 25 +- .../transmission/transmission_test.go | 14 +- core/scripts/go.mod | 2 +- core/scripts/go.sum | 4 +- core/services/workflows/engine.go | 4 +- deployment/go.mod | 2 +- deployment/go.sum | 4 +- go.mod | 2 +- go.sum | 4 +- integration-tests/go.mod | 2 +- integration-tests/go.sum | 4 +- integration-tests/load/go.mod | 2 +- integration-tests/load/go.sum | 4 +- 27 files changed, 1438 insertions(+), 601 deletions(-) rename core/capabilities/remote/{target => executable}/client.go (51%) create mode 100644 core/capabilities/remote/executable/client_test.go rename core/capabilities/remote/{target => executable}/endtoend_test.go (50%) rename core/capabilities/remote/{target => executable}/request/client_request.go (53%) rename core/capabilities/remote/{target => executable}/request/client_request_test.go (61%) rename core/capabilities/remote/{target 
=> executable}/request/server_request.go (64%)
 rename core/capabilities/remote/{target => executable}/request/server_request_test.go (55%)
 rename core/capabilities/remote/{target => executable}/server.go (83%)
 rename core/capabilities/remote/{target => executable}/server_test.go (58%)
 delete mode 100644 core/capabilities/remote/target/client_test.go

diff --git a/core/capabilities/launcher.go b/core/capabilities/launcher.go
index 8deb42fdd68..e75f2ebbc8f 100644
--- a/core/capabilities/launcher.go
+++ b/core/capabilities/launcher.go
@@ -19,7 +19,7 @@ import (
 	"github.com/smartcontractkit/chainlink-common/pkg/services"
 	"github.com/smartcontractkit/chainlink-common/pkg/values"
 
 	"github.com/smartcontractkit/chainlink/v2/core/capabilities/remote"
-	"github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target"
+	"github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/executable"
 	remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types"
 	"github.com/smartcontractkit/chainlink/v2/core/capabilities/streams"
 	"github.com/smartcontractkit/chainlink/v2/core/logger"
@@ -294,12 +294,26 @@ func (w *launcher) addRemoteCapabilities(ctx context.Context, myDON registrysync
 			return fmt.Errorf("failed to add trigger shim: %w", err)
 		}
 	case capabilities.CapabilityTypeAction:
-		w.lggr.Warn("no remote client configured for capability type action, skipping configuration")
+		newActionFn := func(info capabilities.CapabilityInfo) (capabilityService, error) {
+			client := executable.NewClient(
+				info,
+				myDON.DON,
+				w.dispatcher,
+				defaultTargetRequestTimeout,
+				w.lggr,
+			)
+			return client, nil
+		}
+
+		err := w.addToRegistryAndSetDispatcher(ctx, capability, remoteDON, newActionFn)
+		if err != nil {
+			return fmt.Errorf("failed to add action shim: %w", err)
+		}
 	case capabilities.CapabilityTypeConsensus:
 		w.lggr.Warn("no remote client configured for capability type consensus, skipping configuration")
 	case capabilities.CapabilityTypeTarget:
 		newTargetFn := func(info capabilities.CapabilityInfo) (capabilityService, error) {
-			client := target.NewClient(
+			client := executable.NewClient(
 				info,
 				myDON.DON,
 				w.dispatcher,
@@ -419,7 +433,34 @@ func (w *launcher) exposeCapabilities(ctx context.Context, myPeerID p2ptypes.Pee
 			// continue attempting other capabilities
 		}
 	case capabilities.CapabilityTypeAction:
-		w.lggr.Warn("no remote client configured for capability type action, skipping configuration")
+		newActionServer := func(cap capabilities.BaseCapability, info capabilities.CapabilityInfo) (remotetypes.ReceiverService, error) {
+			actionCapability, ok := (cap).(capabilities.ActionCapability)
+			if !ok {
+				return nil, errors.New("capability does not implement ActionCapability")
+			}
+
+			remoteConfig := &capabilities.RemoteExecutableConfig{}
+			if capabilityConfig.RemoteTargetConfig != nil {
+				remoteConfig.RequestHashExcludedAttributes = capabilityConfig.RemoteTargetConfig.RequestHashExcludedAttributes
+			}
+
+			return executable.NewServer(
+				remoteConfig,
+				myPeerID,
+				actionCapability,
+				info,
+				don.DON,
+				idsToDONs,
+				w.dispatcher,
+				defaultTargetRequestTimeout,
+				w.lggr,
+			), nil
+		}
+
+		err = w.addReceiver(ctx, capability, don, newActionServer)
+		if err != nil {
+			return fmt.Errorf("failed to add action server-side receiver: %w", err)
+		}
 	case capabilities.CapabilityTypeConsensus:
 		w.lggr.Warn("no remote client configured for capability type consensus, skipping configuration")
 	case capabilities.CapabilityTypeTarget:
@@ -429,8 +470,13 @@ func (w *launcher) 
exposeCapabilities(ctx context.Context, myPeerID p2ptypes.Pee return nil, errors.New("capability does not implement TargetCapability") } - return target.NewServer( - capabilityConfig.RemoteTargetConfig, + remoteConfig := &capabilities.RemoteExecutableConfig{} + if capabilityConfig.RemoteTargetConfig != nil { + remoteConfig.RequestHashExcludedAttributes = capabilityConfig.RemoteTargetConfig.RequestHashExcludedAttributes + } + + return executable.NewServer( + remoteConfig, myPeerID, targetCapability, info, diff --git a/core/capabilities/launcher_test.go b/core/capabilities/launcher_test.go index 3ebed639cb0..013463bfdbb 100644 --- a/core/capabilities/launcher_test.go +++ b/core/capabilities/launcher_test.go @@ -199,7 +199,7 @@ func TestLauncher(t *testing.T) { ) dispatcher.On("SetReceiver", fullTriggerCapID, dID, mock.AnythingOfType("*remote.triggerPublisher")).Return(nil) - dispatcher.On("SetReceiver", fullTargetID, dID, mock.AnythingOfType("*target.server")).Return(nil) + dispatcher.On("SetReceiver", fullTargetID, dID, mock.AnythingOfType("*executable.server")).Return(nil) err = launcher.Launch(ctx, state) require.NoError(t, err) @@ -603,7 +603,8 @@ func TestLauncher_RemoteTriggerModeAggregatorShim(t *testing.T) { ) dispatcher.On("SetReceiver", fullTriggerCapID, capDonID, mock.AnythingOfType("*remote.triggerSubscriber")).Return(nil) - dispatcher.On("SetReceiver", fullTargetID, capDonID, mock.AnythingOfType("*target.client")).Return(nil) + dispatcher.On("SetReceiver", fullTargetID, capDonID, mock.AnythingOfType("*executable.client")).Return(nil) + dispatcher.On("Ready").Return(nil).Maybe() awaitRegistrationMessageCh := make(chan struct{}) dispatcher.On("Send", mock.Anything, mock.Anything).Return(nil).Run(func(args mock.Arguments) { select { @@ -919,7 +920,7 @@ func TestLauncher_WiresUpClientsForPublicWorkflowDON(t *testing.T) { ) dispatcher.On("SetReceiver", fullTriggerCapID, capDonID, mock.AnythingOfType("*remote.triggerSubscriber")).Return(nil) - dispatcher.On("SetReceiver", fullTargetID, capDonID, mock.AnythingOfType("*target.client")).Return(nil) + dispatcher.On("SetReceiver", fullTargetID, capDonID, mock.AnythingOfType("*executable.client")).Return(nil) err = launcher.Launch(ctx, state) require.NoError(t, err) diff --git a/core/capabilities/remote/target/client.go b/core/capabilities/remote/executable/client.go similarity index 51% rename from core/capabilities/remote/target/client.go rename to core/capabilities/remote/executable/client.go index 8249c40bb01..08c773cdb86 100644 --- a/core/capabilities/remote/target/client.go +++ b/core/capabilities/remote/executable/client.go @@ -1,4 +1,4 @@ -package target +package executable import ( "context" @@ -8,15 +8,15 @@ import ( "time" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink-common/pkg/services" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target/request" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/executable/request" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/validation" "github.com/smartcontractkit/chainlink/v2/core/logger" ) -// client is a shim for remote target capabilities. +// client is a shim for remote executable capabilities. 
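 // (formerly the shim for remote target capabilities only; renamed because it
 // now also fronts action capabilities and the workflow register/unregister
 // calls)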
 // It translates between capability API calls and network messages.
 // Its responsibilities are:
 // 1. Transmit capability requests to remote nodes according to a transmission schedule
 // 2. Aggregate responses from remote nodes and return the aggregated response
 type client struct {
 	lggr                 logger.Logger
 	remoteCapabilityInfo commoncap.CapabilityInfo
 	localDONInfo         commoncap.DON
 	dispatcher           types.Dispatcher
 	requestTimeout       time.Duration
 
-	messageIDToCallerRequest map[string]*request.ClientRequest
+	requestIDToCallerRequest map[string]*request.ClientRequest
 
 	mutex  sync.Mutex
 	stopCh services.StopChan
 	wg     sync.WaitGroup
 }
 
-var _ commoncap.TargetCapability = &client{}
+var _ commoncap.ExecutableCapability = &client{}
 var _ types.Receiver = &client{}
 var _ services.Service = &client{}
 
 func NewClient(remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo commoncap.DON, dispatcher types.Dispatcher,
 	requestTimeout time.Duration, lggr logger.Logger) *client {
 	return &client{
-		lggr:                     lggr.Named("TargetClient"),
+		lggr:                     lggr.Named("ExecutableCapabilityClient"),
 		remoteCapabilityInfo:     remoteCapabilityInfo,
 		localDONInfo:             localDonInfo,
 		dispatcher:               dispatcher,
 		requestTimeout:           requestTimeout,
-		messageIDToCallerRequest: make(map[string]*request.ClientRequest),
+		requestIDToCallerRequest: make(map[string]*request.ClientRequest),
 		stopCh:                   make(services.StopChan),
 	}
 }
 
@@ -61,7 +61,13 @@ func (c *client) Start(ctx context.Context) error {
 			defer c.wg.Done()
 			c.checkForExpiredRequests()
 		}()
-		c.lggr.Info("TargetClient started")
+		c.wg.Add(1)
+		go func() {
+			defer c.wg.Done()
+			c.checkDispatcherReady()
+		}()
+
+		c.lggr.Info("ExecutableCapability Client started")
 		return nil
 	})
 }
 
@@ -71,11 +77,26 @@ func (c *client) Close() error {
 		close(c.stopCh)
 		c.cancelAllRequests(errors.New("client closed"))
 		c.wg.Wait()
-		c.lggr.Info("TargetClient closed")
+		c.lggr.Info("ExecutableCapability Client closed")
 		return nil
 	})
 }
 
+func (c *client) checkDispatcherReady() {
+	ticker := time.NewTicker(1 * time.Second)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-c.stopCh:
+			return
+		case <-ticker.C:
+			if err := c.dispatcher.Ready(); err != nil {
+				c.cancelAllRequests(fmt.Errorf("dispatcher not ready: %w", err))
+			}
+		}
+	}
+}
+
 func (c *client) checkForExpiredRequests() {
 	ticker := time.NewTicker(c.requestTimeout)
 	defer ticker.Stop()
@@ -93,10 +114,15 @@ func (c *client) expireRequests() {
 	c.mutex.Lock()
 	defer c.mutex.Unlock()
 
+	// cancelAllRequests re-acquires c.mutex, which is already held here, so
+	// cancel inline when the dispatcher is unavailable to avoid a self-deadlock.
+	if c.dispatcher.Ready() != nil {
+		for _, req := range c.requestIDToCallerRequest {
+			req.Cancel(errors.New("dispatcher not ready"))
+		}
+		return
+	}
+
-	for messageID, req := range c.messageIDToCallerRequest {
+	for messageID, req := range c.requestIDToCallerRequest {
 		if req.Expired() {
 			req.Cancel(errors.New("request expired"))
-			delete(c.messageIDToCallerRequest, messageID)
+			delete(c.requestIDToCallerRequest, messageID)
 		}
 	}
 }
 
@@ -104,7 +130,7 @@ func (c *client) cancelAllRequests(err error) {
 	c.mutex.Lock()
 	defer c.mutex.Unlock()
-	for _, req := range c.messageIDToCallerRequest {
+	for _, req := range c.requestIDToCallerRequest {
 		req.Cancel(err)
 	}
 }
 
@@ -113,49 +139,80 @@ func (c *client) Info(ctx context.Context) (commoncap.CapabilityInfo, error) {
 	return c.remoteCapabilityInfo, nil
 }
 
-func (c *client) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error {
-	// do nothing
-	return nil
-}
+func (c *client) RegisterToWorkflow(ctx context.Context, registerRequest commoncap.RegisterToWorkflowRequest) error {
+	req, err := request.NewClientRegisterToWorkflowRequest(ctx, c.lggr, registerRequest, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher,
+		c.requestTimeout)
+
+	if err != nil {
+		return fmt.Errorf("failed to create client request: 
%w", err) + } + + if err = c.sendRequest(req); err != nil { + return fmt.Errorf("failed to send request: %w", err) + } -func (c *client) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { - // do nothing + resp := <-req.ResponseChan() + if resp.Err != nil { + return fmt.Errorf("error executing request: %w", resp.Err) + } return nil } -func (c *client) Execute(ctx context.Context, capReq commoncap.CapabilityRequest) (commoncap.CapabilityResponse, error) { - req, err := c.executeRequest(ctx, capReq) +func (c *client) UnregisterFromWorkflow(ctx context.Context, unregisterRequest commoncap.UnregisterFromWorkflowRequest) error { + req, err := request.NewClientUnregisterFromWorkflowRequest(ctx, c.lggr, unregisterRequest, c.remoteCapabilityInfo, + c.localDONInfo, c.dispatcher, c.requestTimeout) + if err != nil { - return commoncap.CapabilityResponse{}, fmt.Errorf("failed to execute request: %w", err) + return fmt.Errorf("failed to create client request: %w", err) + } + + if err = c.sendRequest(req); err != nil { + return fmt.Errorf("failed to send request: %w", err) } resp := <-req.ResponseChan() - return resp.CapabilityResponse, resp.Err + if resp.Err != nil { + return fmt.Errorf("error executing request: %w", resp.Err) + } + return nil } -func (c *client) executeRequest(ctx context.Context, capReq commoncap.CapabilityRequest) (*request.ClientRequest, error) { - c.mutex.Lock() - defer c.mutex.Unlock() - - messageID, err := GetMessageIDForRequest(capReq) +func (c *client) Execute(ctx context.Context, capReq commoncap.CapabilityRequest) (commoncap.CapabilityResponse, error) { + req, err := request.NewClientExecuteRequest(ctx, c.lggr, capReq, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher, + c.requestTimeout) if err != nil { - return nil, fmt.Errorf("failed to get message ID for request: %w", err) + return commoncap.CapabilityResponse{}, fmt.Errorf("failed to create client request: %w", err) } - c.lggr.Debugw("executing remote target", "messageID", messageID) + if err = c.sendRequest(req); err != nil { + return commoncap.CapabilityResponse{}, fmt.Errorf("failed to send request: %w", err) + } - if _, ok := c.messageIDToCallerRequest[messageID]; ok { - return nil, fmt.Errorf("request for message ID %s already exists", messageID) + resp := <-req.ResponseChan() + if resp.Err != nil { + return commoncap.CapabilityResponse{}, fmt.Errorf("error executing request: %w", resp.Err) } - req, err := request.NewClientRequest(ctx, c.lggr, capReq, messageID, c.remoteCapabilityInfo, c.localDONInfo, c.dispatcher, - c.requestTimeout) + capabilityResponse, err := pb.UnmarshalCapabilityResponse(resp.Result) if err != nil { - return nil, fmt.Errorf("failed to create client request: %w", err) + return commoncap.CapabilityResponse{}, fmt.Errorf("failed to unmarshal capability response: %w", err) + } + + return capabilityResponse, nil +} + +func (c *client) sendRequest(req *request.ClientRequest) error { + c.mutex.Lock() + defer c.mutex.Unlock() + + c.lggr.Debugw("executing remote execute capability", "requestID", req.ID()) + + if _, ok := c.requestIDToCallerRequest[req.ID()]; ok { + return fmt.Errorf("request for ID %s already exists", req.ID()) } - c.messageIDToCallerRequest[messageID] = req - return req, nil + c.requestIDToCallerRequest[req.ID()] = req + return nil } func (c *client) Receive(ctx context.Context, msg *types.MessageBody) { @@ -168,9 +225,9 @@ func (c *client) Receive(ctx context.Context, msg *types.MessageBody) { return } - c.lggr.Debugw("Remote 
client target receiving message", "messageID", messageID) + c.lggr.Debugw("Remote client executable receiving message", "messageID", messageID) - req := c.messageIDToCallerRequest[messageID] + req := c.requestIDToCallerRequest[messageID] if req == nil { c.lggr.Warnw("received response for unknown message ID ", "messageID", messageID) return @@ -181,18 +238,6 @@ func (c *client) Receive(ctx context.Context, msg *types.MessageBody) { } } -func GetMessageIDForRequest(req commoncap.CapabilityRequest) (string, error) { - if err := validation.ValidateWorkflowOrExecutionID(req.Metadata.WorkflowID); err != nil { - return "", fmt.Errorf("workflow ID is invalid: %w", err) - } - - if err := validation.ValidateWorkflowOrExecutionID(req.Metadata.WorkflowExecutionID); err != nil { - return "", fmt.Errorf("workflow execution ID is invalid: %w", err) - } - - return req.Metadata.WorkflowID + req.Metadata.WorkflowExecutionID, nil -} - func (c *client) Ready() error { return nil } diff --git a/core/capabilities/remote/executable/client_test.go b/core/capabilities/remote/executable/client_test.go new file mode 100644 index 00000000000..5c4da350b9e --- /dev/null +++ b/core/capabilities/remote/executable/client_test.go @@ -0,0 +1,383 @@ +package executable_test + +import ( + "context" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" + "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" + "github.com/smartcontractkit/chainlink-common/pkg/services/servicetest" + "github.com/smartcontractkit/chainlink-common/pkg/values" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/executable" + remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" + "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" + "github.com/smartcontractkit/chainlink/v2/core/logger" + p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" +) + +const ( + stepReferenceID1 = "step1" + workflowID1 = "15c631d295ef5e32deb99a10ee6804bc4af13855687559d7ff6552ac6dbb2ce0" + workflowExecutionID1 = "95ef5e32deb99a10ee6804bc4af13855687559d7ff6552ac6dbb2ce0abbadeed" + workflowOwnerID = "0xAA" +) + +func Test_Client_DonTopologies(t *testing.T) { + ctx := testutils.Context(t) + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_OneAtATime, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + responseTest := func(t *testing.T, response commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + mp, err := response.Value.Unwrap() + require.NoError(t, err) + assert.Equal(t, "aValue1", mp.(map[string]any)["response"].(string)) + } + + capability := &TestCapability{} + + responseTimeOut := 10 * time.Minute + + var methods []func(caller commoncap.ExecutableCapability) + + methods = append(methods, func(caller commoncap.ExecutableCapability) { + executeInputs, err := values.NewMap(map[string]any{"executeValue1": "aValue1"}) + require.NoError(t, err) + executeMethod(ctx, caller, transmissionSchedule, executeInputs, responseTest, t) + }) + + methods = append(methods, func(caller commoncap.ExecutableCapability) { + registerToWorkflowMethod(ctx, caller, transmissionSchedule, func(t *testing.T, responseError error) { + require.NoError(t, responseError) + }, t) + }) + + methods = 
append(methods, func(caller commoncap.ExecutableCapability) { + unregisterFromWorkflowMethod(ctx, caller, transmissionSchedule, func(t *testing.T, responseError error) { + require.NoError(t, responseError) + }, t) + }) + + for _, method := range methods { + testClient(t, 1, responseTimeOut, 1, 0, + capability, method) + + testClient(t, 10, responseTimeOut, 1, 0, + capability, method) + + testClient(t, 1, responseTimeOut, 10, 3, + capability, method) + + testClient(t, 10, responseTimeOut, 10, 3, + capability, method) + + testClient(t, 10, responseTimeOut, 10, 9, + capability, method) + } +} + +func Test_Client_TransmissionSchedules(t *testing.T) { + ctx := testutils.Context(t) + + responseTest := func(t *testing.T, response commoncap.CapabilityResponse, responseError error) { + require.NoError(t, responseError) + mp, err := response.Value.Unwrap() + require.NoError(t, err) + assert.Equal(t, "aValue1", mp.(map[string]any)["response"].(string)) + } + + capability := &TestCapability{} + + responseTimeOut := 10 * time.Minute + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_OneAtATime, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + testClient(t, 1, responseTimeOut, 1, 0, + capability, func(caller commoncap.ExecutableCapability) { + executeInputs, err2 := values.NewMap(map[string]any{"executeValue1": "aValue1"}) + require.NoError(t, err2) + executeMethod(ctx, caller, transmissionSchedule, executeInputs, responseTest, t) + }) + testClient(t, 10, responseTimeOut, 10, 3, + capability, func(caller commoncap.ExecutableCapability) { + executeInputs, err2 := values.NewMap(map[string]any{"executeValue1": "aValue1"}) + require.NoError(t, err2) + executeMethod(ctx, caller, transmissionSchedule, executeInputs, responseTest, t) + }) + + transmissionSchedule, err = values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + testClient(t, 1, responseTimeOut, 1, 0, + capability, func(caller commoncap.ExecutableCapability) { + executeInputs, err := values.NewMap(map[string]any{"executeValue1": "aValue1"}) + require.NoError(t, err) + executeMethod(ctx, caller, transmissionSchedule, executeInputs, responseTest, t) + }) + testClient(t, 10, responseTimeOut, 10, 3, + capability, func(caller commoncap.ExecutableCapability) { + executeInputs, err := values.NewMap(map[string]any{"executeValue1": "aValue1"}) + require.NoError(t, err) + executeMethod(ctx, caller, transmissionSchedule, executeInputs, responseTest, t) + }) +} + +func Test_Client_TimesOutIfInsufficientCapabilityPeerResponses(t *testing.T) { + ctx := testutils.Context(t) + + responseTest := func(t *testing.T, response commoncap.CapabilityResponse, responseError error) { + assert.Error(t, responseError) + } + + capability := &TestCapability{} + + transmissionSchedule, err := values.NewMap(map[string]any{ + "schedule": transmission.Schedule_AllAtOnce, + "deltaStage": "10ms", + }) + require.NoError(t, err) + + // number of capability peers is less than F + 1 + + testClient(t, 10, 1*time.Second, 10, 11, + capability, + func(caller commoncap.ExecutableCapability) { + executeInputs, err := values.NewMap(map[string]any{"executeValue1": "aValue1"}) + require.NoError(t, err) + executeMethod(ctx, caller, transmissionSchedule, executeInputs, responseTest, t) + }) +} + +func testClient(t *testing.T, numWorkflowPeers int, workflowNodeResponseTimeout time.Duration, + numCapabilityPeers int, capabilityDonF uint8, underlying 
commoncap.ExecutableCapability, + method func(caller commoncap.ExecutableCapability)) { + lggr := logger.TestLogger(t) + + capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) + for i := 0; i < numCapabilityPeers; i++ { + capabilityPeers[i] = NewP2PPeerID(t) + } + + capDonInfo := commoncap.DON{ + ID: 1, + Members: capabilityPeers, + F: capabilityDonF, + } + + capInfo := commoncap.CapabilityInfo{ + ID: "cap_id@1.0.0", + CapabilityType: commoncap.CapabilityTypeTrigger, + Description: "Remote Executable Capability", + DON: &capDonInfo, + } + + workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) + for i := 0; i < numWorkflowPeers; i++ { + workflowPeers[i] = NewP2PPeerID(t) + } + + workflowDonInfo := commoncap.DON{ + Members: workflowPeers, + ID: 2, + } + + broker := newTestAsyncMessageBroker(t, 100) + + receivers := make([]remotetypes.Receiver, numCapabilityPeers) + for i := 0; i < numCapabilityPeers; i++ { + capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeers[i]) + receiver := newTestServer(capabilityPeers[i], capabilityDispatcher, workflowDonInfo, underlying) + broker.RegisterReceiverNode(capabilityPeers[i], receiver) + receivers[i] = receiver + } + + callers := make([]commoncap.ExecutableCapability, numWorkflowPeers) + + for i := 0; i < numWorkflowPeers; i++ { + workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) + caller := executable.NewClient(capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeResponseTimeout, lggr) + servicetest.Run(t, caller) + broker.RegisterReceiverNode(workflowPeers[i], caller) + callers[i] = caller + } + + servicetest.Run(t, broker) + + wg := &sync.WaitGroup{} + wg.Add(len(callers)) + + // Fire off all the requests + for _, caller := range callers { + go func(caller commoncap.ExecutableCapability) { + defer wg.Done() + method(caller) + }(caller) + } + + wg.Wait() +} + +func registerToWorkflowMethod(ctx context.Context, caller commoncap.ExecutableCapability, transmissionSchedule *values.Map, + responseTest func(t *testing.T, responseError error), t *testing.T) { + err := caller.RegisterToWorkflow(ctx, commoncap.RegisterToWorkflowRequest{ + Metadata: commoncap.RegistrationMetadata{ + WorkflowID: workflowID1, + ReferenceID: stepReferenceID1, + WorkflowOwner: workflowOwnerID, + }, + Config: transmissionSchedule, + }) + + responseTest(t, err) +} + +func unregisterFromWorkflowMethod(ctx context.Context, caller commoncap.ExecutableCapability, transmissionSchedule *values.Map, + responseTest func(t *testing.T, responseError error), t *testing.T) { + err := caller.UnregisterFromWorkflow(ctx, commoncap.UnregisterFromWorkflowRequest{ + Metadata: commoncap.RegistrationMetadata{ + WorkflowID: workflowID1, + ReferenceID: stepReferenceID1, + WorkflowOwner: workflowOwnerID, + }, + Config: transmissionSchedule, + }) + + responseTest(t, err) +} + +func executeMethod(ctx context.Context, caller commoncap.ExecutableCapability, transmissionSchedule *values.Map, + executeInputs *values.Map, responseTest func(t *testing.T, responseCh commoncap.CapabilityResponse, responseError error), t *testing.T) { + responseCh, err := caller.Execute(ctx, + commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{ + WorkflowID: workflowID1, + WorkflowExecutionID: workflowExecutionID1, + WorkflowOwner: workflowOwnerID, + }, + Config: transmissionSchedule, + Inputs: executeInputs, + }) + + responseTest(t, responseCh, err) +} + +// Simple client that only responds once it has received a message from each workflow peer +type 
clientTestServer struct { + peerID p2ptypes.PeerID + dispatcher remotetypes.Dispatcher + workflowDonInfo commoncap.DON + messageIDToSenders map[string]map[p2ptypes.PeerID]bool + + executableCapability commoncap.ExecutableCapability + + mux sync.Mutex +} + +func newTestServer(peerID p2ptypes.PeerID, dispatcher remotetypes.Dispatcher, workflowDonInfo commoncap.DON, + executableCapability commoncap.ExecutableCapability) *clientTestServer { + return &clientTestServer{ + dispatcher: dispatcher, + workflowDonInfo: workflowDonInfo, + peerID: peerID, + messageIDToSenders: make(map[string]map[p2ptypes.PeerID]bool), + executableCapability: executableCapability, + } +} + +func (t *clientTestServer) Receive(_ context.Context, msg *remotetypes.MessageBody) { + t.mux.Lock() + defer t.mux.Unlock() + + sender := toPeerID(msg.Sender) + messageID, err := executable.GetMessageID(msg) + if err != nil { + panic(err) + } + + if t.messageIDToSenders[messageID] == nil { + t.messageIDToSenders[messageID] = make(map[p2ptypes.PeerID]bool) + } + + sendersOfMessageID := t.messageIDToSenders[messageID] + if sendersOfMessageID[sender] { + panic("received duplicate message") + } + + sendersOfMessageID[sender] = true + + if len(t.messageIDToSenders[messageID]) == len(t.workflowDonInfo.Members) { + switch msg.Method { + case remotetypes.MethodExecute: + capabilityRequest, err := pb.UnmarshalCapabilityRequest(msg.Payload) + if err != nil { + panic(err) + } + resp, responseErr := t.executableCapability.Execute(context.Background(), capabilityRequest) + payload, marshalErr := pb.MarshalCapabilityResponse(resp) + t.sendResponse(messageID, responseErr, payload, marshalErr) + + case remotetypes.MethodRegisterToWorkflow: + registerRequest, err := pb.UnmarshalRegisterToWorkflowRequest(msg.Payload) + if err != nil { + panic(err) + } + responseErr := t.executableCapability.RegisterToWorkflow(context.Background(), registerRequest) + t.sendResponse(messageID, responseErr, nil, nil) + case remotetypes.MethodUnregisterFromWorkflow: + unregisterRequest, err := pb.UnmarshalUnregisterFromWorkflowRequest(msg.Payload) + if err != nil { + panic(err) + } + responseErr := t.executableCapability.UnregisterFromWorkflow(context.Background(), unregisterRequest) + t.sendResponse(messageID, responseErr, nil, nil) + default: + panic("unknown method") + } + } +} + +func (t *clientTestServer) sendResponse(messageID string, responseErr error, + payload []byte, marshalErr error) { + for receiver := range t.messageIDToSenders[messageID] { + var responseMsg = &remotetypes.MessageBody{ + CapabilityId: "cap_id@1.0.0", + CapabilityDonId: 1, + CallerDonId: t.workflowDonInfo.ID, + Method: remotetypes.MethodExecute, + MessageId: []byte(messageID), + Sender: t.peerID[:], + Receiver: receiver[:], + } + + if responseErr != nil { + responseMsg.Error = remotetypes.Error_INTERNAL_ERROR + } else { + if marshalErr != nil { + panic(marshalErr) + } + responseMsg.Payload = payload + } + + err := t.dispatcher.Send(receiver, responseMsg) + if err != nil { + panic(err) + } + } +} diff --git a/core/capabilities/remote/target/endtoend_test.go b/core/capabilities/remote/executable/endtoend_test.go similarity index 50% rename from core/capabilities/remote/target/endtoend_test.go rename to core/capabilities/remote/executable/endtoend_test.go index 0cade4f7855..29f29ed9ee1 100644 --- a/core/capabilities/remote/target/endtoend_test.go +++ b/core/capabilities/remote/executable/endtoend_test.go @@ -1,4 +1,4 @@ -package target_test +package executable_test import ( "context" @@ -18,7 
+18,7 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/services/servicetest" "github.com/smartcontractkit/chainlink-common/pkg/utils/tests" "github.com/smartcontractkit/chainlink-common/pkg/values" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/executable" remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" @@ -26,7 +26,7 @@ import ( p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) -func Test_RemoteTargetCapability_InsufficientCapabilityResponses(t *testing.T) { +func Test_RemoteExecutableCapability_InsufficientCapabilityResponses(t *testing.T) { ctx := testutils.Context(t) responseTest := func(t *testing.T, responseCh commoncap.CapabilityResponse, responseError error) { @@ -41,10 +41,30 @@ func Test_RemoteTargetCapability_InsufficientCapabilityResponses(t *testing.T) { }) require.NoError(t, err) - testRemoteTarget(ctx, t, capability, 10, 9, 10*time.Millisecond, 10, 10, 10*time.Minute, transmissionSchedule, responseTest) + var methods []func(ctx context.Context, caller commoncap.ExecutableCapability) + + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + executeCapability(ctx, t, caller, transmissionSchedule, responseTest) + }) + + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + registerWorkflow(ctx, t, caller, transmissionSchedule, func(t *testing.T, responseError error) { + require.Error(t, responseError) + }) + }) + + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + unregisterWorkflow(ctx, t, caller, transmissionSchedule, func(t *testing.T, responseError error) { + require.Error(t, responseError) + }) + }) + + for _, method := range methods { + testRemoteExecutableCapability(ctx, t, capability, 10, 9, 10*time.Millisecond, 10, 10, 10*time.Minute, method) + } } -func Test_RemoteTargetCapability_InsufficientWorkflowRequests(t *testing.T) { +func Test_RemoteExecutableCapability_InsufficientWorkflowRequests(t *testing.T) { ctx := testutils.Context(t) responseTest := func(t *testing.T, responseCh commoncap.CapabilityResponse, responseError error) { @@ -61,10 +81,30 @@ func Test_RemoteTargetCapability_InsufficientWorkflowRequests(t *testing.T) { }) require.NoError(t, err) - testRemoteTarget(ctx, t, capability, 10, 10, 10*time.Millisecond, 10, 9, timeOut, transmissionSchedule, responseTest) + var methods []func(ctx context.Context, caller commoncap.ExecutableCapability) + + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + executeCapability(ctx, t, caller, transmissionSchedule, responseTest) + }) + + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + registerWorkflow(ctx, t, caller, transmissionSchedule, func(t *testing.T, responseError error) { + require.Error(t, responseError) + }) + }) + + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + unregisterWorkflow(ctx, t, caller, transmissionSchedule, func(t *testing.T, responseError error) { + require.Error(t, responseError) + }) + }) + + for _, method := range methods { + testRemoteExecutableCapability(ctx, t, capability, 10, 10, 10*time.Millisecond, 10, 9, timeOut, 
method) + } } -func Test_RemoteTargetCapability_TransmissionSchedules(t *testing.T) { +func Test_RemoteExecutableCapability_TransmissionSchedules(t *testing.T) { ctx := testutils.Context(t) responseTest := func(t *testing.T, response commoncap.CapabilityResponse, responseError error) { @@ -84,18 +124,24 @@ func Test_RemoteTargetCapability_TransmissionSchedules(t *testing.T) { capability := &TestCapability{} - testRemoteTarget(ctx, t, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) + method := func(ctx context.Context, caller commoncap.ExecutableCapability) { + executeCapability(ctx, t, caller, transmissionSchedule, responseTest) + } + testRemoteExecutableCapability(ctx, t, capability, 10, 9, timeOut, 10, 9, timeOut, method) transmissionSchedule, err = values.NewMap(map[string]any{ "schedule": transmission.Schedule_AllAtOnce, "deltaStage": "10ms", }) require.NoError(t, err) + method = func(ctx context.Context, caller commoncap.ExecutableCapability) { + executeCapability(ctx, t, caller, transmissionSchedule, responseTest) + } - testRemoteTarget(ctx, t, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) + testRemoteExecutableCapability(ctx, t, capability, 10, 9, timeOut, 10, 9, timeOut, method) } -func Test_RemoteTargetCapability_DonTopologies(t *testing.T) { +func Test_RemoteExecutionCapability_DonTopologies(t *testing.T) { ctx := testutils.Context(t) responseTest := func(t *testing.T, response commoncap.CapabilityResponse, responseError error) { @@ -115,26 +161,42 @@ func Test_RemoteTargetCapability_DonTopologies(t *testing.T) { capability := &TestCapability{} - // Test scenarios where the number of submissions is greater than or equal to F + 1 - testRemoteTarget(ctx, t, capability, 1, 0, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(ctx, t, capability, 4, 3, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(ctx, t, capability, 10, 3, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) + var methods []func(ctx context.Context, caller commoncap.ExecutableCapability) - testRemoteTarget(ctx, t, capability, 1, 0, timeOut, 1, 0, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(ctx, t, capability, 1, 0, timeOut, 4, 3, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(ctx, t, capability, 1, 0, timeOut, 10, 3, timeOut, transmissionSchedule, responseTest) + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + executeCapability(ctx, t, caller, transmissionSchedule, responseTest) + }) - testRemoteTarget(ctx, t, capability, 4, 3, timeOut, 4, 3, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(ctx, t, capability, 10, 3, timeOut, 10, 3, timeOut, transmissionSchedule, responseTest) - testRemoteTarget(ctx, t, capability, 10, 9, timeOut, 10, 9, timeOut, transmissionSchedule, responseTest) -} + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + registerWorkflow(ctx, t, caller, transmissionSchedule, func(t *testing.T, responseError error) { + require.NoError(t, responseError) + }) + }) -func Test_RemoteTargetCapability_CapabilityError(t *testing.T) { - ctx := testutils.Context(t) + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + unregisterWorkflow(ctx, t, caller, transmissionSchedule, func(t *testing.T, responseError error) { + require.NoError(t, responseError) + }) + }) - responseTest := func(t *testing.T, 
responseCh commoncap.CapabilityResponse, responseError error) { - assert.Equal(t, "failed to execute capability: an error", responseError.Error()) + for _, method := range methods { + // Test scenarios where the number of submissions is greater than or equal to F + 1 + testRemoteExecutableCapability(ctx, t, capability, 1, 0, timeOut, 1, 0, timeOut, method) + testRemoteExecutableCapability(ctx, t, capability, 4, 3, timeOut, 1, 0, timeOut, method) + testRemoteExecutableCapability(ctx, t, capability, 10, 3, timeOut, 1, 0, timeOut, method) + + testRemoteExecutableCapability(ctx, t, capability, 1, 0, timeOut, 1, 0, timeOut, method) + testRemoteExecutableCapability(ctx, t, capability, 1, 0, timeOut, 4, 3, timeOut, method) + testRemoteExecutableCapability(ctx, t, capability, 1, 0, timeOut, 10, 3, timeOut, method) + + testRemoteExecutableCapability(ctx, t, capability, 4, 3, timeOut, 4, 3, timeOut, method) + testRemoteExecutableCapability(ctx, t, capability, 10, 3, timeOut, 10, 3, timeOut, method) + testRemoteExecutableCapability(ctx, t, capability, 10, 9, timeOut, 10, 9, timeOut, method) } +} + +func Test_RemoteExecutionCapability_CapabilityError(t *testing.T) { + ctx := testutils.Context(t) capability := &TestErrorCapability{} @@ -144,15 +206,33 @@ func Test_RemoteTargetCapability_CapabilityError(t *testing.T) { }) require.NoError(t, err) - testRemoteTarget(ctx, t, capability, 10, 9, 10*time.Minute, 10, 9, 10*time.Minute, transmissionSchedule, responseTest) -} + var methods []func(ctx context.Context, caller commoncap.ExecutableCapability) -func Test_RemoteTargetCapability_RandomCapabilityError(t *testing.T) { - ctx := testutils.Context(t) + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + executeCapability(ctx, t, caller, transmissionSchedule, func(t *testing.T, responseCh commoncap.CapabilityResponse, responseError error) { + assert.Equal(t, "error executing request: failed to execute capability: an error", responseError.Error()) + }) + }) - responseTest := func(t *testing.T, response commoncap.CapabilityResponse, responseError error) { - assert.Equal(t, "request expired", responseError.Error()) + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + registerWorkflow(ctx, t, caller, transmissionSchedule, func(t *testing.T, responseError error) { + assert.Equal(t, "error executing request: failed to register to workflow: an error", responseError.Error()) + }) + }) + + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + unregisterWorkflow(ctx, t, caller, transmissionSchedule, func(t *testing.T, responseError error) { + assert.Equal(t, "error executing request: failed to unregister from workflow: an error", responseError.Error()) + }) + }) + + for _, method := range methods { + testRemoteExecutableCapability(ctx, t, capability, 10, 9, 10*time.Minute, 10, 9, 10*time.Minute, method) } +} + +func Test_RemoteExecutableCapability_RandomCapabilityError(t *testing.T) { + ctx := testutils.Context(t) capability := &TestRandomErrorCapability{} @@ -162,12 +242,35 @@ func Test_RemoteTargetCapability_RandomCapabilityError(t *testing.T) { }) require.NoError(t, err) - testRemoteTarget(ctx, t, capability, 10, 9, 10*time.Millisecond, 10, 9, 10*time.Minute, transmissionSchedule, responseTest) + var methods []func(ctx context.Context, caller commoncap.ExecutableCapability) + + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + 
executeCapability(ctx, t, caller, transmissionSchedule, func(t *testing.T, responseCh commoncap.CapabilityResponse, responseError error) { + assert.Equal(t, "error executing request: request expired", responseError.Error()) + }) + }) + + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + registerWorkflow(ctx, t, caller, transmissionSchedule, func(t *testing.T, responseError error) { + assert.Equal(t, "error executing request: request expired", responseError.Error()) + }) + }) + + methods = append(methods, func(ctx context.Context, caller commoncap.ExecutableCapability) { + unregisterWorkflow(ctx, t, caller, transmissionSchedule, func(t *testing.T, responseError error) { + assert.Equal(t, "error executing request: request expired", responseError.Error()) + }) + }) + + for _, method := range methods { + testRemoteExecutableCapability(ctx, t, capability, 10, 9, 10*time.Millisecond, 10, 9, 10*time.Minute, + method) + } } -func testRemoteTarget(ctx context.Context, t *testing.T, underlying commoncap.TargetCapability, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, - numCapabilityPeers int, capabilityDonF uint8, capabilityNodeResponseTimeout time.Duration, transmissionSchedule *values.Map, - responseTest func(t *testing.T, response commoncap.CapabilityResponse, responseError error)) { +func testRemoteExecutableCapability(ctx context.Context, t *testing.T, underlying commoncap.ExecutableCapability, numWorkflowPeers int, workflowDonF uint8, workflowNodeTimeout time.Duration, + numCapabilityPeers int, capabilityDonF uint8, capabilityNodeResponseTimeout time.Duration, + method func(ctx context.Context, caller commoncap.ExecutableCapability)) { lggr := logger.TestLogger(t) capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) @@ -216,17 +319,17 @@ func testRemoteTarget(ctx context.Context, t *testing.T, underlying commoncap.Ta for i := 0; i < numCapabilityPeers; i++ { capabilityPeer := capabilityPeers[i] capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeer) - capabilityNode := target.NewServer(&commoncap.RemoteTargetConfig{RequestHashExcludedAttributes: []string{}}, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, + capabilityNode := executable.NewServer(&commoncap.RemoteExecutableConfig{RequestHashExcludedAttributes: []string{}}, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, capabilityNodeResponseTimeout, lggr) servicetest.Run(t, capabilityNode) broker.RegisterReceiverNode(capabilityPeer, capabilityNode) capabilityNodes[i] = capabilityNode } - workflowNodes := make([]commoncap.TargetCapability, numWorkflowPeers) + workflowNodes := make([]commoncap.ExecutableCapability, numWorkflowPeers) for i := 0; i < numWorkflowPeers; i++ { workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) - workflowNode := target.NewClient(capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeTimeout, lggr) + workflowNode := executable.NewClient(capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeTimeout, lggr) servicetest.Run(t, workflowNode) broker.RegisterReceiverNode(workflowPeers[i], workflowNode) workflowNodes[i] = workflowNode @@ -234,31 +337,13 @@ func testRemoteTarget(ctx context.Context, t *testing.T, underlying commoncap.Ta servicetest.Run(t, broker) - executeInputs, err := values.NewMap( - map[string]any{ - "executeValue1": "aValue1", - }, - ) - - require.NoError(t, err) - wg := &sync.WaitGroup{} 
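 	// Fan out: each workflow node invokes the capability method concurrently;
 	// the WaitGroup blocks until every node's response assertions have run.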
wg.Add(len(workflowNodes)) for _, caller := range workflowNodes { - go func(caller commoncap.TargetCapability) { + go func(caller commoncap.ExecutableCapability) { defer wg.Done() - response, err := caller.Execute(ctx, - commoncap.CapabilityRequest{ - Metadata: commoncap.RequestMetadata{ - WorkflowID: workflowID1, - WorkflowExecutionID: workflowExecutionID1, - }, - Config: transmissionSchedule, - Inputs: executeInputs, - }) - - responseTest(t, response, err) + method(ctx, caller) }(caller) } @@ -413,6 +498,14 @@ func (t TestErrorCapability) Execute(ctx context.Context, request commoncap.Capa return commoncap.CapabilityResponse{}, errors.New("an error") } +func (t TestErrorCapability) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { + return errors.New("an error") +} + +func (t TestErrorCapability) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { + return errors.New("an error") +} + type TestRandomErrorCapability struct { abstractTestCapability } @@ -421,6 +514,14 @@ func (t TestRandomErrorCapability) Execute(ctx context.Context, request commonca return commoncap.CapabilityResponse{}, errors.New(uuid.New().String()) } +func (t TestRandomErrorCapability) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { + return errors.New(uuid.New().String()) +} + +func (t TestRandomErrorCapability) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { + return errors.New(uuid.New().String()) +} + func NewP2PPeerID(t *testing.T) p2ptypes.PeerID { id := p2ptypes.PeerID{} require.NoError(t, id.UnmarshalText([]byte(NewPeerID()))) @@ -442,3 +543,49 @@ func NewPeerID() string { func libp2pMagic() []byte { return []byte{0x00, 0x24, 0x08, 0x01, 0x12, 0x20} } + +func executeCapability(ctx context.Context, t *testing.T, caller commoncap.ExecutableCapability, transmissionSchedule *values.Map, responseTest func(t *testing.T, response commoncap.CapabilityResponse, responseError error)) { + executeInputs, err := values.NewMap( + map[string]any{ + "executeValue1": "aValue1", + }, + ) + require.NoError(t, err) + response, err := caller.Execute(ctx, + commoncap.CapabilityRequest{ + Metadata: commoncap.RequestMetadata{ + WorkflowID: workflowID1, + WorkflowExecutionID: workflowExecutionID1, + }, + Config: transmissionSchedule, + Inputs: executeInputs, + }) + + responseTest(t, response, err) +} + +func registerWorkflow(ctx context.Context, t *testing.T, caller commoncap.ExecutableCapability, transmissionSchedule *values.Map, responseTest func(t *testing.T, responseError error)) { + err := caller.RegisterToWorkflow(ctx, commoncap.RegisterToWorkflowRequest{ + Metadata: commoncap.RegistrationMetadata{ + WorkflowID: workflowID1, + ReferenceID: stepReferenceID1, + WorkflowOwner: workflowOwnerID, + }, + Config: transmissionSchedule, + }) + + responseTest(t, err) +} + +func unregisterWorkflow(ctx context.Context, t *testing.T, caller commoncap.ExecutableCapability, transmissionSchedule *values.Map, responseTest func(t *testing.T, responseError error)) { + err := caller.UnregisterFromWorkflow(ctx, commoncap.UnregisterFromWorkflowRequest{ + Metadata: commoncap.RegistrationMetadata{ + WorkflowID: workflowID1, + ReferenceID: stepReferenceID1, + WorkflowOwner: workflowOwnerID, + }, + Config: transmissionSchedule, + }) + + responseTest(t, err) +} diff --git a/core/capabilities/remote/target/request/client_request.go 
b/core/capabilities/remote/executable/request/client_request.go similarity index 53% rename from core/capabilities/remote/target/request/client_request.go rename to core/capabilities/remote/executable/request/client_request.go index 1a0d707146b..6b4b9e3a0cd 100644 --- a/core/capabilities/remote/target/request/client_request.go +++ b/core/capabilities/remote/executable/request/client_request.go @@ -12,6 +12,8 @@ import ( ragep2ptypes "github.com/smartcontractkit/libocr/ragep2p/types" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/validation" + "github.com/smartcontractkit/chainlink-common/pkg/capabilities" commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" @@ -22,14 +24,15 @@ import ( p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" ) -type asyncCapabilityResponse struct { - capabilities.CapabilityResponse - Err error +type clientResponse struct { + Result []byte + Err error } type ClientRequest struct { + id string cancelFn context.CancelFunc - responseCh chan asyncCapabilityResponse + responseCh chan clientResponse createdAt time.Time responseIDCount map[[32]byte]int errorCount map[string]int @@ -45,26 +48,91 @@ type ClientRequest struct { wg *sync.WaitGroup } -func NewClientRequest(ctx context.Context, lggr logger.Logger, req commoncap.CapabilityRequest, messageID string, +func NewClientRegisterToWorkflowRequest(ctx context.Context, lggr logger.Logger, req commoncap.RegisterToWorkflowRequest, remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher, requestTimeout time.Duration) (*ClientRequest, error) { - remoteCapabilityDonInfo := remoteCapabilityInfo.DON - if remoteCapabilityDonInfo == nil { - return nil, errors.New("remote capability info missing DON") + rawRequest, err := proto.MarshalOptions{Deterministic: true}.Marshal(pb.RegisterToWorkflowRequestToProto(req)) + if err != nil { + return nil, fmt.Errorf("failed to marshal register to workflow request: %w", err) } - rawRequest, err := proto.MarshalOptions{Deterministic: true}.Marshal(pb.CapabilityRequestToProto(req)) + workflowID := req.Metadata.WorkflowID + if err := validation.ValidateWorkflowOrExecutionID(workflowID); err != nil { + return nil, fmt.Errorf("workflow ID is invalid: %w", err) + } + + requestID := types.MethodRegisterToWorkflow + ":" + workflowID + + tc := transmission.TransmissionConfig{ + Schedule: transmission.Schedule_AllAtOnce, + DeltaStage: 0, + } + + return newClientRequest(ctx, lggr, requestID, remoteCapabilityInfo, localDonInfo, dispatcher, requestTimeout, + tc, types.MethodRegisterToWorkflow, rawRequest) +} +func NewClientUnregisterFromWorkflowRequest(ctx context.Context, lggr logger.Logger, req commoncap.UnregisterFromWorkflowRequest, + remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher, + requestTimeout time.Duration) (*ClientRequest, error) { + rawRequest, err := proto.MarshalOptions{Deterministic: true}.Marshal(pb.UnregisterFromWorkflowRequestToProto(req)) + if err != nil { + return nil, fmt.Errorf("failed to marshal unregister from workflow request: %w", err) + } + + workflowID := req.Metadata.WorkflowID + if err := validation.ValidateWorkflowOrExecutionID(workflowID); err != nil { + return nil, fmt.Errorf("workflow ID is invalid: %w", err) + } + + requestID := types.MethodUnregisterFromWorkflow + ":" + workflowID + + tc := transmission.TransmissionConfig{ + Schedule: 
transmission.Schedule_AllAtOnce, + DeltaStage: 0, + } + + return newClientRequest(ctx, lggr, requestID, remoteCapabilityInfo, localDonInfo, dispatcher, requestTimeout, + tc, types.MethodUnregisterFromWorkflow, rawRequest) +} + +func NewClientExecuteRequest(ctx context.Context, lggr logger.Logger, req commoncap.CapabilityRequest, + remoteCapabilityInfo commoncap.CapabilityInfo, localDonInfo capabilities.DON, dispatcher types.Dispatcher, + requestTimeout time.Duration) (*ClientRequest, error) { + rawRequest, err := proto.MarshalOptions{Deterministic: true}.Marshal(pb.CapabilityRequestToProto(req)) if err != nil { return nil, fmt.Errorf("failed to marshal capability request: %w", err) } - peerIDToTransmissionDelay, err := transmission.GetPeerIDToTransmissionDelay(remoteCapabilityDonInfo.Members, req) + workflowExecutionID := req.Metadata.WorkflowExecutionID + if err = validation.ValidateWorkflowOrExecutionID(workflowExecutionID); err != nil { + return nil, fmt.Errorf("workflow execution ID is invalid: %w", err) + } + + requestID := types.MethodExecute + ":" + workflowExecutionID + + tc, err := transmission.ExtractTransmissionConfig(req.Config) + if err != nil { + return nil, fmt.Errorf("failed to extract transmission config from request: %w", err) + } + + return newClientRequest(ctx, lggr, requestID, remoteCapabilityInfo, localDonInfo, dispatcher, requestTimeout, tc, types.MethodExecute, rawRequest) +} + +func newClientRequest(ctx context.Context, lggr logger.Logger, requestID string, remoteCapabilityInfo commoncap.CapabilityInfo, + localDonInfo commoncap.DON, dispatcher types.Dispatcher, requestTimeout time.Duration, + tc transmission.TransmissionConfig, methodType string, rawRequest []byte) (*ClientRequest, error) { + remoteCapabilityDonInfo := remoteCapabilityInfo.DON + if remoteCapabilityDonInfo == nil { + return nil, errors.New("remote capability info missing DON") + } + + peerIDToTransmissionDelay, err := transmission.GetPeerIDToTransmissionDelaysForConfig(remoteCapabilityDonInfo.Members, requestID, tc) if err != nil { return nil, fmt.Errorf("failed to get peer ID to transmission delay: %w", err) } - lggr.Debugw("sending request to peers", "execID", req.Metadata.WorkflowExecutionID, "schedule", peerIDToTransmissionDelay) + lggr.Debugw("sending request to peers", "requestID", requestID, "schedule", peerIDToTransmissionDelay) responseReceived := make(map[p2ptypes.PeerID]bool) @@ -79,17 +147,17 @@ func NewClientRequest(ctx context.Context, lggr logger.Logger, req commoncap.Cap CapabilityId: remoteCapabilityInfo.ID, CapabilityDonId: remoteCapabilityDonInfo.ID, CallerDonId: localDonInfo.ID, - Method: types.MethodExecute, + Method: methodType, Payload: rawRequest, - MessageId: []byte(messageID), + MessageId: []byte(requestID), } select { case <-ctxWithCancel.Done(): - lggr.Debugw("context done, not sending request to peer", "execID", req.Metadata.WorkflowExecutionID, "peerID", peerID) + lggr.Debugw("context done, not sending request to peer", "requestID", requestID, "peerID", peerID) return case <-time.After(delay): - lggr.Debugw("sending request to peer", "execID", req.Metadata.WorkflowExecutionID, "peerID", peerID) + lggr.Debugw("sending request to peer", "requestID", requestID, "peerID", peerID) err := dispatcher.Send(peerID, message) if err != nil { lggr.Errorw("failed to send message", "peerID", peerID, "err", err) @@ -99,6 +167,7 @@ func NewClientRequest(ctx context.Context, lggr logger.Logger, req commoncap.Cap } return &ClientRequest{ + id: requestID, cancelFn: cancelFn, createdAt: 
time.Now(), requestTimeout: requestTimeout, @@ -106,13 +175,17 @@ func NewClientRequest(ctx context.Context, lggr logger.Logger, req commoncap.Cap responseIDCount: make(map[[32]byte]int), errorCount: make(map[string]int), responseReceived: responseReceived, - responseCh: make(chan asyncCapabilityResponse, 1), + responseCh: make(chan clientResponse, 1), wg: wg, lggr: lggr, }, nil } -func (c *ClientRequest) ResponseChan() <-chan asyncCapabilityResponse { +func (c *ClientRequest) ID() string { + return c.id +} + +func (c *ClientRequest) ResponseChan() <-chan clientResponse { return c.responseCh } @@ -126,7 +199,7 @@ func (c *ClientRequest) Cancel(err error) { c.mux.Lock() defer c.mux.Unlock() if !c.respSent { - c.sendResponse(asyncCapabilityResponse{Err: err}) + c.sendResponse(clientResponse{Err: err}) } } @@ -169,24 +242,19 @@ func (c *ClientRequest) OnMessage(_ context.Context, msg *types.MessageBody) err } if c.responseIDCount[responseID] == c.requiredIdenticalResponses { - capabilityResponse, err := pb.UnmarshalCapabilityResponse(msg.Payload) - if err != nil { - c.sendResponse(asyncCapabilityResponse{Err: fmt.Errorf("failed to unmarshal capability response: %w", err)}) - } else { - c.sendResponse(asyncCapabilityResponse{CapabilityResponse: commoncap.CapabilityResponse{Value: capabilityResponse.Value}}) - } + c.sendResponse(clientResponse{Result: msg.Payload}) } } else { c.lggr.Warnw("received error response", "error", remote.SanitizeLogString(msg.ErrorMsg)) c.errorCount[msg.ErrorMsg]++ if c.errorCount[msg.ErrorMsg] == c.requiredIdenticalResponses { - c.sendResponse(asyncCapabilityResponse{Err: errors.New(msg.ErrorMsg)}) + c.sendResponse(clientResponse{Err: errors.New(msg.ErrorMsg)}) } } return nil } -func (c *ClientRequest) sendResponse(response asyncCapabilityResponse) { +func (c *ClientRequest) sendResponse(response clientResponse) { c.responseCh <- response close(c.responseCh) c.respSent = true diff --git a/core/capabilities/remote/target/request/client_request_test.go b/core/capabilities/remote/executable/request/client_request_test.go similarity index 61% rename from core/capabilities/remote/target/request/client_request_test.go rename to core/capabilities/remote/executable/request/client_request_test.go index 095c73e8ad9..c46fd1363a0 100644 --- a/core/capabilities/remote/target/request/client_request_test.go +++ b/core/capabilities/remote/executable/request/client_request_test.go @@ -12,8 +12,8 @@ import ( commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink-common/pkg/values" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target/request" + + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/executable/request" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" "github.com/smartcontractkit/chainlink/v2/core/logger" @@ -80,6 +80,15 @@ func Test_ClientRequest_MessageValidation(t *testing.T) { Config: transmissionSchedule, } + registerToWorkflowRequest := commoncap.RegisterToWorkflowRequest{ + Metadata: commoncap.RegistrationMetadata{ + WorkflowID: workflowID1, + WorkflowOwner: "0xaa", + ReferenceID: "refID", + }, + Config: transmissionSchedule, + } + m, err := values.NewMap(map[string]any{"response": "response1"}) require.NoError(t, err) 
capabilityResponse := commoncap.CapabilityResponse{ @@ -89,9 +98,6 @@ func Test_ClientRequest_MessageValidation(t *testing.T) { rawResponse, err := pb.MarshalCapabilityResponse(capabilityResponse) require.NoError(t, err) - messageID, err := target.GetMessageIDForRequest(capabilityRequest) - require.NoError(t, err) - msg := &types.MessageBody{ CapabilityId: capInfo.ID, CapabilityDonId: capDonInfo.ID, @@ -106,7 +112,7 @@ func Test_ClientRequest_MessageValidation(t *testing.T) { defer cancel() dispatcher := &clientRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} - request, err := request.NewClientRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + request, err := request.NewClientExecuteRequest(ctx, lggr, capabilityRequest, capInfo, workflowDonInfo, dispatcher, 10*time.Minute) defer request.Cancel(errors.New("test end")) @@ -149,7 +155,7 @@ func Test_ClientRequest_MessageValidation(t *testing.T) { defer cancel() dispatcher := &clientRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} - request, err := request.NewClientRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + request, err := request.NewClientExecuteRequest(ctx, lggr, capabilityRequest, capInfo, workflowDonInfo, dispatcher, 10*time.Minute) require.NoError(t, err) defer request.Cancel(errors.New("test end")) @@ -175,7 +181,7 @@ func Test_ClientRequest_MessageValidation(t *testing.T) { defer cancel() dispatcher := &clientRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} - request, err := request.NewClientRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + request, err := request.NewClientExecuteRequest(ctx, lggr, capabilityRequest, capInfo, workflowDonInfo, dispatcher, 10*time.Minute) require.NoError(t, err) defer request.Cancel(errors.New("test end")) @@ -198,7 +204,7 @@ func Test_ClientRequest_MessageValidation(t *testing.T) { defer cancel() dispatcher := &clientRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} - request, err := request.NewClientRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + request, err := request.NewClientExecuteRequest(ctx, lggr, capabilityRequest, capInfo, workflowDonInfo, dispatcher, 10*time.Minute) require.NoError(t, err) defer request.Cancel(errors.New("test end")) @@ -236,7 +242,7 @@ func Test_ClientRequest_MessageValidation(t *testing.T) { defer cancel() dispatcher := &clientRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} - request, err := request.NewClientRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + request, err := request.NewClientExecuteRequest(ctx, lggr, capabilityRequest, capInfo, workflowDonInfo, dispatcher, 10*time.Minute) require.NoError(t, err) defer request.Cancel(errors.New("test end")) @@ -281,12 +287,12 @@ func Test_ClientRequest_MessageValidation(t *testing.T) { } }) - t.Run("Send second valid message", func(t *testing.T) { + t.Run("Execute Request", func(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() dispatcher := &clientRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} - request, err := request.NewClientRequest(ctx, lggr, capabilityRequest, messageID, capInfo, + request, err := request.NewClientExecuteRequest(ctx, lggr, capabilityRequest, capInfo, workflowDonInfo, dispatcher, 10*time.Minute) require.NoError(t, err) defer request.Cancel(errors.New("test end")) @@ -304,10 +310,167 @@ func Test_ClientRequest_MessageValidation(t *testing.T) { require.NoError(t, err) response := <-request.ResponseChan() - resp := 
response.Value.Underlying["response"] + capResponse, err := pb.UnmarshalCapabilityResponse(response.Result) + require.NoError(t, err) + + resp := capResponse.Value.Underlying["response"] assert.Equal(t, resp, values.NewString("response1")) }) + + t.Run("Register To Workflow Request", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + dispatcher := &clientRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} + request, err := request.NewClientRegisterToWorkflowRequest(ctx, lggr, registerToWorkflowRequest, capInfo, + workflowDonInfo, dispatcher, 10*time.Minute) + require.NoError(t, err) + defer request.Cancel(errors.New("test end")) + + <-dispatcher.msgs + <-dispatcher.msgs + assert.Empty(t, dispatcher.msgs) + + msg := &types.MessageBody{ + CapabilityId: capInfo.ID, + CapabilityDonId: capDonInfo.ID, + CallerDonId: workflowDonInfo.ID, + Method: types.MethodRegisterToWorkflow, + Payload: nil, + MessageId: []byte("messageID"), + } + + msg.Sender = capabilityPeers[0][:] + err = request.OnMessage(ctx, msg) + require.NoError(t, err) + + msg.Sender = capabilityPeers[1][:] + err = request.OnMessage(ctx, msg) + require.NoError(t, err) + + response := <-request.ResponseChan() + require.Nil(t, response.Result) + require.NoError(t, response.Err) + }) + + t.Run("Register To Workflow Request with error", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + dispatcher := &clientRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} + request, err := request.NewClientRegisterToWorkflowRequest(ctx, lggr, registerToWorkflowRequest, capInfo, + workflowDonInfo, dispatcher, 10*time.Minute) + require.NoError(t, err) + defer request.Cancel(errors.New("test end")) + + <-dispatcher.msgs + <-dispatcher.msgs + assert.Empty(t, dispatcher.msgs) + + msg := &types.MessageBody{ + CapabilityId: capInfo.ID, + CapabilityDonId: capDonInfo.ID, + CallerDonId: workflowDonInfo.ID, + Method: types.MethodRegisterToWorkflow, + Payload: nil, + MessageId: []byte("messageID"), + Error: types.Error_INTERNAL_ERROR, + ErrorMsg: "an error", + } + + msg.Sender = capabilityPeers[0][:] + err = request.OnMessage(ctx, msg) + require.NoError(t, err) + + msg.Sender = capabilityPeers[1][:] + err = request.OnMessage(ctx, msg) + require.NoError(t, err) + + response := <-request.ResponseChan() + require.Nil(t, response.Result) + assert.Equal(t, "an error", response.Err.Error()) + }) + + t.Run("Unregister From Workflow Request", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + dispatcher := &clientRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} + request, err := request.NewClientUnregisterFromWorkflowRequest(ctx, lggr, commoncap.UnregisterFromWorkflowRequest{ + Metadata: commoncap.RegistrationMetadata{ + WorkflowID: workflowID1, + }, + }, capInfo, workflowDonInfo, dispatcher, 10*time.Minute) + require.NoError(t, err) + defer request.Cancel(errors.New("test end")) + + <-dispatcher.msgs + <-dispatcher.msgs + assert.Empty(t, dispatcher.msgs) + + msg := &types.MessageBody{ + CapabilityId: capInfo.ID, + CapabilityDonId: capDonInfo.ID, + CallerDonId: workflowDonInfo.ID, + Method: types.MethodUnregisterFromWorkflow, + Payload: nil, + MessageId: []byte("messageID"), + } + + msg.Sender = capabilityPeers[0][:] + err = request.OnMessage(ctx, msg) + require.NoError(t, err) + + msg.Sender = capabilityPeers[1][:] + err = request.OnMessage(ctx, msg) + require.NoError(t, err) + + response 
:= <-request.ResponseChan() + require.Nil(t, response.Result) + require.NoError(t, response.Err) + }) + + t.Run("Unregister From Workflow Request with error", func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + dispatcher := &clientRequestTestDispatcher{msgs: make(chan *types.MessageBody, 100)} + request, err := request.NewClientUnregisterFromWorkflowRequest(ctx, lggr, commoncap.UnregisterFromWorkflowRequest{ + Metadata: commoncap.RegistrationMetadata{ + WorkflowID: workflowID1, + }, + }, capInfo, workflowDonInfo, dispatcher, 10*time.Minute) + require.NoError(t, err) + defer request.Cancel(errors.New("test end")) + + <-dispatcher.msgs + <-dispatcher.msgs + assert.Empty(t, dispatcher.msgs) + + msg := &types.MessageBody{ + CapabilityId: capInfo.ID, + CapabilityDonId: capDonInfo.ID, + CallerDonId: workflowDonInfo.ID, + Method: types.MethodUnregisterFromWorkflow, + Payload: nil, + MessageId: []byte("messageID"), + Error: types.Error_INTERNAL_ERROR, + ErrorMsg: "an error", + } + + msg.Sender = capabilityPeers[0][:] + err = request.OnMessage(ctx, msg) + require.NoError(t, err) + + msg.Sender = capabilityPeers[1][:] + err = request.OnMessage(ctx, msg) + require.NoError(t, err) + + response := <-request.ResponseChan() + require.Nil(t, response.Result) + assert.Equal(t, "an error", response.Err.Error()) + }) } type clientRequestTestDispatcher struct { diff --git a/core/capabilities/remote/target/request/server_request.go b/core/capabilities/remote/executable/request/server_request.go similarity index 64% rename from core/capabilities/remote/target/request/server_request.go rename to core/capabilities/remote/executable/request/server_request.go index d23ba93a44d..a4662e93987 100644 --- a/core/capabilities/remote/target/request/server_request.go +++ b/core/capabilities/remote/executable/request/server_request.go @@ -23,7 +23,7 @@ type response struct { } type ServerRequest struct { - capability capabilities.TargetCapability + capability capabilities.ExecutableCapability capabilityPeerId p2ptypes.PeerID capabilityID string @@ -41,26 +41,29 @@ type ServerRequest struct { callingDon commoncap.DON requestMessageID string + method string requestTimeout time.Duration mux sync.Mutex lggr logger.Logger } -func NewServerRequest(capability capabilities.TargetCapability, capabilityID string, capabilityDonID uint32, capabilityPeerId p2ptypes.PeerID, - callingDon commoncap.DON, requestMessageID string, +func NewServerRequest(capability capabilities.ExecutableCapability, method string, capabilityID string, capabilityDonID uint32, + capabilityPeerID p2ptypes.PeerID, + callingDon commoncap.DON, requestID string, dispatcher types.Dispatcher, requestTimeout time.Duration, lggr logger.Logger) *ServerRequest { return &ServerRequest{ capability: capability, createdTime: time.Now(), capabilityID: capabilityID, capabilityDonID: capabilityDonID, - capabilityPeerId: capabilityPeerId, + capabilityPeerId: capabilityPeerID, dispatcher: dispatcher, requesters: map[p2ptypes.PeerID]bool{}, responseSentToRequester: map[p2ptypes.PeerID]bool{}, callingDon: callingDon, - requestMessageID: requestMessageID, + requestMessageID: requestID, + method: method, requestTimeout: requestTimeout, lggr: lggr.Named("ServerRequest"), } @@ -85,8 +88,15 @@ func (e *ServerRequest) OnMessage(ctx context.Context, msg *types.MessageBody) e e.lggr.Debugw("OnMessage called for request", "msgId", msg.MessageId, "calls", len(e.requesters), "hasResponse", e.response != nil) if e.minimumRequiredRequestsReceived() 
&& !e.hasResponse() {
-		if err := e.executeRequest(ctx, msg.Payload); err != nil {
-			e.setError(types.Error_INTERNAL_ERROR, err.Error())
+		switch e.method {
+		case types.MethodExecute:
+			e.executeRequest(ctx, msg.Payload, executeCapabilityRequest)
+		case types.MethodRegisterToWorkflow:
+			e.executeRequest(ctx, msg.Payload, registerToWorkflow)
+		case types.MethodUnregisterFromWorkflow:
+			e.executeRequest(ctx, msg.Payload, unregisterFromWorkflow)
+		default:
+			e.setError(types.Error_INTERNAL_ERROR, "unknown method "+e.method)
 		}
 	}
 
@@ -115,31 +125,17 @@ func (e *ServerRequest) Cancel(err types.Error, msg string) error {
 	return nil
 }
 
-func (e *ServerRequest) executeRequest(ctx context.Context, payload []byte) error {
+func (e *ServerRequest) executeRequest(ctx context.Context, payload []byte, method func(ctx context.Context, lggr logger.Logger, capability capabilities.ExecutableCapability,
+	payload []byte) ([]byte, error)) {
 	ctxWithTimeout, cancel := context.WithTimeout(ctx, e.requestTimeout)
 	defer cancel()
 
-	capabilityRequest, err := pb.UnmarshalCapabilityRequest(payload)
-	if err != nil {
-		return fmt.Errorf("failed to unmarshal capability request: %w", err)
-	}
-
-	e.lggr.Debugw("executing capability", "metadata", capabilityRequest.Metadata)
-	capResponse, err := e.capability.Execute(ctxWithTimeout, capabilityRequest)
-
+	responsePayload, err := method(ctxWithTimeout, e.lggr, e.capability, payload)
 	if err != nil {
-		e.lggr.Debugw("received execution error", "workflowExecutionID", capabilityRequest.Metadata.WorkflowExecutionID, "error", err)
-		return fmt.Errorf("failed to execute capability: %w", err)
-	}
-
-	responsePayload, err := pb.MarshalCapabilityResponse(capResponse)
-	if err != nil {
-		return fmt.Errorf("failed to marshal capability response: %w", err)
+		e.setError(types.Error_INTERNAL_ERROR, err.Error())
+	} else {
+		e.setResult(responsePayload)
 	}
-
-	e.lggr.Debugw("received execution results", "workflowExecutionID", capabilityRequest.Metadata.WorkflowExecutionID)
-	e.setResult(responsePayload)
-	return nil
 }
 
 func (e *ServerRequest) addRequester(from p2ptypes.PeerID) error {
@@ -227,3 +223,57 @@ func (e *ServerRequest) sendResponse(requester p2ptypes.PeerID) error {
 
 	return nil
 }
+
+func executeCapabilityRequest(ctx context.Context, lggr logger.Logger, capability capabilities.ExecutableCapability,
+	payload []byte) ([]byte, error) {
+	capabilityRequest, err := pb.UnmarshalCapabilityRequest(payload)
+	if err != nil {
+		return nil, fmt.Errorf("failed to unmarshal capability request: %w", err)
+	}
+
+	lggr.Debugw("executing capability", "metadata", capabilityRequest.Metadata)
+	capResponse, err := capability.Execute(ctx, capabilityRequest)
+
+	if err != nil {
+		lggr.Debugw("received execution error", "workflowExecutionID", capabilityRequest.Metadata.WorkflowExecutionID, "error", err)
+		return nil, fmt.Errorf("failed to execute capability: %w", err)
+	}
+
+	responsePayload, err := pb.MarshalCapabilityResponse(capResponse)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal capability response: %w", err)
+	}
+
+	lggr.Debugw("received execution results", "workflowExecutionID", capabilityRequest.Metadata.WorkflowExecutionID)
+	return responsePayload, nil
+}
+
+func registerToWorkflow(ctx context.Context, _ logger.Logger, capability capabilities.ExecutableCapability,
+	payload []byte) ([]byte, error) {
+	registerRequest, err := pb.UnmarshalRegisterToWorkflowRequest(payload)
+	if err != nil {
+		return nil, fmt.Errorf("failed to unmarshal register to workflow request: %w", err)
+	}
+
+ err = capability.RegisterToWorkflow(ctx, registerRequest) + if err != nil { + return nil, fmt.Errorf("failed to register to workflow: %w", err) + } + + return nil, nil +} + +func unregisterFromWorkflow(ctx context.Context, _ logger.Logger, capability capabilities.ExecutableCapability, + payload []byte) ([]byte, error) { + unregisterRequest, err := pb.UnmarshalUnregisterFromWorkflowRequest(payload) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal unregister from workflow request: %w", err) + } + + err = capability.UnregisterFromWorkflow(ctx, unregisterRequest) + if err != nil { + return nil, fmt.Errorf("failed to unregister from workflow: %w", err) + } + + return nil, nil +} diff --git a/core/capabilities/remote/target/request/server_request_test.go b/core/capabilities/remote/executable/request/server_request_test.go similarity index 55% rename from core/capabilities/remote/target/request/server_request_test.go rename to core/capabilities/remote/executable/request/server_request_test.go index b4bddbc955f..cbeec833a1f 100644 --- a/core/capabilities/remote/target/request/server_request_test.go +++ b/core/capabilities/remote/executable/request/server_request_test.go @@ -14,7 +14,7 @@ import ( commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink-common/pkg/values" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target/request" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/executable/request" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/logger" p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" @@ -58,17 +58,17 @@ func Test_ServerRequest_MessageValidation(t *testing.T) { require.NoError(t, err) t.Run("Send duplicate message", func(t *testing.T) { - req := request.NewServerRequest(capability, "capabilityID", 2, + req := request.NewServerRequest(capability, types.MethodExecute, "capabilityID", 2, capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute, lggr) err := sendValidRequest(req, workflowPeers, capabilityPeerID, rawRequest) require.NoError(t, err) err = sendValidRequest(req, workflowPeers, capabilityPeerID, rawRequest) - assert.NotNil(t, err) + assert.Error(t, err) }) t.Run("Send message with non calling don peer", func(t *testing.T) { - req := request.NewServerRequest(capability, "capabilityID", 2, + req := request.NewServerRequest(capability, types.MethodExecute, "capabilityID", 2, capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute, lggr) err := sendValidRequest(req, workflowPeers, capabilityPeerID, rawRequest) @@ -87,11 +87,11 @@ func Test_ServerRequest_MessageValidation(t *testing.T) { Payload: rawRequest, }) - assert.NotNil(t, err) + assert.Error(t, err) }) t.Run("Send message invalid payload", func(t *testing.T) { - req := request.NewServerRequest(capability, "capabilityID", 2, + req := request.NewServerRequest(capability, types.MethodExecute, "capabilityID", 2, capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute, lggr) err := sendValidRequest(req, workflowPeers, capabilityPeerID, rawRequest) @@ -108,15 +108,15 @@ func Test_ServerRequest_MessageValidation(t *testing.T) { Method: types.MethodExecute, Payload: append(rawRequest, []byte("asdf")...), }) - assert.NoError(t, err) - assert.Equal(t, 2, len(dispatcher.msgs)) - 
assert.Equal(t, dispatcher.msgs[0].Error, types.Error_INTERNAL_ERROR) - assert.Equal(t, dispatcher.msgs[1].Error, types.Error_INTERNAL_ERROR) + require.NoError(t, err) + assert.Len(t, dispatcher.msgs, 2) + assert.Equal(t, types.Error_INTERNAL_ERROR, dispatcher.msgs[0].Error) + assert.Equal(t, types.Error_INTERNAL_ERROR, dispatcher.msgs[1].Error) }) t.Run("Send second valid request when capability errors", func(t *testing.T) { dispatcher := &testDispatcher{} - req := request.NewServerRequest(TestErrorCapability{}, "capabilityID", 2, + req := request.NewServerRequest(TestErrorCapability{}, types.MethodExecute, "capabilityID", 2, capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute, lggr) err := sendValidRequest(req, workflowPeers, capabilityPeerID, rawRequest) @@ -133,17 +133,17 @@ func Test_ServerRequest_MessageValidation(t *testing.T) { Method: types.MethodExecute, Payload: rawRequest, }) - assert.NoError(t, err) - assert.Equal(t, 2, len(dispatcher.msgs)) - assert.Equal(t, dispatcher.msgs[0].Error, types.Error_INTERNAL_ERROR) - assert.Equal(t, dispatcher.msgs[0].ErrorMsg, "failed to execute capability: an error") - assert.Equal(t, dispatcher.msgs[1].Error, types.Error_INTERNAL_ERROR) - assert.Equal(t, dispatcher.msgs[1].ErrorMsg, "failed to execute capability: an error") + require.NoError(t, err) + assert.Len(t, dispatcher.msgs, 2) + assert.Equal(t, types.Error_INTERNAL_ERROR, dispatcher.msgs[0].Error) + assert.Equal(t, "failed to execute capability: an error", dispatcher.msgs[0].ErrorMsg) + assert.Equal(t, types.Error_INTERNAL_ERROR, dispatcher.msgs[1].Error) + assert.Equal(t, "failed to execute capability: an error", dispatcher.msgs[1].ErrorMsg) }) - t.Run("Send second valid request", func(t *testing.T) { + t.Run("Execute capability", func(t *testing.T) { dispatcher := &testDispatcher{} - request := request.NewServerRequest(capability, "capabilityID", 2, + request := request.NewServerRequest(capability, types.MethodExecute, "capabilityID", 2, capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute, lggr) err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) @@ -160,10 +160,111 @@ func Test_ServerRequest_MessageValidation(t *testing.T) { Method: types.MethodExecute, Payload: rawRequest, }) - assert.NoError(t, err) - assert.Equal(t, 2, len(dispatcher.msgs)) - assert.Equal(t, dispatcher.msgs[0].Error, types.Error_OK) - assert.Equal(t, dispatcher.msgs[1].Error, types.Error_OK) + require.NoError(t, err) + assert.Len(t, dispatcher.msgs, 2) + assert.Equal(t, types.Error_OK, dispatcher.msgs[0].Error) + assert.Equal(t, types.Error_OK, dispatcher.msgs[1].Error) + }) + t.Run("Register to workflow request", func(t *testing.T) { + dispatcher := &testDispatcher{} + request := request.NewServerRequest(capability, types.MethodRegisterToWorkflow, "capabilityID", 2, + capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute, lggr) + + err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) + require.NoError(t, err) + + err = request.OnMessage(context.Background(), &types.MessageBody{ + Version: 0, + Sender: workflowPeers[1][:], + Receiver: capabilityPeerID[:], + MessageId: []byte("workflowID" + "workflowExecutionID"), + CapabilityId: "capabilityID", + CapabilityDonId: 2, + CallerDonId: 1, + Method: types.MethodRegisterToWorkflow, + Payload: rawRequest, + }) + require.NoError(t, err) + assert.Len(t, dispatcher.msgs, 2) + assert.Equal(t, types.Error_OK, dispatcher.msgs[0].Error) + assert.Equal(t, 
types.Error_OK, dispatcher.msgs[1].Error) + }) + t.Run("Register to workflow request errors", func(t *testing.T) { + dispatcher := &testDispatcher{} + req := request.NewServerRequest(TestErrorCapability{}, types.MethodRegisterToWorkflow, "capabilityID", 2, + capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute, lggr) + + err := sendValidRequest(req, workflowPeers, capabilityPeerID, rawRequest) + require.NoError(t, err) + + err = req.OnMessage(context.Background(), &types.MessageBody{ + Version: 0, + Sender: workflowPeers[1][:], + Receiver: capabilityPeerID[:], + MessageId: []byte("workflowID" + "workflowExecutionID"), + CapabilityId: "capabilityID", + CapabilityDonId: 2, + CallerDonId: 1, + Method: types.MethodRegisterToWorkflow, + Payload: rawRequest, + }) + require.NoError(t, err) + assert.Len(t, dispatcher.msgs, 2) + assert.Equal(t, types.Error_INTERNAL_ERROR, dispatcher.msgs[0].Error) + assert.Equal(t, "failed to register to workflow: an error", dispatcher.msgs[0].ErrorMsg) + assert.Equal(t, types.Error_INTERNAL_ERROR, dispatcher.msgs[1].Error) + assert.Equal(t, "failed to register to workflow: an error", dispatcher.msgs[1].ErrorMsg) + }) + t.Run("Unregister from workflow request", func(t *testing.T) { + dispatcher := &testDispatcher{} + request := request.NewServerRequest(capability, types.MethodUnregisterFromWorkflow, "capabilityID", 2, + capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute, lggr) + + err := sendValidRequest(request, workflowPeers, capabilityPeerID, rawRequest) + require.NoError(t, err) + + err = request.OnMessage(context.Background(), &types.MessageBody{ + Version: 0, + Sender: workflowPeers[1][:], + Receiver: capabilityPeerID[:], + MessageId: []byte("workflowID" + "workflowExecutionID"), + CapabilityId: "capabilityID", + CapabilityDonId: 2, + CallerDonId: 1, + Method: types.MethodUnregisterFromWorkflow, + Payload: rawRequest, + }) + require.NoError(t, err) + assert.Len(t, dispatcher.msgs, 2) + assert.Equal(t, types.Error_OK, dispatcher.msgs[0].Error) + assert.Equal(t, types.Error_OK, dispatcher.msgs[1].Error) + }) + + t.Run("Unregister from workflow request errors", func(t *testing.T) { + dispatcher := &testDispatcher{} + req := request.NewServerRequest(TestErrorCapability{}, types.MethodUnregisterFromWorkflow, "capabilityID", 2, + capabilityPeerID, callingDon, "requestMessageID", dispatcher, 10*time.Minute, lggr) + + err := sendValidRequest(req, workflowPeers, capabilityPeerID, rawRequest) + require.NoError(t, err) + + err = req.OnMessage(context.Background(), &types.MessageBody{ + Version: 0, + Sender: workflowPeers[1][:], + Receiver: capabilityPeerID[:], + MessageId: []byte("workflowID" + "workflowExecutionID"), + CapabilityId: "capabilityID", + CapabilityDonId: 2, + CallerDonId: 1, + Method: types.MethodUnregisterFromWorkflow, + Payload: rawRequest, + }) + require.NoError(t, err) + assert.Len(t, dispatcher.msgs, 2) + assert.Equal(t, types.Error_INTERNAL_ERROR, dispatcher.msgs[0].Error) + assert.Equal(t, "failed to unregister from workflow: an error", dispatcher.msgs[0].ErrorMsg) + assert.Equal(t, types.Error_INTERNAL_ERROR, dispatcher.msgs[1].Error) + assert.Equal(t, "failed to unregister from workflow: an error", dispatcher.msgs[1].ErrorMsg) }) } @@ -261,6 +362,14 @@ func (t TestErrorCapability) Execute(ctx context.Context, request commoncap.Capa return commoncap.CapabilityResponse{}, errors.New("an error") } +func (t TestErrorCapability) RegisterToWorkflow(ctx context.Context, request 
commoncap.RegisterToWorkflowRequest) error { + return errors.New("an error") +} + +func (t TestErrorCapability) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { + return errors.New("an error") +} + func NewP2PPeerID(t *testing.T) p2ptypes.PeerID { id := p2ptypes.PeerID{} require.NoError(t, id.UnmarshalText([]byte(NewPeerID()))) diff --git a/core/capabilities/remote/target/server.go b/core/capabilities/remote/executable/server.go similarity index 83% rename from core/capabilities/remote/target/server.go rename to core/capabilities/remote/executable/server.go index 61725a8220c..b767a2d7030 100644 --- a/core/capabilities/remote/target/server.go +++ b/core/capabilities/remote/executable/server.go @@ -1,4 +1,4 @@ -package target +package executable import ( "context" @@ -12,7 +12,7 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink-common/pkg/services" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target/request" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/executable/request" "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/capabilities/validation" p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" @@ -20,9 +20,9 @@ import ( "github.com/smartcontractkit/chainlink/v2/core/logger" ) -// server manages all external users of a local target capability. +// server manages all external users of a local executable capability. // Its responsibilities are: -// 1. Manage requests from external nodes executing the target capability once sufficient requests are received. +// 1. Manage requests from external nodes executing the executable capability once sufficient requests are received. // 2. Send out responses produced by an underlying capability to all requesters. // // server communicates with corresponding client on remote nodes. 
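For context on the aggregation rule described in the comment above: the server executes the underlying capability only once it has received the same request from enough distinct caller peers, then fans the single result out to every requester. A minimal standalone sketch of that idea follows; the peerID type, the requestAggregator name, and the F+1-style threshold are illustrative assumptions, not code from this patch (the real logic lives in ServerRequest).

    package main

    import "fmt"

    type peerID string

    // requestAggregator runs fn exactly once, after requests from at least
    // threshold distinct peers have arrived; duplicate senders are rejected.
    type requestAggregator struct {
        threshold int // e.g. F+1 for a DON with fault tolerance F (assumption)
        seen      map[peerID]bool
        fired     bool
        fn        func()
    }

    func (a *requestAggregator) onRequest(from peerID) error {
        if a.seen[from] {
            return fmt.Errorf("duplicate request from peer %s", from)
        }
        a.seen[from] = true
        if !a.fired && len(a.seen) >= a.threshold {
            a.fired = true
            a.fn() // execute the underlying capability exactly once
        }
        return nil
    }

    func main() {
        agg := &requestAggregator{
            threshold: 2,
            seen:      map[peerID]bool{},
            fn:        func() { fmt.Println("executing capability") },
        }
        fmt.Println(agg.onRequest("peer-a")) // <nil>
        fmt.Println(agg.onRequest("peer-b")) // threshold met: executes once
        fmt.Println(agg.onRequest("peer-a")) // duplicate request error
    }
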
@@ -30,9 +30,9 @@ type server struct { services.StateMachine lggr logger.Logger - config *commoncap.RemoteTargetConfig + config *commoncap.RemoteExecutableConfig peerID p2ptypes.PeerID - underlying commoncap.TargetCapability + underlying commoncap.ExecutableCapability capInfo commoncap.CapabilityInfo localDonInfo commoncap.DON workflowDONs map[uint32]commoncap.DON @@ -57,14 +57,15 @@ type requestAndMsgID struct { messageID string } -func NewServer(config *commoncap.RemoteTargetConfig, peerID p2ptypes.PeerID, underlying commoncap.TargetCapability, capInfo commoncap.CapabilityInfo, localDonInfo commoncap.DON, +func NewServer(remoteExecutableConfig *commoncap.RemoteExecutableConfig, peerID p2ptypes.PeerID, underlying commoncap.ExecutableCapability, + capInfo commoncap.CapabilityInfo, localDonInfo commoncap.DON, workflowDONs map[uint32]commoncap.DON, dispatcher types.Dispatcher, requestTimeout time.Duration, lggr logger.Logger) *server { - if config == nil { - lggr.Info("no config provided, using default values") - config = &commoncap.RemoteTargetConfig{} + if remoteExecutableConfig == nil { + lggr.Info("no remote config provided, using default values") + remoteExecutableConfig = &commoncap.RemoteExecutableConfig{} } return &server{ - config: config, + config: remoteExecutableConfig, underlying: underlying, peerID: peerID, capInfo: capInfo, @@ -76,7 +77,7 @@ func NewServer(config *commoncap.RemoteTargetConfig, peerID p2ptypes.PeerID, und messageIDToRequestIDsCount: map[string]map[string]int{}, requestTimeout: requestTimeout, - lggr: lggr.Named("TargetServer"), + lggr: lggr.Named("ExecutableCapabilityServer"), stopCh: make(services.StopChan), } } @@ -88,7 +89,7 @@ func (r *server) Start(ctx context.Context) error { defer r.wg.Done() ticker := time.NewTicker(r.requestTimeout) defer ticker.Stop() - r.lggr.Info("TargetServer started") + r.lggr.Info("executable capability server started") for { select { case <-r.stopCh: @@ -106,7 +107,7 @@ func (r *server) Close() error { return r.StopOnce(r.Name(), func() error { close(r.stopCh) r.wg.Wait() - r.lggr.Info("TargetServer closed") + r.lggr.Info("executable capability server closed") return nil }) } @@ -131,9 +132,10 @@ func (r *server) Receive(ctx context.Context, msg *types.MessageBody) { r.receiveLock.Lock() defer r.receiveLock.Unlock() - if msg.Method != types.MethodExecute { + switch msg.Method { + case types.MethodExecute, types.MethodRegisterToWorkflow, types.MethodUnregisterFromWorkflow: + default: r.lggr.Errorw("received request for unsupported method type", "method", remote.SanitizeLogString(msg.Method)) - return } messageId, err := GetMessageID(msg) @@ -175,7 +177,7 @@ func (r *server) Receive(ctx context.Context, msg *types.MessageBody) { } r.requestIDToRequest[requestID] = requestAndMsgID{ - request: request.NewServerRequest(r.underlying, r.capInfo.ID, r.localDonInfo.ID, r.peerID, + request: request.NewServerRequest(r.underlying, msg.Method, r.capInfo.ID, r.localDonInfo.ID, r.peerID, callingDon, messageId, r.dispatcher, r.requestTimeout, r.lggr), messageID: messageId, } @@ -196,8 +198,8 @@ func (r *server) getMessageHash(msg *types.MessageBody) ([32]byte, error) { } for _, path := range r.config.RequestHashExcludedAttributes { - if !req.Inputs.DeleteAtPath(path) { - return [32]byte{}, fmt.Errorf("failed to delete attribute from map at path: %s", path) + if req.Inputs != nil { + req.Inputs.DeleteAtPath(path) } } diff --git a/core/capabilities/remote/target/server_test.go b/core/capabilities/remote/executable/server_test.go similarity index 
58% rename from core/capabilities/remote/target/server_test.go rename to core/capabilities/remote/executable/server_test.go index 505a2dcce5d..1fb5c2dd413 100644 --- a/core/capabilities/remote/target/server_test.go +++ b/core/capabilities/remote/executable/server_test.go @@ -1,4 +1,4 @@ -package target_test +package executable_test import ( "context" @@ -13,7 +13,7 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink-common/pkg/services" "github.com/smartcontractkit/chainlink-common/pkg/values" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/executable" remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" "github.com/smartcontractkit/chainlink/v2/core/logger" @@ -25,7 +25,7 @@ func Test_Server_ExcludesNonDeterministicInputAttributes(t *testing.T) { numCapabilityPeers := 4 - callers, srvcs := testRemoteTargetServer(ctx, t, &commoncap.RemoteTargetConfig{RequestHashExcludedAttributes: []string{"signed_report.Signatures"}}, + callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{RequestHashExcludedAttributes: []string{"signed_report.Signatures"}}, &TestCapability{}, 10, 9, numCapabilityPeers, 3, 10*time.Minute) for idx, caller := range callers { @@ -56,12 +56,12 @@ func Test_Server_ExcludesNonDeterministicInputAttributes(t *testing.T) { closeServices(t, srvcs) } -func Test_Server_RespondsAfterSufficientRequests(t *testing.T) { +func Test_Server_Execute_RespondsAfterSufficientRequests(t *testing.T) { ctx := testutils.Context(t) numCapabilityPeers := 4 - callers, srvcs := testRemoteTargetServer(ctx, t, &commoncap.RemoteTargetConfig{}, &TestCapability{}, 10, 9, numCapabilityPeers, 3, 10*time.Minute) + callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{}, &TestCapability{}, 10, 9, numCapabilityPeers, 3, 10*time.Minute) for _, caller := range callers { _, err := caller.Execute(context.Background(), @@ -83,12 +83,95 @@ func Test_Server_RespondsAfterSufficientRequests(t *testing.T) { closeServices(t, srvcs) } +func Test_Server_RegisterToWorkflow_RespondsAfterSufficientRequests(t *testing.T) { + ctx := testutils.Context(t) + + numCapabilityPeers := 4 + + callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{}, &TestCapability{}, 10, 9, numCapabilityPeers, 3, 10*time.Minute) + + for _, caller := range callers { + err := caller.RegisterToWorkflow(context.Background(), commoncap.RegisterToWorkflowRequest{ + Metadata: commoncap.RegistrationMetadata{ + WorkflowID: workflowID1, + ReferenceID: stepReferenceID1, + WorkflowOwner: workflowOwnerID, + }, + }) + + require.NoError(t, err) + } + + for _, caller := range callers { + for i := 0; i < numCapabilityPeers; i++ { + msg := <-caller.receivedMessages + assert.Equal(t, remotetypes.Error_OK, msg.Error) + } + } + closeServices(t, srvcs) +} + +func Test_Server_RegisterToWorkflow_Error(t *testing.T) { + ctx := testutils.Context(t) + + numCapabilityPeers := 4 + + callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{}, &TestErrorCapability{}, 10, 9, numCapabilityPeers, 3, 10*time.Minute) + + for _, caller := range callers { + err := caller.RegisterToWorkflow(context.Background(), commoncap.RegisterToWorkflowRequest{ + Metadata: 
commoncap.RegistrationMetadata{ + WorkflowID: workflowID1, + ReferenceID: stepReferenceID1, + WorkflowOwner: workflowOwnerID, + }, + }) + + require.NoError(t, err) + } + + for _, caller := range callers { + for i := 0; i < numCapabilityPeers; i++ { + msg := <-caller.receivedMessages + assert.Equal(t, remotetypes.Error_INTERNAL_ERROR, msg.Error) + } + } + closeServices(t, srvcs) +} + +func Test_Server_UnregisterFromWorkflow_RespondsAfterSufficientRequests(t *testing.T) { + ctx := testutils.Context(t) + + numCapabilityPeers := 4 + + callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{}, &TestCapability{}, 10, 9, numCapabilityPeers, 3, 10*time.Minute) + + for _, caller := range callers { + err := caller.UnregisterFromWorkflow(context.Background(), commoncap.UnregisterFromWorkflowRequest{ + Metadata: commoncap.RegistrationMetadata{ + WorkflowID: workflowID1, + ReferenceID: stepReferenceID1, + WorkflowOwner: workflowOwnerID, + }, + }) + require.NoError(t, err) + } + + for _, caller := range callers { + for i := 0; i < numCapabilityPeers; i++ { + msg := <-caller.receivedMessages + assert.Equal(t, remotetypes.Error_OK, msg.Error) + } + } + closeServices(t, srvcs) +} + func Test_Server_InsufficientCallers(t *testing.T) { ctx := testutils.Context(t) numCapabilityPeers := 4 - callers, srvcs := testRemoteTargetServer(ctx, t, &commoncap.RemoteTargetConfig{}, &TestCapability{}, 10, 10, numCapabilityPeers, 3, 100*time.Millisecond) + callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{}, &TestCapability{}, 10, 10, numCapabilityPeers, 3, 100*time.Millisecond) for _, caller := range callers { _, err := caller.Execute(context.Background(), @@ -115,7 +198,7 @@ func Test_Server_CapabilityError(t *testing.T) { numCapabilityPeers := 4 - callers, srvcs := testRemoteTargetServer(ctx, t, &commoncap.RemoteTargetConfig{}, &TestErrorCapability{}, 10, 9, numCapabilityPeers, 3, 100*time.Millisecond) + callers, srvcs := testRemoteExecutableCapabilityServer(ctx, t, &commoncap.RemoteExecutableConfig{}, &TestErrorCapability{}, 10, 9, numCapabilityPeers, 3, 100*time.Millisecond) for _, caller := range callers { _, err := caller.Execute(context.Background(), @@ -137,9 +220,9 @@ func Test_Server_CapabilityError(t *testing.T) { closeServices(t, srvcs) } -func testRemoteTargetServer(ctx context.Context, t *testing.T, - config *commoncap.RemoteTargetConfig, - underlying commoncap.TargetCapability, +func testRemoteExecutableCapabilityServer(ctx context.Context, t *testing.T, + config *commoncap.RemoteExecutableConfig, + underlying commoncap.ExecutableCapability, numWorkflowPeers int, workflowDonF uint8, numCapabilityPeers int, capabilityDonF uint8, capabilityNodeResponseTimeout time.Duration) ([]*serverTestClient, []services.Service) { lggr := logger.TestLogger(t) @@ -189,7 +272,7 @@ func testRemoteTargetServer(ctx context.Context, t *testing.T, for i := 0; i < numCapabilityPeers; i++ { capabilityPeer := capabilityPeers[i] capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeer) - capabilityNode := target.NewServer(config, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, + capabilityNode := executable.NewServer(config, capabilityPeer, underlying, capInfo, capDonInfo, workflowDONs, capabilityDispatcher, capabilityNodeResponseTimeout, lggr) require.NoError(t, capabilityNode.Start(ctx)) broker.RegisterReceiverNode(capabilityPeer, capabilityNode) @@ -236,12 +319,60 @@ func (r *serverTestClient) 
Info(ctx context.Context) (commoncap.CapabilityInfo, panic("not implemented") } -func (r *serverTestClient) RegisterToWorkflow(ctx context.Context, request commoncap.RegisterToWorkflowRequest) error { - panic("not implemented") +func (r *serverTestClient) RegisterToWorkflow(ctx context.Context, req commoncap.RegisterToWorkflowRequest) error { + rawRequest, err := pb.MarshalRegisterToWorkflowRequest(req) + if err != nil { + return err + } + + messageID := remotetypes.MethodRegisterToWorkflow + ":" + req.Metadata.WorkflowID + + for _, node := range r.capabilityDonInfo.Members { + message := &remotetypes.MessageBody{ + CapabilityId: "capability-id", + CapabilityDonId: 1, + CallerDonId: 2, + Method: remotetypes.MethodRegisterToWorkflow, + Payload: rawRequest, + MessageId: []byte(messageID), + Sender: r.peerID[:], + Receiver: node[:], + } + + if err = r.dispatcher.Send(node, message); err != nil { + return err + } + } + + return nil } -func (r *serverTestClient) UnregisterFromWorkflow(ctx context.Context, request commoncap.UnregisterFromWorkflowRequest) error { - panic("not implemented") +func (r *serverTestClient) UnregisterFromWorkflow(ctx context.Context, req commoncap.UnregisterFromWorkflowRequest) error { + rawRequest, err := pb.MarshalUnregisterFromWorkflowRequest(req) + if err != nil { + return err + } + + messageID := remotetypes.MethodUnregisterFromWorkflow + ":" + req.Metadata.WorkflowID + + for _, node := range r.capabilityDonInfo.Members { + message := &remotetypes.MessageBody{ + CapabilityId: "capability-id", + CapabilityDonId: 1, + CallerDonId: 2, + Method: remotetypes.MethodUnregisterFromWorkflow, + Payload: rawRequest, + MessageId: []byte(messageID), + Sender: r.peerID[:], + Receiver: node[:], + } + + if err = r.dispatcher.Send(node, message); err != nil { + return err + } + } + + return nil } func (r *serverTestClient) Execute(ctx context.Context, req commoncap.CapabilityRequest) (<-chan commoncap.CapabilityResponse, error) { @@ -250,10 +381,7 @@ func (r *serverTestClient) Execute(ctx context.Context, req commoncap.Capability return nil, err } - messageID, err := target.GetMessageIDForRequest(req) - if err != nil { - return nil, err - } + messageID := remotetypes.MethodExecute + ":" + req.Metadata.WorkflowExecutionID for _, node := range r.capabilityDonInfo.Members { message := &remotetypes.MessageBody{ diff --git a/core/capabilities/remote/target/client_test.go b/core/capabilities/remote/target/client_test.go deleted file mode 100644 index 697cb6e383a..00000000000 --- a/core/capabilities/remote/target/client_test.go +++ /dev/null @@ -1,316 +0,0 @@ -package target_test - -import ( - "context" - "sync" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - commoncap "github.com/smartcontractkit/chainlink-common/pkg/capabilities" - "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" - "github.com/smartcontractkit/chainlink-common/pkg/services/servicetest" - "github.com/smartcontractkit/chainlink-common/pkg/values" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/target" - remotetypes "github.com/smartcontractkit/chainlink/v2/core/capabilities/remote/types" - "github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission" - "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" - "github.com/smartcontractkit/chainlink/v2/core/logger" - p2ptypes "github.com/smartcontractkit/chainlink/v2/core/services/p2p/types" -) - -const ( - workflowID1 = 
"15c631d295ef5e32deb99a10ee6804bc4af13855687559d7ff6552ac6dbb2ce0" - workflowExecutionID1 = "95ef5e32deb99a10ee6804bc4af13855687559d7ff6552ac6dbb2ce0abbadeed" -) - -func Test_Client_DonTopologies(t *testing.T) { - ctx := testutils.Context(t) - - transmissionSchedule, err := values.NewMap(map[string]any{ - "schedule": transmission.Schedule_OneAtATime, - "deltaStage": "10ms", - }) - require.NoError(t, err) - - responseTest := func(t *testing.T, response commoncap.CapabilityResponse, responseError error) { - require.NoError(t, responseError) - mp, err := response.Value.Unwrap() - require.NoError(t, err) - assert.Equal(t, "aValue1", mp.(map[string]any)["response"].(string)) - } - - capability := &TestCapability{} - - responseTimeOut := 10 * time.Minute - - testClient(ctx, t, 1, responseTimeOut, 1, 0, - capability, transmissionSchedule, responseTest) - - testClient(ctx, t, 10, responseTimeOut, 1, 0, - capability, transmissionSchedule, responseTest) - - testClient(ctx, t, 1, responseTimeOut, 10, 3, - capability, transmissionSchedule, responseTest) - - testClient(ctx, t, 10, responseTimeOut, 10, 3, - capability, transmissionSchedule, responseTest) - - testClient(ctx, t, 10, responseTimeOut, 10, 9, - capability, transmissionSchedule, responseTest) -} - -func Test_Client_TransmissionSchedules(t *testing.T) { - ctx := testutils.Context(t) - - responseTest := func(t *testing.T, response commoncap.CapabilityResponse, responseError error) { - require.NoError(t, responseError) - mp, err := response.Value.Unwrap() - require.NoError(t, err) - assert.Equal(t, "aValue1", mp.(map[string]any)["response"].(string)) - } - - capability := &TestCapability{} - - responseTimeOut := 10 * time.Minute - - transmissionSchedule, err := values.NewMap(map[string]any{ - "schedule": transmission.Schedule_OneAtATime, - "deltaStage": "10ms", - }) - require.NoError(t, err) - - testClient(ctx, t, 1, responseTimeOut, 1, 0, - capability, transmissionSchedule, responseTest) - testClient(ctx, t, 10, responseTimeOut, 10, 3, - capability, transmissionSchedule, responseTest) - - transmissionSchedule, err = values.NewMap(map[string]any{ - "schedule": transmission.Schedule_AllAtOnce, - "deltaStage": "10ms", - }) - require.NoError(t, err) - - testClient(ctx, t, 1, responseTimeOut, 1, 0, - capability, transmissionSchedule, responseTest) - testClient(ctx, t, 10, responseTimeOut, 10, 3, - capability, transmissionSchedule, responseTest) -} - -func Test_Client_TimesOutIfInsufficientCapabilityPeerResponses(t *testing.T) { - ctx := testutils.Context(t) - - responseTest := func(t *testing.T, response commoncap.CapabilityResponse, responseError error) { - assert.NotNil(t, responseError) - } - - capability := &TestCapability{} - - transmissionSchedule, err := values.NewMap(map[string]any{ - "schedule": transmission.Schedule_AllAtOnce, - "deltaStage": "10ms", - }) - require.NoError(t, err) - - // number of capability peers is less than F + 1 - - testClient(ctx, t, 10, 1*time.Second, 10, 11, - capability, transmissionSchedule, responseTest) -} - -func testClient(ctx context.Context, t *testing.T, numWorkflowPeers int, workflowNodeResponseTimeout time.Duration, - numCapabilityPeers int, capabilityDonF uint8, underlying commoncap.TargetCapability, transmissionSchedule *values.Map, - responseTest func(t *testing.T, responseCh commoncap.CapabilityResponse, responseError error)) { - lggr := logger.TestLogger(t) - - capabilityPeers := make([]p2ptypes.PeerID, numCapabilityPeers) - for i := 0; i < numCapabilityPeers; i++ { - capabilityPeers[i] = 
NewP2PPeerID(t) - } - - capDonInfo := commoncap.DON{ - ID: 1, - Members: capabilityPeers, - F: capabilityDonF, - } - - capInfo := commoncap.CapabilityInfo{ - ID: "cap_id@1.0.0", - CapabilityType: commoncap.CapabilityTypeTarget, - Description: "Remote Target", - DON: &capDonInfo, - } - - workflowPeers := make([]p2ptypes.PeerID, numWorkflowPeers) - for i := 0; i < numWorkflowPeers; i++ { - workflowPeers[i] = NewP2PPeerID(t) - } - - workflowDonInfo := commoncap.DON{ - Members: workflowPeers, - ID: 2, - } - - broker := newTestAsyncMessageBroker(t, 100) - - receivers := make([]remotetypes.Receiver, numCapabilityPeers) - for i := 0; i < numCapabilityPeers; i++ { - capabilityDispatcher := broker.NewDispatcherForNode(capabilityPeers[i]) - receiver := newTestServer(capabilityPeers[i], capabilityDispatcher, workflowDonInfo, underlying) - broker.RegisterReceiverNode(capabilityPeers[i], receiver) - receivers[i] = receiver - } - - callers := make([]commoncap.TargetCapability, numWorkflowPeers) - - for i := 0; i < numWorkflowPeers; i++ { - workflowPeerDispatcher := broker.NewDispatcherForNode(workflowPeers[i]) - caller := target.NewClient(capInfo, workflowDonInfo, workflowPeerDispatcher, workflowNodeResponseTimeout, lggr) - servicetest.Run(t, caller) - broker.RegisterReceiverNode(workflowPeers[i], caller) - callers[i] = caller - } - - servicetest.Run(t, broker) - - executeInputs, err := values.NewMap( - map[string]any{ - "executeValue1": "aValue1", - }, - ) - - require.NoError(t, err) - - wg := &sync.WaitGroup{} - wg.Add(len(callers)) - - // Fire off all the requests - for _, caller := range callers { - go func(caller commoncap.TargetCapability) { - defer wg.Done() - responseCh, err := caller.Execute(ctx, - commoncap.CapabilityRequest{ - Metadata: commoncap.RequestMetadata{ - WorkflowID: workflowID1, - WorkflowExecutionID: workflowExecutionID1, - }, - Config: transmissionSchedule, - Inputs: executeInputs, - }) - - responseTest(t, responseCh, err) - }(caller) - } - - wg.Wait() -} - -// Simple client that only responds once it has received a message from each workflow peer -type clientTestServer struct { - peerID p2ptypes.PeerID - dispatcher remotetypes.Dispatcher - workflowDonInfo commoncap.DON - messageIDToSenders map[string]map[p2ptypes.PeerID]bool - - targetCapability commoncap.TargetCapability - - mux sync.Mutex -} - -func newTestServer(peerID p2ptypes.PeerID, dispatcher remotetypes.Dispatcher, workflowDonInfo commoncap.DON, - targetCapability commoncap.TargetCapability) *clientTestServer { - return &clientTestServer{ - dispatcher: dispatcher, - workflowDonInfo: workflowDonInfo, - peerID: peerID, - messageIDToSenders: make(map[string]map[p2ptypes.PeerID]bool), - targetCapability: targetCapability, - } -} - -func (t *clientTestServer) Receive(_ context.Context, msg *remotetypes.MessageBody) { - t.mux.Lock() - defer t.mux.Unlock() - - sender := toPeerID(msg.Sender) - messageID, err := target.GetMessageID(msg) - if err != nil { - panic(err) - } - - if t.messageIDToSenders[messageID] == nil { - t.messageIDToSenders[messageID] = make(map[p2ptypes.PeerID]bool) - } - - sendersOfMessageID := t.messageIDToSenders[messageID] - if sendersOfMessageID[sender] { - panic("received duplicate message") - } - - sendersOfMessageID[sender] = true - - if len(t.messageIDToSenders[messageID]) == len(t.workflowDonInfo.Members) { - capabilityRequest, err := pb.UnmarshalCapabilityRequest(msg.Payload) - if err != nil { - panic(err) - } - - resp, responseErr := t.targetCapability.Execute(context.Background(), capabilityRequest) 
- - for receiver := range t.messageIDToSenders[messageID] { - var responseMsg = &remotetypes.MessageBody{ - CapabilityId: "cap_id@1.0.0", - CapabilityDonId: 1, - CallerDonId: t.workflowDonInfo.ID, - Method: remotetypes.MethodExecute, - MessageId: []byte(messageID), - Sender: t.peerID[:], - Receiver: receiver[:], - } - - if responseErr != nil { - responseMsg.Error = remotetypes.Error_INTERNAL_ERROR - } else { - payload, marshalErr := pb.MarshalCapabilityResponse(resp) - if marshalErr != nil { - panic(marshalErr) - } - responseMsg.Payload = payload - } - - err = t.dispatcher.Send(receiver, responseMsg) - if err != nil { - panic(err) - } - } - } -} - -type TestDispatcher struct { - sentMessagesCh chan *remotetypes.MessageBody - receiver remotetypes.Receiver -} - -func NewTestDispatcher() *TestDispatcher { - return &TestDispatcher{ - sentMessagesCh: make(chan *remotetypes.MessageBody, 1), - } -} - -func (t *TestDispatcher) SendToReceiver(msgBody *remotetypes.MessageBody) { - t.receiver.Receive(context.Background(), msgBody) -} - -func (t *TestDispatcher) SetReceiver(capabilityId string, donId string, receiver remotetypes.Receiver) error { - t.receiver = receiver - return nil -} - -func (t *TestDispatcher) RemoveReceiver(capabilityId string, donId string) {} - -func (t *TestDispatcher) Send(peerID p2ptypes.PeerID, msgBody *remotetypes.MessageBody) error { - t.sentMessagesCh <- msgBody - return nil -} diff --git a/core/capabilities/remote/types/types.go b/core/capabilities/remote/types/types.go index 54ec16f09f1..fefc9a9b5fe 100644 --- a/core/capabilities/remote/types/types.go +++ b/core/capabilities/remote/types/types.go @@ -13,10 +13,12 @@ import ( ) const ( - MethodRegisterTrigger = "RegisterTrigger" - MethodUnRegisterTrigger = "UnregisterTrigger" - MethodTriggerEvent = "TriggerEvent" - MethodExecute = "Execute" + MethodRegisterTrigger = "RegisterTrigger" + MethodUnRegisterTrigger = "UnregisterTrigger" + MethodTriggerEvent = "TriggerEvent" + MethodExecute = "Execute" + MethodRegisterToWorkflow = "RegisterToWorkflow" + MethodUnregisterFromWorkflow = "UnregisterFromWorkflow" ) type Dispatcher interface { diff --git a/core/capabilities/transmission/local_target_capability_test.go b/core/capabilities/transmission/local_target_capability_test.go index 67f22753bda..e1057ed3f8d 100644 --- a/core/capabilities/transmission/local_target_capability_test.go +++ b/core/capabilities/transmission/local_target_capability_test.go @@ -61,15 +61,15 @@ func TestScheduledExecutionStrategy_LocalDON(t *testing.T) { name: "position 1; oneAtATime", position: 1, schedule: "oneAtATime", - low: 100 * time.Millisecond, - high: 300 * time.Millisecond, + low: 300 * time.Millisecond, + high: 400 * time.Millisecond, }, { name: "position 2; oneAtATime", position: 2, schedule: "oneAtATime", - low: 300 * time.Millisecond, - high: 400 * time.Millisecond, + low: 100 * time.Millisecond, + high: 200 * time.Millisecond, }, { name: "position 3; oneAtATime", diff --git a/core/capabilities/transmission/transmission.go b/core/capabilities/transmission/transmission.go index 88ce0fa3edd..8dd90414fb9 100644 --- a/core/capabilities/transmission/transmission.go +++ b/core/capabilities/transmission/transmission.go @@ -27,7 +27,7 @@ type TransmissionConfig struct { DeltaStage time.Duration } -func extractTransmissionConfig(config *values.Map) (TransmissionConfig, error) { +func ExtractTransmissionConfig(config *values.Map) (TransmissionConfig, error) { var tc struct { DeltaStage string Schedule string @@ -37,6 +37,14 @@ func 
extractTransmissionConfig(config *values.Map) (TransmissionConfig, error) { return TransmissionConfig{}, fmt.Errorf("failed to unwrap tranmission config from value map: %w", err) } + // Default if no schedule and deltaStage is provided + if len(tc.Schedule) == 0 && len(tc.DeltaStage) == 0 { + return TransmissionConfig{ + Schedule: Schedule_AllAtOnce, + DeltaStage: 0, + }, nil + } + duration, err := time.ParseDuration(tc.DeltaStage) if err != nil { return TransmissionConfig{}, fmt.Errorf("failed to parse DeltaStage %s as duration: %w", tc.DeltaStage, err) @@ -51,21 +59,20 @@ func extractTransmissionConfig(config *values.Map) (TransmissionConfig, error) { // GetPeerIDToTransmissionDelay returns a map of PeerID to the time.Duration that the node with that PeerID should wait // before transmitting the capability request. If a node is not in the map, it should not transmit. func GetPeerIDToTransmissionDelay(donPeerIDs []types.PeerID, req capabilities.CapabilityRequest) (map[types.PeerID]time.Duration, error) { - tc, err := extractTransmissionConfig(req.Config) + tc, err := ExtractTransmissionConfig(req.Config) if err != nil { return nil, fmt.Errorf("failed to extract transmission config from request: %w", err) } - if err = validation.ValidateWorkflowOrExecutionID(req.Metadata.WorkflowID); err != nil { - return nil, fmt.Errorf("workflow ID is invalid: %w", err) + workflowExecutionID := req.Metadata.WorkflowExecutionID + if err := validation.ValidateWorkflowOrExecutionID(workflowExecutionID); err != nil { + return nil, fmt.Errorf("workflow or execution ID is invalid: %w", err) } - if err = validation.ValidateWorkflowOrExecutionID(req.Metadata.WorkflowExecutionID); err != nil { - return nil, fmt.Errorf("workflow execution ID is invalid: %w", err) - } - - transmissionID := req.Metadata.WorkflowID + req.Metadata.WorkflowExecutionID + return GetPeerIDToTransmissionDelaysForConfig(donPeerIDs, workflowExecutionID, tc) +} +func GetPeerIDToTransmissionDelaysForConfig(donPeerIDs []types.PeerID, transmissionID string, tc TransmissionConfig) (map[types.PeerID]time.Duration, error) { donMemberCount := len(donPeerIDs) key := transmissionScheduleSeed(transmissionID) schedule, err := createTransmissionSchedule(tc.Schedule, donMemberCount) diff --git a/core/capabilities/transmission/transmission_test.go b/core/capabilities/transmission/transmission_test.go index aaa367e78cf..1cb91c364a5 100644 --- a/core/capabilities/transmission/transmission_test.go +++ b/core/capabilities/transmission/transmission_test.go @@ -38,10 +38,10 @@ func Test_GetPeerIDToTransmissionDelay(t *testing.T) { "100ms", "15c631d295ef5e32deb99a10ee6804bc4af13855687559d7ff6552ac6dbb2ce0", map[string]time.Duration{ - "one": 300 * time.Millisecond, - "two": 0 * time.Millisecond, - "three": 100 * time.Millisecond, - "four": 200 * time.Millisecond, + "one": 100 * time.Millisecond, + "two": 200 * time.Millisecond, + "three": 300 * time.Millisecond, + "four": 0 * time.Millisecond, }, }, @@ -66,10 +66,10 @@ func Test_GetPeerIDToTransmissionDelay(t *testing.T) { "100ms", "16c631d295ef5e32deb99a10ee6804bc4af13855687559d7ff6552ac6dbb2ce1", map[string]time.Duration{ - "one": 300 * time.Millisecond, + "one": 200 * time.Millisecond, "two": 100 * time.Millisecond, - "three": 200 * time.Millisecond, - "four": 0 * time.Millisecond, + "three": 0 * time.Millisecond, + "four": 300 * time.Millisecond, }, }, } diff --git a/core/scripts/go.mod b/core/scripts/go.mod index caf3d5e68e6..028eb9fac9e 100644 --- a/core/scripts/go.mod +++ b/core/scripts/go.mod @@ -24,7 
+24,7 @@ require ( github.com/prometheus/client_golang v1.20.5 github.com/shopspring/decimal v1.4.0 github.com/smartcontractkit/chainlink-automation v0.8.1 - github.com/smartcontractkit/chainlink-common v0.3.1-0.20241113142256-8a7a997a0371 + github.com/smartcontractkit/chainlink-common v0.3.1-0.20241114134822-aadff98ef068 github.com/smartcontractkit/chainlink/deployment v0.0.0-00010101000000-000000000000 github.com/smartcontractkit/chainlink/v2 v2.14.0-mercury-20240807.0.20241106193309-5560cd76211a github.com/smartcontractkit/libocr v0.0.0-20241007185508-adbe57025f12 diff --git a/core/scripts/go.sum b/core/scripts/go.sum index 362d28f28c3..bdf3511c482 100644 --- a/core/scripts/go.sum +++ b/core/scripts/go.sum @@ -1094,8 +1094,8 @@ github.com/smartcontractkit/chainlink-automation v0.8.1 h1:sTc9LKpBvcKPc1JDYAmgB github.com/smartcontractkit/chainlink-automation v0.8.1/go.mod h1:Iij36PvWZ6blrdC5A/nrQUBuf3MH3JvsBB9sSyc9W08= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241112095015-3e85d9f1898b h1:4kmZtaQ4fXwduHnw9xk5VmiIOW4nHg/Mx6iidlZJt5o= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241112095015-3e85d9f1898b/go.mod h1:4adKaHNaxFsRvV/lYfqtbsWyyvIPUMLR0FdOJN/ljis= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241113142256-8a7a997a0371 h1:vnNqMaAvheZgR8IDMGw0QIV1Qen3XTh7IChwW40SNfU= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241113142256-8a7a997a0371/go.mod h1:ny87uTW6hLjCTLiBqBRNFEhETSXhHWevYlPclT5lSco= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241114134822-aadff98ef068 h1:2llRW4Tn9W/EZp2XvXclQ9IjeTBwwxVPrrqaerX+vCE= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241114134822-aadff98ef068/go.mod h1:ny87uTW6hLjCTLiBqBRNFEhETSXhHWevYlPclT5lSco= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241017133723-5277829bd53f h1:BwrIaQIx5Iy6eT+DfLhFfK2XqjxRm74mVdlX8gbu4dw= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241017133723-5277829bd53f/go.mod h1:wHtwSR3F1CQSJJZDQKuqaqFYnvkT+kMyget7dl8Clvo= github.com/smartcontractkit/chainlink-data-streams v0.1.1-0.20241018134907-a00ba3729b5e h1:JiETqdNM0bktAUGMc62COwXIaw3rR3M77Me6bBLG0Fg= diff --git a/core/services/workflows/engine.go b/core/services/workflows/engine.go index 69c36c1c174..b958e171c0c 100644 --- a/core/services/workflows/engine.go +++ b/core/services/workflows/engine.go @@ -285,6 +285,7 @@ func (e *Engine) initializeCapability(ctx context.Context, step *step) error { Metadata: capabilities.RegistrationMetadata{ WorkflowID: e.workflow.id, WorkflowOwner: e.workflow.owner, + ReferenceID: step.Vertex.Ref, }, Config: stepConfig, } @@ -1112,6 +1113,7 @@ func (e *Engine) Close() error { Metadata: capabilities.RegistrationMetadata{ WorkflowID: e.workflow.id, WorkflowOwner: e.workflow.owner, + ReferenceID: s.Vertex.Ref, }, Config: stepConfig, } @@ -1119,7 +1121,7 @@ func (e *Engine) Close() error { innerErr := s.capability.UnregisterFromWorkflow(ctx, reg) if innerErr != nil { return &workflowError{err: innerErr, - reason: fmt.Sprintf("failed to unregister capability from workflow: %+v", reg), + reason: fmt.Sprintf("failed to unregister capability from workflow: %+v", reg), labels: map[string]string{ platform.KeyWorkflowID: e.workflow.id, platform.KeyStepID: s.ID, diff --git a/deployment/go.mod b/deployment/go.mod index 19720794189..73e23aa83f3 100644 --- a/deployment/go.mod +++ b/deployment/go.mod @@ -23,7 +23,7 @@ require ( github.com/smartcontractkit/ccip-owner-contracts v0.0.0-20240926212305-a6deabdfce86 github.com/smartcontractkit/chain-selectors v1.0.29 
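
For context on the transmission refactor above: with the `oneAtATime` schedule and a `deltaStage` of 100ms, every DON member gets a distinct slot in a permutation derived deterministically from the transmission ID, and the member in slot i waits i * deltaStage before transmitting. The transmission ID is now the workflow execution ID alone (previously workflow ID + execution ID), which is why the expected delays in the test vectors above were reshuffled. Below is a minimal, self-contained sketch of that assignment logic; the SHA-256 seeding is an illustrative stand-in, not the real transmissionScheduleSeed and createTransmissionSchedule implementations.

package main

import (
	"crypto/sha256"
	"encoding/binary"
	"fmt"
	"math/rand"
	"time"
)

// delaysOneAtATime models the "oneAtATime" schedule: members are placed into
// a pseudorandom permutation seeded from the transmission ID, and the member
// in slot i waits i*deltaStage before transmitting.
func delaysOneAtATime(peers []string, transmissionID string, deltaStage time.Duration) map[string]time.Duration {
	// Illustrative stand-in for transmissionScheduleSeed: derive a stable
	// seed from the transmission ID.
	sum := sha256.Sum256([]byte(transmissionID))
	r := rand.New(rand.NewSource(int64(binary.BigEndian.Uint64(sum[:8]))))
	perm := r.Perm(len(peers))

	delays := make(map[string]time.Duration, len(peers))
	for i, slot := range perm {
		delays[peers[i]] = time.Duration(slot) * deltaStage
	}
	return delays
}

func main() {
	peers := []string{"one", "two", "three", "four"}
	fmt.Println(delaysOneAtATime(peers, "15c631d295ef5e32deb99a10ee6804bc4af13855687559d7ff6552ac6dbb2ce0", 100*time.Millisecond))
}

With four peers and a 100ms deltaStage this always produces the delay set {0ms, 100ms, 200ms, 300ms}, distributed according to the seed, matching the shape of the expected maps in Test_GetPeerIDToTransmissionDelay.
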
github.com/smartcontractkit/chainlink-ccip v0.0.0-20241112095015-3e85d9f1898b - github.com/smartcontractkit/chainlink-common v0.3.1-0.20241113142256-8a7a997a0371 + github.com/smartcontractkit/chainlink-common v0.3.1-0.20241114134822-aadff98ef068 github.com/smartcontractkit/chainlink-protos/job-distributor v0.4.0 github.com/smartcontractkit/chainlink-testing-framework/lib v1.50.13 github.com/smartcontractkit/chainlink/v2 v2.0.0-00010101000000-000000000000 diff --git a/deployment/go.sum b/deployment/go.sum index ce9bf9e0b7f..6c372de39f2 100644 --- a/deployment/go.sum +++ b/deployment/go.sum @@ -1384,8 +1384,8 @@ github.com/smartcontractkit/chainlink-automation v0.8.1 h1:sTc9LKpBvcKPc1JDYAmgB github.com/smartcontractkit/chainlink-automation v0.8.1/go.mod h1:Iij36PvWZ6blrdC5A/nrQUBuf3MH3JvsBB9sSyc9W08= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241112095015-3e85d9f1898b h1:4kmZtaQ4fXwduHnw9xk5VmiIOW4nHg/Mx6iidlZJt5o= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241112095015-3e85d9f1898b/go.mod h1:4adKaHNaxFsRvV/lYfqtbsWyyvIPUMLR0FdOJN/ljis= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241113142256-8a7a997a0371 h1:vnNqMaAvheZgR8IDMGw0QIV1Qen3XTh7IChwW40SNfU= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241113142256-8a7a997a0371/go.mod h1:ny87uTW6hLjCTLiBqBRNFEhETSXhHWevYlPclT5lSco= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241114134822-aadff98ef068 h1:2llRW4Tn9W/EZp2XvXclQ9IjeTBwwxVPrrqaerX+vCE= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241114134822-aadff98ef068/go.mod h1:ny87uTW6hLjCTLiBqBRNFEhETSXhHWevYlPclT5lSco= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241017133723-5277829bd53f h1:BwrIaQIx5Iy6eT+DfLhFfK2XqjxRm74mVdlX8gbu4dw= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241017133723-5277829bd53f/go.mod h1:wHtwSR3F1CQSJJZDQKuqaqFYnvkT+kMyget7dl8Clvo= github.com/smartcontractkit/chainlink-data-streams v0.1.1-0.20241018134907-a00ba3729b5e h1:JiETqdNM0bktAUGMc62COwXIaw3rR3M77Me6bBLG0Fg= diff --git a/go.mod b/go.mod index 2b6f03333c0..d9a65c16063 100644 --- a/go.mod +++ b/go.mod @@ -77,7 +77,7 @@ require ( github.com/smartcontractkit/chain-selectors v1.0.29 github.com/smartcontractkit/chainlink-automation v0.8.1 github.com/smartcontractkit/chainlink-ccip v0.0.0-20241112095015-3e85d9f1898b - github.com/smartcontractkit/chainlink-common v0.3.1-0.20241113142256-8a7a997a0371 + github.com/smartcontractkit/chainlink-common v0.3.1-0.20241114134822-aadff98ef068 github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241017133723-5277829bd53f github.com/smartcontractkit/chainlink-data-streams v0.1.1-0.20241018134907-a00ba3729b5e github.com/smartcontractkit/chainlink-feeds v0.1.1 diff --git a/go.sum b/go.sum index 13217384ff6..885518d69aa 100644 --- a/go.sum +++ b/go.sum @@ -1078,8 +1078,8 @@ github.com/smartcontractkit/chainlink-automation v0.8.1 h1:sTc9LKpBvcKPc1JDYAmgB github.com/smartcontractkit/chainlink-automation v0.8.1/go.mod h1:Iij36PvWZ6blrdC5A/nrQUBuf3MH3JvsBB9sSyc9W08= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241112095015-3e85d9f1898b h1:4kmZtaQ4fXwduHnw9xk5VmiIOW4nHg/Mx6iidlZJt5o= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241112095015-3e85d9f1898b/go.mod h1:4adKaHNaxFsRvV/lYfqtbsWyyvIPUMLR0FdOJN/ljis= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241113142256-8a7a997a0371 h1:vnNqMaAvheZgR8IDMGw0QIV1Qen3XTh7IChwW40SNfU= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241113142256-8a7a997a0371/go.mod h1:ny87uTW6hLjCTLiBqBRNFEhETSXhHWevYlPclT5lSco= 
+github.com/smartcontractkit/chainlink-common v0.3.1-0.20241114134822-aadff98ef068 h1:2llRW4Tn9W/EZp2XvXclQ9IjeTBwwxVPrrqaerX+vCE= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241114134822-aadff98ef068/go.mod h1:ny87uTW6hLjCTLiBqBRNFEhETSXhHWevYlPclT5lSco= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241017133723-5277829bd53f h1:BwrIaQIx5Iy6eT+DfLhFfK2XqjxRm74mVdlX8gbu4dw= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241017133723-5277829bd53f/go.mod h1:wHtwSR3F1CQSJJZDQKuqaqFYnvkT+kMyget7dl8Clvo= github.com/smartcontractkit/chainlink-data-streams v0.1.1-0.20241018134907-a00ba3729b5e h1:JiETqdNM0bktAUGMc62COwXIaw3rR3M77Me6bBLG0Fg= diff --git a/integration-tests/go.mod b/integration-tests/go.mod index aba17e10397..b5948028f32 100644 --- a/integration-tests/go.mod +++ b/integration-tests/go.mod @@ -37,7 +37,7 @@ require ( github.com/smartcontractkit/chain-selectors v1.0.29 github.com/smartcontractkit/chainlink-automation v0.8.1 github.com/smartcontractkit/chainlink-ccip v0.0.0-20241112095015-3e85d9f1898b - github.com/smartcontractkit/chainlink-common v0.3.1-0.20241113142256-8a7a997a0371 + github.com/smartcontractkit/chainlink-common v0.3.1-0.20241114134822-aadff98ef068 github.com/smartcontractkit/chainlink-protos/job-distributor v0.4.0 github.com/smartcontractkit/chainlink-testing-framework/havoc v1.50.2 github.com/smartcontractkit/chainlink-testing-framework/lib v1.50.13 diff --git a/integration-tests/go.sum b/integration-tests/go.sum index 5e6793bbb0f..5a4bcb648cd 100644 --- a/integration-tests/go.sum +++ b/integration-tests/go.sum @@ -1405,8 +1405,8 @@ github.com/smartcontractkit/chainlink-automation v0.8.1 h1:sTc9LKpBvcKPc1JDYAmgB github.com/smartcontractkit/chainlink-automation v0.8.1/go.mod h1:Iij36PvWZ6blrdC5A/nrQUBuf3MH3JvsBB9sSyc9W08= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241112095015-3e85d9f1898b h1:4kmZtaQ4fXwduHnw9xk5VmiIOW4nHg/Mx6iidlZJt5o= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241112095015-3e85d9f1898b/go.mod h1:4adKaHNaxFsRvV/lYfqtbsWyyvIPUMLR0FdOJN/ljis= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241113142256-8a7a997a0371 h1:vnNqMaAvheZgR8IDMGw0QIV1Qen3XTh7IChwW40SNfU= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241113142256-8a7a997a0371/go.mod h1:ny87uTW6hLjCTLiBqBRNFEhETSXhHWevYlPclT5lSco= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241114134822-aadff98ef068 h1:2llRW4Tn9W/EZp2XvXclQ9IjeTBwwxVPrrqaerX+vCE= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241114134822-aadff98ef068/go.mod h1:ny87uTW6hLjCTLiBqBRNFEhETSXhHWevYlPclT5lSco= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241017133723-5277829bd53f h1:BwrIaQIx5Iy6eT+DfLhFfK2XqjxRm74mVdlX8gbu4dw= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241017133723-5277829bd53f/go.mod h1:wHtwSR3F1CQSJJZDQKuqaqFYnvkT+kMyget7dl8Clvo= github.com/smartcontractkit/chainlink-data-streams v0.1.1-0.20241018134907-a00ba3729b5e h1:JiETqdNM0bktAUGMc62COwXIaw3rR3M77Me6bBLG0Fg= diff --git a/integration-tests/load/go.mod b/integration-tests/load/go.mod index c89baf21bd9..7398abc5af9 100644 --- a/integration-tests/load/go.mod +++ b/integration-tests/load/go.mod @@ -17,7 +17,7 @@ require ( github.com/pkg/errors v0.9.1 github.com/rs/zerolog v1.33.0 github.com/slack-go/slack v0.15.0 - github.com/smartcontractkit/chainlink-common v0.3.1-0.20241113142256-8a7a997a0371 + github.com/smartcontractkit/chainlink-common v0.3.1-0.20241114134822-aadff98ef068 github.com/smartcontractkit/chainlink-testing-framework/lib v1.50.13 
github.com/smartcontractkit/chainlink-testing-framework/seth v1.50.5 github.com/smartcontractkit/chainlink-testing-framework/wasp v1.50.2 diff --git a/integration-tests/load/go.sum b/integration-tests/load/go.sum index f2c309ea33a..6ca863f3d08 100644 --- a/integration-tests/load/go.sum +++ b/integration-tests/load/go.sum @@ -1394,8 +1394,8 @@ github.com/smartcontractkit/chainlink-automation v0.8.1 h1:sTc9LKpBvcKPc1JDYAmgB github.com/smartcontractkit/chainlink-automation v0.8.1/go.mod h1:Iij36PvWZ6blrdC5A/nrQUBuf3MH3JvsBB9sSyc9W08= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241112095015-3e85d9f1898b h1:4kmZtaQ4fXwduHnw9xk5VmiIOW4nHg/Mx6iidlZJt5o= github.com/smartcontractkit/chainlink-ccip v0.0.0-20241112095015-3e85d9f1898b/go.mod h1:4adKaHNaxFsRvV/lYfqtbsWyyvIPUMLR0FdOJN/ljis= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241113142256-8a7a997a0371 h1:vnNqMaAvheZgR8IDMGw0QIV1Qen3XTh7IChwW40SNfU= -github.com/smartcontractkit/chainlink-common v0.3.1-0.20241113142256-8a7a997a0371/go.mod h1:ny87uTW6hLjCTLiBqBRNFEhETSXhHWevYlPclT5lSco= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241114134822-aadff98ef068 h1:2llRW4Tn9W/EZp2XvXclQ9IjeTBwwxVPrrqaerX+vCE= +github.com/smartcontractkit/chainlink-common v0.3.1-0.20241114134822-aadff98ef068/go.mod h1:ny87uTW6hLjCTLiBqBRNFEhETSXhHWevYlPclT5lSco= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241017133723-5277829bd53f h1:BwrIaQIx5Iy6eT+DfLhFfK2XqjxRm74mVdlX8gbu4dw= github.com/smartcontractkit/chainlink-cosmos v0.5.2-0.20241017133723-5277829bd53f/go.mod h1:wHtwSR3F1CQSJJZDQKuqaqFYnvkT+kMyget7dl8Clvo= github.com/smartcontractkit/chainlink-data-streams v0.1.1-0.20241018134907-a00ba3729b5e h1:JiETqdNM0bktAUGMc62COwXIaw3rR3M77Me6bBLG0Fg= From 1d03154a18499fe1511b67ca2964c6cce2177eb0 Mon Sep 17 00:00:00 2001 From: Erik Burton Date: Thu, 14 Nov 2024 14:11:32 -0800 Subject: [PATCH 06/13] feat: delete merge queue caches on pr merge (#15234) * feat: delete merge queue caches on pr merge * fix: use env, rename variables --- .github/workflows/delete-caches.yml | 51 ++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/.github/workflows/delete-caches.yml b/.github/workflows/delete-caches.yml index 870b6b6d086..aaba0dda623 100644 --- a/.github/workflows/delete-caches.yml +++ b/.github/workflows/delete-caches.yml @@ -16,27 +16,48 @@ jobs: # See also: https://docs.github.com/en/rest/actions/cache?apiVersion=2022-11-28#delete-a-github-actions-cache-for-a-repository-using-a-cache-id actions: write contents: read + env: + REPO: ${{ github.repository }} + PR_NUMBER: ${{ github.event.pull_request.number }} steps: - name: Check out code uses: actions/checkout@v4.1.2 - - name: Cleanup Branch Caches - run: | - gh extension install actions/gh-actions-cache - - REPO=${{ github.repository }} - BRANCH=refs/pull/${{ github.event.pull_request.number }}/merge + - name: Setup gh-actions-cache extension + run: gh extension install actions/gh-actions-cache - echo "Fetching list of cache key" - cacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH | cut -f 1 ) + - name: Retrieve Trunk SHA + id: get-sha + run: | + SHA=$(gh pr view -R $REPO $PR_NUMBER --json mergeCommit --jq .mergeCommit.oid) + echo "sha=$SHA" >> $GITHUB_OUTPUT - ## Setting this to not fail the workflow while deleting cache keys. + - name: Cleanup Caches + env: + TRUNK_SHA: ${{ steps.get-sha.outputs.sha }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | set +e - echo "Deleting caches..." 
-          for cacheKey in $cacheKeysForPR
-          do
-              gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm
+
+          PR_BRANCH=refs/pull/$PR_NUMBER/merge
+          echo "Fetching list of cache keys for the PR branch ($PR_BRANCH)"
+          PR_CACHE_KEYS=$(gh actions-cache list -R $REPO -B $PR_BRANCH | cut -f 1)
+
+          echo "Deleting caches for PR branch ($PR_BRANCH)..."
+          for CACHE_KEY in $PR_CACHE_KEYS; do
+            gh actions-cache delete $CACHE_KEY -R $REPO -B $PR_BRANCH --confirm
           done
+
+          if [[ -n "$TRUNK_SHA" ]]; then
+            echo "Found corresponding merge commit $TRUNK_SHA"
+            QUEUE_BRANCH="gh-readonly-queue/develop/pr-${PR_NUMBER}-${TRUNK_SHA}"
+            echo "Fetching list of cache keys for the merge queue branch ($QUEUE_BRANCH)"
+            QUEUE_CACHE_KEYS=$(gh actions-cache list -R $REPO -B $QUEUE_BRANCH | cut -f 1)
+
+            echo "Deleting caches for merge queue branch ($QUEUE_BRANCH)..."
+            for CACHE_KEY in $QUEUE_CACHE_KEYS; do
+              gh actions-cache delete $CACHE_KEY -R $REPO -B $QUEUE_BRANCH --confirm
+            done
+          fi
+
           echo "Done"
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file

From 0240fb8750640619a0862903c6ad5e94c110b8e3 Mon Sep 17 00:00:00 2001
From: Erik Burton
Date: Thu, 14 Nov 2024 14:29:10 -0800
Subject: [PATCH 07/13] chore: update goreleaser github action dependencies (#15251)

---
 .github/actions/goreleaser-build-sign-publish/action.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/actions/goreleaser-build-sign-publish/action.yml b/.github/actions/goreleaser-build-sign-publish/action.yml
index 0a70cc479c9..c57bff91488 100644
--- a/.github/actions/goreleaser-build-sign-publish/action.yml
+++ b/.github/actions/goreleaser-build-sign-publish/action.yml
@@ -33,14 +33,14 @@ runs:
       name: Set up QEMU
       uses: docker/setup-qemu-action@49b3bc8e6bdd4a60e6116a5414239cba5943d3cf # v3.2.0
     - name: Setup docker buildx
-      uses: docker/setup-buildx-action@2b51285047da1547ffb1b2203d8be4c0af6b1f20 # v3.2.0
+      uses: docker/setup-buildx-action@c47758b77c9736f4b2ef4073d4d51994fabfe349 # v3.7.0
     - name: Set up Go
       uses: ./.github/actions/setup-go
       with:
         go-version-file: 'go.mod'
         only-modules: 'true'
     - name: Setup goreleaser
-      uses: goreleaser/goreleaser-action@286f3b13b1b49da4ac219696163fb8c1c93e1200 # v6.0.0
+      uses: goreleaser/goreleaser-action@9ed2f89a662bf1735a48bc8557fd212fa902bebf # v6.1.0
       with:
         distribution: goreleaser-pro
         install-only: true
@@ -49,12 +49,12 @@ runs:
         GORELEASER_KEY: ${{ inputs.goreleaser-key }}
 
     - name: Login to docker registry
-      uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20 # v3.1.0
+      uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
       with:
         registry: ${{ inputs.docker-registry }}
     - name: Install syft
-      uses: anchore/sbom-action/download-syft@61119d458adab75f756bc0b9e4bde25725f86a7a # v0.17.2
+      uses: anchore/sbom-action/download-syft@fc46e51fd3cb168ffb36c6d1915723c47db58abb # v0.17.7
     - name: Run goreleaser release
       shell: bash

From 1a80fec3917b7b96d95d85dee2ccdc17f88f27ca Mon Sep 17 00:00:00 2001
From: Bolek <1416262+bolekk@users.noreply.github.com>
Date: Thu, 14 Nov 2024 15:21:15 -0800
Subject: [PATCH 08/13] Prune workflow DB entries (#15226)

---
 core/services/chainlink/application.go    |  1 +
 core/services/workflows/delegate.go       |  1 +
 core/services/workflows/store/store_db.go | 72 +++++++++++++++++++++--
 core/web/testdata/body/health.html        |  4 ++
 core/web/testdata/body/health.json        |  9 +++
 core/web/testdata/body/health.txt         |  1 +
 testdata/scripts/health/default.txtar     | 10 ++++
 testdata/scripts/health/multi-chain.txtar | 10 ++++
 8 files changed, 104
insertions(+), 4 deletions(-) diff --git a/core/services/chainlink/application.go b/core/services/chainlink/application.go index 0b2352f67d4..abbe9dad9ab 100644 --- a/core/services/chainlink/application.go +++ b/core/services/chainlink/application.go @@ -400,6 +400,7 @@ func NewApplication(opts ApplicationOpts) (Application, error) { streamRegistry = streams.NewRegistry(globalLogger, pipelineRunner) workflowORM = workflowstore.NewDBStore(opts.DS, globalLogger, clockwork.NewRealClock()) ) + srvcs = append(srvcs, workflowORM) promReporter := headreporter.NewPrometheusReporter(opts.DS, legacyEVMChains) chainIDs := make([]*big.Int, legacyEVMChains.Len()) diff --git a/core/services/workflows/delegate.go b/core/services/workflows/delegate.go index 72aff3033d0..1db26729ca6 100644 --- a/core/services/workflows/delegate.go +++ b/core/services/workflows/delegate.go @@ -75,6 +75,7 @@ func (d *Delegate) ServicesForSpec(ctx context.Context, spec job.Job) ([]job.Ser if err != nil { return nil, err } + d.logger.Infow("Creating Workflow Engine for workflow spec", "workflowID", spec.WorkflowSpec.WorkflowID, "workflowOwner", spec.WorkflowSpec.WorkflowOwner, "workflowName", spec.WorkflowSpec.WorkflowName, "jobName", spec.Name) return []job.ServiceCtx{engine}, nil } diff --git a/core/services/workflows/store/store_db.go b/core/services/workflows/store/store_db.go index 926dd06d09b..f15a6928e7e 100644 --- a/core/services/workflows/store/store_db.go +++ b/core/services/workflows/store/store_db.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "sync" "time" "google.golang.org/protobuf/proto" @@ -15,17 +16,31 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/values" valuespb "github.com/smartcontractkit/chainlink-common/pkg/values/pb" + commonservices "github.com/smartcontractkit/chainlink-common/pkg/services" "github.com/smartcontractkit/chainlink/v2/core/logger" + "github.com/smartcontractkit/chainlink/v2/core/services" +) + +const ( + defaultPruneFrequencySec = 20 + defaultPruneTimeoutSec = 60 + defaultPruneRecordAgeHours = 3 + defaultPruneBatchSize = 3000 ) // `DBStore` is a postgres-backed // data store that persists workflow progress. 
type DBStore struct { - lggr logger.Logger - db sqlutil.DataSource - clock clockwork.Clock + commonservices.StateMachine + lggr logger.Logger + db sqlutil.DataSource + shutdownWaitGroup sync.WaitGroup + chStop commonservices.StopChan + clock clockwork.Clock } +var _ services.ServiceCtx = (*DBStore)(nil) + // `workflowExecutionRow` describes a row // of the `workflow_executions` table type workflowExecutionRow struct { @@ -70,6 +85,47 @@ type workflowExecutionWithStep struct { WEFinishedAt *time.Time `db:"we_finished_at"` } +func (d *DBStore) Start(context.Context) error { + return d.StartOnce("DBStore", func() error { + d.shutdownWaitGroup.Add(1) + go d.pruneDBEntries() + return nil + }) +} + +func (d *DBStore) Close() error { + return d.StopOnce("DBStore", func() error { + close(d.chStop) + d.shutdownWaitGroup.Wait() + return nil + }) +} + +func (d *DBStore) pruneDBEntries() { + defer d.shutdownWaitGroup.Done() + ticker := time.NewTicker(defaultPruneFrequencySec * time.Second) + defer ticker.Stop() + for { + select { + case <-d.chStop: + return + case <-ticker.C: + ctx, cancel := d.chStop.CtxWithTimeout(defaultPruneTimeoutSec * time.Second) + err := sqlutil.TransactDataSource(ctx, d.db, nil, func(tx sqlutil.DataSource) error { + stmt := fmt.Sprintf("DELETE FROM workflow_executions WHERE (id) IN (SELECT id FROM workflow_executions WHERE (created_at < now() - interval '%d hours') LIMIT %d);", defaultPruneRecordAgeHours, defaultPruneBatchSize) + _, err := tx.ExecContext(ctx, stmt) + return err + }) + if err != nil { + d.lggr.Errorw("Failed to prune workflow_executions", "err", err) + } else { + d.lggr.Infow("Pruned oldest workflow_executions", "batchSize", defaultPruneBatchSize, "ageLimitHours", defaultPruneRecordAgeHours) + } + cancel() + } + } +} + // `UpdateStatus` updates the status of the given workflow execution func (d *DBStore) UpdateStatus(ctx context.Context, executionID string, status string) error { sql := `UPDATE workflow_executions SET status = $1, updated_at = $2 WHERE id = $3` @@ -407,5 +463,13 @@ func (d *DBStore) GetUnfinished(ctx context.Context, workflowID string, offset, } func NewDBStore(ds sqlutil.DataSource, lggr logger.Logger, clock clockwork.Clock) *DBStore { - return &DBStore{db: ds, lggr: lggr.Named("WorkflowDBStore"), clock: clock} + return &DBStore{db: ds, lggr: lggr.Named("WorkflowDBStore"), clock: clock, chStop: make(chan struct{})} +} + +func (d *DBStore) HealthReport() map[string]error { + return map[string]error{d.Name(): d.Healthy()} +} + +func (d *DBStore) Name() string { + return d.lggr.Name() } diff --git a/core/web/testdata/body/health.html b/core/web/testdata/body/health.html index 2bf427f5e00..4692d452a5b 100644 --- a/core/web/testdata/body/health.html +++ b/core/web/testdata/body/health.html @@ -156,3 +156,7 @@
TelemetryManager
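
For reference, with the defaults introduced in store_db.go above (a 20-second tick, a 60-second per-pass timeout, a 3-hour record age limit, and 3000-row batches), each pruning pass executes the following statement inside a transaction:

    DELETE FROM workflow_executions WHERE (id) IN (SELECT id FROM workflow_executions WHERE (created_at < now() - interval '3 hours') LIMIT 3000);

Bounding each pass with a LIMIT-ed subquery drains a large backlog incrementally across successive ticks instead of holding one long-running delete open.
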
+
+ WorkflowDBStore +
+ diff --git a/core/web/testdata/body/health.json b/core/web/testdata/body/health.json index d573e0bd5fc..8c4c3b312de 100644 --- a/core/web/testdata/body/health.json +++ b/core/web/testdata/body/health.json @@ -287,6 +287,15 @@ "status": "passing", "output": "" } + }, + { + "type": "checks", + "id": "WorkflowDBStore", + "attributes": { + "name": "WorkflowDBStore", + "status": "passing", + "output": "" + } } ] } diff --git a/core/web/testdata/body/health.txt b/core/web/testdata/body/health.txt index fde038dfc63..a098f906146 100644 --- a/core/web/testdata/body/health.txt +++ b/core/web/testdata/body/health.txt @@ -31,3 +31,4 @@ ok StarkNet.Baz.Chain ok StarkNet.Baz.Relayer ok StarkNet.Baz.Txm ok TelemetryManager +ok WorkflowDBStore \ No newline at end of file diff --git a/testdata/scripts/health/default.txtar b/testdata/scripts/health/default.txtar index a7db2308e35..73b82bc7e39 100644 --- a/testdata/scripts/health/default.txtar +++ b/testdata/scripts/health/default.txtar @@ -41,6 +41,7 @@ ok PipelineRunner ok PipelineRunner.BridgeCache ok RetirementReportCache ok TelemetryManager +ok WorkflowDBStore -- out.json -- { @@ -134,6 +135,15 @@ ok TelemetryManager "status": "passing", "output": "" } + }, + { + "type": "checks", + "id": "WorkflowDBStore", + "attributes": { + "name": "WorkflowDBStore", + "status": "passing", + "output": "" + } } ] } diff --git a/testdata/scripts/health/multi-chain.txtar b/testdata/scripts/health/multi-chain.txtar index f53bbfebf8c..d3a0caf67b5 100644 --- a/testdata/scripts/health/multi-chain.txtar +++ b/testdata/scripts/health/multi-chain.txtar @@ -101,6 +101,7 @@ ok StarkNet.Baz.Chain ok StarkNet.Baz.Relayer ok StarkNet.Baz.Txm ok TelemetryManager +ok WorkflowDBStore -- out-unhealthy.txt -- ! EVM.1.HeadTracker.HeadListener @@ -396,6 +397,15 @@ ok TelemetryManager "status": "passing", "output": "" } + }, + { + "type": "checks", + "id": "WorkflowDBStore", + "attributes": { + "name": "WorkflowDBStore", + "status": "passing", + "output": "" + } } ] } From 93ee17772daeab9d03c356b4b3df1019f1d35c5c Mon Sep 17 00:00:00 2001 From: Cedric Date: Fri, 15 Nov 2024 11:28:12 +0000 Subject: [PATCH 09/13] [chore] Small fixes (#15230) * [chore] Small fixes - Ensure we start the handler - Ensure fetch message ID is < 128 * Fix tests --- core/capabilities/compute/compute.go | 1 - core/capabilities/compute/compute_test.go | 1 - core/services/standardcapabilities/delegate.go | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/core/capabilities/compute/compute.go b/core/capabilities/compute/compute.go index 78a4cc1e033..cdb8ee19a6a 100644 --- a/core/capabilities/compute/compute.go +++ b/core/capabilities/compute/compute.go @@ -294,7 +294,6 @@ func (c *Compute) createFetcher() func(ctx context.Context, req *wasmpb.FetchReq ) messageID := strings.Join([]string{ - req.Metadata.WorkflowId, req.Metadata.WorkflowExecutionId, ghcapabilities.MethodComputeAction, c.idGenerator(), diff --git a/core/capabilities/compute/compute_test.go b/core/capabilities/compute/compute_test.go index 719bff82edf..e0f5a2f9750 100644 --- a/core/capabilities/compute/compute_test.go +++ b/core/capabilities/compute/compute_test.go @@ -189,7 +189,6 @@ func TestComputeFetch(t *testing.T) { th.connector.EXPECT().GatewayIDs().Return([]string{"gateway1", "gateway2"}) msgID := strings.Join([]string{ - workflowID, workflowExecutionID, ghcapabilities.MethodComputeAction, validRequestUUID, diff --git a/core/services/standardcapabilities/delegate.go b/core/services/standardcapabilities/delegate.go index 
a92e082dead..ceb69883e90 100644 --- a/core/services/standardcapabilities/delegate.go +++ b/core/services/standardcapabilities/delegate.go @@ -254,7 +254,7 @@ func (d *Delegate) ServicesForSpec(ctx context.Context, spec job.Job) ([]job.Ser } computeSrvc := compute.NewAction(cfg, log, d.registry, handler, idGeneratorFn) - return []job.ServiceCtx{computeSrvc}, nil + return []job.ServiceCtx{handler, computeSrvc}, nil } standardCapability := newStandardCapabilities(log, spec.StandardCapabilitiesSpec, d.cfg, telemetryService, kvStore, d.registry, errorLog, From 827e003a0c8c0212c6de0cb330e1f27a9c583dab Mon Sep 17 00:00:00 2001 From: chudilka1 Date: Fri, 15 Nov 2024 14:00:09 +0200 Subject: [PATCH 10/13] Update Log.Level config docs (#15244) --- .changeset/rude-geckos-switch.md | 5 +++++ core/config/docs/core.toml | 4 ++-- docs/CONFIG.md | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 .changeset/rude-geckos-switch.md diff --git a/.changeset/rude-geckos-switch.md b/.changeset/rude-geckos-switch.md new file mode 100644 index 00000000000..866b1c40c63 --- /dev/null +++ b/.changeset/rude-geckos-switch.md @@ -0,0 +1,5 @@ +--- +"chainlink": minor +--- + +#bugfix Update Log.Level and MaxSize configs description in the docs diff --git a/core/config/docs/core.toml b/core/config/docs/core.toml index 083633ab57f..e0fc76f449c 100644 --- a/core/config/docs/core.toml +++ b/core/config/docs/core.toml @@ -124,7 +124,7 @@ JsonWrapperKey = 'event' # Example Headers = ['Authorization: token', 'X-SomeOther-Header: value with spaces | and a bar+*'] # Example [Log] -# Level determines both what is printed on the screen and what is written to the log file. +# Level determines only what is printed on the screen/console. This configuration does not apply to the logs that are recorded in a file (see [`Log.File`](#logfile) for more details). # # The available levels are: # - "debug": Useful for forensic debugging of issues. @@ -145,7 +145,7 @@ UnixTS = false # Default [Log.File] # Dir sets the log directory. By default, Chainlink nodes write log data to `$ROOT/log.jsonl`. Dir = '/my/log/directory' # Example -# MaxSize determines the log file's max size in megabytes before file rotation. Having this not set will disable logging to disk. If your disk doesn't have enough disk space, the logging will pause and the application will log errors until space is available again. +# MaxSize determines the log file's max size before file rotation. Having this not set or set to a value smaller than 1Mb will disable logging to disk. If your disk doesn't have enough disk space, the logging will pause and the application will log errors until space is available again. # # Values must have suffixes with a unit like: `5120mb` (5,120 megabytes). If no unit suffix is provided, the value defaults to `b` (bytes). The list of valid unit suffixes are: # diff --git a/docs/CONFIG.md b/docs/CONFIG.md index 47ba5b574cd..ff918468c07 100644 --- a/docs/CONFIG.md +++ b/docs/CONFIG.md @@ -389,7 +389,7 @@ UnixTS = false # Default ```toml Level = 'info' # Default ``` -Level determines both what is printed on the screen and what is written to the log file. +Level determines only what is printed on the screen/console. This configuration does not apply to the logs that are recorded in a file (see [`Log.File`](#logfile) for more details). The available levels are: - "debug": Useful for forensic debugging of issues. @@ -434,7 +434,7 @@ Dir sets the log directory. 
By default, Chainlink nodes write log data to `$ROOT ```toml MaxSize = '5120mb' # Default ``` -MaxSize determines the log file's max size in megabytes before file rotation. Having this not set will disable logging to disk. If your disk doesn't have enough disk space, the logging will pause and the application will log errors until space is available again. +MaxSize determines the log file's max size before file rotation. Having this not set or set to a value smaller than 1Mb will disable logging to disk. If your disk doesn't have enough disk space, the logging will pause and the application will log errors until space is available again. Values must have suffixes with a unit like: `5120mb` (5,120 megabytes). If no unit suffix is provided, the value defaults to `b` (bytes). The list of valid unit suffixes are: From 5f6d46d194c83610f075d12099f53073a2606051 Mon Sep 17 00:00:00 2001 From: Lukasz <120112546+lukaszcl@users.noreply.github.com> Date: Fri, 15 Nov 2024 13:43:30 +0100 Subject: [PATCH 11/13] Fix workflow for nightly flaky test detector (#15260) --- .github/workflows/run-nightly-flaky-test-detector.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-nightly-flaky-test-detector.yml b/.github/workflows/run-nightly-flaky-test-detector.yml index 615233a6106..1c5dc72d4a3 100644 --- a/.github/workflows/run-nightly-flaky-test-detector.yml +++ b/.github/workflows/run-nightly-flaky-test-detector.yml @@ -4,6 +4,7 @@ on: schedule: # Run every night at 3:00 AM UTC - cron: '0 3 * * *' + workflow_dispatch: # Allows manual trigger for debugging jobs: trigger-flaky-test-detection: @@ -14,8 +15,8 @@ jobs: baseRef: 'origin/develop' projectPath: '.' runThreshold: '1' - runAllTests: 'true' - extraArgs: '{ "skipped_tests": "TestChainComponents", "test_repeat_count": "5", "all_tests_runner": "ubuntu22.04-32cores-128GB", "all_tests_runner_count": "3", "min_pass_ratio": "0" }' + runAllTests: true + extraArgs: '{ "skipped_tests": "TestChainComponents", "test_repeat_count": "5", "all_tests_runner": "ubuntu22.04-32cores-128GB", "all_tests_runner_count": "3", "min_pass_ratio": "0", "run_with_race": "false" }' secrets: SLACK_BOT_TOKEN: ${{ secrets.QA_SLACK_API_KEY }} \ No newline at end of file From a4d3c22e4495b0d6731a398e46d4844b4e65b226 Mon Sep 17 00:00:00 2001 From: george-dorin <120329946+george-dorin@users.noreply.github.com> Date: Fri, 15 Nov 2024 15:18:24 +0200 Subject: [PATCH 12/13] Add dual contract transmitter (#15202) * -Remove adaptive send -Add dual transmitter relay config -Add dual transmitter as contract transmitter * -Fix lint * Add tx.Meta * Revert ocr2 helpers * Fix lint * Fix lint * Change tx.meta to pointers Send secondary tx even if primary fails * Add meta fields to test * Remove unused code Change DualTransmissionConfig to pointer * Implement feedback * Fix failing test --- common/txmgr/types/tx.go | 4 ++ core/services/job/job_orm_test.go | 63 +++++++++++++---- core/services/job/models.go | 24 ------- core/services/job/models_test.go | 61 ----------------- core/services/job/orm.go | 28 ++++++-- core/services/ocr2/validate/validate.go | 10 --- core/services/ocrcommon/transmitter.go | 73 +++++++++++++++++++- core/services/ocrcommon/transmitter_test.go | 76 +++++++++++++++++++++ core/services/relay/evm/evm.go | 1 + core/services/relay/evm/types/types.go | 10 +++ core/testdata/testspecs/v2_specs.go | 30 +++----- 11 files changed, 244 insertions(+), 136 deletions(-) diff --git a/common/txmgr/types/tx.go b/common/txmgr/types/tx.go index 
b65f7edf6e5..f04047a36c1 100644 --- a/common/txmgr/types/tx.go +++ b/common/txmgr/types/tx.go @@ -159,6 +159,10 @@ type TxMeta[ADDR types.Hashable, TX_HASH types.Hashable] struct { MessageIDs []string `json:"MessageIDs,omitempty"` // SeqNumbers is used by CCIP for tx to committed sequence numbers correlation in logs SeqNumbers []uint64 `json:"SeqNumbers,omitempty"` + + // Dual Broadcast + DualBroadcast *bool `json:"DualBroadcast,omitempty"` + DualBroadcastParams *string `json:"DualBroadcastParams,omitempty"` } type TxAttempt[ diff --git a/core/services/job/job_orm_test.go b/core/services/job/job_orm_test.go index 0aa79f2f0b2..9db99fcd48d 100644 --- a/core/services/job/job_orm_test.go +++ b/core/services/job/job_orm_test.go @@ -2014,7 +2014,7 @@ func mustInsertPipelineRun(t *testing.T, orm pipeline.ORM, j job.Job) pipeline.R return run } -func TestORM_CreateJob_OCR2_With_AdaptiveSend(t *testing.T) { +func TestORM_CreateJob_OCR2_With_DualTransmission(t *testing.T) { ctx := testutils.Context(t) customChainID := big.New(testutils.NewRandomEVMChainID()) @@ -2030,27 +2030,66 @@ func TestORM_CreateJob_OCR2_With_AdaptiveSend(t *testing.T) { db := pgtest.NewSqlxDB(t) keyStore := cltest.NewKeyStore(t, db) require.NoError(t, keyStore.OCR2().Add(ctx, cltest.DefaultOCR2Key)) - _, transmitterID := cltest.MustInsertRandomKey(t, keyStore.Eth()) + baseJobSpec := fmt.Sprintf(testspecs.OCR2EVMDualTransmissionSpecMinimalTemplate, transmitterID.String()) + lggr := logger.TestLogger(t) pipelineORM := pipeline.NewORM(db, lggr, config.JobPipeline().MaxSuccessfulRuns()) bridgesORM := bridges.NewORM(db) jobORM := NewTestORM(t, db, pipelineORM, bridgesORM, keyStore) - adaptiveSendKey := cltest.MustGenerateRandomKey(t) + // Enabled but no config set + enabledDualTransmissionSpec := ` + enableDualTransmission=true` - jb, err := ocr2validate.ValidatedOracleSpecToml(testutils.Context(t), config.OCR2(), config.Insecure(), testspecs.GetOCR2EVMWithAdaptiveSendSpecMinimal(cltest.DefaultOCR2Key.ID(), transmitterID.String(), adaptiveSendKey.EIP55Address.String()), nil) + jb, err := ocr2validate.ValidatedOracleSpecToml(testutils.Context(t), config.OCR2(), config.Insecure(), baseJobSpec+enabledDualTransmissionSpec, nil) + require.NoError(t, err) + require.ErrorContains(t, jobORM.CreateJob(ctx, &jb), "dual transmission is enabled but no dual transmission config present") + + // ContractAddress not set + emptyContractAddress := ` + enableDualTransmission=true + [relayConfig.dualTransmission] + contractAddress="" + ` + jb, err = ocr2validate.ValidatedOracleSpecToml(testutils.Context(t), config.OCR2(), config.Insecure(), baseJobSpec+emptyContractAddress, nil) + require.NoError(t, err) + require.ErrorContains(t, jobORM.CreateJob(ctx, &jb), "invalid contract address in dual transmission config") + + // Transmitter address not set + emptyTransmitterAddress := ` + enableDualTransmission=true + [relayConfig.dualTransmission] + contractAddress = '0x613a38AC1659769640aaE063C651F48E0250454C' + transmitterAddress = '' + ` + jb, err = ocr2validate.ValidatedOracleSpecToml(testutils.Context(t), config.OCR2(), config.Insecure(), baseJobSpec+emptyTransmitterAddress, nil) + require.NoError(t, err) + require.ErrorContains(t, jobORM.CreateJob(ctx, &jb), "invalid transmitter address in dual transmission config") + + dtTransmitterAddress := cltest.MustGenerateRandomKey(t) + completeDualTransmissionSpec := fmt.Sprintf(` + enableDualTransmission=true + [relayConfig.dualTransmission] + contractAddress = '0x613a38AC1659769640aaE063C651F48E0250454C' + 
transmitterAddress = '%s' + [relayConfig.dualTransmission.meta] + key1 = 'val1' + key2 = ['val2','val3'] + `, + dtTransmitterAddress.Address.String()) + + jb, err = ocr2validate.ValidatedOracleSpecToml(testutils.Context(t), config.OCR2(), config.Insecure(), baseJobSpec+completeDualTransmissionSpec, nil) require.NoError(t, err) - require.Equal(t, "arbitrary-value", jb.AdaptiveSendSpec.Metadata["arbitraryParam"]) - t.Run("unknown transmitter address", func(t *testing.T) { - require.ErrorContains(t, jobORM.CreateJob(ctx, &jb), "failed to validate AdaptiveSendSpec.TransmitterAddress: no EVM key matching") - }) + jb.OCR2OracleSpec.TransmitterID = null.StringFrom(transmitterID.String()) - t.Run("multiple jobs", func(t *testing.T) { - keyStore.Eth().XXXTestingOnlyAdd(ctx, adaptiveSendKey) - require.NoError(t, jobORM.CreateJob(ctx, &jb), "failed to validate AdaptiveSendSpec.TransmitterAddress: no EVM key matching") - }) + // Unknown transmitter address + require.ErrorContains(t, jobORM.CreateJob(ctx, &jb), "unknown dual transmission transmitterAddress: no EVM key matching:") + + // Should not error + keyStore.Eth().XXXTestingOnlyAdd(ctx, dtTransmitterAddress) + require.NoError(t, jobORM.CreateJob(ctx, &jb)) } diff --git a/core/services/job/models.go b/core/services/job/models.go index 5054abd08b5..231bf10fda0 100644 --- a/core/services/job/models.go +++ b/core/services/job/models.go @@ -185,7 +185,6 @@ type Job struct { CCIPSpecID *int32 CCIPSpec *CCIPSpec CCIPBootstrapSpecID *int32 - AdaptiveSendSpec *AdaptiveSendSpec `toml:"adaptiveSend"` JobSpecErrors []SpecError Type Type `toml:"type"` SchemaVersion uint32 `toml:"schemaVersion"` @@ -1061,26 +1060,3 @@ type CCIPSpec struct { // and RMN network info for offchain blessing. PluginConfig JSONConfig `toml:"pluginConfig"` } - -type AdaptiveSendSpec struct { - TransmitterAddress *evmtypes.EIP55Address `toml:"transmitterAddress"` - ContractAddress *evmtypes.EIP55Address `toml:"contractAddress"` - Delay time.Duration `toml:"delay"` - Metadata JSONConfig `toml:"metadata"` -} - -func (o *AdaptiveSendSpec) Validate() error { - if o.TransmitterAddress == nil { - return errors.New("no AdaptiveSendSpec.TransmitterAddress found") - } - - if o.ContractAddress == nil { - return errors.New("no AdaptiveSendSpec.ContractAddress found") - } - - if o.Delay.Seconds() <= 1 { - return errors.New("AdaptiveSendSpec.Delay not set or smaller than 1s") - } - - return nil -} diff --git a/core/services/job/models_test.go b/core/services/job/models_test.go index a21a4553219..5ef36ea9f48 100644 --- a/core/services/job/models_test.go +++ b/core/services/job/models_test.go @@ -11,8 +11,6 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/codec" "github.com/smartcontractkit/chainlink-common/pkg/types" pkgworkflows "github.com/smartcontractkit/chainlink-common/pkg/workflows" - "github.com/smartcontractkit/chainlink/v2/core/internal/cltest" - "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" "github.com/smartcontractkit/chainlink/v2/core/services/job" "github.com/smartcontractkit/chainlink/v2/core/services/relay" @@ -352,62 +350,3 @@ func TestWorkflowSpec_Validate(t *testing.T) { require.NotEmpty(t, w.WorkflowID) }) } - -func TestAdaptiveSendConfig(t *testing.T) { - tests := []struct { - name string - shouldError bool - expectedErrorMessage string - config job.AdaptiveSendSpec - }{ - { - name: "AdaptiveSendSpec.TransmitterAddress not set", - shouldError: true, - expectedErrorMessage: "no AdaptiveSendSpec.TransmitterAddress found", - config: 
job.AdaptiveSendSpec{ - TransmitterAddress: nil, - ContractAddress: ptr(cltest.NewEIP55Address()), - Delay: time.Second * 30, - }, - }, - { - name: "AdaptiveSendSpec.ContractAddress not set", - shouldError: true, - expectedErrorMessage: "no AdaptiveSendSpec.ContractAddress found", - config: job.AdaptiveSendSpec{ - TransmitterAddress: ptr(cltest.NewEIP55Address()), - ContractAddress: nil, - Delay: time.Second * 30, - }, - }, - { - name: "AdaptiveSendSpec.Delay not set", - shouldError: true, - expectedErrorMessage: "AdaptiveSendSpec.Delay not set or smaller than 1s", - config: job.AdaptiveSendSpec{ - TransmitterAddress: ptr(cltest.NewEIP55Address()), - ContractAddress: ptr(cltest.NewEIP55Address()), - }, - }, - { - name: "AdaptiveSendSpec.Delay set to 50ms", - shouldError: true, - expectedErrorMessage: "AdaptiveSendSpec.Delay not set or smaller than 1s", - config: job.AdaptiveSendSpec{ - TransmitterAddress: ptr(cltest.NewEIP55Address()), - ContractAddress: ptr(cltest.NewEIP55Address()), - Delay: time.Millisecond * 50, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - if test.shouldError { - require.ErrorContains(t, test.config.Validate(), test.expectedErrorMessage) - } else { - require.NoError(t, test.config.Validate()) - } - }) - } -} diff --git a/core/services/job/orm.go b/core/services/job/orm.go index 1e1d8a98700..5e8b5ce127f 100644 --- a/core/services/job/orm.go +++ b/core/services/job/orm.go @@ -20,7 +20,6 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/sqlutil" "github.com/smartcontractkit/chainlink-common/pkg/types" - "github.com/smartcontractkit/chainlink/v2/core/bridges" evmconfig "github.com/smartcontractkit/chainlink/v2/core/chains/evm/config" evmtypes "github.com/smartcontractkit/chainlink/v2/core/chains/evm/types" @@ -304,10 +303,29 @@ func (o *orm) CreateJob(ctx context.Context, jb *Job) error { } } - if jb.AdaptiveSendSpec != nil { - err = validateKeyStoreMatchForRelay(ctx, jb.OCR2OracleSpec.Relay, tx.keyStore, jb.AdaptiveSendSpec.TransmitterAddress.String()) - if err != nil { - return fmt.Errorf("failed to validate AdaptiveSendSpec.TransmitterAddress: %w", err) + if enableDualTransmission, ok := jb.OCR2OracleSpec.RelayConfig["enableDualTransmission"]; ok && enableDualTransmission != nil { + rawDualTransmissionConfig, ok := jb.OCR2OracleSpec.RelayConfig["dualTransmission"] + if !ok { + return errors.New("dual transmission is enabled but no dual transmission config present") + } + + dualTransmissionConfig, ok := rawDualTransmissionConfig.(map[string]interface{}) + if !ok { + return errors.New("invalid dual transmission config") + } + + dtContractAddress, ok := dualTransmissionConfig["contractAddress"].(string) + if !ok || !common.IsHexAddress(dtContractAddress) { + return errors.New("invalid contract address in dual transmission config") + } + + dtTransmitterAddress, ok := dualTransmissionConfig["transmitterAddress"].(string) + if !ok || !common.IsHexAddress(dtTransmitterAddress) { + return errors.New("invalid transmitter address in dual transmission config") + } + + if err = validateKeyStoreMatchForRelay(ctx, jb.OCR2OracleSpec.Relay, tx.keyStore, dtTransmitterAddress); err != nil { + return errors.Wrap(err, "unknown dual transmission transmitterAddress") } } diff --git a/core/services/ocr2/validate/validate.go b/core/services/ocr2/validate/validate.go index 40e2c11c3e7..c2f3e455232 100644 --- a/core/services/ocr2/validate/validate.go +++ b/core/services/ocr2/validate/validate.go @@ -73,9 +73,6 @@ func 
ValidatedOracleSpecToml(ctx context.Context, config OCR2Config, insConf Ins if err = validateTimingParameters(config, insConf, spec); err != nil { return jb, err } - if err = validateAdaptiveSendSpec(ctx, jb); err != nil { - return jb, err - } return jb, nil } @@ -380,10 +377,3 @@ func validateOCR2LLOSpec(jsonConfig job.JSONConfig) error { } return pkgerrors.Wrap(pluginConfig.Validate(), "LLO PluginConfig is invalid") } - -func validateAdaptiveSendSpec(ctx context.Context, spec job.Job) error { - if spec.AdaptiveSendSpec != nil { - return spec.AdaptiveSendSpec.Validate() - } - return nil -} diff --git a/core/services/ocrcommon/transmitter.go b/core/services/ocrcommon/transmitter.go index 5d2de45295f..8121f3778d2 100644 --- a/core/services/ocrcommon/transmitter.go +++ b/core/services/ocrcommon/transmitter.go @@ -2,7 +2,10 @@ package ocrcommon import ( "context" + errors2 "errors" + "fmt" "math/big" + "net/url" "slices" "github.com/ethereum/go-ethereum/common" @@ -11,6 +14,7 @@ import ( "github.com/smartcontractkit/chainlink/v2/common/txmgr/types" "github.com/smartcontractkit/chainlink/v2/core/chains/evm/forwarders" "github.com/smartcontractkit/chainlink/v2/core/chains/evm/txmgr" + types2 "github.com/smartcontractkit/chainlink/v2/core/services/relay/evm/types" ) type roundRobinKeystore interface { @@ -88,13 +92,14 @@ func NewOCR2FeedsTransmitter( checker txmgr.TransmitCheckerSpec, chainID *big.Int, keystore roundRobinKeystore, + dualTransmissionConfig *types2.DualTransmissionConfig, ) (Transmitter, error) { // Ensure that a keystore is provided. if keystore == nil { return nil, errors.New("nil keystore provided to transmitter") } - return &ocr2FeedsTransmitter{ + baseTransmitter := &ocr2FeedsTransmitter{ ocr2Aggregator: ocr2Aggregator, txManagerOCR2: txm, transmitter: transmitter{ @@ -107,7 +112,17 @@ func NewOCR2FeedsTransmitter( chainID: chainID, keystore: keystore, }, - }, nil + } + + if dualTransmissionConfig != nil { + return &ocr2FeedsDualTransmission{ + transmitter: *baseTransmitter, + secondaryContractAddress: dualTransmissionConfig.ContractAddress, + secondaryFromAddress: dualTransmissionConfig.TransmitterAddress, + secondaryMeta: dualTransmissionConfig.Meta, + }, nil + } + return baseTransmitter, nil } func (t *transmitter) CreateEthTransaction(ctx context.Context, toAddress common.Address, payload []byte, txMeta *txmgr.TxMeta) error { @@ -203,3 +218,57 @@ func (t *ocr2FeedsTransmitter) forwarderAddress(ctx context.Context, eoa, ocr2Ag return forwarderAddress, nil } + +type ocr2FeedsDualTransmission struct { + transmitter ocr2FeedsTransmitter + + secondaryContractAddress common.Address + secondaryFromAddress common.Address + secondaryMeta map[string][]string +} + +func (t *ocr2FeedsDualTransmission) CreateEthTransaction(ctx context.Context, toAddress common.Address, payload []byte, txMeta *txmgr.TxMeta) error { + // Primary transmission + errPrimary := t.transmitter.CreateEthTransaction(ctx, toAddress, payload, txMeta) + if errPrimary != nil { + errPrimary = fmt.Errorf("skipped primary transmission: %w", errPrimary) + } + + if txMeta == nil { + txMeta = &txmgr.TxMeta{} + } + + dualBroadcast := true + dualBroadcastParams := t.urlParams() + + txMeta.DualBroadcast = &dualBroadcast + txMeta.DualBroadcastParams = &dualBroadcastParams + + // Secondary transmission + _, errSecondary := t.transmitter.txm.CreateTransaction(ctx, txmgr.TxRequest{ + FromAddress: t.secondaryFromAddress, + ToAddress: t.secondaryContractAddress, + EncodedPayload: payload, + FeeLimit: t.transmitter.gasLimit, + 
Strategy: t.transmitter.strategy, + Checker: t.transmitter.checker, + Meta: txMeta, + }) + + errSecondary = errors.Wrap(errSecondary, "skipped secondary transmission") + return errors2.Join(errPrimary, errSecondary) +} + +func (t *ocr2FeedsDualTransmission) FromAddress(ctx context.Context) common.Address { + return t.transmitter.FromAddress(ctx) +} + +func (t *ocr2FeedsDualTransmission) urlParams() string { + values := url.Values{} + for k, v := range t.secondaryMeta { + for _, p := range v { + values.Add(k, p) + } + } + return values.Encode() +} diff --git a/core/services/ocrcommon/transmitter_test.go b/core/services/ocrcommon/transmitter_test.go index d6a07190800..5f434e59c62 100644 --- a/core/services/ocrcommon/transmitter_test.go +++ b/core/services/ocrcommon/transmitter_test.go @@ -5,16 +5,19 @@ import ( "testing" "github.com/ethereum/go-ethereum/common" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" commontxmmocks "github.com/smartcontractkit/chainlink/v2/common/txmgr/types/mocks" "github.com/smartcontractkit/chainlink/v2/core/chains/evm/txmgr" txmmocks "github.com/smartcontractkit/chainlink/v2/core/chains/evm/txmgr/mocks" + "github.com/smartcontractkit/chainlink/v2/core/chains/evm/utils" "github.com/smartcontractkit/chainlink/v2/core/internal/cltest" "github.com/smartcontractkit/chainlink/v2/core/internal/testutils" "github.com/smartcontractkit/chainlink/v2/core/internal/testutils/pgtest" "github.com/smartcontractkit/chainlink/v2/core/services/ocrcommon" + "github.com/smartcontractkit/chainlink/v2/core/services/relay/evm/types" ) func newMockTxStrategy(t *testing.T) *commontxmmocks.TxStrategy { @@ -169,3 +172,76 @@ func Test_DefaultTransmitter_Forwarding_Enabled_CreateEthTransaction_No_Keystore ) require.Error(t, err) } + +func Test_DualTransmitter(t *testing.T) { + t.Parallel() + + db := pgtest.NewSqlxDB(t) + ethKeyStore := cltest.NewKeyStore(t, db).Eth() + + _, fromAddress := cltest.MustInsertRandomKey(t, ethKeyStore) + _, secondaryFromAddress := cltest.MustInsertRandomKey(t, ethKeyStore) + + contractAddress := utils.RandomAddress() + secondaryContractAddress := utils.RandomAddress() + + gasLimit := uint64(1000) + chainID := big.NewInt(0) + effectiveTransmitterAddress := fromAddress + toAddress := testutils.NewAddress() + payload := []byte{1, 2, 3} + txm := txmmocks.NewMockEvmTxManager(t) + strategy := newMockTxStrategy(t) + dualTransmissionConfig := &types.DualTransmissionConfig{ + ContractAddress: secondaryContractAddress, + TransmitterAddress: secondaryFromAddress, + Meta: map[string][]string{ + "key1": {"value1"}, + "key2": {"value2", "value3"}, + "key3": {"value4", "value5", "value6"}, + }, + } + + transmitter, err := ocrcommon.NewOCR2FeedsTransmitter( + txm, + []common.Address{fromAddress}, + contractAddress, + gasLimit, + effectiveTransmitterAddress, + strategy, + txmgr.TransmitCheckerSpec{}, + chainID, + ethKeyStore, + dualTransmissionConfig, + ) + require.NoError(t, err) + + primaryTxConfirmed := false + secondaryTxConfirmed := false + + txm.On("CreateTransaction", mock.Anything, mock.MatchedBy(func(tx txmgr.TxRequest) bool { + switch tx.FromAddress { + case fromAddress: + // Primary transmission + assert.Equal(t, tx.ToAddress, toAddress, "unexpected primary toAddress") + assert.Nil(t, tx.Meta, "Meta should be empty") + primaryTxConfirmed = true + case secondaryFromAddress: + // Secondary transmission + assert.Equal(t, tx.ToAddress, secondaryContractAddress, "unexpected secondary toAddress") + assert.True(t, 
*tx.Meta.DualBroadcast, "DualBroadcast should be true") + assert.Equal(t, "key1=value1&key2=value2&key2=value3&key3=value4&key3=value5&key3=value6", *tx.Meta.DualBroadcastParams, "DualBroadcastParams not equal") + secondaryTxConfirmed = true + default: + // Should never be reached + return false + } + + return true + })).Twice().Return(txmgr.Tx{}, nil) + + require.NoError(t, transmitter.CreateEthTransaction(testutils.Context(t), toAddress, payload, nil)) + + require.True(t, primaryTxConfirmed) + require.True(t, secondaryTxConfirmed) +} diff --git a/core/services/relay/evm/evm.go b/core/services/relay/evm/evm.go index 7c070cc282d..db0fe90796b 100644 --- a/core/services/relay/evm/evm.go +++ b/core/services/relay/evm/evm.go @@ -806,6 +806,7 @@ func generateTransmitterFrom(ctx context.Context, rargs commontypes.RelayArgs, e checker, configWatcher.chain.ID(), ethKeystore, + relayConfig.DualTransmissionConfig, ) case commontypes.CCIPExecution: transmitter, err = cciptransmitter.NewTransmitterWithStatusChecker( diff --git a/core/services/relay/evm/types/types.go b/core/services/relay/evm/types/types.go index e1a61098be5..2b56aee6379 100644 --- a/core/services/relay/evm/types/types.go +++ b/core/services/relay/evm/types/types.go @@ -192,6 +192,12 @@ func (c LLOConfigMode) String() string { return string(c) } +type DualTransmissionConfig struct { + ContractAddress common.Address `json:"contractAddress" toml:"contractAddress"` + TransmitterAddress common.Address `json:"transmitterAddress" toml:"transmitterAddress"` + Meta map[string][]string `json:"meta" toml:"meta"` +} + type RelayConfig struct { ChainID *big.Big `json:"chainID"` FromBlock uint64 `json:"fromBlock"` @@ -215,6 +221,10 @@ type RelayConfig struct { // LLO-specific LLODONID uint32 `json:"lloDonID" toml:"lloDonID"` LLOConfigMode LLOConfigMode `json:"lloConfigMode" toml:"lloConfigMode"` + + // DualTransmission specific + EnableDualTransmission bool `json:"enableDualTransmission" toml:"enableDualTransmission"` + DualTransmissionConfig *DualTransmissionConfig `json:"dualTransmission" toml:"dualTransmission"` } var ErrBadRelayConfig = errors.New("bad relay config") diff --git a/core/testdata/testspecs/v2_specs.go b/core/testdata/testspecs/v2_specs.go index d3d72467a83..d519ace6479 100644 --- a/core/testdata/testspecs/v2_specs.go +++ b/core/testdata/testspecs/v2_specs.go @@ -951,42 +951,28 @@ targets: inputs: consensus_output: $(a-consensus.outputs) ` -var OCR2EVMSpecMinimalWithAdaptiveSendTemplate = ` +var OCR2EVMDualTransmissionSpecMinimalTemplate = ` type = "offchainreporting2" schemaVersion = 1 -name = "%s" +name = "test-job" +relay = "evm" contractID = "0x613a38AC1659769640aaE063C651F48E0250454C" p2pv2Bootstrappers = [] -ocrKeyBundleID = "%s" -relay = "evm" -pluginType = "median" transmitterID = "%s" - +pluginType = "median" observationSource = """ ds [type=http method=GET url="https://chain.link/ETH-USD"]; ds_parse [type=jsonparse path="data.price" separator="."]; ds_multiply [type=multiply times=100]; ds -> ds_parse -> ds_multiply; """ - [pluginConfig] juelsPerFeeCoinSource = """ - ds1 [type=http method=GET url="https://chain.link/jules" allowunrestrictednetworkaccess="true"]; - ds1_parse [type=jsonparse path="answer"]; - ds1_multiply [type=multiply times=1]; - ds1 -> ds1_parse -> ds1_multiply; + ds [type=http method=GET url="https://chain.link/ETH-USD"]; + ds_parse [type=jsonparse path="data.price" separator="."]; + ds_multiply [type=multiply times=100]; + ds -> ds_parse -> ds_multiply; """ [relayConfig] chainID = 0 - -[adaptiveSend] 
-transmitterAddress = '%s' -contractAddress = '0xF67D0290337bca0847005C7ffD1BC75BA9AAE6e4' -delay = '30s' -[adaptiveSend.metadata] -arbitraryParam = 'arbitrary-value' ` - -func GetOCR2EVMWithAdaptiveSendSpecMinimal(keyBundle, transmitterID, secondaryTransmitterAddress string) string { - return fmt.Sprintf(OCR2EVMSpecMinimalWithAdaptiveSendTemplate, uuid.New(), keyBundle, transmitterID, secondaryTransmitterAddress) -} From cb9c185910f054c9e162f8181cb2bebaa81988b9 Mon Sep 17 00:00:00 2001 From: Vyzaldy Sanchez Date: Fri, 15 Nov 2024 11:14:16 -0400 Subject: [PATCH 13/13] Add compute metrics (#15246) * Adds compute metrics * Fixes lint * Removes execution ID from metrics labels * Fixes CI * Fixes CI * Fixes CI --- core/capabilities/compute/compute.go | 23 +++++++++--- core/capabilities/compute/compute_test.go | 4 ++- core/capabilities/compute/monitoring.go | 35 +++++++++++++++++++ .../services/standardcapabilities/delegate.go | 6 +++- core/services/workflows/engine_test.go | 6 ++-- 5 files changed, 66 insertions(+), 8 deletions(-) diff --git a/core/capabilities/compute/compute.go b/core/capabilities/compute/compute.go index cdb8ee19a6a..7b11072b161 100644 --- a/core/capabilities/compute/compute.go +++ b/core/capabilities/compute/compute.go @@ -7,6 +7,7 @@ import ( "errors" "fmt" "net/http" + "strconv" "strings" "sync" "time" @@ -20,6 +21,7 @@ import ( capabilitiespb "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb" "github.com/smartcontractkit/chainlink-common/pkg/custmsg" "github.com/smartcontractkit/chainlink-common/pkg/logger" + "github.com/smartcontractkit/chainlink-common/pkg/metrics" "github.com/smartcontractkit/chainlink-common/pkg/services" coretypes "github.com/smartcontractkit/chainlink-common/pkg/types/core" "github.com/smartcontractkit/chainlink-common/pkg/workflows/wasm/host" @@ -75,8 +77,9 @@ var ( var _ capabilities.ActionCapability = (*Compute)(nil) type Compute struct { - stopCh services.StopChan - log logger.Logger + stopCh services.StopChan + log logger.Logger + metrics *computeMetricsLabeler // emitter is used to emit messages from the WASM module to a configured collector. 
emitter custmsg.MessageEmitter @@ -328,6 +331,13 @@ func (c *Compute) createFetcher() func(ctx context.Context, req *wasmpb.FetchReq return nil, fmt.Errorf("failed to unmarshal fetch response: %w", err) } + c.metrics.with( + "status", strconv.FormatUint(uint64(response.StatusCode), 10), + platform.KeyWorkflowID, req.Metadata.WorkflowId, + platform.KeyWorkflowName, req.Metadata.WorkflowName, + platform.KeyWorkflowOwner, req.Metadata.WorkflowOwner, + ).incrementHTTPRequestCounter(ctx) + // Only log if the response is not in the 200 range if response.StatusCode < http.StatusOK || response.StatusCode >= http.StatusMultipleChoices { msg := fmt.Sprintf("compute fetch request failed with status code %d", response.StatusCode) @@ -357,10 +367,14 @@ func NewAction( handler *webapi.OutgoingConnectorHandler, idGenerator func() string, opts ...func(*Compute), -) *Compute { +) (*Compute, error) { if config.NumWorkers == 0 { config.NumWorkers = defaultNumWorkers } + metricsLabeler, err := newComputeMetricsLabeler(metrics.NewLabeler().With("capability", CapabilityIDCompute)) + if err != nil { + return nil, fmt.Errorf("failed to create compute metrics labeler: %w", err) + } var ( lggr = logger.Named(log, "CustomCompute") labeler = custmsg.NewLabeler() @@ -368,6 +382,7 @@ func NewAction( stopCh: make(services.StopChan), log: lggr, emitter: labeler, + metrics: metricsLabeler, registry: registry, modules: newModuleCache(clockwork.NewRealClock(), 1*time.Minute, 10*time.Minute, 3), transformer: NewTransformer(lggr, labeler), @@ -382,5 +397,5 @@ func NewAction( opt(compute) } - return compute + return compute, nil } diff --git a/core/capabilities/compute/compute_test.go b/core/capabilities/compute/compute_test.go index e0f5a2f9750..c4146b7408e 100644 --- a/core/capabilities/compute/compute_test.go +++ b/core/capabilities/compute/compute_test.go @@ -18,6 +18,7 @@ import ( cappkg "github.com/smartcontractkit/chainlink-common/pkg/capabilities" "github.com/smartcontractkit/chainlink-common/pkg/utils/tests" "github.com/smartcontractkit/chainlink-common/pkg/values" + corecapabilities "github.com/smartcontractkit/chainlink/v2/core/capabilities" "github.com/smartcontractkit/chainlink/v2/core/capabilities/webapi" "github.com/smartcontractkit/chainlink/v2/core/services/gateway/api" @@ -60,7 +61,8 @@ func setup(t *testing.T, config Config) testHarness { connectorHandler, err := webapi.NewOutgoingConnectorHandler(connector, config.ServiceConfig, ghcapabilities.MethodComputeAction, log) require.NoError(t, err) - compute := NewAction(config, log, registry, connectorHandler, idGeneratorFn) + compute, err := NewAction(config, log, registry, connectorHandler, idGeneratorFn) + require.NoError(t, err) compute.modules.clock = clockwork.NewFakeClock() return testHarness{ diff --git a/core/capabilities/compute/monitoring.go b/core/capabilities/compute/monitoring.go index 4b676c25f7d..71354c014a7 100644 --- a/core/capabilities/compute/monitoring.go +++ b/core/capabilities/compute/monitoring.go @@ -1,3 +1,38 @@ package compute +import ( + "context" + "fmt" + + "go.opentelemetry.io/otel/metric" + + "github.com/smartcontractkit/chainlink-common/pkg/beholder" + "github.com/smartcontractkit/chainlink-common/pkg/metrics" + + localMonitoring "github.com/smartcontractkit/chainlink/v2/core/monitoring" +) + const timestampKey = "computeTimestamp" + +type computeMetricsLabeler struct { + metrics.Labeler + computeHTTPRequestCounter metric.Int64Counter +} + +func newComputeMetricsLabeler(l metrics.Labeler) (*computeMetricsLabeler, error) { + 
computeHTTPRequestCounter, err := beholder.GetMeter().Int64Counter("capabilities_compute_http_request_count") + if err != nil { + return nil, fmt.Errorf("failed to register compute http request counter: %w", err) + } + + return &computeMetricsLabeler{Labeler: l, computeHTTPRequestCounter: computeHTTPRequestCounter}, nil +} + +func (c *computeMetricsLabeler) with(keyValues ...string) *computeMetricsLabeler { + return &computeMetricsLabeler{c.With(keyValues...), c.computeHTTPRequestCounter} +} + +func (c *computeMetricsLabeler) incrementHTTPRequestCounter(ctx context.Context) { + otelLabels := localMonitoring.KvMapToOtelAttributes(c.Labels) + c.computeHTTPRequestCounter.Add(ctx, 1, metric.WithAttributes(otelLabels...)) +} diff --git a/core/services/standardcapabilities/delegate.go b/core/services/standardcapabilities/delegate.go index ceb69883e90..f4c0f360bf3 100644 --- a/core/services/standardcapabilities/delegate.go +++ b/core/services/standardcapabilities/delegate.go @@ -12,6 +12,7 @@ import ( "github.com/smartcontractkit/chainlink-common/pkg/sqlutil" "github.com/smartcontractkit/chainlink-common/pkg/types" "github.com/smartcontractkit/chainlink-common/pkg/types/core" + "github.com/smartcontractkit/chainlink/v2/core/capabilities/compute" gatewayconnector "github.com/smartcontractkit/chainlink/v2/core/capabilities/gateway_connector" "github.com/smartcontractkit/chainlink/v2/core/capabilities/webapi" @@ -253,7 +254,10 @@ func (d *Delegate) ServicesForSpec(ctx context.Context, spec job.Job) ([]job.Ser return uuid.New().String() } - computeSrvc := compute.NewAction(cfg, log, d.registry, handler, idGeneratorFn) + computeSrvc, err := compute.NewAction(cfg, log, d.registry, handler, idGeneratorFn) + if err != nil { + return nil, err + } return []job.ServiceCtx{handler, computeSrvc}, nil } diff --git a/core/services/workflows/engine_test.go b/core/services/workflows/engine_test.go index e6667fe0bc6..3a2bc17bc36 100644 --- a/core/services/workflows/engine_test.go +++ b/core/services/workflows/engine_test.go @@ -1448,7 +1448,8 @@ func TestEngine_WithCustomComputeStep(t *testing.T) { require.NoError(t, err) idGeneratorFn := func() string { return "validRequestID" } - compute := compute.NewAction(cfg, log, reg, handler, idGeneratorFn) + compute, err := compute.NewAction(cfg, log, reg, handler, idGeneratorFn) + require.NoError(t, err) require.NoError(t, compute.Start(ctx)) defer compute.Close() @@ -1513,7 +1514,8 @@ func TestEngine_CustomComputePropagatesBreaks(t *testing.T) { require.NoError(t, err) idGeneratorFn := func() string { return "validRequestID" } - compute := compute.NewAction(cfg, log, reg, handler, idGeneratorFn) + compute, err := compute.NewAction(cfg, log, reg, handler, idGeneratorFn) + require.NoError(t, err) require.NoError(t, compute.Start(ctx)) defer compute.Close()
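
One note on the dual transmission patch above: the DualBroadcastParams string asserted in Test_DualTransmitter follows directly from net/url semantics, since url.Values.Encode sorts keys and repeats a key once per value. A small standard-library-only sketch reproducing the expected query string:

package main

import (
	"fmt"
	"net/url"
)

func main() {
	// Meta map from the dual transmission config in Test_DualTransmitter.
	meta := map[string][]string{
		"key1": {"value1"},
		"key2": {"value2", "value3"},
		"key3": {"value4", "value5", "value6"},
	}

	// Mirrors ocr2FeedsDualTransmission.urlParams: add every value under its key.
	values := url.Values{}
	for k, vs := range meta {
		for _, v := range vs {
			values.Add(k, v)
		}
	}

	// Prints:
	// key1=value1&key2=value2&key2=value3&key3=value4&key3=value5&key3=value6
	fmt.Println(values.Encode())
}

Because Encode sorts by key, the encoded parameters are deterministic even though Go map iteration order is not, which is what makes the exact string match in the test safe.
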