Skip to content

Commit

Permalink
[testing] Add a job to test state sync bootstrap of testnet
Browse files Browse the repository at this point in the history
  • Loading branch information
marun committed Jul 4, 2024
1 parent ea1b24c commit 592daa7
Show file tree
Hide file tree
Showing 7 changed files with 212 additions and 8 deletions.
48 changes: 48 additions & 0 deletions .github/actions/check-bootstrap/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Composite action that builds avalanchego and then runs the tests/bootstrap
# program against the built binary via the run-monitored-tmpnet-cmd action.
name: 'Check bootstrap for a network and state sync configuration'
description: 'Checks that bootstrap is possible for the given network and state sync configuration'

inputs:
# Passed to the bootstrap test as --network-id
network_id:
required: true
# Passed to the bootstrap test as --state-sync-enabled
state_sync_enabled:
required: true
# The remaining inputs are forwarded unchanged to run-monitored-tmpnet-cmd
prometheus_id:
required: true
prometheus_password:
required: true
loki_id:
required: true
loki_password:
required: true

runs:
using: composite
steps:
- name: Setup Go
uses: ./.github/actions/setup-go-for-project

- name: Build AvalancheGo Binary
shell: bash
run: ./scripts/build.sh -r

# Prints the version of the binary that was just built
- name: Check avalanchego version
shell: bash
run: ./build/avalanchego --version

# NOTE(review): the step name mentions testnet and state-sync, but the network
# and state sync setting actually come from the action inputs.
- name: Run bootstrap for testnet with state-sync
uses: ./.github/actions/run-monitored-tmpnet-cmd
with:
run: go run ./tests/bootstrap --avalanchego-path=./build/avalanchego --network-id=${{ inputs.network_id }} --state-sync-enabled=${{ inputs.state_sync_enabled }}
prometheus_id: ${{ inputs.prometheus_id }}
prometheus_password: ${{ inputs.prometheus_password }}
loki_id: ${{ inputs.loki_id }}
loki_password: ${{ inputs.loki_password }}

# Skip creation of an artifact in favor of metric collection

# Reports disk usage of ~/.tmpnet regardless of the outcome of earlier steps
- name: Check size of tmpnet path
if: always()
shell: bash
run: |
echo "Checking tmpnet disk usage:"
du -sh ~/.tmpnet
27 changes: 27 additions & 0 deletions .github/workflows/check-testnet-state-sync-bootstrap.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Workflow that runs the check-bootstrap action against testnet with state
# sync enabled.
name: 'Testnet Bootstrap w/ State Sync'

on:
# TODO(marun) Add a schedule
workflow_dispatch:

# TODO(marun) For testing only - remove before merge
pull_request:

jobs:
check_bootstrap_testnet_state_sync:
name: Check Bootstrap
# TODO(marun) Update this to a self-hosted runner
# NOTE(review): GitHub-hosted runners are believed to cap job execution time
# well below the 3-day timeout configured here - confirm against the GitHub
# Actions usage limits before relying on this timeout.
runs-on: ubuntu-latest
timeout-minutes: 4320 # 3 days
steps:
- name: Checkout Repository
uses: actions/checkout@v4
- name: Check state sync bootstrap for testnet
uses: ./.github/actions/check-bootstrap
with:
network_id: 5 # testnet
state_sync_enabled: true
# Each credential falls back to an empty string when its secret is not set
prometheus_id: ${{ secrets.PROMETHEUS_ID || '' }}
prometheus_password: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
loki_id: ${{ secrets.LOKI_ID || '' }}
loki_password: ${{ secrets.LOKI_PASSWORD || '' }}
107 changes: 107 additions & 0 deletions tests/bootstrap/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package main

import (
"context"
"flag"
"fmt"
"log"
"os"
"time"

"github.com/google/uuid"

"github.com/ava-labs/avalanchego/config"
"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
"github.com/ava-labs/avalanchego/utils/constants"
"github.com/ava-labs/avalanchego/utils/logging"
)

// Simple test that starts a single node and waits for it to finish bootstrapping.

// main parses the command-line flags, rejects any required flag left at its
// zero value, and then runs the bootstrap check. Any failure is fatal.
func main() {
	var (
		binaryPath = flag.String("avalanchego-path", "", "The path to an avalanchego binary")
		netID      = flag.Int64("network-id", 0, "The ID of the network to bootstrap from")
		stateSync  = flag.Bool("state-sync-enabled", false, "Whether state syncing should be enabled")
		timeout    = flag.Duration("max-duration", time.Hour*72, "The maximum duration the network should run for")
	)
	flag.Parse()

	// A zero value is treated as "not supplied"; the first missing flag aborts.
	switch {
	case len(*binaryPath) == 0:
		log.Fatal("avalanchego-path is required")
	case *netID == 0:
		log.Fatal("network-id is required")
	case *timeout == 0:
		log.Fatal("max-duration is required")
	}

	err := checkBootstrap(*binaryPath, *netID, *stateSync, *timeout)
	if err != nil {
		log.Fatalf("Failed to check bootstrap: %v\n", err)
	}
}

func checkBootstrap(avalanchegoPath string, networkID int64, stateSyncEnabled bool, maxDuration time.Duration) error {

Check failure on line 47 in tests/bootstrap/main.go

View workflow job for this annotation

GitHub Actions / Lint

unused-parameter: parameter 'networkID' seems to be unused, consider removing or renaming it as _ (revive)

Check failure on line 47 in tests/bootstrap/main.go

View workflow job for this annotation

GitHub Actions / Lint

`checkBootstrap` - `networkID` is unused (unparam)
flags := tmpnet.DefaultLocalhostFlags()
flags.SetDefaults(tmpnet.FlagsMap{
config.HealthCheckFreqKey: "30s",
// Minimize logging overhead
config.LogDisplayLevelKey: logging.Off.String(),
config.LogLevelKey: logging.Info.String(),
})

// Create a new single-node network that will bootstrap from the specified network
network := &tmpnet.Network{
UUID: uuid.NewString(),
NetworkID: constants.TestnetID,
Owner: "bootstrap-test",
Nodes: tmpnet.NewNodesOrPanic(1),
DefaultFlags: flags,
DefaultRuntimeConfig: tmpnet.NodeRuntimeConfig{
// TODO(marun) Rename AvalancheGoPath to AvalanchegoPath
AvalancheGoPath: avalanchegoPath,
},
ChainConfigs: map[string]tmpnet.FlagsMap{
"C": {
"state-sync-enabled": stateSyncEnabled,
},
},
}

if err := network.Create(""); err != nil {
return fmt.Errorf("Failed to create network: %v\n", err)

Check failure on line 75 in tests/bootstrap/main.go

View workflow job for this annotation

GitHub Actions / Lint

non-wrapping format verb for fmt.Errorf. Use `%w` to format errors (errorlint)

Check failure on line 75 in tests/bootstrap/main.go

View workflow job for this annotation

GitHub Actions / Lint

ST1005: error strings should not end with punctuation or newlines (stylecheck)

Check failure on line 75 in tests/bootstrap/main.go

View workflow job for this annotation

GitHub Actions / Lint

ST1005: error strings should not be capitalized (stylecheck)
}
node := network.Nodes[0]

log.Printf("Starting node in path %s (UUID: %s)\n", network.Dir, network.UUID)

ctx, cancel := context.WithTimeout(context.Background(), tmpnet.DefaultNetworkTimeout)
defer cancel()
if err := network.StartNode(ctx, os.Stdout, node); err != nil {
return fmt.Errorf("Failed to start node: %v\n", err)

Check failure on line 84 in tests/bootstrap/main.go

View workflow job for this annotation

GitHub Actions / Lint

non-wrapping format verb for fmt.Errorf. Use `%w` to format errors (errorlint)

Check failure on line 84 in tests/bootstrap/main.go

View workflow job for this annotation

GitHub Actions / Lint

ST1005: error strings should not end with punctuation or newlines (stylecheck)

Check failure on line 84 in tests/bootstrap/main.go

View workflow job for this annotation

GitHub Actions / Lint

ST1005: error strings should not be capitalized (stylecheck)
}
defer func() {
ctx, cancel := context.WithTimeout(context.Background(), tmpnet.DefaultNetworkTimeout)
defer cancel()
if err := node.Stop(ctx); err != nil {
log.Printf("Failed to stop node: %v\n", err)
}
}()

log.Printf("Metrics: %s\n", tmpnet.DefaultMetricsLink(network.UUID, time.Now()))

log.Print("Waiting for node to indicate bootstrap complete by reporting healthy\n")

// Avoid checking too often to avoid log spam
healthCheckInterval := 1 * time.Minute

ctx, cancel = context.WithTimeout(context.Background(), maxDuration)
defer cancel()
if err := tmpnet.WaitForHealthyWithInterval(ctx, node, healthCheckInterval); err != nil {
return fmt.Errorf("Node failed to become healthy before timeout: %v\n", err)

Check failure on line 104 in tests/bootstrap/main.go

View workflow job for this annotation

GitHub Actions / Lint

non-wrapping format verb for fmt.Errorf. Use `%w` to format errors (errorlint)

Check failure on line 104 in tests/bootstrap/main.go

View workflow job for this annotation

GitHub Actions / Lint

ST1005: error strings should not end with punctuation or newlines (stylecheck)
}
return nil
}
13 changes: 10 additions & 3 deletions tests/fixture/tmpnet/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,20 +46,27 @@ func DefaultTestFlags() FlagsMap {
}
}

// DefaultLocalhostFlags returns a new FlagsMap with the public IP, HTTP host
// and staking host all set to the loopback address. Appropriate for networks
// that aren't intended to be publicly accessible.
func DefaultLocalhostFlags() FlagsMap {
	const loopback = "127.0.0.1"

	localhostFlags := FlagsMap{
		config.PublicIPKey:    loopback,
		config.HTTPHostKey:    loopback,
		config.StakingHostKey: loopback,
	}
	return localhostFlags
}

// DefaultTmpnetFlags returns the flags appropriate for tmpnet networks: the
// tmpnet-specific values set below, with DefaultLocalhostFlags and then
// DefaultTestFlags applied as defaults.
func DefaultTmpnetFlags() FlagsMap {
	// Supply only non-default configuration to ensure that default values will be used.
	flags := FlagsMap{
		// Specific to tmpnet deployment
		config.LogDisplayLevelKey: logging.Off.String(), // Display logging not needed since nodes run headless
		config.LogLevelKey:        logging.Debug.String(),
		// Specific to e2e testing
		config.MinStakeDurationKey:           DefaultMinStakeDuration.String(),
		config.ProposerVMUseCurrentHeightKey: true,
	}
	// The localhost host/IP values come from DefaultLocalhostFlags instead of
	// being repeated in the literal above.
	flags.SetDefaults(DefaultLocalhostFlags())
	flags.SetDefaults(DefaultTestFlags())
	return flags
}
Expand Down
6 changes: 5 additions & 1 deletion tests/fixture/tmpnet/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ func (n *Network) StartNodes(ctx context.Context, w io.Writer, nodesToStart ...*
return err
}
// Provide a link to the main dashboard filtered by the uuid and showing results from now till whenever the link is viewed
if _, err := fmt.Fprintf(w, "\nMetrics: https://grafana-experimental.avax-dev.network/d/kBQpRdWnk/avalanche-main-dashboard?&var-filter=network_uuid%%7C%%3D%%7C%s&var-filter=is_ephemeral_node%%7C%%3D%%7Cfalse&from=%d&to=now\n", n.UUID, startTime.UnixMilli()); err != nil {
if _, err := fmt.Fprintf(w, "\nMetrics: %s\n", DefaultMetricsLink(n.UUID, startTime)); err != nil {
return err
}

Expand Down Expand Up @@ -906,3 +906,7 @@ func GetReusableNetworkPathForOwner(owner string) (string, error) {
}
return filepath.Join(networkPath, "latest_"+owner), nil
}

// DefaultMetricsLink returns a link to the main Grafana dashboard filtered by
// the given network uuid, excluding ephemeral nodes, and showing results from
// startTime (in Unix milliseconds) until whenever the link is viewed.
func DefaultMetricsLink(uuid string, startTime time.Time) string {
	const linkFormat = "https://grafana-experimental.avax-dev.network/d/kBQpRdWnk/avalanche-main-dashboard?&var-filter=network_uuid%%7C%%3D%%7C%s&var-filter=is_ephemeral_node%%7C%%3D%%7Cfalse&from=%d&to=now"

	fromMillis := startTime.UnixMilli()
	return fmt.Sprintf(linkFormat, uuid, fromMillis)
}
10 changes: 8 additions & 2 deletions tests/fixture/tmpnet/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,14 @@ func (n *Node) SetNetworkingConfig(bootstrapIDs []string, bootstrapIPs []string)
// Default to dynamic port allocation
n.Flags[config.StakingPortKey] = 0
}
n.Flags[config.BootstrapIDsKey] = strings.Join(bootstrapIDs, ",")
n.Flags[config.BootstrapIPsKey] = strings.Join(bootstrapIPs, ",")
if len(bootstrapIDs) == 0 {
// bootstrap-* should not be provided if bootstrapping from mainnet or testnet
delete(n.Flags, config.BootstrapIDsKey)
delete(n.Flags, config.BootstrapIPsKey)
} else {
n.Flags[config.BootstrapIDsKey] = strings.Join(bootstrapIDs, ",")
n.Flags[config.BootstrapIPsKey] = strings.Join(bootstrapIPs, ",")
}
}

// Ensures staking and signing keys are generated if not already present and
Expand Down
9 changes: 7 additions & 2 deletions tests/fixture/tmpnet/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,17 @@ const (

var ErrNotRunning = errors.New("not running")

// WaitForHealthy waits for the node to become healthy by delegating to
// WaitForHealthyWithInterval with DefaultNodeTickerInterval as the interval.
func WaitForHealthy(ctx context.Context, node *Node) error {
	interval := DefaultNodeTickerInterval
	return WaitForHealthyWithInterval(ctx, node, interval)
}

// WaitForHealthyWithInterval blocks until Node.IsHealthy returns true or an error (including context timeout) is observed.
func WaitForHealthyWithInterval(ctx context.Context, node *Node, interval time.Duration) error {
if _, ok := ctx.Deadline(); !ok {
return fmt.Errorf("unable to wait for health for node %q with a context without a deadline", node.NodeID)
}
ticker := time.NewTicker(DefaultNodeTickerInterval)
ticker := time.NewTicker(interval)
defer ticker.Stop()

for {
Expand Down

0 comments on commit 592daa7

Please sign in to comment.