Skip to content

Commit

Permalink
Merge pull request #370 from won-js/main
Browse files Browse the repository at this point in the history
Add API monitoring latency (info, validator)
  • Loading branch information
cam-schultz authored Aug 22, 2024
2 parents 4751746 + e83fad4 commit 820041e
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 5 deletions.
47 changes: 47 additions & 0 deletions peers/app_request_netrwork_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package peers

import (
"errors"

"github.com/prometheus/client_golang/prometheus"
)

// ErrFailedToCreateAppRequestNetworkMetrics is returned when the metrics for the
// app request network cannot be created.
var ErrFailedToCreateAppRequestNetworkMetrics = errors.New("failed to create app request network metrics")

// AppRequestNetworkMetrics holds the Prometheus histograms used to record the
// latency of API calls made by the app request network.
type AppRequestNetworkMetrics struct {
// infoAPICallLatencyMS records the latency of info API calls, in milliseconds.
infoAPICallLatencyMS prometheus.Histogram
// pChainAPICallLatencyMS records the latency of P-Chain RPC calls, in milliseconds.
pChainAPICallLatencyMS prometheus.Histogram
}

// newAppRequestNetworkMetrics creates the latency histograms for info API and
// P-Chain API calls and registers them with registerer.
//
// Both histograms use 10 exponentially spaced buckets spanning 100 to 10000
// (milliseconds).
//
// A registration failure (for example, a collector with the same name already
// being registered) is returned as an error that matches
// ErrFailedToCreateAppRequestNetworkMetrics via errors.Is and also carries the
// underlying registry error. On failure no histogram created here is left
// registered.
func newAppRequestNetworkMetrics(registerer prometheus.Registerer) (*AppRequestNetworkMetrics, error) {
	// prometheus.NewHistogram never returns nil (it panics on invalid options),
	// so no nil checks are needed on the constructed histograms.
	infoAPICallLatencyMS := prometheus.NewHistogram(
		prometheus.HistogramOpts{
			Name:    "info_api_call_latency_ms",
			Help:    "Latency of calling info api in milliseconds",
			Buckets: prometheus.ExponentialBucketsRange(100, 10000, 10),
		},
	)
	// Use Register rather than MustRegister: this function reports failures
	// through its error return, so it should not panic on registration errors.
	if err := registerer.Register(infoAPICallLatencyMS); err != nil {
		return nil, errors.Join(ErrFailedToCreateAppRequestNetworkMetrics, err)
	}

	pChainAPICallLatencyMS := prometheus.NewHistogram(
		prometheus.HistogramOpts{
			Name:    "p_chain_api_call_latency_ms",
			Help:    "Latency of calling p-chain rpc in milliseconds",
			Buckets: prometheus.ExponentialBucketsRange(100, 10000, 10),
		},
	)
	if err := registerer.Register(pChainAPICallLatencyMS); err != nil {
		// Roll back the first registration so a failed call leaves the
		// registry unchanged and the caller may retry.
		registerer.Unregister(infoAPICallLatencyMS)
		return nil, errors.Join(ErrFailedToCreateAppRequestNetworkMetrics, err)
	}

	return &AppRequestNetworkMetrics{
		infoAPICallLatencyMS:   infoAPICallLatencyMS,
		pChainAPICallLatencyMS: pChainAPICallLatencyMS,
	}, nil
}
37 changes: 33 additions & 4 deletions peers/app_request_network.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type AppRequestNetwork struct {
logger logging.Logger
lock *sync.Mutex
validatorClient *validators.CanonicalValidatorClient
metrics *AppRequestNetworkMetrics
}

// NewNetwork creates a p2p network client for interacting with validators
Expand All @@ -53,8 +54,14 @@ func NewNetwork(
),
)

metrics, err := newAppRequestNetworkMetrics(registerer)
if err != nil {
logger.Fatal("Failed to create app request network metrics", zap.Error(err))
panic(err)
}

// Create the handler for handling inbound app responses
handler, err := NewRelayerExternalHandler(logger, registerer)
handler, err := NewRelayerExternalHandler(logger, prometheus.DefaultRegisterer)
if err != nil {
logger.Error(
"Failed to create p2p network handler",
Expand Down Expand Up @@ -98,6 +105,7 @@ func NewNetwork(
logger: logger,
lock: new(sync.Mutex),
validatorClient: validatorClient,
metrics: metrics,
}
go logger.RecoverAndPanic(func() {
testNetwork.Dispatch()
Expand All @@ -123,8 +131,10 @@ func (n *AppRequestNetwork) ConnectPeers(nodeIDs set.Set[ids.NodeID]) set.Set[id
// through connectedPeers for already tracked peers, just iterate through the full list,
// re-adding connections to already tracked peers.

startInfoAPICall := time.Now()
// Get the list of peers
peers, err := n.infoAPI.Peers(context.Background())
n.setInfoAPICallLatencyMS(float64(time.Since(startInfoAPICall).Milliseconds()))
if err != nil {
n.logger.Error(
"Failed to get peers",
Expand All @@ -147,13 +157,19 @@ func (n *AppRequestNetwork) ConnectPeers(nodeIDs set.Set[ids.NodeID]) set.Set[id

// If the Info API node is in nodeIDs, it will not be reflected in the call to info.Peers.
// In this case, we need to manually track the API node.
if apiNodeID, _, err := n.infoAPI.GetNodeID(context.Background()); err != nil {
startInfoAPICall = time.Now()
apiNodeID, _, err := n.infoAPI.GetNodeID(context.Background())
n.setInfoAPICallLatencyMS(float64(time.Since(startInfoAPICall).Milliseconds()))
if err != nil {
n.logger.Error(
"Failed to get API Node ID",
zap.Error(err),
)
} else if nodeIDs.Contains(apiNodeID) {
if apiNodeIPPort, err := n.infoAPI.GetNodeIP(context.Background()); err != nil {
startInfoAPICall = time.Now()
apiNodeIPPort, err := n.infoAPI.GetNodeIP(context.Background())
n.setInfoAPICallLatencyMS(float64(time.Since(startInfoAPICall).Milliseconds()))
if err != nil {
n.logger.Error(
"Failed to get API Node IP",
zap.Error(err),
Expand Down Expand Up @@ -186,11 +202,12 @@ func (c *ConnectedCanonicalValidators) GetValidator(nodeID ids.NodeID) (*warp.Va
// validator information
func (n *AppRequestNetwork) ConnectToCanonicalValidators(subnetID ids.ID) (*ConnectedCanonicalValidators, error) {
// Get the subnet's current canonical validator set
startPChainAPICall := time.Now()
validatorSet, totalValidatorWeight, err := n.validatorClient.GetCurrentCanonicalValidatorSet(subnetID)
n.setPChainAPICallLatencyMS(float64(time.Since(startPChainAPICall).Milliseconds()))
if err != nil {
return nil, err
}

// We make queries to node IDs, not unique validators as represented by a BLS pubkey, so we need this map to track
// responses from nodes and populate the signatureMap with the corresponding validator signature
// This maps node IDs to the index in the canonical validator set
Expand Down Expand Up @@ -241,3 +258,15 @@ func (n *AppRequestNetwork) RegisterRequestID(requestID uint32, numExpectedRespo
// GetSubnetID looks up the subnet ID for the given blockchainID by delegating
// to the validator client, using a background context.
func (n *AppRequestNetwork) GetSubnetID(blockchainID ids.ID) (ids.ID, error) {
	ctx := context.Background()
	subnetID, err := n.validatorClient.GetSubnetID(ctx, blockchainID)
	return subnetID, err
}

//
// Metrics
//

// setInfoAPICallLatencyMS adds one observation, latency, to the info API call
// latency histogram.
func (n *AppRequestNetwork) setInfoAPICallLatencyMS(latency float64) {
	histogram := n.metrics.infoAPICallLatencyMS
	histogram.Observe(latency)
}

// setPChainAPICallLatencyMS adds one observation, latency, to the P-Chain API
// call latency histogram.
func (n *AppRequestNetwork) setPChainAPICallLatencyMS(latency float64) {
	histogram := n.metrics.pChainAPICallLatencyMS
	histogram.Observe(latency)
}
2 changes: 1 addition & 1 deletion relayer/main/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ func main() {

network, err := peers.NewNetwork(
networkLogLevel,
prometheus.DefaultRegisterer,
registerer,
trackedSubnets,
&cfg,
)
Expand Down

0 comments on commit 820041e

Please sign in to comment.