From 5d9bab02fa358c3e8421ffd24ca5c6acd368632c Mon Sep 17 00:00:00 2001 From: Michael McGee Date: Wed, 27 Mar 2024 20:11:48 +0000 Subject: [PATCH] poh: add additional metrics --- src/app/fdctl/run/tiles/fd_metric.c | 3 ++ src/app/fdctl/run/tiles/fd_poh.c | 32 ++++++++++++++++++-- src/disco/metrics/fd_metrics.h | 1 + src/disco/metrics/generated/Local.mk | 2 +- src/disco/metrics/generated/fd_metrics_poh.c | 3 ++ src/disco/metrics/generated/fd_metrics_poh.h | 26 +++++++++++++++- src/disco/metrics/metrics.xml | 9 ++++++ 7 files changed, 72 insertions(+), 4 deletions(-) diff --git a/src/app/fdctl/run/tiles/fd_metric.c b/src/app/fdctl/run/tiles/fd_metric.c index dbc0794a5b..4095036f0e 100644 --- a/src/app/fdctl/run/tiles/fd_metric.c +++ b/src/app/fdctl/run/tiles/fd_metric.c @@ -257,6 +257,9 @@ prometheus_print( fd_topo_t * topo, PRINT( "\n" ); result = prometheus_print1( topo, out, out_len, "bank", FD_METRICS_BANK_TOTAL, FD_METRICS_BANK, PRINT_TILE ); if( FD_UNLIKELY( result<0 ) ) return result; + PRINT( "\n" ); + result = prometheus_print1( topo, out, out_len, "poh", FD_METRICS_POH_TOTAL, FD_METRICS_POH, PRINT_TILE ); + if( FD_UNLIKELY( result<0 ) ) return result; /* Now backfill Content-Length */ ulong printed; diff --git a/src/app/fdctl/run/tiles/fd_poh.c b/src/app/fdctl/run/tiles/fd_poh.c index f96988a7b8..7dd159fe44 100644 --- a/src/app/fdctl/run/tiles/fd_poh.c +++ b/src/app/fdctl/run/tiles/fd_poh.c @@ -310,6 +310,7 @@ #include "../../../../disco/shred/fd_stake_ci.h" #include "../../../../disco/bank/fd_bank_abi.h" #include "../../../../disco/keyguard/fd_keyload.h" +#include "../../../../disco/metrics/generated/fd_metrics_poh.h" #include "../../../../flamenco/leaders/fd_leaders.h" /* When we are becoming leader, and we think the prior leader might have @@ -463,6 +464,10 @@ typedef struct { ulong pack_out_chunk0; ulong pack_out_wmark; ulong pack_out_chunk; + + fd_histf_t begin_leader_delay[ 1 ]; + fd_histf_t first_microblock_delay[ 1 ]; + fd_histf_t slot_done_delay[ 1 ]; } fd_poh_ctx_t; /* The PoH recorder is implemented in Firedancer but for now needs to @@ -719,6 +724,9 @@ fd_ext_poh_reached_leader_slot( ulong * out_leader_slot, CALLED_FROM_RUST static void publish_became_leader( fd_poh_ctx_t * ctx, ulong slot ) { + double tick_per_ns = fd_tempo_tick_per_ns( NULL ); + fd_histf_sample( ctx->begin_leader_delay, (ulong)((double)(fd_log_wallclock()-ctx->reset_slot_start_ns)/tick_per_ns) ); + ulong leader_start_hashcnt = slot*ctx->hashcnt_per_slot; long slot_start_ns = ctx->reset_slot_start_ns + (long)((double)(leader_start_hashcnt-ctx->reset_slot_hashcnt)*ctx->hashcnt_duration_ns); @@ -808,6 +816,10 @@ no_longer_leader( fd_poh_ctx_t * ctx ) { ctx->expect_sequential_leader_slot = ctx->hashcnt/ctx->hashcnt_per_slot; ctx->current_leader_bank = NULL; ctx->next_leader_slot_hashcnt = next_leader_slot_hashcnt( ctx ); + + double tick_per_ns = fd_tempo_tick_per_ns( NULL ); + fd_histf_sample( ctx->slot_done_delay, (ulong)((double)(fd_log_wallclock()-ctx->reset_slot_start_ns)/tick_per_ns) ); + FD_COMPILER_MFENCE(); fd_ext_poh_signal_leader_change( ctx->signal_leader_change ); FD_LOG_INFO(( "no_longer_leader(next_leader_slot=%lu)", ctx->next_leader_slot_hashcnt/ctx->hashcnt_per_slot )); @@ -1045,8 +1057,8 @@ after_credit( void * _ctx, } static inline void -during_housekeeping( void * ctx ) { - (void)ctx; +during_housekeeping( void * _ctx ) { + fd_poh_ctx_t * ctx = (fd_poh_ctx_t *)_ctx; FD_COMPILER_MFENCE(); if( FD_UNLIKELY( fd_poh_waiting_lock ) ) { @@ -1060,6 +1072,10 @@ during_housekeeping( void * ctx ) { FD_VOLATILE( fd_poh_waiting_lock ) = 0UL; } FD_COMPILER_MFENCE(); + + FD_MHIST_COPY( POH_TILE, BEGIN_LEADER_DELAY_SECONDS, ctx->begin_leader_delay ); + FD_MHIST_COPY( POH_TILE, FIRST_MICROBLOCK_DELAY_SECONDS, ctx->first_microblock_delay ); + FD_MHIST_COPY( POH_TILE, SLOT_DONE_DELAY_SECONDS, ctx->slot_done_delay ); } static void @@ -1236,6 +1252,11 @@ after_frag( void * _ctx, return; } + if( FD_UNLIKELY( !ctx->microblocks_lower_bound ) ) { + double tick_per_ns = fd_tempo_tick_per_ns( NULL ); + fd_histf_sample( ctx->first_microblock_delay, (ulong)((double)(fd_log_wallclock()-ctx->reset_slot_start_ns)/tick_per_ns) ); + } + ulong target_slot = fd_disco_poh_sig_slot( *opt_sig ); ulong current_slot = ctx->hashcnt/ctx->hashcnt_per_slot; @@ -1488,6 +1509,13 @@ unprivileged_init( fd_topo_t * topo, if( FD_UNLIKELY( ctx->reset_slot_hashcnt==ULONG_MAX ) ) FD_LOG_ERR(( "PoH was not initialized by Solana Labs client" )); + fd_histf_join( fd_histf_new( ctx->begin_leader_delay, FD_MHIST_SECONDS_MIN( POH_TILE, BEGIN_LEADER_DELAY_SECONDS ), + FD_MHIST_SECONDS_MAX( POH_TILE, BEGIN_LEADER_DELAY_SECONDS ) ) ); + fd_histf_join( fd_histf_new( ctx->first_microblock_delay, FD_MHIST_SECONDS_MIN( POH_TILE, FIRST_MICROBLOCK_DELAY_SECONDS ), + FD_MHIST_SECONDS_MAX( POH_TILE, FIRST_MICROBLOCK_DELAY_SECONDS ) ) ); + fd_histf_join( fd_histf_new( ctx->slot_done_delay, FD_MHIST_SECONDS_MIN( POH_TILE, SLOT_DONE_DELAY_SECONDS ), + FD_MHIST_SECONDS_MAX( POH_TILE, SLOT_DONE_DELAY_SECONDS ) ) ); + for( ulong i=0; iin_cnt-1; i++ ) { fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ]; fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ]; diff --git a/src/disco/metrics/fd_metrics.h b/src/disco/metrics/fd_metrics.h index 18c97e4a12..7d9ec5c4bd 100644 --- a/src/disco/metrics/fd_metrics.h +++ b/src/disco/metrics/fd_metrics.h @@ -7,6 +7,7 @@ #include "generated/fd_metrics_quic.h" #include "generated/fd_metrics_pack.h" #include "generated/fd_metrics_bank.h" +#include "generated/fd_metrics_poh.h" #include "../../tango/tempo/fd_tempo.h" diff --git a/src/disco/metrics/generated/Local.mk b/src/disco/metrics/generated/Local.mk index 8aae95e18f..38ee780031 100644 --- a/src/disco/metrics/generated/Local.mk +++ b/src/disco/metrics/generated/Local.mk @@ -1,2 +1,2 @@ $(call add-hdrs,fd_metrics_all.h fd_metrics_quic.h) -$(call add-objs,fd_metrics_all fd_metrics_quic fd_metrics_pack fd_metrics_bank,fd_disco) +$(call add-objs,fd_metrics_all fd_metrics_quic fd_metrics_pack fd_metrics_bank fd_metrics_poh,fd_disco) diff --git a/src/disco/metrics/generated/fd_metrics_poh.c b/src/disco/metrics/generated/fd_metrics_poh.c index 47a21cf9c9..092f2bfed1 100644 --- a/src/disco/metrics/generated/fd_metrics_poh.c +++ b/src/disco/metrics/generated/fd_metrics_poh.c @@ -2,4 +2,7 @@ #include "fd_metrics_poh.h" const fd_metrics_meta_t FD_METRICS_POH[FD_METRICS_POH_TOTAL] = { + DECLARE_METRIC_HISTOGRAM_SECONDS( POH_TILE, BEGIN_LEADER_DELAY_SECONDS ), + DECLARE_METRIC_HISTOGRAM_SECONDS( POH_TILE, FIRST_MICROBLOCK_DELAY_SECONDS ), + DECLARE_METRIC_HISTOGRAM_SECONDS( POH_TILE, SLOT_DONE_DELAY_SECONDS ), }; diff --git a/src/disco/metrics/generated/fd_metrics_poh.h b/src/disco/metrics/generated/fd_metrics_poh.h index c405e9afb4..c4ba5ca4e6 100644 --- a/src/disco/metrics/generated/fd_metrics_poh.h +++ b/src/disco/metrics/generated/fd_metrics_poh.h @@ -2,6 +2,30 @@ #include "../fd_metrics_base.h" +#define FD_METRICS_HISTOGRAM_POH_TILE_BEGIN_LEADER_DELAY_SECONDS_OFF (174UL) +#define FD_METRICS_HISTOGRAM_POH_TILE_BEGIN_LEADER_DELAY_SECONDS_NAME "poh_tile_begin_leader_delay_seconds" +#define FD_METRICS_HISTOGRAM_POH_TILE_BEGIN_LEADER_DELAY_SECONDS_TYPE (FD_METRICS_TYPE_HISTOGRAM) +#define FD_METRICS_HISTOGRAM_POH_TILE_BEGIN_LEADER_DELAY_SECONDS_DESC "Delay between when we become leader in a slot and when we receive the bank." +#define FD_METRICS_HISTOGRAM_POH_TILE_BEGIN_LEADER_DELAY_SECONDS_MIN (1e-08) +#define FD_METRICS_HISTOGRAM_POH_TILE_BEGIN_LEADER_DELAY_SECONDS_MAX (0.01) +#define FD_METRICS_HISTOGRAM_POH_TILE_BEGIN_LEADER_DELAY_SECONDS_CVT (FD_METRICS_CONVERTER_SECONDS) -#define FD_METRICS_POH_TOTAL (0UL) +#define FD_METRICS_HISTOGRAM_POH_TILE_FIRST_MICROBLOCK_DELAY_SECONDS_OFF (191UL) +#define FD_METRICS_HISTOGRAM_POH_TILE_FIRST_MICROBLOCK_DELAY_SECONDS_NAME "poh_tile_first_microblock_delay_seconds" +#define FD_METRICS_HISTOGRAM_POH_TILE_FIRST_MICROBLOCK_DELAY_SECONDS_TYPE (FD_METRICS_TYPE_HISTOGRAM) +#define FD_METRICS_HISTOGRAM_POH_TILE_FIRST_MICROBLOCK_DELAY_SECONDS_DESC "Delay between when we become leader in a slot and when we receive the first microblock." +#define FD_METRICS_HISTOGRAM_POH_TILE_FIRST_MICROBLOCK_DELAY_SECONDS_MIN (1e-08) +#define FD_METRICS_HISTOGRAM_POH_TILE_FIRST_MICROBLOCK_DELAY_SECONDS_MAX (0.01) +#define FD_METRICS_HISTOGRAM_POH_TILE_FIRST_MICROBLOCK_DELAY_SECONDS_CVT (FD_METRICS_CONVERTER_SECONDS) + +#define FD_METRICS_HISTOGRAM_POH_TILE_SLOT_DONE_DELAY_SECONDS_OFF (208UL) +#define FD_METRICS_HISTOGRAM_POH_TILE_SLOT_DONE_DELAY_SECONDS_NAME "poh_tile_slot_done_delay_seconds" +#define FD_METRICS_HISTOGRAM_POH_TILE_SLOT_DONE_DELAY_SECONDS_TYPE (FD_METRICS_TYPE_HISTOGRAM) +#define FD_METRICS_HISTOGRAM_POH_TILE_SLOT_DONE_DELAY_SECONDS_DESC "Delay between when we become leader in a slot and when we finish the slotk." +#define FD_METRICS_HISTOGRAM_POH_TILE_SLOT_DONE_DELAY_SECONDS_MIN (0.001) +#define FD_METRICS_HISTOGRAM_POH_TILE_SLOT_DONE_DELAY_SECONDS_MAX (400.0) +#define FD_METRICS_HISTOGRAM_POH_TILE_SLOT_DONE_DELAY_SECONDS_CVT (FD_METRICS_CONVERTER_SECONDS) + + +#define FD_METRICS_POH_TOTAL (3UL) extern const fd_metrics_meta_t FD_METRICS_POH[FD_METRICS_POH_TOTAL]; diff --git a/src/disco/metrics/metrics.xml b/src/disco/metrics/metrics.xml index 931c0d09ea..45569130d9 100644 --- a/src/disco/metrics/metrics.xml +++ b/src/disco/metrics/metrics.xml @@ -222,6 +222,15 @@ metric introduced. + + Delay between when we become leader in a slot and when we receive the bank. + + + Delay between when we become leader in a slot and when we receive the first microblock. + + + Delay between when we become leader in a slot and when we finish the slot. +