Skip to content

Commit

Permalink
poh: add additional metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
mmcgee-jump committed Mar 27, 2024
1 parent 5f03c6c commit 5d9bab0
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 4 deletions.
3 changes: 3 additions & 0 deletions src/app/fdctl/run/tiles/fd_metric.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,9 @@ prometheus_print( fd_topo_t * topo,
PRINT( "\n" );
result = prometheus_print1( topo, out, out_len, "bank", FD_METRICS_BANK_TOTAL, FD_METRICS_BANK, PRINT_TILE );
if( FD_UNLIKELY( result<0 ) ) return result;
PRINT( "\n" );
result = prometheus_print1( topo, out, out_len, "poh", FD_METRICS_POH_TOTAL, FD_METRICS_POH, PRINT_TILE );
if( FD_UNLIKELY( result<0 ) ) return result;

/* Now backfill Content-Length */
ulong printed;
Expand Down
32 changes: 30 additions & 2 deletions src/app/fdctl/run/tiles/fd_poh.c
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@
#include "../../../../disco/shred/fd_stake_ci.h"
#include "../../../../disco/bank/fd_bank_abi.h"
#include "../../../../disco/keyguard/fd_keyload.h"
#include "../../../../disco/metrics/generated/fd_metrics_poh.h"
#include "../../../../flamenco/leaders/fd_leaders.h"

/* When we are becoming leader, and we think the prior leader might have
Expand Down Expand Up @@ -463,6 +464,10 @@ typedef struct {
ulong pack_out_chunk0;
ulong pack_out_wmark;
ulong pack_out_chunk;

fd_histf_t begin_leader_delay[ 1 ];
fd_histf_t first_microblock_delay[ 1 ];
fd_histf_t slot_done_delay[ 1 ];
} fd_poh_ctx_t;

/* The PoH recorder is implemented in Firedancer but for now needs to
Expand Down Expand Up @@ -719,6 +724,9 @@ fd_ext_poh_reached_leader_slot( ulong * out_leader_slot,
CALLED_FROM_RUST static void
publish_became_leader( fd_poh_ctx_t * ctx,
ulong slot ) {
double tick_per_ns = fd_tempo_tick_per_ns( NULL );
fd_histf_sample( ctx->begin_leader_delay, (ulong)((double)(fd_log_wallclock()-ctx->reset_slot_start_ns)/tick_per_ns) );

ulong leader_start_hashcnt = slot*ctx->hashcnt_per_slot;
long slot_start_ns = ctx->reset_slot_start_ns + (long)((double)(leader_start_hashcnt-ctx->reset_slot_hashcnt)*ctx->hashcnt_duration_ns);

Expand Down Expand Up @@ -808,6 +816,10 @@ no_longer_leader( fd_poh_ctx_t * ctx ) {
ctx->expect_sequential_leader_slot = ctx->hashcnt/ctx->hashcnt_per_slot;
ctx->current_leader_bank = NULL;
ctx->next_leader_slot_hashcnt = next_leader_slot_hashcnt( ctx );

double tick_per_ns = fd_tempo_tick_per_ns( NULL );
fd_histf_sample( ctx->slot_done_delay, (ulong)((double)(fd_log_wallclock()-ctx->reset_slot_start_ns)/tick_per_ns) );

FD_COMPILER_MFENCE();
fd_ext_poh_signal_leader_change( ctx->signal_leader_change );
FD_LOG_INFO(( "no_longer_leader(next_leader_slot=%lu)", ctx->next_leader_slot_hashcnt/ctx->hashcnt_per_slot ));
Expand Down Expand Up @@ -1045,8 +1057,8 @@ after_credit( void * _ctx,
}

static inline void
during_housekeeping( void * ctx ) {
(void)ctx;
during_housekeeping( void * _ctx ) {
fd_poh_ctx_t * ctx = (fd_poh_ctx_t *)_ctx;

FD_COMPILER_MFENCE();
if( FD_UNLIKELY( fd_poh_waiting_lock ) ) {
Expand All @@ -1060,6 +1072,10 @@ during_housekeeping( void * ctx ) {
FD_VOLATILE( fd_poh_waiting_lock ) = 0UL;
}
FD_COMPILER_MFENCE();

FD_MHIST_COPY( POH_TILE, BEGIN_LEADER_DELAY_SECONDS, ctx->begin_leader_delay );
FD_MHIST_COPY( POH_TILE, FIRST_MICROBLOCK_DELAY_SECONDS, ctx->first_microblock_delay );
FD_MHIST_COPY( POH_TILE, SLOT_DONE_DELAY_SECONDS, ctx->slot_done_delay );
}

static void
Expand Down Expand Up @@ -1236,6 +1252,11 @@ after_frag( void * _ctx,
return;
}

if( FD_UNLIKELY( !ctx->microblocks_lower_bound ) ) {
double tick_per_ns = fd_tempo_tick_per_ns( NULL );
fd_histf_sample( ctx->first_microblock_delay, (ulong)((double)(fd_log_wallclock()-ctx->reset_slot_start_ns)/tick_per_ns) );
}

ulong target_slot = fd_disco_poh_sig_slot( *opt_sig );

ulong current_slot = ctx->hashcnt/ctx->hashcnt_per_slot;
Expand Down Expand Up @@ -1488,6 +1509,13 @@ unprivileged_init( fd_topo_t * topo,

if( FD_UNLIKELY( ctx->reset_slot_hashcnt==ULONG_MAX ) ) FD_LOG_ERR(( "PoH was not initialized by Solana Labs client" ));

fd_histf_join( fd_histf_new( ctx->begin_leader_delay, FD_MHIST_SECONDS_MIN( POH_TILE, BEGIN_LEADER_DELAY_SECONDS ),
FD_MHIST_SECONDS_MAX( POH_TILE, BEGIN_LEADER_DELAY_SECONDS ) ) );
fd_histf_join( fd_histf_new( ctx->first_microblock_delay, FD_MHIST_SECONDS_MIN( POH_TILE, FIRST_MICROBLOCK_DELAY_SECONDS ),
FD_MHIST_SECONDS_MAX( POH_TILE, FIRST_MICROBLOCK_DELAY_SECONDS ) ) );
fd_histf_join( fd_histf_new( ctx->slot_done_delay, FD_MHIST_SECONDS_MIN( POH_TILE, SLOT_DONE_DELAY_SECONDS ),
FD_MHIST_SECONDS_MAX( POH_TILE, SLOT_DONE_DELAY_SECONDS ) ) );

for( ulong i=0; i<tile->in_cnt-1; i++ ) {
fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
Expand Down
1 change: 1 addition & 0 deletions src/disco/metrics/fd_metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "generated/fd_metrics_quic.h"
#include "generated/fd_metrics_pack.h"
#include "generated/fd_metrics_bank.h"
#include "generated/fd_metrics_poh.h"

#include "../../tango/tempo/fd_tempo.h"

Expand Down
2 changes: 1 addition & 1 deletion src/disco/metrics/generated/Local.mk
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
$(call add-hdrs,fd_metrics_all.h fd_metrics_quic.h)
$(call add-objs,fd_metrics_all fd_metrics_quic fd_metrics_pack fd_metrics_bank,fd_disco)
$(call add-objs,fd_metrics_all fd_metrics_quic fd_metrics_pack fd_metrics_bank fd_metrics_poh,fd_disco)
3 changes: 3 additions & 0 deletions src/disco/metrics/generated/fd_metrics_poh.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@
#include "fd_metrics_poh.h"

const fd_metrics_meta_t FD_METRICS_POH[FD_METRICS_POH_TOTAL] = {
DECLARE_METRIC_HISTOGRAM_SECONDS( POH_TILE, BEGIN_LEADER_DELAY_SECONDS ),
DECLARE_METRIC_HISTOGRAM_SECONDS( POH_TILE, FIRST_MICROBLOCK_DELAY_SECONDS ),
DECLARE_METRIC_HISTOGRAM_SECONDS( POH_TILE, SLOT_DONE_DELAY_SECONDS ),
};
26 changes: 25 additions & 1 deletion src/disco/metrics/generated/fd_metrics_poh.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,30 @@

#include "../fd_metrics_base.h"

#define FD_METRICS_HISTOGRAM_POH_TILE_BEGIN_LEADER_DELAY_SECONDS_OFF (174UL)
#define FD_METRICS_HISTOGRAM_POH_TILE_BEGIN_LEADER_DELAY_SECONDS_NAME "poh_tile_begin_leader_delay_seconds"
#define FD_METRICS_HISTOGRAM_POH_TILE_BEGIN_LEADER_DELAY_SECONDS_TYPE (FD_METRICS_TYPE_HISTOGRAM)
#define FD_METRICS_HISTOGRAM_POH_TILE_BEGIN_LEADER_DELAY_SECONDS_DESC "Delay between when we become leader in a slot and when we receive the bank."
#define FD_METRICS_HISTOGRAM_POH_TILE_BEGIN_LEADER_DELAY_SECONDS_MIN (1e-08)
#define FD_METRICS_HISTOGRAM_POH_TILE_BEGIN_LEADER_DELAY_SECONDS_MAX (0.01)
#define FD_METRICS_HISTOGRAM_POH_TILE_BEGIN_LEADER_DELAY_SECONDS_CVT (FD_METRICS_CONVERTER_SECONDS)

#define FD_METRICS_POH_TOTAL (0UL)
#define FD_METRICS_HISTOGRAM_POH_TILE_FIRST_MICROBLOCK_DELAY_SECONDS_OFF (191UL)
#define FD_METRICS_HISTOGRAM_POH_TILE_FIRST_MICROBLOCK_DELAY_SECONDS_NAME "poh_tile_first_microblock_delay_seconds"
#define FD_METRICS_HISTOGRAM_POH_TILE_FIRST_MICROBLOCK_DELAY_SECONDS_TYPE (FD_METRICS_TYPE_HISTOGRAM)
#define FD_METRICS_HISTOGRAM_POH_TILE_FIRST_MICROBLOCK_DELAY_SECONDS_DESC "Delay between when we become leader in a slot and when we receive the first microblock."
#define FD_METRICS_HISTOGRAM_POH_TILE_FIRST_MICROBLOCK_DELAY_SECONDS_MIN (1e-08)
#define FD_METRICS_HISTOGRAM_POH_TILE_FIRST_MICROBLOCK_DELAY_SECONDS_MAX (0.01)
#define FD_METRICS_HISTOGRAM_POH_TILE_FIRST_MICROBLOCK_DELAY_SECONDS_CVT (FD_METRICS_CONVERTER_SECONDS)

#define FD_METRICS_HISTOGRAM_POH_TILE_SLOT_DONE_DELAY_SECONDS_OFF (208UL)
#define FD_METRICS_HISTOGRAM_POH_TILE_SLOT_DONE_DELAY_SECONDS_NAME "poh_tile_slot_done_delay_seconds"
#define FD_METRICS_HISTOGRAM_POH_TILE_SLOT_DONE_DELAY_SECONDS_TYPE (FD_METRICS_TYPE_HISTOGRAM)
#define FD_METRICS_HISTOGRAM_POH_TILE_SLOT_DONE_DELAY_SECONDS_DESC "Delay between when we become leader in a slot and when we finish the slotk."
#define FD_METRICS_HISTOGRAM_POH_TILE_SLOT_DONE_DELAY_SECONDS_MIN (0.001)
#define FD_METRICS_HISTOGRAM_POH_TILE_SLOT_DONE_DELAY_SECONDS_MAX (400.0)
#define FD_METRICS_HISTOGRAM_POH_TILE_SLOT_DONE_DELAY_SECONDS_CVT (FD_METRICS_CONVERTER_SECONDS)


#define FD_METRICS_POH_TOTAL (3UL)
extern const fd_metrics_meta_t FD_METRICS_POH[FD_METRICS_POH_TOTAL];
9 changes: 9 additions & 0 deletions src/disco/metrics/metrics.xml
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,15 @@ metric introduced.
</group>

<group name="PohTile" tile="poh">
<histogram name="BeginLeaderDelaySeconds" min="0.00000001" max="0.01" converter="seconds">
<summary>Delay between when we become leader in a slot and when we receive the bank.</summary>
</histogram>
<histogram name="FirstMicroblockDelaySeconds" min="0.00000001" max="0.01" converter="seconds">
<summary>Delay between when we become leader in a slot and when we receive the first microblock.</summary>
</histogram>
<histogram name="SlotDoneDelaySeconds" min="0.001" max="400" converter="seconds">
<summary>Delay between when we become leader in a slot and when we finish the slot.</summary>
</histogram>
</group>

<group name="Quic" tile="quic">
Expand Down

0 comments on commit 5d9bab0

Please sign in to comment.