Skip to content

Commit

Permalink
pack: change model for pacing CUs
Browse files Browse the repository at this point in the history
  • Loading branch information
ptaffet-jump committed Dec 10, 2024
1 parent 950fe38 commit 3ddc933
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 41 deletions.
16 changes: 8 additions & 8 deletions src/app/fdctl/run/tiles/fd_pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,6 @@ typedef struct {
int poll_cursor; /* in [0, bank_cnt), the next bank to poll */
int use_consumed_cus;
long skip_cnt;
long schedule_next; /* the tick value at which to schedule the next block for pacing purposes */
ulong * bank_current[ FD_PACK_PACK_MAX_OUT ];
ulong bank_expect[ FD_PACK_PACK_MAX_OUT ];
/* bank_ready_at[x] means don't check bank x until tickcount is at
Expand Down Expand Up @@ -360,7 +359,8 @@ after_credit( fd_pack_ctx_t * ctx,

long now = fd_tickcount();

if( FD_UNLIKELY( now<ctx->schedule_next ) ) return;
int pacing_bank_cnt = (int)fd_pack_pacing_enabled_bank_cnt( ctx->pacer, now );
if( FD_UNLIKELY( !pacing_bank_cnt ) ) return;

ulong bank_cnt = ctx->bank_cnt;

Expand Down Expand Up @@ -478,8 +478,8 @@ after_credit( fd_pack_ctx_t * ctx,
*charge_busy = 1;

/* Try to schedule the next microblock. Do we have any idle bank
tiles? */
if( FD_LIKELY( ctx->bank_idle_bitset ) ) { /* Optimize for schedule */
tiles in the first `pacing_bank_cnt`? */
if( FD_LIKELY( ctx->bank_idle_bitset & fd_ulong_mask_lsb( pacing_bank_cnt ) ) ) { /* Optimize for schedule */
any_ready = 1;

int i = fd_ulong_find_lsb( ctx->bank_idle_bitset );
Expand Down Expand Up @@ -518,7 +518,7 @@ after_credit( fd_pack_ctx_t * ctx,

ctx->bank_idle_bitset = fd_ulong_pop_lsb( ctx->bank_idle_bitset );
ctx->skip_cnt = (long)schedule_cnt * fd_long_if( ctx->use_consumed_cus, (long)bank_cnt/2L, 1L );
ctx->schedule_next = fd_pack_pacing_next( ctx->pacer, fd_pack_current_block_cost( ctx->pack ), now2 );
fd_pack_pacing_update_consumed_cus( ctx->pacer, fd_pack_current_block_cost( ctx->pack ), now2 );
}
}

Expand Down Expand Up @@ -607,7 +607,7 @@ during_frag( fd_pack_ctx_t * ctx,
long end_ticks = now_ticks + (long)((double)fd_long_max( became_leader->slot_end_ns - now_ns, 1L )*ctx->ticks_per_ns);
/* We may still get overrun, but then we'll never use this and just
reinitialize it the next time when we actually become leader. */
fd_pack_pacing_init( ctx->pacer, now_ticks, end_ticks, ctx->slot_max_cost );
fd_pack_pacing_init( ctx->pacer, now_ticks, end_ticks, (float)ctx->ticks_per_ns, ctx->slot_max_cost );

FD_LOG_INFO(( "pack_became_leader(slot=%lu,ends_at=%ld)", ctx->leader_slot, became_leader->slot_end_ns ));

Expand Down Expand Up @@ -733,7 +733,7 @@ after_frag( fd_pack_ctx_t * ctx,

ctx->slot_end_ns = ctx->_slot_end_ns;
fd_pack_set_block_limits( ctx->pack, ctx->slot_max_microblocks, ctx->slot_max_data );
ctx->schedule_next = fd_pack_pacing_next( ctx->pacer, fd_pack_current_block_cost( ctx->pack ), now );
fd_pack_pacing_update_consumed_cus( ctx->pacer, fd_pack_current_block_cost( ctx->pack ), now );
break;
}
case IN_KIND_BUNDLE: {
Expand All @@ -746,7 +746,7 @@ after_frag( fd_pack_ctx_t * ctx,

fd_pack_rebate_cus( ctx->pack, ctx->pending_rebate, ctx->pending_rebate_cnt );
ctx->pending_rebate_cnt = 0UL;
ctx->schedule_next = fd_pack_pacing_next( ctx->pacer, fd_pack_current_block_cost( ctx->pack ), now );
fd_pack_pacing_update_consumed_cus( ctx->pacer, fd_pack_current_block_cost( ctx->pack ), now );
break;
}
case IN_KIND_RESOLV: {
Expand Down
99 changes: 66 additions & 33 deletions src/ballet/pack/fd_pack_pacing.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@ struct fd_pack_pacing_private {
/* Number of CUs in the block */
ulong max_cus;

ulong home_stretch_cutoff; /* in CUs, where the slope switches */
float raw_slope; /* in ticks per CU */
float offset; /* in ticks */
float ticks_per_cu;
float remaining_cus;
};

typedef struct fd_pack_pacing_private fd_pack_pacing_t;
Expand All @@ -33,46 +32,80 @@ static inline void
fd_pack_pacing_init( fd_pack_pacing_t * pacer,
long t_start,
long t_end,
float ticks_per_ns,
ulong max_cus ) {
/* The exact style of pacing needs to be the subject of quantitative
experimentation, so for now we're just doing something that passes
the gut check. We'll pace for 90% of the CUs through the first 75%
of the block time, and then the last 10% through the last 25% of the
block time. This gives us pretty good tolerance against transactions
taking longer to execute than we expect (the extreme of which being
transactions that fail to land). */

pacer->t_start = t_start;
pacer->t_end = t_end;
/* (t_end-t_start) is the slot length in ticks; backing off 1/50th of
it puts the target at the 98% mark. Note the operand order: using
(t_start-t_end) here would be negative and would push the target
past t_end instead of pulling it in. */
pacer->t_end = t_end - (t_end-t_start)/50L; /* try to finish 98% of the way through */
pacer->max_cus = max_cus;

pacer->raw_slope = (float)(t_end - t_start)/(float)max_cus;
pacer->offset = 1.5f * (float)(t_end - t_start); /* the math works out to be 1.5x */
pacer->home_stretch_cutoff = (max_cus*9UL + 4UL)/10UL;
/* Time per CU depends on the hardware, the transaction mix, what
fraction of the transactions land, etc. It's hard to just come up
with a value, but a small sample says 8 ns/CU is in the right
ballpark. */
pacer->ticks_per_cu = 8.0f * ticks_per_ns;
pacer->remaining_cus = (float)max_cus;
}

/* fd_pack_pacing_next returns the time (in fd_tickcount() space) at
which the next attempt should be made to schedule transactions.
/* fd_pack_pacing_update_consumed_cus records a fresh measurement of
   the CUs consumed so far in the block.  pacer must be a local join.
   consumed_cus is expected to be at most max_cus; a larger value is
   handled as if it were exactly max_cus.  now is the time (in
   fd_tickcount space) at which the measurement was taken. */
static inline void
fd_pack_pacing_update_consumed_cus( fd_pack_pacing_t * pacer,
                                    ulong              consumed_cus,
                                    long               now ) {
  /* now is currently unused.  This is kept as its own function so that
     a future version can learn the ticks_per_cu rate. */
  (void)now;

  /* consumed_cus can (rarely) exceed max_cus.  Raising the minuend to
     at least consumed_cus makes the unsigned difference bottom out at
     0 instead of wrapping around. */
  ulong ceiling  = fd_ulong_max( pacer->max_cus, consumed_cus );
  ulong cus_left = ceiling - consumed_cus;

  pacer->remaining_cus = (float)cus_left;
}

The returned value will typically be between t_start and t_end, but
may be slightly out of range due to rounding or if consumed_cus is
larger than the max cu value provided in fd_pack_pacing_init.
consumed_cus need not increase monotonically between calls.

now should be the time at which the consumed_cus was measured. It's
not used right now, but is provided to allow for more sophisticated
implementations in the future.
/* fd_pack_pacing_enabled_bank_cnt computes how many banks should be
active at time `now` (in fd_tickcount space) given the most recent
value specified for consumed CUs. The returned value may be 0, which
indicates that no banks should be active at the moment. It may also
be higher than the number of available banks, which should be
interpreted as all banks being enabled. */
FD_FN_PURE static inline ulong
fd_pack_pacing_enabled_bank_cnt( fd_pack_pacing_t const * pacer,
long now ) {
/* We want to use as few banks as possible to fill the block in 400
milliseconds. Using fewer banks minimizes how often we have to pass
up the best transaction because it conflicts with something that is
actively running. To do that, we draw lines in the time-CU plane
that pass through (400 milliseconds, 48M CUs) with slope k*(single
bank speed), where k varies between 1 and the number of bank tiles
configured. These lines split the plane into several regions, and
the region we are in tells us how many bank tiles to use.
fd_pack_pacing_init must have been called prior to the first call of
fd_pack_pacing_next. */
static inline long
fd_pack_pacing_next( fd_pack_pacing_t * pacer,
ulong consumed_cus,
long now ) {
(void)now;
int non_home_stretch = consumed_cus < pacer->home_stretch_cutoff;
return pacer->t_start + (long)( (float)consumed_cus * pacer->raw_slope * fd_float_if( non_home_stretch, 0.75f/0.9f, 0.25f/0.1f )
- fd_float_if( non_home_stretch, 0.0f, pacer->offset ));
48M - / /|
| / / /
| / // |
U | / / / /
s | 0 banks active / / / |
e | / / / /
d | / e / / |
| / k v / / /
C | / n i / / |
U | / a t / / /
s | / B c / / |
| / 1 a / 2 Banks / /
| / / active / ... |
0 |--------------------------------------------------------
0 ms 400ms
*/
/* We want to be pretty careful with the math here. We want to make
sure we never divide by 0, so clamp the denominator at 1. The
numerator is non-negative. Ticks_per_cu is between 1 and 100, so
it'll always fit in a ulong. */
return (ulong)(pacer->remaining_cus/
(float)(fd_long_max( 1L, pacer->t_end - now )) * pacer->ticks_per_cu );
}

#endif /* HEADER_fd_src_ballet_pack_fd_pack_pacing_h */

0 comments on commit 3ddc933

Please sign in to comment.