Skip to content

Commit

Permalink
Allow fddev to boot off of a funk checkpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
asiegel-jt committed May 16, 2024
1 parent dfa4374 commit 8b23b6b
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 20 deletions.
56 changes: 43 additions & 13 deletions src/app/fdctl/run/tiles/fd_replay.c
Original file line number Diff line number Diff line change
Expand Up @@ -354,11 +354,21 @@ read_snapshot( void * _ctx, char const * snapshotfile, char const * incremental

const char * snapshot = snapshotfile;

fd_snapshot_load(snapshot, ctx->slot_ctx, false, false, FD_SNAPSHOT_TYPE_FULL );
if ( strlen(incremental) > 0 ) {
ctx->epoch_ctx = fd_exec_epoch_ctx_join( fd_exec_epoch_ctx_new( ctx->epoch_ctx_mem, 2000000UL ) );
if ( strncmp(snapshot, "wksp:", 5) == 0 ) {
/* Already loaded the main snapshot when we initialized funk */
if ( strlen(incremental) > 0 ) {
ctx->epoch_ctx = fd_exec_epoch_ctx_join( fd_exec_epoch_ctx_new( ctx->epoch_ctx_mem, 2000000UL ) );
fd_snapshot_load(incremental, ctx->slot_ctx, false, false, FD_SNAPSHOT_TYPE_INCREMENTAL );
} else {
fd_runtime_recover_banks( ctx->slot_ctx, 0 );
}

fd_snapshot_load(incremental, ctx->slot_ctx, false, false, FD_SNAPSHOT_TYPE_INCREMENTAL );
} else {
fd_snapshot_load(snapshot, ctx->slot_ctx, false, false, FD_SNAPSHOT_TYPE_FULL );
if ( strlen(incremental) > 0 ) {
ctx->epoch_ctx = fd_exec_epoch_ctx_join( fd_exec_epoch_ctx_new( ctx->epoch_ctx_mem, 2000000UL ) );
fd_snapshot_load(incremental, ctx->slot_ctx, false, false, FD_SNAPSHOT_TYPE_INCREMENTAL );
}
}

fd_blockstore_snapshot_insert( ctx->slot_ctx->blockstore, &ctx->slot_ctx->slot_bank );
Expand Down Expand Up @@ -529,15 +539,35 @@ unprivileged_init( fd_topo_t * topo,
FD_LOG_ERR(( "no funk workspace" ));
}

fd_funk_t * funk;
void * shmem;
shmem = fd_wksp_alloc_laddr( ctx->funk_wksp, fd_funk_align(), fd_funk_footprint(), FD_FUNK_MAGIC );
if (shmem == NULL)
FD_LOG_ERR(( "failed to allocate a funky" ));
funk = fd_funk_join( fd_funk_new( shmem, FD_FUNK_MAGIC, ctx->funk_seed, tile->replay.txn_max, tile->replay.index_max ) );
if (funk == NULL) {
fd_wksp_free_laddr(shmem);
FD_LOG_ERR(( "failed to allocate a funky" ));
fd_funk_t * funk = NULL;
void * shmem = NULL;
ctx->snapshot = tile->replay.snapshot;
if ( strncmp(ctx->snapshot, "wksp:", 5) == 0 ) {
int err = fd_wksp_restore( ctx->funk_wksp, ctx->snapshot+5U, (uint)ctx->funk_seed );
if (err) {
FD_LOG_ERR(( "failed to restore %s: error %d", ctx->snapshot, err ));
}
fd_wksp_tag_query_info_t info;
ulong tag = FD_FUNK_MAGIC;
if( fd_wksp_tag_query( ctx->funk_wksp, &tag, 1, &info, 1 ) > 0 ) {
shmem = fd_wksp_laddr_fast( ctx->funk_wksp, info.gaddr_lo );
funk = fd_funk_join( shmem );
if( funk == NULL ) {
FD_LOG_ERR(( "failed to join a funky in %s", ctx->snapshot ));
}
} else {
FD_LOG_ERR(( "failed to find a funky in %s", ctx->snapshot ));
}

} else {
shmem = fd_wksp_alloc_laddr( ctx->funk_wksp, fd_funk_align(), fd_funk_footprint(), FD_FUNK_MAGIC );
if (shmem == NULL)
FD_LOG_ERR(( "failed to allocate a funky" ));
funk = fd_funk_join( fd_funk_new( shmem, FD_FUNK_MAGIC, ctx->funk_seed, tile->replay.txn_max, tile->replay.index_max ) );
if (funk == NULL) {
fd_wksp_free_laddr(shmem);
FD_LOG_ERR(( "failed to allocate a funky" ));
}
}

ctx->last_stake_weights_push_time = 0;
Expand Down
21 changes: 21 additions & 0 deletions src/app/fdctl/run/topos/fd_firedancer.c
Original file line number Diff line number Diff line change
Expand Up @@ -300,5 +300,26 @@ fd_topo_firedancer( config_t * _config ) {
}

fd_topob_finish( topo, fdctl_obj_align, fdctl_obj_footprint, fdctl_obj_loose );

const char * snapshot = config->tiles.replay.snapshot;
if ( strncmp(snapshot, "wksp:", 5) == 0 ) {
/* Make the funk workspace match the parameters used to create the
checkpoint. This is a bit nonintuitive because of the way
fd_topo_create_workspace works. */
uint seed;
ulong part_max;
ulong data_max;
int err = fd_wksp_restore_preview( snapshot+5, &seed, &part_max, &data_max );
if( err ) FD_LOG_ERR(( "unable to restore %s: error %d", snapshot, err ));
fd_topo_wksp_t * wksp = &topo->workspaces[ topo->objs[ funk_obj->id ].wksp_id ];
wksp->part_max = part_max;
wksp->known_footprint = 0;
wksp->total_footprint = data_max;
ulong page_sz = FD_SHMEM_GIGANTIC_PAGE_SZ;
wksp->page_sz = page_sz;
ulong footprint = fd_wksp_footprint( part_max, data_max );
wksp->page_cnt = footprint / page_sz;
}

config->topo = *topo;
}
4 changes: 2 additions & 2 deletions src/app/ledger/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ runtime_replay( fd_runtime_ctx_t * state, fd_runtime_args_t * args ) {

if( args->on_demand_block_ingest ) {
if( fd_blockstore_block_query( blockstore, slot ) == NULL && slot_meta.slot == slot ) {
int err = fd_rocksdb_import_block_blockstore( &rocks_db, &slot_meta, blockstore,
int err = fd_rocksdb_import_block_blockstore( &rocks_db, &slot_meta, blockstore,
args->copy_txn_status, slot == (args->trash_hash) ? trash_hash_buf : NULL );
if( FD_UNLIKELY( err ) ) {
FD_LOG_ERR(( "Failed to import block %lu", start_slot ));
Expand Down Expand Up @@ -314,7 +314,7 @@ runtime_replay( fd_runtime_ctx_t * state, fd_runtime_args_t * args ) {
if( tpool_scr_mem ) {
fd_valloc_free( state->slot_ctx->valloc, tpool_scr_mem );
}

if( args->on_demand_block_ingest ) {
fd_rocksdb_root_iter_destroy( &iter );
fd_rocksdb_destroy( &rocks_db );
Expand Down
7 changes: 3 additions & 4 deletions src/flamenco/runtime/fd_runtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -2203,7 +2203,7 @@ fd_runtime_checkpt( fd_capture_ctx_t * capture_ctx,
fd_exec_slot_ctx_t * slot_ctx,
ulong slot ) {
int is_checkpt_freq = capture_ctx != NULL && slot % capture_ctx->checkpt_freq == 0;
int is_abort_slot = slot == ULONG_MAX;
int is_abort_slot = slot == ULONG_MAX;
if( !is_checkpt_freq && !is_abort_slot ) {
return;
}
Expand Down Expand Up @@ -2281,7 +2281,7 @@ fd_runtime_block_eval_tpool(fd_exec_slot_ctx_t *slot_ctx,
if( err != 0 ) {
return err;
}

fd_funk_t * funk = slot_ctx->acc_mgr->funk;

long block_eval_time = -fd_log_wallclock();
Expand Down Expand Up @@ -3600,8 +3600,7 @@ FD_SCRATCH_SCOPE_BEGIN {
fd_funk_rec_key_t id = fd_runtime_epoch_bank_key();
fd_funk_rec_t const * rec = fd_funk_rec_query_global(funk, txn, &id);
if ( rec == NULL )
__asm__("int $3");
// FD_LOG_ERR(("failed to read banks record"));
FD_LOG_ERR(("failed to read banks record"));
void * val = fd_funk_val( rec, fd_funk_wksp(funk) );
fd_bincode_decode_ctx_t ctx;
ctx.data = val;
Expand Down
8 changes: 8 additions & 0 deletions src/util/wksp/fd_wksp.h
Original file line number Diff line number Diff line change
Expand Up @@ -1014,6 +1014,14 @@ fd_wksp_restore( fd_wksp_t * wksp,
char const * path,
uint seed );

/* fd_wksp_restore_preview reads the header of the checkpoint file at
path and extracts the seed, part_max and data_max values recorded
there. No workspace is touched; the file is opened read-only and
closed before returning. The extracted values describe the workspace
the checkpoint was taken from, so a caller can size a compatible
workspace before calling fd_wksp_restore.

path is the checkpoint file to inspect. out_seed, out_part_max and
out_data_max must be non-NULL and receive the extracted values.

Returns FD_WKSP_SUCCESS (0) on success. Returns FD_WKSP_ERR_INVAL if
path is NULL, and a non-zero FD_WKSP_ERR_* code if the file cannot be
opened or read, does not start with the wksp magic, or uses a
checkpoint style other than FD_WKSP_CHECKPT_STYLE_RAW. On failure,
the contents of the outputs should not be relied upon. */
int
fd_wksp_restore_preview( char const * path,
uint * out_seed,
ulong * out_part_max,
ulong * out_data_max );

/* fd_wksp_mprotect marks all the memory in a workspace as read-only
(flag==1) or read-write (flag==0). Accessing read-only memory produces
a seg fault. */
Expand Down
60 changes: 59 additions & 1 deletion src/util/wksp/fd_wksp_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ fd_wksp_checkpt( fd_wksp_t * wksp,

ulong sz = gaddr_hi - gaddr_lo;
void * laddr_lo = fd_wksp_laddr_fast( wksp, gaddr_lo );

#if FD_HAS_DEEPASAN
/* Copy the entire wksp over. This includes regions that may have been
poisoned at one point. */
Expand Down Expand Up @@ -492,3 +492,61 @@ fd_wksp_restore( fd_wksp_t * wksp,
# undef RBUF_FOOTPRINT
# undef RBUF_ALIGN
}

/* fd_wksp_restore_preview reads just the header of the checkpoint at
   path and reports the seed, part_max and data_max stored in it.  No
   workspace is modified.

   path is the checkpoint file; it is opened read-only and closed
   before returning.  out_seed, out_part_max and out_data_max receive
   the header values and must be non-NULL.

   Returns FD_WKSP_SUCCESS on success.  Returns FD_WKSP_ERR_INVAL if
   path or any output pointer is NULL.  Returns FD_WKSP_ERR_FAIL if the
   file cannot be opened, does not begin with FD_WKSP_MAGIC or uses a
   checkpoint style other than FD_WKSP_CHECKPT_STYLE_RAW.  Errors from
   reading the individual header fields are passed through as returned
   by fd_wksp_private_restore_ulong.  The outputs are only written on
   success, so a failed call never leaves a partial result behind. */

int
fd_wksp_restore_preview( char const * path,
                         uint *       out_seed,
                         ulong *      out_part_max,
                         ulong *      out_data_max ) {
  if( FD_UNLIKELY( !path ) ) {
    FD_LOG_WARNING(( "NULL path" ));
    return FD_WKSP_ERR_INVAL;
  }

  /* The outputs are dereferenced unconditionally below, so reject NULL
     up front the same way a NULL path is rejected. */
  if( FD_UNLIKELY( (!out_seed) || (!out_part_max) || (!out_data_max) ) ) {
    FD_LOG_WARNING(( "NULL output" ));
    return FD_WKSP_ERR_INVAL;
  }

  int fd = open( path, O_RDONLY, (mode_t)0 );
  if( FD_UNLIKELY( fd==-1 ) ) {
    FD_LOG_WARNING(( "open(\"%s\",O_RDONLY,0) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
    return FD_WKSP_ERR_FAIL;
  }

# define RBUF_ALIGN     (4096UL)
# define RBUF_FOOTPRINT (65536UL)

  uchar                    rbuf[ RBUF_FOOTPRINT ] __attribute__((aligned( RBUF_ALIGN )));
  fd_io_buffered_istream_t restore[1];
  fd_io_buffered_istream_init( restore, fd, rbuf, RBUF_FOOTPRINT );

  int err = FD_WKSP_SUCCESS;

# define RESTORE_ULONG(v) do {                            \
    err = fd_wksp_private_restore_ulong( restore, &v );   \
    if( FD_UNLIKELY( err ) ) { goto io_err; }             \
  } while(0)

  /* Header fields are staged in locals and only copied to the outputs
     once the whole header has been read successfully. */
  ulong magic    = 0UL;
  ulong style_ul = 0UL;
  ulong seed_ul  = 0UL;
  ulong part_max = 0UL;
  ulong data_max = 0UL;

  RESTORE_ULONG( magic );
  if( FD_UNLIKELY( magic!=FD_WKSP_MAGIC ) ) {
    FD_LOG_WARNING(( "\"%s\" does not look like a wksp checkpt (bad magic)", path ));
    err = FD_WKSP_ERR_FAIL;
    goto io_err;
  }

  RESTORE_ULONG( style_ul );
  int style = (int)(uint)style_ul;

  switch( style ) {

  case FD_WKSP_CHECKPT_STYLE_RAW: {
    RESTORE_ULONG( seed_ul  );
    RESTORE_ULONG( part_max );
    RESTORE_ULONG( data_max );
    *out_seed     = (uint)seed_ul;
    *out_part_max = part_max;
    *out_data_max = data_max;
    break;
  } /* FD_WKSP_CHECKPT_STYLE_RAW */

  default:
    FD_LOG_WARNING(( "\"%s\" uses unsupported checkpt style %i", path, style ));
    err = FD_WKSP_ERR_FAIL;
    break;
  }

io_err:
  fd_io_buffered_istream_fini( restore );

  if( FD_UNLIKELY( close( fd ) ) )
    FD_LOG_WARNING(( "close(\"%s\") failed (%i-%s); attempting to continue", path, errno, fd_io_strerror( errno ) ));

  return err;

  /* Keep the helper macros local to this function, matching
     fd_wksp_restore above. */
# undef RESTORE_ULONG
# undef RBUF_FOOTPRINT
# undef RBUF_ALIGN
}

0 comments on commit 8b23b6b

Please sign in to comment.