Skip to content

Commit

Permalink
fix(ghost): getting stuck when >100%
Browse files Browse the repository at this point in the history
  • Loading branch information
lidatong committed Jul 12, 2024
1 parent e4f9d36 commit b03a46d
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 32 deletions.
100 changes: 68 additions & 32 deletions src/choreo/ghost/fd_ghost.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,23 @@
#pragma GCC diagnostic ignored "-Wformat-extra-args"

/* clang-format off */

void *
fd_ghost_new( void * shmem, ulong node_max, ulong vote_max, ulong seed ) {

if( FD_UNLIKELY( !shmem ) ) {
FD_LOG_WARNING( ( "NULL mem" ) );
FD_LOG_WARNING(( "NULL mem" ));
return NULL;
}

if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, fd_ghost_align() ) ) ) {
FD_LOG_WARNING( ( "misaligned mem" ) );
if( FD_UNLIKELY( !fd_ulong_is_aligned((ulong)shmem, fd_ghost_align() ) ) ) {
FD_LOG_WARNING(( "misaligned mem" ));
return NULL;
}

ulong footprint = fd_ghost_footprint( node_max, vote_max );
if( FD_UNLIKELY( !footprint ) ) {
FD_LOG_WARNING( ( "bad node_max (%lu) or vote_max (%lu)", node_max, vote_max ) );
FD_LOG_WARNING(( "bad node_max (%lu) or vote_max (%lu)", node_max, vote_max ));
return NULL;
}

Expand Down Expand Up @@ -53,15 +54,15 @@ fd_ghost_new( void * shmem, ulong node_max, ulong vote_max, ulong seed ) {
}

fd_ghost_t *
fd_ghost_join( void * shghost ) { /* process 1: 0xFA process 2: 0x2F */
fd_ghost_join( void * shghost ) {

if( FD_UNLIKELY( !shghost ) ) {
FD_LOG_WARNING( ( "NULL ghost" ) );
FD_LOG_WARNING(( "NULL ghost" ));
return NULL;
}

if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shghost, fd_ghost_align() ) ) ) {
FD_LOG_WARNING( ( "misaligned ghost" ) );
if( FD_UNLIKELY( !fd_ulong_is_aligned((ulong)shghost, fd_ghost_align() ) ) ) {
FD_LOG_WARNING(( "misaligned ghost" ));
return NULL;
}

Expand All @@ -88,17 +89,16 @@ fd_ghost_join( void * shghost ) { /* process 1: 0xFA process 2: 0x2F */
laddr += fd_ghost_vote_map_footprint( vote_max );

laddr = fd_ulong_align_up( laddr, fd_ghost_align() );
FD_TEST( laddr == (ulong)shghost + fd_ghost_footprint( node_max, vote_max ) );
FD_TEST( laddr == (ulong)shghost + fd_ghost_footprint( node_max, vote_max ));

return ghost;
}
/* clang-format on */

void *
fd_ghost_leave( fd_ghost_t const * ghost ) {

if( FD_UNLIKELY( !ghost ) ) {
FD_LOG_WARNING( ( "NULL ghost" ) );
FD_LOG_WARNING(( "NULL ghost" ));
return NULL;
}

Expand All @@ -109,12 +109,12 @@ void *
fd_ghost_delete( void * ghost ) {

if( FD_UNLIKELY( !ghost ) ) {
FD_LOG_WARNING( ( "NULL ghost" ) );
FD_LOG_WARNING(( "NULL ghost" ));
return NULL;
}

if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)ghost, fd_ghost_align() ) ) ) {
FD_LOG_WARNING( ( "misaligned ghost" ) );
if( FD_UNLIKELY( !fd_ulong_is_aligned((ulong)ghost, fd_ghost_align() ) ) ) {
FD_LOG_WARNING(( "misaligned ghost" ));
return NULL;
}

Expand All @@ -125,29 +125,33 @@ void
fd_ghost_init( fd_ghost_t * ghost, ulong root, ulong total_stake ) {

if( FD_UNLIKELY( !ghost ) ) {
FD_LOG_WARNING( ( "NULL ghost" ) );
FD_LOG_WARNING(( "NULL ghost" ));
return;
}

if( FD_UNLIKELY( root == FD_SLOT_NULL ) ) {
FD_LOG_WARNING( ( "NULL slot" ) );
FD_LOG_WARNING(( "NULL slot" ));
return;
}

if( FD_UNLIKELY( ghost->root ) ) {
FD_LOG_WARNING( ( "ghost already initialized" ) );
FD_LOG_WARNING(( "ghost already initialized" ));
return;
}

fd_ghost_node_t * node = fd_ghost_node_pool_ele_acquire( ghost->node_pool );
node->slot = root;
memset( node, 0, sizeof( fd_ghost_node_t ));
node->slot = root;

fd_ghost_node_map_ele_insert( ghost->node_map, node, ghost->node_pool );
ghost->root = node;
ghost->total_stake = total_stake;
ghost->root = node;
ghost->total_stake = total_stake;

return;
}

/* clang-format on */

fd_ghost_node_t *
fd_ghost_node_insert( fd_ghost_t * ghost, ulong slot, ulong parent_slot ) {

Expand All @@ -170,9 +174,8 @@ fd_ghost_node_insert( fd_ghost_t * ghost, ulong slot, ulong parent_slot ) {
#endif

fd_ghost_node_t * node = fd_ghost_node_pool_ele_acquire( ghost->node_pool );
node->slot = slot;
node->stake = 0;
node->weight = 0;
memset( node, 0, sizeof( fd_ghost_node_t ) );
node->slot = slot;

/* Insert into the map for O(1) random access. */

Expand All @@ -194,8 +197,7 @@ fd_ghost_node_insert( fd_ghost_t * ghost, ulong slot, ulong parent_slot ) {
&parent_slot,
NULL,
ghost->node_pool ) ) ) {
FD_LOG_WARNING( ( "[fd_ghost_node_insert] parent_slot %lu is missing from ghost.", parent_slot ) );
__asm__("int $3");
FD_LOG_ERR( ( "[fd_ghost_node_insert] missing parent_slot %lu.", parent_slot ) );
}
#endif

Expand Down Expand Up @@ -312,7 +314,18 @@ fd_ghost_replay_vote_upsert( fd_ghost_t * ghost,

if( FD_LIKELY( latest_vote ) ) {

/* Return early if this new vote is not newer than latest vote. */
/* Return if this new vote slot is not greater than the latest vote. It is
important that the vote slots are monotonically increasing,
because the order we receive blocks is non-deterministic (due to
network propagation variance), so we may process forks in a
different order from the sender of this vote.
For example, if a validator votes on A then switches to B, we
might instead process B then A. In this case, the validator's
vote account state on B would contain a strictly higher vote slot
than A (due to lockout), so we would observe while processing A,
that the vote slot < the latest vote slot we have saved for that
validator. */

if( FD_UNLIKELY( slot <= latest_vote->slot ) ) return;

Expand Down Expand Up @@ -351,8 +364,9 @@ fd_ghost_replay_vote_upsert( fd_ghost_t * ghost,
#if FD_GHOST_USE_HANDHOLDING
/* OOM: we've exceeded the max number of voter pubkeys that were
statically allocated. */
if( FD_UNLIKELY( !fd_ghost_vote_pool_free( ghost->vote_pool ) ) )
if( FD_UNLIKELY( !fd_ghost_vote_pool_free( ghost->vote_pool ) ) ) {
FD_LOG_ERR( ( "[ghost] vote_pool full. check # of validators." ) );
}
#endif

latest_vote = fd_ghost_vote_pool_ele_acquire( ghost->vote_pool );
Expand All @@ -374,6 +388,18 @@ fd_ghost_replay_vote_upsert( fd_ghost_t * ghost,
ancestor->weight += stake;
ancestor = ancestor->parent;
}

#if FD_GHOST_USE_HANDHOLDING
if( FD_UNLIKELY( node->stake > ghost->total_stake ) ) {
FD_LOG_ERR( ( "[fd_ghost_replay_vote_upsert] invariant violation. node->stake > total stake."
"slot: %lu, "
"node->stake %lu, "
"ghost->total_stake %lu",
slot,
node->stake,
ghost->total_stake ) );
}
#endif
}

void
Expand All @@ -390,11 +416,21 @@ fd_ghost_publish( fd_ghost_t * ghost, ulong slot ) {
fd_ghost_node_t * root = fd_ghost_node_query( ghost, slot );

#if FD_GHOST_USE_HANDHOLDING
if( FD_UNLIKELY( !root ) ) FD_LOG_ERR( ( "[fd_ghost_publish] slot %lu not found in ghost", slot ) );
#endif
if( FD_UNLIKELY( !root ) ) {
FD_LOG_ERR( ( "[fd_ghost_publish] publish slot %lu not found in ghost", slot ) );
}

if( FD_UNLIKELY( root < ghost->root ) ) {
FD_LOG_ERR( ( "[fd_ghost_publish] trying to publish slot %lu older than ghost->root %lu.",
slot,
ghost->root ) );
}

# if FD_GHOST_USE_HANDHOLDING
if( FD_UNLIKELY( root == ghost->root ) ) __asm__("int $3");
if( FD_UNLIKELY( root == ghost->root ) ) {
FD_LOG_ERR( ( "[fd_ghost_publish] publishing same slot %lu as ghost->root %lu.",
slot,
ghost->root ) );
}
#endif

/* First, remove the previous root, and add it to the prune list.
Expand Down Expand Up @@ -467,7 +503,7 @@ fd_ghost_is_ancestor( fd_ghost_t const * ghost, ulong ancestor_slot, ulong slot
#if FD_GHOST_USE_HANDHOLDING
if( FD_UNLIKELY( !ancestor ) ) {

/* slot not found in ghost. this can happen if ghost has pruned to a
/* Slot not found in ghost. This can happen if ghost has pruned to a
new root but forks has not yet been pruned to that same root. */

FD_LOG_WARNING( ( "[fd_ghost_is_ancestor] unable to find slot %lu in ghost. ghost root: %lu",
Expand Down
8 changes: 8 additions & 0 deletions src/choreo/ghost/fd_ghost.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ typedef struct fd_ghost fd_ghost_t;

FD_PROTOTYPES_BEGIN

/* Constructors */

/* fd_ghost_{align,footprint} return the required alignment and
footprint of a memory region suitable for use as ghost with up to
node_max nodes and vote_max votes. */
Expand Down Expand Up @@ -185,6 +187,8 @@ fd_ghost_delete( void * ghost );
void
fd_ghost_init( fd_ghost_t * ghost, ulong root, ulong total_stake );

/* Accessors */

/* fd_ghost_head_query returns ghost's head. Assumes caller has called
fd_ghost_init and that the ghost is non-empty, i.e. has a root. */

Expand Down Expand Up @@ -215,6 +219,8 @@ fd_ghost_node_query( fd_ghost_t * ghost, ulong slot );
fd_ghost_node_t const *
fd_ghost_node_query_const( fd_ghost_t const * ghost, ulong slot );

/* Operations */

/* fd_ghost_replay_vote_upsert inserts a replay vote into ghost.
The stake associated with pubkey is added to the ancestry chain
Expand Down Expand Up @@ -252,6 +258,8 @@ fd_ghost_gossip_vote_upsert( fd_ghost_t * ghost,
fd_ghost_node_t *
fd_ghost_publish( fd_ghost_t * ghost, ulong slot );

/* Utilities */

/* fd_ghost_is_ancestor checks if ancestor_slot is in fact an ancestor
of slot. Returns 1 if true, 0 otherwise. Assumes slot is present in
ghost (does not assume the same for ancestor_slot but warns when
Expand Down
9 changes: 9 additions & 0 deletions src/choreo/tower/fd_tower.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,15 @@ fd_tower_is_max_lockout( fd_tower_t const * tower ) {
return fd_tower_votes_cnt( tower->votes ) == FD_TOWER_VOTE_MAX;
}

/* fd_tower_is_in_sync returns 1 if our local view of our tower is in
sync with the cluster view of our tower, 0 otherwise. It checks if
our latest vote account state in fork matches our local tower. Warns
if the cluster tower is more recent than ours (this indicates we
restarted). */

int
fd_tower_is_in_sync( fd_tower_t const * tower, fd_fork_t * fork );

/* fd_tower_publish publishes the tower. Returns the new root. Assumes
caller has already checked that tower has reached max lockout (see
fd_tower_is_max_lockout). */
Expand Down

0 comments on commit b03a46d

Please sign in to comment.