Skip to content

Commit

Permalink
Merge branch 'lr/variable-stake-table' into lr/double-quorum
Browse files Browse the repository at this point in the history
  • Loading branch information
lukaszrzasik committed Dec 5, 2024
2 parents 23d000d + 356a0e2 commit 89bce25
Show file tree
Hide file tree
Showing 9 changed files with 265 additions and 99 deletions.
44 changes: 16 additions & 28 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ derive_more = { version = "1.0" }
digest = "0.10"
either = "1.13"
espresso-systems-common = { git = "https://github.com/espressosystems/espresso-systems-common", tag = "0.4.1" }
primitive-types = { version = "0.13.1", default-features = false, features = [
primitive-types = { version = "0.12.2", default-features = false, features = [
"serde",
] }
futures = { version = "0.3", default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ RUST_LOG=$ERROR_LOG_LEVEL RUST_LOG_FORMAT=$ERROR_LOG_FORMAT just run_test test_s

## Careful

To double check for UB:
To double-check for UB:

```bash
nix develop .#correctnessShell
Expand Down
4 changes: 2 additions & 2 deletions crates/examples/push-cdn/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ sleep 1m
just example_fixed_leader multi-validator-push-cdn -- 9 http://127.0.0.1:4444
```

Where ones using `example_gpuvid_leader` could be the leader and should be running on a nvidia GPU, and other validators using `example_fixed_leader` will never be a leader. In practice, these url should be changed to the corresponding ip and port.
Validators started with `example_gpuvid_leader` can become the leader and should be running on an NVIDIA GPU, while validators started with `example_fixed_leader` will never be a leader. In practice, these URLs should be changed to the corresponding IP address and port.


If you don't have a GPU but want to test out the fixed leader, you can run:
Expand All @@ -65,4 +65,4 @@ sleep 1m
just example_fixed_leader multi-validator-push-cdn -- 9 http://127.0.0.1:4444
```

Remember, you have to run leaders first, then other validators, so that leaders will have lower index.
Remember, you have to run the leaders first, then the other validators, so that the leaders get the lower indices.
2 changes: 1 addition & 1 deletion crates/hotshot-stake-table/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,6 @@ pub trait ToFields<F: Field> {
/// convert a U256 to a field element.
pub(crate) fn u256_to_field<F: PrimeField>(v: &U256) -> F {
let mut bytes = vec![0u8; 32];
v.write_as_little_endian(&mut bytes);
v.to_little_endian(&mut bytes);
F::from_le_bytes_mod_order(&bytes)
}
17 changes: 12 additions & 5 deletions crates/hotshot/src/tasks/task_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,17 @@ use std::{
use async_trait::async_trait;
use chrono::Utc;
use hotshot_task_impls::{
builder::BuilderClient, consensus::ConsensusTaskState, da::DaTaskState,
quorum_proposal::QuorumProposalTaskState, quorum_proposal_recv::QuorumProposalRecvTaskState,
quorum_vote::QuorumVoteTaskState, request::NetworkRequestState, rewind::RewindTaskState,
transactions::TransactionTaskState, upgrade::UpgradeTaskState, vid::VidTaskState,
builder::BuilderClient,
consensus::ConsensusTaskState,
da::DaTaskState,
quorum_proposal::QuorumProposalTaskState,
quorum_proposal_recv::QuorumProposalRecvTaskState,
quorum_vote::{drb_computations::DrbComputations, QuorumVoteTaskState},
request::NetworkRequestState,
rewind::RewindTaskState,
transactions::TransactionTaskState,
upgrade::UpgradeTaskState,
vid::VidTaskState,
view_sync::ViewSyncTaskState,
};
use hotshot_types::{
Expand Down Expand Up @@ -235,7 +242,7 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>, V: Versions> CreateTaskState
vote_dependencies: BTreeMap::new(),
network: Arc::clone(&handle.hotshot.network),
membership: (*handle.hotshot.memberships).clone().into(),
drb_computations: BTreeMap::new(),
drb_computations: DrbComputations::new(),
output_event_stream: handle.hotshot.external_event_stream.0.clone(),
id: handle.hotshot.id,
storage: Arc::clone(&handle.storage),
Expand Down
126 changes: 126 additions & 0 deletions crates/task-impls/src/quorum_vote/drb_computations.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
use std::collections::{btree_map, BTreeMap};

use hotshot_types::{
drb::{compute_drb_result, DrbResult, DrbSeedInput},
traits::node_implementation::{ConsensusTime, NodeType},
};
use tokio::{spawn, task::JoinHandle};

/// Number of previous results and seeds to keep.
///
/// `DrbComputations::garbage_collect(epoch)` retains results for epochs
/// `>= epoch - KEEP_PREVIOUS_RESULT_COUNT` and seeds for epochs
/// `>= epoch - KEEP_PREVIOUS_RESULT_COUNT + 1`; it does nothing while `epoch` is smaller than
/// this count.
pub const KEEP_PREVIOUS_RESULT_COUNT: u64 = 8;

/// Helper struct to track state of DRB computations.
///
/// Results and seeds are keyed by epoch; at most one computation task is tracked at a time.
pub struct DrbComputations<TYPES: NodeType> {
/// Stored results from computations, keyed by the epoch the computation was run for
results: BTreeMap<TYPES::Epoch, DrbResult>,

/// Currently live computation together with the epoch it was spawned for,
/// or `None` when no task is being tracked
task: Option<(TYPES::Epoch, JoinHandle<DrbResult>)>,

/// Stored inputs to computations, keyed by epoch. An entry is removed once a task has been
/// spawned for its epoch.
seeds: BTreeMap<TYPES::Epoch, DrbSeedInput>,
}

impl<TYPES: NodeType> DrbComputations<TYPES> {
    /// Creates a new `DrbComputations` with no stored results, no stored seeds and no live task.
    #[must_use]
    pub fn new() -> Self {
        Self {
            results: BTreeMap::new(),
            task: None,
            seeds: BTreeMap::new(),
        }
    }

    /// Settles the currently tracked task (if any) with respect to `epoch`.
    ///
    /// - Task finished: it is joined and the task slot is cleared. On success the result is
    ///   stored under the task's epoch and `true` is returned iff that epoch equals `epoch`.
    ///   On a join error the error is logged and `false` is returned.
    /// - Task still running for `epoch`: it is left running and `true` is returned.
    /// - Task still running for a different epoch: it is aborted, the slot is cleared and
    ///   `false` is returned.
    /// - No task: `false` is returned.
    ///
    /// A return value of `false` means the caller may spawn a task for `epoch`.
    async fn join_or_abort_old_task(&mut self, epoch: TYPES::Epoch) -> bool {
        let Some((task_epoch, join_handle)) = &mut self.task else {
            return false;
        };

        if join_handle.is_finished() {
            let finished_epoch = *task_epoch;
            // The handle is awaited through a borrow so that, should this future be dropped
            // before the join completes, the task is still tracked in `self.task`.
            let joined = join_handle.await;

            // Once a `JoinHandle` has yielded its output it must never be awaited again, so
            // the slot is cleared for BOTH outcomes. Leaving a failed handle behind would make
            // the next call await it a second time.
            self.task = None;

            match joined {
                Ok(result) => {
                    self.results.insert(finished_epoch, result);
                    finished_epoch == epoch
                }
                Err(e) => {
                    tracing::error!("error joining DRB computation task: {e:?}");
                    false
                }
            }
        } else if *task_epoch == epoch {
            true
        } else {
            join_handle.abort();
            self.task = None;
            false
        }
    }

    /// Stores a seed for a particular epoch for later use by start_task_if_not_running, called from handle_quorum_proposal_validated_drb_calculation_start
    pub fn store_seed(&mut self, epoch: TYPES::Epoch, drb_seed_input: DrbSeedInput) {
        self.seeds.insert(epoch, drb_seed_input);
    }

    /// Starts a new task. Cancels a current task if that task is not for the provided epoch. Allows a task to continue
    /// running if it was already started for the given epoch. Avoids running the task if we already have a result for
    /// the epoch.
    ///
    /// A task is only spawned when a seed has been stored for `epoch`; the seed is removed from
    /// the seed map once the task has been spawned.
    pub async fn start_task_if_not_running(&mut self, epoch: TYPES::Epoch) {
        // If join_or_abort_old_task returns true, then we either just completed a task for this epoch, or we currently
        // have a running task for the epoch.
        if self.join_or_abort_old_task(epoch).await {
            return;
        }

        // In case we somehow ended up processing this epoch already, don't start it again
        if self.results.contains_key(&epoch) {
            return;
        }

        if let btree_map::Entry::Occupied(entry) = self.seeds.entry(epoch) {
            let drb_seed_input = *entry.get();
            let new_drb_task = spawn(async move { compute_drb_result::<TYPES>(drb_seed_input) });
            self.task = Some((epoch, new_drb_task));
            entry.remove();
        }
    }

    /// Retrieves the result for a given epoch, or `None` if no result is stored for it
    pub fn get_result(&self, epoch: TYPES::Epoch) -> Option<DrbResult> {
        self.results.get(&epoch).copied()
    }

    /// Retrieves the seed for a given epoch, or `None` if no seed is stored for it
    /// (including when the seed has already been consumed by a spawned task)
    pub fn get_seed(&self, epoch: TYPES::Epoch) -> Option<DrbSeedInput> {
        self.seeds.get(&epoch).copied()
    }

    /// Garbage collects internal data structures.
    ///
    /// Does nothing while `epoch` is smaller than `KEEP_PREVIOUS_RESULT_COUNT`. Otherwise, with
    /// `retain_epoch = epoch - KEEP_PREVIOUS_RESULT_COUNT`, results for epochs `< retain_epoch`
    /// and seeds for epochs `< retain_epoch + 1` are dropped.
    pub fn garbage_collect(&mut self, epoch: TYPES::Epoch) {
        if epoch.u64() < KEEP_PREVIOUS_RESULT_COUNT {
            return;
        }

        let retain_epoch = epoch - KEEP_PREVIOUS_RESULT_COUNT;
        // N.B. x.split_off(y) returns the part of the map where key >= y

        // Remove result entries older than retain_epoch
        self.results = self.results.split_off(&retain_epoch);

        // Remove seed entries older than retain_epoch + 1
        self.seeds = self.seeds.split_off(&(retain_epoch + 1));
    }
}

/// `Default` delegates to `DrbComputations::new`.
impl<TYPES: NodeType> Default for DrbComputations<TYPES> {
fn default() -> Self {
Self::new()
}
}
Loading

0 comments on commit 89bce25

Please sign in to comment.