Skip to content

Commit

Permalink
[Tech Debt] Allow nodes to rejoin by saving index and config (#2168)
Browse files Browse the repository at this point in the history
* allow nodes to rejoin by saving config file and index

* refactor

* improvements and comments

* Merge in main

* PR improvements
  • Loading branch information
rob-maron authored Dec 9, 2023
1 parent 81722f1 commit 2d46f02
Show file tree
Hide file tree
Showing 13 changed files with 308 additions and 121 deletions.
42 changes: 32 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/hotshot/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -127,4 +127,4 @@ blake3 = { workspace = true }
clap = { version = "4.4", features = ["derive", "env"] }
serde_json = "1.0.108"
toml = { workspace = true }
hotshot-testing = { path = "../testing" }
hotshot-testing = { path = "../testing" }
1 change: 1 addition & 0 deletions crates/hotshot/examples/combined/all.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ async fn main() {
>(ValidatorArgs {
url: orchestrator_url,
public_ip: Some(IpAddr::V4(Ipv4Addr::LOCALHOST)),
network_config_file: None,
})
.await
});
Expand Down
25 changes: 4 additions & 21 deletions crates/hotshot/examples/combined/multi-validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@ use async_compatibility_layer::{
logging::{setup_backtrace, setup_logging},
};
use clap::Parser;
use hotshot_orchestrator::client::ValidatorArgs;
use hotshot_orchestrator::client::{MultiValidatorArgs, ValidatorArgs};
use hotshot_testing::state_types::TestTypes;
use std::net::IpAddr;
use surf_disco::Url;
use tracing::instrument;
use types::VIDNetwork;

Expand All @@ -17,17 +15,6 @@ pub mod types;
#[path = "../infra/mod.rs"]
pub mod infra;

#[derive(Parser, Debug, Clone)]
struct MultiValidatorArgs {
/// Number of validators to run
pub num_nodes: u16,
/// The address the orchestrator runs on
pub url: Url,
/// This node's public IP address, for libp2p
/// If no IP address is passed in, it will default to 127.0.0.1
pub public_ip: Option<IpAddr>,
}

#[cfg_attr(
async_executor_impl = "tokio",
tokio::main(flavor = "multi_thread", worker_threads = 2)
Expand All @@ -40,8 +27,8 @@ async fn main() {
let args = MultiValidatorArgs::parse();
tracing::error!("connecting to orchestrator at {:?}", args.url);
let mut nodes = Vec::new();
for _ in 0..args.num_nodes {
let url = args.url.clone();
for node_index in 0..args.num_nodes {
let args = args.clone();

let node = async_spawn(async move {
infra::main_entry_point::<
Expand All @@ -52,11 +39,7 @@ async fn main() {
VIDNetwork,
NodeImpl,
ThisRun,
>(ValidatorArgs {
url,

public_ip: args.public_ip,
})
>(ValidatorArgs::from_multi_args(args, node_index))
.await
});
nodes.push(node);
Expand Down
46 changes: 21 additions & 25 deletions crates/hotshot/examples/infra/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use hotshot::{
types::{SignatureKey, SystemContextHandle},
HotShotType, Memberships, Networks, SystemContext,
};
use hotshot_orchestrator::config::NetworkConfigSource;
use hotshot_orchestrator::{
self,
client::{OrchestratorClient, ValidatorArgs},
Expand Down Expand Up @@ -825,34 +826,27 @@ pub async fn main_entry_point<

error!("Starting validator");

let orchestrator_client: OrchestratorClient =
OrchestratorClient::connect_to_orchestrator(args.clone()).await;

// Identify with the orchestrator
// see what our public identity will be
let public_ip = match args.public_ip {
Some(ip) => ip,
None => local_ip_address::local_ip().unwrap(),
};
error!(
"Identifying with orchestrator using IP address {}",
public_ip.to_string()
);
let node_index: u16 = orchestrator_client
.identify_with_orchestrator(public_ip.to_string())
.await;
error!("Finished identifying; our node index is {node_index}");
error!("Getting config from orchestrator");

let mut run_config = orchestrator_client
.get_config_from_orchestrator::<TYPES>(node_index)
.await;

run_config.node_index = node_index.into();

let orchestrator_client: OrchestratorClient =
OrchestratorClient::new(args.clone(), public_ip.to_string()).await;

// conditionally save/load config from file or orchestrator
let (mut run_config, source) =
NetworkConfig::from_file_or_orchestrator(&orchestrator_client, args.network_config_file)
.await;

let node_index = run_config.node_index;
error!("Retrieved config; our node index is {node_index}");

run_config.config.my_own_validator_config =
ValidatorConfig::<<TYPES as NodeType>::SignatureKey>::generated_from_seed_indexed(
run_config.seed,
node_index.into(),
node_index,
1,
);
//run_config.libp2p_config.as_mut().unwrap().public_ip = args.public_ip.unwrap();
Expand Down Expand Up @@ -892,12 +886,14 @@ pub async fn main_entry_point<
}
}

error!("Waiting for start command from orchestrator");
orchestrator_client
.wait_for_all_nodes_ready(run_config.clone().node_index)
.await;
if let NetworkConfigSource::Orchestrator = source {
error!("Waiting for the start command from orchestrator");
orchestrator_client
.wait_for_all_nodes_ready(run_config.clone().node_index)
.await;
}

error!("All nodes are ready! Starting HotShot");
error!("Starting HotShot");
run.run_hotshot(
hotshot,
&mut transactions,
Expand Down
1 change: 1 addition & 0 deletions crates/hotshot/examples/libp2p/all.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ async fn main() {
>(ValidatorArgs {
url: orchestrator_url,
public_ip: Some(IpAddr::V4(Ipv4Addr::LOCALHOST)),
network_config_file: None,
})
.await
});
Expand Down
24 changes: 4 additions & 20 deletions crates/hotshot/examples/libp2p/multi-validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@ use async_compatibility_layer::{
logging::{setup_backtrace, setup_logging},
};
use clap::Parser;
use hotshot_orchestrator::client::ValidatorArgs;
use hotshot_orchestrator::client::{MultiValidatorArgs, ValidatorArgs};
use hotshot_testing::state_types::TestTypes;
use std::net::IpAddr;
use surf_disco::Url;
use tracing::instrument;
use types::VIDNetwork;

Expand All @@ -17,17 +15,6 @@ pub mod types;
#[path = "../infra/mod.rs"]
pub mod infra;

#[derive(Parser, Debug, Clone)]
struct MultiValidatorArgs {
/// Number of validators to run
pub num_nodes: u16,
/// The address the orchestrator runs on
pub url: Url,
/// This node's public IP address, for libp2p
/// If no IP address is passed in, it will default to 127.0.0.1
pub public_ip: Option<IpAddr>,
}

#[cfg_attr(
async_executor_impl = "tokio",
tokio::main(flavor = "multi_thread", worker_threads = 2)
Expand All @@ -40,8 +27,8 @@ async fn main() {
let args = MultiValidatorArgs::parse();
tracing::error!("connecting to orchestrator at {:?}", args.url);
let mut nodes = Vec::new();
for _ in 0..args.num_nodes {
let url = args.url.clone();
for node_index in 0..args.num_nodes {
let args = args.clone();

let node = async_spawn(async move {
infra::main_entry_point::<
Expand All @@ -52,10 +39,7 @@ async fn main() {
VIDNetwork,
NodeImpl,
ThisRun,
>(ValidatorArgs {
url,
public_ip: args.public_ip,
})
>(ValidatorArgs::from_multi_args(args, node_index))
.await
});
nodes.push(node);
Expand Down
1 change: 1 addition & 0 deletions crates/hotshot/examples/webserver/all.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ async fn main() {
>(ValidatorArgs {
url: orchestrator_url,
public_ip: Some(IpAddr::V4(Ipv4Addr::LOCALHOST)),
network_config_file: None,
})
.await
});
Expand Down
25 changes: 5 additions & 20 deletions crates/hotshot/examples/webserver/multi-validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@ use async_compatibility_layer::{
logging::{setup_backtrace, setup_logging},
};
use clap::Parser;
use hotshot_orchestrator::client::ValidatorArgs;
use hotshot_orchestrator::client::{MultiValidatorArgs, ValidatorArgs};
use hotshot_testing::state_types::TestTypes;
use std::net::IpAddr;
use surf_disco::Url;
use tracing::instrument;
use types::VIDNetwork;

Expand All @@ -17,17 +15,6 @@ pub mod types;
#[path = "../infra/mod.rs"]
pub mod infra;

#[derive(Parser, Debug, Clone)]
struct MultiValidatorArgs {
/// Number of validators to run
pub num_nodes: u16,
/// The address the orchestrator runs on
pub url: Url,
/// This node's public IP address, for libp2p
/// If no IP address is passed in, it will default to 127.0.0.1
pub public_ip: Option<IpAddr>,
}

#[cfg_attr(
async_executor_impl = "tokio",
tokio::main(flavor = "multi_thread", worker_threads = 2)
Expand All @@ -40,8 +27,9 @@ async fn main() {
let args = MultiValidatorArgs::parse();
tracing::error!("connecting to orchestrator at {:?}", args.url);
let mut nodes = Vec::new();
for _ in 0..args.num_nodes {
let url = args.url.clone();
for node_index in 0..args.num_nodes {
let args = args.clone();

let node = async_spawn(async move {
infra::main_entry_point::<
TestTypes,
Expand All @@ -51,10 +39,7 @@ async fn main() {
VIDNetwork,
NodeImpl,
ThisRun,
>(ValidatorArgs {
url,
public_ip: args.public_ip,
})
>(ValidatorArgs::from_multi_args(args, node_index))
.await
});
nodes.push(node);
Expand Down
1 change: 1 addition & 0 deletions crates/orchestrator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ serde = { workspace = true }
serde_json = "1.0.96"
snafu = { workspace = true }
toml = { workspace = true }
thiserror = "1.0.50"

[target.'cfg(all(async_executor_impl = "tokio"))'.dependencies]
tokio = { workspace = true }
Expand Down
Loading

0 comments on commit 2d46f02

Please sign in to comment.