From 32b892cdbbd42cb0e3ad071ce900d984a980eaa4 Mon Sep 17 00:00:00 2001 From: Richard Watts Date: Mon, 2 Dec 2024 20:47:45 +0000 Subject: [PATCH 1/2] (feat) Make z2 work again by providing a bootstrap address --- z2/src/node_spec.rs | 10 ++++----- z2/src/setup.rs | 53 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/z2/src/node_spec.rs b/z2/src/node_spec.rs index 7db852608..88ac4e01e 100644 --- a/z2/src/node_spec.rs +++ b/z2/src/node_spec.rs @@ -1,5 +1,5 @@ use std::{ - collections::{HashMap, HashSet}, + collections::{BTreeMap, HashSet}, default::Default, fmt, }; @@ -15,7 +15,7 @@ pub struct NodeDesc { #[derive(Clone, Debug, Serialize, Deserialize, Default)] pub struct Composition { - pub nodes: HashMap, + pub nodes: BTreeMap, } #[derive(Clone, Debug, Serialize, Deserialize, Default)] @@ -63,7 +63,7 @@ fn indices_from_string(input: &str) -> Result> { impl Composition { pub fn parse(from: &str) -> Result { let mut components = from.split('/'); - let mut nodes = HashMap::new(); + let mut nodes = BTreeMap::new(); if let Some(val) = components.next() { for v in indices_from_string(val)?
{ nodes.insert(v, NodeDesc { is_validator: true }); @@ -73,13 +73,13 @@ impl Composition { } pub fn single_node(is_validator: bool) -> Self { - let mut nodes = HashMap::new(); + let mut nodes = BTreeMap::new(); nodes.insert(0, NodeDesc { is_validator }); Self { nodes } } pub fn small_network() -> Self { - let mut nodes = HashMap::new(); + let mut nodes = BTreeMap::new(); for i in 0..4 { nodes.insert(i, NodeDesc { is_validator: true }); } diff --git a/z2/src/setup.rs b/z2/src/setup.rs index d22c8fd87..dd866744e 100644 --- a/z2/src/setup.rs +++ b/z2/src/setup.rs @@ -10,6 +10,7 @@ use alloy::{ }; use anyhow::{anyhow, Context, Result}; use k256::ecdsa::SigningKey; +use libp2p::{Multiaddr, PeerId}; use serde::{Deserialize, Serialize}; use serde_yaml; use tera::Tera; @@ -296,6 +297,10 @@ impl Setup { self.config.base_port + 2002 } + pub fn get_p2p_port(&self, index: u16) -> u16 { + index + 401 + self.config.base_port + } + pub fn get_explorer_url(&self) -> String { format!("http://localhost:{0}", self.get_otterscan_port()) } @@ -308,8 +313,31 @@ impl Setup { format!("0.0.0.0:{0}", self.get_docs_port()) } + pub fn get_p2p_multiaddr(&self, index: u16) -> Multiaddr { + // unwrap() is safe because this is a constant string - it's a bug in the program if + // it fails to parse. 
+ format!("/ip4/127.0.0.1/tcp/{0}", self.get_p2p_port(index)) + .parse() + .unwrap() + } + + pub fn get_peer_id(&self, index: u16) -> Result { + let node_data = self + .config + .node_data + .get(&u64::from(index)) + .ok_or(anyhow!("Cannot find node data for node {index}"))?; + let node_key = SecretKey::from_hex(&node_data.secret_key)?; + let libp2p_keypair = node_key.to_libp2p_keypair(); + Ok(PeerId::from_public_key(&libp2p_keypair.public())) + } + pub fn get_port_map(&self) -> String { let mut result = String::new(); + result.push_str(&format!( + "🦏 p2p ports are at {0}+\n", + self.get_p2p_port(0) + )); result.push_str(&format!( "🦏 JSON-RPC ports are at {0}+\n", self.get_json_rpc_port(0, false) @@ -495,13 +523,33 @@ impl Setup { self.config.shape.nodes.len(), &self.config_dir ); + + let bootstrap_address = + if let Some((first_index, _)) = self.config.shape.nodes.iter().next() { + let idx = u16::try_from(*first_index)?; + println!( + "Bootstrap_address has idx {idx} with node_data {0:?}", + self.config.node_data.get(&u64::from(idx)) + ); + + Some((self.get_peer_id(idx)?, self.get_p2p_multiaddr(idx))) + } else { + None + }; + for (node_index, _node_desc) in self.config.shape.nodes.iter() { println!("🎱 Generating configuration for node {node_index}..."); + let node_index_u16 = u16::try_from(*node_index)?; let mut cfg = zilliqa::cfg::Config { otlp_collector_endpoint: Some("http://localhost:4317".to_string()), - bootstrap_address: None, + bootstrap_address: bootstrap_address.clone(), nodes: Vec::new(), - p2p_port: 0, + p2p_port: self.get_p2p_port(node_index_u16), + // No external address is needed right now, because tcp connections can be + // called back, so can act as an autonat server. Effectively, this means the + // bootstrap_address specifies the external address. If this is ever not the + // case, we will need an external_address to be specified here. + // - rrw 2024-12-02 external_address: None, }; // @todo should pass this in! 
@@ -584,7 +632,6 @@ impl Setup { cfg.nodes = Vec::new(); cfg.nodes.push(node_config); - cfg.p2p_port = 0; // Now write the config. let config_path = self.get_config_path(*node_index)?; println!("🪅 Writing configuration file for node {0} .. ", node_index); From 374ce51ad4887dd01e91fbd243727576047148cc Mon Sep 17 00:00:00 2001 From: Richard Watts Date: Tue, 3 Dec 2024 15:17:47 +0000 Subject: [PATCH 2/2] (feat) Rebootstrap if you end up with no nodes and no active connections, unless you are the bootstrap, in which case wait for someone to contact you. --- z2/src/setup.rs | 14 ++++++++------ zilliqa/src/p2p_node.rs | 27 ++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/z2/src/setup.rs b/z2/src/setup.rs index dd866744e..e9336eacd 100644 --- a/z2/src/setup.rs +++ b/z2/src/setup.rs @@ -545,12 +545,14 @@ impl Setup { bootstrap_address: bootstrap_address.clone(), nodes: Vec::new(), p2p_port: self.get_p2p_port(node_index_u16), - // No external address is needed right now, because tcp connections can be - // called back, so can act as an autonat server. Effectively, this means the - // bootstrap_address specifies the external address. If this is ever not the - // case, we will need an external_address to be specified here. - // - rrw 2024-12-02 - external_address: None, + // libp2p's autonat will attempt to infer an external address by having + // the called peer call back. The caller attempts to facilitate this by + // careful choice of outgoing port. + // Sometimes this isn't possible, external address discovery fails, and in + // z2's case, the network cannot form. Specify the external address so that + // we never need to ask (autonat will still fail, but kademlia will be happy + // and the network will operate) + external_address: Some(self.get_p2p_multiaddr(node_index_u16)), }; // @todo should pass this in!
let port = self.get_json_rpc_port(*node_index as u16, false); diff --git a/zilliqa/src/p2p_node.rs b/zilliqa/src/p2p_node.rs index 3e3d5e127..c37bac0d4 100644 --- a/zilliqa/src/p2p_node.rs +++ b/zilliqa/src/p2p_node.rs @@ -26,6 +26,7 @@ use tokio::{ signal::{self, unix::SignalKind}, sync::mpsc::{self, error::SendError, UnboundedSender}, task::JoinSet, + time::{self, Instant}, }; use tokio_stream::wrappers::UnboundedReceiverStream; use tracing::*; @@ -239,6 +240,9 @@ impl P2pNode { } } + let sleep = time::sleep(Duration::from_millis(5)); + tokio::pin!(sleep); + let mut terminate = signal::unix::signal(SignalKind::terminate())?; loop { @@ -294,7 +298,6 @@ impl P2pNode { } } } - SwarmEvent::Behaviour(BehaviourEvent::RequestResponse(request_response::Event::Message { message, peer: _source })) => { match message { request_response::Message::Request { request, channel: _channel, request_id: _request_id, .. } => { @@ -434,6 +437,28 @@ impl P2pNode { break; } } + () = &mut sleep => { + let net_info = self.swarm.network_info(); + trace!("p2p_node tick {0} / {1} ", net_info.num_peers(), net_info.connection_counters().num_connections() ); + if net_info.num_peers() == 0 && net_info.connection_counters().num_connections() == 0 { + // We have no peers and no connections. Try bootstrapping.. + if let Some((peer, address)) = &self.config.bootstrap_address { + if self.swarm.local_peer_id() == peer { + debug!("p2p_node: can't bootstrap against myself"); + } else { + debug!("p2p_node: no peers and no connections - bootstrapping!"); + self.swarm + .behaviour_mut() + .kademlia + .add_address(peer, address.clone()); + self.swarm.behaviour_mut().kademlia.bootstrap()?; + } + } else { + debug!("p2p_node: no peers and no connections, but no bootstrap either! We may be stuck"); + } + } + sleep.as_mut().reset(Instant::now() + Duration::from_millis(10000)); + }, _ = terminate.recv() => { self.shard_threads.shutdown().await; break;