Skip to content

Commit

Permalink
feat: DEVOPS-1797 implement z2 deployer monitor
Browse files Browse the repository at this point in the history
  • Loading branch information
frankmeds committed Dec 27, 2024
1 parent 30a2461 commit d98c207
Show file tree
Hide file tree
Showing 6 changed files with 223 additions and 22 deletions.
38 changes: 37 additions & 1 deletion z2/docs/deployer.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Commands:
restore Restore a node data dir from a backup in the persistence bucket
reset Reset a network stopping all the nodes and cleaning the /data folder
restart Restart a network stopping all the nodes and starting the service again
block-number Show the network nodes block number
monitor Show the network nodes specified metrics
api Perform operation over the network API nodes
generate-private-keys Generate the node private keys. --force to replace if already existing
generate-genesis-key Generate the genesis key. --force to replace if already existing
Expand Down Expand Up @@ -636,3 +636,39 @@ Configuration file: zq2-prototestnet.yaml
```bash
z2 deployer api -o attach zq2-prototestnet.yaml
```
## Monitor the network nodes specified metrics
```bash
z2 deployer monitor --help
```
```bash
Monitor the network nodes specified metrics
Usage: z2 deployer monitor [OPTIONS] [CONFIG_FILE]
Arguments:
[CONFIG_FILE] The network deployer config file
Options:
--metric <METRIC> The metric to display. Default: block-number [possible values: block-number, consensus-info]
--select Enable nodes selection
--follow After showing the metrics, watch for changes
-v, --verbose... Increase logging verbosity
-q, --quiet... Decrease logging verbosity
-h, --help Print help
```
### Usage example
#### Monitor the nodes' block number
```yaml
Network name: zq2-prototestnet
Configuration file: zq2-prototestnet.yaml
```
```bash
z2 deployer monitor --metric block-number --follow zq2-prototestnet.yaml
```
34 changes: 21 additions & 13 deletions z2/src/bin/z2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use libp2p::PeerId;
use z2lib::{
chain::{self, node::NodePort},
components::Component,
deployer::ApiOperation,
deployer::{ApiOperation, Metrics},
node_spec::{Composition, NodeSpec},
plumbing, utils, validators,
};
Expand Down Expand Up @@ -102,8 +102,8 @@ enum DeployerCommands {
Reset(DeployerActionsArgs),
/// Restart a network stopping all the nodes and starting the service again
Restart(DeployerActionsArgs),
/// Show the network nodes block number
BlockNumber(DeployerBlockNumberArgs),
/// Monitor the network nodes specified metrics
Monitor(DeployerMonitorArgs),
/// Perform operation over the network API nodes
Api(DeployerApiArgs),
/// Generate the node private keys. --force to replace if already existing
Expand Down Expand Up @@ -174,15 +174,18 @@ pub struct DeployerActionsArgs {
}

#[derive(Args, Debug)]
pub struct DeployerBlockNumberArgs {
/// The network deployer config file
config_file: Option<String>,
pub struct DeployerMonitorArgs {
/// The metric to display. Default: block-number
#[clap(long)]
metric: Option<Metrics>,
/// Enable nodes selection
#[clap(long)]
select: bool,
/// After showing the block numbers, watch for changes
/// After showing the metrics, watch for changes
#[clap(long)]
follow: bool,
/// The network deployer config file
config_file: Option<String>,
}

#[derive(Args, Debug)]
Expand Down Expand Up @@ -929,17 +932,22 @@ async fn main() -> Result<()> {
})?;
Ok(())
}
DeployerCommands::BlockNumber(ref arg) => {
DeployerCommands::Monitor(ref arg) => {
let config_file: String = arg.config_file.clone().ok_or_else(|| {
anyhow::anyhow!(
"Provide a configuration file. [--config-file] mandatory argument"
)
})?;
plumbing::run_deployer_block_number(&config_file, arg.select, arg.follow)
.await
.map_err(|err| {
anyhow::anyhow!("Failed to run deployer block-number command: {}", err)
})?;
plumbing::run_deployer_monitor(
&config_file,
arg.metric.clone().unwrap_or_default(),
arg.select,
arg.follow,
)
.await
.map_err(|err| {
anyhow::anyhow!("Failed to run deployer monitor command: {}", err)
})?;
Ok(())
}
DeployerCommands::GenerateGenesisKey(ref arg) => {
Expand Down
142 changes: 141 additions & 1 deletion z2/src/chain/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,82 @@ impl Machine {

Ok(u64::from_str_radix(block_number, 16)?)
}

/// Calls the `admin_consensusInfo` RPC method on this machine's admin port
/// and returns the `result` member of the JSON response.
///
/// # Errors
///
/// Fails when the RPC request fails, when the response body is not valid
/// JSON, or when the parsed response has no `result` member.
pub async fn get_consensus_info(&self, timeout: usize) -> Result<Value> {
    let body = self
        .get_rpc_response("admin_consensusInfo", &None, timeout, NodePort::Admin)
        .await?;
    let payload: Value = serde_json::from_str(&body)?;

    // Only the `result` member is handed back to the caller.
    match payload.get("result") {
        Some(result) => Ok(result.clone()),
        None => Err(anyhow!("response has no result")),
    }
}
}

/// Consensus state of a single node, deserialized from the `result` value
/// returned by `Machine::get_consensus_info`.
///
/// The `Default` value is a placeholder (`"---"` strings and zero durations).
#[derive(Debug, Deserialize)]
struct ConsensusInfo {
// View reported by the node, kept as the string the node sent.
// NOTE(review): presumably a hex- or decimal-encoded number — confirm against the RPC.
view: String,
// The `high_qc` object of the response; rendered on its own lines by `Display`.
high_qc: HighQc,
// Printed with an `ms` suffix as `time_since_last_view_change`.
milliseconds_since_last_view_change: u64,
// Printed with an `ms` suffix as `time_until_next_view_change`.
milliseconds_until_next_view_change: u64,
}

impl Default for ConsensusInfo {
fn default() -> Self {
Self {
view: "---".to_string(),
high_qc: HighQc::default(),
milliseconds_since_last_view_change: u64::MIN,
milliseconds_until_next_view_change: u64::MIN,
}
}
}

impl fmt::Display for ConsensusInfo {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"view: {}\ttime_since_last_view_change: {}ms\ttime_until_next_view_change: {}ms\n{} {}",
self.view,
self.milliseconds_since_last_view_change,
self.milliseconds_until_next_view_change,
"high_qc:".bold(),
self.high_qc
)
}
}

/// The `high_qc` member of a node's consensus info, with every field kept as
/// the string the node sent.
///
/// The `Default` value uses `"---"` for every field.
// NOTE(review): "QC" presumably stands for quorum certificate — confirm.
#[derive(Debug, Deserialize)]
struct HighQc {
// Printed last by `Display`, on its own line, labelled `sign`.
signature: String,
// Printed as `cosigned`.
// NOTE(review): presumably an encoding of which validators signed — confirm.
cosigned: String,
// Printed as `view`.
view: String,
// Printed as `block_hash`.
block_hash: String,
}

impl Default for HighQc {
fn default() -> Self {
Self {
signature: "---".to_string(),
cosigned: "---".to_string(),
view: "---".to_string(),
block_hash: "---".to_string(),
}
}
}

impl fmt::Display for HighQc {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"view: {}\tblock_hash: {}\tcosigned: {}\nsign: {}",
self.view, self.block_hash, self.cosigned, self.signature
)
}
}

#[derive(Clone, Debug)]
Expand Down Expand Up @@ -1016,7 +1092,7 @@ impl ChainNode {
Ok(())
}

pub async fn get_block(
pub async fn get_block_number(
&self,
multi_progress: &indicatif::MultiProgress,
follow: bool,
Expand Down Expand Up @@ -1099,6 +1175,70 @@ impl ChainNode {
Ok(())
}

pub async fn get_consensus_info(
&self,
multi_progress: &indicatif::MultiProgress,
follow: bool,
) -> Result<()> {
const BAR_SIZE: u64 = 40;
const INTERVAL_IN_SEC: u64 = 5;
const BAR_BLOCK_PER_TIME: u64 = 8;
const BAR_REFRESH_IN_MILLIS: u64 = INTERVAL_IN_SEC * 1000 / BAR_SIZE * BAR_BLOCK_PER_TIME;

let progress_bar = multi_progress.add(indicatif::ProgressBar::new(BAR_SIZE));
progress_bar.set_style(
indicatif::ProgressStyle::default_bar()
.template(&format!(
"--------------------------------------------------------\n{{spinner:.green}} {} {{bar:{}.cyan/blue}} {{msg}}",
self.name().yellow(),
BAR_SIZE
))
.unwrap()
.progress_chars("#>-"),
);

let response = self
.machine
.get_consensus_info(INTERVAL_IN_SEC as usize)
.await
.ok();

let consensus_info = response.map_or(ConsensusInfo::default(), |ci| {
serde_json::from_value(ci).expect("Failed to parse JSON")
});

let mut message = format!("{}", consensus_info);
progress_bar.set_message(message.clone());

if follow {
loop {
for i in 1..=(BAR_SIZE / BAR_BLOCK_PER_TIME) {
tokio::time::sleep(tokio::time::Duration::from_millis(BAR_REFRESH_IN_MILLIS))
.await;
progress_bar.set_position(i * BAR_BLOCK_PER_TIME);
}

let response = self
.machine
.get_consensus_info(INTERVAL_IN_SEC as usize)
.await
.ok();

let consensus_info = response.map_or(ConsensusInfo::default(), |ci| {
serde_json::from_value(ci).expect("Failed to parse JSON")
});

message = format!("{}", consensus_info);
progress_bar.set_message(message);
progress_bar.set_position(0);
}
}

progress_bar.finish_with_message(message);

Ok(())
}

pub async fn api_attach(&self, multi_progress: &MultiProgress) -> Result<()> {
let machine = &self.machine;
let progress_bar = multi_progress.add(cliclack::progress_bar(1));
Expand Down
20 changes: 18 additions & 2 deletions z2/src/deployer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -953,7 +953,19 @@ pub async fn run_api_operation(config_file: &str, operation: ApiOperation) -> Re
Ok(())
}

pub async fn run_block_number(config_file: &str, node_selection: bool, follow: bool) -> Result<()> {
/// Metric that the deployer `monitor` command displays for each node.
// NOTE: the variants carry plain `//` comments on purpose: `///` doc comments
// on variants are picked up by clap's `ValueEnum` derive as help text and
// would change the `--help` output.
#[derive(Clone, Debug, Default, ValueEnum)]
pub enum Metrics {
// Default metric; `run_monitor` dispatches it to `get_block_number`.
#[default]
BlockNumber,
// `run_monitor` dispatches it to `get_consensus_info`.
ConsensusInfo,
}

pub async fn run_monitor(
config_file: &str,
metric: Metrics,
node_selection: bool,
follow: bool,
) -> Result<()> {
let config = NetworkConfig::from_file(config_file).await?;
let chain = ChainInstance::new(config).await?;
let mut chain_nodes = chain.nodes().await?;
Expand Down Expand Up @@ -987,10 +999,14 @@ pub async fn run_block_number(config_file: &str, node_selection: bool, follow: b
let multi_progress = indicatif::MultiProgress::new();

for node in target_nodes {
let metric = metric.to_owned();
let permit = semaphore.clone().acquire_owned().await?;
let mp = multi_progress.to_owned();
let future = task::spawn(async move {
let result = node.get_block(&mp, follow).await;
let result = match metric {
Metrics::BlockNumber => node.get_block_number(&mp, follow).await,
Metrics::ConsensusInfo => node.get_consensus_info(&mp, follow).await,
};
drop(permit); // Release the permit when the task is done
(node, result)
});
Expand Down
9 changes: 5 additions & 4 deletions z2/src/plumbing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::{
self,
node::{NodePort, NodeRole},
},
deployer::ApiOperation,
deployer::{ApiOperation, Metrics},
kpi,
node_spec::{Composition, NodeSpec},
utils,
Expand Down Expand Up @@ -291,13 +291,14 @@ pub async fn run_deployer_restart(config_file: &str, node_selection: bool) -> Re
Ok(())
}

pub async fn run_deployer_block_number(
pub async fn run_deployer_monitor(
config_file: &str,
metric: Metrics,
node_selection: bool,
follow: bool,
) -> Result<()> {
println!("🦆 Running block-number for {config_file} .. ");
deployer::run_block_number(config_file, node_selection, follow).await?;
println!("🦆 Running monitor for {config_file} .. ");
deployer::run_monitor(config_file, metric, node_selection, follow).await?;
Ok(())
}

Expand Down
2 changes: 1 addition & 1 deletion zq2-infratest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ roles:
- apps
- checkpoint
versions:
zq2: v0.5.1
zq2: 30a24610
otterscan: develop
spout: main

0 comments on commit d98c207

Please sign in to comment.