From bdc49619c82dd7a8d0e076cc171b452ef8955ca8 Mon Sep 17 00:00:00 2001 From: Asmir Avdicevic Date: Fri, 5 Jul 2024 14:07:09 +0200 Subject: [PATCH] refactor: metrics --- iroh-blobs/src/downloader.rs | 9 +++++ iroh-blobs/src/metrics.rs | 30 ++++++++++++++++ iroh-cli/src/commands/doctor.rs | 22 +----------- iroh-dns-server/src/metrics.rs | 4 +-- iroh-docs/src/actor.rs | 3 ++ iroh-docs/src/engine/gossip.rs | 6 ++++ iroh-docs/src/engine/live.rs | 8 +++++ iroh-docs/src/metrics.rs | 44 +++++++++++++++++++++++ iroh-gossip/src/metrics.rs | 24 +++++++++++++ iroh-gossip/src/net.rs | 14 +++++++- iroh-metrics/src/lib.rs | 22 ++++++++++++ iroh-net/src/magicsock.rs | 9 +++++ iroh-net/src/magicsock/metrics.rs | 58 +++++++++++++++++++++---------- iroh-net/src/netcheck/metrics.rs | 4 +-- iroh-net/src/relay/metrics.rs | 56 ++++++++++++++--------------- iroh/src/metrics.rs | 50 ++++++++++++++++++++++---- 16 files changed, 283 insertions(+), 80 deletions(-) diff --git a/iroh-blobs/src/downloader.rs b/iroh-blobs/src/downloader.rs index 2f8e3bc7db..587e09f85c 100644 --- a/iroh-blobs/src/downloader.rs +++ b/iroh-blobs/src/downloader.rs @@ -40,6 +40,7 @@ use std::{ use futures_lite::{future::BoxedLocal, Stream, StreamExt}; use hashlink::LinkedHashSet; use iroh_base::hash::{BlobFormat, Hash, HashAndFormat}; +use iroh_metrics::inc; use iroh_net::{endpoint, Endpoint, NodeAddr, NodeId}; use tokio::{ sync::{mpsc, oneshot}, @@ -50,6 +51,7 @@ use tracing::{debug, error_span, trace, warn, Instrument}; use crate::{ get::{db::DownloadProgress, Stats}, + metrics::Metrics, store::Store, util::progress::ProgressSender, }; @@ -566,13 +568,16 @@ impl<G: Getter<Connection = D::Connection>, D: Dialer> Service<G, D> { async fn run(mut self) { loop { trace!("wait for tick"); + inc!(Metrics, downloader_tick_main); tokio::select! 
{ Some((node, conn_result)) = self.dialer.next() => { trace!(node=%node.fmt_short(), "tick: connection ready"); + inc!(Metrics, downloader_tick_connection_ready); self.on_connection_ready(node, conn_result); } maybe_msg = self.msg_rx.recv() => { trace!(msg=?maybe_msg, "tick: message received"); + inc!(Metrics, downloader_tick_message_received); match maybe_msg { Some(msg) => self.handle_message(msg).await, None => return self.shutdown().await, @@ -582,21 +587,25 @@ impl<G: Getter<Connection = D::Connection>, D: Dialer> Service<G, D> { match res { Ok((kind, result)) => { trace!(%kind, "tick: transfer completed"); + inc!(Metrics, downloader_tick_transfer_completed); self.on_download_completed(kind, result); } Err(err) => { warn!(?err, "transfer task panicked"); + inc!(Metrics, downloader_tick_transfer_failed); } } } Some(expired) = self.retry_nodes_queue.next() => { let node = expired.into_inner(); trace!(node=%node.fmt_short(), "tick: retry node"); + inc!(Metrics, downloader_tick_retry_node); self.on_retry_wait_elapsed(node); } Some(expired) = self.goodbye_nodes_queue.next() => { let node = expired.into_inner(); trace!(node=%node.fmt_short(), "tick: goodbye node"); + inc!(Metrics, downloader_tick_goodbye_node); self.disconnect_idle_node(node, "idle expired"); } } diff --git a/iroh-blobs/src/metrics.rs b/iroh-blobs/src/metrics.rs index cdb6d66033..d44f70cba6 100644 --- a/iroh-blobs/src/metrics.rs +++ b/iroh-blobs/src/metrics.rs @@ -14,6 +14,14 @@ pub struct Metrics { pub downloads_success: Counter, pub downloads_error: Counter, pub downloads_notfound: Counter, + + pub downloader_tick_main: Counter, + pub downloader_tick_connection_ready: Counter, + pub downloader_tick_message_received: Counter, + pub downloader_tick_transfer_completed: Counter, + pub downloader_tick_transfer_failed: Counter, + pub downloader_tick_retry_node: Counter, + pub downloader_tick_goodbye_node: Counter, } impl Default for Metrics { @@ -24,6 +32,28 @@ impl Default for Metrics { downloads_success: Counter::new("Total number of successful 
downloads"), downloads_error: Counter::new("Total number of downloads failed with error"), downloads_notfound: Counter::new("Total number of downloads failed with not found"), + + downloader_tick_main: Counter::new( + "Number of times the main downloader actor loop ticked", + ), + downloader_tick_connection_ready: Counter::new( + "Number of times the downloader actor ticked for a connection ready", + ), + downloader_tick_message_received: Counter::new( + "Number of times the downloader actor ticked for a message received", + ), + downloader_tick_transfer_completed: Counter::new( + "Number of times the downloader actor ticked for a transfer completed", + ), + downloader_tick_transfer_failed: Counter::new( + "Number of times the downloader actor ticked for a transfer failed", + ), + downloader_tick_retry_node: Counter::new( + "Number of times the downloader actor ticked for a retry node", + ), + downloader_tick_goodbye_node: Counter::new( + "Number of times the downloader actor ticked for a goodbye node", + ), } } } diff --git a/iroh-cli/src/commands/doctor.rs b/iroh-cli/src/commands/doctor.rs index 6f229e84ca..1d8ccd195e 100644 --- a/iroh-cli/src/commands/doctor.rs +++ b/iroh-cli/src/commands/doctor.rs @@ -1397,7 +1397,7 @@ impl PlotterApp { return; } let data = req.unwrap().text().await.unwrap(); - let metrics_response = parse_prometheus_metrics(&data); + let metrics_response = iroh_metrics::parse_prometheus_metrics(&data); if metrics_response.is_err() { return; } @@ -1423,23 +1423,3 @@ impl PlotterApp { } } } - -fn parse_prometheus_metrics(data: &str) -> anyhow::Result<HashMap<String, f64>> { - let mut metrics = HashMap::new(); - for line in data.lines() { - if line.starts_with('#') { - continue; - } - let parts: Vec<&str> = line.split_whitespace().collect(); - if parts.len() < 2 { - continue; - } - let metric = parts[0]; - let value = parts[1].parse::<f64>(); - if value.is_err() { - continue; - } - metrics.insert(metric.to_string(), value.unwrap()); - } - Ok(metrics) -} diff --git 
a/iroh-dns-server/src/metrics.rs b/iroh-dns-server/src/metrics.rs index 7b07ac07a0..7ae51b39c6 100644 --- a/iroh-dns-server/src/metrics.rs +++ b/iroh-dns-server/src/metrics.rs @@ -9,7 +9,7 @@ use struct_iterable::Iterable; pub struct Metrics { pub pkarr_publish_update: Counter, pub pkarr_publish_noop: Counter, - pub pkarr_publish_error: Counter, + // pub pkarr_publish_error: Counter, pub dns_requests: Counter, pub dns_requests_udp: Counter, pub dns_requests_https: Counter, @@ -32,7 +32,7 @@ impl Default for Metrics { pkarr_publish_noop: Counter::new( "Number of pkarr relay puts that did not update the state", ), - pkarr_publish_error: Counter::new("Number of pkarr relay puts that failed"), + // pkarr_publish_error: Counter::new("Number of pkarr relay puts that failed"), dns_requests: Counter::new("DNS requests (total)"), dns_requests_udp: Counter::new("DNS requests via UDP"), dns_requests_https: Counter::new("DNS requests via HTTPS (DoH)"), diff --git a/iroh-docs/src/actor.rs b/iroh-docs/src/actor.rs index 04ff497d5f..1763e6c03b 100644 --- a/iroh-docs/src/actor.rs +++ b/iroh-docs/src/actor.rs @@ -12,11 +12,13 @@ use anyhow::{anyhow, Context, Result}; use bytes::Bytes; use futures_util::FutureExt; use iroh_base::hash::Hash; +use iroh_metrics::inc; use serde::{Deserialize, Serialize}; use tokio::{sync::oneshot, task::JoinSet}; use tracing::{debug, error, error_span, trace, warn}; use crate::{ + metrics::Metrics, ranger::Message, store::{ fs::{ContentHashesIterator, StoreInstance}, @@ -609,6 +611,7 @@ impl Actor { } }; trace!(%action, "tick"); + inc!(Metrics, actor_tick_main); match action { Action::Shutdown { reply } => { break reply; diff --git a/iroh-docs/src/engine/gossip.rs b/iroh-docs/src/engine/gossip.rs index 17077ac802..71ed8884f8 100644 --- a/iroh-docs/src/engine/gossip.rs +++ b/iroh-docs/src/engine/gossip.rs @@ -4,6 +4,7 @@ use anyhow::{Context, Result}; use futures_lite::StreamExt; use futures_util::FutureExt; use iroh_gossip::net::{Event, Gossip}; +use 
iroh_metrics::inc; use iroh_net::key::PublicKey; use tokio::{ sync::{broadcast, mpsc}, @@ -16,6 +17,7 @@ use tokio_stream::{ use tracing::{debug, error, trace, warn}; use crate::{actor::SyncHandle, ContentStatus, NamespaceId}; +use crate::metrics::Metrics; use super::live::{Op, ToLiveActor}; @@ -67,9 +69,11 @@ impl GossipActor { loop { i += 1; trace!(?i, "tick wait"); + inc!(Metrics, doc_gossip_tick_main); tokio::select! { next = self.gossip_events.next(), if !self.gossip_events.is_empty() => { trace!(?i, "tick: gossip_event"); + inc!(Metrics, doc_gossip_tick_event); if let Err(err) = self.on_gossip_event(next).await { error!("gossip actor died: {err:?}"); return Err(err); @@ -78,12 +82,14 @@ impl GossipActor { msg = self.inbox.recv() => { let msg = msg.context("to_actor closed")?; trace!(%msg, ?i, "tick: to_actor"); + inc!(Metrics, doc_gossip_tick_actor); if !self.on_actor_message(msg).await.context("on_actor_message")? { break; } } Some(res) = self.pending_joins.join_next(), if !self.pending_joins.is_empty() => { trace!(?i, "tick: pending_joins"); + inc!(Metrics, doc_gossip_tick_pending_join); let (namespace, res) = res.context("pending_joins closed")?; match res { Ok(stream) => { diff --git a/iroh-docs/src/engine/live.rs b/iroh-docs/src/engine/live.rs index 99404d4aba..f9420aaa2e 100644 --- a/iroh-docs/src/engine/live.rs +++ b/iroh-docs/src/engine/live.rs @@ -10,6 +10,7 @@ use iroh_blobs::get::Stats; use iroh_blobs::HashAndFormat; use iroh_blobs::{store::EntryStatus, Hash}; use iroh_gossip::{net::Gossip, proto::TopicId}; +use iroh_metrics::inc; use iroh_net::NodeId; use iroh_net::{key::PublicKey, Endpoint, NodeAddr}; use serde::{Deserialize, Serialize}; @@ -19,6 +20,7 @@ use tokio::{ }; use tracing::{debug, error, error_span, info, instrument, trace, warn, Instrument, Span}; +use crate::metrics::Metrics; use crate::{ actor::{OpenOpts, SyncHandle}, net::{ @@ -244,11 +246,13 @@ impl LiveActor { loop { i += 1; trace!(?i, "tick wait"); + inc!(Metrics, 
doc_live_tick_main); tokio::select! { biased; msg = self.inbox.recv() => { let msg = msg.context("to_actor closed")?; trace!(?i, %msg, "tick: to_actor"); + inc!(Metrics, doc_live_tick_actor); match msg { ToLiveActor::Shutdown { reply } => { break Ok(reply); @@ -260,6 +264,7 @@ impl LiveActor { } event = self.replica_events_rx.recv_async() => { trace!(?i, "tick: replica_event"); + inc!(Metrics, doc_live_tick_replica_event); let event = event.context("replica_events closed")?; if let Err(err) = self.on_replica_event(event).await { error!(?err, "Failed to process replica event"); @@ -267,17 +272,20 @@ impl LiveActor { } Some(res) = self.running_sync_connect.join_next(), if !self.running_sync_connect.is_empty() => { trace!(?i, "tick: running_sync_connect"); + inc!(Metrics, doc_live_tick_running_sync_connect); let (namespace, peer, reason, res) = res.context("running_sync_connect closed")?; self.on_sync_via_connect_finished(namespace, peer, reason, res).await; } Some(res) = self.running_sync_accept.join_next(), if !self.running_sync_accept.is_empty() => { trace!(?i, "tick: running_sync_accept"); + inc!(Metrics, doc_live_tick_running_sync_accept); let res = res.context("running_sync_accept closed")?; self.on_sync_via_accept_finished(res).await; } Some(res) = self.download_tasks.join_next(), if !self.download_tasks.is_empty() => { trace!(?i, "tick: pending_downloads"); + inc!(Metrics, doc_live_tick_pending_downloads); let (namespace, hash, res) = res.context("pending_downloads closed")?; self.on_download_ready(namespace, hash, res).await; diff --git a/iroh-docs/src/metrics.rs b/iroh-docs/src/metrics.rs index 90d562495c..69d7ab6733 100644 --- a/iroh-docs/src/metrics.rs +++ b/iroh-docs/src/metrics.rs @@ -17,6 +17,20 @@ pub struct Metrics { pub sync_via_connect_failure: Counter, pub sync_via_accept_success: Counter, pub sync_via_accept_failure: Counter, + + pub actor_tick_main: Counter, + + pub doc_gossip_tick_main: Counter, + pub doc_gossip_tick_event: Counter, + pub 
doc_gossip_tick_actor: Counter, + pub doc_gossip_tick_pending_join: Counter, + + pub doc_live_tick_main: Counter, + pub doc_live_tick_actor: Counter, + pub doc_live_tick_replica_event: Counter, + pub doc_live_tick_running_sync_connect: Counter, + pub doc_live_tick_running_sync_accept: Counter, + pub doc_live_tick_pending_downloads: Counter, } impl Default for Metrics { @@ -30,6 +44,36 @@ impl Default for Metrics { sync_via_accept_failure: Counter::new("Number of failed syncs (via accept)"), sync_via_connect_success: Counter::new("Number of successful syncs (via connect)"), sync_via_connect_failure: Counter::new("Number of failed syncs (via connect)"), + + actor_tick_main: Counter::new("Number of times the main actor loop ticked"), + + doc_gossip_tick_main: Counter::new("Number of times the gossip actor loop ticked"), + doc_gossip_tick_event: Counter::new( + "Number of times the gossip actor processed an event", + ), + doc_gossip_tick_actor: Counter::new( + "Number of times the gossip actor processed an actor event", + ), + doc_gossip_tick_pending_join: Counter::new( + "Number of times the gossip actor processed a pending join", + ), + + doc_live_tick_main: Counter::new("Number of times the live actor loop ticked"), + doc_live_tick_actor: Counter::new( + "Number of times the live actor processed an actor event", + ), + doc_live_tick_replica_event: Counter::new( + "Number of times the live actor processed a replica event", + ), + doc_live_tick_running_sync_connect: Counter::new( + "Number of times the live actor processed a running sync connect", + ), + doc_live_tick_running_sync_accept: Counter::new( + "Number of times the live actor processed a running sync accept", + ), + doc_live_tick_pending_downloads: Counter::new( + "Number of times the live actor processed a pending download", + ), } } } diff --git a/iroh-gossip/src/metrics.rs b/iroh-gossip/src/metrics.rs index ff89e1aa12..f3c445f88e 100644 --- a/iroh-gossip/src/metrics.rs +++ b/iroh-gossip/src/metrics.rs @@ 
-21,6 +21,14 @@ pub struct Metrics { pub neighbor_down: Counter, // pub topics_joined: Counter, // pub topics_left: Counter, + pub actor_tick_main: Counter, + pub actor_tick_rx: Counter, + pub actor_tick_endpoint: Counter, + pub actor_tick_dialer: Counter, + pub actor_tick_dialer_success: Counter, + pub actor_tick_dialer_failure: Counter, + pub actor_tick_in_event_rx: Counter, + pub actor_tick_timers: Counter, } impl Default for Metrics { @@ -38,6 +46,22 @@ impl Default for Metrics { neighbor_down: Counter::new("Number of times we disconnected from a peer"), // topics_joined: Counter::new("Number of times we joined a topic"), // topics_left: Counter::new("Number of times we left a topic"), + actor_tick_main: Counter::new("Number of times the main actor loop ticked"), + actor_tick_rx: Counter::new("Number of times the actor ticked for a message received"), + actor_tick_endpoint: Counter::new( + "Number of times the actor ticked for an endpoint event", + ), + actor_tick_dialer: Counter::new("Number of times the actor ticked for a dialer event"), + actor_tick_dialer_success: Counter::new( + "Number of times the actor ticked for a successful dialer event", + ), + actor_tick_dialer_failure: Counter::new( + "Number of times the actor ticked for a failed dialer event", + ), + actor_tick_in_event_rx: Counter::new( + "Number of times the actor ticked for an incoming event", + ), + actor_tick_timers: Counter::new("Number of times the actor ticked for a timer event"), } } } diff --git a/iroh-gossip/src/net.rs b/iroh-gossip/src/net.rs index 13d5940703..99d41b5769 100644 --- a/iroh-gossip/src/net.rs +++ b/iroh-gossip/src/net.rs @@ -4,6 +4,7 @@ use anyhow::{anyhow, Context}; use bytes::{Bytes, BytesMut}; use futures_lite::stream::Stream; use genawaiter::sync::{Co, Gen}; +use iroh_metrics::inc; use iroh_net::{ dialer::Dialer, endpoint::{get_remote_node_id, Connection}, @@ -20,7 +21,10 @@ use tokio::{ use tracing::{debug, error_span, trace, warn, Instrument}; use 
self::util::{read_message, write_message, Timers}; -use crate::proto::{self, PeerData, Scope, TopicId}; +use crate::{ + metrics::Metrics, + proto::{self, PeerData, Scope, TopicId}, +}; pub mod util; @@ -368,10 +372,12 @@ impl Actor { loop { i += 1; trace!(?i, "tick"); + inc!(Metrics, actor_tick_main); tokio::select! { biased; msg = self.to_actor_rx.recv() => { trace!(?i, "tick: to_actor_rx"); + inc!(Metrics, actor_tick_rx); match msg { Some(msg) => self.handle_to_actor_msg(msg, Instant::now()).await?, None => { @@ -383,6 +389,7 @@ impl Actor { new_endpoints = self.on_direct_addr_rx.recv() => { match new_endpoints { Some(endpoints) => { + inc!(Metrics, actor_tick_endpoint); let addr = NodeAddr::from_parts( self.endpoint.node_id(), self.endpoint.home_relay(), @@ -399,18 +406,22 @@ impl Actor { } (peer_id, res) = self.dialer.next_conn() => { trace!(?i, "tick: dialer"); + inc!(Metrics, actor_tick_dialer); match res { Ok(conn) => { debug!(peer = ?peer_id, "dial successful"); + inc!(Metrics, actor_tick_dialer_success); self.handle_to_actor_msg(ToActor::ConnIncoming(peer_id, ConnOrigin::Dial, conn), Instant::now()).await.context("dialer.next -> conn -> handle_to_actor_msg")?; } Err(err) => { warn!(peer = ?peer_id, "dial failed: {err}"); + inc!(Metrics, actor_tick_dialer_failure); } } } event = self.in_event_rx.recv() => { trace!(?i, "tick: in_event_rx"); + inc!(Metrics, actor_tick_in_event_rx); match event { Some(event) => { self.handle_in_event(event, Instant::now()).await.context("in_event_rx.recv -> handle_in_event")?; @@ -420,6 +431,7 @@ impl Actor { } drain = self.timers.wait_and_drain() => { trace!(?i, "tick: timers"); + inc!(Metrics, actor_tick_timers); let now = Instant::now(); for (_instant, timer) in drain { self.handle_in_event(InEvent::TimerExpired(timer), now).await.context("timers.drain_expired -> handle_in_event")?; diff --git a/iroh-metrics/src/lib.rs b/iroh-metrics/src/lib.rs index 5a0f684b3d..68dbd2acbc 100644 --- a/iroh-metrics/src/lib.rs +++ 
b/iroh-metrics/src/lib.rs @@ -11,6 +11,7 @@ pub mod core; mod service; use core::UsageStatsReport; +use std::collections::HashMap; /// Reexport to make matching versions easier. pub use struct_iterable; @@ -46,3 +47,24 @@ pub async fn report_usage_stats(report: &UsageStatsReport) { } } } + +/// Parse Prometheus metrics from a string. +pub fn parse_prometheus_metrics(data: &str) -> anyhow::Result<HashMap<String, f64>> { + let mut metrics = HashMap::new(); + for line in data.lines() { + if line.starts_with('#') { + continue; + } + let parts: Vec<&str> = line.split_whitespace().collect(); + if parts.len() < 2 { + continue; + } + let metric = parts[0]; + let value = parts[1].parse::<f64>(); + if value.is_err() { + continue; + } + metrics.insert(metric.to_string(), value.unwrap()); + } + Ok(metrics) +} diff --git a/iroh-net/src/magicsock.rs b/iroh-net/src/magicsock.rs index e9fff85bca..4b2d46d1ca 100644 --- a/iroh-net/src/magicsock.rs +++ b/iroh-net/src/magicsock.rs @@ -1762,25 +1762,30 @@ impl Actor { }; loop { + inc!(Metrics, actor_tick_main); tokio::select! 
{ Some(msg) = self.msg_receiver.recv() => { trace!(?msg, "tick: msg"); + inc!(Metrics, actor_tick_msg); if self.handle_actor_message(msg).await { return Ok(()); } } tick = self.periodic_re_stun_timer.tick() => { trace!("tick: re_stun {:?}", tick); + inc!(Metrics, actor_tick_re_stun); self.msock.re_stun("periodic"); } Ok(()) = portmap_watcher.changed() => { trace!("tick: portmap changed"); + inc!(Metrics, actor_tick_portmap_changed); let new_external_address = *portmap_watcher.borrow(); debug!("external address updated: {new_external_address:?}"); self.msock.re_stun("portmap_updated"); }, _ = endpoint_heartbeat_timer.tick() => { trace!("tick: endpoint heartbeat {} endpoints", self.msock.node_map.node_count()); + inc!(Metrics, actor_tick_endpoint_heartbeat); // TODO: this might trigger too many packets at once, pace this self.msock.node_map.prune_inactive(); @@ -1790,12 +1795,14 @@ impl Actor { _ = endpoints_update_receiver.changed() => { let reason = *endpoints_update_receiver.borrow(); trace!("tick: endpoints update receiver {:?}", reason); + inc!(Metrics, actor_tick_endpoints_update_receiver); if let Some(reason) = reason { self.update_endpoints(reason).await; } } _ = save_nodes_timer.tick(), if self.nodes_path.is_some() => { trace!("tick: nodes_timer"); + inc!(Metrics, actor_tick_nodes_timer); let path = self.nodes_path.as_ref().expect("precondition: `is_some()`"); self.msock.node_map.prune_inactive(); @@ -1806,10 +1813,12 @@ impl Actor { } Some(is_major) = link_change_r.recv() => { trace!("tick: link change {}", is_major); + inc!(Metrics, actor_link_change); self.handle_network_change(is_major).await; } else => { trace!("tick: other"); + inc!(Metrics, actor_tick_other); } } } diff --git a/iroh-net/src/magicsock/metrics.rs b/iroh-net/src/magicsock/metrics.rs index 4c48e83cd4..ff287a4596 100644 --- a/iroh-net/src/magicsock/metrics.rs +++ b/iroh-net/src/magicsock/metrics.rs @@ -7,19 +7,19 @@ use iroh_metrics::{ #[allow(missing_docs)] #[derive(Debug, Clone, 
Iterable)] pub struct Metrics { - pub rebind_calls: Counter, + // pub rebind_calls: Counter, pub re_stun_calls: Counter, pub update_endpoints: Counter, // Sends (data or disco) - pub send_relay_queued: Counter, - pub send_relay_error_chan: Counter, - pub send_relay_error_closed: Counter, - pub send_relay_error_queue: Counter, + // pub send_relay_queued: Counter, + // pub send_relay_error_chan: Counter, + // pub send_relay_error_closed: Counter, + // pub send_relay_error_queue: Counter, pub send_ipv4: Counter, - pub send_ipv4_error: Counter, + // pub send_ipv4_error: Counter, pub send_ipv6: Counter, - pub send_ipv6_error: Counter, + // pub send_ipv6_error: Counter, pub send_relay: Counter, pub send_relay_error: Counter, @@ -40,7 +40,7 @@ pub struct Metrics { pub sent_disco_ping: Counter, pub sent_disco_pong: Counter, pub sent_disco_call_me_maybe: Counter, - pub recv_disco_bad_peer: Counter, + // pub recv_disco_bad_peer: Counter, pub recv_disco_bad_key: Counter, pub recv_disco_bad_parse: Counter, @@ -49,7 +49,7 @@ pub struct Metrics { pub recv_disco_ping: Counter, pub recv_disco_pong: Counter, pub recv_disco_call_me_maybe: Counter, - pub recv_disco_call_me_maybe_bad_node: Counter, + // pub recv_disco_call_me_maybe_bad_node: Counter, pub recv_disco_call_me_maybe_bad_disco: Counter, // How many times our relay home node DI has changed from non-zero to a different non-zero. @@ -66,6 +66,16 @@ pub struct Metrics { pub num_relay_conns_added: Counter, /// The number of connections to peers we have removed over relay. 
pub num_relay_conns_removed: Counter, + + pub actor_tick_main: Counter, + pub actor_tick_msg: Counter, + pub actor_tick_re_stun: Counter, + pub actor_tick_portmap_changed: Counter, + pub actor_tick_endpoint_heartbeat: Counter, + pub actor_tick_endpoints_update_receiver: Counter, + pub actor_tick_nodes_timer: Counter, + pub actor_link_change: Counter, + pub actor_tick_other: Counter, } impl Default for Metrics { @@ -74,19 +84,19 @@ impl Default for Metrics { num_relay_conns_added: Counter::new("num_relay_conns added"), num_relay_conns_removed: Counter::new("num_relay_conns removed"), - rebind_calls: Counter::new("rebind_calls"), + // rebind_calls: Counter::new("rebind_calls"), re_stun_calls: Counter::new("restun_calls"), update_endpoints: Counter::new("update_endpoints"), // Sends (data or disco) - send_relay_queued: Counter::new("send_relay_queued"), - send_relay_error_chan: Counter::new("send_relay_error_chan"), - send_relay_error_closed: Counter::new("send_relay_error_closed"), - send_relay_error_queue: Counter::new("send_relay_error_queue"), + // send_relay_queued: Counter::new("send_relay_queued"), + // send_relay_error_chan: Counter::new("send_relay_error_chan"), + // send_relay_error_closed: Counter::new("send_relay_error_closed"), + // send_relay_error_queue: Counter::new("send_relay_error_queue"), send_ipv4: Counter::new("send_ipv4"), - send_ipv4_error: Counter::new("send_ipv4_error"), + // send_ipv4_error: Counter::new("send_ipv4_error"), send_ipv6: Counter::new("send_ipv6"), - send_ipv6_error: Counter::new("send_ipv6_error"), + // send_ipv6_error: Counter::new("send_ipv6_error"), send_relay: Counter::new("send_relay"), send_relay_error: Counter::new("send_relay_error"), @@ -106,7 +116,7 @@ impl Default for Metrics { sent_disco_ping: Counter::new("disco_sent_ping"), sent_disco_pong: Counter::new("disco_sent_pong"), sent_disco_call_me_maybe: Counter::new("disco_sent_callmemaybe"), - recv_disco_bad_peer: Counter::new("disco_recv_bad_peer"), + // 
recv_disco_bad_peer: Counter::new("disco_recv_bad_peer"), recv_disco_bad_key: Counter::new("disco_recv_bad_key"), recv_disco_bad_parse: Counter::new("disco_recv_bad_parse"), @@ -115,7 +125,7 @@ impl Default for Metrics { recv_disco_ping: Counter::new("disco_recv_ping"), recv_disco_pong: Counter::new("disco_recv_pong"), recv_disco_call_me_maybe: Counter::new("disco_recv_callmemaybe"), - recv_disco_call_me_maybe_bad_node: Counter::new("disco_recv_callmemaybe_bad_node"), + // recv_disco_call_me_maybe_bad_node: Counter::new("disco_recv_callmemaybe_bad_node"), recv_disco_call_me_maybe_bad_disco: Counter::new("disco_recv_callmemaybe_bad_disco"), // How many times our relay home node DI has changed from non-zero to a different non-zero. @@ -127,6 +137,18 @@ impl Default for Metrics { num_direct_conns_removed: Counter::new( "number of direct connections to a peer we have removed", ), + + actor_tick_main: Counter::new("actor_tick_main"), + actor_tick_msg: Counter::new("actor_tick_msg"), + actor_tick_re_stun: Counter::new("actor_tick_re_stun"), + actor_tick_portmap_changed: Counter::new("actor_tick_portmap_changed"), + actor_tick_endpoint_heartbeat: Counter::new("actor_tick_endpoint_heartbeat"), + actor_tick_endpoints_update_receiver: Counter::new( + "actor_tick_endpoints_update_receiver", + ), + actor_tick_nodes_timer: Counter::new("actor_tick_nodes_timer"), + actor_link_change: Counter::new("actor_link_change"), + actor_tick_other: Counter::new("actor_tick_other"), } } } diff --git a/iroh-net/src/netcheck/metrics.rs b/iroh-net/src/netcheck/metrics.rs index 49539f0035..cdc13f64c1 100644 --- a/iroh-net/src/netcheck/metrics.rs +++ b/iroh-net/src/netcheck/metrics.rs @@ -14,7 +14,7 @@ pub struct Metrics { pub stun_packets_recv_ipv6: Counter, pub reports: Counter, pub reports_full: Counter, - pub reports_error: Counter, + // pub reports_error: Counter, } impl Default for Metrics { @@ -29,7 +29,7 @@ impl Default for Metrics { stun_packets_recv_ipv6: Counter::new("Number of IPv6 
STUN packets received"), reports: Counter::new("Number of reports executed by netcheck, including full reports"), reports_full: Counter::new("Number of full reports executed by netcheck"), - reports_error: Counter::new("Number of executed reports resulting in an error"), + // reports_error: Counter::new("Number of executed reports resulting in an error"), } } } diff --git a/iroh-net/src/relay/metrics.rs b/iroh-net/src/relay/metrics.rs index 923d375b4a..91ce2e0fb5 100644 --- a/iroh-net/src/relay/metrics.rs +++ b/iroh-net/src/relay/metrics.rs @@ -36,11 +36,10 @@ pub struct Metrics { /// Packets of other `FrameType`s dropped pub other_packets_dropped: Counter, - /// Number of packets we have forwarded out to another packet forwarder - pub packets_forwarded_out: Counter, - /// Number of packets we have been asked to forward - pub packets_forwarded_in: Counter, - + // /// Number of packets we have forwarded out to another packet forwarder + // pub packets_forwarded_out: Counter, + // /// Number of packets we have been asked to forward + // pub packets_forwarded_in: Counter, /// Number of `FrameType::Ping`s received pub got_ping: Counter, /// Number of `FrameType::Pong`s sent @@ -48,14 +47,13 @@ pub struct Metrics { /// Number of `FrameType::Unknown` received pub unknown_frames: Counter, - /* - * Metrics about peers - */ - /// Number of packet forwarders added - pub added_pkt_fwder: Counter, - /// Number of packet forwarders removed - pub removed_pkt_fwder: Counter, - + // /* + // * Metrics about peers + // */ + // /// Number of packet forwarders added + // pub added_pkt_fwder: Counter, + // /// Number of packet forwarders removed + // pub removed_pkt_fwder: Counter, /// Number of connections we have accepted pub accepts: Counter, /// Number of connections we have removed because of an error @@ -97,27 +95,25 @@ impl Default for Metrics { "Number of times a non-disco, non-'send; packet was dropped.", ), - packets_forwarded_out: Counter::new( - "Number of times the server 
has sent a forwarded packet", - ), - packets_forwarded_in: Counter::new( - "Number of times the server has received a forwarded packet.", - ), - + // packets_forwarded_out: Counter::new( + // "Number of times the server has sent a forwarded packet", + // ), + // packets_forwarded_in: Counter::new( + // "Number of times the server has received a forwarded packet.", + // ), got_ping: Counter::new("Number of times the server has received a Ping from a client."), sent_pong: Counter::new("Number of times the server has sent a Pong to a client."), unknown_frames: Counter::new("Number of unknown frames sent to this server."), - /* - * Metrics about peers - */ - added_pkt_fwder: Counter::new( - "Number of times a packeted forwarded was added to this server.", - ), - removed_pkt_fwder: Counter::new( - "Number of times a packet forwarded was removed to this server.", - ), - + // /* + // * Metrics about peers + // */ + // added_pkt_fwder: Counter::new( + // "Number of times a packeted forwarded was added to this server.", + // ), + // removed_pkt_fwder: Counter::new( + // "Number of times a packet forwarded was removed to this server.", + // ), accepts: Counter::new("Number of times this server has accepted a connection."), disconnects: Counter::new("Number of clients that have then disconnected."), diff --git a/iroh/src/metrics.rs b/iroh/src/metrics.rs index 8a4601745e..f829e82a65 100644 --- a/iroh/src/metrics.rs +++ b/iroh/src/metrics.rs @@ -11,17 +11,55 @@ use crate::rpc_protocol::node::CounterStats; #[allow(missing_docs)] #[derive(Debug, Clone, Iterable)] pub struct Metrics { - pub requests_total: Counter, - pub bytes_sent: Counter, - pub bytes_received: Counter, + // pub requests_total: Counter, + // pub bytes_sent: Counter, + // pub bytes_received: Counter, + pub doc_gossip_tick_main: Counter, + pub doc_gossip_tick_event: Counter, + pub doc_gossip_tick_actor: Counter, + pub doc_gossip_tick_pending_join: Counter, + + pub doc_live_tick_main: Counter, + pub 
doc_live_tick_actor: Counter, + pub doc_live_tick_replica_event: Counter, + pub doc_live_tick_running_sync_connect: Counter, + pub doc_live_tick_running_sync_accept: Counter, + pub doc_live_tick_pending_downloads: Counter, } impl Default for Metrics { fn default() -> Self { Self { - requests_total: Counter::new("Total number of requests received"), - bytes_sent: Counter::new("Number of bytes streamed"), - bytes_received: Counter::new("Number of bytes received"), + // requests_total: Counter::new("Total number of requests received"), + // bytes_sent: Counter::new("Number of bytes streamed"), + // bytes_received: Counter::new("Number of bytes received"), + doc_gossip_tick_main: Counter::new("Number of times the main gossip actor loop ticked"), + doc_gossip_tick_event: Counter::new( + "Number of times the gossip actor ticked for an event", + ), + doc_gossip_tick_actor: Counter::new( + "Number of times the gossip actor ticked for an actor message", + ), + doc_gossip_tick_pending_join: Counter::new( + "Number of times the gossip actor ticked pending join", + ), + + doc_live_tick_main: Counter::new("Number of times the main live actor loop ticked"), + doc_live_tick_actor: Counter::new( + "Number of times the live actor ticked for an actor message", + ), + doc_live_tick_replica_event: Counter::new( + "Number of times the live actor ticked for a replica event", + ), + doc_live_tick_running_sync_connect: Counter::new( + "Number of times the live actor ticked for a running sync connect", + ), + doc_live_tick_running_sync_accept: Counter::new( + "Number of times the live actor ticked for a running sync accept", + ), + doc_live_tick_pending_downloads: Counter::new( + "Number of times the live actor ticked for a pending download", + ), } } }