From a5e92833f7a7569575241e31ca67f9ae64e092c9 Mon Sep 17 00:00:00 2001 From: Divma <26765164+divagant-martian@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:04:58 -0500 Subject: [PATCH] feat(iroh-base, iroh-net-report)!: intro net-report as a crate (#2921) ## Description Introduces net-report as its own crate. For this, introduces a new feature flag `relay` in `iroh-base` for the relay topology types. - in `iroh-base`, the `RelayUrl` is moved to its own file. It can still be used as before, this is not a breaking change. - `RelayMap`, `RelayNode` are moved to `iroh-base` under the `relay` feature flag. - `RelayMode` is moved to `endpoint`. See the NOTES section about this. - net-report now has metrics as a feature flag. I'm not sure how this worked before without being feature flagged. - ping is moved without changes from `iroh-net` to `iroh-net-report`. This is the only place where it's used so it should probably have been part of `netcheck` as a module from the beginning. - Adds missing license files ## Breaking Changes - `iroh-net`'s `NetcheckMetrics` are now called `NetReportMetrics` ## Notes & open questions - `RelayMode` remains in `iroh-net` instead of moving to `iroh-base` with the other relay-related types because it is coupled with the staging and prod configuration of relay URLs. Since this is more configuration than concept I think it's best to keep it in iroh-net. - The crate is called `iroh-net-report` because -at least currently- it depends on relay concepts. For example, the tests depend heavily on the internal order of the `RelayMap`. Can we change this? Yes. Should we change this? We can discuss it. Is this PR the place for any of that? No. - The API needs some love. Some types that are supposed to be internal are used by dependents (in this case `iroh-net`). It's not terrible but it could be improved. ## Change checklist - [x] Self-review.
- [x] Documentation updates following the [style guide](https://rust-lang.github.io/rfcs/1574-more-api-documentation-conventions.html#appendix-a-full-conventions-text), if relevant. - [ ] ~Tests if relevant.~ - [ ] ~All breaking changes documented.~ --- .github/workflows/ci.yml | 2 +- .github/workflows/tests.yaml | 2 +- Cargo.lock | 32 +++ Cargo.toml | 1 + iroh-base/Cargo.toml | 3 +- iroh-base/src/lib.rs | 5 + iroh-base/src/node_addr.rs | 122 +------- {iroh-net => iroh-base}/src/relay_map.rs | 33 +-- iroh-base/src/relay_url.rs | 121 ++++++++ iroh-cli/Cargo.toml | 1 + iroh-cli/src/commands/doctor.rs | 3 +- iroh-net-report/Cargo.toml | 54 ++++ iroh-net-report/LICENSE-BSD3 | 30 ++ iroh-net-report/README.md | 71 +++++ iroh-net-report/src/defaults.rs | 43 +++ iroh-net-report/src/dns.rs | 261 ++++++++++++++++++ .../netcheck.rs => iroh-net-report/src/lib.rs | 98 ++++--- .../src}/metrics.rs | 8 +- {iroh-net => iroh-net-report}/src/ping.rs | 4 +- .../src}/reportgen.rs | 122 ++++---- .../src}/reportgen/hairpin.rs | 61 ++-- .../src}/reportgen/probes.rs | 12 +- iroh-net/Cargo.toml | 1 + iroh-net/LICENSE-BSD3 | 7 +- iroh-net/bench/src/bin/bulk.rs | 6 +- iroh-net/src/defaults.rs | 36 +-- iroh-net/src/endpoint.rs | 30 +- iroh-net/src/lib.rs | 12 +- iroh-net/src/magicsock.rs | 78 +++--- iroh-net/src/magicsock/udp_conn.rs | 2 +- iroh-net/src/metrics.rs | 3 +- iroh-relay/LICENSE-BSD3 | 30 ++ iroh-relay/src/server.rs | 4 +- iroh/src/metrics.rs | 4 +- 34 files changed, 918 insertions(+), 384 deletions(-) rename {iroh-net => iroh-base}/src/relay_map.rs (79%) create mode 100644 iroh-base/src/relay_url.rs create mode 100644 iroh-net-report/Cargo.toml create mode 100644 iroh-net-report/LICENSE-BSD3 create mode 100644 iroh-net-report/README.md create mode 100644 iroh-net-report/src/defaults.rs create mode 100644 iroh-net-report/src/dns.rs rename iroh-net/src/netcheck.rs => iroh-net-report/src/lib.rs (94%) rename {iroh-net/src/netcheck => iroh-net-report/src}/metrics.rs (86%) rename 
{iroh-net => iroh-net-report}/src/ping.rs (98%) rename {iroh-net/src/netcheck => iroh-net-report/src}/reportgen.rs (94%) rename {iroh-net/src/netcheck => iroh-net-report/src}/reportgen/hairpin.rs (83%) rename {iroh-net/src/netcheck => iroh-net-report/src}/reportgen/probes.rs (98%) create mode 100644 iroh-relay/LICENSE-BSD3 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a8747768c7..4f55f6d60b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -190,7 +190,7 @@ jobs: # uses: obi1kenobi/cargo-semver-checks-action@v2 uses: n0-computer/cargo-semver-checks-action@feat-baseline with: - package: iroh, iroh-base, iroh-cli, iroh-dns-server, iroh-metrics, iroh-net, iroh-net-bench, iroh-node-util, iroh-router, netwatch, portmapper, iroh-relay + package: iroh, iroh-base, iroh-cli, iroh-dns-server, iroh-metrics, iroh-net, iroh-net-bench, iroh-node-util, iroh-router, netwatch, portmapper, iroh-relay, iroh-net-report baseline-rev: ${{ env.HEAD_COMMIT_SHA }} use-cache: false diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 5ef258c78e..49349da1ab 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -23,7 +23,7 @@ env: RUSTFLAGS: -Dwarnings RUSTDOCFLAGS: -Dwarnings SCCACHE_CACHE_SIZE: "50G" - CRATES_LIST: "iroh,iroh-node-util,iroh-metrics,iroh-net,iroh-net-bench,iroh-test,iroh-cli,iroh-dns-server,iroh-router,netwatch,portmapper,iroh-relay" + CRATES_LIST: "iroh,iroh-node-util,iroh-metrics,iroh-net,iroh-net-bench,iroh-test,iroh-cli,iroh-dns-server,iroh-router,netwatch,portmapper,iroh-relay,iroh-net-report" IROH_FORCE_STAGING_RELAYS: "1" jobs: diff --git a/Cargo.lock b/Cargo.lock index 4cf43919ef..28c9d69eb4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2783,6 +2783,7 @@ dependencies = [ "iroh-docs", "iroh-gossip", "iroh-metrics", + "iroh-net-report", "iroh-node-util", "nix 0.27.1", "parking_lot", @@ -3013,6 +3014,7 @@ dependencies = [ "iroh-base", "iroh-metrics", "iroh-net", + 
"iroh-net-report", "iroh-quinn", "iroh-quinn-proto", "iroh-quinn-udp", @@ -3091,6 +3093,36 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "iroh-net-report" +version = "0.28.0" +dependencies = [ + "anyhow", + "bytes", + "derive_more", + "futures-buffered", + "futures-lite 2.5.0", + "hickory-resolver", + "iroh-base", + "iroh-metrics", + "iroh-relay", + "iroh-test", + "netwatch", + "once_cell", + "portmapper", + "pretty_assertions", + "rand", + "reqwest", + "rustls", + "surge-ping", + "testresult", + "thiserror 1.0.68", + "tokio", + "tokio-util", + "tracing", + "url", +] + [[package]] name = "iroh-node-util" version = "0.28.0" diff --git a/Cargo.toml b/Cargo.toml index ee606e9f7b..c37cdec555 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ members = [ "iroh-router", "net-tools/netwatch", "net-tools/portmapper", + "iroh-net-report", "iroh-node-util", ] resolver = "2" diff --git a/iroh-base/Cargo.toml b/iroh-base/Cargo.toml index 22945ad860..f2b7fc1d0e 100644 --- a/iroh-base/Cargo.toml +++ b/iroh-base/Cargo.toml @@ -47,12 +47,13 @@ serde_json = "1.0.107" serde_test = "1.0.176" [features] -default = ["hash", "base32"] +default = ["hash", "base32", "relay"] hash = ["dep:blake3", "dep:data-encoding", "dep:postcard", "dep:derive_more", "base32"] base32 = ["dep:data-encoding", "dep:postcard"] redb = ["dep:redb"] key = ["dep:ed25519-dalek", "dep:once_cell", "dep:rand", "dep:rand_core", "dep:ssh-key", "dep:ttl_cache", "dep:aead", "dep:crypto_box", "dep:zeroize", "dep:url", "dep:derive_more", "dep:getrandom"] wasm = ["getrandom?/js"] +relay = ["dep:url", "dep:derive_more"] [package.metadata.docs.rs] all-features = true diff --git a/iroh-base/src/lib.rs b/iroh-base/src/lib.rs index 2c8ec03fdc..5445bf898f 100644 --- a/iroh-base/src/lib.rs +++ b/iroh-base/src/lib.rs @@ -13,6 +13,11 @@ pub mod key; #[cfg(feature = "key")] #[cfg_attr(iroh_docsrs, doc(cfg(feature = "key")))] pub mod node_addr; +#[cfg(feature = "relay")] +#[cfg_attr(iroh_docsrs, 
doc(cfg(feature = "relay")))] +pub mod relay_map; +#[cfg(any(feature = "relay", feature = "key"))] +mod relay_url; #[cfg(feature = "base32")] #[cfg_attr(iroh_docsrs, doc(cfg(feature = "base32")))] pub mod ticket; diff --git a/iroh-base/src/node_addr.rs b/iroh-base/src/node_addr.rs index 346e3f525a..c084f15f55 100644 --- a/iroh-base/src/node_addr.rs +++ b/iroh-base/src/node_addr.rs @@ -6,13 +6,12 @@ //! //! The primary way of addressing a node is by using the [`NodeAddr`]. -use std::{collections::BTreeSet, fmt, net::SocketAddr, ops::Deref, str::FromStr}; +use std::{collections::BTreeSet, net::SocketAddr}; -use anyhow::Context; use serde::{Deserialize, Serialize}; -use url::Url; use crate::key::{NodeId, PublicKey}; +pub use crate::relay_url::RelayUrl; /// Network-level addressing information for an iroh-net node. /// @@ -199,120 +198,3 @@ pub enum AddrInfoOptions { /// Includes the Node ID and the direct addresses. Addresses, } - -/// A URL identifying a relay server. -/// -/// This is but a wrapper around [`Url`], with a few custom tweaks: -/// -/// - A relay URL is never a relative URL, so an implicit `.` is added at the end of the -/// domain name if missing. -/// -/// - [`fmt::Debug`] is implemented so it prints the URL rather than the URL struct fields. -/// Useful when logging e.g. `Option`. -/// -/// To create a [`RelayUrl`] use the `From` implementation. -#[derive( - Clone, derive_more::Display, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, -)] -pub struct RelayUrl(Url); - -impl From for RelayUrl { - fn from(mut url: Url) -> Self { - if let Some(domain) = url.domain() { - if !domain.ends_with('.') { - let domain = String::from(domain) + "."; - - // This can fail, though it is unlikely the resulting URL is usable as a - // relay URL, probably it has the wrong scheme or is not a base URL or the - // like. We don't do full URL validation however, so just silently leave - // this bad URL in place. Something will fail later. 
- url.set_host(Some(&domain)).ok(); - } - } - Self(url) - } -} - -/// Support for parsing strings directly. -/// -/// If you need more control over the error first create a [`Url`] and use [`RelayUrl::from`] -/// instead. -impl FromStr for RelayUrl { - type Err = anyhow::Error; - - fn from_str(s: &str) -> Result { - let inner = Url::from_str(s).context("invalid URL")?; - Ok(RelayUrl::from(inner)) - } -} - -impl From for Url { - fn from(value: RelayUrl) -> Self { - value.0 - } -} - -/// Dereferences to the wrapped [`Url`]. -/// -/// Note that [`DerefMut`] is not implemented on purpose, so this type has more flexibility -/// to change the inner later. -/// -/// [`DerefMut`]: std::ops::DerefMut -impl Deref for RelayUrl { - type Target = Url; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl fmt::Debug for RelayUrl { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_tuple("RelayUrl") - .field(&DbgStr(self.0.as_str())) - .finish() - } -} - -/// Helper struct to format a &str without allocating a String. -/// -/// Maybe this is entirely unneeded and the compiler would be smart enough to never allocate -/// the String anyway. Who knows. Writing this was faster than checking the assembler -/// output. 
-struct DbgStr<'a>(&'a str); - -impl<'a> fmt::Debug for DbgStr<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, r#""{}""#, self.0) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_relay_url_debug_display() { - let url = RelayUrl::from(Url::parse("https://example.com").unwrap()); - - assert_eq!(format!("{url:?}"), r#"RelayUrl("https://example.com./")"#); - - assert_eq!(format!("{url}"), "https://example.com./"); - } - - #[test] - fn test_relay_url_absolute() { - let url = RelayUrl::from(Url::parse("https://example.com").unwrap()); - - assert_eq!(url.domain(), Some("example.com.")); - - let url1 = RelayUrl::from(Url::parse("https://example.com.").unwrap()); - assert_eq!(url, url1); - - let url2 = RelayUrl::from(Url::parse("https://example.com./").unwrap()); - assert_eq!(url, url2); - - let url3 = RelayUrl::from(Url::parse("https://example.com/").unwrap()); - assert_eq!(url, url3); - } -} diff --git a/iroh-net/src/relay_map.rs b/iroh-base/src/relay_map.rs similarity index 79% rename from iroh-net/src/relay_map.rs rename to iroh-base/src/relay_map.rs index 727ccf61c2..fd698accb4 100644 --- a/iroh-net/src/relay_map.rs +++ b/iroh-base/src/relay_map.rs @@ -3,39 +3,14 @@ use std::{collections::BTreeMap, fmt, sync::Arc}; use anyhow::{ensure, Result}; -pub use iroh_relay::RelayUrl; use serde::{Deserialize, Serialize}; -use crate::defaults::DEFAULT_STUN_PORT; +pub use crate::relay_url::RelayUrl; -/// Configuration of the relay servers for an [`Endpoint`]. +/// The default STUN port used by the Relay server. /// -/// [`Endpoint`]: crate::endpoint::Endpoint -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum RelayMode { - /// Disable relay servers completely. - Disabled, - /// Use the default relay map, with production relay servers from n0. - /// - /// See [`crate::defaults::prod`] for the severs used. - Default, - /// Use the staging relay servers from n0. - Staging, - /// Use a custom relay map. 
- Custom(RelayMap), -} - -impl RelayMode { - /// Returns the relay map for this mode. - pub fn relay_map(&self) -> RelayMap { - match self { - RelayMode::Disabled => RelayMap::empty(), - RelayMode::Default => crate::defaults::prod::default_relay_map(), - RelayMode::Staging => crate::defaults::staging::default_relay_map(), - RelayMode::Custom(relay_map) => relay_map.clone(), - } - } -} +/// The default STUN port used by the Relay server. +/// +/// The STUN port as defined by [RFC 8489](https://www.rfc-editor.org/rfc/rfc8489#section-18.6) +const DEFAULT_STUN_PORT: u16 = 3478; /// Configuration of all the relay servers that can be used. #[derive(Debug, Clone, PartialEq, Eq)] diff --git a/iroh-base/src/relay_url.rs b/iroh-base/src/relay_url.rs new file mode 100644 index 0000000000..fb828e8bf1 --- /dev/null +++ b/iroh-base/src/relay_url.rs @@ -0,0 +1,121 @@ +use std::{fmt, ops::Deref, str::FromStr}; + +use anyhow::Context; +use serde::{Deserialize, Serialize}; +use url::Url; +/// A URL identifying a relay server. +/// +/// This is but a wrapper around [`Url`], with a few custom tweaks: +/// +/// - A relay URL is never a relative URL, so an implicit `.` is added at the end of the +/// domain name if missing. +/// +/// - [`fmt::Debug`] is implemented so it prints the URL rather than the URL struct fields. +/// Useful when logging e.g. `Option<RelayUrl>`. +/// +/// To create a [`RelayUrl`] use the `From<Url>` implementation. +#[derive( + Clone, derive_more::Display, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, +)] +pub struct RelayUrl(Url); + +impl From for RelayUrl { + fn from(mut url: Url) -> Self { + if let Some(domain) = url.domain() { + if !domain.ends_with('.') { + let domain = String::from(domain) + "."; + + // This can fail, though it is unlikely the resulting URL is usable as a + // relay URL, probably it has the wrong scheme or is not a base URL or the + // like. We don't do full URL validation however, so just silently leave + // this bad URL in place. Something will fail later.
+ url.set_host(Some(&domain)).ok(); + } + } + Self(url) + } +} + +/// Support for parsing strings directly. +/// +/// If you need more control over the error first create a [`Url`] and use [`RelayUrl::from`] +/// instead. +impl FromStr for RelayUrl { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + let inner = Url::from_str(s).context("invalid URL")?; + Ok(RelayUrl::from(inner)) + } +} + +impl From for Url { + fn from(value: RelayUrl) -> Self { + value.0 + } +} + +/// Dereferences to the wrapped [`Url`]. +/// +/// Note that [`DerefMut`] is not implemented on purpose, so this type has more flexibility +/// to change the inner later. +/// +/// [`DerefMut`]: std::ops::DerefMut +impl Deref for RelayUrl { + type Target = Url; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl fmt::Debug for RelayUrl { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("RelayUrl") + .field(&DbgStr(self.0.as_str())) + .finish() + } +} + +/// Helper struct to format a &str without allocating a String. +/// +/// Maybe this is entirely unneeded and the compiler would be smart enough to never allocate +/// the String anyway. Who knows. Writing this was faster than checking the assembler +/// output. 
+struct DbgStr<'a>(&'a str); + +impl<'a> fmt::Debug for DbgStr<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, r#""{}""#, self.0) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_relay_url_debug_display() { + let url = RelayUrl::from(Url::parse("https://example.com").unwrap()); + + assert_eq!(format!("{url:?}"), r#"RelayUrl("https://example.com./")"#); + + assert_eq!(format!("{url}"), "https://example.com./"); + } + + #[test] + fn test_relay_url_absolute() { + let url = RelayUrl::from(Url::parse("https://example.com").unwrap()); + + assert_eq!(url.domain(), Some("example.com.")); + + let url1 = RelayUrl::from(Url::parse("https://example.com.").unwrap()); + assert_eq!(url, url1); + + let url2 = RelayUrl::from(Url::parse("https://example.com./").unwrap()); + assert_eq!(url, url2); + + let url3 = RelayUrl::from(Url::parse("https://example.com/").unwrap()); + assert_eq!(url, url3); + } +} diff --git a/iroh-cli/Cargo.toml b/iroh-cli/Cargo.toml index ce89de2c52..4bcd5d0541 100644 --- a/iroh-cli/Cargo.toml +++ b/iroh-cli/Cargo.toml @@ -44,6 +44,7 @@ iroh-blobs = { version = "0.28.1", features = ["cli"] } iroh-docs = { version = "0.28.0", features = ["cli"] } iroh-gossip = { version = "0.28.1", features = ["cli"] } iroh-metrics = { version = "0.28.0" } +net-report = { package = "iroh-net-report", path = "../iroh-net-report", version = "0.28" } iroh-node-util = { path = "../iroh-node-util", features = ["config", "logging", "cli"] } parking_lot = "0.12.1" pkarr = { version = "2.2.0", default-features = false } diff --git a/iroh-cli/src/commands/doctor.rs b/iroh-cli/src/commands/doctor.rs index b20d214749..3ebad96439 100644 --- a/iroh-cli/src/commands/doctor.rs +++ b/iroh-cli/src/commands/doctor.rs @@ -37,7 +37,6 @@ use iroh::{ endpoint::{self, Connection, ConnectionTypeStream, RecvStream, RemoteInfo, SendStream}, key::{PublicKey, SecretKey}, metrics::MagicsockMetrics, - netcheck, ticket::NodeTicket, Endpoint, NodeAddr, 
NodeId, RelayMap, RelayMode, RelayUrl, }, @@ -349,7 +348,7 @@ async fn report( ) -> anyhow::Result<()> { let port_mapper = portmapper::Client::default(); let dns_resolver = default_resolver().clone(); - let mut client = netcheck::Client::new(Some(port_mapper), dns_resolver)?; + let mut client = net_report::Client::new(Some(port_mapper), dns_resolver)?; let dm = match stun_host { Some(host_name) => { diff --git a/iroh-net-report/Cargo.toml b/iroh-net-report/Cargo.toml new file mode 100644 index 0000000000..d7367a8a97 --- /dev/null +++ b/iroh-net-report/Cargo.toml @@ -0,0 +1,54 @@ +[package] +name = "iroh-net-report" +version = "0.28.0" +edition = "2021" +readme = "README.md" +description = "detailed reporting on network conditions" +license = "MIT OR Apache-2.0" +authors = ["n0 team"] +repository = "https://github.com/n0-computer/iroh" +keywords = ["networking"] + +# Sadly this also needs to be updated in .github/workflows/ci.yml +rust-version = "1.76" + +[lints] +workspace = true + +[dependencies] +anyhow = "1" +bytes = "1.7" +derive_more = { version = "1.0.0", features = ["display"] } +futures-buffered = "0.2.8" +futures-lite = "2.3" +hickory-resolver = "=0.25.0-alpha.2" +iroh-base = { version = "0.28.0", default-features = false, features = ["relay"] } +iroh-metrics = { version = "0.28.0", default-features = false, optional = true } +iroh-relay = { version = "0.28", path = "../iroh-relay" } +netwatch = { version = "0.1.0", path = "../net-tools/netwatch" } +portmapper = { version = "0.1.0", path = "../net-tools/portmapper" } +rand = "0.8" +reqwest = { version = "0.12", default-features = false } +rustls = { version = "0.23", default-features = false } +surge-ping = "0.8.0" +thiserror = "1" +tokio = { version = "1", default-features = false, features = ["sync", "time", "macros", "rt"] } +tokio-util = { version = "0.7.12", default-features = false } +tracing = "0.1" +url = { version = "2.4" } + +[dev-dependencies] +iroh-relay = { version = "0.28", path = 
"../iroh-relay", features = ["test-utils", "server"] } +iroh-test = "0.28.0" +once_cell = "1.18.0" +pretty_assertions = "1.4" +testresult = "0.4.0" +tokio = { version = "1", default-features = false, features = ["test-util"] } + +[features] +default = ["metrics"] +metrics = ["dep:iroh-metrics"] + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "iroh_docsrs"] diff --git a/iroh-net-report/LICENSE-BSD3 b/iroh-net-report/LICENSE-BSD3 new file mode 100644 index 0000000000..94d6e6abce --- /dev/null +++ b/iroh-net-report/LICENSE-BSD3 @@ -0,0 +1,30 @@ +Parts of the code has been derived from tailscale, which is under the following license. + +BSD 3-Clause License + +Copyright (c) 2020 Tailscale Inc & AUTHORS. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/iroh-net-report/README.md b/iroh-net-report/README.md new file mode 100644 index 0000000000..c6ea20383a --- /dev/null +++ b/iroh-net-report/README.md @@ -0,0 +1,71 @@ +# Iroh Net Report + +`iroh-net-report` is a tool for generating detailed reports on network +connectivity and configuration on the current host for nodes powered by [iroh]. +`iroh-net-report` evaluates key aspects of network performance and behavior, +including NAT traversal capabilities, protocol availability, and relay server +latencies. Key features include: + +- **STUN diagnostics** + + Evaluates the completion of UDP STUN round trips for both IPv4 and IPv6. + Determine the variability of STUN results based on the destination server. + This helps understand the type of NAT for this host's network. + +- **IPv4 and IPv6 connectivity checks** + + Verifies basic connectivity for IPv4 and IPv6, including the ability to bind + sockets and send packets. + +- **ICMP diagnostics** + + Performs ICMP round trips for IPv4 and IPv6 to assess reachability. + +- **Hair-Pinning detection** + + Determines whether the router supports hair-pinning, enabling communication + between devices on the same NATed network via their public IP. + +- **Port Mapping protocol support** + + Detects the presence of port mapping protocols like UPnP, PCP, or NAT-PMP for + enhanced NAT traversal. 
+ +- **Relay Server Latencies** + + Measures latency for the configured relay servers, keeping details about + IPv4-specific and IPv6-specific measurements. + +- **Global IP Address Discovery** + + Identifies the public (global) IPv4 and IPv6 addresses for the host. + +- **Captive Portal Detection** + + Identifies if the network is using a captive portal to intercept HTTP + traffic. + +- **Preferred Relay Identification** + + Detects the best relay server for use. + +Used in [iroh], created with love by the [n0 team](https://n0.computer/). + +# License + +This project is licensed under either of + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or + http://opensource.org/licenses/MIT) + +at your option. + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in this project by you, as defined in the Apache-2.0 license, +shall be dual licensed as above, without any additional terms or conditions. + +[iroh]: https://github.com/n0-computer/iroh diff --git a/iroh-net-report/src/defaults.rs b/iroh-net-report/src/defaults.rs new file mode 100644 index 0000000000..e40ae1e72a --- /dev/null +++ b/iroh-net-report/src/defaults.rs @@ -0,0 +1,43 @@ +//! Default values used in net_report. + +/// The default STUN port used by the Relay server. +/// +/// The STUN port as defined by [RFC 8489](https://www.rfc-editor.org/rfc/rfc8489#section-18.6) +pub const DEFAULT_STUN_PORT: u16 = 3478; + +/// Contains all timeouts that we use in `iroh-net-report`. +pub(crate) mod timeouts { + use std::time::Duration; + + // Timeouts for net_report + + /// The maximum amount of time net_report will spend gathering a single report. + pub(crate) const OVERALL_REPORT_TIMEOUT: Duration = Duration::from_secs(5); + + /// The total time we wait for all the probes.
+ /// + /// This includes the STUN, ICMP and HTTPS probes, which will all + /// start at different times based on the ProbePlan. + pub(crate) const PROBES_TIMEOUT: Duration = Duration::from_secs(3); + + /// How long to await for a captive-portal result. + /// + /// This delay is chosen so it starts after good-working STUN probes + /// would have finished, but not too long so the delay is bearable if + /// STUN is blocked. + pub(crate) const CAPTIVE_PORTAL_DELAY: Duration = Duration::from_millis(200); + + /// Timeout for captive portal checks + /// + /// Must be lower than [`OVERALL_REPORT_TIMEOUT`] minus + /// [`CAPTIVE_PORTAL_DELAY`]. + pub(crate) const CAPTIVE_PORTAL_TIMEOUT: Duration = Duration::from_secs(2); + + pub(crate) const DNS_TIMEOUT: Duration = Duration::from_secs(3); + + /// The amount of time we wait for a hairpinned packet to come back. + pub(crate) const HAIRPIN_CHECK_TIMEOUT: Duration = Duration::from_millis(100); + + /// Default Pinger timeout + pub(crate) const DEFAULT_PINGER_TIMEOUT: Duration = Duration::from_secs(5); +} diff --git a/iroh-net-report/src/dns.rs b/iroh-net-report/src/dns.rs new file mode 100644 index 0000000000..17ac6855da --- /dev/null +++ b/iroh-net-report/src/dns.rs @@ -0,0 +1,261 @@ +use std::{fmt::Write, net::IpAddr}; + +use anyhow::Result; +use futures_lite::{Future, StreamExt}; +use hickory_resolver::{IntoName, TokioAsyncResolver}; + +use crate::defaults::timeouts::DNS_TIMEOUT; + +/// Delay used to perform staggered dns queries. +pub(crate) const DNS_STAGGERING_MS: &[u64] = &[200, 300]; + +/// Extension trait to [`TokioAsyncResolver`]. +pub(crate) trait ResolverExt { + /// Perform an ipv4 lookup. + fn lookup_ipv4( + &self, + host: N, + ) -> impl Future>>; + + /// Perform an ipv6 lookup. + fn lookup_ipv6( + &self, + host: N, + ) -> impl Future>>; + + /// Race an ipv4 and ipv6. + fn lookup_ipv4_ipv6( + &self, + host: N, + ) -> impl Future>>; + + /// Perform an ipv4 lookup in a staggered fashion. 
+ /// + /// From the moment this function is called, each lookup is scheduled after the delays in + /// [`DNS_STAGGERING_MS`] with the first call being done immediately. `[200ms, 300ms]` results + /// in calls at T+0ms, T+200ms and T+300ms. The [`DNS_TIMEOUT`] is applied to each call individually. + /// The result of the first successful call is returned, or a summary of all errors otherwise. + fn lookup_ipv4_staggered( + &self, + host: N, + ) -> impl Future>>; + + /// Perform an ipv6 lookup in a staggered fashion. + /// + /// From the moment this function is called, each lookup is scheduled after the delays in + /// [`DNS_STAGGERING_MS`] with the first call being done immediately. `[200ms, 300ms]` results + /// in calls at T+0ms, T+200ms and T+300ms. The [`DNS_TIMEOUT`] is applied to each call individually. + /// The result of the first successful call is returned, or a summary of all errors otherwise. + fn lookup_ipv6_staggered( + &self, + host: N, + ) -> impl Future>>; + + /// Race an ipv4 and ipv6 lookup in a staggered fashion. + /// + /// From the moment this function is called, each lookup is scheduled after the delays in + /// [`DNS_STAGGERING_MS`] with the first call being done immediately. `[200ms, 300ms]` results + /// in calls at T+0ms, T+200ms and T+300ms. The [`DNS_TIMEOUT`] is applied as stated in + /// [`Self::lookup_ipv4_ipv6`]. The result of the first successful call is returned, or a + /// summary of all errors otherwise.
+ fn lookup_ipv4_ipv6_staggered( + &self, + host: N, + ) -> impl Future>>; +} + +impl ResolverExt for TokioAsyncResolver { + async fn lookup_ipv4(&self, host: N) -> Result> { + let addrs = tokio::time::timeout(DNS_TIMEOUT, self.ipv4_lookup(host)).await??; + Ok(addrs.into_iter().map(|ip| IpAddr::V4(ip.0))) + } + + async fn lookup_ipv6(&self, host: N) -> Result> { + let addrs = tokio::time::timeout(DNS_TIMEOUT, self.ipv6_lookup(host)).await??; + Ok(addrs.into_iter().map(|ip| IpAddr::V6(ip.0))) + } + + /// Resolve IPv4 and IPv6 in parallel. + /// + /// `LookupIpStrategy::Ipv4AndIpv6` will wait for ipv6 resolution timeout, even if it is + /// not usable on the stack, so we manually query both lookups concurrently and time them out + /// individually. + /// + /// See [`ResolverExt::lookup_ipv4_ipv6`]. + async fn lookup_ipv4_ipv6( + &self, + host: N, + ) -> Result> { + let res = tokio::join!(self.lookup_ipv4(host.clone()), self.lookup_ipv6(host)); + + match res { + (Ok(ipv4), Ok(ipv6)) => Ok(LookupIter::Both(ipv4.chain(ipv6))), + (Ok(ipv4), Err(_)) => Ok(LookupIter::Ipv4(ipv4)), + (Err(_), Ok(ipv6)) => Ok(LookupIter::Ipv6(ipv6)), + (Err(ipv4_err), Err(ipv6_err)) => { + anyhow::bail!("Ipv4: {:?}, Ipv6: {:?}", ipv4_err, ipv6_err) + } + } + } + + async fn lookup_ipv4_staggered( + &self, + host: N, + ) -> Result> { + let f = || self.lookup_ipv4(host.clone()); + stagger_call(f, DNS_STAGGERING_MS).await + } + + async fn lookup_ipv6_staggered( + &self, + host: N, + ) -> Result> { + let f = || self.lookup_ipv6(host.clone()); + stagger_call(f, DNS_STAGGERING_MS).await + } + + async fn lookup_ipv4_ipv6_staggered( + &self, + host: N, + ) -> Result> { + let f = || self.lookup_ipv4_ipv6(host.clone()); + stagger_call(f, DNS_STAGGERING_MS).await + } +} + +/// Helper enum to give a unified type to the iterators of [`ResolverExt::lookup_ipv4_ipv6`]. 
+enum LookupIter { + Ipv4(A), + Ipv6(B), + Both(std::iter::Chain), +} + +impl, B: Iterator> Iterator for LookupIter { + type Item = IpAddr; + + fn next(&mut self) -> Option { + match self { + LookupIter::Ipv4(iter) => iter.next(), + LookupIter::Ipv6(iter) => iter.next(), + LookupIter::Both(iter) => iter.next(), + } + } +} + +/// Staggers calls to the future F with the given delays. +/// +/// The first call is performed immediately. The first call to succeed generates an Ok result +/// ignoring any previous error. If all calls fail, an error summarizing all errors is returned. +async fn stagger_call Fut, Fut: Future>>( + f: F, + delays_ms: &[u64], +) -> Result { + let mut calls = futures_buffered::FuturesUnorderedBounded::new(delays_ms.len() + 1); + // NOTE: we add the 0 delay here to have a uniform set of futures. This is more performant than + // using alternatives that allow futures of different types. + for delay in std::iter::once(&0u64).chain(delays_ms) { + let delay = std::time::Duration::from_millis(*delay); + let fut = f(); + let staggered_fut = async move { + tokio::time::sleep(delay).await; + fut.await + }; + calls.push(staggered_fut) + } + + let mut errors = vec![]; + while let Some(call_result) = calls.next().await { + match call_result { + Ok(t) => return Ok(t), + Err(e) => errors.push(e), + } + } + + anyhow::bail!( + "no calls succeed: [ {}]", + errors.into_iter().fold(String::new(), |mut summary, e| { + write!(summary, "{e} ").expect("infallible"); + summary + }) + ) +} + +#[cfg(test)] +pub(crate) mod tests { + use std::{net::Ipv6Addr, sync::atomic::AtomicUsize}; + + use once_cell::sync::Lazy; + + use super::*; + + static DNS_RESOLVER: Lazy = + Lazy::new(|| create_default_resolver().expect("unable to create DNS resolver")); + + /// Get a DNS resolver suitable for testing. + pub fn resolver() -> &'static TokioAsyncResolver { + Lazy::force(&DNS_RESOLVER) + } + + /// Deprecated IPv6 site-local anycast addresses still configured by windows. 
+ /// + /// Windows still configures these site-local addresses as soon as even an IPv6 loopback + /// interface is configured. We do not want to use these DNS servers: the chance of them + /// being usable is almost always close to zero, while the chance of the DNS configuration + /// relying **only** on these servers and not also being configured normally is also almost + /// zero. The chance of the DNS resolver accidentally trying one of these and taking a + /// bunch of timeouts to figure out they're no good is, on the other hand, very high. + const WINDOWS_BAD_SITE_LOCAL_DNS_SERVERS: [IpAddr; 3] = [ + IpAddr::V6(Ipv6Addr::new(0xfec0, 0, 0, 0xffff, 0, 0, 0, 1)), + IpAddr::V6(Ipv6Addr::new(0xfec0, 0, 0, 0xffff, 0, 0, 0, 2)), + IpAddr::V6(Ipv6Addr::new(0xfec0, 0, 0, 0xffff, 0, 0, 0, 3)), + ]; + + /// Creates the DNS resolver used by the tests. + /// + /// We first try to read the system's resolver from `/etc/resolv.conf`. + /// This does not work at least on some Androids, therefore we fall back + /// to the default `ResolverConfig` which uses e.g. Google's `8.8.8.8` or `8.8.4.4`. + fn create_default_resolver() -> Result { + let (system_config, mut options) = + hickory_resolver::system_conf::read_system_conf().unwrap_or_default(); + + // Copy all of the system config, but strip the bad Windows nameservers. Unfortunately + // there is no easy way to do this.
+ let mut config = hickory_resolver::config::ResolverConfig::new(); + if let Some(name) = system_config.domain() { + config.set_domain(name.clone()); + } + for name in system_config.search() { + config.add_search(name.clone()); + } + for nameserver_cfg in system_config.name_servers() { + if !WINDOWS_BAD_SITE_LOCAL_DNS_SERVERS.contains(&nameserver_cfg.socket_addr.ip()) { + config.add_name_server(nameserver_cfg.clone()); + } + } + + // see [`ResolverExt::lookup_ipv4_ipv6`] for info on why we avoid `LookupIpStrategy::Ipv4AndIpv6` + options.ip_strategy = hickory_resolver::config::LookupIpStrategy::Ipv4thenIpv6; + + let resolver = hickory_resolver::AsyncResolver::tokio(config, options); + Ok(resolver) + } + + #[tokio::test] + async fn stagger_basic() { + let _logging = iroh_test::logging::setup(); + const CALL_RESULTS: &[Result] = &[Err(2), Ok(3), Ok(5), Ok(7)]; + static DONE_CALL: AtomicUsize = AtomicUsize::new(0); + let f = || { + let r_pos = DONE_CALL.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + async move { + tracing::info!(r_pos, "call"); + CALL_RESULTS[r_pos].map_err(|e| anyhow::anyhow!("{e}")) + } + }; + + let delays = [1000, 15]; + let result = stagger_call(f, &delays).await.unwrap(); + assert_eq!(result, 5) + } +} diff --git a/iroh-net/src/netcheck.rs b/iroh-net-report/src/lib.rs similarity index 94% rename from iroh-net/src/netcheck.rs rename to iroh-net-report/src/lib.rs index 54994c2a0b..cd094cbbd6 100644 --- a/iroh-net/src/netcheck.rs +++ b/iroh-net-report/src/lib.rs @@ -1,10 +1,9 @@ //! Checks the network conditions from the current host. //! -//! Netcheck is responsible for finding out the network conditions of the current host, like +//! NetReport is responsible for finding out the network conditions of the current host, like //! whether it is connected to the internet via IPv4 and/or IPv6, what the NAT situation is -//! etc. -//! -//! Based on +//! etc and reachability to the configured relays. 
+// Based on use std::{ collections::{BTreeMap, HashMap}, @@ -16,6 +15,8 @@ use std::{ use anyhow::{anyhow, Context as _, Result}; use bytes::Bytes; use hickory_resolver::TokioAsyncResolver as DnsResolver; +use iroh_base::relay_map::{RelayMap, RelayNode, RelayUrl}; +#[cfg(feature = "metrics")] use iroh_metrics::inc; use iroh_relay::protos::stun; use netwatch::{IpFamily, UdpSocket}; @@ -26,13 +27,15 @@ use tokio::{ use tokio_util::{sync::CancellationToken, task::AbortOnDropHandle}; use tracing::{debug, error, info_span, trace, warn, Instrument}; -use crate::{RelayMap, RelayUrl}; - +mod defaults; +mod dns; +#[cfg(feature = "metrics")] mod metrics; +mod ping; mod reportgen; +#[cfg(feature = "metrics")] pub use metrics::Metrics; -use Metrics as NetcheckMetrics; const FULL_REPORT_INTERVAL: Duration = Duration::from_secs(5 * 60); @@ -45,7 +48,7 @@ const FULL_REPORT_INTERVAL: Duration = Duration::from_secs(5 * 60); /// default which will never be used. const DEFAULT_MAX_LATENCY: Duration = Duration::from_millis(100); -/// A netcheck report. +/// A net_report report. /// /// Can be obtained by calling [`Client::get_report`]. #[derive(Default, Debug, PartialEq, Eq, Clone)] @@ -156,15 +159,15 @@ impl RelayLatencies { } } -/// Client to run netchecks. +/// Client to run net_reports. /// -/// Creating this creates a netcheck actor which runs in the background. Most of the time +/// Creating this creates a net_report actor which runs in the background. Most of the time /// it is idle unless [`Client::get_report`] is called, which is the main interface. /// /// The [`Client`] struct can be cloned and results multiple handles to the running actor. /// If all [`Client`]s are dropped the actor stops running. /// -/// While running the netcheck actor expects to be passed all received stun packets using +/// While running the net_report actor expects to be passed all received stun packets using /// `Addr::receive_stun_packet`. 
#[derive(Debug)] pub struct Client { @@ -201,15 +204,16 @@ impl Default for Reports { } impl Client { - /// Creates a new netcheck client. + /// Creates a new net_report client. /// /// This starts a connected actor in the background. Once the client is dropped it will /// stop running. pub fn new(port_mapper: Option, dns_resolver: DnsResolver) -> Result { let mut actor = Actor::new(port_mapper, dns_resolver)?; let addr = actor.addr(); - let task = - tokio::spawn(async move { actor.run().await }.instrument(info_span!("netcheck.actor"))); + let task = tokio::spawn( + async move { actor.run().await }.instrument(info_span!("net_report.actor")), + ); let drop_guard = AbortOnDropHandle::new(task); Ok(Client { addr, @@ -221,11 +225,11 @@ impl Client { /// /// Unlike the client itself the returned [`Addr`] does not own the actor task, it only /// allows sending messages to the actor. - pub(crate) fn addr(&self) -> Addr { + pub fn addr(&self) -> Addr { self.addr.clone() } - /// Runs a netcheck, returning the report. + /// Runs a net_report, returning the report. /// /// It may not be called concurrently with itself, `&mut self` takes care of that. /// @@ -286,9 +290,9 @@ pub(crate) struct Inflight { /// Messages to send to the [`Actor`]. #[derive(Debug)] pub(crate) enum Message { - /// Run a netcheck. + /// Run a net_report. /// - /// Only one netcheck can be run at a time, trying to run multiple concurrently will + /// Only one net_report can be run at a time, trying to run multiple concurrently will /// fail. RunCheck { /// The relay configuration. @@ -326,25 +330,25 @@ pub(crate) enum Message { InFlightStun(Inflight, oneshot::Sender<()>), } -/// Sender to the [`Actor`]. +/// Sender to the main service. /// /// Unlike [`Client`] this is the raw channel to send messages over. Keeping this alive /// will not keep the actor alive, which makes this handy to pass to internal tasks. 
#[derive(Debug, Clone)] -pub(crate) struct Addr { +pub struct Addr { sender: mpsc::Sender, } impl Addr { - /// Pass a received STUN packet to the netchecker. + /// Pass a received STUN packet to the net_reporter. /// /// Normally the UDP sockets to send STUN messages from are passed in so that STUN /// packets are sent from the sockets that carry the real traffic. However because /// these sockets carry real traffic they will also receive non-STUN traffic, thus the - /// netcheck actor does not read from the sockets directly. If you receive a STUN + /// net_report actor does not read from the sockets directly. If you receive a STUN /// packet on the socket you should pass it to this method. /// - /// It is safe to call this even when the netcheck actor does not currently have any + /// It is safe to call this even when the net_report actor does not currently have any /// in-flight STUN probes. The actor will simply ignore any stray STUN packets. /// /// There is an implicit queue here which may drop packets if the actor does not keep up @@ -354,19 +358,20 @@ impl Addr { payload, from_addr: src, }) { - inc!(NetcheckMetrics, stun_packets_dropped); + #[cfg(feature = "metrics")] + inc!(Metrics, stun_packets_dropped); warn!("dropping stun packet from {}", src); } } async fn send(&self, msg: Message) -> Result<(), mpsc::error::SendError> { self.sender.send(msg).await.inspect_err(|_| { - error!("netcheck actor lost"); + error!("net_report actor lost"); }) } } -/// The netcheck actor. +/// The net_report actor. /// /// This actor runs for the entire duration there's a [`Client`] connected. #[derive(Debug)] @@ -435,7 +440,7 @@ impl Actor { /// It will now run and handle messages. Once the connected [`Client`] (including all /// its clones) is dropped this will terminate. 
async fn run(&mut self) { - debug!("netcheck actor starting"); + debug!("net_report actor starting"); while let Some(msg) = self.receiver.recv().await { trace!(?msg, "handling message"); match msg { @@ -499,7 +504,7 @@ impl Actor { || now.duration_since(self.reports.last_full) > FULL_REPORT_INTERVAL; // If the last report had a captive portal and reported no UDP access, - // it's possible that we didn't get a useful netcheck due to the + // it's possible that we didn't get a useful net_report due to the // captive portal blocking us. If so, make this report a full (non-incremental) one. if !do_full { if let Some(ref last) = self.reports.last { @@ -510,9 +515,11 @@ impl Actor { self.reports.last = None; // causes ProbePlan::new below to do a full (initial) plan self.reports.next_full = false; self.reports.last_full = now; - inc!(NetcheckMetrics, reports_full); + #[cfg(feature = "metrics")] + inc!(Metrics, reports_full); } - inc!(NetcheckMetrics, reports); + #[cfg(feature = "metrics")] + inc!(Metrics, reports); let actor = reportgen::Client::new( self.addr(), @@ -556,12 +563,13 @@ impl Actor { return; } + #[cfg(feature = "metrics")] match &src { SocketAddr::V4(_) => { - inc!(NetcheckMetrics, stun_packets_recv_ipv4); + inc!(Metrics, stun_packets_recv_ipv4); } SocketAddr::V6(_) => { - inc!(NetcheckMetrics, stun_packets_recv_ipv6); + inc!(Metrics, stun_packets_recv_ipv6); } } @@ -687,12 +695,12 @@ impl Actor { } } -/// State the netcheck actor needs for an in-progress report generation. +/// State the net_report actor needs for an in-progress report generation. #[derive(Debug)] struct ReportRun { /// The handle of the [`reportgen`] actor, cancels the actor on drop. _reportgen: reportgen::Client, - /// Drop guard to optionally kill workers started by netcheck to support reportgen. + /// Drop guard to optionally kill workers started by net_report to support reportgen. _drop_guard: tokio_util::sync::DropGuard, /// Where to send the completed report. 
report_tx: oneshot::Sender>>, @@ -765,7 +773,7 @@ async fn recv_stun_once(sock: &UdpSocket, buf: &mut [u8], actor_addr: &Addr) -> } /// Test if IPv6 works at all, or if it's been hard disabled at the OS level. -pub(crate) fn os_has_ipv6() -> bool { +pub fn os_has_ipv6() -> bool { UdpSocket::bind_local_v6(0).is_ok() } @@ -956,7 +964,7 @@ mod tests { let (stun_addr, stun_stats, _cleanup_guard) = stun_utils::serve("127.0.0.1".parse().unwrap()).await?; - let resolver = crate::dns::default_resolver(); + let resolver = crate::dns::tests::resolver(); let mut client = Client::new(None, resolver.clone())?; let dm = stun_utils::relay_map_of([stun_addr].into_iter()); @@ -1001,8 +1009,8 @@ mod tests { let dm = stun_utils::relay_map_of_opts([(stun_addr, false)].into_iter()); // Now create a client and generate a report. - let resolver = crate::dns::default_resolver().clone(); - let mut client = Client::new(None, resolver)?; + let resolver = crate::dns::tests::resolver(); + let mut client = Client::new(None, resolver.clone())?; let r = client.get_report(dm, None, None).await?; let mut r: Report = (*r).clone(); @@ -1202,10 +1210,10 @@ mod tests { want_relay: Some(relay_url(2)), // 2 got fast enough }, ]; + let resolver = crate::dns::tests::resolver(); for mut tt in tests { println!("test: {}", tt.name); - let resolver = crate::dns::default_resolver().clone(); - let mut actor = Actor::new(None, resolver).unwrap(); + let mut actor = Actor::new(None, resolver.clone()).unwrap(); for s in &mut tt.steps { // trigger the timer time::advance(Duration::from_secs(s.after)).await; @@ -1239,14 +1247,14 @@ mod tests { let dm = stun_utils::relay_map_of([stun_addr].into_iter()); dbg!(&dm); - let resolver = crate::dns::default_resolver().clone(); + let resolver = crate::dns::tests::resolver().clone(); let mut client = Client::new(None, resolver)?; // Set up an external socket to send STUN requests from, this will be discovered as // our public socket address by STUN. 
We send back any packets received on this - // socket to the netcheck client using Client::receive_stun_packet. Once we sent + // socket to the net_report client using Client::receive_stun_packet. Once we sent // the hairpin STUN request (from a different randomly bound socket) we are sending - // it to this socket, which is forwarnding it back to our netcheck client, because + // it to this socket, which is forwarnding it back to our net_report client, because // this dumb implementation just forwards anything even if it would be garbage. // Thus hairpinning detection will declare hairpinning to work. let sock = UdpSocket::bind_local(IpFamily::V4, 0)?; @@ -1254,7 +1262,7 @@ mod tests { info!(addr=?sock.local_addr().unwrap(), "Using local addr"); let task = { let sock = sock.clone(); - let addr = client.addr(); + let addr = client.addr.clone(); tokio::spawn( async move { let mut buf = BytesMut::zeroed(64 << 10); @@ -1263,7 +1271,7 @@ mod tests { info!( addr=?sock.local_addr().unwrap(), %count, - "Forwarding payload to netcheck client", + "Forwarding payload to net_report client", ); let payload = buf.split_to(count).freeze(); addr.receive_stun_packet(payload, src); diff --git a/iroh-net/src/netcheck/metrics.rs b/iroh-net-report/src/metrics.rs similarity index 86% rename from iroh-net/src/netcheck/metrics.rs rename to iroh-net-report/src/metrics.rs index f3891d4792..e68daff12e 100644 --- a/iroh-net/src/netcheck/metrics.rs +++ b/iroh-net-report/src/metrics.rs @@ -26,14 +26,16 @@ impl Default for Metrics { stun_packets_sent_ipv6: Counter::new("Number of IPv6 STUN packets sent"), stun_packets_recv_ipv4: Counter::new("Number of IPv4 STUN packets received"), stun_packets_recv_ipv6: Counter::new("Number of IPv6 STUN packets received"), - reports: Counter::new("Number of reports executed by netcheck, including full reports"), - reports_full: Counter::new("Number of full reports executed by netcheck"), + reports: Counter::new( + "Number of reports executed by net_report, 
including full reports", + ), + reports_full: Counter::new("Number of full reports executed by net_report"), } } } impl Metric for Metrics { fn name() -> &'static str { - "netcheck" + "net_report" } } diff --git a/iroh-net/src/ping.rs b/iroh-net-report/src/ping.rs similarity index 98% rename from iroh-net/src/ping.rs rename to iroh-net-report/src/ping.rs index 871fbe2376..159e7103a6 100644 --- a/iroh-net/src/ping.rs +++ b/iroh-net-report/src/ping.rs @@ -90,7 +90,7 @@ impl Pinger { let ident = PingIdentifier(rand::random()); debug!(%addr, %ident, "Creating pinger"); let mut pinger = client.pinger(addr, ident).await; - pinger.timeout(DEFAULT_TIMEOUT); // todo: timeout too large for netcheck + pinger.timeout(DEFAULT_TIMEOUT); // todo: timeout too large for net_report match pinger.ping(PingSequence(0), data).await? { (IcmpPacket::V4(packet), dur) => { debug!( @@ -157,7 +157,7 @@ mod tests { Ok(()) } - // See netcheck::reportgen::tests::test_icmp_probe_eu_relay for permissions to ping. + // See net_report::reportgen::tests::test_icmp_probe_eu_relay for permissions to ping. #[tokio::test] async fn test_ping_localhost() { let _guard = iroh_test::logging::setup(); diff --git a/iroh-net/src/netcheck/reportgen.rs b/iroh-net-report/src/reportgen.rs similarity index 94% rename from iroh-net/src/netcheck/reportgen.rs rename to iroh-net-report/src/reportgen.rs index 8e843b1a9b..307a806777 100644 --- a/iroh-net/src/netcheck/reportgen.rs +++ b/iroh-net-report/src/reportgen.rs @@ -1,4 +1,4 @@ -//! The reportgen actor is responsible for generating a single netcheck report. +//! The reportgen actor is responsible for generating a single net_report report. //! //! It is implemented as an actor with [`Client`] as handle. //! @@ -14,17 +14,20 @@ //! - Loops driving the futures and handling actor messages: //! - Disables futures as they are completed or aborted. //! - Stop if there are no outstanding tasks/futures, or on timeout. -//! - Sends the completed report to the netcheck actor. 
+//! - Sends the completed report to the net_report actor. use std::{ future::Future, net::{IpAddr, SocketAddr}, pin::Pin, sync::Arc, + task::{Context, Poll}, time::Duration, }; -use anyhow::{anyhow, bail, Context, Result}; +use anyhow::{anyhow, bail, Context as _, Result}; +use hickory_resolver::TokioAsyncResolver as DnsResolver; +#[cfg(feature = "metrics")] use iroh_metrics::inc; use iroh_relay::{http::RELAY_PROBE_PATH, protos::stun}; use netwatch::{interfaces, UdpSocket}; @@ -38,14 +41,14 @@ use tokio_util::task::AbortOnDropHandle; use tracing::{debug, debug_span, error, info_span, trace, warn, Instrument, Span}; use url::Host; -use super::NetcheckMetrics; +#[cfg(feature = "metrics")] +use crate::Metrics; use crate::{ + self as net_report, defaults::DEFAULT_STUN_PORT, - dns::{DnsResolver, ResolverExt}, - netcheck::{self, Report}, + dns::ResolverExt, ping::{PingError, Pinger}, - util::MaybeFuture, - RelayMap, RelayNode, RelayUrl, + RelayMap, RelayNode, RelayUrl, Report, }; mod hairpin; @@ -54,16 +57,12 @@ mod probes; use probes::{Probe, ProbePlan, ProbeProto}; use crate::defaults::timeouts::{ - CAPTIVE_PORTAL_DELAY, CAPTIVE_PORTAL_TIMEOUT, DNS_TIMEOUT, OVERALL_REPORT_TIMEOUT, - PROBES_TIMEOUT, + CAPTIVE_PORTAL_DELAY, CAPTIVE_PORTAL_TIMEOUT, OVERALL_REPORT_TIMEOUT, PROBES_TIMEOUT, }; const ENOUGH_NODES: usize = 3; -/// Delay used to perform staggered dns queries. -const DNS_STAGGERING_MS: &[u64] = &[200, 300]; - -/// Holds the state for a single invocation of [`netcheck::Client::get_report`]. +/// Holds the state for a single invocation of [`net_report::Client::get_report`]. /// /// Dropping this will cancel the actor and stop the report generation. #[derive(Debug)] @@ -78,7 +77,7 @@ impl Client { /// The actor starts running immediately and only generates a single report, after which /// it shuts down. Dropping this handle will abort the actor. 
pub(super) fn new( - netcheck: netcheck::Addr, + net_report: net_report::Addr, last_report: Option>, port_mapper: Option, relay_map: RelayMap, @@ -93,14 +92,14 @@ impl Client { let mut actor = Actor { msg_tx, msg_rx, - netcheck: netcheck.clone(), + net_report: net_report.clone(), last_report, port_mapper, relay_map, stun_sock4, stun_sock6, report: Report::default(), - hairpin_actor: hairpin::Client::new(netcheck, addr), + hairpin_actor: hairpin::Client::new(net_report, addr), outstanding_tasks: OutstandingTasks::default(), dns_resolver, }; @@ -158,8 +157,8 @@ struct Actor { msg_tx: mpsc::Sender, /// The receiver of the message channel. msg_rx: mpsc::Receiver, - /// The address of the netcheck actor. - netcheck: super::Addr, + /// The address of the net_report actor. + net_report: super::Addr, // Provided state /// The previous report, if it exists. @@ -197,8 +196,8 @@ impl Actor { match self.run_inner().await { Ok(_) => debug!("reportgen actor finished"), Err(err) => { - self.netcheck - .send(netcheck::Message::ReportAborted { err }) + self.net_report + .send(net_report::Message::ReportAborted { err }) .await .ok(); } @@ -216,7 +215,7 @@ impl Actor { /// - Drives all the above futures. /// - Receives actor messages (sent by those futures). /// - Updates the report, cancels unneeded futures. - /// - Sends the report to the netcheck actor. + /// - Sends the report to the net_report actor. 
async fn run_inner(&mut self) -> Result<()> { debug!( port_mapper = %self.port_mapper.is_some(), @@ -308,9 +307,9 @@ impl Actor { drop(probes); } - debug!("Sending report to netcheck actor"); - self.netcheck - .send(netcheck::Message::ReportReady { + debug!("Sending report to net_report actor"); + self.net_report + .send(net_report::Message::ReportReady { report: Box::new(self.report.clone()), }) .await?; @@ -547,7 +546,7 @@ impl Actor { let stun_sock6 = self.stun_sock6.clone(); let relay_node = probe.node().clone(); let probe = probe.clone(); - let netcheck = self.netcheck.clone(); + let net_report = self.net_report.clone(); let pinger = pinger.clone(); let dns_resolver = self.dns_resolver.clone(); @@ -558,7 +557,7 @@ impl Actor { stun_sock6, relay_node, probe.clone(), - netcheck, + net_report, pinger, dns_resolver, ) @@ -679,7 +678,7 @@ async fn run_probe( stun_sock6: Option>, relay_node: Arc, probe: Probe, - netcheck: netcheck::Addr, + net_report: net_report::Addr, pinger: Pinger, dns_resolver: DnsResolver, ) -> Result { @@ -730,7 +729,7 @@ async fn run_probe( }; match maybe_sock { Some(sock) => { - result = run_stun_probe(sock, relay_addr, netcheck, probe).await?; + result = run_stun_probe(sock, relay_addr, net_report, probe).await?; } None => { return Err(ProbeError::AbortSet( @@ -773,7 +772,7 @@ async fn run_probe( async fn run_stun_probe( sock: &Arc, relay_addr: SocketAddr, - netcheck: netcheck::Addr, + net_report: net_report::Addr, probe: Probe, ) -> Result { match probe.proto() { @@ -784,12 +783,12 @@ async fn run_stun_probe( let txid = stun::TransactionId::default(); let req = stun::request(txid); - // Setup netcheck to give us back the incoming STUN response. + // Setup net_report to give us back the incoming STUN response. 
let (stun_tx, stun_rx) = oneshot::channel(); let (inflight_ready_tx, inflight_ready_rx) = oneshot::channel(); - netcheck - .send(netcheck::Message::InFlightStun( - netcheck::Inflight { + net_report + .send(net_report::Message::InFlightStun( + net_report::Inflight { txn: txid, start: Instant::now(), s: stun_tx, @@ -810,10 +809,12 @@ async fn run_stun_probe( if matches!(probe, Probe::StunIpv4 { .. }) { result.ipv4_can_send = true; - inc!(NetcheckMetrics, stun_packets_sent_ipv4); + #[cfg(feature = "metrics")] + inc!(Metrics, stun_packets_sent_ipv4); } else { result.ipv6_can_send = true; - inc!(NetcheckMetrics, stun_packets_sent_ipv6); + #[cfg(feature = "metrics")] + inc!(Metrics, stun_packets_sent_ipv6); } let (delay, addr) = stun_rx .await @@ -896,7 +897,7 @@ async fn check_captive_portal( // Ideally we would try to resolve **both** IPv4 and IPv6 rather than purely race // them. But our resolver doesn't support that yet. let addrs: Vec<_> = dns_resolver - .lookup_ipv4_ipv6_staggered(domain, DNS_TIMEOUT, DNS_STAGGERING_MS) + .lookup_ipv4_ipv6_staggered(domain) .await? 
.map(|ipaddr| SocketAddr::new(ipaddr, 0)) .collect(); @@ -959,10 +960,7 @@ async fn get_relay_addr( ProbeProto::StunIpv4 | ProbeProto::IcmpV4 => match relay_node.url.host() { Some(url::Host::Domain(hostname)) => { debug!(?proto, %hostname, "Performing DNS A lookup for relay addr"); - match dns_resolver - .lookup_ipv4_staggered(hostname, DNS_TIMEOUT, DNS_STAGGERING_MS) - .await - { + match dns_resolver.lookup_ipv4_staggered(hostname).await { Ok(mut addrs) => addrs .next() .map(|ip| ip.to_canonical()) @@ -979,10 +977,7 @@ async fn get_relay_addr( ProbeProto::StunIpv6 | ProbeProto::IcmpV6 => match relay_node.url.host() { Some(url::Host::Domain(hostname)) => { debug!(?proto, %hostname, "Performing DNS AAAA lookup for relay addr"); - match dns_resolver - .lookup_ipv6_staggered(hostname, DNS_TIMEOUT, DNS_STAGGERING_MS) - .await - { + match dns_resolver.lookup_ipv6_staggered(hostname).await { Ok(mut addrs) => addrs .next() .map(|ip| ip.to_canonical()) @@ -1067,7 +1062,7 @@ async fn measure_https_latency( // but staggered for reliability. Ideally this tries to resolve **both** IPv4 and // IPv6 though. But our resolver does not have a function for that yet. let addrs: Vec<_> = dns_resolver - .lookup_ipv4_ipv6_staggered(domain, DNS_TIMEOUT, DNS_STAGGERING_MS) + .lookup_ipv4_ipv6_staggered(domain) .await? .map(|ipaddr| SocketAddr::new(ipaddr, 0)) .collect(); @@ -1109,7 +1104,7 @@ async fn measure_https_latency( } } -/// Updates a netcheck [`Report`] with a new [`ProbeReport`]. +/// Updates a net_report [`Report`] with a new [`ProbeReport`]. fn update_report(report: &mut Report, probe_report: ProbeReport) { let relay_node = probe_report.probe.node(); if let Some(latency) = probe_report.latency { @@ -1171,6 +1166,31 @@ fn update_report(report: &mut Report, probe_report: ProbeReport) { .or(probe_report.icmpv6); } +/// Resolves to pending if the inner is `None`. +#[derive(Debug)] +pub(crate) struct MaybeFuture { + /// Future to be polled. 
+ pub inner: Option, +} + +// NOTE: explicit implementation to bypass derive unnecessary bounds +impl Default for MaybeFuture { + fn default() -> Self { + MaybeFuture { inner: None } + } +} + +impl Future for MaybeFuture { + type Output = T::Output; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + match self.inner { + Some(ref mut t) => Pin::new(t).poll(cx), + None => Poll::Pending, + } + } +} + #[cfg(test)] mod tests { use std::net::{Ipv4Addr, Ipv6Addr}; @@ -1338,11 +1358,11 @@ mod tests { // // Build the test binary: // - // cargo nextest run -p iroh_net netcheck::reportgen::tests --no-run + // cargo nextest run -p iroh_net net_report::reportgen::tests --no-run // // Find out the test binary location: // - // cargo nextest list --message-format json -p iroh-net netcheck::reportgen::tests \ + // cargo nextest list --message-format json -p iroh-net net_report::reportgen::tests \ // | jq '."rust-suites"."iroh-net"."binary-path"' | tr -d \" // // Set the CAP_NET_RAW permission, note that nextest runs each test in a child process @@ -1352,7 +1372,7 @@ mod tests { // // Finally run the test: // - // cargo nextest run -p iroh_net netcheck::reportgen::tests + // cargo nextest run -p iroh_net net_report::reportgen::tests // // This allows the pinger to create a SOCK_RAW socket for IPPROTO_ICMP. 
// @@ -1429,7 +1449,7 @@ mod tests { async fn test_measure_https_latency() -> TestResult { let _logging_guard = iroh_test::logging::setup(); let (server, relay) = test_utils::relay().await; - let dns_resolver = crate::dns::resolver(); + let dns_resolver = crate::dns::tests::resolver(); tracing::info!(relay_url = ?relay.url , "RELAY_URL"); let (latency, ip) = measure_https_latency(dns_resolver, &relay, server.certificates()).await?; diff --git a/iroh-net/src/netcheck/reportgen/hairpin.rs b/iroh-net-report/src/reportgen/hairpin.rs similarity index 83% rename from iroh-net/src/netcheck/reportgen/hairpin.rs rename to iroh-net-report/src/reportgen/hairpin.rs index b2a3fc806c..dc730a7c9a 100644 --- a/iroh-net/src/netcheck/reportgen/hairpin.rs +++ b/iroh-net-report/src/reportgen/hairpin.rs @@ -6,7 +6,7 @@ //! - binds socket //! - sends traffic from it's socket to trick some routers //! - When requested performs the hairpin probe. -//! - result is sent to netcheck actor addr. +//! - result is sent to net_report actor addr. //! - Shuts down //! //! Note it will only perform a single hairpin check before shutting down. Any further @@ -21,10 +21,7 @@ use tokio::{sync::oneshot, time::Instant}; use tokio_util::task::AbortOnDropHandle; use tracing::{debug, error, info_span, trace, warn, Instrument}; -use crate::{ - defaults::timeouts::HAIRPIN_CHECK_TIMEOUT, - netcheck::{self, reportgen, Inflight}, -}; +use crate::{self as net_report, defaults::timeouts::HAIRPIN_CHECK_TIMEOUT, reportgen, Inflight}; /// Handle to the hairpin actor. 
/// @@ -36,12 +33,12 @@ pub(super) struct Client { } impl Client { - pub(super) fn new(netcheck: netcheck::Addr, reportgen: reportgen::Addr) -> Self { + pub(super) fn new(net_report: net_report::Addr, reportgen: reportgen::Addr) -> Self { let (addr, msg_rx) = oneshot::channel(); let actor = Actor { msg_rx, - netcheck, + net_report, reportgen, }; @@ -84,7 +81,7 @@ enum Message { #[derive(Debug)] struct Actor { msg_rx: oneshot::Receiver, - netcheck: netcheck::Addr, + net_report: net_report::Addr, reportgen: reportgen::Addr, } @@ -118,11 +115,11 @@ impl Actor { s: stun_tx, }; let (msg_response_tx, msg_response_rx) = oneshot::channel(); - self.netcheck - .send(netcheck::Message::InFlightStun(inflight, msg_response_tx)) + self.net_report + .send(net_report::Message::InFlightStun(inflight, msg_response_tx)) .await - .context("netcheck actor gone")?; - msg_response_rx.await.context("netcheck actor died")?; + .context("net_report actor gone")?; + msg_response_rx.await.context("net_report actor died")?; if let Err(err) = socket.send_to(&stun::request(txn), dst).await { warn!(%dst, "failed to send hairpin check"); @@ -132,7 +129,7 @@ impl Actor { let now = Instant::now(); let hairpinning_works = match tokio::time::timeout(HAIRPIN_CHECK_TIMEOUT, stun_rx).await { Ok(Ok(_)) => true, - Ok(Err(_)) => bail!("netcheck actor dropped stun response channel"), + Ok(Err(_)) => bail!("net_report actor dropped stun response channel"), Err(_) => false, // Elapsed }; debug!( @@ -166,7 +163,7 @@ impl Actor { socket .send_to( - b"tailscale netcheck; see https://github.com/tailscale/tailscale/issues/188", + b"net_report; see https://github.com/tailscale/tailscale/issues/188", documentation_ip, ) .await?; @@ -201,10 +198,10 @@ mod tests { async fn test_hairpin(hairpinning_works: bool) { let _guard = iroh_test::logging::setup(); - // Setup fake netcheck and reportstate actors, hairpinning interacts with them. 
- let (netcheck_tx, mut netcheck_rx) = mpsc::channel(32); - let netcheck_addr = netcheck::Addr { - sender: netcheck_tx, + // Setup fake net_report and reportstate actors, hairpinning interacts with them. + let (net_report_tx, mut net_report_rx) = mpsc::channel(32); + let net_report_addr = net_report::Addr { + sender: net_report_tx, }; let (reportstate_tx, mut reportstate_rx) = mpsc::channel(32); let reportstate_addr = reportgen::Addr { @@ -212,7 +209,7 @@ mod tests { }; // Create hairpin actor - let mut actor = Client::new(netcheck_addr, reportstate_addr); + let mut actor = Client::new(net_report_addr, reportstate_addr); // Hairpinning works by asking the hairpin actor to send a STUN request to our // discovered public address. If the router returns it hairpinning works. We @@ -226,12 +223,12 @@ mod tests { }; actor.start_check(ipp_v4); - // This bit is our dummy netcheck actor: it handles the inflight request and sends + // This bit is our dummy net_report actor: it handles the inflight request and sends // back the STUN request once it arrives. - let dummy_netcheck = tokio::spawn( + let dummy_net_report = tokio::spawn( async move { - let netcheck::Message::InFlightStun(inflight, resp_tx) = - netcheck_rx.recv().await.unwrap() + let net_report::Message::InFlightStun(inflight, resp_tx) = + net_report_rx.recv().await.unwrap() else { panic!("Wrong message received"); }; @@ -258,7 +255,7 @@ mod tests { tokio::time::sleep(HAIRPIN_CHECK_TIMEOUT * 8).await; } } - .instrument(info_span!("dummy-netcheck")), + .instrument(info_span!("dummy-net_report")), ); // Next we expect our dummy reportstate to receive the result. 
@@ -268,18 +265,20 @@ mod tests { None => panic!("reportstate mpsc has no senders"), } - // Cleanup: our dummy netcheck actor should finish - dummy_netcheck.await.expect("error in dummy netcheck actor"); + // Cleanup: our dummy net_report actor should finish + dummy_net_report + .await + .expect("error in dummy net_report actor"); } #[tokio::test] async fn test_client_drop() { let _guard = iroh_test::logging::setup(); - // Setup fake netcheck and reportstate actors, hairpinning interacts with them. - let (netcheck_tx, _netcheck_rx) = mpsc::channel(32); - let netcheck_addr = netcheck::Addr { - sender: netcheck_tx, + // Setup fake net_report and reportstate actors, hairpinning interacts with them. + let (net_report_tx, _net_report_rx) = mpsc::channel(32); + let net_report_addr = net_report::Addr { + sender: net_report_tx, }; let (reportstate_tx, _reportstate_rx) = mpsc::channel(32); let reportstate_addr = reportgen::Addr { @@ -287,7 +286,7 @@ mod tests { }; // Create hairpin actor - let mut client = Client::new(netcheck_addr, reportstate_addr); + let mut client = Client::new(net_report_addr, reportstate_addr); // Save the addr, drop the client let addr = client.addr.take(); diff --git a/iroh-net/src/netcheck/reportgen/probes.rs b/iroh-net-report/src/reportgen/probes.rs similarity index 98% rename from iroh-net/src/netcheck/reportgen/probes.rs rename to iroh-net-report/src/reportgen/probes.rs index f2cd400aad..08bbe1163b 100644 --- a/iroh-net/src/netcheck/reportgen/probes.rs +++ b/iroh-net-report/src/reportgen/probes.rs @@ -10,9 +10,9 @@ use anyhow::{ensure, Result}; use netwatch::interfaces; use tokio::time::Duration; -use crate::{netcheck::Report, RelayMap, RelayNode, RelayUrl}; +use crate::{RelayMap, RelayNode, RelayUrl, Report}; -/// The retransmit interval used when netcheck first runs. +/// The retransmit interval used when net_report first runs. 
/// /// We have no past context to work with, and we want answers relatively quickly, so it's /// biased slightly more aggressive than [`DEFAULT_ACTIVE_RETRANSMIT_DELAY`]. A few extra @@ -36,7 +36,7 @@ const DEFAULT_ACTIVE_RETRANSMIT_DELAY: Duration = Duration::from_millis(200); /// time. const ACTIVE_RETRANSMIT_EXTRA_DELAY: Duration = Duration::from_millis(50); -/// The number of fastest relays to periodically re-query during incremental netcheck +/// The number of fastest relays to periodically re-query during incremental net_report /// reports. (During a full report, all relay servers are scanned.) const NUM_INCREMENTAL_RELAYS: usize = 3; @@ -190,7 +190,7 @@ impl fmt::Display for ProbeSet { /// The [`reportgen`] actor will also abort all the remaining [`ProbeSet`]s once it has /// sufficient information for a report. /// -/// [`reportgen`]: crate::netcheck::reportgen +/// [`reportgen`]: crate::reportgen #[derive(Debug, PartialEq, Eq)] pub(super) struct ProbePlan(BTreeSet); @@ -269,7 +269,7 @@ impl ProbePlan { plan } - /// Creates a follow up probe plan using a previous netcheck report. + /// Creates a follow up probe plan using a previous net_report report. pub(super) fn with_last_report( relay_map: &RelayMap, if_state: &interfaces::State, @@ -473,7 +473,7 @@ mod tests { use pretty_assertions::assert_eq; use super::*; - use crate::netcheck::{test_utils, RelayLatencies}; + use crate::{test_utils, RelayLatencies}; /// Shorthand which declares a new ProbeSet. 
/// diff --git a/iroh-net/Cargo.toml b/iroh-net/Cargo.toml index c47409976d..d84d95e754 100644 --- a/iroh-net/Cargo.toml +++ b/iroh-net/Cargo.toml @@ -114,6 +114,7 @@ webpki = { package = "rustls-webpki", version = "0.102" } webpki-roots = "0.26" x509-parser = "0.16" z32 = "1.0.3" +net-report = { package = "iroh-net-report", path = "../iroh-net-report", version = "0.28" } # metrics iroh-metrics = { version = "0.28.0", default-features = false } diff --git a/iroh-net/LICENSE-BSD3 b/iroh-net/LICENSE-BSD3 index 4b02691c20..c893b731d3 100644 --- a/iroh-net/LICENSE-BSD3 +++ b/iroh-net/LICENSE-BSD3 @@ -1,12 +1,7 @@ Parts of the code has been derived from tailscale, which is under the following license. Specifically the following files are most relevant -- ./src/derp** - ./src/magicsock** -- ./src/netcheck** -- ./src/disco.rs -- ./src/stun.rs - BSD 3-Clause License @@ -35,4 +30,4 @@ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/iroh-net/bench/src/bin/bulk.rs b/iroh-net/bench/src/bin/bulk.rs index e5280198b2..83f039439d 100644 --- a/iroh-net/bench/src/bin/bulk.rs +++ b/iroh-net/bench/src/bin/bulk.rs @@ -36,7 +36,7 @@ pub fn run_iroh(opt: Opt) -> Result<()> { iroh_metrics::core::Core::try_init(|reg, metrics| { use iroh_metrics::core::Metric; metrics.insert(iroh_net::metrics::MagicsockMetrics::new(reg)); - metrics.insert(iroh_net::metrics::NetcheckMetrics::new(reg)); + metrics.insert(iroh_net::metrics::NetReportMetrics::new(reg)); metrics.insert(iroh_net::metrics::PortmapMetrics::new(reg)); #[cfg(feature = "local-relay")] if opt.with_relay { @@ -113,8 +113,8 @@ pub fn run_iroh(opt: Opt) -> Result<()> { core.get_collector::(), ); collect_and_print( - "NetcheckMetrics", - core.get_collector::(), + "NetReportMetrics", + core.get_collector::(), ); collect_and_print( "PortmapMetrics", diff --git a/iroh-net/src/defaults.rs b/iroh-net/src/defaults.rs index 1d822c12e0..799dc078f3 100644 --- a/iroh-net/src/defaults.rs +++ b/iroh-net/src/defaults.rs @@ -130,38 +130,8 @@ pub mod staging { pub(crate) mod timeouts { use std::time::Duration; - // Timeouts for netcheck + // Timeouts for net_report - /// Maximum duration to wait for a netcheck report. - pub(crate) const NETCHECK_REPORT_TIMEOUT: Duration = Duration::from_secs(10); - - /// The maximum amount of time netcheck will spend gathering a single report. - pub(crate) const OVERALL_REPORT_TIMEOUT: Duration = Duration::from_secs(5); - - /// The total time we wait for all the probes. - /// - /// This includes the STUN, ICMP and HTTPS probes, which will all - /// start at different times based on the ProbePlan. - pub(crate) const PROBES_TIMEOUT: Duration = Duration::from_secs(3); - - /// How long to await for a captive-portal result. - /// - /// This delay is chosen so it starts after good-working STUN probes - /// would have finished, but not too long so the delay is bearable if - /// STUN is blocked. 
- pub(crate) const CAPTIVE_PORTAL_DELAY: Duration = Duration::from_millis(200); - - /// Timeout for captive portal checks - /// - /// Must be lower than [`OVERALL_REPORT_TIMEOUT`] minus - /// [`CAPTIVE_PORTAL_DELAY`]. - pub(crate) const CAPTIVE_PORTAL_TIMEOUT: Duration = Duration::from_secs(2); - - pub(crate) const DNS_TIMEOUT: Duration = Duration::from_secs(3); - - /// The amount of time we wait for a hairpinned packet to come back. - pub(crate) const HAIRPIN_CHECK_TIMEOUT: Duration = Duration::from_millis(100); - - /// Default Pinger timeout - pub(crate) const DEFAULT_PINGER_TIMEOUT: Duration = Duration::from_secs(5); + /// Maximum duration to wait for a net_report. + pub(crate) const NET_REPORT_TIMEOUT: Duration = Duration::from_secs(10); } diff --git a/iroh-net/src/endpoint.rs b/iroh-net/src/endpoint.rs index 8ffcbeba8a..afbd5d58dd 100644 --- a/iroh-net/src/endpoint.rs +++ b/iroh-net/src/endpoint.rs @@ -24,6 +24,7 @@ use std::{ use anyhow::{anyhow, bail, Context, Result}; use derive_more::Debug; use futures_lite::{Stream, StreamExt}; +use iroh_base::relay_map::RelayMap; use pin_project::pin_project; use tokio_util::sync::{CancellationToken, WaitForCancellationFuture}; use tracing::{debug, instrument, trace, warn}; @@ -36,7 +37,7 @@ use crate::{ dns::{default_resolver, DnsResolver}, key::{PublicKey, SecretKey}, magicsock::{self, Handle, QuicMappedAddr}, - tls, NodeId, RelayMode, RelayUrl, + tls, NodeId, RelayUrl, }; mod rtt_actor; @@ -1357,6 +1358,33 @@ fn proxy_url_from_env() -> Option { None } +/// Configuration of the relay servers for an [`Endpoint`]. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum RelayMode { + /// Disable relay servers completely. + Disabled, + /// Use the default relay map, with production relay servers from n0. + /// + /// See [`crate::defaults::prod`] for the servers used. + Default, + /// Use the staging relay servers from n0. + Staging, + /// Use a custom relay map.
+ Custom(RelayMap), +} + +impl RelayMode { + /// Returns the relay map for this mode. + pub fn relay_map(&self) -> RelayMap { + match self { + RelayMode::Disabled => RelayMap::empty(), + RelayMode::Default => crate::defaults::prod::default_relay_map(), + RelayMode::Staging => crate::defaults::staging::default_relay_map(), + RelayMode::Custom(relay_map) => relay_map.clone(), + } + } +} + /// Environment variable to force the use of staging relays. #[cfg_attr(iroh_docsrs, doc(cfg(not(test))))] pub const ENV_FORCE_STAGING_RELAYS: &str = "IROH_FORCE_STAGING_RELAYS"; diff --git a/iroh-net/src/lib.rs b/iroh-net/src/lib.rs index dcb1441120..478fb69e53 100644 --- a/iroh-net/src/lib.rs +++ b/iroh-net/src/lib.rs @@ -241,18 +241,18 @@ pub mod dns; pub mod endpoint; mod magicsock; pub mod metrics; -pub mod netcheck; -pub mod ping; -mod relay_map; pub mod ticket; pub mod tls; pub(crate) mod util; -pub use endpoint::{AddrInfo, Endpoint, NodeAddr}; -pub use iroh_base::{key, key::NodeId}; +pub use endpoint::{AddrInfo, Endpoint, NodeAddr, RelayMode}; +pub use iroh_base::{ + key, + key::NodeId, + relay_map::{RelayMap, RelayNode, RelayUrl}, +}; pub use iroh_relay as relay; -pub use relay_map::{RelayMap, RelayMode, RelayNode, RelayUrl}; #[cfg(any(test, feature = "test-utils"))] #[cfg_attr(iroh_docsrs, doc(cfg(any(test, feature = "test-utils"))))] diff --git a/iroh-net/src/magicsock.rs b/iroh-net/src/magicsock.rs index 446faafee7..f4870f8377 100644 --- a/iroh-net/src/magicsock.rs +++ b/iroh-net/src/magicsock.rs @@ -60,13 +60,13 @@ use self::{ udp_conn::UdpConn, }; use crate::{ - defaults::timeouts::NETCHECK_REPORT_TIMEOUT, + defaults::timeouts::NET_REPORT_TIMEOUT, disco::{self, CallMeMaybe, SendAddr}, discovery::{Discovery, DiscoveryItem}, dns::DnsResolver, endpoint::NodeAddr, key::{PublicKey, SecretKey, SharedSecret}, - netcheck, AddrInfo, RelayMap, RelayUrl, + AddrInfo, RelayMap, RelayUrl, }; mod metrics; @@ -203,7 +203,7 @@ pub(crate) struct MagicSock { closing: AtomicBool, /// 
Close was called. closed: AtomicBool, - /// If the last netcheck report, reports IPv6 to be available. + /// If the last net_report report, reports IPv6 to be available. ipv6_reported: Arc, /// None (or zero nodes) means relay is disabled. @@ -216,8 +216,8 @@ pub(crate) struct MagicSock { pconn4: UdpConn, /// UDP IPv6 socket pconn6: Option, - /// Netcheck client - net_checker: netcheck::Addr, + /// NetReport client + net_reporter: net_report::Addr, /// The state for an active DiscoKey. disco_secrets: DiscoSecrets, @@ -716,7 +716,7 @@ impl MagicSock { let packet_is_quic = if stun::is(packet) { trace!(src = %meta.addr, len = %meta.stride, "UDP recv: stun packet"); let packet2 = Bytes::copy_from_slice(packet); - self.net_checker.receive_stun_packet(packet2, meta.addr); + self.net_reporter.receive_stun_packet(packet2, meta.addr); false } else if let Some((sender, sealed_box)) = disco::source_and_box(packet) { // Disco? @@ -1217,7 +1217,7 @@ impl MagicSock { /// Sends the call-me-maybe DISCO message, queuing if addresses are too stale. /// /// To send the call-me-maybe message, we need to know our current direct addresses. If - /// this information is too stale, the call-me-maybe is queued while a netcheck run is + /// this information is too stale, the call-me-maybe is queued while a net_report run is /// scheduled. Once this run finishes, the call-me-maybe will be sent. fn send_or_queue_call_me_maybe(&self, url: &RelayUrl, dst_node: NodeId) { match self.direct_addrs.fresh_enough() { @@ -1305,7 +1305,7 @@ impl DiscoMessageSource { } } -/// Manages currently running direct addr discovery, aka netcheck runs. +/// Manages currently running direct addr discovery, aka net_report runs. 
/// /// Invariants: /// - only one direct addr update must be running at a time @@ -1405,7 +1405,8 @@ impl Handle { let ipv4_addr = pconn4.local_addr()?; let ipv6_addr = pconn6.as_ref().and_then(|c| c.local_addr().ok()); - let net_checker = netcheck::Client::new(Some(port_mapper.clone()), dns_resolver.clone())?; + let net_reporter = + net_report::Client::new(Some(port_mapper.clone()), dns_resolver.clone())?; let (actor_sender, actor_receiver) = mpsc::channel(256); let (relay_actor_sender, relay_actor_receiver) = mpsc::channel(256); @@ -1432,7 +1433,7 @@ impl Handle { my_relay: Default::default(), pconn4: pconn4.clone(), pconn6: pconn6.clone(), - net_checker: net_checker.addr(), + net_reporter: net_reporter.addr(), disco_secrets: DiscoSecrets::default(), node_map, relay_actor_sender: relay_actor_sender.clone(), @@ -1483,7 +1484,7 @@ impl Handle { pconn4, pconn6, no_v4_send: false, - net_checker, + net_reporter, network_monitor, }; @@ -1699,7 +1700,7 @@ enum ActorMessage { Shutdown, ReceiveRelay(RelayReadResult), EndpointPingExpired(usize, stun_rs::TransactionId), - NetcheckReport(Result>>, &'static str), + NetReport(Result>>, &'static str), NetworkChange, #[cfg(test)] ForceNetworkChange(bool), @@ -1731,7 +1732,7 @@ struct Actor { no_v4_send: bool, /// The prober that discovers local network conditions, including the closest relay relay and NAT mappings. 
- net_checker: netcheck::Client, + net_reporter: net_report::Client, network_monitor: netmon::Monitor, } @@ -1919,13 +1920,16 @@ impl Actor { ActorMessage::EndpointPingExpired(id, txid) => { self.msock.node_map.notify_ping_timeout(id, txid); } - ActorMessage::NetcheckReport(report, why) => { + ActorMessage::NetReport(report, why) => { match report { Ok(report) => { - self.handle_netcheck_report(report).await; + self.handle_net_report_report(report).await; } Err(err) => { - warn!("failed to generate netcheck report for: {}: {:?}", why, err); + warn!( + "failed to generate net_report report for: {}: {:?}", + why, err + ); } } self.finalize_direct_addrs_update(why); @@ -1997,7 +2001,7 @@ impl Actor { /// Refreshes knowledge about our direct addresses. /// - /// In other words, this triggers a netcheck run. + /// In other words, this triggers a net_report run. /// /// Note that invoking this is managed by the [`DirectAddrUpdateState`] and this should /// never be invoked directly. Some day this will be refactored to not allow this easy @@ -2017,9 +2021,9 @@ impl Actor { /// direct addresses from: /// /// - The portmapper. - /// - A netcheck report. + /// - A net_report report. /// - The local interfaces IP addresses. - fn update_direct_addresses(&mut self, netcheck_report: Option>) { + fn update_direct_addresses(&mut self, net_report_report: Option>) { let portmap_watcher = self.port_mapper.watch_external_address(); // We only want to have one DirectAddr for each SocketAddr we have. So we store @@ -2036,9 +2040,9 @@ impl Actor { self.set_net_info_have_port_map(); } - // Next add STUN addresses from the netcheck report. - if let Some(netcheck_report) = netcheck_report { - if let Some(global_v4) = netcheck_report.global_v4 { + // Next add STUN addresses from the net_report report. 
+ if let Some(net_report_report) = net_report_report { + if let Some(global_v4) = net_report_report.global_v4 { addrs .entry(global_v4.into()) .or_insert(DirectAddrType::Stun); @@ -2048,7 +2052,7 @@ impl Actor { // port mapping on their router to the same explicit // port that we are running with. Worst case it's an invalid candidate mapping. let port = self.msock.port.load(Ordering::Relaxed); - if netcheck_report + if net_report_report .mapping_varies_by_dest_ip .unwrap_or_default() && port != 0 @@ -2060,7 +2064,7 @@ impl Actor { .or_insert(DirectAddrType::Stun4LocalPort); } } - if let Some(global_v6) = netcheck_report.global_v6 { + if let Some(global_v6) = net_report_report.global_v6 { addrs .entry(global_v6.into()) .or_insert(DirectAddrType::Stun); @@ -2185,7 +2189,7 @@ impl Actor { self.net_info_last = Some(ni); } - /// Calls netcheck. + /// Calls net_report. /// /// Note that invoking this is managed by [`DirectAddrUpdateState`] via /// [`Actor::refresh_direct_addrs`] and this should never be invoked directly. 
Some day @@ -2193,9 +2197,9 @@ impl Actor { #[instrument(level = "debug", skip_all)] async fn update_net_info(&mut self, why: &'static str) { if self.msock.relay_map.is_empty() { - debug!("skipping netcheck, empty RelayMap"); + debug!("skipping net_report, empty RelayMap"); self.msg_sender - .send(ActorMessage::NetcheckReport(Ok(None), why)) + .send(ActorMessage::NetReport(Ok(None), why)) .await .ok(); return; @@ -2205,38 +2209,38 @@ impl Actor { let pconn4 = Some(self.pconn4.as_socket()); let pconn6 = self.pconn6.as_ref().map(|p| p.as_socket()); - debug!("requesting netcheck report"); + debug!("requesting net_report report"); match self - .net_checker + .net_reporter .get_report_channel(relay_map, pconn4, pconn6) .await { Ok(rx) => { let msg_sender = self.msg_sender.clone(); tokio::task::spawn(async move { - let report = time::timeout(NETCHECK_REPORT_TIMEOUT, rx).await; + let report = time::timeout(NET_REPORT_TIMEOUT, rx).await; let report: anyhow::Result<_> = match report { Ok(Ok(Ok(report))) => Ok(Some(report)), Ok(Ok(Err(err))) => Err(err), - Ok(Err(_)) => Err(anyhow!("netcheck report not received")), - Err(err) => Err(anyhow!("netcheck report timeout: {:?}", err)), + Ok(Err(_)) => Err(anyhow!("net_report report not received")), + Err(err) => Err(anyhow!("net_report report timeout: {:?}", err)), }; msg_sender - .send(ActorMessage::NetcheckReport(report, why)) + .send(ActorMessage::NetReport(report, why)) .await .ok(); - // The receiver of the NetcheckReport message will call + // The receiver of the NetReport message will call // .finalize_direct_addrs_update(). 
}); } Err(err) => { - warn!("unable to start netcheck generation: {:?}", err); + warn!("unable to start net_report generation: {:?}", err); self.finalize_direct_addrs_update(why); } } } - async fn handle_netcheck_report(&mut self, report: Option>) { + async fn handle_net_report_report(&mut self, report: Option>) { if let Some(ref report) = report { self.msock .ipv6_reported @@ -2311,7 +2315,7 @@ impl Actor { true } - /// Returns a deterministic relay node to connect to. This is only used if netcheck + /// Returns a deterministic relay node to connect to. This is only used if net_report /// couldn't find the nearest one, for instance, if UDP is blocked and thus STUN /// latency checks aren't working. /// diff --git a/iroh-net/src/magicsock/udp_conn.rs b/iroh-net/src/magicsock/udp_conn.rs index c176ae8144..2c23d44f5b 100644 --- a/iroh-net/src/magicsock/udp_conn.rs +++ b/iroh-net/src/magicsock/udp_conn.rs @@ -231,7 +231,7 @@ mod tests { #[tokio::test] async fn test_rebinding_conn_send_recv_ipv6() -> Result<()> { let _guard = iroh_test::logging::setup(); - if !crate::netcheck::os_has_ipv6() { + if !net_report::os_has_ipv6() { return Ok(()); } rebinding_conn_send_recv(IpFamily::V6).await diff --git a/iroh-net/src/metrics.rs b/iroh-net/src/metrics.rs index 655cfe4476..e168a56a7a 100644 --- a/iroh-net/src/metrics.rs +++ b/iroh-net/src/metrics.rs @@ -2,6 +2,7 @@ #[cfg(feature = "test-utils")] #[cfg_attr(iroh_docsrs, doc(cfg(feature = "test-utils")))] pub use iroh_relay::server::Metrics as RelayMetrics; +pub use net_report::Metrics as NetReportMetrics; pub use portmapper::Metrics as PortmapMetrics; -pub use crate::{magicsock::Metrics as MagicsockMetrics, netcheck::Metrics as NetcheckMetrics}; +pub use crate::magicsock::Metrics as MagicsockMetrics; diff --git a/iroh-relay/LICENSE-BSD3 b/iroh-relay/LICENSE-BSD3 new file mode 100644 index 0000000000..94d6e6abce --- /dev/null +++ b/iroh-relay/LICENSE-BSD3 @@ -0,0 +1,30 @@ +Parts of the code has been derived from tailscale, 
which is under the following license. + +BSD 3-Clause License + +Copyright (c) 2020 Tailscale Inc & AUTHORS. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/iroh-relay/src/server.rs b/iroh-relay/src/server.rs index bbe16fe090..6f32da2863 100644 --- a/iroh-relay/src/server.rs +++ b/iroh-relay/src/server.rs @@ -12,8 +12,8 @@ //! The relay server hosts the following services: //! //! - HTTPS `/relay`: The main URL endpoint to which clients connect and sends traffic over. -//! - HTTPS `/ping`: Used for netcheck probes. -//! 
- HTTPS `/generate_204`: Used for netcheck probes. +//! - HTTPS `/ping`: Used for net_report probes. +//! - HTTPS `/generate_204`: Used for net_report probes. //! - STUN: UDP port for STUN requests/responses. use std::{fmt, future::Future, net::SocketAddr, pin::Pin, sync::Arc}; diff --git a/iroh/src/metrics.rs b/iroh/src/metrics.rs index e2db02c57e..33f7551f81 100644 --- a/iroh/src/metrics.rs +++ b/iroh/src/metrics.rs @@ -71,7 +71,7 @@ pub fn try_init_metrics_collection() -> std::io::Result<()> { metrics.insert(crate::metrics::Metrics::new(reg)); metrics.insert(iroh_docs::metrics::Metrics::new(reg)); metrics.insert(iroh_net::metrics::MagicsockMetrics::new(reg)); - metrics.insert(iroh_net::metrics::NetcheckMetrics::new(reg)); + metrics.insert(iroh_net::metrics::NetReportMetrics::new(reg)); metrics.insert(iroh_net::metrics::PortmapMetrics::new(reg)); }) } @@ -92,7 +92,7 @@ pub fn get_metrics() -> anyhow::Result> { &mut map, ); collect( - core.get_collector::(), + core.get_collector::(), &mut map, ); collect(