Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(iroh_net): less agressive best_addr clearing on pong timeout #2238

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 14 additions & 35 deletions iroh-net/src/magicsock/node_map/best_addr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ impl BestAddrInner {
.map(|trust_until| trust_until >= now)
.unwrap_or(false)
}

fn addr(&self) -> SocketAddr {
self.addr.addr
}
}

#[derive(Debug)]
Expand Down Expand Up @@ -81,49 +85,24 @@ impl BestAddr {
self.0.is_none()
}

pub fn clear(&mut self, reason: ClearReason, has_relay: bool) -> bool {
if let Some(addr) = self.addr() {
self.0 = None;
info!(?reason, ?has_relay, old_addr = %addr, "clearing best_addr");
/// Unconditionally clears the best address.
pub fn clear(&mut self, reason: ClearReason, has_relay: bool) {
let old = self.0.take();
if let Some(old_addr) = old.as_ref().map(BestAddrInner::addr) {
info!(?reason, ?has_relay, %old_addr, "clearing best_addr");
// no longer relying on the direct connection
inc!(MagicsockMetrics, num_direct_conns_removed);
if has_relay {
// we are now relying on the relay connection, add a relay conn
inc!(MagicsockMetrics, num_relay_conns_added);
}
true
} else {
false
}
}

pub fn clear_if_equals(
&mut self,
addr: SocketAddr,
reason: ClearReason,
has_relay: bool,
) -> bool {
match &self.addr() {
Some(best_addr) if *best_addr == addr => self.clear(reason, has_relay),
_ => false,
}
}

/// Clears best_addr if it equals `addr` and was confirmed before `confirmed_before`.
///
/// If the given addr is currently the best address, **and** the best address was
/// confirmed longer ago than the provided time, then this clears the best address.
pub fn clear_if_addr_older(
&mut self,
addr: SocketAddr,
confirmed_before: Instant,
reason: ClearReason,
has_relay: bool,
) {
if let Some(ref inner) = self.0 {
if inner.addr.addr == addr && inner.confirmed_at < confirmed_before {
self.clear(reason, has_relay);
}
/// Clears the best address if equal to `addr`.
pub fn clear_if_equals(&mut self, addr: SocketAddr, reason: ClearReason, has_relay: bool) {
if self.addr() == Some(addr) {
self.clear(reason, has_relay)
}
}

Expand Down Expand Up @@ -221,7 +200,7 @@ impl BestAddr {
}

pub fn addr(&self) -> Option<SocketAddr> {
self.0.as_ref().map(|a| a.addr.addr)
self.0.as_ref().map(BestAddrInner::addr)
}
}

Expand Down
35 changes: 24 additions & 11 deletions iroh-net/src/magicsock/node_map/node_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use super::IpPort;
/// See [`NodeState::prune_direct_addresses`].
pub(super) const MAX_INACTIVE_DIRECT_ADDRESSES: usize = 20;

/// How long since an endpoint path was last active before it might be pruned.
/// How long since an endpoint path was last alive before it might be pruned.
const LAST_ALIVE_PRUNE_DURATION: Duration = Duration::from_secs(120);

/// How long we wait for a pong reply before assuming it's never coming.
Expand Down Expand Up @@ -410,15 +410,28 @@ impl NodeState {
SendAddr::Udp(addr) => {
if let Some(path_state) = self.direct_addr_state.get_mut(&addr.into()) {
path_state.last_ping = None;
// only clear the best address if there was no sign of life from this path
// within the time the pong should have arrived
let consider_alive = path_state
.last_alive()
.map(|last_alive| last_alive.elapsed() <= PING_TIMEOUT_DURATION)
.unwrap_or(false);
if !consider_alive {
self.best_addr.clear_if_equals(
addr,
ClearReason::PongTimeout,
self.relay_url().is_some(),
)
}
} else {
// If we have no state for the best addr it should have been cleared
// anyway.
self.best_addr.clear_if_equals(
addr,
ClearReason::PongTimeout,
self.relay_url.is_some(),
);
}

// If we fail to ping our current best addr, it is not that good anymore.
self.best_addr.clear_if_addr_older(
addr,
sp.at,
ClearReason::PongTimeout,
self.relay_url.is_some(),
);
}
SendAddr::Relay(ref url) => {
if let Some((home_relay, relay_state)) = self.relay_url.as_mut() {
Expand Down Expand Up @@ -1151,10 +1164,10 @@ impl PathState {

/// Check whether this path is considered active.
///
/// Active means the path has received payload messages within the lat
/// Active means the path has received payload messages within the last
/// [`SESSION_ACTIVE_TIMEOUT`].
///
/// Note that an endpoint might be alive but not active if it's contactable but not in
/// Note that a path might be alive but not active if it's contactable but not in
/// use.
pub(super) fn is_active(&self) -> bool {
self.last_payload_msg
Expand Down
Loading