Skip to content

Commit

Permalink
fix(iroh_net): less agressive best_addr clearing on pong timeout (#2238)
Browse files Browse the repository at this point in the history
## Description

Clears the best address only if within the time the pong should have
been received, nothing else was seen so that the address is considered
not alive in that period. This should result in less aggressive
best_addr clearing.

Specific changes include:
- remove `BestAddr::clear_if_addr_older` since it is no longer needed.
- change `BestAddr::clear*` return since it was never read anyway.
- check for alive withing the `PING_TIMEOUT_DURATION` before clearing
the best address on timeout
- misc docs fixes in code related to being alive/active

## Breaking Changes

n/a

## Notes & open questions

n/a

## Change checklist

- [x] Self-review.
- [x] Documentation updates if relevant.
- [ ] ~Tests if relevant.~
- [ ] ~All breaking changes documented.~
  • Loading branch information
divagant-martian authored Apr 26, 2024
1 parent 1fd81c0 commit 88ba244
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 46 deletions.
49 changes: 14 additions & 35 deletions iroh-net/src/magicsock/node_map/best_addr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ impl BestAddrInner {
.map(|trust_until| trust_until >= now)
.unwrap_or(false)
}

fn addr(&self) -> SocketAddr {
self.addr.addr
}
}

#[derive(Debug)]
Expand Down Expand Up @@ -81,49 +85,24 @@ impl BestAddr {
self.0.is_none()
}

pub fn clear(&mut self, reason: ClearReason, has_relay: bool) -> bool {
if let Some(addr) = self.addr() {
self.0 = None;
info!(?reason, ?has_relay, old_addr = %addr, "clearing best_addr");
/// Unconditionally clears the best address.
pub fn clear(&mut self, reason: ClearReason, has_relay: bool) {
let old = self.0.take();
if let Some(old_addr) = old.as_ref().map(BestAddrInner::addr) {
info!(?reason, ?has_relay, %old_addr, "clearing best_addr");
// no longer relying on the direct connection
inc!(MagicsockMetrics, num_direct_conns_removed);
if has_relay {
// we are now relying on the relay connection, add a relay conn
inc!(MagicsockMetrics, num_relay_conns_added);
}
true
} else {
false
}
}

pub fn clear_if_equals(
&mut self,
addr: SocketAddr,
reason: ClearReason,
has_relay: bool,
) -> bool {
match &self.addr() {
Some(best_addr) if *best_addr == addr => self.clear(reason, has_relay),
_ => false,
}
}

/// Clears best_addr if it equals `addr` and was confirmed before `confirmed_before`.
///
/// If the given addr is currently the best address, **and** the best address was
/// confirmed longer ago than the provided time, then this clears the best address.
pub fn clear_if_addr_older(
&mut self,
addr: SocketAddr,
confirmed_before: Instant,
reason: ClearReason,
has_relay: bool,
) {
if let Some(ref inner) = self.0 {
if inner.addr.addr == addr && inner.confirmed_at < confirmed_before {
self.clear(reason, has_relay);
}
/// Clears the best address if equal to `addr`.
pub fn clear_if_equals(&mut self, addr: SocketAddr, reason: ClearReason, has_relay: bool) {
if self.addr() == Some(addr) {
self.clear(reason, has_relay)
}
}

Expand Down Expand Up @@ -221,7 +200,7 @@ impl BestAddr {
}

pub fn addr(&self) -> Option<SocketAddr> {
self.0.as_ref().map(|a| a.addr.addr)
self.0.as_ref().map(BestAddrInner::addr)
}
}

Expand Down
35 changes: 24 additions & 11 deletions iroh-net/src/magicsock/node_map/node_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use super::IpPort;
/// See [`NodeState::prune_direct_addresses`].
pub(super) const MAX_INACTIVE_DIRECT_ADDRESSES: usize = 20;

/// How long since an endpoint path was last active before it might be pruned.
/// How long since an endpoint path was last alive before it might be pruned.
const LAST_ALIVE_PRUNE_DURATION: Duration = Duration::from_secs(120);

/// How long we wait for a pong reply before assuming it's never coming.
Expand Down Expand Up @@ -410,15 +410,28 @@ impl NodeState {
SendAddr::Udp(addr) => {
if let Some(path_state) = self.direct_addr_state.get_mut(&addr.into()) {
path_state.last_ping = None;
// only clear the best address if there was no sign of life from this path
// within the time the pong should have arrived
let consider_alive = path_state
.last_alive()
.map(|last_alive| last_alive.elapsed() <= PING_TIMEOUT_DURATION)
.unwrap_or(false);
if !consider_alive {
self.best_addr.clear_if_equals(
addr,
ClearReason::PongTimeout,
self.relay_url().is_some(),
)
}
} else {
// If we have no state for the best addr it should have been cleared
// anyway.
self.best_addr.clear_if_equals(
addr,
ClearReason::PongTimeout,
self.relay_url.is_some(),
);
}

// If we fail to ping our current best addr, it is not that good anymore.
self.best_addr.clear_if_addr_older(
addr,
sp.at,
ClearReason::PongTimeout,
self.relay_url.is_some(),
);
}
SendAddr::Relay(ref url) => {
if let Some((home_relay, relay_state)) = self.relay_url.as_mut() {
Expand Down Expand Up @@ -1142,10 +1155,10 @@ impl PathState {

/// Check whether this path is considered active.
///
/// Active means the path has received payload messages within the lat
/// Active means the path has received payload messages within the last
/// [`SESSION_ACTIVE_TIMEOUT`].
///
/// Note that an endpoint might be alive but not active if it's contactable but not in
/// Note that a path might be alive but not active if it's contactable but not in
/// use.
pub(super) fn is_active(&self) -> bool {
self.last_payload_msg
Expand Down

0 comments on commit 88ba244

Please sign in to comment.