Support async ChannelMonitorUpdate persist for claims against closed channels #3414

Open
wants to merge 7 commits into base: main
108 changes: 108 additions & 0 deletions lightning/src/ln/chanmon_update_fail_tests.rs
@@ -3713,3 +3713,111 @@ fn test_partial_claim_mon_update_compl_actions() {
send_payment(&nodes[2], &[&nodes[3]], 100_000);
assert!(!get_monitor!(nodes[3], chan_4_id).get_stored_preimages().contains_key(&payment_hash));
}


#[test]
fn test_claim_to_closed_channel_blocks_forwarded_preimage_removal() {
// One of the last features we implemented for async persistence was the correct blocking of
// the RAA(s) which remove a preimage from an outbound channel for a forwarded payment until
// the preimage write has been durably persisted to the closed inbound channel.
// This tests that behavior.
let chanmon_cfgs = create_chanmon_cfgs(3);
let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
let nodes = create_network(3, &node_cfgs, &node_chanmgrs);

// First open channels, route a payment, and force-close the first hop.
let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 500_000_000);
let chan_b = create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 500_000_000);

let (payment_preimage, payment_hash, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 1_000_000);

nodes[0].node.force_close_broadcasting_latest_txn(&chan_a.2, &nodes[1].node.get_our_node_id(), String::new()).unwrap();
check_added_monitors!(nodes[0], 1);
let a_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) };
check_closed_event!(nodes[0], 1, a_reason, [nodes[1].node.get_our_node_id()], 1000000);
check_closed_broadcast!(nodes[0], true);

let as_commit_tx = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
assert_eq!(as_commit_tx.len(), 1);

mine_transaction(&nodes[1], &as_commit_tx[0]);
check_added_monitors!(nodes[1], 1);
check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed, [nodes[0].node.get_our_node_id()], 1000000);
check_closed_broadcast!(nodes[1], true);

// Now that B has a pending forwarded payment across it with the inbound edge on-chain, claim
// the payment on C and give B the preimage for it.
nodes[2].node.claim_funds(payment_preimage);
check_added_monitors!(nodes[2], 1);
expect_payment_claimed!(nodes[2], payment_hash, 1_000_000);

let updates = get_htlc_update_msgs!(nodes[2], nodes[1].node.get_our_node_id());
chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
nodes[1].node.handle_update_fulfill_htlc(nodes[2].node.get_our_node_id(), &updates.update_fulfill_htlcs[0]);
check_added_monitors!(nodes[1], 1);
commitment_signed_dance!(nodes[1], nodes[2], updates.commitment_signed, false);

// At this point nodes[1] has the preimage and is waiting for the `ChannelMonitorUpdate` for
// channel A to hit disk. Until it does, it shouldn't ever let the preimage disappear from
// channel B's `ChannelMonitor`.
assert!(get_monitor!(nodes[1], chan_b.2).get_all_current_outbound_htlcs().iter().any(|(_, (_, preimage))| *preimage == Some(payment_preimage)));

// Once we complete the `ChannelMonitorUpdate` on channel A, and the `ChannelManager` processes
// background events (via `get_and_clear_pending_msg_events`), the final `ChannelMonitorUpdate`
// will fly and we'll drop the preimage from channel B's `ChannelMonitor`. We'll also release
// the `Event::PaymentForwarded`.
check_added_monitors!(nodes[1], 0);
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
assert!(nodes[1].node.get_and_clear_pending_events().is_empty());

nodes[1].chain_monitor.complete_sole_pending_chan_update(&chan_a.2);
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
check_added_monitors!(nodes[1], 1);
assert!(!get_monitor!(nodes[1], chan_b.2).get_all_current_outbound_htlcs().iter().any(|(_, (_, preimage))| *preimage == Some(payment_preimage)));
expect_payment_forwarded!(nodes[1], nodes[0], nodes[2], None, true, false);
}

#[test]
fn test_claim_to_closed_channel_blocks_claimed_event() {
// One of the last features we implemented for async persistence was the correct blocking of
// event(s) until the preimage for a claimed HTLC is durably on disk in a `ChannelMonitor` for
// a closed channel.
// This tests that behavior.
let chanmon_cfgs = create_chanmon_cfgs(2);
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);

// First open channels, route a payment, and force-close the first hop.
let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 500_000_000);

let (payment_preimage, payment_hash, ..) = route_payment(&nodes[0], &[&nodes[1]], 1_000_000);

nodes[0].node.force_close_broadcasting_latest_txn(&chan_a.2, &nodes[1].node.get_our_node_id(), String::new()).unwrap();
check_added_monitors!(nodes[0], 1);
let a_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) };
check_closed_event!(nodes[0], 1, a_reason, [nodes[1].node.get_our_node_id()], 1000000);
check_closed_broadcast!(nodes[0], true);

let as_commit_tx = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
assert_eq!(as_commit_tx.len(), 1);

mine_transaction(&nodes[1], &as_commit_tx[0]);
check_added_monitors!(nodes[1], 1);
check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed, [nodes[0].node.get_our_node_id()], 1000000);
check_closed_broadcast!(nodes[1], true);

// Now that B has a pending payment with the inbound HTLC on a closed channel, claim the
// payment, but don't let the `ChannelMonitorUpdate` which writes the preimage to disk
// complete. This should prevent the `Event::PaymentClaimed` from being generated.
chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
nodes[1].node.claim_funds(payment_preimage);
check_added_monitors!(nodes[1], 1);
assert!(nodes[1].node.get_and_clear_pending_events().is_empty());

// Once we complete the `ChannelMonitorUpdate` the `Event::PaymentClaimed` will become
// available.
nodes[1].chain_monitor.complete_sole_pending_chan_update(&chan_a.2);
expect_payment_claimed!(nodes[1], payment_hash, 1_000_000);
}
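
The two tests above follow the same async-persistence pattern: the test persister returns `ChannelMonitorUpdateStatus::InProgress` so the preimage write to the closed channel's `ChannelMonitor` stays in flight, the downstream effects (removing the preimage from channel B's `ChannelMonitor`, or generating `Event::PaymentClaimed`) are asserted to stay blocked, and only `complete_sole_pending_chan_update` releases them. The following is a minimal standalone sketch of that gating idea using simplified stand-in types rather than LDK's real `ChannelManager`/`ChainMonitor` machinery; every name in it is illustrative.

use std::collections::{HashMap, VecDeque};

#[derive(Debug, PartialEq)]
enum PendingAction {
    ReleasePaymentClaimed([u8; 32]),   // stand-in for releasing Event::PaymentClaimed
    AllowPreimageRemoval([u8; 32]),    // stand-in for releasing the RAA blocker
}

#[derive(Default)]
struct AsyncPersistModel {
    // update_id of the in-flight monitor update per (closed) channel
    in_flight: HashMap<u64, u64>,
    // actions that must not run until the in-flight update completes
    blocked: HashMap<u64, VecDeque<PendingAction>>,
}

impl AsyncPersistModel {
    // Called when an update is handed to the persister and it reports "in progress".
    fn update_in_progress(&mut self, chan: u64, update_id: u64, action: PendingAction) {
        self.in_flight.insert(chan, update_id);
        self.blocked.entry(chan).or_default().push_back(action);
    }

    // Called when the persister later signals durable completion.
    // Returns the actions that are now safe to run.
    fn update_completed(&mut self, chan: u64, update_id: u64) -> Vec<PendingAction> {
        match self.in_flight.get(&chan) {
            Some(id) if *id == update_id => {
                self.in_flight.remove(&chan);
                self.blocked.remove(&chan).unwrap_or_default().into_iter().collect()
            },
            // Stale or unknown completion: keep everything blocked.
            _ => Vec::new(),
        }
    }
}

fn main() {
    let mut model = AsyncPersistModel::default();
    let hash = [0x42u8; 32];
    // The preimage write for the closed channel is handed to the persister,
    // which reports it as still in flight.
    model.update_in_progress(1, 7, PendingAction::ReleasePaymentClaimed(hash));
    model.update_in_progress(1, 7, PendingAction::AllowPreimageRemoval(hash));
    // Completing the wrong (older) update_id must not release anything.
    assert!(model.update_completed(1, 6).is_empty());
    // Completing the in-flight update releases both blocked actions, in order.
    assert_eq!(
        model.update_completed(1, 7),
        vec![
            PendingAction::ReleasePaymentClaimed(hash),
            PendingAction::AllowPreimageRemoval(hash),
        ]
    );
    println!("async-persist gating model ok");
}
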
123 changes: 39 additions & 84 deletions lightning/src/ln/channelmanager.rs
@@ -3922,39 +3922,6 @@ where
self.close_channel_internal(channel_id, counterparty_node_id, target_feerate_sats_per_1000_weight, shutdown_script)
}

fn set_closed_chan_next_monitor_update_id(
peer_state: &mut PeerState<SP>, channel_id: ChannelId, monitor_update: &mut ChannelMonitorUpdate,
) {
match peer_state.closed_channel_monitor_update_ids.entry(channel_id) {
btree_map::Entry::Vacant(entry) => {
let is_closing_unupdated_monitor = monitor_update.update_id == 1
&& monitor_update.updates.len() == 1
&& matches!(&monitor_update.updates[0], ChannelMonitorUpdateStep::ChannelForceClosed { .. });
// If the ChannelMonitorUpdate is closing a channel that never got past initial
// funding (to have any commitment updates), we'll skip inserting in
// `locked_close_channel`, allowing us to avoid keeping around the PeerState for
that peer. In that specific case we expect no entry in the map here. In any
other case, this is a bug, but in production we go ahead and recover by
inserting the update_id and hoping it's right.
debug_assert!(is_closing_unupdated_monitor, "Expected closing monitor against an unused channel, got {:?}", monitor_update);
if !is_closing_unupdated_monitor {
entry.insert(monitor_update.update_id);
}
},
btree_map::Entry::Occupied(entry) => {
// If we're running in a threaded environment it's possible we generate updates for
// a channel that is closing, then apply some preimage update, then go back and
// apply the close monitor update here. In order to ensure the updates are still
// well-ordered, we have to use the `closed_channel_monitor_update_ids` map to
// override the `update_id`, taking care to handle old monitors where the
// `latest_update_id` is already `u64::MAX`.
let latest_update_id = entry.into_mut();
*latest_update_id = latest_update_id.saturating_add(1);
monitor_update.update_id = *latest_update_id;
}
}
}

/// Applies a [`ChannelMonitorUpdate`] which may or may not be for a channel which is closed.
#[must_use]
fn apply_post_close_monitor_update(
@@ -7220,38 +7187,53 @@ where
let mut peer_state = peer_state_opt.expect("peer_state_opt is always Some when the counterparty_node_id is Some");

let mut preimage_update = ChannelMonitorUpdate {
update_id: 0, // set in set_closed_chan_next_monitor_update_id
counterparty_node_id: prev_hop.counterparty_node_id,
update_id: 0, // set below
Contributor: Could you create preimage_update after calculating the update_id to avoid this?

counterparty_node_id: Some(counterparty_node_id),
updates: vec![ChannelMonitorUpdateStep::PaymentPreimage {
payment_preimage,
payment_info,
}],
channel_id: Some(prev_hop.channel_id),
};

// Note that the below is race-y - we set the `update_id` here and then drop the peer_state
// lock before applying the update in `apply_post_close_monitor_update` (or via the
// background events pipeline). During that time, some other update could be created and
then applied, resulting in `ChannelMonitorUpdate`s being applied out of order and causing
// a panic.
Self::set_closed_chan_next_monitor_update_id(&mut *peer_state, prev_hop.channel_id, &mut preimage_update);
Contributor: Looks like this was supposed to be applied to 94d0735?

if let Some(latest_update_id) = peer_state.closed_channel_monitor_update_ids.get_mut(&chan_id) {
*latest_update_id = latest_update_id.saturating_add(1);
preimage_update.update_id = *latest_update_id;
} else {
let err = "We need the latest ChannelMonitorUpdate ID to build a new update.
This should have been checked for availability on startup but somehow it is no longer available.
This indicates a bug inside LDK. Please report this error at https://github.com/lightningdevkit/rust-lightning/issues/new";
log_error!(self.logger, "{}", err);
panic!("{}", err);
}

mem::drop(peer_state);
mem::drop(per_peer_state);
// Note that we do process the completion action here. This totally could be a
// duplicate claim, but we have no way of knowing without interrogating the
// `ChannelMonitor` we've provided the above update to. Instead, note that `Event`s are
// generally always allowed to be duplicative (and it's specifically noted in
// `PaymentForwarded`).
let (action_opt, raa_blocker_opt) = completion_action(None, false);

if let Some(raa_blocker) = raa_blocker_opt {
peer_state.actions_blocking_raa_monitor_updates
.entry(prev_hop.channel_id)
.or_default()
.push(raa_blocker);
}

// Given the fact that we're in a bit of a weird edge case, it's worth hashing the preimage
// to include the `payment_hash` in the log metadata here.
let payment_hash = payment_preimage.into();
let logger = WithContext::from(&self.logger, Some(counterparty_node_id), Some(chan_id), Some(payment_hash));

if !during_init {
// We update the ChannelMonitor on the backward link, after
// receiving an `update_fulfill_htlc` from the forward link.
let update_res = self.apply_post_close_monitor_update(counterparty_node_id, prev_hop.channel_id, prev_hop.funding_txo, preimage_update);
if update_res != ChannelMonitorUpdateStatus::Completed {
// TODO: This needs to be handled somehow - if we receive a monitor update
// with a preimage we *must* somehow manage to propagate it to the upstream
// channel, or we must have an ability to receive the same event and try
// again on restart.
log_error!(WithContext::from(&self.logger, None, Some(prev_hop.channel_id), None),
"Critical error: failed to update channel monitor with preimage {:?}: {:?}",
payment_preimage, update_res);
if let Some(action) = action_opt {
log_trace!(logger, "Tracking monitor update completion action for closed channel {}: {:?}",
chan_id, action);
peer_state.monitor_update_blocked_actions.entry(chan_id).or_insert(Vec::new()).push(action);
}

handle_new_monitor_update!(self, prev_hop.funding_txo, preimage_update, peer_state, peer_state, per_peer_state, logger, chan_id, POST_CHANNEL_CLOSE);
} else {
// If we're running during init we cannot update a monitor directly - they probably
// haven't actually been loaded yet. Instead, push the monitor update as a background
@@ -7265,39 +7247,12 @@ where
update: preimage_update,
};
self.pending_background_events.lock().unwrap().push(event);
}

// Note that we do process the completion action here. This totally could be a
// duplicate claim, but we have no way of knowing without interrogating the
// `ChannelMonitor` we've provided the above update to. Instead, note that `Event`s are
// generally always allowed to be duplicative (and it's specifically noted in
// `PaymentForwarded`).
let (action_opt, raa_blocker_opt) = completion_action(None, false);

if let Some(raa_blocker) = raa_blocker_opt {
// TODO: Avoid always blocking the world for the write lock here.
let mut per_peer_state = self.per_peer_state.write().unwrap();
let peer_state_mutex = per_peer_state.entry(counterparty_node_id).or_insert_with(||
Mutex::new(PeerState {
channel_by_id: new_hash_map(),
inbound_channel_request_by_id: new_hash_map(),
latest_features: InitFeatures::empty(),
pending_msg_events: Vec::new(),
in_flight_monitor_updates: BTreeMap::new(),
monitor_update_blocked_actions: BTreeMap::new(),
actions_blocking_raa_monitor_updates: BTreeMap::new(),
closed_channel_monitor_update_ids: BTreeMap::new(),
is_connected: false,
}));
let mut peer_state = peer_state_mutex.lock().unwrap();
mem::drop(peer_state);
mem::drop(per_peer_state);

peer_state.actions_blocking_raa_monitor_updates
.entry(prev_hop.channel_id)
.or_default()
.push(raa_blocker);
self.handle_monitor_update_completion_actions(action_opt);
}

self.handle_monitor_update_completion_actions(action_opt);
}

fn finalize_claims(&self, sources: Vec<HTLCSource>) {
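
The channelmanager.rs hunk above replaces the removed `set_closed_chan_next_monitor_update_id` helper with inline bookkeeping: the per-peer `closed_channel_monitor_update_ids` map hands out the next monotonically increasing `update_id` for a post-close `ChannelMonitorUpdate`, and a missing entry is treated as a bug in LDK. Below is a minimal sketch of that ordering rule, using simplified stand-in types rather than LDK's actual `PeerState`; the `saturating_add` keeps legacy monitors whose `latest_update_id` is already `u64::MAX` from wrapping.

use std::collections::BTreeMap;

type ChannelId = [u8; 32];

struct ClosedChannelUpdateIds {
    // Last update_id issued for each closed channel, seeded when the channel closes.
    latest: BTreeMap<ChannelId, u64>,
}

impl ClosedChannelUpdateIds {
    fn new() -> Self {
        Self { latest: BTreeMap::new() }
    }

    // Mirrors the `if let Some(latest_update_id) = ...` branch in the diff; a
    // missing entry corresponds to the `else { panic }` bug path.
    fn next_update_id(&mut self, chan: ChannelId) -> Option<u64> {
        let latest = self.latest.get_mut(&chan)?;
        *latest = latest.saturating_add(1);
        Some(*latest)
    }
}

fn main() {
    let chan = [0u8; 32];
    let mut ids = ClosedChannelUpdateIds::new();
    ids.latest.insert(chan, 5); // seeded at close time
    assert_eq!(ids.next_update_id(chan), Some(6));
    assert_eq!(ids.next_update_id(chan), Some(7));
    // Old monitors that already report u64::MAX stay pinned there instead of wrapping.
    ids.latest.insert(chan, u64::MAX);
    assert_eq!(ids.next_update_id(chan), Some(u64::MAX));
    // An unknown channel yields no id, matching the "this indicates a bug" branch.
    assert_eq!(ids.next_update_id([1u8; 32]), None);
    println!("closed-channel update_id ordering ok");
}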