Support async ChannelMonitorUpdate persist for claims against closed channels #3414

Open
wants to merge 7 commits into base: main
108 changes: 108 additions & 0 deletions lightning/src/ln/chanmon_update_fail_tests.rs
@@ -3713,3 +3713,111 @@ fn test_partial_claim_mon_update_compl_actions() {
send_payment(&nodes[2], &[&nodes[3]], 100_000);
assert!(!get_monitor!(nodes[3], chan_4_id).get_stored_preimages().contains_key(&payment_hash));
}


#[test]
fn test_claim_to_closed_channel_blocks_forwarded_preimage_removal() {
// One of the last features we implemented for async persistence was the correct blocking of
// the RAA(s) which remove a preimage from an outbound channel for a forwarded payment until
// the preimage write has been durably persisted to the closed inbound channel.
// This tests that behavior.
let chanmon_cfgs = create_chanmon_cfgs(3);
let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
let nodes = create_network(3, &node_cfgs, &node_chanmgrs);

// First open channels, route a payment, and force-close the first hop.
let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 500_000_000);
let chan_b = create_announced_chan_between_nodes_with_value(&nodes, 1, 2, 1_000_000, 500_000_000);

let (payment_preimage, payment_hash, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 1_000_000);

nodes[0].node.force_close_broadcasting_latest_txn(&chan_a.2, &nodes[1].node.get_our_node_id(), String::new()).unwrap();
check_added_monitors!(nodes[0], 1);
let a_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) };
check_closed_event!(nodes[0], 1, a_reason, [nodes[1].node.get_our_node_id()], 1000000);
check_closed_broadcast!(nodes[0], true);

let as_commit_tx = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
assert_eq!(as_commit_tx.len(), 1);

mine_transaction(&nodes[1], &as_commit_tx[0]);
check_added_monitors!(nodes[1], 1);
check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed, [nodes[0].node.get_our_node_id()], 1000000);
check_closed_broadcast!(nodes[1], true);

// Now that B has a pending forwarded payment across it with the inbound edge on-chain, claim
// the payment on C and give B the preimage for it.
nodes[2].node.claim_funds(payment_preimage);
check_added_monitors!(nodes[2], 1);
expect_payment_claimed!(nodes[2], payment_hash, 1_000_000);

let updates = get_htlc_update_msgs!(nodes[2], nodes[1].node.get_our_node_id());
chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
nodes[1].node.handle_update_fulfill_htlc(nodes[2].node.get_our_node_id(), &updates.update_fulfill_htlcs[0]);
check_added_monitors!(nodes[1], 1);
commitment_signed_dance!(nodes[1], nodes[2], updates.commitment_signed, false);

// At this point nodes[1] has the preimage and is waiting for the `ChannelMonitorUpdate` for
// channel A to hit disk. Until it does, it shouldn't ever let the preimage disappear from
// channel B's `ChannelMonitor`.
assert!(get_monitor!(nodes[1], chan_b.2).get_all_current_outbound_htlcs().iter().any(|(_, (_, preimage))| *preimage == Some(payment_preimage)));

// Once we complete the `ChannelMonitorUpdate` on channel A, and the `ChannelManager` processes
// background events (via `get_and_clear_pending_msg_events`), the final `ChannelMonitorUpdate`
// will fly and we'll drop the preimage from channel B's `ChannelMonitor`. We'll also release
// the `Event::PaymentForwarded`.
check_added_monitors!(nodes[1], 0);
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
assert!(nodes[1].node.get_and_clear_pending_events().is_empty());

nodes[1].chain_monitor.complete_sole_pending_chan_update(&chan_a.2);
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
check_added_monitors!(nodes[1], 1);
assert!(!get_monitor!(nodes[1], chan_b.2).get_all_current_outbound_htlcs().iter().any(|(_, (_, preimage))| *preimage == Some(payment_preimage)));
expect_payment_forwarded!(nodes[1], nodes[0], nodes[2], None, true, false);
}

#[test]
fn test_claim_to_closed_channel_blocks_claimed_event() {
// One of the last features we implemented for async persistence was the correct blocking of
// event(s) until the preimage for a claimed HTLC is durably on disk in a `ChannelMonitor` for
// a closed channel.
// This tests that behavior.
let chanmon_cfgs = create_chanmon_cfgs(2);
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);

// First open channels, route a payment, and force-close the first hop.
let chan_a = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 500_000_000);

let (payment_preimage, payment_hash, ..) = route_payment(&nodes[0], &[&nodes[1]], 1_000_000);

nodes[0].node.force_close_broadcasting_latest_txn(&chan_a.2, &nodes[1].node.get_our_node_id(), String::new()).unwrap();
check_added_monitors!(nodes[0], 1);
let a_reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) };
check_closed_event!(nodes[0], 1, a_reason, [nodes[1].node.get_our_node_id()], 1000000);
check_closed_broadcast!(nodes[0], true);

let as_commit_tx = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
assert_eq!(as_commit_tx.len(), 1);

mine_transaction(&nodes[1], &as_commit_tx[0]);
check_added_monitors!(nodes[1], 1);
check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed, [nodes[0].node.get_our_node_id()], 1000000);
check_closed_broadcast!(nodes[1], true);

// Now that B has a pending payment with the inbound HTLC on a closed channel, claim the
// payment, but don't let the `ChannelMonitorUpdate` which writes the preimage to disk
// complete. This should prevent the `Event::PaymentClaimed` from being generated.
chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
nodes[1].node.claim_funds(payment_preimage);
check_added_monitors!(nodes[1], 1);
assert!(nodes[1].node.get_and_clear_pending_events().is_empty());

// Once we complete the `ChannelMonitorUpdate` the `Event::PaymentClaimed` will become
// available.
nodes[1].chain_monitor.complete_sole_pending_chan_update(&chan_a.2);
expect_payment_claimed!(nodes[1], payment_hash, 1_000_000);
}
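
The two tests above follow the same async-persistence pattern: the test persister returns `ChannelMonitorUpdateStatus::InProgress` so the preimage write to the closed channel's `ChannelMonitor` stays in flight, the downstream effects (removing the preimage from channel B's `ChannelMonitor`, or generating `Event::PaymentClaimed`) are asserted to stay blocked, and only `complete_sole_pending_chan_update` releases them. The following is a minimal standalone sketch of that gating idea using simplified stand-in types rather than LDK's real `ChannelManager`/`ChainMonitor` machinery; every name in it is illustrative.

use std::collections::{HashMap, VecDeque};

#[derive(Debug, PartialEq)]
enum PendingAction {
    ReleasePaymentClaimed([u8; 32]),   // stand-in for releasing Event::PaymentClaimed
    AllowPreimageRemoval([u8; 32]),    // stand-in for releasing the RAA blocker
}

#[derive(Default)]
struct AsyncPersistModel {
    // update_id of the in-flight monitor update per (closed) channel
    in_flight: HashMap<u64, u64>,
    // actions that must not run until the in-flight update completes
    blocked: HashMap<u64, VecDeque<PendingAction>>,
}

impl AsyncPersistModel {
    // Called when an update is handed to the persister and it reports "in progress".
    fn update_in_progress(&mut self, chan: u64, update_id: u64, action: PendingAction) {
        self.in_flight.insert(chan, update_id);
        self.blocked.entry(chan).or_default().push_back(action);
    }

    // Called when the persister later signals durable completion.
    // Returns the actions that are now safe to run.
    fn update_completed(&mut self, chan: u64, update_id: u64) -> Vec<PendingAction> {
        match self.in_flight.get(&chan) {
            Some(id) if *id == update_id => {
                self.in_flight.remove(&chan);
                self.blocked.remove(&chan).unwrap_or_default().into_iter().collect()
            },
            // Stale or unknown completion: keep everything blocked.
            _ => Vec::new(),
        }
    }
}

fn main() {
    let mut model = AsyncPersistModel::default();
    let hash = [0x42u8; 32];
    // The preimage write for the closed channel is handed to the persister,
    // which reports it as still in flight.
    model.update_in_progress(1, 7, PendingAction::ReleasePaymentClaimed(hash));
    model.update_in_progress(1, 7, PendingAction::AllowPreimageRemoval(hash));
    // Completing the wrong (older) update_id must not release anything.
    assert!(model.update_completed(1, 6).is_empty());
    // Completing the in-flight update releases both blocked actions, in order.
    assert_eq!(
        model.update_completed(1, 7),
        vec![
            PendingAction::ReleasePaymentClaimed(hash),
            PendingAction::AllowPreimageRemoval(hash),
        ]
    );
    println!("async-persist gating model ok");
}
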
123 changes: 39 additions & 84 deletions lightning/src/ln/channelmanager.rs
@@ -3922,39 +3922,6 @@ where
self.close_channel_internal(channel_id, counterparty_node_id, target_feerate_sats_per_1000_weight, shutdown_script)
}

fn set_closed_chan_next_monitor_update_id(
peer_state: &mut PeerState<SP>, channel_id: ChannelId, monitor_update: &mut ChannelMonitorUpdate,
) {
match peer_state.closed_channel_monitor_update_ids.entry(channel_id) {
btree_map::Entry::Vacant(entry) => {
let is_closing_unupdated_monitor = monitor_update.update_id == 1
&& monitor_update.updates.len() == 1
&& matches!(&monitor_update.updates[0], ChannelMonitorUpdateStep::ChannelForceClosed { .. });
// If the ChannelMonitorUpdate is closing a channel that never got past initial
// funding (to have any commitment updates), we'll skip inserting in
// `locked_close_channel`, allowing us to avoid keeping around the PeerState for
that peer. In that specific case we expect no entry in the map here. In any
other case, this is a bug, but in production we go ahead and recover by
inserting the update_id and hoping it's right.
debug_assert!(is_closing_unupdated_monitor, "Expected closing monitor against an unused channel, got {:?}", monitor_update);
if !is_closing_unupdated_monitor {
entry.insert(monitor_update.update_id);
}
},
btree_map::Entry::Occupied(entry) => {
// If we're running in a threaded environment it's possible we generate updates for
// a channel that is closing, then apply some preimage update, then go back and
// apply the close monitor update here. In order to ensure the updates are still
// well-ordered, we have to use the `closed_channel_monitor_update_ids` map to
// override the `update_id`, taking care to handle old monitors where the
// `latest_update_id` is already `u64::MAX`.
let latest_update_id = entry.into_mut();
*latest_update_id = latest_update_id.saturating_add(1);
monitor_update.update_id = *latest_update_id;
}
}
}

/// Applies a [`ChannelMonitorUpdate`] which may or may not be for a channel which is closed.
#[must_use]
fn apply_post_close_monitor_update(
@@ -7220,38 +7187,53 @@ where
let mut peer_state = peer_state_opt.expect("peer_state_opt is always Some when the counterparty_node_id is Some");

let mut preimage_update = ChannelMonitorUpdate {
update_id: 0, // set in set_closed_chan_next_monitor_update_id
counterparty_node_id: prev_hop.counterparty_node_id,
update_id: 0, // set below
Contributor: Could you create preimage_update after calculating the update_id to avoid this?

counterparty_node_id: Some(counterparty_node_id),
updates: vec![ChannelMonitorUpdateStep::PaymentPreimage {
payment_preimage,
payment_info,
}],
channel_id: Some(prev_hop.channel_id),
};

// Note that the below is race-y - we set the `update_id` here and then drop the peer_state
// lock before applying the update in `apply_post_close_monitor_update` (or via the
// background events pipeline). During that time, some other update could be created and
then applied, resulting in `ChannelMonitorUpdate`s being applied out of order and causing
// a panic.
Self::set_closed_chan_next_monitor_update_id(&mut *peer_state, prev_hop.channel_id, &mut preimage_update);
Contributor: Looks like this was supposed to be applied to 94d0735?

if let Some(latest_update_id) = peer_state.closed_channel_monitor_update_ids.get_mut(&chan_id) {
*latest_update_id = latest_update_id.saturating_add(1);
preimage_update.update_id = *latest_update_id;
} else {
let err = "We need the latest ChannelMonitorUpdate ID to build a new update.
This should have been checked for availability on startup but somehow it is no longer available.
This indicates a bug inside LDK. Please report this error at https://github.com/lightningdevkit/rust-lightning/issues/new";
log_error!(self.logger, "{}", err);
panic!("{}", err);
}

mem::drop(peer_state);
mem::drop(per_peer_state);
// Note that we do process the completion action here. This totally could be a
// duplicate claim, but we have no way of knowing without interrogating the
// `ChannelMonitor` we've provided the above update to. Instead, note that `Event`s are
// generally always allowed to be duplicative (and it's specifically noted in
// `PaymentForwarded`).
let (action_opt, raa_blocker_opt) = completion_action(None, false);

if let Some(raa_blocker) = raa_blocker_opt {
peer_state.actions_blocking_raa_monitor_updates
.entry(prev_hop.channel_id)
.or_default()
.push(raa_blocker);
}

// Given the fact that we're in a bit of a weird edge case, it's worth hashing the preimage
// to include the `payment_hash` in the log metadata here.
let payment_hash = payment_preimage.into();
let logger = WithContext::from(&self.logger, Some(counterparty_node_id), Some(chan_id), Some(payment_hash));

if !during_init {
// We update the ChannelMonitor on the backward link, after
// receiving an `update_fulfill_htlc` from the forward link.
let update_res = self.apply_post_close_monitor_update(counterparty_node_id, prev_hop.channel_id, prev_hop.funding_txo, preimage_update);
if update_res != ChannelMonitorUpdateStatus::Completed {
// TODO: This needs to be handled somehow - if we receive a monitor update
// with a preimage we *must* somehow manage to propagate it to the upstream
// channel, or we must have an ability to receive the same event and try
// again on restart.
log_error!(WithContext::from(&self.logger, None, Some(prev_hop.channel_id), None),
"Critical error: failed to update channel monitor with preimage {:?}: {:?}",
payment_preimage, update_res);
if let Some(action) = action_opt {
log_trace!(logger, "Tracking monitor update completion action for closed channel {}: {:?}",
chan_id, action);
peer_state.monitor_update_blocked_actions.entry(chan_id).or_insert(Vec::new()).push(action);
}

handle_new_monitor_update!(self, prev_hop.funding_txo, preimage_update, peer_state, peer_state, per_peer_state, logger, chan_id, POST_CHANNEL_CLOSE);
} else {
// If we're running during init we cannot update a monitor directly - they probably
// haven't actually been loaded yet. Instead, push the monitor update as a background
@@ -7265,39 +7247,12 @@ where
update: preimage_update,
};
self.pending_background_events.lock().unwrap().push(event);
}

// Note that we do process the completion action here. This totally could be a
// duplicate claim, but we have no way of knowing without interrogating the
// `ChannelMonitor` we've provided the above update to. Instead, note that `Event`s are
// generally always allowed to be duplicative (and it's specifically noted in
// `PaymentForwarded`).
let (action_opt, raa_blocker_opt) = completion_action(None, false);

if let Some(raa_blocker) = raa_blocker_opt {
// TODO: Avoid always blocking the world for the write lock here.
let mut per_peer_state = self.per_peer_state.write().unwrap();
let peer_state_mutex = per_peer_state.entry(counterparty_node_id).or_insert_with(||
Mutex::new(PeerState {
channel_by_id: new_hash_map(),
inbound_channel_request_by_id: new_hash_map(),
latest_features: InitFeatures::empty(),
pending_msg_events: Vec::new(),
in_flight_monitor_updates: BTreeMap::new(),
monitor_update_blocked_actions: BTreeMap::new(),
actions_blocking_raa_monitor_updates: BTreeMap::new(),
closed_channel_monitor_update_ids: BTreeMap::new(),
is_connected: false,
}));
let mut peer_state = peer_state_mutex.lock().unwrap();
mem::drop(peer_state);
mem::drop(per_peer_state);

peer_state.actions_blocking_raa_monitor_updates
.entry(prev_hop.channel_id)
.or_default()
.push(raa_blocker);
self.handle_monitor_update_completion_actions(action_opt);
}

self.handle_monitor_update_completion_actions(action_opt);
}

fn finalize_claims(&self, sources: Vec<HTLCSource>) {
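
The channelmanager.rs hunk above replaces the removed `set_closed_chan_next_monitor_update_id` helper with inline bookkeeping: the per-peer `closed_channel_monitor_update_ids` map hands out the next monotonically increasing `update_id` for a post-close `ChannelMonitorUpdate`, and a missing entry is treated as a bug in LDK. Below is a minimal sketch of that ordering rule, using simplified stand-in types rather than LDK's actual `PeerState`; the `saturating_add` keeps legacy monitors whose `latest_update_id` is already `u64::MAX` from wrapping.

use std::collections::BTreeMap;

type ChannelId = [u8; 32];

struct ClosedChannelUpdateIds {
    // Last update_id issued for each closed channel, seeded when the channel closes.
    latest: BTreeMap<ChannelId, u64>,
}

impl ClosedChannelUpdateIds {
    fn new() -> Self {
        Self { latest: BTreeMap::new() }
    }

    // Mirrors the `if let Some(latest_update_id) = ...` branch in the diff; a
    // missing entry corresponds to the `else { panic }` bug path.
    fn next_update_id(&mut self, chan: ChannelId) -> Option<u64> {
        let latest = self.latest.get_mut(&chan)?;
        *latest = latest.saturating_add(1);
        Some(*latest)
    }
}

fn main() {
    let chan = [0u8; 32];
    let mut ids = ClosedChannelUpdateIds::new();
    ids.latest.insert(chan, 5); // seeded at close time
    assert_eq!(ids.next_update_id(chan), Some(6));
    assert_eq!(ids.next_update_id(chan), Some(7));
    // Old monitors that already report u64::MAX stay pinned there instead of wrapping.
    ids.latest.insert(chan, u64::MAX);
    assert_eq!(ids.next_update_id(chan), Some(u64::MAX));
    // An unknown channel yields no id, matching the "this indicates a bug" branch.
    assert_eq!(ids.next_update_id([1u8; 32]), None);
    println!("closed-channel update_id ordering ok");
}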