Commit eb5492f

Handle mon update completion actions even with update(s) blocked
If we complete a `ChannelMonitorUpdate` persistence but there are blocked `ChannelMonitorUpdate`s in the channel, we'll skip all the post-monitor-update logic entirely. While it's correct that we can't resume the channel (as it expected the monitor updates it generated to complete, even if they ended up blocked), the post-update actions are a `channelmanager.rs` concept - they cannot be tied to blocked updates because `channelmanager.rs` doesn't even see blocked updates. This can lead to a channel getting stuck waiting on itself.

In a production environment, an LDK user saw a case where:
(a) an MPP payment was received over several channels, let's call them A + B,
(b) channel B got into `AwaitingRAA` due to unrelated operations,
(c) the MPP payment was claimed, with async monitor updating,
(d) the `revoke_and_ack` we were waiting on was delivered, but the resulting `ChannelMonitorUpdate` was blocked due to the pending claim having inserted an RAA-blocking action,
(e) the preimage `ChannelMonitorUpdate` generated for channel B completed persistence, which did nothing due to the blocked `ChannelMonitorUpdate`,
(f) the `Event::PaymentClaimed` event was handled, but it, too, failed to unblock the channel.

Instead, here, we simply process post-update actions when an update completes, even if there are pending blocked updates. We do not fully unblock the channel, of course.
1 parent 6d9c676 commit eb5492f
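
In essence, the change below makes the completion path always run the queued completion actions and only gates channel resumption on the absence of blocked updates. The following is a minimal sketch of that control flow using simplified stand-in types and names (`Chan`, `on_monitor_update_completed`, and the printed messages are illustrative, not the actual LDK API); the real logic lives in the `handle_monitor_update_completion!` macro shown in the diff.

// Minimal sketch of the new completion flow (stand-in types, not LDK's real API).
struct Chan {
	blocked_monitor_updates_pending: usize,
	// Stand-in for the queued MonitorUpdateCompletionAction list.
	pending_completion_actions: Vec<String>,
}

fn on_monitor_update_completed(chan: &mut Chan) {
	// Post-update actions (e.g. emitting `Event::PaymentClaimed`) are a
	// `channelmanager.rs` concept, so they run as soon as the persisted
	// update completes...
	let actions = std::mem::take(&mut chan.pending_completion_actions);
	for action in actions {
		println!("completing action: {action}");
	}

	// ...but the channel itself is only resumed (RAA/commitment replay, HTLC
	// forwards, etc.) once no blocked updates remain. Previously the actions
	// above were also skipped in this case, letting the channel wait on itself.
	if chan.blocked_monitor_updates_pending == 0 {
		println!("resuming channel");
	} else {
		println!("leaving channel blocked");
	}
}

fn main() {
	let mut chan = Chan {
		blocked_monitor_updates_pending: 1,
		pending_completion_actions: vec!["PaymentClaimed".to_string()],
	};
	on_monitor_update_completed(&mut chan);
}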

3 files changed: +271 -135 lines changed

lightning/src/ln/chanmon_update_fail_tests.rs

Lines changed: 113 additions & 4 deletions
@@ -19,12 +19,13 @@ use crate::chain::transaction::OutPoint;
 use crate::chain::{ChannelMonitorUpdateStatus, Listen, Watch};
 use crate::events::{ClosureReason, Event, HTLCHandlingFailureType, PaymentPurpose};
 use crate::ln::channel::AnnouncementSigsState;
-use crate::ln::channelmanager::{PaymentId, RAACommitmentOrder, RecipientOnionFields};
+use crate::ln::channelmanager::{PaymentId, RAACommitmentOrder, RecipientOnionFields, Retry};
 use crate::ln::msgs;
 use crate::ln::msgs::{
 	BaseMessageHandler, ChannelMessageHandler, MessageSendEvent, RoutingMessageHandler,
 };
 use crate::ln::types::ChannelId;
+use crate::routing::router::{PaymentParameters, RouteParameters};
 use crate::sign::NodeSigner;
 use crate::util::native_async::FutureQueue;
 use crate::util::persist::{
@@ -3535,7 +3536,7 @@ fn do_test_blocked_chan_preimage_release(completion_mode: BlockedUpdateComplMode
 	assert!(a.is_none());

 	nodes[1].node.handle_revoke_and_ack(node_a_id, &raa);
-	check_added_monitors(&nodes[1], 0);
+	check_added_monitors(&nodes[1], 1);
 	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
 }

@@ -3554,8 +3555,8 @@ fn do_test_blocked_chan_preimage_release(completion_mode: BlockedUpdateComplMode
 		panic!();
 	}

-	// The event processing should release the last RAA updates on both channels.
-	check_added_monitors(&nodes[1], 2);
+	// The event processing should release the last RAA update.
+	check_added_monitors(&nodes[1], 1);

 	// When we fetch the next update the message getter will generate the next update for nodes[2],
 	// generating a further monitor update.
@@ -5055,3 +5056,111 @@ fn native_async_persist() {
 		panic!();
 	}
 }
+
+#[test]
+fn test_mpp_claim_to_holding_cell() {
+	// Previously, if an MPP payment was claimed while one channel was AwaitingRAA (causing the
+	// HTLC claim to go into the holding cell), and the RAA came in before the async monitor
+	// update with the preimage completed, the channel could hang waiting on itself.
+	// This tests that behavior.
+	let chanmon_cfgs = create_chanmon_cfgs(4);
+	let node_cfgs = create_node_cfgs(4, &chanmon_cfgs);
+	let node_chanmgrs = create_node_chanmgrs(4, &node_cfgs, &[None, None, None, None]);
+	let nodes = create_network(4, &node_cfgs, &node_chanmgrs);
+
+	let node_b_id = nodes[1].node.get_our_node_id();
+	let node_c_id = nodes[2].node.get_our_node_id();
+	let node_d_id = nodes[3].node.get_our_node_id();
+
+	// First open channels in a diamond and deliver the MPP payment.
+	let chan_1_scid = create_announced_chan_between_nodes(&nodes, 0, 1).0.contents.short_channel_id;
+	let chan_2_scid = create_announced_chan_between_nodes(&nodes, 0, 2).0.contents.short_channel_id;
+	let (chan_3_update, _, chan_3_id, ..) = create_announced_chan_between_nodes(&nodes, 1, 3);
+	let chan_3_scid = chan_3_update.contents.short_channel_id;
+	let (chan_4_update, _, chan_4_id, ..) = create_announced_chan_between_nodes(&nodes, 2, 3);
+	let chan_4_scid = chan_4_update.contents.short_channel_id;
+
+	let (mut route, paymnt_hash_1, preimage_1, payment_secret) =
+		get_route_and_payment_hash!(&nodes[0], nodes[3], 500_000);
+	let path = route.paths[0].clone();
+	route.paths.push(path);
+	route.paths[0].hops[0].pubkey = node_b_id;
+	route.paths[0].hops[0].short_channel_id = chan_1_scid;
+	route.paths[0].hops[1].short_channel_id = chan_3_scid;
+	route.paths[0].hops[1].fee_msat = 250_000;
+	route.paths[1].hops[0].pubkey = node_c_id;
+	route.paths[1].hops[0].short_channel_id = chan_2_scid;
+	route.paths[1].hops[1].short_channel_id = chan_4_scid;
+	route.paths[1].hops[1].fee_msat = 250_000;
+	let paths = &[&[&nodes[1], &nodes[3]][..], &[&nodes[2], &nodes[3]][..]];
+	send_along_route_with_secret(&nodes[0], route, paths, 500_000, paymnt_hash_1, payment_secret);
+
+	// Put the C <-> D channel into AwaitingRaa
+	let (preimage_2, paymnt_hash_2, payment_secret_2) = get_payment_preimage_hash!(nodes[3]);
+	let onion = RecipientOnionFields::secret_only(payment_secret_2);
+	let id = PaymentId([42; 32]);
+	let pay_params = PaymentParameters::from_node_id(node_d_id, TEST_FINAL_CLTV);
+	let route_params = RouteParameters::from_payment_params_and_value(pay_params, 400_000);
+	nodes[2].node.send_payment(paymnt_hash_2, onion, id, route_params, Retry::Attempts(0)).unwrap();
+	check_added_monitors(&nodes[2], 1);
+
+	let mut payment_event = SendEvent::from_node(&nodes[2]);
+	nodes[3].node.handle_update_add_htlc(node_c_id, &payment_event.msgs[0]);
+	nodes[3].node.handle_commitment_signed_batch_test(node_c_id, &payment_event.commitment_msg);
+	check_added_monitors(&nodes[3], 1);
+
+	let (raa, cs) = get_revoke_commit_msgs(&nodes[3], &node_c_id);
+	nodes[2].node.handle_revoke_and_ack(node_d_id, &raa);
+	check_added_monitors(&nodes[2], 1);
+
+	nodes[2].node.handle_commitment_signed_batch_test(node_d_id, &cs);
+	check_added_monitors(&nodes[2], 1);
+
+	let cs_raa = get_event_msg!(nodes[2], MessageSendEvent::SendRevokeAndACK, node_d_id);
+
+	// Now claim the payment, completing both channel monitor updates async
+	// In the current code, the C <-> D channel happens to be the `durable_preimage_channel`,
+	// improving coverage somewhat but it isn't strictly critical to the test.
+	chanmon_cfgs[3].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+	chanmon_cfgs[3].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+	nodes[3].node.claim_funds(preimage_1);
+	check_added_monitors(&nodes[3], 2);
+
+	// Complete the B <-> D monitor update, freeing the first fulfill.
+	let (latest_id, _) = get_latest_mon_update_id(&nodes[3], chan_3_id);
+	nodes[3].chain_monitor.chain_monitor.channel_monitor_updated(chan_3_id, latest_id).unwrap();
+	let mut b_claim = get_htlc_update_msgs(&nodes[3], &node_b_id);
+
+	// When we deliver the pre-claim RAA, node D will shove the monitor update into the blocked
+	// state since we have a pending MPP payment which is blocking RAA monitor updates.
+	nodes[3].node.handle_revoke_and_ack(node_c_id, &cs_raa);
+	check_added_monitors(&nodes[3], 0);
+
+	// Finally, complete the C <-> D monitor update. Previously, this unlock failed to be processed
+	// due to the existence of the blocked RAA update above.
+	let (latest_id, _) = get_latest_mon_update_id(&nodes[3], chan_4_id);
+	nodes[3].chain_monitor.chain_monitor.channel_monitor_updated(chan_4_id, latest_id).unwrap();
+	// Once we process monitor events (in this case by checking for the `PaymentClaimed` event, the
+	// RAA monitor update blocked above will be released.
+	expect_payment_claimed!(nodes[3], paymnt_hash_1, 500_000);
+	check_added_monitors(&nodes[3], 1);
+
+	// After the RAA monitor update completes, the C <-> D channel will be able to generate its
+	// fulfill updates as well.
+	let mut c_claim = get_htlc_update_msgs(&nodes[3], &node_c_id);
+	check_added_monitors(&nodes[3], 1);
+
+	// Finally, clear all the pending payments.
+	let path = [&[&nodes[1], &nodes[3]][..], &[&nodes[2], &nodes[3]][..]];
+	let mut args = ClaimAlongRouteArgs::new(&nodes[0], &path[..], preimage_1);
+	let b_claim_msgs = (b_claim.update_fulfill_htlcs.pop().unwrap(), b_claim.commitment_signed);
+	let c_claim_msgs = (c_claim.update_fulfill_htlcs.pop().unwrap(), c_claim.commitment_signed);
+	let claims = vec![(b_claim_msgs, node_b_id), (c_claim_msgs, node_c_id)];
+	pass_claimed_payment_along_route_from_ev(250_000, claims, args);
+
+	expect_payment_sent(&nodes[0], preimage_1, None, true, true);
+
+	expect_and_process_pending_htlcs(&nodes[3], false);
+	expect_payment_claimable!(nodes[3], paymnt_hash_2, payment_secret_2, 400_000);
+	claim_payment(&nodes[2], &[&nodes[3]], preimage_2);
+}

lightning/src/ln/channelmanager.rs

Lines changed: 85 additions & 69 deletions
@@ -1296,6 +1296,10 @@ impl MaybeReadable for EventUnblockedChannel {
 }

 #[derive(Debug)]
+/// Note that these run after all *non-blocked* [`ChannelMonitorUpdate`]s have been persisted.
+/// Thus, they're primarily useful for (and currently only used for) claims, where the
+/// [`ChannelMonitorUpdate`] we care about is a preimage update, which bypass the monitor update
+/// blocking logic entirely and can never be blocked.
 pub(crate) enum MonitorUpdateCompletionAction {
 	/// Indicates that a payment ultimately destined for us was claimed and we should emit an
 	/// [`events::Event::PaymentClaimed`] to the user if we haven't yet generated such an event for
@@ -1580,6 +1584,11 @@ where
 	/// same `temporary_channel_id` (or final `channel_id` in the case of 0conf channels or prior
 	/// to funding appearing on-chain), the downstream `ChannelMonitor` set is required to ensure
 	/// duplicates do not occur, so such channels should fail without a monitor update completing.
+	///
+	/// Note that these run after all *non-blocked* [`ChannelMonitorUpdate`]s have been persisted.
+	/// Thus, they're primarily useful for (and currently only used for) claims, where the
+	/// [`ChannelMonitorUpdate`] we care about is a preimage update, which bypass the monitor
+	/// update blocking logic entirely and can never be blocked.
 	monitor_update_blocked_actions: BTreeMap<ChannelId, Vec<MonitorUpdateCompletionAction>>,
 	/// If another channel's [`ChannelMonitorUpdate`] needs to complete before a channel we have
 	/// with this peer can complete an RAA [`ChannelMonitorUpdate`] (e.g. because the RAA update
@@ -3353,78 +3362,90 @@ macro_rules! handle_monitor_update_completion {
 		let chan_id = $chan.context.channel_id();
 		let outbound_alias = $chan.context().outbound_scid_alias();
 		let cp_node_id = $chan.context.get_counterparty_node_id();
+
 		#[cfg(debug_assertions)]
 		{
 			let in_flight_updates = $peer_state.in_flight_monitor_updates.get(&chan_id);
 			assert!(in_flight_updates.map(|(_, updates)| updates.is_empty()).unwrap_or(true));
-			assert_eq!($chan.blocked_monitor_updates_pending(), 0);
+			assert!($chan.is_awaiting_monitor_update());
 		}
+
 		let logger = WithChannelContext::from(&$self.logger, &$chan.context, None);
-		let updates = $chan.monitor_updating_restored(
-			&&logger,
-			&$self.node_signer,
-			$self.chain_hash,
-			&*$self.config.read().unwrap(),
-			$self.best_block.read().unwrap().height,
-			|htlc_id| {
-				$self.path_for_release_held_htlc(htlc_id, outbound_alias, &chan_id, &cp_node_id)
-			},
-		);
-		let channel_update = if updates.channel_ready.is_some()
-			&& $chan.context.is_usable()
-			&& $peer_state.is_connected
-		{
-			// We only send a channel_update in the case where we are just now sending a
-			// channel_ready and the channel is in a usable state. We may re-send a
-			// channel_update later through the announcement_signatures process for public
-			// channels, but there's no reason not to just inform our counterparty of our fees
-			// now.
-			if let Ok((msg, _, _)) = $self.get_channel_update_for_unicast($chan) {
-				Some(MessageSendEvent::SendChannelUpdate { node_id: cp_node_id, msg })
-			} else {
-				None
-			}
-		} else {
-			None
-		};

 		let update_actions =
 			$peer_state.monitor_update_blocked_actions.remove(&chan_id).unwrap_or(Vec::new());

-		let (htlc_forwards, decode_update_add_htlcs) = $self.handle_channel_resumption(
-			&mut $peer_state.pending_msg_events,
-			$chan,
-			updates.raa,
-			updates.commitment_update,
-			updates.commitment_order,
-			updates.accepted_htlcs,
-			updates.pending_update_adds,
-			updates.funding_broadcastable,
-			updates.channel_ready,
-			updates.announcement_sigs,
-			updates.tx_signatures,
-			None,
-			updates.channel_ready_order,
-		);
-		if let Some(upd) = channel_update {
-			$peer_state.pending_msg_events.push(upd);
-		}
-
-		let unbroadcasted_batch_funding_txid =
-			$chan.context.unbroadcasted_batch_funding_txid(&$chan.funding);
-		core::mem::drop($peer_state_lock);
-		core::mem::drop($per_peer_state_lock);
-
-		$self.post_monitor_update_unlock(
-			chan_id,
-			cp_node_id,
-			unbroadcasted_batch_funding_txid,
-			update_actions,
-			htlc_forwards,
-			decode_update_add_htlcs,
-			updates.finalized_claimed_htlcs,
-			updates.failed_htlcs,
-		);
+		if $chan.blocked_monitor_updates_pending() != 0 {
+			mem::drop($peer_state_lock);
+			mem::drop($per_peer_state_lock);
+
+			log_debug!(logger, "Channel has blocked monitor updates, completing update actions but leaving channel blocked");
+			$self.handle_monitor_update_completion_actions(update_actions);
+		} else {
+			log_debug!(logger, "Channel is open and awaiting update, resuming it");
+			let updates = $chan.monitor_updating_restored(
+				&&logger,
+				&$self.node_signer,
+				$self.chain_hash,
+				&*$self.config.read().unwrap(),
+				$self.best_block.read().unwrap().height,
+				|htlc_id| {
+					$self.path_for_release_held_htlc(htlc_id, outbound_alias, &chan_id, &cp_node_id)
+				},
+			);
+			let channel_update = if updates.channel_ready.is_some()
+				&& $chan.context.is_usable()
+				&& $peer_state.is_connected
+			{
+				// We only send a channel_update in the case where we are just now sending a
+				// channel_ready and the channel is in a usable state. We may re-send a
+				// channel_update later through the announcement_signatures process for public
+				// channels, but there's no reason not to just inform our counterparty of our fees
+				// now.
+				if let Ok((msg, _, _)) = $self.get_channel_update_for_unicast($chan) {
+					Some(MessageSendEvent::SendChannelUpdate { node_id: cp_node_id, msg })
+				} else {
+					None
+				}
+			} else {
+				None
+			};
+
+			let (htlc_forwards, decode_update_add_htlcs) = $self.handle_channel_resumption(
+				&mut $peer_state.pending_msg_events,
+				$chan,
+				updates.raa,
+				updates.commitment_update,
+				updates.commitment_order,
+				updates.accepted_htlcs,
+				updates.pending_update_adds,
+				updates.funding_broadcastable,
+				updates.channel_ready,
+				updates.announcement_sigs,
+				updates.tx_signatures,
+				None,
+				updates.channel_ready_order,
+			);
+			if let Some(upd) = channel_update {
+				$peer_state.pending_msg_events.push(upd);
+			}
+
+			let unbroadcasted_batch_funding_txid =
+				$chan.context.unbroadcasted_batch_funding_txid(&$chan.funding);
+			core::mem::drop($peer_state_lock);
+			core::mem::drop($per_peer_state_lock);
+
+			$self.post_monitor_update_unlock(
+				chan_id,
+				cp_node_id,
+				unbroadcasted_batch_funding_txid,
+				update_actions,
+				htlc_forwards,
+				decode_update_add_htlcs,
+				updates.finalized_claimed_htlcs,
+				updates.failed_htlcs,
+			);
+		}
 	}};
 }

@@ -3595,7 +3616,7 @@ macro_rules! handle_new_monitor_update {
 			$update,
 			WithChannelContext::from(&$self.logger, &$chan.context, None),
 		);
-		if all_updates_complete && $chan.blocked_monitor_updates_pending() == 0 {
+		if all_updates_complete {
 			handle_monitor_update_completion!(
 				$self,
 				$peer_state_lock,
@@ -9813,12 +9834,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
 			.and_then(Channel::as_funded_mut)
 		{
 			if chan.is_awaiting_monitor_update() {
-				if chan.blocked_monitor_updates_pending() == 0 {
-					log_trace!(logger, "Channel is open and awaiting update, resuming it");
-					handle_monitor_update_completion!(self, peer_state_lock, peer_state, per_peer_state, chan);
-				} else {
-					log_trace!(logger, "Channel is open and awaiting update, leaving it blocked due to a blocked monitor update");
-				}
+				handle_monitor_update_completion!(self, peer_state_lock, peer_state, per_peer_state, chan);
 			} else {
 				log_trace!(logger, "Channel is open but not awaiting update");
 			}
