Commit 63a5e03

Merge pull request #3790 from TheBlueMatt/2025-05-0.1-upgrade-fail

Read `ChannelManager` even if we have no-peer post-update actions

2 parents: 02b5564 + 70b5552

File tree: 3 files changed, +183 -4 lines changed

lightning-tests/Cargo.toml

Lines changed: 1 addition & 0 deletions

@@ -15,6 +15,7 @@ lightning-invoice = { path = "../lightning-invoice", default-features = false }
 lightning-macros = { path = "../lightning-macros" }
 lightning = { path = "../lightning", features = ["_test_utils"] }
 lightning_0_1 = { package = "lightning", version = "0.1.1", features = ["_test_utils"] }
+lightning_0_0_125 = { package = "lightning", version = "0.0.125", features = ["_test_utils"] }
 
 bitcoin = { version = "0.32.2", default-features = false }
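The `package = "lightning"` key is Cargo's dependency-renaming feature: it lets the same crate be depended on at several versions under distinct names in one crate graph. The test file below uses exactly that, importing the pinned release under its renamed identity while the in-workspace crate keeps the plain name:

    // Pinned 0.0.125 release, available under the renamed crate identity:
    use lightning_0_0_125::ln::functional_test_utils as lightning_0_0_125_utils;
    // Current in-workspace crate keeps the plain `lightning` name:
    use lightning::ln::functional_test_utils::*;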

lightning-tests/src/upgrade_downgrade_tests.rs

Lines changed: 151 additions & 1 deletion

@@ -12,7 +12,21 @@
 
 use lightning_0_1::get_monitor as get_monitor_0_1;
 use lightning_0_1::ln::functional_test_utils as lightning_0_1_utils;
-use lightning_0_1::util::ser::Writeable;
+use lightning_0_1::util::ser::Writeable as _;
+
+use lightning_0_0_125::chain::ChannelMonitorUpdateStatus as ChannelMonitorUpdateStatus_0_0_125;
+use lightning_0_0_125::check_added_monitors as check_added_monitors_0_0_125;
+use lightning_0_0_125::events::ClosureReason as ClosureReason_0_0_125;
+use lightning_0_0_125::expect_payment_claimed as expect_payment_claimed_0_0_125;
+use lightning_0_0_125::get_htlc_update_msgs as get_htlc_update_msgs_0_0_125;
+use lightning_0_0_125::get_monitor as get_monitor_0_0_125;
+use lightning_0_0_125::get_revoke_commit_msgs as get_revoke_commit_msgs_0_0_125;
+use lightning_0_0_125::ln::channelmanager::PaymentId as PaymentId_0_0_125;
+use lightning_0_0_125::ln::channelmanager::RecipientOnionFields as RecipientOnionFields_0_0_125;
+use lightning_0_0_125::ln::functional_test_utils as lightning_0_0_125_utils;
+use lightning_0_0_125::ln::msgs::ChannelMessageHandler as _;
+use lightning_0_0_125::routing::router as router_0_0_125;
+use lightning_0_0_125::util::ser::Writeable as _;
 
 use lightning::ln::functional_test_utils::*;
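The `as _` form imports a trait anonymously: its methods (here `encode`, via `Writeable`) stay callable, but no name is bound, which avoids a collision between the `Writeable` traits of the two crate versions. A minimal illustration of the idiom, using a standard-library trait rather than the ones above:

    // Bring fmt::Write's methods into scope without taking the name `Write`.
    use std::fmt::Write as _;

    fn main() {
        let mut s = String::new();
        // write! expands to s.write_fmt(...), which needs the trait in scope.
        write!(s, "{}", 42).unwrap();
        println!("{s}");
    }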

@@ -63,3 +77,139 @@ fn simple_upgrade() {
 
 	claim_payment(&nodes[0], &[&nodes[1]], preimage);
 }
+
+#[test]
+fn test_125_dangling_post_update_actions() {
+	// Tests a failure of upgrading from 0.0.125 to 0.1 when there's a dangling
+	// `MonitorUpdateCompletionAction` due to the bug fixed in
+	// 93b4479e472e6767af5df90fecdcdfb79074e260.
+	let (node_d_ser, mon_ser);
+	{
+		// First, we get RAA-source monitor updates held by using async persistence (note that this
+		// issue was first identified as a consequence of the bug fixed in
+		// 93b4479e472e6767af5df90fecdcdfb79074e260 but in order to replicate that bug we need a
+		// complicated multi-threaded race that is not deterministic, thus we "cheat" here by using
+		// async persistence). We do this by simply claiming an MPP payment and not completing the
+		// second channel's `ChannelMonitorUpdate`, blocking RAA `ChannelMonitorUpdate`s from the
+		// first (which is ultimately a very similar bug to the one fixed in 93b4479e472e6767af5df).
+		//
+		// Then, we claim a second payment on the channel, which ultimately doesn't have its
+		// `ChannelMonitorUpdate` completion handled due to the presence of the blocked
+		// `ChannelMonitorUpdate`. The claim also generates a post-update completion action, but
+		// the `ChannelMonitorUpdate` isn't queued due to the RAA-update block.
+		let chanmon_cfgs = lightning_0_0_125_utils::create_chanmon_cfgs(4);
+		let node_cfgs = lightning_0_0_125_utils::create_node_cfgs(4, &chanmon_cfgs);
+		let node_chanmgrs =
+			lightning_0_0_125_utils::create_node_chanmgrs(4, &node_cfgs, &[None, None, None, None]);
+		let nodes = lightning_0_0_125_utils::create_network(4, &node_cfgs, &node_chanmgrs);
+
+		let node_b_id = nodes[1].node.get_our_node_id();
+		let node_d_id = nodes[3].node.get_our_node_id();
+
+		lightning_0_0_125_utils::create_announced_chan_between_nodes_with_value(
+			&nodes, 0, 1, 100_000, 0,
+		);
+		lightning_0_0_125_utils::create_announced_chan_between_nodes_with_value(
+			&nodes, 0, 2, 100_000, 0,
+		);
+		let chan_id_1_3 = lightning_0_0_125_utils::create_announced_chan_between_nodes_with_value(
+			&nodes, 1, 3, 100_000, 0,
+		)
+		.2;
+		let chan_id_2_3 = lightning_0_0_125_utils::create_announced_chan_between_nodes_with_value(
+			&nodes, 2, 3, 100_000, 0,
+		)
+		.2;
+
+		let (preimage, hash, secret) =
+			lightning_0_0_125_utils::get_payment_preimage_hash(&nodes[3], Some(15_000_000), None);
+
+		let pay_params = router_0_0_125::PaymentParameters::from_node_id(
+			node_d_id,
+			lightning_0_0_125_utils::TEST_FINAL_CLTV,
+		)
+		.with_bolt11_features(nodes[3].node.bolt11_invoice_features())
+		.unwrap();
+
+		let route_params =
+			router_0_0_125::RouteParameters::from_payment_params_and_value(pay_params, 15_000_000);
+		let route = lightning_0_0_125_utils::get_route(&nodes[0], &route_params).unwrap();
+
+		let onion = RecipientOnionFields_0_0_125::secret_only(secret);
+		let id = PaymentId_0_0_125(hash.0);
+		nodes[0].node.send_payment_with_route(route, hash, onion, id).unwrap();
+
+		check_added_monitors_0_0_125!(nodes[0], 2);
+		let paths = &[&[&nodes[1], &nodes[3]][..], &[&nodes[2], &nodes[3]]];
+		lightning_0_0_125_utils::pass_along_route(&nodes[0], paths, 15_000_000, hash, secret);
+
+		let preimage_2 = lightning_0_0_125_utils::route_payment(&nodes[1], &[&nodes[3]], 100_000).0;
+
+		chanmon_cfgs[3].persister.set_update_ret(ChannelMonitorUpdateStatus_0_0_125::InProgress);
+		chanmon_cfgs[3].persister.set_update_ret(ChannelMonitorUpdateStatus_0_0_125::InProgress);
+		nodes[3].node.claim_funds(preimage);
+		check_added_monitors_0_0_125!(nodes[3], 2);
+
+		let (outpoint, update_id, _) = {
+			let latest_monitors = nodes[3].chain_monitor.latest_monitor_update_id.lock().unwrap();
+			latest_monitors.get(&chan_id_1_3).unwrap().clone()
+		};
+		nodes[3].chain_monitor.chain_monitor.channel_monitor_updated(outpoint, update_id).unwrap();
+		expect_payment_claimed_0_0_125!(nodes[3], hash, 15_000_000);
+
+		let ds_fulfill = get_htlc_update_msgs_0_0_125!(nodes[3], node_b_id);
+		// Due to an unrelated test bug in 0.0.125, we have to leave the `ChannelMonitorUpdate` for
+		// the previous node un-completed or we will panic when dropping the `Node`.
+		chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus_0_0_125::InProgress);
+		nodes[1].node.handle_update_fulfill_htlc(&node_d_id, &ds_fulfill.update_fulfill_htlcs[0]);
+		check_added_monitors_0_0_125!(nodes[1], 1);
+
+		nodes[1].node.handle_commitment_signed(&node_d_id, &ds_fulfill.commitment_signed);
+		check_added_monitors_0_0_125!(nodes[1], 1);
+
+		// The `ChannelMonitorUpdate` generated by the RAA from node B to node D will be blocked.
+		let (bs_raa, _) = get_revoke_commit_msgs_0_0_125!(nodes[1], node_d_id);
+		nodes[3].node.handle_revoke_and_ack(&node_b_id, &bs_raa);
+		check_added_monitors_0_0_125!(nodes[3], 0);
+
+		// Now that there is a blocked update in the B <-> D channel, we can claim the second
+		// payment across it, which, while it will generate a `ChannelMonitorUpdate`, will not
+		// complete its post-update actions.
+		nodes[3].node.claim_funds(preimage_2);
+		check_added_monitors_0_0_125!(nodes[3], 1);
+
+		// Finally, we set up the failure by force-closing the channel in question, ensuring that
+		// 0.1 will not create a per-peer state for node B.
+		let err = "Force Closing Channel".to_owned();
+		nodes[3].node.force_close_without_broadcasting_txn(&chan_id_1_3, &node_b_id, err).unwrap();
+		let reason =
+			ClosureReason_0_0_125::HolderForceClosed { broadcasted_latest_txn: Some(false) };
+		let peers = &[node_b_id];
+		lightning_0_0_125_utils::check_closed_event(&nodes[3], 1, reason, false, peers, 100_000);
+		lightning_0_0_125_utils::check_closed_broadcast(&nodes[3], 1, true);
+		check_added_monitors_0_0_125!(nodes[3], 1);
+
+		node_d_ser = nodes[3].node.encode();
+		mon_ser = get_monitor_0_0_125!(nodes[3], chan_id_2_3).encode();
+	}
+
+	// Create a dummy node to reload over with the 0.0.125 state
+
+	let mut chanmon_cfgs = create_chanmon_cfgs(4);
+
+	// Our TestChannelSigner will fail as we're jumping ahead, so disable its state-based checks
+	chanmon_cfgs[0].keys_manager.disable_all_state_policy_checks = true;
+	chanmon_cfgs[1].keys_manager.disable_all_state_policy_checks = true;
+	chanmon_cfgs[2].keys_manager.disable_all_state_policy_checks = true;
+	chanmon_cfgs[3].keys_manager.disable_all_state_policy_checks = true;
+
+	let node_cfgs = create_node_cfgs(4, &chanmon_cfgs);
+	let (persister, chain_mon);
+	let node_chanmgrs = create_node_chanmgrs(4, &node_cfgs, &[None, None, None, None]);
+	let node;
+	let mut nodes = create_network(4, &node_cfgs, &node_chanmgrs);
+
+	// Finally, reload the node in the latest LDK. This previously failed.
+	let config = test_default_channel_config();
+	reload_node!(nodes[3], config, &node_d_ser, &[&mon_ser], persister, chain_mon, node);
+}
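The pattern the test leans on throughout is LDK's two-phase monitor persistence: while the test persister returns `ChannelMonitorUpdateStatus::InProgress`, the corresponding `ChannelMonitorUpdate` (and everything gated on it, including post-update actions) stays pending until the test explicitly marks it complete. In outline, using the names from the diff above:

	// Make the next persistence call report as still in flight...
	chanmon_cfgs[3].persister.set_update_ret(ChannelMonitorUpdateStatus_0_0_125::InProgress);
	nodes[3].node.claim_funds(preimage);
	// ...and only later signal that it was durably persisted, releasing
	// whatever was blocked behind it.
	nodes[3].chain_monitor.chain_monitor.channel_monitor_updated(outpoint, update_id).unwrap();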

lightning/src/ln/channelmanager.rs

Lines changed: 31 additions & 3 deletions

@@ -14177,10 +14177,18 @@ where
 			$peer_state: expr, $logger: expr, $channel_info_log: expr
 		) => { {
 			let mut max_in_flight_update_id = 0;
+			let starting_len = $chan_in_flight_upds.len();
 			$chan_in_flight_upds.retain(|upd| upd.update_id > $monitor.get_latest_update_id());
+			if $chan_in_flight_upds.len() < starting_len {
+				log_debug!(
+					$logger,
+					"{} ChannelMonitorUpdates completed after ChannelManager was last serialized",
+					starting_len - $chan_in_flight_upds.len()
+				);
+			}
 			let funding_txo = $monitor.get_funding_txo();
 			for update in $chan_in_flight_upds.iter() {
-				log_trace!($logger, "Replaying ChannelMonitorUpdate {} for {}channel {}",
+				log_debug!($logger, "Replaying ChannelMonitorUpdate {} for {}channel {}",
 					update.update_id, $channel_info_log, &$monitor.channel_id());
 				max_in_flight_update_id = cmp::max(max_in_flight_update_id, update.update_id);
 				pending_background_events.push(
@@ -14744,11 +14752,31 @@ where
 						debug_assert!(false, "Non-event-generating channel freeing should not appear in our queue");
 					}
 				}
+				// Note that we may have a post-update action for a channel that has no pending
+				// `ChannelMonitorUpdate`s, but unlike the no-peer-state case, it may simply be
+				// because we had a `ChannelMonitorUpdate` complete after the last time this
+				// `ChannelManager` was serialized. In that case, we'll run the post-update
+				// actions as soon as we get going.
 			}
 			peer_state.lock().unwrap().monitor_update_blocked_actions = monitor_update_blocked_actions;
 		} else {
-			log_error!(WithContext::from(&args.logger, Some(node_id), None, None), "Got blocked actions without a per-peer-state for {}", node_id);
-			return Err(DecodeError::InvalidValue);
+			for actions in monitor_update_blocked_actions.values() {
+				for action in actions.iter() {
+					if matches!(action, MonitorUpdateCompletionAction::PaymentClaimed { .. }) {
+						// If there is no state for this channel but we have pending
+						// post-update actions, it's possible that one was left over from pre-0.1
+						// payment claims where MPP claims led to a channel blocked on itself
+						// and later `ChannelMonitorUpdate`s didn't get their post-update
+						// actions run. This should only have happened for `PaymentClaimed`
+						// post-update actions, which we ignore here.
+					} else {
+						let logger = WithContext::from(&args.logger, Some(node_id), None, None);
+						log_error!(logger, "Got blocked actions {:?} without a per-peer-state for {}", monitor_update_blocked_actions, node_id);
+						return Err(DecodeError::InvalidValue);
+					}
+				}
+			}
 		}
 	}
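The behavioral change in the second hunk boils down to a filter over the blocked actions of a peer that has no per-peer state. A self-contained sketch of that rule, with a hypothetical `CompletionAction` enum standing in for LDK's internal `MonitorUpdateCompletionAction` (illustration only, not LDK's actual API):

	// Hypothetical stand-in for the real completion-action type.
	#[derive(Debug)]
	enum CompletionAction {
		PaymentClaimed,
		Other,
	}

	// Pre-fix, any blocked action without a per-peer state failed the read.
	// Post-fix, dangling `PaymentClaimed` actions (possible leftovers of
	// pre-0.1 MPP claims) are tolerated; anything else is still an error.
	fn validate_no_peer_actions(actions: &[CompletionAction]) -> Result<(), &'static str> {
		for action in actions {
			match action {
				CompletionAction::PaymentClaimed => {}, // ignored, never run
				_ => return Err("blocked actions without a per-peer-state"),
			}
		}
		Ok(())
	}

	fn main() {
		assert!(validate_no_peer_actions(&[CompletionAction::PaymentClaimed]).is_ok());
		assert!(validate_no_peer_actions(&[CompletionAction::Other]).is_err());
	}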
