
Commit e727dea

TQ: Support persisting state to ledger
Builds on #9296. This commit persists state to a ledger, following the pattern used in the bootstore. It's done this way because the `PersistentState` itself is contained in the sans-io layer, but we must save it in the async task layer. The sans-io layer shouldn't know how the state is persisted, just that it is, and so we recreate the ledger every time we write it. A follow-up PR will deal with the early networking information saved by the bootstore, and will be very similar.
1 parent a505cda commit e727dea
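
The write path follows the bootstore's ledger pattern: the persisted wrapper implements `Ledgerable` (a monotonically increasing generation, plus a comparison used to pick the newest copy when reading back from multiple M.2 devices), and the task layer rebuilds a `Ledger` from the latest in-memory snapshot on every write instead of holding one open. A minimal sketch of that pattern, using only the `omicron_common::ledger` API that appears in this diff; the `ExampleState` type and `persist` helper are hypothetical stand-ins:

use camino::Utf8PathBuf;
use omicron_common::ledger::{Ledger, Ledgerable};
use serde::{Deserialize, Serialize};
use slog::Logger;

// Hypothetical payload, standing in for `PersistentStateLedger` below.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ExampleState {
    generation: u64,
    // ... protocol state would live here ...
}

impl Ledgerable for ExampleState {
    // When multiple on-disk copies are read back, the higher generation wins.
    fn is_newer_than(&self, other: &Self) -> bool {
        self.generation > other.generation
    }
    // Invoked by the ledger machinery when committing a write.
    fn generation_bump(&mut self) {
        self.generation += 1;
    }
}

// Recreate the ledger from the current snapshot for every write; the
// sans-io layer never sees how (or where) the state is stored.
async fn persist(log: &Logger, paths: Vec<Utf8PathBuf>, data: ExampleState) -> u64 {
    let mut ledger = Ledger::new_with(log, paths, data);
    ledger.commit().await.expect("failed to write ledger");
    // Return the post-commit generation so the caller can pass it back in.
    ledger.data().generation
}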

File tree

5 files changed: +271 -7 lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
(Generated lockfile; diff not rendered.)

trust-quorum/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -22,6 +22,7 @@ gfss.workspace = true
 hex.workspace = true
 hkdf.workspace = true
 iddqd.workspace = true
+omicron-common.workspace = true
 omicron-uuid-kinds.workspace = true
 rand = { workspace = true, features = ["os_rng"] }
 secrecy.workspace = true

trust-quorum/src/ledgers.rs

Lines changed: 75 additions & 0 deletions
@@ -0,0 +1,75 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Persistent storage for the trust quorum task
+//!
+//! We write two pieces of data to M.2 devices in production via
+//! [`omicron_common::ledger::Ledger`]:
+//!
+//! 1. [`trust_quorum_protocol::PersistentState`] for trust quorum state
+//! 2. A network config blob required for pre-rack-unlock configuration
+
+use camino::Utf8PathBuf;
+use omicron_common::ledger::{Ledger, Ledgerable};
+use serde::{Deserialize, Serialize};
+use slog::{Logger, info};
+use trust_quorum_protocol::PersistentState;
+
+/// A wrapper type around [`PersistentState`] for use as a [`Ledger`]
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct PersistentStateLedger {
+    pub generation: u64,
+    pub state: PersistentState,
+}
+
+impl Ledgerable for PersistentStateLedger {
+    fn is_newer_than(&self, other: &Self) -> bool {
+        self.generation > other.generation
+    }
+
+    fn generation_bump(&mut self) {
+        self.generation += 1;
+    }
+}
+
+impl PersistentStateLedger {
+    /// Save the persistent state to a ledger and return the new generation
+    /// number.
+    ///
+    /// Panics if the ledger cannot be saved.
+    pub async fn save(
+        log: &Logger,
+        paths: Vec<Utf8PathBuf>,
+        generation: u64,
+        state: PersistentState,
+    ) -> u64 {
+        let persistent_state = PersistentStateLedger { generation, state };
+        let mut ledger = Ledger::new_with(log, paths, persistent_state);
+        ledger
+            .commit()
+            .await
+            .expect("Critical: Failed to save bootstore ledger for Fsm::State");
+        ledger.data().generation
+    }
+
+    /// Return `Some(PersistentStateLedger)` if it exists on disk, otherwise
+    /// return `None`.
+    pub async fn load(
+        log: &Logger,
+        paths: Vec<Utf8PathBuf>,
+    ) -> Option<PersistentStateLedger> {
+        let Some(ledger) =
+            Ledger::<PersistentStateLedger>::new(&log, paths).await
+        else {
+            return None;
+        };
+        let persistent_state = ledger.into_inner();
+        info!(
+            log,
+            "Loaded persistent state from ledger with generation {}",
+            persistent_state.generation
+        );
+        Some(persistent_state)
+    }
+}
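
For reference, a save/load round trip through these helpers would look roughly like the sketch below. This is not code from the commit: `round_trip` is a hypothetical helper, and it assumes a crate-internal caller (like `task.rs`) that already holds a `Logger` and a `PersistentState` snapshot:

use camino::Utf8PathBuf;
use slog::Logger;
use trust_quorum_protocol::PersistentState;

use crate::ledgers::PersistentStateLedger;

// Hypothetical helper showing the intended calling convention.
async fn round_trip(log: &Logger, paths: Vec<Utf8PathBuf>, state: PersistentState) {
    // `save` wraps the state, commits it, and returns the post-commit
    // generation; the caller stores that value and passes it back in on
    // the next save.
    let new_gen = PersistentStateLedger::save(log, paths.clone(), 0, state).await;

    // `load` returns `None` if no ledger has ever been written at `paths`.
    let loaded = PersistentStateLedger::load(log, paths)
        .await
        .expect("ledger was just written");
    assert_eq!(loaded.generation, new_gen);
}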

trust-quorum/src/lib.rs

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@
 
 mod connection_manager;
 pub(crate) mod established_conn;
+mod ledgers;
 mod task;
 
 pub(crate) use connection_manager::{

trust-quorum/src/task.rs

Lines changed: 193 additions & 7 deletions
@@ -8,6 +8,8 @@
 use crate::connection_manager::{
     ConnMgr, ConnMgrStatus, ConnToMainMsg, ConnToMainMsgInner,
 };
+use crate::ledgers::PersistentStateLedger;
+use camino::Utf8PathBuf;
 use omicron_uuid_kinds::RackUuid;
 use serde::{Deserialize, Serialize};
 use slog::{Logger, debug, error, info, o};

@@ -48,8 +50,8 @@ const CONN_TO_MAIN_CHANNEL_BOUND: usize = 1024;
 pub struct Config {
     pub baseboard_id: BaseboardId,
     pub listen_addr: SocketAddrV6,
-    // pub tq_state_ledger_paths: Vec<Utf8PathBuf>,
-    // pub network_config_ledger_paths: Vec<Utf8PathBuf>,
+    pub tq_ledger_paths: Vec<Utf8PathBuf>,
+    pub network_config_ledger_paths: Vec<Utf8PathBuf>,
     pub sprockets: SprocketsConfig,
 }

@@ -340,8 +342,8 @@ impl NodeTaskHandle {
 pub struct NodeTask {
     shutdown: bool,
     log: Logger,
-    #[expect(unused)]
     config: Config,
+    tq_ledger_generation: u64,
     node: Node,
     ctx: NodeCtx,
     conn_mgr: ConnMgr,

@@ -368,8 +370,20 @@ impl NodeTask {
 
         let baseboard_id = config.baseboard_id.clone();
 
-        // TODO: Load persistent state from ledger
-        let mut ctx = NodeCtx::new(config.baseboard_id.clone());
+        let (mut ctx, tq_ledger_generation) = if let Some(ps_ledger) =
+            PersistentStateLedger::load(&log, config.tq_ledger_paths.clone())
+                .await
+        {
+            (
+                NodeCtx::new_with_persistent_state(
+                    config.baseboard_id.clone(),
+                    ps_ledger.state,
+                ),
+                ps_ledger.generation,
+            )
+        } else {
+            (NodeCtx::new(config.baseboard_id.clone()), 0)
+        };
         let node = Node::new(&log, &mut ctx);
         let conn_mgr = ConnMgr::new(
             &log,

@@ -384,6 +398,7 @@
             shutdown: false,
             log,
             config,
+            tq_ledger_generation,
             node,
             ctx,
             conn_mgr,

@@ -423,6 +438,10 @@
     }
 
     // Handle messages from connection management tasks
+    //
+    // We persist state at the end of this method, which always occurs before
+    // we send any outgoing messages in the `run` loop as a response of handling
+    // this message.
     async fn on_conn_msg(&mut self, msg: ConnToMainMsg) {
         let task_id = msg.task_id;
         match msg.msg {

@@ -452,9 +471,14 @@
                 todo!();
             }
         }
+        self.save_persistent_state().await;
     }
 
-    // TODO: Process `ctx`: save persistent state
+    // Handle API requests from sled-agent
+    //
+    // NOTE: We persist state where necessary before responding to clients. Any
+    // resulting output messages will also be sent in the `run` loop after we
+    // persist state.
     async fn on_api_request(&mut self, request: NodeApiRequest) {
         match request {
             NodeApiRequest::BootstrapAddresses(addrs) => {

@@ -479,6 +503,7 @@
                     .map(|_| {
                         self.ctx.persistent_state().commits.contains(&epoch)
                     });
+                self.save_persistent_state().await;
                 let _ = responder.send(res);
             }
             NodeApiRequest::ConnMgrStatus { responder } => {

@@ -501,6 +526,7 @@
             NodeApiRequest::LrtqUpgrade { msg, responder } => {
                 let res =
                     self.node.coordinate_upgrade_from_lrtq(&mut self.ctx, msg);
+                self.save_persistent_state().await;
                 let _ = responder.send(res);
             }
             NodeApiRequest::NodeStatus { responder } => {

@@ -518,11 +544,13 @@
                     .map(|_| {
                         self.ctx.persistent_state().commits.contains(&epoch)
                     });
+                self.save_persistent_state().await;
                 let _ = responder.send(res);
             }
             NodeApiRequest::Reconfigure { msg, responder } => {
                 let res =
                     self.node.coordinate_reconfiguration(&mut self.ctx, msg);
+                self.save_persistent_state().await;
                 let _ = responder.send(res);
             }
             NodeApiRequest::Shutdown => {

@@ -531,6 +559,19 @@
             }
         }
     }
+
+    /// Save `PersistentState` to storage if necessary
+    pub async fn save_persistent_state(&mut self) {
+        if self.ctx.persistent_state_change_check_and_reset() {
+            self.tq_ledger_generation = PersistentStateLedger::save(
+                &self.log,
+                self.config.tq_ledger_paths.clone(),
+                self.tq_ledger_generation,
+                self.ctx.persistent_state().clone(),
+            )
+            .await;
+        }
+    }
 }
 
 #[cfg(test)]
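
The guard in `save_persistent_state` is a dirty-flag pattern: mutating operations in the sans-io layer mark the context as changed, and the task layer checks and clears that flag after handling each event, so unchanged state is never rewritten. A self-contained sketch of the idea; the `Ctx` type here is a stand-in, not the real `NodeCtx`:

// Stand-in for the sans-io context; the real `NodeCtx` tracks this flag
// alongside the actual `PersistentState`.
struct Ctx {
    state: u64, // stand-in for `PersistentState`
    dirty: bool,
}

impl Ctx {
    // Mutations flag the state as changed; they never touch disk themselves,
    // since persistence belongs to the async task layer.
    fn mutate(&mut self, v: u64) {
        self.state = v;
        self.dirty = true;
    }

    // Mirrors `persistent_state_change_check_and_reset`: report whether a
    // write is needed and clear the flag so unchanged state isn't rewritten.
    fn change_check_and_reset(&mut self) -> bool {
        std::mem::take(&mut self.dirty)
    }
}

fn main() {
    let mut ctx = Ctx { state: 0, dirty: false };
    ctx.mutate(42);
    assert!(ctx.change_check_and_reset()); // first check after a change: save
    assert!(!ctx.change_check_and_reset()); // nothing new: skip the write
}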

@@ -587,7 +628,15 @@
                 },
                 roots: vec![cert_path(dir.clone(), &root_prefix())],
             };
-            Config { baseboard_id, listen_addr, sprockets }
+            let tq_ledger_paths =
+                vec![dir.join(format!("test-tq-ledger-[{i}]"))];
+            Config {
+                baseboard_id,
+                listen_addr,
+                sprockets,
+                tq_ledger_paths,
+                network_config_ledger_paths: vec![],
+            }
         })
         .collect()
 }
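
(`network_config_ledger_paths` is left empty in these tests; per the commit message, persisting the early networking config is deferred to a follow-up PR.)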

@@ -1435,4 +1484,141 @@
 
         setup.cleanup_successful();
     }
+
+    /// Ensure state is persisted as we expect
+    #[tokio::test]
+    pub async fn tq_persistent_state() {
+        let num_nodes = 4;
+        let mut setup =
+            TestSetup::spawn_nodes("tq_initial_config", num_nodes).await;
+        let rack_id = RackUuid::new_v4();
+
+        // Trigger an initial configuration by using the first node as a
+        // coordinator. We're pretending to be the sled-agent with instruction from
+        // Nexus here.
+        let initial_config = ReconfigureMsg {
+            rack_id,
+            epoch: Epoch(1),
+            last_committed_epoch: None,
+            members: setup.members().cloned().collect(),
+            threshold: trust_quorum_protocol::Threshold(3),
+        };
+
+        // Tell nodes how to reach each other
+        for h in &setup.node_handles {
+            h.load_peer_addresses(setup.listen_addrs.iter().cloned().collect())
+                .await
+                .unwrap();
+        }
+
+        let coordinator = setup.node_handles.first().unwrap();
+        coordinator.reconfigure(initial_config).await.unwrap();
+
+        let poll_interval = Duration::from_millis(10);
+        let poll_max = Duration::from_secs(10);
+
+        // Wait for the coordinator to see `PrepareAck`s from all nodes
+        wait_for_condition(
+            async || {
+                let Ok(Some(s)) = coordinator.coordinator_status().await else {
+                    return Err(CondCheckError::<()>::NotYet);
+                };
+                if s.acked_prepares.len() == num_nodes {
+                    Ok(())
+                } else {
+                    Err(CondCheckError::<()>::NotYet)
+                }
+            },
+            &poll_interval,
+            &poll_max,
+        )
+        .await
+        .unwrap();
+
+        // Simulate a crash of the last node.
+        let join_handle = setup.join_handles.pop().unwrap();
+        let node_handle = setup.node_handles.pop().unwrap();
+        node_handle.shutdown().await.unwrap();
+        join_handle.await.unwrap();
+        let _ = setup.listen_addrs.pop().unwrap();
+
+        // Now bring it back up with the same persistent state, which contains
+        // the initial config and prepare. Commit should work and everything
+        // should pick up as expected.
+        let (mut task, handle) = NodeTask::new(
+            setup.configs.last().unwrap().clone(),
+            &setup.logctx.log,
+        )
+        .await;
+        let listen_addr = handle.listen_addr();
+        setup.node_handles.push(handle);
+        setup.join_handles.push(tokio::spawn(async move { task.run().await }));
+        setup.listen_addrs.push(listen_addr);
+
+        // Tell nodes how to reach each other
+        for h in &setup.node_handles {
+            h.load_peer_addresses(setup.listen_addrs.iter().cloned().collect())
+                .await
+                .unwrap();
+        }
+
+        // Commit at each node
+        //
+        // Nexus retries this idempotent command until each node acks. So we
+        // simulate that here.
+        wait_for_condition(
+            async || {
+                let mut acked = 0;
+                for h in &setup.node_handles {
+                    if h.commit(rack_id, Epoch(1)).await.unwrap() {
+                        acked += 1;
+                    }
+                }
+                if acked == num_nodes {
+                    Ok(())
+                } else {
+                    Err(CondCheckError::<()>::NotYet)
+                }
+            },
+            &poll_interval,
+            &poll_max,
+        )
+        .await
+        .unwrap();
+
+        // Simulate crash and restart again
+        let join_handle = setup.join_handles.pop().unwrap();
+        let node_handle = setup.node_handles.pop().unwrap();
+        node_handle.shutdown().await.unwrap();
+        join_handle.await.unwrap();
+        let _ = setup.listen_addrs.pop().unwrap();
+        let (mut task, handle) = NodeTask::new(
+            setup.configs.last().unwrap().clone(),
+            &setup.logctx.log,
+        )
+        .await;
+        let listen_addr = handle.listen_addr();
+        setup.node_handles.push(handle);
+        setup.join_handles.push(tokio::spawn(async move { task.run().await }));
+        setup.listen_addrs.push(listen_addr);
+
+        // Tell nodes how to reach each other
+        for h in &setup.node_handles {
+            h.load_peer_addresses(setup.listen_addrs.iter().cloned().collect())
+                .await
+                .unwrap();
+        }
+
+        // Now load the rack secret at all nodes
+        let mut secret = None;
+        for h in &setup.node_handles {
+            let rs = h.load_rack_secret(Epoch(1)).await.unwrap();
+            if secret.is_none() {
+                secret = Some(rs.clone());
+            }
+            assert_eq!(&rs, secret.as_ref().unwrap());
+        }
+
+        setup.cleanup_successful();
+    }
 }
