Skip to content

Commit 2dfd0f1

Browse files
authored
[reconfigurator] Prechecks and post-update actions for RoT updater (#8157)
Related: #7989
1 parent d4e0bc1 commit 2dfd0f1

File tree

23 files changed

+308 -74 lines changed

Cargo.lock

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

clients/gateway-client/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ base64.workspace = true
1212
chrono.workspace = true
1313
daft.workspace = true
1414
gateway-messages.workspace = true
15+
gateway-types.workspace = true
1516
progenitor.workspace = true
1617
rand.workspace = true
1718
reqwest = { workspace = true, features = ["rustls-tls", "stream"] }

clients/gateway-client/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ progenitor::generate_api!(
6464
ImageVersion = { derives = [PartialEq, Eq, PartialOrd, Ord] },
6565
RotImageDetails = { derives = [PartialEq, Eq, PartialOrd, Ord] },
6666
RotImageError = { derives = [ PartialEq, Eq, PartialOrd, Ord] },
67-
RotSlot = { derives = [PartialEq, Eq, PartialOrd, Ord] },
6867
RotState = { derives = [PartialEq, Eq, PartialOrd, Ord] },
6968
SpComponentCaboose = { derives = [PartialEq, Eq] },
7069
SpIdentifier = { derives = [Copy, PartialEq, Hash, Eq] },
@@ -75,6 +74,7 @@ progenitor::generate_api!(
7574
SpUpdateStatus = { derives = [PartialEq, Hash, Eq] },
7675
UpdatePreparationProgress = { derives = [PartialEq, Hash, Eq] },
7776
},
77+
replace = { RotSlot = gateway_types::rot::RotSlot },
7878
);
7979

8080
// Override the impl of Ord for SpIdentifier because the default one orders the

dev-tools/omdb/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ futures.workspace = true
2828
gateway-client.workspace = true
2929
gateway-messages.workspace = true
3030
gateway-test-utils.workspace = true
31+
gateway-types.workspace = true
3132
http.workspace = true
3233
humantime.workspace = true
3334
internal-dns-resolver.workspace = true

dev-tools/omdb/src/bin/omdb/mgs.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ use clap::Args;
1212
use clap::Subcommand;
1313
use futures::StreamExt;
1414
use gateway_client::types::PowerState;
15-
use gateway_client::types::RotSlot;
1615
use gateway_client::types::RotState;
1716
use gateway_client::types::SpComponentCaboose;
1817
use gateway_client::types::SpComponentInfo;
@@ -22,6 +21,7 @@ use gateway_client::types::SpIgnitionInfo;
2221
use gateway_client::types::SpIgnitionSystemType;
2322
use gateway_client::types::SpState;
2423
use gateway_client::types::SpType;
24+
use gateway_types::rot::RotSlot;
2525
use internal_dns_types::names::ServiceName;
2626
use tabled::Tabled;
2727

dev-tools/reconfigurator-sp-updater/src/main.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -323,23 +323,22 @@ fn cmd_config(
323323
)?;
324324
}
325325
PendingMgsUpdateDetails::Rot {
326-
expected_slot_a_version,
327-
expected_slot_b_version,
328326
expected_active_slot,
327+
expected_inactive_version,
329328
expected_persistent_boot_preference,
330329
expected_pending_persistent_boot_preference,
331330
expected_transient_boot_preference,
332331
} => {
333332
writeln!(
334333
&mut s,
335-
" preconditions: expected_slot_a_version {:?}
336-
expected_slot_b_version {:?}
337-
expected active_slot {:?}
334+
" preconditions: expected active slot {:?}
335+
expected active version {:?}
336+
expected inactive version {:?}
338337
expected persistent_boot_preference {:?}
339338
expected pending_persistent_boot_preference {:?}
340339
expected transient_boot_preference {:?}",
341-
expected_slot_a_version, expected_slot_b_version,
342-
expected_active_slot, expected_persistent_boot_preference,
340+
expected_active_slot.slot(), expected_active_slot.version(),
341+
expected_inactive_version, expected_persistent_boot_preference,
343342
expected_pending_persistent_boot_preference,
344343
expected_transient_boot_preference,
345344
)?;

gateway-types/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ license = "MPL-2.0"
88
workspace = true
99

1010
[dependencies]
11+
daft.workspace = true
1112
gateway-messages.workspace = true
1213
# Avoid depending on gateway-sp-comms! It is a pretty heavy dependency and
1314
# would only be used for From impls anyway. We put those impls in

gateway-types/src/rot.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// License, v. 2.0. If a copy of the MPL was not distributed with this
33
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
44

5+
use daft::Diffable;
56
use schemars::JsonSchema;
67
use serde::{Deserialize, Serialize};
78

@@ -165,7 +166,9 @@ impl From<gateway_messages::RotBootInfo> for RotState {
165166

166167
#[derive(
167168
Debug,
169+
Diffable,
168170
Clone,
171+
Copy,
169172
PartialEq,
170173
Eq,
171174
PartialOrd,

nexus/inventory/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ clickhouse-admin-types.workspace = true
1717
futures.workspace = true
1818
gateway-client.workspace = true
1919
gateway-messages.workspace = true
20+
gateway-types.workspace = true
2021
id-map.workspace = true
2122
nexus-sled-agent-shared.workspace = true
2223
nexus-types.workspace = true

nexus/inventory/src/builder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -591,11 +591,11 @@ mod test {
591591
use base64::Engine;
592592
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
593593
use gateway_client::types::PowerState;
594-
use gateway_client::types::RotSlot;
595594
use gateway_client::types::RotState;
596595
use gateway_client::types::SpComponentCaboose;
597596
use gateway_client::types::SpState;
598597
use gateway_client::types::SpType;
598+
use gateway_types::rot::RotSlot;
599599
use nexus_sled_agent_shared::inventory::SledRole;
600600
use nexus_types::inventory::BaseboardId;
601601
use nexus_types::inventory::Caboose;

nexus/inventory/src/examples.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@ use crate::CollectionBuilder;
88
use clickhouse_admin_types::ClickhouseKeeperClusterMembership;
99
use clickhouse_admin_types::KeeperId;
1010
use gateway_client::types::PowerState;
11-
use gateway_client::types::RotSlot;
1211
use gateway_client::types::RotState;
1312
use gateway_client::types::SpComponentCaboose;
1413
use gateway_client::types::SpState;
1514
use gateway_client::types::SpType;
15+
use gateway_types::rot::RotSlot;
1616
use nexus_sled_agent_shared::inventory::Baseboard;
1717
use nexus_sled_agent_shared::inventory::Inventory;
1818
use nexus_sled_agent_shared::inventory::InventoryDataset;

nexus/mgs-updates/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ workspace = true
1010
chrono.workspace = true
1111
futures.workspace = true
1212
gateway-client.workspace = true
13+
gateway-types.workspace = true
1314
id-map.workspace = true
1415
internal-dns-resolver.workspace = true
1516
internal-dns-types.workspace = true

nexus/mgs-updates/src/common_sp_update.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use super::UpdateProgress;
1010
use futures::future::BoxFuture;
1111
use gateway_client::types::SpType;
1212
use gateway_client::types::SpUpdateStatus;
13+
use gateway_types::rot::RotSlot;
1314
use nexus_types::deployment::ExpectedVersion;
1415
use nexus_types::deployment::PendingMgsUpdate;
1516
use slog::Logger;
@@ -278,9 +279,17 @@ pub enum PrecheckStatus {
278279

279280
#[derive(Debug, Error)]
280281
pub enum PrecheckError {
282+
#[error(
283+
"pending_persistent_boot_preference and/or transient_boot_preference is set"
284+
)]
285+
EphemeralRotBootPreferenceSet,
286+
281287
#[error("communicating with MGS")]
282288
GatewayClientError(#[from] GatewayClientError),
283289

290+
#[error("communicating with RoT: {message:?}")]
291+
RotCommunicationFailed { message: String },
292+
284293
#[error(
285294
"in {sp_type} slot {slot_id}, expected to find \
286295
part {expected_part:?} serial {expected_serial:?}, but found \
@@ -295,6 +304,9 @@ pub enum PrecheckError {
295304
found_serial: String,
296305
},
297306

307+
#[error("expected to find active slot {expected:?}, but found {found:?}")]
308+
WrongActiveSlot { expected: RotSlot, found: RotSlot },
309+
298310
#[error(
299311
"expected to find active version {:?}, but found {found:?}",
300312
.expected.as_str(),

nexus/mgs-updates/src/driver_update.rs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,10 @@ impl SpComponentUpdate {
8989
target_sp_type: request.sp_type,
9090
target_sp_slot: request.slot_id,
9191
// Like the SP, we request an update to the inactive slot
92-
firmware_slot: expected_active_slot.toggled().to_u16(),
92+
firmware_slot: expected_active_slot
93+
.slot()
94+
.toggled()
95+
.to_u16(),
9396
update_id,
9497
}
9598
}
@@ -581,11 +584,14 @@ async fn wait_for_update_done(
581584
// Check if we're done.
582585
Ok(PrecheckStatus::UpdateComplete) => return Ok(()),
583586

584-
// An incorrect version in the "inactive" slot is normal during the
585-
// upgrade. We have no reason to think this won't converge so we
586-
// proceed with waiting.
587+
// An incorrect version in the "inactive" slot, incorrect active slot,
588+
// or non-empty pending_persistent_boot_preference/transient_boot_preference
589+
// are normal during the upgrade. We have no reason to think these won't
590+
// converge so we proceed with waiting.
587591
Err(PrecheckError::GatewayClientError(_))
588592
| Err(PrecheckError::WrongInactiveVersion { .. })
593+
| Err(PrecheckError::WrongActiveSlot { .. })
594+
| Err(PrecheckError::EphemeralRotBootPreferenceSet)
589595
| Ok(PrecheckStatus::ReadyForUpdate) => {
590596
if before.elapsed() >= timeout {
591597
return Err(UpdateWaitError::Timeout(timeout));
@@ -596,7 +602,8 @@ async fn wait_for_update_done(
596602
}
597603

598604
Err(error @ PrecheckError::WrongDevice { .. })
599-
| Err(error @ PrecheckError::WrongActiveVersion { .. }) => {
605+
| Err(error @ PrecheckError::WrongActiveVersion { .. })
606+
| Err(error @ PrecheckError::RotCommunicationFailed { .. }) => {
600607
// Stop trying to make this update happen. It's not going to
601608
// happen.
602609
return Err(UpdateWaitError::Indeterminate(error));

0 commit comments

Comments
 (0)