Skip to content

Planner wait conditions #8453

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ use nexus_types::deployment::ClickhouseMode;
use nexus_types::deployment::ClickhousePolicy;
use nexus_types::deployment::OximeterReadMode;
use nexus_types::deployment::OximeterReadPolicy;
use nexus_types::deployment::WaitCondition;
use nexus_types::internal_api::background::AbandonedVmmReaperStatus;
use nexus_types::internal_api::background::BlueprintPlannerStatus;
use nexus_types::internal_api::background::BlueprintRendezvousStatus;
Expand Down Expand Up @@ -1218,6 +1219,13 @@ fn print_task_abandoned_vmm_reaper(details: &serde_json::Value) {
}

fn print_task_blueprint_planner(details: &serde_json::Value) {
/// Prints the planner's outstanding wait conditions, if there are any.
///
/// Emits nothing when `waiting_on` is empty; otherwise prints a single line
/// with the number of conditions followed by their `Debug` rendering.
fn print_waiting_on(waiting_on: &[WaitCondition]) {
    // Nothing to report when the planner is not blocked on anything.
    if waiting_on.is_empty() {
        return;
    }
    let count = waiting_on.len();
    println!(" waiting on {count} events: {waiting_on:?}");
}

let status =
match serde_json::from_value::<BlueprintPlannerStatus>(details.clone())
{
Expand All @@ -1237,20 +1245,32 @@ fn print_task_blueprint_planner(details: &serde_json::Value) {
BlueprintPlannerStatus::Error(error) => {
println!(" task did not complete successfully: {error}");
}
BlueprintPlannerStatus::Unchanged { parent_blueprint_id } => {
BlueprintPlannerStatus::Unchanged {
parent_blueprint_id,
waiting_on,
} => {
println!(" plan unchanged from parent {parent_blueprint_id}");
print_waiting_on(&waiting_on);
}
BlueprintPlannerStatus::Planned { parent_blueprint_id, error } => {
BlueprintPlannerStatus::Planned {
parent_blueprint_id,
error,
waiting_on,
} => {
println!(
" planned new blueprint from parent {parent_blueprint_id}, \
but could not make it the target: {error}"
);
print_waiting_on(&waiting_on);
}
BlueprintPlannerStatus::Targeted { blueprint_id, .. } => {
BlueprintPlannerStatus::Targeted {
blueprint_id, waiting_on, ..
} => {
println!(
" planned new blueprint {blueprint_id}, \
and made it the current target"
);
print_waiting_on(&waiting_on);
}
}
}
Expand Down
7 changes: 6 additions & 1 deletion dev-tools/reconfigurator-cli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use nexus_reconfigurator_simulation::{BlueprintId, SimState};
use nexus_types::deployment::OmicronZoneNic;
use nexus_types::deployment::PlanningInput;
use nexus_types::deployment::SledFilter;
use nexus_types::deployment::ZoneExpungeReason;
use nexus_types::deployment::execution;
use nexus_types::deployment::execution::blueprint_external_dns_config;
use nexus_types::deployment::execution::blueprint_internal_dns_config;
Expand Down Expand Up @@ -1249,7 +1250,11 @@ fn cmd_blueprint_edit(
BlueprintEditCommands::ExpungeZone { zone_id } => {
let sled_id = sled_with_zone(&builder, &zone_id)?;
builder
.sled_expunge_zone(sled_id, zone_id)
.sled_expunge_zone(
sled_id,
zone_id,
&ZoneExpungeReason::ManualEdit,
)
.context("failed to expunge zone")?;
format!("expunged zone {zone_id} from sled {sled_id}")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -505,8 +505,7 @@ INFO sufficient ExternalDns zones exist in plan, desired_count: 0, current_count
WARN failed to place all new desired Nexus zones, placed: 0, wanted_to_place: 3
INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
WARN cannot issue more SP updates (no current artifacts)
INFO some zones not yet up-to-date, sled_id: 89d02b1b-478c-401a-8e28-7a26f74fa41b
INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
INFO some zones not yet up-to-date
generated blueprint 86db3308-f817-4626-8838-4085949a6a41 based on parent blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a

> blueprint-list
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -980,8 +980,7 @@ INFO added zone to sled, sled_id: 711ac7f8-d19e-4572-bdb9-e9b50f6e362a, kind: Ex
INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3
INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
WARN cannot issue more SP updates (no current artifacts)
INFO some zones not yet up-to-date, sled_id: 711ac7f8-d19e-4572-bdb9-e9b50f6e362a
INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
INFO some zones not yet up-to-date
generated blueprint 9c998c1d-1a7b-440a-ae0c-40f781dea6e2 based on parent blueprint 366b0b68-d80e-4bc1-abd3-dc69837847e0

> blueprint-diff 366b0b68-d80e-4bc1-abd3-dc69837847e0 9c998c1d-1a7b-440a-ae0c-40f781dea6e2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1013,8 +1013,7 @@ INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count
INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3
INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
WARN cannot issue more SP updates (no current artifacts)
INFO some zones not yet up-to-date, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c
INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
INFO some zones not yet up-to-date
generated blueprint af934083-59b5-4bf6-8966-6fb5292c29e1 based on parent blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4

> blueprint-diff 58d5e830-0884-47d8-a7cd-b2b3751adeb4 af934083-59b5-4bf6-8966-6fb5292c29e1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,6 @@ INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3
INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 0, sp_type: Sled, serial_number: serial0, part_number: model0
INFO reached maximum number of pending SP updates, max: 1
INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
generated blueprint 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 based on parent blueprint dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21

> blueprint-diff dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1
Expand Down Expand Up @@ -352,7 +351,6 @@ INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3
INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
INFO SP update not yet completed (will keep it), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 0, sp_type: Sled, serial_number: serial0, part_number: model0
INFO reached maximum number of pending SP updates, max: 1
INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
generated blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4 based on parent blueprint 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1

> blueprint-diff 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 58d5e830-0884-47d8-a7cd-b2b3751adeb4
Expand Down Expand Up @@ -537,7 +535,6 @@ INFO SP update completed (will remove it and re-evaluate board), artifact_versio
INFO skipping board for SP update, serial_number: serial0, part_number: model0
INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1
INFO reached maximum number of pending SP updates, max: 1
INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
generated blueprint af934083-59b5-4bf6-8966-6fb5292c29e1 based on parent blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4

> blueprint-diff 58d5e830-0884-47d8-a7cd-b2b3751adeb4 af934083-59b5-4bf6-8966-6fb5292c29e1
Expand Down Expand Up @@ -729,7 +726,6 @@ INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0
INFO SP update impossible (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1
INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: Version(ArtifactVersion("0.5.0")), expected_active_version: 0.0.1, component: sp, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1
INFO reached maximum number of pending SP updates, max: 1
INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
generated blueprint df06bb57-ad42-4431-9206-abff322896c7 based on parent blueprint af934083-59b5-4bf6-8966-6fb5292c29e1

> blueprint-diff af934083-59b5-4bf6-8966-6fb5292c29e1 df06bb57-ad42-4431-9206-abff322896c7
Expand Down Expand Up @@ -922,7 +918,6 @@ INFO skipping board for SP update, serial_number: serial1, part_number: model1
INFO skipping board for SP update, serial_number: serial0, part_number: model0
INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 2, sp_type: Sled, serial_number: serial2, part_number: model2
INFO ran out of boards for SP update
INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify
generated blueprint 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba based on parent blueprint df06bb57-ad42-4431-9206-abff322896c7

> blueprint-diff df06bb57-ad42-4431-9206-abff322896c7 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba
Expand Down
7 changes: 6 additions & 1 deletion live-tests/tests/test_nexus_add_remove.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use nexus_reconfigurator_planning::planner::Planner;
use nexus_reconfigurator_preparation::PlanningInputFromDb;
use nexus_sled_agent_shared::inventory::ZoneKind;
use nexus_types::deployment::SledFilter;
use nexus_types::deployment::ZoneExpungeReason;
use omicron_common::address::NEXUS_INTERNAL_PORT;
use omicron_test_utils::dev::poll::CondCheckError;
use omicron_test_utils::dev::poll::wait_for_condition;
Expand Down Expand Up @@ -123,7 +124,11 @@ async fn test_nexus_add_remove(lc: &LiveTestContext) {
&nexus,
&|builder: &mut BlueprintBuilder| {
builder
.sled_expunge_zone(sled_id, new_zone.id)
.sled_expunge_zone(
sled_id,
new_zone.id,
&ZoneExpungeReason::Test,
)
.context("expunging zone")?;
Ok(())
},
Expand Down
10 changes: 9 additions & 1 deletion nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ use crate::blueprint_editor::ExternalSnatNetworkingChoice;
use crate::blueprint_editor::NoAvailableDnsSubnets;
use crate::blueprint_editor::SledEditError;
use crate::blueprint_editor::SledEditor;
use crate::planner::ZoneExpungeReason;
use crate::planner::rng::PlannerRng;
use anyhow::Context as _;
use anyhow::anyhow;
Expand Down Expand Up @@ -45,6 +44,7 @@ use nexus_types::deployment::PendingMgsUpdates;
use nexus_types::deployment::PlanningInput;
use nexus_types::deployment::SledFilter;
use nexus_types::deployment::SledResources;
use nexus_types::deployment::ZoneExpungeReason;
use nexus_types::deployment::ZpoolFilter;
use nexus_types::deployment::ZpoolName;
use nexus_types::deployment::blueprint_zone_type;
Expand Down Expand Up @@ -337,6 +337,13 @@ impl fmt::Display for Operation {
ZoneExpungeReason::ClickhouseSingleNodeDisabled => {
"clickhouse single-node disabled via policy"
}
ZoneExpungeReason::ManualEdit => {
"blueprint edited manually"
}
ZoneExpungeReason::Test => "for testing purposes",
ZoneExpungeReason::UpdatedSource { from, to } => {
&format!("updating from image source {from:?} → {to:?}")
}
};
write!(
f,
Expand Down Expand Up @@ -1741,6 +1748,7 @@ impl<'a> BlueprintBuilder<'a> {
&mut self,
sled_id: SledUuid,
zone_id: OmicronZoneUuid,
_reason: &ZoneExpungeReason,
) -> Result<SledEditCounts, Error> {
let editor = self.sled_editors.get_mut(&sled_id).ok_or_else(|| {
Error::Planner(anyhow!(
Expand Down
Loading
Loading