From d9fe3c95b34ff8556c42e95a36e7c4e8b815ad98 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 4 Mar 2025 15:03:23 +0000 Subject: [PATCH 1/2] Return when VCRs match in `target_replace` Notifications to Nexus from an Upstairs are best-effort only: they can be missed or dropped, or suffer from all sorts of bad weather. For this reason, Nexus will poll an Upstairs (eg a Pantry attachment or Propolis disk backed by Crucible) to query the state of a repair (especially during region replacement). For a given VCR replacement request, Propolis will: - call `target_replace(original, replacement)` - update its stored VCR after the replacement has taken place. The next time that Nexus sends the same VCR replacement request, Propolis will end up calling what amounts to `target_replace(replacement, replacement)`, and before this commit that would fail with "The VCRs have no difference". Nexus will poll using with VCR replacement requests, so this commit changes `target_replace` to return `VcrMatches` instead of an error. Note that Nexus will _not_ consider this a signal that a replacement has completed. --- integration_tests/src/lib.rs | 63 ++++++++++++++++++++++++++++++++++++ upstairs/src/volume.rs | 48 +++++++++++++++------------ 2 files changed, 90 insertions(+), 21 deletions(-) diff --git a/integration_tests/src/lib.rs b/integration_tests/src/lib.rs index 122220ec8..77c53df8c 100644 --- a/integration_tests/src/lib.rs +++ b/integration_tests/src/lib.rs @@ -5747,6 +5747,69 @@ mod test { assert_eq!(vec![0x55_u8; BLOCK_SIZE * 10], &buffer[..]); } + #[tokio::test] + async fn test_volume_replace_vcr_rerun_ok() { + const BLOCK_SIZE: usize = 512; + let log = csl(); + + // Make three downstairs + let tds = TestDownstairsSet::small(false).await.unwrap(); + let opts = tds.opts(); + let volume_id = Uuid::new_v4(); + + let original = VolumeConstructionRequest::Volume { + id: volume_id, + block_size: BLOCK_SIZE as u64, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: BLOCK_SIZE as u64, + blocks_per_extent: tds.blocks_per_extent(), + extent_count: tds.extent_count(), + opts: opts.clone(), + gen: 2, + }], + read_only_parent: None, + }; + + let volume = Volume::construct(original.clone(), None, log.clone()) + .await + .unwrap(); + volume.activate().await.unwrap(); + + // Make one new downstairs + let new_downstairs = tds.new_downstairs().await.unwrap(); + + let mut new_opts = tds.opts().clone(); + new_opts.target[0] = new_downstairs.address(); + + // Our "new" VCR must have a new downstairs in the opts, and have + // the generation number be larger than the original. + let replacement = VolumeConstructionRequest::Volume { + id: volume_id, + block_size: BLOCK_SIZE as u64, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: BLOCK_SIZE as u64, + blocks_per_extent: tds.blocks_per_extent(), + extent_count: tds.extent_count(), + opts: new_opts.clone(), + gen: 3, + }], + read_only_parent: None, + }; + + // If the caller (eg Propolis) is polled by Nexus with a `replacement` + // VCR, the first call will modify the block backend's VCR, and the + // subsequent polls will eventually be comparing two VCRs that are + // equal. Test that this doesn't produce an Err. + assert_eq!( + ReplaceResult::Started, + volume.target_replace(original, replacement.clone()).await.unwrap(), + ); + assert_eq!( + ReplaceResult::VcrMatches, + volume.target_replace(replacement.clone(), replacement).await.unwrap(), + ); + } + /// Getting a volume's status should work even if something else took over #[tokio::test] async fn test_pantry_get_status_after_activation() { diff --git a/upstairs/src/volume.rs b/upstairs/src/volume.rs index 17b3737c3..ddc7d9463 100644 --- a/upstairs/src/volume.rs +++ b/upstairs/src/volume.rs @@ -1427,6 +1427,8 @@ impl Volume { } if all_same { + // The caller should have already compared original and replacement + // VCRs. This function expects there to be some difference. crucible_bail!(ReplaceRequestInvalid, "The VCRs have no difference") } @@ -1599,27 +1601,27 @@ impl Volume { } } - // Given two VCRs where we expect the only type to be a - // VolumeConstructionRequest::Region. The VCR can be from a sub_volume - // or a read_only_parent. The caller is expected to know how to - // handle the result depending on what it sends us. - // - // We return: - // VCRDelta::Same - // If the two VCRs are identical - // - // VCRDelta::Generation - // If it's just a generation number increase in the new VCR. - // - // VCRDelta::NewMissing - // If the new VCR is missing (None). - // - // VCRDelta::Target(old_target, new_target) - // If we have both a generation number increase, and one and only - // one target is different in the new VCR. - // - // Any other difference is an error, and an error returned here means the - // VCRs are incompatible in a way that prevents one from replacing another. + /// Given two VCRs where we expect the only type to be a + /// VolumeConstructionRequest::Region. The VCR can be from a sub_volume + /// or a read_only_parent. The caller is expected to know how to + /// handle the result depending on what it sends us. + /// + /// We return: + /// VCRDelta::Same + /// If the two VCRs are identical + /// + /// VCRDelta::Generation + /// If it's just a generation number increase in the new VCR. + /// + /// VCRDelta::NewMissing + /// If the new VCR is missing (None). + /// + /// VCRDelta::Target(old_target, new_target) + /// If we have both a generation number increase, and one and only + /// one target is different in the new VCR. + /// + /// Any other difference is an error, and an error returned here means the + /// VCRs are incompatible in a way that prevents one from replacing another. fn compare_vcr_region_for_replacement( log: &Logger, o_vol: &VolumeConstructionRequest, @@ -1847,6 +1849,10 @@ impl Volume { original: VolumeConstructionRequest, replacement: VolumeConstructionRequest, ) -> Result { + if original == replacement { + return Ok(ReplaceResult::VcrMatches); + } + let (original_target, new_target) = match Self::compare_vcr_for_target_replacement( original, From 5d810fe625c0ad59f40147c24ac4d6caac748c6f Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 4 Mar 2025 15:23:13 +0000 Subject: [PATCH 2/2] fmt and clippy --- Cargo.lock | 2 +- integration_tests/src/lib.rs | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c72e84f14..68645c4f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1113,7 +1113,7 @@ dependencies = [ "slog-term", "statistical", "tempfile", - "thiserror 1.0.66", + "thiserror 1.0.69", "tokio", "tokio-rustls 0.24.1", "tokio-util", diff --git a/integration_tests/src/lib.rs b/integration_tests/src/lib.rs index 77c53df8c..ecc5b016d 100644 --- a/integration_tests/src/lib.rs +++ b/integration_tests/src/lib.rs @@ -5802,11 +5802,17 @@ mod test { // equal. Test that this doesn't produce an Err. assert_eq!( ReplaceResult::Started, - volume.target_replace(original, replacement.clone()).await.unwrap(), + volume + .target_replace(original, replacement.clone()) + .await + .unwrap(), ); assert_eq!( ReplaceResult::VcrMatches, - volume.target_replace(replacement.clone(), replacement).await.unwrap(), + volume + .target_replace(replacement.clone(), replacement) + .await + .unwrap(), ); }