From 5614c59a3e48bb3b78ececf55cfa49ae90f6788d Mon Sep 17 00:00:00 2001 From: Matt LaFayette Date: Mon, 10 Nov 2025 09:40:47 -0800 Subject: [PATCH 1/3] wip: notes for handling restore on a node that does not support save restore (but openhcl does). --- openhcl/underhill_core/src/nvme_manager/device.rs | 15 ++++++++++++++- .../underhill_core/src/nvme_manager/manager.rs | 14 ++++++++++++++ openhcl/underhill_core/src/worker.rs | 5 +++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/openhcl/underhill_core/src/nvme_manager/device.rs b/openhcl/underhill_core/src/nvme_manager/device.rs index 6d26d50799..b5f9a01d3c 100644 --- a/openhcl/underhill_core/src/nvme_manager/device.rs +++ b/openhcl/underhill_core/src/nvme_manager/device.rs @@ -91,7 +91,7 @@ impl CreateNvmeDriver for VfioNvmeDriverSpawner { } else { AllocationVisibility::Private }, - persistent_allocations: save_restore_supported, + persistent_allocations: save_restore_supported, // OK in restore-on-old-host, since this will correctly be `false`. But, ideally, we keep it as `true` so that we can save on a host without support and restore on a node that has it. }) .map_err(NvmeSpawnerError::DmaClient)?; @@ -131,6 +131,19 @@ impl CreateNvmeDriver for VfioNvmeDriverSpawner { driver: nvme_driver, })) } + + // DEVNOTE(mattkur): TODO: create a new trait fn that clears and resets state, in the case of + // a restore on a host that does not support restore. + // + // This will: + // 1) Create a DMA client, be sure to say that we require persistent memory (even though save_restore_supported will actually be false). + // 2) Reset the device (call `VfioDevice::new` to get a fresh handle, and make sure that you do it in a way such that Vfio calls FLR). + // 3) call dma_client.attach_pending_buffers().context("failed to restore allocations")?; + // 4) free all the allocations on the dma client. + // 5) close the handle to the device (oops, will probably FLR it *again*, but ... that's probably OK; it does multiply the time to be 250ms x 2 per device). + // + // Now device is in a clean state. + // ... after this ... the next calls to get a `namespace` object will first open a handle to the device (so NvmeManagerWorker::restore() can simply be done). } impl VfioNvmeDriverSpawner { diff --git a/openhcl/underhill_core/src/nvme_manager/manager.rs b/openhcl/underhill_core/src/nvme_manager/manager.rs index acb53feba0..1e5acaeed2 100644 --- a/openhcl/underhill_core/src/nvme_manager/manager.rs +++ b/openhcl/underhill_core/src/nvme_manager/manager.rs @@ -134,6 +134,13 @@ impl NvmeManager { } /// Restore NVMe manager's state after servicing. + /// + /// DEVNOTE(mattkur): If the environment says that save/restore is not supported, then + /// we are restoring on a host where we do not understand the state. We must + /// quiesce IO (reset the device), clear any allocations, and then create a new device. + /// + /// The "nice" thing is that we can just be in an empty state after restore. This will cause + /// us to create new drivers and namespaces. async fn restore( worker: &mut NvmeManagerWorker, saved_state: &NvmeSavedState, @@ -431,6 +438,13 @@ impl NvmeManagerWorker { } /// Restore NVMe manager and device states from the buffer after servicing. + /// + /// DEVNOTE(mattkur): If the environment says that save/restore is not supported, then + /// we are restoring on a host where we do not understand the state. We must + /// quiesce IO (reset the device), clear any allocations, and then create a new device. + /// + /// The "nice" thing is that we can just be in an empty state after restore. This will cause + /// us to create new drivers and namespaces. pub async fn restore(&mut self, saved_state: &NvmeManagerSavedState) -> anyhow::Result<()> { let mut restored_devices: HashMap = HashMap::new(); diff --git a/openhcl/underhill_core/src/worker.rs b/openhcl/underhill_core/src/worker.rs index 914c91ee7d..3306e71296 100644 --- a/openhcl/underhill_core/src/worker.rs +++ b/openhcl/underhill_core/src/worker.rs @@ -1998,6 +1998,11 @@ async fn new_underhill_vm( let nvme_manager = if env_cfg.nvme_vfio { // TODO: reevaluate enablement of nvme save restore when private pool // save restore to bootshim is available. + // + // DEVNOTE(mattkur): In the event that this UH is _restoring_ on a host that no + // longer supports nvme_keep_alive (but UH itself supports this, and thus + // has a private pool), private_pool_available will be true and save_restore_supported will be false. + // let private_pool_available = !runtime_params.private_pool_ranges().is_empty(); let save_restore_supported = env_cfg.nvme_keep_alive && private_pool_available; From dc7d9e094e453b76d9b5c689ad75f5bad48081a0 Mon Sep 17 00:00:00 2001 From: Matt LaFayette Date: Mon, 10 Nov 2025 10:23:55 -0800 Subject: [PATCH 2/3] oops, some files didn't save --- openhcl/underhill_core/src/nvme_manager/manager.rs | 2 +- openhcl/underhill_core/src/nvme_manager/mod.rs | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/openhcl/underhill_core/src/nvme_manager/manager.rs b/openhcl/underhill_core/src/nvme_manager/manager.rs index 1e5acaeed2..6aff2b9ce8 100644 --- a/openhcl/underhill_core/src/nvme_manager/manager.rs +++ b/openhcl/underhill_core/src/nvme_manager/manager.rs @@ -457,7 +457,7 @@ impl NvmeManagerWorker { &self.context.driver_source, &pci_id, saved_state.cpu_count, - true, // save_restore_supported is always `true` when restoring. + true, // save_restore_supported is always `true` when restoring. (TODO(mattkur): no longer true). Some(&disk.driver_state), ) .await?; diff --git a/openhcl/underhill_core/src/nvme_manager/mod.rs b/openhcl/underhill_core/src/nvme_manager/mod.rs index 83b3b9e49d..419636099b 100644 --- a/openhcl/underhill_core/src/nvme_manager/mod.rs +++ b/openhcl/underhill_core/src/nvme_manager/mod.rs @@ -110,4 +110,7 @@ pub trait CreateNvmeDriver: Inspect + Send + Sync { save_restore_supported: bool, saved_state: Option<&nvme_driver::NvmeDriverSavedState>, ) -> Result, NvmeSpawnerError>; + + // DEVNOTE(mattkur): TODO: create a new trait fn that clears and resets state, in the case of + // a restore on a host that does not support restore. } From 6150188dda40a0893f5e113cdde33af8a1e0b863 Mon Sep 17 00:00:00 2001 From: Matt LaFayette Date: Mon, 10 Nov 2025 10:40:56 -0800 Subject: [PATCH 3/3] boot shim notes --- openhcl/openhcl_boot/src/main.rs | 9 +++++++++ openhcl/underhill_core/src/options.rs | 1 + 2 files changed, 10 insertions(+) diff --git a/openhcl/openhcl_boot/src/main.rs b/openhcl/openhcl_boot/src/main.rs index 22c7502e2f..f7a30cef20 100644 --- a/openhcl/openhcl_boot/src/main.rs +++ b/openhcl/openhcl_boot/src/main.rs @@ -290,6 +290,15 @@ fn build_kernel_command_line( // Only when explicitly supported by Host. // TODO: Move from command line to device tree when stabilized. + // + // DEVNOTE (mattkur): Create a new command line flag to pass along that + // the host supports keep alive (even if there is vtl2 pool supported). + // e.g.: + // OPENHCL_NVME_KEEP_ALIVE=1 (legacy, let's remove this) + // OPENHCL_NVME_KEEP_ALIVE=host,privatepool (supported by host and we have private pool) + // OPENHCL_NVME_KEEP_ALIVE=nohost,privatepool (not supported by host but we have a private pool) + // OPENHCL_NVME_KEEP_ALIVE=host,noprivatepool (supported by host, but no private pool (KA will be disabled)) + // OPENHCL_NVME_KEEP_ALIVE=disabled,{no,}host,{no,}privatepool (explicitly disabled by command line, rest of flags as above) if partition_info.nvme_keepalive && vtl2_pool_supported && !disable_keep_alive { write!(cmdline, "OPENHCL_NVME_KEEP_ALIVE=1 ")?; } diff --git a/openhcl/underhill_core/src/options.rs b/openhcl/underhill_core/src/options.rs index 33d811aa0a..807f5f0909 100644 --- a/openhcl/underhill_core/src/options.rs +++ b/openhcl/underhill_core/src/options.rs @@ -181,6 +181,7 @@ pub struct Options { pub no_sidecar_hotplug: bool, /// (OPENHCL_NVME_KEEP_ALIVE=1) Enable nvme keep alive when servicing. + /// See note in openhcl_boot/src/main.rs and then change this type to an enum. pub nvme_keep_alive: bool, /// (OPENHCL_NVME_ALWAYS_FLR=1)