Skip to content

Commit 234fae8

Browse files
Brian-PerkinsBrian Perkins
andauthored
Cleanup intercept device shutdown (#2117)
1. Tie lifetime of the device to the vmbus parent a. Remove existing channel used to control lifetime. 2. If the vmbus relay parent is dropped, there will be no opportunity to close the GPADL associated with the special ring buffer memory pages. In this case, note the error and leak the device. Trying to change the VTL permissions on these memory pages while the host still has them open will result in an error. 3. Allow intercepted devices to be revoked and reoffered from the host. Co-authored-by: Brian Perkins <[email protected]>
1 parent 786d7a7 commit 234fae8

File tree

4 files changed

+57
-41
lines changed

4 files changed

+57
-41
lines changed

openhcl/underhill_core/src/dispatch/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,6 @@ pub(crate) struct LoadedVm {
160160
pub host_vmbus_relay: Option<VmbusRelayHandle>,
161161
// channels are revoked when dropped, so make sure to keep them alive
162162
pub _vmbus_devices: Vec<SpawnedUnit<ChannelUnit<dyn VmbusDevice>>>,
163-
pub _vmbus_intercept_devices: Vec<mesh::OneshotSender<()>>,
164163
pub _ide_accel_devices: Vec<SpawnedUnit<ChannelUnit<storvsp::StorageDevice>>>,
165164
pub network_settings: Option<Box<dyn LoadedVmNetworkSettings>>,
166165
pub shutdown_relay: Option<(

openhcl/underhill_core/src/worker.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3114,8 +3114,6 @@ async fn new_underhill_vm(
31143114
);
31153115
}
31163116

3117-
let mut vmbus_intercept_devices = Vec::new();
3118-
31193117
let shutdown_relay = if let Some(recv) = intercepted_shutdown_ic {
31203118
let mut shutdown_guest = ShutdownGuestIc::new();
31213119
let recv_host_shutdown = shutdown_guest.get_shutdown_notifier();
@@ -3141,8 +3139,7 @@ async fn new_underhill_vm(
31413139
.context("shutdown relay dma client")?,
31423140
shutdown_guest,
31433141
)?;
3144-
vmbus_intercept_devices.push(shutdown_guest.detach(driver_source.simple(), recv)?);
3145-
3142+
shutdown_guest.detach(driver_source.simple(), recv)?;
31463143
Some((recv_host_shutdown, send_guest_shutdown))
31473144
} else {
31483145
None
@@ -3285,7 +3282,6 @@ async fn new_underhill_vm(
32853282
vmbus_server,
32863283
host_vmbus_relay,
32873284
_vmbus_devices: vmbus_devices,
3288-
_vmbus_intercept_devices: vmbus_intercept_devices,
32893285
_ide_accel_devices: ide_accel_devices,
32903286
network_settings,
32913287
shutdown_relay,

vm/devices/vmbus/vmbus_relay/src/lib.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -630,10 +630,6 @@ impl RelayTask {
630630
async fn handle_offer(&mut self, offer: client::OfferInfo) -> Result<()> {
631631
let channel_id = offer.offer.channel_id.0;
632632

633-
if self.channels.contains_key(&ChannelId(channel_id)) {
634-
anyhow::bail!("channel {channel_id} already exists");
635-
}
636-
637633
if let Some(intercept) = self.intercept_channels.get(&offer.offer.instance_id) {
638634
self.channels.insert(
639635
ChannelId(channel_id),
@@ -643,6 +639,10 @@ impl RelayTask {
643639
return Ok(());
644640
}
645641

642+
if self.channels.contains_key(&ChannelId(channel_id)) {
643+
anyhow::bail!("channel {channel_id} already exists");
644+
}
645+
646646
// Used to Recv requests from the server.
647647
let (request_send, request_recv) = mesh::channel();
648648
// Used to Send responses from the server

vm/devices/vmbus/vmbus_relay_intercept_device/src/lib.rs

Lines changed: 52 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -166,28 +166,36 @@ impl<T: SimpleVmbusClientDeviceAsync> SimpleVmbusClientDeviceWrapper<T> {
166166
mut self,
167167
driver: impl SpawnDriver,
168168
recv_relay: mesh::Receiver<InterceptChannelRequest>,
169-
) -> Result<mesh::OneshotSender<()>> {
169+
) -> Result<()> {
170+
let (send_disconnected, recv_disconnected) = mesh::oneshot();
170171
self.vmbus_listener.insert(
171172
&self.spawner,
172173
format!("{}", self.instance_id),
173174
SimpleVmbusClientDeviceTaskState {
174175
offer: None,
175176
recv_relay,
177+
send_disconnected: Some(send_disconnected),
176178
vtl_pages: None,
177179
},
178180
);
179-
let (driver_send, driver_recv) = mesh::oneshot();
180181
driver
181182
.spawn(
182183
format!("vmbus_relay_device {}", self.instance_id),
183184
async move {
184185
self.vmbus_listener.start();
185-
let _ = driver_recv.await;
186-
self.vmbus_listener.stop().await;
186+
let _ = recv_disconnected.await;
187+
assert!(!self.vmbus_listener.stop().await);
188+
if self.vmbus_listener.state().unwrap().vtl_pages.is_some() {
189+
// The VTL pages were not freed. This can occur if an
190+
// error is hit that drops the vmbus parent tasks. Just
191+
// pend here and let the outer error cause the VM to
192+
// exit.
193+
pending::<()>().await;
194+
}
187195
},
188196
)
189197
.detach();
190-
Ok(driver_send)
198+
Ok(())
191199
}
192200
}
193201

@@ -214,6 +222,8 @@ struct SimpleVmbusClientDeviceTaskState {
214222
offer: Option<OfferInfo>,
215223
#[inspect(skip)]
216224
recv_relay: mesh::Receiver<InterceptChannelRequest>,
225+
#[inspect(skip)]
226+
send_disconnected: Option<mesh::OneshotSender<()>>,
217227
#[inspect(hex, with = "|x| x.as_ref().map(|x| inspect::iter_by_index(x.pfns()))")]
218228
vtl_pages: Option<MemoryBlock>,
219229
}
@@ -233,7 +243,13 @@ impl<T: SimpleVmbusClientDeviceAsync> AsyncRun<SimpleVmbusClientDeviceTaskState>
233243
stop: &mut StopTask<'_>,
234244
state: &mut SimpleVmbusClientDeviceTaskState,
235245
) -> Result<(), Cancelled> {
236-
stop.until_stopped(self.process_messages(state)).await
246+
stop.until_stopped(self.process_messages(state)).await?;
247+
state
248+
.send_disconnected
249+
.take()
250+
.expect("task should not be restarted")
251+
.send(());
252+
Ok(())
237253
}
238254
}
239255

@@ -350,21 +366,27 @@ impl<T: SimpleVmbusClientDeviceAsync> SimpleVmbusClientDeviceTask<T> {
350366
};
351367

352368
if state.vtl_pages.is_some() {
353-
if let Err(err) = offer
369+
match offer
354370
.request_send
355371
.call(
356372
ChannelRequest::TeardownGpadl,
357373
GpadlId(state.vtl_pages.as_ref().unwrap().pfns()[1] as u32),
358374
)
359375
.await
360376
{
361-
tracing::error!(
362-
error = &err as &dyn std::error::Error,
363-
"failed to teardown gpadl"
364-
);
377+
Ok(()) => {
378+
state.vtl_pages = None;
379+
}
380+
Err(err) => {
381+
// If the ring buffer pages are still in use by the host, which
382+
// has to be assumed, the memory pages cannot be used again as
383+
// they have been marked as visible to VTL0.
384+
tracing::error!(
385+
error = &err as &dyn std::error::Error,
386+
"Failed to teardown gpadl -- leaking memory."
387+
);
388+
}
365389
}
366-
367-
state.vtl_pages = None;
368390
}
369391
}
370392

@@ -503,7 +525,7 @@ impl<T: SimpleVmbusClientDeviceAsync> SimpleVmbusClientDeviceTask<T> {
503525

504526
/// Responds to the channel being revoked by the host.
505527
async fn handle_revoke(&mut self, state: &mut SimpleVmbusClientDeviceTaskState) {
506-
let Some(offer) = state.offer.take() else {
528+
let Some(offer) = state.offer.as_ref() else {
507529
return;
508530
};
509531
tracing::info!("device revoked");
@@ -512,6 +534,7 @@ impl<T: SimpleVmbusClientDeviceAsync> SimpleVmbusClientDeviceTask<T> {
512534
self.device.task_mut().0.close(offer.offer.subchannel_index);
513535
}
514536
self.cleanup_device_resources(state).await;
537+
drop(state.offer.take());
515538
}
516539

517540
fn handle_save(&mut self) -> SavedStateBlob {
@@ -545,27 +568,25 @@ impl<T: SimpleVmbusClientDeviceAsync> SimpleVmbusClientDeviceTask<T> {
545568
#[expect(clippy::large_enum_variant)]
546569
enum Event {
547570
Request(InterceptChannelRequest),
548-
Revoke(()),
571+
Revoke,
549572
}
550-
let revoke = pin!(async {
551-
if let Some(offer) = &mut state.offer {
552-
(&mut offer.revoke_recv).await.ok();
553-
} else {
554-
pending().await
555-
}
556-
});
557-
let Some(r) = (
558-
(&mut state.recv_relay).map(Event::Request),
559-
futures::stream::once(revoke).map(Event::Revoke),
560-
)
561-
.merge()
562-
.next()
563-
.await
564-
else {
573+
let r = if let Some(offer) = &mut state.offer {
574+
(
575+
(&mut state.recv_relay).map(Event::Request),
576+
futures::stream::once(&mut offer.revoke_recv).map(|_| Event::Revoke),
577+
)
578+
.merge()
579+
.next()
580+
.await
581+
} else {
582+
let mut recv_relay = pin!(&mut state.recv_relay);
583+
recv_relay.next().await.map(Event::Request)
584+
};
585+
let Some(r) = r else {
565586
break;
566587
};
567588
match r {
568-
Event::Revoke(()) => {
589+
Event::Revoke => {
569590
self.handle_revoke(state).await;
570591
}
571592
Event::Request(InterceptChannelRequest::Offer(offer)) => {

0 commit comments

Comments
 (0)