diff --git a/Cargo.lock b/Cargo.lock index e35a1ac1bca..5be56e480b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -429,7 +429,7 @@ dependencies = [ "serde_json", "thiserror 2.0.12", "vmm", - "vmm-sys-util", + "vmm-sys-util 0.12.1", ] [[package]] @@ -602,7 +602,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90b16fe5161a1160c9c7cece9f7504f2412ef5e2c0643d1e322eccf37692a42b" dependencies = [ "libc", - "vmm-sys-util", + "vmm-sys-util 0.12.1", ] [[package]] @@ -625,7 +625,7 @@ dependencies = [ "userfaultfd", "utils", "vmm", - "vmm-sys-util", + "vmm-sys-util 0.12.1", ] [[package]] @@ -822,7 +822,7 @@ dependencies = [ "regex", "thiserror 2.0.12", "utils", - "vmm-sys-util", + "vmm-sys-util 0.12.1", ] [[package]] @@ -866,7 +866,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b13baf7bdfda2e10bcb109fcb099ef40cff82374eb6b7cdcf4695bdec4e522c" dependencies = [ "serde", - "vmm-sys-util", + "vmm-sys-util 0.12.1", "zerocopy 0.7.35", ] @@ -879,7 +879,7 @@ dependencies = [ "bitflags 2.9.0", "kvm-bindings", "libc", - "vmm-sys-util", + "vmm-sys-util 0.12.1", ] [[package]] @@ -979,7 +979,7 @@ version = "0.1.0" source = "git+https://github.com/firecracker-microvm/micro-http#4f621532e81ee2ad096a9c9592fdacc40d19de48" dependencies = [ "libc", - "vmm-sys-util", + "vmm-sys-util 0.12.1", ] [[package]] @@ -1211,7 +1211,7 @@ dependencies = [ "log-instrument", "thiserror 2.0.12", "utils", - "vmm-sys-util", + "vmm-sys-util 0.12.1", ] [[package]] @@ -1369,7 +1369,7 @@ dependencies = [ "thiserror 2.0.12", "utils", "vmm", - "vmm-sys-util", + "vmm-sys-util 0.12.1", ] [[package]] @@ -1620,7 +1620,7 @@ dependencies = [ "libc", "uuid", "vm-memory", - "vmm-sys-util", + "vmm-sys-util 0.12.1", ] [[package]] @@ -1639,6 +1639,14 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "vm-device" +version = "0.1.0" +dependencies = [ + "serde", + "vmm-sys-util 0.13.0", +] + [[package]] name = "vm-fdt" version = "0.3.0" @@ -1701,10 
+1709,11 @@ dependencies = [ "utils", "vhost", "vm-allocator", + "vm-device", "vm-fdt", "vm-memory", "vm-superio", - "vmm-sys-util", + "vmm-sys-util 0.12.1", "zerocopy 0.8.25", ] @@ -1720,6 +1729,18 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "vmm-sys-util" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "945fecc32d9b44069437b7aacd2257556a91a2054ae10e9e7538fe498e442db9" +dependencies = [ + "bitflags 1.3.2", + "libc", + "serde", + "serde_derive", +] + [[package]] name = "walkdir" version = "2.5.0" @@ -1863,9 +1884,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.7.9" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9fb597c990f03753e08d3c29efbfcf2019a003b4bf4ba19225c158e1549f0f3" +checksum = "c06928c8748d81b05c9be96aad92e1b6ff01833332f281e8cfca3be4b35fc9ec" dependencies = [ "memchr", ] diff --git a/src/vm-device/Cargo.toml b/src/vm-device/Cargo.toml new file mode 100644 index 00000000000..a385b9f7eea --- /dev/null +++ b/src/vm-device/Cargo.toml @@ -0,0 +1,16 @@ +[package] +authors = ["The Cloud Hypervisor Authors"] +edition = "2021" +name = "vm-device" +version = "0.1.0" +license = "Apache-2.0 AND BSD-3-Clause" + +[lib] +bench = false + +[features] +default = [] + +[dependencies] +serde = { version = "1.0.208", features = ["derive", "rc"] } +vmm-sys-util = { version = "0.13.0", features = ["with-serde"] } diff --git a/src/vm-device/src/bus.rs b/src/vm-device/src/bus.rs new file mode 100644 index 00000000000..31880d354bb --- /dev/null +++ b/src/vm-device/src/bus.rs @@ -0,0 +1,407 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE-BSD-3-Clause file.
+
+//! Handles routing to devices in an address space.
+
+use std::cmp::Ordering;
+use std::collections::btree_map::BTreeMap;
+use std::sync::{Arc, Barrier, Mutex, RwLock, Weak};
+use std::{convert, error, fmt, io, result};
+
+/// Trait for devices that respond to reads or writes in an arbitrary address space.
+///
+/// The device does not need to know where it lives in the address space: each method receives
+/// the `base` address of the range the access resolved to and the `offset` of the access within
+/// that range.
+///
+/// Both methods default to doing nothing, so an implementor only overrides what it needs.
+// The default bodies intentionally ignore their arguments.
+#[allow(unused_variables)]
+pub trait BusDevice: Send {
+    /// Reads at `offset` from this device into `data`.
+    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {}
+    /// Writes `data` at `offset` into this device.
+    ///
+    /// May return a barrier. NOTE(review): presumably the caller is expected to wait on it
+    /// before continuing; confirm against the callers of `Bus::write`.
+    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
+        None
+    }
+}
+
+/// Counterpart of `BusDevice` whose methods take `&self`.
+///
+/// Implementors are `Send + Sync` and are accessed through a shared reference; this is the
+/// trait the `Bus` stores and dispatches to.
+// The default bodies intentionally ignore their arguments.
+#[allow(unused_variables)]
+pub trait BusDeviceSync: Send + Sync {
+    /// Reads at `offset` from this device into `data`.
+    fn read(&self, base: u64, offset: u64, data: &mut [u8]) {}
+    /// Writes `data` at `offset` into this device.
+    fn write(&self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
+        None
+    }
+}
+
+/// Any `BusDevice` wrapped in a `Mutex` is a `BusDeviceSync`: every access takes the lock and
+/// forwards to the wrapped device.
+///
+/// Both methods panic if the mutex is poisoned.
+impl<B: BusDevice> BusDeviceSync for Mutex<B> {
+    /// Reads at `offset` from this device
+    fn read(&self, base: u64, offset: u64, data: &mut [u8]) {
+        self.lock()
+            .expect("Failed to acquire device lock")
+            .read(base, offset, data)
+    }
+    /// Writes at `offset` into this device
+    fn write(&self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
+        self.lock()
+            .expect("Failed to acquire device lock")
+            .write(base, offset, data)
+    }
+}
+
+/// Errors returned by `Bus` operations.
+#[derive(Debug)]
+pub enum Error {
+    /// The insertion failed because the new device overlapped with an old device.
+    Overlap,
+    /// Failed to operate on zero sized range.
+    ZeroSizedRange,
+    /// Failed to find address range.
+    MissingAddressRange,
+}
+
+/// Result type of bus operations; the error is always a bus `Error`.
+pub type Result<T> = result::Result<T, Error>;
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "bus_error: {self:?}")
+    }
+}
+
+impl error::Error for Error {}
+
+impl convert::From<Error> for io::Error {
+    fn from(e: Error) -> Self {
+        io::Error::other(e)
+    }
+}
+
+/// Holds a base and length representing the address space occupied by a `BusDevice`.
+///
+/// * base - The address at which the range starts.
+/// * len - The length of the range in bytes.
+///
+/// Equality and ordering look at `base` only (see the trait impls below), so two ranges with the
+/// same base but different lengths compare equal. This is what lets a map keyed by `BusRange`
+/// be queried with just an address.
+#[derive(Debug, Copy, Clone)]
+pub struct BusRange {
+    pub base: u64,
+    pub len: u64,
+}
+
+impl BusRange {
+    /// Returns true if there is overlap with the given range.
+    pub fn overlaps(&self, base: u64, len: u64) -> bool {
+        // Compute the exclusive end addresses in 128 bits: `base + len` does not fit in a `u64`
+        // for a range that reaches the top of the address space.
+        let self_end = u128::from(self.base) + u128::from(self.len);
+        let other_end = u128::from(base) + u128::from(len);
+        u128::from(self.base) < other_end && u128::from(base) < self_end
+    }
+}
+
+impl Eq for BusRange {}
+
+impl PartialEq for BusRange {
+    fn eq(&self, other: &BusRange) -> bool {
+        self.base == other.base
+    }
+}
+
+impl Ord for BusRange {
+    fn cmp(&self, other: &BusRange) -> Ordering {
+        self.base.cmp(&other.base)
+    }
+}
+
+impl PartialOrd for BusRange {
+    fn partial_cmp(&self, other: &BusRange) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+/// A device container for routing reads and writes over some address space.
+///
+/// This doesn't have any restrictions on what kind of device or address space this applies to. The
+/// only restriction is that no two devices can overlap in this address space.
+///
+/// The bus stores weak references only, so registering a device does not keep it alive.
+#[derive(Default, Debug)]
+pub struct Bus {
+    devices: RwLock<BTreeMap<BusRange, Weak<dyn BusDeviceSync>>>,
+}
+
+impl Bus {
+    /// Constructs a bus with an empty address space.
+    pub fn new() -> Bus {
+        Bus {
+            devices: RwLock::new(BTreeMap::new()),
+        }
+    }
+
+    /// Returns the registered range with the greatest base address that is `<= addr`, together
+    /// with its device.
+    ///
+    /// Returns `None` if there is no such range or if its device has already been dropped.
+    fn first_before(&self, addr: u64) -> Option<(BusRange, Arc<dyn BusDeviceSync>)> {
+        let devices = self.devices.read().unwrap();
+        // `BusRange` is ordered by `base` only, so the `len` of the probe key is irrelevant.
+        let (range, dev) = devices
+            .range(..=BusRange { base: addr, len: 1 })
+            .next_back()?;
+        dev.upgrade().map(|dev| (*range, dev))
+    }
+
+    /// Finds the device whose range contains `addr`.
+    ///
+    /// Returns `(base, offset, device)`, where `base` is the start of the matching range and
+    /// `offset` is `addr - base`, or `None` if no live device covers `addr`.
+    #[allow(clippy::type_complexity)]
+    pub fn resolve(&self, addr: u64) -> Option<(u64, u64, Arc<dyn BusDeviceSync>)> {
+        let (range, dev) = self.first_before(addr)?;
+        // `first_before` only returns ranges with `range.base <= addr`, so this cannot underflow.
+        let offset = addr - range.base;
+        (offset < range.len).then_some((range.base, offset, dev))
+    }
+
+    /// Registers `device` for the `len` bytes starting at `base`.
+    ///
+    /// Only a weak reference to `device` is stored.
+    ///
+    /// Returns `Error::ZeroSizedRange` if `len` is 0 and `Error::Overlap` if the range intersects
+    /// a range that is already registered; the bus is left unchanged in both cases.
+    pub fn insert(&self, device: Arc<dyn BusDeviceSync>, base: u64, len: u64) -> Result<()> {
+        if len == 0 {
+            return Err(Error::ZeroSizedRange);
+        }
+
+        // Hold the write lock across the overlap check and the insertion, so that two concurrent
+        // callers cannot both pass the check and then register overlapping ranges.
+        let mut devices = self.devices.write().unwrap();
+
+        // Reject all cases where the new device's range overlaps with an existing device.
+        if devices.keys().any(|range| range.overlaps(base, len)) {
+            return Err(Error::Overlap);
+        }
+
+        // Keys compare by base address only. Going through the entry API guarantees that an
+        // entry with the same base is reported as an overlap instead of being replaced.
+        match devices.entry(BusRange { base, len }) {
+            std::collections::btree_map::Entry::Vacant(slot) => {
+                slot.insert(Arc::downgrade(&device));
+                Ok(())
+            }
+            std::collections::btree_map::Entry::Occupied(_) => Err(Error::Overlap),
+        }
+    }
+
+    /// Removes the device at the given address space range.
+    ///
+    /// The entry is looked up by `base` alone (ranges compare by base address); `len` is only
+    /// checked for being non-zero.
+    pub fn remove(&self, base: u64, len: u64) -> Result<()> {
+        if len == 0 {
+            return Err(Error::ZeroSizedRange);
+        }
+
+        self.devices
+            .write()
+            .unwrap()
+            .remove(&BusRange { base, len })
+            .map(|_| ())
+            .ok_or(Error::MissingAddressRange)
+    }
+
+    /// Removes all entries referencing the given device.
+    ///
+    /// Entries whose device has already been dropped are left in place. Always returns `Ok(())`.
+    pub fn remove_by_device(&self, device: &Arc<dyn BusDeviceSync>) -> Result<()> {
+        self.devices.write().unwrap().retain(|_range, registered| {
+            // An entry whose device is gone cannot refer to `device`, which the caller still
+            // holds, so it is kept rather than unwrapped (which would panic).
+            !registered
+                .upgrade()
+                .is_some_and(|live| Arc::ptr_eq(&live, device))
+        });
+
+        Ok(())
+    }
+
+    /// Updates the address range for an existing device.
+    pub fn update_range(
+        &self,
+        old_base: u64,
+        old_len: u64,
+        new_base: u64,
+        new_len: u64,
+    ) -> Result<()> {
+        // The whole move happens under one write lock and is validated before anything is
+        // changed: on error the device stays registered at its old range.
+        let mut devices = self.devices.write().unwrap();
+        let old_range = BusRange {
+            base: old_base,
+            len: old_len,
+        };
+        let new_range = BusRange {
+            base: new_base,
+            len: new_len,
+        };
+
+        // Retrieve the device corresponding to the range. An entry whose device has already been
+        // dropped is treated as missing.
+        let device = devices
+            .get(&old_range)
+            .filter(|dev| dev.strong_count() > 0)
+            .cloned()
+            .ok_or(Error::MissingAddressRange)?;
+
+        if old_len == 0 || new_len == 0 {
+            return Err(Error::ZeroSizedRange);
+        }
+
+        // The new range may overlap the range being replaced, but no other one.
+        if devices
+            .keys()
+            .any(|range| range.base != old_base && range.overlaps(new_base, new_len))
+        {
+            return Err(Error::Overlap);
+        }
+
+        // Remove the old address range
+        devices.remove(&old_range);
+
+        // Insert the new address range
+        devices.insert(new_range, device);
+
+        Ok(())
+    }
+
+    /// Reads data from the device that owns the range containing `addr` and puts it into `data`.
+    ///
+    /// Returns `Ok(())` on success. If no device owns `addr`, returns
+    /// `Err(Error::MissingAddressRange)` and `data` is untouched.
+    pub fn read(&self, addr: u64, data: &mut [u8]) -> Result<()> {
+        let (base, offset, dev) = self.resolve(addr).ok_or(Error::MissingAddressRange)?;
+        // Any locking happens inside the device's `BusDeviceSync` implementation.
+        dev.read(base, offset, data);
+        Ok(())
+    }
+
+    /// Writes `data` to the device that owns the range containing `addr`.
+    ///
+    /// On success returns whatever the device's `write` returned. If no device owns `addr`,
+    /// returns `Err(Error::MissingAddressRange)` and nothing is written.
+    pub fn write(&self, addr: u64, data: &[u8]) -> Result<Option<Arc<Barrier>>> {
+        if let Some((base, offset, dev)) = self.resolve(addr) {
+            // Any locking happens inside the device's `BusDeviceSync` implementation.
+ Ok(dev.write(base, offset, data)) + } else { + Err(Error::MissingAddressRange) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + struct DummyDevice; + impl BusDeviceSync for DummyDevice {} + + struct ConstantDevice; + impl BusDeviceSync for ConstantDevice { + fn read(&self, _base: u64, offset: u64, data: &mut [u8]) { + for (i, v) in data.iter_mut().enumerate() { + *v = (offset as u8) + (i as u8); + } + } + + fn write(&self, _base: u64, offset: u64, data: &[u8]) -> Option> { + for (i, v) in data.iter().enumerate() { + assert_eq!(*v, (offset as u8) + (i as u8)) + } + + None + } + } + + #[test] + fn bus_insert() { + let bus = Bus::new(); + let dummy = Arc::new(DummyDevice); + bus.insert(dummy.clone(), 0x10, 0).unwrap_err(); + bus.insert(dummy.clone(), 0x10, 0x10).unwrap(); + + let result = bus.insert(dummy.clone(), 0x0f, 0x10); + assert_eq!(format!("{result:?}"), "Err(Overlap)"); + + bus.insert(dummy.clone(), 0x10, 0x10).unwrap_err(); + bus.insert(dummy.clone(), 0x10, 0x15).unwrap_err(); + bus.insert(dummy.clone(), 0x12, 0x15).unwrap_err(); + bus.insert(dummy.clone(), 0x12, 0x01).unwrap_err(); + bus.insert(dummy.clone(), 0x0, 0x20).unwrap_err(); + bus.insert(dummy.clone(), 0x20, 0x05).unwrap(); + bus.insert(dummy.clone(), 0x25, 0x05).unwrap(); + bus.insert(dummy, 0x0, 0x10).unwrap(); + } + + #[test] + fn bus_remove() { + let bus = Bus::new(); + let dummy: Arc = Arc::new(DummyDevice); + + bus.remove(0x42, 0x0).unwrap_err(); + + bus.remove(0x13, 0x12).unwrap_err(); + + bus.insert(dummy.clone(), 0x13, 0x12).unwrap(); + bus.remove(0x42, 0x42).unwrap_err(); + bus.remove(0x13, 0x12).unwrap(); + + bus.insert(dummy.clone(), 0x16, 0x1).unwrap(); + bus.remove_by_device(&dummy).unwrap(); + bus.remove(0x16, 0x1).unwrap_err(); + } + + #[test] + #[allow(clippy::redundant_clone)] + fn bus_read_write() { + let bus = Bus::new(); + let dummy = Arc::new(DummyDevice); + bus.insert(dummy.clone(), 0x10, 0x10).unwrap(); + bus.read(0x10, &mut [0, 0, 0, 0]).unwrap(); + 
bus.write(0x10, &[0, 0, 0, 0]).unwrap(); + bus.read(0x11, &mut [0, 0, 0, 0]).unwrap(); + bus.write(0x11, &[0, 0, 0, 0]).unwrap(); + bus.read(0x16, &mut [0, 0, 0, 0]).unwrap(); + bus.write(0x16, &[0, 0, 0, 0]).unwrap(); + bus.read(0x20, &mut [0, 0, 0, 0]).unwrap_err(); + bus.write(0x20, &[0, 0, 0, 0]).unwrap_err(); + bus.read(0x06, &mut [0, 0, 0, 0]).unwrap_err(); + bus.write(0x06, &[0, 0, 0, 0]).unwrap_err(); + } + + #[test] + #[allow(clippy::redundant_clone)] + fn bus_read_write_values() { + let bus = Bus::new(); + let dummy = Arc::new(ConstantDevice); + bus.insert(dummy.clone(), 0x10, 0x10).unwrap(); + + let mut values = [0, 1, 2, 3]; + bus.read(0x10, &mut values).unwrap(); + assert_eq!(values, [0, 1, 2, 3]); + bus.write(0x10, &values).unwrap(); + bus.read(0x15, &mut values).unwrap(); + assert_eq!(values, [5, 6, 7, 8]); + bus.write(0x15, &values).unwrap(); + } + + #[test] + #[allow(clippy::redundant_clone)] + fn busrange_cmp() { + let range = BusRange { base: 0x10, len: 2 }; + assert_eq!(range, BusRange { base: 0x10, len: 3 }); + assert_eq!(range, BusRange { base: 0x10, len: 2 }); + + assert!(range < BusRange { base: 0x12, len: 1 }); + assert!(range < BusRange { base: 0x12, len: 3 }); + + assert_eq!(range, range.clone()); + + let bus = Bus::new(); + let mut data = [1, 2, 3, 4]; + let device = Arc::new(DummyDevice); + bus.insert(device.clone(), 0x10, 0x10).unwrap(); + bus.write(0x10, &data).unwrap(); + bus.read(0x10, &mut data).unwrap(); + assert_eq!(data, [1, 2, 3, 4]); + } + + #[test] + fn bus_range_overlap() { + let a = BusRange { + base: 0x1000, + len: 0x400, + }; + assert!(a.overlaps(0x1000, 0x400)); + assert!(a.overlaps(0xf00, 0x400)); + assert!(a.overlaps(0x1000, 0x01)); + assert!(a.overlaps(0xfff, 0x02)); + assert!(a.overlaps(0x1100, 0x100)); + assert!(a.overlaps(0x13ff, 0x100)); + assert!(!a.overlaps(0x1400, 0x100)); + assert!(!a.overlaps(0xf00, 0x100)); + } + + #[test] + fn bus_update_range() { + let bus = Bus::new(); + let dummy = Arc::new(DummyDevice); 
+ + bus.update_range(0x13, 0x12, 0x16, 0x1).unwrap_err(); + bus.insert(dummy.clone(), 0x13, 12).unwrap(); + + bus.update_range(0x16, 0x1, 0x13, 0x12).unwrap_err(); + bus.update_range(0x13, 0x12, 0x16, 0x1).unwrap(); + } +} diff --git a/src/vm-device/src/dma_mapping/mod.rs b/src/vm-device/src/dma_mapping/mod.rs new file mode 100644 index 00000000000..6cba6e16488 --- /dev/null +++ b/src/vm-device/src/dma_mapping/mod.rs @@ -0,0 +1,18 @@ +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// Copyright © 2021 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause + +/// Trait to trigger DMA mapping updates for devices managed by virtio-iommu +/// +/// Trait meant for triggering the DMA mapping update related to an external +/// device not managed fully through virtio. It is dedicated to virtio-iommu +/// in order to trigger the map update anytime the mapping is updated from the +/// guest. +pub trait ExternalDmaMapping: Send + Sync { + /// Map a memory range + fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), std::io::Error>; + + /// Unmap a memory range + fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), std::io::Error>; +} diff --git a/src/vm-device/src/interrupt/mod.rs b/src/vm-device/src/interrupt/mod.rs new file mode 100644 index 00000000000..f4aec52a2e0 --- /dev/null +++ b/src/vm-device/src/interrupt/mod.rs @@ -0,0 +1,194 @@ +// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// Copyright (C) 2019 Alibaba Cloud. All rights reserved. +// Copyright © 2019 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause + +//! Traits and Structs to manage interrupt sources for devices. +//! +//! In system programming, an interrupt is a signal to the processor emitted by hardware or +//! software indicating an event that needs immediate attention. An interrupt alerts the processor +//! 
to a high-priority condition requiring the interruption of the current code the processor is +//! executing. The processor responds by suspending its current activities, saving its state, and +//! executing a function called an interrupt handler (or an interrupt service routine, ISR) to deal +//! with the event. This interruption is temporary, and, after the interrupt handler finishes, +//! unless handling the interrupt has emitted a fatal error, the processor resumes normal +//! activities. +//! +//! Hardware interrupts are used by devices to communicate that they require attention from the +//! operating system, or a bare-metal program running on the CPU if there is no OS. The act of +//! initiating a hardware interrupt is referred to as an interrupt request (IRQ). Different devices +//! are usually associated with different interrupts using a unique value associated with each +//! interrupt. This makes it possible to know which hardware device caused which interrupts. +//! These interrupt values are often called IRQ lines, or just interrupt lines. +//! +//! Nowadays, IRQ lines are not the only mechanism to deliver device interrupts to processors. +//! MSI [(Message Signaled Interrupt)](https://en.wikipedia.org/wiki/Message_Signaled_Interrupts) +//! is another commonly used alternative in-band method of signaling an interrupt, using special +//! in-band messages to replace traditional out-of-band assertion of dedicated interrupt lines. +//! While more complex to implement in a device, message signaled interrupts have some significant +//! advantages over pin-based out-of-band interrupt signaling. Message signaled interrupts are +//! supported on the PCI bus since version 2.2, and on the later PCI Express bus. Some +//! non-PCI architectures also use message signaled interrupts. +//! +//! While IRQ is a term commonly used by Operating Systems when dealing with hardware +//!
interrupts, the IRQ numbers managed by OSes are independent of the ones managed by the VMM. +//! For simplicity's sake, the term `Interrupt Source` is used instead of IRQ to represent both +//! pin-based interrupts and MSI interrupts. +//! +//! A device may support multiple types of interrupts, and each type of interrupt may support one +//! or multiple interrupt sources. For example, a PCI device may support: +//! * Legacy Irq: exactly one interrupt source. +//! * PCI MSI Irq: 1,2,4,8,16,32 interrupt sources. +//! * PCI MSIx Irq: 2^n(n=0-11) interrupt sources. +//! +//! A distinct Interrupt Source Identifier (ISID) will be assigned to each interrupt source. +//! An ID allocator will be used to allocate and free Interrupt Source Identifiers for devices. +//! To decouple the vm-device crate from the ID allocator, the vm-device crate doesn't take the +//! responsibility to allocate/free Interrupt Source IDs but only makes use of assigned IDs. +//! +//! The overall flow to deal with interrupts is: +//! * The VMM creates an interrupt manager +//! * The VMM creates a device manager, passing on a reference to the interrupt manager +//! * The device manager passes on a reference to the interrupt manager to all registered devices +//! * The guest kernel loads drivers for virtual devices +//! * The guest device driver determines the type and number of interrupts needed, and updates the +//! device configuration +//! * The virtual device backend requests the interrupt manager to create an interrupt group +//! according to guest configuration information + +use std::sync::Arc; + +use vmm_sys_util::eventfd::EventFd; + +/// Reuse std::io::Result to simplify interoperability among crates. +pub type Result = std::io::Result; + +/// Data type to store an interrupt source identifier. +pub type InterruptIndex = u32; + +/// Configuration data for legacy interrupts. +/// +/// On x86 platforms, legacy interrupts are those interrupts routed through PICs or IOAPICs. 
+#[derive(Copy, Clone, Debug)] +pub struct LegacyIrqSourceConfig { + pub irqchip: u32, + pub pin: u32, +} + +/// Configuration data for MSI/MSI-X interrupts. +/// +/// On x86 platforms, these interrupts are vectors delivered directly to the LAPIC. +#[derive(Copy, Clone, Debug, Default)] +pub struct MsiIrqSourceConfig { + /// High address to deliver the message signaled interrupt. + pub high_addr: u32, + /// Low address to deliver the message signaled interrupt. + pub low_addr: u32, + /// Data to write to deliver the message signaled interrupt. + pub data: u32, + /// Unique ID of the device to deliver the message signaled interrupt. + pub devid: u32, +} + +/// Configuration data for an interrupt source. +#[derive(Copy, Clone, Debug)] +pub enum InterruptSourceConfig { + /// Configuration data for Legacy interrupts. + LegacyIrq(LegacyIrqSourceConfig), + /// Configuration data for PciMsi, PciMsix and generic MSI interrupts. + MsiIrq(MsiIrqSourceConfig), +} + +/// Configuration data for legacy, pin based interrupt groups. +/// +/// A legacy interrupt group only takes one irq number as its configuration. +#[derive(Copy, Clone, Debug)] +pub struct LegacyIrqGroupConfig { + /// Legacy irq number. + pub irq: InterruptIndex, +} + +/// Configuration data for MSI/MSI-X interrupt groups. +/// +/// MSI/MSI-X interrupt groups are basically a set of vectors. +#[derive(Copy, Clone, Debug)] +pub struct MsiIrqGroupConfig { + /// First index of the MSI/MSI-X interrupt vectors. + pub base: InterruptIndex, + /// Number of vectors in the MSI/MSI-X group. + pub count: InterruptIndex, +} + +/// Trait to manage interrupt sources for virtual device backends. +/// +/// InterruptManager implementations should protect themselves from concurrent accesses internally, +/// so that they can be invoked from a multi-threaded context. 
+pub trait InterruptManager: Send + Sync {
+    /// Configuration type accepted by `create_group()`; chosen by each implementation.
+    type GroupConfig;
+
+    /// Create an [InterruptSourceGroup](trait.InterruptSourceGroup.html) object to manage
+    /// interrupt sources for a virtual device
+    ///
+    /// An [InterruptSourceGroup](trait.InterruptSourceGroup.html) object manages all interrupt
+    /// sources of the same type for a virtual device.
+    ///
+    /// # Arguments
+    /// * config: configuration of the group to create, of the implementation's `GroupConfig`
+    ///   type.
+    fn create_group(&self, config: Self::GroupConfig) -> Result<Arc<dyn InterruptSourceGroup>>;
+
+    /// Destroy an [InterruptSourceGroup](trait.InterruptSourceGroup.html) object created by
+    /// [create_group()](trait.InterruptManager.html#tymethod.create_group).
+    ///
+    /// Assume the caller takes the responsibility to disable all interrupt sources of the group
+    /// before calling destroy_group(). This assumption helps to simplify InterruptSourceGroup
+    /// implementations.
+    fn destroy_group(&self, group: Arc<dyn InterruptSourceGroup>) -> Result<()>;
+}
+
+/// Trait implemented by a group of interrupt sources of the same type for a virtual device.
+///
+/// `enable()` and `disable()` default to succeeding without doing anything; the remaining
+/// methods must be provided by the implementation.
+pub trait InterruptSourceGroup: Send + Sync {
+    /// Enable the interrupt sources in the group to generate interrupts.
+    fn enable(&self) -> Result<()> {
+        // Not all interrupt sources can be enabled.
+        // To accommodate this, we can have a no-op here.
+        Ok(())
+    }
+
+    /// Disable the interrupt sources in the group to generate interrupts.
+    fn disable(&self) -> Result<()> {
+        // Not all interrupt sources can be disabled.
+        // To accommodate this, we can have a no-op here.
+        Ok(())
+    }
+
+    /// Inject an interrupt from this interrupt source into the guest.
+    fn trigger(&self, index: InterruptIndex) -> Result<()>;
+
+    /// Returns an interrupt notifier from this interrupt.
+    ///
+    /// An interrupt notifier allows for external components and processes
+    /// to inject interrupts into a guest, by writing to the file returned
+    /// by this method.
+ #[allow(unused_variables)] + fn notifier(&self, index: InterruptIndex) -> Option; + + /// Update the interrupt source group configuration. + /// + /// # Arguments + /// * index: sub-index into the group. + /// * config: configuration data for the interrupt source. + /// * masked: if the interrupt is masked + /// * set_gsi: whether update the GSI routing table. + fn update( + &self, + index: InterruptIndex, + config: InterruptSourceConfig, + masked: bool, + set_gsi: bool, + ) -> Result<()>; + + /// Set the interrupt group GSI routing table. + fn set_gsi(&self) -> Result<()>; +} diff --git a/src/vm-device/src/lib.rs b/src/vm-device/src/lib.rs new file mode 100644 index 00000000000..fe06fd8b465 --- /dev/null +++ b/src/vm-device/src/lib.rs @@ -0,0 +1,63 @@ +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright © 2020 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +use serde::{Deserialize, Serialize}; + +mod bus; +pub mod dma_mapping; +pub mod interrupt; + +pub use self::bus::{Bus, BusDevice, BusDeviceSync, Error as BusError}; + +/// Type of Message Signalled Interrupt +#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub enum MsiIrqType { + /// PCI MSI IRQ numbers. + PciMsi, + /// PCI MSIx IRQ numbers. + PciMsix, + /// Generic MSI IRQ numbers. + GenericMsi, +} + +#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)] +pub enum PciBarType { + Io, + Mmio32, + Mmio64, +} + +/// Enumeration for device resources. +#[allow(missing_docs)] +#[derive(Clone, Debug, Serialize, Deserialize)] +pub enum Resource { + /// IO Port address range. + PioAddressRange { base: u16, size: u16 }, + /// Memory Mapped IO address range. + MmioAddressRange { base: u64, size: u64 }, + /// PCI BAR + PciBar { + index: usize, + base: u64, + size: u64, + type_: PciBarType, + prefetchable: bool, + }, + /// Legacy IRQ number. 
+ LegacyIrq(u32), + /// Message Signaled Interrupt + MsiIrq { + ty: MsiIrqType, + base: u32, + size: u32, + }, + /// Network Interface Card MAC address. + MacAddress(String), + /// KVM memslot index. + KvmMemSlot(u32), +} diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 86ba95e7768..c335c5f2fea 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -10,14 +10,17 @@ bench = false [dependencies] acpi_tables = { path = "../acpi-tables" } -aes-gcm = { version = "0.10.1", default-features = false, features = ["aes"] } +aes-gcm = { version = "0.10.1", default-features = false, features = ["aes"] } arrayvec = { version = "0.7.6", optional = true } aws-lc-rs = { version = "1.13.0", features = ["bindgen"] } base64 = "0.22.1" bincode = { version = "2.0.1", features = ["serde"] } bitflags = "2.9.0" crc64 = "2.0.0" -derive_more = { version = "2.0.1", default-features = false, features = ["from", "display"] } +derive_more = { version = "2.0.1", default-features = false, features = [ + "from", + "display", +] } displaydoc = "0.2.5" event-manager = "0.4.0" gdbstub = { version = "0.7.5", optional = true } @@ -40,7 +43,11 @@ userfaultfd = "0.8.1" utils = { path = "../utils" } vhost = { version = "0.13.0", features = ["vhost-user-frontend"] } vm-allocator = "0.1.0" -vm-memory = { version = "0.16.1", features = ["backend-mmap", "backend-bitmap"] } +vm-device = { path = "../vm-device" } +vm-memory = { version = "0.16.1", features = [ + "backend-mmap", + "backend-bitmap", +] } vm-superio = "0.8.0" vmm-sys-util = { version = "0.12.1", features = ["with-serde"] } zerocopy = { version = "0.8.25" } diff --git a/src/vmm/src/acpi/mod.rs b/src/vmm/src/acpi/mod.rs index 0b5c5edcbde..542e53409b7 100644 --- a/src/vmm/src/acpi/mod.rs +++ b/src/vmm/src/acpi/mod.rs @@ -10,8 +10,7 @@ use crate::Vcpu; use crate::acpi::x86_64::{ apic_addr, rsdp_addr, setup_arch_dsdt, setup_arch_fadt, setup_interrupt_controllers, }; -use crate::device_manager::acpi::ACPIDeviceManager; -use 
crate::device_manager::mmio::MMIODeviceManager; +use crate::device_manager::DeviceManager; use crate::device_manager::resources::ResourceAllocator; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; @@ -45,7 +44,6 @@ pub enum AcpiError { /// allocator for allocating space for the tables struct AcpiTableWriter<'a> { mem: &'a GuestMemoryMmap, - resource_allocator: &'a mut ResourceAllocator, } impl AcpiTableWriter<'_> { @@ -53,11 +51,15 @@ impl AcpiTableWriter<'_> { /// /// This will allocate enough space inside guest memory and write the table in the allocated /// buffer. It returns the address in which it wrote the table. - fn write_acpi_table(&mut self, table: &mut S) -> Result + fn write_acpi_table( + &mut self, + resource_allocator: &mut ResourceAllocator, + table: &mut S, + ) -> Result where S: Sdt, { - let addr = self.resource_allocator.allocate_system_memory( + let addr = resource_allocator.allocate_system_memory( table.len().try_into().unwrap(), 1, AllocPolicy::FirstMatch, @@ -77,30 +79,32 @@ impl AcpiTableWriter<'_> { } /// Build the DSDT table for the guest - fn build_dsdt( - &mut self, - mmio_device_manager: &MMIODeviceManager, - acpi_device_manager: &ACPIDeviceManager, - ) -> Result { + fn build_dsdt(&mut self, device_manager: &mut DeviceManager) -> Result { let mut dsdt_data = Vec::new(); // Virtio-devices DSDT data - dsdt_data.extend_from_slice(&mmio_device_manager.dsdt_data); + dsdt_data.extend_from_slice(&device_manager.mmio_devices.dsdt_data); // Add GED and VMGenID AML data. 
- acpi_device_manager.append_aml_bytes(&mut dsdt_data)?; + device_manager + .acpi_devices + .append_aml_bytes(&mut dsdt_data)?; // Architecture specific DSDT data setup_arch_dsdt(&mut dsdt_data)?; let mut dsdt = Dsdt::new(OEM_ID, *b"FCVMDSDT", OEM_REVISION, dsdt_data); - self.write_acpi_table(&mut dsdt) + self.write_acpi_table(&mut device_manager.resource_allocator, &mut dsdt) } /// Build the FADT table for the guest /// /// This includes a pointer with the location of the DSDT in guest memory - fn build_fadt(&mut self, dsdt_addr: u64) -> Result { + fn build_fadt( + &mut self, + resource_allocator: &mut ResourceAllocator, + dsdt_addr: u64, + ) -> Result { let mut fadt = Fadt::new(OEM_ID, *b"FCVMFADT", OEM_REVISION); fadt.set_hypervisor_vendor_id(HYPERVISOR_VENDOR_ID); fadt.set_x_dsdt(dsdt_addr); @@ -108,13 +112,17 @@ impl AcpiTableWriter<'_> { (1 << FADT_F_HW_REDUCED_ACPI) | (1 << FADT_F_PWR_BUTTON) | (1 << FADT_F_SLP_BUTTON), ); setup_arch_fadt(&mut fadt); - self.write_acpi_table(&mut fadt) + self.write_acpi_table(resource_allocator, &mut fadt) } /// Build the MADT table for the guest /// /// This includes information about the interrupt controllers supported in the platform - fn build_madt(&mut self, nr_vcpus: u8) -> Result { + fn build_madt( + &mut self, + resource_allocator: &mut ResourceAllocator, + nr_vcpus: u8, + ) -> Result { let mut madt = Madt::new( OEM_ID, *b"FCVMMADT", @@ -122,20 +130,25 @@ impl AcpiTableWriter<'_> { apic_addr(), setup_interrupt_controllers(nr_vcpus), ); - self.write_acpi_table(&mut madt) + self.write_acpi_table(resource_allocator, &mut madt) } /// Build the XSDT table for the guest /// /// Currently, we pass to the guest just FADT and MADT tables. 
- fn build_xsdt(&mut self, fadt_addr: u64, madt_addr: u64) -> Result { + fn build_xsdt( + &mut self, + resource_allocator: &mut ResourceAllocator, + fadt_addr: u64, + madt_addr: u64, + ) -> Result { let mut xsdt = Xsdt::new( OEM_ID, *b"FCMVXSDT", OEM_REVISION, vec![fadt_addr, madt_addr], ); - self.write_acpi_table(&mut xsdt) + self.write_acpi_table(resource_allocator, &mut xsdt) } /// Build the RSDP pointer for the guest. @@ -163,20 +176,19 @@ impl AcpiTableWriter<'_> { /// such as interrupt controllers, vCPUs and VirtIO devices. pub(crate) fn create_acpi_tables( mem: &GuestMemoryMmap, - resource_allocator: &mut ResourceAllocator, - mmio_device_manager: &MMIODeviceManager, - acpi_device_manager: &ACPIDeviceManager, + device_manager: &mut DeviceManager, vcpus: &[Vcpu], ) -> Result<(), AcpiError> { - let mut writer = AcpiTableWriter { - mem, - resource_allocator, - }; - - let dsdt_addr = writer.build_dsdt(mmio_device_manager, acpi_device_manager)?; - let fadt_addr = writer.build_fadt(dsdt_addr)?; - let madt_addr = writer.build_madt(vcpus.len().try_into().unwrap())?; - let xsdt_addr = writer.build_xsdt(fadt_addr, madt_addr)?; + let mut writer = AcpiTableWriter { mem }; + + let dsdt_addr = writer.build_dsdt(device_manager)?; + let fadt_addr = writer.build_fadt(&mut device_manager.resource_allocator, dsdt_addr)?; + let madt_addr = writer.build_madt( + &mut device_manager.resource_allocator, + vcpus.len().try_into().unwrap(), + )?; + let xsdt_addr = + writer.build_xsdt(&mut device_manager.resource_allocator, fadt_addr, madt_addr)?; writer.build_rsdp(xsdt_addr) } @@ -218,17 +230,20 @@ mod tests { let mut vmm = default_vmm(); let mut writer = AcpiTableWriter { mem: vmm.vm.guest_memory(), - resource_allocator: &mut vmm.resource_allocator, }; // This should succeed let mut sdt = MockSdt(vec![0; 4096]); - let addr = writer.write_acpi_table(&mut sdt).unwrap(); + let addr = writer + .write_acpi_table(&mut vmm.device_manager.resource_allocator, &mut sdt) + .unwrap(); 
assert_eq!(addr, SYSTEM_MEM_START); // Let's try to write two 4K pages plus one byte let mut sdt = MockSdt(vec![0; usize::try_from(SYSTEM_MEM_SIZE + 1).unwrap()]); - let err = writer.write_acpi_table(&mut sdt).unwrap_err(); + let err = writer + .write_acpi_table(&mut vmm.device_manager.resource_allocator, &mut sdt) + .unwrap_err(); assert!( matches!( err, @@ -241,19 +256,29 @@ mod tests { // We are allocating memory for tables with alignment of 1 byte. All of these should // succeed. let mut sdt = MockSdt(vec![0; 5]); - let addr = writer.write_acpi_table(&mut sdt).unwrap(); + let addr = writer + .write_acpi_table(&mut vmm.device_manager.resource_allocator, &mut sdt) + .unwrap(); assert_eq!(addr, SYSTEM_MEM_START + 4096); let mut sdt = MockSdt(vec![0; 2]); - let addr = writer.write_acpi_table(&mut sdt).unwrap(); + let addr = writer + .write_acpi_table(&mut vmm.device_manager.resource_allocator, &mut sdt) + .unwrap(); assert_eq!(addr, SYSTEM_MEM_START + 4101); let mut sdt = MockSdt(vec![0; 4]); - let addr = writer.write_acpi_table(&mut sdt).unwrap(); + let addr = writer + .write_acpi_table(&mut vmm.device_manager.resource_allocator, &mut sdt) + .unwrap(); assert_eq!(addr, SYSTEM_MEM_START + 4103); let mut sdt = MockSdt(vec![0; 8]); - let addr = writer.write_acpi_table(&mut sdt).unwrap(); + let addr = writer + .write_acpi_table(&mut vmm.device_manager.resource_allocator, &mut sdt) + .unwrap(); assert_eq!(addr, SYSTEM_MEM_START + 4107); let mut sdt = MockSdt(vec![0; 16]); - let addr = writer.write_acpi_table(&mut sdt).unwrap(); + let addr = writer + .write_acpi_table(&mut vmm.device_manager.resource_allocator, &mut sdt) + .unwrap(); assert_eq!(addr, SYSTEM_MEM_START + 4115); } @@ -268,11 +293,13 @@ mod tests { let (_, vm) = setup_vm_with_memory(u64_to_usize(SYSTEM_MEM_START + SYSTEM_MEM_SIZE - 4096)); let mut writer = AcpiTableWriter { mem: vm.guest_memory(), - resource_allocator: &mut ResourceAllocator::new().unwrap(), }; + let mut resource_allocator = 
ResourceAllocator::new().unwrap(); let mut sdt = MockSdt(vec![0; usize::try_from(SYSTEM_MEM_SIZE).unwrap()]); - let err = writer.write_acpi_table(&mut sdt).unwrap_err(); + let err = writer + .write_acpi_table(&mut resource_allocator, &mut sdt) + .unwrap_err(); assert!( matches!( err, diff --git a/src/vmm/src/arch/aarch64/fdt.rs b/src/vmm/src/arch/aarch64/fdt.rs index 61200cb2148..be53ef6993d 100644 --- a/src/vmm/src/arch/aarch64/fdt.rs +++ b/src/vmm/src/arch/aarch64/fdt.rs @@ -5,16 +5,15 @@ // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. -use std::collections::HashMap; use std::ffi::CString; use std::fmt::Debug; use vm_fdt::{Error as VmFdtError, FdtWriter, FdtWriterNode}; use vm_memory::GuestMemoryError; -use super::super::DeviceType; use super::cache_info::{CacheEntry, read_cache_config}; use super::gic::GICDevice; +use crate::device_manager::DeviceManager; use crate::device_manager::mmio::MMIODeviceInfo; use crate::devices::acpi::vmgenid::{VMGENID_MEM_SIZE, VmGenId}; use crate::initrd::InitrdConfig; @@ -55,14 +54,14 @@ pub enum FdtError { WriteFdtToMemory(#[from] GuestMemoryError), } +#[allow(clippy::too_many_arguments)] /// Creates the flattened device tree for this aarch64 microVM. pub fn create_fdt( guest_mem: &GuestMemoryMmap, vcpu_mpidr: Vec, cmdline: CString, - device_info: &HashMap<(DeviceType, String), MMIODeviceInfo>, + device_manager: &DeviceManager, gic_device: &GICDevice, - vmgenid: &Option, initrd: &Option, ) -> Result, FdtError> { // Allocate stuff necessary for storing the blob. @@ -89,8 +88,8 @@ pub fn create_fdt( create_timer_node(&mut fdt_writer)?; create_clock_node(&mut fdt_writer)?; create_psci_node(&mut fdt_writer)?; - create_devices_node(&mut fdt_writer, device_info)?; - create_vmgenid_node(&mut fdt_writer, vmgenid)?; + create_devices_node(&mut fdt_writer, device_manager)?; + create_vmgenid_node(&mut fdt_writer, &device_manager.acpi_devices.vmgenid)?; // End Header node. 
fdt_writer.end_node(root)?; @@ -411,25 +410,21 @@ fn create_rtc_node(fdt: &mut FdtWriter, dev_info: &MMIODeviceInfo) -> Result<(), fn create_devices_node( fdt: &mut FdtWriter, - dev_info: &HashMap<(DeviceType, String), MMIODeviceInfo>, + device_manager: &DeviceManager, ) -> Result<(), FdtError> { - // Create one temp Vec to store all virtio devices - let mut ordered_virtio_device: Vec<&MMIODeviceInfo> = Vec::new(); - - for ((device_type, _device_id), info) in dev_info { - match device_type { - DeviceType::BootTimer => (), // since it's not a real device - DeviceType::Rtc => create_rtc_node(fdt, info)?, - DeviceType::Serial => create_serial_node(fdt, info)?, - DeviceType::Virtio(_) => { - ordered_virtio_device.push(info); - } - } + if let Some(rtc_info) = device_manager.mmio_devices.rtc_device_info() { + create_rtc_node(fdt, rtc_info)?; + } + + if let Some(serial_info) = device_manager.mmio_devices.serial_device_info() { + create_serial_node(fdt, serial_info)?; } + let mut virtio_mmio = device_manager.mmio_devices.virtio_device_info(); + // Sort out virtio devices by address from low to high and insert them into fdt table. - ordered_virtio_device.sort_by_key(|a| a.addr); - for ordered_device_info in ordered_virtio_device.drain(..) { + virtio_mmio.sort_by_key(|a| a.addr); + for ordered_device_info in virtio_mmio.drain(..) 
{ create_virtio_node(fdt, ordered_device_info)?; } @@ -439,19 +434,20 @@ fn create_devices_node( #[cfg(test)] mod tests { use std::ffi::CString; - use std::num::NonZeroU32; + use std::sync::{Arc, Mutex}; use kvm_ioctls::Kvm; + use linux_loader::cmdline as kernel_cmdline; use super::*; + use crate::EventManager; use crate::arch::aarch64::gic::create_gic; use crate::arch::aarch64::layout; - use crate::device_manager::resources::ResourceAllocator; + use crate::device_manager::mmio::tests::DummyDevice; + use crate::device_manager::tests::default_device_manager; use crate::test_utils::arch_mem; use crate::vstate::memory::GuestAddress; - const LEN: u64 = 4096; - // The `load` function from the `device_tree` will mistakenly check the actual size // of the buffer with the allocated size. This works around that. fn set_size(buf: &mut [u8], pos: usize, val: u32) { @@ -464,47 +460,37 @@ mod tests { #[test] fn test_create_fdt_with_devices() { let mem = arch_mem(layout::FDT_MAX_SIZE + 0x1000); - - let dev_info: HashMap<(DeviceType, std::string::String), MMIODeviceInfo> = [ - ( - (DeviceType::Serial, DeviceType::Serial.to_string()), - MMIODeviceInfo { - addr: 0x00, - irq: NonZeroU32::new(1), - len: LEN, - }, - ), - ( - (DeviceType::Virtio(1), "virtio".to_string()), - MMIODeviceInfo { - addr: LEN, - irq: NonZeroU32::new(2), - len: LEN, - }, - ), - ( - (DeviceType::Rtc, "rtc".to_string()), - MMIODeviceInfo { - addr: 2 * LEN, - irq: NonZeroU32::new(3), - len: LEN, - }, - ), - ] - .iter() - .cloned() - .collect(); + let mut event_manager = EventManager::new().unwrap(); + let mut device_manager = default_device_manager(); let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let gic = create_gic(&vm, 1, None).unwrap(); + let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap(); + cmdline.insert("console", "/dev/tty0").unwrap(); + + device_manager + .attach_legacy_devices_aarch64(&vm, &mut event_manager, &mut cmdline) + .unwrap(); + let dummy = 
Arc::new(Mutex::new(DummyDevice::new())); + device_manager + .mmio_devices + .register_virtio_test_device( + &vm, + mem.clone(), + &mut device_manager.resource_allocator, + dummy, + &mut cmdline, + "dummy", + ) + .unwrap(); + create_fdt( &mem, vec![0], - CString::new("console=tty0").unwrap(), - &dev_info, + cmdline.as_cstring().unwrap(), + &device_manager, &gic, &None, - &None, ) .unwrap(); } @@ -512,18 +498,21 @@ mod tests { #[test] fn test_create_fdt_with_vmgenid() { let mem = arch_mem(layout::FDT_MAX_SIZE + 0x1000); - let mut resource_allocator = ResourceAllocator::new().unwrap(); - let vmgenid = VmGenId::new(&mem, &mut resource_allocator).unwrap(); + let mut device_manager = default_device_manager(); let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let gic = create_gic(&vm, 1, None).unwrap(); + let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap(); + cmdline.insert("console", "/dev/tty0").unwrap(); + + device_manager.attach_vmgenid_device(&mem, &vm).unwrap(); + create_fdt( &mem, vec![0], CString::new("console=tty0").unwrap(), - &HashMap::<(DeviceType, std::string::String), MMIODeviceInfo>::new(), + &device_manager, &gic, - &Some(vmgenid), &None, ) .unwrap(); @@ -532,6 +521,7 @@ mod tests { #[test] fn test_create_fdt() { let mem = arch_mem(layout::FDT_MAX_SIZE + 0x1000); + let device_manager = default_device_manager(); let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let gic = create_gic(&vm, 1, None).unwrap(); @@ -546,10 +536,9 @@ mod tests { &mem, vec![0], CString::new("console=tty0").unwrap(), - &HashMap::<(DeviceType, std::string::String), MMIODeviceInfo>::new(), + &device_manager, &gic, &None, - &None, ) .unwrap(); @@ -589,6 +578,7 @@ mod tests { #[test] fn test_create_fdt_with_initrd() { let mem = arch_mem(layout::FDT_MAX_SIZE + 0x1000); + let device_manager = default_device_manager(); let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let gic = create_gic(&vm, 1, None).unwrap(); @@ -608,9 +598,8 @@ 
mod tests { &mem, vec![0], CString::new("console=tty0").unwrap(), - &HashMap::<(DeviceType, std::string::String), MMIODeviceInfo>::new(), + &device_manager, &gic, - &None, &Some(initrd), ) .unwrap(); diff --git a/src/vmm/src/arch/aarch64/mod.rs b/src/vmm/src/arch/aarch64/mod.rs index ead827c08c4..6d1d0e26359 100644 --- a/src/vmm/src/arch/aarch64/mod.rs +++ b/src/vmm/src/arch/aarch64/mod.rs @@ -134,9 +134,8 @@ pub fn configure_system_for_boot( vmm.vm.guest_memory(), vcpu_mpidr, cmdline, - vmm.mmio_device_manager.get_device_info(), + &vmm.device_manager, vmm.vm.get_irqchip(), - &vmm.acpi_device_manager.vmgenid, initrd, )?; diff --git a/src/vmm/src/arch/aarch64/vcpu.rs b/src/vmm/src/arch/aarch64/vcpu.rs index 59c00c3ff86..005beb47ec4 100644 --- a/src/vmm/src/arch/aarch64/vcpu.rs +++ b/src/vmm/src/arch/aarch64/vcpu.rs @@ -7,6 +7,7 @@ use std::fmt::{Debug, Write}; use std::mem::offset_of; +use std::sync::Arc; use kvm_bindings::*; use kvm_ioctls::{VcpuExit, VcpuFd, VmFd}; @@ -126,7 +127,7 @@ pub struct KvmVcpu { #[derive(Default, Debug)] pub struct Peripherals { /// mmio bus. 
- pub mmio_bus: Option, + pub mmio_bus: Option>, } impl KvmVcpu { diff --git a/src/vmm/src/arch/x86_64/mod.rs b/src/vmm/src/arch/x86_64/mod.rs index ca350cbf9af..c54ec46c987 100644 --- a/src/vmm/src/arch/x86_64/mod.rs +++ b/src/vmm/src/arch/x86_64/mod.rs @@ -205,7 +205,7 @@ pub fn configure_system_for_boot( // Note that this puts the mptable at the last 1k of Linux's 640k base RAM mptable::setup_mptable( vmm.vm.guest_memory(), - &mut vmm.resource_allocator, + &mut vmm.device_manager.resource_allocator, vcpu_config.vcpu_count, ) .map_err(ConfigurationError::MpTableSetup)?; @@ -226,13 +226,7 @@ pub fn configure_system_for_boot( // Create ACPI tables and write them in guest memory // For the time being we only support ACPI in x86_64 - create_acpi_tables( - vmm.vm.guest_memory(), - &mut vmm.resource_allocator, - &vmm.mmio_device_manager, - &vmm.acpi_device_manager, - vcpus, - )?; + create_acpi_tables(vmm.vm.guest_memory(), &mut vmm.device_manager, vcpus)?; Ok(()) } diff --git a/src/vmm/src/arch/x86_64/vcpu.rs b/src/vmm/src/arch/x86_64/vcpu.rs index b46d8e07b59..eea1f24ae69 100644 --- a/src/vmm/src/arch/x86_64/vcpu.rs +++ b/src/vmm/src/arch/x86_64/vcpu.rs @@ -7,6 +7,7 @@ use std::collections::BTreeMap; use std::fmt::Debug; +use std::sync::Arc; use kvm_bindings::{ CpuId, KVM_MAX_CPUID_ENTRIES, KVM_MAX_MSR_ENTRIES, Msrs, Xsave, kvm_debugregs, kvm_lapic_state, @@ -159,9 +160,9 @@ pub struct KvmVcpu { #[derive(Default, Debug)] pub struct Peripherals { /// Pio bus. - pub pio_bus: Option, + pub pio_bus: Option>, /// Mmio bus. - pub mmio_bus: Option, + pub mmio_bus: Option>, } impl KvmVcpu { @@ -266,7 +267,7 @@ impl KvmVcpu { } /// Sets a Port Mapped IO bus for this vcpu. 
- pub fn set_pio_bus(&mut self, pio_bus: crate::devices::Bus) { + pub fn set_pio_bus(&mut self, pio_bus: Arc) { self.peripherals.pio_bus = Some(pio_bus); } @@ -710,7 +711,9 @@ impl Peripherals { VcpuExit::IoIn(addr, data) => { if let Some(pio_bus) = &self.pio_bus { let _metric = METRICS.vcpu.exit_io_in_agg.record_latency_metrics(); - pio_bus.read(u64::from(addr), data); + if let Err(err) = pio_bus.read(u64::from(addr), data) { + warn!("vcpu: IO read @ {addr:#x}:{:#x} failed: {err}", data.len()); + } METRICS.vcpu.exit_io_in.inc(); } Ok(VcpuEmulation::Handled) @@ -718,7 +721,9 @@ impl Peripherals { VcpuExit::IoOut(addr, data) => { if let Some(pio_bus) = &self.pio_bus { let _metric = METRICS.vcpu.exit_io_out_agg.record_latency_metrics(); - pio_bus.write(u64::from(addr), data); + if let Err(err) = pio_bus.write(u64::from(addr), data) { + warn!("vcpu: IO write @ {addr:#x}:{:#x} failed: {err}", data.len()); + } METRICS.vcpu.exit_io_out.inc(); } Ok(VcpuEmulation::Handled) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index ba54929d451..48590201f2d 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -9,17 +9,12 @@ use std::io; use std::sync::mpsc; use std::sync::{Arc, Mutex}; -use event_manager::{MutEventSubscriber, SubscriberOps}; -use libc::EFD_NONBLOCK; +use event_manager::SubscriberOps; use linux_loader::cmdline::Cmdline as LoaderKernelCmdline; use userfaultfd::Uffd; use utils::time::TimestampUs; #[cfg(target_arch = "aarch64")] use vm_memory::GuestAddress; -#[cfg(target_arch = "aarch64")] -use vm_superio::Rtc; -use vm_superio::Serial; -use vmm_sys_util::eventfd::EventFd; use crate::arch::{ConfigurationError, configure_system_for_boot, load_kernel}; #[cfg(target_arch = "aarch64")] @@ -27,35 +22,24 @@ use crate::construct_kvm_mpidrs; use crate::cpu_config::templates::{ GetCpuTemplate, GetCpuTemplateError, GuestConfigError, KvmCapability, }; -use crate::device_manager::acpi::ACPIDeviceManager; -#[cfg(target_arch = "x86_64")] -use 
crate::device_manager::legacy::PortIODeviceManager; -use crate::device_manager::mmio::{MMIODeviceManager, MmioError}; -use crate::device_manager::persist::{ - ACPIDeviceManagerConstructorArgs, ACPIDeviceManagerRestoreError, MMIODevManagerConstructorArgs, -}; -use crate::device_manager::resources::ResourceAllocator; -use crate::devices::BusDevice; -use crate::devices::acpi::vmgenid::{VmGenId, VmGenIdError}; #[cfg(target_arch = "aarch64")] -use crate::devices::legacy::RTCDevice; -use crate::devices::legacy::serial::SerialOut; -use crate::devices::legacy::{EventFdTrigger, SerialEventsWrapper, SerialWrapper}; +use crate::device_manager::AttachLegacyMmioDeviceError; +use crate::device_manager::{ + AttachMmioDeviceError, AttachVmgenidError, DeviceManager, DevicePersistError, DeviceRestoreArgs, +}; +use crate::devices::acpi::vmgenid::VmGenIdError; use crate::devices::virtio::balloon::Balloon; use crate::devices::virtio::block::device::Block; -use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::net::Net; use crate::devices::virtio::rng::Entropy; -use crate::devices::virtio::transport::mmio::{IrqTrigger, MmioTransport}; use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend}; #[cfg(feature = "gdb")] use crate::gdb; use crate::initrd::{InitrdConfig, InitrdError}; -use crate::logger::{debug, error}; +use crate::logger::debug; use crate::persist::{MicrovmState, MicrovmStateError}; use crate::resources::VmResources; use crate::seccomp::BpfThreadMap; -use crate::snapshot::Persist; use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::machine_config::MachineConfigError; use crate::vstate::kvm::Kvm; @@ -70,7 +54,10 @@ pub enum StartMicrovmError { /// Unable to attach block device to Vmm: {0} AttachBlockDevice(io::Error), /// Unable to attach the VMGenID device: {0} - AttachVmgenidDevice(kvm_ioctls::Error), + AttachVmgenidDevice(#[from] AttachVmgenidError), + #[cfg(target_arch = "aarch64")] + /// Unable to attach legacy MMIO 
devices: {0} + AttachLegacyDevices(#[from] AttachLegacyMmioDeviceError), /// System configuration error: {0} ConfigureSystem(#[from] ConfigurationError), /// Failed to create guest config: {0} @@ -110,7 +97,7 @@ pub enum StartMicrovmError { /// Cannot open the block device backing file: {0} OpenBlockDevice(io::Error), /// Cannot initialize a MMIO Device or add a device to the MMIO Bus or cmdline: {0} - RegisterMmioDevice(#[from] device_manager::mmio::MmioError), + RegisterMmioDevice(#[from] device_manager::AttachMmioDeviceError), /// Cannot restore microvm state: {0} RestoreMicrovmState(MicrovmStateError), /// Cannot set vm resources: {0} @@ -147,35 +134,9 @@ fn create_vmm_and_vcpus( // Build custom CPU config if a custom template is provided. let mut vm = Vm::new(&kvm)?; - let resource_allocator = ResourceAllocator::new()?; - - // Instantiate the MMIO device manager. - let mmio_device_manager = MMIODeviceManager::new(); - - // Instantiate ACPI device manager. - let acpi_device_manager = ACPIDeviceManager::new(); - let (vcpus, vcpus_exit_evt) = vm.create_vcpus(vcpu_count)?; - #[cfg(target_arch = "x86_64")] - let pio_device_manager = { - // Make stdout non blocking. - set_stdout_nonblocking(); - - // Serial device setup. - let serial_device = setup_serial_device(event_manager, std::io::stdin(), io::stdout())?; - - // x86_64 uses the i8042 reset event as the Vmm exit event. 
- let reset_evt = vcpus_exit_evt.try_clone().map_err(VmmError::EventFd)?; - - // create pio dev manager with legacy devices - let mut pio_dev_mgr = - PortIODeviceManager::new(serial_device, reset_evt).map_err(VmmError::LegacyIOBus)?; - pio_dev_mgr - .register_devices(vm.fd()) - .map_err(VmmError::LegacyIOBus)?; - pio_dev_mgr - }; + let device_manager = DeviceManager::new(event_manager, &vcpus_exit_evt, vm.fd())?; let vmm = Vmm { events_observer: Some(std::io::stdin()), @@ -186,11 +147,7 @@ fn create_vmm_and_vcpus( uffd: None, vcpus_handles: Vec::new(), vcpus_exit_evt, - resource_allocator, - mmio_device_manager, - #[cfg(target_arch = "x86_64")] - pio_device_manager, - acpi_device_manager, + device_manager, }; Ok((vmm, vcpus)) @@ -261,7 +218,7 @@ pub fn build_microvm_for_boot( // to maintain the same MMIO address referenced in the documentation // and tests. if vm_resources.boot_timer { - attach_boot_timer_device(&mut vmm, request_ts)?; + vmm.device_manager.attach_boot_timer_device(request_ts)?; } if let Some(balloon) = vm_resources.balloon.get() { @@ -290,9 +247,14 @@ pub fn build_microvm_for_boot( } #[cfg(target_arch = "aarch64")] - attach_legacy_devices_aarch64(event_manager, &mut vmm, &mut boot_cmdline)?; + vmm.device_manager.attach_legacy_devices_aarch64( + vmm.vm.fd(), + event_manager, + &mut boot_cmdline, + )?; - attach_vmgenid_device(&mut vmm)?; + vmm.device_manager + .attach_vmgenid_device(vmm.vm.guest_memory(), vmm.vm.fd())?; #[cfg(target_arch = "aarch64")] if vcpus[0].kvm_vcpu.supports_pvtime() { @@ -411,10 +373,8 @@ pub enum BuildMicrovmFromSnapshotError { MissingVmmSeccompFilters, /// Failed to apply VMM secccomp filter: {0} SeccompFiltersInternal(#[from] crate::seccomp::InstallationError), - /// Failed to restore ACPI device manager: {0} - ACPIDeviManager(#[from] ACPIDeviceManagerRestoreError), - /// VMGenID update failed: {0} - VMGenIDUpdate(std::io::Error), + /// Failed to restore devices: {0} + RestoreDevices(#[from] DevicePersistError), } /// 
Builds and starts a microVM based on the provided MicrovmState. @@ -494,38 +454,17 @@ pub fn build_microvm_from_snapshot( vm_resources.boot_source.config = microvm_state.vm_info.boot_source; // Restore devices states. - let mmio_ctor_args = MMIODevManagerConstructorArgs { + let device_ctor_args = DeviceRestoreArgs { mem: vmm.vm.guest_memory(), vm: vmm.vm.fd(), event_manager, - resource_allocator: &mut vmm.resource_allocator, vm_resources, instance_id: &instance_info.id, restored_from_file: vmm.uffd.is_none(), }; - vmm.mmio_device_manager = - MMIODeviceManager::restore(mmio_ctor_args, &microvm_state.device_states) - .map_err(MicrovmStateError::RestoreDevices)?; - vmm.emulate_serial_init()?; - - { - let acpi_ctor_args = ACPIDeviceManagerConstructorArgs { - mem: vmm.vm.guest_memory(), - resource_allocator: &mut vmm.resource_allocator, - vm: vmm.vm.fd(), - }; - - vmm.acpi_device_manager = - ACPIDeviceManager::restore(acpi_ctor_args, &microvm_state.acpi_dev_state)?; - - // Inject the notification to VMGenID that we have resumed from a snapshot. - // This needs to happen before we resume vCPUs, so that we minimize the time between vCPUs - // resuming and notification being handled by the driver. - vmm.acpi_device_manager - .notify_vmgenid() - .map_err(BuildMicrovmFromSnapshotError::VMGenIDUpdate)?; - } + vmm.device_manager + .restore(&microvm_state.device_states, device_ctor_args)?; // Move vcpus to their own threads and start their state machine in the 'Paused' state. vmm.start_vcpus( @@ -551,29 +490,6 @@ pub fn build_microvm_from_snapshot( Ok(vmm) } -/// Sets up the serial device. 
-pub fn setup_serial_device( - event_manager: &mut EventManager, - input: std::io::Stdin, - out: std::io::Stdout, -) -> Result>, VmmError> { - let interrupt_evt = EventFdTrigger::new(EventFd::new(EFD_NONBLOCK).map_err(VmmError::EventFd)?); - let kick_stdin_read_evt = - EventFdTrigger::new(EventFd::new(EFD_NONBLOCK).map_err(VmmError::EventFd)?); - let serial = Arc::new(Mutex::new(BusDevice::Serial(SerialWrapper { - serial: Serial::with_events( - interrupt_evt, - SerialEventsWrapper { - buffer_ready_event_fd: Some(kick_stdin_read_evt), - }, - SerialOut::Stdout(out), - ), - input: Some(input), - }))); - event_manager.add_subscriber(serial.clone()); - Ok(serial) -} - /// 64 bytes due to alignment requirement in 3.1 of https://www.kernel.org/doc/html/v5.8/virt/kvm/devices/vcpu.html#attribute-kvm-arm-vcpu-pvtime-ipa #[cfg(target_arch = "aarch64")] const STEALTIME_STRUCT_MEM_SIZE: u64 = 64; @@ -587,6 +503,7 @@ fn allocate_pvtime_region( ) -> Result { let size = STEALTIME_STRUCT_MEM_SIZE * vcpu_count as u64; let addr = vmm + .device_manager .resource_allocator .allocate_system_memory(size, STEALTIME_STRUCT_MEM_SIZE, policy) .map_err(StartMicrovmError::AllocateResources)?; @@ -612,108 +529,22 @@ fn setup_pvtime(vmm: &mut Vmm, vcpus: &mut [Vcpu]) -> Result<(), StartMicrovmErr Ok(()) } -#[cfg(target_arch = "aarch64")] -fn attach_legacy_devices_aarch64( - event_manager: &mut EventManager, - vmm: &mut Vmm, - cmdline: &mut LoaderKernelCmdline, -) -> Result<(), VmmError> { - // Serial device setup. - let cmdline_contains_console = cmdline - .as_cstring() - .map_err(|_| VmmError::Cmdline)? - .into_string() - .map_err(|_| VmmError::Cmdline)? - .contains("console="); - - if cmdline_contains_console { - // Make stdout non-blocking. 
- set_stdout_nonblocking(); - let serial = setup_serial_device(event_manager, std::io::stdin(), std::io::stdout())?; - vmm.mmio_device_manager - .register_mmio_serial(vmm.vm.fd(), &mut vmm.resource_allocator, serial, None) - .map_err(VmmError::RegisterMMIODevice)?; - vmm.mmio_device_manager - .add_mmio_serial_to_cmdline(cmdline) - .map_err(VmmError::RegisterMMIODevice)?; - } - - let rtc = RTCDevice(Rtc::with_events( - &crate::devices::legacy::rtc_pl031::METRICS, - )); - vmm.mmio_device_manager - .register_mmio_rtc(&mut vmm.resource_allocator, rtc, None) - .map_err(VmmError::RegisterMMIODevice) -} - -/// Attaches a VirtioDevice device to the device manager and event manager. -fn attach_virtio_device( - event_manager: &mut EventManager, - vmm: &mut Vmm, - id: String, - device: Arc>, - cmdline: &mut LoaderKernelCmdline, - is_vhost_user: bool, -) -> Result<(), MmioError> { - event_manager.add_subscriber(device.clone()); - - let interrupt = Arc::new(IrqTrigger::new()); - // The device mutex mustn't be locked here otherwise it will deadlock. 
- let device = MmioTransport::new( - vmm.vm.guest_memory().clone(), - interrupt, - device, - is_vhost_user, - ); - vmm.mmio_device_manager - .register_mmio_virtio_for_boot( - vmm.vm.fd(), - &mut vmm.resource_allocator, - id, - device, - cmdline, - ) - .map(|_| ()) -} - -pub(crate) fn attach_boot_timer_device( - vmm: &mut Vmm, - request_ts: TimestampUs, -) -> Result<(), MmioError> { - let boot_timer = crate::devices::pseudo::BootTimer::new(request_ts); - - vmm.mmio_device_manager - .register_mmio_boot_timer(&mut vmm.resource_allocator, boot_timer)?; - - Ok(()) -} - -fn attach_vmgenid_device(vmm: &mut Vmm) -> Result<(), StartMicrovmError> { - let vmgenid = VmGenId::new(vmm.vm.guest_memory(), &mut vmm.resource_allocator) - .map_err(StartMicrovmError::CreateVMGenID)?; - - vmm.acpi_device_manager - .attach_vmgenid(vmgenid, vmm.vm.fd()) - .map_err(StartMicrovmError::AttachVmgenidDevice)?; - - Ok(()) -} - fn attach_entropy_device( vmm: &mut Vmm, cmdline: &mut LoaderKernelCmdline, entropy_device: &Arc>, event_manager: &mut EventManager, -) -> Result<(), MmioError> { +) -> Result<(), AttachMmioDeviceError> { let id = entropy_device .lock() .expect("Poisoned lock") .id() .to_string(); - attach_virtio_device( - event_manager, - vmm, + event_manager.add_subscriber(entropy_device.clone()); + vmm.device_manager.attach_virtio_device( + vmm.vm.guest_memory(), + vmm.vm.fd(), id, entropy_device.clone(), cmdline, @@ -743,9 +574,10 @@ fn attach_block_devices<'a, I: Iterator>> + Debug>( (locked.id().to_string(), locked.is_vhost_user()) }; // The device mutex mustn't be locked here otherwise it will deadlock. 
- attach_virtio_device( - event_manager, - vmm, + event_manager.add_subscriber(block.clone()); + vmm.device_manager.attach_virtio_device( + vmm.vm.guest_memory(), + vmm.vm.fd(), id, block.clone(), cmdline, @@ -763,8 +595,16 @@ fn attach_net_devices<'a, I: Iterator>> + Debug>( ) -> Result<(), StartMicrovmError> { for net_device in net_devices { let id = net_device.lock().expect("Poisoned lock").id().clone(); + event_manager.add_subscriber(net_device.clone()); // The device mutex mustn't be locked here otherwise it will deadlock. - attach_virtio_device(event_manager, vmm, id, net_device.clone(), cmdline, false)?; + vmm.device_manager.attach_virtio_device( + vmm.vm.guest_memory(), + vmm.vm.fd(), + id, + net_device.clone(), + cmdline, + false, + )?; } Ok(()) } @@ -774,10 +614,18 @@ fn attach_unixsock_vsock_device( cmdline: &mut LoaderKernelCmdline, unix_vsock: &Arc>>, event_manager: &mut EventManager, -) -> Result<(), MmioError> { +) -> Result<(), AttachMmioDeviceError> { let id = String::from(unix_vsock.lock().expect("Poisoned lock").id()); + event_manager.add_subscriber(unix_vsock.clone()); // The device mutex mustn't be locked here otherwise it will deadlock. - attach_virtio_device(event_manager, vmm, id, unix_vsock.clone(), cmdline, false) + vmm.device_manager.attach_virtio_device( + vmm.vm.guest_memory(), + vmm.vm.fd(), + id, + unix_vsock.clone(), + cmdline, + false, + ) } fn attach_balloon_device( @@ -785,24 +633,18 @@ fn attach_balloon_device( cmdline: &mut LoaderKernelCmdline, balloon: &Arc>, event_manager: &mut EventManager, -) -> Result<(), MmioError> { +) -> Result<(), AttachMmioDeviceError> { let id = String::from(balloon.lock().expect("Poisoned lock").id()); + event_manager.add_subscriber(balloon.clone()); // The device mutex mustn't be locked here otherwise it will deadlock. - attach_virtio_device(event_manager, vmm, id, balloon.clone(), cmdline, false) -} - -// Adds `O_NONBLOCK` to the stdout flags. 
-pub(crate) fn set_stdout_nonblocking() { - // SAFETY: Call is safe since parameters are valid. - let flags = unsafe { libc::fcntl(libc::STDOUT_FILENO, libc::F_GETFL, 0) }; - if flags < 0 { - error!("Could not get Firecracker stdout flags."); - } - // SAFETY: Call is safe since parameters are valid. - let rc = unsafe { libc::fcntl(libc::STDOUT_FILENO, libc::F_SETFL, flags | libc::O_NONBLOCK) }; - if rc < 0 { - error!("Could not set Firecracker stdout to non-blocking."); - } + vmm.device_manager.attach_virtio_device( + vmm.vm.guest_memory(), + vmm.vm.fd(), + id, + balloon.clone(), + cmdline, + false, + ) } #[cfg(test)] @@ -812,8 +654,7 @@ pub(crate) mod tests { use vmm_sys_util::tempfile::TempFile; use super::*; - use crate::arch::DeviceType; - use crate::device_manager::resources::ResourceAllocator; + use crate::device_manager::tests::default_device_manager; use crate::devices::virtio::block::CacheType; use crate::devices::virtio::rng::device::ENTROPY_DEV_ID; use crate::devices::virtio::vsock::{TYPE_VSOCK, VSOCK_DEV_ID}; @@ -886,24 +727,6 @@ pub(crate) mod tests { pub(crate) fn default_vmm() -> Vmm { let (kvm, mut vm) = setup_vm_with_memory(mib_to_bytes(128)); - let mmio_device_manager = MMIODeviceManager::new(); - let acpi_device_manager = ACPIDeviceManager::new(); - #[cfg(target_arch = "x86_64")] - let pio_device_manager = PortIODeviceManager::new( - Arc::new(Mutex::new(BusDevice::Serial(SerialWrapper { - serial: Serial::with_events( - EventFdTrigger::new(EventFd::new(EFD_NONBLOCK).unwrap()), - SerialEventsWrapper { - buffer_ready_event_fd: None, - }, - SerialOut::Sink(std::io::sink()), - ), - input: None, - }))), - EventFd::new(libc::EFD_NONBLOCK).unwrap(), - ) - .unwrap(); - let (_, vcpus_exit_evt) = vm.create_vcpus(1).unwrap(); Vmm { @@ -915,11 +738,7 @@ pub(crate) mod tests { uffd: None, vcpus_handles: Vec::new(), vcpus_exit_evt, - resource_allocator: ResourceAllocator::new().unwrap(), - mmio_device_manager, - #[cfg(target_arch = "x86_64")] - 
pio_device_manager, - acpi_device_manager, + device_manager: default_device_manager(), } } @@ -1015,8 +834,9 @@ pub(crate) mod tests { attach_unixsock_vsock_device(vmm, cmdline, &vsock, event_manager).unwrap(); assert!( - vmm.mmio_device_manager - .get_device(DeviceType::Virtio(TYPE_VSOCK), &vsock_dev_id) + vmm.device_manager + .mmio_devices + .get_virtio_device(TYPE_VSOCK, &vsock_dev_id) .is_some() ); } @@ -1033,16 +853,19 @@ pub(crate) mod tests { attach_entropy_device(vmm, cmdline, &entropy, event_manager).unwrap(); assert!( - vmm.mmio_device_manager - .get_device(DeviceType::Virtio(TYPE_RNG), ENTROPY_DEV_ID) + vmm.device_manager + .mmio_devices + .get_virtio_device(TYPE_RNG, ENTROPY_DEV_ID) .is_some() ); } #[cfg(target_arch = "x86_64")] pub(crate) fn insert_vmgenid_device(vmm: &mut Vmm) { - attach_vmgenid_device(vmm).unwrap(); - assert!(vmm.acpi_device_manager.vmgenid.is_some()); + vmm.device_manager + .attach_vmgenid_device(vmm.vm.guest_memory(), vmm.vm.fd()) + .unwrap(); + assert!(vmm.device_manager.acpi_devices.vmgenid.is_some()); } pub(crate) fn insert_balloon_device( @@ -1058,8 +881,9 @@ pub(crate) mod tests { attach_balloon_device(vmm, cmdline, balloon, event_manager).unwrap(); assert!( - vmm.mmio_device_manager - .get_device(DeviceType::Virtio(TYPE_BALLOON), BALLOON_DEV_ID) + vmm.device_manager + .mmio_devices + .get_virtio_device(TYPE_BALLOON, BALLOON_DEV_ID) .is_some() ); } @@ -1109,8 +933,9 @@ pub(crate) mod tests { insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); assert!(cmdline_contains(&cmdline, "root=/dev/vda ro")); assert!( - vmm.mmio_device_manager - .get_device(DeviceType::Virtio(TYPE_BLOCK), drive_id.as_str()) + vmm.device_manager + .mmio_devices + .get_virtio_device(TYPE_BLOCK, drive_id.as_str()) .is_some() ); } @@ -1130,8 +955,9 @@ pub(crate) mod tests { insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); assert!(cmdline_contains(&cmdline, "root=PARTUUID=0eaa91a0-01 rw")); assert!( 
- vmm.mmio_device_manager - .get_device(DeviceType::Virtio(TYPE_BLOCK), drive_id.as_str()) + vmm.device_manager + .mmio_devices + .get_virtio_device(TYPE_BLOCK, drive_id.as_str()) .is_some() ); } @@ -1152,8 +978,9 @@ pub(crate) mod tests { assert!(!cmdline_contains(&cmdline, "root=PARTUUID=")); assert!(!cmdline_contains(&cmdline, "root=/dev/vda")); assert!( - vmm.mmio_device_manager - .get_device(DeviceType::Virtio(TYPE_BLOCK), drive_id.as_str()) + vmm.device_manager + .mmio_devices + .get_virtio_device(TYPE_BLOCK, drive_id.as_str()) .is_some() ); } @@ -1189,18 +1016,21 @@ pub(crate) mod tests { assert!(cmdline_contains(&cmdline, "root=PARTUUID=0eaa91a0-01 rw")); assert!( - vmm.mmio_device_manager - .get_device(DeviceType::Virtio(TYPE_BLOCK), "root") + vmm.device_manager + .mmio_devices + .get_virtio_device(TYPE_BLOCK, "root") .is_some() ); assert!( - vmm.mmio_device_manager - .get_device(DeviceType::Virtio(TYPE_BLOCK), "secondary") + vmm.device_manager + .mmio_devices + .get_virtio_device(TYPE_BLOCK, "secondary") .is_some() ); assert!( - vmm.mmio_device_manager - .get_device(DeviceType::Virtio(TYPE_BLOCK), "third") + vmm.device_manager + .mmio_devices + .get_virtio_device(TYPE_BLOCK, "third") .is_some() ); @@ -1228,8 +1058,9 @@ pub(crate) mod tests { insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); assert!(cmdline_contains(&cmdline, "root=/dev/vda rw")); assert!( - vmm.mmio_device_manager - .get_device(DeviceType::Virtio(TYPE_BLOCK), drive_id.as_str()) + vmm.device_manager + .mmio_devices + .get_virtio_device(TYPE_BLOCK, drive_id.as_str()) .is_some() ); } @@ -1249,8 +1080,9 @@ pub(crate) mod tests { insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); assert!(cmdline_contains(&cmdline, "root=PARTUUID=0eaa91a0-01 ro")); assert!( - vmm.mmio_device_manager - .get_device(DeviceType::Virtio(TYPE_BLOCK), drive_id.as_str()) + vmm.device_manager + .mmio_devices + .get_virtio_device(TYPE_BLOCK, drive_id.as_str()) 
.is_some() ); } @@ -1270,8 +1102,9 @@ pub(crate) mod tests { insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); assert!(cmdline_contains(&cmdline, "root=/dev/vda rw")); assert!( - vmm.mmio_device_manager - .get_device(DeviceType::Virtio(TYPE_BLOCK), drive_id.as_str()) + vmm.device_manager + .mmio_devices + .get_virtio_device(TYPE_BLOCK, drive_id.as_str()) .is_some() ); } @@ -1282,13 +1115,9 @@ pub(crate) mod tests { let mut vmm = default_vmm(); let request_ts = TimestampUs::default(); - let res = attach_boot_timer_device(&mut vmm, request_ts); + let res = vmm.device_manager.attach_boot_timer_device(request_ts); res.unwrap(); - assert!( - vmm.mmio_device_manager - .get_device(DeviceType::BootTimer, &DeviceType::BootTimer.to_string()) - .is_some() - ); + assert!(vmm.device_manager.mmio_devices.boot_timer.is_some()); } #[test] diff --git a/src/vmm/src/device_manager/legacy.rs b/src/vmm/src/device_manager/legacy.rs index 20b008769a5..cedb7abc32c 100644 --- a/src/vmm/src/device_manager/legacy.rs +++ b/src/vmm/src/device_manager/legacy.rs @@ -16,15 +16,14 @@ use libc::EFD_NONBLOCK; use vm_superio::Serial; use vmm_sys_util::eventfd::EventFd; -use crate::devices::bus::BusDevice; use crate::devices::legacy::serial::SerialOut; -use crate::devices::legacy::{EventFdTrigger, SerialDevice, SerialEventsWrapper}; +use crate::devices::legacy::{EventFdTrigger, I8042Device, SerialDevice, SerialEventsWrapper}; /// Errors corresponding to the `PortIODeviceManager`. #[derive(Debug, derive_more::From, thiserror::Error, displaydoc::Display)] pub enum LegacyDeviceError { /// Failed to add legacy device to Bus: {0} - BusError(crate::devices::BusError), + BusError(vm_device::BusError), /// Failed to create EventFd: {0} EventFd(std::io::Error), } @@ -34,11 +33,10 @@ pub enum LegacyDeviceError { /// The `LegacyDeviceManger` should be initialized only by using the constructor. 
#[derive(Debug)] pub struct PortIODeviceManager { - pub io_bus: crate::devices::Bus, // BusDevice::Serial - pub stdio_serial: Arc>, + pub stdio_serial: Arc>, // BusDevice::I8042Device - pub i8042: Arc>, + pub i8042: Arc>, // Communication event on ports 1 & 3. pub com_evt_1_3: EventFdTrigger, @@ -73,29 +71,24 @@ impl PortIODeviceManager { /// Create a new DeviceManager handling legacy devices (uart, i8042). pub fn new( - serial: Arc>, - i8042_reset_evfd: EventFd, + stdio_serial: Arc>, + i8042: Arc>, ) -> Result { - debug_assert!(matches!(*serial.lock().unwrap(), BusDevice::Serial(_))); - let io_bus = crate::devices::Bus::new(); - let com_evt_1_3 = serial + let com_evt_1_3 = stdio_serial .lock() .expect("Poisoned lock") - .serial_mut() - .unwrap() .serial .interrupt_evt() .try_clone()?; let com_evt_2_4 = EventFdTrigger::new(EventFd::new(EFD_NONBLOCK)?); - let kbd_evt = EventFd::new(libc::EFD_NONBLOCK)?; - - let i8042 = Arc::new(Mutex::new(BusDevice::I8042Device( - crate::devices::legacy::I8042Device::new(i8042_reset_evfd, kbd_evt.try_clone()?), - ))); + let kbd_evt = i8042 + .lock() + .expect("Poisoned lock") + .kbd_interrupt_evt + .try_clone()?; Ok(PortIODeviceManager { - io_bus, - stdio_serial: serial, + stdio_serial, i8042, com_evt_1_3, com_evt_2_4, @@ -104,8 +97,12 @@ impl PortIODeviceManager { } /// Register supported legacy devices. 
- pub fn register_devices(&mut self, vm_fd: &VmFd) -> Result<(), LegacyDeviceError> { - let serial_2_4 = Arc::new(Mutex::new(BusDevice::Serial(SerialDevice { + pub fn register_devices( + &mut self, + io_bus: &vm_device::Bus, + vm_fd: &VmFd, + ) -> Result<(), LegacyDeviceError> { + let serial_2_4 = Arc::new(Mutex::new(SerialDevice { serial: Serial::with_events( self.com_evt_2_4.try_clone()?.try_clone()?, SerialEventsWrapper { @@ -114,8 +111,8 @@ impl PortIODeviceManager { SerialOut::Sink(std::io::sink()), ), input: None, - }))); - let serial_1_3 = Arc::new(Mutex::new(BusDevice::Serial(SerialDevice { + })); + let serial_1_3 = Arc::new(Mutex::new(SerialDevice { serial: Serial::with_events( self.com_evt_1_3.try_clone()?.try_clone()?, SerialEventsWrapper { @@ -124,28 +121,28 @@ impl PortIODeviceManager { SerialOut::Sink(std::io::sink()), ), input: None, - }))); - self.io_bus.insert( + })); + io_bus.insert( self.stdio_serial.clone(), Self::SERIAL_PORT_ADDRESSES[0], Self::SERIAL_PORT_SIZE, )?; - self.io_bus.insert( + io_bus.insert( serial_2_4.clone(), Self::SERIAL_PORT_ADDRESSES[1], Self::SERIAL_PORT_SIZE, )?; - self.io_bus.insert( + io_bus.insert( serial_1_3, Self::SERIAL_PORT_ADDRESSES[2], Self::SERIAL_PORT_SIZE, )?; - self.io_bus.insert( + io_bus.insert( serial_2_4, Self::SERIAL_PORT_ADDRESSES[3], Self::SERIAL_PORT_SIZE, )?; - self.io_bus.insert( + io_bus.insert( self.i8042.clone(), Self::I8042_KDB_DATA_REGISTER_ADDRESS, Self::I8042_KDB_DATA_REGISTER_SIZE, @@ -249,9 +246,10 @@ mod tests { #[test] fn test_register_legacy_devices() { let (_, vm) = setup_vm_with_memory(0x1000); + let io_bus = vm_device::Bus::new(); vm.setup_irqchip().unwrap(); let mut ldm = PortIODeviceManager::new( - Arc::new(Mutex::new(BusDevice::Serial(SerialDevice { + Arc::new(Mutex::new(SerialDevice { serial: Serial::with_events( EventFdTrigger::new(EventFd::new(EFD_NONBLOCK).unwrap()), SerialEventsWrapper { @@ -260,10 +258,12 @@ mod tests { SerialOut::Sink(std::io::sink()), ), input: None, - }))), - 
EventFd::new(libc::EFD_NONBLOCK).unwrap(), + })), + Arc::new(Mutex::new( + I8042Device::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()).unwrap(), + )), ) .unwrap(); - ldm.register_devices(vm.fd()).unwrap(); + ldm.register_devices(&io_bus, vm.fd()).unwrap(); } } diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index 333da93fa8a..398f97bc6ab 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -21,11 +21,8 @@ use serde::{Deserialize, Serialize}; use vm_allocator::AllocPolicy; use super::resources::ResourceAllocator; -use crate::arch::DeviceType; -use crate::arch::DeviceType::Virtio; -use crate::devices::BusDevice; #[cfg(target_arch = "aarch64")] -use crate::devices::legacy::RTCDevice; +use crate::devices::legacy::{RTCDevice, SerialDevice}; use crate::devices::pseudo::BootTimer; use crate::devices::virtio::balloon::Balloon; use crate::devices::virtio::block::device::Block; @@ -44,9 +41,9 @@ pub enum MmioError { /// Failed to allocate requested resource: {0} Allocator(#[from] vm_allocator::Error), /// Failed to insert device on the bus: {0} - BusInsert(crate::devices::BusError), + BusInsert(#[from] vm_device::BusError), /// Failed to allocate requested resourc: {0} - Cmdline(linux_loader::cmdline::Error), + Cmdline(#[from] linux_loader::cmdline::Error), /// Failed to find the device on the bus. DeviceNotFound, /// Invalid device type found on the MMIO bus. @@ -74,7 +71,7 @@ pub enum MmioError { pub const MMIO_LEN: u64 = 0x1000; /// Stores the address range and irq allocated to this device. -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub struct MMIODeviceInfo { /// Mmio address at which the device is registered. 
pub addr: u64, @@ -118,11 +115,29 @@ fn add_virtio_aml( .append_aml_bytes(dsdt_data) } +#[derive(Debug, Clone)] +/// A descriptor for MMIO devices +pub struct MMIODevice { + /// MMIO resources allocated to the device + pub(crate) resources: MMIODeviceInfo, + /// The actual device + pub(crate) inner: Arc>, +} + /// Manages the complexities of registering a MMIO device. #[derive(Debug)] pub struct MMIODeviceManager { - pub(crate) bus: crate::devices::Bus, - pub(crate) id_to_dev_info: HashMap<(DeviceType, String), MMIODeviceInfo>, + /// VirtIO devices using an MMIO transport layer + pub(crate) virtio_devices: HashMap<(u32, String), MMIODevice>, + /// Boot timer device + pub(crate) boot_timer: Option>, + #[cfg(target_arch = "aarch64")] + /// Real-Time clock on Aarch64 platforms + pub(crate) rtc: Option>, + #[cfg(target_arch = "aarch64")] + /// Serial device on Aarch64 platforms + pub(crate) serial: Option>, + #[cfg(target_arch = "x86_64")] // We create the AML byte code for every VirtIO device in the order we build // it, so that we ensure the root block device is appears first in the DSDT. // This is needed, so that the root device appears as `/dev/vda` in the guest @@ -130,7 +145,6 @@ pub struct MMIODeviceManager { // The alternative would be that we iterate the bus to get the data after all // of the devices are build. However, iterating the bus won't give us the // devices in the order they were added. - #[cfg(target_arch = "x86_64")] pub(crate) dsdt_data: Vec, } @@ -138,8 +152,12 @@ impl MMIODeviceManager { /// Create a new DeviceManager handling mmio devices (virtio net, block). 
pub fn new() -> MMIODeviceManager { MMIODeviceManager { - bus: crate::devices::Bus::new(), - id_to_dev_info: HashMap::new(), + virtio_devices: HashMap::new(), + boot_timer: None, + #[cfg(target_arch = "aarch64")] + rtc: None, + #[cfg(target_arch = "aarch64")] + serial: None, #[cfg(target_arch = "x86_64")] dsdt_data: vec![], } @@ -169,40 +187,25 @@ impl MMIODeviceManager { Ok(device_info) } - /// Register a device at some MMIO address. - fn register_mmio_device( - &mut self, - identifier: (DeviceType, String), - device_info: MMIODeviceInfo, - device: Arc>, - ) -> Result<(), MmioError> { - self.bus - .insert(device, device_info.addr, device_info.len) - .map_err(MmioError::BusInsert)?; - self.id_to_dev_info.insert(identifier, device_info); - Ok(()) - } - /// Register a virtio-over-MMIO device to be used via MMIO transport at a specific slot. pub fn register_mmio_virtio( &mut self, vm: &VmFd, device_id: String, - mmio_device: MmioTransport, - device_info: &MMIODeviceInfo, + mmio_bus: &vm_device::Bus, + device: MMIODevice, ) -> Result<(), MmioError> { // Our virtio devices are currently hardcoded to use a single IRQ. // Validate that requirement. 
- let Some(irq) = device_info.irq else { - return Err(MmioError::InvalidIrqConfig); - }; + let irq = device.resources.irq.ok_or(MmioError::InvalidIrqConfig)?; let identifier; { + let mmio_device = device.inner.lock().expect("Poisoned lock"); let locked_device = mmio_device.locked_device(); - identifier = (DeviceType::Virtio(locked_device.device_type()), device_id); + identifier = (locked_device.device_type(), device_id); for (i, queue_evt) in locked_device.queue_events().iter().enumerate() { let io_addr = IoEventAddress::Mmio( - device_info.addr + u64::from(crate::devices::virtio::NOTIFY_REG_OFFSET), + device.resources.addr + u64::from(crate::devices::virtio::NOTIFY_REG_OFFSET), ); vm.register_ioevent(queue_evt, &io_addr, u32::try_from(i).unwrap()) .map_err(MmioError::RegisterIoEvent)?; @@ -211,11 +214,14 @@ impl MMIODeviceManager { .map_err(MmioError::RegisterIrqFd)?; } - self.register_mmio_device( - identifier, - device_info.clone(), - Arc::new(Mutex::new(BusDevice::MmioTransport(mmio_device))), - ) + mmio_bus.insert( + device.inner.clone(), + device.resources.addr, + device.resources.len, + )?; + self.virtio_devices.insert(identifier, device); + + Ok(()) } /// Append a registered virtio-over-MMIO device to the kernel cmdline. 
@@ -246,24 +252,29 @@ impl MMIODeviceManager { vm: &VmFd, resource_allocator: &mut ResourceAllocator, device_id: String, + mmio_bus: &vm_device::Bus, mmio_device: MmioTransport, _cmdline: &mut kernel_cmdline::Cmdline, - ) -> Result { - let device_info = self.allocate_mmio_resources(resource_allocator, 1)?; - self.register_mmio_virtio(vm, device_id, mmio_device, &device_info)?; + ) -> Result<(), MmioError> { + let device = MMIODevice { + resources: self.allocate_mmio_resources(resource_allocator, 1)?, + inner: Arc::new(Mutex::new(mmio_device)), + }; + #[cfg(target_arch = "x86_64")] { - Self::add_virtio_device_to_cmdline(_cmdline, &device_info)?; + Self::add_virtio_device_to_cmdline(_cmdline, &device.resources)?; add_virtio_aml( &mut self.dsdt_data, - device_info.addr, - device_info.len, + device.resources.addr, + device.resources.len, // We are sure that `irqs` has at least one element; allocate_mmio_resources makes // sure of it. - device_info.irq.unwrap().get(), + device.resources.irq.unwrap().get(), )?; } - Ok(device_info) + self.register_mmio_virtio(vm, device_id, mmio_bus, device)?; + Ok(()) } #[cfg(target_arch = "aarch64")] @@ -272,8 +283,9 @@ impl MMIODeviceManager { pub fn register_mmio_serial( &mut self, vm: &VmFd, + mmio_bus: &vm_device::Bus, resource_allocator: &mut ResourceAllocator, - serial: Arc>, + serial: Arc>, device_info_opt: Option, ) -> Result<(), MmioError> { // Create a new MMIODeviceInfo object on boot path or unwrap the @@ -285,35 +297,40 @@ impl MMIODeviceManager { }; vm.register_irqfd( - serial - .lock() - .expect("Poisoned lock") - .serial_ref() - .unwrap() - .serial - .interrupt_evt(), + serial.lock().expect("Poisoned lock").serial.interrupt_evt(), device_info.irq.unwrap().get(), ) .map_err(MmioError::RegisterIrqFd)?; - let identifier = (DeviceType::Serial, DeviceType::Serial.to_string()); - // Register the newly created Serial object. 
- self.register_mmio_device(identifier, device_info, serial) + let device = MMIODevice { + resources: device_info, + inner: serial, + }; + + mmio_bus.insert( + device.inner.clone(), + device.resources.addr, + device.resources.len, + )?; + + self.serial = Some(device); + Ok(()) } #[cfg(target_arch = "aarch64")] /// Append the registered early console to the kernel cmdline. + /// + /// This assumes that the device has been registered with the device manager. pub fn add_mmio_serial_to_cmdline( &self, cmdline: &mut kernel_cmdline::Cmdline, ) -> Result<(), MmioError> { - let device_info = self - .id_to_dev_info - .get(&(DeviceType::Serial, DeviceType::Serial.to_string())) - .ok_or(MmioError::DeviceNotFound)?; - cmdline - .insert("earlycon", &format!("uart,mmio,0x{:08x}", device_info.addr)) - .map_err(MmioError::Cmdline) + let device = self.serial.as_ref().unwrap(); + cmdline.insert( + "earlycon", + &format!("uart,mmio,0x{:08x}", device.resources.addr), + )?; + Ok(()) } #[cfg(target_arch = "aarch64")] @@ -321,8 +338,9 @@ impl MMIODeviceManager { /// given as parameter, otherwise allocate a new MMIO resources for it. pub fn register_mmio_rtc( &mut self, + mmio_bus: &vm_device::Bus, resource_allocator: &mut ResourceAllocator, - rtc: RTCDevice, + rtc: Arc>, device_info_opt: Option, ) -> Result<(), MmioError> { // Create a new MMIODeviceInfo object on boot path or unwrap the @@ -333,88 +351,62 @@ impl MMIODeviceManager { self.allocate_mmio_resources(resource_allocator, 1)? }; - // Create a new identifier for the RTC device. - let identifier = (DeviceType::Rtc, DeviceType::Rtc.to_string()); - // Attach the newly created RTC device. 
- self.register_mmio_device( - identifier, - device_info, - Arc::new(Mutex::new(BusDevice::RTCDevice(rtc))), - ) + let device = MMIODevice { + resources: device_info, + inner: rtc, + }; + + mmio_bus.insert( + device.inner.clone(), + device.resources.addr, + device.resources.len, + )?; + self.rtc = Some(device); + Ok(()) } /// Register a boot timer device. pub fn register_mmio_boot_timer( &mut self, + mmio_bus: &vm_device::Bus, resource_allocator: &mut ResourceAllocator, - device: BootTimer, + boot_timer: Arc>, ) -> Result<(), MmioError> { // Attach a new boot timer device. let device_info = self.allocate_mmio_resources(resource_allocator, 0)?; + let device = MMIODevice { + resources: device_info, + inner: boot_timer, + }; - let identifier = (DeviceType::BootTimer, DeviceType::BootTimer.to_string()); - self.register_mmio_device( - identifier, - device_info, - Arc::new(Mutex::new(BusDevice::BootTimer(device))), - ) - } + mmio_bus.insert( + device.inner.clone(), + device.resources.addr, + device.resources.len, + )?; + self.boot_timer = Some(device); - /// Gets the information of the devices registered up to some point in time. - pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MMIODeviceInfo> { - &self.id_to_dev_info + Ok(()) } /// Gets the specified device. - pub fn get_device( + pub fn get_virtio_device( &self, - device_type: DeviceType, + virtio_type: u32, device_id: &str, - ) -> Option<&Mutex> { - if let Some(device_info) = self - .id_to_dev_info - .get(&(device_type, device_id.to_string())) - { - if let Some((_, device)) = self.bus.get_device(device_info.addr) { - return Some(device); - } - } - None - } - - /// Run fn for each registered device. 
- pub fn for_each_device(&self, mut f: F) -> Result<(), E> - where - F: FnMut(&DeviceType, &String, &MMIODeviceInfo, &Mutex) -> Result<(), E>, - { - for ((device_type, device_id), device_info) in self.get_device_info().iter() { - let bus_device = self - .get_device(*device_type, device_id) - // Safe to unwrap() because we know the device exists. - .unwrap(); - f(device_type, device_id, device_info, bus_device)?; - } - Ok(()) + ) -> Option<&MMIODevice> { + self.virtio_devices + .get(&(virtio_type, device_id.to_string())) } /// Run fn for each registered virtio device. pub fn for_each_virtio_device(&self, mut f: F) -> Result<(), E> where - F: FnMut(u32, &String, &MMIODeviceInfo, Arc>) -> Result<(), E>, + F: FnMut(&u32, &String, &MMIODevice) -> Result<(), E>, { - self.for_each_device(|device_type, device_id, device_info, bus_device| { - if let Virtio(virtio_type) = device_type { - let virtio_device = bus_device - .lock() - .expect("Poisoned lock") - .mmio_transport_ref() - .expect("Unexpected device type") - .device(); - f(*virtio_type, device_id, device_info, virtio_device)?; - } - Ok(()) - })?; - + for ((virtio_type, device_id), mmio_device) in &self.virtio_devices { + f(virtio_type, device_id, mmio_device)?; + } Ok(()) } @@ -429,13 +421,8 @@ impl MMIODeviceManager { T: VirtioDevice + 'static + Debug, F: FnOnce(&mut T) -> Result<(), String>, { - if let Some(busdev) = self.get_device(DeviceType::Virtio(virtio_type), id) { - let virtio_device = busdev - .lock() - .expect("Poisoned lock") - .mmio_transport_ref() - .expect("Unexpected device type") - .device(); + if let Some(device) = self.get_virtio_device(virtio_type, id) { + let virtio_device = device.inner.lock().expect("Poisoned lock").device(); let mut dev = virtio_device.lock().expect("Poisoned lock"); f(dev .as_mut_any() @@ -452,78 +439,97 @@ impl MMIODeviceManager { pub fn kick_devices(&self) { info!("Artificially kick devices."); // We only kick virtio devices for now. 
- let _: Result<(), MmioError> = - self.for_each_virtio_device(|virtio_type, id, _info, dev| { - let mut virtio = dev.lock().expect("Poisoned lock"); - match virtio_type { - TYPE_BALLOON => { - let balloon = virtio.as_mut_any().downcast_mut::().unwrap(); - // If device is activated, kick the balloon queue(s) to make up for any - // pending or in-flight epoll events we may have not captured in snapshot. - // Stats queue doesn't need kicking as it is notified via a `timer_fd`. - if balloon.is_activated() { - info!("kick balloon {}.", id); - balloon.process_virtio_queues(); - } - } - TYPE_BLOCK => { - // We only care about kicking virtio block. - // If we need to kick vhost-user-block we can do nothing. - if let Some(block) = virtio.as_mut_any().downcast_mut::() { - // If device is activated, kick the block queue(s) to make up for any - // pending or in-flight epoll events we may have not captured in - // snapshot. No need to kick Ratelimiters - // because they are restored 'unblocked' so - // any inflight `timer_fd` events can be safely discarded. - if block.is_activated() { - info!("kick block {}.", id); - block.process_virtio_queues(); - } - } + let _: Result<(), MmioError> = self.for_each_virtio_device(|virtio_type, id, device| { + let mmio_transport_locked = device.inner.lock().expect("Poisoned locked"); + let mut virtio = mmio_transport_locked.locked_device(); + match *virtio_type { + TYPE_BALLOON => { + let balloon = virtio.as_mut_any().downcast_mut::().unwrap(); + // If device is activated, kick the balloon queue(s) to make up for any + // pending or in-flight epoll events we may have not captured in snapshot. + // Stats queue doesn't need kicking as it is notified via a `timer_fd`. 
+ if balloon.is_activated() { + info!("kick balloon {}.", id); + balloon.process_virtio_queues(); } - TYPE_NET => { - let net = virtio.as_mut_any().downcast_mut::().unwrap(); - // If device is activated, kick the net queue(s) to make up for any - // pending or in-flight epoll events we may have not captured in snapshot. - // No need to kick Ratelimiters because they are restored 'unblocked' so + } + TYPE_BLOCK => { + // We only care about kicking virtio block. + // If we need to kick vhost-user-block we can do nothing. + if let Some(block) = virtio.as_mut_any().downcast_mut::() { + // If device is activated, kick the block queue(s) to make up for any + // pending or in-flight epoll events we may have not captured in + // snapshot. No need to kick Ratelimiters + // because they are restored 'unblocked' so // any inflight `timer_fd` events can be safely discarded. - if net.is_activated() { - info!("kick net {}.", id); - net.process_virtio_queues(); + if block.is_activated() { + info!("kick block {}.", id); + block.process_virtio_queues(); } } - TYPE_VSOCK => { - // Vsock has complicated protocol that isn't resilient to any packet loss, - // so for Vsock we don't support connection persistence through snapshot. - // Any in-flight packets or events are simply lost. - // Vsock is restored 'empty'. - // The only reason we still `kick` it is to make guest process - // `TRANSPORT_RESET_EVENT` event we sent during snapshot creation. - let vsock = virtio - .as_mut_any() - .downcast_mut::>() - .unwrap(); - if vsock.is_activated() { - info!("kick vsock {id}."); - vsock.signal_used_queue(0).unwrap(); - } + } + TYPE_NET => { + let net = virtio.as_mut_any().downcast_mut::().unwrap(); + // If device is activated, kick the net queue(s) to make up for any + // pending or in-flight epoll events we may have not captured in snapshot. + // No need to kick Ratelimiters because they are restored 'unblocked' so + // any inflight `timer_fd` events can be safely discarded. 
+ if net.is_activated() { + info!("kick net {}.", id); + net.process_virtio_queues(); } - TYPE_RNG => { - let entropy = virtio.as_mut_any().downcast_mut::().unwrap(); - if entropy.is_activated() { - info!("kick entropy {id}."); - entropy.process_virtio_queues(); - } + } + TYPE_VSOCK => { + // Vsock has complicated protocol that isn't resilient to any packet loss, + // so for Vsock we don't support connection persistence through snapshot. + // Any in-flight packets or events are simply lost. + // Vsock is restored 'empty'. + // The only reason we still `kick` it is to make guest process + // `TRANSPORT_RESET_EVENT` event we sent during snapshot creation. + let vsock = virtio + .as_mut_any() + .downcast_mut::>() + .unwrap(); + if vsock.is_activated() { + info!("kick vsock {id}."); + vsock.signal_used_queue(0).unwrap(); } - _ => (), } - Ok(()) - }); + TYPE_RNG => { + let entropy = virtio.as_mut_any().downcast_mut::().unwrap(); + if entropy.is_activated() { + info!("kick entropy {id}."); + entropy.process_virtio_queues(); + } + } + _ => (), + } + Ok(()) + }); + } + + #[cfg(target_arch = "aarch64")] + pub fn virtio_device_info(&self) -> Vec<&MMIODeviceInfo> { + let mut device_info = Vec::new(); + for (_, dev) in self.virtio_devices.iter() { + device_info.push(&dev.resources); + } + device_info + } + + #[cfg(target_arch = "aarch64")] + pub fn rtc_device_info(&self) -> Option<&MMIODeviceInfo> { + self.rtc.as_ref().map(|device| &device.resources) + } + + #[cfg(target_arch = "aarch64")] + pub fn serial_device_info(&self) -> Option<&MMIODeviceInfo> { + self.serial.as_ref().map(|device| &device.resources) } } #[cfg(test)] -mod tests { +pub(crate) mod tests { use std::ops::Deref; use std::sync::Arc; @@ -531,7 +537,6 @@ mod tests { use vmm_sys_util::eventfd::EventFd; use super::*; - use crate::Vm; use crate::devices::virtio::ActivateError; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::queue::Queue; @@ -540,11 +545,12 @@ mod tests { use 
crate::test_utils::multi_region_mem_raw; use crate::vstate::kvm::Kvm; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; + use crate::{Vm, arch}; const QUEUE_SIZES: &[u16] = &[64]; impl MMIODeviceManager { - fn register_virtio_test_device( + pub(crate) fn register_virtio_test_device( &mut self, vm: &VmFd, guest_mem: GuestMemoryMmap, @@ -554,30 +560,36 @@ mod tests { dev_id: &str, ) -> Result { let interrupt = Arc::new(IrqTrigger::new()); - let mmio_device = MmioTransport::new(guest_mem, interrupt, device, false); - let device_info = self.register_mmio_virtio_for_boot( + let mmio_bus = vm_device::Bus::new(); + let mmio_device = MmioTransport::new(guest_mem, interrupt, device.clone(), false); + self.register_mmio_virtio_for_boot( vm, resource_allocator, dev_id.to_string(), + &mmio_bus, mmio_device, cmdline, )?; - Ok(device_info.addr) + Ok(self + .get_virtio_device(device.lock().unwrap().device_type(), dev_id) + .unwrap() + .resources + .addr) } #[cfg(target_arch = "x86_64")] /// Gets the number of interrupts used by the devices registered. 
pub fn used_irqs_count(&self) -> usize { - self.get_device_info() + self.virtio_devices .iter() - .filter(|(_, device_info)| device_info.irq.is_some()) + .filter(|(_, mmio_dev)| mmio_dev.resources.irq.is_some()) .count() } } #[allow(dead_code)] #[derive(Debug)] - struct DummyDevice { + pub(crate) struct DummyDevice { dummy: u32, queues: Vec, queue_evts: [EventFd; 1], @@ -683,6 +695,29 @@ mod tests { "dummy", ) .unwrap(); + + assert!(device_manager.get_virtio_device(0, "foo").is_none()); + let dev = device_manager.get_virtio_device(0, "dummy").unwrap(); + assert_eq!(dev.resources.addr, arch::MMIO_MEM_START); + assert_eq!(dev.resources.len, MMIO_LEN); + assert_eq!( + dev.resources.irq, + Some(NonZeroU32::try_from(arch::IRQ_BASE).unwrap()) + ); + + device_manager + .for_each_virtio_device(|virtio_type, device_id, mmio_device| { + assert_eq!(*virtio_type, 0); + assert_eq!(device_id, "dummy"); + assert_eq!(mmio_device.resources.addr, arch::MMIO_MEM_START); + assert_eq!(mmio_device.resources.len, MMIO_LEN); + assert_eq!( + mmio_device.resources.irq, + Some(NonZeroU32::try_from(arch::IRQ_BASE).unwrap()) + ); + Ok::<(), ()>(()) + }) + .unwrap(); } #[test] @@ -773,29 +808,24 @@ mod tests { &id, ) .unwrap(); - assert!( - device_manager - .get_device(DeviceType::Virtio(type_id), &id) - .is_some() - ); + assert!(device_manager.get_virtio_device(type_id, &id).is_some()); assert_eq!( addr, - device_manager.id_to_dev_info[&(DeviceType::Virtio(type_id), id.clone())].addr + device_manager.virtio_devices[&(type_id, id.clone())] + .resources + .addr ); assert_eq!( crate::arch::IRQ_BASE, - device_manager.id_to_dev_info[&(DeviceType::Virtio(type_id), id)] + device_manager.virtio_devices[&(type_id, id)] + .resources .irq .unwrap() .get() ); let id = "bar"; - assert!( - device_manager - .get_device(DeviceType::Virtio(type_id), id) - .is_none() - ); + assert!(device_manager.get_virtio_device(type_id, id).is_none()); let dummy2 = Arc::new(Mutex::new(DummyDevice::new())); let id2 = 
String::from("foo2"); @@ -811,15 +841,16 @@ mod tests { .unwrap(); let mut count = 0; - let _: Result<(), MmioError> = device_manager.for_each_device(|devtype, devid, _, _| { - assert_eq!(*devtype, DeviceType::Virtio(type_id)); - match devid.as_str() { - "foo" => count += 1, - "foo2" => count += 2, - _ => unreachable!(), - }; - Ok(()) - }); + let _: Result<(), MmioError> = + device_manager.for_each_virtio_device(|devtype, devid, _| { + assert_eq!(*devtype, type_id); + match devid.as_str() { + "foo" => count += 1, + "foo2" => count += 2, + _ => unreachable!(), + }; + Ok(()) + }); assert_eq!(count, 3); #[cfg(target_arch = "x86_64")] assert_eq!(device_manager.used_irqs_count(), 2); diff --git a/src/vmm/src/device_manager/mod.rs b/src/vmm/src/device_manager/mod.rs index bc16604b645..3e3f0f0ffda 100644 --- a/src/vmm/src/device_manager/mod.rs +++ b/src/vmm/src/device_manager/mod.rs @@ -5,6 +5,38 @@ // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. 
+use std::fmt::Debug; +use std::sync::{Arc, Mutex}; + +use acpi::ACPIDeviceManager; +use event_manager::{MutEventSubscriber, SubscriberOps}; +use kvm_ioctls::VmFd; +#[cfg(target_arch = "x86_64")] +use legacy::{LegacyDeviceError, PortIODeviceManager}; +use linux_loader::loader::Cmdline; +use log::error; +use mmio::{MMIODeviceManager, MmioError}; +use persist::{ACPIDeviceManagerConstructorArgs, MMIODevManagerConstructorArgs}; +use resources::ResourceAllocator; +use serde::{Deserialize, Serialize}; +use utils::time::TimestampUs; +use vmm_sys_util::eventfd::EventFd; + +use crate::devices::acpi::vmgenid::{VmGenId, VmGenIdError}; +#[cfg(target_arch = "x86_64")] +use crate::devices::legacy::I8042Device; +#[cfg(target_arch = "aarch64")] +use crate::devices::legacy::RTCDevice; +use crate::devices::legacy::serial::SerialOut; +use crate::devices::legacy::{IER_RDA_BIT, IER_RDA_OFFSET, SerialDevice}; +use crate::devices::pseudo::BootTimer; +use crate::devices::virtio::device::VirtioDevice; +use crate::devices::virtio::transport::mmio::{IrqTrigger, MmioTransport}; +use crate::resources::VmResources; +use crate::snapshot::Persist; +use crate::vstate::memory::GuestMemoryMmap; +use crate::{EmulateSerialInitError, EventManager}; + /// ACPI device manager. pub mod acpi; /// Legacy Device Manager. @@ -15,3 +47,418 @@ pub mod mmio; pub mod persist; /// Resource manager for devices. 
pub mod resources; + +#[derive(Debug, thiserror::Error, displaydoc::Display)] +/// Error while creating a new [`DeviceManager`] +pub enum DeviceManagerCreateError { + /// Error with EventFd: {0} + EventFd(#[from] std::io::Error), + #[cfg(target_arch = "x86_64")] + /// Legacy device manager error: {0} + PortIOError(#[from] LegacyDeviceError), + /// Resource allocator error: {0} + ResourceAllocator(#[from] vm_allocator::Error), +} + +#[derive(Debug, thiserror::Error, displaydoc::Display)] +/// Error while attaching a VirtIO device +pub enum AttachMmioDeviceError { + /// MMIO transport error: {0} + MmioTransport(#[from] MmioError), + /// Error inserting device in bus: {0} + Bus(#[from] vm_device::BusError), +} + +#[derive(Debug, thiserror::Error, displaydoc::Display)] +/// Error while attaching the VMGenID device +pub enum AttachVmgenidError { + /// Error creating VMGenID device: {0} + CreateVmGenID(#[from] VmGenIdError), + /// Error while registering VMGenID with KVM: {0} + AttachVmGenID(#[from] kvm_ioctls::Error), +} + +#[cfg(target_arch = "aarch64")] +#[derive(Debug, thiserror::Error, displaydoc::Display)] +/// Error while attaching the VMGenID device +pub enum AttachLegacyMmioDeviceError { + /// Cmdline error + Cmdline, + /// Error creating serial device: {0} + CreateSerial(#[from] std::io::Error), + /// Error registering device: {0} + RegisterMMIODevice(#[from] MmioError), + /// Error inserting device in the Bus: {0} + Bus(#[from] vm_device::BusError), +} + +#[derive(Debug)] +/// A manager of all peripheral devices of Firecracker +pub struct DeviceManager { + /// Allocator for system memory and interrupt numbers + pub resource_allocator: ResourceAllocator, + /// MMIO bus + pub mmio_bus: Arc, + /// MMIO devices + pub mmio_devices: MMIODeviceManager, + #[cfg(target_arch = "x86_64")] + /// Port IO bus + pub pio_bus: Arc, + #[cfg(target_arch = "x86_64")] + /// Legacy devices + pub legacy_devices: PortIODeviceManager, + /// ACPI devices + pub acpi_devices: 
ACPIDeviceManager, +} + +impl DeviceManager { + // Adds `O_NONBLOCK` to the stdout flags. + fn set_stdout_nonblocking() { + // SAFETY: Call is safe since parameters are valid. + let flags = unsafe { libc::fcntl(libc::STDOUT_FILENO, libc::F_GETFL, 0) }; + if flags < 0 { + error!("Could not get Firecracker stdout flags."); + } + // SAFETY: Call is safe since parameters are valid. + let rc = + unsafe { libc::fcntl(libc::STDOUT_FILENO, libc::F_SETFL, flags | libc::O_NONBLOCK) }; + if rc < 0 { + error!("Could not set Firecracker stdout to non-blocking."); + } + } + + /// Sets up the serial device. + fn setup_serial_device( + event_manager: &mut EventManager, + ) -> Result>, std::io::Error> { + let serial = Arc::new(Mutex::new(SerialDevice::new( + Some(std::io::stdin()), + SerialOut::Stdout(std::io::stdout()), + )?)); + event_manager.add_subscriber(serial.clone()); + Ok(serial) + } + + #[cfg_attr(target_arch = "aarch64", allow(unused))] + pub fn new( + event_manager: &mut EventManager, + vcpu_exit_evt: &EventFd, + vmfd: &VmFd, + ) -> Result { + let mmio_bus = Arc::new(vm_device::Bus::new()); + + #[cfg(target_arch = "x86_64")] + let pio_bus = Arc::new(vm_device::Bus::new()); + #[cfg(target_arch = "x86_64")] + let legacy_devices = { + Self::set_stdout_nonblocking(); + + // Create serial device + let serial = Self::setup_serial_device(event_manager)?; + let reset_evt = vcpu_exit_evt + .try_clone() + .map_err(DeviceManagerCreateError::EventFd)?; + // Create keyboard emulator for reset event + let i8042 = Arc::new(Mutex::new(I8042Device::new(reset_evt)?)); + + // create pio dev manager with legacy devices + let mut legacy_devices = PortIODeviceManager::new(serial, i8042)?; + legacy_devices.register_devices(&pio_bus, vmfd)?; + legacy_devices + }; + + Ok(DeviceManager { + resource_allocator: ResourceAllocator::new()?, + mmio_bus, + mmio_devices: MMIODeviceManager::new(), + #[cfg(target_arch = "x86_64")] + pio_bus, + #[cfg(target_arch = "x86_64")] + legacy_devices, + 
acpi_devices: ACPIDeviceManager::new(), + }) + } + + /// Attaches a VirtioDevice device to the device manager and event manager. + pub(crate) fn attach_virtio_device( + &mut self, + mem: &GuestMemoryMmap, + vmfd: &VmFd, + id: String, + device: Arc>, + cmdline: &mut Cmdline, + is_vhost_user: bool, + ) -> Result<(), AttachMmioDeviceError> { + let interrupt = Arc::new(IrqTrigger::new()); + // The device mutex mustn't be locked here otherwise it will deadlock. + let device = MmioTransport::new(mem.clone(), interrupt, device, is_vhost_user); + self.mmio_devices.register_mmio_virtio_for_boot( + vmfd, + &mut self.resource_allocator, + id, + &self.mmio_bus, + device, + cmdline, + )?; + + Ok(()) + } + + /// Attaches a [`BootTimer`] to the VM + pub(crate) fn attach_boot_timer_device( + &mut self, + request_ts: TimestampUs, + ) -> Result<(), AttachMmioDeviceError> { + let boot_timer = Arc::new(Mutex::new(BootTimer::new(request_ts))); + + self.mmio_devices.register_mmio_boot_timer( + &self.mmio_bus, + &mut self.resource_allocator, + boot_timer, + )?; + + Ok(()) + } + + pub(crate) fn attach_vmgenid_device( + &mut self, + mem: &GuestMemoryMmap, + vmfd: &VmFd, + ) -> Result<(), AttachVmgenidError> { + let vmgenid = VmGenId::new(mem, &mut self.resource_allocator)?; + self.acpi_devices.attach_vmgenid(vmgenid, vmfd)?; + Ok(()) + } + + #[cfg(target_arch = "aarch64")] + pub(crate) fn attach_legacy_devices_aarch64( + &mut self, + vmfd: &VmFd, + event_manager: &mut EventManager, + cmdline: &mut Cmdline, + ) -> Result<(), AttachLegacyMmioDeviceError> { + // Serial device setup. + let cmdline_contains_console = cmdline + .as_cstring() + .map_err(|_| AttachLegacyMmioDeviceError::Cmdline)? + .into_string() + .map_err(|_| AttachLegacyMmioDeviceError::Cmdline)? + .contains("console="); + + if cmdline_contains_console { + // Make stdout non-blocking. 
+ Self::set_stdout_nonblocking(); + let serial = Self::setup_serial_device(event_manager)?; + self.mmio_devices.register_mmio_serial( + vmfd, + &self.mmio_bus, + &mut self.resource_allocator, + serial, + None, + )?; + self.mmio_devices.add_mmio_serial_to_cmdline(cmdline)?; + } + + let rtc = Arc::new(Mutex::new(RTCDevice::new())); + self.mmio_devices.register_mmio_rtc( + &self.mmio_bus, + &mut self.resource_allocator, + rtc, + None, + )?; + Ok(()) + } +} + +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +/// State of devices in the system +pub struct DevicesState { + /// MMIO devices state + pub mmio_state: persist::DeviceStates, + /// ACPI devices state + pub acpi_state: persist::ACPIDeviceManagerState, +} + +#[derive(Debug, thiserror::Error, displaydoc::Display)] +pub enum DevicePersistError { + /// Error restoring MMIO devices: {0} + MmioRestore(#[from] persist::DevicePersistError), + /// Error restoring ACPI devices: {0} + AcpiRestore(#[from] persist::ACPIDeviceManagerRestoreError), + /// Error notifying VMGenID device: {0} + VmGenidUpdate(#[from] std::io::Error), + /// Error resetting serial console: {0} + SerialRestore(#[from] EmulateSerialInitError), + /// Error inserting device in bus: {0} + Bus(#[from] vm_device::BusError), +} + +pub struct DeviceRestoreArgs<'a> { + pub mem: &'a GuestMemoryMmap, + pub vm: &'a VmFd, + pub event_manager: &'a mut EventManager, + pub vm_resources: &'a mut VmResources, + pub instance_id: &'a str, + pub restored_from_file: bool, +} + +impl DeviceManager { + pub fn save(&self) -> DevicesState { + DevicesState { + mmio_state: self.mmio_devices.save(), + acpi_state: self.acpi_devices.save(), + } + } + + /// Sets RDA bit in serial console + pub fn emulate_serial_init(&self) -> Result<(), EmulateSerialInitError> { + // When restoring from a previously saved state, there is no serial + // driver initialization, therefore the RDA (Received Data Available) + // interrupt is not enabled. 
Because of that, the driver won't get + // notified of any bytes that we send to the guest. The clean solution + // would be to save the whole serial device state when we do the vm + // serialization. For now we set that bit manually + + #[cfg(target_arch = "aarch64")] + { + if let Some(device) = &self.mmio_devices.serial { + let mut device_locked = device.inner.lock().expect("Poisoned lock"); + + device_locked + .serial + .write(IER_RDA_OFFSET, IER_RDA_BIT) + .map_err(|_| EmulateSerialInitError(std::io::Error::last_os_error()))?; + } + Ok(()) + } + + #[cfg(target_arch = "x86_64")] + { + let mut serial = self + .legacy_devices + .stdio_serial + .lock() + .expect("Poisoned lock"); + + serial + .serial + .write(IER_RDA_OFFSET, IER_RDA_BIT) + .map_err(|_| EmulateSerialInitError(std::io::Error::last_os_error()))?; + Ok(()) + } + } + + pub fn restore( + &mut self, + state: &DevicesState, + restore_args: DeviceRestoreArgs, + ) -> Result<(), DevicePersistError> { + // Restore MMIO devices + let mmio_ctor_args = MMIODevManagerConstructorArgs { + mmio_bus: &self.mmio_bus, + mem: restore_args.mem, + vm: restore_args.vm, + event_manager: restore_args.event_manager, + resource_allocator: &mut self.resource_allocator, + vm_resources: restore_args.vm_resources, + instance_id: restore_args.instance_id, + restored_from_file: restore_args.restored_from_file, + }; + self.mmio_devices = MMIODeviceManager::restore(mmio_ctor_args, &state.mmio_state)?; + + // Restore serial. 
+ // We need to do that after we restore mmio devices, otherwise it won't succeed in Aarch64 + self.emulate_serial_init()?; + + // Restore ACPI devices + let acpi_ctor_args = ACPIDeviceManagerConstructorArgs { + mem: restore_args.mem, + resource_allocator: &mut self.resource_allocator, + vm: restore_args.vm, + }; + self.acpi_devices = ACPIDeviceManager::restore(acpi_ctor_args, &state.acpi_state)?; + self.acpi_devices.notify_vmgenid()?; + + Ok(()) + } +} + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + #[cfg(target_arch = "aarch64")] + use crate::builder::tests::default_vmm; + + pub(crate) fn default_device_manager() -> DeviceManager { + let mmio_bus = Arc::new(vm_device::Bus::new()); + #[cfg(target_arch = "x86_64")] + let pio_bus = Arc::new(vm_device::Bus::new()); + let mmio_devices = MMIODeviceManager::new(); + let acpi_devices = ACPIDeviceManager::new(); + let resource_allocator = ResourceAllocator::new().unwrap(); + + #[cfg(target_arch = "x86_64")] + let legacy_devices = PortIODeviceManager::new( + Arc::new(Mutex::new( + SerialDevice::new(None, SerialOut::Sink(std::io::sink())).unwrap(), + )), + Arc::new(Mutex::new( + I8042Device::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()).unwrap(), + )), + ) + .unwrap(); + + DeviceManager { + resource_allocator, + mmio_bus, + mmio_devices, + #[cfg(target_arch = "x86_64")] + pio_bus, + #[cfg(target_arch = "x86_64")] + legacy_devices, + acpi_devices, + } + } + + #[cfg(target_arch = "aarch64")] + #[test] + fn test_attach_legacy_serial() { + let mut vmm = default_vmm(); + assert!(vmm.device_manager.mmio_devices.rtc.is_none()); + assert!(vmm.device_manager.mmio_devices.serial.is_none()); + + let mut cmdline = Cmdline::new(4096).unwrap(); + let mut event_manager = EventManager::new().unwrap(); + vmm.device_manager + .attach_legacy_devices_aarch64(vmm.vm.fd(), &mut event_manager, &mut cmdline) + .unwrap(); + assert!(vmm.device_manager.mmio_devices.rtc.is_some()); + 
assert!(vmm.device_manager.mmio_devices.serial.is_none()); + + let mut vmm = default_vmm(); + cmdline.insert("console", "/dev/blah").unwrap(); + vmm.device_manager + .attach_legacy_devices_aarch64(vmm.vm.fd(), &mut event_manager, &mut cmdline) + .unwrap(); + assert!(vmm.device_manager.mmio_devices.rtc.is_some()); + assert!(vmm.device_manager.mmio_devices.serial.is_some()); + + assert!( + cmdline + .as_cstring() + .unwrap() + .into_string() + .unwrap() + .contains(&format!( + "earlycon=uart,mmio,0x{:08x}", + vmm.device_manager + .mmio_devices + .serial + .as_ref() + .unwrap() + .resources + .addr + )) + ); + } +} diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index 2f331e644ad..99216ec77e7 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -19,6 +19,10 @@ use crate::EventManager; #[cfg(target_arch = "aarch64")] use crate::arch::DeviceType; use crate::devices::acpi::vmgenid::{VMGenIDState, VMGenIdConstructorArgs, VmGenId, VmGenIdError}; +#[cfg(target_arch = "aarch64")] +use crate::devices::legacy::serial::SerialOut; +#[cfg(target_arch = "aarch64")] +use crate::devices::legacy::{RTCDevice, SerialDevice}; use crate::devices::virtio::balloon::persist::{BalloonConstructorArgs, BalloonState}; use crate::devices::virtio::balloon::{Balloon, BalloonError}; use crate::devices::virtio::block::BlockError; @@ -59,9 +63,11 @@ pub enum DevicePersistError { DeviceManager(#[from] super::mmio::MmioError), /// Mmio transport MmioTransport, + /// Bus error: {0} + Bus(#[from] vm_device::BusError), #[cfg(target_arch = "aarch64")] /// Legacy: {0} - Legacy(#[from] crate::VmmError), + Legacy(#[from] std::io::Error), /// Net: {0} Net(#[from] NetError), /// Vsock: {0} @@ -208,6 +214,7 @@ pub enum SharedDeviceType { } pub struct MMIODevManagerConstructorArgs<'a> { + pub mmio_bus: &'a vm_device::Bus, pub mem: &'a GuestMemoryMmap, pub vm: &'a VmFd, pub event_manager: &'a mut EventManager, @@ -285,32 
+292,29 @@ impl<'a> Persist<'a> for MMIODeviceManager { fn save(&self) -> Self::State { let mut states = DeviceStates::default(); - let _: Result<(), ()> = self.for_each_device(|devtype, devid, device_info, bus_dev| { - if *devtype == crate::arch::DeviceType::BootTimer { - // No need to save BootTimer state. - return Ok(()); - } - #[cfg(target_arch = "aarch64")] - { - if *devtype == DeviceType::Serial || *devtype == DeviceType::Rtc { - states.legacy_devices.push(ConnectedLegacyState { - type_: *devtype, - device_info: device_info.clone(), - }); - return Ok(()); - } + #[cfg(target_arch = "aarch64")] + { + if let Some(device) = &self.serial { + states.legacy_devices.push(ConnectedLegacyState { + type_: DeviceType::Serial, + device_info: device.resources, + }); } - let locked_bus_dev = bus_dev.lock().expect("Poisoned lock"); - - let mmio_transport = locked_bus_dev - .mmio_transport_ref() - .expect("Unexpected device type"); + if let Some(device) = &self.rtc { + states.legacy_devices.push(ConnectedLegacyState { + type_: DeviceType::Rtc, + device_info: device.resources, + }); + } + } - let transport_state = mmio_transport.save(); + let _: Result<(), ()> = self.for_each_virtio_device(|_, devid, device| { + let mmio_transport_locked = device.inner.lock().expect("Poisoned lock"); + let transport_state = mmio_transport_locked.save(); - let mut locked_device = mmio_transport.locked_device(); + let mut locked_device = mmio_transport_locked.locked_device(); match locked_device.device_type() { TYPE_BALLOON => { let balloon_state = locked_device @@ -322,7 +326,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { device_id: devid.clone(), device_state: balloon_state, transport_state, - device_info: device_info.clone(), + device_info: device.resources, }); } // Both virtio-block and vhost-user-block share same device type. 
@@ -339,7 +343,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { device_id: devid.clone(), device_state: block.save(), transport_state, - device_info: device_info.clone(), + device_info: device.resources, }) } } @@ -356,7 +360,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { device_id: devid.clone(), device_state: net.save(), transport_state, - device_info: device_info.clone(), + device_info: device.resources, }); } TYPE_VSOCK => { @@ -385,7 +389,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { device_id: devid.clone(), device_state: vsock_state, transport_state, - device_info: device_info.clone(), + device_info: device.resources, }); } TYPE_RNG => { @@ -398,7 +402,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { device_id: devid.clone(), device_state: entropy.save(), transport_state, - device_info: device_info.clone(), + device_info: device.resources, }); } _ => unreachable!(), @@ -421,11 +425,13 @@ impl<'a> Persist<'a> for MMIODeviceManager { { for state in &state.legacy_devices { if state.type_ == DeviceType::Serial { - let serial = crate::builder::setup_serial_device( - constructor_args.event_manager, - std::io::stdin(), - std::io::stdout(), - )?; + let serial = Arc::new(Mutex::new(SerialDevice::new( + Some(std::io::stdin()), + SerialOut::Stdout(std::io::stdout()), + )?)); + constructor_args + .event_manager + .add_subscriber(serial.clone()); constructor_args .resource_allocator @@ -440,15 +446,14 @@ impl<'a> Persist<'a> for MMIODeviceManager { dev_manager.register_mmio_serial( vm, + constructor_args.mmio_bus, constructor_args.resource_allocator, serial, - Some(state.device_info.clone()), + Some(state.device_info), )?; } if state.type_ == DeviceType::Rtc { - let rtc = crate::devices::legacy::RTCDevice(vm_superio::Rtc::with_events( - &crate::devices::legacy::rtc_pl031::METRICS, - )); + let rtc = Arc::new(Mutex::new(RTCDevice::new())); constructor_args .resource_allocator .allocate_mmio_memory( @@ -460,9 +465,10 @@ impl<'a> Persist<'a> for MMIODeviceManager { 
DevicePersistError::DeviceManager(super::mmio::MmioError::Allocator(e)) })?; dev_manager.register_mmio_rtc( + constructor_args.mmio_bus, constructor_args.resource_allocator, rtc, - Some(state.device_info.clone()), + Some(state.device_info), )?; } } @@ -475,6 +481,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { state: &MmioTransportState, interrupt: Arc, device_info: &MMIODeviceInfo, + mmio_bus: &vm_device::Bus, event_manager: &mut EventManager| -> Result<(), Self::Error> { let restore_args = MmioTransportConstructorArgs { @@ -483,8 +490,10 @@ impl<'a> Persist<'a> for MMIODeviceManager { device, is_vhost_user, }; - let mmio_transport = MmioTransport::restore(restore_args, state) - .map_err(|()| DevicePersistError::MmioTransport)?; + let mmio_transport = Arc::new(Mutex::new( + MmioTransport::restore(restore_args, state) + .map_err(|()| DevicePersistError::MmioTransport)?, + )); // We do not currently require exact re-allocation of IDs via // `dev_manager.irq_allocator.allocate_id()` and currently cannot do @@ -507,7 +516,15 @@ impl<'a> Persist<'a> for MMIODeviceManager { DevicePersistError::DeviceManager(super::mmio::MmioError::Allocator(e)) })?; - dev_manager.register_mmio_virtio(vm, id.clone(), mmio_transport, device_info)?; + dev_manager.register_mmio_virtio( + vm, + id.clone(), + mmio_bus, + MMIODevice { + resources: *device_info, + inner: mmio_transport, + }, + )?; event_manager.add_subscriber(as_subscriber); Ok(()) @@ -536,6 +553,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { &balloon_state.transport_state, interrupt, &balloon_state.device_info, + constructor_args.mmio_bus, constructor_args.event_manager, )?; } @@ -562,6 +580,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { &block_state.transport_state, interrupt, &block_state.device_info, + constructor_args.mmio_bus, constructor_args.event_manager, )?; } @@ -610,6 +629,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { &net_state.transport_state, interrupt, &net_state.device_info, + 
constructor_args.mmio_bus, constructor_args.event_manager, )?; } @@ -641,6 +661,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { &vsock_state.transport_state, interrupt, &vsock_state.device_info, + constructor_args.mmio_bus, constructor_args.event_manager, )?; } @@ -666,6 +687,7 @@ impl<'a> Persist<'a> for MMIODeviceManager { &entropy_state.transport_state, interrupt, &entropy_state.device_info, + constructor_args.mmio_bus, constructor_args.event_manager, )?; } @@ -725,30 +747,26 @@ mod tests { } } - impl MMIODeviceManager { - fn soft_clone(&self) -> Self { - // We can unwrap here as we create with values directly in scope we - // know will results in `Ok` - let mut clone = MMIODeviceManager::new(); - // We only care about the device hashmap. - clone.id_to_dev_info.clone_from(&self.id_to_dev_info); - clone + impl PartialEq for MMIODevice { + fn eq(&self, other: &Self) -> bool { + self.resources == other.resources } } impl PartialEq for MMIODeviceManager { fn eq(&self, other: &MMIODeviceManager) -> bool { // We only care about the device hashmap. - if self.id_to_dev_info.len() != other.id_to_dev_info.len() { + if self.virtio_devices.len() != other.virtio_devices.len() { return false; } - for (key, val) in &self.id_to_dev_info { - match other.id_to_dev_info.get(key) { + for (key, val) in &self.virtio_devices { + match other.virtio_devices.get(key) { Some(other_val) if val == other_val => continue, _ => return false, - }; + } } - true + + self.boot_timer == other.boot_timer } } @@ -761,7 +779,7 @@ mod tests { let mut resource_allocator = ResourceAllocator::new().unwrap(); tmp_sock_file.remove().unwrap(); // Set up a vmm with one of each device, and get the serialized DeviceStates. 
- let original_mmio_device_manager = { + { let mut event_manager = EventManager::new().expect("Unable to create EventManager"); let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); @@ -811,11 +829,9 @@ mod tests { let entropy_config = EntropyDeviceConfig::default(); insert_entropy_device(&mut vmm, &mut cmdline, &mut event_manager, entropy_config); - Snapshot::serialize(&mut buf.as_mut_slice(), &vmm.mmio_device_manager.save()).unwrap(); + Snapshot::serialize(&mut buf.as_mut_slice(), &vmm.device_manager.save()).unwrap(); + } - // We only want to keep the device map from the original MmioDeviceManager. - vmm.mmio_device_manager.soft_clone() - }; tmp_sock_file.remove().unwrap(); let mut event_manager = EventManager::new().expect("Unable to create EventManager"); @@ -823,6 +839,7 @@ mod tests { let device_states: DeviceStates = Snapshot::deserialize(&mut buf.as_slice()).unwrap(); let vm_resources = &mut VmResources::default(); let restore_args = MMIODevManagerConstructorArgs { + mmio_bus: &vmm.device_manager.mmio_bus, mem: vmm.vm.guest_memory(), vm: vmm.vm.fd(), event_manager: &mut event_manager, @@ -831,7 +848,7 @@ mod tests { instance_id: "microvm-id", restored_from_file: true, }; - let restored_dev_manager = + let _restored_dev_manager = MMIODeviceManager::restore(restore_args, &device_states).unwrap(); let expected_vm_resources = format!( @@ -908,8 +925,6 @@ mod tests { MmdsVersion::V2 ); assert_eq!(device_states.mmds_version.unwrap(), MmdsVersion::V2.into()); - - assert_eq!(restored_dev_manager, original_mmio_device_manager); assert_eq!( expected_vm_resources, serde_json::to_string_pretty(&VmmConfig::from(&*vm_resources)).unwrap() diff --git a/src/vmm/src/devices/bus.rs b/src/vmm/src/devices/bus.rs deleted file mode 100644 index d0e1b296998..00000000000 --- a/src/vmm/src/devices/bus.rs +++ /dev/null @@ -1,404 +0,0 @@ -// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-// SPDX-License-Identifier: Apache-2.0 -// -// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the THIRD-PARTY file. - -//! Handles routing to devices in an address space. - -use std::cmp::{Ord, Ordering, PartialEq, PartialOrd}; -use std::collections::btree_map::BTreeMap; -use std::sync::{Arc, Mutex}; - -/// Errors triggered during bus operations. -#[derive(Debug, thiserror::Error, displaydoc::Display)] -pub enum BusError { - /// New device overlaps with an old device. - Overlap, -} - -#[derive(Debug, Copy, Clone)] -struct BusRange(u64, u64); - -impl Eq for BusRange {} - -impl PartialEq for BusRange { - fn eq(&self, other: &BusRange) -> bool { - self.0 == other.0 - } -} - -impl Ord for BusRange { - fn cmp(&self, other: &BusRange) -> Ordering { - self.0.cmp(&other.0) - } -} - -impl PartialOrd for BusRange { - fn partial_cmp(&self, other: &BusRange) -> Option { - Some(self.cmp(other)) - } -} - -/// A device container for routing reads and writes over some address space. -/// -/// This doesn't have any restrictions on what kind of device or address space this applies to. The -/// only restriction is that no two devices can overlap in this address space. 
-#[derive(Debug, Clone, Default)] -pub struct Bus { - devices: BTreeMap>>, -} - -use event_manager::{EventOps, Events, MutEventSubscriber}; - -#[cfg(target_arch = "aarch64")] -use super::legacy::RTCDevice; -use super::legacy::{I8042Device, SerialDevice}; -use super::pseudo::BootTimer; -use super::virtio::transport::mmio::MmioTransport; - -#[derive(Debug)] -pub enum BusDevice { - I8042Device(I8042Device), - #[cfg(target_arch = "aarch64")] - RTCDevice(RTCDevice), - BootTimer(BootTimer), - MmioTransport(MmioTransport), - Serial(SerialDevice), - #[cfg(test)] - Dummy(DummyDevice), - #[cfg(test)] - Constant(ConstantDevice), -} - -#[cfg(test)] -#[derive(Debug)] -pub struct DummyDevice; - -#[cfg(test)] -impl DummyDevice { - pub fn bus_write(&mut self, _offset: u64, _data: &[u8]) {} - pub fn bus_read(&mut self, _offset: u64, _data: &[u8]) {} -} - -#[cfg(test)] -#[derive(Debug)] -pub struct ConstantDevice; - -#[cfg(test)] -impl ConstantDevice { - pub fn bus_read(&mut self, offset: u64, data: &mut [u8]) { - for (i, v) in data.iter_mut().enumerate() { - *v = ((offset + i as u64) & 0xff) as u8; - } - } - - fn bus_write(&mut self, offset: u64, data: &[u8]) { - for (i, v) in data.iter().enumerate() { - assert_eq!(*v, ((offset + i as u64) & 0xff) as u8) - } - } -} - -impl BusDevice { - pub fn i8042_device_ref(&self) -> Option<&I8042Device> { - match self { - Self::I8042Device(x) => Some(x), - _ => None, - } - } - #[cfg(target_arch = "aarch64")] - pub fn rtc_device_ref(&self) -> Option<&RTCDevice> { - match self { - Self::RTCDevice(x) => Some(x), - _ => None, - } - } - pub fn boot_timer_ref(&self) -> Option<&BootTimer> { - match self { - Self::BootTimer(x) => Some(x), - _ => None, - } - } - pub fn mmio_transport_ref(&self) -> Option<&MmioTransport> { - match self { - Self::MmioTransport(x) => Some(x), - _ => None, - } - } - pub fn serial_ref(&self) -> Option<&SerialDevice> { - match self { - Self::Serial(x) => Some(x), - _ => None, - } - } - - pub fn i8042_device_mut(&mut self) -> 
Option<&mut I8042Device> { - match self { - Self::I8042Device(x) => Some(x), - _ => None, - } - } - #[cfg(target_arch = "aarch64")] - pub fn rtc_device_mut(&mut self) -> Option<&mut RTCDevice> { - match self { - Self::RTCDevice(x) => Some(x), - _ => None, - } - } - pub fn boot_timer_mut(&mut self) -> Option<&mut BootTimer> { - match self { - Self::BootTimer(x) => Some(x), - _ => None, - } - } - pub fn mmio_transport_mut(&mut self) -> Option<&mut MmioTransport> { - match self { - Self::MmioTransport(x) => Some(x), - _ => None, - } - } - pub fn serial_mut(&mut self) -> Option<&mut SerialDevice> { - match self { - Self::Serial(x) => Some(x), - _ => None, - } - } - - pub fn read(&mut self, offset: u64, data: &mut [u8]) { - match self { - Self::I8042Device(x) => x.bus_read(offset, data), - #[cfg(target_arch = "aarch64")] - Self::RTCDevice(x) => x.bus_read(offset, data), - Self::BootTimer(x) => x.bus_read(offset, data), - Self::MmioTransport(x) => x.bus_read(offset, data), - Self::Serial(x) => x.bus_read(offset, data), - #[cfg(test)] - Self::Dummy(x) => x.bus_read(offset, data), - #[cfg(test)] - Self::Constant(x) => x.bus_read(offset, data), - } - } - - pub fn write(&mut self, offset: u64, data: &[u8]) { - match self { - Self::I8042Device(x) => x.bus_write(offset, data), - #[cfg(target_arch = "aarch64")] - Self::RTCDevice(x) => x.bus_write(offset, data), - Self::BootTimer(x) => x.bus_write(offset, data), - Self::MmioTransport(x) => x.bus_write(offset, data), - Self::Serial(x) => x.bus_write(offset, data), - #[cfg(test)] - Self::Dummy(x) => x.bus_write(offset, data), - #[cfg(test)] - Self::Constant(x) => x.bus_write(offset, data), - } - } -} - -impl MutEventSubscriber for BusDevice { - fn process(&mut self, event: Events, ops: &mut EventOps) { - match self { - Self::Serial(serial) => serial.process(event, ops), - _ => panic!(), - } - } - fn init(&mut self, ops: &mut EventOps) { - match self { - Self::Serial(serial) => serial.init(ops), - _ => panic!(), - } - } -} - -impl 
Bus { - /// Constructs an a bus with an empty address space. - pub fn new() -> Bus { - Bus { - devices: BTreeMap::new(), - } - } - - fn first_before(&self, addr: u64) -> Option<(BusRange, &Mutex)> { - // for when we switch to rustc 1.17: self.devices.range(..addr).iter().rev().next() - for (range, dev) in self.devices.iter().rev() { - if range.0 <= addr { - return Some((*range, dev)); - } - } - None - } - - /// Returns the device found at some address. - pub fn get_device(&self, addr: u64) -> Option<(u64, &Mutex)> { - if let Some((BusRange(start, len), dev)) = self.first_before(addr) { - let offset = addr - start; - if offset < len { - return Some((offset, dev)); - } - } - None - } - - /// Puts the given device at the given address space. - pub fn insert( - &mut self, - device: Arc>, - base: u64, - len: u64, - ) -> Result<(), BusError> { - if len == 0 { - return Err(BusError::Overlap); - } - - // Reject all cases where the new device's base is within an old device's range. - if self.get_device(base).is_some() { - return Err(BusError::Overlap); - } - - // The above check will miss an overlap in which the new device's base address is before the - // range of another device. To catch that case, we search for a device with a range before - // the new device's range's end. If there is no existing device in that range that starts - // after the new device, then there will be no overlap. - if let Some((BusRange(start, _), _)) = self.first_before(base + len - 1) { - // Such a device only conflicts with the new device if it also starts after the new - // device because of our initial `get_device` check above. - if start >= base { - return Err(BusError::Overlap); - } - } - - if self.devices.insert(BusRange(base, len), device).is_some() { - return Err(BusError::Overlap); - } - - Ok(()) - } - - /// Reads data from the device that owns the range containing `addr` and puts it into `data`. - /// - /// Returns true on success, otherwise `data` is untouched. 
- pub fn read(&self, addr: u64, data: &mut [u8]) -> bool { - if let Some((offset, dev)) = self.get_device(addr) { - // OK to unwrap as lock() failing is a serious error condition and should panic. - dev.lock() - .expect("Failed to acquire device lock") - .read(offset, data); - true - } else { - false - } - } - - /// Writes `data` to the device that owns the range containing `addr`. - /// - /// Returns true on success, otherwise `data` is untouched. - pub fn write(&self, addr: u64, data: &[u8]) -> bool { - if let Some((offset, dev)) = self.get_device(addr) { - // OK to unwrap as lock() failing is a serious error condition and should panic. - dev.lock() - .expect("Failed to acquire device lock") - .write(offset, data); - true - } else { - false - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn bus_insert() { - let mut bus = Bus::new(); - let dummy = Arc::new(Mutex::new(BusDevice::Dummy(DummyDevice))); - // Insert len should not be 0. - bus.insert(dummy.clone(), 0x10, 0).unwrap_err(); - bus.insert(dummy.clone(), 0x10, 0x10).unwrap(); - - let result = bus.insert(dummy.clone(), 0x0f, 0x10); - // This overlaps the address space of the existing bus device at 0x10. - assert!(matches!(result, Err(BusError::Overlap)), "{:?}", result); - - // This overlaps the address space of the existing bus device at 0x10. - bus.insert(dummy.clone(), 0x10, 0x10).unwrap_err(); - // This overlaps the address space of the existing bus device at 0x10. - bus.insert(dummy.clone(), 0x10, 0x15).unwrap_err(); - // This overlaps the address space of the existing bus device at 0x10. - bus.insert(dummy.clone(), 0x12, 0x15).unwrap_err(); - // This overlaps the address space of the existing bus device at 0x10. - bus.insert(dummy.clone(), 0x12, 0x01).unwrap_err(); - // This overlaps the address space of the existing bus device at 0x10. 
- bus.insert(dummy.clone(), 0x0, 0x20).unwrap_err(); - bus.insert(dummy.clone(), 0x20, 0x05).unwrap(); - bus.insert(dummy.clone(), 0x25, 0x05).unwrap(); - bus.insert(dummy, 0x0, 0x10).unwrap(); - } - - #[test] - fn bus_read_write() { - let mut bus = Bus::new(); - let dummy = Arc::new(Mutex::new(BusDevice::Dummy(DummyDevice))); - bus.insert(dummy, 0x10, 0x10).unwrap(); - assert!(bus.read(0x10, &mut [0, 0, 0, 0])); - assert!(bus.write(0x10, &[0, 0, 0, 0])); - assert!(bus.read(0x11, &mut [0, 0, 0, 0])); - assert!(bus.write(0x11, &[0, 0, 0, 0])); - assert!(bus.read(0x16, &mut [0, 0, 0, 0])); - assert!(bus.write(0x16, &[0, 0, 0, 0])); - assert!(!bus.read(0x20, &mut [0, 0, 0, 0])); - assert!(!bus.write(0x20, &[0, 0, 0, 0])); - assert!(!bus.read(0x06, &mut [0, 0, 0, 0])); - assert!(!bus.write(0x06, &[0, 0, 0, 0])); - } - - #[test] - fn bus_read_write_values() { - let mut bus = Bus::new(); - let dummy = Arc::new(Mutex::new(BusDevice::Constant(ConstantDevice))); - bus.insert(dummy, 0x10, 0x10).unwrap(); - - let mut values = [0, 1, 2, 3]; - assert!(bus.read(0x10, &mut values)); - assert_eq!(values, [0, 1, 2, 3]); - assert!(bus.write(0x10, &values)); - assert!(bus.read(0x15, &mut values)); - assert_eq!(values, [5, 6, 7, 8]); - assert!(bus.write(0x15, &values)); - } - - #[test] - fn busrange_cmp_and_clone() { - assert_eq!(BusRange(0x10, 2), BusRange(0x10, 3)); - assert_eq!(BusRange(0x10, 2), BusRange(0x10, 2)); - - assert!(BusRange(0x10, 2) < BusRange(0x12, 1)); - assert!(BusRange(0x10, 2) < BusRange(0x12, 3)); - - let mut bus = Bus::new(); - let mut data = [1, 2, 3, 4]; - bus.insert( - Arc::new(Mutex::new(BusDevice::Dummy(DummyDevice))), - 0x10, - 0x10, - ) - .unwrap(); - assert!(bus.write(0x10, &data)); - let bus_clone = bus.clone(); - assert!(bus.read(0x10, &mut data)); - assert_eq!(data, [1, 2, 3, 4]); - assert!(bus_clone.read(0x10, &mut data)); - assert_eq!(data, [1, 2, 3, 4]); - } - - #[test] - fn test_display_error() { - assert_eq!( - format!("{}", BusError::Overlap), - 
"New device overlaps with an old device." - ); - } -} diff --git a/src/vmm/src/devices/legacy/i8042.rs b/src/vmm/src/devices/legacy/i8042.rs index bcf7bdd8c90..235ce2a7339 100644 --- a/src/vmm/src/devices/legacy/i8042.rs +++ b/src/vmm/src/devices/legacy/i8042.rs @@ -7,6 +7,7 @@ use std::io; use std::num::Wrapping; +use std::sync::{Arc, Barrier}; use log::warn; use serde::Serialize; @@ -96,7 +97,7 @@ pub struct I8042Device { reset_evt: EventFd, /// Keyboard interrupt event (IRQ 1). - kbd_interrupt_evt: EventFd, + pub kbd_interrupt_evt: EventFd, /// The i8042 status register. status: u8, @@ -118,10 +119,10 @@ pub struct I8042Device { impl I8042Device { /// Constructs an i8042 device that will signal the given event when the guest requests it. - pub fn new(reset_evt: EventFd, kbd_interrupt_evt: EventFd) -> I8042Device { - I8042Device { + pub fn new(reset_evt: EventFd) -> Result { + Ok(I8042Device { reset_evt, - kbd_interrupt_evt, + kbd_interrupt_evt: EventFd::new(libc::EFD_NONBLOCK)?, control: CB_POST_OK | CB_KBD_INT, cmd: 0, outp: 0, @@ -129,7 +130,7 @@ impl I8042Device { buf: [0; BUF_SIZE], bhead: Wrapping(0), btail: Wrapping(0), - } + }) } /// Signal a ctrl-alt-del (reset) event. @@ -209,8 +210,8 @@ impl I8042Device { } } -impl I8042Device { - pub fn bus_read(&mut self, offset: u64, data: &mut [u8]) { +impl vm_device::BusDevice for I8042Device { + fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { // All our ports are byte-wide. We don't know how to handle any wider data. if data.len() != 1 { METRICS.missed_read_count.inc(); @@ -245,11 +246,11 @@ impl I8042Device { } } - pub fn bus_write(&mut self, offset: u64, data: &[u8]) { + fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option> { // All our ports are byte-wide. We don't know how to handle any wider data. 
if data.len() != 1 { METRICS.missed_write_count.inc(); - return; + return None; } let mut write_ok = true; @@ -335,11 +336,15 @@ impl I8042Device { } else { METRICS.missed_write_count.inc(); } + + None } } #[cfg(test)] mod tests { + use vm_device::BusDevice; + use super::*; impl PartialEq for I8042Error { @@ -350,17 +355,14 @@ mod tests { #[test] fn test_i8042_read_write_and_event() { - let mut i8042 = I8042Device::new( - EventFd::new(libc::EFD_NONBLOCK).unwrap(), - EventFd::new(libc::EFD_NONBLOCK).unwrap(), - ); + let mut i8042 = I8042Device::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()).unwrap(); let reset_evt = i8042.reset_evt.try_clone().unwrap(); // Check if reading in a 2-length array doesn't have side effects. let mut data = [1, 2]; - i8042.bus_read(0, &mut data); + i8042.read(0x0, 0, &mut data); assert_eq!(data, [1, 2]); - i8042.bus_read(1, &mut data); + i8042.read(0x0, 1, &mut data); assert_eq!(data, [1, 2]); // Check if reset works. @@ -368,72 +370,66 @@ mod tests { // counter doesn't change (for 0 it blocks). reset_evt.write(1).unwrap(); let mut data = [CMD_RESET_CPU]; - i8042.bus_write(OFS_STATUS, &data); + i8042.write(0x0, OFS_STATUS, &data); assert_eq!(reset_evt.read().unwrap(), 2); // Check if reading with offset 1 doesn't have side effects. - i8042.bus_read(1, &mut data); + i8042.read(0x0, 1, &mut data); assert_eq!(data[0], CMD_RESET_CPU); // Check invalid `write`s. let before = METRICS.missed_write_count.count(); // offset != 0. 
- i8042.bus_write(1, &data); + i8042.write(0x0, 1, &data); // data != CMD_RESET_CPU data[0] = CMD_RESET_CPU + 1; - i8042.bus_write(1, &data); + i8042.write(0x0, 1, &data); // data.len() != 1 let data = [CMD_RESET_CPU; 2]; - i8042.bus_write(1, &data); + i8042.write(0x0, 1, &data); assert_eq!(METRICS.missed_write_count.count(), before + 3); } #[test] fn test_i8042_commands() { - let mut i8042 = I8042Device::new( - EventFd::new(libc::EFD_NONBLOCK).unwrap(), - EventFd::new(libc::EFD_NONBLOCK).unwrap(), - ); + let mut i8042 = I8042Device::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()).unwrap(); let mut data = [1]; // Test reading/writing the control register. data[0] = CMD_WRITE_CTR; - i8042.bus_write(OFS_STATUS, &data); + i8042.write(0x0, OFS_STATUS, &data); assert_ne!(i8042.status & SB_I8042_CMD_DATA, 0); data[0] = 0x52; - i8042.bus_write(OFS_DATA, &data); + i8042.write(0x0, OFS_DATA, &data); data[0] = CMD_READ_CTR; - i8042.bus_write(OFS_STATUS, &data); + i8042.write(0x0, OFS_STATUS, &data); assert_ne!(i8042.status & SB_OUT_DATA_AVAIL, 0); - i8042.bus_read(OFS_DATA, &mut data); + i8042.read(0x0, OFS_DATA, &mut data); assert_eq!(data[0], 0x52); // Test reading/writing the output port. data[0] = CMD_WRITE_OUTP; - i8042.bus_write(OFS_STATUS, &data); + i8042.write(0x0, OFS_STATUS, &data); assert_ne!(i8042.status & SB_I8042_CMD_DATA, 0); data[0] = 0x52; - i8042.bus_write(OFS_DATA, &data); + i8042.write(0x0, OFS_DATA, &data); data[0] = CMD_READ_OUTP; - i8042.bus_write(OFS_STATUS, &data); + i8042.write(0x0, OFS_STATUS, &data); assert_ne!(i8042.status & SB_OUT_DATA_AVAIL, 0); - i8042.bus_read(OFS_DATA, &mut data); + i8042.read(0x0, OFS_DATA, &mut data); assert_eq!(data[0], 0x52); // Test kbd commands. 
data[0] = 0x52; - i8042.bus_write(OFS_DATA, &data); + i8042.write(0x0, OFS_DATA, &data); assert_ne!(i8042.status & SB_OUT_DATA_AVAIL, 0); - i8042.bus_read(OFS_DATA, &mut data); + i8042.read(0x0, OFS_DATA, &mut data); assert_eq!(data[0], 0xFA); } #[test] fn test_i8042_buffer() { - let mut i8042 = I8042Device::new( - EventFd::new(libc::EFD_NONBLOCK).unwrap(), - EventFd::new(libc::EFD_NONBLOCK).unwrap(), - ); + let mut i8042 = I8042Device::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()).unwrap(); // Test push/pop. i8042.push_byte(52).unwrap(); @@ -457,10 +453,7 @@ mod tests { #[test] fn test_i8042_kbd() { - let mut i8042 = I8042Device::new( - EventFd::new(libc::EFD_NONBLOCK).unwrap(), - EventFd::new(libc::EFD_NONBLOCK).unwrap(), - ); + let mut i8042 = I8042Device::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()).unwrap(); fn expect_key(i8042: &mut I8042Device, key: u16) { let mut data = [1]; @@ -470,13 +463,13 @@ mod tests { assert!(i8042.kbd_interrupt_evt.read().unwrap() > 1); // The "data available" flag should be on. - i8042.bus_read(OFS_STATUS, &mut data); + i8042.read(0x0, OFS_STATUS, &mut data); let mut key_byte: u8; if key & 0xFF00 != 0 { // For extended keys, we should be able to read the MSB first. key_byte = ((key & 0xFF00) >> 8) as u8; - i8042.bus_read(OFS_DATA, &mut data); + i8042.read(0x0, OFS_DATA, &mut data); assert_eq!(data[0], key_byte); // And then do the same for the LSB. @@ -485,10 +478,10 @@ mod tests { i8042.trigger_kbd_interrupt().unwrap(); assert!(i8042.kbd_interrupt_evt.read().unwrap() > 1); // The "data available" flag should be on. - i8042.bus_read(OFS_STATUS, &mut data); + i8042.read(0x0, OFS_STATUS, &mut data); } key_byte = (key & 0xFF) as u8; - i8042.bus_read(OFS_DATA, &mut data); + i8042.read(0x0, OFS_DATA, &mut data); assert_eq!(data[0], key_byte); } @@ -530,9 +523,9 @@ mod tests { // Test kbd interrupt disable. 
let mut data = [1]; data[0] = CMD_WRITE_CTR; - i8042.bus_write(OFS_STATUS, &data); + i8042.write(0x0, OFS_STATUS, &data); data[0] = i8042.control & !CB_KBD_INT; - i8042.bus_write(OFS_DATA, &data); + i8042.write(0x0, OFS_DATA, &data); i8042.trigger_key(KEY_CTRL).unwrap(); assert_eq!( i8042.trigger_kbd_interrupt().unwrap_err(), diff --git a/src/vmm/src/devices/legacy/rtc_pl031.rs b/src/vmm/src/devices/legacy/rtc_pl031.rs index 754899a23a4..b025c1d1512 100644 --- a/src/vmm/src/devices/legacy/rtc_pl031.rs +++ b/src/vmm/src/devices/legacy/rtc_pl031.rs @@ -4,6 +4,7 @@ use std::convert::TryInto; use serde::Serialize; +use vm_superio::Rtc; use vm_superio::rtc_pl031::RtcEvents; use crate::logger::{IncMetric, SharedIncMetric, warn}; @@ -59,7 +60,19 @@ pub static METRICS: RTCDeviceMetrics = RTCDeviceMetrics::new(); /// Wrapper over vm_superio's RTC implementation. #[derive(Debug)] -pub struct RTCDevice(pub vm_superio::Rtc<&'static RTCDeviceMetrics>); +pub struct RTCDevice(vm_superio::Rtc<&'static RTCDeviceMetrics>); + +impl Default for RTCDevice { + fn default() -> Self { + RTCDevice(Rtc::with_events(&METRICS)) + } +} + +impl RTCDevice { + pub fn new() -> RTCDevice { + Default::default() + } +} impl std::ops::Deref for RTCDevice { type Target = vm_superio::Rtc<&'static RTCDeviceMetrics>; @@ -80,7 +93,7 @@ impl RTCDevice { pub fn bus_read(&mut self, offset: u64, data: &mut [u8]) { if let (Ok(offset), 4) = (u16::try_from(offset), data.len()) { // read() function from RTC implementation expects a slice of - // len 4, and we just validated that this is the data lengt + // len 4, and we just validated that this is the data length self.read(offset, data.try_into().unwrap()) } else { warn!( @@ -108,6 +121,23 @@ impl RTCDevice { } } +#[cfg(target_arch = "aarch64")] +impl vm_device::BusDevice for RTCDevice { + fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { + self.bus_read(offset, data) + } + + fn write( + &mut self, + _base: u64, + offset: u64, + data: &[u8], + ) -> 
Option> { + self.bus_write(offset, data); + None + } +} + #[cfg(test)] mod tests { use vm_superio::Rtc; diff --git a/src/vmm/src/devices/legacy/serial.rs b/src/vmm/src/devices/legacy/serial.rs index 278c15a4464..afc47189c1e 100644 --- a/src/vmm/src/devices/legacy/serial.rs +++ b/src/vmm/src/devices/legacy/serial.rs @@ -7,16 +7,18 @@ //! Implements a wrapper over an UART serial device. use std::fmt::Debug; -use std::io; -use std::io::{Read, Write}; +use std::io::{self, Read, Stdin, Write}; use std::os::unix::io::{AsRawFd, RawFd}; +use std::sync::{Arc, Barrier}; use event_manager::{EventOps, Events, MutEventSubscriber}; +use libc::EFD_NONBLOCK; use log::{error, warn}; use serde::Serialize; use vm_superio::serial::{Error as SerialError, SerialEvents}; use vm_superio::{Serial, Trigger}; use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::EventFd; use crate::devices::legacy::EventFdTrigger; use crate::logger::{IncMetric, SharedIncMetric}; @@ -220,7 +222,27 @@ impl SerialWrapper = SerialWrapper; +pub type SerialDevice = SerialWrapper; + +impl SerialDevice { + pub fn new(serial_in: Option, serial_out: SerialOut) -> Result { + let interrupt_evt = EventFdTrigger::new(EventFd::new(EFD_NONBLOCK)?); + let buffer_read_event_fd = EventFdTrigger::new(EventFd::new(EFD_NONBLOCK)?); + + let serial = Serial::with_events( + interrupt_evt, + SerialEventsWrapper { + buffer_ready_event_fd: Some(buffer_read_event_fd), + }, + serial_out, + ); + + Ok(SerialDevice { + serial, + input: serial_in, + }) + } +} impl MutEventSubscriber for SerialWrapper @@ -337,10 +359,11 @@ fn is_fifo(fd: RawFd) -> bool { (stat.st_mode & libc::S_IFIFO) != 0 } -impl - SerialWrapper +impl vm_device::BusDevice for SerialWrapper +where + I: Read + AsRawFd + Send, { - pub fn bus_read(&mut self, offset: u64, data: &mut [u8]) { + fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { if let (Ok(offset), 1) = (u8::try_from(offset), data.len()) { data[0] = self.serial.read(offset); } else { @@ -348,7 
+371,7 @@ impl } } - pub fn bus_write(&mut self, offset: u64, data: &[u8]) { + fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option> { if let (Ok(offset), 1) = (u8::try_from(offset), data.len()) { if let Err(err) = self.serial.write(offset, data[0]) { // Counter incremented for any handle_write() error. @@ -358,6 +381,7 @@ impl } else { METRICS.missed_write_count.inc(); } + None } } @@ -365,6 +389,7 @@ impl mod tests { #![allow(clippy::undocumented_unsafe_blocks)] + use vm_device::BusDevice; use vmm_sys_util::eventfd::EventFd; use super::*; @@ -390,13 +415,13 @@ mod tests { let invalid_reads_before = metrics.missed_read_count.count(); let mut v = [0x00; 2]; - serial.bus_read(0u64, &mut v); + serial.read(0x0, 0u64, &mut v); let invalid_reads_after = metrics.missed_read_count.count(); assert_eq!(invalid_reads_before + 1, invalid_reads_after); let mut v = [0x00; 1]; - serial.bus_read(0u64, &mut v); + serial.read(0x0, 0u64, &mut v); assert_eq!(v[0], b'a'); let invalid_reads_after_2 = metrics.missed_read_count.count(); diff --git a/src/vmm/src/devices/mod.rs b/src/vmm/src/devices/mod.rs index 0ca445b6f82..15d78e04907 100644 --- a/src/vmm/src/devices/mod.rs +++ b/src/vmm/src/devices/mod.rs @@ -10,12 +10,10 @@ use std::io; pub mod acpi; -pub mod bus; pub mod legacy; pub mod pseudo; pub mod virtio; -pub use bus::{Bus, BusDevice, BusError}; use log::error; use crate::devices::virtio::net::metrics::NetDeviceMetrics; diff --git a/src/vmm/src/devices/pseudo/boot_timer.rs b/src/vmm/src/devices/pseudo/boot_timer.rs index ba16e92355f..f0cf38977b5 100644 --- a/src/vmm/src/devices/pseudo/boot_timer.rs +++ b/src/vmm/src/devices/pseudo/boot_timer.rs @@ -1,6 +1,8 @@ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 +use std::sync::{Arc, Barrier}; + use utils::time::TimestampUs; use crate::logger::info; @@ -8,16 +10,16 @@ use crate::logger::info; const MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE: u8 = 123; /// Pseudo device to record the kernel boot time. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct BootTimer { start_ts: TimestampUs, } -impl BootTimer { - pub fn bus_write(&mut self, offset: u64, data: &[u8]) { +impl vm_device::BusDevice for BootTimer { + fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option> { // Only handle byte length instructions at a zero offset. if data.len() != 1 || offset != 0 { - return; + return None; } if data[0] == MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE { @@ -33,8 +35,11 @@ impl BootTimer { boot_time_cpu_us / 1000 ); } + + None } - pub fn bus_read(&mut self, _offset: u64, _data: &[u8]) {} + + fn read(&mut self, _base: u64, _offset: u64, _data: &mut [u8]) {} } impl BootTimer { diff --git a/src/vmm/src/devices/virtio/transport/mmio.rs b/src/vmm/src/devices/virtio/transport/mmio.rs index 5557c4c500e..9871cb0ed6e 100644 --- a/src/vmm/src/devices/virtio/transport/mmio.rs +++ b/src/vmm/src/devices/virtio/transport/mmio.rs @@ -7,7 +7,7 @@ use std::fmt::Debug; use std::sync::atomic::{AtomicU32, Ordering}; -use std::sync::{Arc, Mutex, MutexGuard}; +use std::sync::{Arc, Barrier, Mutex, MutexGuard}; use vmm_sys_util::eventfd::EventFd; @@ -47,7 +47,7 @@ const MMIO_VERSION: u32 = 2; /// /// Typically one page (4096 bytes) of MMIO address space is sufficient to handle this transport /// and inner virtio device. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct MmioTransport { device: Arc>, // The register where feature bits are stored. 
@@ -239,8 +239,8 @@ impl MmioTransport { } } -impl MmioTransport { - pub fn bus_read(&mut self, offset: u64, data: &mut [u8]) { +impl vm_device::BusDevice for MmioTransport { + fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { match offset { 0x00..=0xff if data.len() == 4 => { let v = match offset { @@ -294,12 +294,15 @@ impl MmioTransport { } 0x100..=0xfff => self.locked_device().read_config(offset - 0x100, data), _ => { - warn!("invalid virtio mmio read: {:#x}:{:#x}", offset, data.len()); + warn!( + "invalid virtio mmio read: {base:#x}:{offset:#x}:{:#x}", + data.len() + ); } }; } - pub fn bus_write(&mut self, offset: u64, data: &[u8]) { + fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option> { fn hi(v: &mut GuestAddress, x: u32) { *v = (*v & 0xffff_ffff) | (u64::from(x) << 32) } @@ -361,9 +364,13 @@ impl MmioTransport { } } _ => { - warn!("invalid virtio mmio write: {:#x}:{:#x}", offset, data.len()); + warn!( + "invalid virtio mmio write: {base:#x}:{offset:#x}:{:#x}", + data.len() + ); } } + None } } @@ -462,6 +469,7 @@ pub(crate) mod tests { use std::ops::Deref; + use vm_device::BusDevice; use vmm_sys_util::eventfd::EventFd; use super::*; @@ -574,7 +582,7 @@ pub(crate) mod tests { fn set_device_status(d: &mut MmioTransport, status: u32) { let mut buf = [0; 4]; write_le_u32(&mut buf[..], status); - d.bus_write(0x70, &buf[..]); + d.write(0x0, 0x70, &buf[..]); } #[test] @@ -626,7 +634,7 @@ pub(crate) mod tests { // The following read shouldn't be valid, because the length of the buf is not 4. buf.push(0); - d.bus_read(0, &mut buf[..]); + d.read(0x0, 0, &mut buf[..]); assert_eq!(buf[..4], buf_copy[..]); // the length is ok again @@ -634,74 +642,74 @@ pub(crate) mod tests { // Now we test that reading at various predefined offsets works as intended. 
- d.bus_read(0, &mut buf[..]); + d.read(0x0, 0, &mut buf[..]); assert_eq!(read_le_u32(&buf[..]), MMIO_MAGIC_VALUE); - d.bus_read(0x04, &mut buf[..]); + d.read(0x0, 0x04, &mut buf[..]); assert_eq!(read_le_u32(&buf[..]), MMIO_VERSION); - d.bus_read(0x08, &mut buf[..]); + d.read(0x0, 0x08, &mut buf[..]); assert_eq!(read_le_u32(&buf[..]), d.locked_device().device_type()); - d.bus_read(0x0c, &mut buf[..]); + d.read(0x0, 0x0c, &mut buf[..]); assert_eq!(read_le_u32(&buf[..]), VENDOR_ID); d.features_select = 0; - d.bus_read(0x10, &mut buf[..]); + d.read(0x0, 0x10, &mut buf[..]); assert_eq!( read_le_u32(&buf[..]), d.locked_device().avail_features_by_page(0) ); d.features_select = 1; - d.bus_read(0x10, &mut buf[..]); + d.read(0x0, 0x10, &mut buf[..]); assert_eq!( read_le_u32(&buf[..]), d.locked_device().avail_features_by_page(0) | 0x1 ); - d.bus_read(0x34, &mut buf[..]); + d.read(0x0, 0x34, &mut buf[..]); assert_eq!(read_le_u32(&buf[..]), 16); - d.bus_read(0x44, &mut buf[..]); + d.read(0x0, 0x44, &mut buf[..]); assert_eq!(read_le_u32(&buf[..]), u32::from(false)); d.interrupt.irq_status.store(111, Ordering::SeqCst); - d.bus_read(0x60, &mut buf[..]); + d.read(0x0, 0x60, &mut buf[..]); assert_eq!(read_le_u32(&buf[..]), 111); d.is_vhost_user = true; - d.interrupt.irq_status.store(0, Ordering::SeqCst); - d.bus_read(0x60, &mut buf[..]); + d.interrupt.status().store(0, Ordering::SeqCst); + d.read(0x0, 0x60, &mut buf[..]); assert_eq!(read_le_u32(&buf[..]), VIRTIO_MMIO_INT_VRING); d.is_vhost_user = true; d.interrupt .irq_status .store(VIRTIO_MMIO_INT_CONFIG, Ordering::SeqCst); - d.bus_read(0x60, &mut buf[..]); + d.read(0x0, 0x60, &mut buf[..]); assert_eq!(read_le_u32(&buf[..]), VIRTIO_MMIO_INT_CONFIG); - d.bus_read(0x70, &mut buf[..]); + d.read(0x0, 0x70, &mut buf[..]); assert_eq!(read_le_u32(&buf[..]), 0); d.config_generation = 5; - d.bus_read(0xfc, &mut buf[..]); + d.read(0x0, 0xfc, &mut buf[..]); assert_eq!(read_le_u32(&buf[..]), 5); // This read shouldn't do anything, as it's 
past the readable generic registers, and // before the device specific configuration space. Btw, reads from the device specific // conf space are going to be tested a bit later, alongside writes. buf = buf_copy.to_vec(); - d.bus_read(0xfd, &mut buf[..]); + d.read(0x0, 0xfd, &mut buf[..]); assert_eq!(buf[..], buf_copy[..]); // Read from an invalid address in generic register range. - d.bus_read(0xfb, &mut buf[..]); + d.read(0x0, 0xfb, &mut buf[..]); assert_eq!(buf[..], buf_copy[..]); // Read from an invalid length in generic register range. - d.bus_read(0xfc, &mut buf[..3]); + d.read(0x0, 0xfc, &mut buf[..3]); assert_eq!(buf[..], buf_copy[..]); } @@ -717,7 +725,7 @@ pub(crate) mod tests { // Nothing should happen, because the slice len > 4. d.features_select = 0; - d.bus_write(0x14, &buf[..]); + d.write(0x0, 0x14, &buf[..]); assert_eq!(d.features_select, 0); buf.pop(); @@ -729,7 +737,7 @@ pub(crate) mod tests { assert_eq!(d.locked_device().acked_features(), 0x0); d.acked_features_select = 0x0; write_le_u32(&mut buf[..], 1); - d.bus_write(0x20, &buf[..]); + d.write(0x0, 0x20, &buf[..]); assert_eq!(d.locked_device().acked_features(), 0x0); // Write to device specific configuration space should be ignored before setting @@ -738,8 +746,8 @@ pub(crate) mod tests { for i in (0..0xeff).rev() { let mut buf2 = vec![0; 0xeff]; - d.bus_write(0x100 + i as u64, &buf1[i..]); - d.bus_read(0x100, &mut buf2[..]); + d.write(0x0, 0x100 + i as u64, &buf1[i..]); + d.read(0x0, 0x100, &mut buf2[..]); for item in buf2.iter().take(0xeff) { assert_eq!(*item, 0); @@ -755,7 +763,7 @@ pub(crate) mod tests { // now writes should work d.features_select = 0; write_le_u32(&mut buf[..], 1); - d.bus_write(0x14, &buf[..]); + d.write(0x0, 0x14, &buf[..]); assert_eq!(d.features_select, 1); // Test acknowledging features on bus. @@ -764,12 +772,12 @@ pub(crate) mod tests { // Set the device available features in order to make acknowledging possible. 
dummy_dev.lock().unwrap().set_avail_features(0x124); - d.bus_write(0x20, &buf[..]); + d.write(0x0, 0x20, &buf[..]); assert_eq!(d.locked_device().acked_features(), 0x124); d.acked_features_select = 0; write_le_u32(&mut buf[..], 2); - d.bus_write(0x24, &buf[..]); + d.write(0x0, 0x24, &buf[..]); assert_eq!(d.acked_features_select, 2); set_device_status( &mut d, @@ -780,31 +788,31 @@ pub(crate) mod tests { assert_eq!(d.locked_device().acked_features(), 0x124); d.acked_features_select = 0x0; write_le_u32(&mut buf[..], 1); - d.bus_write(0x20, &buf[..]); + d.write(0x0, 0x20, &buf[..]); assert_eq!(d.locked_device().acked_features(), 0x124); // Setup queues d.queue_select = 0; write_le_u32(&mut buf[..], 3); - d.bus_write(0x30, &buf[..]); + d.write(0x0, 0x30, &buf[..]); assert_eq!(d.queue_select, 3); d.queue_select = 0; assert_eq!(d.locked_device().queues()[0].size, 0); write_le_u32(&mut buf[..], 16); - d.bus_write(0x38, &buf[..]); + d.write(0x0, 0x38, &buf[..]); assert_eq!(d.locked_device().queues()[0].size, 16); assert!(!d.locked_device().queues()[0].ready); write_le_u32(&mut buf[..], 1); - d.bus_write(0x44, &buf[..]); + d.write(0x0, 0x44, &buf[..]); assert!(d.locked_device().queues()[0].ready); assert_eq!(d.locked_device().queues()[0].desc_table_address.0, 0); write_le_u32(&mut buf[..], 123); - d.bus_write(0x80, &buf[..]); + d.write(0x0, 0x80, &buf[..]); assert_eq!(d.locked_device().queues()[0].desc_table_address.0, 123); - d.bus_write(0x84, &buf[..]); + d.write(0x0, 0x84, &buf[..]); assert_eq!( d.locked_device().queues()[0].desc_table_address.0, 123 + (123 << 32) @@ -812,9 +820,9 @@ pub(crate) mod tests { assert_eq!(d.locked_device().queues()[0].avail_ring_address.0, 0); write_le_u32(&mut buf[..], 124); - d.bus_write(0x90, &buf[..]); + d.write(0x0, 0x90, &buf[..]); assert_eq!(d.locked_device().queues()[0].avail_ring_address.0, 124); - d.bus_write(0x94, &buf[..]); + d.write(0x0, 0x94, &buf[..]); assert_eq!( d.locked_device().queues()[0].avail_ring_address.0, 124 + (124 << 
32) @@ -822,9 +830,9 @@ pub(crate) mod tests { assert_eq!(d.locked_device().queues()[0].used_ring_address.0, 0); write_le_u32(&mut buf[..], 125); - d.bus_write(0xa0, &buf[..]); + d.write(0x0, 0xa0, &buf[..]); assert_eq!(d.locked_device().queues()[0].used_ring_address.0, 125); - d.bus_write(0xa4, &buf[..]); + d.write(0x0, 0xa4, &buf[..]); assert_eq!( d.locked_device().queues()[0].used_ring_address.0, 125 + (125 << 32) @@ -840,17 +848,17 @@ pub(crate) mod tests { d.interrupt.irq_status.store(0b10_1010, Ordering::Relaxed); write_le_u32(&mut buf[..], 0b111); - d.bus_write(0x64, &buf[..]); + d.write(0x0, 0x64, &buf[..]); assert_eq!(d.interrupt.irq_status.load(Ordering::Relaxed), 0b10_1000); // Write to an invalid address in generic register range. write_le_u32(&mut buf[..], 0xf); d.config_generation = 0; - d.bus_write(0xfb, &buf[..]); + d.write(0x0, 0xfb, &buf[..]); assert_eq!(d.config_generation, 0); // Write to an invalid length in generic register range. - d.bus_write(0xfc, &buf[..2]); + d.write(0x0, 0xfc, &buf[..2]); assert_eq!(d.config_generation, 0); // Here we test writes/read into/from the device specific configuration space. @@ -858,8 +866,8 @@ pub(crate) mod tests { for i in (0..0xeff).rev() { let mut buf2 = vec![0; 0xeff]; - d.bus_write(0x100 + i as u64, &buf1[i..]); - d.bus_read(0x100, &mut buf2[..]); + d.write(0x0, 0x100 + i as u64, &buf1[i..]); + d.read(0x0, 0x100, &mut buf2[..]); for item in buf2.iter().take(i) { assert_eq!(*item, 0); @@ -915,9 +923,9 @@ pub(crate) mod tests { for q in 0..queue_len { d.queue_select = q.try_into().unwrap(); write_le_u32(&mut buf[..], 16); - d.bus_write(0x38, &buf[..]); + d.write(0x0, 0x38, &buf[..]); write_le_u32(&mut buf[..], 1); - d.bus_write(0x44, &buf[..]); + d.write(0x0, 0x44, &buf[..]); } assert!(d.are_queues_valid()); assert!(!d.locked_device().is_activated()); @@ -925,8 +933,8 @@ pub(crate) mod tests { // Device should be ready for activation now. 
// A couple of invalid writes; will trigger warnings; shouldn't activate the device. - d.bus_write(0xa8, &buf[..]); - d.bus_write(0x1000, &buf[..]); + d.write(0x0, 0xa8, &buf[..]); + d.write(0x0, 0x1000, &buf[..]); assert!(!d.locked_device().is_activated()); set_device_status( @@ -949,8 +957,8 @@ pub(crate) mod tests { // a warning path and have no effect on queue state. write_le_u32(&mut buf[..], 0); d.queue_select = 0; - d.bus_write(0x44, &buf[..]); - d.bus_read(0x44, &mut buf[..]); + d.write(0x0, 0x44, &buf[..]); + d.read(0x0, 0x44, &mut buf[..]); assert_eq!(read_le_u32(&buf[..]), 1); } @@ -976,9 +984,9 @@ pub(crate) mod tests { for q in 0..queue_len { d.queue_select = q.try_into().unwrap(); write_le_u32(&mut buf[..], 16); - d.bus_write(0x38, &buf[..]); + d.write(0x0, 0x38, &buf[..]); write_le_u32(&mut buf[..], 1); - d.bus_write(0x44, &buf[..]); + d.write(0x0, 0x44, &buf[..]); } assert!(d.are_queues_valid()); @@ -1023,9 +1031,9 @@ pub(crate) mod tests { for q in 0..queues_count { d.queue_select = q.try_into().unwrap(); write_le_u32(&mut buf[..], 16); - d.bus_write(0x38, &buf[..]); + d.write(0x0, 0x38, &buf[..]); write_le_u32(&mut buf[..], 1); - d.bus_write(0x44, &buf[..]); + d.write(0x0, 0x44, &buf[..]); } assert!(d.are_queues_valid()); assert!(!d.locked_device().is_activated()); @@ -1067,13 +1075,13 @@ pub(crate) mod tests { // Marking device as FAILED should not affect device_activated state write_le_u32(&mut buf[..], 0x8f); - d.bus_write(0x70, &buf[..]); + d.write(0x0, 0x70, &buf[..]); assert_eq!(d.device_status, 0x8f); assert!(d.locked_device().is_activated()); // Nothing happens when backend driver doesn't support reset write_le_u32(&mut buf[..], 0x0); - d.bus_write(0x70, &buf[..]); + d.write(0x0, 0x70, &buf[..]); assert_eq!(d.device_status, 0x8f); assert!(d.locked_device().is_activated()); } diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 29f3b0148ac..30104890e7d 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -121,9 +121,9 @@ use 
std::sync::mpsc::RecvTimeoutError; use std::sync::{Arc, Barrier, Mutex}; use std::time::Duration; -use device_manager::acpi::ACPIDeviceManager; -use device_manager::resources::ResourceAllocator; +use device_manager::DeviceManager; use devices::acpi::vmgenid::VmGenIdError; +use devices::virtio::device::VirtioDevice; use event_manager::{EventManager as BaseEventManager, EventOps, Events, MutEventSubscriber}; use seccomp::BpfProgram; use userfaultfd::Uffd; @@ -133,12 +133,7 @@ use vmm_sys_util::terminal::Terminal; use vstate::kvm::Kvm; use vstate::vcpu::{self, StartThreadedError, VcpuSendEventError}; -use crate::arch::DeviceType; use crate::cpu_config::templates::CpuConfiguration; -#[cfg(target_arch = "x86_64")] -use crate::device_manager::legacy::PortIODeviceManager; -use crate::device_manager::mmio::MMIODeviceManager; -use crate::devices::legacy::{IER_RDA_BIT, IER_RDA_OFFSET}; use crate::devices::virtio::balloon::{ BALLOON_DEV_ID, Balloon, BalloonConfig, BalloonError, BalloonStats, }; @@ -148,7 +143,6 @@ use crate::devices::virtio::{TYPE_BALLOON, TYPE_BLOCK, TYPE_NET}; use crate::logger::{METRICS, MetricsError, error, info, warn}; use crate::persist::{MicrovmState, MicrovmStateError, VmInfo}; use crate::rate_limiter::BucketUpdate; -use crate::snapshot::Persist; use crate::vmm_config::instance_info::{InstanceInfo, VmState}; use crate::vstate::memory::{GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; use crate::vstate::vcpu::VcpuState; @@ -205,17 +199,15 @@ pub const HTTP_MAX_PAYLOAD_SIZE: usize = 51200; /// have permissions to open the KVM fd). #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum VmmError { - /// Failed to allocate guest resource: {0} - AllocateResources(#[from] vm_allocator::Error), #[cfg(target_arch = "aarch64")] /// Invalid command line error. 
Cmdline, /// Device manager error: {0} - DeviceManager(device_manager::mmio::MmioError), + DeviceManager(#[from] device_manager::DeviceManagerCreateError), + /// MMIO Device manager error: {0} + MmioDeviceManager(device_manager::mmio::MmioError), /// Error getting the KVM dirty bitmap. {0} DirtyBitmap(kvm_ioctls::Error), - /// Event fd error: {0} - EventFd(io::Error), /// I8042 error: {0} I8042Error(devices::legacy::I8042DeviceError), #[cfg(target_arch = "x86_64")] @@ -313,14 +305,8 @@ pub struct Vmm { vcpus_handles: Vec, // Used by Vcpus and devices to initiate teardown; Vmm should never write here. vcpus_exit_evt: EventFd, - - // Allocator for guest resources - resource_allocator: ResourceAllocator, - // Guest VM devices. - mmio_device_manager: MMIODeviceManager, - #[cfg(target_arch = "x86_64")] - pio_device_manager: PortIODeviceManager, - acpi_device_manager: ACPIDeviceManager, + // Device manager + device_manager: DeviceManager, } impl Vmm { @@ -340,12 +326,17 @@ impl Vmm { } /// Gets the specified bus device. - pub fn get_bus_device( + pub fn get_virtio_device( &self, - device_type: DeviceType, + device_type: u32, device_id: &str, - ) -> Option<&Mutex> { - self.mmio_device_manager.get_device(device_type, device_id) + ) -> Option>> { + let device = self + .device_manager + .mmio_devices + .get_virtio_device(device_type, device_id)?; + + Some(device.inner.lock().expect("Poisoned lock").device().clone()) } /// Starts the microVM vcpus. @@ -380,10 +371,10 @@ impl Vmm { self.vcpus_handles.reserve(vcpu_count); for mut vcpu in vcpus.drain(..) 
{ - vcpu.set_mmio_bus(self.mmio_device_manager.bus.clone()); + vcpu.set_mmio_bus(self.device_manager.mmio_bus.clone()); #[cfg(target_arch = "x86_64")] vcpu.kvm_vcpu - .set_pio_bus(self.pio_device_manager.io_bus.clone()); + .set_pio_bus(self.device_manager.pio_bus.clone()); self.vcpus_handles .push(vcpu.start_threaded(vcpu_seccomp_filter.clone(), barrier.clone())?); @@ -397,7 +388,7 @@ impl Vmm { /// Sends a resume command to the vCPUs. pub fn resume_vm(&mut self) -> Result<(), VmmError> { - self.mmio_device_manager.kick_devices(); + self.device_manager.mmio_devices.kick_devices(); // Send the events. self.vcpus_handles @@ -441,60 +432,14 @@ impl Vmm { Ok(()) } - /// Sets RDA bit in serial console - pub fn emulate_serial_init(&self) -> Result<(), EmulateSerialInitError> { - // When restoring from a previously saved state, there is no serial - // driver initialization, therefore the RDA (Received Data Available) - // interrupt is not enabled. Because of that, the driver won't get - // notified of any bytes that we send to the guest. The clean solution - // would be to save the whole serial device state when we do the vm - // serialization. 
For now we set that bit manually - - #[cfg(target_arch = "aarch64")] - { - let serial_bus_device = self.get_bus_device(DeviceType::Serial, "Serial"); - if serial_bus_device.is_none() { - return Ok(()); - } - let mut serial_device_locked = - serial_bus_device.unwrap().lock().expect("Poisoned lock"); - let serial = serial_device_locked - .serial_mut() - .expect("Unexpected BusDeviceType"); - - serial - .serial - .write(IER_RDA_OFFSET, IER_RDA_BIT) - .map_err(|_| EmulateSerialInitError(std::io::Error::last_os_error()))?; - Ok(()) - } - - #[cfg(target_arch = "x86_64")] - { - let mut guard = self - .pio_device_manager - .stdio_serial - .lock() - .expect("Poisoned lock"); - let serial = guard.serial_mut().unwrap(); - - serial - .serial - .write(IER_RDA_OFFSET, IER_RDA_BIT) - .map_err(|_| EmulateSerialInitError(std::io::Error::last_os_error()))?; - Ok(()) - } - } - /// Injects CTRL+ALT+DEL keystroke combo in the i8042 device. #[cfg(target_arch = "x86_64")] pub fn send_ctrl_alt_del(&mut self) -> Result<(), VmmError> { - self.pio_device_manager + self.device_manager + .legacy_devices .i8042 .lock() .expect("i8042 lock was poisoned") - .i8042_device_mut() - .unwrap() .trigger_ctrl_alt_del() .map_err(VmmError::I8042Error) } @@ -516,9 +461,7 @@ impl Vmm { self.vm.save_state(&mpidrs).map_err(SaveVmState)? 
} }; - let device_states = self.mmio_device_manager.save(); - - let acpi_dev_state = self.acpi_device_manager.save(); + let device_states = self.device_manager.save(); Ok(MicrovmState { vm_info: vm_info.clone(), @@ -526,7 +469,6 @@ impl Vmm { vm_state, vcpu_states, device_states, - acpi_dev_state, }) } @@ -593,13 +535,14 @@ impl Vmm { drive_id: &str, path_on_host: String, ) -> Result<(), VmmError> { - self.mmio_device_manager + self.device_manager + .mmio_devices .with_virtio_device_with_id(TYPE_BLOCK, drive_id, |block: &mut Block| { block .update_disk_image(path_on_host) .map_err(|err| err.to_string()) }) - .map_err(VmmError::DeviceManager) + .map_err(VmmError::MmioDeviceManager) } /// Updates the rate limiter parameters for block device with `drive_id` id. @@ -609,22 +552,24 @@ impl Vmm { rl_bytes: BucketUpdate, rl_ops: BucketUpdate, ) -> Result<(), VmmError> { - self.mmio_device_manager + self.device_manager + .mmio_devices .with_virtio_device_with_id(TYPE_BLOCK, drive_id, |block: &mut Block| { block .update_rate_limiter(rl_bytes, rl_ops) .map_err(|err| err.to_string()) }) - .map_err(VmmError::DeviceManager) + .map_err(VmmError::MmioDeviceManager) } /// Updates the rate limiter parameters for block device with `drive_id` id. pub fn update_vhost_user_block_config(&mut self, drive_id: &str) -> Result<(), VmmError> { - self.mmio_device_manager + self.device_manager + .mmio_devices .with_virtio_device_with_id(TYPE_BLOCK, drive_id, |block: &mut Block| { block.update_config().map_err(|err| err.to_string()) }) - .map_err(VmmError::DeviceManager) + .map_err(VmmError::MmioDeviceManager) } /// Updates the rate limiter parameters for net device with `net_id` id. 
@@ -636,25 +581,18 @@ impl Vmm { tx_bytes: BucketUpdate, tx_ops: BucketUpdate, ) -> Result<(), VmmError> { - self.mmio_device_manager + self.device_manager + .mmio_devices .with_virtio_device_with_id(TYPE_NET, net_id, |net: &mut Net| { net.patch_rate_limiters(rx_bytes, rx_ops, tx_bytes, tx_ops); Ok(()) }) - .map_err(VmmError::DeviceManager) + .map_err(VmmError::MmioDeviceManager) } /// Returns a reference to the balloon device if present. pub fn balloon_config(&self) -> Result { - if let Some(busdev) = self.get_bus_device(DeviceType::Virtio(TYPE_BALLOON), BALLOON_DEV_ID) - { - let virtio_device = busdev - .lock() - .expect("Poisoned lock") - .mmio_transport_ref() - .expect("Unexpected device type") - .device(); - + if let Some(virtio_device) = self.get_virtio_device(TYPE_BALLOON, BALLOON_DEV_ID) { let config = virtio_device .lock() .expect("Poisoned lock") @@ -671,15 +609,7 @@ impl Vmm { /// Returns the latest balloon statistics if they are enabled. pub fn latest_balloon_stats(&self) -> Result { - if let Some(busdev) = self.get_bus_device(DeviceType::Virtio(TYPE_BALLOON), BALLOON_DEV_ID) - { - let virtio_device = busdev - .lock() - .expect("Poisoned lock") - .mmio_transport_ref() - .expect("Unexpected device type") - .device(); - + if let Some(virtio_device) = self.get_virtio_device(TYPE_BALLOON, BALLOON_DEV_ID) { let latest_stats = virtio_device .lock() .expect("Poisoned lock") @@ -704,16 +634,8 @@ impl Vmm { return Err(BalloonError::TooManyPagesRequested); } - if let Some(busdev) = self.get_bus_device(DeviceType::Virtio(TYPE_BALLOON), BALLOON_DEV_ID) - { + if let Some(virtio_device) = self.get_virtio_device(TYPE_BALLOON, BALLOON_DEV_ID) { { - let virtio_device = busdev - .lock() - .expect("Poisoned lock") - .mmio_transport_ref() - .expect("Unexpected device type") - .device(); - virtio_device .lock() .expect("Poisoned lock") @@ -734,16 +656,8 @@ impl Vmm { &mut self, stats_polling_interval_s: u16, ) -> Result<(), BalloonError> { - if let Some(busdev) = 
self.get_bus_device(DeviceType::Virtio(TYPE_BALLOON), BALLOON_DEV_ID) - { + if let Some(virtio_device) = self.get_virtio_device(TYPE_BALLOON, BALLOON_DEV_ID) { { - let virtio_device = busdev - .lock() - .expect("Poisoned lock") - .mmio_transport_ref() - .expect("Unexpected device type") - .device(); - virtio_device .lock() .expect("Poisoned lock") diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 4111d8d6c34..6fd5ca89081 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -25,7 +25,7 @@ use crate::cpu_config::templates::StaticCpuTemplate; use crate::cpu_config::x86_64::cpuid::CpuidTrait; #[cfg(target_arch = "x86_64")] use crate::cpu_config::x86_64::cpuid::common::get_vendor_id_from_host; -use crate::device_manager::persist::{ACPIDeviceManagerState, DevicePersistError, DeviceStates}; +use crate::device_manager::{DevicePersistError, DevicesState}; use crate::logger::{info, warn}; use crate::resources::VmResources; use crate::seccomp::BpfThreadMap; @@ -69,7 +69,7 @@ impl From<&VmResources> for VmInfo { } } -/// Contains the necesary state for saving/restoring a microVM. +/// Contains the necessary state for saving/restoring a microVM. #[derive(Debug, Default, Serialize, Deserialize)] pub struct MicrovmState { /// Miscellaneous VM info. @@ -81,9 +81,7 @@ pub struct MicrovmState { /// Vcpu states. pub vcpu_states: Vec, /// Device states. - pub device_states: DeviceStates, - /// ACPI devices state. 
- pub acpi_dev_state: ACPIDeviceManagerState, + pub device_states: DevicesState, } /// This describes the mapping between Firecracker base virtual address and @@ -118,7 +116,7 @@ pub enum MicrovmStateError { /// Operation not allowed: {0} NotAllowed(String), /// Cannot restore devices: {0} - RestoreDevices(DevicePersistError), + RestoreDevices(#[from] DevicePersistError), /// Cannot save Vcpu state: {0} SaveVcpuState(vstate::vcpu::VcpuError), /// Cannot save Vm state: {0} @@ -171,9 +169,11 @@ pub fn create_snapshot( // SAFETY: // This should never fail as we only mark pages only if device has already been activated, // and the address validation was already performed on device activation. - vmm.mmio_device_manager - .for_each_virtio_device(|_, _, _, dev| { - let d = dev.lock().unwrap(); + vmm.device_manager + .mmio_devices + .for_each_virtio_device(|_, _, device| { + let mmio_dev_locked = device.inner.lock().expect("Poisoned lock"); + let d = mmio_dev_locked.locked_device(); if d.is_activated() { d.mark_queue_memory_dirty(vmm.vm.guest_memory()) } else { @@ -334,7 +334,7 @@ pub fn restore_from_snapshot( ) -> Result>, RestoreFromSnapshotError> { let mut microvm_state = snapshot_state_from_file(&params.snapshot_path)?; for entry in &params.network_overrides { - let net_devices = &mut microvm_state.device_states.net_devices; + let net_devices = &mut microvm_state.device_states.mmio_state.net_devices; if let Some(device) = net_devices .iter_mut() .find(|x| x.device_state.id == entry.iface_id) @@ -599,7 +599,6 @@ mod tests { #[cfg(target_arch = "aarch64")] use crate::construct_kvm_mpidrs; use crate::devices::virtio::block::CacheType; - use crate::snapshot::Persist; use crate::vmm_config::balloon::BalloonDeviceConfig; use crate::vmm_config::net::NetworkInterfaceConfig; use crate::vmm_config::vsock::tests::default_config; @@ -660,14 +659,14 @@ mod tests { #[test] fn test_microvm_state_snapshot() { let vmm = default_vmm_with_devices(); - let states = vmm.mmio_device_manager.save();
+ let states = vmm.device_manager.save(); // Only checking that all devices are saved, actual device state // is tested by that device's tests. - assert_eq!(states.block_devices.len(), 1); - assert_eq!(states.net_devices.len(), 1); - assert!(states.vsock_device.is_some()); - assert!(states.balloon_device.is_some()); + assert_eq!(states.mmio_state.block_devices.len(), 1); + assert_eq!(states.mmio_state.net_devices.len(), 1); + assert!(states.mmio_state.vsock_device.is_some()); + assert!(states.mmio_state.balloon_device.is_some()); let vcpu_states = vec![VcpuState::default()]; #[cfg(target_arch = "aarch64")] @@ -684,7 +683,6 @@ mod tests { vm_state: vmm.vm.save_state(&mpidrs).unwrap(), #[cfg(target_arch = "x86_64")] vm_state: vmm.vm.save_state().unwrap(), - acpi_dev_state: vmm.acpi_device_manager.save(), }; let mut buf = vec![0; 10000]; @@ -695,8 +693,8 @@ mod tests { assert_eq!(restored_microvm_state.vm_info, microvm_state.vm_info); assert_eq!( - restored_microvm_state.device_states, - microvm_state.device_states + restored_microvm_state.device_states.mmio_state, + microvm_state.device_states.mmio_state ) } diff --git a/src/vmm/src/vstate/vcpu.rs b/src/vmm/src/vstate/vcpu.rs index 825af33eea4..f010500d16a 100644 --- a/src/vmm/src/vstate/vcpu.rs +++ b/src/vmm/src/vstate/vcpu.rs @@ -219,7 +219,7 @@ impl Vcpu { } /// Sets a MMIO bus for this vcpu. 
- pub fn set_mmio_bus(&mut self, mmio_bus: crate::devices::Bus) { + pub fn set_mmio_bus(&mut self, mmio_bus: Arc) { self.kvm_vcpu.peripherals.mmio_bus = Some(mmio_bus); } @@ -527,7 +527,9 @@ fn handle_kvm_exit( VcpuExit::MmioRead(addr, data) => { if let Some(mmio_bus) = &peripherals.mmio_bus { let _metric = METRICS.vcpu.exit_mmio_read_agg.record_latency_metrics(); - mmio_bus.read(addr, data); + if let Err(err) = mmio_bus.read(addr, data) { + warn!("Invalid MMIO read @ {addr:#x}:{:#x}: {err}", data.len()); + } METRICS.vcpu.exit_mmio_read.inc(); } Ok(VcpuEmulation::Handled) @@ -535,7 +537,9 @@ fn handle_kvm_exit( VcpuExit::MmioWrite(addr, data) => { if let Some(mmio_bus) = &peripherals.mmio_bus { let _metric = METRICS.vcpu.exit_mmio_write_agg.record_latency_metrics(); - mmio_bus.write(addr, data); + if let Err(err) = mmio_bus.write(addr, data) { + warn!("Invalid MMIO write @ {addr:#x}:{:#x}: {err}", data.len()); + } METRICS.vcpu.exit_mmio_write.inc(); } Ok(VcpuEmulation::Handled) @@ -766,13 +770,12 @@ pub(crate) mod tests { use std::sync::{Arc, Barrier, Mutex}; use linux_loader::loader::KernelLoader; + use vm_device::BusDevice; use vmm_sys_util::errno; use super::*; use crate::RECV_TIMEOUT_SEC; use crate::arch::{BootProtocol, EntryPoint}; - use crate::devices::BusDevice; - use crate::devices::bus::DummyDevice; use crate::seccomp::get_empty_filters; use crate::utils::mib_to_bytes; use crate::utils::signal::validate_signal_num; @@ -782,6 +785,16 @@ pub(crate) mod tests { use crate::vstate::vm::Vm; use crate::vstate::vm::tests::setup_vm_with_memory; + struct DummyDevice; + + impl BusDevice for DummyDevice { + fn read(&mut self, _base: u64, _offset: u64, _data: &mut [u8]) {} + + fn write(&mut self, _base: u64, _offset: u64, _data: &[u8]) -> Option> { + None + } + } + #[test] fn test_handle_kvm_exit() { let (_, _, mut vcpu) = setup_vcpu(0x1000); @@ -876,8 +889,8 @@ pub(crate) mod tests { ) ); - let mut bus = crate::devices::Bus::new(); - let dummy =
Arc::new(Mutex::new(BusDevice::Dummy(DummyDevice))); + let bus = Arc::new(vm_device::Bus::new()); + let dummy = Arc::new(Mutex::new(DummyDevice)); bus.insert(dummy, 0x10, 0x10).unwrap(); vcpu.set_mmio_bus(bus); let addr = 0x10; @@ -1020,7 +1033,7 @@ pub(crate) mod tests { fn test_set_mmio_bus() { let (_, _, mut vcpu) = setup_vcpu(0x1000); assert!(vcpu.kvm_vcpu.peripherals.mmio_bus.is_none()); - vcpu.set_mmio_bus(crate::devices::Bus::new()); + vcpu.set_mmio_bus(Arc::new(vm_device::Bus::new())); assert!(vcpu.kvm_vcpu.peripherals.mmio_bus.is_some()); } diff --git a/src/vmm/tests/devices.rs b/src/vmm/tests/devices.rs index 1850bf540b0..e86f4765af4 100644 --- a/src/vmm/tests/devices.rs +++ b/src/vmm/tests/devices.rs @@ -8,6 +8,7 @@ use std::sync::{Arc, Mutex}; use event_manager::{EventManager, SubscriberOps}; use libc::EFD_NONBLOCK; +use vm_device::BusDevice; use vm_superio::Serial; use vmm::devices::legacy::serial::SerialOut; use vmm::devices::legacy::{EventFdTrigger, SerialEventsWrapper, SerialWrapper}; @@ -91,7 +92,7 @@ fn test_issue_serial_hangup_anon_pipe_while_registered_stdin() { serial .lock() .unwrap() - .bus_read(data_bus_offset, &mut data[i..=i]); + .read(0x0, data_bus_offset, &mut data[i..=i]); } assert!(data[..31] == dummy_data[..31]); @@ -138,7 +139,7 @@ fn test_issue_serial_hangup_anon_pipe_while_registered_stdin() { serial .lock() .unwrap() - .bus_read(data_bus_offset, &mut data[i..=i]); + .read(0x0, data_bus_offset, &mut data[i..=i]); } // Process the kick stdin event generated by the reading of the 64th byte of the serial FIFO. @@ -152,7 +153,7 @@ fn test_issue_serial_hangup_anon_pipe_while_registered_stdin() { serial .lock() .unwrap() - .bus_read(data_bus_offset, &mut data[i..=i]); + .read(0x0, data_bus_offset, &mut data[i..=i]); } // We try to read again, but we detect that stdin received previously EOF. 
@@ -239,7 +240,7 @@ fn test_issue_serial_hangup_anon_pipe_while_unregistered_stdin() { serial .lock() .unwrap() - .bus_read(data_bus_offset, &mut data[i..=i]); + .read(0x0, data_bus_offset, &mut data[i..=i]); } assert!(data[..31] == dummy_data[..31]); @@ -289,7 +290,7 @@ fn test_issue_serial_hangup_anon_pipe_while_unregistered_stdin() { serial .lock() .unwrap() - .bus_read(data_bus_offset, &mut data[i..=i]); + .read(0x0, data_bus_offset, &mut data[i..=i]); } // Process the kick stdin event generated by the reading of the 64th byte of the serial FIFO. @@ -305,7 +306,7 @@ fn test_issue_serial_hangup_anon_pipe_while_unregistered_stdin() { serial .lock() .unwrap() - .bus_read(data_bus_offset, &mut data[i..=i]); + .read(0x0, data_bus_offset, &mut data[i..=i]); } // We try to read again, but we detect that stdin received previously EOF. diff --git a/src/vmm/tests/integration_tests.rs b/src/vmm/tests/integration_tests.rs index 55fb07c1aae..6982bf08c5b 100644 --- a/src/vmm/tests/integration_tests.rs +++ b/src/vmm/tests/integration_tests.rs @@ -227,7 +227,7 @@ fn verify_create_snapshot(is_diff: bool) -> (TempFile, TempFile) { // Check that we can deserialize the microVM state from `snapshot_file`. let snapshot_path = snapshot_file.as_path().to_path_buf(); let snapshot_file_metadata = std::fs::metadata(snapshot_path).unwrap(); - let snapshot_len = snapshot_file_metadata.len() as usize; + let snapshot_len = snapshot_file_metadata.len().try_into().unwrap(); let (restored_microvm_state, _) = Snapshot::load::<_, MicrovmState>(&mut snapshot_file.as_file(), snapshot_len).unwrap(); @@ -235,9 +235,29 @@ fn verify_create_snapshot(is_diff: bool) -> (TempFile, TempFile) { // Verify deserialized data. // The default vmm has no devices and one vCPU. 
- assert_eq!(restored_microvm_state.device_states.block_devices.len(), 0); - assert_eq!(restored_microvm_state.device_states.net_devices.len(), 0); - assert!(restored_microvm_state.device_states.vsock_device.is_none()); + assert_eq!( + restored_microvm_state + .device_states + .mmio_state + .block_devices + .len(), + 0 + ); + assert_eq!( + restored_microvm_state + .device_states + .mmio_state + .net_devices + .len(), + 0 + ); + assert!( + restored_microvm_state + .device_states + .mmio_state + .vsock_device + .is_none() + ); assert_eq!(restored_microvm_state.vcpu_states.len(), 1); (snapshot_file, memory_file) diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py index 864c6d5eda9..a77aeb03ad1 100644 --- a/tests/integration_tests/functional/test_api.py +++ b/tests/integration_tests/functional/test_api.py @@ -773,7 +773,7 @@ def test_send_ctrl_alt_del(uvm_plain_any): def _drive_patch(test_microvm, io_engine): """Exercise drive patch test scenarios.""" # Patches without mandatory fields for virtio block are not allowed. - expected_msg = "Unable to patch the block device: Device manager error: Running method expected different backend. Please verify the request arguments" + expected_msg = "Unable to patch the block device: MMIO Device manager error: Running method expected different backend. Please verify the request arguments" with pytest.raises(RuntimeError, match=expected_msg): test_microvm.api.drive.patch(drive_id="scratch") @@ -815,7 +815,7 @@ def _drive_patch(test_microvm, io_engine): ) # Updates to `path_on_host` with an invalid path are not allowed. 
- expected_msg = f"Unable to patch the block device: Device manager error: Virtio backend error: Error manipulating the backing file: No such file or directory (os error 2) {drive_path} Please verify the request arguments" + expected_msg = f"Unable to patch the block device: MMIO Device manager error: Virtio backend error: Error manipulating the backing file: No such file or directory (os error 2) {drive_path} Please verify the request arguments" with pytest.raises(RuntimeError, match=re.escape(expected_msg)): test_microvm.api.drive.patch(drive_id="scratch", path_on_host=drive_path) diff --git a/tests/integration_tests/functional/test_serial_io.py b/tests/integration_tests/functional/test_serial_io.py index aee9047f531..7a8c0b8c79d 100644 --- a/tests/integration_tests/functional/test_serial_io.py +++ b/tests/integration_tests/functional/test_serial_io.py @@ -146,6 +146,7 @@ def test_serial_dos(uvm_plain_any): vcpu_count=1, boot_args="console=ttyS0 reboot=k panic=1 pci=off", ) + microvm.add_net_iface() microvm.start() # Open an fd for firecracker process terminal.