Skip to content

Fixing VMM & RNG per-device metrics. (Minor Tap fix) #5196

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions src/vmm/src/devices/virtio/net/tap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,17 @@ pub mod tests {
generated::ifreq__bindgen_ty_1::default().ifrn_name.len()
});

// Empty name - The tap should be named "tap0" by default
// Empty name - The tap should be named by the kernel (e.g., "tap0", "tap1", etc.)
let tap = Tap::open_named("").unwrap();
assert_eq!(b"tap0\0\0\0\0\0\0\0\0\0\0\0\0", &tap.if_name);
assert_eq!("tap0", tap.if_name_as_str());
let tap_name_str = tap.if_name_as_str();

// Check that it starts with "tap" and the remainder is numeric.
assert!(
Regex::new(r"^tap\d+$").unwrap().is_match(tap_name_str),
"Generated tap name '{}' does not match expected pattern",
tap_name_str
);


// Test using '%d' to have the kernel assign an unused name,
// and that we correctly copy back that generated name
Expand Down
111 changes: 60 additions & 51 deletions src/vmm/src/devices/virtio/rng/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use aws_lc_rs::rand;
use vm_memory::GuestMemoryError;
use vmm_sys_util::eventfd::EventFd;

use super::metrics::METRICS;
use super::metrics::EntropyMetricsPerDevice;
use super::{RNG_NUM_QUEUES, RNG_QUEUE};
use crate::devices::DeviceError;
use crate::devices::virtio::device::{DeviceState, IrqTrigger, IrqType, VirtioDevice};
Expand Down Expand Up @@ -113,14 +113,15 @@ impl Entropy {
}

fn handle_one(&mut self) -> Result<u32, EntropyError> {
let global = EntropyMetricsPerDevice::alloc("global".to_string());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please see my comments for the vsock device

// If guest provided us with an empty buffer just return directly
if self.buffer.is_empty() {
return Ok(0);
}

let mut rand_bytes = vec![0; self.buffer.len() as usize];
rand::fill(&mut rand_bytes).inspect_err(|_| {
METRICS.host_rng_fails.inc();
global.host_rng_fails.inc();
})?;

// It is ok to unwrap here. We are writing `iovec.len()` bytes at offset 0.
Expand All @@ -129,12 +130,13 @@ impl Entropy {
}

fn process_entropy_queue(&mut self) {
let global = EntropyMetricsPerDevice::alloc("global".to_string());
let mut used_any = false;
while let Some(desc) = self.queues[RNG_QUEUE].pop() {
// This is safe since we checked in the event handler that the device is activated.
let mem = self.device_state.mem().unwrap();
let index = desc.index;
METRICS.entropy_event_count.inc();
global.entropy_event_count.inc();

// SAFETY: This descriptor chain points to a single `DescriptorChain` memory buffer,
// no other `IoVecBufferMut` object points to the same `DescriptorChain` at the same
Expand All @@ -151,33 +153,33 @@ impl Entropy {
// to handle once we do have budget.
if !self.rate_limit_request(u64::from(self.buffer.len())) {
debug!("entropy: throttling entropy queue");
METRICS.entropy_rate_limiter_throttled.inc();
global.entropy_rate_limiter_throttled.inc();
self.queues[RNG_QUEUE].undo_pop();
break;
}

self.handle_one().unwrap_or_else(|err| {
error!("entropy: {err}");
METRICS.entropy_event_fails.inc();
global.entropy_event_fails.inc();
0
})
}
Err(err) => {
error!("entropy: Could not parse descriptor chain: {err}");
METRICS.entropy_event_fails.inc();
global.entropy_event_fails.inc();
0
}
};

match self.queues[RNG_QUEUE].add_used(index, bytes) {
Ok(_) => {
used_any = true;
METRICS.entropy_bytes.add(bytes.into());
global.entropy_bytes.add(bytes.into());
}
Err(err) => {
error!("entropy: Could not add used descriptor to queue: {err}");
Self::rate_limit_replenish_request(&mut self.rate_limiter, bytes.into());
METRICS.entropy_event_fails.inc();
global.entropy_event_fails.inc();
// If we are not able to add a buffer to the used queue, something
// is probably seriously wrong, so just stop processing additional
// buffers
Expand All @@ -189,33 +191,34 @@ impl Entropy {
if used_any {
self.signal_used_queue().unwrap_or_else(|err| {
error!("entropy: {err:?}");
METRICS.entropy_event_fails.inc()
global.entropy_event_fails.inc()
});
}
}

pub(crate) fn process_entropy_queue_event(&mut self) {
let global = EntropyMetricsPerDevice::alloc("global".to_string());
if let Err(err) = self.queue_events[RNG_QUEUE].read() {
error!("Failed to read entropy queue event: {err}");
METRICS.entropy_event_fails.inc();
global.entropy_event_fails.inc();
} else if !self.rate_limiter.is_blocked() {
// We are not throttled, handle the entropy queue
self.process_entropy_queue();
} else {
METRICS.rate_limiter_event_count.inc();
global.rate_limiter_event_count.inc();
}
}

pub(crate) fn process_rate_limiter_event(&mut self) {
METRICS.rate_limiter_event_count.inc();
global.rate_limiter_event_count.inc();
match self.rate_limiter.event_handler() {
Ok(_) => {
// There might be enough budget now to process entropy requests.
self.process_entropy_queue();
}
Err(err) => {
error!("entropy: Failed to handle rate-limiter event: {err:?}");
METRICS.entropy_event_fails.inc();
global.entropy_event_fails.inc();
}
}
}
Expand Down Expand Up @@ -291,13 +294,15 @@ impl VirtioDevice for Entropy {
}

fn activate(&mut self, mem: GuestMemoryMmap) -> Result<(), ActivateError> {
let global = EntropyMetricsPerDevice::alloc("global".to_string());

for q in self.queues.iter_mut() {
q.initialize(&mem)
.map_err(ActivateError::QueueMemoryError)?;
}

self.activate_event.write(1).map_err(|_| {
METRICS.activate_fails.inc();
global.activate_fails.inc();
ActivateError::EventFd
})?;
self.device_state = DeviceState::Activated(mem);
Expand Down Expand Up @@ -454,6 +459,7 @@ mod tests {

#[test]
fn test_entropy_event() {
let global = EntropyMetricsPerDevice::alloc("global".to_string());
let mem = create_virtio_mem();
let mut th = VirtioTestHelper::<Entropy>::new(&mem, default_entropy());

Expand All @@ -462,29 +468,29 @@ mod tests {
// Add a read-only descriptor (this should fail)
th.add_desc_chain(RNG_QUEUE, 0, &[(0, 64, 0)]);

let entropy_event_fails = METRICS.entropy_event_fails.count();
let entropy_event_count = METRICS.entropy_event_count.count();
let entropy_bytes = METRICS.entropy_bytes.count();
let host_rng_fails = METRICS.host_rng_fails.count();
let entropy_event_fails = global.entropy_event_fails.count();
let entropy_event_count = global.entropy_event_count.count();
let entropy_bytes = global.entropy_bytes.count();
let host_rng_fails = global.host_rng_fails.count();
assert_eq!(th.emulate_for_msec(100).unwrap(), 1);
assert_eq!(METRICS.entropy_event_fails.count(), entropy_event_fails + 1);
assert_eq!(METRICS.entropy_event_count.count(), entropy_event_count + 1);
assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes);
assert_eq!(METRICS.host_rng_fails.count(), host_rng_fails);
assert_eq!(global.entropy_event_fails.count(), entropy_event_fails + 1);
assert_eq!(global.entropy_event_count.count(), entropy_event_count + 1);
assert_eq!(global.entropy_bytes.count(), entropy_bytes);
assert_eq!(global.host_rng_fails.count(), host_rng_fails);

// Add two good descriptors
th.add_desc_chain(RNG_QUEUE, 0, &[(1, 10, VIRTQ_DESC_F_WRITE)]);
th.add_desc_chain(RNG_QUEUE, 100, &[(2, 20, VIRTQ_DESC_F_WRITE)]);

let entropy_event_fails = METRICS.entropy_event_fails.count();
let entropy_event_count = METRICS.entropy_event_count.count();
let entropy_bytes = METRICS.entropy_bytes.count();
let host_rng_fails = METRICS.host_rng_fails.count();
let entropy_event_fails = global.entropy_event_fails.count();
let entropy_event_count = global.entropy_event_count.count();
let entropy_bytes = global.entropy_bytes.count();
let host_rng_fails = global.host_rng_fails.count();
assert_eq!(th.emulate_for_msec(100).unwrap(), 1);
assert_eq!(METRICS.entropy_event_fails.count(), entropy_event_fails);
assert_eq!(METRICS.entropy_event_count.count(), entropy_event_count + 2);
assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes + 30);
assert_eq!(METRICS.host_rng_fails.count(), host_rng_fails);
assert_eq!(global.entropy_event_fails.count(), entropy_event_fails);
assert_eq!(global.entropy_event_count.count(), entropy_event_count + 2);
assert_eq!(global.entropy_bytes.count(), entropy_bytes + 30);
assert_eq!(global.host_rng_fails.count(), host_rng_fails);

th.add_desc_chain(
RNG_QUEUE,
Expand All @@ -496,34 +502,36 @@ mod tests {
],
);

let entropy_event_fails = METRICS.entropy_event_fails.count();
let entropy_event_count = METRICS.entropy_event_count.count();
let entropy_bytes = METRICS.entropy_bytes.count();
let host_rng_fails = METRICS.host_rng_fails.count();
let entropy_event_fails = global.entropy_event_fails.count();
let entropy_event_count = global.entropy_event_count.count();
let entropy_bytes = global.entropy_bytes.count();
let host_rng_fails = global.host_rng_fails.count();
assert_eq!(th.emulate_for_msec(100).unwrap(), 1);
assert_eq!(METRICS.entropy_event_fails.count(), entropy_event_fails);
assert_eq!(METRICS.entropy_event_count.count(), entropy_event_count + 1);
assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes + 512);
assert_eq!(METRICS.host_rng_fails.count(), host_rng_fails);
assert_eq!(global.entropy_event_fails.count(), entropy_event_fails);
assert_eq!(global.entropy_event_count.count(), entropy_event_count + 1);
assert_eq!(global.entropy_bytes.count(), entropy_bytes + 512);
assert_eq!(global.host_rng_fails.count(), host_rng_fails);
}

#[test]
fn test_bad_rate_limiter_event() {
let global = EntropyMetricsPerDevice::alloc("global".to_string());
let mem = create_virtio_mem();
let mut th = VirtioTestHelper::<Entropy>::new(&mem, default_entropy());

th.activate_device(&mem);
let mut dev = th.device();

check_metric_after_block!(
&METRICS.entropy_event_fails,
&global.entropy_event_fails,
1,
dev.process_rate_limiter_event()
);
}

#[test]
fn test_bandwidth_rate_limiter() {
let global = EntropyMetricsPerDevice::alloc("global".to_string());
let mem = create_virtio_mem();
// Rate Limiter with 4000 bytes / sec allowance and no initial burst allowance
let device = Entropy::new(RateLimiter::new(4000, 0, 1000, 0, 0, 0).unwrap()).unwrap();
Expand All @@ -535,7 +543,7 @@ mod tests {
// buffer should be processed normally
th.add_desc_chain(RNG_QUEUE, 0, &[(0, 4000, VIRTQ_DESC_F_WRITE)]);
check_metric_after_block!(
METRICS.entropy_bytes,
global.entropy_bytes,
4000,
th.device().process_entropy_queue()
);
Expand All @@ -551,12 +559,12 @@ mod tests {
th.add_desc_chain(RNG_QUEUE, 0, &[(0, 4000, VIRTQ_DESC_F_WRITE)]);
th.add_desc_chain(RNG_QUEUE, 1, &[(1, 1000, VIRTQ_DESC_F_WRITE)]);
check_metric_after_block!(
METRICS.entropy_bytes,
global.entropy_bytes,
4000,
th.device().process_entropy_queue()
);
check_metric_after_block!(
METRICS.entropy_rate_limiter_throttled,
global.entropy_rate_limiter_throttled,
1,
th.device().process_entropy_queue()
);
Expand All @@ -565,12 +573,13 @@ mod tests {
// 250 msec should give enough time for replenishing 1000 bytes worth of tokens.
// Give it an extra 100 ms just to be sure the timer event reaches us from the kernel.
std::thread::sleep(Duration::from_millis(350));
check_metric_after_block!(METRICS.entropy_bytes, 1000, th.emulate_for_msec(100));
check_metric_after_block!(global.entropy_bytes, 1000, th.emulate_for_msec(100));
assert!(!th.device().rate_limiter().is_blocked());
}

#[test]
fn test_ops_rate_limiter() {
let global = EntropyMetricsPerDevice::alloc("global".to_string());
let mem = create_virtio_mem();
// Rate Limiter with unlimited bandwidth and allowance for 1 operation every 100 msec,
// (10 ops/sec), without initial burst.
Expand All @@ -583,7 +592,7 @@ mod tests {
// so this should succeed.
th.add_desc_chain(RNG_QUEUE, 0, &[(0, 4000, VIRTQ_DESC_F_WRITE)]);
check_metric_after_block!(
METRICS.entropy_bytes,
global.entropy_bytes,
4000,
th.device().process_entropy_queue()
);
Expand All @@ -593,30 +602,30 @@ mod tests {
std::thread::sleep(Duration::from_millis(1000));

// First one should succeed
let entropy_bytes = METRICS.entropy_bytes.count();
let entropy_bytes = global.entropy_bytes.count();
th.add_desc_chain(RNG_QUEUE, 0, &[(0, 64, VIRTQ_DESC_F_WRITE)]);
check_metric_after_block!(METRICS.entropy_bytes, 64, th.emulate_for_msec(100));
assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes + 64);
check_metric_after_block!(global.entropy_bytes, 64, th.emulate_for_msec(100));
assert_eq!(global.entropy_bytes.count(), entropy_bytes + 64);
// The rate limiter is not blocked yet.
assert!(!th.device().rate_limiter().is_blocked());
// But immediately asking another operation should block it because we have 1 op every 100
// msec.
th.add_desc_chain(RNG_QUEUE, 0, &[(0, 64, VIRTQ_DESC_F_WRITE)]);
check_metric_after_block!(
METRICS.entropy_rate_limiter_throttled,
global.entropy_rate_limiter_throttled,
1,
th.emulate_for_msec(50)
);
// Entropy bytes count should not have increased.
assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes + 64);
assert_eq!(global.entropy_bytes.count(), entropy_bytes + 64);
// After 100 msec (plus 50 msec for ensuring the event reaches us from the kernel), the
// timer of the rate limiter should fire saying that there's now more tokens available
check_metric_after_block!(
METRICS.rate_limiter_event_count,
global.rate_limiter_event_count,
1,
th.emulate_for_msec(150)
);
// The rate limiter event should have processed the pending buffer as well
assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes + 128);
assert_eq!(global.entropy_bytes.count(), entropy_bytes + 128);
}
}
Loading