Skip to content

Commit d5e7aa8

Browse files
committed
test(uffd_utils): add handling for FaultRequest in secret freedom
There are two ways a UFFD handler receives a fault notification if Secret Freedom is enabled (which is inferred from 3 fds sent by Firecracker instead of 1): - a VMM- or KVM-triggered fault is delivered via a minor UFFD fault event. The handler is supposed to respond to it by memcpying the content of the page (if the page hasn't already been populated) followed by a UFFDIO_CONTINUE call. - a vCPU-triggered fault is delivered via a FaultRequest message on the UDS socket. The handler is supposed to reply with a pwrite64 call on the guest_memfd to populate the page followed by a FaultReply message on the UDS socket. In both cases, the handler also needs to clear the bit in the userfault bitmap at the corresponding offset in order to stop fault notifications for the same page. UFFD handlers use the userfault bitmap for two purposes: - communicate to the kernel whether a fault at the corresponding guest_memfd offset will cause a VM exit - keep track of pages that have already been populated in order to avoid overwriting the content of the page that is already initialised. Signed-off-by: Nikita Kalyazin <[email protected]>
1 parent 85f95bf commit d5e7aa8

File tree

3 files changed

+257
-21
lines changed

3 files changed

+257
-21
lines changed

src/firecracker/examples/uffd/fault_all_handler.rs

Lines changed: 62 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@
88
mod uffd_utils;
99

1010
use std::fs::File;
11+
use std::os::fd::AsRawFd;
1112
use std::os::unix::net::UnixListener;
1213

1314
use uffd_utils::{Runtime, UffdHandler};
1415
use utils::time::{ClockType, get_time_us};
1516

17+
use crate::uffd_utils::uffd_continue;
18+
1619
fn main() {
1720
let mut args = std::env::args();
1821
let uffd_sock_path = args.nth(1).expect("No socket path given");
@@ -37,19 +40,69 @@ fn main() {
3740
.expect("Failed to read uffd_msg")
3841
.expect("uffd_msg not ready");
3942

40-
match event {
41-
userfaultfd::Event::Pagefault { .. } => {
42-
let start = get_time_us(ClockType::Monotonic);
43-
for region in uffd_handler.mem_regions.clone() {
44-
uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size);
45-
}
46-
let end = get_time_us(ClockType::Monotonic);
43+
if let userfaultfd::Event::Pagefault { addr, .. } = event {
44+
let bit =
45+
uffd_handler.addr_to_offset(addr.cast()) as usize / uffd_handler.page_size;
46+
47+
// If Secret Free, we know if this is the first fault based on the userfault
48+
// bitmap state. Otherwise, we assume that we will only ever receive a single fault
49+
// event via UFFD.
50+
let are_we_faulted_yet = uffd_handler
51+
.userfault_bitmap
52+
.as_mut()
53+
.map_or(false, |bitmap| !bitmap.is_bit_set(bit));
4754

48-
println!("Finished Faulting All: {}us", end - start);
55+
if are_we_faulted_yet {
56+
// TODO: we currently ignore the result as we may attempt to
57+
// populate the page that is already present as we may receive
58+
// multiple minor fault events per page.
59+
let _ = uffd_continue(
60+
uffd_handler.uffd.as_raw_fd(),
61+
addr as _,
62+
uffd_handler.page_size as u64,
63+
)
64+
.inspect_err(|err| println!("Error during uffdio_continue: {:?}", err));
65+
} else {
66+
fault_all(uffd_handler, addr);
4967
}
50-
_ => panic!("Unexpected event on userfaultfd"),
5168
}
5269
},
5370
|_uffd_handler: &mut UffdHandler, _offset: usize| {},
5471
);
5572
}
73+
74+
fn fault_all(uffd_handler: &mut UffdHandler, fault_addr: *mut libc::c_void) {
75+
let start = get_time_us(ClockType::Monotonic);
76+
for region in uffd_handler.mem_regions.clone() {
77+
match uffd_handler.guest_memfd {
78+
None => {
79+
uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size);
80+
}
81+
Some(_) => {
82+
let written = uffd_handler.populate_via_write(region.offset as usize, region.size);
83+
84+
// This code is written under the assumption that the first fault triggered by
85+
// Firecracker is either due to an MSR write (on x86) or due to device restoration
86+
// reading from guest memory to check the virtio queues are sane (on
87+
// ARM). This will be reported via a UFFD minor fault which needs to
88+
// be handled via memcpy. Importantly, we get to the UFFD handler
89+
// with the actual guest_memfd page already faulted in, meaning pwrite will stop
90+
// once it gets to the offset of that page (e.g. written < region.size above).
91+
// Thus, to fault in everything, we now need to skip this one page, write the
92+
// remaining region, and then deal with the "gap" via uffd_handler.serve_pf().
93+
94+
if written < region.size - uffd_handler.page_size {
95+
let r = uffd_handler.populate_via_write(
96+
region.offset as usize + written + uffd_handler.page_size,
97+
region.size - written - uffd_handler.page_size,
98+
);
99+
assert_eq!(written + r, region.size - uffd_handler.page_size);
100+
}
101+
}
102+
}
103+
}
104+
uffd_handler.serve_pf(fault_addr.cast(), uffd_handler.page_size);
105+
let end = get_time_us(ClockType::Monotonic);
106+
107+
println!("Finished Faulting All: {}us", end - start);
108+
}

src/firecracker/examples/uffd/on_demand_handler.rs

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,13 @@
88
mod uffd_utils;
99

1010
use std::fs::File;
11+
use std::os::fd::AsRawFd;
1112
use std::os::unix::net::UnixListener;
1213

1314
use uffd_utils::{Runtime, UffdHandler};
1415

16+
use crate::uffd_utils::uffd_continue;
17+
1518
fn main() {
1619
let mut args = std::env::args();
1720
let uffd_sock_path = args.nth(1).expect("No socket path given");
@@ -90,8 +93,36 @@ fn main() {
9093
// event (if the balloon device is enabled).
9194
match event {
9295
userfaultfd::Event::Pagefault { addr, .. } => {
93-
if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
94-
deferred_events.push(event);
96+
let bit = uffd_handler.addr_to_offset(addr.cast()) as usize
97+
/ uffd_handler.page_size;
98+
99+
if uffd_handler.userfault_bitmap.is_some() {
100+
if uffd_handler
101+
.userfault_bitmap
102+
.as_mut()
103+
.unwrap()
104+
.is_bit_set(bit)
105+
{
106+
if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
107+
deferred_events.push(event);
108+
}
109+
} else {
110+
// TODO: we currently ignore the result as we may attempt to
111+
// populate the page that is already present as we may receive
112+
// multiple minor fault events per page.
113+
let _ = uffd_continue(
114+
uffd_handler.uffd.as_raw_fd(),
115+
addr as _,
116+
uffd_handler.page_size as u64,
117+
)
118+
.inspect_err(|err| {
119+
println!("uffdio_continue error: {:?}", err)
120+
});
121+
}
122+
} else {
123+
if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
124+
deferred_events.push(event);
125+
}
95126
}
96127
}
97128
userfaultfd::Event::Remove { start, end } => {
@@ -111,6 +142,17 @@ fn main() {
111142
}
112143
}
113144
},
114-
|_uffd_handler: &mut UffdHandler, _offset: usize| {},
145+
|uffd_handler: &mut UffdHandler, offset: usize| {
146+
let bytes_written = uffd_handler.populate_via_write(offset, uffd_handler.page_size);
147+
148+
if bytes_written == 0 {
149+
println!(
150+
"got a vcpu fault for an already populated page at offset {}",
151+
offset
152+
);
153+
} else {
154+
assert_eq!(bytes_written, uffd_handler.page_size);
155+
}
156+
},
115157
);
116158
}

0 commit comments

Comments
 (0)