Skip to content

Commit 4e10e54

Browse files
committed
test(uffd_utils): add handling for FaultRequest in secret freedom
There are two ways a UFFD handler receives a fault notification if Secret Freedom is enabled (which is inferred from 3 fds sent by Firecracker instead of 1): - a VMM- or KVM-triggered fault is delivered via a minor UFFD fault event. The handler is supposed to respond to it via memcpying the content of the page (if the page hasn't already been populated) followed by a UFFDIO_CONTINUE call. - a vCPU-triggered fault is delivered via a FaultRequest message on the UDS socket. The handler is supposed to reply with a pwrite64 call on the guest_memfd to populate the page followed by a FaultReply message on the UDS socket. In both cases, the handler also needs to clear the bit in the userfault bitmap at the corresponding offset in order to stop fault notifications for the same page. UFFD handlers use the userfault bitmap for two purposes: - communicate to the kernel whether a fault at the corresponding guest_memfd offset will cause a VM exit - keep track of pages that have already been populated in order to avoid overwriting the content of the page that is already initialised. Signed-off-by: Nikita Kalyazin <[email protected]>
1 parent 74ba592 commit 4e10e54

File tree

3 files changed

+257
-21
lines changed

3 files changed

+257
-21
lines changed

src/firecracker/examples/uffd/fault_all_handler.rs

Lines changed: 62 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@
88
mod uffd_utils;
99

1010
use std::fs::File;
11+
use std::os::fd::AsRawFd;
1112
use std::os::unix::net::UnixListener;
1213

1314
use uffd_utils::{Runtime, UffdHandler};
1415
use utils::time::{ClockType, get_time_us};
1516

17+
use crate::uffd_utils::uffd_continue;
18+
1619
fn main() {
1720
let mut args = std::env::args();
1821
let uffd_sock_path = args.nth(1).expect("No socket path given");
@@ -34,19 +37,69 @@ fn main() {
3437
.expect("Failed to read uffd_msg")
3538
.expect("uffd_msg not ready");
3639

37-
match event {
38-
userfaultfd::Event::Pagefault { .. } => {
39-
let start = get_time_us(ClockType::Monotonic);
40-
for region in uffd_handler.mem_regions.clone() {
41-
uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size);
42-
}
43-
let end = get_time_us(ClockType::Monotonic);
40+
if let userfaultfd::Event::Pagefault { addr, .. } = event {
41+
let bit =
42+
uffd_handler.addr_to_offset(addr.cast()) as usize / uffd_handler.page_size;
43+
44+
// If Secret Free, we know if this is the first fault based on the userfault
45+
// bitmap state. Otherwise, we assume that we will ever only receive a single fault
46+
// event via UFFD.
47+
let are_we_faulted_yet = uffd_handler
48+
.userfault_bitmap
49+
.as_mut()
50+
.map_or(false, |bitmap| !bitmap.is_bit_set(bit));
4451

45-
println!("Finished Faulting All: {}us", end - start);
52+
if are_we_faulted_yet {
53+
// TODO: we currently ignore the result as we may attempt to
54+
// populate the page that is already present as we may receive
55+
// multiple minor fault events per page.
56+
let _ = uffd_continue(
57+
uffd_handler.uffd.as_raw_fd(),
58+
addr as _,
59+
uffd_handler.page_size as u64,
60+
)
61+
.inspect_err(|err| println!("Error during uffdio_continue: {:?}", err));
62+
} else {
63+
fault_all(uffd_handler, addr);
4664
}
47-
_ => panic!("Unexpected event on userfaultfd"),
4865
}
4966
},
5067
|_uffd_handler: &mut UffdHandler, _offset: usize| {},
5168
);
5269
}
70+
71+
fn fault_all(uffd_handler: &mut UffdHandler, fault_addr: *mut libc::c_void) {
72+
let start = get_time_us(ClockType::Monotonic);
73+
for region in uffd_handler.mem_regions.clone() {
74+
match uffd_handler.guest_memfd {
75+
None => {
76+
uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size);
77+
}
78+
Some(_) => {
79+
let written = uffd_handler.populate_via_write(region.offset as usize, region.size);
80+
81+
// This code is written under the assumption that the first fault triggered by
82+
// Firecracker is either due to an MSR write (on x86) or due to device restoration
83+
// reading from guest memory to check the virtio queues are sane (on
84+
// ARM). This will be reported via a UFFD minor fault which needs to
85+
// be handled via memcpy. Importantly, we get to the UFFD handler
86+
// with the actual guest_memfd page already faulted in, meaning pwrite will stop
87+
// once it gets to the offset of that page (e.g. written < region.size above).
88+
// Thus, to fault in everything, we now need to skip this one page, write the
89+
// remaining region, and then deal with the "gap" via uffd_handler.serve_pf().
90+
91+
if written < region.size - uffd_handler.page_size {
92+
let r = uffd_handler.populate_via_write(
93+
region.offset as usize + written + uffd_handler.page_size,
94+
region.size - written - uffd_handler.page_size,
95+
);
96+
assert_eq!(written + r, region.size - uffd_handler.page_size);
97+
}
98+
}
99+
}
100+
}
101+
uffd_handler.serve_pf(fault_addr.cast(), uffd_handler.page_size);
102+
let end = get_time_us(ClockType::Monotonic);
103+
104+
println!("Finished Faulting All: {}us", end - start);
105+
}

src/firecracker/examples/uffd/on_demand_handler.rs

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,13 @@
88
mod uffd_utils;
99

1010
use std::fs::File;
11+
use std::os::fd::AsRawFd;
1112
use std::os::unix::net::UnixListener;
1213

1314
use uffd_utils::{Runtime, UffdHandler};
1415

16+
use crate::uffd_utils::uffd_continue;
17+
1518
fn main() {
1619
let mut args = std::env::args();
1720
let uffd_sock_path = args.nth(1).expect("No socket path given");
@@ -87,8 +90,36 @@ fn main() {
8790
// event (if the balloon device is enabled).
8891
match event {
8992
userfaultfd::Event::Pagefault { addr, .. } => {
90-
if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
91-
deferred_events.push(event);
93+
let bit = uffd_handler.addr_to_offset(addr.cast()) as usize
94+
/ uffd_handler.page_size;
95+
96+
if uffd_handler.userfault_bitmap.is_some() {
97+
if uffd_handler
98+
.userfault_bitmap
99+
.as_mut()
100+
.unwrap()
101+
.is_bit_set(bit)
102+
{
103+
if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
104+
deferred_events.push(event);
105+
}
106+
} else {
107+
// TODO: we currently ignore the result as we may attempt to
108+
// populate the page that is already present as we may receive
109+
// multiple minor fault events per page.
110+
let _ = uffd_continue(
111+
uffd_handler.uffd.as_raw_fd(),
112+
addr as _,
113+
uffd_handler.page_size as u64,
114+
)
115+
.inspect_err(|err| {
116+
println!("uffdio_continue error: {:?}", err)
117+
});
118+
}
119+
} else {
120+
if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
121+
deferred_events.push(event);
122+
}
92123
}
93124
}
94125
userfaultfd::Event::Remove { start, end } => {
@@ -108,6 +139,17 @@ fn main() {
108139
}
109140
}
110141
},
111-
|_uffd_handler: &mut UffdHandler, _offset: usize| {},
142+
|uffd_handler: &mut UffdHandler, offset: usize| {
143+
let bytes_written = uffd_handler.populate_via_write(offset, uffd_handler.page_size);
144+
145+
if bytes_written == 0 {
146+
println!(
147+
"got a vcpu fault for an already populated page at offset {}",
148+
offset
149+
);
150+
} else {
151+
assert_eq!(bytes_written, uffd_handler.page_size);
152+
}
153+
},
112154
);
113155
}

0 commit comments

Comments
 (0)