Skip to content

Commit 74ba592

Browse files
committed
test(uffd_utils): accept guest_memfd and bitmap memfd
Accept receiving 3 fds instead of 1, where fds[1] is guest_memfd and fds[2] is userfault bitmap memfd. Also handle the FaultRequest message over the UDS socket by calling a new callback in the Runtime and sending a FaultReply. Signed-off-by: Nikita Kalyazin <[email protected]>
1 parent 023b27f commit 74ba592

File tree

4 files changed

+247
-111
lines changed

4 files changed

+247
-111
lines changed

src/firecracker/examples/uffd/fault_all_handler.rs

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,24 +26,27 @@ fn main() {
2626

2727
let mut runtime = Runtime::new(stream, file);
2828
runtime.install_panic_hook();
29-
runtime.run(|uffd_handler: &mut UffdHandler| {
30-
// Read an event from the userfaultfd.
31-
let event = uffd_handler
32-
.read_event()
33-
.expect("Failed to read uffd_msg")
34-
.expect("uffd_msg not ready");
35-
36-
match event {
37-
userfaultfd::Event::Pagefault { .. } => {
38-
let start = get_time_us(ClockType::Monotonic);
39-
for region in uffd_handler.mem_regions.clone() {
40-
uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size);
29+
runtime.run(
30+
|uffd_handler: &mut UffdHandler| {
31+
// Read an event from the userfaultfd.
32+
let event = uffd_handler
33+
.read_event()
34+
.expect("Failed to read uffd_msg")
35+
.expect("uffd_msg not ready");
36+
37+
match event {
38+
userfaultfd::Event::Pagefault { .. } => {
39+
let start = get_time_us(ClockType::Monotonic);
40+
for region in uffd_handler.mem_regions.clone() {
41+
uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size);
42+
}
43+
let end = get_time_us(ClockType::Monotonic);
44+
45+
println!("Finished Faulting All: {}us", end - start);
4146
}
42-
let end = get_time_us(ClockType::Monotonic);
43-
44-
println!("Finished Faulting All: {}us", end - start);
47+
_ => panic!("Unexpected event on userfaultfd"),
4548
}
46-
_ => panic!("Unexpected event on userfaultfd"),
47-
}
48-
});
49+
},
50+
|_uffd_handler: &mut UffdHandler, _offset: usize| {},
51+
);
4952
}

src/firecracker/examples/uffd/malicious_handler.rs

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,18 @@ fn main() {
2323
let (stream, _) = listener.accept().expect("Cannot listen on UDS socket");
2424

2525
let mut runtime = Runtime::new(stream, file);
26-
runtime.run(|uffd_handler: &mut UffdHandler| {
27-
// Read an event from the userfaultfd.
28-
let event = uffd_handler
29-
.read_event()
30-
.expect("Failed to read uffd_msg")
31-
.expect("uffd_msg not ready");
32-
33-
if let userfaultfd::Event::Pagefault { .. } = event {
34-
panic!("Fear me! I am the malicious page fault handler.")
35-
}
36-
});
26+
runtime.run(
27+
|uffd_handler: &mut UffdHandler| {
28+
// Read an event from the userfaultfd.
29+
let event = uffd_handler
30+
.read_event()
31+
.expect("Failed to read uffd_msg")
32+
.expect("uffd_msg not ready");
33+
34+
if let userfaultfd::Event::Pagefault { .. } = event {
35+
panic!("Fear me! I am the malicious page fault handler.")
36+
}
37+
},
38+
|_uffd_handler: &mut UffdHandler, _offset: usize| {},
39+
);
3740
}

src/firecracker/examples/uffd/on_demand_handler.rs

Lines changed: 77 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -25,81 +25,89 @@ fn main() {
2525

2626
let mut runtime = Runtime::new(stream, file);
2727
runtime.install_panic_hook();
28-
runtime.run(|uffd_handler: &mut UffdHandler| {
29-
// !DISCLAIMER!
30-
// When using UFFD together with the balloon device, this handler needs to deal with
31-
// `remove` and `pagefault` events. There are multiple things to keep in mind in
32-
// such setups:
33-
//
34-
// As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN
35-
// -----------------------------------------------------------------------------------
36-
//
37-
// This means we cannot process UFFD events simply one-by-one anymore - if a `remove` event
38-
// arrives, we need to pre-fetch all other events up to the `remove` event, to unblock the
39-
// UFFD, and then go back to process the pre-fetched events.
40-
//
41-
// UFFD might receive events not in their causal order
42-
// -----------------------------------------------------
43-
//
44-
// For example, the guest
45-
// kernel might first respond to a balloon inflation by freeing some memory, and
46-
// telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the
47-
// free memory range, which causes a `remove` event to be sent to UFFD. Then, the
48-
// guest kernel might immediately fault the page in again (for example because
49-
// deflate_on_oom was set), which causes a `pagefault` event to be sent to UFFD.
50-
//
51-
// However, the pagefault will be triggered from inside KVM on the vCPU thread, while the
52-
// balloon device is handled by Firecracker on its VMM thread. This means that potentially
53-
// this handler can receive the `pagefault` _before_ the `remove` event.
54-
//
55-
// This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events
56-
// to make sure no `remove` event is blocking us can result in the handler acting on
57-
// the `pagefault` event before the `remove` message (despite the `remove` event being
58-
// in the causal past of the `pagefault` event), which means that we will fault in a page
59-
// from the snapshot file, while really we should be faulting in a zero page.
60-
//
61-
// In this example handler, we ignore this problem, to avoid
62-
// complexity (under the assumption that the guest kernel will zero a newly faulted in
63-
// page anyway). A production handler will most likely want to ensure that `remove`
64-
// events for a specific range are always handled before `pagefault` events.
65-
//
66-
// Lastly, we still need to deal with the race condition where a `remove` event arrives
67-
// in the UFFD queue after we got done reading all events, in which case we need to go
68-
// back to reading more events before we can continue processing `pagefault`s.
69-
let mut deferred_events = Vec::new();
28+
runtime.run(
29+
|uffd_handler: &mut UffdHandler| {
30+
// !DISCLAIMER!
31+
// When using UFFD together with the balloon device, this handler needs to deal with
32+
// `remove` and `pagefault` events. There are multiple things to keep in mind in
33+
// such setups:
34+
//
35+
// As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN
36+
// -----------------------------------------------------------------------------------
37+
//
38+
// This means we cannot process UFFD events simply one-by-one anymore - if a `remove`
39+
// event arrives, we need to pre-fetch all other events up to the `remove`
40+
// event, to unblock the UFFD, and then go back to process the
41+
// pre-fetched events.
42+
//
43+
// UFFD might receive events not in their causal order
44+
// -----------------------------------------------------
45+
//
46+
// For example, the guest
47+
// kernel might first respond to a balloon inflation by freeing some memory, and
48+
// telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the
49+
// free memory range, which causes a `remove` event to be sent to UFFD. Then, the
50+
// guest kernel might immediately fault the page in again (for example because
51+
// deflate_on_oom was set), which causes a `pagefault` event to be sent to UFFD.
52+
//
53+
// However, the pagefault will be triggered from inside KVM on the vCPU thread, while
54+
// the balloon device is handled by Firecracker on its VMM thread. This
55+
// means that potentially this handler can receive the `pagefault` _before_
56+
// the `remove` event.
57+
//
58+
// This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events
59+
// to make sure no `remove` event is blocking us can result in the handler acting on
60+
// the `pagefault` event before the `remove` message (despite the `remove` event being
61+
// in the causal past of the `pagefault` event), which means that we will fault in a
62+
// page from the snapshot file, while really we should be faulting in a zero
63+
// page.
64+
//
65+
// In this example handler, we ignore this problem, to avoid
66+
// complexity (under the assumption that the guest kernel will zero a newly faulted in
67+
// page anyway). A production handler will most likely want to ensure that `remove`
68+
// events for a specific range are always handled before `pagefault` events.
69+
//
70+
// Lastly, we still need to deal with the race condition where a `remove` event arrives
71+
// in the UFFD queue after we got done reading all events, in which case we need to go
72+
// back to reading more events before we can continue processing `pagefault`s.
73+
let mut deferred_events = Vec::new();
7074

71-
loop {
72-
// First, try events that we couldn't handle last round
73-
let mut events_to_handle = Vec::from_iter(deferred_events.drain(..));
75+
loop {
76+
// First, try events that we couldn't handle last round
77+
let mut events_to_handle = Vec::from_iter(deferred_events.drain(..));
7478

75-
// Read all events from the userfaultfd.
76-
while let Some(event) = uffd_handler.read_event().expect("Failed to read uffd_msg") {
77-
events_to_handle.push(event);
78-
}
79+
// Read all events from the userfaultfd.
80+
while let Some(event) = uffd_handler.read_event().expect("Failed to read uffd_msg")
81+
{
82+
events_to_handle.push(event);
83+
}
7984

80-
for event in events_to_handle.drain(..) {
81-
// We expect to receive either a Page Fault or `remove`
82-
// event (if the balloon device is enabled).
83-
match event {
84-
userfaultfd::Event::Pagefault { addr, .. } => {
85-
if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
86-
deferred_events.push(event);
85+
for event in events_to_handle.drain(..) {
86+
// We expect to receive either a Page Fault or `remove`
87+
// event (if the balloon device is enabled).
88+
match event {
89+
userfaultfd::Event::Pagefault { addr, .. } => {
90+
if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
91+
deferred_events.push(event);
92+
}
8793
}
94+
userfaultfd::Event::Remove { start, end } => {
95+
uffd_handler.mark_range_removed(start as u64, end as u64)
96+
}
97+
_ => panic!("Unexpected event on userfaultfd"),
8898
}
89-
userfaultfd::Event::Remove { start, end } => {
90-
uffd_handler.mark_range_removed(start as u64, end as u64)
91-
}
92-
_ => panic!("Unexpected event on userfaultfd"),
9399
}
94-
}
95100

96-
// We assume that really only the above removed/pagefault interaction can result in
97-
// deferred events. In that scenario, the loop will always terminate (unless
98-
// newly arriving `remove` events end up indefinitely blocking it, but there's nothing
99-
// we can do about that, and it's a largely theoretical problem).
100-
if deferred_events.is_empty() {
101-
break;
101+
// We assume that really only the above removed/pagefault interaction can result in
102+
// deferred events. In that scenario, the loop will always terminate (unless
103+
// newly arriving `remove` events end up indefinitely blocking it, but there's
104+
// nothing we can do about that, and it's a largely theoretical
105+
// problem).
106+
if deferred_events.is_empty() {
107+
break;
108+
}
102109
}
103-
}
104-
});
110+
},
111+
|_uffd_handler: &mut UffdHandler, _offset: usize| {},
112+
);
105113
}

0 commit comments

Comments
 (0)