Skip to content

Commit 85f95bf

Browse files
committed
test(uffd_utils): accept guest_memfd and bitmap memfd
Accept receiving 3 fds instead of 1, where fds[1] is guest_memfd and fds[2] is userfault bitmap memfd. Also handle the FaultRequest message over the UDS socket by calling a new callback in the Runtime and sending a FaultReply. TODO: sob patrick Signed-off-by: Nikita Kalyazin <[email protected]>
1 parent 8b99360 commit 85f95bf

File tree

4 files changed

+245
-111
lines changed

4 files changed

+245
-111
lines changed

src/firecracker/examples/uffd/fault_all_handler.rs

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -29,24 +29,27 @@ fn main() {
2929

3030
let mut runtime = Runtime::new(stream, file);
3131
runtime.install_panic_hook();
32-
runtime.run(|uffd_handler: &mut UffdHandler| {
33-
// Read an event from the userfaultfd.
34-
let event = uffd_handler
35-
.read_event()
36-
.expect("Failed to read uffd_msg")
37-
.expect("uffd_msg not ready");
38-
39-
match event {
40-
userfaultfd::Event::Pagefault { .. } => {
41-
let start = get_time_us(ClockType::Monotonic);
42-
for region in uffd_handler.mem_regions.clone() {
43-
uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size);
32+
runtime.run(
33+
|uffd_handler: &mut UffdHandler| {
34+
// Read an event from the userfaultfd.
35+
let event = uffd_handler
36+
.read_event()
37+
.expect("Failed to read uffd_msg")
38+
.expect("uffd_msg not ready");
39+
40+
match event {
41+
userfaultfd::Event::Pagefault { .. } => {
42+
let start = get_time_us(ClockType::Monotonic);
43+
for region in uffd_handler.mem_regions.clone() {
44+
uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size);
45+
}
46+
let end = get_time_us(ClockType::Monotonic);
47+
48+
println!("Finished Faulting All: {}us", end - start);
4449
}
45-
let end = get_time_us(ClockType::Monotonic);
46-
47-
println!("Finished Faulting All: {}us", end - start);
50+
_ => panic!("Unexpected event on userfaultfd"),
4851
}
49-
_ => panic!("Unexpected event on userfaultfd"),
50-
}
51-
});
52+
},
53+
|_uffd_handler: &mut UffdHandler, _offset: usize| {},
54+
);
5255
}

src/firecracker/examples/uffd/malicious_handler.rs

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,18 @@ fn main() {
2626
.expect("Cannot set non-blocking");
2727

2828
let mut runtime = Runtime::new(stream, file);
29-
runtime.run(|uffd_handler: &mut UffdHandler| {
30-
// Read an event from the userfaultfd.
31-
let event = uffd_handler
32-
.read_event()
33-
.expect("Failed to read uffd_msg")
34-
.expect("uffd_msg not ready");
35-
36-
if let userfaultfd::Event::Pagefault { .. } = event {
37-
panic!("Fear me! I am the malicious page fault handler.")
38-
}
39-
});
29+
runtime.run(
30+
|uffd_handler: &mut UffdHandler| {
31+
// Read an event from the userfaultfd.
32+
let event = uffd_handler
33+
.read_event()
34+
.expect("Failed to read uffd_msg")
35+
.expect("uffd_msg not ready");
36+
37+
if let userfaultfd::Event::Pagefault { .. } = event {
38+
panic!("Fear me! I am the malicious page fault handler.")
39+
}
40+
},
41+
|_uffd_handler: &mut UffdHandler, _offset: usize| {},
42+
);
4043
}

src/firecracker/examples/uffd/on_demand_handler.rs

Lines changed: 77 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -28,81 +28,89 @@ fn main() {
2828

2929
let mut runtime = Runtime::new(stream, file);
3030
runtime.install_panic_hook();
31-
runtime.run(|uffd_handler: &mut UffdHandler| {
32-
// !DISCLAIMER!
33-
// When using UFFD together with the balloon device, this handler needs to deal with
34-
// `remove` and `pagefault` events. There are multiple things to keep in mind in
35-
// such setups:
36-
//
37-
// As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN
38-
// -----------------------------------------------------------------------------------
39-
//
40-
// This means we cannot process UFFD events simply one-by-one anymore - if a `remove` event
41-
// arrives, we need to pre-fetch all other events up to the `remove` event, to unblock the
42-
// UFFD, and then go back to the process the pre-fetched events.
43-
//
44-
// UFFD might receive events in not in their causal order
45-
// -----------------------------------------------------
46-
//
47-
// For example, the guest
48-
// kernel might first respond to a balloon inflation by freeing some memory, and
49-
// telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the
50-
// free memory range, which causes a `remove` event to be sent to UFFD. Then, the
51-
// guest kernel might immediately fault the page in again (for example because
52-
// default_on_oom was set). which causes a `pagefault` event to be sent to UFFD.
53-
//
54-
// However, the pagefault will be triggered from inside KVM on the vCPU thread, while the
55-
// balloon device is handled by Firecracker on its VMM thread. This means that potentially
56-
// this handler can receive the `pagefault` _before_ the `remove` event.
57-
//
58-
// This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events
59-
// to make sure no `remove` event is blocking us can result in the handler acting on
60-
// the `pagefault` event before the `remove` message (despite the `remove` event being
61-
// in the causal past of the `pagefault` event), which means that we will fault in a page
62-
// from the snapshot file, while really we should be faulting in a zero page.
63-
//
64-
// In this example handler, we ignore this problem, to avoid
65-
// complexity (under the assumption that the guest kernel will zero a newly faulted in
66-
// page anyway). A production handler will most likely want to ensure that `remove`
67-
// events for a specific range are always handled before `pagefault` events.
68-
//
69-
// Lastly, we still need to deal with the race condition where a `remove` event arrives
70-
// in the UFFD queue after we got done reading all events, in which case we need to go
71-
// back to reading more events before we can continue processing `pagefault`s.
72-
let mut deferred_events = Vec::new();
31+
runtime.run(
32+
|uffd_handler: &mut UffdHandler| {
33+
// !DISCLAIMER!
34+
// When using UFFD together with the balloon device, this handler needs to deal with
35+
// `remove` and `pagefault` events. There are multiple things to keep in mind in
36+
// such setups:
37+
//
38+
// As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN
39+
// -----------------------------------------------------------------------------------
40+
//
41+
// This means we cannot process UFFD events simply one-by-one anymore - if a `remove`
42+
// event arrives, we need to pre-fetch all other events up to the `remove`
43+
// event, to unblock the UFFD, and then go back to process the
44+
// pre-fetched events.
45+
//
46+
// UFFD might receive events not in their causal order
47+
// -----------------------------------------------------
48+
//
49+
// For example, the guest
50+
// kernel might first respond to a balloon inflation by freeing some memory, and
51+
// telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the
52+
// free memory range, which causes a `remove` event to be sent to UFFD. Then, the
53+
// guest kernel might immediately fault the page in again (for example because
54+
// default_on_oom was set), which causes a `pagefault` event to be sent to UFFD.
55+
//
56+
// However, the pagefault will be triggered from inside KVM on the vCPU thread, while
57+
// the balloon device is handled by Firecracker on its VMM thread. This
58+
// means that potentially this handler can receive the `pagefault` _before_
59+
// the `remove` event.
60+
//
61+
// This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events
62+
// to make sure no `remove` event is blocking us can result in the handler acting on
63+
// the `pagefault` event before the `remove` message (despite the `remove` event being
64+
// in the causal past of the `pagefault` event), which means that we will fault in a
65+
// page from the snapshot file, while really we should be faulting in a zero
66+
// page.
67+
//
68+
// In this example handler, we ignore this problem, to avoid
69+
// complexity (under the assumption that the guest kernel will zero a newly faulted in
70+
// page anyway). A production handler will most likely want to ensure that `remove`
71+
// events for a specific range are always handled before `pagefault` events.
72+
//
73+
// Lastly, we still need to deal with the race condition where a `remove` event arrives
74+
// in the UFFD queue after we got done reading all events, in which case we need to go
75+
// back to reading more events before we can continue processing `pagefault`s.
76+
let mut deferred_events = Vec::new();
7377

74-
loop {
75-
// First, try events that we couldn't handle last round
76-
let mut events_to_handle = Vec::from_iter(deferred_events.drain(..));
78+
loop {
79+
// First, try events that we couldn't handle last round
80+
let mut events_to_handle = Vec::from_iter(deferred_events.drain(..));
7781

78-
// Read all events from the userfaultfd.
79-
while let Some(event) = uffd_handler.read_event().expect("Failed to read uffd_msg") {
80-
events_to_handle.push(event);
81-
}
82+
// Read all events from the userfaultfd.
83+
while let Some(event) = uffd_handler.read_event().expect("Failed to read uffd_msg")
84+
{
85+
events_to_handle.push(event);
86+
}
8287

83-
for event in events_to_handle.drain(..) {
84-
// We expect to receive either a Page Fault or `remove`
85-
// event (if the balloon device is enabled).
86-
match event {
87-
userfaultfd::Event::Pagefault { addr, .. } => {
88-
if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
89-
deferred_events.push(event);
88+
for event in events_to_handle.drain(..) {
89+
// We expect to receive either a Page Fault or `remove`
90+
// event (if the balloon device is enabled).
91+
match event {
92+
userfaultfd::Event::Pagefault { addr, .. } => {
93+
if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) {
94+
deferred_events.push(event);
95+
}
9096
}
97+
userfaultfd::Event::Remove { start, end } => {
98+
uffd_handler.mark_range_removed(start as u64, end as u64)
99+
}
100+
_ => panic!("Unexpected event on userfaultfd"),
91101
}
92-
userfaultfd::Event::Remove { start, end } => {
93-
uffd_handler.mark_range_removed(start as u64, end as u64)
94-
}
95-
_ => panic!("Unexpected event on userfaultfd"),
96102
}
97-
}
98103

99-
// We assume that really only the above removed/pagefault interaction can result in
100-
// deferred events. In that scenario, the loop will always terminate (unless
101-
// newly arriving `remove` events end up indefinitely blocking it, but there's nothing
102-
// we can do about that, and it's a largely theoretical problem).
103-
if deferred_events.is_empty() {
104-
break;
104+
// We assume that really only the above removed/pagefault interaction can result in
105+
// deferred events. In that scenario, the loop will always terminate (unless
106+
// newly arriving `remove` events end up indefinitely blocking it, but there's
107+
// nothing we can do about that, and it's a largely theoretical
108+
// problem).
109+
if deferred_events.is_empty() {
110+
break;
111+
}
105112
}
106-
}
107-
});
113+
},
114+
|_uffd_handler: &mut UffdHandler, _offset: usize| {},
115+
);
108116
}

0 commit comments

Comments
 (0)