@@ -28,81 +28,89 @@ fn main() {
28
28
29
29
let mut runtime = Runtime :: new ( stream, file) ;
30
30
runtime. install_panic_hook ( ) ;
31
- runtime. run ( |uffd_handler : & mut UffdHandler | {
32
- // !DISCLAIMER!
33
- // When using UFFD together with the balloon device, this handler needs to deal with
34
- // `remove` and `pagefault` events. There are multiple things to keep in mind in
35
- // such setups:
36
- //
37
- // As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN
38
- // -----------------------------------------------------------------------------------
39
- //
40
- // This means we cannot process UFFD events simply one-by-one anymore - if a `remove` event
41
- // arrives, we need to pre-fetch all other events up to the `remove` event, to unblock the
42
- // UFFD, and then go back to the process the pre-fetched events.
43
- //
44
- // UFFD might receive events in not in their causal order
45
- // -----------------------------------------------------
46
- //
47
- // For example, the guest
48
- // kernel might first respond to a balloon inflation by freeing some memory, and
49
- // telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the
50
- // free memory range, which causes a `remove` event to be sent to UFFD. Then, the
51
- // guest kernel might immediately fault the page in again (for example because
52
- // default_on_oom was set). which causes a `pagefault` event to be sent to UFFD.
53
- //
54
- // However, the pagefault will be triggered from inside KVM on the vCPU thread, while the
55
- // balloon device is handled by Firecracker on its VMM thread. This means that potentially
56
- // this handler can receive the `pagefault` _before_ the `remove` event.
57
- //
58
- // This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events
59
- // to make sure no `remove` event is blocking us can result in the handler acting on
60
- // the `pagefault` event before the `remove` message (despite the `remove` event being
61
- // in the causal past of the `pagefault` event), which means that we will fault in a page
62
- // from the snapshot file, while really we should be faulting in a zero page.
63
- //
64
- // In this example handler, we ignore this problem, to avoid
65
- // complexity (under the assumption that the guest kernel will zero a newly faulted in
66
- // page anyway). A production handler will most likely want to ensure that `remove`
67
- // events for a specific range are always handled before `pagefault` events.
68
- //
69
- // Lastly, we still need to deal with the race condition where a `remove` event arrives
70
- // in the UFFD queue after we got done reading all events, in which case we need to go
71
- // back to reading more events before we can continue processing `pagefault`s.
72
- let mut deferred_events = Vec :: new ( ) ;
31
+ runtime. run (
32
+ |uffd_handler : & mut UffdHandler | {
33
+ // !DISCLAIMER!
34
+ // When using UFFD together with the balloon device, this handler needs to deal with
35
+ // `remove` and `pagefault` events. There are multiple things to keep in mind in
36
+ // such setups:
37
+ //
38
+ // As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN
39
+ // -----------------------------------------------------------------------------------
40
+ //
41
+ // This means we cannot process UFFD events simply one-by-one anymore - if a `remove`
42
+ // event arrives, we need to pre-fetch all other events up to the `remove`
43
+ // event, to unblock the UFFD, and then go back to process the
44
+ // pre-fetched events.
45
+ //
46
+ // UFFD might receive events not in their causal order
47
+ // -----------------------------------------------------
48
+ //
49
+ // For example, the guest
50
+ // kernel might first respond to a balloon inflation by freeing some memory, and
51
+ // telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the
52
+ // free memory range, which causes a `remove` event to be sent to UFFD. Then, the
53
+ // guest kernel might immediately fault the page in again (for example because
54
+ // deflate_on_oom was set), which causes a `pagefault` event to be sent to UFFD.
55
+ //
56
+ // However, the pagefault will be triggered from inside KVM on the vCPU thread, while
57
+ // the balloon device is handled by Firecracker on its VMM thread. This
58
+ // means that potentially this handler can receive the `pagefault` _before_
59
+ // the `remove` event.
60
+ //
61
+ // This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events
62
+ // to make sure no `remove` event is blocking us can result in the handler acting on
63
+ // the `pagefault` event before the `remove` message (despite the `remove` event being
64
+ // in the causal past of the `pagefault` event), which means that we will fault in a
65
+ // page from the snapshot file, while really we should be faulting in a zero
66
+ // page.
67
+ //
68
+ // In this example handler, we ignore this problem, to avoid
69
+ // complexity (under the assumption that the guest kernel will zero a newly faulted in
70
+ // page anyway). A production handler will most likely want to ensure that `remove`
71
+ // events for a specific range are always handled before `pagefault` events.
72
+ //
73
+ // Lastly, we still need to deal with the race condition where a `remove` event arrives
74
+ // in the UFFD queue after we got done reading all events, in which case we need to go
75
+ // back to reading more events before we can continue processing `pagefault`s.
76
+ let mut deferred_events = Vec :: new ( ) ;
73
77
74
- loop {
75
- // First, try events that we couldn't handle last round
76
- let mut events_to_handle = Vec :: from_iter ( deferred_events. drain ( ..) ) ;
78
+ loop {
79
+ // First, try events that we couldn't handle last round
80
+ let mut events_to_handle = Vec :: from_iter ( deferred_events. drain ( ..) ) ;
77
81
78
- // Read all events from the userfaultfd.
79
- while let Some ( event) = uffd_handler. read_event ( ) . expect ( "Failed to read uffd_msg" ) {
80
- events_to_handle. push ( event) ;
81
- }
82
+ // Read all events from the userfaultfd.
83
+ while let Some ( event) = uffd_handler. read_event ( ) . expect ( "Failed to read uffd_msg" )
84
+ {
85
+ events_to_handle. push ( event) ;
86
+ }
82
87
83
- for event in events_to_handle. drain ( ..) {
84
- // We expect to receive either a Page Fault or `remove`
85
- // event (if the balloon device is enabled).
86
- match event {
87
- userfaultfd:: Event :: Pagefault { addr, .. } => {
88
- if !uffd_handler. serve_pf ( addr. cast ( ) , uffd_handler. page_size ) {
89
- deferred_events. push ( event) ;
88
+ for event in events_to_handle. drain ( ..) {
89
+ // We expect to receive either a Page Fault or `remove`
90
+ // event (if the balloon device is enabled).
91
+ match event {
92
+ userfaultfd:: Event :: Pagefault { addr, .. } => {
93
+ if !uffd_handler. serve_pf ( addr. cast ( ) , uffd_handler. page_size ) {
94
+ deferred_events. push ( event) ;
95
+ }
90
96
}
97
+ userfaultfd:: Event :: Remove { start, end } => {
98
+ uffd_handler. mark_range_removed ( start as u64 , end as u64 )
99
+ }
100
+ _ => panic ! ( "Unexpected event on userfaultfd" ) ,
91
101
}
92
- userfaultfd:: Event :: Remove { start, end } => {
93
- uffd_handler. mark_range_removed ( start as u64 , end as u64 )
94
- }
95
- _ => panic ! ( "Unexpected event on userfaultfd" ) ,
96
102
}
97
- }
98
103
99
- // We assume that really only the above removed/pagefault interaction can result in
100
- // deferred events. In that scenario, the loop will always terminate (unless
101
- // newly arriving `remove` events end up indefinitely blocking it, but there's nothing
102
- // we can do about that, and it's a largely theoretical problem).
103
- if deferred_events. is_empty ( ) {
104
- break ;
104
+ // We assume that really only the above removed/pagefault interaction can result in
105
+ // deferred events. In that scenario, the loop will always terminate (unless
106
+ // newly arriving `remove` events end up indefinitely blocking it, but there's
107
+ // nothing we can do about that, and it's a largely theoretical
108
+ // problem).
109
+ if deferred_events. is_empty ( ) {
110
+ break ;
111
+ }
105
112
}
106
- }
107
- } ) ;
113
+ } ,
114
+ |_uffd_handler : & mut UffdHandler , _offset : usize | { } ,
115
+ ) ;
108
116
}
0 commit comments