@@ -152,74 +152,17 @@ impl<'a> BuilderExternalNetworking<'a> {
152152 // Check the planning input: there shouldn't be any external networking
153153 // resources in the database (the source of `input`) that we don't know
154154 // about from the parent blueprint.
155- for external_ip_entry in
156- input. network_resources ( ) . omicron_zone_external_ips ( )
157- {
158- // As above, ignore localhost (used by the test suite).
159- if external_ip_entry. ip . ip ( ) . is_loopback ( ) {
160- continue ;
161- }
162- if !external_ip_alloc. contains ( & external_ip_entry. ip ) ? {
163- bail ! (
164- "planning input contains unexpected external IP \
165- (IP not found in parent blueprint): {external_ip_entry:?}"
166- ) ;
167- }
168- }
169- for nic_entry in input. network_resources ( ) . omicron_zone_nics ( ) {
170- if !used_macs. contains ( & nic_entry. nic . mac ) {
171- bail ! (
172- "planning input contains unexpected NIC \
173- (MAC not found in parent blueprint): {nic_entry:?}"
174- ) ;
175- }
176- match nic_entry. nic . ip {
177- IpAddr :: V4 ( ip) if NEXUS_OPTE_IPV4_SUBNET . contains ( ip) => {
178- if !existing_nexus_v4_ips. contains ( & ip) {
179- bail ! (
180- "planning input contains unexpected NIC \
181- (IP not found in parent blueprint): {nic_entry:?}"
182- ) ;
183- }
184- }
185- IpAddr :: V4 ( ip) if NTP_OPTE_IPV4_SUBNET . contains ( ip) => {
186- if !existing_boundary_ntp_v4_ips. contains ( & ip) {
187- bail ! (
188- "planning input contains unexpected NIC \
189- (IP not found in parent blueprint): {nic_entry:?}"
190- ) ;
191- }
192- }
193- IpAddr :: V4 ( ip) if DNS_OPTE_IPV4_SUBNET . contains ( ip) => {
194- // TODO check existing_dns_v4_ips, once it exists
195- }
196- IpAddr :: V6 ( ip) if NEXUS_OPTE_IPV6_SUBNET . contains ( ip) => {
197- if !existing_nexus_v6_ips. contains ( & ip) {
198- bail ! (
199- "planning input contains unexpected NIC \
200- (IP not found in parent blueprint): {nic_entry:?}"
201- ) ;
202- }
203- }
204- IpAddr :: V6 ( ip) if NTP_OPTE_IPV6_SUBNET . contains ( ip) => {
205- if !existing_boundary_ntp_v6_ips. contains ( & ip) {
206- bail ! (
207- "planning input contains unexpected NIC \
208- (IP not found in parent blueprint): {nic_entry:?}"
209- ) ;
210- }
211- }
212- IpAddr :: V6 ( ip) if DNS_OPTE_IPV6_SUBNET . contains ( ip) => {
213- // TODO check existing_dns_v6_ips, once it exists
214- }
215- _ => {
216- bail ! (
217- "planning input contains unexpected NIC \
218- (IP not contained in known OPTE subnet): {nic_entry:?}"
219- )
220- }
221- }
222- }
155+ //
156+ // Logically this could be the first thing we do in this function, but
157+ // we have some tests that check error cases in the above block that
158+ // would also fail these checks (so reordering the checks would require
159+ // those tests to do more work to construct valid `input`s), and we
160+ // never expect this to fail in practice, so there's no use in "failing
161+ // fast".
162+ ensure_input_records_appear_in_parent_blueprint (
163+ parent_blueprint,
164+ input,
165+ ) ?;
223166
224167 // TODO-performance Building these iterators as "walk through the list
225168 // and skip anything we've used already" is fine as long as we're
@@ -333,6 +276,142 @@ impl<'a> BuilderExternalNetworking<'a> {
333276 }
334277}
335278
279+ // Helper to validate that the system hasn't gone off the rails. There should
280+ // never be any external networking resources in the planning input (which is
281+ // derived from the contents of CRDB) that we don't know about from the parent
282+ // blueprint. It's possible a given planning iteration could see such a state
283+ // there have been intermediate changes made by other Nexus instances; e.g.,
284+ //
285+ // 1. Nexus A generates a `PlanningInput` by reading from CRDB
286+ // 2. Nexus B executes on a target blueprint that removes IPs/NICs from
287+ // CRDB
288+ // 3. Nexus B regenerates a new blueprint and prunes the zone(s) associated
289+ // with the IPs/NICs from step 2
290+ // 4. Nexus B makes this new blueprint the target
291+ // 5. Nexus A attempts to run planning with its `PlanningInput` from step 1 but
292+ // the target blueprint from step 4; this will fail the following checks
293+ // because the input contains records that were removed in step 3
294+ //
295+ // We do not need to handle this class of error; it's a transient failure that
296+ // will clear itself up when Nexus A repeats its planning loop from the top and
297+ // generates a new `PlanningInput`.
298+ //
299+ // There may still be database records corresponding to _expunged_ zones, but
300+ // that's okay: it just means we haven't yet realized a blueprint where those
301+ // zones are expunged. And those should should still be in the blueprint (not
302+ // pruned) until their database records are cleaned up.
303+ //
304+ // It's also possible that there may be networking records in the database
305+ // assigned to zones that have been expunged, and our parent blueprint uses
306+ // those same records for new zones. This is also fine and expected, and is a
307+ // similar case to the previous paragraph: a zone with networking resources was
308+ // expunged, the database doesn't realize it yet, but can still move forward and
309+ // make planning decisions that reuse those resources for new zones.
310+ fn ensure_input_records_appear_in_parent_blueprint (
311+ parent_blueprint : & Blueprint ,
312+ input : & PlanningInput ,
313+ ) -> anyhow:: Result < ( ) > {
314+ let mut all_macs: HashSet < MacAddr > = HashSet :: new ( ) ;
315+ let mut all_nexus_nic_ips: HashSet < IpAddr > = HashSet :: new ( ) ;
316+ let mut all_boundary_ntp_nic_ips: HashSet < IpAddr > = HashSet :: new ( ) ;
317+ let mut all_external_ips: HashSet < OmicronZoneExternalIp > = HashSet :: new ( ) ;
318+
319+ // Unlike the construction of the external IP allocator and existing IPs
320+ // constructed above in `BuilderExternalNetworking::new()`, we do not
321+ // check for duplicates here: we could very well see reuse of IPs
322+ // between expunged zones or between expunged -> running zones.
323+ for ( _, z) in parent_blueprint. all_omicron_zones ( BlueprintZoneFilter :: All ) {
324+ let zone_type = & z. zone_type ;
325+ match zone_type {
326+ BlueprintZoneType :: BoundaryNtp ( ntp) => {
327+ all_boundary_ntp_nic_ips. insert ( ntp. nic . ip ) ;
328+ }
329+ BlueprintZoneType :: Nexus ( nexus) => {
330+ all_nexus_nic_ips. insert ( nexus. nic . ip ) ;
331+ }
332+ // TODO: external-dns
333+ _ => ( ) ,
334+ }
335+
336+ if let Some ( ( external_ip, nic) ) = zone_type. external_networking ( ) {
337+ // As above, ignore localhost (used by the test suite).
338+ if !external_ip. ip ( ) . is_loopback ( ) {
339+ all_external_ips. insert ( external_ip) ;
340+ }
341+ all_macs. insert ( nic. mac ) ;
342+ }
343+ }
344+ for external_ip_entry in
345+ input. network_resources ( ) . omicron_zone_external_ips ( )
346+ {
347+ // As above, ignore localhost (used by the test suite).
348+ if external_ip_entry. ip . ip ( ) . is_loopback ( ) {
349+ continue ;
350+ }
351+ if !all_external_ips. contains ( & external_ip_entry. ip ) {
352+ bail ! (
353+ "planning input contains unexpected external IP \
354+ (IP not found in parent blueprint): {external_ip_entry:?}"
355+ ) ;
356+ }
357+ }
358+ for nic_entry in input. network_resources ( ) . omicron_zone_nics ( ) {
359+ if !all_macs. contains ( & nic_entry. nic . mac ) {
360+ bail ! (
361+ "planning input contains unexpected NIC \
362+ (MAC not found in parent blueprint): {nic_entry:?}"
363+ ) ;
364+ }
365+ match nic_entry. nic . ip {
366+ IpAddr :: V4 ( ip) if NEXUS_OPTE_IPV4_SUBNET . contains ( ip) => {
367+ if !all_nexus_nic_ips. contains ( & ip. into ( ) ) {
368+ bail ! (
369+ "planning input contains unexpected NIC \
370+ (IP not found in parent blueprint): {nic_entry:?}"
371+ ) ;
372+ }
373+ }
374+ IpAddr :: V4 ( ip) if NTP_OPTE_IPV4_SUBNET . contains ( ip) => {
375+ if !all_boundary_ntp_nic_ips. contains ( & ip. into ( ) ) {
376+ bail ! (
377+ "planning input contains unexpected NIC \
378+ (IP not found in parent blueprint): {nic_entry:?}"
379+ ) ;
380+ }
381+ }
382+ IpAddr :: V4 ( ip) if DNS_OPTE_IPV4_SUBNET . contains ( ip) => {
383+ // TODO check all_dns_nic_ips, once it exists
384+ }
385+ IpAddr :: V6 ( ip) if NEXUS_OPTE_IPV6_SUBNET . contains ( ip) => {
386+ if !all_nexus_nic_ips. contains ( & ip. into ( ) ) {
387+ bail ! (
388+ "planning input contains unexpected NIC \
389+ (IP not found in parent blueprint): {nic_entry:?}"
390+ ) ;
391+ }
392+ }
393+ IpAddr :: V6 ( ip) if NTP_OPTE_IPV6_SUBNET . contains ( ip) => {
394+ if !all_boundary_ntp_nic_ips. contains ( & ip. into ( ) ) {
395+ bail ! (
396+ "planning input contains unexpected NIC \
397+ (IP not found in parent blueprint): {nic_entry:?}"
398+ ) ;
399+ }
400+ }
401+ IpAddr :: V6 ( ip) if DNS_OPTE_IPV6_SUBNET . contains ( ip) => {
402+ // TODO check all_dns_nic_ips, once it exists
403+ }
404+ _ => {
405+ bail ! (
406+ "planning input contains unexpected NIC \
407+ (IP not contained in known OPTE subnet): {nic_entry:?}"
408+ )
409+ }
410+ }
411+ }
412+ Ok ( ( ) )
413+ }
414+
336415#[ derive( Debug , Clone , Copy ) ]
337416pub ( super ) struct ExternalNetworkingChoice {
338417 pub ( super ) external_ip : IpAddr ,
@@ -440,6 +519,7 @@ impl<'a> ExternalIpAllocator<'a> {
440519 // Returns `Ok(true)` if we contain this IP exactly, `Ok(false)` if we do
441520 // not contain this IP, and an error if we contain a matching IP address but
442521 // a mismatched exclusiveness / SNAT-ness.
522+ #[ cfg( test) ]
443523 fn contains (
444524 & self ,
445525 external_ip : & OmicronZoneExternalIp ,
0 commit comments