diff --git a/dpd-client/tests/integration_tests/mcast.rs b/dpd-client/tests/integration_tests/mcast.rs index 1e342d9..8b2085f 100644 --- a/dpd-client/tests/integration_tests/mcast.rs +++ b/dpd-client/tests/integration_tests/mcast.rs @@ -53,6 +53,30 @@ impl ToIpAddr for types::UnderlayMulticastIpv6 { } } +/// Count table entries matching a specific IP address in any key field. +fn count_entries_for_ip(entries: &[types::TableEntry], ip: &str) -> usize { + entries + .iter() + .filter(|e| e.keys.values().any(|v| v.contains(ip))) + .count() +} + +/// Check if any table entry for the given IP has a `forward_vlan` action with +/// the specified VLAN ID. +fn has_vlan_action_for_ip( + entries: &[types::TableEntry], + ip: &str, + vlan: u16, +) -> bool { + entries.iter().any(|e| { + e.keys.values().any(|v| v.contains(ip)) + && e.action_args + .get("vlan_id") + .map(|v| v == &vlan.to_string()) + .unwrap_or(false) + }) +} + async fn check_counter_incremented( switch: &Switch, counter_name: &str, @@ -268,7 +292,8 @@ fn get_nat_target( match response { types::MulticastGroupResponse::Underlay { .. } => None, types::MulticastGroupResponse::External { - internal_forwarding, .. + internal_forwarding, + .. } => internal_forwarding.nat_target.as_ref(), } } @@ -393,8 +418,10 @@ fn prepare_expected_pkt( encapped.deparse().unwrap().to_vec() }; - let switch_port_mac = - switch.get_port_mac(switch_port.unwrap()).unwrap().to_string(); + let switch_port_mac = switch + .get_port_mac(switch_port.unwrap()) + .unwrap() + .to_string(); let mut forward_pkt = common::gen_external_geneve_packet( Endpoint::parse( @@ -539,6 +566,66 @@ async fn test_group_creation_with_validation() -> TestResult { _ => panic!("Expected ErrorResponse for invalid group ID"), } + // Test with reserved VLAN ID 0 (also invalid) + let external_vlan0 = types::MulticastGroupCreateExternalEntry { + group_ip: IpAddr::V4(MULTICAST_TEST_IPV4), + tag: Some(TEST_TAG.to_string()), + internal_forwarding: types::InternalForwarding { + nat_target: Some(nat_target.clone()), + }, + external_forwarding: types::ExternalForwarding { + vlan_id: Some(0), // Invalid: VLAN 0 is reserved + }, + sources: None, + }; + + let res = switch + .client + .multicast_group_create_external(&external_vlan0) + .await + .expect_err("Should fail with reserved VLAN ID 0"); + + match res { + Error::ErrorResponse(inner) => { + assert_eq!( + inner.status(), + 400, + "Expected 400 Bad Request status code for VLAN 0" + ); + } + _ => panic!("Expected ErrorResponse for reserved VLAN ID 0"), + } + + // Test with reserved VLAN ID 1 (also invalid) + let external_vlan1 = types::MulticastGroupCreateExternalEntry { + group_ip: IpAddr::V4(MULTICAST_TEST_IPV4), + tag: Some(TEST_TAG.to_string()), + internal_forwarding: types::InternalForwarding { + nat_target: Some(nat_target.clone()), + }, + external_forwarding: types::ExternalForwarding { + vlan_id: Some(1), // Invalid: VLAN 1 is reserved + }, + sources: None, + }; + + let res = switch + .client + .multicast_group_create_external(&external_vlan1) + .await + .expect_err("Should fail with reserved VLAN ID 1"); + + match res { + Error::ErrorResponse(inner) => { + assert_eq!( + inner.status(), + 400, + "Expected 400 Bad Request status code for VLAN 1" + ); + } + _ => panic!("Expected ErrorResponse for reserved VLAN ID 1"), + } + // Test with valid parameters // IPv4 groups are always external let external_valid = types::MulticastGroupCreateExternalEntry { @@ -581,7 +668,9 @@ async fn test_group_creation_with_validation() -> TestResult { ); // Clean up 
external first (references internal via NAT target), then internal - cleanup_test_group(switch, created.group_ip, TEST_TAG).await.unwrap(); + cleanup_test_group(switch, created.group_ip, TEST_TAG) + .await + .unwrap(); cleanup_test_group(switch, internal_multicast_ip, TEST_TAG).await } @@ -707,8 +796,52 @@ async fn test_vlan_propagation_to_internal() -> TestResult { "ff04::200".parse::().unwrap() ); - // Verify the admin-scoped group now has access to the VLAN via NAT target reference - // Check the bitmap table to see if VLAN 42 is properly set (this is where VLAN matters for P4) + // Verify IPv4 route table has ONE entry for the VLAN group. + // Route tables use simple dst_addr matching with forward_vlan(vid) action. + // VLAN isolation (preventing translation) is handled by NAT ingress tables. + let route_table = switch + .client + .table_dump("pipe.Ingress.l3_router.MulticastRouter4.tbl") + .await + .expect("Should dump IPv4 route table") + .into_inner(); + let group_entries: Vec<_> = route_table + .entries + .iter() + .filter(|e| e.keys.values().any(|v| v.contains("224.1.2.3"))) + .collect(); + assert_eq!( + group_entries.len(), + 1, + "Route table uses dst_addr only - 1 entry per group" + ); + // Verify the action is forward_vlan with VLAN 42 + assert!( + group_entries[0] + .action_args + .get("vlan_id") + .map(|v| v == "42") + .unwrap_or(false), + "Route entry should have forward_vlan(42) action" + ); + + // Verify NAT ingress table has TWO entries for VLAN isolation: + // 1. Untagged match -> forward (for decapsulated Geneve) + // 2. Tagged match with VLAN 42 -> forward (for already-tagged) + let nat_table = switch + .client + .table_dump("pipe.Ingress.nat_ingress.ingress_ipv4_mcast") + .await + .expect("Should dump NAT ingress table") + .into_inner(); + assert_eq!( + count_entries_for_ip(&nat_table.entries, "224.1.2.3"), + 2, + "NAT table should have 2 entries for VLAN isolation (untagged + tagged)" + ); + + // Verify the admin-scoped group now has access to the VLAN via NAT target + // reference let bitmap_table = switch .client .table_dump("pipe.Egress.mcast_egress.tbl_decap_ports") @@ -716,7 +849,8 @@ async fn test_vlan_propagation_to_internal() -> TestResult { .expect("Should clean up internal group") .into_inner(); - // Verify the admin-scoped group's bitmap entry has VLAN 42 from external group propagation + // Verify the admin-scoped group's bitmap entry has VLAN 42 from external + // group propagation assert!( bitmap_table .entries @@ -725,7 +859,8 @@ async fn test_vlan_propagation_to_internal() -> TestResult { "Admin-scoped group bitmap should have VLAN 42 from external group" ); - // Delete external group first since it references the internal group via NAT target + // Delete external group first since it references the internal group via + // NAT target cleanup_test_group(switch, created_external.group_ip, TEST_TAG) .await .unwrap(); @@ -784,6 +919,42 @@ async fn test_group_api_lifecycle() { ); assert_eq!(created.external_forwarding.vlan_id, Some(vlan_id)); + // Route table: 1 entry (dst_addr only, VLAN via action) + let route_table = switch + .client + .table_dump("pipe.Ingress.l3_router.MulticastRouter4.tbl") + .await + .expect("Should dump route table") + .into_inner(); + let route_entries: Vec<_> = route_table + .entries + .iter() + .filter(|e| e.keys.values().any(|v| v.contains("224.0.1.0"))) + .collect(); + assert_eq!( + route_entries.len(), + 1, + "Route table should have 1 entry per group (dst_addr only)" + ); + + // NAT table: 2 entries for VLAN groups 
(untagged + tagged match keys) + let nat_table = switch + .client + .table_dump("pipe.Ingress.nat_ingress.ingress_ipv4_mcast") + .await + .expect("Should dump NAT table") + .into_inner(); + let nat_entries: Vec<_> = nat_table + .entries + .iter() + .filter(|e| e.keys.values().any(|v| v.contains("224.0.1.0"))) + .collect(); + assert_eq!( + nat_entries.len(), + 2, + "NAT table should have 2 entries for VLAN group (untagged + tagged)" + ); + // Get all groups and verify our group is included let groups = switch .client @@ -792,8 +963,9 @@ async fn test_group_api_lifecycle() { .await .expect("Should be able to list groups"); - let found_in_list = - groups.iter().any(|g| get_external_group_id(g) == external_group_id); + let found_in_list = groups + .iter() + .any(|g| get_external_group_id(g) == external_group_id); assert!(found_in_list, "Created group should be in the list"); // Get groups by tag @@ -804,7 +976,10 @@ async fn test_group_api_lifecycle() { .await .expect("Should be able to get groups by tag"); - assert!(!tagged_groups.is_empty(), "Tagged group list should not be empty"); + assert!( + !tagged_groups.is_empty(), + "Tagged group list should not be empty" + ); let found_by_tag = tagged_groups .iter() .any(|g| get_external_group_id(g) == external_group_id); @@ -904,15 +1079,18 @@ async fn test_group_api_lifecycle() { .expect("Should be able to list groups"); // Check if the specific deleted group is still in the list - let deleted_group_still_in_list = - groups_after_delete.iter().any(|g| get_group_ip(g) == group_ip); + let deleted_group_still_in_list = groups_after_delete + .iter() + .any(|g| get_group_ip(g) == group_ip); assert!( !deleted_group_still_in_list, "Deleted group should not be in the list" ); // Clean up the internal group (external was already deleted above) - cleanup_test_group(switch, internal_multicast_ip, TEST_TAG).await.unwrap(); + cleanup_test_group(switch, internal_multicast_ip, TEST_TAG) + .await + .unwrap(); } /// Tests tag validation behavior on multicast group deletion. 
@@ -959,8 +1137,10 @@ async fn test_multicast_del_tag_validation() -> TestResult { // Attempt delete with wrong tag - should fail let del_tag = make_tag(TAG_B); - let wrong_tag_result = - switch.client.multicast_group_delete(&tagged_group_ip, &del_tag).await; + let wrong_tag_result = switch + .client + .multicast_group_delete(&tagged_group_ip, &del_tag) + .await; match &wrong_tag_result { Err(Error::ErrorResponse(resp)) => { @@ -1029,7 +1209,10 @@ async fn test_multicast_del_tag_validation() -> TestResult { .into_inner(); // The group should have an auto-generated tag - assert!(!created.tag.is_empty(), "Group should have auto-generated tag"); + assert!( + !created.tag.is_empty(), + "Group should have auto-generated tag" + ); let auto_tag = created.tag.clone(); // Delete with correct auto-generated tag should succeed @@ -1377,7 +1560,9 @@ async fn test_api_invalid_combinations() -> TestResult { ), } - cleanup_test_group(switch, created_ipv4.group_ip, TEST_TAG).await.unwrap(); + cleanup_test_group(switch, created_ipv4.group_ip, TEST_TAG) + .await + .unwrap(); cleanup_test_group(switch, created_non_admin.group_ip, TEST_TAG) .await .unwrap(); @@ -1453,13 +1638,18 @@ async fn test_ipv4_multicast_invalid_destination_mac() -> TestResult { // Generate packet with invalid MAC let to_send = common::gen_udp_packet(src_endpoint, dst_endpoint); - let test_pkt = TestPacket { packet: Arc::new(to_send), port: ingress }; + let test_pkt = TestPacket { + packet: Arc::new(to_send), + port: ingress, + }; // Expect no output packets (invalid MAC should be dropped) let expected_pkts = vec![]; - let ctr_baseline = - switch.get_counter("multicast_invalid_mac", None).await.unwrap(); + let ctr_baseline = switch + .get_counter("multicast_invalid_mac", None) + .await + .unwrap(); switch.packet_test(vec![test_pkt], expected_pkts).unwrap(); @@ -1524,13 +1714,18 @@ async fn test_ipv6_multicast_invalid_destination_mac() -> TestResult { // Generate packet with invalid MAC let to_send = common::gen_udp_packet(src_endpoint, dst_endpoint); - let test_pkt = TestPacket { packet: Arc::new(to_send), port: ingress }; + let test_pkt = TestPacket { + packet: Arc::new(to_send), + port: ingress, + }; // Expect no output packets (invalid MAC should be dropped) let expected_pkts = vec![]; - let ctr_baseline = - switch.get_counter("multicast_invalid_mac", None).await.unwrap(); + let ctr_baseline = switch + .get_counter("multicast_invalid_mac", None) + .await + .unwrap(); let port_label_ingress = switch.port_label(ingress).unwrap(); @@ -1621,7 +1816,10 @@ async fn test_multicast_ttl_zero() -> TestResult { // Set TTL to 0 (should be dropped) ipv4::Ipv4Hdr::adjust_ttl(&mut to_send, -255); // Set to 0 - let test_pkt = TestPacket { packet: Arc::new(to_send), port: ingress }; + let test_pkt = TestPacket { + packet: Arc::new(to_send), + port: ingress, + }; // Expect no output packets (should be dropped due to TTL=0) let expected_pkts = vec![]; @@ -1702,7 +1900,10 @@ async fn test_multicast_ttl_one() -> TestResult { // because the switch decrements it to 0 during processing ipv4::Ipv4Hdr::adjust_ttl(&mut to_send, -254); // Set to 1 - let test_pkt = TestPacket { packet: Arc::new(to_send), port: ingress }; + let test_pkt = TestPacket { + packet: Arc::new(to_send), + port: ingress, + }; // Expect no output packets (should be dropped due to TTL=1) let expected_pkts = vec![]; @@ -1825,17 +2026,28 @@ async fn test_ipv4_multicast_basic_replication_nat_ingress() -> TestResult { Some(egress3), ); - let test_pkt = TestPacket { packet: Arc::new(to_send), 
port: ingress }; + let test_pkt = TestPacket { + packet: Arc::new(to_send), + port: ingress, + }; let expected_pkts = vec![ - TestPacket { packet: Arc::new(to_recv1), port: egress1 }, - TestPacket { packet: Arc::new(to_recv2), port: egress3 }, + TestPacket { + packet: Arc::new(to_recv1), + port: egress1, + }, + TestPacket { + packet: Arc::new(to_recv2), + port: egress3, + }, ]; let port_label_ingress = switch.port_label(ingress).unwrap(); - let ctr_baseline_ingress = - switch.get_counter(&port_label_ingress, Some("ingress")).await.unwrap(); + let ctr_baseline_ingress = switch + .get_counter(&port_label_ingress, Some("ingress")) + .await + .unwrap(); switch.packet_test(vec![test_pkt], expected_pkts).unwrap(); @@ -1955,19 +2167,30 @@ async fn test_encapped_multicast_geneve_mcast_tag_to_external_members() &payload, ); - let test_pkt = TestPacket { packet: Arc::new(geneve_pkt), port: ingress }; + let test_pkt = TestPacket { + packet: Arc::new(geneve_pkt), + port: ingress, + }; // We expect the packet to be decapsulated and forwarded to both egress // ports let expected_pkts = vec![ - TestPacket { packet: Arc::new(expected_pkt1), port: egress1 }, - TestPacket { packet: Arc::new(expected_pkt2), port: egress2 }, + TestPacket { + packet: Arc::new(expected_pkt1), + port: egress1, + }, + TestPacket { + packet: Arc::new(expected_pkt2), + port: egress2, + }, ]; let port_label_ingress = switch.port_label(ingress).unwrap(); - let ctr_baseline_ingress = - switch.get_counter(&port_label_ingress, Some("ingress")).await.unwrap(); + let ctr_baseline_ingress = switch + .get_counter(&port_label_ingress, Some("ingress")) + .await + .unwrap(); switch.packet_test(vec![test_pkt], expected_pkts).unwrap(); @@ -2077,8 +2300,10 @@ async fn test_encapped_multicast_geneve_mcast_tag_to_underlay_members() &payload, ); - let test_pkt = - TestPacket { packet: Arc::new(geneve_pkt.clone()), port: ingress }; + let test_pkt = TestPacket { + packet: Arc::new(geneve_pkt.clone()), + port: ingress, + }; // Vlan should be stripped and we only replicate to underlay ports let recv_pkt1 = @@ -2089,14 +2314,22 @@ async fn test_encapped_multicast_geneve_mcast_tag_to_underlay_members() // We expect the packet not be decapped and forwarded to both egress // ports let expected_pkts = vec![ - TestPacket { packet: Arc::new(recv_pkt1), port: egress3 }, - TestPacket { packet: Arc::new(recv_pkt2), port: egress4 }, + TestPacket { + packet: Arc::new(recv_pkt1), + port: egress3, + }, + TestPacket { + packet: Arc::new(recv_pkt2), + port: egress4, + }, ]; let port_label_ingress = switch.port_label(ingress).unwrap(); - let ctr_baseline_ingress = - switch.get_counter(&port_label_ingress, Some("ingress")).await.unwrap(); + let ctr_baseline_ingress = switch + .get_counter(&port_label_ingress, Some("ingress")) + .await + .unwrap(); switch.packet_test(vec![test_pkt], expected_pkts).unwrap(); @@ -2210,8 +2443,10 @@ async fn test_encapped_multicast_geneve_mcast_tag_to_underlay_and_external_membe &payload, ); - let test_pkt = - TestPacket { packet: Arc::new(geneve_pkt.clone()), port: ingress }; + let test_pkt = TestPacket { + packet: Arc::new(geneve_pkt.clone()), + port: ingress, + }; // External ports should be replicated with Vlan information let recv_pkt1 = @@ -2229,16 +2464,30 @@ async fn test_encapped_multicast_geneve_mcast_tag_to_underlay_and_external_membe // We expect 2 packets to be decapped and forwarded to external ports // and 2 packets to be forwarded to underlay ports (still encapped) let expected_pkts = vec![ - TestPacket { packet: 
Arc::new(recv_pkt1), port: egress1 }, - TestPacket { packet: Arc::new(recv_pkt2), port: egress2 }, - TestPacket { packet: Arc::new(recv_pkt3), port: egress3 }, - TestPacket { packet: Arc::new(recv_pkt4), port: egress4 }, + TestPacket { + packet: Arc::new(recv_pkt1), + port: egress1, + }, + TestPacket { + packet: Arc::new(recv_pkt2), + port: egress2, + }, + TestPacket { + packet: Arc::new(recv_pkt3), + port: egress3, + }, + TestPacket { + packet: Arc::new(recv_pkt4), + port: egress4, + }, ]; let port_label_ingress = switch.port_label(ingress).unwrap(); - let ctr_baseline_ingress = - switch.get_counter(&port_label_ingress, Some("ingress")).await.unwrap(); + let ctr_baseline_ingress = switch + .get_counter(&port_label_ingress, Some("ingress")) + .await + .unwrap(); switch.packet_test(vec![test_pkt], expected_pkts).unwrap(); @@ -2308,14 +2557,19 @@ async fn test_ipv4_multicast_drops_ingress_is_egress_port() -> TestResult { dst_port, ); - let test_pkt = TestPacket { packet: Arc::new(to_send), port: ingress }; + let test_pkt = TestPacket { + packet: Arc::new(to_send), + port: ingress, + }; let expected_pkts = vec![]; let port_label_ingress = switch.port_label(ingress).unwrap(); - let ctr_baseline_ingress = - switch.get_counter(&port_label_ingress, Some("ingress")).await.unwrap(); + let ctr_baseline_ingress = switch + .get_counter(&port_label_ingress, Some("ingress")) + .await + .unwrap(); switch.packet_test(vec![test_pkt], expected_pkts).unwrap(); @@ -2390,7 +2644,10 @@ async fn test_ipv6_multicast_hop_limit_zero() -> TestResult { // Set Hop Limit to 0 (should be dropped) ipv6::Ipv6Hdr::adjust_hlim(&mut to_send, -255); // Set to 0 - let test_pkt = TestPacket { packet: Arc::new(to_send), port: ingress }; + let test_pkt = TestPacket { + packet: Arc::new(to_send), + port: ingress, + }; // Expect no output packets (should be dropped due to Hop Limit=0) let expected_pkts = vec![]; @@ -2475,7 +2732,10 @@ async fn test_ipv6_multicast_hop_limit_one() -> TestResult { // because the switch decrements it to 0 during processing ipv6::Ipv6Hdr::adjust_hlim(&mut to_send, -254); // Set to 1 (255 - 254 = 1) - let test_pkt = TestPacket { packet: Arc::new(to_send), port: ingress }; + let test_pkt = TestPacket { + packet: Arc::new(to_send), + port: ingress, + }; // Expect no output packets (should be dropped due to Hop Limit=1) let expected_pkts = vec![]; @@ -2570,15 +2830,22 @@ async fn test_ipv6_multicast_basic_replication_nat_ingress() -> TestResult { Some(egress1), ); - let test_pkt = TestPacket { packet: Arc::new(to_send), port: ingress }; + let test_pkt = TestPacket { + packet: Arc::new(to_send), + port: ingress, + }; - let expected_pkts = - vec![TestPacket { packet: Arc::new(to_recv1), port: egress1 }]; + let expected_pkts = vec![TestPacket { + packet: Arc::new(to_recv1), + port: egress1, + }]; let port_label_ingress = switch.port_label(ingress).unwrap(); - let ctr_baseline_ingress = - switch.get_counter(&port_label_ingress, Some("ingress")).await.unwrap(); + let ctr_baseline_ingress = switch + .get_counter(&port_label_ingress, Some("ingress")) + .await + .unwrap(); switch.packet_test(vec![test_pkt], expected_pkts).unwrap(); @@ -2682,18 +2949,32 @@ async fn test_ipv4_multicast_source_filtering_exact_match() -> TestResult { ); let test_pkts = vec![ - TestPacket { packet: Arc::new(allowed_pkt), port: ingress1 }, - TestPacket { packet: Arc::new(filtered_pkt), port: ingress2 }, + TestPacket { + packet: Arc::new(allowed_pkt), + port: ingress1, + }, + TestPacket { + packet: Arc::new(filtered_pkt), + port: 
ingress2, + }, ]; // Only expect packets from the allowed source let expected_pkts = vec![ - TestPacket { packet: Arc::new(to_recv11), port: egress1 }, - TestPacket { packet: Arc::new(to_recv12), port: egress2 }, + TestPacket { + packet: Arc::new(to_recv11), + port: egress1, + }, + TestPacket { + packet: Arc::new(to_recv12), + port: egress2, + }, ]; - let ctr_baseline = - switch.get_counter("multicast_src_filtered", None).await.unwrap(); + let ctr_baseline = switch + .get_counter("multicast_src_filtered", None) + .await + .unwrap(); switch.packet_test(test_pkts, expected_pkts).unwrap(); @@ -2828,21 +3109,44 @@ async fn test_ipv4_multicast_source_filtering_multiple_exact() -> TestResult { ); let test_pkts = vec![ - TestPacket { packet: Arc::new(allowed_pkt1), port: ingress1 }, - TestPacket { packet: Arc::new(allowed_pkt2), port: ingress2 }, - TestPacket { packet: Arc::new(filtered_pkt), port: ingress2 }, + TestPacket { + packet: Arc::new(allowed_pkt1), + port: ingress1, + }, + TestPacket { + packet: Arc::new(allowed_pkt2), + port: ingress2, + }, + TestPacket { + packet: Arc::new(filtered_pkt), + port: ingress2, + }, ]; // Only expect packets from the allowed sources let expected_pkts = vec![ - TestPacket { packet: Arc::new(to_recv11), port: egress1 }, - TestPacket { packet: Arc::new(to_recv22), port: egress2 }, - TestPacket { packet: Arc::new(to_recv12), port: egress2 }, - TestPacket { packet: Arc::new(to_recv21), port: egress1 }, + TestPacket { + packet: Arc::new(to_recv11), + port: egress1, + }, + TestPacket { + packet: Arc::new(to_recv22), + port: egress2, + }, + TestPacket { + packet: Arc::new(to_recv12), + port: egress2, + }, + TestPacket { + packet: Arc::new(to_recv21), + port: egress1, + }, ]; - let ctr_baseline = - switch.get_counter("multicast_src_filtered", None).await.unwrap(); + let ctr_baseline = switch + .get_counter("multicast_src_filtered", None) + .await + .unwrap(); switch.packet_test(test_pkts, expected_pkts).unwrap(); @@ -2977,23 +3281,46 @@ async fn test_ipv6_multicast_multiple_source_filtering() -> TestResult { ); let test_pkts = vec![ - TestPacket { packet: Arc::new(allowed_pkt1), port: ingress1 }, - TestPacket { packet: Arc::new(allowed_pkt2), port: ingress2 }, - TestPacket { packet: Arc::new(filtered_pkt), port: ingress3 }, + TestPacket { + packet: Arc::new(allowed_pkt1), + port: ingress1, + }, + TestPacket { + packet: Arc::new(allowed_pkt2), + port: ingress2, + }, + TestPacket { + packet: Arc::new(filtered_pkt), + port: ingress3, + }, ]; // Only expect packets from the allowed sources let expected_pkts = vec![ // First allowed source - TestPacket { packet: Arc::new(to_recv11), port: egress1 }, - TestPacket { packet: Arc::new(to_recv12), port: egress2 }, + TestPacket { + packet: Arc::new(to_recv11), + port: egress1, + }, + TestPacket { + packet: Arc::new(to_recv12), + port: egress2, + }, // Second allowed source - TestPacket { packet: Arc::new(to_recv21), port: egress1 }, - TestPacket { packet: Arc::new(to_recv22), port: egress2 }, + TestPacket { + packet: Arc::new(to_recv21), + port: egress1, + }, + TestPacket { + packet: Arc::new(to_recv22), + port: egress2, + }, ]; - let ctr_baseline = - switch.get_counter("multicast_src_filtered", None).await.unwrap(); + let ctr_baseline = switch + .get_counter("multicast_src_filtered", None) + .await + .unwrap(); switch.packet_test(test_pkts, expected_pkts).unwrap(); @@ -3083,12 +3410,20 @@ async fn test_multicast_dynamic_membership() -> TestResult { Some(egress2), ); - let test_pkt = - TestPacket { packet: 
Arc::new(to_send.clone()), port: ingress }; + let test_pkt = TestPacket { + packet: Arc::new(to_send.clone()), + port: ingress, + }; let expected_pkts = vec![ - TestPacket { packet: Arc::new(to_recv1), port: egress1 }, - TestPacket { packet: Arc::new(to_recv2), port: egress2 }, + TestPacket { + packet: Arc::new(to_recv1), + port: egress1, + }, + TestPacket { + packet: Arc::new(to_recv2), + port: egress2, + }, ]; let result1 = switch.packet_test(vec![test_pkt], expected_pkts); @@ -3166,14 +3501,25 @@ async fn test_multicast_dynamic_membership() -> TestResult { Some(egress3), ); - let test_pkt_new = TestPacket { packet: Arc::new(to_send), port: ingress }; + let test_pkt_new = TestPacket { + packet: Arc::new(to_send), + port: ingress, + }; let expected_pkts_new = vec![ - TestPacket { packet: Arc::new(to_recv1_new), port: egress2 }, - TestPacket { packet: Arc::new(to_recv2_new), port: egress3 }, + TestPacket { + packet: Arc::new(to_recv1_new), + port: egress2, + }, + TestPacket { + packet: Arc::new(to_recv2_new), + port: egress3, + }, ]; - switch.packet_test(vec![test_pkt_new], expected_pkts_new).unwrap(); + switch + .packet_test(vec![test_pkt_new], expected_pkts_new) + .unwrap(); cleanup_test_group(switch, get_group_ip(&created_group), TEST_TAG) .await @@ -3329,21 +3675,51 @@ async fn test_multicast_multiple_groups() -> TestResult { ); let test_pkts = vec![ - TestPacket { packet: Arc::new(to_send1), port: ingress }, - TestPacket { packet: Arc::new(to_send2), port: ingress }, + TestPacket { + packet: Arc::new(to_send1), + port: ingress, + }, + TestPacket { + packet: Arc::new(to_send2), + port: ingress, + }, ]; let expected_pkts = vec![ // First multicast group - replicates to all ports since both groups share same NAT target - TestPacket { packet: Arc::new(to_recv1_1), port: egress1 }, - TestPacket { packet: Arc::new(to_recv1_2), port: egress2 }, - TestPacket { packet: Arc::new(to_recv1_3), port: egress3 }, - TestPacket { packet: Arc::new(to_recv1_4), port: egress4 }, + TestPacket { + packet: Arc::new(to_recv1_1), + port: egress1, + }, + TestPacket { + packet: Arc::new(to_recv1_2), + port: egress2, + }, + TestPacket { + packet: Arc::new(to_recv1_3), + port: egress3, + }, + TestPacket { + packet: Arc::new(to_recv1_4), + port: egress4, + }, // Second multicast group - also replicates to all ports - TestPacket { packet: Arc::new(to_recv2_3), port: egress1 }, - TestPacket { packet: Arc::new(to_recv2_4), port: egress2 }, - TestPacket { packet: Arc::new(to_recv2_1), port: egress3 }, - TestPacket { packet: Arc::new(to_recv2_2), port: egress4 }, + TestPacket { + packet: Arc::new(to_recv2_3), + port: egress1, + }, + TestPacket { + packet: Arc::new(to_recv2_4), + port: egress2, + }, + TestPacket { + packet: Arc::new(to_recv2_1), + port: egress3, + }, + TestPacket { + packet: Arc::new(to_recv2_2), + port: egress4, + }, ]; switch.packet_test(test_pkts, expected_pkts).unwrap(); @@ -3655,7 +4031,10 @@ async fn test_multicast_reset_all_tables() -> TestResult { .await .expect("Should be able to list groups"); - assert!(groups_after.is_empty(), "No groups should exist after reset"); + assert!( + groups_after.is_empty(), + "No groups should exist after reset" + ); // Try to get each group specifically for group_ip in [ @@ -3677,10 +4056,6 @@ async fn test_multicast_reset_all_tables() -> TestResult { Ok(()) } -/* - * Commented out untl https://github.com/oxidecomputer/dendrite/issues/107 is - * fixed - * #[tokio::test] #[ignore] async fn test_multicast_vlan_translation_not_possible() -> TestResult { @@ -3757,7 
+4132,6 @@ async fn test_multicast_vlan_translation_not_possible() -> TestResult { .unwrap(); cleanup_test_group(switch, internal_multicast_ip, TEST_TAG).await } -*/ #[tokio::test] #[ignore] @@ -3849,20 +4223,31 @@ async fn test_multicast_multiple_packets() -> TestResult { Some(egress3), ); - test_pkts.push(TestPacket { packet: Arc::new(to_send), port: ingress }); - - expected_pkts - .push(TestPacket { packet: Arc::new(to_recv1), port: egress1 }); - expected_pkts - .push(TestPacket { packet: Arc::new(to_recv2), port: egress2 }); - expected_pkts - .push(TestPacket { packet: Arc::new(to_recv3), port: egress3 }); + test_pkts.push(TestPacket { + packet: Arc::new(to_send), + port: ingress, + }); + + expected_pkts.push(TestPacket { + packet: Arc::new(to_recv1), + port: egress1, + }); + expected_pkts.push(TestPacket { + packet: Arc::new(to_recv2), + port: egress2, + }); + expected_pkts.push(TestPacket { + packet: Arc::new(to_recv3), + port: egress3, + }); } let port_label_ingress = switch.port_label(ingress).unwrap(); - let ctr_baseline_ingress = - switch.get_counter(&port_label_ingress, Some("ingress")).await.unwrap(); + let ctr_baseline_ingress = switch + .get_counter(&port_label_ingress, Some("ingress")) + .await + .unwrap(); switch.packet_test(test_pkts, expected_pkts).unwrap(); @@ -3898,8 +4283,10 @@ async fn test_multicast_no_group_configured() -> TestResult { let src_mac = switch.get_port_mac(ingress).unwrap(); // Get baseline counter before any test packets - let initial_ctr_baseline = - switch.get_counter("multicast_no_group", None).await.unwrap(); + let initial_ctr_baseline = switch + .get_counter("multicast_no_group", None) + .await + .unwrap(); // Test IPv4 multicast with no configured group { @@ -3911,7 +4298,10 @@ async fn test_multicast_no_group_configured() -> TestResult { 4444, ); - let test_pkt = TestPacket { packet: Arc::new(to_send), port: ingress }; + let test_pkt = TestPacket { + packet: Arc::new(to_send), + port: ingress, + }; let expected_pkts = vec![]; @@ -3941,7 +4331,10 @@ async fn test_multicast_no_group_configured() -> TestResult { 4444, ); - let test_pkt = TestPacket { packet: Arc::new(to_send), port: ingress }; + let test_pkt = TestPacket { + packet: Arc::new(to_send), + port: ingress, + }; // Expect no output packets - should be dropped let expected_pkts = vec![]; @@ -4103,8 +4496,10 @@ async fn test_ipv6_multicast_scope_validation() { }], }; - let admin_local_result = - switch.client.multicast_group_create_underlay(&admin_local_group).await; + let admin_local_result = switch + .client + .multicast_group_create_underlay(&admin_local_group) + .await; assert!( admin_local_result.is_ok(), "Admin-local scope (ff04::/16) should work with internal API" @@ -4121,8 +4516,10 @@ async fn test_ipv6_multicast_scope_validation() { }], }; - let site_local_result = - switch.client.multicast_group_create_underlay(&site_local_group).await; + let site_local_result = switch + .client + .multicast_group_create_underlay(&site_local_group) + .await; assert!( site_local_result.is_err(), "Site-local scope (ff05::/16) should be rejected - only admin-local (ff04) allowed" @@ -4139,8 +4536,10 @@ async fn test_ipv6_multicast_scope_validation() { }], }; - let org_local_result = - switch.client.multicast_group_create_underlay(&org_local_group).await; + let org_local_result = switch + .client + .multicast_group_create_underlay(&org_local_group) + .await; assert!( org_local_result.is_err(), "Organization-local scope (ff08::/16) should be rejected - only admin-local (ff04) allowed" @@ -4273,7 
+4672,10 @@ async fn test_multicast_group_id_recycling() -> TestResult { ) .await; - assert_ne!(get_external_group_id(&group1), get_external_group_id(&group2)); + assert_ne!( + get_external_group_id(&group1), + get_external_group_id(&group2) + ); // Delete the first group let del_tag = make_tag(TEST_TAG); @@ -4291,7 +4693,9 @@ async fn test_multicast_group_id_recycling() -> TestResult { .await .expect("Should be able to list groups"); assert!( - !groups_after_delete1.iter().any(|g| get_group_ip(g) == group1_ip), + !groups_after_delete1 + .iter() + .any(|g| get_group_ip(g) == group1_ip), "Group1 should be deleted" ); @@ -4331,7 +4735,9 @@ async fn test_multicast_group_id_recycling() -> TestResult { .await .expect("Should be able to list groups"); assert!( - !groups_after_delete2.iter().any(|g| get_group_ip(g) == group2_ip), + !groups_after_delete2 + .iter() + .any(|g| get_group_ip(g) == group2_ip), "Group2 should be deleted" ); @@ -4355,7 +4761,9 @@ async fn test_multicast_group_id_recycling() -> TestResult { "Fourth group should reuse Group2's underlay ID due to LIFO recycling" ); - cleanup_test_group(switch, group3_ip, TEST_TAG).await.unwrap(); + cleanup_test_group(switch, group3_ip, TEST_TAG) + .await + .unwrap(); cleanup_test_group(switch, group4_ip, TEST_TAG).await } @@ -4428,11 +4836,15 @@ async fn test_multicast_empty_then_add_members_ipv6() -> TestResult { .expect("Should find the created external group"); assert!( - get_members(internal_group).map(|m| m.is_empty()).unwrap_or(true), + get_members(internal_group) + .map(|m| m.is_empty()) + .unwrap_or(true), "Empty internal group should have no members initially" ); assert!( - get_members(external_group).map(|m| m.is_empty()).unwrap_or(true), + get_members(external_group) + .map(|m| m.is_empty()) + .unwrap_or(true), "Empty external group should have no members initially" ); @@ -4475,8 +4887,10 @@ async fn test_multicast_empty_then_add_members_ipv6() -> TestResult { &payload, ); - let send = - TestPacket { packet: Arc::new(geneve_pkt.clone()), port: ingress_port }; + let send = TestPacket { + packet: Arc::new(geneve_pkt.clone()), + port: ingress_port, + }; // Verify no packets are replicated when group is empty switch.packet_test(vec![send], Vec::new())?; @@ -4620,13 +5034,24 @@ async fn test_multicast_empty_then_add_members_ipv6() -> TestResult { Some(egress3), ); - let send_again = - TestPacket { packet: Arc::new(to_send_again), port: ingress_port }; + let send_again = TestPacket { + packet: Arc::new(to_send_again), + port: ingress_port, + }; let expected_pkts = vec![ - TestPacket { packet: Arc::new(expected1), port: egress1 }, - TestPacket { packet: Arc::new(expected2), port: egress2 }, - TestPacket { packet: Arc::new(expected3), port: egress3 }, + TestPacket { + packet: Arc::new(expected1), + port: egress1, + }, + TestPacket { + packet: Arc::new(expected2), + port: egress2, + }, + TestPacket { + packet: Arc::new(expected3), + port: egress3, + }, ]; // Verify packets are now replicated to all 3 members (2 external + 1 underlay) @@ -4696,15 +5121,19 @@ async fn test_multicast_empty_then_add_members_ipv6() -> TestResult { "Bitmap table should be empty again when group has no members" ); - let send_final = - TestPacket { packet: Arc::new(to_send_final), port: ingress_port }; + let send_final = TestPacket { + packet: Arc::new(to_send_final), + port: ingress_port, + }; // Should only see packet on ingress, no replication to egress ports let expected_final = vec![]; switch.packet_test(vec![send_final], expected_final)?; - 
cleanup_test_group(&switch, external_group_ip, TEST_TAG).await.unwrap(); + cleanup_test_group(&switch, external_group_ip, TEST_TAG) + .await + .unwrap(); cleanup_test_group(&switch, internal_group_ip, TEST_TAG).await } @@ -4774,11 +5203,15 @@ async fn test_multicast_empty_then_add_members_ipv4() -> TestResult { .expect("Should find the created external group"); assert!( - get_members(internal_group).map(|m| m.is_empty()).unwrap_or(true), + get_members(internal_group) + .map(|m| m.is_empty()) + .unwrap_or(true), "Empty internal group should have no members initially" ); assert!( - get_members(external_group).map(|m| m.is_empty()).unwrap_or(true), + get_members(external_group) + .map(|m| m.is_empty()) + .unwrap_or(true), "Empty external group should have no members initially" ); @@ -4821,8 +5254,10 @@ async fn test_multicast_empty_then_add_members_ipv4() -> TestResult { &payload, ); - let send = - TestPacket { packet: Arc::new(geneve_pkt.clone()), port: ingress_port }; + let send = TestPacket { + packet: Arc::new(geneve_pkt.clone()), + port: ingress_port, + }; // Verify no packets are replicated when group is empty switch.packet_test(vec![send], Vec::new())?; @@ -4965,13 +5400,24 @@ async fn test_multicast_empty_then_add_members_ipv4() -> TestResult { Some(egress3), ); - let send_again = - TestPacket { packet: Arc::new(test_packet2), port: ingress_port }; + let send_again = TestPacket { + packet: Arc::new(test_packet2), + port: ingress_port, + }; let expected_pkts = vec![ - TestPacket { packet: Arc::new(expected1), port: egress1 }, - TestPacket { packet: Arc::new(expected2), port: egress2 }, - TestPacket { packet: Arc::new(expected3), port: egress3 }, + TestPacket { + packet: Arc::new(expected1), + port: egress1, + }, + TestPacket { + packet: Arc::new(expected2), + port: egress2, + }, + TestPacket { + packet: Arc::new(expected3), + port: egress3, + }, ]; // Verify packets are now replicated to all 3 members via NAT target @@ -5042,15 +5488,19 @@ async fn test_multicast_empty_then_add_members_ipv4() -> TestResult { ); // Test: Send packet again - should now only reach ingress (no replication) - let send_final = - TestPacket { packet: Arc::new(to_send_final), port: ingress_port }; + let send_final = TestPacket { + packet: Arc::new(to_send_final), + port: ingress_port, + }; // Should only see packet on ingress, no replication to egress ports let expected_final = vec![]; switch.packet_test(vec![send_final], expected_final)?; - cleanup_test_group(&switch, external_group_ip, TEST_TAG).await.unwrap(); + cleanup_test_group(&switch, external_group_ip, TEST_TAG) + .await + .unwrap(); cleanup_test_group(&switch, internal_group_ip, TEST_TAG).await } @@ -5137,11 +5587,16 @@ async fn test_multicast_rollback_external_group_creation_failure() -> TestResult }; // This should fail and trigger rollback - let result = - switch.client.multicast_group_create_external(&external_entry).await; + let result = switch + .client + .multicast_group_create_external(&external_entry) + .await; // Verify the creation failed - assert!(result.is_err(), "External group creation should have failed"); + assert!( + result.is_err(), + "External group creation should have failed" + ); // Verify rollback worked - check that no state was left behind @@ -5250,8 +5705,9 @@ async fn test_multicast_rollback_member_update_failure() -> TestResult { .multicast_group_get(&internal_group_ip) .await .expect("Should be able to get initial group state"); - let initial_member_count = - get_members(&initial_group.into_inner()).map(|m| 
m.len()).unwrap_or(0); + let initial_member_count = get_members(&initial_group.into_inner()) + .map(|m| m.len()) + .unwrap_or(0); // Try to add a member that should cause ASIC operations to fail // Use a valid port but with an invalid link ID that should cause issues @@ -5262,8 +5718,9 @@ async fn test_multicast_rollback_member_update_failure() -> TestResult { direction: types::Direction::External, }]; - let update_request = - types::MulticastGroupUpdateUnderlayEntry { members: invalid_members }; + let update_request = types::MulticastGroupUpdateUnderlayEntry { + members: invalid_members, + }; let ipv6_update = types::UnderlayMulticastIpv6(match internal_group_ip { IpAddr::V6(ipv6) => ipv6, @@ -5292,7 +5749,9 @@ async fn test_multicast_rollback_member_update_failure() -> TestResult { .into_inner(); assert_eq!( - get_members(&post_failure_group).map(|m| m.len()).unwrap_or(0), + get_members(&post_failure_group) + .map(|m| m.len()) + .unwrap_or(0), initial_member_count, "Member count should be unchanged after rollback" ); @@ -5394,10 +5853,11 @@ async fn test_multicast_rollback_nat_transition_failure() -> TestResult { assert!(result.is_err(), "NAT update should have failed"); // Verify rollback worked - external group should be unchanged - let post_failure_external_group = - switch.client.multicast_group_get(&external_group_ip).await.expect( - "Should be able to get external group state after rollback", - ); + let post_failure_external_group = switch + .client + .multicast_group_get(&external_group_ip) + .await + .expect("Should be able to get external group state after rollback"); // NAT target should be unchanged let initial_group_inner = initial_external_group.into_inner(); @@ -5406,8 +5866,14 @@ async fn test_multicast_rollback_nat_transition_failure() -> TestResult { let initial_nat = get_nat_target(&initial_group_inner); let current_nat = get_nat_target(&post_failure_group_inner); - assert!(initial_nat.is_some(), "Initial group should have NAT target"); - assert!(current_nat.is_some(), "Current group should have NAT target"); + assert!( + initial_nat.is_some(), + "Initial group should have NAT target" + ); + assert!( + current_nat.is_some(), + "Current group should have NAT target" + ); if let (Some(original), Some(current)) = (initial_nat, current_nat) { assert_eq!( @@ -5433,7 +5899,9 @@ async fn test_multicast_rollback_nat_transition_failure() -> TestResult { "NAT table should be unchanged after rollback" ); - cleanup_test_group(&switch, external_group_ip, TEST_TAG).await.unwrap(); + cleanup_test_group(&switch, external_group_ip, TEST_TAG) + .await + .unwrap(); cleanup_test_group(&switch, internal_group_ip, TEST_TAG).await } @@ -5506,8 +5974,10 @@ async fn test_multicast_rollback_vlan_propagation_consistency() { }; // This should fail because the NAT target (internal group) no longer exists - let result = - switch.client.multicast_group_create_external(&external_entry).await; + let result = switch + .client + .multicast_group_create_external(&external_entry) + .await; assert!( result.is_err(), @@ -5671,7 +6141,9 @@ async fn test_multicast_rollback_source_filter_update() -> TestResult { ); // Clean up external group first (it references internal group via NAT target) - cleanup_test_group(&switch, group_ip, TEST_TAG).await.unwrap(); + cleanup_test_group(&switch, group_ip, TEST_TAG) + .await + .unwrap(); cleanup_test_group(&switch, internal_multicast_ip, TEST_TAG).await } @@ -5703,8 +6175,9 @@ async fn test_multicast_rollback_partial_member_addition() -> TestResult { 
.multicast_group_get(&internal_group_ip) .await .expect("Should be able to get initial group state"); - let initial_member_count = - get_members(&initial_group.into_inner()).map(|m| m.len()).unwrap_or(0); + let initial_member_count = get_members(&initial_group.into_inner()) + .map(|m| m.len()) + .unwrap_or(0); // Create a mix of valid and invalid members to trigger partial addition failure let (valid_port_1, valid_link_1) = switch.link_id(PhysPort(17)).unwrap(); @@ -5732,8 +6205,9 @@ async fn test_multicast_rollback_partial_member_addition() -> TestResult { }, ]; - let update_request = - types::MulticastGroupUpdateUnderlayEntry { members: mixed_members }; + let update_request = types::MulticastGroupUpdateUnderlayEntry { + members: mixed_members, + }; let ipv6_update = types::UnderlayMulticastIpv6(match internal_group_ip { IpAddr::V6(ipv6) => ipv6, @@ -5765,7 +6239,9 @@ async fn test_multicast_rollback_partial_member_addition() -> TestResult { .into_inner(); assert_eq!( - get_members(&post_failure_group).map(|m| m.len()).unwrap_or(0), + get_members(&post_failure_group) + .map(|m| m.len()) + .unwrap_or(0), initial_member_count, "Member count should be unchanged after partial addition rollback" ); @@ -5840,8 +6316,10 @@ async fn test_multicast_rollback_table_operation_failure() { }; // This should fail because the NAT target (internal group) doesn't exist - let result = - switch.client.multicast_group_create_external(&external_entry).await; + let result = switch + .client + .multicast_group_create_external(&external_entry) + .await; // Verify the creation failed assert!( @@ -6097,7 +6575,9 @@ async fn test_source_filter_ipv4_collapses_to_any() -> TestResult { } // Cleanup in correct order: external first, then internal - cleanup_test_group(switch, external_group_ip, TEST_TAG).await.unwrap(); + cleanup_test_group(switch, external_group_ip, TEST_TAG) + .await + .unwrap(); cleanup_test_group(switch, internal_group_ip, TEST_TAG).await } @@ -6208,7 +6688,9 @@ async fn test_source_filter_ipv6_collapses_to_any() -> TestResult { } // Cleanup in correct order: external first, then internal - cleanup_test_group(switch, external_group_ip, TEST_TAG).await.unwrap(); + cleanup_test_group(switch, external_group_ip, TEST_TAG) + .await + .unwrap(); cleanup_test_group(switch, internal_group_ip, TEST_TAG).await } @@ -6328,7 +6810,9 @@ async fn test_source_filter_update_to_any() -> TestResult { ); // Cleanup in correct order: external first, then internal - cleanup_test_group(switch, external_group_ip, TEST_TAG).await.unwrap(); + cleanup_test_group(switch, external_group_ip, TEST_TAG) + .await + .unwrap(); cleanup_test_group(switch, internal_group_ip, TEST_TAG).await } @@ -6501,7 +6985,9 @@ async fn test_source_filter_empty_vec_normalizes_to_any() -> TestResult { ); // Cleanup - cleanup_test_group(switch, external_group_ip, TEST_TAG).await.unwrap(); + cleanup_test_group(switch, external_group_ip, TEST_TAG) + .await + .unwrap(); // Verify the /0 entry was removed let after_delete_table = switch @@ -6599,8 +7085,10 @@ async fn test_delete_nonexistent_group_returns_404() -> TestResult { let nonexistent_ip = IpAddr::V4(Ipv4Addr::new(239, 255, 255, 253)); let del_tag = make_tag("some_tag"); - let result = - switch.client.multicast_group_delete(&nonexistent_ip, &del_tag).await; + let result = switch + .client + .multicast_group_delete(&nonexistent_ip, &del_tag) + .await; match result { Err(Error::ErrorResponse(resp)) => { @@ -6721,6 +7209,229 @@ async fn test_underlay_delete_recreate_recovery_flow() -> TestResult { 
Ok(()) } +/// Tests the full VLAN lifecycle for multicast groups, verifying route table +/// entries are correctly managed through all VLAN transitions. +/// +/// Route tables use dst_addr only matching with action-based VLAN tagging: +/// - 1 entry per group: forward (no VLAN) or forward_vlan(vid) +/// +/// NAT tables use VLAN-aware matching for isolation: +/// - 2 entries for VLAN groups: untagged + tagged match keys +#[tokio::test] +#[ignore] +async fn test_vlan_lifecycle_route_entries() -> TestResult { + let switch = &*get_switch().await; + let tag = "vlan_lifecycle_test"; + + // Setup: create internal admin-scoped group for NAT target + let internal_ip = IpAddr::V6(MULTICAST_NAT_IP); + let egress_port = PhysPort(28); + create_test_multicast_group( + switch, + internal_ip, + Some(tag), + &[(egress_port, types::Direction::Underlay)], + types::InternalForwarding { nat_target: None }, + types::ExternalForwarding { vlan_id: None }, + None, + ) + .await; + + let group_ip = IpAddr::V4(MULTICAST_TEST_IPV4); + let nat_target = create_nat_target_ipv4(); + let test_ip = "224.0.1.0"; + + // Case: Create with NO VLAN - should have 1 route entry + let create_no_vlan = types::MulticastGroupCreateExternalEntry { + group_ip, + tag: Some(tag.to_string()), + internal_forwarding: types::InternalForwarding { + nat_target: Some(nat_target.clone()), + }, + external_forwarding: types::ExternalForwarding { vlan_id: None }, + sources: None, + }; + + switch + .client + .multicast_group_create_external(&create_no_vlan) + .await + .expect("Should create group without VLAN"); + + let table = switch + .client + .table_dump("pipe.Ingress.l3_router.MulticastRouter4.tbl") + .await? + .into_inner(); + assert_eq!( + count_entries_for_ip(&table.entries, test_ip), + 1, + "Group without VLAN should have 1 route entry" + ); + + // Case: Update to ADD VLAN 10 + // Route: 1 entry with forward_vlan(10) action + // NAT: 2 entries (untagged + tagged) for VLAN isolation + let update_add_vlan = types::MulticastGroupUpdateExternalEntry { + internal_forwarding: types::InternalForwarding { + nat_target: Some(nat_target.clone()), + }, + external_forwarding: types::ExternalForwarding { vlan_id: Some(10) }, + sources: None, + }; + + let updated = switch + .client + .multicast_group_update_external( + &group_ip, + &make_tag(tag), + &update_add_vlan, + ) + .await + .expect("Should update group to add VLAN"); + assert_eq!(updated.into_inner().external_forwarding.vlan_id, Some(10)); + + let table = switch + .client + .table_dump("pipe.Ingress.l3_router.MulticastRouter4.tbl") + .await? + .into_inner(); + assert_eq!( + count_entries_for_ip(&table.entries, test_ip), + 1, + "Route table uses dst_addr only - 1 entry per group" + ); + assert!( + has_vlan_action_for_ip(&table.entries, test_ip, 10), + "Route entry should have forward_vlan(10) action" + ); + + // Verify NAT table has 2 entries for VLAN isolation + let nat_table = switch + .client + .table_dump("pipe.Ingress.nat_ingress.ingress_ipv4_mcast") + .await? 
+ .into_inner(); + assert_eq!( + count_entries_for_ip(&nat_table.entries, test_ip), + 2, + "NAT table should have 2 entries for VLAN group (untagged + tagged)" + ); + + // Case: Update to CHANGE VLAN 10 -> 20 + // Route: same 1 entry, action changes to forward_vlan(20) + // NAT: 2 entries with new VLAN, no stale entries + let update_change_vlan = types::MulticastGroupUpdateExternalEntry { + internal_forwarding: types::InternalForwarding { + nat_target: Some(nat_target.clone()), + }, + external_forwarding: types::ExternalForwarding { vlan_id: Some(20) }, + sources: None, + }; + + let updated = switch + .client + .multicast_group_update_external( + &group_ip, + &make_tag(tag), + &update_change_vlan, + ) + .await + .expect("Should update group to change VLAN"); + assert_eq!(updated.into_inner().external_forwarding.vlan_id, Some(20)); + + let table = switch + .client + .table_dump("pipe.Ingress.l3_router.MulticastRouter4.tbl") + .await? + .into_inner(); + assert_eq!( + count_entries_for_ip(&table.entries, test_ip), + 1, + "Route table uses dst_addr only - 1 entry per group" + ); + assert!( + has_vlan_action_for_ip(&table.entries, test_ip, 20), + "Route entry should have forward_vlan(20) action" + ); + assert!( + !has_vlan_action_for_ip(&table.entries, test_ip, 10), + "Should NOT have stale forward_vlan(10) action" + ); + + // NAT table should still have 2 entries + let nat_table = switch + .client + .table_dump("pipe.Ingress.nat_ingress.ingress_ipv4_mcast") + .await? + .into_inner(); + assert_eq!( + count_entries_for_ip(&nat_table.entries, test_ip), + 2, + "NAT table should have 2 entries for VLAN group" + ); + + // Case: Update to REMOVE VLAN + // Route: 1 entry with forward action (no VLAN) + // NAT: 1 entry (untagged only) + let update_remove_vlan = types::MulticastGroupUpdateExternalEntry { + internal_forwarding: types::InternalForwarding { + nat_target: Some(nat_target.clone()), + }, + external_forwarding: types::ExternalForwarding { vlan_id: None }, + sources: None, + }; + + let updated = switch + .client + .multicast_group_update_external( + &group_ip, + &make_tag(tag), + &update_remove_vlan, + ) + .await + .expect("Should update group to remove VLAN"); + assert_eq!(updated.into_inner().external_forwarding.vlan_id, None); + + let table = switch + .client + .table_dump("pipe.Ingress.l3_router.MulticastRouter4.tbl") + .await? + .into_inner(); + + assert_eq!( + count_entries_for_ip(&table.entries, test_ip), + 1, + "Route table uses dst_addr only - 1 entry per group" + ); + + assert!( + !has_vlan_action_for_ip(&table.entries, test_ip, 20), + "Should NOT have forward_vlan action after VLAN removal" + ); + + // NAT table should have 1 entry now (no VLAN = untagged only) + let nat_table = switch + .client + .table_dump("pipe.Ingress.nat_ingress.ingress_ipv4_mcast") + .await? + .into_inner(); + assert_eq!( + count_entries_for_ip(&nat_table.entries, test_ip), + 1, + "NAT table should have 1 entry after VLAN removal" + ); + + // Cleanup + switch + .client + .multicast_reset_by_tag(&make_tag(tag)) + .await + .expect("Should cleanup by tag"); + + Ok(()) +} + /// Tests that update operations validate tags. 
/// /// When updating a multicast group, the provided tag must match the existing @@ -6945,8 +7656,12 @@ async fn test_tag_validation() -> TestResult { _ => panic!("Expected ErrorResponse for tag mismatch"), } - cleanup_test_group(switch, external_ip, TAG_A).await.unwrap(); - cleanup_test_group(switch, internal_ip, TEST_TAG).await.unwrap(); + cleanup_test_group(switch, external_ip, TAG_A) + .await + .unwrap(); + cleanup_test_group(switch, internal_ip, TEST_TAG) + .await + .unwrap(); // Case: Case-sensitive tag matching diff --git a/dpd/p4/sidecar.p4 b/dpd/p4/sidecar.p4 index 2d5c149..5aa723b 100644 --- a/dpd/p4/sidecar.p4 +++ b/dpd/p4/sidecar.p4 @@ -612,8 +612,16 @@ control NatIngress ( } // Separate table for IPv4 multicast packets that need to be encapsulated. + // Groups without VLAN match untagged only. Groups with VLAN match both + // untagged (for decapsulated Geneve from underlay) and correctly tagged. + // Packets with wrong VLAN miss and are not NAT encapsulated. + // When hdr.vlan.isValid()==false, vlan_id matches as 0. table ingress_ipv4_mcast { - key = { hdr.ipv4.dst_addr : exact; } + key = { + hdr.ipv4.dst_addr : exact; + hdr.vlan.isValid() : exact; + hdr.vlan.vlan_id : exact; + } actions = { mcast_forward_ipv4_to; } const size = IPV4_MULTICAST_TABLE_SIZE; counters = mcast_ipv4_ingress_ctr; @@ -630,8 +638,16 @@ control NatIngress ( } // Separate table for IPv6 multicast packets that need to be encapsulated. + // Groups without VLAN match untagged only. Groups with VLAN match both + // untagged (for decapsulated Geneve from underlay) and correctly tagged. + // Packets with wrong VLAN miss and are not NAT encapsulated. + // When hdr.vlan.isValid()==false, vlan_id matches as 0. table ingress_ipv6_mcast { - key = { hdr.ipv6.dst_addr : exact; } + key = { + hdr.ipv6.dst_addr : exact; + hdr.vlan.isValid() : exact; + hdr.vlan.vlan_id : exact; + } actions = { mcast_forward_ipv6_to; } const size = IPV6_MULTICAST_TABLE_SIZE; counters = mcast_ipv6_ingress_ctr; @@ -1311,7 +1327,9 @@ control MulticastRouter4( apply { // If the packet came in with a VLAN tag, we need to invalidate // the VLAN header before we do the lookup. The VLAN header - // will be re-attached if set in the forward_vlan action. + // will be re-attached if set in the forward_vlan action (or + // untagged for groups without VLAN). This prevents unintended + // VLAN translation. if (hdr.vlan.isValid()) { hdr.ethernet.ether_type = hdr.vlan.ether_type; hdr.vlan.setInvalid(); @@ -1449,7 +1467,9 @@ control MulticastRouter6 ( apply { // If the packet came in with a VLAN tag, we need to invalidate // the VLAN header before we do the lookup. The VLAN header - // will be re-attached if set in the forward_vlan action. + // will be re-attached if set in the forward_vlan action (or + // untagged for groups without VLAN). This prevents unintended + // VLAN translation. if (hdr.vlan.isValid()) { hdr.ethernet.ether_type = hdr.vlan.ether_type; hdr.vlan.setInvalid(); diff --git a/dpd/src/counters.rs b/dpd/src/counters.rs index dfad527..d10f34a 100644 --- a/dpd/src/counters.rs +++ b/dpd/src/counters.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/ // -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company /// This module contains the support for reading the indirect counters defined /// by the p4 program. 
While direct counters are attached to an existing table, @@ -284,6 +284,7 @@ enum DropReason { GeneveOptionsTooLong, GeneveOptionMalformed, GeneveOptionUnknown, + VlanMismatch, } impl TryFrom for DropReason { @@ -317,6 +318,7 @@ impl TryFrom for DropReason { 23 => Ok(DropReason::GeneveOptionsTooLong), 24 => Ok(DropReason::GeneveOptionMalformed), 25 => Ok(DropReason::GeneveOptionUnknown), + 26 => Ok(DropReason::VlanMismatch), x => Err(format!("Unrecognized drop reason: {x}")), } } @@ -343,8 +345,8 @@ fn reason_label(ctr: u8) -> Result, String> { DropReason::Ipv4TtlExceeded => "ipv4_ttl_exceeded".to_string(), DropReason::Ipv6TtlInvalid => "ipv6_ttl_invalid".to_string(), DropReason::Ipv6TtlExceeded => "ipv6_ttl_exceeded".to_string(), - DropReason::Ipv4Unrouteable => "ipv6_unrouteable".to_string(), - DropReason::Ipv6Unrouteable => "ipv4_unrouteable".to_string(), + DropReason::Ipv4Unrouteable => "ipv4_unrouteable".to_string(), + DropReason::Ipv6Unrouteable => "ipv6_unrouteable".to_string(), DropReason::NatIngressMiss => "nat_ingress_miss".to_string(), DropReason::MulticastNoGroup => "multicast_no_group".to_string(), DropReason::MulticastInvalidMac => "multicast_invalid_mac".to_string(), @@ -362,6 +364,7 @@ fn reason_label(ctr: u8) -> Result, String> { "geneve_option_malformed".to_string() } DropReason::GeneveOptionUnknown => "geneve_option_unknown".to_string(), + DropReason::VlanMismatch => "vlan_mismatch".to_string(), }; Ok(Some(label)) } diff --git a/dpd/src/mcast/mod.rs b/dpd/src/mcast/mod.rs index 230fc1c..e2f88a2 100644 --- a/dpd/src/mcast/mod.rs +++ b/dpd/src/mcast/mod.rs @@ -260,8 +260,10 @@ impl MulticastGroupData { /// Returns a ScopedGroupId that will automatically return the ID to the /// free pool when dropped. fn generate_group_id(&mut self) -> DpdResult { - let mut pool = - self.free_group_ids.lock().expect("group ID pool lock poisoned"); + let mut pool = self + .free_group_ids + .lock() + .expect("group ID pool lock poisoned"); let id = pool.pop().ok_or_else(|| { DpdError::ResourceExhausted( "no free multicast group IDs available (exhausted range 100-65534)".to_string(), @@ -277,7 +279,8 @@ impl MulticastGroupData { external_group_ip: IpAddr, admin_scoped_ip: UnderlayMulticastIpv6, ) { - self.nat_target_refs.insert(admin_scoped_ip, external_group_ip); + self.nat_target_refs + .insert(admin_scoped_ip, external_group_ip); } /// Remove 1:1 forwarding reference. 
@@ -351,6 +354,7 @@ pub(crate) fn add_group_external( scoped_underlay_id.id(), nat_target, group_info.sources.as_deref(), + group_info.external_forwarding.vlan_id, ); // Configure external tables and handle VLAN propagation @@ -567,8 +571,10 @@ pub(crate) fn del_group( let group = mcast.groups.remove(&group_ip).unwrap(); - let nat_target_to_remove = - group.int_fwding.nat_target.map(|nat| nat.internal_ip.into()); + let nat_target_to_remove = group + .int_fwding + .nat_target + .map(|nat| nat.internal_ip.into()); debug!(s.log, "deleting multicast group for IP {group_ip}"); @@ -642,11 +648,14 @@ pub(crate) fn modify_group_external( validate_tag(&existing_group.tag, tag)?; let nat_target = - new_group_info.internal_forwarding.nat_target.ok_or_else(|| { - DpdError::Invalid( - "external groups must have NAT target".to_string(), - ) - })?; + new_group_info + .internal_forwarding + .nat_target + .ok_or_else(|| { + DpdError::Invalid( + "external groups must have NAT target".to_string(), + ) + })?; validate_multicast_address(group_ip, new_group_info.sources.as_deref())?; validate_nat_target(nat_target)?; @@ -656,8 +665,12 @@ pub(crate) fn modify_group_external( // Create rollback context for external group update let group_entry_for_rollback = group_entry.clone(); - let rollback_ctx = - GroupUpdateRollbackContext::new(s, group_ip, &group_entry_for_rollback); + let rollback_ctx = GroupUpdateRollbackContext::new( + s, + group_ip, + &group_entry_for_rollback, + new_group_info.external_forwarding.vlan_id, + ); // Pre-compute normalized sources for rollback purposes let normalized_sources = normalize_sources(new_group_info.sources.clone()); @@ -693,10 +706,9 @@ pub(crate) fn modify_group_external( updated_group.int_fwding.nat_target = Some(nat_target); let old_vlan_id = updated_group.ext_fwding.vlan_id; - updated_group.ext_fwding.vlan_id = new_group_info - .external_forwarding - .vlan_id - .or(updated_group.ext_fwding.vlan_id); + // VLAN is assigned directly -> Some(x) sets VLAN, None removes VLAN + updated_group.ext_fwding.vlan_id = + new_group_info.external_forwarding.vlan_id; updated_group.sources = normalize_sources( new_group_info.sources.clone().or(updated_group.sources), ); @@ -789,9 +801,11 @@ pub(crate) fn modify_group_internal( s, group_ip.into(), &group_entry_for_rollback, + // Internal groups don't have VLANs, so `attempted_vlan_id` is None. + None, ); - // Internal groups don't update sources - always `None` + // Internal groups don't update sources, so always `None` debug_assert!( group_entry.sources.is_none(), "Internal groups should not have sources" @@ -1182,7 +1196,7 @@ pub(super) fn sources_contain_any(sources: &[IpSrc]) -> bool { /// This ensures uniqueness across the group's lifecycle and prevents /// tag collision when group IPs are reused after deletion. 
fn generate_default_tag(group_ip: IpAddr) -> String { - format!("{}:{}", Uuid::new_v4(), group_ip) + format!("{}:{group_ip}", Uuid::new_v4()) } /// Multiple representations map to "allow any source" in P4: @@ -1357,12 +1371,17 @@ fn configure_external_tables( add_source_filters(s, group_ip, group_info.sources.as_deref())?; // Add NAT entry + let vlan_id = group_info.external_forwarding.vlan_id; match group_ip { IpAddr::V4(ipv4) => { - table::mcast::mcast_nat::add_ipv4_entry(s, ipv4, nat_target)?; + table::mcast::mcast_nat::add_ipv4_entry( + s, ipv4, nat_target, vlan_id, + )?; } IpAddr::V6(ipv6) => { - table::mcast::mcast_nat::add_ipv6_entry(s, ipv6, nat_target)?; + table::mcast::mcast_nat::add_ipv6_entry( + s, ipv6, nat_target, vlan_id, + )?; } } @@ -1439,12 +1458,14 @@ fn create_asic_group( group_id: MulticastGroupId, group_ip: IpAddr, ) -> DpdResult<()> { - s.asic_hdl.mc_group_create(group_id).map_err(|e: AsicError| { - DpdError::McastGroupFailure(format!( - "failed to create multicast group for IP {group_ip} with ID \ + s.asic_hdl + .mc_group_create(group_id) + .map_err(|e: AsicError| { + DpdError::McastGroupFailure(format!( + "failed to create multicast group for IP {group_ip} with ID \ {group_id}: {e:?}" - )) - }) + )) + }) } fn add_ports_to_groups( @@ -1493,7 +1514,9 @@ fn process_membership_changes( ) -> DpdResult<(Vec, Vec)> { // First validate that IPv4 doesn't have underlay members if group_ip.is_ipv4() - && new_members.iter().any(|m| m.direction == Direction::Underlay) + && new_members + .iter() + .any(|m| m.direction == Direction::Underlay) { return Err(DpdError::Invalid(format!( "multicast group for IPv4 {group_ip} cannot have underlay members" @@ -1688,39 +1711,48 @@ fn update_external_tables( add_source_filters(s, group_ip, new_sources.as_deref())?; } + let old_vlan_id = group_entry.ext_fwding.vlan_id; + let new_vlan_id = new_group_info.external_forwarding.vlan_id; + // Update NAT target - external groups always have NAT targets - if Some(new_group_info.internal_forwarding.nat_target.ok_or_else(|| { - DpdError::Invalid("external groups must have NAT target".to_string()) - })?) != group_entry.int_fwding.nat_target + // Also handles VLAN changes since VLAN is part of the NAT match key + let new_nat_target = new_group_info + .internal_forwarding + .nat_target + .ok_or_else(|| { + DpdError::Invalid( + "external groups must have NAT target".to_string(), + ) + })?; + + if Some(new_nat_target) != group_entry.int_fwding.nat_target + || old_vlan_id != new_vlan_id { update_nat_tables( s, group_ip, - Some(new_group_info.internal_forwarding.nat_target.ok_or_else( - || { - DpdError::Invalid( - "external groups must have NAT target".to_string(), - ) - }, - )?), + Some(new_nat_target), group_entry.int_fwding.nat_target, + old_vlan_id, + new_vlan_id, )?; } - // Update VLAN if it changed - if new_group_info.external_forwarding.vlan_id - != group_entry.ext_fwding.vlan_id - { + // Update route table VLAN if it changed + // Route tables use simple dst_addr matching but select forward vs forward_vlan action + if old_vlan_id != new_vlan_id { match group_ip { IpAddr::V4(ipv4) => table::mcast::mcast_route::update_ipv4_entry( s, ipv4, - new_group_info.external_forwarding.vlan_id, + old_vlan_id, + new_vlan_id, ), IpAddr::V6(ipv6) => table::mcast::mcast_route::update_ipv6_entry( s, ipv6, - new_group_info.external_forwarding.vlan_id, + old_vlan_id, + new_vlan_id, ), }?; } @@ -1811,7 +1843,11 @@ fn delete_group_tables( // (which have NAT targets). 
if group.int_fwding.nat_target.is_some() {
             remove_ipv4_source_filters(s, ipv4, group.sources.as_deref())?;
-            table::mcast::mcast_nat::del_ipv4_entry(s, ipv4)?;
+            table::mcast::mcast_nat::del_ipv4_entry(
+                s,
+                ipv4,
+                group.ext_fwding.vlan_id,
+            )?;
         }
 
         delete_group_bitmap_entries(s, group)?;
@@ -1825,7 +1861,11 @@
         // NAT targets). Internal groups don't have source filtering.
         if group.int_fwding.nat_target.is_some() {
             remove_ipv6_source_filters(s, ipv6, group.sources.as_deref())?;
-            table::mcast::mcast_nat::del_ipv6_entry(s, ipv6)?;
+            table::mcast::mcast_nat::del_ipv6_entry(
+                s,
+                ipv6,
+                group.ext_fwding.vlan_id,
+            )?;
         }
 
         delete_group_bitmap_entries(s, group)?;
@@ -1863,31 +1903,47 @@ fn update_nat_tables(
     s: &Switch,
     group_ip: IpAddr,
     new_nat_target: Option<NatTarget>,
     old_nat_target: Option<NatTarget>,
+    old_vlan_id: Option<u16>,
+    new_vlan_id: Option<u16>,
 ) -> DpdResult<()> {
     match (group_ip, new_nat_target, old_nat_target) {
-        (IpAddr::V4(ipv4), Some(nat), Some(_)) => {
-            // NAT to NAT - update existing entry
-            table::mcast::mcast_nat::update_ipv4_entry(s, ipv4, nat)
+        (IpAddr::V4(ipv4), Some(new_nat), Some(old_nat)) => {
+            // NAT to NAT - update existing entry (handles VLAN changes internally)
+            table::mcast::mcast_nat::update_ipv4_entry(
+                s,
+                ipv4,
+                new_nat,
+                old_nat,
+                old_vlan_id,
+                new_vlan_id,
+            )
         }
-        (IpAddr::V6(ipv6), Some(nat), Some(_)) => {
-            // NAT to NAT - update existing entry
-            table::mcast::mcast_nat::update_ipv6_entry(s, ipv6, nat)
+        (IpAddr::V6(ipv6), Some(new_nat), Some(old_nat)) => {
+            // NAT to NAT - update existing entry (handles VLAN changes internally)
+            table::mcast::mcast_nat::update_ipv6_entry(
+                s,
+                ipv6,
+                new_nat,
+                old_nat,
+                old_vlan_id,
+                new_vlan_id,
+            )
         }
         (IpAddr::V4(ipv4), Some(nat), None) => {
             // No NAT to NAT - add new entry
-            table::mcast::mcast_nat::add_ipv4_entry(s, ipv4, nat)
+            table::mcast::mcast_nat::add_ipv4_entry(s, ipv4, nat, new_vlan_id)
         }
         (IpAddr::V6(ipv6), Some(nat), None) => {
             // No NAT to NAT - add new entry
-            table::mcast::mcast_nat::add_ipv6_entry(s, ipv6, nat)
+            table::mcast::mcast_nat::add_ipv6_entry(s, ipv6, nat, new_vlan_id)
         }
         (IpAddr::V4(ipv4), None, Some(_)) => {
             // NAT to no NAT - delete entry
-            table::mcast::mcast_nat::del_ipv4_entry(s, ipv4)
+            table::mcast::mcast_nat::del_ipv4_entry(s, ipv4, old_vlan_id)
         }
         (IpAddr::V6(ipv6), None, Some(_)) => {
             // NAT to no NAT - delete entry
-            table::mcast::mcast_nat::del_ipv6_entry(s, ipv6)
+            table::mcast::mcast_nat::del_ipv6_entry(s, ipv6, old_vlan_id)
         }
         _ => Ok(()), // No change (None → None)
     }
@@ -1934,33 +1990,49 @@ fn update_internal_group_bitmap_tables(
 /// Only updates the external bitmap entry since that's the only bitmap
 /// entry created during group configuration. The underlay replication
 /// is handled separately via the ASIC's multicast group primitives.
+///
+/// # Arguments
+///
+/// * `s` - Switch instance for table operations.
+/// * `group_ip` - IP address of the multicast group.
+/// * `external_group_id` - ID of the external multicast group for bitmap updates.
+/// * `members` - Group members used to recreate port bitmap.
+/// * `current_vlan_id` - VLAN currently in the table (may be the attempted new VLAN).
+/// * `target_vlan_id` - VLAN to restore to (the original VLAN).
fn update_fwding_tables(
     s: &Switch,
     group_ip: IpAddr,
     external_group_id: MulticastGroupId,
     members: &[MulticastGroupMember],
-    vlan_id: Option<u16>,
+    current_vlan_id: Option<u16>,
+    target_vlan_id: Option<u16>,
 ) -> DpdResult<()> {
     match group_ip {
-        IpAddr::V4(ipv4) => {
-            table::mcast::mcast_route::update_ipv4_entry(s, ipv4, vlan_id)
-        }
-        IpAddr::V6(ipv6) => {
-            table::mcast::mcast_route::update_ipv6_entry(s, ipv6, vlan_id)
-                .and_then(|_| {
-                    // Update external bitmap for external members
-                    // (only external bitmap entries exist; underlay replication
-                    // uses ASIC multicast groups directly)
-                    let external_port_bitmap =
-                        create_port_bitmap(members, Direction::External);
-                    table::mcast::mcast_egress::update_bitmap_entry(
-                        s,
-                        external_group_id,
-                        &external_port_bitmap,
-                        vlan_id,
-                    )
-                })
-        }
+        IpAddr::V4(ipv4) => table::mcast::mcast_route::update_ipv4_entry(
+            s,
+            ipv4,
+            current_vlan_id,
+            target_vlan_id,
+        ),
+        IpAddr::V6(ipv6) => table::mcast::mcast_route::update_ipv6_entry(
+            s,
+            ipv6,
+            current_vlan_id,
+            target_vlan_id,
+        )
+        .and_then(|_| {
+            // Update external bitmap for external members
+            // (only external bitmap entries exist, underlay replication
+            // uses ASIC multicast groups directly)
+            let external_port_bitmap =
+                create_port_bitmap(members, Direction::External);
+            table::mcast::mcast_egress::update_bitmap_entry(
+                s,
+                external_group_id,
+                &external_port_bitmap,
+                target_vlan_id,
+            )
+        }),
     }
 }
diff --git a/dpd/src/mcast/rollback.rs b/dpd/src/mcast/rollback.rs
index 722a1f1..ac63827 100644
--- a/dpd/src/mcast/rollback.rs
+++ b/dpd/src/mcast/rollback.rs
@@ -57,12 +57,12 @@ trait RollbackOps {
     ) -> DpdResult<()> {
         self.log_rollback_error(
             "remove new source filters",
-            &format!("for group {}", self.group_ip()),
+            &format!("for group {group_ip}", group_ip = self.group_ip()),
             remove_source_filters(self.switch(), self.group_ip(), new_sources),
         );
         self.log_rollback_error(
             "restore original source filters",
-            &format!("for group {}", self.group_ip()),
+            &format!("for group {group_ip}", group_ip = self.group_ip()),
             add_source_filters(self.switch(), self.group_ip(), orig_sources),
         );
         Ok(())
@@ -128,6 +128,7 @@ pub(crate) struct GroupCreateRollbackContext<'a> {
     underlay_id: MulticastGroupId,
     nat_target: Option<NatTarget>,
     sources: Option<&'a [IpSrc]>,
+    vlan_id: Option<u16>,
 }
 
 impl RollbackOps for GroupCreateRollbackContext<'_> {
@@ -253,6 +254,7 @@ impl<'a> GroupCreateRollbackContext<'a> {
         underlay_id: MulticastGroupId,
         nat_target: NatTarget,
         sources: Option<&'a [IpSrc]>,
+        vlan_id: Option<u16>,
     ) -> Self {
         Self {
             switch,
@@ -261,6 +263,7 @@
             underlay_id,
             nat_target: Some(nat_target),
             sources,
+            vlan_id,
         }
     }
 
@@ -278,6 +281,7 @@
             underlay_id,
             nat_target: None,
             sources: None,
+            vlan_id: None,
         }
     }
 
@@ -343,16 +347,18 @@
         self.log_rollback_error(
             "remove external multicast group",
             &format!(
-                "for IP {} with ID {}",
-                self.group_ip, self.external_id
+                "for IP {group_ip} with ID {external_id}",
+                group_ip = self.group_ip,
+                external_id = self.external_id
             ),
             self.switch.asic_hdl.mc_group_destroy(self.external_id),
         );
         self.log_rollback_error(
             "remove underlay multicast group",
             &format!(
-                "for IP {} with ID {}",
-                self.group_ip, self.underlay_id
+                "for IP {group_ip} with ID {underlay_id}",
+                group_ip = self.group_ip,
+                underlay_id = self.underlay_id
             ),
             self.switch.asic_hdl.mc_group_destroy(self.underlay_id),
         );
@@ -417,6 +423,7 @@ impl<'a> GroupCreateRollbackContext<'a> {
table::mcast::mcast_nat::del_ipv4_entry(
                     self.switch,
                     ipv4,
+                    self.vlan_id,
                 ),
             );
         }
@@ -434,7 +441,10 @@
         // (bitmap entries are only created for internal groups with both group types)
         self.log_rollback_error(
             "delete IPv6 egress bitmap entry",
-            &format!("for external group {}", self.external_id),
+            &format!(
+                "for external group {external_id}",
+                external_id = self.external_id
+            ),
             table::mcast::mcast_egress::del_bitmap_entry(
                 self.switch,
                 self.external_id,
@@ -507,6 +517,7 @@
                 table::mcast::mcast_nat::del_ipv6_entry(
                     self.switch,
                     ipv6,
+                    self.vlan_id,
                 ),
             );
         }
@@ -529,6 +540,10 @@ pub(crate) struct GroupUpdateRollbackContext<'a> {
     switch: &'a Switch,
     group_ip: IpAddr,
     original_group: &'a MulticastGroup,
+    /// The VLAN that the update is attempting to set. This may differ from
+    /// `original_group.ext_fwding.vlan_id` when a VLAN change is in progress.
+    /// Used during rollback to delete entries that may have been added.
+    attempted_vlan_id: Option<u16>,
 }
 
 impl RollbackOps for GroupUpdateRollbackContext<'_> {
@@ -559,7 +574,7 @@
             return Ok(());
         }
 
-        // Internal group - perform actual port rollback
+        // Internal group -> perform actual port rollback
         debug!(
             self.switch.log,
             "rolling back multicast group update";
@@ -707,12 +722,26 @@ impl<'a> GroupUpdateRollbackContext<'a> {
     /// Create rollback context for group update operations.
+    ///
+    /// # Arguments
+    ///
+    /// * `switch` - Switch instance for table operations.
+    /// * `group_ip` - IP address of the multicast group being updated.
+    /// * `original_group` - Reference to the group's state before the update.
+    /// * `attempted_vlan_id` - The VLAN that the update is attempting to set.
+    ///   Used during rollback to delete entries that may have been added.
     pub(crate) fn new(
         switch: &'a Switch,
         group_ip: IpAddr,
         original_group: &'a MulticastGroup,
+        attempted_vlan_id: Option<u16>,
     ) -> Self {
-        Self { switch, group_ip, original_group }
+        Self {
+            switch,
+            group_ip,
+            original_group,
+            attempted_vlan_id,
+        }
     }
 
     /// Restore tables and return error.
@@ -740,7 +769,7 @@ impl<'a> GroupUpdateRollbackContext<'a> { if let Some(replication_info) = replication_info { self.log_rollback_error( "restore replication settings", - &format!("for group {}", self.group_ip), + &format!("for group {group_ip}", group_ip = self.group_ip), update_replication_tables( self.switch, self.group_ip, @@ -751,54 +780,57 @@ impl<'a> GroupUpdateRollbackContext<'a> { ); } - // Restore NAT settings - match (self.group_ip, nat_target) { - (IpAddr::V4(ipv4), Some(nat)) => { - self.log_rollback_error( - "restore IPv4 NAT settings", - &format!("for group {}", self.group_ip), - table::mcast::mcast_nat::update_ipv4_entry( - self.switch, - ipv4, - nat, - ), - ); - } - (IpAddr::V6(ipv6), Some(nat)) => { - self.log_rollback_error( - "restore IPv6 NAT settings", - &format!("for group {}", self.group_ip), - table::mcast::mcast_nat::update_ipv6_entry( - self.switch, - ipv6, - nat, - ), - ); - } - (IpAddr::V4(ipv4), None) => { - self.log_rollback_error( - "remove IPv4 NAT settings", - &format!("for group {}", self.group_ip), - table::mcast::mcast_nat::del_ipv4_entry(self.switch, ipv4), - ); - } - (IpAddr::V6(ipv6), None) => { - self.log_rollback_error( - "remove IPv6 NAT settings", - &format!("for group {}", self.group_ip), - table::mcast::mcast_nat::del_ipv6_entry(self.switch, ipv6), - ); + // Restore NAT settings for external groups (internal groups have no + // NAT entries). Both new_tgt and old_tgt are the same original NAT + // target since we're restoring to the original state. + if let Some(nat) = nat_target { + match self.group_ip { + IpAddr::V4(ipv4) => { + self.log_rollback_error( + "restore IPv4 NAT settings", + &format!( + "for group {group_ip}", + group_ip = self.group_ip + ), + table::mcast::mcast_nat::update_ipv4_entry( + self.switch, + ipv4, + nat, + nat, + self.attempted_vlan_id, + vlan_id, + ), + ); + } + IpAddr::V6(ipv6) => { + self.log_rollback_error( + "restore IPv6 NAT settings", + &format!( + "for group {group_ip}", + group_ip = self.group_ip + ), + table::mcast::mcast_nat::update_ipv6_entry( + self.switch, + ipv6, + nat, + nat, + self.attempted_vlan_id, + vlan_id, + ), + ); + } } } self.log_rollback_error( "restore VLAN settings", - &format!("for group {}", self.group_ip), + &format!("for group {group_ip}", group_ip = self.group_ip), update_fwding_tables( self.switch, self.group_ip, external_group_id, &prev_members, + self.attempted_vlan_id, vlan_id, ), ); diff --git a/dpd/src/mcast/validate.rs b/dpd/src/mcast/validate.rs index 0dbdd4c..745b636 100644 --- a/dpd/src/mcast/validate.rs +++ b/dpd/src/mcast/validate.rs @@ -14,7 +14,6 @@ use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; use super::IpSrc; use crate::types::{DpdError, DpdResult}; use common::network::NatTarget; -use dpd_api::MulticastTag; use omicron_common::address::{ IPV4_LINK_LOCAL_MULTICAST_SUBNET, IPV4_SSM_SUBNET, IPV6_INTERFACE_LOCAL_MULTICAST_SUBNET, IPV6_LINK_LOCAL_MULTICAST_SUBNET, @@ -52,16 +51,16 @@ pub(crate) fn validate_multicast_address( pub(crate) fn validate_nat_target(nat_target: NatTarget) -> DpdResult<()> { if !nat_target.inner_mac.is_multicast() { return Err(DpdError::Invalid(format!( - "NAT target inner MAC address {} is not a multicast MAC address", - nat_target.inner_mac + "NAT target inner MAC address {inner_mac} is not a multicast MAC address", + inner_mac = nat_target.inner_mac ))); } if !UNDERLAY_MULTICAST_SUBNET.contains(nat_target.internal_ip) { return Err(DpdError::Invalid(format!( - "NAT target internal IP address {} is not in the reserved \ + "NAT target internal IP 
address {internal_ip} is not in the reserved \ underlay multicast subnet (ff04::/64)", - nat_target.internal_ip + internal_ip = nat_target.internal_ip ))); } @@ -230,17 +229,42 @@ fn validate_ipv6_source_address(ipv6: Ipv6Addr) -> DpdResult<()> { Ok(()) } +/// Maximum length for multicast group tags. +/// +/// Keep in sync with Omicron's database schema column type for multicast group +/// tags. This is sized to accommodate the auto-generated format +/// `{uuid}:{group_ip}` for both IPv4 and IPv6 group IPs. +const MAX_TAG_LENGTH: usize = 80; + /// Validates tag format for group creation. /// -/// Delegates to [`MulticastTag::from_str`] which enforces: -/// - Length: 1-80 ASCII bytes -/// - Characters: alphanumeric, hyphens, underscores, colons, or periods +/// Tags must be 1-80 ASCII bytes containing only alphanumeric characters, +/// hyphens, underscores, colons, or periods. +/// +/// This character set is compatible with URL path segments, though colons are +/// RFC 3986 reserved characters and may require percent-encoding in some HTTP +/// client contexts. /// /// Auto-generated tags use the format `{uuid}:{group_ip}`. pub(crate) fn validate_tag_format(tag: &str) -> DpdResult<()> { - tag.parse::() - .map(|_| ()) - .map_err(|e| DpdError::Invalid(e.to_string())) + if tag.is_empty() { + return Err(DpdError::Invalid("tag cannot be empty".to_string())); + } + if tag.len() > MAX_TAG_LENGTH { + return Err(DpdError::Invalid(format!( + "tag cannot exceed {MAX_TAG_LENGTH} bytes" + ))); + } + if !tag.bytes().all(|b| { + b.is_ascii_alphanumeric() || matches!(b, b'-' | b'_' | b':' | b'.') + }) { + return Err(DpdError::Invalid( + "tag must contain only ASCII alphanumeric characters, hyphens, \ + underscores, colons, or periods" + .to_string(), + )); + } + Ok(()) } /// Validates that the request tag matches the existing group's tag. @@ -264,7 +288,6 @@ pub(crate) fn validate_tag( mod tests { use super::*; use common::network::{MacAddr, Vni}; - use dpd_api::MAX_TAG_LENGTH; /// Admin-local IPv6 multicast prefix (ff04::/16, scope 4). const ADMIN_LOCAL_PREFIX: u16 = 0xff04; diff --git a/dpd/src/table/mcast/mcast_nat.rs b/dpd/src/table/mcast/mcast_nat.rs index 62c5cd9..280edd8 100644 --- a/dpd/src/table/mcast/mcast_nat.rs +++ b/dpd/src/table/mcast/mcast_nat.rs @@ -10,8 +10,7 @@ use std::net::{Ipv4Addr, Ipv6Addr}; use crate::{Switch, table::*}; -use super::{Ipv4MatchKey, Ipv6MatchKey}; - +use super::{Ipv4VlanMatchKey, Ipv6VlanMatchKey}; use aal::ActionParse; use aal_macros::*; use common::network::{MacAddr, NatTarget}; @@ -27,70 +26,263 @@ pub(crate) const IPV6_TABLE_NAME: &str = #[derive(ActionParse, Debug)] enum Ipv4Action { #[action_xlate(name = "mcast_forward_ipv4_to")] - Forward { target: Ipv6Addr, inner_mac: MacAddr, vni: u32 }, + Forward { + target: Ipv6Addr, + inner_mac: MacAddr, + vni: u32, + }, } #[derive(ActionParse, Debug)] enum Ipv6Action { #[action_xlate(name = "mcast_forward_ipv6_to")] - Forward { target: Ipv6Addr, inner_mac: MacAddr, vni: u32 }, + Forward { + target: Ipv6Addr, + inner_mac: MacAddr, + vni: u32, + }, } -/// Add a NAT entry for IPv4 multicast traffic, keyed on `ip`. +/// Add NAT entries for IPv4 multicast traffic. +/// +/// For groups with a VLAN, two entries are added: +/// 1. Untagged ingress match -> forward (for decapsulated Geneve packets) +/// 2. Correctly tagged ingress match -> forward (for already-tagged packets) +/// +/// This allows both packet types to match, while packets with the wrong VLAN +/// will miss both entries and not be NAT encapsulated. 
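The dual-entry scheme described in the comment above can be modeled with an ordinary map keyed on `(group IP, VLAN)`. This is only an illustration of the intended match semantics, not the P4 table or the dpd table API:

```rust
use std::collections::HashMap;

fn main() {
    // (destination group, VLAN tag on the arriving packet) -> NAT action.
    // A group configured with VLAN 42 installs two keys sharing one action.
    let mut nat: HashMap<(&str, Option<u16>), &str> = HashMap::new();
    nat.insert(("224.1.2.3", None), "mcast_forward_ipv4_to(...)");
    nat.insert(("224.1.2.3", Some(42)), "mcast_forward_ipv4_to(...)");

    // Decapsulated (untagged) and correctly tagged packets both match.
    assert!(nat.contains_key(&("224.1.2.3", None)));
    assert!(nat.contains_key(&("224.1.2.3", Some(42))));

    // A packet carrying some other VLAN misses both entries, so it is never
    // NAT encapsulated (the case the new vlan_mismatch drop reason covers).
    assert!(!nat.contains_key(&("224.1.2.3", Some(7))));
}
```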
pub(crate) fn add_ipv4_entry( s: &Switch, ip: Ipv4Addr, tgt: NatTarget, + vlan_id: Option, ) -> DpdResult<()> { - let match_key = Ipv4MatchKey::new(ip); let action_key = Ipv4Action::Forward { target: tgt.internal_ip, inner_mac: tgt.inner_mac, vni: tgt.vni.as_u32(), }; - debug!(s.log, "add ingress mcast entry {} -> {:?}", match_key, action_key); - - s.table_entry_add(TableType::NatIngressIpv4Mcast, &match_key, &action_key) + match vlan_id { + None => { + // Untagged only + let match_key = Ipv4VlanMatchKey::new(ip, None); + debug!( + s.log, + "add ingress mcast entry {} -> {:?}", match_key, action_key + ); + s.table_entry_add( + TableType::NatIngressIpv4Mcast, + &match_key, + &action_key, + ) + } + Some(vid) => { + common::network::validate_vlan(vid)?; + + // Untagged entry + let match_key_untagged = Ipv4VlanMatchKey::new(ip, None); + debug!( + s.log, + "add ingress mcast entry {} -> {:?}", + match_key_untagged, + action_key + ); + s.table_entry_add( + TableType::NatIngressIpv4Mcast, + &match_key_untagged, + &action_key, + )?; + + // Tagged entry + let match_key_tagged = Ipv4VlanMatchKey::new(ip, Some(vid)); + debug!( + s.log, + "add ingress mcast entry {} -> {:?}", + match_key_tagged, + action_key + ); + if let Err(e) = s.table_entry_add( + TableType::NatIngressIpv4Mcast, + &match_key_tagged, + &action_key, + ) { + // Rollback untagged entry + debug!(s.log, "rollback: removing untagged entry"); + let _ = s.table_entry_del( + TableType::NatIngressIpv4Mcast, + &match_key_untagged, + ); + return Err(e); + } + Ok(()) + } + } } -/// Update a NAT entry for IPv4 multicast traffic. +/// Update NAT entries for IPv4 multicast traffic. +/// +/// When VLAN changes, old entries are deleted and new ones added because +/// the VLAN is part of the match key and cannot be updated in place. +/// +/// # Arguments +/// +/// * `new_tgt` - NAT target for the new entries. +/// * `old_tgt` - NAT target for restoring entries on failure. Required when +/// VLAN changes since entries must be deleted and re-added. 
pub(crate) fn update_ipv4_entry( s: &Switch, ip: Ipv4Addr, - tgt: NatTarget, + new_tgt: NatTarget, + old_tgt: NatTarget, + old_vlan_id: Option, + new_vlan_id: Option, ) -> DpdResult<()> { - let match_key = Ipv4MatchKey::new(ip); - let action_key = Ipv4Action::Forward { - target: tgt.internal_ip, - inner_mac: tgt.inner_mac, - vni: tgt.vni.as_u32(), - }; - - debug!( - s.log, - "update ingress mcast entry {} -> {:?}", match_key, action_key - ); - - s.table_entry_update( - TableType::NatIngressIpv4Mcast, - &match_key, - &action_key, - ) + if old_vlan_id == new_vlan_id { + let action_key = Ipv4Action::Forward { + target: new_tgt.internal_ip, + inner_mac: new_tgt.inner_mac, + vni: new_tgt.vni.as_u32(), + }; + + let match_key_untagged = Ipv4VlanMatchKey::new(ip, None); + debug!( + s.log, + "update ingress mcast entry {} -> {:?}", + match_key_untagged, + action_key + ); + s.table_entry_update( + TableType::NatIngressIpv4Mcast, + &match_key_untagged, + &action_key, + )?; + + if let Some(vid) = old_vlan_id { + let match_key_tagged = Ipv4VlanMatchKey::new(ip, Some(vid)); + debug!( + s.log, + "update ingress mcast entry {} -> {:?}", + match_key_tagged, + action_key + ); + s.table_entry_update( + TableType::NatIngressIpv4Mcast, + &match_key_tagged, + &action_key, + )?; + } + return Ok(()); + } + + del_ipv4_entry_with_tgt(s, ip, old_tgt, old_vlan_id)?; + if let Err(e) = add_ipv4_entry(s, ip, new_tgt, new_vlan_id) { + // Restore deleted entries with old target + debug!(s.log, "add failed, restoring old NAT entries for {ip}"); + let _ = add_ipv4_entry(s, ip, old_tgt, old_vlan_id); + return Err(e); + } + Ok(()) } -/// Delete a NAT entry for IPv4 multicast traffic, keyed on `ip`. -pub(crate) fn del_ipv4_entry(s: &Switch, ip: Ipv4Addr) -> DpdResult<()> { - let match_key = Ipv4MatchKey::new(ip); - - debug!(s.log, "delete ingress mcast entry {}", match_key); +/// Delete NAT entries for IPv4 multicast traffic. +/// +/// Deletes both entries for VLAN groups (see `add_ipv4_entry` for details). +/// This version does not support rollback on partial failure. +pub(crate) fn del_ipv4_entry( + s: &Switch, + ip: Ipv4Addr, + vlan_id: Option, +) -> DpdResult<()> { + match vlan_id { + None => { + let match_key = Ipv4VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key); + s.table_entry_del(TableType::NatIngressIpv4Mcast, &match_key) + } + Some(vid) => { + // Untagged + let match_key_untagged = Ipv4VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key_untagged); + s.table_entry_del( + TableType::NatIngressIpv4Mcast, + &match_key_untagged, + )?; + + // Tagged + let match_key_tagged = Ipv4VlanMatchKey::new(ip, Some(vid)); + debug!(s.log, "delete ingress mcast entry {}", match_key_tagged); + if let Err(e) = s.table_entry_del( + TableType::NatIngressIpv4Mcast, + &match_key_tagged, + ) { + // Can't rollback without original action + debug!(s.log, "rollback not possible for untagged entry"); + return Err(e); + } + Ok(()) + } + } +} - s.table_entry_del(TableType::NatIngressIpv4Mcast, &match_key) +/// Delete NAT entries for IPv4 multicast traffic with rollback support. +/// +/// Deletes both entries for VLAN groups. If the tagged entry deletion fails +/// after the untagged entry was deleted, attempts to restore the untagged +/// entry using the provided NAT target. 
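The `_with_tgt` delete variant that follows exists so a partially failed delete can put the untagged entry back. A hypothetical standalone model of that ordering, using a plain map in place of the real table API:

```rust
use std::collections::HashMap;

// (group ip, vlan) -> action; stands in for the NAT ingress table.
type Key = (&'static str, Option<u16>);
type Table = HashMap<Key, String>;

fn del_with_rollback(
    table: &mut Table,
    ip: &'static str,
    vlan: u16,
    saved_action: &str,
) -> Result<(), String> {
    // Delete the untagged entry first.
    table
        .remove(&(ip, None))
        .ok_or_else(|| format!("missing untagged entry for {ip}"))?;

    // Delete the tagged entry; on failure, restore the untagged entry from
    // the caller-supplied action so the table is not left half-deleted.
    if table.remove(&(ip, Some(vlan))).is_none() {
        table.insert((ip, None), saved_action.to_string());
        return Err(format!("missing tagged entry for {ip} vlan {vlan}"));
    }
    Ok(())
}

fn main() {
    let mut table = Table::new();
    table.insert(("224.1.2.3", None), "forward".to_string());

    // The tagged entry was never installed, so the delete fails, but the
    // untagged entry is restored rather than lost.
    assert!(del_with_rollback(&mut table, "224.1.2.3", 42, "forward").is_err());
    assert!(table.contains_key(&("224.1.2.3", None)));
}
```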
+pub(crate) fn del_ipv4_entry_with_tgt( + s: &Switch, + ip: Ipv4Addr, + tgt: NatTarget, + vlan_id: Option, +) -> DpdResult<()> { + match vlan_id { + None => { + let match_key = Ipv4VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key); + s.table_entry_del(TableType::NatIngressIpv4Mcast, &match_key) + } + Some(vid) => { + // Delete untagged first + let match_key_untagged = Ipv4VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key_untagged); + s.table_entry_del( + TableType::NatIngressIpv4Mcast, + &match_key_untagged, + )?; + + // Delete tagged + let match_key_tagged = Ipv4VlanMatchKey::new(ip, Some(vid)); + debug!(s.log, "delete ingress mcast entry {}", match_key_tagged); + if let Err(e) = s.table_entry_del( + TableType::NatIngressIpv4Mcast, + &match_key_tagged, + ) { + // Rollback: restore the untagged entry + debug!( + s.log, + "tagged delete failed, restoring untagged entry for {ip}" + ); + let action_key = Ipv4Action::Forward { + target: tgt.internal_ip, + inner_mac: tgt.inner_mac, + vni: tgt.vni.as_u32(), + }; + let _ = s.table_entry_add( + TableType::NatIngressIpv4Mcast, + &match_key_untagged, + &action_key, + ); + return Err(e); + } + Ok(()) + } + } } /// Dump the IPv4 NAT table's contents. pub(crate) fn ipv4_table_dump(s: &Switch) -> DpdResult { - s.table_dump::(TableType::NatIngressIpv4Mcast) + s.table_dump::(TableType::NatIngressIpv4Mcast) } /// Fetch the IPv4 NAT table's counters. @@ -98,7 +290,10 @@ pub(crate) fn ipv4_counter_fetch( s: &Switch, force_sync: bool, ) -> DpdResult> { - s.counter_fetch::(force_sync, TableType::NatIngressIpv4Mcast) + s.counter_fetch::( + force_sync, + TableType::NatIngressIpv4Mcast, + ) } /// Reset the Ipv4 NAT table. @@ -106,61 +301,246 @@ pub(crate) fn reset_ipv4(s: &Switch) -> DpdResult<()> { s.table_clear(TableType::NatIngressIpv4Mcast) } -/// Add a NAT entry for IPv6 multicast traffic, keyed on `ip`. +/// Add NAT entries for IPv6 multicast traffic. +/// +/// For groups with a VLAN, two entries are added: +/// 1. Untagged ingress match -> forward (for decapsulated Geneve packets) +/// 2. Correctly tagged ingress match -> forward (for already-tagged packets) +/// +/// This allows both packet types to match, while packets with the wrong VLAN +/// will miss both entries and not be NAT encapsulated. 
pub(crate) fn add_ipv6_entry( s: &Switch, ip: Ipv6Addr, tgt: NatTarget, + vlan_id: Option, ) -> DpdResult<()> { - let match_key = Ipv6MatchKey::new(ip); let action_key = Ipv6Action::Forward { target: tgt.internal_ip, inner_mac: tgt.inner_mac, vni: tgt.vni.as_u32(), }; - debug!(s.log, "add ingress mcast entry {} -> {:?}", match_key, action_key); - - s.table_entry_add(TableType::NatIngressIpv6Mcast, &match_key, &action_key) + match vlan_id { + None => { + // Untagged only + let match_key = Ipv6VlanMatchKey::new(ip, None); + debug!( + s.log, + "add ingress mcast entry {} -> {:?}", match_key, action_key + ); + s.table_entry_add( + TableType::NatIngressIpv6Mcast, + &match_key, + &action_key, + ) + } + Some(vid) => { + common::network::validate_vlan(vid)?; + + // Untagged entry + let match_key_untagged = Ipv6VlanMatchKey::new(ip, None); + debug!( + s.log, + "add ingress mcast entry {} -> {:?}", + match_key_untagged, + action_key + ); + s.table_entry_add( + TableType::NatIngressIpv6Mcast, + &match_key_untagged, + &action_key, + )?; + + // Tagged entry + let match_key_tagged = Ipv6VlanMatchKey::new(ip, Some(vid)); + debug!( + s.log, + "add ingress mcast entry {} -> {:?}", + match_key_tagged, + action_key + ); + if let Err(e) = s.table_entry_add( + TableType::NatIngressIpv6Mcast, + &match_key_tagged, + &action_key, + ) { + // Rollback untagged entry + debug!(s.log, "rollback: removing untagged entry"); + let _ = s.table_entry_del( + TableType::NatIngressIpv6Mcast, + &match_key_untagged, + ); + return Err(e); + } + Ok(()) + } + } } -/// Update a NAT entry for IPv6 multicast traffic. +/// Update NAT entries for IPv6 multicast traffic. +/// +/// When VLAN changes, old entries are deleted and new ones added because +/// the VLAN is part of the match key and cannot be updated in place. +/// +/// # Arguments +/// +/// * `new_tgt` - NAT target for the new entries. +/// * `old_tgt` - NAT target for restoring entries on failure. Required when +/// VLAN changes since entries must be deleted and re-added. 
pub(crate) fn update_ipv6_entry( s: &Switch, ip: Ipv6Addr, - tgt: NatTarget, + new_tgt: NatTarget, + old_tgt: NatTarget, + old_vlan_id: Option, + new_vlan_id: Option, ) -> DpdResult<()> { - let match_key = Ipv6MatchKey::new(ip); - let action_key = Ipv6Action::Forward { - target: tgt.internal_ip, - inner_mac: tgt.inner_mac, - vni: tgt.vni.as_u32(), - }; - - debug!( - s.log, - "update ingress mcast entry {} -> {:?}", match_key, action_key - ); - - s.table_entry_update( - TableType::NatIngressIpv6Mcast, - &match_key, - &action_key, - ) + if old_vlan_id == new_vlan_id { + let action_key = Ipv6Action::Forward { + target: new_tgt.internal_ip, + inner_mac: new_tgt.inner_mac, + vni: new_tgt.vni.as_u32(), + }; + + let match_key_untagged = Ipv6VlanMatchKey::new(ip, None); + debug!( + s.log, + "update ingress mcast entry {} -> {:?}", + match_key_untagged, + action_key + ); + s.table_entry_update( + TableType::NatIngressIpv6Mcast, + &match_key_untagged, + &action_key, + )?; + + if let Some(vid) = old_vlan_id { + let match_key_tagged = Ipv6VlanMatchKey::new(ip, Some(vid)); + debug!( + s.log, + "update ingress mcast entry {} -> {:?}", + match_key_tagged, + action_key + ); + s.table_entry_update( + TableType::NatIngressIpv6Mcast, + &match_key_tagged, + &action_key, + )?; + } + return Ok(()); + } + + del_ipv6_entry_with_tgt(s, ip, old_tgt, old_vlan_id)?; + if let Err(e) = add_ipv6_entry(s, ip, new_tgt, new_vlan_id) { + // Restore deleted entries with old target + debug!(s.log, "add failed, restoring old NAT entries for {ip}"); + let _ = add_ipv6_entry(s, ip, old_tgt, old_vlan_id); + return Err(e); + } + Ok(()) } -/// Delete a NAT entry for IPv6 multicast traffic, keyed on `ip`. -pub(crate) fn del_ipv6_entry(s: &Switch, ip: Ipv6Addr) -> DpdResult<()> { - let match_key = Ipv6MatchKey::new(ip); - - debug!(s.log, "delete ingress mcast entry {}", match_key); +/// Delete NAT entries for IPv6 multicast traffic. +/// +/// Deletes both entries for VLAN groups (see `add_ipv6_entry` for details). +/// This version does not support rollback on partial failure. +pub(crate) fn del_ipv6_entry( + s: &Switch, + ip: Ipv6Addr, + vlan_id: Option, +) -> DpdResult<()> { + match vlan_id { + None => { + let match_key = Ipv6VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key); + s.table_entry_del(TableType::NatIngressIpv6Mcast, &match_key) + } + Some(vid) => { + // Untagged + let match_key_untagged = Ipv6VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key_untagged); + s.table_entry_del( + TableType::NatIngressIpv6Mcast, + &match_key_untagged, + )?; + + // Tagged + let match_key_tagged = Ipv6VlanMatchKey::new(ip, Some(vid)); + debug!(s.log, "delete ingress mcast entry {}", match_key_tagged); + if let Err(e) = s.table_entry_del( + TableType::NatIngressIpv6Mcast, + &match_key_tagged, + ) { + // Can't rollback without original action + debug!(s.log, "rollback not possible for untagged entry"); + return Err(e); + } + Ok(()) + } + } +} - s.table_entry_del(TableType::NatIngressIpv6Mcast, &match_key) +/// Delete NAT entries for IPv6 multicast traffic with rollback support. +/// +/// Deletes both entries for VLAN groups. If the tagged entry deletion fails +/// after the untagged entry was deleted, attempts to restore the untagged +/// entry using the provided NAT target. 
+pub(crate) fn del_ipv6_entry_with_tgt( + s: &Switch, + ip: Ipv6Addr, + tgt: NatTarget, + vlan_id: Option, +) -> DpdResult<()> { + match vlan_id { + None => { + let match_key = Ipv6VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key); + s.table_entry_del(TableType::NatIngressIpv6Mcast, &match_key) + } + Some(vid) => { + // Delete untagged first + let match_key_untagged = Ipv6VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key_untagged); + s.table_entry_del( + TableType::NatIngressIpv6Mcast, + &match_key_untagged, + )?; + + // Delete tagged + let match_key_tagged = Ipv6VlanMatchKey::new(ip, Some(vid)); + debug!(s.log, "delete ingress mcast entry {}", match_key_tagged); + if let Err(e) = s.table_entry_del( + TableType::NatIngressIpv6Mcast, + &match_key_tagged, + ) { + // Rollback: restore the untagged entry + debug!( + s.log, + "tagged delete failed, restoring untagged entry for {ip}" + ); + let action_key = Ipv6Action::Forward { + target: tgt.internal_ip, + inner_mac: tgt.inner_mac, + vni: tgt.vni.as_u32(), + }; + let _ = s.table_entry_add( + TableType::NatIngressIpv6Mcast, + &match_key_untagged, + &action_key, + ); + return Err(e); + } + Ok(()) + } + } } /// Dump the IPv6 NAT table's contents. pub(crate) fn ipv6_table_dump(s: &Switch) -> DpdResult { - s.table_dump::(TableType::NatIngressIpv6Mcast) + s.table_dump::(TableType::NatIngressIpv6Mcast) } /// Fetch the IPv6 NAT table's counters. @@ -168,7 +548,10 @@ pub(crate) fn ipv6_counter_fetch( s: &Switch, force_sync: bool, ) -> DpdResult> { - s.counter_fetch::(force_sync, TableType::NatIngressIpv6Mcast) + s.counter_fetch::( + force_sync, + TableType::NatIngressIpv6Mcast, + ) } /// Reset the Ipv6 NAT table. diff --git a/dpd/src/table/mcast/mcast_route.rs b/dpd/src/table/mcast/mcast_route.rs index d86c2e4..292b612 100644 --- a/dpd/src/table/mcast/mcast_route.rs +++ b/dpd/src/table/mcast/mcast_route.rs @@ -5,18 +5,24 @@ // Copyright 2026 Oxide Computer Company //! Table operations for multicast routing entries (on Ingress to the switch). +//! +//! Route tables match only on destination address and select the egress action: +//! - `forward`: Forward without VLAN modification +//! - `forward_vlan(vid)`: Add VLAN tag on egress +//! +//! VLAN-based access control (preventing VLAN translation) is handled by NAT +//! ingress tables before encapsulation, not by route tables. use std::net::{Ipv4Addr, Ipv6Addr}; -use crate::{Switch, table::*}; - -use super::{Ipv4MatchKey, Ipv6MatchKey}; - use aal::ActionParse; use aal_macros::*; use omicron_common::address::UNDERLAY_MULTICAST_SUBNET; use slog::debug; +use super::{Ipv4MatchKey, Ipv6MatchKey}; +use crate::{Switch, table::*}; + /// IPv4 Table for multicast routing entries. pub(crate) const IPV4_TABLE_NAME: &str = "pipe.Ingress.l3_router.MulticastRouter4.tbl"; @@ -40,58 +46,63 @@ enum Ipv6Action { ForwardVLAN { vlan_id: u16 }, } -/// Add an IPv4 multicast route entry to the routing table, keyed on -/// `route`, with an optional `vlan_id`. +/// Add an IPv4 multicast route entry. 
+/// +/// The action is selected based on VLAN configuration: +/// - No VLAN: `forward` (no VLAN modification on egress) +/// - With VLAN: `forward_vlan(vid)` (add VLAN tag on egress) pub(crate) fn add_ipv4_entry( s: &Switch, route: Ipv4Addr, vlan_id: Option, ) -> DpdResult<()> { let match_key = Ipv4MatchKey::new(route); - - let action_data = match vlan_id { + let action = match vlan_id { None => Ipv4Action::Forward, - Some(vlan_id) => { - common::network::validate_vlan(vlan_id)?; - Ipv4Action::ForwardVLAN { vlan_id } + Some(vid) => { + common::network::validate_vlan(vid)?; + Ipv4Action::ForwardVLAN { vlan_id: vid } } }; - debug!(s.log, "add multicast route entry {} -> {:?}", route, action_data); - - s.table_entry_add(TableType::RouteIpv4Mcast, &match_key, &action_data) + debug!(s.log, "add multicast route entry {match_key} -> {action:?}"); + s.table_entry_add(TableType::RouteIpv4Mcast, &match_key, &action) } -/// Update an IPv4 multicast route entry in the routing table. +/// Update an IPv4 multicast route entry. +/// +/// Updates the action when VLAN configuration changes. Since the match key +/// is just the destination address, this can be done in place. pub(crate) fn update_ipv4_entry( s: &Switch, route: Ipv4Addr, - vlan_id: Option, + old_vlan_id: Option, + new_vlan_id: Option, ) -> DpdResult<()> { + if old_vlan_id == new_vlan_id { + return Ok(()); + } + let match_key = Ipv4MatchKey::new(route); - let action_data = match vlan_id { + let action = match new_vlan_id { None => Ipv4Action::Forward, - Some(vlan_id) => { - common::network::validate_vlan(vlan_id)?; - Ipv4Action::ForwardVLAN { vlan_id } + Some(vid) => { + common::network::validate_vlan(vid)?; + Ipv4Action::ForwardVLAN { vlan_id: vid } } }; debug!( s.log, - "update multicast route entry {} -> {:?}", route, action_data + "update multicast route entry {match_key} -> {action:?}" ); - - s.table_entry_update(TableType::RouteIpv4Mcast, &match_key, &action_data) + s.table_entry_update(TableType::RouteIpv4Mcast, &match_key, &action) } -/// Delete an IPv4 multicast route entry from table, keyed on -/// `route`. +/// Delete an IPv4 multicast route entry. pub(crate) fn del_ipv4_entry(s: &Switch, route: Ipv4Addr) -> DpdResult<()> { let match_key = Ipv4MatchKey::new(route); - - debug!(s.log, "delete multicast route entry {}", match_key); - + debug!(s.log, "delete multicast route entry {match_key}"); s.table_entry_del(TableType::RouteIpv4Mcast, &match_key) } @@ -113,8 +124,15 @@ pub(crate) fn reset_ipv4(s: &Switch) -> DpdResult<()> { s.table_clear(TableType::RouteIpv4Mcast) } -/// Add an IPv6 multicast route entry to the routing table, keyed on -/// `route`, with an optional `vlan_id`. +/// Add an IPv6 multicast route entry. +/// +/// The action is selected based on VLAN configuration: +/// - No VLAN: `forward` (no VLAN modification on egress) +/// - With VLAN: `forward_vlan(vid)` (add VLAN tag on egress) +/// +/// Reserved underlay multicast subnet (ff04::/64) is internal to the rack +/// and always uses Forward without VLAN tagging, regardless of the vlan_id +/// parameter. pub(crate) fn add_ipv6_entry( s: &Switch, route: Ipv6Addr, @@ -123,65 +141,64 @@ pub(crate) fn add_ipv6_entry( let match_key = Ipv6MatchKey::new(route); // Reserved underlay multicast subnet (ff04::/64) is internal to the rack - // and doesn't require VLAN tagging. Other admin-local addresses - // (e.g., ff04:0:0:1::/64) may be used by customer external groups and - // can receive VLAN tagging. 
- let action_data: Ipv6Action = if UNDERLAY_MULTICAST_SUBNET.contains(route) { + // and doesn't require VLAN tagging. + let action: Ipv6Action = if UNDERLAY_MULTICAST_SUBNET.contains(route) { Ipv6Action::Forward } else { match vlan_id { None => Ipv6Action::Forward, - Some(vlan_id) => { - common::network::validate_vlan(vlan_id)?; - Ipv6Action::ForwardVLAN { vlan_id } + Some(vid) => { + common::network::validate_vlan(vid)?; + Ipv6Action::ForwardVLAN { vlan_id: vid } } } }; - debug!(s.log, "add multicast route entry {} -> {:?}", route, action_data); - - s.table_entry_add(TableType::RouteIpv6Mcast, &match_key, &action_data) + debug!(s.log, "add multicast route entry {match_key} -> {action:?}"); + s.table_entry_add(TableType::RouteIpv6Mcast, &match_key, &action) } -/// Update an IPv6 multicast route entry in the routing table. +/// Update an IPv6 multicast route entry. +/// +/// Updates the action when VLAN configuration changes. Since the match key +/// is just the destination address, this can be done in place. pub(crate) fn update_ipv6_entry( s: &Switch, route: Ipv6Addr, - vlan_id: Option, + old_vlan_id: Option, + new_vlan_id: Option, ) -> DpdResult<()> { + if old_vlan_id == new_vlan_id { + return Ok(()); + } + let match_key = Ipv6MatchKey::new(route); // Reserved underlay multicast subnet (ff04::/64) is internal to the rack - // and doesn't require VLAN tagging. Other admin-local addresses - // (e.g., ff04:0:0:1::/64) may be used by customer external groups and - // can receive VLAN tagging. - let action_data: Ipv6Action = if UNDERLAY_MULTICAST_SUBNET.contains(route) { + // and doesn't require VLAN tagging. + let action: Ipv6Action = if UNDERLAY_MULTICAST_SUBNET.contains(route) { Ipv6Action::Forward } else { - match vlan_id { + match new_vlan_id { None => Ipv6Action::Forward, - Some(vlan_id) => { - common::network::validate_vlan(vlan_id)?; - Ipv6Action::ForwardVLAN { vlan_id } + Some(vid) => { + common::network::validate_vlan(vid)?; + Ipv6Action::ForwardVLAN { vlan_id: vid } } } }; debug!( s.log, - "update multicast route entry {} -> {:?}", route, action_data + "update multicast route entry {match_key} -> {action:?}" ); - - s.table_entry_update(TableType::RouteIpv6Mcast, &match_key, &action_data) + s.table_entry_update(TableType::RouteIpv6Mcast, &match_key, &action) } -/// Delete an IPv6 multicast entry from routing table, keyed on -/// `route`. +/// Delete an IPv6 multicast route entry. pub(crate) fn del_ipv6_entry(s: &Switch, route: Ipv6Addr) -> DpdResult<()> { let match_key = Ipv6MatchKey::new(route); - - debug!(s.log, "delete multicast route entry {}", match_key); - + debug!(s.log, "delete multicast route entry {match_key}"); s.table_entry_del(TableType::RouteIpv6Mcast, &match_key) } diff --git a/dpd/src/table/mcast/mod.rs b/dpd/src/table/mcast/mod.rs index 600f7c7..a3b3f24 100644 --- a/dpd/src/table/mcast/mod.rs +++ b/dpd/src/table/mcast/mod.rs @@ -55,3 +55,85 @@ impl fmt::Display for Ipv6MatchKey { write!(f, "{}", self.dst_addr) } } + +/// VLAN-aware match key for NAT ingress tables. +/// +/// Matches on destination address, VLAN header validity, and VLAN ID to prevent +/// VLAN translation. For groups with a VLAN, two entries are installed (untagged +/// and tagged), packets with a mismatched VLAN miss both and are dropped rather +/// than being translated to another customer's VLAN. 
+#[derive(MatchParse, Hash)]
+pub(super) struct Ipv4VlanMatchKey {
+    dst_addr: Ipv4Addr,
+    #[match_xlate(name = "$valid")]
+    vlan_valid: bool,
+    vlan_id: u16,
+}
+
+impl Ipv4VlanMatchKey {
+    pub(super) fn new(dst_addr: Ipv4Addr, vlan_id: Option<u16>) -> Self {
+        match vlan_id {
+            Some(id) => Self {
+                dst_addr,
+                vlan_valid: true,
+                vlan_id: id,
+            },
+            None => Self {
+                dst_addr,
+                vlan_valid: false,
+                vlan_id: 0,
+            },
+        }
+    }
+}
+
+impl fmt::Display for Ipv4VlanMatchKey {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if self.vlan_valid {
+            write!(f, "{} vlan={}", self.dst_addr, self.vlan_id)
+        } else {
+            write!(f, "{} untagged", self.dst_addr)
+        }
+    }
+}
+
+/// VLAN-aware match key for NAT ingress tables.
+///
+/// Matches on destination address, VLAN header validity, and VLAN ID to prevent
+/// VLAN translation. For groups with a VLAN, two entries are installed (untagged
+/// and tagged), packets with a mismatched VLAN miss both and are dropped rather
+/// than being translated to another customer's VLAN.
+#[derive(MatchParse, Hash)]
+pub(super) struct Ipv6VlanMatchKey {
+    dst_addr: Ipv6Addr,
+    #[match_xlate(name = "$valid")]
+    vlan_valid: bool,
+    vlan_id: u16,
+}
+
+impl Ipv6VlanMatchKey {
+    pub(super) fn new(dst_addr: Ipv6Addr, vlan_id: Option<u16>) -> Self {
+        match vlan_id {
+            Some(id) => Self {
+                dst_addr,
+                vlan_valid: true,
+                vlan_id: id,
+            },
+            None => Self {
+                dst_addr,
+                vlan_valid: false,
+                vlan_id: 0,
+            },
+        }
+    }
+}
+
+impl fmt::Display for Ipv6VlanMatchKey {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if self.vlan_valid {
+            write!(f, "{} vlan={}", self.dst_addr, self.vlan_id)
+        } else {
+            write!(f, "{} untagged", self.dst_addr)
+        }
+    }
+}
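For reference, a standalone replica of the constructor and `Display` behavior above, showing the strings that end up in the `debug!` log lines; the real keys additionally derive `MatchParse`/`Hash` and map `vlan_valid` onto the hardware `$valid` field:

```rust
use std::fmt;
use std::net::Ipv4Addr;

struct VlanKey {
    dst_addr: Ipv4Addr,
    vlan_valid: bool,
    vlan_id: u16,
}

impl VlanKey {
    fn new(dst_addr: Ipv4Addr, vlan_id: Option<u16>) -> Self {
        match vlan_id {
            Some(id) => Self { dst_addr, vlan_valid: true, vlan_id: id },
            None => Self { dst_addr, vlan_valid: false, vlan_id: 0 },
        }
    }
}

impl fmt::Display for VlanKey {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if self.vlan_valid {
            write!(f, "{} vlan={}", self.dst_addr, self.vlan_id)
        } else {
            write!(f, "{} untagged", self.dst_addr)
        }
    }
}

fn main() {
    let ip: Ipv4Addr = "224.1.2.3".parse().unwrap();
    assert_eq!(VlanKey::new(ip, Some(42)).to_string(), "224.1.2.3 vlan=42");
    assert_eq!(VlanKey::new(ip, None).to_string(), "224.1.2.3 untagged");
}
```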