From 55ca834b86f90b6b3e94fa3f562d96159724e25c Mon Sep 17 00:00:00 2001 From: keithnguyen Date: Fri, 7 Mar 2025 19:07:33 +0000 Subject: [PATCH] Hopefully recover this file from master --- cns/middlewares/k8sSwiftV2.go | 401 +++++++++------------------------- 1 file changed, 105 insertions(+), 296 deletions(-) diff --git a/cns/middlewares/k8sSwiftV2.go b/cns/middlewares/k8sSwiftV2.go index 2df5c18e0f..bb075cc3e0 100644 --- a/cns/middlewares/k8sSwiftV2.go +++ b/cns/middlewares/k8sSwiftV2.go @@ -3,6 +3,7 @@ package middlewares import ( "context" "fmt" + "net/netip" "github.com/Azure/azure-container-networking/cns" "github.com/Azure/azure-container-networking/cns/configuration" @@ -11,47 +12,121 @@ import ( "github.com/Azure/azure-container-networking/cns/types" "github.com/Azure/azure-container-networking/crd/multitenancy/api/v1alpha1" "github.com/pkg/errors" - v1 "k8s.io/api/core/v1" - k8stypes "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client" ) -var ( - errMTPNCNotReady = errors.New("mtpnc is not ready") - errInvalidSWIFTv2NICType = errors.New("invalid NIC type for SWIFT v2 scenario") - errInvalidMTPNCPrefixLength = errors.New("invalid prefix length for MTPNC primaryIP, must be 32") -) +// setRoutes sets the routes for podIPInfo used in SWIFT V2 scenario. +func (k *K8sSWIFTv2Middleware) setRoutes(podIPInfo *cns.PodIpInfo) error { + logger.Printf("[SWIFTv2Middleware] set routes for pod with nic type : %s", podIPInfo.NICType) + var routes []cns.Route -const ( - prefixLength = 32 - overlayGatewayv4 = "169.254.1.1" - virtualGW = "169.254.2.1" - overlayGatewayV6 = "fe80::1234:5678:9abc" -) + switch podIPInfo.NICType { + case cns.DelegatedVMNIC: + virtualGWRoute := cns.Route{ + IPAddress: fmt.Sprintf("%s/%d", virtualGW, prefixLength), + } + // default route via SWIFT v2 interface + route := cns.Route{ + IPAddress: "0.0.0.0/0", + GatewayIPAddress: virtualGW, + } + routes = append(routes, virtualGWRoute, route) + + case cns.InfraNIC: + // Linux CNS middleware sets the infra routes(pod, infravnet and service cidrs) to infraNIC interface for the podIPInfo used in SWIFT V2 Linux scenario + infraRoutes, err := k.getInfraRoutes(podIPInfo) + if err != nil { + return errors.Wrap(err, "failed to get infra routes for infraNIC interface") + } + routes = infraRoutes + podIPInfo.SkipDefaultRoutes = true + + case cns.NodeNetworkInterfaceBackendNIC: //nolint:exhaustive // ignore exhaustive types check + // No-op NIC types. + default: + return errInvalidSWIFTv2NICType + } + + podIPInfo.Routes = routes + return nil +} + +// Linux CNS gets pod CIDRs from configuration env +// Containerd reassigns the IP to the adapter and kernel configures the pod cidr route by default on Windows VM +// Hence the windows swiftv2 scenario does not require pod cidr +// GetPodCidrs() will return v4PodCidrs as first []string and v6PodCidrs as second []string +func (k *K8sSWIFTv2Middleware) GetPodCidrs() ([]string, []string, error) { //nolint + v4PodCidrs := []string{} + v6PodCidrs := []string{} + + // Get and parse podCIDRs from env + podCIDRs, err := configuration.PodCIDRs() + if err != nil { + return nil, nil, errors.Wrapf(err, "failed to get podCIDRs from env") + } + podCIDRsV4, podCIDRv6, err := utils.ParseCIDRs(podCIDRs) + if err != nil { + return nil, nil, errors.Wrapf(err, "failed to parse podCIDRs") + } -type K8sSWIFTv2Middleware struct { - Cli client.Client + v4PodCidrs = append(v4PodCidrs, podCIDRsV4...) + v6PodCidrs = append(v6PodCidrs, podCIDRv6...) + + return v4PodCidrs, v6PodCidrs, nil } -// Verify interface compliance at compile time -var _ cns.IPConfigsHandlerMiddleware = (*K8sSWIFTv2Middleware)(nil) +// getInfraRoutes() returns the infra routes including infravnet/pod/service cidrs for the podIPInfo used in SWIFT V2 Linux scenario +// Linux uses 169.254.1.1 as the default ipv4 gateway and fe80::1234:5678:9abc as the default ipv6 gateway +func (k *K8sSWIFTv2Middleware) getInfraRoutes(podIPInfo *cns.PodIpInfo) ([]cns.Route, error) { + var routes []cns.Route + + ip, err := netip.ParseAddr(podIPInfo.PodIPConfig.IPAddress) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse podIPConfig IP address %s", podIPInfo.PodIPConfig.IPAddress) + } -func (k *K8sSWIFTv2Middleware) GetPodInfoForIPConfigsRequest(ctx context.Context, req *cns.IPConfigsRequest) (podInfo cns.PodInfo, respCode types.ResponseCode, message string) { - // gets pod info for the specified request - podInfo, pod, respCode, message := k.GetPodInfo(ctx, req) - if respCode != types.Success { - return nil, respCode, message + v4IPs, v6IPs, err := k.GetInfravnetAndServiceCidrs() + if err != nil { + return nil, errors.Wrap(err, "failed to get infravnet and service CIDRs") } - // validates if pod is swiftv2 - isSwiftv2 := ValidateSwiftv2Pod(pod) + v4PodIPs, v6PodIPs, err := k.GetPodCidrs() + if err != nil { + return nil, errors.Wrap(err, "failed to get pod CIDRs") + } + + v4IPs = append(v4IPs, v4PodIPs...) + v6IPs = append(v6IPs, v6PodIPs...) + + if ip.Is4() { + routes = append(routes, k.AddRoutes(v4IPs, overlayGatewayv4)...) + } else { + routes = append(routes, k.AddRoutes(v6IPs, overlayGatewayV6)...) + } + + return routes, nil +} + +// assignSubnetPrefixLengthFields is a no-op for linux swiftv2 as the default prefix-length is sufficient +func (k *K8sSWIFTv2Middleware) assignSubnetPrefixLengthFields(_ *cns.PodIpInfo, _ v1alpha1.InterfaceInfo, _ string) error { + return nil +} + +// add default route is done on setRoutes() for Linux swiftv2 +func (k *K8sSWIFTv2Middleware) addDefaultRoute(*cns.PodIpInfo, string) {} + +// IPConfigsRequestHandlerWrapper is the middleware function for handling SWIFT v2 IP configs requests for AKS-SWIFT. This function wrapped the default SWIFT request +// and release IP configs handlers. +func (k *K8sSWIFTv2Middleware) IPConfigsRequestHandlerWrapper(defaultHandler, failureHandler cns.IPConfigsHandlerFunc) cns.IPConfigsHandlerFunc { + return func(ctx context.Context, req cns.IPConfigsRequest) (*cns.IPConfigsResponse, error) { + podInfo, respCode, message := k.GetPodInfoForIPConfigsRequest(ctx, &req) - var mtpnc v1alpha1.MultitenantPodNetworkConfig - // if swiftv2 is enabled, get mtpnc - if isSwiftv2 { - mtpnc, respCode, message = k.getMTPNC(ctx, podInfo) if respCode != types.Success { - return nil, respCode, message + return &cns.IPConfigsResponse{ + Response: cns.Response{ + ReturnCode: respCode, + Message: message, + }, + }, errors.New("failed to validate IP configs request") } ipConfigsResp, err := defaultHandler(ctx, req) // If the pod is not v2, return the response from the handler @@ -102,269 +177,3 @@ func (k *K8sSWIFTv2Middleware) GetPodInfoForIPConfigsRequest(ctx context.Context return ipConfigsResp, nil } } - -// validateIPConfigsRequest validates if pod is multitenant by checking the pod labels, used in SWIFT V2 AKS scenario. -// nolint -func (k *K8sSWIFTv2Middleware) validateIPConfigsRequest(ctx context.Context, req *cns.IPConfigsRequest) (podInfo cns.PodInfo, respCode types.ResponseCode, message string) { - // Retrieve the pod from the cluster - podInfo, err := cns.UnmarshalPodInfo(req.OrchestratorContext) - if err != nil { - errBuf := errors.Wrapf(err, "failed to unmarshalling pod info from ipconfigs request %+v", req) - return nil, types.UnexpectedError, errBuf.Error() - } - logger.Printf("[SWIFTv2Middleware] validate ipconfigs request for pod %s", podInfo.Name()) - podNamespacedName := k8stypes.NamespacedName{Namespace: podInfo.Namespace(), Name: podInfo.Name()} - pod := v1.Pod{} - if err := k.Cli.Get(ctx, podNamespacedName, &pod); err != nil { - errBuf := errors.Wrapf(err, "failed to get pod %+v", podNamespacedName) - return nil, types.UnexpectedError, errBuf.Error() - } - - // check the pod labels for Swift V2, set the request's SecondaryInterfaceSet flag to true and check if its MTPNC CRD is ready - _, swiftV2PodNetworkLabel := pod.Labels[configuration.LabelPodSwiftV2] - _, swiftV2PodNetworkInstanceLabel := pod.Labels[configuration.LabelPodNetworkInstanceSwiftV2] - if swiftV2PodNetworkLabel || swiftV2PodNetworkInstanceLabel { - - // Check if the MTPNC CRD exists for the pod, if not, return error - mtpnc := v1alpha1.MultitenantPodNetworkConfig{} - mtpncNamespacedName := k8stypes.NamespacedName{Namespace: podInfo.Namespace(), Name: podInfo.Name()} - if err := k.Cli.Get(ctx, mtpncNamespacedName, &mtpnc); err != nil { - return nil, types.UnexpectedError, fmt.Errorf("failed to get pod's mtpnc from cache : %w", err).Error() - } - // Check if the MTPNC CRD is ready. If one of the fields is empty, return error - if !mtpnc.IsReady() { - return nil, types.UnexpectedError, errMTPNCNotReady.Error() - } - // If primary Ip is set in status field, it indicates the presence of secondary interfaces - if mtpnc.Status.PrimaryIP != "" { - req.SecondaryInterfacesExist = true - } - interfaceInfos := mtpnc.Status.InterfaceInfos - for _, interfaceInfo := range interfaceInfos { - if interfaceInfo.DeviceType == v1alpha1.DeviceTypeInfiniBandNIC { - if interfaceInfo.MacAddress == "" || interfaceInfo.NCID == "" { - return nil, types.UnexpectedError, errMTPNCNotReady.Error() - } - req.BackendInterfaceExist = true - req.BackendInterfaceMacAddresses = append(req.BackendInterfaceMacAddresses, interfaceInfo.MacAddress) - - } - if interfaceInfo.DeviceType == v1alpha1.DeviceTypeVnetNIC { - req.SecondaryInterfacesExist = true - } - } - } - logger.Printf("[SWIFTv2Middleware] pod %s has secondary interface : %v", podInfo.Name(), req.SecondaryInterfacesExist) - logger.Printf("[SWIFTv2Middleware] pod %s has backend interface : %v", podInfo.Name(), req.BackendInterfaceExist) - - return podInfo, types.Success, "" -} - -// getIPConfig returns the pod's SWIFT V2 IP configuration. -func (k *K8sSWIFTv2Middleware) getIPConfig(ctx context.Context, podInfo cns.PodInfo) ([]cns.PodIpInfo, error) { - // Check if the MTPNC CRD exists for the pod, if not, return error - mtpnc := v1alpha1.MultitenantPodNetworkConfig{} - mtpncNamespacedName := k8stypes.NamespacedName{Namespace: podInfo.Namespace(), Name: podInfo.Name()} - if err := k.Cli.Get(ctx, mtpncNamespacedName, &mtpnc); err != nil { - return nil, errors.Wrapf(err, "failed to get pod's mtpnc from cache") - } - - // Check if the MTPNC CRD is ready. If one of the fields is empty, return error - if !mtpnc.IsReady() { - return nil, errMTPNCNotReady - } - logger.Printf("[SWIFTv2Middleware] mtpnc for pod %s is : %+v", podInfo.Name(), mtpnc) - - var podIPInfos []cns.PodIpInfo - - if len(mtpnc.Status.InterfaceInfos) == 0 { - // Use fields from mtpnc.Status if InterfaceInfos is empty - ip, prefixSize, err := utils.ParseIPAndPrefix(mtpnc.Status.PrimaryIP) - if err != nil { - return nil, errors.Wrap(err, "failed to parse mtpnc primary IP and prefix") - } - if prefixSize != prefixLength { - return nil, errors.Wrapf(errInvalidMTPNCPrefixLength, "mtpnc primaryIP prefix length is %d", prefixSize) - } - - podIPInfos = append(podIPInfos, cns.PodIpInfo{ - PodIPConfig: cns.IPSubnet{ - IPAddress: ip, - PrefixLength: uint8(prefixSize), - }, - MacAddress: mtpnc.Status.MacAddress, - NICType: cns.DelegatedVMNIC, - SkipDefaultRoutes: false, - // InterfaceName is empty for DelegatedVMNIC - }) - } else { - for _, interfaceInfo := range mtpnc.Status.InterfaceInfos { - var ( - nicType cns.NICType - ip string - prefixSize int - err error - ) - switch { - case interfaceInfo.DeviceType == v1alpha1.DeviceTypeVnetNIC: - nicType = cns.DelegatedVMNIC - case interfaceInfo.DeviceType == v1alpha1.DeviceTypeInfiniBandNIC: - nicType = cns.NodeNetworkInterfaceBackendNIC - default: - nicType = cns.DelegatedVMNIC - } - if nicType != cns.NodeNetworkInterfaceBackendNIC { - // Parse MTPNC primaryIP to get the IP address and prefix length - ip, prefixSize, err = utils.ParseIPAndPrefix(interfaceInfo.PrimaryIP) - if err != nil { - return nil, errors.Wrap(err, "failed to parse mtpnc primary IP and prefix") - } - if prefixSize != prefixLength { - return nil, errors.Wrapf(errInvalidMTPNCPrefixLength, "mtpnc primaryIP prefix length is %d", prefixSize) - } - - podIPInfo := cns.PodIpInfo{ - PodIPConfig: cns.IPSubnet{ - IPAddress: ip, - PrefixLength: uint8(prefixSize), - }, - MacAddress: interfaceInfo.MacAddress, - NICType: nicType, - SkipDefaultRoutes: false, - // InterfaceName is empty for DelegatedVMNIC and AccelnetFrontendNIC - } - // for windows scenario, it is required to add additional fields with the exact subnetAddressSpace - // received from MTPNC, this function assigns them for windows while linux is a no-op - err = k.assignSubnetPrefixLengthFields(&podIPInfo, interfaceInfo, ip) - if err != nil { - return nil, errors.Wrap(err, "failed to parse mtpnc subnetAddressSpace prefix") - } - podIPInfos = append(podIPInfos, podIPInfo) - // for windows scenario, it is required to add default route with gatewayIP from CNS - k.addDefaultRoute(&podIPInfo, interfaceInfo.GatewayIP) - } - } - } - - return podIPInfos, nil -} - -func (k *K8sSWIFTv2Middleware) Type() cns.SWIFTV2Mode { - return cns.K8sSWIFTV2 -} - -// gets Pod Data -func (k *K8sSWIFTv2Middleware) GetPodInfo(ctx context.Context, req *cns.IPConfigsRequest) (podInfo cns.PodInfo, k8sPod v1.Pod, respCode types.ResponseCode, message string) { - // Retrieve the pod from the cluster - podInfo, err := cns.UnmarshalPodInfo(req.OrchestratorContext) - if err != nil { - errBuf := errors.Wrapf(err, "failed to unmarshalling pod info from ipconfigs request %+v", req) - return nil, v1.Pod{}, types.UnexpectedError, errBuf.Error() - } - logger.Printf("[SWIFTv2Middleware] validate ipconfigs request for pod %s", podInfo.Name()) - podNamespacedName := k8stypes.NamespacedName{Namespace: podInfo.Namespace(), Name: podInfo.Name()} - pod := v1.Pod{} - if err := k.Cli.Get(ctx, podNamespacedName, &pod); err != nil { - errBuf := errors.Wrapf(err, "failed to get pod %+v", podNamespacedName) - return nil, v1.Pod{}, types.UnexpectedError, errBuf.Error() - } - return podInfo, pod, types.Success, "" -} - -// validates if pod is multitenant by checking the pod labels, used in SWIFT V2 AKS scenario. -func ValidateSwiftv2Pod(pod v1.Pod) bool { - // check the pod labels for Swift V2 - _, swiftV2PodNetworkLabel := pod.Labels[configuration.LabelPodSwiftV2] - _, swiftV2PodNetworkInstanceLabel := pod.Labels[configuration.LabelPodNetworkInstanceSwiftV2] - return swiftV2PodNetworkLabel || swiftV2PodNetworkInstanceLabel -} - -func (k *K8sSWIFTv2Middleware) getMTPNC(ctx context.Context, podInfo cns.PodInfo) (mtpncResource v1alpha1.MultitenantPodNetworkConfig, respCode types.ResponseCode, message string) { - // Check if the MTPNC CRD exists for the pod, if not, return error - mtpnc := v1alpha1.MultitenantPodNetworkConfig{} - mtpncNamespacedName := k8stypes.NamespacedName{Namespace: podInfo.Namespace(), Name: podInfo.Name()} - if err := k.Cli.Get(ctx, mtpncNamespacedName, &mtpnc); err != nil { - return v1alpha1.MultitenantPodNetworkConfig{}, types.UnexpectedError, fmt.Errorf("failed to get pod's mtpnc from cache : %w", err).Error() - } - // Check if the MTPNC CRD is ready. If one of the fields is empty, return error - if !mtpnc.IsReady() { - return v1alpha1.MultitenantPodNetworkConfig{}, types.UnexpectedError, errMTPNCNotReady.Error() - } - return mtpnc, types.Success, "" -} - -// Updates Ip Config Request -func (k *K8sSWIFTv2Middleware) UpdateIPConfigRequest(mtpnc v1alpha1.MultitenantPodNetworkConfig, req *cns.IPConfigsRequest) ( - respCode types.ResponseCode, - message string, -) { - // If primary Ip is set in status field, it indicates the presence of secondary interfaces - if mtpnc.Status.PrimaryIP != "" { - req.SecondaryInterfacesExist = true - } - - interfaceInfos := mtpnc.Status.InterfaceInfos - for _, interfaceInfo := range interfaceInfos { - if interfaceInfo.DeviceType == v1alpha1.DeviceTypeInfiniBandNIC { - if interfaceInfo.MacAddress == "" || interfaceInfo.NCID == "" { - return types.UnexpectedError, errMTPNCNotReady.Error() - } - req.BackendInterfaceExist = true - req.BackendInterfaceMacAddresses = append(req.BackendInterfaceMacAddresses, interfaceInfo.MacAddress) - - } - if interfaceInfo.DeviceType == v1alpha1.DeviceTypeVnetNIC { - req.SecondaryInterfacesExist = true - } - } - - return types.Success, "" -} - -func (k *K8sSWIFTv2Middleware) AddRoutes(cidrs []string, gatewayIP string) []cns.Route { - routes := make([]cns.Route, len(cidrs)) - for i, cidr := range cidrs { - routes[i] = cns.Route{ - IPAddress: cidr, - GatewayIPAddress: gatewayIP, - } - } - return routes -} - -// Both Linux and Windows CNS gets infravnet and service CIDRs from configuration env -// GetInfravnetAndServiceCidrs() returns v4CIDRs(infravnet and service cidrs) as first []string and v6CIDRs(infravnet and service) as second []string -func (k *K8sSWIFTv2Middleware) GetInfravnetAndServiceCidrs() ([]string, []string, error) { //nolint - v4Cidrs := []string{} - v6Cidrs := []string{} - - // Get and parse infraVNETCIDRs from env - infraVNETCIDRs, err := configuration.InfraVNETCIDRs() - if err != nil { - return nil, nil, errors.Wrapf(err, "failed to get infraVNETCIDRs from env") - } - infraVNETCIDRsv4, infraVNETCIDRsv6, err := utils.ParseCIDRs(infraVNETCIDRs) - if err != nil { - return nil, nil, errors.Wrapf(err, "failed to parse infraVNETCIDRs") - } - - // Add infravnet CIDRs to v4 and v6 IPs - v4Cidrs = append(v4Cidrs, infraVNETCIDRsv4...) - v6Cidrs = append(v6Cidrs, infraVNETCIDRsv6...) - - // Get and parse serviceCIDRs from env - serviceCIDRs, err := configuration.ServiceCIDRs() - if err != nil { - return nil, nil, errors.Wrapf(err, "failed to get serviceCIDRs from env") - } - serviceCIDRsV4, serviceCIDRsV6, err := utils.ParseCIDRs(serviceCIDRs) - if err != nil { - return nil, nil, errors.Wrapf(err, "failed to parse serviceCIDRs") - } - - // Add service CIDRs to v4 and v6 IPs - v4Cidrs = append(v4Cidrs, serviceCIDRsV4...) - v6Cidrs = append(v6Cidrs, serviceCIDRsV6...) - - return v4Cidrs, v6Cidrs, nil -}