[release-4.18] OCPBUGS-50582: Graceful cleanup of IPsec states #2644

Open · wants to merge 6 commits into base: release-4.18
@@ -20,6 +20,7 @@ spec:

 [Service]
 Type=oneshot
+ExecStartPre=rm -f /etc/ipsec.d/cno.conf
 ExecStart=systemctl enable --now ipsec.service

 [Install]
@@ -20,6 +20,7 @@ spec:

 [Service]
 Type=oneshot
+ExecStartPre=rm -f /etc/ipsec.d/cno.conf
 ExecStart=systemctl enable --now ipsec.service

 [Install]
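Both unit snippets above gain the same pre-start cleanup line. As a minimal sketch of why the new line is safe to run unconditionally (systemd executes ExecStartPre before ExecStart, and rm -f succeeds even when the target is absent):

# Hedged sketch: the new ExecStartPre command is idempotent. rm -f exits 0
# even when the file does not exist, so the oneshot unit cannot fail on
# nodes that never had a CNO-rendered cno.conf.
rm -f /etc/ipsec.d/cno.conf
echo "pre-start cleanup exit=$?"   # prints exit=0 either way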
45 changes: 15 additions & 30 deletions bindata/network/ovn-kubernetes/common/ipsec-host.yaml
@@ -239,38 +239,23 @@ spec:
defaultcpinclude="include \/etc\/crypto-policies\/back-ends\/libreswan.config"
if ! grep -q "# ${defaultcpinclude}" /etc/ipsec.conf; then
sed -i "/${defaultcpinclude}/s/^/# /" /etc/ipsec.conf
fi
# since pluto is on the host, we need to restart it after changing connection
# parameters.
chroot /proc/1/root ipsec restart

# Use /etc/ipsec.d/cno.conf file to write our own default IPsec connection parameters.
# The /etc/ipsec.d/openshift.conf file can not be used because it is managed by openvswitch.
touch /etc/ipsec.d/cno.conf
if ! grep -q "narrowing=yes" /etc/ipsec.d/cno.conf; then
cat <<EOF > /etc/ipsec.d/cno.conf
# Default IPsec connection parameters rendered by network operator.
# The narrowing=yes is needed to narrow down the proposals exchanged
# by the two peers to a mutually acceptable set, otherwise traffic
# between peer nodes sometimes takes a hit.
conn %default
narrowing=yes
EOF
counter=0
until [ -r /run/pluto/pluto.ctl ]; do
counter=$((counter+1))
sleep 1
if [ $counter -gt 300 ];
then
echo "ipsec has not started after $counter seconds"
exit 1
fi
done
echo "ipsec service is restarted"
fi

# since pluto is on the host, we need to restart it after changing connection
# parameters.
chroot /proc/1/root ipsec restart

counter=0
until [ -r /run/pluto/pluto.ctl ]; do
counter=$((counter+1))
sleep 1
if [ $counter -gt 300 ];
then
echo "ipsec has not started after $counter seconds"
exit 1
fi
done
echo "ipsec service is restarted"

# Workaround for https://github.com/libreswan/libreswan/issues/373
ulimit -n 1024

@@ -407,7 +392,7 @@ spec:
# When east-west ipsec is not disabled, then do not flush xfrm states and
# policies in order to maintain traffic flows during container restart.
ipsecflush() {
if [ "$(kubectl get networks.operator.openshift.io cluster -ojsonpath='{.spec.defaultNetwork.ovnKubernetesConfig.ipsecConfig.mode}')" != "Full" ] || \
if [ "$(kubectl get networks.operator.openshift.io cluster -ojsonpath='{.spec.defaultNetwork.ovnKubernetesConfig.ipsecConfig.mode}')" != "Full" ] && \
[ "$(kubectl get networks.operator.openshift.io cluster -ojsonpath='{.spec.defaultNetwork.ovnKubernetesConfig.ipsecConfig}')" != "{}" ]; then
ip x s flush
ip x p flush
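The operative change in this hunk is || to &&: with ||, a cluster in Full mode (where ipsecConfig is non-empty) still matched the second clause and flushed the xfrm state, which is exactly what the comment above says must not happen. A minimal sketch of the corrected decision, with assumed kubectl outputs inlined as variables:

# Hedged sketch (values assumed): flush only when east-west IPsec is really
# disabled, i.e. the mode is not Full AND the legacy empty ipsecConfig ("{}")
# is not in use.
mode='Full'; cfg='{"mode":"Full"}'
if [ "$mode" != "Full" ] && [ "$cfg" != "{}" ]; then
  echo "flush xfrm state and policy"       # External or Disabled mode
else
  echo "keep xfrm state (EW IPsec active)" # reached here: Full mode
fi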
9 changes: 7 additions & 2 deletions pkg/bootstrap/types.go
@@ -49,8 +49,13 @@ type OVNUpdateStatus struct {
// OVNIPsecStatus contains status of current IPsec configuration
// in the cluster.
type OVNIPsecStatus struct {
-LegacyIPsecUpgrade bool // true if IPsec in 4.14 or Pre-4.14 cluster is upgraded to latest version
-OVNIPsecActive bool // set to true unless we are sure it is not.
+// LegacyIPsecUpgrade is true if IPsec in a 4.14.x cluster is upgraded to a 4.15.x version.
+LegacyIPsecUpgrade bool
+// IsOVNIPsecActiveOrRollingOut is set to true unless we are sure it is not. Note that this
+// is also set to true while the ovnkube-node daemonset is progressing, which does not
+// reflect the actual OVN IPsec state, so be cautious when making decisions during machine
+// config rollouts and node reboot scenarios.
+IsOVNIPsecActiveOrRollingOut bool
}
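The rename matters for consumers of this flag. A small, self-contained sketch of how the renamed field is meant to be combined with MachineConfig presence before trusting it (this mirrors the derivation added to shouldRenderIPsec below; all input values here are assumed):

package main

import "fmt"

// Trimmed copy of the struct above, for illustration only.
type OVNIPsecStatus struct {
	IsOVNIPsecActiveOrRollingOut bool
}

func main() {
	status := OVNIPsecStatus{IsOVNIPsecActiveOrRollingOut: true}
	// Assumed inputs: no IPsec MachineConfigs present and not a hypershift
	// hosted cluster, e.g. while a rollout is still in flight.
	cnoMCPresent, userMCPresent, hypershift := false, false, false
	isOVNIPsecActive := status.IsOVNIPsecActiveOrRollingOut &&
		(cnoMCPresent || userMCPresent || hypershift)
	fmt.Println("treat OVN IPsec as active:", isOVNIPsecActive) // false
}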

type OVNBootstrapResult struct {
199 changes: 78 additions & 121 deletions pkg/network/ovn_kubernetes.go
@@ -522,25 +522,6 @@ func renderOVNKubernetes(conf *operv1.NetworkSpec, bootstrapResult *bootstrap.Bo
anno[names.CreateWaitAnnotation] = "true"
o.SetAnnotations(anno)
})
// The legacy ovn-ipsec deployment is only rendered during upgrades until we
// are ready to remove it.
ovnIPsecLegacyDS := &appsv1.DaemonSet{
TypeMeta: metav1.TypeMeta{
Kind: "DaemonSet",
APIVersion: appsv1.SchemeGroupVersion.String(),
},
ObjectMeta: metav1.ObjectMeta{
Name: "ovn-ipsec",
Namespace: util.OVN_NAMESPACE,
// We never update the legacy ovn-ipsec daemonset.
Annotations: map[string]string{names.CreateWaitAnnotation: "true"},
},
}
obj, err := k8s.ToUnstructured(ovnIPsecLegacyDS)
if err != nil {
return nil, progressing, fmt.Errorf("unable to render legacy ovn-ipsec daemonset: %w", err)
}
objs = append(objs, obj)
}

klog.Infof("ovnk components: ovnkube-node: isRunning=%t, update=%t; ovnkube-control-plane: isRunning=%t, update=%t",
@@ -612,69 +593,76 @@ func IsIPsecLegacyAPI(conf *operv1.OVNKubernetesConfig) bool {
return conf.IPsecConfig == nil || conf.IPsecConfig.Mode == ""
}
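For reference, a tiny sketch of what the legacy-API check above distinguishes (types are trimmed stand-ins for the operv1 API; illustration only):

package main

import "fmt"

// Trimmed stand-ins for the operv1 types used above.
type IPsecConfig struct{ Mode string }
type OVNKubernetesConfig struct{ IPsecConfig *IPsecConfig }

// Mirrors IsIPsecLegacyAPI from the diff: the legacy API is in use when no
// ipsecConfig is set, or when it is set without a mode.
func isIPsecLegacyAPI(conf *OVNKubernetesConfig) bool {
	return conf.IPsecConfig == nil || conf.IPsecConfig.Mode == ""
}

func main() {
	fmt.Println(isIPsecLegacyAPI(&OVNKubernetesConfig{}))                                        // true: nil config
	fmt.Println(isIPsecLegacyAPI(&OVNKubernetesConfig{IPsecConfig: &IPsecConfig{}}))             // true: empty mode
	fmt.Println(isIPsecLegacyAPI(&OVNKubernetesConfig{IPsecConfig: &IPsecConfig{Mode: "Full"}})) // false: new API
}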

// shouldRenderIPsec method ensures we have the following IPsec states for the upgrade path from 4.14 to 4.15 or later versions:
// When a 4.14 cluster is already installed with a MachineConfig for the IPsec extension and ipsecConfig is set in the network operator
// config (i.e. IPsec for NS+EW), then render CNO's IPsec MC extension and the ipsec-host daemonset.
// When a 4.14 cluster is just running with ipsecConfig set in the network operator config (i.e. IPsec for EW only), then activate
// the IPsec MachineConfig and render the ipsec-host daemonset.
// When a 4.14 cluster is just installed with a MachineConfig for the IPsec extension (i.e. IPsec for NS only), then just keep the
// MachineConfig in the same state without rendering IPsec daemonsets.
// When the 4.14 cluster is a Hypershift cluster running with ipsecConfig set, then just render the ovn-ipsec-containerized daemonset,
// as the MachineConfig kind is not supported there.
// For the upgrade path from pre-4.14 to 4.15 or later versions:
// When a pre-4.14 cluster is just running with ipsecConfig set in the network operator config (i.e. IPsec for EW only), then activate
// the IPsec MachineConfig and render the ipsec-host daemonset.
// When the pre-4.14 cluster is a Hypershift cluster running with ipsecConfig set, then just render the ovn-ipsec-containerized daemonset,
// as the MachineConfig kind is not supported there.
// All other cases are not supported in pre-4.14 deployments.
// shouldRenderIPsec method ensures the needed states when enabling, disabling,
// or upgrading IPsec.
func shouldRenderIPsec(conf *operv1.OVNKubernetesConfig, bootstrapResult *bootstrap.BootstrapResult) (renderCNOIPsecMachineConfig, renderIPsecDaemonSet,
renderIPsecOVN, renderIPsecHostDaemonSet, renderIPsecContainerizedDaemonSet, renderIPsecDaemonSetAsCreateWaitOnly bool) {
isHypershiftHostedCluster := bootstrapResult.Infra.HostedControlPlane != nil
isIpsecUpgrade := bootstrapResult.OVN.IPsecUpdateStatus != nil && bootstrapResult.OVN.IPsecUpdateStatus.LegacyIPsecUpgrade
isOVNIPsecActive := bootstrapResult.OVN.IPsecUpdateStatus != nil && bootstrapResult.OVN.IPsecUpdateStatus.OVNIPsecActive

mode := GetIPsecMode(conf)
// Note on 4.14 to 4.15 legacy IPsec upgrade for self managed clusters:
// during this upgrade both host and containerized daemonsets are rendered.
// Internally, these daemonsets coordinate when they are active or dormant:
// before the IPsec MachineConfig extensions are active, the containerized
// daemonset is active and the host daemonset is dormant; after rebooting
// with the IPsec MachineConfig extensions active, the containerized
// daemonset is dormant and the host daemonset is active. When the upgrade
// finishes, the containerized daemonset is then not rendered.

// On upgrade, we will just remove any existing ipsec deployment without making any
// change to it. So during upgrade, we must keep track of whether the IPsec MachineConfigs
// are active or not for non-hypershift hosted clusters.
isIPsecMachineConfigActive := isIPsecMachineConfigActive(bootstrapResult.Infra)
isIPsecMachineConfigNotActiveOnUpgrade := isIpsecUpgrade && !isIPsecMachineConfigActive && !isHypershiftHostedCluster
isMachineConfigClusterOperatorReady := bootstrapResult.Infra.MachineConfigClusterOperatorReady
isHypershiftHostedCluster := bootstrapResult.Infra.HostedControlPlane != nil
isIpsecLegacyUpgrade := bootstrapResult.OVN.IPsecUpdateStatus != nil && bootstrapResult.OVN.IPsecUpdateStatus.LegacyIPsecUpgrade
isOVNIPsecActiveOrRollingOut := bootstrapResult.OVN.IPsecUpdateStatus != nil && bootstrapResult.OVN.IPsecUpdateStatus.IsOVNIPsecActiveOrRollingOut
isCNOIPsecMachineConfigPresent := isCNOIPsecMachineConfigPresent(bootstrapResult.Infra)
isUserDefinedIPsecMachineConfigPresent := isUserDefinedIPsecMachineConfigPresent(bootstrapResult.Infra)
isMachineConfigClusterOperatorReady := bootstrapResult.Infra.MachineConfigClusterOperatorReady

mode := GetIPsecMode(conf)

// When OVN is rolling out, OVN IPsec might be fully or partially active or inactive.
// If MachineConfigs are not present, we know it's inactive since we only stop rendering them once inactive.
isOVNIPsecActive := isOVNIPsecActiveOrRollingOut && (isCNOIPsecMachineConfigPresent || isUserDefinedIPsecMachineConfigPresent || isHypershiftHostedCluster)

// We render the ipsec deployment if IPsec is already active in OVN
// or if EW IPsec config is enabled.
renderIPsecDaemonSet = isOVNIPsecActive || mode == operv1.IPsecModeFull

// If ipsec is enabled, we render the host ipsec deployment except for
// hypershift hosted clusters, and we need to wait for the ipsec MachineConfig
// extensions to be active first. We must also render the host ipsec deployment
// at the time of upgrade even though the user-created IPsec MachineConfig is
// not present/active.
renderIPsecHostDaemonSet = (renderIPsecDaemonSet && isIPsecMachineConfigActive && !isHypershiftHostedCluster) || isIPsecMachineConfigNotActiveOnUpgrade

// The containerized ipsec deployment is only rendered during upgrades or
// for hypershift hosted clusters.
renderIPsecContainerizedDaemonSet = (renderIPsecDaemonSet && isHypershiftHostedCluster) || isIPsecMachineConfigNotActiveOnUpgrade

// MachineConfig IPsec extensions rollout is needed for ipsec enablement and is used in both External and Full modes,
// except when the containerized deployment is used in hypershift hosted clusters. Also do not render the MachineConfig
// if the user already created their own machine config for IPsec.
// To enable IPsec, specific MachineConfig extensions need to be rolled out
// first, with the following exceptions:
// - not needed when the containerized deployment is used in hypershift
//   hosted clusters
// - not needed if the user already created their own
renderCNOIPsecMachineConfig = (mode != operv1.IPsecModeDisabled || renderIPsecDaemonSet) && !isHypershiftHostedCluster &&
!isUserDefinedIPsecMachineConfigPresent
// Wait for MCO to be ready unless we had already rendered the IPsec MachineConfig.
renderCNOIPsecMachineConfig = renderCNOIPsecMachineConfig && (isCNOIPsecMachineConfigPresent || isMachineConfigClusterOperatorReady)

// We render OVN IPsec if East-West IPsec is enabled or its upgrade is in progress.
// If NS IPsec is enabled as well, we need to wait for the IPsec MachineConfig
// to be active if it's not an upgrade and not a hypershift hosted cluster.
renderIPsecOVN = (renderIPsecHostDaemonSet || renderIPsecContainerizedDaemonSet) && mode == operv1.IPsecModeFull
// As a general rule, we need to wait until the IPsec MachineConfig
// extensions are active before rendering the IPsec daemonsets. Note that
// during upgrades or node reboots there is a period of time where the IPsec
// machine configs are not active and the daemonsets won't be rendered, but
// that is fine since the IPsec configuration should persist. The exception
// is the 4.14 to 4.15 legacy IPsec upgrade as noted above.
isIPsecMachineConfigActive := isIPsecMachineConfigActive(bootstrapResult.Infra)
isIPsecMachineConfigNotActiveOnLegacyUpgrade := isIpsecLegacyUpgrade && !isIPsecMachineConfigActive && !isHypershiftHostedCluster

// While the OVN IPsec upgrade and the IPsec MachineConfig rollout are in progress,
// or the IPsec config in OVN is being disabled, the ipsec deployment is not updated.
renderIPsecDaemonSetAsCreateWaitOnly = isIPsecMachineConfigNotActiveOnUpgrade || (isOVNIPsecActive && !renderIPsecOVN)
// We render the host ipsec deployment for self managed clusters after the
// ipsec MachineConfig extensions have been rolled out, except for the 4.14
// to 4.15 legacy IPsec upgrade as noted above.
renderIPsecHostDaemonSet = (renderIPsecDaemonSet && isIPsecMachineConfigActive && !isHypershiftHostedCluster) || isIPsecMachineConfigNotActiveOnLegacyUpgrade

// We render the containerized ipsec deployment for hosted clusters. It does
// not depend on any machine config extension; however, we also render it for
// the 4.14 to 4.15 legacy IPsec upgrade as noted above.
renderIPsecContainerizedDaemonSet = (renderIPsecDaemonSet && isHypershiftHostedCluster) || isIPsecMachineConfigNotActiveOnLegacyUpgrade

// We render OVN IPsec if EW IPsec is enabled and one of the daemonsets is
// rendered. If OVN IPsec is already active, keep it rendered unless disabled.
renderIPsecOVN = (renderIPsecHostDaemonSet || renderIPsecContainerizedDaemonSet || isOVNIPsecActive) && mode == operv1.IPsecModeFull

// Keep IPsec daemonsets updated (but avoid creating them) in the following circumstances:
// - on the 4.14 to 4.15 legacy IPsec upgrade, where we just want to update
// them as noted above
// - when disabling OVN IPsec, we want to keep the daemonsets until after
// OVN IPsec is disabled
renderIPsecDaemonSetAsCreateWaitOnly = isIPsecMachineConfigNotActiveOnLegacyUpgrade || (isOVNIPsecActive && !renderIPsecOVN)

return
}
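To make the combined effect of these flags concrete, here is a small, self-contained sketch that replays the boolean wiring above for one assumed scenario (self-managed cluster, mode Full, MachineConfig extensions active, no legacy upgrade). The input values are illustrative; the derivation follows the code in this hunk:

package main

import "fmt"

func main() {
	// Assumed scenario inputs.
	mode := "Full"
	hypershift, legacyUpgrade := false, false
	activeOrRollingOut := true // from the ovnkube-node daemonset status
	cnoMCPresent, userMCPresent := true, false
	mcoReady, ipsecMCActive := true, true

	isOVNIPsecActive := activeOrRollingOut && (cnoMCPresent || userMCPresent || hypershift)
	renderDS := isOVNIPsecActive || mode == "Full"
	renderMC := (mode != "Disabled" || renderDS) && !hypershift && !userMCPresent &&
		(cnoMCPresent || mcoReady)
	legacyNotActive := legacyUpgrade && !ipsecMCActive && !hypershift
	renderHost := (renderDS && ipsecMCActive && !hypershift) || legacyNotActive
	renderContainerized := (renderDS && hypershift) || legacyNotActive
	renderOVN := (renderHost || renderContainerized || isOVNIPsecActive) && mode == "Full"
	createWaitOnly := legacyNotActive || (isOVNIPsecActive && !renderOVN)

	// Expected: MC=true DS=true OVN=true host=true containerized=false waitOnly=false
	fmt.Println(renderMC, renderDS, renderOVN, renderHost, renderContainerized, createWaitOnly)
}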
@@ -1184,7 +1172,7 @@ func bootstrapOVN(conf *operv1.Network, kubeClient cnoclient.Client, infraStatus
nodeStatus.Progressing = daemonSetProgressing(nodeDaemonSet, true)
// Retrieve OVN IPsec status from ovnkube-node daemonset as this is being used to rollout IPsec
// config from 4.14.
-ovnIPsecStatus.OVNIPsecActive = !isOVNIPsecNotActiveInDaemonSet(nodeDaemonSet)
+ovnIPsecStatus.IsOVNIPsecActiveOrRollingOut = !isOVNIPsecNotActiveInDaemonSet(nodeDaemonSet)
klog.Infof("ovnkube-node DaemonSet status: progressing=%t", nodeStatus.Progressing)

}
@@ -1210,77 +1198,46 @@ func bootstrapOVN(conf *operv1.Network, kubeClient cnoclient.Client, infraStatus
prepullerStatus.Progressing = daemonSetProgressing(prePullerDaemonSet, true)
}

-ipsecDaemonSet := &appsv1.DaemonSet{
+ipsecContainerizedDaemonSet := &appsv1.DaemonSet{
TypeMeta: metav1.TypeMeta{
Kind: "DaemonSet",
APIVersion: appsv1.SchemeGroupVersion.String(),
},
}

ipsecStatus := &bootstrap.OVNIPsecStatus{}

// The IPsec daemonset name is ovn-ipsec if we are upgrading from <= 4.13.
nsn = types.NamespacedName{Namespace: util.OVN_NAMESPACE, Name: "ovn-ipsec"}
if err := kubeClient.ClientFor("").CRClient().Get(context.TODO(), nsn, ipsecDaemonSet); err != nil {
ipsecHostDaemonSet := &appsv1.DaemonSet{
TypeMeta: metav1.TypeMeta{
Kind: "DaemonSet",
APIVersion: appsv1.SchemeGroupVersion.String(),
},
}
// Retrieve container based IPsec daemonset with name ovn-ipsec-containerized.
nsn = types.NamespacedName{Namespace: util.OVN_NAMESPACE, Name: "ovn-ipsec-containerized"}
if err := kubeClient.ClientFor("").CRClient().Get(context.TODO(), nsn, ipsecContainerizedDaemonSet); err != nil {
if !apierrors.IsNotFound(err) {
return nil, fmt.Errorf("Failed to retrieve existing pre-4.14 ipsec DaemonSet: %w", err)
return nil, fmt.Errorf("Failed to retrieve existing ipsec containerized DaemonSet: %w", err)
} else {
ipsecStatus = nil
ipsecContainerizedDaemonSet = nil
}
} else {
ipsecStatus.LegacyIPsecUpgrade = true
}

if ipsecStatus == nil {
ipsecStatus = &bootstrap.OVNIPsecStatus{}
ipsecContainerizedDaemonSet := &appsv1.DaemonSet{
TypeMeta: metav1.TypeMeta{
Kind: "DaemonSet",
APIVersion: appsv1.SchemeGroupVersion.String(),
},
}
ipsecHostDaemonSet := &appsv1.DaemonSet{
TypeMeta: metav1.TypeMeta{
Kind: "DaemonSet",
APIVersion: appsv1.SchemeGroupVersion.String(),
},
}
// Retrieve container based IPsec daemonset with name ovn-ipsec-containerized.
nsn = types.NamespacedName{Namespace: util.OVN_NAMESPACE, Name: "ovn-ipsec-containerized"}
if err := kubeClient.ClientFor("").CRClient().Get(context.TODO(), nsn, ipsecContainerizedDaemonSet); err != nil {
if !apierrors.IsNotFound(err) {
return nil, fmt.Errorf("Failed to retrieve existing ipsec containerized DaemonSet: %w", err)
} else {
ipsecContainerizedDaemonSet = nil
}
}
// Retrieve host based IPsec daemonset with name ovn-ipsec-host
nsn = types.NamespacedName{Namespace: util.OVN_NAMESPACE, Name: "ovn-ipsec-host"}
if err := kubeClient.ClientFor("").CRClient().Get(context.TODO(), nsn, ipsecHostDaemonSet); err != nil {
if !apierrors.IsNotFound(err) {
return nil, fmt.Errorf("Failed to retrieve existing ipsec host DaemonSet: %w", err)
} else {
ipsecHostDaemonSet = nil
}
}
if ipsecContainerizedDaemonSet != nil && ipsecHostDaemonSet != nil {
// Both IPsec daemonset versions exist, so this is an upgrade from 4.14.
ipsecStatus.LegacyIPsecUpgrade = true
} else if ipsecContainerizedDaemonSet == nil && ipsecHostDaemonSet == nil {
ipsecStatus = nil
}
// Retrieve host based IPsec daemonset with name ovn-ipsec-host
nsn = types.NamespacedName{Namespace: util.OVN_NAMESPACE, Name: "ovn-ipsec-host"}
if err := kubeClient.ClientFor("").CRClient().Get(context.TODO(), nsn, ipsecHostDaemonSet); err != nil {
if !apierrors.IsNotFound(err) {
return nil, fmt.Errorf("Failed to retrieve existing ipsec host DaemonSet: %w", err)
} else {
ipsecHostDaemonSet = nil
}
}

// set OVN IPsec status into ipsecStatus only when IPsec daemonset(s) exists in the cluster.
if ipsecStatus != nil {
ipsecStatus.OVNIPsecActive = ovnIPsecStatus.OVNIPsecActive
if ipsecContainerizedDaemonSet != nil && ipsecHostDaemonSet != nil {
// Both IPsec daemonset versions exist, so this is an upgrade from 4.14.
ovnIPsecStatus.LegacyIPsecUpgrade = true
}

res := bootstrap.OVNBootstrapResult{
ControlPlaneReplicaCount: controlPlaneReplicaCount,
ControlPlaneUpdateStatus: controlPlaneStatus,
NodeUpdateStatus: nodeStatus,
-IPsecUpdateStatus: ipsecStatus,
+IPsecUpdateStatus: ovnIPsecStatus,
PrePullerUpdateStatus: prepullerStatus,
OVNKubernetesConfig: ovnConfigResult,
FlowsConfig: bootstrapFlowsConfig(kubeClient.ClientFor("").CRClient()),