Skip to content

Commit

Permalink
Merge pull request #62 from digitalocean/awg/update-webhook-check
Browse files Browse the repository at this point in the history
Update the DOKS admission controller webhook check
  • Loading branch information
adamwg authored Oct 4, 2019
2 parents d80f88e + af31dfe commit 56e3306
Show file tree
Hide file tree
Showing 5 changed files with 322 additions and 135 deletions.
30 changes: 24 additions & 6 deletions checks.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,10 @@ spec:
- Name: `admission-controller-webhook`
- Groups: `doks`

When you configure admission controllers with webhooks that have a `failurePolicy` set to `Fail`, it prevents managed components like Cilium, kube-proxy, and CoreDNS from starting on a new node during an upgrade. This will result in the cluster upgrade failing.
Admission control webhooks can disrupt upgrade and node replacement operations by preventing system components from starting. Specifically, this happens when an admission control webhook:
* has failurePolicy set to Fail,
* targets a service other than the Kubernetes apiserver, and
* applies to both kube-system and the namespace of the targeted service.

### Example

Expand All @@ -233,7 +236,7 @@ webhooks:
scope: "Namespaced"
clientConfig:
service:
namespace: kube-system
namespace: webhook
name: webhook-server
path: /pods
admissionReviewVersions:
Expand All @@ -244,8 +247,23 @@ webhooks:

### How to Fix

There are a few options:
1. Use the `Ignore` `failurePolicy`.
2. Use an apiserver extension as your webhook service.
3. Explicitly exclude the kube-system namespace.
4. Explicitly exclude the webhook service's namespace.

```yaml
# Recommended: Exclude objects in the `kube-system` namespace by explicitly specifying a namespaceSelector or objectSelector
# Recommended: Exclude objects in the `webhook` namespace by explicitly specifying a namespaceSelector.

apiVersion: v1
kind: Namespace
metadata:
name: webhook
labels:
skip-webhooks: "yes"

---
apiVersion: admissionregistration.k8s.io/v1beta1
kind: ValidatingWebhookConfiguration
metadata:
Expand All @@ -264,7 +282,7 @@ webhooks:
scope: "Namespaced"
clientConfig:
service:
namespace: kube-system
namespace: webhook
name: webhook-server
path: /pods
admissionReviewVersions:
Expand All @@ -273,8 +291,8 @@ webhooks:
failurePolicy: Fail
namespaceSelector:
matchExpressions:
- key: "doks.digitalocean.com/webhook"
operator: "Exists"
- key: "skip-webhooks"
operator: "DoesNotExist"
```
## Pod State
Expand Down
121 changes: 95 additions & 26 deletions checks/doks/admission_controller_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,13 @@ limitations under the License.
package doks

import (
"errors"

"github.com/digitalocean/clusterlint/checks"
"github.com/digitalocean/clusterlint/kube"
ar "k8s.io/api/admissionregistration/v1beta1"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand All @@ -43,46 +46,112 @@ func (w *webhookCheck) Groups() []string {
// Description returns a detailed human-readable description of what this check
// does.
func (w *webhookCheck) Description() string {
return "Check for admission controllers that could prevent managed components from starting"
return "Check for admission control webhooks that could cause problems during upgrades"
}

// Run runs this check on a set of Kubernetes objects. It can return warnings
// (low-priority problems) and errors (high-priority problems) as well as an
// error value indicating that the check failed to run.
// Run runs this check on a set of Kubernetes objects.
func (w *webhookCheck) Run(objects *kube.Objects) ([]checks.Diagnostic, error) {
const apiserverServiceName = "kubernetes"

var diagnostics []checks.Diagnostic

for _, config := range objects.ValidatingWebhookConfigurations.Items {
for _, validatingWebhook := range config.Webhooks {
if *validatingWebhook.FailurePolicy == ar.Fail &&
validatingWebhook.ClientConfig.Service != nil &&
selectorMatchesNamespace(validatingWebhook.NamespaceSelector, objects.SystemNamespace) {
d := checks.Diagnostic{
Severity: checks.Error,
Message: "Webhook matches objects in the kube-system namespace. This can cause problems when upgrading the cluster.",
Kind: checks.ValidatingWebhookConfiguration,
Object: &config.ObjectMeta,
Owners: config.ObjectMeta.GetOwnerReferences(),
for _, wh := range config.Webhooks {
if *wh.FailurePolicy == ar.Ignore {
// Webhooks with failurePolicy: Ignore are fine.
continue
}
if wh.ClientConfig.Service == nil {
// Webhooks whose targets are external to the cluster are fine.
continue
}
if wh.ClientConfig.Service.Namespace == metav1.NamespaceDefault &&
wh.ClientConfig.Service.Name == apiserverServiceName {
// Webhooks that target the kube-apiserver are fine.
continue
}
if !selectorMatchesNamespace(wh.NamespaceSelector, objects.SystemNamespace) {
// Webhooks that don't apply to kube-system are fine.
continue
}
var svcNamespace *v1.Namespace
for _, ns := range objects.Namespaces.Items {
if ns.Name == wh.ClientConfig.Service.Namespace {
svcNamespace = &ns
}
diagnostics = append(diagnostics, d)
}
if svcNamespace == nil {
return nil, errors.New("webhook refers to service in non-existent namespace")
}
if !selectorMatchesNamespace(wh.NamespaceSelector, svcNamespace) && len(objects.Nodes.Items) > 1 {
// Webhooks that don't apply to their own namespace are fine, as
// long as there's more than one node in the cluster.
continue
}

d := checks.Diagnostic{
Severity: checks.Error,
Message: "Validating webhook is configured in such a way that it may be problematic during upgrades.",
Kind: checks.ValidatingWebhookConfiguration,
Object: &config.ObjectMeta,
Owners: config.ObjectMeta.GetOwnerReferences(),
}
diagnostics = append(diagnostics, d)

// We don't want to produce diagnostics for multiple webhooks in the
// same webhook configuration, so break out of the inner loop if we
// get here.
break
}
}

for _, config := range objects.MutatingWebhookConfigurations.Items {
for _, mutatingWebhook := range config.Webhooks {
if *mutatingWebhook.FailurePolicy == ar.Fail &&
mutatingWebhook.ClientConfig.Service != nil &&
selectorMatchesNamespace(mutatingWebhook.NamespaceSelector, objects.SystemNamespace) {
d := checks.Diagnostic{
Severity: checks.Error,
Message: "Webhook matches objects in the kube-system namespace. This can cause problems when upgrading the cluster.",
Kind: checks.MutatingWebhookConfiguration,
Object: &config.ObjectMeta,
Owners: config.ObjectMeta.GetOwnerReferences(),
for _, wh := range config.Webhooks {
if *wh.FailurePolicy == ar.Ignore {
// Webhooks with failurePolicy: Ignore are fine.
continue
}
if wh.ClientConfig.Service == nil {
// Webhooks whose targets are external to the cluster are fine.
continue
}
if wh.ClientConfig.Service.Namespace == metav1.NamespaceDefault &&
wh.ClientConfig.Service.Name == apiserverServiceName {
// Webhooks that target the kube-apiserver are fine.
continue
}
if !selectorMatchesNamespace(wh.NamespaceSelector, objects.SystemNamespace) {
// Webhooks that don't apply to kube-system are fine.
continue
}
var svcNamespace *v1.Namespace
for _, ns := range objects.Namespaces.Items {
if ns.Name == wh.ClientConfig.Service.Namespace {
svcNamespace = &ns
}
diagnostics = append(diagnostics, d)
}
if svcNamespace == nil {
return nil, errors.New("webhook refers to service in non-existent namespace")
}
if !selectorMatchesNamespace(wh.NamespaceSelector, svcNamespace) && len(objects.Nodes.Items) > 1 {
// Webhooks that don't apply to their own namespace are fine, as
// long as there's more than one node in the cluster.
continue
}

d := checks.Diagnostic{
Severity: checks.Error,
Message: "Mutating webhook is configured in such a way that it may be problematic during upgrades.",
Kind: checks.MutatingWebhookConfiguration,
Object: &config.ObjectMeta,
Owners: config.ObjectMeta.GetOwnerReferences(),
}
diagnostics = append(diagnostics, d)

// We don't want to produce diagnostics for multiple webhooks in the
// same webhook configuration, so break out of the inner loop if we
// get here.
break
}
}
return diagnostics, nil
Expand Down
Loading

0 comments on commit 56e3306

Please sign in to comment.