Skip to content

Commit

Permalink
Add Kubernetes health checks to the HBS (#80)
Browse files Browse the repository at this point in the history
* Add k8s api server check

* Add testing

* Undo dockerfile changes

* Add more testing

* Test isInMaintenance

* Review comments

* Add tests without k8s

* Add more metrics

* Add metrics test

* Use context.WithTimeout

* Add k8s unhealthy test
  • Loading branch information
cristinaleonr authored Aug 29, 2022
1 parent 2eb6a20 commit 5a62379
Show file tree
Hide file tree
Showing 14 changed files with 662 additions and 41 deletions.
4 changes: 3 additions & 1 deletion cmd/heartbeat/Dockerfile.heartbeat
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
FROM golang:1.18.3-alpine3.16 AS build
RUN apk add git
ADD . /go/src/github.com/m-lab/locate
RUN go install -v github.com/m-lab/locate/cmd/heartbeat@latest
RUN go install -v \
-ldflags "-X github.com/m-lab/go/prometheusx.GitShortCommit=$(git log -1 --format=%h)" \
github.com/m-lab/locate/cmd/heartbeat

# Now copy the resulting command into the minimal base image.
FROM alpine:3.16
Expand Down
26 changes: 21 additions & 5 deletions cmd/heartbeat/health/checker.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
package health

import (
"golang.org/x/net/context"
)

// Checker checks the health of a local experiment instance.
//
// It holds a PortProbe (pp) and a KubernetesClient (k8s). GetHealth skips
// the Kubernetes check when k8s is nil.
type Checker struct {
pp *PortProbe
pp *PortProbe
k8s *KubernetesClient
}

// NewChecker creates a new Checker.
Expand All @@ -12,10 +17,21 @@ func NewChecker(pp *PortProbe) *Checker {
}
}

// NewCheckerK8S builds a Checker for Kubernetes deployments. The returned
// Checker holds both the given port probe and the given Kubernetes client.
func NewCheckerK8S(pp *PortProbe, k8s *KubernetesClient) *Checker {
	hc := new(Checker)
	hc.pp = pp
	hc.k8s = k8s
	return hc
}

// GetHealth combines a set of health checks into a single score.
//
// It returns 0 if the port probe fails, or if a Kubernetes client is
// configured (k8s is non-nil) and that client reports the instance as
// unhealthy. Otherwise it returns 1. The context is passed to the
// Kubernetes health check.
func (hc *Checker) GetHealth() float64 {
if hc.pp.checkPorts() {
return 1
func (hc *Checker) GetHealth(ctx context.Context) float64 {
if !hc.pp.checkPorts() {
return 0
}
if hc.k8s != nil && !hc.k8s.isHealthy(ctx) {
return 0
}
return 0
return 1
}
113 changes: 100 additions & 13 deletions cmd/heartbeat/health/checker_test.go
Original file line number Diff line number Diff line change
@@ -1,33 +1,120 @@
package health

import "testing"
import (
"context"
"testing"

v1 "k8s.io/api/core/v1"
"k8s.io/client-go/kubernetes/fake"
)

func TestChecker_getHealth(t *testing.T) {
tests := []struct {
name string
pp *PortProbe
want float64
name string
checker *Checker
want float64
}{

{
name: "health-1",
pp: &PortProbe{},
checker: NewCheckerK8S(
&PortProbe{},
&KubernetesClient{
clientset: healthyClientset,
},
),
want: 1,
},
{
name: "health-1-k8s-nil",
checker: NewChecker(
&PortProbe{},
),
want: 1,
},
{
name: "ports-unhealthy",
checker: NewCheckerK8S(
&PortProbe{
ports: map[string]bool{"65536": true},
},
&KubernetesClient{
clientset: healthyClientset,
},
),
want: 0,
},
{
name: "kubernetes-call-fail",
checker: NewCheckerK8S(
&PortProbe{},
&KubernetesClient{
clientset: fake.NewSimpleClientset(),
},
),
want: 1,
},
{
name: "health-0",
pp: &PortProbe{
ports: map[string]bool{"65536": true},
},
name: "kubernetes-unhealthy",
checker: NewCheckerK8S(
&PortProbe{},
&KubernetesClient{
clientset: fake.NewSimpleClientset(
&v1.Pod{
Status: v1.PodStatus{
Phase: "Pending",
},
},
&v1.Node{
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{Type: "Ready", Status: "False"},
},
},
},
),
},
),
want: 0,
},
{
name: "all-unhealthy",
checker: NewCheckerK8S(
&PortProbe{
ports: map[string]bool{"65536": true},
},
&KubernetesClient{
clientset: fake.NewSimpleClientset(
&v1.Pod{
Status: v1.PodStatus{
Phase: "Pending",
},
},
&v1.Node{
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{Type: "Ready", Status: "False"},
},
},
},
),
},
),
want: 0,
},
{
name: "all-unhealthy-k8s-nil",
checker: NewChecker(
&PortProbe{
ports: map[string]bool{"65536": true},
},
),
want: 0,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
hc := NewChecker(tt.pp)

got := hc.GetHealth()
got := tt.checker.GetHealth(context.Background())
if got != tt.want {
t.Errorf("Checker.GetHealth() = %v, want %v", got, tt.want)
}
Expand Down
142 changes: 142 additions & 0 deletions cmd/heartbeat/health/kubernetes-client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
package health

import (
"context"
"net/url"
"path"
"strconv"
"time"

"github.com/m-lab/go/rtx"
"github.com/m-lab/locate/metrics"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/client-go/tools/clientcmd/api"
)

// KubernetesClient manages requests to the Kubernetes API server.
//
// pod and namespace identify the Pod that is looked up, node identifies the
// Node that is looked up, and clientset is the API handle used for both
// lookups.
type KubernetesClient struct {
	pod, node, namespace string
	clientset            kubernetes.Interface
}

// MustNewKubernetesClient creates a new KubernetesClient instance that talks
// to the API server at url on behalf of the given pod, node and namespace.
// The auth argument is handed to getDefaultClientConfig to locate the
// credentials.
//
// If the client cannot be instantiated, the function will exit.
func MustNewKubernetesClient(url *url.URL, pod, node, namespace, auth string) *KubernetesClient {
	restConfig, err := getDefaultClientConfig(url, auth).ClientConfig()
	rtx.Must(err, "failed to create kubernetes config")

	clientset, err := kubernetes.NewForConfig(restConfig)
	rtx.Must(err, "failed to create kubernetes clientset")

	return &KubernetesClient{
		pod:       pod,
		node:      node,
		namespace: namespace,
		clientset: clientset,
	}
}

// getDefaultClientConfig assembles an in-memory client configuration for the
// API server at url. The CA certificate ("ca.crt") and the bearer token
// ("token") are both read from the auth directory.
//
// NOTE(review): the file locations are built with path.Join, which always
// uses forward slashes; confirm that is acceptable on every target platform.
func getDefaultClientConfig(url *url.URL, auth string) clientcmd.ClientConfig {
	// These names have no meaning outside this function: they only tie the
	// login credentials to the cluster through a single context.
	const (
		clusterName = "cluster"
		userName    = "user"
		contextName = "cluster-user"
	)

	// Cluster address and CA certificate.
	cluster := &api.Cluster{
		Server:                url.String(),
		InsecureSkipTLSVerify: false, // Require a valid CA Certificate.
		CertificateAuthority:  path.Join(auth, "ca.crt"),
	}
	// User credentials for access to the API.
	user := &api.AuthInfo{
		TokenFile: path.Join(auth, "token"),
	}
	// Context that refers to the cluster and user above.
	kubeContext := &api.Context{
		Cluster:  clusterName,
		AuthInfo: userName,
	}

	// api.Config is the low-level structure normally produced by parsing a
	// kubeconfig file. Every value is known here, so it is built directly.
	cfg := api.Config{
		Clusters:       map[string]*api.Cluster{clusterName: cluster},
		AuthInfos:      map[string]*api.AuthInfo{userName: user},
		Contexts:       map[string]*api.Context{contextName: kubeContext},
		CurrentContext: contextName,
	}
	overrides := &clientcmd.ConfigOverrides{
		ClusterInfo: api.Cluster{Server: ""},
	}

	return clientcmd.NewDefaultClientConfig(cfg, overrides)
}

// isHealthy reports whether the instance looks healthy from the point of view
// of Kubernetes. It returns true when all of the following hold:
//   - the Pod's phase is "Running"
//   - the Node's Ready condition is "True"
//   - the Node does not carry a "lame-duck" taint
//
// It also returns true when the API server cannot be queried, since both
// underlying checks fail open. The Node is only checked when the Pod check
// passes. The time taken by the checks is recorded in
// KubernetesRequestTimeHistogram, labelled with the outcome.
func (c *KubernetesClient) isHealthy(ctx context.Context) bool {
	begin := time.Now()

	healthy := c.isPodRunning(ctx)
	if healthy {
		healthy = c.isNodeReady(ctx)
	}

	observer := metrics.KubernetesRequestTimeHistogram.WithLabelValues(strconv.FormatBool(healthy))
	observer.Observe(time.Since(begin).Seconds())
	return healthy
}

// isPodRunning reports whether the configured Pod is in the "Running" phase.
// It fails open: when the Pod cannot be fetched from the API server the
// function returns true. Every call increments KubernetesRequestsTotal,
// labelled "OK" on success or with the error text on failure.
//
// NOTE(review): the raw error text is used as a metric label value; confirm
// that the set of possible messages is bounded.
func (c *KubernetesClient) isPodRunning(ctx context.Context) bool {
	pods := c.clientset.CoreV1().Pods(c.namespace)
	pod, err := pods.Get(ctx, c.pod, metav1.GetOptions{})

	// Start from the fail-open answer; only a successful lookup can change it.
	label, running := "OK", true
	if err != nil {
		label = err.Error()
	} else {
		running = pod.Status.Phase == "Running"
	}

	metrics.KubernetesRequestsTotal.WithLabelValues(label).Inc()
	return running
}

// isNodeReady reports whether the configured Node can serve traffic. It
// returns true when the Node has a Ready condition with status "True" and is
// not in maintenance (no "lame-duck" taint).
//
// It fails open: when the Node cannot be fetched from the API server the
// function returns true. A Node with no Ready condition at "True" yields
// false. Every call increments KubernetesRequestsTotal, labelled "OK" on
// success or with the error text on failure.
//
// NOTE(review): the raw error text is used as a metric label value; confirm
// that the set of possible messages is bounded.
func (c *KubernetesClient) isNodeReady(ctx context.Context) bool {
	nodes := c.clientset.CoreV1().Nodes()
	node, err := nodes.Get(ctx, c.node, metav1.GetOptions{})
	if err != nil {
		// No determination is possible, so assume the Node is fine.
		metrics.KubernetesRequestsTotal.WithLabelValues(err.Error()).Inc()
		return true
	}
	metrics.KubernetesRequestsTotal.WithLabelValues("OK").Inc()

	conditions := node.Status.Conditions
	for i := range conditions {
		if conditions[i].Type != "Ready" || conditions[i].Status != "True" {
			continue
		}
		// Ready, but a Node under maintenance must still be reported as not ready.
		return !isInMaintenance(node)
	}

	return false
}

// isInMaintenance reports whether the Node carries a taint whose key is
// "lame-duck".
func isInMaintenance(node *v1.Node) bool {
	taints := node.Spec.Taints
	for i := range taints {
		if taints[i].Key == "lame-duck" {
			return true
		}
	}

	return false
}
Loading

0 comments on commit 5a62379

Please sign in to comment.