Skip to content

Commit d1409b3

Browse files
authored
feat: CNS checks apiserver in healthz (#3269)
* feat: CNS checks apiserver in healthz
* chore: only check NNCs if `ChannelMode` is `CRD`. Not every instance of CNS will need (or be able) to check NNCs. The `CRD` channel mode is used by AKS to indicate that CNS will be reading/watching NNCs. `AzureHost` is a newer mode that's used in nodesubnet, where NNCs aren't used, and therefore CNS has no reason to have its health depend on NNC access.
* test: add unit tests
* refactor: return error from NewHealthzHandlerWithChecks instead of panicking
* chore: address lint errors
* refactor: only get kubeConfig when in CRD mode
* chore: fix lint errors
1 parent 450ec63 commit d1409b3

File tree

3 files changed

+358
-1
lines changed

3 files changed

+358
-1
lines changed

cns/healthserver/healthz.go

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package healthserver
2+
3+
import (
4+
"net/http"
5+
6+
"github.com/Azure/azure-container-networking/cns"
7+
"github.com/Azure/azure-container-networking/cns/configuration"
8+
"github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha"
9+
"github.com/pkg/errors"
10+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
11+
"k8s.io/apimachinery/pkg/runtime"
12+
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
13+
ctrl "sigs.k8s.io/controller-runtime"
14+
"sigs.k8s.io/controller-runtime/pkg/client"
15+
"sigs.k8s.io/controller-runtime/pkg/healthz"
16+
)
17+
18+
var scheme = runtime.NewScheme()
19+
20+
func init() {
21+
utilruntime.Must(v1alpha.AddToScheme(scheme))
22+
}
23+
24+
// NewHealthzHandlerWithChecks will return a [http.Handler] for CNS's /healthz endpoint.
25+
// Depending on what we expect CNS to be able to read (based on the [configuration.CNSConfig])
26+
// then the checks registered to the handler will test for those expectations. For example, in
27+
// ChannelMode: CRD, the health check will ensure that CNS is able to list NNCs successfully.
28+
func NewHealthzHandlerWithChecks(cnsConfig *configuration.CNSConfig) (http.Handler, error) {
29+
checks := make(map[string]healthz.Checker)
30+
if cnsConfig.ChannelMode == cns.CRD {
31+
cfg, err := ctrl.GetConfig()
32+
if err != nil {
33+
return nil, errors.Wrap(err, "failed to get kubeconfig")
34+
}
35+
cli, err := client.New(cfg, client.Options{
36+
Scheme: scheme,
37+
})
38+
if err != nil {
39+
return nil, errors.Wrap(err, "failed to build client")
40+
}
41+
42+
checks["nnc"] = func(req *http.Request) error {
43+
ctx := req.Context()
44+
// we just care that we're allowed to List NNCs so set limit to 1 to minimize
45+
// additional load on apiserver
46+
if err := cli.List(ctx, &v1alpha.NodeNetworkConfigList{}, &client.ListOptions{
47+
Namespace: metav1.NamespaceSystem,
48+
Limit: int64(1),
49+
}); err != nil {
50+
return errors.Wrap(err, "failed to list NodeNetworkConfig")
51+
}
52+
return nil
53+
}
54+
}
55+
56+
// strip prefix so that it runs through all checks registered on the handler.
57+
// otherwise it will look for a check named "healthz" and return a 404 if not there.
58+
return http.StripPrefix("/healthz", &healthz.Handler{
59+
Checks: checks,
60+
}), nil
61+
}

cns/healthserver/healthz_test.go

+290
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
package healthserver
2+
3+
import (
4+
"fmt"
5+
"net/http"
6+
"net/http/httptest"
7+
"os"
8+
"testing"
9+
10+
"github.com/Azure/azure-container-networking/cns/configuration"
11+
"github.com/stretchr/testify/require"
12+
)
13+
14+
// nncCRD is the JSON body that the mock API server in this file writes for
// requests to the /apis/acn.azure.com/v1alpha path. It is an APIResourceList
// describing the nodenetworkconfigs resource and its status subresource.
const nncCRD = `{
15+
"kind": "APIResourceList",
16+
"apiVersion": "v1",
17+
"groupVersion": "acn.azure.com/v1alpha",
18+
"resources": [
19+
{
20+
"name": "nodenetworkconfigs",
21+
"singularName": "nodenetworkconfig",
22+
"namespaced": true,
23+
"kind": "NodeNetworkConfig",
24+
"verbs": [
25+
"delete",
26+
"deletecollection",
27+
"get",
28+
"list",
29+
"patch",
30+
"create",
31+
"update",
32+
"watch"
33+
],
34+
"shortNames": [
35+
"nnc"
36+
],
37+
"storageVersionHash": "aGVsbG93cmxk"
38+
},
39+
{
40+
"name": "nodenetworkconfigs/status",
41+
"singularName": "",
42+
"namespaced": true,
43+
"kind": "NodeNetworkConfig",
44+
"verbs": [
45+
"get",
46+
"patch",
47+
"update"
48+
]
49+
}
50+
]
51+
}`
52+
53+
// nncResult is the JSON body that the mock API server in this file writes for
// the kube-system nodenetworkconfigs list path when it is configured to answer
// with http.StatusOK. It is a NodeNetworkConfigList containing a single item.
const nncResult = `{
54+
"apiVersion": "acn.azure.com/v1alpha",
55+
"items": [
56+
{
57+
"apiVersion": "acn.azure.com/v1alpha",
58+
"kind": "NodeNetworkConfig",
59+
"metadata": {
60+
"creationTimestamp": "2024-12-04T20:42:17Z",
61+
"finalizers": [
62+
"finalizers.acn.azure.com/dnc-operations"
63+
],
64+
"generation": 1,
65+
"labels": {
66+
"kubernetes.azure.com/podnetwork-delegationguid": "",
67+
"kubernetes.azure.com/podnetwork-subnet": "",
68+
"kubernetes.azure.com/podnetwork-type": "overlay",
69+
"managed": "true",
70+
"owner": "aks-nodepool1-1234567-vmss000000"
71+
},
72+
"managedFields": [
73+
{
74+
"apiVersion": "acn.azure.com/v1alpha",
75+
"fieldsType": "FieldsV1",
76+
"fieldsV1": {
77+
"f:metadata": {
78+
"f:finalizers": {
79+
".": {},
80+
"v:\"finalizers.acn.azure.com/dnc-operations\"": {}
81+
},
82+
"f:labels": {
83+
".": {},
84+
"f:kubernetes.azure.com/podnetwork-delegationguid": {},
85+
"f:kubernetes.azure.com/podnetwork-subnet": {},
86+
"f:kubernetes.azure.com/podnetwork-type": {},
87+
"f:managed": {},
88+
"f:owner": {}
89+
},
90+
"f:ownerReferences": {
91+
".": {},
92+
"k:{\"uid\":\"f5117020-bbc5-11ef-8433-1b9e59caeb1d\"}": {}
93+
}
94+
},
95+
"f:spec": {
96+
".": {},
97+
"f:requestedIPCount": {}
98+
}
99+
},
100+
"manager": "dnc-rc",
101+
"operation": "Update",
102+
"time": "2024-12-04T20:42:17Z"
103+
},
104+
{
105+
"apiVersion": "acn.azure.com/v1alpha",
106+
"fieldsType": "FieldsV1",
107+
"fieldsV1": {
108+
"f:status": {
109+
".": {},
110+
"f:assignedIPCount": {},
111+
"f:networkContainers": {}
112+
}
113+
},
114+
"manager": "dnc-rc",
115+
"operation": "Update",
116+
"subresource": "status",
117+
"time": "2024-12-04T20:42:18Z"
118+
}
119+
],
120+
"name": "aks-nodepool1-1234567-vmss000000",
121+
"namespace": "kube-system",
122+
"ownerReferences": [
123+
{
124+
"apiVersion": "v1",
125+
"blockOwnerDeletion": true,
126+
"controller": true,
127+
"kind": "Node",
128+
"name": "aks-nodepool1-1234567-vmss000000",
129+
"uid": "02df1fcc-bbc6-11ef-a76a-4b1af8d399a2"
130+
}
131+
],
132+
"resourceVersion": "123456789",
133+
"uid": "0dc75e5e-bbc6-11ef-878f-ab45432262d6"
134+
},
135+
"spec": {
136+
"requestedIPCount": 0
137+
},
138+
"status": {
139+
"assignedIPCount": 256,
140+
"networkContainers": [
141+
{
142+
"assignmentMode": "static",
143+
"id": "13f630c0-bbc6-11ef-b3b7-bb8e46de5973",
144+
"nodeIP": "10.224.0.4",
145+
"primaryIP": "10.244.2.0/24",
146+
"subnetAddressSpace": "10.244.0.0/16",
147+
"subnetName": "routingdomain_1f7eb6ba-bbc6-11ef-8c54-7b2c1e3cbbe4_overlaysubnet",
148+
"type": "overlay",
149+
"version": 0
150+
}
151+
]
152+
}
153+
}
154+
],
155+
"kind": "NodeNetworkConfigList",
156+
"metadata": {
157+
"continue": "",
158+
"resourceVersion": "9876543210"
159+
}
160+
}`
161+
162+
func TestNewHealthzHandlerWithChecks(t *testing.T) {
163+
tests := []struct {
164+
name string
165+
cnsConfig *configuration.CNSConfig
166+
apiStatusCode int
167+
expectedHealthy bool
168+
}{
169+
{
170+
name: "list NNC gives 200 should indicate healthy",
171+
cnsConfig: &configuration.CNSConfig{
172+
ChannelMode: "CRD",
173+
},
174+
apiStatusCode: http.StatusOK,
175+
expectedHealthy: true,
176+
},
177+
{
178+
name: "unauthorized (401) from apiserver should be unhealthy",
179+
cnsConfig: &configuration.CNSConfig{
180+
ChannelMode: "CRD",
181+
},
182+
apiStatusCode: http.StatusUnauthorized,
183+
expectedHealthy: false,
184+
},
185+
{
186+
name: "channel nodesubnet should not call apiserver so it doesn't matter if the status code is a 401",
187+
cnsConfig: &configuration.CNSConfig{
188+
ChannelMode: "AzureHost",
189+
},
190+
apiStatusCode: http.StatusUnauthorized,
191+
expectedHealthy: true,
192+
},
193+
}
194+
195+
for _, tt := range tests {
196+
t.Run(tt.name, func(t *testing.T) {
197+
configureLocalAPIServer(t, tt.apiStatusCode)
198+
199+
responseRecorder := httptest.NewRecorder()
200+
healthHandler, err := NewHealthzHandlerWithChecks(tt.cnsConfig)
201+
require.NoError(t, err)
202+
203+
healthHandler.ServeHTTP(responseRecorder, httptest.NewRequest("GET", "/healthz", http.NoBody))
204+
205+
require.Equal(t, tt.expectedHealthy, responseRecorder.Code == http.StatusOK)
206+
})
207+
}
208+
}
209+
210+
func configureLocalAPIServer(t *testing.T, expectedNNCStatusCode int) {
211+
// setup apiserver
212+
server := setupMockAPIServer(expectedNNCStatusCode)
213+
214+
// write kubeConfig for test server
215+
kubeConfigFile, err := writeTmpKubeConfig(server.URL)
216+
require.NoError(t, err)
217+
218+
// set env var to kubeconfig
219+
os.Setenv("KUBECONFIG", kubeConfigFile)
220+
221+
t.Cleanup(func() {
222+
server.Close()
223+
os.Remove(kubeConfigFile)
224+
os.Unsetenv("KUBECONFIG")
225+
})
226+
}
227+
228+
// writeTmpKubeConfig writes a minimal kubeconfig whose single cluster points
// at host into a new temporary file and returns the path of that file. The
// caller is responsible for removing the file when it is no longer needed.
//
// On any failure an error is returned, the returned path is empty, and the
// partially written temp file (if one was created) is closed and removed.
func writeTmpKubeConfig(host string) (string, error) {
	tempKubeConfig := `
apiVersion: v1
clusters:
- cluster:
    server: ` + host + `
  name: test-cluster
contexts:
- context:
    cluster: test-cluster
    user: test-user
  name: test-context
current-context: test-context
kind: Config
preferences: {}
users:
- name: test-user
  user:
    token: test-token
`
	kubeConfigFile, err := os.CreateTemp("", "kubeconfig")
	if err != nil {
		return "", fmt.Errorf("failed to create temp kubeconfig file: %w", err)
	}
	path := kubeConfigFile.Name()

	if _, err := kubeConfigFile.WriteString(tempKubeConfig); err != nil {
		// don't leak the descriptor or leave a half-written file behind; the
		// write error is the one worth reporting, so cleanup is best-effort.
		_ = kubeConfigFile.Close()
		_ = os.Remove(path)
		return "", fmt.Errorf("failed to write kubeconfig to temp file: %w", err)
	}

	// a failed Close can mean the contents never reached disk, so surface it
	// instead of handing back a possibly truncated kubeconfig.
	if err := kubeConfigFile.Close(); err != nil {
		_ = os.Remove(path)
		return "", fmt.Errorf("failed to close temp kubeconfig file: %w", err)
	}
	return path, nil
}
260+
261+
func setupMockAPIServer(code int) *httptest.Server {
262+
// Start a mock HTTP server
263+
mockServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
264+
// Handle requests based on the path
265+
switch r.URL.Path {
266+
case "/apis/acn.azure.com/v1alpha":
267+
_, err := w.Write([]byte(nncCRD))
268+
if err != nil {
269+
http.Error(w, err.Error(), http.StatusInternalServerError)
270+
return
271+
}
272+
case "/apis/acn.azure.com/v1alpha/namespaces/kube-system/nodenetworkconfigs":
273+
if code == http.StatusOK {
274+
w.Header().Set("Cache-Control", "no-cache, private")
275+
w.Header().Set("Content-Type", "application/json")
276+
_, err := w.Write([]byte(nncResult))
277+
if err != nil {
278+
http.Error(w, err.Error(), http.StatusInternalServerError)
279+
return
280+
}
281+
} else {
282+
w.WriteHeader(code)
283+
}
284+
default:
285+
w.WriteHeader(http.StatusNotFound)
286+
}
287+
}))
288+
289+
return mockServer
290+
}

cns/service/main.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,13 @@ func main() {
649649
return nil
650650
}),
651651
}
652-
go healthserver.Start(z, cnsconfig.MetricsBindAddress, &healthz.Handler{}, readyChecker)
652+
653+
healthzHandler, err := healthserver.NewHealthzHandlerWithChecks(cnsconfig)
654+
if err != nil {
655+
logger.Errorf("unable to initialize a healthz handler: %v", err)
656+
return
657+
}
658+
go healthserver.Start(z, cnsconfig.MetricsBindAddress, healthzHandler, readyChecker)
653659

654660
nmaConfig, err := nmagent.NewConfig(cnsconfig.WireserverIP)
655661
if err != nil {

0 commit comments

Comments
 (0)