Skip to content

Commit

Permalink
[HPA] Properly update HPA when an additional metric is added to the spec (#1462)
Browse files Browse the repository at this point in the history

* remove patch logic and replace with update request

* test an update that adds another metric

* minor fixes

* rename chloggen

* split tests up by version

* use patch again instead of update

* add error check

* address review feedback

* delete commented line

* fix chloggen description

* fix lint

* simplify the logic - always replace the existing metrics with desired

* fix lint

* add to e2e tests

* fix e2e bug

* address review feedback

* decrease stabilization window

* cleanup

* simplify e2e
  • Loading branch information
moh-osman3 authored Mar 1, 2023
1 parent d03785f commit 278a55e
Show file tree
Hide file tree
Showing 11 changed files with 329 additions and 53 deletions.
17 changes: 17 additions & 0 deletions .chloggen/1439-fix-HPA-updates.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. operator, target allocator, github action)
component: Autoscaler

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fix the issue where HPA fails to update when an additional metric is added to the spec.

# One or more tracking issues related to the change
issues:
- 1439

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
21 changes: 8 additions & 13 deletions pkg/collector/reconcile/horizontalpodautoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (

autoscalingv2 "k8s.io/api/autoscaling/v2"
autoscalingv2beta2 "k8s.io/api/autoscaling/v2beta2"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/types"
Expand Down Expand Up @@ -85,7 +84,7 @@ func expectedHorizontalPodAutoscalers(ctx context.Context, params Params, expect

updated := existing.DeepCopyObject().(client.Object)
updated.SetOwnerReferences(desired.GetOwnerReferences())
setAutoscalerSpec(params, autoscalingVersion, updated)
setAutoscalerSpec(params, autoscalingVersion, updated, obj)

annotations := updated.GetAnnotations()
for k, v := range desired.GetAnnotations() {
Expand All @@ -110,7 +109,7 @@ func expectedHorizontalPodAutoscalers(ctx context.Context, params Params, expect
return nil
}

func setAutoscalerSpec(params Params, autoscalingVersion autodetect.AutoscalingVersion, updated client.Object) {
func setAutoscalerSpec(params Params, autoscalingVersion autodetect.AutoscalingVersion, updated client.Object, desired client.Object) {
one := int32(1)
if params.Instance.Spec.Autoscaler.MaxReplicas != nil {
if autoscalingVersion == autodetect.AutoscalingVersionV2Beta2 {
Expand All @@ -120,23 +119,19 @@ func setAutoscalerSpec(params Params, autoscalingVersion autodetect.AutoscalingV
} else {
updated.(*autoscalingv2beta2.HorizontalPodAutoscaler).Spec.MinReplicas = &one
}
updated.(*autoscalingv2beta2.HorizontalPodAutoscaler).Spec.Metrics[0].Resource.Target.AverageUtilization = params.Instance.Spec.Autoscaler.TargetCPUUtilization
} else {

desiredMetrics := desired.(*autoscalingv2beta2.HorizontalPodAutoscaler).Spec.Metrics
updated.(*autoscalingv2beta2.HorizontalPodAutoscaler).Spec.Metrics = desiredMetrics
} else { // autoscalingv2
updated.(*autoscalingv2.HorizontalPodAutoscaler).Spec.MaxReplicas = *params.Instance.Spec.Autoscaler.MaxReplicas
if params.Instance.Spec.Autoscaler.MinReplicas != nil {
updated.(*autoscalingv2.HorizontalPodAutoscaler).Spec.MinReplicas = params.Instance.Spec.Autoscaler.MinReplicas
} else {
updated.(*autoscalingv2.HorizontalPodAutoscaler).Spec.MinReplicas = &one
}

// This will update memory and CPU usage for now, and can be used to update other metrics in the future
for _, metric := range updated.(*autoscalingv2.HorizontalPodAutoscaler).Spec.Metrics {
if metric.Resource.Name == corev1.ResourceCPU {
metric.Resource.Target.AverageUtilization = params.Instance.Spec.Autoscaler.TargetCPUUtilization
} else if metric.Resource.Name == corev1.ResourceMemory {
metric.Resource.Target.AverageUtilization = params.Instance.Spec.Autoscaler.TargetMemoryUtilization
}
}
desiredMetrics := desired.(*autoscalingv2.HorizontalPodAutoscaler).Spec.Metrics
updated.(*autoscalingv2.HorizontalPodAutoscaler).Spec.Metrics = desiredMetrics
}
}
}
Expand Down
117 changes: 88 additions & 29 deletions pkg/collector/reconcile/horizontalpodautoscaler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
v1 "k8s.io/api/apps/v1"
autoscalingv2 "k8s.io/api/autoscaling/v2"
autoscalingv2beta2 "k8s.io/api/autoscaling/v2beta2"
Expand All @@ -40,56 +41,114 @@ import (
var hpaUpdateErr error
var withHPA bool

func TestExpectedHPA(t *testing.T) {
func TestExpectedHPAVersionV2Beta2(t *testing.T) {
params := paramsWithHPA(autodetect.AutoscalingVersionV2Beta2)
err := params.Config.AutoDetect()
assert.NoError(t, err)
autoscalingVersion := params.Config.AutoscalingVersion()

expectedHPA := collector.HorizontalPodAutoscaler(params.Config, logger, params.Instance)
t.Run("should create HPA", func(t *testing.T) {
err = expectedHorizontalPodAutoscalers(context.Background(), params, []client.Object{expectedHPA})
assert.NoError(t, err)

exists, hpaErr := populateObjectIfExists(t, &autoscalingv2beta2.HorizontalPodAutoscaler{}, types.NamespacedName{Namespace: "default", Name: "test-collector"})
actual := autoscalingv2beta2.HorizontalPodAutoscaler{}
exists, hpaErr := populateObjectIfExists(t, &actual, types.NamespacedName{Namespace: "default", Name: "test-collector"})
assert.NoError(t, hpaErr)
require.Len(t, actual.Spec.Metrics, 1)
assert.Equal(t, int32(90), *actual.Spec.Metrics[0].Resource.Target.AverageUtilization)

assert.True(t, exists)
})

t.Run("should update HPA", func(t *testing.T) {
minReplicas := int32(1)
maxReplicas := int32(3)
memUtilization := int32(70)
updateParms := paramsWithHPA(autodetect.AutoscalingVersionV2Beta2)
updateParms.Instance.Spec.Autoscaler.MinReplicas = &minReplicas
updateParms.Instance.Spec.Autoscaler.MaxReplicas = &maxReplicas
updateParms.Instance.Spec.Autoscaler.TargetMemoryUtilization = &memUtilization
updatedHPA := collector.HorizontalPodAutoscaler(updateParms.Config, logger, updateParms.Instance)

hpaUpdateErr = expectedHorizontalPodAutoscalers(context.Background(), updateParms, []client.Object{updatedHPA})
require.NoError(t, hpaUpdateErr)

actual := autoscalingv2beta2.HorizontalPodAutoscaler{}
withHPA, hpaUpdateErr = populateObjectIfExists(t, &actual, types.NamespacedName{Namespace: "default", Name: "test-collector"})

assert.NoError(t, hpaUpdateErr)
assert.True(t, withHPA)
assert.Equal(t, int32(1), *actual.Spec.MinReplicas)
assert.Equal(t, int32(3), actual.Spec.MaxReplicas)
assert.Len(t, actual.Spec.Metrics, 2)

// check metric values
for _, metric := range actual.Spec.Metrics {
if metric.Resource.Name == corev1.ResourceCPU {
assert.Equal(t, int32(90), *metric.Resource.Target.AverageUtilization)
} else if metric.Resource.Name == corev1.ResourceMemory {
assert.Equal(t, int32(70), *metric.Resource.Target.AverageUtilization)
}
}
})

t.Run("should delete HPA", func(t *testing.T) {
err = deleteHorizontalPodAutoscalers(context.Background(), params, []client.Object{expectedHPA})
assert.NoError(t, err)

actual := v1.Deployment{}
exists, _ := populateObjectIfExists(t, &actual, types.NamespacedName{Namespace: "default", Name: "test-collecto"})
assert.False(t, exists)
})
}

func TestExpectedHPAVersionV2(t *testing.T) {
params := paramsWithHPA(autodetect.AutoscalingVersionV2)
err := params.Config.AutoDetect()
assert.NoError(t, err)

expectedHPA := collector.HorizontalPodAutoscaler(params.Config, logger, params.Instance)
t.Run("should create HPA", func(t *testing.T) {
err = expectedHorizontalPodAutoscalers(context.Background(), params, []client.Object{expectedHPA})
assert.NoError(t, err)

actual := autoscalingv2.HorizontalPodAutoscaler{}
exists, hpaErr := populateObjectIfExists(t, &actual, types.NamespacedName{Namespace: "default", Name: "test-collector"})
assert.NoError(t, hpaErr)
require.Len(t, actual.Spec.Metrics, 1)
assert.Equal(t, int32(90), *actual.Spec.Metrics[0].Resource.Target.AverageUtilization)

assert.True(t, exists)
})

t.Run("should update HPA", func(t *testing.T) {
minReplicas := int32(1)
maxReplicas := int32(3)
memUtilization := int32(70)
updateParms := paramsWithHPA(autodetect.AutoscalingVersionV2)
updateParms.Instance.Spec.Autoscaler.MinReplicas = &minReplicas
updateParms.Instance.Spec.Autoscaler.MaxReplicas = &maxReplicas
updateParms.Instance.Spec.Autoscaler.TargetMemoryUtilization = &memUtilization
updatedHPA := collector.HorizontalPodAutoscaler(updateParms.Config, logger, updateParms.Instance)

if autoscalingVersion == autodetect.AutoscalingVersionV2Beta2 {
updatedAutoscaler := *updatedHPA.(*autoscalingv2beta2.HorizontalPodAutoscaler)
createObjectIfNotExists(t, "test-collector", &updatedAutoscaler)
hpaUpdateErr = expectedHorizontalPodAutoscalers(context.Background(), updateParms, []client.Object{updatedHPA})
assert.NoError(t, hpaUpdateErr)

actual := autoscalingv2beta2.HorizontalPodAutoscaler{}
withHPA, hpaUpdateErr = populateObjectIfExists(t, &actual, types.NamespacedName{Namespace: "default", Name: "test-collector"})

assert.NoError(t, hpaUpdateErr)
assert.True(t, withHPA)
assert.Equal(t, int32(1), *actual.Spec.MinReplicas)
assert.Equal(t, int32(3), actual.Spec.MaxReplicas)
} else {
updatedAutoscaler := *updatedHPA.(*autoscalingv2.HorizontalPodAutoscaler)
createObjectIfNotExists(t, "test-collector", &updatedAutoscaler)
hpaUpdateErr = expectedHorizontalPodAutoscalers(context.Background(), updateParms, []client.Object{updatedHPA})
assert.NoError(t, hpaUpdateErr)

actual := autoscalingv2.HorizontalPodAutoscaler{}
withHPA, hpaUpdateErr := populateObjectIfExists(t, &actual, types.NamespacedName{Namespace: "default", Name: "test-collector"})

assert.NoError(t, hpaUpdateErr)
assert.True(t, withHPA)
assert.Equal(t, int32(1), *actual.Spec.MinReplicas)
assert.Equal(t, int32(3), actual.Spec.MaxReplicas)
hpaUpdateErr = expectedHorizontalPodAutoscalers(context.Background(), updateParms, []client.Object{updatedHPA})
require.NoError(t, hpaUpdateErr)

actual := autoscalingv2.HorizontalPodAutoscaler{}
withHPA, hpaUpdateErr := populateObjectIfExists(t, &actual, types.NamespacedName{Namespace: "default", Name: "test-collector"})

assert.NoError(t, hpaUpdateErr)
assert.True(t, withHPA)
assert.Equal(t, int32(1), *actual.Spec.MinReplicas)
assert.Equal(t, int32(3), actual.Spec.MaxReplicas)
assert.Len(t, actual.Spec.Metrics, 2)
// check metric values
for _, metric := range actual.Spec.Metrics {
if metric.Resource.Name == corev1.ResourceCPU {
assert.Equal(t, int32(90), *metric.Resource.Target.AverageUtilization)
} else if metric.Resource.Name == corev1.ResourceMemory {
assert.Equal(t, int32(70), *metric.Resource.Target.AverageUtilization)
}
}
})

Expand Down
9 changes: 7 additions & 2 deletions tests/e2e/autoscale/00-install.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# This creates two different deployments:
# * The first one will be used to see if we scale properly
# * The second is to check the targetCPUUtilization option
# * The first one is to ensure an autoscaler is created without
# setting any metrics in the Spec.
# * The second is to check that scaling works properly and that
# spec updates are working as expected.
#
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
Expand Down Expand Up @@ -53,6 +55,9 @@ spec:
maxReplicas: 2
autoscaler:
targetCPUUtilization: 50
behavior:
scaleDown:
stabilizationWindowSeconds: 15
resources:
limits:
cpu: 500m
Expand Down
40 changes: 40 additions & 0 deletions tests/e2e/autoscale/02-install.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# This updates an existing deployment with a new Spec.
# Target memory utilization is now added.
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
metadata:
name: simplest-set-utilization
spec:
minReplicas: 1
maxReplicas: 2
autoscaler:
targetCPUUtilization: 50
targetMemoryUtilization: 70
behavior:
scaleDown:
stabilizationWindowSeconds: 15
resources:
limits:
cpu: 500m
memory: 128Mi
requests:
cpu: 5m
memory: 64Mi

config: |
receivers:
otlp:
protocols:
grpc:
http:
processors:
exporters:
logging:
service:
pipelines:
traces:
receivers: [otlp]
processors: []
exporters: [logging]
3 changes: 1 addition & 2 deletions tests/e2e/autoscale/03-assert.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# Wait until tracegen has completed and the simplest deployment has scaled up to 2
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
metadata:
name: simplest
name: simplest-set-utilization
status:
scale:
replicas: 2
6 changes: 3 additions & 3 deletions tests/e2e/autoscale/03-install.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
apiVersion: batch/v1
kind: Job
metadata:
name: tracegen
name: tracegen-set-utilization
spec:
template:
spec:
Expand All @@ -11,11 +11,11 @@ spec:
command:
- "./tracegen"
args:
- -otlp-endpoint=simplest-collector-headless:4317
- -otlp-endpoint=simplest-set-utilization-collector-headless:4317
- -otlp-insecure
# High duration to ensure the trace creation doesn't stop.
# It'll be stopped in step 4
- -duration=1m
- -workers=20
restartPolicy: Never
backoffLimit: 4
backoffLimit: 4
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
- script: go run ./cmd/verify/wait-and-validate-metrics.go --cpu-value 50 --memory-value 70 --hpa simplest-set-utilization-collector
3 changes: 2 additions & 1 deletion tests/e2e/autoscale/04-delete.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ kind: TestStep
delete:
- apiVersion: batch/v1
kind: Job
propagationPolicy: Background
metadata:
name: tracegen
name: tracegen-set-utilization
5 changes: 2 additions & 3 deletions tests/e2e/autoscale/05-assert.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# Wait for the collector to scale back down to 1
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
metadata:
name: simplest
name: simplest-set-utilization
status:
scale:
replicas: 1
replicas: 1
Loading

0 comments on commit 278a55e

Please sign in to comment.