From c1a42db5312ec2985a3b60d881ca9e7c0c1d7d47 Mon Sep 17 00:00:00 2001 From: Kam Saiyed Date: Thu, 7 Aug 2025 19:11:14 +0000 Subject: [PATCH 1/7] Introduce API changes and feature gate CPU startup boost --- .../crds/vpa-v1-crd-gen.yaml | 87 ++++++ .../deploy/vpa-v1-crd-gen.yaml | 87 ++++++ vertical-pod-autoscaler/docs/api.md | 39 +++ vertical-pod-autoscaler/docs/features.md | 62 ++++- vertical-pod-autoscaler/docs/flags.md | 6 +- .../resource/vpa/handler.go | 43 +++ .../resource/vpa/handler_test.go | 252 ++++++++++++++++++ .../pkg/apis/autoscaling.k8s.io/v1/types.go | 57 ++++ .../pkg/features/features.go | 6 + .../pkg/features/versioned_features.go | 3 + 10 files changed, 637 insertions(+), 5 deletions(-) diff --git a/vertical-pod-autoscaler/charts/vertical-pod-autoscaler/crds/vpa-v1-crd-gen.yaml b/vertical-pod-autoscaler/charts/vertical-pod-autoscaler/crds/vpa-v1-crd-gen.yaml index a3f8d0e7c319..c1472ea211bc 100644 --- a/vertical-pod-autoscaler/charts/vertical-pod-autoscaler/crds/vpa-v1-crd-gen.yaml +++ b/vertical-pod-autoscaler/charts/vertical-pod-autoscaler/crds/vpa-v1-crd-gen.yaml @@ -388,9 +388,96 @@ spec: when OOM is detected. pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true + startupBoost: + description: |- + startupBoost specifies the startup boost policy for the container. + This overrides any pod-level startup boost policy. + The startup boost policy takes precedence over the rest of the fields in + this struct, except for ContainerName and ControlledValues. + properties: + cpu: + description: |- + cpu specifies the CPU startup boost policy. + If this field is not set, no startup boost is applied. + properties: + duration: + description: |- + duration indicates for how long to keep the pod boosted after it goes to Ready. + Defaults to 0s. + type: string + factor: + description: |- + factor specifies the factor to apply to the resource request.
+ This field is to be used only when Type is "Factor". + format: int32 + type: integer + quantity: + anyOf: + - type: integer + - type: string + description: |- + quantity specifies the absolute resource quantity to be used as the + resource request and limit during the boost phase. + This field is to be used only when Type is "Quantity". + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: |- + type specifies the kind of boost to apply. + Supported values are: "Factor", "Quantity". + enum: + - Factor + - Quantity + type: string + required: + - type + type: object + type: object type: object type: array type: object + startupBoost: + description: startupBoost specifies the startup boost policy for the + pod. + properties: + cpu: + description: |- + cpu specifies the CPU startup boost policy. + If this field is not set, no startup boost is applied. + properties: + duration: + description: |- + duration indicates for how long to keep the pod boosted after it goes to Ready. + Defaults to 0s. + type: string + factor: + description: |- + factor specifies the factor to apply to the resource request. + This field is to be used only when Type is "Factor". + format: int32 + type: integer + quantity: + anyOf: + - type: integer + - type: string + description: |- + quantity specifies the absolute resource quantity to be used as the + resource request and limit during the boost phase. + This field is to be used only when Type is "Quantity". + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: |- + type specifies the kind of boost to apply. + Supported values are: "Factor", "Quantity". 
+ enum: + - Factor + - Quantity + type: string + required: + - type + type: object + type: object targetRef: description: |- TargetRef points to the controller managing the set of pods for the diff --git a/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml b/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml index a3f8d0e7c319..c1472ea211bc 100644 --- a/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml +++ b/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml @@ -388,9 +388,96 @@ spec: when OOM is detected. pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true + startupBoost: + description: |- + startupBoost specifies the startup boost policy for the container. + This overrides any pod-level startup boost policy. + The startup boost policy takes precedence over the rest of the fields in + this struct, except for ContainerName and ControlledValues. + properties: + cpu: + description: |- + cpu specifies the CPU startup boost policy. + If this field is not set, no startup boost is applied. + properties: + duration: + description: |- + duration indicates for how long to keep the pod boosted after it goes to Ready. + Defaults to 0s. + type: string + factor: + description: |- + factor specifies the factor to apply to the resource request. + This field is to be used only when Type is "Factor". + format: int32 + type: integer + quantity: + anyOf: + - type: integer + - type: string + description: |- + quantity specifies the absolute resource quantity to be used as the + resource request and limit during the boost phase. + This field is to be used only when Type is "Quantity". + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: |- + type specifies the kind of boost to apply. + Supported values are: "Factor", "Quantity". 
+ enum: + - Factor + - Quantity + type: string + required: + - type + type: object + type: object type: object type: array type: object + startupBoost: + description: startupBoost specifies the startup boost policy for the + pod. + properties: + cpu: + description: |- + cpu specifies the CPU startup boost policy. + If this field is not set, no startup boost is applied. + properties: + duration: + description: |- + duration indicates for how long to keep the pod boosted after it goes to Ready. + Defaults to 0s. + type: string + factor: + description: |- + factor specifies the factor to apply to the resource request. + This field is to be used only when Type is "Factor". + format: int32 + type: integer + quantity: + anyOf: + - type: integer + - type: string + description: |- + quantity specifies the absolute resource quantity to be used as the + resource request and limit during the boost phase. + This field is to be used only when Type is "Quantity". + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: |- + type specifies the kind of boost to apply. + Supported values are: "Factor", "Quantity". + enum: + - Factor + - Quantity + type: string + required: + - type + type: object + type: object targetRef: description: |- TargetRef points to the controller managing the set of pods for the diff --git a/vertical-pod-autoscaler/docs/api.md b/vertical-pod-autoscaler/docs/api.md index 874353cb1d76..8094ba3181c8 100644 --- a/vertical-pod-autoscaler/docs/api.md +++ b/vertical-pod-autoscaler/docs/api.md @@ -50,6 +50,7 @@ _Appears in:_ | `controlledValues` _[ContainerControlledValues](#containercontrolledvalues)_ | Specifies which resource values should be controlled.
The default is "RequestsAndLimits". | | Enum: [RequestsAndLimits RequestsOnly]
| | `oomBumpUpRatio` _[Quantity](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#quantity-resource-api)_ | oomBumpUpRatio is the ratio to increase memory when OOM is detected. | | | | `oomMinBumpUp` _[Quantity](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.32/#quantity-resource-api)_ | oomMinBumpUp is the minimum increase in memory when OOM is detected. | | | +| `startupBoost` _[StartupBoost](#startupboost)_ | startupBoost specifies the startup boost policy for the container.
This overrides any pod-level startup boost policy.
The startup boost policy takes precedence over the rest of the fields in
this struct, except for ContainerName and ControlledValues. | | | #### ContainerScalingMode @@ -107,6 +108,8 @@ _Appears in:_ | `changeRequirement` _[EvictionChangeRequirement](#evictionchangerequirement)_ | | | Enum: [TargetHigherThanRequests TargetLowerThanRequests]
| + + #### HistogramCheckpoint @@ -203,6 +206,41 @@ _Appears in:_ | `containerRecommendations` _[RecommendedContainerResources](#recommendedcontainerresources) array_ | Resources recommended by the autoscaler for each container. | | | +#### StartupBoost + + + +StartupBoost defines the startup boost policy. + + + +_Appears in:_ +- [ContainerResourcePolicy](#containerresourcepolicy) +- [VerticalPodAutoscalerSpec](#verticalpodautoscalerspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `cpu` _[GenericStartupBoost](#genericstartupboost)_ | cpu specifies the CPU startup boost policy.
If this field is not set, no startup boost is applied. | | | + + +#### StartupBoostType + +_Underlying type:_ _string_ + +StartupBoostType is the type of startup boost. + +_Validation:_ +- Enum: [Factor Quantity] + +_Appears in:_ +- [GenericStartupBoost](#genericstartupboost) + +| Field | Description | +| --- | --- | +| `Factor` | FactorStartupBoostType applies a factor to the resource.
| +| `Quantity` | QuantityStartupBoostType applies a fixed quantity to the resource.
| + + #### UpdateMode _Underlying type:_ _string_ @@ -379,6 +417,7 @@ _Appears in:_ | `updatePolicy` _[PodUpdatePolicy](#podupdatepolicy)_ | Describes the rules on how changes are applied to the pods.
If not specified, all fields in the `PodUpdatePolicy` are set to their
default values. | | | | `resourcePolicy` _[PodResourcePolicy](#podresourcepolicy)_ | Controls how the autoscaler computes recommended resources.
The resource policy may be used to set constraints on the recommendations
for individual containers.
If any individual containers need to be excluded from getting the VPA recommendations, then
it must be disabled explicitly by setting mode to "Off" under containerPolicies.
If not specified, the autoscaler computes recommended resources for all containers in the pod,
without additional constraints. | | | | `recommenders` _[VerticalPodAutoscalerRecommenderSelector](#verticalpodautoscalerrecommenderselector) array_ | Recommender responsible for generating recommendation for this object.
List should be empty (then the default recommender will generate the
recommendation) or contain exactly one recommender. | | | +| `startupBoost` _[StartupBoost](#startupboost)_ | startupBoost specifies the startup boost policy for the pod. | | | #### VerticalPodAutoscalerStatus diff --git a/vertical-pod-autoscaler/docs/features.md b/vertical-pod-autoscaler/docs/features.md index 9b3c292d1bec..d0759b479885 100644 --- a/vertical-pod-autoscaler/docs/features.md +++ b/vertical-pod-autoscaler/docs/features.md @@ -7,6 +7,7 @@ - [CPU Recommendation Rounding](#cpu-recommendation-rounding) - [Memory Recommendation Rounding](#memory-recommendation-rounding) - [In-Place Updates](#in-place-updates-inplaceorrecreate) +- [CPU Startup Boost](#cpu-startup-boost) ## Limits control @@ -80,7 +81,7 @@ To enable this feature, set the `--round-memory-bytes` flag when running the VPA ## In-Place Updates (`InPlaceOrRecreate`) -> [!WARNING] +> [!WARNING] > FEATURE STATE: VPA v1.4.0 [alpha] > FEATURE STATE: VPA v1.5.0 [beta] @@ -125,7 +126,7 @@ Enable the feature by setting the following flags in VPA components ( for both u ```bash --feature-gates=InPlaceOrRecreate=true -``` +``` ### Limitations @@ -153,3 +154,60 @@ VPA provides metrics to track in-place update operations: * `vpa_vpas_with_in_place_updatable_pods_total`: Number of VPAs with pods eligible for in-place updates * `vpa_vpas_with_in_place_updated_pods_total`: Number of VPAs with successfully in-place updated pods * `vpa_updater_failed_in_place_update_attempts_total`: Number of failed attempts to update pods in-place. + +## CPU Startup Boost + +> [!WARNING] +> FEATURE STATE: VPA v1.5.0 [alpha] + +The CPU Startup Boost feature allows VPA to temporarily increase CPU requests and limits for containers during pod startup. This can help workloads that have high CPU demands during their initialization phase, such as Java applications, to start faster. 
Once the pod is considered `Ready` and an optional duration has passed, VPA scales the CPU resources back down to their normal levels using an in-place resize. + +For more details, see [AEP-7862: CPU Startup Boost](https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler/enhancements/7862-cpu-startup-boost). + +### Usage + +CPU Startup Boost is configured via the `startupBoost` field in the `VerticalPodAutoscalerSpec` or within the per-container `containerPolicies`. This allows for both global and per-container boost configurations. + +This example enables a startup boost for all containers in the targeted deployment. The CPU will be multiplied by a factor of 3 for 10 seconds after the pod becomes ready. + +```yaml +apiVersion: "autoscaling.k8s.io/v1" +kind: VerticalPodAutoscaler +metadata: + name: example-vpa +spec: + targetRef: + apiVersion: "apps/v1" + kind: Deployment + name: example + updatePolicy: + updateMode: "Recreate" + startupBoost: + cpu: + type: "Factor" + factor: 3 + duration: 10s +``` + +### Behavior + +1. When a pod managed by the VPA is created, the VPA Admission Controller applies the CPU boost. +2. The VPA Updater monitors the pod. Once the pod's condition is `Ready` and the `startupBoost.cpu.duration` has elapsed, it scales the CPU resources down in-place. +3. The scale-down/unboost target is either the VPA recommendation (if VPA is enabled for the container) or the original CPU resources defined in the pod spec. + +### Requirements + +* Kubernetes 1.33+ with the `InPlacePodVerticalScaling` feature gate enabled. +* VPA version 1.5.0+ with the `CPUStartupBoost` feature gate enabled. + +### Configuration + +Enable the feature by setting the `CPUStartupBoost` feature gate in the VPA admission-controller and updater components: + +```bash +--feature-gates=CPUStartupBoost=true +``` + +The `startupBoost` field has the following sub-fields: +* `cpu.type`: The type of boost.
Can be `Factor` to multiply the CPU, or `Quantity` to set a specific CPU value. This field is required. +* `cpu.factor` / `cpu.quantity`: The magnitude of the boost. Set `cpu.factor` to an integer multiplier (e.g., `2`) when the type is `Factor`, or `cpu.quantity` to a resource quantity (e.g., "500m") when the type is `Quantity`. +* `cpu.duration`: (Optional) How long to keep the boost active *after* the pod becomes `Ready`. Defaults to `0s`. diff --git a/vertical-pod-autoscaler/docs/flags.md b/vertical-pod-autoscaler/docs/flags.md index 66dc939535ce..874facf0c5b9 100644 --- a/vertical-pod-autoscaler/docs/flags.md +++ b/vertical-pod-autoscaler/docs/flags.md @@ -14,7 +14,7 @@ This document is auto-generated from the flag definitions in the VPA admission-c | `address` | string | ":8944" | The address to expose Prometheus metrics. | | `alsologtostderr` | | | log to standard error as well as files (no effect when -logtostderr=true) | | `client-ca-file` | string | "/etc/tls-certs/caCert.pem" | Path to CA PEM file. | -| `feature-gates` | mapStringBool | | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:
AllAlpha=true\|false (ALPHA - default=false)
AllBeta=true\|false (BETA - default=false)
InPlaceOrRecreate=true\|false (BETA - default=true)
PerVPAConfig=true\|false (ALPHA - default=false) | +| `feature-gates` | mapStringBool | | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:
AllAlpha=true\|false (ALPHA - default=false)
AllBeta=true\|false (BETA - default=false)
CPUStartupBoost=true\|false (ALPHA - default=false)
InPlaceOrRecreate=true\|false (BETA - default=true)
PerVPAConfig=true\|false (ALPHA - default=false) | | `ignored-vpa-object-namespaces` | string | | A comma-separated list of namespaces to ignore when searching for VPA objects. Leave empty to avoid ignoring any namespaces. These namespaces will not be cleaned by the garbage collector. | | `kube-api-burst` | float | 100 | QPS burst limit when making requests to Kubernetes apiserver | | `kube-api-qps` | float | 50 | QPS limit when making requests to Kubernetes apiserver | @@ -68,7 +68,7 @@ This document is auto-generated from the flag definitions in the VPA recommender | `cpu-integer-post-processor-enabled` | | | Enable the cpu-integer recommendation post processor. The post processor will round up CPU recommendations to a whole CPU for pods which were opted in by setting an appropriate label on VPA object (experimental) | | `external-metrics-cpu-metric` | string | | ALPHA. Metric to use with external metrics provider for CPU usage. | | `external-metrics-memory-metric` | string | | ALPHA. Metric to use with external metrics provider for memory usage. | -| `feature-gates` | mapStringBool | | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:
AllAlpha=true\|false (ALPHA - default=false)
AllBeta=true\|false (BETA - default=false)
InPlaceOrRecreate=true\|false (BETA - default=true)
PerVPAConfig=true\|false (ALPHA - default=false) | +| `feature-gates` | mapStringBool | | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:
AllAlpha=true\|false (ALPHA - default=false)
AllBeta=true\|false (BETA - default=false)
CPUStartupBoost=true\|false (ALPHA - default=false)
InPlaceOrRecreate=true\|false (BETA - default=true)
PerVPAConfig=true\|false (ALPHA - default=false) | | `history-length` | string | "8d" | How much time back prometheus have to be queried to get historical metrics | | `history-resolution` | string | "1h" | Resolution at which Prometheus is queried for historical metrics | | `humanize-memory` | | | DEPRECATED: Convert memory values in recommendations to the highest appropriate SI unit with up to 2 decimal places for better readability. This flag is deprecated and will be removed in a future version. Use --round-memory-bytes instead. | @@ -144,7 +144,7 @@ This document is auto-generated from the flag definitions in the VPA updater cod | `eviction-rate-burst` | int | 1 | Burst of pods that can be evicted. | | `eviction-rate-limit` | float | | Number of pods that can be evicted per seconds. A rate limit set to 0 or -1 will disable
the rate limiter. (default -1) | | `eviction-tolerance` | float | 0.5 | Fraction of replica count that can be evicted for update, if more than one pod can be evicted. | -| `feature-gates` | mapStringBool | | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:
AllAlpha=true\|false (ALPHA - default=false)
AllBeta=true\|false (BETA - default=false)
InPlaceOrRecreate=true\|false (BETA - default=true)
PerVPAConfig=true\|false (ALPHA - default=false) | +| `feature-gates` | mapStringBool | | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:
AllAlpha=true\|false (ALPHA - default=false)
AllBeta=true\|false (BETA - default=false)
CPUStartupBoost=true\|false (ALPHA - default=false)
InPlaceOrRecreate=true\|false (BETA - default=true)
PerVPAConfig=true\|false (ALPHA - default=false) | | `ignored-vpa-object-namespaces` | string | | A comma-separated list of namespaces to ignore when searching for VPA objects. Leave empty to avoid ignoring any namespaces. These namespaces will not be cleaned by the garbage collector. | | `in-recommendation-bounds-eviction-lifetime-threshold` | | 12h0m0s | duration Pods that live for at least that long can be evicted even if their request is within the [MinRecommended...MaxRecommended] range | | `kube-api-burst` | float | 100 | QPS burst limit when making requests to Kubernetes apiserver | diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go index f03f9e1759a7..daa555f05efa 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go @@ -186,9 +186,16 @@ func ValidateVPA(vpa *vpa_types.VerticalPodAutoscaler, isCreate bool) error { return fmt.Errorf("controlledValues shouldn't be specified if container scaling mode is off") } } + if err := validateStartupBoost(policy.StartupBoost, isCreate); err != nil { + return fmt.Errorf("invalid startupBoost in container %s: %v", policy.ContainerName, err) + } } } + if err := validateStartupBoost(vpa.Spec.StartupBoost, isCreate); err != nil { + return fmt.Errorf("invalid startupBoost: %v", err) + } + if isCreate && vpa.Spec.TargetRef == nil { return fmt.Errorf("targetRef is required. 
If you're using v1beta1 version of the API, please migrate to v1") } @@ -200,6 +207,49 @@ func ValidateVPA(vpa *vpa_types.VerticalPodAutoscaler, isCreate bool) error { return nil } +// validateStartupBoost checks a pod-level or container-level startupBoost policy. +// A nil policy, or one without a cpu section, is accepted. On create, any non-nil +// policy is rejected while the CPUStartupBoost feature gate is disabled. +func validateStartupBoost(startupBoost *vpa_types.StartupBoost, isCreate bool) error { + if startupBoost == nil { + return nil + } + + if !features.Enabled(features.CPUStartupBoost) && isCreate { + return fmt.Errorf("in order to use startupBoost, you must enable feature gate %s in the admission-controller args", features.CPUStartupBoost) + } + + cpuBoost := startupBoost.CPU + if cpuBoost == nil { + return nil + } + boostType := cpuBoost.Type + + switch boostType { + case vpa_types.FactorStartupBoostType: + if cpuBoost.Factor == nil { + return fmt.Errorf("StartupBoost.CPU.Factor is required when Type is Factor") + } + if *cpuBoost.Factor < 1 { + return fmt.Errorf("invalid StartupBoost.CPU.Factor: must be >= 1 for Type Factor") + } + case vpa_types.QuantityStartupBoostType: + if cpuBoost.Quantity == nil { + return fmt.Errorf("StartupBoost.CPU.Quantity is required when Type is Quantity") + } + // Mirror the Factor >= 1 check: a zero or negative quantity is not a usable CPU request/limit. + if cpuBoost.Quantity.Sign() <= 0 { + return fmt.Errorf("invalid StartupBoost.CPU.Quantity: must be > 0 for Type Quantity") + } + if err := validateCPUResolution(*cpuBoost.Quantity); err != nil { + return fmt.Errorf("invalid StartupBoost.CPU.Quantity: %v", err) + } + default: + return fmt.Errorf("unexpected StartupBoost.CPU.Type value %s", boostType) + } + return nil +} + func validateResourceResolution(name corev1.ResourceName, val apires.Quantity) error { switch name { case corev1.ResourceCPU: diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler_test.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler_test.go index 78044ff61b9f..9a9c15eee645 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler_test.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler_test.go @@ -21,6 +21,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + autoscaling "k8s.io/api/autoscaling/v1" apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" featuregatetesting "k8s.io/component-base/featuregate/testing" @@ -45,6 +46,14 @@ func TestValidateVPA(t *testing.T) { scalingModeOff := vpa_types.ContainerScalingModeOff controlledValuesRequestsAndLimits := vpa_types.ContainerControlledValuesRequestsAndLimits inPlaceOrRecreateUpdateMode := vpa_types.UpdateModeInPlaceOrRecreate + badCPUBoostFactor := int32(0) + validCPUBoostFactor := int32(2) + badCPUBoostQuantity := resource.MustParse("187500u") + validCPUBoostQuantity := resource.MustParse("100m") + badCPUBoostType := vpa_types.StartupBoostType("bad") + validCPUBoostTypeFactor := vpa_types.FactorStartupBoostType + validCPUBoostTypeQuantity := vpa_types.QuantityStartupBoostType + tests := []struct { name string vpa vpa_types.VerticalPodAutoscaler @@ -52,6 +61,7 @@ func TestValidateVPA(t *testing.T) { expectError error inPlaceOrRecreateFeatureGateDisabled bool PerVPAConfigDisabled bool + cpuStartupBoostFeatureGateDisabled bool }{ { name: "empty update", @@ -320,6 +330,247 @@ func TestValidateVPA(t *testing.T) { }, }, }, + + { + name: "top-level startupBoost with feature gate disabled", + vpa: vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + StartupBoost: &vpa_types.StartupBoost{ + CPU: &vpa_types.GenericStartupBoost{ + Factor: &validCPUBoostFactor, + }, + }, + }, + }, + isCreate: true, + cpuStartupBoostFeatureGateDisabled: true, + expectError: fmt.Errorf("invalid startupBoost: in order to use startupBoost, you must enable feature gate %s in the admission-controller args", features.CPUStartupBoost), + }, + { + name: "container startupBoost with feature gate disabled", + vpa: vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + ResourcePolicy: &vpa_types.PodResourcePolicy{ + ContainerPolicies: []vpa_types.ContainerResourcePolicy{ + { + ContainerName: "loot box", + StartupBoost: &vpa_types.StartupBoost{ + CPU: &vpa_types.GenericStartupBoost{ + Factor: &validCPUBoostFactor, 
+ }, + }, + }, + }, + }, + }, + }, + isCreate: true, + cpuStartupBoostFeatureGateDisabled: true, + expectError: fmt.Errorf("invalid startupBoost in container loot box: in order to use startupBoost, you must enable feature gate %s in the admission-controller args", features.CPUStartupBoost), + }, + { + name: "top-level startupBoost with bad factor", + vpa: vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + StartupBoost: &vpa_types.StartupBoost{ + CPU: &vpa_types.GenericStartupBoost{ + Type: vpa_types.FactorStartupBoostType, + Factor: &badCPUBoostFactor, + }, + }, + }, + }, + isCreate: true, + expectError: fmt.Errorf("invalid startupBoost: invalid StartupBoost.CPU.Factor: must be >= 1 for Type Factor"), + }, + { + name: "container startupBoost with bad factor", + vpa: vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + ResourcePolicy: &vpa_types.PodResourcePolicy{ + ContainerPolicies: []vpa_types.ContainerResourcePolicy{ + { + ContainerName: "loot box", + StartupBoost: &vpa_types.StartupBoost{ + CPU: &vpa_types.GenericStartupBoost{ + Type: vpa_types.FactorStartupBoostType, + Factor: &badCPUBoostFactor, + }, + }, + }, + }, + }, + }, + }, + isCreate: true, + expectError: fmt.Errorf("invalid startupBoost in container loot box: invalid StartupBoost.CPU.Factor: must be >= 1 for Type Factor"), + }, + { + name: "top-level startupBoost with bad quantity", + vpa: vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + TargetRef: &autoscaling.CrossVersionObjectReference{ + Kind: "Deployment", + Name: "my-app", + }, + StartupBoost: &vpa_types.StartupBoost{ + CPU: &vpa_types.GenericStartupBoost{ + Type: validCPUBoostTypeQuantity, + Quantity: &badCPUBoostQuantity, + }, + }, + }, + }, + isCreate: true, + expectError: fmt.Errorf("invalid startupBoost: invalid StartupBoost.CPU.Quantity: CPU [%v] must be a whole number of milli CPUs", &badCPUBoostQuantity), + }, + { + name: "container startupBoost with 
bad quantity", + vpa: vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + TargetRef: &autoscaling.CrossVersionObjectReference{ + Kind: "Deployment", + Name: "my-app", + }, + ResourcePolicy: &vpa_types.PodResourcePolicy{ + ContainerPolicies: []vpa_types.ContainerResourcePolicy{ + { + ContainerName: "loot box", + StartupBoost: &vpa_types.StartupBoost{ + CPU: &vpa_types.GenericStartupBoost{ + Type: validCPUBoostTypeQuantity, + Quantity: &badCPUBoostQuantity, + }, + }, + }, + }, + }, + }, + }, + isCreate: true, + expectError: fmt.Errorf("invalid startupBoost in container loot box: invalid StartupBoost.CPU.Quantity: CPU [%v] must be a whole number of milli CPUs", &badCPUBoostQuantity), + }, + { + name: "top-level startupBoost with bad type", + vpa: vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + StartupBoost: &vpa_types.StartupBoost{ + CPU: &vpa_types.GenericStartupBoost{ + Type: badCPUBoostType, + }, + }, + }, + }, + isCreate: true, + expectError: fmt.Errorf("invalid startupBoost: unexpected StartupBoost.CPU.Type value bad"), + }, + { + name: "container startupBoost with bad type", + vpa: vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + ResourcePolicy: &vpa_types.PodResourcePolicy{ + ContainerPolicies: []vpa_types.ContainerResourcePolicy{ + { + ContainerName: "loot box", + StartupBoost: &vpa_types.StartupBoost{ + CPU: &vpa_types.GenericStartupBoost{ + Type: badCPUBoostType, + }, + }, + }, + }, + }, + }, + }, + isCreate: true, + expectError: fmt.Errorf("invalid startupBoost in container loot box: unexpected StartupBoost.CPU.Type value bad"), + }, + { + name: "top-level startupBoost with valid factor", + vpa: vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + TargetRef: &autoscaling.CrossVersionObjectReference{ + Kind: "Deployment", + Name: "my-app", + }, + StartupBoost: &vpa_types.StartupBoost{ + CPU: &vpa_types.GenericStartupBoost{ + Type: 
validCPUBoostTypeFactor, + Factor: &validCPUBoostFactor, + }, + }, + }, + }, + isCreate: true, + }, + { + name: "container startupBoost with valid factor", + vpa: vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + TargetRef: &autoscaling.CrossVersionObjectReference{ + Kind: "Deployment", + Name: "my-app", + }, + ResourcePolicy: &vpa_types.PodResourcePolicy{ + ContainerPolicies: []vpa_types.ContainerResourcePolicy{ + { + ContainerName: "loot box", + StartupBoost: &vpa_types.StartupBoost{ + CPU: &vpa_types.GenericStartupBoost{ + Type: validCPUBoostTypeFactor, + Factor: &validCPUBoostFactor, + }, + }, + }, + }, + }, + }, + }, + isCreate: true, + }, + { + name: "top-level startupBoost with valid quantity", + vpa: vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + TargetRef: &autoscaling.CrossVersionObjectReference{ + Kind: "Deployment", + Name: "my-app", + }, + StartupBoost: &vpa_types.StartupBoost{ + CPU: &vpa_types.GenericStartupBoost{ + Type: validCPUBoostTypeQuantity, + Quantity: &validCPUBoostQuantity, + }, + }, + }, + }, + isCreate: true, + }, + { + name: "container startupBoost with valid quantity", + vpa: vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + TargetRef: &autoscaling.CrossVersionObjectReference{ + Kind: "Deployment", + Name: "my-app", + }, + ResourcePolicy: &vpa_types.PodResourcePolicy{ + ContainerPolicies: []vpa_types.ContainerResourcePolicy{ + { + ContainerName: "loot box", + StartupBoost: &vpa_types.StartupBoost{ + CPU: &vpa_types.GenericStartupBoost{ + Type: validCPUBoostTypeQuantity, + Quantity: &validCPUBoostQuantity, + }, + }, + }, + }, + }, + }, + }, + isCreate: true, + }, { name: "per-vpa config active and used", vpa: vpa_types.VerticalPodAutoscaler{ @@ -378,6 +629,7 @@ func TestValidateVPA(t *testing.T) { t.Run(fmt.Sprintf("test case: %s", tc.name), func(t *testing.T) { featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, 
features.InPlaceOrRecreate, !tc.inPlaceOrRecreateFeatureGateDisabled) featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.PerVPAConfig, !tc.PerVPAConfigDisabled) + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.CPUStartupBoost, !tc.cpuStartupBoostFeatureGateDisabled) err := ValidateVPA(&tc.vpa, tc.isCreate) if tc.expectError == nil { assert.NoError(t, err) diff --git a/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go b/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go index dc365b8c686f..acd1a4e0b31b 100644 --- a/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go +++ b/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go @@ -108,8 +108,58 @@ type VerticalPodAutoscalerSpec struct { // recommendation) or contain exactly one recommender. // +optional Recommenders []*VerticalPodAutoscalerRecommenderSelector `json:"recommenders,omitempty" protobuf:"bytes,4,opt,name=recommenders"` + + // startupBoost specifies the startup boost policy for the pod. + // +optional + StartupBoost *StartupBoost `json:"startupBoost,omitempty" protobuf:"bytes,5,opt,name=startupBoost"` +} + +// StartupBoost defines the startup boost policy. +type StartupBoost struct { + // cpu specifies the CPU startup boost policy. + // If this field is not set, no startup boost is applied. + // +optional + CPU *GenericStartupBoost `json:"cpu,omitempty" protobuf:"bytes,1,opt,name=cpu"` +} + +// GenericStartupBoost defines the startup boost policy for a resource. +// +union +type GenericStartupBoost struct { + // type specifies the kind of boost to apply. + // Supported values are: "Factor", "Quantity". + // +unionDiscriminator + // +required + Type StartupBoostType `json:"type" protobuf:"bytes,1,opt,name=type"` + // factor specifies the factor to apply to the resource request. + // This field is to be used only when Type is "Factor". 
+ // +unionMember=Factor + // +optional + Factor *int32 `json:"factor,omitempty" protobuf:"varint,2,opt,name=factor"` + + // quantity specifies the absolute resource quantity to be used as the + // resource request and limit during the boost phase. + // This field is to be used only when Type is "Quantity". + // +unionMember=Quantity + // +optional + Quantity *resource.Quantity `json:"quantity,omitempty" protobuf:"bytes,3,opt,name=quantity"` + + // duration indicates for how long to keep the pod boosted after it goes to Ready. + // Defaults to 0s. + // +optional + Duration *metav1.Duration `json:"duration,omitempty" protobuf:"bytes,4,opt,name=duration"` } +// StartupBoostType is the type of startup boost. +// +kubebuilder:validation:Enum=Factor;Quantity +type StartupBoostType string + +const ( + // FactorStartupBoostType applies a factor to the resource. + FactorStartupBoostType StartupBoostType = "Factor" + // QuantityStartupBoostType applies a fixed quantity to the resource. + QuantityStartupBoostType StartupBoostType = "Quantity" +) + // EvictionChangeRequirement refers to the relationship between the new target recommendation for a Pod and its current requests, what kind of change is necessary for the Pod to be evicted // +kubebuilder:validation:Enum:=TargetHigherThanRequests;TargetLowerThanRequests type EvictionChangeRequirement string @@ -233,6 +283,13 @@ type ContainerResourcePolicy struct { // oomMinBumpUp is the minimum increase in memory when OOM is detected. // +optional OOMMinBumpUp *resource.Quantity `json:"oomMinBumpUp,omitempty" protobuf:"bytes,8,opt,name=oomMinBumpUp"` + + // startupBoost specifies the startup boost policy for the container. + // This overrides any pod-level startup boost policy. + // The startup boost policy takes precedence over the rest of the fields in + // this struct, except for ContainerName and ControlledValues.
+ // +optional + StartupBoost *StartupBoost `json:"startupBoost,omitempty" protobuf:"bytes,9,opt,name=startupBoost"` } const ( diff --git a/vertical-pod-autoscaler/pkg/features/features.go b/vertical-pod-autoscaler/pkg/features/features.go index 42a3d0d9f5d7..b20ae01482be 100644 --- a/vertical-pod-autoscaler/pkg/features/features.go +++ b/vertical-pod-autoscaler/pkg/features/features.go @@ -40,6 +40,12 @@ const ( // In each feature gate description, you must specify "components". // The feature must be enabled by the --feature-gates argument on each listed component. + // alpha: v1.5.0 + // components: admission-controller, updater + + // CPUStartupBoost enables the CPU startup boost feature. + CPUStartupBoost featuregate.Feature = "CPUStartupBoost" + // alpha: v1.4.0 // beta: v1.5.0 diff --git a/vertical-pod-autoscaler/pkg/features/versioned_features.go b/vertical-pod-autoscaler/pkg/features/versioned_features.go index 19f2feb270c0..088126eabe8d 100644 --- a/vertical-pod-autoscaler/pkg/features/versioned_features.go +++ b/vertical-pod-autoscaler/pkg/features/versioned_features.go @@ -27,6 +27,9 @@ import ( // Entries are alphabetized.
var defaultVersionedFeatureGates = map[featuregate.Feature]featuregate.VersionedSpecs{ + CPUStartupBoost: { + {Version: version.MustParse("1.5"), Default: false, PreRelease: featuregate.Alpha}, + }, InPlaceOrRecreate: { {Version: version.MustParse("1.4"), Default: false, PreRelease: featuregate.Alpha}, {Version: version.MustParse("1.5"), Default: true, PreRelease: featuregate.Beta}, From 0554a5c880e328ecddf0223ad69194c86de00377 Mon Sep 17 00:00:00 2001 From: Kam Saiyed Date: Mon, 18 Aug 2025 18:13:47 +0000 Subject: [PATCH 2/7] Apply CPU startup boost in admission controller if its set --- vertical-pod-autoscaler/docs/flags.md | 1 + .../pkg/admission-controller/main.go | 8 +- .../resource/pod/patch/resource_updates.go | 161 ++++++- .../pod/patch/resource_updates_test.go | 409 +++++++++++++++++- .../resource/vpa/matcher.go | 2 +- .../pkg/utils/annotations/vpa_cpu_boost.go | 71 +++ .../utils/annotations/vpa_cpu_boost_test.go | 185 ++++++++ .../pkg/utils/test/test_vpa.go | 21 + .../pkg/utils/vpa/capping.go | 6 +- 9 files changed, 852 insertions(+), 12 deletions(-) create mode 100644 vertical-pod-autoscaler/pkg/utils/annotations/vpa_cpu_boost.go create mode 100644 vertical-pod-autoscaler/pkg/utils/annotations/vpa_cpu_boost_test.go diff --git a/vertical-pod-autoscaler/docs/flags.md b/vertical-pod-autoscaler/docs/flags.md index 874facf0c5b9..652c99829082 100644 --- a/vertical-pod-autoscaler/docs/flags.md +++ b/vertical-pod-autoscaler/docs/flags.md @@ -24,6 +24,7 @@ This document is auto-generated from the flag definitions in the VPA admission-c | `log-file` | string | | If non-empty, use this log file (no effect when -logtostderr=true) | | `log-file-max-size` | int | 1800 | uDefines the maximum size a log file can grow to (no effect when -logtostderr=true). Unit is megabytes. If the value is 0, the maximum file size is unlimited. 
| | `logtostderr` | | true | log to standard error instead of files | +| `max-allowed-cpu-boost` | | | quantity Maximum amount of CPU that will be applied for a container with boost. | | `min-tls-version` | string | | The minimum TLS version to accept. Must be set to either tls1_2 or tls1_3. (default "tls1_2") | | `one-output` | severity | | If true, only write logs to their native level (vs also writing to each lower severity level; no effect when -logtostderr=true) | | `port` | int | 8000 | The port to listen on. | diff --git a/vertical-pod-autoscaler/pkg/admission-controller/main.go b/vertical-pod-autoscaler/pkg/admission-controller/main.go index efb633bad6a6..2bbf16bcf7ec 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/main.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/main.go @@ -25,6 +25,7 @@ import ( "time" "github.com/spf13/pflag" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/client-go/informers" kube_client "k8s.io/client-go/kubernetes" typedadmregv1 "k8s.io/client-go/kubernetes/typed/admissionregistration/v1" @@ -78,8 +79,13 @@ var ( registerWebhook = flag.Bool("register-webhook", true, "If set to true, admission webhook object will be created on start up to register with the API server.") webhookLabels = flag.String("webhook-labels", "", "Comma separated list of labels to add to the webhook object. 
Format: key1:value1,key2:value2") registerByURL = flag.Bool("register-by-url", false, "If set to true, admission webhook will be registered by URL (webhookAddress:webhookPort) instead of by service name") + maxAllowedCPUBoost = resource.QuantityValue{} ) +func init() { + flag.Var(&maxAllowedCPUBoost, "max-allowed-cpu-boost", "Maximum amount of CPU that will be applied for a container with boost.") +} + func main() { commonFlags := common.InitCommonFlags() klog.InitFlags(nil) @@ -145,7 +151,7 @@ func main() { hostname, ) - calculators := []patch.Calculator{patch.NewResourceUpdatesCalculator(recommendationProvider), patch.NewObservedContainersCalculator()} + calculators := []patch.Calculator{patch.NewResourceUpdatesCalculator(recommendationProvider, maxAllowedCPUBoost), patch.NewObservedContainersCalculator()} as := logic.NewAdmissionServer(podPreprocessor, vpaPreprocessor, limitRangeCalculator, vpaMatcher, calculators) http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { as.Serve(w, r) diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go index 3bc230e9f029..28d75a7a6128 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go @@ -21,10 +21,13 @@ import ( "strings" core "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" resource_admission "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/recommendation" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" resourcehelpers 
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/resources" vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" ) @@ -37,13 +40,15 @@ const ( type resourcesUpdatesPatchCalculator struct { recommendationProvider recommendation.Provider + maxAllowedCPUBoost resource.Quantity } // NewResourceUpdatesCalculator returns a calculator for // resource update patches. -func NewResourceUpdatesCalculator(recommendationProvider recommendation.Provider) Calculator { +func NewResourceUpdatesCalculator(recommendationProvider recommendation.Provider, maxAllowedCPUBoost resource.QuantityValue) Calculator { return &resourcesUpdatesPatchCalculator{ recommendationProvider: recommendationProvider, + maxAllowedCPUBoost: maxAllowedCPUBoost.Quantity, } } @@ -59,15 +64,43 @@ func (c *resourcesUpdatesPatchCalculator) CalculatePatches(pod *core.Pod, vpa *v return []resource_admission.PatchRecord{}, fmt.Errorf("failed to calculate resource patch for pod %s/%s: %v", pod.Namespace, pod.Name, err) } + if vpa_api_util.GetUpdateMode(vpa) == vpa_types.UpdateModeOff { + // If update mode is "Off", we don't want to apply any recommendations, + // but we still want to apply startup boost. + for i := range containersResources { + containersResources[i].Requests = nil + containersResources[i].Limits = nil + } + annotationsPerContainer = vpa_api_util.ContainerToAnnotationsMap{} + } + if annotationsPerContainer == nil { annotationsPerContainer = vpa_api_util.ContainerToAnnotationsMap{} } updatesAnnotation := []string{} - for i, containerResources := range containersResources { - newPatches, newUpdatesAnnotation := getContainerPatch(pod, i, annotationsPerContainer, containerResources) - result = append(result, newPatches...) 
- updatesAnnotation = append(updatesAnnotation, newUpdatesAnnotation) + cpuStartupBoostEnabled := features.Enabled(features.CPUStartupBoost) + for i := range containersResources { + + // Apply startup boost if configured + if cpuStartupBoostEnabled { + // Get the container resource policy to check for scaling mode. + policy := vpa_api_util.GetContainerResourcePolicy(pod.Spec.Containers[i].Name, vpa.Spec.ResourcePolicy) + if policy != nil && policy.Mode != nil && *policy.Mode == vpa_types.ContainerScalingModeOff { + continue + } + boostPatches, err := c.applyCPUStartupBoost(&pod.Spec.Containers[i], vpa, &containersResources[i]) + if err != nil { + return nil, err + } + result = append(result, boostPatches...) + } + + newPatches, newUpdatesAnnotation := getContainerPatch(pod, i, annotationsPerContainer, containersResources[i]) + if len(newPatches) > 0 { + result = append(result, newPatches...) + updatesAnnotation = append(updatesAnnotation, newUpdatesAnnotation) + } } if len(updatesAnnotation) > 0 { @@ -108,3 +141,121 @@ func appendPatchesAndAnnotations(patches []resource_admission.PatchRecord, annot } return patches, annotations } + +func (c *resourcesUpdatesPatchCalculator) applyCPUStartupBoost(container *core.Container, vpa *vpa_types.VerticalPodAutoscaler, containerResources *vpa_api_util.ContainerResources) ([]resource_admission.PatchRecord, error) { + var patches []resource_admission.PatchRecord + + startupBoostPolicy := getContainerStartupBoostPolicy(container, vpa) + if startupBoostPolicy == nil || startupBoostPolicy.CPU == nil { + return nil, nil + } + + err := c.applyControlledCPUResources(container, vpa, containerResources, startupBoostPolicy) + if err != nil { + return nil, err + } + + originalResources, err := annotations.GetOriginalResourcesAnnotationValue(container) + if err != nil { + return nil, err + } + patches = append(patches, GetAddAnnotationPatch(annotations.StartupCPUBoostAnnotation, originalResources)) + + return patches, nil +} + +func
getContainerStartupBoostPolicy(container *core.Container, vpa *vpa_types.VerticalPodAutoscaler) *vpa_types.StartupBoost { + policy := vpa_api_util.GetContainerResourcePolicy(container.Name, vpa.Spec.ResourcePolicy) + startupBoost := vpa.Spec.StartupBoost + if policy != nil && policy.StartupBoost != nil { + startupBoost = policy.StartupBoost + } + return startupBoost +} + +func (c *resourcesUpdatesPatchCalculator) calculateBoostedCPUValue(baseCPU resource.Quantity, startupBoost *vpa_types.StartupBoost) (*resource.Quantity, error) { + boostType := startupBoost.CPU.Type + if boostType == "" { + boostType = vpa_types.FactorStartupBoostType + } + + switch boostType { + case vpa_types.FactorStartupBoostType: + if startupBoost.CPU.Factor == nil { + return nil, fmt.Errorf("startupBoost.CPU.Factor is required when Type is Factor or not specified") + } + factor := *startupBoost.CPU.Factor + if factor < 1 { + return nil, fmt.Errorf("boost factor must be >= 1") + } + boostedCPUMilli := baseCPU.MilliValue() + boostedCPUMilli *= int64(factor) + return resource.NewMilliQuantity(boostedCPUMilli, resource.DecimalSI), nil + case vpa_types.QuantityStartupBoostType: + if startupBoost.CPU.Quantity == nil { + return nil, fmt.Errorf("startupBoost.CPU.Quantity is required when Type is Quantity") + } + quantity := *startupBoost.CPU.Quantity + boostedCPUMilli := baseCPU.MilliValue() + quantity.MilliValue() + return resource.NewMilliQuantity(boostedCPUMilli, resource.DecimalSI), nil + default: + return nil, fmt.Errorf("unsupported startup boost type: %s", startupBoost.CPU.Type) + } +} + +func (c *resourcesUpdatesPatchCalculator) calculateBoostedCPU(recommendedCPU, originalCPU resource.Quantity, startupBoost *vpa_types.StartupBoost) (*resource.Quantity, error) { + baseCPU := recommendedCPU + if baseCPU.IsZero() { + baseCPU = originalCPU + } + + if startupBoost == nil || startupBoost.CPU == nil { + return &baseCPU, nil + } + + boostedCPU, err := c.calculateBoostedCPUValue(baseCPU,
startupBoost) + if err != nil { + return nil, err + } + + if !c.maxAllowedCPUBoost.IsZero() && boostedCPU.Cmp(c.maxAllowedCPUBoost) > 0 { + return &c.maxAllowedCPUBoost, nil + } + return boostedCPU, nil +} + +func (c *resourcesUpdatesPatchCalculator) applyControlledCPUResources(container *core.Container, vpa *vpa_types.VerticalPodAutoscaler, containerResources *vpa_api_util.ContainerResources, startupBoostPolicy *vpa_types.StartupBoost) error { + controlledValues := vpa_api_util.GetContainerControlledValues(container.Name, vpa.Spec.ResourcePolicy) + + recommendedRequest := containerResources.Requests[core.ResourceCPU] + originalRequest := container.Resources.Requests[core.ResourceCPU] + boostedRequest, err := c.calculateBoostedCPU(recommendedRequest, originalRequest, startupBoostPolicy) + if err != nil { + return err + } + + if containerResources.Requests == nil { + containerResources.Requests = core.ResourceList{} + } + containerResources.Requests[core.ResourceCPU] = *boostedRequest + + switch controlledValues { + case vpa_types.ContainerControlledValuesRequestsOnly: + vpa_api_util.CapRecommendationToContainerLimit(containerResources.Requests, container.Resources.Limits) + case vpa_types.ContainerControlledValuesRequestsAndLimits: + if containerResources.Limits == nil { + containerResources.Limits = core.ResourceList{} + } + originalLimit := container.Resources.Limits[core.ResourceCPU] + if originalLimit.IsZero() { + originalLimit = container.Resources.Requests[core.ResourceCPU] + } + recommendedLimit := containerResources.Limits[core.ResourceCPU] + boostedLimit, err := c.calculateBoostedCPU(recommendedLimit, originalLimit, startupBoostPolicy) + if err != nil { + return err + } + containerResources.Limits[core.ResourceCPU] = *boostedLimit + } + return nil +} diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates_test.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates_test.go index 
2a3cc5d9a0ec..63bda557a7a7 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates_test.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates_test.go @@ -24,9 +24,12 @@ import ( "github.com/stretchr/testify/assert" core "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + featuregatetesting "k8s.io/component-base/featuregate/testing" resource_admission "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test" vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" ) @@ -289,11 +292,22 @@ func TestCalculatePatches_ResourceUpdates(t *testing.T) { addAnnotationRequest([][]string{{cpu}}, limit), }, }, + { + name: "no recommendation present", + pod: test.Pod(). + AddContainer(core.Container{}). + AddContainerStatus(test.ContainerStatus(). 
+ WithCPULimit(resource.MustParse("0")).Get()).Get(), + namespace: "default", + recommendResources: make([]vpa_api_util.ContainerResources, 1), + recommendAnnotations: vpa_api_util.ContainerToAnnotationsMap{}, + expectPatches: []resource_admission.PatchRecord{}, + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { frp := fakeRecommendationProvider{tc.recommendResources, tc.recommendAnnotations, tc.recommendError} - c := NewResourceUpdatesCalculator(&frp) + c := NewResourceUpdatesCalculator(&frp, resource.QuantityValue{}) patches, err := c.CalculatePatches(tc.pod, test.VerticalPodAutoscaler().WithContainer("test").WithName("name").Get()) if tc.expectError == nil { assert.NoError(t, err) @@ -335,7 +349,7 @@ func TestGetPatches_TwoReplacementResources(t *testing.T) { } recommendAnnotations := vpa_api_util.ContainerToAnnotationsMap{} frp := fakeRecommendationProvider{recommendResources, recommendAnnotations, nil} - c := NewResourceUpdatesCalculator(&frp) + c := NewResourceUpdatesCalculator(&frp, resource.QuantityValue{}) patches, err := c.CalculatePatches(pod, test.VerticalPodAutoscaler().WithName("name").WithContainer("test").Get()) assert.NoError(t, err) // Order of updates for cpu and unobtanium depends on order of iterating a map, both possible results are valid. 
@@ -350,3 +364,394 @@ func TestGetPatches_TwoReplacementResources(t *testing.T) { AssertPatchOneOf(t, patches[2], []resource_admission.PatchRecord{cpuFirstUnobtaniumSecond, unobtaniumFirstCpuSecond}) } } + +func TestCalculatePatches_StartupBoost(t *testing.T) { + factor2 := int32(2) + factor3 := int32(3) + quantity := resource.MustParse("500m") + invalidFactor := int32(0) + invalidQuantity := resource.MustParse("200m") + tests := []struct { + name string + pod *core.Pod + vpa *vpa_types.VerticalPodAutoscaler + recommendResources []vpa_api_util.ContainerResources + recommendAnnotations vpa_api_util.ContainerToAnnotationsMap + recommendError error + maxAllowedCpu resource.QuantityValue + expectPatches []resource_admission.PatchRecord + expectError error + featureGateEnabled bool + }{ + { + name: "startup boost factor", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("1m"), + }, + Limits: core.ResourceList{ + cpu: resource.MustParse("1m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor2, nil, "10s").Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + Limits: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + maxAllowedCpu: resource.QuantityValue{}, + featureGateEnabled: true, + expectPatches: []resource_admission.PatchRecord{ + GetAddAnnotationPatch(annotations.StartupCPUBoostAnnotation, "{\"requests\":{\"cpu\":\"1m\"},\"limits\":{\"cpu\":\"1m\"}}"), + addResourceRequestPatch(0, cpu, "200m"), + addResourceLimitPatch(0, cpu, "200m"), + GetAddAnnotationPatch(ResourceUpdatesAnnotation, "Pod resources updated by name: container 0: cpu request, cpu limit"), + }, + }, + { + name: "startup boost 
factor with 0s duration", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("1m"), + }, + Limits: core.ResourceList{ + cpu: resource.MustParse("1m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor2, nil, "0s").Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + Limits: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + maxAllowedCpu: resource.QuantityValue{}, + featureGateEnabled: true, + expectPatches: []resource_admission.PatchRecord{ + GetAddAnnotationPatch(annotations.StartupCPUBoostAnnotation, "{\"requests\":{\"cpu\":\"1m\"},\"limits\":{\"cpu\":\"1m\"}}"), + addResourceRequestPatch(0, cpu, "200m"), + addResourceLimitPatch(0, cpu, "200m"), + GetAddAnnotationPatch(ResourceUpdatesAnnotation, "Pod resources updated by name: container 0: cpu request, cpu limit"), + }, + }, + { + name: "startup boost quantity", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("1m"), + }, + Limits: core.ResourceList{ + cpu: resource.MustParse("1m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(vpa_types.QuantityStartupBoostType, nil, &quantity, "10s").Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + Limits: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + maxAllowedCpu: resource.QuantityValue{}, + featureGateEnabled: true, + expectPatches: 
[]resource_admission.PatchRecord{ + GetAddAnnotationPatch(annotations.StartupCPUBoostAnnotation, "{\"requests\":{\"cpu\":\"1m\"},\"limits\":{\"cpu\":\"1m\"}}"), + addResourceRequestPatch(0, cpu, "600m"), + addResourceLimitPatch(0, cpu, "600m"), + GetAddAnnotationPatch(ResourceUpdatesAnnotation, "Pod resources updated by name: container 0: cpu request, cpu limit"), + }, + }, + { + name: "feature gate disabled", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor2, nil, "10s").Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + maxAllowedCpu: resource.QuantityValue{}, + featureGateEnabled: false, + expectPatches: []resource_admission.PatchRecord{ + addResourceRequestPatch(0, cpu, "100m"), + addAnnotationRequest([][]string{{cpu}}, "request"), + }, + }, + { + name: "invalid factor", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("1m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &invalidFactor, nil, "10s").Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + maxAllowedCpu: resource.QuantityValue{}, + featureGateEnabled: true, + expectError: fmt.Errorf("boost factor must be >= 1"), + }, + { + name: "quantity less than request", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: 
[]core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("400m"), + }, + Limits: core.ResourceList{ + cpu: resource.MustParse("400m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(vpa_types.QuantityStartupBoostType, nil, &invalidQuantity, "10s").Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("500m"), + }, + Limits: core.ResourceList{ + cpu: resource.MustParse("500m"), + }, + }, + }, + maxAllowedCpu: resource.QuantityValue{}, + featureGateEnabled: true, + expectPatches: []resource_admission.PatchRecord{ + GetAddAnnotationPatch(annotations.StartupCPUBoostAnnotation, "{\"requests\":{\"cpu\":\"400m\"},\"limits\":{\"cpu\":\"400m\"}}"), + addResourceRequestPatch(0, cpu, "700m"), + addResourceLimitPatch(0, cpu, "700m"), + GetAddAnnotationPatch(ResourceUpdatesAnnotation, "Pod resources updated by name: container 0: cpu request, cpu limit"), + }, + }, + { + name: "startup boost capped", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("1m"), + }, + Limits: core.ResourceList{ + cpu: resource.MustParse("1m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor3, nil, "1s").Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("20m"), + }, + Limits: core.ResourceList{ + cpu: resource.MustParse("20m"), + }, + }, + }, + maxAllowedCpu: resource.QuantityValue{Quantity: resource.MustParse("40m")}, + featureGateEnabled: true, + expectPatches: []resource_admission.PatchRecord{ + 
GetAddAnnotationPatch(annotations.StartupCPUBoostAnnotation, "{\"requests\":{\"cpu\":\"1m\"},\"limits\":{\"cpu\":\"1m\"}}"), + addResourceRequestPatch(0, cpu, "40m"), + addResourceLimitPatch(0, cpu, "40m"), + GetAddAnnotationPatch(ResourceUpdatesAnnotation, "Pod resources updated by name: container 0: cpu request, cpu limit"), + }, + }, + { + name: "startup boost with scaling mode off", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor2, nil, "10s").WithScalingMode("container1", vpa_types.ContainerScalingModeOff).Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("1"), + }, + }, + }, + maxAllowedCpu: resource.QuantityValue{}, + featureGateEnabled: true, + expectPatches: []resource_admission.PatchRecord{}, + }, + { + name: "startup boost no recommendation", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + Limits: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor2, nil, "10s").Get(), + recommendResources: make([]vpa_api_util.ContainerResources, 1), + maxAllowedCpu: resource.QuantityValue{}, + featureGateEnabled: true, + expectPatches: []resource_admission.PatchRecord{ + GetAddAnnotationPatch(annotations.StartupCPUBoostAnnotation, "{\"requests\":{\"cpu\":\"100m\"},\"limits\":{\"cpu\":\"100m\"}}"), + addResourceRequestPatch(0, cpu, "200m"), + 
addResourceLimitPatch(0, cpu, "200m"), + GetAddAnnotationPatch(ResourceUpdatesAnnotation, "Pod resources updated by name: container 0: cpu request, cpu limit"), + }, + }, + { + name: "startup boost with ControlledValues=RequestsOnly", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + Limits: core.ResourceList{ + cpu: resource.MustParse("300m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor2, nil, "10s").WithControlledValues("container1", vpa_types.ContainerControlledValuesRequestsOnly).Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + maxAllowedCpu: resource.QuantityValue{}, + featureGateEnabled: true, + expectPatches: []resource_admission.PatchRecord{ + GetAddAnnotationPatch(annotations.StartupCPUBoostAnnotation, "{\"requests\":{\"cpu\":\"100m\"},\"limits\":{\"cpu\":\"300m\"}}"), + addResourceRequestPatch(0, cpu, "200m"), + GetAddAnnotationPatch(ResourceUpdatesAnnotation, "Pod resources updated by name: container 0: cpu request"), + }, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.CPUStartupBoost, tc.featureGateEnabled) + + frp := fakeRecommendationProvider{tc.recommendResources, tc.recommendAnnotations, tc.recommendError} + c := NewResourceUpdatesCalculator(&frp, tc.maxAllowedCpu) + patches, err := c.CalculatePatches(tc.pod, tc.vpa) + if tc.expectError == nil { + assert.NoError(t, err) + } else { + if assert.Error(t, err) { + assert.Equal(t, tc.expectError.Error(), err.Error()) + } + } + if assert.Len(t, patches, len(tc.expectPatches), fmt.Sprintf("got %+v, want 
%+v", patches, tc.expectPatches)) { + for i, gotPatch := range patches { + if !EqPatch(gotPatch, tc.expectPatches[i]) { + t.Errorf("Expected patch at position %d to be %+v, got %+v", i, tc.expectPatches[i], gotPatch) + } + } + } + }) + } +} diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/matcher.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/matcher.go index 704749649ad2..5d1e93468d11 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/matcher.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/matcher.go @@ -69,7 +69,7 @@ func (m *matcher) GetMatchingVPA(ctx context.Context, pod *core.Pod) *vpa_types. var controllingVpa *vpa_types.VerticalPodAutoscaler for _, vpaConfig := range configs { - if vpa_api_util.GetUpdateMode(vpaConfig) == vpa_types.UpdateModeOff { + if vpa_api_util.GetUpdateMode(vpaConfig) == vpa_types.UpdateModeOff && vpaConfig.Spec.StartupBoost == nil { continue } if vpaConfig.Spec.TargetRef == nil { diff --git a/vertical-pod-autoscaler/pkg/utils/annotations/vpa_cpu_boost.go b/vertical-pod-autoscaler/pkg/utils/annotations/vpa_cpu_boost.go new file mode 100644 index 000000000000..2ab6a3dd5b14 --- /dev/null +++ b/vertical-pod-autoscaler/pkg/utils/annotations/vpa_cpu_boost.go @@ -0,0 +1,71 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package annotations
+
+import (
+	"encoding/json"
+
+	core "k8s.io/api/core/v1"
+)
+
+const (
+	// StartupCPUBoostAnnotation is the annotation set on a pod when a CPU boost is applied.
+	// Its value is the JSON encoding of OriginalResources, as produced by GetOriginalResourcesAnnotationValue.
+	StartupCPUBoostAnnotation = "startup-cpu-boost"
+)
+
+// OriginalResources is the JSON payload of StartupCPUBoostAnnotation: the original requests and limits of a container.
+type OriginalResources struct {
+	Requests core.ResourceList `json:"requests"`
+	Limits   core.ResourceList `json:"limits"`
+}
+
+// GetOriginalResourcesAnnotationValue returns the JSON encoding of the container's CPU and memory requests and limits; unset resources are omitted.
+func GetOriginalResourcesAnnotationValue(container *core.Container) (string, error) {
+	original := OriginalResources{
+		Requests: core.ResourceList{}, // non-nil so the key is encoded as {} rather than null
+		Limits:   core.ResourceList{}, // non-nil so the key is encoded as {} rather than null
+	}
+	if cpu, ok := container.Resources.Requests[core.ResourceCPU]; ok {
+		original.Requests[core.ResourceCPU] = cpu
+	}
+	if mem, ok := container.Resources.Requests[core.ResourceMemory]; ok {
+		original.Requests[core.ResourceMemory] = mem
+	}
+	if cpu, ok := container.Resources.Limits[core.ResourceCPU]; ok {
+		original.Limits[core.ResourceCPU] = cpu
+	}
+	if mem, ok := container.Resources.Limits[core.ResourceMemory]; ok {
+		original.Limits[core.ResourceMemory] = mem
+	}
+	b, err := json.Marshal(original)
+	return string(b), err
+}
+
+// GetOriginalResourcesFromAnnotation decodes the pod's StartupCPUBoostAnnotation; it returns (nil, nil) when the annotation is absent.
+func GetOriginalResourcesFromAnnotation(pod *core.Pod) (*OriginalResources, error) {
+	val, ok := pod.Annotations[StartupCPUBoostAnnotation] // indexing a nil Annotations map is safe and yields ok == false
+	if !ok {
+		return nil, nil // annotation absent: both results are nil, so callers must nil-check the pointer
+	}
+	var original OriginalResources
+	err := json.Unmarshal([]byte(val), &original)
+	if err != nil {
+		return nil, err // value is not valid JSON for OriginalResources; the decode error is returned unwrapped
+	}
+	return &original, nil
+}
diff --git a/vertical-pod-autoscaler/pkg/utils/annotations/vpa_cpu_boost_test.go b/vertical-pod-autoscaler/pkg/utils/annotations/vpa_cpu_boost_test.go
new file mode 100644
index 000000000000..b18e49cf84a2
--- /dev/null
+++ b/vertical-pod-autoscaler/pkg/utils/annotations/vpa_cpu_boost_test.go
@@ -0,0 +1,185 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package annotations + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + core "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestGetOriginalResourcesAnnotationValue(t *testing.T) { + testCases := []struct { + name string + container *core.Container + expected *OriginalResources + expectErr bool + }{ + { + name: "full resources", + container: &core.Container{ + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + core.ResourceCPU: resource.MustParse("1"), + core.ResourceMemory: resource.MustParse("1Gi"), + }, + Limits: core.ResourceList{ + core.ResourceCPU: resource.MustParse("2"), + core.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }, + expected: &OriginalResources{ + Requests: core.ResourceList{ + core.ResourceCPU: resource.MustParse("1"), + core.ResourceMemory: resource.MustParse("1Gi"), + }, + Limits: core.ResourceList{ + core.ResourceCPU: resource.MustParse("2"), + core.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + expectErr: false, + }, + { + name: "only requests", + container: &core.Container{ + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + core.ResourceCPU: resource.MustParse("1"), + core.ResourceMemory: resource.MustParse("1Gi"), + }, + }, + }, + expected: &OriginalResources{ + Requests: core.ResourceList{ + core.ResourceCPU: resource.MustParse("1"), + core.ResourceMemory: resource.MustParse("1Gi"), + }, + Limits: core.ResourceList{}, + }, + expectErr: false, + }, + { + name: "no resources", + container: &core.Container{ + Resources: core.ResourceRequirements{}, + }, + expected: &OriginalResources{ + Requests: core.ResourceList{}, + Limits: core.ResourceList{}, + }, + expectErr: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + val, err := GetOriginalResourcesAnnotationValue(tc.container) + if tc.expectErr { + assert.Error(t, err) + return + } + 
assert.NoError(t, err) + + var got OriginalResources + err = json.Unmarshal([]byte(val), &got) + assert.NoError(t, err) + assert.True(t, tc.expected.Requests.Cpu().Equal(*got.Requests.Cpu()), "CPU requests do not match") + assert.True(t, tc.expected.Requests.Memory().Equal(*got.Requests.Memory()), "Memory requests do not match") + assert.True(t, tc.expected.Limits.Cpu().Equal(*got.Limits.Cpu()), "CPU limits do not match") + assert.True(t, tc.expected.Limits.Memory().Equal(*got.Limits.Memory()), "Memory limits do not match") + }) + } +} + +func TestGetOriginalResourcesFromAnnotation(t *testing.T) { + testCases := []struct { + name string + pod *core.Pod + expected *OriginalResources + expectErr bool + }{ + { + name: "valid annotation", + pod: &core.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + StartupCPUBoostAnnotation: `{"requests":{"cpu":"1","memory":"1Gi"},"limits":{"cpu":"2","memory":"2Gi"}}`, + }, + }, + }, + expected: &OriginalResources{ + Requests: core.ResourceList{ + core.ResourceCPU: resource.MustParse("1"), + core.ResourceMemory: resource.MustParse("1Gi"), + }, + Limits: core.ResourceList{ + core.ResourceCPU: resource.MustParse("2"), + core.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + expectErr: false, + }, + { + name: "no annotation", + pod: &core.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{}, + }, + }, + expected: nil, + expectErr: false, + }, + { + name: "invalid json", + pod: &core.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + StartupCPUBoostAnnotation: "invalid-json", + }, + }, + }, + expected: nil, + expectErr: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got, err := GetOriginalResourcesFromAnnotation(tc.pod) + if tc.expectErr { + assert.Error(t, err) + return + } + assert.NoError(t, err) + if tc.expected == nil { + assert.Nil(t, got) + } else { + assert.NotNil(t, got) + assert.True(t, 
tc.expected.Requests.Cpu().Equal(*got.Requests.Cpu()), "CPU requests do not match") + assert.True(t, tc.expected.Requests.Memory().Equal(*got.Requests.Memory()), "Memory requests do not match") + assert.True(t, tc.expected.Limits.Cpu().Equal(*got.Limits.Cpu()), "CPU limits do not match") + assert.True(t, tc.expected.Limits.Memory().Equal(*got.Limits.Memory()), "Memory limits do not match") + } + }) + } +} diff --git a/vertical-pod-autoscaler/pkg/utils/test/test_vpa.go b/vertical-pod-autoscaler/pkg/utils/test/test_vpa.go index 5a32bc0a2069..fbb238dec39e 100644 --- a/vertical-pod-autoscaler/pkg/utils/test/test_vpa.go +++ b/vertical-pod-autoscaler/pkg/utils/test/test_vpa.go @@ -50,6 +50,7 @@ type VerticalPodAutoscalerBuilder interface { WithMinReplicas(minReplicas *int32) VerticalPodAutoscalerBuilder WithOOMBumpUpRatio(ratio *resource.Quantity) VerticalPodAutoscalerBuilder WithOOMMinBumpUp(minBumpUp *resource.Quantity) VerticalPodAutoscalerBuilder + WithCPUStartupBoost(boostType vpa_types.StartupBoostType, factor *int32, quantity *resource.Quantity, duration string) VerticalPodAutoscalerBuilder AppendCondition(conditionType vpa_types.VerticalPodAutoscalerConditionType, status core.ConditionStatus, reason, message string, lastTransitionTime time.Time) VerticalPodAutoscalerBuilder AppendRecommendation(vpa_types.RecommendedContainerResources) VerticalPodAutoscalerBuilder @@ -84,6 +85,7 @@ type verticalPodAutoscalerBuilder struct { maxAllowed map[string]core.ResourceList controlledValues map[string]*vpa_types.ContainerControlledValues scalingMode map[string]*vpa_types.ContainerScalingMode + startupBoost *vpa_types.StartupBoost recommendation RecommendationBuilder conditions []vpa_types.VerticalPodAutoscalerCondition annotations map[string]string @@ -249,6 +251,24 @@ func (b *verticalPodAutoscalerBuilder) AppendRecommendation(recommendation vpa_t return &c } +func (b *verticalPodAutoscalerBuilder) WithCPUStartupBoost(boostType vpa_types.StartupBoostType, factor *int32, 
quantity *resource.Quantity, duration string) VerticalPodAutoscalerBuilder { + c := *b + parsedDuration, _ := time.ParseDuration(duration) + cpuStartupBoost := &vpa_types.GenericStartupBoost{ + Type: boostType, + Duration: &meta.Duration{Duration: parsedDuration}, + } + if factor != nil { + cpuStartupBoost.Factor = factor + } else { + cpuStartupBoost.Quantity = quantity + } + c.startupBoost = &vpa_types.StartupBoost{ + CPU: cpuStartupBoost, + } + return &c +} + func (b *verticalPodAutoscalerBuilder) Get() *vpa_types.VerticalPodAutoscaler { if len(b.containerNames) == 0 { panic("Must call WithContainer() before Get()") @@ -299,6 +319,7 @@ func (b *verticalPodAutoscalerBuilder) Get() *vpa_types.VerticalPodAutoscaler { ResourcePolicy: &resourcePolicy, TargetRef: b.targetRef, Recommenders: recommenders, + StartupBoost: b.startupBoost, }, Status: vpa_types.VerticalPodAutoscalerStatus{ Recommendation: recommendation, diff --git a/vertical-pod-autoscaler/pkg/utils/vpa/capping.go b/vertical-pod-autoscaler/pkg/utils/vpa/capping.go index 0f2842329c97..6d3a82024668 100644 --- a/vertical-pod-autoscaler/pkg/utils/vpa/capping.go +++ b/vertical-pod-autoscaler/pkg/utils/vpa/capping.go @@ -136,7 +136,7 @@ func getCappedRecommendationForContainer( } // TODO: If limits and policy are conflicting, set some condition on the VPA. if containerControlledValues == vpa_types.ContainerControlledValuesRequestsOnly { - annotations = capRecommendationToContainerLimit(recommendation, containerLimits) + annotations = CapRecommendationToContainerLimit(recommendation, containerLimits) if genAnnotations { cappingAnnotations = append(cappingAnnotations, annotations...) } @@ -150,9 +150,9 @@ func getCappedRecommendationForContainer( return cappedRecommendations, cappingAnnotations, nil } -// capRecommendationToContainerLimit makes sure recommendation is not above current limit for the container. 
+// CapRecommendationToContainerLimit makes sure recommendation is not above current limit for the container. // If this function makes adjustments appropriate annotations are returned. -func capRecommendationToContainerLimit(recommendation apiv1.ResourceList, containerLimits apiv1.ResourceList) []string { +func CapRecommendationToContainerLimit(recommendation apiv1.ResourceList, containerLimits apiv1.ResourceList) []string { annotations := make([]string, 0) // Iterate over limits set in the container. Unset means Infinite limit. for resourceName, limit := range containerLimits { From 30aebbe05afdb7ea6666199f11134c998f9ac9b9 Mon Sep 17 00:00:00 2001 From: Brendan Palkowski Date: Mon, 6 Oct 2025 21:04:40 -0400 Subject: [PATCH 3/7] Fix VPA startup boost validation error messages Fix VPA startup boost validation error messages --- .../resource/vpa/handler.go | 15 ++++--- .../resource/vpa/handler_test.go | 43 ++++++++++++++++--- 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go index daa555f05efa..60d50ac61b01 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go @@ -221,24 +221,29 @@ func validateStartupBoost(startupBoost *vpa_types.StartupBoost, isCreate bool) e return nil } boostType := cpuBoost.Type + if boostType == "" { + return fmt.Errorf("startupBoost.cpu.type field is required and must be either %s or %s", + vpa_types.FactorStartupBoostType, vpa_types.QuantityStartupBoostType) + } switch boostType { case vpa_types.FactorStartupBoostType: if cpuBoost.Factor == nil { - return fmt.Errorf("StartupBoost.CPU.Factor is required when Type is Factor") + return fmt.Errorf("startupBoost.cpu.factor is required when type is Factor") } if *cpuBoost.Factor < 1 { - return fmt.Errorf("invalid 
StartupBoost.CPU.Factor: must be >= 1 for Type Factor") + return fmt.Errorf("invalid startupBoost.cpu.factor: must be >= 1 for type Factor") } case vpa_types.QuantityStartupBoostType: if cpuBoost.Quantity == nil { - return fmt.Errorf("StartupBoost.CPU.Quantity is required when Type is Quantity") + return fmt.Errorf("startupBoost.cpu.quantity is required when type is Quantity") } if err := validateCPUResolution(*cpuBoost.Quantity); err != nil { - return fmt.Errorf("invalid StartupBoost.CPU.Quantity: %v", err) + return fmt.Errorf("invalid startupBoost.cpu.quantity: %v", err) } default: - return fmt.Errorf("unexpected StartupBoost.CPU.Type value %s", boostType) + return fmt.Errorf("startupBoost.cpu.type field is required and must be either %s or %s, got %v", + vpa_types.FactorStartupBoostType, vpa_types.QuantityStartupBoostType, boostType) } return nil } diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler_test.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler_test.go index 9a9c15eee645..e40008422035 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler_test.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler_test.go @@ -381,7 +381,7 @@ func TestValidateVPA(t *testing.T) { }, }, isCreate: true, - expectError: fmt.Errorf("invalid startupBoost: invalid StartupBoost.CPU.Factor: must be >= 1 for Type Factor"), + expectError: fmt.Errorf("invalid startupBoost: invalid startupBoost.cpu.factor: must be >= 1 for type Factor"), }, { name: "container startupBoost with bad factor", @@ -403,7 +403,7 @@ func TestValidateVPA(t *testing.T) { }, }, isCreate: true, - expectError: fmt.Errorf("invalid startupBoost in container loot box: invalid StartupBoost.CPU.Factor: must be >= 1 for Type Factor"), + expectError: fmt.Errorf("invalid startupBoost in container loot box: invalid startupBoost.cpu.factor: must be >= 1 for type Factor"), }, { name: "top-level startupBoost with bad 
quantity", @@ -422,7 +422,7 @@ func TestValidateVPA(t *testing.T) { }, }, isCreate: true, - expectError: fmt.Errorf("invalid startupBoost: invalid StartupBoost.CPU.Quantity: CPU [%v] must be a whole number of milli CPUs", &badCPUBoostQuantity), + expectError: fmt.Errorf("invalid startupBoost: invalid startupBoost.cpu.quantity: CPU [%v] must be a whole number of milli CPUs", &badCPUBoostQuantity), }, { name: "container startupBoost with bad quantity", @@ -448,7 +448,7 @@ func TestValidateVPA(t *testing.T) { }, }, isCreate: true, - expectError: fmt.Errorf("invalid startupBoost in container loot box: invalid StartupBoost.CPU.Quantity: CPU [%v] must be a whole number of milli CPUs", &badCPUBoostQuantity), + expectError: fmt.Errorf("invalid startupBoost in container loot box: invalid startupBoost.cpu.quantity: CPU [%v] must be a whole number of milli CPUs", &badCPUBoostQuantity), }, { name: "top-level startupBoost with bad type", @@ -462,7 +462,7 @@ func TestValidateVPA(t *testing.T) { }, }, isCreate: true, - expectError: fmt.Errorf("invalid startupBoost: unexpected StartupBoost.CPU.Type value bad"), + expectError: fmt.Errorf("invalid startupBoost: startupBoost.cpu.type field is required and must be either %s or %s, got %v", vpa_types.FactorStartupBoostType, vpa_types.QuantityStartupBoostType, badCPUBoostType), }, { name: "container startupBoost with bad type", @@ -483,7 +483,38 @@ func TestValidateVPA(t *testing.T) { }, }, isCreate: true, - expectError: fmt.Errorf("invalid startupBoost in container loot box: unexpected StartupBoost.CPU.Type value bad"), + expectError: fmt.Errorf("invalid startupBoost in container loot box: startupBoost.cpu.type field is required and must be either %s or %s, got %v", vpa_types.FactorStartupBoostType, vpa_types.QuantityStartupBoostType, badCPUBoostType), + }, + { + name: "top-level startupBoost with empty type", + vpa: vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + StartupBoost: &vpa_types.StartupBoost{ + 
CPU: &vpa_types.GenericStartupBoost{}, + }, + }, + }, + isCreate: true, + expectError: fmt.Errorf("invalid startupBoost: startupBoost.cpu.type field is required and must be either %s or %s", vpa_types.FactorStartupBoostType, vpa_types.QuantityStartupBoostType), + }, + { + name: "container startupBoost with empty type", + vpa: vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + ResourcePolicy: &vpa_types.PodResourcePolicy{ + ContainerPolicies: []vpa_types.ContainerResourcePolicy{ + { + ContainerName: "loot box", + StartupBoost: &vpa_types.StartupBoost{ + CPU: &vpa_types.GenericStartupBoost{}, + }, + }, + }, + }, + }, + }, + isCreate: true, + expectError: fmt.Errorf("invalid startupBoost in container loot box: startupBoost.cpu.type field is required and must be either %s or %s", vpa_types.FactorStartupBoostType, vpa_types.QuantityStartupBoostType), }, { name: "top-level startupBoost with valid factor", From b25e43c4785fd60f16ee5862ab69bc6496bbdec4 Mon Sep 17 00:00:00 2001 From: Kam Saiyed Date: Mon, 6 Oct 2025 17:29:02 +0000 Subject: [PATCH 4/7] Make changes to updater to add the unboosting logic --- vertical-pod-autoscaler/go.mod | 4 + vertical-pod-autoscaler/go.sum | 8 + .../resource/pod/patch/util.go | 8 + .../pkg/updater/inplace/resource_updates.go | 24 +- .../inplace/unboost_patch_calculator.go | 49 +++ .../pkg/updater/logic/updater.go | 79 ++++- .../pkg/updater/logic/updater_test.go | 297 +++++++++++++++++- vertical-pod-autoscaler/pkg/updater/main.go | 2 +- .../restriction/pods_inplace_restriction.go | 26 ++ .../pkg/utils/test/test_utils.go | 6 + vertical-pod-autoscaler/pkg/utils/vpa/api.go | 27 ++ .../pkg/utils/vpa/api_test.go | 117 +++++++ 12 files changed, 613 insertions(+), 34 deletions(-) create mode 100644 vertical-pod-autoscaler/pkg/updater/inplace/unboost_patch_calculator.go diff --git a/vertical-pod-autoscaler/go.mod b/vertical-pod-autoscaler/go.mod index a0d3baaf954e..40774e18c6b3 100644 --- a/vertical-pod-autoscaler/go.mod 
+++ b/vertical-pod-autoscaler/go.mod @@ -18,6 +18,7 @@ require ( k8s.io/code-generator v0.34.0 k8s.io/component-base v0.34.0 k8s.io/klog/v2 v2.130.1 + k8s.io/kubernetes v1.34.0 k8s.io/metrics v0.34.0 k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d ) @@ -68,6 +69,9 @@ require ( gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/apiextensions-apiserver v0.0.0 // indirect + k8s.io/apiserver v0.34.0 // indirect + k8s.io/controller-manager v0.0.0 // indirect k8s.io/gengo/v2 v2.0.0-20250820003526-c297c0c1eb9d // indirect k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 // indirect sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect diff --git a/vertical-pod-autoscaler/go.sum b/vertical-pod-autoscaler/go.sum index d348d1ed8903..5035bc5745a5 100644 --- a/vertical-pod-autoscaler/go.sum +++ b/vertical-pod-autoscaler/go.sum @@ -173,20 +173,28 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= k8s.io/api v0.34.0 h1:L+JtP2wDbEYPUeNGbeSa/5GwFtIA662EmT2YSLOkAVE= k8s.io/api v0.34.0/go.mod h1:YzgkIzOOlhl9uwWCZNqpw6RJy9L2FK4dlJeayUoydug= +k8s.io/apiextensions-apiserver v0.34.0 h1:B3hiB32jV7BcyKcMU5fDaDxk882YrJ1KU+ZSkA9Qxoc= +k8s.io/apiextensions-apiserver v0.34.0/go.mod h1:hLI4GxE1BDBy9adJKxUxCEHBGZtGfIg98Q+JmTD7+g0= k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4= k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= +k8s.io/apiserver v0.34.0 h1:Z51fw1iGMqN7uJ1kEaynf2Aec1Y774PqU+FVWCFV3Jg= +k8s.io/apiserver v0.34.0/go.mod h1:52ti5YhxAvewmmpVRqlASvaqxt0gKJxvCeW7ZrwgazQ= k8s.io/client-go v0.34.0 h1:YoWv5r7bsBfb0Hs2jh8SOvFbKzzxyNo0nSb0zC19KZo= k8s.io/client-go v0.34.0/go.mod h1:ozgMnEKXkRjeMvBZdV1AijMHLTh3pbACPvK7zFR+QQY= k8s.io/code-generator v0.34.0 h1:Ze2i1QsvUprIlX3oHiGv09BFQRLCz+StA8qKwwFzees= k8s.io/code-generator 
v0.34.0/go.mod h1:Py2+4w2HXItL8CGhks8uI/wS3Y93wPKO/9mBQUYNua0= k8s.io/component-base v0.34.0 h1:bS8Ua3zlJzapklsB1dZgjEJuJEeHjj8yTu1gxE2zQX8= k8s.io/component-base v0.34.0/go.mod h1:RSCqUdvIjjrEm81epPcjQ/DS+49fADvGSCkIP3IC6vg= +k8s.io/controller-manager v0.34.0 h1:oCHoqS8dcFp7zDSu7HUvTpakq3isSxil3GprGGlJMsE= +k8s.io/controller-manager v0.34.0/go.mod h1:XFto21U+Mm9BT8r/Jd5E4tHCGtwjKAUFOuDcqaj2VK0= k8s.io/gengo/v2 v2.0.0-20250820003526-c297c0c1eb9d h1:qUrYOinhdAUL0xxhA4gPqogPBaS9nIq2l2kTb6pmeB0= k8s.io/gengo/v2 v2.0.0-20250820003526-c297c0c1eb9d/go.mod h1:EJykeLsmFC60UQbYJezXkEsG2FLrt0GPNkU5iK5GWxU= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 h1:liMHz39T5dJO1aOKHLvwaCjDbf07wVh6yaUlTpunnkE= k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts= +k8s.io/kubernetes v1.34.0 h1:NvUrwPAVB4W3mSOpJ/RtNGHWWYyUP/xPaX5rUSpzA0w= +k8s.io/kubernetes v1.34.0/go.mod h1:iu+FhII+Oc/1gGWLJcer6wpyih441aNFHl7Pvm8yPto= k8s.io/metrics v0.34.0 h1:nYSfG2+tnL6/MRC2I+sGHjtNEGoEoM/KktgGOoQFwws= k8s.io/metrics v0.34.0/go.mod h1:KCuXmotE0v4AvoARKUP8NC4lUnbK/Du1mluGdor5h4M= k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d h1:wAhiDyZ4Tdtt7e46e9M5ZSAJ/MnPGPs+Ki1gHw4w1R0= diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go index 0c68ab6cd557..b930be0f1988 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go @@ -43,6 +43,14 @@ func GetAddAnnotationPatch(annotationName, annotationValue string) resource_admi } } +// GetRemoveAnnotationPatch returns a patch to remove an annotation. 
+func GetRemoveAnnotationPatch(annotationName string) resource_admission.PatchRecord {
+	return resource_admission.PatchRecord{
+		Op:   "remove",
+		Path: fmt.Sprintf("/metadata/annotations/%s", annotationName),
+	}
+}
+
 // GetAddResourceRequirementValuePatch returns a patch record to add resource requirements to a container.
 func GetAddResourceRequirementValuePatch(i int, kind string, resource core.ResourceName, quantity resource.Quantity) resource_admission.PatchRecord {
 	return resource_admission.PatchRecord{
diff --git a/vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go b/vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go
index d15d2bb67d73..6f3c4c200eec 100644
--- a/vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go
+++ b/vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go
@@ -25,6 +25,7 @@ import (
 	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch"
 	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/recommendation"
 	vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
+	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations"
 	vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa"
 )
 
@@ -49,9 +50,33 @@ func (*resourcesInplaceUpdatesPatchCalculator) PatchResourceTarget() patch.Patch
 func (c *resourcesInplaceUpdatesPatchCalculator) CalculatePatches(pod *core.Pod, vpa *vpa_types.VerticalPodAutoscaler) ([]resource_admission.PatchRecord, error) {
 	result := []resource_admission.PatchRecord{}
 
-	containersResources, _, err := c.recommendationProvider.GetContainersResourcesForPod(pod, vpa)
-	if err != nil {
-		return []resource_admission.PatchRecord{}, fmt.Errorf("failed to calculate resource patch for pod %s/%s: %v", pod.Namespace, pod.Name, err)
+	var containersResources []vpa_api_util.ContainerResources
+	if vpa_api_util.GetUpdateMode(vpa) == vpa_types.UpdateModeOff {
+		// If update mode is "Off", we don't want to apply any recommendations,
+		// but we still want to unboost.
+		original, err := annotations.GetOriginalResourcesFromAnnotation(pod)
+		if err != nil {
+			return nil, err
+		}
+		if original == nil {
+			// (nil, nil) means the pod carries no boost annotation: in "Off" mode
+			// there is nothing to restore, so emit no patches instead of panicking.
+			return result, nil
+		}
+		// NOTE(review): the annotation holds a single container's resources, so only
+		// one ContainerResources entry is built here - confirm for multi-container pods.
+		containersResources = []vpa_api_util.ContainerResources{
+			{
+				Requests: original.Requests,
+				Limits:   original.Limits,
+			},
+		}
+	} else {
+		var err error
+		containersResources, _, err = c.recommendationProvider.GetContainersResourcesForPod(pod, vpa)
+		if err != nil {
+			return []resource_admission.PatchRecord{}, fmt.Errorf("failed to calculate resource patch for pod %s/%s: %v", pod.Namespace, pod.Name, err)
+		}
 	}
 
 	for i, containerResources := range containersResources {
diff --git a/vertical-pod-autoscaler/pkg/updater/inplace/unboost_patch_calculator.go b/vertical-pod-autoscaler/pkg/updater/inplace/unboost_patch_calculator.go
new file mode 100644
index 000000000000..fd6045500e80
--- /dev/null
+++ b/vertical-pod-autoscaler/pkg/updater/inplace/unboost_patch_calculator.go
@@ -0,0 +1,49 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package inplace
+
+import (
+	core "k8s.io/api/core/v1"
+
+	resource_admission "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource"
+	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch"
+	vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
+	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations"
+	vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa"
+)
+
+type unboostAnnotationPatchCalculator struct{} // stateless; its only output is the annotation-removal patch
+
+// NewUnboostAnnotationCalculator returns a patch.Calculator that removes the startup CPU boost annotation.
+func NewUnboostAnnotationCalculator() patch.Calculator {
+	return &unboostAnnotationPatchCalculator{}
+}
+
+// PatchResourceTarget reports patch.Pod as the target of this calculator's patches.
+func (*unboostAnnotationPatchCalculator) PatchResourceTarget() patch.PatchResourceTarget {
+	return patch.Pod
+}
+
+// CalculatePatches returns one patch removing annotations.StartupCPUBoostAnnotation when vpa_api_util.PodHasCPUBoostInProgress and vpa_api_util.IsPodReadyAndStartupBoostDurationPassed both hold, and an empty slice otherwise.
+func (c *unboostAnnotationPatchCalculator) CalculatePatches(pod *core.Pod, vpa *vpa_types.VerticalPodAutoscaler) ([]resource_admission.PatchRecord, error) { + if vpa_api_util.PodHasCPUBoostInProgress(pod) && vpa_api_util.IsPodReadyAndStartupBoostDurationPassed(pod, vpa) { + return []resource_admission.PatchRecord{ + patch.GetRemoveAnnotationPatch(annotations.StartupCPUBoostAnnotation), + }, nil + } + return []resource_admission.PatchRecord{}, nil +} diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater.go b/vertical-pod-autoscaler/pkg/updater/logic/updater.go index ffe266d7cbd0..ec4a4091ba48 100644 --- a/vertical-pod-autoscaler/pkg/updater/logic/updater.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater.go @@ -171,13 +171,15 @@ func (u *updater) RunOnce(ctx context.Context) { klog.V(3).InfoS("Skipping VPA object in ignored namespace", "vpa", klog.KObj(vpa), "namespace", vpa.Namespace) continue } - // Log deprecation warnings for VPAs using deprecated modes logDeprecationWarnings(vpa) - if vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeRecreate && - vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeAuto && vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeInPlaceOrRecreate { - klog.V(3).InfoS("Skipping VPA object because its mode is not \"InPlaceOrRecreate\", \"Recreate\" or \"Auto\"", "vpa", klog.KObj(vpa)) + updateMode := vpa_api_util.GetUpdateMode(vpa) + if updateMode != vpa_types.UpdateModeRecreate && + updateMode != vpa_types.UpdateModeAuto && + updateMode != vpa_types.UpdateModeInPlaceOrRecreate && + vpa.Spec.StartupBoost == nil { + klog.V(3).InfoS("Skipping VPA object because its mode is not \"InPlaceOrRecreate\", \"Recreate\" or \"Auto\" and it doesn't have startupBoost configured", "vpa", klog.KObj(vpa)) continue } selector, err := u.selectorFetcher.Fetch(ctx, vpa) @@ -242,8 +244,7 @@ func (u *updater) RunOnce(ctx context.Context) { defer vpasWithInPlaceUpdatablePodsCounter.Observe() defer 
vpasWithInPlaceUpdatedPodsCounter.Observe() - // NOTE: this loop assumes that controlledPods are filtered - // to contain only Pods controlled by a VPA in auto, recreate, or inPlaceOrRecreate mode + cpuStartupBoostEnabled := features.Enabled(features.CPUStartupBoost) for vpa, livePods := range controlledPods { vpaSize := len(livePods) updateMode := vpa_api_util.GetUpdateMode(vpa) @@ -254,31 +255,80 @@ func (u *updater) RunOnce(ctx context.Context) { continue } - evictionLimiter := u.restrictionFactory.NewPodsEvictionRestriction(creatorToSingleGroupStatsMap, podToReplicaCreatorMap) inPlaceLimiter := u.restrictionFactory.NewPodsInPlaceRestriction(creatorToSingleGroupStatsMap, podToReplicaCreatorMap) + podsAvailableForUpdate := make([]*apiv1.Pod, 0) + podsToUnboost := make([]*apiv1.Pod, 0) + withInPlaceUpdated := false - podsForInPlace := make([]*apiv1.Pod, 0) + if cpuStartupBoostEnabled && vpa.Spec.StartupBoost != nil { + // First, handle unboosting for pods that have finished their startup period. 
+ for _, pod := range livePods { + if vpa_api_util.PodHasCPUBoostInProgress(pod) { + if vpa_api_util.IsPodReadyAndStartupBoostDurationPassed(pod, vpa) { + podsToUnboost = append(podsToUnboost, pod) + } + } else { + podsAvailableForUpdate = append(podsAvailableForUpdate, pod) + } + } + + // Perform unboosting + for _, pod := range podsToUnboost { + if inPlaceLimiter.CanUnboost(pod, vpa) { + klog.V(2).InfoS("Unboosting pod", "pod", klog.KObj(pod)) + err = u.inPlaceRateLimiter.Wait(ctx) + if err != nil { + klog.V(0).InfoS("In-place rate limiter wait failed for unboosting", "error", err) + return + } + err := inPlaceLimiter.InPlaceUpdate(pod, vpa, u.eventRecorder) + if err != nil { + klog.V(0).InfoS("Unboosting failed", "error", err, "pod", klog.KObj(pod)) + metrics_updater.RecordFailedInPlaceUpdate(vpaSize, "UnboostError") + } else { + klog.V(2).InfoS("Successfully unboosted pod", "pod", klog.KObj(pod)) + withInPlaceUpdated = true + metrics_updater.AddInPlaceUpdatedPod(vpaSize) + } + } + } + } else { + // CPU Startup Boost is not enabled or configured for this VPA, + // so all live pods are available for potential standard VPA updates. 
+ podsAvailableForUpdate = livePods + } + + if updateMode == vpa_types.UpdateModeOff || updateMode == vpa_types.UpdateModeInitial { + continue + } + + evictionLimiter := u.restrictionFactory.NewPodsEvictionRestriction(creatorToSingleGroupStatsMap, podToReplicaCreatorMap) podsForEviction := make([]*apiv1.Pod, 0) + podsForInPlace := make([]*apiv1.Pod, 0) + withInPlaceUpdatable := false + withEvictable := false if updateMode == vpa_types.UpdateModeInPlaceOrRecreate && inPlaceFeatureEnable { - podsForInPlace = u.getPodsUpdateOrder(filterNonInPlaceUpdatablePods(livePods, inPlaceLimiter), vpa) + podsForInPlace = u.getPodsUpdateOrder(filterNonInPlaceUpdatablePods(podsAvailableForUpdate, inPlaceLimiter), vpa) inPlaceUpdatablePodsCounter.Add(vpaSize, len(podsForInPlace)) + if len(podsForInPlace) > 0 { + withInPlaceUpdatable = true + } } else { // If the feature gate is not enabled but update mode is InPlaceOrRecreate, updater will always fallback to eviction. if updateMode == vpa_types.UpdateModeInPlaceOrRecreate { klog.InfoS("Warning: feature gate is not enabled for this updateMode", "featuregate", features.InPlaceOrRecreate, "updateMode", vpa_types.UpdateModeInPlaceOrRecreate) } - podsForEviction = u.getPodsUpdateOrder(filterNonEvictablePods(livePods, evictionLimiter), vpa) + podsForEviction = u.getPodsUpdateOrder(filterNonEvictablePods(podsAvailableForUpdate, evictionLimiter), vpa) evictablePodsCounter.Add(vpaSize, updateMode, len(podsForEviction)) + if len(podsForEviction) > 0 { + withEvictable = true + } } - withInPlaceUpdatable := false - withInPlaceUpdated := false - withEvictable := false withEvicted := false for _, pod := range podsForInPlace { - withInPlaceUpdatable = true decision := inPlaceLimiter.CanInPlaceUpdate(pod) if decision == utils.InPlaceDeferred { @@ -306,7 +356,6 @@ func (u *updater) RunOnce(ctx context.Context) { } for _, pod := range podsForEviction { - withEvictable = true if !evictionLimiter.CanEvict(pod) { continue } diff --git 
a/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go b/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go index 65fc196ba8eb..9505ee4752e1 100644 --- a/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go @@ -62,6 +62,7 @@ func TestRunOnce_Mode(t *testing.T) { expectedInPlacedCount int canEvict bool canInPlaceUpdate utils.InPlaceDecision + isCPUBoostTest bool }{ { name: "with Auto mode", @@ -133,6 +134,50 @@ func TestRunOnce_Mode(t *testing.T) { canEvict: true, canInPlaceUpdate: utils.InPlaceApproved, }, + { + name: "with InPlaceOrRecreate mode and unboost", + updateMode: vpa_types.UpdateModeInPlaceOrRecreate, + shouldInPlaceFail: false, + expectFetchCalls: true, + expectedEvictionCount: 0, + expectedInPlacedCount: 5, + canEvict: true, + canInPlaceUpdate: utils.InPlaceApproved, + isCPUBoostTest: true, + }, + { + name: "with Recreate mode and unboost", + updateMode: vpa_types.UpdateModeRecreate, + shouldInPlaceFail: false, + expectFetchCalls: true, + expectedEvictionCount: 0, + expectedInPlacedCount: 5, + canEvict: true, + canInPlaceUpdate: utils.InPlaceApproved, + isCPUBoostTest: true, + }, + { + name: "with Auto mode and unboost", + updateMode: vpa_types.UpdateModeAuto, + shouldInPlaceFail: false, + expectFetchCalls: true, + expectedEvictionCount: 0, + expectedInPlacedCount: 5, + canEvict: true, + canInPlaceUpdate: utils.InPlaceApproved, + isCPUBoostTest: true, + }, + { + name: "with InPlaceOrRecreate mode and unboost and In-place fails", + updateMode: vpa_types.UpdateModeInPlaceOrRecreate, + shouldInPlaceFail: true, + expectFetchCalls: true, + expectedEvictionCount: 0, + expectedInPlacedCount: 5, + canEvict: true, + canInPlaceUpdate: utils.InPlaceApproved, + isCPUBoostTest: true, + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { @@ -145,6 +190,7 @@ func TestRunOnce_Mode(t *testing.T) { tc.expectedEvictionCount, tc.expectedInPlacedCount, tc.canInPlaceUpdate, + 
tc.isCPUBoostTest, ) }) } @@ -184,6 +230,7 @@ func TestRunOnce_Status(t *testing.T) { tc.expectedEvictionCount, tc.expectedInPlacedCount, utils.InPlaceApproved, + false, ) }) } @@ -198,8 +245,10 @@ func testRunOnceBase( expectedEvictionCount int, expectedInPlacedCount int, canInPlaceUpdate utils.InPlaceDecision, + isCPUBoostTest bool, ) { featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.InPlaceOrRecreate, true) + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.CPUStartupBoost, true) ctrl := gomock.NewController(t) defer ctrl.Finish() @@ -225,6 +274,18 @@ func testRunOnceBase( eviction := &test.PodsEvictionRestrictionMock{} inplace := &test.PodsInPlaceRestrictionMock{} + vpaObj := test.VerticalPodAutoscaler(). + WithContainer(containerName). + WithTarget("2", "200M"). + WithMinAllowed(containerName, "1", "100M"). + WithMaxAllowed(containerName, "3", "1G"). + WithTargetRef(&v1.CrossVersionObjectReference{ + Kind: rc.Kind, + Name: rc.Name, + APIVersion: rc.APIVersion, + }). + Get() + for i := range pods { pods[i] = test.Pod().WithName("test_"+strconv.Itoa(i)). AddContainer(test.Container().WithName(containerName).WithCPURequest(resource.MustParse("1")).WithMemRequest(resource.MustParse("100M")).Get()). 
@@ -232,15 +293,30 @@ func testRunOnceBase( Get() pods[i].Labels = labels + if isCPUBoostTest { + pods[i].Annotations = map[string]string{ + "startup-cpu-boost": "", + } + pods[i].Status.Conditions = []apiv1.PodCondition{ + { + Type: apiv1.PodReady, + Status: apiv1.ConditionTrue, + }, + } + } - inplace.On("CanInPlaceUpdate", pods[i]).Return(canInPlaceUpdate) + if !isCPUBoostTest { + inplace.On("CanInPlaceUpdate", pods[i]).Return(canInPlaceUpdate) + eviction.On("CanEvict", pods[i]).Return(true) + } else { + inplace.On("CanUnboost", pods[i], vpaObj).Return(isCPUBoostTest) + } if shouldInPlaceFail { inplace.On("InPlaceUpdate", pods[i], nil).Return(fmt.Errorf("in-place update failed")) } else { inplace.On("InPlaceUpdate", pods[i], nil).Return(nil) } - eviction.On("CanEvict", pods[i]).Return(true) eviction.On("Evict", pods[i], nil).Return(nil) } @@ -252,21 +328,17 @@ func testRunOnceBase( podLister := &test.PodListerMock{} podLister.On("List").Return(pods, nil) - targetRef := &v1.CrossVersionObjectReference{ - Kind: rc.Kind, - Name: rc.Name, - APIVersion: rc.APIVersion, - } - - vpaObj := test.VerticalPodAutoscaler(). - WithContainer(containerName). - WithTarget("2", "200M"). - WithMinAllowed(containerName, "1", "100M"). - WithMaxAllowed(containerName, "3", "1G"). - WithTargetRef(targetRef). 
- Get() vpaObj.Spec.UpdatePolicy = &vpa_types.PodUpdatePolicy{UpdateMode: &updateMode} + if isCPUBoostTest { + cpuStartupBoost := &vpa_types.GenericStartupBoost{ + Type: vpa_types.FactorStartupBoostType, + Duration: &metav1.Duration{Duration: 1 * time.Minute}, + } + vpaObj.Spec.StartupBoost = &vpa_types.StartupBoost{ + CPU: cpuStartupBoost, + } + } vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() mockSelectorFetcher := target_mock.NewMockVpaTargetSelectorFetcher(ctrl) @@ -567,3 +639,198 @@ func TestLogDeprecationWarnings(t *testing.T) { }) } } +func TestRunOnce_AutoUnboostThenEvict(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.InPlaceOrRecreate, true) + + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.CPUStartupBoost, true) + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + replicas := int32(5) + livePods := 5 + labels := map[string]string{"app": "testingApp"} + selector := parseLabelSelector("app = testingApp") + containerName := "container1" + rc := apiv1.ReplicationController{ + TypeMeta: metav1.TypeMeta{ + Kind: "ReplicationController", + APIVersion: "apps/v1", + }, + ObjectMeta: metav1.ObjectMeta{Name: "rc", Namespace: "default"}, + Spec: apiv1.ReplicationControllerSpec{Replicas: &replicas}, + } + pods := make([]*apiv1.Pod, livePods) + vpaObj := test.VerticalPodAutoscaler(). + WithContainer(containerName). + WithTarget("2", "200M"). + WithMinAllowed(containerName, "1", "100M"). + WithMaxAllowed(containerName, "3", "1G"). + WithTargetRef(&v1.CrossVersionObjectReference{Kind: rc.Kind, Name: rc.Name, APIVersion: rc.APIVersion}). + WithCPUStartupBoost(vpa_types.FactorStartupBoostType, nil, nil, "1m"). + Get() + + for i := range pods { + pods[i] = test.Pod().WithName("test_"+strconv.Itoa(i)). 
+ AddContainer(test.Container().WithName(containerName).WithCPURequest(resource.MustParse("1")).WithMemRequest(resource.MustParse("100M")).Get()). + WithCreator(&rc.ObjectMeta, &rc.TypeMeta). + Get() + pods[i].Labels = labels + } + + eviction := &test.PodsEvictionRestrictionMock{} + inplace := &test.PodsInPlaceRestrictionMock{} + factory := &restriction.FakePodsRestrictionFactory{Eviction: eviction, InPlace: inplace} + vpaLister := &test.VerticalPodAutoscalerListerMock{} + podLister := &test.PodListerMock{} + mockSelectorFetcher := target_mock.NewMockVpaTargetSelectorFetcher(ctrl) + + updater := &updater{ + vpaLister: vpaLister, + podLister: podLister, + restrictionFactory: factory, + evictionRateLimiter: rate.NewLimiter(rate.Inf, 0), + inPlaceRateLimiter: rate.NewLimiter(rate.Inf, 0), + evictionAdmission: priority.NewDefaultPodEvictionAdmission(), + recommendationProcessor: &test.FakeRecommendationProcessor{}, + selectorFetcher: mockSelectorFetcher, + controllerFetcher: controllerfetcher.FakeControllerFetcher{}, + useAdmissionControllerStatus: true, + statusValidator: newFakeValidator(true), + priorityProcessor: priority.NewProcessor(), + } + + // Cycle 1: Unboost the cpu + for i := range pods { + pods[i].Annotations = map[string]string{"startup-cpu-boost": ""} + pods[i].Status.Conditions = []apiv1.PodCondition{ + { + Type: apiv1.PodReady, + Status: apiv1.ConditionTrue, + }, + } + inplace.On("CanUnboost", pods[i], vpaObj).Return(true).Once() + inplace.On("InPlaceUpdate", pods[i], nil).Return(nil) + } + vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() + podLister.On("List").Return(pods, nil).Once() + mockSelectorFetcher.EXPECT().Fetch(gomock.Eq(vpaObj)).Return(selector, nil) + + updater.RunOnce(context.Background()) + inplace.AssertNumberOfCalls(t, "InPlaceUpdate", 5) + inplace.AssertNumberOfCalls(t, "CanUnboost", 5) + eviction.AssertNumberOfCalls(t, "Evict", 0) + + // Cycle 2: Regular patch which will lead to eviction + for i := 
range pods { + pods[i].Annotations = nil + inplace.On("CanUnboost", pods[i], vpaObj).Return(false).Once() + eviction.On("CanEvict", pods[i]).Return(true) + eviction.On("Evict", pods[i], nil).Return(nil) + } + vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() + podLister.On("List").Return(pods, nil).Once() + mockSelectorFetcher.EXPECT().Fetch(gomock.Eq(vpaObj)).Return(selector, nil) + + updater.RunOnce(context.Background()) + inplace.AssertNumberOfCalls(t, "InPlaceUpdate", 5) // all 5 from previous run only + inplace.AssertNumberOfCalls(t, "CanUnboost", 5) // all 5 from previous run only + eviction.AssertNumberOfCalls(t, "Evict", 5) +} + +func TestRunOnce_AutoUnboostThenInPlace(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.InPlaceOrRecreate, true) + + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.CPUStartupBoost, true) + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + replicas := int32(5) + livePods := 5 + labels := map[string]string{"app": "testingApp"} + selector := parseLabelSelector("app = testingApp") + containerName := "container1" + rc := apiv1.ReplicationController{ + TypeMeta: metav1.TypeMeta{ + Kind: "ReplicationController", + APIVersion: "apps/v1", + }, + ObjectMeta: metav1.ObjectMeta{Name: "rc", Namespace: "default"}, + Spec: apiv1.ReplicationControllerSpec{Replicas: &replicas}, + } + pods := make([]*apiv1.Pod, livePods) + vpaObj := test.VerticalPodAutoscaler(). + WithContainer(containerName). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + WithTarget("2", "200M"). + WithMinAllowed(containerName, "1", "100M"). + WithMaxAllowed(containerName, "3", "1G"). + WithTargetRef(&v1.CrossVersionObjectReference{Kind: rc.Kind, Name: rc.Name, APIVersion: rc.APIVersion}). + WithCPUStartupBoost(vpa_types.FactorStartupBoostType, nil, nil, "1m"). 
+ Get() + + for i := range pods { + pods[i] = test.Pod().WithName("test_"+strconv.Itoa(i)). + AddContainer(test.Container().WithName(containerName).WithCPURequest(resource.MustParse("1")).WithMemRequest(resource.MustParse("100M")).Get()). + WithCreator(&rc.ObjectMeta, &rc.TypeMeta). + Get() + pods[i].Labels = labels + } + + eviction := &test.PodsEvictionRestrictionMock{} + inplace := &test.PodsInPlaceRestrictionMock{} + factory := &restriction.FakePodsRestrictionFactory{Eviction: eviction, InPlace: inplace} + vpaLister := &test.VerticalPodAutoscalerListerMock{} + podLister := &test.PodListerMock{} + mockSelectorFetcher := target_mock.NewMockVpaTargetSelectorFetcher(ctrl) + + updater := &updater{ + vpaLister: vpaLister, + podLister: podLister, + restrictionFactory: factory, + evictionRateLimiter: rate.NewLimiter(rate.Inf, 0), + inPlaceRateLimiter: rate.NewLimiter(rate.Inf, 0), + evictionAdmission: priority.NewDefaultPodEvictionAdmission(), + recommendationProcessor: &test.FakeRecommendationProcessor{}, + selectorFetcher: mockSelectorFetcher, + controllerFetcher: controllerfetcher.FakeControllerFetcher{}, + useAdmissionControllerStatus: true, + statusValidator: newFakeValidator(true), + priorityProcessor: priority.NewProcessor(), + } + + // Cycle 1: Unboost the cpu + for i := range pods { + pods[i].Annotations = map[string]string{"startup-cpu-boost": ""} + pods[i].Status.Conditions = []apiv1.PodCondition{ + { + Type: apiv1.PodReady, + Status: apiv1.ConditionTrue, + }, + } + inplace.On("CanUnboost", pods[i], vpaObj).Return(true).Once() + inplace.On("InPlaceUpdate", pods[i], nil).Return(nil) + } + vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() + podLister.On("List").Return(pods, nil).Once() + mockSelectorFetcher.EXPECT().Fetch(gomock.Eq(vpaObj)).Return(selector, nil) + + updater.RunOnce(context.Background()) + inplace.AssertNumberOfCalls(t, "InPlaceUpdate", 5) + inplace.AssertNumberOfCalls(t, "CanUnboost", 5) + 
eviction.AssertNumberOfCalls(t, "Evict", 0) + + // Cycle 2: Regular patch which will lead to an in-place update + for i := range pods { + pods[i].Annotations = nil + inplace.On("CanInPlaceUpdate", pods[i]).Return(utils.InPlaceApproved) + inplace.On("InPlaceUpdate", pods[i], nil).Return(nil) + } + vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() + podLister.On("List").Return(pods, nil).Once() + mockSelectorFetcher.EXPECT().Fetch(gomock.Eq(vpaObj)).Return(selector, nil) + + updater.RunOnce(context.Background()) + inplace.AssertNumberOfCalls(t, "InPlaceUpdate", 10) + inplace.AssertNumberOfCalls(t, "CanUnboost", 5) // all 5 from previous run only + eviction.AssertNumberOfCalls(t, "Evict", 0) +} diff --git a/vertical-pod-autoscaler/pkg/updater/main.go b/vertical-pod-autoscaler/pkg/updater/main.go index 8394fd54b29c..d120841bac8e 100644 --- a/vertical-pod-autoscaler/pkg/updater/main.go +++ b/vertical-pod-autoscaler/pkg/updater/main.go @@ -206,7 +206,7 @@ func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) { recommendationProvider := recommendation.NewProvider(limitRangeCalculator, vpa_api_util.NewCappingRecommendationProcessor(limitRangeCalculator)) - calculators := []patch.Calculator{inplace.NewResourceInPlaceUpdatesCalculator(recommendationProvider), inplace.NewInPlaceUpdatedCalculator()} + calculators := []patch.Calculator{inplace.NewResourceInPlaceUpdatesCalculator(recommendationProvider), inplace.NewInPlaceUpdatedCalculator(), inplace.NewUnboostAnnotationCalculator()} // TODO: use SharedInformerFactory in updater updater, err := updater.NewUpdater( diff --git a/vertical-pod-autoscaler/pkg/updater/restriction/pods_inplace_restriction.go b/vertical-pod-autoscaler/pkg/updater/restriction/pods_inplace_restriction.go index b38b8f306385..35573c0d2ccb 100644 --- a/vertical-pod-autoscaler/pkg/updater/restriction/pods_inplace_restriction.go +++ b/vertical-pod-autoscaler/pkg/updater/restriction/pods_inplace_restriction.go @@ 
-35,6 +35,7 @@ import ( vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" utils "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/utils" + vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" ) // TODO: Make these configurable by flags @@ -57,6 +58,8 @@ type PodsInPlaceRestriction interface { InPlaceUpdate(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, eventRecorder record.EventRecorder) error // CanInPlaceUpdate checks if pod can be safely updated in-place. If not, it will return a decision to potentially evict the pod. CanInPlaceUpdate(pod *apiv1.Pod) utils.InPlaceDecision + // CanUnboost checks if a pod can be safely unboosted. + CanUnboost(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler) bool } // PodsInPlaceRestrictionImpl is the implementation of the PodsInPlaceRestriction interface. @@ -98,6 +101,29 @@ func (ip *PodsInPlaceRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) utils.InP return utils.InPlaceDeferred } +// CanUnboost checks if a pod can be safely unboosted. +func (ip *PodsInPlaceRestrictionImpl) CanUnboost(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler) bool { + if !features.Enabled(features.CPUStartupBoost) { + return false + } + durationPassed := vpa_api_util.IsPodReadyAndStartupBoostDurationPassed(pod, vpa) + hasAnnotation := vpa_api_util.PodHasCPUBoostInProgress(pod) + + klog.V(2).InfoS("Checking if pod can be unboosted", "pod", klog.KObj(pod), "durationPassed", durationPassed, "hasAnnotation", hasAnnotation) + + if !durationPassed || !hasAnnotation { + return false + } + cr, present := ip.podToReplicaCreatorMap[getPodID(pod)] + if present { + singleGroupStats, present := ip.creatorToSingleGroupStatsMap[cr] + if present { + return singleGroupStats.isPodDisruptable() + } + } + return false +} + // InPlaceUpdate sends calculates patches and sends resize request to api client. 
Returns error if pod cannot be in-place updated or if client returned error. // Does not check if pod was actually in-place updated after grace period. func (ip *PodsInPlaceRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, eventRecorder record.EventRecorder) error { diff --git a/vertical-pod-autoscaler/pkg/utils/test/test_utils.go b/vertical-pod-autoscaler/pkg/utils/test/test_utils.go index 8ae360177f31..202985b0bb62 100644 --- a/vertical-pod-autoscaler/pkg/utils/test/test_utils.go +++ b/vertical-pod-autoscaler/pkg/utils/test/test_utils.go @@ -139,6 +139,12 @@ func (m *PodsInPlaceRestrictionMock) CanInPlaceUpdate(pod *apiv1.Pod) utils.InPl return args.Get(0).(utils.InPlaceDecision) } +// CanUnboost is a mock implementation of PodsInPlaceRestriction.CanUnboost +func (m *PodsInPlaceRestrictionMock) CanUnboost(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler) bool { + args := m.Called(pod, vpa) + return args.Bool(0) +} + // PodListerMock is a mock of PodLister type PodListerMock struct { mock.Mock diff --git a/vertical-pod-autoscaler/pkg/utils/vpa/api.go b/vertical-pod-autoscaler/pkg/utils/vpa/api.go index b30f3fc6039d..ff41c3b3c88b 100644 --- a/vertical-pod-autoscaler/pkg/utils/vpa/api.go +++ b/vertical-pod-autoscaler/pkg/utils/vpa/api.go @@ -32,12 +32,14 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" + podutil "k8s.io/kubernetes/pkg/api/v1/pod" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" vpa_clientset "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned" vpa_api "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned/typed/autoscaling.k8s.io/v1" vpa_lister "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/listers/autoscaling.k8s.io/v1" controllerfetcher "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" 
) // VpaWithSelector is a pair of VPA and its selector. @@ -291,3 +293,28 @@ func CreateOrUpdateVpaCheckpoint(vpaCheckpointClient vpa_api.VerticalPodAutoscal } return nil } + +// IsPodReadyAndStartupBoostDurationPassed returns true if no non-zero startup boost duration is configured, or if the pod is ready and has been Ready for longer than that duration. +func IsPodReadyAndStartupBoostDurationPassed(pod *core.Pod, vpa *vpa_types.VerticalPodAutoscaler) bool { + if vpa.Spec.StartupBoost == nil || vpa.Spec.StartupBoost.CPU.Duration == nil || vpa.Spec.StartupBoost.CPU.Duration.Duration == 0 { + return true + } + if !podutil.IsPodReady(pod) { + return false + } + for _, cond := range pod.Status.Conditions { + if cond.Type == core.PodReady { + return time.Since(cond.LastTransitionTime.Time) > vpa.Spec.StartupBoost.CPU.Duration.Duration + } + } + return false +} + +// PodHasCPUBoostInProgress returns true if the pod has the CPU boost annotation. +func PodHasCPUBoostInProgress(pod *core.Pod) bool { + if pod.Annotations == nil { + return false + } + _, found := pod.Annotations[annotations.StartupCPUBoostAnnotation] + return found +} diff --git a/vertical-pod-autoscaler/pkg/utils/vpa/api_test.go b/vertical-pod-autoscaler/pkg/utils/vpa/api_test.go index 1f1c712f9e95..5c14a0e0529b 100644 --- a/vertical-pod-autoscaler/pkg/utils/vpa/api_test.go +++ b/vertical-pod-autoscaler/pkg/utils/vpa/api_test.go @@ -399,3 +399,120 @@ func TestFindParentControllerForPod(t *testing.T) { }) } } + +func TestIsPodReadyAndStartupBoostDurationPassed(t *testing.T) { + now := meta.Now() + past := meta.Time{Time: now.Add(-2 * time.Minute)} + testCases := []struct { + name string + pod *core.Pod + vpa *vpa_types.VerticalPodAutoscaler + expected bool + }{ + { + name: "No StartupBoost config", + pod: &core.Pod{}, + vpa: &vpa_types.VerticalPodAutoscaler{}, + expected: true, + }, + { + name: "No duration in StartupBoost", + pod: &core.Pod{}, + vpa: test.VerticalPodAutoscaler().WithContainer(containerName).WithCPUStartupBoost(vpa_types.FactorStartupBoostType, nil, 
nil, "").Get(), + expected: true, + }, + { + name: "Pod not ready", + pod: &core.Pod{ + Status: core.PodStatus{ + Conditions: []core.PodCondition{ + { + Type: core.PodReady, + Status: core.ConditionFalse, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithContainer(containerName).WithCPUStartupBoost(vpa_types.FactorStartupBoostType, nil, nil, "1m").Get(), + expected: false, + }, + { + name: "Duration passed", + pod: &core.Pod{ + Status: core.PodStatus{ + Conditions: []core.PodCondition{ + { + Type: core.PodReady, + Status: core.ConditionTrue, + LastTransitionTime: past, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithContainer(containerName).WithCPUStartupBoost(vpa_types.FactorStartupBoostType, nil, nil, "1m").Get(), + expected: true, + }, + { + name: "Duration not passed", + pod: &core.Pod{ + Status: core.PodStatus{ + Conditions: []core.PodCondition{ + { + Type: core.PodReady, + Status: core.ConditionTrue, + LastTransitionTime: now, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithContainer(containerName).WithCPUStartupBoost(vpa_types.FactorStartupBoostType, nil, nil, "1m").Get(), + expected: false, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expected, IsPodReadyAndStartupBoostDurationPassed(tc.pod, tc.vpa)) + }) + } +} + +func TestPodHasCPUBoostInProgress(t *testing.T) { + testCases := []struct { + name string + pod *core.Pod + expected bool + }{ + { + name: "No annotations", + pod: &core.Pod{}, + expected: false, + }, + { + name: "Annotation present", + pod: &core.Pod{ + ObjectMeta: meta.ObjectMeta{ + Annotations: map[string]string{ + "startup-cpu-boost": "", + }, + }, + }, + expected: true, + }, + { + name: "Annotation not present", + pod: &core.Pod{ + ObjectMeta: meta.ObjectMeta{ + Annotations: map[string]string{ + "another-annotation": "true", + }, + }, + }, + expected: false, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + 
assert.Equal(t, tc.expected, PodHasCPUBoostInProgress(tc.pod)) + }) + } +} From 351296118ed7cd6f8c856e0d357b9e063550f356 Mon Sep 17 00:00:00 2001 From: Kam Saiyed Date: Tue, 11 Nov 2025 16:23:56 +0000 Subject: [PATCH 5/7] Fix test failure after rebase --- vertical-pod-autoscaler/e2e/go.mod | 2 +- vertical-pod-autoscaler/pkg/updater/logic/updater.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vertical-pod-autoscaler/e2e/go.mod b/vertical-pod-autoscaler/e2e/go.mod index e99e68d3ea8d..eca5f9b99a73 100644 --- a/vertical-pod-autoscaler/e2e/go.mod +++ b/vertical-pod-autoscaler/e2e/go.mod @@ -12,6 +12,7 @@ require ( github.com/onsi/gomega v1.38.2 k8s.io/api v0.34.0 k8s.io/apimachinery v0.34.0 + k8s.io/apiserver v0.34.0 k8s.io/autoscaler/vertical-pod-autoscaler v1.4.1 k8s.io/client-go v0.34.0 k8s.io/component-base v0.34.0 @@ -140,7 +141,6 @@ require ( gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiextensions-apiserver v0.34.0 // indirect - k8s.io/apiserver v0.34.0 // indirect k8s.io/cloud-provider v0.34.0 // indirect k8s.io/component-helpers v0.34.0 // indirect k8s.io/controller-manager v0.34.0 // indirect diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater.go b/vertical-pod-autoscaler/pkg/updater/logic/updater.go index ec4a4091ba48..362ef6256bb2 100644 --- a/vertical-pod-autoscaler/pkg/updater/logic/updater.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater.go @@ -284,11 +284,11 @@ func (u *updater) RunOnce(ctx context.Context) { err := inPlaceLimiter.InPlaceUpdate(pod, vpa, u.eventRecorder) if err != nil { klog.V(0).InfoS("Unboosting failed", "error", err, "pod", klog.KObj(pod)) - metrics_updater.RecordFailedInPlaceUpdate(vpaSize, "UnboostError") + metrics_updater.RecordFailedInPlaceUpdate(vpaSize, vpa.Name, vpa.Namespace, "UnboostError") } else { klog.V(2).InfoS("Successfully unboosted pod", "pod", klog.KObj(pod)) withInPlaceUpdated = true - 
metrics_updater.AddInPlaceUpdatedPod(vpaSize) + metrics_updater.AddInPlaceUpdatedPod(vpaSize, vpa.Name, vpa.Namespace) } } } From 202579ba87c7704ef07287a00ce301fef9858b12 Mon Sep 17 00:00:00 2001 From: Kam Saiyed Date: Tue, 21 Oct 2025 19:15:22 +0000 Subject: [PATCH 6/7] Add e2e tests for CPU startup boost --- .../e2e/v1/admission_controller.go | 116 ++++++++++++++++++ vertical-pod-autoscaler/e2e/v1/common.go | 26 +++- vertical-pod-autoscaler/e2e/v1/full_vpa.go | 115 +++++++++++++++++ vertical-pod-autoscaler/e2e/v1/updater.go | 81 ++++++++++++ 4 files changed, 333 insertions(+), 5 deletions(-) diff --git a/vertical-pod-autoscaler/e2e/v1/admission_controller.go b/vertical-pod-autoscaler/e2e/v1/admission_controller.go index 2fa12fdc4a6b..80ab1d9195f0 100644 --- a/vertical-pod-autoscaler/e2e/v1/admission_controller.go +++ b/vertical-pod-autoscaler/e2e/v1/admission_controller.go @@ -1098,6 +1098,122 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", func() { }) }) +var _ = AdmissionControllerE2eDescribe("Admission-controller", func() { + f := framework.NewDefaultFramework("vertical-pod-autoscaling") + f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline + + ginkgo.BeforeEach(func() { + waitForVpaWebhookRegistration(f) + }) + + f.It("boosts CPU by factor on pod creation", framework.WithFeatureGate(features.CPUStartupBoost), func() { + initialCPU := ParseQuantityOrDie("100m") + expectedCPU := ParseQuantityOrDie("200m") + d := NewHamsterDeploymentWithResources(f, initialCPU, ParseQuantityOrDie("100Mi")) + + ginkgo.By("Setting up a VPA with a startup boost policy (factor)") + containerName := utils.GetHamsterContainerNameByIndex(0) + factor := int32(2) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(utils.HamsterTargetRef). + WithContainer(containerName). + WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor, nil, "15s"). 
+ AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget("100m", "100Mi"). + GetContainerResources(), + ). + Get() + utils.InstallVPA(f, vpaCRD) + + ginkgo.By("Starting the deployment and verifying the pod is boosted") + podList := utils.StartDeploymentPods(f, d) + pod := podList.Items[0] + gomega.Expect(pod.Spec.Containers[0].Resources.Requests.Cpu().Cmp(expectedCPU)).To(gomega.Equal(0)) + gomega.Expect(pod.Spec.Containers[0].Resources.Limits.Cpu().Cmp(expectedCPU)).To(gomega.Equal(0)) + }) + + f.It("boosts CPU by quantity on pod creation", framework.WithFeatureGate(features.CPUStartupBoost), func() { + initialCPU := ParseQuantityOrDie("100m") + boostCPUQuantity := ParseQuantityOrDie("500m") + expectedCPU := ParseQuantityOrDie("600m") + d := NewHamsterDeploymentWithResources(f, initialCPU, ParseQuantityOrDie("100Mi")) + + ginkgo.By("Setting up a VPA with a startup boost policy (quantity)") + containerName := utils.GetHamsterContainerNameByIndex(0) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(utils.HamsterTargetRef). + WithContainer(containerName). + WithCPUStartupBoost(vpa_types.QuantityStartupBoostType, nil, &boostCPUQuantity, "15s"). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget("100m", "100Mi"). + GetContainerResources(), + ). 
+ Get() + utils.InstallVPA(f, vpaCRD) + + ginkgo.By("Starting the deployment and verifying the pod is boosted") + podList := utils.StartDeploymentPods(f, d) + pod := podList.Items[0] + gomega.Expect(pod.Spec.Containers[0].Resources.Requests.Cpu().Cmp(expectedCPU)).To(gomega.Equal(0)) + gomega.Expect(pod.Spec.Containers[0].Resources.Limits.Cpu().Cmp(expectedCPU)).To(gomega.Equal(0)) + }) + + f.It("boosts CPU on pod creation when VPA update mode is Off", framework.WithFeatureGate(features.CPUStartupBoost), func() { + initialCPU := ParseQuantityOrDie("100m") + expectedCPU := ParseQuantityOrDie("200m") + d := NewHamsterDeploymentWithResources(f, initialCPU, ParseQuantityOrDie("100Mi")) + + ginkgo.By("Setting up a VPA with updateMode Off and a startup boost policy") + containerName := utils.GetHamsterContainerNameByIndex(0) + factor := int32(2) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(utils.HamsterTargetRef). + WithContainer(containerName). + WithUpdateMode(vpa_types.UpdateModeOff). // VPA is off, but boost should still work + WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor, nil, "15s"). + Get() + utils.InstallVPA(f, vpaCRD) + + ginkgo.By("Starting the deployment and verifying the pod is boosted") + podList := utils.StartDeploymentPods(f, d) + pod := podList.Items[0] + gomega.Expect(pod.Spec.Containers[0].Resources.Requests.Cpu().Cmp(expectedCPU)).To(gomega.Equal(0)) + }) + + f.It("doesn't boost CPU on pod creation when scaling mode is Off", framework.WithFeatureGate(features.CPUStartupBoost), func() { + initialCPU := ParseQuantityOrDie("100m") + d := NewHamsterDeploymentWithResources(f, initialCPU, ParseQuantityOrDie("100Mi")) + + ginkgo.By("Setting up a VPA with a startup boost policy and scaling mode Off") + containerName := utils.GetHamsterContainerNameByIndex(0) + factor := int32(2) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). 
+ WithNamespace(f.Namespace.Name). + WithTargetRef(utils.HamsterTargetRef). + WithContainer(containerName). + WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor, nil, "15s"). + WithScalingMode(containerName, vpa_types.ContainerScalingModeOff). + Get() + utils.InstallVPA(f, vpaCRD) + + ginkgo.By("Starting the deployment and verifying the pod is NOT boosted") + podList := utils.StartDeploymentPods(f, d) + pod := podList.Items[0] + gomega.Expect(pod.Spec.Containers[0].Resources.Requests.Cpu().Cmp(initialCPU)).To(gomega.Equal(0)) + }) +}) + func waitForVpaWebhookRegistration(f *framework.Framework) { ginkgo.By("Waiting for VPA webhook registration") gomega.Eventually(func() bool { diff --git a/vertical-pod-autoscaler/e2e/v1/common.go b/vertical-pod-autoscaler/e2e/v1/common.go index 17517ff15918..10d432558ee7 100644 --- a/vertical-pod-autoscaler/e2e/v1/common.go +++ b/vertical-pod-autoscaler/e2e/v1/common.go @@ -244,14 +244,30 @@ func InstallRawVPA(f *framework.Framework, obj interface{}) error { // AnnotatePod adds annotation for an existing pod. 
func AnnotatePod(f *framework.Framework, podName, annotationName, annotationValue string) { - bytes, err := json.Marshal([]utils.PatchRecord{{ + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(context.TODO(), podName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Failed to get pod.") + + patches := []utils.PatchRecord{} + if pod.Annotations == nil { + patches = append(patches, utils.PatchRecord{ + Op: "add", + Path: "/metadata/annotations", + Value: make(map[string]string), + }) + } + + patches = append(patches, utils.PatchRecord{ Op: "add", - Path: fmt.Sprintf("/metadata/annotations/%v", annotationName), + Path: fmt.Sprintf("/metadata/annotations/%s", annotationName), Value: annotationValue, - }}) - pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Patch(context.TODO(), podName, types.JSONPatchType, bytes, metav1.PatchOptions{}) + }) + + bytes, err := json.Marshal(patches) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + patchedPod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Patch(context.TODO(), podName, types.JSONPatchType, bytes, metav1.PatchOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Failed to patch pod.") - gomega.Expect(pod.Annotations[annotationName]).To(gomega.Equal(annotationValue)) + gomega.Expect(patchedPod.Annotations[annotationName]).To(gomega.Equal(annotationValue)) } // ParseQuantityOrDie parses quantity from string and dies with an error if diff --git a/vertical-pod-autoscaler/e2e/v1/full_vpa.go b/vertical-pod-autoscaler/e2e/v1/full_vpa.go index 480031ae83b6..dc0c5285db9a 100644 --- a/vertical-pod-autoscaler/e2e/v1/full_vpa.go +++ b/vertical-pod-autoscaler/e2e/v1/full_vpa.go @@ -356,6 +356,121 @@ var _ = FullVpaE2eDescribe("Pods under VPA with non-recognized recommender expli }) }) +var _ = FullVpaE2eDescribe("Pods under VPA with CPUStartupBoost", func() { + var ( + rc *ResourceConsumer + ) + replicas := 3 + + ginkgo.AfterEach(func() { + rc.CleanUp() + }) + + f := 
framework.NewDefaultFramework("vertical-pod-autoscaling") + f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline + + ginkgo.Describe("have CPU startup boost recommendation applied", func() { + ginkgo.BeforeEach(func() { + waitForVpaWebhookRegistration(f) + }) + + f.It("to all containers of a pod", framework.WithFeatureGate(features.CPUStartupBoost), func() { + ns := f.Namespace.Name + ginkgo.By("Setting up a VPA CRD with CPUStartupBoost") + targetRef := &autoscaling.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "hamster", + } + + containerName := utils.GetHamsterContainerNameByIndex(0) + factor := int32(100) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(targetRef). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + WithContainer(containerName). + WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor, nil, "10s"). + Get() + utils.InstallVPA(f, vpaCRD) + + ginkgo.By("Setting up a hamster deployment") + rc = NewDynamicResourceConsumer("hamster", ns, KindDeployment, + replicas, + 1, /*initCPUTotal*/ + 10, /*initMemoryTotal*/ + 1, /*initCustomMetric*/ + initialCPU, /*cpuRequest*/ + initialMemory, /*memRequest*/ + f.ClientSet, + f.ScalesGetter) + + // Pods should be created with boosted CPU (10m * 100 = 1000m) + err := waitForResourceRequestInRangeInPods( + f, utils.PollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie("900m"), ParseQuantityOrDie("1100m")) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Pods should be scaled back down in-place after they become Ready and + // StartupBoost.CPU.Duration has elapsed + err = waitForResourceRequestInRangeInPods( + f, utils.PollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie(minimalCPULowerBound), ParseQuantityOrDie(minimalCPUUpperBound)) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + f.It("to a subset of containers in a pod", framework.WithFeatureGate(features.CPUStartupBoost), func() { + ns := f.Namespace.Name + + ginkgo.By("Setting up a VPA CRD with CPUStartupBoost") + targetRef := &autoscaling.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "hamster", + } + + containerName := utils.GetHamsterContainerNameByIndex(0) + factor := int32(100) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(targetRef). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + WithContainer(containerName). + WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor, nil, "10s"). + Get() + + utils.InstallVPA(f, vpaCRD) + + ginkgo.By("Setting up a hamster deployment") + rc = NewDynamicResourceConsumer("hamster", ns, KindDeployment, + replicas, + 1, /*initCPUTotal*/ + 10, /*initMemoryTotal*/ + 1, /*initCustomMetric*/ + initialCPU, /*cpuRequest*/ + initialMemory, /*memRequest*/ + f.ClientSet, + f.ScalesGetter) + + // Pods should be created with boosted CPU (10m * 100 = 1000m) + err := waitForResourceRequestInRangeInPods( + f, utils.PollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie("900m"), ParseQuantityOrDie("1100m")) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Pods should be scaled back down in-place after they become Ready and + // StartupBoost.CPU.Duration has elapsed + err = waitForResourceRequestInRangeInPods( + f, utils.PollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie(minimalCPULowerBound), ParseQuantityOrDie(minimalCPUUpperBound)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + +}) + var _ = FullVpaE2eDescribe("OOMing pods under VPA", func() { const replicas = 3 diff --git a/vertical-pod-autoscaler/e2e/v1/updater.go b/vertical-pod-autoscaler/e2e/v1/updater.go 
index 19327021c8e0..18af0e8d08b2 100644 --- a/vertical-pod-autoscaler/e2e/v1/updater.go +++ b/vertical-pod-autoscaler/e2e/v1/updater.go @@ -26,6 +26,8 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/vertical-pod-autoscaler/e2e/utils" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/status" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test" "k8s.io/kubernetes/test/e2e/framework" @@ -207,6 +209,85 @@ var _ = UpdaterE2eDescribe("Updater", func() { }) }) +var _ = UpdaterE2eDescribe("Updater", func() { + f := framework.NewDefaultFramework("vertical-pod-autoscaling") + f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline + + f.It("Unboost pods when they become Ready", framework.WithFeatureGate(features.CPUStartupBoost), func() { + const statusUpdateInterval = 10 * time.Second + + ginkgo.By("Setting up the Admission Controller status") + stopCh := make(chan struct{}) + statusUpdater := status.NewUpdater( + f.ClientSet, + status.AdmissionControllerStatusName, + status.AdmissionControllerStatusNamespace, + statusUpdateInterval, + "e2e test", + ) + defer func() { + // Schedule a cleanup of the Admission Controller status. + // Status is created outside the test namespace. + ginkgo.By("Deleting the Admission Controller status") + close(stopCh) + err := f.ClientSet.CoordinationV1().Leases(status.AdmissionControllerStatusNamespace). 
+ Delete(context.TODO(), status.AdmissionControllerStatusName, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }() + statusUpdater.Run(stopCh) + + podList := setupPodsForCPUBoost(f, "100m", "100Mi") + initialPods := podList.DeepCopy() + + ginkgo.By("Waiting for pods to be in-place updated") + err := WaitForPodsUpdatedWithoutEviction(f, initialPods) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + +}) + +func setupPodsForCPUBoost(f *framework.Framework, hamsterCPU, hamsterMemory string) *apiv1.PodList { + controller := &autoscaling.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "hamster-deployment", + } + ginkgo.By(fmt.Sprintf("Setting up a hamster %v", controller.Kind)) + // Create pods with boosted CPU, which is 2x the target recommendation + boostedCPU := "200m" + setupHamsterController(f, controller.Kind, boostedCPU, hamsterMemory, utils.DefaultHamsterReplicas) + podList, err := GetHamsterPods(f) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Setting up a VPA CRD") + containerName := utils.GetHamsterContainerNameByIndex(0) + factor := int32(2) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(controller). + WithUpdateMode(vpa_types.UpdateModeAuto). + WithContainer(containerName). + WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor, nil, "1s"). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget(hamsterCPU, hamsterMemory). + GetContainerResources(), + ). 
+ Get() + + utils.InstallVPA(f, vpaCRD) + + ginkgo.By("Annotating pods with boost annotation") + for _, pod := range podList.Items { + original, err := annotations.GetOriginalResourcesAnnotationValue(&pod.Spec.Containers[0]) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + AnnotatePod(f, pod.Name, annotations.StartupCPUBoostAnnotation, original) + } + return podList +} + func setupPodsForUpscalingEviction(f *framework.Framework) *apiv1.PodList { return setupPodsForEviction(f, "100m", "100Mi", nil) } From e8b925e6530fb199e872927b0d1ddf974e4ced3c Mon Sep 17 00:00:00 2001 From: Kam Saiyed Date: Fri, 14 Nov 2025 16:09:53 +0000 Subject: [PATCH 7/7] Update VPA version for startupboost feature --- vertical-pod-autoscaler/docs/features.md | 16 +++++++++------- vertical-pod-autoscaler/pkg/features/features.go | 2 +- .../pkg/features/versioned_features.go | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/vertical-pod-autoscaler/docs/features.md b/vertical-pod-autoscaler/docs/features.md index d0759b479885..8c3d5b26c5a1 100644 --- a/vertical-pod-autoscaler/docs/features.md +++ b/vertical-pod-autoscaler/docs/features.md @@ -158,7 +158,7 @@ VPA provides metrics to track in-place update operations: ## CPU Startup Boost > [!WARNING] -> FEATURE STATE: VPA v1.5.0 [alpha] +> FEATURE STATE: VPA v1.6.0 [alpha] The CPU Startup Boost feature allows VPA to temporarily increase CPU requests and limits for containers during pod startup. This can help workloads that have high CPU demands during their initialization phase, such as Java applications, to start faster. Once the pod is considered `Ready` and an optional duration has passed, VPA scales the CPU resources back down to their normal levels using an in-place resize. @@ -184,7 +184,8 @@ spec: updateMode: "Recreate" startupBoost: cpu: - value: "3" + type: "Factor" + factor: 3 duration: 10s ``` @@ -197,7 +198,7 @@ spec: ### Requirements * Kubernetes 1.33+ with the `InPlacePodVerticalScaling` feature gate enabled. 
-* VPA version 1.5.0+ with the `CPUStartupBoost` feature gate enabled. +* VPA version 1.6.0+ with the `CPUStartupBoost` feature gate enabled. ### Configuration @@ -207,7 +208,8 @@ Enable the feature by setting the `CPUStartupBoost` feature gate in the VPA admi --feature-gates=CPUStartupBoost=true ``` -The `startupBoost` field has the following sub-fields: -* `cpu.type`: The type of boost. Can be `Factor` (default) to multiply the CPU, or `Quantity` to set a specific CPU value. -* `cpu.value`: The magnitude of the boost. A multiplier (e.g., "2") for `Factor` type, or a resource quantity (e.g., "500m") for `Quantity` type. -* `cpu.duration`: (Optional) How long to keep the boost active *after* the pod becomes `Ready`. Defaults to `0s`. +The `startupBoost` field contains a `cpu` field with the following sub-fields: +* `type`: (Required) The type of boost. Can be `Factor` to multiply the CPU, or `Quantity` to add a specific CPU value. +* `factor`: (Optional) The multiplier to apply if `type` is `Factor` (e.g., 2 for 2x CPU). Required if `type` is `Factor`. +* `quantity`: (Optional) The amount of CPU to add if `type` is `Quantity` (e.g., "500m"). Required if `type` is `Quantity`. +* `duration`: (Optional) How long to keep the boost active *after* the pod becomes `Ready`. Defaults to `0s`. diff --git a/vertical-pod-autoscaler/pkg/features/features.go b/vertical-pod-autoscaler/pkg/features/features.go index b20ae01482be..7ebafe5ca607 100644 --- a/vertical-pod-autoscaler/pkg/features/features.go +++ b/vertical-pod-autoscaler/pkg/features/features.go @@ -40,7 +40,7 @@ const ( // In each feature gate description, you must specify "components". // The feature must be enabled by the --feature-gates argument on each listed component. - // alpha: v1.5.0 + // alpha: v1.6.0 // components: admission-controller, updater // CPUStartupBoost enables the CPU startup boost feature. 
diff --git a/vertical-pod-autoscaler/pkg/features/versioned_features.go b/vertical-pod-autoscaler/pkg/features/versioned_features.go index 088126eabe8d..915ed1503a48 100644 --- a/vertical-pod-autoscaler/pkg/features/versioned_features.go +++ b/vertical-pod-autoscaler/pkg/features/versioned_features.go @@ -28,7 +28,7 @@ import ( // Entries are alphabetized. var defaultVersionedFeatureGates = map[featuregate.Feature]featuregate.VersionedSpecs{ CPUStartupBoost: { - {Version: version.MustParse("1.5"), Default: false, PreRelease: featuregate.Alpha}, + {Version: version.MustParse("1.6"), Default: false, PreRelease: featuregate.Alpha}, }, InPlaceOrRecreate: { {Version: version.MustParse("1.4"), Default: false, PreRelease: featuregate.Alpha},