Skip to content

Commit ae88086

Browse files
committed
test: add unit tests for idleTimeoutSeconds config per worker groups
Signed-off-by: alimaazamat <[email protected]>
1 parent 79bd749 commit ae88086

File tree

2 files changed

+179
-0
lines changed

2 files changed

+179
-0
lines changed

ray-operator/controllers/ray/utils/validation.go

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,9 @@ func ValidateRayClusterSpec(spec *rayv1.RayClusterSpec, annotations map[string]s
103103
if err := validateRayGroupLabels(workerGroup.GroupName, workerGroup.RayStartParams, workerGroup.Labels); err != nil {
104104
return err
105105
}
106+
if err := validateWorkerGroupIdleTimeout(workerGroup, spec); err != nil {
107+
return err
108+
}
106109
}
107110

108111
if annotations[RayFTEnabledAnnotationKey] != "" && spec.GcsFaultToleranceOptions != nil {
@@ -574,3 +577,18 @@ func validateLegacyDeletionPolicies(rayJob *rayv1.RayJob) error {
574577

575578
return nil
576579
}
580+
581+
// validateWorkerGroupIdleTimeout validates the idleTimeoutSeconds field in a worker group spec
582+
func validateWorkerGroupIdleTimeout(workerGroup rayv1.WorkerGroupSpec, spec *rayv1.RayClusterSpec) error {
583+
idleTimeoutSeconds := workerGroup.IdleTimeoutSeconds
584+
if idleTimeoutSeconds != nil && *idleTimeoutSeconds < 0 {
585+
return fmt.Errorf("idleTimeoutSeconds must be non-negative, got %d", *idleTimeoutSeconds)
586+
}
587+
588+
// idleTimeoutSeconds only allowed on autoscaler v2
589+
if idleTimeoutSeconds != nil && !IsAutoscalingV2Enabled(spec) {
590+
return fmt.Errorf("worker group %s has idleTimeoutSeconds set, but autoscaler version is not v2. Please set .spec.autoscalerOptions.version to v2", workerGroup.GroupName)
591+
}
592+
593+
return nil
594+
}

ray-operator/controllers/ray/utils/validation_test.go

Lines changed: 161 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1823,3 +1823,164 @@ func TestValidateClusterUpgradeOptions(t *testing.T) {
18231823
})
18241824
}
18251825
}
1826+
1827+
func TestValidateWorkerGroupIdleTimeout(t *testing.T) {
1828+
tests := map[string]struct {
1829+
expectedErr string
1830+
spec rayv1.RayClusterSpec
1831+
}{
1832+
"should accept worker group with valid idleTimeoutSeconds": {
1833+
spec: rayv1.RayClusterSpec{
1834+
EnableInTreeAutoscaling: ptr.To(true),
1835+
AutoscalerOptions: &rayv1.AutoscalerOptions{
1836+
Version: ptr.To(rayv1.AutoscalerVersionV2),
1837+
},
1838+
HeadGroupSpec: rayv1.HeadGroupSpec{
1839+
Template: podTemplateSpec(nil, nil),
1840+
},
1841+
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
1842+
{
1843+
GroupName: "worker-group-1",
1844+
Template: podTemplateSpec(nil, nil),
1845+
IdleTimeoutSeconds: ptr.To(int32(60)),
1846+
MinReplicas: ptr.To(int32(0)),
1847+
MaxReplicas: ptr.To(int32(10)),
1848+
},
1849+
},
1850+
},
1851+
expectedErr: "",
1852+
},
1853+
"should reject negative idleTimeoutSeconds": {
1854+
spec: rayv1.RayClusterSpec{
1855+
EnableInTreeAutoscaling: ptr.To(true),
1856+
AutoscalerOptions: &rayv1.AutoscalerOptions{
1857+
Version: ptr.To(rayv1.AutoscalerVersionV2),
1858+
},
1859+
HeadGroupSpec: rayv1.HeadGroupSpec{
1860+
Template: podTemplateSpec(nil, nil),
1861+
},
1862+
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
1863+
{
1864+
GroupName: "worker-group-1",
1865+
Template: podTemplateSpec(nil, nil),
1866+
IdleTimeoutSeconds: ptr.To(int32(-10)),
1867+
MinReplicas: ptr.To(int32(0)),
1868+
MaxReplicas: ptr.To(int32(10)),
1869+
},
1870+
},
1871+
},
1872+
expectedErr: "idleTimeoutSeconds must be non-negative, got -10",
1873+
},
1874+
"should accept zero idleTimeoutSeconds": {
1875+
spec: rayv1.RayClusterSpec{
1876+
EnableInTreeAutoscaling: ptr.To(true),
1877+
AutoscalerOptions: &rayv1.AutoscalerOptions{
1878+
Version: ptr.To(rayv1.AutoscalerVersionV2),
1879+
},
1880+
HeadGroupSpec: rayv1.HeadGroupSpec{
1881+
Template: podTemplateSpec(nil, nil),
1882+
},
1883+
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
1884+
{
1885+
GroupName: "worker-group-1",
1886+
Template: podTemplateSpec(nil, nil),
1887+
IdleTimeoutSeconds: ptr.To(int32(0)),
1888+
MinReplicas: ptr.To(int32(0)),
1889+
MaxReplicas: ptr.To(int32(10)),
1890+
},
1891+
},
1892+
},
1893+
expectedErr: "",
1894+
},
1895+
"should reject idleTimeoutSeconds when autoscaler version is not v2": {
1896+
spec: rayv1.RayClusterSpec{
1897+
EnableInTreeAutoscaling: ptr.To(true),
1898+
AutoscalerOptions: &rayv1.AutoscalerOptions{
1899+
Version: ptr.To(rayv1.AutoscalerVersionV1),
1900+
},
1901+
HeadGroupSpec: rayv1.HeadGroupSpec{
1902+
Template: podTemplateSpec(nil, nil),
1903+
},
1904+
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
1905+
{
1906+
GroupName: "worker-group-1",
1907+
Template: podTemplateSpec(nil, nil),
1908+
IdleTimeoutSeconds: ptr.To(int32(60)),
1909+
MinReplicas: ptr.To(int32(0)),
1910+
MaxReplicas: ptr.To(int32(10)),
1911+
},
1912+
},
1913+
},
1914+
expectedErr: "worker group worker-group-1 has idleTimeoutSeconds set, but autoscaler version is not v2. Please set .spec.autoscalerOptions.version to v2",
1915+
},
1916+
"should reject idleTimeoutSeconds when autoscaler version is not set": {
1917+
spec: rayv1.RayClusterSpec{
1918+
EnableInTreeAutoscaling: ptr.To(true),
1919+
HeadGroupSpec: rayv1.HeadGroupSpec{
1920+
Template: podTemplateSpec(nil, nil),
1921+
},
1922+
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
1923+
{
1924+
GroupName: "worker-group-1",
1925+
Template: podTemplateSpec(nil, nil),
1926+
IdleTimeoutSeconds: ptr.To(int32(60)),
1927+
MinReplicas: ptr.To(int32(0)),
1928+
MaxReplicas: ptr.To(int32(10)),
1929+
},
1930+
},
1931+
},
1932+
expectedErr: "worker group worker-group-1 has idleTimeoutSeconds set, but autoscaler version is not v2. Please set .spec.autoscalerOptions.version to v2",
1933+
},
1934+
"should reject idleTimeoutSeconds when AutoscalerOptions is nil": {
1935+
spec: rayv1.RayClusterSpec{
1936+
EnableInTreeAutoscaling: ptr.To(true),
1937+
AutoscalerOptions: nil,
1938+
HeadGroupSpec: rayv1.HeadGroupSpec{
1939+
Template: podTemplateSpec(nil, nil),
1940+
},
1941+
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
1942+
{
1943+
GroupName: "worker-group-1",
1944+
Template: podTemplateSpec(nil, nil),
1945+
IdleTimeoutSeconds: ptr.To(int32(60)),
1946+
MinReplicas: ptr.To(int32(0)),
1947+
MaxReplicas: ptr.To(int32(10)),
1948+
},
1949+
},
1950+
},
1951+
expectedErr: "worker group worker-group-1 has idleTimeoutSeconds set, but autoscaler version is not v2. Please set .spec.autoscalerOptions.version to v2",
1952+
},
1953+
"should accept worker group without idleTimeoutSeconds and without autoscaler v2": {
1954+
spec: rayv1.RayClusterSpec{
1955+
EnableInTreeAutoscaling: ptr.To(true),
1956+
AutoscalerOptions: &rayv1.AutoscalerOptions{
1957+
Version: ptr.To(rayv1.AutoscalerVersionV1),
1958+
},
1959+
HeadGroupSpec: rayv1.HeadGroupSpec{
1960+
Template: podTemplateSpec(nil, nil),
1961+
},
1962+
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
1963+
{
1964+
GroupName: "worker-group-1",
1965+
Template: podTemplateSpec(nil, nil),
1966+
MinReplicas: ptr.To(int32(0)),
1967+
MaxReplicas: ptr.To(int32(10)),
1968+
},
1969+
},
1970+
},
1971+
expectedErr: "",
1972+
},
1973+
}
1974+
1975+
for name, tc := range tests {
1976+
t.Run(name, func(t *testing.T) {
1977+
err := ValidateRayClusterSpec(&tc.spec, nil)
1978+
if tc.expectedErr != "" {
1979+
require.Error(t, err)
1980+
require.EqualError(t, err, tc.expectedErr)
1981+
} else {
1982+
require.NoError(t, err)
1983+
}
1984+
})
1985+
}
1986+
}

0 commit comments

Comments
 (0)