@@ -13,6 +13,7 @@ import (
13
13
type RayClusterSpec struct {
14
14
// Suspend indicates whether a RayCluster should be suspended.
15
15
// A suspended RayCluster will have head pods and worker pods deleted.
16
+ // +optional
16
17
Suspend * bool `json:"suspend,omitempty"`
17
18
// ManagedBy is an optional configuration for the controller or entity that manages a RayCluster.
18
19
// The value must be either 'ray.io/kuberay-operator' or 'kueue.x-k8s.io/multikueue'.
@@ -22,47 +23,62 @@ type RayClusterSpec struct {
22
23
// The field is immutable.
23
24
// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="the managedBy field is immutable"
24
25
// +kubebuilder:validation:XValidation:rule="self in ['ray.io/kuberay-operator', 'kueue.x-k8s.io/multikueue']",message="the managedBy field value must be either 'ray.io/kuberay-operator' or 'kueue.x-k8s.io/multikueue'"
26
+ // +optional
25
27
ManagedBy * string `json:"managedBy,omitempty"`
26
28
// AutoscalerOptions specifies optional configuration for the Ray autoscaler.
27
- AutoscalerOptions * AutoscalerOptions `json:"autoscalerOptions,omitempty"`
28
- HeadServiceAnnotations map [string ]string `json:"headServiceAnnotations,omitempty"`
29
+ // +optional
30
+ AutoscalerOptions * AutoscalerOptions `json:"autoscalerOptions,omitempty"`
31
+ // +optional
32
+ HeadServiceAnnotations map [string ]string `json:"headServiceAnnotations,omitempty"`
29
33
// EnableInTreeAutoscaling indicates whether operator should create in tree autoscaling configs
34
+ // +optional
30
35
EnableInTreeAutoscaling * bool `json:"enableInTreeAutoscaling,omitempty"`
31
36
// GcsFaultToleranceOptions for enabling GCS FT
37
+ // +optional
32
38
GcsFaultToleranceOptions * GcsFaultToleranceOptions `json:"gcsFaultToleranceOptions,omitempty"`
33
39
// HeadGroupSpec is the spec for the head pod
34
40
HeadGroupSpec HeadGroupSpec `json:"headGroupSpec"`
35
41
// RayVersion is used to determine the command for the Kubernetes Job managed by RayJob
42
+ // +optional
36
43
RayVersion string `json:"rayVersion,omitempty"`
37
44
// WorkerGroupSpecs are the specs for the worker pods
45
+ // +optional
38
46
WorkerGroupSpecs []WorkerGroupSpec `json:"workerGroupSpecs,omitempty"`
39
47
}
40
48
41
49
// GcsFaultToleranceOptions contains configs for GCS FT
42
50
type GcsFaultToleranceOptions struct {
43
- RedisUsername * RedisCredential `json:"redisUsername,omitempty"`
44
- RedisPassword * RedisCredential `json:"redisPassword,omitempty"`
45
- ExternalStorageNamespace string `json:"externalStorageNamespace,omitempty"`
46
- RedisAddress string `json:"redisAddress"`
// RedisUsername is the Redis username, supplied as a RedisCredential
// (either an inline value or a reference to its source).
51
+ // +optional
52
+ RedisUsername * RedisCredential `json:"redisUsername,omitempty"`
// RedisPassword is the Redis password, supplied as a RedisCredential
// (either an inline value or a reference to its source).
53
+ // +optional
54
+ RedisPassword * RedisCredential `json:"redisPassword,omitempty"`
// ExternalStorageNamespace is an optional string setting for GCS FT.
55
+ // +optional
56
+ ExternalStorageNamespace string `json:"externalStorageNamespace,omitempty"`
// RedisAddress is the Redis address. It is the only field in this struct
// without an optional marker, and its JSON tag has no omitempty.
57
+ RedisAddress string `json:"redisAddress"`
47
58
}
48
59
49
60
// RedisCredential is the redis username/password or a reference to the source containing the username/password
50
61
type RedisCredential struct {
// ValueFrom references the source containing the credential, expressed as
// a corev1.EnvVarSource.
62
+ // +optional
51
63
ValueFrom * corev1.EnvVarSource `json:"valueFrom,omitempty"`
52
- Value string `json:"value,omitempty"`
// Value is the credential given inline as a plain string.
64
+ // +optional
65
+ Value string `json:"value,omitempty"`
53
66
}
54
67
55
68
// HeadGroupSpec are the spec for the head pod
56
69
type HeadGroupSpec struct {
57
70
// Template is the exact pod template used in K8s deployments, statefulsets, etc.
58
71
Template corev1.PodTemplateSpec `json:"template"`
59
72
// HeadService is the Kubernetes service of the head pod.
73
+ // +optional
60
74
HeadService * corev1.Service `json:"headService,omitempty"`
61
75
// EnableIngress indicates whether operator should create ingress object for head service or not.
76
+ // +optional
62
77
EnableIngress * bool `json:"enableIngress,omitempty"`
63
78
// RayStartParams are the params of the start command: node-manager-port, object-store-memory, ...
64
79
RayStartParams map [string ]string `json:"rayStartParams"`
65
80
// ServiceType is the Kubernetes service type of the head service. It will be used by the workers to connect to the head pod.
81
+ // +optional
66
82
ServiceType corev1.ServiceType `json:"serviceType,omitempty"`
67
83
}
68
84
@@ -71,11 +87,13 @@ type WorkerGroupSpec struct {
71
87
// Suspend indicates whether a worker group should be suspended.
72
88
// A suspended worker group will have all pods deleted.
73
89
// This is not a user-facing API and is only used by RayJob DeletionPolicy.
90
+ // +optional
74
91
Suspend * bool `json:"suspend,omitempty"`
75
92
// GroupName identifies this worker group; a cluster can have multiple worker groups, and they are distinguished by name.
76
93
GroupName string `json:"groupName"`
77
94
// Replicas is the number of desired Pods for this worker group. See https://github.com/ray-project/kuberay/pull/1443 for more details about the reason for making this field optional.
78
95
// +kubebuilder:default:=0
96
+ // +optional
79
97
Replicas * int32 `json:"replicas,omitempty"`
80
98
// MinReplicas denotes the minimum number of desired Pods for this worker group.
81
99
// +kubebuilder:default:=0
@@ -85,15 +103,18 @@ type WorkerGroupSpec struct {
85
103
MaxReplicas * int32 `json:"maxReplicas"`
86
104
// IdleTimeoutSeconds denotes the number of seconds to wait before the v2 autoscaler terminates an idle worker pod of this type.
87
105
// This value is only used with the Ray Autoscaler enabled and defaults to the value set by the AutoscalingConfig if not specified for this worker group.
106
+ // +optional
88
107
IdleTimeoutSeconds * int32 `json:"idleTimeoutSeconds,omitempty"`
89
108
// RayStartParams are the params of the start command: address, object-store-memory, ...
90
109
RayStartParams map [string ]string `json:"rayStartParams"`
91
110
// Template is a pod template for the worker
92
111
Template corev1.PodTemplateSpec `json:"template"`
93
112
// ScaleStrategy defines which pods to remove
113
+ // +optional
94
114
ScaleStrategy ScaleStrategy `json:"scaleStrategy,omitempty"`
95
115
// NumOfHosts denotes the number of hosts to create per replica. The default value is 1.
96
116
// +kubebuilder:default:=1
117
+ // +optional
97
118
NumOfHosts int32 `json:"numOfHosts,omitempty"`
98
119
}
99
120
@@ -107,29 +128,38 @@ type ScaleStrategy struct {
107
128
type AutoscalerOptions struct {
108
129
// Resources specifies optional resource request and limit overrides for the autoscaler container.
109
130
// Default values: 500m CPU request and limit. 512Mi memory request and limit.
131
+ // +optional
110
132
Resources * corev1.ResourceRequirements `json:"resources,omitempty"`
111
133
// Image optionally overrides the autoscaler's container image. This override is provided for autoscaler testing and development.
134
+ // +optional
112
135
Image * string `json:"image,omitempty"`
113
136
// ImagePullPolicy optionally overrides the autoscaler container's image pull policy. This override is provided for autoscaler testing and development.
137
+ // +optional
114
138
ImagePullPolicy * corev1.PullPolicy `json:"imagePullPolicy,omitempty"`
115
139
// SecurityContext defines the security options the container should be run with.
116
140
// If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext.
117
141
// More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
142
+ // +optional
118
143
SecurityContext * corev1.SecurityContext `json:"securityContext,omitempty"`
119
144
// IdleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources.
120
145
// Defaults to 60 (one minute). It is not read by the KubeRay operator but by the Ray autoscaler.
146
+ // +optional
121
147
IdleTimeoutSeconds * int32 `json:"idleTimeoutSeconds,omitempty"`
122
148
// UpscalingMode is "Conservative", "Default", or "Aggressive."
123
149
// Conservative: Upscaling is rate-limited; the number of pending worker pods is at most the size of the Ray cluster.
124
150
// Default: Upscaling is not rate-limited.
125
151
// Aggressive: An alias for Default; upscaling is not rate-limited.
126
152
// It is not read by the KubeRay operator but by the Ray autoscaler.
153
+ // +optional
127
154
UpscalingMode * UpscalingMode `json:"upscalingMode,omitempty"`
128
155
// Optional list of environment variables to set in the autoscaler container.
156
+ // +optional
129
157
Env []corev1.EnvVar `json:"env,omitempty"`
130
158
// Optional list of sources to populate environment variables in the autoscaler container.
159
+ // +optional
131
160
EnvFrom []corev1.EnvFromSource `json:"envFrom,omitempty"`
132
161
// Optional list of volumeMounts. This is needed for enabling TLS for the autoscaler container.
162
+ // +optional
133
163
VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty"`
134
164
}
135
165
@@ -153,46 +183,62 @@ type RayClusterStatus struct {
153
183
// Status reflects the status of the cluster
154
184
//
155
185
// Deprecated: the State field is replaced by the Conditions field.
186
+ // +optional
156
187
State ClusterState `json:"state,omitempty"`
157
188
// DesiredCPU indicates total desired CPUs for the cluster
189
+ // +optional
158
190
DesiredCPU resource.Quantity `json:"desiredCPU,omitempty"`
159
191
// DesiredMemory indicates total desired memory for the cluster
192
+ // +optional
160
193
DesiredMemory resource.Quantity `json:"desiredMemory,omitempty"`
161
194
// DesiredGPU indicates total desired GPUs for the cluster
195
+ // +optional
162
196
DesiredGPU resource.Quantity `json:"desiredGPU,omitempty"`
163
197
// DesiredTPU indicates total desired TPUs for the cluster
198
+ // +optional
164
199
DesiredTPU resource.Quantity `json:"desiredTPU,omitempty"`
165
200
// LastUpdateTime indicates last update timestamp for this cluster status.
166
201
// +nullable
167
202
LastUpdateTime * metav1.Time `json:"lastUpdateTime,omitempty"`
168
203
// StateTransitionTimes indicates the time of the last state transition for each state.
204
+ // +optional
169
205
StateTransitionTimes map [ClusterState ]* metav1.Time `json:"stateTransitionTimes,omitempty"`
170
206
// Service Endpoints
207
+ // +optional
171
208
Endpoints map [string ]string `json:"endpoints,omitempty"`
172
209
// Head info
210
+ // +optional
173
211
Head HeadInfo `json:"head,omitempty"`
174
212
// Reason provides more information about current State
213
+ // +optional
175
214
Reason string `json:"reason,omitempty"`
176
215
177
216
// Represents the latest available observations of a RayCluster's current state.
178
217
// +patchMergeKey=type
179
218
// +patchStrategy=merge
180
219
// +listType=map
181
220
// +listMapKey=type
221
+ // +optional
182
222
Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
183
223
184
224
// ReadyWorkerReplicas indicates how many worker replicas are ready in the cluster
225
+ // +optional
185
226
ReadyWorkerReplicas int32 `json:"readyWorkerReplicas,omitempty"`
186
227
// AvailableWorkerReplicas indicates how many replicas are available in the cluster
228
+ // +optional
187
229
AvailableWorkerReplicas int32 `json:"availableWorkerReplicas,omitempty"`
188
230
// DesiredWorkerReplicas indicates overall desired replicas claimed by the user at the cluster level.
231
+ // +optional
189
232
DesiredWorkerReplicas int32 `json:"desiredWorkerReplicas,omitempty"`
190
233
// MinWorkerReplicas indicates sum of minimum replicas of each node group.
234
+ // +optional
191
235
MinWorkerReplicas int32 `json:"minWorkerReplicas,omitempty"`
192
236
// MaxWorkerReplicas indicates sum of maximum replicas of each node group.
237
+ // +optional
193
238
MaxWorkerReplicas int32 `json:"maxWorkerReplicas,omitempty"`
194
239
// observedGeneration is the most recent generation observed for this RayCluster. It corresponds to the
195
240
// RayCluster's generation, which is updated on mutation by the API Server.
241
+ // +optional
196
242
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
197
243
}
198
244
@@ -224,9 +270,13 @@ const (
224
270
225
271
// HeadInfo gives info about head
226
272
type HeadInfo struct {
227
- PodIP string `json:"podIP,omitempty"`
228
- ServiceIP string `json:"serviceIP,omitempty"`
229
- PodName string `json:"podName,omitempty"`
// PodIP is the IP of the head pod.
273
+ // +optional
274
+ PodIP string `json:"podIP,omitempty"`
// ServiceIP is the IP of the head service.
275
+ // +optional
276
+ ServiceIP string `json:"serviceIP,omitempty"`
// PodName is the name of the head pod.
277
+ // +optional
278
+ PodName string `json:"podName,omitempty"`
// ServiceName is the name of the head service.
279
+ // +optional
230
280
ServiceName string `json:"serviceName,omitempty"`
231
281
}
232
282
@@ -259,11 +309,14 @@ const (
259
309
// +genclient
260
310
type RayCluster struct {
261
311
// Standard object metadata.
262
- metav1.TypeMeta `json:",inline"`
312
+ metav1.TypeMeta `json:",inline"`
313
+ // +optional
263
314
metav1.ObjectMeta `json:"metadata,omitempty"`
264
315
265
316
// Specification of the desired behavior of the RayCluster.
266
- Spec RayClusterSpec `json:"spec,omitempty"`
317
+ // +optional
318
+ Spec RayClusterSpec `json:"spec,omitempty"`
// Status is the RayClusterStatus recorded for this RayCluster.
319
+ // +optional
267
320
Status RayClusterStatus `json:"status,omitempty"`
268
321
}
269
322
@@ -272,6 +325,7 @@ type RayCluster struct {
272
325
// RayClusterList contains a list of RayCluster
273
326
type RayClusterList struct {
274
327
metav1.TypeMeta `json:",inline"`
328
+ // +optional
275
329
metav1.ListMeta `json:"metadata,omitempty"`
// Items holds the RayCluster objects contained in this list.
276
330
Items []RayCluster `json:"items"`
277
331
}
0 commit comments