Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions ray-operator/controllers/ray/common/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,20 @@ func configureGCSFaultTolerance(podTemplate *corev1.PodTemplateSpec, instance ra
options := instance.Spec.GcsFaultToleranceOptions
container := &podTemplate.Spec.Containers[utils.RayContainerIndex]

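// On the head node, install a default preStop hook that runs `ray stop --force` before the
// container is terminated. A preStop hook already set on the container is left untouched.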
if rayNodeType == rayv1.HeadNode {
if container.Lifecycle == nil {
container.Lifecycle = &corev1.Lifecycle{}
}
if container.Lifecycle.PreStop == nil {
container.Lifecycle.PreStop = &corev1.LifecycleHandler{
Exec: &corev1.ExecAction{
Command: []string{"/bin/sh", "-c", "ray stop --force"},
},
}
}
}

// Configure the GCS RPC server reconnect timeout for GCS FT.
if !utils.EnvVarExists(utils.RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S, container.Env) && rayNodeType == rayv1.WorkerNode {
// If GCS FT is enabled and RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S is not set, set the worker's
Expand Down
Loading