1
+ # Copyright 2025 IBM, Red Hat
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
1
15
"""
2
16
RayJob client for submitting and managing Ray jobs using the odh-kuberay-client.
3
17
"""
4
18
5
19
import logging
6
- from typing import Dict , Any , Optional
20
+ from typing import Dict , Any , Optional , Tuple
7
21
from odh_kuberay_client .kuberay_job_api import RayjobApi
8
22
23
+ from .status import (
24
+ RayJobDeploymentStatus ,
25
+ CodeflareRayJobStatus ,
26
+ RayJobInfo ,
27
+ )
28
+ from . import pretty_print
29
+
9
30
# Set up logging
10
31
logger = logging .getLogger (__name__ )
11
32
@@ -15,7 +36,7 @@ class RayJob:
15
36
A client for managing Ray jobs using the KubeRay operator.
16
37
17
38
This class provides a simplified interface for submitting and managing
18
- Ray jobs in a Kubernetes cluster with the KubeRay operator installed .
39
+ RayJob CRs (using the KubeRay RayJob python client) .
19
40
"""
20
41
21
42
def __init__ (
@@ -109,3 +130,73 @@ def _build_rayjob_cr(
109
130
rayjob_cr ["spec" ]["runtimeEnvYAML" ] = str (runtime_env )
110
131
111
132
return rayjob_cr
133
+
134
def status(
    self, print_to_console: bool = True
) -> Tuple[CodeflareRayJobStatus, bool]:
    """
    Query the RayJob API for this job and translate the result into a
    CodeFlare status.

    Args:
        print_to_console (bool): When True (the default), render the outcome
            through the ``pretty_print`` helpers.

    Returns:
        A ``(CodeflareRayJobStatus, ready)`` tuple. If the API returns no
        status data the result is ``(CodeflareRayJobStatus.UNKNOWN, False)``;
        otherwise the pair comes from ``_map_to_codeflare_status``.
    """
    raw_status = self._api.get_job_status(
        name=self.name, k8s_namespace=self.namespace
    )

    # Nothing came back from the API: report UNKNOWN / not ready and stop.
    if not raw_status:
        if print_to_console:
            pretty_print.print_no_job_found(self.name, self.namespace)
        return CodeflareRayJobStatus.UNKNOWN, False

    # A deployment-status string the enum does not recognise becomes UNKNOWN.
    reported_state = raw_status.get("jobDeploymentStatus", "Unknown")
    try:
        deployment_state = RayJobDeploymentStatus(reported_state)
    except ValueError:
        deployment_state = RayJobDeploymentStatus.UNKNOWN

    # Gather the fields shown to the user into a RayJobInfo record.
    info_fields = {
        "name": self.name,
        "job_id": raw_status.get("jobId", ""),
        "status": deployment_state,
        "namespace": self.namespace,
        "cluster_name": self.cluster_name,
        "start_time": raw_status.get("startTime"),
        "end_time": raw_status.get("endTime"),
        "failed_attempts": raw_status.get("failed", 0),
        "succeeded_attempts": raw_status.get("succeeded", 0),
    }
    summary = RayJobInfo(**info_fields)

    # Translate the deployment status into the CodeFlare view plus readiness.
    codeflare_state, is_ready = self._map_to_codeflare_status(deployment_state)

    if print_to_console:
        pretty_print.print_job_status(summary)

    return codeflare_state, is_ready
183
+
184
def _map_to_codeflare_status(
    self, deployment_status: RayJobDeploymentStatus
) -> Tuple[CodeflareRayJobStatus, bool]:
    """
    Translate a RayJob deployment status into its CodeFlare counterpart.

    Returns:
        Tuple of (CodeflareRayJobStatus, ready: bool). ``ready`` is True only
        for ``RayJobDeploymentStatus.COMPLETE``; any status missing from the
        table yields ``(CodeflareRayJobStatus.UNKNOWN, False)``.
    """
    known_states = {
        RayJobDeploymentStatus.COMPLETE: (CodeflareRayJobStatus.COMPLETE, True),
        RayJobDeploymentStatus.RUNNING: (CodeflareRayJobStatus.RUNNING, False),
        RayJobDeploymentStatus.FAILED: (CodeflareRayJobStatus.FAILED, False),
        RayJobDeploymentStatus.SUSPENDED: (CodeflareRayJobStatus.SUSPENDED, False),
    }
    fallback = (CodeflareRayJobStatus.UNKNOWN, False)

    if deployment_status in known_states:
        return known_states[deployment_status]
    return fallback
0 commit comments