diff --git a/tests/e2e/local_interactive_sdk_kind_test.py b/tests/e2e/local_interactive_sdk_kind_test.py
new file mode 100644
index 000000000..eefe421f5
--- /dev/null
+++ b/tests/e2e/local_interactive_sdk_kind_test.py
@@ -0,0 +1,87 @@
+from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication, generate_cert
+
+import pytest
+import ray
+import math
+
+from support import *
+
+
+@pytest.mark.kind
+class TestRayLocalInteractiveOauth:
+    """E2E test (``kind`` marker): drive a Ray cluster from a local Ray client."""
+
+    def setup_method(self):
+        initialize_kubernetes_client(self)
+
+    def teardown_method(self):
+        delete_namespace(self)
+
+    def test_local_interactives(self):
+        """Create namespace and Kueue resources, run the scenario, then tear down."""
+        self.setup_method()
+        create_namespace(self)
+        create_kueue_resources(self)
+        self.run_local_interactives()
+        self.teardown_method()
+
+    def run_local_interactives(self):
+        """Start a Ray cluster, connect a local Ray client and verify a remote sum."""
+        ray_image = get_ray_image()
+
+        cluster_name = "test-ray-cluster-li"
+
+        cluster = Cluster(
+            ClusterConfiguration(
+                name=cluster_name,
+                namespace=self.namespace,
+                num_workers=1,
+                head_cpus="500m",
+                head_memory=2,
+                min_cpus="500m",
+                max_cpus=1,
+                min_memory=1,
+                max_memory=2,
+                num_gpus=0,
+                image=ray_image,
+                write_to_file=True,
+                verify_tls=False,
+            )
+        )
+        cluster.up()
+        cluster.wait_ready()
+
+        generate_cert.generate_tls_cert(cluster_name, self.namespace)
+        generate_cert.export_env(cluster_name, self.namespace)
+
+        print(cluster.local_client_url())
+        ray.shutdown()
+        ray.init(address=cluster.local_client_url(), logging_level="DEBUG")
+
+        @ray.remote
+        def heavy_calculation_part(num_iterations):
+            """Sum sin(i) * cos(j) * tan(k) over i, j, k in range(num_iterations)."""
+            result = 0.0
+            for i in range(num_iterations):
+                for j in range(num_iterations):
+                    for k in range(num_iterations):
+                        result += math.sin(i) * math.cos(j) * math.tan(k)
+            return result
+
+        @ray.remote
+        def heavy_calculation(num_iterations):
+            """Add up 30 remote parts, each run with num_iterations // 30."""
+            results = ray.get(
+                [heavy_calculation_part.remote(num_iterations // 30) for _ in range(30)]
+            )
+            return sum(results)
+
+        ref = heavy_calculation.remote(3000)
+        result = ray.get(ref)
+        # Compare with a tolerance: the expected value is a float sum, and an
+        # exact `==` would fail on any last-bit rounding difference.
+        assert math.isclose(result, 1789.4644387076714, rel_tol=1e-9)
+        ray.cancel(ref)
+        ray.shutdown()
+
+        cluster.down()
diff --git a/tests/e2e/local_interactive_sdk_oauth_test.py b/tests/e2e/local_interactive_sdk_oauth_test.py
new file mode 100644
index 000000000..5210fe250
--- /dev/null
+++ b/tests/e2e/local_interactive_sdk_oauth_test.py
@@ -0,0 +1,95 @@
+from codeflare_sdk import (
+    Cluster,
+    ClusterConfiguration,
+    TokenAuthentication,
+    generate_cert,
+)
+
+import math
+import pytest
+import ray
+
+from support import *
+
+
+@pytest.mark.openshift
+class TestRayLocalInteractiveOauth:
+    """E2E test (``openshift`` marker): drive a Ray cluster from a local Ray client."""
+
+    def setup_method(self):
+        initialize_kubernetes_client(self)
+
+    def teardown_method(self):
+        delete_namespace(self)
+
+    def test_local_interactives(self):
+        """Create namespace and Kueue resources, run the scenario, then tear down."""
+        self.setup_method()
+        create_namespace(self)
+        create_kueue_resources(self)
+        self.run_local_interactives()
+        self.teardown_method()
+
+    def run_local_interactives(self):
+        """Log in, start a Ray cluster, connect locally and verify a remote sum."""
+        ray_image = get_ray_image()
+
+        auth = TokenAuthentication(
+            token=run_oc_command(["whoami", "--show-token=true"]),
+            server=run_oc_command(["whoami", "--show-server=true"]),
+            skip_tls=True,
+        )
+        auth.login()
+
+        cluster_name = "test-ray-cluster-li"
+
+        cluster = Cluster(
+            ClusterConfiguration(
+                namespace=self.namespace,
+                name=cluster_name,
+                num_workers=1,
+                min_cpus=1,
+                max_cpus=1,
+                min_memory=4,
+                max_memory=4,
+                num_gpus=0,
+                image=ray_image,
+                verify_tls=False,
+            )
+        )
+        cluster.up()
+        cluster.wait_ready()
+
+        generate_cert.generate_tls_cert(cluster_name, self.namespace)
+        generate_cert.export_env(cluster_name, self.namespace)
+
+        ray.shutdown()
+        ray.init(address=cluster.local_client_url(), logging_level="DEBUG")
+
+        @ray.remote
+        def heavy_calculation_part(num_iterations):
+            """Sum sin(i) * cos(j) * tan(k) over i, j, k in range(num_iterations)."""
+            result = 0.0
+            for i in range(num_iterations):
+                for j in range(num_iterations):
+                    for k in range(num_iterations):
+                        result += math.sin(i) * math.cos(j) * math.tan(k)
+            return result
+
+        @ray.remote
+        def heavy_calculation(num_iterations):
+            """Add up 30 remote parts, each run with num_iterations // 30."""
+            results = ray.get(
+                [heavy_calculation_part.remote(num_iterations // 30) for _ in range(30)]
+            )
+            return sum(results)
+
+        ref = heavy_calculation.remote(3000)
+        result = ray.get(ref)
+        # Compare with a tolerance: the expected value is a float sum, and an
+        # exact `==` would fail on any last-bit rounding difference.
+        assert math.isclose(result, 1789.4644387076714, rel_tol=1e-9)
+        ray.cancel(ref)
+        ray.shutdown()
+
+        cluster.down()
diff --git a/tests/e2e/mnist_raycluster_sdk_kind_test.py b/tests/e2e/mnist_raycluster_sdk_kind_test.py
deleted file mode 100644
index 51c659478..000000000
--- a/tests/e2e/mnist_raycluster_sdk_kind_test.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import requests
-
-from time import sleep
-
-from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication
-from codeflare_sdk.job import RayJobClient
-
-import pytest
-
-from support import *
-
-# This test creates a Ray Cluster and covers the Ray Job submission functionality on Kind Cluster
-
-
-@pytest.mark.kind
-class TestRayClusterSDKKind:
-    def setup_method(self):
-        initialize_kubernetes_client(self)
-
-    def teardown_method(self):
-        delete_namespace(self)
-
-    def test_mnist_ray_cluster_sdk_kind(self):
-        self.setup_method()
-        create_namespace(self)
-        create_kueue_resources(self)
-        self.run_mnist_raycluster_sdk_kind()
-
-    def run_mnist_raycluster_sdk_kind(self):
-        ray_image = get_ray_image()
-
-        cluster = Cluster(
-            ClusterConfiguration(
-                name="mnist",
-                namespace=self.namespace,
-                num_workers=1,
-                head_cpus="500m",
-                head_memory=2,
-                min_cpus="500m",
-                max_cpus=1,
-                min_memory=1,
-                max_memory=2,
-                num_gpus=0,
-                image=ray_image,
-                write_to_file=True,
-                verify_tls=False,
-            )
-        )
-
-        cluster.up()
-
-        cluster.status()
-
-        cluster.wait_ready()
-
-        cluster.status()
-
-        cluster.details()
-
-        self.assert_jobsubmit_withoutlogin_kind(cluster)
-
-    # Assertions
-
-    def assert_jobsubmit_withoutlogin_kind(self, cluster):
-        ray_dashboard = cluster.cluster_dashboard_uri()
-        client = RayJobClient(address=ray_dashboard, verify=False)
-
-        submission_id = client.submit_job(
-            entrypoint="python mnist.py",
-            runtime_env={
-                "working_dir": "./tests/e2e/",
-                "pip": "./tests/e2e/mnist_pip_requirements.txt",
-            },
-        )
-        print(f"Submitted job with ID: {submission_id}")
-        done = False
-        time = 0
-        timeout = 900
-        while not done:
-            status = client.get_job_status(submission_id)
-            if status.is_terminal():
-                break
-            if not done:
-                print(status)
-            if timeout and time >= timeout:
-                raise TimeoutError(f"job has timed out after waiting {timeout}s")
-            sleep(5)
-            time += 5
-
-        logs = client.get_job_logs(submission_id)
-        print(logs)
-
-        self.assert_job_completion(status)
-
-        client.delete_job(submission_id)
-
-        cluster.down()
-
-    def assert_job_completion(self, status):
-        if status == "SUCCEEDED":
-            print(f"Job has completed: '{status}'")
-            assert True
-        else:
-            print(f"Job has completed: '{status}'")
-            assert False
diff --git a/tests/e2e/mnist_raycluster_sdk_oauth_test.py b/tests/e2e/mnist_raycluster_sdk_oauth_test.py
index d0d090f38..548c80ad0 100644
--- a/tests/e2e/mnist_raycluster_sdk_oauth_test.py
+++ b/tests/e2e/mnist_raycluster_sdk_oauth_test.py
@@ -25,6 +25,7 @@ def test_mnist_ray_cluster_sdk_auth(self):
         create_namespace(self)
         create_kueue_resources(self)
         self.run_mnist_raycluster_sdk_oauth()
+        self.teardown_method()
 
     def run_mnist_raycluster_sdk_oauth(self):
         ray_image = get_ray_image()