diff --git a/src/codeflare_sdk/common/kueue/test_kueue.py b/src/codeflare_sdk/common/kueue/test_kueue.py index e9de364d8..77095d4d9 100644 --- a/src/codeflare_sdk/common/kueue/test_kueue.py +++ b/src/codeflare_sdk/common/kueue/test_kueue.py @@ -51,8 +51,7 @@ def test_cluster_creation_no_aw_local_queue(mocker): config.write_to_file = True config.local_queue = "local-queue-default" cluster = Cluster(config) - assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-kueue.yaml" - assert cluster.app_wrapper_name == "unit-test-cluster-kueue" + assert cluster.resource_yaml == f"{aw_dir}unit-test-cluster-kueue.yaml" assert filecmp.cmp( f"{aw_dir}unit-test-cluster-kueue.yaml", f"{parent}/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml", @@ -65,10 +64,9 @@ def test_cluster_creation_no_aw_local_queue(mocker): config.write_to_file = False cluster = Cluster(config) - test_rc = yaml.load(cluster.app_wrapper_yaml, Loader=yaml.FullLoader) with open(f"{parent}/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml") as f: expected_rc = yaml.load(f, Loader=yaml.FullLoader) - assert test_rc == expected_rc + assert cluster.resource_yaml == expected_rc def test_aw_creation_local_queue(mocker): @@ -87,8 +85,7 @@ def test_aw_creation_local_queue(mocker): config.write_to_file = True config.local_queue = "local-queue-default" cluster = Cluster(config) - assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-aw-kueue.yaml" - assert cluster.app_wrapper_name == "unit-test-aw-kueue" + assert cluster.resource_yaml == f"{aw_dir}unit-test-aw-kueue.yaml" assert filecmp.cmp( f"{aw_dir}unit-test-aw-kueue.yaml", f"{parent}/tests/test_cluster_yamls/kueue/aw_kueue.yaml", @@ -102,10 +99,9 @@ def test_aw_creation_local_queue(mocker): config.write_to_file = False cluster = Cluster(config) - test_rc = yaml.load(cluster.app_wrapper_yaml, Loader=yaml.FullLoader) with open(f"{parent}/tests/test_cluster_yamls/kueue/aw_kueue.yaml") as f: expected_rc = yaml.load(f, Loader=yaml.FullLoader) - assert test_rc == expected_rc + assert cluster.resource_yaml == expected_rc def test_get_local_queue_exists_fail(mocker): diff --git a/src/codeflare_sdk/common/utils/unit_test_support.py b/src/codeflare_sdk/common/utils/unit_test_support.py index 61a16260c..88b65aa4f 100644 --- a/src/codeflare_sdk/common/utils/unit_test_support.py +++ b/src/codeflare_sdk/common/utils/unit_test_support.py @@ -20,6 +20,7 @@ import yaml from pathlib import Path from kubernetes import client +from unittest.mock import patch parent = Path(__file__).resolve().parents[4] # project directory aw_dir = os.path.expanduser("~/.codeflare/resources/") @@ -381,3 +382,38 @@ def mocked_ingress(port, cluster_name="unit-test-cluster", annotations: dict = N ), ) return mock_ingress + + +@patch.dict("os.environ", {"NB_PREFIX": "test-prefix"}) +def create_cluster_all_config_params(mocker, cluster_name, is_appwrapper) -> Cluster: + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), + ) + + config = ClusterConfiguration( + name=cluster_name, + namespace="ns", + head_cpu_requests=4, + head_cpu_limits=8, + head_memory_requests=12, + head_memory_limits=16, + head_extended_resource_requests={"nvidia.com/gpu": 1, "intel.com/gpu": 2}, + worker_cpu_requests=4, + worker_cpu_limits=8, + num_workers=10, + worker_memory_requests=12, + worker_memory_limits=16, + appwrapper=is_appwrapper, + envs={"key1": "value1", "key2": "value2"}, + image="example/ray:tag", + image_pull_secrets=["secret1", "secret2"], + write_to_file=True, + verify_tls=True, + labels={"key1": "value1", "key2": "value2"}, + worker_extended_resource_requests={"nvidia.com/gpu": 1}, + extended_resource_mapping={"example.com/gpu": "GPU", "intel.com/gpu": "TPU"}, + overwrite_default_resource_mapping=True, + local_queue="local-queue-default", + ) + return Cluster(config) diff --git a/src/codeflare_sdk/ray/appwrapper/test_status.py b/src/codeflare_sdk/ray/appwrapper/test_status.py index 8c693767c..a3fcf8700 100644 --- a/src/codeflare_sdk/ray/appwrapper/test_status.py +++ b/src/codeflare_sdk/ray/appwrapper/test_status.py @@ -19,6 +19,7 @@ ) from codeflare_sdk.ray.appwrapper import AppWrapper, AppWrapperStatus from codeflare_sdk.ray.cluster.status import CodeFlareClusterStatus +from codeflare_sdk.common.utils.unit_test_support import get_local_queue import os aw_dir = os.path.expanduser("~/.codeflare/resources/") @@ -28,8 +29,8 @@ def test_cluster_status(mocker): mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), ) fake_aw = AppWrapper("test", AppWrapperStatus.FAILED) @@ -39,7 +40,7 @@ def test_cluster_status(mocker): namespace="ns", write_to_file=True, appwrapper=True, - local_queue="local_default_queue", + local_queue="local-queue-default", ) ) mocker.patch( diff --git a/src/codeflare_sdk/ray/cluster/test_generate_yaml.py b/src/codeflare_sdk/ray/cluster/test_build_ray_cluster.py similarity index 84% rename from src/codeflare_sdk/ray/cluster/test_generate_yaml.py rename to src/codeflare_sdk/ray/cluster/test_build_ray_cluster.py index 606cc950c..7d6d3d0a6 100644 --- a/src/codeflare_sdk/ray/cluster/test_generate_yaml.py +++ b/src/codeflare_sdk/ray/cluster/test_build_ray_cluster.py @@ -13,7 +13,7 @@ # limitations under the License. from collections import namedtuple import sys -from .generate_yaml import gen_names, update_image +from .build_ray_cluster import gen_names, update_image import uuid @@ -39,7 +39,7 @@ def test_gen_names_without_name(mocker): def test_update_image_without_supported_python_version(mocker): # Mock SUPPORTED_PYTHON_VERSIONS mocker.patch.dict( - "codeflare_sdk.ray.cluster.generate_yaml.SUPPORTED_PYTHON_VERSIONS", + "codeflare_sdk.ray.cluster.build_ray_cluster.SUPPORTED_PYTHON_VERSIONS", { "3.9": "ray-py3.9", "3.11": "ray-py3.11", @@ -55,16 +55,13 @@ def test_update_image_without_supported_python_version(mocker): # Mock warnings.warn to check if it gets called warn_mock = mocker.patch("warnings.warn") - # Create a sample spec - spec = {"containers": [{"image": None}]} - # Call the update_image function with no image provided - update_image(spec, None) + image = update_image(None) # Assert that the warning was called with the expected message warn_mock.assert_called_once_with( "No default Ray image defined for 3.8. Please provide your own image or use one of the following python versions: 3.9, 3.11." ) - # Assert that no image was set in the containers since the Python version is not supported - assert spec["containers"][0]["image"] is None + # Assert that no image was set since the Python version is not supported + assert image is None diff --git a/src/codeflare_sdk/ray/cluster/test_cluster.py b/src/codeflare_sdk/ray/cluster/test_cluster.py index 20438bbe3..5e83c82a8 100644 --- a/src/codeflare_sdk/ray/cluster/test_cluster.py +++ b/src/codeflare_sdk/ray/cluster/test_cluster.py @@ -25,21 +25,17 @@ arg_check_apply_effect, get_local_queue, createClusterConfig, - route_list_retrieval, get_ray_obj, - get_aw_obj, - get_named_aw, get_obj_none, get_ray_obj_with_status, get_aw_obj_with_status, ) -from codeflare_sdk.ray.cluster.generate_yaml import ( - is_openshift_cluster, - is_kind_cluster, -) +from codeflare_sdk.ray.cluster.cluster import _is_openshift_cluster from pathlib import Path from unittest.mock import MagicMock from kubernetes import client +import yaml +import filecmp import os parent = Path(__file__).resolve().parents[4] # project directory @@ -205,7 +201,7 @@ def test_local_client_url(mocker): return_value="rayclient-unit-test-cluster-localinter-ns.apps.cluster.awsroute.org", ) mocker.patch( - "codeflare_sdk.ray.cluster.cluster.Cluster.create_app_wrapper", + "codeflare_sdk.ray.cluster.cluster.Cluster.create_resource", return_value="unit-test-cluster-localinter.yaml", ) @@ -225,120 +221,51 @@ def test_local_client_url(mocker): """ -def test_get_cluster_openshift(mocker): +def test_get_cluster_no_appwrapper(mocker): + """ + This test uses the "test all params" unit test file as a comparison + """ + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - # Mock the client.ApisApi function to return a mock object - mock_api = MagicMock() - mock_api.get_api_versions.return_value.groups = [ - MagicMock(versions=[MagicMock(group_version="route.openshift.io/v1")]) - ] - mocker.patch("kubernetes.client.ApisApi", return_value=mock_api) mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", + "codeflare_sdk.ray.cluster.cluster._check_aw_exists", + return_value=False, ) - assert is_openshift_cluster() - - def custom_side_effect(group, version, namespace, plural, **kwargs): - if plural == "routes": - return route_list_retrieval("route.openshift.io", "v1", "ns", "routes") - elif plural == "rayclusters": - return get_ray_obj("ray.io", "v1", "ns", "rayclusters") - elif plural == "appwrappers": - return get_aw_obj("workload.codeflare.dev", "v1beta2", "ns", "appwrappers") - elif plural == "localqueues": - return get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues") - - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", get_aw_obj - ) - - mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - side_effect=custom_side_effect, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", - return_value=get_named_aw, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", - side_effect=route_list_retrieval("route.openshift.io", "v1", "ns", "routes")[ - "items" - ], - ) - mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", - ) - - cluster = get_cluster( - "test-cluster-a", "ns" - ) # see tests/test_cluster_yamls/support_clusters - cluster_config = cluster.config - - assert cluster_config.name == "test-cluster-a" and cluster_config.namespace == "ns" - assert cluster_config.head_cpu_requests == 2 and cluster_config.head_cpu_limits == 2 - assert ( - cluster_config.head_memory_requests == "8G" - and cluster_config.head_memory_limits == "8G" - ) - assert ( - cluster_config.worker_cpu_requests == 1 - and cluster_config.worker_cpu_limits == 1 - ) - assert ( - cluster_config.worker_memory_requests == "2G" - and cluster_config.worker_memory_limits == "2G" - ) - assert cluster_config.num_workers == 1 - assert cluster_config.write_to_file == False - assert cluster_config.local_queue == "local_default_queue" + with open(f"{expected_clusters_dir}/ray/unit-test-all-params.yaml") as f: + expected_rc = yaml.load(f, Loader=yaml.FullLoader) + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + return_value=expected_rc, + ) + get_cluster("test-all-params", "ns", write_to_file=True) + assert filecmp.cmp( + f"{aw_dir}test-all-params.yaml", + f"{expected_clusters_dir}/ray/unit-test-all-params.yaml", + shallow=True, + ) -def test_get_cluster(mocker): - # test get_cluster for Kind Clusters +def test_get_cluster_with_appwrapper(mocker): mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") mocker.patch( - "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", - side_effect=get_ray_obj, - ) - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", - side_effect=get_named_aw, - ) - mocker.patch( - "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", - return_value=ingress_retrieval(cluster_name="quicktest", client_ing=True), - ) - mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", + "codeflare_sdk.ray.cluster.cluster._check_aw_exists", + return_value=True, ) - cluster = get_cluster( - "test-cluster-a" - ) # see tests/test_cluster_yamls/support_clusters - cluster_config = cluster.config - assert cluster_config.name == "test-cluster-a" and cluster_config.namespace == "ns" - assert cluster_config.head_cpu_requests == 2 and cluster_config.head_cpu_limits == 2 - assert ( - cluster_config.head_memory_requests == "8G" - and cluster_config.head_memory_limits == "8G" - ) - assert ( - cluster_config.worker_cpu_requests == 1 - and cluster_config.worker_cpu_limits == 1 - ) - assert ( - cluster_config.worker_memory_requests == "2G" - and cluster_config.worker_memory_limits == "2G" - ) - assert cluster_config.num_workers == 1 - assert cluster_config.write_to_file == False - assert cluster_config.local_queue == "local_default_queue" + with open(f"{expected_clusters_dir}/appwrapper/unit-test-all-params.yaml") as f: + expected_aw = yaml.load(f, Loader=yaml.FullLoader) + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + return_value=expected_aw, + ) + get_cluster("aw-all-params", "ns", write_to_file=True) + assert filecmp.cmp( + f"{aw_dir}aw-all-params.yaml", + f"{expected_clusters_dir}/appwrapper/unit-test-all-params.yaml", + shallow=True, + ) def test_wait_ready(mocker, capsys): @@ -356,10 +283,6 @@ def test_wait_ready(mocker, capsys): mocker.patch( "codeflare_sdk.ray.cluster.cluster._ray_cluster_status", return_value=None ) - mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", - ) mocker.patch.object( client.CustomObjectsApi, "list_namespaced_custom_object", @@ -381,7 +304,6 @@ def test_wait_ready(mocker, capsys): namespace="ns", write_to_file=False, appwrapper=True, - local_queue="local-queue-default", ) ) try: @@ -460,7 +382,7 @@ def test_list_queue_rayclusters(mocker, capsys): ] mocker.patch("kubernetes.client.ApisApi", return_value=mock_api) - assert is_openshift_cluster() == True + assert _is_openshift_cluster() == True mocker.patch( "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", return_value=get_obj_none("ray.io", "v1", "ns", "rayclusters"), @@ -566,7 +488,7 @@ def test_map_to_ray_cluster(mocker): mocker.patch("kubernetes.config.load_kube_config") mocker.patch( - "codeflare_sdk.ray.cluster.cluster.is_openshift_cluster", return_value=True + "codeflare_sdk.ray.cluster.cluster._is_openshift_cluster", return_value=True ) mock_api_client = mocker.MagicMock(spec=client.ApiClient) @@ -608,3 +530,9 @@ def custom_side_effect(group, version, namespace, plural, **kwargs): assert result is not None assert result.dashboard == rc_dashboard + + +# Make sure to always keep this function last +def test_cleanup(): + os.remove(f"{aw_dir}test-all-params.yaml") + os.remove(f"{aw_dir}aw-all-params.yaml") diff --git a/src/codeflare_sdk/ray/cluster/test_config.py b/src/codeflare_sdk/ray/cluster/test_config.py index f1ac53559..1423fc2b5 100644 --- a/src/codeflare_sdk/ray/cluster/test_config.py +++ b/src/codeflare_sdk/ray/cluster/test_config.py @@ -12,10 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from codeflare_sdk.common.utils.unit_test_support import createClusterWrongType +from codeflare_sdk.common.utils.unit_test_support import ( + createClusterWrongType, + get_local_queue, + create_cluster_all_config_params, +) from codeflare_sdk.ray.cluster.cluster import ClusterConfiguration, Cluster from pathlib import Path -from unittest.mock import patch import filecmp import pytest import yaml @@ -31,17 +34,11 @@ def test_default_cluster_creation(mocker): mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch("kubernetes.client.CustomObjectsApi.list_namespaced_custom_object") - cluster = Cluster( - ClusterConfiguration( - name="default-cluster", - namespace="ns", - ) - ) + cluster = Cluster(ClusterConfiguration(name="default-cluster", namespace="ns")) - test_rc = yaml.load(cluster.app_wrapper_yaml, Loader=yaml.FullLoader) with open(f"{expected_clusters_dir}/ray/default-ray-cluster.yaml") as f: expected_rc = yaml.load(f, Loader=yaml.FullLoader) - assert test_rc == expected_rc + assert cluster.resource_yaml == expected_rc def test_default_appwrapper_creation(mocker): @@ -53,81 +50,46 @@ def test_default_appwrapper_creation(mocker): ClusterConfiguration(name="default-appwrapper", namespace="ns", appwrapper=True) ) - test_aw = yaml.load(cluster.app_wrapper_yaml, Loader=yaml.FullLoader) with open(f"{expected_clusters_dir}/ray/default-appwrapper.yaml") as f: expected_aw = yaml.load(f, Loader=yaml.FullLoader) - assert test_aw == expected_aw + assert cluster.resource_yaml == expected_aw -@patch.dict("os.environ", {"NB_PREFIX": "test-prefix"}) def test_config_creation_all_parameters(mocker): from codeflare_sdk.ray.cluster.config import DEFAULT_RESOURCE_MAPPING - mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", - ) - extended_resource_mapping = DEFAULT_RESOURCE_MAPPING - extended_resource_mapping.update({"example.com/gpu": "GPU"}) - - config = ClusterConfiguration( - name="test-all-params", - namespace="ns", - head_info=["test1", "test2"], - head_cpu_requests=4, - head_cpu_limits=8, - head_memory_requests=12, - head_memory_limits=16, - head_extended_resource_requests={"nvidia.com/gpu": 1}, - machine_types={"gpu.small", "gpu.large"}, - worker_cpu_requests=4, - worker_cpu_limits=8, - num_workers=10, - worker_memory_requests=12, - worker_memory_limits=16, - template=f"{parent}/src/codeflare_sdk/ray/templates/base-template.yaml", - appwrapper=False, - envs={"key1": "value1", "key2": "value2"}, - image="example/ray:tag", - image_pull_secrets=["secret1", "secret2"], - write_to_file=True, - verify_tls=True, - labels={"key1": "value1", "key2": "value2"}, - worker_extended_resource_requests={"nvidia.com/gpu": 1}, - extended_resource_mapping=extended_resource_mapping, - overwrite_default_resource_mapping=True, - local_queue="local-queue-default", - ) - Cluster(config) - - assert config.name == "test-all-params" and config.namespace == "ns" - assert config.head_info == ["test1", "test2"] - assert config.head_cpu_requests == 4 - assert config.head_cpu_limits == 8 - assert config.head_memory_requests == "12G" - assert config.head_memory_limits == "16G" - assert config.head_extended_resource_requests == {"nvidia.com/gpu": 1} - assert config.machine_types == {"gpu.small", "gpu.large"} - assert config.worker_cpu_requests == 4 - assert config.worker_cpu_limits == 8 - assert config.num_workers == 10 - assert config.worker_memory_requests == "12G" - assert config.worker_memory_limits == "16G" + expected_extended_resource_mapping = DEFAULT_RESOURCE_MAPPING + expected_extended_resource_mapping.update({"example.com/gpu": "GPU"}) + expected_extended_resource_mapping["intel.com/gpu"] = "TPU" + + cluster = create_cluster_all_config_params(mocker, "test-all-params", False) + assert cluster.config.name == "test-all-params" and cluster.config.namespace == "ns" + assert cluster.config.head_cpu_requests == 4 + assert cluster.config.head_cpu_limits == 8 + assert cluster.config.head_memory_requests == "12G" + assert cluster.config.head_memory_limits == "16G" + assert cluster.config.head_extended_resource_requests == { + "nvidia.com/gpu": 1, + "intel.com/gpu": 2, + } + assert cluster.config.worker_cpu_requests == 4 + assert cluster.config.worker_cpu_limits == 8 + assert cluster.config.num_workers == 10 + assert cluster.config.worker_memory_requests == "12G" + assert cluster.config.worker_memory_limits == "16G" + assert cluster.config.appwrapper == False + assert cluster.config.envs == {"key1": "value1", "key2": "value2"} + assert cluster.config.image == "example/ray:tag" + assert cluster.config.image_pull_secrets == ["secret1", "secret2"] + assert cluster.config.write_to_file == True + assert cluster.config.verify_tls == True + assert cluster.config.labels == {"key1": "value1", "key2": "value2"} + assert cluster.config.worker_extended_resource_requests == {"nvidia.com/gpu": 1} assert ( - config.template - == f"{parent}/src/codeflare_sdk/ray/templates/base-template.yaml" + cluster.config.extended_resource_mapping == expected_extended_resource_mapping ) - assert config.appwrapper == False - assert config.envs == {"key1": "value1", "key2": "value2"} - assert config.image == "example/ray:tag" - assert config.image_pull_secrets == ["secret1", "secret2"] - assert config.write_to_file == True - assert config.verify_tls == True - assert config.labels == {"key1": "value1", "key2": "value2"} - assert config.worker_extended_resource_requests == {"nvidia.com/gpu": 1} - assert config.extended_resource_mapping == extended_resource_mapping - assert config.overwrite_default_resource_mapping == True - assert config.local_queue == "local-queue-default" + assert cluster.config.overwrite_default_resource_mapping == True + assert cluster.config.local_queue == "local-queue-default" assert filecmp.cmp( f"{aw_dir}test-all-params.yaml", @@ -136,6 +98,15 @@ def test_config_creation_all_parameters(mocker): ) +def test_all_config_params_aw(mocker): + create_cluster_all_config_params(mocker, "aw-all-params", True) + assert filecmp.cmp( + f"{aw_dir}aw-all-params.yaml", + f"{expected_clusters_dir}/appwrapper/unit-test-all-params.yaml", + shallow=True, + ) + + def test_config_creation_wrong_type(): with pytest.raises(TypeError): createClusterWrongType() @@ -168,3 +139,4 @@ def test_cluster_config_deprecation_conversion(mocker): # Make sure to always keep this function last def test_cleanup(): os.remove(f"{aw_dir}test-all-params.yaml") + os.remove(f"{aw_dir}aw-all-params.yaml") diff --git a/src/codeflare_sdk/ray/cluster/test_pretty_print.py b/src/codeflare_sdk/ray/cluster/test_pretty_print.py index b0da42011..329a1354d 100644 --- a/src/codeflare_sdk/ray/cluster/test_pretty_print.py +++ b/src/codeflare_sdk/ray/cluster/test_pretty_print.py @@ -29,6 +29,7 @@ ClusterConfiguration, _copy_to_ray, ) +from codeflare_sdk.common.utils.unit_test_support import get_local_queue def test_print_no_resources(capsys): @@ -100,15 +101,15 @@ def test_ray_details(mocker, capsys): return_value="", ) mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), ) cf = Cluster( ClusterConfiguration( name="raytest2", namespace="ns", appwrapper=True, - local_queue="local_default_queue", + local_queue="local-queue-default", ) ) captured = capsys.readouterr() diff --git a/src/codeflare_sdk/ray/cluster/test_status.py b/src/codeflare_sdk/ray/cluster/test_status.py index 146d21901..27eda49ec 100644 --- a/src/codeflare_sdk/ray/cluster/test_status.py +++ b/src/codeflare_sdk/ray/cluster/test_status.py @@ -23,6 +23,7 @@ RayCluster, ) import os +from ...common.utils.unit_test_support import get_local_queue aw_dir = os.path.expanduser("~/.codeflare/resources/") @@ -30,10 +31,6 @@ def test_cluster_status(mocker): mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "codeflare_sdk.common.kueue.kueue.local_queue_exists", - return_value="true", - ) fake_ray = RayCluster( name="test", @@ -50,13 +47,19 @@ def test_cluster_status(mocker): head_mem_requests=8, head_mem_limits=8, ) + + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), + ) + cf = Cluster( ClusterConfiguration( name="test", namespace="ns", write_to_file=True, appwrapper=False, - local_queue="local_default_queue", + local_queue="local-queue-default", ) ) mocker.patch( diff --git a/tests/test_cluster_yamls/appwrapper/test-case-bad.yaml b/tests/test_cluster_yamls/appwrapper/test-case-bad.yaml index a072e9e92..9166eced5 100644 --- a/tests/test_cluster_yamls/appwrapper/test-case-bad.yaml +++ b/tests/test_cluster_yamls/appwrapper/test-case-bad.yaml @@ -18,7 +18,6 @@ spec: spec: autoscalerOptions: idleTimeoutSeconds: 60 - imagePullPolicy: Always resources: limits: cpu: 500m @@ -44,7 +43,7 @@ spec: fieldRef: fieldPath: status.podIP image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 - imagePullPolicy: Always + imagePullPolicy: IfNotPresent lifecycle: preStop: exec: diff --git a/tests/test_cluster_yamls/appwrapper/unit-test-all-params.yaml b/tests/test_cluster_yamls/appwrapper/unit-test-all-params.yaml new file mode 100644 index 000000000..6d2c5440a --- /dev/null +++ b/tests/test_cluster_yamls/appwrapper/unit-test-all-params.yaml @@ -0,0 +1,176 @@ +apiVersion: workload.codeflare.dev/v1beta2 +kind: AppWrapper +metadata: + labels: + kueue.x-k8s.io/queue-name: local-queue-default + name: aw-all-params + namespace: ns +spec: + components: + - template: + apiVersion: ray.io/v1 + kind: RayCluster + metadata: + annotations: + app.kubernetes.io/managed-by: test-prefix + labels: + controller-tools.k8s.io: '1.0' + key1: value1 + key2: value2 + name: aw-all-params + namespace: ns + spec: + autoscalerOptions: + idleTimeoutSeconds: 60 + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 500m + memory: 512Mi + upscalingMode: Default + enableInTreeAutoscaling: false + headGroupSpec: + enableIngress: false + rayStartParams: + block: 'true' + dashboard-host: 0.0.0.0 + num-gpus: '1' + resources: '"{\"TPU\": 2}"' + serviceType: ClusterIP + template: + spec: + containers: + - env: + - name: key1 + value: value1 + - name: key2 + value: value2 + image: example/ray:tag + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: ray-head + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: 8 + intel.com/gpu: 2 + memory: 16G + nvidia.com/gpu: 1 + requests: + cpu: 4 + intel.com/gpu: 2 + memory: 12G + nvidia.com/gpu: 1 + volumeMounts: + - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + imagePullSecrets: + - name: secret1 + - name: secret2 + volumes: + - configMap: + items: + - key: ca-bundle.crt + path: odh-trusted-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-trusted-ca-cert + - configMap: + items: + - key: odh-ca-bundle.crt + path: odh-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-ca-cert + rayVersion: 2.35.0 + workerGroupSpecs: + - groupName: small-group-aw-all-params + maxReplicas: 10 + minReplicas: 10 + rayStartParams: + block: 'true' + num-gpus: '1' + resources: '"{}"' + replicas: 10 + template: + spec: + containers: + - env: + - name: key1 + value: value1 + - name: key2 + value: value2 + image: example/ray:tag + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: machine-learning + resources: + limits: + cpu: 8 + memory: 16G + nvidia.com/gpu: 1 + requests: + cpu: 4 + memory: 12G + nvidia.com/gpu: 1 + volumeMounts: + - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-trusted-ca-bundle.crt + name: odh-trusted-ca-cert + subPath: odh-trusted-ca-bundle.crt + - mountPath: /etc/pki/tls/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + - mountPath: /etc/ssl/certs/odh-ca-bundle.crt + name: odh-ca-cert + subPath: odh-ca-bundle.crt + imagePullSecrets: + - name: secret1 + - name: secret2 + volumes: + - configMap: + items: + - key: ca-bundle.crt + path: odh-trusted-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-trusted-ca-cert + - configMap: + items: + - key: odh-ca-bundle.crt + path: odh-ca-bundle.crt + name: odh-trusted-ca-bundle + optional: true + name: odh-ca-cert diff --git a/tests/test_cluster_yamls/kueue/aw_kueue.yaml b/tests/test_cluster_yamls/kueue/aw_kueue.yaml index 2c6d868ac..402ffb6a6 100644 --- a/tests/test_cluster_yamls/kueue/aw_kueue.yaml +++ b/tests/test_cluster_yamls/kueue/aw_kueue.yaml @@ -18,7 +18,6 @@ spec: spec: autoscalerOptions: idleTimeoutSeconds: 60 - imagePullPolicy: Always resources: limits: cpu: 500m @@ -76,7 +75,6 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt - imagePullSecrets: [] volumes: - configMap: items: @@ -103,14 +101,10 @@ spec: resources: '"{}"' replicas: 2 template: - metadata: - annotations: - key: value - labels: - key: value spec: containers: - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 + imagePullPolicy: Always lifecycle: preStop: exec: @@ -139,7 +133,6 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt - imagePullSecrets: [] volumes: - configMap: items: diff --git a/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml b/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml index 0c4efb29a..a5cb36164 100644 --- a/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml +++ b/tests/test_cluster_yamls/kueue/ray_cluster_kueue.yaml @@ -18,7 +18,6 @@ spec: spec: autoscalerOptions: idleTimeoutSeconds: 60 - imagePullPolicy: Always resources: limits: cpu: 500m @@ -76,7 +75,6 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt - imagePullSecrets: [] volumes: - configMap: items: @@ -103,14 +101,10 @@ spec: resources: '"{}"' replicas: 2 template: - metadata: - annotations: - key: value - labels: - key: value spec: containers: - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 + imagePullPolicy: Always lifecycle: preStop: exec: @@ -139,7 +133,6 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt - imagePullSecrets: [] volumes: - configMap: items: diff --git a/tests/test_cluster_yamls/ray/default-appwrapper.yaml b/tests/test_cluster_yamls/ray/default-appwrapper.yaml index 60152c1e7..3e97474d4 100644 --- a/tests/test_cluster_yamls/ray/default-appwrapper.yaml +++ b/tests/test_cluster_yamls/ray/default-appwrapper.yaml @@ -16,7 +16,6 @@ spec: spec: autoscalerOptions: idleTimeoutSeconds: 60 - imagePullPolicy: Always resources: limits: cpu: 500m @@ -74,7 +73,6 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt - imagePullSecrets: [] volumes: - configMap: items: @@ -101,14 +99,10 @@ spec: resources: '"{}"' replicas: 1 template: - metadata: - annotations: - key: value - labels: - key: value spec: containers: - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 + imagePullPolicy: Always lifecycle: preStop: exec: @@ -137,7 +131,6 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt - imagePullSecrets: [] volumes: - configMap: items: diff --git a/tests/test_cluster_yamls/ray/default-ray-cluster.yaml b/tests/test_cluster_yamls/ray/default-ray-cluster.yaml index 7a3329b6d..34de53d2b 100644 --- a/tests/test_cluster_yamls/ray/default-ray-cluster.yaml +++ b/tests/test_cluster_yamls/ray/default-ray-cluster.yaml @@ -8,7 +8,6 @@ metadata: spec: autoscalerOptions: idleTimeoutSeconds: 60 - imagePullPolicy: Always resources: limits: cpu: 500m @@ -66,7 +65,6 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt - imagePullSecrets: [] volumes: - configMap: items: @@ -93,14 +91,10 @@ spec: resources: '"{}"' replicas: 1 template: - metadata: - annotations: - key: value - labels: - key: value spec: containers: - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 + imagePullPolicy: Always lifecycle: preStop: exec: @@ -129,7 +123,6 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt - imagePullSecrets: [] volumes: - configMap: items: diff --git a/tests/test_cluster_yamls/ray/unit-test-all-params.yaml b/tests/test_cluster_yamls/ray/unit-test-all-params.yaml index eda7270f6..8426eede6 100644 --- a/tests/test_cluster_yamls/ray/unit-test-all-params.yaml +++ b/tests/test_cluster_yamls/ray/unit-test-all-params.yaml @@ -13,7 +13,6 @@ metadata: spec: autoscalerOptions: idleTimeoutSeconds: 60 - imagePullPolicy: Always resources: limits: cpu: 500m @@ -29,14 +28,16 @@ spec: block: 'true' dashboard-host: 0.0.0.0 num-gpus: '1' - resources: '"{}"' + resources: '"{\"TPU\": 2}"' serviceType: ClusterIP template: spec: containers: - - env: &id001 - key1: value1 - key2: value2 + - env: + - name: key1 + value: value1 + - name: key2 + value: value2 image: example/ray:tag imagePullPolicy: Always lifecycle: @@ -57,10 +58,12 @@ spec: resources: limits: cpu: 8 + intel.com/gpu: 2 memory: 16G nvidia.com/gpu: 1 requests: cpu: 4 + intel.com/gpu: 2 memory: 12G nvidia.com/gpu: 1 volumeMounts: @@ -105,15 +108,15 @@ spec: resources: '"{}"' replicas: 10 template: - metadata: - annotations: - key: value - labels: - key: value spec: containers: - - env: *id001 + - env: + - name: key1 + value: value1 + - name: key2 + value: value2 image: example/ray:tag + imagePullPolicy: Always lifecycle: preStop: exec: diff --git a/tests/test_cluster_yamls/support_clusters/test-aw-a.yaml b/tests/test_cluster_yamls/support_clusters/test-aw-a.yaml index 9b8a647f6..fe26900d7 100644 --- a/tests/test_cluster_yamls/support_clusters/test-aw-a.yaml +++ b/tests/test_cluster_yamls/support_clusters/test-aw-a.yaml @@ -18,7 +18,6 @@ spec: spec: autoscalerOptions: idleTimeoutSeconds: 60 - imagePullPolicy: Always resources: limits: cpu: 500m @@ -40,7 +39,7 @@ spec: spec: containers: - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 - imagePullPolicy: Always + imagePullPolicy: IfNotPresent lifecycle: preStop: exec: diff --git a/tests/test_cluster_yamls/support_clusters/test-aw-b.yaml b/tests/test_cluster_yamls/support_clusters/test-aw-b.yaml index 763eb5c2a..eed571fe7 100644 --- a/tests/test_cluster_yamls/support_clusters/test-aw-b.yaml +++ b/tests/test_cluster_yamls/support_clusters/test-aw-b.yaml @@ -18,7 +18,6 @@ spec: spec: autoscalerOptions: idleTimeoutSeconds: 60 - imagePullPolicy: Always resources: limits: cpu: 500m @@ -40,7 +39,7 @@ spec: spec: containers: - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 - imagePullPolicy: Always + imagePullPolicy: IfNotPresent lifecycle: preStop: exec: diff --git a/tests/test_cluster_yamls/support_clusters/test-rc-a.yaml b/tests/test_cluster_yamls/support_clusters/test-rc-a.yaml index f12ffde00..5f5d456cb 100644 --- a/tests/test_cluster_yamls/support_clusters/test-rc-a.yaml +++ b/tests/test_cluster_yamls/support_clusters/test-rc-a.yaml @@ -9,7 +9,6 @@ metadata: spec: autoscalerOptions: idleTimeoutSeconds: 60 - imagePullPolicy: Always resources: limits: cpu: 500m @@ -31,7 +30,7 @@ spec: spec: containers: - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 - imagePullPolicy: Always + imagePullPolicy: IfNotPresent lifecycle: preStop: exec: diff --git a/tests/test_cluster_yamls/support_clusters/test-rc-b.yaml b/tests/test_cluster_yamls/support_clusters/test-rc-b.yaml index 1d41e365f..3bf894dbb 100644 --- a/tests/test_cluster_yamls/support_clusters/test-rc-b.yaml +++ b/tests/test_cluster_yamls/support_clusters/test-rc-b.yaml @@ -9,7 +9,6 @@ metadata: spec: autoscalerOptions: idleTimeoutSeconds: 60 - imagePullPolicy: Always resources: limits: cpu: 500m @@ -31,7 +30,7 @@ spec: spec: containers: - image: quay.io/modh/ray@sha256:0d715f92570a2997381b7cafc0e224cfa25323f18b9545acfd23bc2b71576d06 - imagePullPolicy: Always + imagePullPolicy: IfNotPresent lifecycle: preStop: exec: diff --git a/ui-tests/playwright.config.js b/ui-tests/playwright.config.js index 88003f8ea..1488012e8 100644 --- a/ui-tests/playwright.config.js +++ b/ui-tests/playwright.config.js @@ -6,7 +6,7 @@ module.exports = { webServer: { command: 'yarn start', url: 'http://localhost:8888/lab', - timeout: 120 * 1000, + timeout: 600 * 1000, reuseExistingServer: !process.env.CI, }, retries: 0,