From a6686d79f0a55e0b6a73420025d57c5446da780d Mon Sep 17 00:00:00 2001 From: r2k1 Date: Wed, 26 Feb 2025 18:35:54 +1300 Subject: [PATCH] update tests --- e2e/scenario_test.go | 45 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/e2e/scenario_test.go b/e2e/scenario_test.go index 9bbeba8e7dc..4dc36a575a6 100644 --- a/e2e/scenario_test.go +++ b/e2e/scenario_test.go @@ -1662,7 +1662,7 @@ func Test_Ubuntu2404ARM(t *testing.T) { }) } -func Test_Ubuntu2404Gen2Containerd_AMDGPU_MI300(t *testing.T) { +func Test_Ubuntu2204Gen2Containerd_AMDGPU_MI300(t *testing.T) { t.Skip("Provisioning of Standard_ND96isr_MI300X_v5 isn't reliable yet") //E2E_LOCATION=eastus2euap //SUBSCRIPTION_ID=4f3dc0e4-0c77-40ff-bf9a-6ade1e3048ef @@ -1673,12 +1673,10 @@ func Test_Ubuntu2404Gen2Containerd_AMDGPU_MI300(t *testing.T) { }, Config: Config{ Cluster: ClusterKubenet, - VHD: config.VHDUbuntu2404Gen2Containerd, //TODO: add support for older + VHD: config.VHDUbuntu2204Gen2Containerd, BootstrapConfigMutator: func(nbc *datamodel.NodeBootstrappingConfiguration) { nbc.ContainerService.Properties.AgentPoolProfiles[0].VMSize = "Standard_ND96isr_MI300X_v5" - nbc.ContainerService.Properties.AgentPoolProfiles[0].Distro = "aks-cblmariner-v2-gen2" nbc.AgentPoolProfile.VMSize = "Standard_ND96isr_MI300X_v5" - nbc.AgentPoolProfile.Distro = "aks-cblmariner-v2-gen2" nbc.EnableAMDGPU = true nbc.ConfigGPUDriverIfNeeded = true }, @@ -1709,9 +1707,7 @@ func Test_Ubuntu2204Gen2Containerd_AMDGPU_V710(t *testing.T) { VHD: config.VHDUbuntu2204Gen2Containerd, BootstrapConfigMutator: func(nbc *datamodel.NodeBootstrappingConfiguration) { nbc.ContainerService.Properties.AgentPoolProfiles[0].VMSize = "Standard_NV4ads_V710_v5" - nbc.ContainerService.Properties.AgentPoolProfiles[0].Distro = "aks-cblmariner-v2-gen2" nbc.AgentPoolProfile.VMSize = "Standard_NV4ads_V710_v5" - nbc.AgentPoolProfile.Distro = "aks-cblmariner-v2-gen2" nbc.EnableAMDGPU = true 
nbc.ConfigGPUDriverIfNeeded = true @@ -1722,6 +1718,43 @@ func Test_Ubuntu2204Gen2Containerd_AMDGPU_V710(t *testing.T) { vmss.Properties.VirtualMachineProfile.StorageProfile.OSDisk.DiskSizeGB = to.Ptr[int32](128) }, Validator: func(ctx context.Context, s *Scenario) { + res := execScriptOnVMForScenario(ctx, s, "df") + t.Log(res.String()) + ValidateAMDGPU(ctx, s) + }, + }, + }) +} + +func Test_Ubuntu2404Gen2_AMDGPU_V710(t *testing.T) { + // the SKU isn't available in the subscription/region where we run tests + // TODO: enable once the SKU is available + t.Skip("Provisioning of NV4ads_V710_v5 isn't reliable yet") + //E2E_LOCATION=southcentralus + //SUBSCRIPTION_ID=4f3dc0e4-0c77-40ff-bf9a-6ade1e3048ef + RunScenario(t, &Scenario{ + Description: "Tests that a GPU-enabled node using a MarinerV2 VHD can be properly bootstrapped", + Tags: Tags{ + GPU: true, + }, + Config: Config{ + Cluster: ClusterKubenet, + VHD: config.VHDUbuntu2404Gen2Containerd, + BootstrapConfigMutator: func(nbc *datamodel.NodeBootstrappingConfiguration) { + nbc.ContainerService.Properties.AgentPoolProfiles[0].VMSize = "Standard_NV4ads_V710_v5" + nbc.AgentPoolProfile.VMSize = "Standard_NV4ads_V710_v5" + nbc.EnableAMDGPU = true + nbc.ConfigGPUDriverIfNeeded = true + + }, + VMConfigMutator: func(vmss *armcompute.VirtualMachineScaleSet) { + vmss.SKU.Name = to.Ptr("Standard_NV4ads_V710_v5") + // rocm images are huge, some space for manual testing + vmss.Properties.VirtualMachineProfile.StorageProfile.OSDisk.DiskSizeGB = to.Ptr[int32](128) + //vmss.Properties.VirtualMachineProfile.ExtensionProfile.Extensions[0].Properties.ProtectedSettings[] + }, + Validator: func(ctx context.Context, s *Scenario) { + execScriptOnVMForScenario(ctx, s, "df") ValidateAMDGPU(ctx, s) }, },