Skip to content

Commit 71bf9f6

Browse files
committed
Update DRA testing to stable API version and prepare it to test more types of drivers
1 parent 147e565 commit 71bf9f6

File tree

13 files changed

+163
-51
lines changed

13 files changed

+163
-51
lines changed

clusterloader2/pkg/dependency/dra/dra.go

Lines changed: 102 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"fmt"
2323
"time"
2424

25+
corev1 "k8s.io/api/core/v1"
2526
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2627
"k8s.io/apimachinery/pkg/util/wait"
2728
"k8s.io/klog/v2"
@@ -31,16 +32,16 @@ import (
3132
)
3233

3334
const (
34-
draDependencyName = "DRATestDriver"
35-
//TODO: this needs to be converted into a parameter. Will will not need this until parititionable devices test
35+
draDependencyName = "DRATestDriver"
3636
draNamespace = "dra-example-driver"
37+
draManifests = "manifests/*.yaml"
3738
defaultWorkerNodeCount = "100"
3839
draDaemonsetName = "dra-example-driver-kubeletplugin"
3940
checkDRAReadyInterval = 30 * time.Second
4041
defaultDRATimeout = 10 * time.Minute
4142
)
4243

43-
//go:embed manifests/*.yaml
44+
//go:embed manifests/**/*.yaml
4445
var manifestsFS embed.FS
4546

4647
func init() {
@@ -57,13 +58,24 @@ type draDependency struct{}
5758

5859
func (d *draDependency) Setup(config *dependency.Config) error {
5960
klog.V(2).Infof("%s: Installing DRA example driver", d)
60-
if err := client.CreateNamespace(config.ClusterFramework.GetClientSets().GetClient(), draNamespace); err != nil {
61-
return fmt.Errorf("namespace %s creation error: %v", draNamespace, err)
61+
62+
namespace, err := getNamespace(config)
63+
if err != nil {
64+
return err
6265
}
6366

64-
namespace, ok := config.Params["Namespace"]
65-
if !ok {
66-
namespace = draNamespace
67+
if err := client.CreateNamespace(config.ClusterFramework.GetClientSets().GetClient(), namespace); err != nil {
68+
return fmt.Errorf("namespace %s creation error: %v", namespace, err)
69+
}
70+
71+
manifests, err := getManifests(config)
72+
if err != nil {
73+
return err
74+
}
75+
76+
daemonsetName, err := getDaemonset(config)
77+
if err != nil {
78+
return err
6779
}
6880

6981
mapping := map[string]interface{}{
@@ -72,7 +84,7 @@ func (d *draDependency) Setup(config *dependency.Config) error {
7284
}
7385
if err := config.ClusterFramework.ApplyTemplatedManifests(
7486
manifestsFS,
75-
"manifests/*.yaml",
87+
manifests,
7688
mapping,
7789
client.Retry(client.IsRetryableAPIError),
7890
); err != nil {
@@ -82,8 +94,8 @@ func (d *draDependency) Setup(config *dependency.Config) error {
8294
if err != nil {
8395
return err
8496
}
85-
klog.V(2).Infof("%s: checking if DRA driver %s is healthy", d, draDaemonsetName)
86-
if err := d.waitForDRADriverToBeHealthy(config, timeout); err != nil {
97+
klog.V(2).Infof("%s: checking if DRA driver %s is healthy", d, daemonsetName)
98+
if err := d.waitForDRADriverToBeHealthy(config, timeout, daemonsetName, namespace); err != nil {
8799
return err
88100
}
89101

@@ -94,25 +106,30 @@ func (d *draDependency) Setup(config *dependency.Config) error {
94106
func (d *draDependency) Teardown(config *dependency.Config) error {
95107
klog.V(2).Infof("%s: Tearing down DRA example driver", d)
96108

109+
namespace, err := getNamespace(config)
110+
if err != nil {
111+
return err
112+
}
113+
97114
// Delete namespace (this will delete all resources in it)
98-
if err := client.DeleteNamespace(config.ClusterFramework.GetClientSets().GetClient(), draNamespace); err != nil {
99-
return fmt.Errorf("deleting %s namespace error: %v", draNamespace, err)
115+
if err := client.DeleteNamespace(config.ClusterFramework.GetClientSets().GetClient(), namespace); err != nil {
116+
return fmt.Errorf("deleting %s namespace error: %v", namespace, err)
100117
}
101118

102-
if err := client.WaitForDeleteNamespace(config.ClusterFramework.GetClientSets().GetClient(), draNamespace, client.DefaultNamespaceDeletionTimeout); err != nil {
119+
if err := client.WaitForDeleteNamespace(config.ClusterFramework.GetClientSets().GetClient(), namespace, client.DefaultNamespaceDeletionTimeout); err != nil {
103120
return err
104121
}
105122

106123
klog.V(2).Infof("%s: DRA example driver uninstalled successfully", d)
107124
return nil
108125
}
109126

110-
func (d *draDependency) waitForDRADriverToBeHealthy(config *dependency.Config, timeout time.Duration) error {
127+
func (d *draDependency) waitForDRADriverToBeHealthy(config *dependency.Config, timeout time.Duration, daemonsetName string, namespace string) error {
111128
if err := wait.PollImmediate(
112129
checkDRAReadyInterval,
113130
timeout,
114131
func() (done bool, err error) {
115-
return d.isDRADriverReady(config)
132+
return d.isDRADriverReady(config, daemonsetName, namespace)
116133
}); err != nil {
117134
return err
118135
}
@@ -127,27 +144,33 @@ func (d *draDependency) waitForDRADriverToBeHealthy(config *dependency.Config, t
127144
return nil
128145
}
129146

130-
func (d *draDependency) isDRADriverReady(config *dependency.Config) (done bool, err error) {
147+
func (d *draDependency) isDRADriverReady(config *dependency.Config, daemonsetName string, namespace string) (done bool, err error) {
131148
ds, err := config.ClusterFramework.GetClientSets().
132149
GetClient().
133150
AppsV1().
134-
DaemonSets(draNamespace).
135-
Get(context.Background(), draDaemonsetName, metav1.GetOptions{})
151+
DaemonSets(namespace).
152+
Get(context.Background(), daemonsetName, metav1.GetOptions{})
136153
if err != nil {
137-
return false, fmt.Errorf("failed to get %s: %v", draDaemonsetName, err)
154+
return false, fmt.Errorf("failed to get %s: %v", daemonsetName, err)
138155
}
139156
ready := ds.Status.NumberReady == ds.Status.DesiredNumberScheduled
140157
if !ready {
141158
klog.V(2).Infof("%s is not ready, "+
142-
"DesiredNumberScheduled: %d, NumberReady: %d", draDaemonsetName, ds.Status.DesiredNumberScheduled, ds.Status.NumberReady)
159+
"DesiredNumberScheduled: %d, NumberReady: %d", daemonsetName, ds.Status.DesiredNumberScheduled, ds.Status.NumberReady)
143160
}
144161
return ready, nil
145162
}
146163

147164
func isResourceSlicesPublished(config *dependency.Config) (bool, error) {
148-
workerCount := int(getWorkerCount(config).(float64))
165+
// Get a list of all nodes
166+
nodes, err := getReadyNodesCount(config)
167+
if err != nil {
168+
return false, fmt.Errorf("failed to list nodes: %v", err)
169+
}
170+
171+
workerCount := nodes
149172

150-
resourceSlices, err := config.ClusterFramework.GetClientSets().GetClient().ResourceV1beta1().ResourceSlices().List(context.Background(), metav1.ListOptions{})
173+
resourceSlices, err := config.ClusterFramework.GetClientSets().GetClient().ResourceV1().ResourceSlices().List(context.Background(), metav1.ListOptions{})
151174
if err != nil {
152175
return false, fmt.Errorf("failed to list resourceslices: %v", err)
153176
}
@@ -159,6 +182,25 @@ func isResourceSlicesPublished(config *dependency.Config) (bool, error) {
159182
return true, nil
160183
}
161184

185+
func getReadyNodesCount(config *dependency.Config) (int, error) {
186+
// Get a list of all nodes
187+
nodes, err := config.ClusterFramework.GetClientSets().GetClient().CoreV1().Nodes().List(context.Background(), metav1.ListOptions{})
188+
if err != nil {
189+
return 0, fmt.Errorf("failed to list nodes: %v", err)
190+
}
191+
192+
readyNodes := 0
193+
for _, node := range nodes.Items {
194+
for _, condition := range node.Status.Conditions {
195+
if condition.Type == corev1.NodeReady && condition.Status == corev1.ConditionTrue {
196+
readyNodes++
197+
break // Found the Ready condition, move to the next node
198+
}
199+
}
200+
}
201+
return readyNodes, nil
202+
}
203+
162204
func getWorkerCount(config *dependency.Config) interface{} {
163205
workerCount, ok := config.Params["WorkerNodeCount"]
164206
if !ok {
@@ -167,6 +209,43 @@ func getWorkerCount(config *dependency.Config) interface{} {
167209
return workerCount
168210
}
169211

212+
func getNamespace(config *dependency.Config) (string, error) {
213+
namespace, ok := config.Params["Namespace"]
214+
if !ok {
215+
namespace = draNamespace
216+
}
217+
namespaceString, ok := namespace.(string)
218+
219+
if !ok {
220+
return "", fmt.Errorf("namespace parameter is not a string: %v", namespace)
221+
}
222+
return namespaceString, nil
223+
}
224+
225+
func getManifests(config *dependency.Config) (string, error) {
226+
manifests, ok := config.Params["Manifests"]
227+
if !ok {
228+
manifests = draManifests
229+
}
230+
manifestsString, ok := manifests.(string)
231+
if !ok {
232+
return "", fmt.Errorf("manifests parameter is not a string: %v", manifests)
233+
}
234+
return "manifests/" + manifestsString + "/*.yaml", nil
235+
}
236+
237+
func getDaemonset(config *dependency.Config) (string, error) {
238+
daemonsetName, ok := config.Params["DaemonsetName"]
239+
if !ok {
240+
daemonsetName = draDaemonsetName
241+
}
242+
daemonsetNameString, ok := daemonsetName.(string)
243+
if !ok {
244+
return "", fmt.Errorf("DaemonsetName parameter is not a string: %v", daemonsetName)
245+
}
246+
return daemonsetNameString, nil
247+
}
248+
170249
// String returns string representation of this dependency.
171250
func (d *draDependency) String() string {
172251
return draDependencyName

clusterloader2/pkg/dependency/dra/manifests/deviceclass.yaml renamed to clusterloader2/pkg/dependency/dra/manifests/example/deviceclass.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
22
# Source: dra-example-driver/templates/deviceclass.yaml
3-
apiVersion: resource.k8s.io/v1beta1
3+
apiVersion: resource.k8s.io/v1
44
kind: DeviceClass
55
metadata:
66
name: gpu.example.com

clusterloader2/pkg/dependency/dra/manifests/kubeletplugin.yaml renamed to clusterloader2/pkg/dependency/dra/manifests/example/kubeletplugin.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ spec:
3434
- name: plugin
3535
securityContext:
3636
privileged: true
37-
image: registry.k8s.io/dra-example-driver/dra-example-driver:v0.1.0
37+
image: registry.k8s.io/dra-example-driver/dra-example-driver:v0.2.0
3838
imagePullPolicy: IfNotPresent
3939
command: ["dra-example-kubeletplugin"]
4040
resources:

clusterloader2/pkg/dependency/dra/manifests/validatingadmissionpolicy.yaml renamed to clusterloader2/pkg/dependency/dra/manifests/example/validatingadmissionpolicy.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ spec:
99
matchConstraints:
1010
resourceRules:
1111
- apiGroups: ["resource.k8s.io"]
12-
apiVersions: ["v1beta1"]
12+
apiVersions: ["v1"]
1313
operations: ["CREATE", "UPDATE", "DELETE"]
1414
resources: ["resourceslices"]
1515
matchConditions:

clusterloader2/pkg/measurement/common/slos/resourceclaim_allocation_latency.go

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import (
2424
"time"
2525

2626
corev1 "k8s.io/api/core/v1"
27-
resourcev1beta2 "k8s.io/api/resource/v1beta2"
27+
resourcev1 "k8s.io/api/resource/v1"
2828
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2929
"k8s.io/apimachinery/pkg/runtime"
3030
"k8s.io/apimachinery/pkg/watch"
@@ -145,18 +145,16 @@ func (m *resourceClaimAllocationLatencyMeasurement) start(c clientset.Interface)
145145
m.isRunning = true
146146
m.stopCh = make(chan struct{})
147147
m.client = c
148-
149148
lw := &cache.ListWatch{
150149
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
151150
m.selector.ApplySelectors(&options)
152-
return c.ResourceV1beta2().ResourceClaims(m.selector.Namespace).List(context.TODO(), options)
151+
return c.ResourceV1().ResourceClaims(m.selector.Namespace).List(context.TODO(), options)
153152
},
154153
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
155154
m.selector.ApplySelectors(&options)
156-
return c.ResourceV1beta2().ResourceClaims(m.selector.Namespace).Watch(context.TODO(), options)
155+
return c.ResourceV1().ResourceClaims(m.selector.Namespace).Watch(context.TODO(), options)
157156
},
158157
}
159-
160158
claimInf := informer.NewInformer(lw, m.addEvent)
161159

162160
podLW := &cache.ListWatch{
@@ -219,7 +217,7 @@ func (m *resourceClaimAllocationLatencyMeasurement) processEvent(ev *claimEventD
219217
return
220218
}
221219

222-
claim, ok := ev.obj.(*resourcev1beta2.ResourceClaim)
220+
claim, ok := ev.obj.(*resourcev1.ResourceClaim)
223221
if !ok {
224222
return
225223
}
@@ -303,7 +301,7 @@ func (m *resourceClaimAllocationLatencyMeasurement) gather(_ clientset.Interface
303301
return []measurement.Summary{measurement.CreateSummary(summaryName, "json", content)}, err
304302
}
305303

306-
func isAllocated(claim *resourcev1beta2.ResourceClaim) bool {
304+
func isAllocated(claim *resourcev1.ResourceClaim) bool {
307305
return claim.Status.Allocation != nil || len(claim.Status.ReservedFor) > 0 || len(claim.Status.Devices) > 0
308306
}
309307

@@ -316,7 +314,7 @@ func usesResourceClaimTemplate(p *corev1.Pod) bool {
316314
return false
317315
}
318316

319-
func (m *resourceClaimAllocationLatencyMeasurement) getCachedPodCreateTime(cl *resourcev1beta2.ResourceClaim) (time.Time, bool) {
317+
func (m *resourceClaimAllocationLatencyMeasurement) getCachedPodCreateTime(cl *resourcev1.ResourceClaim) (time.Time, bool) {
320318
for _, o := range cl.OwnerReferences {
321319
if o.Kind == "Pod" && o.Name != "" {
322320
key := fmt.Sprintf("%s/%s", cl.Namespace, o.Name)
@@ -329,7 +327,7 @@ func (m *resourceClaimAllocationLatencyMeasurement) getCachedPodCreateTime(cl *r
329327
return time.Time{}, false
330328
}
331329

332-
func (m *resourceClaimAllocationLatencyMeasurement) fetchPodCreateTime(cl *resourcev1beta2.ResourceClaim) (time.Time, bool) {
330+
func (m *resourceClaimAllocationLatencyMeasurement) fetchPodCreateTime(cl *resourcev1.ResourceClaim) (time.Time, bool) {
333331
for _, o := range cl.OwnerReferences {
334332
if o.Kind == "Pod" && o.Name != "" {
335333
atomic.AddInt64(&m.podGetCalls, 1)

0 commit comments

Comments
 (0)