Skip to content

Commit 14f61b2

Browse files
Test step monitor run function and fix found bug (#103)
This PR fixes an issue where the step monitor calculates the number of reorderings based on the `hosts` list provided by Nova, which may not necessarily be sorted by the hosts' weights. Now, we compare the input and output weights correctly.
1 parent 1ec2fe7 commit 14f61b2

File tree

5 files changed

+121
-39
lines changed

5 files changed

+121
-39
lines changed

internal/scheduler/monitor.go

+21-13
Original file line numberDiff line numberDiff line change
@@ -217,14 +217,19 @@ func (s *StepMonitor) Run(request api.Request) (map[string]float64, error) {
217217
defer timer.ObserveDuration()
218218
}
219219

220-
weights, err := s.Step.Run(request)
220+
inWeights := request.GetWeights()
221+
outWeights, err := s.Step.Run(request)
221222
if err != nil {
222223
return nil, err
223224
}
225+
slog.Info(
226+
"scheduler: finished step", "name", stepName,
227+
"inWeights", inWeights, "outWeights", outWeights,
228+
)
224229

225230
// Observe how much the step modifies the weights of the hosts.
226231
if s.stepHostWeight != nil {
227-
for host, weight := range weights {
232+
for host, weight := range outWeights {
228233
s.stepHostWeight.WithLabelValues(host, stepName).Add(weight)
229234
if weight != 0.0 {
230235
slog.Info("scheduler: modified host weight", "name", stepName, "weight", weight)
@@ -233,11 +238,9 @@ func (s *StepMonitor) Run(request api.Request) (map[string]float64, error) {
233238
}
234239

235240
// Observe how many hosts are removed from the state.
236-
hostsInScenario := make(map[string]struct{})
237-
for _, host := range request.GetHosts() {
238-
hostsInScenario[host] = struct{}{}
239-
}
240-
nHostsRemoved := len(hostsInScenario) - len(weights)
241+
hostsIn := request.GetHosts()
242+
hostsOut := slices.Collect(maps.Keys(outWeights))
243+
nHostsRemoved := len(hostsIn) - len(hostsOut)
241244
if nHostsRemoved < 0 {
242245
slog.Info("scheduler: removed hosts", "name", stepName, "count", nHostsRemoved)
243246
}
@@ -248,14 +251,19 @@ func (s *StepMonitor) Run(request api.Request) (map[string]float64, error) {
248251
// Observe the number of reorderings conducted by the scheduler.
249252
if s.reorderingsObserver != nil {
250253
// Calculate the Levenshtein distance between the hosts going in and out.
251-
hosts := slices.Collect(maps.Keys(weights))
252-
sort.Slice(hosts, func(i, j int) bool {
253-
return weights[hosts[i]] > weights[hosts[j]]
254+
sort.Slice(hostsIn, func(i, j int) bool {
255+
return inWeights[hostsIn[i]] > inWeights[hostsIn[j]]
256+
})
257+
sort.Slice(hostsOut, func(i, j int) bool {
258+
return outWeights[hostsOut[i]] > outWeights[hostsOut[j]]
254259
})
255-
distance := levenshteinDistance(request.GetHosts(), hosts)
256-
slog.Info("scheduler: reorderings", "name", stepName, "distance", distance, "hosts_in", request.GetHosts(), "hosts_out", hosts)
260+
distance := levenshteinDistance(hostsIn, hostsOut)
261+
slog.Info(
262+
"scheduler: reorderings", "name", stepName, "distance", distance,
263+
"hostsIn", hostsIn, "hostsOut", hostsOut,
264+
)
257265
s.reorderingsObserver.Observe(float64(distance))
258266
}
259267

260-
return weights, nil
268+
return outWeights, nil
261269
}

internal/scheduler/monitor_test.go

+51-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,57 @@
33

44
package scheduler
55

6-
import "testing"
6+
import (
7+
"testing"
8+
9+
"github.com/cobaltcore-dev/cortex/internal/scheduler/api"
10+
"github.com/cobaltcore-dev/cortex/testlib/monitoring"
11+
testlibAPI "github.com/cobaltcore-dev/cortex/testlib/scheduler/api"
12+
"github.com/cobaltcore-dev/cortex/testlib/scheduler/plugins"
13+
)
14+
15+
func TestStepMonitorRun(t *testing.T) {
16+
runTimer := &monitoring.MockObserver{}
17+
removedHostsObserver := &monitoring.MockObserver{}
18+
reorderingsObserver := &monitoring.MockObserver{}
19+
monitor := &StepMonitor{
20+
Step: &plugins.MockStep{
21+
Name: "mock_step",
22+
RunFunc: func(request api.Request) (map[string]float64, error) {
23+
return map[string]float64{"host1": 0.0, "host2": 1.0, "host3": 0.0}, nil
24+
},
25+
},
26+
runTimer: runTimer,
27+
stepHostWeight: nil,
28+
removedHostsObserver: removedHostsObserver,
29+
reorderingsObserver: reorderingsObserver,
30+
}
31+
request := &testlibAPI.MockRequest{
32+
Hosts: []string{"host1", "host2", "host3"},
33+
Weights: map[string]float64{"host1": 0.0, "host2": 0.0, "host3": 0.0},
34+
}
35+
if _, err := monitor.Run(request); err != nil {
36+
t.Fatalf("Run() error = %v, want nil", err)
37+
}
38+
if len(removedHostsObserver.Observations) != 1 {
39+
t.Errorf("removedHostsObserver.Observations = %v, want 1", len(removedHostsObserver.Observations))
40+
}
41+
if removedHostsObserver.Observations[0] != 0 {
42+
t.Errorf("removedHostsObserver.Observations[0] = %v, want 0", removedHostsObserver.Observations[0])
43+
}
44+
if len(reorderingsObserver.Observations) != 1 {
45+
t.Errorf("reorderingsObserver.Observations = %v, want 1", len(reorderingsObserver.Observations))
46+
}
47+
if reorderingsObserver.Observations[0] != 2 {
48+
t.Errorf("reorderingsObserver.Observations[0] = %v, want 2", reorderingsObserver.Observations[0])
49+
}
50+
if len(runTimer.Observations) != 1 {
51+
t.Errorf("runTimer.Observations = %v, want 1", len(runTimer.Observations))
52+
}
53+
if runTimer.Observations[0] <= 0 {
54+
t.Errorf("runTimer.Observations[0] = %v, want > 0", runTimer.Observations[0])
55+
}
56+
}
757

858
func TestLevenshteinDistance(t *testing.T) {
959
tests := []struct {

internal/scheduler/validation_test.go

+7-25
Original file line numberDiff line numberDiff line change
@@ -11,29 +11,11 @@ import (
1111
"github.com/cobaltcore-dev/cortex/internal/db"
1212
"github.com/cobaltcore-dev/cortex/internal/scheduler/api"
1313
testlibAPI "github.com/cobaltcore-dev/cortex/testlib/scheduler/api"
14+
"github.com/cobaltcore-dev/cortex/testlib/scheduler/plugins"
1415
)
1516

16-
// MockStep is a manual mock implementation of the plugins.Step interface.
17-
type MockStep struct {
18-
Name string
19-
InitFunc func(db db.DB, opts conf.RawOpts) error
20-
RunFunc func(request api.Request) (map[string]float64, error)
21-
}
22-
23-
func (m *MockStep) GetName() string {
24-
return m.Name
25-
}
26-
27-
func (m *MockStep) Init(db db.DB, opts conf.RawOpts) error {
28-
return m.InitFunc(db, opts)
29-
}
30-
31-
func (m *MockStep) Run(request api.Request) (map[string]float64, error) {
32-
return m.RunFunc(request)
33-
}
34-
3517
func TestStepValidator_GetName(t *testing.T) {
36-
mockStep := &MockStep{
18+
mockStep := &plugins.MockStep{
3719
Name: "mock-step",
3820
}
3921

@@ -47,7 +29,7 @@ func TestStepValidator_GetName(t *testing.T) {
4729
}
4830

4931
func TestStepValidator_Init(t *testing.T) {
50-
mockStep := &MockStep{
32+
mockStep := &plugins.MockStep{
5133
InitFunc: func(db db.DB, opts conf.RawOpts) error {
5234
return nil
5335
},
@@ -66,7 +48,7 @@ func TestStepValidator_Init(t *testing.T) {
6648
}
6749

6850
func TestStepValidator_Run_ValidHosts(t *testing.T) {
69-
mockStep := &MockStep{
51+
mockStep := &plugins.MockStep{
7052
RunFunc: func(request api.Request) (map[string]float64, error) {
7153
return map[string]float64{
7254
"host1": 1.0,
@@ -102,7 +84,7 @@ func TestStepValidator_Run_ValidHosts(t *testing.T) {
10284
}
10385

10486
func TestStepValidator_Run_HostNumberMismatch(t *testing.T) {
105-
mockStep := &MockStep{
87+
mockStep := &plugins.MockStep{
10688
RunFunc: func(request api.Request) (map[string]float64, error) {
10789
return map[string]float64{
10890
"host1": 1.0,
@@ -137,7 +119,7 @@ func TestStepValidator_Run_HostNumberMismatch(t *testing.T) {
137119
}
138120

139121
func TestStepValidator_Run_DisabledValidation(t *testing.T) {
140-
mockStep := &MockStep{
122+
mockStep := &plugins.MockStep{
141123
RunFunc: func(request api.Request) (map[string]float64, error) {
142124
return map[string]float64{
143125
"host1": 1.0,
@@ -171,7 +153,7 @@ func TestStepValidator_Run_DisabledValidation(t *testing.T) {
171153
}
172154

173155
func TestValidateStep(t *testing.T) {
174-
mockStep := &MockStep{}
156+
mockStep := &plugins.MockStep{}
175157
disabledValidations := conf.SchedulerStepDisabledValidationsConfig{
176158
SameHostNumberInOut: true,
177159
}

testlib/monitoring/mock.go

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2025 SAP SE
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package monitoring
5+
6+
type MockObserver struct {
7+
// Observations recorded by the mock observer.
8+
Observations []float64
9+
}
10+
11+
func (m *MockObserver) Observe(value float64) {
12+
m.Observations = append(m.Observations, value)
13+
}
+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Copyright 2025 SAP SE
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package plugins
5+
6+
import (
7+
"github.com/cobaltcore-dev/cortex/internal/conf"
8+
"github.com/cobaltcore-dev/cortex/internal/db"
9+
"github.com/cobaltcore-dev/cortex/internal/scheduler/api"
10+
)
11+
12+
// MockStep is a manual mock implementation of the plugins.Step interface.
13+
type MockStep struct {
14+
Name string
15+
InitFunc func(db db.DB, opts conf.RawOpts) error
16+
RunFunc func(request api.Request) (map[string]float64, error)
17+
}
18+
19+
func (m *MockStep) GetName() string {
20+
return m.Name
21+
}
22+
23+
func (m *MockStep) Init(db db.DB, opts conf.RawOpts) error {
24+
return m.InitFunc(db, opts)
25+
}
26+
27+
func (m *MockStep) Run(request api.Request) (map[string]float64, error) {
28+
return m.RunFunc(request)
29+
}

0 commit comments

Comments
 (0)