4 changes: 2 additions & 2 deletions internal/bminventory/inventory.go
@@ -1450,10 +1450,10 @@ func (b *bareMetalInventory) InstallClusterInternal(ctx context.Context, params
// auto-select host roles if not already selected.
err = b.db.Transaction(func(tx *gorm.DB) error {
var updated bool
-sortedHosts, canRefreshRoles := host.SortHosts(cluster.Hosts)
+sortedHosts, canRefreshRoles := host.SortHosts(cluster.Hosts, cluster.ControlPlaneCount)
if canRefreshRoles {
for i := range sortedHosts {
-updated, err = b.hostApi.AutoAssignRole(ctx, cluster.Hosts[i], tx)
+updated, err = b.hostApi.AutoAssignRole(ctx, sortedHosts[i], tx)
if err != nil {
return err
}
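This hunk does two things: it threads cluster.ControlPlaneCount through to host.SortHosts, and it fixes an indexing bug in which the loop ranged over sortedHosts but passed the unsorted cluster.Hosts[i] to AutoAssignRole, silently discarding the sort order. A minimal, runnable sketch of that bug pattern (toy data, not code from this PR):

package main

import "fmt"

func main() {
	// Two slices holding the same elements in different orders.
	sorted := []string{"worker", "master-1", "master-2"} // the order that matters
	original := []string{"master-1", "master-2", "worker"}

	// Buggy pattern: range over the sorted slice but index the unsorted one.
	for i := range sorted {
		fmt.Println("buggy visit:", original[i])
	}
	// Fixed pattern: index the slice that was actually sorted.
	for i := range sorted {
		fmt.Println("fixed visit:", sorted[i])
	}
}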
69 changes: 66 additions & 3 deletions internal/host/host_test.go
@@ -3170,6 +3170,49 @@ var _ = Describe("AutoAssignRole", func() {
verifyAutoAssignRole(&h, true, true)
Expect(hostutil.GetHostFromDB(*h.ID, infraEnvId, db).Role).Should(Equal(models.HostRoleArbiter))
})

It("TNA cluster with day-0 workers having similar resources to arbiter", func() {
cluster.ControlPlaneCount = common.MinMasterHostsNeededForInstallationInHaArbiterMode
db.Save(cluster)

hosts := []*models.Host{
// 2 intended masters: high resources
generateAutoAssignHost(strfmt.UUID(uuid.New().String()), 16, 64, false, "master-capable-1"),
generateAutoAssignHost(strfmt.UUID(uuid.New().String()), 16, 64, false, "master-capable-2"),
// 1 intended arbiter: lower resources
generateAutoAssignHost(strfmt.UUID(uuid.New().String()), 8, 32, false, "arbiter-capable"),
// 2 intended workers: similar resources to arbiter
generateAutoAssignHost(strfmt.UUID(uuid.New().String()), 8, 32, false, "worker-1"),
generateAutoAssignHost(strfmt.UUID(uuid.New().String()), 8, 32, false, "worker-2"),
}

cluster.Hosts = hosts

sortedHosts, _ := SortHosts(hosts, cluster.ControlPlaneCount)

var masterCount, arbiterCount, workerCount int

for _, host := range sortedHosts {
Expect(db.Create(host).Error).ShouldNot(HaveOccurred())
verifyAutoAssignRole(host, true, true)
role := hostutil.GetHostFromDB(*host.ID, infraEnvId, db).Role
switch role {
case models.HostRoleMaster:
// Masters should be the most capable hosts
Expect(host.RequestedHostname).To(HavePrefix("master-capable"))
masterCount++
case models.HostRoleArbiter:
arbiterCount++
case models.HostRoleWorker:
workerCount++
}
}

Expect(masterCount).To(Equal(2), "Should have exactly 2 masters")
Expect(arbiterCount).To(Equal(1), "Should have exactly 1 arbiter")
Expect(workerCount).To(Equal(2), "Should have exactly 2 workers")
})

It("should assign roles based on hardware with GPU weight affecting priority", func() {
cluster.ControlPlaneCount = common.MinMasterHostsNeededForInstallationInHaMode
hosts := []*models.Host{
@@ -3185,7 +3228,7 @@ var _ = Describe("AutoAssignRole", func() {
cluster.Hosts = hosts

// Sort hosts first (like the real auto-assign logic does)
-sortedHosts, _ := SortHosts(hosts)
+sortedHosts, _ := SortHosts(hosts, common.MinMasterHostsNeededForInstallationInHaMode)

var masterCount, workerCount int

@@ -3218,7 +3261,7 @@ var _ = Describe("AutoAssignRole", func() {
cluster.Hosts = hosts

// Sort hosts first (like the real auto-assign logic does)
-sortedHosts, _ := SortHosts(hosts)
+sortedHosts, _ := SortHosts(hosts, common.MinMasterHostsNeededForInstallationInHaMode)

var masterCount, workerCount int

@@ -4410,7 +4453,7 @@ var _ = Describe("sortHost by hardware", func() {
}

It("verify host order", func() {
-sorted, _ := SortHosts(generateHosts())
+sorted, _ := SortHosts(generateHosts(), common.MinMasterHostsNeededForInstallationInHaMode)
expected := []string{
"insufficient for both master and worker",
"minimal worker with 3 disks (total of 120 GB)",
@@ -4428,6 +4471,26 @@ var _ = Describe("AutoAssignRole", func() {
Expect(h.RequestedHostname).To(Equal(expected[i]))
}
})
It("verify host order for two-node topology", func() {
sorted, _ := SortHosts(generateHosts(), common.MinMasterHostsNeededForInstallationInHaArbiterMode)
expected := []string{
"odf worker with 3 disks (total of 120 GB)",
"odf worker with 3 disks (total of 80 GB)",
"insufficient for both master and worker",
"odf worker with 1 disk of 40 GB",
"odf master with 3 disks (total of 120 GB)",
"sno master with 3 disks (total of 120 GB)",
"minimal master with 3 disks (total of 120 GB)",
"minimal master with 3 disks (total of 80 GB)",
"minimal master with no disks",
"minimal worker with 3 disks (total of 120 GB)",
// GPU hosts still last
"host with minimal hardware to be either master/worker, with GPU",
}
for i, h := range sorted {
Expect(h.RequestedHostname).To(Equal(expected[i]))
}
})
})

var _ = Describe("update node labels", func() {
22 changes: 20 additions & 2 deletions internal/host/monitor.go
@@ -68,7 +68,7 @@ func (m *Manager) initMonitoringQueryGenerator() {
}
}

-func SortHosts(hosts []*models.Host) ([]*models.Host, bool) {
+func SortHosts(hosts []*models.Host, controlPlaneCount int64) ([]*models.Host, bool) {
diskCapacityGiB := func(disks []*models.Disk) int64 {
return funk.Reduce(disks, func(acc int64, d *models.Disk) int64 {
if d.InstallationEligibility.Eligible {
@@ -116,6 +116,9 @@ func SortHosts(hosts []*models.Host) ([]*models.Host, bool) {
}
}

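// two-node-plus-arbiter (TNA) topology: the control-plane count meets the arbiter-mode minimum but is below the full-HA minimum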
isTwoNodeTopology := controlPlaneCount >= common.MinMasterHostsNeededForInstallationInHaArbiterMode &&
controlPlaneCount < common.MinMasterHostsNeededForInstallationInHaMode

sortByWeight := func(hostList []*models.Host) {
sort.SliceStable(hostList, func(i, j int) bool {
inventory_i, _ := common.UnmarshalInventory(hostList[i].Inventory)
@@ -137,6 +140,10 @@ func SortHosts(hosts []*models.Host) ([]*models.Host, bool) {
HostWeightMemWeight*(float64(memInGib(inventory_j))-HostWeightMinimumMemGib) +
HostWeightDiskWeight*(float64(diskCapacityGiB(inventory_j.Disks))-HostWeightMinimumDiskCapacityGib)

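// for TNA, sort descending so the most capable hosts fill the leading (master) positions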
if isTwoNodeTopology {
return wi > wj
}

return wi < wj
})
}
@@ -150,6 +157,17 @@ func SortHosts(hosts []*models.Host) ([]*models.Host, bool) {
result = append(result, hostsWithoutGPU...)
result = append(result, hostsWithGPU...)

// for TNA, move the least capable non-GPU host (last in the non-GPU
// descending section) to the arbiter position so that the greedy algorithm
// in selectRole assigns it as the arbiter instead of a more capable worker.
if isTwoNodeTopology && len(hostsWithoutGPU) > int(controlPlaneCount) {
lastNonGPUIdx := len(hostsWithoutGPU) - 1
leastCapable := result[lastNonGPUIdx]
arbiterPos := int(controlPlaneCount)
copy(result[arbiterPos+1:], result[arbiterPos:lastNonGPUIdx])
result[arbiterPos] = leastCapable
}

return result, allHostsHasInventory
}
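With controlPlaneCount = 2 and no GPU hosts, the rotation above shifts result[arbiterPos:lastNonGPUIdx] one slot to the right and places the weakest host at index 2, the position the greedy role selection fills with the arbiter. A standalone sketch of the same rotation (toy data with hypothetical names):

package main

import "fmt"

func main() {
	// Hosts sorted descending by weight, as SortHosts produces for TNA.
	result := []string{"big-1", "big-2", "mid-1", "mid-2", "small"}
	controlPlaneCount := 2
	lastNonGPUIdx := len(result) - 1 // no GPU hosts in this toy example

	leastCapable := result[lastNonGPUIdx]
	arbiterPos := controlPlaneCount
	// Go's copy handles overlapping slices, so this shifts the middle
	// section one slot right before overwriting the arbiter position.
	copy(result[arbiterPos+1:], result[arbiterPos:lastNonGPUIdx])
	result[arbiterPos] = leastCapable

	fmt.Println(result) // [big-1 big-2 small mid-1 mid-2]
}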

@@ -202,7 +220,7 @@ func (m *Manager) clusterHostMonitoring() {

for _, c := range clusters {
inventoryCache := make(InventoryCache)
-sortedHosts, canRefreshRoles := SortHosts(c.Hosts)
+sortedHosts, canRefreshRoles := SortHosts(c.Hosts, c.ControlPlaneCount)

log = log.WithField("cluster", c.ID.String())
