Skip to content

[AKS VMs Pool] Add VMs implementation to master #8078

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 60 additions & 23 deletions cluster-autoscaler/cloudprovider/azure/azure_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"sync"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
"github.com/Azure/go-autorest/autorest/to"
"github.com/Azure/skewer"
Expand Down Expand Up @@ -67,13 +68,18 @@ type azureCache struct {

// Cache content.

// resourceGroup specifies the name of the resource group that this cache tracks
resourceGroup string
// resourceGroup specifies the name of the node resource group that this cache tracks
resourceGroup string
clusterResourceGroup string
clusterName string

// enableVMsAgentPool specifies whether VMs agent pool type is supported.
enableVMsAgentPool bool

// vmType can be one of vmTypeVMSS (default), vmTypeStandard
vmType string

vmsPoolSet map[string]struct{} // track the nodepools that're vms pool
vmsPoolMap map[string]armcontainerservice.AgentPool // track the nodepools that're vms pool

// scaleSets keeps the set of all known scalesets in the resource group, populated/refreshed via VMSS.List() call.
// It is only used/populated if vmType is vmTypeVMSS (default).
Expand Down Expand Up @@ -106,8 +112,11 @@ func newAzureCache(client *azClient, cacheTTL time.Duration, config Config) (*az
azClient: client,
refreshInterval: cacheTTL,
resourceGroup: config.ResourceGroup,
clusterResourceGroup: config.ClusterResourceGroup,
clusterName: config.ClusterName,
enableVMsAgentPool: config.EnableVMsAgentPool,
vmType: config.VMType,
vmsPoolSet: make(map[string]struct{}),
vmsPoolMap: make(map[string]armcontainerservice.AgentPool),
scaleSets: make(map[string]compute.VirtualMachineScaleSet),
virtualMachines: make(map[string][]compute.VirtualMachine),
registeredNodeGroups: make([]cloudprovider.NodeGroup, 0),
Expand All @@ -130,11 +139,11 @@ func newAzureCache(client *azClient, cacheTTL time.Duration, config Config) (*az
return cache, nil
}

func (m *azureCache) getVMsPoolSet() map[string]struct{} {
func (m *azureCache) getVMsPoolMap() map[string]armcontainerservice.AgentPool {
m.mutex.Lock()
defer m.mutex.Unlock()

return m.vmsPoolSet
return m.vmsPoolMap
}

func (m *azureCache) getVirtualMachines() map[string][]compute.VirtualMachine {
Expand Down Expand Up @@ -232,13 +241,20 @@ func (m *azureCache) fetchAzureResources() error {
return err
}
m.scaleSets = vmssResult
vmResult, vmsPoolSet, err := m.fetchVirtualMachines()
vmResult, err := m.fetchVirtualMachines()
if err != nil {
return err
}
// we fetch both sets of resources since CAS may operate on mixed nodepools
m.virtualMachines = vmResult
m.vmsPoolSet = vmsPoolSet
// fetch VMs pools if enabled
if m.enableVMsAgentPool {
vmsPoolMap, err := m.fetchVMsPools()
if err != nil {
return err
}
m.vmsPoolMap = vmsPoolMap
}

return nil
}
Expand All @@ -251,19 +267,17 @@ const (
)

// fetchVirtualMachines returns the updated list of virtual machines in the config resource group using the Azure API.
func (m *azureCache) fetchVirtualMachines() (map[string][]compute.VirtualMachine, map[string]struct{}, error) {
func (m *azureCache) fetchVirtualMachines() (map[string][]compute.VirtualMachine, error) {
ctx, cancel := getContextWithCancel()
defer cancel()

result, err := m.azClient.virtualMachinesClient.List(ctx, m.resourceGroup)
if err != nil {
klog.Errorf("VirtualMachinesClient.List in resource group %q failed: %v", m.resourceGroup, err)
return nil, nil, err.Error()
return nil, err.Error()
}

instances := make(map[string][]compute.VirtualMachine)
// track the nodepools that're vms pools
vmsPoolSet := make(map[string]struct{})
for _, instance := range result {
if instance.Tags == nil {
continue
Expand All @@ -280,20 +294,43 @@ func (m *azureCache) fetchVirtualMachines() (map[string][]compute.VirtualMachine
}

instances[to.String(vmPoolName)] = append(instances[to.String(vmPoolName)], instance)
}
return instances, nil
}

// if the nodepool is already in the map, skip it
if _, ok := vmsPoolSet[to.String(vmPoolName)]; ok {
continue
// fetchVMsPools returns a name to agentpool map of all the VMs pools in the cluster
func (m *azureCache) fetchVMsPools() (map[string]armcontainerservice.AgentPool, error) {
ctx, cancel := getContextWithTimeout(vmsContextTimeout)
defer cancel()

// defensive check, should never happen when enableVMsAgentPool toggle is on
if m.azClient.agentPoolClient == nil {
return nil, errors.New("agentPoolClient is nil")
}

vmsPoolMap := make(map[string]armcontainerservice.AgentPool)
pager := m.azClient.agentPoolClient.NewListPager(m.clusterResourceGroup, m.clusterName, nil)
var aps []*armcontainerservice.AgentPool
for pager.More() {
resp, err := pager.NextPage(ctx)
if err != nil {
klog.Errorf("agentPoolClient.pager.NextPage in cluster %s resource group %s failed: %v",
m.clusterName, m.clusterResourceGroup, err)
return nil, err
}
aps = append(aps, resp.Value...)
}

// nodes from vms pool will have tag "aks-managed-agentpool-type" set to "VirtualMachines"
if agentpoolType := tags[agentpoolTypeTag]; agentpoolType != nil {
if strings.EqualFold(to.String(agentpoolType), vmsPoolType) {
vmsPoolSet[to.String(vmPoolName)] = struct{}{}
}
for _, ap := range aps {
if ap != nil && ap.Name != nil && ap.Properties != nil && ap.Properties.Type != nil &&
*ap.Properties.Type == armcontainerservice.AgentPoolTypeVirtualMachines {
// we only care about VMs pools, skip other types
klog.V(6).Infof("Found VMs pool %q", *ap.Name)
vmsPoolMap[*ap.Name] = *ap
}
}
return instances, vmsPoolSet, nil

return vmsPoolMap, nil
}

// fetchScaleSets returns the updated list of scale sets in the config resource group using the Azure API.
Expand Down Expand Up @@ -422,7 +459,7 @@ func (m *azureCache) HasInstance(providerID string) (bool, error) {

// FindForInstance returns node group of the given Instance
func (m *azureCache) FindForInstance(instance *azureRef, vmType string) (cloudprovider.NodeGroup, error) {
vmsPoolSet := m.getVMsPoolSet()
vmsPoolMap := m.getVMsPoolMap()
m.mutex.Lock()
defer m.mutex.Unlock()

Expand All @@ -441,7 +478,7 @@ func (m *azureCache) FindForInstance(instance *azureRef, vmType string) (cloudpr
}

// cluster with vmss pool only
if vmType == providerazureconsts.VMTypeVMSS && len(vmsPoolSet) == 0 {
if vmType == providerazureconsts.VMTypeVMSS && len(vmsPoolMap) == 0 {
if m.areAllScaleSetsUniform() {
// Omit virtual machines not managed by vmss only in case of uniform scale set.
if ok := virtualMachineRE.Match([]byte(inst.Name)); ok {
Expand Down
33 changes: 33 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,42 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
providerazureconsts "sigs.k8s.io/cloud-provider-azure/pkg/consts"

"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v5"
"github.com/Azure/go-autorest/autorest/to"
"github.com/stretchr/testify/assert"
"go.uber.org/mock/gomock"
)

func TestFetchVMsPools(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

provider := newTestProvider(t)
ac := provider.azureManager.azureCache
mockAgentpoolclient := NewMockAgentPoolsClient(ctrl)
ac.azClient.agentPoolClient = mockAgentpoolclient

vmsPool := getTestVMsAgentPool(false)
vmssPoolType := armcontainerservice.AgentPoolTypeVirtualMachineScaleSets
vmssPool := armcontainerservice.AgentPool{
Name: to.StringPtr("vmsspool1"),
Properties: &armcontainerservice.ManagedClusterAgentPoolProfileProperties{
Type: &vmssPoolType,
},
}
invalidPool := armcontainerservice.AgentPool{}
fakeAPListPager := getFakeAgentpoolListPager(&vmsPool, &vmssPool, &invalidPool)
mockAgentpoolclient.EXPECT().NewListPager(gomock.Any(), gomock.Any(), nil).
Return(fakeAPListPager)

vmsPoolMap, err := ac.fetchVMsPools()
assert.NoError(t, err)
assert.Equal(t, 1, len(vmsPoolMap))

_, ok := vmsPoolMap[to.String(vmsPool.Name)]
assert.True(t, ok)
}

func TestRegister(t *testing.T) {
provider := newTestProvider(t)
ss := newTestScaleSet(provider.azureManager, "ss")
Expand Down
Loading
Loading