DigitalOcean: Implement TemplateNodeInfo in cloud provider CA to support scale to zero
dylanrhysscott committed Feb 14, 2025
1 parent cac225e commit afe9b9c
Showing 8 changed files with 201 additions and 75 deletions.
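
Before the per-file diffs, a brief hedged sketch of what the new capability is for. It uses only the `cloudprovider.NodeGroup` interface this diff implements against; the helper and its printed output are illustrative, not autoscaler source:

```go
package example

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
)

// describeScaleFromZeroTemplate is an illustrative helper, not autoscaler source.
// When a node group currently has zero nodes, the scheduler simulation cannot
// sample a real node, so the provider-supplied TemplateNodeInfo (implemented for
// DigitalOcean in this commit) is its only source of capacity, allocatable and
// label information.
func describeScaleFromZeroTemplate(ng cloudprovider.NodeGroup) error {
	info, err := ng.TemplateNodeInfo()
	if err != nil {
		return err
	}
	node := info.Node()
	cpu := node.Status.Capacity[apiv1.ResourceCPU]
	mem := node.Status.Capacity[apiv1.ResourceMemory]
	pods := node.Status.Capacity[apiv1.ResourcePods]
	fmt.Printf("simulated node %q: cpu=%s memory=%s pods=%s labels=%v\n",
		node.Name, cpu.String(), mem.String(), pods.String(), node.Labels)
	return nil
}
```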
8 changes: 4 additions & 4 deletions cluster-autoscaler/cloudprovider/digitalocean/README.md
@@ -28,24 +28,24 @@ picks up the configuration from the API and adjusts the behavior accordingly.
# Development

Make sure you're inside the root path of the [autoscaler
repository](https://github.com/kubernetes/autoscaler)
repository](https://github.com/kubernetes/autoscaler/cluster-autoscaler)

1.) Build the `cluster-autoscaler` binary:


```
make build-in-docker
GOARCH=amd64 make build-in-docker
```

2.) Build the docker image:

```
docker build -t digitalocean/cluster-autoscaler:dev .
docker build --platform linux/amd64 -f Dockerfile.amd64 -t digitalocean/cluster-autoscaler:dev .
```


3.) Push the docker image to Docker hub:

```
docker push digitalocean/cluster-autoscaler:dev
docker push --platform linux/amd64 digitalocean/cluster-autoscaler:dev
```
@@ -29,6 +29,73 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
)

func defaultDOClientMock(clusterID string) *doClientMock {
client := &doClientMock{}
ctx := context.Background()

client.On("ListNodePools", ctx, clusterID, nil).Return(
[]*godo.KubernetesNodePool{
{
ID: "1",
Nodes: []*godo.KubernetesNode{
{ID: "1", Status: &godo.KubernetesNodeStatus{State: "running"}},
{ID: "2", Status: &godo.KubernetesNodeStatus{State: "running"}},
},
AutoScale: true,
},
{
ID: "2",
Nodes: []*godo.KubernetesNode{
{ID: "3", Status: &godo.KubernetesNodeStatus{State: "deleting"}},
{ID: "4", Status: &godo.KubernetesNodeStatus{State: "running"}},
},
AutoScale: true,
},
{
ID: "3",
Nodes: []*godo.KubernetesNode{
{ID: "5", Status: &godo.KubernetesNodeStatus{State: "provisioning"}},
{ID: "6", Status: &godo.KubernetesNodeStatus{State: "running"}},
},
AutoScale: true,
},
{
ID: "4",
Nodes: []*godo.KubernetesNode{
{ID: "7", Status: &godo.KubernetesNodeStatus{State: "draining"}},
{ID: "8", Status: &godo.KubernetesNodeStatus{State: "running"}},
},
AutoScale: false,
},
},
&godo.Response{},
nil,
).Once()
return client
}

func setGetNodeTemplateMock(c *doClientMock, times int) *doClientMock {
c.On("GetNodePoolTemplate", context.Background(), "123456", "").Return(&godo.KubernetesNodePoolTemplateResponse{
ClusterUUID: "123456",
Name: "some-pool",
Slug: "s-1vcpu-2gb",
Template: &godo.KubernetesNodePoolTemplate{
Labels: make(map[string]string),
Capacity: &godo.KubernetesNodePoolResources{
CPU: 1,
Memory: "2048Mi",
Pods: 110,
},
Allocatable: &godo.KubernetesNodePoolResources{
CPU: 380,
Memory: "1024MI",
Pods: 110,
},
},
}, &godo.Response{}, nil).Times(times)
return c
}

func testCloudProvider(t *testing.T, client *doClientMock) *digitaloceanCloudProvider {
cfg := `{"cluster_id": "123456", "token": "123-123-123", "url": "https://api.digitalocean.com/v2", "version": "dev"}`

@@ -38,47 +105,7 @@ func testCloudProvider(t *testing.T, client *doClientMock) *digitaloceanCloudPro

// fill the test provider with some example
if client == nil {
client = &doClientMock{}
ctx := context.Background()

client.On("ListNodePools", ctx, manager.clusterID, nil).Return(
[]*godo.KubernetesNodePool{
{
ID: "1",
Nodes: []*godo.KubernetesNode{
{ID: "1", Status: &godo.KubernetesNodeStatus{State: "running"}},
{ID: "2", Status: &godo.KubernetesNodeStatus{State: "running"}},
},
AutoScale: true,
},
{
ID: "2",
Nodes: []*godo.KubernetesNode{
{ID: "3", Status: &godo.KubernetesNodeStatus{State: "deleting"}},
{ID: "4", Status: &godo.KubernetesNodeStatus{State: "running"}},
},
AutoScale: true,
},
{
ID: "3",
Nodes: []*godo.KubernetesNode{
{ID: "5", Status: &godo.KubernetesNodeStatus{State: "provisioning"}},
{ID: "6", Status: &godo.KubernetesNodeStatus{State: "running"}},
},
AutoScale: true,
},
{
ID: "4",
Nodes: []*godo.KubernetesNode{
{ID: "7", Status: &godo.KubernetesNodeStatus{State: "draining"}},
{ID: "8", Status: &godo.KubernetesNodeStatus{State: "running"}},
},
AutoScale: false,
},
},
&godo.Response{},
nil,
).Once()
client = defaultDOClientMock(manager.clusterID)
}

manager.client = client
@@ -102,7 +129,10 @@ func TestDigitalOceanCloudProvider_Name(t *testing.T) {
}

func TestDigitalOceanCloudProvider_NodeGroups(t *testing.T) {
provider := testCloudProvider(t, nil)
c := defaultDOClientMock("123456")
c = setGetNodeTemplateMock(c, 3)

provider := testCloudProvider(t, c)
err := provider.manager.Refresh()
assert.NoError(t, err)

@@ -124,7 +154,7 @@ func TestDigitalOceanCloudProvider_NodeGroupForNode(t *testing.T) {
t.Run("success", func(t *testing.T) {
client := &doClientMock{}
ctx := context.Background()

client = setGetNodeTemplateMock(client, 2)
client.On("ListNodePools", ctx, clusterID, nil).Return(
[]*godo.KubernetesNodePool{
{
@@ -35,6 +35,9 @@ var (
)

type nodeGroupClient interface {
// GetNodePoolTemplate returns the template for a given node pool - used to help the CA simulate scale-ups from zero.
GetNodePoolTemplate(ctx context.Context, clusterID string, nodePoolName string) (*godo.KubernetesNodePoolTemplateResponse, *godo.Response, error)

// ListNodePools lists all the node pools found in a Kubernetes cluster.
ListNodePools(ctx context.Context, clusterID string, opts *godo.ListOptions) ([]*godo.KubernetesNodePool, *godo.Response, error)

@@ -147,17 +150,22 @@ func (m *Manager) Refresh() error {
if !nodePool.AutoScale {
continue
}

nodePoolTemplateResponse, _, err := m.client.GetNodePoolTemplate(ctx, m.clusterID, nodePool.Name)
klog.V(4).Infof("fetched template response - %v", nodePoolTemplateResponse)
if err != nil {
return err
}
klog.V(4).Infof("adding node pool: %q name: %s min: %d max: %d",
nodePool.ID, nodePool.Name, nodePool.MinNodes, nodePool.MaxNodes)

group = append(group, &NodeGroup{
id: nodePool.ID,
clusterID: m.clusterID,
client: m.client,
nodePool: nodePool,
minSize: nodePool.MinNodes,
maxSize: nodePool.MaxNodes,
id: nodePool.ID,
clusterID: m.clusterID,
client: m.client,
nodePool: nodePool,
nodePoolTemplate: nodePoolTemplateResponse,
minSize: nodePool.MinNodes,
maxSize: nodePool.MaxNodes,
})
}

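
For orientation, a small standalone sketch of the API call Refresh now makes for each autoscaling pool. It assumes godo's Kubernetes service exposes GetNodePoolTemplate with the same signature as the nodeGroupClient interface above (the provider's real client must satisfy that interface); the helper name and printed fields are illustrative only:

```go
package example

import (
	"context"
	"fmt"

	"github.com/digitalocean/godo"
)

// fetchPoolTemplate is a hedged sketch, not provider code: it fetches the node
// pool template that Refresh caches on each NodeGroup for scale-from-zero.
func fetchPoolTemplate(ctx context.Context, token, clusterID, poolName string) error {
	client := godo.NewFromToken(token)
	// Assumed to mirror nodeGroupClient.GetNodePoolTemplate above.
	tmpl, _, err := client.Kubernetes.GetNodePoolTemplate(ctx, clusterID, poolName)
	if err != nil {
		return err
	}
	fmt.Printf("pool %q (%s): capacity cpu=%d memory=%s pods=%d\n",
		tmpl.Name, tmpl.Slug,
		tmpl.Template.Capacity.CPU,
		tmpl.Template.Capacity.Memory,
		tmpl.Template.Capacity.Pods)
	return nil
}
```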
@@ -90,6 +90,7 @@ func TestDigitalOceanManager_Refresh(t *testing.T) {
assert.NoError(t, err)

client := &doClientMock{}
client = setGetNodeTemplateMock(client, 4)
ctx := context.Background()

client.On("ListNodePools", ctx, manager.clusterID, nil).Return(
@@ -147,6 +148,8 @@ func TestDigitalOceanManager_RefreshWithNodeSpec(t *testing.T) {
assert.NoError(t, err)

client := &doClientMock{}
client = setGetNodeTemplateMock(client, 4)

ctx := context.Background()

client.On("ListNodePools", ctx, manager.clusterID, nil).Return(
@@ -20,13 +20,14 @@ import (
"context"
"errors"
"fmt"

"github.com/digitalocean/godo"
apiv1 "k8s.io/api/core/v1"

"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
utilerrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors"
)

const (
@@ -43,13 +44,13 @@ var (
// configuration info and functions to control a set of nodes that have the
// same capacity and set of labels.
type NodeGroup struct {
id string
clusterID string
client nodeGroupClient
nodePool *godo.KubernetesNodePool

minSize int
maxSize int
id string
clusterID string
client nodeGroupClient
nodePool *godo.KubernetesNodePool
nodePoolTemplate *godo.KubernetesNodePoolTemplateResponse
minSize int
maxSize int
}

// MaxSize returns maximum size of the node group.
@@ -213,7 +214,21 @@ func (n *NodeGroup) Nodes() ([]cloudprovider.Instance, error) {
// that are started on the node by default, using manifest (most likely only
// kube-proxy). Implementation optional.
func (n *NodeGroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
return nil, cloudprovider.ErrNotImplemented
if n.nodePoolTemplate != nil {
// Template has already been populated from cache - convert to node info and return
tni, err := toNodeInfoTemplate(n.nodePoolTemplate)
if err != nil {
return nil, utilerrors.NewAutoscalerError(utilerrors.InternalError, err.Error())
}
return tni, nil
}

// No template present in cache - attempt to fetch from API
resp, _, err := n.client.GetNodePoolTemplate(context.TODO(), n.clusterID, n.nodePool.Name)
if err != nil {
return nil, utilerrors.NewAutoscalerError(utilerrors.InternalError, err.Error())
}
return toNodeInfoTemplate(resp)
}

// Exist checks if the node group really exists on the cloud provider side.
@@ -292,3 +307,57 @@ func toInstanceStatus(nodeState *godo.KubernetesNodeStatus) *cloudprovider.Insta

return st
}

func toNodeInfoTemplate(resp *godo.KubernetesNodePoolTemplateResponse) (*framework.NodeInfo, error) {
allocatable, err := parseToQuanitity(resp.Template.Allocatable.CPU, resp.Template.Allocatable.Pods, resp.Template.Allocatable.Memory)
if err != nil {
return nil, fmt.Errorf("failed to create allocatable resources - %s", err)
}
capacity, err := parseToQuanitity(resp.Template.Capacity.CPU, resp.Template.Capacity.Pods, resp.Template.Capacity.Memory)
if err != nil {
return nil, fmt.Errorf("failed to create capacity resources - %s", err)
}
l := map[string]string{
apiv1.LabelOSStable: cloudprovider.DefaultOS,
apiv1.LabelArchStable: cloudprovider.DefaultArch,
}
l = cloudprovider.JoinStringMaps(l, resp.Template.Labels)
node := &apiv1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: generateWorkerName(resp.Name, 1),
Labels: l,
},
Spec: apiv1.NodeSpec{},
Status: apiv1.NodeStatus{
Allocatable: allocatable,
Capacity: capacity,
Phase: apiv1.NodeRunning,
Conditions: cloudprovider.BuildReadyConditions(),
},
}
return framework.NewNodeInfo(node, nil), nil
}

func parseToQuanitity(cpu int64, pods int64, memory string) (apiv1.ResourceList, error) {
c := resource.NewQuantity(cpu, resource.DecimalSI)
p := resource.NewQuantity(pods, resource.DecimalSI)
m, err := resource.ParseQuantity(memory)
if err != nil {
return nil, err
}
return apiv1.ResourceList{
apiv1.ResourceCPU: *c,
apiv1.ResourceMemory: m,
apiv1.ResourcePods: *p,
}, nil
}

func generateWorkerName(poolName string, workerID int64) string {
var customAlphabet string = "n38uc7mqfyxojrbwgea6tl2ps5kh4ivd01z9"
var customAlphabetSize int64 = int64(len(customAlphabet))
s := ""
for ; workerID > 0; workerID = workerID / customAlphabetSize {
s = string(customAlphabet[workerID%customAlphabetSize]) + s
}
return fmt.Sprintf("%s-%s", poolName, s)
}
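
A worked example of the name encoding above (a standalone sketch, not part of the commit): generateWorkerName converts the worker ID into digits of the custom 36-character alphabet, most-significant digit first, and appends the result to the pool name.

```go
package example

import "fmt"

// encodeWorkerID reproduces generateWorkerName's digit loop so the mapping is
// easy to check by hand.
func encodeWorkerID(id int64) string {
	const alphabet = "n38uc7mqfyxojrbwgea6tl2ps5kh4ivd01z9" // same custom alphabet
	base := int64(len(alphabet))
	s := ""
	for ; id > 0; id /= base {
		s = string(alphabet[id%base]) + s
	}
	return s
}

func main() {
	// toNodeInfoTemplate always passes workerID 1, and alphabet[1] == '3',
	// so a pool named "some-pool" yields a template node "some-pool-3".
	fmt.Println("some-pool-" + encodeWorkerID(1)) // some-pool-3
	// Larger IDs gain extra digits, e.g. 37 = 1*36 + 1 -> "33".
	fmt.Println("some-pool-" + encodeWorkerID(37)) // some-pool-33
}
```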
@@ -460,3 +460,8 @@ func (m *doClientMock) DeleteNode(ctx context.Context, clusterID, poolID, nodeID
args := m.Called(ctx, clusterID, poolID, nodeID, nil)
return args.Get(0).(*godo.Response), args.Error(1)
}

func (m *doClientMock) GetNodePoolTemplate(ctx context.Context, clusterID string, nodePoolName string) (*godo.KubernetesNodePoolTemplateResponse, *godo.Response, error) {
args := m.Called(ctx, clusterID, nodePoolName)
return args.Get(0).(*godo.KubernetesNodePoolTemplateResponse), args.Get(1).(*godo.Response), args.Error(2)
}