DigitalOcean: Implement TemplateNodeInfo in DO cloud provider CA to support scaling up from zero #7840

Status: Open · wants to merge 2 commits into base: master
8 changes: 4 additions & 4 deletions cluster-autoscaler/cloudprovider/digitalocean/README.md
@@ -28,24 +28,24 @@ picks up the configuration from the API and adjusts the behavior accordingly.
# Development

Make sure you're inside the root path of the [autoscaler
-repository](https://github.com/kubernetes/autoscaler)
+repository](https://github.com/kubernetes/autoscaler/cluster-autoscaler)

1.) Build the `cluster-autoscaler` binary:


```
-make build-in-docker
+GOARCH=amd64 make build-in-docker
```

2.) Build the docker image:

```
-docker build -t digitalocean/cluster-autoscaler:dev .
+docker build --platform linux/amd64 -f Dockerfile.amd64 -t digitalocean/cluster-autoscaler:dev .
```


3.) Push the docker image to Docker Hub:

```
-docker push digitalocean/cluster-autoscaler:dev
+docker push --platform linux/amd64 digitalocean/cluster-autoscaler:dev
```
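
To sanity-check that the pushed image targets the intended platform, an inspection along these lines should work (assuming a recent Docker with buildx available; the tag is the one used above):

```
docker buildx imagetools inspect digitalocean/cluster-autoscaler:dev
```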
@@ -29,6 +29,73 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
)

func defaultDOClientMock(clusterID string) *doClientMock {
client := &doClientMock{}
ctx := context.Background()

client.On("ListNodePools", ctx, clusterID, nil).Return(
[]*godo.KubernetesNodePool{
{
ID: "1",
Nodes: []*godo.KubernetesNode{
{ID: "1", Status: &godo.KubernetesNodeStatus{State: "running"}},
{ID: "2", Status: &godo.KubernetesNodeStatus{State: "running"}},
},
AutoScale: true,
},
{
ID: "2",
Nodes: []*godo.KubernetesNode{
{ID: "3", Status: &godo.KubernetesNodeStatus{State: "deleting"}},
{ID: "4", Status: &godo.KubernetesNodeStatus{State: "running"}},
},
AutoScale: true,
},
{
ID: "3",
Nodes: []*godo.KubernetesNode{
{ID: "5", Status: &godo.KubernetesNodeStatus{State: "provisioning"}},
{ID: "6", Status: &godo.KubernetesNodeStatus{State: "running"}},
},
AutoScale: true,
},
{
ID: "4",
Nodes: []*godo.KubernetesNode{
{ID: "7", Status: &godo.KubernetesNodeStatus{State: "draining"}},
{ID: "8", Status: &godo.KubernetesNodeStatus{State: "running"}},
},
AutoScale: false,
},
},
&godo.Response{},
nil,
).Once()
return client
}

func setGetNodeTemplateMock(c *doClientMock, times int) *doClientMock {
c.On("GetNodePoolTemplate", context.Background(), "123456", "").Return(&godo.KubernetesNodePoolTemplateResponse{
ClusterUUID: "123456",
Name: "some-pool",
Slug: "s-1vcpu-2gb",
Template: &godo.KubernetesNodePoolTemplate{
Labels: make(map[string]string),
Capacity: &godo.KubernetesNodePoolResources{
CPU: 1,
Memory: "2048Mi",
Pods: 110,
},
Allocatable: &godo.KubernetesNodePoolResources{
CPU: 380,
Memory: "1024MI",
Pods: 110,
},
},
}, &godo.Response{}, nil).Times(times)
return c
}

func testCloudProvider(t *testing.T, client *doClientMock) *digitaloceanCloudProvider {
cfg := `{"cluster_id": "123456", "token": "123-123-123", "url": "https://api.digitalocean.com/v2", "version": "dev"}`

@@ -38,47 +105,7 @@ func testCloudProvider(t *testing.T, client *doClientMock) *digitaloceanCloudProvider {

// fill the test provider with some example
if client == nil {
-		client = &doClientMock{}
-		ctx := context.Background()
-
-		client.On("ListNodePools", ctx, manager.clusterID, nil).Return(
-			[]*godo.KubernetesNodePool{
-				{
-					ID: "1",
-					Nodes: []*godo.KubernetesNode{
-						{ID: "1", Status: &godo.KubernetesNodeStatus{State: "running"}},
-						{ID: "2", Status: &godo.KubernetesNodeStatus{State: "running"}},
-					},
-					AutoScale: true,
-				},
-				{
-					ID: "2",
-					Nodes: []*godo.KubernetesNode{
-						{ID: "3", Status: &godo.KubernetesNodeStatus{State: "deleting"}},
-						{ID: "4", Status: &godo.KubernetesNodeStatus{State: "running"}},
-					},
-					AutoScale: true,
-				},
-				{
-					ID: "3",
-					Nodes: []*godo.KubernetesNode{
-						{ID: "5", Status: &godo.KubernetesNodeStatus{State: "provisioning"}},
-						{ID: "6", Status: &godo.KubernetesNodeStatus{State: "running"}},
-					},
-					AutoScale: true,
-				},
-				{
-					ID: "4",
-					Nodes: []*godo.KubernetesNode{
-						{ID: "7", Status: &godo.KubernetesNodeStatus{State: "draining"}},
-						{ID: "8", Status: &godo.KubernetesNodeStatus{State: "running"}},
-					},
-					AutoScale: false,
-				},
-			},
-			&godo.Response{},
-			nil,
-		).Once()
+		client = defaultDOClientMock(manager.clusterID)
}

manager.client = client
@@ -102,7 +129,10 @@ func TestDigitalOceanCloudProvider_Name(t *testing.T) {
}

func TestDigitalOceanCloudProvider_NodeGroups(t *testing.T) {
-	provider := testCloudProvider(t, nil)
+	c := defaultDOClientMock("123456")
+	c = setGetNodeTemplateMock(c, 3)
+
+	provider := testCloudProvider(t, c)
err := provider.manager.Refresh()
assert.NoError(t, err)

@@ -124,7 +154,7 @@ func TestDigitalOceanCloudProvider_NodeGroupForNode(t *testing.T) {
t.Run("success", func(t *testing.T) {
client := &doClientMock{}
ctx := context.Background()

client = setGetNodeTemplateMock(client, 2)
client.On("ListNodePools", ctx, clusterID, nil).Return(
[]*godo.KubernetesNodePool{
{
@@ -35,6 +35,9 @@
)

type nodeGroupClient interface {
	// GetNodePoolTemplate returns the template for a given node pool; the cluster autoscaler uses it to simulate scaling up from zero.
GetNodePoolTemplate(ctx context.Context, clusterID string, nodePoolName string) (*godo.KubernetesNodePoolTemplateResponse, *godo.Response, error)

// ListNodePools lists all the node pools found in a Kubernetes cluster.
ListNodePools(ctx context.Context, clusterID string, opts *godo.ListOptions) ([]*godo.KubernetesNodePool, *godo.Response, error)
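
A minimal usage sketch for the new method (hypothetical `ctx`, `client`, and `clusterID`; types and fields as introduced in this PR):

```go
// Fetch the node pool's scheduling template and read its declared capacity.
resp, _, err := client.GetNodePoolTemplate(ctx, clusterID, "pool-a")
if err != nil {
	return err
}
// resp.Template carries the labels, taints, capacity, and allocatable resources.
fmt.Printf("capacity: %d vCPU, %s memory, %d pods\n",
	resp.Template.Capacity.CPU, resp.Template.Capacity.Memory, resp.Template.Capacity.Pods)
```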

@@ -147,17 +150,22 @@ func (m *Manager) Refresh() error {
		if !nodePool.AutoScale {
			continue
		}
+
+		nodePoolTemplateResponse, _, err := m.client.GetNodePoolTemplate(ctx, m.clusterID, nodePool.Name)
+		if err != nil {
+			return err
+		}
+		klog.V(4).Infof("fetched template response - %v", nodePoolTemplateResponse)
		klog.V(4).Infof("adding node pool: %q name: %s min: %d max: %d",
			nodePool.ID, nodePool.Name, nodePool.MinNodes, nodePool.MaxNodes)

		group = append(group, &NodeGroup{
-			id:        nodePool.ID,
-			clusterID: m.clusterID,
-			client:    m.client,
-			nodePool:  nodePool,
-			minSize:   nodePool.MinNodes,
-			maxSize:   nodePool.MaxNodes,
+			id:               nodePool.ID,
+			clusterID:        m.clusterID,
+			client:           m.client,
+			nodePool:         nodePool,
+			nodePoolTemplate: nodePoolTemplateResponse,
+			minSize:          nodePool.MinNodes,
+			maxSize:          nodePool.MaxNodes,
		})
}

@@ -90,6 +90,7 @@ func TestDigitalOceanManager_Refresh(t *testing.T) {
assert.NoError(t, err)

client := &doClientMock{}
client = setGetNodeTemplateMock(client, 4)
ctx := context.Background()

client.On("ListNodePools", ctx, manager.clusterID, nil).Return(
@@ -147,6 +148,8 @@ func TestDigitalOceanManager_RefreshWithNodeSpec(t *testing.T) {
assert.NoError(t, err)

client := &doClientMock{}
client = setGetNodeTemplateMock(client, 4)

ctx := context.Background()

client.On("ListNodePools", ctx, manager.clusterID, nil).Return(
@@ -23,10 +23,13 @@ import (

"github.com/digitalocean/godo"
apiv1 "k8s.io/api/core/v1"

"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
utilerrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/kubernetes/pkg/util/taints"
)

const (
@@ -43,13 +46,13 @@
// configuration info and functions to control a set of nodes that have the
// same capacity and set of labels.
type NodeGroup struct {
-	id        string
-	clusterID string
-	client    nodeGroupClient
-	nodePool  *godo.KubernetesNodePool
-
-	minSize int
-	maxSize int
+	id               string
+	clusterID        string
+	client           nodeGroupClient
+	nodePool         *godo.KubernetesNodePool
+	nodePoolTemplate *godo.KubernetesNodePoolTemplateResponse
+
+	minSize int
+	maxSize int
}

Review comment on `nodePoolTemplate`: This looks a bit inconsistent. This should rather be a `KubernetesNodePoolTemplate` instead of the RPC response type.

// MaxSize returns maximum size of the node group.
@@ -213,7 +216,21 @@ func (n *NodeGroup) Nodes() ([]cloudprovider.Instance, error) {
// that are started on the node by default, using manifest (most likely only
// kube-proxy). Implementation optional.
func (n *NodeGroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
-	return nil, cloudprovider.ErrNotImplemented
+	if n.nodePoolTemplate != nil {
+		// Template has already been populated from the cache; convert it to a
+		// node info and return it.
+		tni, err := toNodeInfoTemplate(n.nodePoolTemplate)
+		if err != nil {
+			return nil, utilerrors.NewAutoscalerError(utilerrors.InternalError, err.Error())
+		}
+		return tni, nil
+	}
+
+	// No template present in the cache; attempt to fetch it from the API.
+	resp, _, err := n.client.GetNodePoolTemplate(context.TODO(), n.clusterID, n.nodePool.Name)
+	if err != nil {
+		return nil, utilerrors.NewAutoscalerError(utilerrors.InternalError, err.Error())
+	}
+	return toNodeInfoTemplate(resp)
}
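
For illustration, this is roughly how the returned node info would be consumed in a test (hypothetical `ng`; assumes `framework.NodeInfo` exposes the underlying node via `Node()`):

```go
// Build the template node info and inspect what scale-from-zero simulations will see.
ni, err := ng.TemplateNodeInfo()
if err != nil {
	t.Fatal(err)
}
node := ni.Node()
fmt.Println(node.Labels[apiv1.LabelArchStable], node.Status.Allocatable.Cpu().String())
```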

// Exist checks if the node group really exists on the cloud provider side.
@@ -292,3 +309,64 @@ func toInstanceStatus(nodeState *godo.KubernetesNodeStatus) *cloudprovider.InstanceStatus {

return st
}

func toNodeInfoTemplate(resp *godo.KubernetesNodePoolTemplateResponse) (*framework.NodeInfo, error) {
	allocatable, err := parseToQuantity(resp.Template.Allocatable.CPU, resp.Template.Allocatable.Pods, resp.Template.Allocatable.Memory)
if err != nil {
return nil, fmt.Errorf("failed to create allocatable resources - %s", err)
}
	capacity, err := parseToQuantity(resp.Template.Capacity.CPU, resp.Template.Capacity.Pods, resp.Template.Capacity.Memory)
if err != nil {
return nil, fmt.Errorf("failed to create capacity resources - %s", err)
}
addedTaints, _, err := taints.ParseTaints(resp.Template.Taints)
if err != nil {
return nil, fmt.Errorf("failed to parse taints from template - %s", err)
}
l := map[string]string{
apiv1.LabelOSStable: cloudprovider.DefaultOS,
apiv1.LabelArchStable: cloudprovider.DefaultArch,
}

l = cloudprovider.JoinStringMaps(l, resp.Template.Labels)
node := &apiv1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: generateWorkerName(resp.Name, 1),
Labels: l,
},
Spec: apiv1.NodeSpec{
Taints: addedTaints,
},
Status: apiv1.NodeStatus{
Allocatable: allocatable,
Capacity: capacity,
Phase: apiv1.NodeRunning,
Conditions: cloudprovider.BuildReadyConditions(),
},
}
return framework.NewNodeInfo(node, nil), nil
}

func parseToQuantity(cpu int64, pods int64, memory string) (apiv1.ResourceList, error) {
c := resource.NewQuantity(cpu, resource.DecimalSI)
p := resource.NewQuantity(pods, resource.DecimalSI)
m, err := resource.ParseQuantity(memory)
if err != nil {
return nil, err
}
return apiv1.ResourceList{
apiv1.ResourceCPU: *c,
apiv1.ResourceMemory: m,
apiv1.ResourcePods: *p,
}, nil
}
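
A worked example of the conversion, using the values from the mock template above (illustrative only; `fmt` and `log` imports assumed):

```go
rl, err := parseToQuantity(1, 110, "2048Mi")
if err != nil {
	log.Fatal(err)
}
cpu := rl[apiv1.ResourceCPU]    // one whole core, DecimalSI
mem := rl[apiv1.ResourceMemory] // parsed Kubernetes quantity
fmt.Println(cpu.String(), mem.String()) // prints: 1 2048Mi
```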

func generateWorkerName(poolName string, workerID int64) string {
	// Encode workerID in base 36 using a custom alphabet, most significant
	// digit first, and append the result to the pool name as a suffix.
	var customAlphabet string = "n38uc7mqfyxojrbwgea6tl2ps5kh4ivd01z9"
	var customAlphabetSize int64 = int64(len(customAlphabet))
	s := ""
	for ; workerID > 0; workerID = workerID / customAlphabetSize {
		s = string(customAlphabet[workerID%customAlphabetSize]) + s
	}
	return fmt.Sprintf("%s-%s", poolName, s)
}

Review comment on `generateWorkerName`: I can't quite follow the logic of this. The worker ID seems to be intended to be something dynamic; however, the only call I can find passes a worker ID of 1. If you just want to create a random fixed-size string suffix from a defined charset, you could follow something like this: https://www.calhoun.io/creating-random-strings-in-go/
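
A sketch of the alternative that comment points at: a fixed-length random suffix drawn from the same charset (the helper name and length are illustrative, and seeding of `math/rand` is left to the caller):

```go
import "math/rand"

// randomSuffix returns n random characters from the custom alphabet used above.
func randomSuffix(n int) string {
	const customAlphabet = "n38uc7mqfyxojrbwgea6tl2ps5kh4ivd01z9"
	b := make([]byte, n)
	for i := range b {
		b[i] = customAlphabet[rand.Intn(len(customAlphabet))]
	}
	return string(b)
}
```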
@@ -460,3 +460,8 @@ func (m *doClientMock) DeleteNode(ctx context.Context, clusterID, poolID, nodeID
args := m.Called(ctx, clusterID, poolID, nodeID, nil)
return args.Get(0).(*godo.Response), args.Error(1)
}

func (m *doClientMock) GetNodePoolTemplate(ctx context.Context, clusterID string, nodePoolName string) (*godo.KubernetesNodePoolTemplateResponse, *godo.Response, error) {
args := m.Called(ctx, clusterID, nodePoolName)
return args.Get(0).(*godo.KubernetesNodePoolTemplateResponse), args.Get(1).(*godo.Response), args.Error(2)
}