Skip to content

Commit

Permalink
[FEATURE] Memory Limits (#494, @konradmalik)
Browse files Browse the repository at this point in the history
  • Loading branch information
konradmalik authored Mar 29, 2021
1 parent 16fa107 commit e495fe8
Show file tree
Hide file tree
Showing 13 changed files with 307 additions and 5 deletions.
9 changes: 8 additions & 1 deletion cmd/cluster/clusterCreate.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,13 @@ package cluster

import (
"fmt"
"github.com/docker/go-connections/nat"
"os"
"runtime"
"strings"
"time"

"github.com/docker/go-connections/nat"

"github.com/spf13/cobra"
"github.com/spf13/viper"
"gopkg.in/yaml.v2"
Expand Down Expand Up @@ -316,6 +317,12 @@ func NewCmdClusterCreate() *cobra.Command {
cmd.Flags().String("gpus", "", "GPU devices to add to the cluster node containers ('all' to pass all GPUs) [From docker]")
_ = cfgViper.BindPFlag("options.runtime.gpurequest", cmd.Flags().Lookup("gpus"))

cmd.Flags().String("servers-memory", "", "Memory limit imposed on the server nodes [From docker]")
_ = cfgViper.BindPFlag("options.runtime.serversmemory", cmd.Flags().Lookup("servers-memory"))

cmd.Flags().String("agents-memory", "", "Memory limit imposed on the agents nodes [From docker]")
_ = cfgViper.BindPFlag("options.runtime.agentsmemory", cmd.Flags().Lookup("agents-memory"))

/* Image Importing */
cmd.Flags().Bool("no-image-volume", false, "Disable the creation of a volume for importing images")
_ = cfgViper.BindPFlag("options.k3d.disableimagevolume", cmd.Flags().Lookup("no-image-volume"))
Expand Down
13 changes: 13 additions & 0 deletions cmd/node/nodeCreate.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (

"github.com/spf13/cobra"

dockerunits "github.com/docker/go-units"
"github.com/rancher/k3d/v4/cmd/util"
k3dc "github.com/rancher/k3d/v4/pkg/client"
"github.com/rancher/k3d/v4/pkg/runtimes"
Expand Down Expand Up @@ -67,6 +68,7 @@ func NewCmdNodeCreate() *cobra.Command {
}

cmd.Flags().StringP("image", "i", fmt.Sprintf("%s:%s", k3d.DefaultK3sImageRepo, version.GetK3sVersion(false)), "Specify k3s image used for the node(s)")
cmd.Flags().String("memory", "", "Memory limit imposed on the node [From docker]")

cmd.Flags().BoolVar(&createNodeOpts.Wait, "wait", false, "Wait for the node(s) to be ready before returning.")
cmd.Flags().DurationVar(&createNodeOpts.Timeout, "timeout", 0*time.Second, "Maximum waiting time for '--wait' before canceling/returning.")
Expand Down Expand Up @@ -112,6 +114,16 @@ func parseCreateNodeCmd(cmd *cobra.Command, args []string) ([]*k3d.Node, *k3d.Cl
Name: clusterName,
}

// --memory
memory, err := cmd.Flags().GetString("memory")
if err != nil {
log.Errorln("No memory specified")
log.Fatalln(err)
}
if _, err := dockerunits.RAMInBytes(memory); memory != "" && err != nil {
log.Errorf("Provided memory limit value is invalid")
}

// generate list of nodes
nodes := []*k3d.Node{}
for i := 0; i < replicas; i++ {
Expand All @@ -123,6 +135,7 @@ func parseCreateNodeCmd(cmd *cobra.Command, args []string) ([]*k3d.Node, *k3d.Cl
k3d.LabelRole: roleStr,
},
Restart: true,
Memory: memory,
}
nodes = append(nodes, node)
}
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ require (
github.com/docker/distribution v2.7.1+incompatible // indirect
github.com/docker/docker v20.10.5+incompatible
github.com/docker/go-connections v0.4.0
github.com/docker/go-units v0.4.0
github.com/go-test/deep v1.0.4
github.com/gogo/protobuf v1.3.2 // indirect
github.com/heroku/docker-registry-client v0.0.0-20190909225348-afc9e1acc3d5
Expand Down
46 changes: 46 additions & 0 deletions pkg/client/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,17 @@ import (
"bytes"
"context"
"fmt"
"os"
"reflect"
"strings"
"time"

dockerunits "github.com/docker/go-units"
"github.com/imdario/mergo"
"github.com/rancher/k3d/v4/pkg/runtimes"
"github.com/rancher/k3d/v4/pkg/runtimes/docker"
k3d "github.com/rancher/k3d/v4/pkg/types"
"github.com/rancher/k3d/v4/pkg/util"
log "github.com/sirupsen/logrus"
"golang.org/x/sync/errgroup"
)
Expand Down Expand Up @@ -329,6 +333,37 @@ func NodeCreate(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, c
}
}

// memory limits
if node.Memory != "" {
if runtime != runtimes.Docker {
log.Warn("ignoring specified memory limits as runtime is not Docker")
} else {
memory, err := dockerunits.RAMInBytes(node.Memory)
if err != nil {
return fmt.Errorf("Invalid memory limit format: %+v", err)
}
// mount fake meminfo as readonly
fakemempath, err := util.MakeFakeMeminfo(memory, node.Name)
if err != nil {
return fmt.Errorf("Failed to create fake meminfo: %+v", err)
}
node.Volumes = append(node.Volumes, fmt.Sprintf("%s:%s:ro", fakemempath, util.MemInfoPath))
// mount empty edac folder, but only if it exists
exists, err := docker.CheckIfDirectoryExists(ctx, node.Image, util.EdacFolderPath)
if err != nil {
return fmt.Errorf("Failed to check for the existence of edac folder: %+v", err)
}
if exists {
log.Debugln("Found edac folder")
fakeedacpath, err := util.MakeFakeEdac(node.Name)
if err != nil {
return fmt.Errorf("Failed to create fake edac: %+v", err)
}
node.Volumes = append(node.Volumes, fmt.Sprintf("%s:%s:ro", fakeedacpath, util.EdacFolderPath))
}
}
}

/*
* CREATION
*/
Expand All @@ -346,6 +381,17 @@ func NodeDelete(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, o
log.Error(err)
}

// delete fake folder created for limits
if node.Memory != "" {
log.Debug("Cleaning fake files folder from k3d config dir for this node...")
filepath, err := util.GetNodeFakerDirOrCreate(node.Name)
err = os.RemoveAll(filepath)
if err != nil {
// this err prob should not be fatal, just log it
log.Errorf("Could not remove fake files folder for node %s: %+v", node.Name, err)
}
}

// update the server loadbalancer
if !opts.SkipLBUpdate && (node.Role == k3d.ServerRole || node.Role == k3d.AgentRole) {
cluster, err := ClusterGet(ctx, runtime, &k3d.Cluster{Name: node.Labels[k3d.LabelClusterName]})
Expand Down
11 changes: 8 additions & 3 deletions pkg/config/transform.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ func TransformSimpleToClusterConfig(ctx context.Context, runtime runtimes.Runtim
Image: simpleConfig.Image,
Args: simpleConfig.Options.K3sOptions.ExtraServerArgs,
ServerOpts: k3d.ServerOpts{},
Memory: simpleConfig.Options.Runtime.ServersMemory,
}

// first server node will be init node if we have more than one server specified but no external datastore
Expand All @@ -120,9 +121,10 @@ func TransformSimpleToClusterConfig(ctx context.Context, runtime runtimes.Runtim

for i := 0; i < simpleConfig.Agents; i++ {
agentNode := k3d.Node{
Role: k3d.AgentRole,
Image: simpleConfig.Image,
Args: simpleConfig.Options.K3sOptions.ExtraAgentArgs,
Role: k3d.AgentRole,
Image: simpleConfig.Image,
Args: simpleConfig.Options.K3sOptions.ExtraAgentArgs,
Memory: simpleConfig.Options.Runtime.AgentsMemory,
}
newCluster.Nodes = append(newCluster.Nodes, &agentNode)
}
Expand Down Expand Up @@ -226,6 +228,9 @@ func TransformSimpleToClusterConfig(ctx context.Context, runtime runtimes.Runtim
DisableLoadBalancer: simpleConfig.Options.K3dOptions.DisableLoadbalancer,
K3sServerArgs: simpleConfig.Options.K3sOptions.ExtraServerArgs,
K3sAgentArgs: simpleConfig.Options.K3sOptions.ExtraAgentArgs,
GPURequest: simpleConfig.Options.Runtime.GPURequest,
ServersMemory: simpleConfig.Options.Runtime.ServersMemory,
AgentsMemory: simpleConfig.Options.Runtime.AgentsMemory,
GlobalLabels: map[string]string{}, // empty init
GlobalEnv: []string{}, // empty init
}
Expand Down
6 changes: 6 additions & 0 deletions pkg/config/v1alpha2/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@
"properties": {
"gpuRequest": {
"type": "string"
},
"serversMemory": {
"type": "string"
},
"agentsMemory": {
"type": "string"
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/config/v1alpha2/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,9 @@ type SimpleConfigOptions struct {
}

type SimpleConfigOptionsRuntime struct {
GPURequest string `mapstructure:"gpuRequest" yaml:"gpuRequest"`
GPURequest string `mapstructure:"gpuRequest" yaml:"gpuRequest"`
ServersMemory string `mapstructure:"serversMemory" yaml:"serversMemory"`
AgentsMemory string `mapstructure:"agentsMemory" yaml:"agentsMemory"`
}

type SimpleConfigOptionsK3d struct {
Expand Down
16 changes: 16 additions & 0 deletions pkg/config/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (

"fmt"

dockerunits "github.com/docker/go-units"
log "github.com/sirupsen/logrus"
)

Expand Down Expand Up @@ -63,6 +64,21 @@ func ValidateClusterConfig(ctx context.Context, runtime runtimes.Runtime, config
return fmt.Errorf("The API Port can not be changed when using 'host' network")
}

// memory limits must have proper format
// if empty we don't care about errors in parsing
if config.ClusterCreateOpts.ServersMemory != "" {
if _, err := dockerunits.RAMInBytes(config.ClusterCreateOpts.ServersMemory); err != nil {
return fmt.Errorf("Provided servers memory limit value is invalid")
}

}

if config.ClusterCreateOpts.AgentsMemory != "" {
if _, err := dockerunits.RAMInBytes(config.ClusterCreateOpts.AgentsMemory); err != nil {
return fmt.Errorf("Provided agents memory limit value is invalid")
}
}

// validate nodes one by one
for _, node := range config.Cluster.Nodes {

Expand Down
59 changes: 59 additions & 0 deletions pkg/runtimes/docker/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,62 @@ func getNodeContainer(ctx context.Context, node *k3d.Node) (*types.Container, er
return &containers[0], nil

}

// executes an arbitrary command in a container while returning its exit code.
// useful to check something in docker env
func executeCheckInContainer(ctx context.Context, image string, cmd []string) (int64, error) {
docker, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
if err != nil {
log.Errorln("Failed to create docker client")
return -1, err
}
defer docker.Close()

if err = pullImage(ctx, docker, image); err != nil {
return -1, err
}

resp, err := docker.ContainerCreate(ctx, &container.Config{
Image: image,
Cmd: cmd,
Tty: false,
Entrypoint: []string{},
}, nil, nil, nil, "")
if err != nil {
log.Errorf("Failed to create container from image %s with cmd %s", image, cmd)
return -1, err
}

if err = startContainer(ctx, resp.ID); err != nil {
return -1, err
}

exitCode := -1
statusCh, errCh := docker.ContainerWait(ctx, resp.ID, container.WaitConditionNotRunning)
select {
case err := <-errCh:
if err != nil {
log.Errorf("Error while waiting for container %s to exit", resp.ID)
return -1, err
}
case status := <-statusCh:
exitCode = int(status.StatusCode)
}

if err = removeContainer(ctx, resp.ID); err != nil {
return -1, err
}

return int64(exitCode), nil
}

// CheckIfDirectoryExists checks for the existence of a given path inside the docker environment
func CheckIfDirectoryExists(ctx context.Context, image string, dir string) (bool, error) {
log.Tracef("checking if dir %s exists in docker environment...", dir)
shellCmd := fmt.Sprintf("[ -d \"%s\" ] && exit 0 || exit 1", dir)
cmd := []string{"sh", "-c", shellCmd}
exitCode, err := executeCheckInContainer(ctx, image, cmd)
log.Tracef("check dir container returned %d exist code", exitCode)
return exitCode == 0, err

}
19 changes: 19 additions & 0 deletions pkg/runtimes/docker/translate.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
log "github.com/sirupsen/logrus"

dockercliopts "github.com/docker/cli/opts"
dockerunits "github.com/docker/go-units"
)

// TranslateNodeToContainer translates a k3d node specification to a docker container representation
Expand Down Expand Up @@ -85,6 +86,16 @@ func TranslateNodeToContainer(node *k3d.Node) (*NodeInDocker, error) {
hostConfig.DeviceRequests = gpuopts.Value()
}

// memory limits
// fake meminfo is mounted to hostConfig.Binds
if node.Memory != "" {
memory, err := dockerunits.RAMInBytes(node.Memory)
if err != nil {
return nil, fmt.Errorf("Failed to set memory limit: %+v", err)
}
hostConfig.Memory = memory
}

/* They have to run in privileged mode */
// TODO: can we replace this by a reduced set of capabilities?
hostConfig.Privileged = true
Expand Down Expand Up @@ -233,6 +244,13 @@ func TranslateContainerDetailsToNode(containerDetails types.ContainerJSON) (*k3d
Status: containerDetails.ContainerJSONBase.State.Status,
}

// memory limit
memoryStr := dockerunits.HumanSize(float64(containerDetails.HostConfig.Memory))
// no-limit is returned as 0B, filter this out
if memoryStr == "0B" {
memoryStr = ""
}

node := &k3d.Node{
Name: strings.TrimPrefix(containerDetails.Name, "/"), // container name with leading '/' cut off
Role: k3d.NodeRoles[containerDetails.Config.Labels[k3d.LabelRole]],
Expand All @@ -249,6 +267,7 @@ func TranslateContainerDetailsToNode(containerDetails types.ContainerJSON) (*k3d
ServerOpts: serverOpts,
AgentOpts: k3d.AgentOpts{},
State: nodeState,
Memory: memoryStr,
}
return node, nil
}
3 changes: 3 additions & 0 deletions pkg/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@ type ClusterCreateOpts struct {
K3sServerArgs []string `yaml:"k3sServerArgs" json:"k3sServerArgs,omitempty"`
K3sAgentArgs []string `yaml:"k3sAgentArgs" json:"k3sAgentArgs,omitempty"`
GPURequest string `yaml:"gpuRequest" json:"gpuRequest,omitempty"`
ServersMemory string `yaml:"serversMemory" json:"serversMemory,omitempty"`
AgentsMemory string `yaml:"agentsMemory" json:"agentsMemory,omitempty"`
NodeHooks []NodeHook `yaml:"nodeHooks,omitempty" json:"nodeHooks,omitempty"`
GlobalLabels map[string]string `yaml:"globalLabels,omitempty" json:"globalLabels,omitempty"`
GlobalEnv []string `yaml:"globalEnv,omitempty" json:"globalEnv,omitempty"`
Expand Down Expand Up @@ -328,6 +330,7 @@ type Node struct {
ServerOpts ServerOpts `yaml:"serverOpts" json:"serverOpts,omitempty"`
AgentOpts AgentOpts `yaml:"agentOpts" json:"agentOpts,omitempty"`
GPURequest string // filled automatically
Memory string // filled automatically
State NodeState // filled automatically
}

Expand Down
Loading

0 comments on commit e495fe8

Please sign in to comment.