Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] Memory Limits (#494, @konradmalik) #494

Merged
merged 15 commits into from
Mar 29, 2021
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion cmd/cluster/clusterCreate.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,13 @@ package cluster

import (
"fmt"
"github.com/docker/go-connections/nat"
"os"
"runtime"
"strings"
"time"

"github.com/docker/go-connections/nat"

"github.com/spf13/cobra"
"github.com/spf13/viper"
"gopkg.in/yaml.v2"
Expand Down Expand Up @@ -316,6 +317,12 @@ func NewCmdClusterCreate() *cobra.Command {
cmd.Flags().String("gpus", "", "GPU devices to add to the cluster node containers ('all' to pass all GPUs) [From docker]")
_ = cfgViper.BindPFlag("options.runtime.gpurequest", cmd.Flags().Lookup("gpus"))

cmd.Flags().String("servers-memory", "", "Memory limit imposed on the server nodes [From docker]")
_ = cfgViper.BindPFlag("options.runtime.serversmemory", cmd.Flags().Lookup("servers-memory"))

cmd.Flags().String("agents-memory", "", "Memory limit imposed on the agents nodes [From docker]")
_ = cfgViper.BindPFlag("options.runtime.agentsmemory", cmd.Flags().Lookup("agents-memory"))

/* Image Importing */
cmd.Flags().Bool("no-image-volume", false, "Disable the creation of a volume for importing images")
_ = cfgViper.BindPFlag("options.k3d.disableimagevolume", cmd.Flags().Lookup("no-image-volume"))
Expand Down
13 changes: 13 additions & 0 deletions cmd/node/nodeCreate.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (

"github.com/spf13/cobra"

dockerunits "github.com/docker/go-units"
"github.com/rancher/k3d/v4/cmd/util"
k3dc "github.com/rancher/k3d/v4/pkg/client"
"github.com/rancher/k3d/v4/pkg/runtimes"
Expand Down Expand Up @@ -67,6 +68,7 @@ func NewCmdNodeCreate() *cobra.Command {
}

cmd.Flags().StringP("image", "i", fmt.Sprintf("%s:%s", k3d.DefaultK3sImageRepo, version.GetK3sVersion(false)), "Specify k3s image used for the node(s)")
cmd.Flags().String("memory", "", "Memory limit imposed on the node [From docker]")

cmd.Flags().BoolVar(&createNodeOpts.Wait, "wait", false, "Wait for the node(s) to be ready before returning.")
cmd.Flags().DurationVar(&createNodeOpts.Timeout, "timeout", 0*time.Second, "Maximum waiting time for '--wait' before canceling/returning.")
Expand Down Expand Up @@ -112,6 +114,16 @@ func parseCreateNodeCmd(cmd *cobra.Command, args []string) ([]*k3d.Node, *k3d.Cl
Name: clusterName,
}

// --memory
memory, err := cmd.Flags().GetString("memory")
if err != nil {
log.Errorln("No memory specified")
log.Fatalln(err)
}
if _, err := dockerunits.RAMInBytes(memory); memory != "" && err != nil {
log.Errorf("Provided memory limit value is invalid")
}

// generate list of nodes
nodes := []*k3d.Node{}
for i := 0; i < replicas; i++ {
Expand All @@ -123,6 +135,7 @@ func parseCreateNodeCmd(cmd *cobra.Command, args []string) ([]*k3d.Node, *k3d.Cl
k3d.LabelRole: roleStr,
},
Restart: true,
Memory: memory,
}
nodes = append(nodes, node)
}
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ require (
github.com/docker/distribution v2.7.1+incompatible // indirect
github.com/docker/docker v20.10.5+incompatible
github.com/docker/go-connections v0.4.0
github.com/docker/go-units v0.4.0
github.com/go-test/deep v1.0.4
github.com/gogo/protobuf v1.3.2 // indirect
github.com/heroku/docker-registry-client v0.0.0-20190909225348-afc9e1acc3d5
Expand Down
46 changes: 46 additions & 0 deletions pkg/client/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,17 @@ import (
"bytes"
"context"
"fmt"
"os"
"reflect"
"strings"
"time"

dockerunits "github.com/docker/go-units"
"github.com/imdario/mergo"
"github.com/rancher/k3d/v4/pkg/runtimes"
"github.com/rancher/k3d/v4/pkg/runtimes/docker"
k3d "github.com/rancher/k3d/v4/pkg/types"
"github.com/rancher/k3d/v4/pkg/util"
log "github.com/sirupsen/logrus"
"golang.org/x/sync/errgroup"
)
Expand Down Expand Up @@ -329,6 +333,37 @@ func NodeCreate(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, c
}
}

// memory limits
if node.Memory != "" {
if runtime != runtimes.Docker {
log.Warn("ignoring specified memory limits as runtime is not Docker")
} else {
memory, err := dockerunits.RAMInBytes(node.Memory)
if err != nil {
return fmt.Errorf("Invalid memory limit format: %+v", err)
}
// mount fake meminfo as readonly
fakemempath, err := util.MakeFakeMeminfo(memory, node.Name)
if err != nil {
return fmt.Errorf("Failed to create fake meminfo: %+v", err)
}
node.Volumes = append(node.Volumes, fmt.Sprintf("%s:%s:ro", util.MemInfoPath, fakemempath))
// mount empty edac folder, but only if it exists
exists, err := docker.CheckIfDirectoryExists(ctx, node.Image, util.EdacFolderPath)
if err != nil {
return fmt.Errorf("Failed to check for the existence of edac folder: %+v", err)
}
if exists {
log.Debugln("Found edac folder")
fakeedacpath, err := util.MakeFakeEdac(node.Name)
if err != nil {
return fmt.Errorf("Failed to create fake edac: %+v", err)
}
node.Volumes = append(node.Volumes, fmt.Sprintf("%s:%s:ro", util.EdacFolderPath, fakeedacpath))
}
}
}

/*
* CREATION
*/
Expand All @@ -346,6 +381,17 @@ func NodeDelete(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, o
log.Error(err)
}

// delete fake folder created for limits
if node.Memory != "" {
log.Debug("Cleaning fake files folder from k3d config dir for this node...")
filepath, err := util.GetNodeFakerDirOrCreate(node.Name)
err = os.RemoveAll(filepath)
if err != nil {
// this err prob should not be fatal, just log it
log.Errorf("Could not remove fake files folder for node %s: %+v", node.Name, err)
}
}

// update the server loadbalancer
if !opts.SkipLBUpdate && (node.Role == k3d.ServerRole || node.Role == k3d.AgentRole) {
cluster, err := ClusterGet(ctx, runtime, &k3d.Cluster{Name: node.Labels[k3d.LabelClusterName]})
Expand Down
11 changes: 8 additions & 3 deletions pkg/config/transform.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ func TransformSimpleToClusterConfig(ctx context.Context, runtime runtimes.Runtim
Image: simpleConfig.Image,
Args: simpleConfig.Options.K3sOptions.ExtraServerArgs,
ServerOpts: k3d.ServerOpts{},
Memory: simpleConfig.Options.Runtime.ServersMemory,
}

// first server node will be init node if we have more than one server specified but no external datastore
Expand All @@ -120,9 +121,10 @@ func TransformSimpleToClusterConfig(ctx context.Context, runtime runtimes.Runtim

for i := 0; i < simpleConfig.Agents; i++ {
agentNode := k3d.Node{
Role: k3d.AgentRole,
Image: simpleConfig.Image,
Args: simpleConfig.Options.K3sOptions.ExtraAgentArgs,
Role: k3d.AgentRole,
Image: simpleConfig.Image,
Args: simpleConfig.Options.K3sOptions.ExtraAgentArgs,
Memory: simpleConfig.Options.Runtime.AgentsMemory,
}
newCluster.Nodes = append(newCluster.Nodes, &agentNode)
}
Expand Down Expand Up @@ -226,6 +228,9 @@ func TransformSimpleToClusterConfig(ctx context.Context, runtime runtimes.Runtim
DisableLoadBalancer: simpleConfig.Options.K3dOptions.DisableLoadbalancer,
K3sServerArgs: simpleConfig.Options.K3sOptions.ExtraServerArgs,
K3sAgentArgs: simpleConfig.Options.K3sOptions.ExtraAgentArgs,
GPURequest: simpleConfig.Options.Runtime.GPURequest,
ServersMemory: simpleConfig.Options.Runtime.ServersMemory,
AgentsMemory: simpleConfig.Options.Runtime.AgentsMemory,
GlobalLabels: map[string]string{}, // empty init
GlobalEnv: []string{}, // empty init
}
Expand Down
6 changes: 6 additions & 0 deletions pkg/config/v1alpha2/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@
"properties": {
"gpuRequest": {
"type": "string"
},
"serversMemory": {
"type": "string"
},
"agentsMemory": {
"type": "string"
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/config/v1alpha2/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,9 @@ type SimpleConfigOptions struct {
}

type SimpleConfigOptionsRuntime struct {
GPURequest string `mapstructure:"gpuRequest" yaml:"gpuRequest"`
GPURequest string `mapstructure:"gpuRequest" yaml:"gpuRequest"`
ServersMemory string `mapstructure:"serversMemory" yaml:"serversMemory"`
AgentsMemory string `mapstructure:"agentsMemory" yaml:"agentsMemory"`
}

type SimpleConfigOptionsK3d struct {
Expand Down
16 changes: 16 additions & 0 deletions pkg/config/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (

"fmt"

dockerunits "github.com/docker/go-units"
log "github.com/sirupsen/logrus"
)

Expand Down Expand Up @@ -63,6 +64,21 @@ func ValidateClusterConfig(ctx context.Context, runtime runtimes.Runtime, config
return fmt.Errorf("The API Port can not be changed when using 'host' network")
}

// memory limits must have proper format
// if empty we don't care about errors in parsing
if config.ClusterCreateOpts.ServersMemory != "" {
if _, err := dockerunits.RAMInBytes(config.ClusterCreateOpts.ServersMemory); err != nil {
return fmt.Errorf("Provided servers memory limit value is invalid")
}

}

if config.ClusterCreateOpts.AgentsMemory != "" {
if _, err := dockerunits.RAMInBytes(config.ClusterCreateOpts.AgentsMemory); err != nil {
return fmt.Errorf("Provided agents memory limit value is invalid")
}
}

// validate nodes one by one
for _, node := range config.Cluster.Nodes {

Expand Down
59 changes: 59 additions & 0 deletions pkg/runtimes/docker/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,62 @@ func getNodeContainer(ctx context.Context, node *k3d.Node) (*types.Container, er
return &containers[0], nil

}

// executes an arbitrary command in a container while returning its exit code.
// useful to check something in docker env
func executeCheckInContainer(ctx context.Context, image string, cmd []string) (int64, error) {
docker, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
if err != nil {
log.Errorln("Failed to create docker client")
return -1, err
}
defer docker.Close()

if err = pullImage(ctx, docker, image); err != nil {
return -1, err
}

resp, err := docker.ContainerCreate(ctx, &container.Config{
Image: image,
Cmd: cmd,
Tty: false,
Entrypoint: []string{},
}, nil, nil, nil, "")
if err != nil {
log.Errorf("Failed to create container from image %s with cmd %s", image, cmd)
return -1, err
}

if err = startContainer(ctx, resp.ID); err != nil {
return -1, err
}

exitCode := -1
statusCh, errCh := docker.ContainerWait(ctx, resp.ID, container.WaitConditionNotRunning)
select {
case err := <-errCh:
if err != nil {
log.Errorf("Error while waiting for container %s to exit", resp.ID)
return -1, err
}
case status := <-statusCh:
exitCode = int(status.StatusCode)
}

if err = removeContainer(ctx, resp.ID); err != nil {
return -1, err
}

return int64(exitCode), nil
}

// CheckIfDirectoryExists checks for the existence of a given path inside the docker environment
func CheckIfDirectoryExists(ctx context.Context, image string, dir string) (bool, error) {
log.Tracef("checking if dir %s exists in docker environment...", dir)
shellCmd := fmt.Sprintf("[ -d \"%s\" ] && exit 0 || exit 1", dir)
cmd := []string{"sh", "-c", shellCmd}
exitCode, err := executeCheckInContainer(ctx, image, cmd)
log.Tracef("check dir container returned %d exist code", exitCode)
return exitCode == 0, err

}
19 changes: 19 additions & 0 deletions pkg/runtimes/docker/translate.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
log "github.com/sirupsen/logrus"

dockercliopts "github.com/docker/cli/opts"
dockerunits "github.com/docker/go-units"
)

// TranslateNodeToContainer translates a k3d node specification to a docker container representation
Expand Down Expand Up @@ -85,6 +86,16 @@ func TranslateNodeToContainer(node *k3d.Node) (*NodeInDocker, error) {
hostConfig.DeviceRequests = gpuopts.Value()
}

// memory limits
// fake meminfo is mounted to hostConfig.Binds
if node.Memory != "" {
memory, err := dockerunits.RAMInBytes(node.Memory)
if err != nil {
return nil, fmt.Errorf("Failed to set memory limit: %+v", err)
}
hostConfig.Memory = memory
}

/* They have to run in privileged mode */
// TODO: can we replace this by a reduced set of capabilities?
hostConfig.Privileged = true
Expand Down Expand Up @@ -233,6 +244,13 @@ func TranslateContainerDetailsToNode(containerDetails types.ContainerJSON) (*k3d
Status: containerDetails.ContainerJSONBase.State.Status,
}

// memory limit
memoryStr := dockerunits.HumanSize(float64(containerDetails.HostConfig.Memory))
// no-limit is returned as 0B, filter this out
if memoryStr == "0B" {
memoryStr = ""
}

node := &k3d.Node{
Name: strings.TrimPrefix(containerDetails.Name, "/"), // container name with leading '/' cut off
Role: k3d.NodeRoles[containerDetails.Config.Labels[k3d.LabelRole]],
Expand All @@ -249,6 +267,7 @@ func TranslateContainerDetailsToNode(containerDetails types.ContainerJSON) (*k3d
ServerOpts: serverOpts,
AgentOpts: k3d.AgentOpts{},
State: nodeState,
Memory: memoryStr,
}
return node, nil
}
3 changes: 3 additions & 0 deletions pkg/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@ type ClusterCreateOpts struct {
K3sServerArgs []string `yaml:"k3sServerArgs" json:"k3sServerArgs,omitempty"`
K3sAgentArgs []string `yaml:"k3sAgentArgs" json:"k3sAgentArgs,omitempty"`
GPURequest string `yaml:"gpuRequest" json:"gpuRequest,omitempty"`
ServersMemory string `yaml:"serversMemory" json:"serversMemory,omitempty"`
AgentsMemory string `yaml:"agentsMemory" json:"agentsMemory,omitempty"`
NodeHooks []NodeHook `yaml:"nodeHooks,omitempty" json:"nodeHooks,omitempty"`
GlobalLabels map[string]string `yaml:"globalLabels,omitempty" json:"globalLabels,omitempty"`
GlobalEnv []string `yaml:"globalEnv,omitempty" json:"globalEnv,omitempty"`
Expand Down Expand Up @@ -328,6 +330,7 @@ type Node struct {
ServerOpts ServerOpts `yaml:"serverOpts" json:"serverOpts,omitempty"`
AgentOpts AgentOpts `yaml:"agentOpts" json:"agentOpts,omitempty"`
GPURequest string // filled automatically
Memory string // filled automatically
State NodeState // filled automatically
}

Expand Down
Loading