Skip to content

Commit

Permalink
Add the ability to enable CDI (Container Device Interface) in AgentBaker.
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex Benn committed Mar 7, 2025
1 parent 47b9aa9 commit 6f053bf
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 10 deletions.
35 changes: 35 additions & 0 deletions aks-node-controller/parser/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,41 @@ oom_score = -999
X-Meta-Source-Client = ["azure/aks"]
[metrics]
address = "0.0.0.0:10257"
`)),
},
{
name: "Containerd Configurations with NVIDIA and CDI",
args: args{
aksnodeconfig: &aksnodeconfigv1.Configuration{
NeedsCgroupv2: ToPtr(true),
GpuConfig: &aksnodeconfigv1.GpuConfig{
EnableNvidia: ToPtr(true),
RequiresCdi: ToPtr(true),
},
},
noGpu: false,
},
want: base64.StdEncoding.EncodeToString([]byte(`version = 2
oom_score = -999
[plugins."io.containerd.grpc.v1.cri"]
enable_cdi = true
cdi_spec_dirs = ["/etc/cdi", "/var/run/cdi"]
sandbox_image = ""
[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "nvidia-container-runtime"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-container-runtime]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-container-runtime.options]
BinaryName = "/usr/bin/nvidia-container-runtime"
SystemdCgroup = true
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.untrusted]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.untrusted.options]
BinaryName = "/usr/bin/nvidia-container-runtime"
[plugins."io.containerd.grpc.v1.cri".registry.headers]
X-Meta-Source-Client = ["azure/aks"]
[metrics]
address = "0.0.0.0:10257"
`)),
},
}
Expand Down
4 changes: 4 additions & 0 deletions aks-node-controller/parser/templates/containerd.toml.gtpl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ version = 2
oom_score = -999{{if getHasDataDir .KubeletConfig}}
root = "{{.KubeletConfig.GetContainerDataDir}}"{{- end}}
[plugins."io.containerd.grpc.v1.cri"]
{{- if and ( .GetGpuConfig ) ( .GpuConfig.GetRequiresCdi ) }}
enable_cdi = true
cdi_spec_dirs = ["/etc/cdi", "/var/run/cdi"]
{{- end }}
sandbox_image = "{{ .KubeBinaryConfig.GetPodInfraContainerImageUrl }}"
[plugins."io.containerd.grpc.v1.cri".containerd]
{{- if .TeleportConfig.GetStatus }}
Expand Down
33 changes: 23 additions & 10 deletions aks-node-controller/pkg/gen/aksnodeconfig/v1/gpu_config.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions aks-node-controller/proto/aksnodeconfig/v1/gpu_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,7 @@ message GpuConfig {

// Same as enable_nvidia, but for AMD GPUs.
optional bool enable_amd_gpu = 5;

// Whether this GPU configuration requires CDI, the Container Device Interface (see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html).
optional bool requires_cdi = 6;
}

0 comments on commit 6f053bf

Please sign in to comment.