Skip to content

Commit 3bdbcc6

Browse files
committed
Handle multiple GPUs in CDI spec generation from CSV
This change allows CDI specs to be generated for multiple devices when using CSV mode. This can be used in cases where a Tegra-based system consists of an iGPU and dGPU. This behavior can be opted out of using the disable-multiple-csv-devices feature flag. The flag can be specified either by adding the --feature-flags=disable-multiple-csv-devices command line option to the nvidia-ctk cdi generate command or, for automatic CDI spec generation, by adding NVIDIA_CTK_CDI_GENERATE_FEATURE_FLAGS=disable-multiple-csv-devices to the /etc/nvidia-container-toolkit/nvidia-cdi-refresh.env file. Signed-off-by: Evan Lezar <[email protected]>
1 parent 0f4bd44 commit 3bdbcc6

File tree

5 files changed

+297
-39
lines changed

5 files changed

+297
-39
lines changed

pkg/nvcdi/api.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,4 +88,8 @@ const (
8888
// FeatureEnableCoherentAnnotations enables the addition of annotations
8989
// for coherent or non-coherent devices.
9090
FeatureEnableCoherentAnnotations = FeatureFlag("enable-coherent-annotations")
91+
92+
// FeatureDisableMultipleCSVDevices disables the handling of multiple devices
93+
// in CSV mode.
94+
FeatureDisableMultipleCSVDevices = FeatureFlag("disable-multiple-csv-devices")
9195
)

pkg/nvcdi/common-nvml.go

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,7 @@ import (
2525
// newCommonNVMLDiscoverer returns a discoverer for entities that are not associated with a specific CDI device.
2626
// This includes driver libraries and meta devices, for example.
2727
func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
28-
metaDevices := discover.NewCharDeviceDiscoverer(
29-
l.logger,
30-
l.devRoot,
31-
[]string{
32-
"/dev/nvidia-modeset",
33-
"/dev/nvidia-uvm-tools",
34-
"/dev/nvidia-uvm",
35-
"/dev/nvidiactl",
36-
},
37-
)
28+
metaDevices := l.controlDeviceNodeDiscoverer()
3829

3930
graphicsMounts, err := discover.NewGraphicsMountsDiscoverer(l.logger, l.driver, l.hookCreator)
4031
if err != nil {
@@ -54,3 +45,16 @@ func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
5445

5546
return d, nil
5647
}
48+
49+
func (l *nvmllib) controlDeviceNodeDiscoverer() discover.Discover {
50+
return discover.NewCharDeviceDiscoverer(
51+
l.logger,
52+
l.devRoot,
53+
[]string{
54+
"/dev/nvidia-modeset",
55+
"/dev/nvidia-uvm-tools",
56+
"/dev/nvidia-uvm",
57+
"/dev/nvidiactl",
58+
},
59+
)
60+
}

pkg/nvcdi/full-gpu-nvml.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ type fullGPUDeviceSpecGenerator struct {
3737
uuid string
3838
index int
3939

40-
featureFlags map[FeatureFlag]bool
40+
featureFlags map[FeatureFlag]bool
41+
additionalDiscoverers []discover.Discover
4142
}
4243

4344
var _ DeviceSpecGenerator = (*fullGPUDeviceSpecGenerator)(nil)
@@ -145,7 +146,6 @@ func (l *fullGPUDeviceSpecGenerator) getDeviceEdits() (*cdi.ContainerEdits, erro
145146
if err != nil {
146147
return nil, fmt.Errorf("failed to create device discoverer: %v", err)
147148
}
148-
149149
editsForDevice, err := edits.FromDiscoverer(deviceDiscoverer)
150150
if err != nil {
151151
return nil, fmt.Errorf("failed to create container edits for device: %v", err)
@@ -177,10 +177,18 @@ func (l *fullGPUDeviceSpecGenerator) newFullGPUDiscoverer(d device.Device) (disc
177177
deviceNodes,
178178
)
179179

180-
dd := discover.Merge(
180+
var discoverers []discover.Discover
181+
182+
discoverers = append(discoverers,
181183
deviceNodes,
182184
deviceFolderPermissionHooks,
183185
)
184186

187+
discoverers = append(discoverers, l.additionalDiscoverers...)
188+
189+
dd := discover.Merge(
190+
discoverers...,
191+
)
192+
185193
return dd, nil
186194
}

0 commit comments

Comments
 (0)