Skip to content

Commit f302340

Browse files
committed
Use jit-cdi mode for CSV systems
Signed-off-by: Evan Lezar <[email protected]>
1 parent bad48b6 commit f302340

File tree

5 files changed

+44
-26
lines changed

5 files changed

+44
-26
lines changed

internal/info/auto.go

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,10 @@ type RuntimeModeResolver interface {
5353
// modeResolver carries the state consulted by ResolveRuntimeMode. Its fields
// are populated through Option functions, each of which receives the
// *modeResolver to configure.
type modeResolver struct {
5454
logger logger.Interface
5555
// TODO: This only needs to consider the requested devices.
56-
image *image.CUDA
57-
propertyExtractor info.PropertyExtractor
58-
defaultMode RuntimeMode
56+
image *image.CUDA
57+
propertyExtractor info.PropertyExtractor
58+
defaultMode RuntimeMode
59+
// forceCSVModeForTegraSystems selects the mode returned by
// ResolveRuntimeMode for info.PlatformTegra: CSVRuntimeMode when true,
// JitCDIRuntimeMode when false.
forceCSVModeForTegraSystems bool
5960
}
6061

6162
type Option func(*modeResolver)
@@ -66,6 +67,12 @@ func WithDefaultMode(defaultMode RuntimeMode) Option {
6667
}
6768
}
6869

70+
// WithForceCSVModeForTegraSystems returns an Option that sets the
// forceCSVModeForTegraSystems field on the modeResolver. When the flag is
// true, ResolveRuntimeMode returns CSVRuntimeMode for info.PlatformTegra;
// when it is false, JitCDIRuntimeMode is returned for that platform instead.
func WithForceCSVModeForTegraSystems(forceCSVModeForTegraSystems bool) Option {
71+
return func(mr *modeResolver) {
72+
mr.forceCSVModeForTegraSystems = forceCSVModeForTegraSystems
73+
}
74+
}
75+
6976
func WithLogger(logger logger.Interface) Option {
7077
return func(mr *modeResolver) {
7178
mr.logger = logger
@@ -130,7 +137,10 @@ func (m *modeResolver) ResolveRuntimeMode(mode string) (rmode RuntimeMode) {
130137
case info.PlatformNVML, info.PlatformWSL:
131138
return m.defaultMode
132139
case info.PlatformTegra:
133-
return CSVRuntimeMode
140+
if m.forceCSVModeForTegraSystems {
141+
return CSVRuntimeMode
142+
}
143+
return JitCDIRuntimeMode
134144
}
135145
return m.defaultMode
136146
}

internal/info/auto_test.go

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,34 +55,34 @@ func TestResolveAutoMode(t *testing.T) {
5555
expectedMode: "jit-cdi",
5656
},
5757
{
58-
description: "non-nvml, non-tegra, nvgpu resolves to csv",
58+
description: "non-nvml, non-tegra, nvgpu resolves to jit-cdi",
5959
mode: "auto",
6060
info: map[string]bool{
6161
"nvml": false,
6262
"tegra": false,
6363
"nvgpu": true,
6464
},
65-
expectedMode: "csv",
65+
expectedMode: "jit-cdi",
6666
},
6767
{
68-
description: "non-nvml, tegra, non-nvgpu resolves to csv",
68+
description: "non-nvml, tegra, non-nvgpu resolves to jit-cdi",
6969
mode: "auto",
7070
info: map[string]bool{
7171
"nvml": false,
7272
"tegra": true,
7373
"nvgpu": false,
7474
},
75-
expectedMode: "csv",
75+
expectedMode: "jit-cdi",
7676
},
7777
{
78-
description: "non-nvml, tegra, nvgpu resolves to csv",
78+
description: "non-nvml, tegra, nvgpu resolves to jit-cdi",
7979
mode: "auto",
8080
info: map[string]bool{
8181
"nvml": false,
8282
"tegra": true,
8383
"nvgpu": true,
8484
},
85-
expectedMode: "csv",
85+
expectedMode: "jit-cdi",
8686
},
8787
{
8888
description: "nvml, non-tegra, non-nvgpu resolves to jit-cdi",
@@ -95,14 +95,14 @@ func TestResolveAutoMode(t *testing.T) {
9595
expectedMode: "jit-cdi",
9696
},
9797
{
98-
description: "nvml, non-tegra, nvgpu resolves to csv",
98+
description: "nvml, non-tegra, nvgpu resolves to jit-cdi",
9999
mode: "auto",
100100
info: map[string]bool{
101101
"nvml": true,
102102
"tegra": false,
103103
"nvgpu": true,
104104
},
105-
expectedMode: "csv",
105+
expectedMode: "jit-cdi",
106106
},
107107
{
108108
description: "nvml, tegra, non-nvgpu resolves to jit-cdi",
@@ -115,14 +115,14 @@ func TestResolveAutoMode(t *testing.T) {
115115
expectedMode: "jit-cdi",
116116
},
117117
{
118-
description: "nvml, tegra, nvgpu resolves to csv",
118+
description: "nvml, tegra, nvgpu resolves to jit-cdi",
119119
mode: "auto",
120120
info: map[string]bool{
121121
"nvml": true,
122122
"tegra": true,
123123
"nvgpu": true,
124124
},
125-
expectedMode: "csv",
125+
expectedMode: "jit-cdi",
126126
},
127127
{
128128
description: "cdi devices resolves to cdi",
@@ -154,7 +154,7 @@ func TestResolveAutoMode(t *testing.T) {
154154
expectedMode: "jit-cdi",
155155
},
156156
{
157-
description: "at least one non-cdi device resolves to csv",
157+
description: "at least one non-cdi device resolves to jit-cdi",
158158
mode: "auto",
159159
envmap: map[string]string{
160160
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0",
@@ -164,7 +164,7 @@ func TestResolveAutoMode(t *testing.T) {
164164
"tegra": true,
165165
"nvgpu": false,
166166
},
167-
expectedMode: "csv",
167+
expectedMode: "jit-cdi",
168168
},
169169
{
170170
description: "cdi mount devices resolves to CDI",

internal/modifier/cdi.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ func NewCDIModifier(logger logger.Interface, cfg *config.Config, image image.CUD
6565
automaticDevices = append(automaticDevices, withUniqueDevices(gatedDevices(image)).DeviceRequests()...)
6666
automaticDevices = append(automaticDevices, withUniqueDevices(imexDevices(image)).DeviceRequests()...)
6767

68-
automaticModifier, err := newAutomaticCDISpecModifier(logger, cfg, automaticDevices)
68+
automaticModifier, err := newAutomaticCDISpecModifier(logger, cfg, image, automaticDevices)
6969
if err == nil {
7070
return automaticModifier, nil
7171
}
@@ -163,9 +163,10 @@ func filterAutomaticDevices(devices []string) []string {
163163
return automatic
164164
}
165165

166-
func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, devices []string) (oci.SpecModifier, error) {
166+
func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, image image.CUDA, devices []string) (oci.SpecModifier, error) {
167167
logger.Debugf("Generating in-memory CDI specs for devices %v", devices)
168168

169+
csvFileList := getCSVFileList(cfg, image)
169170
cdiModeIdentifiers := cdiModeIdentfiersFromDevices(devices...)
170171

171172
logger.Debugf("Per-mode identifiers: %v", cdiModeIdentifiers)
@@ -179,6 +180,7 @@ func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, de
179180
nvcdi.WithClass(cdiModeIdentifiers.deviceClassByMode[mode]),
180181
nvcdi.WithMode(mode),
181182
nvcdi.WithFeatureFlags(cfg.NVIDIAContainerRuntimeConfig.Modes.JitCDI.NVCDIFeatureFlags...),
183+
nvcdi.WithCSVFiles(csvFileList),
182184
)
183185
if err != nil {
184186
return nil, fmt.Errorf("failed to construct CDI library for mode %q: %w", mode, err)

internal/modifier/csv.go

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,7 @@ func NewCSVModifier(logger logger.Interface, cfg *config.Config, container image
4444
return nil, fmt.Errorf("requirements not met: %v", err)
4545
}
4646

47-
csvFiles, err := csv.GetFileList(cfg.NVIDIAContainerRuntimeConfig.Modes.CSV.MountSpecPath)
48-
if err != nil {
49-
return nil, fmt.Errorf("failed to get list of CSV files: %v", err)
50-
}
51-
52-
if container.Getenv(image.EnvVarNvidiaRequireJetpack) != "csv-mounts=all" {
53-
csvFiles = csv.BaseFilesOnly(csvFiles)
54-
}
47+
csvFiles := getCSVFileList(cfg, container)
5548

5649
cdilib, err := nvcdi.New(
5750
nvcdi.WithLogger(logger),
@@ -106,3 +99,14 @@ func checkRequirements(logger logger.Interface, image image.CUDA) error {
10699

107100
return r.Assert()
108101
}
102+
103+
// getCSVFileList returns the CSV files listed by csv.GetFileList for the
// mount spec path configured in
// cfg.NVIDIAContainerRuntimeConfig.Modes.CSV.MountSpecPath.
//
// Unless the container sets the image.EnvVarNvidiaRequireJetpack environment
// variable to exactly "csv-mounts=all", the list is narrowed with
// csv.BaseFilesOnly before being returned.
//
// NOTE(review): an error from csv.GetFileList is not reported; nil is returned
// instead, so callers cannot tell "listing failed" apart from "no files".
// Confirm that every caller is meant to treat this as best-effort.
func getCSVFileList(cfg *config.Config, container image.CUDA) []string {
104+
csvFiles, err := csv.GetFileList(cfg.NVIDIAContainerRuntimeConfig.Modes.CSV.MountSpecPath)
105+
if err != nil {
106+
// The listing error is intentionally dropped here; see the NOTE above.
return nil
107+
}
108+
// Any value other than "csv-mounts=all" (including unset) keeps base files only.
if container.Getenv(image.EnvVarNvidiaRequireJetpack) != "csv-mounts=all" {
109+
csvFiles = csv.BaseFilesOnly(csvFiles)
110+
}
111+
return csvFiles
112+
}

internal/runtime/runtime_factory.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,8 @@ func initRuntimeModeAndImage(logger logger.Interface, cfg *config.Config, ociSpe
141141
modeResolver := info.NewRuntimeModeResolver(
142142
info.WithLogger(logger),
143143
info.WithImage(&image),
144+
// TODO: Add a feature flag.
145+
info.WithForceCSVModeForTegraSystems(false),
144146
)
145147
mode := modeResolver.ResolveRuntimeMode(cfg.NVIDIAContainerRuntimeConfig.Mode)
146148
// We update the mode here so that we can continue passing just the config to other functions.

0 commit comments

Comments
 (0)