Skip to content

Commit a5f79dd

Browse files
Nvidia/Mellanox expose ROCE ECN information on sysfs on the path (#695)
/sys/class/net/<interface>/ecn/<protocol>/. There are 2 protocols: Reaction Point (rp) and Notification Point (np). Each protocol has a list of attributes: /sys/class/net/<interface>/ecn/<protocol>/params/<requested attribute> Each protocol also exposes whether ECN is enabled per priority (where X is the priority): /sys/class/net/<interface>/ecn/<protocol>/enable/X This is documented here: https://docs.nvidia.com/networking/display/mlnxofedv571020/explicit+congestion+notification+(ecn) The attributes are documented here: https://enterprise-support.nvidia.com/s/article/dcqcn-parameters Signed-off-by: Diego Asturias <[email protected]>
1 parent c5a546e commit a5f79dd

File tree

3 files changed

+679
-0
lines changed

3 files changed

+679
-0
lines changed

sysfs/net_class_ecn.go

Lines changed: 387 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,387 @@
1+
// Copyright 2024 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
//go:build linux
15+
// +build linux
16+
17+
package sysfs
18+
19+
import (
20+
"fmt"
21+
"os"
22+
"path/filepath"
23+
"strconv"
24+
25+
"github.com/prometheus/procfs/internal/util"
26+
)
27+
28+
// Documentation of the sysfs path
29+
// https://docs.nvidia.com/networking/display/mlnxofedv571020/explicit+congestion+notification+(ecn)
30+
// https://enterprise-support.nvidia.com/s/article/dcqcn-parameters
31+
32+
// RoceNpEcn contains values from /sys/class/net/<iface>/ecn/roce_np/
// for a single interface (iface).
type RoceNpEcn struct {
	// Ecn maps a priority to its enabled status.
	Ecn map[uint8]bool
	// MinTimeBetweenCnps is the minimum time between sending CNPs from the
	// port, in microseconds.
	// Range: 0-4095, Default: 4
	MinTimeBetweenCnps uint64
	// CnpDscp is the DSCP value for CNPs.
	// Range: 0-63, Default: 48
	CnpDscp uint64
	// Cnp802pPriority is the PCP value for CNPs.
	// Range: 0-7, Default: 6
	Cnp802pPriority uint64
}
47+
48+
// RoceRpEcn contains values from /sys/class/net/<iface>/ecn/roce_rp/
// for a single interface (iface).
type RoceRpEcn struct {
	// Ecn maps a priority to its enabled status.
	Ecn map[uint8]bool

	// Alpha Update

	// Every Alpha Update Period alpha is updated.
	// If the CNP is received during this period, alpha is incremented.
	// Otherwise, it is decremented.
	// Range: 0-1023, Default: 1019
	DceTCPG uint64
	// The Alpha Update Period used in the formula for DceTCPG. Unit is microseconds.
	// Range: 1-131071, Default: 1
	DceTCPRtt uint64
	// This parameter sets the initial value of alpha that should be used when receiving
	// the first CNP for a flow. Fixed point with 10 bits in the fraction part.
	// Range: 1-1023, Default: 1023
	InitialAlphaValue uint64

	// Rate Decrease

	// Rates (current, target) on first CNP (0 – 85% of line rate) in Mbps.
	// Range: 0, 1-line rate, Default: 0
	RateToSetOnFirstCnp uint64
	// This parameter defines the maximal ratio of rate decrease in a single event.
	// Range: 0-100, Default: 50
	RpgMinDecFac uint64
	// This parameter defines the minimal rate limit of the QP in Mbps.
	// Range: 1-line rate, Default: 1
	RpgMinRate uint64
	// The coefficient between alpha and the rate reduction factor.
	// Range: 10-11, Default: 11
	RpgGd uint64
	// The time period between rate reductions in microseconds.
	// Range: 0-UINT32, Default: 4
	RateReduceMonitorPeriod uint64

	// Rate Increase

	// If set, on every rate decrease the target rate is updated to the
	// current rate. Otherwise, the target rate is updated to the current rate
	// only on the first decrement after the increment event.
	ClampTgtRate bool
	// The time period between rate increase events in microseconds.
	// Range: 1-131071, Default: 300
	RpgTimeReset uint64
	// The sent bytes counter between rate increase events.
	// Range: 1-32767, Default: 32767
	RpgByteReset uint64
	// The threshold of rate increase events for moving to next rate increase phase.
	// Range: 1-31, Default: 1
	RpgThreshold uint64
	// The rate increase value in the Additive Increase phase in Mbps.
	// Range: 1-line rate, Default: 5
	RpgAiRate uint64
	// The rate increase value in the Hyper Increase phase in Mbps.
	// Range: 1-line rate, Default: 1
	RpgHaiRate uint64
}
109+
110+
// EcnIface contains Ecn info from files in /sys/class/net/<iface>/ecn/
111+
// for single interface (iface).
112+
type EcnIface struct {
113+
Name string // Interface name
114+
// protocols
115+
RoceNpEcn RoceNpEcn // Notification point
116+
RoceRpEcn RoceRpEcn // Reaction point
117+
}
118+
119+
// AllEcnIface is collection of Ecn info for every interface (iface) in /sys/class/net.
120+
// The map keys are interface (iface) names.
121+
type AllEcnIface map[string]EcnIface
122+
123+
// EcnByIface returns info for a single net interfaces (iface).
124+
func (fs FS) EcnByIface(devicePath string) (*EcnIface, error) {
125+
_, err := fs.NetClassByIface(devicePath)
126+
if err != nil {
127+
return nil, err
128+
}
129+
130+
path := fs.sys.Path(netclassPath)
131+
ecnPath := filepath.Join(path, devicePath, "ecn")
132+
validPath, err := PathExistsAndIsDir(ecnPath)
133+
if err != nil {
134+
return nil, err
135+
}
136+
if !validPath {
137+
// this device doesn't have ECN values at this path
138+
return nil, fmt.Errorf("Does not have ECN values: %q", devicePath)
139+
}
140+
141+
ecnIface, err := ParseEcnIfaceInfo(ecnPath)
142+
if err != nil {
143+
return nil, err
144+
}
145+
ecnIface.Name = devicePath
146+
147+
return ecnIface, nil
148+
}
149+
150+
// EcnDevices returns EcnIface for all net interfaces (iface) read from /sys/class/net/<iface>/ecn.
151+
func (fs FS) EcnDevices() (AllEcnIface, error) {
152+
devices, err := fs.NetClassDevices()
153+
if err != nil {
154+
return nil, err
155+
}
156+
157+
path := fs.sys.Path(netclassPath)
158+
allEcnIface := AllEcnIface{}
159+
for _, devicePath := range devices {
160+
ecnPath := filepath.Join(path, devicePath, "ecn")
161+
validPath, err := PathExistsAndIsDir(ecnPath)
162+
if err != nil {
163+
return nil, err
164+
}
165+
if !validPath {
166+
// this device doesn't have ECN values at this path
167+
continue
168+
}
169+
ecnIface, err := ParseEcnIfaceInfo(ecnPath)
170+
if err != nil {
171+
return nil, err
172+
}
173+
ecnIface.Name = devicePath
174+
allEcnIface[devicePath] = *ecnIface
175+
}
176+
177+
return allEcnIface, nil
178+
}
179+
180+
// ParseEcnIfaceInfo scans predefined files in /sys/class/net/<iface>/ecn
181+
// directory and gets their contents.
182+
func ParseEcnIfaceInfo(ecnPath string) (*EcnIface, error) {
183+
ecnIface := EcnIface{}
184+
err := ParseRoceNpEcnInfo(filepath.Join(ecnPath, "roce_np"), &ecnIface.RoceNpEcn)
185+
if err != nil {
186+
return nil, err
187+
}
188+
189+
err = ParseRoceRpEcnInfo(filepath.Join(ecnPath, "roce_rp"), &ecnIface.RoceRpEcn)
190+
if err != nil {
191+
return nil, err
192+
}
193+
194+
return &ecnIface, nil
195+
}
196+
197+
// ParseEcnIfaceInfo scans predefined files in /sys/class/net/<iface>/ecn/roce_np/
198+
// directory and gets their contents.
199+
func ParseRoceNpEcnInfo(ecnPath string, ecn *RoceNpEcn) error {
200+
value, err := ParseEcnEnable(filepath.Join(ecnPath, "enable"))
201+
if err != nil {
202+
return err
203+
}
204+
ecn.Ecn = value
205+
206+
files, err := os.ReadDir(ecnPath)
207+
if err != nil {
208+
return err
209+
}
210+
211+
for _, f := range files {
212+
if !f.Type().IsRegular() {
213+
continue
214+
}
215+
if err := ParseRoceNpEcnAttribute(ecnPath, f.Name(), ecn); err != nil {
216+
return err
217+
}
218+
}
219+
return nil
220+
}
221+
222+
// Parses all of the attributes in for ROCE NP protocol.
223+
func ParseRoceNpEcnAttribute(ecnPath string, attrName string, ecn *RoceNpEcn) error {
224+
attrPath := filepath.Join(ecnPath, attrName)
225+
value, err := util.SysReadFile(attrPath)
226+
if err != nil {
227+
if canIgnoreError(err) {
228+
return nil
229+
}
230+
return fmt.Errorf("failed to read file %q: %w", attrPath, err)
231+
}
232+
233+
vp := util.NewValueParser(value)
234+
switch attrName {
235+
case "min_time_between_cnps":
236+
ecn.MinTimeBetweenCnps = *vp.PUInt64()
237+
case "cnp_802p_prio":
238+
ecn.Cnp802pPriority = *vp.PUInt64()
239+
case "cnp_dscp":
240+
ecn.CnpDscp = *vp.PUInt64()
241+
default:
242+
return nil
243+
}
244+
245+
return nil
246+
}
247+
248+
// ParseRoceRpEcnInfo scans predefined files in /sys/class/net/<iface>/ecn/roce_rp/
249+
// directory and gets their contents.
250+
func ParseRoceRpEcnInfo(ecnPath string, ecn *RoceRpEcn) error {
251+
value, err := ParseEcnEnable(filepath.Join(ecnPath, "enable"))
252+
if err != nil {
253+
return err
254+
}
255+
ecn.Ecn = value
256+
257+
files, err := os.ReadDir(ecnPath)
258+
if err != nil {
259+
return err
260+
}
261+
262+
for _, f := range files {
263+
if !f.Type().IsRegular() {
264+
continue
265+
}
266+
if err := ParseRoceRpEcnAttribute(ecnPath, f.Name(), ecn); err != nil {
267+
return err
268+
}
269+
}
270+
return nil
271+
}
272+
273+
// Parses all of the attributes in for ROCE RP protocol.
274+
func ParseRoceRpEcnAttribute(ecnPath string, attrName string, ecn *RoceRpEcn) error {
275+
attrPath := filepath.Join(ecnPath, attrName)
276+
value, err := util.SysReadFile(attrPath)
277+
if err != nil {
278+
if canIgnoreError(err) {
279+
return nil
280+
}
281+
return fmt.Errorf("failed to read file %q: %w", attrPath, err)
282+
}
283+
284+
vp := util.NewValueParser(value)
285+
switch attrName {
286+
case "clamp_tgt_rate":
287+
if *vp.PUInt64() == 0 {
288+
ecn.ClampTgtRate = false
289+
} else if *vp.PUInt64() == 1 {
290+
ecn.ClampTgtRate = true
291+
} else {
292+
return fmt.Errorf("failed to parse file %q: %w", attrPath, err)
293+
}
294+
case "dce_tcp_g":
295+
ecn.DceTCPG = *vp.PUInt64()
296+
case "dce_tcp_rtt":
297+
ecn.DceTCPRtt = *vp.PUInt64()
298+
case "initial_alpha_value":
299+
ecn.InitialAlphaValue = *vp.PUInt64()
300+
case "rate_reduce_monitor_period":
301+
ecn.RateReduceMonitorPeriod = *vp.PUInt64()
302+
case "rate_to_set_on_first_cnp":
303+
ecn.RateToSetOnFirstCnp = *vp.PUInt64()
304+
case "rpg_ai_rate":
305+
ecn.RpgAiRate = *vp.PUInt64()
306+
case "rpg_byte_reset":
307+
ecn.RpgByteReset = *vp.PUInt64()
308+
case "rpg_gd":
309+
ecn.RpgGd = *vp.PUInt64()
310+
case "rpg_hai_rate":
311+
ecn.RpgHaiRate = *vp.PUInt64()
312+
case "rpg_min_dec_fac":
313+
ecn.RpgMinDecFac = *vp.PUInt64()
314+
case "rpg_min_rate":
315+
ecn.RpgMinRate = *vp.PUInt64()
316+
case "rpg_threshold":
317+
ecn.RpgThreshold = *vp.PUInt64()
318+
case "rpg_time_reset":
319+
ecn.RpgTimeReset = *vp.PUInt64()
320+
default:
321+
return nil
322+
}
323+
324+
return nil
325+
}
326+
327+
// parses the ECN enable directory. It takes a path which should be a directory.
328+
// This directory should have filenames that are uint8 and the content of the file is
329+
// either 0 or 1.
330+
func ParseEcnEnable(path string) (map[uint8]bool, error) {
331+
// Read the files in the directory
332+
files, err := os.ReadDir(path)
333+
if err != nil {
334+
return nil, err
335+
}
336+
337+
ecn := make(map[uint8]bool)
338+
// Iterate through each file in the directory
339+
for _, file := range files {
340+
// Only process files (skip directories)
341+
if file.IsDir() {
342+
continue
343+
}
344+
345+
// Extract the file name (which should be the integer key)
346+
filename := file.Name()
347+
348+
// Attempt to convert the file name to an integer
349+
filenameInt, err := strconv.ParseUint(filename, 10, 8)
350+
if err != nil {
351+
// Skip the file if the name cannot be converted to an integer
352+
continue
353+
}
354+
355+
value, err := util.SysReadFile(filepath.Join(path, filename))
356+
if err != nil {
357+
if canIgnoreError(err) {
358+
return nil, err
359+
}
360+
return nil, fmt.Errorf("failed to read file %q: %w", filename, err)
361+
}
362+
363+
vp := util.NewValueParser(value)
364+
fileValue := *vp.PUInt64()
365+
if fileValue == 0 {
366+
ecn[uint8(filenameInt)] = false
367+
} else if fileValue == 1 {
368+
ecn[uint8(filenameInt)] = true
369+
} else {
370+
return nil, fmt.Errorf("failed to parse file %q: %q", filename, value)
371+
}
372+
}
373+
374+
return ecn, nil
375+
}
376+
377+
// PathExistsAndIsDir reports whether path exists and is a directory.
//
// A path that does not exist yields (false, nil); a path that exists but is
// not a directory also yields (false, nil). Any other error from os.Stat is
// returned together with false.
func PathExistsAndIsDir(path string) (bool, error) {
	info, err := os.Stat(path)
	switch {
	case err == nil:
		return info.IsDir(), nil
	case os.IsNotExist(err):
		// A missing path is an expected outcome, not a failure.
		return false, nil
	default:
		return false, err
	}
}

0 commit comments

Comments
 (0)