Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cmd/kepler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,17 @@ func createCPUMeter(logger *slog.Logger, cfg *config.Config) (device.CPUPowerMet
logger.Info("rapl zones are filtered", "zones-enabled", cfg.Rapl.Zones)
}

// Convert config MSR settings to device MSRConfig
msrConfig := device.MSRConfig{
Enabled: cfg.MSR.Enabled,
Force: cfg.MSR.Force,
DevicePath: cfg.MSR.DevicePath,
}

return device.NewCPUPowerMeter(
cfg.Host.SysFS,
device.WithRaplLogger(logger),
device.WithZoneFilter(cfg.Rapl.Zones),
device.WithMSRConfig(msrConfig),
)
}
5 changes: 5 additions & 0 deletions compose/dev/kepler-dev/etc/kepler/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ host:
rapl:
zones: [] # zones to be enabled, empty enables all default zones

msr: # MSR fallback configuration for RAPL energy reading
enabled: false # enable automatic MSR fallback when powercap is unavailable (default: false)
force: false # force MSR usage even if powercap is available (testing only, default: false)
devicePath: /dev/cpu/%d/msr # MSR device path template (default: "/dev/cpu/%d/msr")

exporter:
stdout: # stdout exporter related config
enabled: false # disabled by default
Expand Down
35 changes: 35 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,16 @@ type (
Zones []string `yaml:"zones"`
}

// MSR holds the settings for the MSR fallback used for power reading.
// DefaultConfig sets Enabled and Force to false and DevicePath to
// "/dev/cpu/%d/msr".
MSR struct {
// Enabled turns on automatic MSR fallback when powercap is unavailable.
// It is opt-in (false by default) for security reasons.
Enabled *bool `yaml:"enabled"`
// Force makes MSR be used even if powercap is available (intended for testing).
Force *bool `yaml:"force"`
// DevicePath is the MSR device path template. Validate rejects an empty
// value and requires a '%d' placeholder for the CPU ID; sanitize trims
// surrounding whitespace.
DevicePath string `yaml:"devicePath"`
}

// Development mode settings; disabled by default
Dev struct {
FakeCpuMeter struct {
Expand Down Expand Up @@ -98,6 +108,7 @@ type (
Host Host `yaml:"host"`
Monitor Monitor `yaml:"monitor"`
Rapl Rapl `yaml:"rapl"`
MSR MSR `yaml:"msr"`
Exporter Exporter `yaml:"exporter"`
Web Web `yaml:"web"`
Debug Debug `yaml:"debug"`
Expand Down Expand Up @@ -168,6 +179,12 @@ const (
// RAPL
RaplZones = "rapl.zones" // not a flag

// MSR - NOTE: per the proposal, MSR settings are intentionally not exposed as CLI flags;
// because of their security implications they should only be configured via YAML files.
MSREnabled = "msr.enabled" // not a flag
MSRForce = "msr.force" // not a flag
MSRDevicePath = "msr.devicePath" // not a flag

pprofEnabledFlag = "debug.pprof"

WebConfigFlag = "web.config-file"
Expand Down Expand Up @@ -203,6 +220,11 @@ func DefaultConfig() *Config {
Rapl: Rapl{
Zones: []string{},
},
MSR: MSR{
Enabled: ptr.To(false), // Opt-in for security
Force: ptr.To(false),
DevicePath: "/dev/cpu/%d/msr",
},
Monitor: Monitor{
Interval: 5 * time.Second,
Staleness: 500 * time.Millisecond,
Expand Down Expand Up @@ -408,6 +430,9 @@ func (c *Config) sanitize() {
c.Rapl.Zones[i] = strings.TrimSpace(c.Rapl.Zones[i])
}

// MSR settings sanitization
c.MSR.DevicePath = strings.TrimSpace(c.MSR.DevicePath)

for i := range c.Exporter.Prometheus.DebugCollectors {
c.Exporter.Prometheus.DebugCollectors[i] = strings.TrimSpace(c.Exporter.Prometheus.DebugCollectors[i])
}
Expand Down Expand Up @@ -488,6 +513,16 @@ func (c *Config) Validate(skips ...SkipValidation) error {
errs = append(errs, fmt.Sprintf("invalid monitor min terminated energy threshold: %d can't be negative", c.Monitor.MinTerminatedEnergyThreshold))
}
}
{ // MSR settings
if c.MSR.DevicePath == "" {
errs = append(errs, "MSR device path cannot be empty")
} else {
// Basic validation that device path is a template
if !strings.Contains(c.MSR.DevicePath, "%d") {
errs = append(errs, "MSR device path must contain '%d' placeholder for CPU ID")
}
}
}
{ // Kubernetes
if ptr.Deref(c.Kube.Enabled, false) {
if c.Kube.Config != "" {
Expand Down
Loading
Loading