diff --git a/cmd/kepler/main.go b/cmd/kepler/main.go index 6f28961cac..89abb9ae31 100644 --- a/cmd/kepler/main.go +++ b/cmd/kepler/main.go @@ -233,9 +233,17 @@ func createCPUMeter(logger *slog.Logger, cfg *config.Config) (device.CPUPowerMet logger.Info("rapl zones are filtered", "zones-enabled", cfg.Rapl.Zones) } + // Convert config MSR settings to device MSRConfig + msrConfig := device.MSRConfig{ + Enabled: cfg.MSR.Enabled, + Force: cfg.MSR.Force, + DevicePath: cfg.MSR.DevicePath, + } + return device.NewCPUPowerMeter( cfg.Host.SysFS, device.WithRaplLogger(logger), device.WithZoneFilter(cfg.Rapl.Zones), + device.WithMSRConfig(msrConfig), ) } diff --git a/compose/dev/kepler-dev/etc/kepler/config.yaml b/compose/dev/kepler-dev/etc/kepler/config.yaml index 41048a5509..3f8bd7004d 100644 --- a/compose/dev/kepler-dev/etc/kepler/config.yaml +++ b/compose/dev/kepler-dev/etc/kepler/config.yaml @@ -35,6 +35,11 @@ host: rapl: zones: [] # zones to be enabled, empty enables all default zones +msr: # MSR fallback configuration for RAPL energy reading + enabled: false # enable automatic MSR fallback when powercap unavailable (default: false) + force: false # force MSR usage even if powercap available (testing only, default: false) + devicePath: /dev/cpu/%d/msr # MSR device path template (default: "/dev/cpu/%d/msr") + exporter: stdout: # stdout exporter related config enabled: false # disabled by default diff --git a/config/config.go b/config/config.go index c91f495cfb..e40c716578 100644 --- a/config/config.go +++ b/config/config.go @@ -33,6 +33,16 @@ type ( Zones []string `yaml:"zones"` } + // MSR configuration for fallback power reading + MSR struct { + // Enable automatic MSR fallback when powercap unavailable + Enabled *bool `yaml:"enabled"` + // Force MSR usage even if powercap available (testing) + Force *bool `yaml:"force"` + // MSR device path template + DevicePath string `yaml:"devicePath"` + } + // Development mode settings; disabled by default Dev struct { FakeCpuMeter 
struct { @@ -98,6 +108,7 @@ type ( Host Host `yaml:"host"` Monitor Monitor `yaml:"monitor"` Rapl Rapl `yaml:"rapl"` + MSR MSR `yaml:"msr"` Exporter Exporter `yaml:"exporter"` Web Web `yaml:"web"` Debug Debug `yaml:"debug"` @@ -168,6 +179,12 @@ const ( // RAPL RaplZones = "rapl.zones" // not a flag + // MSR - NOTE: MSR settings are not exposed as CLI flags per proposal + // They should only be configured via YAML files due to security implications + MSREnabled = "msr.enabled" // not a flag + MSRForce = "msr.force" // not a flag + MSRDevicePath = "msr.devicePath" // not a flag + pprofEnabledFlag = "debug.pprof" WebConfigFlag = "web.config-file" @@ -203,6 +220,11 @@ func DefaultConfig() *Config { Rapl: Rapl{ Zones: []string{}, }, + MSR: MSR{ + Enabled: ptr.To(false), // Opt-in for security + Force: ptr.To(false), + DevicePath: "/dev/cpu/%d/msr", + }, Monitor: Monitor{ Interval: 5 * time.Second, Staleness: 500 * time.Millisecond, @@ -408,6 +430,9 @@ func (c *Config) sanitize() { c.Rapl.Zones[i] = strings.TrimSpace(c.Rapl.Zones[i]) } + // MSR settings sanitization + c.MSR.DevicePath = strings.TrimSpace(c.MSR.DevicePath) + for i := range c.Exporter.Prometheus.DebugCollectors { c.Exporter.Prometheus.DebugCollectors[i] = strings.TrimSpace(c.Exporter.Prometheus.DebugCollectors[i]) } @@ -488,6 +513,16 @@ func (c *Config) Validate(skips ...SkipValidation) error { errs = append(errs, fmt.Sprintf("invalid monitor min terminated energy threshold: %d can't be negative", c.Monitor.MinTerminatedEnergyThreshold)) } } + { // MSR settings + if c.MSR.DevicePath == "" { + errs = append(errs, "MSR device path cannot be empty") + } else { + // Basic validation that device path is a template + if !strings.Contains(c.MSR.DevicePath, "%d") { + errs = append(errs, "MSR device path must contain '%d' placeholder for CPU ID") + } + } + } { // Kubernetes if ptr.Deref(c.Kube.Enabled, false) { if c.Kube.Config != "" { diff --git a/docs/developer/proposal/EP-002-MSR-Fallback-Power-Meter.md 
b/docs/developer/proposal/EP-002-MSR-Fallback-Power-Meter.md new file mode 100644 index 0000000000..c23a581a13 --- /dev/null +++ b/docs/developer/proposal/EP-002-MSR-Fallback-Power-Meter.md @@ -0,0 +1,423 @@ +# EP-002: MSR Fallback for CPU Power Meter + +**Status**: Draft +**Author**: Kepler Development Team +**Created**: 2025-08-12 +**Last Updated**: 2025-08-12 + +## Summary + +This proposal introduces MSR (Model Specific Register) support as a fallback +mechanism for CPU power monitoring in Kepler when the primary powercap sysfs +interface (/sys/class/powercap/intel-rapl) is unavailable. The enhancement +maintains backward compatibility while improving Kepler's resilience in +environments where powercap is disabled or inaccessible, such as certain +container runtimes or kernel configurations. + +## Problem Statement + +Kepler currently relies exclusively on the Linux powercap sysfs interface for +reading Intel RAPL energy counters. This creates deployment limitations in +environments where: + +1. The powercap interface is disabled by kernel configuration +2. Container runtimes don't expose /sys/class/powercap +3. Custom kernel builds lack powercap support +4. Virtualized environments provide no powercap passthrough + +These limitations prevent Kepler from collecting energy metrics on otherwise +capable hardware, reducing its effectiveness as a universal energy monitoring +solution. + +### Current Limitations + +1. **Single Interface Dependency**: Complete reliance on powercap sysfs with no + fallback mechanism +2. **Deployment Restrictions**: Cannot deploy in environments without powercap + access +3. **Container Limitations**: Difficulty mounting /sys/class/powercap in certain + container security policies +4.
**No Graceful Degradation**: Complete failure rather than fallback when + powercap unavailable + +## Goals + +- **Primary Goal**: Implement MSR-based RAPL reading as automatic fallback when + powercap is unavailable +- **Secondary Goal**: Maintain existing CPUPowerMeter interface compatibility +- **Tertiary Goal**: Provide configurable control over fallback behavior for + security-conscious deployments + +## Non-Goals + +- Supporting non-Intel architectures (AMD RAPL, ARM PMU) +- Replacing powercap as the primary interface +- Implementing model-specific optimizations +- Supporting pre-Sandy Bridge Intel CPUs +- Real-time power capping functionality + +## Requirements + +### Functional Requirements + +- Automatically detect powercap availability and fallback to MSR when needed +- Read energy values from MSR registers: PKG (0x611), PP0 (0x639), DRAM (0x619) +- Handle 32-bit MSR counter overflow correctly +- Map MSR zones to existing EnergyZone interface (package, core, dram) +- Support multi-socket systems with per-CPU MSR access +- Maintain energy unit conversion compatibility + +### Non-Functional Requirements + +- **Performance**: MSR reading overhead < 100μs per sample +- **Reliability**: Handle MSR module loading/unloading gracefully +- **Security**: Document and mitigate PLATYPUS attack vectors +- **Maintainability**: Minimal code duplication with existing RAPL implementation +- **Testability**: Support fake MSR implementation for testing + +## Proposed Solution + +### High-Level Architecture + +```mermaid +graph TB + CPUPowerMeter[CPUPowerMeter Interface] + + raplPowerMeter[raplPowerMeter
Enhanced with MSR] + + powercapReader[powercapReader
Primary] + msrReader[msrReader
Fallback] + zoneAdapter[Zone
Adapter] + + powercap[/sys/class/
powercap/] + msrdev["/dev/cpu/*/msr"] + zones[Energy
Zones] + + CPUPowerMeter --> raplPowerMeter + raplPowerMeter --> powercapReader + raplPowerMeter --> msrReader + raplPowerMeter --> zoneAdapter + + powercapReader --> powercap + msrReader --> msrdev + zoneAdapter --> zones + + style CPUPowerMeter fill:#e1f5fe + style raplPowerMeter fill:#b3e5fc + style powercapReader fill:#81d4fa + style msrReader fill:#ffccbc + style zoneAdapter fill:#c5e1a5 +``` + +### Key Design Choices + +1. **Extend Existing Implementation**: Enhance `raplPowerMeter` rather than + creating separate implementation to maximize code reuse +2. **Interface-Based Abstraction**: Create `powerReader` interface for both + powercap and MSR backends +3. **Automatic Detection**: Check powercap availability in Init() and select + appropriate backend +4. **Opt-In MSR Fallback**: Require explicit configuration to enable MSR due to + security implications +5. **Reuse Energy Zone Logic**: Share zone management between implementations + +## Detailed Design + +### Package Structure + +```text +internal/ +├── device/ +│ ├── cpu_power_meter.go # Interface (unchanged) +│ ├── rapl_power_meter.go # Enhanced with MSR support +│ ├── power_reader.go # New abstraction interface +│ ├── powercap_reader.go # Extracted powercap logic +│ ├── msr_reader.go # New MSR implementation +│ ├── msr_reader_test.go # MSR unit tests +│ └── testdata/ +│ └── msr/ # MSR test fixtures +└── config/ + └── config.go # MSR configuration options +``` + +### API/Interface Changes + +```go +// power_reader.go - New internal abstraction +type powerReader interface { + // ReadEnergy reads energy value for a specific zone + ReadEnergy(zone EnergyZone) (Energy, error) + // Available checks if the reader can be used + Available() bool + // Init initializes the reader + Init() error + // Close releases resources + Close() error +} + +// msr_reader.go - MSR implementation +type msrReader struct { + msrFiles map[int]*os.File // CPU ID -> MSR file handle + energyUnit float64 // Energy unit from 
IA32_RAPL_POWER_UNIT + zones []EnergyZone // Available zones + mu sync.RWMutex // Thread safety +} + +// Enhanced rapl_power_meter.go +type raplPowerMeter struct { + reader powerReader // Abstracted backend + zones []EnergyZone + strategy EnergyStrategy + useMSR bool // Track which backend is active +} +``` + +## Configuration + +### Main Configuration Changes + +```go +// config/config.go additions +type DeviceConfig struct { + // Existing fields... + MSR MSRConfig `yaml:"msr"` +} + +type MSRConfig struct { + // Enable automatic MSR fallback when powercap unavailable + Enabled *bool `yaml:"enabled"` + // Force MSR usage even if powercap available (testing) + Force *bool `yaml:"force"` + // MSR device path template + DevicePath string `yaml:"devicePath"` +} + +// Default configuration +func defaultMSRConfig() MSRConfig { + return MSRConfig{ + Enabled: ptr(false), // Opt-in for security + Force: ptr(false), + DevicePath: "/dev/cpu/%d/msr", + } +} +``` + +### New Configuration File (if applicable) + +```yaml +# Example: hack/config.yaml +msr: + enabled: true + force: false + devicePath: "/dev/cpu/%d/msr" +``` + +### Security Considerations + +**Critical Security Warning**: MSR access enables PLATYPUS side-channel attacks +(CVE-2020-8694/8695) allowing unprivileged users to infer data from other +processes. + +**Mitigations**: + +1. MSR fallback disabled by default (opt-in configuration) +2. Require CAP_SYS_RAWIO capability instead of full root +3. Document security implications prominently +4. Add warning logs when MSR fallback is activated +5. 
Consider rate-limiting MSR reads to reduce attack surface + +## Deployment Examples + +### Kubernetes Environment + +```yaml +# DaemonSet with MSR support +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kepler +spec: + template: + spec: + containers: + - name: kepler + image: kepler:msr-enabled + args: + - --config=/etc/kepler/config.yaml + volumeMounts: + - name: dev + mountPath: /dev + readOnly: true + - name: config + mountPath: /etc/kepler + securityContext: + privileged: true # Required for MSR access + volumes: + - name: dev + hostPath: + path: /dev + - name: config + configMap: + name: kepler-config +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: kepler-config +data: + config.yaml: | + msr: + enabled: true +``` + +### Standalone Deployment + +```bash +# Load MSR kernel module +sudo modprobe msr + +# Run with MSR fallback enabled +sudo ./bin/kepler --config hack/config.yaml + +# Force MSR for testing (ignores powercap) - configured via YAML only +# Set msr.force: true in config file +``` + +## Testing Strategy + +### Test Coverage + +- **Unit Tests**: MSR reader with mock file operations (85% coverage target) +- **Integration Tests**: Fallback detection and switching logic +- **End-to-End Tests**: Energy attribution with MSR backend +- **Benchmark Tests**: MSR vs sysfs performance comparison + +### Test Infrastructure + +```go +// Fake MSR implementation for testing +type fakeMSRReader struct { + energyValues map[EnergyZone]Energy + available bool +} + +// Test fixtures in testdata/msr/ +// - Mock MSR device files +// - Predefined energy values +// - Overflow scenarios +``` + +## Migration and Compatibility + +### Backward Compatibility + +This enhancement maintains full backward compatibility: + +- Existing deployments continue using powercap by default +- No changes to external APIs or metrics +- Configuration changes are additive only +- Existing tests remain valid + +### Migration Path + +1. 
**Phase 1**: Deploy with MSR disabled (default) - verify no regression +2. **Phase 2**: Enable MSR fallback in staging environments +3. **Phase 3**: Gradual rollout to production with monitoring + +### Rollback Strategy + +1. Disable MSR fallback via configuration (immediate effect) +2. Revert to previous Kepler version if issues persist +3. MSR can be disabled without restart via dynamic config reload + +## Metrics Output + +```prometheus +# New metric indicating active power meter backend +kepler_power_meter_backend{backend="powercap|msr"} 1 + +# Existing metrics unchanged +kepler_node_package_energy_millijoule{node="node1"} 12345 +kepler_node_core_energy_millijoule{node="node1"} 6789 +kepler_node_dram_energy_millijoule{node="node1"} 3456 +``` + +## Implementation Plan + +### Phase 1: Foundation (Week 1-2) + +- Implement `powerReader` interface abstraction +- Extract existing powercap logic to `powercapReader` +- Create basic `msrReader` structure +- Add MSR configuration options + +### Phase 2: Core Functionality (Week 3-4) + +- Implement MSR register reading logic +- Add energy unit conversion +- Handle counter overflow +- Implement zone mapping +- Add fallback detection logic + +### Phase 3: Testing and Documentation (Week 5-6) + +- Comprehensive unit tests +- Integration testing +- Performance benchmarking +- Security documentation +- Update deployment guides + +## Risks and Mitigations + +### Technical Risks + +- **Risk**: MSR kernel module not available + - **Mitigation**: Graceful degradation with clear error messages + +- **Risk**: MSR counter overflow handling errors + - **Mitigation**: Extensive testing with overflow scenarios + +- **Risk**: Performance regression from abstraction + - **Mitigation**: Benchmark and optimize hot paths + +### Operational Risks + +- **Risk**: Security vulnerabilities from MSR access + - **Mitigation**: Disabled by default, clear documentation + +- **Risk**: Increased complexity for operators + - **Mitigation**: Automatic 
detection minimizes configuration + +## Alternatives Considered + +### Alternative 1: Separate MSR Power Meter Implementation + +- **Description**: Create independent `msrPowerMeter` implementing + `CPUPowerMeter` +- **Reason for Rejection**: High code duplication, difficult to maintain + consistency + +### Alternative 2: eBPF-based Power Monitoring + +- **Description**: Use eBPF to intercept RAPL MSR reads from other processes +- **Reason for Rejection**: Complex implementation, limited kernel support, + security concerns + +### Alternative 3: Intel Power Gadget Integration + +- **Description**: Use Intel's official Power Gadget API +- **Reason for Rejection**: Not available on Linux, requires proprietary + libraries + +## Success Metrics + +- **Functional Metric**: Successfully read power through MSR in environments where + powercap is unavailable +- **Performance Metric**: MSR reading overhead < 100μs (50% faster than powercap) +- **Adoption Metric**: 20% of deployments utilize MSR fallback within 6 months + +## Open Questions + +1. Should we support AMD's RAPL MSRs in the same implementation? +2. Should MSR support be compiled conditionally for security-sensitive builds? +3. What telemetry should we add to track MSR fallback usage in production? 
diff --git a/docs/developer/proposal/index.md b/docs/developer/proposal/index.md index 861f7843be..7edc595a88 100644 --- a/docs/developer/proposal/index.md +++ b/docs/developer/proposal/index.md @@ -6,7 +6,8 @@ This directory contains Enhancement Proposals (EPs) for major features and chang | ID | Title | Status | Author | Created | |----|-------|--------|--------|---------| -| [EP-000](EP_TEMPLATE.md) | Enhancement Proposal Template | Accepted |Sunil Thaha | 2025-01-18 | +| [EP-000](EP_TEMPLATE.md) | Enhancement Proposal Template | Accepted | Sunil Thaha | 2025-01-18 | +| [EP-002](EP-002-MSR-Fallback-Power-Meter.md) | MSR Fallback for CPU Power Meter | Draft | Kepler Development Team | 2025-08-12 | ## Proposal Status diff --git a/docs/user/configuration.md b/docs/user/configuration.md index 1c803e33f0..eaab98a0b0 100644 --- a/docs/user/configuration.md +++ b/docs/user/configuration.md @@ -95,6 +95,11 @@ host: rapl: zones: [] # RAPL zones to be enabled, empty enables all default zones +msr: # MSR fallback configuration for RAPL energy reading + enabled: false # Enable automatic MSR fallback when powercap unavailable + force: false # Force MSR usage even if powercap available (testing) + devicePath: "/dev/cpu/%d/msr" # MSR device path template + exporter: stdout: # stdout exporter related config enabled: false # disabled by default @@ -195,6 +200,41 @@ rapl: zones: ["package", "core", "uncore"] ``` +### 🔌 MSR Configuration + +```yaml +msr: + enabled: false # Enable automatic MSR fallback + force: false # Force MSR usage for testing + devicePath: "/dev/cpu/%d/msr" # MSR device path template +``` + +Model Specific Register (MSR) support provides a fallback mechanism for reading Intel RAPL energy counters when the Linux powercap sysfs interface is unavailable. 
+ +- **enabled**: Enable automatic MSR fallback when powercap is unavailable + - Default: `false` (opt-in for security reasons) + - When enabled, Kepler will automatically fall back to MSR if powercap fails + - Requires appropriate permissions and hardware support + +- **force**: Force MSR usage even when powercap is available + - Default: `false` + - Primarily for testing and development purposes + - When `true`, MSR will be used regardless of powercap availability + +- **devicePath**: Template for MSR device file paths + - Default: `"/dev/cpu/%d/msr"` + - The `%d` placeholder is replaced with the CPU number + - Must be accessible with appropriate permissions + +⚠️ **Security Note**: MSR access requires elevated privileges and may be restricted on some systems due to security considerations (PLATYPUS attacks, CVE-2020-8694/8695). Use MSR configuration only when necessary and ensure proper system security measures are in place. + +**Prerequisites for MSR support:** + +- Intel CPU with RAPL support +- `msr` kernel module loaded (`modprobe msr`) +- Read access to `/dev/cpu/*/msr` files +- Elevated privileges (typically root) + ### 📦 Exporter Configuration ```yaml diff --git a/hack/config.yaml b/hack/config.yaml index 8beb0587a1..978a8d171c 100644 --- a/hack/config.yaml +++ b/hack/config.yaml @@ -35,6 +35,11 @@ host: rapl: zones: [] # zones to be enabled, empty enables all default zones +msr: # MSR fallback configuration for RAPL energy reading + enabled: false # enable automatic MSR fallback when powercap unavailable (default: false) + force: false # force MSR usage even if powercap available (testing only, default: false) + devicePath: /dev/cpu/%d/msr # MSR device path template (default: "/dev/cpu/%d/msr") + exporter: stdout: # stdout exporter related config enabled: false # disabled by default diff --git a/internal/device/mock_cpu_power_meter.go b/internal/device/mock_cpu_power_meter.go index 7c3e8717ce..4111f8e70e 100644 --- 
a/internal/device/mock_cpu_power_meter.go
+++ b/internal/device/mock_cpu_power_meter.go
@@ -5,14 +5,6 @@ package device
 
 // TODO: Move this mock to a separate testutil package
 
-import (
-	"slices"
-	"testing"
-
-	"github.com/prometheus/procfs/sysfs"
-	"github.com/stretchr/testify/require"
-)
-
 const (
 	validSysFSPath = "testdata/sys"
 	badSysFSPath   = "testdata/bad_sysfs"
@@ -67,27 +59,3 @@ func (m *MockRaplZone) OnEnergy(j Energy, err error) {
 func (m *MockRaplZone) Inc(delta Energy) {
 	m.energy = (m.energy + delta) % m.maxMicroJoules
 }
-
-func validSysFSFixtures(t *testing.T) sysfs.FS {
-	t.Helper()
-	fs, err := sysfs.NewFS(validSysFSPath)
-	require.NoError(t, err, "Failed to create sysfs test FS")
-	return fs
-}
-
-func invalidSysFSFixtures(t *testing.T) sysfs.FS {
-	t.Helper()
-	fs, err := sysfs.NewFS(badSysFSPath)
-	require.NoError(t, err, "Failed to create sysfs test FS")
-	return fs
-}
-
-func sortedZoneNames(zones []EnergyZone) []string {
-	names := make([]string, len(zones))
-	for i, zone := range zones {
-		names[i] = zone.Name()
-	}
-	slices.Sort(names)
-
-	return names
-}
diff --git a/internal/device/msr_reader.go b/internal/device/msr_reader.go
new file mode 100644
index 0000000000..d284ab5fa4
--- /dev/null
+++ b/internal/device/msr_reader.go
@@ -0,0 +1,319 @@
+// SPDX-FileCopyrightText: 2025 The Kepler Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package device
+
+import (
+	"fmt"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"sort"
+	"strconv"
+	"sync"
+)
+
+// msrReader implements raplReader using Intel MSR (Model Specific Register) interface,
+// i.e. the per-CPU device files addressed by devicePath.
+type msrReader struct {
+	msrFiles   map[int]*os.File // CPU ID -> MSR file handle
+	zones      []EnergyZone     // Available energy zones
+	energyUnit float64          // Energy unit in microjoules per LSB
+	devicePath string           // MSR device path template
+	logger     *slog.Logger
+	mu         sync.RWMutex // Guards zones; Close also holds it while resetting state
+}
+
+// msrZoneConfig maps MSR zone names to the offsets of their energy status registers
+var msrZoneConfig = map[string]uint32{
+	ZonePackage: MSRPkgEnergyStatus,
+	ZonePP0:     MSRPP0EnergyStatus, // Maps to "core" zone
+	ZoneDRAM:    MSRDRAMEnergyStatus,
+}
+
+// zoneNameMapping maps MSR zone names to standard RAPL zone names
+var zoneNameMapping = map[string]string{
+	ZonePP0: ZoneCore, // PP0 (Power Plane 0) is the core domain
+}
+
+// NewMSRReader creates a new MSR reader using the specified device path template.
+// The template is expanded with fmt.Sprintf and the CPU ID, so it is expected to
+// contain a single %d verb. A nil logger falls back to slog.Default().
+func NewMSRReader(devicePath string, logger *slog.Logger) *msrReader {
+	if logger == nil {
+		logger = slog.Default()
+	}
+
+	return &msrReader{
+		msrFiles:   make(map[int]*os.File),
+		devicePath: devicePath,
+		logger:     logger.With("service", "msr-reader"),
+	}
+}
+
+// Name returns the name of this power reader implementation
+func (m *msrReader) Name() string {
+	return "msr"
+}
+
+// cpuDir derives the directory holding the per-CPU sub-directories from
+// devicePath (e.g., "/dev/cpu/%d/msr" -> "/dev/cpu").
+func (m *msrReader) cpuDir() string {
+	return filepath.Dir(filepath.Dir(m.devicePath))
+}
+
+// Available reports whether at least one per-CPU MSR device file exists.
+// Only existence is checked here; opening and reading the files happens in Init.
+func (m *msrReader) Available() bool {
+	cpuDir := m.cpuDir()
+
+	// Check if CPU directory exists
+	if _, err := os.Stat(cpuDir); os.IsNotExist(err) {
+		m.logger.Debug("MSR not available: CPU directory does not exist", "dir", cpuDir)
+		return false
+	}
+
+	// Check that at least one CPU directory contains an MSR device file
+	cpuIDs, err := m.findAvailableCPUs()
+	if err != nil {
+		m.logger.Debug("MSR not available: failed to scan for CPUs", "error", err)
+		return false
+	}
+
+	if len(cpuIDs) == 0 {
+		m.logger.Debug("MSR not available: no CPUs with MSR access found")
+		return false
+	}
+
+	return true
+}
+
+// Init opens the MSR file of every available CPU, reads the energy unit from the
+// lowest-numbered CPU and creates the energy zones. If any step fails, the files
+// opened so far are closed before the error is returned.
+func (m *msrReader) Init() error {
+	if !m.Available() {
+		return fmt.Errorf("MSR interface not available")
+	}
+
+	// Find available CPUs
+	cpuIDs, err := m.findAvailableCPUs()
+	if err != nil {
+		return fmt.Errorf("failed to find available CPUs: %w", err)
+	}
+
+	if len(cpuIDs) == 0 {
+		return fmt.Errorf("no CPUs with MSR access found")
+	}
+
+	// Open MSR files for all CPUs
+	for _, cpuID := range cpuIDs {
+		msrPath := fmt.Sprintf(m.devicePath, cpuID)
+		file, err := os.OpenFile(msrPath, os.O_RDONLY, 0)
+		if err != nil {
+			m.closeAfterInitFailure()
+			return fmt.Errorf("failed to open MSR file %s: %w", msrPath, err)
+		}
+		m.msrFiles[cpuID] = file
+	}
+
+	// Read energy unit from the first CPU
+	firstCPU := cpuIDs[0]
+	energyUnit, err := readEnergyUnit(m.msrFiles[firstCPU])
+	if err != nil {
+		m.closeAfterInitFailure()
+		return fmt.Errorf("failed to read energy unit from CPU %d: %w", firstCPU, err)
+	}
+	m.energyUnit = energyUnit
+
+	// Create zones for all available MSR energy counters
+	if err := m.createZones(); err != nil {
+		m.closeAfterInitFailure()
+		return fmt.Errorf("failed to create MSR zones: %w", err)
+	}
+
+	m.logger.Info("MSR reader initialized",
+		"cpus", len(m.msrFiles),
+		"zones", len(m.zones),
+		"energy_unit_uj", m.energyUnit)
+
+	return nil
+}
+
+// closeAfterInitFailure releases whatever Init has opened so far. A close error is
+// only logged because Init is already returning the error that caused the cleanup.
+func (m *msrReader) closeAfterInitFailure() {
+	if closeErr := m.Close(); closeErr != nil {
+		m.logger.Warn("Failed to close MSR files", "error", closeErr)
+	}
+}
+
+// Zones returns the list of MSR-based energy zones
+func (m *msrReader) Zones() ([]EnergyZone, error) {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	if len(m.zones) == 0 {
+		return nil, fmt.Errorf("MSR reader not initialized or no zones available")
+	}
+
+	// Return a copy to prevent external modification
+	zones := make([]EnergyZone, len(m.zones))
+	copy(zones, m.zones)
+	return zones, nil
+}
+
+// Close closes all MSR files and releases resources. It returns the last close
+// error encountered, if any; every failure is also logged.
+func (m *msrReader) Close() error {
+	// Close resets m.zones, which Zones reads under the read lock, so the write
+	// lock must be held for the whole teardown.
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	var lastErr error
+
+	for cpuID, file := range m.msrFiles {
+		if err := file.Close(); err != nil {
+			lastErr = err
+			m.logger.Warn("Failed to close MSR file", "cpu", cpuID, "error", err)
+		}
+	}
+
+	// Clear the map
+	m.msrFiles = make(map[int]*os.File)
+	m.zones = nil
+
+	return lastErr
+}
+
+// findAvailableCPUs returns the sorted IDs of all CPUs that have an MSR device file
+func (m *msrReader) findAvailableCPUs() ([]int, error) {
+	cpuDir := m.cpuDir()
+	entries, err := os.ReadDir(cpuDir)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read CPU directory %s: %w", cpuDir, err)
+	}
+
+	var cpuIDs []int
+	for _, entry := range entries {
+		if !entry.IsDir() {
+			continue
+		}
+
+		// Parse CPU ID from directory name
+		cpuID, err := strconv.Atoi(entry.Name())
+		if err != nil {
+			continue // Skip non-numeric directories
+		}
+
+		// Check if MSR file exists for this CPU
+		msrPath := fmt.Sprintf(m.devicePath, cpuID)
+		if _, err := os.Stat(msrPath); err == nil {
+			cpuIDs = append(cpuIDs, cpuID)
+		}
+	}
+
+	// Sort CPU IDs for consistent ordering
+	sort.Ints(cpuIDs)
+
+	return cpuIDs, nil
+}
+
+// createZones creates MSR-based energy zones for all available MSR counters.
+// Zones that share a name are combined with NewAggregatedZone, and m.zones is
+// filled in sorted name order so the result is stable across runs.
+//
+// NOTE(review): one zone is created per entry in m.msrFiles, i.e. per CPU
+// directory found under cpuDir. If those entries are logical CPUs that share a
+// package-scoped counter and NewAggregatedZone sums its members, the aggregate
+// over-counts - confirm, and if so keep only one CPU per package.
+func (m *msrReader) createZones() error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	m.zones = nil
+
+	// Get sorted CPU IDs for consistent zone ordering
+	cpuIDs := make([]int, 0, len(m.msrFiles))
+	for cpuID := range m.msrFiles {
+		cpuIDs = append(cpuIDs, cpuID)
+	}
+	sort.Ints(cpuIDs)
+
+	// Group zones by name for potential aggregation
+	zoneGroups := make(map[string][]*msrZone)
+
+	// Create zones for each MSR counter on each CPU
+	for _, cpuID := range cpuIDs {
+		msrFile := m.msrFiles[cpuID]
+
+		for zoneName, msrOffset := range msrZoneConfig {
+			// Test if this MSR register is readable on this CPU
+			if !m.isRegisterReadable(msrFile, msrOffset) {
+				m.logger.Debug("MSR register not readable, skipping zone",
+					"cpu", cpuID, "zone", zoneName, "msr", fmt.Sprintf("0x%x", msrOffset))
+				continue
+			}
+
+			// Map internal zone names to standard RAPL names if needed
+			displayName := zoneName
+			if mappedName, exists := zoneNameMapping[zoneName]; exists {
+				displayName = mappedName
+			}
+
+			// Create MSR zone
+			zone := NewMSRZone(displayName, cpuID, cpuID, msrOffset, m.energyUnit, msrFile)
+			zoneGroups[displayName] = append(zoneGroups[displayName], zone)
+
+			m.logger.Debug("Created MSR zone",
+				"name", displayName, "cpu", cpuID, "msr", fmt.Sprintf("0x%x", msrOffset))
+		}
+	}
+
+	// Map iteration order is random, so walk the groups by sorted name to keep
+	// the order of m.zones deterministic.
+	names := make([]string, 0, len(zoneGroups))
+	for name := range zoneGroups {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+
+	// Convert zone groups to EnergyZone interfaces
+	// For multi-socket systems, aggregate zones with the same name
+	for _, name := range names {
+		zones := zoneGroups[name]
+		if len(zones) == 1 {
+			// Single zone - use directly
+			m.zones = append(m.zones, zones[0])
+			continue
+		}
+
+		// Multiple zones - create aggregated zone
+		energyZones := make([]EnergyZone, 0, len(zones))
+		for _, zone := range zones {
+			energyZones = append(energyZones, zone)
+		}
+		m.zones = append(m.zones, NewAggregatedZone(energyZones))
+
+		m.logger.Debug("Created aggregated MSR zone",
+			"name", name, "zone_count", len(zones))
+	}
+
+	if len(m.zones) == 0 {
+		return fmt.Errorf("no readable MSR energy counters found")
+	}
+
+	return nil
+}
+
+// isRegisterReadable reports whether the 8-byte register at msrOffset can be read
+// in full from msrFile.
+func (m *msrReader) isRegisterReadable(msrFile *os.File, msrOffset uint32) bool {
+	// ReadAt reads at an explicit offset without moving the file's shared offset
+	// and returns a non-nil error when fewer than len(buf) bytes were read, so a
+	// short read is treated as "not readable".
+	var buf [8]byte
+	_, err := msrFile.ReadAt(buf[:], int64(msrOffset))
+	return err == nil
+}
diff --git a/internal/device/msr_reader_test.go b/internal/device/msr_reader_test.go
new file mode 100644
index 0000000000..81b91d6b68
--- /dev/null
+++ b/internal/device/msr_reader_test.go
@@ -0,0 +1,406 @@
+// SPDX-FileCopyrightText: 2025 The Kepler Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package device
+
+/*
+MSR Test Data Documentation
+
+This test file uses mock MSR data to simulate Intel RAPL MSR registers for testing
+the MSR reader implementation.
The test data simulates the following registers: + +MSR Register Values: +- 0x606: IA32_RAPL_POWER_UNIT - Power unit register containing scaling factors +- 0x611: IA32_PKG_ENERGY_STATUS - Package energy counter (32-bit, wraps around) +- 0x639: IA32_PP0_ENERGY_STATUS - Power Plane 0 (cores) energy counter +- 0x619: IA32_DRAM_ENERGY_STATUS - DRAM energy counter + +File Format: +Each MSR register value is stored as 8 bytes (uint64) in little-endian format. +The test creates temporary MSR files and writes mock data at specific byte offsets +corresponding to the MSR register addresses. + +Energy Unit Calculation: +The power unit register (0x606) contains scaling factors in specific bit fields: +- Bits 12:8 contain the energy unit value (e.g., value 16 means 1/(2^16) joules per LSB) +- Energy counters use this unit to convert raw MSR values to microjoules +- Example: energy_unit = 15.2587890625 microjoules (when unit value = 16) + +Counter Overflow: +MSR energy counters are 32-bit values that wrap around at 2^32. The implementation +must handle this overflow correctly to maintain accurate energy measurements. 
+*/ + +import ( + "fmt" + "log/slog" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// fakeMSRReader implements raplReader for testing +type fakeMSRReader struct { + zones []EnergyZone + available bool + initError error + name string +} + +func (f *fakeMSRReader) Zones() ([]EnergyZone, error) { + return f.zones, nil +} + +func (f *fakeMSRReader) Available() bool { + return f.available +} + +func (f *fakeMSRReader) Init() error { + return f.initError +} + +func (f *fakeMSRReader) Close() error { + return nil +} + +func (f *fakeMSRReader) Name() string { + if f.name == "" { + return "fake-msr" + } + return f.name +} + +// fakeMSRZone implements EnergyZone for testing +type fakeMSRZone struct { + name string + index int + path string + energy Energy + maxEnergy Energy + energyErr error +} + +func (f *fakeMSRZone) Name() string { + return f.name +} + +func (f *fakeMSRZone) Index() int { + return f.index +} + +func (f *fakeMSRZone) Path() string { + return f.path +} + +func (f *fakeMSRZone) Energy() (Energy, error) { + return f.energy, f.energyErr +} + +func (f *fakeMSRZone) MaxEnergy() Energy { + return f.maxEnergy +} + +func TestMSRReader_Available(t *testing.T) { + tests := []struct { + name string + setupDevDir bool + createMSRFile bool + expectedResult bool + }{ + { + name: "MSR available with dev directory and msr file", + setupDevDir: true, + createMSRFile: true, + expectedResult: true, + }, + { + name: "MSR unavailable without dev directory", + setupDevDir: false, + createMSRFile: false, + expectedResult: false, + }, + { + name: "MSR unavailable without msr file", + setupDevDir: true, + createMSRFile: false, + expectedResult: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create temporary directory structure + tempDir := t.TempDir() + var devicePath string + + if tt.setupDevDir { + // Create /dev/cpu/0 directory + cpuDir := 
filepath.Join(tempDir, "dev", "cpu", "0") + require.NoError(t, os.MkdirAll(cpuDir, 0755)) + + devicePath = filepath.Join(tempDir, "dev", "cpu", "%d", "msr") + + if tt.createMSRFile { + msrFile := filepath.Join(cpuDir, "msr") + file, err := os.Create(msrFile) + require.NoError(t, err) + _ = file.Close() + } + } else { + devicePath = filepath.Join(tempDir, "nonexistent", "cpu", "%d", "msr") + } + + reader := NewMSRReader(devicePath, slog.Default()) + result := reader.Available() + + assert.Equal(t, tt.expectedResult, result) + }) + } +} + +func TestMSRReader_Init(t *testing.T) { + tests := []struct { + name string + setupMSRs func(tempDir string) string + expectError bool + errorMsg string + }{ + { + name: "successful initialization", + setupMSRs: func(tempDir string) string { + // Create CPU 0 with MSR file containing mock data + cpuDir := filepath.Join(tempDir, "dev", "cpu", "0") + require.NoError(t, os.MkdirAll(cpuDir, 0755)) + + msrFile := filepath.Join(cpuDir, "msr") + createMockMSRFile(t, msrFile) + + return filepath.Join(tempDir, "dev", "cpu", "%d", "msr") + }, + expectError: false, + }, + { + name: "initialization fails with no CPUs", + setupMSRs: func(tempDir string) string { + // Create empty dev directory + require.NoError(t, os.MkdirAll(filepath.Join(tempDir, "dev", "cpu"), 0755)) + return filepath.Join(tempDir, "dev", "cpu", "%d", "msr") + }, + expectError: true, + errorMsg: "MSR interface not available", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tempDir := t.TempDir() + devicePath := tt.setupMSRs(tempDir) + + reader := NewMSRReader(devicePath, slog.Default()) + err := reader.Init() + + if tt.expectError { + assert.Error(t, err) + if tt.errorMsg != "" { + assert.Contains(t, err.Error(), tt.errorMsg) + } + } else { + assert.NoError(t, err) + } + + // Clean up + _ = reader.Close() + }) + } +} + +func TestMSRReader_Zones(t *testing.T) { + tempDir := t.TempDir() + + // Create CPU 0 and CPU 1 with MSR files + for i := 0; 
i < 2; i++ { + cpuDir := filepath.Join(tempDir, "dev", "cpu", fmt.Sprintf("%d", i)) + require.NoError(t, os.MkdirAll(cpuDir, 0755)) + + msrFile := filepath.Join(cpuDir, "msr") + createMockMSRFile(t, msrFile) + } + + devicePath := filepath.Join(tempDir, "dev", "cpu", "%d", "msr") + reader := NewMSRReader(devicePath, slog.Default()) + + require.NoError(t, reader.Init()) + t.Cleanup(func() { + assert.NoError(t, reader.Close()) + }) + + zones, err := reader.Zones() + require.NoError(t, err) + + // Should have zones for package, core (pp0), and dram + // On a 2-CPU system, we should get aggregated zones + assert.Greater(t, len(zones), 0) + + // Verify zone names + zoneNames := make(map[string]bool) + for _, zone := range zones { + zoneNames[zone.Name()] = true + + // Test that each zone can provide energy readings + energy, err := zone.Energy() + assert.NoError(t, err) + assert.GreaterOrEqual(t, energy, Energy(0)) + } + + // Should have at least package zone + assert.True(t, zoneNames["package"] || zoneNames["core"] || zoneNames["dram"], + "Expected at least one MSR zone type") +} + +func TestMSRReader_Name(t *testing.T) { + reader := NewMSRReader("/dev/cpu/%d/msr", slog.Default()) + assert.Equal(t, "msr", reader.Name()) +} + +func TestMSRReader_Close(t *testing.T) { + tempDir := t.TempDir() + + // Create CPU 0 with MSR file + cpuDir := filepath.Join(tempDir, "dev", "cpu", "0") + require.NoError(t, os.MkdirAll(cpuDir, 0755)) + + msrFile := filepath.Join(cpuDir, "msr") + createMockMSRFile(t, msrFile) + + devicePath := filepath.Join(tempDir, "dev", "cpu", "%d", "msr") + reader := NewMSRReader(devicePath, slog.Default()) + + require.NoError(t, reader.Init()) + + // Verify it has zones before closing + zones, err := reader.Zones() + require.NoError(t, err) + assert.Greater(t, len(zones), 0) + + // Close should not error + err = reader.Close() + assert.NoError(t, err) + + // After closing, zones should be cleared + _, err = reader.Zones() + assert.Error(t, err) + 
assert.Contains(t, err.Error(), "MSR reader not initialized") +} + +func TestMSRZone_Energy(t *testing.T) { + tests := []struct { + name string + msrData []byte + energyUnit float64 + expectedRange [2]Energy // min, max range + }{ + { + name: "normal energy reading", + msrData: []byte{ + 0x00, 0x00, 0x10, 0x00, // 0x100000 in lower 32 bits + 0x00, 0x00, 0x00, 0x00, // upper 32 bits + }, + energyUnit: 15.2587890625, // 1000000 / 2^16 + expectedRange: [2]Energy{Energy(15999998), Energy(16000000)}, // Approximately 16.0 J + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create temporary MSR file with specific data + tempDir := t.TempDir() + msrFile := filepath.Join(tempDir, "msr") + + file, err := os.Create(msrFile) + require.NoError(t, err) + t.Cleanup(func() { + assert.NoError(t, file.Close()) + }) + + // Write mock MSR data at different offsets + _, err = file.WriteAt(tt.msrData, int64(MSRPkgEnergyStatus)) + require.NoError(t, err) + + // Create MSR zone + zone := NewMSRZone("package", 0, 0, MSRPkgEnergyStatus, tt.energyUnit, file) + + energy, err := zone.Energy() + require.NoError(t, err) + + // Check energy is within expected range + assert.GreaterOrEqual(t, energy, tt.expectedRange[0]) + assert.LessOrEqual(t, energy, tt.expectedRange[1]) + }) + } +} + +func TestMSRZone_MaxEnergy(t *testing.T) { + energyUnit := 15.2587890625 // 1000000 / 2^16 + + zone := NewMSRZone("package", 0, 0, MSRPkgEnergyStatus, energyUnit, nil) + maxEnergy := zone.MaxEnergy() + + // For 32-bit counter, max should be 2^32 * energyUnit + expectedMax := Energy(float64(0xFFFFFFFF) * energyUnit) + assert.Equal(t, expectedMax, maxEnergy) +} + +// Helper functions + +// createMockMSRFile creates a mock MSR device file with test data +// The file simulates reading from /dev/cpu/N/msr with realistic RAPL register values +func createMockMSRFile(t *testing.T, path string) { + file, err := os.Create(path) + require.NoError(t, err) + defer func() { + assert.NoError(t, 
file.Close()) + }() + + // Write power unit register at offset 0x606 (IA32_RAPL_POWER_UNIT) + // This register contains scaling factors for energy measurements + // Bits 12:8 = energy unit: 16 means 1/(2^16) = 15.2587890625 microjoules per LSB + powerUnitData := []byte{ + 0x00, 0x10, 0x00, 0x00, // Energy unit = 16 in bits 12:8 + 0x00, 0x00, 0x00, 0x00, // Upper 32 bits (unused) + } + _, err = file.WriteAt(powerUnitData, int64(MSRPowerUnit)) + require.NoError(t, err) + + // Write package energy counter at offset 0x611 (IA32_PKG_ENERGY_STATUS) + // This is a 32-bit counter that accumulates package energy consumption + // Raw value: 0x100000 = 1048576 LSB → ~16.0 Joules with energy unit 15.26 μJ/LSB + pkgEnergyData := []byte{ + 0x00, 0x00, 0x10, 0x00, // 32-bit energy counter value + 0x00, 0x00, 0x00, 0x00, // Upper 32 bits (reserved/unused) + } + _, err = file.WriteAt(pkgEnergyData, int64(MSRPkgEnergyStatus)) + require.NoError(t, err) + + // Write PP0 energy counter at offset 0x639 (IA32_PP0_ENERGY_STATUS) + // PP0 represents Power Plane 0 (CPU cores) energy consumption + // Raw value: 0x80000 = 524288 LSB → ~8.0 Joules + pp0EnergyData := []byte{ + 0x00, 0x00, 0x08, 0x00, // 32-bit energy counter value + 0x00, 0x00, 0x00, 0x00, // Upper 32 bits (reserved/unused) + } + _, err = file.WriteAt(pp0EnergyData, int64(MSRPP0EnergyStatus)) + require.NoError(t, err) + + // Write DRAM energy counter at offset 0x619 (IA32_DRAM_ENERGY_STATUS) + // This counter tracks memory subsystem energy consumption + // Raw value: 0x40000 = 262144 LSB → ~4.0 Joules + dramEnergyData := []byte{ + 0x00, 0x00, 0x04, 0x00, // 32-bit energy counter value + 0x00, 0x00, 0x00, 0x00, // Upper 32 bits (reserved/unused) + } + _, err = file.WriteAt(dramEnergyData, int64(MSRDRAMEnergyStatus)) + require.NoError(t, err) +} diff --git a/internal/device/msr_zone.go b/internal/device/msr_zone.go new file mode 100644 index 0000000000..34ec7dcc92 --- /dev/null +++ b/internal/device/msr_zone.go @@ -0,0 +1,125 @@ 
+// SPDX-FileCopyrightText: 2025 The Kepler Authors +// SPDX-License-Identifier: Apache-2.0 + +package device + +import ( + "encoding/binary" + "fmt" + "math" + "os" +) + +// MSR Register offsets for Intel RAPL energy counters +const ( + // IA32_RAPL_POWER_UNIT - Power unit register containing scaling factors + MSRPowerUnit = 0x606 + + // Energy counters (32-bit, wraparound at ~4 billion) + MSRPkgEnergyStatus = 0x611 // Package energy counter + MSRPP0EnergyStatus = 0x639 // Power Plane 0 (cores) energy counter + MSRDRAMEnergyStatus = 0x619 // DRAM energy counter +) + +// msrZone implements EnergyZone interface for MSR-based energy reading +type msrZone struct { + name string + index int + cpuID int + msrOffset uint32 + energyUnit float64 // Energy unit in microjoules per LSB + msrFile *os.File +} + +// NewMSRZone creates a new MSR-based energy zone +func NewMSRZone(name string, index, cpuID int, msrOffset uint32, energyUnit float64, msrFile *os.File) *msrZone { + return &msrZone{ + name: name, + index: index, + cpuID: cpuID, + msrOffset: msrOffset, + energyUnit: energyUnit, + msrFile: msrFile, + } +} + +// Name returns the zone name (package, pp0, dram) +func (m *msrZone) Name() string { + return m.name +} + +// Index returns the zone index (CPU socket/package index) +func (m *msrZone) Index() int { + return m.index +} + +// Path returns the MSR device path for this zone +func (m *msrZone) Path() string { + return fmt.Sprintf("/dev/cpu/%d/msr:0x%x", m.cpuID, m.msrOffset) +} + +// Energy reads the current energy value from the MSR register +func (m *msrZone) Energy() (Energy, error) { + if m.msrFile == nil { + return 0, fmt.Errorf("MSR file not opened for CPU %d", m.cpuID) + } + + // Read 64-bit MSR register at the specified offset + _, err := m.msrFile.Seek(int64(m.msrOffset), 0) + if err != nil { + return 0, fmt.Errorf("failed to seek to MSR offset 0x%x: %w", m.msrOffset, err) + } + + var msrValue uint64 + err = binary.Read(m.msrFile, binary.LittleEndian, 
&msrValue) + if err != nil { + return 0, fmt.Errorf("failed to read MSR 0x%x from CPU %d: %w", m.msrOffset, m.cpuID, err) + } + + // Extract the 32-bit energy counter from the MSR value + // Energy counters are in the lower 32 bits + energyCounter := uint32(msrValue & 0xFFFFFFFF) + + // Convert to microjoules using the energy unit + energyMicroJoules := float64(energyCounter) * m.energyUnit + + return Energy(energyMicroJoules), nil +} + +// MaxEnergy returns the maximum energy value before wraparound +// MSR energy counters are 32-bit, so they wrap at 2^32 +func (m *msrZone) MaxEnergy() Energy { + // 32-bit counter maximum value converted to microjoules + maxCounter := uint64(math.MaxUint32) + maxEnergyMicroJoules := float64(maxCounter) * m.energyUnit + return Energy(maxEnergyMicroJoules) +} + +// readEnergyUnit reads the energy unit from the IA32_RAPL_POWER_UNIT MSR +// Returns the energy unit in microjoules per LSB +func readEnergyUnit(msrFile *os.File) (float64, error) { + if msrFile == nil { + return 0, fmt.Errorf("MSR file not opened") + } + + // Seek to the power unit MSR + _, err := msrFile.Seek(int64(MSRPowerUnit), 0) + if err != nil { + return 0, fmt.Errorf("failed to seek to MSR power unit register: %w", err) + } + + var powerUnit uint64 + err = binary.Read(msrFile, binary.LittleEndian, &powerUnit) + if err != nil { + return 0, fmt.Errorf("failed to read MSR power unit register: %w", err) + } + + // Energy unit is in bits 12:8 of the power unit register + energyUnitBits := (powerUnit >> 8) & 0x1F + + // Energy unit = 1 / (2^energyUnitBits) joules + // Convert to microjoules: multiply by 1,000,000 + energyUnit := 1000000.0 / float64(uint64(1)<= 0) + assert.GreaterOrEqual(t, zone.Index(), 0) + // Zone should have a path + assert.NotEmpty(t, zone.Path()) + + // Test energy reading + energy, err := zone.Energy() + assert.NoError(t, err) + assert.Greater(t, uint64(energy), uint64(0)) // Should have some energy value +} + +func TestPowercapReader_Name(t 
*testing.T) { + reader, err := NewPowercapReader("/tmp") + require.NoError(t, err) + assert.Equal(t, "powercap", reader.Name()) +} + +func TestPowercapReader_Close(t *testing.T) { + reader, err := NewPowercapReader("/tmp") + require.NoError(t, err) + + err = reader.Close() + assert.NoError(t, err) +} + +func TestSysfsRaplZone_Implementation(t *testing.T) { + reader, err := NewPowercapReader(validSysFSPath) + require.NoError(t, err) + + zones, err := reader.Zones() + require.NoError(t, err) + require.Greater(t, len(zones), 0) + + // Test the first zone's EnergyZone interface methods + zone := zones[0] + + // Test all EnergyZone interface methods + assert.NotEmpty(t, zone.Name()) // Should have a name + assert.GreaterOrEqual(t, zone.Index(), 0) // Should have a valid index + assert.NotEmpty(t, zone.Path()) // Should have a path + + energy, err := zone.Energy() + assert.NoError(t, err) + assert.Greater(t, uint64(energy), uint64(0)) // Should have some energy value + + maxEnergy := zone.MaxEnergy() + assert.Greater(t, uint64(maxEnergy), uint64(0)) // Should have some max energy value +} diff --git a/internal/device/rapl_sysfs_power_meter.go b/internal/device/rapl_power_meter.go similarity index 52% rename from internal/device/rapl_sysfs_power_meter.go rename to internal/device/rapl_power_meter.go index c5b64578a0..92f21f04a0 100644 --- a/internal/device/rapl_sysfs_power_meter.go +++ b/internal/device/rapl_power_meter.go @@ -8,27 +8,41 @@ import ( "log/slog" "strings" - "github.com/prometheus/procfs/sysfs" + "k8s.io/utils/ptr" ) -// raplPowerMeter implements CPUPowerMeter using sysfs +// raplPowerMeter implements CPUPowerMeter with automatic MSR fallback support type raplPowerMeter struct { - reader sysfsReader + reader raplReader // Current active reader (powercap or MSR) cachedZones []EnergyZone logger *slog.Logger zoneFilter []string topZone EnergyZone + + // Configuration for MSR fallback + msrConfig MSRConfig + sysfsPath string + useMSR bool // Track which backend 
is active +} + +// MSRConfig holds MSR-specific configuration +type MSRConfig struct { + Enabled *bool + Force *bool + DevicePath string } type OptionFn func(*raplPowerMeter) -// sysfsReader is an interface for a sysfs filesystem used by raplPowerMeter to mock for testing -type sysfsReader interface { - Zones() ([]EnergyZone, error) +// WithMSRConfig sets the MSR configuration for fallback behavior +func WithMSRConfig(msrConfig MSRConfig) OptionFn { + return func(pm *raplPowerMeter) { + pm.msrConfig = msrConfig + } } -// WithSysFSReader sets the sysfsReader used by raplPowerMeter -func WithSysFSReader(r sysfsReader) OptionFn { +// WithRaplReader sets a specific raplReader (for testing) +func WithRaplReader(r raplReader) OptionFn { return func(pm *raplPowerMeter) { pm.reader = r } @@ -49,17 +63,18 @@ func WithZoneFilter(zones []string) OptionFn { } } -// NewCPUPowerMeter creates a new CPU power meter +// NewCPUPowerMeter creates a new CPU power meter with MSR fallback support func NewCPUPowerMeter(sysfsPath string, opts ...OptionFn) (*raplPowerMeter, error) { - fs, err := sysfs.NewFS(sysfsPath) - if err != nil { - return nil, err - } - ret := &raplPowerMeter{ - reader: sysfsRaplReader{fs: fs}, logger: slog.Default().With("service", "rapl"), zoneFilter: []string{}, + sysfsPath: sysfsPath, + // Default MSR configuration (disabled) + msrConfig: MSRConfig{ + Enabled: ptr.To(false), + Force: ptr.To(false), + DevicePath: "/dev/cpu/%d/msr", + }, } for _, opt := range opts { @@ -70,21 +85,112 @@ func NewCPUPowerMeter(sysfsPath string, opts ...OptionFn) (*raplPowerMeter, erro } func (r *raplPowerMeter) Name() string { - return "rapl" + if r.useMSR { + return "rapl-msr" + } + return "rapl-powercap" } func (r *raplPowerMeter) Init() error { - // ensure zones can be read but don't cache them - zones, err := r.reader.Zones() + // Clear any cached state + r.cachedZones = nil + r.topZone = nil + + // If a specific reader is set (for testing), use it directly + if r.reader != nil { 
+ r.logger.Info("Using provided power reader", "reader", r.reader.Name()) + return r.validateReader(r.reader) + } + + // Determine which reader to use based on configuration and availability + reader, useMSR, err := r.selectRaplReader() if err != nil { - return err - } else if len(zones) == 0 { - return fmt.Errorf("no RAPL zones found") + return fmt.Errorf("failed to select power reader: %w", err) } - // try reading the first zone and return the error + r.reader = reader + r.useMSR = useMSR + + r.logger.Info("Selected power reader", + "reader", r.reader.Name(), + "msr_fallback", r.useMSR, + "force_msr", ptr.Deref(r.msrConfig.Force, false)) + + return r.validateReader(r.reader) +} + +// selectRaplReader chooses the appropriate RAPL reader based on configuration and availability +func (r *raplPowerMeter) selectRaplReader() (raplReader, bool, error) { + forceMSR := ptr.Deref(r.msrConfig.Force, false) + enableFallback := ptr.Deref(r.msrConfig.Enabled, false) + + // If force MSR is enabled, use MSR directly (for testing) + if forceMSR { + r.logger.Info("MSR forced via configuration") + msrReader := NewMSRReader(r.msrConfig.DevicePath, r.logger) + if !msrReader.Available() { + return nil, false, fmt.Errorf("MSR reader forced but not available") + } + if err := msrReader.Init(); err != nil { + return nil, false, fmt.Errorf("failed to initialize forced MSR reader: %w", err) + } + return msrReader, true, nil + } + + // Try powercap first (default behavior) + powercapReader, err := NewPowercapReader(r.sysfsPath) + if err == nil && powercapReader.Available() { + if err := powercapReader.Init(); err == nil { + r.logger.Debug("Using powercap reader") + return powercapReader, false, nil + } else { + r.logger.Debug("Powercap reader initialization failed", "error", err) + } + } else { + r.logger.Debug("Powercap reader not available", "error", err) + } + + // If powercap failed and MSR fallback is enabled, try MSR + if enableFallback { + r.logger.Info("Attempting MSR fallback as 
powercap unavailable") + + // Log security warning for MSR usage + r.logger.Warn("MSR fallback enabled - be aware of PLATYPUS attack vectors (CVE-2020-8694/8695)") + + msrReader := NewMSRReader(r.msrConfig.DevicePath, r.logger) + if !msrReader.Available() { + return nil, false, fmt.Errorf("neither powercap nor MSR readers are available") + } + if err := msrReader.Init(); err != nil { + return nil, false, fmt.Errorf("MSR fallback failed to initialize: %w", err) + } + + r.logger.Info("MSR fallback activated successfully") + return msrReader, true, nil + } + + // Neither powercap works nor MSR fallback is enabled + return nil, false, fmt.Errorf("powercap unavailable and MSR fallback disabled") +} + +// validateReader ensures the reader can provide valid energy readings +func (r *raplPowerMeter) validateReader(reader raplReader) error { + zones, err := reader.Zones() + if err != nil { + return fmt.Errorf("failed to get zones from %s reader: %w", reader.Name(), err) + } + + if len(zones) == 0 { + return fmt.Errorf("no energy zones found from %s reader", reader.Name()) + } + + // Try reading energy from the first zone to verify functionality _, err = zones[0].Energy() - return err + if err != nil { + return fmt.Errorf("failed to read energy from zone %s: %w", zones[0].Name(), err) + } + + return nil } func (r *raplPowerMeter) needsFiltering() bool { @@ -122,6 +228,10 @@ func (r *raplPowerMeter) Zones() ([]EnergyZone, error) { return r.cachedZones, nil } + if r.reader == nil { + return nil, fmt.Errorf("power reader not initialized") + } + zones, err := r.reader.Zones() if err != nil { return nil, err @@ -135,7 +245,6 @@ func (r *raplPowerMeter) Zones() ([]EnergyZone, error) { } // filter out non-standard zones - stdZoneMap := map[zoneKey]EnergyZone{} for _, zone := range zones { key := zoneKey{name: zone.Name(), index: zone.Index()} @@ -230,58 +339,23 @@ func (r *raplPowerMeter) PrimaryEnergyZone() (EnergyZone, error) { return zones[0], nil } -// isStandardRaplPath checks 
if a RAPL zone path is in the standard format -func isStandardRaplPath(path string) bool { - return strings.Contains(path, "/intel-rapl:") -} - -type sysfsRaplReader struct { - fs sysfs.FS +// Close releases resources held by the power reader +func (r *raplPowerMeter) Close() error { + if r.reader != nil { + return r.reader.Close() + } + return nil } -func (r sysfsRaplReader) Zones() ([]EnergyZone, error) { - raplZones, err := sysfs.GetRaplZones(r.fs) - if err != nil { - return nil, fmt.Errorf("failed to read rapl zones: %w", err) +// isStandardRaplPath checks if a RAPL zone path is in the standard format +func isStandardRaplPath(path string) bool { + // For powercap, check standard path format + if strings.Contains(path, "/intel-rapl:") { + return true } - - // convert sysfs.RaplZones to EnergyZones - energyZones := make([]EnergyZone, 0, len(raplZones)) - for _, zone := range raplZones { - energyZones = append(energyZones, sysfsRaplZone{zone}) + // For MSR, check MSR path format + if strings.Contains(path, "/dev/cpu/") && strings.Contains(path, "/msr:") { + return true } - - return energyZones, nil -} - -// sysfsRaplZone implements EnergyZone using sysfs.RaplZone. 
-// It is an adapter for the EnergyZone interface -type sysfsRaplZone struct { - zone sysfs.RaplZone -} - -// Name returns the name of the zone -func (s sysfsRaplZone) Name() string { - return s.zone.Name -} - -// Index returns the index of the zone -func (s sysfsRaplZone) Index() int { - return s.zone.Index -} - -// Path returns the path of the zone -func (s sysfsRaplZone) Path() string { - return s.zone.Path -} - -// Energy returns the current energy value -func (s sysfsRaplZone) Energy() (Energy, error) { - mj, err := s.zone.GetEnergyMicrojoules() - return Energy(mj), err -} - -// MaxEnergy returns the maximum energy value before wraparound -func (s sysfsRaplZone) MaxEnergy() Energy { - return Energy(s.zone.MaxMicrojoules) + return false } diff --git a/internal/device/rapl_power_meter_test.go b/internal/device/rapl_power_meter_test.go new file mode 100644 index 0000000000..0062846f68 --- /dev/null +++ b/internal/device/rapl_power_meter_test.go @@ -0,0 +1,330 @@ +// SPDX-FileCopyrightText: 2025 The Kepler Authors +// SPDX-License-Identifier: Apache-2.0 + +package device + +import ( + "fmt" + "log/slog" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "k8s.io/utils/ptr" +) + +func TestRaplPowerMeter_Init_WithMockReader(t *testing.T) { + tests := []struct { + name string + mockReader raplReader + expectedName string + expectError bool + }{ + { + name: "successful initialization with mock powercap reader", + mockReader: &fakePowercapReader{ + available: true, + zones: createTestZones("powercap"), + name: "powercap", + }, + expectedName: "powercap", + expectError: false, + }, + { + name: "successful initialization with mock MSR reader", + mockReader: &fakeMSRReader{ + available: true, + zones: createTestZones("msr"), + name: "msr", + }, + expectedName: "msr", + expectError: false, + }, + { + name: "initialization fails with reader that has no zones", + mockReader: &fakePowercapReader{ + available: true, + zones: 
[]EnergyZone{}, + name: "empty", + }, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pm, err := NewCPUPowerMeter( + "/fake/sysfs", + WithRaplReader(tt.mockReader), + ) + require.NoError(t, err) + + err = pm.Init() + + if tt.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedName, pm.reader.Name()) + } + }) + } +} + +func TestRaplPowerMeter_Name(t *testing.T) { + tests := []struct { + name string + useMSR bool + expected string + }{ + { + name: "powercap reader", + useMSR: false, + expected: "rapl-powercap", + }, + { + name: "msr reader", + useMSR: true, + expected: "rapl-msr", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pm := &raplPowerMeter{ + useMSR: tt.useMSR, + } + assert.Equal(t, tt.expected, pm.Name()) + }) + } +} + +func TestRaplPowerMeter_Zones_WithFiltering(t *testing.T) { + // Create test zones + testZones := []EnergyZone{ + &fakeMSRZone{name: "package", index: 0, path: "/fake/package", energy: Energy(1000)}, + &fakeMSRZone{name: "core", index: 0, path: "/fake/core", energy: Energy(500)}, + &fakeMSRZone{name: "dram", index: 0, path: "/fake/dram", energy: Energy(300)}, + } + + tests := []struct { + name string + zoneFilter []string + expected []string + }{ + { + name: "no filter - all zones", + zoneFilter: []string{}, + expected: []string{"package", "core", "dram"}, + }, + { + name: "filter package only", + zoneFilter: []string{"package"}, + expected: []string{"package"}, + }, + { + name: "filter core and dram", + zoneFilter: []string{"core", "dram"}, + expected: []string{"core", "dram"}, + }, + { + name: "filter non-existent zone", + zoneFilter: []string{"nonexistent"}, + expected: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mockReader := &fakeMSRReader{ + available: true, + zones: testZones, + } + + pm := &raplPowerMeter{ + reader: mockReader, + zoneFilter: 
tt.zoneFilter, + logger: slog.Default(), + } + + zones, err := pm.Zones() + if len(tt.expected) == 0 { + assert.Error(t, err) + assert.Contains(t, err.Error(), "no RAPL zones found after filtering") + } else { + assert.NoError(t, err) + assert.Equal(t, len(tt.expected), len(zones)) + + zoneNames := make([]string, len(zones)) + for i, zone := range zones { + zoneNames[i] = zone.Name() + } + + for _, expected := range tt.expected { + assert.Contains(t, zoneNames, expected) + } + } + }) + } +} + +func TestRaplPowerMeter_PrimaryEnergyZone(t *testing.T) { + tests := []struct { + name string + availableZones []string + expectedZone string + }{ + { + name: "psys has highest priority", + availableZones: []string{"core", "package", "psys", "dram"}, + expectedZone: "psys", + }, + { + name: "package has second priority", + availableZones: []string{"core", "package", "dram"}, + expectedZone: "package", + }, + { + name: "core has third priority", + availableZones: []string{"core", "dram"}, + expectedZone: "core", + }, + { + name: "fallback to first zone if no priority match", + availableZones: []string{"uncore", "other"}, + expectedZone: "uncore", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var testZones []EnergyZone + for i, name := range tt.availableZones { + testZones = append(testZones, &fakeMSRZone{ + name: name, + index: i, + path: fmt.Sprintf("/fake/%s", name), + }) + } + + mockReader := &fakeMSRReader{ + available: true, + zones: testZones, + } + + pm := &raplPowerMeter{ + reader: mockReader, + logger: slog.Default(), + } + + primaryZone, err := pm.PrimaryEnergyZone() + assert.NoError(t, err) + assert.Equal(t, tt.expectedZone, primaryZone.Name()) + + // Test caching - call again and should get same result + primaryZone2, err := pm.PrimaryEnergyZone() + assert.NoError(t, err) + assert.Equal(t, primaryZone, primaryZone2) + }) + } +} + +func TestRaplPowerMeter_Close(t *testing.T) { + mockReader := &fakeMSRReader{ + available: true, + 
zones: createTestZones("test"), + } + + pm := &raplPowerMeter{ + reader: mockReader, + logger: slog.Default(), + } + + err := pm.Close() + assert.NoError(t, err) + + // Test closing when reader is nil + pm.reader = nil + err = pm.Close() + assert.NoError(t, err) +} + +func TestNewCPUPowerMeter(t *testing.T) { + sysfsPath := "/fake/sysfs" + + pm, err := NewCPUPowerMeter(sysfsPath) + require.NoError(t, err) + + assert.Equal(t, sysfsPath, pm.sysfsPath) + assert.NotNil(t, pm.logger) + assert.Equal(t, []string{}, pm.zoneFilter) + + // Test MSR config defaults + assert.Equal(t, ptr.To(false), pm.msrConfig.Enabled) + assert.Equal(t, ptr.To(false), pm.msrConfig.Force) + assert.Equal(t, "/dev/cpu/%d/msr", pm.msrConfig.DevicePath) +} + +func TestNewCPUPowerMeter_WithOptions(t *testing.T) { + sysfsPath := "/fake/sysfs" + + testLogger := slog.Default().With("test", "meter") + testZoneFilter := []string{"package", "core"} + testMSRConfig := MSRConfig{ + Enabled: ptr.To(true), + Force: ptr.To(false), + DevicePath: "/custom/cpu/%d/msr", + } + + pm, err := NewCPUPowerMeter( + sysfsPath, + WithRaplLogger(testLogger), + WithZoneFilter(testZoneFilter), + WithMSRConfig(testMSRConfig), + ) + require.NoError(t, err) + + assert.Equal(t, sysfsPath, pm.sysfsPath) + assert.Equal(t, testZoneFilter, pm.zoneFilter) + assert.Equal(t, testMSRConfig, pm.msrConfig) +} + +// Helper types and functions + +type fakePowercapReader struct { + zones []EnergyZone + available bool + initError error + name string +} + +func (f *fakePowercapReader) Zones() ([]EnergyZone, error) { + return f.zones, nil +} + +func (f *fakePowercapReader) Available() bool { + return f.available +} + +func (f *fakePowercapReader) Init() error { + return f.initError +} + +func (f *fakePowercapReader) Close() error { + return nil +} + +func (f *fakePowercapReader) Name() string { + if f.name == "" { + return "fake-powercap" + } + return f.name +} + +func createTestZones(prefix string) []EnergyZone { + return []EnergyZone{ + 
&fakeMSRZone{name: "package", index: 0, path: fmt.Sprintf("/%s/package", prefix), energy: Energy(1000)}, + &fakeMSRZone{name: "core", index: 0, path: fmt.Sprintf("/%s/core", prefix), energy: Energy(500)}, + &fakeMSRZone{name: "dram", index: 0, path: fmt.Sprintf("/%s/dram", prefix), energy: Energy(300)}, + } +} diff --git a/internal/device/rapl_reader.go b/internal/device/rapl_reader.go new file mode 100644 index 0000000000..aac7e375ae --- /dev/null +++ b/internal/device/rapl_reader.go @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: 2025 The Kepler Authors +// SPDX-License-Identifier: Apache-2.0 + +package device + +// raplReader is an internal abstraction for different RAPL reading backends +// (powercap sysfs and MSR). This interface allows the raplPowerMeter to work +// with different RAPL reading mechanisms while maintaining a consistent API. +type raplReader interface { + // Zones returns the list of energy zones available from this power reader + Zones() ([]EnergyZone, error) + + // Available checks if the power reader can be used on the current system + Available() bool + + // Init initializes the power reader and verifies it can read energy values + Init() error + + // Close releases any resources held by the power reader + Close() error + + // Name returns a human-readable name for the power reader implementation + Name() string +} diff --git a/internal/device/rapl_sysfs_power_meter_test.go b/internal/device/rapl_sysfs_power_meter_test.go deleted file mode 100644 index 437d12b6a6..0000000000 --- a/internal/device/rapl_sysfs_power_meter_test.go +++ /dev/null @@ -1,587 +0,0 @@ -// SPDX-FileCopyrightText: 2025 The Kepler Authors -// SPDX-License-Identifier: Apache-2.0 - -package device - -import ( - "errors" - "log/slog" - "strings" - "testing" - - "github.com/prometheus/procfs/sysfs" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" -) - -// TestCPUPowerMeterInterface ensures that raplPowerMeter 
properly implements the CPUPowerMeter interface -func TestCPUPowerMeterInterface(t *testing.T) { - var _ CPUPowerMeter = (*raplPowerMeter)(nil) -} - -func TestNewCPUPowerMeter(t *testing.T) { - meter, err := NewCPUPowerMeter("testdata/sys") - assert.NotNil(t, meter, "NewCPUPowerMeter should not return nil") - assert.NoError(t, err, "NewCPUPowerMeter should not return error") - assert.IsType(t, &raplPowerMeter{}, meter, "NewCPUPowerMeter should return a *cpuPowerMeter") -} - -func TestCPUPowerMeter_Name(t *testing.T) { - meter := &raplPowerMeter{} - name := meter.Name() - assert.Equal(t, "rapl", name, "Name() should return 'rapl'") -} - -func TestCPUPowerMeter_Init(t *testing.T) { - meter, err := NewCPUPowerMeter(validSysFSPath) - assert.NoError(t, err, "NewCPUPowerMeter should not return an error") - - err = meter.Init() - assert.NoError(t, err, "Start() should not return an error") -} - -func TestCPUPowerMeter_Zones(t *testing.T) { - meter := &raplPowerMeter{ - reader: sysfsRaplReader{fs: validSysFSFixtures(t)}, - logger: slog.Default().With("service", "rapl"), - } - zones, err := meter.Zones() - assert.NoError(t, err, "Zones() should not return an error") - assert.NotNil(t, zones, "Zones() should return a non-nil slice") - - names := make([]string, len(zones)) - for i, zone := range zones { - names[i] = zone.Name() - } - assert.Contains(t, names, "package") - assert.Contains(t, names, "core") -} - -// TestSysFSRaplZoneInterface ensures that sysfsRaplZone properly implements the EnergyZone interface -func TestSysFSRaplZoneInterface(t *testing.T) { - pkg := sysfs.RaplZone{ - Name: "package", - Index: 0, - Path: "/sys/class/powercap/intel-rapl/intel-rapl:0", - MaxMicrojoules: 1_000_000, - } - - zone := sysfsRaplZone{zone: pkg} - - // Test that all interface methods return the expected values - assert.Equal(t, 0, zone.Index()) - assert.Equal(t, "/sys/class/powercap/intel-rapl/intel-rapl:0", zone.Path()) - assert.Equal(t, "package", zone.Name()) - assert.Equal(t, 1.0, 
zone.MaxEnergy().Joules()) -} - -func TestSysFSRaplPowerMeterInit(t *testing.T) { - rapl := raplPowerMeter{ - reader: sysfsRaplReader{fs: validSysFSFixtures(t)}, - logger: slog.Default().With("service", "rapl"), - } - err := rapl.Init() - assert.NoError(t, err) -} - -func TestSysFSRaplPowerMeterInitFail(t *testing.T) { - rapl := raplPowerMeter{reader: sysfsRaplReader{fs: invalidSysFSFixtures(t)}} - err := rapl.Init() - assert.Error(t, err) -} - -// TestSysFSRaplPowerMeter tests the sysfsRaplZone implementation using test fixtures -func TestSysFSRaplPowerMeter(t *testing.T) { - fs := validSysFSFixtures(t) - actualZones, err := sysfs.GetRaplZones(fs) - assert.NoError(t, err) - assert.Equal(t, 4, len(actualZones), "Expected to find 4 zones in test fixtures") - - // realRaplReader should filter out non-standard zones - rapl := raplPowerMeter{ - reader: sysfsRaplReader{fs: fs}, - logger: slog.Default().With("service", "rapl"), - } - zones, err := rapl.Zones() - - // Test that each zone implements the interface correctly - assert.NoError(t, err) - // With aggregation: two package zones become one AggregatedZone + one core zone = 2 total - assert.Equal(t, 2, len(zones), "find 2 zones after aggregation (package + core)") - assert.Equal(t, []string{"core", "package"}, sortedZoneNames(zones), - "Expected to find aggregated zones in test fixtures") - - for _, zone := range zones { - assert.NotEmpty(t, zone.Name(), "Zone name should not be empty") - assert.NotEmpty(t, zone.Path(), "Zone path should not be empty") - assert.GreaterOrEqual(t, zone.MaxEnergy(), 1000.0*Joule, "Max energy should not be negative") - - // Zone could be either sysfsRaplZone or AggregatedZone - switch z := zone.(type) { - case sysfsRaplZone: - // Individual zone - assert.NotNil(t, z) - case *AggregatedZone: - // Aggregated zone - assert.NotNil(t, z) - assert.Equal(t, -1, z.Index(), "AggregatedZone should have index -1") - default: - t.Fatalf("Unexpected zone type: %T", zone) - } - - // Skip the original 
assertion since we now support both zone types - _ = zone - - energy, err := zone.Energy() - assert.NoError(t, err, zone.Path()) - assert.GreaterOrEqual(t, energy, 1000.0*Joule, "Energy should not be negative") - } -} - -func TestAggregatedZoneIntegration(t *testing.T) { - // Test that RAPL reader creates AggregatedZone for multiple zones with same name - mockReader := &mockSysFSReader{ - response: []EnergyZone{ - // Two package zones with same name but different indices and one core zone - mockZone{name: "package", index: 0, path: "/intel-rapl:0", energy: 1000, maxEnergy: 100000}, - mockZone{name: "package", index: 1, path: "/intel-rapl:1", energy: 2000, maxEnergy: 100000}, - mockZone{name: "core", index: 0, path: "/intel-rapl:0:0", energy: 500, maxEnergy: 50000}, - }, - } - - rapl := &raplPowerMeter{ - reader: mockReader, - logger: slog.Default(), - } - - zones, err := rapl.Zones() - require.NoError(t, err) - - // Should have 2 zones: 1 aggregated package zone + 1 core zone - assert.Equal(t, 2, len(zones), "Expected 2 zones after aggregation") - - // Find the package zone - should be AggregatedZone - var packageZone EnergyZone - var coreZone EnergyZone - for _, zone := range zones { - if zone.Name() == "package" { - packageZone = zone - } else if zone.Name() == "core" { // Single zone keeps original name - coreZone = zone - } - } - - // Verify package zone is aggregated - require.NotNil(t, packageZone, "Package zone should exist") - aggregated, isAggregated := packageZone.(*AggregatedZone) - assert.True(t, isAggregated, "Package zone should be AggregatedZone") - assert.Equal(t, "package", aggregated.Name()) - assert.Equal(t, -1, aggregated.Index()) - assert.Equal(t, Energy(200000), aggregated.MaxEnergy()) // Sum of both package zones - - // Verify core zone is not aggregated - require.NotNil(t, coreZone, "Core zone should exist") - _, isNotAggregated := coreZone.(mockZone) - assert.True(t, isNotAggregated, "Core zone should remain as individual zone") - - // Test 
energy aggregation - packageEnergy, err := packageZone.Energy() - require.NoError(t, err) - assert.Equal(t, Energy(3000), packageEnergy) // 1000 + 2000 from both package zones -} - -type mockZone struct { - name string - index int - path string - energy Energy - maxEnergy Energy -} - -func (m mockZone) Name() string { return m.name } -func (m mockZone) Index() int { return m.index } -func (m mockZone) Path() string { return m.path } -func (m mockZone) Energy() (Energy, error) { return m.energy, nil } -func (m mockZone) MaxEnergy() Energy { return m.maxEnergy } - -type mockSysFSReader struct { - response []EnergyZone - err error -} - -func (m *mockSysFSReader) Zones() ([]EnergyZone, error) { - return m.response, m.err -} - -// TestRAPLPowerMeterFromFixtures tests the realRaplReader with filtering using test fixtures -func TestRAPLPowerMeterFromFixtures(t *testing.T) { - fs := validSysFSFixtures(t) - - raplMeter := raplPowerMeter{ - reader: sysfsRaplReader{fs: fs}, - logger: slog.Default().With("service", "rapl"), - } - allZones, err := raplMeter.Zones() - assert.NoError(t, err) - assert.NotEmpty(t, allZones, "Expected to find RAPL zones in test fixtures") - - mmioZones := 0 - for _, zone := range allZones { - if strings.Contains(zone.Path(), "mmio") { - mmioZones++ - } - } - assert.Equal(t, mmioZones, 0, "all non-standard RAPL zones should be filtered") -} - -// TestStandardRaplPath tests that standard paths are preferred over non-standard ones -func TestStandardRaplPaths(t *testing.T) { - tt := []struct { - path string - isStandard bool - }{ - {"/sys/class/powercap/intel-rapl", false}, - {"/sys/class/powercap/intel-rapl-mmio", false}, - {"/sys/class/powercap/intel-rapl-mmio/intel-rapl-mmio:0", false}, - {"/sys/class/powercap/intel-rapl-mmio:0", false}, - {"/sys/class/powercap/intel-rapl/intel-rapl:0", true}, - {"/sys/class/powercap/intel-rapl:0", true}, - {"/sys/class/powercap/intel-rapl:0:0", true}, - {"/sys/class/powercap/intel-rapl:0:1", true}, - 
{"/sys/class/powercap/intel-rapl:1", true}, - } - - for _, test := range tt { - assert.Equal(t, test.isStandard, isStandardRaplPath(test.path), test.path) - } -} - -type mockRaplReader struct { - mock.Mock -} - -func (m *mockRaplReader) Zones() ([]EnergyZone, error) { - args := m.Called() - return args.Get(0).([]EnergyZone), args.Error(1) -} - -// TestStandardPathPreference tests that standard paths are preferred over non-standard ones -func TestStandardPathPreference(t *testing.T) { - // Create test zones with both standard and non-standard paths - mmio := &MockRaplZone{ - name: "package", - path: "/sys/class/powercap/intel-rapl-mmio/intel-rapl-mmio:0", - index: 0, - } - stdPkg := &MockRaplZone{ - name: "package", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0", - index: 0, - } - tt := []struct { - zones []EnergyZone - expected EnergyZone - }{ - {[]EnergyZone{stdPkg}, stdPkg}, - {[]EnergyZone{mmio}, mmio}, - {[]EnergyZone{mmio, stdPkg}, stdPkg}, - {[]EnergyZone{stdPkg, mmio}, stdPkg}, - } - - for _, test := range tt { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(test.zones, nil) - - rapl, err := NewCPUPowerMeter(validSysFSPath, WithSysFSReader(mockReader)) - assert.NoError(t, err) - - zones, err := rapl.Zones() - assert.NoError(t, err) - - // We should have only one package zone - assert.Equal(t, 1, len(zones), "Should have 1 zone after filtering mmio") - - // The package zone should be the standard path version - pkg := zones[0] - expected := test.expected - - // It should be the standard path version - assert.Equal(t, "package", expected.Name()) - assert.Equal(t, pkg.Path(), expected.Path(), - "Should prefer standard path over non-standard path") - - mockReader.AssertExpectations(t) - } -} - -// TestZoneCaching tests that zones are cached and called only once -func TestZoneCaching(t *testing.T) { - // Create test zones with both standard and non-standard paths - pkg := &MockRaplZone{ - name: "package", - path: 
"/sys/class/powercap/intel-rapl/intel-rapl:0", - index: 0, - } - core := &MockRaplZone{ - name: "core", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0", - index: 1, - } - raplZones := []EnergyZone{pkg, core} - - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(raplZones, nil).Once() - - rapl, err := NewCPUPowerMeter(validSysFSPath, WithSysFSReader(mockReader)) - assert.NoError(t, err) - - // Get zones multiple times to test that "Zone" is called only once - for range 3 { - zones, err := rapl.Zones() - assert.NoError(t, err) - assert.Equal(t, 2, len(zones), "Should have both zones") - } - - mockReader.AssertExpectations(t) -} - -// TestZoneCaching_Error tests that zones are not cached when there is an error -func TestZoneCaching_Error(t *testing.T) { - mockReader := &mockRaplReader{} - rapl, err := NewCPUPowerMeter(validSysFSPath, WithSysFSReader(mockReader)) - - t.Run("Zone Read Error", func(t *testing.T) { - mockReader.On("Zones").Return([]EnergyZone(nil), errors.New("error")).Once() - assert.NoError(t, err) - zones, err := rapl.Zones() - assert.Error(t, err) - assert.Nil(t, zones) - mockReader.AssertExpectations(t) - }) - - // Create test zones with both standard and non-standard paths - pkg := &MockRaplZone{ - name: "package", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0", - index: 0, - } - core := &MockRaplZone{ - name: "core", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0", - index: 1, - } - raplZones := []EnergyZone{pkg, core} - t.Run("Zone Read Succeeds", func(t *testing.T) { - mockReader.On("Zones").Return(raplZones, nil).Once() - for range 3 { - zones, err := rapl.Zones() - assert.NoError(t, err) - assert.Equal(t, 2, len(zones)) - - } - mockReader.AssertExpectations(t) - }) -} - -// TestZone_None tests that zones error when none are found -func TestZone_None(t *testing.T) { - mockReader := &mockRaplReader{} - rapl, err := NewCPUPowerMeter(validSysFSPath, WithSysFSReader(mockReader)) - assert.NoError(t, err) - - 
mockReader.On("Zones").Return([]EnergyZone(nil), nil).Once() - zones, err := rapl.Zones() - assert.Error(t, err) - assert.Equal(t, 0, len(zones)) - mockReader.AssertExpectations(t) -} - -// TestNewCPUPowerMeter_InvalidPath tests that NewCPUPowerMeter returns an error with an invalid sysfs path -func TestNewCPUPowerMeter_InvalidPath(t *testing.T) { - meter, err := NewCPUPowerMeter("/nonexistent/path") - assert.Error(t, err, "Should return an error with an invalid path") - assert.Nil(t, meter, "Should not return a meter with an invalid path") -} - -// TestCPUPowerMeter_ZonesError tests that the Zones method correctly handles errors from the reader -func TestCPUPowerMeter_ZonesError(t *testing.T) { - mockReader := &mockRaplReader{} - expectedErr := errors.New("error") - mockReader.On("Zones").Return([]EnergyZone{}, expectedErr) - - meter := &raplPowerMeter{reader: mockReader} - zones, err := meter.Zones() - - assert.Error(t, err, "Should return an error when the reader fails") - assert.Equal(t, expectedErr, err, "Should return the error from the reader") - assert.Nil(t, zones, "Should return nil zones when there's an error") - mockReader.AssertExpectations(t) -} - -// TestCPUPowerMeter_NoZones tests that Zones returns an error when no zones are found -func TestCPUPowerMeter_NoZones(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return([]EnergyZone{}, nil) - - meter := &raplPowerMeter{reader: mockReader} - zones, err := meter.Zones() - - assert.Error(t, err, "Should return an error when no zones are found") - assert.Equal(t, "no RAPL zones found", err.Error(), "Should return a specific error message") - assert.Nil(t, zones, "Should return nil zones when no zones are found") - mockReader.AssertExpectations(t) -} - -// TestCPUPowerMeter_InitNoZones tests that Start returns an error when no zones are found -func TestCPUPowerMeter_InitNoZones(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return([]EnergyZone{}, nil) - 
- meter := &raplPowerMeter{reader: mockReader} - err := meter.Init() - - assert.Error(t, err, "Start() should return an error when no zones are found") - assert.Equal(t, "no RAPL zones found", err.Error(), "Start() should return a specific error message") - mockReader.AssertExpectations(t) -} - -// TestPrimaryEnergyZone tests the PrimaryEnergyZone method -func TestPrimaryEnergyZone(t *testing.T) { - t.Run("Priority hierarchy", func(t *testing.T) { - tests := []struct { - name string - zones []EnergyZone - expected string - }{{ - name: "psys has highest priority", - zones: []EnergyZone{ - mockZone{name: "package", index: 0}, - mockZone{name: "psys", index: 0}, - mockZone{name: "core", index: 0}, - }, - expected: "psys", - }, { - name: "package has priority over core", - zones: []EnergyZone{ - mockZone{name: "core", index: 0}, - mockZone{name: "package", index: 0}, - mockZone{name: "dram", index: 0}, - }, - expected: "package", - }, { - name: "core has priority over dram", - zones: []EnergyZone{ - mockZone{name: "dram", index: 0}, - mockZone{name: "core", index: 0}, - mockZone{name: "uncore", index: 0}, - }, - expected: "core", - }, { - name: "dram has priority over uncore", - zones: []EnergyZone{ - mockZone{name: "uncore", index: 0}, - mockZone{name: "dram", index: 0}, - }, - expected: "dram", - }} - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(tt.zones, nil) - - meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()} - zone, err := meter.PrimaryEnergyZone() - - assert.NoError(t, err) - assert.Equal(t, tt.expected, zone.Name()) - mockReader.AssertExpectations(t) - }) - } - }) - - t.Run("Case insensitive matching", func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return([]EnergyZone{ - mockZone{name: "PACKAGE", index: 0}, - mockZone{name: "Core", index: 0}, - }, nil) - - meter := &raplPowerMeter{reader: mockReader, logger: 
slog.Default()} - zone, err := meter.PrimaryEnergyZone() - - assert.NoError(t, err) - assert.Equal(t, "PACKAGE", zone.Name()) - mockReader.AssertExpectations(t) - }) - - t.Run("Fallback to first zone", func(t *testing.T) { - zones := []EnergyZone{ - mockZone{name: "unknown1", index: 0}, - mockZone{name: "unknown2", index: 1}, - } - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(zones, nil) - - meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()} - zone, err := meter.PrimaryEnergyZone() - - assert.NoError(t, err) - // NOTE: since reader.Zones() does not guarantee the order after filtering, - // we cannot assert zone.Name() == "unknown1", thus assert the zone returned - // any of the zones passed as input - zoneName := zone.Name() - assert.Contains(t, []string{"unknown1", "unknown2"}, zoneName) - mockReader.AssertExpectations(t) - }) - - t.Run("Caching behavior", func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return([]EnergyZone{ - mockZone{name: "package", index: 0}, - }, nil).Once() - - meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()} - - // First call should read from zones and cache topZone - zone1, err := meter.PrimaryEnergyZone() - assert.NoError(t, err) - assert.Equal(t, "package", zone1.Name()) - - // Second call should use cached topZone directly - zone2, err := meter.PrimaryEnergyZone() - assert.NoError(t, err) - assert.Equal(t, "package", zone2.Name()) - - mockReader.AssertExpectations(t) - }) - - t.Run("Error handling", func(t *testing.T) { - t.Run("Zones() returns error", func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return([]EnergyZone{}, errors.New("zones error")) - - meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()} - zone, err := meter.PrimaryEnergyZone() - - assert.Error(t, err) - assert.Nil(t, zone) - assert.Contains(t, err.Error(), "zones error") - mockReader.AssertExpectations(t) - }) - - t.Run("Empty zones 
list", func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return([]EnergyZone{}, nil) - - meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()} - zone, err := meter.PrimaryEnergyZone() - - assert.Error(t, err) - assert.Nil(t, zone) - assert.Contains(t, err.Error(), "no RAPL zones found") - mockReader.AssertExpectations(t) - }) - }) -} diff --git a/internal/device/rapl_zone_filtering_test.go b/internal/device/rapl_zone_filtering_test.go deleted file mode 100644 index 773f08d6ee..0000000000 --- a/internal/device/rapl_zone_filtering_test.go +++ /dev/null @@ -1,257 +0,0 @@ -// SPDX-FileCopyrightText: 2025 The Kepler Authors -// SPDX-License-Identifier: Apache-2.0 - -package device - -import ( - "log/slog" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestRaplZoneFiltering(t *testing.T) { - // Create mock zones for testing - packageZone := &MockRaplZone{ - name: "package", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0", - index: 0, - } - coreZone := &MockRaplZone{ - name: "core", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0", - index: 1, - } - dramZone := &MockRaplZone{ - name: "dram", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:2", - index: 2, - } - uncoreZone := &MockRaplZone{ - name: "uncore", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:3", - index: 3, - } - - allZones := []EnergyZone{packageZone, coreZone, dramZone, uncoreZone} - - tests := []struct { - name string - filterZones []string - expectedZones []string - }{ - { - name: "No filter - all zones included", - filterZones: []string{}, - expectedZones: []string{"package", "core", "dram", "uncore"}, - }, - { - name: "Filter single zone", - filterZones: []string{"core"}, - expectedZones: []string{"core"}, - }, - { - name: "Filter multiple zones", - filterZones: []string{"package", "dram"}, - expectedZones: []string{"package", "dram"}, - }, - { - name: "Case-insensitive filtering", - filterZones: []string{"PACKAGE", 
"Core"}, - expectedZones: []string{"package", "core"}, - }, - { - name: "Non-existent zone in filter", - filterZones: []string{"package", "nonexistent"}, - expectedZones: []string{"package"}, - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(allZones, nil) - - logger := slog.Default().With("test", "zone-filtering") - meter := &raplPowerMeter{ - reader: mockReader, - logger: logger, - zoneFilter: tc.filterZones, - } - - // Filter zones directly to test the filtering logic - filteredZones := meter.filterZones(allZones) - - // Verify only expected zones are included - assert.Equal(t, len(tc.expectedZones), len(filteredZones), - "Filtered zones length mismatch") - - // Create a map of zone names for easy checking - zoneNames := make(map[string]bool) - for _, zone := range filteredZones { - zoneNames[zone.Name()] = true - } - - // Verify each expected zone is present - for _, name := range tc.expectedZones { - assert.True(t, zoneNames[name], - "Expected zone %s not found in filtered zones", name) - } - }) - } -} - -// Test that zone filtering applies during Init -func TestRaplZoneFiltering_Init(t *testing.T) { - packageZone := &MockRaplZone{ - name: "package", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0", - index: 0, - maxMicroJoules: 1000000, - energy: 100000, - } - coreZone := &MockRaplZone{ - name: "core", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0", - index: 1, - maxMicroJoules: 1000000, - energy: 50000, - } - - allZones := []EnergyZone{packageZone, coreZone} - - t.Run("Init succeeds with valid filter", func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(allZones, nil) - - meter := &raplPowerMeter{ - reader: mockReader, - logger: slog.Default(), - zoneFilter: []string{"package"}, - } - - err := meter.Init() - assert.NoError(t, err) - }) - - t.Run("Init does not fails with unknown zones", func(t *testing.T) { - 
mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(allZones, nil) - - meter := &raplPowerMeter{ - reader: mockReader, - logger: slog.Default(), - zoneFilter: []string{"nonexistent"}, - } - - err := meter.Init() - assert.NoError(t, err) - }) -} - -// Test that Zones() properly applies the filter -func TestRaplZoneFiltering_Zones(t *testing.T) { - packageZone := &MockRaplZone{ - name: "package", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0", - index: 0, - maxMicroJoules: 1000000, - energy: 100000, - } - coreZone := &MockRaplZone{ - name: "core", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0", - index: 1, - maxMicroJoules: 1000000, - energy: 50000, - } - - allZones := []EnergyZone{packageZone, coreZone} - - tests := []struct { - name string - filter []string - expectedZones int - expectError bool - }{ - { - name: "No filter", - filter: []string{}, - expectedZones: 2, - expectError: false, - }, { - name: "Filter package", - filter: []string{"package"}, - expectedZones: 1, - expectError: false, - }, { - name: "Filter core", - filter: []string{"core"}, - expectedZones: 1, - expectError: false, - }, { - name: "nonexistent zone", - filter: []string{"nonexistent"}, - expectedZones: 0, - expectError: true, - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(allZones, nil) - - meter := &raplPowerMeter{ - reader: mockReader, - logger: slog.Default(), - zoneFilter: tc.filter, - } - - zones, err := meter.Zones() - - if tc.expectError { - assert.Error(t, err) - assert.Nil(t, zones) - } else { - assert.NoError(t, err) - assert.Equal(t, tc.expectedZones, len(zones)) - } - }) - } -} - -// Test integration with the configuration options -func TestRaplZoneFiltering_WithOptions(t *testing.T) { - // Mock sysfs reader for this test - mockReader := &mockRaplReader{} - packageZone := &MockRaplZone{ - name: "package", - path: 
"/sys/class/powercap/intel-rapl/intel-rapl:0", - index: 0, - maxMicroJoules: 1000000, - energy: 100000, - } - coreZone := &MockRaplZone{ - name: "core", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0", - index: 1, - maxMicroJoules: 1000000, - energy: 50000, - } - mockReader.On("Zones").Return([]EnergyZone{packageZone, coreZone}, nil) - - // Create meter with WithZoneFilter option - meter, err := NewCPUPowerMeter( - validSysFSPath, - WithSysFSReader(mockReader), - WithZoneFilter([]string{"core"}), - ) - assert.NoError(t, err) - - // Check that filtering was applied - zones, err := meter.Zones() - assert.NoError(t, err) - assert.Equal(t, 1, len(zones)) - assert.Equal(t, "core", zones[0].Name()) -} diff --git a/manifests/helm/kepler/values.yaml b/manifests/helm/kepler/values.yaml index d4d9ec1869..5f1892abff 100644 --- a/manifests/helm/kepler/values.yaml +++ b/manifests/helm/kepler/values.yaml @@ -75,6 +75,10 @@ config: minTerminatedEnergyThreshold: 10 rapl: zones: [] + msr: + enabled: false + force: false + devicePath: /dev/cpu/%d/msr exporter: stdout: enabled: false diff --git a/manifests/k8s/configmap.yaml b/manifests/k8s/configmap.yaml index 700ec58cdb..83e854d628 100644 --- a/manifests/k8s/configmap.yaml +++ b/manifests/k8s/configmap.yaml @@ -21,6 +21,10 @@ data: minTerminatedEnergyThreshold: 10 rapl: zones: [] + msr: + enabled: false + force: false + devicePath: "/dev/cpu/%d/msr" exporter: stdout: enabled: false