diff --git a/cmd/kepler/main.go b/cmd/kepler/main.go
index 6f28961cac..89abb9ae31 100644
--- a/cmd/kepler/main.go
+++ b/cmd/kepler/main.go
@@ -233,9 +233,17 @@ func createCPUMeter(logger *slog.Logger, cfg *config.Config) (device.CPUPowerMet
logger.Info("rapl zones are filtered", "zones-enabled", cfg.Rapl.Zones)
}
+ // Convert config MSR settings to device MSRConfig
+ msrConfig := device.MSRConfig{
+ Enabled: cfg.MSR.Enabled,
+ Force: cfg.MSR.Force,
+ DevicePath: cfg.MSR.DevicePath,
+ }
+
return device.NewCPUPowerMeter(
cfg.Host.SysFS,
device.WithRaplLogger(logger),
device.WithZoneFilter(cfg.Rapl.Zones),
+ device.WithMSRConfig(msrConfig),
)
}
diff --git a/compose/dev/kepler-dev/etc/kepler/config.yaml b/compose/dev/kepler-dev/etc/kepler/config.yaml
index 41048a5509..3f8bd7004d 100644
--- a/compose/dev/kepler-dev/etc/kepler/config.yaml
+++ b/compose/dev/kepler-dev/etc/kepler/config.yaml
@@ -35,6 +35,11 @@ host:
rapl:
zones: [] # zones to be enabled, empty enables all default zones
+msr: # MSR fallback configuration for RAPL energy reading
+ enabled: false # enable automatic MSR fallback when powercap unavailable (default: false)
+ force: false # force MSR usage even if powercap available (testing only, default: false)
+ devicePath: /dev/cpu/%d/msr # MSR device path template (default: "/dev/cpu/%d/msr")
+
exporter:
stdout: # stdout exporter related config
enabled: false # disabled by default
diff --git a/config/config.go b/config/config.go
index c91f495cfb..e40c716578 100644
--- a/config/config.go
+++ b/config/config.go
@@ -33,6 +33,16 @@ type (
Zones []string `yaml:"zones"`
}
+ // MSR configuration for fallback power reading
+ MSR struct {
+ // Enable automatic MSR fallback when powercap unavailable
+ Enabled *bool `yaml:"enabled"`
+ // Force MSR usage even if powercap available (testing)
+ Force *bool `yaml:"force"`
+ // MSR device path template
+ DevicePath string `yaml:"devicePath"`
+ }
+
// Development mode settings; disabled by default
Dev struct {
FakeCpuMeter struct {
@@ -98,6 +108,7 @@ type (
Host Host `yaml:"host"`
Monitor Monitor `yaml:"monitor"`
Rapl Rapl `yaml:"rapl"`
+ MSR MSR `yaml:"msr"`
Exporter Exporter `yaml:"exporter"`
Web Web `yaml:"web"`
Debug Debug `yaml:"debug"`
@@ -168,6 +179,12 @@ const (
// RAPL
RaplZones = "rapl.zones" // not a flag
+ // MSR - NOTE: MSR settings are not exposed as CLI flags per proposal
+ // They should only be configured via YAML files due to security implications
+ MSREnabled = "msr.enabled" // not a flag
+ MSRForce = "msr.force" // not a flag
+ MSRDevicePath = "msr.devicePath" // not a flag
+
pprofEnabledFlag = "debug.pprof"
WebConfigFlag = "web.config-file"
@@ -203,6 +220,11 @@ func DefaultConfig() *Config {
Rapl: Rapl{
Zones: []string{},
},
+ MSR: MSR{
+ Enabled: ptr.To(false), // Opt-in for security
+ Force: ptr.To(false),
+ DevicePath: "/dev/cpu/%d/msr",
+ },
Monitor: Monitor{
Interval: 5 * time.Second,
Staleness: 500 * time.Millisecond,
@@ -408,6 +430,9 @@ func (c *Config) sanitize() {
c.Rapl.Zones[i] = strings.TrimSpace(c.Rapl.Zones[i])
}
+ // MSR settings sanitization
+ c.MSR.DevicePath = strings.TrimSpace(c.MSR.DevicePath)
+
for i := range c.Exporter.Prometheus.DebugCollectors {
c.Exporter.Prometheus.DebugCollectors[i] = strings.TrimSpace(c.Exporter.Prometheus.DebugCollectors[i])
}
@@ -488,6 +513,16 @@ func (c *Config) Validate(skips ...SkipValidation) error {
errs = append(errs, fmt.Sprintf("invalid monitor min terminated energy threshold: %d can't be negative", c.Monitor.MinTerminatedEnergyThreshold))
}
}
+ { // MSR settings
+ if c.MSR.DevicePath == "" {
+ errs = append(errs, "MSR device path cannot be empty")
+ } else {
+ // Basic validation that device path is a template
+ if !strings.Contains(c.MSR.DevicePath, "%d") {
+ errs = append(errs, "MSR device path must contain '%d' placeholder for CPU ID")
+ }
+ }
+ }
{ // Kubernetes
if ptr.Deref(c.Kube.Enabled, false) {
if c.Kube.Config != "" {
diff --git a/docs/developer/proposal/EP-002-MSR-Fallback-Power-Meter.md b/docs/developer/proposal/EP-002-MSR-Fallback-Power-Meter.md
new file mode 100644
index 0000000000..c23a581a13
--- /dev/null
+++ b/docs/developer/proposal/EP-002-MSR-Fallback-Power-Meter.md
@@ -0,0 +1,423 @@
+# EP-002: MSR Fallback for CPU Power Meter
+
+**Status**: Draft
+**Author**: Kepler Development Team
+**Created**: 2025-08-12
+**Last Updated**: 2025-08-12
+
+## Summary
+
+This proposal introduces MSR (Model Specific Register) support as a fallback
+mechanism for CPU power monitoring in Kepler when the primary powercap sysfs
+interface (/sys/class/powercap/intel-rapl) is unavailable. The enhancement
+maintains backward compatibility while improving Kepler's resilience in
+environments where powercap is disabled or inaccessible, such as certain
+container runtimes or kernel configurations.
+
+## Problem Statement
+
+Kepler currently relies exclusively on the Linux powercap sysfs interface for
+reading Intel RAPL energy counters. This creates deployment limitations in
+environments where:
+
+1. The powercap interface is disabled by kernel configuration
+2. Container runtimes don't expose /sys/class/powercap
+3. Systems with custom kernel builds lacking powercap support
+4. Virtualized environments without powercap passthrough
+
+These limitations prevent Kepler from collecting energy metrics in otherwise
+capable hardware, reducing its effectiveness as a universal energy monitoring
+solution.
+
+### Current Limitations
+
+1. **Single Interface Dependency**: Complete reliance on powercap sysfs with no
+ fallback mechanism
+2. **Deployment Restrictions**: Cannot deploy in environments without powercap
+ access
+3. **Container Limitations**: Difficulty mounting /sys/class/powercap in certain
+ container security policies
+4. **No Graceful Degradation**: Complete failure rather than fallback when
+ powercap unavailable
+
+## Goals
+
+- **Primary Goal**: Implement MSR-based RAPL reading as automatic fallback when
+ powercap is unavailable
+- **Secondary Goal**: Maintain existing CPUPowerMeter interface compatibility
+- **Tertiary Goal**: Provide configurable control over fallback behavior for
+ security-conscious deployments
+
+## Non-Goals
+
+- Supporting non-Intel architectures (AMD RAPL, ARM PMU)
+- Replacing powercap as the primary interface
+- Implementing model-specific optimizations
+- Supporting pre-Sandy Bridge Intel CPUs
+- Real-time power capping functionality
+
+## Requirements
+
+### Functional Requirements
+
+- Automatically detect powercap availability and fall back to MSR when needed
+- Read energy values from MSR registers: PKG (0x611), PP0 (0x639), DRAM (0x619)
+- Handle 32-bit MSR counter overflow correctly
+- Map MSR zones to existing EnergyZone interface (package, core, dram)
+- Support multi-socket systems with per-CPU MSR access
+- Maintain energy unit conversion compatibility
+
+### Non-Functional Requirements
+
+- **Performance**: MSR reading overhead < 100μs per sample
+- **Reliability**: Handle MSR module loading/unloading gracefully
+- **Security**: Document and mitigate PLATYPUS attack vectors
+- **Maintainability**: Minimal code duplication with existing RAPL implementation
+- **Testability**: Support fake MSR implementation for testing
+
+## Proposed Solution
+
+### High-Level Architecture
+
+```mermaid
+graph TB
+ CPUPowerMeter[CPUPowerMeter Interface]
+
+ raplPowerMeter[raplPowerMeter<br/>Enhanced with MSR]
+
+ powercapReader[powercapReader<br/>Primary]
+ msrReader[msrReader<br/>Fallback]
+ zoneAdapter[Zone<br/>Adapter]
+
+ powercap[/sys/class/<br/>powercap/]
+ msrdev["/dev/cpu/*/msr"]
+ zones[Energy<br/>Zones]
+
+ CPUPowerMeter --> raplPowerMeter
+ raplPowerMeter --> powercapReader
+ raplPowerMeter --> msrReader
+ raplPowerMeter --> zoneAdapter
+
+ powercapReader --> powercap
+ msrReader --> msrdev
+ zoneAdapter --> zones
+
+ style CPUPowerMeter fill:#e1f5fe
+ style raplPowerMeter fill:#b3e5fc
+ style powercapReader fill:#81d4fa
+ style msrReader fill:#ffccbc
+ style zoneAdapter fill:#c5e1a5
+```
+
+### Key Design Choices
+
+1. **Extend Existing Implementation**: Enhance `raplPowerMeter` rather than
+ creating separate implementation to maximize code reuse
+2. **Interface-Based Abstraction**: Create `powerReader` interface for both
+ powercap and MSR backends
+3. **Automatic Detection**: Check powercap availability in Init() and select
+ appropriate backend
+4. **Opt-In MSR Fallback**: Require explicit configuration to enable MSR due to
+ security implications
+5. **Reuse Energy Zone Logic**: Share zone management between implementations
+
+## Detailed Design
+
+### Package Structure
+
+```text
+internal/
+├── device/
+│ ├── cpu_power_meter.go # Interface (unchanged)
+│ ├── rapl_power_meter.go # Enhanced with MSR support
+│ ├── power_reader.go # New abstraction interface
+│ ├── powercap_reader.go # Extracted powercap logic
+│ ├── msr_reader.go # New MSR implementation
+│ ├── msr_reader_test.go # MSR unit tests
+│ └── testdata/
+│ └── msr/ # MSR test fixtures
+└── config/
+ └── config.go # MSR configuration options
+```
+
+### API/Interface Changes
+
+```go
+// power_reader.go - New internal abstraction
+type powerReader interface {
+ // ReadEnergy reads energy value for a specific zone
+ ReadEnergy(zone EnergyZone) (Energy, error)
+ // Available checks if the reader can be used
+ Available() bool
+ // Init initializes the reader
+ Init() error
+ // Close releases resources
+ Close() error
+}
+
+// msr_reader.go - MSR implementation
+type msrReader struct {
+ msrFiles map[int]*os.File // CPU ID -> MSR file handle
+ energyUnit float64 // Energy unit from IA32_RAPL_POWER_UNIT
+ zones []EnergyZone // Available zones
+ mu sync.RWMutex // Thread safety
+}
+
+// Enhanced rapl_power_meter.go
+type raplPowerMeter struct {
+ reader powerReader // Abstracted backend
+ zones []EnergyZone
+ strategy EnergyStrategy
+ useMSR bool // Track which backend is active
+}
+```
+
+## Configuration
+
+### Main Configuration Changes
+
+```go
+// config/config.go additions
+type DeviceConfig struct {
+ // Existing fields...
+ MSR MSRConfig `yaml:"msr"`
+}
+
+type MSRConfig struct {
+ // Enable automatic MSR fallback when powercap unavailable
+ Enabled *bool `yaml:"enabled"`
+ // Force MSR usage even if powercap available (testing)
+ Force *bool `yaml:"force"`
+ // MSR device path template
+ DevicePath string `yaml:"devicePath"`
+}
+
+// Default configuration
+func defaultMSRConfig() MSRConfig {
+ return MSRConfig{
+ Enabled: ptr(false), // Opt-in for security
+ Force: ptr(false),
+ DevicePath: "/dev/cpu/%d/msr",
+ }
+}
+```
+
+### New Configuration File (if applicable)
+
+```yaml
+# Example: hack/config.yaml
+msr:
+ enabled: true
+ force: false
+ devicePath: "/dev/cpu/%d/msr"
+```
+
+### Security Considerations
+
+**Critical Security Warning**: MSR access enables PLATYPUS side-channel attacks
+(CVE-2020-8694/8695) allowing unprivileged users to infer data from other
+processes.
+
+**Mitigations**:
+
+1. MSR fallback disabled by default (opt-in configuration)
+2. Require CAP_SYS_RAWIO capability instead of full root
+3. Document security implications prominently
+4. Add warning logs when MSR fallback is activated
+5. Consider rate-limiting MSR reads to reduce attack surface
+
+## Deployment Examples
+
+### Kubernetes Environment
+
+```yaml
+# DaemonSet with MSR support
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+ name: kepler
+spec:
+ template:
+ spec:
+ containers:
+ - name: kepler
+ image: kepler:msr-enabled
+ args:
+ - --config=/etc/kepler/config.yaml
+ volumeMounts:
+ - name: dev
+ mountPath: /dev
+ readOnly: true
+ - name: config
+ mountPath: /etc/kepler
+ securityContext:
+ privileged: true # Required for MSR access
+ volumes:
+ - name: dev
+ hostPath:
+ path: /dev
+ - name: config
+ configMap:
+ name: kepler-config
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: kepler-config
+data:
+ config.yaml: |
+ msr:
+ enabled: true
+```
+
+### Standalone Deployment
+
+```bash
+# Load MSR kernel module
+sudo modprobe msr
+
+# Run with MSR fallback enabled
+sudo ./bin/kepler --config hack/config.yaml
+
+# Force MSR for testing (ignores powercap) - configured via YAML only
+# Set msr.force: true in config file
+```
+
+## Testing Strategy
+
+### Test Coverage
+
+- **Unit Tests**: MSR reader with mock file operations (85% coverage target)
+- **Integration Tests**: Fallback detection and switching logic
+- **End-to-End Tests**: Energy attribution with MSR backend
+- **Benchmark Tests**: MSR vs sysfs performance comparison
+
+### Test Infrastructure
+
+```go
+// Fake MSR implementation for testing
+type fakeMSRReader struct {
+ energyValues map[EnergyZone]Energy
+ available bool
+}
+
+// Test fixtures in testdata/msr/
+// - Mock MSR device files
+// - Predefined energy values
+// - Overflow scenarios
+```
+
+## Migration and Compatibility
+
+### Backward Compatibility
+
+This enhancement maintains full backward compatibility:
+
+- Existing deployments continue using powercap by default
+- No changes to external APIs or metrics
+- Configuration changes are additive only
+- Existing tests remain valid
+
+### Migration Path
+
+1. **Phase 1**: Deploy with MSR disabled (default) - verify no regression
+2. **Phase 2**: Enable MSR fallback in staging environments
+3. **Phase 3**: Gradual rollout to production with monitoring
+
+### Rollback Strategy
+
+1. Disable MSR fallback via configuration (immediate effect)
+2. Revert to previous Kepler version if issues persist
+3. MSR can be disabled without restart via dynamic config reload
+
+## Metrics Output
+
+```prometheus
+# New metric indicating active power meter backend
+kepler_power_meter_backend{backend="powercap|msr"} 1
+
+# Existing metrics unchanged
+kepler_node_package_energy_millijoule{node="node1"} 12345
+kepler_node_core_energy_millijoule{node="node1"} 6789
+kepler_node_dram_energy_millijoule{node="node1"} 3456
+```
+
+## Implementation Plan
+
+### Phase 1: Foundation (Week 1-2)
+
+- Implement `powerReader` interface abstraction
+- Extract existing powercap logic to `powercapReader`
+- Create basic `msrReader` structure
+- Add MSR configuration options
+
+### Phase 2: Core Functionality (Week 3-4)
+
+- Implement MSR register reading logic
+- Add energy unit conversion
+- Handle counter overflow
+- Implement zone mapping
+- Add fallback detection logic
+
+### Phase 3: Testing and Documentation (Week 5-6)
+
+- Comprehensive unit tests
+- Integration testing
+- Performance benchmarking
+- Security documentation
+- Update deployment guides
+
+## Risks and Mitigations
+
+### Technical Risks
+
+- **Risk**: MSR kernel module not available
+ - **Mitigation**: Graceful degradation with clear error messages
+
+- **Risk**: MSR counter overflow handling errors
+ - **Mitigation**: Extensive testing with overflow scenarios
+
+- **Risk**: Performance regression from abstraction
+ - **Mitigation**: Benchmark and optimize hot paths
+
+### Operational Risks
+
+- **Risk**: Security vulnerabilities from MSR access
+ - **Mitigation**: Disabled by default, clear documentation
+
+- **Risk**: Increased complexity for operators
+ - **Mitigation**: Automatic detection minimizes configuration
+
+## Alternatives Considered
+
+### Alternative 1: Separate MSR Power Meter Implementation
+
+- **Description**: Create independent `msrPowerMeter` implementing
+ `CPUPowerMeter`
+- **Reason for Rejection**: High code duplication, difficult to maintain
+ consistency
+
+### Alternative 2: eBPF-based Power Monitoring
+
+- **Description**: Use eBPF to intercept RAPL MSR reads from other processes
+- **Reason for Rejection**: Complex implementation, limited kernel support,
+ security concerns
+
+### Alternative 3: Intel Power Gadget Integration
+
+- **Description**: Use Intel's official Power Gadget API
+- **Reason for Rejection**: Not available on Linux, requires proprietary
+ libraries
+
+## Success Metrics
+
+- **Functional Metric**: Successfully read power through MSR in environments where
+ powercap is unavailable
+- **Performance Metric**: MSR reading overhead < 100μs per sample, with a target
+  of at least 50% lower read latency than the powercap sysfs path
+- **Adoption Metric**: 20% of deployments utilize MSR fallback within 6 months
+
+## Open Questions
+
+1. Should we support AMD's RAPL MSRs in the same implementation?
+2. Should MSR support be compiled conditionally for security-sensitive builds?
+3. What telemetry should we add to track MSR fallback usage in production?
diff --git a/docs/developer/proposal/index.md b/docs/developer/proposal/index.md
index 861f7843be..7edc595a88 100644
--- a/docs/developer/proposal/index.md
+++ b/docs/developer/proposal/index.md
@@ -6,7 +6,8 @@ This directory contains Enhancement Proposals (EPs) for major features and chang
| ID | Title | Status | Author | Created |
|----|-------|--------|--------|---------|
-| [EP-000](EP_TEMPLATE.md) | Enhancement Proposal Template | Accepted |Sunil Thaha | 2025-01-18 |
+| [EP-000](EP_TEMPLATE.md) | Enhancement Proposal Template | Accepted | Sunil Thaha | 2025-01-18 |
+| [EP-002](EP-002-MSR-Fallback-Power-Meter.md) | MSR Fallback for CPU Power Meter | Draft | Kepler Development Team | 2025-08-12 |
## Proposal Status
diff --git a/docs/user/configuration.md b/docs/user/configuration.md
index 1c803e33f0..eaab98a0b0 100644
--- a/docs/user/configuration.md
+++ b/docs/user/configuration.md
@@ -95,6 +95,11 @@ host:
rapl:
zones: [] # RAPL zones to be enabled, empty enables all default zones
+msr: # MSR fallback configuration for RAPL energy reading
+ enabled: false # Enable automatic MSR fallback when powercap unavailable
+ force: false # Force MSR usage even if powercap available (testing)
+ devicePath: "/dev/cpu/%d/msr" # MSR device path template
+
exporter:
stdout: # stdout exporter related config
enabled: false # disabled by default
@@ -195,6 +200,41 @@ rapl:
zones: ["package", "core", "uncore"]
```
+### 🔌 MSR Configuration
+
+```yaml
+msr:
+ enabled: false # Enable automatic MSR fallback
+ force: false # Force MSR usage for testing
+ devicePath: "/dev/cpu/%d/msr" # MSR device path template
+```
+
+Model Specific Register (MSR) support provides a fallback mechanism for reading Intel RAPL energy counters when the Linux powercap sysfs interface is unavailable.
+
+- **enabled**: Enable automatic MSR fallback when powercap is unavailable
+ - Default: `false` (opt-in for security reasons)
+ - When enabled, Kepler will automatically fall back to MSR if powercap fails
+ - Requires appropriate permissions and hardware support
+
+- **force**: Force MSR usage even when powercap is available
+ - Default: `false`
+ - Primarily for testing and development purposes
+ - When `true`, MSR will be used regardless of powercap availability
+
+- **devicePath**: Template for MSR device file paths
+ - Default: `"/dev/cpu/%d/msr"`
+ - The `%d` placeholder is replaced with the CPU number
+ - Must be accessible with appropriate permissions
+
+⚠️ **Security Note**: MSR access requires elevated privileges and may be restricted on some systems due to security considerations (PLATYPUS attacks, CVE-2020-8694/8695). Use MSR configuration only when necessary and ensure proper system security measures are in place.
+
+**Prerequisites for MSR support:**
+
+- Intel CPU with RAPL support
+- `msr` kernel module loaded (`modprobe msr`)
+- Read access to `/dev/cpu/*/msr` files
+- Elevated privileges (typically root)
+
### 📦 Exporter Configuration
```yaml
diff --git a/hack/config.yaml b/hack/config.yaml
index 8beb0587a1..978a8d171c 100644
--- a/hack/config.yaml
+++ b/hack/config.yaml
@@ -35,6 +35,11 @@ host:
rapl:
zones: [] # zones to be enabled, empty enables all default zones
+msr: # MSR fallback configuration for RAPL energy reading
+ enabled: false # enable automatic MSR fallback when powercap unavailable (default: false)
+ force: false # force MSR usage even if powercap available (testing only, default: false)
+ devicePath: /dev/cpu/%d/msr # MSR device path template (default: "/dev/cpu/%d/msr")
+
exporter:
stdout: # stdout exporter related config
enabled: false # disabled by default
diff --git a/internal/device/mock_cpu_power_meter.go b/internal/device/mock_cpu_power_meter.go
index 7c3e8717ce..4111f8e70e 100644
--- a/internal/device/mock_cpu_power_meter.go
+++ b/internal/device/mock_cpu_power_meter.go
@@ -5,14 +5,6 @@ package device
// TODO: Move this mock to a separate testutil package
-import (
- "slices"
- "testing"
-
- "github.com/prometheus/procfs/sysfs"
- "github.com/stretchr/testify/require"
-)
-
const (
validSysFSPath = "testdata/sys"
badSysFSPath = "testdata/bad_sysfs"
@@ -67,27 +59,3 @@ func (m *MockRaplZone) OnEnergy(j Energy, err error) {
func (m *MockRaplZone) Inc(delta Energy) {
m.energy = (m.energy + delta) % m.maxMicroJoules
}
-
-func validSysFSFixtures(t *testing.T) sysfs.FS {
- t.Helper()
- fs, err := sysfs.NewFS(validSysFSPath)
- require.NoError(t, err, "Failed to create sysfs test FS")
- return fs
-}
-
-func invalidSysFSFixtures(t *testing.T) sysfs.FS {
- t.Helper()
- fs, err := sysfs.NewFS(badSysFSPath)
- require.NoError(t, err, "Failed to create sysfs test FS")
- return fs
-}
-
-func sortedZoneNames(zones []EnergyZone) []string {
- names := make([]string, len(zones))
- for i, zone := range zones {
- names[i] = zone.Name()
- }
- slices.Sort(names)
-
- return names
-}
diff --git a/internal/device/msr_reader.go b/internal/device/msr_reader.go
new file mode 100644
index 0000000000..d284ab5fa4
--- /dev/null
+++ b/internal/device/msr_reader.go
@@ -0,0 +1,290 @@
+// SPDX-FileCopyrightText: 2025 The Kepler Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package device
+
+import (
+ "fmt"
+ "log/slog"
+ "os"
+ "path/filepath"
+ "sort"
+ "strconv"
+ "sync"
+)
+
+// msrReader implements raplReader using Intel MSR (Model Specific Register) interface.
+//
+// It holds one open file handle per CPU whose MSR device file was found by
+// findAvailableCPUs and exposes the readable energy counters as EnergyZones.
+// Init populates the reader; Zones returns an error until zones exist; Close
+// drops all handles and zones again.
+type msrReader struct {
+ // Filled by Init (one entry per CPU ID), replaced with an empty map by Close.
+ msrFiles map[int]*os.File // CPU ID -> MSR file handle
+ // Built by createZones; Zones hands out copies of this slice.
+ zones []EnergyZone // Available energy zones
+ // Set by Init from readEnergyUnit on the first (lowest-numbered) CPU.
+ energyUnit float64 // Energy unit in microjoules per LSB
+ // fmt.Sprintf template taking the CPU ID; its grandparent directory is the
+ // directory scanned for per-CPU subdirectories.
+ devicePath string // MSR device path template
+ // Logger tagged with service=msr-reader by NewMSRReader.
+ logger *slog.Logger
+ // Taken by Zones (read lock) and createZones (write lock) around zones.
+ mu sync.RWMutex // Thread safety for zone operations
+}
+
+// MSR zone configuration mapping zone names to MSR offsets.
+//
+// createZones probes every entry on every CPU and only creates a zone when
+// the register at that offset can be read. The keys are internal names; see
+// zoneNameMapping for the name a zone is finally published under.
+var msrZoneConfig = map[string]uint32{
+ ZonePackage: MSRPkgEnergyStatus,
+ ZonePP0: MSRPP0EnergyStatus, // Maps to "core" zone
+ ZoneDRAM: MSRDRAMEnergyStatus,
+}
+
+// zoneNameMapping maps MSR zone names to standard RAPL zone names.
+//
+// Names without an entry here are used unchanged as the zone's display name.
+var zoneNameMapping = map[string]string{
+ ZonePP0: ZoneCore, // PP0 (Power Plane 0) is the core domain
+}
+
+// NewMSRReader builds an uninitialized MSR reader for the given device path
+// template (for example "/dev/cpu/%d/msr", where %d receives the CPU ID).
+//
+// A nil logger is replaced by slog.Default(); the logger actually stored is
+// tagged with service=msr-reader. No files are opened here - call Init first.
+func NewMSRReader(devicePath string, logger *slog.Logger) *msrReader {
+	base := logger
+	if base == nil {
+		base = slog.Default()
+	}
+
+	reader := &msrReader{
+		devicePath: devicePath,
+		logger:     base.With("service", "msr-reader"),
+		msrFiles:   make(map[int]*os.File),
+	}
+	return reader
+}
+
+// Name returns the name of this power reader implementation
+func (m *msrReader) Name() string {
+ return "msr"
+}
+
+// Available reports whether the MSR interface looks usable on this system.
+//
+// It requires that the CPU directory derived from devicePath exists and that
+// findAvailableCPUs finds at least one CPU with an MSR device file. Each
+// negative outcome is logged at debug level with its reason. No file is
+// opened, so read permission on the device is not verified here.
+func (m *msrReader) Available() bool {
+	// "/dev/cpu/%d/msr" -> "/dev/cpu"
+	root := filepath.Dir(filepath.Dir(m.devicePath))
+
+	_, statErr := os.Stat(root)
+	if os.IsNotExist(statErr) {
+		m.logger.Debug("MSR not available: CPU directory does not exist", "dir", root)
+		return false
+	}
+
+	// The directory alone is not enough: at least one per-CPU device must exist.
+	found, scanErr := m.findAvailableCPUs()
+	switch {
+	case scanErr != nil:
+		m.logger.Debug("MSR not available: failed to scan for CPUs", "error", scanErr)
+		return false
+	case len(found) == 0:
+		m.logger.Debug("MSR not available: no CPUs with MSR access found")
+		return false
+	}
+
+	return true
+}
+
+// Init prepares the reader for use.
+//
+// Steps, in order: confirm availability, enumerate CPUs with an MSR device,
+// open every device read-only, read the energy unit from the lowest-numbered
+// CPU, and build the energy zones. If any step after the first open fails,
+// everything opened so far is closed again before the error is returned.
+func (m *msrReader) Init() error {
+	if !m.Available() {
+		return fmt.Errorf("MSR interface not available")
+	}
+
+	cpus, err := m.findAvailableCPUs()
+	if err != nil {
+		return fmt.Errorf("failed to find available CPUs: %w", err)
+	}
+	if len(cpus) == 0 {
+		return fmt.Errorf("no CPUs with MSR access found")
+	}
+
+	// abort releases whatever has been opened and passes the original failure
+	// through; a close failure is only logged so it cannot mask the cause.
+	abort := func(cause error) error {
+		if closeErr := m.Close(); closeErr != nil {
+			m.logger.Warn("Failed to close MSR files", "error", closeErr)
+		}
+		return cause
+	}
+
+	for _, id := range cpus {
+		path := fmt.Sprintf(m.devicePath, id)
+		handle, openErr := os.OpenFile(path, os.O_RDONLY, 0)
+		if openErr != nil {
+			return abort(fmt.Errorf("failed to open MSR file %s: %w", path, openErr))
+		}
+		m.msrFiles[id] = handle
+	}
+
+	// cpus is sorted, so this is the lowest-numbered CPU.
+	first := cpus[0]
+	unit, err := readEnergyUnit(m.msrFiles[first])
+	if err != nil {
+		return abort(fmt.Errorf("failed to read energy unit from CPU %d: %w", first, err))
+	}
+	m.energyUnit = unit
+
+	if err := m.createZones(); err != nil {
+		return abort(fmt.Errorf("failed to create MSR zones: %w", err))
+	}
+
+	m.logger.Info("MSR reader initialized",
+		"cpus", len(m.msrFiles),
+		"zones", len(m.zones),
+		"energy_unit_uj", m.energyUnit)
+
+	return nil
+}
+
+// Zones returns a snapshot of the MSR-backed energy zones.
+//
+// An error is returned when no zones exist, which is the case before a
+// successful Init and after Close. The returned slice is a copy, so callers
+// may reorder or truncate it without affecting the reader.
+func (m *msrReader) Zones() ([]EnergyZone, error) {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	count := len(m.zones)
+	if count == 0 {
+		return nil, fmt.Errorf("MSR reader not initialized or no zones available")
+	}
+
+	snapshot := make([]EnergyZone, 0, count)
+	snapshot = append(snapshot, m.zones...)
+	return snapshot, nil
+}
+
+// Close closes all MSR files and releases resources.
+//
+// Every handle is closed even when an earlier close fails; each failure is
+// logged and the error of the last failing close is returned. Afterwards the
+// reader holds no files and no zones, so Zones reports an error until Init is
+// run again. Calling Close on a reader that was never initialized is a no-op.
+func (m *msrReader) Close() error {
+	// zones is read under mu by Zones and rebuilt under mu by createZones;
+	// resetting it here without the lock would race with both.
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	var lastErr error
+
+	for cpuID, file := range m.msrFiles {
+		if err := file.Close(); err != nil {
+			lastErr = err
+			m.logger.Warn("Failed to close MSR file", "cpu", cpuID, "error", err)
+		}
+	}
+
+	// Drop references to the closed handles and to the zones built on them.
+	m.msrFiles = make(map[int]*os.File)
+	m.zones = nil
+
+	return lastErr
+}
+
+// findAvailableCPUs lists, in ascending order, the IDs of all CPUs that have
+// an MSR device file.
+//
+// The directory two levels above devicePath (e.g. "/dev/cpu" for
+// "/dev/cpu/%d/msr") is scanned; every subdirectory with a numeric name is a
+// candidate and is kept when os.Stat succeeds on its formatted device path.
+// Only existence is checked - the device is not opened. The result is a nil
+// slice when nothing matches; an error is returned only when the directory
+// itself cannot be read.
+func (m *msrReader) findAvailableCPUs() ([]int, error) {
+	root := filepath.Dir(filepath.Dir(m.devicePath))
+	listing, err := os.ReadDir(root)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read CPU directory %s: %w", root, err)
+	}
+
+	var ids []int
+	for _, item := range listing {
+		if !item.IsDir() {
+			continue
+		}
+
+		// Directory names that are not plain integers are not CPUs.
+		id, convErr := strconv.Atoi(item.Name())
+		if convErr != nil {
+			continue
+		}
+
+		if _, statErr := os.Stat(fmt.Sprintf(m.devicePath, id)); statErr != nil {
+			continue
+		}
+		ids = append(ids, id)
+	}
+
+	// Stable ordering so that "first CPU" means the same thing on every run.
+	sort.Ints(ids)
+
+	return ids, nil
+}
+
+// createZones creates MSR-based energy zones for all available MSR counters
+func (m *msrReader) createZones() error {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+
+ m.zones = nil
+
+ // Get sorted CPU IDs for consistent zone ordering
+ var cpuIDs []int
+ for cpuID := range m.msrFiles {
+ cpuIDs = append(cpuIDs, cpuID)
+ }
+ sort.Ints(cpuIDs)
+
+ // Group zones by name for potential aggregation
+ zoneGroups := make(map[string][]*msrZone)
+
+ // Create zones for each MSR counter on each CPU
+ for _, cpuID := range cpuIDs {
+ msrFile := m.msrFiles[cpuID]
+
+ for zoneName, msrOffset := range msrZoneConfig {
+ // Test if this MSR register is readable on this CPU
+ if !m.isRegisterReadable(msrFile, msrOffset) {
+ m.logger.Debug("MSR register not readable, skipping zone",
+ "cpu", cpuID, "zone", zoneName, "msr", fmt.Sprintf("0x%x", msrOffset))
+ continue
+ }
+
+ // Map internal zone names to standard RAPL names if needed
+ displayName := zoneName
+ if mappedName, exists := zoneNameMapping[zoneName]; exists {
+ displayName = mappedName
+ }
+
+ // Create MSR zone
+ zone := NewMSRZone(displayName, cpuID, cpuID, msrOffset, m.energyUnit, msrFile)
+ zoneGroups[displayName] = append(zoneGroups[displayName], zone)
+
+ m.logger.Debug("Created MSR zone",
+ "name", displayName, "cpu", cpuID, "msr", fmt.Sprintf("0x%x", msrOffset))
+ }
+ }
+
+ // Convert zone groups to EnergyZone interfaces
+ // For multi-socket systems, aggregate zones with the same name
+ for name, zones := range zoneGroups {
+ if len(zones) == 1 {
+ // Single zone - use directly
+ m.zones = append(m.zones, zones[0])
+ } else {
+ // Multiple zones - create aggregated zone
+ var energyZones []EnergyZone
+ for _, zone := range zones {
+ energyZones = append(energyZones, zone)
+ }
+ aggregated := NewAggregatedZone(energyZones)
+ m.zones = append(m.zones, aggregated)
+
+ m.logger.Debug("Created aggregated MSR zone",
+ "name", name, "zone_count", len(zones))
+ }
+ }
+
+ if len(m.zones) == 0 {
+ return fmt.Errorf("no readable MSR energy counters found")
+ }
+
+ return nil
+}
+
+// isRegisterReadable tests if an MSR register can be read without error
+func (m *msrReader) isRegisterReadable(msrFile *os.File, msrOffset uint32) bool {
+ // Try to seek to the register
+ _, err := msrFile.Seek(int64(msrOffset), 0)
+ if err != nil {
+ return false
+ }
+
+ // Try to read 8 bytes from the register
+ buf := make([]byte, 8)
+ _, err = msrFile.Read(buf)
+ return err == nil
+}
diff --git a/internal/device/msr_reader_test.go b/internal/device/msr_reader_test.go
new file mode 100644
index 0000000000..81b91d6b68
--- /dev/null
+++ b/internal/device/msr_reader_test.go
@@ -0,0 +1,406 @@
+// SPDX-FileCopyrightText: 2025 The Kepler Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package device
+
+/*
+MSR Test Data Documentation
+
+This test file uses mock MSR data to simulate Intel RAPL MSR registers for testing
+the MSR reader implementation. The test data simulates the following registers:
+
+MSR Register Values:
+- 0x606: IA32_RAPL_POWER_UNIT - Power unit register containing scaling factors
+- 0x611: IA32_PKG_ENERGY_STATUS - Package energy counter (32-bit, wraps around)
+- 0x639: IA32_PP0_ENERGY_STATUS - Power Plane 0 (cores) energy counter
+- 0x619: IA32_DRAM_ENERGY_STATUS - DRAM energy counter
+
+File Format:
+Each MSR register value is stored as 8 bytes (uint64) in little-endian format.
+The test creates temporary MSR files and writes mock data at specific byte offsets
+corresponding to the MSR register addresses.
+
+Energy Unit Calculation:
+The power unit register (0x606) contains scaling factors in specific bit fields:
+- Bits 12:8 contain the energy unit value (e.g., value 16 means 1/(2^16) joules per LSB)
+- Energy counters use this unit to convert raw MSR values to microjoules
+- Example: energy_unit = 15.2587890625 microjoules (when unit value = 16)
+
+Counter Overflow:
+MSR energy counters are 32-bit values that wrap around at 2^32. The implementation
+must handle this overflow correctly to maintain accurate energy measurements.
+*/
+
+import (
+ "fmt"
+ "log/slog"
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// fakeMSRReader is a scripted raplReader for tests: each method returns the
+// value stored in the matching field.
+type fakeMSRReader struct {
+	zones     []EnergyZone
+	available bool
+	initError error
+	name      string
+}
+
+// Zones hands back the preset zones and never fails.
+func (f *fakeMSRReader) Zones() ([]EnergyZone, error) { return f.zones, nil }
+
+// Available reports the preset availability flag.
+func (f *fakeMSRReader) Available() bool { return f.available }
+
+// Init returns the preset initialization error; nil means success.
+func (f *fakeMSRReader) Init() error { return f.initError }
+
+// Close does nothing and always succeeds.
+func (f *fakeMSRReader) Close() error { return nil }
+
+// Name returns the preset name, or "fake-msr" when none was configured.
+func (f *fakeMSRReader) Name() string {
+	if f.name != "" {
+		return f.name
+	}
+	return "fake-msr"
+}
+
+// fakeMSRZone is a scripted EnergyZone for tests: every accessor returns the
+// value of the field it is named after.
+type fakeMSRZone struct {
+	name      string
+	index     int
+	path      string
+	energy    Energy
+	maxEnergy Energy
+	energyErr error
+}
+
+// Name returns the preset zone name.
+func (f *fakeMSRZone) Name() string { return f.name }
+
+// Index returns the preset zone index.
+func (f *fakeMSRZone) Index() int { return f.index }
+
+// Path returns the preset zone path.
+func (f *fakeMSRZone) Path() string { return f.path }
+
+// Energy returns the preset reading together with the preset error.
+func (f *fakeMSRZone) Energy() (Energy, error) { return f.energy, f.energyErr }
+
+// MaxEnergy returns the preset wraparound limit.
+func (f *fakeMSRZone) MaxEnergy() Energy { return f.maxEnergy }
+
+// TestMSRReader_Available checks that Available is true only when the CPU 0
+// msr device file exists under the configured path template.
+func TestMSRReader_Available(t *testing.T) {
+	cases := []struct {
+		name           string
+		setupDevDir    bool
+		createMSRFile  bool
+		expectedResult bool
+	}{
+		{name: "MSR available with dev directory and msr file", setupDevDir: true, createMSRFile: true, expectedResult: true},
+		{name: "MSR unavailable without dev directory", setupDevDir: false, createMSRFile: false, expectedResult: false},
+		{name: "MSR unavailable without msr file", setupDevDir: true, createMSRFile: false, expectedResult: false},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			root := t.TempDir()
+
+			// Without a dev tree the template points at a directory that is
+			// never created.
+			base := "nonexistent"
+			if tc.setupDevDir {
+				base = "dev"
+				cpuDir := filepath.Join(root, base, "cpu", "0")
+				require.NoError(t, os.MkdirAll(cpuDir, 0755))
+
+				if tc.createMSRFile {
+					f, err := os.Create(filepath.Join(cpuDir, "msr"))
+					require.NoError(t, err)
+					_ = f.Close()
+				}
+			}
+
+			devicePath := filepath.Join(root, base, "cpu", "%d", "msr")
+			reader := NewMSRReader(devicePath, slog.Default())
+
+			assert.Equal(t, tc.expectedResult, reader.Available())
+		})
+	}
+}
+
+// TestMSRReader_Init checks that Init succeeds against a populated mock MSR
+// tree and fails with "MSR interface not available" when no CPU device exists.
+func TestMSRReader_Init(t *testing.T) {
+	tests := []struct {
+		name string
+		// setupMSRs builds the fixture under tempDir and returns the device
+		// path template. It receives the subtest's own *testing.T so that a
+		// failing require aborts the subtest that is running, not the parent.
+		setupMSRs   func(t *testing.T, tempDir string) string
+		expectError bool
+		errorMsg    string
+	}{
+		{
+			name: "successful initialization",
+			setupMSRs: func(t *testing.T, tempDir string) string {
+				// Create CPU 0 with MSR file containing mock data
+				cpuDir := filepath.Join(tempDir, "dev", "cpu", "0")
+				require.NoError(t, os.MkdirAll(cpuDir, 0755))
+
+				createMockMSRFile(t, filepath.Join(cpuDir, "msr"))
+
+				return filepath.Join(tempDir, "dev", "cpu", "%d", "msr")
+			},
+			expectError: false,
+		},
+		{
+			name: "initialization fails with no CPUs",
+			setupMSRs: func(t *testing.T, tempDir string) string {
+				// Create empty dev directory
+				require.NoError(t, os.MkdirAll(filepath.Join(tempDir, "dev", "cpu"), 0755))
+				return filepath.Join(tempDir, "dev", "cpu", "%d", "msr")
+			},
+			expectError: true,
+			errorMsg:    "MSR interface not available",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			devicePath := tt.setupMSRs(t, t.TempDir())
+
+			reader := NewMSRReader(devicePath, slog.Default())
+			// Close the reader even when an assertion below stops the subtest.
+			t.Cleanup(func() { _ = reader.Close() })
+
+			err := reader.Init()
+			if !tt.expectError {
+				assert.NoError(t, err)
+				return
+			}
+
+			// require, not assert: err.Error() below would panic on a nil error.
+			require.Error(t, err)
+			if tt.errorMsg != "" {
+				assert.Contains(t, err.Error(), tt.errorMsg)
+			}
+		})
+	}
+}
+
+// TestMSRReader_Zones initializes a reader over two mock CPUs and checks that
+// it exposes readable zones, at least one of them named package, core or dram.
+func TestMSRReader_Zones(t *testing.T) {
+	cpuRoot := filepath.Join(t.TempDir(), "dev", "cpu")
+
+	// Two CPUs, each backed by its own mock MSR file.
+	for _, cpu := range []int{0, 1} {
+		dir := filepath.Join(cpuRoot, fmt.Sprintf("%d", cpu))
+		require.NoError(t, os.MkdirAll(dir, 0755))
+		createMockMSRFile(t, filepath.Join(dir, "msr"))
+	}
+
+	reader := NewMSRReader(filepath.Join(cpuRoot, "%d", "msr"), slog.Default())
+	require.NoError(t, reader.Init())
+	t.Cleanup(func() {
+		assert.NoError(t, reader.Close())
+	})
+
+	zones, err := reader.Zones()
+	require.NoError(t, err)
+	assert.Greater(t, len(zones), 0)
+
+	// Every zone must be readable; remember which names showed up.
+	seen := make(map[string]bool)
+	for _, z := range zones {
+		seen[z.Name()] = true
+
+		reading, readErr := z.Energy()
+		assert.NoError(t, readErr)
+		assert.GreaterOrEqual(t, reading, Energy(0))
+	}
+
+	// Any one of the known zone names is enough.
+	hasKnownZone := seen["package"] || seen["core"] || seen["dram"]
+	assert.True(t, hasKnownZone, "Expected at least one MSR zone type")
+}
+
+// TestMSRReader_Name checks that the MSR reader identifies itself as "msr".
+func TestMSRReader_Name(t *testing.T) {
+	got := NewMSRReader("/dev/cpu/%d/msr", slog.Default()).Name()
+	assert.Equal(t, "msr", got)
+}
+
+// TestMSRReader_Close checks that Close succeeds on an initialized reader and
+// that Zones afterwards reports "MSR reader not initialized".
+func TestMSRReader_Close(t *testing.T) {
+	cpuDir := filepath.Join(t.TempDir(), "dev", "cpu", "0")
+	require.NoError(t, os.MkdirAll(cpuDir, 0755))
+	createMockMSRFile(t, filepath.Join(cpuDir, "msr"))
+
+	// Same tree, with the CPU number replaced by the "%d" placeholder.
+	devicePath := filepath.Join(filepath.Dir(cpuDir), "%d", "msr")
+	reader := NewMSRReader(devicePath, slog.Default())
+	require.NoError(t, reader.Init())
+
+	// Zones are available while the reader is open.
+	zones, err := reader.Zones()
+	require.NoError(t, err)
+	assert.Greater(t, len(zones), 0)
+
+	assert.NoError(t, reader.Close())
+
+	// Once closed, the reader must refuse to list zones.
+	_, err = reader.Zones()
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "MSR reader not initialized")
+}
+
+// TestMSRZone_Energy verifies that a raw counter stored at the package energy
+// register offset is scaled by the energy unit into microjoules.
+func TestMSRZone_Energy(t *testing.T) {
+	cases := []struct {
+		name          string
+		msrData       []byte
+		energyUnit    float64
+		expectedRange [2]Energy // inclusive {min, max}
+	}{
+		{
+			name: "normal energy reading",
+			// Little-endian 64-bit value whose low 32 bits are 0x100000.
+			msrData:       []byte{0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00},
+			energyUnit:    15.2587890625,                                  // 1000000 / 2^16
+			expectedRange: [2]Energy{Energy(15999998), Energy(16000000)}, // about 16 J
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			file, err := os.Create(filepath.Join(t.TempDir(), "msr"))
+			require.NoError(t, err)
+			t.Cleanup(func() {
+				assert.NoError(t, file.Close())
+			})
+
+			// Place the raw value at the package energy register offset.
+			_, err = file.WriteAt(tc.msrData, int64(MSRPkgEnergyStatus))
+			require.NoError(t, err)
+
+			zone := NewMSRZone("package", 0, 0, MSRPkgEnergyStatus, tc.energyUnit, file)
+			got, err := zone.Energy()
+			require.NoError(t, err)
+
+			lower, upper := tc.expectedRange[0], tc.expectedRange[1]
+			assert.GreaterOrEqual(t, got, lower)
+			assert.LessOrEqual(t, got, upper)
+		})
+	}
+}
+
+// TestMSRZone_MaxEnergy checks that MaxEnergy is the largest 32-bit counter
+// value scaled by the zone's energy unit.
+func TestMSRZone_MaxEnergy(t *testing.T) {
+	energyUnit := 15.2587890625 // 1000000 / 2^16
+	zone := NewMSRZone("package", 0, 0, MSRPkgEnergyStatus, energyUnit, nil)
+
+	// The counter is 32 bits wide, so its largest value is 0xFFFFFFFF (2^32 - 1).
+	want := Energy(float64(0xFFFFFFFF) * energyUnit)
+	assert.Equal(t, want, zone.MaxEnergy())
+}
+
+// Helper functions
+
+// createMockMSRFile creates a file at path that stands in for /dev/cpu/N/msr.
+// Each register is stored as an 8-byte little-endian value at the byte offset
+// equal to its MSR address. It is a test helper: failures are reported at the
+// caller's line.
+func createMockMSRFile(t *testing.T, path string) {
+	t.Helper()
+
+	file, err := os.Create(path)
+	require.NoError(t, err)
+	defer func() {
+		assert.NoError(t, file.Close())
+	}()
+
+	registers := []struct {
+		offset int64
+		value  []byte
+	}{
+		// Power unit register (0x606): value 0x1000, so bits 12:8 hold 16,
+		// i.e. an energy unit of 1/2^16 J = 15.2587890625 microjoules per LSB.
+		{int64(MSRPowerUnit), []byte{0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
+		// Package energy counter (0x611): 0x100000 LSB, about 16.0 J.
+		{int64(MSRPkgEnergyStatus), []byte{0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00}},
+		// PP0 (cores) energy counter (0x639): 0x80000 LSB, about 8.0 J.
+		{int64(MSRPP0EnergyStatus), []byte{0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00}},
+		// DRAM energy counter (0x619): 0x40000 LSB, about 4.0 J.
+		{int64(MSRDRAMEnergyStatus), []byte{0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00}},
+	}
+
+	for _, reg := range registers {
+		_, err := file.WriteAt(reg.value, reg.offset)
+		require.NoError(t, err, "writing mock MSR 0x%x", reg.offset)
+	}
+}
diff --git a/internal/device/msr_zone.go b/internal/device/msr_zone.go
new file mode 100644
index 0000000000..34ec7dcc92
--- /dev/null
+++ b/internal/device/msr_zone.go
@@ -0,0 +1,125 @@
+// SPDX-FileCopyrightText: 2025 The Kepler Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package device
+
+import (
+ "encoding/binary"
+ "fmt"
+ "math"
+ "os"
+)
+
+// MSR register addresses used for Intel RAPL energy accounting; each address is also the byte offset read from the MSR device file.
+const (
+ // MSRPowerUnit is the RAPL power-unit register; readEnergyUnit takes the energy unit exponent from its bits 12:8.
+ MSRPowerUnit = 0x606
+
+ // Energy status registers: the counter is the low 32 bits of the 64-bit value and wraps after 0xFFFFFFFF.
+ MSRPkgEnergyStatus = 0x611 // Package energy counter
+ MSRPP0EnergyStatus = 0x639 // Power Plane 0 (cores) energy counter
+ MSRDRAMEnergyStatus = 0x619 // DRAM energy counter
+)
+
+// msrZone is an EnergyZone backed by a single MSR energy counter that is read
+// through an already opened MSR device file.
+type msrZone struct {
+	name       string
+	index      int
+	cpuID      int
+	msrOffset  uint32
+	energyUnit float64 // microjoules represented by one counter LSB
+	msrFile    *os.File
+}
+
+// NewMSRZone builds a zone called name that reads the counter at msrOffset
+// from msrFile and scales it by energyUnit (microjoules per LSB). index is
+// what Index reports; cpuID is used in Path and in error messages.
+func NewMSRZone(name string, index, cpuID int, msrOffset uint32, energyUnit float64, msrFile *os.File) *msrZone {
+	zone := new(msrZone)
+	zone.name = name
+	zone.index = index
+	zone.cpuID = cpuID
+	zone.msrOffset = msrOffset
+	zone.energyUnit = energyUnit
+	zone.msrFile = msrFile
+	return zone
+}
+
+// Name returns the name the zone was created with.
+func (m *msrZone) Name() string { return m.name }
+
+// Index returns the index the zone was created with.
+func (m *msrZone) Index() int { return m.index }
+
+// Path returns an identifier of the form "/dev/cpu/<cpu>/msr:0x<offset>".
+func (m *msrZone) Path() string {
+	cpu, offset := m.cpuID, m.msrOffset
+	return fmt.Sprintf("/dev/cpu/%d/msr:0x%x", cpu, offset)
+}
+
+// Energy reads the zone's MSR and returns the accumulated energy in
+// microjoules.
+//
+// The register is fetched with one positioned 8-byte read (ReadAt) instead of
+// Seek followed by Read. The same *os.File may back several zones, and a
+// seek/read pair on a shared descriptor can be interleaved by another reader,
+// which would return the wrong register. ReadAt does not touch the file offset.
+//
+// It returns an error when the MSR file is nil or when the full 8 bytes
+// cannot be read.
+func (m *msrZone) Energy() (Energy, error) {
+	if m.msrFile == nil {
+		return 0, fmt.Errorf("MSR file not opened for CPU %d", m.cpuID)
+	}
+
+	var raw [8]byte
+	if _, err := m.msrFile.ReadAt(raw[:], int64(m.msrOffset)); err != nil {
+		return 0, fmt.Errorf("failed to read MSR 0x%x from CPU %d: %w", m.msrOffset, m.cpuID, err)
+	}
+
+	// The energy counter occupies the low 32 bits of the little-endian value;
+	// the conversion to uint32 drops the upper half.
+	energyCounter := uint32(binary.LittleEndian.Uint64(raw[:]))
+
+	// Scale raw counter ticks to microjoules.
+	return Energy(float64(energyCounter) * m.energyUnit), nil
+}
+
+// MaxEnergy returns the energy, in microjoules, represented by the largest
+// value the 32-bit counter can hold (math.MaxUint32 ticks).
+func (m *msrZone) MaxEnergy() Energy {
+	return Energy(float64(math.MaxUint32) * m.energyUnit)
+}
+
+// readEnergyUnit reads the energy unit from the IA32_RAPL_POWER_UNIT MSR
+// Returns the energy unit in microjoules per LSB
+func readEnergyUnit(msrFile *os.File) (float64, error) {
+ if msrFile == nil {
+ return 0, fmt.Errorf("MSR file not opened")
+ }
+
+ // Seek to the power unit MSR
+ _, err := msrFile.Seek(int64(MSRPowerUnit), 0)
+ if err != nil {
+ return 0, fmt.Errorf("failed to seek to MSR power unit register: %w", err)
+ }
+
+ var powerUnit uint64
+ err = binary.Read(msrFile, binary.LittleEndian, &powerUnit)
+ if err != nil {
+ return 0, fmt.Errorf("failed to read MSR power unit register: %w", err)
+ }
+
+ // Energy unit is in bits 12:8 of the power unit register
+ energyUnitBits := (powerUnit >> 8) & 0x1F
+
+ // Energy unit = 1 / (2^energyUnitBits) joules
+ // Convert to microjoules: multiply by 1,000,000
+ energyUnit := 1000000.0 / float64(uint64(1)<= 0)
+ assert.GreaterOrEqual(t, zone.Index(), 0)
+ // Zone should have a path
+ assert.NotEmpty(t, zone.Path())
+
+ // Test energy reading
+ energy, err := zone.Energy()
+ assert.NoError(t, err)
+ assert.Greater(t, uint64(energy), uint64(0)) // Should have some energy value
+}
+
+// TestPowercapReader_Name checks that the powercap reader reports "powercap".
+func TestPowercapReader_Name(t *testing.T) {
+	powercap, err := NewPowercapReader("/tmp")
+	require.NoError(t, err)
+
+	got := powercap.Name()
+	assert.Equal(t, "powercap", got)
+}
+
+// TestPowercapReader_Close checks that closing a powercap reader succeeds.
+func TestPowercapReader_Close(t *testing.T) {
+	powercap, err := NewPowercapReader("/tmp")
+	require.NoError(t, err)
+
+	assert.NoError(t, powercap.Close())
+}
+
+// TestSysfsRaplZone_Implementation exercises every EnergyZone method on the
+// first zone returned by a powercap reader over the valid sysfs fixture.
+func TestSysfsRaplZone_Implementation(t *testing.T) {
+	powercap, err := NewPowercapReader(validSysFSPath)
+	require.NoError(t, err)
+
+	zones, err := powercap.Zones()
+	require.NoError(t, err)
+	require.Greater(t, len(zones), 0)
+
+	first := zones[0]
+
+	// Identity accessors.
+	assert.NotEmpty(t, first.Name())
+	assert.GreaterOrEqual(t, first.Index(), 0)
+	assert.NotEmpty(t, first.Path())
+
+	// The current reading must be available and non-zero.
+	reading, err := first.Energy()
+	assert.NoError(t, err)
+	assert.Greater(t, uint64(reading), uint64(0))
+
+	// The wraparound limit must be non-zero as well.
+	limit := first.MaxEnergy()
+	assert.Greater(t, uint64(limit), uint64(0))
+}
diff --git a/internal/device/rapl_sysfs_power_meter.go b/internal/device/rapl_power_meter.go
similarity index 52%
rename from internal/device/rapl_sysfs_power_meter.go
rename to internal/device/rapl_power_meter.go
index c5b64578a0..92f21f04a0 100644
--- a/internal/device/rapl_sysfs_power_meter.go
+++ b/internal/device/rapl_power_meter.go
@@ -8,27 +8,41 @@ import (
"log/slog"
"strings"
- "github.com/prometheus/procfs/sysfs"
+ "k8s.io/utils/ptr"
)
-// raplPowerMeter implements CPUPowerMeter using sysfs
+// raplPowerMeter implements CPUPowerMeter with automatic MSR fallback support
type raplPowerMeter struct {
- reader sysfsReader
+ reader raplReader // Current active reader (powercap or MSR)
cachedZones []EnergyZone
logger *slog.Logger
zoneFilter []string
topZone EnergyZone
+
+ // Configuration for MSR fallback
+ msrConfig MSRConfig
+ sysfsPath string
+ useMSR bool // Track which backend is active
+}
+
+// MSRConfig holds the MSR backend settings that selectRaplReader consults when choosing a reader.
+type MSRConfig struct {
+ Enabled *bool // fall back to MSR when powercap cannot be used; nil is treated as false
+ Force *bool // use MSR without trying powercap first; nil is treated as false
+ DevicePath string // path template handed to NewMSRReader, e.g. "/dev/cpu/%d/msr"
}
type OptionFn func(*raplPowerMeter)
-// sysfsReader is an interface for a sysfs filesystem used by raplPowerMeter to mock for testing
-type sysfsReader interface {
- Zones() ([]EnergyZone, error)
+// WithMSRConfig returns an option that replaces the meter's MSR configuration
+// (fallback flag, force flag and device path template) with msrConfig.
+func WithMSRConfig(msrConfig MSRConfig) OptionFn {
+	apply := func(pm *raplPowerMeter) {
+		pm.msrConfig = msrConfig
+	}
+	return apply
}
-// WithSysFSReader sets the sysfsReader used by raplPowerMeter
-func WithSysFSReader(r sysfsReader) OptionFn {
+// WithRaplReader sets a specific raplReader (for testing)
+func WithRaplReader(r raplReader) OptionFn {
return func(pm *raplPowerMeter) {
pm.reader = r
}
@@ -49,17 +63,18 @@ func WithZoneFilter(zones []string) OptionFn {
}
}
-// NewCPUPowerMeter creates a new CPU power meter
+// NewCPUPowerMeter creates a new CPU power meter with MSR fallback support
func NewCPUPowerMeter(sysfsPath string, opts ...OptionFn) (*raplPowerMeter, error) {
- fs, err := sysfs.NewFS(sysfsPath)
- if err != nil {
- return nil, err
- }
-
ret := &raplPowerMeter{
- reader: sysfsRaplReader{fs: fs},
logger: slog.Default().With("service", "rapl"),
zoneFilter: []string{},
+ sysfsPath: sysfsPath,
+ // Default MSR configuration (disabled)
+ msrConfig: MSRConfig{
+ Enabled: ptr.To(false),
+ Force: ptr.To(false),
+ DevicePath: "/dev/cpu/%d/msr",
+ },
}
for _, opt := range opts {
@@ -70,21 +85,112 @@ func NewCPUPowerMeter(sysfsPath string, opts ...OptionFn) (*raplPowerMeter, erro
}
func (r *raplPowerMeter) Name() string {
- return "rapl"
+ if r.useMSR {
+ return "rapl-msr"
+ }
+ return "rapl-powercap"
}
func (r *raplPowerMeter) Init() error {
- // ensure zones can be read but don't cache them
- zones, err := r.reader.Zones()
+ // Clear any cached state
+ r.cachedZones = nil
+ r.topZone = nil
+
+ // If a specific reader is set (for testing), use it directly
+ if r.reader != nil {
+ r.logger.Info("Using provided power reader", "reader", r.reader.Name())
+ return r.validateReader(r.reader)
+ }
+
+ // Determine which reader to use based on configuration and availability
+ reader, useMSR, err := r.selectRaplReader()
if err != nil {
- return err
- } else if len(zones) == 0 {
- return fmt.Errorf("no RAPL zones found")
+ return fmt.Errorf("failed to select power reader: %w", err)
}
- // try reading the first zone and return the error
+ r.reader = reader
+ r.useMSR = useMSR
+
+ r.logger.Info("Selected power reader",
+ "reader", r.reader.Name(),
+ "msr_fallback", r.useMSR,
+ "force_msr", ptr.Deref(r.msrConfig.Force, false))
+
+ return r.validateReader(r.reader)
+}
+
+// selectRaplReader picks and initializes the RAPL backend.
+//
+// Selection order:
+//  1. msrConfig.Force is set: only MSR is tried, and failure is an error.
+//  2. Otherwise powercap is tried first.
+//  3. If powercap cannot be used and msrConfig.Enabled is set, MSR is tried.
+//
+// It returns the initialized reader, whether that reader is the MSR backend,
+// and an error when no backend could be brought up.
+func (r *raplPowerMeter) selectRaplReader() (raplReader, bool, error) {
+	forceMSR := ptr.Deref(r.msrConfig.Force, false)
+	enableFallback := ptr.Deref(r.msrConfig.Enabled, false)
+
+	// If force MSR is enabled, use MSR directly (for testing)
+	if forceMSR {
+		r.logger.Info("MSR forced via configuration")
+		msr := NewMSRReader(r.msrConfig.DevicePath, r.logger)
+		if !msr.Available() {
+			return nil, false, fmt.Errorf("MSR reader forced but not available")
+		}
+		if err := msr.Init(); err != nil {
+			return nil, false, fmt.Errorf("failed to initialize forced MSR reader: %w", err)
+		}
+		return msr, true, nil
+	}
+
+	// Try powercap first (default behavior). The three failure modes are kept
+	// apart so the log never shows a nil "error" attribute.
+	powercapReader, err := NewPowercapReader(r.sysfsPath)
+	switch {
+	case err != nil:
+		r.logger.Debug("Powercap reader not available", "error", err)
+	case !powercapReader.Available():
+		r.logger.Debug("Powercap reader not available")
+	default:
+		initErr := powercapReader.Init()
+		if initErr == nil {
+			r.logger.Debug("Using powercap reader")
+			return powercapReader, false, nil
+		}
+		r.logger.Debug("Powercap reader initialization failed", "error", initErr)
+	}
+
+	// Powercap is unusable; without the fallback there is nothing left to try.
+	if !enableFallback {
+		return nil, false, fmt.Errorf("powercap unavailable and MSR fallback disabled")
+	}
+
+	r.logger.Info("Attempting MSR fallback as powercap unavailable")
+
+	// Log security warning for MSR usage
+	r.logger.Warn("MSR fallback enabled - be aware of PLATYPUS attack vectors (CVE-2020-8694/8695)")
+
+	msr := NewMSRReader(r.msrConfig.DevicePath, r.logger)
+	if !msr.Available() {
+		return nil, false, fmt.Errorf("neither powercap nor MSR readers are available")
+	}
+	if err := msr.Init(); err != nil {
+		return nil, false, fmt.Errorf("MSR fallback failed to initialize: %w", err)
+	}
+
+	r.logger.Info("MSR fallback activated successfully")
+	return msr, true, nil
+}
+
+// validateReader ensures the reader can provide valid energy readings
+func (r *raplPowerMeter) validateReader(reader raplReader) error {
+ zones, err := reader.Zones()
+ if err != nil {
+ return fmt.Errorf("failed to get zones from %s reader: %w", reader.Name(), err)
+ }
+
+ if len(zones) == 0 {
+ return fmt.Errorf("no energy zones found from %s reader", reader.Name())
+ }
+
+ // Try reading energy from the first zone to verify functionality
_, err = zones[0].Energy()
- return err
+ if err != nil {
+ return fmt.Errorf("failed to read energy from zone %s: %w", zones[0].Name(), err)
+ }
+
+ return nil
}
func (r *raplPowerMeter) needsFiltering() bool {
@@ -122,6 +228,10 @@ func (r *raplPowerMeter) Zones() ([]EnergyZone, error) {
return r.cachedZones, nil
}
+ if r.reader == nil {
+ return nil, fmt.Errorf("power reader not initialized")
+ }
+
zones, err := r.reader.Zones()
if err != nil {
return nil, err
@@ -135,7 +245,6 @@ func (r *raplPowerMeter) Zones() ([]EnergyZone, error) {
}
// filter out non-standard zones
-
stdZoneMap := map[zoneKey]EnergyZone{}
for _, zone := range zones {
key := zoneKey{name: zone.Name(), index: zone.Index()}
@@ -230,58 +339,23 @@ func (r *raplPowerMeter) PrimaryEnergyZone() (EnergyZone, error) {
return zones[0], nil
}
-// isStandardRaplPath checks if a RAPL zone path is in the standard format
-func isStandardRaplPath(path string) bool {
- return strings.Contains(path, "/intel-rapl:")
-}
-
-type sysfsRaplReader struct {
- fs sysfs.FS
+// Close closes the active reader, if any, and returns its error. With no
+// reader set it does nothing and returns nil.
+func (r *raplPowerMeter) Close() error {
+	if r.reader == nil {
+		return nil
+	}
+	return r.reader.Close()
}
-func (r sysfsRaplReader) Zones() ([]EnergyZone, error) {
- raplZones, err := sysfs.GetRaplZones(r.fs)
- if err != nil {
- return nil, fmt.Errorf("failed to read rapl zones: %w", err)
+// isStandardRaplPath checks if a RAPL zone path is in the standard format
+func isStandardRaplPath(path string) bool {
+ // For powercap, check standard path format
+ if strings.Contains(path, "/intel-rapl:") {
+ return true
}
-
- // convert sysfs.RaplZones to EnergyZones
- energyZones := make([]EnergyZone, 0, len(raplZones))
- for _, zone := range raplZones {
- energyZones = append(energyZones, sysfsRaplZone{zone})
+ // For MSR, check MSR path format
+ if strings.Contains(path, "/dev/cpu/") && strings.Contains(path, "/msr:") {
+ return true
}
-
- return energyZones, nil
-}
-
-// sysfsRaplZone implements EnergyZone using sysfs.RaplZone.
-// It is an adapter for the EnergyZone interface
-type sysfsRaplZone struct {
- zone sysfs.RaplZone
-}
-
-// Name returns the name of the zone
-func (s sysfsRaplZone) Name() string {
- return s.zone.Name
-}
-
-// Index returns the index of the zone
-func (s sysfsRaplZone) Index() int {
- return s.zone.Index
-}
-
-// Path returns the path of the zone
-func (s sysfsRaplZone) Path() string {
- return s.zone.Path
-}
-
-// Energy returns the current energy value
-func (s sysfsRaplZone) Energy() (Energy, error) {
- mj, err := s.zone.GetEnergyMicrojoules()
- return Energy(mj), err
-}
-
-// MaxEnergy returns the maximum energy value before wraparound
-func (s sysfsRaplZone) MaxEnergy() Energy {
- return Energy(s.zone.MaxMicrojoules)
+ return false
}
diff --git a/internal/device/rapl_power_meter_test.go b/internal/device/rapl_power_meter_test.go
new file mode 100644
index 0000000000..0062846f68
--- /dev/null
+++ b/internal/device/rapl_power_meter_test.go
@@ -0,0 +1,330 @@
+// SPDX-FileCopyrightText: 2025 The Kepler Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package device
+
+import (
+ "fmt"
+ "log/slog"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "k8s.io/utils/ptr"
+)
+
+// TestRaplPowerMeter_Init_WithMockReader checks that Init accepts an injected
+// reader that has zones and rejects one that has none.
+func TestRaplPowerMeter_Init_WithMockReader(t *testing.T) {
+	type testCase struct {
+		name         string
+		mockReader   raplReader
+		expectedName string
+		expectError  bool
+	}
+
+	cases := []testCase{
+		{
+			name:         "successful initialization with mock powercap reader",
+			mockReader:   &fakePowercapReader{available: true, zones: createTestZones("powercap"), name: "powercap"},
+			expectedName: "powercap",
+			expectError:  false,
+		},
+		{
+			name:         "successful initialization with mock MSR reader",
+			mockReader:   &fakeMSRReader{available: true, zones: createTestZones("msr"), name: "msr"},
+			expectedName: "msr",
+			expectError:  false,
+		},
+		{
+			name:        "initialization fails with reader that has no zones",
+			mockReader:  &fakePowercapReader{available: true, zones: []EnergyZone{}, name: "empty"},
+			expectError: true,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			pm, err := NewCPUPowerMeter("/fake/sysfs", WithRaplReader(tc.mockReader))
+			require.NoError(t, err)
+
+			initErr := pm.Init()
+			if tc.expectError {
+				assert.Error(t, initErr)
+				return
+			}
+
+			assert.NoError(t, initErr)
+			assert.Equal(t, tc.expectedName, pm.reader.Name())
+		})
+	}
+}
+
+// TestRaplPowerMeter_Name checks that the meter name follows the active
+// backend flag.
+func TestRaplPowerMeter_Name(t *testing.T) {
+	cases := []struct {
+		name     string
+		useMSR   bool
+		expected string
+	}{
+		{name: "powercap reader", useMSR: false, expected: "rapl-powercap"},
+		{name: "msr reader", useMSR: true, expected: "rapl-msr"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			meter := &raplPowerMeter{useMSR: tc.useMSR}
+			assert.Equal(t, tc.expected, meter.Name())
+		})
+	}
+}
+
+// TestRaplPowerMeter_Zones_WithFiltering checks that Zones honors the zone
+// filter and fails when the filter leaves no zones.
+func TestRaplPowerMeter_Zones_WithFiltering(t *testing.T) {
+	allZones := []EnergyZone{
+		&fakeMSRZone{name: "package", index: 0, path: "/fake/package", energy: Energy(1000)},
+		&fakeMSRZone{name: "core", index: 0, path: "/fake/core", energy: Energy(500)},
+		&fakeMSRZone{name: "dram", index: 0, path: "/fake/dram", energy: Energy(300)},
+	}
+
+	cases := []struct {
+		name       string
+		zoneFilter []string
+		expected   []string
+	}{
+		{name: "no filter - all zones", zoneFilter: []string{}, expected: []string{"package", "core", "dram"}},
+		{name: "filter package only", zoneFilter: []string{"package"}, expected: []string{"package"}},
+		{name: "filter core and dram", zoneFilter: []string{"core", "dram"}, expected: []string{"core", "dram"}},
+		{name: "filter non-existent zone", zoneFilter: []string{"nonexistent"}, expected: []string{}},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			pm := &raplPowerMeter{
+				reader:     &fakeMSRReader{available: true, zones: allZones},
+				zoneFilter: tc.zoneFilter,
+				logger:     slog.Default(),
+			}
+
+			zones, err := pm.Zones()
+
+			// An empty expectation means the filter must be rejected.
+			if len(tc.expected) == 0 {
+				assert.Error(t, err)
+				assert.Contains(t, err.Error(), "no RAPL zones found after filtering")
+				return
+			}
+
+			assert.NoError(t, err)
+			assert.Equal(t, len(tc.expected), len(zones))
+
+			got := make([]string, 0, len(zones))
+			for _, z := range zones {
+				got = append(got, z.Name())
+			}
+			for _, want := range tc.expected {
+				assert.Contains(t, got, want)
+			}
+		})
+	}
+}
+
+// TestRaplPowerMeter_PrimaryEnergyZone checks the priority order used to pick
+// the primary zone (psys, then package, then core, else the first zone) and
+// that a second call returns the same zone.
+func TestRaplPowerMeter_PrimaryEnergyZone(t *testing.T) {
+	cases := []struct {
+		name           string
+		availableZones []string
+		expectedZone   string
+	}{
+		{name: "psys has highest priority", availableZones: []string{"core", "package", "psys", "dram"}, expectedZone: "psys"},
+		{name: "package has second priority", availableZones: []string{"core", "package", "dram"}, expectedZone: "package"},
+		{name: "core has third priority", availableZones: []string{"core", "dram"}, expectedZone: "core"},
+		{name: "fallback to first zone if no priority match", availableZones: []string{"uncore", "other"}, expectedZone: "uncore"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			// One fake zone per name, indexed by its position in the list.
+			zones := make([]EnergyZone, len(tc.availableZones))
+			for idx, zoneName := range tc.availableZones {
+				zones[idx] = &fakeMSRZone{
+					name:  zoneName,
+					index: idx,
+					path:  fmt.Sprintf("/fake/%s", zoneName),
+				}
+			}
+
+			pm := &raplPowerMeter{
+				reader: &fakeMSRReader{available: true, zones: zones},
+				logger: slog.Default(),
+			}
+
+			first, err := pm.PrimaryEnergyZone()
+			assert.NoError(t, err)
+			assert.Equal(t, tc.expectedZone, first.Name())
+
+			// A repeated call must return the same zone.
+			second, err := pm.PrimaryEnergyZone()
+			assert.NoError(t, err)
+			assert.Equal(t, first, second)
+		})
+	}
+}
+
+// TestRaplPowerMeter_Close checks that Close succeeds both with a reader set
+// and with no reader at all.
+func TestRaplPowerMeter_Close(t *testing.T) {
+	pm := &raplPowerMeter{
+		reader: &fakeMSRReader{available: true, zones: createTestZones("test")},
+		logger: slog.Default(),
+	}
+
+	// With a reader attached.
+	assert.NoError(t, pm.Close())
+
+	// With the reader removed.
+	pm.reader = nil
+	assert.NoError(t, pm.Close())
+}
+
+// TestNewCPUPowerMeter checks the defaults of a meter built without options:
+// stored sysfs path, non-nil logger, empty zone filter and MSR disabled.
+func TestNewCPUPowerMeter(t *testing.T) {
+	const sysfsPath = "/fake/sysfs"
+
+	pm, err := NewCPUPowerMeter(sysfsPath)
+	require.NoError(t, err)
+
+	// General defaults.
+	assert.Equal(t, sysfsPath, pm.sysfsPath)
+	assert.NotNil(t, pm.logger)
+	assert.Equal(t, []string{}, pm.zoneFilter)
+
+	// MSR defaults: fallback off, force off, standard device path template.
+	msr := pm.msrConfig
+	assert.Equal(t, ptr.To(false), msr.Enabled)
+	assert.Equal(t, ptr.To(false), msr.Force)
+	assert.Equal(t, "/dev/cpu/%d/msr", msr.DevicePath)
+}
+
+// TestNewCPUPowerMeter_WithOptions checks that the zone filter and MSR
+// configuration passed as options end up on the meter unchanged.
+func TestNewCPUPowerMeter_WithOptions(t *testing.T) {
+	const sysfsPath = "/fake/sysfs"
+
+	var (
+		testLogger     = slog.Default().With("test", "meter")
+		testZoneFilter = []string{"package", "core"}
+		testMSRConfig  = MSRConfig{
+			Enabled:    ptr.To(true),
+			Force:      ptr.To(false),
+			DevicePath: "/custom/cpu/%d/msr",
+		}
+	)
+
+	options := []OptionFn{
+		WithRaplLogger(testLogger),
+		WithZoneFilter(testZoneFilter),
+		WithMSRConfig(testMSRConfig),
+	}
+	pm, err := NewCPUPowerMeter(sysfsPath, options...)
+	require.NoError(t, err)
+
+	assert.Equal(t, sysfsPath, pm.sysfsPath)
+	assert.Equal(t, testZoneFilter, pm.zoneFilter)
+	assert.Equal(t, testMSRConfig, pm.msrConfig)
+}
+
+// Helper types and functions
+
+// fakePowercapReader is a scripted raplReader standing in for the powercap
+// backend: each method returns the value stored in the matching field.
+type fakePowercapReader struct {
+	zones     []EnergyZone
+	available bool
+	initError error
+	name      string
+}
+
+// Zones hands back the preset zones and never fails.
+func (f *fakePowercapReader) Zones() ([]EnergyZone, error) { return f.zones, nil }
+
+// Available reports the preset availability flag.
+func (f *fakePowercapReader) Available() bool { return f.available }
+
+// Init returns the preset initialization error; nil means success.
+func (f *fakePowercapReader) Init() error { return f.initError }
+
+// Close does nothing and always succeeds.
+func (f *fakePowercapReader) Close() error { return nil }
+
+// Name returns the preset name, or "fake-powercap" when none was configured.
+func (f *fakePowercapReader) Name() string {
+	if f.name != "" {
+		return f.name
+	}
+	return "fake-powercap"
+}
+
+// createTestZones returns fake package, core and dram zones (index 0, with
+// energies 1000, 500 and 300) whose paths are rooted at "/<prefix>/".
+func createTestZones(prefix string) []EnergyZone {
+	pkg := &fakeMSRZone{name: "package", index: 0, path: fmt.Sprintf("/%s/package", prefix), energy: Energy(1000)}
+	core := &fakeMSRZone{name: "core", index: 0, path: fmt.Sprintf("/%s/core", prefix), energy: Energy(500)}
+	dram := &fakeMSRZone{name: "dram", index: 0, path: fmt.Sprintf("/%s/dram", prefix), energy: Energy(300)}
+
+	return []EnergyZone{pkg, core, dram}
+}
diff --git a/internal/device/rapl_reader.go b/internal/device/rapl_reader.go
new file mode 100644
index 0000000000..aac7e375ae
--- /dev/null
+++ b/internal/device/rapl_reader.go
@@ -0,0 +1,24 @@
+// SPDX-FileCopyrightText: 2025 The Kepler Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package device
+
+// raplReader is an internal abstraction for different RAPL reading backends
+// (powercap sysfs and MSR). This interface allows the raplPowerMeter to work
+// with different RAPL reading mechanisms while maintaining a consistent API.
+type raplReader interface {
+ // Zones returns the energy zones exposed by this backend, or an error
+ Zones() ([]EnergyZone, error)
+
+ // Available reports whether this backend can be used on the current system
+ Available() bool
+
+ // Init initializes the backend and verifies it can read energy values
+ Init() error
+
+ // Close releases any resources held by the backend
+ Close() error
+
+ // Name returns a human-readable name identifying the backend implementation
+ Name() string
+}
diff --git a/internal/device/rapl_sysfs_power_meter_test.go b/internal/device/rapl_sysfs_power_meter_test.go
deleted file mode 100644
index 437d12b6a6..0000000000
--- a/internal/device/rapl_sysfs_power_meter_test.go
+++ /dev/null
@@ -1,587 +0,0 @@
-// SPDX-FileCopyrightText: 2025 The Kepler Authors
-// SPDX-License-Identifier: Apache-2.0
-
-package device
-
-import (
- "errors"
- "log/slog"
- "strings"
- "testing"
-
- "github.com/prometheus/procfs/sysfs"
- "github.com/stretchr/testify/assert"
- "github.com/stretchr/testify/mock"
- "github.com/stretchr/testify/require"
-)
-
-// TestCPUPowerMeterInterface ensures that raplPowerMeter properly implements the CPUPowerMeter interface
-func TestCPUPowerMeterInterface(t *testing.T) {
- var _ CPUPowerMeter = (*raplPowerMeter)(nil)
-}
-
-func TestNewCPUPowerMeter(t *testing.T) {
- meter, err := NewCPUPowerMeter("testdata/sys")
- assert.NotNil(t, meter, "NewCPUPowerMeter should not return nil")
- assert.NoError(t, err, "NewCPUPowerMeter should not return error")
- assert.IsType(t, &raplPowerMeter{}, meter, "NewCPUPowerMeter should return a *cpuPowerMeter")
-}
-
-func TestCPUPowerMeter_Name(t *testing.T) {
- meter := &raplPowerMeter{}
- name := meter.Name()
- assert.Equal(t, "rapl", name, "Name() should return 'rapl'")
-}
-
-func TestCPUPowerMeter_Init(t *testing.T) {
- meter, err := NewCPUPowerMeter(validSysFSPath)
- assert.NoError(t, err, "NewCPUPowerMeter should not return an error")
-
- err = meter.Init()
- assert.NoError(t, err, "Start() should not return an error")
-}
-
-func TestCPUPowerMeter_Zones(t *testing.T) {
- meter := &raplPowerMeter{
- reader: sysfsRaplReader{fs: validSysFSFixtures(t)},
- logger: slog.Default().With("service", "rapl"),
- }
- zones, err := meter.Zones()
- assert.NoError(t, err, "Zones() should not return an error")
- assert.NotNil(t, zones, "Zones() should return a non-nil slice")
-
- names := make([]string, len(zones))
- for i, zone := range zones {
- names[i] = zone.Name()
- }
- assert.Contains(t, names, "package")
- assert.Contains(t, names, "core")
-}
-
-// TestSysFSRaplZoneInterface ensures that sysfsRaplZone properly implements the EnergyZone interface
-func TestSysFSRaplZoneInterface(t *testing.T) {
- pkg := sysfs.RaplZone{
- Name: "package",
- Index: 0,
- Path: "/sys/class/powercap/intel-rapl/intel-rapl:0",
- MaxMicrojoules: 1_000_000,
- }
-
- zone := sysfsRaplZone{zone: pkg}
-
- // Test that all interface methods return the expected values
- assert.Equal(t, 0, zone.Index())
- assert.Equal(t, "/sys/class/powercap/intel-rapl/intel-rapl:0", zone.Path())
- assert.Equal(t, "package", zone.Name())
- assert.Equal(t, 1.0, zone.MaxEnergy().Joules())
-}
-
-func TestSysFSRaplPowerMeterInit(t *testing.T) {
- rapl := raplPowerMeter{
- reader: sysfsRaplReader{fs: validSysFSFixtures(t)},
- logger: slog.Default().With("service", "rapl"),
- }
- err := rapl.Init()
- assert.NoError(t, err)
-}
-
-func TestSysFSRaplPowerMeterInitFail(t *testing.T) {
- rapl := raplPowerMeter{reader: sysfsRaplReader{fs: invalidSysFSFixtures(t)}}
- err := rapl.Init()
- assert.Error(t, err)
-}
-
-// TestSysFSRaplPowerMeter tests the sysfsRaplZone implementation using test fixtures
-func TestSysFSRaplPowerMeter(t *testing.T) {
- fs := validSysFSFixtures(t)
- actualZones, err := sysfs.GetRaplZones(fs)
- assert.NoError(t, err)
- assert.Equal(t, 4, len(actualZones), "Expected to find 4 zones in test fixtures")
-
- // realRaplReader should filter out non-standard zones
- rapl := raplPowerMeter{
- reader: sysfsRaplReader{fs: fs},
- logger: slog.Default().With("service", "rapl"),
- }
- zones, err := rapl.Zones()
-
- // Test that each zone implements the interface correctly
- assert.NoError(t, err)
- // With aggregation: two package zones become one AggregatedZone + one core zone = 2 total
- assert.Equal(t, 2, len(zones), "find 2 zones after aggregation (package + core)")
- assert.Equal(t, []string{"core", "package"}, sortedZoneNames(zones),
- "Expected to find aggregated zones in test fixtures")
-
- for _, zone := range zones {
- assert.NotEmpty(t, zone.Name(), "Zone name should not be empty")
- assert.NotEmpty(t, zone.Path(), "Zone path should not be empty")
- assert.GreaterOrEqual(t, zone.MaxEnergy(), 1000.0*Joule, "Max energy should not be negative")
-
- // Zone could be either sysfsRaplZone or AggregatedZone
- switch z := zone.(type) {
- case sysfsRaplZone:
- // Individual zone
- assert.NotNil(t, z)
- case *AggregatedZone:
- // Aggregated zone
- assert.NotNil(t, z)
- assert.Equal(t, -1, z.Index(), "AggregatedZone should have index -1")
- default:
- t.Fatalf("Unexpected zone type: %T", zone)
- }
-
- // Skip the original assertion since we now support both zone types
- _ = zone
-
- energy, err := zone.Energy()
- assert.NoError(t, err, zone.Path())
- assert.GreaterOrEqual(t, energy, 1000.0*Joule, "Energy should not be negative")
- }
-}
-
-func TestAggregatedZoneIntegration(t *testing.T) {
- // Test that RAPL reader creates AggregatedZone for multiple zones with same name
- mockReader := &mockSysFSReader{
- response: []EnergyZone{
- // Two package zones with same name but different indices and one core zone
- mockZone{name: "package", index: 0, path: "/intel-rapl:0", energy: 1000, maxEnergy: 100000},
- mockZone{name: "package", index: 1, path: "/intel-rapl:1", energy: 2000, maxEnergy: 100000},
- mockZone{name: "core", index: 0, path: "/intel-rapl:0:0", energy: 500, maxEnergy: 50000},
- },
- }
-
- rapl := &raplPowerMeter{
- reader: mockReader,
- logger: slog.Default(),
- }
-
- zones, err := rapl.Zones()
- require.NoError(t, err)
-
- // Should have 2 zones: 1 aggregated package zone + 1 core zone
- assert.Equal(t, 2, len(zones), "Expected 2 zones after aggregation")
-
- // Find the package zone - should be AggregatedZone
- var packageZone EnergyZone
- var coreZone EnergyZone
- for _, zone := range zones {
- if zone.Name() == "package" {
- packageZone = zone
- } else if zone.Name() == "core" { // Single zone keeps original name
- coreZone = zone
- }
- }
-
- // Verify package zone is aggregated
- require.NotNil(t, packageZone, "Package zone should exist")
- aggregated, isAggregated := packageZone.(*AggregatedZone)
- assert.True(t, isAggregated, "Package zone should be AggregatedZone")
- assert.Equal(t, "package", aggregated.Name())
- assert.Equal(t, -1, aggregated.Index())
- assert.Equal(t, Energy(200000), aggregated.MaxEnergy()) // Sum of both package zones
-
- // Verify core zone is not aggregated
- require.NotNil(t, coreZone, "Core zone should exist")
- _, isNotAggregated := coreZone.(mockZone)
- assert.True(t, isNotAggregated, "Core zone should remain as individual zone")
-
- // Test energy aggregation
- packageEnergy, err := packageZone.Energy()
- require.NoError(t, err)
- assert.Equal(t, Energy(3000), packageEnergy) // 1000 + 2000 from both package zones
-}
-
-type mockZone struct {
- name string
- index int
- path string
- energy Energy
- maxEnergy Energy
-}
-
-func (m mockZone) Name() string { return m.name }
-func (m mockZone) Index() int { return m.index }
-func (m mockZone) Path() string { return m.path }
-func (m mockZone) Energy() (Energy, error) { return m.energy, nil }
-func (m mockZone) MaxEnergy() Energy { return m.maxEnergy }
-
-type mockSysFSReader struct {
- response []EnergyZone
- err error
-}
-
-func (m *mockSysFSReader) Zones() ([]EnergyZone, error) {
- return m.response, m.err
-}
-
-// TestRAPLPowerMeterFromFixtures tests the realRaplReader with filtering using test fixtures
-func TestRAPLPowerMeterFromFixtures(t *testing.T) {
- fs := validSysFSFixtures(t)
-
- raplMeter := raplPowerMeter{
- reader: sysfsRaplReader{fs: fs},
- logger: slog.Default().With("service", "rapl"),
- }
- allZones, err := raplMeter.Zones()
- assert.NoError(t, err)
- assert.NotEmpty(t, allZones, "Expected to find RAPL zones in test fixtures")
-
- mmioZones := 0
- for _, zone := range allZones {
- if strings.Contains(zone.Path(), "mmio") {
- mmioZones++
- }
- }
- assert.Equal(t, mmioZones, 0, "all non-standard RAPL zones should be filtered")
-}
-
-// TestStandardRaplPath tests that standard paths are preferred over non-standard ones
-func TestStandardRaplPaths(t *testing.T) {
- tt := []struct {
- path string
- isStandard bool
- }{
- {"/sys/class/powercap/intel-rapl", false},
- {"/sys/class/powercap/intel-rapl-mmio", false},
- {"/sys/class/powercap/intel-rapl-mmio/intel-rapl-mmio:0", false},
- {"/sys/class/powercap/intel-rapl-mmio:0", false},
- {"/sys/class/powercap/intel-rapl/intel-rapl:0", true},
- {"/sys/class/powercap/intel-rapl:0", true},
- {"/sys/class/powercap/intel-rapl:0:0", true},
- {"/sys/class/powercap/intel-rapl:0:1", true},
- {"/sys/class/powercap/intel-rapl:1", true},
- }
-
- for _, test := range tt {
- assert.Equal(t, test.isStandard, isStandardRaplPath(test.path), test.path)
- }
-}
-
-type mockRaplReader struct {
- mock.Mock
-}
-
-func (m *mockRaplReader) Zones() ([]EnergyZone, error) {
- args := m.Called()
- return args.Get(0).([]EnergyZone), args.Error(1)
-}
-
-// TestStandardPathPreference tests that standard paths are preferred over non-standard ones
-func TestStandardPathPreference(t *testing.T) {
- // Create test zones with both standard and non-standard paths
- mmio := &MockRaplZone{
- name: "package",
- path: "/sys/class/powercap/intel-rapl-mmio/intel-rapl-mmio:0",
- index: 0,
- }
- stdPkg := &MockRaplZone{
- name: "package",
- path: "/sys/class/powercap/intel-rapl/intel-rapl:0",
- index: 0,
- }
- tt := []struct {
- zones []EnergyZone
- expected EnergyZone
- }{
- {[]EnergyZone{stdPkg}, stdPkg},
- {[]EnergyZone{mmio}, mmio},
- {[]EnergyZone{mmio, stdPkg}, stdPkg},
- {[]EnergyZone{stdPkg, mmio}, stdPkg},
- }
-
- for _, test := range tt {
- mockReader := &mockRaplReader{}
- mockReader.On("Zones").Return(test.zones, nil)
-
- rapl, err := NewCPUPowerMeter(validSysFSPath, WithSysFSReader(mockReader))
- assert.NoError(t, err)
-
- zones, err := rapl.Zones()
- assert.NoError(t, err)
-
- // We should have only one package zone
- assert.Equal(t, 1, len(zones), "Should have 1 zone after filtering mmio")
-
- // The package zone should be the standard path version
- pkg := zones[0]
- expected := test.expected
-
- // It should be the standard path version
- assert.Equal(t, "package", expected.Name())
- assert.Equal(t, pkg.Path(), expected.Path(),
- "Should prefer standard path over non-standard path")
-
- mockReader.AssertExpectations(t)
- }
-}
-
-// TestZoneCaching tests that zones are cached and called only once
-func TestZoneCaching(t *testing.T) {
- // Create test zones with both standard and non-standard paths
- pkg := &MockRaplZone{
- name: "package",
- path: "/sys/class/powercap/intel-rapl/intel-rapl:0",
- index: 0,
- }
- core := &MockRaplZone{
- name: "core",
- path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0",
- index: 1,
- }
- raplZones := []EnergyZone{pkg, core}
-
- mockReader := &mockRaplReader{}
- mockReader.On("Zones").Return(raplZones, nil).Once()
-
- rapl, err := NewCPUPowerMeter(validSysFSPath, WithSysFSReader(mockReader))
- assert.NoError(t, err)
-
- // Get zones multiple times to test that "Zone" is called only once
- for range 3 {
- zones, err := rapl.Zones()
- assert.NoError(t, err)
- assert.Equal(t, 2, len(zones), "Should have both zones")
- }
-
- mockReader.AssertExpectations(t)
-}
-
-// TestZoneCaching_Error tests that zones are not cached when there is an error
-func TestZoneCaching_Error(t *testing.T) {
- mockReader := &mockRaplReader{}
- rapl, err := NewCPUPowerMeter(validSysFSPath, WithSysFSReader(mockReader))
-
- t.Run("Zone Read Error", func(t *testing.T) {
- mockReader.On("Zones").Return([]EnergyZone(nil), errors.New("error")).Once()
- assert.NoError(t, err)
- zones, err := rapl.Zones()
- assert.Error(t, err)
- assert.Nil(t, zones)
- mockReader.AssertExpectations(t)
- })
-
- // Create test zones with both standard and non-standard paths
- pkg := &MockRaplZone{
- name: "package",
- path: "/sys/class/powercap/intel-rapl/intel-rapl:0",
- index: 0,
- }
- core := &MockRaplZone{
- name: "core",
- path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0",
- index: 1,
- }
- raplZones := []EnergyZone{pkg, core}
- t.Run("Zone Read Succeeds", func(t *testing.T) {
- mockReader.On("Zones").Return(raplZones, nil).Once()
- for range 3 {
- zones, err := rapl.Zones()
- assert.NoError(t, err)
- assert.Equal(t, 2, len(zones))
-
- }
- mockReader.AssertExpectations(t)
- })
-}
-
-// TestZone_None tests that zones error when none are found
-func TestZone_None(t *testing.T) {
- mockReader := &mockRaplReader{}
- rapl, err := NewCPUPowerMeter(validSysFSPath, WithSysFSReader(mockReader))
- assert.NoError(t, err)
-
- mockReader.On("Zones").Return([]EnergyZone(nil), nil).Once()
- zones, err := rapl.Zones()
- assert.Error(t, err)
- assert.Equal(t, 0, len(zones))
- mockReader.AssertExpectations(t)
-}
-
-// TestNewCPUPowerMeter_InvalidPath tests that NewCPUPowerMeter returns an error with an invalid sysfs path
-func TestNewCPUPowerMeter_InvalidPath(t *testing.T) {
- meter, err := NewCPUPowerMeter("/nonexistent/path")
- assert.Error(t, err, "Should return an error with an invalid path")
- assert.Nil(t, meter, "Should not return a meter with an invalid path")
-}
-
-// TestCPUPowerMeter_ZonesError tests that the Zones method correctly handles errors from the reader
-func TestCPUPowerMeter_ZonesError(t *testing.T) {
- mockReader := &mockRaplReader{}
- expectedErr := errors.New("error")
- mockReader.On("Zones").Return([]EnergyZone{}, expectedErr)
-
- meter := &raplPowerMeter{reader: mockReader}
- zones, err := meter.Zones()
-
- assert.Error(t, err, "Should return an error when the reader fails")
- assert.Equal(t, expectedErr, err, "Should return the error from the reader")
- assert.Nil(t, zones, "Should return nil zones when there's an error")
- mockReader.AssertExpectations(t)
-}
-
-// TestCPUPowerMeter_NoZones tests that Zones returns an error when no zones are found
-func TestCPUPowerMeter_NoZones(t *testing.T) {
- mockReader := &mockRaplReader{}
- mockReader.On("Zones").Return([]EnergyZone{}, nil)
-
- meter := &raplPowerMeter{reader: mockReader}
- zones, err := meter.Zones()
-
- assert.Error(t, err, "Should return an error when no zones are found")
- assert.Equal(t, "no RAPL zones found", err.Error(), "Should return a specific error message")
- assert.Nil(t, zones, "Should return nil zones when no zones are found")
- mockReader.AssertExpectations(t)
-}
-
-// TestCPUPowerMeter_InitNoZones tests that Start returns an error when no zones are found
-func TestCPUPowerMeter_InitNoZones(t *testing.T) {
- mockReader := &mockRaplReader{}
- mockReader.On("Zones").Return([]EnergyZone{}, nil)
-
- meter := &raplPowerMeter{reader: mockReader}
- err := meter.Init()
-
- assert.Error(t, err, "Start() should return an error when no zones are found")
- assert.Equal(t, "no RAPL zones found", err.Error(), "Start() should return a specific error message")
- mockReader.AssertExpectations(t)
-}
-
-// TestPrimaryEnergyZone tests the PrimaryEnergyZone method
-func TestPrimaryEnergyZone(t *testing.T) {
- t.Run("Priority hierarchy", func(t *testing.T) {
- tests := []struct {
- name string
- zones []EnergyZone
- expected string
- }{{
- name: "psys has highest priority",
- zones: []EnergyZone{
- mockZone{name: "package", index: 0},
- mockZone{name: "psys", index: 0},
- mockZone{name: "core", index: 0},
- },
- expected: "psys",
- }, {
- name: "package has priority over core",
- zones: []EnergyZone{
- mockZone{name: "core", index: 0},
- mockZone{name: "package", index: 0},
- mockZone{name: "dram", index: 0},
- },
- expected: "package",
- }, {
- name: "core has priority over dram",
- zones: []EnergyZone{
- mockZone{name: "dram", index: 0},
- mockZone{name: "core", index: 0},
- mockZone{name: "uncore", index: 0},
- },
- expected: "core",
- }, {
- name: "dram has priority over uncore",
- zones: []EnergyZone{
- mockZone{name: "uncore", index: 0},
- mockZone{name: "dram", index: 0},
- },
- expected: "dram",
- }}
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- mockReader := &mockRaplReader{}
- mockReader.On("Zones").Return(tt.zones, nil)
-
- meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()}
- zone, err := meter.PrimaryEnergyZone()
-
- assert.NoError(t, err)
- assert.Equal(t, tt.expected, zone.Name())
- mockReader.AssertExpectations(t)
- })
- }
- })
-
- t.Run("Case insensitive matching", func(t *testing.T) {
- mockReader := &mockRaplReader{}
- mockReader.On("Zones").Return([]EnergyZone{
- mockZone{name: "PACKAGE", index: 0},
- mockZone{name: "Core", index: 0},
- }, nil)
-
- meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()}
- zone, err := meter.PrimaryEnergyZone()
-
- assert.NoError(t, err)
- assert.Equal(t, "PACKAGE", zone.Name())
- mockReader.AssertExpectations(t)
- })
-
- t.Run("Fallback to first zone", func(t *testing.T) {
- zones := []EnergyZone{
- mockZone{name: "unknown1", index: 0},
- mockZone{name: "unknown2", index: 1},
- }
- mockReader := &mockRaplReader{}
- mockReader.On("Zones").Return(zones, nil)
-
- meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()}
- zone, err := meter.PrimaryEnergyZone()
-
- assert.NoError(t, err)
- // NOTE: since reader.Zones() does not guarantee the order after filtering,
- // we cannot assert zone.Name() == "unknown1", thus assert the zone returned
- // any of the zones passed as input
- zoneName := zone.Name()
- assert.Contains(t, []string{"unknown1", "unknown2"}, zoneName)
- mockReader.AssertExpectations(t)
- })
-
- t.Run("Caching behavior", func(t *testing.T) {
- mockReader := &mockRaplReader{}
- mockReader.On("Zones").Return([]EnergyZone{
- mockZone{name: "package", index: 0},
- }, nil).Once()
-
- meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()}
-
- // First call should read from zones and cache topZone
- zone1, err := meter.PrimaryEnergyZone()
- assert.NoError(t, err)
- assert.Equal(t, "package", zone1.Name())
-
- // Second call should use cached topZone directly
- zone2, err := meter.PrimaryEnergyZone()
- assert.NoError(t, err)
- assert.Equal(t, "package", zone2.Name())
-
- mockReader.AssertExpectations(t)
- })
-
- t.Run("Error handling", func(t *testing.T) {
- t.Run("Zones() returns error", func(t *testing.T) {
- mockReader := &mockRaplReader{}
- mockReader.On("Zones").Return([]EnergyZone{}, errors.New("zones error"))
-
- meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()}
- zone, err := meter.PrimaryEnergyZone()
-
- assert.Error(t, err)
- assert.Nil(t, zone)
- assert.Contains(t, err.Error(), "zones error")
- mockReader.AssertExpectations(t)
- })
-
- t.Run("Empty zones list", func(t *testing.T) {
- mockReader := &mockRaplReader{}
- mockReader.On("Zones").Return([]EnergyZone{}, nil)
-
- meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()}
- zone, err := meter.PrimaryEnergyZone()
-
- assert.Error(t, err)
- assert.Nil(t, zone)
- assert.Contains(t, err.Error(), "no RAPL zones found")
- mockReader.AssertExpectations(t)
- })
- })
-}
diff --git a/internal/device/rapl_zone_filtering_test.go b/internal/device/rapl_zone_filtering_test.go
deleted file mode 100644
index 773f08d6ee..0000000000
--- a/internal/device/rapl_zone_filtering_test.go
+++ /dev/null
@@ -1,257 +0,0 @@
-// SPDX-FileCopyrightText: 2025 The Kepler Authors
-// SPDX-License-Identifier: Apache-2.0
-
-package device
-
-import (
- "log/slog"
- "testing"
-
- "github.com/stretchr/testify/assert"
-)
-
-func TestRaplZoneFiltering(t *testing.T) {
- // Create mock zones for testing
- packageZone := &MockRaplZone{
- name: "package",
- path: "/sys/class/powercap/intel-rapl/intel-rapl:0",
- index: 0,
- }
- coreZone := &MockRaplZone{
- name: "core",
- path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0",
- index: 1,
- }
- dramZone := &MockRaplZone{
- name: "dram",
- path: "/sys/class/powercap/intel-rapl/intel-rapl:0:2",
- index: 2,
- }
- uncoreZone := &MockRaplZone{
- name: "uncore",
- path: "/sys/class/powercap/intel-rapl/intel-rapl:0:3",
- index: 3,
- }
-
- allZones := []EnergyZone{packageZone, coreZone, dramZone, uncoreZone}
-
- tests := []struct {
- name string
- filterZones []string
- expectedZones []string
- }{
- {
- name: "No filter - all zones included",
- filterZones: []string{},
- expectedZones: []string{"package", "core", "dram", "uncore"},
- },
- {
- name: "Filter single zone",
- filterZones: []string{"core"},
- expectedZones: []string{"core"},
- },
- {
- name: "Filter multiple zones",
- filterZones: []string{"package", "dram"},
- expectedZones: []string{"package", "dram"},
- },
- {
- name: "Case-insensitive filtering",
- filterZones: []string{"PACKAGE", "Core"},
- expectedZones: []string{"package", "core"},
- },
- {
- name: "Non-existent zone in filter",
- filterZones: []string{"package", "nonexistent"},
- expectedZones: []string{"package"},
- },
- }
-
- for _, tc := range tests {
- t.Run(tc.name, func(t *testing.T) {
- mockReader := &mockRaplReader{}
- mockReader.On("Zones").Return(allZones, nil)
-
- logger := slog.Default().With("test", "zone-filtering")
- meter := &raplPowerMeter{
- reader: mockReader,
- logger: logger,
- zoneFilter: tc.filterZones,
- }
-
- // Filter zones directly to test the filtering logic
- filteredZones := meter.filterZones(allZones)
-
- // Verify only expected zones are included
- assert.Equal(t, len(tc.expectedZones), len(filteredZones),
- "Filtered zones length mismatch")
-
- // Create a map of zone names for easy checking
- zoneNames := make(map[string]bool)
- for _, zone := range filteredZones {
- zoneNames[zone.Name()] = true
- }
-
- // Verify each expected zone is present
- for _, name := range tc.expectedZones {
- assert.True(t, zoneNames[name],
- "Expected zone %s not found in filtered zones", name)
- }
- })
- }
-}
-
-// Test that zone filtering applies during Init
-func TestRaplZoneFiltering_Init(t *testing.T) {
- packageZone := &MockRaplZone{
- name: "package",
- path: "/sys/class/powercap/intel-rapl/intel-rapl:0",
- index: 0,
- maxMicroJoules: 1000000,
- energy: 100000,
- }
- coreZone := &MockRaplZone{
- name: "core",
- path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0",
- index: 1,
- maxMicroJoules: 1000000,
- energy: 50000,
- }
-
- allZones := []EnergyZone{packageZone, coreZone}
-
- t.Run("Init succeeds with valid filter", func(t *testing.T) {
- mockReader := &mockRaplReader{}
- mockReader.On("Zones").Return(allZones, nil)
-
- meter := &raplPowerMeter{
- reader: mockReader,
- logger: slog.Default(),
- zoneFilter: []string{"package"},
- }
-
- err := meter.Init()
- assert.NoError(t, err)
- })
-
- t.Run("Init does not fails with unknown zones", func(t *testing.T) {
- mockReader := &mockRaplReader{}
- mockReader.On("Zones").Return(allZones, nil)
-
- meter := &raplPowerMeter{
- reader: mockReader,
- logger: slog.Default(),
- zoneFilter: []string{"nonexistent"},
- }
-
- err := meter.Init()
- assert.NoError(t, err)
- })
-}
-
-// Test that Zones() properly applies the filter
-func TestRaplZoneFiltering_Zones(t *testing.T) {
- packageZone := &MockRaplZone{
- name: "package",
- path: "/sys/class/powercap/intel-rapl/intel-rapl:0",
- index: 0,
- maxMicroJoules: 1000000,
- energy: 100000,
- }
- coreZone := &MockRaplZone{
- name: "core",
- path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0",
- index: 1,
- maxMicroJoules: 1000000,
- energy: 50000,
- }
-
- allZones := []EnergyZone{packageZone, coreZone}
-
- tests := []struct {
- name string
- filter []string
- expectedZones int
- expectError bool
- }{
- {
- name: "No filter",
- filter: []string{},
- expectedZones: 2,
- expectError: false,
- }, {
- name: "Filter package",
- filter: []string{"package"},
- expectedZones: 1,
- expectError: false,
- }, {
- name: "Filter core",
- filter: []string{"core"},
- expectedZones: 1,
- expectError: false,
- }, {
- name: "nonexistent zone",
- filter: []string{"nonexistent"},
- expectedZones: 0,
- expectError: true,
- },
- }
-
- for _, tc := range tests {
- t.Run(tc.name, func(t *testing.T) {
- mockReader := &mockRaplReader{}
- mockReader.On("Zones").Return(allZones, nil)
-
- meter := &raplPowerMeter{
- reader: mockReader,
- logger: slog.Default(),
- zoneFilter: tc.filter,
- }
-
- zones, err := meter.Zones()
-
- if tc.expectError {
- assert.Error(t, err)
- assert.Nil(t, zones)
- } else {
- assert.NoError(t, err)
- assert.Equal(t, tc.expectedZones, len(zones))
- }
- })
- }
-}
-
-// Test integration with the configuration options
-func TestRaplZoneFiltering_WithOptions(t *testing.T) {
- // Mock sysfs reader for this test
- mockReader := &mockRaplReader{}
- packageZone := &MockRaplZone{
- name: "package",
- path: "/sys/class/powercap/intel-rapl/intel-rapl:0",
- index: 0,
- maxMicroJoules: 1000000,
- energy: 100000,
- }
- coreZone := &MockRaplZone{
- name: "core",
- path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0",
- index: 1,
- maxMicroJoules: 1000000,
- energy: 50000,
- }
- mockReader.On("Zones").Return([]EnergyZone{packageZone, coreZone}, nil)
-
- // Create meter with WithZoneFilter option
- meter, err := NewCPUPowerMeter(
- validSysFSPath,
- WithSysFSReader(mockReader),
- WithZoneFilter([]string{"core"}),
- )
- assert.NoError(t, err)
-
- // Check that filtering was applied
- zones, err := meter.Zones()
- assert.NoError(t, err)
- assert.Equal(t, 1, len(zones))
- assert.Equal(t, "core", zones[0].Name())
-}
diff --git a/manifests/helm/kepler/values.yaml b/manifests/helm/kepler/values.yaml
index d4d9ec1869..5f1892abff 100644
--- a/manifests/helm/kepler/values.yaml
+++ b/manifests/helm/kepler/values.yaml
@@ -75,6 +75,10 @@ config:
minTerminatedEnergyThreshold: 10
rapl:
zones: []
+ msr:
+ enabled: false
+ force: false
+ devicePath: /dev/cpu/%d/msr
exporter:
stdout:
enabled: false
diff --git a/manifests/k8s/configmap.yaml b/manifests/k8s/configmap.yaml
index 700ec58cdb..83e854d628 100644
--- a/manifests/k8s/configmap.yaml
+++ b/manifests/k8s/configmap.yaml
@@ -21,6 +21,10 @@ data:
minTerminatedEnergyThreshold: 10
rapl:
zones: []
+ msr:
+ enabled: false
+ force: false
+ devicePath: "/dev/cpu/%d/msr"
exporter:
stdout:
enabled: false