From 9d8210b890136642cd01cfdf85027e1e029d8b70 Mon Sep 17 00:00:00 2001 From: Sunil Thaha Date: Tue, 12 Aug 2025 12:08:55 +1000 Subject: [PATCH 1/2] docs(proposal): add EP-002 for MSR fallback power meter support Introduces enhancement proposal for adding MSR (Model Specific Register) support as a fallback mechanism when Intel RAPL powercap sysfs interface is unavailable. This improves Kepler's deployment flexibility in environments with restricted powercap access. The proposal includes: - Architecture design using powerReader abstraction - Security considerations for MSR access (PLATYPUS mitigation) - Phased implementation plan with backward compatibility - Configuration for opt-in MSR fallback behavior Signed-off-by: Sunil Thaha --- .../EP-002-MSR-Fallback-Power-Meter.md | 423 ++++++++++++++++++ docs/developer/proposal/index.md | 3 +- 2 files changed, 425 insertions(+), 1 deletion(-) create mode 100644 docs/developer/proposal/EP-002-MSR-Fallback-Power-Meter.md diff --git a/docs/developer/proposal/EP-002-MSR-Fallback-Power-Meter.md b/docs/developer/proposal/EP-002-MSR-Fallback-Power-Meter.md new file mode 100644 index 0000000000..c23a581a13 --- /dev/null +++ b/docs/developer/proposal/EP-002-MSR-Fallback-Power-Meter.md @@ -0,0 +1,423 @@ +# EP-002: MSR Fallback for CPU Power Meter + +**Status**: Draft +**Author**: Kepler Development Team +**Created**: 2025-08-12 +**Last Updated**: 2025-08-12 + +## Summary + +This proposal introduces MSR (Model Specific Register) support as a fallback +mechanism for CPU power monitoring in Kepler when the primary powercap sysfs +interface (/sys/class/powercap/intel-rapl) is unavailable. The enhancement +maintains backward compatibility while improving Kepler's resilience in +environments where powercap is disabled or inaccessible, such as certain +container runtimes or kernel configurations. + +## Problem Statement + +Kepler currently relies exclusively on the Linux powercap sysfs interface for +reading Intel RAPL energy counters. 
This creates deployment limitations in +environments where: + +1. The powercap interface is disabled by kernel configuration +2. Container runtimes don't expose /sys/class/powercap +3. Custom kernel builds lack powercap support +4. Virtualized environments provide no powercap passthrough + +These limitations prevent Kepler from collecting energy metrics on otherwise +capable hardware, reducing its effectiveness as a universal energy monitoring +solution. + +### Current Limitations + +1. **Single Interface Dependency**: Complete reliance on powercap sysfs with no + fallback mechanism +2. **Deployment Restrictions**: Cannot deploy in environments without powercap + access +3. **Container Limitations**: Difficulty mounting /sys/class/powercap in certain + container security policies +4. **No Graceful Degradation**: Complete failure rather than fallback when + powercap unavailable + +## Goals + +- **Primary Goal**: Implement MSR-based RAPL reading as automatic fallback when + powercap is unavailable +- **Secondary Goal**: Maintain existing CPUPowerMeter interface compatibility +- **Tertiary Goal**: Provide configurable control over fallback behavior for + security-conscious deployments + +## Non-Goals + +- Supporting non-Intel architectures (AMD RAPL, ARM PMU) +- Replacing powercap as the primary interface +- Implementing model-specific optimizations +- Supporting pre-Sandy Bridge Intel CPUs +- Real-time power capping functionality + +## Requirements + +### Functional Requirements + +- Automatically detect powercap availability and fall back to MSR when needed +- Read energy values from MSR registers: PKG (0x611), PP0 (0x639), DRAM (0x619) +- Handle 32-bit MSR counter overflow correctly +- Map MSR zones to existing EnergyZone interface (package, core, dram) +- Support multi-socket systems with per-CPU MSR access +- Maintain energy unit conversion compatibility + +### Non-Functional Requirements + +- **Performance**: MSR reading overhead < 100μs per sample 
+- **Reliability**: Handle MSR module loading/unloading gracefully +- **Security**: Document and mitigate PLATYPUS attack vectors +- **Maintainability**: Minimal code duplication with existing RAPL implementation +- **Testability**: Support fake MSR implementation for testing + +## Proposed Solution + +### High-Level Architecture + +```mermaid +graph TB + CPUPowerMeter[CPUPowerMeter Interface] + + raplPowerMeter[raplPowerMeter
Enhanced with MSR] + + powercapReader[powercapReader
Primary] + msrReader[msrReader
Fallback] + zoneAdapter[Zone
Adapter] + + powercap[/sys/class/
powercap/] + msrdev["/dev/cpu/*/msr"] + zones[Energy
Zones] + + CPUPowerMeter --> raplPowerMeter + raplPowerMeter --> powercapReader + raplPowerMeter --> msrReader + raplPowerMeter --> zoneAdapter + + powercapReader --> powercap + msrReader --> msrdev + zoneAdapter --> zones + + style CPUPowerMeter fill:#e1f5fe + style raplPowerMeter fill:#b3e5fc + style powercapReader fill:#81d4fa + style msrReader fill:#ffccbc + style zoneAdapter fill:#c5e1a5 +``` + +### Key Design Choices + +1. **Extend Existing Implementation**: Enhance `raplPowerMeter` rather than + creating separate implementation to maximize code reuse +2. **Interface-Based Abstraction**: Create `powerReader` interface for both + powercap and MSR backends +3. **Automatic Detection**: Check powercap availability in Init() and select + appropriate backend +4. **Opt-In MSR Fallback**: Require explicit configuration to enable MSR due to + security implications +5. **Reuse Energy Zone Logic**: Share zone management between implementations + +## Detailed Design + +### Package Structure + +```text +internal/ +├── device/ +│ ├── cpu_power_meter.go # Interface (unchanged) +│ ├── rapl_power_meter.go # Enhanced with MSR support +│ ├── power_reader.go # New abstraction interface +│ ├── powercap_reader.go # Extracted powercap logic +│ ├── msr_reader.go # New MSR implementation +│ ├── msr_reader_test.go # MSR unit tests +│ └── testdata/ +│ └── msr/ # MSR test fixtures +└── config/ + └── config.go # MSR configuration options +``` + +### API/Interface Changes + +```go +// power_reader.go - New internal abstraction +type powerReader interface { + // ReadEnergy reads energy value for a specific zone + ReadEnergy(zone EnergyZone) (Energy, error) + // Available checks if the reader can be used + Available() bool + // Init initializes the reader + Init() error + // Close releases resources + Close() error +} + +// msr_reader.go - MSR implementation +type msrReader struct { + msrFiles map[int]*os.File // CPU ID -> MSR file handle + energyUnit float64 // Energy unit from 
IA32_RAPL_POWER_UNIT + zones []EnergyZone // Available zones + mu sync.RWMutex // Thread safety +} + +// Enhanced rapl_power_meter.go +type raplPowerMeter struct { + reader powerReader // Abstracted backend + zones []EnergyZone + strategy EnergyStrategy + useMSR bool // Track which backend is active +} +``` + +## Configuration + +### Main Configuration Changes + +```go +// config/config.go additions +type DeviceConfig struct { + // Existing fields... + MSR MSRConfig `yaml:"msr"` +} + +type MSRConfig struct { + // Enable automatic MSR fallback when powercap unavailable + Enabled *bool `yaml:"enabled"` + // Force MSR usage even if powercap available (testing) + Force *bool `yaml:"force"` + // MSR device path template + DevicePath string `yaml:"devicePath"` +} + +// Default configuration +func defaultMSRConfig() MSRConfig { + return MSRConfig{ + Enabled: ptr(false), // Opt-in for security + Force: ptr(false), + DevicePath: "/dev/cpu/%d/msr", + } +} +``` + +### New Configuration File (if applicable) + +```yaml +# Example: hack/config.yaml +msr: + enabled: true + force: false + devicePath: "/dev/cpu/%d/msr" +``` + +### Security Considerations + +**Critical Security Warning**: MSR access enables PLATYPUS side-channel attacks +(CVE-2020-8694/8695) allowing unprivileged users to infer data from other +processes. + +**Mitigations**: + +1. MSR fallback disabled by default (opt-in configuration) +2. Require CAP_SYS_RAWIO capability instead of full root +3. Document security implications prominently +4. Add warning logs when MSR fallback is activated +5. 
Consider rate-limiting MSR reads to reduce attack surface + +## Deployment Examples + +### Kubernetes Environment + +```yaml +# DaemonSet with MSR support +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kepler +spec: + template: + spec: + containers: + - name: kepler + image: kepler:msr-enabled + args: + - --config=/etc/kepler/config.yaml + volumeMounts: + - name: dev + mountPath: /dev + readOnly: true + - name: config + mountPath: /etc/kepler + securityContext: + privileged: true # Required for MSR access + volumes: + - name: dev + hostPath: + path: /dev + - name: config + configMap: + name: kepler-config +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: kepler-config +data: + config.yaml: | + msr: + enabled: true +``` + +### Standalone Deployment + +```bash +# Load MSR kernel module +sudo modprobe msr + +# Run with MSR fallback enabled +sudo ./bin/kepler --config hack/config.yaml + +# Force MSR for testing (ignores powercap) - configured via YAML only +# Set msr.force: true in config file +``` + +## Testing Strategy + +### Test Coverage + +- **Unit Tests**: MSR reader with mock file operations (85% coverage target) +- **Integration Tests**: Fallback detection and switching logic +- **End-to-End Tests**: Energy attribution with MSR backend +- **Benchmark Tests**: MSR vs sysfs performance comparison + +### Test Infrastructure + +```go +// Fake MSR implementation for testing +type fakeMSRReader struct { + energyValues map[EnergyZone]Energy + available bool +} + +// Test fixtures in testdata/msr/ +// - Mock MSR device files +// - Predefined energy values +// - Overflow scenarios +``` + +## Migration and Compatibility + +### Backward Compatibility + +This enhancement maintains full backward compatibility: + +- Existing deployments continue using powercap by default +- No changes to external APIs or metrics +- Configuration changes are additive only +- Existing tests remain valid + +### Migration Path + +1. 
**Phase 1**: Deploy with MSR disabled (default) - verify no regression +2. **Phase 2**: Enable MSR fallback in staging environments +3. **Phase 3**: Gradual rollout to production with monitoring + +### Rollback Strategy + +1. Disable MSR fallback via configuration (immediate effect) +2. Revert to previous Kepler version if issues persist +3. MSR can be disabled without restart via dynamic config reload + +## Metrics Output + +```prometheus +# New metric indicating active power meter backend +kepler_power_meter_backend{backend="powercap|msr"} 1 + +# Existing metrics unchanged +kepler_node_package_energy_millijoule{node="node1"} 12345 +kepler_node_core_energy_millijoule{node="node1"} 6789 +kepler_node_dram_energy_millijoule{node="node1"} 3456 +``` + +## Implementation Plan + +### Phase 1: Foundation (Week 1-2) + +- Implement `powerReader` interface abstraction +- Extract existing powercap logic to `powercapReader` +- Create basic `msrReader` structure +- Add MSR configuration options + +### Phase 2: Core Functionality (Week 3-4) + +- Implement MSR register reading logic +- Add energy unit conversion +- Handle counter overflow +- Implement zone mapping +- Add fallback detection logic + +### Phase 3: Testing and Documentation (Week 5-6) + +- Comprehensive unit tests +- Integration testing +- Performance benchmarking +- Security documentation +- Update deployment guides + +## Risks and Mitigations + +### Technical Risks + +- **Risk**: MSR kernel module not available + - **Mitigation**: Graceful degradation with clear error messages + +- **Risk**: MSR counter overflow handling errors + - **Mitigation**: Extensive testing with overflow scenarios + +- **Risk**: Performance regression from abstraction + - **Mitigation**: Benchmark and optimize hot paths + +### Operational Risks + +- **Risk**: Security vulnerabilities from MSR access + - **Mitigation**: Disabled by default, clear documentation + +- **Risk**: Increased complexity for operators + - **Mitigation**: Automatic 
detection minimizes configuration + +## Alternatives Considered + +### Alternative 1: Separate MSR Power Meter Implementation + +- **Description**: Create independent `msrPowerMeter` implementing + `CPUPowerMeter` +- **Reason for Rejection**: High code duplication, difficult to maintain + consistency + +### Alternative 2: eBPF-based Power Monitoring + +- **Description**: Use eBPF to intercept RAPL MSR reads from other processes +- **Reason for Rejection**: Complex implementation, limited kernel support, + security concerns + +### Alternative 3: Intel Power Gadget Integration + +- **Description**: Use Intel's official Power Gadget API +- **Reason for Rejection**: Not available on Linux, requires proprietary + libraries + +## Success Metrics + +- **Functional Metric**: Successfully read power through MSR in environments where + powercap is unavailable +- **Performance Metric**: MSR reading overhead < 100μs (50% faster than powercap) +- **Adoption Metric**: 20% of deployments utilize MSR fallback within 6 months + +## Open Questions + +1. Should we support AMD's RAPL MSRs in the same implementation? +2. Should MSR support be compiled conditionally for security-sensitive builds? +3. What telemetry should we add to track MSR fallback usage in production? 
diff --git a/docs/developer/proposal/index.md b/docs/developer/proposal/index.md index 861f7843be..7edc595a88 100644 --- a/docs/developer/proposal/index.md +++ b/docs/developer/proposal/index.md @@ -6,7 +6,8 @@ This directory contains Enhancement Proposals (EPs) for major features and chang | ID | Title | Status | Author | Created | |----|-------|--------|--------|---------| -| [EP-000](EP_TEMPLATE.md) | Enhancement Proposal Template | Accepted |Sunil Thaha | 2025-01-18 | +| [EP-000](EP_TEMPLATE.md) | Enhancement Proposal Template | Accepted | Sunil Thaha | 2025-01-18 | +| [EP-002](EP-002-MSR-Fallback-Power-Meter.md) | MSR Fallback for CPU Power Meter | Draft | Kepler Development Team | 2025-08-12 | ## Proposal Status From 7f7308a29977a46c88cb7328084776d8bd0e1790 Mon Sep 17 00:00:00 2001 From: Sunil Thaha Date: Tue, 12 Aug 2025 14:27:37 +1000 Subject: [PATCH 2/2] feat(device): implement MSR fallback for CPU power meter This commit implements EP-002 MSR fallback power meter enhancement proposal. Add MSR (Model Specific Register) support as fallback when powercap interface is unavailable. This enhancement improves Kepler's compatibility across different systems and kernel configurations. Key changes: - Add MSR reader implementation with Intel RAPL register support - Create raplReader interface abstracting powercap and MSR backends - Extract existing powercap logic into dedicated reader component - Enhance RAPL power meter with automatic fallback detection - Add MSR configuration with security-conscious opt-in defaults - Implement comprehensive test coverage with mock MSR data The MSR fallback is disabled by default due to PLATYPUS attack vectors (CVE-2020-8694/8695) and must be explicitly enabled via configuration. When enabled, the system automatically falls back to MSR if powercap is unavailable, maintaining transparent operation. 
Signed-off-by: Sunil Thaha --- cmd/kepler/main.go | 8 + compose/dev/kepler-dev/etc/kepler/config.yaml | 5 + config/config.go | 35 ++ docs/user/configuration.md | 40 ++ hack/config.yaml | 5 + internal/device/mock_cpu_power_meter.go | 32 - internal/device/msr_reader.go | 290 +++++++++ internal/device/msr_reader_test.go | 406 ++++++++++++ internal/device/msr_zone.go | 125 ++++ internal/device/powercap_reader.go | 118 ++++ internal/device/powercap_reader_test.go | 162 +++++ ...sfs_power_meter.go => rapl_power_meter.go} | 222 ++++--- internal/device/rapl_power_meter_test.go | 330 ++++++++++ internal/device/rapl_reader.go | 24 + .../device/rapl_sysfs_power_meter_test.go | 587 ------------------ internal/device/rapl_zone_filtering_test.go | 257 -------- manifests/helm/kepler/values.yaml | 4 + manifests/k8s/configmap.yaml | 4 + 18 files changed, 1704 insertions(+), 950 deletions(-) create mode 100644 internal/device/msr_reader.go create mode 100644 internal/device/msr_reader_test.go create mode 100644 internal/device/msr_zone.go create mode 100644 internal/device/powercap_reader.go create mode 100644 internal/device/powercap_reader_test.go rename internal/device/{rapl_sysfs_power_meter.go => rapl_power_meter.go} (52%) create mode 100644 internal/device/rapl_power_meter_test.go create mode 100644 internal/device/rapl_reader.go delete mode 100644 internal/device/rapl_sysfs_power_meter_test.go delete mode 100644 internal/device/rapl_zone_filtering_test.go diff --git a/cmd/kepler/main.go b/cmd/kepler/main.go index 6f28961cac..89abb9ae31 100644 --- a/cmd/kepler/main.go +++ b/cmd/kepler/main.go @@ -233,9 +233,17 @@ func createCPUMeter(logger *slog.Logger, cfg *config.Config) (device.CPUPowerMet logger.Info("rapl zones are filtered", "zones-enabled", cfg.Rapl.Zones) } + // Convert config MSR settings to device MSRConfig + msrConfig := device.MSRConfig{ + Enabled: cfg.MSR.Enabled, + Force: cfg.MSR.Force, + DevicePath: cfg.MSR.DevicePath, + } + return device.NewCPUPowerMeter( 
cfg.Host.SysFS, device.WithRaplLogger(logger), device.WithZoneFilter(cfg.Rapl.Zones), + device.WithMSRConfig(msrConfig), ) } diff --git a/compose/dev/kepler-dev/etc/kepler/config.yaml b/compose/dev/kepler-dev/etc/kepler/config.yaml index 41048a5509..3f8bd7004d 100644 --- a/compose/dev/kepler-dev/etc/kepler/config.yaml +++ b/compose/dev/kepler-dev/etc/kepler/config.yaml @@ -35,6 +35,11 @@ host: rapl: zones: [] # zones to be enabled, empty enables all default zones +msr: # MSR fallback configuration for RAPL energy reading + enabled: false # enable automatic MSR fallback when powercap unavailable (default: false) + force: false # force MSR usage even if powercap available (testing only, default: false) + devicePath: /dev/cpu/%d/msr # MSR device path template (default: "/dev/cpu/%d/msr") + exporter: stdout: # stdout exporter related config enabled: false # disabled by default diff --git a/config/config.go b/config/config.go index c91f495cfb..e40c716578 100644 --- a/config/config.go +++ b/config/config.go @@ -33,6 +33,16 @@ type ( Zones []string `yaml:"zones"` } + // MSR configuration for fallback power reading + MSR struct { + // Enable automatic MSR fallback when powercap unavailable + Enabled *bool `yaml:"enabled"` + // Force MSR usage even if powercap available (testing) + Force *bool `yaml:"force"` + // MSR device path template + DevicePath string `yaml:"devicePath"` + } + // Development mode settings; disabled by default Dev struct { FakeCpuMeter struct { @@ -98,6 +108,7 @@ type ( Host Host `yaml:"host"` Monitor Monitor `yaml:"monitor"` Rapl Rapl `yaml:"rapl"` + MSR MSR `yaml:"msr"` Exporter Exporter `yaml:"exporter"` Web Web `yaml:"web"` Debug Debug `yaml:"debug"` @@ -168,6 +179,12 @@ const ( // RAPL RaplZones = "rapl.zones" // not a flag + // MSR - NOTE: MSR settings are not exposed as CLI flags per proposal + // They should only be configured via YAML files due to security implications + MSREnabled = "msr.enabled" // not a flag + MSRForce = "msr.force" // not 
a flag + MSRDevicePath = "msr.devicePath" // not a flag + pprofEnabledFlag = "debug.pprof" WebConfigFlag = "web.config-file" @@ -203,6 +220,11 @@ func DefaultConfig() *Config { Rapl: Rapl{ Zones: []string{}, }, + MSR: MSR{ + Enabled: ptr.To(false), // Opt-in for security + Force: ptr.To(false), + DevicePath: "/dev/cpu/%d/msr", + }, Monitor: Monitor{ Interval: 5 * time.Second, Staleness: 500 * time.Millisecond, @@ -408,6 +430,9 @@ func (c *Config) sanitize() { c.Rapl.Zones[i] = strings.TrimSpace(c.Rapl.Zones[i]) } + // MSR settings sanitization + c.MSR.DevicePath = strings.TrimSpace(c.MSR.DevicePath) + for i := range c.Exporter.Prometheus.DebugCollectors { c.Exporter.Prometheus.DebugCollectors[i] = strings.TrimSpace(c.Exporter.Prometheus.DebugCollectors[i]) } @@ -488,6 +513,16 @@ func (c *Config) Validate(skips ...SkipValidation) error { errs = append(errs, fmt.Sprintf("invalid monitor min terminated energy threshold: %d can't be negative", c.Monitor.MinTerminatedEnergyThreshold)) } } + { // MSR settings + if c.MSR.DevicePath == "" { + errs = append(errs, "MSR device path cannot be empty") + } else { + // Basic validation that device path is a template + if !strings.Contains(c.MSR.DevicePath, "%d") { + errs = append(errs, "MSR device path must contain '%d' placeholder for CPU ID") + } + } + } { // Kubernetes if ptr.Deref(c.Kube.Enabled, false) { if c.Kube.Config != "" { diff --git a/docs/user/configuration.md b/docs/user/configuration.md index 1c803e33f0..eaab98a0b0 100644 --- a/docs/user/configuration.md +++ b/docs/user/configuration.md @@ -95,6 +95,11 @@ host: rapl: zones: [] # RAPL zones to be enabled, empty enables all default zones +msr: # MSR fallback configuration for RAPL energy reading + enabled: false # Enable automatic MSR fallback when powercap unavailable + force: false # Force MSR usage even if powercap available (testing) + devicePath: "/dev/cpu/%d/msr" # MSR device path template + exporter: stdout: # stdout exporter related config enabled: false # 
disabled by default @@ -195,6 +200,41 @@ rapl: zones: ["package", "core", "uncore"] ``` +### 🔌 MSR Configuration + +```yaml +msr: + enabled: false # Enable automatic MSR fallback + force: false # Force MSR usage for testing + devicePath: "/dev/cpu/%d/msr" # MSR device path template +``` + +Model Specific Register (MSR) support provides a fallback mechanism for reading Intel RAPL energy counters when the Linux powercap sysfs interface is unavailable. + +- **enabled**: Enable automatic MSR fallback when powercap is unavailable + - Default: `false` (opt-in for security reasons) + - When enabled, Kepler will automatically fall back to MSR if powercap fails + - Requires appropriate permissions and hardware support + +- **force**: Force MSR usage even when powercap is available + - Default: `false` + - Primarily for testing and development purposes + - When `true`, MSR will be used regardless of powercap availability + +- **devicePath**: Template for MSR device file paths + - Default: `"/dev/cpu/%d/msr"` + - The `%d` placeholder is replaced with the CPU number + - Must be accessible with appropriate permissions + +⚠️ **Security Note**: MSR access requires elevated privileges and may be restricted on some systems due to security considerations (PLATYPUS attacks, CVE-2020-8694/8695). Use MSR configuration only when necessary and ensure proper system security measures are in place. 
+ +**Prerequisites for MSR support:** + +- Intel CPU with RAPL support +- `msr` kernel module loaded (`modprobe msr`) +- Read access to `/dev/cpu/*/msr` files +- Elevated privileges (typically root) + ### 📦 Exporter Configuration ```yaml diff --git a/hack/config.yaml b/hack/config.yaml index 8beb0587a1..978a8d171c 100644 --- a/hack/config.yaml +++ b/hack/config.yaml @@ -35,6 +35,11 @@ host: rapl: zones: [] # zones to be enabled, empty enables all default zones +msr: # MSR fallback configuration for RAPL energy reading + enabled: false # enable automatic MSR fallback when powercap unavailable (default: false) + force: false # force MSR usage even if powercap available (testing only, default: false) + devicePath: /dev/cpu/%d/msr # MSR device path template (default: "/dev/cpu/%d/msr") + exporter: stdout: # stdout exporter related config enabled: false # disabled by default diff --git a/internal/device/mock_cpu_power_meter.go b/internal/device/mock_cpu_power_meter.go index 7c3e8717ce..4111f8e70e 100644 --- a/internal/device/mock_cpu_power_meter.go +++ b/internal/device/mock_cpu_power_meter.go @@ -5,14 +5,6 @@ package device // TODO: Move this mock to a separate testutil package -import ( - "slices" - "testing" - - "github.com/prometheus/procfs/sysfs" - "github.com/stretchr/testify/require" -) - const ( validSysFSPath = "testdata/sys" badSysFSPath = "testdata/bad_sysfs" @@ -67,27 +59,3 @@ func (m *MockRaplZone) OnEnergy(j Energy, err error) { func (m *MockRaplZone) Inc(delta Energy) { m.energy = (m.energy + delta) % m.maxMicroJoules } - -func validSysFSFixtures(t *testing.T) sysfs.FS { - t.Helper() - fs, err := sysfs.NewFS(validSysFSPath) - require.NoError(t, err, "Failed to create sysfs test FS") - return fs -} - -func invalidSysFSFixtures(t *testing.T) sysfs.FS { - t.Helper() - fs, err := sysfs.NewFS(badSysFSPath) - require.NoError(t, err, "Failed to create sysfs test FS") - return fs -} - -func sortedZoneNames(zones []EnergyZone) []string { - names := make([]string, 
len(zones)) - for i, zone := range zones { - names[i] = zone.Name() - } - slices.Sort(names) - - return names -} diff --git a/internal/device/msr_reader.go b/internal/device/msr_reader.go new file mode 100644 index 0000000000..d284ab5fa4 --- /dev/null +++ b/internal/device/msr_reader.go @@ -0,0 +1,290 @@ +// SPDX-FileCopyrightText: 2025 The Kepler Authors +// SPDX-License-Identifier: Apache-2.0 + +package device + +import ( + "fmt" + "log/slog" + "os" + "path/filepath" + "sort" + "strconv" + "sync" +) + +// msrReader implements raplReader using Intel MSR (Model Specific Register) interface +type msrReader struct { + msrFiles map[int]*os.File // CPU ID -> MSR file handle + zones []EnergyZone // Available energy zones + energyUnit float64 // Energy unit in microjoules per LSB + devicePath string // MSR device path template + logger *slog.Logger + mu sync.RWMutex // Thread safety for zone operations +} + +// MSR zone configuration mapping zone names to MSR offsets +var msrZoneConfig = map[string]uint32{ + ZonePackage: MSRPkgEnergyStatus, + ZonePP0: MSRPP0EnergyStatus, // Maps to "core" zone + ZoneDRAM: MSRDRAMEnergyStatus, +} + +// zoneNameMapping maps MSR zone names to standard RAPL zone names +var zoneNameMapping = map[string]string{ + ZonePP0: ZoneCore, // PP0 (Power Plane 0) is the core domain +} + +// NewMSRReader creates a new MSR reader using the specified device path template +func NewMSRReader(devicePath string, logger *slog.Logger) *msrReader { + if logger == nil { + logger = slog.Default() + } + + return &msrReader{ + msrFiles: make(map[int]*os.File), + devicePath: devicePath, + logger: logger.With("service", "msr-reader"), + } +} + +// Name returns the name of this power reader implementation +func (m *msrReader) Name() string { + return "msr" +} + +// Available checks if MSR interface is available on this system +func (m *msrReader) Available() bool { + // Derive CPU directory from devicePath (e.g., "/dev/cpu/%d/msr" -> "/dev/cpu") + cpuDir := 
filepath.Dir(filepath.Dir(m.devicePath)) + + // Check if CPU directory exists + if _, err := os.Stat(cpuDir); os.IsNotExist(err) { + m.logger.Debug("MSR not available: CPU directory does not exist", "dir", cpuDir) + return false + } + + // Check if we can find at least one CPU with MSR access + // This validates that MSR interface is not just present but usable + cpuIDs, err := m.findAvailableCPUs() + if err != nil { + m.logger.Debug("MSR not available: failed to scan for CPUs", "error", err) + return false + } + + if len(cpuIDs) == 0 { + m.logger.Debug("MSR not available: no CPUs with MSR access found") + return false + } + + return true +} + +// Init initializes the MSR reader and opens MSR files for all available CPUs +func (m *msrReader) Init() error { + if !m.Available() { + return fmt.Errorf("MSR interface not available") + } + + // Find available CPUs + cpuIDs, err := m.findAvailableCPUs() + if err != nil { + return fmt.Errorf("failed to find available CPUs: %w", err) + } + + if len(cpuIDs) == 0 { + return fmt.Errorf("no CPUs with MSR access found") + } + + // Open MSR files for all CPUs + for _, cpuID := range cpuIDs { + msrPath := fmt.Sprintf(m.devicePath, cpuID) + file, err := os.OpenFile(msrPath, os.O_RDONLY, 0) + if err != nil { + // Clean up any previously opened files + if closeErr := m.Close(); closeErr != nil { + m.logger.Warn("Failed to close MSR files", "error", closeErr) + } + return fmt.Errorf("failed to open MSR file %s: %w", msrPath, err) + } + m.msrFiles[cpuID] = file + } + + // Read energy unit from the first CPU + firstCPU := cpuIDs[0] + energyUnit, err := readEnergyUnit(m.msrFiles[firstCPU]) + if err != nil { + if closeErr := m.Close(); closeErr != nil { + m.logger.Warn("Failed to close MSR files", "error", closeErr) + } + return fmt.Errorf("failed to read energy unit from CPU %d: %w", firstCPU, err) + } + m.energyUnit = energyUnit + + // Create zones for all available MSR energy counters + if err := m.createZones(); err != nil { + if 
closeErr := m.Close(); closeErr != nil { + m.logger.Warn("Failed to close MSR files", "error", closeErr) + } + return fmt.Errorf("failed to create MSR zones: %w", err) + } + + m.logger.Info("MSR reader initialized", + "cpus", len(m.msrFiles), + "zones", len(m.zones), + "energy_unit_uj", m.energyUnit) + + return nil +} + +// Zones returns the list of MSR-based energy zones +func (m *msrReader) Zones() ([]EnergyZone, error) { + m.mu.RLock() + defer m.mu.RUnlock() + + if len(m.zones) == 0 { + return nil, fmt.Errorf("MSR reader not initialized or no zones available") + } + + // Return a copy to prevent external modification + zones := make([]EnergyZone, len(m.zones)) + copy(zones, m.zones) + return zones, nil +} + +// Close closes all MSR files and releases resources +func (m *msrReader) Close() error { + var lastErr error + + for cpuID, file := range m.msrFiles { + if err := file.Close(); err != nil { + lastErr = err + m.logger.Warn("Failed to close MSR file", "cpu", cpuID, "error", err) + } + } + + // Clear the map + m.msrFiles = make(map[int]*os.File) + m.zones = nil + + return lastErr +} + +// findAvailableCPUs finds all CPUs that have MSR device files +func (m *msrReader) findAvailableCPUs() ([]int, error) { + // Derive CPU directory from devicePath (e.g., "/dev/cpu/%d/msr" -> "/dev/cpu") + cpuDir := filepath.Dir(filepath.Dir(m.devicePath)) + entries, err := os.ReadDir(cpuDir) + if err != nil { + return nil, fmt.Errorf("failed to read CPU directory %s: %w", cpuDir, err) + } + + var cpuIDs []int + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + // Parse CPU ID from directory name + cpuID, err := strconv.Atoi(entry.Name()) + if err != nil { + continue // Skip non-numeric directories + } + + // Check if MSR file exists for this CPU + msrPath := fmt.Sprintf(m.devicePath, cpuID) + if _, err := os.Stat(msrPath); err == nil { + cpuIDs = append(cpuIDs, cpuID) + } + } + + // Sort CPU IDs for consistent ordering + sort.Ints(cpuIDs) + + return 
cpuIDs, nil +} + +// createZones creates MSR-based energy zones for all available MSR counters +func (m *msrReader) createZones() error { + m.mu.Lock() + defer m.mu.Unlock() + + m.zones = nil + + // Get sorted CPU IDs for consistent zone ordering + var cpuIDs []int + for cpuID := range m.msrFiles { + cpuIDs = append(cpuIDs, cpuID) + } + sort.Ints(cpuIDs) + + // Group zones by name for potential aggregation + zoneGroups := make(map[string][]*msrZone) + + // Create zones for each MSR counter on each CPU + for _, cpuID := range cpuIDs { + msrFile := m.msrFiles[cpuID] + + for zoneName, msrOffset := range msrZoneConfig { + // Test if this MSR register is readable on this CPU + if !m.isRegisterReadable(msrFile, msrOffset) { + m.logger.Debug("MSR register not readable, skipping zone", + "cpu", cpuID, "zone", zoneName, "msr", fmt.Sprintf("0x%x", msrOffset)) + continue + } + + // Map internal zone names to standard RAPL names if needed + displayName := zoneName + if mappedName, exists := zoneNameMapping[zoneName]; exists { + displayName = mappedName + } + + // Create MSR zone + zone := NewMSRZone(displayName, cpuID, cpuID, msrOffset, m.energyUnit, msrFile) + zoneGroups[displayName] = append(zoneGroups[displayName], zone) + + m.logger.Debug("Created MSR zone", + "name", displayName, "cpu", cpuID, "msr", fmt.Sprintf("0x%x", msrOffset)) + } + } + + // Convert zone groups to EnergyZone interfaces + // For multi-socket systems, aggregate zones with the same name + for name, zones := range zoneGroups { + if len(zones) == 1 { + // Single zone - use directly + m.zones = append(m.zones, zones[0]) + } else { + // Multiple zones - create aggregated zone + var energyZones []EnergyZone + for _, zone := range zones { + energyZones = append(energyZones, zone) + } + aggregated := NewAggregatedZone(energyZones) + m.zones = append(m.zones, aggregated) + + m.logger.Debug("Created aggregated MSR zone", + "name", name, "zone_count", len(zones)) + } + } + + if len(m.zones) == 0 { + return 
fmt.Errorf("no readable MSR energy counters found") + } + + return nil +} + +// isRegisterReadable tests if an MSR register can be read without error +func (m *msrReader) isRegisterReadable(msrFile *os.File, msrOffset uint32) bool { + // Try to seek to the register + _, err := msrFile.Seek(int64(msrOffset), 0) + if err != nil { + return false + } + + // Try to read 8 bytes from the register + buf := make([]byte, 8) + _, err = msrFile.Read(buf) + return err == nil +} diff --git a/internal/device/msr_reader_test.go b/internal/device/msr_reader_test.go new file mode 100644 index 0000000000..81b91d6b68 --- /dev/null +++ b/internal/device/msr_reader_test.go @@ -0,0 +1,406 @@ +// SPDX-FileCopyrightText: 2025 The Kepler Authors +// SPDX-License-Identifier: Apache-2.0 + +package device + +/* +MSR Test Data Documentation + +This test file uses mock MSR data to simulate Intel RAPL MSR registers for testing +the MSR reader implementation. The test data simulates the following registers: + +MSR Register Values: +- 0x606: IA32_RAPL_POWER_UNIT - Power unit register containing scaling factors +- 0x611: IA32_PKG_ENERGY_STATUS - Package energy counter (32-bit, wraps around) +- 0x639: IA32_PP0_ENERGY_STATUS - Power Plane 0 (cores) energy counter +- 0x619: IA32_DRAM_ENERGY_STATUS - DRAM energy counter + +File Format: +Each MSR register value is stored as 8 bytes (uint64) in little-endian format. +The test creates temporary MSR files and writes mock data at specific byte offsets +corresponding to the MSR register addresses. + +Energy Unit Calculation: +The power unit register (0x606) contains scaling factors in specific bit fields: +- Bits 12:8 contain the energy unit value (e.g., value 16 means 1/(2^16) joules per LSB) +- Energy counters use this unit to convert raw MSR values to microjoules +- Example: energy_unit = 15.2587890625 microjoules (when unit value = 16) + +Counter Overflow: +MSR energy counters are 32-bit values that wrap around at 2^32. 
The implementation +must handle this overflow correctly to maintain accurate energy measurements. +*/ + +import ( + "fmt" + "log/slog" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// fakeMSRReader implements raplReader for testing +type fakeMSRReader struct { + zones []EnergyZone + available bool + initError error + name string +} + +func (f *fakeMSRReader) Zones() ([]EnergyZone, error) { + return f.zones, nil +} + +func (f *fakeMSRReader) Available() bool { + return f.available +} + +func (f *fakeMSRReader) Init() error { + return f.initError +} + +func (f *fakeMSRReader) Close() error { + return nil +} + +func (f *fakeMSRReader) Name() string { + if f.name == "" { + return "fake-msr" + } + return f.name +} + +// fakeMSRZone implements EnergyZone for testing +type fakeMSRZone struct { + name string + index int + path string + energy Energy + maxEnergy Energy + energyErr error +} + +func (f *fakeMSRZone) Name() string { + return f.name +} + +func (f *fakeMSRZone) Index() int { + return f.index +} + +func (f *fakeMSRZone) Path() string { + return f.path +} + +func (f *fakeMSRZone) Energy() (Energy, error) { + return f.energy, f.energyErr +} + +func (f *fakeMSRZone) MaxEnergy() Energy { + return f.maxEnergy +} + +func TestMSRReader_Available(t *testing.T) { + tests := []struct { + name string + setupDevDir bool + createMSRFile bool + expectedResult bool + }{ + { + name: "MSR available with dev directory and msr file", + setupDevDir: true, + createMSRFile: true, + expectedResult: true, + }, + { + name: "MSR unavailable without dev directory", + setupDevDir: false, + createMSRFile: false, + expectedResult: false, + }, + { + name: "MSR unavailable without msr file", + setupDevDir: true, + createMSRFile: false, + expectedResult: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create temporary directory structure + tempDir := t.TempDir() + var 
devicePath string + + if tt.setupDevDir { + // Create /dev/cpu/0 directory + cpuDir := filepath.Join(tempDir, "dev", "cpu", "0") + require.NoError(t, os.MkdirAll(cpuDir, 0755)) + + devicePath = filepath.Join(tempDir, "dev", "cpu", "%d", "msr") + + if tt.createMSRFile { + msrFile := filepath.Join(cpuDir, "msr") + file, err := os.Create(msrFile) + require.NoError(t, err) + _ = file.Close() + } + } else { + devicePath = filepath.Join(tempDir, "nonexistent", "cpu", "%d", "msr") + } + + reader := NewMSRReader(devicePath, slog.Default()) + result := reader.Available() + + assert.Equal(t, tt.expectedResult, result) + }) + } +} + +func TestMSRReader_Init(t *testing.T) { + tests := []struct { + name string + setupMSRs func(tempDir string) string + expectError bool + errorMsg string + }{ + { + name: "successful initialization", + setupMSRs: func(tempDir string) string { + // Create CPU 0 with MSR file containing mock data + cpuDir := filepath.Join(tempDir, "dev", "cpu", "0") + require.NoError(t, os.MkdirAll(cpuDir, 0755)) + + msrFile := filepath.Join(cpuDir, "msr") + createMockMSRFile(t, msrFile) + + return filepath.Join(tempDir, "dev", "cpu", "%d", "msr") + }, + expectError: false, + }, + { + name: "initialization fails with no CPUs", + setupMSRs: func(tempDir string) string { + // Create empty dev directory + require.NoError(t, os.MkdirAll(filepath.Join(tempDir, "dev", "cpu"), 0755)) + return filepath.Join(tempDir, "dev", "cpu", "%d", "msr") + }, + expectError: true, + errorMsg: "MSR interface not available", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tempDir := t.TempDir() + devicePath := tt.setupMSRs(tempDir) + + reader := NewMSRReader(devicePath, slog.Default()) + err := reader.Init() + + if tt.expectError { + assert.Error(t, err) + if tt.errorMsg != "" { + assert.Contains(t, err.Error(), tt.errorMsg) + } + } else { + assert.NoError(t, err) + } + + // Clean up + _ = reader.Close() + }) + } +} + +func TestMSRReader_Zones(t *testing.T) 
{ + tempDir := t.TempDir() + + // Create CPU 0 and CPU 1 with MSR files + for i := 0; i < 2; i++ { + cpuDir := filepath.Join(tempDir, "dev", "cpu", fmt.Sprintf("%d", i)) + require.NoError(t, os.MkdirAll(cpuDir, 0755)) + + msrFile := filepath.Join(cpuDir, "msr") + createMockMSRFile(t, msrFile) + } + + devicePath := filepath.Join(tempDir, "dev", "cpu", "%d", "msr") + reader := NewMSRReader(devicePath, slog.Default()) + + require.NoError(t, reader.Init()) + t.Cleanup(func() { + assert.NoError(t, reader.Close()) + }) + + zones, err := reader.Zones() + require.NoError(t, err) + + // Should have zones for package, core (pp0), and dram + // On a 2-CPU system, we should get aggregated zones + assert.Greater(t, len(zones), 0) + + // Verify zone names + zoneNames := make(map[string]bool) + for _, zone := range zones { + zoneNames[zone.Name()] = true + + // Test that each zone can provide energy readings + energy, err := zone.Energy() + assert.NoError(t, err) + assert.GreaterOrEqual(t, energy, Energy(0)) + } + + // Should have at least package zone + assert.True(t, zoneNames["package"] || zoneNames["core"] || zoneNames["dram"], + "Expected at least one MSR zone type") +} + +func TestMSRReader_Name(t *testing.T) { + reader := NewMSRReader("/dev/cpu/%d/msr", slog.Default()) + assert.Equal(t, "msr", reader.Name()) +} + +func TestMSRReader_Close(t *testing.T) { + tempDir := t.TempDir() + + // Create CPU 0 with MSR file + cpuDir := filepath.Join(tempDir, "dev", "cpu", "0") + require.NoError(t, os.MkdirAll(cpuDir, 0755)) + + msrFile := filepath.Join(cpuDir, "msr") + createMockMSRFile(t, msrFile) + + devicePath := filepath.Join(tempDir, "dev", "cpu", "%d", "msr") + reader := NewMSRReader(devicePath, slog.Default()) + + require.NoError(t, reader.Init()) + + // Verify it has zones before closing + zones, err := reader.Zones() + require.NoError(t, err) + assert.Greater(t, len(zones), 0) + + // Close should not error + err = reader.Close() + assert.NoError(t, err) + + // After closing, 
zones should be cleared + _, err = reader.Zones() + assert.Error(t, err) + assert.Contains(t, err.Error(), "MSR reader not initialized") +} + +func TestMSRZone_Energy(t *testing.T) { + tests := []struct { + name string + msrData []byte + energyUnit float64 + expectedRange [2]Energy // min, max range + }{ + { + name: "normal energy reading", + msrData: []byte{ + 0x00, 0x00, 0x10, 0x00, // 0x100000 in lower 32 bits + 0x00, 0x00, 0x00, 0x00, // upper 32 bits + }, + energyUnit: 15.2587890625, // 1000000 / 2^16 + expectedRange: [2]Energy{Energy(15999998), Energy(16000000)}, // Approximately 16.0 J + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create temporary MSR file with specific data + tempDir := t.TempDir() + msrFile := filepath.Join(tempDir, "msr") + + file, err := os.Create(msrFile) + require.NoError(t, err) + t.Cleanup(func() { + assert.NoError(t, file.Close()) + }) + + // Write mock MSR data at different offsets + _, err = file.WriteAt(tt.msrData, int64(MSRPkgEnergyStatus)) + require.NoError(t, err) + + // Create MSR zone + zone := NewMSRZone("package", 0, 0, MSRPkgEnergyStatus, tt.energyUnit, file) + + energy, err := zone.Energy() + require.NoError(t, err) + + // Check energy is within expected range + assert.GreaterOrEqual(t, energy, tt.expectedRange[0]) + assert.LessOrEqual(t, energy, tt.expectedRange[1]) + }) + } +} + +func TestMSRZone_MaxEnergy(t *testing.T) { + energyUnit := 15.2587890625 // 1000000 / 2^16 + + zone := NewMSRZone("package", 0, 0, MSRPkgEnergyStatus, energyUnit, nil) + maxEnergy := zone.MaxEnergy() + + // For 32-bit counter, max should be 2^32 * energyUnit + expectedMax := Energy(float64(0xFFFFFFFF) * energyUnit) + assert.Equal(t, expectedMax, maxEnergy) +} + +// Helper functions + +// createMockMSRFile creates a mock MSR device file with test data +// The file simulates reading from /dev/cpu/N/msr with realistic RAPL register values +func createMockMSRFile(t *testing.T, path string) { + file, err := 
os.Create(path) + require.NoError(t, err) + defer func() { + assert.NoError(t, file.Close()) + }() + + // Write power unit register at offset 0x606 (IA32_RAPL_POWER_UNIT) + // This register contains scaling factors for energy measurements + // Bits 12:8 = energy unit: 16 means 1/(2^16) = 15.2587890625 microjoules per LSB + powerUnitData := []byte{ + 0x00, 0x10, 0x00, 0x00, // Energy unit = 16 in bits 12:8 + 0x00, 0x00, 0x00, 0x00, // Upper 32 bits (unused) + } + _, err = file.WriteAt(powerUnitData, int64(MSRPowerUnit)) + require.NoError(t, err) + + // Write package energy counter at offset 0x611 (IA32_PKG_ENERGY_STATUS) + // This is a 32-bit counter that accumulates package energy consumption + // Raw value: 0x100000 = 1048576 LSB → ~16.0 Joules with energy unit 15.26 μJ/LSB + pkgEnergyData := []byte{ + 0x00, 0x00, 0x10, 0x00, // 32-bit energy counter value + 0x00, 0x00, 0x00, 0x00, // Upper 32 bits (reserved/unused) + } + _, err = file.WriteAt(pkgEnergyData, int64(MSRPkgEnergyStatus)) + require.NoError(t, err) + + // Write PP0 energy counter at offset 0x639 (IA32_PP0_ENERGY_STATUS) + // PP0 represents Power Plane 0 (CPU cores) energy consumption + // Raw value: 0x80000 = 524288 LSB → ~8.0 Joules + pp0EnergyData := []byte{ + 0x00, 0x00, 0x08, 0x00, // 32-bit energy counter value + 0x00, 0x00, 0x00, 0x00, // Upper 32 bits (reserved/unused) + } + _, err = file.WriteAt(pp0EnergyData, int64(MSRPP0EnergyStatus)) + require.NoError(t, err) + + // Write DRAM energy counter at offset 0x619 (IA32_DRAM_ENERGY_STATUS) + // This counter tracks memory subsystem energy consumption + // Raw value: 0x40000 = 262144 LSB → ~4.0 Joules + dramEnergyData := []byte{ + 0x00, 0x00, 0x04, 0x00, // 32-bit energy counter value + 0x00, 0x00, 0x00, 0x00, // Upper 32 bits (reserved/unused) + } + _, err = file.WriteAt(dramEnergyData, int64(MSRDRAMEnergyStatus)) + require.NoError(t, err) +} diff --git a/internal/device/msr_zone.go b/internal/device/msr_zone.go new file mode 100644 index 
0000000000..34ec7dcc92 --- /dev/null +++ b/internal/device/msr_zone.go @@ -0,0 +1,125 @@ +// SPDX-FileCopyrightText: 2025 The Kepler Authors +// SPDX-License-Identifier: Apache-2.0 + +package device + +import ( + "encoding/binary" + "fmt" + "math" + "os" +) + +// MSR Register offsets for Intel RAPL energy counters +const ( + // IA32_RAPL_POWER_UNIT - Power unit register containing scaling factors + MSRPowerUnit = 0x606 + + // Energy counters (32-bit, wraparound at ~4 billion) + MSRPkgEnergyStatus = 0x611 // Package energy counter + MSRPP0EnergyStatus = 0x639 // Power Plane 0 (cores) energy counter + MSRDRAMEnergyStatus = 0x619 // DRAM energy counter +) + +// msrZone implements EnergyZone interface for MSR-based energy reading +type msrZone struct { + name string + index int + cpuID int + msrOffset uint32 + energyUnit float64 // Energy unit in microjoules per LSB + msrFile *os.File +} + +// NewMSRZone creates a new MSR-based energy zone +func NewMSRZone(name string, index, cpuID int, msrOffset uint32, energyUnit float64, msrFile *os.File) *msrZone { + return &msrZone{ + name: name, + index: index, + cpuID: cpuID, + msrOffset: msrOffset, + energyUnit: energyUnit, + msrFile: msrFile, + } +} + +// Name returns the zone name (package, pp0, dram) +func (m *msrZone) Name() string { + return m.name +} + +// Index returns the zone index (CPU socket/package index) +func (m *msrZone) Index() int { + return m.index +} + +// Path returns the MSR device path for this zone +func (m *msrZone) Path() string { + return fmt.Sprintf("/dev/cpu/%d/msr:0x%x", m.cpuID, m.msrOffset) +} + +// Energy reads the current energy value from the MSR register +func (m *msrZone) Energy() (Energy, error) { + if m.msrFile == nil { + return 0, fmt.Errorf("MSR file not opened for CPU %d", m.cpuID) + } + + // Read 64-bit MSR register at the specified offset + _, err := m.msrFile.Seek(int64(m.msrOffset), 0) + if err != nil { + return 0, fmt.Errorf("failed to seek to MSR offset 0x%x: %w", m.msrOffset, err) 
+ } + + var msrValue uint64 + err = binary.Read(m.msrFile, binary.LittleEndian, &msrValue) + if err != nil { + return 0, fmt.Errorf("failed to read MSR 0x%x from CPU %d: %w", m.msrOffset, m.cpuID, err) + } + + // Extract the 32-bit energy counter from the MSR value + // Energy counters are in the lower 32 bits + energyCounter := uint32(msrValue & 0xFFFFFFFF) + + // Convert to microjoules using the energy unit + energyMicroJoules := float64(energyCounter) * m.energyUnit + + return Energy(energyMicroJoules), nil +} + +// MaxEnergy returns the maximum energy value before wraparound +// MSR energy counters are 32-bit, so they wrap at 2^32 +func (m *msrZone) MaxEnergy() Energy { + // 32-bit counter maximum value converted to microjoules + maxCounter := uint64(math.MaxUint32) + maxEnergyMicroJoules := float64(maxCounter) * m.energyUnit + return Energy(maxEnergyMicroJoules) +} + +// readEnergyUnit reads the energy unit from the IA32_RAPL_POWER_UNIT MSR +// Returns the energy unit in microjoules per LSB +func readEnergyUnit(msrFile *os.File) (float64, error) { + if msrFile == nil { + return 0, fmt.Errorf("MSR file not opened") + } + + // Seek to the power unit MSR + _, err := msrFile.Seek(int64(MSRPowerUnit), 0) + if err != nil { + return 0, fmt.Errorf("failed to seek to MSR power unit register: %w", err) + } + + var powerUnit uint64 + err = binary.Read(msrFile, binary.LittleEndian, &powerUnit) + if err != nil { + return 0, fmt.Errorf("failed to read MSR power unit register: %w", err) + } + + // Energy unit is in bits 12:8 of the power unit register + energyUnitBits := (powerUnit >> 8) & 0x1F + + // Energy unit = 1 / (2^energyUnitBits) joules + // Convert to microjoules: multiply by 1,000,000 + energyUnit := 1000000.0 / float64(uint64(1)<= 0) + assert.GreaterOrEqual(t, zone.Index(), 0) + // Zone should have a path + assert.NotEmpty(t, zone.Path()) + + // Test energy reading + energy, err := zone.Energy() + assert.NoError(t, err) + assert.Greater(t, uint64(energy), 
uint64(0)) // Should have some energy value +} + +func TestPowercapReader_Name(t *testing.T) { + reader, err := NewPowercapReader("/tmp") + require.NoError(t, err) + assert.Equal(t, "powercap", reader.Name()) +} + +func TestPowercapReader_Close(t *testing.T) { + reader, err := NewPowercapReader("/tmp") + require.NoError(t, err) + + err = reader.Close() + assert.NoError(t, err) +} + +func TestSysfsRaplZone_Implementation(t *testing.T) { + reader, err := NewPowercapReader(validSysFSPath) + require.NoError(t, err) + + zones, err := reader.Zones() + require.NoError(t, err) + require.Greater(t, len(zones), 0) + + // Test the first zone's EnergyZone interface methods + zone := zones[0] + + // Test all EnergyZone interface methods + assert.NotEmpty(t, zone.Name()) // Should have a name + assert.GreaterOrEqual(t, zone.Index(), 0) // Should have a valid index + assert.NotEmpty(t, zone.Path()) // Should have a path + + energy, err := zone.Energy() + assert.NoError(t, err) + assert.Greater(t, uint64(energy), uint64(0)) // Should have some energy value + + maxEnergy := zone.MaxEnergy() + assert.Greater(t, uint64(maxEnergy), uint64(0)) // Should have some max energy value +} diff --git a/internal/device/rapl_sysfs_power_meter.go b/internal/device/rapl_power_meter.go similarity index 52% rename from internal/device/rapl_sysfs_power_meter.go rename to internal/device/rapl_power_meter.go index c5b64578a0..92f21f04a0 100644 --- a/internal/device/rapl_sysfs_power_meter.go +++ b/internal/device/rapl_power_meter.go @@ -8,27 +8,41 @@ import ( "log/slog" "strings" - "github.com/prometheus/procfs/sysfs" + "k8s.io/utils/ptr" ) -// raplPowerMeter implements CPUPowerMeter using sysfs +// raplPowerMeter implements CPUPowerMeter with automatic MSR fallback support type raplPowerMeter struct { - reader sysfsReader + reader raplReader // Current active reader (powercap or MSR) cachedZones []EnergyZone logger *slog.Logger zoneFilter []string topZone EnergyZone + + // Configuration for MSR 
fallback + msrConfig MSRConfig + sysfsPath string + useMSR bool // Track which backend is active +} + +// MSRConfig holds MSR-specific configuration +type MSRConfig struct { + Enabled *bool + Force *bool + DevicePath string } type OptionFn func(*raplPowerMeter) -// sysfsReader is an interface for a sysfs filesystem used by raplPowerMeter to mock for testing -type sysfsReader interface { - Zones() ([]EnergyZone, error) +// WithMSRConfig sets the MSR configuration for fallback behavior +func WithMSRConfig(msrConfig MSRConfig) OptionFn { + return func(pm *raplPowerMeter) { + pm.msrConfig = msrConfig + } } -// WithSysFSReader sets the sysfsReader used by raplPowerMeter -func WithSysFSReader(r sysfsReader) OptionFn { +// WithRaplReader sets a specific raplReader (for testing) +func WithRaplReader(r raplReader) OptionFn { return func(pm *raplPowerMeter) { pm.reader = r } @@ -49,17 +63,18 @@ func WithZoneFilter(zones []string) OptionFn { } } -// NewCPUPowerMeter creates a new CPU power meter +// NewCPUPowerMeter creates a new CPU power meter with MSR fallback support func NewCPUPowerMeter(sysfsPath string, opts ...OptionFn) (*raplPowerMeter, error) { - fs, err := sysfs.NewFS(sysfsPath) - if err != nil { - return nil, err - } - ret := &raplPowerMeter{ - reader: sysfsRaplReader{fs: fs}, logger: slog.Default().With("service", "rapl"), zoneFilter: []string{}, + sysfsPath: sysfsPath, + // Default MSR configuration (disabled) + msrConfig: MSRConfig{ + Enabled: ptr.To(false), + Force: ptr.To(false), + DevicePath: "/dev/cpu/%d/msr", + }, } for _, opt := range opts { @@ -70,21 +85,112 @@ func NewCPUPowerMeter(sysfsPath string, opts ...OptionFn) (*raplPowerMeter, erro } func (r *raplPowerMeter) Name() string { - return "rapl" + if r.useMSR { + return "rapl-msr" + } + return "rapl-powercap" } func (r *raplPowerMeter) Init() error { - // ensure zones can be read but don't cache them - zones, err := r.reader.Zones() + // Clear any cached state + r.cachedZones = nil + r.topZone = nil + 
+ // If a specific reader is set (for testing), use it directly + if r.reader != nil { + r.logger.Info("Using provided power reader", "reader", r.reader.Name()) + return r.validateReader(r.reader) + } + + // Determine which reader to use based on configuration and availability + reader, useMSR, err := r.selectRaplReader() if err != nil { - return err - } else if len(zones) == 0 { - return fmt.Errorf("no RAPL zones found") + return fmt.Errorf("failed to select power reader: %w", err) } - // try reading the first zone and return the error + r.reader = reader + r.useMSR = useMSR + + r.logger.Info("Selected power reader", + "reader", r.reader.Name(), + "msr_fallback", r.useMSR, + "force_msr", ptr.Deref(r.msrConfig.Force, false)) + + return r.validateReader(r.reader) +} + +// selectRaplReader chooses the appropriate RAPL reader based on configuration and availability +func (r *raplPowerMeter) selectRaplReader() (raplReader, bool, error) { + forceMSR := ptr.Deref(r.msrConfig.Force, false) + enableFallback := ptr.Deref(r.msrConfig.Enabled, false) + + // If force MSR is enabled, use MSR directly (for testing) + if forceMSR { + r.logger.Info("MSR forced via configuration") + msrReader := NewMSRReader(r.msrConfig.DevicePath, r.logger) + if !msrReader.Available() { + return nil, false, fmt.Errorf("MSR reader forced but not available") + } + if err := msrReader.Init(); err != nil { + return nil, false, fmt.Errorf("failed to initialize forced MSR reader: %w", err) + } + return msrReader, true, nil + } + + // Try powercap first (default behavior) + powercapReader, err := NewPowercapReader(r.sysfsPath) + if err == nil && powercapReader.Available() { + if err := powercapReader.Init(); err == nil { + r.logger.Debug("Using powercap reader") + return powercapReader, false, nil + } else { + r.logger.Debug("Powercap reader initialization failed", "error", err) + } + } else { + r.logger.Debug("Powercap reader not available", "error", err) + } + + // If powercap failed and MSR fallback 
is enabled, try MSR + if enableFallback { + r.logger.Info("Attempting MSR fallback as powercap unavailable") + + // Log security warning for MSR usage + r.logger.Warn("MSR fallback enabled - be aware of PLATYPUS attack vectors (CVE-2020-8694/8695)") + + msrReader := NewMSRReader(r.msrConfig.DevicePath, r.logger) + if !msrReader.Available() { + return nil, false, fmt.Errorf("neither powercap nor MSR readers are available") + } + if err := msrReader.Init(); err != nil { + return nil, false, fmt.Errorf("MSR fallback failed to initialize: %w", err) + } + + r.logger.Info("MSR fallback activated successfully") + return msrReader, true, nil + } + + // Neither powercap works nor MSR fallback is enabled + return nil, false, fmt.Errorf("powercap unavailable and MSR fallback disabled") +} + +// validateReader ensures the reader can provide valid energy readings +func (r *raplPowerMeter) validateReader(reader raplReader) error { + zones, err := reader.Zones() + if err != nil { + return fmt.Errorf("failed to get zones from %s reader: %w", reader.Name(), err) + } + + if len(zones) == 0 { + return fmt.Errorf("no energy zones found from %s reader", reader.Name()) + } + + // Try reading energy from the first zone to verify functionality _, err = zones[0].Energy() - return err + if err != nil { + return fmt.Errorf("failed to read energy from zone %s: %w", zones[0].Name(), err) + } + + return nil } func (r *raplPowerMeter) needsFiltering() bool { @@ -122,6 +228,10 @@ func (r *raplPowerMeter) Zones() ([]EnergyZone, error) { return r.cachedZones, nil } + if r.reader == nil { + return nil, fmt.Errorf("power reader not initialized") + } + zones, err := r.reader.Zones() if err != nil { return nil, err @@ -135,7 +245,6 @@ func (r *raplPowerMeter) Zones() ([]EnergyZone, error) { } // filter out non-standard zones - stdZoneMap := map[zoneKey]EnergyZone{} for _, zone := range zones { key := zoneKey{name: zone.Name(), index: zone.Index()} @@ -230,58 +339,23 @@ func (r *raplPowerMeter) 
PrimaryEnergyZone() (EnergyZone, error) { return zones[0], nil } -// isStandardRaplPath checks if a RAPL zone path is in the standard format -func isStandardRaplPath(path string) bool { - return strings.Contains(path, "/intel-rapl:") -} - -type sysfsRaplReader struct { - fs sysfs.FS +// Close releases resources held by the power reader +func (r *raplPowerMeter) Close() error { + if r.reader != nil { + return r.reader.Close() + } + return nil } -func (r sysfsRaplReader) Zones() ([]EnergyZone, error) { - raplZones, err := sysfs.GetRaplZones(r.fs) - if err != nil { - return nil, fmt.Errorf("failed to read rapl zones: %w", err) +// isStandardRaplPath checks if a RAPL zone path is in the standard format +func isStandardRaplPath(path string) bool { + // For powercap, check standard path format + if strings.Contains(path, "/intel-rapl:") { + return true } - - // convert sysfs.RaplZones to EnergyZones - energyZones := make([]EnergyZone, 0, len(raplZones)) - for _, zone := range raplZones { - energyZones = append(energyZones, sysfsRaplZone{zone}) + // For MSR, check MSR path format + if strings.Contains(path, "/dev/cpu/") && strings.Contains(path, "/msr:") { + return true } - - return energyZones, nil -} - -// sysfsRaplZone implements EnergyZone using sysfs.RaplZone. 
-// It is an adapter for the EnergyZone interface -type sysfsRaplZone struct { - zone sysfs.RaplZone -} - -// Name returns the name of the zone -func (s sysfsRaplZone) Name() string { - return s.zone.Name -} - -// Index returns the index of the zone -func (s sysfsRaplZone) Index() int { - return s.zone.Index -} - -// Path returns the path of the zone -func (s sysfsRaplZone) Path() string { - return s.zone.Path -} - -// Energy returns the current energy value -func (s sysfsRaplZone) Energy() (Energy, error) { - mj, err := s.zone.GetEnergyMicrojoules() - return Energy(mj), err -} - -// MaxEnergy returns the maximum energy value before wraparound -func (s sysfsRaplZone) MaxEnergy() Energy { - return Energy(s.zone.MaxMicrojoules) + return false } diff --git a/internal/device/rapl_power_meter_test.go b/internal/device/rapl_power_meter_test.go new file mode 100644 index 0000000000..0062846f68 --- /dev/null +++ b/internal/device/rapl_power_meter_test.go @@ -0,0 +1,330 @@ +// SPDX-FileCopyrightText: 2025 The Kepler Authors +// SPDX-License-Identifier: Apache-2.0 + +package device + +import ( + "fmt" + "log/slog" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "k8s.io/utils/ptr" +) + +func TestRaplPowerMeter_Init_WithMockReader(t *testing.T) { + tests := []struct { + name string + mockReader raplReader + expectedName string + expectError bool + }{ + { + name: "successful initialization with mock powercap reader", + mockReader: &fakePowercapReader{ + available: true, + zones: createTestZones("powercap"), + name: "powercap", + }, + expectedName: "powercap", + expectError: false, + }, + { + name: "successful initialization with mock MSR reader", + mockReader: &fakeMSRReader{ + available: true, + zones: createTestZones("msr"), + name: "msr", + }, + expectedName: "msr", + expectError: false, + }, + { + name: "initialization fails with reader that has no zones", + mockReader: &fakePowercapReader{ + available: true, + zones: 
[]EnergyZone{}, + name: "empty", + }, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pm, err := NewCPUPowerMeter( + "/fake/sysfs", + WithRaplReader(tt.mockReader), + ) + require.NoError(t, err) + + err = pm.Init() + + if tt.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedName, pm.reader.Name()) + } + }) + } +} + +func TestRaplPowerMeter_Name(t *testing.T) { + tests := []struct { + name string + useMSR bool + expected string + }{ + { + name: "powercap reader", + useMSR: false, + expected: "rapl-powercap", + }, + { + name: "msr reader", + useMSR: true, + expected: "rapl-msr", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pm := &raplPowerMeter{ + useMSR: tt.useMSR, + } + assert.Equal(t, tt.expected, pm.Name()) + }) + } +} + +func TestRaplPowerMeter_Zones_WithFiltering(t *testing.T) { + // Create test zones + testZones := []EnergyZone{ + &fakeMSRZone{name: "package", index: 0, path: "/fake/package", energy: Energy(1000)}, + &fakeMSRZone{name: "core", index: 0, path: "/fake/core", energy: Energy(500)}, + &fakeMSRZone{name: "dram", index: 0, path: "/fake/dram", energy: Energy(300)}, + } + + tests := []struct { + name string + zoneFilter []string + expected []string + }{ + { + name: "no filter - all zones", + zoneFilter: []string{}, + expected: []string{"package", "core", "dram"}, + }, + { + name: "filter package only", + zoneFilter: []string{"package"}, + expected: []string{"package"}, + }, + { + name: "filter core and dram", + zoneFilter: []string{"core", "dram"}, + expected: []string{"core", "dram"}, + }, + { + name: "filter non-existent zone", + zoneFilter: []string{"nonexistent"}, + expected: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mockReader := &fakeMSRReader{ + available: true, + zones: testZones, + } + + pm := &raplPowerMeter{ + reader: mockReader, + zoneFilter: 
tt.zoneFilter, + logger: slog.Default(), + } + + zones, err := pm.Zones() + if len(tt.expected) == 0 { + assert.Error(t, err) + assert.Contains(t, err.Error(), "no RAPL zones found after filtering") + } else { + assert.NoError(t, err) + assert.Equal(t, len(tt.expected), len(zones)) + + zoneNames := make([]string, len(zones)) + for i, zone := range zones { + zoneNames[i] = zone.Name() + } + + for _, expected := range tt.expected { + assert.Contains(t, zoneNames, expected) + } + } + }) + } +} + +func TestRaplPowerMeter_PrimaryEnergyZone(t *testing.T) { + tests := []struct { + name string + availableZones []string + expectedZone string + }{ + { + name: "psys has highest priority", + availableZones: []string{"core", "package", "psys", "dram"}, + expectedZone: "psys", + }, + { + name: "package has second priority", + availableZones: []string{"core", "package", "dram"}, + expectedZone: "package", + }, + { + name: "core has third priority", + availableZones: []string{"core", "dram"}, + expectedZone: "core", + }, + { + name: "fallback to first zone if no priority match", + availableZones: []string{"uncore", "other"}, + expectedZone: "uncore", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var testZones []EnergyZone + for i, name := range tt.availableZones { + testZones = append(testZones, &fakeMSRZone{ + name: name, + index: i, + path: fmt.Sprintf("/fake/%s", name), + }) + } + + mockReader := &fakeMSRReader{ + available: true, + zones: testZones, + } + + pm := &raplPowerMeter{ + reader: mockReader, + logger: slog.Default(), + } + + primaryZone, err := pm.PrimaryEnergyZone() + assert.NoError(t, err) + assert.Equal(t, tt.expectedZone, primaryZone.Name()) + + // Test caching - call again and should get same result + primaryZone2, err := pm.PrimaryEnergyZone() + assert.NoError(t, err) + assert.Equal(t, primaryZone, primaryZone2) + }) + } +} + +func TestRaplPowerMeter_Close(t *testing.T) { + mockReader := &fakeMSRReader{ + available: true, + 
zones: createTestZones("test"), + } + + pm := &raplPowerMeter{ + reader: mockReader, + logger: slog.Default(), + } + + err := pm.Close() + assert.NoError(t, err) + + // Test closing when reader is nil + pm.reader = nil + err = pm.Close() + assert.NoError(t, err) +} + +func TestNewCPUPowerMeter(t *testing.T) { + sysfsPath := "/fake/sysfs" + + pm, err := NewCPUPowerMeter(sysfsPath) + require.NoError(t, err) + + assert.Equal(t, sysfsPath, pm.sysfsPath) + assert.NotNil(t, pm.logger) + assert.Equal(t, []string{}, pm.zoneFilter) + + // Test MSR config defaults + assert.Equal(t, ptr.To(false), pm.msrConfig.Enabled) + assert.Equal(t, ptr.To(false), pm.msrConfig.Force) + assert.Equal(t, "/dev/cpu/%d/msr", pm.msrConfig.DevicePath) +} + +func TestNewCPUPowerMeter_WithOptions(t *testing.T) { + sysfsPath := "/fake/sysfs" + + testLogger := slog.Default().With("test", "meter") + testZoneFilter := []string{"package", "core"} + testMSRConfig := MSRConfig{ + Enabled: ptr.To(true), + Force: ptr.To(false), + DevicePath: "/custom/cpu/%d/msr", + } + + pm, err := NewCPUPowerMeter( + sysfsPath, + WithRaplLogger(testLogger), + WithZoneFilter(testZoneFilter), + WithMSRConfig(testMSRConfig), + ) + require.NoError(t, err) + + assert.Equal(t, sysfsPath, pm.sysfsPath) + assert.Equal(t, testZoneFilter, pm.zoneFilter) + assert.Equal(t, testMSRConfig, pm.msrConfig) +} + +// Helper types and functions + +type fakePowercapReader struct { + zones []EnergyZone + available bool + initError error + name string +} + +func (f *fakePowercapReader) Zones() ([]EnergyZone, error) { + return f.zones, nil +} + +func (f *fakePowercapReader) Available() bool { + return f.available +} + +func (f *fakePowercapReader) Init() error { + return f.initError +} + +func (f *fakePowercapReader) Close() error { + return nil +} + +func (f *fakePowercapReader) Name() string { + if f.name == "" { + return "fake-powercap" + } + return f.name +} + +func createTestZones(prefix string) []EnergyZone { + return []EnergyZone{ + 
&fakeMSRZone{name: "package", index: 0, path: fmt.Sprintf("/%s/package", prefix), energy: Energy(1000)}, + &fakeMSRZone{name: "core", index: 0, path: fmt.Sprintf("/%s/core", prefix), energy: Energy(500)}, + &fakeMSRZone{name: "dram", index: 0, path: fmt.Sprintf("/%s/dram", prefix), energy: Energy(300)}, + } +} diff --git a/internal/device/rapl_reader.go b/internal/device/rapl_reader.go new file mode 100644 index 0000000000..aac7e375ae --- /dev/null +++ b/internal/device/rapl_reader.go @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: 2025 The Kepler Authors +// SPDX-License-Identifier: Apache-2.0 + +package device + +// raplReader is an internal abstraction for different RAPL reading backends +// (powercap sysfs and MSR). This interface allows the raplPowerMeter to work +// with different RAPL reading mechanisms while maintaining a consistent API. +type raplReader interface { + // Zones returns the list of energy zones available from this power reader + Zones() ([]EnergyZone, error) + + // Available checks if the power reader can be used on the current system + Available() bool + + // Init initializes the power reader and verifies it can read energy values + Init() error + + // Close releases any resources held by the power reader + Close() error + + // Name returns a human-readable name for the power reader implementation + Name() string +} diff --git a/internal/device/rapl_sysfs_power_meter_test.go b/internal/device/rapl_sysfs_power_meter_test.go deleted file mode 100644 index 437d12b6a6..0000000000 --- a/internal/device/rapl_sysfs_power_meter_test.go +++ /dev/null @@ -1,587 +0,0 @@ -// SPDX-FileCopyrightText: 2025 The Kepler Authors -// SPDX-License-Identifier: Apache-2.0 - -package device - -import ( - "errors" - "log/slog" - "strings" - "testing" - - "github.com/prometheus/procfs/sysfs" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" -) - -// TestCPUPowerMeterInterface ensures that raplPowerMeter 
properly implements the CPUPowerMeter interface -func TestCPUPowerMeterInterface(t *testing.T) { - var _ CPUPowerMeter = (*raplPowerMeter)(nil) -} - -func TestNewCPUPowerMeter(t *testing.T) { - meter, err := NewCPUPowerMeter("testdata/sys") - assert.NotNil(t, meter, "NewCPUPowerMeter should not return nil") - assert.NoError(t, err, "NewCPUPowerMeter should not return error") - assert.IsType(t, &raplPowerMeter{}, meter, "NewCPUPowerMeter should return a *cpuPowerMeter") -} - -func TestCPUPowerMeter_Name(t *testing.T) { - meter := &raplPowerMeter{} - name := meter.Name() - assert.Equal(t, "rapl", name, "Name() should return 'rapl'") -} - -func TestCPUPowerMeter_Init(t *testing.T) { - meter, err := NewCPUPowerMeter(validSysFSPath) - assert.NoError(t, err, "NewCPUPowerMeter should not return an error") - - err = meter.Init() - assert.NoError(t, err, "Start() should not return an error") -} - -func TestCPUPowerMeter_Zones(t *testing.T) { - meter := &raplPowerMeter{ - reader: sysfsRaplReader{fs: validSysFSFixtures(t)}, - logger: slog.Default().With("service", "rapl"), - } - zones, err := meter.Zones() - assert.NoError(t, err, "Zones() should not return an error") - assert.NotNil(t, zones, "Zones() should return a non-nil slice") - - names := make([]string, len(zones)) - for i, zone := range zones { - names[i] = zone.Name() - } - assert.Contains(t, names, "package") - assert.Contains(t, names, "core") -} - -// TestSysFSRaplZoneInterface ensures that sysfsRaplZone properly implements the EnergyZone interface -func TestSysFSRaplZoneInterface(t *testing.T) { - pkg := sysfs.RaplZone{ - Name: "package", - Index: 0, - Path: "/sys/class/powercap/intel-rapl/intel-rapl:0", - MaxMicrojoules: 1_000_000, - } - - zone := sysfsRaplZone{zone: pkg} - - // Test that all interface methods return the expected values - assert.Equal(t, 0, zone.Index()) - assert.Equal(t, "/sys/class/powercap/intel-rapl/intel-rapl:0", zone.Path()) - assert.Equal(t, "package", zone.Name()) - assert.Equal(t, 1.0, 
zone.MaxEnergy().Joules()) -} - -func TestSysFSRaplPowerMeterInit(t *testing.T) { - rapl := raplPowerMeter{ - reader: sysfsRaplReader{fs: validSysFSFixtures(t)}, - logger: slog.Default().With("service", "rapl"), - } - err := rapl.Init() - assert.NoError(t, err) -} - -func TestSysFSRaplPowerMeterInitFail(t *testing.T) { - rapl := raplPowerMeter{reader: sysfsRaplReader{fs: invalidSysFSFixtures(t)}} - err := rapl.Init() - assert.Error(t, err) -} - -// TestSysFSRaplPowerMeter tests the sysfsRaplZone implementation using test fixtures -func TestSysFSRaplPowerMeter(t *testing.T) { - fs := validSysFSFixtures(t) - actualZones, err := sysfs.GetRaplZones(fs) - assert.NoError(t, err) - assert.Equal(t, 4, len(actualZones), "Expected to find 4 zones in test fixtures") - - // realRaplReader should filter out non-standard zones - rapl := raplPowerMeter{ - reader: sysfsRaplReader{fs: fs}, - logger: slog.Default().With("service", "rapl"), - } - zones, err := rapl.Zones() - - // Test that each zone implements the interface correctly - assert.NoError(t, err) - // With aggregation: two package zones become one AggregatedZone + one core zone = 2 total - assert.Equal(t, 2, len(zones), "find 2 zones after aggregation (package + core)") - assert.Equal(t, []string{"core", "package"}, sortedZoneNames(zones), - "Expected to find aggregated zones in test fixtures") - - for _, zone := range zones { - assert.NotEmpty(t, zone.Name(), "Zone name should not be empty") - assert.NotEmpty(t, zone.Path(), "Zone path should not be empty") - assert.GreaterOrEqual(t, zone.MaxEnergy(), 1000.0*Joule, "Max energy should not be negative") - - // Zone could be either sysfsRaplZone or AggregatedZone - switch z := zone.(type) { - case sysfsRaplZone: - // Individual zone - assert.NotNil(t, z) - case *AggregatedZone: - // Aggregated zone - assert.NotNil(t, z) - assert.Equal(t, -1, z.Index(), "AggregatedZone should have index -1") - default: - t.Fatalf("Unexpected zone type: %T", zone) - } - - // Skip the original 
assertion since we now support both zone types - _ = zone - - energy, err := zone.Energy() - assert.NoError(t, err, zone.Path()) - assert.GreaterOrEqual(t, energy, 1000.0*Joule, "Energy should not be negative") - } -} - -func TestAggregatedZoneIntegration(t *testing.T) { - // Test that RAPL reader creates AggregatedZone for multiple zones with same name - mockReader := &mockSysFSReader{ - response: []EnergyZone{ - // Two package zones with same name but different indices and one core zone - mockZone{name: "package", index: 0, path: "/intel-rapl:0", energy: 1000, maxEnergy: 100000}, - mockZone{name: "package", index: 1, path: "/intel-rapl:1", energy: 2000, maxEnergy: 100000}, - mockZone{name: "core", index: 0, path: "/intel-rapl:0:0", energy: 500, maxEnergy: 50000}, - }, - } - - rapl := &raplPowerMeter{ - reader: mockReader, - logger: slog.Default(), - } - - zones, err := rapl.Zones() - require.NoError(t, err) - - // Should have 2 zones: 1 aggregated package zone + 1 core zone - assert.Equal(t, 2, len(zones), "Expected 2 zones after aggregation") - - // Find the package zone - should be AggregatedZone - var packageZone EnergyZone - var coreZone EnergyZone - for _, zone := range zones { - if zone.Name() == "package" { - packageZone = zone - } else if zone.Name() == "core" { // Single zone keeps original name - coreZone = zone - } - } - - // Verify package zone is aggregated - require.NotNil(t, packageZone, "Package zone should exist") - aggregated, isAggregated := packageZone.(*AggregatedZone) - assert.True(t, isAggregated, "Package zone should be AggregatedZone") - assert.Equal(t, "package", aggregated.Name()) - assert.Equal(t, -1, aggregated.Index()) - assert.Equal(t, Energy(200000), aggregated.MaxEnergy()) // Sum of both package zones - - // Verify core zone is not aggregated - require.NotNil(t, coreZone, "Core zone should exist") - _, isNotAggregated := coreZone.(mockZone) - assert.True(t, isNotAggregated, "Core zone should remain as individual zone") - - // Test 
energy aggregation - packageEnergy, err := packageZone.Energy() - require.NoError(t, err) - assert.Equal(t, Energy(3000), packageEnergy) // 1000 + 2000 from both package zones -} - -type mockZone struct { - name string - index int - path string - energy Energy - maxEnergy Energy -} - -func (m mockZone) Name() string { return m.name } -func (m mockZone) Index() int { return m.index } -func (m mockZone) Path() string { return m.path } -func (m mockZone) Energy() (Energy, error) { return m.energy, nil } -func (m mockZone) MaxEnergy() Energy { return m.maxEnergy } - -type mockSysFSReader struct { - response []EnergyZone - err error -} - -func (m *mockSysFSReader) Zones() ([]EnergyZone, error) { - return m.response, m.err -} - -// TestRAPLPowerMeterFromFixtures tests the realRaplReader with filtering using test fixtures -func TestRAPLPowerMeterFromFixtures(t *testing.T) { - fs := validSysFSFixtures(t) - - raplMeter := raplPowerMeter{ - reader: sysfsRaplReader{fs: fs}, - logger: slog.Default().With("service", "rapl"), - } - allZones, err := raplMeter.Zones() - assert.NoError(t, err) - assert.NotEmpty(t, allZones, "Expected to find RAPL zones in test fixtures") - - mmioZones := 0 - for _, zone := range allZones { - if strings.Contains(zone.Path(), "mmio") { - mmioZones++ - } - } - assert.Equal(t, mmioZones, 0, "all non-standard RAPL zones should be filtered") -} - -// TestStandardRaplPath tests that standard paths are preferred over non-standard ones -func TestStandardRaplPaths(t *testing.T) { - tt := []struct { - path string - isStandard bool - }{ - {"/sys/class/powercap/intel-rapl", false}, - {"/sys/class/powercap/intel-rapl-mmio", false}, - {"/sys/class/powercap/intel-rapl-mmio/intel-rapl-mmio:0", false}, - {"/sys/class/powercap/intel-rapl-mmio:0", false}, - {"/sys/class/powercap/intel-rapl/intel-rapl:0", true}, - {"/sys/class/powercap/intel-rapl:0", true}, - {"/sys/class/powercap/intel-rapl:0:0", true}, - {"/sys/class/powercap/intel-rapl:0:1", true}, - 
{"/sys/class/powercap/intel-rapl:1", true}, - } - - for _, test := range tt { - assert.Equal(t, test.isStandard, isStandardRaplPath(test.path), test.path) - } -} - -type mockRaplReader struct { - mock.Mock -} - -func (m *mockRaplReader) Zones() ([]EnergyZone, error) { - args := m.Called() - return args.Get(0).([]EnergyZone), args.Error(1) -} - -// TestStandardPathPreference tests that standard paths are preferred over non-standard ones -func TestStandardPathPreference(t *testing.T) { - // Create test zones with both standard and non-standard paths - mmio := &MockRaplZone{ - name: "package", - path: "/sys/class/powercap/intel-rapl-mmio/intel-rapl-mmio:0", - index: 0, - } - stdPkg := &MockRaplZone{ - name: "package", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0", - index: 0, - } - tt := []struct { - zones []EnergyZone - expected EnergyZone - }{ - {[]EnergyZone{stdPkg}, stdPkg}, - {[]EnergyZone{mmio}, mmio}, - {[]EnergyZone{mmio, stdPkg}, stdPkg}, - {[]EnergyZone{stdPkg, mmio}, stdPkg}, - } - - for _, test := range tt { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(test.zones, nil) - - rapl, err := NewCPUPowerMeter(validSysFSPath, WithSysFSReader(mockReader)) - assert.NoError(t, err) - - zones, err := rapl.Zones() - assert.NoError(t, err) - - // We should have only one package zone - assert.Equal(t, 1, len(zones), "Should have 1 zone after filtering mmio") - - // The package zone should be the standard path version - pkg := zones[0] - expected := test.expected - - // It should be the standard path version - assert.Equal(t, "package", expected.Name()) - assert.Equal(t, pkg.Path(), expected.Path(), - "Should prefer standard path over non-standard path") - - mockReader.AssertExpectations(t) - } -} - -// TestZoneCaching tests that zones are cached and called only once -func TestZoneCaching(t *testing.T) { - // Create test zones with both standard and non-standard paths - pkg := &MockRaplZone{ - name: "package", - path: 
"/sys/class/powercap/intel-rapl/intel-rapl:0", - index: 0, - } - core := &MockRaplZone{ - name: "core", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0", - index: 1, - } - raplZones := []EnergyZone{pkg, core} - - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(raplZones, nil).Once() - - rapl, err := NewCPUPowerMeter(validSysFSPath, WithSysFSReader(mockReader)) - assert.NoError(t, err) - - // Get zones multiple times to test that "Zone" is called only once - for range 3 { - zones, err := rapl.Zones() - assert.NoError(t, err) - assert.Equal(t, 2, len(zones), "Should have both zones") - } - - mockReader.AssertExpectations(t) -} - -// TestZoneCaching_Error tests that zones are not cached when there is an error -func TestZoneCaching_Error(t *testing.T) { - mockReader := &mockRaplReader{} - rapl, err := NewCPUPowerMeter(validSysFSPath, WithSysFSReader(mockReader)) - - t.Run("Zone Read Error", func(t *testing.T) { - mockReader.On("Zones").Return([]EnergyZone(nil), errors.New("error")).Once() - assert.NoError(t, err) - zones, err := rapl.Zones() - assert.Error(t, err) - assert.Nil(t, zones) - mockReader.AssertExpectations(t) - }) - - // Create test zones with both standard and non-standard paths - pkg := &MockRaplZone{ - name: "package", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0", - index: 0, - } - core := &MockRaplZone{ - name: "core", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0", - index: 1, - } - raplZones := []EnergyZone{pkg, core} - t.Run("Zone Read Succeeds", func(t *testing.T) { - mockReader.On("Zones").Return(raplZones, nil).Once() - for range 3 { - zones, err := rapl.Zones() - assert.NoError(t, err) - assert.Equal(t, 2, len(zones)) - - } - mockReader.AssertExpectations(t) - }) -} - -// TestZone_None tests that zones error when none are found -func TestZone_None(t *testing.T) { - mockReader := &mockRaplReader{} - rapl, err := NewCPUPowerMeter(validSysFSPath, WithSysFSReader(mockReader)) - assert.NoError(t, err) - - 
mockReader.On("Zones").Return([]EnergyZone(nil), nil).Once() - zones, err := rapl.Zones() - assert.Error(t, err) - assert.Equal(t, 0, len(zones)) - mockReader.AssertExpectations(t) -} - -// TestNewCPUPowerMeter_InvalidPath tests that NewCPUPowerMeter returns an error with an invalid sysfs path -func TestNewCPUPowerMeter_InvalidPath(t *testing.T) { - meter, err := NewCPUPowerMeter("/nonexistent/path") - assert.Error(t, err, "Should return an error with an invalid path") - assert.Nil(t, meter, "Should not return a meter with an invalid path") -} - -// TestCPUPowerMeter_ZonesError tests that the Zones method correctly handles errors from the reader -func TestCPUPowerMeter_ZonesError(t *testing.T) { - mockReader := &mockRaplReader{} - expectedErr := errors.New("error") - mockReader.On("Zones").Return([]EnergyZone{}, expectedErr) - - meter := &raplPowerMeter{reader: mockReader} - zones, err := meter.Zones() - - assert.Error(t, err, "Should return an error when the reader fails") - assert.Equal(t, expectedErr, err, "Should return the error from the reader") - assert.Nil(t, zones, "Should return nil zones when there's an error") - mockReader.AssertExpectations(t) -} - -// TestCPUPowerMeter_NoZones tests that Zones returns an error when no zones are found -func TestCPUPowerMeter_NoZones(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return([]EnergyZone{}, nil) - - meter := &raplPowerMeter{reader: mockReader} - zones, err := meter.Zones() - - assert.Error(t, err, "Should return an error when no zones are found") - assert.Equal(t, "no RAPL zones found", err.Error(), "Should return a specific error message") - assert.Nil(t, zones, "Should return nil zones when no zones are found") - mockReader.AssertExpectations(t) -} - -// TestCPUPowerMeter_InitNoZones tests that Start returns an error when no zones are found -func TestCPUPowerMeter_InitNoZones(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return([]EnergyZone{}, nil) - 
- meter := &raplPowerMeter{reader: mockReader} - err := meter.Init() - - assert.Error(t, err, "Start() should return an error when no zones are found") - assert.Equal(t, "no RAPL zones found", err.Error(), "Start() should return a specific error message") - mockReader.AssertExpectations(t) -} - -// TestPrimaryEnergyZone tests the PrimaryEnergyZone method -func TestPrimaryEnergyZone(t *testing.T) { - t.Run("Priority hierarchy", func(t *testing.T) { - tests := []struct { - name string - zones []EnergyZone - expected string - }{{ - name: "psys has highest priority", - zones: []EnergyZone{ - mockZone{name: "package", index: 0}, - mockZone{name: "psys", index: 0}, - mockZone{name: "core", index: 0}, - }, - expected: "psys", - }, { - name: "package has priority over core", - zones: []EnergyZone{ - mockZone{name: "core", index: 0}, - mockZone{name: "package", index: 0}, - mockZone{name: "dram", index: 0}, - }, - expected: "package", - }, { - name: "core has priority over dram", - zones: []EnergyZone{ - mockZone{name: "dram", index: 0}, - mockZone{name: "core", index: 0}, - mockZone{name: "uncore", index: 0}, - }, - expected: "core", - }, { - name: "dram has priority over uncore", - zones: []EnergyZone{ - mockZone{name: "uncore", index: 0}, - mockZone{name: "dram", index: 0}, - }, - expected: "dram", - }} - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(tt.zones, nil) - - meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()} - zone, err := meter.PrimaryEnergyZone() - - assert.NoError(t, err) - assert.Equal(t, tt.expected, zone.Name()) - mockReader.AssertExpectations(t) - }) - } - }) - - t.Run("Case insensitive matching", func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return([]EnergyZone{ - mockZone{name: "PACKAGE", index: 0}, - mockZone{name: "Core", index: 0}, - }, nil) - - meter := &raplPowerMeter{reader: mockReader, logger: 
slog.Default()} - zone, err := meter.PrimaryEnergyZone() - - assert.NoError(t, err) - assert.Equal(t, "PACKAGE", zone.Name()) - mockReader.AssertExpectations(t) - }) - - t.Run("Fallback to first zone", func(t *testing.T) { - zones := []EnergyZone{ - mockZone{name: "unknown1", index: 0}, - mockZone{name: "unknown2", index: 1}, - } - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(zones, nil) - - meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()} - zone, err := meter.PrimaryEnergyZone() - - assert.NoError(t, err) - // NOTE: since reader.Zones() does not guarantee the order after filtering, - // we cannot assert zone.Name() == "unknown1", thus assert the zone returned - // any of the zones passed as input - zoneName := zone.Name() - assert.Contains(t, []string{"unknown1", "unknown2"}, zoneName) - mockReader.AssertExpectations(t) - }) - - t.Run("Caching behavior", func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return([]EnergyZone{ - mockZone{name: "package", index: 0}, - }, nil).Once() - - meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()} - - // First call should read from zones and cache topZone - zone1, err := meter.PrimaryEnergyZone() - assert.NoError(t, err) - assert.Equal(t, "package", zone1.Name()) - - // Second call should use cached topZone directly - zone2, err := meter.PrimaryEnergyZone() - assert.NoError(t, err) - assert.Equal(t, "package", zone2.Name()) - - mockReader.AssertExpectations(t) - }) - - t.Run("Error handling", func(t *testing.T) { - t.Run("Zones() returns error", func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return([]EnergyZone{}, errors.New("zones error")) - - meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()} - zone, err := meter.PrimaryEnergyZone() - - assert.Error(t, err) - assert.Nil(t, zone) - assert.Contains(t, err.Error(), "zones error") - mockReader.AssertExpectations(t) - }) - - t.Run("Empty zones 
list", func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return([]EnergyZone{}, nil) - - meter := &raplPowerMeter{reader: mockReader, logger: slog.Default()} - zone, err := meter.PrimaryEnergyZone() - - assert.Error(t, err) - assert.Nil(t, zone) - assert.Contains(t, err.Error(), "no RAPL zones found") - mockReader.AssertExpectations(t) - }) - }) -} diff --git a/internal/device/rapl_zone_filtering_test.go b/internal/device/rapl_zone_filtering_test.go deleted file mode 100644 index 773f08d6ee..0000000000 --- a/internal/device/rapl_zone_filtering_test.go +++ /dev/null @@ -1,257 +0,0 @@ -// SPDX-FileCopyrightText: 2025 The Kepler Authors -// SPDX-License-Identifier: Apache-2.0 - -package device - -import ( - "log/slog" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestRaplZoneFiltering(t *testing.T) { - // Create mock zones for testing - packageZone := &MockRaplZone{ - name: "package", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0", - index: 0, - } - coreZone := &MockRaplZone{ - name: "core", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0", - index: 1, - } - dramZone := &MockRaplZone{ - name: "dram", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:2", - index: 2, - } - uncoreZone := &MockRaplZone{ - name: "uncore", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:3", - index: 3, - } - - allZones := []EnergyZone{packageZone, coreZone, dramZone, uncoreZone} - - tests := []struct { - name string - filterZones []string - expectedZones []string - }{ - { - name: "No filter - all zones included", - filterZones: []string{}, - expectedZones: []string{"package", "core", "dram", "uncore"}, - }, - { - name: "Filter single zone", - filterZones: []string{"core"}, - expectedZones: []string{"core"}, - }, - { - name: "Filter multiple zones", - filterZones: []string{"package", "dram"}, - expectedZones: []string{"package", "dram"}, - }, - { - name: "Case-insensitive filtering", - filterZones: []string{"PACKAGE", 
"Core"}, - expectedZones: []string{"package", "core"}, - }, - { - name: "Non-existent zone in filter", - filterZones: []string{"package", "nonexistent"}, - expectedZones: []string{"package"}, - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(allZones, nil) - - logger := slog.Default().With("test", "zone-filtering") - meter := &raplPowerMeter{ - reader: mockReader, - logger: logger, - zoneFilter: tc.filterZones, - } - - // Filter zones directly to test the filtering logic - filteredZones := meter.filterZones(allZones) - - // Verify only expected zones are included - assert.Equal(t, len(tc.expectedZones), len(filteredZones), - "Filtered zones length mismatch") - - // Create a map of zone names for easy checking - zoneNames := make(map[string]bool) - for _, zone := range filteredZones { - zoneNames[zone.Name()] = true - } - - // Verify each expected zone is present - for _, name := range tc.expectedZones { - assert.True(t, zoneNames[name], - "Expected zone %s not found in filtered zones", name) - } - }) - } -} - -// Test that zone filtering applies during Init -func TestRaplZoneFiltering_Init(t *testing.T) { - packageZone := &MockRaplZone{ - name: "package", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0", - index: 0, - maxMicroJoules: 1000000, - energy: 100000, - } - coreZone := &MockRaplZone{ - name: "core", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0", - index: 1, - maxMicroJoules: 1000000, - energy: 50000, - } - - allZones := []EnergyZone{packageZone, coreZone} - - t.Run("Init succeeds with valid filter", func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(allZones, nil) - - meter := &raplPowerMeter{ - reader: mockReader, - logger: slog.Default(), - zoneFilter: []string{"package"}, - } - - err := meter.Init() - assert.NoError(t, err) - }) - - t.Run("Init does not fails with unknown zones", func(t *testing.T) { - 
mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(allZones, nil) - - meter := &raplPowerMeter{ - reader: mockReader, - logger: slog.Default(), - zoneFilter: []string{"nonexistent"}, - } - - err := meter.Init() - assert.NoError(t, err) - }) -} - -// Test that Zones() properly applies the filter -func TestRaplZoneFiltering_Zones(t *testing.T) { - packageZone := &MockRaplZone{ - name: "package", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0", - index: 0, - maxMicroJoules: 1000000, - energy: 100000, - } - coreZone := &MockRaplZone{ - name: "core", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0", - index: 1, - maxMicroJoules: 1000000, - energy: 50000, - } - - allZones := []EnergyZone{packageZone, coreZone} - - tests := []struct { - name string - filter []string - expectedZones int - expectError bool - }{ - { - name: "No filter", - filter: []string{}, - expectedZones: 2, - expectError: false, - }, { - name: "Filter package", - filter: []string{"package"}, - expectedZones: 1, - expectError: false, - }, { - name: "Filter core", - filter: []string{"core"}, - expectedZones: 1, - expectError: false, - }, { - name: "nonexistent zone", - filter: []string{"nonexistent"}, - expectedZones: 0, - expectError: true, - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - mockReader := &mockRaplReader{} - mockReader.On("Zones").Return(allZones, nil) - - meter := &raplPowerMeter{ - reader: mockReader, - logger: slog.Default(), - zoneFilter: tc.filter, - } - - zones, err := meter.Zones() - - if tc.expectError { - assert.Error(t, err) - assert.Nil(t, zones) - } else { - assert.NoError(t, err) - assert.Equal(t, tc.expectedZones, len(zones)) - } - }) - } -} - -// Test integration with the configuration options -func TestRaplZoneFiltering_WithOptions(t *testing.T) { - // Mock sysfs reader for this test - mockReader := &mockRaplReader{} - packageZone := &MockRaplZone{ - name: "package", - path: 
"/sys/class/powercap/intel-rapl/intel-rapl:0", - index: 0, - maxMicroJoules: 1000000, - energy: 100000, - } - coreZone := &MockRaplZone{ - name: "core", - path: "/sys/class/powercap/intel-rapl/intel-rapl:0:0", - index: 1, - maxMicroJoules: 1000000, - energy: 50000, - } - mockReader.On("Zones").Return([]EnergyZone{packageZone, coreZone}, nil) - - // Create meter with WithZoneFilter option - meter, err := NewCPUPowerMeter( - validSysFSPath, - WithSysFSReader(mockReader), - WithZoneFilter([]string{"core"}), - ) - assert.NoError(t, err) - - // Check that filtering was applied - zones, err := meter.Zones() - assert.NoError(t, err) - assert.Equal(t, 1, len(zones)) - assert.Equal(t, "core", zones[0].Name()) -} diff --git a/manifests/helm/kepler/values.yaml b/manifests/helm/kepler/values.yaml index d4d9ec1869..5f1892abff 100644 --- a/manifests/helm/kepler/values.yaml +++ b/manifests/helm/kepler/values.yaml @@ -75,6 +75,10 @@ config: minTerminatedEnergyThreshold: 10 rapl: zones: [] + msr: + enabled: false + force: false + devicePath: /dev/cpu/%d/msr exporter: stdout: enabled: false diff --git a/manifests/k8s/configmap.yaml b/manifests/k8s/configmap.yaml index 700ec58cdb..83e854d628 100644 --- a/manifests/k8s/configmap.yaml +++ b/manifests/k8s/configmap.yaml @@ -21,6 +21,10 @@ data: minTerminatedEnergyThreshold: 10 rapl: zones: [] + msr: + enabled: false + force: false + devicePath: "/dev/cpu/%d/msr" exporter: stdout: enabled: false