Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions generator/test_case_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,14 @@ var testTypeToTestConfig = map[string][]testConfig{
targets: map[string]map[string]struct{}{"os": {"al2": {}}, "arc": {"amd64": {}}},
},
{testDir: "./test/ssm_document"},
{
testDir: "./test/system_metrics/enabled",
targets: map[string]map[string]struct{}{"os": {"al2": {}}, "arc": {"amd64": {}}},
},
{
testDir: "./test/system_metrics/disabled",
targets: map[string]map[string]struct{}{"os": {"al2": {}}, "arc": {"amd64": {}}},
},
},
testTypeKeyEc2SELinux: {
{testDir: "./test/ca_bundle"},
Expand Down
9 changes: 9 additions & 0 deletions test/system_metrics/disabled/resources/agent_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"metrics": {
"metrics_collected": {
"cpu": {
"measurement": ["cpu_usage_idle"]
}
}
}
}
69 changes: 69 additions & 0 deletions test/system_metrics/disabled/system_metrics_disabled_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: MIT

//go:build !windows

package system_metrics_disabled

import (
"log"
"testing"
"time"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/aws/amazon-cloudwatch-agent-test/environment"
"github.com/aws/amazon-cloudwatch-agent-test/util/awsservice"
"github.com/aws/amazon-cloudwatch-agent-test/util/common"
)

const (
	// namespace is the CloudWatch namespace that is searched for system metrics.
	namespace = "CWAgent/System"
	// agentRunDuration is how long the agent is left running before the
	// absence checks: long enough to cover the full batch interval, proving
	// that nothing leaks through.
	agentRunDuration = time.Minute * 18
)

// init runs when the test package is loaded, before any test function, and
// calls environment.RegisterEnvironmentMetaDataFlags.
// NOTE(review): presumably this registers the command-line flags that carry
// the test environment metadata — confirm against the environment package.
func init() {
environment.RegisterEnvironmentMetaDataFlags()
}

// systemMetricNames is the full set of metrics our receiver would publish.
// Every name listed here is expected to be absent when the feature is disabled.
var systemMetricNames = []string{
	// JVM: per-JVM heap metrics.
	"heap_max_bytes",
	"heap_committed_bytes",
	"heap_after_gc_bytes",
	"heap_free_after_gc_bytes",
	// JVM: aggregate metrics.
	"aggregate_jvm_count",
	"aggregate_heap_max_bytes",
	"aggregate_heap_free_after_gc_bytes",
	"aggregate_heap_after_gc_utilized",
	// System: CPU and memory.
	"cpu_time_iowait",
	"mem_total",
	"mem_available",
	"mem_cached",
	"mem_active",
	// System: disk.
	"aggregate_disk_used",
	"aggregate_disk_free",
	// System: network allowance counters.
	"aggregate_bw_in_allowance_exceeded",
	"aggregate_bw_out_allowance_exceeded",
	"aggregate_pps_allowance_exceeded",
}

// TestSystemMetricsDisabled starts the agent with resources/agent_config.json,
// lets it run for agentRunDuration, stops it, and then runs one "Absent/<name>"
// subtest per entry in systemMetricNames. Each subtest passes only when
// awsservice.ValidateMetric returns an error for that metric, scoped to this
// instance's InstanceId dimension in the CWAgent/System namespace.
//
// NOTE(review): any error from ValidateMetric satisfies the assertion, so an
// API failure would presumably look the same as "metric absent" — confirm
// against the awsservice package.
func TestSystemMetricsDisabled(t *testing.T) {
	common.CopyFile("resources/agent_config.json", common.ConfigOutputPath)
	startErr := common.StartAgent(common.ConfigOutputPath, true, false)
	require.NoError(t, startErr, "agent failed to start")

	log.Printf("Agent started, waiting %s to confirm no system metrics are published...", agentRunDuration)
	time.Sleep(agentRunDuration)
	common.StopAgent()

	// The namespace may already exist from other test runs, so the lookup is
	// restricted to metrics that carry this instance's InstanceId.
	instanceID := awsservice.GetInstanceId()
	filter := []types.DimensionFilter{{
		Name:  aws.String("InstanceId"),
		Value: aws.String(instanceID),
	}}

	for i := range systemMetricNames {
		name := systemMetricNames[i]
		t.Run("Absent/"+name, func(t *testing.T) {
			lookupErr := awsservice.ValidateMetric(name, namespace, filter)
			assert.Error(t, lookupErr, "metric %s should NOT exist for instance %s in %s",
				name, instanceID, namespace)
		})
	}
}
12 changes: 12 additions & 0 deletions test/system_metrics/enabled/resources/agent_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"agent": {
"system_metrics_enabled": true
},
"metrics": {
"metrics_collected": {
"cpu": {
"measurement": ["cpu_usage_idle"]
}
}
}
}
10 changes: 10 additions & 0 deletions test/system_metrics/enabled/resources/mock_jvm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env python3
"""Minimal mock JVM agent. Binds @aws-jvm-metrics-<pid> (SOCK_DGRAM), responds to GET /metrics."""
import socket, os

# The leading NUL byte places the name in the abstract socket namespace,
# which is what shows up as "@aws-jvm-metrics-<pid>".
s = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
s.bind(f"\x00aws-jvm-metrics-{os.getpid()}".encode())

# Canned metrics payload sent back for every "GET /metrics" request.
R = b"jvm_heap_max_bytes 2147483648\njvm_heap_committed_bytes 1073741824\njvm_heap_after_gc_bytes 536870912\njvm_gc_count_total 42\njvm_allocated_bytes 8589934592\n"

# Serve forever; the test harness terminates this process with a signal.
while True:
    d, a = s.recvfrom(1024)
    if not d.startswith(b"GET /metrics"):
        continue
    if not a:
        # The sender did not bind its own socket, so there is no address to
        # reply to. Drop the request instead of crashing in sendto().
        continue
    try:
        s.sendto(R, a)
    except OSError:
        # The requester closed its socket before the reply was sent. One lost
        # reply must not take the mock down for the rest of the test run.
        continue
186 changes: 186 additions & 0 deletions test/system_metrics/enabled/system_metrics_enabled_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: MIT

//go:build !windows

package system_metrics_enabled

import (
"fmt"
"log"
"os/exec"
"testing"
"time"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/aws/amazon-cloudwatch-agent-test/environment"
"github.com/aws/amazon-cloudwatch-agent-test/util/awsservice"
"github.com/aws/amazon-cloudwatch-agent-test/util/common"
)

const (
	// namespace is the CloudWatch namespace the system metrics are validated in.
	namespace = "CWAgent/System"
	// agentRunDuration is the batch flush interval (15 min) plus a 3-minute
	// buffer for scrape + publish latency.
	agentRunDuration = 15*time.Minute + 3*time.Minute
	// mockJVMCount is the number of mock JVM agents to start.
	mockJVMCount = 2
)

// init runs when the test package is loaded, before any test function, and
// calls environment.RegisterEnvironmentMetaDataFlags.
// NOTE(review): presumably this registers the command-line flags that carry
// the test environment metadata — confirm against the environment package.
func init() {
environment.RegisterEnvironmentMetaDataFlags()
}

// startMockJVMs launches count mock JVM agents (resources/mock_jvm.py) and
// returns their PIDs, formatted as decimal strings, for cleanup.
//
// Each process is also registered with t.Cleanup as soon as it starts, so it
// is killed and reaped even when a later start or the socket verification
// fails the test before the PIDs ever reach the caller.
//
// After launching, the function waits until every agent's abstract socket
// "@aws-jvm-metrics-<pid>" is listed in /proc/net/unix and fails the test if
// one does not appear in time.
func startMockJVMs(t *testing.T, count int) []string {
	t.Helper()

	const (
		// socketWait bounds how long a single agent may take to bind.
		socketWait = 10 * time.Second
		// socketPoll is the interval between /proc/net/unix checks.
		socketPoll = 200 * time.Millisecond
	)

	pids := make([]string, 0, count)
	for i := 0; i < count; i++ {
		cmd := exec.Command("python3", "resources/mock_jvm.py")
		require.NoError(t, cmd.Start(), "failed to start mock JVM agent %d", i)
		t.Cleanup(func() {
			// Best-effort: the process may already have been stopped by the
			// caller, in which case Kill fails and Wait only reaps it.
			_ = cmd.Process.Kill()
			_ = cmd.Wait()
		})
		pid := fmt.Sprintf("%d", cmd.Process.Pid)
		pids = append(pids, pid)
		log.Printf("Started mock JVM agent %d with PID %s", i, pid)
	}

	// Poll instead of sleeping a fixed time: interpreter start-up varies, and
	// a fixed delay is either too short on a slow host or wasted on a fast one.
	for _, pid := range pids {
		// Anchor on the leading '@' and the end of line so that PID 123 does
		// not match the socket of PID 1234.
		pattern := "@aws-jvm-metrics-" + pid + "$"
		require.Eventually(t, func() bool {
			return exec.Command("grep", "-q", pattern, "/proc/net/unix").Run() == nil
		}, socketWait, socketPoll, "socket for PID %s not found in /proc/net/unix", pid)
		log.Printf("Verified socket @aws-jvm-metrics-%s in /proc/net/unix", pid)
	}
	return pids
}

// stopMockJVMs terminates the mock JVM agents identified by pids and then
// sweeps for any remaining process whose command line mentions mock_jvm.py.
// Both steps are best-effort: errors (for example, a process that has already
// exited) are deliberately ignored.
func stopMockJVMs(pids []string) {
	for i := range pids {
		killCmd := exec.Command("kill", pids[i])
		_ = killCmd.Run()
	}
	// Also clean up any strays.
	sweepCmd := exec.Command("pkill", "-f", "mock_jvm.py")
	_ = sweepCmd.Run()
}

// getInstanceDimFilter builds the single-element dimension filter
// (InstanceId = this instance's ID) used when looking metrics up.
func getInstanceDimFilter() []types.DimensionFilter {
	byInstance := types.DimensionFilter{
		Name:  aws.String("InstanceId"),
		Value: aws.String(awsservice.GetInstanceId()),
	}
	return []types.DimensionFilter{byInstance}
}

// getInstanceDims builds the single-element dimension list
// (InstanceId = this instance's ID) used for metric data queries.
func getInstanceDims() []types.Dimension {
	instanceDim := types.Dimension{
		Name:  aws.String("InstanceId"),
		Value: aws.String(awsservice.GetInstanceId()),
	}
	return []types.Dimension{instanceDim}
}

// assertMetricExists verifies a metric exists in CloudWatch for this instance.
func assertMetricExists(t *testing.T, metricName string) {
t.Helper()
err := awsservice.ValidateMetric(metricName, namespace, getInstanceDimFilter())
assert.NoError(t, err, "metric %s should exist in %s", metricName, namespace)
}

// assertMetricValue fetches metric values and asserts they are >= 0.
func assertMetricValue(t *testing.T, metricName string, startTime, endTime time.Time) {
t.Helper()
dims := getInstanceDims()
data, err := awsservice.GetMetricStatistics(
metricName, namespace, dims,
startTime, endTime,
900, // 15-min period matching batch interval
[]types.Statistic{types.StatisticAverage},
nil,
)
require.NoError(t, err, "failed to get statistics for %s", metricName)
require.NotEmpty(t, data.Datapoints, "no datapoints for %s", metricName)
for _, dp := range data.Datapoints {
assert.GreaterOrEqual(t, *dp.Average, float64(0), "metric %s should be >= 0", metricName)
}
}

// assertAggregateJVMCount verifies aggregate_jvm_count equals the expected count.
func assertAggregateJVMCount(t *testing.T, expected float64, startTime, endTime time.Time) {
t.Helper()
dims := getInstanceDims()
data, err := awsservice.GetMetricStatistics(
"aggregate_jvm_count", namespace, dims,
startTime, endTime,
900,
[]types.Statistic{types.StatisticAverage},
nil,
)
require.NoError(t, err, "failed to get statistics for aggregate_jvm_count")
require.NotEmpty(t, data.Datapoints, "no datapoints for aggregate_jvm_count")
assert.InDelta(t, expected, *data.Datapoints[0].Average, 0.5, "aggregate_jvm_count should be %v", expected)
}

// TestSystemMetricsEnabled starts the mock JVM agents, then the CloudWatch
// agent with resources/agent_config.json (system_metrics_enabled: true), lets
// it run for agentRunDuration, stops it, and validates the metrics published
// to the CWAgent/System namespace between agent start and stop.
//
// Subtests run in this order:
//   - JVM/<name> for the per-JVM heap metrics,
//   - JVM/aggregate_jvm_count, which must equal mockJVMCount,
//   - JVM/<name> for the remaining JVM aggregate metrics,
//   - System/<name> for the cpu, mem, disk and ena metrics.
//
// Apart from aggregate_jvm_count, each metric must exist and have only
// non-negative datapoints.
func TestSystemMetricsEnabled(t *testing.T) {
	// The mock JVM agents must be up before the CWAgent starts.
	pids := startMockJVMs(t, mockJVMCount)
	defer stopMockJVMs(pids)

	common.CopyFile("resources/agent_config.json", common.ConfigOutputPath)
	startErr := common.StartAgent(common.ConfigOutputPath, true, false)
	require.NoError(t, startErr, "agent failed to start")

	startTime := time.Now()
	log.Printf("Agent started, waiting %s for batch flush...", agentRunDuration)
	time.Sleep(agentRunDuration)
	common.StopAgent()
	endTime := time.Now()

	log.Printf("Agent stopped. Validating metrics in %s...", namespace)

	// checkGroup runs one "<prefix><name>" subtest per metric name, asserting
	// existence and non-negative values inside the agent's run window.
	checkGroup := func(prefix string, names ...string) {
		for i := range names {
			name := names[i]
			t.Run(prefix+name, func(t *testing.T) {
				assertMetricExists(t, name)
				assertMetricValue(t, name, startTime, endTime)
			})
		}
	}

	// --- JVM per-JVM heap metrics ---
	checkGroup("JVM/",
		"heap_max_bytes", "heap_committed_bytes", "heap_after_gc_bytes", "heap_free_after_gc_bytes",
	)

	// --- JVM aggregate metrics ---
	t.Run("JVM/aggregate_jvm_count", func(t *testing.T) {
		assertAggregateJVMCount(t, float64(mockJVMCount), startTime, endTime)
	})
	checkGroup("JVM/",
		"aggregate_heap_max_bytes", "aggregate_heap_free_after_gc_bytes", "aggregate_heap_after_gc_utilized",
	)

	// --- System metrics (cpu, mem, disk, ena) ---
	checkGroup("System/",
		"cpu_time_iowait",
		"mem_total", "mem_available", "mem_cached", "mem_active",
		"aggregate_disk_used", "aggregate_disk_free",
		"aggregate_bw_in_allowance_exceeded", "aggregate_bw_out_allowance_exceeded", "aggregate_pps_allowance_exceeded",
	)
}
Loading