diff --git a/aws_lambda_powertools/metrics/metrics.py b/aws_lambda_powertools/metrics/metrics.py index c8c16e8bd51..54f6a3ae807 100644 --- a/aws_lambda_powertools/metrics/metrics.py +++ b/aws_lambda_powertools/metrics/metrics.py @@ -123,6 +123,13 @@ def add_metric( def add_dimension(self, name: str, value: str) -> None: self.provider.add_dimension(name=name, value=value) + def add_dimensions(self, dimensions: dict[str, str]) -> None: + """Add a new set of dimensions creating an additional dimension array. + + Delegates to the configured provider's add_dimensions with the given mapping. + """ + self.provider.add_dimensions(dimensions=dimensions) + def serialize_metric_set( self, metrics: dict | None = None, diff --git a/aws_lambda_powertools/metrics/provider/cloudwatch_emf/cloudwatch.py b/aws_lambda_powertools/metrics/provider/cloudwatch_emf/cloudwatch.py index f84e1b0ff42..70c2f6a419f 100644 --- a/aws_lambda_powertools/metrics/provider/cloudwatch_emf/cloudwatch.py +++ b/aws_lambda_powertools/metrics/provider/cloudwatch_emf/cloudwatch.py @@ -94,6 +94,7 @@ def __init__( self.metadata_set = metadata_set if metadata_set is not None else {} self.timestamp: int | None = None + self.dimension_sets: list[dict[str, str]] = [] # Extra dimension sets appended by add_dimensions() self._metric_units = [unit.value for unit in MetricUnit] self._metric_unit_valid_options = list(MetricUnit.__members__) @@ -256,21 +257,30 @@ def serialize_metric_set( metric_names_and_values.update({metric_name: metric_value}) + # Build Dimensions array: primary set + additional dimension sets + dimension_arrays: list[list[str]] = [list(dimensions.keys())] + all_dimensions: dict[str, str] = dict(dimensions) + + # Add each additional set in insertion order; on shared keys the later set's value wins at the root + for dim_set in self.dimension_sets: + all_dimensions.update(dim_set) + dimension_arrays.append(list(dim_set.keys())) + return { "_aws": { "Timestamp": self.timestamp or int(datetime.datetime.now().timestamp() * 1000), # epoch "CloudWatchMetrics": [ { "Namespace": self.namespace, # 
"test_namespace" - "Dimensions": [list(dimensions.keys())], # [ "service" ] + "Dimensions": dimension_arrays, # [["service"], ["env", "region"]] "Metrics": metric_definition, }, ], }, # NOTE: Mypy doesn't recognize splats '** syntax' in TypedDict - **dimensions, # "service": "test_service" - **metadata, # type: ignore[typeddict-item] # "username": "test" - **metric_names_and_values, # "single_metric": 1.0 + **all_dimensions, # type: ignore[typeddict-item] # All dimension key-value pairs + **metadata, # type: ignore[typeddict-item] + **metric_names_and_values, } def add_dimension(self, name: str, value: str) -> None: @@ -316,6 +326,55 @@ def add_dimension(self, name: str, value: str) -> None: self.dimension_set[name] = value + def add_dimensions(self, dimensions: dict[str, str]) -> None: + """Add a new set of dimensions creating an additional dimension array. + + Names/values are coerced to str, blank entries are skipped with a warning, and defaults are merged in. + """ + logger.debug(f"Adding dimension set: {dimensions}") + + if not dimensions: + warnings.warn( + "Empty dimensions dictionary provided", + category=PowertoolsUserWarning, + stacklevel=2, + ) + return + + # Coerce names and values to str; skip (with a warning) any entry whose name or value is blank + sanitized: dict[str, str] = {} + for name, value in dimensions.items(): + str_value = value if isinstance(value, str) else str(value) + str_name = name if isinstance(name, str) else str(name) + + if not str_name.strip() or not str_value.strip(): + warnings.warn( + f"Dimension {str_name} has empty name or value", + category=PowertoolsUserWarning, + stacklevel=2, + ) + continue + + sanitized[str_name] = str_value + + if not sanitized: + return + + # Count unique dimension names across the primary set, the existing sets and this new set + all_keys = set(self.dimension_set.keys()) + for ds in self.dimension_sets: + all_keys.update(ds.keys()) + all_keys.update(sanitized.keys()) + + if len(all_keys) > MAX_DIMENSIONS: + raise SchemaValidationError(f"Maximum dimensions ({MAX_DIMENSIONS}) exceeded") + + # Start from the default dimensions so explicitly passed keys override them + 
with_defaults = dict(self.default_dimensions) + with_defaults.update(sanitized) + + self.dimension_sets.append(with_defaults) + def add_metadata(self, key: str, value: Any) -> None: """Adds high cardinal metadata for metrics object @@ -377,6 +436,7 @@ def clear_metrics(self) -> None: logger.debug("Clearing out existing metric set from memory") self.metric_set.clear() self.dimension_set.clear() + self.dimension_sets.clear() self.metadata_set.clear() self.set_default_dimensions(**self.default_dimensions) diff --git a/tests/functional/metrics/test_dimension_sets.py b/tests/functional/metrics/test_dimension_sets.py new file mode 100644 index 00000000000..693648eab7b --- /dev/null +++ b/tests/functional/metrics/test_dimension_sets.py @@ -0,0 +1,265 @@ +""" +Tests for multiple dimension sets feature +""" + +from __future__ import annotations + +import json + +import pytest + +from aws_lambda_powertools.metrics import Metrics, MetricUnit, SchemaValidationError +from aws_lambda_powertools.metrics.provider.cloudwatch_emf.cloudwatch import AmazonCloudWatchEMFProvider + + +def test_add_dimensions_creates_multiple_dimension_sets(capsys): + # GIVEN a metrics instance + metrics = AmazonCloudWatchEMFProvider(namespace="TestApp") + + # WHEN we add multiple dimension sets + metrics.add_dimension(name="service", value="booking") + metrics.add_dimensions({"environment": "prod", "region": "us-east-1"}) + metrics.add_dimensions({"environment": "prod"}) + metrics.add_dimensions({"region": "us-east-1"}) + metrics.add_metric(name="SuccessfulRequests", unit=MetricUnit.Count, value=10) + + # THEN the serialized output should contain multiple dimension arrays + output = metrics.serialize_metric_set() + + assert len(output["_aws"]["CloudWatchMetrics"]) == 1 + dimensions = output["_aws"]["CloudWatchMetrics"][0]["Dimensions"] + + # Should have 4 dimension sets: primary + 3 added + assert len(dimensions) == 4 + assert dimensions[0] == ["service"] # Primary dimension set + assert 
set(dimensions[1]) == {"environment", "region"} + assert dimensions[2] == ["environment"] + assert dimensions[3] == ["region"] + + # All dimension values should be in the root + assert output["service"] == "booking" + assert output["environment"] == "prod" + assert output["region"] == "us-east-1" + assert output["SuccessfulRequests"] == [10.0] + + +def test_add_dimensions_with_metrics_wrapper(capsys): + # GIVEN a Metrics instance (not provider directly) + metrics = Metrics(namespace="TestApp", service="payment") + + # WHEN we use add_dimensions through the Metrics wrapper + @metrics.log_metrics + def handler(event, context): + metrics.add_dimensions({"environment": "staging", "region": "us-west-2"}) + metrics.add_dimensions({"environment": "staging"}) + metrics.add_metric(name="PaymentProcessed", unit=MetricUnit.Count, value=1) + + handler({}, {}) + + # THEN the output should contain multiple dimension sets + output = json.loads(capsys.readouterr().out.strip()) + + dimensions = output["_aws"]["CloudWatchMetrics"][0]["Dimensions"] + assert len(dimensions) == 3 # Primary (service) + 2 added + + # Primary dimension from service parameter + assert "service" in dimensions[0] + + # Check added dimension sets - they don't include service unless it's a default dimension + assert set(dimensions[1]) == {"environment", "region"} + assert set(dimensions[2]) == {"environment"} + + +def test_add_dimensions_with_default_dimensions(): + # GIVEN metrics with default dimensions + metrics = AmazonCloudWatchEMFProvider(namespace="TestApp") + metrics.set_default_dimensions(tenant_id="123", application="api") + + # WHEN we add dimension sets after setting defaults + metrics.add_dimensions({"environment": "prod"}) + metrics.add_dimensions({"region": "eu-west-1"}) + metrics.add_metric(name="ApiCalls", unit=MetricUnit.Count, value=5) + + # THEN default dimensions should be included in all dimension sets + output = metrics.serialize_metric_set() + dimensions = 
output["_aws"]["CloudWatchMetrics"][0]["Dimensions"] + + # Each dimension set should include default dimensions + assert set(dimensions[1]) == {"tenant_id", "application", "environment"} + assert set(dimensions[2]) == {"tenant_id", "application", "region"} + + # Values should be in root + assert output["tenant_id"] == "123" + assert output["application"] == "api" + assert output["environment"] == "prod" + assert output["region"] == "eu-west-1" + + +def test_add_dimensions_duplicate_keys_last_value_wins(): + # GIVEN metrics with overlapping dimension keys + metrics = AmazonCloudWatchEMFProvider(namespace="TestApp") + + # WHEN we add dimension sets with duplicate keys + metrics.add_dimensions({"environment": "dev", "region": "us-east-1"}) + metrics.add_dimensions({"environment": "staging", "region": "us-west-2"}) + metrics.add_dimensions({"environment": "prod"}) # Last value for environment + metrics.add_metric(name="TestMetric", unit=MetricUnit.Count, value=1) + + # THEN the last value should be used in the root + output = metrics.serialize_metric_set() + + # Last values should win + assert output["environment"] == "prod" + assert output["region"] == "us-west-2" + + +def test_add_dimensions_empty_dict_warns(): + # GIVEN metrics instance + metrics = AmazonCloudWatchEMFProvider(namespace="TestApp") + + # WHEN we add an empty dimensions dict + with pytest.warns(UserWarning, match="Empty dimensions dictionary"): + metrics.add_dimensions({}) + + # THEN no dimension set should be added + assert len(metrics.dimension_sets) == 0 + + +def test_add_dimensions_invalid_dimensions_skipped(): + # GIVEN metrics instance + metrics = AmazonCloudWatchEMFProvider(namespace="TestApp") + + # WHEN we add dimensions with empty names or values + with pytest.warns(UserWarning, match="empty name or value"): + metrics.add_dimensions({"": "value", "key": ""}) + + # THEN no dimension set should be added + assert len(metrics.dimension_sets) == 0 + + +def 
test_add_dimensions_exceeds_max_dimensions(): + # GIVEN metrics with many dimensions + metrics = AmazonCloudWatchEMFProvider(namespace="TestApp") + + # Add 29 dimensions to primary set (max is 30) + for i in range(29): + metrics.add_dimension(name=f"dim{i}", value=f"val{i}") + + # WHEN we try to add dimension set that would exceed max + # THEN it should raise SchemaValidationError + with pytest.raises(SchemaValidationError, match="Maximum dimensions"): + metrics.add_dimensions({"extra1": "val1", "extra2": "val2"}) + + +def test_add_dimensions_converts_values_to_strings(): + # GIVEN metrics instance + metrics = AmazonCloudWatchEMFProvider(namespace="TestApp") + + # WHEN we add dimensions with non-string values + metrics.add_dimensions({"count": 123, "is_active": True, "ratio": 3.14}) + metrics.add_metric(name="TestMetric", unit=MetricUnit.Count, value=1) + + # THEN values should be converted to strings + output = metrics.serialize_metric_set() + assert output["count"] == "123" + assert output["is_active"] == "True" + assert output["ratio"] == "3.14" + + +def test_clear_metrics_clears_dimension_sets(capsys): + # GIVEN metrics with dimension sets + metrics = Metrics(namespace="TestApp", service="api") + + @metrics.log_metrics + def handler(event, context): + metrics.add_dimensions({"environment": "prod"}) + metrics.add_dimensions({"region": "us-east-1"}) + metrics.add_metric(name="Requests", unit=MetricUnit.Count, value=1) + + handler({}, {}) + + # WHEN we call clear_metrics (done automatically by decorator) + # THEN dimension_sets should be cleared + assert len(metrics.provider.dimension_sets) == 0 + + +def test_add_dimensions_order_preserved(): + # GIVEN metrics instance + metrics = AmazonCloudWatchEMFProvider(namespace="TestApp") + + # WHEN we add dimension sets in specific order + metrics.add_dimension(name="service", value="api") + metrics.add_dimensions({"environment": "prod", "region": "us-east-1"}) + metrics.add_dimensions({"environment": "prod"}) + 
metrics.add_dimensions({"region": "us-east-1"}) + metrics.add_metric(name="TestMetric", unit=MetricUnit.Count, value=1) + + # THEN dimension sets should appear in order added + output = metrics.serialize_metric_set() + dimensions = output["_aws"]["CloudWatchMetrics"][0]["Dimensions"] + + assert dimensions[0] == ["service"] + assert set(dimensions[1]) == {"environment", "region"} + assert dimensions[2] == ["environment"] + assert dimensions[3] == ["region"] + + +def test_add_dimensions_with_metadata(): + # GIVEN metrics with metadata + metrics = AmazonCloudWatchEMFProvider(namespace="TestApp") + + # WHEN we add dimension sets and metadata + metrics.add_dimensions({"environment": "prod"}) + metrics.add_metadata(key="request_id", value="abc-123") + metrics.add_metric(name="ApiLatency", unit=MetricUnit.Milliseconds, value=150) + + # THEN both should be in output + output = metrics.serialize_metric_set() + + assert "environment" in output + assert output["request_id"] == "abc-123" + # Primary dimension_set + 1 additional dimension set + assert len(output["_aws"]["CloudWatchMetrics"][0]["Dimensions"]) == 2 + + +def test_multiple_metrics_with_dimension_sets(): + # GIVEN metrics with multiple metrics and dimension sets + metrics = AmazonCloudWatchEMFProvider(namespace="TestApp") + + # WHEN we add multiple metrics with dimension sets + metrics.add_dimensions({"environment": "prod", "region": "us-east-1"}) + metrics.add_dimensions({"environment": "prod"}) + metrics.add_metric(name="SuccessCount", unit=MetricUnit.Count, value=100) + metrics.add_metric(name="ErrorCount", unit=MetricUnit.Count, value=5) + metrics.add_metric(name="Latency", unit=MetricUnit.Milliseconds, value=250) + + # THEN all metrics should share the same dimension sets + output = metrics.serialize_metric_set() + + assert len(output["_aws"]["CloudWatchMetrics"][0]["Metrics"]) == 3 + # Primary (empty) + 2 added dimension sets + assert len(output["_aws"]["CloudWatchMetrics"][0]["Dimensions"]) == 3 + assert 
output["SuccessCount"] == [100.0] + assert output["ErrorCount"] == [5.0] + assert output["Latency"] == [250.0] + + +def test_add_dimensions_with_high_resolution_metrics(): + # GIVEN metrics with high resolution + metrics = AmazonCloudWatchEMFProvider(namespace="TestApp") + + # WHEN we add dimension sets with high-resolution metrics + metrics.add_dimensions({"function": "process_order"}) + metrics.add_metric( + name="ProcessingTime", + unit=MetricUnit.Milliseconds, + value=45, + resolution=1, # High resolution + ) + + # THEN dimension sets should work with high-resolution metrics + output = metrics.serialize_metric_set() + + # Primary (empty) + 1 added dimension set + assert len(output["_aws"]["CloudWatchMetrics"][0]["Dimensions"]) == 2 + assert output["_aws"]["CloudWatchMetrics"][0]["Metrics"][0]["StorageResolution"] == 1