diff --git a/README.md b/README.md index 699c368..1f34552 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ Requirements: - Name Length 1-255 characters - Name must be ASCII characters only - Values must be in the range of 8.515920e-109 to 1.174271e+108. In addition, special values (for example, NaN, +Infinity, -Infinity) are not supported. -- Units must meet CW Metrics unit requirements, if not it will default to None. See [MetricDatum](https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_MetricDatum.html) for valid values. +- Metrics must meet CloudWatch Metrics requirements; otherwise an `InvalidMetricError` will be raised. See [MetricDatum](https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_MetricDatum.html) for valid values. Examples: @@ -102,6 +102,7 @@ Requirements: - Length 1-255 characters - ASCII characters only +- Dimensions must meet CloudWatch Dimensions requirements; otherwise an `InvalidDimensionError` or `DimensionSetExceededError` will be raised. See [Dimensions](https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_Dimension.html) for valid values. Examples: @@ -122,6 +123,7 @@ Requirements: - Length 1-255 characters - ASCII characters only +- Dimensions must meet CloudWatch Dimensions requirements; otherwise an `InvalidDimensionError` or `DimensionSetExceededError` will be raised. See [Dimensions](https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_Dimension.html) for valid values. Examples: @@ -157,6 +159,7 @@ Requirements: - Name Length 1-255 characters - Name must be ASCII characters only +- Namespace must meet CloudWatch Namespace requirements; otherwise an `InvalidNamespaceError` will be raised. See [Namespaces](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/cloudwatch_concepts.html#Namespace) for valid values. 
Examples: diff --git a/aws_embedded_metrics/constants.py b/aws_embedded_metrics/constants.py index 0b975b7..af2ac16 100644 --- a/aws_embedded_metrics/constants.py +++ b/aws_embedded_metrics/constants.py @@ -12,6 +12,11 @@ # limitations under the License. DEFAULT_NAMESPACE = "aws-embedded-metrics" -MAX_DIMENSION_SET_SIZE = 30 MAX_METRICS_PER_EVENT = 100 MAX_DATAPOINTS_PER_METRIC = 100 +MAX_DIMENSION_SET_SIZE = 30 +MAX_DIMENSION_NAME_LENGTH = 250 +MAX_DIMENSION_VALUE_LENGTH = 1024 +MAX_METRIC_NAME_LENGTH = 1024 +MAX_NAMESPACE_LENGTH = 256 +VALID_NAMESPACE_REGEX = '^[a-zA-Z0-9._#:/-]+$' diff --git a/aws_embedded_metrics/exceptions.py b/aws_embedded_metrics/exceptions.py index 2747924..2ca0f8d 100644 --- a/aws_embedded_metrics/exceptions.py +++ b/aws_embedded_metrics/exceptions.py @@ -15,3 +15,21 @@ class DimensionSetExceededError(Exception): def __init__(self, message: str) -> None: # Call the base class constructor with the parameters it needs super().__init__(message) + + +class InvalidDimensionError(Exception): + def __init__(self, message: str) -> None: + # Call the base class constructor with the parameters it needs + super().__init__(message) + + +class InvalidMetricError(Exception): + def __init__(self, message: str) -> None: + # Call the base class constructor with the parameters it needs + super().__init__(message) + + +class InvalidNamespaceError(Exception): + def __init__(self, message: str) -> None: + # Call the base class constructor with the parameters it needs + super().__init__(message) diff --git a/aws_embedded_metrics/logger/metrics_context.py b/aws_embedded_metrics/logger/metrics_context.py index 13a30c0..1b4db2b 100644 --- a/aws_embedded_metrics/logger/metrics_context.py +++ b/aws_embedded_metrics/logger/metrics_context.py @@ -14,9 +14,8 @@ from aws_embedded_metrics import constants, utils from aws_embedded_metrics.config import get_config -from aws_embedded_metrics.constants import MAX_DIMENSION_SET_SIZE -from aws_embedded_metrics.exceptions 
import DimensionSetExceededError from aws_embedded_metrics.logger.metric import Metric +from aws_embedded_metrics.validator import validate_dimension_set, validate_metric from typing import List, Dict, Any, Set @@ -50,6 +49,7 @@ def put_metric(self, key: str, value: float, unit: str = None) -> None: context.put_metric("Latency", 100, "Milliseconds") ``` """ + validate_metric(key, value, unit) metric = self.metrics.get(key) if metric: # TODO: we should log a warning if the unit has been changed @@ -57,15 +57,6 @@ def put_metric(self, key: str, value: float, unit: str = None) -> None: else: self.metrics[key] = Metric(value, unit) - @staticmethod - def validate_dimension_set(dimensions: Dict[str, str]) -> None: - """ - Validates dimension set length is not more than MAX_DIMENSION_SET_SIZE - """ - if len(dimensions) > MAX_DIMENSION_SET_SIZE: - raise DimensionSetExceededError( - f"Maximum number of dimensions per dimension set allowed are {MAX_DIMENSION_SET_SIZE}") - def put_dimensions(self, dimension_set: Dict[str, str]) -> None: """ Adds dimensions to the context. @@ -77,7 +68,7 @@ def put_dimensions(self, dimension_set: Dict[str, str]) -> None: # TODO add ability to define failure strategy return - self.validate_dimension_set(dimension_set) + validate_dimension_set(dimension_set) # Duplicate dimension sets are removed before being added to the end of the collection. # This ensures only latest dimension value is used as a target member on the root EMF node. 
@@ -99,7 +90,7 @@ def set_dimensions(self, dimension_sets: List[Dict[str, str]], use_default: bool self.should_use_default_dimensions = use_default for dimension_set in dimension_sets: - self.validate_dimension_set(dimension_set) + validate_dimension_set(dimension_set) self.dimensions = dimension_sets diff --git a/aws_embedded_metrics/logger/metrics_logger.py b/aws_embedded_metrics/logger/metrics_logger.py index e9caa04..e5f5708 100644 --- a/aws_embedded_metrics/logger/metrics_logger.py +++ b/aws_embedded_metrics/logger/metrics_logger.py @@ -13,6 +13,7 @@ from aws_embedded_metrics.environment import Environment from aws_embedded_metrics.logger.metrics_context import MetricsContext +from aws_embedded_metrics.validator import validate_namespace from aws_embedded_metrics.config import get_config from typing import Any, Awaitable, Callable, Dict, Tuple import sys @@ -74,6 +75,7 @@ def reset_dimensions(self, use_default: bool) -> "MetricsLogger": return self def set_namespace(self, namespace: str) -> "MetricsLogger": + validate_namespace(namespace) self.context.namespace = namespace return self diff --git a/aws_embedded_metrics/unit.py b/aws_embedded_metrics/unit.py index dfd01e9..58f25e2 100644 --- a/aws_embedded_metrics/unit.py +++ b/aws_embedded_metrics/unit.py @@ -1,7 +1,17 @@ -from enum import Enum +from enum import Enum, EnumMeta -class Unit(Enum): +class UnitMeta(EnumMeta): + def __contains__(self, item: object) -> bool: + try: + self(item) + except (ValueError, TypeError): + return False + else: + return True + + +class Unit(Enum, metaclass=UnitMeta): SECONDS = "Seconds" MICROSECONDS = "Microseconds" MILLISECONDS = "Milliseconds" @@ -28,3 +38,4 @@ class Unit(Enum): GIGABITS_PER_SECOND = "Gigabits/Second" TERABITS_PER_SECOND = "Terabits/Second" COUNT_PER_SECOND = "Count/Second" + NONE = "None" diff --git a/aws_embedded_metrics/validator.py b/aws_embedded_metrics/validator.py new file mode 100644 index 0000000..b9e8395 --- /dev/null +++ 
b/aws_embedded_metrics/validator.py
@@ -0,0 +1,105 @@
+# Copyright 2019 Amazon.com, Inc. or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import re
+from typing import Dict, Optional
+from aws_embedded_metrics.unit import Unit
+from aws_embedded_metrics.exceptions import DimensionSetExceededError, InvalidDimensionError, InvalidMetricError, InvalidNamespaceError
+import aws_embedded_metrics.constants as constants
+
+
+def validate_dimension_set(dimension_set: Dict[str, str]) -> None:
+    """
+    Validates a dimension set
+
+    Parameters:
+        dimension_set (Dict[str, str]): The dimension set to validate
+
+    Raises:
+        DimensionSetExceededError: If the set has more than MAX_DIMENSION_SET_SIZE entries
+        InvalidDimensionError: If a name or value is blank, too long or non-ASCII, or a name starts with ':'
+    """
+    if len(dimension_set) > constants.MAX_DIMENSION_SET_SIZE:
+        raise DimensionSetExceededError(
+            f"Maximum number of dimensions per dimension set allowed are {constants.MAX_DIMENSION_SET_SIZE}")
+
+    for name, value in dimension_set.items():
+        if not name or len(name.strip()) == 0:
+            raise InvalidDimensionError("Dimension name must include at least one non-whitespace character")
+
+        if not value or len(value.strip()) == 0:
+            raise InvalidDimensionError("Dimension value must include at least one non-whitespace character")
+
+        if len(name) > constants.MAX_DIMENSION_NAME_LENGTH:
+            raise InvalidDimensionError(f"Dimension name cannot be longer than {constants.MAX_DIMENSION_NAME_LENGTH} characters")
+
+        if len(value) > constants.MAX_DIMENSION_VALUE_LENGTH:
+            raise InvalidDimensionError(f"Dimension value cannot be longer than {constants.MAX_DIMENSION_VALUE_LENGTH} characters")
+
+        if not name.isascii():
+            raise InvalidDimensionError(f"Dimension name contains invalid characters: {name}")
+
+        if not value.isascii():
+            raise InvalidDimensionError(f"Dimension value contains invalid characters: {value}")
+
+        if name.startswith(":"):
+            raise InvalidDimensionError("Dimension name cannot start with ':'")
+
+
+def validate_metric(name: str, value: float, unit: Optional[str]) -> None:
+    """
+    Validates a metric
+
+    Parameters:
+        name (str): The name of the metric
+        value (float): The value of the metric
+        unit (Optional[str]): The unit of the metric; None skips the unit check
+
+    Raises:
+        InvalidMetricError: If the name is blank or too long, the value is not finite, or the unit is not a valid Unit
+    """
+    if not name or len(name.strip()) == 0:
+        raise InvalidMetricError("Metric name must include at least one non-whitespace character")
+
+    # Metric names have their own limit; the dimension-name limit must not be reused here.
+    if len(name) > constants.MAX_METRIC_NAME_LENGTH:
+        raise InvalidMetricError(f"Metric name cannot be longer than {constants.MAX_METRIC_NAME_LENGTH} characters")
+
+    if not math.isfinite(value):
+        raise InvalidMetricError("Metric value must be finite")
+
+    if unit is not None and unit not in Unit:
+        raise InvalidMetricError(f"Metric unit is not valid: {unit}")
+
+
+def validate_namespace(namespace: str) -> None:
+    """
+    Validates a namespace
+
+    Parameters:
+        namespace (str): The namespace to validate
+
+    Raises:
+        InvalidNamespaceError: If the namespace is blank, too long, or contains characters outside VALID_NAMESPACE_REGEX
+    """
+    if not namespace or len(namespace.strip()) == 0:
+        raise InvalidNamespaceError("Namespace must include at least one non-whitespace character")
+
+    if len(namespace) > constants.MAX_NAMESPACE_LENGTH:
+        raise InvalidNamespaceError(f"Namespace cannot be longer than {constants.MAX_NAMESPACE_LENGTH} characters")
+
+    # fullmatch, not match: with match, the pattern's trailing '$' also matches just
+    # before a final newline, so a namespace such as "ns\n" would be accepted.
+    if not re.fullmatch(constants.VALID_NAMESPACE_REGEX, namespace):
+        raise InvalidNamespaceError(f"Namespace contains invalid characters: {namespace}")
diff --git
a/tests/logger/test_metrics_context.py b/tests/logger/test_metrics_context.py index c77e234..f20ed43 100644 --- a/tests/logger/test_metrics_context.py +++ b/tests/logger/test_metrics_context.py @@ -1,10 +1,14 @@ +import pytest +import math +import random +from aws_embedded_metrics import constants +from aws_embedded_metrics.unit import Unit from aws_embedded_metrics import config from aws_embedded_metrics.logger.metrics_context import MetricsContext from aws_embedded_metrics.constants import DEFAULT_NAMESPACE -from aws_embedded_metrics.exceptions import DimensionSetExceededError +from aws_embedded_metrics.exceptions import DimensionSetExceededError, InvalidDimensionError, InvalidMetricError from importlib import reload from faker import Faker -import pytest fake = Faker() @@ -228,12 +232,37 @@ def test_get_dimensions_returns_merged_custom_and_default_dimensions(): assert [expected_dimensions] == actual_dimensions +@pytest.mark.parametrize( + "name, value", + [ + (None, "value"), + ("", "value"), + (" ", "value"), + ("a" * (constants.MAX_DIMENSION_NAME_LENGTH + 1), "value"), + ("ḓɨɱɛɳʂɨøɳ", "value"), + (":dim", "value"), + ("dim", ""), + ("dim", " "), + ("dim", "a" * (constants.MAX_DIMENSION_VALUE_LENGTH + 1)), + ("dim", "ṽɑɭʊɛ"), + ] +) +def test_add_invalid_dimensions_raises_exception(name, value): + context = MetricsContext() + + with pytest.raises(InvalidDimensionError): + context.put_dimensions({name: value}) + + with pytest.raises(InvalidDimensionError): + context.set_dimensions([{name: value}]) + + def test_put_metric_adds_metrics(): # arrange context = MetricsContext() metric_key = fake.word() metric_value = fake.random.random() - metric_unit = fake.word() + metric_unit = random.choice(list(Unit)).value # act context.put_metric(metric_key, metric_value, metric_unit) @@ -258,6 +287,28 @@ def test_put_metric_uses_none_unit_if_not_provided(): assert metric.unit == "None" +@pytest.mark.parametrize( + "name, value, unit", + [ + ("", 1, "None"), + (" ", 1, 
"Seconds"), + ("a" * (constants.MAX_METRIC_NAME_LENGTH + 1), 1, "None"), + ("metric", float("inf"), "Count"), + ("metric", float("-inf"), "Count"), + ("metric", float("nan"), "Count"), + ("metric", math.inf, "Seconds"), + ("metric", -math.inf, "Seconds"), + ("metric", math.nan, "Seconds"), + ("metric", 1, "Kilometers/Fahrenheit") + ] +) +def test_put_invalid_metric_raises_exception(name, value, unit): + context = MetricsContext() + + with pytest.raises(InvalidMetricError): + context.put_metric(name, value, unit) + + def test_create_copy_with_context_creates_new_instance(): # arrange context = MetricsContext() @@ -340,10 +391,10 @@ def test_create_copy_with_context_does_not_copy_metrics(): def test_set_dimensions_overwrites_all_dimensions(): # arrange context = MetricsContext() - context.set_default_dimensions({fake.word(): fake.word}) - context.put_dimensions({fake.word(): fake.word}) + context.set_default_dimensions({fake.word(): fake.word()}) + context.put_dimensions({fake.word(): fake.word()}) - expected_dimensions = [{fake.word(): fake.word}] + expected_dimensions = [{fake.word(): fake.word()}] # act context.set_dimensions(expected_dimensions) diff --git a/tests/logger/test_metrics_logger.py b/tests/logger/test_metrics_logger.py index 641234a..dbcd088 100644 --- a/tests/logger/test_metrics_logger.py +++ b/tests/logger/test_metrics_logger.py @@ -2,6 +2,8 @@ from aws_embedded_metrics.logger import metrics_logger from aws_embedded_metrics.sinks import Sink from aws_embedded_metrics.environment import Environment +from aws_embedded_metrics.exceptions import InvalidNamespaceError +import aws_embedded_metrics.constants as constants import pytest from faker import Faker from asyncio import Future @@ -353,6 +355,14 @@ async def test_can_set_namespace(mocker): assert context.namespace == expected_value +@pytest.mark.parametrize("namespace", [None, "", " ", "a" * (constants.MAX_NAMESPACE_LENGTH + 1), "ŋàɱȅƨƥȁƈȅ", "namespace "]) +def 
test_set_invalid_namespace_throws_exception(namespace, mocker): + logger, sink, env = get_logger_and_sink(mocker) + + with pytest.raises(InvalidNamespaceError): + logger.set_namespace(namespace) + + @pytest.mark.asyncio async def test_context_is_preserved_across_flushes(mocker): # arrange diff --git a/tests/serializer/test_log_serializer.py b/tests/serializer/test_log_serializer.py index b6098bc..3114b34 100644 --- a/tests/serializer/test_log_serializer.py +++ b/tests/serializer/test_log_serializer.py @@ -94,7 +94,7 @@ def test_serialize_metrics(): def test_serialize_more_than_100_metrics(): # arrange - expected_value = fake.word() + expected_value = fake.random.randrange(0, 100) expected_batches = 3 metrics = 295 @@ -209,7 +209,7 @@ def test_serialize_with_multiple_metrics(): for index in range(metrics): expected_key = f"Metric-{index}" - expected_value = fake.word() + expected_value = fake.random.randrange(0, 100) context.put_metric(expected_key, expected_value) expected_metric_definition = {"Name": expected_key, "Unit": "None"} @@ -229,7 +229,7 @@ def test_serialize_with_multiple_metrics(): def test_serialize_metrics_with_multiple_datapoints(): # arrange expected_key = fake.word() - expected_values = [fake.word(), fake.word()] + expected_values = [fake.random.randrange(0, 100), fake.random.randrange(0, 100)] expected_metric_definition = {"Name": expected_key, "Unit": "None"} expected = {**get_empty_payload()} expected[expected_key] = expected_values