From 082d1d0fe54b5604cd7a789e0776c469e5fbf697 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 14 Apr 2021 18:07:17 +0200 Subject: [PATCH 01/20] chore: simplify handler conditional --- aws_lambda_powertools/logging/logger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws_lambda_powertools/logging/logger.py b/aws_lambda_powertools/logging/logger.py index 2e9cbb78d2e..55f4c57fad4 100644 --- a/aws_lambda_powertools/logging/logger.py +++ b/aws_lambda_powertools/logging/logger.py @@ -136,7 +136,7 @@ def __init__( ) self.log_level = self._get_log_level(level) self.child = child - self._handler = logging.StreamHandler(stream) if stream is not None else logging.StreamHandler(sys.stdout) + self._handler = logging.StreamHandler(stream) or logging.StreamHandler(sys.stdout) self._default_log_keys = {"service": self.service, "sampling_rate": self.sampling_rate} self._logger = self._get_logger() self._init_logger(**kwargs) From 4268e1c21c19ed531ea4f41cc4144b827532e347 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 14 Apr 2021 18:31:55 +0200 Subject: [PATCH 02/20] refactor(formatter): reduce dict updates --- aws_lambda_powertools/logging/formatter.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index 0140d057f0d..c975af9d77a 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -181,10 +181,14 @@ def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict: def format(self, record): # noqa: A003 formatted_log = self._extract_log_keys(log_record=record) - formatted_log["message"] = self._extract_log_message(log_record=record) - formatted_log["exception_name"] = self._extract_log_exception_name(log_record=record) - formatted_log["exception"] = self._extract_log_exception(log_record=record) - formatted_log.update({"xray_trace_id": self._get_latest_trace_id()}) # fetch latest Trace ID, if any + formatted_log.update( + { + "message": self._extract_log_message(log_record=record), + "exception_name": self._extract_log_exception_name(log_record=record), + "exception": self._extract_log_exception(log_record=record), + "xray_trace_id": self._get_latest_trace_id(), + } + ) # Filter out top level key with values that are None formatted_log = {k: v for k, v in formatted_log.items() if v is not None} From cbce580a9d71bd0b6679e8565c175623bf8fc0de Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Thu, 15 Apr 2021 19:36:02 +0200 Subject: [PATCH 03/20] refactor(formatter): prep work for decoupling --- aws_lambda_powertools/logging/formatter.py | 75 +++++++++++++-------- aws_lambda_powertools/logging/logger.py | 4 +- tests/functional/test_aws_lambda_logging.py | 2 + 3 files changed, 50 insertions(+), 31 deletions(-) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index c975af9d77a..e3f5b6734b2 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -1,7 +1,8 @@ import json import logging import os -from typing import Dict, Iterable, Optional, Union +from functools import partial +from typing import Any, Callable, Dict, Iterable, List, Optional, Union from ..shared import constants @@ -30,47 +31,64 @@ ) -class JsonFormatter(logging.Formatter): - """AWS Lambda Logging formatter. - - Formats the log message as a JSON encoded string. If the message is a - dict it will be used directly. If the message can be parsed as JSON, then - the parse d value is used in the output record. - - Originally taken from https://gitlab.com/hadrien/aws_lambda_logging/ +class LambdaPowertoolsFormatter(logging.Formatter): + """AWS Lambda Powertools Logging formatter. + Formats the log message as a JSON encoded string. If the message is a + dict it will be used directly. """ - def __init__(self, **kwargs): - """Return a JsonFormatter instance. - - The `json_default` kwarg is used to specify a formatter for otherwise - unserializable values. It must not throw. Defaults to a function that - coerces the value to a string. + def __init__( + self, + json_encoder: Optional[Callable[[Any], Any]] = None, + json_decoder: Optional[Callable[[Any], Any]] = None, + datefmt: str = None, + log_record_order: List = None, + **kwargs + ): + """Return a LambdaPowertoolsFormatter instance. The `log_record_order` kwarg is used to specify the order of the keys used in the structured json logs. By default the order is: "level", "location", "message", "timestamp", "service" and "sampling_rate". Other kwargs are used to specify log field format strings. + + Parameters + ---------- + json_encoder : Callable, optional + A function to serialize `obj` to a JSON formatted `str`, by default json.dumps + json_decoder : Callable, optional + A function to deserialize `str`, `bytes`, bytearray` containing a JSON document to a Python `obj`, + by default json.loads + datefmt : str, optional + String directives (strftime) to format log timestamp + + See https://docs.python.org/3/library/time.html#time.strftime + kwargs + Key-value to be included in log messages + + Examples + -------- + Create examples """ - # Set the default unserializable function, by default values will be cast as str. - self.default_json_formatter = kwargs.pop("json_default", str) - # Set the insertion order for the log messages - self.log_format = dict.fromkeys(kwargs.pop("log_record_order", ["level", "location", "message", "timestamp"])) + self.json_decoder = json_decoder or json.loads + self.json_encoder = json_encoder or partial(json.dumps, default=str, separators=(",", ":")) + self.datefmt = datefmt + self.log_record_order = log_record_order or ["level", "location", "message", "timestamp"] + self.log_format = dict.fromkeys(self.log_record_order) # Set the insertion order for the log messages self.reserved_keys = ["timestamp", "level", "location"] - # Set the date format used by `asctime` - super(JsonFormatter, self).__init__(datefmt=kwargs.pop("datefmt", None)) - self.log_format.update(self._build_root_keys(**kwargs)) + super(LambdaPowertoolsFormatter, self).__init__(datefmt=self.datefmt) + + self.log_format.update(**self._build_default_keys(), **kwargs) @staticmethod - def _build_root_keys(**kwargs): + def _build_default_keys(): return { "level": "%(levelname)s", "location": "%(funcName)s:%(lineno)d", "timestamp": "%(asctime)s", - **kwargs, } @staticmethod @@ -81,8 +99,7 @@ def _get_latest_trace_id(): def update_formatter(self, **kwargs): self.log_format.update(kwargs) - @staticmethod - def _extract_log_message(log_record: logging.LogRecord) -> Union[Dict, str, bool, Iterable]: + def _extract_log_message(self, log_record: logging.LogRecord) -> Union[Dict, str, bool, Iterable]: """Extract message from log record and attempt to JSON decode it Parameters @@ -100,9 +117,8 @@ def _extract_log_message(log_record: logging.LogRecord) -> Union[Dict, str, bool message: str = log_record.getMessage() - # Attempt to decode non-str messages e.g. msg = '{"x": "y"}' try: - message = json.loads(log_record.msg) + message = self.json_decoder(log_record.msg) except (json.decoder.JSONDecodeError, TypeError, ValueError): pass @@ -162,6 +178,7 @@ def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict: formatted_log = {} + # Refactor: Remove reserved keys and lookup string interpolation instead to allow other keys # We have to iterate over a default or existing log structure # then replace any logging expression for reserved keys e.g. '%(level)s' to 'INFO' # and lastly add or replace incoming keys (those added within the constructor or .structure_logs method) @@ -193,4 +210,4 @@ def format(self, record): # noqa: A003 # Filter out top level key with values that are None formatted_log = {k: v for k, v in formatted_log.items() if v is not None} - return json.dumps(formatted_log, default=self.default_json_formatter) + return self.json_encoder(formatted_log) diff --git a/aws_lambda_powertools/logging/logger.py b/aws_lambda_powertools/logging/logger.py index 55f4c57fad4..fba720ec980 100644 --- a/aws_lambda_powertools/logging/logger.py +++ b/aws_lambda_powertools/logging/logger.py @@ -12,7 +12,7 @@ from ..shared.functions import resolve_env_var_choice, resolve_truthy_env_var_choice from .exceptions import InvalidLoggerSamplingRateError from .filters import SuppressFilter -from .formatter import JsonFormatter +from .formatter import LambdaPowertoolsFormatter from .lambda_context import build_lambda_context_model logger = logging.getLogger(__name__) @@ -305,7 +305,7 @@ def structure_logs(self, append: bool = False, **kwargs): handler.formatter.update_formatter(**kwargs) else: # Set a new formatter for a logger handler - handler.setFormatter(JsonFormatter(**self._default_log_keys, **kwargs)) + handler.setFormatter(LambdaPowertoolsFormatter(**self._default_log_keys, **kwargs)) def set_correlation_id(self, value: str): """Sets the correlation_id in the logging json diff --git a/tests/functional/test_aws_lambda_logging.py b/tests/functional/test_aws_lambda_logging.py index b28a753fc98..164691dfd75 100644 --- a/tests/functional/test_aws_lambda_logging.py +++ b/tests/functional/test_aws_lambda_logging.py @@ -113,6 +113,8 @@ def test_with_unserializable_value_in_message_custom(stdout, service_name): class Unserializable: pass + # Refactor: Needs to decouple Formatter to allow a custom encoder w/ a default coercer + # GIVEN a custom json_default logger = Logger( service=service_name, From 5c8cf72bca52d6a2df918a318cf050d78971c9bb Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Thu, 15 Apr 2021 21:01:42 +0200 Subject: [PATCH 04/20] refactor: allow sampling_rate to be suppressed --- aws_lambda_powertools/logging/formatter.py | 3 ++- aws_lambda_powertools/logging/logger.py | 2 +- tests/functional/test_aws_lambda_logging.py | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index e3f5b6734b2..f44a50b2971 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -81,7 +81,8 @@ def __init__( super(LambdaPowertoolsFormatter, self).__init__(datefmt=self.datefmt) - self.log_format.update(**self._build_default_keys(), **kwargs) + keys_combined = {**self._build_default_keys(), **kwargs} + self.log_format.update(**keys_combined) @staticmethod def _build_default_keys(): diff --git a/aws_lambda_powertools/logging/logger.py b/aws_lambda_powertools/logging/logger.py index fba720ec980..0a041a73d71 100644 --- a/aws_lambda_powertools/logging/logger.py +++ b/aws_lambda_powertools/logging/logger.py @@ -129,7 +129,7 @@ def __init__( choice=service, env=os.getenv(constants.SERVICE_NAME_ENV, "service_undefined") ) self.sampling_rate = resolve_env_var_choice( - choice=sampling_rate, env=os.getenv(constants.LOGGER_LOG_SAMPLING_RATE, 0.0) + choice=sampling_rate, env=os.getenv(constants.LOGGER_LOG_SAMPLING_RATE) ) self._is_deduplication_disabled = resolve_truthy_env_var_choice( env=os.getenv(constants.LOGGER_LOG_DEDUPLICATION_ENV, "false") diff --git a/tests/functional/test_aws_lambda_logging.py b/tests/functional/test_aws_lambda_logging.py index 164691dfd75..086fe8148bb 100644 --- a/tests/functional/test_aws_lambda_logging.py +++ b/tests/functional/test_aws_lambda_logging.py @@ -175,7 +175,7 @@ def test_log_custom_formatting(stdout, service_name): def test_log_dict_key_strip_nones(stdout, service_name): # GIVEN a logger confirmation where we set `location` and `timestamp` to None - # Note: level, sampling_rate and service can not be suppressed + # Note: level and service cannot be suppressed logger = Logger(stream=stdout, level=None, location=None, timestamp=None, sampling_rate=None, service=None) # WHEN logging a message @@ -183,8 +183,8 @@ def test_log_dict_key_strip_nones(stdout, service_name): log_dict: dict = json.loads(stdout.getvalue()) - # THEN the keys should only include `level`, `message`, `service`, `sampling_rate` - assert sorted(log_dict.keys()) == ["level", "message", "sampling_rate", "service"] + # THEN the keys should only include `level`, `message`, `service` + assert sorted(log_dict.keys()) == ["level", "message", "service"] assert log_dict["service"] == "service_undefined" From 8bf0ab449accdf51cbddc777e11b6ea503c2c31c Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Thu, 15 Apr 2021 21:16:58 +0200 Subject: [PATCH 05/20] refactor(formatter): merge exception formatting methods --- aws_lambda_powertools/logging/formatter.py | 41 +++++----------------- 1 file changed, 9 insertions(+), 32 deletions(-) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index f44a50b2971..e8ad70212d8 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -2,7 +2,7 @@ import logging import os from functools import partial -from typing import Any, Callable, Dict, Iterable, List, Optional, Union +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union from ..shared import constants @@ -125,7 +125,7 @@ def _extract_log_message(self, log_record: logging.LogRecord) -> Union[Dict, str return message - def _extract_log_exception(self, log_record: logging.LogRecord) -> Optional[str]: + def _extract_log_exception(self, log_record: logging.LogRecord) -> Union[Tuple[str, str], Tuple[None, None]]: """Format traceback information, if available Parameters @@ -135,31 +135,13 @@ def _extract_log_exception(self, log_record: logging.LogRecord) -> Optional[str] Returns ------- - log_record: Optional[str] - Log record with constant traceback info + log_record: Optional[Tuple[str, str]] + Log record with constant traceback info and exception name """ if log_record.exc_info: - return self.formatException(log_record.exc_info) + return self.formatException(log_record.exc_info), log_record.exc_info[0].__name__ - return None - - def _extract_log_exception_name(self, log_record: logging.LogRecord) -> Optional[str]: - """Extract the exception name, if available - - Parameters - ---------- - log_record : logging.LogRecord - Log record to extract exception name from - - Returns - ------- - log_record: Optional[str] - Log record with exception name - """ - if log_record.exc_info: - return log_record.exc_info[0].__name__ - - return None + return None, None def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict: """Extract and parse custom and reserved log keys @@ -199,14 +181,9 @@ def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict: def format(self, record): # noqa: A003 formatted_log = self._extract_log_keys(log_record=record) - formatted_log.update( - { - "message": self._extract_log_message(log_record=record), - "exception_name": self._extract_log_exception_name(log_record=record), - "exception": self._extract_log_exception(log_record=record), - "xray_trace_id": self._get_latest_trace_id(), - } - ) + formatted_log["message"] = self._extract_log_message(log_record=record) + formatted_log["exception"], formatted_log["exception_name"] = self._extract_log_exception(log_record=record) + formatted_log["xray_trace_id"] = self._get_latest_trace_id() # Filter out top level key with values that are None formatted_log = {k: v for k, v in formatted_log.items() if v is not None} From b91ee9659acec7ed90e0d6a5c4026afcda51917c Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Fri, 16 Apr 2021 10:36:20 +0200 Subject: [PATCH 06/20] refactor(formatter): support std log attr, explicit json default fn --- aws_lambda_powertools/logging/formatter.py | 20 ++++++++++++++------ tests/functional/test_aws_lambda_logging.py | 14 ++++++++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index e8ad70212d8..6abaab89fda 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -42,6 +42,7 @@ def __init__( self, json_encoder: Optional[Callable[[Any], Any]] = None, json_decoder: Optional[Callable[[Any], Any]] = None, + json_default: Optional[Callable[[Any], Any]] = None, datefmt: str = None, log_record_order: List = None, **kwargs @@ -61,6 +62,11 @@ def __init__( json_decoder : Callable, optional A function to deserialize `str`, `bytes`, bytearray` containing a JSON document to a Python `obj`, by default json.loads + json_default : Callable, optional + A function to coercer unserializable values, by default str + + Only used when no custom JSON encoder is set + datefmt : str, optional String directives (strftime) to format log timestamp @@ -71,13 +77,16 @@ def __init__( Examples -------- Create examples + + Add example of standard log attributes that use str interpolation e.g. %(process)d + Add example of JSON default fn for unserializable values """ self.json_decoder = json_decoder or json.loads - self.json_encoder = json_encoder or partial(json.dumps, default=str, separators=(",", ":")) + self.json_default = json_default or str + self.json_encoder = json_encoder or partial(json.dumps, default=self.json_default, separators=(",", ":")) self.datefmt = datefmt self.log_record_order = log_record_order or ["level", "location", "message", "timestamp"] self.log_format = dict.fromkeys(self.log_record_order) # Set the insertion order for the log messages - self.reserved_keys = ["timestamp", "level", "location"] super(LambdaPowertoolsFormatter, self).__init__(datefmt=self.datefmt) @@ -161,12 +170,11 @@ def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict: formatted_log = {} - # Refactor: Remove reserved keys and lookup string interpolation instead to allow other keys # We have to iterate over a default or existing log structure - # then replace any logging expression for reserved keys e.g. '%(level)s' to 'INFO' - # and lastly add or replace incoming keys (those added within the constructor or .structure_logs method) + # then replace any std log attribute e.g. '%(level)s' to 'INFO', '%(process)d to '4773' + # lastly add or replace incoming keys (those added within the constructor or .structure_logs method) for key, value in self.log_format.items(): - if value and key in self.reserved_keys: + if isinstance(value, str) and value.startswith("%("): formatted_log[key] = value % record_dict else: formatted_log[key] = value diff --git a/tests/functional/test_aws_lambda_logging.py b/tests/functional/test_aws_lambda_logging.py index 086fe8148bb..2c4a416b47a 100644 --- a/tests/functional/test_aws_lambda_logging.py +++ b/tests/functional/test_aws_lambda_logging.py @@ -243,3 +243,17 @@ def test_log_dict_xray_is_updated_when_tracing_id_changes(stdout, monkeypatch, s assert log_dict_2["xray_trace_id"] == trace_id_2 monkeypatch.delenv(name="_X_AMZN_TRACE_ID") + + +def test_log_custom_std_log_attribute(stdout, service_name): + # GIVEN a logger where we have a standard log attr process + # https://docs.python.org/3/library/logging.html#logrecord-attributes + logger = Logger(service=service_name, stream=stdout, process="%(process)d") + + # WHEN logging a message + logger.info("foo") + + log_dict: dict = json.loads(stdout.getvalue()) + + # THEN process key should be evaluated + assert "%" not in log_dict["process"] From 87f5605167d851adcae354941aea0b2d2626a88a Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Fri, 16 Apr 2021 10:40:43 +0200 Subject: [PATCH 07/20] chore: rename test to new powertools formatter --- ..._aws_lambda_logging.py => test_logger_powertools_formatter.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/functional/{test_aws_lambda_logging.py => test_logger_powertools_formatter.py} (100%) diff --git a/tests/functional/test_aws_lambda_logging.py b/tests/functional/test_logger_powertools_formatter.py similarity index 100% rename from tests/functional/test_aws_lambda_logging.py rename to tests/functional/test_logger_powertools_formatter.py From 209220faaafa09bd76d44ce2c1623f6e8024d37f Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Fri, 16 Apr 2021 11:24:44 +0200 Subject: [PATCH 08/20] feat(formatter): add UTC support --- aws_lambda_powertools/logging/formatter.py | 25 +++++++++++++++++++ .../test_logger_powertools_formatter.py | 9 +++++++ 2 files changed, 34 insertions(+) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index 6abaab89fda..0268d652d1c 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -1,6 +1,7 @@ import json import logging import os +import time from functools import partial from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union @@ -38,6 +39,9 @@ class LambdaPowertoolsFormatter(logging.Formatter): dict it will be used directly. """ + default_time_format = "%Y-%m-%d %H:%M:%S.%F%z" # '2021-04-17 18:19:57.656+0200' + custom_ms_time_directive = "%F" + def __init__( self, json_encoder: Optional[Callable[[Any], Any]] = None, @@ -45,6 +49,7 @@ def __init__( json_default: Optional[Callable[[Any], Any]] = None, datefmt: str = None, log_record_order: List = None, + utc: bool = False, **kwargs ): """Return a LambdaPowertoolsFormatter instance. @@ -71,6 +76,8 @@ def __init__( String directives (strftime) to format log timestamp See https://docs.python.org/3/library/time.html#time.strftime + utc : bool, optional + Set logging timestamp to UTC, by default False to continue to use local time as per stdlib kwargs Key-value to be included in log messages @@ -80,14 +87,19 @@ def __init__( Add example of standard log attributes that use str interpolation e.g. %(process)d Add example of JSON default fn for unserializable values + Add example for UTC """ self.json_decoder = json_decoder or json.loads self.json_default = json_default or str self.json_encoder = json_encoder or partial(json.dumps, default=self.json_default, separators=(",", ":")) self.datefmt = datefmt + self.utc = utc self.log_record_order = log_record_order or ["level", "location", "message", "timestamp"] self.log_format = dict.fromkeys(self.log_record_order) # Set the insertion order for the log messages + if self.utc: + self.converter = time.gmtime + super(LambdaPowertoolsFormatter, self).__init__(datefmt=self.datefmt) keys_combined = {**self._build_default_keys(), **kwargs} @@ -197,3 +209,16 @@ def format(self, record): # noqa: A003 formatted_log = {k: v for k, v in formatted_log.items() if v is not None} return self.json_encoder(formatted_log) + + def formatTime(self, record: logging.LogRecord, datefmt: Optional[str] = None) -> str: + # NOTE: Pyhton time.strftime doesn't provide msec directives + # so we create a custom one (%F) and replace logging record ts + # Reason 2 is that std logging doesn't support msec after TZ + record_ts = self.converter(record.created) + if datefmt: + ts_formatted = time.strftime(datefmt, record_ts) + else: + msec = "%03d" % record.msecs + custom_fmt = self.default_time_format.replace(self.custom_ms_time_directive, msec) + ts_formatted = time.strftime(custom_fmt, record_ts) + return ts_formatted diff --git a/tests/functional/test_logger_powertools_formatter.py b/tests/functional/test_logger_powertools_formatter.py index 2c4a416b47a..638a2dacf42 100644 --- a/tests/functional/test_logger_powertools_formatter.py +++ b/tests/functional/test_logger_powertools_formatter.py @@ -3,6 +3,7 @@ import json import random import string +import time import pytest @@ -257,3 +258,11 @@ def test_log_custom_std_log_attribute(stdout, service_name): # THEN process key should be evaluated assert "%" not in log_dict["process"] + + +def test_log_in_utc(stdout, service_name): + # GIVEN a logger where UTC TZ has been set + logger = Logger(service=service_name, stream=stdout, utc=True) + + # THEN logging formatter time converter should use gmtime fn + assert logger._logger.handlers[0].formatter.converter == time.gmtime From 5a92347c8beba8d3092746855763c08fc7a60ee6 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Sun, 18 Apr 2021 07:25:45 +0200 Subject: [PATCH 09/20] chore: alias previous formatter --- aws_lambda_powertools/logging/formatter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index 0268d652d1c..3d84c96006b 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -222,3 +222,6 @@ def formatTime(self, record: logging.LogRecord, datefmt: Optional[str] = None) - custom_fmt = self.default_time_format.replace(self.custom_ms_time_directive, msec) ts_formatted = time.strftime(custom_fmt, record_ts) return ts_formatted + + +JsonFormatter = LambdaPowertoolsFormatter # alias to previous formatter From a4c9afadbfef69e14f57aeeab4f5e203558e5f5a Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Sun, 18 Apr 2021 08:02:15 +0200 Subject: [PATCH 10/20] refactor: move None strip to staticmethod, simplify extras --- aws_lambda_powertools/logging/formatter.py | 26 ++++++++++------------ 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index 3d84c96006b..40dc4a24bf1 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -164,7 +164,7 @@ def _extract_log_exception(self, log_record: logging.LogRecord) -> Union[Tuple[s return None, None - def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict: + def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict[str, Any]: """Extract and parse custom and reserved log keys Parameters @@ -177,12 +177,13 @@ def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict: formatted_log: Dict Structured log as dictionary """ - record_dict = log_record.__dict__.copy() # has extra kwargs we are after - record_dict["asctime"] = self.formatTime(log_record, self.datefmt) + record_dict = log_record.__dict__.copy() + record_dict["asctime"] = self.formatTime(record=log_record, datefmt=self.datefmt) + extras = {k: v for k, v in record_dict.items() if k not in STD_LOGGING_KEYS} - formatted_log = {} + formatted_log = {**extras} - # We have to iterate over a default or existing log structure + # Iterate over a default or existing log structure # then replace any std log attribute e.g. '%(level)s' to 'INFO', '%(process)d to '4773' # lastly add or replace incoming keys (those added within the constructor or .structure_logs method) for key, value in self.log_format.items(): @@ -191,22 +192,19 @@ def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict: else: formatted_log[key] = value - # pick up extra keys when logging a new message e.g. log.info("my message", extra={"additional_key": "value"} - # these messages will be added to the root of the final structure not within `message` key - for key, value in record_dict.items(): - if key not in STD_LOGGING_KEYS: - formatted_log[key] = value - return formatted_log + @staticmethod + def _strip_none_records(records: Dict[str, Any]) -> Dict[str, Any]: + """Remove any key with None as value""" + return {k: v for k, v in records.items() if v is not None} + def format(self, record): # noqa: A003 formatted_log = self._extract_log_keys(log_record=record) formatted_log["message"] = self._extract_log_message(log_record=record) formatted_log["exception"], formatted_log["exception_name"] = self._extract_log_exception(log_record=record) formatted_log["xray_trace_id"] = self._get_latest_trace_id() - - # Filter out top level key with values that are None - formatted_log = {k: v for k, v in formatted_log.items() if v is not None} + formatted_log = self._strip_none_records(records=formatted_log) return self.json_encoder(formatted_log) From bf50133ec72254e0d19c82aab26bdc050137d244 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Sun, 18 Apr 2021 15:44:43 +0200 Subject: [PATCH 11/20] refactor: remove unnecessary str coercion --- aws_lambda_powertools/logging/formatter.py | 24 +++++++++---------- .../test_logger_powertools_formatter.py | 13 ++++++++-- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index 40dc4a24bf1..d5c47c65ec1 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -7,7 +7,7 @@ from ..shared import constants -STD_LOGGING_KEYS = ( +RESERVED_LOG_ATTRS = ( "name", "msg", "args", @@ -121,8 +121,8 @@ def _get_latest_trace_id(): def update_formatter(self, **kwargs): self.log_format.update(kwargs) - def _extract_log_message(self, log_record: logging.LogRecord) -> Union[Dict, str, bool, Iterable]: - """Extract message from log record and attempt to JSON decode it + def _extract_log_message(self, log_record: logging.LogRecord) -> Union[Dict[str, Any], str, bool, Iterable]: + """Extract message from log record and attempt to JSON decode it if str Parameters ---------- @@ -134,15 +134,15 @@ def _extract_log_message(self, log_record: logging.LogRecord) -> Union[Dict, str message: Union[Dict, str, bool, Iterable] Extracted message """ - if isinstance(log_record.msg, dict): - return log_record.msg + message = log_record.msg + if isinstance(message, dict): + return message - message: str = log_record.getMessage() - - try: - message = self.json_decoder(log_record.msg) - except (json.decoder.JSONDecodeError, TypeError, ValueError): - pass + if isinstance(message, str): # could be a JSON string + try: + message = self.json_decoder(message) + except (json.decoder.JSONDecodeError, TypeError, ValueError): + pass return message @@ -179,7 +179,7 @@ def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict[str, Any]: """ record_dict = log_record.__dict__.copy() record_dict["asctime"] = self.formatTime(record=log_record, datefmt=self.datefmt) - extras = {k: v for k, v in record_dict.items() if k not in STD_LOGGING_KEYS} + extras = {k: v for k, v in record_dict.items() if k not in RESERVED_LOG_ATTRS} formatted_log = {**extras} diff --git a/tests/functional/test_logger_powertools_formatter.py b/tests/functional/test_logger_powertools_formatter.py index 638a2dacf42..aedeca1dd36 100644 --- a/tests/functional/test_logger_powertools_formatter.py +++ b/tests/functional/test_logger_powertools_formatter.py @@ -114,8 +114,6 @@ def test_with_unserializable_value_in_message_custom(stdout, service_name): class Unserializable: pass - # Refactor: Needs to decouple Formatter to allow a custom encoder w/ a default coercer - # GIVEN a custom json_default logger = Logger( service=service_name, @@ -266,3 +264,14 @@ def test_log_in_utc(stdout, service_name): # THEN logging formatter time converter should use gmtime fn assert logger._logger.handlers[0].formatter.converter == time.gmtime + + +@pytest.mark.parametrize("message", ["hello", 1.10, {}, [], True, object()]) +def test_logging_various_primitives(stdout, service_name, message): + # GIVEN a logger with default settings + logger = Logger(service=service_name, stream=stdout) + + # WHEN logging a message of multiple common types + # THEN it should raise no serialization/deserialization error + logger.info(message) + json.loads(stdout.getvalue()) From 46913baa20dc1205e20253ced042dabedb0b40e4 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Sun, 18 Apr 2021 20:06:28 +0200 Subject: [PATCH 12/20] refactor: serializer/deserializer over encoder --- aws_lambda_powertools/logging/formatter.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index d5c47c65ec1..a2bf0c4d321 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -44,8 +44,8 @@ class LambdaPowertoolsFormatter(logging.Formatter): def __init__( self, - json_encoder: Optional[Callable[[Any], Any]] = None, - json_decoder: Optional[Callable[[Any], Any]] = None, + json_serializer: Optional[Callable[[Any], Any]] = None, + json_deserializer: Optional[Callable[[Any], Any]] = None, json_default: Optional[Callable[[Any], Any]] = None, datefmt: str = None, log_record_order: List = None, @@ -62,9 +62,9 @@ def __init__( Parameters ---------- - json_encoder : Callable, optional + json_serializer : Callable, optional A function to serialize `obj` to a JSON formatted `str`, by default json.dumps - json_decoder : Callable, optional + json_deserializer : Callable, optional A function to deserialize `str`, `bytes`, bytearray` containing a JSON document to a Python `obj`, by default json.loads json_default : Callable, optional @@ -89,9 +89,9 @@ def __init__( Add example of JSON default fn for unserializable values Add example for UTC """ - self.json_decoder = json_decoder or json.loads + self.json_deserializer = json_deserializer or json.loads self.json_default = json_default or str - self.json_encoder = json_encoder or partial(json.dumps, default=self.json_default, separators=(",", ":")) + self.json_serializer = json_serializer or partial(json.dumps, default=self.json_default, separators=(",", ":")) self.datefmt = datefmt self.utc = utc self.log_record_order = log_record_order or ["level", "location", "message", "timestamp"] @@ -140,7 +140,7 @@ def _extract_log_message(self, log_record: logging.LogRecord) -> Union[Dict[str, if isinstance(message, str): # could be a JSON string try: - message = self.json_decoder(message) + message = self.json_deserializer(message) except (json.decoder.JSONDecodeError, TypeError, ValueError): pass @@ -206,7 +206,7 @@ def format(self, record): # noqa: A003 formatted_log["xray_trace_id"] = self._get_latest_trace_id() formatted_log = self._strip_none_records(records=formatted_log) - return self.json_encoder(formatted_log) + return self.json_serializer(formatted_log) def formatTime(self, record: logging.LogRecord, datefmt: Optional[str] = None) -> str: # NOTE: Pyhton time.strftime doesn't provide msec directives From 754a2a25a9d00f396c2a9af2956496d1a8dbb7d7 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Sun, 18 Apr 2021 20:09:30 +0200 Subject: [PATCH 13/20] fix: add missing reserved log attrs --- aws_lambda_powertools/logging/formatter.py | 5 ++++- tests/functional/test_logger_powertools_formatter.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index a2bf0c4d321..a29b97ba844 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -11,6 +11,7 @@ "name", "msg", "args", + "level", "levelname", "levelno", "pathname", @@ -29,6 +30,8 @@ "processName", "process", "asctime", + "location", + "timestamp", ) @@ -187,7 +190,7 @@ def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict[str, Any]: # then replace any std log attribute e.g. '%(level)s' to 'INFO', '%(process)d to '4773' # lastly add or replace incoming keys (those added within the constructor or .structure_logs method) for key, value in self.log_format.items(): - if isinstance(value, str) and value.startswith("%("): + if value and key in RESERVED_LOG_ATTRS: formatted_log[key] = value % record_dict else: formatted_log[key] = value diff --git a/tests/functional/test_logger_powertools_formatter.py b/tests/functional/test_logger_powertools_formatter.py index aedeca1dd36..20bb72f17df 100644 --- a/tests/functional/test_logger_powertools_formatter.py +++ b/tests/functional/test_logger_powertools_formatter.py @@ -168,7 +168,7 @@ def test_log_custom_formatting(stdout, service_name): log_dict: dict = json.loads(stdout.getvalue()) # THEN the `location` and "timestamp" should match the formatting - assert log_dict["location"] == "[test_log_custom_formatting] test_aws_lambda_logging" + assert log_dict["location"] == "[test_log_custom_formatting] test_logger_powertools_formatter" assert log_dict["timestamp"] == "fake-datefmt" From 456846a6f8bfc9dd8e0bbb36e0ffee9a3e0e6f95 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Sun, 18 Apr 2021 21:20:56 +0200 Subject: [PATCH 14/20] refactor: add append/remove keys, common properties --- aws_lambda_powertools/logging/formatter.py | 13 +++++-- aws_lambda_powertools/logging/logger.py | 43 +++++++++++++++------- tests/functional/test_logger.py | 20 ++++++++++ 3 files changed, 58 insertions(+), 18 deletions(-) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index a29b97ba844..f80aa8f1ca3 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -51,7 +51,7 @@ def __init__( json_deserializer: Optional[Callable[[Any], Any]] = None, json_default: Optional[Callable[[Any], Any]] = None, datefmt: str = None, - log_record_order: List = None, + log_record_order: List[str] = None, utc: bool = False, **kwargs ): @@ -99,6 +99,7 @@ def __init__( self.utc = utc self.log_record_order = log_record_order or ["level", "location", "message", "timestamp"] self.log_format = dict.fromkeys(self.log_record_order) # Set the insertion order for the log messages + self.update_formatter = self.append_keys # alias to previous method if self.utc: self.converter = time.gmtime @@ -108,6 +109,13 @@ def __init__( keys_combined = {**self._build_default_keys(), **kwargs} self.log_format.update(**keys_combined) + def append_keys(self, **additional_keys): + self.log_format.update(additional_keys) + + def remove_keys(self, keys: Iterable[str]): + for key in keys: + self.log_format.pop(key, None) + @staticmethod def _build_default_keys(): return { @@ -121,9 +129,6 @@ def _get_latest_trace_id(): xray_trace_id = os.getenv(constants.XRAY_TRACE_ID_ENV) return xray_trace_id.split(";")[0].replace("Root=", "") if xray_trace_id else None - def update_formatter(self, **kwargs): - self.log_format.update(kwargs) - def _extract_log_message(self, log_record: logging.LogRecord) -> Union[Dict[str, Any], str, bool, Iterable]: """Extract message from log record and attempt to JSON decode it if str diff --git a/aws_lambda_powertools/logging/logger.py b/aws_lambda_powertools/logging/logger.py index 0a041a73d71..52da3f96323 100644 --- a/aws_lambda_powertools/logging/logger.py +++ b/aws_lambda_powertools/logging/logger.py @@ -4,7 +4,7 @@ import os import random import sys -from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Callable, Dict, Iterable, Optional, Union import jmespath @@ -42,6 +42,7 @@ def _is_cold_start() -> bool: # so we need to return to subclassing removed in #97 # All methods/properties continue to be proxied to inner logger # https://github.com/awslabs/aws-lambda-powertools-python/issues/107 +# noinspection PyRedeclaration class Logger(logging.Logger): # lgtm [py/missing-call-to-init] """Creates and setups a logger to format statements in JSON. @@ -96,7 +97,7 @@ class Logger(logging.Logger): # lgtm [py/missing-call-to-init] >>> logger = Logger(service="payment") >>> >>> def handler(event, context): - logger.structure_logs(append=True, payment_id=event["payment_id"]) + logger.append_keys(payment_id=event["payment_id"]) logger.info("Hello") **Create child Logger using logging inheritance via child param** @@ -139,6 +140,7 @@ def __init__( self._handler = logging.StreamHandler(stream) or logging.StreamHandler(sys.stdout) self._default_log_keys = {"service": self.service, "sampling_rate": self.sampling_rate} self._logger = self._get_logger() + self._init_logger(**kwargs) def __getattr__(self, name): @@ -282,12 +284,28 @@ def decorate(event, context): return decorate - def structure_logs(self, append: bool = False, **kwargs): + def append_keys(self, **additional_keys): + self.registered_formatter.append_keys(**additional_keys) + + def remove_keys(self, keys: Iterable[str]): + self.registered_formatter.remove_keys(keys) + + @property + def registered_handler(self) -> logging.Handler: + """Registered Logger handler""" + handlers = self._logger.parent.handlers if self.child else self._logger.handlers + return handlers[0] + + @property + def registered_formatter(self) -> Optional[LambdaPowertoolsFormatter]: + """Registered Logger formatter""" + return self.registered_handler.formatter + + def structure_logs(self, append: bool = False, **keys): """Sets logging formatting to JSON. Optionally, it can append keyword arguments - to an existing logger so it is available - across future log statements. + to an existing logger so it is available across future log statements. Last keyword argument and value wins if duplicated. @@ -297,15 +315,12 @@ def structure_logs(self, append: bool = False, **kwargs): [description], by default False """ - # Child loggers don't have handlers attached, use its parent handlers - handlers = self._logger.parent.handlers if self.child else self._logger.handlers - for handler in handlers: - if append: - # Update existing formatter in an existing logger handler - handler.formatter.update_formatter(**kwargs) - else: - # Set a new formatter for a logger handler - handler.setFormatter(LambdaPowertoolsFormatter(**self._default_log_keys, **kwargs)) + if append: + # Maintenance: Add deprecation warning for major version, refer to append_keys() when docs are updated + self.append_keys(**keys) + else: + # Set a new formatter for a logger handler + self.registered_handler.setFormatter(LambdaPowertoolsFormatter(**self._default_log_keys, **keys)) def set_correlation_id(self, value: str): """Sets the correlation_id in the logging json diff --git a/tests/functional/test_logger.py b/tests/functional/test_logger.py index ddf5ee226f5..1efc64818df 100644 --- a/tests/functional/test_logger.py +++ b/tests/functional/test_logger.py @@ -474,3 +474,23 @@ def handler(event, context): # THEN log = capture_logging_output(stdout) assert request_id == log["correlation_id"] + + +def test_logger_append_remove_keys(stdout, service_name): + # GIVEN a Logger is initialized + logger = Logger(service=service_name, stream=stdout) + extra_keys = {"request_id": "id", "context": "value"} + + # WHEN keys are updated + logger.append_keys(**extra_keys) + logger.info("message with new keys") + + # And removed + logger.remove_keys(extra_keys.keys()) + logger.info("message after keys being removed") + + # THEN additional keys should only be present in the first log statement + extra_keys_log, keys_removed_log = capture_multiple_logging_statements_output(stdout) + + assert extra_keys.items() <= extra_keys_log.items() + assert (extra_keys.items() <= keys_removed_log.items()) is False From 2f828cd4fb853ce1cd4bab0a59189d2e38c292f1 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 19 Apr 2021 09:06:58 +0200 Subject: [PATCH 15/20] refactor: update methods to use append_keys --- aws_lambda_powertools/logging/logger.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aws_lambda_powertools/logging/logger.py b/aws_lambda_powertools/logging/logger.py index 52da3f96323..106bda0440b 100644 --- a/aws_lambda_powertools/logging/logger.py +++ b/aws_lambda_powertools/logging/logger.py @@ -271,7 +271,7 @@ def handler(event, context): def decorate(event, context): lambda_context = build_lambda_context_model(context) cold_start = _is_cold_start() - self.structure_logs(append=True, cold_start=cold_start, **lambda_context.__dict__) + self.append_keys(cold_start=cold_start, **lambda_context.__dict__) if correlation_id_path: self.set_correlation_id(jmespath.search(correlation_id_path, event)) @@ -316,7 +316,7 @@ def structure_logs(self, append: bool = False, **keys): """ if append: - # Maintenance: Add deprecation warning for major version, refer to append_keys() when docs are updated + # Maintenance: Add deprecation warning for major version. Refer to append_keys() when docs are updated self.append_keys(**keys) else: # Set a new formatter for a logger handler @@ -330,7 +330,7 @@ def set_correlation_id(self, value: str): value : str Value for the correlation id """ - self.structure_logs(append=True, correlation_id=value) + self.append_keys(correlation_id=value) @staticmethod def _get_log_level(level: Union[str, int, None]) -> Union[str, int]: From 903653f45ea63f7cf0bf62b36e13b6b6c730343c Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 19 Apr 2021 14:16:09 +0200 Subject: [PATCH 16/20] feat: bring your own formatter --- aws_lambda_powertools/logging/formatter.py | 13 +++++- aws_lambda_powertools/logging/logger.py | 21 +++++---- tests/functional/test_logger.py | 50 ++++++++++++++++++++++ 3 files changed, 74 insertions(+), 10 deletions(-) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index f80aa8f1ca3..da8f30c1ad1 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -2,6 +2,7 @@ import logging import os import time +from abc import ABCMeta, abstractmethod from functools import partial from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union @@ -35,7 +36,17 @@ ) -class LambdaPowertoolsFormatter(logging.Formatter): +class BasePowertoolsFormatter(logging.Formatter, metaclass=ABCMeta): + @abstractmethod + def append_keys(self, **additional_keys): + raise NotImplementedError() + + @abstractmethod + def remove_keys(self, keys: Iterable[str]): + raise NotImplementedError() + + +class LambdaPowertoolsFormatter(BasePowertoolsFormatter): """AWS Lambda Powertools Logging formatter. Formats the log message as a JSON encoded string. If the message is a diff --git a/aws_lambda_powertools/logging/logger.py b/aws_lambda_powertools/logging/logger.py index 106bda0440b..8b4053c2f12 100644 --- a/aws_lambda_powertools/logging/logger.py +++ b/aws_lambda_powertools/logging/logger.py @@ -12,7 +12,7 @@ from ..shared.functions import resolve_env_var_choice, resolve_truthy_env_var_choice from .exceptions import InvalidLoggerSamplingRateError from .filters import SuppressFilter -from .formatter import LambdaPowertoolsFormatter +from .formatter import BasePowertoolsFormatter, LambdaPowertoolsFormatter from .lambda_context import build_lambda_context_model logger = logging.getLogger(__name__) @@ -124,6 +124,7 @@ def __init__( child: bool = False, sampling_rate: float = None, stream: sys.stdout = None, + logger_formatter: Optional[BasePowertoolsFormatter] = None, **kwargs, ): self.service = resolve_env_var_choice( @@ -132,11 +133,12 @@ def __init__( self.sampling_rate = resolve_env_var_choice( choice=sampling_rate, env=os.getenv(constants.LOGGER_LOG_SAMPLING_RATE) ) + self.child = child + self.logger_formatter = logger_formatter + self.log_level = self._get_log_level(level) self._is_deduplication_disabled = resolve_truthy_env_var_choice( env=os.getenv(constants.LOGGER_LOG_DEDUPLICATION_ENV, "false") ) - self.log_level = self._get_log_level(level) - self.child = child self._handler = logging.StreamHandler(stream) or logging.StreamHandler(sys.stdout) self._default_log_keys = {"service": self.service, "sampling_rate": self.sampling_rate} self._logger = self._get_logger() @@ -292,13 +294,13 @@ def remove_keys(self, keys: Iterable[str]): @property def registered_handler(self) -> logging.Handler: - """Registered Logger handler""" + """Convenience property to access logger handler""" handlers = self._logger.parent.handlers if self.child else self._logger.handlers return handlers[0] @property - def registered_formatter(self) -> Optional[LambdaPowertoolsFormatter]: - """Registered Logger formatter""" + def registered_formatter(self) -> Optional[BasePowertoolsFormatter]: + """Convenience property to access logger formatter""" return self.registered_handler.formatter def structure_logs(self, append: bool = False, **keys): @@ -312,15 +314,16 @@ def structure_logs(self, append: bool = False, **keys): Parameters ---------- append : bool, optional - [description], by default False + append keys provided to logger formatter, by default False """ if append: # Maintenance: Add deprecation warning for major version. Refer to append_keys() when docs are updated self.append_keys(**keys) else: - # Set a new formatter for a logger handler - self.registered_handler.setFormatter(LambdaPowertoolsFormatter(**self._default_log_keys, **keys)) + log_keys = {**self._default_log_keys, **keys} + formatter = self.logger_formatter or LambdaPowertoolsFormatter(**log_keys) + self.registered_handler.setFormatter(formatter) def set_correlation_id(self, value: str): """Sets the correlation_id in the logging json diff --git a/tests/functional/test_logger.py b/tests/functional/test_logger.py index 1efc64818df..54e841e15db 100644 --- a/tests/functional/test_logger.py +++ b/tests/functional/test_logger.py @@ -5,12 +5,14 @@ import random import string from collections import namedtuple +from typing import Iterable import pytest from aws_lambda_powertools import Logger, Tracer from aws_lambda_powertools.logging import correlation_paths from aws_lambda_powertools.logging.exceptions import InvalidLoggerSamplingRateError +from aws_lambda_powertools.logging.formatter import BasePowertoolsFormatter from aws_lambda_powertools.logging.logger import set_package_logger from aws_lambda_powertools.shared import constants @@ -494,3 +496,51 @@ def test_logger_append_remove_keys(stdout, service_name): assert extra_keys.items() <= extra_keys_log.items() assert (extra_keys.items() <= keys_removed_log.items()) is False + + +def test_logger_custom_formatter(stdout, service_name, lambda_context): + class CustomFormatter(BasePowertoolsFormatter): + custom_format = {} + + def append_keys(self, **additional_keys): + self.custom_format.update(additional_keys) + + def remove_keys(self, keys: Iterable[str]): + for key in keys: + self.custom_format.pop(key, None) + + def format(self, record: logging.LogRecord) -> str: # noqa: A003 + return json.dumps( + { + "message": super().format(record), + "timestamp": self.formatTime(record), + "my_default_key": "test", + **self.custom_format, + } + ) + + custom_formatter = CustomFormatter() + + # GIVEN a Logger is initialized with a custom formatter + logger = Logger(service=service_name, stream=stdout, logger_formatter=custom_formatter) + + # WHEN a lambda function is decorated with logger + @logger.inject_lambda_context + def handler(event, context): + logger.info("Hello") + + handler({}, lambda_context) + + lambda_context_keys = ( + "function_name", + "function_memory_size", + "function_arn", + "function_request_id", + ) + + log = capture_logging_output(stdout) + + # THEN custom key should always be present + # and lambda contextual info should also be in the logs + assert "my_default_key" in log + assert all(k in log for k in lambda_context_keys) From 4345d8b27a7ca59fc4614631477956c0810d0c9d Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 19 Apr 2021 21:54:54 +0200 Subject: [PATCH 17/20] chore: move public methods up, remove unused test var --- aws_lambda_powertools/logging/formatter.py | 45 ++++++++++--------- .../test_logger_powertools_formatter.py | 4 +- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index da8f30c1ad1..3f8458d6edf 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -120,6 +120,29 @@ def __init__( keys_combined = {**self._build_default_keys(), **kwargs} self.log_format.update(**keys_combined) + def format(self, record: logging.LogRecord) -> str: # noqa: A003 + """Format logging record as structured JSON str""" + formatted_log = self._extract_log_keys(log_record=record) + formatted_log["message"] = self._extract_log_message(log_record=record) + formatted_log["exception"], formatted_log["exception_name"] = self._extract_log_exception(log_record=record) + formatted_log["xray_trace_id"] = self._get_latest_trace_id() + formatted_log = self._strip_none_records(records=formatted_log) + + return self.json_serializer(formatted_log) + + def formatTime(self, record: logging.LogRecord, datefmt: Optional[str] = None) -> str: + # NOTE: Pyhton time.strftime doesn't provide msec directives + # so we create a custom one (%F) and replace logging record ts + # Reason 2 is that std logging doesn't support msec after TZ + record_ts = self.converter(record.created) + if datefmt: + ts_formatted = time.strftime(datefmt, record_ts) + else: + msec = "%03d" % record.msecs + custom_fmt = self.default_time_format.replace(self.custom_ms_time_directive, msec) + ts_formatted = time.strftime(custom_fmt, record_ts) + return ts_formatted + def append_keys(self, **additional_keys): self.log_format.update(additional_keys) @@ -218,27 +241,5 @@ def _strip_none_records(records: Dict[str, Any]) -> Dict[str, Any]: """Remove any key with None as value""" return {k: v for k, v in records.items() if v is not None} - def format(self, record): # noqa: A003 - formatted_log = self._extract_log_keys(log_record=record) - formatted_log["message"] = self._extract_log_message(log_record=record) - formatted_log["exception"], formatted_log["exception_name"] = self._extract_log_exception(log_record=record) - formatted_log["xray_trace_id"] = self._get_latest_trace_id() - formatted_log = self._strip_none_records(records=formatted_log) - - return self.json_serializer(formatted_log) - - def formatTime(self, record: logging.LogRecord, datefmt: Optional[str] = None) -> str: - # NOTE: Pyhton time.strftime doesn't provide msec directives - # so we create a custom one (%F) and replace logging record ts - # Reason 2 is that std logging doesn't support msec after TZ - record_ts = self.converter(record.created) - if datefmt: - ts_formatted = time.strftime(datefmt, record_ts) - else: - msec = "%03d" % record.msecs - custom_fmt = self.default_time_format.replace(self.custom_ms_time_directive, msec) - ts_formatted = time.strftime(custom_fmt, record_ts) - return ts_formatted - JsonFormatter = LambdaPowertoolsFormatter # alias to previous formatter diff --git a/tests/functional/test_logger_powertools_formatter.py b/tests/functional/test_logger_powertools_formatter.py index 20bb72f17df..4b92e6b47b9 100644 --- a/tests/functional/test_logger_powertools_formatter.py +++ b/tests/functional/test_logger_powertools_formatter.py @@ -258,9 +258,9 @@ def test_log_custom_std_log_attribute(stdout, service_name): assert "%" not in log_dict["process"] -def test_log_in_utc(stdout, service_name): +def test_log_in_utc(service_name): # GIVEN a logger where UTC TZ has been set - logger = Logger(service=service_name, stream=stdout, utc=True) + logger = Logger(service=service_name, utc=True) # THEN logging formatter time converter should use gmtime fn assert logger._logger.handlers[0].formatter.converter == time.gmtime From b328431390797874e37e98d88b8745e86b22cc5b Mon Sep 17 00:00:00 2001 From: Heitor Lessa Date: Thu, 22 Apr 2021 13:01:04 +0200 Subject: [PATCH 18/20] Update aws_lambda_powertools/logging/formatter.py Co-authored-by: Michael Brewer --- aws_lambda_powertools/logging/formatter.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index 3f8458d6edf..beef8de6b62 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -131,17 +131,16 @@ def format(self, record: logging.LogRecord) -> str: # noqa: A003 return self.json_serializer(formatted_log) def formatTime(self, record: logging.LogRecord, datefmt: Optional[str] = None) -> str: - # NOTE: Pyhton time.strftime doesn't provide msec directives - # so we create a custom one (%F) and replace logging record ts - # Reason 2 is that std logging doesn't support msec after TZ record_ts = self.converter(record.created) if datefmt: - ts_formatted = time.strftime(datefmt, record_ts) - else: - msec = "%03d" % record.msecs - custom_fmt = self.default_time_format.replace(self.custom_ms_time_directive, msec) - ts_formatted = time.strftime(custom_fmt, record_ts) - return ts_formatted + return time.strftime(datefmt, record_ts) + + # NOTE: Python `time.strftime` doesn't provide msec directives + # so we create a custom one (%F) and replace logging record ts + # Reason 2 is that std logging doesn't support msec after TZ + msecs = "%03d" % record.msecs + custom_fmt = self.default_time_format.replace(self.custom_ms_time_directive, msecs) + return time.strftime(custom_fmt, record_ts) def append_keys(self, **additional_keys): self.log_format.update(additional_keys) From 8488adc21aafc0e54cefee93a6e88c8df532fc26 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Thu, 22 Apr 2021 14:43:05 +0200 Subject: [PATCH 19/20] feat: support custom logger handler --- aws_lambda_powertools/logging/logger.py | 5 +++-- tests/functional/test_logger.py | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/aws_lambda_powertools/logging/logger.py b/aws_lambda_powertools/logging/logger.py index 8b4053c2f12..4dc9f47a501 100644 --- a/aws_lambda_powertools/logging/logger.py +++ b/aws_lambda_powertools/logging/logger.py @@ -125,6 +125,7 @@ def __init__( sampling_rate: float = None, stream: sys.stdout = None, logger_formatter: Optional[BasePowertoolsFormatter] = None, + logger_handler: Optional[logging.Handler] = None, **kwargs, ): self.service = resolve_env_var_choice( @@ -135,11 +136,11 @@ def __init__( ) self.child = child self.logger_formatter = logger_formatter + self.logger_handler = logger_handler or logging.StreamHandler(stream) self.log_level = self._get_log_level(level) self._is_deduplication_disabled = resolve_truthy_env_var_choice( env=os.getenv(constants.LOGGER_LOG_DEDUPLICATION_ENV, "false") ) - self._handler = logging.StreamHandler(stream) or logging.StreamHandler(sys.stdout) self._default_log_keys = {"service": self.service, "sampling_rate": self.sampling_rate} self._logger = self._get_logger() @@ -172,7 +173,7 @@ def _init_logger(self, **kwargs): self._configure_sampling() self._logger.setLevel(self.log_level) - self._logger.addHandler(self._handler) + self._logger.addHandler(self.logger_handler) self.structure_logs(**kwargs) # Pytest Live Log feature duplicates log records for colored output diff --git a/tests/functional/test_logger.py b/tests/functional/test_logger.py index 54e841e15db..ba6e82b72af 100644 --- a/tests/functional/test_logger.py +++ b/tests/functional/test_logger.py @@ -544,3 +544,21 @@ def handler(event, context): # and lambda contextual info should also be in the logs assert "my_default_key" in log assert all(k in log for k in lambda_context_keys) + + +def test_logger_custom_handler(lambda_context, service_name, tmp_path): + # GIVEN a Logger is initialized with a FileHandler + log_file = tmp_path / "log.json" + handler = logging.FileHandler(filename=log_file) + logger = Logger(service=service_name, logger_handler=handler) + + # WHEN a log statement happens + @logger.inject_lambda_context + def handler(event, context): + logger.info("custom handler") + + handler({}, lambda_context) + + # THEN we should output to a file not stdout + log = log_file.read_text() + assert "custom handler" in log From a8c5126310190376e185bfc1ec3412250ebf0b4e Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Thu, 22 Apr 2021 15:21:42 +0200 Subject: [PATCH 20/20] chore: update docstrings to inform new parameters --- aws_lambda_powertools/logging/formatter.py | 20 ++++------ aws_lambda_powertools/logging/logger.py | 46 ++++++++++++++++++++++ 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/aws_lambda_powertools/logging/formatter.py b/aws_lambda_powertools/logging/formatter.py index beef8de6b62..6a3c10847a4 100644 --- a/aws_lambda_powertools/logging/formatter.py +++ b/aws_lambda_powertools/logging/formatter.py @@ -77,12 +77,12 @@ def __init__( Parameters ---------- json_serializer : Callable, optional - A function to serialize `obj` to a JSON formatted `str`, by default json.dumps + function to serialize `obj` to a JSON formatted `str`, by default json.dumps json_deserializer : Callable, optional - A function to deserialize `str`, `bytes`, bytearray` containing a JSON document to a Python `obj`, + function to deserialize `str`, `bytes`, bytearray` containing a JSON document to a Python `obj`, by default json.loads json_default : Callable, optional - A function to coercer unserializable values, by default str + function to coerce unserializable values, by default str Only used when no custom JSON encoder is set @@ -91,17 +91,11 @@ def __init__( See https://docs.python.org/3/library/time.html#time.strftime utc : bool, optional - Set logging timestamp to UTC, by default False to continue to use local time as per stdlib + set logging timestamp to UTC, by default False to continue to use local time as per stdlib + log_record_order : list, optional + set order of log keys when logging, by default ["level", "location", "message", "timestamp"] kwargs Key-value to be included in log messages - - Examples - -------- - Create examples - - Add example of standard log attributes that use str interpolation e.g. %(process)d - Add example of JSON default fn for unserializable values - Add example for UTC """ self.json_deserializer = json_deserializer or json.loads self.json_default = json_default or str @@ -110,7 +104,7 @@ def __init__( self.utc = utc self.log_record_order = log_record_order or ["level", "location", "message", "timestamp"] self.log_format = dict.fromkeys(self.log_record_order) # Set the insertion order for the log messages - self.update_formatter = self.append_keys # alias to previous method + self.update_formatter = self.append_keys # alias to old method if self.utc: self.converter = time.gmtime diff --git a/aws_lambda_powertools/logging/logger.py b/aws_lambda_powertools/logging/logger.py index 4dc9f47a501..77e0f3db059 100644 --- a/aws_lambda_powertools/logging/logger.py +++ b/aws_lambda_powertools/logging/logger.py @@ -70,6 +70,28 @@ class Logger(logging.Logger): # lgtm [py/missing-call-to-init] sample rate for debug calls within execution context defaults to 0.0 stream: sys.stdout, optional valid output for a logging stream, by default sys.stdout + logger_formatter: BasePowertoolsFormatter, optional + custom logging formatter that implements BasePowertoolsFormatter + logger_handler: logging.Handler, optional + custom logging handler e.g. logging.FileHandler("file.log") + + Parameters propagated to LambdaPowertoolsFormatter + --------------------------------------------- + datefmt: str, optional + String directives (strftime) to format log timestamp, by default it uses RFC 3339. + json_serializer : Callable, optional + function to serialize `obj` to a JSON formatted `str`, by default json.dumps + json_deserializer : Callable, optional + function to deserialize `str`, `bytes`, bytearray` containing a JSON document to a Python `obj`, + by default json.loads + json_default : Callable, optional + function to coerce unserializable values, by default `str()` + + Only used when no custom JSON encoder is set + utc : bool, optional + set logging timestamp to UTC, by default False to continue to use local time as per stdlib + log_record_order : list, optional + set order of log keys when logging, by default ["level", "location", "message", "timestamp"] Example ------- @@ -111,6 +133,30 @@ class Logger(logging.Logger): # lgtm [py/missing-call-to-init] >>> from aws_lambda_powertools import Logger >>> logger = Logger(service="payment", child=True) + **Logging in UTC timezone** + + >>> # app.py + >>> import logging + >>> from aws_lambda_powertools import Logger + >>> + >>> logger = Logger(service="payment", utc=True) + + **Brings message as the first key in log statements** + + >>> # app.py + >>> import logging + >>> from aws_lambda_powertools import Logger + >>> + >>> logger = Logger(service="payment", log_record_order=["message"]) + + **Logging to a file instead of standard output for testing** + + >>> # app.py + >>> import logging + >>> from aws_lambda_powertools import Logger + >>> + >>> logger = Logger(service="payment", logger_handler=logging.FileHandler("log.json")) + Raises ------ InvalidLoggerSamplingRateError