diff --git a/aws_lambda_builders/utils.py b/aws_lambda_builders/utils.py index 6dabe05bd..9f227ea0b 100644 --- a/aws_lambda_builders/utils.py +++ b/aws_lambda_builders/utils.py @@ -235,8 +235,11 @@ def extract_tarfile(tarfile_path: Union[str, os.PathLike], unpack_dir: Union[str def decode(to_decode: bytes, encoding: Optional[str] = None) -> str: """ - Perform a "safe" decoding of a series of bytes. If the decoding works, returns the decoded bytes. - If the decoding fails, returns an empty string instead of throwing an exception. + Perform a "safe" decoding of a series of bytes. If no encoding is provided, the locale's + preferred encoding is used. Decoding never raises on undecodable bytes; each one is + replaced with the Unicode replacement character (U+FFFD) instead. + + https://docs.python.org/3/library/codecs.html#codec-base-classes Parameters ---------- @@ -248,11 +251,7 @@ def decode(to_decode: bytes, encoding: Optional[str] = None) -> str: Returns ------- str - Decoded string if decoding succeeds, empty string if decoding fails + Decoded string, with any undecodable bytes replaced by the replacement character """ - encoding = encoding if encoding else locale.getpreferredencoding() - try: - return to_decode.decode(encoding).strip() - except UnicodeDecodeError: - LOG.debug(f"Unable to decode bytes: {to_decode} with encoding: {encoding}") - return "" + encoding = encoding or locale.getpreferredencoding() + return to_decode.decode(encoding, errors="replace").strip() diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index ab703765c..ba95185ad 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -39,7 +39,7 @@ class TestDecode(TestCase): def test_does_not_crash_non_utf8_encoding(self): message = "hello\n\n ß".encode("iso-8859-1") # Windows will decode this string as expected, *nix systems won't - expected_message = "hello\n\n ß" if platform.system().lower() == "windows" else "" + expected_message = "hello\n\n ß" if platform.system().lower() 
== "windows" else "hello\n\n �" response = decode(message) self.assertEqual(response, expected_message) @@ -49,7 +49,7 @@ def test_is_able_to_decode_non_utf8_encoding(self): self.assertEqual(response, "hello\n\n ß") @patch("aws_lambda_builders.utils.locale") - def test_isa_able_to_decode_non_utf8_locale(self, mock_locale): + def test_is_able_to_decode_non_utf8_locale(self, mock_locale): mock_locale.getpreferredencoding.return_value = "iso-8859-1" message = "hello\n\n ß".encode("iso-8859-1") response = decode(message)