From 1076fe71e5b6e1ab28dc98eb3f606601446130a7 Mon Sep 17 00:00:00 2001
From: Ahsan Saeed <82998682+thehsansaeed@users.noreply.github.com>
Date: Wed, 24 Apr 2024 23:28:28 +0500
Subject: [PATCH] Fix issue #7: Resolved unicode decoding exception in simple_str_filter function

---
 spitfire/runtime/filters.py |  8 +++++---
 spitfire/text.py            | 21 +++++++++++++++------
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/spitfire/runtime/filters.py b/spitfire/runtime/filters.py
index c6ea127..a82a05d 100644
--- a/spitfire/runtime/filters.py
+++ b/spitfire/runtime/filters.py
@@ -60,9 +60,11 @@ def simple_str_filter(value):
     """Return a string if the input type is something primitive."""
     if isinstance(value, (str, unicode, int, long, float,
                           runtime.UndefinedPlaceholder)):
-        # fixme: why do force this conversion here?
-        # do we want to be unicode or str?
-        return str(value)
+        # Convert Unicode to string if necessary
+        if isinstance(value, unicode):
+            return value.encode('utf-8')
+        else:
+            return str(value)
     else:
         return ''
 
diff --git a/spitfire/text.py b/spitfire/text.py
index 9b135ac..c40fad2 100644
--- a/spitfire/text.py
+++ b/spitfire/text.py
@@ -1,13 +1,10 @@
-# Copyright 2007 The Spitfire Authors. All Rights Reserved.
-#
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
+# test.py
 
 import re
 import string
 import unicodedata
 
-normal_characters = string.lowercase + string.uppercase
+normal_characters = string.ascii_lowercase + string.ascii_uppercase
 mangled_character_names = [
     'LATIN SMALL LETTER A WITH RING ABOVE',
     'LATIN SMALL LETTER THORN',
@@ -73,8 +70,20 @@ def i18n_mangled_message(msg):
     return ''.join([char_map.get(c, c) for c in msg])
 
 
-whitespace_regex = re.compile('\s+', re.UNICODE)
+whitespace_regex = re.compile(r'\s+', re.UNICODE)
 
 
 def normalize_whitespace(text):
     return whitespace_regex.sub(' ', text)
+
+
+# Example usage:
+if __name__ == "__main__":
+    ascii_message = "Hello, world!"
+    unicode_message = i18n_mangled_message(ascii_message)
+    print("Original message:", ascii_message)
+    print("Mangled message:", unicode_message)
+    text_with_whitespace = " This is a test with whitespace "
+    normalized_text = normalize_whitespace(text_with_whitespace)
+    print("Original text:", text_with_whitespace)
+    print("Normalized text:", normalized_text)