From 71f47018d3447939ec5f6c953c31506c78fba3f4 Mon Sep 17 00:00:00 2001 From: Rob Marrowstone Date: Wed, 27 Dec 2023 09:53:18 -0800 Subject: [PATCH 1/2] Implement Symbol as Text in C-extension By default the IonPy value for a Symbol is IonPySymbol and the bare value is a SymbolToken. With the SYMBOL_AS_TEXT flag set, the IonPy value for a Symbol is IonPyText, and the bare value is str. This makes Symbol handling simpler if one only cares about the text value. It also improves load performance for symbols significantly. Symbols with undefined text cannot be emitted with this flag set and will raise exceptions. --- amazon/ion/ioncmodule.c | 20 +++++++++++++----- tests/test_simpleion.py | 45 ++++++++++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/amazon/ion/ioncmodule.c b/amazon/ion/ioncmodule.c index 83e52fef..d540daed 100644 --- a/amazon/ion/ioncmodule.c +++ b/amazon/ion/ioncmodule.c @@ -1075,7 +1075,10 @@ static void ionc_add_to_container(PyObject* pyContainer, PyObject* element, BOOL */ iERR ionc_read_value(hREADER hreader, ION_TYPE t, PyObject* container, BOOL in_struct, uint8_t value_model) { iENTER; - BOOL wrap_py_value = !value_model; + + BOOL wrap_py_value = !(value_model & 1); + BOOL symbol_as_text = value_model & 2; + BOOL is_null; ION_STRING field_name; SIZE annotation_count; @@ -1219,8 +1222,15 @@ iERR ionc_read_value(hREADER hreader, ION_TYPE t, PyObject* container, BOOL in_s { ION_STRING string_value; IONCHECK(ion_reader_read_string(hreader, &string_value)); - py_value = ion_string_to_py_symboltoken(&string_value); - ion_nature_constructor = _ionpysymbol_fromvalue; + if (!symbol_as_text) { + py_value = ion_string_to_py_symboltoken(&string_value); + ion_nature_constructor = _ionpysymbol_fromvalue; + } else if (ion_string_is_null(&string_value)) { + _FAILWITHMSG(IERR_INVALID_STATE, "Cannot emit symbol with undefined text when SYMBOL_AS_TEXT is set."); + } else { + py_value = ion_build_py_string(&string_value); + ion_nature_constructor = _ionpytext_fromvalue; + } break; } case tid_STRING_INT: @@ -1475,8 +1485,8 @@ PyObject* ionc_read(PyObject* self, PyObject *args, PyObject *kwds) { &value_model, &text_buffer_size_limit)) { FAILWITH(IERR_INVALID_ARG); } - if (value_model > 1) { - FAILWITHMSG(IERR_INVALID_ARG, "Only ION_PY and MIXED value models are currently supported!") + if (value_model > 3) { + _FAILWITHMSG(IERR_INVALID_ARG, "Only ION_PY, MAY_BE_BARE and SYMBOL_AS_TEXT value models are currently supported!") } iterator = PyObject_New(ionc_read_Iterator, &ionc_read_IteratorType); diff --git a/tests/test_simpleion.py b/tests/test_simpleion.py index 0ef4a68b..ccbb6fcb 100644 --- a/tests/test_simpleion.py +++ b/tests/test_simpleion.py @@ -741,22 +741,47 @@ def test_bare_values(params): assert ion_equals(value, expectation) -def test_value_models_flags(): +@parametrize( + ("foo", IonPyValueModel.ION_PY, IonPySymbol), + ("foo", IonPyValueModel.MAY_BE_BARE, SymbolToken), + ("foo", IonPyValueModel.SYMBOL_AS_TEXT, IonPyText, IonType.SYMBOL), + ("foo", IonPyValueModel.MAY_BE_BARE | IonPyValueModel.SYMBOL_AS_TEXT, str), + ("foo::bar", IonPyValueModel.MAY_BE_BARE, IonPySymbol), + ("foo::bar", IonPyValueModel.MAY_BE_BARE | IonPyValueModel.SYMBOL_AS_TEXT, IonPyText, IonType.SYMBOL), +) +def test_value_model_flags(params): # This function only tests c extension if not c_ext: return - ion_text = "31" - value = simpleion.load_extension(StringIO(ion_text), value_model=IonPyValueModel.ION_PY) - assert type(value) is IonPyInt - value = simpleion.load_extension(StringIO(ion_text), value_model=IonPyValueModel.MAY_BE_BARE) - assert type(value) is int + if len(params) == 3: + ion_text, value_model, expected_type = params + expected_ion_type = None + else: + ion_text, value_model, expected_type, expected_ion_type = params - try: + value = simpleion.load_extension(StringIO(ion_text), value_model=value_model) + assert type(value) is expected_type + if expected_ion_type: + assert value.ion_type == expected_ion_type + + +def test_undefined_symbol_text_as_text(): + ion_text = """ + $ion_symbol_table::{ symbols:[ null ] } + $10 + """ + with raises(IonException, match="Cannot emit symbol with undefined text"): simpleion.load_extension(StringIO(ion_text), value_model=IonPyValueModel.SYMBOL_AS_TEXT) - assert False - except: - pass + + +def test_invalid_value_model_flag(): + # This function only tests c extension + if not c_ext: + return + + with raises(IonException, match="value models are currently supported"): + simpleion.load_extension(StringIO("foo"), value_model=IonPyValueModel.STRUCT_AS_STD_DICT) # See issue https://github.com/amazon-ion/ion-python/issues/232 From 90f87396dab06a4368f8a243db5c6cb180c600fb Mon Sep 17 00:00:00 2001 From: Rob Marrowstone Date: Thu, 28 Dec 2023 14:59:34 -0800 Subject: [PATCH 2/2] Skip test when c_ext = False --- tests/test_simpleion.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_simpleion.py b/tests/test_simpleion.py index ccbb6fcb..4fbb4a84 100644 --- a/tests/test_simpleion.py +++ b/tests/test_simpleion.py @@ -767,6 +767,10 @@ def test_value_model_flags(params): def test_undefined_symbol_text_as_text(): + # This function only tests c extension + if not c_ext: + return + ion_text = """ $ion_symbol_table::{ symbols:[ null ] } $10