Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Bugs with emit_bare_values #313

Merged
merged 4 commits into from
Dec 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 35 additions & 19 deletions amazon/ion/ioncmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1048,7 +1048,7 @@ static void ionc_add_to_container(PyObject* pyContainer, PyObject* element, BOOL
iERR ionc_read_value(hREADER hreader, ION_TYPE t, PyObject* container, BOOL in_struct, BOOL emit_bare_values_global) {
iENTER;

BOOL emit_bare_values = emit_bare_values_global;
BOOL wrap_py_value = !emit_bare_values_global;
BOOL is_null;
ION_STRING field_name;
SIZE annotation_count;
Expand All @@ -1064,7 +1064,7 @@ iERR ionc_read_value(hREADER hreader, ION_TYPE t, PyObject* container, BOOL in_s

IONCHECK(ion_reader_get_annotation_count(hreader, &annotation_count));
if (annotation_count > 0) {
emit_bare_values = FALSE;
wrap_py_value = TRUE;
ION_STRING* annotations = (ION_STRING*)PyMem_Malloc(annotation_count * sizeof(ION_STRING));
err = ion_reader_get_annotations(hreader, annotations, annotation_count, &annotation_count);
if (err) {
Expand Down Expand Up @@ -1101,8 +1101,12 @@ iERR ionc_read_value(hREADER hreader, ION_TYPE t, PyObject* container, BOOL in_s
}

ion_type = ION_TYPE_INT(null_type);
py_value = Py_None; // INCREFs and returns Python None.
emit_bare_values = emit_bare_values && (ion_type == tid_NULL_INT);
py_value = Py_None;
// you wouldn't think you need to incref Py_None, and in
// newer C API versions you won't, but for now you do.
// see https://github.com/python/cpython/issues/103906 for more
Py_INCREF(py_value);
wrap_py_value = wrap_py_value || (ion_type != tid_NULL_INT);
ion_nature_constructor = _ionpynull_fromvalue;
break;
}
Expand Down Expand Up @@ -1149,6 +1153,7 @@ iERR ionc_read_value(hREADER hreader, ION_TYPE t, PyObject* container, BOOL in_s
{
ION_DECIMAL decimal_value;
IONCHECK(ion_reader_read_ion_decimal(hreader, &decimal_value));

decNumber *read_number;
decQuad read_quad;

Expand All @@ -1173,10 +1178,10 @@ iERR ionc_read_value(hREADER hreader, ION_TYPE t, PyObject* container, BOOL in_s

// Returns a decimal tuple to avoid losing precision.
// Decimal tuple format: (sign, (digits tuple), exponent).
py_value = PyTuple_New(3);
PyTuple_SetItem(py_value, 0, PyLong_FromLong(sign));
PyTuple_SetItem(py_value, 1, digits_tuple);
PyTuple_SetItem(py_value, 2, PyLong_FromLong(read_number_exponent));
PyObject *dec_tuple = PyTuple_New(3);
PyTuple_SetItem(dec_tuple, 0, PyLong_FromLong(sign));
PyTuple_SetItem(dec_tuple, 1, digits_tuple);
PyTuple_SetItem(dec_tuple, 2, PyLong_FromLong(read_number_exponent));

int count = (read_number_digits + DECDPUN - 1) / DECDPUN;
int index = 0;
Expand All @@ -1198,9 +1203,16 @@ iERR ionc_read_value(hREADER hreader, ION_TYPE t, PyObject* container, BOOL in_s
}
}

ion_decimal_free(&decimal_value);
if (decimal_value.type == ION_DECIMAL_TYPE_QUAD) {
free(read_number);
}
if (wrap_py_value) {
py_value = dec_tuple;
} else {
py_value = PyObject_CallFunctionObjArgs(_decimal_constructor, dec_tuple, NULL);
Py_DECREF(dec_tuple);
}

ion_nature_constructor = _ionpydecimal_fromvalue;
break;
Expand All @@ -1213,7 +1225,8 @@ iERR ionc_read_value(hREADER hreader, ION_TYPE t, PyObject* container, BOOL in_s
}
case tid_SYMBOL_INT:
{
emit_bare_values = FALSE; // Symbol values must always be emitted as IonNature because of ambiguity with string.
// Symbols must always be emitted as IonPySymbol, to avoid ambiguity with string.
wrap_py_value = TRUE;
ION_STRING string_value;
IONCHECK(ion_reader_read_string(hreader, &string_value));
ion_nature_constructor = _ionpysymbol_fromvalue;
Expand All @@ -1230,7 +1243,8 @@ iERR ionc_read_value(hREADER hreader, ION_TYPE t, PyObject* container, BOOL in_s
}
case tid_CLOB_INT:
{
emit_bare_values = FALSE; // Clob values must always be emitted as IonNature because of ambiguity with blob.
// Clob values must always be emitted as IonPyBytes, to avoid ambiguity with blob.
wrap_py_value = TRUE;
// intentional fall-through
}
case tid_BLOB_INT:
Expand Down Expand Up @@ -1263,7 +1277,7 @@ iERR ionc_read_value(hREADER hreader, ION_TYPE t, PyObject* container, BOOL in_s
case tid_STRUCT_INT:
{
PyObject* data = PyDict_New();
IONCHECK(ionc_read_into_container(hreader, data, /*is_struct=*/TRUE, emit_bare_values));
IONCHECK(ionc_read_into_container(hreader, data, /*is_struct=*/TRUE, emit_bare_values_global));

py_value = PyObject_CallFunctionObjArgs(
_ionpydict_factory,
Expand All @@ -1276,21 +1290,20 @@ iERR ionc_read_value(hREADER hreader, ION_TYPE t, PyObject* container, BOOL in_s
if (py_value == NULL) {
FAILWITH(IERR_READ_ERROR);
}
// This is subtle. It's not that we're emitting a "bare value" here,
// in fact, we are explicitly creating an IonPy value. But this short-circuits
// the general IonPyFoo creation logic after switch.
emit_bare_values = TRUE;
// we've already wrapped the py value
wrap_py_value = FALSE;
break;
}
case tid_SEXP_INT:
{
emit_bare_values = FALSE; // Sexp values must always be emitted as IonNature because of ambiguity with list.
// Sexp values must always be emitted as IonPyList to avoid ambiguity with list.
wrap_py_value = TRUE;
// intentional fall-through
}
case tid_LIST_INT:
{
py_value = PyList_New(0);
IONCHECK(ionc_read_into_container(hreader, py_value, /*is_struct=*/FALSE, emit_bare_values));
IONCHECK(ionc_read_into_container(hreader, py_value, /*is_struct=*/FALSE, emit_bare_values_global));
ion_nature_constructor = _ionpylist_fromvalue;
break;
}
Expand All @@ -1300,21 +1313,24 @@ iERR ionc_read_value(hREADER hreader, ION_TYPE t, PyObject* container, BOOL in_s
}

PyObject* final_py_value = py_value;
if (!emit_bare_values) {
if (wrap_py_value) {
final_py_value = PyObject_CallFunctionObjArgs(
ion_nature_constructor,
py_ion_type_table[ion_type >> 8],
py_value,
py_annotations,
NULL
);
if (py_value != Py_None) Py_XDECREF(py_value);
Py_XDECREF(py_value);
}

ionc_add_to_container(container, final_py_value, in_struct, py_field_name);

fail:
Py_XDECREF(py_annotations);
// note: we're not actually increffing None when we have a field name that has
// no text, which we technically _should_ be doing.
// todo: consider increffing Py_None in ion_build_py_string
if (py_field_name && py_field_name != Py_None) Py_DECREF(py_field_name);
if (err) {
Py_XDECREF(py_value);
Expand Down
10 changes: 7 additions & 3 deletions amazon/ion/simpleion.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,10 +535,10 @@ def dump_extension(obj, fp, binary=True, sequence_as_stream=False, tuple_as_sexp
fp.write(res)


def load_extension(fp, single_value=True, parse_eagerly=True, text_buffer_size_limit=None):
def load_extension(fp, single_value=True, parse_eagerly=True, text_buffer_size_limit=None, emit_bare_values=False):
"""
Args:
fp (str): A string representation of Ion data.
fp (buffer): A file-handle or other object that implementes the buffer protocol
single_value (Optional[True|False]): When True, the data in ``ion_str`` is interpreted as a single Ion value,
and will be returned without an enclosing container. If True and there are multiple top-level values in
the Ion stream, IonException will be raised. NOTE: this means that when data is dumped using
Expand All @@ -548,8 +548,12 @@ def load_extension(fp, single_value=True, parse_eagerly=True, text_buffer_size_l
text_buffer_size_limit (int): The maximum byte size allowed for text values when the C extension is enabled
(default: 512 bytes). This option only has an effect when the C extension is enabled (and it is enabled by
default). When the C extension is disabled, there is no limit on the size of text values.
emit_bare_values (bool): When possible to do losslessly, the parser will emit values as their native python
type, instead of their IonPy type. Any value that is an IonSexp, IonSymbol, IonClob, typed-null or has
annotations is not emitted as a native python value. Timestamp values are emitted as Ion Timestamps, not
python datetimes.
"""
iterator = ionc.ionc_read(fp, emit_bare_values=False, text_buffer_size_limit=text_buffer_size_limit)
iterator = ionc.ionc_read(fp, emit_bare_values=emit_bare_values, text_buffer_size_limit=text_buffer_size_limit)
if single_value:
try:
value = next(iterator)
Expand Down
36 changes: 34 additions & 2 deletions tests/test_simpleion.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# License.
from datetime import datetime, timedelta
from functools import partial
from io import BytesIO
from io import BytesIO, StringIO

from decimal import Decimal
from itertools import chain
Expand All @@ -28,7 +28,7 @@
from amazon.ion.exceptions import IonException
from amazon.ion.symbols import SymbolToken, SYSTEM_SYMBOL_TABLE
from amazon.ion.writer_binary import _IVM
from amazon.ion.core import IonType, IonEvent, IonEventType, OffsetTZInfo, Multimap, TimestampPrecision
from amazon.ion.core import IonType, IonEvent, IonEventType, OffsetTZInfo, Multimap, TimestampPrecision, Timestamp
from amazon.ion.simple_types import IonPyDict, IonPyText, IonPyList, IonPyNull, IonPyBool, IonPyInt, IonPyFloat, \
IonPyDecimal, IonPyTimestamp, IonPyBytes, IonPySymbol
from amazon.ion.equivalence import ion_equals, obj_has_ion_type_and_annotation
Expand Down Expand Up @@ -709,6 +709,38 @@ def test_loads_unicode_utf8_conversion():
loads(data, parse_eagerly=True)


@parametrize(
("31", int, 31),
("true", bool, True),
("null", type(None), None),
("null.int", IonPyNull, lambda x: x.ion_type == IonType.INT),
("12e-1", float, 1.2),
("1.2", Decimal, Decimal("1.2")),
("2020-08-01T01:05:00-00:00", Timestamp, Timestamp(2020, 8, 1, 1, 5, 0, precision=TimestampPrecision.SECOND)),
('"bar"', str, "bar"),
("bar", IonPySymbol, IonPySymbol("bar", None, None)),
('{{ "foo" }}', IonPyBytes, lambda x: x.ion_type == IonType.CLOB),
("[]", list, []),
# regression test for sexp suppressing bare_values for children
("(31)", IonPyList, lambda x: x.ion_type == IonType.SEXP and type(x[0]) == int),
("foo::31", IonPyInt, lambda x: len(x.ion_annotations) == 1)
)
def test_bare_values(params):
# This function only tests c extension
if not c_ext:
return

ion_text, expected_type, expectation = params

value = simpleion.load_extension(StringIO(ion_text), emit_bare_values=True)

assert type(value) == expected_type
if callable(expectation):
expectation(value)
else:
assert ion_equals(value, expectation)


# See issue https://github.com/amazon-ion/ion-python/issues/232
def test_loads_large_string():
# This function only tests c extension
Expand Down