# Review notes (placed ahead of the first `diff --git` header, outside every hunk):
# - bootstrap.py: the function-name character class in re_func_def is widened
#   from [A-Za-z] to [A-Za-z0-9], so a declaration whose name contains a digit
#   (e.g. ArrowBufferAppendInt8, which the _lib.pyx hunk below calls) can match.
# - _lib.pyx: write_values() now calls self._assert_valid() first and routes
#   NANOARROW_TYPE_BOOL to the new _write_bits() instead of raising
#   NotImplementedError.
# - test_c_lib.py: adds a test covering bitmaps of <1, exactly 1 and >1 byte.
# - NOTE(review): the `(?P...)` groups in the bootstrap.py hunk carry no `<name>`
#   part, which is not valid Python `re` syntax; presumably the names were lost
#   when this patch was extracted -- confirm against the repository.
diff --git a/python/bootstrap.py b/python/bootstrap.py
index df1a609c1..f1c9fbee1 100644
--- a/python/bootstrap.py
+++ b/python/bootstrap.py
@@ -95,7 +95,7 @@ def _define_regexes(self):
         self.re_func_def = re.compile(
             r"\n(static inline )?(?Pconst )?(struct |enum )?"
             r"(?P[A-Za-z0-9_*]+) "
-            r"(?PArrow[A-Za-z]+)\((?P[^\)]*)\);"
+            r"(?PArrow[A-Za-z0-9]+)\((?P[^\)]*)\);"
         )
         self.re_tagged_type = re.compile(
             r"(?Pstruct|union|enum) (?PArrow[A-Za-z]+)"
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index a79fd3416..1ac9778f4 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -1544,8 +1544,10 @@ cdef class CBufferBuilder(CBuffer):
         return out

     def write_values(self, obj):
+        self._assert_valid()
+
         if self._data_type == NANOARROW_TYPE_BOOL:
-            raise NotImplementedError()
+            return self._write_bits(obj)  # bool items are bit-packed rather than struct-packed

         struct_obj = Struct(self._format)
         pack = struct_obj.pack
@@ -1554,6 +1556,29 @@ cdef class CBufferBuilder(CBuffer):
         for item in obj:
             write(pack(item))

+    cdef _write_bits(self, obj):  # pack the truthiness of each item in obj into bytes, 8 items per byte
+        if self._ptr.size_bytes != 0:  # only writing into an empty buffer is supported
+            raise NotImplementedError("Append to bitmap that has already been appended to")
+
+        cdef char buffer_item = 0  # byte currently being assembled
+        cdef int buffer_item_i = 0  # bit position (0-7) the next item will occupy in buffer_item
+        cdef int code  # status returned by ArrowBufferAppendInt8()
+        for item in obj:
+            if item:
+                buffer_item |= (1 << buffer_item_i)  # the first item of each byte lands in the lowest bit
+
+            buffer_item_i += 1
+            if buffer_item_i == 8:  # byte is full: append it and start a fresh one
+                code = ArrowBufferAppendInt8(self._ptr, buffer_item)
+                Error.raise_error_not_ok("ArrowBufferAppendInt8()", code)
+                buffer_item = 0
+                buffer_item_i = 0
+
+        if buffer_item_i != 0:  # append the trailing partial byte; its unused high bits are still 0
+            code = ArrowBufferAppendInt8(self._ptr, buffer_item)
+            Error.raise_error_not_ok("ArrowBufferAppendInt8()", code)
+
+
     def finish(self):
         return self

diff --git a/python/tests/test_c_lib.py b/python/tests/test_c_lib.py
index e201e19f3..d4d486f81 100644
--- a/python/tests/test_c_lib.py
+++ b/python/tests/test_c_lib.py
@@ -189,6 +189,25 @@ def test_c_buffer_from_iterable():
     assert list(buffer.data) == [1, 2, 3]


+def test_c_buffer_bitmap_from_iterable():
+    # Check something less than one byte
+    buffer = c_buffer_from_iterable(na.bool(), [True, False, False, True])
+    assert "10010000" in repr(buffer)  # expected repr: bits in item order, zero-padded to a whole byte
+    assert buffer.size_bytes == 1
+    assert buffer.data.data_type == "bool"
+    assert buffer.data.element_size_bits == 1
+
+    # Check something exactly one byte
+    buffer = c_buffer_from_iterable(na.bool(), [True, False, False, True] * 2)
+    assert "10011001" in repr(buffer)
+    assert buffer.size_bytes == 1
+
+    # Check something more than one byte
+    buffer = c_buffer_from_iterable(na.bool(), [True, False, False, True] * 3)
+    assert "1001100110010000" in repr(buffer)  # 12 items -> one full byte plus a zero-padded second byte
+    assert buffer.size_bytes == 2
+
+
 def test_c_array_from_pybuffer_uint8():
     data = b"abcdefg"
     c_array = c_array_from_pybuffer(data)