Skip to content

Commit

Permalink
bitpacking
Browse files Browse the repository at this point in the history
  • Loading branch information
paleolimbot committed Feb 5, 2024
1 parent 181bc0e commit a82b5da
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 2 deletions.
2 changes: 1 addition & 1 deletion python/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def _define_regexes(self):
self.re_func_def = re.compile(
r"\n(static inline )?(?P<const>const )?(struct |enum )?"
r"(?P<return_type>[A-Za-z0-9_*]+) "
r"(?P<name>Arrow[A-Za-z]+)\((?P<args>[^\)]*)\);"
r"(?P<name>Arrow[A-Za-z0-9]+)\((?P<args>[^\)]*)\);"
)
self.re_tagged_type = re.compile(
r"(?P<type>struct|union|enum) (?P<name>Arrow[A-Za-z]+)"
Expand Down
27 changes: 26 additions & 1 deletion python/src/nanoarrow/_lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1544,8 +1544,10 @@ cdef class CBufferBuilder(CBuffer):
return out

def write_values(self, obj):
self._assert_valid()

if self._data_type == NANOARROW_TYPE_BOOL:
raise NotImplementedError()
return self._write_bits(obj)

struct_obj = Struct(self._format)
pack = struct_obj.pack
Expand All @@ -1554,6 +1556,29 @@ cdef class CBufferBuilder(CBuffer):
for item in obj:
write(pack(item))

cdef _write_bits(self, obj):
    """Pack the truthiness of each item of ``obj`` into bytes and append them.

    Items are consumed in groups of eight. Within a group, the i-th item
    (counting from zero) sets bit ``i`` of the byte when it is truthy, so the
    first item lands in the least significant bit. A trailing group of fewer
    than eight items is appended as one more byte whose unused high bits
    stay zero.

    Raises NotImplementedError when the underlying buffer already holds
    bytes: only writing into an empty buffer is handled here.
    """
    if self._ptr.size_bytes != 0:
        raise NotImplementedError("Append to bitmap that has already been appended to")

    cdef char packed = 0
    cdef int n_bits = 0
    cdef int code
    for item in obj:
        if item:
            packed |= (<char>1 << n_bits)
        n_bits += 1

        # Keep accumulating until a whole byte is ready.
        if n_bits < 8:
            continue

        code = ArrowBufferAppendInt8(self._ptr, packed)
        Error.raise_error_not_ok("ArrowBufferAppendInt8()", code)
        packed = 0
        n_bits = 0

    # Flush whatever is left over when the item count is not a multiple of 8.
    if n_bits > 0:
        code = ArrowBufferAppendInt8(self._ptr, packed)
        Error.raise_error_not_ok("ArrowBufferAppendInt8()", code)


def finish(self):
# Hands back this same object (`self`); no other work is done in this method.
return self

Expand Down
19 changes: 19 additions & 0 deletions python/tests/test_c_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,25 @@ def test_c_buffer_from_iterable():
assert list(buffer.data) == [1, 2, 3]


def test_c_buffer_bitmap_from_iterable():
    """Boolean iterables become bit-packed buffers of the expected size."""
    pattern = [True, False, False, True]

    # Four values: fewer than one full byte
    partial_byte = c_buffer_from_iterable(na.bool(), pattern)
    assert "10010000" in repr(partial_byte)
    assert partial_byte.size_bytes == 1
    assert partial_byte.data.data_type == "bool"
    assert partial_byte.data.element_size_bits == 1

    # Eight values: exactly one full byte
    full_byte = c_buffer_from_iterable(na.bool(), pattern * 2)
    assert "10011001" in repr(full_byte)
    assert full_byte.size_bytes == 1

    # Twelve values: one full byte plus a partial second byte
    two_bytes = c_buffer_from_iterable(na.bool(), pattern * 3)
    assert "1001100110010000" in repr(two_bytes)
    assert two_bytes.size_bytes == 2


def test_c_array_from_pybuffer_uint8():
data = b"abcdefg"
c_array = c_array_from_pybuffer(data)
Expand Down

0 comments on commit a82b5da

Please sign in to comment.