Skip to content

Commit 91d1abb

Browse files
committed
Optimize PrimitiveBuilder::append_trusted_len_iter
1 parent 8a77bce commit 91d1abb

File tree

3 files changed

+53
-21
lines changed

3 files changed

+53
-21
lines changed

arrow-array/src/builder/primitive_builder.rs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -284,14 +284,10 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
284284
/// the iterator implement `TrustedLen` once that is stabilized.
285285
#[inline]
286286
pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T::Native>) {
287-
let iter = iter.into_iter();
288-
let len = iter
289-
.size_hint()
290-
.1
291-
.expect("append_trusted_len_iter requires an upper bound");
292-
293-
self.null_buffer_builder.append_n_non_nulls(len);
287+
let starting_len = self.len();
294288
self.values_builder.append_trusted_len_iter(iter);
289+
self.null_buffer_builder
290+
.append_n_non_nulls(self.len() - starting_len);
295291
}
296292

297293
/// Builds the [`PrimitiveArray`] and reset this builder.

arrow-buffer/src/buffer/mutable.rs

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -543,8 +543,42 @@ impl MutableBuffer {
543543
iterator.for_each(|item| self.push(item));
544544
}
545545

546+
/// Extends a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length.
547+
///
548+
/// See [`MutableBuffer::from_trusted_len_iter`] for more details.
549+
///
550+
/// # Safety
551+
/// This method assumes that the iterator's size is correct and is undefined
552+
/// behavior to use it on an iterator that reports an incorrect length.
553+
#[inline]
554+
pub unsafe fn extend_from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
555+
&mut self,
556+
iterator: I,
557+
) {
558+
let item_size = std::mem::size_of::<T>();
559+
let (lower, _) = iterator.size_hint();
560+
let additional = lower * item_size;
561+
self.reserve(additional);
562+
563+
// this is necessary because of https://github.com/rust-lang/rust/issues/32155
564+
let mut dst = unsafe { self.data.as_ptr().add(self.len) };
565+
for item in iterator {
566+
// note how there is no reserve here (compared with `extend_from_iter`)
567+
let src = item.to_byte_slice().as_ptr();
568+
std::ptr::copy_nonoverlapping(src, dst, item_size);
569+
dst = dst.add(item_size);
570+
}
571+
self.len += additional;
572+
assert_eq!(
573+
dst.offset_from(self.data.as_ptr()) as usize,
574+
self.len,
575+
"Trusted iterator length was not accurately reported"
576+
);
577+
}
578+
546579
/// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length.
547-
/// Prefer this to `collect` whenever possible, as it is faster ~60% faster.
580+
/// Prefer this to `collect` whenever possible, as it is ~60% faster.
581+
///
548582
/// # Example
549583
/// ```
550584
/// # use arrow_buffer::buffer::MutableBuffer;
@@ -554,16 +588,18 @@ impl MutableBuffer {
554588
/// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
555589
/// ```
556590
/// # Safety
557-
/// This method assumes that the iterator's size is correct and is undefined behavior
558-
/// to use it on an iterator that reports an incorrect length.
559-
// This implementation is required for two reasons:
560-
// 1. there is no trait `TrustedLen` in stable rust and therefore
561-
// we can't specialize `extend` for `TrustedLen` like `Vec` does.
562-
// 2. `from_trusted_len_iter` is faster.
591+
/// This method assumes that the iterator's size is correct and is undefined
592+
/// behavior to use it on an iterator that reports an incorrect length.
593+
///
594+
/// This implementation is required for two reasons:
595+
/// 1. there is no trait `TrustedLen` in stable Rust and therefore
596+
/// we can't specialize `extend` for `TrustedLen` like `Vec` does.
597+
/// 2. `from_trusted_len_iter` is faster.
563598
#[inline]
564599
pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
565600
iterator: I,
566601
) -> Self {
602+
// TODO: reduce duplication with `extend_from_trusted_len_iter`
567603
let item_size = std::mem::size_of::<T>();
568604
let (_, upper) = iterator.size_hint();
569605
let upper = upper.expect("from_trusted_len_iter requires an upper limit");

arrow-buffer/src/builder/mod.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -330,13 +330,13 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
330330
/// the iterator implement `TrustedLen` once that is stabilized.
331331
#[inline]
332332
pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T>) {
333-
let iter = iter.into_iter();
334-
let len = iter
335-
.size_hint()
336-
.1
337-
.expect("append_trusted_len_iter expects upper bound");
338-
self.reserve(len);
339-
self.extend(iter);
333+
let starting_buffer_len = self.len;
334+
debug_assert_eq!(
335+
starting_buffer_len,
336+
self.buffer.len() / std::mem::size_of::<T>()
337+
);
338+
self.buffer.extend_from_trusted_len_iter(iter.into_iter());
339+
self.len = self.buffer.len() / std::mem::size_of::<T>();
340340
}
341341

342342
/// Resets this builder and returns an immutable [Buffer].

0 commit comments

Comments
 (0)