From e7b0f8bff681b83f726b00d5a69c8dee5ccfcbe5 Mon Sep 17 00:00:00 2001 From: GnomedDev Date: Fri, 11 Oct 2024 09:43:14 +0100 Subject: [PATCH 1/2] Add ThinVec::extract_if --- Cargo.toml | 6 ++- src/lib.rs | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 148 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0d74561..5d6d29d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,17 +10,19 @@ homepage = "https://github.com/gankra/thin-vec" readme = "README.md" [features] -unstable = [] default = ["std"] std = [] +# Enables APIs that are not currently stable for `Vec`, such as `extract_if`. +unstable = [] + # Gecko specific features. These features cause thin-vec to have the same layout # and behaviour as nsTArray, allowing it to be used in C++ FFI. Requires # the empty header to be statically linked in with the symbol name "sEmptyTArrayHeader" gecko-ffi = [] [dependencies] -serde = {version = "1.0", optional = true} +serde = { version = "1.0", optional = true } [dev-dependencies] serde_test = "1.0" diff --git a/src/lib.rs b/src/lib.rs index 6616514..5150ceb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1505,6 +1505,76 @@ impl ThinVec { } } + /// Creates an iterator which uses a closure to determine if an element should be removed. + /// + /// If the closure returns true, then the element is removed and yielded. + /// If the closure returns false, the element will remain in the vector and will not be yielded + /// by the iterator. + /// + /// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating + /// or the iteration short-circuits, then the remaining elements will be retained. + /// Use [`ThinVec::retain`] with a negated predicate if you do not need the returned iterator. 
+ /// + /// Using this method is equivalent to the following code: + /// + /// ``` + /// # use thin_vec::{ThinVec, thin_vec}; + /// # let some_predicate = |x: &mut i32| { *x == 2 || *x == 3 || *x == 6 }; + /// # let mut vec = thin_vec![1, 2, 3, 4, 5, 6]; + /// let mut i = 0; + /// while i < vec.len() { + /// if some_predicate(&mut vec[i]) { + /// let val = vec.remove(i); + /// // your code here + /// } else { + /// i += 1; + /// } + /// } + /// + /// # assert_eq!(vec, thin_vec![1, 4, 5]); + /// ``` + /// + /// But `extract_if` is easier to use. `extract_if` is also more efficient, + /// because it can backshift the elements of the array in bulk. + /// + /// Note that `extract_if` also lets you mutate every element in the filter closure, + /// regardless of whether you choose to keep or remove it. + /// + /// # Examples + /// + /// Splitting an array into evens and odds, reusing the original allocation: + /// + /// ``` + /// use thin_vec::{ThinVec, thin_vec}; + /// + /// let mut numbers = thin_vec![1, 2, 3, 4, 5, 6, 8, 9, 11, 13, 14, 15]; + /// + /// let evens = numbers.extract_if(|x| *x % 2 == 0).collect::<ThinVec<_>>(); + /// let odds = numbers; + /// + /// assert_eq!(evens, thin_vec![2, 4, 6, 8, 14]); + /// assert_eq!(odds, thin_vec![1, 3, 5, 9, 11, 13, 15]); + /// ``` + #[cfg(feature = "unstable")] + pub fn extract_if<F>(&mut self, filter: F) -> ExtractIf<'_, T, F> + where + F: FnMut(&mut T) -> bool, + { + let old_len = self.len(); + // Guard against us getting leaked (leak amplification) + unsafe { + self.set_len(0); + } + + ExtractIf { + vec: self, + idx: 0, + del: 0, + old_len, + pred: filter, + } + } + /// Resize the buffer and update its capacity, without changing the length. /// Unsafe because it can cause length to be greater than capacity. unsafe fn reallocate(&mut self, new_cap: usize) { @@ -2261,10 +2331,6 @@ impl<T> ExactSizeIterator for IntoIter<T> {} impl<T> core::iter::FusedIterator for IntoIter<T> {} -// SAFETY: the length calculation is trivial, we're an array! 
And if it's wrong we're So Screwed. -#[cfg(feature = "unstable")] -unsafe impl<T> core::iter::TrustedLen for IntoIter<T> {} - impl<T> Drop for IntoIter<T> { #[inline] fn drop(&mut self) { @@ -2423,10 +2489,6 @@ impl<'a, T> DoubleEndedIterator for Drain<'a, T> { impl<'a, T> ExactSizeIterator for Drain<'a, T> {} -// SAFETY: we need to keep track of this perfectly Or Else anyway! -#[cfg(feature = "unstable")] -unsafe impl<T> core::iter::TrustedLen for Drain<'_, T> {} - impl<T> core::iter::FusedIterator for Drain<'_, T> {} impl<'a, T> Drop for Drain<'a, T> { @@ -2611,6 +2673,80 @@ impl<T> Drain<'_, T> { } } +/// An iterator for [`ThinVec`] which uses a closure to determine if an element should be removed. +#[must_use = "iterators are lazy and do nothing unless consumed"] +#[cfg(feature = "unstable")] +pub struct ExtractIf<'a, T, F> { + vec: &'a mut ThinVec<T>, + /// The index of the item that will be inspected by the next call to `next`. + idx: usize, + /// The number of items that have been drained (removed) thus far. + del: usize, + /// The original length of `vec` prior to draining. + old_len: usize, + /// The filter test predicate. + pred: F, +} + +#[cfg(feature = "unstable")] +impl<T, F> Iterator for ExtractIf<'_, T, F> +where + F: FnMut(&mut T) -> bool, +{ + type Item = T; + + fn next(&mut self) -> Option<T> { + unsafe { + while self.idx < self.old_len { + let i = self.idx; + let v = slice::from_raw_parts_mut(self.vec.as_mut_ptr(), self.old_len); + let drained = (self.pred)(&mut v[i]); + // Update the index *after* the predicate is called. If the index + // is updated prior and the predicate panics, the element at this + // index would be leaked. 
+ self.idx += 1; + if drained { + self.del += 1; + return Some(ptr::read(&v[i])); + } else if self.del > 0 { + let del = self.del; + let src: *const T = &v[i]; + let dst: *mut T = &mut v[i - del]; + ptr::copy_nonoverlapping(src, dst, 1); + } + } + None + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (0, Some(self.old_len - self.idx)) + } +} + +#[cfg(feature = "unstable")] +impl<A, F> Drop for ExtractIf<'_, A, F> { + fn drop(&mut self) { + unsafe { + if self.idx < self.old_len && self.del > 0 { + // This is a pretty messed up state, and there isn't really an + // obviously right thing to do. We don't want to keep trying + // to execute `pred`, so we just backshift all the unprocessed + // elements and tell the vec that they still exist. The backshift + // is required to prevent a double-drop of the last successfully + // drained item prior to a panic in the predicate. + let ptr = self.vec.as_mut_ptr(); + let src = ptr.add(self.idx); + let dst = src.sub(self.del); + let tail_len = self.old_len - self.idx; + src.copy_to(dst, tail_len); + } + + self.vec.set_len(self.old_len - self.del); + } + } +} + /// Write is implemented for `ThinVec` by appending to the vector. /// The vector will grow as needed. /// This implementation is identical to the one for `Vec`. 
From e7d04f7e2bf369392f27f321a70699a1941e2e78 Mon Sep 17 00:00:00 2001 From: GnomedDev Date: Fri, 11 Oct 2024 09:44:17 +0100 Subject: [PATCH 2/2] Update CI --- .github/workflows/rust.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 54e4fba..7f33baa 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -33,8 +33,8 @@ jobs: run: cargo build --verbose - name: Run tests run: cargo test --verbose - - name: Run tests - run: cargo test --verbose + - name: Run tests (unstable) + run: cargo test --features=unstable --verbose - name: Run tests (serde) run: cargo test --features=serde --verbose - name: Run tests (gecko-ffi)