diff --git a/Cargo.lock b/Cargo.lock index 2a00eef0141..e3b8891da2c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2441,6 +2441,15 @@ dependencies = [ "icu_benchmark_macros", ] +[[package]] +name = "yoke" +version = "0.1.0" +dependencies = [ + "bincode", + "serde", + "zerovec", +] + [[package]] name = "zerovec" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 539f4985679..50bad3f890d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ members = [ "utils/litemap", "utils/pattern", "utils/writeable", + "utils/yoke", "utils/zerovec", ] diff --git a/utils/yoke/Cargo.toml b/utils/yoke/Cargo.toml new file mode 100644 index 00000000000..c2789027ac7 --- /dev/null +++ b/utils/yoke/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "yoke" +version = "0.1.0" +authors = ["Manish Goregaokar "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +zerovec = { path = "../zerovec/", version = "0.2.0", optional = true} + +[dev-dependencies] +bincode = "1.3.3" +serde = "1.0.125" \ No newline at end of file diff --git a/utils/yoke/src/cart.rs b/utils/yoke/src/cart.rs new file mode 100644 index 00000000000..6931d7d1b6a --- /dev/null +++ b/utils/yoke/src/cart.rs @@ -0,0 +1,108 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::{Yoke, Yokeable}; +use std::rc::Rc; +use std::sync::Arc; + +/// A [`Cart`] is a type that is acceptable as backing storage in a [`Yoke`]. +/// +/// The essential invariant that must be maintained by implementors is that `Self::Inner` references +/// obtained from this type via `Self::get_inner()` must continue to be valid for the +/// duration of this type provided that this type is never accessed via `&mut` references. 
+/// +/// For example, `Vec<u8>` is a valid [`Cart`], however `Vec<RefCell<u8>>` is not, because +/// in the latter type it is possible to use interior mutation to change the data. +/// +/// In general, this means "no interior mutability", though interior mutability can be fine +/// if the interior mutability is to something that does not affect references. For example, +/// `Rc<[u8]>` does have interior mutability in the refcount, but you cannot get references +/// to it, so it's fine. On the other hand, `Weak<Box<[u8]>>` cannot be a valid [`Cart`] because +/// it is possible for the backing buffer to be cleaned up without warning. +/// +/// Common [`Cart`] types to use with [`Yoke`] are ones wrapping `[u8]` or `str` +/// (`Box<[u8]>`, `Rc<[u8]>`, etc) since those are typical inputs to zero-copy +/// deserialization and parsing. +/// +/// Typically the [`Cart`] trait will be implemented by smart pointers whilst [`Cartable`] is used +/// as a helper to talk about types that have the same property provided they are not moved, such that +/// [`Cart`] can be implemented on wrappers that can maintain this property even if they are moved. +pub unsafe trait Cart { + type Inner: ?Sized; + /// Get the inner data + fn get_inner(&self) -> &Self::Inner; +} + +/// [`Cartable`] is a helper trait for implementing [`Cart`]. It has a similar invariant: +/// all references obtained from this type must continue to be valid for the lifetime +/// of this type provided that this type is never moved or accessed via an `&mut` reference. +/// +/// Essentially, this means "no interior mutability", however interior mutability which cannot be +/// seen from the outside is fine. 
+pub unsafe trait Cartable {} + +unsafe impl Cart for Rc { + type Inner = T; + fn get_inner(&self) -> &Self::Inner { + &**self + } +} + +unsafe impl Cart for Arc { + type Inner = T; + fn get_inner(&self) -> &Self::Inner { + &**self + } +} + +unsafe impl Cart for Box { + type Inner = T; + fn get_inner(&self) -> &Self::Inner { + &**self + } +} + +unsafe impl Cart for Vec { + type Inner = [T]; + fn get_inner(&self) -> &Self::Inner { + &**self + } +} + +unsafe impl Cart for String { + type Inner = str; + fn get_inner(&self) -> &Self::Inner { + &**self + } +} + +unsafe impl Cartable for [T] {} +unsafe impl Cartable for Option {} +unsafe impl Cartable for str {} +unsafe impl Cartable for String {} +unsafe impl Cartable for bool {} +unsafe impl Cartable for char {} +unsafe impl Cartable for u8 {} +unsafe impl Cartable for u16 {} +unsafe impl Cartable for u32 {} +unsafe impl Cartable for u64 {} +unsafe impl Cartable for u128 {} +unsafe impl Cartable for i8 {} +unsafe impl Cartable for i16 {} +unsafe impl Cartable for i32 {} +unsafe impl Cartable for i64 {} +unsafe impl Cartable for i128 {} + +/// This is a neat implementation; it allows one to build certain kinds of +/// caches by nesting [`Yoke`]s where both the [`Cart`] and the parsed [`Yokeable`] +/// are cached. +/// +/// Essentially, this allows the construction of the type +/// `Yoke>, Y>` and `Weak>`: the `Weak` can be stored +/// in your cache, whereas the `Yoke, Y>` is passed around. The cache entry +/// will automatically clean (most of) itself up when all `Rc`s go out of scope. +/// +/// The resultant [`Yoke`] type is a bit more complicated but it's not less efficient +/// since all [`Yoke`] operations except the destructor ignore the [`Cart`]. +unsafe impl Yokeable<'a>, C: Cart> Cartable for Yoke {} diff --git a/utils/yoke/src/lib.rs b/utils/yoke/src/lib.rs new file mode 100644 index 00000000000..937cf1331b9 --- /dev/null +++ b/utils/yoke/src/lib.rs @@ -0,0 +1,11 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +mod cart; +mod yoke; +mod yokeable; + +pub use crate::cart::{Cart, Cartable}; +pub use crate::yoke::Yoke; +pub use crate::yokeable::Yokeable; diff --git a/utils/yoke/src/yoke.rs b/utils/yoke/src/yoke.rs new file mode 100644 index 00000000000..03c9b6dd625 --- /dev/null +++ b/utils/yoke/src/yoke.rs @@ -0,0 +1,151 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::Cart; +use crate::Cartable; +use crate::Yokeable; +use std::rc::Rc; +use std::sync::Arc; + +/// A Cow-like borrowed object "yoked" to its backing data. +/// +/// This allows things like zero copy deserialized data to carry around +/// shared references to their backing buffer. +/// +/// `Y` (the [`Yokeable`]) is the object containing the references, +/// and will typically be of the form `Foo<'static>`. The `'static` is +/// not the actual lifetime of the data, rather it is a convenient way to erase +/// the lifetime and make it dynamic. +/// +/// `C` is the "cart", which `Y` may contain references to. A [`Yoke`] can be constructed +/// with such references using [`Yoke::attach_to_cart()`]. 
+/// +/// # Example +/// +/// For example, we can use this to store zero-copy deserialized data in a cache: +/// +/// ```rust +/// # use yoke::{Yoke, Yokeable}; +/// # use std::rc::Rc; +/// # use std::borrow::Cow; +/// # fn load_from_cache(_filename: &str) -> Rc<[u8]> { +/// # // dummy implementation +/// # Rc::new([0x68, 0x65, 0x6c, 0x6c, 0x6f]) +/// # } +/// +/// fn load_object(filename: &str) -> Yoke, Rc<[u8]>> { +/// let rc: Rc<[u8]> = load_from_cache(filename); +/// Yoke::, Rc<[u8]>>::attach_to_cart_but_worse(rc, |data: &[u8]| { +/// bincode::deserialize_from(data).unwrap() +/// }) +/// } +/// ``` +/// +pub struct Yoke Yokeable<'a>, C: Cart> { + // must be the first field for drop order + // this will have a 'static lifetime parameter, that parameter is a lie + yokeable: Y, + cart: C, +} + +impl Yokeable<'a>, C: Cart> Yoke { + /// Construct a new [`Yoke`] from static data. There will be no + /// references to `cart` here, this is good for e.g. constructing fully owned + /// [`Yoke`]s with no internal borrowing. + pub fn new(cart: C, yokeable: Y) -> Self { + Self { yokeable, cart } + } + + /// Obtain a valid reference to the yokeable data + /// + /// This essentially transforms the lifetime of the internal yokeable data to + /// be valid. + /// For example, if you're working with a `Yoke, C>`, this + /// will return an `&'a Cow<'a, T>` + pub fn get<'a>(&'a self) -> &'a >::Output { + self.yokeable.transform() + } + + /// Get a reference to the backing cart. 
+ pub fn backing_cart(&self) -> &C { + &self.cart + } + + pub fn with_mut<'a, F>(&'a mut self, f: F) + where + F: 'static + for<'b> FnOnce(&'b mut >::Output), + { + self.yokeable.with_mut(f) + } + + pub fn attach_to_cart(cart: C, f: F) -> Self + where + F: for<'de> FnOnce(&'de C::Inner) -> >::Output, + { + let deserialized = f(cart.get_inner()); + Self { + yokeable: unsafe { Y::make(deserialized) }, + cart, + } + } + + /// Temporary version of attach_to_cart that doesn't hit https://github.com/rust-lang/rust/issues/84937 + pub fn attach_to_cart_but_worse( + cart: C, + f: for<'de> fn(&'de C::Inner) -> >::Output, + ) -> Self { + let deserialized = f(cart.get_inner()); + Self { + yokeable: unsafe { Y::make(deserialized) }, + cart, + } + } +} + +// clone impls only work for reference counted objects, otherwise you should be +// cloning `backing_cart()` and reusing `attach_to_cart()` +impl Yokeable<'a>, T: Cartable + ?Sized> Clone for Yoke> +where + for<'a> >::Output: Clone, +{ + fn clone(&self) -> Self { + Yoke { + yokeable: unsafe { Y::make(self.get().clone()) }, + cart: self.cart.clone(), + } + } +} + +impl Yokeable<'a>, T: Cartable + ?Sized> Clone for Yoke> +where + for<'a> >::Output: Clone, +{ + fn clone(&self) -> Self { + Yoke { + yokeable: unsafe { Y::make(self.get().clone()) }, + cart: self.cart.clone(), + } + } +} + +// #[test] +// // See https://github.com/rust-lang/rust/issues/84937 +// fn this_test_is_broken() { +// use crate::{Yoke, Yokeable}; +// use std::borrow::Cow; +// use std::rc::Rc; +// fn load_from_cache(_filename: &str) -> Rc<[u8]> { +// // dummy implementation +// Rc::new([0x68, 0x65, 0x6c, 0x6c, 0x6f]) +// } + +// fn load_object(filename: &str) -> Yoke, Rc<[u8]>> { +// let rc: Rc<[u8]> = load_from_cache(filename); +// Yoke::, Rc<[u8]>>::attach_to_cart(rc, deserialize); +// unimplemented!() +// } +// fn deserialize<'d>(data: &'d [u8]) -> as Yokeable<'d>>::Output { +// bincode::deserialize_from(data).unwrap() +// } +// } diff --git 
a/utils/yoke/src/yokeable.rs b/utils/yoke/src/yokeable.rs new file mode 100644 index 00000000000..c69013c64b6 --- /dev/null +++ b/utils/yoke/src/yokeable.rs @@ -0,0 +1,179 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use std::borrow::{Cow, ToOwned}; +use std::mem; + +/// A [`Yokeable`] type is essentially one with a covariant lifetime parameter, +/// matched to the parameter in the trait definition. The trait allows one to cast +/// the covariant lifetime to and from `'static`. +/// +/// While Rust does not yet have GAT syntax, for the purpose of this documentation +/// we shall refer to "`Self` with a lifetime `'a`" with the syntax `Self<'a>`. +/// +/// [`Yokeable`] exposes ways to cast between `Self<'static>` and `Self<'a>` generically. +/// This is useful for turning covariant lifetimes to _dynamic_ lifetimes, where `'static` is +/// used as a way to "erase" the lifetime. +/// +/// # Implementation safety +/// +/// This trait is safe to implement on types with a _covariant_ lifetime parameter, i.e. one where +/// [`Self::transform()`]'s body can simply be `{ self }`. This will occur when the lifetime +/// parameter is used within references, but not in the arguments of function pointers or in mutable +/// positions (either in `&mut` or via interior mutability) +/// +/// This trait must be implemented on the `'static` version of such a type, e.g. one should +/// implement `Yokeable<'a>` (for all `'a`) on `Cow<'static, T>`. +/// +/// There are further constraints on implementation safety on individual methods. +pub unsafe trait Yokeable<'a>: 'static { + /// This type MUST be `Self` with the `'static` replaced with `'a`, i.e. `Self<'a>` + type Output: 'a + Sized; + + /// This method must cast `self` between `&'a Self<'static>` and `&'a Self<'a>`. 
+ /// + /// # Implementation safety + /// + /// If the invariants of [`Yokeable`] are being satisfied, the body of this method + /// should simply be `{ self }`, though it's acceptable to include additional assertions + /// if desired. + fn transform(&'a self) -> &'a Self::Output; + + /// This method can be used to cast away `Self<'a>`'s lifetime. + /// + /// # Safety + /// + /// The returned value must be destroyed before the data `from` was borrowing from is. + /// + /// # Implementation safety + /// + /// A safe implementation of this method must be equivalent to a transmute between + /// `Self<'a>` and `Self<'static>` + unsafe fn make(from: Self::Output) -> Self; + + /// This method must cast `self` between `&'a mut Self<'static>` and `&'a mut Self<'a>`, + /// and pass it to `f`. + /// + /// # Implementation safety + /// + /// A safe implementation of this method must be equivalent to a pointer cast/transmute between + /// `&mut Self<'a>` and `&mut Self<'static>` being passed to `f` + /// + /// # Why is this safe? + /// + /// Typically covariant lifetimes become invariant when hidden behind an `&mut`, + /// which is why the implementation of this method cannot just be `f(self)`. + /// The reason behind this is that while _reading_ a covariant lifetime that has been cast to a shorter + /// one is always safe (this is roughly the definition of a covariant lifetime), writing + /// may not necessarily be safe since you could write a smaller reference to it. 
For example, + /// the following code is unsound because it manages to stuff a `'a` lifetime into a `Cow<'static>` + /// + /// ```rust,compile_fail + /// # use std::borrow::Cow; + /// # use yoke::Yokeable; + /// struct Foo { + /// str: String, + /// cow: Cow<'static, str>, + /// } + /// + /// fn unsound<'a>(foo: &'a mut Foo) { + /// let a: &str = &foo.str; + /// foo.cow.with_mut(|cow| *cow = Cow::Borrowed(a)); + /// } + /// ``` + /// + /// However, this code will not compile because [`Yokeable::with_mut()`] requires `F: 'static`. + /// This enforces that while `F` may mutate `Self<'a>`, it can only mutate it in a way that does + /// not insert additional references. For example, `F` may call `to_owned()` on a `Cow` and mutate it, + /// but it cannot insert a new _borrowed_ reference because it has nowhere to borrow _from_ -- + /// `f` does not contain any borrowed references, and while we give it `Self<'a>` (which contains borrowed + /// data), that borrowed data is known to be valid + /// + /// Note that the `for<'b>` is also necessary, otherwise the following code would compile: + /// + /// ```rust,compile_fail + /// # use std::borrow::Cow; + /// # use yoke::Yokeable; + /// # use std::mem; + /// + /// // also safely implements Yokeable<'a> + /// struct Bar<'a> { + /// num: u8, + /// cow: Cow<'a, u8>, + /// } + /// + /// fn unsound<'a>(bar: &'a mut Bar<'static>) { + /// bar.with_mut(move |bar| bar.cow = Cow::Borrowed(&bar.num)); + /// } + /// + /// # unsafe impl<'a> Yokeable<'a> for Bar<'static> { + /// # type Output = Bar<'a>; + /// # fn transform(&'a self) -> &'a Bar<'a> { + /// # self + /// # } + /// # + /// # unsafe fn make(from: Bar<'a>) -> Self { + /// # let ret = mem::transmute_copy(&from); + /// # mem::forget(from); + /// # ret + /// # } + /// # + /// # fn with_mut(&'a mut self, f: F) + /// # where + /// # F: 'static + FnOnce(&'a mut Self::Output), + /// # { + /// # unsafe { f(mem::transmute(self)) } + /// # } + /// # } + /// ``` + /// + /// which is 
unsound because `bar` could be moved later, and we do not want to be able to + /// self-insert references to it. + /// + /// Thus the only types of mutations allowed are ones that move around already-borrowed data, or + /// introduce new owned data: + /// + /// ```rust + /// # use std::borrow::Cow; + /// # use yoke::Yokeable; + /// struct Foo { + /// str: String, + /// cow: Cow<'static, str>, + /// } + /// + /// fn sound<'a>(foo: &'a mut Foo) { + /// foo.cow.with_mut(move |cow| cow.to_mut().push('a')); + /// } + /// ``` + fn with_mut(&'a mut self, f: F) + where + F: 'static + for<'b> FnOnce(&'b mut Self::Output); +} + +unsafe impl<'a, T: 'static + ToOwned + ?Sized> Yokeable<'a> for Cow<'static, T> +where + ::Owned: Sized, +{ + type Output = Cow<'a, T>; + fn transform(&'a self) -> &'a Cow<'a, T> { + self + } + + unsafe fn make(from: Cow<'a, T>) -> Self { + debug_assert!(mem::size_of::>() == mem::size_of::()); + // i hate this + // unfortunately Rust doesn't think `mem::transmute` is possible since it's not sure the sizes + // are the same + let ret = mem::transmute_copy(&from); + mem::forget(from); + ret + } + + fn with_mut(&'a mut self, f: F) + where + F: 'static + for<'b> FnOnce(&'b mut Self::Output), + { + unsafe { f(mem::transmute(self)) } + } +}