From df6b56fd463bc336f82a972ac0b3755d2b250abf Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Mon, 8 Nov 2021 16:19:29 +0000 Subject: [PATCH] Add support for hashing from a reader fixes #141 --- derive/Cargo.toml | 2 +- derive/src/lib.rs | 2 +- derive/src/multihash.rs | 58 ++++++++++++++++++++++++++++++++++++---- derive/src/utils.rs | 2 +- examples/custom_table.rs | 15 +++++++++++ src/hasher.rs | 8 +++++- src/multihash.rs | 26 ++++++++++++++++-- src/multihash_impl.rs | 3 ++- tests/lib.rs | 7 +++++ 9 files changed, 111 insertions(+), 12 deletions(-) diff --git a/derive/Cargo.toml b/derive/Cargo.toml index bb3b4871..11153593 100644 --- a/derive/Cargo.toml +++ b/derive/Cargo.toml @@ -21,7 +21,7 @@ synstructure = "0.12.4" [features] default = ["std"] -std = [] +std = ["multihash/std"] [dev-dependencies] pretty_assertions = "1.0.0" diff --git a/derive/src/lib.rs b/derive/src/lib.rs index a9c633ec..8508f934 100644 --- a/derive/src/lib.rs +++ b/derive/src/lib.rs @@ -22,7 +22,7 @@ //! use multihash::MultihashDigest; //! //! #[derive(Clone, Copy, Debug, Eq, Multihash, PartialEq)] -//! #[mh(alloc_size = 64)] +//! #[mh(alloc_size = 64, io_path = ::std::io)] //! pub enum Code { //! #[mh(code = 0x01, hasher = multihash::Sha2_256)] //! Foo, diff --git a/derive/src/multihash.rs b/derive/src/multihash.rs index 0b23c97f..269c74d7 100644 --- a/derive/src/multihash.rs +++ b/derive/src/multihash.rs @@ -17,6 +17,7 @@ mod kw { custom_keyword!(hasher); custom_keyword!(mh); custom_keyword!(alloc_size); + custom_keyword!(io_path); } /// Attributes for the enum items. 
@@ -42,12 +43,15 @@ impl Parse for MhAttr { #[derive(Debug)] enum DeriveAttr { AllocSize(utils::Attr), + IoPath(utils::Attr), } impl Parse for DeriveAttr { fn parse(input: ParseStream) -> syn::Result { if input.peek(kw::alloc_size) { Ok(Self::AllocSize(input.parse()?)) + } else if input.peek(kw::io_path) { + Ok(Self::IoPath(input.parse()?)) } else { Err(syn::Error::new(input.span(), "unknown attribute")) } @@ -89,6 +93,17 @@ impl Hash { Multihash::wrap(#code, hasher.finalize()).unwrap() }) } + + fn code_reader(&self) -> TokenStream { + let ident = &self.ident; + let hasher = &self.hasher; + let code = &self.code; + quote!(Self::#ident => { + let mut hasher = #hasher::default(); + io::copy(reader, &mut hasher)?; + Ok(Multihash::wrap(#code, hasher.finalize()).unwrap()) + }) + } } impl<'a> From<&'a VariantInfo<'a>> for Hash { @@ -134,9 +149,11 @@ impl<'a> From<&'a VariantInfo<'a>> for Hash { /// /// Returns the `alloc_size` and whether errors regarding to `alloc_size` should be reported or not. #[allow(dead_code)] // TODO -fn parse_code_enum_attrs(ast: &syn::DeriveInput) -> syn::LitInt { +fn parse_code_enum_attrs(ast: &syn::DeriveInput) -> (syn::LitInt, syn::Path) { let mut alloc_size = None; + let mut io_path = syn::parse_quote!(::std::io); + for attr in &ast.attrs { let derive_attrs: Result, _> = syn::parse2(attr.tokens.clone()); if let Ok(derive_attrs) = derive_attrs { @@ -145,12 +162,15 @@ fn parse_code_enum_attrs(ast: &syn::DeriveInput) -> syn::LitInt { DeriveAttr::AllocSize(alloc_size_attr) => { alloc_size = Some(alloc_size_attr.value) } + DeriveAttr::IoPath(io_path_attr) => { + io_path = io_path_attr.value; + } } } } } match alloc_size { - Some(alloc_size) => alloc_size, + Some(alloc_size) => (alloc_size, io_path), None => { let msg = "enum is missing `alloc_size` attribute: e.g. 
#[mh(alloc_size = 64)]"; #[cfg(test)] @@ -206,7 +226,7 @@ pub fn multihash(s: Structure) -> TokenStream { } }; let code_enum = &s.ast().ident; - let alloc_size = parse_code_enum_attrs(s.ast()); + let (alloc_size, io_path) = parse_code_enum_attrs(s.ast()); let hashes: Vec<_> = s.variants().iter().map(Hash::from).collect(); error_code_duplicates(&hashes); @@ -218,6 +238,7 @@ pub fn multihash(s: Structure) -> TokenStream { let code_into_u64 = hashes.iter().map(|h| h.code_into_u64(&params)); let code_from_u64 = hashes.iter().map(|h| h.code_from_u64()); let code_digest = hashes.iter().map(|h| h.code_digest()); + let code_reader = hashes.iter().map(|h| h.code_reader()); quote! { /// A Multihash with the same allocated size as the Multihashes produces by this derive. @@ -232,6 +253,15 @@ pub fn multihash(s: Structure) -> TokenStream { } } + fn digest_reader(&self, reader: &mut R) -> #io_path::Result { + use #io_path; + use #mh_crate::Hasher; + match self { + #(#code_reader,)* + _ => unreachable!(), + } + } + fn wrap(&self, digest: &[u8]) -> Multihash { Multihash::wrap((*self).into(), digest).unwrap() } @@ -298,9 +328,27 @@ mod tests { } } - fn wrap(&self, digest: &[u8]) -> Multihash { - Multihash::wrap((*self).into(), digest).unwrap() + fn digest_reader(&self, reader: &mut R) -> ::std::io::Result { + use ::std::io; + use multihash::Hasher; + match self { + Self::Identity256 => { + let mut hasher = multihash::Identity256::default(); + io::copy(reader, &mut hasher)?; + Ok(Multihash::wrap(multihash::IDENTITY, hasher.finalize()).unwrap()) + }, + Self::Strobe256 => { + let mut hasher = multihash::Strobe256::default(); + io::copy(reader, &mut hasher)?; + Ok(Multihash::wrap(0x38b64f, hasher.finalize()).unwrap()) + }, + _ => unreachable!(), + } } + + fn wrap(&self, digest: &[u8]) -> Multihash { + Multihash::wrap((*self).into(), digest).unwrap() + } } impl From for u64 { diff --git a/derive/src/utils.rs b/derive/src/utils.rs index b21edb8a..e1642f5e 100644 --- a/derive/src/utils.rs 
+++ b/derive/src/utils.rs @@ -4,7 +4,7 @@ use syn::parse::{Parse, ParseStream}; use syn::punctuated::Punctuated; use syn::Error; -pub fn use_crate(name: &str) -> Result { +pub(crate) fn use_crate(name: &str) -> Result { match crate_name(name) { Ok(FoundCrate::Name(krate)) => Ok(syn::Ident::new(&krate, Span::call_site())), Ok(FoundCrate::Itself) => Ok(syn::Ident::new("crate", Span::call_site())), diff --git a/examples/custom_table.rs b/examples/custom_table.rs index f96c22f5..f9cdbb76 100644 --- a/examples/custom_table.rs +++ b/examples/custom_table.rs @@ -3,6 +3,12 @@ use std::convert::TryFrom; use multihash::derive::Multihash; use multihash::{Error, Hasher, MultihashDigest, MultihashGeneric, Sha2_256}; +#[cfg(feature = "std")] +use std::io; + +#[cfg(not(feature = "std"))] +use core2::io; + // You can implement a custom hasher. This is a SHA2 256-bit hasher that returns a hash that is // truncated to 160 bits. #[derive(Default, Debug)] @@ -19,6 +25,15 @@ impl Hasher for Sha2_256Truncated20 { } } +impl io::Write for Sha2_256Truncated20 { + fn write(&mut self, input: &[u8]) -> io::Result { + self.0.write(input) + } + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} + #[derive(Clone, Copy, Debug, Eq, Multihash, PartialEq)] #[mh(alloc_size = 64)] pub enum Code { diff --git a/src/hasher.rs b/src/hasher.rs index 894cab30..eb8446a9 100644 --- a/src/hasher.rs +++ b/src/hasher.rs @@ -1,5 +1,11 @@ +#[cfg(feature = "std")] +use std::io; + +#[cfg(not(feature = "std"))] +use core2::io; + /// Trait implemented by a hash function implementation. -pub trait Hasher { +pub trait Hasher: io::Write { /// Consume input and update internal state. 
fn update(&mut self, input: &[u8]); diff --git a/src/multihash.rs b/src/multihash.rs index c679c4a5..d09fc455 100644 --- a/src/multihash.rs +++ b/src/multihash.rs @@ -35,7 +35,27 @@ pub trait MultihashDigest: /// let hash = Code::Sha3_256.digest(b"Hello world!"); /// println!("{:02x?}", hash); /// ``` - fn digest(&self, input: &[u8]) -> Multihash; + fn digest(&self, input: &[u8]) -> Multihash { + let mut input = input; + self.digest_reader(&mut input).unwrap() + } + + /// Calculate the hash of some input stream. + /// + /// # Example + /// + /// ``` + /// // `Code` implements `MultihashDigest` + /// use multihash::{Code, MultihashDigest}; + /// + /// let mut data = std::io::Cursor::new(b"Hello world!"); + /// + /// let hash = Code::Sha3_256.digest_reader(&mut data).unwrap(); + /// println!("{:02x?}", hash); + /// ``` + fn digest_reader(&self, input: &mut R) -> io::Result> + where + Self: Sized; /// Create a multihash from an existing multihash digest. /// @@ -49,7 +69,9 @@ pub trait MultihashDigest: /// let hash = Code::Sha3_256.wrap(&hasher.finalize()); /// println!("{:02x?}", hash); /// ``` - fn wrap(&self, digest: &[u8]) -> Multihash; + fn wrap(&self, digest: &[u8]) -> Multihash { + Multihash::wrap((*self).into(), digest).unwrap() + } } /// A Multihash instance that only supports the basic functionality and no hashing. 
diff --git a/src/multihash_impl.rs b/src/multihash_impl.rs index 61f8af91..d5325753 100644 --- a/src/multihash_impl.rs +++ b/src/multihash_impl.rs @@ -7,7 +7,8 @@ pub use multihash_derive::Multihash; /// /// [`Multihash` derive]: crate::derive #[derive(Copy, Clone, Debug, Eq, Multihash, PartialEq)] -#[mh(alloc_size = 64)] +#[cfg_attr(feature = "std", mh(alloc_size = 64, io_path = ::std::io))] +#[cfg_attr(not(feature = "std"), mh(alloc_size = 64, io_path = ::core2::io))] pub enum Code { /// SHA-256 (32-byte hash size) #[cfg(feature = "sha2")] diff --git a/tests/lib.rs b/tests/lib.rs index e36ea346..aea44989 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -70,6 +70,13 @@ macro_rules! assert_encode { expected, "{:?} encodes correctly (from hasher)", stringify!($alg) ); + + // From a reader. + assert_eq!( + $code.digest_reader(&mut Cursor::new($data)).unwrap().to_bytes(), + expected, + "{:?} encodes correctly (from hasher)", stringify!($alg) + ); )* } }