diff --git a/ledger/src/shred.rs b/ledger/src/shred.rs
index 22da633adbc27e..cc36febbd41092 100644
--- a/ledger/src/shred.rs
+++ b/ledger/src/shred.rs
@@ -49,10 +49,11 @@
 //! So, given a) - c), we must restrict data shred's payload length such that the entire coding
 //! payload can fit into one coding shred / packet.
 
+pub(crate) use self::merkle::{MerkleRoot, SIZE_OF_MERKLE_ROOT};
 #[cfg(test)]
-pub(crate) use shred_code::MAX_CODE_SHREDS_PER_SLOT;
+pub(crate) use self::shred_code::MAX_CODE_SHREDS_PER_SLOT;
 use {
-    self::{merkle::MerkleRoot, shred_code::ShredCode, traits::Shred as _},
+    self::{shred_code::ShredCode, traits::Shred as _},
     crate::blockstore::{self, MAX_DATA_SHREDS_PER_SLOT},
     bitflags::bitflags,
     num_enum::{IntoPrimitive, TryFromPrimitive},
@@ -678,7 +679,6 @@ pub mod layout {
         Ok(flags & ShredFlags::SHRED_TICK_REFERENCE_MASK.bits())
     }
 
-    #[cfg(test)]
     pub(crate) fn get_merkle_root(shred: &[u8]) -> Option<MerkleRoot> {
         match get_shred_variant(shred).ok()? {
             ShredVariant::LegacyCode | ShredVariant::LegacyData => None,
diff --git a/ledger/src/shred/merkle.rs b/ledger/src/shred/merkle.rs
index 0c6746398a6b3e..41657ac5d582af 100644
--- a/ledger/src/shred/merkle.rs
+++ b/ledger/src/shred/merkle.rs
@@ -36,7 +36,7 @@ use {
 };
 
 const_assert_eq!(SIZE_OF_MERKLE_ROOT, 20);
-const SIZE_OF_MERKLE_ROOT: usize = std::mem::size_of::<MerkleRoot>();
+pub(crate) const SIZE_OF_MERKLE_ROOT: usize = std::mem::size_of::<MerkleRoot>();
 const_assert_eq!(SIZE_OF_MERKLE_PROOF_ENTRY, 20);
 const SIZE_OF_MERKLE_PROOF_ENTRY: usize = std::mem::size_of::<MerkleProofEntry>();
 const_assert_eq!(ShredData::SIZE_OF_PAYLOAD, 1203);
diff --git a/ledger/src/sigverify_shreds.rs b/ledger/src/sigverify_shreds.rs
index ae58a35334368b..449b456e1e74a6 100644
--- a/ledger/src/sigverify_shreds.rs
+++ b/ledger/src/sigverify_shreds.rs
@@ -1,7 +1,7 @@
 #![allow(clippy::implicit_hasher)]
 use {
-    crate::shred,
-    itertools::Itertools,
+    crate::shred::{self, MerkleRoot, SIZE_OF_MERKLE_ROOT},
+    itertools::{izip, Itertools},
     rayon::{prelude::*, ThreadPool},
     sha2::{Digest, Sha512},
     solana_metrics::inc_new_counter_debug,
@@ -145,6 +145,48 @@ where
     (keyvec, offsets)
 }
 
+// Recovers merkle roots from shreds binary.
+fn get_merkle_roots(
+    packets: &[PacketBatch],
+    recycler_cache: &RecyclerCache,
+) -> (
+    PinnedVec<u8>,      // Merkle roots
+    Vec<Option<usize>>, // Offsets
+) {
+    let merkle_roots: Vec<Option<MerkleRoot>> = SIGVERIFY_THREAD_POOL.install(|| {
+        packets
+            .par_iter()
+            .flat_map(|packets| {
+                packets.par_iter().map(|packet| {
+                    if packet.meta().discard() {
+                        return None;
+                    }
+                    let shred = shred::layout::get_shred(packet)?;
+                    shred::layout::get_merkle_root(shred)
+                })
+            })
+            .collect()
+    });
+    let num_merkle_roots = merkle_roots.iter().filter(|root| root.is_some()).count();
+    let mut buffer = recycler_cache.buffer().allocate("shred_gpu_merkle_roots");
+    buffer.set_pinnable();
+    resize_buffer(&mut buffer, num_merkle_roots * SIZE_OF_MERKLE_ROOT);
+    let offsets = {
+        let mut size = 0;
+        merkle_roots
+            .into_iter()
+            .map(|root| {
+                let root = root?;
+                let offset = size;
+                size += SIZE_OF_MERKLE_ROOT;
+                buffer[offset..size].copy_from_slice(&root);
+                Some(offset)
+            })
+            .collect()
+    };
+    (buffer, offsets)
+}
+
 // Resizes the buffer to >= size and a multiple of
 // std::mem::size_of::<Packet>().
 fn resize_buffer(buffer: &mut PinnedVec<u8>, size: usize) {
@@ -156,9 +198,20 @@ fn resize_buffer(buffer: &mut PinnedVec<u8>, size: usize) {
     buffer.resize(size, 0u8);
 }
 
+fn elems_from_buffer(buffer: &PinnedVec<u8>) -> perf_libs::Elems {
+    // resize_buffer ensures that buffer size is a multiple of Packet size.
+    debug_assert_eq!(buffer.len() % std::mem::size_of::<Packet>(), 0);
+    let num_packets = buffer.len() / std::mem::size_of::<Packet>();
+    perf_libs::Elems {
+        elems: buffer.as_ptr().cast::<u8>(),
+        num: num_packets as u32,
+    }
+}
+
 fn shred_gpu_offsets(
     offset: usize,
     batches: &[PacketBatch],
+    merkle_roots_offsets: impl IntoIterator<Item = Option<usize>>,
     recycler_cache: &RecyclerCache,
 ) -> (TxOffset, TxOffset, TxOffset) {
     fn add_offset(range: Range<usize>, offset: usize) -> Range<usize> {
@@ -174,15 +227,22 @@ fn shred_gpu_offsets(
         offset.checked_add(std::mem::size_of::<Packet>())
     });
     let packets = batches.iter().flatten();
-    for (offset, packet) in offsets.zip(packets) {
+    for (offset, packet, merkle_root_offset) in izip!(offsets, packets, merkle_roots_offsets) {
         let sig = shred::layout::get_signature_range();
         let sig = add_offset(sig, offset);
         debug_assert_eq!(sig.end - sig.start, std::mem::size_of::<Signature>());
-        let shred = shred::layout::get_shred(packet);
         // Signature may verify for an empty message but the packet will be
         // discarded during deserialization.
-        let msg = shred.and_then(shred::layout::get_signed_data_offsets);
-        let msg = add_offset(msg.unwrap_or_default(), offset);
+        let msg: Range<usize> = match merkle_root_offset {
+            None => {
+                let shred = shred::layout::get_shred(packet);
+                let msg = shred.and_then(shred::layout::get_signed_data_offsets);
+                add_offset(msg.unwrap_or_default(), offset)
+            }
+            Some(merkle_root_offset) => {
+                merkle_root_offset..merkle_root_offset + SIZE_OF_MERKLE_ROOT
+            }
+        };
         signature_offsets.push(sig.start as u32);
         msg_start_offsets.push(msg.start as u32);
         let msg_size = msg.end.saturating_sub(msg.start);
@@ -203,18 +263,24 @@ pub fn verify_shreds_gpu(
     let (pubkeys, pubkey_offsets) = slot_key_data_for_gpu(batches, slot_leaders, recycler_cache);
     //HACK: Pubkeys vector is passed along as a `PacketBatch` buffer to the GPU
     //TODO: GPU needs a more opaque interface, which can handle variable sized structures for data
-    let offset = pubkeys.len();
+    let (merkle_roots, merkle_roots_offsets) = get_merkle_roots(batches, recycler_cache);
+    // Merkle roots are placed after pubkeys; adjust offsets accordingly.
+    let merkle_roots_offsets = {
+        let shift = pubkeys.len();
+        merkle_roots_offsets
+            .into_iter()
+            .map(move |offset| Some(offset? + shift))
+    };
+    let offset = pubkeys.len() + merkle_roots.len();
     let (signature_offsets, msg_start_offsets, msg_sizes) =
-        shred_gpu_offsets(offset, batches, recycler_cache);
+        shred_gpu_offsets(offset, batches, merkle_roots_offsets, recycler_cache);
     let mut out = recycler_cache.buffer().allocate("out_buffer");
     out.set_pinnable();
     out.resize(signature_offsets.len(), 0u8);
-    debug_assert_eq!(pubkeys.len() % std::mem::size_of::<Packet>(), 0);
-    let num_pubkey_packets = pubkeys.len() / std::mem::size_of::<Packet>();
-    let mut elems = vec![perf_libs::Elems {
-        elems: pubkeys.as_ptr().cast::<u8>(),
-        num: num_pubkey_packets as u32,
-    }];
+    let mut elems = vec![
+        elems_from_buffer(&pubkeys),
+        elems_from_buffer(&merkle_roots),
+    ];
     elems.extend(batches.iter().map(|batch| perf_libs::Elems {
         elems: batch.as_ptr().cast::<u8>(),
         num: batch.len() as u32,
@@ -326,21 +392,27 @@ pub fn sign_shreds_gpu(
     let mut secret_offsets = recycler_cache.offsets().allocate("secret_offsets");
     secret_offsets.resize(packet_count, pubkey_size as u32);
 
-    let offset: usize = pinned_keypair.len();
+    let (merkle_roots, merkle_roots_offsets) = get_merkle_roots(batches, recycler_cache);
+    // Merkle roots are placed after the keypair; adjust offsets accordingly.
+    let merkle_roots_offsets = {
+        let shift = pinned_keypair.len();
+        merkle_roots_offsets
+            .into_iter()
+            .map(move |offset| Some(offset? + shift))
+    };
+    let offset = pinned_keypair.len() + merkle_roots.len();
     trace!("offset: {}", offset);
     let (signature_offsets, msg_start_offsets, msg_sizes) =
-        shred_gpu_offsets(offset, batches, recycler_cache);
+        shred_gpu_offsets(offset, batches, merkle_roots_offsets, recycler_cache);
     let total_sigs = signature_offsets.len();
     let mut signatures_out = recycler_cache.buffer().allocate("ed25519 signatures");
     signatures_out.set_pinnable();
     signatures_out.resize(total_sigs * sig_size, 0);
 
-    debug_assert_eq!(pinned_keypair.len() % std::mem::size_of::<Packet>(), 0);
-    let num_keypair_packets = pinned_keypair.len() / std::mem::size_of::<Packet>();
-    let mut elems = vec![perf_libs::Elems {
-        elems: pinned_keypair.as_ptr().cast::<u8>(),
-        num: num_keypair_packets as u32,
-    }];
+    let mut elems = vec![
+        elems_from_buffer(pinned_keypair),
+        elems_from_buffer(&merkle_roots),
+    ];
     elems.extend(batches.iter().map(|batch| perf_libs::Elems {
         elems: batch.as_ptr().cast::<u8>(),
         num: batch.len() as u32,
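
For reference, below is a minimal CPU-only sketch of the packing scheme that get_merkle_roots introduces and of the offset adjustment done in verify_shreds_gpu / sign_shreds_gpu before the offsets are handed to shred_gpu_offsets. The pack_merkle_roots helper and the plain Vec<u8> buffer are illustrative stand-ins for the patch's pinned, Packet-aligned recycler buffer, and the 20-byte MerkleRoot alias follows the const_assert in merkle.rs; none of these names are part of the patch itself.

// Standalone sketch (std only); not part of the patch.
type MerkleRoot = [u8; 20]; // assumed layout, matching const_assert_eq!(SIZE_OF_MERKLE_ROOT, 20)
const SIZE_OF_MERKLE_ROOT: usize = std::mem::size_of::<MerkleRoot>();

// Packs the Some(root) entries back to back into one flat byte buffer and
// records, per shred, the byte offset of its root (None for legacy shreds).
fn pack_merkle_roots(roots: &[Option<MerkleRoot>]) -> (Vec<u8>, Vec<Option<usize>>) {
    let num_roots = roots.iter().filter(|root| root.is_some()).count();
    let mut buffer = vec![0u8; num_roots * SIZE_OF_MERKLE_ROOT];
    let mut size = 0;
    let offsets = roots
        .iter()
        .map(|root| {
            let root = root.as_ref()?;
            let offset = size;
            size += SIZE_OF_MERKLE_ROOT;
            buffer[offset..size].copy_from_slice(root);
            Some(offset)
        })
        .collect();
    (buffer, offsets)
}

fn main() {
    // Two merkle shreds and one legacy shred (no merkle root).
    let roots = [Some([1u8; 20]), None, Some([2u8; 20])];
    let (buffer, offsets) = pack_merkle_roots(&roots);
    assert_eq!(buffer.len(), 2 * SIZE_OF_MERKLE_ROOT);
    assert_eq!(offsets, vec![Some(0), None, Some(20)]);

    // The GPU sees one concatenated region [pubkeys | merkle roots | packets],
    // so the offsets recorded above are shifted by the length of the buffer
    // that precedes the roots (pubkeys when verifying, the keypair when signing).
    let shift = 64; // hypothetical length of the preceding buffer
    let shifted: Vec<Option<usize>> = offsets
        .into_iter()
        .map(|offset| Some(offset? + shift))
        .collect();
    assert_eq!(shifted, vec![Some(64), None, Some(84)]);
}

Legacy shreds keep a None offset and fall back to the signed-data range inside the packet itself, which is why shred_gpu_offsets only substitutes the merkle-root range when an offset is present.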