From b09de259c2a589bf1152e9b71081bdb002600996 Mon Sep 17 00:00:00 2001
From: Tiago Castro
Date: Wed, 21 Feb 2024 18:14:29 +0000
Subject: [PATCH] feat(rebuild): add rebuild rangers

Adds a set of rebuild rangers which can be used by different rebuilds:
1. full rebuild - walk the entire device range and copy every segment
   (current nexus full rebuild behaviour).
2. partial rebuild - walk the allocated segments only and copy them.
3. partial eq rebuild - walk the entire device range and copy only
   allocated segments (current nexus partial rebuild behaviour).

Signed-off-by: Tiago Castro
---
 io-engine/src/core/segment_map.rs      |   5 +
 io-engine/src/rebuild/mod.rs           |   1 +
 io-engine/src/rebuild/nexus_rebuild.rs |   4 +
 io-engine/src/rebuild/rebuild_map.rs   |   6 +
 io-engine/src/rebuild/rebuild_task.rs  |  20 +-
 io-engine/src/rebuild/rebuilders.rs    | 249 +++++++++++++++++++++++++
 6 files changed, 271 insertions(+), 14 deletions(-)
 create mode 100644 io-engine/src/rebuild/rebuilders.rs

diff --git a/io-engine/src/core/segment_map.rs b/io-engine/src/core/segment_map.rs
index 7d51409208..d5677cab81 100644
--- a/io-engine/src/core/segment_map.rs
+++ b/io-engine/src/core/segment_map.rs
@@ -95,4 +95,9 @@ impl SegmentMap {
     pub(crate) fn count_dirty_blks(&self) -> u64 {
         self.count_ones() * self.segment_size / self.block_len
     }
+
+    /// Convert the segments into a bit map.
+    pub(crate) fn into_bit_vec(self) -> BitVec {
+        self.segments
+    }
 }
diff --git a/io-engine/src/rebuild/mod.rs b/io-engine/src/rebuild/mod.rs
index 5aff584c49..df8b6ea426 100644
--- a/io-engine/src/rebuild/mod.rs
+++ b/io-engine/src/rebuild/mod.rs
@@ -9,6 +9,7 @@ mod rebuild_map;
 mod rebuild_state;
 mod rebuild_stats;
 mod rebuild_task;
+mod rebuilders;
 
 pub use bdev_rebuild::BdevRebuildJob;
 pub use nexus_rebuild::NexusRebuildJob;
diff --git a/io-engine/src/rebuild/nexus_rebuild.rs b/io-engine/src/rebuild/nexus_rebuild.rs
index 8bb1f33f2a..75e8d48d89 100644
--- a/io-engine/src/rebuild/nexus_rebuild.rs
+++ b/io-engine/src/rebuild/nexus_rebuild.rs
@@ -215,6 +215,10 @@ impl NexusRebuildJobBackend {
 
 #[async_trait::async_trait(?Send)]
 impl RebuildTaskCopier for NexusRebuildDescriptor {
+    fn descriptor(&self) -> &RebuildDescriptor {
+        &self.common
+    }
+
     /// Copies one segment worth of data from source into destination. During
     /// this time the LBA range being copied is locked so that there cannot be
     /// front end I/O to the same LBA range.
diff --git a/io-engine/src/rebuild/rebuild_map.rs b/io-engine/src/rebuild/rebuild_map.rs
index 8f04453926..631720efd2 100644
--- a/io-engine/src/rebuild/rebuild_map.rs
+++ b/io-engine/src/rebuild/rebuild_map.rs
@@ -1,3 +1,4 @@
+use bit_vec::BitVec;
 use std::fmt::{Debug, Formatter};
 
 use crate::core::SegmentMap;
@@ -61,4 +62,9 @@ impl RebuildMap {
     pub(crate) fn count_dirty_blks(&self) -> u64 {
         self.segments.count_dirty_blks()
     }
+
+    /// Convert the segments into a bit map.
+    pub(crate) fn into_bit_vec(self) -> BitVec {
+        self.segments.into_bit_vec()
+    }
 }
diff --git a/io-engine/src/rebuild/rebuild_task.rs b/io-engine/src/rebuild/rebuild_task.rs
index 0dd3e492aa..14c3cdb00d 100644
--- a/io-engine/src/rebuild/rebuild_task.rs
+++ b/io-engine/src/rebuild/rebuild_task.rs
@@ -195,6 +195,7 @@ impl RebuildTasks {
 /// can be expanded for sub-segment copies.
 #[async_trait::async_trait(?Send)]
 pub(super) trait RebuildTaskCopier {
+    fn descriptor(&self) -> &RebuildDescriptor;
     /// Copies an entire segment at the given block address, from source to
     /// target using a `DmaBuf`.
     async fn copy_segment(
@@ -206,25 +207,16 @@ pub(super) trait RebuildTaskCopier {
 
 #[async_trait::async_trait(?Send)]
 impl RebuildTaskCopier for RebuildDescriptor {
+    fn descriptor(&self) -> &RebuildDescriptor {
+        self
+    }
+
     /// Copies one segment worth of data from source into destination.
     async fn copy_segment(
         &self,
         blk: u64,
         task: &mut RebuildTask,
     ) -> Result<bool, RebuildError> {
-        // todo: move the map out of the descriptor, into the specific backends.
-        if self.is_blk_sync(blk) {
-            return Ok(false);
-        }
-
-        // Perform the copy.
-        let result = task.copy_one(blk, self).await;
-
-        // In the case of success, mark the segment as already transferred.
-        if result.is_ok() {
-            self.blk_synced(blk);
-        }
-
-        result
+        task.copy_one(blk, self).await
     }
 }
diff --git a/io-engine/src/rebuild/rebuilders.rs b/io-engine/src/rebuild/rebuilders.rs
new file mode 100644
index 0000000000..8b45a655a2
--- /dev/null
+++ b/io-engine/src/rebuild/rebuilders.rs
@@ -0,0 +1,249 @@
+use crate::rebuild::{
+    rebuild_descriptor::RebuildDescriptor,
+    rebuild_task::{RebuildTask, RebuildTaskCopier},
+    RebuildError,
+    RebuildMap,
+};
+use std::{ops::Range, rc::Rc};
+
+/// A rebuild may rebuild a device by walking it differently, for example:
+/// 1. full rebuild - walk the entire device range and copy every segment
+///    (current nexus full rebuild behaviour).
+/// 2. partial rebuild - walk the allocated segments only and copy them.
+/// 3. partial eq rebuild - walk the entire device range and copy only allocated
+///    segments (current nexus partial rebuild behaviour).
+pub(super) trait RangeRebuilder<T: RebuildTaskCopier> {
+    /// Get the next block to rebuild.
+    fn next(&mut self) -> Option<u64>;
+    /// Peek the next block to rebuild.
+    fn peek_next(&self) -> Option<u64>;
+    /// Get the remaining blocks yet to be rebuilt.
+    fn blocks_remaining(&self) -> u64;
+    /// Check if this is a partial rebuild.
+    fn is_partial(&self) -> bool;
+    /// Get the rebuild descriptor reference.
+    fn desc(&self) -> &RebuildDescriptor;
+    /// Get the copier which can copy a segment.
+    fn copier(&self) -> Rc<T>;
+}
+
+/// The range is the full range of the request, in steps of segment size.
+pub(super) struct FullRebuild<T: RebuildTaskCopier> {
+    range: PeekableIterator<std::iter::StepBy<Range<u64>>>,
+    copier: Rc<T>,
+}
+impl<T: RebuildTaskCopier> FullRebuild<T> {
+    /// Create a full rebuild with the given copier.
+    #[allow(dead_code)]
+    pub(super) fn new(copier: T) -> Self {
+        let desc = copier.descriptor();
+        let range = desc.range.clone();
+        Self {
+            range: PeekableIterator::new(
+                range.step_by(desc.segment_size_blks as usize),
+            ),
+            copier: Rc::new(copier),
+        }
+    }
+}
+impl<T: RebuildTaskCopier> RangeRebuilder<T> for FullRebuild<T> {
+    fn next(&mut self) -> Option<u64> {
+        self.range.next()
+    }
+    fn peek_next(&self) -> Option<u64> {
+        self.range.peek().cloned()
+    }
+
+    fn blocks_remaining(&self) -> u64 {
+        self.peek_next()
+            .map(|r| self.desc().range.end.max(r) - r)
+            .unwrap_or_default()
+    }
+    fn is_partial(&self) -> bool {
+        false
+    }
+
+    fn desc(&self) -> &RebuildDescriptor {
+        self.copier.descriptor()
+    }
+    fn copier(&self) -> Rc<T> {
+        self.copier.clone()
+    }
+}
+
+/// A partial rebuild range which steps through each segment but triggers
+/// the copy only if the segment dirty bit is set.
+pub(super) struct PartialRebuild<T: RebuildTaskCopier> {
+    range: PeekableIterator<std::iter::Enumerate<bit_vec::IntoIter>>,
+    copier: Rc<T>,
+}
+impl<T: RebuildTaskCopier> PartialRebuild<T> {
+    /// Create a partial rebuild with the given copier and segment
+    /// map.
+    #[allow(dead_code)]
+    pub(super) fn new(map: RebuildMap, copier: T) -> Self {
+        let bit_vec = map.into_bit_vec();
+        Self {
+            range: PeekableIterator::new(bit_vec.into_iter().enumerate()),
+            copier: Rc::new(copier),
+        }
+    }
+}
+impl<T: RebuildTaskCopier> RangeRebuilder<T> for PartialRebuild<T> {
+    fn next(&mut self) -> Option<u64> {
+        for (blk, is_set) in self.range.by_ref() {
+            if is_set {
+                return Some(blk as u64);
+            }
+        }
+        None
+    }
+    fn peek_next(&self) -> Option<u64> {
+        // todo: should we add a wrapper to ensure we peek only set bits?
+        self.range.peek().map(|(blk, _)| *blk as u64)
+    }
+
+    fn blocks_remaining(&self) -> u64 {
+        self.peek_next()
+            .map(|r| self.desc().range.end.max(r) - r)
+            .unwrap_or_default()
+    }
+    fn is_partial(&self) -> bool {
+        true
+    }
+
+    fn desc(&self) -> &RebuildDescriptor {
+        self.copier.descriptor()
+    }
+    fn copier(&self) -> Rc<T> {
+        self.copier.clone()
+    }
+}
+
+/// The range is the full range of the request, in steps of segment size
+/// and a copy is triggered for each segment.
+/// However, during the copy itself, clean segments are skipped.
+pub(super) struct PartialSeqRebuild<T: RebuildTaskCopier> {
+    range: PeekableIterator<std::iter::StepBy<Range<u64>>>,
+    copier: Rc<PartialSeqCopier<T>>,
+}
+impl<T: RebuildTaskCopier> PartialSeqRebuild<T> {
+    /// Create a partial sequential rebuild with the given copier and segment
+    /// map.
+    #[allow(dead_code)]
+    pub(super) fn new(map: RebuildMap, copier: T) -> Self {
+        let desc = copier.descriptor();
+        let range = desc.range.clone();
+        Self {
+            range: PeekableIterator::new(
+                range.step_by(desc.segment_size_blks as usize),
+            ),
+            copier: Rc::new(PartialSeqCopier::new(map, copier)),
+        }
+    }
+}
+impl<T: RebuildTaskCopier> RangeRebuilder<PartialSeqCopier<T>>
+    for PartialSeqRebuild<T>
+{
+    fn next(&mut self) -> Option<u64> {
+        self.range.next()
+    }
+    fn peek_next(&self) -> Option<u64> {
+        self.range.peek().cloned()
+    }
+
+    fn blocks_remaining(&self) -> u64 {
+        self.copier.map.lock().count_dirty_blks()
+    }
+    fn is_partial(&self) -> bool {
+        true
+    }
+
+    fn desc(&self) -> &RebuildDescriptor {
+        self.copier.descriptor()
+    }
+    fn copier(&self) -> Rc<PartialSeqCopier<T>> {
+        self.copier.clone()
+    }
+}
+/// The partial sequential rebuild copier, which uses a bitmap to determine if a
+/// particular block range must be copied.
+pub(super) struct PartialSeqCopier<T: RebuildTaskCopier> {
+    map: parking_lot::Mutex<RebuildMap>,
+    copier: T,
+}
+impl<T: RebuildTaskCopier> PartialSeqCopier<T> {
+    fn new(map: RebuildMap, copier: T) -> Self {
+        Self {
+            map: parking_lot::Mutex::new(map),
+            copier,
+        }
+    }
+    /// Checks if the block is already in sync, in which case it does not
+    /// have to be transferred again.
+    #[inline(always)]
+    fn is_blk_sync(&self, blk: u64) -> bool {
+        self.map.lock().is_blk_clean(blk)
+    }
+
+    /// Marks the rebuild segment starting from the given logical block as
+    /// already transferred.
+    #[inline(always)]
+    fn blk_synced(&self, blk: u64) {
+        self.map.lock().blk_clean(blk);
+    }
+}
+#[async_trait::async_trait(?Send)]
+impl<T: RebuildTaskCopier> RebuildTaskCopier for PartialSeqCopier<T> {
+    fn descriptor(&self) -> &RebuildDescriptor {
+        self.copier.descriptor()
+    }
+
+    /// Copies one segment worth of data from source into destination.
+    async fn copy_segment(
+        &self,
+        blk: u64,
+        task: &mut RebuildTask,
+    ) -> Result<bool, RebuildError> {
+        if self.is_blk_sync(blk) {
+            return Ok(false);
+        }
+
+        let result = self.copier.copy_segment(blk, task).await;
+
+        // In the case of success, mark the segment as already transferred.
+        if result.is_ok() {
+            self.blk_synced(blk);
+        }
+
+        result
+    }
+}
+
+/// Adds peekable functionality to a generic iterator.
+/// > Note: the peekable from the std library is not sufficient here because it
+/// > requires a mutable reference to peek. We get around this limitation by
+/// > always setting the peek at a small performance cost.
+struct PeekableIterator<I: Iterator> {
+    iter: I,
+    peek: Option<I::Item>,
+}
+impl<I: Iterator> PeekableIterator<I> {
+    fn new(mut iter: I) -> Self {
+        Self {
+            peek: iter.next(),
+            iter,
+        }
+    }
+    /// Peek into the future for the next value which `next` would yield.
+    fn peek(&self) -> Option<&I::Item> {
+        self.peek.as_ref()
+    }
+}
+impl<I: Iterator> Iterator for PeekableIterator<I> {
+    type Item = I::Item;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        std::mem::replace(&mut self.peek, self.iter.next())
+    }
+}
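
As a non-normative aside (not part of the diff above): a minimal, self-contained sketch of the walking pattern the rangers introduce. `Ranger`, `FullWalk`, `AllocatedWalk` and `next_blk` are illustrative stand-ins for `RangeRebuilder`, `FullRebuild`, `PartialRebuild` and `next`; the synchronous loop stands in for the async `copy_segment` call driven by the rebuild backend.

// Illustrative stand-ins only; assumes segment-sized steps over a block range.
use std::ops::Range;

trait Ranger {
    /// Yield the next block to copy, or None when the walk is done.
    fn next_blk(&mut self) -> Option<u64>;
    fn is_partial(&self) -> bool;
}

/// Mode 1: walk the entire device range in segment-sized steps.
struct FullWalk {
    range: std::iter::StepBy<Range<u64>>,
}
impl Ranger for FullWalk {
    fn next_blk(&mut self) -> Option<u64> {
        self.range.next()
    }
    fn is_partial(&self) -> bool {
        false
    }
}

/// Mode 2: walk only the segments whose dirty bit is set.
struct AllocatedWalk {
    segment_size_blks: u64,
    bits: std::iter::Enumerate<std::vec::IntoIter<bool>>,
}
impl Ranger for AllocatedWalk {
    fn next_blk(&mut self) -> Option<u64> {
        self.bits
            .find(|(_, dirty)| *dirty)
            .map(|(seg, _)| seg as u64 * self.segment_size_blks)
    }
    fn is_partial(&self) -> bool {
        true
    }
}

fn main() {
    // 8 segments of 128 blocks each; only segments 1 and 5 are dirty.
    let segment_size_blks = 128u64;
    let rangers: [Box<dyn Ranger>; 2] = [
        Box::new(FullWalk {
            range: (0u64..1024).step_by(segment_size_blks as usize),
        }),
        Box::new(AllocatedWalk {
            segment_size_blks,
            bits: vec![false, true, false, false, false, true, false, false]
                .into_iter()
                .enumerate(),
        }),
    ];
    // The backend drives every ranger the same way, whichever walk was chosen.
    for mut ranger in rangers {
        let mut copied = vec![];
        while let Some(blk) = ranger.next_blk() {
            // The real code would await copier.copy_segment(blk, task) here.
            copied.push(blk);
        }
        println!("partial={}: copied blocks {copied:?}", ranger.is_partial());
    }
}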
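
In the same spirit, a sketch of the third mode, where the full range is still walked but a wrapper around the copier skips segments whose dirty bit is clear, mirroring what `PartialSeqCopier` does around `RebuildTaskCopier::copy_segment`. `SkipCleanCopier` and the `RefCell` standing in for the `parking_lot` mutex are illustrative assumptions, not the real API.

// Illustrative stand-in for the PartialSeqCopier idea; not the real API.
use std::cell::RefCell;

struct SkipCleanCopier {
    /// One dirty bit per segment; RefCell stands in for the parking_lot Mutex.
    dirty: RefCell<Vec<bool>>,
    segment_size_blks: u64,
}
impl SkipCleanCopier {
    /// Returns true if the segment at `blk` was copied, false if it was skipped.
    fn copy_segment(&self, blk: u64) -> bool {
        let seg = (blk / self.segment_size_blks) as usize;
        let is_dirty = self.dirty.borrow()[seg];
        if !is_dirty {
            return false; // clean segment: nothing to transfer
        }
        // ... the real copier would move the data here ...
        self.dirty.borrow_mut()[seg] = false; // mark as transferred on success
        true
    }
}

fn main() {
    let copier = SkipCleanCopier {
        dirty: RefCell::new(vec![false, true, false, true]),
        segment_size_blks: 128,
    };
    // Walk the full range in segment-sized steps (as PartialSeqRebuild does)
    // and let the wrapper decide whether each segment really needs a copy.
    let copied: Vec<u64> = (0u64..512)
        .step_by(128)
        .filter(|blk| copier.copy_segment(*blk))
        .collect();
    assert_eq!(copied, vec![128, 384]);
}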