|
1 | 1 | // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
2 | 2 | // SPDX-License-Identifier: Apache-2.0 |
3 | 3 |
|
4 | | -use crate::block_manager::layout::{BlockLayoutConfig, LayoutError}; |
| 4 | +use crate::block_manager::layout::{BlockLayoutConfig, GenericBlockLayout, LayoutError}; |
5 | 5 | use crate::block_manager::storage::Storage; |
6 | 6 |
|
7 | 7 | use validator::ValidationError; |
8 | 8 |
|
| 9 | +/// Verification result for a memory region |
| 10 | +#[derive(Debug, Clone)] |
| 11 | +#[allow(dead_code)] |
| 12 | +pub struct RegionVerificationResult { |
| 13 | + /// Block index that was verified |
| 14 | + pub block_idx: usize, |
| 15 | + /// Layer index that was verified |
| 16 | + pub layer_idx: usize, |
| 17 | + /// Outer dimension index that was verified |
| 18 | + pub outer_idx: usize, |
| 19 | + /// Expected memory address for this region |
| 20 | + pub expected_addr: usize, |
| 21 | + /// Actual memory address for this region |
| 22 | + pub actual_addr: usize, |
| 23 | + /// Expected size in bytes for this region |
| 24 | + pub expected_size: usize, |
| 25 | + /// Actual size in bytes for this region |
| 26 | + pub actual_size: usize, |
| 27 | + /// Whether the addresses match |
| 28 | + pub addr_matches: bool, |
| 29 | + /// Whether the sizes match |
| 30 | + pub size_matches: bool, |
| 31 | +} |
| 32 | + |
| 33 | +/// Layout verification statistics |
| 34 | +#[derive(Debug, Clone, Default)] |
| 35 | +#[allow(dead_code)] |
| 36 | +pub struct LayoutVerificationStats { |
| 37 | + /// Total number of memory regions verified |
| 38 | + pub total_regions: usize, |
| 39 | + /// Number of regions with address mismatches |
| 40 | + pub addr_mismatches: usize, |
| 41 | + /// Number of regions with size mismatches |
| 42 | + pub size_mismatches: usize, |
| 43 | + /// Number of regions that passed all verifications |
| 44 | + pub successful_verifications: usize, |
| 45 | +} |
| 46 | + |
| 47 | +#[derive(Debug)] |
| 48 | +#[allow(dead_code)] |
| 49 | +pub struct WorkerLayoutVerifier { |
| 50 | + stats: LayoutVerificationStats, |
| 51 | +} |
| 52 | + |
| 53 | +#[allow(dead_code)] |
| 54 | +impl WorkerLayoutVerifier { |
| 55 | + // Constructor: Start with clean slate |
| 56 | + pub fn new() -> Self { |
| 57 | + Self { |
| 58 | + stats: LayoutVerificationStats::default(), |
| 59 | + } |
| 60 | + } |
| 61 | + |
| 62 | + pub fn verify_layout_consistency<L: GenericBlockLayout>( |
| 63 | + &mut self, |
| 64 | + layout: &L, |
| 65 | + ) -> Result<Vec<RegionVerificationResult>, LayoutError> { |
| 66 | + // This is the main orchestrator method. |
| 67 | + // It systematically checks every memory region in |
| 68 | + // the layout to ensure consistency. |
| 69 | + |
| 70 | + self.stats = LayoutVerificationStats::default(); |
| 71 | + let mut results = Vec::new(); |
| 72 | + |
| 73 | + // Iterate over all blocks, layers, and outer dimensions |
| 74 | + for block_idx in 0..layout.num_blocks() { |
| 75 | + for layer_idx in 0..layout.num_layers() { |
| 76 | + for outer_idx in 0..layout.outer_dim() { |
| 77 | + let result = |
| 78 | + self.verify_memory_region(layout, block_idx, layer_idx, outer_idx)?; |
| 79 | + self.update_stats(&result); |
| 80 | + results.push(result); |
| 81 | + } |
| 82 | + } |
| 83 | + } |
| 84 | + |
| 85 | + Ok(results) |
| 86 | + } |
| 87 | + |
| 88 | + pub fn verify_memory_region<L: GenericBlockLayout>( |
| 89 | + &mut self, |
| 90 | + layout: &L, |
| 91 | + block_idx: usize, |
| 92 | + layer_idx: usize, |
| 93 | + outer_idx: usize, |
| 94 | + ) -> Result<RegionVerificationResult, LayoutError> { |
| 95 | + let memory_region = layout.memory_region(block_idx, layer_idx, outer_idx)?; |
| 96 | + |
| 97 | + let config = layout.config(); |
| 98 | + let expected_size = config.page_size * config.inner_dim * config.dtype_width_bytes; |
| 99 | + |
| 100 | + let expected_addr = memory_region.addr(); |
| 101 | + |
| 102 | + Ok(RegionVerificationResult { |
| 103 | + block_idx, |
| 104 | + layer_idx, |
| 105 | + outer_idx, |
| 106 | + expected_addr, |
| 107 | + actual_addr: memory_region.addr(), |
| 108 | + expected_size, |
| 109 | + actual_size: memory_region.size(), |
| 110 | + addr_matches: expected_addr == memory_region.addr(), |
| 111 | + size_matches: expected_size == memory_region.size(), |
| 112 | + }) |
| 113 | + } |
| 114 | + |
| 115 | + fn update_stats(&mut self, result: &RegionVerificationResult) { |
| 116 | + self.stats.total_regions += 1; |
| 117 | + if !result.addr_matches { |
| 118 | + self.stats.addr_mismatches += 1; |
| 119 | + } |
| 120 | + if !result.size_matches { |
| 121 | + self.stats.size_mismatches += 1; |
| 122 | + } |
| 123 | + if result.addr_matches && result.size_matches { |
| 124 | + self.stats.successful_verifications += 1; |
| 125 | + } |
| 126 | + } |
| 127 | + |
| 128 | + pub fn has_critical_mismatches(&self) -> bool { |
| 129 | + self.stats.size_mismatches > 0 |
| 130 | + } |
| 131 | +} |
| 132 | + |
9 | 133 | /// Validation function for Option<usize> to check if it's Some(power_of_2). |
10 | 134 | pub fn validate_power_of_2(alignment: usize) -> Result<(), ValidationError> { |
11 | 135 | if !alignment.is_power_of_two() { |
@@ -87,3 +211,90 @@ pub fn validate_indices<C: BlockLayoutConfig>( |
87 | 211 |
|
88 | 212 | Ok(()) |
89 | 213 | } |
| 214 | + |
| 215 | +#[cfg(test)] |
| 216 | +mod worker_verification_tests { |
| 217 | + use super::*; |
| 218 | + use crate::block_manager::LayoutConfig; |
| 219 | + use crate::block_manager::layout::{FullyContiguous, LayerSeparate}; |
| 220 | + use crate::block_manager::storage::tests::{NullDeviceAllocator, NullDeviceStorage}; |
| 221 | + |
| 222 | + // Test constants (same as layout.rs tests) |
| 223 | + const NUM_BLOCKS: usize = 7; |
| 224 | + const NUM_LAYERS: usize = 5; |
| 225 | + const OUTER_DIM: usize = 2; |
| 226 | + const PAGE_SIZE: usize = 4; |
| 227 | + const INNER_DIM: usize = 13; |
| 228 | + const DTYPE_WIDTH_BYTES: usize = 4; |
| 229 | + |
| 230 | + fn create_test_config() -> LayoutConfig { |
| 231 | + LayoutConfig { |
| 232 | + num_blocks: NUM_BLOCKS, |
| 233 | + num_layers: NUM_LAYERS, |
| 234 | + outer_dim: OUTER_DIM, |
| 235 | + page_size: PAGE_SIZE, |
| 236 | + inner_dim: INNER_DIM, |
| 237 | + alignment: 1, |
| 238 | + dtype_width_bytes: DTYPE_WIDTH_BYTES, |
| 239 | + } |
| 240 | + } |
| 241 | + |
| 242 | + fn create_fully_contiguous_layout() -> Result<FullyContiguous<NullDeviceStorage>, LayoutError> { |
| 243 | + let config = create_test_config(); |
| 244 | + FullyContiguous::allocate(config, &NullDeviceAllocator) |
| 245 | + } |
| 246 | + |
| 247 | + fn create_layer_separate_layout() -> Result<LayerSeparate<NullDeviceStorage>, LayoutError> { |
| 248 | + let config = create_test_config(); |
| 249 | + LayerSeparate::allocate(config, &NullDeviceAllocator, true) // outer_contiguous = true |
| 250 | + } |
| 251 | + |
| 252 | + #[test] |
| 253 | + fn test_verify_initialisation() { |
| 254 | + let verifier = WorkerLayoutVerifier::new(); |
| 255 | + |
| 256 | + assert_eq!(verifier.stats.total_regions, 0); |
| 257 | + assert_eq!(verifier.stats.addr_mismatches, 0); |
| 258 | + assert_eq!(verifier.stats.size_mismatches, 0); |
| 259 | + assert_eq!(verifier.stats.successful_verifications, 0); |
| 260 | + |
| 261 | + assert!(!verifier.has_critical_mismatches()); |
| 262 | + } |
| 263 | + |
| 264 | + #[test] |
| 265 | + fn test_layer_separate_verification() { |
| 266 | + let layout = create_layer_separate_layout().expect("Failed to create LayerSeparate layout"); |
| 267 | + let mut verifier = WorkerLayoutVerifier::new(); |
| 268 | + let results = verifier |
| 269 | + .verify_layout_consistency(&layout) |
| 270 | + .expect("Failed to verify layout"); |
| 271 | + |
| 272 | + assert_eq!(results.len(), NUM_BLOCKS * NUM_LAYERS * OUTER_DIM); |
| 273 | + assert!( |
| 274 | + !verifier.has_critical_mismatches(), |
| 275 | + "Expected no critical mismatches but got: total={}, size_mismatches={}, successful={}", |
| 276 | + verifier.stats.total_regions, |
| 277 | + verifier.stats.size_mismatches, |
| 278 | + verifier.stats.successful_verifications |
| 279 | + ); |
| 280 | + } |
| 281 | + |
| 282 | + #[test] |
| 283 | + fn test_fully_contiguous_verification() { |
| 284 | + let layout = |
| 285 | + create_fully_contiguous_layout().expect("Failed to create FullyContiguous layout"); |
| 286 | + let mut verifier = WorkerLayoutVerifier::new(); |
| 287 | + let results = verifier |
| 288 | + .verify_layout_consistency(&layout) |
| 289 | + .expect("Failed to verify layout"); |
| 290 | + assert_eq!(results.len(), NUM_BLOCKS * NUM_LAYERS * OUTER_DIM); |
| 291 | + |
| 292 | + assert!( |
| 293 | + !verifier.has_critical_mismatches(), |
| 294 | + "Expected no critical mismatches but got: total={}, size_mismatches={}, successful={}", |
| 295 | + verifier.stats.total_regions, |
| 296 | + verifier.stats.size_mismatches, |
| 297 | + verifier.stats.successful_verifications |
| 298 | + ); |
| 299 | + } |
| 300 | +} |
0 commit comments