From d8588774faae0566fb8bbaffd0f61cb235016f4b Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Wed, 9 Oct 2024 10:12:49 +0200 Subject: [PATCH 01/49] Basic Logging in binary format --- betree/src/allocator.rs | 9 +++- betree/src/data_management/dmu.rs | 69 +++++++++++++++++++++++++++---- betree/src/database/mod.rs | 1 + 3 files changed, 70 insertions(+), 9 deletions(-) diff --git a/betree/src/allocator.rs b/betree/src/allocator.rs index 7210602d..1b6159aa 100644 --- a/betree/src/allocator.rs +++ b/betree/src/allocator.rs @@ -1,9 +1,10 @@ //! This module provides `SegmentAllocator` and `SegmentId` for bitmap //! allocation of 1GiB segments. -use crate::{cow_bytes::CowBytes, storage_pool::DiskOffset, vdev::Block}; +use crate::{cow_bytes::CowBytes, storage_pool::DiskOffset, vdev::Block, Error}; use bitvec::prelude::*; use byteorder::{BigEndian, ByteOrder}; +use std::io::Write; /// 256KiB, so that `vdev::BLOCK_SIZE * SEGMENT_SIZE == 1GiB` pub const SEGMENT_SIZE: usize = 1 << SEGMENT_SIZE_LOG_2; @@ -101,6 +102,12 @@ impl SegmentAllocator { range.fill(action.as_bool()); } + + /// Writes the bitmap to a writer. 
+ pub fn write_bitmap(&self, writer: &mut W) -> Result<(), Error> { + writer.write_all(self.data.as_raw_slice())?; + Ok(()) + } } // TODO better wording diff --git a/betree/src/data_management/dmu.rs b/betree/src/data_management/dmu.rs index 6cb7bb07..b3c377fb 100644 --- a/betree/src/data_management/dmu.rs +++ b/betree/src/data_management/dmu.rs @@ -6,7 +6,7 @@ use super::{ CopyOnWriteEvent, Dml, HasStoragePreference, Object, ObjectReference, }; use crate::{ - allocator::{Action, SegmentAllocator, SegmentId}, + allocator::{Action, SegmentAllocator, SegmentId, SEGMENT_SIZE_BYTES}, buffer::Buf, cache::{Cache, ChangeKeyError, RemoveError}, checksum::{Builder, Checksum, State}, @@ -17,14 +17,17 @@ use crate::{ size::{Size, SizeMut, StaticSize}, storage_pool::{DiskOffset, StoragePoolLayer, NUM_STORAGE_CLASSES}, tree::{Node, PivotKey}, - vdev::{Block, BLOCK_SIZE}, + vdev::{Block, File, BLOCK_SIZE}, StoragePreference, }; +use byteorder::{LittleEndian, WriteBytesExt}; use crossbeam_channel::Sender; use futures::{executor::block_on, future::ok, prelude::*}; use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}; use std::{ collections::HashMap, + fs::OpenOptions, + io::Write, mem::replace, ops::DerefMut, pin::Pin, @@ -35,6 +38,8 @@ use std::{ thread::yield_now, }; +const ALLOCATION_LOG_FILE: &str = "allocation_log.bin"; + /// The Data Management Unit. 
pub struct Dmu where @@ -60,6 +65,7 @@ where next_modified_node_id: AtomicU64, next_disk_id: AtomicU64, report_tx: Option>, + allocation_log_file: Mutex, } impl Dmu @@ -87,6 +93,15 @@ where .collect::>() .into_boxed_slice(); + let allocation_log_file = Mutex::new( + OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(ALLOCATION_LOG_FILE) + .expect("Failed to create allocation log file"), + ); + Dmu { // default_compression_state: default_compression.new_compression().expect("Can't create compression state"), default_compression, @@ -103,6 +118,7 @@ where next_modified_node_id: AtomicU64::new(1), next_disk_id: AtomicU64::new(0), report_tx: None, + allocation_log_file, } } @@ -120,6 +136,33 @@ where pub fn pool(&self) -> &SPL { &self.pool } + + /// Writes the global header for the allocation logging. + pub fn write_global_header(&self) -> Result<(), Error> { + let mut file = self.allocation_log_file.lock(); + + // Number of storage classes + file.write_u8(self.pool.storage_class_count())?; + + // Disks per class + for class in 0..self.pool.storage_class_count() { + let disk_count = self.pool.disk_count(class); + file.write_u16::(disk_count)?; + } + + // Segments per disk + for class in 0..self.pool.storage_class_count() { + for disk in 0..self.pool.disk_count(class) { + let segment_count = self.pool.size_in_blocks(class, disk); + file.write_u64::(segment_count.as_u64())?; + } + } + + // Blocks per segment (constant) + file.write_u64::(SEGMENT_SIZE_BYTES.try_into().unwrap())?; + + Ok(()) + } } impl Dmu @@ -484,6 +527,12 @@ where let strategy = self.alloc_strategy[storage_preference as usize]; + // NOTE: Could we mark classes, disks and/or segments as full to prevent looping over them? + // We would then also need to handle this, when deallocating things. + // Would full mean completely full or just not having enough contiguous memory of some + // size? + // Or save the largest contiguous memory region as a value and compare against that. 
For + // that the allocator needs to support that and we have to 'bubble' the largest value up. 'class: for &class in strategy.iter().flatten() { let disks_in_class = self.pool.disk_count(class); if disks_in_class == 0 { @@ -536,12 +585,16 @@ where let first_seen_segment_id = *segment_id; loop { - if let Some(segment_offset) = self - .handler - .get_allocation_bitmap(*segment_id, self)? - .access() - .allocate(size.as_u32()) - { + // Has to be split because else the temporary value is dropped while borrowing + let bitmap = self.handler.get_allocation_bitmap(*segment_id, self)?; + let mut allocator = bitmap.access(); + if let Some(segment_offset) = allocator.allocate(size.as_u32()) { + let mut file = self.allocation_log_file.lock(); + // Write local header and bitmap + file.write_u8(class)?; + file.write_u16::(disk_id)?; + file.write_u64::(segment_id.0)?; + allocator.write_bitmap(&mut *file)?; break segment_id.disk_offset(segment_offset); } let next_segment_id = segment_id.next(disk_size); diff --git a/betree/src/database/mod.rs b/betree/src/database/mod.rs index 7b4f1556..c1ef380b 100644 --- a/betree/src/database/mod.rs +++ b/betree/src/database/mod.rs @@ -431,6 +431,7 @@ impl Database { if let Some(tx) = &dml_tx { dmu.set_report(tx.clone()); } + dmu.write_global_header(); let (tree, root_ptr) = builder.select_root_tree(Arc::new(dmu))?; From 612582aff62f6e608fd320b1370786fa4243d5b0 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Wed, 9 Oct 2024 16:27:05 +0200 Subject: [PATCH 02/49] write correct Segment size --- betree/src/data_management/dmu.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/betree/src/data_management/dmu.rs b/betree/src/data_management/dmu.rs index b3c377fb..636ba752 100644 --- a/betree/src/data_management/dmu.rs +++ b/betree/src/data_management/dmu.rs @@ -6,7 +6,7 @@ use super::{ CopyOnWriteEvent, Dml, HasStoragePreference, Object, ObjectReference, }; use crate::{ - allocator::{Action, SegmentAllocator, SegmentId, 
SEGMENT_SIZE_BYTES}, + allocator::{Action, SegmentAllocator, SegmentId, SEGMENT_SIZE}, buffer::Buf, cache::{Cache, ChangeKeyError, RemoveError}, checksum::{Builder, Checksum, State}, @@ -159,7 +159,7 @@ where } // Blocks per segment (constant) - file.write_u64::(SEGMENT_SIZE_BYTES.try_into().unwrap())?; + file.write_u64::(SEGMENT_SIZE.try_into().unwrap())?; Ok(()) } From cfb14d088f6163b48f4bc406547393c9791aacc7 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Thu, 17 Oct 2024 07:41:48 +0200 Subject: [PATCH 03/49] minor --- betree/src/database/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/betree/src/database/mod.rs b/betree/src/database/mod.rs index c1ef380b..90cc3029 100644 --- a/betree/src/database/mod.rs +++ b/betree/src/database/mod.rs @@ -431,7 +431,7 @@ impl Database { if let Some(tx) = &dml_tx { dmu.set_report(tx.clone()); } - dmu.write_global_header(); + dmu.write_global_header()?; let (tree, root_ptr) = builder.select_root_tree(Arc::new(dmu))?; From 7c78834a65dd96726869c77b6874af8fc0b8ad20 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Thu, 17 Oct 2024 07:47:19 +0200 Subject: [PATCH 04/49] Fix None unwrap --- betree/src/tree/imp/mod.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/betree/src/tree/imp/mod.rs b/betree/src/tree/imp/mod.rs index 63262538..31c98d54 100644 --- a/betree/src/tree/imp/mod.rs +++ b/betree/src/tree/imp/mod.rs @@ -393,14 +393,11 @@ where self.msg_action().apply(key, &msg, &mut tmp); } - // This may never be false. 
- let data = tmp.unwrap(); - drop(node); if self.evict { self.dml.evict()?; } - Ok(Some((info, data))) + Ok(tmp.map(|data| (info, data))) } } } From 514c9cfa5267d7447fb2eae39f9d028dc8481abd Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Thu, 17 Oct 2024 07:48:06 +0200 Subject: [PATCH 05/49] First visualization of allocations --- betree/scripts/visualize_allocation_log | 166 ++++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100755 betree/scripts/visualize_allocation_log diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log new file mode 100755 index 00000000..19fb85a0 --- /dev/null +++ b/betree/scripts/visualize_allocation_log @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 + + +from dataclasses import dataclass +import struct +import sys +import matplotlib.pyplot as plt +import matplotlib.patches as patches +from matplotlib.widgets import Slider +import numpy as np + + +@dataclass +class Config: + num_classes: int + disks_per_class: list[int] + blocks_per_disk: list[list[int]] + blocks_per_segment: int + + +@dataclass +class Timestamp: + time: int + storage_class: int + disk: int + segment: int + + +def parse_header(log_file: str) -> dict: + """Parses the global header of the allocation log file.""" + + with open(log_file, "rb") as f: + num_classes = struct.unpack(" list[dict]: + """Builds a global bitmap representation for each time step.""" + + global_bitmap = [] + + with open(log_file, "rb") as f: + # Skip the global header (already parsed) + f.seek(1 + 2 * config["num_classes"] + 8 * + sum(config["disks_per_class"]) + 8) + + while True: + # Read local header + try: + class_id = struct.unpack(" Date: Mon, 21 Oct 2024 08:26:01 +0200 Subject: [PATCH 06/49] Storage saving binary logging --- betree/scripts/visualize_allocation_log | 389 +++++++++++++++++------- betree/src/data_management/dmu.rs | 20 +- 2 files changed, 285 insertions(+), 124 deletions(-) diff --git a/betree/scripts/visualize_allocation_log 
b/betree/scripts/visualize_allocation_log index 19fb85a0..1993449e 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -1,32 +1,64 @@ #!/usr/bin/env python3 -from dataclasses import dataclass +from math import floor +import os import struct import sys import matplotlib.pyplot as plt -import matplotlib.patches as patches from matplotlib.widgets import Slider import numpy as np +from tqdm import tqdm + +# Constants to get relevant information from the disk_offset +MASK_STORAGE_CLASS = ((1 << 2) - 1) << (10 + 52) +MASK_DISK_ID = ((1 << 10) - 1) << 52 +MASK_OFFSET = (1 << 52) - 1 +SEGMENT_SIZE_LOG_2 = 18 +SEGMENT_SIZE = 1 << SEGMENT_SIZE_LOG_2 +SEGMENT_SIZE_MASK = SEGMENT_SIZE - 1 +# This is the amount of bytes one (de-)allocation has in the log +SIZE_PER_ALLOCATION = 13 + + +def parse_disk_offset(offset: int) -> (int, int, int): + storage_class = (offset & MASK_STORAGE_CLASS) >> (52 + 10) + disk_id = ((offset & MASK_DISK_ID) >> 52) + block_offset = (offset & MASK_OFFSET) + segment_id = (block_offset & ~SEGMENT_SIZE_MASK) + return storage_class, disk_id, segment_id, block_offset -@dataclass class Config: - num_classes: int - disks_per_class: list[int] + num_storage_ids: int + disks_per_storage_id: list[int] blocks_per_disk: list[list[int]] blocks_per_segment: int + def __init__(self, num_storage_ids: int, + disks_per_storage_id: list[int], + blocks_per_disk: list[list[int]], + blocks_per_segment: int, + ): + self.num_storage_ids = num_storage_ids + self.disks_per_storage_id = disks_per_storage_id + self.blocks_per_disk = blocks_per_disk + self.blocks_per_segment = blocks_per_segment + + def __str__(self) -> str: + return f"num_storage_ids: {self.num_storage_ids}, \ + disks_per_class: {self.disks_per_storage_id}, \ + blocks_per_disk: {self.blocks_per_disk}, \ + blocks_per_segment: {self.blocks_per_segment}" + + def disks_of_storage_id(self, storage_id: int) -> int: + return self.disks_per_storage_id[storage_id] 
-@dataclass -class Timestamp: - time: int - storage_class: int - disk: int - segment: int + def blocks_of_disk(self, storage_id: int, disk_id: int) -> int: + return self.blocks_per_disk[storage_id][disk_id] -def parse_header(log_file: str) -> dict: +def parse_header(log_file: str) -> Config: """Parses the global header of the allocation log file.""" with open(log_file, "rb") as f: @@ -35,132 +67,253 @@ def parse_header(log_file: str) -> dict: for _ in range(num_classes): disks_per_class.append(struct.unpack(" list[dict]: - """Builds a global bitmap representation for each time step.""" - - global_bitmap = [] - - with open(log_file, "rb") as f: - # Skip the global header (already parsed) - f.seek(1 + 2 * config["num_classes"] + 8 * - sum(config["disks_per_class"]) + 8) - - while True: - # Read local header - try: - class_id = struct.unpack(" int: + """Returns the remaining bytes in a file from the current position of the file pointer.""" + current_position = file_pointer.tell() + file_pointer.seek(0, os.SEEK_END) # Go to the end of the file + end_position = file_pointer.tell() + # Return to the original position + file_pointer.seek(current_position, os.SEEK_SET) + return end_position - current_position + + +class GlobalBitMap: + config: Config + # dict [ (storage, disk, segment), list[(time, bitmap)]] + bitmap: dict[(int, int, int), list[(int, np.ndarray)]] + log_file: str + time: int = 1 + + def __init__(self, log_file: str, config: Config): + self.config = config + self.log_file = log_file + self._initialize_bitmap() + self._build_global_bitmap() + + def __str__(self): + return f"Log_file: {self.log_file}\n" + f"Time: {self.time}\n" + \ + f"Config: {self.config}\n" + + def _initialize_bitmap(self): + self.bitmap = {} + for storage_id in range(self.config.num_storage_ids): + for disk_id in range(self.config.disks_of_storage_id(storage_id)): + used_blocks = 0 + blocks_in_disk = self.config.blocks_of_disk( + storage_id, disk_id) + while (used_blocks < 
blocks_in_disk): + segment_size = min( + self.config.blocks_per_segment, blocks_in_disk - used_blocks) + + bitmap = np.packbits(np.zeros(segment_size, dtype=bool)) + self.bitmap[(storage_id, disk_id, used_blocks)] = [ + (0, bitmap)] + used_blocks += segment_size + + def _build_global_bitmap(self): + """Builds a global bitmap representation for each time step.""" + with open(self.log_file, "rb") as f: + # Skip the global header (already parsed) + f.seek(1 + 2 * self.config.num_storage_ids + 8 * + sum(self.config.disks_per_storage_id) + 8) + + timesteps = remaining_bytes(f) // SIZE_PER_ALLOCATION + + i = 0 + # while True: + for i in tqdm(range(timesteps), desc="Building bitmap"): + # Read Allocation + try: + op_type = struct.unpack(" np.array: + if time is None or time > self.time: + time = self.time + if time < 0: + raise IndexError("Time has to be non-negative") + + num_blocks = sum(sum(self.config.blocks_per_disk[storage_id]) for storage_id in range( + self.config.num_storage_ids)) + bitmap = np.zeros(num_blocks, dtype=np.uint8) + start = 0 + for storage_id in range(self.config.num_storage_ids): + local_bitmap = self.get_storage(storage_id, time) + bitmap[start:start+len(local_bitmap)] = local_bitmap.copy() + start += len(local_bitmap) + + return bitmap + + # Get the bitmap of a storage id at time. If no time is provided return the + # bitmap of the last timestep. 
+ # If it doesn't match with the config throw an errer + def get_storage(self, storage_id: int, time: int = None) -> np.array: + if time is None or time > self.time: + time = self.time + if time < 0: + raise IndexError("Time has to be non-negative") + if storage_id >= self.config.num_storage_ids or storage_id < 0: + raise IndexError(f"Tried to access storage with id { + storage_id} of 0-{self.config.num_storage_ids - 1}") + if self.config.disks_of_storage_id(storage_id) == 0: + return np.zeros(0) + + num_blocks = sum(self.config.blocks_per_disk[storage_id]) + bitmap = np.zeros(num_blocks, dtype=np.uint8) + start = 0 + for disk_id in range(self.config.disks_of_storage_id(storage_id)): + local_bitmap = self.get_disk(storage_id, disk_id, time) + bitmap[start:start+len(local_bitmap)] = local_bitmap.copy() + start += len(local_bitmap) + + return bitmap + + # Get the bitmap of a disk id at time. If no time is provided return the + # bitmap of the last timestep. + # If it doesn't match with the config throw an errer + def get_disk(self, storage_id: int, disk_id: int, time: int = None) -> np.array: + if time is None or time > self.time: + time = self.time + if time < 0: + raise IndexError("Time has to be non-negative") + if storage_id >= self.config.num_storage_ids or storage_id < 0: + raise IndexError(f"Tried to access storage with id { + storage_id} of 0-{self.config.num_storage_ids - 1}") + if self.config.disks_of_storage_id(storage_id) == 0: + return np.zeros(0) + if disk_id >= self.config.disks_of_storage_id(storage_id) or disk_id < 0: + raise IndexError(f"Tried to access disk with id { + disk_id} of 0-{self.config.disks_of_storage_id(storage_id) - 1}") + + num_blocks = config.blocks_of_disk(storage_id, disk_id) + bitmap = np.zeros(num_blocks, dtype=np.uint8) + blocks_per_segment = self.config.blocks_per_segment + entries = filter(lambda k: k[0] == + storage_id and k[1] == disk_id, self.bitmap) + + for entry in entries: + segment_id = entry[2] + time, bitmap_at_time = 
self._binary_search_time( + time, entry) + end = min(segment_id + blocks_per_segment, + self.config.blocks_of_disk(entry[0], entry[1])) + + bitmap[segment_id:end] = np.unpackbits(bitmap_at_time) + + return bitmap + + def _binary_search_time(self, time: int, entry: (int, int, int)) -> (int, np.array): + bitmaps = self.bitmap[entry] + left = 0 + right = len(bitmaps) - 1 + result = None + while left <= right: + middle = floor((left + right) / 2) + if bitmaps[middle][0] == time: + return bitmaps[middle] + elif bitmaps[middle][0] < time: + result = bitmaps[middle] + left = middle + 1 + else: + right = middle - 1 + + return result + + def plot(self): + """Plots the bitmap with an interactive slider for timestamp selection.""" + + fig, ax = plt.subplots() + plt.subplots_adjust(bottom=0.25) + + # Initial plot (last timestamp) + bitmap_np = self.get() # Calculate the size of the square size = int(np.ceil(bitmap_np.shape[0] ** 0.5)) - - # Reshape the bitmap to a square bitmap_square = np.pad( bitmap_np, (0, size * size - bitmap_np.shape[0]), "constant" ).reshape(size, size) - im.set_data(bitmap_square) - ax.set_title(f"Class: {class_id}, Disk: { - disk_id}, Segment: {segment_id}") - fig.canvas.draw_idle() + im = ax.imshow(bitmap_square, cmap="gray_r") + + ax.set_title(f"Timestamp: {self.time - 1}") + ax.set_xlabel("Block") + ax.set_ylabel("Block") + ax.set_xticks([]) + ax.set_yticks([]) + + # Create slider + ax_slider = plt.axes([0.25, 0.1, 0.65, 0.03]) + slider = Slider( + ax_slider, + "Timestamp", + 0, + self.time - 1, + valinit=self.time - 1, + valstep=1, + ) - slider.on_changed(update) + def update(val): + timestamp = int(slider.val) + bitmap_np = self.get(timestamp) - plt.show() + # Calculate the size of the square + size = int(np.ceil(bitmap_np.shape[0] ** 0.5)) + bitmap_square = np.pad( + bitmap_np, (0, size * size - bitmap_np.shape[0]), "constant" + ).reshape(size, size) + + im.set_data(bitmap_square) + ax.set_title(f"Timestamp: {timestamp}") + fig.canvas.draw_idle() 
+ + slider.on_changed(update) + + plt.show() if __name__ == "__main__": - # NOTE: This script needs ~10-15 times the memory that the log file has if len(sys.argv) < 2: print("Please provide a file to visualize!") exit(1) log_file = sys.argv[1] config = parse_header(log_file) - global_bitmap = build_global_bitmap(log_file, config) - print(config) - print(len(global_bitmap)) - plot_bitmap(global_bitmap) + global_bitmap = GlobalBitMap(log_file, config) + print(global_bitmap) + + global_bitmap.plot() diff --git a/betree/src/data_management/dmu.rs b/betree/src/data_management/dmu.rs index 636ba752..3fd34304 100644 --- a/betree/src/data_management/dmu.rs +++ b/betree/src/data_management/dmu.rs @@ -93,6 +93,7 @@ where .collect::>() .into_boxed_slice(); + // TODO: make append only let allocation_log_file = Mutex::new( OpenOptions::new() .create(true) @@ -244,6 +245,12 @@ where obj_ptr.offset().disk_id(), obj_ptr.size(), ); + { + let mut file = self.allocation_log_file.lock(); + let _ = file.write_u8(Action::Deallocate.as_bool() as u8); + let _ = file.write_u64::(obj_ptr.offset.as_u64()); + let _ = file.write_u32::(obj_ptr.size.as_u32()); + } if let (CopyOnWriteEvent::Removed, Some(tx), CopyOnWriteReason::Remove) = ( self.handler.copy_on_write( obj_ptr.offset(), @@ -590,12 +597,13 @@ where let mut allocator = bitmap.access(); if let Some(segment_offset) = allocator.allocate(size.as_u32()) { let mut file = self.allocation_log_file.lock(); - // Write local header and bitmap - file.write_u8(class)?; - file.write_u16::(disk_id)?; - file.write_u64::(segment_id.0)?; - allocator.write_bitmap(&mut *file)?; - break segment_id.disk_offset(segment_offset); + let disk_offset = segment_id.disk_offset(segment_offset); + + file.write_u8(Action::Allocate.as_bool() as u8)?; + file.write_u64::(disk_offset.as_u64())?; + file.write_u32::(size.as_u32())?; + + break disk_offset; } let next_segment_id = segment_id.next(disk_size); trace!( From 73a89069c04e1388741be0ab761727d35dafb7f6 Mon Sep 17 
00:00:00 2001 From: Pascal Zittlau Date: Mon, 21 Oct 2024 17:14:27 +0200 Subject: [PATCH 07/49] faster bitmap getting --- betree/scripts/visualize_allocation_log | 29 +++++++++++-------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 1993449e..51e21980 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -1,6 +1,7 @@ #!/usr/bin/env python3 +import functools from math import floor import os import struct @@ -97,7 +98,7 @@ class GlobalBitMap: # dict [ (storage, disk, segment), list[(time, bitmap)]] bitmap: dict[(int, int, int), list[(int, np.ndarray)]] log_file: str - time: int = 1 + time: int = 0 def __init__(self, log_file: str, config: Config): self.config = config @@ -137,6 +138,7 @@ class GlobalBitMap: i = 0 # while True: for i in tqdm(range(timesteps), desc="Building bitmap"): + self.time += 1 # Read Allocation try: op_type = struct.unpack(" np.array: + def get_storage(self, output_bitmap: np.array, storage_id: int, time: int = None) -> np.array: if time is None or time > self.time: time = self.time if time < 0: @@ -193,19 +192,18 @@ class GlobalBitMap: return np.zeros(0) num_blocks = sum(self.config.blocks_per_disk[storage_id]) - bitmap = np.zeros(num_blocks, dtype=np.uint8) + # bitmap = np.zeros(num_blocks, dtype=np.uint8) start = 0 for disk_id in range(self.config.disks_of_storage_id(storage_id)): - local_bitmap = self.get_disk(storage_id, disk_id, time) - bitmap[start:start+len(local_bitmap)] = local_bitmap.copy() - start += len(local_bitmap) + start += self.get_disk( + output_bitmap[start:], storage_id, disk_id, time) - return bitmap + return start # Get the bitmap of a disk id at time. If no time is provided return the # bitmap of the last timestep. 
# If it doesn't match with the config throw an errer - def get_disk(self, storage_id: int, disk_id: int, time: int = None) -> np.array: + def get_disk(self, output_bitmap: np.array, storage_id: int, disk_id: int, time: int = None) -> int: if time is None or time > self.time: time = self.time if time < 0: @@ -219,8 +217,6 @@ class GlobalBitMap: raise IndexError(f"Tried to access disk with id { disk_id} of 0-{self.config.disks_of_storage_id(storage_id) - 1}") - num_blocks = config.blocks_of_disk(storage_id, disk_id) - bitmap = np.zeros(num_blocks, dtype=np.uint8) blocks_per_segment = self.config.blocks_per_segment entries = filter(lambda k: k[0] == storage_id and k[1] == disk_id, self.bitmap) @@ -232,10 +228,11 @@ class GlobalBitMap: end = min(segment_id + blocks_per_segment, self.config.blocks_of_disk(entry[0], entry[1])) - bitmap[segment_id:end] = np.unpackbits(bitmap_at_time) + output_bitmap[segment_id:end] = np.unpackbits(bitmap_at_time) - return bitmap + return end + @functools.lru_cache(1024) def _binary_search_time(self, time: int, entry: (int, int, int)) -> (int, np.array): bitmaps = self.bitmap[entry] left = 0 From cbb50424dad1ba987a671ff346392145fbebf573 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Mon, 21 Oct 2024 23:05:32 +0200 Subject: [PATCH 08/49] minor --- betree/scripts/visualize_allocation_log | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 51e21980..f3c25b3c 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -180,7 +180,7 @@ class GlobalBitMap: # Get the bitmap of a storage id at time. If no time is provided return the # bitmap of the last timestep. 
# If it doesn't match with the config throw an errer - def get_storage(self, output_bitmap: np.array, storage_id: int, time: int = None) -> np.array: + def get_storage(self, output_bitmap: np.array, storage_id: int, time: int = None) -> int: if time is None or time > self.time: time = self.time if time < 0: @@ -191,8 +191,6 @@ class GlobalBitMap: if self.config.disks_of_storage_id(storage_id) == 0: return np.zeros(0) - num_blocks = sum(self.config.blocks_per_disk[storage_id]) - # bitmap = np.zeros(num_blocks, dtype=np.uint8) start = 0 for disk_id in range(self.config.disks_of_storage_id(storage_id)): start += self.get_disk( From 1f5e39950286a1df04c8e7f3804594cc0f6411ca Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Wed, 23 Oct 2024 14:36:35 +0200 Subject: [PATCH 09/49] visualize bitmap by storage layer --- betree/scripts/visualize_allocation_log | 124 ++++++++++++++++++++++-- 1 file changed, 118 insertions(+), 6 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index f3c25b3c..c771e570 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -7,7 +7,7 @@ import os import struct import sys import matplotlib.pyplot as plt -from matplotlib.widgets import Slider +from matplotlib.widgets import Slider, CheckButtons import numpy as np from tqdm import tqdm @@ -55,6 +55,9 @@ class Config: def disks_of_storage_id(self, storage_id: int) -> int: return self.disks_per_storage_id[storage_id] + def blocks_of_storage_id(self, storage_id: int) -> int: + return sum(self.blocks_of_disk(storage_id, disk_id) for disk_id in range(self.disks_of_storage_id(storage_id))) + def blocks_of_disk(self, storage_id: int, disk_id: int) -> int: return self.blocks_per_disk[storage_id][disk_id] @@ -138,7 +141,6 @@ class GlobalBitMap: i = 0 # while True: for i in tqdm(range(timesteps), desc="Building bitmap"): - self.time += 1 # Read Allocation try: op_type = struct.unpack(" np.array: + 
num_blocks = self.config.blocks_of_storage_id(storage_id) + bitmap = np.zeros(num_blocks, dtype=np.uint8) + self.get_storage(bitmap, storage_id, time) + return bitmap + # Get the bitmap of a disk id at time. If no time is provided return the # bitmap of the last timestep. # If it doesn't match with the config throw an errer + def get_disk(self, output_bitmap: np.array, storage_id: int, disk_id: int, time: int = None) -> int: if time is None or time > self.time: time = self.time @@ -230,6 +241,12 @@ class GlobalBitMap: return end + def get_disk_bitmap(self, storage_id: int, disk_id: int, time: int = None) -> np.array: + num_blocks = self.config.blocks_of_disk(storage_id, disk_id) + bitmap = np.zeros(num_blocks, dtype=np.uint8) + self.get_disk(bitmap, storage_id, disk_id, time) + return bitmap + @functools.lru_cache(1024) def _binary_search_time(self, time: int, entry: (int, int, int)) -> (int, np.array): bitmaps = self.bitmap[entry] @@ -248,7 +265,7 @@ class GlobalBitMap: return result - def plot(self): + def plot_simple(self): """Plots the bitmap with an interactive slider for timestamp selection.""" fig, ax = plt.subplots() @@ -262,9 +279,7 @@ class GlobalBitMap: bitmap_square = np.pad( bitmap_np, (0, size * size - bitmap_np.shape[0]), "constant" ).reshape(size, size) - - im = ax.imshow(bitmap_square, cmap="gray_r") - + im = ax.imshow(bitmap_square, cmap="gray_r", interpolation=None) ax.set_title(f"Timestamp: {self.time - 1}") ax.set_xlabel("Block") ax.set_ylabel("Block") @@ -297,10 +312,106 @@ class GlobalBitMap: fig.canvas.draw_idle() slider.on_changed(update) + plt.show() + + def plot(self): + """Plots the bitmap with an interactive slider for timestamp selection and checkboxes for storage toggling.""" + form = [ + ["storage_0", "storage_1", "storage_2", "storage_3"], + ["frag_0", "frag_1", "frag_2", "frag_3"], + ["frag_global", "frag_global", "frag_global", "frag_global"], + ["slider", "slider", "slider", "slider"], + ] + gs_kw = dict(width_ratios=[1, 1, 1, 
1], height_ratios=[3, 1, 1, 0.03]) + fig, axd = plt.subplot_mosaic( + form, gridspec_kw=gs_kw, layout="constrained") + + ims = {} + # Initial plot (last timestamp) + for storage_id in range(self.config.num_storage_ids): + name = f"storage_{storage_id}" + storage_bitmap = self.get_storage_bitmap(storage_id) + resized_bitmap = reshape_to_axes(storage_bitmap, axd[name], fig) + + ims[name] = {} + ims[name]["bm"] = storage_bitmap + ims[name]["im"] = axd[name].imshow( + resized_bitmap, cmap="gray_r", aspect="auto", interpolation=None) + axd[name].set_title(f"Storage {storage_id}") + axd[name].set_xlabel("Block") + axd[name].set_ylabel("Block") + axd[name].set_xticks([]) + axd[name].set_yticks([]) + + slider = Slider( + axd["slider"], + "Timestamp", + 0, + self.time - 1, + valinit=self.time - 1, + valstep=1, + ) + + def update(val): + timestamp = int(slider.val) + for storage_id in range(self.config.num_storage_ids): + name = f"storage_{storage_id}" + self.get_storage(ims[name]["bm"], storage_id, timestamp) + resized_bitmap = reshape_to_axes( + ims[name]["bm"], axd[name], fig) + + ims[name]["im"].set_data(resized_bitmap) + + fig.canvas.draw_idle() + # identify_axes(axd) + slider.on_changed(update) plt.show() +def reshape_to_axes(arr_1d, ax, fig): + """ + Reshapes a 1D NumPy array to a 2D array with dimensions close to the + aspect ratio of the given Matplotlib axes. + + Args: + arr_1d: The 1D NumPy array to reshape. + ax: The Matplotlib axes object. + fig: The Matplotlib figure object. + + Returns: + A 2D NumPy array with dimensions close to the axes aspect ratio. 
+ """ + + # Get axes dimensions in inches + bbox = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) + width, height = bbox.width, bbox.height + + # Calculate target aspect ratio + target_aspect = width / height + + # Calculate the ideal number of columns for the target aspect ratio + total_pixels = len(arr_1d) + cols = int(np.sqrt(total_pixels * target_aspect)) + + # Adjust columns to find the closest aspect ratio while using all pixels + rows = total_pixels // cols + while rows * cols != total_pixels: + cols -= 1 + rows = total_pixels // cols + + # Reshape the 1D array to the calculated dimensions + arr_2d = arr_1d.reshape(rows, cols) + + return arr_2d + + +def identify_axes(ax_dict, fontsize=24): + kw = dict(ha="center", va="center", fontsize=fontsize, color="darkgrey") + for k, ax in ax_dict.items(): + ax.text(0.5, 0.5, k, transform=ax.transAxes, **kw) + + if __name__ == "__main__": if len(sys.argv) < 2: print("Please provide a file to visualize!") @@ -311,4 +422,5 @@ if __name__ == "__main__": global_bitmap = GlobalBitMap(log_file, config) print(global_bitmap) + global_bitmap.plot_simple() global_bitmap.plot() From a61c46ae2e20832a6a5f02d96f2af622d17aeb1f Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Wed, 23 Oct 2024 14:53:13 +0200 Subject: [PATCH 10/49] Speed up plotting by precalculating things --- betree/scripts/visualize_allocation_log | 39 +++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index c771e570..2289d72b 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -342,6 +342,10 @@ class GlobalBitMap: axd[name].set_ylabel("Block") axd[name].set_xticks([]) axd[name].set_yticks([]) + bbox = axd[name].get_window_extent().transformed( + fig.dpi_scale_trans.inverted()) + ims[name]["width"] = bbox.width + ims[name]["height"] = bbox.height slider = Slider( axd["slider"], 
@@ -357,8 +361,8 @@ class GlobalBitMap: for storage_id in range(self.config.num_storage_ids): name = f"storage_{storage_id}" self.get_storage(ims[name]["bm"], storage_id, timestamp) - resized_bitmap = reshape_to_axes( - ims[name]["bm"], axd[name], fig) + resized_bitmap = reshape_to_close_aspect( + ims[name]["bm"], ims[name]["width"], ims[name]["height"]) ims[name]["im"].set_data(resized_bitmap) @@ -406,6 +410,37 @@ def reshape_to_axes(arr_1d, ax, fig): return arr_2d +def reshape_to_close_aspect(arr_1d, width, height): + """ + Reshapes a 1D NumPy array to a 2D array with dimensions close to the + aspect ratio of the given width and height + """ + rows, cols = get_close_aspect(width, height, len(arr_1d)) + + # Reshape the 1D array to the calculated dimensions + arr_2d = arr_1d.reshape(rows, cols) + + return arr_2d + + +@functools.lru_cache(128) +def get_close_aspect(width, height, len) -> (int, int): + # Calculate target aspect ratio + target_aspect = width / height + + # Calculate the ideal number of columns for the target aspect ratio + total_pixels = len + cols = int(np.sqrt(total_pixels * target_aspect)) + + # Adjust columns to find the closest aspect ratio while using all pixels + rows = total_pixels // cols + while rows * cols != total_pixels: + cols -= 1 + rows = total_pixels // cols + + return rows, cols + + def identify_axes(ax_dict, fontsize=24): kw = dict(ha="center", va="center", fontsize=fontsize, color="darkgrey") for k, ax in ax_dict.items(): From 58753f6d8965537bd6bc772097adc9eb9a71e8a8 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Thu, 24 Oct 2024 08:27:30 +0200 Subject: [PATCH 11/49] New get with output parameter, initial timestep is 1 --- betree/scripts/visualize_allocation_log | 70 +++++++++++++------------ 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 2289d72b..15ca4b6b 100755 --- a/betree/scripts/visualize_allocation_log +++ 
b/betree/scripts/visualize_allocation_log @@ -166,18 +166,23 @@ class GlobalBitMap: # Get the global bitmap at time. If no time is provided return the bitmap # of the last timestep. - def get(self, time: int = None) -> np.array: + def get(self, output_bitmap: np.array, time: int = None) -> int: if time is None or time > self.time: time = self.time if time < 0: raise IndexError("Time has to be non-negative") + start = 0 + for storage_id in range(self.config.num_storage_ids): + start += self.get_storage(output_bitmap[start:], storage_id, time) + + return start + + def get_bitmap(self, time: int = None) -> np.array: num_blocks = sum(sum(self.config.blocks_per_disk[storage_id]) for storage_id in range( self.config.num_storage_ids)) bitmap = np.zeros(num_blocks, dtype=np.uint8) - start = 0 - for storage_id in range(self.config.num_storage_ids): - start += self.get_storage(bitmap[start:], storage_id, time) + self.get(bitmap, time) return bitmap @@ -271,16 +276,18 @@ class GlobalBitMap: fig, ax = plt.subplots() plt.subplots_adjust(bottom=0.25) - # Initial plot (last timestamp) - bitmap_np = self.get() + # Initial plot + bitmap_np = self.get_bitmap(1) # Calculate the size of the square size = int(np.ceil(bitmap_np.shape[0] ** 0.5)) - bitmap_square = np.pad( + padded_bitmap = np.pad( bitmap_np, (0, size * size - bitmap_np.shape[0]), "constant" - ).reshape(size, size) - im = ax.imshow(bitmap_square, cmap="gray_r", interpolation=None) - ax.set_title(f"Timestamp: {self.time - 1}") + ) + bitmap_square = padded_bitmap.reshape(size, size) + im = ax.imshow(bitmap_square, cmap="gray_r", + interpolation=None, animated=True) + ax.set_title(f"Global Storage Bitmap") ax.set_xlabel("Block") ax.set_ylabel("Block") ax.set_xticks([]) @@ -293,22 +300,16 @@ class GlobalBitMap: "Timestamp", 0, self.time - 1, - valinit=self.time - 1, + valinit=1, valstep=1, ) def update(val): timestamp = int(slider.val) - bitmap_np = self.get(timestamp) - - # Calculate the size of the square - size = 
int(np.ceil(bitmap_np.shape[0] ** 0.5)) - bitmap_square = np.pad( - bitmap_np, (0, size * size - bitmap_np.shape[0]), "constant" - ).reshape(size, size) + self.get(padded_bitmap, timestamp) + bitmap_square = padded_bitmap.reshape(size, size) im.set_data(bitmap_square) - ax.set_title(f"Timestamp: {timestamp}") fig.canvas.draw_idle() slider.on_changed(update) @@ -326,16 +327,19 @@ class GlobalBitMap: fig, axd = plt.subplot_mosaic( form, gridspec_kw=gs_kw, layout="constrained") - ims = {} - # Initial plot (last timestamp) + ims = [] + # Initial plot for storage_id in range(self.config.num_storage_ids): name = f"storage_{storage_id}" + # NOTE: We first have to plot a later timestamp to initialize the size of the axes else + # the bitmaps of other storages aren't correctly drawn. We manually set the starting + # time to 1 shortly before plotting. storage_bitmap = self.get_storage_bitmap(storage_id) resized_bitmap = reshape_to_axes(storage_bitmap, axd[name], fig) - ims[name] = {} - ims[name]["bm"] = storage_bitmap - ims[name]["im"] = axd[name].imshow( + ims.append({}) + ims[storage_id]["bm"] = storage_bitmap + ims[storage_id]["im"] = axd[name].imshow( resized_bitmap, cmap="gray_r", aspect="auto", interpolation=None) axd[name].set_title(f"Storage {storage_id}") axd[name].set_xlabel("Block") @@ -344,8 +348,8 @@ class GlobalBitMap: axd[name].set_yticks([]) bbox = axd[name].get_window_extent().transformed( fig.dpi_scale_trans.inverted()) - ims[name]["width"] = bbox.width - ims[name]["height"] = bbox.height + ims[storage_id]["width"] = bbox.width + ims[storage_id]["height"] = bbox.height slider = Slider( axd["slider"], @@ -357,19 +361,19 @@ class GlobalBitMap: ) def update(val): - timestamp = int(slider.val) + timestep = int(slider.val) + for storage_id in range(self.config.num_storage_ids): - name = f"storage_{storage_id}" - self.get_storage(ims[name]["bm"], storage_id, timestamp) + self.get_storage(ims[storage_id]["bm"], storage_id, timestep) resized_bitmap = 
reshape_to_close_aspect( - ims[name]["bm"], ims[name]["width"], ims[name]["height"]) + ims[storage_id]["bm"], ims[storage_id]["width"], ims[storage_id]["height"]) - ims[name]["im"].set_data(resized_bitmap) + ims[storage_id]["im"].set_data(resized_bitmap) fig.canvas.draw_idle() - # identify_axes(axd) slider.on_changed(update) + slider.set_val(1) plt.show() @@ -423,7 +427,7 @@ def reshape_to_close_aspect(arr_1d, width, height): return arr_2d -@functools.lru_cache(128) +@ functools.lru_cache(128) def get_close_aspect(width, height, len) -> (int, int): # Calculate target aspect ratio target_aspect = width / height From 98ade185a67c3577e516ea8a7cd2caa7546d5aa8 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Thu, 24 Oct 2024 08:28:49 +0200 Subject: [PATCH 12/49] Experimented with unpacked storage --- betree/scripts/visualize_allocation_log | 3 +++ 1 file changed, 3 insertions(+) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 15ca4b6b..6afed0fa 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -98,6 +98,9 @@ def remaining_bytes(file_pointer) -> int: class GlobalBitMap: config: Config + # NOTE: the bitmap is stored as packed bits and has to be unpacked for accurate plotting, + # based on some experimentation storing unpacked (and using 8x more memory) isn't worth it for + # faster plotting # dict [ (storage, disk, segment), list[(time, bitmap)]] bitmap: dict[(int, int, int), list[(int, np.ndarray)]] log_file: str From b172a39f9ab7b9cd02ec81739fba1967b57a465f Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Thu, 24 Oct 2024 09:24:58 +0200 Subject: [PATCH 13/49] Enforce max timestep --- betree/scripts/visualize_allocation_log | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 6afed0fa..8afa5213 100755 --- a/betree/scripts/visualize_allocation_log +++ 
b/betree/scripts/visualize_allocation_log @@ -289,8 +289,8 @@ class GlobalBitMap: ) bitmap_square = padded_bitmap.reshape(size, size) im = ax.imshow(bitmap_square, cmap="gray_r", - interpolation=None, animated=True) - ax.set_title(f"Global Storage Bitmap") + interpolation=None) + ax.set_title("Global Storage Bitmap") ax.set_xlabel("Block") ax.set_ylabel("Block") ax.set_xticks([]) @@ -309,6 +309,10 @@ class GlobalBitMap: def update(val): timestamp = int(slider.val) + if timestamp > self.time - 1: + slider.set_val(self.time - 1) + return + self.get(padded_bitmap, timestamp) bitmap_square = padded_bitmap.reshape(size, size) From 69b88904881a0b051bcf01833a97ea5c121ce0b2 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Thu, 24 Oct 2024 09:30:26 +0200 Subject: [PATCH 14/49] Remove simple plot --- betree/scripts/visualize_allocation_log | 49 ------------------------- 1 file changed, 49 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 8afa5213..c0a7a7b5 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -273,55 +273,6 @@ class GlobalBitMap: return result - def plot_simple(self): - """Plots the bitmap with an interactive slider for timestamp selection.""" - - fig, ax = plt.subplots() - plt.subplots_adjust(bottom=0.25) - - # Initial plot - bitmap_np = self.get_bitmap(1) - - # Calculate the size of the square - size = int(np.ceil(bitmap_np.shape[0] ** 0.5)) - padded_bitmap = np.pad( - bitmap_np, (0, size * size - bitmap_np.shape[0]), "constant" - ) - bitmap_square = padded_bitmap.reshape(size, size) - im = ax.imshow(bitmap_square, cmap="gray_r", - interpolation=None) - ax.set_title("Global Storage Bitmap") - ax.set_xlabel("Block") - ax.set_ylabel("Block") - ax.set_xticks([]) - ax.set_yticks([]) - - # Create slider - ax_slider = plt.axes([0.25, 0.1, 0.65, 0.03]) - slider = Slider( - ax_slider, - "Timestamp", - 0, - self.time - 1, - valinit=1, - valstep=1, - ) 
- - def update(val): - timestamp = int(slider.val) - if timestamp > self.time - 1: - slider.set_val(self.time - 1) - return - - self.get(padded_bitmap, timestamp) - bitmap_square = padded_bitmap.reshape(size, size) - - im.set_data(bitmap_square) - fig.canvas.draw_idle() - - slider.on_changed(update) - plt.show() - def plot(self): """Plots the bitmap with an interactive slider for timestamp selection and checkboxes for storage toggling.""" form = [ From 667fd52bbf9a0894176e5aa02cdf321511854578 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Thu, 24 Oct 2024 10:23:22 +0200 Subject: [PATCH 15/49] visual improvements --- betree/scripts/visualize_allocation_log | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index c0a7a7b5..59d3eac1 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -276,14 +276,19 @@ class GlobalBitMap: def plot(self): """Plots the bitmap with an interactive slider for timestamp selection and checkboxes for storage toggling.""" form = [ - ["storage_0", "storage_1", "storage_2", "storage_3"], - ["frag_0", "frag_1", "frag_2", "frag_3"], - ["frag_global", "frag_global", "frag_global", "frag_global"], - ["slider", "slider", "slider", "slider"], + [".", ".", ".", ".", ".", "."], + [".", "storage_0", "storage_1", "storage_2", "storage_3", "."], + [".", "frag_0", "frag_1", "frag_2", "frag_3", "."], + [".", "frag_global", "frag_global", "frag_global", "frag_global", "."], + [".", "slider", "slider", "slider", "slider", "."], + [".", ".", ".", ".", ".", "."], ] - gs_kw = dict(width_ratios=[1, 1, 1, 1], height_ratios=[3, 1, 1, 0.03]) + gs_kw = dict(width_ratios=[0.03, 1, 1, 1, 1, 0.03], height_ratios=[ + 0.03, 3, 1, 1, 0.1, 0.03]) fig, axd = plt.subplot_mosaic( form, gridspec_kw=gs_kw, layout="constrained") + fig.set_size_inches(16, 9) + fig.set_dpi(1920/16) ims = [] # Initial plot 
@@ -309,17 +314,24 @@ class GlobalBitMap: ims[storage_id]["width"] = bbox.width ims[storage_id]["height"] = bbox.height + # Create the format specifier with appropriate spacing to prevent moving of axes + max_digits = len(str(self.time - 1)) + valfmt = f"%{max_digits}d" slider = Slider( axd["slider"], - "Timestamp", + "", 0, self.time - 1, valinit=self.time - 1, valstep=1, + valfmt=valfmt, ) def update(val): timestep = int(slider.val) + if timestep > self.time - 1: + slider.set_val(self.time - 1) + return for storage_id in range(self.config.num_storage_ids): self.get_storage(ims[storage_id]["bm"], storage_id, timestep) @@ -419,5 +431,4 @@ if __name__ == "__main__": global_bitmap = GlobalBitMap(log_file, config) print(global_bitmap) - global_bitmap.plot_simple() global_bitmap.plot() From c9a222595411c04b14cbbfe7b15f37fbd6b10a16 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Thu, 24 Oct 2024 10:53:42 +0200 Subject: [PATCH 16/49] First export to mp4 --- betree/scripts/visualize_allocation_log | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 59d3eac1..22a0f277 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -1,13 +1,13 @@ #!/usr/bin/env python3 - import functools from math import floor import os import struct import sys import matplotlib.pyplot as plt -from matplotlib.widgets import Slider, CheckButtons +from matplotlib.animation import FFMpegWriter +from matplotlib.widgets import Slider import numpy as np from tqdm import tqdm @@ -219,7 +219,6 @@ class GlobalBitMap: # Get the bitmap of a disk id at time. If no time is provided return the # bitmap of the last timestep. 
# If it doesn't match with the config throw an errer - def get_disk(self, output_bitmap: np.array, storage_id: int, disk_id: int, time: int = None) -> int: if time is None or time > self.time: time = self.time @@ -343,6 +342,15 @@ class GlobalBitMap: fig.canvas.draw_idle() slider.on_changed(update) + + slider.set_val(1) + writer = FFMpegWriter(fps=60, bitrate=1800) + writer.setup(fig, "test.mp4", dpi=100) + for i in tqdm(range(self.time)): + slider.set_val(slider.val + 1) + writer.grab_frame() + writer.finish() + slider.set_val(1) plt.show() From 25ae21a36d15e3e83cc8b2ee80d5b80854bc9fab Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Thu, 24 Oct 2024 11:46:55 +0200 Subject: [PATCH 17/49] minor --- betree/scripts/visualize_allocation_log | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 22a0f277..00c42629 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -113,7 +113,7 @@ class GlobalBitMap: self._build_global_bitmap() def __str__(self): - return f"Log_file: {self.log_file}\n" + f"Time: {self.time}\n" + \ + return f"Log_file: {self.log_file}\nTime: {self.time}\n" + \ f"Config: {self.config}\n" def _initialize_bitmap(self): @@ -201,7 +201,7 @@ class GlobalBitMap: raise IndexError(f"Tried to access storage with id { storage_id} of 0-{self.config.num_storage_ids - 1}") if self.config.disks_of_storage_id(storage_id) == 0: - return np.zeros(0) + return 0 start = 0 for disk_id in range(self.config.disks_of_storage_id(storage_id)): @@ -228,7 +228,7 @@ class GlobalBitMap: raise IndexError(f"Tried to access storage with id { storage_id} of 0-{self.config.num_storage_ids - 1}") if self.config.disks_of_storage_id(storage_id) == 0: - return np.zeros(0) + return 0 if disk_id >= self.config.disks_of_storage_id(storage_id) or disk_id < 0: raise IndexError(f"Tried to access disk with id { disk_id} of 
0-{self.config.disks_of_storage_id(storage_id) - 1}") From 199654a2b6060aac26682fd9a170d68b1880f12a Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Thu, 24 Oct 2024 14:53:52 +0200 Subject: [PATCH 18/49] Color disks seperately --- betree/scripts/visualize_allocation_log | 79 ++++++++++++++++++++----- 1 file changed, 63 insertions(+), 16 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 00c42629..c47644d3 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -5,6 +5,7 @@ from math import floor import os import struct import sys + import matplotlib.pyplot as plt from matplotlib.animation import FFMpegWriter from matplotlib.widgets import Slider @@ -105,16 +106,18 @@ class GlobalBitMap: bitmap: dict[(int, int, int), list[(int, np.ndarray)]] log_file: str time: int = 0 + disk_begins: list[list] = [] def __init__(self, log_file: str, config: Config): self.config = config self.log_file = log_file self._initialize_bitmap() + self._initialize_begins() self._build_global_bitmap() def __str__(self): return f"Log_file: {self.log_file}\nTime: {self.time}\n" + \ - f"Config: {self.config}\n" + f"Config: {self.config}\nDisk_begins: {self.disk_begins}\n" def _initialize_bitmap(self): self.bitmap = {} @@ -132,6 +135,14 @@ class GlobalBitMap: (0, bitmap)] used_blocks += segment_size + def _initialize_begins(self): + for storage_id in range(self.config.num_storage_ids): + self.disk_begins.append([]) + offset = 0 + for disk_id in range(self.config.disks_of_storage_id(storage_id)): + self.disk_begins[storage_id].append(offset) + offset += self.config.blocks_of_disk(storage_id, disk_id) + def _build_global_bitmap(self): """Builds a global bitmap representation for each time step.""" with open(self.log_file, "rb") as f: @@ -297,12 +308,15 @@ class GlobalBitMap: # the bitmaps of other storages aren't correctly drawn. 
We manually set the starting # time to 1 shortly before plotting. storage_bitmap = self.get_storage_bitmap(storage_id) - resized_bitmap = reshape_to_axes(storage_bitmap, axd[name], fig) + colored_bitmap = self._color_disks(storage_bitmap, storage_id) + resized_bitmap = reshape_to_axes( + colored_bitmap, axd[name], fig) ims.append({}) - ims[storage_id]["bm"] = storage_bitmap + ims[storage_id]["storage_bitmap"] = storage_bitmap + ims[storage_id]["resized_bitmap"] = resized_bitmap ims[storage_id]["im"] = axd[name].imshow( - resized_bitmap, cmap="gray_r", aspect="auto", interpolation=None) + resized_bitmap, aspect="auto", interpolation=None) axd[name].set_title(f"Storage {storage_id}") axd[name].set_xlabel("Block") axd[name].set_ylabel("Block") @@ -333,11 +347,14 @@ class GlobalBitMap: return for storage_id in range(self.config.num_storage_ids): - self.get_storage(ims[storage_id]["bm"], storage_id, timestep) + self.get_storage( + ims[storage_id]["storage_bitmap"], storage_id, timestep) resized_bitmap = reshape_to_close_aspect( - ims[storage_id]["bm"], ims[storage_id]["width"], ims[storage_id]["height"]) + ims[storage_id]["storage_bitmap"], ims[storage_id]["width"], ims[storage_id]["height"]) + ims[storage_id]["resized_bitmap"][:, :, 3] = resized_bitmap * 255 - ims[storage_id]["im"].set_data(resized_bitmap) + ims[storage_id]["im"].set_data( + ims[storage_id]["resized_bitmap"]) fig.canvas.draw_idle() @@ -354,11 +371,43 @@ class GlobalBitMap: slider.set_val(1) plt.show() + def _color_disks(self, storage_bitmap: np.array, storage_id: int) -> np.ndarray: + """Colors the disks within a storage differently.""" + colored_bitmap = np.zeros((len(storage_bitmap), 4), dtype=np.uint8) + colored_bitmap[:, 3] = storage_bitmap + for disk_id in range(self.config.disks_of_storage_id(storage_id)): + start = self.disk_begins[storage_id][disk_id] + end = start + self.config.blocks_of_disk(storage_id, disk_id) + length = end - start + + color = id_to_color(disk_id) + color_array = 
np.tile(color, (length, 1)) + + colored_bitmap[start:end, 0:3] = color_array + + return colored_bitmap + -def reshape_to_axes(arr_1d, ax, fig): +color_mapping = [ + (0, 0, 0), + (0, 0, 255), + (0, 255, 0), + (0, 255, 255), + (255, 0, 0), + (255, 0, 255), + (255, 255, 0), + (255, 255, 255), +] + + +def id_to_color(id: int) -> (int, int, int): + return color_mapping[id % len(color_mapping)] + + +def reshape_to_axes(arr, ax, fig): """ - Reshapes a 1D NumPy array to a 2D array with dimensions close to the - aspect ratio of the given Matplotlib axes. + Reshapes the first dimension of a NumPy array to a array with the first two + dimensions close to the aspect ratio of the given Matplotlib axes. Args: arr_1d: The 1D NumPy array to reshape. @@ -377,7 +426,7 @@ def reshape_to_axes(arr_1d, ax, fig): target_aspect = width / height # Calculate the ideal number of columns for the target aspect ratio - total_pixels = len(arr_1d) + total_pixels = len(arr) cols = int(np.sqrt(total_pixels * target_aspect)) # Adjust columns to find the closest aspect ratio while using all pixels @@ -386,10 +435,8 @@ def reshape_to_axes(arr_1d, ax, fig): cols -= 1 rows = total_pixels // cols - # Reshape the 1D array to the calculated dimensions - arr_2d = arr_1d.reshape(rows, cols) - - return arr_2d + # Reshape the array to the calculated dimensions + return arr.reshape(rows, cols, 4) def reshape_to_close_aspect(arr_1d, width, height): @@ -405,7 +452,7 @@ def reshape_to_close_aspect(arr_1d, width, height): return arr_2d -@ functools.lru_cache(128) +@functools.lru_cache(128) def get_close_aspect(width, height, len) -> (int, int): # Calculate target aspect ratio target_aspect = width / height From f50ee7728218b4051e328b474dc00f73eaa0a5be Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Fri, 25 Oct 2024 08:55:44 +0200 Subject: [PATCH 19/49] Validation helpers --- betree/scripts/visualize_allocation_log | 49 +++++++++++++------------ 1 file changed, 26 insertions(+), 23 deletions(-) diff --git 
a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index c47644d3..7d7a8404 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -62,6 +62,20 @@ class Config: def blocks_of_disk(self, storage_id: int, disk_id: int) -> int: return self.blocks_per_disk[storage_id][disk_id] + def is_valid_storage(self, storage_id) -> bool: + if storage_id >= self.num_storage_ids or storage_id < 0: + return False + return True + + def is_valid_disk(self, storage_id, disk_id) -> bool: + if not self.is_valid_storage(storage_id): + return False + + if disk_id >= self.disks_of_storage_id(storage_id) or disk_id < 0: + return False + + return True + def parse_header(log_file: str) -> Config: """Parses the global header of the allocation log file.""" @@ -178,13 +192,17 @@ class GlobalBitMap: self.bitmap[(storage_id, disk_id, segment_id) ].append((self.time, new_bitmap)) + def _get_valid_time(self, time: int = None) -> int: + if time is None or time > self.time: + return self.time + if time < 0: + return 0 + return time + # Get the global bitmap at time. If no time is provided return the bitmap # of the last timestep. def get(self, output_bitmap: np.array, time: int = None) -> int: - if time is None or time > self.time: - time = self.time - if time < 0: - raise IndexError("Time has to be non-negative") + time = self._get_valid_time(time) start = 0 for storage_id in range(self.config.num_storage_ids): @@ -204,14 +222,8 @@ class GlobalBitMap: # bitmap of the last timestep. 
# If it doesn't match with the config throw an errer def get_storage(self, output_bitmap: np.array, storage_id: int, time: int = None) -> int: - if time is None or time > self.time: - time = self.time - if time < 0: - raise IndexError("Time has to be non-negative") - if storage_id >= self.config.num_storage_ids or storage_id < 0: - raise IndexError(f"Tried to access storage with id { - storage_id} of 0-{self.config.num_storage_ids - 1}") - if self.config.disks_of_storage_id(storage_id) == 0: + time = self._get_valid_time(time) + if not config.is_valid_storage(storage_id): return 0 start = 0 @@ -231,18 +243,9 @@ class GlobalBitMap: # bitmap of the last timestep. # If it doesn't match with the config throw an errer def get_disk(self, output_bitmap: np.array, storage_id: int, disk_id: int, time: int = None) -> int: - if time is None or time > self.time: - time = self.time - if time < 0: - raise IndexError("Time has to be non-negative") - if storage_id >= self.config.num_storage_ids or storage_id < 0: - raise IndexError(f"Tried to access storage with id { - storage_id} of 0-{self.config.num_storage_ids - 1}") - if self.config.disks_of_storage_id(storage_id) == 0: + time = self._get_valid_time(time) + if not self.config.is_valid_disk(storage_id, disk_id): return 0 - if disk_id >= self.config.disks_of_storage_id(storage_id) or disk_id < 0: - raise IndexError(f"Tried to access disk with id { - disk_id} of 0-{self.config.disks_of_storage_id(storage_id) - 1}") blocks_per_segment = self.config.blocks_per_segment entries = filter(lambda k: k[0] == From 6c4b8249ba2078eeb75ed2378bc5f9741601f32a Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Fri, 25 Oct 2024 10:07:12 +0200 Subject: [PATCH 20/49] minor --- betree/scripts/visualize_allocation_log | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 7d7a8404..37105d9d 100755 --- a/betree/scripts/visualize_allocation_log +++ 
b/betree/scripts/visualize_allocation_log @@ -166,9 +166,7 @@ class GlobalBitMap: timesteps = remaining_bytes(f) // SIZE_PER_ALLOCATION - i = 0 - # while True: - for i in tqdm(range(timesteps), desc="Building bitmap"): + for _ in tqdm(range(timesteps), desc="Building Global Bitmap"): # Read Allocation try: op_type = struct.unpack(" Date: Fri, 25 Oct 2024 10:08:46 +0200 Subject: [PATCH 21/49] Calculate and Plot fragmentation --- betree/scripts/visualize_allocation_log | 142 ++++++++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 37105d9d..e4a1b079 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -121,6 +121,10 @@ class GlobalBitMap: log_file: str time: int = 0 disk_begins: list[list] = [] + # storage_id[timestep[(frag, total_free, largest_free)]] + storage_frag: list[list[(float, int, int)]] = [] + # timestep[(frag, total_free, largest_free)] + global_frag: list[(float, int, int)] = [] def __init__(self, log_file: str, config: Config): self.config = config @@ -128,6 +132,8 @@ class GlobalBitMap: self._initialize_bitmap() self._initialize_begins() self._build_global_bitmap() + self._initialize_frag() + self._build_fragmentation() def __str__(self): return f"Log_file: {self.log_file}\nTime: {self.time}\n" + \ @@ -197,6 +203,82 @@ class GlobalBitMap: return 0 return time + def _initialize_frag(self): + for storage_id in range(self.config.num_storage_ids): + largest_size = 0 + total_size = 0 + for disk_id in range(self.config.disks_of_storage_id(storage_id)): + blocks = self.config.blocks_of_disk(storage_id, disk_id) + largest_size = max(largest_size, blocks) + total_size += blocks + + self.storage_frag.append([]) + self.storage_frag[storage_id].append( + (calculate_fragmentation(largest_size, total_size), total_size, largest_size)) + + def _build_fragmentation(self): + for timestep in tqdm(range(1, self.time), 
"Calculating Fragmentation Data"): + largest_free = 0 + total_free = 0 + for storage_id in range(self.config.num_storage_ids): + frag, total, largest = self.calculate_fragmentation_storage( + storage_id, timestep) + self.storage_frag[storage_id].append((frag, total, largest)) + largest_free = max(largest, largest_free) + total_free += total + + self.global_frag.append((calculate_fragmentation( + total_free, largest_free), total_free, largest_free)) + + def calculate_fragmentation(self, time: int = None) -> (float, int, int): + time = self._get_valid_time(time) + + largest_free = 0 + total_free = 0 + for storage_id in range(self.config.num_storage_ids): + _, total, largest = self.calculate_fragmentation_storage( + storage_id, time) + largest_free = max(largest, largest_free) + total_free += total + + return calculate_fragmentation(total_free, largest_free), total_free, largest_free + + def calculate_fragmentation_storage(self, storage_id, time: int = None) -> (float, int, int): + time = self._get_valid_time(time) + if not self.config.is_valid_storage(storage_id): + return (0.0, 0, 0) + + largest_free = 0 + total_free = 0 + for disk_id in range(self.config.disks_of_storage_id(storage_id)): + _, total, largest = self.calculate_fragmentation_disk( + storage_id, disk_id, time) + largest_free = max(largest, largest_free) + total_free += total + + return calculate_fragmentation(total_free, largest_free), total_free, largest_free + + def calculate_fragmentation_disk(self, storage_id, disk_id, time: int = None) -> (float, int, int): + time = self._get_valid_time(time) + if not self.config.is_valid_disk(storage_id, disk_id): + return (0.0, 0, 0) + + entries = filter(lambda k: k[0] == + storage_id and k[1] == disk_id, self.bitmap) + + largest_free = 0 + total_free = 0 + for entry in entries: + time, bitmap_at_time = self._binary_search_time( + time, entry) + + segment_bitmap = np.unpackbits(bitmap_at_time) + _, total, largest = fragmentation_of_bitmap(segment_bitmap) + 
largest_free = max(largest, largest_free) + total_free += total + + return calculate_fragmentation(total_free, largest_free), total_free, largest_free + # Get the global bitmap at time. If no time is provided return the bitmap # of the last timestep. def get(self, output_bitmap: np.array, time: int = None) -> int: @@ -301,6 +383,8 @@ class GlobalBitMap: fig.set_size_inches(16, 9) fig.set_dpi(1920/16) + self._plot_fragmentation(axd) + ims = [] # Initial plot for storage_id in range(self.config.num_storage_ids): @@ -372,6 +456,28 @@ class GlobalBitMap: slider.set_val(1) plt.show() + def _plot_fragmentation(self, axd): + """Plots the fragmentation data.""" + for storage_id in range(self.config.num_storage_ids): + # Extract fragmentation values from storage_frag + frag_values = [frag for frag, _, + _ in self.storage_frag[storage_id]] + axd[f"frag_{storage_id}"].plot(frag_values) + axd[f"frag_{storage_id}"].set_xlim([0, self.time - 1]) + axd[f"frag_{storage_id}"].set_ylim([0, 1]) + axd[f"frag_{storage_id}"].set_xlabel("Timestamp") + if storage_id == 0: + axd[f"frag_{storage_id}"].set_ylabel("Fragmentation") + + # Extract fragmentation values from global_frag + frag_values = [frag for frag, _, _ in self.global_frag] + axd["frag_global"].plot(frag_values) + axd["frag_global"].set_ylim([0, 1]) + axd["frag_global"].set_xlim([0, self.time - 1]) + axd["frag_global"].set_title("Global Fragmentation") + axd["frag_global"].set_xlabel("Timestamp") + axd["frag_global"].set_ylabel("Fragmentation") + def _color_disks(self, storage_bitmap: np.array, storage_id: int) -> np.ndarray: """Colors the disks within a storage differently.""" colored_bitmap = np.zeros((len(storage_bitmap), 4), dtype=np.uint8) @@ -471,6 +577,42 @@ def get_close_aspect(width, height, len) -> (int, int): return rows, cols +def fragmentation_of_bitmap(bitmap: np.array) -> (float, int, int): + """Calculates the fragmentation of a bitmap.""" + if len(bitmap) == 0: + return 0 + + # total_free = len(np.where(bitmap == 
0)) + total_free = np.count_nonzero(bitmap == 0) + largest_free = longest_repeating_0s(bitmap) + frag = calculate_fragmentation(largest_free, total_free) + + return frag, total_free, largest_free + + +def calculate_fragmentation(total_free: int, largest_free: int) -> float: + return 1 - (largest_free / total_free) + + +def longest_repeating_0s(arr: np.array) -> int: + """ + Calculates the maximum consecutive count of 0s in a binary numpy array. + + Args: + binary_array: A numpy array containing only 0s and 1s. + + Returns: + Maximum consecutive count of 0s. + """ + # Find indices where the array changes value + indices = np.where(np.diff(arr))[0] + 1 + # Split the array at these indices + splits = np.split(arr, indices) + + # Calculate lengths of splits and find maximum for 0 + return max([len(s) for s in splits if s[0] == 0], default=0) + + def identify_axes(ax_dict, fontsize=24): kw = dict(ha="center", va="center", fontsize=fontsize, color="darkgrey") for k, ax in ax_dict.items(): From 155a857de59985cb66900f0a28a3ca8bb712bcc2 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Fri, 25 Oct 2024 11:00:57 +0200 Subject: [PATCH 22/49] Indicate Timestep, handle empty storage --- betree/scripts/visualize_allocation_log | 84 +++++++++++++++++-------- 1 file changed, 59 insertions(+), 25 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index e4a1b079..b92a068b 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -383,7 +383,7 @@ class GlobalBitMap: fig.set_size_inches(16, 9) fig.set_dpi(1920/16) - self._plot_fragmentation(axd) + vlines = self._plot_fragmentation(axd) ims = [] # Initial plot @@ -393,9 +393,16 @@ class GlobalBitMap: # the bitmaps of other storages aren't correctly drawn. We manually set the starting # time to 1 shortly before plotting. 
storage_bitmap = self.get_storage_bitmap(storage_id) - colored_bitmap = self._color_disks(storage_bitmap, storage_id) - resized_bitmap = reshape_to_axes( - colored_bitmap, axd[name], fig) + if len(storage_bitmap) != 0: + colored_bitmap = self._color_disks(storage_bitmap, storage_id) + resized_bitmap = reshape_to_axes( + colored_bitmap, axd[name], fig) + else: + storage_ax = axd[f"storage_{storage_id}"] + kw = dict(ha="center", va="center", + fontsize=12, color="darkgrey") + storage_ax.text( + 0.5, 0.5, "[Empty]", transform=storage_ax.transAxes, **kw) ims.append({}) ims[storage_id]["storage_bitmap"] = storage_bitmap @@ -434,13 +441,17 @@ class GlobalBitMap: for storage_id in range(self.config.num_storage_ids): self.get_storage( ims[storage_id]["storage_bitmap"], storage_id, timestep) - resized_bitmap = reshape_to_close_aspect( - ims[storage_id]["storage_bitmap"], ims[storage_id]["width"], ims[storage_id]["height"]) - ims[storage_id]["resized_bitmap"][:, :, 3] = resized_bitmap * 255 + if len(ims[storage_id]["storage_bitmap"]) != 0: + resized_bitmap = reshape_to_close_aspect( + ims[storage_id]["storage_bitmap"], ims[storage_id]["width"], ims[storage_id]["height"]) + ims[storage_id]["resized_bitmap"][:, + :, 3] = resized_bitmap * 255 - ims[storage_id]["im"].set_data( - ims[storage_id]["resized_bitmap"]) + ims[storage_id]["im"].set_data( + ims[storage_id]["resized_bitmap"]) + vlines[storage_id].set_xdata([timestep, timestep]) + vlines["global"].set_xdata([timestep, timestep]) fig.canvas.draw_idle() slider.on_changed(update) @@ -456,27 +467,48 @@ class GlobalBitMap: slider.set_val(1) plt.show() - def _plot_fragmentation(self, axd): + def _plot_fragmentation(self, axd) -> dict: """Plots the fragmentation data.""" + # Vertical lines that indicate the timestamp + vlines = {} + for storage_id in range(self.config.num_storage_ids): - # Extract fragmentation values from storage_frag - frag_values = [frag for frag, _, - _ in self.storage_frag[storage_id]] - 
axd[f"frag_{storage_id}"].plot(frag_values) - axd[f"frag_{storage_id}"].set_xlim([0, self.time - 1]) - axd[f"frag_{storage_id}"].set_ylim([0, 1]) - axd[f"frag_{storage_id}"].set_xlabel("Timestamp") - if storage_id == 0: - axd[f"frag_{storage_id}"].set_ylabel("Fragmentation") + frag_ax = axd[f"frag_{storage_id}"] + if self.config.blocks_of_storage_id(storage_id) != 0: + # Extract fragmentation values from storage_frag + frag_values = [frag for frag, _, + _ in self.storage_frag[storage_id]] + frag_ax.plot(frag_values) + frag_ax.set_xlim([0, self.time - 1]) + frag_ax.set_ylim([0, 1]) + frag_ax.set_xlabel("Timestamp") + if storage_id == 0: + frag_ax.set_ylabel("Fragmentation") + + vlines[storage_id] = frag_ax.axvline( + # Initial position at the end + x=self.time - 1, color="red", linestyle="--", linewidth=1) + else: + kw = dict(ha="center", va="center", + fontsize=12, color="darkgrey") + frag_ax.text( + 0.5, 0.5, "[Empty]", transform=frag_ax.transAxes, **kw) # Extract fragmentation values from global_frag + global_frag_ax = axd["frag_global"] frag_values = [frag for frag, _, _ in self.global_frag] - axd["frag_global"].plot(frag_values) - axd["frag_global"].set_ylim([0, 1]) - axd["frag_global"].set_xlim([0, self.time - 1]) - axd["frag_global"].set_title("Global Fragmentation") - axd["frag_global"].set_xlabel("Timestamp") - axd["frag_global"].set_ylabel("Fragmentation") + global_frag_ax.plot(frag_values) + global_frag_ax.set_ylim([0, 1]) + global_frag_ax.set_xlim([0, self.time - 1]) + global_frag_ax.set_title("Global Fragmentation") + global_frag_ax.set_xlabel("Timestamp") + global_frag_ax.set_ylabel("Fragmentation") + + vlines["global"] = global_frag_ax.axvline( + # Initial position at the end + x=self.time - 1, color="red", linestyle="--", linewidth=1) + + return vlines def _color_disks(self, storage_bitmap: np.array, storage_id: int) -> np.ndarray: """Colors the disks within a storage differently.""" @@ -591,6 +623,8 @@ def fragmentation_of_bitmap(bitmap: np.array) 
-> (float, int, int): def calculate_fragmentation(total_free: int, largest_free: int) -> float: + if total_free == 0: + return 0 return 1 - (largest_free / total_free) From c549a579ebf0e9c120b38d79ce52d848717ec341 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Fri, 25 Oct 2024 14:23:26 +0200 Subject: [PATCH 23/49] Type hints, formatting --- betree/scripts/visualize_allocation_log | 206 +++++++++++------------- 1 file changed, 96 insertions(+), 110 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index b92a068b..75755897 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -23,11 +23,11 @@ SEGMENT_SIZE_MASK = SEGMENT_SIZE - 1 SIZE_PER_ALLOCATION = 13 -def parse_disk_offset(offset: int) -> (int, int, int): +def parse_disk_offset(offset: int) -> tuple[int, int, int, int]: storage_class = (offset & MASK_STORAGE_CLASS) >> (52 + 10) - disk_id = ((offset & MASK_DISK_ID) >> 52) - block_offset = (offset & MASK_OFFSET) - segment_id = (block_offset & ~SEGMENT_SIZE_MASK) + disk_id = (offset & MASK_DISK_ID) >> 52 + block_offset = offset & MASK_OFFSET + segment_id = block_offset & ~SEGMENT_SIZE_MASK return storage_class, disk_id, segment_id, block_offset @@ -37,11 +37,8 @@ class Config: blocks_per_disk: list[list[int]] blocks_per_segment: int - def __init__(self, num_storage_ids: int, - disks_per_storage_id: list[int], - blocks_per_disk: list[list[int]], - blocks_per_segment: int, - ): + def __init__(self, num_storage_ids: int, disks_per_storage_id: list[int], + blocks_per_disk: list[list[int]], blocks_per_segment: int): self.num_storage_ids = num_storage_ids self.disks_per_storage_id = disks_per_storage_id self.blocks_per_disk = blocks_per_disk @@ -53,21 +50,26 @@ class Config: blocks_per_disk: {self.blocks_per_disk}, \ blocks_per_segment: {self.blocks_per_segment}" + def blocks_global(self) -> int: + return sum(self.blocks_of_storage_id(storage_id) for + storage_id 
in range(self.num_storage_ids)) + def disks_of_storage_id(self, storage_id: int) -> int: return self.disks_per_storage_id[storage_id] def blocks_of_storage_id(self, storage_id: int) -> int: - return sum(self.blocks_of_disk(storage_id, disk_id) for disk_id in range(self.disks_of_storage_id(storage_id))) + return sum(self.blocks_of_disk(storage_id, disk_id) for + disk_id in range(self.disks_of_storage_id(storage_id))) def blocks_of_disk(self, storage_id: int, disk_id: int) -> int: return self.blocks_per_disk[storage_id][disk_id] - def is_valid_storage(self, storage_id) -> bool: + def is_valid_storage(self, storage_id: int) -> bool: if storage_id >= self.num_storage_ids or storage_id < 0: return False return True - def is_valid_disk(self, storage_id, disk_id) -> bool: + def is_valid_disk(self, storage_id: int, disk_id: int) -> bool: if not self.is_valid_storage(storage_id): return False @@ -90,15 +92,11 @@ def parse_header(log_file: str) -> Config: for i in range(num_classes): blocks_per_disk.append([]) for _ in range(disks_per_class[i]): - blocks_per_disk[i].append( - struct.unpack(" int: @@ -116,15 +114,15 @@ class GlobalBitMap: # NOTE: the bitmap is stored as packed bits and has to be unpacked for accurate plotting, # based on some experimentation storing unpacked (and using 8x more memory) isn't worth it for # faster plotting - # dict [ (storage, disk, segment), list[(time, bitmap)]] - bitmap: dict[(int, int, int), list[(int, np.ndarray)]] + # dict [tuple[storage, disk, segment], list[tuple[time, bitmap]]] + bitmap: dict[tuple[int, int, int], list[tuple[int, np.ndarray]]] log_file: str time: int = 0 disk_begins: list[list] = [] - # storage_id[timestep[(frag, total_free, largest_free)]] - storage_frag: list[list[(float, int, int)]] = [] - # timestep[(frag, total_free, largest_free)] - global_frag: list[(float, int, int)] = [] + # storage_id[timestep[tuple[frag, total_free, largest_free]]] + storage_frag: list[list[tuple[float, int, int]]] = [] + # 
timestep[tuple[frag, total_free, largest_free]] + global_frag: list[tuple[float, int, int]] = [] def __init__(self, log_file: str, config: Config): self.config = config @@ -135,7 +133,7 @@ class GlobalBitMap: self._initialize_frag() self._build_fragmentation() - def __str__(self): + def __str__(self) -> str: return f"Log_file: {self.log_file}\nTime: {self.time}\n" + \ f"Config: {self.config}\nDisk_begins: {self.disk_begins}\n" @@ -146,13 +144,11 @@ class GlobalBitMap: used_blocks = 0 blocks_in_disk = self.config.blocks_of_disk( storage_id, disk_id) - while (used_blocks < blocks_in_disk): - segment_size = min( - self.config.blocks_per_segment, blocks_in_disk - used_blocks) + while used_blocks < blocks_in_disk: + segment_size = min(self.config.blocks_per_segment, blocks_in_disk - used_blocks) bitmap = np.packbits(np.zeros(segment_size, dtype=bool)) - self.bitmap[(storage_id, disk_id, used_blocks)] = [ - (0, bitmap)] + self.bitmap[(storage_id, disk_id, used_blocks)] = [(0, bitmap)] used_blocks += segment_size def _initialize_begins(self): @@ -167,8 +163,10 @@ class GlobalBitMap: """Builds a global bitmap representation for each time step.""" with open(self.log_file, "rb") as f: # Skip the global header (already parsed) - f.seek(1 + 2 * self.config.num_storage_ids + 8 * - sum(self.config.disks_per_storage_id) + 8) + header_length = (1 + + 2 * self.config.num_storage_ids + + 8 * sum(self.config.disks_per_storage_id) + 8) + f.seek(header_length) timesteps = remaining_bytes(f) // SIZE_PER_ALLOCATION @@ -183,18 +181,16 @@ class GlobalBitMap: self.time += 1 - storage_id, disk_id, segment_id, block_offset = parse_disk_offset( - offset) + storage_id, disk_id, segment_id, block_offset = parse_disk_offset(offset) segment_offset = block_offset % self.config.blocks_per_segment - # Update the bitmap at (storage_id, disk_id, segment_id) with - # op_type at block_offset and size + # Update the bitmap at (storage_id, disk_id, segment_id) + # with op_type at block_offset and size 
last_entry = self.bitmap[(storage_id, disk_id, segment_id)][-1] new_bitmap = np.unpackbits(last_entry[1]).copy() - new_bitmap[segment_offset: segment_offset+num_blocks] = op_type + new_bitmap[segment_offset: segment_offset + num_blocks] = op_type new_bitmap = np.packbits(new_bitmap) - self.bitmap[(storage_id, disk_id, segment_id) - ].append((self.time, new_bitmap)) + self.bitmap[(storage_id, disk_id, segment_id)].append((self.time, new_bitmap)) def _get_valid_time(self, time: int = None) -> int: if time is None or time > self.time: @@ -213,37 +209,35 @@ class GlobalBitMap: total_size += blocks self.storage_frag.append([]) - self.storage_frag[storage_id].append( - (calculate_fragmentation(largest_size, total_size), total_size, largest_size)) + frag = calculate_fragmentation(largest_size, total_size) + self.storage_frag[storage_id].append((frag, total_size, largest_size)) def _build_fragmentation(self): for timestep in tqdm(range(1, self.time), "Calculating Fragmentation Data"): largest_free = 0 total_free = 0 for storage_id in range(self.config.num_storage_ids): - frag, total, largest = self.calculate_fragmentation_storage( - storage_id, timestep) + frag, total, largest = self.calculate_fragmentation_storage(storage_id, timestep) self.storage_frag[storage_id].append((frag, total, largest)) largest_free = max(largest, largest_free) total_free += total - self.global_frag.append((calculate_fragmentation( - total_free, largest_free), total_free, largest_free)) + frag = calculate_fragmentation(total_free, largest_free) + self.global_frag.append((frag, total_free, largest_free)) - def calculate_fragmentation(self, time: int = None) -> (float, int, int): + def calculate_fragmentation(self, time: int = None) -> tuple[float, int, int]: time = self._get_valid_time(time) largest_free = 0 total_free = 0 for storage_id in range(self.config.num_storage_ids): - _, total, largest = self.calculate_fragmentation_storage( - storage_id, time) + _, total, largest = 
self.calculate_fragmentation_storage(storage_id, time) largest_free = max(largest, largest_free) total_free += total return calculate_fragmentation(total_free, largest_free), total_free, largest_free - def calculate_fragmentation_storage(self, storage_id, time: int = None) -> (float, int, int): + def calculate_fragmentation_storage(self, storage_id: int, time: int = None) -> tuple[float, int, int]: time = self._get_valid_time(time) if not self.config.is_valid_storage(storage_id): return (0.0, 0, 0) @@ -251,26 +245,23 @@ class GlobalBitMap: largest_free = 0 total_free = 0 for disk_id in range(self.config.disks_of_storage_id(storage_id)): - _, total, largest = self.calculate_fragmentation_disk( - storage_id, disk_id, time) + _, total, largest = self.calculate_fragmentation_disk(storage_id, disk_id, time) largest_free = max(largest, largest_free) total_free += total return calculate_fragmentation(total_free, largest_free), total_free, largest_free - def calculate_fragmentation_disk(self, storage_id, disk_id, time: int = None) -> (float, int, int): + def calculate_fragmentation_disk(self, storage_id: int, disk_id: int, time: int = None) -> tuple[float, int, int]: time = self._get_valid_time(time) if not self.config.is_valid_disk(storage_id, disk_id): return (0.0, 0, 0) - entries = filter(lambda k: k[0] == - storage_id and k[1] == disk_id, self.bitmap) + entries = filter(lambda k: k[0] == storage_id and k[1] == disk_id, self.bitmap) largest_free = 0 total_free = 0 for entry in entries: - time, bitmap_at_time = self._binary_search_time( - time, entry) + time, bitmap_at_time = self._binary_search_time(time, entry) segment_bitmap = np.unpackbits(bitmap_at_time) _, total, largest = fragmentation_of_bitmap(segment_bitmap) @@ -291,8 +282,7 @@ class GlobalBitMap: return start def get_bitmap(self, time: int = None) -> np.array: - num_blocks = sum(sum(self.config.blocks_per_disk[storage_id]) for storage_id in range( - self.config.num_storage_ids)) + num_blocks = 
self.config.blocks_global() bitmap = np.zeros(num_blocks, dtype=np.uint8) self.get(bitmap, time) @@ -303,13 +293,12 @@ class GlobalBitMap: # If it doesn't match with the config throw an errer def get_storage(self, output_bitmap: np.array, storage_id: int, time: int = None) -> int: time = self._get_valid_time(time) - if not config.is_valid_storage(storage_id): + if not self.config.is_valid_storage(storage_id): return 0 start = 0 for disk_id in range(self.config.disks_of_storage_id(storage_id)): - start += self.get_disk( - output_bitmap[start:], storage_id, disk_id, time) + start += self.get_disk(output_bitmap[start:], storage_id, disk_id, time) return start @@ -328,15 +317,12 @@ class GlobalBitMap: return 0 blocks_per_segment = self.config.blocks_per_segment - entries = filter(lambda k: k[0] == - storage_id and k[1] == disk_id, self.bitmap) + entries = filter(lambda k: k[0] == storage_id and k[1] == disk_id, self.bitmap) for entry in entries: segment_id = entry[2] - time, bitmap_at_time = self._binary_search_time( - time, entry) - end = min(segment_id + blocks_per_segment, - self.config.blocks_of_disk(entry[0], entry[1])) + time, bitmap_at_time = self._binary_search_time(time, entry) + end = min(segment_id + blocks_per_segment, self.config.blocks_of_disk(entry[0], entry[1])) output_bitmap[segment_id:end] = np.unpackbits(bitmap_at_time) @@ -349,7 +335,7 @@ class GlobalBitMap: return bitmap @functools.lru_cache(1024) - def _binary_search_time(self, time: int, entry: (int, int, int)) -> (int, np.array): + def _binary_search_time(self, time: int, entry: (int, int, int)) -> tuple[int, np.array]: bitmaps = self.bitmap[entry] left = 0 right = len(bitmaps) - 1 @@ -376,12 +362,11 @@ class GlobalBitMap: [".", "slider", "slider", "slider", "slider", "."], [".", ".", ".", ".", ".", "."], ] - gs_kw = dict(width_ratios=[0.03, 1, 1, 1, 1, 0.03], height_ratios=[ - 0.03, 3, 1, 1, 0.1, 0.03]) - fig, axd = plt.subplot_mosaic( - form, gridspec_kw=gs_kw, layout="constrained") + gs_kw = 
{"width_ratios": [0.03, 1, 1, 1, 1, 0.03], + "height_ratios": [0.03, 3, 1, 1, 0.1, 0.03]} + fig, axd = plt.subplot_mosaic(form, gridspec_kw=gs_kw, layout="constrained") fig.set_size_inches(16, 9) - fig.set_dpi(1920/16) + fig.set_dpi(1920 / 16) vlines = self._plot_fragmentation(axd) @@ -393,29 +378,25 @@ class GlobalBitMap: # the bitmaps of other storages aren't correctly drawn. We manually set the starting # time to 1 shortly before plotting. storage_bitmap = self.get_storage_bitmap(storage_id) + resized_bitmap = None if len(storage_bitmap) != 0: colored_bitmap = self._color_disks(storage_bitmap, storage_id) - resized_bitmap = reshape_to_axes( - colored_bitmap, axd[name], fig) + resized_bitmap = reshape_to_axes(colored_bitmap, axd[name], fig) else: storage_ax = axd[f"storage_{storage_id}"] - kw = dict(ha="center", va="center", - fontsize=12, color="darkgrey") - storage_ax.text( - 0.5, 0.5, "[Empty]", transform=storage_ax.transAxes, **kw) + kw = {"ha": "center", "va": "center", "fontsize": 12, "color": "darkgrey"} + storage_ax.text(0.5, 0.5, "[Empty]", transform=storage_ax.transAxes, **kw) ims.append({}) ims[storage_id]["storage_bitmap"] = storage_bitmap ims[storage_id]["resized_bitmap"] = resized_bitmap - ims[storage_id]["im"] = axd[name].imshow( - resized_bitmap, aspect="auto", interpolation=None) + ims[storage_id]["im"] = axd[name].imshow(resized_bitmap, aspect="auto", interpolation=None) axd[name].set_title(f"Storage {storage_id}") axd[name].set_xlabel("Block") axd[name].set_ylabel("Block") axd[name].set_xticks([]) axd[name].set_yticks([]) - bbox = axd[name].get_window_extent().transformed( - fig.dpi_scale_trans.inverted()) + bbox = axd[name].get_window_extent().transformed(fig.dpi_scale_trans.inverted()) ims[storage_id]["width"] = bbox.width ims[storage_id]["height"] = bbox.height @@ -429,26 +410,26 @@ class GlobalBitMap: self.time - 1, valinit=self.time - 1, valstep=1, - valfmt=valfmt, + valfmt=valfmt ) def update(val): - timestep = int(slider.val) + timestep 
= int(val) if timestep > self.time - 1: slider.set_val(self.time - 1) return for storage_id in range(self.config.num_storage_ids): - self.get_storage( - ims[storage_id]["storage_bitmap"], storage_id, timestep) + self.get_storage(ims[storage_id]["storage_bitmap"], storage_id, timestep) if len(ims[storage_id]["storage_bitmap"]) != 0: resized_bitmap = reshape_to_close_aspect( - ims[storage_id]["storage_bitmap"], ims[storage_id]["width"], ims[storage_id]["height"]) - ims[storage_id]["resized_bitmap"][:, - :, 3] = resized_bitmap * 255 + ims[storage_id]["storage_bitmap"], + ims[storage_id]["width"], + ims[storage_id]["height"] + ) + ims[storage_id]["resized_bitmap"][:, :, 3] = resized_bitmap * 255 - ims[storage_id]["im"].set_data( - ims[storage_id]["resized_bitmap"]) + ims[storage_id]["im"].set_data(ims[storage_id]["resized_bitmap"]) vlines[storage_id].set_xdata([timestep, timestep]) vlines["global"].set_xdata([timestep, timestep]) @@ -485,14 +466,16 @@ class GlobalBitMap: if storage_id == 0: frag_ax.set_ylabel("Fragmentation") + # Initial position at the end vlines[storage_id] = frag_ax.axvline( - # Initial position at the end - x=self.time - 1, color="red", linestyle="--", linewidth=1) + x=self.time - 1, + color="red", + linestyle="--", + linewidth=1 + ) else: - kw = dict(ha="center", va="center", - fontsize=12, color="darkgrey") - frag_ax.text( - 0.5, 0.5, "[Empty]", transform=frag_ax.transAxes, **kw) + kw = {"ha": "center", "va": "center", "fontsize": 12, "color": "darkgrey"} + frag_ax.text(0.5, 0.5, "[Empty]", transform=frag_ax.transAxes, **kw) # Extract fragmentation values from global_frag global_frag_ax = axd["frag_global"] @@ -504,9 +487,13 @@ class GlobalBitMap: global_frag_ax.set_xlabel("Timestamp") global_frag_ax.set_ylabel("Fragmentation") + # Initial position at the end vlines["global"] = global_frag_ax.axvline( - # Initial position at the end - x=self.time - 1, color="red", linestyle="--", linewidth=1) + x=self.time - 1, + color="red", + linestyle="--", + 
linewidth=1 + ) return vlines @@ -535,15 +522,14 @@ color_mapping = [ (255, 0, 0), (255, 0, 255), (255, 255, 0), - (255, 255, 255), ] -def id_to_color(id: int) -> (int, int, int): - return color_mapping[id % len(color_mapping)] +def id_to_color(i: int) -> tuple[int, int, int]: + return color_mapping[i % len(color_mapping)] -def reshape_to_axes(arr, ax, fig): +def reshape_to_axes(arr: np.ndarray, ax, fig) -> np.ndarray: """ Reshapes the first dimension of a NumPy array to a array with the first two dimensions close to the aspect ratio of the given Matplotlib axes. @@ -592,12 +578,12 @@ def reshape_to_close_aspect(arr_1d, width, height): @functools.lru_cache(128) -def get_close_aspect(width, height, len) -> (int, int): +def get_close_aspect(width: int, height: int, length: int) -> tuple[int, int]: # Calculate target aspect ratio target_aspect = width / height # Calculate the ideal number of columns for the target aspect ratio - total_pixels = len + total_pixels = length cols = int(np.sqrt(total_pixels * target_aspect)) # Adjust columns to find the closest aspect ratio while using all pixels @@ -609,7 +595,7 @@ def get_close_aspect(width, height, len) -> (int, int): return rows, cols -def fragmentation_of_bitmap(bitmap: np.array) -> (float, int, int): +def fragmentation_of_bitmap(bitmap: np.array) -> tuple[float, int, int]: """Calculates the fragmentation of a bitmap.""" if len(bitmap) == 0: return 0 @@ -617,7 +603,7 @@ def fragmentation_of_bitmap(bitmap: np.array) -> (float, int, int): # total_free = len(np.where(bitmap == 0)) total_free = np.count_nonzero(bitmap == 0) largest_free = longest_repeating_0s(bitmap) - frag = calculate_fragmentation(largest_free, total_free) + frag = calculate_fragmentation(total_free, largest_free) return frag, total_free, largest_free @@ -648,7 +634,7 @@ def longest_repeating_0s(arr: np.array) -> int: def identify_axes(ax_dict, fontsize=24): - kw = dict(ha="center", va="center", fontsize=fontsize, color="darkgrey") + kw = {"ha": 
"center", "va": "center", "fontsize": fontsize, "color": "darkgrey"} for k, ax in ax_dict.items(): ax.text(0.5, 0.5, k, transform=ax.transAxes, **kw) @@ -656,7 +642,7 @@ def identify_axes(ax_dict, fontsize=24): if __name__ == "__main__": if len(sys.argv) < 2: print("Please provide a file to visualize!") - exit(1) + sys.exit(1) log_file = sys.argv[1] config = parse_header(log_file) From 148b84e3ced027a04b667c90a92c2e816551564d Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Fri, 25 Oct 2024 17:51:41 +0200 Subject: [PATCH 24/49] parallel fragmentation building --- betree/scripts/visualize_allocation_log | 39 +++++++++++++++++++++---- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 75755897..e7b56c0b 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -2,6 +2,7 @@ import functools from math import floor +from multiprocessing import Pool import os import struct import sys @@ -170,7 +171,7 @@ class GlobalBitMap: timesteps = remaining_bytes(f) // SIZE_PER_ALLOCATION - for _ in tqdm(range(timesteps), desc="Building Global Bitmap"): + for _ in tqdm(range(timesteps), desc="Building Global Bitmap", unit="Timestep"): # Read Allocation try: op_type = struct.unpack(" tuple[float, int, int]: time = self._get_valid_time(time) @@ -378,7 +408,6 @@ class GlobalBitMap: # the bitmaps of other storages aren't correctly drawn. We manually set the starting # time to 1 shortly before plotting. 
storage_bitmap = self.get_storage_bitmap(storage_id) - resized_bitmap = None if len(storage_bitmap) != 0: colored_bitmap = self._color_disks(storage_bitmap, storage_id) resized_bitmap = reshape_to_axes(colored_bitmap, axd[name], fig) From 040f05996cb0d622b663b6d8d755d7be4419bb06 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Mon, 28 Oct 2024 12:17:22 +0100 Subject: [PATCH 25/49] Code reorganization --- betree/scripts/visualize_allocation_log | 121 +++++++++++++----------- 1 file changed, 68 insertions(+), 53 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index e7b56c0b..c6420638 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -7,6 +7,7 @@ import os import struct import sys +import matplotlib import matplotlib.pyplot as plt from matplotlib.animation import FFMpegWriter from matplotlib.widgets import Slider @@ -384,6 +385,16 @@ class GlobalBitMap: def plot(self): """Plots the bitmap with an interactive slider for timestamp selection and checkboxes for storage toggling.""" + fig, axd = self._setup_plot() + ims = self._setup_bitmaps(fig, axd) + vlines = self._plot_fragmentation(axd) + slider = self._setup_slider(fig, axd, ims, vlines) + + self.export_to_video("test.mp4", fig, slider) + + plt.show() + + def _setup_plot(self) -> tuple[plt.Figure, dict[str, matplotlib.axes.Axes]]: form = [ [".", ".", ".", ".", ".", "."], [".", "storage_0", "storage_1", "storage_2", "storage_3", "."], @@ -397,17 +408,15 @@ class GlobalBitMap: fig, axd = plt.subplot_mosaic(form, gridspec_kw=gs_kw, layout="constrained") fig.set_size_inches(16, 9) fig.set_dpi(1920 / 16) + return fig, axd - vlines = self._plot_fragmentation(axd) - + def _setup_bitmaps(self, fig, axd): ims = [] + # Initial plot for storage_id in range(self.config.num_storage_ids): name = f"storage_{storage_id}" - # NOTE: We first have to plot a later timestamp to initialize the size of the axes else - # the 
bitmaps of other storages aren't correctly drawn. We manually set the starting - # time to 1 shortly before plotting. - storage_bitmap = self.get_storage_bitmap(storage_id) + storage_bitmap = self.get_storage_bitmap(storage_id, 0) if len(storage_bitmap) != 0: colored_bitmap = self._color_disks(storage_bitmap, storage_id) resized_bitmap = reshape_to_axes(colored_bitmap, axd[name], fig) @@ -429,53 +438,7 @@ class GlobalBitMap: ims[storage_id]["width"] = bbox.width ims[storage_id]["height"] = bbox.height - # Create the format specifier with appropriate spacing to prevent moving of axes - max_digits = len(str(self.time - 1)) - valfmt = f"%{max_digits}d" - slider = Slider( - axd["slider"], - "", - 0, - self.time - 1, - valinit=self.time - 1, - valstep=1, - valfmt=valfmt - ) - - def update(val): - timestep = int(val) - if timestep > self.time - 1: - slider.set_val(self.time - 1) - return - - for storage_id in range(self.config.num_storage_ids): - self.get_storage(ims[storage_id]["storage_bitmap"], storage_id, timestep) - if len(ims[storage_id]["storage_bitmap"]) != 0: - resized_bitmap = reshape_to_close_aspect( - ims[storage_id]["storage_bitmap"], - ims[storage_id]["width"], - ims[storage_id]["height"] - ) - ims[storage_id]["resized_bitmap"][:, :, 3] = resized_bitmap * 255 - - ims[storage_id]["im"].set_data(ims[storage_id]["resized_bitmap"]) - vlines[storage_id].set_xdata([timestep, timestep]) - - vlines["global"].set_xdata([timestep, timestep]) - fig.canvas.draw_idle() - - slider.on_changed(update) - - slider.set_val(1) - writer = FFMpegWriter(fps=60, bitrate=1800) - writer.setup(fig, "test.mp4", dpi=100) - for i in tqdm(range(self.time)): - slider.set_val(slider.val + 1) - writer.grab_frame() - writer.finish() - - slider.set_val(1) - plt.show() + return ims def _plot_fragmentation(self, axd) -> dict: """Plots the fragmentation data.""" @@ -526,6 +489,58 @@ class GlobalBitMap: return vlines + def _setup_slider(self, fig, axd, ims, vlines) -> Slider: + # Create the 
format specifier with appropriate spacing to prevent moving of axes + max_digits = len(str(self.time - 1)) + valfmt = f"%{max_digits}d" + slider = Slider( + axd["slider"], + "", + 0, + self.time - 1, + valinit=0, + valstep=1, + valfmt=valfmt + ) + + def update(val): + timestep = int(val) + if timestep > self.time - 1: + slider.set_val(self.time - 1) + return + + for storage_id in range(self.config.num_storage_ids): + self.get_storage(ims[storage_id]["storage_bitmap"], storage_id, timestep) + if len(ims[storage_id]["storage_bitmap"]) != 0: + resized_bitmap = reshape_to_close_aspect( + ims[storage_id]["storage_bitmap"], + ims[storage_id]["width"], + ims[storage_id]["height"] + ) + ims[storage_id]["resized_bitmap"][:, :, 3] = resized_bitmap * 255 + + ims[storage_id]["im"].set_data(ims[storage_id]["resized_bitmap"]) + vlines[storage_id].set_xdata([timestep, timestep]) + + vlines["global"].set_xdata([timestep, timestep]) + fig.canvas.draw_idle() + + slider.on_changed(update) + + return slider + + def export_to_video(self, filename: str, fig, slider, start: int = 0, end: int = None, fps=60, bitrate=1800, dpi=100): + if end is None or end > self.time: + end = self.time + + slider.set_val(start) + writer = FFMpegWriter(fps=60, bitrate=1800) + writer.setup(fig, "test.mp4", dpi=100) + for _ in tqdm(range(end)): + slider.set_val(slider.val + 1) + writer.grab_frame() + writer.finish() + def _color_disks(self, storage_bitmap: np.array, storage_id: int) -> np.ndarray: """Colors the disks within a storage differently.""" colored_bitmap = np.zeros((len(storage_bitmap), 4), dtype=np.uint8) From 22ddb7c75e2908f54d86a556a7a445e1f2b583e6 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Tue, 29 Oct 2024 14:39:47 +0100 Subject: [PATCH 26/49] parallel video exporting --- betree/scripts/visualize_allocation_log | 96 ++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 8 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log 
index c6420638..4efe2f1c 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -2,10 +2,13 @@ import functools from math import floor -from multiprocessing import Pool +from multiprocessing import Pool, Value, Lock import os +import shutil import struct +import subprocess import sys +import time import matplotlib import matplotlib.pyplot as plt @@ -390,8 +393,6 @@ class GlobalBitMap: vlines = self._plot_fragmentation(axd) slider = self._setup_slider(fig, axd, ims, vlines) - self.export_to_video("test.mp4", fig, slider) - plt.show() def _setup_plot(self) -> tuple[plt.Figure, dict[str, matplotlib.axes.Axes]]: @@ -529,17 +530,95 @@ class GlobalBitMap: return slider - def export_to_video(self, filename: str, fig, slider, start: int = 0, end: int = None, fps=60, bitrate=1800, dpi=100): + def export_to_video(self, filename: str, start: int = 0, end: int = None, fps: int = 60, bitrate: int = 4500, dpi: int = 100, nproc: int = None): if end is None or end > self.time: end = self.time + if nproc is None: + nproc = os.cpu_count() + + timesteps_total = end - start + guarranteed_size = timesteps_total // nproc + optional_size = timesteps_total % nproc + temp_dir = ".exporting_temp" + tasks = [] + chunk_filenames = [] + for i in range(nproc): + name = f"{filename}_{i}.mp4" + chunk_filenames.append(name) + chunk_begin = guarranteed_size * i + min(i, optional_size) + chunk_end = chunk_begin + guarranteed_size + (1 if i < optional_size else 0) + tasks.append((f"{temp_dir}/{name}", chunk_begin, chunk_end, fps, bitrate, dpi)) + + if not os.path.exists(temp_dir): + os.mkdir(temp_dir) + + # Create shared counter and lock + counter = Value('i', 0) + lock = Lock() + + def init_pool(shared_counter, shared_lock): + """Initializer function for worker processes.""" + global counter, lock + counter = shared_counter + lock = shared_lock + + with Pool(processes=nproc, initializer=init_pool, initargs=(counter, lock)) as pool: + results = [] + for 
task in tasks: + results.append(pool.apply_async(self._export_to_video_chunk, args=task)) + + # Global progress bar + with tqdm(total=timesteps_total, desc="Exporting Video", unit="frame") as pbar: + previous_count = 0 + while True: + with lock: + current_count = counter.value + + if current_count != previous_count: + pbar.update(current_count - previous_count) + previous_count = current_count + if current_count >= timesteps_total: + break + + time.sleep(1.0) + + for result in results: + result.get() # Ensure all tasks are finished + + # Stitch video chunks using FFmpeg + with open(f"{temp_dir}/mylist.txt", mode="w") as f: + for name in chunk_filenames: + f.write(f"file '{name}'\n") + ffmpeg_concat_cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", f"{temp_dir}/mylist.txt", "-c", "copy", f"{filename}.mp4"] + subprocess.run(ffmpeg_concat_cmd, check=True) + + try: + shutil.rmtree(temp_dir) + except Exception as e: + print(f"Error deleting temporary directory '{temp_dir}': {e}") + + def _export_to_video_chunk(self, filename: str, start: int, end: int, fps=60, bitrate=1800, dpi=100) -> bool: + # NOTE: We have to create and setup a new plot within the chunk function because a + # matplotlib figure is not picklable. 
+ fig, axd = self._setup_plot() + ims = self._setup_bitmaps(fig, axd) + vlines = self._plot_fragmentation(axd) + slider = self._setup_slider(fig, axd, ims, vlines) slider.set_val(start) - writer = FFMpegWriter(fps=60, bitrate=1800) - writer.setup(fig, "test.mp4", dpi=100) - for _ in tqdm(range(end)): + writer = FFMpegWriter(fps=fps, bitrate=bitrate) + writer.setup(fig, filename, dpi=dpi) + for _ in range(end - start): slider.set_val(slider.val + 1) writer.grab_frame() + + with lock: + counter.value += 1 + writer.finish() + plt.close(fig) + + return True def _color_disks(self, storage_bitmap: np.array, storage_id: int) -> np.ndarray: """Colors the disks within a storage differently.""" @@ -693,4 +772,5 @@ if __name__ == "__main__": global_bitmap = GlobalBitMap(log_file, config) print(global_bitmap) - global_bitmap.plot() + global_bitmap.export_to_video("test", nproc=1) + # global_bitmap.plot() From d382ea9c024060800f163e32701e0710b22e90df Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Fri, 1 Nov 2024 11:29:20 +0100 Subject: [PATCH 27/49] Rewrite of visualization script with better performance, memory usage and organization --- betree/scripts/visualize_allocation_log | 997 ++++++++++++------------ 1 file changed, 489 insertions(+), 508 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 4efe2f1c..d73c3f40 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -1,93 +1,150 @@ #!/usr/bin/env python3 +import argparse import functools -from math import floor from multiprocessing import Pool, Value, Lock import os import shutil -import struct import subprocess -import sys +import struct import time +from typing import Iterator, Any, IO import matplotlib -import matplotlib.pyplot as plt from matplotlib.animation import FFMpegWriter +import matplotlib.pyplot as plt from matplotlib.widgets import Slider import numpy as np +from sortedcontainers import SortedDict 
from tqdm import tqdm -# Constants to get relevant information from the disk_offset -MASK_STORAGE_CLASS = ((1 << 2) - 1) << (10 + 52) +# Constants to get relevant information from the disk_offset. +MASK_LAYER_ID = ((1 << 2) - 1) << (10 + 52) MASK_DISK_ID = ((1 << 10) - 1) << 52 MASK_OFFSET = (1 << 52) - 1 SEGMENT_SIZE_LOG_2 = 18 SEGMENT_SIZE = 1 << SEGMENT_SIZE_LOG_2 SEGMENT_SIZE_MASK = SEGMENT_SIZE - 1 -# This is the amount of bytes one (de-)allocation has in the log +# This is the amount of bytes one (de-)allocation has in the log. SIZE_PER_ALLOCATION = 13 -def parse_disk_offset(offset: int) -> tuple[int, int, int, int]: - storage_class = (offset & MASK_STORAGE_CLASS) >> (52 + 10) - disk_id = (offset & MASK_DISK_ID) >> 52 - block_offset = offset & MASK_OFFSET - segment_id = block_offset & ~SEGMENT_SIZE_MASK - return storage_class, disk_id, segment_id, block_offset +class StorageConfig: + """Represents the storage configuration of the system""" - -class Config: - num_storage_ids: int - disks_per_storage_id: list[int] - blocks_per_disk: list[list[int]] - blocks_per_segment: int - - def __init__(self, num_storage_ids: int, disks_per_storage_id: list[int], + def __init__(self, num_layers: int, disks_per_layer: list[int], blocks_per_disk: list[list[int]], blocks_per_segment: int): - self.num_storage_ids = num_storage_ids - self.disks_per_storage_id = disks_per_storage_id + self.num_layers = num_layers + self.disks_per_layer = disks_per_layer self.blocks_per_disk = blocks_per_disk self.blocks_per_segment = blocks_per_segment def __str__(self) -> str: - return f"num_storage_ids: {self.num_storage_ids}, \ - disks_per_class: {self.disks_per_storage_id}, \ - blocks_per_disk: {self.blocks_per_disk}, \ - blocks_per_segment: {self.blocks_per_segment}" + return (f"StorageConfig(num_layers={self.num_layers}, " + f"disks_per_layer={self.disks_per_layer}, " + f"blocks_per_disk={self.blocks_per_disk}, " + f"blocks_per_segment={self.blocks_per_segment})") def blocks_global(self) -> 
int: - return sum(self.blocks_of_storage_id(storage_id) for - storage_id in range(self.num_storage_ids)) - - def disks_of_storage_id(self, storage_id: int) -> int: - return self.disks_per_storage_id[storage_id] - - def blocks_of_storage_id(self, storage_id: int) -> int: - return sum(self.blocks_of_disk(storage_id, disk_id) for - disk_id in range(self.disks_of_storage_id(storage_id))) + """Returns the total number of blocks in the system.""" + return sum(self.blocks_of_layer(layer) for + layer in range(self.num_layers)) + + def disks_of_layer(self, layer: int) -> int: + """Returns the number of disks in the specified layer.""" + return self.disks_per_layer[layer] + + def blocks_of_layer(self, layer: int) -> int: + """Returns the total number of blocks in the specified layer.""" + return sum(self.blocks_of_disk(layer, disk_id) for + disk_id in range(self.disks_of_layer(layer))) + + def blocks_of_disk(self, layer: int, disk_id: int) -> int: + """Returns the number of blocks in the specified disk.""" + return self.blocks_per_disk[layer][disk_id] + + def segments_of_disk(self, layer: int, disk_id: int) -> int: + """Returns the number of segments in the specified disk.""" + bod = self.blocks_of_disk(layer, disk_id) + if (bod % self.blocks_per_segment != 0): + return bod // self.blocks_per_segment + 1 + else: + return bod // self.blocks_per_segment + + def is_valid_layer(self, layer: int) -> bool: + """Checks if the given layer is valid.""" + return 0 <= layer < self.num_layers + + def is_valid_disk(self, layer: int, disk_id: int) -> bool: + """Checks if the given disk ID is valid.""" + return self.is_valid_layer(layer) and \ + 0 <= disk_id < self.disks_of_layer(layer) + + +class Timestamp: + time: int + op_type: int + offset: int + num_blocks: int + layer_id: int + disk_id: int + block_offset: int + segment_id: int + segment_offset: int + + def __init__(self, op_type: int, offset: int, num_blocks: int, time: int): + self.op_type = op_type + self.offset = offset + 
self.num_blocks = num_blocks + self.time = time + self._parse_offset() - def blocks_of_disk(self, storage_id: int, disk_id: int) -> int: - return self.blocks_per_disk[storage_id][disk_id] - - def is_valid_storage(self, storage_id: int) -> bool: - if storage_id >= self.num_storage_ids or storage_id < 0: - return False - return True - - def is_valid_disk(self, storage_id: int, disk_id: int) -> bool: - if not self.is_valid_storage(storage_id): - return False + def __str__(self) -> str: + return (f"Timestep(op_type: {self.op_type}, " + f"offset: {self.offset}, " + f"num_blocks: {self.num_blocks}, " + f"time: {self.time}, " + f"layer_id: {self.layer_id}, " + f"disk_id: {self.disk_id}, " + f"block_offset: {self.block_offset}, " + f"segment_id: {self.segment_id}, " + f"segment_offset: {self.segment_offset})") + + def _parse_offset(self): + """Parses the offset into human readable values""" + self.layer_id = (self.offset & MASK_LAYER_ID) >> (52 + 10) + self.disk_id = (self.offset & MASK_DISK_ID) >> 52 + self.block_offset = self.offset & MASK_OFFSET + # In haura the segment id is a multiple of the segment size. This is ugly for plotting. + self.segment_id = (self.block_offset & ~SEGMENT_SIZE_MASK) // SEGMENT_SIZE + self.segment_offset = self.block_offset % SEGMENT_SIZE + + +class Parser: + """Parses the allocation log file.""" + log_file: str + _file_handle: IO[Any] + timesteps: int + time: int - if disk_id >= self.disks_of_storage_id(storage_id) or disk_id < 0: - return False + def __init__(self, log_file: str): + self.log_file = log_file + self._file_handle = open(log_file, "rb") # Open the file in binary mode - return True + # Precalculate the number of timesteps. 
+ _ = self.parse_header() + self.timesteps = self._remaining_bytes() // SIZE_PER_ALLOCATION + self._file_handle.seek(0) + def __del__(self): + self._file_handle.close() -def parse_header(log_file: str) -> Config: - """Parses the global header of the allocation log file.""" + def __len__(self) -> int: + return self.timesteps - with open(log_file, "rb") as f: + def parse_header(self) -> StorageConfig: + """Parses the header of the log file and returns a StorageConfig.""" + f = self._file_handle num_classes = struct.unpack(" Config: blocks_per_segment = struct.unpack(" Iterator[Timestamp]: + """Prepares the iterator by skipping the header. Returns itself as the iterator.""" + self._file_handle.seek(0) + _ = self.parse_header() + self.time = 0 + return self -def remaining_bytes(file_pointer) -> int: - """Returns the remaining bytes in a file from the current position of the file pointer.""" - current_position = file_pointer.tell() - file_pointer.seek(0, os.SEEK_END) # Go to the end of the file - end_position = file_pointer.tell() - # Return to the original position - file_pointer.seek(current_position, os.SEEK_SET) - return end_position - current_position + def __next__(self) -> Timestamp: + """Reads the next allocation from the log file and returns a timestamp.""" + try: + op_type = struct.unpack(" int: + """Returns the remaining bytes in a file from the current position of the file pointer.""" + f = self._file_handle + current_position = f.tell() + f.seek(0, os.SEEK_END) + end_position = f.tell() + # Return to the original position. 
+ f.seek(current_position, os.SEEK_SET) + return end_position - current_position + + +class Fragmentation: + def fragmentation_of_bitmap(bitmap: np.array) -> tuple[float, int, int]: + """Calculates the fragmentation of a bitmap.""" + if len(bitmap) == 0: + return 0 + total_free = np.count_nonzero(bitmap == 0) + largest_free = Fragmentation.longest_repeating_0s(bitmap) + frag = Fragmentation.calculate_fragmentation(total_free, largest_free) -class GlobalBitMap: - config: Config - # NOTE: the bitmap is stored as packed bits and has to be unpacked for accurate plotting, - # based on some experimentation storing unpacked (and using 8x more memory) isn't worth it for - # faster plotting - # dict [tuple[storage, disk, segment], list[tuple[time, bitmap]]] - bitmap: dict[tuple[int, int, int], list[tuple[int, np.ndarray]]] - log_file: str - time: int = 0 - disk_begins: list[list] = [] - # storage_id[timestep[tuple[frag, total_free, largest_free]]] - storage_frag: list[list[tuple[float, int, int]]] = [] - # timestep[tuple[frag, total_free, largest_free]] - global_frag: list[tuple[float, int, int]] = [] - - def __init__(self, log_file: str, config: Config): - self.config = config - self.log_file = log_file - self._initialize_bitmap() - self._initialize_begins() - self._build_global_bitmap() - self._initialize_frag() - self._build_fragmentation() + return frag, total_free, largest_free - def __str__(self) -> str: - return f"Log_file: {self.log_file}\nTime: {self.time}\n" + \ - f"Config: {self.config}\nDisk_begins: {self.disk_begins}\n" - - def _initialize_bitmap(self): - self.bitmap = {} - for storage_id in range(self.config.num_storage_ids): - for disk_id in range(self.config.disks_of_storage_id(storage_id)): - used_blocks = 0 - blocks_in_disk = self.config.blocks_of_disk( - storage_id, disk_id) - while used_blocks < blocks_in_disk: - segment_size = min(self.config.blocks_per_segment, blocks_in_disk - used_blocks) - - bitmap = np.packbits(np.zeros(segment_size, dtype=bool)) - 
self.bitmap[(storage_id, disk_id, used_blocks)] = [(0, bitmap)] - used_blocks += segment_size - - def _initialize_begins(self): - for storage_id in range(self.config.num_storage_ids): - self.disk_begins.append([]) - offset = 0 - for disk_id in range(self.config.disks_of_storage_id(storage_id)): - self.disk_begins[storage_id].append(offset) - offset += self.config.blocks_of_disk(storage_id, disk_id) - - def _build_global_bitmap(self): - """Builds a global bitmap representation for each time step.""" - with open(self.log_file, "rb") as f: - # Skip the global header (already parsed) - header_length = (1 - + 2 * self.config.num_storage_ids - + 8 * sum(self.config.disks_per_storage_id) + 8) - f.seek(header_length) - - timesteps = remaining_bytes(f) // SIZE_PER_ALLOCATION - - for _ in tqdm(range(timesteps), desc="Building Global Bitmap", unit="Timestep"): - # Read Allocation - try: - op_type = struct.unpack(" int: - if time is None or time > self.time: - return self.time - if time < 0: + def calculate_fragmentation(total_free: int, largest_free: int) -> float: + """Calculates the fragmentation based on [wikipedia](https://en.m.wikipedia.org/wiki/Fragmentation_(computing)#Comparison).""" + if total_free == 0: return 0 - return time - - def _initialize_frag(self): - for storage_id in range(self.config.num_storage_ids): - largest_size = 0 - total_size = 0 - for disk_id in range(self.config.disks_of_storage_id(storage_id)): - blocks = self.config.blocks_of_disk(storage_id, disk_id) - largest_size = max(largest_size, blocks) - total_size += blocks - - self.storage_frag.append([]) - frag = calculate_fragmentation(largest_size, total_size) - self.storage_frag[storage_id].append((frag, total_size, largest_size)) - - def _build_fragmentation(self): - """Builds fragmentation data in parallel using a worker pool.""" - - num_processes = os.cpu_count() - # smaller chunk size because the processes need time to spin up and for progress bars - chunk_size = max(1, self.time // 
(num_processes * 10)) - - with Pool(processes=num_processes) as pool: - # Split the work into chunks - tasks = [(i, min(i + chunk_size, self.time)) - for i in range(1, self.time, chunk_size)] - results = pool.starmap(self._build_fragmentation_chunk, - tqdm(tasks, total=len(tasks), - desc="Calculating Fragmentation", - unit="chunk")) - - # Collect and stitch the results - for i, (storage_frag_chunk, global_frag_chunk) in enumerate(results): - start_time = tasks[i][0] - for storage_id in range(self.config.num_storage_ids): - self.storage_frag[storage_id][start_time:] = storage_frag_chunk[storage_id] - self.global_frag[start_time - 1:] = global_frag_chunk # Adjust index for global_frag - - def _build_fragmentation_chunk(self, start_time, end_time): - """Calculates fragmentation for a chunk of timesteps.""" - storage_frag_chunk = [[] for _ in range(self.config.num_storage_ids)] - global_frag_chunk = [] - - for timestep in range(start_time, end_time): - largest_free = 0 - total_free = 0 - for storage_id in range(self.config.num_storage_ids): - frag, total, largest = self.calculate_fragmentation_storage(storage_id, timestep) - storage_frag_chunk[storage_id].append((frag, total, largest)) - largest_free = max(largest, largest_free) - total_free += total - - frag = calculate_fragmentation(total_free, largest_free) - global_frag_chunk.append((frag, total_free, largest_free)) - - return storage_frag_chunk, global_frag_chunk - - def calculate_fragmentation(self, time: int = None) -> tuple[float, int, int]: - time = self._get_valid_time(time) + return 1 - (largest_free / total_free) - largest_free = 0 - total_free = 0 - for storage_id in range(self.config.num_storage_ids): - _, total, largest = self.calculate_fragmentation_storage(storage_id, time) - largest_free = max(largest, largest_free) - total_free += total + def longest_repeating_0s(arr: np.array) -> int: + """Calculates the maximum consecutive count of 0s in a binary numpy array.""" + # Find indices where the array 
changes value. + indices = np.where(np.diff(arr))[0] + 1 + # Split the array at these indices. + splits = np.split(arr, indices) - return calculate_fragmentation(total_free, largest_free), total_free, largest_free + # Calculate lengths of splits and find maximum for 0. + return max([len(s) for s in splits if s[0] == 0], default=0) - def calculate_fragmentation_storage(self, storage_id: int, time: int = None) -> tuple[float, int, int]: - time = self._get_valid_time(time) - if not self.config.is_valid_storage(storage_id): - return (0.0, 0, 0) - largest_free = 0 - total_free = 0 - for disk_id in range(self.config.disks_of_storage_id(storage_id)): - _, total, largest = self.calculate_fragmentation_disk(storage_id, disk_id, time) - largest_free = max(largest, largest_free) - total_free += total +class Segment: + id: tuple[int, int, int] # layer, disk, segment + size: int # number of blocks in segment + change_list: list[Timestamp] + frag_list: SortedDict[int, tuple[float, int, int]] # frag, total_free, largest_free - return calculate_fragmentation(total_free, largest_free), total_free, largest_free + def __init__(self, layer: int, disk: int, segment: int, size: int): + self.id = (layer, disk, segment) + self.size = size + self.change_list = [] + self.frag_list = SortedDict({}) - def calculate_fragmentation_disk(self, storage_id: int, disk_id: int, time: int = None) -> tuple[float, int, int]: - time = self._get_valid_time(time) - if not self.config.is_valid_disk(storage_id, disk_id): - return (0.0, 0, 0) + def __str__(self) -> str: + return (f"Segment(id: {self.id}, " + f"size: {self.size}, " + f"change_list: {self.change_list}, " + f"frag_list: {self.frag_list}") + + def add_timestamp(self, timestamp: Timestamp): + """Adds a Timestamp to the changelist.""" + self.change_list.append(timestamp) + + def calculate_fragmentation(self): + """Calculates the fragmentation of a segment for every timestamp available.""" + bitmap = np.zeros(self.size, dtype=np.uint8) + 
self.frag_list[0] = Fragmentation.fragmentation_of_bitmap(bitmap) + for timestamp in tqdm(self.change_list, desc=f"Calculating fragmentation of segment {self.id}", leave=False, unit="timestamp"): + begin = timestamp.segment_offset + end = begin + timestamp.num_blocks + bitmap[begin:end] = timestamp.op_type + + self.frag_list[timestamp.time] = Fragmentation.fragmentation_of_bitmap(bitmap) + + def get_bitmap(self, time: int) -> np.array: + """Returns the allocation bitmap of a segment at the specified time or a available time + before, if the requested time is not in the changelist.""" + bitmap = np.zeros(self.size, dtype=np.uint8) + if len(self.change_list) == 0: + return bitmap + + for timestamp in self.change_list: + if timestamp.time > time: + break + begin = timestamp.segment_offset + end = begin + timestamp.num_blocks + bitmap[begin:end] = timestamp.op_type - entries = filter(lambda k: k[0] == storage_id and k[1] == disk_id, self.bitmap) + return bitmap - largest_free = 0 - total_free = 0 - for entry in entries: - time, bitmap_at_time = self._binary_search_time(time, entry) + def get_fragmentation(self, time) -> tuple[float, int, int]: + """Returns the fragmentation, largest and total free space of a segment at the specified + time or a available time before, if the requested time is not in the fraglist.""" + key = self.frag_list.bisect_right(time) - 1 + return self.frag_list.peekitem(key)[1] - segment_bitmap = np.unpackbits(bitmap_at_time) - _, total, largest = fragmentation_of_bitmap(segment_bitmap) - largest_free = max(largest, largest_free) - total_free += total - return calculate_fragmentation(total_free, largest_free), total_free, largest_free +class Disk: + id: tuple[int, int] # layer, disk + size: int + segments: list[Segment] + + def __init__(self, layer: int, disk: int, size: int): + self.id = (layer, disk) + self.size = size + self.segments = [] - # Get the global bitmap at time. If no time is provided return the bitmap - # of the last timestep. 
- def get(self, output_bitmap: np.array, time: int = None) -> int: - time = self._get_valid_time(time) + def __str__(self) -> str: + out = f"Disk(id: {self.id}, " + for segment in self.segments: + out += str(segment) + return out + ")" + def add_timestamp(self, timestamp: Timestamp): + """Adds a Timestamp to the respective segment.""" + try: + self.segments[timestamp.segment_id].add_timestamp(timestamp) + except IndexError as e: + print(f"Error adding timestamp '{timestamp}': {e}") + + def calculate_fragmentation(self): + """Calculates the fragmentation of every segment for every timestamp available.""" + for segment in tqdm(self.segments, desc=f"Calculating fragmentation of disk {self.id}", leave=False, unit="segment"): + segment.calculate_fragmentation() + + def get_bitmap(self, time: int) -> np.array: + """Returns the allocation bitmap of a disk at the specified time or a available time + before, if the requested time is not in the changelist.""" + bitmap = np.zeros(self.size, dtype=np.uint8) start = 0 - for storage_id in range(self.config.num_storage_ids): - start += self.get_storage(output_bitmap[start:], storage_id, time) + for segment in self.segments: + bitmap[start:start + segment.size] = segment.get_bitmap(time) + start += segment.size - return start + return bitmap - def get_bitmap(self, time: int = None) -> np.array: - num_blocks = self.config.blocks_global() - bitmap = np.zeros(num_blocks, dtype=np.uint8) - self.get(bitmap, time) + def get_fragmentation(self, time) -> tuple[float, int, int]: + """Returns the fragmentation, largest and total free space of a disk at the specified + time or a available time before, if the requested time is not in the fraglist.""" + total_free = 0 + largest_free = 0 + for segment in self.segments: + _, total, largest = segment.get_fragmentation(time) + total_free += total + largest_free = max(largest_free, largest) - return bitmap + return Fragmentation.calculate_fragmentation(total_free, largest_free), total_free, 
largest_free - # Get the bitmap of a storage id at time. If no time is provided return the - # bitmap of the last timestep. - # If it doesn't match with the config throw an errer - def get_storage(self, output_bitmap: np.array, storage_id: int, time: int = None) -> int: - time = self._get_valid_time(time) - if not self.config.is_valid_storage(storage_id): - return 0 - start = 0 - for disk_id in range(self.config.disks_of_storage_id(storage_id)): - start += self.get_disk(output_bitmap[start:], storage_id, disk_id, time) +class Layer: + id: int + size: int + disks: list[Disk] - return start + def __init__(self, id: int, size: int): + self.id = id + self.size = size + self.disks = [] + + def __str__(self) -> str: + out = f"Layer(id: {self.id}, " + for disk in self.disks: + out += str(disk) + return out + ")" + + def add_timestamp(self, timestamp: Timestamp): + """Adds a Timestamp to the respective disk.""" + self.disks[timestamp.disk_id].add_timestamp(timestamp) + + def calculate_fragmentation(self): + """Calculates the fragmentation of every disk for every timestamp available.""" + for disk in tqdm(self.disks, desc=f"Calculating fragmentation of layer {self.id}", leave=False, unit="disk"): + disk.calculate_fragmentation() + + def get_bitmap(self, time: int) -> np.array: + """Returns the allocation bitmap of a Layer at the specified time or a available time + before, if the requested time is not in the changelist.""" + bitmap = np.zeros(self.size, dtype=np.uint8) + start = 0 + for disk in self.disks: + bitmap[start:start + disk.size] = disk.get_bitmap(time) + start += disk.size - def get_storage_bitmap(self, storage_id: int, time: int = None) -> np.array: - num_blocks = self.config.blocks_of_storage_id(storage_id) - bitmap = np.zeros(num_blocks, dtype=np.uint8) - self.get_storage(bitmap, storage_id, time) return bitmap - # Get the bitmap of a disk id at time. If no time is provided return the - # bitmap of the last timestep. 
- # If it doesn't match with the config throw an errer - def get_disk(self, output_bitmap: np.array, storage_id: int, disk_id: int, time: int = None) -> int: - time = self._get_valid_time(time) - if not self.config.is_valid_disk(storage_id, disk_id): - return 0 + def get_fragmentation(self, time) -> tuple[float, int, int]: + """Returns the fragmentation, largest and total free space of a layer at the specified + time or a available time before, if the requested time is not in the fraglist.""" + total_free = 0 + largest_free = 0 + for disk in self.disks: + _, total, largest = disk.get_fragmentation(time) + total_free += total + largest_free = max(largest_free, largest) - blocks_per_segment = self.config.blocks_per_segment - entries = filter(lambda k: k[0] == storage_id and k[1] == disk_id, self.bitmap) + return Fragmentation.calculate_fragmentation(total_free, largest_free), total_free, largest_free - for entry in entries: - segment_id = entry[2] - time, bitmap_at_time = self._binary_search_time(time, entry) - end = min(segment_id + blocks_per_segment, self.config.blocks_of_disk(entry[0], entry[1])) - output_bitmap[segment_id:end] = np.unpackbits(bitmap_at_time) +class GlobalBitMap: + log_file: str + storage_config: StorageConfig + layers: list[Layer] + size: int + time: int - return end + def __init__(self, log_file: str): + self.log_file = log_file + self.storage_config = Parser(log_file).parse_header() + self.size = self.storage_config.blocks_global() + + # Create the storage structure based on the config. 
+ self.layers = [] + for layer in range(self.storage_config.num_layers): + self.layers.append(Layer(layer, self.storage_config.blocks_of_layer(layer))) + for disk in range(self.storage_config.disks_of_layer(layer)): + self.layers[layer].disks.append(Disk(layer, disk, self.storage_config.blocks_of_disk(layer, disk))) + num_segments = self.storage_config.segments_of_disk(layer, disk) + for segment in range(num_segments): + if segment < num_segments - 1: # not the last segment + size = self.storage_config.blocks_per_segment + else: + size = (self.storage_config.blocks_of_disk(layer, disk) + - segment * self.storage_config.blocks_per_segment) + self.layers[layer].disks[disk].segments.append(Segment(layer, disk, segment, size)) + + self._build_bitmap() + self._calculate_fragmentation() - def get_disk_bitmap(self, storage_id: int, disk_id: int, time: int = None) -> np.array: - num_blocks = self.config.blocks_of_disk(storage_id, disk_id) - bitmap = np.zeros(num_blocks, dtype=np.uint8) - self.get_disk(bitmap, storage_id, disk_id, time) - return bitmap + def __str__(self) -> str: + out = (f"GlobalBitMap(log_file: {self.log_file}, " + f"storage_config: {self.storage_config}, " + f"size: {self.size}, " + f"time: {self.time}, ") + return out + ")" + + def _build_bitmap(self): + """Builds the bitmap of the storage based on the provided log file.""" + parser = Parser(log_file) + for timestamp in tqdm(parser, desc="Building Bitmap", unit="timestep"): + self.layers[timestamp.layer_id].add_timestamp(timestamp) + + self.time = timestamp.time + + def _calculate_fragmentation(self): + """Calculates the fragmentation of every layer for every timestamp available.""" + for layer in tqdm(self.layers, desc="Calculating fragmentation", unit="layer"): + layer.calculate_fragmentation() + + def get_bitmap(self, time: int) -> np.array: + """Returns the allocation bitmap the storage at the specified time.""" + bitmap = np.zeros(self.size, dtype=np.uint8) + start = 0 + for layer in self.layers: + 
bitmap[start:start + layer.size] = layer.get_bitmap(time) + start += layer.size - @functools.lru_cache(1024) - def _binary_search_time(self, time: int, entry: (int, int, int)) -> tuple[int, np.array]: - bitmaps = self.bitmap[entry] - left = 0 - right = len(bitmaps) - 1 - result = None - while left <= right: - middle = floor((left + right) / 2) - if bitmaps[middle][0] == time: - return bitmaps[middle] - elif bitmaps[middle][0] < time: - result = bitmaps[middle] - left = middle + 1 - else: - right = middle - 1 + def get_fragmentation(self, time) -> tuple[float, int, int]: + """Returns the fragmentation, largest and total free space of the storage.""" + total_free = 0 + largest_free = 0 + for layer in self.layers: + _, total, largest = layer.get_fragmentation(time) + total_free += total + largest_free = max(largest_free, largest) - return result + return Fragmentation.calculate_fragmentation(total_free, largest_free), total_free, largest_free def plot(self): - """Plots the bitmap with an interactive slider for timestamp selection and checkboxes for storage toggling.""" + """Plots the bitmap with an interactive slider for timestamp selection.""" fig, axd = self._setup_plot() ims = self._setup_bitmaps(fig, axd) vlines = self._plot_fragmentation(axd) @@ -396,9 +456,10 @@ class GlobalBitMap: plt.show() def _setup_plot(self) -> tuple[plt.Figure, dict[str, matplotlib.axes.Axes]]: + """Helper method for setting up the plot.""" form = [ [".", ".", ".", ".", ".", "."], - [".", "storage_0", "storage_1", "storage_2", "storage_3", "."], + [".", "layer_0", "layer_1", "layer_2", "layer_3", "."], [".", "frag_0", "frag_1", "frag_2", "frag_3", "."], [".", "frag_global", "frag_global", "frag_global", "frag_global", "."], [".", "slider", "slider", "slider", "slider", "."], @@ -406,73 +467,79 @@ class GlobalBitMap: ] gs_kw = {"width_ratios": [0.03, 1, 1, 1, 1, 0.03], "height_ratios": [0.03, 3, 1, 1, 0.1, 0.03]} - fig, axd = plt.subplot_mosaic(form, gridspec_kw=gs_kw, 
layout="constrained") + fig, axd = plt.subplot_mosaic( + form, gridspec_kw=gs_kw, layout="constrained") fig.set_size_inches(16, 9) fig.set_dpi(1920 / 16) return fig, axd def _setup_bitmaps(self, fig, axd): + """Helper method for setting up the bitmaps of the layer.""" ims = [] - # Initial plot - for storage_id in range(self.config.num_storage_ids): - name = f"storage_{storage_id}" - storage_bitmap = self.get_storage_bitmap(storage_id, 0) - if len(storage_bitmap) != 0: - colored_bitmap = self._color_disks(storage_bitmap, storage_id) - resized_bitmap = reshape_to_axes(colored_bitmap, axd[name], fig) - else: - storage_ax = axd[f"storage_{storage_id}"] - kw = {"ha": "center", "va": "center", "fontsize": 12, "color": "darkgrey"} - storage_ax.text(0.5, 0.5, "[Empty]", transform=storage_ax.transAxes, **kw) - + for layer in self.layers: + name = f"layer_{layer.id}" ims.append({}) - ims[storage_id]["storage_bitmap"] = storage_bitmap - ims[storage_id]["resized_bitmap"] = resized_bitmap - ims[storage_id]["im"] = axd[name].imshow(resized_bitmap, aspect="auto", interpolation=None) - axd[name].set_title(f"Storage {storage_id}") - axd[name].set_xlabel("Block") - axd[name].set_ylabel("Block") + if layer.size == 0: + kw = {"ha": "center", "va": "center", "fontsize": 12, "color": "darkgrey"} + axd[name].text(0.5, 0.5, "[Empty]", transform=axd[name].transAxes, **kw) + else: + bbox = axd[name].get_window_extent().transformed( + fig.dpi_scale_trans.inverted()) + ims[layer.id]["width"] = bbox.width + ims[layer.id]["height"] = bbox.height + + layer_bitmap = layer.get_bitmap(self.time - 1) + colored_bitmap = self._color_disks(layer_bitmap, layer.id) + rows, cols = get_close_aspect(ims[layer.id]["width"], + ims[layer.id]["height"], + len(colored_bitmap[:, 0])) + resized_bitmap = colored_bitmap.reshape(rows, cols, 4) + ims[layer.id]["bitmap"] = resized_bitmap + ims[layer.id]["im"] = axd[name].imshow( + resized_bitmap, aspect="auto", interpolation=None) + axd[name].set_xlabel("Block") + 
axd[name].set_ylabel("Block") + + axd[name].set_title(f"Layer {layer.id}") axd[name].set_xticks([]) axd[name].set_yticks([]) - bbox = axd[name].get_window_extent().transformed(fig.dpi_scale_trans.inverted()) - ims[storage_id]["width"] = bbox.width - ims[storage_id]["height"] = bbox.height return ims def _plot_fragmentation(self, axd) -> dict: - """Plots the fragmentation data.""" + """Helper method for plotting the fragmentation of the layers and the storage.""" # Vertical lines that indicate the timestamp vlines = {} - for storage_id in range(self.config.num_storage_ids): - frag_ax = axd[f"frag_{storage_id}"] - if self.config.blocks_of_storage_id(storage_id) != 0: - # Extract fragmentation values from storage_frag - frag_values = [frag for frag, _, - _ in self.storage_frag[storage_id]] + for layer in self.layers: + frag_ax = axd[f"frag_{layer.id}"] + if self.storage_config.blocks_of_layer(layer.id) != 0: + frag_values = [] + for i in range(self.time): + frag, _, _ = layer.get_fragmentation(i) + frag_values.append(frag) + frag_ax.plot(frag_values) frag_ax.set_xlim([0, self.time - 1]) frag_ax.set_ylim([0, 1]) frag_ax.set_xlabel("Timestamp") - if storage_id == 0: + if layer.id == 0: frag_ax.set_ylabel("Fragmentation") - # Initial position at the end - vlines[storage_id] = frag_ax.axvline( - x=self.time - 1, - color="red", - linestyle="--", - linewidth=1 - ) + vlines[layer.id] = frag_ax.axvline( + x=0, color="red", linestyle="--", linewidth=1) else: - kw = {"ha": "center", "va": "center", "fontsize": 12, "color": "darkgrey"} - frag_ax.text(0.5, 0.5, "[Empty]", transform=frag_ax.transAxes, **kw) - - # Extract fragmentation values from global_frag + kw = {"ha": "center", "va": "center", + "fontsize": 12, "color": "darkgrey"} + frag_ax.text(0.5, 0.5, "[Empty]", + transform=frag_ax.transAxes, **kw) + + frag_values = [] + for i in range(self.time): + frag, _, _ = layer.get_fragmentation(i) + frag_values.append(frag) global_frag_ax = axd["frag_global"] - frag_values = [frag 
for frag, _, _ in self.global_frag] global_frag_ax.plot(frag_values) global_frag_ax.set_ylim([0, 1]) global_frag_ax.set_xlim([0, self.time - 1]) @@ -480,18 +547,16 @@ class GlobalBitMap: global_frag_ax.set_xlabel("Timestamp") global_frag_ax.set_ylabel("Fragmentation") - # Initial position at the end vlines["global"] = global_frag_ax.axvline( - x=self.time - 1, - color="red", - linestyle="--", - linewidth=1 - ) + x=0, color="red", linestyle="--", linewidth=1) return vlines def _setup_slider(self, fig, axd, ims, vlines) -> Slider: - # Create the format specifier with appropriate spacing to prevent moving of axes + """Helper method for setting up the slider for interactive plotting.""" + # TODO: remove moving of entire plot, when the slider value increases + + # Create the format specifier with appropriate spacing to prevent moving of axes. max_digits = len(str(self.time - 1)) valfmt = f"%{max_digits}d" slider = Slider( @@ -510,18 +575,20 @@ class GlobalBitMap: slider.set_val(self.time - 1) return - for storage_id in range(self.config.num_storage_ids): - self.get_storage(ims[storage_id]["storage_bitmap"], storage_id, timestep) - if len(ims[storage_id]["storage_bitmap"]) != 0: - resized_bitmap = reshape_to_close_aspect( - ims[storage_id]["storage_bitmap"], - ims[storage_id]["width"], - ims[storage_id]["height"] - ) - ims[storage_id]["resized_bitmap"][:, :, 3] = resized_bitmap * 255 + for layer in self.layers: + if layer.size == 0: + continue - ims[storage_id]["im"].set_data(ims[storage_id]["resized_bitmap"]) - vlines[storage_id].set_xdata([timestep, timestep]) + # Times 255 because the bitmap is mapped to the alpha channel. 
+ bitmap = layer.get_bitmap(timestep) * 255 + + rows, cols = get_close_aspect(ims[layer.id]["width"], + ims[layer.id]["height"], + len(bitmap)) + resized_bitmap = bitmap.reshape(rows, cols) + ims[layer.id]["bitmap"][:, :, 3] = resized_bitmap + ims[layer.id]["im"].set_data(ims[layer.id]["bitmap"]) + vlines[layer.id].set_xdata([timestep, timestep]) vlines["global"].set_xdata([timestep, timestep]) fig.canvas.draw_idle() @@ -530,12 +597,31 @@ class GlobalBitMap: return slider + def _color_disks(self, layer_bitmap: np.array, layer_id: int) -> np.ndarray: + """Colors the disks within a layer differently.""" + colored_bitmap = np.zeros((len(layer_bitmap), 4), dtype=np.uint8) + colored_bitmap[:, 3] = layer_bitmap + start = 0 + for disk_id in range(self.storage_config.disks_of_layer(layer_id)): + length = self.storage_config.blocks_of_disk(layer_id, disk_id) + + color = id_to_color(disk_id) + color_array = np.tile(color, (length, 1)) + + colored_bitmap[start:start + length, 0:3] = color_array + start += length + + return colored_bitmap + def export_to_video(self, filename: str, start: int = 0, end: int = None, fps: int = 60, bitrate: int = 4500, dpi: int = 100, nproc: int = None): + """Export the plot to a mp4 file for later watching. For that it can use multiple processes + and works with files in temporary directory, which it cleans up after finishing.""" if end is None or end > self.time: end = self.time if nproc is None: nproc = os.cpu_count() + # Split the work into chunks. timesteps_total = end - start guarranteed_size = timesteps_total // nproc optional_size = timesteps_total % nproc @@ -552,7 +638,7 @@ class GlobalBitMap: if not os.path.exists(temp_dir): os.mkdir(temp_dir) - # Create shared counter and lock + # Create shared counter and lock for the global progress bar. 
counter = Value('i', 0) lock = Lock() @@ -563,11 +649,9 @@ class GlobalBitMap: lock = shared_lock with Pool(processes=nproc, initializer=init_pool, initargs=(counter, lock)) as pool: - results = [] - for task in tasks: - results.append(pool.apply_async(self._export_to_video_chunk, args=task)) + result = pool.starmap_async(self._export_to_video_chunk, tasks) - # Global progress bar + # Display a global progress bar. with tqdm(total=timesteps_total, desc="Exporting Video", unit="frame") as pbar: previous_count = 0 while True: @@ -582,10 +666,9 @@ class GlobalBitMap: time.sleep(1.0) - for result in results: - result.get() # Ensure all tasks are finished + result.get() - # Stitch video chunks using FFmpeg + # Stitch video chunks together using FFmpeg. with open(f"{temp_dir}/mylist.txt", mode="w") as f: for name in chunk_filenames: f.write(f"file '{name}'\n") @@ -598,6 +681,7 @@ class GlobalBitMap: print(f"Error deleting temporary directory '{temp_dir}': {e}") def _export_to_video_chunk(self, filename: str, start: int, end: int, fps=60, bitrate=1800, dpi=100) -> bool: + """Helper method that gets executed in each process used in exporting to video.""" # NOTE: We have to create and setup a new plot within the chunk function because a # matplotlib figure is not picklable. fig, axd = self._setup_plot() @@ -612,6 +696,7 @@ class GlobalBitMap: slider.set_val(slider.val + 1) writer.grab_frame() + # Update the global progress bar. 
with lock: counter.value += 1 @@ -620,96 +705,31 @@ class GlobalBitMap: return True - def _color_disks(self, storage_bitmap: np.array, storage_id: int) -> np.ndarray: - """Colors the disks within a storage differently.""" - colored_bitmap = np.zeros((len(storage_bitmap), 4), dtype=np.uint8) - colored_bitmap[:, 3] = storage_bitmap - for disk_id in range(self.config.disks_of_storage_id(storage_id)): - start = self.disk_begins[storage_id][disk_id] - end = start + self.config.blocks_of_disk(storage_id, disk_id) - length = end - start - - color = id_to_color(disk_id) - color_array = np.tile(color, (length, 1)) - - colored_bitmap[start:end, 0:3] = color_array - - return colored_bitmap - - -color_mapping = [ - (0, 0, 0), - (0, 0, 255), - (0, 255, 0), - (0, 255, 255), - (255, 0, 0), - (255, 0, 255), - (255, 255, 0), -] - def id_to_color(i: int) -> tuple[int, int, int]: - return color_mapping[i % len(color_mapping)] - - -def reshape_to_axes(arr: np.ndarray, ax, fig) -> np.ndarray: - """ - Reshapes the first dimension of a NumPy array to a array with the first two - dimensions close to the aspect ratio of the given Matplotlib axes. - - Args: - arr_1d: The 1D NumPy array to reshape. - ax: The Matplotlib axes object. - fig: The Matplotlib figure object. - - Returns: - A 2D NumPy array with dimensions close to the axes aspect ratio. 
- """ - - # Get axes dimensions in inches - bbox = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) - width, height = bbox.width, bbox.height - - # Calculate target aspect ratio - target_aspect = width / height - - # Calculate the ideal number of columns for the target aspect ratio - total_pixels = len(arr) - cols = int(np.sqrt(total_pixels * target_aspect)) - - # Adjust columns to find the closest aspect ratio while using all pixels - rows = total_pixels // cols - while rows * cols != total_pixels: - cols -= 1 - rows = total_pixels // cols - - # Reshape the array to the calculated dimensions - return arr.reshape(rows, cols, 4) - - -def reshape_to_close_aspect(arr_1d, width, height): - """ - Reshapes a 1D NumPy array to a 2D array with dimensions close to the - aspect ratio of the given width and height - """ - rows, cols = get_close_aspect(width, height, len(arr_1d)) - - # Reshape the 1D array to the calculated dimensions - arr_2d = arr_1d.reshape(rows, cols) - - return arr_2d + """Maps the id to a color specified in the COLOR_MAPPING""" + COLOR_MAPPING = [ + (0, 0, 0), + (0, 0, 255), + (0, 255, 0), + (0, 255, 255), + (255, 0, 0), + (255, 0, 255), + (255, 255, 0), + ] + return COLOR_MAPPING[i % len(COLOR_MAPPING)] @functools.lru_cache(128) -def get_close_aspect(width: int, height: int, length: int) -> tuple[int, int]: - # Calculate target aspect ratio +def get_close_aspect(width: int, height: int, total_pixels: int) -> tuple[int, int]: + """Returns an aspect ratio, that is close to the provided width and height and is able to + display the pixels comfortably""" target_aspect = width / height - # Calculate the ideal number of columns for the target aspect ratio - total_pixels = length + # Calculate the ideal number of columns for the target aspect ratio. 
cols = int(np.sqrt(total_pixels * target_aspect)) - # Adjust columns to find the closest aspect ratio while using all pixels + # Adjust columns to find the closest aspect ratio while using all pixels. rows = total_pixels // cols while rows * cols != total_pixels: cols -= 1 @@ -718,59 +738,20 @@ def get_close_aspect(width: int, height: int, length: int) -> tuple[int, int]: return rows, cols -def fragmentation_of_bitmap(bitmap: np.array) -> tuple[float, int, int]: - """Calculates the fragmentation of a bitmap.""" - if len(bitmap) == 0: - return 0 - - # total_free = len(np.where(bitmap == 0)) - total_free = np.count_nonzero(bitmap == 0) - largest_free = longest_repeating_0s(bitmap) - frag = calculate_fragmentation(total_free, largest_free) - - return frag, total_free, largest_free - - -def calculate_fragmentation(total_free: int, largest_free: int) -> float: - if total_free == 0: - return 0 - return 1 - (largest_free / total_free) - - -def longest_repeating_0s(arr: np.array) -> int: - """ - Calculates the maximum consecutive count of 0s in a binary numpy array. - - Args: - binary_array: A numpy array containing only 0s and 1s. - - Returns: - Maximum consecutive count of 0s. 
- """ - # Find indices where the array changes value - indices = np.where(np.diff(arr))[0] + 1 - # Split the array at these indices - splits = np.split(arr, indices) - - # Calculate lengths of splits and find maximum for 0 - return max([len(s) for s in splits if s[0] == 0], default=0) - - -def identify_axes(ax_dict, fontsize=24): - kw = {"ha": "center", "va": "center", "fontsize": fontsize, "color": "darkgrey"} - for k, ax in ax_dict.items(): - ax.text(0.5, 0.5, k, transform=ax.transAxes, **kw) - - if __name__ == "__main__": - if len(sys.argv) < 2: - print("Please provide a file to visualize!") - sys.exit(1) - log_file = sys.argv[1] - - config = parse_header(log_file) - global_bitmap = GlobalBitMap(log_file, config) - print(global_bitmap) - - global_bitmap.export_to_video("test", nproc=1) - # global_bitmap.plot() + parser = argparse.ArgumentParser(description="Visualize allocation log.") + parser.add_argument("input_file", help="Path to the allocation log file.") + parser.add_argument("-p", "--processes", type=int, default=os.cpu_count(), + help="Number of processes to use for video export (default: all CPU cores)") + parser.add_argument("-e", "--export", metavar="output_file", nargs="?", const="output", + help="Export the visualization to a video file (default: output.mp4)") + + args = parser.parse_args() + + log_file = args.input_file + global_bitmap = GlobalBitMap(log_file) + + if args.export: + global_bitmap.export_to_video(args.export, nproc=args.processes) + else: + global_bitmap.plot() From e7a3f606455e4f064ba14d5b725c6eb103cd03d3 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Wed, 6 Nov 2024 09:44:52 +0100 Subject: [PATCH 28/49] backend selection parameter for matplotlib --- betree/scripts/visualize_allocation_log | 41 +++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index d73c3f40..0889d243 100755 --- a/betree/scripts/visualize_allocation_log +++ 
b/betree/scripts/visualize_allocation_log @@ -738,6 +738,35 @@ def get_close_aspect(width: int, height: int, total_pixels: int) -> tuple[int, i return rows, cols +def get_valid_backends(): + def is_backend_module(fname): + """Identifies if a filename is a matplotlib backend module""" + return fname.startswith('backend_') and fname.endswith('.py') + + def backend_fname_formatter(fname): + """Removes the extension of the given filename, then takes away the leading 'backend_'.""" + return os.path.splitext(fname)[0][8:] + + # get the directory where the backends live + backends_dir = os.path.dirname(matplotlib.backends.__file__) + + # filter all files in that directory to identify all files which provide a backend + backend_fnames = filter(is_backend_module, os.listdir(backends_dir)) + + backends = [backend_fname_formatter(fname) for fname in backend_fnames] + + # validate backends + backends_valid = [] + for b in backends: + try: + plt.switch_backend(b) + backends_valid += [b] + except: + continue + + return backends, backends_valid + + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Visualize allocation log.") parser.add_argument("input_file", help="Path to the allocation log file.") @@ -745,12 +774,24 @@ if __name__ == "__main__": help="Number of processes to use for video export (default: all CPU cores)") parser.add_argument("-e", "--export", metavar="output_file", nargs="?", const="output", help="Export the visualization to a video file (default: output.mp4)") + parser.add_argument("-b", "--backend", type=str, default="TkAgg", + help="Specify the backend for Matplotlib (default: TkAgg)") args = parser.parse_args() log_file = args.input_file global_bitmap = GlobalBitMap(log_file) + try: + matplotlib.pyplot.switch_backend(args.backend) + except ModuleNotFoundError as e: + print(f"\n\033[31mTrying to use invalid backend: {args.backend} ({e})\033[0m\n") + backends, backends_valid = get_valid_backends() + print(f"Available backends: 
\t{backends}") + print(f"Installed backends: \t{backends_valid}") + exit(1) + + print(f"Using '{matplotlib.get_backend()}' as a backend for matplotlib.") if args.export: global_bitmap.export_to_video(args.export, nproc=args.processes) else: From 16ebb8c32ab277db3037eb2416c7f6bf10bad977 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Wed, 6 Nov 2024 10:50:46 +0100 Subject: [PATCH 29/49] Handle missing input file correctly --- betree/scripts/visualize_allocation_log | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 0889d243..280c3d6b 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -137,7 +137,11 @@ class Parser: self._file_handle.seek(0) def __del__(self): - self._file_handle.close() + try: + self._file_handle.close() + except AttributeError: + # Happens when the file does not exist + pass def __len__(self) -> int: return self.timesteps @@ -386,7 +390,13 @@ class GlobalBitMap: def __init__(self, log_file: str): self.log_file = log_file - self.storage_config = Parser(log_file).parse_header() + try: + self.storage_config = Parser(log_file).parse_header() + except FileNotFoundError as e: + print(f"Input file `{log_file}` does not exist. Can't continue.") + print(e) + exit(1) + self.size = self.storage_config.blocks_global() # Create the storage structure based on the config. 
From 9550c738ce69620259627add83403e15a2b6f730 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Wed, 6 Nov 2024 10:51:05 +0100 Subject: [PATCH 30/49] Plot correct global fragmentation --- betree/scripts/visualize_allocation_log | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 280c3d6b..2ee0f589 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -547,7 +547,7 @@ class GlobalBitMap: frag_values = [] for i in range(self.time): - frag, _, _ = layer.get_fragmentation(i) + frag, _, _ = self.get_fragmentation(i) frag_values.append(frag) global_frag_ax = axd["frag_global"] global_frag_ax.plot(frag_values) From d935e15acd9a6cd13a86e62dd5b3c6a814f9a086 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Sun, 10 Nov 2024 10:24:23 +0100 Subject: [PATCH 31/49] runtime plot adjustments --- betree/scripts/visualize_allocation_log | 353 ++++++++++++++++-------- 1 file changed, 243 insertions(+), 110 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 2ee0f589..a549fe0d 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -13,7 +13,7 @@ from typing import Iterator, Any, IO import matplotlib from matplotlib.animation import FFMpegWriter import matplotlib.pyplot as plt -from matplotlib.widgets import Slider +from matplotlib.widgets import Slider, CheckButtons import numpy as np from sortedcontainers import SortedDict from tqdm import tqdm @@ -456,178 +456,303 @@ class GlobalBitMap: return Fragmentation.calculate_fragmentation(total_free, largest_free), total_free, largest_free - def plot(self): - """Plots the bitmap with an interactive slider for timestamp selection.""" - fig, axd = self._setup_plot() - ims = self._setup_bitmaps(fig, axd) - vlines = self._plot_fragmentation(axd) - slider = self._setup_slider(fig, axd, 
ims, vlines) - plt.show() +class Plotter: + """Handles plotting the bitmap and fragmentation data with interactive controls.""" + global_bitmap: GlobalBitMap + plot_config: dict + layers: list[bool] + time: int + + def __init__(self, layers, components, checkboxes): + self.time = 0 - def _setup_plot(self) -> tuple[plt.Figure, dict[str, matplotlib.axes.Axes]]: - """Helper method for setting up the plot.""" - form = [ - [".", ".", ".", ".", ".", "."], - [".", "layer_0", "layer_1", "layer_2", "layer_3", "."], - [".", "frag_0", "frag_1", "frag_2", "frag_3", "."], - [".", "frag_global", "frag_global", "frag_global", "frag_global", "."], - [".", "slider", "slider", "slider", "slider", "."], - [".", ".", ".", ".", ".", "."], - ] - gs_kw = {"width_ratios": [0.03, 1, 1, 1, 1, 0.03], - "height_ratios": [0.03, 3, 1, 1, 0.1, 0.03]} - fig, axd = plt.subplot_mosaic( - form, gridspec_kw=gs_kw, layout="constrained") - fig.set_size_inches(16, 9) - fig.set_dpi(1920 / 16) - return fig, axd - - def _setup_bitmaps(self, fig, axd): - """Helper method for setting up the bitmaps of the layer.""" - ims = [] + # Define initial plotting configuration + self.plot_config = { + "bitmaps": False, + "frag_local": False, + "frag_global": False, + "slider": False, + "checkboxes": checkboxes, + } + + for key in components: + if key not in self.plot_config.keys(): + print(f"\033[31mThe component '{key}' does not exist.\033[0m") + print("Available components are: ") + print(self.plot_config.keys()) + exit(1) + else: + self.plot_config[key] = True + + self.layers = layers + + def plot(self): + """Sets up the plot and displays it""" + self.fig = plt.figure(layout="constrained") + self.fig.set_size_inches(16, 9) + self.fig.set_dpi(1920 / 16) + self._create_layout(self.fig) + plt.show() + def _create_layout(self, fig): + layout, gridspec = self._get_layout_gridspec() + self.axd = fig.subplot_mosaic(layout, gridspec_kw=gridspec) + self.ims = self._setup_bitmaps() + self.vlines_local = 
self._local_fragmentation() + self.vlines_global = self._global_fragmentation() + self.slider = self._setup_slider() + self.checkboxes = self._setup_checkboxes() + # self.slider.set_val(self.time) + + def _get_layout_gridspec(self) -> tuple[list[list[str]], list[list[str]]]: + layout = [] + gridspec = {"width_ratios": [], "height_ratios": []} + + if self.plot_config["checkboxes"]: + gridspec["width_ratios"].append(0.4) for layer in self.layers: - name = f"layer_{layer.id}" - ims.append({}) + gridspec["width_ratios"].append(1) + + if self.plot_config["bitmaps"]: + layout.append([]) + gridspec["height_ratios"].append(3) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + layout[-1].append(f"bitmap_{layer}") + + if self.plot_config["frag_local"]: + layout.append([]) + gridspec["height_ratios"].append(1) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + layout[-1].append(f"frag_{layer}") + + if self.plot_config["frag_global"]: + layout.append([]) + gridspec["height_ratios"].append(1) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + layout[-1].append("frag_global") + + if self.plot_config["slider"]: + layout.append([]) + gridspec["height_ratios"].append(0.1) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + layout[-1].append("slider") + + return layout, gridspec + + def _setup_bitmaps(self): + if not self.plot_config["bitmaps"]: + return + + ims = {} + for layer in self.global_bitmap.layers: + if layer.id not in self.layers: + continue + + name = f"bitmap_{layer.id}" + ims[layer.id] = {} if layer.size == 0: kw = {"ha": "center", "va": "center", "fontsize": 12, "color": "darkgrey"} - axd[name].text(0.5, 0.5, "[Empty]", transform=axd[name].transAxes, **kw) + self.axd[name].text(0.5, 0.5, "[Empty]", transform=self.axd[name].transAxes, **kw) else: - bbox = 
axd[name].get_window_extent().transformed( - fig.dpi_scale_trans.inverted()) + bbox = self.axd[name].get_window_extent().transformed( + self.fig.dpi_scale_trans.inverted()) ims[layer.id]["width"] = bbox.width ims[layer.id]["height"] = bbox.height - layer_bitmap = layer.get_bitmap(self.time - 1) - colored_bitmap = self._color_disks(layer_bitmap, layer.id) + layer_bitmap = layer.get_bitmap(self.time) + colored_bitmap = self._color_disks( + layer_bitmap, layer.id) rows, cols = get_close_aspect(ims[layer.id]["width"], ims[layer.id]["height"], len(colored_bitmap[:, 0])) resized_bitmap = colored_bitmap.reshape(rows, cols, 4) ims[layer.id]["bitmap"] = resized_bitmap - ims[layer.id]["im"] = axd[name].imshow( + ims[layer.id]["im"] = self.axd[name].imshow( resized_bitmap, aspect="auto", interpolation=None) - axd[name].set_xlabel("Block") - axd[name].set_ylabel("Block") + self.axd[name].set_xlabel("Block") + self.axd[name].set_ylabel("Block") - axd[name].set_title(f"Layer {layer.id}") - axd[name].set_xticks([]) - axd[name].set_yticks([]) + self.axd[name].set_title(f"Layer {layer.id}") + self.axd[name].set_xticks([]) + self.axd[name].set_yticks([]) return ims - def _plot_fragmentation(self, axd) -> dict: + def _color_disks(self, layer_bitmap: np.array, layer_id: int) -> np.ndarray: + """Colors the disks within a layer differently.""" + colored_bitmap = np.zeros((len(layer_bitmap), 4), dtype=np.uint8) + colored_bitmap[:, 3] = layer_bitmap + start = 0 + for disk_id in range(self.global_bitmap.storage_config.disks_of_layer(layer_id)): + length = self.global_bitmap.storage_config.blocks_of_disk(layer_id, disk_id) + + color = id_to_color(disk_id) + color_array = np.tile(color, (length, 1)) + + colored_bitmap[start:start + length, 0:3] = color_array + start += length + + return colored_bitmap + + def _local_fragmentation(self): """Helper method for plotting the fragmentation of the layers and the storage.""" + if not self.plot_config["frag_local"]: + return + # Vertical lines that 
indicate the timestamp vlines = {} - for layer in self.layers: - frag_ax = axd[f"frag_{layer.id}"] - if self.storage_config.blocks_of_layer(layer.id) != 0: + for i, layer in enumerate(self.layers): + frag_ax = self.axd[f"frag_{layer}"] + if self.global_bitmap.storage_config.blocks_of_layer(layer) != 0: frag_values = [] - for i in range(self.time): - frag, _, _ = layer.get_fragmentation(i) + for i in range(self.global_bitmap.time): + frag, _, _ = self.global_bitmap.layers[layer].get_fragmentation(i) frag_values.append(frag) frag_ax.plot(frag_values) - frag_ax.set_xlim([0, self.time - 1]) + frag_ax.set_xlim([0, self.global_bitmap.time - 1]) frag_ax.set_ylim([0, 1]) frag_ax.set_xlabel("Timestamp") - if layer.id == 0: + if i == 0: frag_ax.set_ylabel("Fragmentation") - vlines[layer.id] = frag_ax.axvline( - x=0, color="red", linestyle="--", linewidth=1) + vlines[layer] = frag_ax.axvline( + x=self.time, color="red", linestyle="--", linewidth=1) else: kw = {"ha": "center", "va": "center", "fontsize": 12, "color": "darkgrey"} frag_ax.text(0.5, 0.5, "[Empty]", transform=frag_ax.transAxes, **kw) + return vlines + + def _global_fragmentation(self): + if not self.plot_config["frag_global"]: + return + + # Vertical line that indicates the timestamp frag_values = [] - for i in range(self.time): - frag, _, _ = self.get_fragmentation(i) + for i in range(self.global_bitmap.time): + frag, _, _ = self.global_bitmap.get_fragmentation(i) frag_values.append(frag) - global_frag_ax = axd["frag_global"] + global_frag_ax = self.axd["frag_global"] global_frag_ax.plot(frag_values) global_frag_ax.set_ylim([0, 1]) - global_frag_ax.set_xlim([0, self.time - 1]) + global_frag_ax.set_xlim([0, self.global_bitmap.time - 1]) global_frag_ax.set_title("Global Fragmentation") global_frag_ax.set_xlabel("Timestamp") global_frag_ax.set_ylabel("Fragmentation") - vlines["global"] = global_frag_ax.axvline( - x=0, color="red", linestyle="--", linewidth=1) + return global_frag_ax.axvline(x=self.time, color="red", 
linestyle="--", linewidth=1) - return vlines - - def _setup_slider(self, fig, axd, ims, vlines) -> Slider: + def _setup_slider(self): """Helper method for setting up the slider for interactive plotting.""" + if not self.plot_config["slider"]: + return # TODO: remove moving of entire plot, when the slider value increases # Create the format specifier with appropriate spacing to prevent moving of axes. - max_digits = len(str(self.time - 1)) + max_digits = len(str(self.global_bitmap.time - 1)) valfmt = f"%{max_digits}d" slider = Slider( - axd["slider"], + self.axd["slider"], "", 0, - self.time - 1, - valinit=0, + self.global_bitmap.time - 1, + valinit=self.time, valstep=1, valfmt=valfmt ) def update(val): - timestep = int(val) - if timestep > self.time - 1: - slider.set_val(self.time - 1) + self.time = int(val) + if self.time > self.global_bitmap.time - 1: + slider.set_val(self.global_bitmap.time - 1) return - for layer in self.layers: + for layer_id in self.layers: + layer = self.global_bitmap.layers[layer_id] if layer.size == 0: continue - # Times 255 because the bitmap is mapped to the alpha channel. - bitmap = layer.get_bitmap(timestep) * 255 + if self.plot_config["bitmaps"]: + # Times 255 because the bitmap is mapped to the alpha channel. 
+ bitmap = layer.get_bitmap(self.time) * 255 - rows, cols = get_close_aspect(ims[layer.id]["width"], - ims[layer.id]["height"], - len(bitmap)) - resized_bitmap = bitmap.reshape(rows, cols) - ims[layer.id]["bitmap"][:, :, 3] = resized_bitmap - ims[layer.id]["im"].set_data(ims[layer.id]["bitmap"]) - vlines[layer.id].set_xdata([timestep, timestep]) + rows, cols = get_close_aspect(self.ims[layer_id]["width"], + self.ims[layer_id]["height"], + len(bitmap)) + resized_bitmap = bitmap.reshape(rows, cols) + self.ims[layer_id]["bitmap"][:, :, 3] = resized_bitmap + self.ims[layer_id]["im"].set_data(self.ims[layer_id]["bitmap"]) - vlines["global"].set_xdata([timestep, timestep]) - fig.canvas.draw_idle() + if self.plot_config["frag_local"]: + self.vlines_local[layer_id].set_xdata([self.time, self.time]) + + if self.plot_config["frag_global"]: + self.vlines_global.set_xdata([self.time, self.time]) + self.fig.canvas.draw_idle() slider.on_changed(update) return slider - def _color_disks(self, layer_bitmap: np.array, layer_id: int) -> np.ndarray: - """Colors the disks within a layer differently.""" - colored_bitmap = np.zeros((len(layer_bitmap), 4), dtype=np.uint8) - colored_bitmap[:, 3] = layer_bitmap - start = 0 - for disk_id in range(self.storage_config.disks_of_layer(layer_id)): - length = self.storage_config.blocks_of_disk(layer_id, disk_id) + def _setup_checkboxes(self): + """Sets up checkboxes for controlling plot visibility.""" - color = id_to_color(disk_id) - color_array = np.tile(color, (length, 1)) + labels = [] + actives = [] + for k, v in self.plot_config.items(): + labels.append(k) + actives.append(v) - colored_bitmap[start:start + length, 0:3] = color_array - start += length + for layer in self.global_bitmap.layers: + labels.append(f"layer_{layer.id}") + if layer.id in self.layers: + actives.append(True) + else: + actives.append(False) - return colored_bitmap + # Create checkboxes + self.axd["checkboxes"].set_axis_off() + checkboxes = 
CheckButtons(self.axd["checkboxes"], labels, actives) + + def update_visibility(label): + """Updates plot visibility based on checkbox changes.""" + plt.clf() + + if label in self.plot_config: + self.plot_config[label] = not self.plot_config[label] + else: + layer_id = int(label.split("_")[-1]) + try: + self.layers.remove(layer_id) + except ValueError: + self.layers.append(layer_id) + self.layers.sort() + + self._create_layout(self.fig) + self.fig.canvas.mouse_grabber = None + self.fig.canvas.draw_idle() + + checkboxes.on_clicked(update_visibility) + return checkboxes def export_to_video(self, filename: str, start: int = 0, end: int = None, fps: int = 60, bitrate: int = 4500, dpi: int = 100, nproc: int = None): """Export the plot to a mp4 file for later watching. For that it can use multiple processes and works with files in temporary directory, which it cleans up after finishing.""" - if end is None or end > self.time: - end = self.time + if end is None or end > self.global_bitmap.time: + end = self.global_bitmap.time if nproc is None: nproc = os.cpu_count() @@ -694,16 +819,16 @@ class GlobalBitMap: """Helper method that gets executed in each process used in exporting to video.""" # NOTE: We have to create and setup a new plot within the chunk function because a # matplotlib figure is not picklable. - fig, axd = self._setup_plot() - ims = self._setup_bitmaps(fig, axd) - vlines = self._plot_fragmentation(axd) - slider = self._setup_slider(fig, axd, ims, vlines) + self.fig = plt.figure(layout="constrained") + self.fig.set_size_inches(16, 9) + self.fig.set_dpi(1920 / 16) + self._create_layout(self.fig) - slider.set_val(start) + self.slider.set_val(start) writer = FFMpegWriter(fps=fps, bitrate=bitrate) - writer.setup(fig, filename, dpi=dpi) + writer.setup(self.fig, filename, dpi=dpi) for _ in range(end - start): - slider.set_val(slider.val + 1) + self.slider.set_val(self.slider.val + 1) writer.grab_frame() # Update the global progress bar. 
@@ -711,7 +836,7 @@ class GlobalBitMap: counter.value += 1 writer.finish() - plt.close(fig) + plt.close(self.fig) return True @@ -780,17 +905,22 @@ def get_valid_backends(): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Visualize allocation log.") parser.add_argument("input_file", help="Path to the allocation log file.") - parser.add_argument("-p", "--processes", type=int, default=os.cpu_count(), - help="Number of processes to use for video export (default: all CPU cores)") - parser.add_argument("-e", "--export", metavar="output_file", nargs="?", const="output", - help="Export the visualization to a video file (default: output.mp4)") parser.add_argument("-b", "--backend", type=str, default="TkAgg", help="Specify the backend for Matplotlib (default: TkAgg)") + parser.add_argument("-c", "--components", nargs="+", default=["slider", "bitmaps", "frag_local", "frag_global"], + help="Specify the components that should be plotted (default: slider bitmaps frag_local frag_global)") + parser.add_argument("-e", "--export", metavar="output_file", nargs="?", const="output", + help="Export the visualization to a video file (default: output.mp4)") + parser.add_argument("-l", "--layers", nargs="+", default=[i for i in range(4)], + help="Specify the layers that should be plotted (default: 1 2 3 4)") + parser.add_argument("-p", "--processes", type=int, default=os.cpu_count(), + help="Number of processes to use for video export (default: all CPU cores)") + parser.add_argument("--disable-checkboxes", default=True, action="store_true", + help="Disable the checkboxes (default: False)") args = parser.parse_args() log_file = args.input_file - global_bitmap = GlobalBitMap(log_file) try: matplotlib.pyplot.switch_backend(args.backend) @@ -800,9 +930,12 @@ if __name__ == "__main__": print(f"Available backends: \t{backends}") print(f"Installed backends: \t{backends_valid}") exit(1) - print(f"Using '{matplotlib.get_backend()}' as a backend for matplotlib.") + + 
print(args.layers, args.components) + plotter = Plotter(args.layers, args.components, args.disable_checkboxes) + plotter.global_bitmap = GlobalBitMap(log_file) if args.export: - global_bitmap.export_to_video(args.export, nproc=args.processes) + plotter.export_to_video(args.export, nproc=args.processes, end=100) else: - global_bitmap.plot() + plotter.plot() From 921e62d9895443e2ab639f45e24c29964c4958d0 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Sun, 10 Nov 2024 10:34:03 +0100 Subject: [PATCH 32/49] remove dependency of slider for exporting --- betree/scripts/visualize_allocation_log | 57 ++++++++++++++----------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index a549fe0d..0f1e73f9 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -464,7 +464,7 @@ class Plotter: layers: list[bool] time: int - def __init__(self, layers, components, checkboxes): + def __init__(self, layers, components, disable_checkboxes): self.time = 0 # Define initial plotting configuration @@ -473,7 +473,7 @@ class Plotter: "frag_local": False, "frag_global": False, "slider": False, - "checkboxes": checkboxes, + "checkboxes": not disable_checkboxes, } for key in components: @@ -680,35 +680,40 @@ class Plotter: slider.set_val(self.global_bitmap.time - 1) return - for layer_id in self.layers: - layer = self.global_bitmap.layers[layer_id] - if layer.size == 0: - continue + self._timestamp_update() - if self.plot_config["bitmaps"]: - # Times 255 because the bitmap is mapped to the alpha channel. 
- bitmap = layer.get_bitmap(self.time) * 255 + slider.on_changed(update) - rows, cols = get_close_aspect(self.ims[layer_id]["width"], - self.ims[layer_id]["height"], - len(bitmap)) - resized_bitmap = bitmap.reshape(rows, cols) - self.ims[layer_id]["bitmap"][:, :, 3] = resized_bitmap - self.ims[layer_id]["im"].set_data(self.ims[layer_id]["bitmap"]) + return slider - if self.plot_config["frag_local"]: - self.vlines_local[layer_id].set_xdata([self.time, self.time]) + def _timestamp_update(self): + for layer_id in self.layers: + layer = self.global_bitmap.layers[layer_id] + if layer.size == 0: + continue - if self.plot_config["frag_global"]: - self.vlines_global.set_xdata([self.time, self.time]) - self.fig.canvas.draw_idle() + if self.plot_config["bitmaps"]: + # Times 255 because the bitmap is mapped to the alpha channel. + bitmap = layer.get_bitmap(self.time) * 255 - slider.on_changed(update) + rows, cols = get_close_aspect(self.ims[layer_id]["width"], + self.ims[layer_id]["height"], + len(bitmap)) + resized_bitmap = bitmap.reshape(rows, cols) + self.ims[layer_id]["bitmap"][:, :, 3] = resized_bitmap + self.ims[layer_id]["im"].set_data(self.ims[layer_id]["bitmap"]) - return slider + if self.plot_config["frag_local"]: + self.vlines_local[layer_id].set_xdata([self.time, self.time]) + + if self.plot_config["frag_global"]: + self.vlines_global.set_xdata([self.time, self.time]) + self.fig.canvas.draw_idle() def _setup_checkboxes(self): """Sets up checkboxes for controlling plot visibility.""" + if not self.plot_config["checkboxes"]: + return labels = [] actives = [] @@ -824,11 +829,13 @@ class Plotter: self.fig.set_dpi(1920 / 16) self._create_layout(self.fig) - self.slider.set_val(start) + self.time = start + self._timestamp_update() writer = FFMpegWriter(fps=fps, bitrate=bitrate) writer.setup(self.fig, filename, dpi=dpi) for _ in range(end - start): - self.slider.set_val(self.slider.val + 1) + self.time += 1 + self._timestamp_update() writer.grab_frame() # Update the 
global progress bar. @@ -915,7 +922,7 @@ if __name__ == "__main__": help="Specify the layers that should be plotted (default: 1 2 3 4)") parser.add_argument("-p", "--processes", type=int, default=os.cpu_count(), help="Number of processes to use for video export (default: all CPU cores)") - parser.add_argument("--disable-checkboxes", default=True, action="store_true", + parser.add_argument("--disable-checkboxes", default=False, action="store_true", help="Disable the checkboxes (default: False)") args = parser.parse_args() From c18af85b371a0b91b881ec3a421118a45863cd00 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Mon, 18 Nov 2024 09:13:47 +0100 Subject: [PATCH 33/49] Plot failed allocations --- betree/scripts/visualize_allocation_log | 40 ++++++++++++++++++++++--- betree/src/allocator.rs | 10 ++++--- betree/src/data_management/dmu.rs | 7 ++++- 3 files changed, 48 insertions(+), 9 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 0f1e73f9..4563d0ee 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -26,7 +26,7 @@ SEGMENT_SIZE_LOG_2 = 18 SEGMENT_SIZE = 1 << SEGMENT_SIZE_LOG_2 SEGMENT_SIZE_MASK = SEGMENT_SIZE - 1 # This is the amount of bytes one (de-)allocation has in the log. 
-SIZE_PER_ALLOCATION = 13 +SIZE_PER_ALLOCATION = 17 class StorageConfig: @@ -92,10 +92,11 @@ class Timestamp: segment_id: int segment_offset: int - def __init__(self, op_type: int, offset: int, num_blocks: int, time: int): + def __init__(self, op_type: int, offset: int, num_blocks: int, tries: int, time: int): self.op_type = op_type self.offset = offset self.num_blocks = num_blocks + self.tries = tries self.time = time self._parse_offset() @@ -103,6 +104,7 @@ class Timestamp: return (f"Timestep(op_type: {self.op_type}, " f"offset: {self.offset}, " f"num_blocks: {self.num_blocks}, " + f"tries: {self.tries}, " f"time: {self.time}, " f"layer_id: {self.layer_id}, " f"disk_id: {self.disk_id}, " @@ -177,12 +179,13 @@ class Parser: op_type = struct.unpack(" int: """Returns the remaining bytes in a file from the current position of the file pointer.""" @@ -385,6 +388,7 @@ class GlobalBitMap: log_file: str storage_config: StorageConfig layers: list[Layer] + tries: np.array size: int time: int @@ -398,6 +402,7 @@ class GlobalBitMap: exit(1) self.size = self.storage_config.blocks_global() + self.tries = np.zeros(self.size) # Create the storage structure based on the config. 
self.layers = [] @@ -429,6 +434,7 @@ class GlobalBitMap: parser = Parser(log_file) for timestamp in tqdm(parser, desc="Building Bitmap", unit="timestep"): self.layers[timestamp.layer_id].add_timestamp(timestamp) + self.tries[timestamp.time] = timestamp.tries self.time = timestamp.time @@ -472,6 +478,7 @@ class Plotter: "bitmaps": False, "frag_local": False, "frag_global": False, + "allocation_tries": False, "slider": False, "checkboxes": not disable_checkboxes, } @@ -501,9 +508,9 @@ class Plotter: self.ims = self._setup_bitmaps() self.vlines_local = self._local_fragmentation() self.vlines_global = self._global_fragmentation() + self.vline_allocation_tries = self._allocation_tries() self.slider = self._setup_slider() self.checkboxes = self._setup_checkboxes() - # self.slider.set_val(self.time) def _get_layout_gridspec(self) -> tuple[list[list[str]], list[list[str]]]: layout = [] @@ -538,6 +545,14 @@ class Plotter: for layer in self.layers: layout[-1].append("frag_global") + if self.plot_config["allocation_tries"]: + layout.append([]) + gridspec["height_ratios"].append(1) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + layout[-1].append("allocation_tries") + if self.plot_config["slider"]: layout.append([]) gridspec["height_ratios"].append(0.1) @@ -655,6 +670,19 @@ class Plotter: return global_frag_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) + def _allocation_tries(self): + if not self.plot_config["allocation_tries"]: + return + + failed_allocations_ax = self.axd["allocation_tries"] + failed_allocations_ax.plot(self.global_bitmap.tries) + failed_allocations_ax.set_xlim([0, self.global_bitmap.time - 1]) + failed_allocations_ax.set_title("Allocation Tries") + failed_allocations_ax.set_xlabel("Timestamp") + failed_allocations_ax.set_ylabel("Tries") + + return failed_allocations_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) + def _setup_slider(self): """Helper method for setting 
up the slider for interactive plotting.""" if not self.plot_config["slider"]: @@ -708,6 +736,10 @@ class Plotter: if self.plot_config["frag_global"]: self.vlines_global.set_xdata([self.time, self.time]) + + if self.plot_config["allocation_tries"]: + self.vline_allocation_tries.set_xdata([self.time, self.time]) + self.fig.canvas.draw_idle() def _setup_checkboxes(self): diff --git a/betree/src/allocator.rs b/betree/src/allocator.rs index 1b6159aa..2b7d4963 100644 --- a/betree/src/allocator.rs +++ b/betree/src/allocator.rs @@ -29,16 +29,18 @@ impl SegmentAllocator { /// Allocates a block of the given `size`. /// Returns `None` if the allocation request cannot be satisfied. - pub fn allocate(&mut self, size: u32) -> Option { + pub fn allocate(&mut self, size: u32) -> (Option, u32) { if size == 0 { - return Some(0); + return (Some(0), 0); } + let mut tries = 0; let offset = { let mut idx = 0; loop { + tries += 1; loop { if idx + size > SEGMENT_SIZE as u32 { - return None; + return (None, tries); } if !self.data[idx as usize] { break; @@ -56,7 +58,7 @@ impl SegmentAllocator { } }; self.mark(offset, size, Action::Allocate); - Some(offset) + (Some(offset), tries) } /// Allocates a block of the given `size` at `offset`. diff --git a/betree/src/data_management/dmu.rs b/betree/src/data_management/dmu.rs index 3fd34304..9ab82f68 100644 --- a/betree/src/data_management/dmu.rs +++ b/betree/src/data_management/dmu.rs @@ -250,6 +250,7 @@ where let _ = file.write_u8(Action::Deallocate.as_bool() as u8); let _ = file.write_u64::(obj_ptr.offset.as_u64()); let _ = file.write_u32::(obj_ptr.size.as_u32()); + let _ = file.write_u32::(0); } if let (CopyOnWriteEvent::Removed, Some(tx), CopyOnWriteReason::Remove) = ( self.handler.copy_on_write( @@ -540,6 +541,7 @@ where // size? // Or save the largest contiguous memory region as a value and compare against that. For // that the allocator needs to support that and we have to 'bubble' the largest value up. 
+ let mut total_tries: u32 = 0; 'class: for &class in strategy.iter().flatten() { let disks_in_class = self.pool.disk_count(class); if disks_in_class == 0 { @@ -595,13 +597,16 @@ where // Has to be split because else the temporary value is dropped while borrowing let bitmap = self.handler.get_allocation_bitmap(*segment_id, self)?; let mut allocator = bitmap.access(); - if let Some(segment_offset) = allocator.allocate(size.as_u32()) { + let allocation = allocator.allocate(size.as_u32()); + total_tries += allocation.1; + if let Some(segment_offset) = allocation.0 { let mut file = self.allocation_log_file.lock(); let disk_offset = segment_id.disk_offset(segment_offset); file.write_u8(Action::Allocate.as_bool() as u8)?; file.write_u64::(disk_offset.as_u64())?; file.write_u32::(size.as_u32())?; + file.write_u32::(total_tries)?; break disk_offset; } From 6c6bc99cbc5f5d90ddae403116e070b45f9ba0d4 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Mon, 18 Nov 2024 09:21:48 +0100 Subject: [PATCH 34/49] CLI improvements --- betree/scripts/visualize_allocation_log | 27 ++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 4563d0ee..be4092d0 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -470,7 +470,7 @@ class Plotter: layers: list[bool] time: int - def __init__(self, layers, components, disable_checkboxes): + def __init__(self, args): self.time = 0 # Define initial plotting configuration @@ -480,10 +480,17 @@ class Plotter: "frag_global": False, "allocation_tries": False, "slider": False, - "checkboxes": not disable_checkboxes, + "checkboxes": not args.disable_checkboxes, } - for key in components: + if args.available_components: + print("\033[1mAvailable Components:\033[0m") + for component in self.plot_config.keys(): + print(component) + + exit(1) + + for key in args.components: if key not in 
self.plot_config.keys(): print(f"\033[31mThe component '{key}' does not exist.\033[0m") print("Available components are: ") @@ -492,7 +499,7 @@ class Plotter: else: self.plot_config[key] = True - self.layers = layers + self.layers = [int(i) for i in args.layers] def plot(self): """Sets up the plot and displays it""" @@ -948,19 +955,23 @@ if __name__ == "__main__": help="Specify the backend for Matplotlib (default: TkAgg)") parser.add_argument("-c", "--components", nargs="+", default=["slider", "bitmaps", "frag_local", "frag_global"], help="Specify the components that should be plotted (default: slider bitmaps frag_local frag_global)") + parser.add_argument("--available-components", default=False, action="store_true", + help="Print the components available to be plotted.") + parser.add_argument("-d", "--disable-checkboxes", default=False, action="store_true", + help="Disable the checkboxes (default: False)") parser.add_argument("-e", "--export", metavar="output_file", nargs="?", const="output", help="Export the visualization to a video file (default: output.mp4)") parser.add_argument("-l", "--layers", nargs="+", default=[i for i in range(4)], help="Specify the layers that should be plotted (default: 1 2 3 4)") parser.add_argument("-p", "--processes", type=int, default=os.cpu_count(), help="Number of processes to use for video export (default: all CPU cores)") - parser.add_argument("--disable-checkboxes", default=False, action="store_true", - help="Disable the checkboxes (default: False)") args = parser.parse_args() log_file = args.input_file + plotter = Plotter(args) + try: matplotlib.pyplot.switch_backend(args.backend) except ModuleNotFoundError as e: @@ -969,10 +980,8 @@ if __name__ == "__main__": print(f"Available backends: \t{backends}") print(f"Installed backends: \t{backends_valid}") exit(1) - print(f"Using '{matplotlib.get_backend()}' as a backend for matplotlib.") + print(f"Using \033[1m{matplotlib.get_backend()}\033[0m as a backend for matplotlib.") - 
print(args.layers, args.components) - plotter = Plotter(args.layers, args.components, args.disable_checkboxes) plotter.global_bitmap = GlobalBitMap(log_file) if args.export: plotter.export_to_video(args.export, nproc=args.processes, end=100) From 1a9a26ccfdf5a70b0f1e9b4287211385801ad4e0 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Mon, 18 Nov 2024 09:48:07 +0100 Subject: [PATCH 35/49] plot free blocks --- betree/scripts/visualize_allocation_log | 85 +++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 6 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index be4092d0..dadbffbe 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -478,6 +478,8 @@ class Plotter: "bitmaps": False, "frag_local": False, "frag_global": False, + "free_local": False, + "free_global": False, "allocation_tries": False, "slider": False, "checkboxes": not args.disable_checkboxes, @@ -513,8 +515,10 @@ class Plotter: layout, gridspec = self._get_layout_gridspec() self.axd = fig.subplot_mosaic(layout, gridspec_kw=gridspec) self.ims = self._setup_bitmaps() - self.vlines_local = self._local_fragmentation() - self.vlines_global = self._global_fragmentation() + self.vlines_frag_local = self._fragmentation_local() + self.vline_frag_global = self._fragmentation_global() + self.vlines_free_local = self._free_local() + self.vline_free_global = self._free_global() self.vline_allocation_tries = self._allocation_tries() self.slider = self._setup_slider() self.checkboxes = self._setup_checkboxes() @@ -552,6 +556,22 @@ class Plotter: for layer in self.layers: layout[-1].append("frag_global") + if self.plot_config["free_local"]: + layout.append([]) + gridspec["height_ratios"].append(1) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + layout[-1].append(f"free_{layer}") + + if self.plot_config["free_global"]: + layout.append([]) + 
gridspec["height_ratios"].append(1) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + layout[-1].append("free_global") + if self.plot_config["allocation_tries"]: layout.append([]) gridspec["height_ratios"].append(1) @@ -625,7 +645,7 @@ class Plotter: return colored_bitmap - def _local_fragmentation(self): + def _fragmentation_local(self): """Helper method for plotting the fragmentation of the layers and the storage.""" if not self.plot_config["frag_local"]: return @@ -658,7 +678,7 @@ class Plotter: return vlines - def _global_fragmentation(self): + def _fragmentation_global(self): if not self.plot_config["frag_global"]: return @@ -677,6 +697,53 @@ class Plotter: return global_frag_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) + def _free_local(self): + if not self.plot_config["free_local"]: + return + + # Vertical lines that indicate the timestamp + vlines = {} + + for i, layer in enumerate(self.layers): + free_ax = self.axd[f"free_{layer}"] + if self.global_bitmap.storage_config.blocks_of_layer(layer) != 0: + free_values = [] + for i in range(self.global_bitmap.time): + _, _, free = self.global_bitmap.layers[layer].get_fragmentation(i) + free_values.append(free) + + free_ax.plot(free_values) + free_ax.set_xlim([0, self.global_bitmap.time - 1]) + free_ax.set_xlabel("Timestamp") + if i == 0: + free_ax.set_ylabel("Blocks") + + vlines[layer] = free_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) + else: + kw = {"ha": "center", "va": "center", + "fontsize": 12, "color": "darkgrey"} + free_ax.text(0.5, 0.5, "[Empty]", + transform=free_ax.transAxes, **kw) + + return vlines + + def _free_global(self): + if not self.plot_config["free_global"]: + return + + free_values = [] + for i in range(self.global_bitmap.time): + _, _, free = self.global_bitmap.get_fragmentation(i) + free_values.append(free) + global_free_ax = self.axd["free_global"] + global_free_ax.plot(free_values) + 
global_free_ax.set_xlim([0, self.global_bitmap.time - 1]) + global_free_ax.set_title("Global Free Blocks") + global_free_ax.set_xlabel("Timestamp") + global_free_ax.set_ylabel("Blocks") + + return global_free_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) + def _allocation_tries(self): if not self.plot_config["allocation_tries"]: return @@ -739,10 +806,16 @@ class Plotter: self.ims[layer_id]["im"].set_data(self.ims[layer_id]["bitmap"]) if self.plot_config["frag_local"]: - self.vlines_local[layer_id].set_xdata([self.time, self.time]) + self.vlines_frag_local[layer_id].set_xdata([self.time, self.time]) + + if self.plot_config["free_local"]: + self.vlines_free_local[layer_id].set_xdata([self.time, self.time]) if self.plot_config["frag_global"]: - self.vlines_global.set_xdata([self.time, self.time]) + self.vline_frag_global.set_xdata([self.time, self.time]) + + if self.plot_config["free_global"]: + self.vline_free_global.set_xdata([self.time, self.time]) if self.plot_config["allocation_tries"]: self.vline_allocation_tries.set_xdata([self.time, self.time]) From 104e712738c6e6249f30bd8873a6e4fea15c5fa9 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Mon, 18 Nov 2024 10:49:14 +0100 Subject: [PATCH 36/49] use packed bits for faster plotting --- betree/scripts/visualize_allocation_log | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index dadbffbe..d0f403ee 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -611,8 +611,8 @@ class Plotter: ims[layer.id]["height"] = bbox.height layer_bitmap = layer.get_bitmap(self.time) - colored_bitmap = self._color_disks( - layer_bitmap, layer.id) + packed_bitmap = np.packbits(layer_bitmap) + colored_bitmap = self._color_disks(packed_bitmap, layer.id) rows, cols = get_close_aspect(ims[layer.id]["width"], ims[layer.id]["height"], len(colored_bitmap[:, 0])) @@ 
-635,7 +635,7 @@ class Plotter: colored_bitmap[:, 3] = layer_bitmap start = 0 for disk_id in range(self.global_bitmap.storage_config.disks_of_layer(layer_id)): - length = self.global_bitmap.storage_config.blocks_of_disk(layer_id, disk_id) + length = self.global_bitmap.storage_config.blocks_of_disk(layer_id, disk_id) // 8 color = id_to_color(disk_id) color_array = np.tile(color, (length, 1)) @@ -795,8 +795,7 @@ class Plotter: continue if self.plot_config["bitmaps"]: - # Times 255 because the bitmap is mapped to the alpha channel. - bitmap = layer.get_bitmap(self.time) * 255 + bitmap = np.packbits(layer.get_bitmap(self.time)) rows, cols = get_close_aspect(self.ims[layer_id]["width"], self.ims[layer_id]["height"], @@ -1057,6 +1056,6 @@ if __name__ == "__main__": plotter.global_bitmap = GlobalBitMap(log_file) if args.export: - plotter.export_to_video(args.export, nproc=args.processes, end=100) + plotter.export_to_video(args.export, nproc=args.processes) else: plotter.plot() From 515a021073d89693712d37c62d97fc81765e018f Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Mon, 18 Nov 2024 11:31:10 +0100 Subject: [PATCH 37/49] plot allocation sizes --- betree/scripts/visualize_allocation_log | 55 +++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index d0f403ee..7826edec 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -388,7 +388,8 @@ class GlobalBitMap: log_file: str storage_config: StorageConfig layers: list[Layer] - tries: np.array + tries: list[int] + sizes: list[int] size: int time: int @@ -402,7 +403,8 @@ class GlobalBitMap: exit(1) self.size = self.storage_config.blocks_global() - self.tries = np.zeros(self.size) + self.tries = [] + self.sizes = [] # Create the storage structure based on the config. 
self.layers = [] @@ -434,7 +436,9 @@ class GlobalBitMap: parser = Parser(log_file) for timestamp in tqdm(parser, desc="Building Bitmap", unit="timestep"): self.layers[timestamp.layer_id].add_timestamp(timestamp) - self.tries[timestamp.time] = timestamp.tries + # TODO: What to do with deallocations (0s) + self.tries.append(timestamp.tries) + self.sizes.append(timestamp.num_blocks) self.time = timestamp.time @@ -481,6 +485,8 @@ class Plotter: "free_local": False, "free_global": False, "allocation_tries": False, + "allocation_sizes": False, + "allocation_sizes_ecdf": False, # empirical cumulative distribution function "slider": False, "checkboxes": not args.disable_checkboxes, } @@ -520,6 +526,8 @@ class Plotter: self.vlines_free_local = self._free_local() self.vline_free_global = self._free_global() self.vline_allocation_tries = self._allocation_tries() + _ = self._allocation_sizes() + _ = self._allocation_sizes_ecdf() self.slider = self._setup_slider() self.checkboxes = self._setup_checkboxes() @@ -580,6 +588,22 @@ class Plotter: for layer in self.layers: layout[-1].append("allocation_tries") + if self.plot_config["allocation_sizes"]: + layout.append([]) + gridspec["height_ratios"].append(1) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + layout[-1].append("allocation_sizes") + + if self.plot_config["allocation_sizes_ecdf"]: + layout.append([]) + gridspec["height_ratios"].append(1) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + layout[-1].append("allocation_sizes_ecdf") + if self.plot_config["slider"]: layout.append([]) gridspec["height_ratios"].append(0.1) @@ -757,6 +781,31 @@ class Plotter: return failed_allocations_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) + def _allocation_sizes(self): + if not self.plot_config["allocation_sizes"]: + return + + failed_allocations_ax = self.axd["allocation_sizes"] + 
failed_allocations_ax.hist(self.global_bitmap.sizes, bins=int(np.std(self.global_bitmap.sizes))) + failed_allocations_ax.set_title("Allocation sizes") + failed_allocations_ax.set_xlim(0, max(self.global_bitmap.sizes)) + failed_allocations_ax.set_ylim(0) + failed_allocations_ax.set_xlabel("Sizes") + failed_allocations_ax.set_ylabel("Amount") + + def _allocation_sizes_ecdf(self): + if not self.plot_config["allocation_sizes_ecdf"]: + return + + failed_allocations_ax = self.axd["allocation_sizes_ecdf"] + failed_allocations_ax.hist(self.global_bitmap.sizes, bins=max(self.global_bitmap.sizes), + density=True, cumulative=True) + failed_allocations_ax.set_title("Allocation sizes ECDF") + failed_allocations_ax.set_xlim(0, max(self.global_bitmap.sizes)) + failed_allocations_ax.set_ylim(0, 1) + failed_allocations_ax.set_xlabel("Sizes") + failed_allocations_ax.set_ylabel("Proportion") + def _setup_slider(self): """Helper method for setting up the slider for interactive plotting.""" if not self.plot_config["slider"]: From fb1298698848bd27ac229122839e21769e7f64bc Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Mon, 18 Nov 2024 11:38:21 +0100 Subject: [PATCH 38/49] minor changes for visual clarity --- betree/scripts/visualize_allocation_log | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 7826edec..6323e37b 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -689,8 +689,8 @@ class Plotter: frag_ax.set_xlim([0, self.global_bitmap.time - 1]) frag_ax.set_ylim([0, 1]) frag_ax.set_xlabel("Timestamp") - if i == 0: - frag_ax.set_ylabel("Fragmentation") + # TODO: only plot ylabel on left most layer + frag_ax.set_ylabel("Fragmentation") vlines[layer] = frag_ax.axvline( x=self.time, color="red", linestyle="--", linewidth=1) @@ -699,6 +699,8 @@ class Plotter: "fontsize": 12, "color": "darkgrey"} frag_ax.text(0.5, 0.5, 
"[Empty]", transform=frag_ax.transAxes, **kw) + frag_ax.set_xticks([], []) + frag_ax.set_yticks([], []) return vlines @@ -737,10 +739,11 @@ class Plotter: free_values.append(free) free_ax.plot(free_values) + free_ax.set_ylim(0) free_ax.set_xlim([0, self.global_bitmap.time - 1]) free_ax.set_xlabel("Timestamp") - if i == 0: - free_ax.set_ylabel("Blocks") + # TODO: only plot ylabel on left most layer + free_ax.set_ylabel("Blocks") vlines[layer] = free_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) else: @@ -748,6 +751,8 @@ class Plotter: "fontsize": 12, "color": "darkgrey"} free_ax.text(0.5, 0.5, "[Empty]", transform=free_ax.transAxes, **kw) + free_ax.set_xticks([], []) + free_ax.set_yticks([], []) return vlines @@ -761,6 +766,7 @@ class Plotter: free_values.append(free) global_free_ax = self.axd["free_global"] global_free_ax.plot(free_values) + global_free_ax.set_ylim(0) global_free_ax.set_xlim([0, self.global_bitmap.time - 1]) global_free_ax.set_title("Global Free Blocks") global_free_ax.set_xlabel("Timestamp") @@ -774,6 +780,7 @@ class Plotter: failed_allocations_ax = self.axd["allocation_tries"] failed_allocations_ax.plot(self.global_bitmap.tries) + failed_allocations_ax.set_ylim(0) failed_allocations_ax.set_xlim([0, self.global_bitmap.time - 1]) failed_allocations_ax.set_title("Allocation Tries") failed_allocations_ax.set_xlabel("Timestamp") From dfe6dbe3606e5a150cbc5d9d7c34f9e53520622a Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Tue, 19 Nov 2024 11:06:45 +0100 Subject: [PATCH 39/49] minor --- betree/scripts/visualize_allocation_log | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 6323e37b..8b777ea5 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -202,7 +202,7 @@ class Fragmentation: def fragmentation_of_bitmap(bitmap: np.array) -> tuple[float, int, int]: 
"""Calculates the fragmentation of a bitmap.""" if len(bitmap) == 0: - return 0 + return 0, 0, 0 total_free = np.count_nonzero(bitmap == 0) largest_free = Fragmentation.longest_repeating_0s(bitmap) @@ -260,7 +260,7 @@ class Segment: self.frag_list[timestamp.time] = Fragmentation.fragmentation_of_bitmap(bitmap) - def get_bitmap(self, time: int) -> np.array: + def get_bitmap(self, time: int) -> np.ndarray: """Returns the allocation bitmap of a segment at the specified time or a available time before, if the requested time is not in the changelist.""" bitmap = np.zeros(self.size, dtype=np.uint8) @@ -311,7 +311,7 @@ class Disk: for segment in tqdm(self.segments, desc=f"Calculating fragmentation of disk {self.id}", leave=False, unit="segment"): segment.calculate_fragmentation() - def get_bitmap(self, time: int) -> np.array: + def get_bitmap(self, time: int) -> np.ndarray: """Returns the allocation bitmap of a disk at the specified time or a available time before, if the requested time is not in the changelist.""" bitmap = np.zeros(self.size, dtype=np.uint8) @@ -360,7 +360,7 @@ class Layer: for disk in tqdm(self.disks, desc=f"Calculating fragmentation of layer {self.id}", leave=False, unit="disk"): disk.calculate_fragmentation() - def get_bitmap(self, time: int) -> np.array: + def get_bitmap(self, time: int) -> np.ndarray: """Returns the allocation bitmap of a Layer at the specified time or a available time before, if the requested time is not in the changelist.""" bitmap = np.zeros(self.size, dtype=np.uint8) @@ -447,7 +447,7 @@ class GlobalBitMap: for layer in tqdm(self.layers, desc="Calculating fragmentation", unit="layer"): layer.calculate_fragmentation() - def get_bitmap(self, time: int) -> np.array: + def get_bitmap(self, time: int) -> np.ndarray: """Returns the allocation bitmap the storage at the specified time.""" bitmap = np.zeros(self.size, dtype=np.uint8) start = 0 @@ -455,6 +455,8 @@ class GlobalBitMap: bitmap[start:start + layer.size] = 
layer.get_bitmap(time) start += layer.size + return bitmap + def get_fragmentation(self, time) -> tuple[float, int, int]: """Returns the fragmentation, largest and total free space of the storage.""" total_free = 0 @@ -653,7 +655,7 @@ class Plotter: return ims - def _color_disks(self, layer_bitmap: np.array, layer_id: int) -> np.ndarray: + def _color_disks(self, layer_bitmap: np.ndarray, layer_id: int) -> np.ndarray: """Colors the disks within a layer differently.""" colored_bitmap = np.zeros((len(layer_bitmap), 4), dtype=np.uint8) colored_bitmap[:, 3] = layer_bitmap From e34615a32c0b914bf8ab1bb8bea267b48e393452 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Mon, 18 Nov 2024 14:57:15 +0100 Subject: [PATCH 40/49] minor --- betree/scripts/visualize_allocation_log | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 8b777ea5..027ed4a8 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -741,7 +741,8 @@ class Plotter: free_values.append(free) free_ax.plot(free_values) - free_ax.set_ylim(0) + free_ax.set_yscale("log") + free_ax.set_ylim(1) free_ax.set_xlim([0, self.global_bitmap.time - 1]) free_ax.set_xlabel("Timestamp") # TODO: only plot ylabel on left most layer @@ -768,7 +769,8 @@ class Plotter: free_values.append(free) global_free_ax = self.axd["free_global"] global_free_ax.plot(free_values) - global_free_ax.set_ylim(0) + global_free_ax.set_yscale("log") + global_free_ax.set_ylim(1) global_free_ax.set_xlim([0, self.global_bitmap.time - 1]) global_free_ax.set_title("Global Free Blocks") global_free_ax.set_xlabel("Timestamp") From 9661f90eb32cee3a2d25912306f4cb078c1d8463 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Thu, 21 Nov 2024 12:57:42 +0100 Subject: [PATCH 41/49] unaligned allocation tries --- betree/scripts/visualize_allocation_log | 84 +++++++++++++++++++++---- 1 file changed, 71 insertions(+), 
13 deletions(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 027ed4a8..4dd3563e 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -388,7 +388,7 @@ class GlobalBitMap: log_file: str storage_config: StorageConfig layers: list[Layer] - tries: list[int] + tries: tuple[np.ndarray, np.ndarray] sizes: list[int] size: int time: int @@ -403,7 +403,7 @@ class GlobalBitMap: exit(1) self.size = self.storage_config.blocks_global() - self.tries = [] + self.tries = (np.array([]), np.array([])) self.sizes = [] # Create the storage structure based on the config. @@ -436,8 +436,8 @@ class GlobalBitMap: parser = Parser(log_file) for timestamp in tqdm(parser, desc="Building Bitmap", unit="timestep"): self.layers[timestamp.layer_id].add_timestamp(timestamp) - # TODO: What to do with deallocations (0s) - self.tries.append(timestamp.tries) + if timestamp.tries != 0: + self.tries = (np.append(self.tries[0], timestamp.time), np.append(self.tries[1], timestamp.tries)) self.sizes.append(timestamp.num_blocks) self.time = timestamp.time @@ -487,6 +487,7 @@ class Plotter: "free_local": False, "free_global": False, "allocation_tries": False, + "allocation_tries_aligned": False, "allocation_sizes": False, "allocation_sizes_ecdf": False, # empirical cumulative distribution function "slider": False, @@ -494,23 +495,25 @@ class Plotter: } if args.available_components: - print("\033[1mAvailable Components:\033[0m") - for component in self.plot_config.keys(): - print(component) + self.print_available_components() exit(1) for key in args.components: if key not in self.plot_config.keys(): print(f"\033[31mThe component '{key}' does not exist.\033[0m") - print("Available components are: ") - print(self.plot_config.keys()) + self.print_available_components() exit(1) else: self.plot_config[key] = True self.layers = [int(i) for i in args.layers] + def print_available_components(self): + 
print("\033[1mAvailable Components:\033[0m") + for component in self.plot_config.keys(): + print(component) + def plot(self): """Sets up the plot and displays it""" self.fig = plt.figure(layout="constrained") @@ -528,6 +531,7 @@ class Plotter: self.vlines_free_local = self._free_local() self.vline_free_global = self._free_global() self.vline_allocation_tries = self._allocation_tries() + self.vline_allocation_tries_aligned = self._allocation_tries_aligned() _ = self._allocation_sizes() _ = self._allocation_sizes_ecdf() self.slider = self._setup_slider() @@ -590,6 +594,14 @@ class Plotter: for layer in self.layers: layout[-1].append("allocation_tries") + if self.plot_config["allocation_tries_aligned"]: + layout.append([]) + gridspec["height_ratios"].append(1) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + layout[-1].append("allocation_tries_aligned") + if self.plot_config["allocation_sizes"]: layout.append([]) gridspec["height_ratios"].append(1) @@ -782,11 +794,49 @@ class Plotter: if not self.plot_config["allocation_tries"]: return + data = self.global_bitmap.tries[1] + x_values = np.arange(0, len(data)) + failed_allocations_ax = self.axd["allocation_tries"] - failed_allocations_ax.plot(self.global_bitmap.tries) + failed_allocations_ax.scatter(x_values, data, s=3.0, linewidths=0, label="Allocation Tries") + + window_sizes = [100, 500] + colors = ['red', 'orange', 'black', 'purple'] + line_styles = ['-', '--', '-.', ':'] + moving_average = np.zeros_like(data, dtype=float) + + for i, window_size in enumerate(window_sizes): + moving_average = np.zeros_like(data, dtype=float) + for j in range(len(data)): + window_start = max(0, j - window_size + 1) + window_end = j + 1 + moving_average[j] = np.mean(data[window_start:window_end]) + + # Plot with different colors and line styles + failed_allocations_ax.plot(x_values, moving_average, + linewidth=1.5, + color=colors[i % len(colors)], + linestyle=line_styles[i % 
len(line_styles)], + label=f"Moving Average ({window_size})") + failed_allocations_ax.set_ylim(0) - failed_allocations_ax.set_xlim([0, self.global_bitmap.time - 1]) + failed_allocations_ax.set_xlim([0, len(data)]) failed_allocations_ax.set_title("Allocation Tries") + failed_allocations_ax.set_xlabel("Allocation") + failed_allocations_ax.set_ylabel("Tries") + failed_allocations_ax.legend(loc="upper left") + + return failed_allocations_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) + + def _allocation_tries_aligned(self): + if not self.plot_config["allocation_tries_aligned"]: + return + + failed_allocations_ax = self.axd["allocation_tries_aligned"] + failed_allocations_ax.scatter(self.global_bitmap.tries[0], self.global_bitmap.tries[1], s=3.0, linewidths=0) + failed_allocations_ax.set_ylim(0) + failed_allocations_ax.set_xlim([0, self.global_bitmap.time - 1]) + failed_allocations_ax.set_title("Allocation Tries Aligned") failed_allocations_ax.set_xlabel("Timestamp") failed_allocations_ax.set_ylabel("Tries") @@ -877,7 +927,12 @@ class Plotter: self.vline_free_global.set_xdata([self.time, self.time]) if self.plot_config["allocation_tries"]: - self.vline_allocation_tries.set_xdata([self.time, self.time]) + # Get the index of the last allocation that happened before or at the current timestamp. + index = np.argmax(self.global_bitmap.tries[0] > self.time) - 1 + self.vline_allocation_tries.set_xdata([index, index]) + + if self.plot_config["allocation_tries_aligned"]: + self.vline_allocation_tries_aligned.set_xdata([self.time, self.time]) self.fig.canvas.draw_idle() @@ -1006,7 +1061,10 @@ class Plotter: writer.setup(self.fig, filename, dpi=dpi) for _ in range(end - start): self.time += 1 - self._timestamp_update() + if self.plot_config["slider"]: + self.slider.set_val(self.time) + else: + self._timestamp_update() writer.grab_frame() # Update the global progress bar. 
From 9ebd2256457eb8926cbc7b0cbbe69d0f9aa882d9 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Thu, 21 Nov 2024 13:31:51 +0100 Subject: [PATCH 42/49] lines to separate segments --- betree/scripts/visualize_allocation_log | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/betree/scripts/visualize_allocation_log b/betree/scripts/visualize_allocation_log index 4dd3563e..9c534d29 100755 --- a/betree/scripts/visualize_allocation_log +++ b/betree/scripts/visualize_allocation_log @@ -661,6 +661,15 @@ class Plotter: self.axd[name].set_xlabel("Block") self.axd[name].set_ylabel("Block") + # Draw horizontal lines to separate segments. + begin = 0 + for disk in layer.disks: + for i, segment in enumerate(disk.segments): + # Do not draw line before first segment. + if i > 0: + _ = self.axd[name].axhline(begin / cols / 8, 0, 1, color="black", linestyle="--", linewidth=1) + begin += segment.size + self.axd[name].set_title(f"Layer {layer.id}") self.axd[name].set_xticks([]) self.axd[name].set_yticks([]) @@ -1080,7 +1089,6 @@ class Plotter: def id_to_color(i: int) -> tuple[int, int, int]: """Maps the id to a color specified in the COLOR_MAPPING""" COLOR_MAPPING = [ - (0, 0, 0), (0, 0, 255), (0, 255, 0), (0, 255, 255), From 49d1b0463e1a7624fb20987ebb0fa241a11e8fcf Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Fri, 22 Nov 2024 10:13:47 +0100 Subject: [PATCH 43/49] allocation_log feature flag --- betree/Cargo.toml | 2 + betree/src/data_management/dmu.rs | 61 ++++++++++++++++++------------- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/betree/Cargo.toml b/betree/Cargo.toml index 58806262..d9f116a3 100644 --- a/betree/Cargo.toml +++ b/betree/Cargo.toml @@ -83,4 +83,6 @@ figment_config = ["figment"] latency_metrics = [] experimental-api = [] nvm = ["pmdk"] +# Log the allocations and deallocations done for later analysis +allocation_log = [] diff --git a/betree/src/data_management/dmu.rs b/betree/src/data_management/dmu.rs index 
9ab82f68..8b3f8f78 100644 --- a/betree/src/data_management/dmu.rs +++ b/betree/src/data_management/dmu.rs @@ -27,7 +27,7 @@ use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}; use std::{ collections::HashMap, fs::OpenOptions, - io::Write, + io::{BufWriter, Write}, mem::replace, ops::DerefMut, pin::Pin, @@ -65,7 +65,8 @@ where next_modified_node_id: AtomicU64, next_disk_id: AtomicU64, report_tx: Option>, - allocation_log_file: Mutex, + #[cfg(feature = "allocation_log")] + allocation_log_file: Mutex>, } impl Dmu @@ -93,15 +94,15 @@ where .collect::>() .into_boxed_slice(); - // TODO: make append only - let allocation_log_file = Mutex::new( + #[cfg(feature = "allocation_log")] + let allocation_log_file = Mutex::new(BufWriter::new( OpenOptions::new() .create(true) .write(true) .truncate(true) .open(ALLOCATION_LOG_FILE) .expect("Failed to create allocation log file"), - ); + )); Dmu { // default_compression_state: default_compression.new_compression().expect("Can't create compression state"), @@ -119,6 +120,7 @@ where next_modified_node_id: AtomicU64::new(1), next_disk_id: AtomicU64::new(0), report_tx: None, + #[cfg(feature = "allocation_log")] allocation_log_file, } } @@ -140,27 +142,30 @@ where /// Writes the global header for the allocation logging. 
pub fn write_global_header(&self) -> Result<(), Error> { - let mut file = self.allocation_log_file.lock(); + #[cfg(feature = "allocation_log")] + { + let mut file = self.allocation_log_file.lock(); - // Number of storage classes - file.write_u8(self.pool.storage_class_count())?; + // Number of storage classes + file.write_u8(self.pool.storage_class_count())?; - // Disks per class - for class in 0..self.pool.storage_class_count() { - let disk_count = self.pool.disk_count(class); - file.write_u16::(disk_count)?; - } + // Disks per class + for class in 0..self.pool.storage_class_count() { + let disk_count = self.pool.disk_count(class); + file.write_u16::(disk_count)?; + } - // Segments per disk - for class in 0..self.pool.storage_class_count() { - for disk in 0..self.pool.disk_count(class) { - let segment_count = self.pool.size_in_blocks(class, disk); - file.write_u64::(segment_count.as_u64())?; + // Segments per disk + for class in 0..self.pool.storage_class_count() { + for disk in 0..self.pool.disk_count(class) { + let segment_count = self.pool.size_in_blocks(class, disk); + file.write_u64::(segment_count.as_u64())?; + } } - } - // Blocks per segment (constant) - file.write_u64::(SEGMENT_SIZE.try_into().unwrap())?; + // Blocks per segment (constant) + file.write_u64::(SEGMENT_SIZE.try_into().unwrap())?; + } Ok(()) } @@ -245,6 +250,7 @@ where obj_ptr.offset().disk_id(), obj_ptr.size(), ); + #[cfg(feature = "allocation_log")] { let mut file = self.allocation_log_file.lock(); let _ = file.write_u8(Action::Deallocate.as_bool() as u8); @@ -600,13 +606,16 @@ where let allocation = allocator.allocate(size.as_u32()); total_tries += allocation.1; if let Some(segment_offset) = allocation.0 { - let mut file = self.allocation_log_file.lock(); let disk_offset = segment_id.disk_offset(segment_offset); - file.write_u8(Action::Allocate.as_bool() as u8)?; - file.write_u64::(disk_offset.as_u64())?; - file.write_u32::(size.as_u32())?; - file.write_u32::(total_tries)?; + #[cfg(feature 
= "allocation_log")] + { + let mut file = self.allocation_log_file.lock(); + file.write_u8(Action::Allocate.as_bool() as u8)?; + file.write_u64::(disk_offset.as_u64())?; + file.write_u32::(size.as_u32())?; + file.write_u32::(total_tries)?; + } break disk_offset; } From fc7a1ff00dffa880bcd8147740e0e86043a5a5e0 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Wed, 11 Dec 2024 09:14:40 +0100 Subject: [PATCH 44/49] more complete allocation_log feature flag --- betree/src/allocator.rs | 30 +++++++++++++++++++++--------- betree/src/data_management/dmu.rs | 16 ++++++++++++++-- betree/src/database/mod.rs | 2 ++ 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/betree/src/allocator.rs b/betree/src/allocator.rs index 2b7d4963..1f035783 100644 --- a/betree/src/allocator.rs +++ b/betree/src/allocator.rs @@ -18,6 +18,11 @@ pub struct SegmentAllocator { data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), } +#[cfg(not(feature = "allocation_log"))] +type AllocateReturnType = Option; +#[cfg(feature = "allocation_log")] +type AllocateReturnType = (Option, u32); + impl SegmentAllocator { /// Constructs a new `SegmentAllocator` given the segment allocation bitmap. /// The `bitmap` must have a length of `SEGMENT_SIZE`. @@ -29,17 +34,27 @@ impl SegmentAllocator { /// Allocates a block of the given `size`. /// Returns `None` if the allocation request cannot be satisfied. 
- pub fn allocate(&mut self, size: u32) -> (Option, u32) { + pub fn allocate(&mut self, size: u32) -> AllocateReturnType { if size == 0 { + #[cfg(not(feature = "allocation_log"))] + return Some(0); + #[cfg(feature = "allocation_log")] return (Some(0), 0); } + #[cfg(feature = "allocation_log")] let mut tries = 0; let offset = { let mut idx = 0; loop { - tries += 1; + #[cfg(feature = "allocation_log")] + { + tries += 1; + } loop { if idx + size > SEGMENT_SIZE as u32 { + #[cfg(not(feature = "allocation_log"))] + return None; + #[cfg(feature = "allocation_log")] return (None, tries); } if !self.data[idx as usize] { @@ -58,7 +73,10 @@ impl SegmentAllocator { } }; self.mark(offset, size, Action::Allocate); - (Some(offset), tries) + #[cfg(not(feature = "allocation_log"))] + return Some(offset); + #[cfg(feature = "allocation_log")] + return (Some(offset), tries); } /// Allocates a block of the given `size` at `offset`. @@ -104,12 +122,6 @@ impl SegmentAllocator { range.fill(action.as_bool()); } - - /// Writes the bitmap to a writer. - pub fn write_bitmap(&self, writer: &mut W) -> Result<(), Error> { - writer.write_all(self.data.as_raw_slice())?; - Ok(()) - } } // TODO better wording diff --git a/betree/src/data_management/dmu.rs b/betree/src/data_management/dmu.rs index 8b3f8f78..7a04188b 100644 --- a/betree/src/data_management/dmu.rs +++ b/betree/src/data_management/dmu.rs @@ -547,6 +547,7 @@ where // size? // Or save the largest contiguous memory region as a value and compare against that. For // that the allocator needs to support that and we have to 'bubble' the largest value up. 
+ #[cfg(feature = "allocation_log")] let mut total_tries: u32 = 0; 'class: for &class in strategy.iter().flatten() { let disks_in_class = self.pool.disk_count(class); @@ -604,8 +605,18 @@ where let bitmap = self.handler.get_allocation_bitmap(*segment_id, self)?; let mut allocator = bitmap.access(); let allocation = allocator.allocate(size.as_u32()); - total_tries += allocation.1; - if let Some(segment_offset) = allocation.0 { + #[cfg(feature = "allocation_log")] + { + total_tries += allocation.1; + } + + // This has to be done like that, such that offset is in scope below + #[cfg(feature = "allocation_log")] + let offset = allocation.0; + #[cfg(not(feature = "allocation_log"))] + let offset = allocation; + + if let Some(segment_offset) = offset { let disk_offset = segment_id.disk_offset(segment_offset); #[cfg(feature = "allocation_log")] @@ -619,6 +630,7 @@ where break disk_offset; } + let next_segment_id = segment_id.next(disk_size); trace!( "Next allocator segment: {:?} -> {:?} ({:?})", diff --git a/betree/src/database/mod.rs b/betree/src/database/mod.rs index 90cc3029..7b3ef1b7 100644 --- a/betree/src/database/mod.rs +++ b/betree/src/database/mod.rs @@ -431,6 +431,8 @@ impl Database { if let Some(tx) = &dml_tx { dmu.set_report(tx.clone()); } + + #[cfg(feature = "allocation_log")] dmu.write_global_header()?; let (tree, root_ptr) = builder.select_root_tree(Arc::new(dmu))?; From cefae1bb2a741ecb1f6b3d5ea6410a3e026537a6 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Wed, 11 Dec 2024 09:15:39 +0100 Subject: [PATCH 45/49] runtime allocation_log path --- betree/src/data_management/dmu.rs | 6 +++--- betree/src/database/mod.rs | 8 +++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/betree/src/data_management/dmu.rs b/betree/src/data_management/dmu.rs index 7a04188b..94a37b3f 100644 --- a/betree/src/data_management/dmu.rs +++ b/betree/src/data_management/dmu.rs @@ -30,6 +30,7 @@ use std::{ io::{BufWriter, Write}, mem::replace, ops::DerefMut, + 
path::PathBuf, pin::Pin, sync::{ atomic::{AtomicU64, Ordering}, @@ -38,8 +39,6 @@ use std::{ thread::yield_now, }; -const ALLOCATION_LOG_FILE: &str = "allocation_log.bin"; - /// The Data Management Unit. pub struct Dmu where @@ -83,6 +82,7 @@ where alloc_strategy: [[Option; NUM_STORAGE_CLASSES]; NUM_STORAGE_CLASSES], cache: E, handler: Handler>>, + #[cfg(feature = "allocation_log")] allocation_log_file_path: PathBuf, ) -> Self { let allocation_data = (0..pool.storage_class_count()) .map(|class| { @@ -100,7 +100,7 @@ where .create(true) .write(true) .truncate(true) - .open(ALLOCATION_LOG_FILE) + .open(allocation_log_file_path) .expect("Failed to create allocation log file"), )); diff --git a/betree/src/database/mod.rs b/betree/src/database/mod.rs index 7b3ef1b7..bc9f37e0 100644 --- a/betree/src/database/mod.rs +++ b/betree/src/database/mod.rs @@ -31,7 +31,7 @@ use serde::{de::DeserializeOwned, Deserialize, Serialize}; use std::{ collections::HashMap, iter::FromIterator, - path::Path, + path::{Path, PathBuf}, sync::{ atomic::{AtomicU64, Ordering}, Arc, @@ -147,6 +147,9 @@ pub struct DatabaseConfiguration { /// If and how to log database metrics pub metrics: Option, + + /// Where to log the allocations + pub allocation_log_file_path: PathBuf, } impl Default for DatabaseConfiguration { @@ -162,6 +165,7 @@ impl Default for DatabaseConfiguration { sync_interval_ms: Some(DEFAULT_SYNC_INTERVAL_MS), metrics: None, migration_policy: None, + allocation_log_file_path: PathBuf::from("allocation_log.bin"), } } } @@ -237,6 +241,8 @@ impl DatabaseConfiguration { strategy, ClockCache::new(self.cache_size), handler, + #[cfg(feature = "allocation_log")] + self.allocation_log_file_path.clone(), ) } From a4458aa8881fcafd33949a22050d35890c61bb19 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Wed, 11 Dec 2024 09:49:12 +0100 Subject: [PATCH 46/49] information on how to visualize allocations --- betree/scripts/README.md | 37 +++++++++++++++++++++++++++++++++ 
betree/scripts/requirements.txt | 13 ++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 betree/scripts/README.md create mode 100644 betree/scripts/requirements.txt diff --git a/betree/scripts/README.md b/betree/scripts/README.md new file mode 100644 index 00000000..48d4369f --- /dev/null +++ b/betree/scripts/README.md @@ -0,0 +1,37 @@ +# Allocation Log Visualization + +This script visualizes the allocation and deallocation of blocks within the key-value database. It helps to understand how storage space is being used and identify potential optimization opportunities. + +The allocation log visualization script is tested with Python 3.12.7 and the packages listed in `requirements.txt`. + +The main dependencies are matplotlib, numpy, tqdm and sortedcontainers. + +## Setup + +Run the following to create a working environment for the script: + +```bash +python3 -m venv .venv +source .venv/bin/activate +python3 -m pip install -r scripts/requirements.txt +``` + +## Generating the Allocation Log + +To generate the `allocation_log.bin` file, you need to enable the `allocation_log` feature flag when compiling the `betree` crate, for instance by running +```bash +cargo build --features allocation_log +``` +or by enabling it in the `Cargo.toml`. + +The path where the log is saved can be set with the runtime configuration parameter `allocation_log_file_path`. The default is `$PWD/allocation_log.bin`. + +## Using the Allocation Log + +Once a log file has been obtained, simply run the following to visualize the recorded (de-)allocations: +```bash +./scripts/visualize_allocation_log allocation_log.bin +``` + +To get help and see the available options, run the script with the `-h` flag. 
+ diff --git a/betree/scripts/requirements.txt b/betree/scripts/requirements.txt new file mode 100644 index 00000000..80b46cb8 --- /dev/null +++ b/betree/scripts/requirements.txt @@ -0,0 +1,13 @@ +contourpy==1.3.1 +cycler==0.12.1 +fonttools==4.55.3 +kiwisolver==1.4.7 +matplotlib==3.9.3 +numpy==2.2.0 +packaging==24.2 +pillow==11.0.0 +pyparsing==3.2.0 +python-dateutil==2.9.0.post0 +six==1.17.0 +sortedcontainers==2.4.0 +tqdm==4.67.1 From e864db17703ce45b3d54853255f4c9bf325b4887 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Fri, 13 Dec 2024 15:02:20 +0100 Subject: [PATCH 47/49] moved scripts to project root --- {betree/scripts => scripts}/README.md | 0 {betree/scripts => scripts}/requirements.txt | 0 {betree/scripts => scripts}/visualize_allocation_log | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {betree/scripts => scripts}/README.md (100%) rename {betree/scripts => scripts}/requirements.txt (100%) rename {betree/scripts => scripts}/visualize_allocation_log (100%) diff --git a/betree/scripts/README.md b/scripts/README.md similarity index 100% rename from betree/scripts/README.md rename to scripts/README.md diff --git a/betree/scripts/requirements.txt b/scripts/requirements.txt similarity index 100% rename from betree/scripts/requirements.txt rename to scripts/requirements.txt diff --git a/betree/scripts/visualize_allocation_log b/scripts/visualize_allocation_log similarity index 100% rename from betree/scripts/visualize_allocation_log rename to scripts/visualize_allocation_log From ff6bfb49dd6f64c937e8beb74cce687368107062 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Wed, 18 Dec 2024 09:06:53 +0100 Subject: [PATCH 48/49] removed allocation tries, instead count cycles Different allocators have different or even no notion of a try. So instead remove tries completely and introduce an allocator agnostic metric to measure how long an allocation takes. 
--- betree/src/allocator.rs | 22 +--- betree/src/data_management/dmu.rs | 50 ++++++--- scripts/visualize_allocation_log | 169 +++++++++++++++++++++++------- 3 files changed, 166 insertions(+), 75 deletions(-) diff --git a/betree/src/allocator.rs b/betree/src/allocator.rs index 1f035783..04f7c73f 100644 --- a/betree/src/allocator.rs +++ b/betree/src/allocator.rs @@ -18,11 +18,6 @@ pub struct SegmentAllocator { data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), } -#[cfg(not(feature = "allocation_log"))] -type AllocateReturnType = Option; -#[cfg(feature = "allocation_log")] -type AllocateReturnType = (Option, u32); - impl SegmentAllocator { /// Constructs a new `SegmentAllocator` given the segment allocation bitmap. /// The `bitmap` must have a length of `SEGMENT_SIZE`. @@ -34,28 +29,16 @@ impl SegmentAllocator { /// Allocates a block of the given `size`. /// Returns `None` if the allocation request cannot be satisfied. - pub fn allocate(&mut self, size: u32) -> AllocateReturnType { + pub fn allocate(&mut self, size: u32) -> Option { if size == 0 { - #[cfg(not(feature = "allocation_log"))] return Some(0); - #[cfg(feature = "allocation_log")] - return (Some(0), 0); } - #[cfg(feature = "allocation_log")] - let mut tries = 0; let offset = { let mut idx = 0; loop { - #[cfg(feature = "allocation_log")] - { - tries += 1; - } loop { if idx + size > SEGMENT_SIZE as u32 { - #[cfg(not(feature = "allocation_log"))] return None; - #[cfg(feature = "allocation_log")] - return (None, tries); } if !self.data[idx as usize] { break; @@ -73,10 +56,7 @@ impl SegmentAllocator { } }; self.mark(offset, size, Action::Allocate); - #[cfg(not(feature = "allocation_log"))] return Some(offset); - #[cfg(feature = "allocation_log")] - return (Some(offset), tries); } /// Allocates a block of the given `size` at `offset`. 
diff --git a/betree/src/data_management/dmu.rs b/betree/src/data_management/dmu.rs index 94a37b3f..8b5a27eb 100644 --- a/betree/src/data_management/dmu.rs +++ b/betree/src/data_management/dmu.rs @@ -25,6 +25,7 @@ use crossbeam_channel::Sender; use futures::{executor::block_on, future::ok, prelude::*}; use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}; use std::{ + arch::x86_64::{__rdtscp, _rdtsc}, collections::HashMap, fs::OpenOptions, io::{BufWriter, Write}, @@ -256,7 +257,8 @@ where let _ = file.write_u8(Action::Deallocate.as_bool() as u8); let _ = file.write_u64::(obj_ptr.offset.as_u64()); let _ = file.write_u32::(obj_ptr.size.as_u32()); - let _ = file.write_u32::(0); + let _ = file.write_u64::(0); + let _ = file.write_u64::(0); } if let (CopyOnWriteEvent::Removed, Some(tx), CopyOnWriteReason::Remove) = ( self.handler.copy_on_write( @@ -548,7 +550,9 @@ where // Or save the largest contiguous memory region as a value and compare against that. For // that the allocator needs to support that and we have to 'bubble' the largest value up. 
#[cfg(feature = "allocation_log")] - let mut total_tries: u32 = 0; + let mut start_cycles_global = get_cycles(); + #[cfg(feature = "allocation_log")] + let mut total_cycles_local: u64 = 0; 'class: for &class in strategy.iter().flatten() { let disks_in_class = self.pool.disk_count(class); if disks_in_class == 0 { @@ -604,31 +608,35 @@ where // Has to be split because else the temporary value is dropped while borrowing let bitmap = self.handler.get_allocation_bitmap(*segment_id, self)?; let mut allocator = bitmap.access(); - let allocation = allocator.allocate(size.as_u32()); - #[cfg(feature = "allocation_log")] + + #[cfg(not(feature = "allocation_log"))] { - total_tries += allocation.1; + let allocation = allocator.allocate(size.as_u32()); + if let Some(segment_offset) = allocation { + let disk_offset = segment_id.disk_offset(segment_offset); + break disk_offset; + } } - - // This has to be done like that, such that offset is in scope below #[cfg(feature = "allocation_log")] - let offset = allocation.0; - #[cfg(not(feature = "allocation_log"))] - let offset = allocation; + { + let start_cycles_allocation = get_cycles(); + let allocation = allocator.allocate(size.as_u32()); + let end_cycles_allocation = get_cycles(); + total_cycles_local += end_cycles_allocation - start_cycles_allocation; - if let Some(segment_offset) = offset { - let disk_offset = segment_id.disk_offset(segment_offset); + if let Some(segment_offset) = allocation { + let disk_offset = segment_id.disk_offset(segment_offset); + let total_cycles_global = end_cycles_allocation - start_cycles_global; - #[cfg(feature = "allocation_log")] - { let mut file = self.allocation_log_file.lock(); file.write_u8(Action::Allocate.as_bool() as u8)?; file.write_u64::(disk_offset.as_u64())?; file.write_u32::(size.as_u32())?; - file.write_u32::(total_tries)?; - } + file.write_u64::(total_cycles_local)?; + file.write_u64::(total_cycles_global)?; - break disk_offset; + break disk_offset; + } } let next_segment_id = 
segment_id.next(disk_size); @@ -1118,3 +1126,11 @@ where self.report_tx = Some(tx); } } + +fn get_cycles() -> u64 { + unsafe { + //let mut aux = 0; + //__rdtscp(aux) + _rdtsc() + } +} diff --git a/scripts/visualize_allocation_log b/scripts/visualize_allocation_log index 9c534d29..bedff062 100755 --- a/scripts/visualize_allocation_log +++ b/scripts/visualize_allocation_log @@ -26,7 +26,7 @@ SEGMENT_SIZE_LOG_2 = 18 SEGMENT_SIZE = 1 << SEGMENT_SIZE_LOG_2 SEGMENT_SIZE_MASK = SEGMENT_SIZE - 1 # This is the amount of bytes one (de-)allocation has in the log. -SIZE_PER_ALLOCATION = 17 +SIZE_PER_ALLOCATION = 29 class StorageConfig: @@ -86,17 +86,20 @@ class Timestamp: op_type: int offset: int num_blocks: int + cycles_alloc: int + cycles_total: int layer_id: int disk_id: int block_offset: int segment_id: int segment_offset: int - def __init__(self, op_type: int, offset: int, num_blocks: int, tries: int, time: int): + def __init__(self, op_type: int, offset: int, num_blocks: int, cycles_alloc: int, cycles_total: int, time: int): self.op_type = op_type self.offset = offset self.num_blocks = num_blocks - self.tries = tries + self.cycles_alloc = cycles_alloc + self.cycles_total = cycles_total self.time = time self._parse_offset() @@ -104,7 +107,8 @@ class Timestamp: return (f"Timestep(op_type: {self.op_type}, " f"offset: {self.offset}, " f"num_blocks: {self.num_blocks}, " - f"tries: {self.tries}, " + f"cycles_alloc: {self.cycles_alloc}, " + f"cycles_total: {self.cycles_total}, " f"time: {self.time}, " f"layer_id: {self.layer_id}, " f"disk_id: {self.disk_id}, " @@ -179,13 +183,14 @@ class Parser: op_type = struct.unpack(" int: """Returns the remaining bytes in a file from the current position of the file pointer.""" @@ -388,7 +393,8 @@ class GlobalBitMap: log_file: str storage_config: StorageConfig layers: list[Layer] - tries: tuple[np.ndarray, np.ndarray] + cycles_alloc: tuple[np.ndarray, np.ndarray] + cycles_total: tuple[np.ndarray, np.ndarray] sizes: list[int] size: int time: 
int @@ -403,7 +409,8 @@ class GlobalBitMap: exit(1) self.size = self.storage_config.blocks_global() - self.tries = (np.array([]), np.array([])) + self.cycles_alloc = (np.array([]), np.array([])) + self.cycles_total = (np.array([]), np.array([])) self.sizes = [] # Create the storage structure based on the config. @@ -436,8 +443,10 @@ class GlobalBitMap: parser = Parser(log_file) for timestamp in tqdm(parser, desc="Building Bitmap", unit="timestep"): self.layers[timestamp.layer_id].add_timestamp(timestamp) - if timestamp.tries != 0: - self.tries = (np.append(self.tries[0], timestamp.time), np.append(self.tries[1], timestamp.tries)) + if timestamp.cycles_alloc != 0: + self.cycles_alloc = (np.append(self.cycles_alloc[0], timestamp.time), np.append(self.cycles_alloc[1], timestamp.cycles_alloc)) + if timestamp.cycles_total != 0: + self.cycles_total = (np.append(self.cycles_total[0], timestamp.time), np.append(self.cycles_total[1], timestamp.cycles_total)) self.sizes.append(timestamp.num_blocks) self.time = timestamp.time @@ -486,8 +495,10 @@ class Plotter: "frag_global": False, "free_local": False, "free_global": False, - "allocation_tries": False, - "allocation_tries_aligned": False, + "allocation_cycles_alloc": False, + "allocation_cycles_total": False, + "allocation_cycles_alloc_aligned": False, + "allocation_cycles_total_aligned": False, "allocation_sizes": False, "allocation_sizes_ecdf": False, # empirical cumulative distribution function "slider": False, @@ -530,8 +541,10 @@ class Plotter: self.vline_frag_global = self._fragmentation_global() self.vlines_free_local = self._free_local() self.vline_free_global = self._free_global() - self.vline_allocation_tries = self._allocation_tries() - self.vline_allocation_tries_aligned = self._allocation_tries_aligned() + self.vline_allocation_cycles_alloc = self._allocation_cycles_alloc() + self.vline_allocation_cycles_total = self._allocation_cycles_total() + self.vline_allocation_cycles_alloc_aligned = 
self._allocation_cycles_alloc_aligned() + self.vline_allocation_cycles_total_aligned = self._allocation_cycles_total_aligned() _ = self._allocation_sizes() _ = self._allocation_sizes_ecdf() self.slider = self._setup_slider() @@ -586,21 +599,37 @@ class Plotter: for layer in self.layers: layout[-1].append("free_global") - if self.plot_config["allocation_tries"]: + if self.plot_config["allocation_cycles_alloc"]: layout.append([]) gridspec["height_ratios"].append(1) if self.plot_config["checkboxes"]: layout[-1].append("checkboxes") for layer in self.layers: - layout[-1].append("allocation_tries") + layout[-1].append("allocation_cycles_alloc") - if self.plot_config["allocation_tries_aligned"]: + if self.plot_config["allocation_cycles_total"]: layout.append([]) gridspec["height_ratios"].append(1) if self.plot_config["checkboxes"]: layout[-1].append("checkboxes") for layer in self.layers: - layout[-1].append("allocation_tries_aligned") + layout[-1].append("allocation_cycles_total") + + if self.plot_config["allocation_cycles_alloc_aligned"]: + layout.append([]) + gridspec["height_ratios"].append(1) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + layout[-1].append("allocation_cycles_alloc_aligned") + + if self.plot_config["allocation_cycles_total_aligned"]: + layout.append([]) + gridspec["height_ratios"].append(1) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + layout[-1].append("allocation_cycles_total_aligned") if self.plot_config["allocation_sizes"]: layout.append([]) @@ -799,17 +828,57 @@ class Plotter: return global_free_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) - def _allocation_tries(self): - if not self.plot_config["allocation_tries"]: + def _allocation_cycles_alloc(self): + if not self.plot_config["allocation_cycles_alloc"]: + return + + data = self.global_bitmap.cycles_alloc[1] + x_values = np.arange(0, len(data)) + + 
failed_allocations_ax = self.axd["allocation_cycles_alloc"] + failed_allocations_ax.scatter(x_values, data, s=3.0, linewidths=0, label="Allocation Cycles alloc") + + # window_sizes = [100, 500] + window_sizes = [] + colors = ['red', 'orange', 'black', 'purple'] + line_styles = ['-', '--', '-.', ':'] + moving_average = np.zeros_like(data, dtype=float) + + for i, window_size in enumerate(window_sizes): + moving_average = np.zeros_like(data, dtype=float) + for j in range(len(data)): + window_start = max(0, j - window_size + 1) + window_end = j + 1 + moving_average[j] = np.mean(data[window_start:window_end]) + + # Plot with different colors and line styles + failed_allocations_ax.plot(x_values, moving_average, + linewidth=1.5, + color=colors[i % len(colors)], + linestyle=line_styles[i % len(line_styles)], + label=f"Moving Average ({window_size})") + + failed_allocations_ax.set_ylim(0) + failed_allocations_ax.set_xlim([0, len(data)]) + failed_allocations_ax.set_title("Allocation Cycles Allocator") + failed_allocations_ax.set_xlabel("Allocation") + failed_allocations_ax.set_ylabel("Cycles") + failed_allocations_ax.legend() + + return failed_allocations_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) + + def _allocation_cycles_total(self): + if not self.plot_config["allocation_cycles_total"]: return - data = self.global_bitmap.tries[1] + data = self.global_bitmap.cycles_total[1] x_values = np.arange(0, len(data)) - failed_allocations_ax = self.axd["allocation_tries"] - failed_allocations_ax.scatter(x_values, data, s=3.0, linewidths=0, label="Allocation Tries") + failed_allocations_ax = self.axd["allocation_cycles_total"] + failed_allocations_ax.scatter(x_values, data, s=3.0, linewidths=0, label="Allocation Cycles Total") - window_sizes = [100, 500] + # window_sizes = [100, 500] + window_sizes = [] colors = ['red', 'orange', 'black', 'purple'] line_styles = ['-', '--', '-.', ':'] moving_average = np.zeros_like(data, dtype=float) @@ -830,24 +899,38 @@ 
class Plotter: failed_allocations_ax.set_ylim(0) failed_allocations_ax.set_xlim([0, len(data)]) - failed_allocations_ax.set_title("Allocation Tries") + failed_allocations_ax.set_title("Allocation Cycles Total") failed_allocations_ax.set_xlabel("Allocation") - failed_allocations_ax.set_ylabel("Tries") + failed_allocations_ax.set_ylabel("Cycles") failed_allocations_ax.legend(loc="upper left") return failed_allocations_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) - def _allocation_tries_aligned(self): - if not self.plot_config["allocation_tries_aligned"]: + def _allocation_cycles_alloc_aligned(self): + if not self.plot_config["allocation_cycles_alloc_aligned"]: return - failed_allocations_ax = self.axd["allocation_tries_aligned"] - failed_allocations_ax.scatter(self.global_bitmap.tries[0], self.global_bitmap.tries[1], s=3.0, linewidths=0) + failed_allocations_ax = self.axd["allocation_cycles_alloc_aligned"] + failed_allocations_ax.scatter(self.global_bitmap.cycles_alloc[0], self.global_bitmap.cycles_alloc[1], s=3.0, linewidths=0) failed_allocations_ax.set_ylim(0) failed_allocations_ax.set_xlim([0, self.global_bitmap.time - 1]) - failed_allocations_ax.set_title("Allocation Tries Aligned") + failed_allocations_ax.set_title("Allocation Cycles Allocator Timestep Aligned") failed_allocations_ax.set_xlabel("Timestamp") - failed_allocations_ax.set_ylabel("Tries") + failed_allocations_ax.set_ylabel("Cycles") + + return failed_allocations_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) + + def _allocation_cycles_total_aligned(self): + if not self.plot_config["allocation_cycles_total_aligned"]: + return + + failed_allocations_ax = self.axd["allocation_cycles_total_aligned"] + failed_allocations_ax.scatter(self.global_bitmap.cycles_total[0], self.global_bitmap.cycles_total[1], s=3.0, linewidths=0) + failed_allocations_ax.set_ylim(0) + failed_allocations_ax.set_xlim([0, self.global_bitmap.time - 1]) + 
failed_allocations_ax.set_title("Allocation Cycles Total Timestep Aligned") + failed_allocations_ax.set_xlabel("Timestamp") + failed_allocations_ax.set_ylabel("Cycles") return failed_allocations_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) @@ -935,13 +1018,25 @@ class Plotter: if self.plot_config["free_global"]: self.vline_free_global.set_xdata([self.time, self.time]) - if self.plot_config["allocation_tries"]: + if self.plot_config["allocation_cycles_alloc"]: + # Get the index of the last allocation that happened before or at the current timestamp. + index = np.argmax(self.global_bitmap.cycles_alloc[0] > self.time) - 1 + self.vline_allocation_cycles_alloc.set_xdata([index, index]) + + if self.plot_config["allocation_cycles_total"]: # Get the index of the last allocation that happened before or at the current timestamp. - index = np.argmax(self.global_bitmap.tries[0] > self.time) - 1 - self.vline_allocation_tries.set_xdata([index, index]) + index = np.argmax(self.global_bitmap.cycles_total[0] > self.time) - 1 + self.vline_allocation_cycles_total.set_xdata([index, index]) - if self.plot_config["allocation_tries_aligned"]: - self.vline_allocation_tries_aligned.set_xdata([self.time, self.time]) + if self.plot_config["allocation_cycles_alloc_aligned"]: + # Get the index of the last allocation that happened before or at the current timestamp. + index = np.argmax(self.global_bitmap.cycles_alloc[0] > self.time) - 1 + self.vline_allocation_cycles_alloc_aligned.set_xdata([index, index]) + + if self.plot_config["allocation_cycles_total_aligned"]: + # Get the index of the last allocation that happened before or at the current timestamp. 
+ index = np.argmax(self.global_bitmap.cycles_total[0] > self.time) - 1 + self.vline_allocation_cycles_total_aligned.set_xdata([index, index]) self.fig.canvas.draw_idle() From 8cbe70c0198649927e4df36a56c440c440178916 Mon Sep 17 00:00:00 2001 From: Pascal Zittlau Date: Wed, 18 Dec 2024 09:32:28 +0100 Subject: [PATCH 49/49] proportion of cycles spent in allocator --- scripts/visualize_allocation_log | 77 ++++++++++++++++++++++++++++---- 1 file changed, 68 insertions(+), 9 deletions(-) diff --git a/scripts/visualize_allocation_log b/scripts/visualize_allocation_log index bedff062..b5bbc506 100755 --- a/scripts/visualize_allocation_log +++ b/scripts/visualize_allocation_log @@ -497,8 +497,10 @@ class Plotter: "free_global": False, "allocation_cycles_alloc": False, "allocation_cycles_total": False, + "allocation_cycles_proportion": False, "allocation_cycles_alloc_aligned": False, "allocation_cycles_total_aligned": False, + "allocation_cycles_proportion_aligned": False, "allocation_sizes": False, "allocation_sizes_ecdf": False, # empirical cumulative distribution function "slider": False, @@ -545,6 +547,8 @@ class Plotter: self.vline_allocation_cycles_total = self._allocation_cycles_total() self.vline_allocation_cycles_alloc_aligned = self._allocation_cycles_alloc_aligned() self.vline_allocation_cycles_total_aligned = self._allocation_cycles_total_aligned() + self.vline_allocation_cycles_proportion = self._allocation_cycles_proportion() + self.vline_allocation_cycles_proportion_aligned = self._allocation_cycles_proportion_aligned() _ = self._allocation_sizes() _ = self._allocation_sizes_ecdf() self.slider = self._setup_slider() @@ -615,6 +619,14 @@ class Plotter: for layer in self.layers: layout[-1].append("allocation_cycles_total") + if self.plot_config["allocation_cycles_proportion"]: + layout.append([]) + gridspec["height_ratios"].append(1) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + 
layout[-1].append("allocation_cycles_proportion") + if self.plot_config["allocation_cycles_alloc_aligned"]: layout.append([]) gridspec["height_ratios"].append(1) @@ -631,6 +643,14 @@ class Plotter: for layer in self.layers: layout[-1].append("allocation_cycles_total_aligned") + if self.plot_config["allocation_cycles_proportion_aligned"]: + layout.append([]) + gridspec["height_ratios"].append(1) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + layout[-1].append("allocation_cycles_proportion_aligned") + if self.plot_config["allocation_sizes"]: layout.append([]) gridspec["height_ratios"].append(1) @@ -836,7 +856,7 @@ class Plotter: x_values = np.arange(0, len(data)) failed_allocations_ax = self.axd["allocation_cycles_alloc"] - failed_allocations_ax.scatter(x_values, data, s=3.0, linewidths=0, label="Allocation Cycles alloc") + failed_allocations_ax.scatter(x_values, data, s=3.0, linewidths=0, label="Allocation Cycles Alloc") # window_sizes = [100, 500] window_sizes = [] @@ -863,7 +883,7 @@ class Plotter: failed_allocations_ax.set_title("Allocation Cycles Allocator") failed_allocations_ax.set_xlabel("Allocation") failed_allocations_ax.set_ylabel("Cycles") - failed_allocations_ax.legend() + # failed_allocations_ax.legend() return failed_allocations_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) @@ -902,7 +922,42 @@ class Plotter: failed_allocations_ax.set_title("Allocation Cycles Total") failed_allocations_ax.set_xlabel("Allocation") failed_allocations_ax.set_ylabel("Cycles") - failed_allocations_ax.legend(loc="upper left") + # failed_allocations_ax.legend(loc="upper left") + + return failed_allocations_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) + + def _allocation_cycles_proportion(self): + if not self.plot_config["allocation_cycles_proportion"]: + return + + data = self.global_bitmap.cycles_alloc[1] / self.global_bitmap.cycles_total[1] + x_values = np.arange(0, len(data)) + + 
failed_allocations_ax = self.axd["allocation_cycles_proportion"] + failed_allocations_ax.scatter(x_values, data, s=3.0, linewidths=0, label="Allocation Cycles Allocator/Total") + + failed_allocations_ax.set_ylim([0, 1]) + failed_allocations_ax.set_xlim([0, len(data)]) + failed_allocations_ax.set_title("Allocation Cycles Allocator/Total") + failed_allocations_ax.set_xlabel("Allocation") + failed_allocations_ax.set_ylabel("Proportion") + + return failed_allocations_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) + + def _allocation_cycles_proportion_aligned(self): + if not self.plot_config["allocation_cycles_proportion_aligned"]: + return + + data = self.global_bitmap.cycles_alloc[1] / self.global_bitmap.cycles_total[1] + + failed_allocations_ax = self.axd["allocation_cycles_proportion_aligned"] + failed_allocations_ax.scatter(self.global_bitmap.cycles_alloc[0], data, s=3.0, linewidths=0, label="Allocation Cycles Allocator/Total Timestep Aligned") + + failed_allocations_ax.set_ylim([0, 1]) + failed_allocations_ax.set_xlim([0, self.global_bitmap.time - 1]) + failed_allocations_ax.set_title("Allocation Cycles Allocator/Total Timestep Aligned") + failed_allocations_ax.set_xlabel("Allocation") + failed_allocations_ax.set_ylabel("Proportion") return failed_allocations_ax.axvline(x=self.time, color="red", linestyle="--", linewidth=1) @@ -1028,15 +1083,19 @@ class Plotter: index = np.argmax(self.global_bitmap.cycles_total[0] > self.time) - 1 self.vline_allocation_cycles_total.set_xdata([index, index]) - if self.plot_config["allocation_cycles_alloc_aligned"]: + if self.plot_config["allocation_cycles_proportion"]: # Get the index of the last allocation that happened before or at the current timestamp. 
- index = np.argmax(self.global_bitmap.cycles_alloc[0] > self.time) - 1 - self.vline_allocation_cycles_alloc_aligned.set_xdata([index, index]) + index = np.argmax(self.global_bitmap.cycles_total[0] > self.time) - 1 + self.vline_allocation_cycles_proportion.set_xdata([index, index]) + + if self.plot_config["allocation_cycles_alloc_aligned"]: + self.vline_allocation_cycles_alloc_aligned.set_xdata([self.time, self.time]) if self.plot_config["allocation_cycles_total_aligned"]: - # Get the index of the last allocation that happened before or at the current timestamp. - index = np.argmax(self.global_bitmap.cycles_total[0] > self.time) - 1 - self.vline_allocation_cycles_total_aligned.set_xdata([index, index]) + self.vline_allocation_cycles_total_aligned.set_xdata([self.time, self.time]) + + if self.plot_config["allocation_cycles_proportion_aligned"]: + self.vline_allocation_cycles_proportion_aligned.set_xdata([self.time, self.time]) self.fig.canvas.draw_idle()