Releases: SUPERCILEX/ftzz
Releases · SUPERCILEX/ftzz
v3.0.0
v2.0.3
Signed-off-by: Alex Saveau <saveau.alexandre@gmail.com>
v2.0.2
Signed-off-by: Alex Saveau <saveau.alexandre@gmail.com>
v2.0.1
Signed-off-by: Alex Saveau <saveau.alexandre@gmail.com>
v2.0.0
The quality of directory shapes was dramatically improved.
Results
- Per-directory file count follows a much broader, non-normal distribution, resulting in more varied directories.
- Total file count now converges tightly on the true mean instead of skewing upwards in proportion to max depth (atypical parameters still result in skew, but the distribution is at least roughly normal).
| | Old | New |
|---|---|---|
| Files per directory | | |
| Total file count | | |
| Total file count | Old | New |
|---|---|---|
| Low file count | | |
| High depth | | |
Appendix
Sample collection code:
Subject: [PATCH] Upgrade deps
---
Index: src/core/scheduler.rs
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/core/scheduler.rs b/src/core/scheduler.rs
--- a/src/core/scheduler.rs (revision 6b7e88c4d05ed8fd247178bd9ec86440fa923d5f)
+++ b/src/core/scheduler.rs (date 1679085314728)
@@ -11,6 +11,7 @@
core::{
files::GeneratorTaskOutcome,
tasks::{QueueErrors, QueueOutcome, TaskGenerator},
+ TOTAL_FILE_COUNT,
},
generator::Error,
utils::{with_dir_name, FastPathBuf},
@@ -165,6 +166,11 @@
handle_task_result(task, &mut stats)?;
}
+ {
+ use std::io::Write;
+ let mut count = TOTAL_FILE_COUNT.lock().unwrap();
+ writeln!(count, "{}", stats.files).unwrap();
+ }
Ok(stats)
}
Index: src/core/mod.rs
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/core/mod.rs b/src/core/mod.rs
--- a/src/core/mod.rs (revision 6b7e88c4d05ed8fd247178bd9ec86440fa923d5f)
+++ b/src/core/mod.rs (date 1679085426452)
@@ -1,3 +1,9 @@
+use std::{
+ cell::LazyCell,
+ fs::File,
+ sync::{LazyLock, Mutex},
+};
+
pub use scheduler::{run, GeneratorStats};
pub use tasks::{DynamicGenerator, GeneratorBytes, StaticGenerator};
@@ -5,3 +11,31 @@
mod files;
mod scheduler;
mod tasks;
+
+static FILE_COUNT: LazyLock<Mutex<File>, fn() -> Mutex<File>> = LazyLock::new(|| {
+ Mutex::new(
+ File::options()
+ .create(true)
+ .append(true)
+ .open("file_count.samples")
+ .unwrap(),
+ )
+});
+static DIR_COUNT: LazyLock<Mutex<File>, fn() -> Mutex<File>> = LazyLock::new(|| {
+ Mutex::new(
+ File::options()
+ .create(true)
+ .append(true)
+ .open("dir_count.samples")
+ .unwrap(),
+ )
+});
+static TOTAL_FILE_COUNT: LazyLock<Mutex<File>, fn() -> Mutex<File>> = LazyLock::new(|| {
+ Mutex::new(
+ File::options()
+ .create(true)
+ .append(true)
+ .open("total_file_count.samples")
+ .unwrap(),
+ )
+});
Index: src/lib.rs
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs (revision 6b7e88c4d05ed8fd247178bd9ec86440fa923d5f)
+++ b/src/lib.rs (date 1679084880740)
@@ -3,6 +3,7 @@
#![feature(let_chains)]
#![feature(const_option)]
#![feature(inline_const)]
+#![feature(once_cell)]
#![allow(clippy::multiple_crate_versions)]
#![allow(clippy::module_name_repetitions)]
Index: src/core/tasks.rs
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/core/tasks.rs b/src/core/tasks.rs
--- a/src/core/tasks.rs (revision 6b7e88c4d05ed8fd247178bd9ec86440fa923d5f)
+++ b/src/core/tasks.rs (date 1679085314852)
@@ -12,6 +12,7 @@
PreDefinedGeneratedFileContents,
},
files::{create_files_and_dirs, GeneratorTaskOutcome, GeneratorTaskParams},
+ DIR_COUNT, FILE_COUNT,
},
utils::FastPathBuf,
};
@@ -122,6 +123,16 @@
} else {
0
};
+ {
+ use std::io::Write;
+ let mut count = FILE_COUNT.lock().unwrap();
+ writeln!(count, "{}", num_files).unwrap();
+ }
+ {
+ use std::io::Write;
+ let mut count = DIR_COUNT.lock().unwrap();
+ writeln!(count, "{}", num_dirs).unwrap();
+ }
macro_rules! build_params {
($file_contents:expr) => {{
Collection:
cargo b --features dry_run
for d in [0, 5, 20] { for n in [10, 10_000, 1_000_000] { for i in 0..1000 { ./target/debug/ftzz g -n $n /tmp/foo -d $d --seed $i out> /dev/null }; mkdir $"($d)d_($n)n"; mv *.samples $"($d)d_($n)n/" } }
Plots:
!unzip samples.zip
import numpy as np
import matplotlib.pyplot as plt
import os
# Plot a histogram for each sample file in every sample directory produced by
# the collection step. Those directories have names ending in 'n'
# (e.g. "5d_10000n").
for sample_dir in os.listdir():
    # Skip anything that is not a sample directory; the isdir check keeps a
    # stray regular file whose name ends in 'n' from crashing open() below.
    if not sample_dir.endswith('n') or not os.path.isdir(sample_dir):
        continue

    # List of files containing samples
    sample_files = ['file_count.samples', 'total_file_count.samples']

    # Loop over each file and plot its samples
    for sample_file in sample_files:
        # Load samples from file into a list (one integer per line)
        path = os.path.join(sample_dir, sample_file)
        with open(path, 'r') as f:
            samples = [int(line.strip()) for line in f]

        # The mean is marked on the plot below
        mean = np.mean(samples)

        # Plot the histogram of the samples
        plt.hist(samples, bins=50, density=True, alpha=0.5)

        # Add a vertical line at the mean of the distribution
        plt.axvline(x=mean, color='red', linestyle='--')

        # Add labels and title to the plot
        plt.xlabel('Values')
        plt.ylabel('Frequency')
        plt.title(f'Distribution of samples from {path}')

        # Show the plot
        plt.show()
v1.2.0
- Added the `fill-byte` flag, which lets you fill files with a specific byte instead of pseudo-random ones.
v1.1.4
- Slight efficiency improvements
- Removed some unnecessary unsafe + other cleanup
v1.1.3
Tweak help output to be slightly clearer.
v1.1.2
Re-release of 1.1.1 with fixed binary builds. Binaries are now statically linked.
v1.1.1
Improvements across internal infrastructure, testing, and error handling.