Skip to content

Releases: SUPERCILEX/ftzz

v3.0.0

09 Apr 03:28
b1101ca
Compare
Choose a tag to compare

Key changes:

  • Remove the generator prefix from the CLI and code as that was premature generalization.
  • Improve mid-range (e.g. 100K files) performance by ~10%.

v2.0.3

09 Oct 02:08
85f8f6d
Compare
Choose a tag to compare
Signed-off-by: Alex Saveau <saveau.alexandre@gmail.com>

v2.0.2

22 Sep 19:45
fb77937
Compare
Choose a tag to compare
Signed-off-by: Alex Saveau <saveau.alexandre@gmail.com>

v2.0.1

28 Jun 18:31
aecfdeb
Compare
Choose a tag to compare
Signed-off-by: Alex Saveau <saveau.alexandre@gmail.com>

v2.0.0

18 Mar 18:08
64d5be9
Compare
Choose a tag to compare

The quality of directory shapes was dramatically improved.

Results

  • Per-directory file count follows a much broader, non-normal distribution, resulting in more varied directories.
  • Total file count now converges tightly on the true mean instead of skewing upwards in proportion to max depth (atypical parameters still result in skew, but the distribution is at least normal-ish).
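
|                     | Old   | New   |
| ------------------- | ----- | ----- |
| Files per directory | image | image |
| Total file count    | image | image |

| Total file count | Old   | New   |
| ---------------- | ----- | ----- |
| Low file count   | image | image |
| High depth       | image | image |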

Appendix

Sample collection code:

Subject: [PATCH] Upgrade deps
---
Index: src/core/scheduler.rs
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/core/scheduler.rs b/src/core/scheduler.rs
--- a/src/core/scheduler.rs	(revision 6b7e88c4d05ed8fd247178bd9ec86440fa923d5f)
+++ b/src/core/scheduler.rs	(date 1679085314728)
@@ -11,6 +11,7 @@
     core::{
         files::GeneratorTaskOutcome,
         tasks::{QueueErrors, QueueOutcome, TaskGenerator},
+        TOTAL_FILE_COUNT,
     },
     generator::Error,
     utils::{with_dir_name, FastPathBuf},
@@ -165,6 +166,11 @@
         handle_task_result(task, &mut stats)?;
     }
 
+    {
+        use std::io::Write;
+        let mut count = TOTAL_FILE_COUNT.lock().unwrap();
+        writeln!(count, "{}", stats.files).unwrap();
+    }
     Ok(stats)
 }
 
Index: src/core/mod.rs
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/core/mod.rs b/src/core/mod.rs
--- a/src/core/mod.rs	(revision 6b7e88c4d05ed8fd247178bd9ec86440fa923d5f)
+++ b/src/core/mod.rs	(date 1679085426452)
@@ -1,3 +1,9 @@
+use std::{
+    cell::LazyCell,
+    fs::File,
+    sync::{LazyLock, Mutex},
+};
+
 pub use scheduler::{run, GeneratorStats};
 pub use tasks::{DynamicGenerator, GeneratorBytes, StaticGenerator};
 
@@ -5,3 +11,31 @@
 mod files;
 mod scheduler;
 mod tasks;
+
+static FILE_COUNT: LazyLock<Mutex<File>, fn() -> Mutex<File>> = LazyLock::new(|| {
+    Mutex::new(
+        File::options()
+            .create(true)
+            .append(true)
+            .open("file_count.samples")
+            .unwrap(),
+    )
+});
+static DIR_COUNT: LazyLock<Mutex<File>, fn() -> Mutex<File>> = LazyLock::new(|| {
+    Mutex::new(
+        File::options()
+            .create(true)
+            .append(true)
+            .open("dir_count.samples")
+            .unwrap(),
+    )
+});
+static TOTAL_FILE_COUNT: LazyLock<Mutex<File>, fn() -> Mutex<File>> = LazyLock::new(|| {
+    Mutex::new(
+        File::options()
+            .create(true)
+            .append(true)
+            .open("total_file_count.samples")
+            .unwrap(),
+    )
+});
Index: src/lib.rs
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs	(revision 6b7e88c4d05ed8fd247178bd9ec86440fa923d5f)
+++ b/src/lib.rs	(date 1679084880740)
@@ -3,6 +3,7 @@
 #![feature(let_chains)]
 #![feature(const_option)]
 #![feature(inline_const)]
+#![feature(once_cell)]
 #![allow(clippy::multiple_crate_versions)]
 #![allow(clippy::module_name_repetitions)]
 
Index: src/core/tasks.rs
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/core/tasks.rs b/src/core/tasks.rs
--- a/src/core/tasks.rs	(revision 6b7e88c4d05ed8fd247178bd9ec86440fa923d5f)
+++ b/src/core/tasks.rs	(date 1679085314852)
@@ -12,6 +12,7 @@
             PreDefinedGeneratedFileContents,
         },
         files::{create_files_and_dirs, GeneratorTaskOutcome, GeneratorTaskParams},
+        DIR_COUNT, FILE_COUNT,
     },
     utils::FastPathBuf,
 };
@@ -122,6 +123,16 @@
         } else {
             0
         };
+        {
+            use std::io::Write;
+            let mut count = FILE_COUNT.lock().unwrap();
+            writeln!(count, "{}", num_files).unwrap();
+        }
+        {
+            use std::io::Write;
+            let mut count = DIR_COUNT.lock().unwrap();
+            writeln!(count, "{}", num_dirs).unwrap();
+        }
 
         macro_rules! build_params {
             ($file_contents:expr) => {{

Collection:

cargo b --features dry_run
for d in [0, 5, 20] { for n in [10, 10_000, 1_000_000] { for i in 0..1000 { ./target/debug/ftzz g -n $n /tmp/foo -d $d --seed $i out> /dev/null }; mkdir $"($d)d_($n)n"; mv *.samples $"($d)d_($n)n/" } }

Plots:

!unzip samples.zip
import numpy as np
import matplotlib.pyplot as plt
import os

# Plot one histogram per sample file for every sample directory in the
# current working directory. Sample directories are recognised by a name
# ending in 'n' (the collection step names them "<depth>d_<count>n").

# Sample files to plot from each directory.
SAMPLE_FILES = ['file_count.samples', 'total_file_count.samples']

# Sort so the plots come out in a reproducible order; os.listdir() order is
# arbitrary.
for sample_dir in sorted(os.listdir()):
    # Skip anything that is not a sample directory. The isdir check keeps a
    # stray regular file whose name happens to end in 'n' from crashing open().
    if not sample_dir.endswith('n') or not os.path.isdir(sample_dir):
        continue

    for sample_name in SAMPLE_FILES:
        sample_path = os.path.join(sample_dir, sample_name)

        # Load one integer per line, ignoring blank lines (e.g. a trailing
        # empty line) that would otherwise make int() raise ValueError.
        with open(sample_path, 'r') as f:
            samples = [int(line) for line in f if line.strip()]

        # Nothing to plot; the mean of an empty list would be NaN.
        if not samples:
            continue

        mean = np.mean(samples)

        # Histogram of the samples. NOTE: density=True normalises the bars so
        # the histogram integrates to 1, i.e. the y-axis is a probability
        # density rather than a raw count.
        plt.hist(samples, bins=50, density=True, alpha=0.5)

        # Mark the mean of the distribution with a dashed vertical line.
        plt.axvline(x=mean, color='red', linestyle='--')

        plt.xlabel('Values')
        plt.ylabel('Frequency')
        plt.title(f'Distribution of samples from {sample_path}')

        # Render this figure before moving on to the next sample file.
        plt.show()

v1.2.0

13 Jan 20:39
0a99b4a
Compare
Choose a tag to compare
  • Added the fill-byte flag which lets you fill files with a specific byte instead of pseudo-random ones.

v1.1.4

03 Dec 05:27
ccb302b
Compare
Choose a tag to compare
  • Slight efficiency improvements
  • Removed some unnecessary unsafe + other cleanup

v1.1.3

07 Oct 03:39
236cb8b
Compare
Choose a tag to compare

Tweak help output to be slightly clearer.

v1.1.2

05 Oct 23:41
0d38723
Compare
Choose a tag to compare

Re-release of 1.1.1 with fixed binary builds. Binaries are now statically linked.

v1.1.1

05 Oct 23:40
9133b8d
Compare
Choose a tag to compare

Improvements across internal infrastructure, testing, and error handling.