uutils
diff --git a/‎.github/workflows/CICD.yml‎
Lines changed: 6 additions & 0 deletions b/‎.github/workflows/CICD.yml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎.vscode/cspell.dictionaries/workspace.wordlist.txt‎
Lines changed: 1 addition & 0 deletions b/‎.vscode/cspell.dictionaries/workspace.wordlist.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎Cargo.lock‎
Lines changed: 10 additions & 0 deletions b/‎Cargo.lock‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎Cargo.toml‎
Lines changed: 2 additions & 4 deletions b/‎Cargo.toml‎
Lines changed: 2 additions & 4 deletions
diff --git a/‎src/uu/base64/Cargo.toml‎
Lines changed: 9 additions & 0 deletions b/‎src/uu/base64/Cargo.toml‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎src/uu/base64/benches/base64_bench.rs‎
Lines changed: 72 additions & 0 deletions b/‎src/uu/base64/benches/base64_bench.rs‎
Lines changed: 72 additions & 0 deletions
diff --git a/‎src/uu/cp/BENCHMARKING.md‎
Lines changed: 143 additions & 0 deletions b/‎src/uu/cp/BENCHMARKING.md‎
Lines changed: 143 additions & 0 deletions
diff --git a/‎src/uu/cp/Cargo.toml‎
Lines changed: 9 additions & 0 deletions b/‎src/uu/cp/Cargo.toml‎
Lines changed: 9 additions & 0 deletions
@@ -782,6 +782,9 @@ jobs:
             # We also create a couple optional files pinky looks for
             touch /home/runner/.project
             echo "foo" > /home/runner/.plan
+            # add a user with a numeric username for testing issue #7787
+            echo 200:x:2000:2000::/home/200:/bin/bash | sudo tee -a /etc/passwd
+            echo 200:x:2000: | sudo tee -a /etc/group
             ;;
         esac
     - uses: taiki-e/install-action@v2
@@ -1156,6 +1159,9 @@ jobs:
             # We also create a couple optional files pinky looks for
             touch /home/runner/.project
             echo "foo" > /home/runner/.plan
+            # add a user with a numeric username for testing issue #7787
+            echo 200:x:2000:2000::/home/200:/bin/bash | sudo tee -a /etc/passwd
+            echo 200:x:2000: | sudo tee -a /etc/group
             ;;
         esac
 
 
@@ -128,6 +128,7 @@ ENOSYS
 ENOTEMPTY
 EOPNOTSUPP
 EPERM
+EPIPE
 EROFS
 
 # * vars/fcntl
 
@@ -600,12 +600,14 @@ lto = true
 [profile.release-fast]
 inherits = "release"
 panic = "abort"
+codegen-units = 1
 
 # A release-like profile that is as small as possible.
 [profile.release-small]
 inherits = "release"
 opt-level = "z"
 panic = "abort"
+codegen-units = 1
 strip = true
 
 # A release-like profile with debug info, useful for profiling.
@@ -688,20 +690,16 @@ unused_self = "allow"
 map_unwrap_or = "allow"
 enum_glob_use = "allow"
 ptr_cast_constness = "allow"
-if_not_else = "allow"
 borrow_as_ptr = "allow"
 ptr_as_ptr = "allow"
 manual_let_else = "allow"
 unnecessary_semicolon = "allow"
-bool_to_int_with_if = "allow"
 needless_raw_string_hashes = "allow"
 unreadable_literal = "allow"
 unnested_or_patterns = "allow"
-semicolon_if_nothing_returned = "allow"
 implicit_hasher = "allow"
 struct_field_names = "allow"
 doc_link_with_quotes = "allow"
-single_char_pattern = "allow"
 format_push_string = "allow"
 flat_map_option = "allow"
 from_iter_instead_of_collect = "allow"
 
@@ -23,6 +23,15 @@ uucore = { workspace = true, features = ["encoding"] }
 uu_base32 = { workspace = true }
 fluent = { workspace = true }
 
+[dev-dependencies]
+divan = { workspace = true }
+tempfile = { workspace = true }
+uucore = { workspace = true, features = ["benchmark"] }
+
 [[bin]]
 name = "base64"
 path = "src/main.rs"
+
+[[bench]]
+name = "base64_bench"
+harness = false
@@ -0,0 +1,72 @@
+// This file is part of the uutils coreutils package.
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
+use divan::{Bencher, black_box};
+use std::ffi::OsString;
+use uu_base64::uumain;
+use uucore::benchmark::{create_test_file, run_util_function, text_data};
+
+/// Creates a benchmark input file in a fresh temporary directory and returns
+/// its path as a `String`.
+///
+/// The content comes from `text_data::generate_by_size(size_mb, 80)`;
+/// presumably `size_mb` is a size in megabytes and `80` a line length —
+/// TODO confirm against `uucore::benchmark`.
+///
+/// # Panics
+///
+/// Panics if the temporary directory cannot be created or if the resulting
+/// path is not valid UTF-8.
+fn create_tmp_file(size_mb: usize) -> String {
+    // A `TempDir` removes its directory (and everything inside it) when it is
+    // dropped. Persist it, otherwise the path returned below would refer to a
+    // file that is already deleted by the time the caller uses it. The
+    // directory is deliberately left on disk once the benchmark finishes.
+    let temp_dir = tempfile::tempdir().unwrap().keep();
+    let data = text_data::generate_by_size(size_mb, 80);
+    let file_path = create_test_file(&data, &temp_dir);
+    String::from(file_path.to_str().unwrap())
+}
+
+/// Benchmarks `base64` in its default (encoding) mode: the input file is
+/// created once up front and each iteration runs `uumain` on it.
+#[divan::bench()]
+fn b64_encode_synthetic(bencher: Bencher) {
+    let input_path = create_tmp_file(5_000);
+    let args = [input_path.as_str()];
+
+    bencher.bench(|| {
+        black_box(run_util_function(uumain, &args));
+    });
+}
+
+/// Benchmark for base64 decoding (`-d`).
+///
+/// NOTE(review): the setup call passes `>{path}` to `uumain` as an ordinary
+/// argument. No shell is involved, so this is not an output redirection, and
+/// nothing visible here writes encoded data into `in_file`. The timed closure
+/// therefore presumably decodes the empty file created by
+/// `create_test_file(b"", ..)` — TODO confirm and generate real encoded input.
+#[divan::bench()]
+fn b64_decode_synthetic(bencher: Bencher) {
+    let temp_dir = tempfile::tempdir().unwrap();
+    let file_path_str = &create_tmp_file(5_000);
+    let in_file = create_test_file(b"", temp_dir.path());
+    let in_file_str = in_file.to_str().unwrap();
+    // The array is consumed by value, so the arguments do not need cloning.
+    uumain(
+        [
+            OsString::from(file_path_str),
+            OsString::from(format!(">{in_file_str}")),
+        ]
+        .into_iter(),
+    );
+
+    bencher.bench(|| {
+        black_box(run_util_function(uumain, &["-d", in_file_str]));
+    });
+}
+
+/// Benchmark for base64 decoding while ignoring garbage characters
+/// (`-d -i`), using a single input size.
+///
+/// NOTE(review): the setup call passes `>{path}` to `uumain` as an ordinary
+/// argument. No shell is involved, so this is not an output redirection, and
+/// nothing visible here writes encoded data into `in_file`. The timed closure
+/// therefore presumably decodes the empty file created by
+/// `create_test_file(b"", ..)` — TODO confirm and generate real encoded input.
+#[divan::bench()]
+fn b64_decode_ignore_garbage_synthetic(bencher: Bencher) {
+    let temp_dir = tempfile::tempdir().unwrap();
+    let file_path_str = &create_tmp_file(5_000);
+    let in_file = create_test_file(b"", temp_dir.path());
+    let in_file_str = in_file.to_str().unwrap();
+    // The array is consumed by value, so the arguments do not need cloning.
+    uumain(
+        [
+            OsString::from(file_path_str),
+            OsString::from(format!(">{in_file_str}")),
+        ]
+        .into_iter(),
+    );
+
+    bencher.bench(|| {
+        black_box(run_util_function(uumain, &["-d", "-i", in_file_str]));
+    });
+}
+
+/// Entry point of the benchmark binary; hands control to `divan::main`.
+fn main() {
+    divan::main();
+}
@@ -0,0 +1,143 @@
+<!-- spell-checker:ignore hyperfine tmpfs reflink fsxattr xattrs clonefile vmtouch APFS pathlib Btrfs fallocate journaling -->
+
+# Benchmarking cp
+
+`cp` copies file contents together with metadata such as permissions, ownership,
+timestamps, extended attributes, and directory structures. Although copying
+looks simple, `cp` exercises many filesystem features. Its performance depends
+heavily on the workload shape (large sequential files, many tiny files, special
+files, sparse images) and the storage stack underneath.
+
+## Understanding cp
+
+Most of the time spent inside `cp` falls into two broad categories:
+
+- **Data transfer path**: When copying large contiguous files, throughput is
+dominated by read/write bandwidth. The overhead from `cp` itself comes from
+performing buffered reads and writes, copying memory between buffers, and the
+number of system calls issued per block.
+- **Metadata handling**: When recursively copying trees with thousands of small
+files, performance is limited by metadata work such as `open`, `stat`,
+`lstat`, attribute preservation, directory creation, and link handling.
+
+`cp` supports many switches that alter these paths, including attribute
+preservation, hard-link and reflink creation, sparse detection, and
+`--remove-destination` semantics. Benchmarks should call out which pathways are
+being exercised so results can be interpreted correctly.
+
+## Benchmarking guidelines
+
+- Build a release binary first: `cargo build --release -p uu_cp`.
+- Use `hyperfine` for timing and rely on the `--prepare` hook to reset state
+between runs.
+- Prefer running on a fast device (RAM disk, tmpfs, NVMe) to minimize raw
+storage latency when isolating the cost of the tool.
+- On Linux, control the page cache where appropriate using tools like
+`vmtouch` or `echo 3 > /proc/sys/vm/drop_caches` (root required). Prioritize
+repeatability and stay within the policies of the host system.
+- Keep the workload definition explicit. When comparing against GNU `cp` or
+other implementations, ensure identical datasets and mount options.
+
+## Large-file throughput
+
+1. Create a clean working directory and reduce cache interference.
+2. Generate an input file of known size, for example with `truncate` or `dd`.
+3. Run repeated copies with `hyperfine`, deleting the destination beforehand.
+
+```shell
+mkdir -p benchmark/cp && cd benchmark/cp
+truncate -s 2G input.bin
+hyperfine \
+  --warmup 2 \
+  --prepare 'rm -f output.bin' \
+  '../../target/release/cp input.bin output.bin'
+```
+
+What to record:
+
+- Achieved throughput (MB/s) for large sequential copies.
+- Behavior with `--reflink=auto` or `--sparse=auto` on filesystems that
+support copy-on-write or sparse regions.
+- CPU overhead when enabling attribute preservation such as
+`--preserve=mode,timestamps,xattr`.
+
+If the underlying filesystem performs transparent copy-on-write (for example,
+APFS via `clonefile`), consider running the same benchmark with `--reflink=never`
+or on a filesystem without reflink support to measure raw data transfer.
+
+## Many small files
+
+Large directory trees stress metadata throughput. Pre-create a synthetic tree
+and copy it recursively.
+
+```shell
+mkdir -p dataset/src
+python3 - <<'PY'
+from pathlib import Path
+root = Path('dataset/src')
+for i in range(2000):
+    sub = root / f'dir_{i//200}'
+    sub.mkdir(parents=True, exist_ok=True)
+    for j in range(5):
+        path = sub / f'file_{i}_{j}.txt'
+        path.write_text('payload' * 16)
+PY
+hyperfine \
+  --warmup 1 \
+  --prepare 'rm -rf dataset/dst && mkdir -p dataset/dst' \
+  '../../target/release/cp -r dataset/src dataset/dst'
+```
+
+What to record:
+
+- Time spent in directory traversal and metadata replication.
+- Impact of toggling options such as `--preserve`, `--no-preserve`, `--link`,
+`--symbolic-link`, and `--archive`.
+- Behavior when symbolic links or hard links are present, especially with
+`--dereference` versus `--no-dereference`.
+
+## Copy-on-write and sparse files
+
+`--reflink=always` can dramatically reduce work on Btrfs, XFS, APFS, and other
+reflink-aware filesystems. Compare results with `--reflink=never` to understand
+how much time is spent in copy-on-write system calls versus fallback copying.
+Sparse workloads benefit from dedicated benchmarks as well.
+
+```shell
+truncate -s 4G sparse.img
+fallocate -d sparse.img  # On filesystems that support punching holes
+hyperfine \
+  --prepare 'rm -f sparse-copy.img' \
+  '../../target/release/cp --sparse=always sparse.img sparse-copy.img'
+```
+
+Check both the elapsed time and the on-disk size of the destination (for
+example using `du -h sparse-copy.img`) to confirm sparse regions are preserved.
+
+## Evaluating attribute preservation and extras
+
+Measure the incremental cost of individual options by enabling them one at a
+time:
+
+- Test `--preserve=context` or `--preserve=xattr` on files that actually carry
+extended attributes.
+- Evaluate ACL and SELinux handling with `--archive` on systems where those
+features are active.
+- Compare modes that remove or back up the destination (`--remove-destination`,
+`--backup=numbered`) to see the impact of extra file operations.
+
+Supplementary analysis with `strace -c` or `perf record` can show which system
+calls dominate and guide optimization work.
+
+## Interpreting results
+
+- If a benchmark completes in well under a second, increase the dataset size to
+reduce process start-up noise.
+- Document filesystem features such as journaling, compression, or encryption
+that may skew results.
+- When changes are made to `cp`, track how system call counts, I/O patterns,
+and CPU time shift between runs to catch regressions early.
+
+Use these guidelines to isolate the workloads you care about (large sequential
+transfers, directory-heavy copies, attribute preservation, reflink paths) and
+collect reproducible measurements.
@@ -47,6 +47,15 @@ exacl = { workspace = true, optional = true }
 name = "cp"
 path = "src/main.rs"
 
+[dev-dependencies]
+divan = { workspace = true }
+tempfile = { workspace = true }
+uucore = { workspace = true, features = ["benchmark"] }
+
+[[bench]]
+name = "cp_bench"
+harness = false
+
 [features]
 feat_selinux = ["selinux", "uucore/selinux"]
 feat_acl = ["exacl"]