From 6d84ff0aed53aea727b32f6b3d16ee0d045aa0e4 Mon Sep 17 00:00:00 2001
From: C J Silverio
Date: Mon, 23 Jan 2023 17:37:34 -0800
Subject: [PATCH] fix(write): set tmpfile length in async writer (#35)

Fixes: https://github.com/zkat/cacache-rs/issues/34

The async `poll_write()` implementation was creating a tempfile as a
backing for its inner mmap, but it was failing to set the length of the
file to match the incoming data. Compare with the sync implementation!
This bug was exposed when the `memmap2` crate was swapped in for
`memmap`; the older crate was likely more lax about this.

Wrote a pair of new tests for `cacache::write_hash_sync` and
`cacache::write_hash`. The async test fails without this change, as does
any benchmark run. Everything passes with it.

BREAKING CHANGE: This commit also bumps the MSRV for cacache to 1.66.1.
---
 .github/workflows/ci.yml |  2 +-
 Cargo.toml               |  2 +-
 benches/benchmarks.rs    |  4 ++--
 src/content/read.rs      |  2 +-
 src/content/write.rs     |  3 ++-
 src/get.rs               | 12 ++++++------
 src/index.rs             |  4 ++--
 src/put.rs               | 30 ++++++++++++++++++++++++++++++
 src/rm.rs                |  2 +-
 9 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e161d7d..087ab90 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,7 +28,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        rust: [1.54.0, stable]
+        rust: [1.66.1, stable]
         os: [ubuntu-latest, macOS-latest, windows-latest]

     steps:
diff --git a/Cargo.toml b/Cargo.toml
index 6224c99..42dd6e7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,7 +2,7 @@
 name = "cacache"
 version = "10.0.2-alpha.0"
 authors = ["Kat Marchán "]
-edition = "2018"
+edition = "2021"
 description = "Content-addressable, key-value, high-performance, on-disk cache."
license = "Apache-2.0" repository = "https://github.com/zkat/cacache-rs" diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index 6f79ed0..99c974c 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -26,7 +26,7 @@ fn baseline_read_many_sync(c: &mut Criterion) { .collect(); let data = b"hello world"; for path in paths.iter() { - let mut fd = File::create(&path).unwrap(); + let mut fd = File::create(path).unwrap(); fd.write_all(data).unwrap(); drop(fd); } @@ -59,7 +59,7 @@ fn baseline_read_many_async(c: &mut Criterion) { .collect(); let data = b"hello world"; for path in paths.iter() { - let mut fd = File::create(&path).unwrap(); + let mut fd = File::create(path).unwrap(); fd.write_all(data).unwrap(); drop(fd); } diff --git a/src/content/read.rs b/src/content/read.rs index ade2c97..816d035 100644 --- a/src/content/read.rs +++ b/src/content/read.rs @@ -70,7 +70,7 @@ pub async fn open_async(cache: &Path, sri: Integrity) -> Result { pub fn read(cache: &Path, sri: &Integrity) -> Result> { let cpath = path::content_path(cache, sri); - let ret = fs::read(&cpath).to_internal()?; + let ret = fs::read(cpath).to_internal()?; sri.check(&ret)?; Ok(ret) } diff --git a/src/content/write.rs b/src/content/write.rs index 677bb2b..f446f95 100644 --- a/src/content/write.rs +++ b/src/content/write.rs @@ -121,11 +121,12 @@ impl AsyncWriter { .create(&tmp_path) .await .to_internal()?; - let tmpfile = task::spawn_blocking(|| NamedTempFile::new_in(tmp_path)) + let mut tmpfile = task::spawn_blocking(|| NamedTempFile::new_in(tmp_path)) .await .to_internal()?; let mmap = if let Some(size) = size { if size <= MAX_MMAP_SIZE { + tmpfile.as_file_mut().set_len(size as u64).to_internal()?; unsafe { MmapMut::map_mut(tmpfile.as_file()).ok() } } else { None diff --git a/src/get.rs b/src/get.rs index 573c081..55432d1 100644 --- a/src/get.rs +++ b/src/get.rs @@ -169,7 +169,7 @@ pub async fn read_hash

 where
     P: AsRef<Path>,
 {
-    Ok(read::read_async(cache.as_ref(), sri).await?)
+    read::read_async(cache.as_ref(), sri).await
 }

 /// Copies cache data to a specified location. Returns the number of bytes
@@ -235,7 +235,7 @@ where
     P: AsRef<Path>,
     K: AsRef<str>,
 {
-    Ok(index::find_async(cache.as_ref(), key.as_ref()).await?)
+    index::find_async(cache.as_ref(), key.as_ref()).await
 }

 /// Returns true if the given hash exists in the cache.
@@ -584,9 +584,9 @@ mod tests {
         let tmp = tempfile::tempdir().unwrap();
         let dir = tmp.path();
         let dest = dir.join("data");
-        crate::write_sync(&dir, "my-key", b"hello world").unwrap();
+        crate::write_sync(dir, "my-key", b"hello world").unwrap();

-        crate::copy_sync(&dir, "my-key", &dest).unwrap();
+        crate::copy_sync(dir, "my-key", &dest).unwrap();
         let data = fs::read(&dest).unwrap();
         assert_eq!(data, b"hello world");
     }
@@ -596,9 +596,9 @@ mod tests {
         let tmp = tempfile::tempdir().unwrap();
         let dir = tmp.path();
         let dest = dir.join("data");
-        let sri = crate::write_sync(&dir, "my-key", b"hello world").unwrap();
+        let sri = crate::write_sync(dir, "my-key", b"hello world").unwrap();

-        crate::copy_hash_sync(&dir, &sri, &dest).unwrap();
+        crate::copy_hash_sync(dir, &sri, &dest).unwrap();
         let data = fs::read(&dest).unwrap();
         assert_eq!(data, b"hello world");
     }
diff --git a/src/index.rs b/src/index.rs
index 18c67ae..274c679 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -267,13 +267,13 @@ fn bucket_path(cache: &Path, key: &str) -> PathBuf {

 fn hash_key(key: &str) -> String {
     let mut hasher = Sha1::new();
-    hasher.update(&key);
+    hasher.update(key);
     hex::encode(hasher.finalize())
 }

 fn hash_entry(key: &str) -> String {
     let mut hasher = Sha256::new();
-    hasher.update(&key);
+    hasher.update(key);
     hex::encode(hasher.finalize())
 }

diff --git a/src/put.rs b/src/put.rs
index ec5ec2d..8670a9b 100644
--- a/src/put.rs
+++ b/src/put.rs
@@ -440,4 +440,34 @@ mod tests {
         let data = crate::read_sync(&dir, "hello").unwrap();
         assert_eq!(data, b"hello");
     }
+
+    #[test]
+    fn hash_write_sync() {
+        let tmp = tempfile::tempdir().unwrap();
+        let dir = tmp.path().to_owned();
+        let original = format!("hello world{}", 5);
+        let integrity = crate::write_hash_sync(&dir, &original)
+            .expect("should be able to write a hash synchronously");
+        let bytes = crate::read_hash_sync(&dir, &integrity)
+            .expect("should be able to read the data we just wrote");
+        let result =
+            String::from_utf8(bytes).expect("we wrote valid utf8 but did not read valid utf8 back");
+        assert_eq!(result, original, "we did not read back what we wrote");
+    }
+
+    #[async_attributes::test]
+    async fn hash_write_async() {
+        let tmp = tempfile::tempdir().unwrap();
+        let dir = tmp.path().to_owned();
+        let original = format!("hello world{}", 12);
+        let integrity = crate::write_hash(&dir, &original)
+            .await
+            .expect("should be able to write a hash asynchronously");
+        let bytes = crate::read_hash(&dir, &integrity)
+            .await
+            .expect("should be able to read back what we wrote");
+        let result =
+            String::from_utf8(bytes).expect("we wrote valid utf8 but did not read valid utf8 back");
+        assert_eq!(result, original, "we did not read back what we wrote");
+    }
 }
diff --git a/src/rm.rs b/src/rm.rs
index d0b1706..c185e3d 100644
--- a/src/rm.rs
+++ b/src/rm.rs
@@ -66,7 +66,7 @@ where
 /// }
 /// ```
 pub async fn remove_hash<P: AsRef<Path>>(cache: P, sri: &Integrity) -> Result<()> {
-    Ok(rm::rm_async(cache.as_ref(), sri).await?)
+    rm::rm_async(cache.as_ref(), sri).await
 }

 /// Removes entire contents of the cache, including temporary files, the entry
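
For readers unfamiliar with the failure mode described in the commit message, the sketch below shows it in isolation, using the `memmap2` and `tempfile` crates directly rather than cacache's `AsyncWriter`. The standalone `main` and the exact behaviour of mapping a zero-length file are illustrative assumptions, not code from this patch.

```rust
// Illustrative only: why the mmap-backed writer must size its tempfile with
// set_len() before mapping. Requires the `memmap2` and `tempfile` crates.
use memmap2::MmapMut;
use tempfile::NamedTempFile;

fn main() -> std::io::Result<()> {
    let data = b"hello world";
    let mut tmpfile = NamedTempFile::new()?;

    // A fresh tempfile is zero bytes long. Mapping it at this point yields a
    // map with no room for `data` (or an outright error), so writes through
    // the map have nowhere to land. Reserving the length first gives the map
    // exactly `data.len()` bytes.
    tmpfile.as_file_mut().set_len(data.len() as u64)?;

    // Safety: nothing else resizes or truncates the file while it is mapped.
    let mut mmap = unsafe { MmapMut::map_mut(tmpfile.as_file())? };
    mmap[..data.len()].copy_from_slice(data);
    mmap.flush()?;
    Ok(())
}
```

This mirrors what the `src/content/write.rs` hunk above does: size the backing tempfile with `set_len()` before taking the writable map, so the map actually has room for the bytes that `poll_write()` copies into it.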