Skip to content

Commit

Permalink
Allow selecting the hash function; support blake3, sha256, sha512
Browse files: browse the repository at this point in the history
Fixes #153
  • Loading branch information
pkolaczk committed Sep 10, 2022
1 parent cc02580 commit 1b8f9eb
Show file tree
Hide file tree
Showing 7 changed files with 291 additions and 46 deletions.
109 changes: 108 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "fclones"
version = "0.27.3"
version = "0.28.0"
description = "Finds duplicate, unique, under- or over-replicated files"
authors = ["Piotr Kołaczkowski <pkolaczk@gmail.com>"]
homepage = "https://github.com/pkolaczk/fclones"
Expand All @@ -20,6 +20,8 @@ exclude = [
[dependencies]
atomic-counter = "1.0"
bincode = "1.3"
blake3 = "1.3"
byteorder = "1.4"
bytesize = "1.1"
byte-unit = "4.0"
chrono = { version = "0.4", features = ["serde", "clock"] }
Expand Down Expand Up @@ -50,6 +52,7 @@ rayon = "1.5"
regex = "1.5"
serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
sha2 = "0.10"
sled = "0.34"
smallvec = "1.8"
stfu8 = "0.2"
Expand Down
23 changes: 9 additions & 14 deletions src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize};

use crate::error::Error;
use crate::file::{FileChunk, FileHash, FileId, FileLen, FileMetadata, FilePos};
use crate::hasher::HashAlgorithm;
use crate::hasher::HashFn;
use crate::path::Path;

#[derive(Debug, Serialize, Deserialize)]
Expand Down Expand Up @@ -47,7 +47,7 @@ impl HashCache {
pub fn open(
database_path: &Path,
transform: Option<&str>,
algorithm: HashAlgorithm,
algorithm: HashFn,
) -> Result<HashCache, Error> {
create_dir_all(&database_path.to_path_buf()).map_err(|e| {
format!(
Expand All @@ -71,10 +71,7 @@ impl HashCache {

/// Opens the file hash database located in `fclones` subdir of user cache directory.
/// If the database doesn't exist yet, creates a new one.
pub fn open_default(
transform: Option<&str>,
algorithm: HashAlgorithm,
) -> Result<HashCache, Error> {
pub fn open_default(transform: Option<&str>, algorithm: HashFn) -> Result<HashCache, Error> {
let cache_dir =
dirs::cache_dir().ok_or("Could not obtain user cache directory from the system.")?;
let hash_db_path = cache_dir.join("fclones");
Expand Down Expand Up @@ -160,7 +157,7 @@ mod test {

use crate::cache::HashCache;
use crate::file::{FileChunk, FileHash, FileLen, FileMetadata, FilePos};
use crate::hasher::HashAlgorithm;
use crate::hasher::HashFn;
use crate::path::Path;
use crate::util::test::{create_file, with_dir};

Expand All @@ -174,7 +171,7 @@ mod test {
let chunk = FileChunk::new(&path, FilePos(0), FileLen(1000));

let cache_path = Path::from(root.join("cache"));
let cache = HashCache::open(&cache_path, None, HashAlgorithm::MetroHash128).unwrap();
let cache = HashCache::open(&cache_path, None, HashFn::Metro128).unwrap();
let key = cache.key(&chunk, &metadata).unwrap();
let orig_hash = FileHash(12345);

Expand All @@ -196,7 +193,7 @@ mod test {
let chunk = FileChunk::new(&path, FilePos(0), FileLen(1000));

let cache_path = Path::from(root.join("cache"));
let cache = HashCache::open(&cache_path, None, HashAlgorithm::MetroHash128).unwrap();
let cache = HashCache::open(&cache_path, None, HashFn::Metro128).unwrap();
let key = cache.key(&chunk, &metadata).unwrap();
cache
.put(&key, &metadata, chunk.len, FileHash(12345))
Expand Down Expand Up @@ -228,7 +225,7 @@ mod test {
let chunk = FileChunk::new(&path, FilePos(0), FileLen(1000));

let cache_path = Path::from(root.join("cache"));
let cache = HashCache::open(&cache_path, None, HashAlgorithm::MetroHash128).unwrap();
let cache = HashCache::open(&cache_path, None, HashFn::Metro128).unwrap();
let key = cache.key(&chunk, &metadata).unwrap();

cache
Expand All @@ -255,8 +252,7 @@ mod test {
let chunk = FileChunk::new(&path, FilePos(0), FileLen(1000));

let cache_path = Path::from(root.join("cache"));
let cache =
HashCache::open(&cache_path, None, HashAlgorithm::MetroHash128).unwrap();
let cache = HashCache::open(&cache_path, None, HashFn::Metro128).unwrap();
let key = cache.key(&chunk, &metadata).unwrap();

let orig_hash = FileHash(12345);
Expand All @@ -267,8 +263,7 @@ mod test {
drop(cache); // unlock the db so we can open another cache

let cache =
HashCache::open(&cache_path, Some("transform"), HashAlgorithm::MetroHash128)
.unwrap();
HashCache::open(&cache_path, Some("transform"), HashFn::Metro128).unwrap();
let cached_hash = cache.get(&key, &metadata).unwrap();
assert_eq!(cached_hash, None);
},
Expand Down
5 changes: 5 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use structopt::StructOpt;
use crate::file::FileLen;
use crate::group::FileGroupFilter;
use crate::group::Replication::{Overreplicated, Underreplicated};
use crate::hasher::HashFn;
use crate::path::Path;
use crate::pattern::{Pattern, PatternError, PatternOpts};
use crate::selector::PathSelector;
Expand Down Expand Up @@ -260,6 +261,10 @@ pub struct GroupConfig {
#[structopt(short = "x", long)]
pub regex: bool,

/// Hash function.
#[structopt(long, default_value = "metro128", possible_values = &HashFn::variants())]
pub hash_fn: HashFn,

/// Enables caching of file hashes.
///
/// Caching can significantly speed up subsequent runs of `fclones group` by avoiding
Expand Down
4 changes: 2 additions & 2 deletions src/group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,9 @@ impl<'a> GroupCtx<'a> {
.path_selector(&base_dir)
.map_err(|e| format!("Invalid pattern: {}", e))?;
let hasher = if config.cache {
FileHasher::new_cached(transform, log)?
FileHasher::new_cached(config.hash_fn, transform, log)?
} else {
FileHasher::new(transform, log)
FileHasher::new(config.hash_fn, transform, log)
};

Self::check_pool_config(thread_pool_sizes, &devices)?;
Expand Down
Loading

0 comments on commit 1b8f9eb

Please sign in to comment.