Skip to content

Commit

Permalink
Move traversal and hashing out of main file
Browse files Browse the repository at this point in the history
  • Loading branch information
yury-fedotov committed Jul 30, 2024
1 parent 07c5e54 commit 747f276
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 73 deletions.
17 changes: 17 additions & 0 deletions src/file_utils.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::fs;
use std::io::{self, Read};
use std::path::PathBuf;

#[derive(Clone)]
Expand Down Expand Up @@ -28,6 +30,21 @@ pub fn get_file_size(path: &PathBuf) -> u64 {
.unwrap_or(0)
}

/// Computes the SHA-256 digest of the file at `path` and returns it as a
/// lowercase hexadecimal string.
///
/// The file is streamed through the hasher in fixed-size chunks, so memory use
/// does not grow with the size of the file.
///
/// # Errors
///
/// Returns the underlying [`io::Error`] if the file cannot be opened or if a
/// read fails. Reads that fail with [`io::ErrorKind::Interrupted`] are retried
/// instead of being reported, as the `Read::read` contract recommends.
pub fn calculate_hash(path: &PathBuf) -> io::Result<String> {
    // 64 KiB per read keeps the number of read calls low on large files while
    // still fitting comfortably on the stack.
    const CHUNK_SIZE: usize = 64 * 1024;

    let mut file = fs::File::open(path)?;
    let mut hasher = Sha256::new();
    let mut buffer = [0u8; CHUNK_SIZE];
    loop {
        let n = match file.read(&mut buffer) {
            // A zero-length read signals end of file.
            Ok(0) => break,
            Ok(n) => n,
            // An interrupted read is transient; try again rather than failing
            // the whole hash.
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        hasher.update(&buffer[..n]);
    }
    let hash = hasher.finalize();
    Ok(format!("{:x}", hash))
}

pub fn get_largest_files(files: &[FileInfo], count: usize) -> Vec<FileInfo> {
let mut sorted_files: Vec<&FileInfo> = files.iter().collect();
sorted_files.sort_by(|a, b| b.size.cmp(&a.size));
Expand Down
71 changes: 4 additions & 67 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,30 +1,12 @@
mod file_utils;
mod results;
use sha2::{Digest, Sha256};
use std::fs;
use std::io::{self, Read};
mod traversal;

use clap::Parser;
use file_utils::{find_duplicate_groups, get_file_size, get_largest_files, FileInfo};
use ignore::WalkBuilder;
use results::{AnalysisResults, DirectoryTraversalOutput};
use std::path::PathBuf;
use file_utils::{find_duplicate_groups, get_largest_files};
use results::AnalysisResults;
use std::time::Instant;

/// Computes the SHA-256 digest of the file at `path` and returns it as a
/// lowercase hexadecimal string.
///
/// The file is streamed through the hasher in fixed-size chunks, so memory use
/// does not grow with the size of the file.
///
/// # Errors
///
/// Returns the underlying [`io::Error`] if the file cannot be opened or if a
/// read fails. Reads that fail with [`io::ErrorKind::Interrupted`] are retried
/// instead of being reported, as the `Read::read` contract recommends.
fn calculate_hash(path: &PathBuf) -> io::Result<String> {
    // 64 KiB per read keeps the number of read calls low on large files while
    // still fitting comfortably on the stack.
    const CHUNK_SIZE: usize = 64 * 1024;

    let mut file = fs::File::open(path)?;
    let mut hasher = Sha256::new();
    let mut buffer = [0u8; CHUNK_SIZE];
    loop {
        let n = match file.read(&mut buffer) {
            // A zero-length read signals end of file.
            Ok(0) => break,
            Ok(n) => n,
            // An interrupted read is transient; try again rather than failing
            // the whole hash.
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        hasher.update(&buffer[..n]);
    }
    let hash = hasher.finalize();
    Ok(format!("{:x}", hash))
}
use traversal::traverse_directory;

/// Program to find and print the largest files in a directory and its subdirectories
#[derive(Parser)]
Expand All @@ -37,51 +19,6 @@ struct Args {
count: usize,
}

/// Walks `dir` recursively and collects a [`FileInfo`] for every regular file,
/// along with a directory tally and the deepest entry depth seen.
///
/// The walker is built with its hidden, ignore, git-ignore, git-global and
/// git-exclude options all switched on. Entries the walker reports as errors
/// are skipped.
///
/// Files larger than 3 MiB are hashed with `calculate_hash`; smaller files, and
/// files whose hashing fails, carry an empty hash string.
fn traverse_directory(dir: &str) -> DirectoryTraversalOutput {
    let walker = WalkBuilder::new(dir)
        .hidden(true)
        .ignore(true)
        .git_ignore(true)
        .git_global(true)
        .git_exclude(true)
        .build();

    let mut file_infos: Vec<FileInfo> = Vec::new();
    // NOTE(review): the tally starts at 1, presumably to account for the root.
    // If the walker also yields the root as a depth-0 directory entry, the root
    // would be counted twice — confirm.
    let mut dir_count = 1;
    let mut max_depth = 0;

    // `flatten` drops the `Err` items, keeping only successfully read entries.
    for entry in walker.flatten() {
        max_depth = max_depth.max(entry.depth());

        match entry.file_type() {
            Some(kind) if kind.is_dir() => dir_count += 1,
            Some(kind) if kind.is_file() => {
                let path = entry.into_path();
                let size = get_file_size(&path);
                // Only files above 3MB (in bytes) are hashed; a failed hash
                // falls back to the empty string.
                let hash = if size > 3 * 1024 * 1024 {
                    calculate_hash(&path).unwrap_or_default()
                } else {
                    String::new()
                };
                file_infos.push(FileInfo { path, size, hash });
            }
            // Entries that are neither directories nor regular files, or that
            // have no file type, contribute nothing beyond their depth.
            _ => {}
        }
    }

    DirectoryTraversalOutput {
        file_infos,
        dir_count,
        max_depth,
    }
}

fn main() {
let start_time = Instant::now();

Expand Down
6 changes: 0 additions & 6 deletions src/results.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,3 @@ fn format_size(size: u64) -> String {
format!("{} bytes", size)
}
}

/// Aggregated result of a directory traversal.
pub struct DirectoryTraversalOutput {
/// One record per regular file encountered during the walk.
pub file_infos: Vec<FileInfo>,
/// Directory tally; the traversal starts it at 1 and adds one per directory entry seen.
pub dir_count: usize,
/// Greatest entry depth observed during the walk.
pub max_depth: usize,
}
54 changes: 54 additions & 0 deletions src/traversal.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
use crate::file_utils::FileInfo;
use crate::file_utils::{calculate_hash, get_file_size};
use ignore::WalkBuilder;

/// Aggregated result of a directory traversal.
pub struct DirectoryTraversalOutput {
/// One record per regular file encountered during the walk.
pub file_infos: Vec<FileInfo>,
/// Directory tally; the traversal starts it at 1 and adds one per directory entry seen.
pub dir_count: usize,
/// Greatest entry depth observed during the walk.
pub max_depth: usize,
}

/// Walks `dir` recursively and collects a [`FileInfo`] for every regular file,
/// along with a directory tally and the deepest entry depth seen.
///
/// The walker is built with its hidden, ignore, git-ignore, git-global and
/// git-exclude options all switched on. Entries the walker reports as errors
/// are skipped.
///
/// Files larger than 3 MiB are hashed with [`calculate_hash`]; smaller files,
/// and files whose hashing fails, carry an empty hash string.
pub fn traverse_directory(dir: &str) -> DirectoryTraversalOutput {
    let walker = WalkBuilder::new(dir)
        .hidden(true)
        .ignore(true)
        .git_ignore(true)
        .git_global(true)
        .git_exclude(true)
        .build();

    let mut file_infos: Vec<FileInfo> = Vec::new();
    // NOTE(review): the tally starts at 1, presumably to account for the root.
    // If the walker also yields the root as a depth-0 directory entry, the root
    // would be counted twice — confirm.
    let mut dir_count = 1;
    let mut max_depth = 0;

    // `flatten` drops the `Err` items, keeping only successfully read entries.
    for entry in walker.flatten() {
        max_depth = max_depth.max(entry.depth());

        match entry.file_type() {
            Some(kind) if kind.is_dir() => dir_count += 1,
            Some(kind) if kind.is_file() => {
                let path = entry.into_path();
                let size = get_file_size(&path);
                // Only files above 3MB (in bytes) are hashed; a failed hash
                // falls back to the empty string.
                let hash = if size > 3 * 1024 * 1024 {
                    calculate_hash(&path).unwrap_or_default()
                } else {
                    String::new()
                };
                file_infos.push(FileInfo { path, size, hash });
            }
            // Entries that are neither directories nor regular files, or that
            // have no file type, contribute nothing beyond their depth.
            _ => {}
        }
    }

    DirectoryTraversalOutput {
        file_infos,
        dir_count,
        max_depth,
    }
}

0 comments on commit 747f276

Please sign in to comment.