Skip to content

Commit

Permalink
cksum/hashsum: try to detect the format faster the first line
Browse files Browse the repository at this point in the history
  • Loading branch information
sylvestre committed May 28, 2024
1 parent 7d34fc7 commit 644a36e
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 43 deletions.
103 changes: 76 additions & 27 deletions src/uucore/src/lib/features/checksum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,32 @@ const DOUBLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s{2}(?P<filename>
// In this case, we ignore the *
const SINGLE_SPACE_REGEX: &str = r"^(?P<checksum>[a-fA-F0-9]+)\s(?P<binary>\*?)(?P<filename>.*)$";

/// Selects the regular expression that matches the format of a checksum file.
///
/// Lines are examined in order. Each one is trimmed and tried against the
/// algo-based pattern, then the double-space pattern, then the single-space
/// pattern. The first line that matches any of them decides the result, so
/// leading lines that match nothing are simply skipped.
///
/// Returns the chosen regex together with a flag that is `true` only when the
/// algo-based pattern was selected.
///
/// # Errors
///
/// Returns an `InvalidData` I/O error mentioning `filename` when no line
/// matches any of the three patterns.
fn determine_regex(filename: &OsStr, lines: &[String]) -> UResult<(Regex, bool)> {
    // Candidate patterns in priority order, paired with the "algo-based" flag.
    let mut candidates: Vec<(Regex, bool)> = [
        (ALGO_BASED_REGEX, true),
        (DOUBLE_SPACE_REGEX, false),
        (SINGLE_SPACE_REGEX, false),
    ]
    .into_iter()
    .map(|(pattern, is_algo_based)| (Regex::new(pattern).unwrap(), is_algo_based))
    .collect();

    // Index of the first candidate that matches the first recognizable line.
    let winner = lines.iter().find_map(|raw| {
        let trimmed = raw.trim();
        candidates.iter().position(|(re, _)| re.is_match(trimmed))
    });

    match winner {
        // Move the winning regex out of the table instead of cloning it.
        Some(index) => Ok(candidates.swap_remove(index)),
        None => {
            let message = format!(
                "{}: no properly formatted checksum lines found",
                filename.maybe_quote()
            );
            Err(io::Error::new(io::ErrorKind::InvalidData, message).into())
        }
    }
}
/***
* Do the checksum validation (can be strict or not)
*/
Expand All @@ -328,10 +354,6 @@ pub fn perform_checksum_validation<'a, I>(
where
I: Iterator<Item = &'a OsStr>,
{
let algo_based_regex = Regex::new(ALGO_BASED_REGEX).unwrap();
let double_space_regex = Regex::new(DOUBLE_SPACE_REGEX).unwrap();
let single_space_regex = Regex::new(SINGLE_SPACE_REGEX).unwrap();

// if cksum has several input files, it will print the result for each file
for filename_input in files {
let mut bad_format = 0;
Expand All @@ -358,31 +380,13 @@ where
}
}
};
let mut reader = BufReader::new(file);

let mut first_line = String::new();
reader.read_line(&mut first_line)?;

// Determine which regular expression to use based on the first line
let first_line_trim = first_line.trim();
let (chosen_regex, algo_based_format) = if algo_based_regex.is_match(first_line_trim) {
(&algo_based_regex, true)
} else if double_space_regex.is_match(first_line_trim) {
// If the first line contains a double space, use the double space regex
(&double_space_regex, false)
} else {
// It is probably rare but sometimes the checksum file may contain a single space
(&single_space_regex, false)
};

// Push the first line back to the reader
let first_line_reader = io::Cursor::new(first_line);
let chain_reader = first_line_reader.chain(reader);
let reader = BufReader::new(chain_reader);
let reader = BufReader::new(file);
let lines: Vec<String> = reader.lines().collect::<Result<_, _>>()?;
let (chosen_regex, algo_based_format) = determine_regex(filename_input, &lines)?;

// for each line in the input, check if it is a valid checksum line
for (i, line) in reader.lines().enumerate() {
let line = line.unwrap_or_else(|_| String::new());
// Process each line
for (i, line) in lines.iter().enumerate() {
if let Some(caps) = chosen_regex.captures(&line) {
properly_formatted = true;

Expand Down Expand Up @@ -886,4 +890,49 @@ mod tests {
}
}
}

#[test]
fn test_determine_regex() {
    // `determine_regex` needs the checksum file name for its error message.
    let filename = std::ffi::OsStr::new("test.txt");

    // Test algo-based regex
    let lines_algo_based =
        vec!["MD5 (example.txt) = d41d8cd98f00b204e9800998ecf8427e".to_string()];
    let (regex, algo_based) = determine_regex(filename, &lines_algo_based)
        .expect("algo-based line should be recognized");
    assert!(algo_based);
    assert_eq!(regex.as_str(), ALGO_BASED_REGEX);
    assert!(regex.is_match(&lines_algo_based[0]));

    // Test double-space regex (checksum and file name separated by two spaces)
    let lines_double_space = vec!["d41d8cd98f00b204e9800998ecf8427e  example.txt".to_string()];
    let (regex, algo_based) = determine_regex(filename, &lines_double_space)
        .expect("double-space line should be recognized");
    assert!(!algo_based);
    // The single-space pattern also matches this line, so pin the chosen pattern.
    assert_eq!(regex.as_str(), DOUBLE_SPACE_REGEX);
    assert!(regex.is_match(&lines_double_space[0]));

    // Test single-space regex
    let lines_single_space = vec!["d41d8cd98f00b204e9800998ecf8427e example.txt".to_string()];
    let (regex, algo_based) = determine_regex(filename, &lines_single_space)
        .expect("single-space line should be recognized");
    assert!(!algo_based);
    assert_eq!(regex.as_str(), SINGLE_SPACE_REGEX);
    assert!(regex.is_match(&lines_single_space[0]));

    // Test double-space regex when the file starts with an invalid line
    let lines_double_space = vec![
        "ERR".to_string(),
        "d41d8cd98f00b204e9800998ecf8427e  example.txt".to_string(),
    ];
    let (regex, algo_based) = determine_regex(filename, &lines_double_space)
        .expect("a valid line after an invalid one should be recognized");
    assert!(!algo_based);
    assert_eq!(regex.as_str(), DOUBLE_SPACE_REGEX);
    assert!(!regex.is_match(&lines_double_space[0]));
    assert!(regex.is_match(&lines_double_space[1]));

    // Test invalid checksum line
    let lines_invalid = vec!["invalid checksum line".to_string()];
    assert!(determine_regex(filename, &lines_invalid).is_err());

    // No lines at all means no format can be detected
    assert!(determine_regex(filename, &[]).is_err());
}
}
60 changes: 44 additions & 16 deletions tests/by-util/test_hashsum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -724,62 +724,90 @@ fn test_check_one_two_space_star() {
}

#[test]
fn test_check_one_two_space_star_start_without_star() {
fn test_check_space_star_or_not() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;

at.touch("empty");
at.touch("f");
at.touch("a");
at.touch("*c");

// with one space, the "*" is removed
at.write(
"in.md5",
"d41d8cd98f00b204e9800998ecf8427e f\nd41d8cd98f00b204e9800998ecf8427e *empty\n",
"d41d8cd98f00b204e9800998ecf8427e *c\n
d41d8cd98f00b204e9800998ecf8427e a\n",
);

scene
.ccmd("md5sum")
.arg("--check")
.arg(at.subdir.join("in.md5"))
.succeeds()
.stdout_is("f: OK\nempty: OK\n");
.fails()
.stdout_contains("c: FAILED")
.stdout_does_not_contain("a: FAILED")
.stderr_contains("WARNING: 1 line is improperly formatted");

// with two spaces, the "*" is not removed
at.write(
"in.md5",
"d41d8cd98f00b204e9800998ecf8427e f\nd41d8cd98f00b204e9800998ecf8427e *empty\n",
"d41d8cd98f00b204e9800998ecf8427e a\n
d41d8cd98f00b204e9800998ecf8427e *c\n",
);

// First should fail as *empty doesn't exist
scene
.ccmd("md5sum")
.arg("--check")
.arg(at.subdir.join("in.md5"))
.fails()
.stdout_is("f: OK\n*empty: FAILED open or read\n");
.stdout_contains("a: FAILED")
.stdout_contains("*c: FAILED");
}

at.touch("*empty");
// Should pass as we have the file
#[test]
fn test_check_no_backslash_no_space() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;

at.touch("f");
at.write("in.md5", "MD5(f)= d41d8cd98f00b204e9800998ecf8427e\n");
scene
.ccmd("md5sum")
.arg("--check")
.arg(at.subdir.join("in.md5"))
.succeeds()
.stdout_is("f: OK\n*empty: OK\n");
.stdout_is("f: OK\n");
}

#[test]
fn test_check_no_backslash_no_space() {
fn test_incomplete_format() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;

at.touch("f");
at.write("in.md5", "MD5(f)= d41d8cd98f00b204e9800998ecf8427e\n");
at.write("in.md5", "MD5 (\n");
scene
.ccmd("md5sum")
.arg("--check")
.arg(at.subdir.join("in.md5"))
.succeeds()
.stdout_is("f: OK\n");
.fails()
.stderr_contains("no properly formatted checksum lines found");
}

#[test]
fn test_start_error() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;

at.touch("f");
at.write("in.md5", "ERR\nd41d8cd98f00b204e9800998ecf8427e f\n");
scene
.ccmd("md5sum")
.arg("--check")
.arg("--strict")
.arg(at.subdir.join("in.md5"))
.fails()
.stdout_is("f: OK\n")
.stderr_contains("WARNING: 1 line is improperly formatted");
}

#[test]
Expand Down

0 comments on commit 644a36e

Please sign in to comment.