diff --git a/.gitignore b/.gitignore index 1e87a9d4..57d5f675 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,4 @@ target .project .cargo .settings -test_data/links/link-f -test_data/links/link-d +test_data/links/link-* diff --git a/Cargo.lock b/Cargo.lock index 67ecae1d..5acc0199 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +version = 3 + [[package]] name = "aho-corasick" version = "0.7.18" @@ -9,6 +11,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + [[package]] name = "assert_cmd" version = "2.0.2" @@ -23,6 +34,17 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.0.1" @@ -58,6 +80,34 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "time", + "winapi", +] + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + [[package]] name = "difflib" version 
= "0.4.0" @@ -70,6 +120,12 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "dunce" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "453440c271cf5577fd2a40e4942540cb7d0d2f85e27c8d07dd0023c925a67541" + [[package]] name = "either" version = "1.6.1" @@ -81,11 +137,14 @@ name = "findutils" version = "0.2.0" dependencies = [ "assert_cmd", + "chrono", "glob", + "once_cell", "predicates", "regex", "serial_test", "tempfile", + "uucore", "walkdir", ] @@ -98,6 +157,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.3" @@ -115,6 +183,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "instant" version = "0.1.9" @@ -166,6 +243,16 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" +[[package]] +name = "num-integer" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +dependencies = [ + "autocfg", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.14" @@ -175,6 +262,27 @@ dependencies = [ "autocfg", ] +[[package]] +name = "numtoa" 
+version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef" + +[[package]] +name = "once_cell" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" + +[[package]] +name = "os_display" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "748cc1d0dc55247316a5bedd8dc8c5478c8a0c2e2001176b38ce7c0ed732c7a5" +dependencies = [ + "unicode-width", +] + [[package]] name = "parking_lot" version = "0.11.1" @@ -309,6 +417,15 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_termios" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8440d8acb4fd3d277125b4bd01a6f38aee8d814b3b5fc09b3f2b825d37d3fe8f" +dependencies = [ + "redox_syscall 0.2.10", +] + [[package]] name = "regex" version = "1.5.4" @@ -387,6 +504,12 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a55ca5f3b68e41c979bf8c46a6f1da892ca4db8f94023ce0bd32407573b1ac0" +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + [[package]] name = "syn" version = "1.0.56" @@ -412,18 +535,78 @@ dependencies = [ "winapi", ] +[[package]] +name = "termion" +version = "1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "077185e2eac69c3f8379a4298e1e07cd36beb962290d4a51199acf0fdc10607e" +dependencies = [ + "libc", + "numtoa", + "redox_syscall 0.2.10", + "redox_termios", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] 
+ +[[package]] +name = "time" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "treeline" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41" +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + [[package]] name = "unicode-xid" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" +[[package]] +name = "uucore" +version = "0.0.10" +source = "git+https://github.com/uutils/coreutils#5c0adb26a57e594b369e4ffd0fe9f54b9d4efd20" +dependencies = [ + "clap", + "dunce", + "getopts", + "libc", + "once_cell", + "os_display", + "termion", + "time", + "wild", + "winapi", +] + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + [[package]] name = "wait-timeout" version = "0.2.0" @@ -450,6 +633,15 @@ version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +[[package]] +name = "wild" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "035793abb854745033f01a07647a79831eba29ec0be377205f2a25b0aa830020" +dependencies = [ + "glob", +] + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 7a3b7347..ba4caf50 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,10 +10,13 @@ description = "Rust implementation 
of GNU findutils" authors = ["uutils developers"] [dependencies] +chrono = "0.4" glob = "0.3" walkdir = "2.3" tempfile = "3" regex = "1.5" +once_cell = "1.9" +uucore = { git = "https://github.com/uutils/coreutils", features = ["entries", "fs", "fsext"] } [dev-dependencies] assert_cmd = "2" diff --git a/src/find/matchers/delete.rs b/src/find/matchers/delete.rs index 17c1eccc..12e12817 100644 --- a/src/find/matchers/delete.rs +++ b/src/find/matchers/delete.rs @@ -63,7 +63,7 @@ impl Matcher for DeleteMatcher { #[cfg(test)] mod tests { - use std::fs::File; + use std::fs::{create_dir, File}; use tempfile::Builder; use super::*; @@ -79,6 +79,7 @@ mod tests { let temp_dir_path = temp_dir.path().to_string_lossy(); File::create(temp_dir.path().join("test")).expect("created test file"); + create_dir(temp_dir.path().join("test_dir")).expect("created test directory"); let test_entry = get_dir_entry_for(&temp_dir_path, "test"); assert!( matcher.matches(&test_entry, &mut deps.new_matcher_io()), @@ -89,14 +90,13 @@ mod tests { "DeleteMatcher should actually delete files it matches", ); - let temp_dir_name = temp_dir.path().file_name().unwrap().to_string_lossy(); - let temp_dir_entry = get_dir_entry_for(&temp_dir_path, &temp_dir_name); + let temp_dir_entry = get_dir_entry_for(&temp_dir_path, "test_dir"); assert!( matcher.matches(&temp_dir_entry, &mut deps.new_matcher_io()), "DeleteMatcher should match directories", ); assert!( - !temp_dir.path().exists(), + !temp_dir.path().join("test_dir").exists(), "DeleteMatcher should actually delete (empty) directories it matches", ); } diff --git a/src/find/matchers/mod.rs b/src/find/matchers/mod.rs index 9a09f564..3037db5e 100644 --- a/src/find/matchers/mod.rs +++ b/src/find/matchers/mod.rs @@ -10,6 +10,7 @@ mod logical_matchers; mod name; mod perm; mod printer; +mod printf; mod prune; mod size; mod time; @@ -206,6 +207,13 @@ fn build_matcher_tree( while i < args.len() { let possible_submatcher = match args[i] { "-print" => 
Some(printer::Printer::new_box()), + "-printf" => { + if i >= args.len() - 1 { + return Err(From::from(format!("missing argument to {}", args[i]))); + } + i += 1; + Some(printf::Printf::new_box(args[i])?) + } "-true" => Some(logical_matchers::TrueMatcher::new_box()), "-false" => Some(logical_matchers::FalseMatcher::new_box()), "-name" => { @@ -423,7 +431,13 @@ mod tests { pub fn get_dir_entry_for(directory: &str, filename: &str) -> DirEntry { for wrapped_dir_entry in WalkDir::new(fix_up_slashes(directory)) { let dir_entry = wrapped_dir_entry.unwrap(); - if dir_entry.file_name().to_string_lossy() == filename { + if dir_entry + .path() + .strip_prefix(directory) + .unwrap() + .to_string_lossy() + == fix_up_slashes(filename) + { return dir_entry; } } diff --git a/src/find/matchers/printf.rs b/src/find/matchers/printf.rs new file mode 100644 index 00000000..472aa9e8 --- /dev/null +++ b/src/find/matchers/printf.rs @@ -0,0 +1,1033 @@ +// Copyright 2021 Collabora, Ltd. +// +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. + +use std::{ + borrow::Cow, + error::Error, + fs, + path::Path, + time::{Duration, SystemTime}, +}; + +use chrono::{format::StrftimeItems, DateTime, Local}; +use once_cell::unsync::OnceCell; + +use super::{Matcher, MatcherIO}; + +#[cfg(unix)] +use std::os::unix::prelude::{FileTypeExt, MetadataExt}; + +const STANDARD_BLOCK_SIZE: u64 = 512; + +#[derive(Debug, PartialEq, Eq)] +enum Justify { + Left, + Right, +} + +#[derive(Debug, PartialEq, Eq)] +enum TimeFormat { + /// Follow ctime(3). + Ctime, + /// Seconds since the epoch, as a float w/ nanosecond part. + SinceEpoch, + /// Follow strftime-compatible syntax '%c' where 'c' is the given character. 
+ Strftime(char), +} + +impl TimeFormat { + fn apply(&self, time: SystemTime) -> Result, Box> { + const CTIME_FORMAT: &str = "%a %b %d %H:%M:%S.%f %Y"; + + let formatted = match self { + TimeFormat::SinceEpoch => time + .duration_since(SystemTime::UNIX_EPOCH)? + .as_secs_f64() + .to_string(), + TimeFormat::Ctime => DateTime::::from(time) + .format(CTIME_FORMAT) + .to_string(), + TimeFormat::Strftime(c) => DateTime::::from(time) + .format(&format!("%{}", c)) + .to_string(), + }; + + Ok(formatted.into()) + } +} + +#[derive(Debug, PartialEq, Eq)] +enum PermissionsFormat { + Octal, + // trwxrwxrwx + Symbolic, +} + +/// A single % directive in a format string. +#[derive(Debug, PartialEq, Eq)] +enum FormatDirective { + // %a, %Ak + AccessTime(TimeFormat), + // %b, %k + Blocks { large_blocks: bool }, + // %c, %Ck + ChangeTime(TimeFormat), + // %d + Depth, + // %D + Device, + // %f + Basename, + // %F + Filesystem, + // %g, %G + Group { as_name: bool }, + // %h + Dirname, + // %H + StartingPoint, + // %i + Inode, + // %l + SymlinkTarget, + // %m + Permissions(PermissionsFormat), + // %n + HardlinkCount, + // %p, %P + Path { strip_starting_point: bool }, + // %s + Size, + // %S + Sparseness, + // %t, %Tk + ModificationTime(TimeFormat), + // %u, %U + User { as_name: bool }, + // %y, %Y + Type { follow_links: bool }, +} + +/// A component in a full format string. 
+#[derive(Debug, PartialEq, Eq)] +enum FormatComponent { + Literal(String), + Flush, + Directive { + directive: FormatDirective, + width: Option, + justify: Justify, + }, +} + +struct FormatStringParser<'a> { + string: &'a str, +} + +impl FormatStringParser<'_> { + fn front(&self) -> Result> { + self.string + .chars() + .next() + .ok_or_else(|| "Unexpected EOF".into()) + } + + fn peek(&self, count: usize) -> Result<&str, Box> { + if self.string.len() < count { + return Err("Unexpected EOF".into()); + } + + Ok(&self.string[0..count]) + } + + fn advance_one(&mut self) -> Result> { + let c = self.front()?; + self.string = &self.string[1..]; + Ok(c) + } + + fn advance_by(&mut self, count: usize) -> Result<&str, Box> { + self.peek(count)?; + + let skipped = &self.string[0..count]; + self.string = &self.string[count..]; + Ok(skipped) + } + + fn parse_escape_sequence(&mut self) -> Result> { + const OCTAL_LEN: usize = 3; + const OCTAL_RADIX: u32 = 8; + + // Try parsing an octal sequence first. + let first = self.front()?; + if first.is_digit(OCTAL_RADIX) { + if let Ok(code) = self + .peek(OCTAL_LEN) + .and_then(|octal| u32::from_str_radix(octal, OCTAL_RADIX).map_err(|e| e.into())) + { + // safe to unwrap: .peek() already succeeded above. 
+ let octal = self.advance_by(OCTAL_LEN).unwrap(); + return match char::from_u32(code) { + Some(c) => Ok(FormatComponent::Literal(c.to_string())), + None => Err(format!("Invalid character value: \\{}", octal).into()), + }; + } + } + + self.advance_one()?; + + if first == 'c' { + Ok(FormatComponent::Flush) + } else { + let c = match first { + 'a' => "\x07", + 'b' => "\x08", + 'f' => "\x0C", + 'n' => "\n", + 'r' => "\r", + 't' => "\t", + 'v' => "\x0B", + '0' => "\0", + '\\' => "\\", + c => return Err(format!("Invalid escape sequence: \\{}", c).into()), + }; + + Ok(FormatComponent::Literal(c.to_string())) + } + } + + fn parse_format_width(&mut self) -> Option { + let start = self.string; + let mut digits = 0; + + while self.front().map(|c| c.is_ascii_digit()).unwrap_or(false) { + digits += 1; + // safe to unwrap: the front() check already succeeded above. + self.advance_one().unwrap(); + } + + if digits > 0 { + // safe to unwrap: we already know all the digits are valid due to + // the above checks. + Some((&start[0..digits]).parse().unwrap()) + } else { + None + } + } + + fn parse_time_specifier(&mut self, first: char) -> Result> { + let c = self.advance_one()?; + if c == '@' { + return Ok(TimeFormat::SinceEpoch); + } + + // We can't store the parsed items inside TimeFormat, because the items + // take a reference to the full format string, but we still try to parse + // it here so that errors get caught early. + match StrftimeItems::new(&format!("%{}", c)).next() { + None | Some(chrono::format::Item::Error) => { + Err(format!("Invalid time specifier: %{}{}", first, c).into()) + } + Some(_item) => Ok(TimeFormat::Strftime(c)), + } + } + + fn parse_format_specifier(&mut self) -> Result> { + let mut justify = Justify::Right; + loop { + match self.front()? { + ' ' => (), + '-' => justify = Justify::Left, + _ => break, + } + + // safe to unwrap: .front() already succeeded above. 
+ self.advance_one().unwrap(); + } + + let width = self.parse_format_width(); + + let first = self.advance_one()?; + if first == '%' { + return Ok(FormatComponent::Literal("%".to_owned())); + } + + let directive = match first { + 'a' => FormatDirective::AccessTime(TimeFormat::Ctime), + 'A' => FormatDirective::AccessTime(self.parse_time_specifier(first)?), + 'b' => FormatDirective::Blocks { + large_blocks: false, + }, + 'c' => FormatDirective::ChangeTime(TimeFormat::Ctime), + 'C' => FormatDirective::ChangeTime(self.parse_time_specifier(first)?), + 'd' => FormatDirective::Depth, + 'D' => FormatDirective::Device, + 'f' => FormatDirective::Basename, + 'F' => FormatDirective::Filesystem, + 'g' => FormatDirective::Group { as_name: true }, + 'G' => FormatDirective::Group { as_name: false }, + 'h' => FormatDirective::Dirname, + 'H' => FormatDirective::StartingPoint, + 'k' => FormatDirective::Blocks { large_blocks: true }, + 'i' => FormatDirective::Inode, + 'l' => FormatDirective::SymlinkTarget, + 'm' => FormatDirective::Permissions(PermissionsFormat::Octal), + 'M' => FormatDirective::Permissions(PermissionsFormat::Symbolic), + 'n' => FormatDirective::HardlinkCount, + 'p' => FormatDirective::Path { + strip_starting_point: false, + }, + 'P' => FormatDirective::Path { + strip_starting_point: true, + }, + 's' => FormatDirective::Size, + 'S' => FormatDirective::Sparseness, + 't' => FormatDirective::ModificationTime(TimeFormat::Ctime), + 'T' => FormatDirective::ModificationTime(self.parse_time_specifier(first)?), + 'u' => FormatDirective::User { as_name: true }, + 'U' => FormatDirective::User { as_name: false }, + 'y' => FormatDirective::Type { + follow_links: false, + }, + 'Y' => FormatDirective::Type { follow_links: true }, + // TODO: %Z + _ => return Ok(FormatComponent::Literal(first.to_string())), + }; + + Ok(FormatComponent::Directive { + directive, + width, + justify, + }) + } + + pub fn parse(&mut self) -> Result> { + let mut components = vec![]; + + while let Some(i) = 
self.string.find(|c| c == '%' || c == '\\') { + if i > 0 { + // safe to unwrap: i is an index into the string, so it cannot + // be any shorter. + let literal = self.advance_by(i).unwrap(); + if !literal.is_empty() { + components.push(FormatComponent::Literal(literal.to_owned())); + } + } + + // safe to unwrap: we've only advanced as far as 'i', which is right + // before the character it identified. + let component = match self.advance_one().unwrap() { + '\\' => self.parse_escape_sequence()?, + '%' => self.parse_format_specifier()?, + _ => panic!("Stopped at unexpected character: {}", self.string), + }; + components.push(component); + } + + if !self.string.is_empty() { + components.push(FormatComponent::Literal(self.string.to_owned())); + } + + Ok(FormatString { components }) + } +} + +struct FormatString { + components: Vec, +} + +impl FormatString { + fn parse(string: &str) -> Result> { + FormatStringParser { string }.parse() + } +} + +fn get_starting_point(file_info: &walkdir::DirEntry) -> &Path { + file_info + .path() + .ancestors() + .nth(file_info.depth()) + // safe to unwrap: the file's depth should never be longer than its path + // (...right?). 
+ .unwrap() +} + +fn format_non_link_file_type(file_type: fs::FileType) -> char { + if file_type.is_file() { + 'f' + } else if file_type.is_dir() { + 'd' + } else { + #[cfg(unix)] + if file_type.is_block_device() { + 'b' + } else if file_type.is_char_device() { + 'c' + } else if file_type.is_fifo() { + 'p' + } else if file_type.is_socket() { + 's' + } else { + 'U' + } + #[cfg(not(unix))] + 'U' + } +} + +fn format_directive<'entry>( + file_info: &'entry walkdir::DirEntry, + directive: &FormatDirective, + meta_cell: &OnceCell, +) -> Result, Box> { + let meta = || { + meta_cell.get_or_try_init(|| { + if file_info.path_is_symlink() && !file_info.file_type().is_symlink() { + // The file_info already followed the symlink, meaning that the + // metadata will be for the target file, which isn't the + // behavior we want, so manually re-compute the metadata for the + // symlink itself instead. + file_info.path().symlink_metadata() + } else { + file_info.metadata().map_err(|e| e.into()) + } + }) + }; + + // NOTE ON QUOTING: + // GNU find's man page claims that several directives that print names (like + // %f) are quoted like ls; however, I could not reproduce this at all in + // practice, thus the set of rules is undoubtedly very different (if this is + // still done at all). + + let res: Cow<'entry, str> = match directive { + FormatDirective::AccessTime(tf) => tf.apply(meta()?.accessed()?)?, + + FormatDirective::Basename => file_info.file_name().to_string_lossy(), + + FormatDirective::Blocks { large_blocks } => { + #[cfg(unix)] + let blocks = meta()?.blocks(); + #[cfg(not(unix))] + // Estimate using a ceiling division by the block size. + let blocks = (meta()?.len() + STANDARD_BLOCK_SIZE - 1) / STANDARD_BLOCK_SIZE; + + // GNU find says it returns the number of 512-byte blocks for %b, + // but in reality it just returns the number of blocks, *regardless + // of their size on the filesystem*. That behavior is copied here, + // even though it's arguably not 100% correct. 
+ if *large_blocks { + // Ceiling divide in half. + (blocks + 1) / 2 + } else { + blocks + } + .to_string() + .into() + } + + #[cfg(not(unix))] + FormatDirective::ChangeTime(tf) => tf.apply(meta()?.modified()?)?, + #[cfg(unix)] + FormatDirective::ChangeTime(tf) => { + let meta = meta()?; + let ctime = SystemTime::UNIX_EPOCH + + Duration::from_secs(meta.ctime() as u64) + + Duration::from_nanos(meta.ctime_nsec() as u64); + tf.apply(ctime)? + } + + FormatDirective::Depth => file_info.depth().to_string().into(), + + #[cfg(not(unix))] + FormatDirective::Device => "0".into(), + #[cfg(unix)] + FormatDirective::Device => meta()?.dev().to_string().into(), + + // GNU find's behavior for this is a bit...odd: + // - Both the root directory and the paths immediately underneath return an empty string + // - Any path without any slashes (i.e. relative to cwd) returns "." + // - "." also returns "." + // - ".." returns "." (???) + // These are all (thankfully) documented on the find(1) man page. + FormatDirective::Dirname => match file_info.path().parent() { + None => "".into(), + Some(p) if p == Path::new("/") => "".into(), + Some(p) if p == Path::new("") => ".".into(), + Some(parent) => parent.to_string_lossy(), + }, + + #[cfg(not(unix))] + FormatDirective::Filesystem => "".into(), + #[cfg(unix)] + FormatDirective::Filesystem => { + let dev_id = meta()?.dev().to_string(); + uucore::fsext::read_fs_list() + .into_iter() + .find(|fs| fs.dev_id == dev_id) + .map(|fs| fs.fs_type) + .unwrap_or_else(|| "".to_owned()) + .into() + } + + #[cfg(not(unix))] + FormatDirective::Group { .. 
} => "0".into(), + #[cfg(unix)] + FormatDirective::Group { as_name } => { + let gid = meta()?.gid(); + if *as_name { + uucore::entries::gid2grp(gid).unwrap_or_else(|_| gid.to_string()) + } else { + gid.to_string() + } + .into() + } + + #[cfg(not(unix))] + FormatDirective::HardlinkCount => "0".into(), + #[cfg(unix)] + FormatDirective::HardlinkCount => meta()?.nlink().to_string().into(), + + #[cfg(not(unix))] + FormatDirective::Inode => "0".into(), + #[cfg(unix)] + FormatDirective::Inode => meta()?.ino().to_string().into(), + + FormatDirective::ModificationTime(tf) => tf.apply(meta()?.modified()?)?, + + FormatDirective::Path { + strip_starting_point, + } => file_info + .path() + .strip_prefix(if *strip_starting_point { + get_starting_point(file_info) + } else { + Path::new("") + }) + // safe to unwrap: the prefix is derived *from* the path to begin + // with, so it cannot be invalid. + .unwrap() + .to_string_lossy(), + + FormatDirective::Permissions(PermissionsFormat::Symbolic) => { + uucore::fs::display_permissions(meta()?, true).into() + } + #[cfg(not(unix))] + FormatDirective::Permissions(PermissionsFormat::Octal) => "777".into(), + #[cfg(unix)] + FormatDirective::Permissions(PermissionsFormat::Octal) => { + format!("{:>03o}", meta()?.mode() & 0o777).into() + } + + FormatDirective::Size => meta()?.len().to_string().into(), + + #[cfg(not(unix))] + FormatDirective::Sparseness => "1.0".into(), + #[cfg(unix)] + FormatDirective::Sparseness => { + let meta = meta()?; + + if meta.len() > 0 { + format!( + "{:.1}", + // GNU find hardcodes a block size of 512 bytes, regardless + // of the true filesystem block size. + (meta.blocks() * STANDARD_BLOCK_SIZE) as f64 / (meta.len() as f64) + ) + .into() + } else { + "1.0".into() + } + } + + FormatDirective::StartingPoint => get_starting_point(file_info).to_string_lossy(), + + FormatDirective::SymlinkTarget => { + if file_info.path_is_symlink() { + fs::read_link(file_info.path())? 
+ .to_string_lossy() + .into_owned() + .into() + } else { + "".into() + } + } + + FormatDirective::Type { follow_links } => if file_info.path_is_symlink() { + if *follow_links { + match file_info.path().metadata() { + Ok(meta) => format_non_link_file_type(meta.file_type()), + Err(e) if e.kind() == std::io::ErrorKind::NotFound => 'N', + // The ErrorKinds corresponding to ELOOP and ENOTDIR are + // nightly-only: + // https://doc.rust-lang.org/std/io/enum.ErrorKind.html#variant.FilesystemLoop + // so we need to use the raw errno values instead. + #[cfg(unix)] + Err(e) if e.raw_os_error().unwrap_or(0) == uucore::libc::ENOTDIR => 'N', + #[cfg(unix)] + Err(e) if e.raw_os_error().unwrap_or(0) == uucore::libc::ELOOP => 'L', + Err(_) => '?', + } + } else { + 'l' + } + } else { + format_non_link_file_type(file_info.file_type()) + } + .to_string() + .into(), + + #[cfg(not(unix))] + FormatDirective::User { .. } => "0".into(), + #[cfg(unix)] + FormatDirective::User { as_name } => { + let uid = meta()?.uid(); + if *as_name { + uucore::entries::uid2usr(uid).unwrap_or_else(|_| uid.to_string()) + } else { + uid.to_string() + } + .into() + } + }; + + Ok(res) +} + +/// This matcher prints information about its files to stdout, following GNU +/// find's printf syntax. +pub struct Printf { + format: FormatString, +} + +impl Printf { + pub fn new(format: &str) -> Result> { + Ok(Printf { + format: FormatString::parse(format)?, + }) + } + + pub fn new_box(format: &str) -> Result, Box> { + Ok(Box::new(Printf::new(format)?)) + } +} + +impl Matcher for Printf { + fn matches(&self, file_info: &walkdir::DirEntry, matcher_io: &mut MatcherIO) -> bool { + let mut out = matcher_io.deps.get_output().borrow_mut(); + // The metadata is computed lazily, so that anything being printed + // without needing metadata won't incur any performance overhead. 
+ let meta_cell = OnceCell::new(); + + for component in &self.format.components { + match component { + FormatComponent::Literal(literal) => write!(out, "{}", literal).unwrap(), + FormatComponent::Flush => out.flush().unwrap(), + FormatComponent::Directive { + directive, + width, + justify, + } => match format_directive(file_info, directive, &meta_cell) { + Ok(content) => { + if let Some(width) = width { + match justify { + Justify::Left => { + write!(out, "{: { + write!(out, "{:>width$}", content, width = width).unwrap(); + } + } + } else { + write!(out, "{}", content).unwrap(); + } + } + Err(e) => { + eprintln!( + "Error processing '{}': {}", + file_info.path().to_string_lossy(), + e + ); + break; + } + }, + } + } + + true + } + + fn has_side_effects(&self) -> bool { + true + } +} + +#[cfg(test)] +mod tests { + use std::io::ErrorKind; + + use super::*; + use crate::find::matchers::tests::get_dir_entry_for; + use crate::find::matchers::Matcher; + use crate::find::tests::fix_up_slashes; + use crate::find::tests::FakeDependencies; + + #[cfg(unix)] + use std::os::unix::fs::{symlink, PermissionsExt}; + #[cfg(unix)] + use tempfile::Builder; + + #[cfg(windows)] + use std::os::windows::fs::{symlink_dir, symlink_file}; + + #[test] + fn test_parse_basics() { + assert_eq!(FormatString::parse("").unwrap().components, vec![]); + assert_eq!( + FormatString::parse("test stuff").unwrap().components, + vec![FormatComponent::Literal("test stuff".to_owned()),] + ); + } + + #[test] + fn test_parse_escapes() { + assert_eq!( + FormatString::parse("abc\\0\\t\\n\\\\\\141de\\cf") + .unwrap() + .components, + vec![ + FormatComponent::Literal("abc".to_owned()), + FormatComponent::Literal("\0".to_owned()), + FormatComponent::Literal("\t".to_owned()), + FormatComponent::Literal("\n".to_owned()), + FormatComponent::Literal("\\".to_owned()), + FormatComponent::Literal("a".to_owned()), + FormatComponent::Literal("de".to_owned()), + FormatComponent::Flush, + 
FormatComponent::Literal("f".to_owned()) + ] + ); + + assert!(FormatString::parse("\\X").is_err()); + assert!(FormatString::parse("\\").is_err()); + } + + #[test] + fn test_parse_formatting() { + fn unaligned_directive(directive: FormatDirective) -> FormatComponent { + FormatComponent::Directive { + directive, + width: None, + justify: Justify::Right, + } + } + + assert_eq!( + FormatString::parse("%%%a%A@%Ak%b%c%C@%CH%dTEST%f%F%g%G%h%H") + .unwrap() + .components, + vec![ + FormatComponent::Literal("%".to_owned()), + unaligned_directive(FormatDirective::AccessTime(TimeFormat::Ctime)), + unaligned_directive(FormatDirective::AccessTime(TimeFormat::SinceEpoch)), + unaligned_directive(FormatDirective::AccessTime(TimeFormat::Strftime('k'))), + unaligned_directive(FormatDirective::Blocks { + large_blocks: false + }), + unaligned_directive(FormatDirective::ChangeTime(TimeFormat::Ctime)), + unaligned_directive(FormatDirective::ChangeTime(TimeFormat::SinceEpoch)), + unaligned_directive(FormatDirective::ChangeTime(TimeFormat::Strftime('H'))), + unaligned_directive(FormatDirective::Depth), + FormatComponent::Literal("TEST".to_owned()), + unaligned_directive(FormatDirective::Basename), + unaligned_directive(FormatDirective::Filesystem), + unaligned_directive(FormatDirective::Group { as_name: true }), + unaligned_directive(FormatDirective::Group { as_name: false }), + unaligned_directive(FormatDirective::Dirname), + unaligned_directive(FormatDirective::StartingPoint), + ] + ); + + assert_eq!( + FormatString::parse("%i%k%l%m%M%n%p%P%s%S%t%T@%Td%u%U%y%Y") + .unwrap() + .components, + vec![ + unaligned_directive(FormatDirective::Inode), + unaligned_directive(FormatDirective::Blocks { large_blocks: true }), + unaligned_directive(FormatDirective::SymlinkTarget), + unaligned_directive(FormatDirective::Permissions(PermissionsFormat::Octal)), + unaligned_directive(FormatDirective::Permissions(PermissionsFormat::Symbolic)), + unaligned_directive(FormatDirective::HardlinkCount), + 
unaligned_directive(FormatDirective::Path { + strip_starting_point: false + }), + unaligned_directive(FormatDirective::Path { + strip_starting_point: true + }), + unaligned_directive(FormatDirective::Size), + unaligned_directive(FormatDirective::Sparseness), + unaligned_directive(FormatDirective::ModificationTime(TimeFormat::Ctime)), + unaligned_directive(FormatDirective::ModificationTime(TimeFormat::SinceEpoch)), + unaligned_directive(FormatDirective::ModificationTime(TimeFormat::Strftime('d'))), + unaligned_directive(FormatDirective::User { as_name: true }), + unaligned_directive(FormatDirective::User { as_name: false }), + unaligned_directive(FormatDirective::Type { + follow_links: false + }), + unaligned_directive(FormatDirective::Type { follow_links: true }), + ] + ); + + assert!(FormatString::parse("%").is_err()); + assert!(FormatString::parse("%A!").is_err()); + } + + #[test] + fn test_parse_formatting_justified() { + assert_eq!( + FormatString::parse("%d%-s%5S%-12n% 3f% -- 4i") + .unwrap() + .components, + vec![ + FormatComponent::Directive { + directive: FormatDirective::Depth, + justify: Justify::Right, + width: None + }, + FormatComponent::Directive { + directive: FormatDirective::Size, + justify: Justify::Left, + width: None + }, + FormatComponent::Directive { + directive: FormatDirective::Sparseness, + justify: Justify::Right, + width: Some(5) + }, + FormatComponent::Directive { + directive: FormatDirective::HardlinkCount, + justify: Justify::Left, + width: Some(12) + }, + FormatComponent::Directive { + directive: FormatDirective::Basename, + justify: Justify::Right, + width: Some(3) + }, + FormatComponent::Directive { + directive: FormatDirective::Inode, + justify: Justify::Left, + width: Some(4) + }, + ] + ); + } + + #[test] + fn test_printf_justified() { + let file_info = get_dir_entry_for("test_data/simple", "abbbc"); + let deps = FakeDependencies::new(); + + let matcher = Printf::new("%f,%7f,%-7f").unwrap(); + assert!(matcher.matches(&file_info, 
&mut deps.new_matcher_io())); + assert_eq!("abbbc, abbbc,abbbc ", deps.get_output_as_string()); + } + + #[test] + fn test_printf_paths() { + let file_info = get_dir_entry_for("test_data/simple", "abbbc"); + let deps = FakeDependencies::new(); + + let matcher = Printf::new("%h %H %p %P").unwrap(); + assert!(matcher.matches(&file_info, &mut deps.new_matcher_io())); + assert_eq!( + format!( + "{} {} {} {}", + fix_up_slashes("test_data/simple"), + fix_up_slashes("test_data/simple"), + fix_up_slashes("test_data/simple/abbbc"), + fix_up_slashes("abbbc") + ), + deps.get_output_as_string() + ); + } + + #[test] + fn test_printf_paths_in_subdir() { + let file_info = get_dir_entry_for("test_data/simple", "subdir/ABBBC"); + let deps = FakeDependencies::new(); + + let matcher = Printf::new("%h %H %p %P").unwrap(); + assert!(matcher.matches(&file_info, &mut deps.new_matcher_io())); + assert_eq!( + format!( + "{} {} {} {}", + fix_up_slashes("test_data/simple/subdir"), + fix_up_slashes("test_data/simple"), + fix_up_slashes("test_data/simple/subdir/ABBBC"), + fix_up_slashes("subdir/ABBBC") + ), + deps.get_output_as_string() + ); + } + + #[test] + fn test_printf_depth() { + let file_info_1 = get_dir_entry_for("test_data/depth/1", "f1"); + let file_info_2 = get_dir_entry_for("test_data/depth/1", "2/f2"); + let deps = FakeDependencies::new(); + + let matcher = Printf::new("%d.").unwrap(); + assert!(matcher.matches(&file_info_1, &mut deps.new_matcher_io())); + assert!(matcher.matches(&file_info_2, &mut deps.new_matcher_io())); + assert_eq!("1.2.", deps.get_output_as_string()); + } + + #[test] + fn test_printf_basic_types() { + let file_info_f = get_dir_entry_for("test_data/simple", "abbbc"); + let file_info_d = get_dir_entry_for("test_data/simple", "subdir"); + let deps = FakeDependencies::new(); + + let matcher = Printf::new("%y").unwrap(); + assert!(matcher.matches(&file_info_f, &mut deps.new_matcher_io())); + assert!(matcher.matches(&file_info_d, &mut deps.new_matcher_io())); + 
assert_eq!("fd", deps.get_output_as_string()); + } + + #[test] + fn test_printf_symlinks() { + #[cfg(unix)] + { + if let Err(e) = symlink("abbbc", "test_data/links/link-f") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + if let Err(e) = symlink("subdir", "test_data/links/link-d") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + if let Err(e) = symlink("missing", "test_data/links/link-missing") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + if let Err(e) = symlink("abbbc/x", "test_data/links/link-notdir") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + if let Err(e) = symlink("link-loop", "test_data/links/link-loop") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + } + #[cfg(windows)] + { + if let Err(e) = symlink_file("abbbc", "test_data/links/link-f") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + if let Err(e) = symlink_dir("subdir", "test_data/links/link-d") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + if let Err(e) = symlink_file("missing", "test_data/links/link-missing") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + if let Err(e) = symlink_file("abbbc/x", "test_data/links/link-notdir") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + } + + let link_f = get_dir_entry_for("test_data/links", "link-f"); + let link_d = get_dir_entry_for("test_data/links", "link-d"); + let link_missing = get_dir_entry_for("test_data/links", "link-missing"); + let link_notdir = get_dir_entry_for("test_data/links", "link-notdir"); + #[cfg(unix)] + let link_loop = 
get_dir_entry_for("test_data/links", "link-loop"); + + let deps = FakeDependencies::new(); + + let matcher = Printf::new("%y %Y %l\n").unwrap(); + assert!(matcher.matches(&link_f, &mut deps.new_matcher_io())); + assert!(matcher.matches(&link_d, &mut deps.new_matcher_io())); + assert!(matcher.matches(&link_missing, &mut deps.new_matcher_io())); + assert!(matcher.matches(&link_notdir, &mut deps.new_matcher_io())); + #[cfg(unix)] + assert!(matcher.matches(&link_loop, &mut deps.new_matcher_io())); + assert_eq!( + vec![ + "l f abbbc", + "l d subdir", + "l N missing", + // We can't detect ENOTDIR on non-unix platforms yet. + #[cfg(not(unix))] + "l ? abbbc/x", + #[cfg(unix)] + "l N abbbc/x", + #[cfg(unix)] + "l L link-loop", + ], + deps.get_output_as_string().lines().collect::>() + ); + } + + #[test] + #[cfg(unix)] + fn test_printf_user_group() { + use std::fs::File; + + let temp_dir = Builder::new().prefix("example").tempdir().unwrap(); + let temp_dir_path = temp_dir.path().to_string_lossy(); + let new_file_name = "newFile"; + File::create(temp_dir.path().join(new_file_name)).expect("create temp file"); + + let file_info = get_dir_entry_for(&temp_dir_path, new_file_name); + let deps = FakeDependencies::new(); + + let matcher = Printf::new("%U %G").unwrap(); + assert!(matcher.matches(&file_info, &mut deps.new_matcher_io())); + assert_eq!( + format!("{} {}", unsafe { uucore::libc::getuid() }, unsafe { + uucore::libc::getgid() + }), + deps.get_output_as_string() + ); + } + + #[test] + #[cfg(unix)] + fn test_printf_permissions() { + use std::fs::File; + + let temp_dir = Builder::new().prefix("example").tempdir().unwrap(); + let temp_dir_path = temp_dir.path().to_string_lossy(); + let new_file_name = "newFile"; + let file = File::create(temp_dir.path().join(new_file_name)).expect("create temp file"); + + let file_info = get_dir_entry_for(&temp_dir_path, new_file_name); + let deps = FakeDependencies::new(); + + let mut perms = file_info.metadata().unwrap().permissions(); + 
perms.set_mode(0o755); + file.set_permissions(perms).unwrap(); + + let matcher = Printf::new("%m %M").unwrap(); + assert!(matcher.matches(&file_info, &mut deps.new_matcher_io())); + assert_eq!("755 -rwxr-xr-x", deps.get_output_as_string()); + } +} diff --git a/src/find/mod.rs b/src/find/mod.rs index 3e36245c..813fbb28 100644 --- a/src/find/mod.rs +++ b/src/find/mod.rs @@ -172,6 +172,7 @@ If no path is supplied then the current working directory is used by default. Early alpha implementation. Currently the only expressions supported are -print + -printf -name case-sensitive_filename_pattern -iname case-insensitive_filename_pattern -type type_char diff --git a/tests/find_cmd_tests.rs b/tests/find_cmd_tests.rs index 5f32a138..0fc94dc3 100644 --- a/tests/find_cmd_tests.rs +++ b/tests/find_cmd_tests.rs @@ -11,10 +11,20 @@ use assert_cmd::Command; use predicates::prelude::*; use serial_test::serial; -use std::env; use std::fs::File; +use std::{env, io::ErrorKind}; use tempfile::Builder; +#[cfg(unix)] +use std::os::unix::fs::symlink; + +#[cfg(windows)] +use std::os::windows::fs::{symlink_dir, symlink_file}; + +use common::test_helpers::*; + +mod common; + #[serial(working_dir)] #[test] fn no_args() { @@ -115,3 +125,110 @@ fn delete_on_dot_dir() { assert!(temp_dir.path().exists(), "temp dir should still exist"); } + +#[serial(working_dir)] +#[test] +fn find_printf() { + #[cfg(unix)] + { + if let Err(e) = symlink("abbbc", "test_data/links/link-f") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + if let Err(e) = symlink("subdir", "test_data/links/link-d") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + if let Err(e) = symlink("missing", "test_data/links/link-missing") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + if let Err(e) = symlink("abbbc/x", "test_data/links/link-notdir") { + if e.kind() != 
ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + if let Err(e) = symlink("link-loop", "test_data/links/link-loop") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + } + #[cfg(windows)] + { + if let Err(e) = symlink_file("abbbc", "test_data/links/link-f") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + if let Err(e) = symlink_dir("subdir", "test_data/links/link-d") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + if let Err(e) = symlink_file("missing", "test_data/links/link-missing") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + if let Err(e) = symlink_file("abbbc/x", "test_data/links/link-notdir") { + if e.kind() != ErrorKind::AlreadyExists { + panic!("Failed to create sym link: {:?}", e); + } + } + } + + Command::cargo_bin("find") + .expect("found binary") + .args(&[ + &fix_up_slashes("./test_data/simple"), + "-sorted", + "-printf", + "%f %d %h %H %p %P %y\n", + ]) + .assert() + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::diff(fix_up_slashes( + "simple 0 ./test_data ./test_data/simple \ + ./test_data/simple d\n\ + abbbc 1 ./test_data/simple ./test_data/simple \ + ./test_data/simple/abbbc abbbc f\n\ + subdir 1 ./test_data/simple ./test_data/simple \ + ./test_data/simple/subdir subdir d\n\ + ABBBC 2 ./test_data/simple/subdir ./test_data/simple \ + ./test_data/simple/subdir/ABBBC subdir/ABBBC f\n", + ))); + + Command::cargo_bin("find") + .expect("found binary") + .args(&[ + &fix_up_slashes("./test_data/links"), + "-sorted", + "-type", + "l", + "-printf", + "%f %l %y %Y\n", + ]) + .assert() + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::diff( + [ + "link-d subdir l d\n", + "link-f abbbc l f\n", + #[cfg(unix)] + "link-loop link-loop l L\n", + 
"link-missing missing l N\n", + // We can't detect ENOTDIR on non-unix platforms yet. + #[cfg(not(unix))] + "link-notdir abbbc/x l ?\n", + #[cfg(unix)] + "link-notdir abbbc/x l N\n", + ] + .join(""), + )); +}