diff --git a/CHANGELOG.md b/CHANGELOG.md index 420fac96..4f92a786 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,7 @@ This name should be decided amongst the team before the release. - [#747](https://github.com/tweag/topiary/pull/747) Added support for specifying paths to prebuilt grammars in Topiary's configuration - [#832](https://github.com/tweag/topiary/pull/832) Added `typos-cli` to workspace `Cargo.toml` for spellchecking @mkatychev - [#845](https://github.com/tweag/topiary/pull/747) Added support for OpenSCAD, @mkatychev +- [#851](https://github.com/tweag/topiary/pull/851) Added support for following symlinks when specifying input files for formatting ### Changed - [#794](https://github.com/tweag/topiary/pull/794) Bump the `tree-sitter` dependency to 0.24, thanks to @ZedThree diff --git a/README.md b/README.md index e60cba19..97ec3fc4 100644 --- a/README.md +++ b/README.md @@ -223,11 +223,14 @@ Options: Do not check that formatting twice gives the same output -l, --language - Topiary language identifier (for formatting stdin) + Topiary language identifier (when formatting stdin) -q, --query Topiary query file override (when formatting stdin) + -L, --follow-symlinks + Follow symlinks (when formatting files) + -C, --configuration Configuration file @@ -249,7 +252,33 @@ the input files' extensions. To format standard input, you must specify the `--language` and, optionally, `--query` arguments, omitting any input files. -Note: `fmt` is a recognised alias of the `format` subcommand. +> [!TIP] +> `fmt` is a recognised alias of the `format` subcommand. + +> [!TIP] +> Topiary will not accept a process substitution (or any other named +> pipe) as formatting input. Instead, arrange for a redirection into +> Topiary's standard input: +> +> ```bash +> # This won't work +> topiary format <(some_command) +> +> # Do this instead +> some_command | topiary format --language LANGUAGE +> ``` + +> [!NOTE] +> Topiary will skip over some input files under certain conditions, +> which are logged at varying levels: +> +> | Condition | Level | +> | :-------------------------------------------- | :------ | +> | Cannot access file | Error | +> | Not a regular file (e.g., FIFO, socket, etc.) | Warning | +> | A symlink without `--follow-symlinks` | Warning | +> | File with multiple (hard) links | Error | +> | File does not exist (e.g., broken symlink) | Error | #### Visualise @@ -287,7 +316,7 @@ Options: - json: JSON serialisation -l, --language - Topiary language identifier (for formatting stdin) + Topiary language identifier (when formatting stdin) -q, --query Topiary query file override (when formatting stdin) @@ -313,8 +342,9 @@ the input file's extension. To visualise standard input, you must specify the `--language` and, optionally, `--query` arguments, omitting the input file. The visualisation output is written to standard out. -Note: `vis`, `visualize` and `view` are recognised aliases of the -`visualise` subcommand. +> [!TIP] +> `vis`, `visualize` and `view` are recognised aliases of the +> `visualise` subcommand. #### Configuration @@ -333,7 +363,8 @@ Options: ``` -Note: `cfg` is a recognised alias of the `config` subcommand. +> [!TIP] +> `cfg` is a recognised alias of the `config` subcommand. #### Shell Completion @@ -410,7 +441,7 @@ Arguments: Options: -l, --language - Topiary language identifier (for formatting stdin) + Topiary language identifier (when formatting stdin) -q, --query Topiary query file override (when formatting stdin) @@ -1662,18 +1693,20 @@ suggested way to work: CST output that starts with a line like this: `CST node: {Node compilation_unit (0, 0) - (5942, 0)} - Named: true`. - :bulb: As an alternative to using the debugging output, the - `vis` visualisation subcommand line option exists to output the - Tree-sitter syntax tree in a variety of formats. +> [!TIP] +> As an alternative to using the debugging output, the `vis` +> visualisation subcommand line option exists to output the Tree-sitter +> syntax tree in a variety of formats. 5. The test run will output all the differences between the actual output and the expected output, e.g. missing spaces between tokens. Pick a difference you would like to fix, and find the line number and column in the input file. - :bulb: Keep in mind that the CST output uses 0-based line and column - numbers, so if your editor reports line 40, column 37, you probably - want line 39, column 36. +> [!NOTE] +> Keep in mind that the CST output uses 0-based line and column numbers, +> so if your editor reports line 40, column 37, you probably want line +> 39, column 36. 6. In the CST debug or visualisation output, find the nodes in this region, such as the following: diff --git a/topiary-cli/src/cli.rs b/topiary-cli/src/cli.rs index 0798a070..badb6ed8 100644 --- a/topiary-cli/src/cli.rs +++ b/topiary-cli/src/cli.rs @@ -8,7 +8,7 @@ use log::LevelFilter; use crate::{ error::{CLIResult, TopiaryError}, - visualisation, + fs, visualisation, }; #[derive(Debug, Parser)] @@ -58,7 +58,7 @@ pub struct GlobalArgs { // NOTE This abstraction is largely to workaround clap-rs/clap#4707 #[derive(Args, Debug)] pub struct FromStdin { - /// Topiary language identifier (for formatting stdin) + /// Topiary language identifier (when formatting stdin) #[arg(short, long)] pub language: String, @@ -109,6 +109,10 @@ pub struct AtLeastOneInput { /// Language detection and query selection is automatic, mapped from file extensions defined in /// the Topiary configuration. pub files: Vec, + + /// Follow symlinks (when formatting files) + #[arg(short = 'L', long)] + pub follow_symlinks: bool, } // NOTE When changing the subcommands, please update verify-documented-usage.sh respectively. @@ -171,24 +175,6 @@ pub enum Commands { }, } -/// Given a vector of paths, recursively expand those that identify as directories, in place -fn traverse_fs(files: &mut Vec) -> CLIResult<()> { - let mut expanded = vec![]; - - for file in &mut *files { - if file.is_dir() { - let mut subfiles = file.read_dir()?.flatten().map(|f| f.path()).collect(); - traverse_fs(&mut subfiles)?; - expanded.append(&mut subfiles); - } else { - expanded.push(file.to_path_buf()); - } - } - - *files = expanded; - Ok(()) -} - /// Parse CLI arguments and normalise them for the caller pub fn get_args() -> CLIResult { let mut args = Cli::parse(); @@ -217,17 +203,23 @@ pub fn get_args() -> CLIResult { match &mut args.command { Commands::Format { - inputs: AtLeastOneInput { files, .. }, + inputs: + AtLeastOneInput { + files, + follow_symlinks, + .. + }, .. } => { // If we're given a list of FILES... then we assume them to all be on disk, even if "-" // is passed as an argument (i.e., interpret this as a valid filename, rather than as - // stdin). We deduplicate this list to avoid formatting the same file multiple times - // and recursively expand directories until we're left with a list of unique - // (potential) files as input sources. + // stdin). We recursively expand directories until we're left with a list of + // (potential) files, as input sources. This is finally deduplicated to avoid + // formatting the same file multiple times (e.g., in the case that a symlink points to + // a file within the set, or if the same file is specified twice at the command line). + fs::traverse(files, *follow_symlinks)?; files.sort_unstable(); files.dedup(); - traverse_fs(files)?; } Commands::Visualise { diff --git a/topiary-cli/src/fs.rs b/topiary-cli/src/fs.rs new file mode 100644 index 00000000..d9974d15 --- /dev/null +++ b/topiary-cli/src/fs.rs @@ -0,0 +1,167 @@ +use crate::error::CLIResult; +use std::{ + fs, + path::{Path, PathBuf}, +}; + +#[cfg(unix)] +use std::os::unix::fs::MetadataExt; + +// TODO See FileMeta::new, below +// #[cfg(windows)] +// use std::os::windows::fs::MetadataExt; + +enum FileType { + /// Regular file + File, + + /// Directory + Directory, + + /// Something else, which we don't care about + /// (e.g., FIFOs, sockets, etc.) + SomethingElse, +} + +#[allow(dead_code)] +enum Hardlink { + // We know there's at least one link, but cannot say more than that + AtLeastOne, + + // We know exactly how many links there are + // NOTE While technically possible, I would worry about a file that had 2^64 links + Exactly(u64), +} + +struct FileMeta { + filetype: FileType, + symlink: bool, + hardlink: Hardlink, +} + +impl FileMeta { + fn new>(path: &P) -> CLIResult { + // Stat a potential symlink + let lmeta = fs::symlink_metadata(path)?; + let symlink = lmeta.is_symlink(); + + // Follow the symlink, if necessary + let meta = if symlink { fs::metadata(path)? } else { lmeta }; + + let filetype = { + if meta.is_file() { + FileType::File + } else if meta.is_dir() { + FileType::Directory + } else { + FileType::SomethingElse + } + }; + + #[cfg(unix)] + let hardlink = Hardlink::Exactly(meta.nlink()); + + // TODO Windows has fs::MetadataExt::number_of_links, but this is experimental as of + // writing (see https://github.com/rust-lang/rust/issues/63010) + #[cfg(windows)] + let hardlink = Hardlink::AtLeastOne; + + // Everything else + #[cfg(not(any(unix, windows)))] + let hardlink = Hardlink::AtLeastOne; + + Ok(Self { + filetype, + symlink, + hardlink, + }) + } + + fn ignore(&self) -> bool { + matches!(self.filetype, FileType::SomethingElse) + } + + fn is_dir(&self) -> bool { + matches!(self.filetype, FileType::Directory) + } + + fn is_file(&self) -> bool { + matches!(self.filetype, FileType::File) + } + + fn is_symlink(&self) -> bool { + self.symlink + } + + fn has_multiple_links(&self) -> bool { + matches!(self.hardlink, Hardlink::Exactly(n) if n > 1) + } +} + +/// Given a vector of paths, recursively expand those that identify as directories, in place. +/// Follow symlinks, if specified, and skip over files with multiple links. Ultimately, we'll +/// finish with a vector of canonical paths to real files with a single link. +pub fn traverse(files: &mut Vec, follow_symlinks: bool) -> CLIResult<()> { + let mut expanded = vec![]; + + for file in &mut *files { + // Using FileMeta means we, at most, stat each file twice + let meta = match FileMeta::new(file) { + Ok(meta) => meta, + Err(_) => { + log::error!("Skipping {}: Cannot access", file.to_string_lossy()); + continue; + } + }; + + // Skip over everything we don't care about + if meta.ignore() { + // This isn't such an important message, so only warn + log::warn!("Skipping {}: Not a regular file", file.to_string_lossy()); + continue; + } + + let is_dir = if follow_symlinks { + meta.is_dir() + } else { + meta.is_dir() && !meta.is_symlink() + }; + + if is_dir { + // Descend into directory, symlink-aware as required + let mut subfiles = file.read_dir()?.flatten().map(|f| f.path()).collect(); + traverse(&mut subfiles, follow_symlinks)?; + expanded.append(&mut subfiles); + } else if meta.is_file() { + if meta.is_symlink() && !follow_symlinks { + // This isn't such an important message, so only warn + log::warn!( + "Skipping {}: File is a symlink; use --follow-symlinks to follow", + file.to_string_lossy() + ); + continue; + } + + if meta.has_multiple_links() { + log::error!( + "Skipping {}: File has multiple links, which Topiary would break", + file.to_string_lossy() + ); + continue; + } + + // Only push the file if the canonicalisation succeeds (i.e., skip broken symlinks) + if let Ok(candidate) = file.canonicalize() { + expanded.push(candidate); + } else { + log::error!( + "Skipping {}: File does not exist (e.g., broken symlink)", + file.to_string_lossy() + ); + } + } + } + + *files = expanded; + Ok(()) +} diff --git a/topiary-cli/src/main.rs b/topiary-cli/src/main.rs index 67f499ba..1fca6030 100644 --- a/topiary-cli/src/main.rs +++ b/topiary-cli/src/main.rs @@ -1,5 +1,6 @@ mod cli; mod error; +mod fs; mod io; mod language; mod visualisation;