Skip to content

Commit 56c9be3

Browse files
authored
Merge pull request #8836 from cakebaker/unexpand_non_utf8_filenames
unexpand: add support for non-utf8 filenames
2 parents c6f646f + 802e25c commit 56c9be3

File tree

2 files changed: 42 additions & 23 deletions

2 files changed: 42 additions & 23 deletions

src/uu/unexpand/src/unexpand.rs

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
// spell-checker:ignore (ToDO) nums aflag uflag scol prevtab amode ctype cwidth nbytes lastcol pctype Preprocess
77

88
use clap::{Arg, ArgAction, Command};
9+
use std::ffi::OsString;
910
use std::fs::File;
1011
use std::io::{BufRead, BufReader, BufWriter, Read, Stdout, Write, stdin, stdout};
1112
use std::num::IntErrorKind;
@@ -76,7 +77,7 @@ mod options {
7677
}
7778

7879
struct Options {
79-
files: Vec<String>,
80+
files: Vec<OsString>,
8081
tabstops: Vec<usize>,
8182
aflag: bool,
8283
uflag: bool,
@@ -93,9 +94,9 @@ impl Options {
9394
&& !matches.get_flag(options::FIRST_ONLY);
9495
let uflag = !matches.get_flag(options::NO_UTF8);
9596

96-
let files = match matches.get_many::<String>(options::FILE) {
97+
let files = match matches.get_many::<OsString>(options::FILE) {
9798
Some(v) => v.cloned().collect(),
98-
None => vec!["-".to_owned()],
99+
None => vec![OsString::from("-")],
99100
};
100101

101102
Ok(Self {
@@ -115,24 +116,28 @@ fn is_digit_or_comma(c: char) -> bool {
115116
/// Preprocess command line arguments and expand shortcuts. For example, "-7" is expanded to
116117
/// "--tabs=7 --first-only" and "-1,3" to "--tabs=1 --tabs=3 --first-only". However, if "-a" or
117118
/// "--all" is provided, "--first-only" is omitted.
118-
fn expand_shortcuts(args: &[String]) -> Vec<String> {
119+
fn expand_shortcuts(args: Vec<OsString>) -> Vec<OsString> {
119120
let mut processed_args = Vec::with_capacity(args.len());
120121
let mut is_all_arg_provided = false;
121122
let mut has_shortcuts = false;
122123

123124
for arg in args {
124-
if arg.starts_with('-') && arg[1..].chars().all(is_digit_or_comma) {
125-
arg[1..]
126-
.split(',')
127-
.filter(|s| !s.is_empty())
128-
.for_each(|s| processed_args.push(format!("--tabs={s}")));
129-
has_shortcuts = true;
130-
} else {
131-
processed_args.push(arg.to_string());
132-
133-
if arg == "--all" || arg == "-a" {
134-
is_all_arg_provided = true;
125+
if let Some(arg) = arg.to_str() {
126+
if arg.starts_with('-') && arg[1..].chars().all(is_digit_or_comma) {
127+
arg[1..]
128+
.split(',')
129+
.filter(|s| !s.is_empty())
130+
.for_each(|s| processed_args.push(OsString::from(format!("--tabs={s}"))));
131+
has_shortcuts = true;
132+
} else {
133+
processed_args.push(arg.into());
134+
135+
if arg == "--all" || arg == "-a" {
136+
is_all_arg_provided = true;
137+
}
135138
}
139+
} else {
140+
processed_args.push(arg);
136141
}
137142
}
138143

@@ -145,9 +150,8 @@ fn expand_shortcuts(args: &[String]) -> Vec<String> {
145150

146151
#[uucore::main]
147152
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
148-
let args = args.collect_ignore();
149-
150-
let matches = uucore::clap_localization::handle_clap_result(uu_app(), expand_shortcuts(&args))?;
153+
let matches =
154+
uucore::clap_localization::handle_clap_result(uu_app(), expand_shortcuts(args.collect()))?;
151155

152156
unexpand(&Options::new(&matches)?)
153157
}
@@ -163,7 +167,8 @@ pub fn uu_app() -> Command {
163167
Arg::new(options::FILE)
164168
.hide(true)
165169
.action(ArgAction::Append)
166-
.value_hint(clap::ValueHint::FilePath),
170+
.value_hint(clap::ValueHint::FilePath)
171+
.value_parser(clap::value_parser!(OsString)),
167172
)
168173
.arg(
169174
Arg::new(options::ALL)
@@ -196,7 +201,7 @@ pub fn uu_app() -> Command {
196201
)
197202
}
198203

199-
fn open(path: &str) -> UResult<BufReader<Box<dyn Read + 'static>>> {
204+
fn open(path: &OsString) -> UResult<BufReader<Box<dyn Read + 'static>>> {
200205
let file_buf;
201206
let filename = Path::new(path);
202207
if filename.is_dir() {
@@ -207,7 +212,7 @@ fn open(path: &str) -> UResult<BufReader<Box<dyn Read + 'static>>> {
207212
} else if path == "-" {
208213
Ok(BufReader::new(Box::new(stdin()) as Box<dyn Read>))
209214
} else {
210-
file_buf = File::open(path).map_err_context(|| path.to_string())?;
215+
file_buf = File::open(path).map_err_context(|| path.to_string_lossy().to_string())?;
211216
Ok(BufReader::new(Box::new(file_buf) as Box<dyn Read>))
212217
}
213218
}

tests/by-util/test_unexpand.rs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
//
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
5+
//
56
// spell-checker:ignore contenta
6-
use uutests::at_and_ucmd;
7-
use uutests::new_ucmd;
7+
8+
use uutests::{at_and_ucmd, new_ucmd};
89

910
#[test]
1011
fn test_invalid_arg() {
@@ -281,3 +282,16 @@ fn test_one_nonexisting_file() {
281282
.fails()
282283
.stderr_contains("asdf.txt: No such file or directory");
283284
}
285+
286+
#[test]
287+
#[cfg(target_os = "linux")]
288+
fn test_non_utf8_filename() {
289+
use std::os::unix::ffi::OsStringExt;
290+
291+
let (at, mut ucmd) = at_and_ucmd!();
292+
293+
let filename = std::ffi::OsString::from_vec(vec![0xFF, 0xFE]);
294+
std::fs::write(at.plus(&filename), b" a\n").unwrap();
295+
296+
ucmd.arg(&filename).succeeds().stdout_is("\ta\n");
297+
}

0 commit comments

Comments
 (0)