Skip to content

Commit f56ad61

Browse files
committed
printf: Consistently handle negative widths/precision
Also allows character constants with " instead of ', and for interpolated values with %b to use \0XXX notation for octal bytes
1 parent e20500d commit f56ad61

File tree

4 files changed

+145
-27
lines changed

4 files changed

+145
-27
lines changed

src/uucore/src/lib/features/format/argument.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ pub enum FormatArgument {
3333
pub trait ArgumentIter<'a>: Iterator<Item = &'a FormatArgument> {
3434
fn get_char(&mut self) -> u8;
3535
fn get_i64(&mut self) -> i64;
36+
/// extract an i64, returning either that value or zero for any negative value (including
37+
/// negative numbers below i64::MIN)
38+
fn get_i64_non_negative(&mut self) -> i64;
3639
fn get_u64(&mut self) -> u64;
3740
fn get_f64(&mut self) -> f64;
3841
fn get_str(&mut self) -> &'a str;
@@ -72,6 +75,19 @@ impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
7275
}
7376
}
7477

78+
fn get_i64_non_negative(&mut self) -> i64 {
79+
let Some(next) = self.next() else {
80+
return 0;
81+
};
82+
match next {
83+
FormatArgument::SignedInt(n) if !n.is_negative() => *n,
84+
FormatArgument::Unparsed(s) if !s.starts_with('-') => {
85+
extract_value(ParsedNumber::parse_i64(s), s)
86+
}
87+
_ => 0,
88+
}
89+
}
90+
7591
fn get_f64(&mut self) -> f64 {
7692
let Some(next) = self.next() else {
7793
return 0.0;
@@ -113,7 +129,7 @@ fn extract_value<T: Default>(p: Result<T, ParseError<'_, T>>, input: &str) -> T
113129
}
114130
ParseError::PartialMatch(v, rest) => {
115131
let bytes = input.as_encoded_bytes();
116-
if !bytes.is_empty() && bytes[0] == b'\'' {
132+
if !bytes.is_empty() && (bytes[0] == b'\'' || bytes[0] == b'"') {
117133
show_warning!(
118134
"{}: character(s) following character constant have been ignored",
119135
&rest,

src/uucore/src/lib/features/format/num_parser.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ impl ParsedNumber {
161161
#[allow(clippy::cognitive_complexity)]
162162
fn parse(input: &str, integral_only: bool) -> Result<Self, ParseError<'_, Self>> {
163163
// Parse the "'" prefix separately
164-
if let Some(rest) = input.strip_prefix('\'') {
164+
if let Some(rest) = input.strip_prefix(['\'', '"']) {
165165
let mut chars = rest.char_indices().fuse();
166166
let v = chars.next().map(|(_, c)| Self {
167167
base: Base::Decimal,

src/uucore/src/lib/features/format/spec.rs

Lines changed: 42 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ impl Spec {
316316
match self {
317317
Self::Char { width, align_left } => {
318318
let (width, neg_width) =
319-
resolve_asterisk_maybe_negative(*width, &mut args).unwrap_or_default();
319+
resolve_asterisk_width(*width, &mut args).unwrap_or_default();
320320
write_padded(writer, &[args.get_char()], width, *align_left || neg_width)
321321
}
322322
Self::String {
@@ -325,15 +325,15 @@ impl Spec {
325325
precision,
326326
} => {
327327
let (width, neg_width) =
328-
resolve_asterisk_maybe_negative(*width, &mut args).unwrap_or_default();
328+
resolve_asterisk_width(*width, &mut args).unwrap_or_default();
329329

330330
// GNU does do this truncation on a byte level, see for instance:
331331
// printf "%.1s" 🙃
332332
// > �
333333
// For now, we let printf panic when we truncate within a code point.
334334
// TODO: We need to not use Rust's formatting for aligning the output,
335335
// so that we can just write bytes to stdout without panicking.
336-
let precision = resolve_asterisk(*precision, &mut args);
336+
let precision = resolve_asterisk_precision(*precision, &mut args);
337337
let s = args.get_str();
338338
let truncated = match precision {
339339
Some(p) if p < s.len() => &s[..p],
@@ -349,7 +349,7 @@ impl Spec {
349349
Self::EscapedString => {
350350
let s = args.get_str();
351351
let mut parsed = Vec::new();
352-
for c in parse_escape_only(s.as_bytes(), OctalParsing::default()) {
352+
for c in parse_escape_only(s.as_bytes(), OctalParsing::ThreeDigits) {
353353
match c.write(&mut parsed)? {
354354
ControlFlow::Continue(()) => {}
355355
ControlFlow::Break(()) => {
@@ -382,8 +382,10 @@ impl Spec {
382382
positive_sign,
383383
alignment,
384384
} => {
385-
let width = resolve_asterisk(*width, &mut args).unwrap_or(0);
386-
let precision = resolve_asterisk(*precision, &mut args).unwrap_or(0);
385+
let (width, neg_width) =
386+
resolve_asterisk_width(*width, &mut args).unwrap_or((0, false));
387+
let precision =
388+
resolve_asterisk_precision(*precision, &mut args).unwrap_or_default();
387389
let i = args.get_i64();
388390

389391
if precision as u64 > i32::MAX as u64 {
@@ -394,7 +396,11 @@ impl Spec {
394396
width,
395397
precision,
396398
positive_sign: *positive_sign,
397-
alignment: *alignment,
399+
alignment: if neg_width {
400+
NumberAlignment::Left
401+
} else {
402+
*alignment
403+
},
398404
}
399405
.fmt(writer, i)
400406
.map_err(FormatError::IoError)
@@ -405,8 +411,10 @@ impl Spec {
405411
precision,
406412
alignment,
407413
} => {
408-
let width = resolve_asterisk(*width, &mut args).unwrap_or(0);
409-
let precision = resolve_asterisk(*precision, &mut args).unwrap_or(0);
414+
let (width, neg_width) =
415+
resolve_asterisk_width(*width, &mut args).unwrap_or((0, false));
416+
let precision =
417+
resolve_asterisk_precision(*precision, &mut args).unwrap_or_default();
410418
let i = args.get_u64();
411419

412420
if precision as u64 > i32::MAX as u64 {
@@ -417,7 +425,11 @@ impl Spec {
417425
variant: *variant,
418426
precision,
419427
width,
420-
alignment: *alignment,
428+
alignment: if neg_width {
429+
NumberAlignment::Left
430+
} else {
431+
*alignment
432+
},
421433
}
422434
.fmt(writer, i)
423435
.map_err(FormatError::IoError)
@@ -431,8 +443,9 @@ impl Spec {
431443
alignment,
432444
precision,
433445
} => {
434-
let width = resolve_asterisk(*width, &mut args).unwrap_or(0);
435-
let precision = resolve_asterisk(*precision, &mut args).unwrap_or(6);
446+
let (width, neg_width) =
447+
resolve_asterisk_width(*width, &mut args).unwrap_or((0, false));
448+
let precision = resolve_asterisk_precision(*precision, &mut args).unwrap_or(6);
436449
// TODO: We should implement some get_extendedBigDecimal function in args to avoid losing precision.
437450
let f: ExtendedBigDecimal = args.get_f64().into();
438451

@@ -447,7 +460,11 @@ impl Spec {
447460
case: *case,
448461
force_decimal: *force_decimal,
449462
positive_sign: *positive_sign,
450-
alignment: *alignment,
463+
alignment: if neg_width {
464+
NumberAlignment::Left
465+
} else {
466+
*alignment
467+
},
451468
}
452469
.fmt(writer, &f)
453470
.map_err(FormatError::IoError)
@@ -456,18 +473,7 @@ impl Spec {
456473
}
457474
}
458475

459-
fn resolve_asterisk<'a>(
460-
option: Option<CanAsterisk<usize>>,
461-
mut args: impl ArgumentIter<'a>,
462-
) -> Option<usize> {
463-
match option {
464-
None => None,
465-
Some(CanAsterisk::Asterisk) => Some(usize::try_from(args.get_u64()).ok().unwrap_or(0)),
466-
Some(CanAsterisk::Fixed(w)) => Some(w),
467-
}
468-
}
469-
470-
fn resolve_asterisk_maybe_negative<'a>(
476+
fn resolve_asterisk_width<'a>(
471477
option: Option<CanAsterisk<usize>>,
472478
mut args: impl ArgumentIter<'a>,
473479
) -> Option<(usize, bool)> {
@@ -485,6 +491,17 @@ fn resolve_asterisk_maybe_negative<'a>(
485491
}
486492
}
487493

494+
fn resolve_asterisk_precision<'a>(
495+
option: Option<CanAsterisk<usize>>,
496+
mut args: impl ArgumentIter<'a>,
497+
) -> Option<usize> {
498+
match option {
499+
None => None,
500+
Some(CanAsterisk::Asterisk) => usize::try_from(args.get_i64_non_negative()).ok(),
501+
Some(CanAsterisk::Fixed(w)) => Some(w),
502+
}
503+
}
504+
488505
fn write_padded(
489506
mut writer: impl Write,
490507
text: &[u8],

tests/by-util/test_printf.rs

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,19 @@ fn escaped_unicode_eight_digit() {
8282
.stdout_only("ĥ");
8383
}
8484

85+
#[test]
86+
fn escaped_unicode_null_byte() {
87+
new_ucmd!()
88+
.args(&["\\0001_"])
89+
.succeeds()
90+
.stdout_is_bytes([0u8, b'1', b'_']);
91+
92+
new_ucmd!()
93+
.args(&["%b", "\\0001_"])
94+
.succeeds()
95+
.stdout_is_bytes([1u8, b'_']);
96+
}
97+
8598
#[test]
8699
fn escaped_percent_sign() {
87100
new_ucmd!()
@@ -260,6 +273,16 @@ fn sub_num_int_char_const_in() {
260273
.args(&["emoji is %i", "'🙃"])
261274
.succeeds()
262275
.stdout_only("emoji is 128579");
276+
277+
new_ucmd!()
278+
.args(&["ninety seven is %i", "\"a"])
279+
.succeeds()
280+
.stdout_only("ninety seven is 97");
281+
282+
new_ucmd!()
283+
.args(&["emoji is %i", "\"🙃"])
284+
.succeeds()
285+
.stdout_only("emoji is 128579");
263286
}
264287

265288
#[test]
@@ -546,6 +569,68 @@ fn sub_any_asterisk_negative_first_param() {
546569
.stdout_only("a(x )b"); // Would be 'a( x)b' if -5 was 5
547570
}
548571

572+
#[test]
573+
fn sub_any_asterisk_first_param_with_integer() {
574+
new_ucmd!()
575+
.args(&["|%*d|", "3", "0"])
576+
.succeeds()
577+
.stdout_only("| 0|");
578+
579+
new_ucmd!()
580+
.args(&["|%*d|", "1", "0"])
581+
.succeeds()
582+
.stdout_only("|0|");
583+
584+
new_ucmd!()
585+
.args(&["|%*d|", "0", "0"])
586+
.succeeds()
587+
.stdout_only("|0|");
588+
589+
new_ucmd!()
590+
.args(&["|%*d|", "-1", "0"])
591+
.succeeds()
592+
.stdout_only("|0|");
593+
594+
new_ucmd!()
595+
.args(&["|%*d|", "-2", "0"])
596+
.succeeds()
597+
.stdout_only("|0 |");
598+
}
599+
600+
#[test]
601+
fn sub_any_asterisk_second_param_with_integer() {
602+
new_ucmd!()
603+
.args(&["|%.*d|", "3", "10"])
604+
.succeeds()
605+
.stdout_only("|010|");
606+
607+
new_ucmd!()
608+
.args(&["|%*.d|", "1", "10"])
609+
.succeeds()
610+
.stdout_only("|10|");
611+
612+
new_ucmd!()
613+
.args(&["|%.*d|", "0", "10"])
614+
.succeeds()
615+
.stdout_only("|10|");
616+
617+
new_ucmd!()
618+
.args(&["|%.*d|", "-1", "10"])
619+
.succeeds()
620+
.stdout_only("|10|");
621+
622+
new_ucmd!()
623+
.args(&["|%.*d|", "-2", "10"])
624+
.succeeds()
625+
.stdout_only("|10|");
626+
627+
new_ucmd!()
628+
// Force underflow
629+
.args(&["|%.*d|", &format!("-{}", u128::MAX), "10"])
630+
.succeeds()
631+
.stdout_only("|10|");
632+
}
633+
549634
#[test]
550635
fn sub_any_specifiers_no_params() {
551636
new_ucmd!()

0 commit comments

Comments
 (0)