Skip to content

Commit

Permalink
Auto merge of #103828 - cassaundra:fix-format-args-span2, r=cjgillot
Browse files Browse the repository at this point in the history
Fix incorrect span when using byte-escaped rbrace

Fix #103826, a format args span issue introduced in #102214.

The current solution for tracking skipped characters made it so that certain situations were ambiguous enough that the original span couldn't be worked out later. This PR improves on the original solution by keeping track of groups of skipped characters using a map, and fixes the previous bug. See an example of this ambiguity in the [previous PR's discussion](#102214 (comment)).
  • Loading branch information
bors committed Dec 26, 2022
2 parents f206533 + 35c7939 commit 731e0bf
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 48 deletions.
127 changes: 79 additions & 48 deletions compiler/rustc_parse_format/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,24 @@ impl InnerSpan {
}
}

/// The location and before/after width of a character whose width has changed from its source code
/// representation
#[derive(Copy, Clone, PartialEq, Eq)]
pub struct InnerWidthMapping {
/// Index of the character in the source
pub position: usize,
/// The inner width in characters
pub before: usize,
/// The transformed width in characters
pub after: usize,
}

impl InnerWidthMapping {
pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
InnerWidthMapping { position, before, after }
}
}

/// The type of format string that we are parsing.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ParseMode {
Expand Down Expand Up @@ -200,8 +218,8 @@ pub struct Parser<'a> {
style: Option<usize>,
/// Start and end byte offset of every successfully parsed argument
pub arg_places: Vec<InnerSpan>,
/// Characters that need to be shifted
skips: Vec<usize>,
/// Characters whose length has been changed from their in-code representation
width_map: Vec<InnerWidthMapping>,
/// Span of the last opening brace seen, used for error reporting
last_opening_brace: Option<InnerSpan>,
/// Whether the source string is comes from `println!` as opposed to `format!` or `print!`
Expand All @@ -224,7 +242,7 @@ impl<'a> Iterator for Parser<'a> {
'{' => {
let curr_last_brace = self.last_opening_brace;
let byte_pos = self.to_span_index(pos);
let lbrace_end = self.to_span_index(pos + 1);
let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos));
self.last_opening_brace = Some(byte_pos.to(lbrace_end));
self.cur.next();
if self.consume('{') {
Expand All @@ -233,12 +251,15 @@ impl<'a> Iterator for Parser<'a> {
Some(String(self.string(pos + 1)))
} else {
let arg = self.argument(lbrace_end);
if let Some(rbrace_byte_idx) = self.must_consume('}') {
let lbrace_inner_offset = self.to_span_index(pos);
let rbrace_inner_offset = self.to_span_index(rbrace_byte_idx);
if let Some(rbrace_pos) = self.must_consume('}') {
if self.is_literal {
let lbrace_byte_pos = self.to_span_index(pos);
let rbrace_byte_pos = self.to_span_index(rbrace_pos);

let width = self.to_span_width(rbrace_pos);

self.arg_places.push(
lbrace_inner_offset.to(InnerOffset(rbrace_inner_offset.0 + 1)),
lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)),
);
}
} else {
Expand Down Expand Up @@ -285,7 +306,7 @@ impl<'a> Parser<'a> {
append_newline: bool,
mode: ParseMode,
) -> Parser<'a> {
let (skips, is_literal) = find_skips_from_snippet(snippet, style);
let (width_map, is_literal) = find_width_map_from_snippet(snippet, style);
Parser {
mode,
input: s,
Expand All @@ -294,7 +315,7 @@ impl<'a> Parser<'a> {
curarg: 0,
style,
arg_places: vec![],
skips,
width_map,
last_opening_brace: None,
append_newline,
is_literal,
Expand Down Expand Up @@ -367,21 +388,34 @@ impl<'a> Parser<'a> {
None
}

fn remap_pos(&self, mut pos: usize) -> InnerOffset {
for width in &self.width_map {
if pos > width.position {
pos += width.before - width.after;
} else if pos == width.position && width.after == 0 {
pos += width.before;
} else {
break;
}
}

InnerOffset(pos)
}

fn to_span_index(&self, pos: usize) -> InnerOffset {
let mut pos = pos;
// This handles the raw string case, the raw argument is the number of #
// in r###"..."### (we need to add one because of the `r`).
let raw = self.style.map_or(0, |raw| raw + 1);
for skip in &self.skips {
if pos > *skip {
pos += 1;
} else if pos == *skip && raw == 0 {
pos += 1;
} else {
break;
}
let pos = self.remap_pos(pos);
InnerOffset(raw + pos.0 + 1)
}

fn to_span_width(&self, pos: usize) -> usize {
let pos = self.remap_pos(pos);
match self.width_map.iter().find(|w| w.position == pos.0) {
Some(w) => w.before,
None => 1,
}
InnerOffset(raw + pos + 1)
}

fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan {
Expand Down Expand Up @@ -809,10 +843,10 @@ impl<'a> Parser<'a> {
/// Finds the indices of all characters that have been processed and differ between the actual
/// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
/// in order to properly synthesise the intra-string `Span`s for error diagnostics.
fn find_skips_from_snippet(
fn find_width_map_from_snippet(
snippet: Option<string::String>,
str_style: Option<usize>,
) -> (Vec<usize>, bool) {
) -> (Vec<InnerWidthMapping>, bool) {
let snippet = match snippet {
Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
_ => return (vec![], false),
Expand All @@ -825,43 +859,39 @@ fn find_skips_from_snippet(
let snippet = &snippet[1..snippet.len() - 1];

let mut s = snippet.char_indices();
let mut skips = vec![];
let mut width_mappings = vec![];
while let Some((pos, c)) = s.next() {
match (c, s.clone().next()) {
// skip whitespace and empty lines ending in '\\'
('\\', Some((next_pos, '\n'))) => {
skips.push(pos);
skips.push(next_pos);
('\\', Some((_, '\n'))) => {
let _ = s.next();
let mut width = 2;

while let Some((pos, c)) = s.clone().next() {
while let Some((_, c)) = s.clone().next() {
if matches!(c, ' ' | '\n' | '\t') {
skips.push(pos);
width += 1;
let _ = s.next();
} else {
break;
}
}

width_mappings.push(InnerWidthMapping::new(pos, width, 0));
}
('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
skips.push(next_pos);
('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
width_mappings.push(InnerWidthMapping::new(pos, 2, 1));
let _ = s.next();
}
('\\', Some((_, 'x'))) => {
for _ in 0..3 {
// consume `\xAB` literal
if let Some((pos, _)) = s.next() {
skips.push(pos);
} else {
break;
}
}
// consume `\xAB` literal
s.nth(2);
width_mappings.push(InnerWidthMapping::new(pos, 4, 1));
}
('\\', Some((_, 'u'))) => {
if let Some((pos, _)) = s.next() {
skips.push(pos);
}
if let Some((next_pos, next_c)) = s.next() {
let mut width = 2;
let _ = s.next();

if let Some((_, next_c)) = s.next() {
if next_c == '{' {
// consume up to 6 hexanumeric chars
let digits_len =
Expand All @@ -881,31 +911,32 @@ fn find_skips_from_snippet(
let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1));

// skip '{' and '}' also
for pos in (next_pos..).take(required_skips + 2) {
skips.push(pos)
}
width += required_skips + 2;

s.nth(digits_len);
} else if next_c.is_digit(16) {
skips.push(next_pos);
width += 1;

// We suggest adding `{` and `}` when appropriate, accept it here as if
// it were correct
let mut i = 0; // consume up to 6 hexanumeric chars
while let (Some((next_pos, c)), _) = (s.next(), i < 6) {
while let (Some((_, c)), _) = (s.next(), i < 6) {
if c.is_digit(16) {
skips.push(next_pos);
width += 1;
} else {
break;
}
i += 1;
}
}
}

width_mappings.push(InnerWidthMapping::new(pos, width, 1));
}
_ => {}
}
}
(skips, true)
(width_mappings, true)
}

// Assert a reasonable size for `Piece`
Expand Down
8 changes: 8 additions & 0 deletions src/test/ui/fmt/issue-103826.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
fn main() {
format!("{\x7D");
//~^ ERROR 1 positional argument in format string, but no arguments were given
format!("\x7B\x7D");
//~^ ERROR 1 positional argument in format string, but no arguments were given
format!("{\x7D {\x7D");
//~^ ERROR 2 positional arguments in format string, but no arguments were given
}
20 changes: 20 additions & 0 deletions src/test/ui/fmt/issue-103826.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
error: 1 positional argument in format string, but no arguments were given
--> $DIR/issue-103826.rs:2:14
|
LL | format!("{\x7D");
| ^^^^^

error: 1 positional argument in format string, but no arguments were given
--> $DIR/issue-103826.rs:4:14
|
LL | format!("\x7B\x7D");
| ^^^^^^^^

error: 2 positional arguments in format string, but no arguments were given
--> $DIR/issue-103826.rs:6:14
|
LL | format!("{\x7D {\x7D");
| ^^^^^ ^^^^^

error: aborting due to 3 previous errors

0 comments on commit 731e0bf

Please sign in to comment.