Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix incorrect span when using byte-escaped rbrace #103828

Merged
merged 1 commit into from
Dec 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 79 additions & 48 deletions compiler/rustc_parse_format/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,24 @@ impl InnerSpan {
}
}

/// The location and before/after width of a character whose width has changed from its source code
/// representation
#[derive(Copy, Clone, PartialEq, Eq)]
pub struct InnerWidthMapping {
/// Index of the character in the source
pub position: usize,
/// The inner width in characters
pub before: usize,
/// The transformed width in characters
pub after: usize,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you comment the individual fields?
What are they: positions? Starting at which point of reference?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added some comments.

For what it's worth, I think this could also be refactored to instead have a InnerSpan and a usize, which might make some of the intent more clear.

}

impl InnerWidthMapping {
pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
InnerWidthMapping { position, before, after }
}
}

/// The type of format string that we are parsing.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ParseMode {
Expand Down Expand Up @@ -200,8 +218,8 @@ pub struct Parser<'a> {
style: Option<usize>,
/// Start and end byte offset of every successfully parsed argument
pub arg_places: Vec<InnerSpan>,
/// Characters that need to be shifted
skips: Vec<usize>,
/// Characters whose length has been changed from their in-code representation
width_map: Vec<InnerWidthMapping>,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you elaborate the comment? Notably: what is the index of the Vec?

/// Span of the last opening brace seen, used for error reporting
last_opening_brace: Option<InnerSpan>,
/// Whether the source string is comes from `println!` as opposed to `format!` or `print!`
Expand All @@ -224,7 +242,7 @@ impl<'a> Iterator for Parser<'a> {
'{' => {
let curr_last_brace = self.last_opening_brace;
let byte_pos = self.to_span_index(pos);
let lbrace_end = self.to_span_index(pos + 1);
let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we wrap all creations of InnerOffset?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, not exactly sure what you mean.

Maybe related: a function that gives you both position and width could clean this up

self.last_opening_brace = Some(byte_pos.to(lbrace_end));
self.cur.next();
if self.consume('{') {
Expand All @@ -233,12 +251,15 @@ impl<'a> Iterator for Parser<'a> {
Some(String(self.string(pos + 1)))
} else {
let arg = self.argument(lbrace_end);
if let Some(rbrace_byte_idx) = self.must_consume('}') {
let lbrace_inner_offset = self.to_span_index(pos);
let rbrace_inner_offset = self.to_span_index(rbrace_byte_idx);
if let Some(rbrace_pos) = self.must_consume('}') {
if self.is_literal {
let lbrace_byte_pos = self.to_span_index(pos);
let rbrace_byte_pos = self.to_span_index(rbrace_pos);

let width = self.to_span_width(rbrace_pos);

self.arg_places.push(
lbrace_inner_offset.to(InnerOffset(rbrace_inner_offset.0 + 1)),
lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)),
);
}
} else {
Expand Down Expand Up @@ -285,7 +306,7 @@ impl<'a> Parser<'a> {
append_newline: bool,
mode: ParseMode,
) -> Parser<'a> {
let (skips, is_literal) = find_skips_from_snippet(snippet, style);
let (width_map, is_literal) = find_width_map_from_snippet(snippet, style);
Parser {
mode,
input: s,
Expand All @@ -294,7 +315,7 @@ impl<'a> Parser<'a> {
curarg: 0,
style,
arg_places: vec![],
skips,
width_map,
last_opening_brace: None,
append_newline,
is_literal,
Expand Down Expand Up @@ -367,21 +388,34 @@ impl<'a> Parser<'a> {
None
}

fn remap_pos(&self, mut pos: usize) -> InnerOffset {
for width in &self.width_map {
if pos > width.position {
pos += width.before - width.after;
} else if pos == width.position && width.after == 0 {
pos += width.before;
} else {
break;
}
}

InnerOffset(pos)
}

fn to_span_index(&self, pos: usize) -> InnerOffset {
let mut pos = pos;
// This handles the raw string case, the raw argument is the number of #
// in r###"..."### (we need to add one because of the `r`).
let raw = self.style.map_or(0, |raw| raw + 1);
for skip in &self.skips {
if pos > *skip {
pos += 1;
} else if pos == *skip && raw == 0 {
pos += 1;
} else {
break;
}
let pos = self.remap_pos(pos);
InnerOffset(raw + pos.0 + 1)
}

fn to_span_width(&self, pos: usize) -> usize {
let pos = self.remap_pos(pos);
match self.width_map.iter().find(|w| w.position == pos.0) {
Some(w) => w.before,
None => 1,
}
InnerOffset(raw + pos + 1)
}

fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan {
Expand Down Expand Up @@ -809,10 +843,10 @@ impl<'a> Parser<'a> {
/// Finds the indices of all characters that have been processed and differ between the actual
/// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
/// in order to properly synthesise the intra-string `Span`s for error diagnostics.
fn find_skips_from_snippet(
fn find_width_map_from_snippet(
snippet: Option<string::String>,
str_style: Option<usize>,
) -> (Vec<usize>, bool) {
) -> (Vec<InnerWidthMapping>, bool) {
let snippet = match snippet {
Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
_ => return (vec![], false),
Expand All @@ -825,43 +859,39 @@ fn find_skips_from_snippet(
let snippet = &snippet[1..snippet.len() - 1];

let mut s = snippet.char_indices();
let mut skips = vec![];
let mut width_mappings = vec![];
while let Some((pos, c)) = s.next() {
match (c, s.clone().next()) {
// skip whitespace and empty lines ending in '\\'
('\\', Some((next_pos, '\n'))) => {
skips.push(pos);
skips.push(next_pos);
('\\', Some((_, '\n'))) => {
let _ = s.next();
let mut width = 2;

while let Some((pos, c)) = s.clone().next() {
while let Some((_, c)) = s.clone().next() {
if matches!(c, ' ' | '\n' | '\t') {
skips.push(pos);
width += 1;
let _ = s.next();
} else {
break;
}
}

width_mappings.push(InnerWidthMapping::new(pos, width, 0));
}
('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
skips.push(next_pos);
('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
width_mappings.push(InnerWidthMapping::new(pos, 2, 1));
let _ = s.next();
}
('\\', Some((_, 'x'))) => {
for _ in 0..3 {
// consume `\xAB` literal
if let Some((pos, _)) = s.next() {
skips.push(pos);
} else {
break;
}
}
// consume `\xAB` literal
s.nth(2);
width_mappings.push(InnerWidthMapping::new(pos, 4, 1));
}
('\\', Some((_, 'u'))) => {
if let Some((pos, _)) = s.next() {
skips.push(pos);
}
if let Some((next_pos, next_c)) = s.next() {
let mut width = 2;
let _ = s.next();

if let Some((_, next_c)) = s.next() {
if next_c == '{' {
// consume up to 6 hexanumeric chars
let digits_len =
Expand All @@ -881,31 +911,32 @@ fn find_skips_from_snippet(
let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1));

// skip '{' and '}' also
for pos in (next_pos..).take(required_skips + 2) {
skips.push(pos)
}
width += required_skips + 2;

s.nth(digits_len);
} else if next_c.is_digit(16) {
skips.push(next_pos);
width += 1;

// We suggest adding `{` and `}` when appropriate, accept it here as if
// it were correct
let mut i = 0; // consume up to 6 hexanumeric chars
while let (Some((next_pos, c)), _) = (s.next(), i < 6) {
while let (Some((_, c)), _) = (s.next(), i < 6) {
if c.is_digit(16) {
skips.push(next_pos);
width += 1;
} else {
break;
}
i += 1;
}
}
}

width_mappings.push(InnerWidthMapping::new(pos, width, 1));
}
_ => {}
}
}
(skips, true)
(width_mappings, true)
}

#[cfg(test)]
Expand Down
8 changes: 8 additions & 0 deletions src/test/ui/fmt/issue-103826.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
fn main() {
format!("{\x7D");
//~^ ERROR 1 positional argument in format string, but no arguments were given
format!("\x7B\x7D");
//~^ ERROR 1 positional argument in format string, but no arguments were given
format!("{\x7D {\x7D");
//~^ ERROR 2 positional arguments in format string, but no arguments were given
}
20 changes: 20 additions & 0 deletions src/test/ui/fmt/issue-103826.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
error: 1 positional argument in format string, but no arguments were given
--> $DIR/issue-103826.rs:2:14
|
LL | format!("{\x7D");
| ^^^^^

error: 1 positional argument in format string, but no arguments were given
--> $DIR/issue-103826.rs:4:14
|
LL | format!("\x7B\x7D");
| ^^^^^^^^

error: 2 positional arguments in format string, but no arguments were given
--> $DIR/issue-103826.rs:6:14
|
LL | format!("{\x7D {\x7D");
| ^^^^^ ^^^^^

error: aborting due to 3 previous errors