-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix incorrect span when using byte-escaped rbrace #103828
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,24 @@ impl InnerSpan { | |
} | ||
} | ||
|
||
/// The location and before/after width of a character whose width has changed from its source code | ||
/// representation | ||
#[derive(Copy, Clone, PartialEq, Eq)] | ||
pub struct InnerWidthMapping { | ||
/// Index of the character in the source | ||
pub position: usize, | ||
/// The inner width in characters | ||
pub before: usize, | ||
/// The transformed width in characters | ||
pub after: usize, | ||
} | ||
|
||
impl InnerWidthMapping { | ||
pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping { | ||
InnerWidthMapping { position, before, after } | ||
} | ||
} | ||
|
||
/// The type of format string that we are parsing. | ||
#[derive(Copy, Clone, Debug, Eq, PartialEq)] | ||
pub enum ParseMode { | ||
|
@@ -200,8 +218,8 @@ pub struct Parser<'a> { | |
style: Option<usize>, | ||
/// Start and end byte offset of every successfully parsed argument | ||
pub arg_places: Vec<InnerSpan>, | ||
/// Characters that need to be shifted | ||
skips: Vec<usize>, | ||
/// Characters whose length has been changed from their in-code representation | ||
width_map: Vec<InnerWidthMapping>, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you elaborate the comment? Notably: what is the index of the |
||
/// Span of the last opening brace seen, used for error reporting | ||
last_opening_brace: Option<InnerSpan>, | ||
/// Whether the source string is comes from `println!` as opposed to `format!` or `print!` | ||
|
@@ -224,7 +242,7 @@ impl<'a> Iterator for Parser<'a> { | |
'{' => { | ||
let curr_last_brace = self.last_opening_brace; | ||
let byte_pos = self.to_span_index(pos); | ||
let lbrace_end = self.to_span_index(pos + 1); | ||
let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we wrap all creations of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, not exactly sure what you mean. Maybe related: a function that gives you both position and width could clean this up |
||
self.last_opening_brace = Some(byte_pos.to(lbrace_end)); | ||
self.cur.next(); | ||
if self.consume('{') { | ||
|
@@ -233,12 +251,15 @@ impl<'a> Iterator for Parser<'a> { | |
Some(String(self.string(pos + 1))) | ||
} else { | ||
let arg = self.argument(lbrace_end); | ||
if let Some(rbrace_byte_idx) = self.must_consume('}') { | ||
let lbrace_inner_offset = self.to_span_index(pos); | ||
let rbrace_inner_offset = self.to_span_index(rbrace_byte_idx); | ||
if let Some(rbrace_pos) = self.must_consume('}') { | ||
if self.is_literal { | ||
let lbrace_byte_pos = self.to_span_index(pos); | ||
let rbrace_byte_pos = self.to_span_index(rbrace_pos); | ||
|
||
let width = self.to_span_width(rbrace_pos); | ||
|
||
self.arg_places.push( | ||
lbrace_inner_offset.to(InnerOffset(rbrace_inner_offset.0 + 1)), | ||
lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)), | ||
); | ||
} | ||
} else { | ||
|
@@ -285,7 +306,7 @@ impl<'a> Parser<'a> { | |
append_newline: bool, | ||
mode: ParseMode, | ||
) -> Parser<'a> { | ||
let (skips, is_literal) = find_skips_from_snippet(snippet, style); | ||
let (width_map, is_literal) = find_width_map_from_snippet(snippet, style); | ||
Parser { | ||
mode, | ||
input: s, | ||
|
@@ -294,7 +315,7 @@ impl<'a> Parser<'a> { | |
curarg: 0, | ||
style, | ||
arg_places: vec![], | ||
skips, | ||
width_map, | ||
last_opening_brace: None, | ||
append_newline, | ||
is_literal, | ||
|
@@ -367,21 +388,34 @@ impl<'a> Parser<'a> { | |
None | ||
} | ||
|
||
fn remap_pos(&self, mut pos: usize) -> InnerOffset { | ||
for width in &self.width_map { | ||
if pos > width.position { | ||
pos += width.before - width.after; | ||
} else if pos == width.position && width.after == 0 { | ||
pos += width.before; | ||
} else { | ||
break; | ||
} | ||
} | ||
|
||
InnerOffset(pos) | ||
} | ||
|
||
fn to_span_index(&self, pos: usize) -> InnerOffset { | ||
let mut pos = pos; | ||
// This handles the raw string case, the raw argument is the number of # | ||
// in r###"..."### (we need to add one because of the `r`). | ||
let raw = self.style.map_or(0, |raw| raw + 1); | ||
for skip in &self.skips { | ||
if pos > *skip { | ||
pos += 1; | ||
} else if pos == *skip && raw == 0 { | ||
pos += 1; | ||
} else { | ||
break; | ||
} | ||
let pos = self.remap_pos(pos); | ||
InnerOffset(raw + pos.0 + 1) | ||
} | ||
|
||
fn to_span_width(&self, pos: usize) -> usize { | ||
let pos = self.remap_pos(pos); | ||
match self.width_map.iter().find(|w| w.position == pos.0) { | ||
Some(w) => w.before, | ||
None => 1, | ||
} | ||
InnerOffset(raw + pos + 1) | ||
} | ||
|
||
fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan { | ||
|
@@ -809,10 +843,10 @@ impl<'a> Parser<'a> { | |
/// Finds the indices of all characters that have been processed and differ between the actual | ||
/// written code (code snippet) and the `InternedString` that gets processed in the `Parser` | ||
/// in order to properly synthesise the intra-string `Span`s for error diagnostics. | ||
fn find_skips_from_snippet( | ||
fn find_width_map_from_snippet( | ||
snippet: Option<string::String>, | ||
str_style: Option<usize>, | ||
) -> (Vec<usize>, bool) { | ||
) -> (Vec<InnerWidthMapping>, bool) { | ||
let snippet = match snippet { | ||
Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s, | ||
_ => return (vec![], false), | ||
|
@@ -825,43 +859,39 @@ fn find_skips_from_snippet( | |
let snippet = &snippet[1..snippet.len() - 1]; | ||
|
||
let mut s = snippet.char_indices(); | ||
let mut skips = vec![]; | ||
let mut width_mappings = vec![]; | ||
while let Some((pos, c)) = s.next() { | ||
match (c, s.clone().next()) { | ||
// skip whitespace and empty lines ending in '\\' | ||
('\\', Some((next_pos, '\n'))) => { | ||
skips.push(pos); | ||
skips.push(next_pos); | ||
('\\', Some((_, '\n'))) => { | ||
let _ = s.next(); | ||
let mut width = 2; | ||
|
||
while let Some((pos, c)) = s.clone().next() { | ||
while let Some((_, c)) = s.clone().next() { | ||
if matches!(c, ' ' | '\n' | '\t') { | ||
skips.push(pos); | ||
width += 1; | ||
let _ = s.next(); | ||
} else { | ||
break; | ||
} | ||
} | ||
|
||
width_mappings.push(InnerWidthMapping::new(pos, width, 0)); | ||
} | ||
('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => { | ||
skips.push(next_pos); | ||
('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => { | ||
width_mappings.push(InnerWidthMapping::new(pos, 2, 1)); | ||
let _ = s.next(); | ||
} | ||
('\\', Some((_, 'x'))) => { | ||
for _ in 0..3 { | ||
// consume `\xAB` literal | ||
if let Some((pos, _)) = s.next() { | ||
skips.push(pos); | ||
} else { | ||
break; | ||
} | ||
} | ||
// consume `\xAB` literal | ||
s.nth(2); | ||
width_mappings.push(InnerWidthMapping::new(pos, 4, 1)); | ||
} | ||
('\\', Some((_, 'u'))) => { | ||
if let Some((pos, _)) = s.next() { | ||
skips.push(pos); | ||
} | ||
if let Some((next_pos, next_c)) = s.next() { | ||
let mut width = 2; | ||
let _ = s.next(); | ||
|
||
if let Some((_, next_c)) = s.next() { | ||
if next_c == '{' { | ||
// consume up to 6 hexanumeric chars | ||
let digits_len = | ||
|
@@ -881,31 +911,32 @@ fn find_skips_from_snippet( | |
let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1)); | ||
|
||
// skip '{' and '}' also | ||
for pos in (next_pos..).take(required_skips + 2) { | ||
skips.push(pos) | ||
} | ||
width += required_skips + 2; | ||
|
||
s.nth(digits_len); | ||
} else if next_c.is_digit(16) { | ||
skips.push(next_pos); | ||
width += 1; | ||
|
||
// We suggest adding `{` and `}` when appropriate, accept it here as if | ||
// it were correct | ||
let mut i = 0; // consume up to 6 hexanumeric chars | ||
while let (Some((next_pos, c)), _) = (s.next(), i < 6) { | ||
while let (Some((_, c)), _) = (s.next(), i < 6) { | ||
if c.is_digit(16) { | ||
skips.push(next_pos); | ||
width += 1; | ||
} else { | ||
break; | ||
} | ||
i += 1; | ||
} | ||
} | ||
} | ||
|
||
width_mappings.push(InnerWidthMapping::new(pos, width, 1)); | ||
} | ||
_ => {} | ||
} | ||
} | ||
(skips, true) | ||
(width_mappings, true) | ||
} | ||
|
||
#[cfg(test)] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
fn main() { | ||
format!("{\x7D"); | ||
//~^ ERROR 1 positional argument in format string, but no arguments were given | ||
format!("\x7B\x7D"); | ||
//~^ ERROR 1 positional argument in format string, but no arguments were given | ||
format!("{\x7D {\x7D"); | ||
//~^ ERROR 2 positional arguments in format string, but no arguments were given | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
error: 1 positional argument in format string, but no arguments were given | ||
--> $DIR/issue-103826.rs:2:14 | ||
| | ||
LL | format!("{\x7D"); | ||
| ^^^^^ | ||
|
||
error: 1 positional argument in format string, but no arguments were given | ||
--> $DIR/issue-103826.rs:4:14 | ||
| | ||
LL | format!("\x7B\x7D"); | ||
| ^^^^^^^^ | ||
|
||
error: 2 positional arguments in format string, but no arguments were given | ||
--> $DIR/issue-103826.rs:6:14 | ||
| | ||
LL | format!("{\x7D {\x7D"); | ||
| ^^^^^ ^^^^^ | ||
|
||
error: aborting due to 3 previous errors | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you comment the individual fields?
What are they: positions? Starting at which point of reference?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added some comments.
For what it's worth, I think this could also be refactored to instead have a
InnerSpan
and ausize
, which might make some of the intent more clear.