Skip to content

Commit

Permalink
Avoid parsing other parts of a format specification if replacements a…
Browse files Browse the repository at this point in the history
…re present (#6858)

Closes #6767
Replaces #6773 (this cherry-picks
some parts from there)
Alternative to the approach introduced in #6616 which added support for
placeholders in format specifications while retaining parsing of other
format specification parts.

The idea is that if there are placeholders in a format specification we
will not attempt to glean semantic meaning from the other parts of the
format specification we'll just extract all of the placeholders ignoring
other characters. The dynamic content of placeholders can drastically
change the meaning of the format specification in ways unknowable by
static analysis. This change prevents false analysis and will ensure
safety if we build other rules on top of this at the cost of missing
detection of some bad specifications.

Minor note: I've use "replacements" and "placeholders" interchangeably
but am trying to go with "placeholder" as I think it's a better term for
the static analysis concept here
  • Loading branch information
zanieb authored Aug 25, 2023
1 parent 0bac7bd commit 100904a
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 99 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
"{:s} {:y}".format("hello", "world") # [bad-format-character]

"{:*^30s}".format("centered") # OK
"{:{s}}".format("hello", s="s") # OK (nested replacement value not checked)

"{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested replacement format spec checked)
"{:{s}}".format("hello", s="s") # OK (nested placeholder value not checked)
"{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested placeholder format spec checked)
"{0:.{prec}g}".format(1.23, prec=15) # OK (cannot validate after nested placeholder)
"{0:.{foo}{bar}{foobar}y}".format(...) # OK (cannot validate after nested placeholders)
"{0:.{foo}x{bar}y{foobar}g}".format(...) # OK (all nested placeholders are consumed without considering in between chars)

## f-strings

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,14 @@ pub(crate) fn call(checker: &mut Checker, string: &str, range: TextRange) {
));
}
Err(_) => {}
Ok(format_spec) => {
for replacement in format_spec.replacements() {
let FormatPart::Field { format_spec, .. } = replacement else {
Ok(FormatSpec::Static(_)) => {}
Ok(FormatSpec::Dynamic(format_spec)) => {
for placeholder in format_spec.placeholders {
let FormatPart::Field { format_spec, .. } = placeholder else {
continue;
};
if let Err(FormatSpecError::InvalidFormatType) =
FormatSpec::parse(format_spec)
FormatSpec::parse(&format_spec)
{
checker.diagnostics.push(Diagnostic::new(
BadStringFormatCharacter {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ bad_string_format_character.py:15:1: PLE1300 Unsupported format character 'y'
17 | "{:*^30s}".format("centered") # OK
|

bad_string_format_character.py:20:1: PLE1300 Unsupported format character 'y'
bad_string_format_character.py:19:1: PLE1300 Unsupported format character 'y'
|
18 | "{:{s}}".format("hello", s="s") # OK (nested replacement value not checked)
19 |
20 | "{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested replacement format spec checked)
17 | "{:*^30s}".format("centered") # OK
18 | "{:{s}}".format("hello", s="s") # OK (nested placeholder value not checked)
19 | "{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested placeholder format spec checked)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300
21 |
22 | ## f-strings
20 | "{0:.{prec}g}".format(1.23, prec=15) # OK (cannot validate after nested placeholder)
21 | "{0:.{foo}{bar}{foobar}y}".format(...) # OK (cannot validate after nested placeholders)
|


157 changes: 71 additions & 86 deletions crates/ruff_python_literal/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,30 +190,35 @@ impl FormatParse for FormatType {
/// "hello {name:<20}".format(name="test")
/// ```
///
/// Format specifications allow nested replacements for dynamic formatting.
/// Format specifications allow nested placeholders for dynamic formatting.
/// For example, the following statements are equivalent:
/// ```python
/// "hello {name:{fmt}}".format(name="test", fmt="<20")
/// "hello {name:{align}{width}}".format(name="test", align="<", width="20")
/// "hello {name:<20{empty}>}".format(name="test", empty="")
/// ```
///
/// Nested replacements can include additional format specifiers.
/// Nested placeholders can include additional format specifiers.
/// ```python
/// "hello {name:{fmt:*>}}".format(name="test", fmt="<20")
/// ```
///
/// However, replacements can only be singly nested (preserving our sanity).
/// However, placeholders can only be singly nested (preserving our sanity).
/// A [`FormatSpecError::PlaceholderRecursionExceeded`] will be raised while parsing in this case.
/// ```python
/// "hello {name:{fmt:{not_allowed}}}".format(name="test", fmt="<20") # Syntax error
/// ```
///
/// When replacements are present in a format specification, we will parse them and
/// store them in [`FormatSpec`] but will otherwise ignore them if they would introduce
/// an invalid format specification at runtime.
/// When placeholders are present in a format specification, parsing will return a [`DynamicFormatSpec`]
/// and avoid attempting to parse any of the clauses. Otherwise, a [`StaticFormatSpec`] will be used.
#[derive(Debug, PartialEq)]
pub struct FormatSpec {
pub enum FormatSpec {
Static(StaticFormatSpec),
Dynamic(DynamicFormatSpec),
}

#[derive(Debug, PartialEq)]
pub struct StaticFormatSpec {
// Ex) `!s` in `'{!s}'`
conversion: Option<FormatConversion>,
// Ex) `*` in `'{:*^30}'`
Expand All @@ -232,8 +237,12 @@ pub struct FormatSpec {
precision: Option<usize>,
// Ex) `f` in `'{:+f}'`
format_type: Option<FormatType>,
}

#[derive(Debug, PartialEq)]
pub struct DynamicFormatSpec {
// Ex) `x` and `y` in `'{:*{x},{y}b}'`
replacements: Vec<FormatPart>,
pub placeholders: Vec<FormatPart>,
}

#[derive(Copy, Clone, Debug, PartialEq, Default)]
Expand Down Expand Up @@ -318,43 +327,46 @@ fn parse_precision(text: &str) -> Result<(Option<usize>, &str), FormatSpecError>
})
}

/// Parses a format part within a format spec
fn parse_nested_placeholder<'a>(
parts: &mut Vec<FormatPart>,
text: &'a str,
) -> Result<&'a str, FormatSpecError> {
/// Parses a placeholder format part within a format specification
fn parse_nested_placeholder(text: &str) -> Result<Option<(FormatPart, &str)>, FormatSpecError> {
match FormatString::parse_spec(text, AllowPlaceholderNesting::No) {
// Not a nested replacement, OK
Err(FormatParseError::MissingStartBracket) => Ok(text),
// Not a nested placeholder, OK
Err(FormatParseError::MissingStartBracket) => Ok(None),
Err(err) => Err(FormatSpecError::InvalidPlaceholder(err)),
Ok((format_part, text)) => {
parts.push(format_part);
Ok(text)
Ok((format_part, text)) => Ok(Some((format_part, text))),
}
}

/// Parse all placeholders in a format specification
/// If no placeholders are present, an empty vector will be returned
fn parse_nested_placeholders(mut text: &str) -> Result<Vec<FormatPart>, FormatSpecError> {
let mut placeholders = vec![];
while let Some(bracket) = text.find('{') {
if let Some((format_part, rest)) = parse_nested_placeholder(&text[bracket..])? {
text = rest;
placeholders.push(format_part);
} else {
text = &text[bracket + 1..];
}
}
Ok(placeholders)
}

impl FormatSpec {
pub fn parse(text: &str) -> Result<Self, FormatSpecError> {
let mut replacements = vec![];
// get_integer in CPython
let text = parse_nested_placeholder(&mut replacements, text)?;
let placeholders = parse_nested_placeholders(text)?;
if !placeholders.is_empty() {
return Ok(FormatSpec::Dynamic(DynamicFormatSpec { placeholders }));
}

let (conversion, text) = FormatConversion::parse(text);
let text = parse_nested_placeholder(&mut replacements, text)?;
let (mut fill, mut align, text) = parse_fill_and_align(text);
let text = parse_nested_placeholder(&mut replacements, text)?;
let (sign, text) = FormatSign::parse(text);
let text = parse_nested_placeholder(&mut replacements, text)?;
let (alternate_form, text) = parse_alternate_form(text);
let text = parse_nested_placeholder(&mut replacements, text)?;
let (zero, text) = parse_zero(text);
let text = parse_nested_placeholder(&mut replacements, text)?;
let (width, text) = parse_number(text)?;
let text = parse_nested_placeholder(&mut replacements, text)?;
let (grouping_option, text) = FormatGrouping::parse(text);
let text = parse_nested_placeholder(&mut replacements, text)?;
let (precision, text) = parse_precision(text)?;
let text = parse_nested_placeholder(&mut replacements, text)?;

let (format_type, _text) = if text.is_empty() {
(None, text)
Expand All @@ -376,7 +388,7 @@ impl FormatSpec {
align = align.or(Some(FormatAlign::AfterSign));
}

Ok(FormatSpec {
Ok(FormatSpec::Static(StaticFormatSpec {
conversion,
fill,
align,
Expand All @@ -386,12 +398,7 @@ impl FormatSpec {
grouping_option,
precision,
format_type,
replacements,
})
}

pub fn replacements(&self) -> &[FormatPart] {
return self.replacements.as_slice();
}))
}
}

Expand Down Expand Up @@ -437,7 +444,7 @@ impl std::fmt::Display for FormatParseError {
std::write!(fmt, "unescaped start bracket in literal")
}
Self::PlaceholderRecursionExceeded => {
std::write!(fmt, "multiply nested replacement not allowed")
std::write!(fmt, "multiply nested placeholder not allowed")
}
Self::UnknownConversion => {
std::write!(fmt, "unknown conversion")
Expand Down Expand Up @@ -730,7 +737,7 @@ mod tests {

#[test]
fn test_width_only() {
let expected = Ok(FormatSpec {
let expected = Ok(FormatSpec::Static(StaticFormatSpec {
conversion: None,
fill: None,
align: None,
Expand All @@ -740,14 +747,13 @@ mod tests {
grouping_option: None,
precision: None,
format_type: None,
replacements: vec![],
});
}));
assert_eq!(FormatSpec::parse("33"), expected);
}

#[test]
fn test_fill_and_width() {
let expected = Ok(FormatSpec {
let expected = Ok(FormatSpec::Static(StaticFormatSpec {
conversion: None,
fill: Some('<'),
align: Some(FormatAlign::Right),
Expand All @@ -757,45 +763,26 @@ mod tests {
grouping_option: None,
precision: None,
format_type: None,
replacements: vec![],
});
}));
assert_eq!(FormatSpec::parse("<>33"), expected);
}

#[test]
fn test_format_part() {
let expected = Ok(FormatSpec {
conversion: None,
fill: None,
align: None,
sign: None,
alternate_form: false,
width: None,
grouping_option: None,
precision: None,
format_type: None,
replacements: vec![FormatPart::Field {
let expected = Ok(FormatSpec::Dynamic(DynamicFormatSpec {
placeholders: vec![FormatPart::Field {
field_name: "x".to_string(),
conversion_spec: None,
format_spec: String::new(),
}],
});
}));
assert_eq!(FormatSpec::parse("{x}"), expected);
}

#[test]
fn test_format_parts() {
let expected = Ok(FormatSpec {
conversion: None,
fill: None,
align: None,
sign: None,
alternate_form: false,
width: None,
grouping_option: None,
precision: None,
format_type: None,
replacements: vec![
fn test_dynamic_format_spec() {
let expected = Ok(FormatSpec::Dynamic(DynamicFormatSpec {
placeholders: vec![
FormatPart::Field {
field_name: "x".to_string(),
conversion_spec: None,
Expand All @@ -812,34 +799,25 @@ mod tests {
format_spec: String::new(),
},
],
});
}));
assert_eq!(FormatSpec::parse("{x}{y:<2}{z}"), expected);
}

#[test]
fn test_format_part_with_others() {
let expected = Ok(FormatSpec {
conversion: None,
fill: None,
align: Some(FormatAlign::Left),
sign: None,
alternate_form: false,
width: Some(20),
grouping_option: None,
precision: None,
format_type: Some(FormatType::Binary),
replacements: vec![FormatPart::Field {
fn test_dynamic_format_spec_with_others() {
let expected = Ok(FormatSpec::Dynamic(DynamicFormatSpec {
placeholders: vec![FormatPart::Field {
field_name: "x".to_string(),
conversion_spec: None,
format_spec: String::new(),
}],
});
}));
assert_eq!(FormatSpec::parse("<{x}20b"), expected);
}

#[test]
fn test_all() {
let expected = Ok(FormatSpec {
let expected = Ok(FormatSpec::Static(StaticFormatSpec {
conversion: None,
fill: Some('<'),
align: Some(FormatAlign::Right),
Expand All @@ -849,8 +827,7 @@ mod tests {
grouping_option: Some(FormatGrouping::Comma),
precision: Some(11),
format_type: Some(FormatType::Binary),
replacements: vec![],
});
}));
assert_eq!(FormatSpec::parse("<>-#23,.11b"), expected);
}

Expand All @@ -877,7 +854,7 @@ mod tests {
}

#[test]
fn test_format_parse_nested_replacement() {
fn test_format_parse_nested_placeholder() {
let expected = Ok(FormatString {
format_parts: vec![
FormatPart::Literal("abcd".to_owned()),
Expand Down Expand Up @@ -966,7 +943,15 @@ mod tests {
);
assert_eq!(
FormatSpec::parse("{}}"),
Err(FormatSpecError::InvalidFormatType)
// Note this should be an `InvalidFormatType` but we give up
// on all other parsing validation when we see a placeholder
Ok(FormatSpec::Dynamic(DynamicFormatSpec {
placeholders: vec![FormatPart::Field {
field_name: String::new(),
conversion_spec: None,
format_spec: String::new()
}]
}))
);
assert_eq!(
FormatSpec::parse("{{x}}"),
Expand Down

0 comments on commit 100904a

Please sign in to comment.