Skip to content

intern: begin extracting trivial identifiers from format_string strings #12343

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions crates/hir-def/src/macro_expansion_tests/builtin_fn_macro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,106 @@ fn main() {
);
}

// Expansion test: a single identifier captured inline in the format string
// (`{a}`) with no explicit arguments. The expectation is that `a` shows up as
// an `ArgumentV1::new(&(a), ...)` entry in the expanded `Arguments::new_v1` call.
#[test]
fn test_format_args_expand_with_captured_variables() {
check(
r#"
#[rustc_builtin_macro]
macro_rules! format_args {
($fmt:expr) => ({ /* compiler built-in */ });
($fmt:expr, $($args:tt)*) => ({ /* compiler built-in */ })
}

fn main() {
let a = "foo";
format_args!("{a}");
}
"#,
expect![[r##"
#[rustc_builtin_macro]
macro_rules! format_args {
($fmt:expr) => ({ /* compiler built-in */ });
($fmt:expr, $($args:tt)*) => ({ /* compiler built-in */ })
}

fn main() {
let a = "foo";
unsafe {
std::fmt::Arguments::new_v1(&[], &[std::fmt::ArgumentV1::new(&(a), std::fmt::Display::fmt), ])
};
}
"##]],
);
}

// Expansion test: two inline-captured identifiers, one of them with a format
// type (`{bar:?}`). Both are expected to become arguments, in the order they
// appear in the format string.
// NOTE(review): the expected output uses `std::fmt::Display::fmt` for `bar`
// even though the placeholder asks for `?` — the expectation pins the current
// expansion output, not the formatting trait a real `format_args!` would pick.
#[test]
fn test_format_args_expand_with_multiple_captured_variables() {
check(
r#"
#[rustc_builtin_macro]
macro_rules! format_args {
($fmt:expr) => ({ /* compiler built-in */ });
($fmt:expr, $($args:tt)*) => ({ /* compiler built-in */ })
}

fn main() {
let a = "foo";
let bar = "bar";
format_args!("{a} {bar:?}");
}
"#,
expect![[r##"
#[rustc_builtin_macro]
macro_rules! format_args {
($fmt:expr) => ({ /* compiler built-in */ });
($fmt:expr, $($args:tt)*) => ({ /* compiler built-in */ })
}

fn main() {
let a = "foo";
let bar = "bar";
unsafe {
std::fmt::Arguments::new_v1(&[], &[std::fmt::ArgumentV1::new(&(a), std::fmt::Display::fmt), std::fmt::ArgumentV1::new(&(bar), std::fmt::Display::fmt), ])
};
}
"##]],
);
}

// Expansion test: inline-captured identifiers (`{a}`, `{bar:?}`) mixed with an
// explicit positional argument (`{:04}` fed by `42`).
// NOTE(review): the expected argument order is `a`, `42`, `bar`, i.e. the
// order of the placeholders in the format string — confirm that the expander
// really interleaves captured and explicit arguments this way.
#[test]
fn test_format_args_expand_with_mixed_variables() {
check(
r#"
#[rustc_builtin_macro]
macro_rules! format_args {
($fmt:expr) => ({ /* compiler built-in */ });
($fmt:expr, $($args:tt)*) => ({ /* compiler built-in */ })
}

fn main() {
let a = "foo";
let bar = "bar";
format_args!("{a} {:04} {bar:?}", 42);
}
"#,
expect![[r##"
#[rustc_builtin_macro]
macro_rules! format_args {
($fmt:expr) => ({ /* compiler built-in */ });
($fmt:expr, $($args:tt)*) => ({ /* compiler built-in */ })
}

fn main() {
let a = "foo";
let bar = "bar";
unsafe {
std::fmt::Arguments::new_v1(&[], &[std::fmt::ArgumentV1::new(&(a), std::fmt::Display::fmt), std::fmt::ArgumentV1::new(&(42), std::fmt::Display::fmt), std::fmt::ArgumentV1::new(&(bar), std::fmt::Display::fmt), ])
};
}
"##]],
);
}

#[test]
fn test_format_args_expand_with_comma_exprs() {
check(
Expand Down
227 changes: 225 additions & 2 deletions crates/hir-expand/src/builtin_fn_macro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use syntax::{
ast::{self, AstToken},
SmolStr,
};
use tt::{Ident, Leaf, Literal, TokenTree};

use crate::{db::AstDatabase, name, quote, ExpandError, ExpandResult, MacroCallId, MacroCallLoc};

Expand Down Expand Up @@ -256,8 +257,31 @@ fn format_args_expand(
arg.token_trees.drain(..2);
}
}
let _format_string = args.remove(0);
let arg_tts = args.into_iter().flat_map(|arg| {

let format_string = args.remove(0);

let captured_args = if let Some(tt::TokenTree::Leaf(tt::Leaf::Literal(literal))) =
format_string.token_trees.get(0)
{
{
lex_format_specifiers(&literal.text.to_string())
.iter()
.map(|text| {
tt::Subtree {
delimiter: None,
token_trees: vec![TokenTree::Leaf(Leaf::Ident(Ident {
text: "".into(), // FIXME
id: literal.id, // FIXME: No idea what the adequate value is here?
}))],
}
})
.collect()
}
} else {
Vec::new()
};

let arg_tts = captured_args.into_iter().chain(args.into_iter()).flat_map(|arg| {
quote! { std::fmt::ArgumentV1::new(&(#arg), std::fmt::Display::fmt), }
}.token_trees);
let expanded = quote! {
Expand All @@ -272,6 +296,196 @@ fn format_args_expand(
ExpandResult::ok(expanded)
}

/// Items pulled out of a format string by [`lex_format_specifiers`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Item {
    /// An identifier written directly inside a placeholder, e.g. `a` in `{a}`,
    /// `w` in `{:w$}` or `p` in `{:.p$}`.
    Inline(String),
    /// Currently never produced by [`lex_format_specifiers`].
    Named(String),
    /// Currently never produced by [`lex_format_specifiers`].
    Location(usize),
}

/// Scans the text of a format string and collects every identifier that refers
/// to a variable: the argument name of a placeholder (`{name}`) and the names
/// used as width / precision parameters (`{:width$}`, `{:.prec$}`).
///
/// The input may be the raw literal text including its surrounding quotes; only
/// `{`-introduced placeholders are inspected and `{{` is treated as an escape.
/// Identifiers are returned in order of appearance and are not deduplicated.
///
/// Format *types* (`x`, `X`, `e`, `x?`, ...) are identifiers syntactically but
/// do not name a variable, so they are skipped.
///
/// Grammar reference: <https://doc.rust-lang.org/std/fmt/index.html#syntax>
pub fn lex_format_specifiers(string: &str) -> Vec<Item> {
    let mut identifiers = Vec::new();
    let mut chars = string.chars().peekable();

    while let Some(first_char) = chars.next() {
        if first_char != '{' {
            continue;
        }
        if chars.peek() == Some(&'{') {
            // Escaped brace, `{{`.
            chars.next();
            continue;
        }

        // Argument: either a positional index or an identifier.
        match chars.peek().copied() {
            // The index itself is of no interest, but it has to be consumed so
            // that a following `:spec` is still lexed (e.g. `{0:width$}`).
            Some('0'..='9') => read_integer(&mut chars),
            Some(c) if is_identifier_start(c) => read_identifier(&mut chars, &mut identifiers),
            _ => {}
        }

        if chars.peek() == Some(&':') {
            chars.next();
            lex_format_spec(&mut chars, &mut identifiers);
        }

        if chars.peek() == Some(&'}') {
            chars.next();
        }
    }

    identifiers
}

/// Returns `true` if `c` may start an identifier inside a placeholder.
fn is_identifier_start(c: char) -> bool {
    c == '_' || c.is_alphabetic()
}

/// Lexes the part of a placeholder that follows the `:` and records the
/// identifiers used as `width$` / `precision$` parameters.
///
/// Stops early (leaving the rest to the caller's scan for the next `{`) as soon
/// as the spec can no longer contain a parameter.
fn lex_format_spec(
    chars: &mut std::iter::Peekable<std::str::Chars<'_>>,
    identifiers: &mut Vec<Item>,
) {
    // [[fill]align]: if the second char is an alignment, the first is the fill.
    let mut lookahead = chars.clone();
    let first = lookahead.next();
    let second = lookahead.next();
    if matches!(second, Some('<' | '^' | '>')) {
        chars.next();
        chars.next();
    } else if matches!(first, Some('<' | '^' | '>')) {
        chars.next();
    }

    // [sign]
    if matches!(chars.peek().copied(), Some('+' | '-')) {
        chars.next();
    }

    // ['#']
    if chars.peek() == Some(&'#') {
        chars.next();
    }

    // ['0'] flag; `0$` on the other hand is a width taken from argument 0.
    let mut lookahead = chars.clone();
    if lookahead.next() == Some('0') && lookahead.next() != Some('$') {
        chars.next();
    }

    // [width]
    match chars.peek().copied() {
        Some('0'..='9') => {
            read_integer(chars);
            if chars.peek() == Some(&'$') {
                chars.next();
            }
        }
        Some(c) if is_identifier_start(c) => {
            let name = take_identifier(chars);
            if chars.peek() == Some(&'$') {
                chars.next();
                identifiers.push(Item::Inline(name));
            } else {
                // Not a width but the format type (e.g. `x` or `x?`), which
                // ends the spec and does not name a variable.
                if chars.peek() == Some(&'?') {
                    chars.next();
                }
                return;
            }
        }
        _ => {}
    }

    // ['.' precision]
    if chars.peek() == Some(&'.') {
        chars.next();

        match chars.peek().copied() {
            Some('*') => {
                chars.next();
            }
            Some('0'..='9') => {
                read_integer(chars);
                if chars.peek() == Some(&'$') {
                    chars.next();
                }
            }
            Some(c) if is_identifier_start(c) => {
                let name = take_identifier(chars);
                // A named precision is only valid in the `name$` form.
                if chars.peek() != Some(&'$') {
                    return;
                }
                chars.next();
                identifiers.push(Item::Inline(name));
            }
            _ => return,
        }
    }

    // [type]: consumed but never recorded.
    match chars.peek().copied() {
        Some('?') => {
            chars.next();
        }
        Some(c) if is_identifier_start(c) => {
            let _type_name = take_identifier(chars);
            if chars.peek() == Some(&'?') {
                chars.next();
            }
        }
        _ => {}
    }
}

/// Consumes a run of ASCII digits.
///
/// # Panics
///
/// Panics if the next character is missing or not an ASCII digit; callers are
/// expected to peek before calling.
fn read_integer<I>(chars: &mut std::iter::Peekable<I>)
where
    I: Iterator<Item = char>,
{
    let c = chars.next().expect("read_integer called at end of input");
    assert!(c.is_ascii_digit());
    while chars.next_if(|c| c.is_ascii_digit()).is_some() {}
}

/// Consumes an identifier and returns its text.
///
/// # Panics
///
/// Panics if the next character is missing or cannot start an identifier;
/// callers are expected to peek before calling.
fn take_identifier<I>(chars: &mut std::iter::Peekable<I>) -> String
where
    I: Iterator<Item = char>,
{
    let c = chars.next().expect("take_identifier called at end of input");
    assert!(c.is_alphabetic() || c == '_');
    let mut buffer = String::new();
    buffer.push(c);
    while let Some(next_char) =
        chars.next_if(|&c| c == '_' || c.is_ascii_digit() || c.is_alphabetic())
    {
        buffer.push(next_char);
    }
    buffer
}

/// Consumes an identifier and records it as [`Item::Inline`].
///
/// # Panics
///
/// Panics if the next character is missing or cannot start an identifier.
fn read_identifier<I>(chars: &mut std::iter::Peekable<I>, identifiers: &mut Vec<Item>)
where
    I: Iterator<Item = char>,
{
    let name = take_identifier(chars);
    identifiers.push(Item::Inline(name));
}

fn asm_expand(
_db: &dyn AstDatabase,
_id: MacroCallId,
Expand Down Expand Up @@ -673,3 +887,12 @@ fn option_env_expand(

ExpandResult::ok(ExpandedEager::new(expanded))
}

#[cfg(test)]
mod tests {

// NOTE(review): placeholder test — `assert!(false)` fails unconditionally, so
// this module breaks the test run as written. Replace it with real coverage
// (presumably for the format-string lexer added in this change) or remove it
// before merging.
#[test]
fn it() {
assert!(false)
}
}