Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v0: demangle structural constants and &str. #55

Merged
merged 4 commits into from
Aug 24, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
267 changes: 262 additions & 5 deletions src/v0.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use core::convert::TryFrom;
use core::{char, fmt, iter, mem};
use core::{char, fmt, iter, mem, str};

#[allow(unused_macros)]
macro_rules! write {
Expand Down Expand Up @@ -287,6 +287,84 @@ impl<'s> HexNibbles<'s> {
}
Some(v)
}

/// Decode a UTF-8 byte sequence (with each byte using a pair of nibbles)
/// into individual `char`s, returning `None` for invalid UTF-8.
fn try_parse_str_chars(&self) -> Option<impl Iterator<Item = char> + 's> {
if self.nibbles.len() % 2 != 0 {
return None;
}

// FIXME(eddyb) use `array_chunks` instead, when that becomes stable.
let mut bytes = self
.nibbles
.as_bytes()
.chunks_exact(2)
.map(|slice| match slice {
[a, b] => [a, b],
_ => unreachable!(),
})
.map(|[&hi, &lo]| {
let half = |nibble: u8| (nibble as char).to_digit(16).unwrap() as u8;
(half(hi) << 4) | half(lo)
});

let chars = iter::from_fn(move || {
// As long as there are any bytes left, there's at least one more
// UTF-8-encoded `char` to decode (or the possibility of error).
bytes.next().map(|first_byte| -> Result<char, ()> {
// FIXME(eddyb) this `enum` and `fn` should be somewhere in `core`.
enum Utf8FirstByteError {
ContinuationByte,
TooLong,
}
fn utf8_len_from_first_byte(byte: u8) -> Result<usize, Utf8FirstByteError> {
match byte {
0x00..=0x7f => Ok(1),
0x80..=0xbf => Err(Utf8FirstByteError::ContinuationByte),
0xc0..=0xdf => Ok(2),
0xe0..=0xef => Ok(3),
0xf0..=0xf7 => Ok(4),
0xf8..=0xff => Err(Utf8FirstByteError::TooLong),
}
}

// Collect the appropriate amount of bytes (up to 4), according
// to the UTF-8 length implied by the first byte.
let utf8_len = utf8_len_from_first_byte(first_byte).map_err(|_| ())?;
let utf8 = &mut [first_byte, 0, 0, 0][..utf8_len];
for i in 1..utf8_len {
utf8[i] = bytes.next().ok_or(())?;
}

// Fully validate the UTF-8 sequence.
let s = str::from_utf8(utf8).map_err(|_| ())?;

// Since we included exactly one UTF-8 sequence, and validation
// succeeded, `str::chars` should return exactly one `char`.
let mut chars = s.chars();
match (chars.next(), chars.next()) {
(Some(c), None) => Ok(c),
_ => unreachable!(
"str::from_utf8({:?}) = {:?} was expected to have 1 char, \
but {} chars were found",
utf8,
s,
s.chars().count()
),
}
})
});

// HACK(eddyb) doing a separate validation iteration like this might be
// wasteful, but it's easier to avoid starting to print a string literal
// in the first place, than to abort it mid-string.
if chars.clone().any(|r| r.is_err()) {
None
} else {
Some(chars.map(Result::unwrap))
}
}
}

fn basic_type(tag: u8) -> Option<&'static str> {
Expand Down Expand Up @@ -815,7 +893,7 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
let lt = parse!(self, integer_62);
self.print_lifetime_from_index(lt)
} else if self.eat(b'K') {
self.print_const()
self.print_const(false)
} else {
self.print_type()
}
Expand Down Expand Up @@ -861,7 +939,7 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
self.print_type()?;
if tag == b'A' {
self.print("; ")?;
self.print_const()?;
self.print_const(true)?;
}
self.print("]")?;
}
Expand Down Expand Up @@ -1001,11 +1079,28 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
Ok(())
}

fn print_const(&mut self) -> fmt::Result {
fn print_const(&mut self, in_value: bool) -> fmt::Result {
let tag = parse!(self, next);

parse!(self, push_depth);

// Only literals (and the names of `const` generic parameters, but they
// don't get mangled at all), can appear in generic argument position
// without any disambiguation, all other expressions require braces.
// To avoid duplicating the mapping between `tag` and what syntax gets
// used (especially any special-casing), every case that needs braces
// has to call `open_brace(self)?` (and the closing brace is automatic).
let mut opened_brace = false;
let mut open_brace_if_outside_expr = |this: &mut Self| {
// If this expression is nested in another, braces aren't required.
if in_value {
return Ok(());
}

opened_brace = true;
this.print("{")
};

match tag {
b'p' => self.print("_")?,

Expand Down Expand Up @@ -1033,13 +1128,82 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
None => invalid!(self),
}
}
b'e' => {
// NOTE(eddyb) a string literal `"..."` has type `&str`, so
// to get back the type `str`, `*"..."` syntax is needed
// (even if that may not be valid in Rust itself).
open_brace_if_outside_expr(self)?;
self.print("*")?;

self.print_const_str_literal()?;
}

b'R' | b'Q' => {
// NOTE(eddyb) this prints `"..."` instead of `&*"..."`, which
// is what `Re..._` would imply (see comment for `str` above).
if tag == b'R' && self.eat(b'e') {
self.print_const_str_literal()?;
} else {
open_brace_if_outside_expr(self)?;
self.print("&")?;
if tag != b'R' {
self.print("mut ")?;
}
self.print_const(true)?;
}
}
b'A' => {
open_brace_if_outside_expr(self)?;
self.print("[")?;
self.print_sep_list(|this| this.print_const(true), ", ")?;
self.print("]")?;
}
b'T' => {
open_brace_if_outside_expr(self)?;
self.print("(")?;
let count = self.print_sep_list(|this| this.print_const(true), ", ")?;
if count == 1 {
self.print(",")?;
}
self.print(")")?;
}
b'V' => {
open_brace_if_outside_expr(self)?;
self.print_path(true)?;
match parse!(self, next) {
b'U' => {}
b'T' => {
self.print("(")?;
self.print_sep_list(|this| this.print_const(true), ", ")?;
self.print(")")?;
}
b'S' => {
self.print(" { ")?;
self.print_sep_list(
|this| {
parse!(this, disambiguator);
let name = parse!(this, ident);
this.print(name)?;
this.print(": ")?;
this.print_const(true)
},
", ",
)?;
self.print(" }")?;
}
_ => invalid!(self),
}
}
b'B' => {
self.print_backref(Self::print_const)?;
self.print_backref(|this| this.print_const(in_value))?;
}
_ => invalid!(self),
}

if opened_brace {
self.print("}")?;
}

self.pop_depth();
Ok(())
}
Expand All @@ -1066,6 +1230,13 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {

Ok(())
}

fn print_const_str_literal(&mut self) -> fmt::Result {
match parse!(self, hex_nibbles).try_parse_str_chars() {
Some(chars) => self.print_quoted_escaped_chars('"', chars),
None => invalid!(self),
}
}
}

#[cfg(test)]
Expand Down Expand Up @@ -1164,6 +1335,92 @@ mod tests {
t_const!("c2202_", "'∂'");
}

#[test]
fn demangle_const_str() {
t_const!("e616263_", "{*\"abc\"}");
t_const!("e27_", r#"{*"'"}"#);
t_const!("e090a_", "{*\"\\t\\n\"}");
t_const!("ee28882c3bc_", "{*\"∂ü\"}");
t_const!(
"ee183a1e18390e183ade1839be18394e1839ae18390e183935fe18392e18394e1839b\
e183a0e18398e18394e1839ae183985fe183a1e18390e18393e18398e1839ae18398_",
"{*\"საჭმელად_გემრიელი_სადილი\"}"
);
t_const!(
"ef09f908af09fa688f09fa686f09f90ae20c2a720f09f90b6f09f9192e298\
95f09f94a520c2a720f09fa7a1f09f929bf09f929af09f9299f09f929c_",
"{*\"🐊🦈🦆🐮 § 🐶👒☕🔥 § 🧡💛💚💙💜\"}"
);
}

// NOTE(eddyb) this uses the same strings as `demangle_const_str` and should
// be kept in sync with it - while a macro could be used to generate both
// `str` and `&str` tests, from a single list of strings, this seems clearer.
#[test]
fn demangle_const_ref_str() {
t_const!("Re616263_", "\"abc\"");
t_const!("Re27_", r#""'""#);
t_const!("Re090a_", "\"\\t\\n\"");
t_const!("Ree28882c3bc_", "\"∂ü\"");
t_const!(
"Ree183a1e18390e183ade1839be18394e1839ae18390e183935fe18392e18394e1839b\
e183a0e18398e18394e1839ae183985fe183a1e18390e18393e18398e1839ae18398_",
"\"საჭმელად_გემრიელი_სადილი\""
);
t_const!(
"Ref09f908af09fa688f09fa686f09f90ae20c2a720f09f90b6f09f9192e298\
95f09f94a520c2a720f09fa7a1f09f929bf09f929af09f9299f09f929c_",
"\"🐊🦈🦆🐮 § 🐶👒☕🔥 § 🧡💛💚💙💜\""
);
}

#[test]
fn demangle_const_ref() {
t_const!("Rp", "{&_}");
t_const!("Rh7b_", "{&123}");
t_const!("Rb0_", "{&false}");
t_const!("Rc58_", "{&'X'}");
t_const!("RRRh0_", "{&&&0}");
t_const!("RRRe_", "{&&\"\"}");
t_const!("QAE", "{&mut []}");
}

#[test]
fn demangle_const_array() {
t_const!("AE", "{[]}");
t_const!("Aj0_E", "{[0]}");
t_const!("Ah1_h2_h3_E", "{[1, 2, 3]}");
t_const!("ARe61_Re62_Re63_E", "{[\"a\", \"b\", \"c\"]}");
t_const!("AAh1_h2_EAh3_h4_EE", "{[[1, 2], [3, 4]]}");
}

#[test]
fn demangle_const_tuple() {
t_const!("TE", "{()}");
t_const!("Tj0_E", "{(0,)}");
t_const!("Th1_b0_E", "{(1, false)}");
t_const!(
"TRe616263_c78_RAh1_h2_h3_EE",
"{(\"abc\", 'x', &[1, 2, 3])}"
);
}

#[test]
fn demangle_const_adt() {
t_const!(
"VNvINtNtC4core6option6OptionjE4NoneU",
"{core::option::Option::<usize>::None}"
);
t_const!(
"VNvINtNtC4core6option6OptionjE4SomeTj0_E",
"{core::option::Option::<usize>::Some(0)}"
);
t_const!(
"VNtC3foo3BarS1sRe616263_2chc78_5sliceRAh1_h2_h3_EE",
"{foo::Bar { s: \"abc\", ch: 'x', slice: &[1, 2, 3] }}"
);
}

#[test]
fn demangle_exponential_explosion() {
// NOTE(eddyb) because of the prefix added by `t_nohash_type!` is
Expand Down