diff --git a/src/v0.rs b/src/v0.rs index c83815b..3e88fa6 100644 --- a/src/v0.rs +++ b/src/v0.rs @@ -1,5 +1,5 @@ use core::convert::TryFrom; -use core::{char, fmt, iter, mem}; +use core::{char, fmt, iter, mem, str}; #[allow(unused_macros)] macro_rules! write { @@ -287,6 +287,84 @@ impl<'s> HexNibbles<'s> { } Some(v) } + + /// Decode a UTF-8 byte sequence (with each byte using a pair of nibbles) + /// into individual `char`s, returning `None` for invalid UTF-8. + fn try_parse_str_chars(&self) -> Option + 's> { + if self.nibbles.len() % 2 != 0 { + return None; + } + + // FIXME(eddyb) use `array_chunks` instead, when that becomes stable. + let mut bytes = self + .nibbles + .as_bytes() + .chunks_exact(2) + .map(|slice| match slice { + [a, b] => [a, b], + _ => unreachable!(), + }) + .map(|[&hi, &lo]| { + let half = |nibble: u8| (nibble as char).to_digit(16).unwrap() as u8; + (half(hi) << 4) | half(lo) + }); + + let chars = iter::from_fn(move || { + // As long as there are any bytes left, there's at least one more + // UTF-8-encoded `char` to decode (or the possibility of error). + bytes.next().map(|first_byte| -> Result { + // FIXME(eddyb) this `enum` and `fn` should be somewhere in `core`. + enum Utf8FirstByteError { + ContinuationByte, + TooLong, + } + fn utf8_len_from_first_byte(byte: u8) -> Result { + match byte { + 0x00..=0x7f => Ok(1), + 0x80..=0xbf => Err(Utf8FirstByteError::ContinuationByte), + 0xc0..=0xdf => Ok(2), + 0xe0..=0xef => Ok(3), + 0xf0..=0xf7 => Ok(4), + 0xf8..=0xff => Err(Utf8FirstByteError::TooLong), + } + } + + // Collect the appropriate amount of bytes (up to 4), according + // to the UTF-8 length implied by the first byte. + let utf8_len = utf8_len_from_first_byte(first_byte).map_err(|_| ())?; + let utf8 = &mut [first_byte, 0, 0, 0][..utf8_len]; + for i in 1..utf8_len { + utf8[i] = bytes.next().ok_or(())?; + } + + // Fully validate the UTF-8 sequence. + let s = str::from_utf8(utf8).map_err(|_| ())?; + + // Since we included exactly one UTF-8 sequence, and validation + // succeeded, `str::chars` should return exactly one `char`. + let mut chars = s.chars(); + match (chars.next(), chars.next()) { + (Some(c), None) => Ok(c), + _ => unreachable!( + "str::from_utf8({:?}) = {:?} was expected to have 1 char, \ + but {} chars were found", + utf8, + s, + s.chars().count() + ), + } + }) + }); + + // HACK(eddyb) doing a separate validation iteration like this might be + // wasteful, but it's easier to avoid starting to print a string literal + // in the first place, than to abort it mid-string. + if chars.clone().any(|r| r.is_err()) { + None + } else { + Some(chars.map(Result::unwrap)) + } + } } fn basic_type(tag: u8) -> Option<&'static str> { @@ -815,7 +893,7 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> { let lt = parse!(self, integer_62); self.print_lifetime_from_index(lt) } else if self.eat(b'K') { - self.print_const() + self.print_const(false) } else { self.print_type() } @@ -861,7 +939,7 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> { self.print_type()?; if tag == b'A' { self.print("; ")?; - self.print_const()?; + self.print_const(true)?; } self.print("]")?; } @@ -1001,11 +1079,28 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> { Ok(()) } - fn print_const(&mut self) -> fmt::Result { + fn print_const(&mut self, in_value: bool) -> fmt::Result { let tag = parse!(self, next); parse!(self, push_depth); + // Only literals (and the names of `const` generic parameters, but they + // don't get mangled at all), can appear in generic argument position + // without any disambiguation, all other expressions require braces. + // To avoid duplicating the mapping between `tag` and what syntax gets + // used (especially any special-casing), every case that needs braces + // has to call `open_brace(self)?` (and the closing brace is automatic). + let mut opened_brace = false; + let mut open_brace_if_outside_expr = |this: &mut Self| { + // If this expression is nested in another, braces aren't required. + if in_value { + return Ok(()); + } + + opened_brace = true; + this.print("{") + }; + match tag { b'p' => self.print("_")?, @@ -1033,13 +1128,82 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> { None => invalid!(self), } } + b'e' => { + // NOTE(eddyb) a string literal `"..."` has type `&str`, so + // to get back the type `str`, `*"..."` syntax is needed + // (even if that may not be valid in Rust itself). + open_brace_if_outside_expr(self)?; + self.print("*")?; + + self.print_const_str_literal()?; + } + b'R' | b'Q' => { + // NOTE(eddyb) this prints `"..."` instead of `&*"..."`, which + // is what `Re..._` would imply (see comment for `str` above). + if tag == b'R' && self.eat(b'e') { + self.print_const_str_literal()?; + } else { + open_brace_if_outside_expr(self)?; + self.print("&")?; + if tag != b'R' { + self.print("mut ")?; + } + self.print_const(true)?; + } + } + b'A' => { + open_brace_if_outside_expr(self)?; + self.print("[")?; + self.print_sep_list(|this| this.print_const(true), ", ")?; + self.print("]")?; + } + b'T' => { + open_brace_if_outside_expr(self)?; + self.print("(")?; + let count = self.print_sep_list(|this| this.print_const(true), ", ")?; + if count == 1 { + self.print(",")?; + } + self.print(")")?; + } + b'V' => { + open_brace_if_outside_expr(self)?; + self.print_path(true)?; + match parse!(self, next) { + b'U' => {} + b'T' => { + self.print("(")?; + self.print_sep_list(|this| this.print_const(true), ", ")?; + self.print(")")?; + } + b'S' => { + self.print(" { ")?; + self.print_sep_list( + |this| { + parse!(this, disambiguator); + let name = parse!(this, ident); + this.print(name)?; + this.print(": ")?; + this.print_const(true) + }, + ", ", + )?; + self.print(" }")?; + } + _ => invalid!(self), + } + } b'B' => { - self.print_backref(Self::print_const)?; + self.print_backref(|this| this.print_const(in_value))?; } _ => invalid!(self), } + if opened_brace { + self.print("}")?; + } + self.pop_depth(); Ok(()) } @@ -1066,6 +1230,13 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> { Ok(()) } + + fn print_const_str_literal(&mut self) -> fmt::Result { + match parse!(self, hex_nibbles).try_parse_str_chars() { + Some(chars) => self.print_quoted_escaped_chars('"', chars), + None => invalid!(self), + } + } } #[cfg(test)] @@ -1164,6 +1335,92 @@ mod tests { t_const!("c2202_", "'∂'"); } + #[test] + fn demangle_const_str() { + t_const!("e616263_", "{*\"abc\"}"); + t_const!("e27_", r#"{*"'"}"#); + t_const!("e090a_", "{*\"\\t\\n\"}"); + t_const!("ee28882c3bc_", "{*\"∂ü\"}"); + t_const!( + "ee183a1e18390e183ade1839be18394e1839ae18390e183935fe18392e18394e1839b\ + e183a0e18398e18394e1839ae183985fe183a1e18390e18393e18398e1839ae18398_", + "{*\"საჭმელად_გემრიელი_სადილი\"}" + ); + t_const!( + "ef09f908af09fa688f09fa686f09f90ae20c2a720f09f90b6f09f9192e298\ + 95f09f94a520c2a720f09fa7a1f09f929bf09f929af09f9299f09f929c_", + "{*\"🐊🦈🦆🐮 § 🐶👒☕🔥 § 🧡💛💚💙💜\"}" + ); + } + + // NOTE(eddyb) this uses the same strings as `demangle_const_str` and should + // be kept in sync with it - while a macro could be used to generate both + // `str` and `&str` tests, from a single list of strings, this seems clearer. + #[test] + fn demangle_const_ref_str() { + t_const!("Re616263_", "\"abc\""); + t_const!("Re27_", r#""'""#); + t_const!("Re090a_", "\"\\t\\n\""); + t_const!("Ree28882c3bc_", "\"∂ü\""); + t_const!( + "Ree183a1e18390e183ade1839be18394e1839ae18390e183935fe18392e18394e1839b\ + e183a0e18398e18394e1839ae183985fe183a1e18390e18393e18398e1839ae18398_", + "\"საჭმელად_გემრიელი_სადილი\"" + ); + t_const!( + "Ref09f908af09fa688f09fa686f09f90ae20c2a720f09f90b6f09f9192e298\ + 95f09f94a520c2a720f09fa7a1f09f929bf09f929af09f9299f09f929c_", + "\"🐊🦈🦆🐮 § 🐶👒☕🔥 § 🧡💛💚💙💜\"" + ); + } + + #[test] + fn demangle_const_ref() { + t_const!("Rp", "{&_}"); + t_const!("Rh7b_", "{&123}"); + t_const!("Rb0_", "{&false}"); + t_const!("Rc58_", "{&'X'}"); + t_const!("RRRh0_", "{&&&0}"); + t_const!("RRRe_", "{&&\"\"}"); + t_const!("QAE", "{&mut []}"); + } + + #[test] + fn demangle_const_array() { + t_const!("AE", "{[]}"); + t_const!("Aj0_E", "{[0]}"); + t_const!("Ah1_h2_h3_E", "{[1, 2, 3]}"); + t_const!("ARe61_Re62_Re63_E", "{[\"a\", \"b\", \"c\"]}"); + t_const!("AAh1_h2_EAh3_h4_EE", "{[[1, 2], [3, 4]]}"); + } + + #[test] + fn demangle_const_tuple() { + t_const!("TE", "{()}"); + t_const!("Tj0_E", "{(0,)}"); + t_const!("Th1_b0_E", "{(1, false)}"); + t_const!( + "TRe616263_c78_RAh1_h2_h3_EE", + "{(\"abc\", 'x', &[1, 2, 3])}" + ); + } + + #[test] + fn demangle_const_adt() { + t_const!( + "VNvINtNtC4core6option6OptionjE4NoneU", + "{core::option::Option::::None}" + ); + t_const!( + "VNvINtNtC4core6option6OptionjE4SomeTj0_E", + "{core::option::Option::::Some(0)}" + ); + t_const!( + "VNtC3foo3BarS1sRe616263_2chc78_5sliceRAh1_h2_h3_EE", + "{foo::Bar { s: \"abc\", ch: 'x', slice: &[1, 2, 3] }}" + ); + } + #[test] fn demangle_exponential_explosion() { // NOTE(eddyb) because of the prefix added by `t_nohash_type!` is