Skip to content

Commit c16f710

Browse files
committed
Remove token::Lit from ast::Lit.
`token::Lit` contains a `kind` field that indicates what kind of literal it is. `ast::Lit` currently wraps a `token::Lit` but also has its own `kind` field, which means that `ast::Lit` encodes the literal kind in two different ways. This commit changes `ast::Lit` so it no longer wraps `token::Lit`. It now contains the `symbol` and `suffix` fields from `token::Lit`, but not the `kind` field, eliminating the redundancy. This requires extending `ast::LitKind::ByteStr` to include a `StrStyle` to properly distinguish between cooked and raw byte string literals at the `ast::Lit` level.
1 parent 78a891d commit c16f710

File tree

24 files changed

+137
-103
lines changed

24 files changed

+137
-103
lines changed

Diff for: compiler/rustc_ast/src/ast.rs

+45-14
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ pub use GenericArgs::*;
2323
pub use UnsafeSource::*;
2424

2525
use crate::ptr::P;
26-
use crate::token::{self, CommentKind, Delimiter};
26+
use crate::token::{CommentKind, Delimiter};
2727
use crate::tokenstream::{DelimSpan, LazyTokenStream, TokenStream};
2828
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
2929
use rustc_data_structures::stack::ensure_sufficient_stack;
@@ -1719,15 +1719,49 @@ pub enum StrStyle {
17191719
/// An AST literal.
17201720
#[derive(Clone, Encodable, Decodable, Debug, HashStable_Generic)]
17211721
pub struct Lit {
1722-
/// The original literal token as written in source code.
1723-
pub token_lit: token::Lit,
1722+
/// The original literal as written in the source code.
1723+
pub symbol: Symbol,
1724+
/// The original suffix as written in the source code.
1725+
pub suffix: Option<Symbol>,
17241726
/// The "semantic" representation of the literal lowered from the original tokens.
17251727
/// Strings are unescaped, hexadecimal forms are eliminated, etc.
1726-
/// FIXME: Remove this and only create the semantic representation during lowering to HIR.
17271728
pub kind: LitKind,
17281729
pub span: Span,
17291730
}
17301731

1732+
impl fmt::Display for Lit {
1733+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1734+
let Lit { kind, symbol, suffix, .. } = self;
1735+
match kind {
1736+
LitKind::Byte(_) => write!(f, "b'{}'", symbol)?,
1737+
LitKind::Char(_) => write!(f, "'{}'", symbol)?,
1738+
LitKind::Str(_, StrStyle::Cooked) => write!(f, "\"{}\"", symbol)?,
1739+
LitKind::Str(_, StrStyle::Raw(n)) => write!(
1740+
f,
1741+
"r{delim}\"{string}\"{delim}",
1742+
delim = "#".repeat(*n as usize),
1743+
string = symbol
1744+
)?,
1745+
LitKind::ByteStr(_, StrStyle::Cooked) => write!(f, "b\"{}\"", symbol)?,
1746+
LitKind::ByteStr(_, StrStyle::Raw(n)) => write!(
1747+
f,
1748+
"br{delim}\"{string}\"{delim}",
1749+
delim = "#".repeat(*n as usize),
1750+
string = symbol
1751+
)?,
1752+
LitKind::Int(..) | LitKind::Float(..) | LitKind::Bool(..) | LitKind::Err => {
1753+
write!(f, "{}", symbol)?
1754+
}
1755+
}
1756+
1757+
if let Some(suffix) = suffix {
1758+
write!(f, "{}", suffix)?;
1759+
}
1760+
1761+
Ok(())
1762+
}
1763+
}
1764+
17311765
/// Same as `Lit`, but restricted to string literals.
17321766
#[derive(Clone, Copy, Encodable, Decodable, Debug)]
17331767
pub struct StrLit {
@@ -1737,18 +1771,14 @@ pub struct StrLit {
17371771
pub suffix: Option<Symbol>,
17381772
pub span: Span,
17391773
/// The unescaped "semantic" representation of the literal lowered from the original token.
1740-
/// FIXME: Remove this and only create the semantic representation during lowering to HIR.
17411774
pub symbol_unescaped: Symbol,
17421775
}
17431776

17441777
impl StrLit {
17451778
pub fn as_lit(&self) -> Lit {
1746-
let token_kind = match self.style {
1747-
StrStyle::Cooked => token::Str,
1748-
StrStyle::Raw(n) => token::StrRaw(n),
1749-
};
17501779
Lit {
1751-
token_lit: token::Lit::new(token_kind, self.symbol, self.suffix),
1780+
symbol: self.symbol,
1781+
suffix: self.suffix,
17521782
span: self.span,
17531783
kind: LitKind::Str(self.symbol_unescaped, self.style),
17541784
}
@@ -1785,8 +1815,9 @@ pub enum LitKind {
17851815
/// A string literal (`"foo"`). The symbol is unescaped, and so may differ
17861816
/// from the original token's symbol.
17871817
Str(Symbol, StrStyle),
1788-
/// A byte string (`b"foo"`).
1789-
ByteStr(Lrc<[u8]>),
1818+
/// A byte string (`b"foo"`). Not stored as a symbol because it might be
1819+
/// non-utf8, and symbols only allow utf8 strings.
1820+
ByteStr(Lrc<[u8]>, StrStyle),
17901821
/// A byte char (`b'f'`).
17911822
Byte(u8),
17921823
/// A character literal (`'a'`).
@@ -1810,7 +1841,7 @@ impl LitKind {
18101841

18111842
/// Returns `true` if this literal is a byte string literal.
18121843
pub fn is_bytestr(&self) -> bool {
1813-
matches!(self, LitKind::ByteStr(_))
1844+
matches!(self, LitKind::ByteStr(..))
18141845
}
18151846

18161847
/// Returns `true` if this is a numeric literal.
@@ -3084,7 +3115,7 @@ mod size_asserts {
30843115
static_assert_size!(Impl, 200);
30853116
static_assert_size!(Item, 184);
30863117
static_assert_size!(ItemKind, 112);
3087-
static_assert_size!(Lit, 48);
3118+
static_assert_size!(Lit, 40);
30883119
static_assert_size!(LitKind, 24);
30893120
static_assert_size!(Local, 72);
30903121
static_assert_size!(Param, 40);

Diff for: compiler/rustc_ast/src/util/literal.rs

+32-14
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
//! Code related to parsing literals.
22
3-
use crate::ast::{self, Lit, LitKind};
3+
use crate::ast::{self, Lit, LitKind, StrStyle};
44
use crate::token::{self, Token};
5-
65
use rustc_lexer::unescape::{unescape_byte, unescape_char};
76
use rustc_lexer::unescape::{unescape_byte_literal, unescape_literal, Mode};
87
use rustc_span::symbol::{kw, sym, Symbol};
98
use rustc_span::Span;
10-
119
use std::ascii;
1210

1311
pub enum LitError {
@@ -120,9 +118,9 @@ impl LitKind {
120118
}
121119
});
122120
error?;
123-
LitKind::ByteStr(buf.into())
121+
LitKind::ByteStr(buf.into(), StrStyle::Cooked)
124122
}
125-
token::ByteStrRaw(_) => {
123+
token::ByteStrRaw(n) => {
126124
let s = symbol.as_str();
127125
let bytes = if s.contains('\r') {
128126
let mut buf = Vec::with_capacity(s.len());
@@ -143,7 +141,7 @@ impl LitKind {
143141
symbol.to_string().into_bytes()
144142
};
145143

146-
LitKind::ByteStr(bytes.into())
144+
LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
147145
}
148146
token::Err => LitKind::Err,
149147
})
@@ -162,15 +160,19 @@ impl LitKind {
162160
(token::Str, symbol, None)
163161
}
164162
LitKind::Str(symbol, ast::StrStyle::Raw(n)) => (token::StrRaw(n), symbol, None),
165-
LitKind::ByteStr(ref bytes) => {
163+
LitKind::ByteStr(ref bytes, ast::StrStyle::Cooked) => {
166164
let string = bytes
167165
.iter()
168-
.cloned()
166+
.copied()
169167
.flat_map(ascii::escape_default)
170168
.map(Into::<char>::into)
171169
.collect::<String>();
172170
(token::ByteStr, Symbol::intern(&string), None)
173171
}
172+
LitKind::ByteStr(ref bytes, ast::StrStyle::Raw(n)) => {
173+
let string = bytes.iter().copied().map(Into::<char>::into).collect::<String>();
174+
(token::ByteStrRaw(n), Symbol::intern(&string), None)
175+
}
174176
LitKind::Byte(byte) => {
175177
let string: String = ascii::escape_default(byte).map(Into::<char>::into).collect();
176178
(token::Byte, Symbol::intern(&string), None)
@@ -210,7 +212,12 @@ impl LitKind {
210212
impl Lit {
211213
/// Converts literal token into an AST literal.
212214
pub fn from_token_lit(token_lit: token::Lit, span: Span) -> Result<Lit, LitError> {
213-
Ok(Lit { token_lit, kind: LitKind::from_token_lit(token_lit)?, span })
215+
Ok(Lit {
216+
symbol: token_lit.symbol,
217+
suffix: token_lit.suffix,
218+
kind: LitKind::from_token_lit(token_lit)?,
219+
span,
220+
})
214221
}
215222

216223
/// Converts arbitrary token into an AST literal.
@@ -240,16 +247,27 @@ impl Lit {
240247
/// This function is used when the original token doesn't exist (e.g. the literal is created
241248
/// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing).
242249
pub fn from_lit_kind(kind: LitKind, span: Span) -> Lit {
243-
Lit { token_lit: kind.to_token_lit(), kind, span }
250+
let token_lit = kind.to_token_lit();
251+
Lit { symbol: token_lit.symbol, suffix: token_lit.suffix, kind, span }
244252
}
245253

246254
/// Losslessly convert an AST literal into a token.
247255
pub fn to_token(&self) -> Token {
248-
let kind = match self.token_lit.kind {
249-
token::Bool => token::Ident(self.token_lit.symbol, false),
250-
_ => token::Literal(self.token_lit),
256+
let symbol = self.symbol;
257+
let suffix = self.suffix;
258+
let kind = match self.kind {
259+
LitKind::Bool(_) => return Token::new(token::Ident(symbol, false), self.span),
260+
LitKind::Str(_, StrStyle::Cooked) => token::LitKind::Str,
261+
LitKind::Str(_, StrStyle::Raw(n)) => token::LitKind::StrRaw(n),
262+
LitKind::ByteStr(_, StrStyle::Cooked) => token::LitKind::ByteStr,
263+
LitKind::ByteStr(_, StrStyle::Raw(n)) => token::LitKind::ByteStrRaw(n),
264+
LitKind::Byte(_) => token::LitKind::Byte,
265+
LitKind::Char(_) => token::LitKind::Char,
266+
LitKind::Int(..) => token::LitKind::Integer,
267+
LitKind::Float(..) => token::LitKind::Float,
268+
LitKind::Err => token::LitKind::Err,
251269
};
252-
Token::new(kind, self.span)
270+
Token::new(token::Literal(token::Lit { kind, symbol, suffix }), self.span)
253271
}
254272
}
255273

Diff for: compiler/rustc_ast_lowering/src/lib.rs

+1-5
Original file line numberDiff line numberDiff line change
@@ -925,11 +925,7 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
925925
let lit = if let ExprKind::Lit(lit) = &expr.kind {
926926
lit.clone()
927927
} else {
928-
Lit {
929-
token_lit: token::Lit::new(token::LitKind::Err, kw::Empty, None),
930-
kind: LitKind::Err,
931-
span: DUMMY_SP,
932-
}
928+
Lit { symbol: kw::Empty, suffix: None, kind: LitKind::Err, span: DUMMY_SP }
933929
};
934930
MacArgs::Eq(eq_span, MacArgsEq::Hir(lit))
935931
}

Diff for: compiler/rustc_ast_pretty/src/pprust/state.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
372372

373373
fn print_literal(&mut self, lit: &ast::Lit) {
374374
self.maybe_print_comment(lit.span.lo());
375-
self.word(lit.token_lit.to_string())
375+
self.word(lit.to_string())
376376
}
377377

378378
fn print_string(&mut self, st: &str, style: ast::StrStyle) {

Diff for: compiler/rustc_builtin_macros/src/concat_bytes.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ fn handle_array_element(
9090
) if val <= u8::MAX.into() => Some(val as u8),
9191

9292
ast::LitKind::Byte(val) => Some(val),
93-
ast::LitKind::ByteStr(_) => {
93+
ast::LitKind::ByteStr(..) => {
9494
if !*has_errors {
9595
cx.struct_span_err(expr.span, "cannot concatenate doubly nested array")
9696
.note("byte strings are treated as arrays of bytes")
@@ -157,7 +157,7 @@ pub fn expand_concat_bytes(
157157
ast::LitKind::Byte(val) => {
158158
accumulator.push(val);
159159
}
160-
ast::LitKind::ByteStr(ref bytes) => {
160+
ast::LitKind::ByteStr(ref bytes, _) => {
161161
accumulator.extend_from_slice(&bytes);
162162
}
163163
_ => {

Diff for: compiler/rustc_builtin_macros/src/derive.rs

+6-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::cfg_eval::cfg_eval;
22

33
use rustc_ast as ast;
4-
use rustc_ast::{attr, token, GenericParamKind, ItemKind, MetaItemKind, NestedMetaItem, StmtKind};
4+
use rustc_ast::{attr, GenericParamKind, ItemKind, MetaItemKind, NestedMetaItem, StmtKind};
55
use rustc_errors::{struct_span_err, Applicability};
66
use rustc_expand::base::{Annotatable, ExpandResult, ExtCtxt, Indeterminate, MultiItemModifier};
77
use rustc_feature::AttributeTemplate;
@@ -126,9 +126,11 @@ fn report_bad_target(sess: &Session, item: &Annotatable, span: Span) -> bool {
126126
}
127127

128128
fn report_unexpected_literal(sess: &Session, lit: &ast::Lit) {
129-
let help_msg = match lit.token_lit.kind {
130-
token::Str if rustc_lexer::is_ident(lit.token_lit.symbol.as_str()) => {
131-
format!("try using `#[derive({})]`", lit.token_lit.symbol)
129+
let help_msg = match lit.kind {
130+
ast::LitKind::Str(_, ast::StrStyle::Cooked)
131+
if rustc_lexer::is_ident(lit.symbol.as_str()) =>
132+
{
133+
format!("try using `#[derive({})]`", lit.symbol)
132134
}
133135
_ => "for example, write `#[derive(Debug)]` for `Debug`".to_string(),
134136
};

Diff for: compiler/rustc_expand/src/base.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1217,7 +1217,7 @@ pub fn expr_to_spanned_string<'a>(
12171217
Err(match expr.kind {
12181218
ast::ExprKind::Lit(ref l) => match l.kind {
12191219
ast::LitKind::Str(s, style) => return Ok((s, style, expr.span)),
1220-
ast::LitKind::ByteStr(_) => {
1220+
ast::LitKind::ByteStr(..) => {
12211221
let mut err = cx.struct_span_err(l.span, err_msg);
12221222
err.span_suggestion(
12231223
expr.span.shrink_to_lo(),

Diff for: compiler/rustc_expand/src/build.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ impl<'a> ExtCtxt<'a> {
358358
}
359359

360360
pub fn expr_byte_str(&self, sp: Span, bytes: Vec<u8>) -> P<ast::Expr> {
361-
self.expr_lit(sp, ast::LitKind::ByteStr(Lrc::from(bytes)))
361+
self.expr_lit(sp, ast::LitKind::ByteStr(Lrc::from(bytes), ast::StrStyle::Cooked))
362362
}
363363

364364
/// `[expr1, expr2, ...]`

Diff for: compiler/rustc_expand/src/proc_macro_server.rs

+6-13
Original file line numberDiff line numberDiff line change
@@ -499,26 +499,19 @@ impl server::TokenStream for Rustc<'_, '_> {
499499
// We don't use `TokenStream::from_ast` as the tokenstream currently cannot
500500
// be recovered in the general case.
501501
match &expr.kind {
502-
ast::ExprKind::Lit(l) if l.token_lit.kind == token::Bool => {
503-
Ok(tokenstream::TokenStream::token_alone(
504-
token::Ident(l.token_lit.symbol, false),
505-
l.span,
506-
))
507-
}
508502
ast::ExprKind::Lit(l) => {
509-
Ok(tokenstream::TokenStream::token_alone(token::Literal(l.token_lit), l.span))
503+
let token = l.to_token();
504+
Ok(tokenstream::TokenStream::token_alone(token.kind, token.span))
510505
}
511506
ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind {
512-
ast::ExprKind::Lit(l) => match l.token_lit {
513-
token::Lit { kind: token::Integer | token::Float, .. } => {
507+
ast::ExprKind::Lit(l) => match l.kind {
508+
ast::LitKind::Int(..) | ast::LitKind::Float(..) => {
509+
let token = l.to_token();
514510
Ok(Self::TokenStream::from_iter([
515511
// FIXME: The span of the `-` token is lost when
516512
// parsing, so we cannot faithfully recover it here.
517513
tokenstream::TokenTree::token_alone(token::BinOp(token::Minus), e.span),
518-
tokenstream::TokenTree::token_alone(
519-
token::Literal(l.token_lit),
520-
l.span,
521-
),
514+
tokenstream::TokenTree::token_alone(token.kind, token.span),
522515
]))
523516
}
524517
_ => Err(()),

Diff for: compiler/rustc_lint/src/hidden_unicode_codepoints.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ impl EarlyLintPass for HiddenUnicodeCodepoints {
120120
fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) {
121121
// byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
122122
let (text, span, padding) = match &expr.kind {
123-
ast::ExprKind::Lit(ast::Lit { token_lit, kind, span }) => {
124-
let text = token_lit.symbol;
123+
ast::ExprKind::Lit(ast::Lit { symbol, kind, span, .. }) => {
124+
let text = symbol;
125125
if !contains_text_flow_control_chars(text.as_str()) {
126126
return;
127127
}
@@ -136,6 +136,6 @@ impl EarlyLintPass for HiddenUnicodeCodepoints {
136136
}
137137
_ => return,
138138
};
139-
self.lint_text_direction_codepoint(cx, text, *span, padding, true, "literal");
139+
self.lint_text_direction_codepoint(cx, *text, *span, padding, true, "literal");
140140
}
141141
}

Diff for: compiler/rustc_mir_build/src/build/expr/as_constant.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -122,14 +122,14 @@ pub(crate) fn lit_to_mir_constant<'tcx>(
122122
let allocation = tcx.intern_const_alloc(allocation);
123123
ConstValue::Slice { data: allocation, start: 0, end: s.len() }
124124
}
125-
(ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _))
125+
(ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _))
126126
if matches!(inner_ty.kind(), ty::Slice(_)) =>
127127
{
128128
let allocation = Allocation::from_bytes_byte_aligned_immutable(data as &[u8]);
129129
let allocation = tcx.intern_const_alloc(allocation);
130130
ConstValue::Slice { data: allocation, start: 0, end: data.len() }
131131
}
132-
(ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => {
132+
(ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => {
133133
let id = tcx.allocate_bytes(data);
134134
ConstValue::Scalar(Scalar::from_pointer(id.into(), &tcx))
135135
}

Diff for: compiler/rustc_mir_build/src/thir/constant.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@ pub(crate) fn lit_to_const<'tcx>(
2424
let str_bytes = s.as_str().as_bytes();
2525
ty::ValTree::from_raw_bytes(tcx, str_bytes)
2626
}
27-
(ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _))
27+
(ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _))
2828
if matches!(inner_ty.kind(), ty::Slice(_)) =>
2929
{
3030
let bytes = data as &[u8];
3131
ty::ValTree::from_raw_bytes(tcx, bytes)
3232
}
33-
(ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => {
33+
(ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => {
3434
let bytes = data as &[u8];
3535
ty::ValTree::from_raw_bytes(tcx, bytes)
3636
}

Diff for: compiler/rustc_parse/src/parser/expr.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1723,8 +1723,8 @@ impl<'a> Parser<'a> {
17231723
Some(lit) => match lit.kind {
17241724
ast::LitKind::Str(symbol_unescaped, style) => Ok(ast::StrLit {
17251725
style,
1726-
symbol: lit.token_lit.symbol,
1727-
suffix: lit.token_lit.suffix,
1726+
symbol: lit.symbol,
1727+
suffix: lit.suffix,
17281728
span: lit.span,
17291729
symbol_unescaped,
17301730
}),

0 commit comments

Comments
 (0)