Skip to content

Commit

Permalink
[EXPERIMENT] Disallow all literal suffixes except the standard numeri…
Browse files Browse the repository at this point in the history
…c ones.

Partly out of curiosity, and partly because this would significantly
simplify parts of the lexer and parser.
  • Loading branch information
nnethercote committed Nov 2, 2022
1 parent d726c84 commit 1d0b161
Show file tree
Hide file tree
Showing 21 changed files with 301 additions and 329 deletions.
10 changes: 5 additions & 5 deletions compiler/rustc_ast/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1729,9 +1729,9 @@ pub enum LitFloatType {
Unsuffixed,
}

/// Literal kind.
///
/// E.g., `"foo"`, `42`, `12.34`, or `bool`.
/// Note that the entire literal (including the suffix) is considered when
/// deciding the `LitKind`. This means that float literals like `1f32` are
/// classified by this type as `Float`.
#[derive(Clone, Encodable, Decodable, Debug, Hash, Eq, PartialEq, HashStable_Generic)]
pub enum LitKind {
/// A string literal (`"foo"`). The symbol is unescaped, and so may differ
Expand All @@ -1745,8 +1745,8 @@ pub enum LitKind {
Char(char),
/// An integer literal (`1`).
Int(u128, LitIntType),
/// A float literal (`1f64` or `1E10f64`). Stored as a symbol rather than
/// `f64` so that `LitKind` can impl `Eq` and `Hash`.
/// A float literal (`1.0`, `1f64` or `1E10f64`). Stored as a symbol rather
/// than `f64` so that `LitKind` can impl `Eq` and `Hash`.
Float(Symbol, LitFloatType),
/// A boolean literal.
Bool(bool),
Expand Down
22 changes: 6 additions & 16 deletions compiler/rustc_ast/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,16 @@ pub enum Delimiter {
Invisible,
}

/// Note that the entire literal (including the suffix) is considered when
/// deciding the `LitKind`. This means that float literals like `1f32` are
/// classified by this type as `Float`.
#[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
pub enum LitKind {
Bool, // AST only, must never appear in a `Token`
Byte,
Char,
Integer,
Float,
Integer, // e.g. `1`, `1u8`
Float, // e.g. `1.`, `1.0`, `1f32`, `1e3f32`
Str,
StrRaw(u8), // raw string delimited by `n` hash symbols
ByteStr,
Expand All @@ -77,7 +80,7 @@ pub enum LitKind {
pub struct Lit {
pub kind: LitKind,
pub symbol: Symbol,
pub suffix: Option<Symbol>,
pub suffix: Option<Symbol>, // njn: change to a type?
}

impl fmt::Display for Lit {
Expand Down Expand Up @@ -120,19 +123,6 @@ impl LitKind {
}
}

/// Returns a short, human-readable name for this kind of literal.
///
/// # Panics
///
/// Panics on `Bool`, which is AST-only and must never appear in a `Token`.
pub fn descr(self) -> &'static str {
match self {
Bool => panic!("literal token contains `Lit::Bool`"),
Byte => "byte",
Char => "char",
Integer => "integer",
Float => "float",
// Raw and non-raw forms share the same description.
Str | StrRaw(..) => "string",
ByteStr | ByteStrRaw(..) => "byte string",
Err => "error",
}
}

pub(crate) fn may_have_suffix(self) -> bool {
matches!(self, Integer | Float | Err)
}
Expand Down
38 changes: 14 additions & 24 deletions compiler/rustc_ast/src/util/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,21 @@ use rustc_span::Span;

use std::ascii;

// njn: how much of this will be left?
pub enum LitError {
NotLiteral,
LexerError,
InvalidSuffix,
InvalidIntSuffix,
InvalidFloatSuffix,
NonDecimalFloat(u32),
IntTooLarge,
}

impl LitKind {
/// Converts literal token into a semantic literal.
pub fn from_token_lit(lit: token::Lit) -> Result<LitKind, LitError> {
let token::Lit { kind, symbol, suffix } = lit;
// njn: could even move the suffix into `kind`...
if suffix.is_some() && !kind.may_have_suffix() {
return Err(LitError::InvalidSuffix);
// njn: yuk
return Err(LitError::LexerError);
}

Ok(match kind {
Expand Down Expand Up @@ -259,33 +258,23 @@ fn strip_underscores(symbol: Symbol) -> Symbol {
symbol
}

fn filtered_float_lit(
symbol: Symbol,
suffix: Option<Symbol>,
base: u32,
) -> Result<LitKind, LitError> {
debug!("filtered_float_lit: {:?}, {:?}, {:?}", symbol, suffix, base);
if base != 10 {
return Err(LitError::NonDecimalFloat(base));
}
fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
debug!("float_lit: {:?}, {:?}", symbol, suffix);
let symbol = strip_underscores(symbol);

Ok(match suffix {
Some(suf) => LitKind::Float(
symbol,
ast::LitFloatType::Suffixed(match suf {
sym::f32 => ast::FloatTy::F32,
sym::f64 => ast::FloatTy::F64,
_ => return Err(LitError::InvalidFloatSuffix),
_ => return Err(LitError::LexerError),
}),
),
None => LitKind::Float(symbol, ast::LitFloatType::Unsuffixed),
})
}

fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
debug!("float_lit: {:?}, {:?}", symbol, suffix);
filtered_float_lit(strip_underscores(symbol), suffix, 10)
}

fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
debug!("integer_lit: {:?}, {:?}", symbol, suffix);
let symbol = strip_underscores(symbol);
Expand All @@ -312,10 +301,11 @@ fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitErr
sym::u32 => ast::LitIntType::Unsigned(ast::UintTy::U32),
sym::u64 => ast::LitIntType::Unsigned(ast::UintTy::U64),
sym::u128 => ast::LitIntType::Unsigned(ast::UintTy::U128),
// `1f64` and `2f32` etc. are valid float literals, and
// `fxxx` looks more like an invalid float literal than invalid integer literal.
_ if suf.as_str().starts_with('f') => return filtered_float_lit(symbol, suffix, base),
_ => return Err(LitError::InvalidIntSuffix),
_ =>
//return Err(LitError::LexerError), // njn: hmm
{
return Ok(ast::LitKind::Err);
}
},
_ => ast::LitIntType::Unsuffixed,
};
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_ast_lowering/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,7 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
let lit = if let ExprKind::Lit(lit) = &expr.kind {
lit.clone()
} else {
// njn: use Lit::from_token_lit here?
Lit {
token_lit: token::Lit::new(token::LitKind::Err, kw::Empty, None),
kind: LitKind::Err,
Expand Down
19 changes: 18 additions & 1 deletion compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,13 @@ pub enum DocStyle {
Inner,
}

// Note that the suffix is *not* considered when deciding the `LiteralKind` in
// this type. This means that float literals like `1f32` are classified by this
// type as `Int`. (Compare against `rustc_ast::token::LitKind` and
// `rustc_ast::ast::LitKind`.)
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum LiteralKind {
/// "12_u8", "0o100", "0b120i99"
/// "12_u8", "0o100", "0b120i99", "1f32".
Int { base: Base, empty_int: bool },
/// "12.34f32", "0b100.100"
Float { base: Base, empty_exponent: bool },
Expand All @@ -187,6 +191,19 @@ pub enum LiteralKind {
RawByteStr { n_hashes: Option<u8> },
}

impl LiteralKind {
pub fn descr(self) -> &'static str {
match self {
Int { .. } => "integer",
Float { .. } => "float",
Char { .. } => "char",
Byte { .. } => "byte",
Str { .. } | RawStr { .. } => "string",
ByteStr { .. } | RawByteStr { .. } => "byte string",
}
}
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum RawStrError {
/// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`
Expand Down
Loading

0 comments on commit 1d0b161

Please sign in to comment.