From 307f90c323193ef2eb3a935e7118769668b85235 Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Sun, 9 Apr 2023 00:13:12 +0200 Subject: [PATCH 01/19] chore: define the grammar --- docs/grammar.txt | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 docs/grammar.txt diff --git a/docs/grammar.txt b/docs/grammar.txt new file mode 100644 index 0000000..de4cb35 --- /dev/null +++ b/docs/grammar.txt @@ -0,0 +1,40 @@ +program → item* +item → fnDecl | structDecl | enumDecl | implDecl | useDecl | modDecl +structDecl → "struct" IDENTIFIER ("{" structField* "}" | "(" structField* ")" | ";") +structField → IDENTIFIER ":" type "," ","? +enumDecl → "enum" IDENTIFIER "{" enumVariant* "}" +enumVariant → IDENTIFIER ( "(" type ( "," type )_ ")" )? "," ","? +implDecl → "impl" IDENTIFIER "{" implMethod_ "}" +implMethod → fnDecl +useDecl → "use" PATH ";" | "use" PATH "as" IDENTIFIER ";" +modDecl → "mod" IDENTIFIER "{" item* "}" +PATH → IDENTIFIER ( "::" IDENTIFIER )* +letDecl → "let" IDENTIFIER ( ":" type )? ( "=" expression )? ";" +constDecl → "const" IDENTIFIER ( ":" type )? "=" expression ";" +fnDecl → "fn" IDENTIFIER "(" parameters? ")" ( "->" type )? blockStmt +parameters → parameter ( "," parameter )_ +parameter → PATH ":" type +ifStmt → "if" expression blockStmt ( "else" blockStmt )? +matchStmt → "match" expression "{" matchCase_ "}" +matchCase → expression "=>" blockStmt ";" +loopStmt → "loop" blockStmt +whileStmt → "while" expression blockStmt +forStmt → "for" IDENTIFIER "in" expression blockStmt +blockStmt → "{" statement* "}" +exprStmt → expression ";" +expression → literal | IDENTIFIER | unaryExpr | binaryExpr | groupExpr | callExpr | accessExpr +literal → INT | FLOAT | STRING | "true" | "false" +unaryExpr → ( "-" | "!" 
) expression +binaryExpr → expression operator expression +operator → "+" | "-" | "*" | "/" | "%" | "==" | "!=" | ">" | "<" | ">=" | "<=" | "&&" | "||" | "&" | "|" | "^" | "<<" | ">>" | "+=" | "-=" | "_=" | "/=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" +groupExpr → "(" expression ")" +callExpr → expression "(" arguments? ")" +arguments → expression ( "," expression )_ +accessExpr → expression ( "." IDENTIFIER | "[" expression "]" )_ +type → "int" | "float" | "bool" | "str" | IDENTIFIER +IDENTIFIER → ALPHA ( ALPHA | DIGIT | "\_" )_ +ALPHA → "a" ... "z" | "A" ... "Z" +DIGIT → "0" ... "9" +INT → DIGIT+ +FLOAT → DIGIT+ "." DIGIT* +STRING → '"' * '"' From cf85b795bb7a58388c4b53b7be2796ed52f46fbf Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Tue, 11 Apr 2023 02:56:23 +0200 Subject: [PATCH 02/19] chore: deps --- .cargo/config.toml | 2 ++ Cargo.toml | 2 +- compiler/ast/Cargo.toml | 7 +++++++ compiler/lexer/Cargo.toml | 9 +++++++++ compiler/span/Cargo.toml | 9 +++++++++ 5 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 .cargo/config.toml create mode 100644 compiler/ast/Cargo.toml create mode 100644 compiler/lexer/Cargo.toml create mode 100644 compiler/span/Cargo.toml diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..b23105b --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[registries.crates-io] +protocol = "sparse" \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 0847d6b..4626465 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,2 +1,2 @@ [workspace] -members = ["compiler/raccoon"] +members = ["compiler/lexer", "compiler/ast", "compiler/span"] diff --git a/compiler/ast/Cargo.toml b/compiler/ast/Cargo.toml new file mode 100644 index 0000000..d0f1931 --- /dev/null +++ b/compiler/ast/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "raccoon_ast" +version = "0.0.0" +edition = "2021" + +[dependencies] +raccoon_span = { path = "../span" } diff --git 
a/compiler/lexer/Cargo.toml b/compiler/lexer/Cargo.toml new file mode 100644 index 0000000..f4d3143 --- /dev/null +++ b/compiler/lexer/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "raccoon_lexer" +version = "0.0.0" +edition = "2021" + + +[dependencies] +raccoon_span = { path = "../span" } +raccoon_ast = { path = "../ast" } diff --git a/compiler/span/Cargo.toml b/compiler/span/Cargo.toml new file mode 100644 index 0000000..28beb59 --- /dev/null +++ b/compiler/span/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "raccoon_span" +version = "0.0.0" +edition = "2021" + +[dependencies] +fxhash = "0.2.1" +lazy_static = "1.4.0" +typed-arena = "2.0.2" From 72400784b36588d8c536696be995edaa2d2e2db8 Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Tue, 11 Apr 2023 02:57:28 +0200 Subject: [PATCH 03/19] feat: implement lexer --- compiler/lexer/src/lib.rs | 271 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 271 insertions(+) create mode 100644 compiler/lexer/src/lib.rs diff --git a/compiler/lexer/src/lib.rs b/compiler/lexer/src/lib.rs new file mode 100644 index 0000000..de86908 --- /dev/null +++ b/compiler/lexer/src/lib.rs @@ -0,0 +1,271 @@ +//! The lexer module provides a [Lexer] struct that can be used to lex tokens from a source code string. + +use std::str::Chars; + +use raccoon_ast::{BinOpToken, CondOpToken, Delimiter, Lit, Token, TokenKind, UnOpToken}; +use raccoon_span::{BytePos, Span, Symbol}; +use TokenKind::*; + +/// A `Cursor` is a wrapper around a [Chars] iterator that provides some additional methods +/// for lexing purposes and tracks the current position in the source code for error reporting. 
+#[derive(Debug, Clone)] +pub struct Cursor<'a> { + input: Chars<'a>, + len: usize, +} + +/// A `Lexer` is a wrapper around a [Cursor] that provides methods for lexing tokens +/// from the source code and tracks the current position in the source code for error reporting +pub struct Lexer<'a> { + pub cursor: Cursor<'a>, +} + +impl<'a> Cursor<'a> { + pub fn new(input: &'a str) -> Cursor<'a> { + Cursor { + input: input.chars(), + len: input.len(), + } + } + + #[inline] + pub fn peek(&self, ch: char) -> bool { + self.input.clone().next() == Some(ch) + } + + #[inline] + pub fn bump(&mut self) { + self.next(); + } + + #[inline] + pub fn bump_by(&mut self, n: usize) { + for _ in 0..n { + self.bump(); + } + } + + #[inline] + pub fn pos(&self) -> BytePos { + BytePos((self.len - self.input.as_str().len()) as u32) + } +} + +impl Iterator for Cursor<'_> { + type Item = char; + + fn next(&mut self) -> Option<Self::Item> { + self.input.next() + } +} + +impl<'a> Lexer<'a> { + pub fn new(input: &'a str) -> Lexer<'a> { + Lexer { + cursor: Cursor::new(input), + } + } + + pub fn advance(&mut self) -> Token { + let start = self.cursor.pos(); + let Some(ch) = self.cursor.next() else { + return Token::new(TokenKind::Eof, Span::new(start, start)); + }; + + // A macro to make it easier to write if-else chains that return early + macro_rules! if_cond { + ($char:expr,$if:expr, $else:expr) => { + if self.cursor.peek($char) { + self.cursor.bump(); + $if + } else { + $else + } + }; + } + + let kind = match ch { + '=' => if_cond!('=', CondOp(CondOpToken::Eq), Eq), + '+' => if_cond!('=', BinOpEq(BinOpToken::Add), BinOp(BinOpToken::Add)), + '-' => if_cond!('=', BinOpEq(BinOpToken::Sub), BinOp(BinOpToken::Sub)), + '*' => if_cond!('=', BinOpEq(BinOpToken::Mul), BinOp(BinOpToken::Mul)), + '^' => if_cond!('=', BinOpEq(BinOpToken::Xor), BinOp(BinOpToken::Xor)), + '%' => if_cond!('=', BinOpEq(BinOpToken::Rem), BinOp(BinOpToken::Rem)), + '!' 
=> if_cond!('=', CondOp(CondOpToken::Ne), UnOp(UnOpToken::Not)), + ':' => if_cond!(':', ColonColon, Colon), + '.' => Dot, + ',' => Comma, + ';' => Semi, + '(' => OpenDelim(Delimiter::Parenthesis), + ')' => CloseDelim(Delimiter::Parenthesis), + '{' => OpenDelim(Delimiter::Brace), + '}' => CloseDelim(Delimiter::Brace), + '[' => OpenDelim(Delimiter::Bracket), + ']' => CloseDelim(Delimiter::Bracket), + + '/' => if_cond!( + '/', + return self.skip_inline_comment(), + if_cond!( + '*', + return self.skip_block_comment(), + if_cond!('=', BinOpEq(BinOpToken::Div), BinOp(BinOpToken::Div)) + ) + ), + '&' => if_cond!( + '&', + CondOp(CondOpToken::And), + if_cond!('=', BinOpEq(BinOpToken::And), BinOp(BinOpToken::And)) + ), + '|' => if_cond!( + '|', + CondOp(CondOpToken::Or), + if_cond!('=', BinOpEq(BinOpToken::Or), BinOp(BinOpToken::Or)) + ), + '>' => if_cond!( + '=', + CondOp(CondOpToken::Ge), + if_cond!( + '>', + if_cond!('=', BinOpEq(BinOpToken::Shr), BinOp(BinOpToken::Shr)), + CondOp(CondOpToken::Gt) + ) + ), + '<' => if_cond!( + '=', + CondOp(CondOpToken::Le), + if_cond!( + '<', + if_cond!('=', BinOpEq(BinOpToken::Shl), BinOp(BinOpToken::Shl)), + CondOp(CondOpToken::Lt) + ) + ), + '"' => return self.scan_string(true), + '0'..='9' => return self.scan_number(Some(ch), true), + 'a'..='z' | 'A'..='Z' | '_' => return self.scan_ident(Some(ch)), + ch if ch.is_whitespace() => return self.skip_whitespace(), + + // TODO: handle invalid character + _ => panic!("unexpected character: {}", ch), + }; + + Token::new(kind, Span::new(start, self.cursor.pos())) + } + + #[inline] + pub fn skip_whitespace(&mut self) -> Token { + while let Some(ch) = self.cursor.clone().next() { + if !ch.is_whitespace() { + break; + } + self.cursor.bump(); + } + + self.advance() + } + + pub fn scan_ident(&mut self, first: Option<char>) -> Token { + let start = self.cursor.pos() - BytePos(if first.is_some() { 1 } else { 0 }); + let mut buf = if let Some(ch) = first { + ch.to_string() + } else { + String::new() + }; + 
+ while let Some(ch) = self.cursor.clone().next() { + match ch { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => buf.push(ch), + _ => break, + } + self.cursor.bump(); + } + + let sym = Symbol::intern(&buf); + + Token::new(TokenKind::Ident(sym), Span::new(start, self.cursor.pos())) + } + + pub fn scan_number(&mut self, first: Option<char>, scan_float: bool) -> Token { + let start = self.cursor.pos() - BytePos(if first.is_some() { 1 } else { 0 }); + let mut buf = if let Some(ch) = first { + ch.to_string() + } else { + String::new() + }; + let mut scanned_float = false; + + while let Some(ch) = self.cursor.clone().next() { + match ch { + '0'..='9' => buf.push(ch), + '.' => { + if scan_float { + buf.push(ch); + scanned_float = true; + } else { + break; + } + } + _ => break, + } + self.cursor.bump(); + } + + let sym = if scanned_float { + Lit::new_float(Symbol::intern(&buf)) + } else { + Lit::new_int(Symbol::intern(&buf)) + }; + + Token::new(TokenKind::Lit(sym), Span::new(start, self.cursor.pos())) + } + + #[inline] + pub fn scan_string(&mut self, inside_next: bool) -> Token { + let start = self.cursor.pos() - BytePos(if inside_next { 1 } else { 0 }); + let mut buf = String::new(); + let mut terminated = false; + for ch in &mut self.cursor { + if ch == '"' { + terminated = true; + break; + } + buf.push(ch); + } + if !terminated { + // TODO: update it to error handling + panic!("Unterminated string literal"); + } + + let sym = Lit::new_str(Symbol::intern(&buf)); + + Token::new(TokenKind::Lit(sym), Span::new(start, self.cursor.pos())) + } + + #[inline] + pub fn skip_inline_comment(&mut self) -> Token { + for ch in &mut self.cursor { + if ch == '\n' { + break; + } + } + self.advance() + } + + #[inline] + pub fn skip_block_comment(&mut self) -> Token { + let mut terminated = false; + while let Some(ch) = self.cursor.next() { + if ch == '*' && self.cursor.peek('/') { + terminated = true; + self.cursor.bump(); + break; + } + } + if !terminated { + // TODO: update it to error handling 
+ panic!("Unterminated block comment"); + } + + self.advance() + } +} From 7bcbc77099259fa280bec108a234cb29afc4be7f Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Tue, 11 Apr 2023 02:58:20 +0200 Subject: [PATCH 04/19] feat: implement lexer tokens --- compiler/ast/src/lib.rs | 6 ++ compiler/ast/src/token.rs | 205 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 211 insertions(+) create mode 100644 compiler/ast/src/lib.rs create mode 100644 compiler/ast/src/token.rs diff --git a/compiler/ast/src/lib.rs b/compiler/ast/src/lib.rs new file mode 100644 index 0000000..f868ad3 --- /dev/null +++ b/compiler/ast/src/lib.rs @@ -0,0 +1,6 @@ +//! The AST module contains the AST data structures and token definitions +//! used by the compiler. + +pub mod token; + +pub use token::*; diff --git a/compiler/ast/src/token.rs b/compiler/ast/src/token.rs new file mode 100644 index 0000000..dfd78d8 --- /dev/null +++ b/compiler/ast/src/token.rs @@ -0,0 +1,205 @@ +//! A token is a lexical unit of the source code. + +use raccoon_span::{Span, Symbol, DUMMY_SP}; + +pub const DUMMY_TOKEN: Token = Token::new(TokenKind::Dummy, DUMMY_SP); + +/// A token is a lexical unit of the source code. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Token { + pub kind: TokenKind, + pub span: Span, +} + +impl Token { + pub const fn new(kind: TokenKind, span: Span) -> Token { + Token { kind, span } + } + + pub fn is_eof(&self) -> bool { + self.kind == TokenKind::Eof + } +} + +/// A token kind. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum TokenKind { + /// A conditional operator. + CondOp(CondOpToken), + + /// A binary operator. + BinOp(BinOpToken), + + /// A binary operator with an assignment. + BinOpEq(BinOpToken), + + /// A unary operator. + UnOp(UnOpToken), + + /// `=`. + Eq, + + /// `.`. + Dot, + + /// `,`. + Comma, + + /// `;`. + Semi, + + /// `:`. + Colon, + + /// `::`. + ColonColon, + + /// `"`. + Quote, + + /// A literal. 
+ Lit(Lit), + + /// An opening delimiter e.g. `(`. + OpenDelim(Delimiter), + + /// A closing delimiter e.g. `)`. + CloseDelim(Delimiter), + + /// An identifier. + Ident(Symbol), + + /// An end-of-file token. + Eof, + + /// A dummy token. + Dummy, +} + +/// A delimiter. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Delimiter { + /// `(...)`. + Parenthesis, + + /// `[...]`. + Bracket, + + /// `{...}`. + Brace, +} + +/// A literal value. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Lit { + pub kind: LitKind, + pub symbol: Symbol, +} +impl Lit { + pub const fn new(kind: LitKind, symbol: Symbol) -> Lit { + Lit { kind, symbol } + } + + pub fn new_str(symbol: Symbol) -> Lit { + Lit::new(LitKind::Str, symbol) + } + + pub fn new_int(symbol: Symbol) -> Lit { + Lit::new(LitKind::Int, symbol) + } + + pub fn new_float(symbol: Symbol) -> Lit { + Lit::new(LitKind::Float, symbol) + } +} + +/// A literal kind. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LitKind { + /// An integer literal e.g. `1`. + Int, + + /// A floating point literal e.g. `1.0`. + Float, + + /// A string literal e.g. `"hello"`. + Str, + + /// A boolean literal e.g. `true`. + Bool, +} + +/// A conditional operator. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum CondOpToken { + /// `==` + Eq, + + /// `!=` + Ne, + + /// `<` + Lt, + + /// `<=` + Le, + + /// `>` + Gt, + + /// `>=` + Ge, + + /// `&&` + And, + + /// `||` + Or, +} + +/// A binary operator. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BinOpToken { + /// `+` + Add, + + /// `-` + Sub, + + /// `*` + Mul, + + /// `/` + Div, + + /// `%` + Rem, + + /// `&` + And, + + /// `|` + Or, + + /// `^` + Xor, + + /// `<<` + Shl, + + /// `>>` + Shr, +} + +/// A unary operator. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum UnOpToken { + /// `!` + Not, + + /// `~` + NotBitwise, + + /// `-` + Neg, +} From 4fbc0768d1bbeb466af51124f31f318037bf9d61 Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Tue, 11 Apr 2023 02:58:50 +0200 Subject: [PATCH 05/19] feat: implement symbol --- compiler/span/src/lib.rs | 92 +++++++++++++++++++++++++++ compiler/span/src/symbol.rs | 121 ++++++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+) create mode 100644 compiler/span/src/lib.rs create mode 100644 compiler/span/src/symbol.rs diff --git a/compiler/span/src/lib.rs b/compiler/span/src/lib.rs new file mode 100644 index 0000000..e86eb65 --- /dev/null +++ b/compiler/span/src/lib.rs @@ -0,0 +1,92 @@ +//! This crate defines the [Span] and [BytePos] types, which are used +//! to represent a contiguous region of source text. +//! The [Span] type is used to represent a contiguous region of source text. +//! The [BytePos] type is used to represent a single byte position in the source text. +//! +mod symbol; + +use std::ops::{Add, Sub}; + +use lazy_static::lazy_static; +pub use symbol::*; + +pub const DUMMY_SP: Span = Span::new(BytePos(0), BytePos(0)); + +/// A `Span` represents a contiguous region of source text. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Span { + pub lo: BytePos, + pub hi: BytePos, +} + +/// A `BytePos` is a byte offset into the source text. +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] +pub struct BytePos(pub u32); + +/// A `GlobalSession` contains global data that is shared across all compilation sessions. 
+pub struct GlobalSession { + pub symbols: Interner, +} + +impl Span { + pub const fn new(lo: BytePos, hi: BytePos) -> Span { + Span { lo, hi } + } + + pub fn is_dummy(&self) -> bool { + *self == DUMMY_SP + } +} + +impl GlobalSession { + pub fn new() -> GlobalSession { + GlobalSession { + symbols: Interner::new(), + } + } +} +impl Default for GlobalSession { + fn default() -> Self { + Self::new() + } +} + +lazy_static! { + pub static ref GLOBAL_SESSION: GlobalSession = GlobalSession::new(); +} + +impl Add for BytePos { + type Output = BytePos; + + fn add(self, other: BytePos) -> BytePos { + BytePos(self.0 + other.0) + } +} + +impl Sub for BytePos { + type Output = BytePos; + + fn sub(self, other: BytePos) -> BytePos { + BytePos(self.0 - other.0) + } +} + +impl Add<u32> for BytePos { + type Output = BytePos; + + fn add(self, other: u32) -> BytePos { + BytePos(self.0 + other) + } +} + +impl From<u32> for BytePos { + fn from(pos: u32) -> BytePos { + BytePos(pos) + } +} + +impl From<usize> for BytePos { + fn from(pos: usize) -> BytePos { + BytePos(pos as u32) + } +} diff --git a/compiler/span/src/symbol.rs b/compiler/span/src/symbol.rs new file mode 100644 index 0000000..8729917 --- /dev/null +++ b/compiler/span/src/symbol.rs @@ -0,0 +1,121 @@ +//! This module defines the [Symbol] type, which is used to represent identifiers, keywords and other strings. + +#![allow(non_upper_case_globals)] + +use std::{fmt::Display, sync::Mutex}; + +use fxhash::FxHashMap; +use typed_arena::Arena; + +use crate::GLOBAL_SESSION; + +macro_rules! keywords { + ($($name:ident: $string:expr),* $(,)?) => { + pub mod kw { + lazy_static::lazy_static! { + $( + pub static ref $name: $crate::Symbol = $crate::Symbol::intern($string); + )* + } + } + }; +} + +keywords! 
{ + Let: "let", + Const: "const", + If: "if", + Else: "else", + While: "while", + For: "for", + In: "in", + Loop: "loop", + Break: "break", + Continue: "continue", + Return: "return", + Fn: "fn", + Struct: "struct", + Enum: "enum", + +} + +/// A `Symbol` is an interned string that is used to represent identifiers and keywords. +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] +pub struct Symbol(u32); + +impl Symbol { + #[inline] + pub fn as_u32(self) -> u32 { + self.0 + } + + #[inline] + pub fn as_usize(self) -> usize { + self.0 as usize + } + + #[inline] + pub fn as_str(self) -> &'static str { + GLOBAL_SESSION.symbols.get(self) + } + + #[inline] + pub fn intern(string: &str) -> Symbol { + GLOBAL_SESSION.symbols.intern(string) + } +} + +/// An `Interner` is used to intern strings into [Symbol]s. +pub struct Interner(Mutex<InternerInner>); + +pub struct InternerInner { + arena: Arena<u8>, + symbols: FxHashMap<&'static str, Symbol>, + strings: Vec<&'static str>, +} + +impl Interner { + #[inline] + pub fn new() -> Interner { + Interner(Mutex::new(InternerInner { + arena: Arena::new(), + symbols: FxHashMap::default(), + strings: Vec::new(), + })) + } + + pub fn intern(&self, string: &str) -> Symbol { + let mut inner = self.0.lock().unwrap(); + if let Some(&symbol) = inner.symbols.get(string) { + return symbol; + } + + let symbol = Symbol(inner.strings.len() as u32); + + // SAFETY: The string is guaranteed to be valid for the lifetime of the arena. + // The arena is never freed. 
+ let string: &'static str = unsafe { &*(inner.arena.alloc_str(string) as *const str) }; + + inner.symbols.insert(string, symbol); + inner.strings.push(string); + + symbol + } + + pub fn get(&self, symbol: Symbol) -> &'static str { + let inner = self.0.lock().unwrap(); + inner.strings[symbol.as_usize()] + } +} + +impl Display for Symbol { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +impl Default for Interner { + fn default() -> Self { + Self::new() + } +} From 74cfea64bcf6f5f694850e20396d89ebc12b6319 Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Tue, 11 Apr 2023 12:15:55 +0200 Subject: [PATCH 06/19] fix(lexer): bug in scan float Fix bug in lexer float scanning for multiple floats. --- compiler/lexer/src/lib.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/compiler/lexer/src/lib.rs b/compiler/lexer/src/lib.rs index de86908..3442e7b 100644 --- a/compiler/lexer/src/lib.rs +++ b/compiler/lexer/src/lib.rs @@ -197,13 +197,9 @@ impl<'a> Lexer<'a> { while let Some(ch) = self.cursor.clone().next() { match ch { '0'..='9' => buf.push(ch), - '.' => { - if scan_float { - buf.push(ch); - scanned_float = true; - } else { - break; - } + '.' if scan_float && !scanned_float => { + buf.push(ch); + scanned_float = true; } _ => break, } From 50ba66e13c8bbb2036a1d0bdeeb8c651da934fa4 Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Tue, 11 Apr 2023 12:21:46 +0200 Subject: [PATCH 07/19] refactor(lexer): implement `peek_char` Add peek_char method to lexer for easy cloning and character advancement. 
--- compiler/lexer/src/lib.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/compiler/lexer/src/lib.rs b/compiler/lexer/src/lib.rs index 3442e7b..4c7d8d9 100644 --- a/compiler/lexer/src/lib.rs +++ b/compiler/lexer/src/lib.rs @@ -30,7 +30,12 @@ impl<'a> Cursor<'a> { #[inline] pub fn peek(&self, ch: char) -> bool { - self.input.clone().next() == Some(ch) + self.peek_char() == Some(ch) + } + + #[inline] + pub fn peek_char(&self) -> Option<char> { + self.input.clone().next() } #[inline] @@ -154,7 +159,7 @@ impl<'a> Lexer<'a> { #[inline] pub fn skip_whitespace(&mut self) -> Token { - while let Some(ch) = self.cursor.clone().next() { + while let Some(ch) = self.cursor.peek_char() { if !ch.is_whitespace() { break; } @@ -172,7 +177,7 @@ impl<'a> Lexer<'a> { String::new() }; - while let Some(ch) = self.cursor.clone().next() { + while let Some(ch) = self.cursor.peek_char() { match ch { 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => buf.push(ch), _ => break, @@ -194,7 +199,7 @@ impl<'a> Lexer<'a> { }; let mut scanned_float = false; - while let Some(ch) = self.cursor.clone().next() { + while let Some(ch) = self.cursor.peek_char() { match ch { '0'..='9' => buf.push(ch), '.' 
if scan_float && !scanned_float => { From 42bb66f1d911dd1dea080203e87d2bc18577fe6b Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Fri, 28 Apr 2023 20:48:55 +0300 Subject: [PATCH 08/19] chore: add parse and macros crates to workspace --- Cargo.toml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4626465..3a03ac6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,2 +1,8 @@ [workspace] -members = ["compiler/lexer", "compiler/ast", "compiler/span"] +members = [ + "compiler/lexer", + "compiler/ast", + "compiler/span", + "compiler/parse", + "compiler/macros", +] From 4158ba7b063d8a7695689b78682a0e466ccb1d31 Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Fri, 28 Apr 2023 20:50:21 +0300 Subject: [PATCH 09/19] feat: update struct declaration --- docs/grammar.txt | 29 ++++++++++++++++------------- docs/spec.md | 2 +- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/docs/grammar.txt b/docs/grammar.txt index de4cb35..d9f1360 100644 --- a/docs/grammar.txt +++ b/docs/grammar.txt @@ -1,28 +1,31 @@ program → item* item → fnDecl | structDecl | enumDecl | implDecl | useDecl | modDecl -structDecl → "struct" IDENTIFIER ("{" structField* "}" | "(" structField* ")" | ";") -structField → IDENTIFIER ":" type "," ","? +structDecl → "struct" IDENTIFIER ("{" structField* ","? "}" | "(" structField* ","? ")" | ";") +structField → IDENTIFIER ":" type "," enumDecl → "enum" IDENTIFIER "{" enumVariant* "}" enumVariant → IDENTIFIER ( "(" type ( "," type )_ ")" )? "," ","? -implDecl → "impl" IDENTIFIER "{" implMethod_ "}" +implDecl → "impl" IDENTIFIER "{" implMethod* "}" implMethod → fnDecl useDecl → "use" PATH ";" | "use" PATH "as" IDENTIFIER ";" modDecl → "mod" IDENTIFIER "{" item* "}" PATH → IDENTIFIER ( "::" IDENTIFIER )* letDecl → "let" IDENTIFIER ( ":" type )? ( "=" expression )? ";" constDecl → "const" IDENTIFIER ( ":" type )? 
"=" expression ";" -fnDecl → "fn" IDENTIFIER "(" parameters? ")" ( "->" type )? blockStmt -parameters → parameter ( "," parameter )_ +fnDecl → "fn" IDENTIFIER "(" parameters? ")" ( "->" type )? blockExpr +parameters → parameter ( "," parameter )* ","? parameter → PATH ":" type -ifStmt → "if" expression blockStmt ( "else" blockStmt )? -matchStmt → "match" expression "{" matchCase_ "}" -matchCase → expression "=>" blockStmt ";" -loopStmt → "loop" blockStmt -whileStmt → "while" expression blockStmt -forStmt → "for" IDENTIFIER "in" expression blockStmt -blockStmt → "{" statement* "}" +ifExpr → "if" expression blockExpr ("else" (blockExpr | ifExpr) )? +matchExpr → "match" expression "{" matchCase "}" +matchCase → pattern "=>" ((expression ";") | blockExpr) +loopExpr → "loop" blockExpr +whileExpr → "while" expression blockExpr +forExpr → "for" IDENTIFIER "in" expression blockExpr +blockExpr → "{" statement* "}" +structExpr → "#{" structFieldExpr* ","? "}" +structFieldExpr → IDENTIFIER ":" expression "," +statement → letDecl | item | exprStmt exprStmt → expression ";" -expression → literal | IDENTIFIER | unaryExpr | binaryExpr | groupExpr | callExpr | accessExpr +expression → literal | IDENTIFIER | unaryExpr | binaryExpr | groupExpr | callExpr | accessExpr | ifExpr | matchExpr | loopExpr | whileExpr | forExpr literal → INT | FLOAT | STRING | "true" | "false" unaryExpr → ( "-" | "!" 
) expression binaryExpr → expression operator expression diff --git a/docs/spec.md b/docs/spec.md index 2ad6d4b..9158600 100644 --- a/docs/spec.md +++ b/docs/spec.md @@ -336,7 +336,7 @@ let person = Person("John", 20); ```rust -let person: Person = { +let person: Person = #{ name: "John", age: 20, }; From 726b0f42ec63ad31c4bbd520783066245bc087cf Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Fri, 28 Apr 2023 20:51:40 +0300 Subject: [PATCH 10/19] feat: implement `AST` --- compiler/ast/src/ast.rs | 633 ++++++++++++++++++++++++++++++++++++++++ compiler/ast/src/lib.rs | 2 + 2 files changed, 635 insertions(+) create mode 100644 compiler/ast/src/ast.rs diff --git a/compiler/ast/src/ast.rs b/compiler/ast/src/ast.rs new file mode 100644 index 0000000..30e7c4a --- /dev/null +++ b/compiler/ast/src/ast.rs @@ -0,0 +1,633 @@ +use raccoon_span::{Ident, Span, Symbol}; +use thin_vec::{thin_vec, ThinVec}; + +use crate::Lit; + +/// Crate is the root of the AST. +pub struct Crate { + pub items: ThinVec, + pub span: Span, +} + +/// An item (e.g. `fn foo() {}`, `struct Bar;`, `extern { ... }`, etc.) +pub struct Item { + /// Visibility of the item (e.g. `pub`, `pub(crate)`, etc.) + pub vis: Visibility, + + /// Name of the item (e.g. `foo` for `fn foo() {}`) + pub ident: Ident, + + /// Kind of the item (e.g. `Fn`, `Struct`, etc.) + pub kind: ItemKind, + + /// span of the entire item + pub span: Span, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +/// A visibility qualifier (e.g. `pub`, `pub(crate)`, etc.) +pub enum Visibility { + /// `pub` + Public, + + /// `pub(crate)` + Crate, + + /// No visibility qualifier + Inherited, +} + +/// The kind of an item (e.g. `Fn`, `Struct`, `ExternCrate`, etc.) +pub enum ItemKind { + /// A module declaration. + Mod(Box), + + /// A use declaration. + Use(Box), + + /// A function declaration. + Fn(Box), + + /// A struct declaration. + Struct(Box), + + /// An enum declaration. 
+ Enum(Box), +} + +/// A module declaration. +pub enum ModKind { + /// A module with a body. + Loaded(ThinVec, Inline, Span), + + /// A module with an external body. + Unloaded, +} + +/// A use declaration. +pub struct UseTree { + /// The path of the use tree. + pub base: Path, + + /// The use tree's kind. + pub kind: UseTreeKind, + + /// span of the entire use tree + pub span: Span, +} + +/// A use tree kind (e.g. `use foo::bar`, `use foo::bar as baz`, etc.) +pub enum UseTreeKind { + /// A use tree with a simple path (e.g. `use foo::bar` or `use foo::bar as baz`) + Single(Option), + + /// A use tree with a nested use tree (e.g. `use foo::{bar, baz}`) + Nested(ThinVec), + + /// A use tree with a glob (e.g. `use foo::*`) + Glob, +} + +pub enum Inline { + Yes, + No, +} + +/// A function declaration. +pub struct Fn { + /// The function's signature. + pub sig: FnSig, + + /// The function's body. + pub body: Block, +} + +/// A function signature. +pub struct FnSig { + /// The function's parameters. + pub params: ThinVec, + + /// The function's return type. + pub ret_ty: Option, + + /// span of the entire function signature + pub span: Span, +} + +/// A function parameter. +pub struct Param { + /// The parameter's name. + pub ident: Pat, + + /// The parameter's type. + pub ty: Ty, + + /// span of the entire parameter + pub span: Span, +} + +/// A type (e.g. `int`, `str`, etc.) +pub struct Ty { + /// The type's kind. + pub kind: TyKind, + + /// span of the entire type + pub span: Span, +} + +/// A type kind (e.g. `int`, `str[]`, etc.) +pub enum TyKind { + /// An Array type (e.g. `int[]`) + Array(Box), + + /// A Tuple type (e.g. `(int, str)`) + Tuple(ThinVec), + + /// We will use this type for + Paren(Box), + + /// Represents an inferred type. + Infer, + + /// Represents a method that has an implicit `self` parameter. + ImplicitSelf, + + /// A Path type (e.g. `std::vec::Vec`) + Path(Path), + + /// an unit type (e.g. `()`) + Unit, +} + +/// A block of statements. 
+pub struct Block { + /// The block's statements. + pub stmts: ThinVec, + + /// span of the entire block + pub span: Span, +} + +/// A statement (e.g. `let x = 1;`, `x = 2;`, etc.) +pub struct Stmt { + /// The statement's kind. + pub kind: StmtKind, + + /// span of the entire statement + pub span: Span, +} + +/// A statement kind (e.g. `let x = 1;`, `x = 2;`, etc.) +pub enum StmtKind { + /// A item statement (e.g. `fn foo() {}`). + Item(Box), + + /// A let statement (e.g. `let x = 1;`). + Let(Box), + + /// An expression statement (e.g. `x = 2;`). + Expr(Box), + + /// A semi-colon statement (e.g. `x;`). + Semi(Box), + + /// An empty statement (e.g. `;`). + Empty, +} + +/// A let statement (e.g. `let x = 1;`). +pub struct Let { + /// The let statement's pattern. + pub pat: Pat, + + /// The let statement's type. + pub ty: Option, + + /// The let statement's initializer. + pub init: Option, + + /// span of the entire let statement + pub span: Span, +} + +/// A pattern (e.g. `x`, `(x, y)`, `Foo { x, y }`, etc.) +pub struct Pat { + /// The pattern's kind. + pub kind: PatKind, + + /// span of the entire pattern + pub span: Span, +} + +/// A pattern kind (e.g. `x`, `(x, y)`, `Foo { x, y }`, etc.) +pub enum PatKind { + /// A variable pattern (e.g. `x`). + Ident(Ident), + + /// A tuple pattern (e.g. `(x, y)`). + Tuple(ThinVec), + + /// A path pattern (e.g. `Foo::Bar`). + Path(Path), + + /// A struct pattern (e.g. `Foo { x, y }`). + Struct(Box), + + /// A enum pattern (e.g. `Foo::Bar(x, y)`). + Enum(Box), + + /// A slice pattern (e.g. `[x, y]`). + Slice(ThinVec), +} + +/// A struct pattern (e.g. `Foo { x, y }`). +pub struct StructPat { + /// The struct pattern's path. + pub path: Path, + + /// The struct pattern's fields. + pub fields: ThinVec, +} +/// A struct pattern field (e.g. `x` in `Foo { x, y }`). +pub struct StructPatField { + /// The struct pattern field's name. + pub ident: Ident, + + /// The struct pattern field's pattern. 
+ pub pat: Option, + + /// span of the entire struct pattern field + pub span: Span, +} + +/// An enum pattern (e.g. `Foo::Bar(x, y)`). +pub struct EnumPat { + /// The enum pattern's path. + pub path: Path, + + /// The enum pattern's fields. + pub fields: ThinVec, +} + +/// An expression (e.g. `1`, `x + 1`, etc.) +pub struct Expr { + /// The expression's kind. + pub kind: ExprKind, + + /// span of the entire expression + pub span: Span, +} + +/// An expression kind (e.g. `1`, `x + 1`, etc.) +pub enum ExprKind { + /// A literal expression (e.g. `1`). + Lit(Lit), + + /// A binary expression (e.g. `x + 1`). + Binary(Box), + + /// An assignment expression (e.g. `x = 1`). + Assign(Box), + + /// An assignment operation (e.g. `x += 1`). + AssignOp(Box), + + /// A unary expression (e.g. `!x`). + Unary(Box), + + /// A path expression (e.g. `foo::bar`). + Path(Path), + + /// A call expression (e.g. `foo(1)`). + Call(Box), + + /// an indexing expression (e.g. `foo[1]`). + Index(Box), + + /// A field access expression (e.g. `foo.bar` or `foo.0`). + Field(Box), + + /// A struct expression (e.g. `#{ x, y }`). + Struct(ThinVec), + + /// A tuple expression (e.g. `(x, y)`). + Tuple(ThinVec), + + /// A slice expression (e.g. `[x, y]`). + Array(ThinVec), + + /// A block expression (e.g. `{ let x = 1; x }`). + Block(Box), + + /// An if expression (e.g. `if x { 1 } else { 2 }`). + If(Box), + + /// A loop expression (e.g. `loop { x += 1; }`). + Loop(Box), + + /// A while expression (e.g. `while x { x += 1; }`). + While(Box), + + /// A for expression (e.g. `for x in y { x += 1; }`). + For(Box), + + /// A match expression (e.g. `match x { 1 => 2, _ => 3 }`). + Match(Box), + + /// A return expression (e.g. `return 1`). + Return(Option>), + + /// A break expression (e.g. `break`). + Break(Option>), + + /// A continue expression (e.g. `continue`). + Continue, + + /// A parenthesized expression (e.g. `(x * y)`). + Paren(Box), +} + +/// An assignment expression (e.g. `x = 1`). 
+pub struct Assign { + /// The assignment expression's left-hand side. + pub lhs: Expr, + + /// The assignment expression's right-hand side. + pub rhs: Expr, +} + +/// A binary expression (e.g. `x + 1`). +pub struct Binary { + /// The binary expression's left-hand side. + pub lhs: Expr, + + /// The binary expression's operator. + pub op: BinOp, + + /// The binary expression's right-hand side. + pub rhs: Expr, +} + +/// A binary operator (e.g. `+`). +pub struct BinOp { + /// The binary operator's kind. + pub kind: BinOpKind, + + /// span of the entire binary operator + pub span: Span, +} + +/// A binary operator kind (e.g. `+`). +pub enum BinOpKind { + /// A `+` operator. + Add, + + /// A `-` operator. + Sub, + + /// A `*` operator. + Mul, + + /// A `/` operator. + Div, + + /// A `%` operator. + Rem, + + /// A `&` operator. + BitAnd, + + /// A `|` operator. + BitOr, + + /// A `^` operator. + BitXor, + + /// A `<<` operator. + Shl, + + /// A `>>` operator. + Shr, + + /// A `&&` operator. + And, + + /// A `||` operator. + Or, + + /// A `==` operator. + Eq, + + /// A `!=` operator. + Ne, + + /// A `<` operator. + Lt, + + /// A `<=` operator. + Le, + + /// A `>` operator. + Gt, + + /// A `>=` operator. + Ge, +} + +/// A unary expression (e.g. `!x`). +pub struct Unary { + /// The unary expression's operator. + pub op: UnaryOp, + + /// The unary expression's operand. + pub expr: Expr, +} + +/// A unary operator (e.g. `!`). +pub struct UnaryOp { + /// The unary operator's kind. + pub kind: UnaryOpKind, + + /// span of the entire unary operator + pub span: Span, +} + +/// A unary operator kind (e.g. `!`). +pub enum UnaryOpKind { + /// A `!` operator. + Not, + + /// A `-` operator. + Neg, + + /// A `~` operator. + BitNot, +} + +/// A call expression (e.g. `foo(1)`). +pub struct Call { + /// The call expression's function. + pub callee: Expr, + + /// The call expression's arguments. + pub args: ThinVec, +} + +/// A field access expression (e.g. `foo.bar` or `foo.0`). 
+pub struct Field { + /// The field access expression's base. + pub base: Expr, + + /// The field access expression's field. + pub kind: FieldKind, +} + +pub enum FieldKind { + /// A named field (e.g. `foo.bar`). + Named(Ident), + + /// An unnamed field (e.g. `foo.0`). + Unnamed(Symbol), +} + +/// A struct expression field (e.g. `x`). +pub struct StructExprField { + /// The struct expression field's name. + pub name: Ident, + + /// The struct expression field's value. + pub value: Expr, + + /// The span of the entire struct expression field. + pub span: Span, +} + +/// An if expression (e.g. `if x { 1 } else { 2 }`). +pub struct If { + /// The if expression's condition. + pub cond: Expr, + + /// The if expression's then block. + pub then_branch: Block, + + /// The if expression's else block. + pub else_branch: Option, +} + +/// A while expression (e.g. `while x { x += 1; }`). +pub struct While { + /// The while expression's condition. + pub cond: Expr, + + /// The while expression's body. + pub body: Block, +} + +/// A for expression (e.g. `for x in y { x += 1; }`). +pub struct For { + /// The for expression's variable. + pub pat: Pat, + + /// The for expression's iterator. + pub iter: Expr, + + /// The for expression's body. + pub body: Block, +} + +/// A match expression (e.g. `match x { 1 => 2, _ => 3 }`). +pub struct Match { + /// The match expression's discriminant. + pub discriminant: Expr, + + /// The match expression's arms. + pub arms: ThinVec, +} + +/// A match expression arm (e.g. `1 => 2`). +pub struct MatchArm { + /// The match expression arm's pattern. + pub pattern: Pat, + + /// The match expression arm's body. + pub body: Expr, +} + +/// A index expression (e.g. `foo[1]`). +pub struct Index { + /// The index expression's base. + pub base: Expr, + + /// The index expression's index. + pub index: Expr, +} + +/// A path (e.g. `std::mem::replace`). +pub struct Path { + /// The path's segments. + pub segments: ThinVec, +} + +/// A path segment (e.g. 
`mem` in `std::mem::replace`). +pub struct PathSegment { + /// The path segment's identifier. + pub ident: Ident, + + /// The path segment's span. + pub span: Span, +} + +/// A struct declaration. +pub struct Struct { + /// The struct's fields. + pub fields: StructFields, +} + +pub enum StructFields { + /// A tuple struct (e.g. `struct Foo(u32, u32)`). + Tuple(ThinVec<(Visibility, Ty)>), + + /// A struct (e.g. `struct Foo { x: u32, y: u32 }`). + Struct(ThinVec<(Visibility, Ident, Ty)>), + + /// A unit struct (e.g. `struct Foo`). + Unit, +} + +/// A enum declaration. +pub struct Enum { + /// The enum's variants. + pub variants: Option>, +} + +/// A enum variant (e.g. `Foo` in `enum Foo { ... }`). +pub struct EnumVariant { + /// The enum variant's name. + pub ident: Ident, + + /// The enum variant's fields. + pub fields: EnumVariantFields, +} + +/// A enum variant fields (e.g. `Foo(u32, u32)`). +pub enum EnumVariantFields { + /// A tuple enum variant (e.g. `Foo(u32, u32)`). + Tuple(ThinVec<(Visibility, Ty)>), + + /// A struct enum variant (e.g. `Foo { x: u32, y: u32 }`). + Struct(ThinVec<(Visibility, Ident, Ty)>), + + /// A unit enum variant (e.g. `Foo`). + Unit, +} + +impl From for Path { + fn from(ident: Ident) -> Path { + Path { + segments: thin_vec![PathSegment { + ident, + span: ident.span, + }], + } + } +} diff --git a/compiler/ast/src/lib.rs b/compiler/ast/src/lib.rs index f868ad3..962ea88 100644 --- a/compiler/ast/src/lib.rs +++ b/compiler/ast/src/lib.rs @@ -1,6 +1,8 @@ //! The AST module contains the AST data structures and token definitions //! used by the compiler. 
+pub mod ast; pub mod token; +pub use ast::*; pub use token::*; From ec977264d7bf62a980b879fd976bf6f1324e9c3a Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Fri, 28 Apr 2023 20:52:24 +0300 Subject: [PATCH 11/19] chore: use `thin-vec` to decrease enum size --- compiler/ast/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler/ast/Cargo.toml b/compiler/ast/Cargo.toml index d0f1931..fe30ec0 100644 --- a/compiler/ast/Cargo.toml +++ b/compiler/ast/Cargo.toml @@ -4,4 +4,5 @@ version = "0.0.0" edition = "2021" [dependencies] +thin-vec = "0.2.12" raccoon_span = { path = "../span" } From 73db348ae4f35d63070e760892b037be6f85d0dc Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Fri, 28 Apr 2023 20:53:17 +0300 Subject: [PATCH 12/19] feat: add `RArrow` and `Hash` variants --- compiler/ast/src/token.rs | 80 ++++++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 26 deletions(-) diff --git a/compiler/ast/src/token.rs b/compiler/ast/src/token.rs index dfd78d8..aa645a2 100644 --- a/compiler/ast/src/token.rs +++ b/compiler/ast/src/token.rs @@ -1,28 +1,18 @@ //! A token is a lexical unit of the source code. -use raccoon_span::{Span, Symbol, DUMMY_SP}; +use raccoon_span::{Ident, Span, Symbol, DUMMY_SP}; pub const DUMMY_TOKEN: Token = Token::new(TokenKind::Dummy, DUMMY_SP); /// A token is a lexical unit of the source code. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct Token { pub kind: TokenKind, pub span: Span, } -impl Token { - pub const fn new(kind: TokenKind, span: Span) -> Token { - Token { kind, span } - } - - pub fn is_eof(&self) -> bool { - self.kind == TokenKind::Eof - } -} - /// A token kind. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum TokenKind { /// A conditional operator. 
CondOp(CondOpToken), @@ -52,11 +42,17 @@ pub enum TokenKind { Colon, /// `::`. - ColonColon, + DoubleColon, /// `"`. Quote, + /// `->`. + RArrow, + + /// `#`. + Hash, + /// A literal. Lit(Lit), @@ -80,7 +76,7 @@ pub enum TokenKind { #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Delimiter { /// `(...)`. - Parenthesis, + Paren, /// `[...]`. Bracket, @@ -90,7 +86,7 @@ pub enum Delimiter { } /// A literal value. -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct Lit { pub kind: LitKind, pub symbol: Symbol, @@ -111,6 +107,10 @@ impl Lit { pub fn new_float(symbol: Symbol) -> Lit { Lit::new(LitKind::Float, symbol) } + + pub fn new_bool(symbol: Symbol) -> Lit { + Lit::new(LitKind::Bool, symbol) + } } /// A literal kind. @@ -149,12 +149,6 @@ pub enum CondOpToken { /// `>=` Ge, - - /// `&&` - And, - - /// `||` - Or, } /// A binary operator. @@ -176,19 +170,25 @@ pub enum BinOpToken { Rem, /// `&` - And, + BitAnd, /// `|` - Or, + BitOr, /// `^` - Xor, + BitXor, /// `<<` Shl, /// `>>` Shr, + + /// `&&` + And, + + /// `||` + Or, } /// A unary operator. 
@@ -198,8 +198,36 @@ pub enum UnOpToken { Not, /// `~` - NotBitwise, + BitNot, /// `-` Neg, } + +impl Token { + pub const fn new(kind: TokenKind, span: Span) -> Token { + Token { kind, span } + } + + pub fn is_eof(&self) -> bool { + self.kind == TokenKind::Eof + } + + pub fn is_dummy(&self) -> bool { + self.kind == TokenKind::Dummy + } + + pub fn lit(&self) -> Option { + match self.kind { + TokenKind::Lit(lit) => Some(lit), + _ => None, + } + } + + pub fn ident(&self) -> Option { + match self.kind { + TokenKind::Ident(ident) => Some(Ident::new(ident, self.span)), + _ => None, + } + } +} From 544fc23c94c334f7f84983e5750e56fd3142b8b9 Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Fri, 28 Apr 2023 20:53:34 +0300 Subject: [PATCH 13/19] chore: vscode settings --- .vscode/settings.json | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..9cf88bb --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,4 @@ +{ + "rust-analyzer.linkedProjects": ["./compiler/parse/Cargo.toml"], + "cSpell.words": ["prec"] +} From 17f63c1c71565914d4ff9681e8d3bcde74c79051 Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Fri, 28 Apr 2023 20:54:33 +0300 Subject: [PATCH 14/19] refactor: use `proc-macro` to implement keywords --- compiler/span/src/symbol.rs | 120 ++++++++++++++++++++++++------------ 1 file changed, 79 insertions(+), 41 deletions(-) diff --git a/compiler/span/src/symbol.rs b/compiler/span/src/symbol.rs index 8729917..115bee6 100644 --- a/compiler/span/src/symbol.rs +++ b/compiler/span/src/symbol.rs @@ -2,47 +2,62 @@ #![allow(non_upper_case_globals)] -use std::{fmt::Display, sync::Mutex}; +use std::{fmt::Display, ops::Deref, sync::Mutex}; use fxhash::FxHashMap; +use raccoon_macros::symbols; use typed_arena::Arena; -use crate::GLOBAL_SESSION; - -macro_rules! 
keywords { - ($($name:ident: $string:expr),* $(,)?) => { - pub mod kw { - lazy_static::lazy_static! { - $( - pub static ref $name: $crate::Symbol = $crate::Symbol::intern($string); - )* - } - } - }; -} - -keywords! { - Let: "let", - Const: "const", - If: "if", - Else: "else", - While: "while", - For: "for", - In: "in", - Loop: "loop", - Break: "break", - Continue: "continue", - Return: "return", - Fn: "fn", - Struct: "struct", - Enum: "enum", - +use crate::{Span, DUMMY_SP, GLOBAL_SESSION}; + +symbols! { + { + // Special tokens + Empty: "", + Wildcard: "_", + + // Keywords + Let: "let", + Const: "const", + If: "if", + Else: "else", + While: "while", + For: "for", + In: "in", + Loop: "loop", + Break: "break", + Continue: "continue", + Return: "return", + Mod: "mod", + Use: "use", + Fn: "fn", + Struct: "struct", + Enum: "enum", + Pub: "pub", + True: "true", + False: "false", + As: "as", + Crate: "crate", + SelfLower: "self", + SelfUpper: "Self", + Super: "super", + } } /// A `Symbol` is an interned string that is used to represent identifiers and keywords. #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] pub struct Symbol(u32); +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +/// An identifier (e.g. `foo`). +pub struct Ident { + /// The name of the identifier. 
+ pub name: Symbol, + + /// span of the identifier + pub span: Span, +} + impl Symbol { #[inline] pub fn as_u32(self) -> u32 { @@ -63,6 +78,36 @@ impl Symbol { pub fn intern(string: &str) -> Symbol { GLOBAL_SESSION.symbols.intern(string) } + + #[inline] + pub fn is_keyword(self) -> bool { + self <= kw::Pub && self >= kw::Let + } + + pub fn is_path_segment_keyword(self) -> bool { + self == kw::SelfLower || self == kw::Super || self == kw::SelfUpper || self == kw::Crate + } +} + +impl Ident { + pub fn new(name: Symbol, span: Span) -> Self { + Ident { name, span } + } + + pub fn empty() -> Self { + Ident { + name: kw::Empty, + span: DUMMY_SP, + } + } +} + +impl Deref for Ident { + type Target = Symbol; + + fn deref(&self) -> &Symbol { + &self.name + } } /// An `Interner` is used to intern strings into [Symbol]s. @@ -75,12 +120,11 @@ pub struct InternerInner { } impl Interner { - #[inline] - pub fn new() -> Interner { + pub fn prefill(strings: &[&'static str]) -> Self { Interner(Mutex::new(InternerInner { arena: Arena::new(), - symbols: FxHashMap::default(), - strings: Vec::new(), + strings: strings.to_owned(), + symbols: strings.iter().copied().zip((0..).map(Symbol)).collect(), })) } @@ -113,9 +157,3 @@ impl Display for Symbol { write!(f, "{}", self.as_str()) } } - -impl Default for Interner { - fn default() -> Self { - Self::new() - } -} From 19792897b1e449ba91dc895f3cdddeef1d0fee31 Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Fri, 28 Apr 2023 20:55:40 +0300 Subject: [PATCH 15/19] feat: add `to` function --- compiler/span/src/lib.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/compiler/span/src/lib.rs b/compiler/span/src/lib.rs index e86eb65..0d13e20 100644 --- a/compiler/span/src/lib.rs +++ b/compiler/span/src/lib.rs @@ -36,12 +36,16 @@ impl Span { pub fn is_dummy(&self) -> bool { *self == DUMMY_SP } + + pub fn to(self, other: Span) -> Span { + Span::new(self.lo, other.hi) + } } 
impl GlobalSession { pub fn new() -> GlobalSession { GlobalSession { - symbols: Interner::new(), + symbols: Interner::fresh(), } } } @@ -55,6 +59,12 @@ lazy_static! { pub static ref GLOBAL_SESSION: GlobalSession = GlobalSession::new(); } +impl BytePos { + pub fn to(self, other: BytePos) -> Span { + Span::new(self, other) + } +} + impl Add for BytePos { type Output = BytePos; From 2fb5b702db5a680a0b1018fb2b760acc1525e44b Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Fri, 28 Apr 2023 20:56:33 +0300 Subject: [PATCH 16/19] refactor: refactoring lexer --- compiler/lexer/src/lib.rs | 50 +++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/compiler/lexer/src/lib.rs b/compiler/lexer/src/lib.rs index 4c7d8d9..be04593 100644 --- a/compiler/lexer/src/lib.rs +++ b/compiler/lexer/src/lib.rs @@ -3,7 +3,7 @@ use std::str::Chars; use raccoon_ast::{BinOpToken, CondOpToken, Delimiter, Lit, Token, TokenKind, UnOpToken}; -use raccoon_span::{BytePos, Span, Symbol}; +use raccoon_span::{kw, BytePos, Span, Symbol}; use TokenKind::*; /// A `Cursor` is a wrapper around a [Chars] iterator that provides some additional methods @@ -54,6 +54,11 @@ impl<'a> Cursor<'a> { pub fn pos(&self) -> BytePos { BytePos((self.len - self.input.as_str().len()) as u32) } + + #[inline] + pub fn is_eof(&self) -> bool { + self.input.as_str().is_empty() + } } impl Iterator for Cursor<'_> { @@ -92,17 +97,21 @@ impl<'a> Lexer<'a> { let kind = match ch { '=' => if_cond!('=', CondOp(CondOpToken::Eq), Eq), '+' => if_cond!('=', BinOpEq(BinOpToken::Add), BinOp(BinOpToken::Add)), - '-' => if_cond!('=', BinOpEq(BinOpToken::Sub), BinOp(BinOpToken::Sub)), + '-' => if_cond!( + '=', + BinOpEq(BinOpToken::Sub), + if_cond!('>', RArrow, BinOp(BinOpToken::Sub)) + ), '*' => if_cond!('=', BinOpEq(BinOpToken::Mul), BinOp(BinOpToken::Mul)), - '^' => if_cond!('=', BinOpEq(BinOpToken::Xor), BinOp(BinOpToken::Xor)), + '^' => if_cond!('=', 
BinOpEq(BinOpToken::BitXor), BinOp(BinOpToken::BitXor)), '%' => if_cond!('=', BinOpEq(BinOpToken::Rem), BinOp(BinOpToken::Rem)), '!' => if_cond!('=', CondOp(CondOpToken::Ne), UnOp(UnOpToken::Not)), - ':' => if_cond!(':', ColonColon, Colon), + ':' => if_cond!(':', DoubleColon, Colon), '.' => Dot, ',' => Comma, ';' => Semi, - '(' => OpenDelim(Delimiter::Parenthesis), - ')' => CloseDelim(Delimiter::Parenthesis), + '(' => OpenDelim(Delimiter::Paren), + ')' => CloseDelim(Delimiter::Paren), '{' => OpenDelim(Delimiter::Brace), '}' => CloseDelim(Delimiter::Brace), '[' => OpenDelim(Delimiter::Bracket), @@ -119,13 +128,13 @@ impl<'a> Lexer<'a> { ), '&' => if_cond!( '&', - CondOp(CondOpToken::And), - if_cond!('=', BinOpEq(BinOpToken::And), BinOp(BinOpToken::And)) + if_cond!('=', BinOpEq(BinOpToken::And), BinOp(BinOpToken::And)), + if_cond!('=', BinOpEq(BinOpToken::BitAnd), BinOp(BinOpToken::BitAnd)) ), '|' => if_cond!( '|', - CondOp(CondOpToken::Or), - if_cond!('=', BinOpEq(BinOpToken::Or), BinOp(BinOpToken::Or)) + if_cond!('=', BinOpEq(BinOpToken::Or), BinOp(BinOpToken::Or)), + if_cond!('=', BinOpEq(BinOpToken::BitOr), BinOp(BinOpToken::BitOr)) ), '>' => if_cond!( '=', @@ -187,6 +196,13 @@ impl<'a> Lexer<'a> { let sym = Symbol::intern(&buf); + if matches!(sym, kw::True | kw::False) { + return Token::new( + TokenKind::Lit(Lit::new_bool(sym)), + Span::new(start, self.cursor.pos()), + ); + } + Token::new(TokenKind::Ident(sym), Span::new(start, self.cursor.pos())) } @@ -211,13 +227,13 @@ impl<'a> Lexer<'a> { self.cursor.bump(); } - let sym = if scanned_float { + let lit = if scanned_float { Lit::new_float(Symbol::intern(&buf)) } else { Lit::new_int(Symbol::intern(&buf)) }; - Token::new(TokenKind::Lit(sym), Span::new(start, self.cursor.pos())) + Token::new(TokenKind::Lit(lit), Span::new(start, self.cursor.pos())) } #[inline] @@ -269,4 +285,14 @@ impl<'a> Lexer<'a> { self.advance() } + + #[inline] + pub fn is_eof(&self) -> bool { + self.cursor.is_eof() + } + + #[inline] + pub fn 
pos(&self) -> BytePos { + self.cursor.pos() + } } From fe084c0a711a7da4f89289b594ee9e2ec22a7df9 Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Fri, 28 Apr 2023 20:56:56 +0300 Subject: [PATCH 17/19] chore: deps --- compiler/macros/Cargo.toml | 12 ++++++++++++ compiler/parse/Cargo.toml | 12 ++++++++++++ compiler/span/Cargo.toml | 1 + 3 files changed, 25 insertions(+) create mode 100644 compiler/macros/Cargo.toml create mode 100644 compiler/parse/Cargo.toml diff --git a/compiler/macros/Cargo.toml b/compiler/macros/Cargo.toml new file mode 100644 index 0000000..7ee1bbf --- /dev/null +++ b/compiler/macros/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "raccoon_macros" +version = "0.0.0" +edition = "2021" + +[lib] +proc-macro = true + +[dependencies] +syn = { version = "2", features = ["full"] } +proc-macro2 = "1" +quote = "1" diff --git a/compiler/parse/Cargo.toml b/compiler/parse/Cargo.toml new file mode 100644 index 0000000..3428b9e --- /dev/null +++ b/compiler/parse/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "parse" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +thin-vec = "0.2.12" +raccoon_lexer = { path = "../lexer" } +raccoon_ast = { path = "../ast" } +raccoon_span = { path = "../span" } diff --git a/compiler/span/Cargo.toml b/compiler/span/Cargo.toml index 28beb59..1bf52fb 100644 --- a/compiler/span/Cargo.toml +++ b/compiler/span/Cargo.toml @@ -7,3 +7,4 @@ edition = "2021" fxhash = "0.2.1" lazy_static = "1.4.0" typed-arena = "2.0.2" +raccoon_macros = { path = "../macros" } From eab29b18a0cc7cc24a993d595d45176c77a75ecf Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Fri, 28 Apr 2023 20:57:16 +0300 Subject: [PATCH 18/19] feat: implement symbols macro --- compiler/macros/src/lib.rs | 8 ++ compiler/macros/src/symbols.rs | 147 +++++++++++++++++++++++++++++++++ 2 
files changed, 155 insertions(+) create mode 100644 compiler/macros/src/lib.rs create mode 100644 compiler/macros/src/symbols.rs diff --git a/compiler/macros/src/lib.rs b/compiler/macros/src/lib.rs new file mode 100644 index 0000000..c5c621f --- /dev/null +++ b/compiler/macros/src/lib.rs @@ -0,0 +1,8 @@ +use proc_macro::TokenStream; + +mod symbols; + +#[proc_macro] +pub fn symbols(input: TokenStream) -> TokenStream { + symbols::symbols(input.into()).into() +} diff --git a/compiler/macros/src/symbols.rs b/compiler/macros/src/symbols.rs new file mode 100644 index 0000000..4cd471b --- /dev/null +++ b/compiler/macros/src/symbols.rs @@ -0,0 +1,147 @@ +#![allow(clippy::explicit_counter_loop)] + +/// original module: https://github.com/rust-lang/rust/blob/master/compiler/rustc_macros/src/symbols.rs +use proc_macro2::{Span, TokenStream}; +use quote::quote; +use std::collections::HashMap; +use syn::parse::{Parse, ParseStream, Result}; +use syn::{braced, punctuated::Punctuated, Ident, LitStr, Token}; + +struct Keyword { + name: Ident, + value: LitStr, +} + +impl Parse for Keyword { + fn parse(input: ParseStream<'_>) -> Result { + let name = input.parse()?; + input.parse::()?; + let value = input.parse()?; + + Ok(Keyword { name, value }) + } +} + +pub struct Keywords(Punctuated); + +impl Parse for Keywords { + fn parse(input: ParseStream<'_>) -> Result { + let content; + braced!(content in input); + let keywords = content.parse_terminated(Keyword::parse, Token![,])?; + + Ok(Keywords(keywords)) + } +} + +#[derive(Default)] +struct Errors { + list: Vec, +} + +impl Errors { + fn error(&mut self, span: Span, message: String) { + self.list.push(syn::Error::new(span, message)); + } +} + +pub fn symbols(input: TokenStream) -> TokenStream { + let (mut output, errors) = symbols_with_errors(input); + + // If we generated any errors, then report them as compiler_error!() macro calls. + // This lets the errors point back to the most relevant span. 
It also allows us + // to report as many errors as we can during a single run. + output.extend(errors.into_iter().map(|e| e.to_compile_error())); + + output +} + +fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { + let mut errors = Errors::default(); + + let keywords = match syn::parse2::(input) { + Ok(keywords) => keywords.0, + Err(e) => { + // This allows us to display errors at the proper span, while minimizing + // unrelated errors caused by bailing out (and not generating code). + errors.list.push(e); + Default::default() + } + }; + + let mut keyword_stream = quote! {}; + let mut prefill_stream = quote! {}; + let mut counter = 0u32; + let mut keys = HashMap::::with_capacity(keywords.len() + 10); + // let mut prev_key: Option<(Span, String)> = None; + + let mut check_dup = |span: Span, str: &str, errors: &mut Errors| { + if let Some(prev_span) = keys.get(str) { + errors.error(span, format!("Symbol `{str}` is duplicated")); + errors.error(*prev_span, "location of previous definition".to_string()); + } else { + keys.insert(str.to_string(), span); + } + }; + + // let mut check_order = |span: Span, str: &str, errors: &mut Errors| { + // if let Some((prev_span, ref prev_str)) = prev_key { + // if str < prev_str { + // errors.error(span, format!("Symbol `{str}` must precede `{prev_str}`")); + // errors.error( + // prev_span, + // format!("location of previous symbol `{prev_str}`"), + // ); + // } + // } + // prev_key = Some((span, str.to_string())); + // }; + + // Generate the listed keywords. + for keyword in keywords.iter() { + let name = &keyword.name; + let value = &keyword.value; + let value_string = value.value(); + check_dup(keyword.name.span(), &value_string, &mut errors); + prefill_stream.extend(quote! { + #value, + }); + keyword_stream.extend(quote! { + pub const #name: Symbol = Symbol(#counter); + }); + counter += 1; + } + + // Generate symbols for the strings "0", "1", ..., "9". 
+ // let digits_base = counter; + // counter += 10; + // for n in 0..10 { + // let n = n.to_string(); + // check_dup(Span::call_site(), &n, &mut errors); + // prefill_stream.extend(quote! { + // #n, + // }); + // } + + let output = quote! { + // const SYMBOL_DIGITS_BASE: u32 = #digits_base; + // const PREINTERNED_SYMBOLS_COUNT: u32 = #counter; + + #[doc(hidden)] + #[allow(non_upper_case_globals)] + pub mod kw { + use super::Symbol; + #keyword_stream + } + + impl Interner { + pub(crate) fn fresh() -> Self { + Interner::prefill(&[ + #prefill_stream + ]) + } + } + }; + + (output, errors.list) +} From 3dd249b2d35b5e5a10183eb8391248407bbac90d Mon Sep 17 00:00:00 2001 From: Youssef Khalil <38569107+DarkyEG@users.noreply.github.com> Date: Fri, 28 Apr 2023 20:57:36 +0300 Subject: [PATCH 19/19] feat: implement the parser --- compiler/parse/src/expr.rs | 358 +++++++++++++++++++++++++++++++++++++ compiler/parse/src/item.rs | 245 +++++++++++++++++++++++++ compiler/parse/src/lib.rs | 195 ++++++++++++++++++++ compiler/parse/src/pat.rs | 62 +++++++ compiler/parse/src/path.rs | 51 ++++++ compiler/parse/src/stmt.rs | 66 +++++++ compiler/parse/src/ty.rs | 44 +++++ 7 files changed, 1021 insertions(+) create mode 100644 compiler/parse/src/expr.rs create mode 100644 compiler/parse/src/item.rs create mode 100644 compiler/parse/src/lib.rs create mode 100644 compiler/parse/src/pat.rs create mode 100644 compiler/parse/src/path.rs create mode 100644 compiler/parse/src/stmt.rs create mode 100644 compiler/parse/src/ty.rs diff --git a/compiler/parse/src/expr.rs b/compiler/parse/src/expr.rs new file mode 100644 index 0000000..fdac7d4 --- /dev/null +++ b/compiler/parse/src/expr.rs @@ -0,0 +1,358 @@ +use crate::{path::PathStyle, ParseError, Parser}; +use raccoon_ast::{ + Assign, BinOp, BinOpKind, BinOpToken, Binary, Block, Call, CondOpToken, Delimiter, Expr, + ExprKind, Field, FieldKind, For, If, Index, StructExprField, Token, TokenKind, UnOpToken, + Unary, UnaryOp, UnaryOpKind, While, +}; +use 
raccoon_span::{kw, Span};
+
+/// Binding strength of the binary operators, listed from loosest (`Any`) to
+/// tightest (`Term`). The derived `Ord` follows declaration order and is what
+/// `parse_bin_rhs` compares.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Precedence {
+    Any,
+    Assign,
+    Or,
+    And,
+    BitOr,
+    BitXor,
+    BitAnd,
+    Compare,
+    Shift,
+    Arithmetic,
+    Term,
+}
+
+impl Precedence {
+    /// Returns the precedence of `token` when it is an assignment, binary or
+    /// comparison operator, and `None` for every other token.
+    pub fn from_token(token: &Token) -> Option<Precedence> {
+        use TokenKind::{BinOp, BinOpEq, CondOp};
+        Some(match token.kind {
+            BinOpEq(_) | TokenKind::Eq => Precedence::Assign,
+            BinOp(BinOpToken::Or) => Precedence::Or,
+            BinOp(BinOpToken::And) => Precedence::And,
+            BinOp(BinOpToken::BitOr) => Precedence::BitOr,
+            BinOp(BinOpToken::BitAnd) => Precedence::BitAnd,
+            BinOp(BinOpToken::BitXor) => Precedence::BitXor,
+            CondOp(CondOpToken::Eq)
+            | CondOp(CondOpToken::Ne)
+            | CondOp(CondOpToken::Lt)
+            | CondOp(CondOpToken::Le)
+            | CondOp(CondOpToken::Gt)
+            | CondOp(CondOpToken::Ge) => Precedence::Compare,
+            BinOp(BinOpToken::Shl) | BinOp(BinOpToken::Shr) => Precedence::Shift,
+            BinOp(BinOpToken::Add) | BinOp(BinOpToken::Sub) => Precedence::Arithmetic,
+            BinOp(BinOpToken::Mul) | BinOp(BinOpToken::Div) | BinOp(BinOpToken::Rem) => {
+                Precedence::Term
+            }
+
+            _ => return None,
+        })
+    }
+}
+
+impl Parser<'_> {
+    /// Parses a full expression: one unary/postfix operand followed by any
+    /// number of binary operators and operands.
+    pub fn parse_expr(&mut self) -> Result<Expr, ParseError> {
+        let lhs = self.parse_unary_expr()?;
+        self.parse_bin_rhs(Precedence::Any, lhs)
+    }
+
+    /// Parses prefix unary operators, then a primary expression followed by any
+    /// chain of postfix forms: call `f(..)`, index `a[i]`, field `a.b` / `a.0`.
+    fn parse_unary_expr(&mut self) -> Result<Expr, ParseError> {
+        let start = self.token.span;
+
+        if matches!(self.token.kind, TokenKind::UnOp(_)) {
+            let op = self.parse_unary_op()?;
+            return Ok(Expr {
+                kind: ExprKind::Unary(
+                    Unary {
+                        op,
+                        expr: self.parse_unary_expr()?,
+                    }
+                    .into(),
+                ),
+                span: start.to(self.prev_token.span),
+            });
+        };
+
+        let mut expr = self.parse_primary_expr()?;
+
+        loop {
+            if self.check_delim(Delimiter::Paren) {
+                let args = self.parse_parenthesized(|p| p.parse_expr())?;
+                expr = self.mk_expr(
+                    ExprKind::Call(Call { callee: expr, args }.into()),
+                    start.to(self.prev_token.span),
+                );
+            } else if self.eat_delim(Delimiter::Bracket) {
+                // Indexing is written `base[index]`, so the postfix opener is a bracket.
+                // A brace here would also swallow the `{` that starts the block of
+                // `if cond { .. }` / `while cond { .. }`.
+                let index = self.parse_expr()?;
+                // The closing `]` is mandatory; consume it before taking the span so
+                // the span covers the whole `base[index]`.
+                if !self.eat(TokenKind::CloseDelim(Delimiter::Bracket)) {
+                    return Err(ParseError);
+                }
+                expr = self.mk_expr(
+                    ExprKind::Index(Index { base: expr, index }.into()),
+                    start.to(self.prev_token.span),
+                );
+            } else if self.eat(TokenKind::Dot) {
+                let number = self.advance_int();
+
+                let kind = match number {
+                    Some(number) => FieldKind::Unnamed(number),
+                    None => FieldKind::Named(self.parse_ident(true)?),
+                };
+                expr = self.mk_expr(
+                    ExprKind::Field(Field { base: expr, kind }.into()),
+                    start.to(self.prev_token.span),
+                );
+            } else {
+                break;
+            }
+        }
+
+        Ok(expr)
+    }
+
+    /// Precedence-climbing loop: folds `op rhs` pairs onto `lhs` for as long as
+    /// the next operator binds at least as tightly as `min_prec`.
+    fn parse_bin_rhs(&mut self, min_prec: Precedence, mut lhs: Expr) -> Result<Expr, ParseError> {
+        // Every node built below starts where the left operand starts, not at
+        // the operator token that happens to be current on entry.
+        let start = lhs.span;
+
+        while let Some(prec) = Precedence::from_token(&self.token) {
+            if prec < min_prec {
+                break;
+            }
+
+            let token = self.token;
+            // `from_token` and `parse_bin_op` are separate tables; report a parse error
+            // instead of panicking if they ever disagree about a token.
+            let op = self.parse_bin_op().ok_or(ParseError)?;
+            let mut rhs = self.parse_unary_expr()?;
+
+            while let Some(next_prec) = Precedence::from_token(&self.token) {
+                // Assignment is the only right-associative level.
+                if next_prec <= prec && prec != Precedence::Assign {
+                    break;
+                }
+
+                rhs = self.parse_bin_rhs(next_prec, rhs)?;
+            }
+
+            match token.kind {
+                TokenKind::Eq => {
+                    lhs = self.mk_expr(
+                        ExprKind::Assign(Assign { lhs, rhs }.into()),
+                        start.to(self.prev_token.span),
+                    );
+                    continue;
+                }
+
+                TokenKind::BinOpEq(..)
=> {
+                    lhs = self.mk_expr(
+                        ExprKind::AssignOp(Binary { lhs, op, rhs }.into()),
+                        start.to(self.prev_token.span),
+                    );
+                    continue;
+                }
+
+                _ => (),
+            }
+
+            lhs = self.mk_expr(
+                ExprKind::Binary(Binary { lhs, op, rhs }.into()),
+                start.to(self.prev_token.span),
+            );
+        }
+
+        Ok(lhs)
+    }
+
+    /// Parses a prefix operator (`!`, `-`, `~`) and consumes its token.
+    ///
+    /// Returns `Err(ParseError)` without consuming anything when the current
+    /// token is not a unary operator.
+    fn parse_unary_op(&mut self) -> Result<UnaryOp, ParseError> {
+        let start = self.token.span;
+        use TokenKind::UnOp;
+        let op = match self.token.kind {
+            UnOp(UnOpToken::Not) => UnaryOpKind::Not,
+            UnOp(UnOpToken::Neg) => UnaryOpKind::Neg,
+            UnOp(UnOpToken::BitNot) => UnaryOpKind::BitNot,
+            _ => return Err(ParseError),
+        };
+
+        self.advance();
+
+        Ok(UnaryOp {
+            kind: op,
+            span: start.to(self.prev_token.span),
+        })
+    }
+
+    /// Maps the current operator token (plain or compound-assignment form) to its
+    /// `BinOpKind`, consumes the token and returns the operator with that token's span.
+    ///
+    /// Returns `None`, consuming nothing, when the current token is not one of the
+    /// operators listed below.
+    fn parse_bin_op(&mut self) -> Option<raccoon_ast::BinOp> {
+        use TokenKind::{BinOp, BinOpEq, CondOp};
+        let span = self.token.span;
+        let kind = match self.token.kind {
+            // `&&` / `||` are given a precedence by `Precedence::from_token`, so they
+            // must be accepted here as well.
+            BinOp(BinOpToken::Or) | BinOpEq(BinOpToken::Or) => BinOpKind::Or,
+            BinOp(BinOpToken::And) | BinOpEq(BinOpToken::And) => BinOpKind::And,
+            BinOp(BinOpToken::BitOr) | BinOpEq(BinOpToken::BitOr) => BinOpKind::BitOr,
+            BinOp(BinOpToken::Shl) | BinOpEq(BinOpToken::Shl) => BinOpKind::Shl,
+            BinOp(BinOpToken::Shr) | BinOpEq(BinOpToken::Shr) => BinOpKind::Shr,
+            BinOp(BinOpToken::Add) | BinOpEq(BinOpToken::Add) => BinOpKind::Add,
+            BinOp(BinOpToken::Sub) | BinOpEq(BinOpToken::Sub) => BinOpKind::Sub,
+            BinOp(BinOpToken::Mul) | BinOpEq(BinOpToken::Mul) => BinOpKind::Mul,
+            BinOp(BinOpToken::Div) | BinOpEq(BinOpToken::Div) => BinOpKind::Div,
+            BinOp(BinOpToken::Rem) | BinOpEq(BinOpToken::Rem) => BinOpKind::Rem,
+            BinOp(BinOpToken::BitAnd) | BinOpEq(BinOpToken::BitAnd) => BinOpKind::BitAnd,
+            BinOp(BinOpToken::BitXor) | BinOpEq(BinOpToken::BitXor) => BinOpKind::BitXor,
+
+            // Plain `=` has no operator kind of its own; it is mapped to `Eq` only so
+            // that this function does not return `None` for it. The caller builds an
+            // `Assign` node from the token kind and drops this value.
+            CondOp(CondOpToken::Eq) | TokenKind::Eq => BinOpKind::Eq,
+            CondOp(CondOpToken::Ne) => BinOpKind::Ne,
+            CondOp(CondOpToken::Lt) => BinOpKind::Lt,
+            CondOp(CondOpToken::Le) => BinOpKind::Le,
+            CondOp(CondOpToken::Gt) => BinOpKind::Gt,
+            CondOp(CondOpToken::Ge) => BinOpKind::Ge,
+
+            _ => return None,
+        };
+
+        // Step past the operator so the caller's right-hand operand parse starts on
+        // the operand rather than on the operator itself.
+        self.advance();
+
+        Some(raccoon_ast::BinOp { kind, span })
+    }
+
+    /// Parses an expression that has no prefix operator and no postfix form:
+    /// literals, paths, keyword expressions, parenthesised/tuple, block, array and
+    /// `#{ .. }` struct expressions.
+    fn parse_primary_expr(&mut
self) -> Result { + let start = self.token.span; + + let expr = match self.token.kind { + TokenKind::Lit(lit) => { + self.advance(); + ExprKind::Lit(lit) + } + TokenKind::Ident(ident) => match ident { + kw::If => self.parse_if_expr()?, + kw::While => self.parse_while_expr()?, + kw::For => self.parse_for_expr()?, + kw::Loop => self.parse_loop_expr()?, + kw::Return => self.parse_return_expr()?, + kw::Break => self.parse_break_expr()?, + kw::Continue => { + self.advance(); + ExprKind::Continue + } + _ => ExprKind::Path(self.parse_path(PathStyle::Expr)?), + }, + TokenKind::OpenDelim(Delimiter::Paren) => { + let mut exprs = self.parse_parenthesized(|p| p.parse_expr())?; + + if exprs.len() == 1 { + ExprKind::Paren(exprs.remove(0).into()) + } else { + ExprKind::Tuple(exprs) + } + } + TokenKind::OpenDelim(Delimiter::Brace) => { + let stmts = self.parse_braced(|p| p.parse_stmt())?; + ExprKind::Block( + Block { + stmts, + span: start.to(self.prev_token.span), + } + .into(), + ) + } + + TokenKind::OpenDelim(Delimiter::Bracket) => { + ExprKind::Array(self.parse_bracketed(|p| p.parse_expr())?) + } + + TokenKind::Hash => { + let fields = self.parse_braced(|p| p.parse_struct_expr_field())?; + + ExprKind::Struct(fields) + } + _ => return Err(ParseError), + }; + + Ok(self.mk_expr(expr, start)) + } + + fn parse_struct_expr_field(&mut self) -> Result { + let start = self.token.span; + let name = self.parse_ident(true)?; + + let value = if self.eat(TokenKind::Colon) { + self.parse_expr()? 
+ } else { + self.mk_expr(ExprKind::Path(name.into()), start) + }; + + Ok(StructExprField { + name, + value, + span: start.to(self.prev_token.span), + }) + } + + fn parse_if_expr(&mut self) -> Result { + self.expect_keyword(kw::If)?; + let cond = self.parse_expr()?; + let then_branch = self.parse_block()?; + let else_branch = if self.eat_keyword(kw::Else) { + if self.eat_keyword(kw::If) { + let start = self.token.span; + let expr = self.parse_if_expr()?; + Some(self.mk_expr(expr, start.to(self.prev_token.span))) + } else { + let start = self.token.span; + let block = self.parse_block()?.into(); + Some(self.mk_expr(ExprKind::Block(block), start.to(self.prev_token.span))) + } + } else { + None + }; + + Ok(ExprKind::If( + If { + cond, + then_branch, + else_branch, + } + .into(), + )) + } + + fn parse_while_expr(&mut self) -> Result { + Ok(ExprKind::While( + While { + cond: self.parse_expr()?, + body: self.parse_block()?, + } + .into(), + )) + } + + fn parse_for_expr(&mut self) -> Result { + self.expect_keyword(kw::For)?; + let pat = self.parse_pat()?; + self.expect_keyword(kw::In)?; + let iter = self.parse_expr()?; + let body = self.parse_block()?; + + Ok(ExprKind::For(For { pat, iter, body }.into())) + } + + fn parse_loop_expr(&mut self) -> Result { + self.expect_keyword(kw::Loop)?; + Ok(ExprKind::Loop(self.parse_block()?.into())) + } + + fn parse_return_expr(&mut self) -> Result { + self.expect_keyword(kw::Return)?; + let expr = if !self.eat(TokenKind::Semi) { + Some(self.parse_expr()?.into()) + } else { + None + }; + + Ok(ExprKind::Return(expr)) + } + + fn parse_break_expr(&mut self) -> Result { + self.expect_keyword(kw::Break)?; + let expr = if !self.eat(TokenKind::Semi) { + Some(self.parse_expr()?.into()) + } else { + None + }; + + Ok(ExprKind::Break(expr)) + } + #[inline] + fn mk_expr(&self, kind: ExprKind, span: Span) -> Expr { + Expr { kind, span } + } +} diff --git a/compiler/parse/src/item.rs b/compiler/parse/src/item.rs new file mode 100644 index 
0000000..1663139
--- /dev/null
+++ b/compiler/parse/src/item.rs
@@ -0,0 +1,258 @@
+use crate::{path::PathStyle, ParseError, Parser};
+use raccoon_ast::{
+    BinOpToken, Block, Crate, Delimiter, Enum, EnumVariant, EnumVariantFields, Fn, FnSig, Inline,
+    Item, ItemKind, ModKind, Param, Struct, StructFields, TokenKind, Ty, UseTree, UseTreeKind,
+    Visibility,
+};
+use raccoon_span::{kw, Ident, Span};
+use thin_vec::{thin_vec, ThinVec};
+
+impl Parser<'_> {
+    /// Parses a whole crate: items until the lexer reaches the end of input.
+    pub fn parse_crate(&mut self) -> Result<Crate, ParseError> {
+        let (items, span) = self.parse_mod()?;
+
+        Ok(Crate { items, span })
+    }
+
+    /// Parses one item (`fn`, `struct`, `enum`, `mod` or `use`), optionally
+    /// preceded by `pub`.
+    ///
+    /// Returns `Ok(None)` without consuming anything when the current token
+    /// does not start an item and no `pub` was seen, so callers can fall back
+    /// to another production. A `pub` not followed by an item keyword is an
+    /// error.
+    pub fn parse_item(&mut self) -> Result<Option<Item>, ParseError> {
+        let vis = self.parse_visibility()?;
+        let start = self.token.span;
+
+        // Look at the raw token rather than calling `parse_ident`, which
+        // rejects keywords and fails on non-identifier tokens.
+        let (kind, ident) = match self.token.ident().map(|ident| ident.name) {
+            Some(kw::Fn) => self.parse_fn_item()?,
+            Some(kw::Struct) => self.parse_struct_item()?,
+            Some(kw::Enum) => self.parse_enum_item()?,
+            Some(kw::Mod) => self.parse_mod_item()?,
+            Some(kw::Use) => self.parse_use_item()?,
+
+            // not an item: only acceptable while nothing has been consumed
+            _ if vis == Visibility::Inherited => return Ok(None),
+            _ => return Err(ParseError),
+        };
+
+        Ok(Some(Item {
+            vis,
+            kind,
+            ident,
+            span: start.to(self.prev_token.span),
+        }))
+    }
+
+    /// Parses an optional `pub`; anything else is `Visibility::Inherited`.
+    fn parse_visibility(&mut self) -> Result<Visibility, ParseError> {
+        if !self.eat_keyword(kw::Pub) {
+            return Ok(Visibility::Inherited);
+        }
+
+        // TODO: Parse `pub(crate)`
+        Ok(Visibility::Public)
+    }
+}
+
+/// The kind of a parsed item together with its name.
+type ItemInfo = (ItemKind, Ident);
+
+// parse functions and methods
+impl Parser<'_> {
+    /// Parses `fn name(params) -> ret { body }`; the return type is optional.
+    pub fn parse_fn_item(&mut self) -> Result<ItemInfo, ParseError> {
+        let start = self.token.span;
+        self.expect_keyword(kw::Fn)?;
+        let ident = self.parse_ident(true)?;
+
+        let params = self.parse_parenthesized(|p| p.parse_param())?;
+        let ret_ty = if self.eat(TokenKind::RArrow) {
+            Some(self.parse_ty()?)
+        } else {
+            None
+        };
+        // the signature span runs from `fn` up to the end of the return type
+        let sig = FnSig {
+            params,
+            ret_ty,
+            span: start.to(self.prev_token.span),
+        };
+        let body = self.parse_body()?;
+
+        Ok((ItemKind::Fn(Fn { sig, body }.into()), ident))
+    }
+
+    /// Parses one `pattern: type` function parameter.
+    fn parse_param(&mut self) -> Result<Param, ParseError> {
+        let start = self.token.span;
+        let ident = self.parse_pat()?;
+        self.expect(TokenKind::Colon)?;
+        let ty = self.parse_ty()?;
+        Ok(Param {
+            ident,
+            ty,
+            span: start.to(self.prev_token.span),
+        })
+    }
+
+    /// Parses a function body, which is a plain block.
+    fn parse_body(&mut self) -> Result<Block, ParseError> {
+        self.parse_block()
+    }
+}
+
+/// parse structs and enums;
+impl Parser<'_> {
+    /// Parses `struct Name { .. }`, `struct Name(..);` or `struct Name;`.
+    pub fn parse_struct_item(&mut self) -> Result<ItemInfo, ParseError> {
+        self.expect_keyword(kw::Struct)?;
+        let ident = self.parse_ident(true)?;
+
+        let fields = if self.check_delim(Delimiter::Brace) {
+            StructFields::Struct(self.parse_braced(|p| p.parse_struct_field())?)
+        } else if self.check_delim(Delimiter::Paren) {
+            let fields = StructFields::Tuple(self.parse_parenthesized(|p| p.parse_tuple_field())?);
+            self.expect_semi()?;
+            fields
+        } else {
+            self.expect_semi()?;
+            StructFields::Unit
+        };
+
+        Ok((ItemKind::Struct(Struct { fields }.into()), ident))
+    }
+
+    /// Parses a named field: `[pub] name: type`.
+    fn parse_struct_field(&mut self) -> Result<(Visibility, Ident, Ty), ParseError> {
+        let vis = self.parse_visibility()?;
+        let ident = self.parse_ident(true)?;
+        self.expect(TokenKind::Colon)?;
+        let ty = self.parse_ty()?;
+        Ok((vis, ident, ty))
+    }
+
+    /// Parses a positional field: `[pub] type`.
+    fn parse_tuple_field(&mut self) -> Result<(Visibility, Ty), ParseError> {
+        let vis = self.parse_visibility()?;
+        let ty = self.parse_ty()?;
+        Ok((vis, ty))
+    }
+
+    /// Parses `enum Name { variants }` or the body-less form `enum Name;`.
+    pub fn parse_enum_item(&mut self) -> Result<ItemInfo, ParseError> {
+        self.expect_keyword(kw::Enum)?;
+        let ident = self.parse_ident(true)?;
+
+        let variants = if self.check_delim(Delimiter::Brace) {
+            Some(self.parse_braced(|p| p.parse_variant())?)
+        } else {
+            self.expect_semi()?;
+            None
+        };
+
+        Ok((ItemKind::Enum(Enum { variants }.into()), ident))
+    }
+
+    /// Parses one enum variant with struct-like, tuple-like or no fields.
+    fn parse_variant(&mut self) -> Result<EnumVariant, ParseError> {
+        let ident = self.parse_ident(true)?;
+
+        let fields = if self.check_delim(Delimiter::Brace) {
+            EnumVariantFields::Struct(self.parse_braced(|p| p.parse_struct_field())?)
+        } else if self.check_delim(Delimiter::Paren) {
+            EnumVariantFields::Tuple(self.parse_parenthesized(|p| p.parse_tuple_field())?)
+        } else {
+            EnumVariantFields::Unit
+        };
+
+        Ok(EnumVariant { ident, fields })
+    }
+}
+
+/// parse modules and use statements
+impl Parser<'_> {
+    /// Parses `mod name;` (unloaded) or an inline `mod name { items }`.
+    pub fn parse_mod_item(&mut self) -> Result<ItemInfo, ParseError> {
+        self.expect_keyword(kw::Mod)?;
+        let ident = self.parse_ident(true)?;
+
+        let kind = if self.eat(TokenKind::Semi) {
+            ModKind::Unloaded
+        } else {
+            // An inline module is brace-delimited, unlike the file-level
+            // module that `parse_mod` reads up to the end of input.
+            let start = self.token.span;
+            self.expect(TokenKind::OpenDelim(Delimiter::Brace))?;
+
+            let mut items = thin_vec![];
+            while !self.eat(TokenKind::CloseDelim(Delimiter::Brace)) {
+                let Some(item) = self.parse_item()? else {
+                    return Err(ParseError);
+                };
+                items.push(item);
+            }
+
+            ModKind::Loaded(items, Inline::Yes, start.to(self.prev_token.span))
+        };
+
+        Ok((ItemKind::Mod(kind.into()), ident))
+    }
+
+    /// Parses items until the lexer reports end of input.
+    ///
+    /// A token that does not start an item is an error; `parse_item` leaves
+    /// such a token unconsumed, so skipping it would loop forever.
+    pub fn parse_mod(&mut self) -> Result<(ThinVec<Item>, Span), ParseError> {
+        let start = self.token.span;
+        let mut items = thin_vec![];
+        while !self.lexer.is_eof() {
+            let Some(item) = self.parse_item()? else {
+                return Err(ParseError);
+            };
+            items.push(item);
+        }
+        Ok((items, start.to(self.prev_token.span)))
+    }
+
+    /// Parses `use tree;`. Use items have no name, so the ident is empty.
+    pub fn parse_use_item(&mut self) -> Result<ItemInfo, ParseError> {
+        self.expect_keyword(kw::Use)?;
+        let tree = self.parse_use_tree()?;
+        self.expect_semi()?;
+
+        Ok((ItemKind::Use(tree.into()), Ident::empty()))
+    }
+
+    /// Parses `path`, `path as name`, `path::*` or `path::{tree, ..}`.
+    fn parse_use_tree(&mut self) -> Result<UseTree, ParseError> {
+        let start = self.token.span;
+        let base = self.parse_path(PathStyle::Mod)?;
+
+        // A `Mod`-style path ends just after a `::` that introduces `*` or
+        // `{`, so a trailing `::` tells a glob/nested tree from a single one.
+        let kind = if self.prev_token.kind == TokenKind::DoubleColon {
+            if self.eat(TokenKind::BinOp(BinOpToken::Mul)) {
+                UseTreeKind::Glob
+            } else {
+                UseTreeKind::Nested(self.parse_braced(|p| p.parse_use_tree())?)
+            }
+        } else {
+            UseTreeKind::Single(if self.eat_keyword(kw::As) {
+                Some(self.parse_ident(true)?)
+            } else {
+                None
+            })
+        };
+
+        Ok(UseTree {
+            base,
+            kind,
+            span: start.to(self.prev_token.span),
+        })
+    }
+}
diff --git a/compiler/parse/src/lib.rs b/compiler/parse/src/lib.rs
new file mode 100644
index 0000000..f6e3b05
--- /dev/null
+++ b/compiler/parse/src/lib.rs
@@ -0,0 +1,213 @@
+mod expr;
+mod item;
+mod pat;
+mod path;
+mod stmt;
+mod ty;
+
+use raccoon_ast::{Delimiter, Token, TokenKind, DUMMY_TOKEN};
+use raccoon_lexer::Lexer;
+use raccoon_span::{Ident, Symbol};
+
+use std::mem::replace;
+use thin_vec::ThinVec;
+
+// TODO: Replace this with diagnostic reporting
+pub struct ParseError;
+
+/// A recursive-descent parser over the token stream of a [`Lexer`].
+pub struct Parser<'a> {
+    lexer: Lexer<'a>,
+    /// The current (not yet consumed) token.
+    token: Token,
+    /// The most recently consumed token.
+    prev_token: Token,
+}
+
+impl<'a> Parser<'a> {
+    /// Creates a parser over `input` and loads the first token.
+    pub fn new(input: &'a str) -> Self {
+        let mut parser = Self {
+            lexer: Lexer::new(input),
+            token: DUMMY_TOKEN,
+            prev_token: DUMMY_TOKEN,
+        };
+
+        parser.advance();
+
+        parser
+    }
+
+    /// Consumes the current token and returns the token that replaces it.
+    pub fn advance(&mut self) -> Token {
+        let token = self.lexer.advance();
+        self.prev_token = replace(&mut self.token, token);
+        token
+    }
+
+    /// Asks the lexer to scan a number and returns its symbol.
+    ///
+    /// When the scanned symbol is empty, `None` is returned and the parser's
+    /// current token is left unchanged.
+    pub fn advance_int(&mut self) -> Option<Symbol> {
+        let token = self.lexer.scan_number(None, false);
+        // NOTE(review): assumes `scan_number` always yields a literal token
+        let number = token.lit().unwrap().symbol;
+
+        if number.as_str().is_empty() {
+            None
+        } else {
+            self.prev_token = replace(&mut self.token, token);
+            Some(number)
+        }
+    }
+
+    /// Consumes the current token if it equals `kind`.
+    pub fn eat(&mut self, kind: TokenKind) -> bool {
+        if self.token.kind == kind {
+            self.advance();
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Consumes the current token if it is the identifier `keyword`.
+    pub fn eat_keyword(&mut self, keyword: Symbol) -> bool {
+        self.eat(TokenKind::Ident(keyword))
+    }
+
+    /// Consumes the current token if it is the opening delimiter `delim`.
+    pub fn eat_delim(&mut self, delim: Delimiter) -> bool {
+        self.eat(TokenKind::OpenDelim(delim))
+    }
+
+    /// Returns whether the current token equals `kind`, without consuming it.
+    pub fn check(&self, kind: TokenKind) -> bool {
+        self.token.kind == kind
+    }
+
+    /// Returns whether the current token is the identifier `keyword`.
+    pub fn check_keyword(&self, keyword: Symbol) -> bool {
+        self.check(TokenKind::Ident(keyword))
+    }
+
+    /// Returns whether the current token is the opening delimiter `delim`.
+    pub fn check_delim(&self, delim: Delimiter) -> bool {
+        self.check(TokenKind::OpenDelim(delim))
+    }
+
+    /// Consumes the current token if it equals `kind`, else fails.
+    pub fn expect(&mut self, kind: TokenKind) -> Result<(), ParseError> {
+        if self.eat(kind) {
+            Ok(())
+        } else {
+            Err(ParseError)
+        }
+    }
+
+    /// Consumes the identifier `keyword` or fails.
+    pub fn expect_keyword(&mut self, keyword: Symbol) -> Result<(), ParseError> {
+        self.expect(TokenKind::Ident(keyword))
+    }
+
+    /// Consumes a `TokenKind::Semi` token or fails.
+    pub fn expect_semi(&mut self) -> Result<(), ParseError> {
+        self.expect(TokenKind::Semi)
+    }
+
+    /// Parses a delimited list of items.
+    ///
+    /// Expects `start`, then repeatedly runs `f` for one item followed by
+    /// either `sep` or `end`. An empty list and a trailing `sep` before
+    /// `end` are both accepted.
+    #[inline]
+    pub fn parse_terminals<T>(
+        &mut self,
+        start: TokenKind,
+        sep: TokenKind,
+        end: TokenKind,
+        f: impl Fn(&mut Self) -> Result<T, ParseError>,
+    ) -> Result<ThinVec<T>, ParseError> {
+        self.expect(start)?;
+
+        let mut items = ThinVec::new();
+
+        while !self.eat(end) {
+            items.push(f(self)?);
+
+            if self.eat(end) {
+                break;
+            }
+            self.expect(sep)?;
+        }
+
+        Ok(items)
+    }
+
+    /// Parses a comma-separated list of items.
+    #[inline]
+    pub fn parse_comma_separated<T>(
+        &mut self,
+        start: TokenKind,
+        end: TokenKind,
+        f: impl Fn(&mut Self) -> Result<T, ParseError>,
+    ) -> Result<ThinVec<T>, ParseError> {
+        self.parse_terminals(start, TokenKind::Comma, end, f)
+    }
+
+    /// Parses a comma-separated list of items, enclosed in parentheses.
+    #[inline]
+    pub fn parse_parenthesized<T>(
+        &mut self,
+        f: impl Fn(&mut Self) -> Result<T, ParseError>,
+    ) -> Result<ThinVec<T>, ParseError> {
+        self.parse_comma_separated(
+            TokenKind::OpenDelim(Delimiter::Paren),
+            TokenKind::CloseDelim(Delimiter::Paren),
+            f,
+        )
+    }
+
+    /// Parses a comma-separated list of items, enclosed in braces.
+    #[inline]
+    pub fn parse_braced<T>(
+        &mut self,
+        f: impl Fn(&mut Self) -> Result<T, ParseError>,
+    ) -> Result<ThinVec<T>, ParseError> {
+        self.parse_comma_separated(
+            TokenKind::OpenDelim(Delimiter::Brace),
+            TokenKind::CloseDelim(Delimiter::Brace),
+            f,
+        )
+    }
+
+    /// Parses a comma-separated list of items, enclosed in brackets.
+    #[inline]
+    pub fn parse_bracketed<T>(
+        &mut self,
+        f: impl Fn(&mut Self) -> Result<T, ParseError>,
+    ) -> Result<ThinVec<T>, ParseError> {
+        self.parse_comma_separated(
+            TokenKind::OpenDelim(Delimiter::Bracket),
+            TokenKind::CloseDelim(Delimiter::Bracket),
+            f,
+        )
+    }
+
+    /// Parses a non-keyword identifier, consuming it only when `eat` is set.
+    ///
+    /// Fails if the current token is a keyword or not an identifier at all.
+    pub fn parse_ident(&mut self, eat: bool) -> Result<Ident, ParseError> {
+        match self.token.ident() {
+            Some(ident) if !ident.is_keyword() => {
+                if eat {
+                    self.advance();
+                }
+
+                Ok(ident)
+            }
+            _ => Err(ParseError),
+        }
+    }
+}
diff --git a/compiler/parse/src/pat.rs b/compiler/parse/src/pat.rs
new file mode 100644
index 0000000..c4b8954
--- /dev/null
+++ b/compiler/parse/src/pat.rs
@@ -0,0 +1,67 @@
+use raccoon_ast::{Delimiter, EnumPat, Pat, PatKind, StructPat, StructPatField, TokenKind};
+
+use crate::{path::PathStyle, ParseError, Parser};
+
+impl<'a> Parser<'a> {
+    /// Parses a pattern and attaches the span of the tokens it covers.
+    pub fn parse_pat(&mut self) -> Result<Pat, ParseError> {
+        let start = self.token.span;
+        let kind = self.parse_pat_kind()?;
+        Ok(Pat {
+            kind,
+            span: start.to(self.prev_token.span),
+        })
+    }
+
+    /// Parses a path, struct, enum, tuple or slice pattern.
+    fn parse_pat_kind(&mut self) -> Result<PatKind, ParseError> {
+        let pat = match self.token.kind {
+            TokenKind::Ident(_) => {
+                let path = self.parse_path(PathStyle::Expr)?;
+
+                // what follows the path decides between `Path { .. }`,
+                // `Path(..)` and a bare path
+                match self.token.kind {
+                    TokenKind::OpenDelim(Delimiter::Brace) => {
+                        let fields = self.parse_braced(|p| p.parse_pat_field())?;
+                        PatKind::Struct(StructPat { path, fields }.into())
+                    }
+                    TokenKind::OpenDelim(Delimiter::Paren) => {
+                        let fields = self.parse_parenthesized(|p| p.parse_pat())?;
+                        PatKind::Enum(EnumPat { path, fields }.into())
+                    }
+
+                    _ => PatKind::Path(path),
+                }
+            }
+            TokenKind::OpenDelim(Delimiter::Paren) => {
+                PatKind::Tuple(self.parse_parenthesized(|p| p.parse_pat())?)
+            }
+
+            TokenKind::OpenDelim(Delimiter::Bracket) => {
+                PatKind::Slice(self.parse_bracketed(|p| p.parse_pat())?)
+            }
+
+            _ => return Err(ParseError),
+        };
+
+        Ok(pat)
+    }
+
+    /// Parses a struct pattern field: `name` or `name: pattern`.
+    fn parse_pat_field(&mut self) -> Result<StructPatField, ParseError> {
+        let start = self.token.span;
+        let ident = self.parse_ident(true)?;
+        let pat = if self.eat(TokenKind::Colon) {
+            Some(self.parse_pat()?)
+        } else {
+            None
+        };
+
+        Ok(StructPatField {
+            ident,
+            pat,
+            span: start.to(self.prev_token.span),
+        })
+    }
+}
diff --git a/compiler/parse/src/path.rs b/compiler/parse/src/path.rs
new file mode 100644
index 0000000..c87f98a
--- /dev/null
+++ b/compiler/parse/src/path.rs
@@ -0,0 +1,59 @@
+use raccoon_ast::{BinOpToken, Delimiter, Path, PathSegment, TokenKind};
+use thin_vec::thin_vec;
+
+use crate::{ParseError, Parser};
+
+/// The style of path being parsed.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum PathStyle {
+    /// A path in expression position, e.g. `foo::bar::<Baz>`
+    Expr,
+
+    /// A path in type position, e.g. `foo::bar`
+    Type,
+
+    /// A path in module declaration position, e.g. `foo::bar::Baz`
+    Mod,
+}
+
+impl<'a> Parser<'a> {
+    /// Parses a `::`-separated path.
+    ///
+    /// In [`PathStyle::Mod`] the path stops right after a `::` that is
+    /// followed by `{` or `*`: that `::` is consumed and the caller can
+    /// detect it through `prev_token`.
+    pub fn parse_path(&mut self, style: PathStyle) -> Result<Path, ParseError> {
+        let mut segments = thin_vec![PathSegment {
+            ident: self.parse_ident(true)?,
+            span: self.prev_token.span,
+        }];
+
+        while self.eat(TokenKind::DoubleColon) {
+            if style == PathStyle::Mod
+                && matches!(
+                    self.token.kind,
+                    TokenKind::OpenDelim(Delimiter::Brace) | TokenKind::BinOp(BinOpToken::Mul)
+                )
+            {
+                break;
+            }
+
+            let ident = match self.token.ident() {
+                // `parse_ident` may reject these as keywords, so the token
+                // has to be consumed by hand
+                Some(ident) if ident.is_path_segment_keyword() => {
+                    self.advance();
+                    ident
+                }
+                _ => self.parse_ident(true)?,
+            };
+
+            segments.push(PathSegment {
+                ident,
+                span: self.prev_token.span,
+            });
+        }
+
+        Ok(Path { segments })
+    }
+}
diff --git a/compiler/parse/src/stmt.rs b/compiler/parse/src/stmt.rs
new file mode 100644
index 0000000..fdc7c64
--- /dev/null
+++ b/compiler/parse/src/stmt.rs
@@ -0,0 +1,82 @@
+use raccoon_ast::{Block, Delimiter, Let, Stmt, StmtKind, TokenKind};
+use raccoon_span::kw;
+use thin_vec::ThinVec;
+
+use crate::{ParseError, Parser};
+
+impl<'a> Parser<'a> {
+    /// Parses a statement and attaches the span of the tokens it covers.
+    pub fn parse_stmt(&mut self) -> Result<Stmt, ParseError> {
+        let start = self.token.span;
+        let kind = self.parse_stmt_kind()?;
+        Ok(Stmt {
+            kind,
+            span: start.to(self.prev_token.span),
+        })
+    }
+
+    /// Tries, in order: a `let` statement, an item, an empty statement
+    /// (a lone `;`) and finally an expression.
+    fn parse_stmt_kind(&mut self) -> Result<StmtKind, ParseError> {
+        if self.check_keyword(kw::Let) {
+            return Ok(StmtKind::Let(self.parse_let_stmt()?.into()));
+        }
+
+        if let Some(item) = self.parse_item()? {
+            return Ok(StmtKind::Item(item.into()));
+        }
+
+        if self.eat(TokenKind::Semi) {
+            return Ok(StmtKind::Empty);
+        }
+
+        Ok(StmtKind::Expr(self.parse_expr()?.into()))
+    }
+
+    /// Parses `let pattern [: type] [= init];`.
+    pub fn parse_let_stmt(&mut self) -> Result<Let, ParseError> {
+        let start = self.token.span;
+        self.expect_keyword(kw::Let)?;
+        let pat = self.parse_pat()?;
+
+        let ty = if self.eat(TokenKind::Colon) {
+            Some(self.parse_ty()?)
+        } else {
+            None
+        };
+
+        let init = if self.eat(TokenKind::Eq) {
+            Some(self.parse_expr()?)
+        } else {
+            None
+        };
+
+        self.expect_semi()?;
+
+        Ok(Let {
+            pat,
+            ty,
+            init,
+            span: start.to(self.prev_token.span),
+        })
+    }
+
+    /// Parses `{ statement* }`.
+    ///
+    /// Statements are read back to back until the closing brace; they carry
+    /// their own `;` terminators and are not comma-separated.
+    pub fn parse_block(&mut self) -> Result<Block, ParseError> {
+        let start = self.token.span;
+        self.expect(TokenKind::OpenDelim(Delimiter::Brace))?;
+
+        let mut stmts = ThinVec::new();
+        while !self.eat(TokenKind::CloseDelim(Delimiter::Brace)) {
+            stmts.push(self.parse_stmt()?);
+        }
+
+        Ok(Block {
+            stmts,
+            span: start.to(self.prev_token.span),
+        })
+    }
+}
diff --git a/compiler/parse/src/ty.rs b/compiler/parse/src/ty.rs
new file mode 100644
index 0000000..cc3fa42
--- /dev/null
+++ b/compiler/parse/src/ty.rs
@@ -0,0 +1,48 @@
+use crate::{path::PathStyle, ParseError, Parser};
+use raccoon_ast::{Delimiter, TokenKind, Ty, TyKind};
+use raccoon_span::kw;
+
+impl<'a> Parser<'a> {
+    /// Parses a type, optionally followed by `[]` to form an array type.
+    pub fn parse_ty(&mut self) -> Result<Ty, ParseError> {
+        let start = self.token.span;
+        let kind = self.parse_ty_kind()?;
+        let ty = Ty {
+            kind,
+            span: start.to(self.prev_token.span),
+        };
+
+        if self.eat_delim(Delimiter::Bracket) {
+            self.expect(TokenKind::CloseDelim(Delimiter::Bracket))?;
+            Ok(Ty {
+                kind: TyKind::Array(ty.into()),
+                span: start.to(self.prev_token.span),
+            })
+        } else {
+            Ok(ty)
+        }
+    }
+
+    /// Parses a parenthesized type, unit, tuple, the wildcard or a path type.
+    fn parse_ty_kind(&mut self) -> Result<TyKind, ParseError> {
+        let kind = if self.check_delim(Delimiter::Paren) {
+            let mut types = self.parse_parenthesized(|p| p.parse_ty())?;
+
+            // `(T)` is a grouping, `()` the unit type, anything else a tuple
+            if types.len() == 1 {
+                TyKind::Paren(types.remove(0).into())
+            } else if types.is_empty() {
+                TyKind::Unit
+            } else {
+                TyKind::Tuple(types)
+            }
+        } else if self.eat_keyword(kw::Wildcard) {
+            // the wildcard token must be consumed, not merely observed
+            TyKind::Infer
+        } else {
+            TyKind::Path(self.parse_path(PathStyle::Type)?)
+        };
+
+        Ok(kind)
+    }
+}