Commit

feat(lexer): improve lexer structure
H1ghBre4k3r committed Sep 20, 2023
1 parent 322fcfc commit b4f3562
Showing 4 changed files with 247 additions and 229 deletions.
28 changes: 28 additions & 0 deletions src/lexer/lexmap.rs
@@ -0,0 +1,28 @@
use std::collections::HashMap;

use super::Terminal;

/// Struct for storing terminal symbols with their respective "key".
#[derive(Debug, Clone, Default)]
pub struct LexMap {
map: HashMap<&'static str, Terminal>,
}

impl LexMap {
pub fn insert(&mut self, key: &'static str, value: Terminal) {
self.map.insert(key, value);
}

pub fn can_match(&self, key: &str) -> bool {
for map_key in self.map.keys() {
if map_key.starts_with(key) {
return true;
}
}
false
}

pub fn get(&self, key: &str) -> Option<Terminal> {
self.map.get(key).cloned()
}
}
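
For orientation, here is a rough usage sketch of the new LexMap (not part of this commit). It assumes the snippet lives inside the lexer module so that LexMap and Terminal are in scope; the keys chosen below are illustrative only.

fn lexmap_sketch() {
    // Build a map from terminal spellings to their Terminal variants.
    let mut map = LexMap::default();
    map.insert("let", Terminal::Let);
    map.insert("=", Terminal::Eq);
    map.insert(";", Terminal::Semicolon);

    // `can_match` reports whether any stored key starts with the given
    // prefix, i.e. whether the lexer may keep consuming characters.
    assert!(map.can_match("le"));
    assert!(!map.can_match("foo"));

    // `get` resolves a complete key to its Terminal, which can then be
    // turned into a positioned Token via Terminal::to_token.
    if let Some(terminal) = map.get("let") {
        let _token = terminal.to_token((1, 1));
    }
}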
255 changes: 26 additions & 229 deletions src/lexer.rs → src/lexer/mod.rs
@@ -1,229 +1,13 @@
use lazy_static::lazy_static;
use pesca_parser_derive::Token as ParseToken;
use std::{collections::HashMap, error::Error, fmt::Display, iter::Peekable, str::Chars};

type Position = (usize, usize);

#[derive(Debug, Clone, ParseToken)]
pub enum Token {
#[terminal]
Eq {
position: Position,
},
#[terminal]
Let {
position: Position,
},
Id {
value: String,
position: Position,
},
Num {
value: u64,
position: Position,
},
#[terminal]
Semicolon {
position: Position,
},
// TODO: think about lexing comments
Comment {
value: String,
position: Position,
},
#[terminal]
Plus {
position: Position,
},
#[terminal]
Times {
position: Position,
},
#[terminal]
LParen {
position: Position,
},
#[terminal]
RParen {
position: Position,
},
#[terminal]
LBrace {
position: Position,
},
#[terminal]
RBrace {
position: Position,
},
#[terminal]
FnKeyword {
position: Position,
},
#[terminal]
ReturnKeyword {
position: Position,
},
#[terminal]
Colon {
position: Position,
},
#[terminal]
Comma {
position: Position,
},
}

impl Terminal {
pub fn to_token(&self, position: Position) -> Token {
match self {
Terminal::Eq => Token::Eq { position },
Terminal::Let => Token::Let { position },
Terminal::Semicolon => Token::Semicolon { position },
Terminal::Plus => Token::Plus { position },
Terminal::Times => Token::Times { position },
Terminal::LParen => Token::LParen { position },
Terminal::RParen => Token::RParen { position },
Terminal::LBrace => Token::LBrace { position },
Terminal::RBrace => Token::RBrace { position },
Terminal::FnKeyword => Token::FnKeyword { position },
Terminal::ReturnKeyword => Token::ReturnKeyword { position },
Terminal::Colon => Token::Colon { position },
Terminal::Comma => Token::Comma { position },
}
}
}

// TODO: move this to own derive macro
impl PartialEq for Token {
fn eq(&self, other: &Self) -> bool {
use Token::*;
matches!(
(self, other),
(Eq { .. }, Eq { .. })
| (Let { .. }, Let { .. })
| (Id { .. }, Id { .. })
| (Num { .. }, Num { .. })
| (Semicolon { .. }, Semicolon { .. })
| (Comment { .. }, Comment { .. })
| (Plus { .. }, Plus { .. })
| (Times { .. }, Times { .. })
| (LParen { .. }, LParen { .. })
| (RParen { .. }, RParen { .. })
| (LBrace { .. }, LBrace { .. })
| (RBrace { .. }, RBrace { .. })
| (FnKeyword { .. }, FnKeyword { .. })
| (ReturnKeyword { .. }, ReturnKeyword { .. })
| (Colon { .. }, Colon { .. })
| (Comma { .. }, Comma { .. })
)
}
}

impl Eq for Token {}

impl Token {
pub fn position(&self) -> Position {
match self {
Token::Eq { position } => *position,
Token::Let { position } => *position,
Token::Id { position, .. } => *position,
Token::Num { position, .. } => *position,
Token::Semicolon { position } => *position,
Token::Comment { position, .. } => *position,
Token::Plus { position } => *position,
Token::Times { position } => *position,
Token::LParen { position } => *position,
Token::RParen { position } => *position,
Token::LBrace { position } => *position,
Token::RBrace { position } => *position,
Token::FnKeyword { position } => *position,
Token::ReturnKeyword { position } => *position,
Token::Colon { position } => *position,
Token::Comma { position } => *position,
}
}
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Tokens<T> {
tokens: Vec<T>,
index: usize,
}

impl<T> Tokens<T>
where
T: Clone,
{
pub fn new(tokens: Vec<T>) -> Self {
Self { tokens, index: 0 }
}

pub fn next(&mut self) -> Option<T> {
if self.index < self.tokens.len() {
let item = self.tokens.get(self.index).cloned();
self.index += 1;
return item;
}

None
}

pub fn peek(&mut self) -> Option<T> {
return self.tokens.get(self.index).cloned();
}

pub fn get_index(&self) -> usize {
self.index
}

pub fn set_index(&mut self, index: usize) {
self.index = index;
}
}

impl<T> From<Vec<T>> for Tokens<T>
where
T: Clone,
{
fn from(value: Vec<T>) -> Self {
Self::new(value)
}
}
mod lexmap;
mod token;
mod tokens;

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexError(String);
pub use lexmap::*;
pub use token::*;
pub use tokens::*;

impl Display for LexError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.0.as_str())
}
}

impl Error for LexError {}

#[derive(Debug, Clone, Default)]
struct LexMap {
map: HashMap<&'static str, Terminal>,
}

impl LexMap {
pub fn insert(&mut self, key: &'static str, value: Terminal) {
self.map.insert(key, value);
}

pub fn can_match(&self, key: &str) -> bool {
for map_key in self.map.keys() {
if map_key.starts_with(key) {
return true;
}
}
false
}

pub fn get(&self, key: &str) -> Option<Terminal> {
self.map.get(key).cloned()
}
}
use lazy_static::lazy_static;
use std::{error::Error, fmt::Display, iter::Peekable, str::Chars};

#[macro_export]
macro_rules! terminal {
@@ -254,6 +38,19 @@ lazy_static! {
};
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexError(String);

pub type LexResult<T> = Result<T, LexError>;

impl Display for LexError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.0.as_str())
}
}

impl Error for LexError {}

#[derive(Debug, Clone)]
pub struct Lexer<'a> {
tokens: Vec<Token>,
@@ -299,13 +96,13 @@ impl<'a> Lexer<'a> {
}
}

pub fn lex(mut self) -> Result<Vec<Token>, LexError> {
pub fn lex(mut self) -> LexResult<Vec<Token>> {
self.lex_internal()?;

Ok(self.tokens)
}

pub fn lex_internal(&mut self) -> Result<(), LexError> {
pub fn lex_internal(&mut self) -> LexResult<()> {
self.eat_whitespace();

let Some(next) = self.peek() else {
@@ -321,7 +118,7 @@ impl<'a> Lexer<'a> {
Ok(())
}

fn lex_special(&mut self) -> Result<(), LexError> {
fn lex_special(&mut self) -> LexResult<()> {
let mut stack = vec![];

let position = (self.line, self.col);
@@ -357,7 +154,7 @@ impl<'a> Lexer<'a> {
self.lex_internal()
}

fn lex_alphanumeric(&mut self) -> Result<(), LexError> {
fn lex_alphanumeric(&mut self) -> LexResult<()> {
let mut stack = vec![];

let position = (self.line, self.col);
@@ -381,7 +178,7 @@ impl<'a> Lexer<'a> {
self.lex_internal()
}

fn lex_numeric(&mut self) -> Result<(), LexError> {
fn lex_numeric(&mut self) -> LexResult<()> {
let mut stack = vec![];

let position = (self.line, self.col);
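For context, a hedged sketch of how the reorganized module might be driven end to end. The constructor name Lexer::new is an assumption (only lex itself is visible in the hunks above), and Token::position is assumed to carry over unchanged from the old src/lexer.rs into src/lexer/token.rs.

// Hypothetical caller, not part of this commit. The re-exports in
// src/lexer/mod.rs make Lexer, Token and LexResult reachable from the
// module root; the constructor below is an assumption.
use crate::lexer::{LexResult, Lexer, Token};

fn tokenize(source: &str) -> LexResult<Vec<Token>> {
    // `lex` consumes the lexer and returns the accumulated tokens,
    // or a LexError describing why lexing failed.
    let lexer = Lexer::new(source);
    lexer.lex()
}

fn print_tokens() -> LexResult<()> {
    let tokens = tokenize("let answer = 6 * 7;")?;
    for token in &tokens {
        // Every token carries its (line, column) position.
        let (line, col) = token.position();
        println!("{token:?} at {line}:{col}");
    }
    Ok(())
}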
