-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This implements the [token scanning](http://craftinginterpreters.com/scanning.html). I tried to stay as close to the original Java implementation as possible while still being "rusty". Differences to the Java implementation: * There is no `null` check and a few `if-else` blocks became `unwrap_or('\0')`. * Literals are not of type `Object` but use `String` and `f64`. * `is_digit` and `is_alpha` are much simpler in Rust. * The `KEYWORDS` map is not only static but created at compile time thanks to [phf](https://crates.io/crates/phf). Possible improvements in Rust: * One could use a `PeekableIterator` for `Chars`. However, this would diverge from the original scanner quite a bit. * One could strive for zero copy for lexemes. To keep it simple I stuck with `String`. * The strangest part is creating a substring with ``` self.source .get(self.start..self.current).expect("Source token is empty."); ``` I was afraid that changing it would make it harder to relate to the original chapter. * The scanner could be less stateful and avoid `self.current` and `self.start`.
- Loading branch information
Showing
9 changed files
with
407 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
language: rust | ||
rust: | ||
- stable | ||
- beta | ||
- nightly |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
[package] | ||
name = "lox-rs" | ||
version = "0.1.0" | ||
authors = ["Karsten Jeschkies <k@jeschkies.xyz>"] | ||
edition = "2018" | ||
build = "build.rs" | ||
|
||
[dependencies] | ||
phf = "0.7.24" | ||
|
||
[build-dependencies] | ||
phf_codegen = "0.7.24" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,6 @@ | ||
# lox-rs | ||
A Lox Interpreter in Rust | ||
# Lox-rs [![Build Status](https://travis-ci.com/jeschkies/lox-rs.svg?branch=master)](https://travis-ci.com/jeschkies/lox-rs) | ||
|
||
A [Lox](http://craftinginterpreters.com/the-lox-language.html) Interpreter in Rust based on the | ||
[Crafting Interpreters](http://craftinginterpreters.com) book. | ||
|
||
Each commit corresponds to one chapter in the book. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
extern crate phf_codegen; | ||
|
||
use std::env; | ||
use std::fs::File; | ||
use std::io::{BufWriter, Write}; | ||
use std::path::Path; | ||
|
||
fn main() { | ||
let path = Path::new(&env::var("OUT_DIR").unwrap()).join("keywords.rs"); | ||
let mut file = BufWriter::new(File::create(&path).unwrap()); | ||
|
||
write!( | ||
&mut file, | ||
"pub static KEYWORDS: phf::Map<&'static str, TokenType> = | ||
" | ||
) | ||
.unwrap(); | ||
phf_codegen::Map::new() | ||
.entry("and", "TokenType::And") | ||
.entry("class", "TokenType::Class") | ||
.entry("else", "TokenType::Else") | ||
.entry("false", "TokenType::False") | ||
.entry("fun", "TokenType::Fun") | ||
.entry("for", "TokenType::For") | ||
.entry("if", "TokenType::If") | ||
.entry("nil", "TokenType::Nil") | ||
.entry("or", "TokenType::Or") | ||
.entry("print", "TokenType::Print") | ||
.entry("return", "TokenType::Return") | ||
.entry("super", "TokenType::Super") | ||
.entry("this", "TokenType::This") | ||
.entry("true", "TokenType::True") | ||
.entry("var", "TokenType::Var") | ||
.entry("while", "TokenType::While") | ||
.build(&mut file) | ||
.unwrap(); | ||
write!(&mut file, ";\n").unwrap(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
pub fn error(line: i32, message: &str) { | ||
report(line, "", message); | ||
} | ||
|
||
/// Prints a formatted error message to stderr.
///
/// `where_` carries optional location context (trailing underscore because
/// `where` is a Rust keyword).
pub fn report(line: i32, where_: &str, message: &str) {
    // had_error = true; TODO: Use custom Error type
    eprintln!("[line {}] Error{}: {}", line, where_, message);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
mod error; | ||
mod scanner; | ||
mod token; | ||
|
||
use std::io::{self, BufRead}; | ||
use std::process::exit; | ||
use std::{env, fs}; | ||
|
||
use scanner::Scanner; | ||
|
||
fn main() -> Result<(), Box<dyn std::error::Error + 'static>> { | ||
let args: Vec<String> = env::args().collect(); | ||
match args.as_slice() { | ||
[_, file] => run_file(file)?, | ||
[_] => run_prompt()?, | ||
_ => { | ||
eprintln!("Usage: lox-rs [script]"); | ||
exit(64) | ||
} | ||
} | ||
Ok(()) | ||
} | ||
|
||
fn run_file(path: &str) -> io::Result<()> { | ||
let source = fs::read_to_string(path)?; | ||
run(source) | ||
} | ||
|
||
fn run_prompt() -> io::Result<()> { | ||
let stdin = io::stdin(); | ||
for line in stdin.lock().lines() { | ||
run(line?); // Ignore error. | ||
print!("> "); | ||
} | ||
Ok(()) | ||
} | ||
|
||
fn run(source: String) -> io::Result<()> { | ||
let mut scanner = Scanner::new(source); | ||
let tokens = scanner.scan_tokens(); | ||
|
||
for token in tokens { | ||
println!("{}", token); | ||
} | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
use crate::error::error; | ||
use crate::token::{Token, TokenType, KEYWORDS}; | ||
|
||
pub struct Scanner { | ||
source: String, | ||
tokens: Vec<Token>, | ||
start: usize, | ||
current: usize, | ||
line: i32, | ||
} | ||
|
||
impl Scanner { | ||
pub fn new(source: String) -> Self { | ||
Self { | ||
source, | ||
tokens: Vec::new(), | ||
start: 0, | ||
current: 0, | ||
line: 1, | ||
} | ||
} | ||
|
||
pub fn scan_tokens(&mut self) -> &Vec<Token> { | ||
while !self.is_at_end() { | ||
self.start = self.current; | ||
self.scan_token(); | ||
} | ||
|
||
self.tokens.push(Token::new(TokenType::EOF, "", self.line)); | ||
&self.tokens | ||
} | ||
|
||
fn scan_token(&mut self) { | ||
let c: char = self.advance(); | ||
match c { | ||
'(' => self.add_token(TokenType::LeftParen), | ||
')' => self.add_token(TokenType::RightParen), | ||
'{' => self.add_token(TokenType::LeftBrace), | ||
'}' => self.add_token(TokenType::RightBrace), | ||
',' => self.add_token(TokenType::Comma), | ||
'.' => self.add_token(TokenType::Dot), | ||
'-' => self.add_token(TokenType::Minus), | ||
'+' => self.add_token(TokenType::Plus), | ||
';' => self.add_token(TokenType::Semicolon), | ||
'*' => self.add_token(TokenType::Star), | ||
'!' => { | ||
if self.r#match('=') { | ||
self.add_token(TokenType::BangEqual) | ||
} else { | ||
self.add_token(TokenType::Bang) | ||
} | ||
} | ||
'=' => { | ||
if self.r#match('=') { | ||
self.add_token(TokenType::EqualEqual) | ||
} else { | ||
self.add_token(TokenType::Equal) | ||
} | ||
} | ||
'<' => { | ||
if self.r#match('=') { | ||
self.add_token(TokenType::LessEqual) | ||
} else { | ||
self.add_token(TokenType::Less) | ||
} | ||
} | ||
'>' => { | ||
if self.r#match('=') { | ||
self.add_token(TokenType::GreaterEqual) | ||
} else { | ||
self.add_token(TokenType::Greater) | ||
} | ||
} | ||
'/' => { | ||
if self.r#match('/') { | ||
// A comment goes until the end of the line. | ||
while self.peek() != '\n' && !self.is_at_end() { | ||
self.advance(); | ||
} | ||
} else { | ||
self.add_token(TokenType::Slash) | ||
} | ||
} | ||
' ' | '\r' | '\t' => (), // Ignore whitespace | ||
'\n' => self.line += 1, | ||
'"' => self.string(), | ||
c => { | ||
if c.is_digit(10) { | ||
self.number() | ||
} else if c.is_alphabetic() || c == '_' { | ||
self.identifier() | ||
} else { | ||
error(self.line, "Unexpected character.") | ||
} | ||
} | ||
} | ||
} | ||
|
||
fn identifier(&mut self) { | ||
while self.peek().is_alphanumeric() || self.peek() == '_' { | ||
self.advance(); | ||
} | ||
|
||
// See if the identifier is a reserved word. | ||
let text = self | ||
.source | ||
.get(self.start..self.current) | ||
.expect("Unexpected end."); | ||
|
||
let tpe: TokenType = KEYWORDS.get(text).cloned().unwrap_or(TokenType::Identifier); | ||
self.add_token(tpe); | ||
} | ||
|
||
fn number(&mut self) { | ||
while self.peek().is_digit(10) { | ||
self.advance(); | ||
} | ||
|
||
// Look for a fractional part. | ||
if self.peek() == '.' && self.peek_next().is_digit(10) { | ||
// Consumer the ".". | ||
self.advance(); | ||
|
||
while self.peek().is_digit(10) { | ||
self.advance(); | ||
} | ||
} | ||
|
||
let n: f64 = self | ||
.source | ||
.get(self.start..self.current) | ||
.expect("Unexpected end.") | ||
.parse() | ||
.expect("Scanned number could not be parsed."); | ||
self.add_token(TokenType::Number { literal: n }) | ||
} | ||
|
||
fn string(&mut self) { | ||
while self.peek() != '"' && !self.is_at_end() { | ||
if self.peek() == '\n' { | ||
self.line += 1; | ||
} | ||
self.advance(); | ||
} | ||
|
||
// Unterminated string. | ||
if self.is_at_end() { | ||
error(self.line, "Unterminated string."); | ||
} | ||
|
||
// The closing ". | ||
self.advance(); | ||
|
||
// Trim the surrounding quotes. | ||
let literal = self | ||
.source | ||
.get((self.start + 1)..(self.current - 1)) | ||
.expect("Unexpected end.") | ||
.to_string(); | ||
self.add_token(TokenType::String { literal }); | ||
} | ||
|
||
fn r#match(&mut self, expected: char) -> bool { | ||
if self.is_at_end() { | ||
return false; | ||
} | ||
// TODO: !self.source.get(self.current..self.current).contains(expected) | ||
if self | ||
.source | ||
.chars() | ||
.nth(self.current) | ||
.expect("Unexpected end of source.") | ||
!= expected | ||
{ | ||
return false; | ||
} | ||
|
||
self.current += 1; | ||
true | ||
} | ||
|
||
fn peek(&self) -> char { | ||
self.source.chars().nth(self.current).unwrap_or('\0') | ||
} | ||
|
||
fn peek_next(&self) -> char { | ||
self.source.chars().nth(self.current + 1).unwrap_or('\0') | ||
} | ||
|
||
fn is_at_end(&self) -> bool { | ||
self.current >= self.source.len() | ||
} | ||
|
||
fn advance(&mut self) -> char { | ||
self.current += 1; | ||
// TODO: work on &str directly. | ||
let char_vec: Vec<char> = self.source.chars().collect(); | ||
char_vec[self.current - 1] | ||
} | ||
|
||
fn add_token(&mut self, tpe: TokenType) { | ||
let text = self | ||
.source | ||
.get(self.start..self.current) | ||
.expect("Source token is empty."); | ||
self.tokens.push(Token::new(tpe, text, self.line)) | ||
} | ||
} |
Oops, something went wrong.