Skip to content

Commit

Permalink
Chapter 4: Scanning (#1)
Browse files Browse the repository at this point in the history
This implements the [token scanning](http://craftinginterpreters.com/scanning.html).
I tried to stay as close to the original Java implementation as possible while still
being "rusty".

Differences to the Java implementation:
* There is no `null` check and a few `if-else` blocks became `unwrap_or('\0')`.
* Literals are not of type `Object` but use `String` and `f64`.
* `is_digit` and `is_alpha` are much simpler in Rust.
* The `KEYWORDS` map is not only static but created at compile time thanks
  to [phf](https://crates.io/crates/phf).

Possible improvements in Rust:
* One could use a `PeekableIterator` for `Chars`. However, this would diverge from
  the original scanner quite a bit.
* One could strive for zero-copy lexemes. To keep it simple I stuck with `String`.
* The strangest part is creating a substring with
  ```
  self.source .get(self.start..self.current).expect("Source token is empty.");
  ```
  I was afraid that changing it would make it harder to relate to the original chapter.
* The scanner could be less stateful and avoid `self.current` and `self.start`.
  • Loading branch information
jeschkies authored Oct 10, 2019
1 parent edcd88e commit 9fef15e
Show file tree
Hide file tree
Showing 9 changed files with 407 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
# Cargo.lock

# These are backup files generated by rustfmt
**/*.rs.bk
5 changes: 5 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
language: rust
rust:
- stable
- beta
- nightly
12 changes: 12 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "lox-rs"
version = "0.1.0"
authors = ["Karsten Jeschkies <k@jeschkies.xyz>"]
edition = "2018"
build = "build.rs"

[dependencies]
phf = "0.7.24"

[build-dependencies]
phf_codegen = "0.7.24"
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
# lox-rs
A Lox Interpreter in Rust
# Lox-rs [![Build Status](https://travis-ci.com/jeschkies/lox-rs.svg?branch=master)](https://travis-ci.com/jeschkies/lox-rs)

A [Lox](http://craftinginterpreters.com/the-lox-language.html) Interpreter in Rust based on the
[Crafting Interpreters](http://craftinginterpreters.com) book.

Each commit corresponds to one chapter in the book.
38 changes: 38 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
extern crate phf_codegen;

use std::env;
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;

/// Build script: generates a compile-time `KEYWORDS` map (via phf_codegen)
/// into `$OUT_DIR/keywords.rs`, which the crate includes at build time.
fn main() {
    let out_dir = env::var("OUT_DIR").unwrap();
    let path = Path::new(&out_dir).join("keywords.rs");
    let mut file = BufWriter::new(File::create(&path).unwrap());

    write!(
        &mut file,
        "pub static KEYWORDS: phf::Map<&'static str, TokenType> =
"
    )
    .unwrap();

    // Every Lox reserved word paired with the source text of its TokenType
    // variant; phf_codegen emits the value strings verbatim into the map.
    let keywords: &[(&str, &str)] = &[
        ("and", "TokenType::And"),
        ("class", "TokenType::Class"),
        ("else", "TokenType::Else"),
        ("false", "TokenType::False"),
        ("fun", "TokenType::Fun"),
        ("for", "TokenType::For"),
        ("if", "TokenType::If"),
        ("nil", "TokenType::Nil"),
        ("or", "TokenType::Or"),
        ("print", "TokenType::Print"),
        ("return", "TokenType::Return"),
        ("super", "TokenType::Super"),
        ("this", "TokenType::This"),
        ("true", "TokenType::True"),
        ("var", "TokenType::Var"),
        ("while", "TokenType::While"),
    ];

    let mut map = phf_codegen::Map::new();
    for &(word, variant) in keywords {
        map.entry(word, variant);
    }
    map.build(&mut file).unwrap();
    write!(&mut file, ";\n").unwrap();
}
8 changes: 8 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/// Reports an error that has no lexeme/location context beyond the line number.
pub fn error(line: i32, message: &str) {
    report(line, "", message);
}

/// Prints a formatted error message to stderr.
///
/// `where_` is extra location context (e.g. " at 'x'"); it is empty for
/// plain line-level errors.
pub fn report(line: i32, where_: &str, message: &str) {
    let formatted = format!("[line {}] Error{}: {}", line, where_, message);
    eprintln!("{}", formatted);
    // had_error = true; TODO: Use custom Error type
}
46 changes: 46 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
mod error;
mod scanner;
mod token;

use std::io::{self, BufRead};
use std::process::exit;
use std::{env, fs};

use scanner::Scanner;

/// Entry point: with one argument, run that script file; with none, start an
/// interactive prompt; otherwise print usage and exit with code 64 (EX_USAGE).
fn main() -> Result<(), Box<dyn std::error::Error + 'static>> {
    let args: Vec<String> = env::args().collect();
    if args.len() == 2 {
        run_file(&args[1])?;
    } else if args.len() == 1 {
        run_prompt()?;
    } else {
        eprintln!("Usage: lox-rs [script]");
        exit(64)
    }
    Ok(())
}

/// Reads the whole file at `path` and runs it as Lox source.
fn run_file(path: &str) -> io::Result<()> {
    run(fs::read_to_string(path)?)
}

/// Runs an interactive read-eval-print loop over stdin.
///
/// The `"> "` prompt is printed and explicitly flushed *before* each line is
/// read. Previously it was printed after running the line and never flushed,
/// so (stdout being line-buffered and the prompt having no newline) it never
/// actually appeared before the user's input.
fn run_prompt() -> io::Result<()> {
    use std::io::Write; // for Stdout::flush

    let stdin = io::stdin();
    let mut stdout = io::stdout();

    print!("> ");
    stdout.flush()?;
    for line in stdin.lock().lines() {
        // Deliberately best-effort: scan errors are reported inside `run`
        // and must not end the REPL session.
        let _ = run(line?);
        print!("> ");
        stdout.flush()?;
    }
    Ok(())
}

/// Scans `source` into tokens and prints each one on its own line.
fn run(source: String) -> io::Result<()> {
    let mut scanner = Scanner::new(source);
    for token in scanner.scan_tokens() {
        println!("{}", token);
    }
    Ok(())
}
208 changes: 208 additions & 0 deletions src/scanner.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
use crate::error::error;
use crate::token::{Token, TokenType, KEYWORDS};

/// A stateful scanner that turns Lox source text into a flat list of tokens.
pub struct Scanner {
// The raw source text being scanned.
source: String,
// Tokens produced so far; `scan_tokens` appends a trailing EOF token.
tokens: Vec<Token>,
// Offset of the first character of the lexeme currently being scanned.
// NOTE(review): used both as a char index (`chars().nth`) and a byte index
// (`source.get(..)`); these agree only for ASCII input — TODO confirm.
start: usize,
// Offset of the character currently being considered.
current: usize,
// Current line number, used for error reporting.
line: i32,
}

impl Scanner {
    /// Creates a scanner positioned at the start of `source`.
    pub fn new(source: String) -> Self {
        Self {
            source,
            tokens: Vec::new(),
            start: 0,
            current: 0,
            line: 1,
        }
    }

    /// Scans the entire source, returning all tokens plus a trailing EOF token.
    pub fn scan_tokens(&mut self) -> &Vec<Token> {
        while !self.is_at_end() {
            // We are at the beginning of the next lexeme.
            self.start = self.current;
            self.scan_token();
        }

        self.tokens.push(Token::new(TokenType::EOF, "", self.line));
        &self.tokens
    }

    /// Scans a single token starting at `self.start == self.current`.
    fn scan_token(&mut self) {
        let c: char = self.advance();
        match c {
            '(' => self.add_token(TokenType::LeftParen),
            ')' => self.add_token(TokenType::RightParen),
            '{' => self.add_token(TokenType::LeftBrace),
            '}' => self.add_token(TokenType::RightBrace),
            ',' => self.add_token(TokenType::Comma),
            '.' => self.add_token(TokenType::Dot),
            '-' => self.add_token(TokenType::Minus),
            '+' => self.add_token(TokenType::Plus),
            ';' => self.add_token(TokenType::Semicolon),
            '*' => self.add_token(TokenType::Star),
            // Two-character operators: the second char decides the token type.
            '!' => {
                if self.r#match('=') {
                    self.add_token(TokenType::BangEqual)
                } else {
                    self.add_token(TokenType::Bang)
                }
            }
            '=' => {
                if self.r#match('=') {
                    self.add_token(TokenType::EqualEqual)
                } else {
                    self.add_token(TokenType::Equal)
                }
            }
            '<' => {
                if self.r#match('=') {
                    self.add_token(TokenType::LessEqual)
                } else {
                    self.add_token(TokenType::Less)
                }
            }
            '>' => {
                if self.r#match('=') {
                    self.add_token(TokenType::GreaterEqual)
                } else {
                    self.add_token(TokenType::Greater)
                }
            }
            '/' => {
                if self.r#match('/') {
                    // A comment goes until the end of the line.
                    while self.peek() != '\n' && !self.is_at_end() {
                        self.advance();
                    }
                } else {
                    self.add_token(TokenType::Slash)
                }
            }
            ' ' | '\r' | '\t' => (), // Ignore whitespace
            '\n' => self.line += 1,
            '"' => self.string(),
            c => {
                if c.is_digit(10) {
                    self.number()
                } else if c.is_alphabetic() || c == '_' {
                    self.identifier()
                } else {
                    error(self.line, "Unexpected character.")
                }
            }
        }
    }

    /// Scans an identifier or, if the lexeme is a reserved word, a keyword.
    fn identifier(&mut self) {
        while self.peek().is_alphanumeric() || self.peek() == '_' {
            self.advance();
        }

        // See if the identifier is a reserved word.
        let text = self
            .source
            .get(self.start..self.current)
            .expect("Unexpected end.");

        let tpe: TokenType = KEYWORDS.get(text).cloned().unwrap_or(TokenType::Identifier);
        self.add_token(tpe);
    }

    /// Scans a number literal, with an optional fractional part.
    fn number(&mut self) {
        while self.peek().is_digit(10) {
            self.advance();
        }

        // Look for a fractional part. A trailing "." (e.g. "123.") is NOT
        // consumed: the dot becomes its own token, as in the book.
        if self.peek() == '.' && self.peek_next().is_digit(10) {
            // Consume the ".".
            self.advance();

            while self.peek().is_digit(10) {
                self.advance();
            }
        }

        let n: f64 = self
            .source
            .get(self.start..self.current)
            .expect("Unexpected end.")
            .parse()
            .expect("Scanned number could not be parsed.");
        self.add_token(TokenType::Number { literal: n })
    }

    /// Scans a string literal; supports multi-line strings (newlines bump
    /// the line counter).
    fn string(&mut self) {
        while self.peek() != '"' && !self.is_at_end() {
            if self.peek() == '\n' {
                self.line += 1;
            }
            self.advance();
        }

        // Unterminated string.
        if self.is_at_end() {
            error(self.line, "Unterminated string.");
            // Bail out: previously execution fell through, consumed past the
            // end of the source (panicking in `advance`) and sliced with
            // `self.current - 1` beyond the source length.
            return;
        }

        // The closing ".
        self.advance();

        // Trim the surrounding quotes.
        let literal = self
            .source
            .get((self.start + 1)..(self.current - 1))
            .expect("Unexpected end.")
            .to_string();
        self.add_token(TokenType::String { literal });
    }

    /// Consumes the next character only if it equals `expected`; returns
    /// whether it matched (conditional advance).
    fn r#match(&mut self, expected: char) -> bool {
        if self.is_at_end() {
            return false;
        }
        if self
            .source
            .chars()
            .nth(self.current)
            .expect("Unexpected end of source.")
            != expected
        {
            return false;
        }

        self.current += 1;
        true
    }

    /// One-character lookahead; `'\0'` stands in for end-of-input.
    fn peek(&self) -> char {
        self.source.chars().nth(self.current).unwrap_or('\0')
    }

    /// Two-character lookahead; `'\0'` stands in for end-of-input.
    fn peek_next(&self) -> char {
        self.source.chars().nth(self.current + 1).unwrap_or('\0')
    }

    /// Whether the scanner has consumed all of the source.
    // NOTE(review): compares a char count against a byte length; equivalent
    // only for ASCII sources — TODO confirm/unify with the byte slicing above.
    fn is_at_end(&self) -> bool {
        self.current >= self.source.len()
    }

    /// Consumes and returns the next character.
    fn advance(&mut self) -> char {
        // Walk to the current char directly instead of collecting the whole
        // source into a fresh Vec<char> on every call (previously O(n) heap
        // allocation per consumed character, O(n^2) over the whole scan).
        let c = self
            .source
            .chars()
            .nth(self.current)
            .expect("Advanced past end of source.");
        self.current += 1;
        c
    }

    /// Pushes a token whose lexeme is the current `start..current` slice.
    fn add_token(&mut self, tpe: TokenType) {
        let text = self
            .source
            .get(self.start..self.current)
            .expect("Source token is empty.");
        self.tokens.push(Token::new(tpe, text, self.line))
    }
}
Loading

0 comments on commit 9fef15e

Please sign in to comment.