* Lexer tests and token struct
* fix: module naming and import resolution and start of lexer. Because we don't have our package published or set up to publish properly, we can't pull a package from the GitHub domain.
* Functions for lexer to read chars in strings. Added tests.
* Support for parsing numbers and eating whitespace
* Added more tokens to lexer. Created lookahead functionality.
* Implementation of REPL. Can be run from the command line.
* Fixed go test failing
* style: gopls format

Co-authored-by: Joseph Porrino <joeyporrino1998@gmail.com>
1 parent 099840e · commit 7ef6735 · 6 changed files with 365 additions and 3 deletions.
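For orientation, here is a minimal sketch (not part of the commit) of how the pieces below fit together: the token package defines the token vocabulary, the lexer turns source text into tokens, and the REPL prints each token it produces per input line. It assumes the YARTBML module layout introduced in this commit.

```go
package main

import (
	"fmt"

	"YARTBML/lexer"
	"YARTBML/token"
)

func main() {
	// Feed a small YARTBML snippet to the lexer and print every token
	// until EOF, mirroring what the REPL loop below does per input line.
	l := lexer.New("let add = fn(x, y) { x + y; };")
	for tok := l.NextToken(); tok.Type != token.EOF; tok = l.NextToken() {
		fmt.Printf("%+v\n", tok)
	}
}
```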
@@ -1,3 +1,3 @@
-module github.com/dineshUmasankar/YARTBML
+module YARTBML
 
 go 1.22.0
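The module rename matters for import resolution: with `module YARTBML`, sibling packages in this repository are imported by the short module-relative path instead of a `github.com/...` path that would have to be published and fetchable. A hypothetical file illustrating the resulting import form (the file and helper name here are illustrative only):

```go
// Hypothetical file inside this repository, not part of the commit.
package example

import "YARTBML/token" // resolves within the local module; no published GitHub module required

// eofToken is a throwaway helper that just exercises the import.
func eofToken() token.Token {
	return token.Token{Type: token.EOF, Literal: ""}
}
```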
@@ -1 +1,135 @@
package lexer

import "YARTBML/token"

type Lexer struct {
	input        string
	position     int  // current position in input (points to current char)
	readPosition int  // current reading position in input (after current char)
	ch           byte // current char under examination
}

func New(input string) *Lexer {
	l := &Lexer{input: input}
	l.readChar()
	return l
}

func (l *Lexer) readChar() {
	if l.readPosition >= len(l.input) {
		l.ch = 0
	} else {
		l.ch = l.input[l.readPosition]
	}
	l.position = l.readPosition
	l.readPosition += 1
}

func newToken(tokenType token.TokenType, ch byte) token.Token {
	return token.Token{Type: tokenType, Literal: string(ch)}
}

func (l *Lexer) NextToken() token.Token {
	var tok token.Token
	l.skipWhitespace()
	switch l.ch {
	case '=':
		if l.peekChar() == '=' {
			ch := l.ch
			l.readChar()
			literal := string(ch) + string(l.ch)
			tok = token.Token{Type: token.EQ, Literal: literal}
		} else {
			tok = newToken(token.ASSIGN, l.ch)
		}
	case '+':
		tok = newToken(token.PLUS, l.ch)
	case '-':
		tok = newToken(token.MINUS, l.ch)
	case '!':
		if l.peekChar() == '=' {
			ch := l.ch
			l.readChar()
			literal := string(ch) + string(l.ch)
			tok = token.Token{Type: token.NOT_EQ, Literal: literal}
		} else {
			tok = newToken(token.BANG, l.ch)
		}
	case '/':
		tok = newToken(token.SLASH, l.ch)
	case '*':
		tok = newToken(token.ASTERISK, l.ch)
	case '<':
		tok = newToken(token.LT, l.ch)
	case '>':
		tok = newToken(token.GT, l.ch)
	case '(':
		tok = newToken(token.LPAREN, l.ch)
	case ')':
		tok = newToken(token.RPAREN, l.ch)
	case ',':
		tok = newToken(token.COMMA, l.ch)
	case ';':
		tok = newToken(token.SEMICOLON, l.ch)
	case '{':
		tok = newToken(token.LBRACE, l.ch)
	case '}':
		tok = newToken(token.RBRACE, l.ch)
	case 0:
		tok.Literal = ""
		tok.Type = token.EOF
	default:
		if isLetter(l.ch) {
			tok.Literal = l.readIdentifier()
			tok.Type = token.LookupIdent(tok.Literal)
			return tok
		} else if isDigit(l.ch) {
			tok.Type = token.INT
			tok.Literal = l.readNumber()
			return tok
		} else {
			tok = newToken(token.ILLEGAL, l.ch)
		}
	}
	l.readChar()
	return tok
}

func (l *Lexer) readIdentifier() string {
	position := l.position
	for isLetter(l.ch) {
		l.readChar()
	}
	return l.input[position:l.position]
}

func (l *Lexer) readNumber() string {
	position := l.position
	for isDigit(l.ch) {
		l.readChar()
	}
	return l.input[position:l.position]
}

func isLetter(ch byte) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
}

func isDigit(ch byte) bool {
	return '0' <= ch && ch <= '9'
}

func (l *Lexer) skipWhitespace() {
	for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
		l.readChar()
	}
}

func (l *Lexer) peekChar() byte {
	if l.readPosition >= len(l.input) {
		return 0
	} else {
		return l.input[l.readPosition]
	}
}
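The `peekChar` lookahead is what lets `==` and `!=` come out as single two-character tokens. A hypothetical test, not part of this commit, that isolates that behavior:

```go
package lexer

import (
	"testing"

	"YARTBML/token"
)

// TestLookahead (illustrative only) checks that '=' followed by '=' is
// emitted as one EQ token, a lone '=' stays ASSIGN, and "!=" becomes NOT_EQ.
func TestLookahead(t *testing.T) {
	l := New("== = !=")
	want := []token.Token{
		{Type: token.EQ, Literal: "=="},
		{Type: token.ASSIGN, Literal: "="},
		{Type: token.NOT_EQ, Literal: "!="},
		{Type: token.EOF, Literal: ""},
	}
	for i, w := range want {
		if got := l.NextToken(); got != w {
			t.Fatalf("tokens[%d]: got %+v, want %+v", i, got, w)
		}
	}
}
```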
@@ -0,0 +1,122 @@
package lexer

import (
	"YARTBML/token"
	"testing"
)

func TestNextToken(t *testing.T) {
	input := `
let five = 5;
let ten = 10;
let add = fn(x, y) {
	x + y;
};
let result = add(five, ten);
!-/*5;
5 < 10 > 5;
if (5 < 10) {
	return true;
} else {
	return false;
}
10 == 10;
10 != 9;
`
	tests := []struct {
		expectedType    token.TokenType
		expectedLiteral string
	}{
		{token.LET, "let"},
		{token.IDENT, "five"},
		{token.ASSIGN, "="},
		{token.INT, "5"},
		{token.SEMICOLON, ";"},
		{token.LET, "let"},
		{token.IDENT, "ten"},
		{token.ASSIGN, "="},
		{token.INT, "10"},
		{token.SEMICOLON, ";"},
		{token.LET, "let"},
		{token.IDENT, "add"},
		{token.ASSIGN, "="},
		{token.FUNCTION, "fn"},
		{token.LPAREN, "("},
		{token.IDENT, "x"},
		{token.COMMA, ","},
		{token.IDENT, "y"},
		{token.RPAREN, ")"},
		{token.LBRACE, "{"},
		{token.IDENT, "x"},
		{token.PLUS, "+"},
		{token.IDENT, "y"},
		{token.SEMICOLON, ";"},
		{token.RBRACE, "}"},
		{token.SEMICOLON, ";"},
		{token.LET, "let"},
		{token.IDENT, "result"},
		{token.ASSIGN, "="},
		{token.IDENT, "add"},
		{token.LPAREN, "("},
		{token.IDENT, "five"},
		{token.COMMA, ","},
		{token.IDENT, "ten"},
		{token.RPAREN, ")"},
		{token.SEMICOLON, ";"},
		{token.BANG, "!"},
		{token.MINUS, "-"},
		{token.SLASH, "/"},
		{token.ASTERISK, "*"},
		{token.INT, "5"},
		{token.SEMICOLON, ";"},
		{token.INT, "5"},
		{token.LT, "<"},
		{token.INT, "10"},
		{token.GT, ">"},
		{token.INT, "5"},
		{token.SEMICOLON, ";"},
		{token.IF, "if"},
		{token.LPAREN, "("},
		{token.INT, "5"},
		{token.LT, "<"},
		{token.INT, "10"},
		{token.RPAREN, ")"},
		{token.LBRACE, "{"},
		{token.RETURN, "return"},
		{token.TRUE, "true"},
		{token.SEMICOLON, ";"},
		{token.RBRACE, "}"},
		{token.ELSE, "else"},
		{token.LBRACE, "{"},
		{token.RETURN, "return"},
		{token.FALSE, "false"},
		{token.SEMICOLON, ";"},
		{token.RBRACE, "}"},
		{token.INT, "10"},
		{token.EQ, "=="},
		{token.INT, "10"},
		{token.SEMICOLON, ";"},
		{token.INT, "10"},
		{token.NOT_EQ, "!="},
		{token.INT, "9"},
		{token.SEMICOLON, ";"},
		{token.EOF, ""},
	}

	l := New(input)

	for i, tt := range tests {
		tok := l.NextToken()

		if tok.Type != tt.expectedType {
			t.Fatalf("tests[%d] - tokentype wrong. expected=%q, got=%q",
				i, tt.expectedType, tok.Type)
		}
		if tok.Literal != tt.expectedLiteral {
			t.Fatalf("tests[%d] - literal wrong. expected=%q, got=%q",
				i, tt.expectedLiteral, tok.Literal)
		}
	}
}
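One path the table above does not exercise is the fallback in `NextToken`'s `default` branch: any character that is not whitespace, an operator, a delimiter, a letter, or a digit becomes an `ILLEGAL` token. A hypothetical extra case, not part of this commit:

```go
package lexer

import (
	"testing"

	"YARTBML/token"
)

// TestIllegalToken (illustrative only): an unrecognized character such as
// '@' is returned as an ILLEGAL token carrying that character as its literal.
func TestIllegalToken(t *testing.T) {
	l := New("@")
	tok := l.NextToken()
	if tok.Type != token.ILLEGAL || tok.Literal != "@" {
		t.Fatalf("expected ILLEGAL token %q, got %+v", "@", tok)
	}
}
```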
@@ -1,7 +1,19 @@
 package main
 
-import "fmt"
+import (
+	"YARTBML/repl"
+	"fmt"
+	"os"
+	"os/user"
+)
 
 func main() {
-	fmt.Println("Hello World!")
+	user, err := user.Current()
+	if err != nil {
+		panic(err)
+	}
+	fmt.Printf("Hello %s! This is the YARTBML programming language!\n",
+		user.Username)
+	fmt.Printf("Feel free to type in commands\n")
+	repl.Start(os.Stdin, os.Stdout)
 }
@@ -1 +1,28 @@
package repl

import (
	"YARTBML/lexer"
	"YARTBML/token"
	"bufio"
	"fmt"
	"io"
)

const PROMPT = ">> "

func Start(in io.Reader, out io.Writer) {
	scanner := bufio.NewScanner(in)
	for {
		fmt.Fprintf(out, PROMPT)
		scanned := scanner.Scan()
		if !scanned {
			return
		}
		line := scanner.Text()
		l := lexer.New(line)
		for tok := l.NextToken(); tok.Type != token.EOF; tok = l.NextToken() {
			fmt.Fprintf(out, "%+v\n", tok)
		}
	}
}
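Because `Start` takes an `io.Reader` and an `io.Writer` rather than talking to the terminal directly, it can also be driven programmatically, which is handy for testing. A minimal sketch, not part of this commit:

```go
package main

import (
	"bytes"
	"fmt"
	"strings"

	"YARTBML/repl"
)

func main() {
	// Feed one line of input to the REPL and capture its output in a buffer
	// instead of wiring it to os.Stdin/os.Stdout.
	in := strings.NewReader("let five = 5;\n")
	var out bytes.Buffer
	repl.Start(in, &out)
	fmt.Print(out.String())
}
```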
@@ -1 +1,68 @@
package token

type TokenType string

type Token struct {
	Type    TokenType
	Literal string
}

// Symbolic names substituted at compile time for the assigned value
const (
	ILLEGAL = "ILLEGAL"
	EOF     = "EOF"

	// Identifiers + literals
	IDENT = "IDENT" // add, foobar, x, y, ...
	INT   = "INT"   // 123456

	// Operators
	ASSIGN   = "="
	PLUS     = "+"
	MINUS    = "-"
	BANG     = "!"
	ASTERISK = "*"
	SLASH    = "/"

	LT = "<"
	GT = ">"

	EQ     = "=="
	NOT_EQ = "!="

	// Delimiters
	COMMA     = ","
	SEMICOLON = ";"

	LPAREN = "("
	RPAREN = ")"
	LBRACE = "{"
	RBRACE = "}"

	// Keywords
	FUNCTION = "FUNCTION"
	LET      = "LET"
	TRUE     = "TRUE"
	FALSE    = "FALSE"
	IF       = "IF"
	ELSE     = "ELSE"
	RETURN   = "RETURN"
)

var keywords = map[string]TokenType{
	"fn":     FUNCTION,
	"let":    LET,
	"true":   TRUE,
	"false":  FALSE,
	"if":     IF,
	"else":   ELSE,
	"return": RETURN,
}

func LookupIdent(ident string) TokenType {
	if tok, ok := keywords[ident]; ok {
		return tok
	}
	return IDENT
}
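`LookupIdent` is the piece that separates keywords from user-defined identifiers: known words map to their keyword token type, and everything else falls back to `IDENT`. A quick illustration, not part of this commit:

```go
package main

import (
	"fmt"

	"YARTBML/token"
)

func main() {
	// Keywords resolve to their dedicated token types; "foobar" is not a
	// keyword, so it comes back as IDENT.
	for _, word := range []string{"fn", "let", "return", "foobar"} {
		fmt.Printf("%-7s -> %s\n", word, token.LookupIdent(word))
	}
}
```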